Index: vendor/illumos/dist/cmd/zdb/zdb.c
===================================================================
--- vendor/illumos/dist/cmd/zdb/zdb.c	(revision 329752)
+++ vendor/illumos/dist/cmd/zdb/zdb.c	(revision 329753)
@@ -1,4717 +1,4720 @@
 /*
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
  * Common Development and Distribution License (the "License").
  * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
  * See the License for the specific language governing permissions
  * and limitations under the License.
  *
  * When distributing Covered Code, include this CDDL HEADER in each
  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  * If applicable, add the following below this CDDL HEADER, with the
  * fields enclosed by brackets "[]" replaced with your own identifying
  * information: Portions Copyright [yyyy] [name of copyright owner]
  *
  * CDDL HEADER END
  */
 
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2011, 2017 by Delphix. All rights reserved.
  * Copyright (c) 2014 Integros [integros.com]
  * Copyright 2017 Nexenta Systems, Inc.
+ * Copyright 2017 RackTop Systems.
  */
 
 #include <stdio.h>
 #include <unistd.h>
 #include <stdio_ext.h>
 #include <stdlib.h>
 #include <ctype.h>
 #include <sys/zfs_context.h>
 #include <sys/spa.h>
 #include <sys/spa_impl.h>
 #include <sys/dmu.h>
 #include <sys/zap.h>
 #include <sys/fs/zfs.h>
 #include <sys/zfs_znode.h>
 #include <sys/zfs_sa.h>
 #include <sys/sa.h>
 #include <sys/sa_impl.h>
 #include <sys/vdev.h>
 #include <sys/vdev_impl.h>
 #include <sys/metaslab_impl.h>
 #include <sys/dmu_objset.h>
 #include <sys/dsl_dir.h>
 #include <sys/dsl_dataset.h>
 #include <sys/dsl_pool.h>
 #include <sys/dbuf.h>
 #include <sys/zil.h>
 #include <sys/zil_impl.h>
 #include <sys/stat.h>
 #include <sys/resource.h>
 #include <sys/dmu_traverse.h>
 #include <sys/zio_checksum.h>
 #include <sys/zio_compress.h>
 #include <sys/zfs_fuid.h>
 #include <sys/arc.h>
 #include <sys/ddt.h>
 #include <sys/zfeature.h>
 #include <sys/abd.h>
 #include <sys/blkptr.h>
 #include <zfs_comutil.h>
 #include <libcmdutils.h>
 #undef verify
 #include <libzfs.h>
 
 #include "zdb.h"
 
 /*
  * Name/type lookup helpers that tolerate out-of-range indices.
  *
  * ZDB_COMPRESS_NAME / ZDB_CHECKSUM_NAME: table name for idx, or "UNKNOWN"
  * when idx is not below the corresponding *_FUNCTIONS count.
  *
  * ZDB_OT_NAME: dmu_ot[] name for idx below DMU_OT_NUMTYPES; otherwise the
  * dmu_ot_byteswap[] name when DMU_OT_IS_VALID(idx); otherwise "UNKNOWN".
  *
  * ZDB_OT_TYPE: idx itself when below DMU_OT_NUMTYPES; the DMU_OTN_ZAP_* and
  * DMU_OTN_UINT64_* values collapse to DMU_OT_ZAP_OTHER and
  * DMU_OT_UINT64_OTHER respectively; anything else maps to DMU_OT_NUMTYPES.
  */
 #define	ZDB_COMPRESS_NAME(idx) ((idx) < ZIO_COMPRESS_FUNCTIONS ?	\
 	zio_compress_table[(idx)].ci_name : "UNKNOWN")
 #define	ZDB_CHECKSUM_NAME(idx) ((idx) < ZIO_CHECKSUM_FUNCTIONS ?	\
 	zio_checksum_table[(idx)].ci_name : "UNKNOWN")
 #define	ZDB_OT_NAME(idx) ((idx) < DMU_OT_NUMTYPES ?	\
 	dmu_ot[(idx)].ot_name : DMU_OT_IS_VALID(idx) ?	\
 	dmu_ot_byteswap[DMU_OT_BYTESWAP(idx)].ob_name : "UNKNOWN")
 #define	ZDB_OT_TYPE(idx) ((idx) < DMU_OT_NUMTYPES ? (idx) :		\
 	(idx) == DMU_OTN_ZAP_DATA || (idx) == DMU_OTN_ZAP_METADATA ?	\
 	DMU_OT_ZAP_OTHER : \
 	(idx) == DMU_OTN_UINT64_DATA || (idx) == DMU_OTN_UINT64_METADATA ? \
 	DMU_OT_UINT64_OTHER : DMU_OT_NUMTYPES)
 
 /*
  * Variables that live outside this file: declared extern for normal builds,
  * and given local definitions when compiled with lint defined.
  */
 #ifndef lint
 extern int reference_tracking_enable;
 extern boolean_t zfs_recover;
 extern uint64_t zfs_arc_max, zfs_arc_meta_limit;
 extern int zfs_vdev_async_read_max_active;
+extern int aok;
 #else
 int reference_tracking_enable;
 boolean_t zfs_recover;
 uint64_t zfs_arc_max, zfs_arc_meta_limit;
 int zfs_vdev_async_read_max_active;
+int aok;
 #endif
 
 /* Command name used as the prefix of usage and fatal-error messages. */
 static const char cmdname[] = "zdb";
 /* Per-option state, indexed by the option character (e.g. dump_opt['d']). */
 uint8_t dump_opt[256];
 
 /* Signature shared by the per-object-type dump_*() viewer functions. */
 typedef void object_viewer_t(objset_t *, uint64_t, void *data, size_t size);
 
 /*
  * Numeric arguments and their count; dump_metaslabs() reads entry 0 as a
  * vdev id and the remaining entries as metaslab numbers.
  */
 uint64_t *zopt_object = NULL;
 static unsigned zopt_objects = 0;
 libzfs_handle_t *g_zfs;
 /* NOTE(review): presumably the value adjusted by -I; confirm in main(). */
 uint64_t max_inflight = 1000;
 
 static void snprintf_blkptr_compact(char *, size_t, const blkptr_t *);
 
 /*
  * These libumem hooks provide a reasonable set of defaults for the allocator's
  * debugging facilities.
  */
 /* libumem hook: returns the default $UMEM_DEBUG setting. */
 const char *
 _umem_debug_init(void)
 {
 	return ("default,verbose"); /* $UMEM_DEBUG setting */
 }
 
 /* libumem hook: returns the default $UMEM_LOGGING setting. */
 const char *
 _umem_logging_init(void)
 {
 	static const char umem_logging[] = "fail,contents";
 
 	return (umem_logging);
 }
 
 /*
  * Print the command synopsis and option summary to stderr, then exit with
  * status 1.  This function does not return.
  *
  * NOTE(review): the default quoted for -I is kept equal to the initial value
  * of max_inflight (1000); -I is presumed to set that variable -- confirm
  * against the option parser.
  */
 static void
 usage(void)
 {
 	(void) fprintf(stderr,
 	    "Usage:\t%s [-AbcdDFGhiLMPsvX] [-e [-V] [-p <path> ...]] "
 	    "[-I <inflight I/Os>]\n"
 	    "\t\t[-o <var>=<value>]... [-t <txg>] [-U <cache>] [-x <dumpdir>]\n"
 	    "\t\t[<poolname> [<object> ...]]\n"
 	    "\t%s [-AdiPv] [-e [-V] [-p <path> ...]] [-U <cache>] <dataset> "
 	    "[<object> ...]\n"
 	    "\t%s -C [-A] [-U <cache>]\n"
 	    "\t%s -l [-Aqu] <device>\n"
 	    "\t%s -m [-AFLPX] [-e [-V] [-p <path> ...]] [-t <txg>] "
 	    "[-U <cache>]\n\t\t<poolname> [<vdev> [<metaslab> ...]]\n"
 	    "\t%s -O <dataset> <path>\n"
 	    "\t%s -R [-A] [-e [-V] [-p <path> ...]] [-U <cache>]\n"
 	    "\t\t<poolname> <vdev>:<offset>:<size>[:<flags>]\n"
 	    "\t%s -E [-A] word0:word1:...:word15\n"
 	    "\t%s -S [-AP] [-e [-V] [-p <path> ...]] [-U <cache>] "
 	    "<poolname>\n\n",
 	    cmdname, cmdname, cmdname, cmdname, cmdname, cmdname, cmdname,
 	    cmdname, cmdname);
 
 	(void) fprintf(stderr, "    Dataset name must include at least one "
 	    "separator character '/' or '@'\n");
 	(void) fprintf(stderr, "    If dataset name is specified, only that "
 	    "dataset is dumped\n");
 	(void) fprintf(stderr, "    If object numbers are specified, only "
 	    "those objects are dumped\n\n");
 	(void) fprintf(stderr, "    Options to control amount of output:\n");
 	(void) fprintf(stderr, "        -b block statistics\n");
 	(void) fprintf(stderr, "        -c checksum all metadata (twice for "
 	    "all data) blocks\n");
 	(void) fprintf(stderr, "        -C config (or cachefile if alone)\n");
 	(void) fprintf(stderr, "        -d dataset(s)\n");
 	(void) fprintf(stderr, "        -D dedup statistics\n");
 	(void) fprintf(stderr, "        -E decode and display block from an "
 	    "embedded block pointer\n");
 	(void) fprintf(stderr, "        -h pool history\n");
 	(void) fprintf(stderr, "        -i intent logs\n");
 	(void) fprintf(stderr, "        -l read label contents\n");
 	(void) fprintf(stderr, "        -L disable leak tracking (do not "
 	    "load spacemaps)\n");
 	(void) fprintf(stderr, "        -m metaslabs\n");
 	(void) fprintf(stderr, "        -M metaslab groups\n");
 	(void) fprintf(stderr, "        -O perform object lookups by path\n");
 	(void) fprintf(stderr, "        -R read and display block from a "
 	    "device\n");
 	(void) fprintf(stderr, "        -s report stats on zdb's I/O\n");
 	(void) fprintf(stderr, "        -S simulate dedup to measure effect\n");
 	(void) fprintf(stderr, "        -v verbose (applies to all "
 	    "others)\n\n");
 	(void) fprintf(stderr, "    Below options are intended for use "
 	    "with other options:\n");
 	(void) fprintf(stderr, "        -A ignore assertions (-A), enable "
 	    "panic recovery (-AA) or both (-AAA)\n");
 	(void) fprintf(stderr, "        -e pool is exported/destroyed/"
 	    "has altroot/not in a cachefile\n");
 	(void) fprintf(stderr, "        -F attempt automatic rewind within "
 	    "safe range of transaction groups\n");
 	(void) fprintf(stderr, "        -G dump zfs_dbgmsg buffer before "
 	    "exiting\n");
 	(void) fprintf(stderr, "        -I <number of inflight I/Os> -- "
 	    "specify the maximum number of "
 	    "checksumming I/Os [default is 1000]\n");
 	(void) fprintf(stderr, "        -o <variable>=<value> set global "
 	    "variable to an unsigned 32-bit integer value\n");
 	(void) fprintf(stderr, "        -p <path> -- use one or more with "
 	    "-e to specify path to vdev dir\n");
 	(void) fprintf(stderr, "        -P print numbers in parseable form\n");
 	(void) fprintf(stderr, "        -q don't print label contents\n");
 	(void) fprintf(stderr, "        -t <txg> -- highest txg to use when "
 	    "searching for uberblocks\n");
 	(void) fprintf(stderr, "        -u uberblock\n");
 	(void) fprintf(stderr, "        -U <cachefile_path> -- use alternate "
 	    "cachefile\n");
 	(void) fprintf(stderr, "        -V do verbatim import\n");
 	(void) fprintf(stderr, "        -x <dumpdir> -- "
 	    "dump all read blocks into specified directory\n");
 	(void) fprintf(stderr, "        -X attempt extreme rewind (does not "
 	    "work with dataset)\n\n");
 	(void) fprintf(stderr, "Specify an option more than once (e.g. -bb) "
 	    "to make only that option verbose\n");
 	(void) fprintf(stderr, "Default is to dump everything non-verbosely\n");
 	exit(1);
 }
 
 /*
  * Print the zfs_dbgmsg buffer, tagged "zdb", when the 'G' option is set;
  * otherwise do nothing.
  */
 static void
 dump_debug_buffer(void)
 {
 	if (!dump_opt['G'])
 		return;
 
 	(void) printf("\n");
 	zfs_dbgmsg_print("zdb");
 }
 
 /*
  * Called for usage errors that are discovered after a call to spa_open(),
  * dmu_bonus_hold(), or pool_match().  abort() is called for other errors.
  */
 
 static void
 fatal(const char *fmt, ...)
 {
 	va_list args;
 
 	/* Every message is prefixed with the command name. */
 	(void) fprintf(stderr, "%s: ", cmdname);
 
 	va_start(args, fmt);
 	(void) vfprintf(stderr, fmt, args);
 	va_end(args);
 
 	(void) fprintf(stderr, "\n");
 
 	/* Emit the debug message buffer (if requested) before exiting. */
 	dump_debug_buffer();
 
 	exit(1);
 }
 
 /*
  * Object viewer: read a packed nvlist from the object, unpack it and print
  * it.  'data' must point at a uint64_t holding the packed size in bytes;
  * like the other viewers that read 'data', a NULL pointer is ignored.
  */
 /* ARGSUSED */
 static void
 dump_packed_nvlist(objset_t *os, uint64_t object, void *data, size_t size)
 {
 	nvlist_t *nv;
 	size_t nvsize;
 	char *packed;
 
 	if (data == NULL)
 		return;
 
 	nvsize = *(uint64_t *)data;
 	packed = umem_alloc(nvsize, UMEM_NOFAIL);
 
 	VERIFY(0 == dmu_read(os, object, 0, nvsize, packed, DMU_READ_PREFETCH));
 
 	VERIFY(nvlist_unpack(packed, nvsize, &nv, 0) == 0);
 
 	/* The packed form is no longer needed once it has been unpacked. */
 	umem_free(packed, nvsize);
 
 	dump_nvlist(nv, 8);
 
 	nvlist_free(nv);
 }
 
 /*
  * Object viewer: print the fields of a spa_history_phys_t.  Nothing is
  * printed when 'data' is NULL.
  */
 /* ARGSUSED */
 static void
 dump_history_offsets(objset_t *os, uint64_t object, void *data, size_t size)
 {
 	spa_history_phys_t *hist = data;
 
 	if (hist != NULL) {
 		(void) printf("\t\tpool_create_len = %llu\n",
 		    (u_longlong_t)hist->sh_pool_create_len);
 		(void) printf("\t\tphys_max_off = %llu\n",
 		    (u_longlong_t)hist->sh_phys_max_off);
 		(void) printf("\t\tbof = %llu\n",
 		    (u_longlong_t)hist->sh_bof);
 		(void) printf("\t\teof = %llu\n",
 		    (u_longlong_t)hist->sh_eof);
 		(void) printf("\t\trecords_lost = %llu\n",
 		    (u_longlong_t)hist->sh_records_lost);
 	}
 }
 
 /*
  * Format 'num' into 'buf' (capacity 'buflen' bytes): as a plain decimal
  * number when the 'P' option is set, otherwise via nicenum().
  */
 static void
 zdb_nicenum(uint64_t num, char *buf, size_t buflen)
 {
 	if (dump_opt['P'])
 		(void) snprintf(buf, buflen, "%llu", (longlong_t)num);
 	else
 		/* buf is a pointer here, so sizeof (buf) is not its capacity */
 		nicenum(num, buf, buflen);
 }
 
 static const char histo_stars[] = "****************************************";
 static const uint64_t histo_width = sizeof (histo_stars) - 1;
 
 /*
  * Print one line per bucket of 'histo' (which has 'size' buckets), from the
  * lowest to the highest non-empty bucket.  Each line shows the bucket index
  * plus 'offset', the bucket value, and a bar of stars scaled against the
  * largest value (or against histo_width when every value is smaller).
  */
 static void
 dump_histogram(const uint64_t *histo, int size, int offset)
 {
 	int first = size - 1;
 	int last = 0;
 	uint64_t peak = 0;
 
 	for (int b = 0; b < size; b++) {
 		/* Empty buckets affect neither the range nor the peak. */
 		if (histo[b] == 0)
 			continue;
 		if (histo[b] > peak)
 			peak = histo[b];
 		if (b > last)
 			last = b;
 		if (b < first)
 			first = b;
 	}
 
 	if (peak < histo_width)
 		peak = histo_width;
 
 	for (int b = first; b <= last; b++) {
 		/* Skipping more leading stars yields a shorter bar. */
 		uint64_t skip = (peak - histo[b]) * histo_width / peak;
 
 		(void) printf("\t\t\t%3u: %6llu %s\n",
 		    b + offset, (u_longlong_t)histo[b], &histo_stars[skip]);
 	}
 }
 
 /*
  * Print the statistics zap_get_stats() reports for a ZAP object.  Prints
  * nothing if the statistics cannot be obtained.
  */
 static void
 dump_zap_stats(objset_t *os, uint64_t object)
 {
 	zap_stats_t zs;
 
 	if (zap_get_stats(os, object, &zs) != 0)
 		return;
 
 	/* With no pointer table, only the one-line microzap summary applies. */
 	if (zs.zs_ptrtbl_len == 0) {
 		ASSERT(zs.zs_num_blocks == 1);
 		(void) printf("\tmicrozap: %llu bytes, %llu entries\n",
 		    (u_longlong_t)zs.zs_blocksize,
 		    (u_longlong_t)zs.zs_num_entries);
 		return;
 	}
 
 	const struct {
 		const char *title;
 		const uint64_t *histo;
 	} histograms[] = {
 		{ "\t\tLeafs with 2^n pointers:\n",
 		    zs.zs_leafs_with_2n_pointers },
 		{ "\t\tBlocks with n*5 entries:\n",
 		    zs.zs_blocks_with_n5_entries },
 		{ "\t\tBlocks n/10 full:\n",
 		    zs.zs_blocks_n_tenths_full },
 		{ "\t\tEntries with n chunks:\n",
 		    zs.zs_entries_using_n_chunks },
 		{ "\t\tBuckets with n entries:\n",
 		    zs.zs_buckets_with_n_entries },
 	};
 
 	(void) printf("\tFat ZAP stats:\n");
 
 	(void) printf("\t\tPointer table:\n");
 	(void) printf("\t\t\t%llu elements\n",
 	    (u_longlong_t)zs.zs_ptrtbl_len);
 	(void) printf("\t\t\tzt_blk: %llu\n",
 	    (u_longlong_t)zs.zs_ptrtbl_zt_blk);
 	(void) printf("\t\t\tzt_numblks: %llu\n",
 	    (u_longlong_t)zs.zs_ptrtbl_zt_numblks);
 	(void) printf("\t\t\tzt_shift: %llu\n",
 	    (u_longlong_t)zs.zs_ptrtbl_zt_shift);
 	(void) printf("\t\t\tzt_blks_copied: %llu\n",
 	    (u_longlong_t)zs.zs_ptrtbl_blks_copied);
 	(void) printf("\t\t\tzt_nextblk: %llu\n",
 	    (u_longlong_t)zs.zs_ptrtbl_nextblk);
 
 	(void) printf("\t\tZAP entries: %llu\n",
 	    (u_longlong_t)zs.zs_num_entries);
 	(void) printf("\t\tLeaf blocks: %llu\n",
 	    (u_longlong_t)zs.zs_num_leafs);
 	(void) printf("\t\tTotal blocks: %llu\n",
 	    (u_longlong_t)zs.zs_num_blocks);
 	(void) printf("\t\tzap_block_type: 0x%llx\n",
 	    (u_longlong_t)zs.zs_block_type);
 	(void) printf("\t\tzap_magic: 0x%llx\n",
 	    (u_longlong_t)zs.zs_magic);
 	(void) printf("\t\tzap_salt: 0x%llx\n",
 	    (u_longlong_t)zs.zs_salt);
 
 	/* Each histogram is preceded by a blank line and its title. */
 	for (size_t h = 0; h < sizeof (histograms) / sizeof (histograms[0]);
 	    h++) {
 		(void) printf("\n");
 		(void) printf("%s", histograms[h].title);
 		dump_histogram(histograms[h].histo, ZAP_HISTOGRAM_SIZE, 0);
 	}
 }
 
 /* Object viewer that prints nothing. */
 /*ARGSUSED*/
 static void
 dump_none(objset_t *os, uint64_t object, void *data, size_t size)
 {
 }
 
 /* Object viewer that only reports that the object type is unknown. */
 /*ARGSUSED*/
 static void
 dump_unknown(objset_t *os, uint64_t object, void *data, size_t size)
 {
 	(void) printf("\tUNKNOWN OBJECT TYPE\n");
 }
 
 /* Object viewer with an empty body: nothing is printed. */
 /*ARGSUSED*/
 static void
 dump_uint8(objset_t *os, uint64_t object, void *data, size_t size)
 {
 }
 
 /* Object viewer with an empty body: nothing is printed. */
 /*ARGSUSED*/
 static void
 dump_uint64(objset_t *os, uint64_t object, void *data, size_t size)
 {
 }
 
 /*
  * Object viewer: print the ZAP statistics, then every attribute in the
  * object as "name = value".  One-byte attributes are printed as a string;
  * 2-, 4- and 8-byte attributes are printed as a list of integers.
  */
 /*ARGSUSED*/
 static void
 dump_zap(objset_t *os, uint64_t object, void *data, size_t size)
 {
 	zap_cursor_t zc;
 	zap_attribute_t attr;
 	void *prop;
 	unsigned i;
 
 	dump_zap_stats(os, object);
 	(void) printf("\n");
 
 	for (zap_cursor_init(&zc, os, object);
 	    zap_cursor_retrieve(&zc, &attr) == 0;
 	    zap_cursor_advance(&zc)) {
 		size_t propsize;
 
 		(void) printf("\t\t%s = ", attr.za_name);
 		if (attr.za_num_integers == 0) {
 			(void) printf("\n");
 			continue;
 		}
 		propsize = attr.za_num_integers * attr.za_integer_length;
 		prop = umem_zalloc(propsize, UMEM_NOFAIL);
 		/*
 		 * Best effort: on lookup failure the zero-filled buffer is
 		 * printed as-is.
 		 */
 		(void) zap_lookup(os, object, attr.za_name,
 		    attr.za_integer_length, attr.za_num_integers, prop);
 		if (attr.za_integer_length == 1) {
 			(void) printf("%s", (char *)prop);
 		} else {
 			for (i = 0; i < attr.za_num_integers; i++) {
 				switch (attr.za_integer_length) {
 				case 2:
 					(void) printf("%u ",
 					    ((uint16_t *)prop)[i]);
 					break;
 				case 4:
 					(void) printf("%u ",
 					    ((uint32_t *)prop)[i]);
 					break;
 				case 8:
 					/* signed cast to match %lld */
 					(void) printf("%lld ",
 					    (long long)((int64_t *)prop)[i]);
 					break;
 				default:
 					break;
 				}
 			}
 		}
 		(void) printf("\n");
 		umem_free(prop, propsize);
 	}
 	zap_cursor_fini(&zc);
 }
 
 /*
  * Object viewer: print the header fields of a bpobj_phys_t ('data', whose
  * size is 'size') and, when the 'd' option level is at least 5, every block
  * pointer stored in the object.  Nothing is printed when 'data' is NULL.
  */
 static void
 dump_bpobj(objset_t *os, uint64_t object, void *data, size_t size)
 {
 	bpobj_phys_t *phys = data;
 	char bytes[32], comp[32], uncomp[32];
 
 	/* the buffers must be large enough for nicenum output */
 	CTASSERT(sizeof (bytes) >= NN_NUMBUF_SZ);
 	CTASSERT(sizeof (comp) >= NN_NUMBUF_SZ);
 	CTASSERT(sizeof (uncomp) >= NN_NUMBUF_SZ);
 
 	if (phys == NULL)
 		return;
 
 	zdb_nicenum(phys->bpo_bytes, bytes, sizeof (bytes));
 	zdb_nicenum(phys->bpo_comp, comp, sizeof (comp));
 	zdb_nicenum(phys->bpo_uncomp, uncomp, sizeof (uncomp));
 
 	(void) printf("\t\tnum_blkptrs = %llu\n",
 	    (u_longlong_t)phys->bpo_num_blkptrs);
 	(void) printf("\t\tbytes = %s\n", bytes);
 
 	/* Later fields are only printed when 'size' says they are present. */
 	if (size >= BPOBJ_SIZE_V1) {
 		(void) printf("\t\tcomp = %s\n", comp);
 		(void) printf("\t\tuncomp = %s\n", uncomp);
 	}
 	if (size >= sizeof (*phys)) {
 		(void) printf("\t\tsubobjs = %llu\n",
 		    (u_longlong_t)phys->bpo_subobjs);
 		(void) printf("\t\tnum_subobjs = %llu\n",
 		    (u_longlong_t)phys->bpo_num_subobjs);
 	}
 
 	if (dump_opt['d'] >= 5) {
 		uint64_t n = 0;
 
 		while (n < phys->bpo_num_blkptrs) {
 			char blkbuf[BP_SPRINTF_LEN];
 			blkptr_t bp;
 			int err;
 
 			err = dmu_read(os, object,
 			    n * sizeof (bp), sizeof (bp), &bp, 0);
 			if (err != 0) {
 				(void) printf("got error %u from dmu_read\n",
 				    err);
 				break;
 			}
 			snprintf_blkptr_compact(blkbuf, sizeof (blkbuf), &bp);
 			(void) printf("\t%s\n", blkbuf);
 			n++;
 		}
 	}
 }
 
 /*
  * Object viewer: read the whole object as an array of 64-bit values and
  * print each one, up to and including the last non-zero entry.
  */
 /* ARGSUSED */
 static void
 dump_bpobj_subobjs(objset_t *os, uint64_t object, void *data, size_t size)
 {
 	dmu_object_info_t doi;
 
 	VERIFY0(dmu_object_info(os, object, &doi));
 	uint64_t *subobjs = kmem_alloc(doi.doi_max_offset, KM_SLEEP);
 
 	int err = dmu_read(os, object, 0, doi.doi_max_offset, subobjs, 0);
 	if (err != 0) {
 		(void) printf("got error %u from dmu_read\n", err);
 	} else {
 		/* Walk back from the end to find the last non-zero entry. */
 		int64_t last_nonzero = (int64_t)(doi.doi_max_offset / 8) - 1;
 
 		while (last_nonzero >= 0 && subobjs[last_nonzero] == 0)
 			last_nonzero--;
 
 		for (int64_t k = 0; k <= last_nonzero; k++)
 			(void) printf("\t%llu\n", (longlong_t)subobjs[k]);
 	}
 
 	kmem_free(subobjs, doi.doi_max_offset);
 }
 
 /* Object viewer: print only the ZAP statistics of a DDT ZAP object. */
 /*ARGSUSED*/
 static void
 dump_ddt_zap(objset_t *os, uint64_t object, void *data, size_t size)
 {
 	dump_zap_stats(os, object);
 	/* contents are printed elsewhere, properly decoded */
 }
 
 /*
  * Object viewer: print the ZAP statistics, then for each attribute its name
  * and the ATTR_LENGTH, ATTR_BSWAP and ATTR_NUM fields decoded from its
  * first integer.
  */
 /*ARGSUSED*/
 static void
 dump_sa_attrs(objset_t *os, uint64_t object, void *data, size_t size)
 {
 	zap_cursor_t cursor;
 	zap_attribute_t attr;
 
 	dump_zap_stats(os, object);
 	(void) printf("\n");
 
 	zap_cursor_init(&cursor, os, object);
 	while (zap_cursor_retrieve(&cursor, &attr) == 0) {
 		(void) printf("\t\t%s = ", attr.za_name);
 		if (attr.za_num_integers == 0) {
 			/* No value to decode; just finish the line. */
 			(void) printf("\n");
 		} else {
 			uint64_t first = attr.za_first_integer;
 
 			(void) printf(" %llx : [%d:%d:%d]\n",
 			    (u_longlong_t)first,
 			    (int)ATTR_LENGTH(first),
 			    (int)ATTR_BSWAP(first),
 			    (int)ATTR_NUM(first));
 		}
 		zap_cursor_advance(&cursor);
 	}
 	zap_cursor_fini(&cursor);
 }
 
 /*
  * Object viewer: print the ZAP statistics, then each attribute's name
  * followed by its 16-bit values in brackets.
  */
 /*ARGSUSED*/
 static void
 dump_sa_layouts(objset_t *os, uint64_t object, void *data, size_t size)
 {
 	zap_cursor_t cursor;
 	zap_attribute_t attr;
 	uint16_t *layout_attrs;
 
 	dump_zap_stats(os, object);
 	(void) printf("\n");
 
 	zap_cursor_init(&cursor, os, object);
 	for (; zap_cursor_retrieve(&cursor, &attr) == 0;
 	    zap_cursor_advance(&cursor)) {
 		(void) printf("\t\t%s = [", attr.za_name);
 		if (attr.za_num_integers == 0) {
 			(void) printf("\n");
 			continue;
 		}
 
 		/* Layout entries are expected to be 2 bytes wide. */
 		VERIFY(attr.za_integer_length == 2);
 		layout_attrs = umem_zalloc(attr.za_num_integers *
 		    attr.za_integer_length, UMEM_NOFAIL);
 
 		VERIFY(zap_lookup(os, object, attr.za_name,
 		    attr.za_integer_length,
 		    attr.za_num_integers, layout_attrs) == 0);
 
 		for (unsigned k = 0; k != attr.za_num_integers; k++)
 			(void) printf(" %d ", (int)layout_attrs[k]);
 		(void) printf("]\n");
 
 		umem_free(layout_attrs,
 		    attr.za_num_integers * attr.za_integer_length);
 	}
 	zap_cursor_fini(&cursor);
 }
 
 /*
  * Object viewer: print the ZAP statistics, then one line per directory
  * entry giving its name, the object number from ZFS_DIRENT_OBJ() and a
  * type name chosen by ZFS_DIRENT_TYPE().
  */
 /*ARGSUSED*/
 static void
 dump_zpldir(objset_t *os, uint64_t object, void *data, size_t size)
 {
 	zap_cursor_t zc;
 	zap_attribute_t attr;
 	const char *typenames[] = {
 		/* 0 */ "not specified",
 		/* 1 */ "FIFO",
 		/* 2 */ "Character Device",
 		/* 3 */ "3 (invalid)",
 		/* 4 */ "Directory",
 		/* 5 */ "5 (invalid)",
 		/* 6 */ "Block Device",
 		/* 7 */ "7 (invalid)",
 		/* 8 */ "Regular File",
 		/* 9 */ "9 (invalid)",
 		/* 10 */ "Symbolic Link",
 		/* 11 */ "11 (invalid)",
 		/* 12 */ "Socket",
 		/* 13 */ "Door",
 		/* 14 */ "Event Port",
 		/* 15 */ "15 (invalid)",
 	};
 
 	dump_zap_stats(os, object);
 	(void) printf("\n");
 
 	for (zap_cursor_init(&zc, os, object);
 	    zap_cursor_retrieve(&zc, &attr) == 0;
 	    zap_cursor_advance(&zc)) {
 		/* Cast so the argument type matches the %lld conversion. */
 		(void) printf("\t\t%s = %lld (type: %s)\n",
 		    attr.za_name,
 		    (long long)ZFS_DIRENT_OBJ(attr.za_first_integer),
 		    typenames[ZFS_DIRENT_TYPE(attr.za_first_integer)]);
 	}
 	zap_cursor_fini(&zc);
 }
 
 /*
  * Count, over the vdev tree rooted at 'vd', the leaf vdevs whose DTL space
  * map has a dbuf exactly sizeof (space_map_phys_t) bytes long.
  */
 static int
 get_dtl_refcount(vdev_t *vd)
 {
 	if (!vd->vdev_ops->vdev_op_leaf) {
 		int total = 0;
 
 		/* Interior vdev: sum the counts of all children. */
 		for (unsigned child = 0; child < vd->vdev_children; child++)
 			total += get_dtl_refcount(vd->vdev_child[child]);
 		return (total);
 	}
 
 	space_map_t *dtl = vd->vdev_dtl_sm;
 
 	if (dtl == NULL)
 		return (0);
 	return (dtl->sm_dbuf->db_size == sizeof (space_map_phys_t) ? 1 : 0);
 }
 
 /*
  * Count, over the vdev tree rooted at 'vd', the metaslabs of top-level
  * vdevs whose space map has a dbuf exactly sizeof (space_map_phys_t) bytes
  * long.
  */
 static int
 get_metaslab_refcount(vdev_t *vd)
 {
 	int total = 0;
 
 	/* Only top-level vdevs have their metaslabs examined. */
 	if (vd->vdev_top == vd) {
 		for (uint64_t ms = 0; ms < vd->vdev_ms_count; ms++) {
 			space_map_t *map = vd->vdev_ms[ms]->ms_sm;
 
 			if (map == NULL)
 				continue;
 			if (map->sm_dbuf->db_size == sizeof (space_map_phys_t))
 				total++;
 		}
 	}
 
 	for (unsigned child = 0; child < vd->vdev_children; child++)
 		total += get_metaslab_refcount(vd->vdev_child[child]);
 
 	return (total);
 }
 
 /*
  * Count, over the vdev tree rooted at 'vd', the top-level vdevs whose
  * obsolete space map object has a bonus buffer exactly
  * sizeof (space_map_phys_t) bytes long.
  */
 static int
 get_obsolete_refcount(vdev_t *vd)
 {
 	uint64_t obsolete_sm_obj = vdev_obsolete_sm_object(vd);
 	int total = 0;
 
 	if (vd->vdev_top != vd || obsolete_sm_obj == 0) {
 		/* No obsolete space map is expected in this case. */
 		ASSERT3P(vd->vdev_obsolete_sm, ==, NULL);
 		ASSERT3U(obsolete_sm_obj, ==, 0);
 	} else {
 		dmu_object_info_t doi;
 
 		VERIFY0(dmu_object_info(vd->vdev_spa->spa_meta_objset,
 		    obsolete_sm_obj, &doi));
 		if (doi.doi_bonus_size == sizeof (space_map_phys_t))
 			total = 1;
 	}
 
 	for (unsigned child = 0; child < vd->vdev_children; child++)
 		total += get_obsolete_refcount(vd->vdev_child[child]);
 
 	return (total);
 }
 
 /*
  * Return 1 if the pool records a previous obsolete space map object whose
  * bonus buffer is exactly sizeof (space_map_phys_t) bytes long, else 0.
  */
 static int
 get_prev_obsolete_spacemap_refcount(spa_t *spa)
 {
 	dmu_object_info_t doi;
 	uint64_t prev_obj =
 	    spa->spa_condensing_indirect_phys.scip_prev_obsolete_sm_object;
 
 	if (prev_obj == 0)
 		return (0);
 
 	VERIFY0(dmu_object_info(spa->spa_meta_objset, prev_obj, &doi));
 
 	return (doi.doi_bonus_size == sizeof (space_map_phys_t) ? 1 : 0);
 }
 
 /*
  * Compare the SPA_FEATURE_SPACEMAP_HISTOGRAM feature refcount with the
  * count derived from DTL, metaslab, obsolete and previous-obsolete space
  * maps.  Returns 0 when they agree; otherwise prints a message and
  * returns 2.
  */
 static int
 verify_spacemap_refcounts(spa_t *spa)
 {
 	vdev_t *root = spa->spa_root_vdev;
 	uint64_t expected = 0;
 	uint64_t actual = 0;
 
 	(void) feature_get_refcount(spa,
 	    &spa_feature_table[SPA_FEATURE_SPACEMAP_HISTOGRAM],
 	    &expected);
 
 	actual += get_dtl_refcount(root);
 	actual += get_metaslab_refcount(root);
 	actual += get_obsolete_refcount(root);
 	actual += get_prev_obsolete_spacemap_refcount(spa);
 
 	if (expected == actual)
 		return (0);
 
 	(void) printf("space map refcount mismatch: expected %lld != "
 	    "actual %lld\n",
 	    (longlong_t)expected,
 	    (longlong_t)actual);
 	return (2);
 }
 
 /*
  * Print the header of space map 'sm' and every entry stored in its object
  * in 'os', decoding debug entries and alloc/free ranges.  While doing so,
  * total the allocated space implied by the entries and report a mismatch
  * with space_map_allocated().  Does nothing when 'sm' is NULL.
  */
 static void
 dump_spacemap(objset_t *os, space_map_t *sm)
 {
 	char *ddata[] = { "ALLOC", "FREE", "CONDENSE", "INVALID",
 	    "INVALID", "INVALID", "INVALID", "INVALID" };
 	uint64_t alloc = 0;
 	uint64_t offset, entry;
 
 	if (sm == NULL)
 		return;
 
 	(void) printf("space map object %llu:\n",
 	    (longlong_t)sm->sm_phys->smp_object);
 	(void) printf("  smp_objsize = 0x%llx\n",
 	    (longlong_t)sm->sm_phys->smp_objsize);
 	(void) printf("  smp_alloc = 0x%llx\n",
 	    (longlong_t)sm->sm_phys->smp_alloc);
 
 	/*
 	 * Walk the object one entry at a time, printing each entry in both
 	 * encoded and decoded form.
 	 */
 	for (offset = 0; offset < space_map_length(sm);
 	    offset += sizeof (entry)) {
 		uint8_t mapshift = sm->sm_shift;
 
 		VERIFY0(dmu_read(os, space_map_object(sm), offset,
 		    sizeof (entry), &entry, DMU_READ_PREFETCH));
 
 		u_longlong_t index = (u_longlong_t)(offset / sizeof (entry));
 
 		if (SM_DEBUG_DECODE(entry)) {
 			(void) printf("\t    [%6llu] %s: txg %llu, pass %llu\n",
 			    index,
 			    ddata[SM_DEBUG_ACTION_DECODE(entry)],
 			    (u_longlong_t)SM_DEBUG_TXG_DECODE(entry),
 			    (u_longlong_t)SM_DEBUG_SYNCPASS_DECODE(entry));
 			continue;
 		}
 
 		uint64_t run = SM_RUN_DECODE(entry) << mapshift;
 		uint64_t start =
 		    (SM_OFFSET_DECODE(entry) << mapshift) + sm->sm_start;
 		int is_alloc = (SM_TYPE_DECODE(entry) == SM_ALLOC);
 
 		(void) printf("\t    [%6llu]    %c  range:"
 		    " %010llx-%010llx  size: %06llx\n",
 		    index,
 		    is_alloc ? 'A' : 'F',
 		    (u_longlong_t)start,
 		    (u_longlong_t)(start + run),
 		    (u_longlong_t)run);
 
 		if (is_alloc)
 			alloc += run;
 		else
 			alloc -= run;
 	}
 
 	if (alloc != space_map_allocated(sm)) {
 		(void) printf("space_map_object alloc (%llu) INCONSISTENT "
 		    "with space map summary (%llu)\n",
 		    (u_longlong_t)space_map_allocated(sm), (u_longlong_t)alloc);
 	}
 }
 
 /*
  * Print the segment count, maximum block size and free percentage of a
  * metaslab, followed by the histogram of its in-memory range tree.
  */
 static void
 dump_metaslab_stats(metaslab_t *msp)
 {
 	char maxbuf[32];
 	range_tree_t *tree = msp->ms_tree;
 	avl_tree_t *by_size = &msp->ms_size_tree;
 	int free_pct;
 
 	/* make sure nicenum has enough space */
 	CTASSERT(sizeof (maxbuf) >= NN_NUMBUF_SZ);
 
 	free_pct = range_tree_space(tree) * 100 / msp->ms_size;
 	zdb_nicenum(metaslab_block_maxsize(msp), maxbuf, sizeof (maxbuf));
 
 	(void) printf("\t %25s %10lu   %7s  %6s   %4s %4d%%\n",
 	    "segments", avl_numnodes(by_size), "maxsize", maxbuf,
 	    "freepct", free_pct);
 
 	(void) printf("\tIn-memory histogram:\n");
 	dump_histogram(tree->rt_histogram, RANGE_TREE_HISTOGRAM_SIZE, 0);
 }
 
 /*
  * Print a one-line summary of a metaslab (id, start offset, space map
  * object, free space) and, depending on the 'm', 'd' and 'L' option
  * levels, its in-memory statistics, its on-disk histogram and the full
  * contents of its space map.
  *
  * NOTE(review): sm is handed to space_map_allocated() and
  * space_map_object() before the sm != NULL test further down; presumably
  * those helpers accept NULL -- confirm.
  */
 static void
 dump_metaslab(metaslab_t *msp)
 {
 	vdev_t *vd = msp->ms_group->mg_vd;
 	spa_t *spa = vd->vdev_spa;
 	space_map_t *sm = msp->ms_sm;
 	char freebuf[32];
 
 	/* Free space is the metaslab size minus what the space map allocates. */
 	zdb_nicenum(msp->ms_size - space_map_allocated(sm), freebuf,
 	    sizeof (freebuf));
 
 	(void) printf(
 	    "\tmetaslab %6llu   offset %12llx   spacemap %6llu   free    %5s\n",
 	    (u_longlong_t)msp->ms_id, (u_longlong_t)msp->ms_start,
 	    (u_longlong_t)space_map_object(sm), freebuf);
 
 	/*
 	 * In-memory stats need the metaslab loaded; load it under ms_lock if
 	 * it is not already, and unload it again once the stats are printed.
 	 */
 	if (dump_opt['m'] > 2 && !dump_opt['L']) {
 		mutex_enter(&msp->ms_lock);
 		metaslab_load_wait(msp);
 		if (!msp->ms_loaded) {
 			VERIFY0(metaslab_load(msp));
 			range_tree_stat_verify(msp->ms_tree);
 		}
 		dump_metaslab_stats(msp);
 		metaslab_unload(msp);
 		mutex_exit(&msp->ms_lock);
 	}
 
 	if (dump_opt['m'] > 1 && sm != NULL &&
 	    spa_feature_is_active(spa, SPA_FEATURE_SPACEMAP_HISTOGRAM)) {
 		/*
 		 * The space map histogram represents free space in chunks
 		 * of sm_shift (i.e. bucket 0 refers to 2^sm_shift).
 		 */
 		(void) printf("\tOn-disk histogram:\t\tfragmentation %llu\n",
 		    (u_longlong_t)msp->ms_fragmentation);
 		dump_histogram(sm->sm_phys->smp_histogram,
 		    SPACE_MAP_HISTOGRAM_SIZE, sm->sm_shift);
 	}
 
 	/* At the highest verbosity, dump every space map entry as well. */
 	if (dump_opt['d'] > 5 || dump_opt['m'] > 3) {
 		ASSERT(msp->ms_size == (1ULL << vd->vdev_ms_shift));
 
 		dump_spacemap(spa->spa_meta_objset, msp->ms_sm);
 	}
 }
 
 /*
  * Print the per-vdev heading for a metaslab listing: the vdev id, the
  * metaslab count, the column titles and a separator row.
  */
 static void
 print_vdev_metaslab_header(vdev_t *vd)
 {
 	(void) printf("\tvdev %10llu\n\t%-10s%5llu   %-19s   %-15s   %-10s\n",
 	    (u_longlong_t)vd->vdev_id,
 	    "metaslabs", (u_longlong_t)vd->vdev_ms_count,
 	    "offset", "spacemap", "free");
 	(void) printf("\t%15s   %19s   %15s   %10s\n",
 	    "---------------", "-------------------",
 	    "---------------", "-------------");
 }
 
 /*
  * For every top-level vdev whose metaslab group belongs to the pool's
  * normal class, recompute and print the group's fragmentation and
  * histogram; then print the same for the class as a whole.
  */
 static void
 dump_metaslab_groups(spa_t *spa)
 {
 	vdev_t *root = spa->spa_root_vdev;
 	metaslab_class_t *normal = spa_normal_class(spa);
 
 	metaslab_class_histogram_verify(normal);
 
 	for (unsigned child = 0; child < root->vdev_children; child++) {
 		vdev_t *top = root->vdev_child[child];
 		metaslab_group_t *group = top->vdev_mg;
 
 		/* Groups belonging to other classes are not reported. */
 		if (group->mg_class != normal)
 			continue;
 
 		metaslab_group_histogram_verify(group);
 		group->mg_fragmentation = metaslab_group_fragmentation(group);
 
 		(void) printf("\tvdev %10llu\t\tmetaslabs%5llu\t\t"
 		    "fragmentation",
 		    (u_longlong_t)top->vdev_id,
 		    (u_longlong_t)top->vdev_ms_count);
 		if (group->mg_fragmentation != ZFS_FRAG_INVALID) {
 			(void) printf("%3llu%%\n",
 			    (u_longlong_t)group->mg_fragmentation);
 		} else {
 			(void) printf("%3s\n", "-");
 		}
 		dump_histogram(group->mg_histogram,
 		    RANGE_TREE_HISTOGRAM_SIZE, 0);
 	}
 
 	(void) printf("\tpool %s\tfragmentation", spa_name(spa));
 
 	uint64_t class_frag = metaslab_class_fragmentation(normal);
 
 	if (class_frag != ZFS_FRAG_INVALID)
 		(void) printf("\t%3llu%%\n", (u_longlong_t)class_frag);
 	else
 		(void) printf("\t%3s\n", "-");
 	dump_histogram(normal->mc_histogram, RANGE_TREE_HISTOGRAM_SIZE, 0);
 }
 
 /*
  * Print the indirect births and indirect mapping information of a vdev.
  * Returns without printing when the vdev has no indirect mapping.  The
  * individual mapping entries and the obsolete space map are printed only
  * when the 'd' option level exceeds 5 or the 'm' option level exceeds 3.
  */
 static void
 print_vdev_indirect(vdev_t *vd)
 {
 	vdev_indirect_config_t *vic = &vd->vdev_indirect_config;
 	vdev_indirect_mapping_t *vim = vd->vdev_indirect_mapping;
 	vdev_indirect_births_t *vib = vd->vdev_indirect_births;
 
 	/* No mapping: the births object is expected to be absent as well. */
 	if (vim == NULL) {
 		ASSERT3P(vib, ==, NULL);
 		return;
 	}
 
 	ASSERT3U(vdev_indirect_mapping_object(vim), ==,
 	    vic->vic_mapping_object);
 	ASSERT3U(vdev_indirect_births_object(vib), ==,
 	    vic->vic_births_object);
 
 	(void) printf("indirect births obj %llu:\n",
 	    (longlong_t)vic->vic_births_object);
 	(void) printf("    vib_count = %llu\n",
 	    (longlong_t)vdev_indirect_births_count(vib));
 	for (uint64_t i = 0; i < vdev_indirect_births_count(vib); i++) {
 		vdev_indirect_birth_entry_phys_t *cur_vibe =
 		    &vib->vib_entries[i];
 		(void) printf("\toffset %llx -> txg %llu\n",
 		    (longlong_t)cur_vibe->vibe_offset,
 		    (longlong_t)cur_vibe->vibe_phys_birth_txg);
 	}
 	(void) printf("\n");
 
 	(void) printf("indirect mapping obj %llu:\n",
 	    (longlong_t)vic->vic_mapping_object);
 	(void) printf("    vim_max_offset = 0x%llx\n",
 	    (longlong_t)vdev_indirect_mapping_max_offset(vim));
 	(void) printf("    vim_bytes_mapped = 0x%llx\n",
 	    (longlong_t)vdev_indirect_mapping_bytes_mapped(vim));
 	(void) printf("    vim_count = %llu\n",
 	    (longlong_t)vdev_indirect_mapping_num_entries(vim));
 
 	/* The per-entry detail below is only shown at high verbosity. */
 	if (dump_opt['d'] <= 5 && dump_opt['m'] <= 3)
 		return;
 
 	/*
 	 * NOTE(review): 'counts' is not released anywhere in this function;
 	 * confirm whether the loader expects the caller to free it.
 	 */
 	uint32_t *counts = vdev_indirect_mapping_load_obsolete_counts(vim);
 
 	/*
 	 * Each line is <vdev:src offset:asize> -> <dst vdev:dst offset:asize>;
 	 * both size fields are taken from the destination DVA.
 	 */
 	for (uint64_t i = 0; i < vdev_indirect_mapping_num_entries(vim); i++) {
 		vdev_indirect_mapping_entry_phys_t *vimep =
 		    &vim->vim_entries[i];
 		(void) printf("\t<%llx:%llx:%llx> -> "
 		    "<%llx:%llx:%llx> (%x obsolete)\n",
 		    (longlong_t)vd->vdev_id,
 		    (longlong_t)DVA_MAPPING_GET_SRC_OFFSET(vimep),
 		    (longlong_t)DVA_GET_ASIZE(&vimep->vimep_dst),
 		    (longlong_t)DVA_GET_VDEV(&vimep->vimep_dst),
 		    (longlong_t)DVA_GET_OFFSET(&vimep->vimep_dst),
 		    (longlong_t)DVA_GET_ASIZE(&vimep->vimep_dst),
 		    counts[i]);
 	}
 	(void) printf("\n");
 
 	uint64_t obsolete_sm_object = vdev_obsolete_sm_object(vd);
 	if (obsolete_sm_object != 0) {
 		objset_t *mos = vd->vdev_spa->spa_meta_objset;
 		(void) printf("obsolete space map object %llu:\n",
 		    (u_longlong_t)obsolete_sm_object);
 		ASSERT(vd->vdev_obsolete_sm != NULL);
 		ASSERT3U(space_map_object(vd->vdev_obsolete_sm), ==,
 		    obsolete_sm_object);
 		dump_spacemap(mos, vd->vdev_obsolete_sm);
 		(void) printf("\n");
 	}
 }
 
 /*
  * Print metaslab information for the pool.  Unless the 'd' option is set,
  * numeric arguments narrow the output: the first selects a single
  * top-level vdev, and any further ones select individual metaslabs of
  * that vdev.  Otherwise every top-level vdev is dumped in full.
  */
 static void
 dump_metaslabs(spa_t *spa)
 {
 	vdev_t *root = spa->spa_root_vdev;
 	uint64_t first = 0;
 	uint64_t limit = root->vdev_children;
 
 	(void) printf("\nMetaslabs:\n");
 
 	if (!dump_opt['d'] && zopt_objects > 0) {
 		first = zopt_object[0];
 
 		if (first >= limit)
 			(void) fatal("bad vdev id: %llu", (u_longlong_t)first);
 
 		if (zopt_objects > 1) {
 			vdev_t *picked = root->vdev_child[first];
 
 			print_vdev_metaslab_header(picked);
 
 			/* The arguments after the first are metaslab numbers. */
 			for (uint64_t arg = 1; arg < zopt_objects; arg++) {
 				uint64_t msid = zopt_object[arg];
 
 				if (msid >= picked->vdev_ms_count) {
 					(void) fprintf(stderr,
 					    "bad metaslab number %llu\n",
 					    (u_longlong_t)msid);
 					continue;
 				}
 				dump_metaslab(picked->vdev_ms[msid]);
 			}
 			(void) printf("\n");
 			return;
 		}
 
 		/* Only a vdev was named: restrict the walk to that vdev. */
 		limit = first + 1;
 	}
 
 	for (uint64_t c = first; c < limit; c++) {
 		vdev_t *top = root->vdev_child[c];
 
 		print_vdev_metaslab_header(top);
 
 		print_vdev_indirect(top);
 
 		for (uint64_t ms = 0; ms < top->vdev_ms_count; ms++)
 			dump_metaslab(top->vdev_ms[ms]);
 		(void) printf("\n");
 	}
 }
 
 /*
  * Print one line for each physical variant of a dedup table entry that has
  * a non-zero birth txg: the walk index, the reference count, the variant
  * name and the reconstructed block pointer.
  */
 static void
 dump_dde(const ddt_t *ddt, const ddt_entry_t *dde, uint64_t index)
 {
 	const char *types[4] = { "ditto", "single", "double", "triple" };
 	const ddt_key_t *key = &dde->dde_key;
 	char blkbuf[BP_SPRINTF_LEN];
 	blkptr_t blk;
 
 	for (int variant = 0; variant < DDT_PHYS_TYPES; variant++) {
 		const ddt_phys_t *phys = &dde->dde_phys[variant];
 
 		/* A zero birth txg marks a variant that is not in use. */
 		if (phys->ddp_phys_birth != 0) {
 			ddt_bp_create(ddt->ddt_checksum, key, phys, &blk);
 			snprintf_blkptr(blkbuf, sizeof (blkbuf), &blk);
 			(void) printf("index %llx refcnt %llu %s %s\n",
 			    (u_longlong_t)index,
 			    (u_longlong_t)phys->ddp_refcnt,
 			    types[variant], blkbuf);
 		}
 	}
 }
 
 /*
  * Print the dedup, compression and copies ratios derived from a set of
  * DDT statistics.  Nothing is printed when the statistics cover no blocks.
  */
 static void
 dump_dedup_ratio(const ddt_stat_t *dds)
 {
 	if (dds->dds_blocks == 0)
 		return;
 
 	double ref_lsize = (double)dds->dds_ref_lsize;
 	double ref_psize = (double)dds->dds_ref_psize;
 	double ref_dsize = (double)dds->dds_ref_dsize;
 	double dsize = (double)dds->dds_dsize;
 
 	double dedup = ref_dsize / dsize;
 	double compress = ref_lsize / ref_psize;
 	double copies = ref_dsize / ref_psize;
 
 	(void) printf("dedup = %.2f, compress = %.2f, copies = %.2f, "
 	    "dedup * compress / copies = %.2f\n\n",
 	    dedup, compress, copies, dedup * compress / copies);
 }
 
 /*
  * Summarize one DDT object (selected by type and class): entry count and
  * the average on-disk and in-core bytes per entry.  dump_opt['D'] >= 3
  * adds the histogram; >= 4 adds the individual entries, except that the
  * DDT_CLASS_UNIQUE table is only walked at >= 5.
  */
 static void
 dump_ddt(ddt_t *ddt, enum ddt_type type, enum ddt_class class)
 {
 	dmu_object_info_t doi;
 	ddt_entry_t dde;
 	char name[DDT_NAMELEN];
 	uint64_t entries, disk_bytes, core_bytes;
 	uint64_t cursor = 0;
 	int error;
 
 	error = ddt_object_info(ddt, type, class, &doi);
 	if (error == ENOENT)
 		return;
 	ASSERT(error == 0);
 
 	entries = ddt_object_count(ddt, type, class);
 	if (entries == 0)
 		return;
 
 	disk_bytes = doi.doi_physical_blocks_512 << 9;
 	core_bytes = doi.doi_fill_count * doi.doi_data_block_size;
 
 	ddt_object_name(ddt, type, class, name);
 
 	(void) printf("%s: %llu entries, size %llu on disk, %llu in core\n",
 	    name,
 	    (u_longlong_t)entries,
 	    (u_longlong_t)(disk_bytes / entries),
 	    (u_longlong_t)(core_bytes / entries));
 
 	if (dump_opt['D'] < 3)
 		return;
 
 	zpool_dump_ddt(NULL, &ddt->ddt_histogram[type][class]);
 
 	if (dump_opt['D'] < 4 ||
 	    (dump_opt['D'] < 5 && class == DDT_CLASS_UNIQUE))
 		return;
 
 	(void) printf("%s contents:\n\n", name);
 
 	for (;;) {
 		error = ddt_object_walk(ddt, type, class, &cursor, &dde);
 		if (error != 0)
 			break;
 		dump_dde(ddt, &dde, cursor);
 	}
 
 	/* The walk is expected to end only by running out of entries. */
 	ASSERT(error == ENOENT);
 
 	(void) printf("\n");
 }
 
 /*
  * Dump every DDT object of the pool (all checksum functions, types and
  * classes), followed by the pool-wide dedup ratio.  With dump_opt['D'] > 1
  * the aggregated histogram is printed as well.
  */
 static void
 dump_all_ddts(spa_t *spa)
 {
 	ddt_histogram_t total_hist;
 	ddt_stat_t total_stat;
 	enum zio_checksum cksum;
 	enum ddt_type type;
 	enum ddt_class class;
 
 	bzero(&total_hist, sizeof (total_hist));
 	bzero(&total_stat, sizeof (total_stat));
 
 	for (cksum = 0; cksum < ZIO_CHECKSUM_FUNCTIONS; cksum++) {
 		for (type = 0; type < DDT_TYPES; type++) {
 			for (class = 0; class < DDT_CLASSES; class++)
 				dump_ddt(spa->spa_ddt[cksum], type, class);
 		}
 	}
 
 	ddt_get_dedup_stats(spa, &total_stat);
 
 	if (total_stat.dds_blocks != 0) {
 		(void) printf("\n");
 
 		if (dump_opt['D'] > 1) {
 			(void) printf("DDT histogram (aggregated over all "
 			    "DDTs):\n");
 			ddt_get_dedup_histogram(spa, &total_hist);
 			zpool_dump_ddt(&total_stat, &total_hist);
 		}
 
 		dump_dedup_ratio(&total_stat);
 		return;
 	}
 
 	(void) printf("All DDTs are empty\n");
 }
 
 /*
  * range_tree_walk() callback used by dump_dtl(): print one segment as a
  * half-open interval plus its length.  'arg' is the line prefix string.
  */
 static void
 dump_dtl_seg(void *arg, uint64_t start, uint64_t size)
 {
 	const char *label = arg;
 	uint64_t end = start + size;
 
 	(void) printf("%s [%llu,%llu) length %llu\n", label,
 	    (u_longlong_t)start, (u_longlong_t)end, (u_longlong_t)size);
 }
 
 /*
  * Recursively print the dirty time logs of a vdev and its children.
  * 'indent' is the number of columns by which this level is indented; the
  * section heading is printed only for the call made with indent == 0.
  */
 static void
 dump_dtl(vdev_t *vd, int indent)
 {
 	const char *dtl_name[DTL_TYPES] = { "missing", "partial", "scrub",
 		"outage" };
 	spa_t *spa = vd->vdev_spa;
 	const char *label;
 	char prefix[256];
 	boolean_t required;
 
 	/* vdev_dtl_required() is queried inside a vdev state section. */
 	spa_vdev_state_enter(spa, SCL_NONE);
 	required = vdev_dtl_required(vd);
 	(void) spa_vdev_state_exit(spa, NULL, 0);
 
 	if (indent == 0)
 		(void) printf("\nDirty time logs:\n\n");
 
 	/* Prefer the device path, then the vdev type, then the pool name. */
 	if (vd->vdev_path)
 		label = vd->vdev_path;
 	else if (vd->vdev_parent)
 		label = vd->vdev_ops->vdev_op_type;
 	else
 		label = spa_name(spa);
 
 	(void) printf("\t%*s%s [%s]\n", indent, "", label,
 	    required ? "DTL-required" : "DTL-expendable");
 
 	for (int t = 0; t < DTL_TYPES; t++) {
 		range_tree_t *rt = vd->vdev_dtl[t];
 
 		if (range_tree_space(rt) != 0) {
 			(void) snprintf(prefix, sizeof (prefix), "\t%*s%s",
 			    indent + 2, "", dtl_name[t]);
 			range_tree_walk(rt, dump_dtl_seg, prefix);
 			if (dump_opt['d'] > 5 && vd->vdev_children == 0) {
 				dump_spacemap(spa->spa_meta_objset,
 				    vd->vdev_dtl_sm);
 			}
 		}
 	}
 
 	for (unsigned child = 0; child < vd->vdev_children; child++)
 		dump_dtl(vd->vdev_child[child], indent + 4);
 }
 
 /*
  * Print the pool's command history.
  *
  * The raw history is fetched with spa_history_get() one buffer at a time
  * and unpacked by zpool_history_unpack() into the 'events' nvlist array.
  * Each record is then printed as "<timestamp> <command>".  Records without
  * a ZPOOL_HIST_CMD string are treated as legacy internal events and are
  * rendered from their event number, txg and internal string.  When
  * dump_opt['h'] > 1 the raw nvlist of every record is dumped as well, and
  * records that could not be rendered are flagged as unrecognized.
  *
  * NOTE(review): 'buf' is an automatic array of SPA_MAXBLOCKSIZE bytes, and
  * 'events' is not released in this function -- confirm both are acceptable
  * for this tool.
  */
 static void
 dump_history(spa_t *spa)
 {
 	nvlist_t **events = NULL;
 	char buf[SPA_MAXBLOCKSIZE];
 	uint64_t resid, len, off = 0;
 	uint_t num = 0;
 	int error;
 	time_t tsec;
 	struct tm t;
 	char tbuf[30];
 	char internalstr[MAXPATHLEN];
 
 	do {
 		len = sizeof (buf);
 
 		if ((error = spa_history_get(spa, &off, &len, buf)) != 0) {
 			(void) fprintf(stderr, "Unable to read history: "
 			    "error %d\n", error);
 			return;
 		}
 
 		if (zpool_history_unpack(buf, len, &resid, &events, &num) != 0)
 			break;
 
 		/* Back the offset up by the bytes the unpacker left over. */
 		off -= resid;
 	} while (len != 0);
 
 	(void) printf("\nHistory:\n");
 	for (unsigned i = 0; i < num; i++) {
 		uint64_t time, txg, ievent;
 		char *cmd, *intstr;
 		boolean_t printed = B_FALSE;
 
 		/* A record without a timestamp cannot be rendered. */
 		if (nvlist_lookup_uint64(events[i], ZPOOL_HIST_TIME,
 		    &time) != 0)
 			goto next;
 		if (nvlist_lookup_string(events[i], ZPOOL_HIST_CMD,
 		    &cmd) != 0) {
 			/* No command string: try the internal-event form. */
 			if (nvlist_lookup_uint64(events[i],
 			    ZPOOL_HIST_INT_EVENT, &ievent) != 0)
 				goto next;
 			verify(nvlist_lookup_uint64(events[i],
 			    ZPOOL_HIST_TXG, &txg) == 0);
 			verify(nvlist_lookup_string(events[i],
 			    ZPOOL_HIST_INT_STR, &intstr) == 0);
 			/* Guard the zfs_history_event_names[] lookup below. */
 			if (ievent >= ZFS_NUM_LEGACY_HISTORY_EVENTS)
 				goto next;
 
 			(void) snprintf(internalstr,
 			    sizeof (internalstr),
 			    "[internal %s txg:%ju] %s",
 			    zfs_history_event_names[ievent], (uintmax_t)txg,
 			    intstr);
 			cmd = internalstr;
 		}
 		tsec = time;
 		(void) localtime_r(&tsec, &t);
 		(void) strftime(tbuf, sizeof (tbuf), "%F.%T", &t);
 		(void) printf("%s %s\n", tbuf, cmd);
 		printed = B_TRUE;
 
 next:
 		/* At higher verbosity also show the raw record. */
 		if (dump_opt['h'] > 1) {
 			if (!printed)
 				(void) printf("unrecognized record:\n");
 			dump_nvlist(events[i], 2);
 		}
 	}
 }
 
 /*
  * Object viewer for DMU dnode objects (see the object_viewer[] table).
  * Intentionally empty: it prints nothing and ignores all of its arguments.
  */
 /*ARGSUSED*/
 static void
 dump_dnode(objset_t *os, uint64_t object, void *data, size_t size)
 {
 }
 
 /*
  * Convert the block id in a bookmark into an offset.
  *
  * With a dnode, the block id at the bookmark's level is scaled up by the
  * indirect-block fan-out and the dnode's data block size.  Without a dnode
  * (bookmark level must be negative) the block id is returned unchanged for
  * object 0 and multiplied by the block pointer's logical size otherwise.
  */
 static uint64_t
 blkid2offset(const dnode_phys_t *dnp, const blkptr_t *bp,
     const zbookmark_phys_t *zb)
 {
 	if (dnp != NULL) {
 		ASSERT(zb->zb_level >= 0);
 
 		return (((zb->zb_blkid <<
 		    (zb->zb_level * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT))) *
 		    dnp->dn_datablkszsec) << SPA_MINBLOCKSHIFT);
 	}
 
 	ASSERT(zb->zb_level < 0);
 
 	return (zb->zb_object == 0 ? zb->zb_blkid :
 	    zb->zb_blkid * BP_GET_LSIZE(bp));
 }
 
 /*
  * Format a block pointer into blkbuf (at most buflen bytes) in a compact,
  * single-line form.
  *
  * - With dump_opt['b'] >= 6 the full snprintf_blkptr() output is produced.
  * - Embedded block pointers are shown as "EMBEDDED et=... L/P B=...".
  * - Otherwise the DVAs are listed (all of them when dump_opt['d'] > 5,
  *   only the first one otherwise) followed by sizes and birth txgs; holes
  *   show only the logical size and birth txg.
  *
  * Every write into blkbuf is bounded by buflen.
  */
 static void
 snprintf_blkptr_compact(char *blkbuf, size_t buflen, const blkptr_t *bp)
 {
 	const dva_t *dva = bp->blk_dva;
 	int ndvas = dump_opt['d'] > 5 ? BP_GET_NDVAS(bp) : 1;
 
 	if (dump_opt['b'] >= 6) {
 		snprintf_blkptr(blkbuf, buflen, bp);
 		return;
 	}
 
 	if (BP_IS_EMBEDDED(bp)) {
 		/* Bounded write: the caller's buffer size must be honoured. */
 		(void) snprintf(blkbuf, buflen,
 		    "EMBEDDED et=%u %llxL/%llxP B=%llu",
 		    (int)BPE_GET_ETYPE(bp),
 		    (u_longlong_t)BPE_GET_LSIZE(bp),
 		    (u_longlong_t)BPE_GET_PSIZE(bp),
 		    (u_longlong_t)bp->blk_birth);
 		return;
 	}
 
 	/* Append "vdev:offset:asize " for each DVA that is to be shown. */
 	blkbuf[0] = '\0';
 	for (int i = 0; i < ndvas; i++)
 		(void) snprintf(blkbuf + strlen(blkbuf),
 		    buflen - strlen(blkbuf), "%llu:%llx:%llx ",
 		    (u_longlong_t)DVA_GET_VDEV(&dva[i]),
 		    (u_longlong_t)DVA_GET_OFFSET(&dva[i]),
 		    (u_longlong_t)DVA_GET_ASIZE(&dva[i]));
 
 	if (BP_IS_HOLE(bp)) {
 		(void) snprintf(blkbuf + strlen(blkbuf),
 		    buflen - strlen(blkbuf),
 		    "%llxL B=%llu",
 		    (u_longlong_t)BP_GET_LSIZE(bp),
 		    (u_longlong_t)bp->blk_birth);
 	} else {
 		(void) snprintf(blkbuf + strlen(blkbuf),
 		    buflen - strlen(blkbuf),
 		    "%llxL/%llxP F=%llu B=%llu/%llu",
 		    (u_longlong_t)BP_GET_LSIZE(bp),
 		    (u_longlong_t)BP_GET_PSIZE(bp),
 		    (u_longlong_t)BP_GET_FILL(bp),
 		    (u_longlong_t)bp->blk_birth,
 		    (u_longlong_t)BP_PHYSICAL_BIRTH(bp));
 	}
 }
 
 /*
  * Print one line of the "Indirect blocks" listing: the offset of the
  * block, an "L<level>" marker placed in a column that depends on the level
  * within the dnode's tree, and the compact form of the block pointer.
  */
 static void
 print_indirect(blkptr_t *bp, const zbookmark_phys_t *zb,
     const dnode_phys_t *dnp)
 {
 	char bpstr[BP_SPRINTF_LEN];
 	int lvl;
 
 	/* Embedded block pointers are exempt from the type/level checks. */
 	if (!BP_IS_EMBEDDED(bp)) {
 		ASSERT3U(BP_GET_TYPE(bp), ==, dnp->dn_type);
 		ASSERT3U(BP_GET_LEVEL(bp), ==, zb->zb_level);
 	}
 
 	(void) printf("%16llx ", (u_longlong_t)blkid2offset(dnp, bp, zb));
 
 	ASSERT(zb->zb_level >= 0);
 
 	/* One column per level, from the top of the tree down to -1. */
 	lvl = dnp->dn_nlevels - 1;
 	while (lvl >= -1) {
 		if (lvl != zb->zb_level) {
 			(void) printf(" ");
 		} else {
 			(void) printf("L%llx", (u_longlong_t)zb->zb_level);
 		}
 		lvl--;
 	}
 
 	snprintf_blkptr_compact(bpstr, sizeof (bpstr), bp);
 	(void) printf("%s\n", bpstr);
 }
 
 /*
  * Print the block pointer 'bp' and, if it is a non-hole indirect block,
  * read it and recursively visit every block pointer it contains.
  *
  * Block pointers with a zero birth txg are skipped entirely.  Returns 0 on
  * success or the first error reported by arc_read() or by a recursive
  * visit.
  */
 static int
 visit_indirect(spa_t *spa, const dnode_phys_t *dnp,
     blkptr_t *bp, const zbookmark_phys_t *zb)
 {
 	arc_flags_t flags = ARC_FLAG_WAIT;
 	arc_buf_t *buf;
 	blkptr_t *child;
 	uint64_t fill = 0;
 	int epb;
 	int err;
 
 	if (bp->blk_birth == 0)
 		return (0);
 
 	print_indirect(bp, zb, dnp);
 
 	/* Level-0 blocks and holes have nothing below them. */
 	if (!(BP_GET_LEVEL(bp) > 0) || BP_IS_HOLE(bp))
 		return (0);
 
 	/* Number of block pointers held by this indirect block. */
 	epb = BP_GET_LSIZE(bp) >> SPA_BLKPTRSHIFT;
 
 	err = arc_read(NULL, spa, bp, arc_getbuf_func, &buf,
 	    ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
 	if (err)
 		return (err);
 	ASSERT(buf->b_data);
 
 	/* recursively visit blocks below this */
 	child = buf->b_data;
 	for (int i = 0; i < epb; i++) {
 		zbookmark_phys_t czb;
 
 		SET_BOOKMARK(&czb, zb->zb_objset, zb->zb_object,
 		    zb->zb_level - 1,
 		    zb->zb_blkid * epb + i);
 		err = visit_indirect(spa, dnp, &child[i], &czb);
 		if (err)
 			break;
 		fill += BP_GET_FILL(&child[i]);
 	}
 
 	/* The children's fill counts must add up to the parent's. */
 	if (!err) {
 		ASSERT3U(fill, ==, BP_GET_FILL(bp));
 	}
 	arc_buf_destroy(buf, &buf);
 
 	return (err);
 }
 
 /*
  * Print the block tree of a dnode by visiting each of the block pointers
  * stored in the dnode itself, starting at the top level (dn_nlevels - 1).
  * Errors returned by visit_indirect() are ignored.
  */
 /*ARGSUSED*/
 static void
 dump_indirect(dnode_t *dn)
 {
 	dnode_phys_t *dnp = dn->dn_phys;
 	zbookmark_phys_t zb;
 	int slot = 0;
 
 	(void) printf("Indirect blocks:\n");
 
 	SET_BOOKMARK(&zb, dmu_objset_id(dn->dn_objset),
 	    dn->dn_object, dnp->dn_nlevels - 1, 0);
 
 	while (slot < dnp->dn_nblkptr) {
 		zb.zb_blkid = slot;
 		(void) visit_indirect(dmu_objset_spa(dn->dn_objset), dnp,
 		    &dnp->dn_blkptr[slot], &zb);
 		slot++;
 	}
 
 	(void) printf("\n");
 }
 
 /*
  * Object viewer for DSL directory bonus data: print every field of the
  * dsl_dir_phys_t in 'data', with byte counts rendered by zdb_nicenum().
  * Does nothing when 'data' is NULL.
  */
 /*ARGSUSED*/
 static void
 dump_dsl_dir(objset_t *os, uint64_t object, void *data, size_t size)
 {
 	dsl_dir_phys_t *dd = data;
 	char numbuf[32];
 	time_t created;
 
 	/* make sure nicenum has enough space */
 	CTASSERT(sizeof (numbuf) >= NN_NUMBUF_SZ);
 
 	if (dd == NULL)
 		return;
 
 	ASSERT3U(size, >=, sizeof (dsl_dir_phys_t));
 
 	created = dd->dd_creation_time;
 	(void) printf("\t\tcreation_time = %s", ctime(&created));
 	(void) printf("\t\thead_dataset_obj = %llu\n",
 	    (u_longlong_t)dd->dd_head_dataset_obj);
 	(void) printf("\t\tparent_dir_obj = %llu\n",
 	    (u_longlong_t)dd->dd_parent_obj);
 	(void) printf("\t\torigin_obj = %llu\n",
 	    (u_longlong_t)dd->dd_origin_obj);
 	(void) printf("\t\tchild_dir_zapobj = %llu\n",
 	    (u_longlong_t)dd->dd_child_dir_zapobj);
 
 	zdb_nicenum(dd->dd_used_bytes, numbuf, sizeof (numbuf));
 	(void) printf("\t\tused_bytes = %s\n", numbuf);
 
 	zdb_nicenum(dd->dd_compressed_bytes, numbuf, sizeof (numbuf));
 	(void) printf("\t\tcompressed_bytes = %s\n", numbuf);
 
 	zdb_nicenum(dd->dd_uncompressed_bytes, numbuf, sizeof (numbuf));
 	(void) printf("\t\tuncompressed_bytes = %s\n", numbuf);
 
 	zdb_nicenum(dd->dd_quota, numbuf, sizeof (numbuf));
 	(void) printf("\t\tquota = %s\n", numbuf);
 
 	zdb_nicenum(dd->dd_reserved, numbuf, sizeof (numbuf));
 	(void) printf("\t\treserved = %s\n", numbuf);
 
 	(void) printf("\t\tprops_zapobj = %llu\n",
 	    (u_longlong_t)dd->dd_props_zapobj);
 	(void) printf("\t\tdeleg_zapobj = %llu\n",
 	    (u_longlong_t)dd->dd_deleg_zapobj);
 	(void) printf("\t\tflags = %llx\n",
 	    (u_longlong_t)dd->dd_flags);
 
 	/* Print one dd_used_breakdown[] slot, labelled by its suffix. */
 #define	PRINT_USED(which) \
 	zdb_nicenum(dd->dd_used_breakdown[DD_USED_ ## which], numbuf, \
 	    sizeof (numbuf)); \
 	(void) printf("\t\tused_breakdown[" #which "] = %s\n", numbuf)
 	PRINT_USED(HEAD);
 	PRINT_USED(SNAP);
 	PRINT_USED(CHILD);
 	PRINT_USED(CHILD_RSRV);
 	PRINT_USED(REFRSRV);
 #undef PRINT_USED
 }
 
 /*
  * Object viewer for DSL dataset bonus data: print every field of the
  * dsl_dataset_phys_t in 'data', including its root block pointer.
  * Does nothing when 'data' is NULL.
  */
 /*ARGSUSED*/
 static void
 dump_dsl_dataset(objset_t *os, uint64_t object, void *data, size_t size)
 {
 	dsl_dataset_phys_t *ds = data;
 	char refd[32], comp[32], uncomp[32], uniq[32];
 	char bpstr[BP_SPRINTF_LEN];
 	time_t created;
 
 	/* make sure nicenum has enough space */
 	CTASSERT(sizeof (refd) >= NN_NUMBUF_SZ);
 	CTASSERT(sizeof (comp) >= NN_NUMBUF_SZ);
 	CTASSERT(sizeof (uncomp) >= NN_NUMBUF_SZ);
 	CTASSERT(sizeof (uniq) >= NN_NUMBUF_SZ);
 
 	if (ds == NULL)
 		return;
 
 	ASSERT(size == sizeof (*ds));
 
 	(void) printf("\t\tdir_obj = %llu\n",
 	    (u_longlong_t)ds->ds_dir_obj);
 	(void) printf("\t\tprev_snap_obj = %llu\n",
 	    (u_longlong_t)ds->ds_prev_snap_obj);
 	(void) printf("\t\tprev_snap_txg = %llu\n",
 	    (u_longlong_t)ds->ds_prev_snap_txg);
 	(void) printf("\t\tnext_snap_obj = %llu\n",
 	    (u_longlong_t)ds->ds_next_snap_obj);
 	(void) printf("\t\tsnapnames_zapobj = %llu\n",
 	    (u_longlong_t)ds->ds_snapnames_zapobj);
 	(void) printf("\t\tnum_children = %llu\n",
 	    (u_longlong_t)ds->ds_num_children);
 	(void) printf("\t\tuserrefs_obj = %llu\n",
 	    (u_longlong_t)ds->ds_userrefs_obj);
 
 	created = ds->ds_creation_time;
 	(void) printf("\t\tcreation_time = %s", ctime(&created));
 	(void) printf("\t\tcreation_txg = %llu\n",
 	    (u_longlong_t)ds->ds_creation_txg);
 	(void) printf("\t\tdeadlist_obj = %llu\n",
 	    (u_longlong_t)ds->ds_deadlist_obj);
 
 	zdb_nicenum(ds->ds_referenced_bytes, refd, sizeof (refd));
 	(void) printf("\t\tused_bytes = %s\n", refd);
 	zdb_nicenum(ds->ds_compressed_bytes, comp, sizeof (comp));
 	(void) printf("\t\tcompressed_bytes = %s\n", comp);
 	zdb_nicenum(ds->ds_uncompressed_bytes, uncomp, sizeof (uncomp));
 	(void) printf("\t\tuncompressed_bytes = %s\n", uncomp);
 	zdb_nicenum(ds->ds_unique_bytes, uniq, sizeof (uniq));
 	(void) printf("\t\tunique = %s\n", uniq);
 
 	(void) printf("\t\tfsid_guid = %llu\n",
 	    (u_longlong_t)ds->ds_fsid_guid);
 	(void) printf("\t\tguid = %llu\n",
 	    (u_longlong_t)ds->ds_guid);
 	(void) printf("\t\tflags = %llx\n",
 	    (u_longlong_t)ds->ds_flags);
 	(void) printf("\t\tnext_clones_obj = %llu\n",
 	    (u_longlong_t)ds->ds_next_clones_obj);
 	(void) printf("\t\tprops_obj = %llu\n",
 	    (u_longlong_t)ds->ds_props_obj);
 
 	snprintf_blkptr(bpstr, sizeof (bpstr), &ds->ds_bp);
 	(void) printf("\t\tbp = %s\n", bpstr);
 }
 
 /*
  * bptree_iterate() callback: print the block pointer on its own line,
  * skipping block pointers whose birth txg is zero.  Always returns 0.
  */
 /* ARGSUSED */
 static int
 dump_bptree_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
 {
 	char line[BP_SPRINTF_LEN];
 
 	if (bp->blk_birth == 0)
 		return (0);
 
 	snprintf_blkptr(line, sizeof (line), bp);
 	(void) printf("\t%s\n", line);
 
 	return (0);
 }
 
 /*
  * Summarize the bptree object 'obj' under the heading 'name': number of
  * datasets (bt_end - bt_begin) and byte count.  Requires dump_opt['d'] >= 3;
  * at >= 5 every block pointer in the tree is listed as well.
  */
 static void
 dump_bptree(objset_t *os, uint64_t obj, const char *name)
 {
 	dmu_buf_t *bonus;
 	bptree_phys_t *phys;
 	char size_str[32];
 
 	/* make sure nicenum has enough space */
 	CTASSERT(sizeof (size_str) >= NN_NUMBUF_SZ);
 
 	if (dump_opt['d'] < 3)
 		return;
 
 	VERIFY3U(0, ==, dmu_bonus_hold(os, obj, FTAG, &bonus));
 	phys = bonus->db_data;
 	zdb_nicenum(phys->bt_bytes, size_str, sizeof (size_str));
 	(void) printf("\n    %s: %llu datasets, %s\n",
 	    name, (unsigned long long)(phys->bt_end - phys->bt_begin),
 	    size_str);
 	dmu_buf_rele(bonus, FTAG);
 
 	if (dump_opt['d'] >= 5) {
 		(void) printf("\n");
 
 		(void) bptree_iterate(os, obj, B_FALSE, dump_bptree_cb,
 		    NULL, NULL);
 	}
 }
 
 /*
  * bpobj iteration callback: print the compact form of the block pointer
  * on its own line.  The block pointer must have a non-zero birth txg.
  * Always returns 0.
  */
 /* ARGSUSED */
 static int
 dump_bpobj_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
 {
 	char line[BP_SPRINTF_LEN];
 
 	ASSERT(bp->blk_birth != 0);
 
 	snprintf_blkptr_compact(line, sizeof (line), bp);
 	(void) printf("\t%s\n", line);
 
 	return (0);
 }
 
 /*
  * Summarize a bpobj under the heading 'name', recursing into its
  * sub-objects with an increased 'indent'.  Requires dump_opt['d'] >= 3.
  * At dump_opt['d'] >= 5 the outermost call (indent == 0) additionally
  * lists every block pointer via bpobj_iterate_nofree().
  */
 static void
 dump_full_bpobj(bpobj_t *bpo, const char *name, int indent)
 {
 	char total[32];
 	char comp[32];
 	char uncomp[32];
 
 	/* make sure nicenum has enough space */
 	CTASSERT(sizeof (total) >= NN_NUMBUF_SZ);
 	CTASSERT(sizeof (comp) >= NN_NUMBUF_SZ);
 	CTASSERT(sizeof (uncomp) >= NN_NUMBUF_SZ);
 
 	if (dump_opt['d'] < 3)
 		return;
 
 	zdb_nicenum(bpo->bpo_phys->bpo_bytes, total, sizeof (total));
 
 	if (!bpo->bpo_havesubobj || bpo->bpo_phys->bpo_subobjs == 0) {
 		/* Plain bpobj without a sub-object list. */
 		(void) printf("    %*s: object %llu, %llu blkptrs, %s\n",
 		    indent * 8, name,
 		    (u_longlong_t)bpo->bpo_object,
 		    (u_longlong_t)bpo->bpo_phys->bpo_num_blkptrs,
 		    total);
 	} else {
 		uint64_t idx = 0;
 
 		zdb_nicenum(bpo->bpo_phys->bpo_comp, comp, sizeof (comp));
 		zdb_nicenum(bpo->bpo_phys->bpo_uncomp, uncomp, sizeof (uncomp));
 		(void) printf("    %*s: object %llu, %llu local blkptrs, "
 		    "%llu subobjs in object %llu, %s (%s/%s comp)\n",
 		    indent * 8, name,
 		    (u_longlong_t)bpo->bpo_object,
 		    (u_longlong_t)bpo->bpo_phys->bpo_num_blkptrs,
 		    (u_longlong_t)bpo->bpo_phys->bpo_num_subobjs,
 		    (u_longlong_t)bpo->bpo_phys->bpo_subobjs,
 		    total, comp, uncomp);
 
 		/* Read each sub-object id and dump that bpobj in turn. */
 		while (idx < bpo->bpo_phys->bpo_num_subobjs) {
 			uint64_t subobj;
 			bpobj_t sub;
 			int error;
 
 			VERIFY0(dmu_read(bpo->bpo_os,
 			    bpo->bpo_phys->bpo_subobjs,
 			    idx * sizeof (subobj), sizeof (subobj),
 			    &subobj, 0));
 			idx++;
 
 			error = bpobj_open(&sub, bpo->bpo_os, subobj);
 			if (error == 0) {
 				dump_full_bpobj(&sub, "subobj", indent + 1);
 				bpobj_close(&sub);
 			} else {
 				(void) printf("ERROR %u while trying to open "
 				    "subobj id %llu\n",
 				    error, (u_longlong_t)subobj);
 			}
 		}
 	}
 
 	if (dump_opt['d'] >= 5 && indent == 0) {
 		(void) bpobj_iterate_nofree(bpo, dump_bpobj_cb, NULL, NULL);
 		(void) printf("\n");
 	}
 }
 
 /*
  * Summarize a deadlist.  Requires dump_opt['d'] >= 3.  Old-format
  * deadlists are handed to dump_full_bpobj().  For new-format deadlists the
  * totals are printed; at >= 4 each entry is listed, and at >= 5 each
  * entry's bpobj is dumped in full.
  */
 static void
 dump_deadlist(dsl_deadlist_t *dl)
 {
 	dsl_deadlist_entry_t *entry;
 	uint64_t ignored;
 	char used[32];
 	char comp[32];
 	char uncomp[32];
 
 	/* make sure nicenum has enough space */
 	CTASSERT(sizeof (used) >= NN_NUMBUF_SZ);
 	CTASSERT(sizeof (comp) >= NN_NUMBUF_SZ);
 	CTASSERT(sizeof (uncomp) >= NN_NUMBUF_SZ);
 
 	if (dump_opt['d'] < 3)
 		return;
 
 	if (dl->dl_oldfmt) {
 		dump_full_bpobj(&dl->dl_bpobj, "old-format deadlist", 0);
 		return;
 	}
 
 	zdb_nicenum(dl->dl_phys->dl_used, used, sizeof (used));
 	zdb_nicenum(dl->dl_phys->dl_comp, comp, sizeof (comp));
 	zdb_nicenum(dl->dl_phys->dl_uncomp, uncomp, sizeof (uncomp));
 	(void) printf("\n    Deadlist: %s (%s/%s comp)\n",
 	    used, comp, uncomp);
 
 	if (dump_opt['d'] < 4)
 		return;
 
 	(void) printf("\n");
 
 	/* force the tree to be loaded */
 	dsl_deadlist_space_range(dl, 0, UINT64_MAX, &ignored, &ignored,
 	    &ignored);
 
 	entry = avl_first(&dl->dl_tree);
 	while (entry != NULL) {
 		if (dump_opt['d'] < 5) {
 			(void) printf("mintxg %llu -> obj %llu\n",
 			    (longlong_t)entry->dle_mintxg,
 			    (longlong_t)entry->dle_bpobj.bpo_object);
 		} else {
 			char heading[128];
 
 			(void) snprintf(heading, sizeof (heading),
 			    "mintxg %llu -> obj %llu",
 			    (longlong_t)entry->dle_mintxg,
 			    (longlong_t)entry->dle_bpobj.bpo_object);
 
 			dump_full_bpobj(&entry->dle_bpobj, heading, 0);
 		}
 		entry = AVL_NEXT(&dl->dl_tree, entry);
 	}
 }
 
 /*
  * FUID lookup state.  idx_tree and domain_tree are created and loaded in
  * dump_uidgid() and torn down by fuid_table_destroy(); fuid_table_loaded
  * records whether they currently hold a loaded table.
  */
 static avl_tree_t idx_tree;
 static avl_tree_t domain_tree;
 static boolean_t fuid_table_loaded;
 /*
  * The objset most recently opened by open_objset() and the SA attribute
  * table that sa_setup() filled in for it (only done for DMU_OST_ZFS
  * objsets).  Both are reset to NULL by close_objset().
  */
 static objset_t *sa_os = NULL;
 static sa_attr_type_t *sa_attr_table = NULL;
 
 /*
  * Take ownership of the dataset named 'path' and, for DMU_OST_ZFS objsets,
  * set up the system-attribute (SA) layer so that znodes can be decoded.
  *
  * On success 0 is returned, *osp refers to the owned objset and sa_os
  * records it; the objset must later be released with close_objset().
  * On failure a message is printed to stderr and the error number is
  * returned.  If sa_setup() fails the objset is disowned again, *osp is set
  * to NULL and the sa_setup() error is returned, so that callers never
  * proceed with a NULL objset.
  */
 static int
 open_objset(const char *path, dmu_objset_type_t type, void *tag, objset_t **osp)
 {
 	int err;
 	uint64_t sa_attrs = 0;
 	uint64_t version = 0;
 
 	/* Only one objset may be open through this interface at a time. */
 	VERIFY3P(sa_os, ==, NULL);
 	err = dmu_objset_own(path, type, B_TRUE, tag, osp);
 	if (err != 0) {
 		(void) fprintf(stderr, "failed to own dataset '%s': %s\n", path,
 		    strerror(err));
 		return (err);
 	}
 
 	if (dmu_objset_type(*osp) == DMU_OST_ZFS) {
 		/*
 		 * Lookup failures are tolerated: 'version' and 'sa_attrs'
 		 * then keep their zero defaults.
 		 */
 		(void) zap_lookup(*osp, MASTER_NODE_OBJ, ZPL_VERSION_STR,
 		    8, 1, &version);
 		if (version >= ZPL_VERSION_SA) {
 			(void) zap_lookup(*osp, MASTER_NODE_OBJ, ZFS_SA_ATTRS,
 			    8, 1, &sa_attrs);
 		}
 		err = sa_setup(*osp, sa_attrs, zfs_attr_table, ZPL_END,
 		    &sa_attr_table);
 		if (err != 0) {
 			(void) fprintf(stderr, "sa_setup failed: %s\n",
 			    strerror(err));
 			dmu_objset_disown(*osp, tag);
 			*osp = NULL;
 		}
 	}
 	sa_os = *osp;
 
 	/* Propagate an sa_setup() failure instead of reporting success. */
 	return (err);
 }
 
 /*
  * Release an objset obtained from open_objset(): tear down its SA state
  * if any was set up, disown it, and clear the file-scope SA bookkeeping.
  * 'os' must be the objset recorded in sa_os.
  */
 static void
 close_objset(objset_t *os, void *tag)
 {
 	VERIFY3P(os, ==, sa_os);
 
 	if (os->os_sa != NULL) {
 		sa_tear_down(os);
 	}
 	dmu_objset_disown(os, tag);
 
 	sa_os = NULL;
 	sa_attr_table = NULL;
 }
 
 /*
  * Destroy the FUID index/domain trees if they have been loaded, and mark
  * them as no longer loaded.  Does nothing otherwise.
  */
 static void
 fuid_table_destroy()
 {
 	if (!fuid_table_loaded)
 		return;
 
 	zfs_fuid_table_destroy(&idx_tree, &domain_tree);
 	fuid_table_loaded = B_FALSE;
 }
 
 /*
  * Print a uid or gid, labelled with 'id_type'.
  * An id without a FUID index is a plain POSIX id and is printed in
  * decimal.  An id carrying a FUID index is printed in hex, followed by
  * its domain string and RID in the form [domain-rid].
  */
 static void
 print_idstr(uint64_t id, const char *id_type)
 {
 	char *domain;
 
 	if (!FUID_INDEX(id)) {
 		(void) printf("\t%s     %llu\n", id_type, (u_longlong_t)id);
 		return;
 	}
 
 	domain = zfs_fuid_idx_domain(&idx_tree, FUID_INDEX(id));
 	(void) printf("\t%s     %llx [%s-%d]\n", id_type,
 	    (u_longlong_t)id, domain, (int)FUID_RID(id));
 }
 
 /*
  * Print the uid and gid of a file.  If either id carries a FUID index and
  * the FUID domain table has not been loaded yet, it is loaded first from
  * the object named by ZFS_FUID_TABLES in the master node.
  */
 static void
 dump_uidgid(objset_t *os, uint64_t uid, uint64_t gid)
 {
 	uint32_t uid_idx = FUID_INDEX(uid);
 	uint32_t gid_idx = FUID_INDEX(gid);
 
 	/* Load domain table, if not already loaded */
 	if ((uid_idx || gid_idx) && !fuid_table_loaded) {
 		uint64_t table_obj;
 
 		/* first find the fuid object.  It lives in the master node */
 		VERIFY(zap_lookup(os, MASTER_NODE_OBJ, ZFS_FUID_TABLES,
 		    8, 1, &table_obj) == 0);
 
 		zfs_fuid_avl_tree_create(&idx_tree, &domain_tree);
 		(void) zfs_fuid_table_load(os, table_obj,
 		    &idx_tree, &domain_tree);
 
 		fuid_table_loaded = B_TRUE;
 	}
 
 	print_idstr(uid, "uid");
 	print_idstr(gid, "gid");
 }
 
 /*
  * Object viewer for znodes: look up the file's system attributes through
  * the SA layer and print them (ownership, timestamps, generation, mode,
  * size, parent, link count, flags, and -- when present -- xattr and rdev).
  * At dump_opt['d'] > 4 the path of the object is printed as well.
  *
  * 'os' must be the objset opened via open_objset(), because the attribute
  * ids are taken from the file-scope sa_attr_table.  'data' and 'size' are
  * not used.
  */
 /*ARGSUSED*/
 static void
 dump_znode(objset_t *os, uint64_t object, void *data, size_t size)
 {
 	char path[MAXPATHLEN * 2];	/* allow for xattr and failure prefix */
 	sa_handle_t *hdl;
 	uint64_t xattr, rdev, gen;
 	uint64_t uid, gid, mode, fsize, parent, links;
 	uint64_t pflags;
 	uint64_t acctm[2], modtm[2], chgtm[2], crtm[2];
 	time_t z_crtime, z_atime, z_mtime, z_ctime;
 	/* Sized for exactly the twelve attributes registered below. */
 	sa_bulk_attr_t bulk[12];
 	int idx = 0;
 	int error;
 
 	VERIFY3P(os, ==, sa_os);
 	if (sa_handle_get(os, object, NULL, SA_HDL_PRIVATE, &hdl)) {
 		(void) printf("Failed to get handle for SA znode\n");
 		return;
 	}
 
 	/* Queue every attribute for one bulk lookup. */
 	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_UID], NULL, &uid, 8);
 	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_GID], NULL, &gid, 8);
 	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_LINKS], NULL,
 	    &links, 8);
 	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_GEN], NULL, &gen, 8);
 	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_MODE], NULL,
 	    &mode, 8);
 	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_PARENT],
 	    NULL, &parent, 8);
 	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_SIZE], NULL,
 	    &fsize, 8);
 	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_ATIME], NULL,
 	    acctm, 16);
 	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_MTIME], NULL,
 	    modtm, 16);
 	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_CRTIME], NULL,
 	    crtm, 16);
 	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_CTIME], NULL,
 	    chgtm, 16);
 	SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_FLAGS], NULL,
 	    &pflags, 8);
 
 	/* On lookup failure nothing is printed for this object. */
 	if (sa_bulk_lookup(hdl, bulk, idx)) {
 		(void) sa_handle_destroy(hdl);
 		return;
 	}
 
 	/* Only the first word of each timestamp pair is used for display. */
 	z_crtime = (time_t)crtm[0];
 	z_atime = (time_t)acctm[0];
 	z_mtime = (time_t)modtm[0];
 	z_ctime = (time_t)chgtm[0];
 
 	if (dump_opt['d'] > 4) {
 		error = zfs_obj_to_path(os, object, path, sizeof (path));
 		if (error != 0) {
 			/* Fall back to a placeholder naming the object. */
 			(void) snprintf(path, sizeof (path),
 			    "\?\?\?<object#%llu>", (u_longlong_t)object);
 		}
 		(void) printf("\tpath	%s\n", path);
 	}
 	dump_uidgid(os, uid, gid);
 	(void) printf("\tatime	%s", ctime(&z_atime));
 	(void) printf("\tmtime	%s", ctime(&z_mtime));
 	(void) printf("\tctime	%s", ctime(&z_ctime));
 	(void) printf("\tcrtime	%s", ctime(&z_crtime));
 	(void) printf("\tgen	%llu\n", (u_longlong_t)gen);
 	(void) printf("\tmode	%llo\n", (u_longlong_t)mode);
 	(void) printf("\tsize	%llu\n", (u_longlong_t)fsize);
 	(void) printf("\tparent	%llu\n", (u_longlong_t)parent);
 	(void) printf("\tlinks	%llu\n", (u_longlong_t)links);
 	(void) printf("\tpflags	%llx\n", (u_longlong_t)pflags);
 	/* xattr and rdev are printed only when the lookup succeeds. */
 	if (sa_lookup(hdl, sa_attr_table[ZPL_XATTR], &xattr,
 	    sizeof (uint64_t)) == 0)
 		(void) printf("\txattr	%llu\n", (u_longlong_t)xattr);
 	if (sa_lookup(hdl, sa_attr_table[ZPL_RDEV], &rdev,
 	    sizeof (uint64_t)) == 0)
 		(void) printf("\trdev	0x%016llx\n", (u_longlong_t)rdev);
 	sa_handle_destroy(hdl);
 }
 
 /*
  * Object viewer for ZFS ACL objects (see the object_viewer[] table).
  * Intentionally empty: it prints nothing and ignores all of its arguments.
  */
 /*ARGSUSED*/
 static void
 dump_acl(objset_t *os, uint64_t object, void *data, size_t size)
 {
 }
 
 /*
  * Object viewer for DMU objset objects (see the object_viewer[] table).
  * Intentionally empty: it prints nothing and ignores all of its arguments.
  */
 /*ARGSUSED*/
 static void
 dump_dmu_objset(objset_t *os, uint64_t object, void *data, size_t size)
 {
 }
 
 /*
  * Per-object-type viewer callbacks.  dump_object() indexes this table with
  * ZDB_OT_TYPE() of an object's type and of its bonus type, so the order of
  * the entries is significant; the trailing comment on each line names the
  * object type that the slot serves.  The final slot handles unknown types.
  */
 static object_viewer_t *object_viewer[DMU_OT_NUMTYPES + 1] = {
 	dump_none,		/* unallocated			*/
 	dump_zap,		/* object directory		*/
 	dump_uint64,		/* object array			*/
 	dump_none,		/* packed nvlist		*/
 	dump_packed_nvlist,	/* packed nvlist size		*/
 	dump_none,		/* bpobj			*/
 	dump_bpobj,		/* bpobj header			*/
 	dump_none,		/* SPA space map header		*/
 	dump_none,		/* SPA space map		*/
 	dump_none,		/* ZIL intent log		*/
 	dump_dnode,		/* DMU dnode			*/
 	dump_dmu_objset,	/* DMU objset			*/
 	dump_dsl_dir,		/* DSL directory		*/
 	dump_zap,		/* DSL directory child map	*/
 	dump_zap,		/* DSL dataset snap map		*/
 	dump_zap,		/* DSL props			*/
 	dump_dsl_dataset,	/* DSL dataset			*/
 	dump_znode,		/* ZFS znode			*/
 	dump_acl,		/* ZFS V0 ACL			*/
 	dump_uint8,		/* ZFS plain file		*/
 	dump_zpldir,		/* ZFS directory		*/
 	dump_zap,		/* ZFS master node		*/
 	dump_zap,		/* ZFS delete queue		*/
 	dump_uint8,		/* zvol object			*/
 	dump_zap,		/* zvol prop			*/
 	dump_uint8,		/* other uint8[]		*/
 	dump_uint64,		/* other uint64[]		*/
 	dump_zap,		/* other ZAP			*/
 	dump_zap,		/* persistent error log		*/
 	dump_uint8,		/* SPA history			*/
 	dump_history_offsets,	/* SPA history offsets		*/
 	dump_zap,		/* Pool properties		*/
 	dump_zap,		/* DSL permissions		*/
 	dump_acl,		/* ZFS ACL			*/
 	dump_uint8,		/* ZFS SYSACL			*/
 	dump_none,		/* FUID nvlist			*/
 	dump_packed_nvlist,	/* FUID nvlist size		*/
 	dump_zap,		/* DSL dataset next clones	*/
 	dump_zap,		/* DSL scrub queue		*/
 	dump_zap,		/* ZFS user/group used		*/
 	dump_zap,		/* ZFS user/group quota		*/
 	dump_zap,		/* snapshot refcount tags	*/
 	dump_ddt_zap,		/* DDT ZAP object		*/
 	dump_zap,		/* DDT statistics		*/
 	dump_znode,		/* SA object			*/
 	dump_zap,		/* SA Master Node		*/
 	dump_sa_attrs,		/* SA attribute registration	*/
 	dump_sa_layouts,	/* SA attribute layouts		*/
 	dump_zap,		/* DSL scrub translations	*/
 	dump_none,		/* fake dedup BP		*/
 	dump_zap,		/* deadlist			*/
 	dump_none,		/* deadlist hdr			*/
 	dump_zap,		/* dsl clones			*/
 	dump_bpobj_subobjs,	/* bpobj subobjs		*/
 	dump_unknown,		/* Unknown type, must be last	*/
 };
 
 /*
  * Print a description of one object in an objset.
  *
  * A one-line summary is always printed (preceded by the column header when
  * *print_header is set, which is then cleared).  Higher 'verbosity' adds:
  *   > 3   a line describing the bonus buffer, if the object has one
  *   >= 4  dnode flags, max block id, and the type-specific viewers for the
  *         bonus and the object itself (*print_header is set again)
  *   >= 5  the indirect block tree and the list of allocated segments
  *
  * Object 0 denotes the meta dnode of the objset.  A failure to hold the
  * bonus buffer of any other object is fatal.
  */
 static void
 dump_object(objset_t *os, uint64_t object, int verbosity, int *print_header)
 {
 	dmu_buf_t *db = NULL;
 	dmu_object_info_t doi;
 	dnode_t *dn;
 	void *bonus = NULL;
 	size_t bsize = 0;
 	char iblk[32], dblk[32], lsize[32], asize[32], fill[32];
 	char bonus_size[32];
 	char aux[50];
 	int error;
 
 	/* make sure nicenum has enough space */
 	CTASSERT(sizeof (iblk) >= NN_NUMBUF_SZ);
 	CTASSERT(sizeof (dblk) >= NN_NUMBUF_SZ);
 	CTASSERT(sizeof (lsize) >= NN_NUMBUF_SZ);
 	CTASSERT(sizeof (asize) >= NN_NUMBUF_SZ);
 	CTASSERT(sizeof (bonus_size) >= NN_NUMBUF_SZ);
 
 	if (*print_header) {
 		(void) printf("\n%10s  %3s  %5s  %5s  %5s  %5s  %6s  %s\n",
 		    "Object", "lvl", "iblk", "dblk", "dsize", "lsize",
 		    "%full", "type");
 		*print_header = 0;
 	}
 
 	if (object == 0) {
 		dn = DMU_META_DNODE(os);
 	} else {
 		error = dmu_bonus_hold(os, object, FTAG, &db);
 		if (error)
 			fatal("dmu_bonus_hold(%llu) failed, errno %u",
 			    (u_longlong_t)object, error);
 		bonus = db->db_data;
 		bsize = db->db_size;
 		dn = DB_DNODE((dmu_buf_impl_t *)db);
 	}
 	dmu_object_info_from_dnode(dn, &doi);
 
 	zdb_nicenum(doi.doi_metadata_block_size, iblk, sizeof (iblk));
 	zdb_nicenum(doi.doi_data_block_size, dblk, sizeof (dblk));
 	zdb_nicenum(doi.doi_max_offset, lsize, sizeof (lsize));
 	zdb_nicenum(doi.doi_physical_blocks_512 << 9, asize, sizeof (asize));
 	zdb_nicenum(doi.doi_bonus_size, bonus_size, sizeof (bonus_size));
 	/* Percentage of the object's logical extent that is filled. */
 	(void) snprintf(fill, sizeof (fill), "%6.2f",
 	    100.0 * doi.doi_fill_count *
 	    doi.doi_data_block_size / (object == 0 ? DNODES_PER_BLOCK : 1) /
 	    doi.doi_max_offset);
 
 	aux[0] = '\0';
 
 	/*
 	 * Each append is limited to the space still free in aux, so the
 	 * two suffixes together can never run past the end of the buffer.
 	 */
 	if (doi.doi_checksum != ZIO_CHECKSUM_INHERIT || verbosity >= 6) {
 		(void) snprintf(aux + strlen(aux), sizeof (aux) - strlen(aux),
 		    " (K=%s)", ZDB_CHECKSUM_NAME(doi.doi_checksum));
 	}
 
 	if (doi.doi_compress != ZIO_COMPRESS_INHERIT || verbosity >= 6) {
 		(void) snprintf(aux + strlen(aux), sizeof (aux) - strlen(aux),
 		    " (Z=%s)", ZDB_COMPRESS_NAME(doi.doi_compress));
 	}
 
 	(void) printf("%10lld  %3u  %5s  %5s  %5s  %5s  %6s  %s%s\n",
 	    (u_longlong_t)object, doi.doi_indirection, iblk, dblk,
 	    asize, lsize, fill, ZDB_OT_NAME(doi.doi_type), aux);
 
 	if (doi.doi_bonus_type != DMU_OT_NONE && verbosity > 3) {
 		(void) printf("%10s  %3s  %5s  %5s  %5s  %5s  %6s  %s\n",
 		    "", "", "", "", "", bonus_size, "bonus",
 		    ZDB_OT_NAME(doi.doi_bonus_type));
 	}
 
 	if (verbosity >= 4) {
 		(void) printf("\tdnode flags: %s%s%s\n",
 		    (dn->dn_phys->dn_flags & DNODE_FLAG_USED_BYTES) ?
 		    "USED_BYTES " : "",
 		    (dn->dn_phys->dn_flags & DNODE_FLAG_USERUSED_ACCOUNTED) ?
 		    "USERUSED_ACCOUNTED " : "",
 		    (dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR) ?
 		    "SPILL_BLKPTR" : "");
 		(void) printf("\tdnode maxblkid: %llu\n",
 		    (longlong_t)dn->dn_phys->dn_maxblkid);
 
 		/* Bonus viewer first, then the viewer for the object type. */
 		object_viewer[ZDB_OT_TYPE(doi.doi_bonus_type)](os, object,
 		    bonus, bsize);
 		object_viewer[ZDB_OT_TYPE(doi.doi_type)](os, object, NULL, 0);
 		*print_header = 1;
 	}
 
 	if (verbosity >= 5)
 		dump_indirect(dn);
 
 	if (verbosity >= 5) {
 		/*
 		 * Report the list of segments that comprise the object.
 		 */
 		uint64_t start = 0;
 		uint64_t end;
 		uint64_t blkfill = 1;
 		int minlvl = 1;
 
 		if (dn->dn_type == DMU_OT_DNODE) {
 			minlvl = 0;
 			blkfill = DNODES_PER_BLOCK;
 		}
 
 		for (;;) {
 			char segsize[32];
 			/* make sure nicenum has enough space */
 			CTASSERT(sizeof (segsize) >= NN_NUMBUF_SZ);
 			/* Find the start of the next segment ... */
 			error = dnode_next_offset(dn,
 			    0, &start, minlvl, blkfill, 0);
 			if (error)
 				break;
 			/* ... and the hole that terminates it. */
 			end = start;
 			error = dnode_next_offset(dn,
 			    DNODE_FIND_HOLE, &end, minlvl, blkfill, 0);
 			zdb_nicenum(end - start, segsize, sizeof (segsize));
 			(void) printf("\t\tsegment [%016llx, %016llx)"
 			    " size %5s\n", (u_longlong_t)start,
 			    (u_longlong_t)end, segsize);
 			if (error)
 				break;
 			start = end;
 		}
 	}
 
 	if (db != NULL)
 		dmu_buf_rele(db, FTAG);
 }
 
 /*
  * Display names for objset types.  dump_dir() indexes this table with the
  * dds_type of an objset when that value is below DMU_OST_NUMTYPES.
  */
 static const char *objset_types[DMU_OST_NUMTYPES] = {
 	"NONE", "META", "ZPL", "ZVOL", "OTHER", "ANY" };
 
 /*
  * Print a description of an objset and, depending on verbosity
  * (dump_opt['d']), of the objects inside it.
  *
  * A one-line dataset summary is always printed.  If specific objects were
  * requested (zopt_objects != 0) only those are dumped.  Otherwise the
  * intent log (with -i or verbosity >= 2) and the deadlists are shown, and
  * at verbosity >= 2 every object in the objset is dumped via
  * dump_object().  An unexpected error from dmu_object_next() aborts the
  * program.
  */
 static void
 dump_dir(objset_t *os)
 {
 	dmu_objset_stats_t dds;
 	uint64_t object, object_count;
 	uint64_t refdbytes, usedobjs, scratch;
 	char numbuf[32];
 	char blkbuf[BP_SPRINTF_LEN + 20];
 	char osname[ZFS_MAX_DATASET_NAME_LEN];
 	const char *type = "UNKNOWN";
 	int verbosity = dump_opt['d'];
 	int print_header = 1;
 	unsigned i;
 	int error;
 
 	/* make sure nicenum has enough space */
 	CTASSERT(sizeof (numbuf) >= NN_NUMBUF_SZ);
 
 	/* The stats are fetched while holding the pool config. */
 	dsl_pool_config_enter(dmu_objset_pool(os), FTAG);
 	dmu_objset_fast_stat(os, &dds);
 	dsl_pool_config_exit(dmu_objset_pool(os), FTAG);
 
 	if (dds.dds_type < DMU_OST_NUMTYPES)
 		type = objset_types[dds.dds_type];
 
 	if (dds.dds_type == DMU_OST_META) {
 		/* The meta objset takes its numbers from the MOS itself. */
 		dds.dds_creation_txg = TXG_INITIAL;
 		usedobjs = BP_GET_FILL(os->os_rootbp);
 		refdbytes = dsl_dir_phys(os->os_spa->spa_dsl_pool->dp_mos_dir)->
 		    dd_used_bytes;
 	} else {
 		dmu_objset_space(os, &refdbytes, &scratch, &usedobjs, &scratch);
 	}
 
 	ASSERT3U(usedobjs, ==, BP_GET_FILL(os->os_rootbp));
 
 	zdb_nicenum(refdbytes, numbuf, sizeof (numbuf));
 
 	/* The root block pointer is appended to the summary at -dddd. */
 	if (verbosity >= 4) {
 		(void) snprintf(blkbuf, sizeof (blkbuf), ", rootbp ");
 		(void) snprintf_blkptr(blkbuf + strlen(blkbuf),
 		    sizeof (blkbuf) - strlen(blkbuf), os->os_rootbp);
 	} else {
 		blkbuf[0] = '\0';
 	}
 
 	dmu_objset_name(os, osname);
 
 	(void) printf("Dataset %s [%s], ID %llu, cr_txg %llu, "
 	    "%s, %llu objects%s\n",
 	    osname, type, (u_longlong_t)dmu_objset_id(os),
 	    (u_longlong_t)dds.dds_creation_txg,
 	    numbuf, (u_longlong_t)usedobjs, blkbuf);
 
 	/* Explicitly requested objects: dump just those and stop. */
 	if (zopt_objects != 0) {
 		for (i = 0; i < zopt_objects; i++)
 			dump_object(os, zopt_object[i], verbosity,
 			    &print_header);
 		(void) printf("\n");
 		return;
 	}
 
 	if (dump_opt['i'] != 0 || verbosity >= 2)
 		dump_intent_log(dmu_objset_zil(os));
 
 	if (dmu_objset_ds(os) != NULL) {
 		dsl_dataset_t *ds = dmu_objset_ds(os);
 		dump_deadlist(&ds->ds_deadlist);
 
 		if (dsl_dataset_remap_deadlist_exists(ds)) {
 			(void) printf("ds_remap_deadlist:\n");
 			dump_deadlist(&ds->ds_remap_deadlist);
 		}
 	}
 
 	if (verbosity < 2)
 		return;
 
 	if (BP_IS_HOLE(os->os_rootbp))
 		return;
 
 	/* Object 0 is the meta dnode; see dump_object(). */
 	dump_object(os, 0, verbosity, &print_header);
 	object_count = 0;
 	if (DMU_USERUSED_DNODE(os) != NULL &&
 	    DMU_USERUSED_DNODE(os)->dn_type != 0) {
 		dump_object(os, DMU_USERUSED_OBJECT, verbosity, &print_header);
 		dump_object(os, DMU_GROUPUSED_OBJECT, verbosity, &print_header);
 	}
 
 	/* Walk every object in the objset, counting as we go. */
 	object = 0;
 	while ((error = dmu_object_next(os, &object, B_FALSE, 0)) == 0) {
 		dump_object(os, object, verbosity, &print_header);
 		object_count++;
 	}
 
 	ASSERT3U(object_count, ==, usedobjs);
 
 	(void) printf("\n");
 
 	/* ESRCH is the only expected way for the walk to end. */
 	if (error != ESRCH) {
 		(void) fprintf(stderr, "dmu_object_next() = %d\n", error);
 		abort();
 	}
 }
 
 /*
  * Print the fields of an uberblock.
  *
  * ub      uberblock to print
  * header  text printed before the fields; NULL prints nothing
  * footer  text printed after the fields; NULL prints nothing
  *
  * The root block pointer is included when dump_opt['u'] is 3 or more.
  */
 static void
 dump_uberblock(uberblock_t *ub, const char *header, const char *footer)
 {
 	time_t timestamp = ub->ub_timestamp;
 	struct tm *tmv;
 	const char *when;
 
 	(void) printf("%s", header ? header : "");
 	(void) printf("\tmagic = %016llx\n", (u_longlong_t)ub->ub_magic);
 	(void) printf("\tversion = %llu\n", (u_longlong_t)ub->ub_version);
 	(void) printf("\ttxg = %llu\n", (u_longlong_t)ub->ub_txg);
 	(void) printf("\tguid_sum = %llu\n", (u_longlong_t)ub->ub_guid_sum);
 
 	/*
 	 * The timestamp is whatever was stored in the uberblock.  An
 	 * out-of-range value makes localtime() return NULL, and handing
 	 * that to asctime() (or a NULL asctime() result to printf) is
 	 * undefined, so substitute a placeholder.  The placeholder carries
 	 * its own newline because asctime()'s string normally supplies it.
 	 */
 	tmv = localtime(&timestamp);
 	when = (tmv != NULL) ? asctime(tmv) : NULL;
 	if (when == NULL)
 		when = "(invalid)\n";
 	(void) printf("\ttimestamp = %llu UTC = %s",
 	    (u_longlong_t)ub->ub_timestamp, when);
 
 	if (dump_opt['u'] >= 3) {
 		char blkbuf[BP_SPRINTF_LEN];
 		snprintf_blkptr(blkbuf, sizeof (blkbuf), &ub->ub_rootbp);
 		(void) printf("\trootbp = %s\n", blkbuf);
 	}
 	(void) printf("%s", footer ? footer : "");
 }
 
 /*
  * Print the pool configuration kept in the MOS config object.
  *
  * The packed nvlist size is read from the bonus buffer of
  * spa_config_object and then passed, by address, to
  * dump_packed_nvlist().  If the bonus buffer cannot be held a
  * diagnostic is written to stderr and nothing else is printed.
  */
 static void
 dump_config(spa_t *spa)
 {
 	dmu_buf_t *db;
 	/*
 	 * Keep the size at the 64-bit width it is stored with in the
 	 * bonus buffer; narrowing it to size_t would truncate on ILP32.
 	 */
 	uint64_t nvsize = 0;
 	int error;
 
 	error = dmu_bonus_hold(spa->spa_meta_objset,
 	    spa->spa_config_object, FTAG, &db);
 	if (error != 0) {
 		(void) fprintf(stderr,
 		    "dmu_bonus_hold(%llu) failed, errno %d\n",
 		    (u_longlong_t)spa->spa_config_object, error);
 		return;
 	}
 
 	nvsize = *(uint64_t *)db->db_data;
 	dmu_buf_rele(db, FTAG);
 
 	(void) printf("\nMOS Configuration:\n");
 	dump_packed_nvlist(spa->spa_meta_objset,
 	    spa->spa_config_object, (void *)&nvsize, 1);
 }
 
 /*
  * Read a packed nvlist from the named cache file and print it.
  *
  * The whole file is read into memory, unpacked with nvlist_unpack() and
  * dumped with dump_nvlist().  Any failure writes a diagnostic to stderr
  * and exits with status 1.
  */
 static void
 dump_cachefile(const char *cachefile)
 {
 	int fd;
 	struct stat64 statbuf;
 	char *buf;
 	nvlist_t *config;
 
 	/*
 	 * Diagnostics go to stderr, like the other failure paths below,
 	 * so they do not mix with the nvlist dump on stdout.
 	 */
 	if ((fd = open64(cachefile, O_RDONLY)) < 0) {
 		(void) fprintf(stderr, "cannot open '%s': %s\n", cachefile,
 		    strerror(errno));
 		exit(1);
 	}
 
 	if (fstat64(fd, &statbuf) != 0) {
 		(void) fprintf(stderr, "failed to stat '%s': %s\n", cachefile,
 		    strerror(errno));
 		exit(1);
 	}
 
 	if ((buf = malloc(statbuf.st_size)) == NULL) {
 		(void) fprintf(stderr, "failed to allocate %llu bytes\n",
 		    (u_longlong_t)statbuf.st_size);
 		exit(1);
 	}
 
 	if (read(fd, buf, statbuf.st_size) != statbuf.st_size) {
 		(void) fprintf(stderr, "failed to read %llu bytes\n",
 		    (u_longlong_t)statbuf.st_size);
 		exit(1);
 	}
 
 	(void) close(fd);
 
 	if (nvlist_unpack(buf, statbuf.st_size, &config, 0) != 0) {
 		(void) fprintf(stderr, "failed to unpack nvlist\n");
 		exit(1);
 	}
 
 	/* The packed buffer is released before the unpacked list is used. */
 	free(buf);
 
 	dump_nvlist(config, 0);
 
 	nvlist_free(config);
 }
 
 #define	ZDB_MAX_UB_HEADER_SIZE 32
 
 /*
  * Print each uberblock slot of a vdev label that passes
  * uberblock_verify(), labelled with its slot index.
  *
  * lbl     in-memory copy of the label
  * ashift  ashift used to locate the uberblock slots
  */
 static void
 dump_label_uberblocks(vdev_label_t *lbl, uint64_t ashift)
 {
 	vdev_t fake_vd;
 	vdev_t *vdp = &fake_vd;
 	char header[ZDB_MAX_UB_HEADER_SIZE];
 	int slot = 0;
 
 	/*
 	 * Stand-in vdev handed to the VDEV_UBERBLOCK_* macros; only the
 	 * ashift and the top-level pointer are filled in.
 	 */
 	vdp->vdev_ashift = ashift;
 	vdp->vdev_top = vdp;
 
 	while (slot < VDEV_UBERBLOCK_COUNT(vdp)) {
 		uint64_t uoff = VDEV_UBERBLOCK_OFFSET(vdp, slot);
 		uberblock_t *ub = (void *)((char *)lbl + uoff);
 
 		/* A nonzero verify result means the slot is skipped. */
 		if (uberblock_verify(ub) == 0) {
 			(void) snprintf(header, sizeof (header),
 			    "Uberblock[%d]\n", slot);
 			dump_uberblock(ub, header, "");
 		}
 		slot++;
 	}
 }
 
 /* Path walked so far by dump_path_impl(); used in its messages. */
 static char curpath[PATH_MAX];
 
 /*
  * Iterate through the path components, recursively passing
  * current one's obj and remaining path until we find the obj
  * for the last one, then dump that object.
  *
  * os    objset being searched
  * obj   object in which the first component of name is looked up
  * name  remaining path relative to obj; modified in place (the first
  *       '/' is overwritten with a NUL)
  *
  * Returns 0 once the final object has been dumped, the zap_lookup()
  * error if a component cannot be found, or EINVAL for any other failure.
  */
 static int
 dump_path_impl(objset_t *os, uint64_t obj, char *name)
 {
 	int err;
 	int header = 1;
 	uint64_t child_obj;
 	char *s;
 	dmu_buf_t *db;
 	dmu_object_info_t doi;
 
 	/* Isolate the first component; s marks where the rest begins. */
 	if ((s = strchr(name, '/')) != NULL)
 		*s = '\0';
 	err = zap_lookup(os, obj, name, 8, 1, &child_obj);
 
 	/* Appended before the error check so the message names it. */
 	(void) strlcat(curpath, name, sizeof (curpath));
 
 	if (err != 0) {
 		(void) fprintf(stderr, "failed to lookup %s: %s\n",
 		    curpath, strerror(err));
 		return (err);
 	}
 
 	child_obj = ZFS_DIRENT_OBJ(child_obj);
 	err = sa_buf_hold(os, child_obj, FTAG, &db);
 	if (err != 0) {
 		(void) fprintf(stderr,
 		    "failed to get SA dbuf for obj %llu: %s\n",
 		    (u_longlong_t)child_obj, strerror(err));
 		return (EINVAL);
 	}
 	dmu_object_info_from_db(db, &doi);
 	sa_buf_rele(db, FTAG);
 
 	if (doi.doi_bonus_type != DMU_OT_SA &&
 	    doi.doi_bonus_type != DMU_OT_ZNODE) {
 		(void) fprintf(stderr, "invalid bonus type %d for obj %llu\n",
 		    doi.doi_bonus_type, (u_longlong_t)child_obj);
 		return (EINVAL);
 	}
 
 	if (dump_opt['v'] > 6) {
 		(void) printf("obj=%llu %s type=%d bonustype=%d\n",
 		    (u_longlong_t)child_obj, curpath, doi.doi_type,
 		    doi.doi_bonus_type);
 	}
 
 	(void) strlcat(curpath, "/", sizeof (curpath));
 
 	switch (doi.doi_type) {
 	case DMU_OT_DIRECTORY_CONTENTS:
 		/* Descend only if something follows the '/'. */
 		if (s != NULL && *(s + 1) != '\0')
 			return (dump_path_impl(os, child_obj, s + 1));
 		/*FALLTHROUGH*/
 	case DMU_OT_PLAIN_FILE_CONTENTS:
 		dump_object(os, child_obj, dump_opt['v'], &header);
 		return (0);
 	default:
 		/*
 		 * doi describes child_obj, so that is the object to
 		 * report, not the parent it was looked up in.
 		 */
 		(void) fprintf(stderr, "object %llu has non-file/directory "
 		    "type %d\n", (u_longlong_t)child_obj, doi.doi_type);
 		break;
 	}
 
 	return (EINVAL);
 }
 
 /*
  * Dump the blocks for the object specified by path inside the dataset.
  *
  * ds    dataset name, opened as a DMU_OST_ZFS objset
  * path  path relative to the dataset root; dump_path_impl() modifies it
  *       in place
  *
  * Returns 0 on success, the open_objset() error if the dataset cannot be
  * opened, EINVAL if the root znode cannot be looked up, or whatever
  * dump_path_impl() returns.
  */
 static int
 dump_path(char *ds, char *path)
 {
 	int err;
 	objset_t *os;
 	uint64_t root_obj;
 
 	err = open_objset(ds, DMU_OST_ZFS, FTAG, &os);
 	if (err != 0)
 		return (err);
 
 	err = zap_lookup(os, MASTER_NODE_OBJ, ZFS_ROOT_OBJ, 8, 1, &root_obj);
 	if (err != 0) {
 		(void) fprintf(stderr, "can't lookup root znode: %s\n",
 		    strerror(err));
 		/*
 		 * The objset came from open_objset(), so release it with
 		 * the matching close_objset() as the success path does.
 		 */
 		close_objset(os, FTAG);
 		return (EINVAL);
 	}
 
 	(void) snprintf(curpath, sizeof (curpath), "dataset=%s path=/", ds);
 
 	err = dump_path_impl(os, root_obj, path);
 
 	close_objset(os, FTAG);
 	return (err);
 }
 
 /*
  * Read and print the VDEV_LABELS labels of a device.
  *
  * dev  device name.  An absolute name under ZFS_DISK_ROOTD is rewritten
  *      to the corresponding ZFS_RDISK_ROOTD path.  A relative name that
  *      cannot be stat'ed is looked up under ZFS_RDISK_ROOTD, with "s0"
  *      appended when it does not already end in a slice/partition suffix.
  *
  * Unless dump_opt['q'] is set each label's config nvlist is printed;
  * with dump_opt['u'] the label's uberblocks are printed as well.
  *
  * Returns 0 if at least one label could be unpacked, otherwise 2.
  * Exits with status 1 if the device cannot be opened or stat'ed, or is
  * a block device.
  */
 static int
 dump_label(const char *dev)
 {
 	int fd;
 	vdev_label_t label;
 	char path[MAXPATHLEN];
 	char *buf = label.vl_vdev_phys.vp_nvlist;
 	size_t buflen = sizeof (label.vl_vdev_phys.vp_nvlist);
 	struct stat64 statbuf;
 	uint64_t psize, ashift;
 	boolean_t label_found = B_FALSE;
 
 	(void) strlcpy(path, dev, sizeof (path));
 	if (dev[0] == '/') {
 		if (strncmp(dev, ZFS_DISK_ROOTD,
 		    strlen(ZFS_DISK_ROOTD)) == 0) {
 			(void) snprintf(path, sizeof (path), "%s%s",
 			    ZFS_RDISK_ROOTD, dev + strlen(ZFS_DISK_ROOTD));
 		}
 	} else if (stat64(path, &statbuf) != 0) {
 		char *s;
 
 		(void) snprintf(path, sizeof (path), "%s%s", ZFS_RDISK_ROOTD,
 		    dev);
 		/*
 		 * isdigit() is only defined for unsigned char values and
 		 * EOF; dev comes from the command line, so cast before
 		 * classifying.
 		 */
 		if (((s = strrchr(dev, 's')) == NULL &&
 		    (s = strchr(dev, 'p')) == NULL) ||
 		    !isdigit((unsigned char)*(s + 1)))
 			(void) strlcat(path, "s0", sizeof (path));
 	}
 
 	if ((fd = open64(path, O_RDONLY)) < 0) {
 		(void) fprintf(stderr, "cannot open '%s': %s\n", path,
 		    strerror(errno));
 		exit(1);
 	}
 
 	if (fstat64(fd, &statbuf) != 0) {
 		(void) fprintf(stderr, "failed to stat '%s': %s\n", path,
 		    strerror(errno));
 		(void) close(fd);
 		exit(1);
 	}
 
 	if (S_ISBLK(statbuf.st_mode)) {
 		(void) fprintf(stderr,
 		    "cannot use '%s': character device required\n", path);
 		(void) close(fd);
 		exit(1);
 	}
 
 	/* Label offsets are computed from the size rounded to a label. */
 	psize = statbuf.st_size;
 	psize = P2ALIGN(psize, (uint64_t)sizeof (vdev_label_t));
 
 	for (int l = 0; l < VDEV_LABELS; l++) {
 		nvlist_t *config = NULL;
 
 		if (!dump_opt['q']) {
 			(void) printf("------------------------------------\n");
 			(void) printf("LABEL %d\n", l);
 			(void) printf("------------------------------------\n");
 		}
 
 		if (pread64(fd, &label, sizeof (label),
 		    vdev_label_offset(psize, l, 0)) != sizeof (label)) {
 			if (!dump_opt['q'])
 				(void) printf("failed to read label %d\n", l);
 			continue;
 		}
 
 		/*
 		 * ashift falls back to SPA_MINBLOCKSHIFT whenever it cannot
 		 * be read from the label's vdev tree.
 		 */
 		if (nvlist_unpack(buf, buflen, &config, 0) != 0) {
 			if (!dump_opt['q'])
 				(void) printf("failed to unpack label %d\n", l);
 			ashift = SPA_MINBLOCKSHIFT;
 		} else {
 			nvlist_t *vdev_tree = NULL;
 
 			if (!dump_opt['q'])
 				dump_nvlist(config, 4);
 			if ((nvlist_lookup_nvlist(config,
 			    ZPOOL_CONFIG_VDEV_TREE, &vdev_tree) != 0) ||
 			    (nvlist_lookup_uint64(vdev_tree,
 			    ZPOOL_CONFIG_ASHIFT, &ashift) != 0))
 				ashift = SPA_MINBLOCKSHIFT;
 			nvlist_free(config);
 			label_found = B_TRUE;
 		}
 		if (dump_opt['u'])
 			dump_label_uberblocks(&label, ashift);
 	}
 
 	(void) close(fd);
 
 	return (label_found ? 0 : 2);
 }
 
 static uint64_t dataset_feature_count[SPA_FEATURES];
 static uint64_t remap_deadlist_count = 0;
 
 /*ARGSUSED*/
 static int
 dump_one_dir(const char *dsname, void *arg)
 {
 	int error;
 	objset_t *os;
 
 	error = open_objset(dsname, DMU_OST_ANY, FTAG, &os);
 	if (error != 0)
 		return (0);
 
 	for (spa_feature_t f = 0; f < SPA_FEATURES; f++) {
 		if (!dmu_objset_ds(os)->ds_feature_inuse[f])
 			continue;
 		ASSERT(spa_feature_table[f].fi_flags &
 		    ZFEATURE_FLAG_PER_DATASET);
 		dataset_feature_count[f]++;
 	}
 
 	if (dsl_dataset_remap_deadlist_exists(dmu_objset_ds(os))) {
 		remap_deadlist_count++;
 	}
 
 	dump_dir(os);
 	close_objset(os, FTAG);
 	fuid_table_destroy();
 	return (0);
 }
 
 /*
  * Block statistics.
  *
  * One zdb_blkstats_t accumulates totals for a single (level, type)
  * bucket of zdb_cb_t.zcb_type[][]; zdb_count_block() fills it in.
  *
  * zb_asize, zb_lsize, zb_psize  sums of the blocks' allocated, logical
  *                               and physical sizes
  * zb_count                      number of blocks
  * zb_gangs                      sum of BP_COUNT_GANG() over the blocks
  * zb_ditto_samevdev             blocks with 2 or 3 DVAs of which at
  *                               least two are on the same vdev
  * zb_psize_histogram            block counts indexed by physical size in
  *                               SPA_MINBLOCKSIZE units; the last slot
  *                               collects everything larger than
  *                               SPA_OLD_MAXBLOCKSIZE
  */
 #define	PSIZE_HISTO_SIZE (SPA_OLD_MAXBLOCKSIZE / SPA_MINBLOCKSIZE + 2)
 typedef struct zdb_blkstats {
 	uint64_t zb_asize;
 	uint64_t zb_lsize;
 	uint64_t zb_psize;
 	uint64_t zb_count;
 	uint64_t zb_gangs;
 	uint64_t zb_ditto_samevdev;
 	uint64_t zb_psize_histogram[PSIZE_HISTO_SIZE];
 } zdb_blkstats_t;
 
 /*
  * Extended object types to report deferred frees and dedup auto-ditto blocks.
  * They are numbered immediately after the real DMU object types, and
  * ZDB_OT_TOTAL sizes the type dimension of zdb_cb_t.zcb_type[][].
  */
 #define	ZDB_OT_DEFERRED	(DMU_OT_NUMTYPES + 0)
 #define	ZDB_OT_DITTO	(DMU_OT_NUMTYPES + 1)
 #define	ZDB_OT_OTHER	(DMU_OT_NUMTYPES + 2)
 #define	ZDB_OT_TOTAL	(DMU_OT_NUMTYPES + 3)
 
 /*
  * Display names for the extended types, listed in the same order as the
  * ZDB_OT_* values above.
  */
 static const char *zdb_ot_extname[] = {
 	"deferred free",
 	"dedup ditto",
 	"other",
 	"Total",
 };
 
 /* Extra "all levels" row of zcb_type[][], one past the real levels. */
 #define	ZB_TOTAL	DN_MAX_LEVELS
 
 /*
  * State shared by the block traversal callbacks.
  *
  * zcb_type                per-(level, type) statistics; row ZB_TOTAL and
  *                         column ZDB_OT_TOTAL hold the aggregates
  * zcb_removing_size       space tallied by zdb_claim_removing()
  * zcb_dedup_asize,
  * zcb_dedup_blocks        dedup totals gathered by zdb_ddt_leak_init()
  * zcb_embedded_blocks,
  * zcb_embedded_histogram  embedded block counts by embedded type, and by
  *                         embedded type and physical size
  * zcb_start               gethrtime() value when the traversal started
  * zcb_lastprint           gethrtime() value of the last progress line
  * zcb_totalasize          allocated size of the normal class, used for
  *                         the time-remaining estimate
  * zcb_errors              read error counts indexed by I/O error number
  * zcb_readfails           reset to 0 by zdb_blkptr_cb()
  * zcb_haderrors           nonzero once any read error has been seen
  * zcb_spa                 pool being traversed
  * zcb_vd_obsolete_counts  per top-level vdev obsolete count arrays, NULL
  *                         for vdevs that are not indirect
  */
 typedef struct zdb_cb {
 	zdb_blkstats_t	zcb_type[ZB_TOTAL + 1][ZDB_OT_TOTAL + 1];
 	uint64_t	zcb_removing_size;
 	uint64_t	zcb_dedup_asize;
 	uint64_t	zcb_dedup_blocks;
 	uint64_t	zcb_embedded_blocks[NUM_BP_EMBEDDED_TYPES];
 	uint64_t	zcb_embedded_histogram[NUM_BP_EMBEDDED_TYPES]
 	    [BPE_PAYLOAD_SIZE];
 	uint64_t	zcb_start;
 	hrtime_t	zcb_lastprint;
 	uint64_t	zcb_totalasize;
 	uint64_t	zcb_errors[256];
 	int		zcb_readfails;
 	int		zcb_haderrors;
 	spa_t		*zcb_spa;
 	uint32_t	**zcb_vd_obsolete_counts;
 } zdb_cb_t;
 
 /*
  * Account for one block pointer in the traversal statistics and, unless
  * dump_opt['L'] is set, claim it.
  *
  * zcb    traversal state
  * zilog  if non-NULL, the block is first added to this log's bp tree and
  *        skipped entirely when zil_bp_tree_add() returns nonzero
  * bp     block pointer to count
  * type   statistics column; must be below ZDB_OT_TOTAL
  *
  * Embedded blocks are tallied in the embedded tables and never claimed.
  * The claim is verified to succeed.
  */
 static void
 zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp,
     dmu_object_type_t type)
 {
 	uint64_t refcnt = 0;
 
 	ASSERT(type < ZDB_OT_TOTAL);
 
 	if (zilog && zil_bp_tree_add(zilog, bp) != 0)
 		return;
 
 	/*
 	 * Each block lands in four buckets: {its level, ZB_TOTAL} crossed
 	 * with {its type, ZDB_OT_TOTAL}.
 	 */
 	for (int i = 0; i < 4; i++) {
 		int l = (i < 2) ? BP_GET_LEVEL(bp) : ZB_TOTAL;
 		int t = (i & 1) ? type : ZDB_OT_TOTAL;
 		int equal;
 		zdb_blkstats_t *zb = &zcb->zcb_type[l][t];
 
 		zb->zb_asize += BP_GET_ASIZE(bp);
 		zb->zb_lsize += BP_GET_LSIZE(bp);
 		zb->zb_psize += BP_GET_PSIZE(bp);
 		zb->zb_count++;
 
 		/*
 		 * The histogram is only big enough to record blocks up to
 		 * SPA_OLD_MAXBLOCKSIZE; larger blocks go into the last,
 		 * "other", bucket.
 		 */
 		unsigned idx = BP_GET_PSIZE(bp) >> SPA_MINBLOCKSHIFT;
 		idx = MIN(idx, SPA_OLD_MAXBLOCKSIZE / SPA_MINBLOCKSIZE + 1);
 		zb->zb_psize_histogram[idx]++;
 
 		zb->zb_gangs += BP_COUNT_GANG(bp);
 
 		/* Count blocks whose copies share a vdev. */
 		switch (BP_GET_NDVAS(bp)) {
 		case 2:
 			if (DVA_GET_VDEV(&bp->blk_dva[0]) ==
 			    DVA_GET_VDEV(&bp->blk_dva[1]))
 				zb->zb_ditto_samevdev++;
 			break;
 		case 3:
 			equal = (DVA_GET_VDEV(&bp->blk_dva[0]) ==
 			    DVA_GET_VDEV(&bp->blk_dva[1])) +
 			    (DVA_GET_VDEV(&bp->blk_dva[0]) ==
 			    DVA_GET_VDEV(&bp->blk_dva[2])) +
 			    (DVA_GET_VDEV(&bp->blk_dva[1]) ==
 			    DVA_GET_VDEV(&bp->blk_dva[2]));
 			if (equal != 0)
 				zb->zb_ditto_samevdev++;
 			break;
 		}
 
 	}
 
 	if (BP_IS_EMBEDDED(bp)) {
 		zcb->zcb_embedded_blocks[BPE_GET_ETYPE(bp)]++;
 		zcb->zcb_embedded_histogram[BPE_GET_ETYPE(bp)]
 		    [BPE_GET_PSIZE(bp)]++;
 		return;
 	}
 
 	if (dump_opt['L'])
 		return;
 
 	/*
 	 * For a dedup block, drop one reference from the matching DDT
 	 * entry (removing the entry once its total refcount reaches
 	 * zero).  The refcount left after the decrement selects the txg
 	 * passed to zio_claim() below.
 	 */
 	if (BP_GET_DEDUP(bp)) {
 		ddt_t *ddt;
 		ddt_entry_t *dde;
 
 		ddt = ddt_select(zcb->zcb_spa, bp);
 		ddt_enter(ddt);
 		dde = ddt_lookup(ddt, bp, B_FALSE);
 
 		if (dde == NULL) {
 			refcnt = 0;
 		} else {
 			ddt_phys_t *ddp = ddt_phys_select(dde, bp);
 			ddt_phys_decref(ddp);
 			refcnt = ddp->ddp_refcnt;
 			if (ddt_phys_total_refcnt(dde) == 0)
 				ddt_remove(ddt, dde);
 		}
 		ddt_exit(ddt);
 	}
 
 	/* txg 0 while references remain, otherwise spa_first_txg(). */
 	VERIFY3U(zio_wait(zio_claim(NULL, zcb->zcb_spa,
 	    refcnt ? 0 : spa_first_txg(zcb->zcb_spa),
 	    bp, NULL, NULL, ZIO_FLAG_CANFAIL)), ==, 0);
 }
 
 /*
  * Completion callback for the reads issued by zdb_blkptr_cb().
  *
  * Frees the read buffer, drops the in-flight count under
  * spa_scrub_lock and wakes any waiter.  A failed read that was not
  * issued as speculative is recorded in the zdb_cb_t error counters and
  * reported on stdout; the block pointer is included in the report when
  * dump_opt['b'] is 2 or more.
  */
 static void
 zdb_blkptr_done(zio_t *zio)
 {
 	spa_t *spa = zio->io_spa;
 	blkptr_t *bp = zio->io_bp;
 	int ioerr = zio->io_error;
 	zdb_cb_t *zcb = zio->io_private;
 	zbookmark_phys_t *zb = &zio->io_bookmark;
 
 	abd_free(zio->io_abd);
 
 	mutex_enter(&spa->spa_scrub_lock);
 	spa->spa_scrub_inflight--;
 	cv_broadcast(&spa->spa_scrub_io_cv);
 
 	if (ioerr && !(zio->io_flags & ZIO_FLAG_SPECULATIVE)) {
 		char blkbuf[BP_SPRINTF_LEN];
 
 		zcb->zcb_haderrors = 1;
 		zcb->zcb_errors[ioerr]++;
 
 		if (dump_opt['b'] >= 2)
 			snprintf_blkptr(blkbuf, sizeof (blkbuf), bp);
 		else
 			blkbuf[0] = '\0';
 
 		/*
 		 * zb_level is printed with %lld, so pass it as a signed
 		 * value, as zdb_blkptr_cb() does.
 		 */
 		(void) printf("zdb_blkptr_cb: "
 		    "Got error %d reading "
 		    "<%llu, %llu, %lld, %llx> %s -- skipping\n",
 		    ioerr,
 		    (u_longlong_t)zb->zb_objset,
 		    (u_longlong_t)zb->zb_object,
 		    (longlong_t)zb->zb_level,
 		    (u_longlong_t)zb->zb_blkid,
 		    blkbuf);
 	}
 	mutex_exit(&spa->spa_scrub_lock);
 }
 
 /*
  * Per-block callback handed to traverse_pool() by dump_block_stats().
  *
  * Counts every non-hole block via zdb_count_block().  When dump_opt['c']
  * is set it also issues an asynchronous raw read of the block (metadata
  * only for a value of 1, everything for larger values), throttled
  * against max_inflight; zdb_blkptr_done() handles completion.  A
  * progress line is written to stderr at most about once a second.
  *
  * Always returns 0.
  */
 static int
 zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
     const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg)
 {
 	zdb_cb_t *zcb = arg;
 	dmu_object_type_t type;
 	boolean_t is_metadata;
 
 	if (bp == NULL)
 		return (0);
 
 	/* At -bbbbb every block with a nonzero birth txg is printed. */
 	if (dump_opt['b'] >= 5 && bp->blk_birth > 0) {
 		char blkbuf[BP_SPRINTF_LEN];
 		snprintf_blkptr(blkbuf, sizeof (blkbuf), bp);
 		(void) printf("objset %llu object %llu "
 		    "level %lld offset 0x%llx %s\n",
 		    (u_longlong_t)zb->zb_objset,
 		    (u_longlong_t)zb->zb_object,
 		    (longlong_t)zb->zb_level,
 		    (u_longlong_t)blkid2offset(dnp, bp, zb),
 		    blkbuf);
 	}
 
 	if (BP_IS_HOLE(bp))
 		return (0);
 
 	type = BP_GET_TYPE(bp);
 
 	/* Types with DMU_OT_NEWTYPE set are all counted as "other". */
 	zdb_count_block(zcb, zilog, bp,
 	    (type & DMU_OT_NEWTYPE) ? ZDB_OT_OTHER : type);
 
 	is_metadata = (BP_GET_LEVEL(bp) != 0 || DMU_OT_IS_METADATA(type));
 
 	if (!BP_IS_EMBEDDED(bp) &&
 	    (dump_opt['c'] > 1 || (dump_opt['c'] && is_metadata))) {
 		size_t size = BP_GET_PSIZE(bp);
 		abd_t *abd = abd_alloc(size, B_FALSE);
 		int flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_SCRUB | ZIO_FLAG_RAW;
 
 		/* If it's an intent log block, failure is expected. */
 		if (zb->zb_level == ZB_ZIL_LEVEL)
 			flags |= ZIO_FLAG_SPECULATIVE;
 
 		/* Wait until the in-flight count is back within the limit. */
 		mutex_enter(&spa->spa_scrub_lock);
 		while (spa->spa_scrub_inflight > max_inflight)
 			cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock);
 		spa->spa_scrub_inflight++;
 		mutex_exit(&spa->spa_scrub_lock);
 
 		zio_nowait(zio_read(NULL, spa, bp, abd, size,
 		    zdb_blkptr_done, zcb, ZIO_PRIORITY_ASYNC_READ, flags, zb));
 	}
 
 	zcb->zcb_readfails = 0;
 
 	/* only call gethrtime() every 100 blocks */
 	static int iters;
 	if (++iters > 100)
 		iters = 0;
 	else
 		return (0);
 
 	if (dump_opt['b'] < 5 && gethrtime() > zcb->zcb_lastprint + NANOSEC) {
 		uint64_t now = gethrtime();
 		char buf[10];
 		uint64_t bytes = zcb->zcb_type[ZB_TOTAL][ZDB_OT_TOTAL].zb_asize;
 		/* The "1 +" terms keep both divisions away from zero. */
 		int kb_per_sec =
 		    1 + bytes / (1 + ((now - zcb->zcb_start) / 1000 / 1000));
 		int sec_remaining =
 		    (zcb->zcb_totalasize - bytes) / 1024 / kb_per_sec;
 
 		/* make sure nicenum has enough space */
 		CTASSERT(sizeof (buf) >= NN_NUMBUF_SZ);
 
 		zfs_nicenum(bytes, buf, sizeof (buf));
 		(void) fprintf(stderr,
 		    "\r%5s completed (%4dMB/s) "
 		    "estimated time remaining: %uhr %02umin %02usec        ",
 		    buf, kb_per_sec / 1024,
 		    sec_remaining / 60 / 60,
 		    sec_remaining / 60 % 60,
 		    sec_remaining % 60);
 
 		zcb->zcb_lastprint = now;
 	}
 
 	return (0);
 }
 
 static void
 zdb_leak(void *arg, uint64_t start, uint64_t size)
 {
 	vdev_t *vd = arg;
 
 	(void) printf("leaked space: vdev %llu, offset 0x%llx, size %llu\n",
 	    (u_longlong_t)vd->vdev_id, (u_longlong_t)start, (u_longlong_t)size);
 }
 
 /*
  * Metaslab ops table with no allocator.  zdb_leak_init() installs it on
  * the normal and log classes so that the allocator does not try to use
  * the ms_tree while zdb is repurposing it.
  */
 static metaslab_ops_t zdb_metaslab_ops = {
 	NULL	/* alloc */
 };
 
 /*
  * Walk the dedup table ahead of the block traversal.
  *
  * For every entry visited before the walk reaches DDT_CLASS_UNIQUE,
  * ditto copies are counted as ZDB_OT_DITTO blocks and the other copies
  * add their extra references to the dedup totals in zcb.  Unless
  * dump_opt['L'] is set the entry is also looked up (with add set) in
  * the in-core table for its checksum, which must succeed.
  */
 static void
 zdb_ddt_leak_init(spa_t *spa, zdb_cb_t *zcb)
 {
 	ddt_bookmark_t cursor;
 	ddt_entry_t dde;
 	int error;
 
 	bzero(&cursor, sizeof (cursor));
 	for (;;) {
 		blkptr_t blk;
 
 		error = ddt_walk(spa, &cursor, &dde);
 		if (error != 0)
 			break;
 
 		/* Reaching the unique class ends the walk. */
 		if (cursor.ddb_class == DDT_CLASS_UNIQUE)
 			return;
 
 		ASSERT(ddt_phys_total_refcnt(&dde) > 1);
 
 		for (int p = 0; p < DDT_PHYS_TYPES; p++) {
 			ddt_phys_t *ddp = &dde.dde_phys[p];
 
 			if (ddp->ddp_phys_birth == 0)
 				continue;
 
 			ddt_bp_create(cursor.ddb_checksum,
 			    &dde.dde_key, ddp, &blk);
 			if (p != DDT_PHYS_DITTO) {
 				zcb->zcb_dedup_asize +=
 				    BP_GET_ASIZE(&blk) * (ddp->ddp_refcnt - 1);
 				zcb->zcb_dedup_blocks++;
 			} else {
 				zdb_count_block(zcb, NULL, &blk, ZDB_OT_DITTO);
 			}
 		}
 
 		if (!dump_opt['L']) {
 			ddt_t *ddt = spa->spa_ddt[cursor.ddb_checksum];
 
 			ddt_enter(ddt);
 			VERIFY(ddt_lookup(ddt, &blk, B_TRUE) != NULL);
 			ddt_exit(ddt);
 		}
 	}
 
 	ASSERT(error == ENOENT);
 }
 
 /*
  * Remap callback: claim [offset, offset + size) on the vdev the remap
  * resolved to.  The claim is verified to succeed.
  */
 /* ARGSUSED */
 static void
 claim_segment_impl_cb(uint64_t inner_offset, vdev_t *vd, uint64_t offset,
     uint64_t size, void *arg)
 {
 	uint64_t claim_txg;
 
 	/*
 	 * We only get here by remapping a segment of a device that is
 	 * being removed, so the target vdev must be a concrete one.
 	 */
 	ASSERT(vdev_is_concrete(vd));
 
 	claim_txg = spa_first_txg(vd->vdev_spa);
 	VERIFY0(metaslab_claim_impl(vd, offset, size, claim_txg));
 }
 
 /*
  * Segment callback used by zdb_claim_removing(): remap the segment
  * through the indirect vdev ops and claim each piece via
  * claim_segment_impl_cb().  arg is the vdev_t being removed.
  */
 static void
 claim_segment_cb(void *arg, uint64_t offset, uint64_t size)
 {
 	vdev_t *removing_vd = arg;
 
 	vdev_indirect_ops.vdev_op_remap(removing_vd, offset, size,
 	    claim_segment_impl_cb, NULL);
 }
 
 /*
  * After accounting for all allocated blocks that are directly referenced,
  * we might have missed a reference to a block from a partially complete
  * (and thus unused) indirect mapping object. We perform a secondary pass
  * through the metaslabs we have already mapped and claim the destination
  * blocks.
  *
  * Does nothing when no removal is in progress.  The claimed space is
  * added to zcb->zcb_removing_size.
  */
 static void
 zdb_claim_removing(spa_t *spa, zdb_cb_t *zcb)
 {
 	spa_vdev_removal_t *svr;
 	vdev_t *vd;
 	vdev_indirect_mapping_t *vim;
 	uint64_t msi;
 
 	if (spa->spa_vdev_removal == NULL)
 		return;
 
 	spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
 
 	svr = spa->spa_vdev_removal;
 	vd = svr->svr_vdev;
 	vim = vd->vdev_indirect_mapping;
 
 	for (msi = 0; msi < vd->vdev_ms_count; msi++) {
 		metaslab_t *msp = vd->vdev_ms[msi];
 		uint64_t mapped_end = vdev_indirect_mapping_max_offset(vim);
 
 		/* Metaslabs at or past the mapped range are not visited. */
 		if (msp->ms_start >= mapped_end)
 			break;
 
 		ASSERT0(range_tree_space(svr->svr_allocd_segs));
 
 		if (msp->ms_sm != NULL) {
 			space_map_t *sm = msp->ms_sm;
 
 			VERIFY0(space_map_load(sm,
 			    svr->svr_allocd_segs, SM_ALLOC));
 
 			/*
 			 * Drop whatever lies beyond the synced part of the
 			 * mapping; no mappings have been allocated for it.
 			 */
 			range_tree_clear(svr->svr_allocd_segs, mapped_end,
 			    sm->sm_start + sm->sm_size - mapped_end);
 		}
 
 		zcb->zcb_removing_size +=
 		    range_tree_space(svr->svr_allocd_segs);
 		range_tree_vacate(svr->svr_allocd_segs, claim_segment_cb, vd);
 	}
 
 	spa_config_exit(spa, SCL_CONFIG, FTAG);
 }
 
 /*
  * Prepare one metaslab for leak detection by filling its ms_tree with
  * the metaslab's allocated segments.
  *
  * vim_idxp is an in-out parameter which (for indirect vdevs) is the
  * index in vim_entries that has the first entry in this metaslab.  On
  * return, it will be set to the first entry after this metaslab.
  */
 static void
 zdb_leak_init_ms(metaslab_t *msp, uint64_t *vim_idxp)
 {
 	metaslab_group_t *mg = msp->ms_group;
 	vdev_t *vd = mg->mg_vd;
 	vdev_t *rvd = vd->vdev_spa->spa_root_vdev;
 
 	mutex_enter(&msp->ms_lock);
 	metaslab_unload(msp);
 
 	/*
 	 * We don't want to spend the CPU manipulating the size-ordered
 	 * tree, so clear the range_tree ops.
 	 */
 	msp->ms_tree->rt_ops = NULL;
 
 	/* Progress line; "\r" keeps it on a single terminal line. */
 	(void) fprintf(stderr,
 	    "\rloading vdev %llu of %llu, metaslab %llu of %llu ...",
 	    (longlong_t)vd->vdev_id,
 	    (longlong_t)rvd->vdev_children,
 	    (longlong_t)msp->ms_id,
 	    (longlong_t)vd->vdev_ms_count);
 
 	/*
 	 * For leak detection, we overload the metaslab ms_tree to
 	 * contain allocated segments instead of free segments. As a
 	 * result, we can't use the normal metaslab_load/unload
 	 * interfaces.
 	 */
 	if (vd->vdev_ops == &vdev_indirect_ops) {
 		/*
 		 * Indirect vdev: the "allocated" segments are the source
 		 * ranges of the mapping entries that fall in this metaslab.
 		 */
 		vdev_indirect_mapping_t *vim = vd->vdev_indirect_mapping;
 		for (; *vim_idxp < vdev_indirect_mapping_num_entries(vim);
 		    (*vim_idxp)++) {
 			vdev_indirect_mapping_entry_phys_t *vimep =
 			    &vim->vim_entries[*vim_idxp];
 			uint64_t ent_offset = DVA_MAPPING_GET_SRC_OFFSET(vimep);
 			uint64_t ent_len = DVA_GET_ASIZE(&vimep->vimep_dst);
 			ASSERT3U(ent_offset, >=, msp->ms_start);
 			if (ent_offset >= msp->ms_start + msp->ms_size)
 				break;
 
 			/*
 			 * Mappings do not cross metaslab boundaries,
 			 * because we create them by walking the metaslabs.
 			 */
 			ASSERT3U(ent_offset + ent_len, <=,
 			    msp->ms_start + msp->ms_size);
 			range_tree_add(msp->ms_tree, ent_offset, ent_len);
 		}
 	} else if (msp->ms_sm != NULL) {
 		VERIFY0(space_map_load(msp->ms_sm, msp->ms_tree, SM_ALLOC));
 	}
 
 	/* Mark the metaslab loaded; zdb_leak_fini() clears this again. */
 	if (!msp->ms_loaded) {
 		msp->ms_loaded = B_TRUE;
 	}
 	mutex_exit(&msp->ms_lock);
 }
 
 /*
  * bpobj callback used by zdb_leak_init() on the obsolete bpobj: add the
  * block's single DVA to the obsolete counts of the vdev it lives on.
  * arg is the zdb_cb_t.  Always returns 0.
  */
 /* ARGSUSED */
 static int
 increment_indirect_mapping_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
 {
 	zdb_cb_t *zcb = arg;
 	spa_t *spa = zcb->zcb_spa;
 	const dva_t *dva = &bp->blk_dva[0];
 	vdev_t *vd;
 	uint32_t *counts;
 
 	ASSERT(!dump_opt['L']);
 	ASSERT3U(BP_GET_NDVAS(bp), ==, 1);
 
 	/* The top-level vdev is looked up under the SCL_VDEV lock. */
 	spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
 	vd = vdev_lookup_top(zcb->zcb_spa, DVA_GET_VDEV(dva));
 	ASSERT3P(vd, !=, NULL);
 	spa_config_exit(spa, SCL_VDEV, FTAG);
 
 	ASSERT(vd->vdev_indirect_config.vic_mapping_object != 0);
 
 	counts = zcb->zcb_vd_obsolete_counts[vd->vdev_id];
 	ASSERT3P(counts, !=, NULL);
 
 	vdev_indirect_mapping_increment_obsolete_count(
 	    vd->vdev_indirect_mapping,
 	    DVA_GET_OFFSET(dva), DVA_GET_ASIZE(dva), counts);
 
 	return (0);
 }
 
 /*
  * Build the obsolete counts array for an indirect vdev.
  *
  * Starts from the counts loaded from the vdev's indirect mapping, then
  * folds in the vdev's obsolete space map if it has one, and the previous
  * obsolete space map recorded in spa_condensing_indirect_phys when that
  * record names this vdev.  Returns the array.
  */
 static uint32_t *
 zdb_load_obsolete_counts(vdev_t *vd)
 {
 	spa_t *spa = vd->vdev_spa;
 	vdev_indirect_mapping_t *vim = vd->vdev_indirect_mapping;
 	spa_condensing_indirect_phys_t *scip =
 	    &spa->spa_condensing_indirect_phys;
 	space_map_t *prev_obsolete_sm = NULL;
 	uint32_t *counts;
 
 	EQUIV(vdev_obsolete_sm_object(vd) != 0, vd->vdev_obsolete_sm != NULL);
 
 	counts = vdev_indirect_mapping_load_obsolete_counts(vim);
 	if (vd->vdev_obsolete_sm != NULL) {
 		vdev_indirect_mapping_load_obsolete_spacemap(vim, counts,
 		    vd->vdev_obsolete_sm);
 	}
 
 	/* Nothing more to add unless the record names this vdev. */
 	if (scip->scip_vdev != vd->vdev_id ||
 	    scip->scip_prev_obsolete_sm_object == 0)
 		return (counts);
 
 	VERIFY0(space_map_open(&prev_obsolete_sm, spa->spa_meta_objset,
 	    scip->scip_prev_obsolete_sm_object, 0, vd->vdev_asize, 0));
 	space_map_update(prev_obsolete_sm);
 	vdev_indirect_mapping_load_obsolete_spacemap(vim, counts,
 	    prev_obsolete_sm);
 	space_map_close(prev_obsolete_sm);
 
 	return (counts);
 }
 
 /*
  * Set up leak detection state before the block traversal.
  *
  * Unless dump_opt['L'] is set: swaps in an allocator-less ops table on
  * the normal and log classes, loads every metaslab's ms_tree with its
  * allocated segments (see zdb_leak_init_ms()), loads obsolete counts for
  * indirect vdevs, and tallies the obsolete bpobj if it is open.  In all
  * cases zcb->zcb_spa is set and the dedup table is walked via
  * zdb_ddt_leak_init().
  */
 static void
 zdb_leak_init(spa_t *spa, zdb_cb_t *zcb)
 {
 	zcb->zcb_spa = spa;
 
 	if (!dump_opt['L']) {
 		dsl_pool_t *dp = spa->spa_dsl_pool;
 		vdev_t *rvd = spa->spa_root_vdev;
 
 		/*
 		 * We are going to be changing the meaning of the metaslab's
 		 * ms_tree.  Ensure that the allocator doesn't try to
 		 * use the tree.
 		 */
 		spa->spa_normal_class->mc_ops = &zdb_metaslab_ops;
 		spa->spa_log_class->mc_ops = &zdb_metaslab_ops;
 
 		/* One slot per top-level vdev; freed in zdb_leak_fini(). */
 		zcb->zcb_vd_obsolete_counts =
 		    umem_zalloc(rvd->vdev_children * sizeof (uint32_t *),
 		    UMEM_NOFAIL);
 
 
 		for (uint64_t c = 0; c < rvd->vdev_children; c++) {
 			vdev_t *vd = rvd->vdev_child[c];
 			uint64_t vim_idx = 0;
 
 			ASSERT3U(c, ==, vd->vdev_id);
 
 			/*
 			 * Note: we don't check for mapping leaks on
 			 * removing vdevs because their ms_tree's are
 			 * used to look for leaks in allocated space.
 			 */
 			if (vd->vdev_ops == &vdev_indirect_ops) {
 				zcb->zcb_vd_obsolete_counts[c] =
 				    zdb_load_obsolete_counts(vd);
 
 				/*
 				 * Normally, indirect vdevs don't have any
 				 * metaslabs.  We want to set them up for
 				 * zio_claim().
 				 */
 				VERIFY0(vdev_metaslab_init(vd, 0));
 			}
 
 			/* vim_idx carries the mapping position across calls. */
 			for (uint64_t m = 0; m < vd->vdev_ms_count; m++) {
 				zdb_leak_init_ms(vd->vdev_ms[m], &vim_idx);
 			}
 			if (vd->vdev_ops == &vdev_indirect_ops) {
 				ASSERT3U(vim_idx, ==,
 				    vdev_indirect_mapping_num_entries(
 				    vd->vdev_indirect_mapping));
 			}
 		}
 		/* Terminate the "\r" progress line from zdb_leak_init_ms(). */
 		(void) fprintf(stderr, "\n");
 
 		if (bpobj_is_open(&dp->dp_obsolete_bpobj)) {
 			ASSERT(spa_feature_is_enabled(spa,
 			    SPA_FEATURE_DEVICE_REMOVAL));
 			(void) bpobj_iterate_nofree(&dp->dp_obsolete_bpobj,
 			    increment_indirect_mapping_cb, zcb, NULL);
 		}
 	}
 
 	spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
 
 	zdb_ddt_leak_init(spa, zcb);
 
 	spa_config_exit(spa, SCL_CONFIG, FTAG);
 }
 
 /*
  * Compare, for every entry of an indirect vdev's mapping, the bytes
  * still present in the metaslab's ms_tree after the traversal with the
  * obsolete count loaded for that entry.
  *
  * Per-entry mismatches are printed when the vdev's obsolete counts are
  * precise or dump_opt['d'] is 5 or more.  The counts array for the vdev
  * is freed and its slot in zcb cleared before returning.
  *
  * Returns B_TRUE only when the counts are precise and some entry
  * mismatched; imprecise counts produce an informational message instead.
  */
 static boolean_t
 zdb_check_for_obsolete_leaks(vdev_t *vd, zdb_cb_t *zcb)
 {
 	boolean_t leaks = B_FALSE;
 	vdev_indirect_mapping_t *vim = vd->vdev_indirect_mapping;
 	uint64_t total_leaked = 0;
 
 	ASSERT(vim != NULL);
 
 	for (uint64_t i = 0; i < vdev_indirect_mapping_num_entries(vim); i++) {
 		vdev_indirect_mapping_entry_phys_t *vimep =
 		    &vim->vim_entries[i];
 		uint64_t obsolete_bytes = 0;
 		uint64_t offset = DVA_MAPPING_GET_SRC_OFFSET(vimep);
 		metaslab_t *msp = vd->vdev_ms[offset >> vd->vdev_ms_shift];
 
 		/*
 		 * This is not very efficient but it's easy to
 		 * verify correctness.
 		 */
 		for (uint64_t inner_offset = 0;
 		    inner_offset < DVA_GET_ASIZE(&vimep->vimep_dst);
 		    inner_offset += 1 << vd->vdev_ashift) {
 			if (range_tree_contains(msp->ms_tree,
 			    offset + inner_offset, 1 << vd->vdev_ashift)) {
 				obsolete_bytes += 1 << vd->vdev_ashift;
 			}
 		}
 
 		/* Signed: the recorded count may exceed what was found. */
 		int64_t bytes_leaked = obsolete_bytes -
 		    zcb->zcb_vd_obsolete_counts[vd->vdev_id][i];
 		ASSERT3U(DVA_GET_ASIZE(&vimep->vimep_dst), >=,
 		    zcb->zcb_vd_obsolete_counts[vd->vdev_id][i]);
 		if (bytes_leaked != 0 &&
 		    (vdev_obsolete_counts_are_precise(vd) ||
 		    dump_opt['d'] >= 5)) {
 			(void) printf("obsolete indirect mapping count "
 			    "mismatch on %llu:%llx:%llx : %llx bytes leaked\n",
 			    (u_longlong_t)vd->vdev_id,
 			    (u_longlong_t)DVA_MAPPING_GET_SRC_OFFSET(vimep),
 			    (u_longlong_t)DVA_GET_ASIZE(&vimep->vimep_dst),
 			    (u_longlong_t)bytes_leaked);
 		}
 		total_leaked += ABS(bytes_leaked);
 	}
 
 	if (!vdev_obsolete_counts_are_precise(vd) && total_leaked > 0) {
 		int pct_leaked = total_leaked * 100 /
 		    vdev_indirect_mapping_bytes_mapped(vim);
 		/*
 		 * The literal pieces are concatenated as written, so
 		 * "of mapping " needs its trailing space to keep the last
 		 * two words apart.
 		 */
 		(void) printf("cannot verify obsolete indirect mapping "
 		    "counts of vdev %llu because precise feature was not "
 		    "enabled when it was removed: %d%% (%llx bytes) of mapping "
 		    "unreferenced\n",
 		    (u_longlong_t)vd->vdev_id, pct_leaked,
 		    (u_longlong_t)total_leaked);
 	} else if (total_leaked > 0) {
 		(void) printf("obsolete indirect mapping count mismatch "
 		    "for vdev %llu -- %llx total bytes mismatched\n",
 		    (u_longlong_t)vd->vdev_id,
 		    (u_longlong_t)total_leaked);
 		leaks |= B_TRUE;
 	}
 
 	vdev_indirect_mapping_free_obsolete_counts(vim,
 	    zcb->zcb_vd_obsolete_counts[vd->vdev_id]);
 	zcb->zcb_vd_obsolete_counts[vd->vdev_id] = NULL;
 
 	return (leaks);
 }
 
 /*
  * Tear down the leak detection state built by zdb_leak_init() and
  * report what is left over.
  *
  * Does nothing when dump_opt['L'] is set.  Otherwise every metaslab's
  * ms_tree is emptied, reporting each remaining segment through
  * zdb_leak() except on indirect vdevs, and the obsolete counts of
  * indirect vdevs are checked.
  *
  * Returns B_TRUE if zdb_check_for_obsolete_leaks() reported a leak for
  * any vdev.
  */
 static boolean_t
 zdb_leak_fini(spa_t *spa, zdb_cb_t *zcb)
 {
 	boolean_t leaks = B_FALSE;
 	vdev_t *rvd;
 
 	if (dump_opt['L'])
 		return (leaks);
 
 	rvd = spa->spa_root_vdev;
 	for (unsigned c = 0; c < rvd->vdev_children; c++) {
 		vdev_t *vd = rvd->vdev_child[c];
 		metaslab_group_t *mg = vd->vdev_mg;
 
 		if (zcb->zcb_vd_obsolete_counts[c] != NULL)
 			leaks |= zdb_check_for_obsolete_leaks(vd, zcb);
 
 		for (uint64_t m = 0; m < vd->vdev_ms_count; m++) {
 			metaslab_t *msp = vd->vdev_ms[m];
 			boolean_t indirect =
 			    (vd->vdev_ops == &vdev_indirect_ops);
 
 			ASSERT3P(mg, ==, msp->ms_group);
 
 			/*
 			 * ms_tree was loaded with allocated segments and
 			 * the traversal removed every segment it claimed,
 			 * so anything still here was allocated but never
 			 * claimed.  On an indirect vdev the remainder is
 			 * only unreferenced mapping, which is not a bug,
 			 * so it is discarded without a report.
 			 */
 			range_tree_vacate(msp->ms_tree,
 			    indirect ? NULL : zdb_leak,
 			    indirect ? NULL : vd);
 
 			if (msp->ms_loaded)
 				msp->ms_loaded = B_FALSE;
 		}
 	}
 
 	umem_free(zcb->zcb_vd_obsolete_counts,
 	    rvd->vdev_children * sizeof (uint32_t *));
 	zcb->zcb_vd_obsolete_counts = NULL;
 
 	return (leaks);
 }
 
 /*
  * Block pointer callback used by dump_block_stats(): count bp as a
  * deferred free.  The block pointer is also printed when dump_opt['b']
  * is 5 or more.  arg is the zdb_cb_t.  Always returns 0.
  */
 /* ARGSUSED */
 static int
 count_block_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
 {
 	zdb_cb_t *stats = arg;
 	boolean_t verbose = (dump_opt['b'] >= 5);
 
 	if (verbose) {
 		char bpstr[BP_SPRINTF_LEN];
 
 		snprintf_blkptr(bpstr, sizeof (bpstr), bp);
 		(void) printf("[%s] %s\n",
 		    "deferred free", bpstr);
 	}
 
 	zdb_count_block(stats, NULL, bp, ZDB_OT_DEFERRED);
 
 	return (0);
 }
 
 /*
  * Traverse every block in the pool, tally per-type and per-level block
  * statistics in a zdb_cb_t, and print a summary.  The pool's space maps
  * are loaded first so that space not claimed by the traversal can be
  * reported as leaked.
  *
  * Returns 0 on success, 2 if a leak or space mismatch was found (or if no
  * blocks were counted at all), and 3 if the traversal reported errors.
  * Leaks take precedence over traversal errors in the return value.
  */
 static int
 dump_block_stats(spa_t *spa)
 {
 	zdb_cb_t zcb;
 	zdb_blkstats_t *zb, *tzb;
 	uint64_t norm_alloc, norm_space, total_alloc, total_found;
 	int flags = TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA | TRAVERSE_HARD;
 	boolean_t leaks = B_FALSE;
 
 	bzero(&zcb, sizeof (zcb));
 	(void) printf("\nTraversing all blocks %s%s%s%s%s...\n\n",
 	    (dump_opt['c'] || !dump_opt['L']) ? "to verify " : "",
 	    (dump_opt['c'] == 1) ? "metadata " : "",
 	    dump_opt['c'] ? "checksums " : "",
 	    (dump_opt['c'] && !dump_opt['L']) ? "and verify " : "",
 	    !dump_opt['L'] ? "nothing leaked " : "");
 
 	/*
 	 * Load all space maps as SM_ALLOC maps, then traverse the pool
 	 * claiming each block we discover.  If the pool is perfectly
 	 * consistent, the space maps will be empty when we're done.
 	 * Anything left over is a leak; any block we can't claim (because
 	 * it's not part of any space map) is a double allocation,
 	 * reference to a freed block, or an unclaimed log block.
 	 */
 	zdb_leak_init(spa, &zcb);
 
 	/*
 	 * If there's a deferred-free bplist, process that first.
 	 */
 	(void) bpobj_iterate_nofree(&spa->spa_deferred_bpobj,
 	    count_block_cb, &zcb, NULL);
 
 	if (spa_version(spa) >= SPA_VERSION_DEADLISTS) {
 		(void) bpobj_iterate_nofree(&spa->spa_dsl_pool->dp_free_bpobj,
 		    count_block_cb, &zcb, NULL);
 	}
 
 	zdb_claim_removing(spa, &zcb);
 
 	if (spa_feature_is_active(spa, SPA_FEATURE_ASYNC_DESTROY)) {
 		VERIFY3U(0, ==, bptree_iterate(spa->spa_meta_objset,
 		    spa->spa_dsl_pool->dp_bptree_obj, B_FALSE, count_block_cb,
 		    &zcb, NULL));
 	}
 
 	/* A second level of -c also prefetches data blocks. */
 	if (dump_opt['c'] > 1)
 		flags |= TRAVERSE_PREFETCH_DATA;
 
 	zcb.zcb_totalasize = metaslab_class_get_alloc(spa_normal_class(spa));
 	zcb.zcb_start = zcb.zcb_lastprint = gethrtime();
 	zcb.zcb_haderrors |= traverse_pool(spa, 0, flags, zdb_blkptr_cb, &zcb);
 
 	/*
 	 * If we've traversed the data blocks then we need to wait for those
 	 * I/Os to complete. We leverage "The Godfather" zio to wait on
 	 * all async I/Os to complete.
 	 */
 	if (dump_opt['c']) {
 		for (int i = 0; i < max_ncpus; i++) {
 			(void) zio_wait(spa->spa_async_zio_root[i]);
 			spa->spa_async_zio_root[i] = zio_root(spa, NULL, NULL,
 			    ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE |
 			    ZIO_FLAG_GODFATHER);
 		}
 	}
 
 	if (zcb.zcb_haderrors) {
 		(void) printf("\nError counts:\n\n");
 		(void) printf("\t%5s  %s\n", "errno", "count");
 		for (int e = 0; e < 256; e++) {
 			if (zcb.zcb_errors[e] != 0) {
 				(void) printf("\t%5d  %llu\n",
 				    e, (u_longlong_t)zcb.zcb_errors[e]);
 			}
 		}
 	}
 
 	/*
 	 * Report any leaked segments.
 	 */
 	leaks |= zdb_leak_fini(spa, &zcb);
 
 	tzb = &zcb.zcb_type[ZB_TOTAL][ZDB_OT_TOTAL];
 
 	norm_alloc = metaslab_class_get_alloc(spa_normal_class(spa));
 	norm_space = metaslab_class_get_space(spa_normal_class(spa));
 
 	/*
 	 * Compare what the traversal found (less deduplicated space, plus
 	 * space claimed for a vdev being removed) with what the normal and
 	 * log classes report as allocated.
 	 */
 	total_alloc = norm_alloc + metaslab_class_get_alloc(spa_log_class(spa));
 	total_found = tzb->zb_asize - zcb.zcb_dedup_asize +
 	    zcb.zcb_removing_size;
 
 	if (total_found == total_alloc) {
 		if (!dump_opt['L'])
 			(void) printf("\n\tNo leaks (block sum matches space"
 			    " maps exactly)\n");
 	} else {
 		(void) printf("block traversal size %llu != alloc %llu "
 		    "(%s %lld)\n",
 		    (u_longlong_t)total_found,
 		    (u_longlong_t)total_alloc,
 		    (dump_opt['L']) ? "unreachable" : "leaked",
 		    (longlong_t)(total_alloc - total_found));
 		leaks = B_TRUE;
 	}
 
 	/* Nothing was counted; the averages below would divide by zero. */
 	if (tzb->zb_count == 0)
 		return (2);
 
 	(void) printf("\n");
 	(void) printf("\tbp count:      %10llu\n",
 	    (u_longlong_t)tzb->zb_count);
 	(void) printf("\tganged count:  %10llu\n",
 	    (longlong_t)tzb->zb_gangs);
 	(void) printf("\tbp logical:    %10llu      avg: %6llu\n",
 	    (u_longlong_t)tzb->zb_lsize,
 	    (u_longlong_t)(tzb->zb_lsize / tzb->zb_count));
 	(void) printf("\tbp physical:   %10llu      avg:"
 	    " %6llu     compression: %6.2f\n",
 	    (u_longlong_t)tzb->zb_psize,
 	    (u_longlong_t)(tzb->zb_psize / tzb->zb_count),
 	    (double)tzb->zb_lsize / tzb->zb_psize);
 	(void) printf("\tbp allocated:  %10llu      avg:"
 	    " %6llu     compression: %6.2f\n",
 	    (u_longlong_t)tzb->zb_asize,
 	    (u_longlong_t)(tzb->zb_asize / tzb->zb_count),
 	    (double)tzb->zb_lsize / tzb->zb_asize);
 	(void) printf("\tbp deduped:    %10llu    ref>1:"
 	    " %6llu   deduplication: %6.2f\n",
 	    (u_longlong_t)zcb.zcb_dedup_asize,
 	    (u_longlong_t)zcb.zcb_dedup_blocks,
 	    (double)zcb.zcb_dedup_asize / tzb->zb_asize + 1.0);
 	(void) printf("\tSPA allocated: %10llu     used: %5.2f%%\n",
 	    (u_longlong_t)norm_alloc, 100.0 * norm_alloc / norm_space);
 
 	for (bp_embedded_type_t i = 0; i < NUM_BP_EMBEDDED_TYPES; i++) {
 		if (zcb.zcb_embedded_blocks[i] == 0)
 			continue;
 		(void) printf("\n");
 		(void) printf("\tadditional, non-pointer bps of type %u: "
 		    "%10llu\n",
 		    i, (u_longlong_t)zcb.zcb_embedded_blocks[i]);
 
 		if (dump_opt['b'] >= 3) {
 			(void) printf("\t number of (compressed) bytes:  "
 			    "number of bps\n");
 			dump_histogram(zcb.zcb_embedded_histogram[i],
 			    sizeof (zcb.zcb_embedded_histogram[i]) /
 			    sizeof (zcb.zcb_embedded_histogram[i][0]), 0);
 		}
 	}
 
 	if (tzb->zb_ditto_samevdev != 0) {
 		(void) printf("\tDittoed blocks on same vdev: %llu\n",
 		    (longlong_t)tzb->zb_ditto_samevdev);
 	}
 
 	for (uint64_t v = 0; v < spa->spa_root_vdev->vdev_children; v++) {
 		vdev_t *vd = spa->spa_root_vdev->vdev_child[v];
 		vdev_indirect_mapping_t *vim = vd->vdev_indirect_mapping;
 
 		if (vim == NULL) {
 			continue;
 		}
 
 		char mem[32];
 
 		/* make sure nicenum has enough space */
 		CTASSERT(sizeof (mem) >= NN_NUMBUF_SZ);
 
 		/*
 		 * Format the size of the mapping (the "in memory" figure
 		 * printed below); the third argument is the length of the
 		 * output buffer, as in every other zdb_nicenum() call here.
 		 */
 		zdb_nicenum(vdev_indirect_mapping_size(vim),
 		    mem, sizeof (mem));
 
 		(void) printf("\tindirect vdev id %llu has %llu segments "
 		    "(%s in memory)\n",
 		    (longlong_t)vd->vdev_id,
 		    (longlong_t)vdev_indirect_mapping_num_entries(vim), mem);
 	}
 
 	if (dump_opt['b'] >= 2) {
 		int l, t, level;
 		(void) printf("\nBlocks\tLSIZE\tPSIZE\tASIZE"
 		    "\t  avg\t comp\t%%Total\tType\n");
 
 		for (t = 0; t <= ZDB_OT_TOTAL; t++) {
 			char csize[32], lsize[32], psize[32], asize[32];
 			char avg[32], gang[32];
 			const char *typename;
 
 			/* make sure nicenum has enough space */
 			CTASSERT(sizeof (csize) >= NN_NUMBUF_SZ);
 			CTASSERT(sizeof (lsize) >= NN_NUMBUF_SZ);
 			CTASSERT(sizeof (psize) >= NN_NUMBUF_SZ);
 			CTASSERT(sizeof (asize) >= NN_NUMBUF_SZ);
 			CTASSERT(sizeof (avg) >= NN_NUMBUF_SZ);
 			CTASSERT(sizeof (gang) >= NN_NUMBUF_SZ);
 
 			if (t < DMU_OT_NUMTYPES)
 				typename = dmu_ot[t].ot_name;
 			else
 				typename = zdb_ot_extname[t - DMU_OT_NUMTYPES];
 
 			/* Types with no allocated space get a row of dashes. */
 			if (zcb.zcb_type[ZB_TOTAL][t].zb_asize == 0) {
 				(void) printf("%6s\t%5s\t%5s\t%5s"
 				    "\t%5s\t%5s\t%6s\t%s\n",
 				    "-",
 				    "-",
 				    "-",
 				    "-",
 				    "-",
 				    "-",
 				    "-",
 				    typename);
 				continue;
 			}
 
 			/*
 			 * Walk the levels from highest to lowest; the final
 			 * pass (l == -1) prints the per-type total.
 			 */
 			for (l = ZB_TOTAL - 1; l >= -1; l--) {
 				level = (l == -1 ? ZB_TOTAL : l);
 				zb = &zcb.zcb_type[level][t];
 
 				if (zb->zb_asize == 0)
 					continue;
 
 				if (dump_opt['b'] < 3 && level != ZB_TOTAL)
 					continue;
 
 				/*
 				 * Skip the L0 row when it would merely
 				 * repeat the total for this type.
 				 */
 				if (level == 0 && zb->zb_asize ==
 				    zcb.zcb_type[ZB_TOTAL][t].zb_asize)
 					continue;
 
 				zdb_nicenum(zb->zb_count, csize,
 				    sizeof (csize));
 				zdb_nicenum(zb->zb_lsize, lsize,
 				    sizeof (lsize));
 				zdb_nicenum(zb->zb_psize, psize,
 				    sizeof (psize));
 				zdb_nicenum(zb->zb_asize, asize,
 				    sizeof (asize));
 				zdb_nicenum(zb->zb_asize / zb->zb_count, avg,
 				    sizeof (avg));
 				zdb_nicenum(zb->zb_gangs, gang, sizeof (gang));
 
 				(void) printf("%6s\t%5s\t%5s\t%5s\t%5s"
 				    "\t%5.2f\t%6.2f\t",
 				    csize, lsize, psize, asize, avg,
 				    (double)zb->zb_lsize / zb->zb_psize,
 				    100.0 * zb->zb_asize / tzb->zb_asize);
 
 				if (level == ZB_TOTAL)
 					(void) printf("%s\n", typename);
 				else
 					(void) printf("    L%d %s\n",
 					    level, typename);
 
 				if (dump_opt['b'] >= 3 && zb->zb_gangs > 0) {
 					(void) printf("\t number of ganged "
 					    "blocks: %s\n", gang);
 				}
 
 				if (dump_opt['b'] >= 4) {
 					(void) printf("psize "
 					    "(in 512-byte sectors): "
 					    "number of blocks\n");
 					dump_histogram(zb->zb_psize_histogram,
 					    PSIZE_HISTO_SIZE, 0);
 				}
 			}
 		}
 	}
 
 	(void) printf("\n");
 
 	if (leaks)
 		return (2);
 
 	if (zcb.zcb_haderrors)
 		return (3);
 
 	return (0);
 }
 
 /*
  * One node of the AVL tree built by zdb_ddt_add_cb() for the simulated
  * dedup table: a dedup key plus running totals over every block seen
  * with that key.
  */
 typedef struct zdb_ddt_entry {
 	ddt_key_t	zdde_key;		/* filled in by ddt_key_fill() */
 	uint64_t	zdde_ref_blocks;	/* number of blocks with this key */
 	uint64_t	zdde_ref_lsize;		/* sum of BP_GET_LSIZE() */
 	uint64_t	zdde_ref_psize;		/* sum of BP_GET_PSIZE() */
 	uint64_t	zdde_ref_dsize;		/* sum of bp_get_dsize_sync() */
 	avl_node_t	zdde_node;		/* linkage in the AVL tree */
 } zdb_ddt_entry_t;
 
 /*
  * traverse_pool() callback used to build the simulated dedup table.
  * 'arg' is the AVL tree of zdb_ddt_entry_t nodes; each eligible block is
  * added to the totals of the entry that shares its dedup key, creating
  * the entry on first sight.  Always returns 0.
  */
 /* ARGSUSED */
 static int
 zdb_ddt_add_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
     const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg)
 {
 	avl_tree_t *tree = arg;
 	zdb_ddt_entry_t probe;
 	zdb_ddt_entry_t *entry;
 	avl_index_t slot;
 
 	/* Nothing to account for without a real, non-embedded block. */
 	if (bp == NULL)
 		return (0);
 	if (BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp))
 		return (0);
 
 	/* Progress report each time the root of an objset is visited. */
 	if (dump_opt['S'] > 1 && zb->zb_level == ZB_ROOT_LEVEL) {
 		(void) printf("traversing objset %llu, %llu objects, "
 		    "%lu blocks so far\n",
 		    (u_longlong_t)zb->zb_objset,
 		    (u_longlong_t)BP_GET_FILL(bp),
 		    avl_numnodes(tree));
 	}
 
 	/* Only checksummed, level-0, non-metadata blocks are counted. */
 	if (BP_GET_CHECKSUM(bp) == ZIO_CHECKSUM_OFF)
 		return (0);
 	if (BP_GET_LEVEL(bp) > 0)
 		return (0);
 	if (DMU_OT_IS_METADATA(BP_GET_TYPE(bp)))
 		return (0);
 
 	ddt_key_fill(&probe.zdde_key, bp);
 
 	entry = avl_find(tree, &probe, &slot);
 	if (entry == NULL) {
 		/* First block with this key: start a zeroed entry. */
 		entry = umem_zalloc(sizeof (*entry), UMEM_NOFAIL);
 		entry->zdde_key = probe.zdde_key;
 		avl_insert(tree, entry, slot);
 	}
 
 	entry->zdde_ref_blocks++;
 	entry->zdde_ref_lsize += BP_GET_LSIZE(bp);
 	entry->zdde_ref_psize += BP_GET_PSIZE(bp);
 	entry->zdde_ref_dsize += bp_get_dsize_sync(spa, bp);
 
 	return (0);
 }
 
 /*
  * Build a dedup table as if every data block in the pool had been written
  * with dedup enabled, then print the resulting histogram and dedup ratio.
  * The table is gathered by traversing the pool with zdb_ddt_add_cb() and
  * is torn down again while it is folded into the histogram.
  */
 static void
 dump_simulated_ddt(spa_t *spa)
 {
 	avl_tree_t tree;
 	void *walk = NULL;
 	zdb_ddt_entry_t *ent;
 	ddt_histogram_t histo;
 	ddt_stat_t totals;
 
 	bzero(&histo, sizeof (histo));
 	bzero(&totals, sizeof (totals));
 	avl_create(&tree, ddt_entry_compare, sizeof (zdb_ddt_entry_t),
 	    offsetof(zdb_ddt_entry_t, zdde_node));
 
 	spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
 	(void) traverse_pool(spa, 0, TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA,
 	    zdb_ddt_add_cb, &tree);
 	spa_config_exit(spa, SCL_CONFIG, FTAG);
 
 	for (ent = avl_destroy_nodes(&tree, &walk); ent != NULL;
 	    ent = avl_destroy_nodes(&tree, &walk)) {
 		ddt_stat_t st;
 		uint64_t refs = ent->zdde_ref_blocks;
 		ASSERT(refs != 0);
 
 		/* Referenced totals are taken as accumulated ... */
 		st.dds_ref_blocks = ent->zdde_ref_blocks;
 		st.dds_ref_lsize = ent->zdde_ref_lsize;
 		st.dds_ref_psize = ent->zdde_ref_psize;
 		st.dds_ref_dsize = ent->zdde_ref_dsize;
 
 		/* ... and the per-copy figures are their average. */
 		st.dds_blocks = ent->zdde_ref_blocks / refs;
 		st.dds_lsize = ent->zdde_ref_lsize / refs;
 		st.dds_psize = ent->zdde_ref_psize / refs;
 		st.dds_dsize = ent->zdde_ref_dsize / refs;
 
 		/* Bucket by the position of the highest set bit of refs. */
 		ddt_stat_add(&histo.ddh_stat[highbit64(refs) - 1], &st, 0);
 
 		umem_free(ent, sizeof (*ent));
 	}
 
 	avl_destroy(&tree);
 
 	ddt_histogram_stat(&totals, &histo);
 
 	(void) printf("Simulated DDT histogram:\n");
 
 	zpool_dump_ddt(&totals, &histo);
 
 	dump_dedup_ratio(&totals);
 }
 
 /*
  * Cross-check the refcounts of the device_removal and obsolete_counts
  * features against what is actually found in the pool: the number of top-level
  * vdevs with an indirect mapping for the former, and a sum of several
  * per-vdev and pool-wide object counts for the latter.  If an indirect
  * vdev is being condensed, its previous obsolete space map is dumped too.
  *
  * Prints one line per feature saying whether the refcount was verified.
  * Returns 0 if both refcounts match and 1 otherwise.
  */
 static int
 verify_device_removal_feature_counts(spa_t *spa)
 {
 	uint64_t dr_feature_refcount = 0;
 	uint64_t oc_feature_refcount = 0;
 	uint64_t indirect_vdev_count = 0;
 	uint64_t precise_vdev_count = 0;
 	uint64_t obsolete_counts_object_count = 0;
 	uint64_t obsolete_sm_count = 0;
 	uint64_t obsolete_counts_count = 0;
 	uint64_t scip_count = 0;
 	uint64_t obsolete_bpobj_count = 0;
 	int ret = 0;
 
 	/* A non-zero next mapping object means a condense is in progress. */
 	spa_condensing_indirect_phys_t *scip =
 	    &spa->spa_condensing_indirect_phys;
 	if (scip->scip_next_mapping_object != 0) {
 		vdev_t *vd = spa->spa_root_vdev->vdev_child[scip->scip_vdev];
 		ASSERT(scip->scip_prev_obsolete_sm_object != 0);
 		ASSERT3P(vd->vdev_ops, ==, &vdev_indirect_ops);
 
 		(void) printf("Condensing indirect vdev %llu: new mapping "
 		    "object %llu, prev obsolete sm %llu\n",
 		    (u_longlong_t)scip->scip_vdev,
 		    (u_longlong_t)scip->scip_next_mapping_object,
 		    (u_longlong_t)scip->scip_prev_obsolete_sm_object);
 		if (scip->scip_prev_obsolete_sm_object != 0) {
 			space_map_t *prev_obsolete_sm = NULL;
 			VERIFY0(space_map_open(&prev_obsolete_sm,
 			    spa->spa_meta_objset,
 			    scip->scip_prev_obsolete_sm_object,
 			    0, vd->vdev_asize, 0));
 			space_map_update(prev_obsolete_sm);
 			dump_spacemap(spa->spa_meta_objset, prev_obsolete_sm);
 			(void) printf("\n");
 			space_map_close(prev_obsolete_sm);
 		}
 
 		/*
 		 * NOTE(review): presumably one count each for the new
 		 * mapping object and the previous obsolete space map --
 		 * confirm against the code that bumps the feature refcount.
 		 */
 		scip_count += 2;
 	}
 
 	/* Tally the per-vdev contributions over all top-level vdevs. */
 	for (uint64_t i = 0; i < spa->spa_root_vdev->vdev_children; i++) {
 		vdev_t *vd = spa->spa_root_vdev->vdev_child[i];
 		vdev_indirect_config_t *vic = &vd->vdev_indirect_config;
 
 		if (vic->vic_mapping_object != 0) {
 			ASSERT(vd->vdev_ops == &vdev_indirect_ops ||
 			    vd->vdev_removing);
 			indirect_vdev_count++;
 
 			if (vd->vdev_indirect_mapping->vim_havecounts) {
 				obsolete_counts_count++;
 			}
 		}
 		if (vdev_obsolete_counts_are_precise(vd)) {
 			ASSERT(vic->vic_mapping_object != 0);
 			precise_vdev_count++;
 		}
 		if (vdev_obsolete_sm_object(vd) != 0) {
 			ASSERT(vic->vic_mapping_object != 0);
 			obsolete_sm_count++;
 		}
 	}
 
 	/*
 	 * The return values are ignored; both refcounts were initialized
 	 * to zero above, so they stay zero if a lookup does not fill them in.
 	 */
 	(void) feature_get_refcount(spa,
 	    &spa_feature_table[SPA_FEATURE_DEVICE_REMOVAL],
 	    &dr_feature_refcount);
 	(void) feature_get_refcount(spa,
 	    &spa_feature_table[SPA_FEATURE_OBSOLETE_COUNTS],
 	    &oc_feature_refcount);
 
 	if (dr_feature_refcount != indirect_vdev_count) {
 		ret = 1;
 		(void) printf("Number of indirect vdevs (%llu) " \
 		    "does not match feature count (%llu)\n",
 		    (u_longlong_t)indirect_vdev_count,
 		    (u_longlong_t)dr_feature_refcount);
 	} else {
 		(void) printf("Verified device_removal feature refcount " \
 		    "of %llu is correct\n",
 		    (u_longlong_t)dr_feature_refcount);
 	}
 
 	/* The pool-wide obsolete bpobj counts once if it exists. */
 	if (zap_contains(spa_meta_objset(spa), DMU_POOL_DIRECTORY_OBJECT,
 	    DMU_POOL_OBSOLETE_BPOBJ) == 0) {
 		obsolete_bpobj_count++;
 	}
 
 
 	/*
 	 * Expected obsolete_counts refcount: the sum of every contribution
 	 * gathered above plus remap_deadlist_count, which is maintained
 	 * outside this function.
 	 */
 	obsolete_counts_object_count = precise_vdev_count;
 	obsolete_counts_object_count += obsolete_sm_count;
 	obsolete_counts_object_count += obsolete_counts_count;
 	obsolete_counts_object_count += scip_count;
 	obsolete_counts_object_count += obsolete_bpobj_count;
 	obsolete_counts_object_count += remap_deadlist_count;
 
 	if (oc_feature_refcount != obsolete_counts_object_count) {
 		ret = 1;
 		(void) printf("Number of obsolete counts objects (%llu) " \
 		    "does not match feature count (%llu)\n",
 		    (u_longlong_t)obsolete_counts_object_count,
 		    (u_longlong_t)oc_feature_refcount);
 		/* Break the sum down by component to aid debugging. */
 		(void) printf("pv:%llu os:%llu oc:%llu sc:%llu "
 		    "ob:%llu rd:%llu\n",
 		    (u_longlong_t)precise_vdev_count,
 		    (u_longlong_t)obsolete_sm_count,
 		    (u_longlong_t)obsolete_counts_count,
 		    (u_longlong_t)scip_count,
 		    (u_longlong_t)obsolete_bpobj_count,
 		    (u_longlong_t)remap_deadlist_count);
 	} else {
 		(void) printf("Verified indirect_refcount feature refcount " \
 		    "of %llu is correct\n",
 		    (u_longlong_t)oc_feature_refcount);
 	}
 	return (ret);
 }
 
 /*
  * Dump the pool-wide information selected through dump_opt[]: cached and
  * current configuration, uberblock, dedup tables, metaslabs, datasets,
  * bpobjs, block statistics, pool statistics and history.
  *
  * Several steps double as consistency checks (feature refcounts, block
  * statistics, space map refcounts).  Once one of them yields a non-zero
  * status the later checks are skipped; after the remaining dumps the
  * debug buffer is printed and the process exits with that status.
  */
 static void
 dump_zpool(spa_t *spa)
 {
 	dsl_pool_t *dp = spa_get_dsl(spa);
 	int rc = 0;
 
 	/* -S is exclusive: only the simulated dedup table is produced. */
 	if (dump_opt['S']) {
 		dump_simulated_ddt(spa);
 		return;
 	}
 
 	if (!dump_opt['e'] && dump_opt['C'] > 1) {
 		(void) printf("\nCached configuration:\n");
 		dump_nvlist(spa->spa_config, 8);
 	}
 
 	if (dump_opt['C'])
 		dump_config(spa);
 
 	if (dump_opt['u'])
 		dump_uberblock(&spa->spa_uberblock, "\nUberblock:\n", "\n");
 
 	if (dump_opt['D'])
 		dump_all_ddts(spa);
 
 	if (dump_opt['d'] > 2 || dump_opt['m'])
 		dump_metaslabs(spa);
 	if (dump_opt['M'])
 		dump_metaslab_groups(spa);
 
 	if (dump_opt['d'] || dump_opt['i']) {
 		dump_dir(dp->dp_meta_objset);
 		if (dump_opt['d'] >= 3) {
 			/* Named so as not to shadow the function-level dp. */
 			dsl_pool_t *pool = spa->spa_dsl_pool;
 
 			dump_full_bpobj(&spa->spa_deferred_bpobj,
 			    "Deferred frees", 0);
 			if (spa_version(spa) >= SPA_VERSION_DEADLISTS) {
 				dump_full_bpobj(&pool->dp_free_bpobj,
 				    "Pool snapshot frees", 0);
 			}
 			if (bpobj_is_open(&pool->dp_obsolete_bpobj)) {
 				ASSERT(spa_feature_is_enabled(spa,
 				    SPA_FEATURE_DEVICE_REMOVAL));
 				dump_full_bpobj(&pool->dp_obsolete_bpobj,
 				    "Pool obsolete blocks", 0);
 			}
 
 			if (spa_feature_is_active(spa,
 			    SPA_FEATURE_ASYNC_DESTROY)) {
 				dump_bptree(spa->spa_meta_objset,
 				    pool->dp_bptree_obj,
 				    "Pool dataset frees");
 			}
 			dump_dtl(spa->spa_root_vdev, 0);
 		}
 		(void) dmu_objset_find(spa_name(spa), dump_one_dir,
 		    NULL, DS_FIND_SNAPSHOTS | DS_FIND_CHILDREN);
 
 		/*
 		 * Compare the number of datasets counted as using each
 		 * per-dataset feature with that feature's refcount.
 		 */
 		for (spa_feature_t f = 0; f < SPA_FEATURES; f++) {
 			/*
 			 * Start from zero: the return value of
 			 * feature_get_refcount() is ignored below, so the
 			 * variable must hold a defined value even if the
 			 * call does not fill it in.
 			 */
 			uint64_t refcount = 0;
 
 			if (!(spa_feature_table[f].fi_flags &
 			    ZFEATURE_FLAG_PER_DATASET)) {
 				ASSERT0(dataset_feature_count[f]);
 				continue;
 			}
 			(void) feature_get_refcount(spa,
 			    &spa_feature_table[f], &refcount);
 			if (dataset_feature_count[f] != refcount) {
 				(void) printf("%s feature refcount mismatch: "
 				    "%lld datasets != %lld refcount\n",
 				    spa_feature_table[f].fi_uname,
 				    (longlong_t)dataset_feature_count[f],
 				    (longlong_t)refcount);
 				rc = 2;
 			} else {
 				(void) printf("Verified %s feature refcount "
 				    "of %llu is correct\n",
 				    spa_feature_table[f].fi_uname,
 				    (u_longlong_t)refcount);
 			}
 		}
 
 		if (rc == 0) {
 			rc = verify_device_removal_feature_counts(spa);
 		}
 	}
 	if (rc == 0 && (dump_opt['b'] || dump_opt['c']))
 		rc = dump_block_stats(spa);
 
 	if (rc == 0)
 		rc = verify_spacemap_refcounts(spa);
 
 	if (dump_opt['s'])
 		show_pool_stats(spa);
 
 	if (dump_opt['h'])
 		dump_history(spa);
 
 	if (rc != 0) {
 		dump_debug_buffer();
 		exit(rc);
 	}
 }
 
 /*
  * Flag bits understood by zdb_read_block().  main() maps the flag
  * characters of a block specifier onto these bits through flagbits[]:
  *	c CHECKSUM, d DECOMPRESS, e BSWAP, g GBH, i INDIRECT, p PHYS,
  *	r RAW, b PRINT_BLKPTR
  */
 #define	ZDB_FLAG_CHECKSUM	0x0001
 #define	ZDB_FLAG_DECOMPRESS	0x0002
 #define	ZDB_FLAG_BSWAP		0x0004
 #define	ZDB_FLAG_GBH		0x0008
 #define	ZDB_FLAG_INDIRECT	0x0010
 #define	ZDB_FLAG_PHYS		0x0020
 #define	ZDB_FLAG_RAW		0x0040
 #define	ZDB_FLAG_PRINT_BLKPTR	0x0080
 
 /* Indexed by flag character; zero means the character is not a flag. */
 static int flagbits[256];
 
 /*
  * Print one block pointer in its textual form, followed by a newline.
  * With ZDB_FLAG_BSWAP set, *bp is byteswapped in place before printing.
  */
 static void
 zdb_print_blkptr(blkptr_t *bp, int flags)
 {
 	char text[BP_SPRINTF_LEN];
 	int swap = flags & ZDB_FLAG_BSWAP;
 
 	if (swap != 0)
 		byteswap_uint64_array((void *)bp, sizeof (blkptr_t));
 
 	snprintf_blkptr(text, sizeof (text), bp);
 	(void) printf("%s\n", text);
 }
 
 /*
  * Print 'nbps' consecutive block pointers starting at 'bp', one per line.
  * Nothing is printed when 'nbps' is zero or negative.
  */
 static void
 zdb_dump_indirect(blkptr_t *bp, int nbps, int flags)
 {
 	blkptr_t *cur = bp;
 	int left;
 
 	for (left = nbps; left > 0; left--) {
 		zdb_print_blkptr(cur, flags);
 		cur++;
 	}
 }
 
 static void
 zdb_dump_gbh(void *buf, int flags)
 {
 	zdb_dump_indirect((blkptr_t *)buf, SPA_GBH_NBLKPTRS, flags);
 }
 
 /*
  * Write 'size' bytes of 'buf' to standard output unmodified, or
  * byteswapped in place first when ZDB_FLAG_BSWAP is set.
  *
  * write(2) may transfer fewer bytes than requested or be interrupted by
  * a signal, so keep writing until everything is out.  Output stays
  * best-effort: on any other error the rest is silently dropped.
  */
 static void
 zdb_dump_block_raw(void *buf, uint64_t size, int flags)
 {
 	const char *cursor = buf;
 	uint64_t remaining = size;
 
 	if (flags & ZDB_FLAG_BSWAP)
 		byteswap_uint64_array(buf, size);
 
 	while (remaining > 0) {
 		ssize_t written = write(1, cursor, remaining);
 
 		if (written < 0 && errno == EINTR)
 			continue;
 		if (written <= 0)
 			break;
 
 		cursor += written;
 		remaining -= written;
 	}
 }
 
 static void
 zdb_dump_block(char *label, void *buf, uint64_t size, int flags)
 {
 	uint64_t *d = (uint64_t *)buf;
 	unsigned nwords = size / sizeof (uint64_t);
 	int do_bswap = !!(flags & ZDB_FLAG_BSWAP);
 	unsigned i, j;
 	const char *hdr;
 	char *c;
 
 
 	if (do_bswap)
 		hdr = " 7 6 5 4 3 2 1 0   f e d c b a 9 8";
 	else
 		hdr = " 0 1 2 3 4 5 6 7   8 9 a b c d e f";
 
 	(void) printf("\n%s\n%6s   %s  0123456789abcdef\n", label, "", hdr);
 
 	for (i = 0; i < nwords; i += 2) {
 		(void) printf("%06llx:  %016llx  %016llx  ",
 		    (u_longlong_t)(i * sizeof (uint64_t)),
 		    (u_longlong_t)(do_bswap ? BSWAP_64(d[i]) : d[i]),
 		    (u_longlong_t)(do_bswap ? BSWAP_64(d[i + 1]) : d[i + 1]));
 
 		c = (char *)&d[i];
 		for (j = 0; j < 2 * sizeof (uint64_t); j++)
 			(void) printf("%c", isprint(c[j]) ? c[j] : '.');
 		(void) printf("\n");
 	}
 }
 
 /*
  * Resolve a vdev specifier relative to 'vdev'.
  *
  * There are two acceptable formats:
  *	leaf_name	  - For example: c1t0d0 or /tmp/ztest.0a
  *	child[.child]*    - For example: 0.1.1
  *
  * The second form can be used to specify arbitrary vdevs anywhere
  * in the hierarchy.  For example, in a pool with a mirror of
  * RAID-Zs, you can specify either RAID-Z vdev with 0.0 or 0.1 .
  *
  * A leaf name matches a child whose vdev_path equals it, whose last path
  * component equals it, or whose path ends in "s0" and whose last
  * component starts with the same characters up to that "s0".  Children
  * without a path are searched recursively.
  *
  * Returns the matching vdev, or NULL if 'vdev' is NULL or nothing matched.
  */
 static vdev_t *
 zdb_vdev_lookup(vdev_t *vdev, const char *path)
 {
 	char *s, *p, *q;
 	unsigned i;
 
 	if (vdev == NULL)
 		return (NULL);
 
 	/* First, assume the x.x.x.x format */
 	i = strtoul(path, &s, 10);
 	if (s == path || (s && *s != '.' && *s != '\0'))
 		goto name;
 	if (i >= vdev->vdev_children)
 		return (NULL);
 
 	vdev = vdev->vdev_child[i];
 	if (*s == '\0')
 		return (vdev);
 	return (zdb_vdev_lookup(vdev, s+1));
 
 name:
 	for (i = 0; i < vdev->vdev_children; i++) {
 		vdev_t *vc = vdev->vdev_child[i];
 		size_t pathlen;
 
 		if (vc->vdev_path == NULL) {
 			vc = zdb_vdev_lookup(vc, path);
 			if (vc == NULL)
 				continue;
 			else
 				return (vc);
 		}
 
 		if (strcmp(vc->vdev_path, path) == 0)
 			return (vc);
 
 		p = strrchr(vc->vdev_path, '/');
 		p = p ? p + 1 : vc->vdev_path;
 		if (strcmp(p, path) == 0)
 			return (vc);
 
 		/*
 		 * The "s0" suffix test looks at the last two characters of
 		 * the path; a shorter path has no such suffix, and indexing
 		 * at (length - 2) would point before the string.
 		 */
 		pathlen = strlen(vc->vdev_path);
 		if (pathlen < 2)
 			continue;
 		q = &vc->vdev_path[pathlen - 2];
 
 		if (strcmp(q, "s0") == 0 && strncmp(p, path, q - p) == 0)
 			return (vc);
 	}
 
 	return (NULL);
 }
 
 /*
  * Adapter giving random_get_pseudo_bytes() the three-argument callback
  * signature expected by abd_iterate_func(); the third argument is unused.
  */
 /* ARGSUSED */
 static int
 random_get_pseudo_bytes_cb(void *buf, size_t len, void *unused)
 {
 	int rc;
 
 	rc = random_get_pseudo_bytes(buf, len);
 	return (rc);
 }
 
 /*
  * Read a block from a pool and print it out.  The pool is given by 'spa';
  * the syntax of the block descriptor 'thing' is:
  *
  *	vdev_specifier:offset:size[:flags]
  *
  *	vdev_specifier - Which vdev (see comment for zdb_vdev_lookup)
  *	offset         - offset, in hex, in bytes
  *	size           - Amount of data to read, in hex, in bytes
  *	flags          - A string of characters specifying options
  *		 b: Decode a blkptr at given offset within block
  *		*c: Calculate and display checksums
  *		 d: Decompress data before dumping
  *		 e: Byteswap data before dumping
  *		 g: Display data as a gang block header
  *		 i: Display as an indirect block
  *		 p: Do I/O to physical offset
  *		 r: Dump raw data to stdout
  *
  *              * = not yet implemented
  *
  * Both offset and size must be multiples of DEV_BSIZE and size must be
  * non-zero.  Problems are reported on stdout and the function returns
  * without dumping anything.
  */
 static void
 zdb_read_block(char *thing, spa_t *spa)
 {
 	blkptr_t blk, *bp = &blk;
 	dva_t *dva = bp->blk_dva;
 	int flags = 0;
 	uint64_t offset = 0, size = 0, psize = 0, lsize = 0, blkptr_offset = 0;
 	zio_t *zio;
 	vdev_t *vd;
 	abd_t *pabd;
 	void *lbuf, *buf;
 	const char *s, *vdev;
 	char *p, *dup, *flagstr;
 	int i, error;
 
 	/* Split a private copy of the descriptor on ':'. */
 	dup = strdup(thing);
 	s = strtok(dup, ":");
 	vdev = s ? s : "";
 	s = strtok(NULL, ":");
 	offset = strtoull(s ? s : "", NULL, 16);
 	s = strtok(NULL, ":");
 	size = strtoull(s ? s : "", NULL, 16);
 	s = strtok(NULL, ":");
 	if (s)
 		flagstr = strdup(s);
 	else
 		flagstr = strdup("");
 
 	s = NULL;
 	if (size == 0)
 		s = "size must not be zero";
 	if (!IS_P2ALIGNED(size, DEV_BSIZE))
 		s = "size must be a multiple of sector size";
 	if (!IS_P2ALIGNED(offset, DEV_BSIZE))
 		s = "offset must be a multiple of sector size";
 	if (s) {
 		(void) printf("Invalid block specifier: %s  - %s\n", thing, s);
 		free(flagstr);
 		free(dup);
 		return;
 	}
 
 	/*
 	 * NOTE(review): every character after a 'b' flag is parsed as its
 	 * hex offset and then scanned again as a flag character by the
 	 * loop below; left as is because the syntax is ambiguous (e.g. "bd").
 	 */
 	for (s = strtok(flagstr, ":"); s; s = strtok(NULL, ":")) {
 		for (i = 0; flagstr[i]; i++) {
 			int bit = flagbits[(uchar_t)flagstr[i]];
 
 			if (bit == 0) {
 				(void) printf("***Invalid flag: %c\n",
 				    flagstr[i]);
 				continue;
 			}
 			flags |= bit;
 
 			/* If it's not something with an argument, keep going */
 			if ((bit & (ZDB_FLAG_CHECKSUM |
 			    ZDB_FLAG_PRINT_BLKPTR)) == 0)
 				continue;
 
 			p = &flagstr[i + 1];
 			if (bit == ZDB_FLAG_PRINT_BLKPTR)
 				blkptr_offset = strtoull(p, &p, 16);
 			if (*p != ':' && *p != '\0') {
 				/* s points into flagstr: print, then free. */
 				(void) printf("***Invalid flag arg: '%s'\n", s);
 				free(flagstr);
 				free(dup);
 				return;
 			}
 		}
 	}
 	free(flagstr);
 
 	vd = zdb_vdev_lookup(spa->spa_root_vdev, vdev);
 	if (vd == NULL) {
 		(void) printf("***Invalid vdev: %s\n", vdev);
 		free(dup);
 		return;
 	} else {
 		if (vd->vdev_path)
 			(void) fprintf(stderr, "Found vdev: %s\n",
 			    vd->vdev_path);
 		else
 			(void) fprintf(stderr, "Found vdev type: %s\n",
 			    vd->vdev_ops->vdev_op_type);
 	}
 
 	psize = size;
 	lsize = size;
 
 	pabd = abd_alloc_linear(SPA_MAXBLOCKSIZE, B_FALSE);
 	lbuf = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
 
 	/*
 	 * Build a block pointer describing the requested region:
 	 * uncompressed, unchecksummed, with a single DVA on the chosen vdev.
 	 */
 	BP_ZERO(bp);
 
 	DVA_SET_VDEV(&dva[0], vd->vdev_id);
 	DVA_SET_OFFSET(&dva[0], offset);
 	DVA_SET_GANG(&dva[0], !!(flags & ZDB_FLAG_GBH));
 	DVA_SET_ASIZE(&dva[0], vdev_psize_to_asize(vd, psize));
 
 	BP_SET_BIRTH(bp, TXG_INITIAL, TXG_INITIAL);
 
 	BP_SET_LSIZE(bp, lsize);
 	BP_SET_PSIZE(bp, psize);
 	BP_SET_COMPRESS(bp, ZIO_COMPRESS_OFF);
 	BP_SET_CHECKSUM(bp, ZIO_CHECKSUM_OFF);
 	BP_SET_TYPE(bp, DMU_OT_NONE);
 	BP_SET_LEVEL(bp, 0);
 	BP_SET_DEDUP(bp, 0);
 	BP_SET_BYTEORDER(bp, ZFS_HOST_BYTEORDER);
 
 	spa_config_enter(spa, SCL_STATE, FTAG, RW_READER);
 	zio = zio_root(spa, NULL, NULL, 0);
 
 	if (vd == vd->vdev_top) {
 		/*
 		 * Treat this as a normal block read.
 		 */
 		zio_nowait(zio_read(zio, spa, bp, pabd, psize, NULL, NULL,
 		    ZIO_PRIORITY_SYNC_READ,
 		    ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW, NULL));
 	} else {
 		/*
 		 * Treat this as a vdev child I/O.
 		 */
 		zio_nowait(zio_vdev_child_io(zio, bp, vd, offset, pabd,
 		    psize, ZIO_TYPE_READ, ZIO_PRIORITY_SYNC_READ,
 		    ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_QUEUE |
 		    ZIO_FLAG_DONT_PROPAGATE | ZIO_FLAG_DONT_RETRY |
 		    ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW | ZIO_FLAG_OPTIONAL,
 		    NULL, NULL));
 	}
 
 	error = zio_wait(zio);
 	spa_config_exit(spa, SCL_STATE, FTAG);
 
 	if (error) {
 		(void) printf("Read of %s failed, error: %d\n", thing, error);
 		goto out;
 	}
 
 	if (flags & ZDB_FLAG_DECOMPRESS) {
 		/*
 		 * We don't know how the data was compressed, so just try
 		 * every decompress function at every inflated blocksize.
 		 */
 		enum zio_compress c;
 		void *pbuf2 = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
 		void *lbuf2 = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
 
 		abd_copy_to_buf(pbuf2, pabd, psize);
 
 		/*
 		 * Pad both copies of the input with different random bytes
 		 * beyond psize.  A decompression is accepted only if it
 		 * succeeds on both copies with identical output, i.e. if it
 		 * did not depend on the padding.
 		 */
 		VERIFY0(abd_iterate_func(pabd, psize, SPA_MAXBLOCKSIZE - psize,
 		    random_get_pseudo_bytes_cb, NULL));
 
 		VERIFY0(random_get_pseudo_bytes((uint8_t *)pbuf2 + psize,
 		    SPA_MAXBLOCKSIZE - psize));
 
 		/*
 		 * The loop header alone steps lsize by SPA_MINBLOCKSIZE;
 		 * decrementing again in the body would skip every other
 		 * candidate size.
 		 */
 		for (lsize = SPA_MAXBLOCKSIZE; lsize > psize;
 		    lsize -= SPA_MINBLOCKSIZE) {
 			for (c = 0; c < ZIO_COMPRESS_FUNCTIONS; c++) {
 				if (zio_decompress_data(c, pabd,
 				    lbuf, psize, lsize) == 0 &&
 				    zio_decompress_data_buf(c, pbuf2,
 				    lbuf2, psize, lsize) == 0 &&
 				    bcmp(lbuf, lbuf2, lsize) == 0)
 					break;
 			}
 			if (c != ZIO_COMPRESS_FUNCTIONS)
 				break;
 		}
 
 		umem_free(pbuf2, SPA_MAXBLOCKSIZE);
 		umem_free(lbuf2, SPA_MAXBLOCKSIZE);
 
 		if (lsize <= psize) {
 			(void) printf("Decompress of %s failed\n", thing);
 			goto out;
 		}
 		buf = lbuf;
 		size = lsize;
 	} else {
 		buf = abd_to_buf(pabd);
 		size = psize;
 	}
 
 	/* Only one presentation is used; the first matching flag wins. */
 	if (flags & ZDB_FLAG_PRINT_BLKPTR)
 		zdb_print_blkptr((blkptr_t *)(void *)
 		    ((uintptr_t)buf + (uintptr_t)blkptr_offset), flags);
 	else if (flags & ZDB_FLAG_RAW)
 		zdb_dump_block_raw(buf, size, flags);
 	else if (flags & ZDB_FLAG_INDIRECT)
 		zdb_dump_indirect((blkptr_t *)buf, size / sizeof (blkptr_t),
 		    flags);
 	else if (flags & ZDB_FLAG_GBH)
 		zdb_dump_gbh(buf, flags);
 	else
 		zdb_dump_block(thing, buf, size, flags);
 
 out:
 	abd_free(pabd);
 	umem_free(lbuf, SPA_MAXBLOCKSIZE);
 	free(dup);
 }
 
 static void
 zdb_embedded_block(char *thing)
 {
 	blkptr_t bp;
 	unsigned long long *words = (void *)&bp;
 	char buf[SPA_MAXBLOCKSIZE];
 	int err;
 
 	bzero(&bp, sizeof (bp));
 	err = sscanf(thing, "%llx:%llx:%llx:%llx:%llx:%llx:%llx:%llx:"
 	    "%llx:%llx:%llx:%llx:%llx:%llx:%llx:%llx",
 	    words + 0, words + 1, words + 2, words + 3,
 	    words + 4, words + 5, words + 6, words + 7,
 	    words + 8, words + 9, words + 10, words + 11,
 	    words + 12, words + 13, words + 14, words + 15);
 	if (err != 16) {
 		(void) printf("invalid input format\n");
 		exit(1);
 	}
 	ASSERT3U(BPE_GET_LSIZE(&bp), <=, SPA_MAXBLOCKSIZE);
 	err = decode_embedded_bp(&bp, buf, BPE_GET_LSIZE(&bp));
 	if (err != 0) {
 		(void) printf("decode failed: %u\n", err);
 		exit(1);
 	}
 	zdb_dump_block_raw(buf, BPE_GET_LSIZE(&bp), 0);
 }
 
 /*
  * Decide whether pool config 'cfg' is the pool named by 'tgt'.  If 'tgt'
  * parses (strtoull, base auto-detected) to a non-zero number it is
  * compared with the config's pool GUID, otherwise it is compared as a
  * string with the config's pool name.  A config lacking the relevant
  * entry never matches.
  */
 static boolean_t
 pool_match(nvlist_t *cfg, char *tgt)
 {
 	uint64_t wanted_guid = strtoull(tgt, NULL, 0);
 	uint64_t cfg_guid;
 	char *cfg_name;
 
 	if (wanted_guid == 0) {
 		/* Not a number (or zero): match on the pool name. */
 		if (nvlist_lookup_string(cfg, ZPOOL_CONFIG_POOL_NAME,
 		    &cfg_name) != 0)
 			return (B_FALSE);
 		return (strcmp(cfg_name, tgt) == 0);
 	}
 
 	if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_GUID, &cfg_guid) != 0)
 		return (B_FALSE);
 	return (cfg_guid == wanted_guid);
 }
 
 /*
  * Search the importable pools for the one named by *target.
  *
  * *target is a pool name or GUID, optionally followed by a '/' or '@'
  * suffix; only the part before the first '/' or '@' is used for matching
  * (see pool_match()).  dirc and dirv are handed to zpool_search_import()
  * as the number and list of search paths.
  *
  * On a unique match, returns the pool's name (as stored in the search
  * result) and sets *configp to its config.  If *target does not already
  * begin with that name, *target is replaced by a newly allocated string
  * made of the pool name followed by the original suffix.
  *
  * If nothing matches, returns NULL and sets *configp to NULL.  If several
  * pools match, their configs are printed and the ambiguity is reported
  * through fatal().
  */
 static char *
 find_zpool(char **target, nvlist_t **configp, int dirc, char **dirv)
 {
 	nvlist_t *pools;
 	nvlist_t *match = NULL;
 	char *name = NULL;
 	char *sepp = NULL;
 	char sep = '\0';
 	int count = 0;
 	importargs_t args;
 
 	bzero(&args, sizeof (args));
 	args.paths = dirc;
 	args.path = dirv;
 	args.can_be_active = B_TRUE;
 
 	/* Temporarily cut *target at the first '/' or '@'. */
 	if ((sepp = strpbrk(*target, "/@")) != NULL) {
 		sep = *sepp;
 		*sepp = '\0';
 	}
 
 	pools = zpool_search_import(g_zfs, &args);
 
 	if (pools != NULL) {
 		nvpair_t *elem = NULL;
 		while ((elem = nvlist_next_nvpair(pools, elem)) != NULL) {
 			verify(nvpair_value_nvlist(elem, configp) == 0);
 			if (pool_match(*configp, *target)) {
 				count++;
 				if (match != NULL) {
 					/* print previously found config */
 					if (name != NULL) {
 						(void) printf("%s\n", name);
 						dump_nvlist(match, 8);
 						/*
 						 * Clearing name ensures the
 						 * first match is printed only
 						 * once.
 						 */
 						name = NULL;
 					}
 					(void) printf("%s\n",
 					    nvpair_name(elem));
 					dump_nvlist(*configp, 8);
 				} else {
 					match = *configp;
 					name = nvpair_name(elem);
 				}
 			}
 		}
 	}
 	if (count > 1)
 		(void) fatal("\tMatched %d pools - use pool GUID "
 		    "instead of pool name or \n"
 		    "\tpool name part of a dataset name to select pool", count);
 
 	/* Restore the separator removed above. */
 	if (sepp)
 		*sepp = sep;
 	/*
 	 * If pool GUID was specified for pool id, replace it with pool name
 	 */
 	if (name && (strstr(*target, name) != *target)) {
 		int sz = 1 + strlen(name) + ((sepp) ? strlen(sepp) : 0);
 
 		*target = umem_alloc(sz, UMEM_NOFAIL);
 		(void) snprintf(*target, sz, "%s%s", name, sepp ? sepp : "");
 	}
 
 	*configp = name ? match : NULL;
 
 	return (name);
 }
 
 /*
  * zdb entry point: parse the command-line options into dump_opt[] and
  * related globals, initialize the userland ZFS context, handle the
  * options that need no open pool (-E, -C without a target, -l, -O), open
  * the target pool or dataset, and dispatch to the matching dump routine.
  */
 int
 main(int argc, char **argv)
 {
 	int c;
 	struct rlimit rl = { 1024, 1024 };
 	spa_t *spa = NULL;
 	objset_t *os = NULL;
 	int dump_all = 1;
 	int verbose = 0;
 	int error = 0;
 	char **searchdirs = NULL;
 	int nsearch = 0;
 	char *target;
 	nvlist_t *policy = NULL;
 	uint64_t max_txg = UINT64_MAX;
 	int flags = ZFS_IMPORT_MISSING_LOG;
 	int rewind = ZPOOL_NEVER_REWIND;
 	char *spa_config_path_env;
 	boolean_t target_is_spa = B_TRUE;
 
 	(void) setrlimit(RLIMIT_NOFILE, &rl);
 	(void) enable_extended_FILE_stdio(-1, -1);
 
 	dprintf_setup(&argc, argv);
 
 	/*
 	 * If there is an environment variable SPA_CONFIG_PATH it overrides
 	 * default spa_config_path setting. If -U flag is specified it will
 	 * override this environment variable settings once again.
 	 */
 	spa_config_path_env = getenv("SPA_CONFIG_PATH");
 	if (spa_config_path_env != NULL)
 		spa_config_path = spa_config_path_env;
 
 	while ((c = getopt(argc, argv,
 	    "AbcCdDeEFGhiI:lLmMo:Op:PqRsSt:uU:vVx:X")) != -1) {
 		switch (c) {
 		/* Options that select what to dump; any of them clears dump_all. */
 		case 'b':
 		case 'c':
 		case 'C':
 		case 'd':
 		case 'D':
 		case 'E':
 		case 'G':
 		case 'h':
 		case 'i':
 		case 'l':
 		case 'm':
 		case 'M':
 		case 'O':
 		case 'R':
 		case 's':
 		case 'S':
 		case 'u':
 			dump_opt[c]++;
 			dump_all = 0;
 			break;
 		/* Options that are counted but leave dump_all untouched. */
 		case 'A':
 		case 'e':
 		case 'F':
 		case 'L':
 		case 'P':
 		case 'q':
 		case 'X':
 			dump_opt[c]++;
 			break;
 		/* NB: Sort single match options below. */
 		case 'I':
 			max_inflight = strtoull(optarg, NULL, 0);
 			if (max_inflight == 0) {
 				(void) fprintf(stderr, "maximum number "
 				    "of inflight I/Os must be greater "
 				    "than 0\n");
 				usage();
 			}
 			break;
 		case 'o':
 			error = set_global_var(optarg);
 			if (error != 0)
 				usage();
 			break;
 		case 'p':
 			/* Grow the search directory array by one slot. */
 			if (searchdirs == NULL) {
 				searchdirs = umem_alloc(sizeof (char *),
 				    UMEM_NOFAIL);
 			} else {
 				char **tmp = umem_alloc((nsearch + 1) *
 				    sizeof (char *), UMEM_NOFAIL);
 				bcopy(searchdirs, tmp, nsearch *
 				    sizeof (char *));
 				umem_free(searchdirs,
 				    nsearch * sizeof (char *));
 				searchdirs = tmp;
 			}
 			searchdirs[nsearch++] = optarg;
 			break;
 		case 't':
 			max_txg = strtoull(optarg, NULL, 0);
 			if (max_txg < TXG_INITIAL) {
 				(void) fprintf(stderr, "incorrect txg "
 				    "specified: %s\n", optarg);
 				usage();
 			}
 			break;
 		case 'U':
 			spa_config_path = optarg;
 			if (spa_config_path[0] != '/') {
 				(void) fprintf(stderr,
 				    "cachefile must be an absolute path "
 				    "(i.e. start with a slash)\n");
 				usage();
 			}
 			break;
 		case 'v':
 			verbose++;
 			break;
 		case 'V':
 			flags = ZFS_IMPORT_VERBATIM;
 			break;
 		case 'x':
 			vn_dumpdir = optarg;
 			break;
 		default:
 			usage();
 			break;
 		}
 	}
 
 	if (!dump_opt['e'] && searchdirs != NULL) {
 		(void) fprintf(stderr, "-p option requires use of -e\n");
 		usage();
 	}
 
 	/*
 	 * ZDB does not typically re-read blocks; therefore limit the ARC
 	 * to 256 MB, which can be used entirely for metadata.
 	 */
 	zfs_arc_max = zfs_arc_meta_limit = 256 * 1024 * 1024;
 
 	/*
 	 * "zdb -c" uses checksum-verifying scrub i/os which are async reads.
 	 * "zdb -b" uses traversal prefetch which uses async reads.
 	 * For good performance, let several of them be active at once.
 	 */
 	zfs_vdev_async_read_max_active = 10;
 
 	/*
 	 * Disable reference tracking for better performance.
 	 */
 	reference_tracking_enable = B_FALSE;
 
 	kernel_init(FREAD);
 	g_zfs = libzfs_init();
 	ASSERT(g_zfs != NULL);
 
 	if (dump_all)
 		verbose = MAX(verbose, 1);
 
 	/*
 	 * With no selecting option given, turn on every option except the
 	 * ones listed here; then add the verbosity to each enabled option.
 	 */
 	for (c = 0; c < 256; c++) {
 		if (dump_all && strchr("AeEFlLOPRSX", c) == NULL)
 			dump_opt[c] = 1;
 		if (dump_opt[c])
 			dump_opt[c] += verbose;
 	}
 
 	aok = (dump_opt['A'] == 1) || (dump_opt['A'] > 2);
 	zfs_recover = (dump_opt['A'] > 1);
 
 	argc -= optind;
 	argv += optind;
 
 	/* -R needs a pool plus at least one block descriptor. */
 	if (argc < 2 && dump_opt['R'])
 		usage();
 
 	if (dump_opt['E']) {
 		if (argc != 1)
 			usage();
 		zdb_embedded_block(argv[0]);
 		return (0);
 	}
 
 	if (argc < 1) {
 		if (!dump_opt['e'] && dump_opt['C']) {
 			dump_cachefile(spa_config_path);
 			return (0);
 		}
 		usage();
 	}
 
 	if (dump_opt['l'])
 		return (dump_label(argv[0]));
 
 	if (dump_opt['O']) {
 		if (argc != 2)
 			usage();
 		dump_opt['v'] = verbose + 3;
 		return (dump_path(argv[0], argv[1]));
 	}
 
 	if (dump_opt['X'] || dump_opt['F'])
 		rewind = ZPOOL_DO_REWIND |
 		    (dump_opt['X'] ? ZPOOL_EXTREME_REWIND : 0);
 
 	/* Rewind policy used both for the import and for the open below. */
 	if (nvlist_alloc(&policy, NV_UNIQUE_NAME_TYPE, 0) != 0 ||
 	    nvlist_add_uint64(policy, ZPOOL_REWIND_REQUEST_TXG, max_txg) != 0 ||
 	    nvlist_add_uint32(policy, ZPOOL_REWIND_REQUEST, rewind) != 0)
 		fatal("internal error: %s", strerror(ENOMEM));
 
 	error = 0;
 	target = argv[0];
 
 	/* With -e, locate the pool by searching and import it first. */
 	if (dump_opt['e']) {
 		nvlist_t *cfg = NULL;
 		char *name = find_zpool(&target, &cfg, nsearch, searchdirs);
 
 		error = ENOENT;
 		if (name) {
 			if (dump_opt['C'] > 1) {
 				(void) printf("\nConfiguration for import:\n");
 				dump_nvlist(cfg, 8);
 			}
 			if (nvlist_add_nvlist(cfg,
 			    ZPOOL_REWIND_POLICY, policy) != 0) {
 				fatal("can't open '%s': %s",
 				    target, strerror(ENOMEM));
 			}
 			error = spa_import(name, cfg, NULL, flags);
 		}
 	}
 
 	if (strpbrk(target, "/@") != NULL) {
 		size_t targetlen;
 
 		target_is_spa = B_FALSE;
 		/*
 		 * Remove any trailing slash.  Later code would get confused
 		 * by it, but we want to allow it so that "pool/" can
 		 * indicate that we want to dump the topmost filesystem,
 		 * rather than the whole pool.
 		 */
 		targetlen = strlen(target);
 		if (targetlen != 0 && target[targetlen - 1] == '/')
 			target[targetlen - 1] = '\0';
 	}
 
 	if (error == 0) {
 		if (target_is_spa || dump_opt['R']) {
 			error = spa_open_rewind(target, &spa, FTAG, policy,
 			    NULL);
 			if (error) {
 				/*
 				 * If we're missing the log device then
 				 * try opening the pool after clearing the
 				 * log state.
 				 */
 				mutex_enter(&spa_namespace_lock);
 				if ((spa = spa_lookup(target)) != NULL &&
 				    spa->spa_log_state == SPA_LOG_MISSING) {
 					spa->spa_log_state = SPA_LOG_CLEAR;
 					error = 0;
 				}
 				mutex_exit(&spa_namespace_lock);
 
 				if (!error) {
 					error = spa_open_rewind(target, &spa,
 					    FTAG, policy, NULL);
 				}
 			}
 		} else {
 			error = open_objset(target, DMU_OST_ANY, FTAG, &os);
 		}
 	}
 	nvlist_free(policy);
 
 	if (error)
 		fatal("can't open '%s': %s", target, strerror(error));
 
 	/* Step past the target; what remains are objects or block specs. */
 	argv++;
 	argc--;
 	if (!dump_opt['R']) {
 		if (argc > 0) {
 			zopt_objects = argc;
 			zopt_object = calloc(zopt_objects, sizeof (uint64_t));
 			for (unsigned i = 0; i < zopt_objects; i++) {
 				errno = 0;
 				zopt_object[i] = strtoull(argv[i], NULL, 0);
 				if (zopt_object[i] == 0 && errno != 0)
 					fatal("bad number %s: %s",
 					    argv[i], strerror(errno));
 			}
 		}
 		if (os != NULL) {
 			dump_dir(os);
 		} else if (zopt_objects > 0 && !dump_opt['m']) {
 			dump_dir(spa->spa_meta_objset);
 		} else {
 			dump_zpool(spa);
 		}
 	} else {
 		/* Map -R flag characters to the bits zdb_read_block() uses. */
 		flagbits['b'] = ZDB_FLAG_PRINT_BLKPTR;
 		flagbits['c'] = ZDB_FLAG_CHECKSUM;
 		flagbits['d'] = ZDB_FLAG_DECOMPRESS;
 		flagbits['e'] = ZDB_FLAG_BSWAP;
 		flagbits['g'] = ZDB_FLAG_GBH;
 		flagbits['i'] = ZDB_FLAG_INDIRECT;
 		flagbits['p'] = ZDB_FLAG_PHYS;
 		flagbits['r'] = ZDB_FLAG_RAW;
 
 		for (int i = 0; i < argc; i++)
 			zdb_read_block(argv[i], spa);
 	}
 
 	if (os != NULL)
 		close_objset(os, FTAG);
 	else
 		spa_close(spa, FTAG);
 
 	fuid_table_destroy();
 
 	dump_debug_buffer();
 
 	libzfs_fini(g_zfs);
 	kernel_fini();
 
 	return (0);
 }
Index: vendor/illumos/dist/cmd/ztest/ztest.c
===================================================================
--- vendor/illumos/dist/cmd/ztest/ztest.c	(revision 329752)
+++ vendor/illumos/dist/cmd/ztest/ztest.c	(revision 329753)
@@ -1,6489 +1,6490 @@
 /*
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
  * Common Development and Distribution License (the "License").
  * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
  * See the License for the specific language governing permissions
  * and limitations under the License.
  *
  * When distributing Covered Code, include this CDDL HEADER in each
  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  * If applicable, add the following below this CDDL HEADER, with the
  * fields enclosed by brackets "[]" replaced with your own identifying
  * information: Portions Copyright [yyyy] [name of copyright owner]
  *
  * CDDL HEADER END
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2011, 2016 by Delphix. All rights reserved.
  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  * Copyright (c) 2013 Steven Hartland. All rights reserved.
  * Copyright (c) 2014 Integros [integros.com]
  * Copyright 2017 Joyent, Inc.
+ * Copyright 2017 RackTop Systems.
  */
 
 /*
  * The objective of this program is to provide a DMU/ZAP/SPA stress test
  * that runs entirely in userland, is easy to use, and easy to extend.
  *
  * The overall design of the ztest program is as follows:
  *
  * (1) For each major functional area (e.g. adding vdevs to a pool,
  *     creating and destroying datasets, reading and writing objects, etc)
  *     we have a simple routine to test that functionality.  These
  *     individual routines do not have to do anything "stressful".
  *
  * (2) We turn these simple functionality tests into a stress test by
  *     running them all in parallel, with as many threads as desired,
  *     and spread across as many datasets, objects, and vdevs as desired.
  *
  * (3) While all this is happening, we inject faults into the pool to
  *     verify that self-healing data really works.
  *
  * (4) Every time we open a dataset, we change its checksum and compression
  *     functions.  Thus even individual objects vary from block to block
  *     in which checksum they use and whether they're compressed.
  *
  * (5) To verify that we never lose on-disk consistency after a crash,
  *     we run the entire test in a child of the main process.
  *     At random times, the child self-immolates with a SIGKILL.
  *     This is the software equivalent of pulling the power cord.
  *     The parent then runs the test again, using the existing
  *     storage pool, as many times as desired. If backwards compatibility
  *     testing is enabled ztest will sometimes run the "older" version
  *     of ztest after a SIGKILL.
  *
  * (6) To verify that we don't have future leaks or temporal incursions,
  *     many of the functional tests record the transaction group number
  *     as part of their data.  When reading old data, they verify that
  *     the transaction group number is less than the current, open txg.
  *     If you add a new test, please do this if applicable.
  *
  * When run with no arguments, ztest runs for about five minutes and
  * produces no output if successful.  To get a little bit of information,
  * specify -V.  To get more information, specify -VV, and so on.
  *
  * To turn this into an overnight stress test, use -T to specify run time.
  *
  * You can ask for more vdevs [-v], datasets [-d], or threads [-t]
  * to increase the pool capacity, fanout, and overall stress level.
  *
  * Use the -k option to set the desired frequency of kills.
  *
  * When ztest invokes itself it passes all relevant information through a
  * temporary file which is mmap-ed in the child process. This allows shared
  * memory to survive the exec syscall. The ztest_shared_hdr_t struct is always
  * stored at offset 0 of this file and contains information on the size and
  * number of shared structures in the file. The information stored in this file
  * must remain backwards compatible with older versions of ztest so that
  * ztest can invoke them during backwards compatibility testing (-B).
  */
 
 #include <sys/zfs_context.h>
 #include <sys/spa.h>
 #include <sys/dmu.h>
 #include <sys/txg.h>
 #include <sys/dbuf.h>
 #include <sys/zap.h>
 #include <sys/dmu_objset.h>
 #include <sys/poll.h>
 #include <sys/stat.h>
 #include <sys/time.h>
 #include <sys/wait.h>
 #include <sys/mman.h>
 #include <sys/resource.h>
 #include <sys/zio.h>
 #include <sys/zil.h>
 #include <sys/zil_impl.h>
 #include <sys/vdev_impl.h>
 #include <sys/vdev_file.h>
 #include <sys/spa_impl.h>
 #include <sys/metaslab_impl.h>
 #include <sys/dsl_prop.h>
 #include <sys/dsl_dataset.h>
 #include <sys/dsl_destroy.h>
 #include <sys/dsl_scan.h>
 #include <sys/zio_checksum.h>
 #include <sys/refcount.h>
 #include <sys/zfeature.h>
 #include <sys/dsl_userhold.h>
 #include <sys/abd.h>
 #include <stdio.h>
 #include <stdio_ext.h>
 #include <stdlib.h>
 #include <unistd.h>
 #include <signal.h>
 #include <umem.h>
 #include <dlfcn.h>
 #include <ctype.h>
 #include <math.h>
 #include <sys/fs/zfs.h>
 #include <libnvpair.h>
 #include <libcmdutils.h>
 
 static int ztest_fd_data = -1;
 static int ztest_fd_rand = -1;
 
 /*
  * Header of the file shared between a ztest parent and its child.  Per the
  * overview comment at the top of this file it is stored at offset 0 of the
  * mmap-ed file and records the size and number of the shared structures in
  * that file; the layout must stay compatible with older ztest binaries.
  *
  * NOTE(review): each *_size field presumably holds the sizeof of the
  * matching shared structure and each *_count the number of array
  * elements -- confirm against the code that fills the header.
  */
 typedef struct ztest_shared_hdr {
 	uint64_t	zh_hdr_size;
 	uint64_t	zh_opts_size;
 	uint64_t	zh_size;
 	uint64_t	zh_stats_size;
 	uint64_t	zh_stats_count;
 	uint64_t	zh_ds_size;
 	uint64_t	zh_ds_count;
 } ztest_shared_hdr_t;
 
 static ztest_shared_hdr_t *ztest_shared_hdr;
 
 /*
  * Run-time options.  process_options() starts from ztest_opts_defaults and
  * overrides individual fields from the command line; the flag that sets
  * each field is noted alongside it.
  */
 typedef struct ztest_shared_opts {
 	char zo_pool[ZFS_MAX_DATASET_NAME_LEN];	/* -p */
 	char zo_dir[ZFS_MAX_DATASET_NAME_LEN];	/* -f (after realpath()) */
 	char zo_alt_ztest[MAXNAMELEN];		/* derived from -B */
 	char zo_alt_libpath[MAXNAMELEN];	/* derived from -B */
 	uint64_t zo_vdevs;			/* -v */
 	uint64_t zo_vdevtime;	/* computed from zo_time and zo_vdevs */
 	size_t zo_vdev_size;			/* -s */
 	int zo_ashift;				/* -a, 0 means random */
 	int zo_mirrors;				/* -m */
 	int zo_raidz;				/* -r */
 	int zo_raidz_parity;			/* -R */
 	int zo_datasets;			/* -d */
 	int zo_threads;				/* -t */
 	uint64_t zo_passtime;			/* -P */
 	uint64_t zo_killrate;			/* -k */
 	int zo_verbose;				/* -V, once per level */
 	int zo_init;				/* -i, or 0 with -E */
 	uint64_t zo_time;			/* -T */
 	uint64_t zo_maxloops;			/* -F */
 	uint64_t zo_metaslab_gang_bang;		/* -g */
 } ztest_shared_opts_t;
 
 /*
  * Built-in option defaults.  process_options() copies this structure into
  * ztest_opts before parsing the command line, and usage() prints these
  * values as the documented defaults.  Fields without an initializer here
  * (e.g. zo_vdevtime) start out as zero.
  */
 static const ztest_shared_opts_t ztest_opts_defaults = {
 	.zo_pool = { 'z', 't', 'e', 's', 't', '\0' },
 	.zo_dir = { '/', 't', 'm', 'p', '\0' },
 	.zo_alt_ztest = { '\0' },
 	.zo_alt_libpath = { '\0' },
 	.zo_vdevs = 5,
 	.zo_ashift = SPA_MINBLOCKSHIFT,
 	.zo_mirrors = 2,
 	.zo_raidz = 4,
 	.zo_raidz_parity = 1,
 	.zo_vdev_size = SPA_MINDEVSIZE * 4,	/* 256m default size */
 	.zo_datasets = 7,
 	.zo_threads = 23,
 	.zo_passtime = 60,		/* 60 seconds */
 	.zo_killrate = 70,		/* 70% kill rate */
 	.zo_verbose = 0,
 	.zo_init = 1,
 	.zo_time = 300,			/* 5 minutes */
 	.zo_maxloops = 50,		/* max loops during spa_freeze() */
 	.zo_metaslab_gang_bang = 32 << 10
 };
 
 extern uint64_t metaslab_gang_bang;
 extern uint64_t metaslab_df_alloc_threshold;
 extern uint64_t zfs_deadman_synctime_ms;
 extern int metaslab_preload_limit;
 extern boolean_t zfs_compressed_arc_enabled;
 extern boolean_t zfs_abd_scatter_enabled;
 
 static ztest_shared_opts_t *ztest_shared_opts;
 static ztest_shared_opts_t ztest_opts;
 
 /*
  * Per-dataset state reachable through ztest_ds_t.zd_shared.
  * ztest_zd_init() resets zd_seq to 0 whenever a shared slot is supplied.
  */
 typedef struct ztest_shared_ds {
 	uint64_t	zd_seq;
 } ztest_shared_ds_t;
 
 static ztest_shared_ds_t *ztest_shared_ds;
 #define	ZTEST_GET_SHARED_DS(d) (&ztest_shared_ds[d])
 
 #define	BT_MAGIC	0x123456789abcdefULL
 #define	MAXFAULTS() \
 	(MAX(zs->zs_mirrors, 1) * (ztest_opts.zo_raidz_parity + 1) - 1)
 
 /*
  * Kinds of I/O operation.  ZTEST_IO_TYPES is not an operation itself: as
  * the last enumerator it equals the number of operations listed before it.
  */
 enum ztest_io_type {
 	ZTEST_IO_WRITE_TAG,
 	ZTEST_IO_WRITE_PATTERN,
 	ZTEST_IO_WRITE_ZEROES,
 	ZTEST_IO_TRUNCATE,
 	ZTEST_IO_SETATTR,
 	ZTEST_IO_REWRITE,
 	ZTEST_IO_TYPES
 };
 
 /*
  * Block tag record.
  *
  * NOTE(review): presumably written into test data so that later reads can
  * check where and when (object, offset, txg) the data was written, with
  * bt_magic expected to equal BT_MAGIC -- the users of this type are not
  * visible in this part of the file; confirm before relying on this.
  */
 typedef struct ztest_block_tag {
 	uint64_t	bt_magic;
 	uint64_t	bt_objset;
 	uint64_t	bt_object;
 	uint64_t	bt_offset;
 	uint64_t	bt_gen;
 	uint64_t	bt_txg;
 	uint64_t	bt_crtxg;
 } ztest_block_tag_t;
 
 /*
  * Three-word data record (index, txg, data).
  * NOTE(review): its users are outside this part of the file; the meaning
  * of each word should be confirmed there.
  */
 typedef struct bufwad {
 	uint64_t	bw_index;
 	uint64_t	bw_txg;
 	uint64_t	bw_data;
 } bufwad_t;
 
 /*
  * XXX -- fix zfs range locks to be generic so we can use them here.
  *
  * Lock mode passed to ztest_rll_lock().  That function only distinguishes
  * RL_READER (shared) from everything else: RL_WRITER and RL_APPEND both
  * take the lock exclusively.
  */
 typedef enum {
 	RL_READER,
 	RL_WRITER,
 	RL_APPEND
 } rl_type_t;
 
 /*
  * Reader/writer lock state operated on by ztest_rll_lock() and
  * ztest_rll_unlock(); all fields are accessed with rll_lock held there.
  */
 typedef struct rll {
 	void		*rll_writer;	/* curthread of the writer, or NULL */
 	int		rll_readers;	/* number of current readers */
-	mutex_t		rll_lock;
-	cond_t		rll_cv;
+	kmutex_t	rll_lock;
+	kcondvar_t	rll_cv;
 } rll_t;
 
 /*
  * Handle for a held range lock.  Allocated and filled in by
  * ztest_range_lock(), released and freed by ztest_range_unlock().
  */
 typedef struct rl {
 	uint64_t	rl_object;
 	uint64_t	rl_offset;
 	uint64_t	rl_size;
 	rll_t		*rl_lock;	/* the rll_t that is actually held */
 } rl_t;
 
 #define	ZTEST_RANGE_LOCKS	64
 #define	ZTEST_OBJECT_LOCKS	64
 
 /*
  * Object descriptor.  Used as a template for object lookup/create/remove.
  *
  * NOTE(review): the od_cr* fields presumably hold the values requested at
  * creation time while their unprefixed twins hold the values currently
  * observed -- confirm against the lookup/create code.
  */
 typedef struct ztest_od {
 	uint64_t	od_dir;
 	uint64_t	od_object;
 	dmu_object_type_t od_type;
 	dmu_object_type_t od_crtype;
 	uint64_t	od_blocksize;
 	uint64_t	od_crblocksize;
 	uint64_t	od_gen;
 	uint64_t	od_crgen;
 	char		od_name[ZFS_MAX_DATASET_NAME_LEN];
 } ztest_od_t;
 
 /*
  * Per-dataset state.
  *
  * Filled in by ztest_zd_init() from an open objset.  The two rll_t arrays
  * are hash buckets: ztest_object_lock() picks a slot with
  * object & (ZTEST_OBJECT_LOCKS - 1), ztest_range_lock() with a hash of
  * object and offset masked by (ZTEST_RANGE_LOCKS - 1).
  */
 typedef struct ztest_ds {
 	ztest_shared_ds_t *zd_shared;
 	objset_t	*zd_os;
-	rwlock_t	zd_zilog_lock;
+	krwlock_t	zd_zilog_lock;
 	zilog_t		*zd_zilog;	/* dmu_objset_zil(zd_os) */
 	ztest_od_t	*zd_od;		/* debugging aid */
 	char		zd_name[ZFS_MAX_DATASET_NAME_LEN];
-	mutex_t		zd_dirobj_lock;
+	kmutex_t	zd_dirobj_lock;
 	rll_t		zd_object_lock[ZTEST_OBJECT_LOCKS];
 	rll_t		zd_range_lock[ZTEST_RANGE_LOCKS];
 } ztest_ds_t;
 
 /*
  * Per-iteration state.
  *
  * Every test routine has the ztest_func_t signature: the dataset it
  * operates on and an id.
  */
 typedef void ztest_func_t(ztest_ds_t *zd, uint64_t id);
 
 /*
  * One entry of the ztest_info[] test table.  zi_interval points at the
  * interval rather than holding it; the zopt_* values it usually points to
  * are expressed as multiples of NANOSEC.
  */
 typedef struct ztest_info {
 	ztest_func_t	*zi_func;	/* test function */
 	uint64_t	zi_iters;	/* iterations per execution */
 	uint64_t	*zi_interval;	/* execute once per *zi_interval */
 } ztest_info_t;
 
 /*
  * Call statistics and scheduling state, kept as an array that is indexed
  * through ZTEST_GET_SHARED_CALLSTATE().
  */
 typedef struct ztest_shared_callstate {
 	uint64_t	zc_count;	/* per-pass count */
 	uint64_t	zc_time;	/* per-pass time */
 	uint64_t	zc_next;	/* next time to call this function */
 } ztest_shared_callstate_t;
 
 static ztest_shared_callstate_t *ztest_shared_callstate;
 #define	ZTEST_GET_SHARED_CALLSTATE(c) (&ztest_shared_callstate[c])
 
 /*
  * Note: these aren't static because we want dladdr() to work.
  */
 ztest_func_t ztest_dmu_read_write;
 ztest_func_t ztest_dmu_write_parallel;
 ztest_func_t ztest_dmu_object_alloc_free;
 ztest_func_t ztest_dmu_commit_callbacks;
 ztest_func_t ztest_zap;
 ztest_func_t ztest_zap_parallel;
 ztest_func_t ztest_zil_commit;
 ztest_func_t ztest_zil_remount;
 ztest_func_t ztest_dmu_read_write_zcopy;
 ztest_func_t ztest_dmu_objset_create_destroy;
 ztest_func_t ztest_dmu_prealloc;
 ztest_func_t ztest_fzap;
 ztest_func_t ztest_dmu_snapshot_create_destroy;
 ztest_func_t ztest_dsl_prop_get_set;
 ztest_func_t ztest_spa_prop_get_set;
 ztest_func_t ztest_spa_create_destroy;
 ztest_func_t ztest_fault_inject;
 ztest_func_t ztest_ddt_repair;
 ztest_func_t ztest_dmu_snapshot_hold;
 ztest_func_t ztest_spa_rename;
 ztest_func_t ztest_scrub;
 ztest_func_t ztest_dsl_dataset_promote_busy;
 ztest_func_t ztest_vdev_attach_detach;
 ztest_func_t ztest_vdev_LUN_growth;
 ztest_func_t ztest_vdev_add_remove;
 ztest_func_t ztest_vdev_aux_add_remove;
 ztest_func_t ztest_split_pool;
 ztest_func_t ztest_reguid;
 ztest_func_t ztest_spa_upgrade;
 ztest_func_t ztest_device_removal;
 ztest_func_t ztest_remap_blocks;
 
 uint64_t zopt_always = 0ULL * NANOSEC;		/* all the time */
 uint64_t zopt_incessant = 1ULL * NANOSEC / 10;	/* every 1/10 second */
 uint64_t zopt_often = 1ULL * NANOSEC;		/* every second */
 uint64_t zopt_sometimes = 10ULL * NANOSEC;	/* every 10 seconds */
 uint64_t zopt_rarely = 60ULL * NANOSEC;		/* every 60 seconds */
 
 /*
  * Table of test routines: { function, iterations per execution, pointer
  * to the interval between executions }.  ZTEST_FUNCS below is derived from
  * the size of this array.  ztest_dmu_prealloc is compiled out with #if 0.
  * The two vdev add/remove tests point at ztest_opts.zo_vdevtime, which
  * process_options() computes from the total run time and vdev count.
  */
 ztest_info_t ztest_info[] = {
 	{ ztest_dmu_read_write,			1,	&zopt_always	},
 	{ ztest_dmu_write_parallel,		10,	&zopt_always	},
 	{ ztest_dmu_object_alloc_free,		1,	&zopt_always	},
 	{ ztest_dmu_commit_callbacks,		1,	&zopt_always	},
 	{ ztest_zap,				30,	&zopt_always	},
 	{ ztest_zap_parallel,			100,	&zopt_always	},
 	{ ztest_split_pool,			1,	&zopt_always	},
 	{ ztest_zil_commit,			1,	&zopt_incessant	},
 	{ ztest_zil_remount,			1,	&zopt_sometimes	},
 	{ ztest_dmu_read_write_zcopy,		1,	&zopt_often	},
 	{ ztest_dmu_objset_create_destroy,	1,	&zopt_often	},
 	{ ztest_dsl_prop_get_set,		1,	&zopt_often	},
 	{ ztest_spa_prop_get_set,		1,	&zopt_sometimes	},
 #if 0
 	{ ztest_dmu_prealloc,			1,	&zopt_sometimes	},
 #endif
 	{ ztest_fzap,				1,	&zopt_sometimes	},
 	{ ztest_dmu_snapshot_create_destroy,	1,	&zopt_sometimes	},
 	{ ztest_spa_create_destroy,		1,	&zopt_sometimes	},
 	{ ztest_fault_inject,			1,	&zopt_sometimes	},
 	{ ztest_ddt_repair,			1,	&zopt_sometimes	},
 	{ ztest_dmu_snapshot_hold,		1,	&zopt_sometimes	},
 	{ ztest_reguid,				1,	&zopt_rarely	},
 	{ ztest_spa_rename,			1,	&zopt_rarely	},
 	{ ztest_scrub,				1,	&zopt_rarely	},
 	{ ztest_spa_upgrade,			1,	&zopt_rarely	},
 	{ ztest_dsl_dataset_promote_busy,	1,	&zopt_rarely	},
 	{ ztest_vdev_attach_detach,		1,	&zopt_sometimes	},
 	{ ztest_vdev_LUN_growth,		1,	&zopt_rarely	},
 	{ ztest_vdev_add_remove,		1,
 	    &ztest_opts.zo_vdevtime				},
 	{ ztest_vdev_aux_add_remove,		1,
 	    &ztest_opts.zo_vdevtime				},
 	{ ztest_device_removal,			1,	&zopt_sometimes	},
 	{ ztest_remap_blocks,			1,	&zopt_sometimes }
 };
 
 #define	ZTEST_FUNCS	(sizeof (ztest_info) / sizeof (ztest_info_t))
 
 /*
  * The following struct is used to hold a list of uncalled commit callbacks.
  * The callbacks are ordered by txg number.
  *
  * NOTE(review): zcl_callbacks_lock presumably protects zcl_callbacks; the
  * code that takes it is outside this part of the file -- confirm.
  */
 typedef struct ztest_cb_list {
-	mutex_t	zcl_callbacks_lock;
+	kmutex_t zcl_callbacks_lock;
 	list_t	zcl_callbacks;
 } ztest_cb_list_t;
 
 /*
  * Stuff we need to share writably between parent and child.
  *
  * Field notes are limited to what the nearby code shows.
  */
 typedef struct ztest_shared {
 	boolean_t	zs_do_init;
 	hrtime_t	zs_proc_start;
 	hrtime_t	zs_proc_stop;
 	hrtime_t	zs_thread_start;
 	hrtime_t	zs_thread_stop;
 	hrtime_t	zs_thread_kill;
 	uint64_t	zs_enospc_count; /* bumped by ztest_record_enospc() */
 	uint64_t	zs_vdev_next_leaf; /* next leaf number, make_vdev_file() */
 	uint64_t	zs_vdev_aux;	/* aux vdev number, make_vdev_file() */
 	uint64_t	zs_alloc;	/* recorded by ztest_kill() */
 	uint64_t	zs_space;	/* recorded by ztest_kill() */
 	uint64_t	zs_splits;
 	uint64_t	zs_mirrors;	/* read by MAXFAULTS() */
 	uint64_t	zs_metaslab_sz;
 	uint64_t	zs_metaslab_df_alloc_threshold;
 	uint64_t	zs_guid;
 } ztest_shared_t;
 
 #define	ID_PARALLEL	-1ULL
 
 static char ztest_dev_template[] = "%s/%s.%llua";
 static char ztest_aux_template[] = "%s/%s.%s.%llu";
 ztest_shared_t *ztest_shared;
 
 static spa_t *ztest_spa = NULL;
 static ztest_ds_t *ztest_ds;
 
-static mutex_t ztest_vdev_lock;
+static kmutex_t ztest_vdev_lock;
 
 /*
  * The ztest_name_lock protects the pool and dataset namespace used by
  * the individual tests. To modify the namespace, consumers must grab
  * this lock as writer. Grabbing the lock as reader will ensure that the
  * namespace does not change while the lock is held.
  */
-static rwlock_t ztest_name_lock;
+static krwlock_t ztest_name_lock;
 
 static boolean_t ztest_dump_core = B_TRUE;
 static boolean_t ztest_exiting;
 
 /* Global commit callback list */
 static ztest_cb_list_t zcl;
 
 /*
  * Well-known object numbers: ZTEST_META_DNODE is 0, ZTEST_DIROBJ is 1 and
  * ZTEST_OBJECTS (2) is the count of the entries before it.
  */
 enum ztest_object {
 	ZTEST_META_DNODE = 0,
 	ZTEST_DIROBJ,
 	ZTEST_OBJECTS
 };
 
 static void usage(boolean_t) __NORETURN;
 
 /*
  * These libumem hooks provide a reasonable set of defaults for the allocator's
  * debugging facilities.
  */
 /*
  * libumem hook supplying the default $UMEM_DEBUG setting.
  *
  * Declared with an explicit (void) parameter list so that it is a proper
  * prototype, matching _umem_logging_init() below.
  */
 const char *
 _umem_debug_init(void)
 {
 	return ("default,verbose"); /* $UMEM_DEBUG setting */
 }
 
 /*
  * libumem hook supplying the default $UMEM_LOGGING setting.
  */
 const char *
 _umem_logging_init(void)
 {
 	static const char umem_logging[] = "fail,contents";
 
 	return (umem_logging);
 }
 
 #define	FATAL_MSG_SZ	1024
 
 char *fatal_msg;
 
 /*
  * Print "ztest: <formatted message>" on stderr and terminate the process.
  *
  * do_perror - when nonzero, ": <strerror(errno)>" is appended, using the
  *             errno value seen on entry to this function.
  * message   - printf-style format string, followed by its arguments.
  *
  * Never returns: abort() is called when ztest_dump_core is set, otherwise
  * the process exits with status 3.  All formatting is bounded by the size
  * of the local buffer, so an overlong message is truncated instead of
  * overrunning the stack.
  */
 static void
 fatal(int do_perror, char *message, ...)
 {
 	va_list args;
 	int save_errno = errno;
 	char buf[FATAL_MSG_SZ];
 	size_t len;
 
 	(void) fflush(stdout);
 
 	(void) snprintf(buf, sizeof (buf), "ztest: ");
 	len = strlen(buf);
 
 	va_start(args, message);
 	/* LINTED */
 	(void) vsnprintf(buf + len, sizeof (buf) - len, message, args);
 	va_end(args);
 
 	if (do_perror) {
 		len = strlen(buf);
 		(void) snprintf(buf + len, sizeof (buf) - len,
 		    ": %s", strerror(save_errno));
 	}
 	(void) fprintf(stderr, "%s\n", buf);
 	fatal_msg = buf;			/* to ease debugging */
 	if (ztest_dump_core)
 		abort();
 	exit(3);
 }
 
 /*
  * Translate the size suffix that follows a number into a bit shift.
  *
  * An empty string yields 0.  A single letter from "BKMGTPEZ" (either
  * case), optionally followed by 'B' or 'b', yields 10 times its position
  * in that list (B = 0, K = 10, M = 20, ...).  Any other input prints a
  * diagnostic and leaves through usage(B_FALSE), which does not return.
  */
 static int
 str2shift(const char *buf)
 {
 	const char *ends = "BKMGTPEZ";
 	const int nends = (int)strlen(ends);
 	int i;
 
 	if (buf[0] == '\0')
 		return (0);
 	/*
 	 * The <ctype.h> functions are only defined for values representable
 	 * as unsigned char (or EOF), so convert before calling toupper().
 	 */
 	for (i = 0; i < nends; i++) {
 		if (toupper((unsigned char)buf[0]) == ends[i])
 			break;
 	}
 	if (i == nends) {
 		(void) fprintf(stderr, "ztest: invalid bytes suffix: %s\n",
 		    buf);
 		usage(B_FALSE);
 	}
 	if (buf[1] == '\0' ||
 	    (toupper((unsigned char)buf[1]) == 'B' && buf[2] == '\0')) {
 		return (10*i);
 	}
 	(void) fprintf(stderr, "ztest: invalid bytes suffix: %s\n", buf);
 	usage(B_FALSE);
 	/* NOTREACHED */
 }
 
 /*
  * Parse a number with an optional fraction and size suffix ("512", "64k",
  * "1.5G", ...) into a 64-bit value.
  *
  * The integer part is read with strtoull() using base 0.  If a '.'
  * follows, the whole string is re-read as a double and scaled by
  * 2^str2shift(suffix); otherwise the integer is shifted left by
  * str2shift(suffix).  Unparsable input or a result that does not fit in
  * 64 bits prints a diagnostic and leaves through usage(B_FALSE).
  */
 static uint64_t
 nicenumtoull(const char *buf)
 {
 	char *end;
 	uint64_t val;
 
 	val = strtoull(buf, &end, 0);
 	if (end == buf) {
 		(void) fprintf(stderr, "ztest: bad numeric value: %s\n", buf);
 		usage(B_FALSE);
 	} else if (end[0] == '.') {
 		double fval = strtod(buf, &end);
 		fval *= pow(2, str2shift(end));
 		/*
 		 * UINT64_MAX is not representable as a double: converting it
 		 * rounds up to 2^64.  Use >= so that a value of exactly 2^64
 		 * is rejected too; converting it to uint64_t is undefined.
 		 */
 		if (fval >= (double)UINT64_MAX) {
 			(void) fprintf(stderr, "ztest: value too large: %s\n",
 			    buf);
 			usage(B_FALSE);
 		}
 		val = (uint64_t)fval;
 	} else {
 		int shift = str2shift(end);
 		/* Shifting back must restore val, else bits were lost. */
 		if (shift >= 64 || (val << shift) >> shift != val) {
 			(void) fprintf(stderr, "ztest: value too large: %s\n",
 			    buf);
 			usage(B_FALSE);
 		}
 		val <<= shift;
 	}
 	return (val);
 }
 
 /*
  * Print the option summary, with the built-in defaults taken from
  * ztest_opts_defaults, and exit.
  *
  * requested - B_TRUE when help was asked for: output goes to stdout and
  *             the exit status is 0.  Otherwise output goes to stderr and
  *             the exit status is 1.
  *
  * The arguments after the format string must stay in the same order as
  * the conversions in it.  Note that the name printed after "Usage:" is
  * the default pool name (zo_pool), not argv[0].
  */
 static void
 usage(boolean_t requested)
 {
 	const ztest_shared_opts_t *zo = &ztest_opts_defaults;
 
 	char nice_vdev_size[NN_NUMBUF_SZ];
 	char nice_gang_bang[NN_NUMBUF_SZ];
 	FILE *fp = requested ? stdout : stderr;
 
 	/* Human-readable forms of the two size-valued defaults. */
 	nicenum(zo->zo_vdev_size, nice_vdev_size, sizeof (nice_vdev_size));
 	nicenum(zo->zo_metaslab_gang_bang, nice_gang_bang,
 	    sizeof (nice_gang_bang));
 
 	(void) fprintf(fp, "Usage: %s\n"
 	    "\t[-v vdevs (default: %llu)]\n"
 	    "\t[-s size_of_each_vdev (default: %s)]\n"
 	    "\t[-a alignment_shift (default: %d)] use 0 for random\n"
 	    "\t[-m mirror_copies (default: %d)]\n"
 	    "\t[-r raidz_disks (default: %d)]\n"
 	    "\t[-R raidz_parity (default: %d)]\n"
 	    "\t[-d datasets (default: %d)]\n"
 	    "\t[-t threads (default: %d)]\n"
 	    "\t[-g gang_block_threshold (default: %s)]\n"
 	    "\t[-i init_count (default: %d)] initialize pool i times\n"
 	    "\t[-k kill_percentage (default: %llu%%)]\n"
 	    "\t[-p pool_name (default: %s)]\n"
 	    "\t[-f dir (default: %s)] file directory for vdev files\n"
 	    "\t[-V] verbose (use multiple times for ever more blather)\n"
 	    "\t[-E] use existing pool instead of creating new one\n"
 	    "\t[-T time (default: %llu sec)] total run time\n"
 	    "\t[-F freezeloops (default: %llu)] max loops in spa_freeze()\n"
 	    "\t[-P passtime (default: %llu sec)] time per pass\n"
 	    "\t[-B alt_ztest (default: <none>)] alternate ztest path\n"
 	    "\t[-o variable=value] ... set global variable to an unsigned\n"
 	    "\t    32-bit integer value\n"
 	    "\t[-h] (print help)\n"
 	    "",
 	    zo->zo_pool,
 	    (u_longlong_t)zo->zo_vdevs,			/* -v */
 	    nice_vdev_size,				/* -s */
 	    zo->zo_ashift,				/* -a */
 	    zo->zo_mirrors,				/* -m */
 	    zo->zo_raidz,				/* -r */
 	    zo->zo_raidz_parity,			/* -R */
 	    zo->zo_datasets,				/* -d */
 	    zo->zo_threads,				/* -t */
 	    nice_gang_bang,				/* -g */
 	    zo->zo_init,				/* -i */
 	    (u_longlong_t)zo->zo_killrate,		/* -k */
 	    zo->zo_pool,				/* -p */
 	    zo->zo_dir,					/* -f */
 	    (u_longlong_t)zo->zo_time,			/* -T */
 	    (u_longlong_t)zo->zo_maxloops,		/* -F */
 	    (u_longlong_t)zo->zo_passtime);
 	exit(requested ? 0 : 1);
 }
 
 static void
 process_options(int argc, char **argv)
 {
 	char *path;
 	ztest_shared_opts_t *zo = &ztest_opts;
 
 	int opt;
 	uint64_t value;
 	char altdir[MAXNAMELEN] = { 0 };
 
 	bcopy(&ztest_opts_defaults, zo, sizeof (*zo));
 
 	while ((opt = getopt(argc, argv,
 	    "v:s:a:m:r:R:d:t:g:i:k:p:f:VET:P:hF:B:o:")) != EOF) {
 		value = 0;
 		switch (opt) {
 		case 'v':
 		case 's':
 		case 'a':
 		case 'm':
 		case 'r':
 		case 'R':
 		case 'd':
 		case 't':
 		case 'g':
 		case 'i':
 		case 'k':
 		case 'T':
 		case 'P':
 		case 'F':
 			value = nicenumtoull(optarg);
 		}
 		switch (opt) {
 		case 'v':
 			zo->zo_vdevs = value;
 			break;
 		case 's':
 			zo->zo_vdev_size = MAX(SPA_MINDEVSIZE, value);
 			break;
 		case 'a':
 			zo->zo_ashift = value;
 			break;
 		case 'm':
 			zo->zo_mirrors = value;
 			break;
 		case 'r':
 			zo->zo_raidz = MAX(1, value);
 			break;
 		case 'R':
 			zo->zo_raidz_parity = MIN(MAX(value, 1), 3);
 			break;
 		case 'd':
 			zo->zo_datasets = MAX(1, value);
 			break;
 		case 't':
 			zo->zo_threads = MAX(1, value);
 			break;
 		case 'g':
 			zo->zo_metaslab_gang_bang = MAX(SPA_MINBLOCKSIZE << 1,
 			    value);
 			break;
 		case 'i':
 			zo->zo_init = value;
 			break;
 		case 'k':
 			zo->zo_killrate = value;
 			break;
 		case 'p':
 			(void) strlcpy(zo->zo_pool, optarg,
 			    sizeof (zo->zo_pool));
 			break;
 		case 'f':
 			path = realpath(optarg, NULL);
 			if (path == NULL) {
 				(void) fprintf(stderr, "error: %s: %s\n",
 				    optarg, strerror(errno));
 				usage(B_FALSE);
 			} else {
 				(void) strlcpy(zo->zo_dir, path,
 				    sizeof (zo->zo_dir));
 			}
 			break;
 		case 'V':
 			zo->zo_verbose++;
 			break;
 		case 'E':
 			zo->zo_init = 0;
 			break;
 		case 'T':
 			zo->zo_time = value;
 			break;
 		case 'P':
 			zo->zo_passtime = MAX(1, value);
 			break;
 		case 'F':
 			zo->zo_maxloops = MAX(1, value);
 			break;
 		case 'B':
 			(void) strlcpy(altdir, optarg, sizeof (altdir));
 			break;
 		case 'o':
 			if (set_global_var(optarg) != 0)
 				usage(B_FALSE);
 			break;
 		case 'h':
 			usage(B_TRUE);
 			break;
 		case '?':
 		default:
 			usage(B_FALSE);
 			break;
 		}
 	}
 
 	zo->zo_raidz_parity = MIN(zo->zo_raidz_parity, zo->zo_raidz - 1);
 
 	zo->zo_vdevtime =
 	    (zo->zo_vdevs > 0 ? zo->zo_time * NANOSEC / zo->zo_vdevs :
 	    UINT64_MAX >> 2);
 
 	if (strlen(altdir) > 0) {
 		char *cmd;
 		char *realaltdir;
 		char *bin;
 		char *ztest;
 		char *isa;
 		int isalen;
 
 		cmd = umem_alloc(MAXPATHLEN, UMEM_NOFAIL);
 		realaltdir = umem_alloc(MAXPATHLEN, UMEM_NOFAIL);
 
 		VERIFY(NULL != realpath(getexecname(), cmd));
 		if (0 != access(altdir, F_OK)) {
 			ztest_dump_core = B_FALSE;
 			fatal(B_TRUE, "invalid alternate ztest path: %s",
 			    altdir);
 		}
 		VERIFY(NULL != realpath(altdir, realaltdir));
 
 		/*
 		 * 'cmd' should be of the form "<anything>/usr/bin/<isa>/ztest".
 		 * We want to extract <isa> to determine if we should use
 		 * 32 or 64 bit binaries.
 		 */
 		bin = strstr(cmd, "/usr/bin/");
 		ztest = strstr(bin, "/ztest");
 		isa = bin + 9;
 		isalen = ztest - isa;
 		(void) snprintf(zo->zo_alt_ztest, sizeof (zo->zo_alt_ztest),
 		    "%s/usr/bin/%.*s/ztest", realaltdir, isalen, isa);
 		(void) snprintf(zo->zo_alt_libpath, sizeof (zo->zo_alt_libpath),
 		    "%s/usr/lib/%.*s", realaltdir, isalen, isa);
 
 		if (0 != access(zo->zo_alt_ztest, X_OK)) {
 			ztest_dump_core = B_FALSE;
 			fatal(B_TRUE, "invalid alternate ztest: %s",
 			    zo->zo_alt_ztest);
 		} else if (0 != access(zo->zo_alt_libpath, X_OK)) {
 			ztest_dump_core = B_FALSE;
 			fatal(B_TRUE, "invalid alternate lib directory %s",
 			    zo->zo_alt_libpath);
 		}
 
 		umem_free(cmd, MAXPATHLEN);
 		umem_free(realaltdir, MAXPATHLEN);
 	}
 }
 
 /*
  * Kill the current process with SIGKILL.
  *
  * Before doing so, the allocated and total space of the pool's normal
  * metaslab class are saved in *zs, the pool config cache file is written
  * and the debug messages are printed.
  */
 static void
 ztest_kill(ztest_shared_t *zs)
 {
 	zs->zs_alloc = metaslab_class_get_alloc(spa_normal_class(ztest_spa));
 	zs->zs_space = metaslab_class_get_space(spa_normal_class(ztest_spa));
 
 	/*
 	 * Before we kill off ztest, make sure that the config is updated.
 	 * See comment above spa_write_cachefile().
 	 */
 	mutex_enter(&spa_namespace_lock);
 	spa_write_cachefile(ztest_spa, B_FALSE, B_FALSE);
 	mutex_exit(&spa_namespace_lock);
 
 	zfs_dbgmsg_print(FTAG);
 	(void) kill(getpid(), SIGKILL);
 }
 
 /*
  * Return a number in [0, range), computed from eight bytes read from
  * ztest_fd_rand.  A range of 0 returns 0 without reading anything.
  * A short read is fatal.
  */
 static uint64_t
 ztest_random(uint64_t range)
 {
 	uint64_t bits;
 
 	ASSERT3S(ztest_fd_rand, >=, 0);
 
 	if (range != 0) {
 		if (read(ztest_fd_rand, &bits, sizeof (bits)) != sizeof (bits))
 			fatal(1, "short read from /dev/urandom");
 		return (bits % range);
 	}
 
 	return (0);
 }
 
 /*
  * Count one out-of-space event in the shared state.  The argument is not
  * used; the callers in this file pass FTAG.
  */
 /* ARGSUSED */
 static void
 ztest_record_enospc(const char *s)
 {
 	ztest_shared->zs_enospc_count++;
 }
 
 /*
  * Return the ashift to use for a new vdev: the configured zo_ashift, or,
  * when that is 0, SPA_MINBLOCKSHIFT plus a random value in [0, 4].
  */
 static uint64_t
 ztest_get_ashift(void)
 {
 	int configured = ztest_opts.zo_ashift;
 
 	if (configured != 0)
 		return (configured);
 
 	return (SPA_MINBLOCKSHIFT + ztest_random(5));
 }
 
 /*
  * Build the nvlist describing one file vdev.
  *
  * path   - backing file; when NULL a name is generated under zo_dir from
  *          the pool name and either the aux name plus zs_vdev_aux, or the
  *          next leaf number (zs_vdev_next_leaf, which is then advanced).
  * aux    - aux vdev name used in the generated path, or NULL.
  * pool   - pool name for the generated path; NULL selects zo_pool.
  * size   - when nonzero, the file is created/truncated to this size.
  * ashift - 0 selects ztest_get_ashift().
  *
  * Returns a newly allocated nvlist.
  */
 static nvlist_t *
 make_vdev_file(char *path, char *aux, char *pool, size_t size, uint64_t ashift)
 {
 	char pathbuf[MAXPATHLEN];
 	nvlist_t *file;
 
 	if (ashift == 0)
 		ashift = ztest_get_ashift();
 
 	if (path == NULL) {
 		char *poolname = (pool != NULL) ? pool : ztest_opts.zo_pool;
 		uint64_t vdev;
 
 		path = pathbuf;
 		if (aux == NULL) {
 			vdev = ztest_shared->zs_vdev_next_leaf++;
 			(void) snprintf(pathbuf, sizeof (pathbuf),
 			    ztest_dev_template, ztest_opts.zo_dir,
 			    poolname, vdev);
 		} else {
 			vdev = ztest_shared->zs_vdev_aux;
 			(void) snprintf(pathbuf, sizeof (pathbuf),
 			    ztest_aux_template, ztest_opts.zo_dir,
 			    poolname, aux, vdev);
 		}
 	}
 
 	if (size != 0) {
 		int fd;
 
 		fd = open(path, O_RDWR | O_CREAT | O_TRUNC, 0666);
 		if (fd == -1)
 			fatal(1, "can't open %s", path);
 		if (ftruncate(fd, size) != 0)
 			fatal(1, "can't ftruncate %s", path);
 		(void) close(fd);
 	}
 
 	VERIFY(nvlist_alloc(&file, NV_UNIQUE_NAME, 0) == 0);
 	VERIFY(nvlist_add_string(file, ZPOOL_CONFIG_TYPE, VDEV_TYPE_FILE) == 0);
 	VERIFY(nvlist_add_string(file, ZPOOL_CONFIG_PATH, path) == 0);
 	VERIFY(nvlist_add_uint64(file, ZPOOL_CONFIG_ASHIFT, ashift) == 0);
 
 	return (file);
 }
 
 /*
  * Build the nvlist for a raidz vdev with r file children and the
  * configured parity (zo_raidz_parity).  With r < 2 no raidz layer is
  * created and a single file vdev is returned instead.
  */
 static nvlist_t *
 make_vdev_raidz(char *path, char *aux, char *pool, size_t size,
     uint64_t ashift, int r)
 {
 	nvlist_t *raidz;
 	nvlist_t **kids;
 	size_t kids_sz;
 	int i;
 
 	if (r < 2)
 		return (make_vdev_file(path, aux, pool, size, ashift));
 
 	kids_sz = r * sizeof (nvlist_t *);
 	kids = umem_alloc(kids_sz, UMEM_NOFAIL);
 
 	i = 0;
 	while (i < r) {
 		kids[i] = make_vdev_file(path, aux, pool, size, ashift);
 		i++;
 	}
 
 	VERIFY(nvlist_alloc(&raidz, NV_UNIQUE_NAME, 0) == 0);
 	VERIFY(nvlist_add_string(raidz, ZPOOL_CONFIG_TYPE,
 	    VDEV_TYPE_RAIDZ) == 0);
 	VERIFY(nvlist_add_uint64(raidz, ZPOOL_CONFIG_NPARITY,
 	    ztest_opts.zo_raidz_parity) == 0);
 	VERIFY(nvlist_add_nvlist_array(raidz, ZPOOL_CONFIG_CHILDREN,
 	    kids, r) == 0);
 
 	/* The local child lists are no longer needed once added. */
 	i = 0;
 	while (i < r) {
 		nvlist_free(kids[i]);
 		i++;
 	}
 	umem_free(kids, kids_sz);
 
 	return (raidz);
 }
 
 /*
  * Build the nvlist for a mirror vdev with m children, each produced by
  * make_vdev_raidz().  With m < 1 no mirror layer is created and the
  * result of a single make_vdev_raidz() call is returned instead.
  */
 static nvlist_t *
 make_vdev_mirror(char *path, char *aux, char *pool, size_t size,
     uint64_t ashift, int r, int m)
 {
 	nvlist_t *mirror;
 	nvlist_t **sides;
 	size_t sides_sz;
 	int i;
 
 	if (m < 1)
 		return (make_vdev_raidz(path, aux, pool, size, ashift, r));
 
 	sides_sz = m * sizeof (nvlist_t *);
 	sides = umem_alloc(sides_sz, UMEM_NOFAIL);
 
 	i = 0;
 	while (i < m) {
 		sides[i] = make_vdev_raidz(path, aux, pool, size, ashift, r);
 		i++;
 	}
 
 	VERIFY(nvlist_alloc(&mirror, NV_UNIQUE_NAME, 0) == 0);
 	VERIFY(nvlist_add_string(mirror, ZPOOL_CONFIG_TYPE,
 	    VDEV_TYPE_MIRROR) == 0);
 	VERIFY(nvlist_add_nvlist_array(mirror, ZPOOL_CONFIG_CHILDREN,
 	    sides, m) == 0);
 
 	/* The local child lists are no longer needed once added. */
 	i = 0;
 	while (i < m) {
 		nvlist_free(sides[i]);
 		i++;
 	}
 	umem_free(sides, sides_sz);
 
 	return (mirror);
 }
 
 /*
  * Build a root vdev nvlist with t top-level children.
  *
  * Every child comes from make_vdev_mirror(path, aux, pool, size, ashift,
  * r, m) and is tagged with ZPOOL_CONFIG_IS_LOG = log.  The array of
  * children is stored under the name given by aux when aux is non-NULL,
  * otherwise under ZPOOL_CONFIG_CHILDREN.  t must be greater than 0.
  *
  * Returns the newly allocated root nvlist.
  */
 static nvlist_t *
 make_vdev_root(char *path, char *aux, char *pool, size_t size, uint64_t ashift,
     int log, int r, int m, int t)
 {
 	nvlist_t *root, **child;
 	int c;
 
 	ASSERT(t > 0);
 
 	child = umem_alloc(t * sizeof (nvlist_t *), UMEM_NOFAIL);
 
 	for (c = 0; c < t; c++) {
 		child[c] = make_vdev_mirror(path, aux, pool, size, ashift,
 		    r, m);
 		VERIFY(nvlist_add_uint64(child[c], ZPOOL_CONFIG_IS_LOG,
 		    log) == 0);
 	}
 
 	VERIFY(nvlist_alloc(&root, NV_UNIQUE_NAME, 0) == 0);
 	VERIFY(nvlist_add_string(root, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT) == 0);
 	VERIFY(nvlist_add_nvlist_array(root, aux ? aux : ZPOOL_CONFIG_CHILDREN,
 	    child, t) == 0);
 
 	/* The local child lists and the array are released once added. */
 	for (c = 0; c < t; c++)
 		nvlist_free(child[c]);
 
 	umem_free(child, t * sizeof (nvlist_t *));
 
 	return (root);
 }
 
 /*
  * Pick a random spa version that is not older than initial_version.
  *
  * For an initial version up to SPA_VERSION_BEFORE_FEATURES the result is
  * drawn from [initial_version, SPA_VERSION_BEFORE_FEATURES]; for anything
  * newer the result is SPA_VERSION_FEATURES.
  */
 static uint64_t
 ztest_random_spa_version(uint64_t initial_version)
 {
 	uint64_t chosen;
 
 	if (initial_version > SPA_VERSION_BEFORE_FEATURES) {
 		chosen = SPA_VERSION_FEATURES;
 	} else {
 		uint64_t span =
 		    SPA_VERSION_BEFORE_FEATURES - initial_version + 1;
 
 		chosen = initial_version + ztest_random(span);
 	}
 
 	ASSERT(SPA_VERSION_IS_SUPPORTED(chosen));
 	return (chosen);
 }
 
 /*
  * Return a random power-of-two block size.
  *
  * The exponent is SPA_MINBLOCKSHIFT plus a random value in
  * [0, maxbs - spa_max_ashift], where maxbs is SPA_OLD_MAXBLOCKSHIFT, or 20
  * when the pool's maximum block size equals SPA_MAXBLOCKSIZE.
  *
  * NOTE(review): as written the smallest result is 1 << SPA_MINBLOCKSHIFT
  * regardless of the pool's ashift, which does not obviously match the
  * "block size >= the ashift" wording below -- confirm the intent.
  */
 static int
 ztest_random_blocksize(void)
 {
 	uint64_t block_shift;
 	/*
 	 * Choose a block size >= the ashift.
 	 * If the SPA supports new MAXBLOCKSIZE, test up to 1MB blocks.
 	 */
 	int maxbs = SPA_OLD_MAXBLOCKSHIFT;
 	if (spa_maxblocksize(ztest_spa) == SPA_MAXBLOCKSIZE)
 		maxbs = 20;
 	block_shift = ztest_random(maxbs - ztest_spa->spa_max_ashift + 1);
 	return (1 << (SPA_MINBLOCKSHIFT + block_shift));
 }
 
 /*
  * Return a random indirect block shift in
  * [DN_MIN_INDBLKSHIFT, DN_MAX_INDBLKSHIFT].
  */
 static int
 ztest_random_ibshift(void)
 {
 	uint64_t extra;
 
 	extra = ztest_random(DN_MAX_INDBLKSHIFT - DN_MIN_INDBLKSHIFT + 1);
 
 	return (DN_MIN_INDBLKSHIFT + extra);
 }
 
 /*
  * Return the index of a randomly chosen top-level vdev of spa.
  *
  * Candidates are redrawn until one is found that is concrete, has a
  * metaslab group with a class, and -- unless log_ok is set -- is not a log
  * device.  The caller must hold the spa config lock (asserted).
  */
 static uint64_t
 ztest_random_vdev_top(spa_t *spa, boolean_t log_ok)
 {
 	vdev_t *rvd = spa->spa_root_vdev;
 	vdev_t *tvd;
 	uint64_t top;
 
 	ASSERT(spa_config_held(spa, SCL_ALL, RW_READER) != 0);
 
 	for (;;) {
 		top = ztest_random(rvd->vdev_children);
 		tvd = rvd->vdev_child[top];
 
 		if (!vdev_is_concrete(tvd))
 			continue;
 		if (tvd->vdev_islog && !log_ok)
 			continue;
 		if (tvd->vdev_mg == NULL || tvd->vdev_mg->mg_class == NULL)
 			continue;
 		break;
 	}
 
 	return (top);
 }
 
 /*
  * Return a random value for the given dataset property, as produced by
  * zfs_prop_random_value().  For ZFS_PROP_CHECKSUM the value
  * ZIO_CHECKSUM_OFF is never returned; another value is drawn instead.
  */
 static uint64_t
 ztest_random_dsl_prop(zfs_prop_t prop)
 {
 	for (;;) {
 		uint64_t value;
 
 		value = zfs_prop_random_value(prop, ztest_random(-1ULL));
 		if (prop != ZFS_PROP_CHECKSUM || value != ZIO_CHECKSUM_OFF)
 			return (value);
 	}
 }
 
 /*
  * Set an integer dataset property and read it back.
  *
  * osname  - dataset name.
  * prop    - property to set.
  * value   - value passed to dsl_prop_set_int().
  * inherit - when set, the property is set with source ZPROP_SRC_NONE
  *           instead of ZPROP_SRC_LOCAL.
  *
  * Returns ENOSPC (after recording it) when the set ran out of space; any
  * other failure trips ASSERT0.  Otherwise the property is read back with
  * dsl_prop_get_integer() and, at verbosity 6 or higher, printed together
  * with its setpoint.
  */
 static int
 ztest_dsl_prop_set_uint64(char *osname, zfs_prop_t prop, uint64_t value,
     boolean_t inherit)
 {
 	const char *propname = zfs_prop_to_name(prop);
 	const char *valname;
 	char setpoint[MAXPATHLEN];
 	uint64_t curval;
 	int error;
 
 	error = dsl_prop_set_int(osname, propname,
 	    (inherit ? ZPROP_SRC_NONE : ZPROP_SRC_LOCAL), value);
 
 	if (error == ENOSPC) {
 		ztest_record_enospc(FTAG);
 		return (error);
 	}
 	ASSERT0(error);
 
 	VERIFY0(dsl_prop_get_integer(osname, propname, &curval, setpoint));
 
 	if (ztest_opts.zo_verbose >= 6) {
 		VERIFY(zfs_prop_index_to_string(prop, curval, &valname) == 0);
 		(void) printf("%s %s = %s at '%s'\n",
 		    osname, propname, valname, setpoint);
 	}
 
 	return (error);
 }
 
 /*
  * Set one integer pool property on ztest_spa.
  *
  * Returns the result of spa_prop_set().  ENOSPC is recorded and handed
  * back to the caller; any other failure trips ASSERT0.
  */
 static int
 ztest_spa_prop_set_uint64(zpool_prop_t prop, uint64_t value)
 {
 	nvlist_t *props = NULL;
 	int error;
 
 	VERIFY(nvlist_alloc(&props, NV_UNIQUE_NAME, 0) == 0);
 	VERIFY(nvlist_add_uint64(props, zpool_prop_to_name(prop), value) == 0);
 
 	error = spa_prop_set(ztest_spa, props);
 	nvlist_free(props);
 
 	if (error != ENOSPC) {
 		ASSERT0(error);
 		return (error);
 	}
 
 	ztest_record_enospc(FTAG);
 	return (error);
 }
 
 /*
  * Put an rll_t into its unlocked state (no writer, no readers) and
  * initialize its mutex and condition variable.
  */
 static void
 ztest_rll_init(rll_t *rll)
 {
 	rll->rll_writer = NULL;
 	rll->rll_readers = 0;
-	VERIFY(_mutex_init(&rll->rll_lock, USYNC_THREAD, NULL) == 0);
-	VERIFY(cond_init(&rll->rll_cv, USYNC_THREAD, NULL) == 0);
+	mutex_init(&rll->rll_lock, NULL, USYNC_THREAD, NULL);
+	cv_init(&rll->rll_cv, NULL, USYNC_THREAD, NULL);
 }
 
 /*
  * Tear down an rll_t.  The lock must not be held by anyone (asserted).
  */
 static void
 ztest_rll_destroy(rll_t *rll)
 {
 	ASSERT(rll->rll_writer == NULL);
 	ASSERT(rll->rll_readers == 0);
-	VERIFY(_mutex_destroy(&rll->rll_lock) == 0);
-	VERIFY(cond_destroy(&rll->rll_cv) == 0);
+	mutex_destroy(&rll->rll_lock);
+	cv_destroy(&rll->rll_cv);
 }
 
 /*
  * Acquire an rll_t, blocking on its condition variable until possible.
  *
  * RL_READER waits only while a writer holds the lock and then joins the
  * readers.  Any other type waits until there is neither a writer nor a
  * reader and then records curthread as the writer.
  */
 static void
 ztest_rll_lock(rll_t *rll, rl_type_t type)
 {
-	VERIFY(mutex_lock(&rll->rll_lock) == 0);
+	mutex_enter(&rll->rll_lock);
 
 	if (type == RL_READER) {
 		while (rll->rll_writer != NULL)
-			(void) cond_wait(&rll->rll_cv, &rll->rll_lock);
+			cv_wait(&rll->rll_cv, &rll->rll_lock);
 		rll->rll_readers++;
 	} else {
 		while (rll->rll_writer != NULL || rll->rll_readers)
-			(void) cond_wait(&rll->rll_cv, &rll->rll_lock);
+			cv_wait(&rll->rll_cv, &rll->rll_lock);
 		rll->rll_writer = curthread;
 	}
 
-	VERIFY(mutex_unlock(&rll->rll_lock) == 0);
+	mutex_exit(&rll->rll_lock);
 }
 
 /*
  * Release an rll_t.
  *
  * If a writer is recorded it is cleared; otherwise the reader count is
  * decremented.  Once the lock has neither writer nor readers, every
  * thread waiting in ztest_rll_lock() is woken.
  */
 static void
 ztest_rll_unlock(rll_t *rll)
 {
-	VERIFY(mutex_lock(&rll->rll_lock) == 0);
+	mutex_enter(&rll->rll_lock);
 
 	if (rll->rll_writer) {
 		ASSERT(rll->rll_readers == 0);
 		rll->rll_writer = NULL;
 	} else {
 		ASSERT(rll->rll_readers != 0);
 		ASSERT(rll->rll_writer == NULL);
 		rll->rll_readers--;
 	}
 
 	if (rll->rll_writer == NULL && rll->rll_readers == 0)
-		VERIFY(cond_broadcast(&rll->rll_cv) == 0);
+		cv_broadcast(&rll->rll_cv);
 
-	VERIFY(mutex_unlock(&rll->rll_lock) == 0);
+	mutex_exit(&rll->rll_lock);
 }
 
 static void
 ztest_object_lock(ztest_ds_t *zd, uint64_t object, rl_type_t type)
 {
 	rll_t *rll = &zd->zd_object_lock[object & (ZTEST_OBJECT_LOCKS - 1)];
 
 	ztest_rll_lock(rll, type);
 }
 
 static void
 ztest_object_unlock(ztest_ds_t *zd, uint64_t object)
 {
 	rll_t *rll = &zd->zd_object_lock[object & (ZTEST_OBJECT_LOCKS - 1)];
 
 	ztest_rll_unlock(rll);
 }
 
 static rl_t *
 ztest_range_lock(ztest_ds_t *zd, uint64_t object, uint64_t offset,
     uint64_t size, rl_type_t type)
 {
 	uint64_t hash = object ^ (offset % (ZTEST_RANGE_LOCKS + 1));
 	rll_t *rll = &zd->zd_range_lock[hash & (ZTEST_RANGE_LOCKS - 1)];
 	rl_t *rl;
 
 	rl = umem_alloc(sizeof (*rl), UMEM_NOFAIL);
 	rl->rl_object = object;
 	rl->rl_offset = offset;
 	rl->rl_size = size;
 	rl->rl_lock = rll;
 
 	ztest_rll_lock(rll, type);
 
 	return (rl);
 }
 
 static void
 ztest_range_unlock(rl_t *rl)
 {
 	rll_t *rll = rl->rl_lock;
 
 	ztest_rll_unlock(rll);
 
 	umem_free(rl, sizeof (*rl));
 }
 
 static void
 ztest_zd_init(ztest_ds_t *zd, ztest_shared_ds_t *szd, objset_t *os)
 {
 	zd->zd_os = os;
 	zd->zd_zilog = dmu_objset_zil(os);
 	zd->zd_shared = szd;
 	dmu_objset_name(os, zd->zd_name);
 
 	if (zd->zd_shared != NULL)
 		zd->zd_shared->zd_seq = 0;
 
-	VERIFY(rwlock_init(&zd->zd_zilog_lock, USYNC_THREAD, NULL) == 0);
-	VERIFY(_mutex_init(&zd->zd_dirobj_lock, USYNC_THREAD, NULL) == 0);
+	rw_init(&zd->zd_zilog_lock, NULL, USYNC_THREAD, NULL);
+	mutex_init(&zd->zd_dirobj_lock, NULL, USYNC_THREAD, NULL);
 
 	for (int l = 0; l < ZTEST_OBJECT_LOCKS; l++)
 		ztest_rll_init(&zd->zd_object_lock[l]);
 
 	for (int l = 0; l < ZTEST_RANGE_LOCKS; l++)
 		ztest_rll_init(&zd->zd_range_lock[l]);
 }
 
 static void
 ztest_zd_fini(ztest_ds_t *zd)
 {
-	VERIFY(_mutex_destroy(&zd->zd_dirobj_lock) == 0);
+	mutex_destroy(&zd->zd_dirobj_lock);
 
 	for (int l = 0; l < ZTEST_OBJECT_LOCKS; l++)
 		ztest_rll_destroy(&zd->zd_object_lock[l]);
 
 	for (int l = 0; l < ZTEST_RANGE_LOCKS; l++)
 		ztest_rll_destroy(&zd->zd_range_lock[l]);
 }
 
 #define	TXG_MIGHTWAIT	(ztest_random(10) == 0 ? TXG_NOWAIT : TXG_WAIT)
 
 static uint64_t
 ztest_tx_assign(dmu_tx_t *tx, uint64_t txg_how, const char *tag)
 {
 	uint64_t txg;
 	int error;
 
 	/*
 	 * Attempt to assign tx to some transaction group.
 	 */
 	error = dmu_tx_assign(tx, txg_how);
 	if (error) {
 		if (error == ERESTART) {
 			ASSERT(txg_how == TXG_NOWAIT);
 			dmu_tx_wait(tx);
 		} else {
 			ASSERT3U(error, ==, ENOSPC);
 			ztest_record_enospc(tag);
 		}
 		dmu_tx_abort(tx);
 		return (0);
 	}
 	txg = dmu_tx_get_txg(tx);
 	ASSERT(txg != 0);
 	return (txg);
 }
 
 static void
 ztest_pattern_set(void *buf, uint64_t size, uint64_t value)
 {
 	uint64_t *ip = buf;
 	uint64_t *ip_end = (uint64_t *)((uintptr_t)buf + (uintptr_t)size);
 
 	while (ip < ip_end)
 		*ip++ = value;
 }
 
 static boolean_t
 ztest_pattern_match(void *buf, uint64_t size, uint64_t value)
 {
 	uint64_t *ip = buf;
 	uint64_t *ip_end = (uint64_t *)((uintptr_t)buf + (uintptr_t)size);
 	uint64_t diff = 0;
 
 	while (ip < ip_end)
 		diff |= (value - *ip++);
 
 	return (diff == 0);
 }
 
 static void
 ztest_bt_generate(ztest_block_tag_t *bt, objset_t *os, uint64_t object,
     uint64_t offset, uint64_t gen, uint64_t txg, uint64_t crtxg)
 {
 	bt->bt_magic = BT_MAGIC;
 	bt->bt_objset = dmu_objset_id(os);
 	bt->bt_object = object;
 	bt->bt_offset = offset;
 	bt->bt_gen = gen;
 	bt->bt_txg = txg;
 	bt->bt_crtxg = crtxg;
 }
 
 static void
 ztest_bt_verify(ztest_block_tag_t *bt, objset_t *os, uint64_t object,
     uint64_t offset, uint64_t gen, uint64_t txg, uint64_t crtxg)
 {
 	ASSERT3U(bt->bt_magic, ==, BT_MAGIC);
 	ASSERT3U(bt->bt_objset, ==, dmu_objset_id(os));
 	ASSERT3U(bt->bt_object, ==, object);
 	ASSERT3U(bt->bt_offset, ==, offset);
 	ASSERT3U(bt->bt_gen, <=, gen);
 	ASSERT3U(bt->bt_txg, <=, txg);
 	ASSERT3U(bt->bt_crtxg, ==, crtxg);
 }
 
 static ztest_block_tag_t *
 ztest_bt_bonus(dmu_buf_t *db)
 {
 	dmu_object_info_t doi;
 	ztest_block_tag_t *bt;
 
 	dmu_object_info_from_db(db, &doi);
 	ASSERT3U(doi.doi_bonus_size, <=, db->db_size);
 	ASSERT3U(doi.doi_bonus_size, >=, sizeof (*bt));
 	bt = (void *)((char *)db->db_data + doi.doi_bonus_size - sizeof (*bt));
 
 	return (bt);
 }
 
 /*
  * ZIL logging ops
  */
 
 #define	lrz_type	lr_mode
 #define	lrz_blocksize	lr_uid
 #define	lrz_ibshift	lr_gid
 #define	lrz_bonustype	lr_rdev
 #define	lrz_bonuslen	lr_crtime[1]
 
 static void
 ztest_log_create(ztest_ds_t *zd, dmu_tx_t *tx, lr_create_t *lr)
 {
 	char *name = (void *)(lr + 1);		/* name follows lr */
 	size_t namesize = strlen(name) + 1;
 	itx_t *itx;
 
 	if (zil_replaying(zd->zd_zilog, tx))
 		return;
 
 	itx = zil_itx_create(TX_CREATE, sizeof (*lr) + namesize);
 	bcopy(&lr->lr_common + 1, &itx->itx_lr + 1,
 	    sizeof (*lr) + namesize - sizeof (lr_t));
 
 	zil_itx_assign(zd->zd_zilog, itx, tx);
 }
 
 static void
 ztest_log_remove(ztest_ds_t *zd, dmu_tx_t *tx, lr_remove_t *lr, uint64_t object)
 {
 	char *name = (void *)(lr + 1);		/* name follows lr */
 	size_t namesize = strlen(name) + 1;
 	itx_t *itx;
 
 	if (zil_replaying(zd->zd_zilog, tx))
 		return;
 
 	itx = zil_itx_create(TX_REMOVE, sizeof (*lr) + namesize);
 	bcopy(&lr->lr_common + 1, &itx->itx_lr + 1,
 	    sizeof (*lr) + namesize - sizeof (lr_t));
 
 	itx->itx_oid = object;
 	zil_itx_assign(zd->zd_zilog, itx, tx);
 }
 
 static void
 ztest_log_write(ztest_ds_t *zd, dmu_tx_t *tx, lr_write_t *lr)
 {
 	itx_t *itx;
 	itx_wr_state_t write_state = ztest_random(WR_NUM_STATES);
 
 	if (zil_replaying(zd->zd_zilog, tx))
 		return;
 
 	if (lr->lr_length > ZIL_MAX_LOG_DATA)
 		write_state = WR_INDIRECT;
 
 	itx = zil_itx_create(TX_WRITE,
 	    sizeof (*lr) + (write_state == WR_COPIED ? lr->lr_length : 0));
 
 	if (write_state == WR_COPIED &&
 	    dmu_read(zd->zd_os, lr->lr_foid, lr->lr_offset, lr->lr_length,
 	    ((lr_write_t *)&itx->itx_lr) + 1, DMU_READ_NO_PREFETCH) != 0) {
 		zil_itx_destroy(itx);
 		itx = zil_itx_create(TX_WRITE, sizeof (*lr));
 		write_state = WR_NEED_COPY;
 	}
 	itx->itx_private = zd;
 	itx->itx_wr_state = write_state;
 	itx->itx_sync = (ztest_random(8) == 0);
 
 	bcopy(&lr->lr_common + 1, &itx->itx_lr + 1,
 	    sizeof (*lr) - sizeof (lr_t));
 
 	zil_itx_assign(zd->zd_zilog, itx, tx);
 }
 
 static void
 ztest_log_truncate(ztest_ds_t *zd, dmu_tx_t *tx, lr_truncate_t *lr)
 {
 	itx_t *itx;
 
 	if (zil_replaying(zd->zd_zilog, tx))
 		return;
 
 	itx = zil_itx_create(TX_TRUNCATE, sizeof (*lr));
 	bcopy(&lr->lr_common + 1, &itx->itx_lr + 1,
 	    sizeof (*lr) - sizeof (lr_t));
 
 	itx->itx_sync = B_FALSE;
 	zil_itx_assign(zd->zd_zilog, itx, tx);
 }
 
 static void
 ztest_log_setattr(ztest_ds_t *zd, dmu_tx_t *tx, lr_setattr_t *lr)
 {
 	itx_t *itx;
 
 	if (zil_replaying(zd->zd_zilog, tx))
 		return;
 
 	itx = zil_itx_create(TX_SETATTR, sizeof (*lr));
 	bcopy(&lr->lr_common + 1, &itx->itx_lr + 1,
 	    sizeof (*lr) - sizeof (lr_t));
 
 	itx->itx_sync = B_FALSE;
 	zil_itx_assign(zd->zd_zilog, itx, tx);
 }
 
 /*
  * ZIL replay ops
  */
 static int
 ztest_replay_create(void *arg1, void *arg2, boolean_t byteswap)
 {
 	ztest_ds_t *zd = arg1;
 	lr_create_t *lr = arg2;
 	char *name = (void *)(lr + 1);		/* name follows lr */
 	objset_t *os = zd->zd_os;
 	ztest_block_tag_t *bbt;
 	dmu_buf_t *db;
 	dmu_tx_t *tx;
 	uint64_t txg;
 	int error = 0;
 
 	if (byteswap)
 		byteswap_uint64_array(lr, sizeof (*lr));
 
 	ASSERT(lr->lr_doid == ZTEST_DIROBJ);
 	ASSERT(name[0] != '\0');
 
 	tx = dmu_tx_create(os);
 
 	dmu_tx_hold_zap(tx, lr->lr_doid, B_TRUE, name);
 
 	if (lr->lrz_type == DMU_OT_ZAP_OTHER) {
 		dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, B_TRUE, NULL);
 	} else {
 		dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT);
 	}
 
 	txg = ztest_tx_assign(tx, TXG_WAIT, FTAG);
 	if (txg == 0)
 		return (ENOSPC);
 
 	ASSERT(dmu_objset_zil(os)->zl_replay == !!lr->lr_foid);
 
 	if (lr->lrz_type == DMU_OT_ZAP_OTHER) {
 		if (lr->lr_foid == 0) {
 			lr->lr_foid = zap_create(os,
 			    lr->lrz_type, lr->lrz_bonustype,
 			    lr->lrz_bonuslen, tx);
 		} else {
 			error = zap_create_claim(os, lr->lr_foid,
 			    lr->lrz_type, lr->lrz_bonustype,
 			    lr->lrz_bonuslen, tx);
 		}
 	} else {
 		if (lr->lr_foid == 0) {
 			lr->lr_foid = dmu_object_alloc(os,
 			    lr->lrz_type, 0, lr->lrz_bonustype,
 			    lr->lrz_bonuslen, tx);
 		} else {
 			error = dmu_object_claim(os, lr->lr_foid,
 			    lr->lrz_type, 0, lr->lrz_bonustype,
 			    lr->lrz_bonuslen, tx);
 		}
 	}
 
 	if (error) {
 		ASSERT3U(error, ==, EEXIST);
 		ASSERT(zd->zd_zilog->zl_replay);
 		dmu_tx_commit(tx);
 		return (error);
 	}
 
 	ASSERT(lr->lr_foid != 0);
 
 	if (lr->lrz_type != DMU_OT_ZAP_OTHER)
 		VERIFY3U(0, ==, dmu_object_set_blocksize(os, lr->lr_foid,
 		    lr->lrz_blocksize, lr->lrz_ibshift, tx));
 
 	VERIFY3U(0, ==, dmu_bonus_hold(os, lr->lr_foid, FTAG, &db));
 	bbt = ztest_bt_bonus(db);
 	dmu_buf_will_dirty(db, tx);
 	ztest_bt_generate(bbt, os, lr->lr_foid, -1ULL, lr->lr_gen, txg, txg);
 	dmu_buf_rele(db, FTAG);
 
 	VERIFY3U(0, ==, zap_add(os, lr->lr_doid, name, sizeof (uint64_t), 1,
 	    &lr->lr_foid, tx));
 
 	(void) ztest_log_create(zd, tx, lr);
 
 	dmu_tx_commit(tx);
 
 	return (0);
 }
 
 static int
 ztest_replay_remove(void *arg1, void *arg2, boolean_t byteswap)
 {
 	ztest_ds_t *zd = arg1;
 	lr_remove_t *lr = arg2;
 	char *name = (void *)(lr + 1);		/* name follows lr */
 	objset_t *os = zd->zd_os;
 	dmu_object_info_t doi;
 	dmu_tx_t *tx;
 	uint64_t object, txg;
 
 	if (byteswap)
 		byteswap_uint64_array(lr, sizeof (*lr));
 
 	ASSERT(lr->lr_doid == ZTEST_DIROBJ);
 	ASSERT(name[0] != '\0');
 
 	VERIFY3U(0, ==,
 	    zap_lookup(os, lr->lr_doid, name, sizeof (object), 1, &object));
 	ASSERT(object != 0);
 
 	ztest_object_lock(zd, object, RL_WRITER);
 
 	VERIFY3U(0, ==, dmu_object_info(os, object, &doi));
 
 	tx = dmu_tx_create(os);
 
 	dmu_tx_hold_zap(tx, lr->lr_doid, B_FALSE, name);
 	dmu_tx_hold_free(tx, object, 0, DMU_OBJECT_END);
 
 	txg = ztest_tx_assign(tx, TXG_WAIT, FTAG);
 	if (txg == 0) {
 		ztest_object_unlock(zd, object);
 		return (ENOSPC);
 	}
 
 	if (doi.doi_type == DMU_OT_ZAP_OTHER) {
 		VERIFY3U(0, ==, zap_destroy(os, object, tx));
 	} else {
 		VERIFY3U(0, ==, dmu_object_free(os, object, tx));
 	}
 
 	VERIFY3U(0, ==, zap_remove(os, lr->lr_doid, name, tx));
 
 	(void) ztest_log_remove(zd, tx, lr, object);
 
 	dmu_tx_commit(tx);
 
 	ztest_object_unlock(zd, object);
 
 	return (0);
 }
 
 static int
 ztest_replay_write(void *arg1, void *arg2, boolean_t byteswap)
 {
 	ztest_ds_t *zd = arg1;
 	lr_write_t *lr = arg2;
 	objset_t *os = zd->zd_os;
 	void *data = lr + 1;			/* data follows lr */
 	uint64_t offset, length;
 	ztest_block_tag_t *bt = data;
 	ztest_block_tag_t *bbt;
 	uint64_t gen, txg, lrtxg, crtxg;
 	dmu_object_info_t doi;
 	dmu_tx_t *tx;
 	dmu_buf_t *db;
 	arc_buf_t *abuf = NULL;
 	rl_t *rl;
 
 	if (byteswap)
 		byteswap_uint64_array(lr, sizeof (*lr));
 
 	offset = lr->lr_offset;
 	length = lr->lr_length;
 
 	/* If it's a dmu_sync() block, write the whole block */
 	if (lr->lr_common.lrc_reclen == sizeof (lr_write_t)) {
 		uint64_t blocksize = BP_GET_LSIZE(&lr->lr_blkptr);
 		if (length < blocksize) {
 			offset -= offset % blocksize;
 			length = blocksize;
 		}
 	}
 
 	if (bt->bt_magic == BSWAP_64(BT_MAGIC))
 		byteswap_uint64_array(bt, sizeof (*bt));
 
 	if (bt->bt_magic != BT_MAGIC)
 		bt = NULL;
 
 	ztest_object_lock(zd, lr->lr_foid, RL_READER);
 	rl = ztest_range_lock(zd, lr->lr_foid, offset, length, RL_WRITER);
 
 	VERIFY3U(0, ==, dmu_bonus_hold(os, lr->lr_foid, FTAG, &db));
 
 	dmu_object_info_from_db(db, &doi);
 
 	bbt = ztest_bt_bonus(db);
 	ASSERT3U(bbt->bt_magic, ==, BT_MAGIC);
 	gen = bbt->bt_gen;
 	crtxg = bbt->bt_crtxg;
 	lrtxg = lr->lr_common.lrc_txg;
 
 	tx = dmu_tx_create(os);
 
 	dmu_tx_hold_write(tx, lr->lr_foid, offset, length);
 
 	if (ztest_random(8) == 0 && length == doi.doi_data_block_size &&
 	    P2PHASE(offset, length) == 0)
 		abuf = dmu_request_arcbuf(db, length);
 
 	txg = ztest_tx_assign(tx, TXG_WAIT, FTAG);
 	if (txg == 0) {
 		if (abuf != NULL)
 			dmu_return_arcbuf(abuf);
 		dmu_buf_rele(db, FTAG);
 		ztest_range_unlock(rl);
 		ztest_object_unlock(zd, lr->lr_foid);
 		return (ENOSPC);
 	}
 
 	if (bt != NULL) {
 		/*
 		 * Usually, verify the old data before writing new data --
 		 * but not always, because we also want to verify correct
 		 * behavior when the data was not recently read into cache.
 		 */
 		ASSERT(offset % doi.doi_data_block_size == 0);
 		if (ztest_random(4) != 0) {
 			int prefetch = ztest_random(2) ?
 			    DMU_READ_PREFETCH : DMU_READ_NO_PREFETCH;
 			ztest_block_tag_t rbt;
 
 			VERIFY(dmu_read(os, lr->lr_foid, offset,
 			    sizeof (rbt), &rbt, prefetch) == 0);
 			if (rbt.bt_magic == BT_MAGIC) {
 				ztest_bt_verify(&rbt, os, lr->lr_foid,
 				    offset, gen, txg, crtxg);
 			}
 		}
 
 		/*
 		 * Writes can appear to be newer than the bonus buffer because
 		 * the ztest_get_data() callback does a dmu_read() of the
 		 * open-context data, which may be different than the data
 		 * as it was when the write was generated.
 		 */
 		if (zd->zd_zilog->zl_replay) {
 			ztest_bt_verify(bt, os, lr->lr_foid, offset,
 			    MAX(gen, bt->bt_gen), MAX(txg, lrtxg),
 			    bt->bt_crtxg);
 		}
 
 		/*
 		 * Set the bt's gen/txg to the bonus buffer's gen/txg
 		 * so that all of the usual ASSERTs will work.
 		 */
 		ztest_bt_generate(bt, os, lr->lr_foid, offset, gen, txg, crtxg);
 	}
 
 	if (abuf == NULL) {
 		dmu_write(os, lr->lr_foid, offset, length, data, tx);
 	} else {
 		bcopy(data, abuf->b_data, length);
 		dmu_assign_arcbuf(db, offset, abuf, tx);
 	}
 
 	(void) ztest_log_write(zd, tx, lr);
 
 	dmu_buf_rele(db, FTAG);
 
 	dmu_tx_commit(tx);
 
 	ztest_range_unlock(rl);
 	ztest_object_unlock(zd, lr->lr_foid);
 
 	return (0);
 }
 
 static int
 ztest_replay_truncate(void *arg1, void *arg2, boolean_t byteswap)
 {
 	ztest_ds_t *zd = arg1;
 	lr_truncate_t *lr = arg2;
 	objset_t *os = zd->zd_os;
 	dmu_tx_t *tx;
 	uint64_t txg;
 	rl_t *rl;
 
 	if (byteswap)
 		byteswap_uint64_array(lr, sizeof (*lr));
 
 	ztest_object_lock(zd, lr->lr_foid, RL_READER);
 	rl = ztest_range_lock(zd, lr->lr_foid, lr->lr_offset, lr->lr_length,
 	    RL_WRITER);
 
 	tx = dmu_tx_create(os);
 
 	dmu_tx_hold_free(tx, lr->lr_foid, lr->lr_offset, lr->lr_length);
 
 	txg = ztest_tx_assign(tx, TXG_WAIT, FTAG);
 	if (txg == 0) {
 		ztest_range_unlock(rl);
 		ztest_object_unlock(zd, lr->lr_foid);
 		return (ENOSPC);
 	}
 
 	VERIFY(dmu_free_range(os, lr->lr_foid, lr->lr_offset,
 	    lr->lr_length, tx) == 0);
 
 	(void) ztest_log_truncate(zd, tx, lr);
 
 	dmu_tx_commit(tx);
 
 	ztest_range_unlock(rl);
 	ztest_object_unlock(zd, lr->lr_foid);
 
 	return (0);
 }
 
 static int
 ztest_replay_setattr(void *arg1, void *arg2, boolean_t byteswap)
 {
 	ztest_ds_t *zd = arg1;
 	lr_setattr_t *lr = arg2;
 	objset_t *os = zd->zd_os;
 	dmu_tx_t *tx;
 	dmu_buf_t *db;
 	ztest_block_tag_t *bbt;
 	uint64_t txg, lrtxg, crtxg;
 
 	if (byteswap)
 		byteswap_uint64_array(lr, sizeof (*lr));
 
 	ztest_object_lock(zd, lr->lr_foid, RL_WRITER);
 
 	VERIFY3U(0, ==, dmu_bonus_hold(os, lr->lr_foid, FTAG, &db));
 
 	tx = dmu_tx_create(os);
 	dmu_tx_hold_bonus(tx, lr->lr_foid);
 
 	txg = ztest_tx_assign(tx, TXG_WAIT, FTAG);
 	if (txg == 0) {
 		dmu_buf_rele(db, FTAG);
 		ztest_object_unlock(zd, lr->lr_foid);
 		return (ENOSPC);
 	}
 
 	bbt = ztest_bt_bonus(db);
 	ASSERT3U(bbt->bt_magic, ==, BT_MAGIC);
 	crtxg = bbt->bt_crtxg;
 	lrtxg = lr->lr_common.lrc_txg;
 
 	if (zd->zd_zilog->zl_replay) {
 		ASSERT(lr->lr_size != 0);
 		ASSERT(lr->lr_mode != 0);
 		ASSERT(lrtxg != 0);
 	} else {
 		/*
 		 * Randomly change the size and increment the generation.
 		 */
 		lr->lr_size = (ztest_random(db->db_size / sizeof (*bbt)) + 1) *
 		    sizeof (*bbt);
 		lr->lr_mode = bbt->bt_gen + 1;
 		ASSERT(lrtxg == 0);
 	}
 
 	/*
 	 * Verify that the current bonus buffer is not newer than our txg.
 	 */
 	ztest_bt_verify(bbt, os, lr->lr_foid, -1ULL, lr->lr_mode,
 	    MAX(txg, lrtxg), crtxg);
 
 	dmu_buf_will_dirty(db, tx);
 
 	ASSERT3U(lr->lr_size, >=, sizeof (*bbt));
 	ASSERT3U(lr->lr_size, <=, db->db_size);
 	VERIFY0(dmu_set_bonus(db, lr->lr_size, tx));
 	bbt = ztest_bt_bonus(db);
 
 	ztest_bt_generate(bbt, os, lr->lr_foid, -1ULL, lr->lr_mode, txg, crtxg);
 
 	dmu_buf_rele(db, FTAG);
 
 	(void) ztest_log_setattr(zd, tx, lr);
 
 	dmu_tx_commit(tx);
 
 	ztest_object_unlock(zd, lr->lr_foid);
 
 	return (0);
 }
 
 zil_replay_func_t *ztest_replay_vector[TX_MAX_TYPE] = {
 	NULL,			/* 0 no such transaction type */
 	ztest_replay_create,	/* TX_CREATE */
 	NULL,			/* TX_MKDIR */
 	NULL,			/* TX_MKXATTR */
 	NULL,			/* TX_SYMLINK */
 	ztest_replay_remove,	/* TX_REMOVE */
 	NULL,			/* TX_RMDIR */
 	NULL,			/* TX_LINK */
 	NULL,			/* TX_RENAME */
 	ztest_replay_write,	/* TX_WRITE */
 	ztest_replay_truncate,	/* TX_TRUNCATE */
 	ztest_replay_setattr,	/* TX_SETATTR */
 	NULL,			/* TX_ACL */
 	NULL,			/* TX_CREATE_ACL */
 	NULL,			/* TX_CREATE_ATTR */
 	NULL,			/* TX_CREATE_ACL_ATTR */
 	NULL,			/* TX_MKDIR_ACL */
 	NULL,			/* TX_MKDIR_ATTR */
 	NULL,			/* TX_MKDIR_ACL_ATTR */
 	NULL,			/* TX_WRITE2 */
 };
 
 /*
  * ZIL get_data callbacks
  */
 
 static void
 ztest_get_done(zgd_t *zgd, int error)
 {
 	ztest_ds_t *zd = zgd->zgd_private;
 	uint64_t object = zgd->zgd_rl->rl_object;
 
 	if (zgd->zgd_db)
 		dmu_buf_rele(zgd->zgd_db, zgd);
 
 	ztest_range_unlock(zgd->zgd_rl);
 	ztest_object_unlock(zd, object);
 
 	if (error == 0 && zgd->zgd_bp)
 		zil_lwb_add_block(zgd->zgd_lwb, zgd->zgd_bp);
 
 	umem_free(zgd, sizeof (*zgd));
 }
 
 static int
 ztest_get_data(void *arg, lr_write_t *lr, char *buf, struct lwb *lwb,
     zio_t *zio)
 {
 	ztest_ds_t *zd = arg;
 	objset_t *os = zd->zd_os;
 	uint64_t object = lr->lr_foid;
 	uint64_t offset = lr->lr_offset;
 	uint64_t size = lr->lr_length;
 	uint64_t txg = lr->lr_common.lrc_txg;
 	uint64_t crtxg;
 	dmu_object_info_t doi;
 	dmu_buf_t *db;
 	zgd_t *zgd;
 	int error;
 
 	ASSERT3P(lwb, !=, NULL);
 	ASSERT3P(zio, !=, NULL);
 	ASSERT3U(size, !=, 0);
 
 	ztest_object_lock(zd, object, RL_READER);
 	error = dmu_bonus_hold(os, object, FTAG, &db);
 	if (error) {
 		ztest_object_unlock(zd, object);
 		return (error);
 	}
 
 	crtxg = ztest_bt_bonus(db)->bt_crtxg;
 
 	if (crtxg == 0 || crtxg > txg) {
 		dmu_buf_rele(db, FTAG);
 		ztest_object_unlock(zd, object);
 		return (ENOENT);
 	}
 
 	dmu_object_info_from_db(db, &doi);
 	dmu_buf_rele(db, FTAG);
 	db = NULL;
 
 	zgd = umem_zalloc(sizeof (*zgd), UMEM_NOFAIL);
 	zgd->zgd_lwb = lwb;
 	zgd->zgd_private = zd;
 
 	if (buf != NULL) {	/* immediate write */
 		zgd->zgd_rl = ztest_range_lock(zd, object, offset, size,
 		    RL_READER);
 
 		error = dmu_read(os, object, offset, size, buf,
 		    DMU_READ_NO_PREFETCH);
 		ASSERT(error == 0);
 	} else {
 		size = doi.doi_data_block_size;
 		if (ISP2(size)) {
 			offset = P2ALIGN(offset, size);
 		} else {
 			ASSERT(offset < size);
 			offset = 0;
 		}
 
 		zgd->zgd_rl = ztest_range_lock(zd, object, offset, size,
 		    RL_READER);
 
 		error = dmu_buf_hold(os, object, offset, zgd, &db,
 		    DMU_READ_NO_PREFETCH);
 
 		if (error == 0) {
 			blkptr_t *bp = &lr->lr_blkptr;
 
 			zgd->zgd_db = db;
 			zgd->zgd_bp = bp;
 
 			ASSERT(db->db_offset == offset);
 			ASSERT(db->db_size == size);
 
 			error = dmu_sync(zio, lr->lr_common.lrc_txg,
 			    ztest_get_done, zgd);
 
 			if (error == 0)
 				return (0);
 		}
 	}
 
 	ztest_get_done(zgd, error);
 
 	return (error);
 }
 
 static void *
 ztest_lr_alloc(size_t lrsize, char *name)
 {
 	char *lr;
 	size_t namesize = name ? strlen(name) + 1 : 0;
 
 	lr = umem_zalloc(lrsize + namesize, UMEM_NOFAIL);
 
 	if (name)
 		bcopy(name, lr + lrsize, namesize);
 
 	return (lr);
 }
 
 void
 ztest_lr_free(void *lr, size_t lrsize, char *name)
 {
 	size_t namesize = name ? strlen(name) + 1 : 0;
 
 	umem_free(lr, lrsize + namesize);
 }
 
 /*
  * Lookup a bunch of objects.  Returns the number of objects not found.
  */
 static int
 ztest_lookup(ztest_ds_t *zd, ztest_od_t *od, int count)
 {
 	int missing = 0;
 	int error;
 
-	ASSERT(_mutex_held(&zd->zd_dirobj_lock));
+	ASSERT(MUTEX_HELD(&zd->zd_dirobj_lock));
 
 	for (int i = 0; i < count; i++, od++) {
 		od->od_object = 0;
 		error = zap_lookup(zd->zd_os, od->od_dir, od->od_name,
 		    sizeof (uint64_t), 1, &od->od_object);
 		if (error) {
 			ASSERT(error == ENOENT);
 			ASSERT(od->od_object == 0);
 			missing++;
 		} else {
 			dmu_buf_t *db;
 			ztest_block_tag_t *bbt;
 			dmu_object_info_t doi;
 
 			ASSERT(od->od_object != 0);
 			ASSERT(missing == 0);	/* there should be no gaps */
 
 			ztest_object_lock(zd, od->od_object, RL_READER);
 			VERIFY3U(0, ==, dmu_bonus_hold(zd->zd_os,
 			    od->od_object, FTAG, &db));
 			dmu_object_info_from_db(db, &doi);
 			bbt = ztest_bt_bonus(db);
 			ASSERT3U(bbt->bt_magic, ==, BT_MAGIC);
 			od->od_type = doi.doi_type;
 			od->od_blocksize = doi.doi_data_block_size;
 			od->od_gen = bbt->bt_gen;
 			dmu_buf_rele(db, FTAG);
 			ztest_object_unlock(zd, od->od_object);
 		}
 	}
 
 	return (missing);
 }
 
 static int
 ztest_create(ztest_ds_t *zd, ztest_od_t *od, int count)
 {
 	int missing = 0;
 
-	ASSERT(_mutex_held(&zd->zd_dirobj_lock));
+	ASSERT(MUTEX_HELD(&zd->zd_dirobj_lock));
 
 	for (int i = 0; i < count; i++, od++) {
 		if (missing) {
 			od->od_object = 0;
 			missing++;
 			continue;
 		}
 
 		lr_create_t *lr = ztest_lr_alloc(sizeof (*lr), od->od_name);
 
 		lr->lr_doid = od->od_dir;
 		lr->lr_foid = 0;	/* 0 to allocate, > 0 to claim */
 		lr->lrz_type = od->od_crtype;
 		lr->lrz_blocksize = od->od_crblocksize;
 		lr->lrz_ibshift = ztest_random_ibshift();
 		lr->lrz_bonustype = DMU_OT_UINT64_OTHER;
 		lr->lrz_bonuslen = dmu_bonus_max();
 		lr->lr_gen = od->od_crgen;
 		lr->lr_crtime[0] = time(NULL);
 
 		if (ztest_replay_create(zd, lr, B_FALSE) != 0) {
 			ASSERT(missing == 0);
 			od->od_object = 0;
 			missing++;
 		} else {
 			od->od_object = lr->lr_foid;
 			od->od_type = od->od_crtype;
 			od->od_blocksize = od->od_crblocksize;
 			od->od_gen = od->od_crgen;
 			ASSERT(od->od_object != 0);
 		}
 
 		ztest_lr_free(lr, sizeof (*lr), od->od_name);
 	}
 
 	return (missing);
 }
 
 static int
 ztest_remove(ztest_ds_t *zd, ztest_od_t *od, int count)
 {
 	int missing = 0;
 	int error;
 
-	ASSERT(_mutex_held(&zd->zd_dirobj_lock));
+	ASSERT(MUTEX_HELD(&zd->zd_dirobj_lock));
 
 	od += count - 1;
 
 	for (int i = count - 1; i >= 0; i--, od--) {
 		if (missing) {
 			missing++;
 			continue;
 		}
 
 		/*
 		 * No object was found.
 		 */
 		if (od->od_object == 0)
 			continue;
 
 		lr_remove_t *lr = ztest_lr_alloc(sizeof (*lr), od->od_name);
 
 		lr->lr_doid = od->od_dir;
 
 		if ((error = ztest_replay_remove(zd, lr, B_FALSE)) != 0) {
 			ASSERT3U(error, ==, ENOSPC);
 			missing++;
 		} else {
 			od->od_object = 0;
 		}
 		ztest_lr_free(lr, sizeof (*lr), od->od_name);
 	}
 
 	return (missing);
 }
 
 static int
 ztest_write(ztest_ds_t *zd, uint64_t object, uint64_t offset, uint64_t size,
     void *data)
 {
 	lr_write_t *lr;
 	int error;
 
 	lr = ztest_lr_alloc(sizeof (*lr) + size, NULL);
 
 	lr->lr_foid = object;
 	lr->lr_offset = offset;
 	lr->lr_length = size;
 	lr->lr_blkoff = 0;
 	BP_ZERO(&lr->lr_blkptr);
 
 	bcopy(data, lr + 1, size);
 
 	error = ztest_replay_write(zd, lr, B_FALSE);
 
 	ztest_lr_free(lr, sizeof (*lr) + size, NULL);
 
 	return (error);
 }
 
 static int
 ztest_truncate(ztest_ds_t *zd, uint64_t object, uint64_t offset, uint64_t size)
 {
 	lr_truncate_t *lr;
 	int error;
 
 	lr = ztest_lr_alloc(sizeof (*lr), NULL);
 
 	lr->lr_foid = object;
 	lr->lr_offset = offset;
 	lr->lr_length = size;
 
 	error = ztest_replay_truncate(zd, lr, B_FALSE);
 
 	ztest_lr_free(lr, sizeof (*lr), NULL);
 
 	return (error);
 }
 
 static int
 ztest_setattr(ztest_ds_t *zd, uint64_t object)
 {
 	lr_setattr_t *lr;
 	int error;
 
 	lr = ztest_lr_alloc(sizeof (*lr), NULL);
 
 	lr->lr_foid = object;
 	lr->lr_size = 0;
 	lr->lr_mode = 0;
 
 	error = ztest_replay_setattr(zd, lr, B_FALSE);
 
 	ztest_lr_free(lr, sizeof (*lr), NULL);
 
 	return (error);
 }
 
 static void
 ztest_prealloc(ztest_ds_t *zd, uint64_t object, uint64_t offset, uint64_t size)
 {
 	objset_t *os = zd->zd_os;
 	dmu_tx_t *tx;
 	uint64_t txg;
 	rl_t *rl;
 
 	txg_wait_synced(dmu_objset_pool(os), 0);
 
 	ztest_object_lock(zd, object, RL_READER);
 	rl = ztest_range_lock(zd, object, offset, size, RL_WRITER);
 
 	tx = dmu_tx_create(os);
 
 	dmu_tx_hold_write(tx, object, offset, size);
 
 	txg = ztest_tx_assign(tx, TXG_WAIT, FTAG);
 
 	if (txg != 0) {
 		dmu_prealloc(os, object, offset, size, tx);
 		dmu_tx_commit(tx);
 		txg_wait_synced(dmu_objset_pool(os), txg);
 	} else {
 		(void) dmu_free_long_range(os, object, offset, size);
 	}
 
 	ztest_range_unlock(rl);
 	ztest_object_unlock(zd, object);
 }
 
 static void
 ztest_io(ztest_ds_t *zd, uint64_t object, uint64_t offset)
 {
 	int err;
 	ztest_block_tag_t wbt;
 	dmu_object_info_t doi;
 	enum ztest_io_type io_type;
 	uint64_t blocksize;
 	void *data;
 
 	VERIFY(dmu_object_info(zd->zd_os, object, &doi) == 0);
 	blocksize = doi.doi_data_block_size;
 	data = umem_alloc(blocksize, UMEM_NOFAIL);
 
 	/*
 	 * Pick an i/o type at random, biased toward writing block tags.
 	 */
 	io_type = ztest_random(ZTEST_IO_TYPES);
 	if (ztest_random(2) == 0)
 		io_type = ZTEST_IO_WRITE_TAG;
 
-	(void) rw_rdlock(&zd->zd_zilog_lock);
+	rw_enter(&zd->zd_zilog_lock, RW_READER);
 
 	switch (io_type) {
 
 	case ZTEST_IO_WRITE_TAG:
 		ztest_bt_generate(&wbt, zd->zd_os, object, offset, 0, 0, 0);
 		(void) ztest_write(zd, object, offset, sizeof (wbt), &wbt);
 		break;
 
 	case ZTEST_IO_WRITE_PATTERN:
 		(void) memset(data, 'a' + (object + offset) % 5, blocksize);
 		if (ztest_random(2) == 0) {
 			/*
 			 * Induce fletcher2 collisions to ensure that
 			 * zio_ddt_collision() detects and resolves them
 			 * when using fletcher2-verify for deduplication.
 			 */
 			((uint64_t *)data)[0] ^= 1ULL << 63;
 			((uint64_t *)data)[4] ^= 1ULL << 63;
 		}
 		(void) ztest_write(zd, object, offset, blocksize, data);
 		break;
 
 	case ZTEST_IO_WRITE_ZEROES:
 		bzero(data, blocksize);
 		(void) ztest_write(zd, object, offset, blocksize, data);
 		break;
 
 	case ZTEST_IO_TRUNCATE:
 		(void) ztest_truncate(zd, object, offset, blocksize);
 		break;
 
 	case ZTEST_IO_SETATTR:
 		(void) ztest_setattr(zd, object);
 		break;
 
 	case ZTEST_IO_REWRITE:
-		(void) rw_rdlock(&ztest_name_lock);
+		rw_enter(&ztest_name_lock, RW_READER);
 		err = ztest_dsl_prop_set_uint64(zd->zd_name,
 		    ZFS_PROP_CHECKSUM, spa_dedup_checksum(ztest_spa),
 		    B_FALSE);
 		VERIFY(err == 0 || err == ENOSPC);
 		err = ztest_dsl_prop_set_uint64(zd->zd_name,
 		    ZFS_PROP_COMPRESSION,
 		    ztest_random_dsl_prop(ZFS_PROP_COMPRESSION),
 		    B_FALSE);
 		VERIFY(err == 0 || err == ENOSPC);
-		(void) rw_unlock(&ztest_name_lock);
+		rw_exit(&ztest_name_lock);
 
 		VERIFY0(dmu_read(zd->zd_os, object, offset, blocksize, data,
 		    DMU_READ_NO_PREFETCH));
 
 		(void) ztest_write(zd, object, offset, blocksize, data);
 		break;
 	}
 
-	(void) rw_unlock(&zd->zd_zilog_lock);
+	rw_exit(&zd->zd_zilog_lock);
 
 	umem_free(data, blocksize);
 }
 
 /*
  * Initialize an object description template.
  */
 static void
 ztest_od_init(ztest_od_t *od, uint64_t id, char *tag, uint64_t index,
     dmu_object_type_t type, uint64_t blocksize, uint64_t gen)
 {
 	od->od_dir = ZTEST_DIROBJ;
 	od->od_object = 0;
 
 	od->od_crtype = type;
 	od->od_crblocksize = blocksize ? blocksize : ztest_random_blocksize();
 	od->od_crgen = gen;
 
 	od->od_type = DMU_OT_NONE;
 	od->od_blocksize = 0;
 	od->od_gen = 0;
 
 	(void) snprintf(od->od_name, sizeof (od->od_name), "%s(%lld)[%llu]",
 	    tag, (int64_t)id, index);
 }
 
 /*
  * Lookup or create the objects for a test using the od template.
  * If the objects do not all exist, or if 'remove' is specified,
  * remove any existing objects and create new ones.  Otherwise,
  * use the existing objects.
  */
 static int
 ztest_object_init(ztest_ds_t *zd, ztest_od_t *od, size_t size, boolean_t remove)
 {
 	int count = size / sizeof (*od);
 	int rv = 0;
 
-	VERIFY(mutex_lock(&zd->zd_dirobj_lock) == 0);
+	mutex_enter(&zd->zd_dirobj_lock);
 	if ((ztest_lookup(zd, od, count) != 0 || remove) &&
 	    (ztest_remove(zd, od, count) != 0 ||
 	    ztest_create(zd, od, count) != 0))
 		rv = -1;
 	zd->zd_od = od;
-	VERIFY(mutex_unlock(&zd->zd_dirobj_lock) == 0);
+	mutex_exit(&zd->zd_dirobj_lock);
 
 	return (rv);
 }
 
 /* ARGSUSED */
 void
 ztest_zil_commit(ztest_ds_t *zd, uint64_t id)
 {
 	zilog_t *zilog = zd->zd_zilog;
 
-	(void) rw_rdlock(&zd->zd_zilog_lock);
+	rw_enter(&zd->zd_zilog_lock, RW_READER);
 
 	zil_commit(zilog, ztest_random(ZTEST_OBJECTS));
 
 	/*
 	 * Remember the committed values in zd, which is in parent/child
 	 * shared memory.  If we die, the next iteration of ztest_run()
 	 * will verify that the log really does contain this record.
 	 */
 	mutex_enter(&zilog->zl_lock);
 	ASSERT(zd->zd_shared != NULL);
 	ASSERT3U(zd->zd_shared->zd_seq, <=, zilog->zl_commit_lr_seq);
 	zd->zd_shared->zd_seq = zilog->zl_commit_lr_seq;
 	mutex_exit(&zilog->zl_lock);
 
-	(void) rw_unlock(&zd->zd_zilog_lock);
+	rw_exit(&zd->zd_zilog_lock);
 }
 
 /*
  * This function is designed to simulate the operations that occur during a
  * mount/unmount operation.  We hold the dataset across these operations in an
  * attempt to expose any implicit assumptions about ZIL management.
  */
 /* ARGSUSED */
 void
 ztest_zil_remount(ztest_ds_t *zd, uint64_t id)
 {
 	objset_t *os = zd->zd_os;
 
 	/*
 	 * We grab the zd_dirobj_lock to ensure that no other thread is
 	 * updating the zil (i.e. adding in-memory log records) and the
 	 * zd_zilog_lock to block any I/O.
 	 */
-	VERIFY0(mutex_lock(&zd->zd_dirobj_lock));
-	(void) rw_wrlock(&zd->zd_zilog_lock);
+	mutex_enter(&zd->zd_dirobj_lock);
+	rw_enter(&zd->zd_zilog_lock, RW_WRITER);
 
 	/* zfsvfs_teardown() */
 	zil_close(zd->zd_zilog);
 
 	/* zfsvfs_setup() */
 	VERIFY(zil_open(os, ztest_get_data) == zd->zd_zilog);
 	zil_replay(os, zd, ztest_replay_vector);
 
-	(void) rw_unlock(&zd->zd_zilog_lock);
-	VERIFY(mutex_unlock(&zd->zd_dirobj_lock) == 0);
+	rw_exit(&zd->zd_zilog_lock);
+	mutex_exit(&zd->zd_dirobj_lock);
 }
 
 /*
  * Verify that we can't destroy an active pool, create an existing pool,
  * or create a pool with a bad vdev spec.
  */
 /* ARGSUSED */
 void
 ztest_spa_create_destroy(ztest_ds_t *zd, uint64_t id)
 {
 	ztest_shared_opts_t *zo = &ztest_opts;
 	spa_t *spa;
 	nvlist_t *nvroot;
 
 	/*
 	 * Attempt to create using a bad file.
 	 */
 	nvroot = make_vdev_root("/dev/bogus", NULL, NULL, 0, 0, 0, 0, 0, 1);
 	VERIFY3U(ENOENT, ==,
 	    spa_create("ztest_bad_file", nvroot, NULL, NULL));
 	nvlist_free(nvroot);
 
 	/*
 	 * Attempt to create using a bad mirror.
 	 */
 	nvroot = make_vdev_root("/dev/bogus", NULL, NULL, 0, 0, 0, 0, 2, 1);
 	VERIFY3U(ENOENT, ==,
 	    spa_create("ztest_bad_mirror", nvroot, NULL, NULL));
 	nvlist_free(nvroot);
 
 	/*
 	 * Attempt to create an existing pool.  It shouldn't matter
 	 * what's in the nvroot; we should fail with EEXIST.
 	 */
-	(void) rw_rdlock(&ztest_name_lock);
+	rw_enter(&ztest_name_lock, RW_READER);
 	nvroot = make_vdev_root("/dev/bogus", NULL, NULL, 0, 0, 0, 0, 0, 1);
 	VERIFY3U(EEXIST, ==, spa_create(zo->zo_pool, nvroot, NULL, NULL));
 	nvlist_free(nvroot);
 	VERIFY3U(0, ==, spa_open(zo->zo_pool, &spa, FTAG));
 	VERIFY3U(EBUSY, ==, spa_destroy(zo->zo_pool));
 	spa_close(spa, FTAG);
 
-	(void) rw_unlock(&ztest_name_lock);
+	rw_exit(&ztest_name_lock);
 }
 
 /* ARGSUSED */
 void
 ztest_spa_upgrade(ztest_ds_t *zd, uint64_t id)
 {
 	spa_t *spa;
 	uint64_t initial_version = SPA_VERSION_INITIAL;
 	uint64_t version, newversion;
 	nvlist_t *nvroot, *props;
 	char *name;
 
-	VERIFY0(mutex_lock(&ztest_vdev_lock));
+	mutex_enter(&ztest_vdev_lock);
 	name = kmem_asprintf("%s_upgrade", ztest_opts.zo_pool);
 
 	/*
 	 * Clean up from previous runs.
 	 */
 	(void) spa_destroy(name);
 
 	nvroot = make_vdev_root(NULL, NULL, name, ztest_opts.zo_vdev_size, 0,
 	    0, ztest_opts.zo_raidz, ztest_opts.zo_mirrors, 1);
 
 	/*
 	 * If we're configuring a RAIDZ device then make sure that the
 	 * the initial version is capable of supporting that feature.
 	 */
 	switch (ztest_opts.zo_raidz_parity) {
 	case 0:
 	case 1:
 		initial_version = SPA_VERSION_INITIAL;
 		break;
 	case 2:
 		initial_version = SPA_VERSION_RAIDZ2;
 		break;
 	case 3:
 		initial_version = SPA_VERSION_RAIDZ3;
 		break;
 	}
 
 	/*
 	 * Create a pool with a spa version that can be upgraded. Pick
 	 * a value between initial_version and SPA_VERSION_BEFORE_FEATURES.
 	 */
 	do {
 		version = ztest_random_spa_version(initial_version);
 	} while (version > SPA_VERSION_BEFORE_FEATURES);
 
 	props = fnvlist_alloc();
 	fnvlist_add_uint64(props,
 	    zpool_prop_to_name(ZPOOL_PROP_VERSION), version);
 	VERIFY0(spa_create(name, nvroot, props, NULL));
 	fnvlist_free(nvroot);
 	fnvlist_free(props);
 
 	VERIFY0(spa_open(name, &spa, FTAG));
 	VERIFY3U(spa_version(spa), ==, version);
 	newversion = ztest_random_spa_version(version + 1);
 
 	if (ztest_opts.zo_verbose >= 4) {
 		(void) printf("upgrading spa version from %llu to %llu\n",
 		    (u_longlong_t)version, (u_longlong_t)newversion);
 	}
 
 	spa_upgrade(spa, newversion);
 	VERIFY3U(spa_version(spa), >, version);
 	VERIFY3U(spa_version(spa), ==, fnvlist_lookup_uint64(spa->spa_config,
 	    zpool_prop_to_name(ZPOOL_PROP_VERSION)));
 	spa_close(spa, FTAG);
 
 	strfree(name);
-	VERIFY0(mutex_unlock(&ztest_vdev_lock));
+	mutex_exit(&ztest_vdev_lock);
 }
 
 static vdev_t *
 vdev_lookup_by_path(vdev_t *vd, const char *path)
 {
 	vdev_t *mvd;
 
 	if (vd->vdev_path != NULL && strcmp(path, vd->vdev_path) == 0)
 		return (vd);
 
 	for (int c = 0; c < vd->vdev_children; c++)
 		if ((mvd = vdev_lookup_by_path(vd->vdev_child[c], path)) !=
 		    NULL)
 			return (mvd);
 
 	return (NULL);
 }
 
 /*
  * Find the first available hole which can be used as a top-level.
  */
 int
 find_vdev_hole(spa_t *spa)
 {
 	vdev_t *rvd = spa->spa_root_vdev;
 	int c;
 
 	ASSERT(spa_config_held(spa, SCL_VDEV, RW_READER) == SCL_VDEV);
 
 	for (c = 0; c < rvd->vdev_children; c++) {
 		vdev_t *cvd = rvd->vdev_child[c];
 
 		if (cvd->vdev_ishole)
 			break;
 	}
 	return (c);
 }
 
 /*
  * Verify that vdev_add() works as expected.
  */
 /* ARGSUSED */
 void
 ztest_vdev_add_remove(ztest_ds_t *zd, uint64_t id)
 {
 	ztest_shared_t *zs = ztest_shared;
 	spa_t *spa = ztest_spa;
 	uint64_t leaves;
 	uint64_t guid;
 	nvlist_t *nvroot;
 	int error;
 
-	VERIFY(mutex_lock(&ztest_vdev_lock) == 0);
+	mutex_enter(&ztest_vdev_lock);
 	leaves = MAX(zs->zs_mirrors + zs->zs_splits, 1) * ztest_opts.zo_raidz;
 
 	spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
 
 	ztest_shared->zs_vdev_next_leaf = find_vdev_hole(spa) * leaves;
 
 	/*
 	 * If we have slogs then remove them 1/4 of the time.
 	 */
 	if (spa_has_slogs(spa) && ztest_random(4) == 0) {
 		/*
 		 * Grab the guid from the head of the log class rotor.
 		 */
 		guid = spa_log_class(spa)->mc_rotor->mg_vd->vdev_guid;
 
 		spa_config_exit(spa, SCL_VDEV, FTAG);
 
 		/*
 		 * We have to grab the zs_name_lock as writer to
 		 * prevent a race between removing a slog (dmu_objset_find)
 		 * and destroying a dataset. Removing the slog will
 		 * grab a reference on the dataset which may cause
 		 * dmu_objset_destroy() to fail with EBUSY thus
 		 * leaving the dataset in an inconsistent state.
 		 */
-		VERIFY(rw_wrlock(&ztest_name_lock) == 0);
+		rw_enter(&ztest_name_lock, RW_WRITER);
 		error = spa_vdev_remove(spa, guid, B_FALSE);
-		VERIFY(rw_unlock(&ztest_name_lock) == 0);
+		rw_exit(&ztest_name_lock);
 
 		if (error && error != EEXIST)
 			fatal(0, "spa_vdev_remove() = %d", error);
 	} else {
 		spa_config_exit(spa, SCL_VDEV, FTAG);
 
 		/*
 		 * Make 1/4 of the devices be log devices.
 		 */
 		nvroot = make_vdev_root(NULL, NULL, NULL,
 		    ztest_opts.zo_vdev_size, 0,
 		    ztest_random(4) == 0, ztest_opts.zo_raidz,
 		    zs->zs_mirrors, 1);
 
 		error = spa_vdev_add(spa, nvroot);
 		nvlist_free(nvroot);
 
 		if (error == ENOSPC)
 			ztest_record_enospc("spa_vdev_add");
 		else if (error != 0)
 			fatal(0, "spa_vdev_add() = %d", error);
 	}
 
-	VERIFY(mutex_unlock(&ztest_vdev_lock) == 0);
+	mutex_exit(&ztest_vdev_lock);
 }
 
 /*
  * Verify that adding/removing aux devices (l2arc, hot spare) works as expected.
  */
 /* ARGSUSED */
 void
 ztest_vdev_aux_add_remove(ztest_ds_t *zd, uint64_t id)
 {
 	ztest_shared_t *zs = ztest_shared;
 	spa_t *spa = ztest_spa;
 	vdev_t *rvd = spa->spa_root_vdev;
 	spa_aux_vdev_t *sav;
 	char *aux;
 	uint64_t guid = 0;
 	int error;
 
 	if (ztest_random(2) == 0) {
 		sav = &spa->spa_spares;
 		aux = ZPOOL_CONFIG_SPARES;
 	} else {
 		sav = &spa->spa_l2cache;
 		aux = ZPOOL_CONFIG_L2CACHE;
 	}
 
-	VERIFY(mutex_lock(&ztest_vdev_lock) == 0);
+	mutex_enter(&ztest_vdev_lock);
 
 	spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
 
 	if (sav->sav_count != 0 && ztest_random(4) == 0) {
 		/*
 		 * Pick a random device to remove.
 		 */
 		guid = sav->sav_vdevs[ztest_random(sav->sav_count)]->vdev_guid;
 	} else {
 		/*
 		 * Find an unused device we can add.
 		 */
 		zs->zs_vdev_aux = 0;
 		for (;;) {
 			char path[MAXPATHLEN];
 			int c;
 			(void) snprintf(path, sizeof (path), ztest_aux_template,
 			    ztest_opts.zo_dir, ztest_opts.zo_pool, aux,
 			    zs->zs_vdev_aux);
 			for (c = 0; c < sav->sav_count; c++)
 				if (strcmp(sav->sav_vdevs[c]->vdev_path,
 				    path) == 0)
 					break;
 			if (c == sav->sav_count &&
 			    vdev_lookup_by_path(rvd, path) == NULL)
 				break;
 			zs->zs_vdev_aux++;
 		}
 	}
 
 	spa_config_exit(spa, SCL_VDEV, FTAG);
 
 	if (guid == 0) {
 		/*
 		 * Add a new device.
 		 */
 		nvlist_t *nvroot = make_vdev_root(NULL, aux, NULL,
 		    (ztest_opts.zo_vdev_size * 5) / 4, 0, 0, 0, 0, 1);
 		error = spa_vdev_add(spa, nvroot);
 		if (error != 0)
 			fatal(0, "spa_vdev_add(%p) = %d", nvroot, error);
 		nvlist_free(nvroot);
 	} else {
 		/*
 		 * Remove an existing device.  Sometimes, dirty its
 		 * vdev state first to make sure we handle removal
 		 * of devices that have pending state changes.
 		 */
 		if (ztest_random(2) == 0)
 			(void) vdev_online(spa, guid, 0, NULL);
 
 		error = spa_vdev_remove(spa, guid, B_FALSE);
 		if (error != 0 && error != EBUSY)
 			fatal(0, "spa_vdev_remove(%llu) = %d", guid, error);
 	}
 
-	VERIFY(mutex_unlock(&ztest_vdev_lock) == 0);
+	mutex_exit(&ztest_vdev_lock);
 }
 
 /*
  * split a pool if it has mirror tlvdevs
  */
 /* ARGSUSED */
 void
 ztest_split_pool(ztest_ds_t *zd, uint64_t id)
 {
 	ztest_shared_t *zs = ztest_shared;
 	spa_t *spa = ztest_spa;
 	vdev_t *rvd = spa->spa_root_vdev;
 	nvlist_t *tree, **child, *config, *split, **schild;
 	uint_t c, children, schildren = 0, lastlogid = 0;
 	int error = 0;
 
-	VERIFY(mutex_lock(&ztest_vdev_lock) == 0);
+	mutex_enter(&ztest_vdev_lock);
 
 	/* ensure we have a useable config; mirrors of raidz aren't supported */
 	if (zs->zs_mirrors < 3 || ztest_opts.zo_raidz > 1) {
-		VERIFY(mutex_unlock(&ztest_vdev_lock) == 0);
+		mutex_exit(&ztest_vdev_lock);
 		return;
 	}
 
 	/* clean up the old pool, if any */
 	(void) spa_destroy("splitp");
 
 	spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
 
 	/* generate a config from the existing config */
 	mutex_enter(&spa->spa_props_lock);
 	VERIFY(nvlist_lookup_nvlist(spa->spa_config, ZPOOL_CONFIG_VDEV_TREE,
 	    &tree) == 0);
 	mutex_exit(&spa->spa_props_lock);
 
 	VERIFY(nvlist_lookup_nvlist_array(tree, ZPOOL_CONFIG_CHILDREN, &child,
 	    &children) == 0);
 
 	schild = malloc(rvd->vdev_children * sizeof (nvlist_t *));
 	for (c = 0; c < children; c++) {
 		vdev_t *tvd = rvd->vdev_child[c];
 		nvlist_t **mchild;
 		uint_t mchildren;
 
 		if (tvd->vdev_islog || tvd->vdev_ops == &vdev_hole_ops) {
 			VERIFY(nvlist_alloc(&schild[schildren], NV_UNIQUE_NAME,
 			    0) == 0);
 			VERIFY(nvlist_add_string(schild[schildren],
 			    ZPOOL_CONFIG_TYPE, VDEV_TYPE_HOLE) == 0);
 			VERIFY(nvlist_add_uint64(schild[schildren],
 			    ZPOOL_CONFIG_IS_HOLE, 1) == 0);
 			if (lastlogid == 0)
 				lastlogid = schildren;
 			++schildren;
 			continue;
 		}
 		lastlogid = 0;
 		VERIFY(nvlist_lookup_nvlist_array(child[c],
 		    ZPOOL_CONFIG_CHILDREN, &mchild, &mchildren) == 0);
 		VERIFY(nvlist_dup(mchild[0], &schild[schildren++], 0) == 0);
 	}
 
 	/* OK, create a config that can be used to split */
 	VERIFY(nvlist_alloc(&split, NV_UNIQUE_NAME, 0) == 0);
 	VERIFY(nvlist_add_string(split, ZPOOL_CONFIG_TYPE,
 	    VDEV_TYPE_ROOT) == 0);
 	VERIFY(nvlist_add_nvlist_array(split, ZPOOL_CONFIG_CHILDREN, schild,
 	    lastlogid != 0 ? lastlogid : schildren) == 0);
 
 	VERIFY(nvlist_alloc(&config, NV_UNIQUE_NAME, 0) == 0);
 	VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, split) == 0);
 
 	for (c = 0; c < schildren; c++)
 		nvlist_free(schild[c]);
 	free(schild);
 	nvlist_free(split);
 
 	spa_config_exit(spa, SCL_VDEV, FTAG);
 
-	(void) rw_wrlock(&ztest_name_lock);
+	rw_enter(&ztest_name_lock, RW_WRITER);
 	error = spa_vdev_split_mirror(spa, "splitp", config, NULL, B_FALSE);
-	(void) rw_unlock(&ztest_name_lock);
+	rw_exit(&ztest_name_lock);
 
 	nvlist_free(config);
 
 	if (error == 0) {
 		(void) printf("successful split - results:\n");
 		mutex_enter(&spa_namespace_lock);
 		show_pool_stats(spa);
 		show_pool_stats(spa_lookup("splitp"));
 		mutex_exit(&spa_namespace_lock);
 		++zs->zs_splits;
 		--zs->zs_mirrors;
 	}
-	VERIFY(mutex_unlock(&ztest_vdev_lock) == 0);
+	mutex_exit(&ztest_vdev_lock);
 
 }
 
 /*
  * Verify that we can attach and detach devices.
  */
 /* ARGSUSED */
 void
 ztest_vdev_attach_detach(ztest_ds_t *zd, uint64_t id)
 {
 	ztest_shared_t *zs = ztest_shared;
 	spa_t *spa = ztest_spa;
 	spa_aux_vdev_t *sav = &spa->spa_spares;
 	vdev_t *rvd = spa->spa_root_vdev;
 	vdev_t *oldvd, *newvd, *pvd;
 	nvlist_t *root;
 	uint64_t leaves;
 	uint64_t leaf, top;
 	uint64_t ashift = ztest_get_ashift();
 	uint64_t oldguid, pguid;
 	uint64_t oldsize, newsize;
 	char oldpath[MAXPATHLEN], newpath[MAXPATHLEN];
 	int replacing;
 	int oldvd_has_siblings = B_FALSE;
 	int newvd_is_spare = B_FALSE;
 	int oldvd_is_log;
 	int error, expected_error;
 
-	VERIFY(mutex_lock(&ztest_vdev_lock) == 0);
+	mutex_enter(&ztest_vdev_lock);
 	leaves = MAX(zs->zs_mirrors, 1) * ztest_opts.zo_raidz;
 
 	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
 
 	/*
 	 * If a vdev is in the process of being removed, its removal may
 	 * finish while we are in progress, leading to an unexpected error
 	 * value.  Don't bother trying to attach while we are in the middle
 	 * of removal.
 	 */
 	if (spa->spa_vdev_removal != NULL) {
 		spa_config_exit(spa, SCL_ALL, FTAG);
-		VERIFY(mutex_unlock(&ztest_vdev_lock) == 0);
+		mutex_exit(&ztest_vdev_lock);
 		return;
 	}
 
 	/*
 	 * Decide whether to do an attach or a replace.
 	 */
 	replacing = ztest_random(2);
 
 	/*
 	 * Pick a random top-level vdev.
 	 */
 	top = ztest_random_vdev_top(spa, B_TRUE);
 
 	/*
 	 * Pick a random leaf within it.
 	 */
 	leaf = ztest_random(leaves);
 
 	/*
 	 * Locate this vdev.
 	 */
 	oldvd = rvd->vdev_child[top];
 	if (zs->zs_mirrors >= 1) {
 		ASSERT(oldvd->vdev_ops == &vdev_mirror_ops);
 		ASSERT(oldvd->vdev_children >= zs->zs_mirrors);
 		oldvd = oldvd->vdev_child[leaf / ztest_opts.zo_raidz];
 	}
 	if (ztest_opts.zo_raidz > 1) {
 		ASSERT(oldvd->vdev_ops == &vdev_raidz_ops);
 		ASSERT(oldvd->vdev_children == ztest_opts.zo_raidz);
 		oldvd = oldvd->vdev_child[leaf % ztest_opts.zo_raidz];
 	}
 
 	/*
 	 * If we're already doing an attach or replace, oldvd may be a
 	 * mirror vdev -- in which case, pick a random child.
 	 */
 	while (oldvd->vdev_children != 0) {
 		oldvd_has_siblings = B_TRUE;
 		ASSERT(oldvd->vdev_children >= 2);
 		oldvd = oldvd->vdev_child[ztest_random(oldvd->vdev_children)];
 	}
 
 	oldguid = oldvd->vdev_guid;
 	oldsize = vdev_get_min_asize(oldvd);
 	oldvd_is_log = oldvd->vdev_top->vdev_islog;
 	(void) strcpy(oldpath, oldvd->vdev_path);
 	pvd = oldvd->vdev_parent;
 	pguid = pvd->vdev_guid;
 
 	/*
 	 * If oldvd has siblings, then half of the time, detach it.
 	 */
 	if (oldvd_has_siblings && ztest_random(2) == 0) {
 		spa_config_exit(spa, SCL_ALL, FTAG);
 		error = spa_vdev_detach(spa, oldguid, pguid, B_FALSE);
 		if (error != 0 && error != ENODEV && error != EBUSY &&
 		    error != ENOTSUP)
 			fatal(0, "detach (%s) returned %d", oldpath, error);
-		VERIFY(mutex_unlock(&ztest_vdev_lock) == 0);
+		mutex_exit(&ztest_vdev_lock);
 		return;
 	}
 
 	/*
 	 * For the new vdev, choose with equal probability between the two
 	 * standard paths (ending in either 'a' or 'b') or a random hot spare.
 	 */
 	if (sav->sav_count != 0 && ztest_random(3) == 0) {
 		newvd = sav->sav_vdevs[ztest_random(sav->sav_count)];
 		newvd_is_spare = B_TRUE;
 		(void) strcpy(newpath, newvd->vdev_path);
 	} else {
 		(void) snprintf(newpath, sizeof (newpath), ztest_dev_template,
 		    ztest_opts.zo_dir, ztest_opts.zo_pool,
 		    top * leaves + leaf);
 		if (ztest_random(2) == 0)
 			newpath[strlen(newpath) - 1] = 'b';
 		newvd = vdev_lookup_by_path(rvd, newpath);
 	}
 
 	if (newvd) {
 		/*
 		 * Reopen to ensure the vdev's asize field isn't stale.
 		 */
 		vdev_reopen(newvd);
 		newsize = vdev_get_min_asize(newvd);
 	} else {
 		/*
 		 * Make newsize a little bigger or smaller than oldsize.
 		 * If it's smaller, the attach should fail.
 		 * If it's larger, and we're doing a replace,
 		 * we should get dynamic LUN growth when we're done.
 		 */
 		newsize = 10 * oldsize / (9 + ztest_random(3));
 	}
 
 	/*
 	 * If pvd is not a mirror or root, the attach should fail with ENOTSUP,
 	 * unless it's a replace; in that case any non-replacing parent is OK.
 	 *
 	 * If newvd is already part of the pool, it should fail with EBUSY.
 	 *
 	 * If newvd is too small, it should fail with EOVERFLOW.
 	 */
 	if (pvd->vdev_ops != &vdev_mirror_ops &&
 	    pvd->vdev_ops != &vdev_root_ops && (!replacing ||
 	    pvd->vdev_ops == &vdev_replacing_ops ||
 	    pvd->vdev_ops == &vdev_spare_ops))
 		expected_error = ENOTSUP;
 	else if (newvd_is_spare && (!replacing || oldvd_is_log))
 		expected_error = ENOTSUP;
 	else if (newvd == oldvd)
 		expected_error = replacing ? 0 : EBUSY;
 	else if (vdev_lookup_by_path(rvd, newpath) != NULL)
 		expected_error = EBUSY;
 	else if (newsize < oldsize)
 		expected_error = EOVERFLOW;
 	else if (ashift > oldvd->vdev_top->vdev_ashift)
 		expected_error = EDOM;
 	else
 		expected_error = 0;
 
 	spa_config_exit(spa, SCL_ALL, FTAG);
 
 	/*
 	 * Build the nvlist describing newpath.
 	 */
 	root = make_vdev_root(newpath, NULL, NULL, newvd == NULL ? newsize : 0,
 	    ashift, 0, 0, 0, 1);
 
 	error = spa_vdev_attach(spa, oldguid, root, replacing);
 
 	nvlist_free(root);
 
 	/*
 	 * If our parent was the replacing vdev, but the replace completed,
 	 * then instead of failing with ENOTSUP we may either succeed,
 	 * fail with ENODEV, or fail with EOVERFLOW.
 	 */
 	if (expected_error == ENOTSUP &&
 	    (error == 0 || error == ENODEV || error == EOVERFLOW))
 		expected_error = error;
 
 	/*
 	 * If someone grew the LUN, the replacement may be too small.
 	 */
 	if (error == EOVERFLOW || error == EBUSY)
 		expected_error = error;
 
 	/* XXX workaround 6690467 */
 	if (error != expected_error && expected_error != EBUSY) {
 		fatal(0, "attach (%s %llu, %s %llu, %d) "
 		    "returned %d, expected %d",
 		    oldpath, oldsize, newpath,
 		    newsize, replacing, error, expected_error);
 	}
 
-	VERIFY(mutex_unlock(&ztest_vdev_lock) == 0);
+	mutex_exit(&ztest_vdev_lock);
 }
 
 /* ARGSUSED */
 void
 ztest_device_removal(ztest_ds_t *zd, uint64_t id)
 {
 	spa_t *spa = ztest_spa;
 	vdev_t *vd;
 	uint64_t guid;
 
-	VERIFY(mutex_lock(&ztest_vdev_lock) == 0);
+	mutex_enter(&ztest_vdev_lock);
 
 	spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
 	vd = vdev_lookup_top(spa, ztest_random_vdev_top(spa, B_FALSE));
 	guid = vd->vdev_guid;
 	spa_config_exit(spa, SCL_VDEV, FTAG);
 
 	(void) spa_vdev_remove(spa, guid, B_FALSE);
 
-	VERIFY(mutex_unlock(&ztest_vdev_lock) == 0);
+	mutex_exit(&ztest_vdev_lock);
 }
 
 /*
  * Callback function which expands the physical size of the vdev.
  */
 vdev_t *
 grow_vdev(vdev_t *vd, void *arg)
 {
 	spa_t *spa = vd->vdev_spa;
 	size_t *newsize = arg;
 	size_t fsize;
 	int fd;
 
 	ASSERT(spa_config_held(spa, SCL_STATE, RW_READER) == SCL_STATE);
 	ASSERT(vd->vdev_ops->vdev_op_leaf);
 
 	if ((fd = open(vd->vdev_path, O_RDWR)) == -1)
 		return (vd);
 
 	fsize = lseek(fd, 0, SEEK_END);
 	(void) ftruncate(fd, *newsize);
 
 	if (ztest_opts.zo_verbose >= 6) {
 		(void) printf("%s grew from %lu to %lu bytes\n",
 		    vd->vdev_path, (ulong_t)fsize, (ulong_t)*newsize);
 	}
 	(void) close(fd);
 	return (NULL);
 }
 
 /*
  * Callback function which expands a given vdev by calling vdev_online().
  */
 /* ARGSUSED */
 vdev_t *
 online_vdev(vdev_t *vd, void *arg)
 {
 	spa_t *spa = vd->vdev_spa;
 	vdev_t *tvd = vd->vdev_top;
 	uint64_t guid = vd->vdev_guid;
 	uint64_t generation = spa->spa_config_generation + 1;
 	vdev_state_t newstate = VDEV_STATE_UNKNOWN;
 	int error;
 
 	ASSERT(spa_config_held(spa, SCL_STATE, RW_READER) == SCL_STATE);
 	ASSERT(vd->vdev_ops->vdev_op_leaf);
 
 	/* Calling vdev_online will initialize the new metaslabs */
 	spa_config_exit(spa, SCL_STATE, spa);
 	error = vdev_online(spa, guid, ZFS_ONLINE_EXPAND, &newstate);
 	spa_config_enter(spa, SCL_STATE, spa, RW_READER);
 
 	/*
 	 * If vdev_online returned an error or the underlying vdev_open
 	 * failed then we abort the expand. The only way to know that
 	 * vdev_open fails is by checking the returned newstate.
 	 */
 	if (error || newstate != VDEV_STATE_HEALTHY) {
 		if (ztest_opts.zo_verbose >= 5) {
 			(void) printf("Unable to expand vdev, state %llu, "
 			    "error %d\n", (u_longlong_t)newstate, error);
 		}
 		return (vd);
 	}
 	ASSERT3U(newstate, ==, VDEV_STATE_HEALTHY);
 
 	/*
 	 * Since we dropped the lock we need to ensure that we're
 	 * still talking to the original vdev. It's possible this
 	 * vdev may have been detached/replaced while we were
 	 * trying to online it.
 	 */
 	if (generation != spa->spa_config_generation) {
 		if (ztest_opts.zo_verbose >= 5) {
 			(void) printf("vdev configuration has changed, "
 			    "guid %llu, state %llu, expected gen %llu, "
 			    "got gen %llu\n",
 			    (u_longlong_t)guid,
 			    (u_longlong_t)tvd->vdev_state,
 			    (u_longlong_t)generation,
 			    (u_longlong_t)spa->spa_config_generation);
 		}
 		return (vd);
 	}
 	return (NULL);
 }
 
 /*
  * Traverse the vdev tree calling the supplied function.
  * We continue to walk the tree until we either have walked all
  * children or we receive a non-NULL return from the callback.
  * If a NULL callback is passed, then we just return back the first
  * leaf vdev we encounter.
  */
 vdev_t *
 vdev_walk_tree(vdev_t *vd, vdev_t *(*func)(vdev_t *, void *), void *arg)
 {
 	if (vd->vdev_ops->vdev_op_leaf) {
 		if (func == NULL)
 			return (vd);
 		else
 			return (func(vd, arg));
 	}
 
 	for (uint_t c = 0; c < vd->vdev_children; c++) {
 		vdev_t *cvd = vd->vdev_child[c];
 		if ((cvd = vdev_walk_tree(cvd, func, arg)) != NULL)
 			return (cvd);
 	}
 	return (NULL);
 }
 
 /*
  * Verify that dynamic LUN growth works as expected.
  */
 /* ARGSUSED */
 void
 ztest_vdev_LUN_growth(ztest_ds_t *zd, uint64_t id)
 {
 	spa_t *spa = ztest_spa;
 	vdev_t *vd, *tvd;
 	metaslab_class_t *mc;
 	metaslab_group_t *mg;
 	size_t psize, newsize;
 	uint64_t top;
 	uint64_t old_class_space, new_class_space, old_ms_count, new_ms_count;
 
-	VERIFY(mutex_lock(&ztest_vdev_lock) == 0);
+	mutex_enter(&ztest_vdev_lock);
 	spa_config_enter(spa, SCL_STATE, spa, RW_READER);
 
 	/*
 	 * If there is a vdev removal in progress, it could complete while
 	 * we are running, in which case we would not be able to verify
 	 * that the metaslab_class space increased (because it decreases
 	 * when the device removal completes).
 	 */
 	if (spa->spa_vdev_removal != NULL) {
 		spa_config_exit(spa, SCL_STATE, FTAG);
-		VERIFY(mutex_unlock(&ztest_vdev_lock) == 0);
+		mutex_exit(&ztest_vdev_lock);
 		return;
 	}
 
 	top = ztest_random_vdev_top(spa, B_TRUE);
 
 	tvd = spa->spa_root_vdev->vdev_child[top];
 	mg = tvd->vdev_mg;
 	mc = mg->mg_class;
 	old_ms_count = tvd->vdev_ms_count;
 	old_class_space = metaslab_class_get_space(mc);
 
 	/*
 	 * Determine the size of the first leaf vdev associated with
 	 * our top-level device.
 	 */
 	vd = vdev_walk_tree(tvd, NULL, NULL);
 	ASSERT3P(vd, !=, NULL);
 	ASSERT(vd->vdev_ops->vdev_op_leaf);
 
 	psize = vd->vdev_psize;
 
 	/*
 	 * We only try to expand the vdev if it's healthy, less than 4x its
 	 * original size, and it has a valid psize.
 	 */
 	if (tvd->vdev_state != VDEV_STATE_HEALTHY ||
 	    psize == 0 || psize >= 4 * ztest_opts.zo_vdev_size) {
 		spa_config_exit(spa, SCL_STATE, spa);
-		VERIFY(mutex_unlock(&ztest_vdev_lock) == 0);
+		mutex_exit(&ztest_vdev_lock);
 		return;
 	}
 	ASSERT(psize > 0);
 	newsize = psize + psize / 8;
 	ASSERT3U(newsize, >, psize);
 
 	if (ztest_opts.zo_verbose >= 6) {
 		(void) printf("Expanding LUN %s from %lu to %lu\n",
 		    vd->vdev_path, (ulong_t)psize, (ulong_t)newsize);
 	}
 
 	/*
 	 * Growing the vdev is a two step process:
 	 *	1). expand the physical size (i.e. relabel)
 	 *	2). online the vdev to create the new metaslabs
 	 */
 	if (vdev_walk_tree(tvd, grow_vdev, &newsize) != NULL ||
 	    vdev_walk_tree(tvd, online_vdev, NULL) != NULL ||
 	    tvd->vdev_state != VDEV_STATE_HEALTHY) {
 		if (ztest_opts.zo_verbose >= 5) {
 			(void) printf("Could not expand LUN because "
 			    "the vdev configuration changed.\n");
 		}
 		spa_config_exit(spa, SCL_STATE, spa);
-		VERIFY(mutex_unlock(&ztest_vdev_lock) == 0);
+		mutex_exit(&ztest_vdev_lock);
 		return;
 	}
 
 	spa_config_exit(spa, SCL_STATE, spa);
 
 	/*
 	 * Expanding the LUN will update the config asynchronously,
 	 * thus we must wait for the async thread to complete any
 	 * pending tasks before proceeding.
 	 */
 	for (;;) {
 		boolean_t done;
 		mutex_enter(&spa->spa_async_lock);
 		done = (spa->spa_async_thread == NULL && !spa->spa_async_tasks);
 		mutex_exit(&spa->spa_async_lock);
 		if (done)
 			break;
 		txg_wait_synced(spa_get_dsl(spa), 0);
 		(void) poll(NULL, 0, 100);
 	}
 
 	spa_config_enter(spa, SCL_STATE, spa, RW_READER);
 
 	tvd = spa->spa_root_vdev->vdev_child[top];
 	new_ms_count = tvd->vdev_ms_count;
 	new_class_space = metaslab_class_get_space(mc);
 
 	if (tvd->vdev_mg != mg || mg->mg_class != mc) {
 		if (ztest_opts.zo_verbose >= 5) {
 			(void) printf("Could not verify LUN expansion due to "
 			    "intervening vdev offline or remove.\n");
 		}
 		spa_config_exit(spa, SCL_STATE, spa);
-		VERIFY(mutex_unlock(&ztest_vdev_lock) == 0);
+		mutex_exit(&ztest_vdev_lock);
 		return;
 	}
 
 	/*
 	 * Make sure we were able to grow the vdev.
 	 */
 	if (new_ms_count <= old_ms_count) {
 		fatal(0, "LUN expansion failed: ms_count %llu < %llu\n",
 		    old_ms_count, new_ms_count);
 	}
 
 	/*
 	 * Make sure we were able to grow the pool.
 	 */
 	if (new_class_space <= old_class_space) {
 		fatal(0, "LUN expansion failed: class_space %llu < %llu\n",
 		    old_class_space, new_class_space);
 	}
 
 	if (ztest_opts.zo_verbose >= 5) {
 		char oldnumbuf[NN_NUMBUF_SZ], newnumbuf[NN_NUMBUF_SZ];
 
 		nicenum(old_class_space, oldnumbuf, sizeof (oldnumbuf));
 		nicenum(new_class_space, newnumbuf, sizeof (newnumbuf));
 		(void) printf("%s grew from %s to %s\n",
 		    spa->spa_name, oldnumbuf, newnumbuf);
 	}
 
 	spa_config_exit(spa, SCL_STATE, spa);
-	VERIFY(mutex_unlock(&ztest_vdev_lock) == 0);
+	mutex_exit(&ztest_vdev_lock);
 }
 
 /*
  * Verify that dmu_objset_{create,destroy,open,close} work as expected.
  */
 /* ARGSUSED */
 static void
 ztest_objset_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx)
 {
 	/*
 	 * Create the objects common to all ztest datasets.
 	 */
 	VERIFY(zap_create_claim(os, ZTEST_DIROBJ,
 	    DMU_OT_ZAP_OTHER, DMU_OT_NONE, 0, tx) == 0);
 }
 
 static int
 ztest_dataset_create(char *dsname)
 {
 	uint64_t zilset = ztest_random(100);
 	int err = dmu_objset_create(dsname, DMU_OST_OTHER, 0,
 	    ztest_objset_create_cb, NULL);
 
 	if (err || zilset < 80)
 		return (err);
 
 	if (ztest_opts.zo_verbose >= 6)
 		(void) printf("Setting dataset %s to sync always\n", dsname);
 	return (ztest_dsl_prop_set_uint64(dsname, ZFS_PROP_SYNC,
 	    ZFS_SYNC_ALWAYS, B_FALSE));
 }
 
 /* ARGSUSED */
 static int
 ztest_objset_destroy_cb(const char *name, void *arg)
 {
 	objset_t *os;
 	dmu_object_info_t doi;
 	int error;
 
 	/*
 	 * Verify that the dataset contains a directory object.
 	 */
 	VERIFY0(dmu_objset_own(name, DMU_OST_OTHER, B_TRUE, FTAG, &os));
 	error = dmu_object_info(os, ZTEST_DIROBJ, &doi);
 	if (error != ENOENT) {
 		/* We could have crashed in the middle of destroying it */
 		ASSERT0(error);
 		ASSERT3U(doi.doi_type, ==, DMU_OT_ZAP_OTHER);
 		ASSERT3S(doi.doi_physical_blocks_512, >=, 0);
 	}
 	dmu_objset_disown(os, FTAG);
 
 	/*
 	 * Destroy the dataset.
 	 */
 	if (strchr(name, '@') != NULL) {
 		VERIFY0(dsl_destroy_snapshot(name, B_FALSE));
 	} else {
 		VERIFY0(dsl_destroy_head(name));
 	}
 	return (0);
 }
 
 static boolean_t
 ztest_snapshot_create(char *osname, uint64_t id)
 {
 	char snapname[ZFS_MAX_DATASET_NAME_LEN];
 	int error;
 
 	(void) snprintf(snapname, sizeof (snapname), "%llu", (u_longlong_t)id);
 
 	error = dmu_objset_snapshot_one(osname, snapname);
 	if (error == ENOSPC) {
 		ztest_record_enospc(FTAG);
 		return (B_FALSE);
 	}
 	if (error != 0 && error != EEXIST) {
 		fatal(0, "ztest_snapshot_create(%s@%s) = %d", osname,
 		    snapname, error);
 	}
 	return (B_TRUE);
 }
 
 static boolean_t
 ztest_snapshot_destroy(char *osname, uint64_t id)
 {
 	char snapname[ZFS_MAX_DATASET_NAME_LEN];
 	int error;
 
 	(void) snprintf(snapname, sizeof (snapname), "%s@%llu", osname,
 	    (u_longlong_t)id);
 
 	error = dsl_destroy_snapshot(snapname, B_FALSE);
 	if (error != 0 && error != ENOENT)
 		fatal(0, "ztest_snapshot_destroy(%s) = %d", snapname, error);
 	return (B_TRUE);
 }
 
 /* ARGSUSED */
 void
 ztest_dmu_objset_create_destroy(ztest_ds_t *zd, uint64_t id)
 {
 	ztest_ds_t zdtmp;
 	int iters;
 	int error;
 	objset_t *os, *os2;
 	char name[ZFS_MAX_DATASET_NAME_LEN];
 	zilog_t *zilog;
 
-	(void) rw_rdlock(&ztest_name_lock);
+	rw_enter(&ztest_name_lock, RW_READER);
 
 	(void) snprintf(name, sizeof (name), "%s/temp_%llu",
 	    ztest_opts.zo_pool, (u_longlong_t)id);
 
 	/*
 	 * If this dataset exists from a previous run, process its replay log
 	 * half of the time.  If we don't replay it, then dmu_objset_destroy()
 	 * (invoked from ztest_objset_destroy_cb()) should just throw it away.
 	 */
 	if (ztest_random(2) == 0 &&
 	    dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, FTAG, &os) == 0) {
 		ztest_zd_init(&zdtmp, NULL, os);
 		zil_replay(os, &zdtmp, ztest_replay_vector);
 		ztest_zd_fini(&zdtmp);
 		dmu_objset_disown(os, FTAG);
 	}
 
 	/*
 	 * There may be an old instance of the dataset we're about to
 	 * create lying around from a previous run.  If so, destroy it
 	 * and all of its snapshots.
 	 */
 	(void) dmu_objset_find(name, ztest_objset_destroy_cb, NULL,
 	    DS_FIND_CHILDREN | DS_FIND_SNAPSHOTS);
 
 	/*
 	 * Verify that the destroyed dataset is no longer in the namespace.
 	 */
 	VERIFY3U(ENOENT, ==, dmu_objset_own(name, DMU_OST_OTHER, B_TRUE,
 	    FTAG, &os));
 
 	/*
 	 * Verify that we can create a new dataset.
 	 */
 	error = ztest_dataset_create(name);
 	if (error) {
 		if (error == ENOSPC) {
 			ztest_record_enospc(FTAG);
-			(void) rw_unlock(&ztest_name_lock);
+			rw_exit(&ztest_name_lock);
 			return;
 		}
 		fatal(0, "dmu_objset_create(%s) = %d", name, error);
 	}
 
 	VERIFY0(dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, FTAG, &os));
 
 	ztest_zd_init(&zdtmp, NULL, os);
 
 	/*
 	 * Open the intent log for it.
 	 */
 	zilog = zil_open(os, ztest_get_data);
 
 	/*
 	 * Put some objects in there, do a little I/O to them,
 	 * and randomly take a couple of snapshots along the way.
 	 */
 	iters = ztest_random(5);
 	for (int i = 0; i < iters; i++) {
 		ztest_dmu_object_alloc_free(&zdtmp, id);
 		if (ztest_random(iters) == 0)
 			(void) ztest_snapshot_create(name, i);
 	}
 
 	/*
 	 * Verify that we cannot create an existing dataset.
 	 */
 	VERIFY3U(EEXIST, ==,
 	    dmu_objset_create(name, DMU_OST_OTHER, 0, NULL, NULL));
 
 	/*
 	 * Verify that we can hold an objset that is also owned.
 	 */
 	VERIFY3U(0, ==, dmu_objset_hold(name, FTAG, &os2));
 	dmu_objset_rele(os2, FTAG);
 
 	/*
 	 * Verify that we cannot own an objset that is already owned.
 	 */
 	VERIFY3U(EBUSY, ==,
 	    dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, FTAG, &os2));
 
 	zil_close(zilog);
 	dmu_objset_disown(os, FTAG);
 	ztest_zd_fini(&zdtmp);
 
-	(void) rw_unlock(&ztest_name_lock);
+	rw_exit(&ztest_name_lock);
 }
 
 /*
  * Verify that dmu_snapshot_{create,destroy,open,close} work as expected.
  */
 void
 ztest_dmu_snapshot_create_destroy(ztest_ds_t *zd, uint64_t id)
 {
-	(void) rw_rdlock(&ztest_name_lock);
+	rw_enter(&ztest_name_lock, RW_READER);
 	(void) ztest_snapshot_destroy(zd->zd_name, id);
 	(void) ztest_snapshot_create(zd->zd_name, id);
-	(void) rw_unlock(&ztest_name_lock);
+	rw_exit(&ztest_name_lock);
 }
 
 /*
  * Cleanup non-standard snapshots and clones.
  */
 void
 ztest_dsl_dataset_cleanup(char *osname, uint64_t id)
 {
 	char snap1name[ZFS_MAX_DATASET_NAME_LEN];
 	char clone1name[ZFS_MAX_DATASET_NAME_LEN];
 	char snap2name[ZFS_MAX_DATASET_NAME_LEN];
 	char clone2name[ZFS_MAX_DATASET_NAME_LEN];
 	char snap3name[ZFS_MAX_DATASET_NAME_LEN];
 	int error;
 
 	(void) snprintf(snap1name, sizeof (snap1name),
 	    "%s@s1_%llu", osname, id);
 	(void) snprintf(clone1name, sizeof (clone1name),
 	    "%s/c1_%llu", osname, id);
 	(void) snprintf(snap2name, sizeof (snap2name),
 	    "%s@s2_%llu", clone1name, id);
 	(void) snprintf(clone2name, sizeof (clone2name),
 	    "%s/c2_%llu", osname, id);
 	(void) snprintf(snap3name, sizeof (snap3name),
 	    "%s@s3_%llu", clone1name, id);
 
 	error = dsl_destroy_head(clone2name);
 	if (error && error != ENOENT)
 		fatal(0, "dsl_destroy_head(%s) = %d", clone2name, error);
 	error = dsl_destroy_snapshot(snap3name, B_FALSE);
 	if (error && error != ENOENT)
 		fatal(0, "dsl_destroy_snapshot(%s) = %d", snap3name, error);
 	error = dsl_destroy_snapshot(snap2name, B_FALSE);
 	if (error && error != ENOENT)
 		fatal(0, "dsl_destroy_snapshot(%s) = %d", snap2name, error);
 	error = dsl_destroy_head(clone1name);
 	if (error && error != ENOENT)
 		fatal(0, "dsl_destroy_head(%s) = %d", clone1name, error);
 	error = dsl_destroy_snapshot(snap1name, B_FALSE);
 	if (error && error != ENOENT)
 		fatal(0, "dsl_destroy_snapshot(%s) = %d", snap1name, error);
 }
 
 /*
  * Verify dsl_dataset_promote handles EBUSY
  *
  * With ztest_name_lock held as reader, the test builds this hierarchy
  * beneath zd->zd_name (osname):
  *
  *	osname@s1_<id>			snapshot of osname
  *	osname/c1_<id>			clone of osname@s1_<id>
  *	osname/c1_<id>@s2_<id>		snapshot of c1
  *	osname/c1_<id>@s3_<id>		snapshot of c1
  *	osname/c2_<id>			clone of c1@s3_<id>
  *
  * It then takes ownership of c1@s2 and tries to promote c2.  Any result
  * other than EBUSY is fatal, except ENOSPC, which is recorded and ends the
  * test early (as it does for every setup step).  Leftover datasets are
  * removed with ztest_dsl_dataset_cleanup() both before and after the run.
  *
  * NOTE(review): several fatal() messages below name a function
  * (dmu_take_snapshot, dmu_objset_create, dmu_open_snapshot) other than
  * the one whose return value they actually report.
  */
 void
 ztest_dsl_dataset_promote_busy(ztest_ds_t *zd, uint64_t id)
 {
 	objset_t *os;
 	char snap1name[ZFS_MAX_DATASET_NAME_LEN];
 	char clone1name[ZFS_MAX_DATASET_NAME_LEN];
 	char snap2name[ZFS_MAX_DATASET_NAME_LEN];
 	char clone2name[ZFS_MAX_DATASET_NAME_LEN];
 	char snap3name[ZFS_MAX_DATASET_NAME_LEN];
 	char *osname = zd->zd_name;
 	int error;
 
-	(void) rw_rdlock(&ztest_name_lock);
+	rw_enter(&ztest_name_lock, RW_READER);
 
 	/* Start from a clean slate in case a previous run left datasets. */
 	ztest_dsl_dataset_cleanup(osname, id);
 
 	(void) snprintf(snap1name, sizeof (snap1name),
 	    "%s@s1_%llu", osname, id);
 	(void) snprintf(clone1name, sizeof (clone1name),
 	    "%s/c1_%llu", osname, id);
 	(void) snprintf(snap2name, sizeof (snap2name),
 	    "%s@s2_%llu", clone1name, id);
 	(void) snprintf(clone2name, sizeof (clone2name),
 	    "%s/c2_%llu", osname, id);
 	(void) snprintf(snap3name, sizeof (snap3name),
 	    "%s@s3_%llu", clone1name, id);
 
 	/* Only the part after '@' is passed as the snapshot name. */
 	error = dmu_objset_snapshot_one(osname, strchr(snap1name, '@') + 1);
 	if (error && error != EEXIST) {
 		if (error == ENOSPC) {
 			ztest_record_enospc(FTAG);
 			goto out;
 		}
 		fatal(0, "dmu_take_snapshot(%s) = %d", snap1name, error);
 	}
 
 	error = dmu_objset_clone(clone1name, snap1name);
 	if (error) {
 		if (error == ENOSPC) {
 			ztest_record_enospc(FTAG);
 			goto out;
 		}
 		fatal(0, "dmu_objset_create(%s) = %d", clone1name, error);
 	}
 
 	error = dmu_objset_snapshot_one(clone1name, strchr(snap2name, '@') + 1);
 	if (error && error != EEXIST) {
 		if (error == ENOSPC) {
 			ztest_record_enospc(FTAG);
 			goto out;
 		}
 		fatal(0, "dmu_open_snapshot(%s) = %d", snap2name, error);
 	}
 
 	error = dmu_objset_snapshot_one(clone1name, strchr(snap3name, '@') + 1);
 	if (error && error != EEXIST) {
 		if (error == ENOSPC) {
 			ztest_record_enospc(FTAG);
 			goto out;
 		}
 		fatal(0, "dmu_open_snapshot(%s) = %d", snap3name, error);
 	}
 
 	error = dmu_objset_clone(clone2name, snap3name);
 	if (error) {
 		if (error == ENOSPC) {
 			ztest_record_enospc(FTAG);
 			goto out;
 		}
 		fatal(0, "dmu_objset_create(%s) = %d", clone2name, error);
 	}
 
 	/*
 	 * Own c1@s2 for the duration of the promote attempt; this is the
 	 * condition under which the promote is expected to return EBUSY.
 	 */
 	error = dmu_objset_own(snap2name, DMU_OST_ANY, B_TRUE, FTAG, &os);
 	if (error)
 		fatal(0, "dmu_objset_own(%s) = %d", snap2name, error);
 	error = dsl_dataset_promote(clone2name, NULL);
 	if (error == ENOSPC) {
 		/* Drop ownership before bailing so cleanup can proceed. */
 		dmu_objset_disown(os, FTAG);
 		ztest_record_enospc(FTAG);
 		goto out;
 	}
 	if (error != EBUSY)
 		fatal(0, "dsl_dataset_promote(%s), %d, not EBUSY", clone2name,
 		    error);
 	dmu_objset_disown(os, FTAG);
 
 out:
 	ztest_dsl_dataset_cleanup(osname, id);
 
-	(void) rw_unlock(&ztest_name_lock);
+	rw_exit(&ztest_name_lock);
 }
 
 /*
  * Verify that dmu_object_{alloc,free} work as expected.
  *
  * A batch of four object descriptors is (re)initialised through
  * ztest_object_init() with its last argument set to B_TRUE, after which
  * a random amount of ztest_io() traffic is directed at randomly chosen
  * members of the batch.
  */
 void
 ztest_dmu_object_alloc_free(ztest_ds_t *zd, uint64_t id)
 {
 	ztest_od_t batch[4];
 	int nobjs = sizeof (batch) / sizeof (batch[0]);
 	int slot = 0;
 
 	/* Describe every object in the batch. */
 	while (slot < nobjs) {
 		ztest_od_init(&batch[slot], id, FTAG, slot,
 		    DMU_OT_UINT64_OTHER, 0, 0);
 		slot++;
 	}
 
 	/*
 	 * Destroy the previous batch of objects, create a new batch,
 	 * and do some I/O on the new objects.
 	 */
 	if (ztest_object_init(zd, batch, sizeof (batch), B_TRUE) != 0)
 		return;
 
 	/* Keep issuing I/O until the dice come up zero. */
 	for (;;) {
 		if (ztest_random(4 * nobjs) == 0)
 			break;
 		ztest_io(zd, batch[ztest_random(nobjs)].od_object,
 		    ztest_random(ZTEST_RANGE_LOCKS) << SPA_MAXBLOCKSHIFT);
 	}
 }
 
 /*
  * Verify that dmu_{read,write} work as expected.
  *
  * zd supplies the objset (zd->zd_os); id is handed to ztest_od_init() to
  * identify this test's two objects.  One randomly chosen range of both
  * objects is read, cross-checked, rewritten (or, free_percent of the
  * time, zeroed/freed) inside a single tx, and then read back and compared
  * against what was written.  The function returns early, without doing
  * any I/O, if ztest_object_init() fails or if no txg could be assigned.
  */
 void
 ztest_dmu_read_write(ztest_ds_t *zd, uint64_t id)
 {
 	objset_t *os = zd->zd_os;
 	ztest_od_t od[2];
 	dmu_tx_t *tx;
 	int i, freeit, error;
 	uint64_t n, s, txg;
 	bufwad_t *packbuf, *bigbuf, *pack, *bigH, *bigT;
 	uint64_t packobj, packoff, packsize, bigobj, bigoff, bigsize;
 	uint64_t chunksize = (1000 + ztest_random(1000)) * sizeof (uint64_t);
 	uint64_t regions = 997;
 	uint64_t stride = 123456789ULL;
 	uint64_t width = 40;
 	int free_percent = 5;
 
 	/*
 	 * This test uses two objects, packobj and bigobj, that are always
 	 * updated together (i.e. in the same tx) so that their contents are
 	 * in sync and can be compared.  Their contents relate to each other
 	 * in a simple way: packobj is a dense array of 'bufwad' structures,
 	 * while bigobj is a sparse array of the same bufwads.  Specifically,
 	 * for any index n, there are three bufwads that should be identical:
 	 *
 	 *	packobj, at offset n * sizeof (bufwad_t)
 	 *	bigobj, at the head of the nth chunk
 	 *	bigobj, at the tail of the nth chunk
 	 *
 	 * The chunk size is arbitrary. It doesn't have to be a power of two,
 	 * and it doesn't have any relation to the object blocksize.
 	 * The only requirement is that it can hold at least two bufwads.
 	 *
 	 * Normally, we write the bufwad to each of these locations.
 	 * However, free_percent of the time we instead write zeroes to
 	 * packobj and perform a dmu_free_range() on bigobj.  By comparing
 	 * bigobj to packobj, we can verify that the DMU is correctly
 	 * tracking which parts of an object are allocated and free,
 	 * and that the contents of the allocated blocks are correct.
 	 */
 
 	/*
 	 * Read the directory info.  If it's the first time, set things up.
 	 */
 	ztest_od_init(&od[0], id, FTAG, 0, DMU_OT_UINT64_OTHER, 0, chunksize);
 	ztest_od_init(&od[1], id, FTAG, 1, DMU_OT_UINT64_OTHER, 0, chunksize);
 
 	if (ztest_object_init(zd, od, sizeof (od), B_FALSE) != 0)
 		return;
 
 	bigobj = od[0].od_object;
 	packobj = od[1].od_object;
 	/*
 	 * Replace the freshly rolled chunksize with the one stored in
 	 * od_gen; both descriptors must agree on it.
 	 * NOTE(review): presumably od_gen holds the value recorded when the
 	 * objects were first created -- confirm in ztest_object_init().
 	 */
 	chunksize = od[0].od_gen;
 	ASSERT(chunksize == od[1].od_gen);
 
 	/*
 	 * Prefetch a random chunk of the big object.
 	 * Our aim here is to get some async reads in flight
 	 * for blocks that we may free below; the DMU should
 	 * handle this race correctly.
 	 */
 	n = ztest_random(regions) * stride + ztest_random(width);
 	s = 1 + ztest_random(2 * width - 1);
 	dmu_prefetch(os, bigobj, 0, n * chunksize, s * chunksize,
 	    ZIO_PRIORITY_SYNC_READ);
 
 	/*
 	 * Pick a random index and compute the offsets into packobj and bigobj.
 	 */
 	n = ztest_random(regions) * stride + ztest_random(width);
 	s = 1 + ztest_random(width - 1);
 
 	packoff = n * sizeof (bufwad_t);
 	packsize = s * sizeof (bufwad_t);
 
 	bigoff = n * chunksize;
 	bigsize = s * chunksize;
 
 	packbuf = umem_alloc(packsize, UMEM_NOFAIL);
 	bigbuf = umem_alloc(bigsize, UMEM_NOFAIL);
 
 	/*
 	 * free_percent of the time, free a range of bigobj rather than
 	 * overwriting it.
 	 */
 	freeit = (ztest_random(100) < free_percent);
 
 	/*
 	 * Read the current contents of our objects.
 	 */
 	error = dmu_read(os, packobj, packoff, packsize, packbuf,
 	    DMU_READ_PREFETCH);
 	ASSERT0(error);
 	error = dmu_read(os, bigobj, bigoff, bigsize, bigbuf,
 	    DMU_READ_PREFETCH);
 	ASSERT0(error);
 
 	/*
 	 * Get a tx for the mods to both packobj and bigobj.
 	 */
 	tx = dmu_tx_create(os);
 
 	dmu_tx_hold_write(tx, packobj, packoff, packsize);
 
 	if (freeit)
 		dmu_tx_hold_free(tx, bigobj, bigoff, bigsize);
 	else
 		dmu_tx_hold_write(tx, bigobj, bigoff, bigsize);
 
 	/* This accounts for setting the checksum/compression. */
 	dmu_tx_hold_bonus(tx, bigobj);
 
 	txg = ztest_tx_assign(tx, TXG_MIGHTWAIT, FTAG);
 	if (txg == 0) {
 		/* A zero txg is treated as failure: release and give up. */
 		umem_free(packbuf, packsize);
 		umem_free(bigbuf, bigsize);
 		return;
 	}
 
 	/* Re-roll until the value is below ZIO_CHECKSUM_LEGACY_FUNCTIONS. */
 	enum zio_checksum cksum;
 	do {
 		cksum = (enum zio_checksum)
 		    ztest_random_dsl_prop(ZFS_PROP_CHECKSUM);
 	} while (cksum >= ZIO_CHECKSUM_LEGACY_FUNCTIONS);
 	dmu_object_set_checksum(os, bigobj, cksum, tx);
 
 	/* Likewise, re-roll until below ZIO_COMPRESS_LEGACY_FUNCTIONS. */
 	enum zio_compress comp;
 	do {
 		comp = (enum zio_compress)
 		    ztest_random_dsl_prop(ZFS_PROP_COMPRESSION);
 	} while (comp >= ZIO_COMPRESS_LEGACY_FUNCTIONS);
 	dmu_object_set_compress(os, bigobj, comp, tx);
 
 	/*
 	 * For each index from n to n + s, verify that the existing bufwad
 	 * in packobj matches the bufwads at the head and tail of the
 	 * corresponding chunk in bigobj.  Then update all three bufwads
 	 * with the new values we want to write out.
 	 */
 	for (i = 0; i < s; i++) {
 		/* LINTED */
 		pack = (bufwad_t *)((char *)packbuf + i * sizeof (bufwad_t));
 		/* LINTED */
 		bigH = (bufwad_t *)((char *)bigbuf + i * chunksize);
 		/* LINTED */
 		bigT = (bufwad_t *)((char *)bigH + chunksize) - 1;
 
 		ASSERT((uintptr_t)bigH - (uintptr_t)bigbuf < bigsize);
 		ASSERT((uintptr_t)bigT - (uintptr_t)bigbuf < bigsize);
 
 		/* Data stamped with a txg later than ours is a bug. */
 		if (pack->bw_txg > txg)
 			fatal(0, "future leak: got %llx, open txg is %llx",
 			    pack->bw_txg, txg);
 
 		/* A zero bw_data entry is exempt from the index check. */
 		if (pack->bw_data != 0 && pack->bw_index != n + i)
 			fatal(0, "wrong index: got %llx, wanted %llx+%llx",
 			    pack->bw_index, n, i);
 
 		if (bcmp(pack, bigH, sizeof (bufwad_t)) != 0)
 			fatal(0, "pack/bigH mismatch in %p/%p", pack, bigH);
 
 		if (bcmp(pack, bigT, sizeof (bufwad_t)) != 0)
 			fatal(0, "pack/bigT mismatch in %p/%p", pack, bigT);
 
 		if (freeit) {
 			bzero(pack, sizeof (bufwad_t));
 		} else {
 			pack->bw_index = n + i;
 			pack->bw_txg = txg;
 			/* 1 + random(-2ULL) keeps bw_data non-zero. */
 			pack->bw_data = 1 + ztest_random(-2ULL);
 		}
 		*bigH = *pack;
 		*bigT = *pack;
 	}
 
 	/*
 	 * We've verified all the old bufwads, and made new ones.
 	 * Now write them out.
 	 */
 	dmu_write(os, packobj, packoff, packsize, packbuf, tx);
 
 	if (freeit) {
 		if (ztest_opts.zo_verbose >= 7) {
 			(void) printf("freeing offset %llx size %llx"
 			    " txg %llx\n",
 			    (u_longlong_t)bigoff,
 			    (u_longlong_t)bigsize,
 			    (u_longlong_t)txg);
 		}
 		VERIFY(0 == dmu_free_range(os, bigobj, bigoff, bigsize, tx));
 	} else {
 		if (ztest_opts.zo_verbose >= 7) {
 			(void) printf("writing offset %llx size %llx"
 			    " txg %llx\n",
 			    (u_longlong_t)bigoff,
 			    (u_longlong_t)bigsize,
 			    (u_longlong_t)txg);
 		}
 		dmu_write(os, bigobj, bigoff, bigsize, bigbuf, tx);
 	}
 
 	dmu_tx_commit(tx);
 
 	/*
 	 * Sanity check the stuff we just wrote.
 	 */
 	{
 		void *packcheck = umem_alloc(packsize, UMEM_NOFAIL);
 		void *bigcheck = umem_alloc(bigsize, UMEM_NOFAIL);
 
 		VERIFY(0 == dmu_read(os, packobj, packoff,
 		    packsize, packcheck, DMU_READ_PREFETCH));
 		VERIFY(0 == dmu_read(os, bigobj, bigoff,
 		    bigsize, bigcheck, DMU_READ_PREFETCH));
 
 		ASSERT(bcmp(packbuf, packcheck, packsize) == 0);
 		ASSERT(bcmp(bigbuf, bigcheck, bigsize) == 0);
 
 		umem_free(packcheck, packsize);
 		umem_free(bigcheck, bigsize);
 	}
 
 	umem_free(packbuf, packsize);
 	umem_free(bigbuf, bigsize);
 }
 
 /*
  * Cross-check and refresh s consecutive bufwads.
  *
  * packbuf is a dense array of s bufwads; bigbuf holds s chunks of
  * chunksize bytes (bigsize bytes in total), each carrying a copy of the
  * matching bufwad at its head and at its tail.  For every index the three
  * copies must agree, must not be stamped with a txg newer than 'txg', and
  * (when bw_data is non-zero) must carry index n + i.  Any violation is
  * fatal.  Afterwards all three copies are rewritten with the new index,
  * txg and a fresh random non-zero bw_data.
  */
 void
 compare_and_update_pbbufs(uint64_t s, bufwad_t *packbuf, bufwad_t *bigbuf,
     uint64_t bigsize, uint64_t n, uint64_t chunksize, uint64_t txg)
 {
 	char *chunk = (char *)bigbuf;
 	bufwad_t *dense = packbuf;
 
 	for (uint64_t k = 0; k < s; k++, dense++, chunk += chunksize) {
 		/* LINTED */
 		bufwad_t *bigH = (bufwad_t *)chunk;
 		/* LINTED */
 		bufwad_t *bigT = (bufwad_t *)(chunk + chunksize) - 1;
 
 		ASSERT((uintptr_t)bigH - (uintptr_t)bigbuf < bigsize);
 		ASSERT((uintptr_t)bigT - (uintptr_t)bigbuf < bigsize);
 
 		/* Validate what is currently stored ... */
 		if (dense->bw_txg > txg)
 			fatal(0, "future leak: got %llx, open txg is %llx",
 			    dense->bw_txg, txg);
 
 		if (dense->bw_data != 0 && dense->bw_index != n + k)
 			fatal(0, "wrong index: got %llx, wanted %llx+%llx",
 			    dense->bw_index, n, k);
 
 		if (bcmp(dense, bigH, sizeof (bufwad_t)) != 0)
 			fatal(0, "pack/bigH mismatch in %p/%p", dense, bigH);
 
 		if (bcmp(dense, bigT, sizeof (bufwad_t)) != 0)
 			fatal(0, "pack/bigT mismatch in %p/%p", dense, bigT);
 
 		/* ... then stamp the new contents into all three copies. */
 		dense->bw_index = n + k;
 		dense->bw_txg = txg;
 		dense->bw_data = 1 + ztest_random(-2ULL);
 
 		*bigH = *dense;
 		*bigT = *dense;
 	}
 }
 
 /*
  * Exercise dmu_assign_arcbuf() (zero-copy writes) against the same
  * packobj/bigobj pairing used by ztest_dmu_read_write().
  *
  * zd supplies the objset (zd->zd_os); id is handed to ztest_od_init() to
  * identify this test's two objects.  One random range is chosen up front
  * and then rewritten seven times, each pass targeting a different dbuf
  * state (see the iteration list below).  Every pass borrows arcbufs,
  * verifies and updates the in-memory buffers via
  * compare_and_update_pbbufs(), writes packobj with dmu_write() and bigobj
  * with dmu_assign_arcbuf(), commits, and reads both ranges back for
  * comparison.  If a txg cannot be assigned in any pass, everything
  * acquired so far is released and the function returns.
  */
 void
 ztest_dmu_read_write_zcopy(ztest_ds_t *zd, uint64_t id)
 {
 	objset_t *os = zd->zd_os;
 	ztest_od_t od[2];
 	dmu_tx_t *tx;
 	uint64_t i;
 	int error;
 	uint64_t n, s, txg;
 	bufwad_t *packbuf, *bigbuf;
 	uint64_t packobj, packoff, packsize, bigobj, bigoff, bigsize;
 	uint64_t blocksize = ztest_random_blocksize();
 	uint64_t chunksize = blocksize;
 	uint64_t regions = 997;
 	uint64_t stride = 123456789ULL;
 	uint64_t width = 9;
 	dmu_buf_t *bonus_db;
 	arc_buf_t **bigbuf_arcbufs;
 	dmu_object_info_t doi;
 
 	/*
 	 * This test uses two objects, packobj and bigobj, that are always
 	 * updated together (i.e. in the same tx) so that their contents are
 	 * in sync and can be compared.  Their contents relate to each other
 	 * in a simple way: packobj is a dense array of 'bufwad' structures,
 	 * while bigobj is a sparse array of the same bufwads.  Specifically,
 	 * for any index n, there are three bufwads that should be identical:
 	 *
 	 *	packobj, at offset n * sizeof (bufwad_t)
 	 *	bigobj, at the head of the nth chunk
 	 *	bigobj, at the tail of the nth chunk
 	 *
 	 * The chunk size is set equal to bigobj block size so that
 	 * dmu_assign_arcbuf() can be tested for object updates.
 	 */
 
 	/*
 	 * Read the directory info.  If it's the first time, set things up.
 	 */
 	ztest_od_init(&od[0], id, FTAG, 0, DMU_OT_UINT64_OTHER, blocksize, 0);
 	ztest_od_init(&od[1], id, FTAG, 1, DMU_OT_UINT64_OTHER, 0, chunksize);
 
 	if (ztest_object_init(zd, od, sizeof (od), B_FALSE) != 0)
 		return;
 
 	bigobj = od[0].od_object;
 	packobj = od[1].od_object;
 	/* Adopt the block size held in the descriptor after init. */
 	blocksize = od[0].od_blocksize;
 	chunksize = blocksize;
 	ASSERT(chunksize == od[1].od_gen);
 
 	/* The chunk must equal bigobj's power-of-two data block size. */
 	VERIFY(dmu_object_info(os, bigobj, &doi) == 0);
 	VERIFY(ISP2(doi.doi_data_block_size));
 	VERIFY(chunksize == doi.doi_data_block_size);
 	VERIFY(chunksize >= 2 * sizeof (bufwad_t));
 
 	/*
 	 * Pick a random index and compute the offsets into packobj and bigobj.
 	 */
 	n = ztest_random(regions) * stride + ztest_random(width);
 	s = 1 + ztest_random(width - 1);
 
 	packoff = n * sizeof (bufwad_t);
 	packsize = s * sizeof (bufwad_t);
 
 	bigoff = n * chunksize;
 	bigsize = s * chunksize;
 
 	packbuf = umem_zalloc(packsize, UMEM_NOFAIL);
 	bigbuf = umem_zalloc(bigsize, UMEM_NOFAIL);
 
 	VERIFY3U(0, ==, dmu_bonus_hold(os, bigobj, FTAG, &bonus_db));
 
 	/*
 	 * Room for two arcbuf pointers per chunk: iteration 5 may split
 	 * every chunk into two half-sized arcbufs.
 	 */
 	bigbuf_arcbufs = umem_zalloc(2 * s * sizeof (arc_buf_t *), UMEM_NOFAIL);
 
 	/*
 	 * Iteration 0 test zcopy for DB_UNCACHED dbufs.
 	 * Iteration 1 test zcopy to already referenced dbufs.
 	 * Iteration 2 test zcopy to dirty dbuf in the same txg.
 	 * Iteration 3 test zcopy to dbuf dirty in previous txg.
 	 * Iteration 4 test zcopy when dbuf is no longer dirty.
 	 * Iteration 5 test zcopy when it can't be done.
 	 * Iteration 6 one more zcopy write.
 	 */
 	for (i = 0; i < 7; i++) {
 		uint64_t j;
 		uint64_t off;
 
 		/*
 		 * In iteration 5 (i == 5) use arcbufs
 		 * that don't match bigobj blksz to test
 		 * dmu_assign_arcbuf() when it can't directly
 		 * assign an arcbuf to a dbuf.
 		 */
 		for (j = 0; j < s; j++) {
 			if (i != 5 || chunksize < (SPA_MINBLOCKSIZE * 2)) {
 				bigbuf_arcbufs[j] =
 				    dmu_request_arcbuf(bonus_db, chunksize);
 			} else {
 				bigbuf_arcbufs[2 * j] =
 				    dmu_request_arcbuf(bonus_db, chunksize / 2);
 				bigbuf_arcbufs[2 * j + 1] =
 				    dmu_request_arcbuf(bonus_db, chunksize / 2);
 			}
 		}
 
 		/*
 		 * Get a tx for the mods to both packobj and bigobj.
 		 */
 		tx = dmu_tx_create(os);
 
 		dmu_tx_hold_write(tx, packobj, packoff, packsize);
 		dmu_tx_hold_write(tx, bigobj, bigoff, bigsize);
 
 		txg = ztest_tx_assign(tx, TXG_MIGHTWAIT, FTAG);
 		if (txg == 0) {
 			/*
 			 * No txg: hand back every arcbuf borrowed above
 			 * (using the same single/split layout), free our
 			 * buffers, drop the bonus hold and stop.
 			 */
 			umem_free(packbuf, packsize);
 			umem_free(bigbuf, bigsize);
 			for (j = 0; j < s; j++) {
 				if (i != 5 ||
 				    chunksize < (SPA_MINBLOCKSIZE * 2)) {
 					dmu_return_arcbuf(bigbuf_arcbufs[j]);
 				} else {
 					dmu_return_arcbuf(
 					    bigbuf_arcbufs[2 * j]);
 					dmu_return_arcbuf(
 					    bigbuf_arcbufs[2 * j + 1]);
 				}
 			}
 			umem_free(bigbuf_arcbufs, 2 * s * sizeof (arc_buf_t *));
 			dmu_buf_rele(bonus_db, FTAG);
 			return;
 		}
 
 		/*
 		 * 50% of the time don't read objects in the 1st iteration to
 		 * test dmu_assign_arcbuf() for the case when there're no
 		 * existing dbufs for the specified offsets.
 		 */
 		if (i != 0 || ztest_random(2) != 0) {
 			error = dmu_read(os, packobj, packoff,
 			    packsize, packbuf, DMU_READ_PREFETCH);
 			ASSERT0(error);
 			error = dmu_read(os, bigobj, bigoff, bigsize,
 			    bigbuf, DMU_READ_PREFETCH);
 			ASSERT0(error);
 		}
 		compare_and_update_pbbufs(s, packbuf, bigbuf, bigsize,
 		    n, chunksize, txg);
 
 		/*
 		 * We've verified all the old bufwads, and made new ones.
 		 * Now write them out.
 		 */
 		dmu_write(os, packobj, packoff, packsize, packbuf, tx);
 		if (ztest_opts.zo_verbose >= 7) {
 			(void) printf("writing offset %llx size %llx"
 			    " txg %llx\n",
 			    (u_longlong_t)bigoff,
 			    (u_longlong_t)bigsize,
 			    (u_longlong_t)txg);
 		}
 		for (off = bigoff, j = 0; j < s; j++, off += chunksize) {
 			dmu_buf_t *dbt;
 			/* Fill the borrowed arcbuf(s) from bigbuf. */
 			if (i != 5 || chunksize < (SPA_MINBLOCKSIZE * 2)) {
 				bcopy((caddr_t)bigbuf + (off - bigoff),
 				    bigbuf_arcbufs[j]->b_data, chunksize);
 			} else {
 				bcopy((caddr_t)bigbuf + (off - bigoff),
 				    bigbuf_arcbufs[2 * j]->b_data,
 				    chunksize / 2);
 				bcopy((caddr_t)bigbuf + (off - bigoff) +
 				    chunksize / 2,
 				    bigbuf_arcbufs[2 * j + 1]->b_data,
 				    chunksize / 2);
 			}
 
 			/*
 			 * Iteration 1: hold the target dbuf across the
 			 * assignment so it is already referenced.
 			 */
 			if (i == 1) {
 				VERIFY(dmu_buf_hold(os, bigobj, off,
 				    FTAG, &dbt, DMU_READ_NO_PREFETCH) == 0);
 			}
 			if (i != 5 || chunksize < (SPA_MINBLOCKSIZE * 2)) {
 				dmu_assign_arcbuf(bonus_db, off,
 				    bigbuf_arcbufs[j], tx);
 			} else {
 				dmu_assign_arcbuf(bonus_db, off,
 				    bigbuf_arcbufs[2 * j], tx);
 				dmu_assign_arcbuf(bonus_db,
 				    off + chunksize / 2,
 				    bigbuf_arcbufs[2 * j + 1], tx);
 			}
 			if (i == 1) {
 				dmu_buf_rele(dbt, FTAG);
 			}
 		}
 		dmu_tx_commit(tx);
 
 		/*
 		 * Sanity check the stuff we just wrote.
 		 */
 		{
 			void *packcheck = umem_alloc(packsize, UMEM_NOFAIL);
 			void *bigcheck = umem_alloc(bigsize, UMEM_NOFAIL);
 
 			VERIFY(0 == dmu_read(os, packobj, packoff,
 			    packsize, packcheck, DMU_READ_PREFETCH));
 			VERIFY(0 == dmu_read(os, bigobj, bigoff,
 			    bigsize, bigcheck, DMU_READ_PREFETCH));
 
 			ASSERT(bcmp(packbuf, packcheck, packsize) == 0);
 			ASSERT(bcmp(bigbuf, bigcheck, bigsize) == 0);
 
 			umem_free(packcheck, packsize);
 			umem_free(bigcheck, bigsize);
 		}
 		/*
 		 * Set up the dbuf state the next iteration expects: after
 		 * pass 2 wait for a new txg to open (so pass 3 sees data
 		 * dirtied in a previous txg); after pass 3 wait for a sync
 		 * (so pass 4 sees a dbuf that is no longer dirty).
 		 */
 		if (i == 2) {
 			txg_wait_open(dmu_objset_pool(os), 0);
 		} else if (i == 3) {
 			txg_wait_synced(dmu_objset_pool(os), 0);
 		}
 	}
 
 	dmu_buf_rele(bonus_db, FTAG);
 	umem_free(packbuf, packsize);
 	umem_free(bigbuf, bigsize);
 	umem_free(bigbuf_arcbufs, 2 * s * sizeof (arc_buf_t *));
 }
 
 /*
  * Issue a random number of ztest_io() calls at one very large, randomly
  * chosen offset of the object shared under ID_PARALLEL.
  */
 /* ARGSUSED */
 void
 ztest_dmu_write_parallel(ztest_ds_t *zd, uint64_t id)
 {
 	ztest_od_t shared;
 	uint64_t where = (1ULL << (ztest_random(20) + 43)) +
 	    (ztest_random(ZTEST_RANGE_LOCKS) << SPA_MAXBLOCKSHIFT);
 
 	/*
 	 * Have multiple threads write to large offsets in an object
 	 * to verify that parallel writes to an object -- even to the
 	 * same blocks within the object -- doesn't cause any trouble.
 	 */
 	ztest_od_init(&shared, ID_PARALLEL, FTAG, 0, DMU_OT_UINT64_OTHER,
 	    0, 0);
 
 	if (ztest_object_init(zd, &shared, sizeof (shared), B_FALSE) != 0)
 		return;
 
 	/* Roughly nine times out of ten, go round again. */
 	for (;;) {
 		if (ztest_random(10) == 0)
 			break;
 		ztest_io(zd, shared.od_object, where);
 	}
 }
 
 /*
  * Truncate and then preallocate a random run of blocks in this test's
  * object, then overwrite randomly chosen blocks of that run with zeroes,
  * following each successful write with a random burst of ztest_io() at
  * the same offset.
  */
 void
 ztest_dmu_prealloc(ztest_ds_t *zd, uint64_t id)
 {
 	ztest_od_t od;
 	uint64_t base = (1ULL << (ztest_random(4) + SPA_MAXBLOCKSHIFT)) +
 	    (ztest_random(ZTEST_RANGE_LOCKS) << SPA_MAXBLOCKSHIFT);
 	uint64_t nblocks = ztest_random(20) + 1;
 	uint64_t blksz = ztest_random_blocksize();
 	uint64_t span = nblocks * blksz;
 	void *zeroes;
 
 	ztest_od_init(&od, id, FTAG, 0, DMU_OT_UINT64_OTHER, blksz, 0);
 
 	if (ztest_object_init(zd, &od, sizeof (od), !ztest_random(2)) != 0)
 		return;
 
 	if (ztest_truncate(zd, od.od_object, base, span) != 0)
 		return;
 
 	ztest_prealloc(zd, od.od_object, base, span);
 
 	zeroes = umem_zalloc(blksz, UMEM_NOFAIL);
 
 	for (;;) {
 		uint64_t target;
 
 		if (ztest_random(nblocks) == 0)
 			break;
 
 		/* Pick one block of the preallocated run. */
 		target = base + (ztest_random(nblocks) * blksz);
 		if (ztest_write(zd, od.od_object, target, blksz,
 		    zeroes) != 0)
 			break;
 
 		for (;;) {
 			if (ztest_random(4) == 0)
 				break;
 			ztest_io(zd, od.od_object, target);
 		}
 	}
 
 	umem_free(zeroes, blksz);
 }
 
 /*
  * Verify that zap_{create,destroy,add,remove,update} work as expected.
  */
 #define	ZTEST_ZAP_MIN_INTS	1
 #define	ZTEST_ZAP_MAX_INTS	4
 #define	ZTEST_ZAP_MAX_PROPS	1000
 
 /*
  * ZAP add/lookup/update/remove test on this test's own ZAP object.
  *
  * Three phases, each in its own tx:
  *   1. add, probe and remove two names that form a known hash collision;
  *   2. validate (if present) and then update a random txg_N / prop_N pair;
  *   3. remove another random txg_N / prop_N pair, if it exists.
  * The function returns early if ztest_object_init() fails or if any of
  * the tx assignments yields a txg of 0.
  */
 void
 ztest_zap(ztest_ds_t *zd, uint64_t id)
 {
 	objset_t *os = zd->zd_os;
 	ztest_od_t od[1];
 	uint64_t object;
 	uint64_t txg, last_txg;
 	uint64_t value[ZTEST_ZAP_MAX_INTS];
 	uint64_t zl_ints, zl_intsize, prop;
 	int i, ints;
 	dmu_tx_t *tx;
 	char propname[100], txgname[100];
 	int error;
 	/* The two names used below as a known hash collision. */
 	char *hc[2] = { "s.acl.h", ".s.open.h.hyLZlg" };
 
 	ztest_od_init(&od[0], id, FTAG, 0, DMU_OT_ZAP_OTHER, 0, 0);
 
 	if (ztest_object_init(zd, od, sizeof (od), !ztest_random(2)) != 0)
 		return;
 
 	object = od[0].od_object;
 
 	/*
 	 * Generate a known hash collision, and verify that
 	 * we can lookup and remove both entries.
 	 */
 	tx = dmu_tx_create(os);
 	dmu_tx_hold_zap(tx, object, B_TRUE, NULL);
 	txg = ztest_tx_assign(tx, TXG_MIGHTWAIT, FTAG);
 	if (txg == 0)
 		return;
 	for (i = 0; i < 2; i++) {
 		value[i] = i;
 		VERIFY3U(0, ==, zap_add(os, object, hc[i], sizeof (uint64_t),
 		    1, &value[i], tx));
 	}
 	for (i = 0; i < 2; i++) {
 		/* Adding a name that already exists must fail with EEXIST. */
 		VERIFY3U(EEXIST, ==, zap_add(os, object, hc[i],
 		    sizeof (uint64_t), 1, &value[i], tx));
 		VERIFY3U(0, ==,
 		    zap_length(os, object, hc[i], &zl_intsize, &zl_ints));
 		ASSERT3U(zl_intsize, ==, sizeof (uint64_t));
 		ASSERT3U(zl_ints, ==, 1);
 	}
 	for (i = 0; i < 2; i++) {
 		VERIFY3U(0, ==, zap_remove(os, object, hc[i], tx));
 	}
 	dmu_tx_commit(tx);
 
 	/*
 	 * Generate a bunch of random entries.
 	 */
 	/* Value width is derived from the object number, at least 1. */
 	ints = MAX(ZTEST_ZAP_MIN_INTS, object % ZTEST_ZAP_MAX_INTS);
 
 	prop = ztest_random(ZTEST_ZAP_MAX_PROPS);
 	(void) sprintf(propname, "prop_%llu", (u_longlong_t)prop);
 	(void) sprintf(txgname, "txg_%llu", (u_longlong_t)prop);
 	bzero(value, sizeof (value));
 	last_txg = 0;
 
 	/*
 	 * If these zap entries already exist, validate their contents.
 	 */
 	error = zap_length(os, object, txgname, &zl_intsize, &zl_ints);
 	if (error == 0) {
 		ASSERT3U(zl_intsize, ==, sizeof (uint64_t));
 		ASSERT3U(zl_ints, ==, 1);
 
 		VERIFY(zap_lookup(os, object, txgname, zl_intsize,
 		    zl_ints, &last_txg) == 0);
 
 		VERIFY(zap_length(os, object, propname, &zl_intsize,
 		    &zl_ints) == 0);
 
 		ASSERT3U(zl_intsize, ==, sizeof (uint64_t));
 		ASSERT3U(zl_ints, ==, ints);
 
 		VERIFY(zap_lookup(os, object, propname, zl_intsize,
 		    zl_ints, value) == 0);
 
 		for (i = 0; i < ints; i++) {
 			ASSERT3U(value[i], ==, last_txg + object + i);
 		}
 	} else {
 		ASSERT3U(error, ==, ENOENT);
 	}
 
 	/*
 	 * Atomically update two entries in our zap object.
 	 * The first is named txg_%llu, and contains the txg
 	 * in which the property was last updated.  The second
 	 * is named prop_%llu, and the nth element of its value
 	 * should be txg + object + n.
 	 */
 	tx = dmu_tx_create(os);
 	dmu_tx_hold_zap(tx, object, B_TRUE, NULL);
 	txg = ztest_tx_assign(tx, TXG_MIGHTWAIT, FTAG);
 	if (txg == 0)
 		return;
 
 	/* A stored txg newer than the one we were just assigned is a bug. */
 	if (last_txg > txg)
 		fatal(0, "zap future leak: old %llu new %llu", last_txg, txg);
 
 	for (i = 0; i < ints; i++)
 		value[i] = txg + object + i;
 
 	VERIFY3U(0, ==, zap_update(os, object, txgname, sizeof (uint64_t),
 	    1, &txg, tx));
 	VERIFY3U(0, ==, zap_update(os, object, propname, sizeof (uint64_t),
 	    ints, value, tx));
 
 	dmu_tx_commit(tx);
 
 	/*
 	 * Remove a random pair of entries.
 	 */
 	prop = ztest_random(ZTEST_ZAP_MAX_PROPS);
 	(void) sprintf(propname, "prop_%llu", (u_longlong_t)prop);
 	(void) sprintf(txgname, "txg_%llu", (u_longlong_t)prop);
 
 	error = zap_length(os, object, txgname, &zl_intsize, &zl_ints);
 
 	/* Nothing to remove if this pair was never written. */
 	if (error == ENOENT)
 		return;
 
 	ASSERT0(error);
 
 	tx = dmu_tx_create(os);
 	dmu_tx_hold_zap(tx, object, B_TRUE, NULL);
 	txg = ztest_tx_assign(tx, TXG_MIGHTWAIT, FTAG);
 	if (txg == 0)
 		return;
 	VERIFY3U(0, ==, zap_remove(os, object, txgname, tx));
 	VERIFY3U(0, ==, zap_remove(os, object, propname, tx));
 	dmu_tx_commit(tx);
 }
 
 /*
  * Testcase to test the upgrading of a microzap to fatzap.
  *
  * Each entry is added in a tx of its own; the test stops quietly as soon
  * as a tx cannot be assigned a txg.
  */
 void
 ztest_fzap(ztest_ds_t *zd, uint64_t id)
 {
 	objset_t *os = zd->zd_os;
 	ztest_od_t od;
 	uint64_t object, txg;
 	char name[ZFS_MAX_DATASET_NAME_LEN];
 	dmu_tx_t *tx;
 	int error;
 
 	ztest_od_init(&od, id, FTAG, 0, DMU_OT_ZAP_OTHER, 0, 0);
 
 	if (ztest_object_init(zd, &od, sizeof (od), !ztest_random(2)) != 0)
 		return;
 
 	object = od.od_object;
 
 	/*
 	 * Add entries to this ZAP and make sure it spills over
 	 * and gets upgraded to a fatzap. Also, since we are adding
 	 * 2050 entries we should see ptrtbl growth and leaf-block split.
 	 */
 	for (uint64_t value = 0; value < 2050; value++) {
 		(void) snprintf(name, sizeof (name), "fzap-%llu-%llu",
 		    id, value);
 
 		tx = dmu_tx_create(os);
 		dmu_tx_hold_zap(tx, object, B_TRUE, name);
 		txg = ztest_tx_assign(tx, TXG_MIGHTWAIT, FTAG);
 		if (txg == 0)
 			return;
 
 		/* An entry left over from an earlier run is acceptable. */
 		error = zap_add(os, object, name, sizeof (uint64_t), 1,
 		    &value, tx);
 		ASSERT(error == 0 || error == EEXIST);
 		dmu_tx_commit(tx);
 	}
 }
 
 /*
  * Perform one randomly selected ZAP operation (length, lookup, add,
  * update or remove) on a randomly named entry of one of the two ZAP
  * objects shared under ID_PARALLEL.
  */
 /* ARGSUSED */
 void
 ztest_zap_parallel(ztest_ds_t *zd, uint64_t id)
 {
 	int micro = ztest_random(2);
 	objset_t *os = zd->zd_os;
 	char name[20], string_value[20];
 	uint64_t txg, object, count, wsize, wc, zl_wsize, zl_wc;
 	dmu_tx_t *tx = NULL;
 	ztest_od_t od;
 	void *data;
 	int pos, op, namelen, error;
 
 	/* 'micro' selects which of the two shared objects is used. */
 	ztest_od_init(&od, ID_PARALLEL, FTAG, micro, DMU_OT_ZAP_OTHER, 0, 0);
 
 	if (ztest_object_init(zd, &od, sizeof (od), B_FALSE) != 0)
 		return;
 
 	object = od.od_object;
 
 	/*
 	 * Generate a random name of the form 'xxx.....' where each
 	 * x is a random printable character and the dots are dots.
 	 * There are 94 such characters, and the name length goes from
 	 * 6 to 20, so there are 94^3 * 15 = 12,458,760 possible names.
 	 */
 	namelen = ztest_random(sizeof (name) - 5) + 5 + 1;
 
 	pos = 0;
 	while (pos < 3) {
 		name[pos] = '!' + ztest_random('~' - '!' + 1);
 		pos++;
 	}
 	while (pos < namelen - 1) {
 		name[pos] = '.';
 		pos++;
 	}
 	name[pos] = '\0';
 
 	/*
 	 * Even-length names in the non-micro object carry a copy of the
 	 * name as a byte string; everything else carries a single txg.
 	 */
 	if (!(namelen & 1) && !micro) {
 		wsize = 1;
 		wc = namelen;
 		data = string_value;
 	} else {
 		wsize = sizeof (txg);
 		wc = 1;
 		data = &txg;
 	}
 
 	count = -1ULL;
 	VERIFY0(zap_count(os, object, &count));
 	ASSERT(count != -1ULL);
 
 	/*
 	 * Select an operation: length, lookup, add, update, remove.
 	 */
 	op = ztest_random(5);
 
 	if (op < 2) {
 		/* Read-only operations need no tx. */
 		txg = 0;
 		bzero(string_value, namelen);
 	} else {
 		tx = dmu_tx_create(os);
 		dmu_tx_hold_zap(tx, object, B_TRUE, NULL);
 		txg = ztest_tx_assign(tx, TXG_MIGHTWAIT, FTAG);
 		if (txg == 0)
 			return;
 		bcopy(name, string_value, namelen);
 	}
 
 	switch (op) {
 	case 0:
 		error = zap_length(os, object, name, &zl_wsize, &zl_wc);
 		if (error != 0) {
 			ASSERT3U(error, ==, ENOENT);
 			break;
 		}
 		ASSERT3U(wsize, ==, zl_wsize);
 		ASSERT3U(wc, ==, zl_wc);
 		break;
 
 	case 1:
 		error = zap_lookup(os, object, name, wsize, wc, data);
 		if (error != 0) {
 			ASSERT3U(error, ==, ENOENT);
 			break;
 		}
 		/* String-valued entries must echo their own name. */
 		if (data == string_value &&
 		    bcmp(name, data, namelen) != 0)
 			fatal(0, "name '%s' != val '%s' len %d",
 			    name, data, namelen);
 		break;
 
 	case 2:
 		error = zap_add(os, object, name, wsize, wc, data, tx);
 		ASSERT(error == 0 || error == EEXIST);
 		break;
 
 	case 3:
 		VERIFY(zap_update(os, object, name, wsize, wc, data, tx) == 0);
 		break;
 
 	case 4:
 		error = zap_remove(os, object, name, tx);
 		ASSERT(error == 0 || error == ENOENT);
 		break;
 	}
 
 	if (tx != NULL)
 		dmu_tx_commit(tx);
 }
 
 /*
  * Commit callback data.
  *
  * One of these is allocated per registered commit callback and handed to
  * ztest_commit_callback() as its argument.
  */
 typedef struct ztest_cb_data {
 	/* presumably the linkage for zcl.zcl_callbacks -- TODO confirm */
 	list_node_t		zcd_node;
 	/* txg of the owning tx; 0 when no txg was assigned */
 	uint64_t		zcd_txg;
 	/* error value the callback must be invoked with */
 	int			zcd_expected_err;
 	/* B_TRUE once inserted into zcl.zcl_callbacks */
 	boolean_t		zcd_added;
 	/* B_TRUE once ztest_commit_callback() has run for this entry */
 	boolean_t		zcd_called;
 	/* pool whose last synced txg the callback checks against */
 	spa_t			*zcd_spa;
 } ztest_cb_data_t;
 
 /*
  * This is the actual commit callback function.
  *
  * arg is the ztest_cb_data_t registered with the tx; error is the status
  * the callback is invoked with.  The callback verifies that it is called
  * exactly once, with the expected error, and not before its txg has
  * synced.  On ECANCELED the data is left for the caller to free;
  * otherwise it is unlinked from zcl.zcl_callbacks (if it was ever added)
  * and freed here.
  */
 static void
 ztest_commit_callback(void *arg, int error)
 {
 	ztest_cb_data_t *data = arg;
 	uint64_t synced_txg;
 
 	VERIFY(data != NULL);
 	VERIFY3S(data->zcd_expected_err, ==, error);
 	VERIFY(!data->zcd_called);
 
 	/* The callback must not fire before its txg has been synced. */
 	synced_txg = spa_last_synced_txg(data->zcd_spa);
 	if (data->zcd_txg > synced_txg)
 		fatal(0, "commit callback of txg %" PRIu64 " called prematurely"
 		    ", last synced txg = %" PRIu64 "\n", data->zcd_txg,
 		    synced_txg);
 
 	data->zcd_called = B_TRUE;
 
 	if (error == ECANCELED) {
 		ASSERT0(data->zcd_txg);
 		ASSERT(!data->zcd_added);
 
 		/*
 		 * The private callback data should be destroyed here, but
 		 * since we are going to check the zcd_called field after
 		 * dmu_tx_abort(), we will destroy it there.
 		 */
 		return;
 	}
 
 	/* Was this callback added to the global callback list? */
 	if (!data->zcd_added)
 		goto out;
 
 	ASSERT3U(data->zcd_txg, !=, 0);
 
 	/* Remove our callback from the list */
-	(void) mutex_lock(&zcl.zcl_callbacks_lock);
+	mutex_enter(&zcl.zcl_callbacks_lock);
 	list_remove(&zcl.zcl_callbacks, data);
-	(void) mutex_unlock(&zcl.zcl_callbacks_lock);
+	mutex_exit(&zcl.zcl_callbacks_lock);
 
 out:
 	umem_free(data, sizeof (ztest_cb_data_t));
 }
 
 /*
  * Allocate a zero-filled ztest_cb_data_t and record in it the given txg
  * and the spa that 'os' belongs to.  All other fields stay zero.
  */
 static ztest_cb_data_t *
 ztest_create_cb_data(objset_t *os, uint64_t txg)
 {
 	ztest_cb_data_t *zcd = umem_zalloc(sizeof (*zcd), UMEM_NOFAIL);
 
 	zcd->zcd_spa = dmu_objset_spa(os);
 	zcd->zcd_txg = txg;
 
 	return (zcd);
 }
 
 /*
  * If a number of txgs equal to this threshold have been created after a commit
  * callback has been registered but not called, then we assume there is an
  * implementation bug.
  */
 #define	ZTEST_COMMIT_CALLBACK_THRESH	(TXG_CONCURRENT_STATES + 2)
 
 /*
  * Commit callback test.
  *
  * Registers ztest_commit_callback() on a single tx up to three times:
  * once before dmu_tx_assign(), once right after it, and -- only if the
  * tx was assigned -- a third time.
  *
  * Roughly one run in a hundred, or whenever dmu_tx_assign(TXG_NOWAIT)
  * fails, the tx is aborted instead; the two callbacks registered so far
  * are then expected to have run with ECANCELED by the time
  * dmu_tx_abort() returns, and their data is freed here.
  *
  * Otherwise the assigned txg is written to the test object (after
  * checking the previous value is not from the future), the three
  * callback records are linked into zcl.zcl_callbacks in txg order under
  * zcl.zcl_callbacks_lock, and the tx is committed.
  */
 void
 ztest_dmu_commit_callbacks(ztest_ds_t *zd, uint64_t id)
 {
 	objset_t *os = zd->zd_os;
 	ztest_od_t od[1];
 	dmu_tx_t *tx;
 	ztest_cb_data_t *cb_data[3], *tmp_cb;
 	uint64_t old_txg, txg;
 	/*
 	 * error must start at 0: it is only assigned below when the forced
 	 * abort fires, yet it is read unconditionally right afterwards.
 	 */
 	int i, error = 0;
 
 	ztest_od_init(&od[0], id, FTAG, 0, DMU_OT_UINT64_OTHER, 0, 0);
 
 	if (ztest_object_init(zd, od, sizeof (od), B_FALSE) != 0)
 		return;
 
 	tx = dmu_tx_create(os);
 
 	/* First callback: registered before the tx has a txg. */
 	cb_data[0] = ztest_create_cb_data(os, 0);
 	dmu_tx_callback_register(tx, ztest_commit_callback, cb_data[0]);
 
 	dmu_tx_hold_write(tx, od[0].od_object, 0, sizeof (uint64_t));
 
 	/* Every once in a while, abort the transaction on purpose */
 	if (ztest_random(100) == 0)
 		error = -1;
 
 	if (!error)
 		error = dmu_tx_assign(tx, TXG_NOWAIT);
 
 	txg = error ? 0 : dmu_tx_get_txg(tx);
 
 	/* Now that the txg is known, record it in the first callback too. */
 	cb_data[0]->zcd_txg = txg;
 	cb_data[1] = ztest_create_cb_data(os, txg);
 	dmu_tx_callback_register(tx, ztest_commit_callback, cb_data[1]);
 
 	if (error) {
 		/*
 		 * It's not a strict requirement to call the registered
 		 * callbacks from inside dmu_tx_abort(), but that's what
 		 * is supposed to happen in the current implementation
 		 * so we will check for that.
 		 */
 		for (i = 0; i < 2; i++) {
 			cb_data[i]->zcd_expected_err = ECANCELED;
 			VERIFY(!cb_data[i]->zcd_called);
 		}
 
 		dmu_tx_abort(tx);
 
 		/* The callback leaves ECANCELED data for us to free. */
 		for (i = 0; i < 2; i++) {
 			VERIFY(cb_data[i]->zcd_called);
 			umem_free(cb_data[i], sizeof (ztest_cb_data_t));
 		}
 
 		return;
 	}
 
 	cb_data[2] = ztest_create_cb_data(os, txg);
 	dmu_tx_callback_register(tx, ztest_commit_callback, cb_data[2]);
 
 	/*
 	 * Read existing data to make sure there isn't a future leak.
 	 */
 	VERIFY(0 == dmu_read(os, od[0].od_object, 0, sizeof (uint64_t),
 	    &old_txg, DMU_READ_PREFETCH));
 
 	if (old_txg > txg)
 		fatal(0, "future leak: got %" PRIu64 ", open txg is %" PRIu64,
 		    old_txg, txg);
 
 	dmu_write(os, od[0].od_object, 0, sizeof (uint64_t), &txg, tx);
 
-	(void) mutex_lock(&zcl.zcl_callbacks_lock);
+	mutex_enter(&zcl.zcl_callbacks_lock);
 
 	/*
 	 * Since commit callbacks don't have any ordering requirement and since
 	 * it is theoretically possible for a commit callback to be called
 	 * after an arbitrary amount of time has elapsed since its txg has been
 	 * synced, it is difficult to reliably determine whether a commit
 	 * callback hasn't been called due to high load or due to a flawed
 	 * implementation.
 	 *
 	 * In practice, we will assume that if after a certain number of txgs a
 	 * commit callback hasn't been called, then most likely there's an
 	 * implementation bug.
 	 */
 	tmp_cb = list_head(&zcl.zcl_callbacks);
 	if (tmp_cb != NULL &&
 	    (txg - ZTEST_COMMIT_CALLBACK_THRESH) > tmp_cb->zcd_txg) {
 		fatal(0, "Commit callback threshold exceeded, oldest txg: %"
 		    PRIu64 ", open txg: %" PRIu64 "\n", tmp_cb->zcd_txg, txg);
 	}
 
 	/*
 	 * Let's find the place to insert our callbacks.
 	 *
 	 * Even though the list is ordered by txg, it is possible for the
 	 * insertion point to not be the end because our txg may already be
 	 * quiescing at this point and other callbacks in the open txg
 	 * (from other objsets) may have sneaked in.
 	 */
 	tmp_cb = list_tail(&zcl.zcl_callbacks);
 	while (tmp_cb != NULL && tmp_cb->zcd_txg > txg)
 		tmp_cb = list_prev(&zcl.zcl_callbacks, tmp_cb);
 
 	/* Add the 3 callbacks to the list */
 	for (i = 0; i < 3; i++) {
 		if (tmp_cb == NULL)
 			list_insert_head(&zcl.zcl_callbacks, cb_data[i]);
 		else
 			list_insert_after(&zcl.zcl_callbacks, tmp_cb,
 			    cb_data[i]);
 
 		cb_data[i]->zcd_added = B_TRUE;
 		VERIFY(!cb_data[i]->zcd_called);
 
 		/* Chain the next record directly after this one. */
 		tmp_cb = cb_data[i];
 	}
 
-	(void) mutex_unlock(&zcl.zcl_callbacks_lock);
+	mutex_exit(&zcl.zcl_callbacks_lock);
 
 	dmu_tx_commit(tx);
 }
 
 /* ARGSUSED */
 void
 ztest_dsl_prop_get_set(ztest_ds_t *zd, uint64_t id)
 {
 	zfs_prop_t proplist[] = {
 		ZFS_PROP_CHECKSUM,
 		ZFS_PROP_COMPRESSION,
 		ZFS_PROP_COPIES,
 		ZFS_PROP_DEDUP
 	};
 
-	(void) rw_rdlock(&ztest_name_lock);
+	rw_enter(&ztest_name_lock, RW_READER);
 
 	for (int p = 0; p < sizeof (proplist) / sizeof (proplist[0]); p++)
 		(void) ztest_dsl_prop_set_uint64(zd->zd_name, proplist[p],
 		    ztest_random_dsl_prop(proplist[p]), (int)ztest_random(2));
 
-	(void) rw_unlock(&ztest_name_lock);
+	rw_exit(&ztest_name_lock);
 }
 
 /* ARGSUSED */
 void
 ztest_remap_blocks(ztest_ds_t *zd, uint64_t id)
 {
-	(void) rw_rdlock(&ztest_name_lock);
+	rw_enter(&ztest_name_lock, RW_READER);
 
 	int error = dmu_objset_remap_indirects(zd->zd_name);
 	if (error == ENOSPC)
 		error = 0;
 	ASSERT0(error);
 
-	(void) rw_unlock(&ztest_name_lock);
+	rw_exit(&ztest_name_lock);
 }
 
 /* ARGSUSED */
 void
 ztest_spa_prop_get_set(ztest_ds_t *zd, uint64_t id)
 {
 	nvlist_t *props = NULL;
 
-	(void) rw_rdlock(&ztest_name_lock);
+	rw_enter(&ztest_name_lock, RW_READER);
 
 	(void) ztest_spa_prop_set_uint64(ZPOOL_PROP_DEDUPDITTO,
 	    ZIO_DEDUPDITTO_MIN + ztest_random(ZIO_DEDUPDITTO_MIN));
 
 	VERIFY0(spa_prop_get(ztest_spa, &props));
 
 	if (ztest_opts.zo_verbose >= 6)
 		dump_nvlist(props, 4);
 
 	nvlist_free(props);
 
-	(void) rw_unlock(&ztest_name_lock);
+	rw_exit(&ztest_name_lock);
 }
 
 static int
 user_release_one(const char *snapname, const char *holdname)
 {
 	nvlist_t *snaps, *holds;
 	int error;
 
 	snaps = fnvlist_alloc();
 	holds = fnvlist_alloc();
 	fnvlist_add_boolean(holds, holdname);
 	fnvlist_add_nvlist(snaps, snapname, holds);
 	fnvlist_free(holds);
 	error = dsl_dataset_user_release(snaps, NULL);
 	fnvlist_free(snaps);
 	return (error);
 }
 
 /*
  * Test snapshot hold/release and deferred destroy.
  */
 void
 ztest_dmu_snapshot_hold(ztest_ds_t *zd, uint64_t id)
 {
 	int error;
 	objset_t *os = zd->zd_os;
 	objset_t *origin;
 	char snapname[100];
 	char fullname[100];
 	char clonename[100];
 	char tag[100];
 	char osname[ZFS_MAX_DATASET_NAME_LEN];
 	nvlist_t *holds;
 
-	(void) rw_rdlock(&ztest_name_lock);
+	rw_enter(&ztest_name_lock, RW_READER);
 
 	dmu_objset_name(os, osname);
 
 	(void) snprintf(snapname, sizeof (snapname), "sh1_%llu", id);
 	(void) snprintf(fullname, sizeof (fullname), "%s@%s", osname, snapname);
 	(void) snprintf(clonename, sizeof (clonename),
 	    "%s/ch1_%llu", osname, id);
 	(void) snprintf(tag, sizeof (tag), "tag_%llu", id);
 
 	/*
 	 * Clean up from any previous run.
 	 */
 	error = dsl_destroy_head(clonename);
 	if (error != ENOENT)
 		ASSERT0(error);
 	error = user_release_one(fullname, tag);
 	if (error != ESRCH && error != ENOENT)
 		ASSERT0(error);
 	error = dsl_destroy_snapshot(fullname, B_FALSE);
 	if (error != ENOENT)
 		ASSERT0(error);
 
 	/*
 	 * Create snapshot, clone it, mark snap for deferred destroy,
 	 * destroy clone, verify snap was also destroyed.
 	 */
 	error = dmu_objset_snapshot_one(osname, snapname);
 	if (error) {
 		if (error == ENOSPC) {
 			ztest_record_enospc("dmu_objset_snapshot");
 			goto out;
 		}
 		fatal(0, "dmu_objset_snapshot(%s) = %d", fullname, error);
 	}
 
 	error = dmu_objset_clone(clonename, fullname);
 	if (error) {
 		if (error == ENOSPC) {
 			ztest_record_enospc("dmu_objset_clone");
 			goto out;
 		}
 		fatal(0, "dmu_objset_clone(%s) = %d", clonename, error);
 	}
 
 	error = dsl_destroy_snapshot(fullname, B_TRUE);
 	if (error) {
 		fatal(0, "dsl_destroy_snapshot(%s, B_TRUE) = %d",
 		    fullname, error);
 	}
 
 	error = dsl_destroy_head(clonename);
 	if (error)
 		fatal(0, "dsl_destroy_head(%s) = %d", clonename, error);
 
 	error = dmu_objset_hold(fullname, FTAG, &origin);
 	if (error != ENOENT)
 		fatal(0, "dmu_objset_hold(%s) = %d", fullname, error);
 
 	/*
 	 * Create snapshot, add temporary hold, verify that we can't
 	 * destroy a held snapshot, mark for deferred destroy,
 	 * release hold, verify snapshot was destroyed.
 	 */
 	error = dmu_objset_snapshot_one(osname, snapname);
 	if (error) {
 		if (error == ENOSPC) {
 			ztest_record_enospc("dmu_objset_snapshot");
 			goto out;
 		}
 		fatal(0, "dmu_objset_snapshot(%s) = %d", fullname, error);
 	}
 
 	holds = fnvlist_alloc();
 	fnvlist_add_string(holds, fullname, tag);
 	error = dsl_dataset_user_hold(holds, 0, NULL);
 	fnvlist_free(holds);
 
 	if (error == ENOSPC) {
 		ztest_record_enospc("dsl_dataset_user_hold");
 		goto out;
 	} else if (error) {
 		fatal(0, "dsl_dataset_user_hold(%s, %s) = %u",
 		    fullname, tag, error);
 	}
 
 	error = dsl_destroy_snapshot(fullname, B_FALSE);
 	if (error != EBUSY) {
 		fatal(0, "dsl_destroy_snapshot(%s, B_FALSE) = %d",
 		    fullname, error);
 	}
 
 	error = dsl_destroy_snapshot(fullname, B_TRUE);
 	if (error) {
 		fatal(0, "dsl_destroy_snapshot(%s, B_TRUE) = %d",
 		    fullname, error);
 	}
 
 	error = user_release_one(fullname, tag);
 	if (error)
 		fatal(0, "user_release_one(%s, %s) = %d", fullname, tag, error);
 
 	VERIFY3U(dmu_objset_hold(fullname, FTAG, &origin), ==, ENOENT);
 
 out:
-	(void) rw_unlock(&ztest_name_lock);
+	rw_exit(&ztest_name_lock);
 }
 
 /*
  * Inject random faults into the on-disk data.
  */
 /* ARGSUSED */
 void
 ztest_fault_inject(ztest_ds_t *zd, uint64_t id)
 {
 	ztest_shared_t *zs = ztest_shared;
 	spa_t *spa = ztest_spa;
 	int fd;
 	uint64_t offset;
 	uint64_t leaves;
 	uint64_t bad = 0x1990c0ffeedecade;
 	uint64_t top, leaf;
 	char path0[MAXPATHLEN];
 	char pathrand[MAXPATHLEN];
 	size_t fsize;
 	int bshift = SPA_MAXBLOCKSHIFT + 2;
 	int iters = 1000;
 	int maxfaults;
 	int mirror_save;
 	vdev_t *vd0 = NULL;
 	uint64_t guid0 = 0;
 	boolean_t islog = B_FALSE;
 
-	VERIFY(mutex_lock(&ztest_vdev_lock) == 0);
+	mutex_enter(&ztest_vdev_lock);
 	maxfaults = MAXFAULTS();
 	leaves = MAX(zs->zs_mirrors, 1) * ztest_opts.zo_raidz;
 	mirror_save = zs->zs_mirrors;
-	VERIFY(mutex_unlock(&ztest_vdev_lock) == 0);
+	mutex_exit(&ztest_vdev_lock);
 
 	ASSERT(leaves >= 1);
 
 	/*
 	 * Grab the name lock as reader. There are some operations
 	 * which don't like to have their vdevs changed while
 	 * they are in progress (i.e. spa_change_guid). Those
 	 * operations will have grabbed the name lock as writer.
 	 */
-	(void) rw_rdlock(&ztest_name_lock);
+	rw_enter(&ztest_name_lock, RW_READER);
 
 	/*
 	 * We need SCL_STATE here because we're going to look at vd0->vdev_tsd.
 	 */
 	spa_config_enter(spa, SCL_STATE, FTAG, RW_READER);
 
 	if (ztest_random(2) == 0) {
 		/*
 		 * Inject errors on a normal data device or slog device.
 		 */
 		top = ztest_random_vdev_top(spa, B_TRUE);
 		leaf = ztest_random(leaves) + zs->zs_splits;
 
 		/*
 		 * Generate paths to the first leaf in this top-level vdev,
 		 * and to the random leaf we selected.  We'll induce transient
 		 * write failures and random online/offline activity on leaf 0,
 		 * and we'll write random garbage to the randomly chosen leaf.
 		 */
 		(void) snprintf(path0, sizeof (path0), ztest_dev_template,
 		    ztest_opts.zo_dir, ztest_opts.zo_pool,
 		    top * leaves + zs->zs_splits);
 		(void) snprintf(pathrand, sizeof (pathrand), ztest_dev_template,
 		    ztest_opts.zo_dir, ztest_opts.zo_pool,
 		    top * leaves + leaf);
 
 		vd0 = vdev_lookup_by_path(spa->spa_root_vdev, path0);
 		if (vd0 != NULL && vd0->vdev_top->vdev_islog)
 			islog = B_TRUE;
 
 		/*
 		 * If the top-level vdev needs to be resilvered
 		 * then we only allow faults on the device that is
 		 * resilvering.
 		 */
 		if (vd0 != NULL && maxfaults != 1 &&
 		    (!vdev_resilver_needed(vd0->vdev_top, NULL, NULL) ||
 		    vd0->vdev_resilver_txg != 0)) {
 			/*
 			 * Make vd0 explicitly claim to be unreadable,
 			 * or unwriteable, or reach behind its back
 			 * and close the underlying fd.  We can do this if
 			 * maxfaults == 0 because we'll fail and reexecute,
 			 * and we can do it if maxfaults >= 2 because we'll
 			 * have enough redundancy.  If maxfaults == 1, the
 			 * combination of this with injection of random data
 			 * corruption below exceeds the pool's fault tolerance.
 			 */
 			vdev_file_t *vf = vd0->vdev_tsd;
 
 			zfs_dbgmsg("injecting fault to vdev %llu; maxfaults=%d",
 			    (long long)vd0->vdev_id, (int)maxfaults);
 
 			if (vf != NULL && ztest_random(3) == 0) {
 				(void) close(vf->vf_vnode->v_fd);
 				vf->vf_vnode->v_fd = -1;
 			} else if (ztest_random(2) == 0) {
 				vd0->vdev_cant_read = B_TRUE;
 			} else {
 				vd0->vdev_cant_write = B_TRUE;
 			}
 			guid0 = vd0->vdev_guid;
 		}
 	} else {
 		/*
 		 * Inject errors on an l2cache device.
 		 */
 		spa_aux_vdev_t *sav = &spa->spa_l2cache;
 
 		if (sav->sav_count == 0) {
 			spa_config_exit(spa, SCL_STATE, FTAG);
-			(void) rw_unlock(&ztest_name_lock);
+			rw_exit(&ztest_name_lock);
 			return;
 		}
 		vd0 = sav->sav_vdevs[ztest_random(sav->sav_count)];
 		guid0 = vd0->vdev_guid;
 		(void) strcpy(path0, vd0->vdev_path);
 		(void) strcpy(pathrand, vd0->vdev_path);
 
 		leaf = 0;
 		leaves = 1;
 		maxfaults = INT_MAX;	/* no limit on cache devices */
 	}
 
 	spa_config_exit(spa, SCL_STATE, FTAG);
-	(void) rw_unlock(&ztest_name_lock);
+	rw_exit(&ztest_name_lock);
 
 	/*
 	 * If we can tolerate two or more faults, or we're dealing
 	 * with a slog, randomly online/offline vd0.
 	 */
 	if ((maxfaults >= 2 || islog) && guid0 != 0) {
 		if (ztest_random(10) < 6) {
 			int flags = (ztest_random(2) == 0 ?
 			    ZFS_OFFLINE_TEMPORARY : 0);
 
 			/*
 			 * We have to grab the zs_name_lock as writer to
 			 * prevent a race between offlining a slog and
 			 * destroying a dataset. Offlining the slog will
 			 * grab a reference on the dataset which may cause
 			 * dmu_objset_destroy() to fail with EBUSY thus
 			 * leaving the dataset in an inconsistent state.
 			 */
 			if (islog)
-				(void) rw_wrlock(&ztest_name_lock);
+				rw_enter(&ztest_name_lock, RW_WRITER);
 
 			VERIFY(vdev_offline(spa, guid0, flags) != EBUSY);
 
 			if (islog)
-				(void) rw_unlock(&ztest_name_lock);
+				rw_exit(&ztest_name_lock);
 		} else {
 			/*
 			 * Ideally we would like to be able to randomly
 			 * call vdev_[on|off]line without holding locks
 			 * to force unpredictable failures but the side
 			 * effects of vdev_[on|off]line prevent us from
 			 * doing so. We grab the ztest_vdev_lock here to
 			 * prevent a race between injection testing and
 			 * aux_vdev removal.
 			 */
-			VERIFY(mutex_lock(&ztest_vdev_lock) == 0);
+			mutex_enter(&ztest_vdev_lock);
 			(void) vdev_online(spa, guid0, 0, NULL);
-			VERIFY(mutex_unlock(&ztest_vdev_lock) == 0);
+			mutex_exit(&ztest_vdev_lock);
 		}
 	}
 
 	if (maxfaults == 0)
 		return;
 
 	/*
 	 * We have at least single-fault tolerance, so inject data corruption.
 	 */
 	fd = open(pathrand, O_RDWR);
 
 	if (fd == -1)	/* we hit a gap in the device namespace */
 		return;
 
 	fsize = lseek(fd, 0, SEEK_END);
 
 	while (--iters != 0) {
 		/*
 		 * The offset must be chosen carefully to ensure that
 		 * we do not inject a given logical block with errors
 		 * on two different leaf devices, because ZFS can not
 		 * tolerate that (if maxfaults==1).
 		 *
 		 * We divide each leaf into chunks of size
 		 * (# leaves * SPA_MAXBLOCKSIZE * 4).  Within each chunk
 		 * there is a series of ranges to which we can inject errors.
 		 * Each range can accept errors on only a single leaf vdev.
 		 * The error injection ranges are separated by ranges
 		 * which we will not inject errors on any device (DMZs).
 		 * Each DMZ must be large enough such that a single block
 		 * can not straddle it, so that a single block can not be
 		 * a target in two different injection ranges (on different
 		 * leaf vdevs).
 		 *
 		 * For example, with 3 leaves, each chunk looks like:
 		 *    0 to  32M: injection range for leaf 0
 		 *  32M to  64M: DMZ - no injection allowed
 		 *  64M to  96M: injection range for leaf 1
 		 *  96M to 128M: DMZ - no injection allowed
 		 * 128M to 160M: injection range for leaf 2
 		 * 160M to 192M: DMZ - no injection allowed
 		 */
 		offset = ztest_random(fsize / (leaves << bshift)) *
 		    (leaves << bshift) + (leaf << bshift) +
 		    (ztest_random(1ULL << (bshift - 1)) & -8ULL);
 
 		/*
 		 * Only allow damage to the labels at one end of the vdev.
 		 *
 		 * If all labels are damaged, the device will be totally
 		 * inaccessible, which will result in loss of data,
 		 * because we also damage (parts of) the other side of
 		 * the mirror/raidz.
 		 *
 		 * Additionally, we will always have both an even and an
 		 * odd label, so that we can handle crashes in the
 		 * middle of vdev_config_sync().
 		 */
 		if ((leaf & 1) == 0 && offset < VDEV_LABEL_START_SIZE)
 			continue;
 
 		/*
 		 * The two end labels are stored at the "end" of the disk, but
 		 * the end of the disk (vdev_psize) is aligned to
 		 * sizeof (vdev_label_t).
 		 */
 		uint64_t psize = P2ALIGN(fsize, sizeof (vdev_label_t));
 		if ((leaf & 1) == 1 &&
 		    offset + sizeof (bad) > psize - VDEV_LABEL_END_SIZE)
 			continue;
 
-		VERIFY(mutex_lock(&ztest_vdev_lock) == 0);
+		mutex_enter(&ztest_vdev_lock);
 		if (mirror_save != zs->zs_mirrors) {
-			VERIFY(mutex_unlock(&ztest_vdev_lock) == 0);
+			mutex_exit(&ztest_vdev_lock);
 			(void) close(fd);
 			return;
 		}
 
 		if (pwrite(fd, &bad, sizeof (bad), offset) != sizeof (bad))
 			fatal(1, "can't inject bad word at 0x%llx in %s",
 			    offset, pathrand);
 
-		VERIFY(mutex_unlock(&ztest_vdev_lock) == 0);
+		mutex_exit(&ztest_vdev_lock);
 
 		if (ztest_opts.zo_verbose >= 7)
 			(void) printf("injected bad word into %s,"
 			    " offset 0x%llx\n", pathrand, (u_longlong_t)offset);
 	}
 
 	(void) close(fd);
 }
 
 /*
  * Verify that DDT repair works as expected.
  */
 void
 ztest_ddt_repair(ztest_ds_t *zd, uint64_t id)
 {
 	ztest_shared_t *zs = ztest_shared;
 	spa_t *spa = ztest_spa;
 	objset_t *os = zd->zd_os;
 	ztest_od_t od[1];
 	uint64_t object, blocksize, txg, pattern, psize;
 	enum zio_checksum checksum = spa_dedup_checksum(spa);
 	dmu_buf_t *db;
 	dmu_tx_t *tx;
 	abd_t *abd;
 	blkptr_t blk;
 	int copies = 2 * ZIO_DEDUPDITTO_MIN;
 
 	blocksize = ztest_random_blocksize();
 	blocksize = MIN(blocksize, 2048);	/* because we write so many */
 
 	ztest_od_init(&od[0], id, FTAG, 0, DMU_OT_UINT64_OTHER, blocksize, 0);
 
 	if (ztest_object_init(zd, od, sizeof (od), B_FALSE) != 0)
 		return;
 
 	/*
 	 * Take the name lock as writer to prevent anyone else from changing
 	 * the pool and dataset properies we need to maintain during this test.
 	 */
-	(void) rw_wrlock(&ztest_name_lock);
+	rw_enter(&ztest_name_lock, RW_WRITER);
 
 	if (ztest_dsl_prop_set_uint64(zd->zd_name, ZFS_PROP_DEDUP, checksum,
 	    B_FALSE) != 0 ||
 	    ztest_dsl_prop_set_uint64(zd->zd_name, ZFS_PROP_COPIES, 1,
 	    B_FALSE) != 0) {
-		(void) rw_unlock(&ztest_name_lock);
+		rw_exit(&ztest_name_lock);
 		return;
 	}
 
 	dmu_objset_stats_t dds;
 	dsl_pool_config_enter(dmu_objset_pool(os), FTAG);
 	dmu_objset_fast_stat(os, &dds);
 	dsl_pool_config_exit(dmu_objset_pool(os), FTAG);
 
 	object = od[0].od_object;
 	blocksize = od[0].od_blocksize;
 	pattern = zs->zs_guid ^ dds.dds_guid;
 
 	ASSERT(object != 0);
 
 	tx = dmu_tx_create(os);
 	dmu_tx_hold_write(tx, object, 0, copies * blocksize);
 	txg = ztest_tx_assign(tx, TXG_WAIT, FTAG);
 	if (txg == 0) {
-		(void) rw_unlock(&ztest_name_lock);
+		rw_exit(&ztest_name_lock);
 		return;
 	}
 
 	/*
 	 * Write all the copies of our block.
 	 */
 	for (int i = 0; i < copies; i++) {
 		uint64_t offset = i * blocksize;
 		int error = dmu_buf_hold(os, object, offset, FTAG, &db,
 		    DMU_READ_NO_PREFETCH);
 		if (error != 0) {
 			fatal(B_FALSE, "dmu_buf_hold(%p, %llu, %llu) = %u",
 			    os, (long long)object, (long long) offset, error);
 		}
 		ASSERT(db->db_offset == offset);
 		ASSERT(db->db_size == blocksize);
 		ASSERT(ztest_pattern_match(db->db_data, db->db_size, pattern) ||
 		    ztest_pattern_match(db->db_data, db->db_size, 0ULL));
 		dmu_buf_will_fill(db, tx);
 		ztest_pattern_set(db->db_data, db->db_size, pattern);
 		dmu_buf_rele(db, FTAG);
 	}
 
 	dmu_tx_commit(tx);
 	txg_wait_synced(spa_get_dsl(spa), txg);
 
 	/*
 	 * Find out what block we got.
 	 */
 	VERIFY0(dmu_buf_hold(os, object, 0, FTAG, &db,
 	    DMU_READ_NO_PREFETCH));
 	blk = *((dmu_buf_impl_t *)db)->db_blkptr;
 	dmu_buf_rele(db, FTAG);
 
 	/*
 	 * Damage the block.  Dedup-ditto will save us when we read it later.
 	 */
 	psize = BP_GET_PSIZE(&blk);
 	abd = abd_alloc_linear(psize, B_TRUE);
 	ztest_pattern_set(abd_to_buf(abd), psize, ~pattern);
 
 	(void) zio_wait(zio_rewrite(NULL, spa, 0, &blk,
 	    abd, psize, NULL, NULL, ZIO_PRIORITY_SYNC_WRITE,
 	    ZIO_FLAG_CANFAIL | ZIO_FLAG_INDUCE_DAMAGE, NULL));
 
 	abd_free(abd);
 
-	(void) rw_unlock(&ztest_name_lock);
+	rw_exit(&ztest_name_lock);
 }
 
 /*
  * Scrub the pool.
  */
 /* ARGSUSED */
 void
 ztest_scrub(ztest_ds_t *zd, uint64_t id)
 {
 	spa_t *spa = ztest_spa;
 
 	(void) spa_scan(spa, POOL_SCAN_SCRUB);
 	(void) poll(NULL, 0, 100); /* wait a moment, then force a restart */
 	(void) spa_scan(spa, POOL_SCAN_SCRUB);
 }
 
 /*
  * Change the guid for the pool.
  */
 /* ARGSUSED */
 void
 ztest_reguid(ztest_ds_t *zd, uint64_t id)
 {
 	spa_t *spa = ztest_spa;
 	uint64_t orig, load;
 	int error;
 
 	orig = spa_guid(spa);
 	load = spa_load_guid(spa);
 
-	(void) rw_wrlock(&ztest_name_lock);
+	rw_enter(&ztest_name_lock, RW_WRITER);
 	error = spa_change_guid(spa);
-	(void) rw_unlock(&ztest_name_lock);
+	rw_exit(&ztest_name_lock);
 
 	if (error != 0)
 		return;
 
 	if (ztest_opts.zo_verbose >= 4) {
 		(void) printf("Changed guid old %llu -> %llu\n",
 		    (u_longlong_t)orig, (u_longlong_t)spa_guid(spa));
 	}
 
 	VERIFY3U(orig, !=, spa_guid(spa));
 	VERIFY3U(load, ==, spa_load_guid(spa));
 }
 
 /*
  * Rename the pool to a different name and then rename it back.
  */
 /* ARGSUSED */
 void
 ztest_spa_rename(ztest_ds_t *zd, uint64_t id)
 {
 	char *oldname, *newname;
 	spa_t *spa;
 
-	(void) rw_wrlock(&ztest_name_lock);
+	rw_enter(&ztest_name_lock, RW_WRITER);
 
 	oldname = ztest_opts.zo_pool;
 	newname = umem_alloc(strlen(oldname) + 5, UMEM_NOFAIL);
 	(void) strcpy(newname, oldname);
 	(void) strcat(newname, "_tmp");
 
 	/*
 	 * Do the rename
 	 */
 	VERIFY3U(0, ==, spa_rename(oldname, newname));
 
 	/*
 	 * Try to open it under the old name, which shouldn't exist
 	 */
 	VERIFY3U(ENOENT, ==, spa_open(oldname, &spa, FTAG));
 
 	/*
 	 * Open it under the new name and make sure it's still the same spa_t.
 	 */
 	VERIFY3U(0, ==, spa_open(newname, &spa, FTAG));
 
 	ASSERT(spa == ztest_spa);
 	spa_close(spa, FTAG);
 
 	/*
 	 * Rename it back to the original
 	 */
 	VERIFY3U(0, ==, spa_rename(newname, oldname));
 
 	/*
 	 * Make sure it can still be opened
 	 */
 	VERIFY3U(0, ==, spa_open(oldname, &spa, FTAG));
 
 	ASSERT(spa == ztest_spa);
 	spa_close(spa, FTAG);
 
 	umem_free(newname, strlen(newname) + 1);
 
-	(void) rw_unlock(&ztest_name_lock);
+	rw_exit(&ztest_name_lock);
 }
 
 /*
  * Verify pool integrity by running zdb.
  */
 static void
 ztest_run_zdb(char *pool)
 {
 	int status;
 	char zdb[MAXPATHLEN + MAXNAMELEN + 20];
 	char zbuf[1024];
 	char *bin;
 	char *ztest;
 	char *isa;
 	int isalen;
 	FILE *fp;
 
 	(void) realpath(getexecname(), zdb);
 
 	/* zdb lives in /usr/sbin, while ztest lives in /usr/bin */
 	bin = strstr(zdb, "/usr/bin/");
 	ztest = strstr(bin, "/ztest");
 	isa = bin + 8;
 	isalen = ztest - isa;
 	isa = strdup(isa);
 	/* LINTED */
 	(void) sprintf(bin,
 	    "/usr/sbin%.*s/zdb -bcc%s%s -G -d -U %s %s",
 	    isalen,
 	    isa,
 	    ztest_opts.zo_verbose >= 3 ? "s" : "",
 	    ztest_opts.zo_verbose >= 4 ? "v" : "",
 	    spa_config_path,
 	    pool);
 	free(isa);
 
 	if (ztest_opts.zo_verbose >= 5)
 		(void) printf("Executing %s\n", strstr(zdb, "zdb "));
 
 	fp = popen(zdb, "r");
 
 	while (fgets(zbuf, sizeof (zbuf), fp) != NULL)
 		if (ztest_opts.zo_verbose >= 3)
 			(void) printf("%s", zbuf);
 
 	status = pclose(fp);
 
 	if (status == 0)
 		return;
 
 	ztest_dump_core = 0;
 	if (WIFEXITED(status))
 		fatal(0, "'%s' exit code %d", zdb, WEXITSTATUS(status));
 	else
 		fatal(0, "'%s' died with signal %d", zdb, WTERMSIG(status));
 }
 
 static void
 ztest_walk_pool_directory(char *header)
 {
 	spa_t *spa = NULL;
 
 	if (ztest_opts.zo_verbose >= 6)
 		(void) printf("%s\n", header);
 
 	mutex_enter(&spa_namespace_lock);
 	while ((spa = spa_next(spa)) != NULL)
 		if (ztest_opts.zo_verbose >= 6)
 			(void) printf("\t%s\n", spa_name(spa));
 	mutex_exit(&spa_namespace_lock);
 }
 
 static void
 ztest_spa_import_export(char *oldname, char *newname)
 {
 	nvlist_t *config, *newconfig;
 	uint64_t pool_guid;
 	spa_t *spa;
 	int error;
 
 	if (ztest_opts.zo_verbose >= 4) {
 		(void) printf("import/export: old = %s, new = %s\n",
 		    oldname, newname);
 	}
 
 	/*
 	 * Clean up from previous runs.
 	 */
 	(void) spa_destroy(newname);
 
 	/*
 	 * Get the pool's configuration and guid.
 	 */
 	VERIFY3U(0, ==, spa_open(oldname, &spa, FTAG));
 
 	/*
 	 * Kick off a scrub to tickle scrub/export races.
 	 */
 	if (ztest_random(2) == 0)
 		(void) spa_scan(spa, POOL_SCAN_SCRUB);
 
 	pool_guid = spa_guid(spa);
 	spa_close(spa, FTAG);
 
 	ztest_walk_pool_directory("pools before export");
 
 	/*
 	 * Export it.
 	 */
 	VERIFY3U(0, ==, spa_export(oldname, &config, B_FALSE, B_FALSE));
 
 	ztest_walk_pool_directory("pools after export");
 
 	/*
 	 * Try to import it.
 	 */
 	newconfig = spa_tryimport(config);
 	ASSERT(newconfig != NULL);
 	nvlist_free(newconfig);
 
 	/*
 	 * Import it under the new name.
 	 */
 	error = spa_import(newname, config, NULL, 0);
 	if (error != 0) {
 		dump_nvlist(config, 0);
 		fatal(B_FALSE, "couldn't import pool %s as %s: error %u",
 		    oldname, newname, error);
 	}
 
 	ztest_walk_pool_directory("pools after import");
 
 	/*
 	 * Try to import it again -- should fail with EEXIST.
 	 */
 	VERIFY3U(EEXIST, ==, spa_import(newname, config, NULL, 0));
 
 	/*
 	 * Try to import it under a different name -- should fail with EEXIST.
 	 */
 	VERIFY3U(EEXIST, ==, spa_import(oldname, config, NULL, 0));
 
 	/*
 	 * Verify that the pool is no longer visible under the old name.
 	 */
 	VERIFY3U(ENOENT, ==, spa_open(oldname, &spa, FTAG));
 
 	/*
 	 * Verify that we can open and close the pool using the new name.
 	 */
 	VERIFY3U(0, ==, spa_open(newname, &spa, FTAG));
 	ASSERT(pool_guid == spa_guid(spa));
 	spa_close(spa, FTAG);
 
 	nvlist_free(config);
 }
 
 static void
 ztest_resume(spa_t *spa)
 {
 	if (spa_suspended(spa) && ztest_opts.zo_verbose >= 6)
 		(void) printf("resuming from suspended state\n");
 	spa_vdev_state_enter(spa, SCL_NONE);
 	vdev_clear(spa, NULL);
 	(void) spa_vdev_state_exit(spa, NULL, 0);
 	(void) zio_resume(spa);
 }
 
 static void *
 ztest_resume_thread(void *arg)
 {
 	spa_t *spa = arg;
 
 	while (!ztest_exiting) {
 		if (spa_suspended(spa))
 			ztest_resume(spa);
 		(void) poll(NULL, 0, 100);
 
 		/*
 		 * Periodically change the zfs_compressed_arc_enabled setting.
 		 */
 		if (ztest_random(10) == 0)
 			zfs_compressed_arc_enabled = ztest_random(2);
 
 		/*
 		 * Periodically change the zfs_abd_scatter_enabled setting.
 		 */
 		if (ztest_random(10) == 0)
 			zfs_abd_scatter_enabled = ztest_random(2);
 	}
 	return (NULL);
 }
 
 static void *
 ztest_deadman_thread(void *arg)
 {
 	ztest_shared_t *zs = arg;
 	spa_t *spa = ztest_spa;
 	hrtime_t delta, total = 0;
 
 	for (;;) {
 		delta = zs->zs_thread_stop - zs->zs_thread_start +
 		    MSEC2NSEC(zfs_deadman_synctime_ms);
 
 		(void) poll(NULL, 0, (int)NSEC2MSEC(delta));
 
 		/*
 		 * If the pool is suspended then fail immediately. Otherwise,
 		 * check to see if the pool is making any progress. If
 		 * vdev_deadman() discovers that there hasn't been any recent
 		 * I/Os then it will end up aborting the tests.
 		 */
 		if (spa_suspended(spa) || spa->spa_root_vdev == NULL) {
 			fatal(0, "aborting test after %llu seconds because "
 			    "pool has transitioned to a suspended state.",
 			    zfs_deadman_synctime_ms / 1000);
 			return (NULL);
 		}
 		vdev_deadman(spa->spa_root_vdev);
 
 		total += zfs_deadman_synctime_ms/1000;
 		(void) printf("ztest has been running for %lld seconds\n",
 		    total);
 	}
 }
 
 static void
 ztest_execute(int test, ztest_info_t *zi, uint64_t id)
 {
 	ztest_ds_t *zd = &ztest_ds[id % ztest_opts.zo_datasets];
 	ztest_shared_callstate_t *zc = ZTEST_GET_SHARED_CALLSTATE(test);
 	hrtime_t functime = gethrtime();
 
 	for (int i = 0; i < zi->zi_iters; i++)
 		zi->zi_func(zd, id);
 
 	functime = gethrtime() - functime;
 
 	atomic_add_64(&zc->zc_count, 1);
 	atomic_add_64(&zc->zc_time, functime);
 
 	if (ztest_opts.zo_verbose >= 4) {
 		Dl_info dli;
 		(void) dladdr((void *)zi->zi_func, &dli);
 		(void) printf("%6.2f sec in %s\n",
 		    (double)functime / NANOSEC, dli.dli_sname);
 	}
 }
 
 static void *
 ztest_thread(void *arg)
 {
 	int rand;
 	uint64_t id = (uintptr_t)arg;
 	ztest_shared_t *zs = ztest_shared;
 	uint64_t call_next;
 	hrtime_t now;
 	ztest_info_t *zi;
 	ztest_shared_callstate_t *zc;
 
 	while ((now = gethrtime()) < zs->zs_thread_stop) {
 		/*
 		 * See if it's time to force a crash.
 		 */
 		if (now > zs->zs_thread_kill)
 			ztest_kill(zs);
 
 		/*
 		 * If we're getting ENOSPC with some regularity, stop.
 		 */
 		if (zs->zs_enospc_count > 10)
 			break;
 
 		/*
 		 * Pick a random function to execute.
 		 */
 		rand = ztest_random(ZTEST_FUNCS);
 		zi = &ztest_info[rand];
 		zc = ZTEST_GET_SHARED_CALLSTATE(rand);
 		call_next = zc->zc_next;
 
 		if (now >= call_next &&
 		    atomic_cas_64(&zc->zc_next, call_next, call_next +
 		    ztest_random(2 * zi->zi_interval[0] + 1)) == call_next) {
 			ztest_execute(rand, zi, id);
 		}
 	}
 
 	return (NULL);
 }
 
 static void
 ztest_dataset_name(char *dsname, char *pool, int d)
 {
 	(void) snprintf(dsname, ZFS_MAX_DATASET_NAME_LEN, "%s/ds_%d", pool, d);
 }
 
 static void
 ztest_dataset_destroy(int d)
 {
 	char name[ZFS_MAX_DATASET_NAME_LEN];
 
 	ztest_dataset_name(name, ztest_opts.zo_pool, d);
 
 	if (ztest_opts.zo_verbose >= 3)
 		(void) printf("Destroying %s to free up space\n", name);
 
 	/*
 	 * Cleanup any non-standard clones and snapshots.  In general,
 	 * ztest thread t operates on dataset (t % zopt_datasets),
 	 * so there may be more than one thing to clean up.
 	 */
 	for (int t = d; t < ztest_opts.zo_threads;
 	    t += ztest_opts.zo_datasets) {
 		ztest_dsl_dataset_cleanup(name, t);
 	}
 
 	(void) dmu_objset_find(name, ztest_objset_destroy_cb, NULL,
 	    DS_FIND_SNAPSHOTS | DS_FIND_CHILDREN);
 }
 
 static void
 ztest_dataset_dirobj_verify(ztest_ds_t *zd)
 {
 	uint64_t usedobjs, dirobjs, scratch;
 
 	/*
 	 * ZTEST_DIROBJ is the object directory for the entire dataset.
 	 * Therefore, the number of objects in use should equal the
 	 * number of ZTEST_DIROBJ entries, +1 for ZTEST_DIROBJ itself.
 	 * If not, we have an object leak.
 	 *
 	 * Note that we can only check this in ztest_dataset_open(),
 	 * when the open-context and syncing-context values agree.
 	 * That's because zap_count() returns the open-context value,
 	 * while dmu_objset_space() returns the rootbp fill count.
 	 */
 	VERIFY3U(0, ==, zap_count(zd->zd_os, ZTEST_DIROBJ, &dirobjs));
 	dmu_objset_space(zd->zd_os, &scratch, &scratch, &usedobjs, &scratch);
 	ASSERT3U(dirobjs + 1, ==, usedobjs);
 }
 
 static int
 ztest_dataset_open(int d)
 {
 	ztest_ds_t *zd = &ztest_ds[d];
 	uint64_t committed_seq = ZTEST_GET_SHARED_DS(d)->zd_seq;
 	objset_t *os;
 	zilog_t *zilog;
 	char name[ZFS_MAX_DATASET_NAME_LEN];
 	int error;
 
 	ztest_dataset_name(name, ztest_opts.zo_pool, d);
 
-	(void) rw_rdlock(&ztest_name_lock);
+	rw_enter(&ztest_name_lock, RW_READER);
 
 	error = ztest_dataset_create(name);
 	if (error == ENOSPC) {
-		(void) rw_unlock(&ztest_name_lock);
+		rw_exit(&ztest_name_lock);
 		ztest_record_enospc(FTAG);
 		return (error);
 	}
 	ASSERT(error == 0 || error == EEXIST);
 
 	VERIFY0(dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, zd, &os));
-	(void) rw_unlock(&ztest_name_lock);
+	rw_exit(&ztest_name_lock);
 
 	ztest_zd_init(zd, ZTEST_GET_SHARED_DS(d), os);
 
 	zilog = zd->zd_zilog;
 
 	if (zilog->zl_header->zh_claim_lr_seq != 0 &&
 	    zilog->zl_header->zh_claim_lr_seq < committed_seq)
 		fatal(0, "missing log records: claimed %llu < committed %llu",
 		    zilog->zl_header->zh_claim_lr_seq, committed_seq);
 
 	ztest_dataset_dirobj_verify(zd);
 
 	zil_replay(os, zd, ztest_replay_vector);
 
 	ztest_dataset_dirobj_verify(zd);
 
 	if (ztest_opts.zo_verbose >= 6)
 		(void) printf("%s replay %llu blocks, %llu records, seq %llu\n",
 		    zd->zd_name,
 		    (u_longlong_t)zilog->zl_parse_blk_count,
 		    (u_longlong_t)zilog->zl_parse_lr_count,
 		    (u_longlong_t)zilog->zl_replaying_seq);
 
 	zilog = zil_open(os, ztest_get_data);
 
 	if (zilog->zl_replaying_seq != 0 &&
 	    zilog->zl_replaying_seq < committed_seq)
 		fatal(0, "missing log records: replayed %llu < committed %llu",
 		    zilog->zl_replaying_seq, committed_seq);
 
 	return (0);
 }
 
 static void
 ztest_dataset_close(int d)
 {
 	ztest_ds_t *zd = &ztest_ds[d];
 
 	zil_close(zd->zd_zilog);
 	dmu_objset_disown(zd->zd_os, zd);
 
 	ztest_zd_fini(zd);
 }
 
 /*
  * Kick off threads to run tests on all datasets in parallel.
  */
 static void
 ztest_run(ztest_shared_t *zs)
 {
 	thread_t *tid;
 	spa_t *spa;
 	objset_t *os;
 	thread_t resume_tid;
 	int error;
 
 	ztest_exiting = B_FALSE;
 
 	/*
 	 * Initialize parent/child shared state.
 	 */
-	VERIFY(_mutex_init(&ztest_vdev_lock, USYNC_THREAD, NULL) == 0);
-	VERIFY(rwlock_init(&ztest_name_lock, USYNC_THREAD, NULL) == 0);
+	mutex_init(&ztest_vdev_lock, NULL, USYNC_THREAD, NULL);
+	rw_init(&ztest_name_lock, NULL, USYNC_THREAD, NULL);
 
 	zs->zs_thread_start = gethrtime();
 	zs->zs_thread_stop =
 	    zs->zs_thread_start + ztest_opts.zo_passtime * NANOSEC;
 	zs->zs_thread_stop = MIN(zs->zs_thread_stop, zs->zs_proc_stop);
 	zs->zs_thread_kill = zs->zs_thread_stop;
 	if (ztest_random(100) < ztest_opts.zo_killrate) {
 		zs->zs_thread_kill -=
 		    ztest_random(ztest_opts.zo_passtime * NANOSEC);
 	}
 
-	(void) _mutex_init(&zcl.zcl_callbacks_lock, USYNC_THREAD, NULL);
+	mutex_init(&zcl.zcl_callbacks_lock, NULL, USYNC_THREAD, NULL);
 
 	list_create(&zcl.zcl_callbacks, sizeof (ztest_cb_data_t),
 	    offsetof(ztest_cb_data_t, zcd_node));
 
 	/*
 	 * Open our pool.
 	 */
 	kernel_init(FREAD | FWRITE);
 	VERIFY0(spa_open(ztest_opts.zo_pool, &spa, FTAG));
 	spa->spa_debug = B_TRUE;
 	metaslab_preload_limit = ztest_random(20) + 1;
 	ztest_spa = spa;
 
 	dmu_objset_stats_t dds;
 	VERIFY0(dmu_objset_own(ztest_opts.zo_pool,
 	    DMU_OST_ANY, B_TRUE, FTAG, &os));
 	dsl_pool_config_enter(dmu_objset_pool(os), FTAG);
 	dmu_objset_fast_stat(os, &dds);
 	dsl_pool_config_exit(dmu_objset_pool(os), FTAG);
 	zs->zs_guid = dds.dds_guid;
 	dmu_objset_disown(os, FTAG);
 
 	spa->spa_dedup_ditto = 2 * ZIO_DEDUPDITTO_MIN;
 
 	/*
 	 * We don't expect the pool to suspend unless maxfaults == 0,
 	 * in which case ztest_fault_inject() temporarily takes away
 	 * the only valid replica.
 	 */
 	if (MAXFAULTS() == 0)
 		spa->spa_failmode = ZIO_FAILURE_MODE_WAIT;
 	else
 		spa->spa_failmode = ZIO_FAILURE_MODE_PANIC;
 
 	/*
 	 * Create a thread to periodically resume suspended I/O.
 	 */
 	VERIFY(thr_create(0, 0, ztest_resume_thread, spa, THR_BOUND,
 	    &resume_tid) == 0);
 
 	/*
 	 * Create a deadman thread to abort() if we hang.
 	 */
 	VERIFY(thr_create(0, 0, ztest_deadman_thread, zs, THR_BOUND,
 	    NULL) == 0);
 
 	/*
 	 * Verify that we can safely inquire about about any object,
 	 * whether it's allocated or not.  To make it interesting,
 	 * we probe a 5-wide window around each power of two.
 	 * This hits all edge cases, including zero and the max.
 	 */
 	for (int t = 0; t < 64; t++) {
 		for (int d = -5; d <= 5; d++) {
 			error = dmu_object_info(spa->spa_meta_objset,
 			    (1ULL << t) + d, NULL);
 			ASSERT(error == 0 || error == ENOENT ||
 			    error == EINVAL);
 		}
 	}
 
 	/*
 	 * If we got any ENOSPC errors on the previous run, destroy something.
 	 */
 	if (zs->zs_enospc_count != 0) {
 		int d = ztest_random(ztest_opts.zo_datasets);
 		ztest_dataset_destroy(d);
 	}
 	zs->zs_enospc_count = 0;
 
 	tid = umem_zalloc(ztest_opts.zo_threads * sizeof (thread_t),
 	    UMEM_NOFAIL);
 
 	if (ztest_opts.zo_verbose >= 4)
 		(void) printf("starting main threads...\n");
 
 	/*
 	 * Kick off all the tests that run in parallel.
 	 */
 	for (int t = 0; t < ztest_opts.zo_threads; t++) {
 		if (t < ztest_opts.zo_datasets &&
 		    ztest_dataset_open(t) != 0)
 			return;
 		VERIFY(thr_create(0, 0, ztest_thread, (void *)(uintptr_t)t,
 		    THR_BOUND, &tid[t]) == 0);
 	}
 
 	/*
 	 * Wait for all of the tests to complete.  We go in reverse order
 	 * so we don't close datasets while threads are still using them.
 	 */
 	for (int t = ztest_opts.zo_threads - 1; t >= 0; t--) {
 		VERIFY(thr_join(tid[t], NULL, NULL) == 0);
 		if (t < ztest_opts.zo_datasets)
 			ztest_dataset_close(t);
 	}
 
 	txg_wait_synced(spa_get_dsl(spa), 0);
 
 	zs->zs_alloc = metaslab_class_get_alloc(spa_normal_class(spa));
 	zs->zs_space = metaslab_class_get_space(spa_normal_class(spa));
 	zfs_dbgmsg_print(FTAG);
 
 	umem_free(tid, ztest_opts.zo_threads * sizeof (thread_t));
 
 	/* Kill the resume thread */
 	ztest_exiting = B_TRUE;
 	VERIFY(thr_join(resume_tid, NULL, NULL) == 0);
 	ztest_resume(spa);
 
 	/*
 	 * Right before closing the pool, kick off a bunch of async I/O;
 	 * spa_close() should wait for it to complete.
 	 */
 	for (uint64_t object = 1; object < 50; object++) {
 		dmu_prefetch(spa->spa_meta_objset, object, 0, 0, 1ULL << 20,
 		    ZIO_PRIORITY_SYNC_READ);
 	}
 
 	spa_close(spa, FTAG);
 
 	/*
 	 * Verify that we can loop over all pools.
 	 */
 	mutex_enter(&spa_namespace_lock);
 	for (spa = spa_next(NULL); spa != NULL; spa = spa_next(spa))
 		if (ztest_opts.zo_verbose > 3)
 			(void) printf("spa_next: found %s\n", spa_name(spa));
 	mutex_exit(&spa_namespace_lock);
 
 	/*
 	 * Verify that we can export the pool and reimport it under a
 	 * different name.
 	 */
 	if (ztest_random(2) == 0) {
 		char name[ZFS_MAX_DATASET_NAME_LEN];
 		(void) snprintf(name, sizeof (name), "%s_import",
 		    ztest_opts.zo_pool);
 		ztest_spa_import_export(ztest_opts.zo_pool, name);
 		ztest_spa_import_export(name, ztest_opts.zo_pool);
 	}
 
 	kernel_fini();
 
 	list_destroy(&zcl.zcl_callbacks);
 
-	(void) _mutex_destroy(&zcl.zcl_callbacks_lock);
+	mutex_destroy(&zcl.zcl_callbacks_lock);
 
-	(void) rwlock_destroy(&ztest_name_lock);
-	(void) _mutex_destroy(&ztest_vdev_lock);
+	rw_destroy(&ztest_name_lock);
+	mutex_destroy(&ztest_vdev_lock);
 }
 
 static void
 ztest_freeze(void)
 {
 	ztest_ds_t *zd = &ztest_ds[0];
 	spa_t *spa;
 	int numloops = 0;
 
 	if (ztest_opts.zo_verbose >= 3)
 		(void) printf("testing spa_freeze()...\n");
 
 	kernel_init(FREAD | FWRITE);
 	VERIFY3U(0, ==, spa_open(ztest_opts.zo_pool, &spa, FTAG));
 	VERIFY3U(0, ==, ztest_dataset_open(0));
 	spa->spa_debug = B_TRUE;
 	ztest_spa = spa;
 
 	/*
 	 * Force the first log block to be transactionally allocated.
 	 * We have to do this before we freeze the pool -- otherwise
 	 * the log chain won't be anchored.
 	 */
 	while (BP_IS_HOLE(&zd->zd_zilog->zl_header->zh_log)) {
 		ztest_dmu_object_alloc_free(zd, 0);
 		zil_commit(zd->zd_zilog, 0);
 	}
 
 	txg_wait_synced(spa_get_dsl(spa), 0);
 
 	/*
 	 * Freeze the pool.  This stops spa_sync() from doing anything,
 	 * so that the only way to record changes from now on is the ZIL.
 	 */
 	spa_freeze(spa);
 
 	/*
 	 * Because it is hard to predict how much space a write will actually
 	 * require beforehand, we leave ourselves some fudge space to write over
 	 * capacity.
 	 */
 	uint64_t capacity = metaslab_class_get_space(spa_normal_class(spa)) / 2;
 
 	/*
 	 * Run tests that generate log records but don't alter the pool config
 	 * or depend on DSL sync tasks (snapshots, objset create/destroy, etc).
 	 * We do a txg_wait_synced() after each iteration to force the txg
 	 * to increase well beyond the last synced value in the uberblock.
 	 * The ZIL should be OK with that.
 	 *
 	 * Run a random number of times less than zo_maxloops and ensure we do
 	 * not run out of space on the pool.
 	 */
 	while (ztest_random(10) != 0 &&
 	    numloops++ < ztest_opts.zo_maxloops &&
 	    metaslab_class_get_alloc(spa_normal_class(spa)) < capacity) {
 		ztest_od_t od;
 		ztest_od_init(&od, 0, FTAG, 0, DMU_OT_UINT64_OTHER, 0, 0);
 		VERIFY0(ztest_object_init(zd, &od, sizeof (od), B_FALSE));
 		ztest_io(zd, od.od_object,
 		    ztest_random(ZTEST_RANGE_LOCKS) << SPA_MAXBLOCKSHIFT);
 		txg_wait_synced(spa_get_dsl(spa), 0);
 	}
 
 	/*
 	 * Commit all of the changes we just generated.
 	 */
 	zil_commit(zd->zd_zilog, 0);
 	txg_wait_synced(spa_get_dsl(spa), 0);
 
 	/*
 	 * Close our dataset and close the pool.
 	 */
 	ztest_dataset_close(0);
 	spa_close(spa, FTAG);
 	kernel_fini();
 
 	/*
 	 * Open and close the pool and dataset to induce log replay.
 	 */
 	kernel_init(FREAD | FWRITE);
 	VERIFY3U(0, ==, spa_open(ztest_opts.zo_pool, &spa, FTAG));
 	ASSERT(spa_freeze_txg(spa) == UINT64_MAX);
 	VERIFY3U(0, ==, ztest_dataset_open(0));
 	ztest_dataset_close(0);
 
 	spa->spa_debug = B_TRUE;
 	ztest_spa = spa;
 	txg_wait_synced(spa_get_dsl(spa), 0);
 	ztest_reguid(NULL, 0);
 
 	spa_close(spa, FTAG);
 	kernel_fini();
 }
 
 void
 print_time(hrtime_t t, char *timebuf)
 {
 	hrtime_t s = t / NANOSEC;
 	hrtime_t m = s / 60;
 	hrtime_t h = m / 60;
 	hrtime_t d = h / 24;
 
 	s -= m * 60;
 	m -= h * 60;
 	h -= d * 24;
 
 	timebuf[0] = '\0';
 
 	if (d)
 		(void) sprintf(timebuf,
 		    "%llud%02lluh%02llum%02llus", d, h, m, s);
 	else if (h)
 		(void) sprintf(timebuf, "%lluh%02llum%02llus", h, m, s);
 	else if (m)
 		(void) sprintf(timebuf, "%llum%02llus", m, s);
 	else
 		(void) sprintf(timebuf, "%llus", s);
 }
 
 static nvlist_t *
 make_random_props()
 {
 	nvlist_t *props;
 
 	VERIFY(nvlist_alloc(&props, NV_UNIQUE_NAME, 0) == 0);
 	if (ztest_random(2) == 0)
 		return (props);
 	VERIFY(nvlist_add_uint64(props, "autoreplace", 1) == 0);
 
 	return (props);
 }
 
 /*
  * Create a storage pool with the given name and initial vdev size.
  * Then test spa_freeze() functionality.
  */
 static void
 ztest_init(ztest_shared_t *zs)
 {
 	spa_t *spa;
 	nvlist_t *nvroot, *props;
 
-	VERIFY(_mutex_init(&ztest_vdev_lock, USYNC_THREAD, NULL) == 0);
-	VERIFY(rwlock_init(&ztest_name_lock, USYNC_THREAD, NULL) == 0);
+	mutex_init(&ztest_vdev_lock, NULL, USYNC_THREAD, NULL);
+	rw_init(&ztest_name_lock, NULL, USYNC_THREAD, NULL);
 
 	kernel_init(FREAD | FWRITE);
 
 	/*
 	 * Create the storage pool.
 	 */
 	(void) spa_destroy(ztest_opts.zo_pool);
 	ztest_shared->zs_vdev_next_leaf = 0;
 	zs->zs_splits = 0;
 	zs->zs_mirrors = ztest_opts.zo_mirrors;
 	nvroot = make_vdev_root(NULL, NULL, NULL, ztest_opts.zo_vdev_size, 0,
 	    0, ztest_opts.zo_raidz, zs->zs_mirrors, 1);
 	props = make_random_props();
 	for (int i = 0; i < SPA_FEATURES; i++) {
 		char buf[1024];
 		(void) snprintf(buf, sizeof (buf), "feature@%s",
 		    spa_feature_table[i].fi_uname);
 		VERIFY3U(0, ==, nvlist_add_uint64(props, buf, 0));
 	}
 	VERIFY3U(0, ==, spa_create(ztest_opts.zo_pool, nvroot, props, NULL));
 	nvlist_free(nvroot);
 
 	VERIFY3U(0, ==, spa_open(ztest_opts.zo_pool, &spa, FTAG));
 	zs->zs_metaslab_sz =
 	    1ULL << spa->spa_root_vdev->vdev_child[0]->vdev_ms_shift;
 
 	spa_close(spa, FTAG);
 
 	kernel_fini();
 
 	ztest_run_zdb(ztest_opts.zo_pool);
 
 	ztest_freeze();
 
 	ztest_run_zdb(ztest_opts.zo_pool);
 
-	(void) rwlock_destroy(&ztest_name_lock);
-	(void) _mutex_destroy(&ztest_vdev_lock);
+	rw_destroy(&ztest_name_lock);
+	mutex_destroy(&ztest_vdev_lock);
 }
 
 static void
 setup_data_fd(void)
 {
 	static char ztest_name_data[] = "/tmp/ztest.data.XXXXXX";
 
 	ztest_fd_data = mkstemp(ztest_name_data);
 	ASSERT3S(ztest_fd_data, >=, 0);
 	(void) unlink(ztest_name_data);
 }
 
 
 static int
 shared_data_size(ztest_shared_hdr_t *hdr)
 {
 	int size;
 
 	size = hdr->zh_hdr_size;
 	size += hdr->zh_opts_size;
 	size += hdr->zh_size;
 	size += hdr->zh_stats_size * hdr->zh_stats_count;
 	size += hdr->zh_ds_size * hdr->zh_ds_count;
 
 	return (size);
 }
 
 static void
 setup_hdr(void)
 {
 	int size;
 	ztest_shared_hdr_t *hdr;
 
 	hdr = (void *)mmap(0, P2ROUNDUP(sizeof (*hdr), getpagesize()),
 	    PROT_READ | PROT_WRITE, MAP_SHARED, ztest_fd_data, 0);
 	ASSERT(hdr != MAP_FAILED);
 
 	VERIFY3U(0, ==, ftruncate(ztest_fd_data, sizeof (ztest_shared_hdr_t)));
 
 	hdr->zh_hdr_size = sizeof (ztest_shared_hdr_t);
 	hdr->zh_opts_size = sizeof (ztest_shared_opts_t);
 	hdr->zh_size = sizeof (ztest_shared_t);
 	hdr->zh_stats_size = sizeof (ztest_shared_callstate_t);
 	hdr->zh_stats_count = ZTEST_FUNCS;
 	hdr->zh_ds_size = sizeof (ztest_shared_ds_t);
 	hdr->zh_ds_count = ztest_opts.zo_datasets;
 
 	size = shared_data_size(hdr);
 	VERIFY3U(0, ==, ftruncate(ztest_fd_data, size));
 
 	(void) munmap((caddr_t)hdr, P2ROUNDUP(sizeof (*hdr), getpagesize()));
 }
 
 static void
 setup_data(void)
 {
 	int size, offset;
 	ztest_shared_hdr_t *hdr;
 	uint8_t *buf;
 
 	hdr = (void *)mmap(0, P2ROUNDUP(sizeof (*hdr), getpagesize()),
 	    PROT_READ, MAP_SHARED, ztest_fd_data, 0);
 	ASSERT(hdr != MAP_FAILED);
 
 	size = shared_data_size(hdr);
 
 	(void) munmap((caddr_t)hdr, P2ROUNDUP(sizeof (*hdr), getpagesize()));
 	hdr = ztest_shared_hdr = (void *)mmap(0, P2ROUNDUP(size, getpagesize()),
 	    PROT_READ | PROT_WRITE, MAP_SHARED, ztest_fd_data, 0);
 	ASSERT(hdr != MAP_FAILED);
 	buf = (uint8_t *)hdr;
 
 	offset = hdr->zh_hdr_size;
 	ztest_shared_opts = (void *)&buf[offset];
 	offset += hdr->zh_opts_size;
 	ztest_shared = (void *)&buf[offset];
 	offset += hdr->zh_size;
 	ztest_shared_callstate = (void *)&buf[offset];
 	offset += hdr->zh_stats_size * hdr->zh_stats_count;
 	ztest_shared_ds = (void *)&buf[offset];
 }
 
 static boolean_t
 exec_child(char *cmd, char *libpath, boolean_t ignorekill, int *statusp)
 {
 	pid_t pid;
 	int status;
 	char *cmdbuf = NULL;
 
 	pid = fork();
 
 	if (cmd == NULL) {
 		cmdbuf = umem_alloc(MAXPATHLEN, UMEM_NOFAIL);
 		(void) strlcpy(cmdbuf, getexecname(), MAXPATHLEN);
 		cmd = cmdbuf;
 	}
 
 	if (pid == -1)
 		fatal(1, "fork failed");
 
 	if (pid == 0) {	/* child */
 		char *emptyargv[2] = { cmd, NULL };
 		char fd_data_str[12];
 
 		struct rlimit rl = { 1024, 1024 };
 		(void) setrlimit(RLIMIT_NOFILE, &rl);
 
 		(void) close(ztest_fd_rand);
 		VERIFY3U(11, >=,
 		    snprintf(fd_data_str, 12, "%d", ztest_fd_data));
 		VERIFY0(setenv("ZTEST_FD_DATA", fd_data_str, 1));
 
 		(void) enable_extended_FILE_stdio(-1, -1);
 		if (libpath != NULL)
 			VERIFY(0 == setenv("LD_LIBRARY_PATH", libpath, 1));
 		(void) execv(cmd, emptyargv);
 		ztest_dump_core = B_FALSE;
 		fatal(B_TRUE, "exec failed: %s", cmd);
 	}
 
 	if (cmdbuf != NULL) {
 		umem_free(cmdbuf, MAXPATHLEN);
 		cmd = NULL;
 	}
 
 	while (waitpid(pid, &status, 0) != pid)
 		continue;
 	if (statusp != NULL)
 		*statusp = status;
 
 	if (WIFEXITED(status)) {
 		if (WEXITSTATUS(status) != 0) {
 			(void) fprintf(stderr, "child exited with code %d\n",
 			    WEXITSTATUS(status));
 			exit(2);
 		}
 		return (B_FALSE);
 	} else if (WIFSIGNALED(status)) {
 		if (!ignorekill || WTERMSIG(status) != SIGKILL) {
 			(void) fprintf(stderr, "child died with signal %d\n",
 			    WTERMSIG(status));
 			exit(3);
 		}
 		return (B_TRUE);
 	} else {
 		(void) fprintf(stderr, "something strange happened to child\n");
 		exit(4);
 		/* NOTREACHED */
 	}
 }
 
 static void
 ztest_run_init(void)
 {
 	ztest_shared_t *zs = ztest_shared;
 
 	ASSERT(ztest_opts.zo_init != 0);
 
 	/*
 	 * Blow away any existing copy of zpool.cache
 	 */
 	(void) remove(spa_config_path);
 
 	/*
 	 * Create and initialize our storage pool.
 	 */
 	for (int i = 1; i <= ztest_opts.zo_init; i++) {
 		bzero(zs, sizeof (ztest_shared_t));
 		if (ztest_opts.zo_verbose >= 3 &&
 		    ztest_opts.zo_init != 1) {
 			(void) printf("ztest_init(), pass %d\n", i);
 		}
 		ztest_init(zs);
 	}
 }
 
 int
 main(int argc, char **argv)
 {
 	int kills = 0;
 	int iters = 0;
 	int older = 0;
 	int newer = 0;
 	ztest_shared_t *zs;
 	ztest_info_t *zi;
 	ztest_shared_callstate_t *zc;
 	char timebuf[100];
 	char numbuf[NN_NUMBUF_SZ];
 	spa_t *spa;
 	char *cmd;
 	boolean_t hasalt;
 	char *fd_data_str = getenv("ZTEST_FD_DATA");
 
 	(void) setvbuf(stdout, NULL, _IOLBF, 0);
 
 	dprintf_setup(&argc, argv);
 	zfs_deadman_synctime_ms = 300000;
 
 	ztest_fd_rand = open("/dev/urandom", O_RDONLY);
 	ASSERT3S(ztest_fd_rand, >=, 0);
 
 	if (!fd_data_str) {
 		process_options(argc, argv);
 
 		setup_data_fd();
 		setup_hdr();
 		setup_data();
 		bcopy(&ztest_opts, ztest_shared_opts,
 		    sizeof (*ztest_shared_opts));
 	} else {
 		ztest_fd_data = atoi(fd_data_str);
 		setup_data();
 		bcopy(ztest_shared_opts, &ztest_opts, sizeof (ztest_opts));
 	}
 	ASSERT3U(ztest_opts.zo_datasets, ==, ztest_shared_hdr->zh_ds_count);
 
 	/* Override location of zpool.cache */
 	VERIFY3U(asprintf((char **)&spa_config_path, "%s/zpool.cache",
 	    ztest_opts.zo_dir), !=, -1);
 
 	ztest_ds = umem_alloc(ztest_opts.zo_datasets * sizeof (ztest_ds_t),
 	    UMEM_NOFAIL);
 	zs = ztest_shared;
 
 	if (fd_data_str) {
 		metaslab_gang_bang = ztest_opts.zo_metaslab_gang_bang;
 		metaslab_df_alloc_threshold =
 		    zs->zs_metaslab_df_alloc_threshold;
 
 		if (zs->zs_do_init)
 			ztest_run_init();
 		else
 			ztest_run(zs);
 		exit(0);
 	}
 
 	hasalt = (strlen(ztest_opts.zo_alt_ztest) != 0);
 
 	if (ztest_opts.zo_verbose >= 1) {
 		(void) printf("%llu vdevs, %d datasets, %d threads,"
 		    " %llu seconds...\n",
 		    (u_longlong_t)ztest_opts.zo_vdevs,
 		    ztest_opts.zo_datasets,
 		    ztest_opts.zo_threads,
 		    (u_longlong_t)ztest_opts.zo_time);
 	}
 
 	cmd = umem_alloc(MAXNAMELEN, UMEM_NOFAIL);
 	(void) strlcpy(cmd, getexecname(), MAXNAMELEN);
 
 	zs->zs_do_init = B_TRUE;
 	if (strlen(ztest_opts.zo_alt_ztest) != 0) {
 		if (ztest_opts.zo_verbose >= 1) {
 			(void) printf("Executing older ztest for "
 			    "initialization: %s\n", ztest_opts.zo_alt_ztest);
 		}
 		VERIFY(!exec_child(ztest_opts.zo_alt_ztest,
 		    ztest_opts.zo_alt_libpath, B_FALSE, NULL));
 	} else {
 		VERIFY(!exec_child(NULL, NULL, B_FALSE, NULL));
 	}
 	zs->zs_do_init = B_FALSE;
 
 	zs->zs_proc_start = gethrtime();
 	zs->zs_proc_stop = zs->zs_proc_start + ztest_opts.zo_time * NANOSEC;
 
 	for (int f = 0; f < ZTEST_FUNCS; f++) {
 		zi = &ztest_info[f];
 		zc = ZTEST_GET_SHARED_CALLSTATE(f);
 		if (zs->zs_proc_start + zi->zi_interval[0] > zs->zs_proc_stop)
 			zc->zc_next = UINT64_MAX;
 		else
 			zc->zc_next = zs->zs_proc_start +
 			    ztest_random(2 * zi->zi_interval[0] + 1);
 	}
 
 	/*
 	 * Run the tests in a loop.  These tests include fault injection
 	 * to verify that self-healing data works, and forced crashes
 	 * to verify that we never lose on-disk consistency.
 	 */
 	while (gethrtime() < zs->zs_proc_stop) {
 		int status;
 		boolean_t killed;
 
 		/*
 		 * Initialize the workload counters for each function.
 		 */
 		for (int f = 0; f < ZTEST_FUNCS; f++) {
 			zc = ZTEST_GET_SHARED_CALLSTATE(f);
 			zc->zc_count = 0;
 			zc->zc_time = 0;
 		}
 
 		/* Set the allocation switch size */
 		zs->zs_metaslab_df_alloc_threshold =
 		    ztest_random(zs->zs_metaslab_sz / 4) + 1;
 
 		if (!hasalt || ztest_random(2) == 0) {
 			if (hasalt && ztest_opts.zo_verbose >= 1) {
 				(void) printf("Executing newer ztest: %s\n",
 				    cmd);
 			}
 			newer++;
 			killed = exec_child(cmd, NULL, B_TRUE, &status);
 		} else {
 			if (hasalt && ztest_opts.zo_verbose >= 1) {
 				(void) printf("Executing older ztest: %s\n",
 				    ztest_opts.zo_alt_ztest);
 			}
 			older++;
 			killed = exec_child(ztest_opts.zo_alt_ztest,
 			    ztest_opts.zo_alt_libpath, B_TRUE, &status);
 		}
 
 		if (killed)
 			kills++;
 		iters++;
 
 		if (ztest_opts.zo_verbose >= 1) {
 			hrtime_t now = gethrtime();
 
 			now = MIN(now, zs->zs_proc_stop);
 			print_time(zs->zs_proc_stop - now, timebuf);
 			nicenum(zs->zs_space, numbuf, sizeof (numbuf));
 
 			(void) printf("Pass %3d, %8s, %3llu ENOSPC, "
 			    "%4.1f%% of %5s used, %3.0f%% done, %8s to go\n",
 			    iters,
 			    WIFEXITED(status) ? "Complete" : "SIGKILL",
 			    (u_longlong_t)zs->zs_enospc_count,
 			    100.0 * zs->zs_alloc / zs->zs_space,
 			    numbuf,
 			    100.0 * (now - zs->zs_proc_start) /
 			    (ztest_opts.zo_time * NANOSEC), timebuf);
 		}
 
 		if (ztest_opts.zo_verbose >= 2) {
 			(void) printf("\nWorkload summary:\n\n");
 			(void) printf("%7s %9s   %s\n",
 			    "Calls", "Time", "Function");
 			(void) printf("%7s %9s   %s\n",
 			    "-----", "----", "--------");
 			for (int f = 0; f < ZTEST_FUNCS; f++) {
 				Dl_info dli;
 
 				zi = &ztest_info[f];
 				zc = ZTEST_GET_SHARED_CALLSTATE(f);
 				print_time(zc->zc_time, timebuf);
 				(void) dladdr((void *)zi->zi_func, &dli);
 				(void) printf("%7llu %9s   %s\n",
 				    (u_longlong_t)zc->zc_count, timebuf,
 				    dli.dli_sname);
 			}
 			(void) printf("\n");
 		}
 
 		/*
 		 * It's possible that we killed a child during a rename test,
 		 * in which case we'll have a 'ztest_tmp' pool lying around
 		 * instead of 'ztest'.  Do a blind rename in case this happened.
 		 */
 		kernel_init(FREAD);
 		if (spa_open(ztest_opts.zo_pool, &spa, FTAG) == 0) {
 			spa_close(spa, FTAG);
 		} else {
 			char tmpname[ZFS_MAX_DATASET_NAME_LEN];
 			kernel_fini();
 			kernel_init(FREAD | FWRITE);
 			(void) snprintf(tmpname, sizeof (tmpname), "%s_tmp",
 			    ztest_opts.zo_pool);
 			(void) spa_rename(tmpname, ztest_opts.zo_pool);
 		}
 		kernel_fini();
 
 		ztest_run_zdb(ztest_opts.zo_pool);
 	}
 
 	if (ztest_opts.zo_verbose >= 1) {
 		if (hasalt) {
 			(void) printf("%d runs of older ztest: %s\n", older,
 			    ztest_opts.zo_alt_ztest);
 			(void) printf("%d runs of newer ztest: %s\n", newer,
 			    cmd);
 		}
 		(void) printf("%d killed, %d completed, %.0f%% kill rate\n",
 		    kills, iters - kills, (100.0 * kills) / MAX(1, iters));
 	}
 
 	umem_free(cmd, MAXNAMELEN);
 
 	return (0);
 }
Index: vendor/illumos/dist/lib/libzpool/common/kernel.c
===================================================================
--- vendor/illumos/dist/lib/libzpool/common/kernel.c	(revision 329752)
+++ vendor/illumos/dist/lib/libzpool/common/kernel.c	(revision 329753)
@@ -1,1161 +1,586 @@
 /*
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
  * Common Development and Distribution License (the "License").
  * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
  * See the License for the specific language governing permissions
  * and limitations under the License.
  *
  * When distributing Covered Code, include this CDDL HEADER in each
  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  * If applicable, add the following below this CDDL HEADER, with the
  * fields enclosed by brackets "[]" replaced with your own identifying
  * information: Portions Copyright [yyyy] [name of copyright owner]
  *
  * CDDL HEADER END
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
  * Copyright (c) 2013, Joyent, Inc.  All rights reserved.
+ * Copyright 2017 RackTop Systems.
  */
 
 #include <assert.h>
 #include <fcntl.h>
 #include <poll.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <zlib.h>
 #include <libgen.h>
 #include <sys/spa.h>
 #include <sys/stat.h>
 #include <sys/processor.h>
 #include <sys/zfs_context.h>
 #include <sys/rrwlock.h>
 #include <sys/zmod.h>
 #include <sys/utsname.h>
 #include <sys/systeminfo.h>
 
+extern void system_taskq_init(void);
+extern void system_taskq_fini(void);
+
 /*
  * Emulation of kernel services in userland.
  */
 
-int aok;
-uint64_t physmem;
+pgcnt_t physmem;
 vnode_t *rootdir = (vnode_t *)0xabcd1234;
 char hw_serial[HW_HOSTID_LEN];
 kmutex_t cpu_lock;
 vmem_t *zio_arena = NULL;
 
 /* If set, all blocks read will be copied to the specified directory. */
 char *vn_dumpdir = NULL;
 
 struct utsname utsname = {
 	"userland", "libzpool", "1", "1", "na"
 };
 
-/* this only exists to have its address taken */
-struct proc p0;
-
 /*
  * =========================================================================
- * threads
- * =========================================================================
- */
-/*ARGSUSED*/
-kthread_t *
-zk_thread_create(void (*func)(), void *arg)
-{
-	thread_t tid;
-
-	VERIFY(thr_create(0, 0, (void *(*)(void *))func, arg, THR_DETACHED,
-	    &tid) == 0);
-
-	return ((void *)(uintptr_t)tid);
-}
-
-/*
- * =========================================================================
- * kstats
- * =========================================================================
- */
-/*ARGSUSED*/
-kstat_t *
-kstat_create(const char *module, int instance, const char *name,
-    const char *class, uchar_t type, ulong_t ndata, uchar_t ks_flag)
-{
-	return (NULL);
-}
-
-/*ARGSUSED*/
-void
-kstat_named_init(kstat_named_t *knp, const char *name, uchar_t type)
-{}
-
-/*ARGSUSED*/
-void
-kstat_install(kstat_t *ksp)
-{}
-
-/*ARGSUSED*/
-void
-kstat_delete(kstat_t *ksp)
-{}
-
-/*ARGSUSED*/
-void
-kstat_waitq_enter(kstat_io_t *kiop)
-{}
-
-/*ARGSUSED*/
-void
-kstat_waitq_exit(kstat_io_t *kiop)
-{}
-
-/*ARGSUSED*/
-void
-kstat_runq_enter(kstat_io_t *kiop)
-{}
-
-/*ARGSUSED*/
-void
-kstat_runq_exit(kstat_io_t *kiop)
-{}
-
-/*ARGSUSED*/
-void
-kstat_waitq_to_runq(kstat_io_t *kiop)
-{}
-
-/*ARGSUSED*/
-void
-kstat_runq_back_to_waitq(kstat_io_t *kiop)
-{}
-
-/*
- * =========================================================================
- * mutexes
- * =========================================================================
- */
-void
-zmutex_init(kmutex_t *mp)
-{
-	mp->m_owner = NULL;
-	mp->initialized = B_TRUE;
-	(void) _mutex_init(&mp->m_lock, USYNC_THREAD, NULL);
-}
-
-void
-zmutex_destroy(kmutex_t *mp)
-{
-	ASSERT(mp->initialized == B_TRUE);
-	ASSERT(mp->m_owner == NULL);
-	(void) _mutex_destroy(&(mp)->m_lock);
-	mp->m_owner = (void *)-1UL;
-	mp->initialized = B_FALSE;
-}
-
-void
-mutex_enter(kmutex_t *mp)
-{
-	ASSERT(mp->initialized == B_TRUE);
-	ASSERT(mp->m_owner != (void *)-1UL);
-	ASSERT(mp->m_owner != curthread);
-	VERIFY(mutex_lock(&mp->m_lock) == 0);
-	ASSERT(mp->m_owner == NULL);
-	mp->m_owner = curthread;
-}
-
-int
-mutex_tryenter(kmutex_t *mp)
-{
-	ASSERT(mp->initialized == B_TRUE);
-	ASSERT(mp->m_owner != (void *)-1UL);
-	if (0 == mutex_trylock(&mp->m_lock)) {
-		ASSERT(mp->m_owner == NULL);
-		mp->m_owner = curthread;
-		return (1);
-	} else {
-		return (0);
-	}
-}
-
-void
-mutex_exit(kmutex_t *mp)
-{
-	ASSERT(mp->initialized == B_TRUE);
-	ASSERT(mutex_owner(mp) == curthread);
-	mp->m_owner = NULL;
-	VERIFY(mutex_unlock(&mp->m_lock) == 0);
-}
-
-void *
-mutex_owner(kmutex_t *mp)
-{
-	ASSERT(mp->initialized == B_TRUE);
-	return (mp->m_owner);
-}
-
-/*
- * =========================================================================
- * rwlocks
- * =========================================================================
- */
-/*ARGSUSED*/
-void
-rw_init(krwlock_t *rwlp, char *name, int type, void *arg)
-{
-	rwlock_init(&rwlp->rw_lock, USYNC_THREAD, NULL);
-	rwlp->rw_owner = NULL;
-	rwlp->initialized = B_TRUE;
-}
-
-void
-rw_destroy(krwlock_t *rwlp)
-{
-	rwlock_destroy(&rwlp->rw_lock);
-	rwlp->rw_owner = (void *)-1UL;
-	rwlp->initialized = B_FALSE;
-}
-
-void
-rw_enter(krwlock_t *rwlp, krw_t rw)
-{
-	ASSERT(!RW_LOCK_HELD(rwlp));
-	ASSERT(rwlp->initialized == B_TRUE);
-	ASSERT(rwlp->rw_owner != (void *)-1UL);
-	ASSERT(rwlp->rw_owner != curthread);
-
-	if (rw == RW_WRITER)
-		VERIFY(rw_wrlock(&rwlp->rw_lock) == 0);
-	else
-		VERIFY(rw_rdlock(&rwlp->rw_lock) == 0);
-
-	rwlp->rw_owner = curthread;
-}
-
-void
-rw_exit(krwlock_t *rwlp)
-{
-	ASSERT(rwlp->initialized == B_TRUE);
-	ASSERT(rwlp->rw_owner != (void *)-1UL);
-
-	rwlp->rw_owner = NULL;
-	VERIFY(rw_unlock(&rwlp->rw_lock) == 0);
-}
-
-int
-rw_tryenter(krwlock_t *rwlp, krw_t rw)
-{
-	int rv;
-
-	ASSERT(rwlp->initialized == B_TRUE);
-	ASSERT(rwlp->rw_owner != (void *)-1UL);
-
-	if (rw == RW_WRITER)
-		rv = rw_trywrlock(&rwlp->rw_lock);
-	else
-		rv = rw_tryrdlock(&rwlp->rw_lock);
-
-	if (rv == 0) {
-		rwlp->rw_owner = curthread;
-		return (1);
-	}
-
-	return (0);
-}
-
-/*ARGSUSED*/
-int
-rw_tryupgrade(krwlock_t *rwlp)
-{
-	ASSERT(rwlp->initialized == B_TRUE);
-	ASSERT(rwlp->rw_owner != (void *)-1UL);
-
-	return (0);
-}
-
-/*
- * =========================================================================
- * condition variables
- * =========================================================================
- */
-/*ARGSUSED*/
-void
-cv_init(kcondvar_t *cv, char *name, int type, void *arg)
-{
-	VERIFY(cond_init(cv, type, NULL) == 0);
-}
-
-void
-cv_destroy(kcondvar_t *cv)
-{
-	VERIFY(cond_destroy(cv) == 0);
-}
-
-void
-cv_wait(kcondvar_t *cv, kmutex_t *mp)
-{
-	ASSERT(mutex_owner(mp) == curthread);
-	mp->m_owner = NULL;
-	int ret = cond_wait(cv, &mp->m_lock);
-	VERIFY(ret == 0 || ret == EINTR);
-	mp->m_owner = curthread;
-}
-
-clock_t
-cv_timedwait(kcondvar_t *cv, kmutex_t *mp, clock_t abstime)
-{
-	int error;
-	timestruc_t ts;
-	clock_t delta;
-
-top:
-	delta = abstime - ddi_get_lbolt();
-	if (delta <= 0)
-		return (-1);
-
-	ts.tv_sec = delta / hz;
-	ts.tv_nsec = (delta % hz) * (NANOSEC / hz);
-
-	ASSERT(mutex_owner(mp) == curthread);
-	mp->m_owner = NULL;
-	error = cond_reltimedwait(cv, &mp->m_lock, &ts);
-	mp->m_owner = curthread;
-
-	if (error == ETIME)
-		return (-1);
-
-	if (error == EINTR)
-		goto top;
-
-	ASSERT(error == 0);
-
-	return (1);
-}
-
-/*ARGSUSED*/
-clock_t
-cv_timedwait_hires(kcondvar_t *cv, kmutex_t *mp, hrtime_t tim, hrtime_t res,
-    int flag)
-{
-	int error;
-	timestruc_t ts;
-	hrtime_t delta;
-
-	ASSERT(flag == 0 || flag == CALLOUT_FLAG_ABSOLUTE);
-
-top:
-	delta = tim;
-	if (flag & CALLOUT_FLAG_ABSOLUTE)
-		delta -= gethrtime();
-
-	if (delta <= 0)
-		return (-1);
-
-	ts.tv_sec = delta / NANOSEC;
-	ts.tv_nsec = delta % NANOSEC;
-
-	ASSERT(mutex_owner(mp) == curthread);
-	mp->m_owner = NULL;
-	error = cond_reltimedwait(cv, &mp->m_lock, &ts);
-	mp->m_owner = curthread;
-
-	if (error == ETIME)
-		return (-1);
-
-	if (error == EINTR)
-		goto top;
-
-	ASSERT(error == 0);
-
-	return (1);
-}
-
-void
-cv_signal(kcondvar_t *cv)
-{
-	VERIFY(cond_signal(cv) == 0);
-}
-
-void
-cv_broadcast(kcondvar_t *cv)
-{
-	VERIFY(cond_broadcast(cv) == 0);
-}
-
-/*
- * =========================================================================
  * vnode operations
  * =========================================================================
  */
 /*
  * Note: for the xxxat() versions of these functions, we assume that the
  * starting vp is always rootdir (which is true for spa_directory.c, the only
  * ZFS consumer of these interfaces).  We assert this is true, and then emulate
  * them by adding '/' in front of the path.
  */
 
 /*ARGSUSED*/
 int
 vn_open(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, int x3)
 {
 	int fd;
 	int dump_fd;
 	vnode_t *vp;
 	int old_umask;
 	char realpath[MAXPATHLEN];
 	struct stat64 st;
 
 	/*
 	 * If we're accessing a real disk from userland, we need to use
 	 * the character interface to avoid caching.  This is particularly
 	 * important if we're trying to look at a real in-kernel storage
 	 * pool from userland, e.g. via zdb, because otherwise we won't
 	 * see the changes occurring under the segmap cache.
 	 * On the other hand, the stupid character device returns zero
 	 * for its size.  So -- gag -- we open the block device to get
 	 * its size, and remember it for subsequent VOP_GETATTR().
 	 */
 	if (strncmp(path, "/dev/", 5) == 0) {
 		char *dsk;
 		fd = open64(path, O_RDONLY);
 		if (fd == -1)
 			return (errno);
 		if (fstat64(fd, &st) == -1) {
 			close(fd);
 			return (errno);
 		}
 		close(fd);
 		(void) sprintf(realpath, "%s", path);
 		dsk = strstr(path, "/dsk/");
 		if (dsk != NULL)
 			(void) sprintf(realpath + (dsk - path) + 1, "r%s",
 			    dsk + 1);
 	} else {
 		(void) sprintf(realpath, "%s", path);
 		if (!(flags & FCREAT) && stat64(realpath, &st) == -1)
 			return (errno);
 	}
 
 	if (flags & FCREAT)
 		old_umask = umask(0);
 
 	/*
 	 * The construct 'flags - FREAD' conveniently maps combinations of
 	 * FREAD and FWRITE to the corresponding O_RDONLY, O_WRONLY, and O_RDWR.
 	 */
 	fd = open64(realpath, flags - FREAD, mode);
 
 	if (flags & FCREAT)
 		(void) umask(old_umask);
 
 	if (vn_dumpdir != NULL) {
 		char dumppath[MAXPATHLEN];
 		(void) snprintf(dumppath, sizeof (dumppath),
 		    "%s/%s", vn_dumpdir, basename(realpath));
 		dump_fd = open64(dumppath, O_CREAT | O_WRONLY, 0666);
 		if (dump_fd == -1)
 			return (errno);
 	} else {
 		dump_fd = -1;
 	}
 
 	if (fd == -1)
 		return (errno);
 
 	if (fstat64(fd, &st) == -1) {
 		close(fd);
 		return (errno);
 	}
 
 	(void) fcntl(fd, F_SETFD, FD_CLOEXEC);
 
 	*vpp = vp = umem_zalloc(sizeof (vnode_t), UMEM_NOFAIL);
 
 	vp->v_fd = fd;
 	vp->v_size = st.st_size;
 	vp->v_path = spa_strdup(path);
 	vp->v_dump_fd = dump_fd;
 
 	return (0);
 }
 
 /*ARGSUSED*/
 int
 vn_openat(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2,
     int x3, vnode_t *startvp, int fd)
 {
 	char *realpath = umem_alloc(strlen(path) + 2, UMEM_NOFAIL);
 	int ret;
 
 	ASSERT(startvp == rootdir);
 	(void) sprintf(realpath, "/%s", path);
 
 	/* fd ignored for now, need if want to simulate nbmand support */
 	ret = vn_open(realpath, x1, flags, mode, vpp, x2, x3);
 
 	umem_free(realpath, strlen(path) + 2);
 
 	return (ret);
 }
 
 /*ARGSUSED*/
 int
 vn_rdwr(int uio, vnode_t *vp, void *addr, ssize_t len, offset_t offset,
     int x1, int x2, rlim64_t x3, void *x4, ssize_t *residp)
 {
 	ssize_t iolen, split;
 
 	if (uio == UIO_READ) {
 		iolen = pread64(vp->v_fd, addr, len, offset);
 		if (vp->v_dump_fd != -1) {
 			int status =
 			    pwrite64(vp->v_dump_fd, addr, iolen, offset);
 			ASSERT(status != -1);
 		}
 	} else {
 		/*
 		 * To simulate partial disk writes, we split writes into two
 		 * system calls so that the process can be killed in between.
 		 */
 		int sectors = len >> SPA_MINBLOCKSHIFT;
 		split = (sectors > 0 ? rand() % sectors : 0) <<
 		    SPA_MINBLOCKSHIFT;
 		iolen = pwrite64(vp->v_fd, addr, split, offset);
 		iolen += pwrite64(vp->v_fd, (char *)addr + split,
 		    len - split, offset + split);
 	}
 
 	if (iolen == -1)
 		return (errno);
 	if (residp)
 		*residp = len - iolen;
 	else if (iolen != len)
 		return (EIO);
 	return (0);
 }
 
 void
 vn_close(vnode_t *vp)
 {
 	close(vp->v_fd);
 	if (vp->v_dump_fd != -1)
 		close(vp->v_dump_fd);
 	spa_strfree(vp->v_path);
 	umem_free(vp, sizeof (vnode_t));
 }
 
 /*
  * At a minimum we need to update the size since vdev_reopen()
  * will no longer call vn_openat().
  */
 int
 fop_getattr(vnode_t *vp, vattr_t *vap)
 {
 	struct stat64 st;
 
 	if (fstat64(vp->v_fd, &st) == -1) {
 		close(vp->v_fd);
 		return (errno);
 	}
 
 	vap->va_size = st.st_size;
 	return (0);
 }
 
 #ifdef ZFS_DEBUG
 
 /*
  * =========================================================================
  * Figure out which debugging statements to print
  * =========================================================================
  */
 
 static char *dprintf_string;
 static int dprintf_print_all;
 
 int
 dprintf_find_string(const char *string)
 {
 	char *tmp_str = dprintf_string;
 	int len = strlen(string);
 
 	/*
 	 * Find out if this is a string we want to print.
 	 * String format: file1.c,function_name1,file2.c,file3.c
 	 */
 
 	while (tmp_str != NULL) {
 		if (strncmp(tmp_str, string, len) == 0 &&
 		    (tmp_str[len] == ',' || tmp_str[len] == '\0'))
 			return (1);
 		tmp_str = strchr(tmp_str, ',');
 		if (tmp_str != NULL)
 			tmp_str++; /* Get rid of , */
 	}
 	return (0);
 }
 
 void
 dprintf_setup(int *argc, char **argv)
 {
 	int i, j;
 
 	/*
 	 * Debugging can be specified two ways: by setting the
 	 * environment variable ZFS_DEBUG, or by including a
 	 * "debug=..."  argument on the command line.  The command
 	 * line setting overrides the environment variable.
 	 */
 
 	for (i = 1; i < *argc; i++) {
 		int len = strlen("debug=");
 		/* First look for a command line argument */
 		if (strncmp("debug=", argv[i], len) == 0) {
 			dprintf_string = argv[i] + len;
 			/* Remove from args */
 			for (j = i; j < *argc; j++)
 				argv[j] = argv[j+1];
 			argv[j] = NULL;
 			(*argc)--;
 		}
 	}
 
 	if (dprintf_string == NULL) {
 		/* Look for ZFS_DEBUG environment variable */
 		dprintf_string = getenv("ZFS_DEBUG");
 	}
 
 	/*
 	 * Are we just turning on all debugging?
 	 */
 	if (dprintf_find_string("on"))
 		dprintf_print_all = 1;
 
 	if (dprintf_string != NULL)
 		zfs_flags |= ZFS_DEBUG_DPRINTF;
 }
 
 /*
  * =========================================================================
  * debug printfs
  * =========================================================================
  */
 void
 __dprintf(const char *file, const char *func, int line, const char *fmt, ...)
 {
 	const char *newfile;
 	va_list adx;
 
 	/*
 	 * Get rid of annoying "../common/" prefix to filename.
 	 */
 	newfile = strrchr(file, '/');
 	if (newfile != NULL) {
 		newfile = newfile + 1; /* Get rid of leading / */
 	} else {
 		newfile = file;
 	}
 
 	if (dprintf_print_all ||
 	    dprintf_find_string(newfile) ||
 	    dprintf_find_string(func)) {
 		/* Print out just the function name if requested */
 		flockfile(stdout);
 		if (dprintf_find_string("pid"))
 			(void) printf("%d ", getpid());
 		if (dprintf_find_string("tid"))
 			(void) printf("%u ", thr_self());
 		if (dprintf_find_string("cpu"))
 			(void) printf("%u ", getcpuid());
 		if (dprintf_find_string("time"))
 			(void) printf("%llu ", gethrtime());
 		if (dprintf_find_string("long"))
 			(void) printf("%s, line %d: ", newfile, line);
 		(void) printf("%s: ", func);
 		va_start(adx, fmt);
 		(void) vprintf(fmt, adx);
 		va_end(adx);
 		funlockfile(stdout);
 	}
 }
 
 #endif /* ZFS_DEBUG */
 
 /*
  * =========================================================================
- * cmn_err() and panic()
- * =========================================================================
- */
-static char ce_prefix[CE_IGNORE][10] = { "", "NOTICE: ", "WARNING: ", "" };
-static char ce_suffix[CE_IGNORE][2] = { "", "\n", "\n", "" };
-
-void
-vpanic(const char *fmt, va_list adx)
-{
-	char buf[512];
-	(void) vsnprintf(buf, 512, fmt, adx);
-	assfail(buf, NULL, 0);
-	abort(); /* necessary to make vpanic meet noreturn requirements */
-}
-
-void
-panic(const char *fmt, ...)
-{
-	va_list adx;
-
-	va_start(adx, fmt);
-	vpanic(fmt, adx);
-	va_end(adx);
-}
-
-void
-vcmn_err(int ce, const char *fmt, va_list adx)
-{
-	if (ce == CE_PANIC)
-		vpanic(fmt, adx);
-	if (ce != CE_NOTE) {	/* suppress noise in userland stress testing */
-		(void) fprintf(stderr, "%s", ce_prefix[ce]);
-		(void) vfprintf(stderr, fmt, adx);
-		(void) fprintf(stderr, "%s", ce_suffix[ce]);
-	}
-}
-
-/*PRINTFLIKE2*/
-void
-cmn_err(int ce, const char *fmt, ...)
-{
-	va_list adx;
-
-	va_start(adx, fmt);
-	vcmn_err(ce, fmt, adx);
-	va_end(adx);
-}
-
-/*
- * =========================================================================
  * kobj interfaces
  * =========================================================================
  */
 struct _buf *
 kobj_open_file(char *name)
 {
 	struct _buf *file;
 	vnode_t *vp;
 
 	/* set vp as the _fd field of the file */
 	if (vn_openat(name, UIO_SYSSPACE, FREAD, 0, &vp, 0, 0, rootdir,
 	    -1) != 0)
 		return ((void *)-1UL);
 
 	file = umem_zalloc(sizeof (struct _buf), UMEM_NOFAIL);
 	file->_fd = (intptr_t)vp;
 	return (file);
 }
 
 int
 kobj_read_file(struct _buf *file, char *buf, unsigned size, unsigned off)
 {
 	ssize_t resid;
 
 	vn_rdwr(UIO_READ, (vnode_t *)file->_fd, buf, size, (offset_t)off,
 	    UIO_SYSSPACE, 0, 0, 0, &resid);
 
 	return (size - resid);
 }
 
 void
 kobj_close_file(struct _buf *file)
 {
 	vn_close((vnode_t *)file->_fd);
 	umem_free(file, sizeof (struct _buf));
 }
 
 int
 kobj_get_filesize(struct _buf *file, uint64_t *size)
 {
 	struct stat64 st;
 	vnode_t *vp = (vnode_t *)file->_fd;
 
 	if (fstat64(vp->v_fd, &st) == -1) {
 		vn_close(vp);
 		return (errno);
 	}
 	*size = st.st_size;
 	return (0);
 }
 
 /*
  * =========================================================================
- * misc routines
- * =========================================================================
- */
-
-void
-delay(clock_t ticks)
-{
-	poll(0, 0, ticks * (1000 / hz));
-}
-
-/*
- * Find highest one bit set.
- *	Returns bit number + 1 of highest bit that is set, otherwise returns 0.
- */
-int
-highbit64(uint64_t i)
-{
-	int h = 1;
-
-	if (i == 0)
-		return (0);
-	if (i & 0xffffffff00000000ULL) {
-		h += 32; i >>= 32;
-	}
-	if (i & 0xffff0000) {
-		h += 16; i >>= 16;
-	}
-	if (i & 0xff00) {
-		h += 8; i >>= 8;
-	}
-	if (i & 0xf0) {
-		h += 4; i >>= 4;
-	}
-	if (i & 0xc) {
-		h += 2; i >>= 2;
-	}
-	if (i & 0x2) {
-		h += 1;
-	}
-	return (h);
-}
-
-static int random_fd = -1, urandom_fd = -1;
-
-static int
-random_get_bytes_common(uint8_t *ptr, size_t len, int fd)
-{
-	size_t resid = len;
-	ssize_t bytes;
-
-	ASSERT(fd != -1);
-
-	while (resid != 0) {
-		bytes = read(fd, ptr, resid);
-		ASSERT3S(bytes, >=, 0);
-		ptr += bytes;
-		resid -= bytes;
-	}
-
-	return (0);
-}
-
-int
-random_get_bytes(uint8_t *ptr, size_t len)
-{
-	return (random_get_bytes_common(ptr, len, random_fd));
-}
-
-int
-random_get_pseudo_bytes(uint8_t *ptr, size_t len)
-{
-	return (random_get_bytes_common(ptr, len, urandom_fd));
-}
-
-int
-ddi_strtoul(const char *hw_serial, char **nptr, int base, unsigned long *result)
-{
-	char *end;
-
-	*result = strtoul(hw_serial, &end, base);
-	if (*result == 0)
-		return (errno);
-	return (0);
-}
-
-int
-ddi_strtoull(const char *str, char **nptr, int base, u_longlong_t *result)
-{
-	char *end;
-
-	*result = strtoull(str, &end, base);
-	if (*result == 0)
-		return (errno);
-	return (0);
-}
-
-/* ARGSUSED */
-cyclic_id_t
-cyclic_add(cyc_handler_t *hdlr, cyc_time_t *when)
-{
-	return (1);
-}
-
-/* ARGSUSED */
-void
-cyclic_remove(cyclic_id_t id)
-{
-}
-
-/* ARGSUSED */
-int
-cyclic_reprogram(cyclic_id_t id, hrtime_t expiration)
-{
-	return (1);
-}
-
-/*
- * =========================================================================
  * kernel emulation setup & teardown
  * =========================================================================
  */
 static int
 umem_out_of_memory(void)
 {
 	char errmsg[] = "out of memory -- generating core dump\n";
 
 	write(fileno(stderr), errmsg, sizeof (errmsg));
 	abort();
 	return (0);
 }
 
 void
 kernel_init(int mode)
 {
 	extern uint_t rrw_tsd_key;
 
 	umem_nofail_callback(umem_out_of_memory);
 
 	physmem = sysconf(_SC_PHYS_PAGES);
 
 	dprintf("physmem = %llu pages (%.2f GB)\n", physmem,
 	    (double)physmem * sysconf(_SC_PAGE_SIZE) / (1ULL << 30));
 
 	(void) snprintf(hw_serial, sizeof (hw_serial), "%ld",
 	    (mode & FWRITE) ? gethostid() : 0);
 
-	VERIFY((random_fd = open("/dev/random", O_RDONLY)) != -1);
-	VERIFY((urandom_fd = open("/dev/urandom", O_RDONLY)) != -1);
-
 	system_taskq_init();
 
 	mutex_init(&cpu_lock, NULL, MUTEX_DEFAULT, NULL);
 
 	spa_init(mode);
 
 	tsd_create(&rrw_tsd_key, rrw_tsd_destroy);
 }
 
 void
 kernel_fini(void)
 {
 	spa_fini();
 
 	system_taskq_fini();
-
-	close(random_fd);
-	close(urandom_fd);
-
-	random_fd = -1;
-	urandom_fd = -1;
 }
 
 int
 z_uncompress(void *dst, size_t *dstlen, const void *src, size_t srclen)
 {
 	int ret;
 	uLongf len = *dstlen;
 
 	if ((ret = uncompress(dst, &len, src, srclen)) == Z_OK)
 		*dstlen = (size_t)len;
 
 	return (ret);
 }
 
 int
 z_compress_level(void *dst, size_t *dstlen, const void *src, size_t srclen,
     int level)
 {
 	int ret;
 	uLongf len = *dstlen;
 
 	if ((ret = compress2(dst, &len, src, srclen, level)) == Z_OK)
 		*dstlen = (size_t)len;
 
 	return (ret);
 }
 
-uid_t
-crgetuid(cred_t *cr)
-{
-	return (0);
-}
-
-uid_t
-crgetruid(cred_t *cr)
-{
-	return (0);
-}
-
-gid_t
-crgetgid(cred_t *cr)
-{
-	return (0);
-}
-
 int
-crgetngroups(cred_t *cr)
-{
-	return (0);
-}
-
-gid_t *
-crgetgroups(cred_t *cr)
-{
-	return (NULL);
-}
-
-int
 zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr)
 {
 	return (0);
 }
 
 int
 zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr)
 {
 	return (0);
 }
 
 int
 zfs_secpolicy_destroy_perms(const char *name, cred_t *cr)
 {
 	return (0);
-}
-
-ksiddomain_t *
-ksid_lookupdomain(const char *dom)
-{
-	ksiddomain_t *kd;
-
-	kd = umem_zalloc(sizeof (ksiddomain_t), UMEM_NOFAIL);
-	kd->kd_name = spa_strdup(dom);
-	return (kd);
-}
-
-void
-ksiddomain_rele(ksiddomain_t *ksid)
-{
-	spa_strfree(ksid->kd_name);
-	umem_free(ksid, sizeof (ksiddomain_t));
-}
-
-/*
- * Do not change the length of the returned string; it must be freed
- * with strfree().
- */
-char *
-kmem_asprintf(const char *fmt, ...)
-{
-	int size;
-	va_list adx;
-	char *buf;
-
-	va_start(adx, fmt);
-	size = vsnprintf(NULL, 0, fmt, adx) + 1;
-	va_end(adx);
-
-	buf = kmem_alloc(size, KM_SLEEP);
-
-	va_start(adx, fmt);
-	size = vsnprintf(buf, size, fmt, adx);
-	va_end(adx);
-
-	return (buf);
 }
 
 /* ARGSUSED */
 int
 zfs_onexit_fd_hold(int fd, minor_t *minorp)
 {
 	*minorp = 0;
 	return (0);
 }
 
 /* ARGSUSED */
 void
 zfs_onexit_fd_rele(int fd)
 {
 }
 
 /* ARGSUSED */
 int
 zfs_onexit_add_cb(minor_t minor, void (*func)(void *), void *data,
     uint64_t *action_handle)
 {
 	return (0);
 }
 
 /* ARGSUSED */
 int
 zfs_onexit_del_cb(minor_t minor, uint64_t action_handle, boolean_t fire)
 {
 	return (0);
 }
 
 /* ARGSUSED */
 int
 zfs_onexit_cb_data(minor_t minor, uint64_t action_handle, void **data)
 {
 	return (0);
 }
 
 void
 bioinit(buf_t *bp)
 {
 	bzero(bp, sizeof (buf_t));
 }
 
 void
 biodone(buf_t *bp)
 {
 	if (bp->b_iodone != NULL) {
 		(*(bp->b_iodone))(bp);
 		return;
 	}
 	ASSERT((bp->b_flags & B_DONE) == 0);
 	bp->b_flags |= B_DONE;
 }
 
 void
 bioerror(buf_t *bp, int error)
 {
 	ASSERT(bp != NULL);
 	ASSERT(error >= 0);
 
 	if (error != 0) {
 		bp->b_flags |= B_ERROR;
 	} else {
 		bp->b_flags &= ~B_ERROR;
 	}
 	bp->b_error = error;
 }
 
 
 int
 geterror(struct buf *bp)
 {
 	int error = 0;
 
 	if (bp->b_flags & B_ERROR) {
 		error = bp->b_error;
 		if (!error)
 			error = EIO;
 	}
 	return (error);
 }
Index: vendor/illumos/dist/lib/libzpool/common/sys/zfs_context.h
===================================================================
--- vendor/illumos/dist/lib/libzpool/common/sys/zfs_context.h	(revision 329752)
+++ vendor/illumos/dist/lib/libzpool/common/sys/zfs_context.h	(revision 329753)
@@ -1,698 +1,333 @@
 /*
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
  * Common Development and Distribution License (the "License").
  * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
  * See the License for the specific language governing permissions
  * and limitations under the License.
  *
  * When distributing Covered Code, include this CDDL HEADER in each
  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  * If applicable, add the following below this CDDL HEADER, with the
  * fields enclosed by brackets "[]" replaced with your own identifying
  * information: Portions Copyright [yyyy] [name of copyright owner]
  *
  * CDDL HEADER END
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
  * Copyright (c) 2012, Joyent, Inc. All rights reserved.
+ * Copyright 2017 RackTop Systems.
  */
 
 #ifndef _SYS_ZFS_CONTEXT_H
 #define	_SYS_ZFS_CONTEXT_H
 
 #ifdef	__cplusplus
 extern "C" {
 #endif
 
-#define	_SYS_MUTEX_H
-#define	_SYS_RWLOCK_H
-#define	_SYS_CONDVAR_H
-#define	_SYS_SYSTM_H
-#define	_SYS_T_LOCK_H
+#define	_SYNCH_H
+
 #define	_SYS_VNODE_H
 #define	_SYS_VFS_H
-#define	_SYS_SUNDDI_H
 #define	_SYS_CALLB_H
 
 #include <stdio.h>
 #include <stdlib.h>
 #include <stddef.h>
 #include <stdarg.h>
 #include <fcntl.h>
 #include <unistd.h>
 #include <errno.h>
 #include <string.h>
 #include <strings.h>
-#include <synch.h>
 #include <thread.h>
 #include <assert.h>
 #include <alloca.h>
 #include <umem.h>
 #include <limits.h>
 #include <atomic.h>
 #include <dirent.h>
 #include <time.h>
 #include <procfs.h>
 #include <pthread.h>
 #include <setjmp.h>
 #include <sys/debug.h>
 #include <libsysevent.h>
 #include <sys/note.h>
 #include <sys/types.h>
 #include <sys/cred.h>
 #include <sys/sysmacros.h>
 #include <sys/bitmap.h>
 #include <sys/resource.h>
 #include <sys/byteorder.h>
 #include <sys/list.h>
 #include <sys/uio.h>
 #include <sys/zfs_debug.h>
 #include <sys/sdt.h>
 #include <sys/kstat.h>
 #include <sys/u8_textprep.h>
 #include <sys/sysevent/eventdefs.h>
 #include <sys/sysevent/dev.h>
-#include <sys/sunddi.h>
 #include <sys/debug.h>
+#include <sys/taskq.h>
+#include <sys/taskq_impl.h>
+#include <sys/mutex.h>
+#include <sys/proc.h>
+#include <sys/condvar.h>
+#include <sys/cmn_err.h>
+#include <sys/kmem.h>
+#include <sys/systm.h>
+#include <sys/random.h>
+#include <sys/buf.h>
+#include <sys/sid.h>
+#include <sys/acl.h>
+#include <sys/bitmap.h>
+#include <sys/systeminfo.h>
+#include <sys/cpuvar.h>
+#include <sys/pset.h>
+#include <sys/kobj.h>
+#include <sys/fm/util.h>
 #include "zfs.h"
 
 /*
- * Debugging
- */
-
-/*
- * Note that we are not using the debugging levels.
- */
-
-#define	CE_CONT		0	/* continuation		*/
-#define	CE_NOTE		1	/* notice		*/
-#define	CE_WARN		2	/* warning		*/
-#define	CE_PANIC	3	/* panic		*/
-#define	CE_IGNORE	4	/* print nothing	*/
-
-/*
  * ZFS debugging
  */
 
 #ifdef ZFS_DEBUG
 extern void dprintf_setup(int *argc, char **argv);
 #endif /* ZFS_DEBUG */
 
-extern void cmn_err(int, const char *, ...);
-extern void vcmn_err(int, const char *, __va_list);
-extern void panic(const char *, ...)  __NORETURN;
-extern void vpanic(const char *, __va_list)  __NORETURN;
-
-#define	fm_panic	panic
-
-extern int aok;
-
 /*
  * DTrace SDT probes have different signatures in userland than they do in
  * the kernel.  If they're being used in kernel code, re-define them out of
  * existence for their counterparts in libzpool.
  *
  * Here's an example of how to use the set-error probes in userland:
  * zfs$target:::set-error /arg0 == EBUSY/ {stack();}
  *
  * Here's an example of how to use DTRACE_PROBE probes in userland:
  * If there is a probe declared as follows:
  * DTRACE_PROBE2(zfs__probe_name, uint64_t, blkid, dnode_t *, dn);
  * Then you can use it as follows:
  * zfs$target:::probe2 /copyinstr(arg0) == "zfs__probe_name"/
  *     {printf("%u %p\n", arg1, arg2);}
  */
 
 #ifdef DTRACE_PROBE
 #undef	DTRACE_PROBE
 #endif	/* DTRACE_PROBE */
 #define	DTRACE_PROBE(a) \
 	ZFS_PROBE0(#a)
 
 #ifdef DTRACE_PROBE1
 #undef	DTRACE_PROBE1
 #endif	/* DTRACE_PROBE1 */
 #define	DTRACE_PROBE1(a, b, c) \
 	ZFS_PROBE1(#a, (unsigned long)c)
 
 #ifdef DTRACE_PROBE2
 #undef	DTRACE_PROBE2
 #endif	/* DTRACE_PROBE2 */
 #define	DTRACE_PROBE2(a, b, c, d, e) \
 	ZFS_PROBE2(#a, (unsigned long)c, (unsigned long)e)
 
 #ifdef DTRACE_PROBE3
 #undef	DTRACE_PROBE3
 #endif	/* DTRACE_PROBE3 */
 #define	DTRACE_PROBE3(a, b, c, d, e, f, g) \
 	ZFS_PROBE3(#a, (unsigned long)c, (unsigned long)e, (unsigned long)g)
 
 #ifdef DTRACE_PROBE4
 #undef	DTRACE_PROBE4
 #endif	/* DTRACE_PROBE4 */
 #define	DTRACE_PROBE4(a, b, c, d, e, f, g, h, i) \
 	ZFS_PROBE4(#a, (unsigned long)c, (unsigned long)e, (unsigned long)g, \
 	(unsigned long)i)
 
 /*
  * We use the comma operator so that this macro can be used without much
  * additional code.  For example, "return (EINVAL);" becomes
  * "return (SET_ERROR(EINVAL));".  Note that the argument will be evaluated
  * twice, so it should not have side effects (e.g. something like:
  * "return (SET_ERROR(log_error(EINVAL, info)));" would log the error twice).
  */
 #define	SET_ERROR(err) (ZFS_SET_ERROR(err), err)
 
 /*
  * Threads
  */
-#define	curthread	((void *)(uintptr_t)thr_self())
-
 #define	kpreempt(x)	yield()
-
-typedef struct kthread kthread_t;
-
-#define	thread_create(stk, stksize, func, arg, len, pp, state, pri)	\
-	zk_thread_create(func, arg)
-#define	thread_exit() thr_exit(NULL)
-#define	thread_join(t)	panic("libzpool cannot join threads")
-
 #define	newproc(f, a, cid, pri, ctp, pid)	(ENOSYS)
 
-/* in libzpool, p0 exists only to have its address taken */
-struct proc {
-	uintptr_t	this_is_never_used_dont_dereference_it;
-};
-
-extern struct proc p0;
-#define	curproc		(&p0)
-
-#define	PS_NONE		-1
-
-extern kthread_t *zk_thread_create(void (*func)(void*), void *arg,
-    uint64_t len);
-
-#define	issig(why)	(FALSE)
-#define	ISSIG(thr, why)	(FALSE)
-
 /*
- * Mutexes
+ * vnodes
  */
-typedef struct kmutex {
-	void		*m_owner;
-	boolean_t	initialized;
-	mutex_t		m_lock;
-} kmutex_t;
-
-#define	MUTEX_DEFAULT	USYNC_THREAD
-#undef	MUTEX_HELD
-#undef	MUTEX_NOT_HELD
-#define	MUTEX_HELD(m) _mutex_held(&(m)->m_lock)
-#define	MUTEX_NOT_HELD(m) (!MUTEX_HELD(m))
-
-/*
- * Argh -- we have to get cheesy here because the kernel and userland
- * have different signatures for the same routine.
- */
-extern int _mutex_init(mutex_t *mp, int type, void *arg);
-extern int _mutex_destroy(mutex_t *mp);
-
-#define	mutex_init(mp, b, c, d)		zmutex_init((kmutex_t *)(mp))
-#define	mutex_destroy(mp)		zmutex_destroy((kmutex_t *)(mp))
-
-extern void zmutex_init(kmutex_t *mp);
-extern void zmutex_destroy(kmutex_t *mp);
-extern void mutex_enter(kmutex_t *mp);
-extern void mutex_exit(kmutex_t *mp);
-extern int mutex_tryenter(kmutex_t *mp);
-extern void *mutex_owner(kmutex_t *mp);
-
-/*
- * RW locks
- */
-typedef struct krwlock {
-	void		*rw_owner;
-	boolean_t	initialized;
-	rwlock_t	rw_lock;
-} krwlock_t;
-
-typedef int krw_t;
-
-#define	RW_READER	0
-#define	RW_WRITER	1
-#define	RW_DEFAULT	USYNC_THREAD
-
-#undef RW_READ_HELD
-#define	RW_READ_HELD(x)		_rw_read_held(&(x)->rw_lock)
-
-#undef RW_WRITE_HELD
-#define	RW_WRITE_HELD(x)	_rw_write_held(&(x)->rw_lock)
-
-#undef RW_LOCK_HELD
-#define	RW_LOCK_HELD(x)		(RW_READ_HELD(x) || RW_WRITE_HELD(x))
-
-extern void rw_init(krwlock_t *rwlp, char *name, int type, void *arg);
-extern void rw_destroy(krwlock_t *rwlp);
-extern void rw_enter(krwlock_t *rwlp, krw_t rw);
-extern int rw_tryenter(krwlock_t *rwlp, krw_t rw);
-extern int rw_tryupgrade(krwlock_t *rwlp);
-extern void rw_exit(krwlock_t *rwlp);
-#define	rw_downgrade(rwlp) do { } while (0)
-
-extern uid_t crgetuid(cred_t *cr);
-extern uid_t crgetruid(cred_t *cr);
-extern gid_t crgetgid(cred_t *cr);
-extern int crgetngroups(cred_t *cr);
-extern gid_t *crgetgroups(cred_t *cr);
-
-/*
- * Condition variables
- */
-typedef cond_t kcondvar_t;
-
-#define	CV_DEFAULT	USYNC_THREAD
-#define	CALLOUT_FLAG_ABSOLUTE	0x2
-
-extern void cv_init(kcondvar_t *cv, char *name, int type, void *arg);
-extern void cv_destroy(kcondvar_t *cv);
-extern void cv_wait(kcondvar_t *cv, kmutex_t *mp);
-extern clock_t cv_timedwait(kcondvar_t *cv, kmutex_t *mp, clock_t abstime);
-extern clock_t cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim,
-    hrtime_t res, int flag);
-extern void cv_signal(kcondvar_t *cv);
-extern void cv_broadcast(kcondvar_t *cv);
-
-/*
- * Thread-specific data
- */
-#define	tsd_get(k) pthread_getspecific(k)
-#define	tsd_set(k, v) pthread_setspecific(k, v)
-#define	tsd_create(kp, d) pthread_key_create(kp, d)
-#define	tsd_destroy(kp) /* nothing */
-
-/*
- * kstat creation, installation and deletion
- */
-extern kstat_t *kstat_create(const char *, int,
-    const char *, const char *, uchar_t, ulong_t, uchar_t);
-extern void kstat_named_init(kstat_named_t *, const char *, uchar_t);
-extern void kstat_install(kstat_t *);
-extern void kstat_delete(kstat_t *);
-extern void kstat_waitq_enter(kstat_io_t *);
-extern void kstat_waitq_exit(kstat_io_t *);
-extern void kstat_runq_enter(kstat_io_t *);
-extern void kstat_runq_exit(kstat_io_t *);
-extern void kstat_waitq_to_runq(kstat_io_t *);
-extern void kstat_runq_back_to_waitq(kstat_io_t *);
-
-/*
- * Kernel memory
- */
-#define	KM_SLEEP		UMEM_NOFAIL
-#define	KM_PUSHPAGE		KM_SLEEP
-#define	KM_NOSLEEP		UMEM_DEFAULT
-#define	KM_NORMALPRI		0	/* not needed with UMEM_DEFAULT */
-#define	KMC_NODEBUG		UMC_NODEBUG
-#define	KMC_NOTOUCH		0	/* not needed for userland caches */
-#define	kmem_alloc(_s, _f)	umem_alloc(_s, _f)
-#define	kmem_zalloc(_s, _f)	umem_zalloc(_s, _f)
-#define	kmem_free(_b, _s)	umem_free(_b, _s)
-#define	kmem_cache_create(_a, _b, _c, _d, _e, _f, _g, _h, _i) \
-	umem_cache_create(_a, _b, _c, _d, _e, _f, _g, _h, _i)
-#define	kmem_cache_destroy(_c)	umem_cache_destroy(_c)
-#define	kmem_cache_alloc(_c, _f) umem_cache_alloc(_c, _f)
-#define	kmem_cache_free(_c, _b)	umem_cache_free(_c, _b)
-#define	kmem_debugging()	0
-#define	kmem_cache_reap_now(_c)		/* nothing */
-#define	kmem_cache_set_move(_c, _cb)	/* nothing */
-#define	vmem_qcache_reap(_v)		/* nothing */
-#define	POINTER_INVALIDATE(_pp)		/* nothing */
-#define	POINTER_IS_VALID(_p)	0
-
-extern vmem_t *zio_arena;
-
-typedef umem_cache_t kmem_cache_t;
-
-typedef enum kmem_cbrc {
-	KMEM_CBRC_YES,
-	KMEM_CBRC_NO,
-	KMEM_CBRC_LATER,
-	KMEM_CBRC_DONT_NEED,
-	KMEM_CBRC_DONT_KNOW
-} kmem_cbrc_t;
-
-/*
- * Task queues
- */
-typedef struct taskq taskq_t;
-typedef uintptr_t taskqid_t;
-typedef void (task_func_t)(void *);
-
-typedef struct taskq_ent {
-	struct taskq_ent	*tqent_next;
-	struct taskq_ent	*tqent_prev;
-	task_func_t		*tqent_func;
-	void			*tqent_arg;
-	uintptr_t		tqent_flags;
-} taskq_ent_t;
-
-#define	TQENT_FLAG_PREALLOC	0x1	/* taskq_dispatch_ent used */
-
-#define	TASKQ_PREPOPULATE	0x0001
-#define	TASKQ_CPR_SAFE		0x0002	/* Use CPR safe protocol */
-#define	TASKQ_DYNAMIC		0x0004	/* Use dynamic thread scheduling */
-#define	TASKQ_THREADS_CPU_PCT	0x0008	/* Scale # threads by # cpus */
-#define	TASKQ_DC_BATCH		0x0010	/* Mark threads as batch */
-
-#define	TQ_SLEEP	KM_SLEEP	/* Can block for memory */
-#define	TQ_NOSLEEP	KM_NOSLEEP	/* cannot block for memory; may fail */
-#define	TQ_NOQUEUE	0x02		/* Do not enqueue if can't dispatch */
-#define	TQ_FRONT	0x08		/* Queue in front */
-
-
-extern taskq_t *system_taskq;
-
-extern taskq_t	*taskq_create(const char *, int, pri_t, int, int, uint_t);
-#define	taskq_create_proc(a, b, c, d, e, p, f) \
-	    (taskq_create(a, b, c, d, e, f))
-#define	taskq_create_sysdc(a, b, d, e, p, dc, f) \
-	    (taskq_create(a, b, maxclsyspri, d, e, f))
-extern taskqid_t taskq_dispatch(taskq_t *, task_func_t, void *, uint_t);
-extern void	taskq_dispatch_ent(taskq_t *, task_func_t, void *, uint_t,
-    taskq_ent_t *);
-extern void	taskq_destroy(taskq_t *);
-extern void	taskq_wait(taskq_t *);
-extern int	taskq_member(taskq_t *, void *);
-extern void	system_taskq_init(void);
-extern void	system_taskq_fini(void);
-
 #define	XVA_MAPSIZE	3
 #define	XVA_MAGIC	0x78766174
 
-/*
- * vnodes
- */
 typedef struct vnode {
 	uint64_t	v_size;
 	int		v_fd;
 	char		*v_path;
 	int		v_dump_fd;
 } vnode_t;
 
 extern char *vn_dumpdir;
 #define	AV_SCANSTAMP_SZ	32		/* length of anti-virus scanstamp */
 
 typedef struct xoptattr {
 	timestruc_t	xoa_createtime;	/* Create time of file */
 	uint8_t		xoa_archive;
 	uint8_t		xoa_system;
 	uint8_t		xoa_readonly;
 	uint8_t		xoa_hidden;
 	uint8_t		xoa_nounlink;
 	uint8_t		xoa_immutable;
 	uint8_t		xoa_appendonly;
 	uint8_t		xoa_nodump;
 	uint8_t		xoa_settable;
 	uint8_t		xoa_opaque;
 	uint8_t		xoa_av_quarantined;
 	uint8_t		xoa_av_modified;
 	uint8_t		xoa_av_scanstamp[AV_SCANSTAMP_SZ];
 	uint8_t		xoa_reparse;
 	uint8_t		xoa_offline;
 	uint8_t		xoa_sparse;
 } xoptattr_t;
 
 typedef struct vattr {
 	uint_t		va_mask;	/* bit-mask of attributes */
 	u_offset_t	va_size;	/* file size in bytes */
 } vattr_t;
 
 
 typedef struct xvattr {
 	vattr_t		xva_vattr;	/* Embedded vattr structure */
 	uint32_t	xva_magic;	/* Magic Number */
 	uint32_t	xva_mapsize;	/* Size of attr bitmap (32-bit words) */
 	uint32_t	*xva_rtnattrmapp;	/* Ptr to xva_rtnattrmap[] */
 	uint32_t	xva_reqattrmap[XVA_MAPSIZE];	/* Requested attrs */
 	uint32_t	xva_rtnattrmap[XVA_MAPSIZE];	/* Returned attrs */
 	xoptattr_t	xva_xoptattrs;	/* Optional attributes */
 } xvattr_t;
 
 typedef struct vsecattr {
 	uint_t		vsa_mask;	/* See below */
 	int		vsa_aclcnt;	/* ACL entry count */
 	void		*vsa_aclentp;	/* pointer to ACL entries */
 	int		vsa_dfaclcnt;	/* default ACL entry count */
 	void		*vsa_dfaclentp;	/* pointer to default ACL entries */
 	size_t		vsa_aclentsz;	/* ACE size in bytes of vsa_aclentp */
 } vsecattr_t;
 
 #define	AT_TYPE		0x00001
 #define	AT_MODE		0x00002
 #define	AT_UID		0x00004
 #define	AT_GID		0x00008
 #define	AT_FSID		0x00010
 #define	AT_NODEID	0x00020
 #define	AT_NLINK	0x00040
 #define	AT_SIZE		0x00080
 #define	AT_ATIME	0x00100
 #define	AT_MTIME	0x00200
 #define	AT_CTIME	0x00400
 #define	AT_RDEV		0x00800
 #define	AT_BLKSIZE	0x01000
 #define	AT_NBLOCKS	0x02000
 #define	AT_SEQ		0x08000
 #define	AT_XVATTR	0x10000
 
 #define	CRCREAT		0
 
 extern int fop_getattr(vnode_t *vp, vattr_t *vap);
 
 #define	VOP_CLOSE(vp, f, c, o, cr, ct)	0
 #define	VOP_PUTPAGE(vp, of, sz, fl, cr, ct)	0
 #define	VOP_GETATTR(vp, vap, fl, cr, ct)  fop_getattr((vp), (vap));
 
 #define	VOP_FSYNC(vp, f, cr, ct)	fsync((vp)->v_fd)
 
 #define	VN_RELE(vp)	vn_close(vp)
 
 extern int vn_open(char *path, int x1, int oflags, int mode, vnode_t **vpp,
     int x2, int x3);
 extern int vn_openat(char *path, int x1, int oflags, int mode, vnode_t **vpp,
     int x2, int x3, vnode_t *vp, int fd);
 extern int vn_rdwr(int uio, vnode_t *vp, void *addr, ssize_t len,
     offset_t offset, int x1, int x2, rlim64_t x3, void *x4, ssize_t *residp);
 extern void vn_close(vnode_t *vp);
 
 #define	vn_remove(path, x1, x2)		remove(path)
 #define	vn_rename(from, to, seg)	rename((from), (to))
 #define	vn_is_readonly(vp)		B_FALSE
 
 extern vnode_t *rootdir;
 
 #include <sys/file.h>		/* for FREAD, FWRITE, etc */
+#include <sys/sunddi.h>		/* for ddi_strtoul, ddi_strtoull, etc */
+#include <sys/cyclic.h>		/* for cyclic_add, cyclic remove, etc */
+#include <vm/seg_kmem.h>	/* for zio_arena */
 
 /*
  * Random stuff
  */
-#define	ddi_get_lbolt()		(gethrtime() >> 23)
-#define	ddi_get_lbolt64()	(gethrtime() >> 23)
-#define	hz	119	/* frequency when using gethrtime() >> 23 for lbolt */
-
-extern void delay(clock_t ticks);
-
-#define	SEC_TO_TICK(sec)	((sec) * hz)
-#define	NSEC_TO_TICK(usec)	((usec) / (NANOSEC / hz))
-
-#define	gethrestime_sec() time(NULL)
-#define	gethrestime(t) \
-	do {\
-		(t)->tv_sec = gethrestime_sec();\
-		(t)->tv_nsec = 0;\
-	} while (0);
-
 #define	max_ncpus	64
 #define	boot_ncpus	(sysconf(_SC_NPROCESSORS_ONLN))
 
 #define	minclsyspri	60
 #define	maxclsyspri	99
 
 #define	CPU_SEQID	(thr_self() & (max_ncpus - 1))
 
-#define	kcred		NULL
-#define	CRED()		NULL
-
-#define	ptob(x)		((x) * PAGESIZE)
-
-extern uint64_t physmem;
-
-extern int highbit64(uint64_t i);
-extern int random_get_bytes(uint8_t *ptr, size_t len);
-extern int random_get_pseudo_bytes(uint8_t *ptr, size_t len);
-
 extern void kernel_init(int);
 extern void kernel_fini(void);
 
 struct spa;
-extern void nicenum(uint64_t num, char *buf, size_t);
 extern void show_pool_stats(struct spa *);
 extern int set_global_var(char *arg);
 
 typedef struct callb_cpr {
 	kmutex_t	*cc_lockp;
 } callb_cpr_t;
 
 #define	CALLB_CPR_INIT(cp, lockp, func, name)	{		\
 	(cp)->cc_lockp = lockp;					\
 }
 
 #define	CALLB_CPR_SAFE_BEGIN(cp) {				\
 	ASSERT(MUTEX_HELD((cp)->cc_lockp));			\
 }
 
 #define	CALLB_CPR_SAFE_END(cp, lockp) {				\
 	ASSERT(MUTEX_HELD((cp)->cc_lockp));			\
 }
 
 #define	CALLB_CPR_EXIT(cp) {					\
 	ASSERT(MUTEX_HELD((cp)->cc_lockp));			\
 	mutex_exit((cp)->cc_lockp);				\
 }
 
 #define	zone_dataset_visible(x, y)	(1)
 #define	INGLOBALZONE(z)			(1)
 
-extern char *kmem_asprintf(const char *fmt, ...);
-#define	strfree(str) kmem_free((str), strlen(str) + 1)
-
-/*
- * Hostname information
- */
-extern char hw_serial[];	/* for userland-emulated hostid access */
-extern int ddi_strtoul(const char *str, char **nptr, int base,
-    unsigned long *result);
-
-extern int ddi_strtoull(const char *str, char **nptr, int base,
-    u_longlong_t *result);
-
-/* ZFS Boot Related stuff. */
-
-struct _buf {
-	intptr_t	_fd;
-};
-
-struct bootstat {
-	uint64_t st_size;
-};
-
-typedef struct ace_object {
-	uid_t		a_who;
-	uint32_t	a_access_mask;
-	uint16_t	a_flags;
-	uint16_t	a_type;
-	uint8_t		a_obj_type[16];
-	uint8_t		a_inherit_obj_type[16];
-} ace_object_t;
-
-
-#define	ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE	0x05
-#define	ACE_ACCESS_DENIED_OBJECT_ACE_TYPE	0x06
-#define	ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE	0x07
-#define	ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE	0x08
-
-extern struct _buf *kobj_open_file(char *name);
-extern int kobj_read_file(struct _buf *file, char *buf, unsigned size,
-    unsigned off);
-extern void kobj_close_file(struct _buf *file);
-extern int kobj_get_filesize(struct _buf *file, uint64_t *size);
 extern int zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr);
 extern int zfs_secpolicy_rename_perms(const char *from, const char *to,
     cred_t *cr);
 extern int zfs_secpolicy_destroy_perms(const char *name, cred_t *cr);
-extern zoneid_t getzoneid(void);
 
-/* SID stuff */
-typedef struct ksiddomain {
-	uint_t	kd_ref;
-	uint_t	kd_len;
-	char	*kd_name;
-} ksiddomain_t;
-
-ksiddomain_t *ksid_lookupdomain(const char *);
-void ksiddomain_rele(ksiddomain_t *);
-
-#define	DDI_SLEEP	KM_SLEEP
 #define	ddi_log_sysevent(_a, _b, _c, _d, _e, _f, _g) \
 	sysevent_post_event(_c, _d, _b, "libzpool", _e, _f)
-
-/*
- * Cyclic information
- */
-extern kmutex_t cpu_lock;
-
-typedef uintptr_t cyclic_id_t;
-typedef uint16_t cyc_level_t;
-typedef void (*cyc_func_t)(void *);
-
-#define	CY_LOW_LEVEL	0
-#define	CY_INFINITY	INT64_MAX
-#define	CYCLIC_NONE	((cyclic_id_t)0)
-
-typedef struct cyc_time {
-	hrtime_t cyt_when;
-	hrtime_t cyt_interval;
-} cyc_time_t;
-
-typedef struct cyc_handler {
-	cyc_func_t cyh_func;
-	void *cyh_arg;
-	cyc_level_t cyh_level;
-} cyc_handler_t;
-
-extern cyclic_id_t cyclic_add(cyc_handler_t *, cyc_time_t *);
-extern void cyclic_remove(cyclic_id_t);
-extern int cyclic_reprogram(cyclic_id_t, hrtime_t);
-
-/*
- * Buf structure
- */
-#define	B_BUSY		0x0001
-#define	B_DONE		0x0002
-#define	B_ERROR		0x0004
-#define	B_READ		0x0040	/* read when I/O occurs */
-#define	B_WRITE		0x0100	/* non-read pseudo-flag */
-
-typedef struct buf {
-	int	b_flags;
-	size_t b_bcount;
-	union {
-		caddr_t b_addr;
-	} b_un;
-
-	lldaddr_t	_b_blkno;
-#define	b_lblkno	_b_blkno._f
-	size_t	b_resid;
-	size_t	b_bufsize;
-	int	(*b_iodone)(struct buf *);
-	int	b_error;
-	void	*b_private;
-} buf_t;
-
-extern void bioinit(buf_t *);
-extern void biodone(buf_t *);
-extern void bioerror(buf_t *, int);
-extern int geterror(buf_t *);
 
 #ifdef	__cplusplus
 }
 #endif
 
 #endif	/* _SYS_ZFS_CONTEXT_H */
Index: vendor/illumos/dist/lib/libzpool/common/taskq.c
===================================================================
--- vendor/illumos/dist/lib/libzpool/common/taskq.c	(revision 329752)
+++ vendor/illumos/dist/lib/libzpool/common/taskq.c	(revision 329753)
@@ -1,342 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
- */
-/*
- * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
- * Copyright 2012 Garrett D'Amore <garrett@damore.org>.  All rights reserved.
- * Copyright (c) 2014 by Delphix. All rights reserved.
- */
-
-#include <sys/zfs_context.h>
-
-int taskq_now;
-taskq_t *system_taskq;
-
-#define	TASKQ_ACTIVE	0x00010000
-#define	TASKQ_NAMELEN	31
-
-struct taskq {
-	char		tq_name[TASKQ_NAMELEN + 1];
-	kmutex_t	tq_lock;
-	krwlock_t	tq_threadlock;
-	kcondvar_t	tq_dispatch_cv;
-	kcondvar_t	tq_wait_cv;
-	thread_t	*tq_threadlist;
-	int		tq_flags;
-	int		tq_active;
-	int		tq_nthreads;
-	int		tq_nalloc;
-	int		tq_minalloc;
-	int		tq_maxalloc;
-	kcondvar_t	tq_maxalloc_cv;
-	int		tq_maxalloc_wait;
-	taskq_ent_t	*tq_freelist;
-	taskq_ent_t	tq_task;
-};
-
-static taskq_ent_t *
-task_alloc(taskq_t *tq, int tqflags)
-{
-	taskq_ent_t *t;
-	int rv;
-
-again:	if ((t = tq->tq_freelist) != NULL && tq->tq_nalloc >= tq->tq_minalloc) {
-		tq->tq_freelist = t->tqent_next;
-	} else {
-		if (tq->tq_nalloc >= tq->tq_maxalloc) {
-			if (!(tqflags & KM_SLEEP))
-				return (NULL);
-
-			/*
-			 * We don't want to exceed tq_maxalloc, but we can't
-			 * wait for other tasks to complete (and thus free up
-			 * task structures) without risking deadlock with
-			 * the caller.  So, we just delay for one second
-			 * to throttle the allocation rate. If we have tasks
-			 * complete before one second timeout expires then
-			 * taskq_ent_free will signal us and we will
-			 * immediately retry the allocation.
-			 */
-			tq->tq_maxalloc_wait++;
-			rv = cv_timedwait(&tq->tq_maxalloc_cv,
-			    &tq->tq_lock, ddi_get_lbolt() + hz);
-			tq->tq_maxalloc_wait--;
-			if (rv > 0)
-				goto again;		/* signaled */
-		}
-		mutex_exit(&tq->tq_lock);
-
-		t = kmem_alloc(sizeof (taskq_ent_t), tqflags);
-
-		mutex_enter(&tq->tq_lock);
-		if (t != NULL)
-			tq->tq_nalloc++;
-	}
-	return (t);
-}
-
-static void
-task_free(taskq_t *tq, taskq_ent_t *t)
-{
-	if (tq->tq_nalloc <= tq->tq_minalloc) {
-		t->tqent_next = tq->tq_freelist;
-		tq->tq_freelist = t;
-	} else {
-		tq->tq_nalloc--;
-		mutex_exit(&tq->tq_lock);
-		kmem_free(t, sizeof (taskq_ent_t));
-		mutex_enter(&tq->tq_lock);
-	}
-
-	if (tq->tq_maxalloc_wait)
-		cv_signal(&tq->tq_maxalloc_cv);
-}
-
-taskqid_t
-taskq_dispatch(taskq_t *tq, task_func_t func, void *arg, uint_t tqflags)
-{
-	taskq_ent_t *t;
-
-	if (taskq_now) {
-		func(arg);
-		return (1);
-	}
-
-	mutex_enter(&tq->tq_lock);
-	ASSERT(tq->tq_flags & TASKQ_ACTIVE);
-	if ((t = task_alloc(tq, tqflags)) == NULL) {
-		mutex_exit(&tq->tq_lock);
-		return (0);
-	}
-	if (tqflags & TQ_FRONT) {
-		t->tqent_next = tq->tq_task.tqent_next;
-		t->tqent_prev = &tq->tq_task;
-	} else {
-		t->tqent_next = &tq->tq_task;
-		t->tqent_prev = tq->tq_task.tqent_prev;
-	}
-	t->tqent_next->tqent_prev = t;
-	t->tqent_prev->tqent_next = t;
-	t->tqent_func = func;
-	t->tqent_arg = arg;
-	t->tqent_flags = 0;
-	cv_signal(&tq->tq_dispatch_cv);
-	mutex_exit(&tq->tq_lock);
-	return (1);
-}
-
-void
-taskq_dispatch_ent(taskq_t *tq, task_func_t func, void *arg, uint_t flags,
-    taskq_ent_t *t)
-{
-	ASSERT(func != NULL);
-	ASSERT(!(tq->tq_flags & TASKQ_DYNAMIC));
-
-	/*
-	 * Mark it as a prealloc'd task.  This is important
-	 * to ensure that we don't free it later.
-	 */
-	t->tqent_flags |= TQENT_FLAG_PREALLOC;
-	/*
-	 * Enqueue the task to the underlying queue.
-	 */
-	mutex_enter(&tq->tq_lock);
-
-	if (flags & TQ_FRONT) {
-		t->tqent_next = tq->tq_task.tqent_next;
-		t->tqent_prev = &tq->tq_task;
-	} else {
-		t->tqent_next = &tq->tq_task;
-		t->tqent_prev = tq->tq_task.tqent_prev;
-	}
-	t->tqent_next->tqent_prev = t;
-	t->tqent_prev->tqent_next = t;
-	t->tqent_func = func;
-	t->tqent_arg = arg;
-	cv_signal(&tq->tq_dispatch_cv);
-	mutex_exit(&tq->tq_lock);
-}
-
-void
-taskq_wait(taskq_t *tq)
-{
-	mutex_enter(&tq->tq_lock);
-	while (tq->tq_task.tqent_next != &tq->tq_task || tq->tq_active != 0)
-		cv_wait(&tq->tq_wait_cv, &tq->tq_lock);
-	mutex_exit(&tq->tq_lock);
-}
-
-static void *
-taskq_thread(void *arg)
-{
-	taskq_t *tq = arg;
-	taskq_ent_t *t;
-	boolean_t prealloc;
-
-	mutex_enter(&tq->tq_lock);
-	while (tq->tq_flags & TASKQ_ACTIVE) {
-		if ((t = tq->tq_task.tqent_next) == &tq->tq_task) {
-			if (--tq->tq_active == 0)
-				cv_broadcast(&tq->tq_wait_cv);
-			cv_wait(&tq->tq_dispatch_cv, &tq->tq_lock);
-			tq->tq_active++;
-			continue;
-		}
-		t->tqent_prev->tqent_next = t->tqent_next;
-		t->tqent_next->tqent_prev = t->tqent_prev;
-		t->tqent_next = NULL;
-		t->tqent_prev = NULL;
-		prealloc = t->tqent_flags & TQENT_FLAG_PREALLOC;
-		mutex_exit(&tq->tq_lock);
-
-		rw_enter(&tq->tq_threadlock, RW_READER);
-		t->tqent_func(t->tqent_arg);
-		rw_exit(&tq->tq_threadlock);
-
-		mutex_enter(&tq->tq_lock);
-		if (!prealloc)
-			task_free(tq, t);
-	}
-	tq->tq_nthreads--;
-	cv_broadcast(&tq->tq_wait_cv);
-	mutex_exit(&tq->tq_lock);
-	return (NULL);
-}
-
-/*ARGSUSED*/
-taskq_t *
-taskq_create(const char *name, int nthreads, pri_t pri,
-	int minalloc, int maxalloc, uint_t flags)
-{
-	taskq_t *tq = kmem_zalloc(sizeof (taskq_t), KM_SLEEP);
-	int t;
-
-	if (flags & TASKQ_THREADS_CPU_PCT) {
-		int pct;
-		ASSERT3S(nthreads, >=, 0);
-		ASSERT3S(nthreads, <=, 100);
-		pct = MIN(nthreads, 100);
-		pct = MAX(pct, 0);
-
-		nthreads = (sysconf(_SC_NPROCESSORS_ONLN) * pct) / 100;
-		nthreads = MAX(nthreads, 1);	/* need at least 1 thread */
-	} else {
-		ASSERT3S(nthreads, >=, 1);
-	}
-
-	rw_init(&tq->tq_threadlock, NULL, RW_DEFAULT, NULL);
-	mutex_init(&tq->tq_lock, NULL, MUTEX_DEFAULT, NULL);
-	cv_init(&tq->tq_dispatch_cv, NULL, CV_DEFAULT, NULL);
-	cv_init(&tq->tq_wait_cv, NULL, CV_DEFAULT, NULL);
-	cv_init(&tq->tq_maxalloc_cv, NULL, CV_DEFAULT, NULL);
-	(void) strncpy(tq->tq_name, name, TASKQ_NAMELEN + 1);
-	tq->tq_flags = flags | TASKQ_ACTIVE;
-	tq->tq_active = nthreads;
-	tq->tq_nthreads = nthreads;
-	tq->tq_minalloc = minalloc;
-	tq->tq_maxalloc = maxalloc;
-	tq->tq_task.tqent_next = &tq->tq_task;
-	tq->tq_task.tqent_prev = &tq->tq_task;
-	tq->tq_threadlist = kmem_alloc(nthreads * sizeof (thread_t), KM_SLEEP);
-
-	if (flags & TASKQ_PREPOPULATE) {
-		mutex_enter(&tq->tq_lock);
-		while (minalloc-- > 0)
-			task_free(tq, task_alloc(tq, KM_SLEEP));
-		mutex_exit(&tq->tq_lock);
-	}
-
-	for (t = 0; t < nthreads; t++)
-		(void) thr_create(0, 0, taskq_thread,
-		    tq, THR_BOUND, &tq->tq_threadlist[t]);
-
-	return (tq);
-}
-
-void
-taskq_destroy(taskq_t *tq)
-{
-	int t;
-	int nthreads = tq->tq_nthreads;
-
-	taskq_wait(tq);
-
-	mutex_enter(&tq->tq_lock);
-
-	tq->tq_flags &= ~TASKQ_ACTIVE;
-	cv_broadcast(&tq->tq_dispatch_cv);
-
-	while (tq->tq_nthreads != 0)
-		cv_wait(&tq->tq_wait_cv, &tq->tq_lock);
-
-	tq->tq_minalloc = 0;
-	while (tq->tq_nalloc != 0) {
-		ASSERT(tq->tq_freelist != NULL);
-		task_free(tq, task_alloc(tq, KM_SLEEP));
-	}
-
-	mutex_exit(&tq->tq_lock);
-
-	for (t = 0; t < nthreads; t++)
-		(void) thr_join(tq->tq_threadlist[t], NULL, NULL);
-
-	kmem_free(tq->tq_threadlist, nthreads * sizeof (thread_t));
-
-	rw_destroy(&tq->tq_threadlock);
-	mutex_destroy(&tq->tq_lock);
-	cv_destroy(&tq->tq_dispatch_cv);
-	cv_destroy(&tq->tq_wait_cv);
-	cv_destroy(&tq->tq_maxalloc_cv);
-
-	kmem_free(tq, sizeof (taskq_t));
-}
-
-int
-taskq_member(taskq_t *tq, void *t)
-{
-	int i;
-
-	if (taskq_now)
-		return (1);
-
-	for (i = 0; i < tq->tq_nthreads; i++)
-		if (tq->tq_threadlist[i] == (thread_t)(uintptr_t)t)
-			return (1);
-
-	return (0);
-}
-
-void
-system_taskq_init(void)
-{
-	system_taskq = taskq_create("system_taskq", 64, minclsyspri, 4, 512,
-	    TASKQ_DYNAMIC | TASKQ_PREPOPULATE);
-}
-
-void
-system_taskq_fini(void)
-{
-	taskq_destroy(system_taskq);
-	system_taskq = NULL; /* defensive */
-}
Index: vendor/illumos/dist/lib/libzpool/common/util.c
===================================================================
--- vendor/illumos/dist/lib/libzpool/common/util.c	(revision 329752)
+++ vendor/illumos/dist/lib/libzpool/common/util.c	(revision 329753)
@@ -1,187 +1,190 @@
 /*
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
  * Common Development and Distribution License (the "License").
  * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
  * See the License for the specific language governing permissions
  * and limitations under the License.
  *
  * When distributing Covered Code, include this CDDL HEADER in each
  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  * If applicable, add the following below this CDDL HEADER, with the
  * fields enclosed by brackets "[]" replaced with your own identifying
  * information: Portions Copyright [yyyy] [name of copyright owner]
  *
  * CDDL HEADER END
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2016 by Delphix. All rights reserved.
+ * Copyright 2017 RackTop Systems.
  */
 
 #include <assert.h>
 #include <sys/zfs_context.h>
 #include <sys/avl.h>
 #include <string.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <sys/spa.h>
 #include <sys/fs/zfs.h>
 #include <sys/refcount.h>
 #include <dlfcn.h>
+
+extern void nicenum(uint64_t num, char *buf, size_t);
 
 /*
  * Routines needed by more than one client of libzpool.
  */
 
 static void
 show_vdev_stats(const char *desc, const char *ctype, nvlist_t *nv, int indent)
 {
 	vdev_stat_t *vs;
 	vdev_stat_t v0 = { 0 };
 	uint64_t sec;
 	uint64_t is_log = 0;
 	nvlist_t **child;
 	uint_t c, children;
 	char used[6], avail[6];
 	char rops[6], wops[6], rbytes[6], wbytes[6], rerr[6], werr[6], cerr[6];
 	char *prefix = "";
 
 	if (indent == 0 && desc != NULL) {
 		(void) printf("                           "
 		    " capacity   operations   bandwidth  ---- errors ----\n");
 		(void) printf("description                "
 		    "used avail  read write  read write  read write cksum\n");
 	}
 
 	if (desc != NULL) {
 		(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_IS_LOG, &is_log);
 
 		if (is_log)
 			prefix = "log ";
 
 		if (nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_VDEV_STATS,
 		    (uint64_t **)&vs, &c) != 0)
 			vs = &v0;
 
 		sec = MAX(1, vs->vs_timestamp / NANOSEC);
 
 		nicenum(vs->vs_alloc, used, sizeof (used));
 		nicenum(vs->vs_space - vs->vs_alloc, avail, sizeof (avail));
 		nicenum(vs->vs_ops[ZIO_TYPE_READ] / sec, rops, sizeof (rops));
 		nicenum(vs->vs_ops[ZIO_TYPE_WRITE] / sec, wops, sizeof (wops));
 		nicenum(vs->vs_bytes[ZIO_TYPE_READ] / sec, rbytes,
 		    sizeof (rbytes));
 		nicenum(vs->vs_bytes[ZIO_TYPE_WRITE] / sec, wbytes,
 		    sizeof (wbytes));
 		nicenum(vs->vs_read_errors, rerr, sizeof (rerr));
 		nicenum(vs->vs_write_errors, werr, sizeof (werr));
 		nicenum(vs->vs_checksum_errors, cerr, sizeof (cerr));
 
 		(void) printf("%*s%s%*s%*s%*s %5s %5s %5s %5s %5s %5s %5s\n",
 		    indent, "",
 		    prefix,
-		    indent + strlen(prefix) - 25 - (vs->vs_space ? 0 : 12),
+		    indent + (int)strlen(prefix) - 25 - (vs->vs_space ? 0 : 12),
 		    desc,
 		    vs->vs_space ? 6 : 0, vs->vs_space ? used : "",
 		    vs->vs_space ? 6 : 0, vs->vs_space ? avail : "",
 		    rops, wops, rbytes, wbytes, rerr, werr, cerr);
 	}
 
 	if (nvlist_lookup_nvlist_array(nv, ctype, &child, &children) != 0)
 		return;
 
 	for (c = 0; c < children; c++) {
 		nvlist_t *cnv = child[c];
 		char *cname, *tname;
 		uint64_t np;
 		if (nvlist_lookup_string(cnv, ZPOOL_CONFIG_PATH, &cname) &&
 		    nvlist_lookup_string(cnv, ZPOOL_CONFIG_TYPE, &cname))
 			cname = "<unknown>";
 		tname = calloc(1, strlen(cname) + 2);
 		(void) strcpy(tname, cname);
 		if (nvlist_lookup_uint64(cnv, ZPOOL_CONFIG_NPARITY, &np) == 0)
 			tname[strlen(tname)] = '0' + np;
 		show_vdev_stats(tname, ctype, cnv, indent + 2);
 		free(tname);
 	}
 }
 
 void
 show_pool_stats(spa_t *spa)
 {
 	nvlist_t *config, *nvroot;
 	char *name;
 
 	VERIFY(spa_get_stats(spa_name(spa), &config, NULL, 0) == 0);
 
 	VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
 	    &nvroot) == 0);
 	VERIFY(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
 	    &name) == 0);
 
 	show_vdev_stats(name, ZPOOL_CONFIG_CHILDREN, nvroot, 0);
 	show_vdev_stats(NULL, ZPOOL_CONFIG_L2CACHE, nvroot, 0);
 	show_vdev_stats(NULL, ZPOOL_CONFIG_SPARES, nvroot, 0);
 
 	nvlist_free(config);
 }
 
 /*
  * Sets given global variable in libzpool to given unsigned 32-bit value.
  * arg: "<variable>=<value>"
  */
 int
 set_global_var(char *arg)
 {
 	void *zpoolhdl;
 	char *varname = arg, *varval;
 	u_longlong_t val;
 
 #ifndef _LITTLE_ENDIAN
 	/*
 	 * On big endian systems changing a 64-bit variable would set the high
 	 * 32 bits instead of the low 32 bits, which could cause unexpected
 	 * results.
 	 */
 	fprintf(stderr, "Setting global variables is only supported on "
-	    "little-endian systems\n", varname);
+	    "little-endian systems\n");
 	return (ENOTSUP);
 #endif
 	if ((varval = strchr(arg, '=')) != NULL) {
 		*varval = '\0';
 		varval++;
 		val = strtoull(varval, NULL, 0);
 		if (val > UINT32_MAX) {
 			fprintf(stderr, "Value for global variable '%s' must "
 			    "be a 32-bit unsigned integer\n", varname);
 			return (EOVERFLOW);
 		}
 	} else {
 		return (EINVAL);
 	}
 
 	zpoolhdl = dlopen("libzpool.so", RTLD_LAZY);
 	if (zpoolhdl != NULL) {
-		uint32_t *var;
-		var = dlsym(zpoolhdl, varname);
+		uint32_t *var = dlsym(zpoolhdl, varname);
 		if (var == NULL) {
 			fprintf(stderr, "Global variable '%s' does not exist "
 			    "in libzpool.so\n", varname);
+			(void) dlclose(zpoolhdl);
 			return (EINVAL);
 		}
 		*var = (uint32_t)val;
 
 		dlclose(zpoolhdl);
 	} else {
 		fprintf(stderr, "Failed to open libzpool.so to set global "
 		    "variable\n");
 		return (EIO);
 	}
 
 	return (0);
 }
Index: vendor-sys/illumos/dist/uts/common/fs/zfs/dnode.c
===================================================================
--- vendor-sys/illumos/dist/uts/common/fs/zfs/dnode.c	(revision 329752)
+++ vendor-sys/illumos/dist/uts/common/fs/zfs/dnode.c	(revision 329753)
@@ -1,1992 +1,1999 @@
 /*
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
  * Common Development and Distribution License (the "License").
  * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
  * See the License for the specific language governing permissions
  * and limitations under the License.
  *
  * When distributing Covered Code, include this CDDL HEADER in each
  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  * If applicable, add the following below this CDDL HEADER, with the
  * fields enclosed by brackets "[]" replaced with your own identifying
  * information: Portions Copyright [yyyy] [name of copyright owner]
  *
  * CDDL HEADER END
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2012, 2017 by Delphix. All rights reserved.
  * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
  * Copyright (c) 2014 Integros [integros.com]
+ * Copyright 2017 RackTop Systems.
  */
 
 #include <sys/zfs_context.h>
 #include <sys/dbuf.h>
 #include <sys/dnode.h>
 #include <sys/dmu.h>
 #include <sys/dmu_impl.h>
 #include <sys/dmu_tx.h>
 #include <sys/dmu_objset.h>
 #include <sys/dsl_dir.h>
 #include <sys/dsl_dataset.h>
 #include <sys/spa.h>
 #include <sys/zio.h>
 #include <sys/dmu_zfetch.h>
 #include <sys/range_tree.h>
 
 static kmem_cache_t *dnode_cache;
 /*
  * Define DNODE_STATS to turn on statistic gathering. By default, it is only
  * turned on when DEBUG is also defined.
  */
 #ifdef	DEBUG
 #define	DNODE_STATS
 #endif	/* DEBUG */
 
 #ifdef	DNODE_STATS
 #define	DNODE_STAT_ADD(stat)			((stat)++)
 #else
 #define	DNODE_STAT_ADD(stat)			/* nothing */
 #endif	/* DNODE_STATS */
 
 static dnode_phys_t dnode_phys_zero;
 
 int zfs_default_bs = SPA_MINBLOCKSHIFT;
 int zfs_default_ibs = DN_MAX_INDBLKSHIFT;
 
+#ifdef	_KERNEL
 static kmem_cbrc_t dnode_move(void *, void *, size_t, void *);
+#endif	/* _KERNEL */
 
 static int
 dbuf_compare(const void *x1, const void *x2)
 {
 	const dmu_buf_impl_t *d1 = x1;
 	const dmu_buf_impl_t *d2 = x2;
 
 	if (d1->db_level < d2->db_level) {
 		return (-1);
 	}
 	if (d1->db_level > d2->db_level) {
 		return (1);
 	}
 
 	if (d1->db_blkid < d2->db_blkid) {
 		return (-1);
 	}
 	if (d1->db_blkid > d2->db_blkid) {
 		return (1);
 	}
 
 	if (d1->db_state == DB_SEARCH) {
 		ASSERT3S(d2->db_state, !=, DB_SEARCH);
 		return (-1);
 	} else if (d2->db_state == DB_SEARCH) {
 		ASSERT3S(d1->db_state, !=, DB_SEARCH);
 		return (1);
 	}
 
 	if ((uintptr_t)d1 < (uintptr_t)d2) {
 		return (-1);
 	}
 	if ((uintptr_t)d1 > (uintptr_t)d2) {
 		return (1);
 	}
 	return (0);
 }
 
 /* ARGSUSED */
 static int
 dnode_cons(void *arg, void *unused, int kmflag)
 {
 	dnode_t *dn = arg;
 	int i;
 
 	rw_init(&dn->dn_struct_rwlock, NULL, RW_DEFAULT, NULL);
 	mutex_init(&dn->dn_mtx, NULL, MUTEX_DEFAULT, NULL);
 	mutex_init(&dn->dn_dbufs_mtx, NULL, MUTEX_DEFAULT, NULL);
 	cv_init(&dn->dn_notxholds, NULL, CV_DEFAULT, NULL);
 
 	/*
 	 * Every dbuf has a reference, and dropping a tracked reference is
 	 * O(number of references), so don't track dn_holds.
 	 */
 	refcount_create_untracked(&dn->dn_holds);
 	refcount_create(&dn->dn_tx_holds);
 	list_link_init(&dn->dn_link);
 
 	bzero(&dn->dn_next_nblkptr[0], sizeof (dn->dn_next_nblkptr));
 	bzero(&dn->dn_next_nlevels[0], sizeof (dn->dn_next_nlevels));
 	bzero(&dn->dn_next_indblkshift[0], sizeof (dn->dn_next_indblkshift));
 	bzero(&dn->dn_next_bonustype[0], sizeof (dn->dn_next_bonustype));
 	bzero(&dn->dn_rm_spillblk[0], sizeof (dn->dn_rm_spillblk));
 	bzero(&dn->dn_next_bonuslen[0], sizeof (dn->dn_next_bonuslen));
 	bzero(&dn->dn_next_blksz[0], sizeof (dn->dn_next_blksz));
 
 	for (i = 0; i < TXG_SIZE; i++) {
 		list_link_init(&dn->dn_dirty_link[i]);
 		dn->dn_free_ranges[i] = NULL;
 		list_create(&dn->dn_dirty_records[i],
 		    sizeof (dbuf_dirty_record_t),
 		    offsetof(dbuf_dirty_record_t, dr_dirty_node));
 	}
 
 	dn->dn_allocated_txg = 0;
 	dn->dn_free_txg = 0;
 	dn->dn_assigned_txg = 0;
 	dn->dn_dirtyctx = 0;
 	dn->dn_dirtyctx_firstset = NULL;
 	dn->dn_bonus = NULL;
 	dn->dn_have_spill = B_FALSE;
 	dn->dn_zio = NULL;
 	dn->dn_oldused = 0;
 	dn->dn_oldflags = 0;
 	dn->dn_olduid = 0;
 	dn->dn_oldgid = 0;
 	dn->dn_newuid = 0;
 	dn->dn_newgid = 0;
 	dn->dn_id_flags = 0;
 
 	dn->dn_dbufs_count = 0;
 	avl_create(&dn->dn_dbufs, dbuf_compare, sizeof (dmu_buf_impl_t),
 	    offsetof(dmu_buf_impl_t, db_link));
 
 	dn->dn_moved = 0;
 	return (0);
 }
 
 /* ARGSUSED */
 static void
 dnode_dest(void *arg, void *unused)
 {
 	int i;
 	dnode_t *dn = arg;
 
 	rw_destroy(&dn->dn_struct_rwlock);
 	mutex_destroy(&dn->dn_mtx);
 	mutex_destroy(&dn->dn_dbufs_mtx);
 	cv_destroy(&dn->dn_notxholds);
 	refcount_destroy(&dn->dn_holds);
 	refcount_destroy(&dn->dn_tx_holds);
 	ASSERT(!list_link_active(&dn->dn_link));
 
 	for (i = 0; i < TXG_SIZE; i++) {
 		ASSERT(!list_link_active(&dn->dn_dirty_link[i]));
 		ASSERT3P(dn->dn_free_ranges[i], ==, NULL);
 		list_destroy(&dn->dn_dirty_records[i]);
 		ASSERT0(dn->dn_next_nblkptr[i]);
 		ASSERT0(dn->dn_next_nlevels[i]);
 		ASSERT0(dn->dn_next_indblkshift[i]);
 		ASSERT0(dn->dn_next_bonustype[i]);
 		ASSERT0(dn->dn_rm_spillblk[i]);
 		ASSERT0(dn->dn_next_bonuslen[i]);
 		ASSERT0(dn->dn_next_blksz[i]);
 	}
 
 	ASSERT0(dn->dn_allocated_txg);
 	ASSERT0(dn->dn_free_txg);
 	ASSERT0(dn->dn_assigned_txg);
 	ASSERT0(dn->dn_dirtyctx);
 	ASSERT3P(dn->dn_dirtyctx_firstset, ==, NULL);
 	ASSERT3P(dn->dn_bonus, ==, NULL);
 	ASSERT(!dn->dn_have_spill);
 	ASSERT3P(dn->dn_zio, ==, NULL);
 	ASSERT0(dn->dn_oldused);
 	ASSERT0(dn->dn_oldflags);
 	ASSERT0(dn->dn_olduid);
 	ASSERT0(dn->dn_oldgid);
 	ASSERT0(dn->dn_newuid);
 	ASSERT0(dn->dn_newgid);
 	ASSERT0(dn->dn_id_flags);
 
 	ASSERT0(dn->dn_dbufs_count);
 	avl_destroy(&dn->dn_dbufs);
 }
 
 void
 dnode_init(void)
 {
 	ASSERT(dnode_cache == NULL);
 	dnode_cache = kmem_cache_create("dnode_t",
 	    sizeof (dnode_t),
 	    0, dnode_cons, dnode_dest, NULL, NULL, NULL, 0);
+#ifdef	_KERNEL
 	kmem_cache_set_move(dnode_cache, dnode_move);
+#endif	/* _KERNEL */
 }
 
 void
 dnode_fini(void)
 {
 	kmem_cache_destroy(dnode_cache);
 	dnode_cache = NULL;
 }
 
 
 #ifdef ZFS_DEBUG
 void
 dnode_verify(dnode_t *dn)
 {
 	int drop_struct_lock = FALSE;
 
 	ASSERT(dn->dn_phys);
 	ASSERT(dn->dn_objset);
 	ASSERT(dn->dn_handle->dnh_dnode == dn);
 
 	ASSERT(DMU_OT_IS_VALID(dn->dn_phys->dn_type));
 
 	if (!(zfs_flags & ZFS_DEBUG_DNODE_VERIFY))
 		return;
 
 	if (!RW_WRITE_HELD(&dn->dn_struct_rwlock)) {
 		rw_enter(&dn->dn_struct_rwlock, RW_READER);
 		drop_struct_lock = TRUE;
 	}
 	if (dn->dn_phys->dn_type != DMU_OT_NONE || dn->dn_allocated_txg != 0) {
 		int i;
 		ASSERT3U(dn->dn_indblkshift, >=, 0);
 		ASSERT3U(dn->dn_indblkshift, <=, SPA_MAXBLOCKSHIFT);
 		if (dn->dn_datablkshift) {
 			ASSERT3U(dn->dn_datablkshift, >=, SPA_MINBLOCKSHIFT);
 			ASSERT3U(dn->dn_datablkshift, <=, SPA_MAXBLOCKSHIFT);
 			ASSERT3U(1<<dn->dn_datablkshift, ==, dn->dn_datablksz);
 		}
 		ASSERT3U(dn->dn_nlevels, <=, 30);
 		ASSERT(DMU_OT_IS_VALID(dn->dn_type));
 		ASSERT3U(dn->dn_nblkptr, >=, 1);
 		ASSERT3U(dn->dn_nblkptr, <=, DN_MAX_NBLKPTR);
 		ASSERT3U(dn->dn_bonuslen, <=, DN_MAX_BONUSLEN);
 		ASSERT3U(dn->dn_datablksz, ==,
 		    dn->dn_datablkszsec << SPA_MINBLOCKSHIFT);
 		ASSERT3U(ISP2(dn->dn_datablksz), ==, dn->dn_datablkshift != 0);
 		ASSERT3U((dn->dn_nblkptr - 1) * sizeof (blkptr_t) +
 		    dn->dn_bonuslen, <=, DN_MAX_BONUSLEN);
 		for (i = 0; i < TXG_SIZE; i++) {
 			ASSERT3U(dn->dn_next_nlevels[i], <=, dn->dn_nlevels);
 		}
 	}
 	if (dn->dn_phys->dn_type != DMU_OT_NONE)
 		ASSERT3U(dn->dn_phys->dn_nlevels, <=, dn->dn_nlevels);
 	ASSERT(DMU_OBJECT_IS_SPECIAL(dn->dn_object) || dn->dn_dbuf != NULL);
 	if (dn->dn_dbuf != NULL) {
 		ASSERT3P(dn->dn_phys, ==,
 		    (dnode_phys_t *)dn->dn_dbuf->db.db_data +
 		    (dn->dn_object % (dn->dn_dbuf->db.db_size >> DNODE_SHIFT)));
 	}
 	if (drop_struct_lock)
 		rw_exit(&dn->dn_struct_rwlock);
 }
 #endif
 
 void
 dnode_byteswap(dnode_phys_t *dnp)
 {
 	uint64_t *buf64 = (void*)&dnp->dn_blkptr;
 	int i;
 
 	if (dnp->dn_type == DMU_OT_NONE) {
 		bzero(dnp, sizeof (dnode_phys_t));
 		return;
 	}
 
 	dnp->dn_datablkszsec = BSWAP_16(dnp->dn_datablkszsec);
 	dnp->dn_bonuslen = BSWAP_16(dnp->dn_bonuslen);
 	dnp->dn_maxblkid = BSWAP_64(dnp->dn_maxblkid);
 	dnp->dn_used = BSWAP_64(dnp->dn_used);
 
 	/*
 	 * dn_nblkptr is only one byte, so it's OK to read it in either
-	 * byte order.  We can't read dn_bouslen.
+	 * byte order.  We can't read dn_bonuslen.
 	 */
 	ASSERT(dnp->dn_indblkshift <= SPA_MAXBLOCKSHIFT);
 	ASSERT(dnp->dn_nblkptr <= DN_MAX_NBLKPTR);
 	for (i = 0; i < dnp->dn_nblkptr * sizeof (blkptr_t)/8; i++)
 		buf64[i] = BSWAP_64(buf64[i]);
 
 	/*
 	 * OK to check dn_bonuslen for zero, because it won't matter if
 	 * we have the wrong byte order.  This is necessary because the
 	 * dnode dnode is smaller than a regular dnode.
 	 */
 	if (dnp->dn_bonuslen != 0) {
 		/*
 		 * Note that the bonus length calculated here may be
 		 * longer than the actual bonus buffer.  This is because
 		 * we always put the bonus buffer after the last block
 		 * pointer (instead of packing it against the end of the
 		 * dnode buffer).
 		 */
 		int off = (dnp->dn_nblkptr-1) * sizeof (blkptr_t);
 		size_t len = DN_MAX_BONUSLEN - off;
 		ASSERT(DMU_OT_IS_VALID(dnp->dn_bonustype));
 		dmu_object_byteswap_t byteswap =
 		    DMU_OT_BYTESWAP(dnp->dn_bonustype);
 		dmu_ot_byteswap[byteswap].ob_func(dnp->dn_bonus + off, len);
 	}
 
 	/* Swap SPILL block if we have one */
 	if (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR)
 		byteswap_uint64_array(&dnp->dn_spill, sizeof (blkptr_t));
 
 }
 
 void
 dnode_buf_byteswap(void *vbuf, size_t size)
 {
 	dnode_phys_t *buf = vbuf;
 	int i;
 
 	ASSERT3U(sizeof (dnode_phys_t), ==, (1<<DNODE_SHIFT));
 	ASSERT((size & (sizeof (dnode_phys_t)-1)) == 0);
 
 	size >>= DNODE_SHIFT;
 	for (i = 0; i < size; i++) {
 		dnode_byteswap(buf);
 		buf++;
 	}
 }
 
 void
 dnode_setbonuslen(dnode_t *dn, int newsize, dmu_tx_t *tx)
 {
 	ASSERT3U(refcount_count(&dn->dn_holds), >=, 1);
 
 	dnode_setdirty(dn, tx);
 	rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
 	ASSERT3U(newsize, <=, DN_MAX_BONUSLEN -
 	    (dn->dn_nblkptr-1) * sizeof (blkptr_t));
 	dn->dn_bonuslen = newsize;
 	if (newsize == 0)
 		dn->dn_next_bonuslen[tx->tx_txg & TXG_MASK] = DN_ZERO_BONUSLEN;
 	else
 		dn->dn_next_bonuslen[tx->tx_txg & TXG_MASK] = dn->dn_bonuslen;
 	rw_exit(&dn->dn_struct_rwlock);
 }
 
 void
 dnode_setbonus_type(dnode_t *dn, dmu_object_type_t newtype, dmu_tx_t *tx)
 {
 	ASSERT3U(refcount_count(&dn->dn_holds), >=, 1);
 	dnode_setdirty(dn, tx);
 	rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
 	dn->dn_bonustype = newtype;
 	dn->dn_next_bonustype[tx->tx_txg & TXG_MASK] = dn->dn_bonustype;
 	rw_exit(&dn->dn_struct_rwlock);
 }
 
 void
 dnode_rm_spill(dnode_t *dn, dmu_tx_t *tx)
 {
 	ASSERT3U(refcount_count(&dn->dn_holds), >=, 1);
 	ASSERT(RW_WRITE_HELD(&dn->dn_struct_rwlock));
 	dnode_setdirty(dn, tx);
 	dn->dn_rm_spillblk[tx->tx_txg&TXG_MASK] = DN_KILL_SPILLBLK;
 	dn->dn_have_spill = B_FALSE;
 }
 
 static void
 dnode_setdblksz(dnode_t *dn, int size)
 {
 	ASSERT0(P2PHASE(size, SPA_MINBLOCKSIZE));
 	ASSERT3U(size, <=, SPA_MAXBLOCKSIZE);
 	ASSERT3U(size, >=, SPA_MINBLOCKSIZE);
 	ASSERT3U(size >> SPA_MINBLOCKSHIFT, <,
 	    1<<(sizeof (dn->dn_phys->dn_datablkszsec) * 8));
 	dn->dn_datablksz = size;
 	dn->dn_datablkszsec = size >> SPA_MINBLOCKSHIFT;
 	dn->dn_datablkshift = ISP2(size) ? highbit64(size - 1) : 0;
 }
 
 static dnode_t *
 dnode_create(objset_t *os, dnode_phys_t *dnp, dmu_buf_impl_t *db,
     uint64_t object, dnode_handle_t *dnh)
 {
 	dnode_t *dn;
 
 	dn = kmem_cache_alloc(dnode_cache, KM_SLEEP);
+#ifdef _KERNEL
 	ASSERT(!POINTER_IS_VALID(dn->dn_objset));
+#endif /* _KERNEL */
 	dn->dn_moved = 0;
 
 	/*
 	 * Defer setting dn_objset until the dnode is ready to be a candidate
 	 * for the dnode_move() callback.
 	 */
 	dn->dn_object = object;
 	dn->dn_dbuf = db;
 	dn->dn_handle = dnh;
 	dn->dn_phys = dnp;
 
 	if (dnp->dn_datablkszsec) {
 		dnode_setdblksz(dn, dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT);
 	} else {
 		dn->dn_datablksz = 0;
 		dn->dn_datablkszsec = 0;
 		dn->dn_datablkshift = 0;
 	}
 	dn->dn_indblkshift = dnp->dn_indblkshift;
 	dn->dn_nlevels = dnp->dn_nlevels;
 	dn->dn_type = dnp->dn_type;
 	dn->dn_nblkptr = dnp->dn_nblkptr;
 	dn->dn_checksum = dnp->dn_checksum;
 	dn->dn_compress = dnp->dn_compress;
 	dn->dn_bonustype = dnp->dn_bonustype;
 	dn->dn_bonuslen = dnp->dn_bonuslen;
 	dn->dn_maxblkid = dnp->dn_maxblkid;
 	dn->dn_have_spill = ((dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) != 0);
 	dn->dn_id_flags = 0;
 
 	dmu_zfetch_init(&dn->dn_zfetch, dn);
 
 	ASSERT(DMU_OT_IS_VALID(dn->dn_phys->dn_type));
 
 	mutex_enter(&os->os_lock);
 	if (dnh->dnh_dnode != NULL) {
 		/* Lost the allocation race. */
 		mutex_exit(&os->os_lock);
 		kmem_cache_free(dnode_cache, dn);
 		return (dnh->dnh_dnode);
 	}
 
 	/*
 	 * Exclude special dnodes from os_dnodes so an empty os_dnodes
 	 * signifies that the special dnodes have no references from
 	 * their children (the entries in os_dnodes).  This allows
 	 * dnode_destroy() to easily determine if the last child has
 	 * been removed and then complete eviction of the objset.
 	 */
 	if (!DMU_OBJECT_IS_SPECIAL(object))
 		list_insert_head(&os->os_dnodes, dn);
 	membar_producer();
 
 	/*
 	 * Everything else must be valid before assigning dn_objset
 	 * makes the dnode eligible for dnode_move().
 	 */
 	dn->dn_objset = os;
 
 	dnh->dnh_dnode = dn;
 	mutex_exit(&os->os_lock);
 
 	arc_space_consume(sizeof (dnode_t), ARC_SPACE_OTHER);
 	return (dn);
 }
 
 /*
  * Caller must be holding the dnode handle, which is released upon return.
  */
 static void
 dnode_destroy(dnode_t *dn)
 {
 	objset_t *os = dn->dn_objset;
 	boolean_t complete_os_eviction = B_FALSE;
 
 	ASSERT((dn->dn_id_flags & DN_ID_NEW_EXIST) == 0);
 
 	mutex_enter(&os->os_lock);
 	POINTER_INVALIDATE(&dn->dn_objset);
 	if (!DMU_OBJECT_IS_SPECIAL(dn->dn_object)) {
 		list_remove(&os->os_dnodes, dn);
 		complete_os_eviction =
 		    list_is_empty(&os->os_dnodes) &&
 		    list_link_active(&os->os_evicting_node);
 	}
 	mutex_exit(&os->os_lock);
 
 	/* the dnode can no longer move, so we can release the handle */
 	zrl_remove(&dn->dn_handle->dnh_zrlock);
 
 	dn->dn_allocated_txg = 0;
 	dn->dn_free_txg = 0;
 	dn->dn_assigned_txg = 0;
 
 	dn->dn_dirtyctx = 0;
 	if (dn->dn_dirtyctx_firstset != NULL) {
 		kmem_free(dn->dn_dirtyctx_firstset, 1);
 		dn->dn_dirtyctx_firstset = NULL;
 	}
 	if (dn->dn_bonus != NULL) {
 		mutex_enter(&dn->dn_bonus->db_mtx);
 		dbuf_destroy(dn->dn_bonus);
 		dn->dn_bonus = NULL;
 	}
 	dn->dn_zio = NULL;
 
 	dn->dn_have_spill = B_FALSE;
 	dn->dn_oldused = 0;
 	dn->dn_oldflags = 0;
 	dn->dn_olduid = 0;
 	dn->dn_oldgid = 0;
 	dn->dn_newuid = 0;
 	dn->dn_newgid = 0;
 	dn->dn_id_flags = 0;
 
 	dmu_zfetch_fini(&dn->dn_zfetch);
 	kmem_cache_free(dnode_cache, dn);
 	arc_space_return(sizeof (dnode_t), ARC_SPACE_OTHER);
 
 	if (complete_os_eviction)
 		dmu_objset_evict_done(os);
 }
 
 void
 dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs,
     dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
 {
 	int i;
 
 	ASSERT3U(blocksize, <=,
 	    spa_maxblocksize(dmu_objset_spa(dn->dn_objset)));
 	if (blocksize == 0)
 		blocksize = 1 << zfs_default_bs;
 	else
 		blocksize = P2ROUNDUP(blocksize, SPA_MINBLOCKSIZE);
 
 	if (ibs == 0)
 		ibs = zfs_default_ibs;
 
 	ibs = MIN(MAX(ibs, DN_MIN_INDBLKSHIFT), DN_MAX_INDBLKSHIFT);
 
 	dprintf("os=%p obj=%llu txg=%llu blocksize=%d ibs=%d\n", dn->dn_objset,
 	    dn->dn_object, tx->tx_txg, blocksize, ibs);
 
 	ASSERT(dn->dn_type == DMU_OT_NONE);
 	ASSERT(bcmp(dn->dn_phys, &dnode_phys_zero, sizeof (dnode_phys_t)) == 0);
 	ASSERT(dn->dn_phys->dn_type == DMU_OT_NONE);
 	ASSERT(ot != DMU_OT_NONE);
 	ASSERT(DMU_OT_IS_VALID(ot));
 	ASSERT((bonustype == DMU_OT_NONE && bonuslen == 0) ||
 	    (bonustype == DMU_OT_SA && bonuslen == 0) ||
 	    (bonustype != DMU_OT_NONE && bonuslen != 0));
 	ASSERT(DMU_OT_IS_VALID(bonustype));
 	ASSERT3U(bonuslen, <=, DN_MAX_BONUSLEN);
 	ASSERT(dn->dn_type == DMU_OT_NONE);
 	ASSERT0(dn->dn_maxblkid);
 	ASSERT0(dn->dn_allocated_txg);
 	ASSERT0(dn->dn_assigned_txg);
 	ASSERT(refcount_is_zero(&dn->dn_tx_holds));
 	ASSERT3U(refcount_count(&dn->dn_holds), <=, 1);
 	ASSERT(avl_is_empty(&dn->dn_dbufs));
 
 	for (i = 0; i < TXG_SIZE; i++) {
 		ASSERT0(dn->dn_next_nblkptr[i]);
 		ASSERT0(dn->dn_next_nlevels[i]);
 		ASSERT0(dn->dn_next_indblkshift[i]);
 		ASSERT0(dn->dn_next_bonuslen[i]);
 		ASSERT0(dn->dn_next_bonustype[i]);
 		ASSERT0(dn->dn_rm_spillblk[i]);
 		ASSERT0(dn->dn_next_blksz[i]);
 		ASSERT(!list_link_active(&dn->dn_dirty_link[i]));
 		ASSERT3P(list_head(&dn->dn_dirty_records[i]), ==, NULL);
 		ASSERT3P(dn->dn_free_ranges[i], ==, NULL);
 	}
 
 	dn->dn_type = ot;
 	dnode_setdblksz(dn, blocksize);
 	dn->dn_indblkshift = ibs;
 	dn->dn_nlevels = 1;
 	if (bonustype == DMU_OT_SA) /* Maximize bonus space for SA */
 		dn->dn_nblkptr = 1;
 	else
 		dn->dn_nblkptr = 1 +
 		    ((DN_MAX_BONUSLEN - bonuslen) >> SPA_BLKPTRSHIFT);
 	dn->dn_bonustype = bonustype;
 	dn->dn_bonuslen = bonuslen;
 	dn->dn_checksum = ZIO_CHECKSUM_INHERIT;
 	dn->dn_compress = ZIO_COMPRESS_INHERIT;
 	dn->dn_dirtyctx = 0;
 
 	dn->dn_free_txg = 0;
 	if (dn->dn_dirtyctx_firstset) {
 		kmem_free(dn->dn_dirtyctx_firstset, 1);
 		dn->dn_dirtyctx_firstset = NULL;
 	}
 
 	dn->dn_allocated_txg = tx->tx_txg;
 	dn->dn_id_flags = 0;
 
 	dnode_setdirty(dn, tx);
 	dn->dn_next_indblkshift[tx->tx_txg & TXG_MASK] = ibs;
 	dn->dn_next_bonuslen[tx->tx_txg & TXG_MASK] = dn->dn_bonuslen;
 	dn->dn_next_bonustype[tx->tx_txg & TXG_MASK] = dn->dn_bonustype;
 	dn->dn_next_blksz[tx->tx_txg & TXG_MASK] = dn->dn_datablksz;
 }
 
 /*
  * Re-purpose an existing dnode for a new object type, data block size
  * and bonus buffer within transaction tx.
  *
  * Unreferenced dbufs are evicted first.  The dnode is then dirtied and,
  * for each property that differs from the current in-core value, the
  * new value is recorded in the matching dn_next_* slot for tx's txg.
  * If the on-disk dnode carries a spill block pointer, the spill block
  * is removed.  Finally the in-core type, bonus fields, block pointer
  * count and allocation txg are updated.
  */
 void
 dnode_reallocate(dnode_t *dn, dmu_object_type_t ot, int blocksize,
     dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
 {
 	int nblkptr;
 
 	ASSERT3U(blocksize, >=, SPA_MINBLOCKSIZE);
 	ASSERT3U(blocksize, <=,
 	    spa_maxblocksize(dmu_objset_spa(dn->dn_objset)));
 	ASSERT0(blocksize % SPA_MINBLOCKSIZE);
 	ASSERT(dn->dn_object != DMU_META_DNODE_OBJECT || dmu_tx_private_ok(tx));
 	ASSERT(tx->tx_txg != 0);
 	ASSERT((bonustype == DMU_OT_NONE && bonuslen == 0) ||
 	    (bonustype != DMU_OT_NONE && bonuslen != 0) ||
 	    (bonustype == DMU_OT_SA && bonuslen == 0));
 	ASSERT(DMU_OT_IS_VALID(bonustype));
 	ASSERT3U(bonuslen, <=, DN_MAX_BONUSLEN);
 
 	/* clean up any unreferenced dbufs */
 	dnode_evict_dbufs(dn);
 
 	dn->dn_id_flags = 0;
 
 	rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
 	dnode_setdirty(dn, tx);
 	if (dn->dn_datablksz != blocksize) {
 		/* change blocksize */
 		ASSERT(dn->dn_maxblkid == 0 &&
 		    (BP_IS_HOLE(&dn->dn_phys->dn_blkptr[0]) ||
 		    dnode_block_freed(dn, 0)));
 		dnode_setdblksz(dn, blocksize);
 		dn->dn_next_blksz[tx->tx_txg&TXG_MASK] = blocksize;
 	}
 	/* record the pending bonus length only if it actually changes */
 	if (dn->dn_bonuslen != bonuslen)
 		dn->dn_next_bonuslen[tx->tx_txg&TXG_MASK] = bonuslen;
 
 	if (bonustype == DMU_OT_SA) /* Maximize bonus space for SA */
 		nblkptr = 1;
 	else
 		nblkptr = 1 + ((DN_MAX_BONUSLEN - bonuslen) >> SPA_BLKPTRSHIFT);
 	if (dn->dn_bonustype != bonustype)
 		dn->dn_next_bonustype[tx->tx_txg&TXG_MASK] = bonustype;
 	if (dn->dn_nblkptr != nblkptr)
 		dn->dn_next_nblkptr[tx->tx_txg&TXG_MASK] = nblkptr;
 	/* the on-disk dnode has a spill block pointer; remove the spill */
 	if (dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR) {
 		dbuf_rm_spill(dn, tx);
 		dnode_rm_spill(dn, tx);
 	}
 	rw_exit(&dn->dn_struct_rwlock);
 
 	/* change type */
 	dn->dn_type = ot;
 
 	/* change bonus size and type */
 	mutex_enter(&dn->dn_mtx);
 	dn->dn_bonustype = bonustype;
 	dn->dn_bonuslen = bonuslen;
 	dn->dn_nblkptr = nblkptr;
 	dn->dn_checksum = ZIO_CHECKSUM_INHERIT;
 	dn->dn_compress = ZIO_COMPRESS_INHERIT;
 	ASSERT3U(dn->dn_nblkptr, <=, DN_MAX_NBLKPTR);
 
 	/* fix up the bonus db_size */
 	if (dn->dn_bonus) {
 		dn->dn_bonus->db.db_size =
 		    DN_MAX_BONUSLEN - (dn->dn_nblkptr-1) * sizeof (blkptr_t);
 		ASSERT(dn->dn_bonuslen <= dn->dn_bonus->db.db_size);
 	}
 
 	dn->dn_allocated_txg = tx->tx_txg;
 	mutex_exit(&dn->dn_mtx);
 }
 
 #ifdef	DNODE_STATS
 /*
  * Counters bumped through DNODE_STAT_ADD() in dnode_move(), one for each
  * reason the move callback declined to move a dnode.  Only compiled in
  * when DNODE_STATS is defined.
  */
 static struct {
 	uint64_t dms_dnode_invalid;	/* objset pointer was not valid */
 	uint64_t dms_dnode_recheck1;	/* objset changed after os_lock */
 	uint64_t dms_dnode_recheck2;	/* objset changed after os->os_lock */
 	uint64_t dms_dnode_special;	/* DMU_OBJECT_IS_SPECIAL() dnode */
 	uint64_t dms_dnode_handle;	/* zrl_tryenter() on handle failed */
 	uint64_t dms_dnode_rwlock;	/* rw_tryenter() on struct lock failed */
 	uint64_t dms_dnode_active;	/* more holds than dbufs */
 } dnode_move_stats;
 #endif	/* DNODE_STATS */
 
+#ifdef	_KERNEL
 /*
  * Transfer the complete in-core state of odn into ndn and leave odn
  * invalidated.
  *
  * Scalar fields and the per-txg dn_next_* arrays are copied; the dirty
  * record lists, the hold refcount, the dbuf AVL tree and the prefetch
  * stream list are moved rather than copied.  The dnode handle's back
  * pointer is switched to ndn.  odn is then scrubbed: its back pointers
  * are cleared, its objset pointer is invalidated, and the remaining
  * fields are reset so that it can be torn down later.
  *
  * The asserts at the top require that none of odn's own locks are held
  * on entry.
  */
 static void
 dnode_move_impl(dnode_t *odn, dnode_t *ndn)
 {
 	int i;
 
 	ASSERT(!RW_LOCK_HELD(&odn->dn_struct_rwlock));
 	ASSERT(MUTEX_NOT_HELD(&odn->dn_mtx));
 	ASSERT(MUTEX_NOT_HELD(&odn->dn_dbufs_mtx));
 	ASSERT(!RW_LOCK_HELD(&odn->dn_zfetch.zf_rwlock));
 
 	/* Copy fields. */
 	ndn->dn_objset = odn->dn_objset;
 	ndn->dn_object = odn->dn_object;
 	ndn->dn_dbuf = odn->dn_dbuf;
 	ndn->dn_handle = odn->dn_handle;
 	ndn->dn_phys = odn->dn_phys;
 	ndn->dn_type = odn->dn_type;
 	ndn->dn_bonuslen = odn->dn_bonuslen;
 	ndn->dn_bonustype = odn->dn_bonustype;
 	ndn->dn_nblkptr = odn->dn_nblkptr;
 	ndn->dn_checksum = odn->dn_checksum;
 	ndn->dn_compress = odn->dn_compress;
 	ndn->dn_nlevels = odn->dn_nlevels;
 	ndn->dn_indblkshift = odn->dn_indblkshift;
 	ndn->dn_datablkshift = odn->dn_datablkshift;
 	ndn->dn_datablkszsec = odn->dn_datablkszsec;
 	ndn->dn_datablksz = odn->dn_datablksz;
 	ndn->dn_maxblkid = odn->dn_maxblkid;
 	bcopy(&odn->dn_next_nblkptr[0], &ndn->dn_next_nblkptr[0],
 	    sizeof (odn->dn_next_nblkptr));
 	bcopy(&odn->dn_next_nlevels[0], &ndn->dn_next_nlevels[0],
 	    sizeof (odn->dn_next_nlevels));
 	bcopy(&odn->dn_next_indblkshift[0], &ndn->dn_next_indblkshift[0],
 	    sizeof (odn->dn_next_indblkshift));
 	bcopy(&odn->dn_next_bonustype[0], &ndn->dn_next_bonustype[0],
 	    sizeof (odn->dn_next_bonustype));
 	bcopy(&odn->dn_rm_spillblk[0], &ndn->dn_rm_spillblk[0],
 	    sizeof (odn->dn_rm_spillblk));
 	bcopy(&odn->dn_next_bonuslen[0], &ndn->dn_next_bonuslen[0],
 	    sizeof (odn->dn_next_bonuslen));
 	bcopy(&odn->dn_next_blksz[0], &ndn->dn_next_blksz[0],
 	    sizeof (odn->dn_next_blksz));
 	/* the per-txg dirty record lists are moved, not copied */
 	for (i = 0; i < TXG_SIZE; i++) {
 		list_move_tail(&ndn->dn_dirty_records[i],
 		    &odn->dn_dirty_records[i]);
 	}
 	bcopy(&odn->dn_free_ranges[0], &ndn->dn_free_ranges[0],
 	    sizeof (odn->dn_free_ranges));
 	ndn->dn_allocated_txg = odn->dn_allocated_txg;
 	ndn->dn_free_txg = odn->dn_free_txg;
 	ndn->dn_assigned_txg = odn->dn_assigned_txg;
 	ndn->dn_dirtyctx = odn->dn_dirtyctx;
 	ndn->dn_dirtyctx_firstset = odn->dn_dirtyctx_firstset;
 	ASSERT(refcount_count(&odn->dn_tx_holds) == 0);
 	refcount_transfer(&ndn->dn_holds, &odn->dn_holds);
 	ASSERT(avl_is_empty(&ndn->dn_dbufs));
 	avl_swap(&ndn->dn_dbufs, &odn->dn_dbufs);
 	ndn->dn_dbufs_count = odn->dn_dbufs_count;
 	ndn->dn_bonus = odn->dn_bonus;
 	ndn->dn_have_spill = odn->dn_have_spill;
 	ndn->dn_zio = odn->dn_zio;
 	ndn->dn_oldused = odn->dn_oldused;
 	ndn->dn_oldflags = odn->dn_oldflags;
 	ndn->dn_olduid = odn->dn_olduid;
 	ndn->dn_oldgid = odn->dn_oldgid;
 	ndn->dn_newuid = odn->dn_newuid;
 	ndn->dn_newgid = odn->dn_newgid;
 	ndn->dn_id_flags = odn->dn_id_flags;
 	/* fresh prefetch state for ndn, then take over odn's streams */
 	dmu_zfetch_init(&ndn->dn_zfetch, NULL);
 	list_move_tail(&ndn->dn_zfetch.zf_stream, &odn->dn_zfetch.zf_stream);
 	ndn->dn_zfetch.zf_dnode = odn->dn_zfetch.zf_dnode;
 
 	/*
 	 * Update back pointers. Updating the handle fixes the back pointer of
 	 * every descendant dbuf as well as the bonus dbuf.
 	 */
 	ASSERT(ndn->dn_handle->dnh_dnode == odn);
 	ndn->dn_handle->dnh_dnode = ndn;
 	if (ndn->dn_zfetch.zf_dnode == odn) {
 		ndn->dn_zfetch.zf_dnode = ndn;
 	}
 
 	/*
 	 * Invalidate the original dnode by clearing all of its back pointers.
 	 */
 	odn->dn_dbuf = NULL;
 	odn->dn_handle = NULL;
 	avl_create(&odn->dn_dbufs, dbuf_compare, sizeof (dmu_buf_impl_t),
 	    offsetof(dmu_buf_impl_t, db_link));
 	odn->dn_dbufs_count = 0;
 	odn->dn_bonus = NULL;
 	odn->dn_zfetch.zf_dnode = NULL;
 
 	/*
 	 * Set the low bit of the objset pointer to ensure that dnode_move()
 	 * recognizes the dnode as invalid in any subsequent callback.
 	 */
 	POINTER_INVALIDATE(&odn->dn_objset);
 
 	/*
 	 * Satisfy the destructor.
 	 *
 	 * NOTE(review): dn_next_nblkptr[] is copied to ndn above but is not
 	 * cleared on odn here, unlike the other dn_next_* arrays -- confirm
 	 * the destructor does not check it.
 	 */
 	for (i = 0; i < TXG_SIZE; i++) {
 		list_create(&odn->dn_dirty_records[i],
 		    sizeof (dbuf_dirty_record_t),
 		    offsetof(dbuf_dirty_record_t, dr_dirty_node));
 		odn->dn_free_ranges[i] = NULL;
 		odn->dn_next_nlevels[i] = 0;
 		odn->dn_next_indblkshift[i] = 0;
 		odn->dn_next_bonustype[i] = 0;
 		odn->dn_rm_spillblk[i] = 0;
 		odn->dn_next_bonuslen[i] = 0;
 		odn->dn_next_blksz[i] = 0;
 	}
 	odn->dn_allocated_txg = 0;
 	odn->dn_free_txg = 0;
 	odn->dn_assigned_txg = 0;
 	odn->dn_dirtyctx = 0;
 	odn->dn_dirtyctx_firstset = NULL;
 	odn->dn_have_spill = B_FALSE;
 	odn->dn_zio = NULL;
 	odn->dn_oldused = 0;
 	odn->dn_oldflags = 0;
 	odn->dn_olduid = 0;
 	odn->dn_oldgid = 0;
 	odn->dn_newuid = 0;
 	odn->dn_newgid = 0;
 	odn->dn_id_flags = 0;
 
 	/*
 	 * Mark the dnode.
 	 */
 	ndn->dn_moved = 1;
 	odn->dn_moved = (uint8_t)-1;
 }
 
-#ifdef	_KERNEL
 /*
  * Move callback for dnode buffers: try to relocate the dnode at buf to
  * newbuf.  size and arg are unused.
  *
  * Returns:
  *   KMEM_CBRC_DONT_KNOW - the objset pointer is invalid or changed while
  *                         the locks were being taken
  *   KMEM_CBRC_NO        - the dnode is a "special" object
  *   KMEM_CBRC_LATER     - the handle lock or dn_struct_rwlock could not
  *                         be taken without blocking, or the dnode has
  *                         more holds than dbufs (actively referenced)
  *   KMEM_CBRC_YES       - the dnode was moved to newbuf
  *
  * Lock order used here: os_lock (global), os->os_lock, the handle's
  * dnh_zrlock, then dn_struct_rwlock.
  */
 /*ARGSUSED*/
 static kmem_cbrc_t
 dnode_move(void *buf, void *newbuf, size_t size, void *arg)
 {
 	dnode_t *odn = buf, *ndn = newbuf;
 	objset_t *os;
 	int64_t refcount;
 	uint32_t dbufs;
 
 	/*
 	 * The dnode is on the objset's list of known dnodes if the objset
 	 * pointer is valid. We set the low bit of the objset pointer when
 	 * freeing the dnode to invalidate it, and the memory patterns written
 	 * by kmem (baddcafe and deadbeef) set at least one of the two low bits.
 	 * A newly created dnode sets the objset pointer last of all to indicate
 	 * that the dnode is known and in a valid state to be moved by this
 	 * function.
 	 */
 	os = odn->dn_objset;
 	if (!POINTER_IS_VALID(os)) {
 		DNODE_STAT_ADD(dnode_move_stats.dms_dnode_invalid);
 		return (KMEM_CBRC_DONT_KNOW);
 	}
 
 	/*
 	 * Ensure that the objset does not go away during the move.
 	 */
 	rw_enter(&os_lock, RW_WRITER);
 	if (os != odn->dn_objset) {
 		rw_exit(&os_lock);
 		DNODE_STAT_ADD(dnode_move_stats.dms_dnode_recheck1);
 		return (KMEM_CBRC_DONT_KNOW);
 	}
 
 	/*
 	 * If the dnode is still valid, then so is the objset. We know that no
 	 * valid objset can be freed while we hold os_lock, so we can safely
 	 * ensure that the objset remains in use.
 	 */
 	mutex_enter(&os->os_lock);
 
 	/*
 	 * Recheck the objset pointer in case the dnode was removed just before
 	 * acquiring the lock.
 	 */
 	if (os != odn->dn_objset) {
 		mutex_exit(&os->os_lock);
 		rw_exit(&os_lock);
 		DNODE_STAT_ADD(dnode_move_stats.dms_dnode_recheck2);
 		return (KMEM_CBRC_DONT_KNOW);
 	}
 
 	/*
 	 * At this point we know that as long as we hold os->os_lock, the dnode
 	 * cannot be freed and fields within the dnode can be safely accessed.
 	 * The objset listing this dnode cannot go away as long as this dnode is
 	 * on its list.
 	 */
 	rw_exit(&os_lock);
 	if (DMU_OBJECT_IS_SPECIAL(odn->dn_object)) {
 		mutex_exit(&os->os_lock);
 		DNODE_STAT_ADD(dnode_move_stats.dms_dnode_special);
 		return (KMEM_CBRC_NO);
 	}
 	ASSERT(odn->dn_dbuf != NULL); /* only "special" dnodes have no parent */
 
 	/*
 	 * Lock the dnode handle to prevent the dnode from obtaining any new
 	 * holds. This also prevents the descendant dbufs and the bonus dbuf
 	 * from accessing the dnode, so that we can discount their holds. The
 	 * handle is safe to access because we know that while the dnode cannot
 	 * go away, neither can its handle. Once we hold dnh_zrlock, we can
 	 * safely move any dnode referenced only by dbufs.
 	 */
 	if (!zrl_tryenter(&odn->dn_handle->dnh_zrlock)) {
 		mutex_exit(&os->os_lock);
 		DNODE_STAT_ADD(dnode_move_stats.dms_dnode_handle);
 		return (KMEM_CBRC_LATER);
 	}
 
 	/*
 	 * Ensure a consistent view of the dnode's holds and the dnode's dbufs.
 	 * We need to guarantee that there is a hold for every dbuf in order to
 	 * determine whether the dnode is actively referenced. Falsely matching
 	 * a dbuf to an active hold would lead to an unsafe move. It's possible
 	 * that a thread already having an active dnode hold is about to add a
 	 * dbuf, and we can't compare hold and dbuf counts while the add is in
 	 * progress.
 	 */
 	if (!rw_tryenter(&odn->dn_struct_rwlock, RW_WRITER)) {
 		zrl_exit(&odn->dn_handle->dnh_zrlock);
 		mutex_exit(&os->os_lock);
 		DNODE_STAT_ADD(dnode_move_stats.dms_dnode_rwlock);
 		return (KMEM_CBRC_LATER);
 	}
 
 	/*
 	 * A dbuf may be removed (evicted) without an active dnode hold. In that
 	 * case, the dbuf count is decremented under the handle lock before the
 	 * dbuf's hold is released. This order ensures that if we count the hold
 	 * after the dbuf is removed but before its hold is released, we will
 	 * treat the unmatched hold as active and exit safely. If we count the
 	 * hold before the dbuf is removed, the hold is discounted, and the
 	 * removal is blocked until the move completes.
 	 */
 	refcount = refcount_count(&odn->dn_holds);
 	ASSERT(refcount >= 0);
 	dbufs = odn->dn_dbufs_count;
 
 	/* We can't have more dbufs than dnode holds. */
 	ASSERT3U(dbufs, <=, refcount);
 	DTRACE_PROBE3(dnode__move, dnode_t *, odn, int64_t, refcount,
 	    uint32_t, dbufs);
 
 	/* any hold not accounted for by a dbuf is an active reference */
 	if (refcount > dbufs) {
 		rw_exit(&odn->dn_struct_rwlock);
 		zrl_exit(&odn->dn_handle->dnh_zrlock);
 		mutex_exit(&os->os_lock);
 		DNODE_STAT_ADD(dnode_move_stats.dms_dnode_active);
 		return (KMEM_CBRC_LATER);
 	}
 
 	/* dnode_move_impl() asserts that dn_struct_rwlock is not held */
 	rw_exit(&odn->dn_struct_rwlock);
 
 	/*
 	 * At this point we know that anyone with a hold on the dnode is not
 	 * actively referencing it. The dnode is known and in a valid state to
 	 * move. We're holding the locks needed to execute the critical section.
 	 */
 	dnode_move_impl(odn, ndn);
 
 	list_link_replace(&odn->dn_link, &ndn->dn_link);
 	/* If the dnode was safe to move, the refcount cannot have changed. */
 	ASSERT(refcount == refcount_count(&ndn->dn_holds));
 	ASSERT(dbufs == ndn->dn_dbufs_count);
 	zrl_exit(&ndn->dn_handle->dnh_zrlock); /* handle has moved */
 	mutex_exit(&os->os_lock);
 
 	return (KMEM_CBRC_YES);
 }
 #endif	/* _KERNEL */
 
 /*
  * Tear down the dnode attached to the handle dnh: wait for its holds to
  * drain, destroy the dnode, destroy the handle's lock and clear the
  * handle's dnode pointer.
  */
 void
 dnode_special_close(dnode_handle_t *dnh)
 {
 	dnode_t *dn = dnh->dnh_dnode;
 
 	/*
 	 * Poll until the final references to the dnode have cleared.
 	 * Holds can only still be outstanding if the arc is asynchronously
 	 * evicting state that has a hold on this dnode while we are
 	 * trying to evict the dnode itself.
 	 */
 	for (;;) {
 		if (refcount_count(&dn->dn_holds) <= 0)
 			break;
 		delay(1);
 	}
 
 	ASSERT(dn->dn_dbuf == NULL ||
 	    dmu_buf_get_user(&dn->dn_dbuf->db) == NULL);
 
 	/* take a handle reference; dnode_destroy() drops it again */
 	zrl_add(&dnh->dnh_zrlock);
 	dnode_destroy(dn); /* implicit zrl_remove() */
 
 	zrl_destroy(&dnh->dnh_zrlock);
 	dnh->dnh_dnode = NULL;
 }
 
 /*
  * Create the in-core dnode for object in objset os from the physical
  * dnode dnp, with no containing dbuf, attach it to the handle dnh and
  * initialize the handle's lock.
  */
 void
 dnode_special_open(objset_t *os, dnode_phys_t *dnp, uint64_t object,
     dnode_handle_t *dnh)
 {
 	dnode_t *dn = dnode_create(os, dnp, NULL, object, dnh);
 
 	zrl_init(&dnh->dnh_zrlock);
 	DNODE_VERIFY(dn);
 }
 
 /*
  * Eviction callback for the dnode_children_t user data that
  * dnode_hold_impl() attaches to a dnode block's dbuf (registered there
  * via dmu_buf_init_user()).  Destroys every instantiated child dnode
  * and every handle lock, then frees the children structure itself.
  */
 static void
 dnode_buf_evict_async(void *dbu)
 {
 	dnode_children_t *children_dnodes = dbu;
 	int i;
 
 	for (i = 0; i < children_dnodes->dnc_count; i++) {
 		dnode_handle_t *dnh = &children_dnodes->dnc_children[i];
 		dnode_t *dn;
 
 		/*
 		 * The dnode handle lock guards against the dnode moving to
 		 * another valid address, so there is no need here to guard
 		 * against changes to or from NULL.
 		 */
 		if (dnh->dnh_dnode == NULL) {
 			zrl_destroy(&dnh->dnh_zrlock);
 			continue;
 		}
 
 		/* read the dnode pointer only after taking the handle ref */
 		zrl_add(&dnh->dnh_zrlock);
 		dn = dnh->dnh_dnode;
 		/*
 		 * If there are holds on this dnode, then there should
 		 * be holds on the dnode's containing dbuf as well; thus
 		 * it wouldn't be eligible for eviction and this function
 		 * would not have been called.
 		 */
 		ASSERT(refcount_is_zero(&dn->dn_holds));
 		ASSERT(refcount_is_zero(&dn->dn_tx_holds));
 
 		dnode_destroy(dn); /* implicit zrl_remove() */
 		zrl_destroy(&dnh->dnh_zrlock);
 		dnh->dnh_dnode = NULL;
 	}
 	/* size must match the kmem_zalloc() in dnode_hold_impl() */
 	kmem_free(children_dnodes, sizeof (dnode_children_t) +
 	    children_dnodes->dnc_count * sizeof (dnode_handle_t));
 }
 
 /*
  * Look up the dnode for object in objset os, take a hold on it tagged
  * with tag, and return it through dnp.
  *
  * flag may contain DNODE_MUST_BE_ALLOCATED and/or DNODE_MUST_BE_FREE to
  * restrict which dnodes are acceptable.
  *
  * errors:
  * EINVAL - invalid object number.
  * EIO - i/o error.
  * ENOENT - the dnode is unallocated and DNODE_MUST_BE_ALLOCATED was
  *	requested or the dnode is being freed; also returned when the
  *	requested user/group accounting dnode does not exist.
  * EEXIST - the dnode is allocated and either DNODE_MUST_BE_FREE was
  *	requested or the dnode is being freed; also returned when
  *	DNODE_MUST_BE_FREE was requested and the dnode still has holds.
  * Other errors from dbuf_read() are passed through unchanged.
  * succeeds even for free dnodes (when no flag forbids it).
  */
 int
 dnode_hold_impl(objset_t *os, uint64_t object, int flag,
     void *tag, dnode_t **dnp)
 {
 	int epb, idx, err;
 	int drop_struct_lock = FALSE;
 	int type;
 	uint64_t blk;
 	dnode_t *mdn, *dn;
 	dmu_buf_impl_t *db;
 	dnode_children_t *children_dnodes;
 	dnode_handle_t *dnh;
 
 	/*
 	 * If you are holding the spa config lock as writer, you shouldn't
 	 * be asking the DMU to do *anything* unless it's the root pool
 	 * which may require us to read from the root filesystem while
 	 * holding some (not all) of the locks as writer.
 	 */
 	ASSERT(spa_config_held(os->os_spa, SCL_ALL, RW_WRITER) == 0 ||
 	    (spa_is_root(os->os_spa) &&
 	    spa_config_held(os->os_spa, SCL_STATE, RW_WRITER)));
 
 	/*
 	 * The user/group accounting dnodes hang directly off the objset,
 	 * so they are handled without going through the meta-dnode.
 	 */
 	if (object == DMU_USERUSED_OBJECT || object == DMU_GROUPUSED_OBJECT) {
 		dn = (object == DMU_USERUSED_OBJECT) ?
 		    DMU_USERUSED_DNODE(os) : DMU_GROUPUSED_DNODE(os);
 		if (dn == NULL)
 			return (SET_ERROR(ENOENT));
 		type = dn->dn_type;
 		if ((flag & DNODE_MUST_BE_ALLOCATED) && type == DMU_OT_NONE)
 			return (SET_ERROR(ENOENT));
 		if ((flag & DNODE_MUST_BE_FREE) && type != DMU_OT_NONE)
 			return (SET_ERROR(EEXIST));
 		DNODE_VERIFY(dn);
 		(void) refcount_add(&dn->dn_holds, tag);
 		*dnp = dn;
 		return (0);
 	}
 
 	if (object == 0 || object >= DN_MAX_OBJECT)
 		return (SET_ERROR(EINVAL));
 
 	mdn = DMU_META_DNODE(os);
 	ASSERT(mdn->dn_object == DMU_META_DNODE_OBJECT);
 
 	DNODE_VERIFY(mdn);
 
 	/* take the meta-dnode's struct lock unless we already hold it */
 	if (!RW_WRITE_HELD(&mdn->dn_struct_rwlock)) {
 		rw_enter(&mdn->dn_struct_rwlock, RW_READER);
 		drop_struct_lock = TRUE;
 	}
 
 	/* find and read the meta-dnode block that contains this object */
 	blk = dbuf_whichblock(mdn, 0, object * sizeof (dnode_phys_t));
 
 	db = dbuf_hold(mdn, blk, FTAG);
 	if (drop_struct_lock)
 		rw_exit(&mdn->dn_struct_rwlock);
 	if (db == NULL)
 		return (SET_ERROR(EIO));
 	err = dbuf_read(db, NULL, DB_RF_CANFAIL);
 	if (err) {
 		dbuf_rele(db, FTAG);
 		return (err);
 	}
 
 	/* epb: dnodes per block; idx: this object's slot in the block */
 	ASSERT3U(db->db.db_size, >=, 1<<DNODE_SHIFT);
 	epb = db->db.db_size >> DNODE_SHIFT;
 
 	idx = object & (epb-1);
 
 	ASSERT(DB_DNODE(db)->dn_type == DMU_OT_DNODE);
 	children_dnodes = dmu_buf_get_user(&db->db);
 	if (children_dnodes == NULL) {
 		int i;
 		dnode_children_t *winner;
 
 		/*
 		 * No handle array is attached to this block yet.  Build
 		 * one and try to install it; if another thread installed
 		 * its own first, discard ours and use the winner's.
 		 */
 		children_dnodes = kmem_zalloc(sizeof (dnode_children_t) +
 		    epb * sizeof (dnode_handle_t), KM_SLEEP);
 		children_dnodes->dnc_count = epb;
 		dnh = &children_dnodes->dnc_children[0];
 		for (i = 0; i < epb; i++) {
 			zrl_init(&dnh[i].dnh_zrlock);
 		}
 		dmu_buf_init_user(&children_dnodes->dnc_dbu, NULL,
 		    dnode_buf_evict_async, NULL);
 		winner = dmu_buf_set_user(&db->db, &children_dnodes->dnc_dbu);
 		if (winner != NULL) {
 			for (i = 0; i < epb; i++) {
 				zrl_destroy(&dnh[i].dnh_zrlock);
 			}
 
 			kmem_free(children_dnodes, sizeof (dnode_children_t) +
 			    epb * sizeof (dnode_handle_t));
 			children_dnodes = winner;
 		}
 	}
 	ASSERT(children_dnodes->dnc_count == epb);
 
 	/* pin the handle while the dnode is looked up or instantiated */
 	dnh = &children_dnodes->dnc_children[idx];
 	zrl_add(&dnh->dnh_zrlock);
 	dn = dnh->dnh_dnode;
 	if (dn == NULL) {
 		dnode_phys_t *phys = (dnode_phys_t *)db->db.db_data+idx;
 
 		dn = dnode_create(os, phys, db, object, dnh);
 	}
 
 	mutex_enter(&dn->dn_mtx);
 	type = dn->dn_type;
 	if (dn->dn_free_txg ||
 	    ((flag & DNODE_MUST_BE_ALLOCATED) && type == DMU_OT_NONE) ||
 	    ((flag & DNODE_MUST_BE_FREE) &&
 	    (type != DMU_OT_NONE || !refcount_is_zero(&dn->dn_holds)))) {
 		mutex_exit(&dn->dn_mtx);
 		zrl_remove(&dnh->dnh_zrlock);
 		dbuf_rele(db, FTAG);
 		/* route through SET_ERROR() like every other failure here */
 		return (SET_ERROR(type == DMU_OT_NONE ? ENOENT : EEXIST));
 	}
 	/* the first hold on the dnode also holds its containing dbuf */
 	if (refcount_add(&dn->dn_holds, tag) == 1)
 		dbuf_add_ref(db, dnh);
 	mutex_exit(&dn->dn_mtx);
 
 	/* Now we can rely on the hold to prevent the dnode from moving. */
 	zrl_remove(&dnh->dnh_zrlock);
 
 	DNODE_VERIFY(dn);
 	ASSERT3P(dn->dn_dbuf, ==, db);
 	ASSERT3U(dn->dn_object, ==, object);
 	dbuf_rele(db, FTAG);
 
 	*dnp = dn;
 	return (0);
 }
 
 /*
  * Hold the dnode of an allocated object.  This calls dnode_hold_impl()
  * with DNODE_MUST_BE_ALLOCATED and returns its result: 0 with the held
  * dnode stored in *dnp, or an error code.
  */
 int
 dnode_hold(objset_t *os, uint64_t object, void *tag, dnode_t **dnp)
 {
 	int err;
 
 	err = dnode_hold_impl(os, object, DNODE_MUST_BE_ALLOCATED, tag, dnp);
 	return (err);
 }
 
 /*
  * Add another hold, tagged with tag, to a dnode that is already held.
  * Returns TRUE if the hold was added, or FALSE (adding nothing) if the
  * dnode currently has no holds at all.
  */
 boolean_t
 dnode_add_ref(dnode_t *dn, void *tag)
 {
 	boolean_t added = FALSE;
 
 	mutex_enter(&dn->dn_mtx);
 	if (!refcount_is_zero(&dn->dn_holds)) {
 		/* there was at least one hold, so the new count exceeds 1 */
 		VERIFY(1 < refcount_add(&dn->dn_holds, tag));
 		added = TRUE;
 	}
 	mutex_exit(&dn->dn_mtx);
 
 	return (added);
 }
 
 /*
  * Release the hold on dn identified by tag.  Takes dn_mtx and hands it
  * to dnode_rele_and_unlock(), which drops the hold and the mutex.
  */
 void
 dnode_rele(dnode_t *dn, void *tag)
 {
 	mutex_enter(&dn->dn_mtx);
 	dnode_rele_and_unlock(dn, tag);
 }
 
 /*
  * Drop the hold on dn identified by tag and release dn_mtx.  The mutex
  * is exited here without being entered, so the caller must hold it on
  * entry (dnode_rele() does).  If this was the last hold and the dnode
  * has a containing dbuf, the dnode's reference on that dbuf (tagged
  * with the dnode handle) is released as well.
  */
 void
 dnode_rele_and_unlock(dnode_t *dn, void *tag)
 {
 	uint64_t refs;
 	/* Get while the hold prevents the dnode from moving. */
 	dmu_buf_impl_t *db = dn->dn_dbuf;
 	dnode_handle_t *dnh = dn->dn_handle;
 
 	refs = refcount_remove(&dn->dn_holds, tag);
 	mutex_exit(&dn->dn_mtx);
 
 	/*
 	 * It's unsafe to release the last hold on a dnode by dnode_rele() or
 	 * indirectly by dbuf_rele() while relying on the dnode handle to
 	 * prevent the dnode from moving, since releasing the last hold could
 	 * result in the dnode's parent dbuf evicting its dnode handles. For
 	 * that reason anyone calling dnode_rele() or dbuf_rele() without some
 	 * other direct or indirect hold on the dnode must first drop the dnode
 	 * handle.
 	 */
 	ASSERT(refs > 0 || dnh->dnh_zrlock.zr_owner != curthread);
 
 	/* NOTE: the DNODE_DNODE does not have a dn_dbuf */
 	if (refs == 0 && db != NULL) {
 		/*
 		 * Another thread could add a hold to the dnode handle in
 		 * dnode_hold_impl() while holding the parent dbuf. Since the
 		 * hold on the parent dbuf prevents the handle from being
 		 * destroyed, the hold on the handle is OK. We can't yet assert
 		 * that the handle has zero references, but that will be
 		 * asserted anyway when the handle gets destroyed.
 		 */
 		dbuf_rele(db, dnh);
 	}
 }
 
 /*
  * Mark dn dirty in the txg of tx.
  *
  * For "special" objects only the dataset is dirtied.  Otherwise the
  * dnode is inserted into the objset's dirty-dnode multilist for this
  * txg (unless it is already on it), a hold tagged with the txg is
  * added, and the dnode's containing dbuf and the dataset are dirtied.
  */
 void
 dnode_setdirty(dnode_t *dn, dmu_tx_t *tx)
 {
 	objset_t *os = dn->dn_objset;
 	uint64_t txg = tx->tx_txg;
 
 	if (DMU_OBJECT_IS_SPECIAL(dn->dn_object)) {
 		dsl_dataset_dirty(os->os_dsl_dataset, tx);
 		return;
 	}
 
 	DNODE_VERIFY(dn);
 
 #ifdef ZFS_DEBUG
 	mutex_enter(&dn->dn_mtx);
 	ASSERT(dn->dn_phys->dn_type || dn->dn_allocated_txg);
 	ASSERT(dn->dn_free_txg == 0 || dn->dn_free_txg >= txg);
 	mutex_exit(&dn->dn_mtx);
 #endif
 
 	/*
 	 * Determine old uid/gid when necessary
 	 */
 	dmu_objset_userquota_get_ids(dn, B_TRUE, tx);
 
 	/* lock the sublist of this txg's dirty list that covers dn */
 	multilist_t *dirtylist = os->os_dirty_dnodes[txg & TXG_MASK];
 	multilist_sublist_t *mls = multilist_sublist_lock_obj(dirtylist, dn);
 
 	/*
 	 * If we are already marked dirty, we're done.
 	 */
 	if (list_link_active(&dn->dn_dirty_link[txg & TXG_MASK])) {
 		multilist_sublist_unlock(mls);
 		return;
 	}
 
 	ASSERT(!refcount_is_zero(&dn->dn_holds) ||
 	    !avl_is_empty(&dn->dn_dbufs));
 	ASSERT(dn->dn_datablksz != 0);
 	ASSERT0(dn->dn_next_bonuslen[txg&TXG_MASK]);
 	ASSERT0(dn->dn_next_blksz[txg&TXG_MASK]);
 	ASSERT0(dn->dn_next_bonustype[txg&TXG_MASK]);
 
 	dprintf_ds(os->os_dsl_dataset, "obj=%llu txg=%llu\n",
 	    dn->dn_object, txg);
 
 	multilist_sublist_insert_head(mls, dn);
 
 	multilist_sublist_unlock(mls);
 
 	/*
 	 * The dnode maintains a hold on its containing dbuf as
 	 * long as there are holds on it.  Each instantiated child
 	 * dbuf maintains a hold on the dnode.  When the last child
 	 * drops its hold, the dnode will drop its hold on the
 	 * containing dbuf. We add a "dirty hold" here so that the
 	 * dnode will hang around after we finish processing its
 	 * children.
 	 */
 	VERIFY(dnode_add_ref(dn, (void *)(uintptr_t)tx->tx_txg));
 
 	(void) dbuf_dirty(dn->dn_dbuf, tx);
 
 	dsl_dataset_dirty(os->os_dsl_dataset, tx);
 }
 
 /*
  * Schedule dn to be freed in the txg of tx by recording that txg in
  * dn_free_txg and dirtying the dnode.  Nothing is done if the dnode is
  * unallocated or is already scheduled to be freed.
  */
 void
 dnode_free(dnode_t *dn, dmu_tx_t *tx)
 {
 	boolean_t nothing_to_do;
 
 	mutex_enter(&dn->dn_mtx);
 	nothing_to_do = (dn->dn_type == DMU_OT_NONE || dn->dn_free_txg);
 	if (!nothing_to_do)
 		dn->dn_free_txg = tx->tx_txg;
 	mutex_exit(&dn->dn_mtx);
 
 	/* dirty the dnode only after dn_mtx has been dropped */
 	if (!nothing_to_do)
 		dnode_setdirty(dn, tx);
 }
 
 /*
  * Try to change the block size for the indicated dnode.  This can only
  * succeed if there are no blocks allocated or dirty beyond first block
  *
  * size is rounded up to a multiple of SPA_MINBLOCKSIZE (0 selects
  * SPA_MINBLOCKSIZE).  ibs is the requested indirect block shift; it is
  * treated as "no change" when it is 0 or equals the current shift, and
  * can only be changed while the dnode has a single level.
  *
  * Returns 0 on success or when nothing needs to change, and ENOTSUP
  * otherwise (including when holding block 0 fails with anything other
  * than ENOENT).
  */
 int
 dnode_set_blksz(dnode_t *dn, uint64_t size, int ibs, dmu_tx_t *tx)
 {
 	dmu_buf_impl_t *db;
 	int err;
 
 	ASSERT3U(size, <=, spa_maxblocksize(dmu_objset_spa(dn->dn_objset)));
 	if (size == 0)
 		size = SPA_MINBLOCKSIZE;
 	else
 		size = P2ROUNDUP(size, SPA_MINBLOCKSIZE);
 
 	if (ibs == dn->dn_indblkshift)
 		ibs = 0;
 
 	/* nothing to do if neither the size nor the shift would change */
 	if (size >> SPA_MINBLOCKSHIFT == dn->dn_datablkszsec && ibs == 0)
 		return (0);
 
 	rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
 
 	/* Check for any allocated blocks beyond the first */
 	if (dn->dn_maxblkid != 0)
 		goto fail;
 
 	/* any in-core dbuf other than block 0, bonus or spill is a blocker */
 	mutex_enter(&dn->dn_dbufs_mtx);
 	for (db = avl_first(&dn->dn_dbufs); db != NULL;
 	    db = AVL_NEXT(&dn->dn_dbufs, db)) {
 		if (db->db_blkid != 0 && db->db_blkid != DMU_BONUS_BLKID &&
 		    db->db_blkid != DMU_SPILL_BLKID) {
 			mutex_exit(&dn->dn_dbufs_mtx);
 			goto fail;
 		}
 	}
 	mutex_exit(&dn->dn_dbufs_mtx);
 
 	if (ibs && dn->dn_nlevels != 1)
 		goto fail;
 
 	/* resize the old block */
 	err = dbuf_hold_impl(dn, 0, 0, TRUE, FALSE, FTAG, &db);
 	if (err == 0)
 		dbuf_new_size(db, size, tx);
 	else if (err != ENOENT)
 		goto fail;
 
 	dnode_setdblksz(dn, size);
 	dnode_setdirty(dn, tx);
 	dn->dn_next_blksz[tx->tx_txg&TXG_MASK] = size;
 	if (ibs) {
 		dn->dn_indblkshift = ibs;
 		dn->dn_next_indblkshift[tx->tx_txg&TXG_MASK] = ibs;
 	}
 	/* rele after we have fixed the blocksize in the dnode */
 	if (db)
 		dbuf_rele(db, FTAG);
 
 	rw_exit(&dn->dn_struct_rwlock);
 	return (0);
 
 fail:
 	rw_exit(&dn->dn_struct_rwlock);
 	return (SET_ERROR(ENOTSUP));
 }
 
 /*
  * Grow the dnode so that it covers block blkid: raise dn_maxblkid if
  * blkid is beyond it and, if the new maximum needs more levels of
  * indirection, raise dn_nlevels, dirty the new top-level indirect block
  * and re-parent this txg's dirty records under it.
  *
  * The caller holds dn_struct_rwlock as reader when have_read is set and
  * as writer otherwise; the lock is returned in the same mode.
  */
 /* read-holding callers must not rely on the lock being continuously held */
 void
 dnode_new_blkid(dnode_t *dn, uint64_t blkid, dmu_tx_t *tx, boolean_t have_read)
 {
 	uint64_t txgoff = tx->tx_txg & TXG_MASK;
 	int epbs, new_nlevels;
 	uint64_t sz;
 
 	ASSERT(blkid != DMU_BONUS_BLKID);
 
 	ASSERT(have_read ?
 	    RW_READ_HELD(&dn->dn_struct_rwlock) :
 	    RW_WRITE_HELD(&dn->dn_struct_rwlock));
 
 	/*
 	 * if we have a read-lock, check to see if we need to do any work
 	 * before upgrading to a write-lock.
 	 */
 	if (have_read) {
 		if (blkid <= dn->dn_maxblkid)
 			return;
 
 		/* upgrade failed: drop the lock and re-take it as writer */
 		if (!rw_tryupgrade(&dn->dn_struct_rwlock)) {
 			rw_exit(&dn->dn_struct_rwlock);
 			rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
 		}
 	}
 
 	/* re-check: the lock may have been dropped just above */
 	if (blkid <= dn->dn_maxblkid)
 		goto out;
 
 	dn->dn_maxblkid = blkid;
 
 	/*
 	 * Compute the number of levels necessary to support the new maxblkid.
 	 */
 	new_nlevels = 1;
 	epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
 	/* the sz >= dn_nblkptr test ends the loop if the shift wraps */
 	for (sz = dn->dn_nblkptr;
 	    sz <= blkid && sz >= dn->dn_nblkptr; sz <<= epbs)
 		new_nlevels++;
 
 	if (new_nlevels > dn->dn_nlevels) {
 		int old_nlevels = dn->dn_nlevels;
 		dmu_buf_impl_t *db;
 		list_t *list;
 		dbuf_dirty_record_t *new, *dr, *dr_next;
 
 		dn->dn_nlevels = new_nlevels;
 
 		ASSERT3U(new_nlevels, >, dn->dn_next_nlevels[txgoff]);
 		dn->dn_next_nlevels[txgoff] = new_nlevels;
 
 		/* dirty the left indirects */
 		db = dbuf_hold_level(dn, old_nlevels, 0, FTAG);
 		ASSERT(db != NULL);
 		new = dbuf_dirty(db, tx);
 		dbuf_rele(db, FTAG);
 
 		/* transfer the dirty records to the new indirect */
 		mutex_enter(&dn->dn_mtx);
 		mutex_enter(&new->dt.di.dr_mtx);
 		list = &dn->dn_dirty_records[txgoff];
 		for (dr = list_head(list); dr; dr = dr_next) {
 			/* fetch the successor before dr may be unlinked */
 			dr_next = list_next(&dn->dn_dirty_records[txgoff], dr);
 			if (dr->dr_dbuf->db_level != new_nlevels-1 &&
 			    dr->dr_dbuf->db_blkid != DMU_BONUS_BLKID &&
 			    dr->dr_dbuf->db_blkid != DMU_SPILL_BLKID) {
 				ASSERT(dr->dr_dbuf->db_level == old_nlevels-1);
 				list_remove(&dn->dn_dirty_records[txgoff], dr);
 				list_insert_tail(&new->dt.di.dr_children, dr);
 				dr->dr_parent = new;
 			}
 		}
 		mutex_exit(&new->dt.di.dr_mtx);
 		mutex_exit(&dn->dn_mtx);
 	}
 
 out:
 	/* hand the lock back in the mode the caller gave it to us */
 	if (have_read)
 		rw_downgrade(&dn->dn_struct_rwlock);
 }
 
 /*
  * Dirty the level-1 indirect block with id l1blkid in tx.  Nothing is
  * done if that block cannot be held.
  */
 static void
 dnode_dirty_l1(dnode_t *dn, uint64_t l1blkid, dmu_tx_t *tx)
 {
 	dmu_buf_impl_t *db;
 
 	db = dbuf_hold_level(dn, 1, l1blkid, FTAG);
 	if (db == NULL)
 		return;
 
 	dmu_buf_will_dirty(&db->db, tx);
 	dbuf_rele(db, FTAG);
 }
 
 /*
  * Free the byte range [off, off + len) of the object in tx.  A len of
  * DMU_OBJECT_END means "through the end of the object".
  *
  * Partial blocks at the start and end of the range are zeroed in place
  * (only if the block is dirty or exists on disk).  The whole blocks in
  * between are recorded in the dnode's per-txg free range tree, the
  * corresponding dbufs are freed via dbuf_free_range(), and the dnode is
  * dirtied; the comment at "done:" notes that the free is finished in
  * the syncing phase.
  *
  * dn_struct_rwlock is held as writer for the duration, except around
  * the dmu_buf_will_dirty() calls on the partial blocks.
  */
 void
 dnode_free_range(dnode_t *dn, uint64_t off, uint64_t len, dmu_tx_t *tx)
 {
 	dmu_buf_impl_t *db;
 	uint64_t blkoff, blkid, nblks;
 	int blksz, blkshift, head, tail;
 	int trunc = FALSE;
 	int epbs;
 
 	rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
 	blksz = dn->dn_datablksz;
 	blkshift = dn->dn_datablkshift;
 	epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
 
 	if (len == DMU_OBJECT_END) {
 		len = UINT64_MAX - off;
 		trunc = TRUE;
 	}
 
 	/*
 	 * First, block align the region to free:
 	 */
 	if (ISP2(blksz)) {
 		head = P2NPHASE(off, blksz);
 		blkoff = P2PHASE(off, blksz);
 		if ((off >> blkshift) > dn->dn_maxblkid)
 			goto out;
 	} else {
 		ASSERT(dn->dn_maxblkid == 0);
 		if (off == 0 && len >= blksz) {
 			/*
 			 * Freeing the whole block; fast-track this request.
 			 * Note that we won't dirty any indirect blocks,
 			 * which is fine because we will be freeing the entire
 			 * file and thus all indirect blocks will be freed
 			 * by free_children().
 			 */
 			blkid = 0;
 			nblks = 1;
 			goto done;
 		} else if (off >= blksz) {
 			/* Freeing past end-of-data */
 			goto out;
 		} else {
 			/* Freeing part of the block. */
 			head = blksz - off;
 			ASSERT3U(head, >, 0);
 		}
 		blkoff = off;
 	}
 	/* zero out any partial block data at the start of the range */
 	if (head) {
 		ASSERT3U(blkoff + head, ==, blksz);
 		if (len < head)
 			head = len;
 		if (dbuf_hold_impl(dn, 0, dbuf_whichblock(dn, 0, off),
 		    TRUE, FALSE, FTAG, &db) == 0) {
 			caddr_t data;
 
 			/* don't dirty if it isn't on disk and isn't dirty */
 			if (db->db_last_dirty ||
 			    (db->db_blkptr && !BP_IS_HOLE(db->db_blkptr))) {
 				/* struct lock is dropped around the dirty */
 				rw_exit(&dn->dn_struct_rwlock);
 				dmu_buf_will_dirty(&db->db, tx);
 				rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
 				data = db->db.db_data;
 				bzero(data + blkoff, head);
 			}
 			dbuf_rele(db, FTAG);
 		}
 		off += head;
 		len -= head;
 	}
 
 	/* If the range was less than one block, we're done */
 	if (len == 0)
 		goto out;
 
 	/* If the remaining range is past end of file, we're done */
 	if ((off >> blkshift) > dn->dn_maxblkid)
 		goto out;
 
 	ASSERT(ISP2(blksz));
 	if (trunc)
 		tail = 0;
 	else
 		tail = P2PHASE(len, blksz);
 
 	ASSERT0(P2PHASE(off, blksz));
 	/* zero out any partial block data at the end of the range */
 	if (tail) {
 		if (len < tail)
 			tail = len;
 		if (dbuf_hold_impl(dn, 0, dbuf_whichblock(dn, 0, off+len),
 		    TRUE, FALSE, FTAG, &db) == 0) {
 			/* don't dirty if not on disk and not dirty */
 			if (db->db_last_dirty ||
 			    (db->db_blkptr && !BP_IS_HOLE(db->db_blkptr))) {
 				/* struct lock is dropped around the dirty */
 				rw_exit(&dn->dn_struct_rwlock);
 				dmu_buf_will_dirty(&db->db, tx);
 				rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
 				bzero(db->db.db_data, tail);
 			}
 			dbuf_rele(db, FTAG);
 		}
 		len -= tail;
 	}
 
 	/* If the range did not include a full block, we are done */
 	if (len == 0)
 		goto out;
 
 	/* what remains is a run of whole blocks: [blkid, blkid + nblks) */
 	ASSERT(IS_P2ALIGNED(off, blksz));
 	ASSERT(trunc || IS_P2ALIGNED(len, blksz));
 	blkid = off >> blkshift;
 	nblks = len >> blkshift;
 	if (trunc)
 		nblks += 1;
 
 	/*
 	 * Dirty all the indirect blocks in this range.  Note that only
 	 * the first and last indirect blocks can actually be written
 	 * (if they were partially freed) -- they must be dirtied, even if
 	 * they do not exist on disk yet.  The interior blocks will
 	 * be freed by free_children(), so they will not actually be written.
 	 * Even though these interior blocks will not be written, we
 	 * dirty them for two reasons:
 	 *
 	 *  - It ensures that the indirect blocks remain in memory until
 	 *    syncing context.  (They have already been prefetched by
 	 *    dmu_tx_hold_free(), so we don't have to worry about reading
 	 *    them serially here.)
 	 *
 	 *  - The dirty space accounting will put pressure on the txg sync
 	 *    mechanism to begin syncing, and to delay transactions if there
 	 *    is a large amount of freeing.  Even though these indirect
 	 *    blocks will not be written, we could need to write the same
 	 *    amount of space if we copy the freed BPs into deadlists.
 	 */
 	if (dn->dn_nlevels > 1) {
 		uint64_t first, last;
 
 		first = blkid >> epbs;
 		dnode_dirty_l1(dn, first, tx);
 		if (trunc)
 			last = dn->dn_maxblkid >> epbs;
 		else
 			last = (blkid + nblks - 1) >> epbs;
 		if (last != first)
 			dnode_dirty_l1(dn, last, tx);
 
 		/* shift converts a level-1 block id to a level-0 byte offset */
 		int shift = dn->dn_datablkshift + dn->dn_indblkshift -
 		    SPA_BLKPTRSHIFT;
 		for (uint64_t i = first + 1; i < last; i++) {
 			/*
 			 * Set i to the blockid of the next non-hole
 			 * level-1 indirect block at or after i.  Note
 			 * that dnode_next_offset() operates in terms of
 			 * level-0-equivalent bytes.
 			 */
 			uint64_t ibyte = i << shift;
 			int err = dnode_next_offset(dn, DNODE_FIND_HAVELOCK,
 			    &ibyte, 2, 1, 0);
 			i = ibyte >> shift;
 			if (i >= last)
 				break;
 
 			/*
 			 * Normally we should not see an error, either
 			 * from dnode_next_offset() or dbuf_hold_level()
 			 * (except for ESRCH from dnode_next_offset).
 			 * If there is an i/o error, then when we read
 			 * this block in syncing context, it will use
 			 * ZIO_FLAG_MUSTSUCCEED, and thus hang/panic according
 			 * to the "failmode" property.  dnode_next_offset()
 			 * doesn't have a flag to indicate MUSTSUCCEED.
 			 */
 			if (err != 0)
 				break;
 
 			dnode_dirty_l1(dn, i, tx);
 		}
 	}
 
 done:
 	/*
 	 * Add this range to the dnode range list.
 	 * We will finish up this free operation in the syncing phase.
 	 */
 	mutex_enter(&dn->dn_mtx);
 	int txgoff = tx->tx_txg & TXG_MASK;
 	if (dn->dn_free_ranges[txgoff] == NULL) {
 		dn->dn_free_ranges[txgoff] = range_tree_create(NULL, NULL);
 	}
 	/* clear first so the add never overlaps an existing segment */
 	range_tree_clear(dn->dn_free_ranges[txgoff], blkid, nblks);
 	range_tree_add(dn->dn_free_ranges[txgoff], blkid, nblks);
 	dprintf_dnode(dn, "blkid=%llu nblks=%llu txg=%llu\n",
 	    blkid, nblks, tx->tx_txg);
 	mutex_exit(&dn->dn_mtx);
 
 	dbuf_free_range(dn, blkid, blkid + nblks - 1, tx);
 	dnode_setdirty(dn, tx);
 out:
 
 	rw_exit(&dn->dn_struct_rwlock);
 }
 
 /*
  * Report whether any of the TXG_SIZE per-txg dn_rm_spillblk slots of this
  * dnode is marked DN_KILL_SPILLBLK.  The slots are examined while holding
  * dn_mtx.
  */
 static boolean_t
 dnode_spill_freed(dnode_t *dn)
 {
 	int slot = 0;
 
 	mutex_enter(&dn->dn_mtx);
 	/* Stop at the first slot that carries a kill request. */
 	while (slot < TXG_SIZE &&
 	    dn->dn_rm_spillblk[slot] != DN_KILL_SPILLBLK)
 		slot++;
 	mutex_exit(&dn->dn_mtx);
 
 	/* slot only stays below TXG_SIZE if the scan stopped early. */
 	return (slot < TXG_SIZE);
 }
 
 /*
  * Return TRUE if this blkid was freed in a recent txg, or FALSE if it
  * wasn't.  The bonus buffer is never reported as freed; the spill block is
  * answered by dnode_spill_freed(); every other blkid is looked up in the
  * per-txg dn_free_ranges trees under dn_mtx.
  */
 uint64_t
 dnode_block_freed(dnode_t *dn, uint64_t blkid)
 {
 	void *pool = spa_get_dsl(dn->dn_objset->os_spa);
 	int t;
 
 	/*
 	 * The bonus buffer is never considered freed here.  Likewise, if
 	 * we're in the process of opening the pool, the dsl pool will not
 	 * be set yet, but there shouldn't be anything dirty.
 	 */
 	if (blkid == DMU_BONUS_BLKID || pool == NULL)
 		return (FALSE);
 
 	/* A nonzero dn_free_txg means every block counts as freed. */
 	if (dn->dn_free_txg)
 		return (TRUE);
 
 	if (blkid == DMU_SPILL_BLKID)
 		return (dnode_spill_freed(dn));
 
 	mutex_enter(&dn->dn_mtx);
 	t = 0;
 	while (t < TXG_SIZE) {
 		if (dn->dn_free_ranges[t] != NULL &&
 		    range_tree_contains(dn->dn_free_ranges[t], blkid, 1))
 			break;
 		t++;
 	}
 	mutex_exit(&dn->dn_mtx);
 
 	/* t < TXG_SIZE only when some txg's free range covers blkid. */
 	return (t < TXG_SIZE);
 }
 
 /*
  * Call from syncing context when we actually write/free space for this
  * dnode.  Applies the signed byte count 'delta' to the space recorded in
  * dn_phys->dn_used, under dn_mtx.  Pools older than
  * SPA_VERSION_DNODE_BYTES store the value in DEV_BSHIFT-sized units;
  * newer pools store bytes and set DNODE_FLAG_USED_BYTES.
  */
 void
 dnode_diduse_space(dnode_t *dn, int64_t delta)
 {
 	uint64_t space;
 
 	dprintf_dnode(dn, "dn=%p dnp=%p used=%llu delta=%lld\n",
 	    dn, dn->dn_phys,
 	    (u_longlong_t)dn->dn_phys->dn_used,
 	    (longlong_t)delta);
 
 	mutex_enter(&dn->dn_mtx);
 	space = DN_USED_BYTES(dn->dn_phys);
 	if (delta <= 0) {
 		ASSERT3U(space, >=, -delta); /* no underflow */
 	} else {
 		ASSERT3U(space + delta, >=, space); /* no overflow */
 	}
 	space += delta;
 
 	if (spa_version(dn->dn_objset->os_spa) >= SPA_VERSION_DNODE_BYTES) {
 		/* Byte-granular accounting. */
 		dn->dn_phys->dn_used = space;
 		dn->dn_phys->dn_flags |= DNODE_FLAG_USED_BYTES;
 	} else {
 		/* Legacy accounting in units of (1 << DEV_BSHIFT) bytes. */
 		ASSERT((dn->dn_phys->dn_flags & DNODE_FLAG_USED_BYTES) == 0);
 		ASSERT0(P2PHASE(space, 1<<DEV_BSHIFT));
 		dn->dn_phys->dn_used = space >> DEV_BSHIFT;
 	}
 	mutex_exit(&dn->dn_mtx);
 }
 
 /*
  * Scans a block at the indicated "level" looking for a hole or data,
  * depending on 'flags'.
  *
  * If level > 0, then we are scanning an indirect block looking at its
  * pointers.  If level == 0, then we are looking at a block of dnodes.
  *
  * If we don't find what we are looking for in the block, we return ESRCH.
  * Otherwise, return with *offset pointing to the beginning (if searching
  * forwards) or end (if searching backwards) of the range covered by the
  * block pointer we matched on (or dnode).
  *
  * The basic search algorithm used below by dnode_next_offset() is to
  * use this function to search up the block tree (widen the search) until
  * we find something (i.e., we don't return ESRCH) and then search back
  * down the tree (narrow the search) until we reach our original search
  * level.
  *
  * Arguments:
  *	flags	DNODE_FIND_HOLE selects a hole search (otherwise data);
  *		DNODE_FIND_BACKWARDS makes the scan step downwards.
  *	offset	in/out search position.
  *	lvl	level to scan; lvl == dn_nlevels scans the block pointers
  *		embedded in the dnode itself.
  *	blkfill	number of dnode slots scanned in a level-0 block, and the
  *		base of the fill thresholds used at higher levels.
  *	txg	if nonzero, only block pointers born after txg match; must
  *		be 0 for a hole search (asserted below).
  *
  * Returns 0 on a match, ESRCH if nothing matched in this block, or the
  * error returned by dbuf_hold_impl()/dbuf_read().
  */
 static int
 dnode_next_offset_level(dnode_t *dn, int flags, uint64_t *offset,
     int lvl, uint64_t blkfill, uint64_t txg)
 {
 	dmu_buf_impl_t *db = NULL;
 	void *data = NULL;
 	uint64_t epbs = dn->dn_phys->dn_indblkshift - SPA_BLKPTRSHIFT;
 	uint64_t epb = 1ULL << epbs;
 	uint64_t minfill, maxfill;
 	boolean_t hole;
 	int i, inc, error, span;
 
 	dprintf("probing object %llu offset %llx level %d of %u\n",
 	    dn->dn_object, *offset, lvl, dn->dn_phys->dn_nlevels);
 
 	hole = ((flags & DNODE_FIND_HOLE) != 0);
 	inc = (flags & DNODE_FIND_BACKWARDS) ? -1 : 1;
 	ASSERT(txg == 0 || !hole);
 
 	if (lvl == dn->dn_phys->dn_nlevels) {
 		/*
 		 * Top of the tree: scan the block pointers stored in the
 		 * dnode itself, so no dbuf needs to be held.
 		 */
 		error = 0;
 		epb = dn->dn_phys->dn_nblkptr;
 		data = dn->dn_phys->dn_blkptr;
 	} else {
 		uint64_t blkid = dbuf_whichblock(dn, lvl, *offset);
 		error = dbuf_hold_impl(dn, lvl, blkid, TRUE, FALSE, FTAG, &db);
 		if (error) {
 			if (error != ENOENT)
 				return (error);
 			/* No such block: for a hole search that is a match. */
 			if (hole)
 				return (0);
 			/*
 			 * This can only happen when we are searching up
 			 * the block tree for data.  We don't really need to
 			 * adjust the offset, as we will just end up looking
 			 * at the pointer to this block in its parent, and it's
 			 * going to be unallocated, so we will skip over it.
 			 */
 			return (SET_ERROR(ESRCH));
 		}
 		error = dbuf_read(db, NULL, DB_RF_CANFAIL | DB_RF_HAVESTRUCT);
 		if (error) {
 			dbuf_rele(db, FTAG);
 			return (error);
 		}
 		data = db->db.db_data;
 	}
 
 
 	if (db != NULL && txg != 0 && (db->db_blkptr == NULL ||
 	    db->db_blkptr->blk_birth <= txg ||
 	    BP_IS_HOLE(db->db_blkptr))) {
 		/*
 		 * This can only happen when we are searching up the tree
 		 * and these conditions mean that we need to keep climbing.
 		 */
 		error = SET_ERROR(ESRCH);
 	} else if (lvl == 0) {
 		/*
 		 * Level 0: 'data' is an array of dnode_phys_t.  Step through
 		 * the slots, moving *offset by one dnode (1 << DNODE_SHIFT)
 		 * per slot, until a slot's "is DMU_OT_NONE" state equals
 		 * 'hole'.
 		 */
 		dnode_phys_t *dnp = data;
 		span = DNODE_SHIFT;
 		ASSERT(dn->dn_type == DMU_OT_DNODE);
 
 		for (i = (*offset >> span) & (blkfill - 1);
 		    i >= 0 && i < blkfill; i += inc) {
 			if ((dnp[i].dn_type == DMU_OT_NONE) == hole)
 				break;
 			*offset += (1ULL << span) * inc;
 		}
 		/* Ran off either end of the block without a match. */
 		if (i < 0 || i == blkfill)
 			error = SET_ERROR(ESRCH);
 	} else {
 		/*
 		 * Level > 0: 'data' is an array of block pointers, each
 		 * covering (1 << span) bytes of the object.
 		 */
 		blkptr_t *bp = data;
 		uint64_t start = *offset;
 		span = (lvl - 1) * epbs + dn->dn_datablkshift;
 		minfill = 0;
 		maxfill = blkfill << ((lvl - 1) * epbs);
 
 		/*
 		 * A hole search accepts any pointer whose fill count is
 		 * below the maximum computed above; a data search accepts
 		 * any pointer with a fill count of at least 1.
 		 */
 		if (hole)
 			maxfill--;
 		else
 			minfill++;
 
 		/* Work in units of block pointers while scanning. */
 		*offset = *offset >> span;
 		for (i = BF64_GET(*offset, 0, epbs);
 		    i >= 0 && i < epb; i += inc) {
 			if (BP_GET_FILL(&bp[i]) >= minfill &&
 			    BP_GET_FILL(&bp[i]) <= maxfill &&
 			    (hole || bp[i].blk_birth > txg))
 				break;
 			/* Don't wrap the unsigned offset below zero. */
 			if (inc > 0 || *offset > 0)
 				*offset += inc;
 		}
 		/* Convert back from block-pointer units to bytes. */
 		*offset = *offset << span;
 		if (inc < 0) {
 			/* traversing backwards; position offset at the end */
 			ASSERT3U(*offset, <=, start);
 			*offset = MIN(*offset + (1ULL << span) - 1, start);
 		} else if (*offset < start) {
 			/* Never report a position before where we started. */
 			*offset = start;
 		}
 		if (i < 0 || i >= epb)
 			error = SET_ERROR(ESRCH);
 	}
 
 	if (db)
 		dbuf_rele(db, FTAG);
 
 	return (error);
 }
 
 /*
  * Find the next hole, data, or sparse region at or after *offset.
  * The value 'blkfill' tells us how many items we expect to find
  * in an L0 data block; this value is 1 for normal objects,
  * DNODES_PER_BLOCK for the meta dnode, and some fraction of
  * DNODES_PER_BLOCK when searching for sparse regions thereof.
  *
  * Unless DNODE_FIND_HAVELOCK is set in 'flags', dn_struct_rwlock is taken
  * as reader for the duration of the search.  Returns 0 with *offset
  * updated on success, ESRCH if nothing was found, or another error
  * passed up from dnode_next_offset_level().
  *
  * Examples:
  *
  * dnode_next_offset(dn, flags, offset, 1, 1, 0);
  *	Finds the next/previous hole/data in a file.
  *	Used in dmu_offset_next().
  *
  * dnode_next_offset(mdn, flags, offset, 0, DNODES_PER_BLOCK, txg);
  *	Finds the next free/allocated dnode in an objset's meta-dnode.
  *	Only finds objects that have new contents since txg (ie.
  *	bonus buffer changes and content removal are ignored).
  *	Used in dmu_object_next().
  *
  * dnode_next_offset(mdn, DNODE_FIND_HOLE, offset, 2, DNODES_PER_BLOCK >> 2, 0);
  *	Finds the next L2 meta-dnode bp that's at most 1/4 full.
  *	Used in dmu_object_alloc().
  */
 int
 dnode_next_offset(dnode_t *dn, int flags, uint64_t *offset,
     int minlvl, uint64_t blkfill, uint64_t txg)
 {
 	const uint64_t start = *offset;
 	const int need_lock = !(flags & DNODE_FIND_HAVELOCK);
 	int error = 0;
 	int lvl, toplvl;
 
 	if (need_lock)
 		rw_enter(&dn->dn_struct_rwlock, RW_READER);
 
 	/* An object with no levels has nothing to search. */
 	if (dn->dn_phys->dn_nlevels == 0) {
 		error = SET_ERROR(ESRCH);
 		goto out;
 	}
 
 	/*
 	 * With a zero dn_datablkshift only offsets below dn_datablksz can
 	 * match; a hole search from there lands at dn_datablksz.
 	 */
 	if (dn->dn_datablkshift == 0) {
 		if (*offset >= dn->dn_datablksz)
 			error = SET_ERROR(ESRCH);
 		else if (flags & DNODE_FIND_HOLE)
 			*offset = dn->dn_datablksz;
 		goto out;
 	}
 
 	toplvl = dn->dn_phys->dn_nlevels;
 
 	/* Widen: climb the tree until a level reports something. */
 	lvl = minlvl;
 	while (lvl <= toplvl) {
 		error = dnode_next_offset_level(dn,
 		    flags, offset, lvl, blkfill, txg);
 		if (error != ESRCH)
 			break;
 		lvl++;
 	}
 
 	/* Narrow: descend back down to the level we were asked about. */
 	while (error == 0 && --lvl >= minlvl) {
 		error = dnode_next_offset_level(dn,
 		    flags, offset, lvl, blkfill, txg);
 	}
 
 	/*
 	 * There's always a "virtual hole" at the end of the object, even
 	 * if all BP's which physically exist are non-holes.
 	 */
 	if (error == ESRCH && txg == 0 && minlvl == 1 && blkfill == 1 &&
 	    (flags & DNODE_FIND_HOLE) && !(flags & DNODE_FIND_BACKWARDS)) {
 		error = 0;
 	}
 
 	/* A result on the wrong side of the starting point is no result. */
 	if (error == 0) {
 		int wrong_side;
 
 		if (flags & DNODE_FIND_BACKWARDS)
 			wrong_side = (start < *offset);
 		else
 			wrong_side = (start > *offset);
 		if (wrong_side)
 			error = SET_ERROR(ESRCH);
 	}
 out:
 	if (need_lock)
 		rw_exit(&dn->dn_struct_rwlock);
 
 	return (error);
 }
Index: vendor-sys/illumos/dist/uts/common/sys/acl.h
===================================================================
--- vendor-sys/illumos/dist/uts/common/sys/acl.h	(revision 329752)
+++ vendor-sys/illumos/dist/uts/common/sys/acl.h	(revision 329753)
@@ -1,320 +1,321 @@
 /*
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
  * Common Development and Distribution License (the "License").
  * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
  * See the License for the specific language governing permissions
  * and limitations under the License.
  *
  * When distributing Covered Code, include this CDDL HEADER in each
  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  * If applicable, add the following below this CDDL HEADER, with the
  * fields enclosed by brackets "[]" replaced with your own identifying
  * information: Portions Copyright [yyyy] [name of copyright owner]
  *
  * CDDL HEADER END
  */
 /*
  * Copyright 2014 Garrett D'Amore <garrett@damore.org>
  *
  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  *
  * Copyright 2014 Nexenta Systems, Inc.  All rights reserved.
+ * Copyright 2017 RackTop Systems.
  */
 
 #ifndef _SYS_ACL_H
 #define	_SYS_ACL_H
 
 #include <sys/types.h>
 #include <sys/acl_impl.h>
 
 #ifdef	__cplusplus
 extern "C" {
 #endif
 
 #define	MAX_ACL_ENTRIES		(1024)	/* max entries of each type */
 typedef struct acl {
 	int		a_type;		/* the type of ACL entry */
 	uid_t		a_id;		/* the entry in -uid or gid */
 	o_mode_t	a_perm;		/* the permission field */
 } aclent_t;
 
 typedef struct ace {
 	uid_t		a_who;		/* uid or gid */
 	uint32_t	a_access_mask;	/* read,write,... */
 	uint16_t	a_flags;	/* see below */
 	uint16_t	a_type;		/* allow or deny */
 } ace_t;
 
 typedef struct acl_info acl_t;
 
 /*
  * The following are Defined types for an aclent_t.
  */
 #define	USER_OBJ	(0x01)		/* object owner */
 #define	USER		(0x02)		/* additional users */
 #define	GROUP_OBJ	(0x04)		/* owning group of the object */
 #define	GROUP		(0x08)		/* additional groups */
 #define	CLASS_OBJ	(0x10)		/* file group class and mask entry */
 #define	OTHER_OBJ	(0x20)		/* other entry for the object */
 #define	ACL_DEFAULT	(0x1000)	/* default flag */
 /* default object owner */
 #define	DEF_USER_OBJ	(ACL_DEFAULT | USER_OBJ)
 /* default additional users */
 #define	DEF_USER	(ACL_DEFAULT | USER)
 /* default owning group */
 #define	DEF_GROUP_OBJ	(ACL_DEFAULT | GROUP_OBJ)
 /* default additional groups */
 #define	DEF_GROUP	(ACL_DEFAULT | GROUP)
 /* default mask entry */
 #define	DEF_CLASS_OBJ	(ACL_DEFAULT | CLASS_OBJ)
 /* default other entry */
 #define	DEF_OTHER_OBJ	(ACL_DEFAULT | OTHER_OBJ)
 
 /*
  * The following are defined for ace_t.
  *
  * Note, these are intentionally the same as the Windows
  * "File Access Rights Constants" you can find on MSDN.
  * (See also: "Standard Access Rights" on MSDN).
  *
  * The equivalent Windows names for these are just like
  * those shown below, with FILE_ in place of ACE_, except
  * as noted below.  Also note that Windows uses a special
  * privilege: BYPASS_TRAVERSE_CHECKING, normally granted
  * to everyone, that causes the absence of ACE_TRAVERSE
  * to be ignored.
  */
 #define	ACE_READ_DATA		0x00000001	/* file: read data */
 #define	ACE_LIST_DIRECTORY	0x00000001	/* dir: list files */
 #define	ACE_WRITE_DATA		0x00000002	/* file: write data */
 #define	ACE_ADD_FILE		0x00000002	/* dir: create file */
 #define	ACE_APPEND_DATA		0x00000004	/* file: append data */
 #define	ACE_ADD_SUBDIRECTORY	0x00000004	/* dir: create subdir */
 #define	ACE_READ_NAMED_ATTRS	0x00000008	/* FILE_READ_EA */
 #define	ACE_WRITE_NAMED_ATTRS	0x00000010	/* FILE_WRITE_EA */
 #define	ACE_EXECUTE		0x00000020	/* file: execute */
 #define	ACE_TRAVERSE		0x00000020	/* dir: lookup name */
 #define	ACE_DELETE_CHILD	0x00000040	/* dir: unlink child */
 #define	ACE_READ_ATTRIBUTES	0x00000080	/* (all) stat, etc. */
 #define	ACE_WRITE_ATTRIBUTES	0x00000100	/* (all) utimes, etc. */
 #define	ACE_DELETE		0x00010000	/* (all) unlink self */
 #define	ACE_READ_ACL		0x00020000	/* (all) getsecattr */
 #define	ACE_WRITE_ACL		0x00040000	/* (all) setsecattr */
 #define	ACE_WRITE_OWNER		0x00080000	/* (all) chown */
 #define	ACE_SYNCHRONIZE		0x00100000	/* (all) see MSDN */
 
 /*
  * Some of the following are the same as Windows uses. (but NOT ALL!)
  * See the "ACE_HEADER" structure description on MSDN for details.
  * Comments show relations to the MSDN names.
  */
 #define	ACE_FILE_INHERIT_ACE		0x0001	/* = OBJECT_INHERIT_ACE */
 #define	ACE_DIRECTORY_INHERIT_ACE	0x0002	/* = CONTAINER_INHERIT_ACE */
 #define	ACE_NO_PROPAGATE_INHERIT_ACE	0x0004	/* = NO_PROPAGATE_INHERIT_ACE */
 #define	ACE_INHERIT_ONLY_ACE		0x0008	/* = INHERIT_ONLY_ACE */
 #define	ACE_SUCCESSFUL_ACCESS_ACE_FLAG	0x0010
 #define	ACE_FAILED_ACCESS_ACE_FLAG	0x0020
 #define	ACE_IDENTIFIER_GROUP		0x0040
 #define	ACE_INHERITED_ACE		0x0080	/* INHERITED_ACE, 0x10 on NT */
 #define	ACE_OWNER			0x1000
 #define	ACE_GROUP			0x2000
 #define	ACE_EVERYONE			0x4000
 
 /* These four are the same as Windows, but with an ACE_ prefix added. */
 #define	ACE_ACCESS_ALLOWED_ACE_TYPE	0x0000
 #define	ACE_ACCESS_DENIED_ACE_TYPE	0x0001
 #define	ACE_SYSTEM_AUDIT_ACE_TYPE	0x0002
 #define	ACE_SYSTEM_ALARM_ACE_TYPE	0x0003
 
 #define	ACL_AUTO_INHERIT		0x0001
 #define	ACL_PROTECTED			0x0002
 #define	ACL_DEFAULTED			0x0004
 #define	ACL_FLAGS_ALL			(ACL_AUTO_INHERIT|ACL_PROTECTED| \
     ACL_DEFAULTED)
 
-#ifdef _KERNEL
+#if defined(_KERNEL) || defined(_FAKE_KERNEL)
 
 /*
  * These are only applicable in a CIFS context.
  * Here again, same as Windows, but with an ACE_ prefix added.
  */
 #define	ACE_ACCESS_ALLOWED_COMPOUND_ACE_TYPE		0x04
 #define	ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE		0x05
 #define	ACE_ACCESS_DENIED_OBJECT_ACE_TYPE		0x06
 #define	ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE		0x07
 #define	ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE		0x08
 #define	ACE_ACCESS_ALLOWED_CALLBACK_ACE_TYPE		0x09
 #define	ACE_ACCESS_DENIED_CALLBACK_ACE_TYPE		0x0A
 #define	ACE_ACCESS_ALLOWED_CALLBACK_OBJECT_ACE_TYPE	0x0B
 #define	ACE_ACCESS_DENIED_CALLBACK_OBJECT_ACE_TYPE	0x0C
 #define	ACE_SYSTEM_AUDIT_CALLBACK_ACE_TYPE		0x0D
 #define	ACE_SYSTEM_ALARM_CALLBACK_ACE_TYPE		0x0E
 #define	ACE_SYSTEM_AUDIT_CALLBACK_OBJECT_ACE_TYPE	0x0F
 #define	ACE_SYSTEM_ALARM_CALLBACK_OBJECT_ACE_TYPE	0x10
 
 #define	ACE_ALL_TYPES	0x001F
 
 typedef struct ace_object {
 	uid_t		a_who;		/* uid or gid */
 	uint32_t	a_access_mask;	/* read,write,... */
 	uint16_t	a_flags;	/* see below */
 	uint16_t	a_type;		/* allow or deny */
 	uint8_t		a_obj_type[16];	/* obj type */
 	uint8_t		a_inherit_obj_type[16];  /* inherit obj */
 } ace_object_t;
 
 #endif
 
 #define	ACE_ALL_PERMS	(ACE_READ_DATA|ACE_LIST_DIRECTORY|ACE_WRITE_DATA| \
     ACE_ADD_FILE|ACE_APPEND_DATA|ACE_ADD_SUBDIRECTORY|ACE_READ_NAMED_ATTRS| \
     ACE_WRITE_NAMED_ATTRS|ACE_EXECUTE|ACE_DELETE_CHILD|ACE_READ_ATTRIBUTES| \
     ACE_WRITE_ATTRIBUTES|ACE_DELETE|ACE_READ_ACL|ACE_WRITE_ACL| \
     ACE_WRITE_OWNER|ACE_SYNCHRONIZE)
 
 #define	ACE_ALL_WRITE_PERMS (ACE_WRITE_DATA|ACE_APPEND_DATA| \
     ACE_WRITE_ATTRIBUTES|ACE_WRITE_NAMED_ATTRS|ACE_WRITE_ACL| \
     ACE_WRITE_OWNER|ACE_DELETE|ACE_DELETE_CHILD)
 
 #define	ACE_READ_PERMS	(ACE_READ_DATA|ACE_READ_ACL|ACE_READ_ATTRIBUTES| \
     ACE_READ_NAMED_ATTRS)
 
 #define	ACE_WRITE_PERMS	(ACE_WRITE_DATA|ACE_APPEND_DATA|ACE_WRITE_ATTRIBUTES| \
     ACE_WRITE_NAMED_ATTRS)
 
 #define	ACE_MODIFY_PERMS (ACE_READ_DATA|ACE_LIST_DIRECTORY|ACE_WRITE_DATA| \
     ACE_ADD_FILE|ACE_APPEND_DATA|ACE_ADD_SUBDIRECTORY|ACE_READ_NAMED_ATTRS| \
     ACE_WRITE_NAMED_ATTRS|ACE_EXECUTE|ACE_DELETE_CHILD|ACE_READ_ATTRIBUTES| \
     ACE_WRITE_ATTRIBUTES|ACE_DELETE|ACE_READ_ACL|ACE_SYNCHRONIZE)
 /*
  * The following flags are supported by both NFSv4 ACLs and ace_t.
  */
 #define	ACE_NFSV4_SUP_FLAGS (ACE_FILE_INHERIT_ACE | \
     ACE_DIRECTORY_INHERIT_ACE | \
     ACE_NO_PROPAGATE_INHERIT_ACE | \
     ACE_INHERIT_ONLY_ACE | \
     ACE_IDENTIFIER_GROUP)
 
 #define	ACE_TYPE_FLAGS		(ACE_OWNER|ACE_GROUP|ACE_EVERYONE| \
     ACE_IDENTIFIER_GROUP)
 #define	ACE_INHERIT_FLAGS	(ACE_FILE_INHERIT_ACE| \
     ACE_DIRECTORY_INHERIT_ACE|ACE_NO_PROPAGATE_INHERIT_ACE|ACE_INHERIT_ONLY_ACE)
 
 /* cmd args to acl(2) for aclent_t  */
 #define	GETACL			1
 #define	SETACL			2
 #define	GETACLCNT		3
 
 /* cmd's to manipulate ace acls. */
 #define	ACE_GETACL		4
 #define	ACE_SETACL		5
 #define	ACE_GETACLCNT		6
 
 /* minimal acl entries from GETACLCNT */
 #define	MIN_ACL_ENTRIES		4
 
 #if !defined(_KERNEL)
 
 /* acl check errors */
 #define	GRP_ERROR		1
 #define	USER_ERROR		2
 #define	OTHER_ERROR		3
 #define	CLASS_ERROR		4
 #define	DUPLICATE_ERROR		5
 #define	MISS_ERROR		6
 #define	MEM_ERROR		7
 #define	ENTRY_ERROR		8
 
 
 /*
  * similar to ufs_acl.h: changed to char type for user commands (tar, cpio)
  * Attribute types
  */
 #define	UFSD_FREE	('0')	/* Free entry */
 #define	UFSD_ACL	('1')	/* Access Control Lists */
 #define	UFSD_DFACL	('2')	/* reserved for future use */
 #define	ACE_ACL		('3')	/* ace_t style acls */
 
 /*
  * flag to [f]acl_get()
  * controls whether a trivial acl should be returned.
  */
 #define	ACL_NO_TRIVIAL	0x2
 
 
 /*
  * Flags to control acl_totext()
  */
 
 #define	ACL_APPEND_ID	0x1 	/* append uid/gid to user/group entries */
 #define	ACL_COMPACT_FMT	0x2 	/* build ACL in ls -V format */
 #define	ACL_NORESOLVE	0x4	/* don't do name service lookups */
 #define	ACL_SID_FMT	0x8	/* use usersid/groupsid when appropriate */
 
 /*
  * Legacy aclcheck errors for aclent_t ACLs
  */
 #define	EACL_GRP_ERROR		GRP_ERROR
 #define	EACL_USER_ERROR		USER_ERROR
 #define	EACL_OTHER_ERROR	OTHER_ERROR
 #define	EACL_CLASS_ERROR	CLASS_ERROR
 #define	EACL_DUPLICATE_ERROR	DUPLICATE_ERROR
 #define	EACL_MISS_ERROR		MISS_ERROR
 #define	EACL_MEM_ERROR		MEM_ERROR
 #define	EACL_ENTRY_ERROR	ENTRY_ERROR
 
 #define	EACL_INHERIT_ERROR	9		/* invalid inherit flags */
 #define	EACL_FLAGS_ERROR	10		/* unknown flag value */
 #define	EACL_PERM_MASK_ERROR	11		/* unknown permission */
 #define	EACL_COUNT_ERROR	12		/* invalid acl count */
 
 #define	EACL_INVALID_SLOT	13		/* invalid acl slot */
 #define	EACL_NO_ACL_ENTRY	14		/* Entry doesn't exist */
 #define	EACL_DIFF_TYPE		15		/* acls aren't same type */
 
 #define	EACL_INVALID_USER_GROUP	16		/* need user/group name */
 #define	EACL_INVALID_STR	17		/* invalid acl string */
 #define	EACL_FIELD_NOT_BLANK	18		/* can't have blank field */
 #define	EACL_INVALID_ACCESS_TYPE 19		/* invalid access type */
 #define	EACL_UNKNOWN_DATA	20		/* Unrecognized data in ACL */
 #define	EACL_MISSING_FIELDS	21		/* missing fields in acl */
 
 #define	EACL_INHERIT_NOTDIR	22		/* Need dir for inheritance */
 
 extern int aclcheck(aclent_t *, int, int *);
 extern int acltomode(aclent_t *, int, mode_t *);
 extern int aclfrommode(aclent_t *, int, mode_t *);
 extern int aclsort(int, int, aclent_t *);
 extern char *acltotext(aclent_t *, int);
 extern aclent_t *aclfromtext(char *, int *);
 extern void acl_free(acl_t *);
 extern int acl_get(const char *, int, acl_t **);
 extern int facl_get(int, int, acl_t **);
 extern int acl_set(const char *, acl_t *acl);
 extern int facl_set(int, acl_t *acl);
 extern int acl_strip(const char *, uid_t, gid_t, mode_t);
 extern int acl_trivial(const char *);
 extern char *acl_totext(acl_t *, int);
 extern int acl_fromtext(const char *, acl_t **);
 extern int acl_check(acl_t *, int);
 
 #else	/* !defined(_KERNEL) */
 
 extern void ksort(caddr_t, int, int, int (*)(void *, void *));
 extern int cmp2acls(void *, void *);
 
 #endif	/* !defined(_KERNEL) */
 
 extern int acl(const char *path, int cmd, int cnt, void *buf);
 extern int facl(int fd, int cmd, int cnt, void *buf);
 
 #ifdef	__cplusplus
 }
 #endif
 
 #endif /* _SYS_ACL_H */
Index: vendor-sys/illumos/dist/uts/common/sys/bitmap.h
===================================================================
--- vendor-sys/illumos/dist/uts/common/sys/bitmap.h	(revision 329752)
+++ vendor-sys/illumos/dist/uts/common/sys/bitmap.h	(revision 329753)
@@ -1,197 +1,198 @@
 /*
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
  * Common Development and Distribution License (the "License").
  * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
  * See the License for the specific language governing permissions
  * and limitations under the License.
  *
  * When distributing Covered Code, include this CDDL HEADER in each
  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  * If applicable, add the following below this CDDL HEADER, with the
  * fields enclosed by brackets "[]" replaced with your own identifying
  * information: Portions Copyright [yyyy] [name of copyright owner]
  *
  * CDDL HEADER END
  */
 
 /*
  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
 /*
  * Copyright (c) 2014 by Delphix. All rights reserved.
+ * Copyright 2017 RackTop Systems.
  */
 
 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
 /*	  All Rights Reserved  	*/
 
 
 #ifndef _SYS_BITMAP_H
 #define	_SYS_BITMAP_H
 
 #ifdef	__cplusplus
 extern "C" {
 #endif
 
 #include <sys/feature_tests.h>
 #if defined(__GNUC__) && defined(_ASM_INLINES) && \
 	(defined(__i386) || defined(__amd64))
 #include <asm/bitmap.h>
 #endif
 
 /*
  * Operations on bitmaps of arbitrary size
  * A bitmap is a vector of 1 or more ulong_t's.
  * The user of the package is responsible for range checks and keeping
  * track of sizes.
  */
 
 #ifdef _LP64
 #define	BT_ULSHIFT	6 /* log base 2 of BT_NBIPUL, to extract word index */
 #define	BT_ULSHIFT32	5 /* log base 2 of BT_NBIPUL32, to extract word index */
 #else
 #define	BT_ULSHIFT	5 /* log base 2 of BT_NBIPUL, to extract word index */
 #endif
 
 #define	BT_NBIPUL	(1 << BT_ULSHIFT)	/* n bits per ulong_t */
 #define	BT_ULMASK	(BT_NBIPUL - 1)		/* to extract bit index */
 
 #ifdef _LP64
 #define	BT_NBIPUL32	(1 << BT_ULSHIFT32)	/* n bits per ulong_t */
 #define	BT_ULMASK32	(BT_NBIPUL32 - 1)	/* to extract bit index */
 #define	BT_ULMAXMASK	0xffffffffffffffff	/* used by bt_getlowbit */
 #else
 #define	BT_ULMAXMASK	0xffffffff
 #endif
 
 /*
  * bitmap is a ulong_t *, bitindex an index_t
  *
  * The macros BT_WIM and BT_BIW are internal; there is no need
  * for users of this package to use them.
  */
 
 /*
  * word in map
  */
 #define	BT_WIM(bitmap, bitindex) \
 	((bitmap)[(bitindex) >> BT_ULSHIFT])
 /*
  * bit in word
  */
 #define	BT_BIW(bitindex) \
 	(1UL << ((bitindex) & BT_ULMASK))
 
 #ifdef _LP64
 #define	BT_WIM32(bitmap, bitindex) \
 	((bitmap)[(bitindex) >> BT_ULSHIFT32])
 
 #define	BT_BIW32(bitindex) \
 	(1UL << ((bitindex) & BT_ULMASK32))
 #endif
 
 /*
  * These are public macros
  *
  * BT_BITOUL == n bits to n ulong_t's
  */
 #define	BT_BITOUL(nbits) \
 	(((nbits) + BT_NBIPUL - 1l) / BT_NBIPUL)
 #define	BT_SIZEOFMAP(nbits) \
 	(BT_BITOUL(nbits) * sizeof (ulong_t))
 #define	BT_TEST(bitmap, bitindex) \
 	((BT_WIM((bitmap), (bitindex)) & BT_BIW(bitindex)) ? 1 : 0)
 #define	BT_SET(bitmap, bitindex) \
 	{ BT_WIM((bitmap), (bitindex)) |= BT_BIW(bitindex); }
 #define	BT_CLEAR(bitmap, bitindex) \
 	{ BT_WIM((bitmap), (bitindex)) &= ~BT_BIW(bitindex); }
 
 #ifdef _LP64
 #define	BT_BITOUL32(nbits) \
 	(((nbits) + BT_NBIPUL32 - 1l) / BT_NBIPUL32)
 #define	BT_SIZEOFMAP32(nbits) \
 	(BT_BITOUL32(nbits) * sizeof (uint_t))
 #define	BT_TEST32(bitmap, bitindex) \
 	((BT_WIM32((bitmap), (bitindex)) & BT_BIW32(bitindex)) ? 1 : 0)
 #define	BT_SET32(bitmap, bitindex) \
 	{ BT_WIM32((bitmap), (bitindex)) |= BT_BIW32(bitindex); }
 #define	BT_CLEAR32(bitmap, bitindex) \
 	{ BT_WIM32((bitmap), (bitindex)) &= ~BT_BIW32(bitindex); }
 #endif /* _LP64 */
 
 
 /*
  * BIT_ONLYONESET is a private macro not designed for bitmaps of
  * arbitrary size.  u must be an unsigned integer/long.  It returns
  * true if one and only one bit is set in u.
  */
 #define	BIT_ONLYONESET(u) \
 	((((u) == 0) ? 0 : ((u) & ((u) - 1)) == 0))
 
-#if defined(_KERNEL) && !defined(_ASM)
+#if (defined(_KERNEL) || defined(_FAKE_KERNEL)) && !defined(_ASM)
 #include <sys/atomic.h>
 
 /*
  * return next available bit index from map with specified number of bits
  */
 extern index_t	bt_availbit(ulong_t *bitmap, size_t nbits);
 /*
  * find the highest order bit that is on, and is within or below
  * the word specified by wx
  */
 extern int	bt_gethighbit(ulong_t *mapp, int wx);
 extern int	bt_range(ulong_t *bitmap, size_t *pos1, size_t *pos2,
 			size_t end_pos);
 /*
  * Find highest and lowest one bit set.
  *	Returns bit number + 1 of bit that is set, otherwise returns 0.
  * Low order bit is 0, high order bit is 31.
  */
 extern int	highbit(ulong_t);
 extern int	highbit64(uint64_t);
 extern int	lowbit(ulong_t);
 extern int	bt_getlowbit(ulong_t *bitmap, size_t start, size_t stop);
 extern void	bt_copy(ulong_t *, ulong_t *, ulong_t);
 
 /*
  * find the parity
  */
 extern int	odd_parity(ulong_t);
 
 /*
  * Atomically set/clear bits
  * Atomic exclusive operations will set "result" to "-1"
  * if the bit is already set/cleared. "result" will be set
  * to 0 otherwise.
  */
 #define	BT_ATOMIC_SET(bitmap, bitindex) \
 	{ atomic_or_ulong(&(BT_WIM(bitmap, bitindex)), BT_BIW(bitindex)); }
 #define	BT_ATOMIC_CLEAR(bitmap, bitindex) \
 	{ atomic_and_ulong(&(BT_WIM(bitmap, bitindex)), ~BT_BIW(bitindex)); }
 
 #define	BT_ATOMIC_SET_EXCL(bitmap, bitindex, result) \
 	{ result = atomic_set_long_excl(&(BT_WIM(bitmap, bitindex)),	\
 	    (bitindex) % BT_NBIPUL); }
 #define	BT_ATOMIC_CLEAR_EXCL(bitmap, bitindex, result) \
 	{ result = atomic_clear_long_excl(&(BT_WIM(bitmap, bitindex)),	\
 	    (bitindex) % BT_NBIPUL); }
 
 /*
  * Extracts bits between index h (high, inclusive) and l (low, inclusive) from
  * u, which must be an unsigned integer.
  */
 #define	BITX(u, h, l)	(((u) >> (l)) & ((1LU << ((h) - (l) + 1LU)) - 1LU))
 
-#endif	/* _KERNEL && !_ASM */
+#endif	/* (_KERNEL || _FAKE_KERNEL) && !_ASM */
 
 #ifdef	__cplusplus
 }
 #endif
 
 #endif	/* _SYS_BITMAP_H */
Index: vendor-sys/illumos/dist/uts/common/sys/cpupart.h
===================================================================
--- vendor-sys/illumos/dist/uts/common/sys/cpupart.h	(revision 329752)
+++ vendor-sys/illumos/dist/uts/common/sys/cpupart.h	(revision 329753)
@@ -1,157 +1,158 @@
 /*
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
  * Common Development and Distribution License (the "License").
  * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
  * See the License for the specific language governing permissions
  * and limitations under the License.
  *
  * When distributing Covered Code, include this CDDL HEADER in each
  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  * If applicable, add the following below this CDDL HEADER, with the
  * fields enclosed by brackets "[]" replaced with your own identifying
  * information: Portions Copyright [yyyy] [name of copyright owner]
  *
  * CDDL HEADER END
  */
 /*
  * Copyright (c) 1996, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2017 RackTop Systems.
  */
 
 #ifndef	_SYS_CPUPART_H
 #define	_SYS_CPUPART_H
 
 #include <sys/types.h>
 #include <sys/processor.h>
 #include <sys/cpuvar.h>
 #include <sys/disp.h>
 #include <sys/pset.h>
 #include <sys/lgrp.h>
 #include <sys/lgrp_user.h>
 #include <sys/pg.h>
 #include <sys/bitset.h>
 #include <sys/time.h>
 
 #ifdef	__cplusplus
 extern "C" {
 #endif
 
-#ifdef _KERNEL
+#if defined(_KERNEL) || defined(_FAKE_KERNEL)
 
 typedef int	cpupartid_t;
 
 /*
  * Special partition id.
  */
 #define	CP_DEFAULT	0
 
 /*
  * Flags for cpupart_list()
  */
 #define	CP_ALL		0		/* return all cpu partitions */
 #define	CP_NONEMPTY	1		/* return only non-empty ones */
 
 typedef struct cpupart {
 	disp_t		cp_kp_queue;	/* partition-wide kpreempt queue */
 	cpupartid_t	cp_id;		/* partition ID */
 	int		cp_ncpus;	/* number of online processors */
 	struct cpupart	*cp_next;	/* next partition in list */
 	struct cpupart	*cp_prev;	/* previous partition in list */
 	struct cpu	*cp_cpulist;	/* processor list */
 	struct kstat	*cp_kstat;	/* per-partition statistics */
 
 	/*
 	 * cp_nrunnable and cp_nrunning are used to calculate load average.
 	 */
 	uint_t		cp_nrunnable;	/* current # of runnable threads */
 	uint_t		cp_nrunning;	/* current # of running threads */
 
 	/*
 	 * cp_updates, cp_nrunnable_cum, cp_nwaiting_cum, and cp_hp_avenrun
 	 * are used to generate kstat information on an as-needed basis.
 	 */
 	uint64_t	cp_updates;	/* number of statistics updates */
 	uint64_t	cp_nrunnable_cum; /* cum. # of runnable threads */
 	uint64_t	cp_nwaiting_cum;  /* cum. # of waiting threads */
 
 	struct loadavg_s cp_loadavg;	/* cpupart loadavg */
 
 	klgrpset_t	cp_lgrpset;	/* set of lgroups on which this */
 					/*    partition has cpus */
 	lpl_t		*cp_lgrploads;	/* table of load averages for this  */
 					/*    partition, indexed by lgrp ID */
 	int		cp_nlgrploads;	/* size of cp_lgrploads table */
 	uint64_t	cp_hp_avenrun[3]; /* high-precision load average */
 	uint_t		cp_attr;	/* bitmask of attributes */
 	lgrp_gen_t	cp_gen;		/* generation number */
 	lgrp_id_t	cp_lgrp_hint;	/* last home lgroup chosen */
 	bitset_t	cp_cmt_pgs;	/* CMT PGs represented */
 	bitset_t	cp_haltset;	/* halted CPUs */
 } cpupart_t;
 
 typedef struct cpupart_kstat {
 	kstat_named_t	cpk_updates;		/* number of updates */
 	kstat_named_t	cpk_runnable;		/* cum # of runnable threads */
 	kstat_named_t	cpk_waiting;		/* cum # waiting for I/O */
 	kstat_named_t	cpk_ncpus;		/* current # of CPUs */
 	kstat_named_t	cpk_avenrun_1min;	/* 1-minute load average */
 	kstat_named_t	cpk_avenrun_5min;	/* 5-minute load average */
 	kstat_named_t	cpk_avenrun_15min;	/* 15-minute load average */
 } cpupart_kstat_t;
 
 /*
  * Macro to obtain the maximum run priority for the global queue associated
  * with given cpu partition.
  */
 #define	CP_MAXRUNPRI(cp)	((cp)->cp_kp_queue.disp_maxrunpri)
 
 /*
  * This macro is used to determine if the given thread must surrender
  * CPU to higher priority runnable threads on one of its dispatch queues.
  * This should really be defined in <sys/disp.h> but it is not because
  * including <sys/cpupart.h> there would cause recursive includes.
  */
 #define	DISP_MUST_SURRENDER(t)				\
 	((DISP_MAXRUNPRI(t) > DISP_PRIO(t)) ||		\
 	(CP_MAXRUNPRI(t->t_cpupart) > DISP_PRIO(t)))
 
 extern cpupart_t	cp_default;
 extern cpupart_t	*cp_list_head;
 extern uint_t		cp_numparts;
 extern uint_t		cp_numparts_nonempty;
 
 /*
  * Each partition contains a bitset that indicates which CPUs are halted and
  * which ones are running. Given the growing number of CPUs in current and
  * future platforms, it's important to fanout each CPU within its partition's
  * haltset to prevent contention due to false sharing. The fanout factor
  * is platform specific, and declared accordingly.
  */
 extern uint_t cp_haltset_fanout;
 
 extern void	cpupart_initialize_default();
 extern cpupart_t *cpupart_find(psetid_t);
 extern int	cpupart_create(psetid_t *);
 extern int	cpupart_destroy(psetid_t);
 extern psetid_t	cpupart_query_cpu(cpu_t *);
 extern int	cpupart_attach_cpu(psetid_t, cpu_t *, int);
 extern int	cpupart_get_cpus(psetid_t *, processorid_t *, uint_t *);
 extern int	cpupart_bind_thread(kthread_id_t, psetid_t, int, void *,
     void *);
 extern void	cpupart_kpqalloc(pri_t);
 extern int	cpupart_get_loadavg(psetid_t, int *, int);
 extern uint_t	cpupart_list(psetid_t *, uint_t, int);
 extern int	cpupart_setattr(psetid_t, uint_t);
 extern int	cpupart_getattr(psetid_t, uint_t *);
 
-#endif	/* _KERNEL */
+#endif	/* _KERNEL || _FAKE_KERNEL */
 
 #ifdef	__cplusplus
 }
 #endif
 
 #endif	/* _SYS_CPUPART_H */
Index: vendor-sys/illumos/dist/uts/common/sys/cpuvar.h
===================================================================
--- vendor-sys/illumos/dist/uts/common/sys/cpuvar.h	(revision 329752)
+++ vendor-sys/illumos/dist/uts/common/sys/cpuvar.h	(revision 329753)
@@ -1,832 +1,834 @@
 /*
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
  * Common Development and Distribution License (the "License").
  * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
  * See the License for the specific language governing permissions
  * and limitations under the License.
  *
  * When distributing Covered Code, include this CDDL HEADER in each
  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  * If applicable, add the following below this CDDL HEADER, with the
  * fields enclosed by brackets "[]" replaced with your own identifying
  * information: Portions Copyright [yyyy] [name of copyright owner]
  *
  * CDDL HEADER END
  */
 
 /*
  * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2012 by Delphix. All rights reserved.
  * Copyright 2014 Igor Kozhukhov <ikozhukhov@gmail.com>.
+ * Copyright 2017 RackTop Systems.
  */
 
 #ifndef _SYS_CPUVAR_H
 #define	_SYS_CPUVAR_H
 
 #include <sys/thread.h>
 #include <sys/sysinfo.h>	/* has cpu_stat_t definition */
 #include <sys/disp.h>
 #include <sys/processor.h>
+#include <sys/kcpc.h>		/* has kcpc_ctx_t definition */
 
 #include <sys/loadavg.h>
 #if (defined(_KERNEL) || defined(_KMEMUSER)) && defined(_MACHDEP)
 #include <sys/machcpuvar.h>
 #endif
 
 #include <sys/types.h>
 #include <sys/file.h>
 #include <sys/bitmap.h>
 #include <sys/rwlock.h>
 #include <sys/msacct.h>
 #if defined(__GNUC__) && defined(_ASM_INLINES) && defined(_KERNEL) && \
 	(defined(__i386) || defined(__amd64))
 #include <asm/cpuvar.h>
 #endif
 
 #ifdef	__cplusplus
 extern "C" {
 #endif
 
 struct squeue_set_s;
 
 #define	CPU_CACHE_COHERENCE_SIZE	64
 
 /*
  * For fast event tracing.
  */
 struct ftrace_record;
 typedef struct ftrace_data {
 	int			ftd_state;	/* ftrace flags */
 	kmutex_t		ftd_unused;	/* ftrace buffer lock, unused */
 	struct ftrace_record	*ftd_cur;	/* current record */
 	struct ftrace_record	*ftd_first;	/* first record */
 	struct ftrace_record	*ftd_last;	/* last record */
 } ftrace_data_t;
 
 struct cyc_cpu;
 struct nvlist;
 
 /*
  * Per-CPU data.
  *
  * Be careful adding new members: if they are not the same in all modules (e.g.
  * change size depending on a #define), CTF uniquification can fail to work
  * properly.  Furthermore, this is transitive in that it applies recursively to
  * all types pointed to by cpu_t.
  */
 typedef struct cpu {
 	processorid_t	cpu_id;			/* CPU number */
 	processorid_t	cpu_seqid;	/* sequential CPU id (0..ncpus-1) */
 	volatile cpu_flag_t cpu_flags;		/* flags indicating CPU state */
 	struct cpu	*cpu_self;		/* pointer to itself */
 	kthread_t	*cpu_thread;		/* current thread */
 	kthread_t	*cpu_idle_thread;	/* idle thread for this CPU */
 	kthread_t	*cpu_pause_thread;	/* pause thread for this CPU */
 	klwp_id_t	cpu_lwp;		/* current lwp (if any) */
 	klwp_id_t	cpu_fpowner;		/* currently loaded fpu owner */
 	struct cpupart	*cpu_part;		/* partition with this CPU */
 	struct lgrp_ld	*cpu_lpl;		/* pointer to this cpu's load */
 	int		cpu_cache_offset;	/* see kmem.c for details */
 
 	/*
 	 * Links to other CPUs.  It is safe to walk these lists if
 	 * one of the following is true:
 	 * 	- cpu_lock held
 	 * 	- preemption disabled via kpreempt_disable
 	 * 	- PIL >= DISP_LEVEL
 	 * 	- acting thread is an interrupt thread
 	 * 	- all other CPUs are paused
 	 */
 	struct cpu	*cpu_next;		/* next existing CPU */
 	struct cpu	*cpu_prev;		/* prev existing CPU */
 	struct cpu	*cpu_next_onln;		/* next online (enabled) CPU */
 	struct cpu	*cpu_prev_onln;		/* prev online (enabled) CPU */
 	struct cpu	*cpu_next_part;		/* next CPU in partition */
 	struct cpu	*cpu_prev_part;		/* prev CPU in partition */
 	struct cpu	*cpu_next_lgrp;		/* next CPU in latency group */
 	struct cpu	*cpu_prev_lgrp;		/* prev CPU in latency group */
 	struct cpu	*cpu_next_lpl;		/* next CPU in lgrp partition */
 	struct cpu	*cpu_prev_lpl;
 
 	struct cpu_pg	*cpu_pg;		/* cpu's processor groups */
 
 	void		*cpu_reserved[4];	/* reserved for future use */
 
 	/*
 	 * Scheduling variables.
 	 */
 	disp_t		*cpu_disp;		/* dispatch queue data */
 	/*
 	 * Note that cpu_disp is set before the CPU is added to the system
 	 * and is never modified.  Hence, no additional locking is needed
 	 * beyond what's necessary to access the cpu_t structure.
 	 */
 	char		cpu_runrun;	/* scheduling flag - set to preempt */
 	char		cpu_kprunrun;		/* force kernel preemption */
 	pri_t		cpu_chosen_level; 	/* priority at which cpu */
 						/* was chosen for scheduling */
 	kthread_t	*cpu_dispthread; /* thread selected for dispatch */
 	disp_lock_t	cpu_thread_lock; /* dispatcher lock on current thread */
 	uint8_t		cpu_disp_flags;	/* flags used by dispatcher */
 	/*
 	 * The following field is updated whenever cpu_dispthread changes,
 	 * and also wherever the priority of the current thread
 	 * (cpu_dispthread) changes.  It is used in disp_lowpri_cpu().
 	 */
 	pri_t		cpu_dispatch_pri; /* priority of cpu_dispthread */
 	clock_t		cpu_last_swtch;	/* last time switched to new thread */
 
 	/*
 	 * Interrupt data.
 	 */
 	caddr_t		cpu_intr_stack;	/* interrupt stack */
 	kthread_t	*cpu_intr_thread; /* interrupt thread list */
 	uint_t		cpu_intr_actv;	/* interrupt levels active (bitmask) */
 	int		cpu_base_spl;	/* priority for highest rupt active */
 
 	/*
 	 * Statistics.
 	 */
 	cpu_stats_t	cpu_stats;		/* per-CPU statistics */
 	struct kstat	*cpu_info_kstat;	/* kstat for cpu info */
 
 	uintptr_t	cpu_profile_pc;	/* kernel PC in profile interrupt */
 	uintptr_t	cpu_profile_upc; /* user PC in profile interrupt */
 	uintptr_t	cpu_profile_pil; /* PIL when profile interrupted */
 
 	ftrace_data_t	cpu_ftrace;		/* per cpu ftrace data */
 
 	clock_t		cpu_deadman_counter;	/* used by deadman() */
 	uint_t		cpu_deadman_countdown;	/* used by deadman() */
 
 	kmutex_t	cpu_cpc_ctxlock; /* protects context for idle thread */
 	kcpc_ctx_t	*cpu_cpc_ctx;	/* performance counter context */
 
 	/*
 	 * Configuration information for the processor_info system call.
 	 */
 	processor_info_t cpu_type_info;	/* config info */
 	time_t		cpu_state_begin; /* when CPU entered current state */
 	char		cpu_cpr_flags;	/* CPR related info */
 	struct cyc_cpu	*cpu_cyclic;	/* per cpu cyclic subsystem data */
 	struct squeue_set_s *cpu_squeue_set;	/* per cpu squeue set */
 	struct nvlist	*cpu_props;	/* pool-related properties */
 
 	krwlock_t	cpu_ft_lock;		/* DTrace: fasttrap lock */
 	uintptr_t	cpu_dtrace_caller;	/* DTrace: caller, if any */
 	hrtime_t	cpu_dtrace_chillmark;	/* DTrace: chill mark time */
 	hrtime_t	cpu_dtrace_chilled;	/* DTrace: total chill time */
 	uint64_t	cpu_dtrace_probes;	/* DTrace: total probes fired */
 	hrtime_t	cpu_dtrace_nsec;	/* DTrace: ns in dtrace_probe */
 
 	volatile uint16_t cpu_mstate;		/* cpu microstate */
 	volatile uint16_t cpu_mstate_gen;	/* generation counter */
 	volatile hrtime_t cpu_mstate_start;	/* cpu microstate start time */
 	volatile hrtime_t cpu_acct[NCMSTATES];	/* cpu microstate data */
 	hrtime_t	cpu_intracct[NCMSTATES]; /* interrupt mstate data */
 	hrtime_t	cpu_waitrq;		/* cpu run-queue wait time */
 	struct loadavg_s cpu_loadavg;		/* loadavg info for this cpu */
 
 	char		*cpu_idstr;	/* for printing and debugging */
 	char		*cpu_brandstr;	/* for printing */
 
 	/*
 	 * Sum of all device interrupt weights that are currently directed at
 	 * this cpu. Cleared at start of interrupt redistribution.
 	 */
 	int32_t		cpu_intr_weight;
 	void		*cpu_vm_data;
 
 	struct cpu_physid *cpu_physid;	/* physical associations */
 
 	uint64_t	cpu_curr_clock;		/* current clock freq in Hz */
 	char		*cpu_supp_freqs;	/* supported freqs in Hz */
 
 	uintptr_t	cpu_cpcprofile_pc;	/* kernel PC in cpc interrupt */
 	uintptr_t	cpu_cpcprofile_upc;	/* user PC in cpc interrupt */
 
 	/*
 	 * Interrupt load factor used by dispatcher & softcall
 	 */
 	hrtime_t	cpu_intrlast;   /* total interrupt time (nsec) */
 	int		cpu_intrload;   /* interrupt load factor (0-99%) */
 
 	uint_t		cpu_rotor;	/* for cheap pseudo-random numbers */
 
 	struct cu_cpu_info	*cpu_cu_info;	/* capacity & util. info */
 
 	/*
 	 * cpu_generation is updated whenever CPU goes on-line or off-line.
 	 * Updates to cpu_generation are protected by cpu_lock.
 	 *
 	 * See CPU_NEW_GENERATION() macro below.
 	 */
 	volatile uint_t		cpu_generation;	/* tracking on/off-line */
 
 	/*
 	 * New members must be added /before/ this member, as the CTF tools
 	 * rely on this being the last field before cpu_m, so they can
 	 * correctly calculate the offset when synthetically adding the cpu_m
 	 * member in objects that do not have it.  This fixup is required for
 	 * uniquification to work correctly.
 	 */
 	uintptr_t	cpu_m_pad;
 
 #if (defined(_KERNEL) || defined(_KMEMUSER)) && defined(_MACHDEP)
 	struct machcpu	cpu_m;		/* per architecture info */
 #endif
 } cpu_t;
 
 /*
  * The cpu_core structure consists of per-CPU state available in any context.
  * On some architectures, this may mean that the page(s) containing the
  * NCPU-sized array of cpu_core structures must be locked in the TLB -- it
  * is up to the platform to assure that this is performed properly.  Note that
  * the structure is sized to avoid false sharing.
  */
 /* CPUC_SIZE: combined size of the non-padding members of struct cpu_core. */
 #define	CPUC_SIZE		(sizeof (uint16_t) + sizeof (uint8_t) + \
 				sizeof (uintptr_t) + sizeof (kmutex_t))
 /* Parenthesized so the difference survives use inside larger expressions. */
 #define	CPUC_PADSIZE		(CPU_CACHE_COHERENCE_SIZE - CPUC_SIZE)
 
 typedef struct cpu_core {
 	uint16_t	cpuc_dtrace_flags;	/* DTrace flags */
 	uint8_t		cpuc_dcpc_intr_state;	/* DCPC provider intr state */
 	uint8_t		cpuc_pad[CPUC_PADSIZE];	/* padding */
 	uintptr_t	cpuc_dtrace_illval;	/* DTrace illegal value */
 	kmutex_t	cpuc_pid_lock;		/* DTrace pid provider lock */
 } cpu_core_t;
 
 #ifdef _KERNEL
 extern cpu_core_t cpu_core[];
 #endif /* _KERNEL */
 
 /*
  * CPU_ON_INTR() macro. Returns non-zero if currently on interrupt stack.
  * Note that this isn't a test for a high PIL.  For example, cpu_intr_actv
  * does not get updated when we go through sys_trap from TL>0 at high PIL.
  * getpil() should be used instead to check for PIL levels.
  */
 #define	CPU_ON_INTR(cpup) ((cpup)->cpu_intr_actv >> (LOCK_LEVEL + 1))
 
 /*
  * Check to see if an interrupt thread might be active at a given ipl.
  * If so return true.
  * We must be conservative--it is ok to give a false yes, but a false no
  * will cause disaster.  (But if the situation changes after we check it is
  * ok--the caller is trying to ensure that an interrupt routine has been
  * exited).
  * This is used when trying to remove an interrupt handler from an autovector
  * list in avintr.c.
  */
 #define	INTR_ACTIVE(cpup, level)	\
 	((level) <= LOCK_LEVEL ? 	\
 	((cpup)->cpu_intr_actv & (1 << (level))) : (CPU_ON_INTR(cpup)))
 
 /*
  * CPU_PSEUDO_RANDOM() returns a per CPU value that changes each time one
  * looks at it. It's meant as a cheap mechanism to be incorporated in routines
  * wanting to avoid biasing, but where true randomness isn't needed (just
  * something that changes).
  */
 #define	CPU_PSEUDO_RANDOM() (CPU->cpu_rotor++)
 
 #if defined(_KERNEL) || defined(_KMEMUSER)
 
 #define	INTR_STACK_SIZE	MAX(DEFAULTSTKSZ, PAGESIZE)
 
 /* MEMBERS PROTECTED BY "atomicity": cpu_flags */
 
 /*
  * Flags in the CPU structure.
  *
  * These are protected by cpu_lock (except during creation).
  *
  * Offlined-CPUs have three stages of being offline:
  *
  * CPU_ENABLE indicates that the CPU is participating in I/O interrupts
  * that can be directed at a number of different CPUs.  If CPU_ENABLE
  * is off, the CPU will not be given interrupts that can be sent elsewhere,
  * but will still get interrupts from devices associated with that CPU only,
  * and from other CPUs.
  *
  * CPU_OFFLINE indicates that the dispatcher should not allow any threads
  * other than interrupt threads to run on that CPU.  A CPU will not have
  * CPU_OFFLINE set if there are any bound threads (besides interrupts).
  *
  * CPU_QUIESCED is set if p_offline was able to turn the CPU completely
  * idle and it will not have to run interrupt threads.  In this case it'll
  * stay in the idle loop until CPU_QUIESCED is turned off.
  *
  * CPU_FROZEN is used only by CPR to mark CPUs that have been successfully
  * suspended (in the suspend path), or have yet to be resumed (in the resume
  * case).
  *
  * On some platforms CPUs can be individually powered off.
  * The following flags are set for powered off CPUs: CPU_QUIESCED,
  * CPU_OFFLINE, and CPU_POWEROFF.  The following flags are cleared:
  * CPU_RUNNING, CPU_READY, CPU_EXISTS, and CPU_ENABLE.
  */
 #define	CPU_RUNNING	0x001		/* CPU running */
 #define	CPU_READY	0x002		/* CPU ready for cross-calls */
 #define	CPU_QUIESCED	0x004		/* CPU will stay in idle */
 #define	CPU_EXISTS	0x008		/* CPU is configured */
 #define	CPU_ENABLE	0x010		/* CPU enabled for interrupts */
 #define	CPU_OFFLINE	0x020		/* CPU offline via p_online */
 #define	CPU_POWEROFF	0x040		/* CPU is powered off */
 #define	CPU_FROZEN	0x080		/* CPU is frozen via CPR suspend */
 #define	CPU_SPARE	0x100		/* CPU offline available for use */
 #define	CPU_FAULTED	0x200		/* CPU offline diagnosed faulty */
 
 #define	FMT_CPU_FLAGS							\
 	"\20\12fault\11spare\10frozen"					\
 	"\7poweroff\6offline\5enable\4exist\3quiesced\2ready\1run"
 
 #define	CPU_ACTIVE(cpu)	(((cpu)->cpu_flags & CPU_OFFLINE) == 0)
 
 /*
  * Flags for cpu_offline(), cpu_faulted(), and cpu_spare().
  */
 #define	CPU_FORCED	0x0001		/* Force CPU offline */
 
 /*
  * DTrace flags.
  */
 #define	CPU_DTRACE_NOFAULT	0x0001	/* Don't fault */
 #define	CPU_DTRACE_DROP		0x0002	/* Drop this ECB */
 #define	CPU_DTRACE_BADADDR	0x0004	/* DTrace fault: bad address */
 #define	CPU_DTRACE_BADALIGN	0x0008	/* DTrace fault: bad alignment */
 #define	CPU_DTRACE_DIVZERO	0x0010	/* DTrace fault: divide by zero */
 #define	CPU_DTRACE_ILLOP	0x0020	/* DTrace fault: illegal operation */
 #define	CPU_DTRACE_NOSCRATCH	0x0040	/* DTrace fault: out of scratch */
 #define	CPU_DTRACE_KPRIV	0x0080	/* DTrace fault: bad kernel access */
 #define	CPU_DTRACE_UPRIV	0x0100	/* DTrace fault: bad user access */
 #define	CPU_DTRACE_TUPOFLOW	0x0200	/* DTrace fault: tuple stack overflow */
 #if defined(__sparc)
 #define	CPU_DTRACE_FAKERESTORE	0x0400	/* pid provider hint to getreg */
 #endif
 #define	CPU_DTRACE_ENTRY	0x0800	/* pid provider hint to ustack() */
 #define	CPU_DTRACE_BADSTACK	0x1000	/* DTrace fault: bad stack */
 
 #define	CPU_DTRACE_FAULT	(CPU_DTRACE_BADADDR | CPU_DTRACE_BADALIGN | \
 				CPU_DTRACE_DIVZERO | CPU_DTRACE_ILLOP | \
 				CPU_DTRACE_NOSCRATCH | CPU_DTRACE_KPRIV | \
 				CPU_DTRACE_UPRIV | CPU_DTRACE_TUPOFLOW | \
 				CPU_DTRACE_BADSTACK)
 #define	CPU_DTRACE_ERROR	(CPU_DTRACE_FAULT | CPU_DTRACE_DROP)
 
 /*
  * Dispatcher flags
  * These flags must be changed only by the current CPU.
  */
 #define	CPU_DISP_DONTSTEAL	0x01	/* CPU undergoing context swtch */
 #define	CPU_DISP_HALTED		0x02	/* CPU halted waiting for interrupt */
 
 #endif /* _KERNEL || _KMEMUSER */
 
 #if (defined(_KERNEL) || defined(_KMEMUSER)) && defined(_MACHDEP)
 
 /*
  * Macros for manipulating sets of CPUs as a bitmap.  Note that this
  * bitmap may vary in size depending on the maximum CPU id a specific
  * platform supports.  This may be different than the number of CPUs
  * the platform supports, since CPU ids can be sparse.  We define two
  * sets of macros; one for platforms where the maximum CPU id is less
  * than the number of bits in a single word (32 in a 32-bit kernel,
  * 64 in a 64-bit kernel), and one for platforms that require bitmaps
  * of more than one word.
  */
 
 #define	CPUSET_WORDS	BT_BITOUL(NCPU)
 #define	CPUSET_NOTINSET	((uint_t)-1)
 
 #if	CPUSET_WORDS > 1
 
 typedef struct cpuset {
 	ulong_t	cpub[CPUSET_WORDS];
 } cpuset_t;
 
 /*
  * Private functions for manipulating cpusets that do not fit in a
  * single word.  These should not be used directly; instead the
  * CPUSET_* macros should be used so the code will be portable
  * across different definitions of NCPU.
  */
 extern	void	cpuset_all(cpuset_t *);
 extern	void	cpuset_all_but(cpuset_t *, uint_t);
 extern	int	cpuset_isnull(cpuset_t *);
 extern	int	cpuset_cmp(cpuset_t *, cpuset_t *);
 extern	void	cpuset_only(cpuset_t *, uint_t);
 extern	uint_t	cpuset_find(cpuset_t *);
 extern	void	cpuset_bounds(cpuset_t *, uint_t *, uint_t *);
 
 #define	CPUSET_ALL(set)			cpuset_all(&(set))
 #define	CPUSET_ALL_BUT(set, cpu)	cpuset_all_but(&(set), cpu)
 #define	CPUSET_ONLY(set, cpu)		cpuset_only(&(set), cpu)
 #define	CPU_IN_SET(set, cpu)		BT_TEST((set).cpub, cpu)
 #define	CPUSET_ADD(set, cpu)		BT_SET((set).cpub, cpu)
 #define	CPUSET_DEL(set, cpu)		BT_CLEAR((set).cpub, cpu)
 #define	CPUSET_ISNULL(set)		cpuset_isnull(&(set))
 #define	CPUSET_ISEQUAL(set1, set2)	cpuset_cmp(&(set1), &(set2))
 
 /*
  * Find one CPU in the cpuset.
  * Sets "cpu" to the id of the found CPU, or CPUSET_NOTINSET if no cpu
  * could be found. (i.e. empty set)
  */
 #define	CPUSET_FIND(set, cpu)		{		\
 	cpu = cpuset_find(&(set));			\
 }
 
 /*
  * Determine the smallest and largest CPU id in the set. Returns
  * CPUSET_NOTINSET in smallest and largest when set is empty.
  */
 #define	CPUSET_BOUNDS(set, smallest, largest)	{		\
 	cpuset_bounds(&(set), &(smallest), &(largest));		\
 }
 
 /*
  * Atomic cpuset operations
  * These are safe to use for concurrent cpuset manipulations.
  * "xdel" and "xadd" are exclusive operations, that set "result" to "0"
  * if the add or del was successful, or "-1" if not successful.
  * (e.g. attempting to add a cpu to a cpuset that's already there, or
  * deleting a cpu that's not in the cpuset)
  */
 
 #define	CPUSET_ATOMIC_DEL(set, cpu)	BT_ATOMIC_CLEAR((set).cpub, (cpu))
 #define	CPUSET_ATOMIC_ADD(set, cpu)	BT_ATOMIC_SET((set).cpub, (cpu))
 
 #define	CPUSET_ATOMIC_XADD(set, cpu, result) \
 	BT_ATOMIC_SET_EXCL((set).cpub, cpu, result)
 
 #define	CPUSET_ATOMIC_XDEL(set, cpu, result) \
 	BT_ATOMIC_CLEAR_EXCL((set).cpub, cpu, result)
 
 
 #define	CPUSET_OR(set1, set2)		{		\
 	int _i;						\
 	for (_i = 0; _i < CPUSET_WORDS; _i++)		\
 		(set1).cpub[_i] |= (set2).cpub[_i];	\
 }
 
 #define	CPUSET_XOR(set1, set2)		{		\
 	int _i;						\
 	for (_i = 0; _i < CPUSET_WORDS; _i++)		\
 		(set1).cpub[_i] ^= (set2).cpub[_i];	\
 }
 
 #define	CPUSET_AND(set1, set2)		{		\
 	int _i;						\
 	for (_i = 0; _i < CPUSET_WORDS; _i++)		\
 		(set1).cpub[_i] &= (set2).cpub[_i];	\
 }
 
 #define	CPUSET_ZERO(set)		{		\
 	int _i;						\
 	for (_i = 0; _i < CPUSET_WORDS; _i++)		\
 		(set).cpub[_i] = 0;			\
 }
 
 #elif	CPUSET_WORDS == 1
 
 typedef	ulong_t	cpuset_t;	/* a set of CPUs */
 
 #define	CPUSET(cpu)			(1UL << (cpu))
 
 #define	CPUSET_ALL(set)			((void)((set) = ~0UL))
 #define	CPUSET_ALL_BUT(set, cpu)	((void)((set) = ~CPUSET(cpu)))
 #define	CPUSET_ONLY(set, cpu)		((void)((set) = CPUSET(cpu)))
 #define	CPU_IN_SET(set, cpu)		((set) & CPUSET(cpu))
 #define	CPUSET_ADD(set, cpu)		((void)((set) |= CPUSET(cpu)))
 #define	CPUSET_DEL(set, cpu)		((void)((set) &= ~CPUSET(cpu)))
 #define	CPUSET_ISNULL(set)		((set) == 0)
 #define	CPUSET_ISEQUAL(set1, set2)	((set1) == (set2))
 #define	CPUSET_OR(set1, set2)		((void)((set1) |= (set2)))
 #define	CPUSET_XOR(set1, set2)		((void)((set1) ^= (set2)))
 #define	CPUSET_AND(set1, set2)		((void)((set1) &= (set2)))
 #define	CPUSET_ZERO(set)		((void)((set) = 0))
 
 #define	CPUSET_FIND(set, cpu)		{		\
 	cpu = (uint_t)(lowbit(set) - 1);				\
 }
 
 #define	CPUSET_BOUNDS(set, smallest, largest)	{	\
 	smallest = (uint_t)(lowbit(set) - 1);		\
 	largest = (uint_t)(highbit(set) - 1);		\
 }
 
 #define	CPUSET_ATOMIC_DEL(set, cpu)	atomic_and_ulong(&(set), ~CPUSET(cpu))
 #define	CPUSET_ATOMIC_ADD(set, cpu)	atomic_or_ulong(&(set), CPUSET(cpu))
 
 #define	CPUSET_ATOMIC_XADD(set, cpu, result) \
 	{ result = atomic_set_long_excl(&(set), (cpu)); }
 
 #define	CPUSET_ATOMIC_XDEL(set, cpu, result) \
 	{ result = atomic_clear_long_excl(&(set), (cpu)); }
 
 #else	/* CPUSET_WORDS <= 0 */
 
 #error NCPU is undefined or invalid
 
 #endif	/* CPUSET_WORDS	*/
 
 extern cpuset_t cpu_seqid_inuse;
 
 #endif	/* (_KERNEL || _KMEMUSER) && _MACHDEP */
 
 /* Values and accessors for the cpu_cpr_flags member of cpu_t. */
 #define	CPU_CPR_OFFLINE		0x0
 #define	CPU_CPR_ONLINE		0x1
 #define	CPU_CPR_IS_OFFLINE(cpu)	(((cpu)->cpu_cpr_flags & CPU_CPR_ONLINE) == 0)
 #define	CPU_CPR_IS_ONLINE(cpu)	((cpu)->cpu_cpr_flags & CPU_CPR_ONLINE)
 /* ORs `flag' into cpu_cpr_flags; both arguments are parenthesized. */
 #define	CPU_SET_CPR_FLAGS(cpu, flag)	((cpu)->cpu_cpr_flags |= (flag))
 
 #if defined(_KERNEL) || defined(_KMEMUSER)
 
 extern struct cpu	*cpu[];		/* indexed by CPU number */
 extern struct cpu	**cpu_seq;	/* indexed by sequential CPU id */
 extern cpu_t		*cpu_list;	/* list of CPUs */
 extern cpu_t		*cpu_active;	/* list of active CPUs */
 extern int		ncpus;		/* number of CPUs present */
 extern int		ncpus_online;	/* number of CPUs not quiesced */
 extern int		max_ncpus;	/* max present before ncpus is known */
 extern int		boot_max_ncpus;	/* like max_ncpus but for real */
 extern int		boot_ncpus;	/* # cpus present @ boot */
 extern processorid_t	max_cpuid;	/* maximum CPU number */
 extern struct cpu	*cpu_inmotion;	/* offline or partition move target */
 extern cpu_t		*clock_cpu_list;
 extern processorid_t	max_cpu_seqid_ever;	/* maximum seqid ever given */
 
 #if defined(__i386) || defined(__amd64)
 extern struct cpu *curcpup(void);
 #define	CPU		(curcpup())	/* Pointer to current CPU */
 #else
 #define	CPU		(curthread->t_cpu)	/* Pointer to current CPU */
 #endif
 
 /*
  * CPU_CURRENT indicates to thread_affinity_set to use CPU->cpu_id
  * as the target and to grab cpu_lock instead of requiring the caller
  * to grab it.
  */
 #define	CPU_CURRENT	(-3)	/* parenthesized: safe in any expression */
 
 /*
  * Per-CPU statistics
  *
  * cpu_stats_t contains numerous system and VM-related statistics, in the form
  * of gauges or monotonically-increasing event occurrence counts.
  */
 
 #define	CPU_STATS_ENTER_K()	kpreempt_disable()
 #define	CPU_STATS_EXIT_K()	kpreempt_enable()
 
 #define	CPU_STATS_ADD_K(class, stat, amount) \
 	{	kpreempt_disable(); /* keep from switching CPUs */\
 		CPU_STATS_ADDQ(CPU, class, stat, amount); \
 		kpreempt_enable(); \
 	}
 
 #define	CPU_STATS_ADDQ(cp, class, stat, amount)	{			\
 	extern void __dtrace_probe___cpu_##class##info_##stat(uint_t,	\
 	    uint64_t *, cpu_t *);					\
 	uint64_t *stataddr = &((cp)->cpu_stats.class.stat);		\
 	__dtrace_probe___cpu_##class##info_##stat((amount),		\
 	    stataddr, cp);						\
 	*(stataddr) += (amount);					\
 }
 
 #define	CPU_STATS(cp, stat)                                       \
 	((cp)->cpu_stats.stat)
 
 /*
  * Increment CPU generation value.
  * This macro should be called whenever CPU goes on-line or off-line.
  * Updates to cpu_generation should be protected by cpu_lock.
  */
 #define	CPU_NEW_GENERATION(cp)	((cp)->cpu_generation++)
 
 #endif /* _KERNEL || _KMEMUSER */
 
 /*
- * CPU support routines.
+ * CPU support routines (not for genassym.c)
  */
-#if	defined(_KERNEL) && defined(__STDC__)	/* not for genassym.c */
+#if	(defined(_KERNEL) || defined(_FAKE_KERNEL)) && defined(__STDC__)
 
 struct zone;
 
 void	cpu_list_init(cpu_t *);
 void	cpu_add_unit(cpu_t *);
 void	cpu_del_unit(int cpuid);
 void	cpu_add_active(cpu_t *);
 void	cpu_kstat_init(cpu_t *);
 void	cpu_visibility_add(cpu_t *, struct zone *);
 void	cpu_visibility_remove(cpu_t *, struct zone *);
 void	cpu_visibility_configure(cpu_t *, struct zone *);
 void	cpu_visibility_unconfigure(cpu_t *, struct zone *);
 void	cpu_visibility_online(cpu_t *, struct zone *);
 void	cpu_visibility_offline(cpu_t *, struct zone *);
 void	cpu_create_intrstat(cpu_t *);
 void	cpu_delete_intrstat(cpu_t *);
 int	cpu_kstat_intrstat_update(kstat_t *, int);
 void	cpu_intr_swtch_enter(kthread_t *);
 void	cpu_intr_swtch_exit(kthread_t *);
 
 void	mbox_lock_init(void);	 /* initialize cross-call locks */
 void	mbox_init(int cpun);	 /* initialize cross-calls */
 void	poke_cpu(int cpun);	 /* interrupt another CPU (to preempt) */
 
 /*
  * values for safe_list.  Pause state that CPUs are in.
  */
 #define	PAUSE_IDLE	0		/* normal state */
 #define	PAUSE_READY	1		/* paused thread ready to spl */
 #define	PAUSE_WAIT	2		/* paused thread is spl-ed high */
 #define	PAUSE_DIE	3		/* tell pause thread to leave */
 #define	PAUSE_DEAD	4		/* pause thread has left */
 
 void	mach_cpu_pause(volatile char *);
 
 void	pause_cpus(cpu_t *off_cp, void *(*func)(void *));
 void	start_cpus(void);
 int	cpus_paused(void);
 
 void	cpu_pause_init(void);
 cpu_t	*cpu_get(processorid_t cpun);	/* get the CPU struct associated */
 
 int	cpu_online(cpu_t *cp);			/* take cpu online */
 int	cpu_offline(cpu_t *cp, int flags);	/* take cpu offline */
 int	cpu_spare(cpu_t *cp, int flags);	/* take cpu to spare */
 int	cpu_faulted(cpu_t *cp, int flags);	/* take cpu to faulted */
 int	cpu_poweron(cpu_t *cp);		/* take powered-off cpu to offline */
 int	cpu_poweroff(cpu_t *cp);	/* take offline cpu to powered-off */
 
 cpu_t	*cpu_intr_next(cpu_t *cp);	/* get next online CPU taking intrs */
 int	cpu_intr_count(cpu_t *cp);	/* count # of CPUs handling intrs */
 int	cpu_intr_on(cpu_t *cp);		/* CPU taking I/O interrupts? */
 void	cpu_intr_enable(cpu_t *cp);	/* enable I/O interrupts */
 int	cpu_intr_disable(cpu_t *cp);	/* disable I/O interrupts */
 void	cpu_intr_alloc(cpu_t *cp, int n); /* allocate interrupt threads */
 
 /*
  * Routines for checking CPU states.
  */
 int	cpu_is_online(cpu_t *);		/* check if CPU is online */
 int	cpu_is_nointr(cpu_t *);		/* check if CPU can service intrs */
 int	cpu_is_active(cpu_t *);		/* check if CPU can run threads */
 int	cpu_is_offline(cpu_t *);	/* check if CPU is offline */
 int	cpu_is_poweredoff(cpu_t *);	/* check if CPU is powered off */
 
 int	cpu_flagged_online(cpu_flag_t);	/* flags show CPU is online */
 int	cpu_flagged_nointr(cpu_flag_t);	/* flags show CPU not handling intrs */
 int	cpu_flagged_active(cpu_flag_t); /* flags show CPU scheduling threads */
 int	cpu_flagged_offline(cpu_flag_t); /* flags show CPU is offline */
 int	cpu_flagged_poweredoff(cpu_flag_t); /* flags show CPU is powered off */
 
 /*
  * The processor_info(2) state of a CPU is a simplified representation suitable
  * for use by an application program.  Kernel subsystems should utilize the
  * internal per-CPU state as given by the cpu_flags member of the cpu structure,
  * as this information may include platform- or architecture-specific state
  * critical to a subsystem's disposition of a particular CPU.
  */
 void	cpu_set_state(cpu_t *);		/* record/timestamp current state */
 int	cpu_get_state(cpu_t *);		/* get current cpu state */
 const char *cpu_get_state_str(cpu_t *);	/* get current cpu state as string */
 
 
 void	cpu_set_curr_clock(uint64_t);	/* indicate the current CPU's freq */
 void	cpu_set_supp_freqs(cpu_t *, const char *); /* set the CPU supported */
 						/* frequencies */
 
 int	cpu_configure(int);
 int	cpu_unconfigure(int);
 void	cpu_destroy_bound_threads(cpu_t *cp);
 
 extern int cpu_bind_thread(kthread_t *tp, processorid_t bind,
     processorid_t *obind, int *error);
 extern int cpu_unbind(processorid_t cpu_id, boolean_t force);
 extern void thread_affinity_set(kthread_t *t, int cpu_id);
 extern void thread_affinity_clear(kthread_t *t);
 extern void affinity_set(int cpu_id);
 extern void affinity_clear(void);
 extern void init_cpu_mstate(struct cpu *, int);
 extern void term_cpu_mstate(struct cpu *);
 extern void new_cpu_mstate(int, hrtime_t);
 extern void get_cpu_mstate(struct cpu *, hrtime_t *);
 extern void thread_nomigrate(void);
 extern void thread_allowmigrate(void);
 extern void weakbinding_stop(void);
 extern void weakbinding_start(void);
 
 /*
  * The following routines affect the CPU's participation in interrupt
  * processing, if that is applicable on the architecture.  This only affects
  * interrupts which aren't directed at the processor (not cross calls).
  *
  * cpu_disable_intr returns non-zero if interrupts were previously enabled.
  */
 int	cpu_disable_intr(struct cpu *cp); /* stop issuing interrupts to cpu */
 void	cpu_enable_intr(struct cpu *cp); /* start issuing interrupts to cpu */
 
 /*
  * The mutex cpu_lock protects cpu_flags for all CPUs, as well as the ncpus
  * and ncpus_online counts.
  */
 extern kmutex_t	cpu_lock;	/* lock protecting CPU data */
 
 /*
  * CPU state change events
  *
  * Various subsystems need to know when CPUs change their state. They get this
  * information by registering  CPU state change callbacks using
  * register_cpu_setup_func(). Whenever any CPU changes its state, the callback
  * function is called. The callback function is passed three arguments:
  *
  *   Event, described by cpu_setup_t
  *   CPU ID
  *   Transparent pointer passed when registering the callback
  *
  * The callback function is called with cpu_lock held. The return value from the
  * callback function is usually ignored, except for CPU_CONFIG and CPU_UNCONFIG
  * events. For these two events, non-zero return value indicates a failure and
  * prevents successful completion of the operation.
  *
  * New events may be added in the future. Callback functions should ignore any
  * events that they do not understand.
  *
  * The following events provide notification callbacks:
  *
  *  CPU_INIT	A new CPU is started and added to the list of active CPUs
  *		  This event is only used during boot
  *
  *  CPU_CONFIG	A newly inserted CPU is prepared for starting running code
  *		  This event is called by DR code
  *
  *  CPU_UNCONFIG CPU has been powered off and needs cleanup
  *		  This event is called by DR code
  *
  *  CPU_ON	CPU is enabled but does not run anything yet
  *
  *  CPU_INTR_ON	CPU is enabled and has interrupts enabled
  *
  *  CPU_OFF	CPU is going offline but can still run threads
  *
  *  CPU_CPUPART_OUT	CPU is going to move out of its partition
  *
  *  CPU_CPUPART_IN	CPU is going to move to a new partition
  *
  *  CPU_SETUP	CPU is set up during boot and can run threads
  */
 typedef enum {
 	CPU_INIT,
 	CPU_CONFIG,
 	CPU_UNCONFIG,
 	CPU_ON,
 	CPU_OFF,
 	CPU_CPUPART_IN,
 	CPU_CPUPART_OUT,
 	CPU_SETUP,
 	CPU_INTR_ON
 } cpu_setup_t;
 
 typedef int cpu_setup_func_t(cpu_setup_t, int, void *);
 
 /*
  * Routines used to register interest in cpu's being added to or removed
  * from the system.
  */
 extern void register_cpu_setup_func(cpu_setup_func_t *, void *);
 extern void unregister_cpu_setup_func(cpu_setup_func_t *, void *);
 extern void cpu_state_change_notify(int, cpu_setup_t);
 
 /*
  * Call specified function on the given CPU
  */
 typedef void (*cpu_call_func_t)(uintptr_t, uintptr_t);
 extern void cpu_call(cpu_t *, cpu_call_func_t, uintptr_t, uintptr_t);
 
 
 /*
  * Create various strings that describe the given CPU for the
  * processor_info system call and configuration-related kstats.
  */
 #define	CPU_IDSTRLEN	100
 
 extern void init_cpu_info(struct cpu *);
 extern void populate_idstr(struct cpu *);
 extern void cpu_vm_data_init(struct cpu *);
 extern void cpu_vm_data_destroy(struct cpu *);
 
-#endif	/* _KERNEL */
+#endif	/* _KERNEL || _FAKE_KERNEL */
 
 #ifdef	__cplusplus
 }
 #endif
 
 #endif /* _SYS_CPUVAR_H */
Index: vendor-sys/illumos/dist/uts/common/sys/fm/util.h
===================================================================
--- vendor-sys/illumos/dist/uts/common/sys/fm/util.h	(revision 329752)
+++ vendor-sys/illumos/dist/uts/common/sys/fm/util.h	(revision 329753)
@@ -1,103 +1,104 @@
 /*
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
  * Common Development and Distribution License (the "License").
  * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
  * See the License for the specific language governing permissions
  * and limitations under the License.
  *
  * When distributing Covered Code, include this CDDL HEADER in each
  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  * If applicable, add the following below this CDDL HEADER, with the
  * fields enclosed by brackets "[]" replaced with your own identifying
  * information: Portions Copyright [yyyy] [name of copyright owner]
  *
  * CDDL HEADER END
  */
 
 /*
  * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2017 RackTop Systems.
  */
 
 #ifndef	_SYS_FM_UTIL_H
 #define	_SYS_FM_UTIL_H
 
 #ifdef	__cplusplus
 extern "C" {
 #endif
 
 #include <sys/nvpair.h>
 #include <sys/errorq.h>
 
 /*
  * Shared user/kernel definitions for class length, error channel name,
  * and kernel event publisher string.
  */
 #define	FM_MAX_CLASS 100
 #define	FM_ERROR_CHAN	"com.sun:fm:error"
 #define	FM_PUB		"fm"
 
 /*
  * ereport dump device transport support
  *
  * Ereports are written out to the dump device at a prescribed offset from the
  * end, similar to in-transit log messages.  The ereports are represented as a
  * erpt_dump_t header followed by ed_size bytes of packed native nvlist data.
  *
  * NOTE: All of these constants and the header must be defined so they have the
  * same representation for *both* 32-bit and 64-bit producers and consumers.
  */
 #define	ERPT_MAGIC	0xf00d4eddU
 #define	ERPT_MAX_ERRS	16
 #define	ERPT_DATA_SZ	(6 * 1024)
 #define	ERPT_EVCH_MAX	256
 #define	ERPT_HIWAT	64
 
 typedef struct erpt_dump {
 	uint32_t ed_magic;	/* ERPT_MAGIC or zero to indicate end */
 	uint32_t ed_chksum;	/* checksum32() of packed nvlist data */
 	uint32_t ed_size;	/* ereport (nvl) fixed buf size */
 	uint32_t ed_pad;	/* reserved for future use */
 	hrtime_t ed_hrt_nsec;	/* hrtime of this ereport */
 	hrtime_t ed_hrt_base;	/* hrtime sample corresponding to ed_tod_base */
 	struct {
 		uint64_t sec;	/* seconds since gettimeofday() Epoch */
 		uint64_t nsec;	/* nanoseconds past ed_tod_base.sec */
 	} ed_tod_base;
 } erpt_dump_t;
 
-#ifdef _KERNEL
+#if defined(_KERNEL) || defined(_FAKE_KERNEL)
 #include <sys/systm.h>
 
 #define	FM_STK_DEPTH	20	/* maximum stack depth */
 #define	FM_SYM_SZ	64	/* maximum symbol size */
 #define	FM_ERR_PIL	2	/* PIL for ereport_errorq drain processing */
 
 #define	FM_EREPORT_PAYLOAD_NAME_STACK		"stack"
 
 extern errorq_t *ereport_errorq;
 extern void *ereport_dumpbuf;
 extern size_t ereport_dumplen;
 
 extern void fm_init(void);
 extern void fm_nvprint(nvlist_t *);
 extern void fm_panic(const char *, ...);
 extern void fm_banner(void);
 
 extern void fm_ereport_dump(void);
 extern void fm_ereport_post(nvlist_t *, int);
 
 extern void fm_payload_stack_add(nvlist_t *, const pc_t *, int);
 
 extern int is_fm_panic();
-#endif  /* _KERNEL */
+#endif  /* _KERNEL || _FAKE_KERNEL */
 
 #ifdef	__cplusplus
 }
 #endif
 
 #endif /* _SYS_FM_UTIL_H */
Index: vendor-sys/illumos/dist/uts/common/sys/vnode.h
===================================================================
--- vendor-sys/illumos/dist/uts/common/sys/vnode.h	(revision 329752)
+++ vendor-sys/illumos/dist/uts/common/sys/vnode.h	(revision 329753)
@@ -1,1549 +1,1552 @@
 /*
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
  * Common Development and Distribution License (the "License").
  * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
  * See the License for the specific language governing permissions
  * and limitations under the License.
  *
  * When distributing Covered Code, include this CDDL HEADER in each
  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  * If applicable, add the following below this CDDL HEADER, with the
  * fields enclosed by brackets "[]" replaced with your own identifying
  * information: Portions Copyright [yyyy] [name of copyright owner]
  *
  * CDDL HEADER END
  */
 
 /*
  * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2017, Joyent, Inc.
  * Copyright (c) 2011, 2017 by Delphix. All rights reserved.
+ * Copyright 2017 RackTop Systems.
  */
 
 /*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
 /*	  All Rights Reserved  	*/
 
 /*
  * University Copyright- Copyright (c) 1982, 1986, 1988
  * The Regents of the University of California
  * All Rights Reserved
  *
  * University Acknowledgment- Portions of this document are derived from
  * software developed by the University of California, Berkeley, and its
  * contributors.
  */
 
 #ifndef _SYS_VNODE_H
 #define	_SYS_VNODE_H
 
 #include <sys/types.h>
 #include <sys/t_lock.h>
 #include <sys/rwstlock.h>
 #include <sys/time_impl.h>
 #include <sys/cred.h>
 #include <sys/uio.h>
 #include <sys/resource.h>
 #include <vm/seg_enum.h>
 #include <sys/kstat.h>
 #include <sys/kmem.h>
 #include <sys/list.h>
 #ifdef	_KERNEL
 #include <sys/buf.h>
 #include <sys/sdt.h>
 #endif	/* _KERNEL */
 
 #ifdef	__cplusplus
 extern "C" {
 #endif
 
 /*
  * Statistics for all vnode operations.
  * All operations record number of ops (since boot/mount/zero'ed).
  * Certain I/O operations (read, write, readdir) also record number
  * of bytes transferred.
  * This appears in two places in the system: one is embedded in each
  * vfs_t.  There is also an array of vopstats_t structures allocated
  * on a per-fstype basis.
  */
 
 #define	VOPSTATS_STR	"vopstats_"	/* Initial string for vopstat kstats */
 
 typedef struct vopstats {
 	kstat_named_t	nopen;		/* VOP_OPEN */
 	kstat_named_t	nclose;		/* VOP_CLOSE */
 	kstat_named_t	nread;		/* VOP_READ */
 	kstat_named_t	read_bytes;
 	kstat_named_t	nwrite;		/* VOP_WRITE */
 	kstat_named_t	write_bytes;
 	kstat_named_t	nioctl;		/* VOP_IOCTL */
 	kstat_named_t	nsetfl;		/* VOP_SETFL */
 	kstat_named_t	ngetattr;	/* VOP_GETATTR */
 	kstat_named_t	nsetattr;	/* VOP_SETATTR */
 	kstat_named_t	naccess;	/* VOP_ACCESS */
 	kstat_named_t	nlookup;	/* VOP_LOOKUP */
 	kstat_named_t	ncreate;	/* VOP_CREATE */
 	kstat_named_t	nremove;	/* VOP_REMOVE */
 	kstat_named_t	nlink;		/* VOP_LINK */
 	kstat_named_t	nrename;	/* VOP_RENAME */
 	kstat_named_t	nmkdir;		/* VOP_MKDIR */
 	kstat_named_t	nrmdir;		/* VOP_RMDIR */
 	kstat_named_t	nreaddir;	/* VOP_READDIR */
 	kstat_named_t	readdir_bytes;
 	kstat_named_t	nsymlink;	/* VOP_SYMLINK */
 	kstat_named_t	nreadlink;	/* VOP_READLINK */
 	kstat_named_t	nfsync;		/* VOP_FSYNC */
 	kstat_named_t	ninactive;	/* VOP_INACTIVE */
 	kstat_named_t	nfid;		/* VOP_FID */
 	kstat_named_t	nrwlock;	/* VOP_RWLOCK */
 	kstat_named_t	nrwunlock;	/* VOP_RWUNLOCK */
 	kstat_named_t	nseek;		/* VOP_SEEK */
 	kstat_named_t	ncmp;		/* VOP_CMP */
 	kstat_named_t	nfrlock;	/* VOP_FRLOCK */
 	kstat_named_t	nspace;		/* VOP_SPACE */
 	kstat_named_t	nrealvp;	/* VOP_REALVP */
 	kstat_named_t	ngetpage;	/* VOP_GETPAGE */
 	kstat_named_t	nputpage;	/* VOP_PUTPAGE */
 	kstat_named_t	nmap;		/* VOP_MAP */
 	kstat_named_t	naddmap;	/* VOP_ADDMAP */
 	kstat_named_t	ndelmap;	/* VOP_DELMAP */
 	kstat_named_t	npoll;		/* VOP_POLL */
 	kstat_named_t	ndump;		/* VOP_DUMP */
 	kstat_named_t	npathconf;	/* VOP_PATHCONF */
 	kstat_named_t	npageio;	/* VOP_PAGEIO */
 	kstat_named_t	ndumpctl;	/* VOP_DUMPCTL */
 	kstat_named_t	ndispose;	/* VOP_DISPOSE */
 	kstat_named_t	nsetsecattr;	/* VOP_SETSECATTR */
 	kstat_named_t	ngetsecattr;	/* VOP_GETSECATTR */
 	kstat_named_t	nshrlock;	/* VOP_SHRLOCK */
 	kstat_named_t	nvnevent;	/* VOP_VNEVENT */
 	kstat_named_t	nreqzcbuf;	/* VOP_REQZCBUF */
 	kstat_named_t	nretzcbuf;	/* VOP_RETZCBUF */
 } vopstats_t;
 
 /*
  * The vnode is the focus of all file activity in UNIX.
  * A vnode is allocated for each active file, each current
  * directory, each mounted-on file, and the root.
  *
  * Each vnode is usually associated with a file-system-specific node (for
  * UFS, this is the in-memory inode).  Generally, a vnode and an fs-node
  * should be created and destroyed together as a pair.
  *
  * If a vnode is reused for a new file, it should be reinitialized by calling
  * either vn_reinit() or vn_recycle().
  *
  * vn_reinit() resets the entire vnode as if it was returned by vn_alloc().
  * The caller is responsible for setting up the entire vnode after calling
  * vn_reinit().  This is important when using kmem caching where the vnode is
  * allocated by a constructor, for instance.
  *
  * vn_recycle() is used when the file system keeps some state around in both
  * the vnode and the associated FS-node.  In UFS, for example, the inode of
  * a deleted file can be reused immediately.  The v_data, v_vfsp, v_op, etc.
  * remains the same but certain fields related to the previous instance need
  * to be reset.  In particular:
  *	v_femhead
  *	v_path
  *	v_rdcnt, v_wrcnt
  *	v_mmap_read, v_mmap_write
  */
 
 /*
  * vnode types.  VNON means no type.  These values are unrelated to
  * values in on-disk inodes.
  */
 typedef enum vtype {
 	VNON	= 0,
 	VREG	= 1,
 	VDIR	= 2,
 	VBLK	= 3,
 	VCHR	= 4,
 	VLNK	= 5,
 	VFIFO	= 6,
 	VDOOR	= 7,
 	VPROC	= 8,
 	VSOCK	= 9,
 	VPORT	= 10,
 	VBAD	= 11
 } vtype_t;
 
 /*
  * VSD - Vnode Specific Data
  * Used to associate additional private data with a vnode.
  */
 struct vsd_node {
 	list_node_t vs_nodes;		/* list of all VSD nodes */
 	uint_t vs_nkeys;		/* entries in value array */
 	void **vs_value;		/* array of value/key */
 };
 
 /*
  * Many of the fields in the vnode are read-only once they are initialized
  * at vnode creation time.  Other fields are protected by locks.
  *
  * IMPORTANT: vnodes should be created ONLY by calls to vn_alloc().  They
  * may not be embedded into the file-system specific node (inode).  The
  * size of vnodes may change.
  *
  * The v_lock protects:
  *   v_flag
  *   v_stream
  *   v_count
  *   v_shrlocks
  *   v_path
  *   v_vsd
  *   v_xattrdir
  *
  * A special lock (implemented by vn_vfswlock in vnode.c) protects:
  *   v_vfsmountedhere
  *
  * The global flock_lock mutex (in flock.c) protects:
  *   v_filocks
  *
  * IMPORTANT NOTE:
  *
  *   The following vnode fields are considered public and may safely be
  *   accessed by file systems or other consumers:
  *
  *     v_lock
  *     v_flag
  *     v_count
  *     v_data
  *     v_vfsp
  *     v_stream
  *     v_type
  *     v_rdev
  *
  * ALL OTHER FIELDS SHOULD BE ACCESSED ONLY BY THE OWNER OF THAT FIELD.
  * In particular, file systems should not access other fields; they may
  * change or even be removed.  The functionality which was once provided
  * by these fields is available through vn_* functions.
  *
  * VNODE PATH THEORY:
  * In each vnode, the v_path field holds a cached version of the canonical
  * filesystem path which that node represents.  Because vnodes lack contextual
  * information about their own name or position in the VFS hierarchy, this path
  * must be calculated when the vnode is instantiated by operations such as
  * fop_create, fop_lookup, or fop_mkdir.  During said operations, both the
  * parent vnode (and its cached v_path) and future name are known, so the
  * v_path of the resulting object can easily be set.
  *
  * The caching nature of v_path is complicated in the face of directory
  * renames.  Filesystem drivers are responsible for calling vn_renamepath when
  * a fop_rename operation succeeds.  While the v_path on the renamed vnode will
  * be updated, existing children of the directory (direct, or at deeper levels)
  * will now possess v_path caches which are stale.
  *
  * It is expensive (and for non-directories, impossible) to recalculate stale
  * v_path entries during operations such as vnodetopath.  The best time during
  * which to correct such wrongs is the same as when v_path is first
  * initialized: during fop_create/fop_lookup/fop_mkdir/etc, where adequate
  * context is available to generate the current path.
  *
  * In order to quickly detect stale v_path entries (without full lookup
  * verification) to trigger a v_path update, the v_path_stamp field has been
  * added to vnode_t.  As part of successful fop_create/fop_lookup/fop_mkdir
  * operations, where the name and parent vnode are available, the following
  * rules are used to determine updates to the child:
  *
  * 1. If the parent lacks a v_path, clear any existing v_path and v_path_stamp
  *    on the child.  Until the parent v_path is refreshed to a valid state, the
  *    child v_path must be considered invalid too.
  *
  * 2. If the child lacks a v_path (implying v_path_stamp == 0), it inherits the
  *    v_path_stamp value from its parent and its v_path is updated.
  *
  * 3. If the child v_path_stamp is less than v_path_stamp in the parent, it is
  *    an indication that the child v_path is stale.  The v_path is updated and
  *    v_path_stamp in the child is set to the current hrtime().
  *
  *    It does _not_ inherit the parent v_path_stamp in order to propagate the
  *    time of v_path invalidation through the directory structure.  This
  *    prevents concurrent invalidations (operating with a now-incorrect v_path)
  *    at deeper levels in the tree from persisting.
  *
  * 4. If the child v_path_stamp is greater or equal to the parent, no action
  *    needs to be taken.
  *
  * Note that fop_rename operations do not follow this ruleset.  They perform an
  * explicit update of v_path and v_path_stamp (setting it to the current time)
  *
  * With these constraints in place, v_path invalidations and updates should
  * proceed in a timely manner as vnodes are accessed.  While there still are
  * limited cases where vnodetopath operations will fail, the risk is minimized.
  */
 
 struct fem_head;	/* from fem.h */
 
 typedef struct vnode {
 	kmutex_t	v_lock;		/* protects vnode fields */
 	uint_t		v_flag;		/* vnode flags (see below) */
 	uint_t		v_count;	/* reference count */
 	void		*v_data;	/* private data for fs */
 	struct vfs	*v_vfsp;	/* ptr to containing VFS */
 	struct stdata	*v_stream;	/* associated stream */
 	enum vtype	v_type;		/* vnode type */
 	dev_t		v_rdev;		/* device (VCHR, VBLK) */
 
 	/* PRIVATE FIELDS BELOW - DO NOT USE */
 
 	struct vfs	*v_vfsmountedhere; /* ptr to vfs mounted here */
 	struct vnodeops	*v_op;		/* vnode operations */
 	struct page	*v_pages;	/* vnode pages list */
 	struct filock	*v_filocks;	/* ptr to filock list */
 	struct shrlocklist *v_shrlocks;	/* ptr to shrlock list */
 	krwlock_t	v_nbllock;	/* sync for NBMAND locks */
 	kcondvar_t	v_cv;		/* synchronize locking */
 	void		*v_locality;	/* hook for locality info */
 	struct fem_head	*v_femhead;	/* fs monitoring */
 	char		*v_path;	/* cached path */
 	hrtime_t	v_path_stamp;	/* timestamp for cached path */
 	uint_t		v_rdcnt;	/* open for read count  (VREG only) */
 	uint_t		v_wrcnt;	/* open for write count (VREG only) */
 	u_longlong_t	v_mmap_read;	/* mmap read count */
 	u_longlong_t	v_mmap_write;	/* mmap write count */
 	void		*v_mpssdata;	/* info for large page mappings */
 	void		*v_fopdata;	/* list of file ops event watches */
 	kmutex_t	v_vsd_lock;	/* protects v_vsd field */
 	struct vsd_node *v_vsd;		/* vnode specific data */
 	struct vnode	*v_xattrdir;	/* unnamed extended attr dir (GFS) */
 	uint_t		v_count_dnlc;	/* dnlc reference count */
 } vnode_t;
 
 #define	IS_DEVVP(vp)	\
 	((vp)->v_type == VCHR || (vp)->v_type == VBLK || (vp)->v_type == VFIFO)
 
 #define	VNODE_ALIGN	64
 /* Count of low-order 0 bits in a vnode *, based on size and alignment. */
 #if defined(_LP64)
 #define	VNODE_ALIGN_LOG2	8
 #else
 #define	VNODE_ALIGN_LOG2	7
 #endif
 
 /*
  * vnode flags.
  */
 #define	VROOT		0x01	/* root of its file system */
 #define	VNOCACHE	0x02	/* don't keep cache pages on vnode */
 #define	VNOMAP		0x04	/* file cannot be mapped/faulted */
 #define	VDUP		0x08	/* file should be dup'ed rather then opened */
 #define	VNOSWAP		0x10	/* file cannot be used as virtual swap device */
 #define	VNOMOUNT	0x20	/* file cannot be covered by mount */
 #define	VISSWAP		0x40	/* vnode is being used for swap */
 #define	VSWAPLIKE	0x80	/* vnode acts like swap (but may not be) */
 
 #define	IS_SWAPVP(vp)	(((vp)->v_flag & (VISSWAP | VSWAPLIKE)) != 0)
 
+#ifdef _KERNEL
 typedef struct vn_vfslocks_entry {
 	rwstlock_t ve_lock;
 	void *ve_vpvfs;
 	struct vn_vfslocks_entry *ve_next;
 	uint32_t ve_refcnt;
 	char pad[64 - sizeof (rwstlock_t) - 2 * sizeof (void *) - \
 	    sizeof (uint32_t)];
 } vn_vfslocks_entry_t;
+#endif
 
 /*
  * The following two flags are used to lock the v_vfsmountedhere field
  */
 #define	VVFSLOCK	0x100
 #define	VVFSWAIT	0x200
 
 /*
  * Used to serialize VM operations on a vnode
  */
 #define	VVMLOCK		0x400
 
 /*
  * Tell vn_open() not to fail a directory open for writing but
  * to go ahead and call VOP_OPEN() to let the filesystem check.
  */
 #define	VDIROPEN	0x800
 
 /*
  * Flag to let the VM system know that this file is most likely a binary
  * or shared library since it has been mmap()ed EXEC at some time.
  */
 #define	VVMEXEC		0x1000
 
 #define	VPXFS		0x2000  /* clustering: global fs proxy vnode */
 
 #define	IS_PXFSVP(vp)	((vp)->v_flag & VPXFS)
 
 #define	V_XATTRDIR	0x4000	/* attribute unnamed directory */
 
 #define	IS_XATTRDIR(vp)	((vp)->v_flag & V_XATTRDIR)
 
 #define	V_LOCALITY	0x8000	/* whether locality aware */
 
 /*
  * Flag that indicates the VM should maintain the v_pages list with all modified
  * pages on one end and unmodified pages at the other. This makes finding dirty
  * pages to write back to disk much faster at the expense of taking a minor
  * fault on the first store instruction which touches a writable page.
  */
 #define	VMODSORT	(0x10000)
 #define	IS_VMODSORT(vp) \
 	(pvn_vmodsort_supported != 0 && ((vp)->v_flag  & VMODSORT) != 0)
 
 #define	VISSWAPFS	0x20000	/* vnode is being used for swapfs */
 
 /*
  * The mdb memstat command assumes that IS_SWAPFSVP only uses the
  * vnode's v_flag field.  If this changes, cache the additional
  * fields in mdb; see vn_get in mdb/common/modules/genunix/memory.c
  */
 #define	IS_SWAPFSVP(vp)	(((vp)->v_flag & VISSWAPFS) != 0)
 
 #define	V_SYSATTR	0x40000	/* vnode is a GFS system attribute */
 
 /*
  * Indication that VOP_LOOKUP operations on this vnode may yield results from a
  * different VFS instance.  The main use of this is to suppress v_path
  * calculation logic when filesystems such as procfs emit results which defy
  * expectations about normal VFS behavior.
  */
 #define	VTRAVERSE	0x80000
 
 /*
  * Vnode attributes.  A bit-mask is supplied as part of the
  * structure to indicate the attributes the caller wants to
  * set (setattr) or extract (getattr).
  */
 
 /*
  * Note that va_nodeid and va_nblocks are 64bit data type.
  * We support large files over NFSV3. With Solaris client and
  * Server that generates 64bit ino's and sizes these fields
  * will overflow if they are 32 bit sizes.
  */
 
 typedef struct vattr {
 	uint_t		va_mask;	/* bit-mask of attributes */
 	vtype_t		va_type;	/* vnode type (for create) */
 	mode_t		va_mode;	/* file access mode */
 	uid_t		va_uid;		/* owner user id */
 	gid_t		va_gid;		/* owner group id */
 	dev_t		va_fsid;	/* file system id (dev for now) */
 	u_longlong_t	va_nodeid;	/* node id */
 	nlink_t		va_nlink;	/* number of references to file */
 	u_offset_t	va_size;	/* file size in bytes */
 	timestruc_t	va_atime;	/* time of last access */
 	timestruc_t	va_mtime;	/* time of last modification */
 	timestruc_t	va_ctime;	/* time of last status change */
 	dev_t		va_rdev;	/* device the file represents */
 	uint_t		va_blksize;	/* fundamental block size */
 	u_longlong_t	va_nblocks;	/* # of blocks allocated */
 	uint_t		va_seq;		/* sequence number */
 } vattr_t;
 
 #define	AV_SCANSTAMP_SZ	32		/* length of anti-virus scanstamp */
 
 /*
  * Structure of all optional attributes.
  */
 typedef struct xoptattr {
 	timestruc_t	xoa_createtime;	/* Create time of file */
 	uint8_t		xoa_archive;
 	uint8_t		xoa_system;
 	uint8_t		xoa_readonly;
 	uint8_t		xoa_hidden;
 	uint8_t		xoa_nounlink;
 	uint8_t		xoa_immutable;
 	uint8_t		xoa_appendonly;
 	uint8_t		xoa_nodump;
 	uint8_t		xoa_opaque;
 	uint8_t		xoa_av_quarantined;
 	uint8_t		xoa_av_modified;
 	uint8_t		xoa_av_scanstamp[AV_SCANSTAMP_SZ];
 	uint8_t		xoa_reparse;
 	uint64_t	xoa_generation;
 	uint8_t		xoa_offline;
 	uint8_t		xoa_sparse;
 } xoptattr_t;
 
 /*
  * The xvattr structure is really a variable length structure that
  * is made up of:
  * - The classic vattr_t (xva_vattr)
  * - a 32 bit quantity (xva_mapsize) that specifies the size of the
  *   attribute bitmaps in 32 bit words.
  * - A pointer to the returned attribute bitmap (needed because the
  *   previous element, the requested attribute bitmap, is variable length).
  * - The requested attribute bitmap, which is an array of 32 bit words.
  *   Callers use the XVA_SET_REQ() macro to set the bits corresponding to
  *   the attributes that are being requested.
  * - The returned attribute bitmap, which is an array of 32 bit words.
  *   File systems that support optional attributes use the XVA_SET_RTN()
  *   macro to set the bits corresponding to the attributes that are being
  *   returned.
  * - The xoptattr_t structure which contains the attribute values
  *
  * xva_mapsize determines how many words in the attribute bitmaps.
  * Immediately following the attribute bitmaps is the xoptattr_t.
  * xva_getxoptattr() is used to get the pointer to the xoptattr_t
  * section.
  */
 
 #define	XVA_MAPSIZE	3		/* Size of attr bitmaps */
 #define	XVA_MAGIC	0x78766174	/* Magic # for verification */
 
 /*
  * The xvattr structure is an extensible structure which permits optional
  * attributes to be requested/returned.  File systems may or may not support
  * optional attributes.  They do so at their own discretion but if they do
  * support optional attributes, they must register the VFSFT_XVATTR feature
  * so that the optional attributes can be set/retrieved.
  *
  * The fields of the xvattr structure are:
  *
  * xva_vattr - The first element of an xvattr is a legacy vattr structure
  * which includes the common attributes.  If AT_XVATTR is set in the va_mask
  * then the entire structure is treated as an xvattr.  If AT_XVATTR is not
  * set, then only the xva_vattr structure can be used.
  *
  * xva_magic - 0x78766174 (hex for "xvat"). Magic number for verification.
  *
  * xva_mapsize - Size of requested and returned attribute bitmaps.
  *
  * xva_rtnattrmapp - Pointer to xva_rtnattrmap[].  We need this since the
  * size of the array before it, xva_reqattrmap[], could change which means
  * the location of xva_rtnattrmap[] could change.  This will allow unbundled
  * file systems to find the location of xva_rtnattrmap[] when the sizes change.
  *
  * xva_reqattrmap[] - Array of requested attributes.  Attributes are
  * represented by a specific bit in a specific element of the attribute
  * map array.  Callers set the bits corresponding to the attributes
  * that the caller wants to get/set.
  *
  * xva_rtnattrmap[] - Array of attributes that the file system was able to
  * process.  Not all file systems support all optional attributes.  This map
  * informs the caller which attributes the underlying file system was able
  * to set/get.  (Same structure as the requested attributes array in terms
  * of each attribute  corresponding to specific bits and array elements.)
  *
  * xva_xoptattrs - Structure containing values of optional attributes.
  * These values are only valid if the corresponding bits in xva_reqattrmap
  * are set and the underlying file system supports those attributes.
  */
 typedef struct xvattr {
 	vattr_t		xva_vattr;	/* Embedded vattr structure */
 	uint32_t	xva_magic;	/* Magic Number */
 	uint32_t	xva_mapsize;	/* Size of attr bitmap (32-bit words) */
 	uint32_t	*xva_rtnattrmapp;	/* Ptr to xva_rtnattrmap[] */
 	uint32_t	xva_reqattrmap[XVA_MAPSIZE];	/* Requested attrs */
 	uint32_t	xva_rtnattrmap[XVA_MAPSIZE];	/* Returned attrs */
 	xoptattr_t	xva_xoptattrs;	/* Optional attributes */
 } xvattr_t;
 
 #ifdef _SYSCALL32
 /*
  * For bigtypes time_t changed to 64 bit on the 64-bit kernel.
  * Define an old version for user/kernel interface
  */
 
 #if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4
 #pragma pack(4)
 #endif
 
 typedef struct vattr32 {
 	uint32_t	va_mask;	/* bit-mask of attributes */
 	vtype_t		va_type;	/* vnode type (for create) */
 	mode32_t	va_mode;	/* file access mode */
 	uid32_t		va_uid;		/* owner user id */
 	gid32_t		va_gid;		/* owner group id */
 	dev32_t		va_fsid;	/* file system id (dev for now) */
 	u_longlong_t	va_nodeid;	/* node id */
 	nlink_t		va_nlink;	/* number of references to file */
 	u_offset_t	va_size;	/* file size in bytes */
 	timestruc32_t	va_atime;	/* time of last access */
 	timestruc32_t	va_mtime;	/* time of last modification */
 	timestruc32_t	va_ctime;	/* time of last status change */
 	dev32_t		va_rdev;	/* device the file represents */
 	uint32_t	va_blksize;	/* fundamental block size */
 	u_longlong_t	va_nblocks;	/* # of blocks allocated */
 	uint32_t	va_seq;		/* sequence number */
 } vattr32_t;
 
 #if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4
 #pragma pack()
 #endif
 
 #else  /* not _SYSCALL32 */
 #define	vattr32		vattr
 typedef vattr_t		vattr32_t;
 #endif /* _SYSCALL32 */
 
 /*
  * Attributes of interest to the caller of setattr or getattr.
  */
 #define	AT_TYPE		0x00001
 #define	AT_MODE		0x00002
 #define	AT_UID		0x00004
 #define	AT_GID		0x00008
 #define	AT_FSID		0x00010
 #define	AT_NODEID	0x00020
 #define	AT_NLINK	0x00040
 #define	AT_SIZE		0x00080
 #define	AT_ATIME	0x00100
 #define	AT_MTIME	0x00200
 #define	AT_CTIME	0x00400
 #define	AT_RDEV		0x00800
 #define	AT_BLKSIZE	0x01000
 #define	AT_NBLOCKS	0x02000
 /*			0x04000 */	/* unused */
 #define	AT_SEQ		0x08000
 /*
  * If AT_XVATTR is set then there are additional bits to process in
  * the xvattr_t's attribute bitmap.  If this is not set then the bitmap
  * MUST be ignored.  Note that this bit must be set/cleared explicitly.
  * That is, setting AT_ALL will NOT set AT_XVATTR.
  */
 #define	AT_XVATTR	0x10000
 
 #define	AT_ALL		(AT_TYPE|AT_MODE|AT_UID|AT_GID|AT_FSID|AT_NODEID|\
 			AT_NLINK|AT_SIZE|AT_ATIME|AT_MTIME|AT_CTIME|\
 			AT_RDEV|AT_BLKSIZE|AT_NBLOCKS|AT_SEQ)
 
 #define	AT_STAT		(AT_MODE|AT_UID|AT_GID|AT_FSID|AT_NODEID|AT_NLINK|\
 			AT_SIZE|AT_ATIME|AT_MTIME|AT_CTIME|AT_RDEV|AT_TYPE)
 
 #define	AT_TIMES	(AT_ATIME|AT_MTIME|AT_CTIME)
 
 #define	AT_NOSET	(AT_NLINK|AT_RDEV|AT_FSID|AT_NODEID|AT_TYPE|\
 			AT_BLKSIZE|AT_NBLOCKS|AT_SEQ)
 
 /*
  * Attribute bits used in the extensible attribute's (xva's) attribute
  * bitmaps.  Note that the bitmaps are made up of a variable length number
  * of 32-bit words.  The convention is to use XAT{n}_{attrname} where "n"
  * is the element in the bitmap (starting at 1).  This convention is for
  * the convenience of the maintainer to keep track of which element each
  * attribute belongs to.
  *
  * NOTE THAT CONSUMERS MUST *NOT* USE THE XATn_* DEFINES DIRECTLY.  CONSUMERS
  * MUST USE THE XAT_* DEFINES.
  */
 #define	XAT0_INDEX	0LL		/* Index into bitmap for XAT0 attrs */
 #define	XAT0_CREATETIME	0x00000001	/* Create time of file */
 #define	XAT0_ARCHIVE	0x00000002	/* Archive */
 #define	XAT0_SYSTEM	0x00000004	/* System */
 #define	XAT0_READONLY	0x00000008	/* Readonly */
 #define	XAT0_HIDDEN	0x00000010	/* Hidden */
 #define	XAT0_NOUNLINK	0x00000020	/* Nounlink */
 #define	XAT0_IMMUTABLE	0x00000040	/* immutable */
 #define	XAT0_APPENDONLY	0x00000080	/* appendonly */
 #define	XAT0_NODUMP	0x00000100	/* nodump */
 #define	XAT0_OPAQUE	0x00000200	/* opaque */
 #define	XAT0_AV_QUARANTINED	0x00000400	/* anti-virus quarantine */
 #define	XAT0_AV_MODIFIED	0x00000800	/* anti-virus modified */
 #define	XAT0_AV_SCANSTAMP	0x00001000	/* anti-virus scanstamp */
 #define	XAT0_REPARSE	0x00002000	/* FS reparse point */
 #define	XAT0_GEN	0x00004000	/* object generation number */
 #define	XAT0_OFFLINE	0x00008000	/* offline */
 #define	XAT0_SPARSE	0x00010000	/* sparse */
 
 /*
  * Mask of every XAT0_* attribute bit defined above.  Each constituent must
  * name one of those defines exactly (digit zero, not the letter O).
  */
 #define	XAT0_ALL_ATTRS	(XAT0_CREATETIME|XAT0_ARCHIVE|XAT0_SYSTEM| \
     XAT0_READONLY|XAT0_HIDDEN|XAT0_NOUNLINK|XAT0_IMMUTABLE|XAT0_APPENDONLY| \
     XAT0_NODUMP|XAT0_OPAQUE|XAT0_AV_QUARANTINED|XAT0_AV_MODIFIED| \
     XAT0_AV_SCANSTAMP|XAT0_REPARSE|XAT0_GEN|XAT0_OFFLINE|XAT0_SPARSE)
 
 /* Support for XAT_* optional attributes */
 #define	XVA_MASK		0xffffffff	/* Used to mask off 32 bits */
 #define	XVA_SHFT		32		/* Used to shift index */
 
 /*
  * Used to pry out the index and attribute bits from the XAT_* attributes
  * defined below.  Note that we're masking things down to 32 bits then
  * casting to uint32_t.
  */
 #define	XVA_INDEX(attr)		((uint32_t)(((attr) >> XVA_SHFT) & XVA_MASK))
 #define	XVA_ATTRBIT(attr)	((uint32_t)((attr) & XVA_MASK))
 
 /*
  * The following defines present a "flat namespace" so that consumers don't
  * need to keep track of which element belongs to which bitmap entry.
  *
  * NOTE THAT THESE MUST NEVER BE OR-ed TOGETHER
  */
 #define	XAT_CREATETIME		((XAT0_INDEX << XVA_SHFT) | XAT0_CREATETIME)
 #define	XAT_ARCHIVE		((XAT0_INDEX << XVA_SHFT) | XAT0_ARCHIVE)
 #define	XAT_SYSTEM		((XAT0_INDEX << XVA_SHFT) | XAT0_SYSTEM)
 #define	XAT_READONLY		((XAT0_INDEX << XVA_SHFT) | XAT0_READONLY)
 #define	XAT_HIDDEN		((XAT0_INDEX << XVA_SHFT) | XAT0_HIDDEN)
 #define	XAT_NOUNLINK		((XAT0_INDEX << XVA_SHFT) | XAT0_NOUNLINK)
 #define	XAT_IMMUTABLE		((XAT0_INDEX << XVA_SHFT) | XAT0_IMMUTABLE)
 #define	XAT_APPENDONLY		((XAT0_INDEX << XVA_SHFT) | XAT0_APPENDONLY)
 #define	XAT_NODUMP		((XAT0_INDEX << XVA_SHFT) | XAT0_NODUMP)
 #define	XAT_OPAQUE		((XAT0_INDEX << XVA_SHFT) | XAT0_OPAQUE)
 #define	XAT_AV_QUARANTINED	((XAT0_INDEX << XVA_SHFT) | XAT0_AV_QUARANTINED)
 #define	XAT_AV_MODIFIED		((XAT0_INDEX << XVA_SHFT) | XAT0_AV_MODIFIED)
 #define	XAT_AV_SCANSTAMP	((XAT0_INDEX << XVA_SHFT) | XAT0_AV_SCANSTAMP)
 #define	XAT_REPARSE		((XAT0_INDEX << XVA_SHFT) | XAT0_REPARSE)
 #define	XAT_GEN			((XAT0_INDEX << XVA_SHFT) | XAT0_GEN)
 #define	XAT_OFFLINE		((XAT0_INDEX << XVA_SHFT) | XAT0_OFFLINE)
 #define	XAT_SPARSE		((XAT0_INDEX << XVA_SHFT) | XAT0_SPARSE)
 
 /*
  * The returned attribute map array (xva_rtnattrmap[]) is located past the
  * requested attribute map array (xva_reqattrmap[]).  Its location changes
  * when the array sizes change.  We use a separate pointer in a known location
  * (xva_rtnattrmapp) to hold the location of xva_rtnattrmap[].  This is
  * set in xva_init()
  */
 #define	XVA_RTNATTRMAP(xvap)	((xvap)->xva_rtnattrmapp)
 
 /*
  * XVA_SET_REQ() sets an attribute bit in the proper element in the bitmap
  * of requested attributes (xva_reqattrmap[]).
  *
  * The expansion is wrapped in do { } while (0) so that the macro behaves as
  * a single statement (for example as the unbraced body of an if).  Both
  * arguments are evaluated more than once, so they must be free of side
  * effects.
  *
  * NOTE(review): the first ASSERT ORs va_mask with AT_XVATTR, which is always
  * non-zero, so it can never fire; a bitwise AND was presumably intended.  It
  * is left as-is here -- confirm that every caller sets AT_XVATTR before
  * tightening it.
  */
 #define	XVA_SET_REQ(xvap, attr)						\
 	do {								\
 		ASSERT((xvap)->xva_vattr.va_mask | AT_XVATTR);		\
 		ASSERT((xvap)->xva_magic == XVA_MAGIC);			\
 		(xvap)->xva_reqattrmap[XVA_INDEX(attr)] |=		\
 		    XVA_ATTRBIT(attr);					\
 	} while (0)
 /*
  * XVA_CLR_REQ() clears an attribute bit in the proper element in the bitmap
  * of requested attributes (xva_reqattrmap[]).  Expands to a single statement;
  * the arguments are evaluated more than once.
  */
 #define	XVA_CLR_REQ(xvap, attr)						\
 	do {								\
 		ASSERT((xvap)->xva_vattr.va_mask | AT_XVATTR);		\
 		ASSERT((xvap)->xva_magic == XVA_MAGIC);			\
 		(xvap)->xva_reqattrmap[XVA_INDEX(attr)] &=		\
 		    ~XVA_ATTRBIT(attr);					\
 	} while (0)
 
 /*
  * XVA_SET_RTN() sets an attribute bit in the proper element in the bitmap
  * of returned attributes (xva_rtnattrmap[]), located via XVA_RTNATTRMAP().
  * Expands to a single statement; the arguments are evaluated more than once.
  */
 #define	XVA_SET_RTN(xvap, attr)						\
 	do {								\
 		ASSERT((xvap)->xva_vattr.va_mask | AT_XVATTR);		\
 		ASSERT((xvap)->xva_magic == XVA_MAGIC);			\
 		(XVA_RTNATTRMAP(xvap))[XVA_INDEX(attr)] |=		\
 		    XVA_ATTRBIT(attr);					\
 	} while (0)
 
 /*
  * XVA_ISSET_REQ() checks the requested attribute bitmap (xva_reqattrmap[])
  * to see if the corresponding attribute bit is set.  If so, returns non-zero.
  *
  * Evaluates to 0 without reading the bitmap unless all of the following
  * hold: AT_XVATTR is set in va_mask, xva_magic is XVA_MAGIC, and the
  * attribute's index is below xva_mapsize.  AT_XVATTR is tested with '&';
  * OR-ing it into va_mask is non-zero for every input and would disable
  * that guard.
  */
 #define	XVA_ISSET_REQ(xvap, attr)					\
 	((((xvap)->xva_vattr.va_mask & AT_XVATTR) &&			\
 		((xvap)->xva_magic == XVA_MAGIC) &&			\
 		((xvap)->xva_mapsize > XVA_INDEX(attr))) ?		\
 	((xvap)->xva_reqattrmap[XVA_INDEX(attr)] & XVA_ATTRBIT(attr)) :	0)
 
 /*
  * XVA_ISSET_RTN() checks the returned attribute bitmap (xva_rtnattrmap[])
  * to see if the corresponding attribute bit is set.  If so, returns non-zero.
  *
  * Evaluates to 0 without reading the bitmap unless all of the following
  * hold: AT_XVATTR is set in va_mask, xva_magic is XVA_MAGIC, and the
  * attribute's index is below xva_mapsize.  AT_XVATTR is tested with '&';
  * OR-ing it into va_mask is non-zero for every input and would disable
  * that guard.
  */
 #define	XVA_ISSET_RTN(xvap, attr)					\
 	((((xvap)->xva_vattr.va_mask & AT_XVATTR) &&			\
 		((xvap)->xva_magic == XVA_MAGIC) &&			\
 		((xvap)->xva_mapsize > XVA_INDEX(attr))) ?		\
 	((XVA_RTNATTRMAP(xvap))[XVA_INDEX(attr)] & XVA_ATTRBIT(attr)) : 0)
 
 /*
  * Modes.  Some values are the same as the S_xxx entries from stat.h, for
  * convenience.
  */
 #define	VSUID		04000		/* set user id on execution */
 #define	VSGID		02000		/* set group id on execution */
 #define	VSVTX		01000		/* save swapped text even after use */
 
 /*
  * Permissions.  These three bits sit in the top octal digit of PERMMASK;
  * MANDMODE() shifts VEXEC right by 3 to test the next digit down.
  */
 #define	VREAD		00400
 #define	VWRITE		00200
 #define	VEXEC		00100
 
 #define	MODEMASK	07777		/* mode bits plus permission bits */
 #define	PERMMASK	00777		/* permission bits */
 
 /*
  * VOP_ACCESS flags
  */
 #define	V_ACE_MASK	0x1	/* mask represents NFSv4 ACE permissions */
 #define	V_APPEND	0x2	/* want to do append only check */
 
 /*
  * Check whether mandatory file locking is enabled.
  *
  * MANDMODE(mode) is true when VSGID is set in mode and the (VEXEC >> 3)
  * bit is clear.  MANDLOCK(vp, mode) additionally requires the vnode's
  * v_type to be VREG.
  */
 
 #define	MANDMODE(mode)		(((mode) & (VSGID|(VEXEC>>3))) == VSGID)
 #define	MANDLOCK(vp, mode)	((vp)->v_type == VREG && MANDMODE(mode))
 
 /*
  * Argument enumerations for vnode operations, each with its _t alias.
  */
 
 /* Which kind of removal is requested: rm or rmdir. */
 enum rm {
 	RMFILE,
 	RMDIRECTORY
 };
 typedef enum rm rm_t;
 
 /* Whether symbolic links are to be followed. */
 enum symfollow {
 	NO_FOLLOW,
 	FOLLOW
 };
 typedef enum symfollow symfollow_t;
 
 /* Non-exclusive versus exclusive create. */
 enum vcexcl {
 	NONEXCL,
 	EXCL
 };
 typedef enum vcexcl vcexcl_t;
 
 /* Reason a create is being performed. */
 enum create {
 	CRCREAT,
 	CRMKNOD,
 	CRMKDIR
 };
 typedef enum create create_t;
 
 /*
  * Event codes passed to VOP_VNEVENT.
  *
  * The VE_PRE_RENAME_* codes are delivered ahead of the rename itself, for
  * specialized consumers (NFSv4 delegation, for example) that must learn of
  * a rename before it takes place.
  */
 typedef enum vnevent {
 	/* Query */
 	VE_SUPPORT = 0,
 	/* Rename, with vnode as source */
 	VE_RENAME_SRC = 1,
 	/* Rename, with vnode as target/destination */
 	VE_RENAME_DEST = 2,
 	/* Remove of vnode's name */
 	VE_REMOVE = 3,
 	/* Remove of directory vnode's name */
 	VE_RMDIR = 4,
 	/* Create with vnode's name which exists */
 	VE_CREATE = 5,
 	/* Link with vnode's name as source */
 	VE_LINK = 6,
 	/* Rename with vnode as target dir */
 	VE_RENAME_DEST_DIR = 7,
 	/* File or Filesystem got mounted over vnode */
 	VE_MOUNTEDOVER = 8,
 	/* Truncate */
 	VE_TRUNCATE = 9,
 	/* Pre-rename, with vnode as source */
 	VE_PRE_RENAME_SRC = 10,
 	/* Pre-rename, with vnode as target/dest. */
 	VE_PRE_RENAME_DEST = 11,
 	/* Pre-rename with vnode as target dir */
 	VE_PRE_RENAME_DEST_DIR = 12
 } vnevent_t;
 
 /*
  * Modes used when checking a vnode's open and map counts.
  */
 typedef enum v_mode {
 	V_READ,
 	V_WRITE,
 	V_RDORWR,
 	V_RDANDWR
 } v_mode_t;
 
 #define	V_FALSE	0
 #define	V_TRUE	1
 
 /*
  * Structure used on VOP_GETSECATTR and VOP_SETSECATTR operations
  */
 
 typedef struct vsecattr {
 	uint_t		vsa_mask;	/* VSA_* bits, see below */
 	int		vsa_aclcnt;	/* ACL entry count */
 	void		*vsa_aclentp;	/* pointer to ACL entries */
 	int		vsa_dfaclcnt;	/* default ACL entry count */
 	void		*vsa_dfaclentp;	/* pointer to default ACL entries */
 	size_t		vsa_aclentsz;	/* ACE size in bytes of vsa_aclentp */
 	uint_t		vsa_aclflags;	/* ACE ACL flags */
 } vsecattr_t;
 
 /* vsa_mask values (bit flags; may be OR-ed together) */
 #define	VSA_ACL			0x0001
 #define	VSA_ACLCNT		0x0002
 #define	VSA_DFACL		0x0004
 #define	VSA_DFACLCNT		0x0008
 #define	VSA_ACE			0x0010
 #define	VSA_ACECNT		0x0020
 #define	VSA_ACE_ALLTYPES	0x0040
 #define	VSA_ACE_ACLFLAGS	0x0080	/* get/set ACE ACL flags */
 
 /*
  * Structure used by various vnode operations to determine
  * the context (pid, host, identity) of a caller.
  *
  * The cc_caller_id is used to identify one or more callers who invoke
  * operations, possibly on behalf of others.  For example, the NFS
  * server could have its own cc_caller_id which can be detected by
  * vnode/vfs operations or (FEM) monitors on those operations.  New
  * caller IDs are generated by fs_new_caller_id().
  */
 typedef struct caller_context {
 	pid_t		cc_pid;		/* Process ID of the caller */
 	int		cc_sysid;	/* System ID, used for remote calls */
 	u_longlong_t	cc_caller_id;	/* Identifier for (set of) caller(s) */
 	ulong_t		cc_flags;	/* CC_* flags, see below */
 } caller_context_t;
 
 /*
  * Flags for caller context.  The caller sets CC_DONTBLOCK if it does not
  * want to block inside of a FEM monitor.  The monitor will set CC_WOULDBLOCK
  * and return EAGAIN if the operation would have blocked.
  */
 #define	CC_WOULDBLOCK	0x01
 #define	CC_DONTBLOCK	0x02
 
 /*
  * Structure tags for function prototypes, defined elsewhere.
  */
 struct pathname;
 struct fid;
 struct flock64;
 struct flk_callback;
 struct shrlock;
 struct page;
 struct seg;
 struct as;
 struct pollhead;
 struct taskq;
 
 #ifdef	_KERNEL
 
 /*
  * VNODE_OPS defines all the vnode operations.  It is used to define
  * the vnodeops structure (below) and the fs_func_p union (vfs_opreg.h).
  *
  * The expansion is a list of function-pointer member declarations, one per
  * operation.  The last member deliberately has no trailing ';' so that the
  * macro is written as "VNODE_OPS;" where it is used.
  */
 #define	VNODE_OPS							\
 	int	(*vop_open)(vnode_t **, int, cred_t *,			\
 				caller_context_t *);			\
 	int	(*vop_close)(vnode_t *, int, int, offset_t, cred_t *,	\
 				caller_context_t *);			\
 	int	(*vop_read)(vnode_t *, uio_t *, int, cred_t *,		\
 				caller_context_t *);			\
 	int	(*vop_write)(vnode_t *, uio_t *, int, cred_t *,		\
 				caller_context_t *);			\
 	int	(*vop_ioctl)(vnode_t *, int, intptr_t, int, cred_t *,	\
 				int *, caller_context_t *);		\
 	int	(*vop_setfl)(vnode_t *, int, int, cred_t *,		\
 				caller_context_t *);			\
 	int	(*vop_getattr)(vnode_t *, vattr_t *, int, cred_t *,	\
 				caller_context_t *);			\
 	int	(*vop_setattr)(vnode_t *, vattr_t *, int, cred_t *,	\
 				caller_context_t *);			\
 	int	(*vop_access)(vnode_t *, int, int, cred_t *,		\
 				caller_context_t *);			\
 	int	(*vop_lookup)(vnode_t *, char *, vnode_t **,		\
 				struct pathname *,			\
 				int, vnode_t *, cred_t *,		\
 				caller_context_t *, int *,		\
 				struct pathname *);			\
 	int	(*vop_create)(vnode_t *, char *, vattr_t *, vcexcl_t,	\
 				int, vnode_t **, cred_t *, int,		\
 				caller_context_t *, vsecattr_t *);	\
 	int	(*vop_remove)(vnode_t *, char *, cred_t *,		\
 				caller_context_t *, int);		\
 	int	(*vop_link)(vnode_t *, vnode_t *, char *, cred_t *,	\
 				caller_context_t *, int);		\
 	int	(*vop_rename)(vnode_t *, char *, vnode_t *, char *,	\
 				cred_t *, caller_context_t *, int);	\
 	int	(*vop_mkdir)(vnode_t *, char *, vattr_t *, vnode_t **,	\
 				cred_t *, caller_context_t *, int,	\
 				vsecattr_t *);				\
 	int	(*vop_rmdir)(vnode_t *, char *, vnode_t *, cred_t *,	\
 				caller_context_t *, int);		\
 	int	(*vop_readdir)(vnode_t *, uio_t *, cred_t *, int *,	\
 				caller_context_t *, int);		\
 	int	(*vop_symlink)(vnode_t *, char *, vattr_t *, char *,	\
 				cred_t *, caller_context_t *, int);	\
 	int	(*vop_readlink)(vnode_t *, uio_t *, cred_t *,		\
 				caller_context_t *);			\
 	int	(*vop_fsync)(vnode_t *, int, cred_t *,			\
 				caller_context_t *);			\
 	void	(*vop_inactive)(vnode_t *, cred_t *,			\
 				caller_context_t *);			\
 	int	(*vop_fid)(vnode_t *, struct fid *,			\
 				caller_context_t *);			\
 	int	(*vop_rwlock)(vnode_t *, int, caller_context_t *);	\
 	void	(*vop_rwunlock)(vnode_t *, int, caller_context_t *);	\
 	int	(*vop_seek)(vnode_t *, offset_t, offset_t *,		\
 				caller_context_t *);			\
 	int	(*vop_cmp)(vnode_t *, vnode_t *, caller_context_t *);	\
 	int	(*vop_frlock)(vnode_t *, int, struct flock64 *,		\
 				int, offset_t,				\
 				struct flk_callback *, cred_t *,	\
 				caller_context_t *);			\
 	int	(*vop_space)(vnode_t *, int, struct flock64 *,		\
 				int, offset_t,				\
 				cred_t *, caller_context_t *);		\
 	int	(*vop_realvp)(vnode_t *, vnode_t **,			\
 				caller_context_t *);			\
 	int	(*vop_getpage)(vnode_t *, offset_t, size_t, uint_t *,	\
 				struct page **, size_t, struct seg *,	\
 				caddr_t, enum seg_rw, cred_t *,		\
 				caller_context_t *);			\
 	int	(*vop_putpage)(vnode_t *, offset_t, size_t,		\
 				int, cred_t *, caller_context_t *);	\
 	int	(*vop_map)(vnode_t *, offset_t, struct as *,		\
 				caddr_t *, size_t,			\
 				uchar_t, uchar_t, uint_t, cred_t *,	\
 				caller_context_t *);			\
 	int	(*vop_addmap)(vnode_t *, offset_t, struct as *,		\
 				caddr_t, size_t,			\
 				uchar_t, uchar_t, uint_t, cred_t *,	\
 				caller_context_t *);			\
 	int	(*vop_delmap)(vnode_t *, offset_t, struct as *,		\
 				caddr_t, size_t,			\
 				uint_t, uint_t, uint_t, cred_t *,	\
 				caller_context_t *);			\
 	int	(*vop_poll)(vnode_t *, short, int, short *,		\
 				struct pollhead **,			\
 				caller_context_t *);			\
 	int	(*vop_dump)(vnode_t *, caddr_t, offset_t, offset_t,	\
 				caller_context_t *);			\
 	int	(*vop_pathconf)(vnode_t *, int, ulong_t *, cred_t *,	\
 				caller_context_t *);			\
 	int	(*vop_pageio)(vnode_t *, struct page *,			\
 				u_offset_t, size_t, int, cred_t *,	\
 				caller_context_t *);			\
 	int	(*vop_dumpctl)(vnode_t *, int, offset_t *,		\
 				caller_context_t *);			\
 	void	(*vop_dispose)(vnode_t *, struct page *,		\
 				int, int, cred_t *,			\
 				caller_context_t *);			\
 	int	(*vop_setsecattr)(vnode_t *, vsecattr_t *,		\
 				int, cred_t *, caller_context_t *);	\
 	int	(*vop_getsecattr)(vnode_t *, vsecattr_t *,		\
 				int, cred_t *, caller_context_t *);	\
 	int	(*vop_shrlock)(vnode_t *, int, struct shrlock *,	\
 				int, cred_t *, caller_context_t *);	\
 	int	(*vop_vnevent)(vnode_t *, vnevent_t, vnode_t *,		\
 				char *, caller_context_t *);		\
 	int	(*vop_reqzcbuf)(vnode_t *, enum uio_rw, xuio_t *,	\
 				cred_t *, caller_context_t *);		\
 	int	(*vop_retzcbuf)(vnode_t *, xuio_t *, cred_t *,		\
 				caller_context_t *)
 	/* NB: No ";" */
 
 /*
  * Operations on vnodes.  Note: File systems must never operate directly
  * on a 'vnodeops' structure -- it WILL change in future releases!  They
  * must use vn_make_ops() to create the structure.
  *
  * The structure is a name pointer followed by the function-pointer members
  * supplied by the VNODE_OPS macro.
  */
 typedef struct vnodeops {
 	const char *vnop_name;
 	VNODE_OPS;	/* Signatures of all vnode operations (vops) */
 } vnodeops_t;
 
 /*
  * NOTE(review): the empty parameter list leaves the arguments unspecified,
  * which looks deliberate for a pointer meant to hold any of the operation
  * types named in the comment below -- confirm before "fixing" it to (void).
  */
 typedef int (*fs_generic_func_p) ();	/* Generic vop/vfsop/femop/fsemop ptr */
 
 extern int	fop_open(vnode_t **, int, cred_t *, caller_context_t *);
 extern int	fop_close(vnode_t *, int, int, offset_t, cred_t *,
 				caller_context_t *);
 extern int	fop_read(vnode_t *, uio_t *, int, cred_t *, caller_context_t *);
 extern int	fop_write(vnode_t *, uio_t *, int, cred_t *,
 				caller_context_t *);
 extern int	fop_ioctl(vnode_t *, int, intptr_t, int, cred_t *, int *,
 				caller_context_t *);
 extern int	fop_setfl(vnode_t *, int, int, cred_t *, caller_context_t *);
 extern int	fop_getattr(vnode_t *, vattr_t *, int, cred_t *,
 				caller_context_t *);
 extern int	fop_setattr(vnode_t *, vattr_t *, int, cred_t *,
 				caller_context_t *);
 extern int	fop_access(vnode_t *, int, int, cred_t *, caller_context_t *);
 extern int	fop_lookup(vnode_t *, char *, vnode_t **, struct pathname *,
 				int, vnode_t *, cred_t *, caller_context_t *,
 				int *, struct pathname *);
 extern int	fop_create(vnode_t *, char *, vattr_t *, vcexcl_t, int,
 				vnode_t **, cred_t *, int, caller_context_t *,
 				vsecattr_t *);
 extern int	fop_remove(vnode_t *vp, char *, cred_t *, caller_context_t *,
 				int);
 extern int	fop_link(vnode_t *, vnode_t *, char *, cred_t *,
 				caller_context_t *, int);
 extern int	fop_rename(vnode_t *, char *, vnode_t *, char *, cred_t *,
 				caller_context_t *, int);
 extern int	fop_mkdir(vnode_t *, char *, vattr_t *, vnode_t **, cred_t *,
 				caller_context_t *, int, vsecattr_t *);
 extern int	fop_rmdir(vnode_t *, char *, vnode_t *, cred_t *,
 				caller_context_t *, int);
 extern int	fop_readdir(vnode_t *, uio_t *, cred_t *, int *,
 				caller_context_t *, int);
 extern int	fop_symlink(vnode_t *, char *, vattr_t *, char *, cred_t *,
 				caller_context_t *, int);
 extern int	fop_readlink(vnode_t *, uio_t *, cred_t *, caller_context_t *);
 extern int	fop_fsync(vnode_t *, int, cred_t *, caller_context_t *);
 extern void	fop_inactive(vnode_t *, cred_t *, caller_context_t *);
 extern int	fop_fid(vnode_t *, struct fid *, caller_context_t *);
 extern int	fop_rwlock(vnode_t *, int, caller_context_t *);
 extern void	fop_rwunlock(vnode_t *, int, caller_context_t *);
 extern int	fop_seek(vnode_t *, offset_t, offset_t *, caller_context_t *);
 extern int	fop_cmp(vnode_t *, vnode_t *, caller_context_t *);
 extern int	fop_frlock(vnode_t *, int, struct flock64 *, int, offset_t,
 				struct flk_callback *, cred_t *,
 				caller_context_t *);
 extern int	fop_space(vnode_t *, int, struct flock64 *, int, offset_t,
 				cred_t *, caller_context_t *);
 extern int	fop_realvp(vnode_t *, vnode_t **, caller_context_t *);
 extern int	fop_getpage(vnode_t *, offset_t, size_t, uint_t *,
 				struct page **, size_t, struct seg *,
 				caddr_t, enum seg_rw, cred_t *,
 				caller_context_t *);
 extern int	fop_putpage(vnode_t *, offset_t, size_t, int, cred_t *,
 				caller_context_t *);
 extern int	fop_map(vnode_t *, offset_t, struct as *, caddr_t *, size_t,
 				uchar_t, uchar_t, uint_t, cred_t *cr,
 				caller_context_t *);
 extern int	fop_addmap(vnode_t *, offset_t, struct as *, caddr_t, size_t,
 				uchar_t, uchar_t, uint_t, cred_t *,
 				caller_context_t *);
 extern int	fop_delmap(vnode_t *, offset_t, struct as *, caddr_t, size_t,
 				uint_t, uint_t, uint_t, cred_t *,
 				caller_context_t *);
 extern int	fop_poll(vnode_t *, short, int, short *, struct pollhead **,
 				caller_context_t *);
 extern int	fop_dump(vnode_t *, caddr_t, offset_t, offset_t,
     caller_context_t *);
 extern int	fop_pathconf(vnode_t *, int, ulong_t *, cred_t *,
 				caller_context_t *);
 extern int	fop_pageio(vnode_t *, struct page *, u_offset_t, size_t, int,
 				cred_t *, caller_context_t *);
 extern int	fop_dumpctl(vnode_t *, int, offset_t *, caller_context_t *);
 extern void	fop_dispose(vnode_t *, struct page *, int, int, cred_t *,
 				caller_context_t *);
 extern int	fop_setsecattr(vnode_t *, vsecattr_t *, int, cred_t *,
 				caller_context_t *);
 extern int	fop_getsecattr(vnode_t *, vsecattr_t *, int, cred_t *,
 				caller_context_t *);
 extern int	fop_shrlock(vnode_t *, int, struct shrlock *, int, cred_t *,
 				caller_context_t *);
 extern int	fop_vnevent(vnode_t *, vnevent_t, vnode_t *, char *,
 				caller_context_t *);
 extern int	fop_reqzcbuf(vnode_t *, enum uio_rw, xuio_t *, cred_t *,
 				caller_context_t *);
 extern int	fop_retzcbuf(vnode_t *, xuio_t *, cred_t *, caller_context_t *);
 
 #endif	/* _KERNEL */
 
 /*
  * VOP_*() wrappers.  Each macro expands to a call of the fop_*() function
  * of the same name, passing its arguments through unchanged and in order.
  */
 #define	VOP_OPEN(vpp, mode, cr, ct) \
 	fop_open(vpp, mode, cr, ct)
 #define	VOP_CLOSE(vp, f, c, o, cr, ct) \
 	fop_close(vp, f, c, o, cr, ct)
 #define	VOP_READ(vp, uiop, iof, cr, ct) \
 	fop_read(vp, uiop, iof, cr, ct)
 #define	VOP_WRITE(vp, uiop, iof, cr, ct) \
 	fop_write(vp, uiop, iof, cr, ct)
 #define	VOP_IOCTL(vp, cmd, a, f, cr, rvp, ct) \
 	fop_ioctl(vp, cmd, a, f, cr, rvp, ct)
 #define	VOP_SETFL(vp, f, a, cr, ct) \
 	fop_setfl(vp, f, a, cr, ct)
 #define	VOP_GETATTR(vp, vap, f, cr, ct) \
 	fop_getattr(vp, vap, f, cr, ct)
 #define	VOP_SETATTR(vp, vap, f, cr, ct) \
 	fop_setattr(vp, vap, f, cr, ct)
 #define	VOP_ACCESS(vp, mode, f, cr, ct) \
 	fop_access(vp, mode, f, cr, ct)
 #define	VOP_LOOKUP(vp, cp, vpp, pnp, f, rdir, cr, ct, defp, rpnp) \
 	fop_lookup(vp, cp, vpp, pnp, f, rdir, cr, ct, defp, rpnp)
 #define	VOP_CREATE(dvp, p, vap, ex, mode, vpp, cr, flag, ct, vsap) \
 	fop_create(dvp, p, vap, ex, mode, vpp, cr, flag, ct, vsap)
 #define	VOP_REMOVE(dvp, p, cr, ct, f) \
 	fop_remove(dvp, p, cr, ct, f)
 #define	VOP_LINK(tdvp, fvp, p, cr, ct, f) \
 	fop_link(tdvp, fvp, p, cr, ct, f)
 #define	VOP_RENAME(fvp, fnm, tdvp, tnm, cr, ct, f) \
 	fop_rename(fvp, fnm, tdvp, tnm, cr, ct, f)
 #define	VOP_MKDIR(dp, p, vap, vpp, cr, ct, f, vsap) \
 	fop_mkdir(dp, p, vap, vpp, cr, ct, f, vsap)
 #define	VOP_RMDIR(dp, p, cdir, cr, ct, f) \
 	fop_rmdir(dp, p, cdir, cr, ct, f)
 #define	VOP_READDIR(vp, uiop, cr, eofp, ct, f) \
 	fop_readdir(vp, uiop, cr, eofp, ct, f)
 #define	VOP_SYMLINK(dvp, lnm, vap, tnm, cr, ct, f) \
 	fop_symlink(dvp, lnm, vap, tnm, cr, ct, f)
 #define	VOP_READLINK(vp, uiop, cr, ct) \
 	fop_readlink(vp, uiop, cr, ct)
 #define	VOP_FSYNC(vp, syncflag, cr, ct) \
 	fop_fsync(vp, syncflag, cr, ct)
 #define	VOP_INACTIVE(vp, cr, ct) \
 	fop_inactive(vp, cr, ct)
 #define	VOP_FID(vp, fidp, ct) \
 	fop_fid(vp, fidp, ct)
 #define	VOP_RWLOCK(vp, w, ct) \
 	fop_rwlock(vp, w, ct)
 #define	VOP_RWUNLOCK(vp, w, ct) \
 	fop_rwunlock(vp, w, ct)
 #define	VOP_SEEK(vp, ooff, noffp, ct) \
 	fop_seek(vp, ooff, noffp, ct)
 #define	VOP_CMP(vp1, vp2, ct) \
 	fop_cmp(vp1, vp2, ct)
 #define	VOP_FRLOCK(vp, cmd, a, f, o, cb, cr, ct) \
 	fop_frlock(vp, cmd, a, f, o, cb, cr, ct)
 #define	VOP_SPACE(vp, cmd, a, f, o, cr, ct) \
 	fop_space(vp, cmd, a, f, o, cr, ct)
 #define	VOP_REALVP(vp1, vp2, ct) \
 	fop_realvp(vp1, vp2, ct)
 #define	VOP_GETPAGE(vp, of, sz, pr, pl, ps, sg, a, rw, cr, ct) \
 	fop_getpage(vp, of, sz, pr, pl, ps, sg, a, rw, cr, ct)
 #define	VOP_PUTPAGE(vp, of, sz, fl, cr, ct) \
 	fop_putpage(vp, of, sz, fl, cr, ct)
 #define	VOP_MAP(vp, of, as, a, sz, p, mp, fl, cr, ct) \
 	fop_map(vp, of, as, a, sz, p, mp, fl, cr, ct)
 #define	VOP_ADDMAP(vp, of, as, a, sz, p, mp, fl, cr, ct) \
 	fop_addmap(vp, of, as, a, sz, p, mp, fl, cr, ct)
 #define	VOP_DELMAP(vp, of, as, a, sz, p, mp, fl, cr, ct) \
 	fop_delmap(vp, of, as, a, sz, p, mp, fl, cr, ct)
 #define	VOP_POLL(vp, events, anyyet, reventsp, phpp, ct) \
 	fop_poll(vp, events, anyyet, reventsp, phpp, ct)
 #define	VOP_DUMP(vp, addr, bn, count, ct) \
 	fop_dump(vp, addr, bn, count, ct)
 #define	VOP_PATHCONF(vp, cmd, valp, cr, ct) \
 	fop_pathconf(vp, cmd, valp, cr, ct)
 #define	VOP_PAGEIO(vp, pp, io_off, io_len, flags, cr, ct) \
 	fop_pageio(vp, pp, io_off, io_len, flags, cr, ct)
 #define	VOP_DUMPCTL(vp, action, blkp, ct) \
 	fop_dumpctl(vp, action, blkp, ct)
 #define	VOP_DISPOSE(vp, pp, flag, dn, cr, ct) \
 	fop_dispose(vp, pp, flag, dn, cr, ct)
 #define	VOP_GETSECATTR(vp, vsap, f, cr, ct) \
 	fop_getsecattr(vp, vsap, f, cr, ct)
 #define	VOP_SETSECATTR(vp, vsap, f, cr, ct) \
 	fop_setsecattr(vp, vsap, f, cr, ct)
 #define	VOP_SHRLOCK(vp, cmd, shr, f, cr, ct) \
 	fop_shrlock(vp, cmd, shr, f, cr, ct)
 #define	VOP_VNEVENT(vp, vnevent, dvp, fnm, ct) \
 	fop_vnevent(vp, vnevent, dvp, fnm, ct)
 #define	VOP_REQZCBUF(vp, rwflag, xuiop, cr, ct) \
 	fop_reqzcbuf(vp, rwflag, xuiop, cr, ct)
 #define	VOP_RETZCBUF(vp, xuiop, cr, ct) \
 	fop_retzcbuf(vp, xuiop, cr, ct)
 
 /*
  * VOPNAME_* string constants, one for each operation listed in VNODE_OPS;
  * each string is the member name without its "vop_" prefix.
  */
 #define	VOPNAME_OPEN		"open"
 #define	VOPNAME_CLOSE		"close"
 #define	VOPNAME_READ		"read"
 #define	VOPNAME_WRITE		"write"
 #define	VOPNAME_IOCTL		"ioctl"
 #define	VOPNAME_SETFL		"setfl"
 #define	VOPNAME_GETATTR		"getattr"
 #define	VOPNAME_SETATTR		"setattr"
 #define	VOPNAME_ACCESS		"access"
 #define	VOPNAME_LOOKUP		"lookup"
 #define	VOPNAME_CREATE		"create"
 #define	VOPNAME_REMOVE		"remove"
 #define	VOPNAME_LINK		"link"
 #define	VOPNAME_RENAME		"rename"
 #define	VOPNAME_MKDIR		"mkdir"
 #define	VOPNAME_RMDIR		"rmdir"
 #define	VOPNAME_READDIR		"readdir"
 #define	VOPNAME_SYMLINK		"symlink"
 #define	VOPNAME_READLINK	"readlink"
 #define	VOPNAME_FSYNC		"fsync"
 #define	VOPNAME_INACTIVE	"inactive"
 #define	VOPNAME_FID		"fid"
 #define	VOPNAME_RWLOCK		"rwlock"
 #define	VOPNAME_RWUNLOCK	"rwunlock"
 #define	VOPNAME_SEEK		"seek"
 #define	VOPNAME_CMP		"cmp"
 #define	VOPNAME_FRLOCK		"frlock"
 #define	VOPNAME_SPACE		"space"
 #define	VOPNAME_REALVP		"realvp"
 #define	VOPNAME_GETPAGE		"getpage"
 #define	VOPNAME_PUTPAGE		"putpage"
 #define	VOPNAME_MAP		"map"
 #define	VOPNAME_ADDMAP		"addmap"
 #define	VOPNAME_DELMAP		"delmap"
 #define	VOPNAME_POLL		"poll"
 #define	VOPNAME_DUMP		"dump"
 #define	VOPNAME_PATHCONF	"pathconf"
 #define	VOPNAME_PAGEIO		"pageio"
 #define	VOPNAME_DUMPCTL		"dumpctl"
 #define	VOPNAME_DISPOSE		"dispose"
 #define	VOPNAME_GETSECATTR	"getsecattr"
 #define	VOPNAME_SETSECATTR	"setsecattr"
 #define	VOPNAME_SHRLOCK		"shrlock"
 #define	VOPNAME_VNEVENT		"vnevent"
 #define	VOPNAME_REQZCBUF	"reqzcbuf"
 #define	VOPNAME_RETZCBUF	"retzcbuf"
 
 /*
  * Flags for VOP_LOOKUP
  *
  * FIGNORECASE and FSEARCH, which are defined in file.h, are also possible.
  */
 #define	LOOKUP_DIR		0x01	/* want parent dir vp */
 #define	LOOKUP_XATTR		0x02	/* look up extended attr dir */
 #define	CREATE_XATTR_DIR	0x04	/* Create extended attr dir */
 #define	LOOKUP_HAVE_SYSATTR_DIR	0x08	/* Already created virtual GFS dir */
 
 /*
  * Flags for VOP_READDIR
  */
 #define	V_RDDIR_ENTFLAGS	0x01	/* request dirent flags */
 #define	V_RDDIR_ACCFILTER	0x02	/* filter out inaccessible dirents */
 
 /*
  * Flags for VOP_RWLOCK/VOP_RWUNLOCK
  * VOP_RWLOCK will return the flag that was actually set, or -1 if none.
  */
 #define	V_WRITELOCK_TRUE	(1)	/* Request write-lock on the vnode */
 #define	V_WRITELOCK_FALSE	(0)	/* Request read-lock on the vnode */
 
 /*
  * Flags for VOP_DUMPCTL
  */
 #define	DUMP_ALLOC	0
 #define	DUMP_FREE	1
 #define	DUMP_SCAN	2
 
 /*
  * Public vnode manipulation functions.
  */
 #ifdef	_KERNEL
 
 vnode_t *vn_alloc(int);
 void	vn_reinit(vnode_t *);
 void	vn_recycle(vnode_t *);
 void	vn_free(vnode_t *);
 
 int	vn_is_readonly(vnode_t *);
 int   	vn_is_opened(vnode_t *, v_mode_t);
 int   	vn_is_mapped(vnode_t *, v_mode_t);
 int   	vn_has_other_opens(vnode_t *, v_mode_t);
 void	vn_open_upgrade(vnode_t *, int);
 void	vn_open_downgrade(vnode_t *, int);
 
 int	vn_can_change_zones(vnode_t *vp);
 
 int	vn_has_flocks(vnode_t *);
 int	vn_has_mandatory_locks(vnode_t *, int);
 int	vn_has_cached_data(vnode_t *);
 
 void	vn_setops(vnode_t *, vnodeops_t *);
 vnodeops_t *vn_getops(vnode_t *);
 int	vn_matchops(vnode_t *, vnodeops_t *);
 int	vn_matchopval(vnode_t *, char *, fs_generic_func_p);
 int	vn_ismntpt(vnode_t *);
 
 struct vfs *vn_mountedvfs(vnode_t *);
 
 int	vn_in_dnlc(vnode_t *);
 
 void	vn_create_cache(void);
 void	vn_destroy_cache(void);
 
 void	vn_freevnodeops(vnodeops_t *);
 
 int	vn_open(char *pnamep, enum uio_seg seg, int filemode, int createmode,
 		struct vnode **vpp, enum create crwhy, mode_t umask);
 int	vn_openat(char *pnamep, enum uio_seg seg, int filemode, int createmode,
 		struct vnode **vpp, enum create crwhy,
 		mode_t umask, struct vnode *startvp, int fd);
 int	vn_create(char *pnamep, enum uio_seg seg, struct vattr *vap,
 		enum vcexcl excl, int mode, struct vnode **vpp,
 		enum create why, int flag, mode_t umask);
 int	vn_createat(char *pnamep, enum uio_seg seg, struct vattr *vap,
 		enum vcexcl excl, int mode, struct vnode **vpp,
 		enum create why, int flag, mode_t umask, struct vnode *startvp);
 int	vn_rdwr(enum uio_rw rw, struct vnode *vp, caddr_t base, ssize_t len,
 		offset_t offset, enum uio_seg seg, int ioflag, rlim64_t ulimit,
 		cred_t *cr, ssize_t *residp);
 void	vn_rele(struct vnode *vp);
 void	vn_rele_async(struct vnode *vp, struct taskq *taskq);
 void	vn_rele_dnlc(struct vnode *vp);
 void	vn_rele_stream(struct vnode *vp);
 int	vn_link(char *from, char *to, enum uio_seg seg);
 int	vn_linkat(vnode_t *fstartvp, char *from, enum symfollow follow,
 		vnode_t *tstartvp, char *to, enum uio_seg seg);
 int	vn_rename(char *from, char *to, enum uio_seg seg);
 int	vn_renameat(vnode_t *fdvp, char *fname, vnode_t *tdvp, char *tname,
 		enum uio_seg seg);
 int	vn_remove(char *fnamep, enum uio_seg seg, enum rm dirflag);
 int	vn_removeat(vnode_t *startvp, char *fnamep, enum uio_seg seg,
 		enum rm dirflag);
 int	vn_compare(vnode_t *vp1, vnode_t *vp2);
 int	vn_vfswlock(struct vnode *vp);
 int	vn_vfswlock_wait(struct vnode *vp);
 int	vn_vfsrlock(struct vnode *vp);
 int	vn_vfsrlock_wait(struct vnode *vp);
 void	vn_vfsunlock(struct vnode *vp);
 int	vn_vfswlock_held(struct vnode *vp);
 vnode_t *specvp(struct vnode *vp, dev_t dev, vtype_t type, struct cred *cr);
 vnode_t *makespecvp(dev_t dev, vtype_t type);
 vn_vfslocks_entry_t *vn_vfslocks_getlock(void *);
 void	vn_vfslocks_rele(vn_vfslocks_entry_t *);
 boolean_t vn_is_reparse(vnode_t *, cred_t *, caller_context_t *);
 
 void vn_copypath(struct vnode *src, struct vnode *dst);
 void vn_setpath_str(struct vnode *vp, const char *str, size_t len);
 void vn_setpath(vnode_t *rootvp, struct vnode *startvp, struct vnode *vp,
     const char *path, size_t plen);
 void vn_renamepath(vnode_t *dvp, vnode_t *vp, const char *nm, size_t len);
 
 /* Private vnode manipulation functions */
 void vn_clearpath(vnode_t *, hrtime_t);
 void vn_updatepath(vnode_t *, vnode_t *, const char *);
 
 
 /* Vnode event notification */
 void	vnevent_rename_src(vnode_t *, vnode_t *, char *, caller_context_t *);
 void	vnevent_rename_dest(vnode_t *, vnode_t *, char *, caller_context_t *);
 void	vnevent_remove(vnode_t *, vnode_t *, char *, caller_context_t *);
 void	vnevent_rmdir(vnode_t *, vnode_t *, char *, caller_context_t *);
 void	vnevent_create(vnode_t *, caller_context_t *);
 void	vnevent_link(vnode_t *, caller_context_t *);
 void	vnevent_rename_dest_dir(vnode_t *, caller_context_t *ct);
 void	vnevent_mountedover(vnode_t *, caller_context_t *);
 void	vnevent_truncate(vnode_t *, caller_context_t *);
 int	vnevent_support(vnode_t *, caller_context_t *);
 void	vnevent_pre_rename_src(vnode_t *, vnode_t *, char *,
 	    caller_context_t *);
 void	vnevent_pre_rename_dest(vnode_t *, vnode_t *, char *,
 	    caller_context_t *);
 void	vnevent_pre_rename_dest_dir(vnode_t *, vnode_t *, char *,
 	    caller_context_t *);
 
 /* Vnode specific data */
 void vsd_create(uint_t *, void (*)(void *));
 void vsd_destroy(uint_t *);
 void *vsd_get(vnode_t *, uint_t);
 int vsd_set(vnode_t *, uint_t, void *);
 void vsd_free(vnode_t *);
 
 /*
  * Extensible vnode attribute (xva) routines:
  * xva_init() initializes an xvattr_t (zero struct, init mapsize, set AT_XVATTR)
  * xva_getxoptattr() returns a pointer to the xoptattr_t section of xvattr_t
  */
 void		xva_init(xvattr_t *);
 xoptattr_t	*xva_getxoptattr(xvattr_t *);	/* Get ptr to xoptattr_t */
 
 void xattr_init(void);		/* Initialize vnodeops for xattrs */
 
 /* GFS tunnel for xattrs */
 int xattr_dir_lookup(vnode_t *, vnode_t **, int, cred_t *);
 
 /* Reparse Point */
 void reparse_point_init(void);
 
 /*
  * Context identification: returns a new caller ID (see cc_caller_id in
  * caller_context_t).  Declared with (void) so this is a real prototype;
  * an empty parameter list would leave the arguments unchecked.
  */
 u_longlong_t	fs_new_caller_id(void);
 
 int	vn_vmpss_usepageio(vnode_t *);
 
 /* Empty v_path placeholder */
 extern char *vn_vpath_empty;
 
 /*
  * Needed for use of IS_VMODSORT() in kernel.
  */
 extern uint_t pvn_vmodsort_supported;
 
 /*
  * All changes to v_count should be done through VN_HOLD() or VN_RELE(), or
  * one of their variants. This makes it possible to ensure proper locking,
  * and to guarantee that all modifications are accompanied by a firing of
  * the vn-hold or vn-rele SDT DTrace probe.
  *
  * Example DTrace command for tracing vnode references using these probes:
  *
  * dtrace -q -n 'sdt:::vn-hold,sdt:::vn-rele
  * {
  *	this->vp = (vnode_t *)arg0;
  *	printf("%s %s(%p[%s]) %d\n", execname, probename, this->vp,
  *	    this->vp->v_path == NULL ? "NULL" : stringof(this->vp->v_path),
  *	    this->vp->v_count)
  * }'
  */
 /*
  * VN_HOLD_LOCKED(): increment v_count and fire the vn-hold probe.  The
  * caller must already own vp->v_lock (ASSERTed).
  */
 #define	VN_HOLD_LOCKED(vp) {			\
 	ASSERT(mutex_owned(&(vp)->v_lock));	\
 	(vp)->v_count++;			\
 	DTRACE_PROBE1(vn__hold, vnode_t *, vp);	\
 }
 
 /*
  * VN_HOLD(): take vp->v_lock, apply VN_HOLD_LOCKED(), and drop the lock.
  */
 #define	VN_HOLD(vp)	{		\
 	mutex_enter(&(vp)->v_lock);	\
 	VN_HOLD_LOCKED(vp);		\
 	mutex_exit(&(vp)->v_lock);	\
 }
 
 /* VN_RELE(): calls vn_rele(vp). */
 #define	VN_RELE(vp)	{ \
 	vn_rele(vp); \
 }
 
 /* VN_RELE_ASYNC(): calls vn_rele_async(vp, taskq). */
 #define	VN_RELE_ASYNC(vp, taskq)	{ \
 	vn_rele_async(vp, taskq); \
 }
 
 /*
  * VN_RELE_LOCKED(): decrement v_count and fire the vn-rele probe.  The
  * caller must already own vp->v_lock and v_count must be at least 1 (both
  * ASSERTed).  Nothing else is done here when the count reaches zero.
  */
 #define	VN_RELE_LOCKED(vp) {			\
 	ASSERT(mutex_owned(&(vp)->v_lock));	\
 	ASSERT((vp)->v_count >= 1);		\
 	(vp)->v_count--;			\
 	DTRACE_PROBE1(vn__rele, vnode_t *, vp);	\
 }
 
 /*
  * VN_SET_VFS_TYPE_DEV(): store vfsp, type and dev into the vnode's
  * v_vfsp, v_type and v_rdev fields respectively.
  */
 #define	VN_SET_VFS_TYPE_DEV(vp, vfsp, type, dev)	{ \
 	(vp)->v_vfsp = (vfsp); \
 	(vp)->v_type = (type); \
 	(vp)->v_rdev = (dev); \
 }
 
 /*
  * Compare two vnodes for equality.  In general this macro should be used
  * in preference to calling VOP_CMP directly.
  *
  * Evaluates to 1 when the two pointers are identical.  Otherwise, if both
  * are non-NULL and vn_getops() returns the same ops vector for each, the
  * result is that of VOP_CMP(VP1, VP2, NULL); in every other case it is 0.
  * Both arguments are evaluated more than once, so they must not have side
  * effects.
  */
 #define	VN_CMP(VP1, VP2)	((VP1) == (VP2) ? 1 : 	\
 	((VP1) && (VP2) && (vn_getops(VP1) == vn_getops(VP2)) ? \
 	VOP_CMP(VP1, VP2, NULL) : 0))
 
 /*
  * Some well-known global vnodes used by the VM system to name pages.
  * kvps[] is indexed by kvps_index_t; KV_MPVP and KV_PROMVP exist only when
  * __sparc is defined, so KV_MAX differs between platforms.
  */
 extern struct vnode kvps[];
 
 typedef enum {
 	KV_KVP,		/* vnode for all segkmem pages */
 	KV_ZVP,		/* vnode for all ZFS pages */
 #if defined(__sparc)
 	KV_MPVP,	/* vnode for all page_t meta-pages */
 	KV_PROMVP,	/* vnode for all PROM pages */
 #endif	/* __sparc */
 	KV_MAX		/* total number of vnodes in kvps[] */
 } kvps_index_t;
 
 /* True when vp points at one of the KV_MAX entries of kvps[]. */
 #define	VN_ISKAS(vp)	((vp) >= &kvps[0] && (vp) < &kvps[KV_MAX])
 
 #endif	/* _KERNEL */
 
 /*
  * Flags to VOP_SETATTR/VOP_GETATTR.
  */
 #define	ATTR_UTIME	0x01	/* non-default utime(2) request */
 #define	ATTR_EXEC	0x02	/* invocation from exec(2) */
 #define	ATTR_COMM	0x04	/* yield common vp attributes */
 #define	ATTR_HINT	0x08	/* information returned will be `hint' */
 #define	ATTR_REAL	0x10	/* yield attributes of the real vp */
 #define	ATTR_NOACLCHECK	0x20	/* Don't check ACL when checking permissions */
 #define	ATTR_TRIGGER	0x40	/* Mount first if vnode is a trigger mount */
 
 /*
  * Generally useful macros.
  */
 
 /* The vfs_bsize field of the vfs that vp->v_vfsp points to. */
 #define	VBSIZE(vp)	((vp)->v_vfsp->vfs_bsize)
 
 /* The vfs_zone field of the vfs that vp->v_vfsp points to. */
 #define	VTOZONE(vp)	((vp)->v_vfsp->vfs_zone)
 
 /* Typed null pointers for vnode and vnode-pointer arguments. */
 #define	NULLVP		((struct vnode *)0)
 #define	NULLVPP		((struct vnode **)0)
 
 #ifdef	_KERNEL
 
 /*
  * Structure used while handling asynchronous VOP_PUTPAGE operations.
  * Requests are chained together through a_next.
  */
 struct async_reqs {
 	struct async_reqs *a_next;	/* pointer to next arg struct */
 	struct vnode *a_vp;		/* vnode pointer */
 	u_offset_t a_off;			/* offset in file */
 	uint_t a_len;			/* size of i/o request */
 	int a_flags;			/* flags to indicate operation type */
 	struct cred *a_cred;		/* cred pointer */
 	ushort_t a_prealloced;		/* set if struct is pre-allocated */
 };
 
 /*
  * VN_DISPOSE() -- given a page pointer, safely invoke VOP_DISPOSE().
  * Note that there is no guarantee that the page passed in will be
  * freed.  If that is required, then a check after calling VN_DISPOSE would
  * be necessary to ensure the page was freed.
  *
  * Dispatch:
  *  - the page has a vnode that is not one of the kvps[] vnodes (VN_ISKAS):
  *    VOP_DISPOSE() is called on that vnode with a NULL caller context;
  *  - otherwise, if flag is B_FREE: page_free(pp, dn);
  *  - otherwise: page_destroy(pp, dn).
  */
 #define	VN_DISPOSE(pp, flag, dn, cr)	{ \
 	if ((pp)->p_vnode != NULL && !VN_ISKAS((pp)->p_vnode)) \
 		VOP_DISPOSE((pp)->p_vnode, (pp), (flag), (dn), (cr), NULL); \
 	else if ((flag) == B_FREE) \
 		page_free((pp), (dn)); \
 	else \
 		page_destroy((pp), (dn)); \
 	}
 
 #endif	/* _KERNEL */
 
 #ifdef	__cplusplus
 }
 #endif
 
 #endif	/* _SYS_VNODE_H */