diff --git a/cmd/zdb/zdb.c b/cmd/zdb/zdb.c index 82491ad5f2f3..060498ae2c50 100644 --- a/cmd/zdb/zdb.c +++ b/cmd/zdb/zdb.c @@ -1,3355 +1,3386 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012 by Delphix. All rights reserved. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include #undef ZFS_MAXNAMELEN #include #define ZDB_COMPRESS_NAME(idx) ((idx) < ZIO_COMPRESS_FUNCTIONS ? \ zio_compress_table[(idx)].ci_name : "UNKNOWN") #define ZDB_CHECKSUM_NAME(idx) ((idx) < ZIO_CHECKSUM_FUNCTIONS ? \ zio_checksum_table[(idx)].ci_name : "UNKNOWN") #define ZDB_OT_NAME(idx) ((idx) < DMU_OT_NUMTYPES ? \ dmu_ot[(idx)].ot_name : DMU_OT_IS_VALID(idx) ? \ dmu_ot_byteswap[DMU_OT_BYTESWAP(idx)].ob_name : "UNKNOWN") #define ZDB_OT_TYPE(idx) ((idx) < DMU_OT_NUMTYPES ? (idx) : \ (((idx) == DMU_OTN_ZAP_DATA || (idx) == DMU_OTN_ZAP_METADATA) ? \ DMU_OT_ZAP_OTHER : DMU_OT_NUMTYPES)) #ifndef lint extern int zfs_recover; #else int zfs_recover; #endif const char cmdname[] = "zdb"; uint8_t dump_opt[256]; typedef void object_viewer_t(objset_t *, uint64_t, void *data, size_t size); extern void dump_intent_log(zilog_t *); uint64_t *zopt_object = NULL; int zopt_objects = 0; libzfs_handle_t *g_zfs; uint64_t max_inflight = 200; /* * These libumem hooks provide a reasonable set of defaults for the allocator's * debugging facilities. */ const char * _umem_debug_init(void) { return ("default,verbose"); /* $UMEM_DEBUG setting */ } const char * _umem_logging_init(void) { return ("fail,contents"); /* $UMEM_LOGGING setting */ } static void usage(void) { (void) fprintf(stderr, "Usage: %s [-CumdibcsDvhLXFPA] [-t txg] [-e [-p path...]] " "[-U config] [-M inflight I/Os] poolname [object...]\n" " %s [-divPA] [-e -p path...] 
[-U config] dataset " "[object...]\n" " %s -m [-LXFPA] [-t txg] [-e [-p path...]] [-U config] " "poolname [vdev [metaslab...]]\n" " %s -R [-A] [-e [-p path...]] poolname " "vdev:offset:size[:flags]\n" " %s -S [-PA] [-e [-p path...]] [-U config] poolname\n" " %s -l [-uA] device\n" " %s -C [-A] [-U config]\n\n", cmdname, cmdname, cmdname, cmdname, cmdname, cmdname, cmdname); (void) fprintf(stderr, " Dataset name must include at least one " "separator character '/' or '@'\n"); (void) fprintf(stderr, " If dataset name is specified, only that " "dataset is dumped\n"); (void) fprintf(stderr, " If object numbers are specified, only " "those objects are dumped\n\n"); (void) fprintf(stderr, " Options to control amount of output:\n"); (void) fprintf(stderr, " -u uberblock\n"); (void) fprintf(stderr, " -d dataset(s)\n"); (void) fprintf(stderr, " -i intent logs\n"); (void) fprintf(stderr, " -C config (or cachefile if alone)\n"); (void) fprintf(stderr, " -h pool history\n"); (void) fprintf(stderr, " -b block statistics\n"); (void) fprintf(stderr, " -m metaslabs\n"); (void) fprintf(stderr, " -c checksum all metadata (twice for " "all data) blocks\n"); (void) fprintf(stderr, " -s report stats on zdb's I/O\n"); (void) fprintf(stderr, " -D dedup statistics\n"); (void) fprintf(stderr, " -S simulate dedup to measure effect\n"); (void) fprintf(stderr, " -v verbose (applies to all others)\n"); (void) fprintf(stderr, " -l dump label contents\n"); (void) fprintf(stderr, " -L disable leak tracking (do not " "load spacemaps)\n"); (void) fprintf(stderr, " -R read and display block from a " "device\n\n"); (void) fprintf(stderr, " Below options are intended for use " "with other options (except -l):\n"); (void) fprintf(stderr, " -A ignore assertions (-A), enable " "panic recovery (-AA) or both (-AAA)\n"); (void) fprintf(stderr, " -F attempt automatic rewind within " "safe range of transaction groups\n"); (void) fprintf(stderr, " -U -- use alternate " "cachefile\n"); (void) fprintf(stderr, " -X attempt extreme rewind (does not " "work with dataset)\n"); (void) fprintf(stderr, " -e pool is exported/destroyed/" "has altroot/not in a cachefile\n"); (void) fprintf(stderr, " -p -- use one or more with " "-e to specify path to vdev dir\n"); (void) fprintf(stderr, " -P print numbers in parseable form\n"); (void) fprintf(stderr, " -t -- highest txg to use when " "searching for uberblocks\n"); (void) fprintf(stderr, " -M -- " "specify the maximum number of checksumming I/Os [default is 200]\n"); (void) fprintf(stderr, "Specify an option more than once (e.g. -bb) " "to make only that option verbose\n"); (void) fprintf(stderr, "Default is to dump everything non-verbosely\n"); exit(1); } /* * Called for usage errors that are discovered after a call to spa_open(), * dmu_bonus_hold(), or pool_match(). abort() is called for other errors. */ static void fatal(const char *fmt, ...) 
{ va_list ap; va_start(ap, fmt); (void) fprintf(stderr, "%s: ", cmdname); (void) vfprintf(stderr, fmt, ap); va_end(ap); (void) fprintf(stderr, "\n"); exit(1); } /* ARGSUSED */ static void dump_packed_nvlist(objset_t *os, uint64_t object, void *data, size_t size) { nvlist_t *nv; size_t nvsize = *(uint64_t *)data; char *packed = umem_alloc(nvsize, UMEM_NOFAIL); VERIFY(0 == dmu_read(os, object, 0, nvsize, packed, DMU_READ_PREFETCH)); VERIFY(nvlist_unpack(packed, nvsize, &nv, 0) == 0); umem_free(packed, nvsize); dump_nvlist(nv, 8); nvlist_free(nv); } +/* ARGSUSED */ +static void +dump_history_offsets(objset_t *os, uint64_t object, void *data, size_t size) +{ + spa_history_phys_t *shp = data; + + if (shp == NULL) + return; + + (void) printf("\t\tpool_create_len = %llu\n", + (u_longlong_t)shp->sh_pool_create_len); + (void) printf("\t\tphys_max_off = %llu\n", + (u_longlong_t)shp->sh_phys_max_off); + (void) printf("\t\tbof = %llu\n", + (u_longlong_t)shp->sh_bof); + (void) printf("\t\teof = %llu\n", + (u_longlong_t)shp->sh_eof); + (void) printf("\t\trecords_lost = %llu\n", + (u_longlong_t)shp->sh_records_lost); +} + static void zdb_nicenum(uint64_t num, char *buf) { if (dump_opt['P']) (void) sprintf(buf, "%llu", (longlong_t)num); else nicenum(num, buf); } const char dump_zap_stars[] = "****************************************"; const int dump_zap_width = sizeof (dump_zap_stars) - 1; static void dump_zap_histogram(uint64_t histo[ZAP_HISTOGRAM_SIZE]) { int i; int minidx = ZAP_HISTOGRAM_SIZE - 1; int maxidx = 0; uint64_t max = 0; for (i = 0; i < ZAP_HISTOGRAM_SIZE; i++) { if (histo[i] > max) max = histo[i]; if (histo[i] > 0 && i > maxidx) maxidx = i; if (histo[i] > 0 && i < minidx) minidx = i; } if (max < dump_zap_width) max = dump_zap_width; for (i = minidx; i <= maxidx; i++) (void) printf("\t\t\t%u: %6llu %s\n", i, (u_longlong_t)histo[i], &dump_zap_stars[(max - histo[i]) * dump_zap_width / max]); } static void dump_zap_stats(objset_t *os, uint64_t object) { int error; zap_stats_t zs; error = zap_get_stats(os, object, &zs); if (error) return; if (zs.zs_ptrtbl_len == 0) { ASSERT(zs.zs_num_blocks == 1); (void) printf("\tmicrozap: %llu bytes, %llu entries\n", (u_longlong_t)zs.zs_blocksize, (u_longlong_t)zs.zs_num_entries); return; } (void) printf("\tFat ZAP stats:\n"); (void) printf("\t\tPointer table:\n"); (void) printf("\t\t\t%llu elements\n", (u_longlong_t)zs.zs_ptrtbl_len); (void) printf("\t\t\tzt_blk: %llu\n", (u_longlong_t)zs.zs_ptrtbl_zt_blk); (void) printf("\t\t\tzt_numblks: %llu\n", (u_longlong_t)zs.zs_ptrtbl_zt_numblks); (void) printf("\t\t\tzt_shift: %llu\n", (u_longlong_t)zs.zs_ptrtbl_zt_shift); (void) printf("\t\t\tzt_blks_copied: %llu\n", (u_longlong_t)zs.zs_ptrtbl_blks_copied); (void) printf("\t\t\tzt_nextblk: %llu\n", (u_longlong_t)zs.zs_ptrtbl_nextblk); (void) printf("\t\tZAP entries: %llu\n", (u_longlong_t)zs.zs_num_entries); (void) printf("\t\tLeaf blocks: %llu\n", (u_longlong_t)zs.zs_num_leafs); (void) printf("\t\tTotal blocks: %llu\n", (u_longlong_t)zs.zs_num_blocks); (void) printf("\t\tzap_block_type: 0x%llx\n", (u_longlong_t)zs.zs_block_type); (void) printf("\t\tzap_magic: 0x%llx\n", (u_longlong_t)zs.zs_magic); (void) printf("\t\tzap_salt: 0x%llx\n", (u_longlong_t)zs.zs_salt); (void) printf("\t\tLeafs with 2^n pointers:\n"); dump_zap_histogram(zs.zs_leafs_with_2n_pointers); (void) printf("\t\tBlocks with n*5 entries:\n"); dump_zap_histogram(zs.zs_blocks_with_n5_entries); (void) printf("\t\tBlocks n/10 full:\n"); dump_zap_histogram(zs.zs_blocks_n_tenths_full); (void) 
printf("\t\tEntries with n chunks:\n"); dump_zap_histogram(zs.zs_entries_using_n_chunks); (void) printf("\t\tBuckets with n entries:\n"); dump_zap_histogram(zs.zs_buckets_with_n_entries); } /*ARGSUSED*/ static void dump_none(objset_t *os, uint64_t object, void *data, size_t size) { } /*ARGSUSED*/ static void dump_unknown(objset_t *os, uint64_t object, void *data, size_t size) { (void) printf("\tUNKNOWN OBJECT TYPE\n"); } /*ARGSUSED*/ void dump_uint8(objset_t *os, uint64_t object, void *data, size_t size) { } /*ARGSUSED*/ static void dump_uint64(objset_t *os, uint64_t object, void *data, size_t size) { } /*ARGSUSED*/ static void dump_zap(objset_t *os, uint64_t object, void *data, size_t size) { zap_cursor_t zc; zap_attribute_t attr; void *prop; int i; dump_zap_stats(os, object); (void) printf("\n"); for (zap_cursor_init(&zc, os, object); zap_cursor_retrieve(&zc, &attr) == 0; zap_cursor_advance(&zc)) { (void) printf("\t\t%s = ", attr.za_name); if (attr.za_num_integers == 0) { (void) printf("\n"); continue; } prop = umem_zalloc(attr.za_num_integers * attr.za_integer_length, UMEM_NOFAIL); (void) zap_lookup(os, object, attr.za_name, attr.za_integer_length, attr.za_num_integers, prop); if (attr.za_integer_length == 1) { (void) printf("%s", (char *)prop); } else { for (i = 0; i < attr.za_num_integers; i++) { switch (attr.za_integer_length) { case 2: (void) printf("%u ", ((uint16_t *)prop)[i]); break; case 4: (void) printf("%u ", ((uint32_t *)prop)[i]); break; case 8: (void) printf("%lld ", (u_longlong_t)((int64_t *)prop)[i]); break; } } } (void) printf("\n"); umem_free(prop, attr.za_num_integers * attr.za_integer_length); } zap_cursor_fini(&zc); } /*ARGSUSED*/ static void dump_ddt_zap(objset_t *os, uint64_t object, void *data, size_t size) { dump_zap_stats(os, object); /* contents are printed elsewhere, properly decoded */ } /*ARGSUSED*/ static void dump_sa_attrs(objset_t *os, uint64_t object, void *data, size_t size) { zap_cursor_t zc; zap_attribute_t attr; dump_zap_stats(os, object); (void) printf("\n"); for (zap_cursor_init(&zc, os, object); zap_cursor_retrieve(&zc, &attr) == 0; zap_cursor_advance(&zc)) { (void) printf("\t\t%s = ", attr.za_name); if (attr.za_num_integers == 0) { (void) printf("\n"); continue; } (void) printf(" %llx : [%d:%d:%d]\n", (u_longlong_t)attr.za_first_integer, (int)ATTR_LENGTH(attr.za_first_integer), (int)ATTR_BSWAP(attr.za_first_integer), (int)ATTR_NUM(attr.za_first_integer)); } zap_cursor_fini(&zc); } /*ARGSUSED*/ static void dump_sa_layouts(objset_t *os, uint64_t object, void *data, size_t size) { zap_cursor_t zc; zap_attribute_t attr; uint16_t *layout_attrs; int i; dump_zap_stats(os, object); (void) printf("\n"); for (zap_cursor_init(&zc, os, object); zap_cursor_retrieve(&zc, &attr) == 0; zap_cursor_advance(&zc)) { (void) printf("\t\t%s = [", attr.za_name); if (attr.za_num_integers == 0) { (void) printf("\n"); continue; } VERIFY(attr.za_integer_length == 2); layout_attrs = umem_zalloc(attr.za_num_integers * attr.za_integer_length, UMEM_NOFAIL); VERIFY(zap_lookup(os, object, attr.za_name, attr.za_integer_length, attr.za_num_integers, layout_attrs) == 0); for (i = 0; i != attr.za_num_integers; i++) (void) printf(" %d ", (int)layout_attrs[i]); (void) printf("]\n"); umem_free(layout_attrs, attr.za_num_integers * attr.za_integer_length); } zap_cursor_fini(&zc); } /*ARGSUSED*/ static void dump_zpldir(objset_t *os, uint64_t object, void *data, size_t size) { zap_cursor_t zc; zap_attribute_t attr; const char *typenames[] = { /* 0 */ "not specified", /* 1 */ "FIFO", /* 2 */ 
"Character Device", /* 3 */ "3 (invalid)", /* 4 */ "Directory", /* 5 */ "5 (invalid)", /* 6 */ "Block Device", /* 7 */ "7 (invalid)", /* 8 */ "Regular File", /* 9 */ "9 (invalid)", /* 10 */ "Symbolic Link", /* 11 */ "11 (invalid)", /* 12 */ "Socket", /* 13 */ "Door", /* 14 */ "Event Port", /* 15 */ "15 (invalid)", }; dump_zap_stats(os, object); (void) printf("\n"); for (zap_cursor_init(&zc, os, object); zap_cursor_retrieve(&zc, &attr) == 0; zap_cursor_advance(&zc)) { (void) printf("\t\t%s = %lld (type: %s)\n", attr.za_name, ZFS_DIRENT_OBJ(attr.za_first_integer), typenames[ZFS_DIRENT_TYPE(attr.za_first_integer)]); } zap_cursor_fini(&zc); } static void dump_spacemap(objset_t *os, space_map_obj_t *smo, space_map_t *sm) { uint64_t alloc, offset, entry; uint8_t mapshift = sm->sm_shift; uint64_t mapstart = sm->sm_start; char *ddata[] = { "ALLOC", "FREE", "CONDENSE", "INVALID", "INVALID", "INVALID", "INVALID", "INVALID" }; if (smo->smo_object == 0) return; /* * Print out the freelist entries in both encoded and decoded form. */ alloc = 0; for (offset = 0; offset < smo->smo_objsize; offset += sizeof (entry)) { VERIFY3U(0, ==, dmu_read(os, smo->smo_object, offset, sizeof (entry), &entry, DMU_READ_PREFETCH)); if (SM_DEBUG_DECODE(entry)) { (void) printf("\t [%6llu] %s: txg %llu, pass %llu\n", (u_longlong_t)(offset / sizeof (entry)), ddata[SM_DEBUG_ACTION_DECODE(entry)], (u_longlong_t)SM_DEBUG_TXG_DECODE(entry), (u_longlong_t)SM_DEBUG_SYNCPASS_DECODE(entry)); } else { (void) printf("\t [%6llu] %c range:" " %010llx-%010llx size: %06llx\n", (u_longlong_t)(offset / sizeof (entry)), SM_TYPE_DECODE(entry) == SM_ALLOC ? 'A' : 'F', (u_longlong_t)((SM_OFFSET_DECODE(entry) << mapshift) + mapstart), (u_longlong_t)((SM_OFFSET_DECODE(entry) << mapshift) + mapstart + (SM_RUN_DECODE(entry) << mapshift)), (u_longlong_t)(SM_RUN_DECODE(entry) << mapshift)); if (SM_TYPE_DECODE(entry) == SM_ALLOC) alloc += SM_RUN_DECODE(entry) << mapshift; else alloc -= SM_RUN_DECODE(entry) << mapshift; } } if (alloc != smo->smo_alloc) { (void) printf("space_map_object alloc (%llu) INCONSISTENT " "with space map summary (%llu)\n", (u_longlong_t)smo->smo_alloc, (u_longlong_t)alloc); } } static void dump_metaslab_stats(metaslab_t *msp) { char maxbuf[32]; space_map_t *sm = msp->ms_map; avl_tree_t *t = sm->sm_pp_root; int free_pct = sm->sm_space * 100 / sm->sm_size; zdb_nicenum(space_map_maxsize(sm), maxbuf); (void) printf("\t %25s %10lu %7s %6s %4s %4d%%\n", "segments", avl_numnodes(t), "maxsize", maxbuf, "freepct", free_pct); } static void dump_metaslab(metaslab_t *msp) { vdev_t *vd = msp->ms_group->mg_vd; spa_t *spa = vd->vdev_spa; space_map_t *sm = msp->ms_map; space_map_obj_t *smo = &msp->ms_smo; char freebuf[32]; zdb_nicenum(sm->sm_size - smo->smo_alloc, freebuf); (void) printf( "\tmetaslab %6llu offset %12llx spacemap %6llu free %5s\n", (u_longlong_t)(sm->sm_start / sm->sm_size), (u_longlong_t)sm->sm_start, (u_longlong_t)smo->smo_object, freebuf); if (dump_opt['m'] > 1 && !dump_opt['L']) { mutex_enter(&msp->ms_lock); space_map_load_wait(sm); if (!sm->sm_loaded) VERIFY(space_map_load(sm, zfs_metaslab_ops, SM_FREE, smo, spa->spa_meta_objset) == 0); dump_metaslab_stats(msp); space_map_unload(sm); mutex_exit(&msp->ms_lock); } if (dump_opt['d'] > 5 || dump_opt['m'] > 2) { ASSERT(sm->sm_size == (1ULL << vd->vdev_ms_shift)); mutex_enter(&msp->ms_lock); dump_spacemap(spa->spa_meta_objset, smo, sm); mutex_exit(&msp->ms_lock); } } static void print_vdev_metaslab_header(vdev_t *vd) { (void) printf("\tvdev %10llu\n\t%-10s%5llu %-19s %-15s 
%-10s\n", (u_longlong_t)vd->vdev_id, "metaslabs", (u_longlong_t)vd->vdev_ms_count, "offset", "spacemap", "free"); (void) printf("\t%15s %19s %15s %10s\n", "---------------", "-------------------", "---------------", "-------------"); } static void dump_metaslabs(spa_t *spa) { vdev_t *vd, *rvd = spa->spa_root_vdev; uint64_t m, c = 0, children = rvd->vdev_children; (void) printf("\nMetaslabs:\n"); if (!dump_opt['d'] && zopt_objects > 0) { c = zopt_object[0]; if (c >= children) (void) fatal("bad vdev id: %llu", (u_longlong_t)c); if (zopt_objects > 1) { vd = rvd->vdev_child[c]; print_vdev_metaslab_header(vd); for (m = 1; m < zopt_objects; m++) { if (zopt_object[m] < vd->vdev_ms_count) dump_metaslab( vd->vdev_ms[zopt_object[m]]); else (void) fprintf(stderr, "bad metaslab " "number %llu\n", (u_longlong_t)zopt_object[m]); } (void) printf("\n"); return; } children = c + 1; } for (; c < children; c++) { vd = rvd->vdev_child[c]; print_vdev_metaslab_header(vd); for (m = 0; m < vd->vdev_ms_count; m++) dump_metaslab(vd->vdev_ms[m]); (void) printf("\n"); } } static void dump_dde(const ddt_t *ddt, const ddt_entry_t *dde, uint64_t index) { const ddt_phys_t *ddp = dde->dde_phys; const ddt_key_t *ddk = &dde->dde_key; char *types[4] = { "ditto", "single", "double", "triple" }; char blkbuf[BP_SPRINTF_LEN]; blkptr_t blk; int p; for (p = 0; p < DDT_PHYS_TYPES; p++, ddp++) { if (ddp->ddp_phys_birth == 0) continue; ddt_bp_create(ddt->ddt_checksum, ddk, ddp, &blk); sprintf_blkptr(blkbuf, &blk); (void) printf("index %llx refcnt %llu %s %s\n", (u_longlong_t)index, (u_longlong_t)ddp->ddp_refcnt, types[p], blkbuf); } } static void dump_dedup_ratio(const ddt_stat_t *dds) { double rL, rP, rD, D, dedup, compress, copies; if (dds->dds_blocks == 0) return; rL = (double)dds->dds_ref_lsize; rP = (double)dds->dds_ref_psize; rD = (double)dds->dds_ref_dsize; D = (double)dds->dds_dsize; dedup = rD / D; compress = rL / rP; copies = rD / rP; (void) printf("dedup = %.2f, compress = %.2f, copies = %.2f, " "dedup * compress / copies = %.2f\n\n", dedup, compress, copies, dedup * compress / copies); } static void dump_ddt(ddt_t *ddt, enum ddt_type type, enum ddt_class class) { char name[DDT_NAMELEN]; ddt_entry_t dde; uint64_t walk = 0; dmu_object_info_t doi; uint64_t count, dspace, mspace; int error; error = ddt_object_info(ddt, type, class, &doi); if (error == ENOENT) return; ASSERT(error == 0); error = ddt_object_count(ddt, type, class, &count); ASSERT(error == 0); if (count == 0) return; dspace = doi.doi_physical_blocks_512 << 9; mspace = doi.doi_fill_count * doi.doi_data_block_size; ddt_object_name(ddt, type, class, name); (void) printf("%s: %llu entries, size %llu on disk, %llu in core\n", name, (u_longlong_t)count, (u_longlong_t)(dspace / count), (u_longlong_t)(mspace / count)); if (dump_opt['D'] < 3) return; zpool_dump_ddt(NULL, &ddt->ddt_histogram[type][class]); if (dump_opt['D'] < 4) return; if (dump_opt['D'] < 5 && class == DDT_CLASS_UNIQUE) return; (void) printf("%s contents:\n\n", name); while ((error = ddt_object_walk(ddt, type, class, &walk, &dde)) == 0) dump_dde(ddt, &dde, walk); ASSERT(error == ENOENT); (void) printf("\n"); } static void dump_all_ddts(spa_t *spa) { ddt_histogram_t ddh_total; ddt_stat_t dds_total; enum zio_checksum c; enum ddt_type type; enum ddt_class class; bzero(&ddh_total, sizeof (ddt_histogram_t)); bzero(&dds_total, sizeof (ddt_stat_t)); for (c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) { ddt_t *ddt = spa->spa_ddt[c]; for (type = 0; type < DDT_TYPES; type++) { for (class = 0; class < DDT_CLASSES; 
class++) { dump_ddt(ddt, type, class); } } } ddt_get_dedup_stats(spa, &dds_total); if (dds_total.dds_blocks == 0) { (void) printf("All DDTs are empty\n"); return; } (void) printf("\n"); if (dump_opt['D'] > 1) { (void) printf("DDT histogram (aggregated over all DDTs):\n"); ddt_get_dedup_histogram(spa, &ddh_total); zpool_dump_ddt(&dds_total, &ddh_total); } dump_dedup_ratio(&dds_total); } static void dump_dtl_seg(space_map_t *sm, uint64_t start, uint64_t size) { char *prefix = (void *)sm; (void) printf("%s [%llu,%llu) length %llu\n", prefix, (u_longlong_t)start, (u_longlong_t)(start + size), (u_longlong_t)(size)); } static void dump_dtl(vdev_t *vd, int indent) { spa_t *spa = vd->vdev_spa; boolean_t required; char *name[DTL_TYPES] = { "missing", "partial", "scrub", "outage" }; char prefix[256]; int c, t; spa_vdev_state_enter(spa, SCL_NONE); required = vdev_dtl_required(vd); (void) spa_vdev_state_exit(spa, NULL, 0); if (indent == 0) (void) printf("\nDirty time logs:\n\n"); (void) printf("\t%*s%s [%s]\n", indent, "", vd->vdev_path ? vd->vdev_path : vd->vdev_parent ? vd->vdev_ops->vdev_op_type : spa_name(spa), required ? "DTL-required" : "DTL-expendable"); for (t = 0; t < DTL_TYPES; t++) { space_map_t *sm = &vd->vdev_dtl[t]; if (sm->sm_space == 0) continue; (void) snprintf(prefix, sizeof (prefix), "\t%*s%s", indent + 2, "", name[t]); mutex_enter(sm->sm_lock); space_map_walk(sm, dump_dtl_seg, (void *)prefix); mutex_exit(sm->sm_lock); if (dump_opt['d'] > 5 && vd->vdev_children == 0) dump_spacemap(spa->spa_meta_objset, &vd->vdev_dtl_smo, sm); } for (c = 0; c < vd->vdev_children; c++) dump_dtl(vd->vdev_child[c], indent + 4); } static void dump_history(spa_t *spa) { nvlist_t **events = NULL; char buf[SPA_MAXBLOCKSIZE]; uint64_t resid, len, off = 0; uint_t num = 0; int error; time_t tsec; struct tm t; char tbuf[30]; char internalstr[MAXPATHLEN]; int i; do { len = sizeof (buf); if ((error = spa_history_get(spa, &off, &len, buf)) != 0) { (void) fprintf(stderr, "Unable to read history: " "error %d\n", error); return; } if (zpool_history_unpack(buf, len, &resid, &events, &num) != 0) break; off -= resid; } while (len != 0); (void) printf("\nHistory:\n"); for (i = 0; i < num; i++) { uint64_t time, txg, ievent; char *cmd, *intstr; + boolean_t printed = B_FALSE; if (nvlist_lookup_uint64(events[i], ZPOOL_HIST_TIME, &time) != 0) - continue; + goto next; if (nvlist_lookup_string(events[i], ZPOOL_HIST_CMD, &cmd) != 0) { if (nvlist_lookup_uint64(events[i], ZPOOL_HIST_INT_EVENT, &ievent) != 0) - continue; + goto next; verify(nvlist_lookup_uint64(events[i], ZPOOL_HIST_TXG, &txg) == 0); verify(nvlist_lookup_string(events[i], ZPOOL_HIST_INT_STR, &intstr) == 0); - if (ievent >= LOG_END) - continue; + if (ievent >= ZFS_NUM_LEGACY_HISTORY_EVENTS) + goto next; (void) snprintf(internalstr, sizeof (internalstr), "[internal %s txg:%lld] %s", zfs_history_event_names[ievent], (longlong_t)txg, intstr); cmd = internalstr; } tsec = time; (void) localtime_r(&tsec, &t); (void) strftime(tbuf, sizeof (tbuf), "%F.%T", &t); (void) printf("%s %s\n", tbuf, cmd); + printed = B_TRUE; + +next: + if (dump_opt['h'] > 1) { + if (!printed) + (void) printf("unrecognized record:\n"); + dump_nvlist(events[i], 2); + } } } /*ARGSUSED*/ static void dump_dnode(objset_t *os, uint64_t object, void *data, size_t size) { } static uint64_t blkid2offset(const dnode_phys_t *dnp, const blkptr_t *bp, const zbookmark_t *zb) { if (dnp == NULL) { ASSERT(zb->zb_level < 0); if (zb->zb_object == 0) return (zb->zb_blkid); return (zb->zb_blkid * BP_GET_LSIZE(bp)); } 
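/*
 * Convert the (level, blkid) pair into a byte offset within the object:
 * scale blkid up to the first level-0 block id it covers (each level of
 * indirection covers 2^(dn_indblkshift - SPA_BLKPTRSHIFT) blocks), then
 * multiply by the data block size (dn_datablkszsec is in 512-byte
 * sectors, hence the SPA_MINBLOCKSHIFT shift).
 */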
ASSERT(zb->zb_level >= 0); return ((zb->zb_blkid << (zb->zb_level * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT))) * dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT); } static void sprintf_blkptr_compact(char *blkbuf, const blkptr_t *bp) { const dva_t *dva = bp->blk_dva; int ndvas = dump_opt['d'] > 5 ? BP_GET_NDVAS(bp) : 1; int i; if (dump_opt['b'] >= 5) { sprintf_blkptr(blkbuf, bp); return; } blkbuf[0] = '\0'; for (i = 0; i < ndvas; i++) (void) sprintf(blkbuf + strlen(blkbuf), "%llu:%llx:%llx ", (u_longlong_t)DVA_GET_VDEV(&dva[i]), (u_longlong_t)DVA_GET_OFFSET(&dva[i]), (u_longlong_t)DVA_GET_ASIZE(&dva[i])); (void) sprintf(blkbuf + strlen(blkbuf), "%llxL/%llxP F=%llu B=%llu/%llu", (u_longlong_t)BP_GET_LSIZE(bp), (u_longlong_t)BP_GET_PSIZE(bp), (u_longlong_t)bp->blk_fill, (u_longlong_t)bp->blk_birth, (u_longlong_t)BP_PHYSICAL_BIRTH(bp)); } static void print_indirect(blkptr_t *bp, const zbookmark_t *zb, const dnode_phys_t *dnp) { char blkbuf[BP_SPRINTF_LEN]; int l; ASSERT3U(BP_GET_TYPE(bp), ==, dnp->dn_type); ASSERT3U(BP_GET_LEVEL(bp), ==, zb->zb_level); (void) printf("%16llx ", (u_longlong_t)blkid2offset(dnp, bp, zb)); ASSERT(zb->zb_level >= 0); for (l = dnp->dn_nlevels - 1; l >= -1; l--) { if (l == zb->zb_level) { (void) printf("L%llx", (u_longlong_t)zb->zb_level); } else { (void) printf(" "); } } sprintf_blkptr_compact(blkbuf, bp); (void) printf("%s\n", blkbuf); } static int visit_indirect(spa_t *spa, const dnode_phys_t *dnp, blkptr_t *bp, const zbookmark_t *zb) { int err = 0; if (bp->blk_birth == 0) return (0); print_indirect(bp, zb, dnp); if (BP_GET_LEVEL(bp) > 0) { uint32_t flags = ARC_WAIT; int i; blkptr_t *cbp; int epb = BP_GET_LSIZE(bp) >> SPA_BLKPTRSHIFT; arc_buf_t *buf; uint64_t fill = 0; err = arc_read(NULL, spa, bp, arc_getbuf_func, &buf, ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb); if (err) return (err); ASSERT(buf->b_data); /* recursively visit blocks below this */ cbp = buf->b_data; for (i = 0; i < epb; i++, cbp++) { zbookmark_t czb; SET_BOOKMARK(&czb, zb->zb_objset, zb->zb_object, zb->zb_level - 1, zb->zb_blkid * epb + i); err = visit_indirect(spa, dnp, cbp, &czb); if (err) break; fill += cbp->blk_fill; } if (!err) ASSERT3U(fill, ==, bp->blk_fill); (void) arc_buf_remove_ref(buf, &buf); } return (err); } /*ARGSUSED*/ static void dump_indirect(dnode_t *dn) { dnode_phys_t *dnp = dn->dn_phys; int j; zbookmark_t czb; (void) printf("Indirect blocks:\n"); SET_BOOKMARK(&czb, dmu_objset_id(dn->dn_objset), dn->dn_object, dnp->dn_nlevels - 1, 0); for (j = 0; j < dnp->dn_nblkptr; j++) { czb.zb_blkid = j; (void) visit_indirect(dmu_objset_spa(dn->dn_objset), dnp, &dnp->dn_blkptr[j], &czb); } (void) printf("\n"); } /*ARGSUSED*/ static void dump_dsl_dir(objset_t *os, uint64_t object, void *data, size_t size) { dsl_dir_phys_t *dd = data; time_t crtime; char nice[32]; if (dd == NULL) return; ASSERT3U(size, >=, sizeof (dsl_dir_phys_t)); crtime = dd->dd_creation_time; (void) printf("\t\tcreation_time = %s", ctime(&crtime)); (void) printf("\t\thead_dataset_obj = %llu\n", (u_longlong_t)dd->dd_head_dataset_obj); (void) printf("\t\tparent_dir_obj = %llu\n", (u_longlong_t)dd->dd_parent_obj); (void) printf("\t\torigin_obj = %llu\n", (u_longlong_t)dd->dd_origin_obj); (void) printf("\t\tchild_dir_zapobj = %llu\n", (u_longlong_t)dd->dd_child_dir_zapobj); zdb_nicenum(dd->dd_used_bytes, nice); (void) printf("\t\tused_bytes = %s\n", nice); zdb_nicenum(dd->dd_compressed_bytes, nice); (void) printf("\t\tcompressed_bytes = %s\n", nice); zdb_nicenum(dd->dd_uncompressed_bytes, nice); (void) 
printf("\t\tuncompressed_bytes = %s\n", nice); zdb_nicenum(dd->dd_quota, nice); (void) printf("\t\tquota = %s\n", nice); zdb_nicenum(dd->dd_reserved, nice); (void) printf("\t\treserved = %s\n", nice); (void) printf("\t\tprops_zapobj = %llu\n", (u_longlong_t)dd->dd_props_zapobj); (void) printf("\t\tdeleg_zapobj = %llu\n", (u_longlong_t)dd->dd_deleg_zapobj); (void) printf("\t\tflags = %llx\n", (u_longlong_t)dd->dd_flags); #define DO(which) \ zdb_nicenum(dd->dd_used_breakdown[DD_USED_ ## which], nice); \ (void) printf("\t\tused_breakdown[" #which "] = %s\n", nice) DO(HEAD); DO(SNAP); DO(CHILD); DO(CHILD_RSRV); DO(REFRSRV); #undef DO } /*ARGSUSED*/ static void dump_dsl_dataset(objset_t *os, uint64_t object, void *data, size_t size) { dsl_dataset_phys_t *ds = data; time_t crtime; char used[32], compressed[32], uncompressed[32], unique[32]; char blkbuf[BP_SPRINTF_LEN]; if (ds == NULL) return; ASSERT(size == sizeof (*ds)); crtime = ds->ds_creation_time; zdb_nicenum(ds->ds_referenced_bytes, used); zdb_nicenum(ds->ds_compressed_bytes, compressed); zdb_nicenum(ds->ds_uncompressed_bytes, uncompressed); zdb_nicenum(ds->ds_unique_bytes, unique); sprintf_blkptr(blkbuf, &ds->ds_bp); (void) printf("\t\tdir_obj = %llu\n", (u_longlong_t)ds->ds_dir_obj); (void) printf("\t\tprev_snap_obj = %llu\n", (u_longlong_t)ds->ds_prev_snap_obj); (void) printf("\t\tprev_snap_txg = %llu\n", (u_longlong_t)ds->ds_prev_snap_txg); (void) printf("\t\tnext_snap_obj = %llu\n", (u_longlong_t)ds->ds_next_snap_obj); (void) printf("\t\tsnapnames_zapobj = %llu\n", (u_longlong_t)ds->ds_snapnames_zapobj); (void) printf("\t\tnum_children = %llu\n", (u_longlong_t)ds->ds_num_children); (void) printf("\t\tuserrefs_obj = %llu\n", (u_longlong_t)ds->ds_userrefs_obj); (void) printf("\t\tcreation_time = %s", ctime(&crtime)); (void) printf("\t\tcreation_txg = %llu\n", (u_longlong_t)ds->ds_creation_txg); (void) printf("\t\tdeadlist_obj = %llu\n", (u_longlong_t)ds->ds_deadlist_obj); (void) printf("\t\tused_bytes = %s\n", used); (void) printf("\t\tcompressed_bytes = %s\n", compressed); (void) printf("\t\tuncompressed_bytes = %s\n", uncompressed); (void) printf("\t\tunique = %s\n", unique); (void) printf("\t\tfsid_guid = %llu\n", (u_longlong_t)ds->ds_fsid_guid); (void) printf("\t\tguid = %llu\n", (u_longlong_t)ds->ds_guid); (void) printf("\t\tflags = %llx\n", (u_longlong_t)ds->ds_flags); (void) printf("\t\tnext_clones_obj = %llu\n", (u_longlong_t)ds->ds_next_clones_obj); (void) printf("\t\tprops_obj = %llu\n", (u_longlong_t)ds->ds_props_obj); (void) printf("\t\tbp = %s\n", blkbuf); } /* ARGSUSED */ static int dump_bptree_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx) { char blkbuf[BP_SPRINTF_LEN]; if (bp->blk_birth != 0) { sprintf_blkptr(blkbuf, bp); (void) printf("\t%s\n", blkbuf); } return (0); } static void dump_bptree(objset_t *os, uint64_t obj, char *name) { char bytes[32]; bptree_phys_t *bt; dmu_buf_t *db; if (dump_opt['d'] < 3) return; VERIFY3U(0, ==, dmu_bonus_hold(os, obj, FTAG, &db)); bt = db->db_data; zdb_nicenum(bt->bt_bytes, bytes); (void) printf("\n %s: %llu datasets, %s\n", name, (unsigned long long)(bt->bt_end - bt->bt_begin), bytes); dmu_buf_rele(db, FTAG); if (dump_opt['d'] < 5) return; (void) printf("\n"); (void) bptree_iterate(os, obj, B_FALSE, dump_bptree_cb, NULL, NULL); } /* ARGSUSED */ static int dump_bpobj_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx) { char blkbuf[BP_SPRINTF_LEN]; ASSERT(bp->blk_birth != 0); sprintf_blkptr_compact(blkbuf, bp); (void) printf("\t%s\n", blkbuf); return (0); } static void dump_bpobj(bpobj_t 
*bpo, char *name) { char bytes[32]; char comp[32]; char uncomp[32]; if (dump_opt['d'] < 3) return; zdb_nicenum(bpo->bpo_phys->bpo_bytes, bytes); if (bpo->bpo_havesubobj) { zdb_nicenum(bpo->bpo_phys->bpo_comp, comp); zdb_nicenum(bpo->bpo_phys->bpo_uncomp, uncomp); (void) printf("\n %s: %llu local blkptrs, %llu subobjs, " "%s (%s/%s comp)\n", name, (u_longlong_t)bpo->bpo_phys->bpo_num_blkptrs, (u_longlong_t)bpo->bpo_phys->bpo_num_subobjs, bytes, comp, uncomp); } else { (void) printf("\n %s: %llu blkptrs, %s\n", name, (u_longlong_t)bpo->bpo_phys->bpo_num_blkptrs, bytes); } if (dump_opt['d'] < 5) return; (void) printf("\n"); (void) bpobj_iterate_nofree(bpo, dump_bpobj_cb, NULL, NULL); } static void dump_deadlist(dsl_deadlist_t *dl) { dsl_deadlist_entry_t *dle; char bytes[32]; char comp[32]; char uncomp[32]; if (dump_opt['d'] < 3) return; zdb_nicenum(dl->dl_phys->dl_used, bytes); zdb_nicenum(dl->dl_phys->dl_comp, comp); zdb_nicenum(dl->dl_phys->dl_uncomp, uncomp); (void) printf("\n Deadlist: %s (%s/%s comp)\n", bytes, comp, uncomp); if (dump_opt['d'] < 4) return; (void) printf("\n"); for (dle = avl_first(&dl->dl_tree); dle; dle = AVL_NEXT(&dl->dl_tree, dle)) { (void) printf(" mintxg %llu -> obj %llu\n", (longlong_t)dle->dle_mintxg, (longlong_t)dle->dle_bpobj.bpo_object); if (dump_opt['d'] >= 5) dump_bpobj(&dle->dle_bpobj, ""); } } static avl_tree_t idx_tree; static avl_tree_t domain_tree; static boolean_t fuid_table_loaded; static boolean_t sa_loaded; sa_attr_type_t *sa_attr_table; static void fuid_table_destroy(void) { if (fuid_table_loaded) { zfs_fuid_table_destroy(&idx_tree, &domain_tree); fuid_table_loaded = B_FALSE; } } /* * print uid or gid information. * For normal POSIX id just the id is printed in decimal format. * For CIFS files with FUID the fuid is printed in hex followed by * the doman-rid string. */ static void print_idstr(uint64_t id, const char *id_type) { if (FUID_INDEX(id)) { char *domain; domain = zfs_fuid_idx_domain(&idx_tree, FUID_INDEX(id)); (void) printf("\t%s %llx [%s-%d]\n", id_type, (u_longlong_t)id, domain, (int)FUID_RID(id)); } else { (void) printf("\t%s %llu\n", id_type, (u_longlong_t)id); } } static void dump_uidgid(objset_t *os, uint64_t uid, uint64_t gid) { uint32_t uid_idx, gid_idx; uid_idx = FUID_INDEX(uid); gid_idx = FUID_INDEX(gid); /* Load domain table, if not already loaded */ if (!fuid_table_loaded && (uid_idx || gid_idx)) { uint64_t fuid_obj; /* first find the fuid object. 
It lives in the master node */ VERIFY(zap_lookup(os, MASTER_NODE_OBJ, ZFS_FUID_TABLES, 8, 1, &fuid_obj) == 0); zfs_fuid_avl_tree_create(&idx_tree, &domain_tree); (void) zfs_fuid_table_load(os, fuid_obj, &idx_tree, &domain_tree); fuid_table_loaded = B_TRUE; } print_idstr(uid, "uid"); print_idstr(gid, "gid"); } static void dump_znode_sa_xattr(sa_handle_t *hdl) { nvlist_t *sa_xattr; nvpair_t *elem = NULL; int sa_xattr_size = 0; int sa_xattr_entries = 0; int error; char *sa_xattr_packed; error = sa_size(hdl, sa_attr_table[ZPL_DXATTR], &sa_xattr_size); if (error || sa_xattr_size == 0) return; sa_xattr_packed = malloc(sa_xattr_size); if (sa_xattr_packed == NULL) return; error = sa_lookup(hdl, sa_attr_table[ZPL_DXATTR], sa_xattr_packed, sa_xattr_size); if (error) { free(sa_xattr_packed); return; } error = nvlist_unpack(sa_xattr_packed, sa_xattr_size, &sa_xattr, 0); if (error) { free(sa_xattr_packed); return; } while ((elem = nvlist_next_nvpair(sa_xattr, elem)) != NULL) sa_xattr_entries++; (void) printf("\tSA xattrs: %d bytes, %d entries\n\n", sa_xattr_size, sa_xattr_entries); while ((elem = nvlist_next_nvpair(sa_xattr, elem)) != NULL) { uchar_t *value; uint_t cnt, idx; (void) printf("\t\t%s = ", nvpair_name(elem)); nvpair_value_byte_array(elem, &value, &cnt); for (idx = 0 ; idx < cnt ; ++idx) { if (isprint(value[idx])) (void) putchar(value[idx]); else (void) printf("\\%3.3o", value[idx]); } (void) putchar('\n'); } nvlist_free(sa_xattr); free(sa_xattr_packed); } /*ARGSUSED*/ static void dump_znode(objset_t *os, uint64_t object, void *data, size_t size) { char path[MAXPATHLEN * 2]; /* allow for xattr and failure prefix */ sa_handle_t *hdl; uint64_t xattr, rdev, gen; uint64_t uid, gid, mode, fsize, parent, links; uint64_t pflags; uint64_t acctm[2], modtm[2], chgtm[2], crtm[2]; time_t z_crtime, z_atime, z_mtime, z_ctime; sa_bulk_attr_t bulk[12]; int idx = 0; int error; if (!sa_loaded) { uint64_t sa_attrs = 0; uint64_t version; VERIFY(zap_lookup(os, MASTER_NODE_OBJ, ZPL_VERSION_STR, 8, 1, &version) == 0); if (version >= ZPL_VERSION_SA) { VERIFY(zap_lookup(os, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 8, 1, &sa_attrs) == 0); } if ((error = sa_setup(os, sa_attrs, zfs_attr_table, ZPL_END, &sa_attr_table)) != 0) { (void) printf("sa_setup failed errno %d, can't " "display znode contents\n", error); return; } sa_loaded = B_TRUE; } if (sa_handle_get(os, object, NULL, SA_HDL_PRIVATE, &hdl)) { (void) printf("Failed to get handle for SA znode\n"); return; } SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_UID], NULL, &uid, 8); SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_GID], NULL, &gid, 8); SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_LINKS], NULL, &links, 8); SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_GEN], NULL, &gen, 8); SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_MODE], NULL, &mode, 8); SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_PARENT], NULL, &parent, 8); SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_SIZE], NULL, &fsize, 8); SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_ATIME], NULL, acctm, 16); SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_MTIME], NULL, modtm, 16); SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_CRTIME], NULL, crtm, 16); SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_CTIME], NULL, chgtm, 16); SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_FLAGS], NULL, &pflags, 8); if (sa_bulk_lookup(hdl, bulk, idx)) { (void) sa_handle_destroy(hdl); return; } error = zfs_obj_to_path(os, object, path, sizeof (path)); if (error != 0) { (void) snprintf(path, sizeof (path), "\?\?\?", (u_longlong_t)object); } if 
(dump_opt['d'] < 3) { (void) printf("\t%s\n", path); (void) sa_handle_destroy(hdl); return; } z_crtime = (time_t)crtm[0]; z_atime = (time_t)acctm[0]; z_mtime = (time_t)modtm[0]; z_ctime = (time_t)chgtm[0]; (void) printf("\tpath %s\n", path); dump_uidgid(os, uid, gid); (void) printf("\tatime %s", ctime(&z_atime)); (void) printf("\tmtime %s", ctime(&z_mtime)); (void) printf("\tctime %s", ctime(&z_ctime)); (void) printf("\tcrtime %s", ctime(&z_crtime)); (void) printf("\tgen %llu\n", (u_longlong_t)gen); (void) printf("\tmode %llo\n", (u_longlong_t)mode); (void) printf("\tsize %llu\n", (u_longlong_t)fsize); (void) printf("\tparent %llu\n", (u_longlong_t)parent); (void) printf("\tlinks %llu\n", (u_longlong_t)links); (void) printf("\tpflags %llx\n", (u_longlong_t)pflags); if (sa_lookup(hdl, sa_attr_table[ZPL_XATTR], &xattr, sizeof (uint64_t)) == 0) (void) printf("\txattr %llu\n", (u_longlong_t)xattr); if (sa_lookup(hdl, sa_attr_table[ZPL_RDEV], &rdev, sizeof (uint64_t)) == 0) (void) printf("\trdev 0x%016llx\n", (u_longlong_t)rdev); dump_znode_sa_xattr(hdl); sa_handle_destroy(hdl); } /*ARGSUSED*/ static void dump_acl(objset_t *os, uint64_t object, void *data, size_t size) { } /*ARGSUSED*/ static void dump_dmu_objset(objset_t *os, uint64_t object, void *data, size_t size) { } static object_viewer_t *object_viewer[DMU_OT_NUMTYPES + 1] = { dump_none, /* unallocated */ dump_zap, /* object directory */ dump_uint64, /* object array */ dump_none, /* packed nvlist */ dump_packed_nvlist, /* packed nvlist size */ dump_none, /* bplist */ dump_none, /* bplist header */ dump_none, /* SPA space map header */ dump_none, /* SPA space map */ dump_none, /* ZIL intent log */ dump_dnode, /* DMU dnode */ dump_dmu_objset, /* DMU objset */ dump_dsl_dir, /* DSL directory */ dump_zap, /* DSL directory child map */ dump_zap, /* DSL dataset snap map */ dump_zap, /* DSL props */ dump_dsl_dataset, /* DSL dataset */ dump_znode, /* ZFS znode */ dump_acl, /* ZFS V0 ACL */ dump_uint8, /* ZFS plain file */ dump_zpldir, /* ZFS directory */ dump_zap, /* ZFS master node */ dump_zap, /* ZFS delete queue */ dump_uint8, /* zvol object */ dump_zap, /* zvol prop */ dump_uint8, /* other uint8[] */ dump_uint64, /* other uint64[] */ dump_zap, /* other ZAP */ dump_zap, /* persistent error log */ dump_uint8, /* SPA history */ - dump_uint64, /* SPA history offsets */ + dump_history_offsets, /* SPA history offsets */ dump_zap, /* Pool properties */ dump_zap, /* DSL permissions */ dump_acl, /* ZFS ACL */ dump_uint8, /* ZFS SYSACL */ dump_none, /* FUID nvlist */ dump_packed_nvlist, /* FUID nvlist size */ dump_zap, /* DSL dataset next clones */ dump_zap, /* DSL scrub queue */ dump_zap, /* ZFS user/group used */ dump_zap, /* ZFS user/group quota */ dump_zap, /* snapshot refcount tags */ dump_ddt_zap, /* DDT ZAP object */ dump_zap, /* DDT statistics */ dump_znode, /* SA object */ dump_zap, /* SA Master Node */ dump_sa_attrs, /* SA attribute registration */ dump_sa_layouts, /* SA attribute layouts */ dump_zap, /* DSL scrub translations */ dump_none, /* fake dedup BP */ dump_zap, /* deadlist */ dump_none, /* deadlist hdr */ dump_zap, /* dsl clones */ dump_none, /* bpobj subobjs */ dump_unknown, /* Unknown type, must be last */ }; static void dump_object(objset_t *os, uint64_t object, int verbosity, int *print_header) { dmu_buf_t *db = NULL; dmu_object_info_t doi; dnode_t *dn; void *bonus = NULL; size_t bsize = 0; char iblk[32], dblk[32], lsize[32], asize[32], fill[32]; char bonus_size[32]; char aux[50]; int error; if (*print_header) { (void) 
printf("\n%10s %3s %5s %5s %5s %5s %6s %s\n", "Object", "lvl", "iblk", "dblk", "dsize", "lsize", "%full", "type"); *print_header = 0; } if (object == 0) { dn = DMU_META_DNODE(os); } else { error = dmu_bonus_hold(os, object, FTAG, &db); if (error) fatal("dmu_bonus_hold(%llu) failed, errno %u", object, error); bonus = db->db_data; bsize = db->db_size; dn = DB_DNODE((dmu_buf_impl_t *)db); } dmu_object_info_from_dnode(dn, &doi); zdb_nicenum(doi.doi_metadata_block_size, iblk); zdb_nicenum(doi.doi_data_block_size, dblk); zdb_nicenum(doi.doi_max_offset, lsize); zdb_nicenum(doi.doi_physical_blocks_512 << 9, asize); zdb_nicenum(doi.doi_bonus_size, bonus_size); (void) sprintf(fill, "%6.2f", 100.0 * doi.doi_fill_count * doi.doi_data_block_size / (object == 0 ? DNODES_PER_BLOCK : 1) / doi.doi_max_offset); aux[0] = '\0'; if (doi.doi_checksum != ZIO_CHECKSUM_INHERIT || verbosity >= 6) { (void) snprintf(aux + strlen(aux), sizeof (aux), " (K=%s)", ZDB_CHECKSUM_NAME(doi.doi_checksum)); } if (doi.doi_compress != ZIO_COMPRESS_INHERIT || verbosity >= 6) { (void) snprintf(aux + strlen(aux), sizeof (aux), " (Z=%s)", ZDB_COMPRESS_NAME(doi.doi_compress)); } (void) printf("%10lld %3u %5s %5s %5s %5s %6s %s%s\n", (u_longlong_t)object, doi.doi_indirection, iblk, dblk, asize, lsize, fill, ZDB_OT_NAME(doi.doi_type), aux); if (doi.doi_bonus_type != DMU_OT_NONE && verbosity > 3) { (void) printf("%10s %3s %5s %5s %5s %5s %6s %s\n", "", "", "", "", "", bonus_size, "bonus", ZDB_OT_NAME(doi.doi_bonus_type)); } if (verbosity >= 4) { (void) printf("\tdnode flags: %s%s%s\n", (dn->dn_phys->dn_flags & DNODE_FLAG_USED_BYTES) ? "USED_BYTES " : "", (dn->dn_phys->dn_flags & DNODE_FLAG_USERUSED_ACCOUNTED) ? "USERUSED_ACCOUNTED " : "", (dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR) ? "SPILL_BLKPTR" : ""); (void) printf("\tdnode maxblkid: %llu\n", (longlong_t)dn->dn_phys->dn_maxblkid); object_viewer[ZDB_OT_TYPE(doi.doi_bonus_type)](os, object, bonus, bsize); object_viewer[ZDB_OT_TYPE(doi.doi_type)](os, object, NULL, 0); *print_header = 1; } if (verbosity >= 5) dump_indirect(dn); if (verbosity >= 5) { /* * Report the list of segments that comprise the object. 
*/ uint64_t start = 0; uint64_t end; uint64_t blkfill = 1; int minlvl = 1; if (dn->dn_type == DMU_OT_DNODE) { minlvl = 0; blkfill = DNODES_PER_BLOCK; } for (;;) { char segsize[32]; error = dnode_next_offset(dn, 0, &start, minlvl, blkfill, 0); if (error) break; end = start; error = dnode_next_offset(dn, DNODE_FIND_HOLE, &end, minlvl, blkfill, 0); zdb_nicenum(end - start, segsize); (void) printf("\t\tsegment [%016llx, %016llx)" " size %5s\n", (u_longlong_t)start, (u_longlong_t)end, segsize); if (error) break; start = end; } } if (db != NULL) dmu_buf_rele(db, FTAG); } static char *objset_types[DMU_OST_NUMTYPES] = { "NONE", "META", "ZPL", "ZVOL", "OTHER", "ANY" }; static void dump_dir(objset_t *os) { dmu_objset_stats_t dds; uint64_t object, object_count; uint64_t refdbytes, usedobjs, scratch; char numbuf[32]; char blkbuf[BP_SPRINTF_LEN + 20]; char osname[MAXNAMELEN]; char *type = "UNKNOWN"; int verbosity = dump_opt['d']; int print_header = 1; int i, error; dmu_objset_fast_stat(os, &dds); if (dds.dds_type < DMU_OST_NUMTYPES) type = objset_types[dds.dds_type]; if (dds.dds_type == DMU_OST_META) { dds.dds_creation_txg = TXG_INITIAL; usedobjs = os->os_rootbp->blk_fill; refdbytes = os->os_spa->spa_dsl_pool-> dp_mos_dir->dd_phys->dd_used_bytes; } else { dmu_objset_space(os, &refdbytes, &scratch, &usedobjs, &scratch); } ASSERT3U(usedobjs, ==, os->os_rootbp->blk_fill); zdb_nicenum(refdbytes, numbuf); if (verbosity >= 4) { (void) sprintf(blkbuf, ", rootbp "); (void) sprintf_blkptr(blkbuf + strlen(blkbuf), os->os_rootbp); } else { blkbuf[0] = '\0'; } dmu_objset_name(os, osname); (void) printf("Dataset %s [%s], ID %llu, cr_txg %llu, " "%s, %llu objects%s\n", osname, type, (u_longlong_t)dmu_objset_id(os), (u_longlong_t)dds.dds_creation_txg, numbuf, (u_longlong_t)usedobjs, blkbuf); if (zopt_objects != 0) { for (i = 0; i < zopt_objects; i++) dump_object(os, zopt_object[i], verbosity, &print_header); (void) printf("\n"); return; } if (dump_opt['i'] != 0 || verbosity >= 2) dump_intent_log(dmu_objset_zil(os)); if (dmu_objset_ds(os) != NULL) dump_deadlist(&dmu_objset_ds(os)->ds_deadlist); if (verbosity < 2) return; if (os->os_rootbp->blk_birth == 0) return; dump_object(os, 0, verbosity, &print_header); object_count = 0; if (DMU_USERUSED_DNODE(os) != NULL && DMU_USERUSED_DNODE(os)->dn_type != 0) { dump_object(os, DMU_USERUSED_OBJECT, verbosity, &print_header); dump_object(os, DMU_GROUPUSED_OBJECT, verbosity, &print_header); } object = 0; while ((error = dmu_object_next(os, &object, B_FALSE, 0)) == 0) { dump_object(os, object, verbosity, &print_header); object_count++; } ASSERT3U(object_count, ==, usedobjs); (void) printf("\n"); if (error != ESRCH) { (void) fprintf(stderr, "dmu_object_next() = %d\n", error); abort(); } } static void dump_uberblock(uberblock_t *ub, const char *header, const char *footer) { time_t timestamp = ub->ub_timestamp; (void) printf("%s", header ? header : ""); (void) printf("\tmagic = %016llx\n", (u_longlong_t)ub->ub_magic); (void) printf("\tversion = %llu\n", (u_longlong_t)ub->ub_version); (void) printf("\ttxg = %llu\n", (u_longlong_t)ub->ub_txg); (void) printf("\tguid_sum = %llu\n", (u_longlong_t)ub->ub_guid_sum); (void) printf("\ttimestamp = %llu UTC = %s", (u_longlong_t)ub->ub_timestamp, asctime(localtime(×tamp))); if (dump_opt['u'] >= 3) { char blkbuf[BP_SPRINTF_LEN]; sprintf_blkptr(blkbuf, &ub->ub_rootbp); (void) printf("\trootbp = %s\n", blkbuf); } (void) printf("%s", footer ? 
footer : ""); } static void dump_config(spa_t *spa) { dmu_buf_t *db; size_t nvsize = 0; int error = 0; error = dmu_bonus_hold(spa->spa_meta_objset, spa->spa_config_object, FTAG, &db); if (error == 0) { nvsize = *(uint64_t *)db->db_data; dmu_buf_rele(db, FTAG); (void) printf("\nMOS Configuration:\n"); dump_packed_nvlist(spa->spa_meta_objset, spa->spa_config_object, (void *)&nvsize, 1); } else { (void) fprintf(stderr, "dmu_bonus_hold(%llu) failed, errno %d", (u_longlong_t)spa->spa_config_object, error); } } static void dump_cachefile(const char *cachefile) { int fd; struct stat64 statbuf; char *buf; nvlist_t *config; if ((fd = open64(cachefile, O_RDONLY)) < 0) { (void) printf("cannot open '%s': %s\n", cachefile, strerror(errno)); exit(1); } if (fstat64(fd, &statbuf) != 0) { (void) printf("failed to stat '%s': %s\n", cachefile, strerror(errno)); exit(1); } if ((buf = malloc(statbuf.st_size)) == NULL) { (void) fprintf(stderr, "failed to allocate %llu bytes\n", (u_longlong_t)statbuf.st_size); exit(1); } if (read(fd, buf, statbuf.st_size) != statbuf.st_size) { (void) fprintf(stderr, "failed to read %llu bytes\n", (u_longlong_t)statbuf.st_size); exit(1); } (void) close(fd); if (nvlist_unpack(buf, statbuf.st_size, &config, 0) != 0) { (void) fprintf(stderr, "failed to unpack nvlist\n"); exit(1); } free(buf); dump_nvlist(config, 0); nvlist_free(config); } #define ZDB_MAX_UB_HEADER_SIZE 32 static void dump_label_uberblocks(vdev_label_t *lbl, uint64_t ashift) { vdev_t vd; vdev_t *vdp = &vd; char header[ZDB_MAX_UB_HEADER_SIZE]; int i; vd.vdev_ashift = ashift; vdp->vdev_top = vdp; for (i = 0; i < VDEV_UBERBLOCK_COUNT(vdp); i++) { uint64_t uoff = VDEV_UBERBLOCK_OFFSET(vdp, i); uberblock_t *ub = (void *)((char *)lbl + uoff); if (uberblock_verify(ub)) continue; (void) snprintf(header, ZDB_MAX_UB_HEADER_SIZE, "Uberblock[%d]\n", i); dump_uberblock(ub, header, ""); } } static void dump_label(const char *dev) { int fd; vdev_label_t label; char *path, *buf = label.vl_vdev_phys.vp_nvlist; size_t buflen = sizeof (label.vl_vdev_phys.vp_nvlist); struct stat64 statbuf; uint64_t psize, ashift; int len = strlen(dev) + 1; int l; if (strncmp(dev, "/dev/dsk/", 9) == 0) { len++; path = malloc(len); (void) snprintf(path, len, "%s%s", "/dev/rdsk/", dev + 9); } else { path = strdup(dev); } if ((fd = open64(path, O_RDONLY)) < 0) { (void) printf("cannot open '%s': %s\n", path, strerror(errno)); free(path); exit(1); } if (fstat64_blk(fd, &statbuf) != 0) { (void) printf("failed to stat '%s': %s\n", path, strerror(errno)); free(path); (void) close(fd); exit(1); } psize = statbuf.st_size; psize = P2ALIGN(psize, (uint64_t)sizeof (vdev_label_t)); for (l = 0; l < VDEV_LABELS; l++) { nvlist_t *config = NULL; (void) printf("--------------------------------------------\n"); (void) printf("LABEL %d\n", l); (void) printf("--------------------------------------------\n"); if (pread64(fd, &label, sizeof (label), vdev_label_offset(psize, l, 0)) != sizeof (label)) { (void) printf("failed to read label %d\n", l); continue; } if (nvlist_unpack(buf, buflen, &config, 0) != 0) { (void) printf("failed to unpack label %d\n", l); ashift = SPA_MINBLOCKSHIFT; } else { nvlist_t *vdev_tree = NULL; dump_nvlist(config, 4); if ((nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &vdev_tree) != 0) || (nvlist_lookup_uint64(vdev_tree, ZPOOL_CONFIG_ASHIFT, &ashift) != 0)) ashift = SPA_MINBLOCKSHIFT; nvlist_free(config); } if (dump_opt['u']) dump_label_uberblocks(&label, ashift); } free(path); (void) close(fd); } /*ARGSUSED*/ static int dump_one_dir(const char 
*dsname, void *arg) { int error; objset_t *os; error = dmu_objset_own(dsname, DMU_OST_ANY, B_TRUE, FTAG, &os); if (error) { (void) printf("Could not open %s, error %d\n", dsname, error); return (0); } dump_dir(os); dmu_objset_disown(os, FTAG); fuid_table_destroy(); sa_loaded = B_FALSE; return (0); } /* * Block statistics. */ typedef struct zdb_blkstats { uint64_t zb_asize; uint64_t zb_lsize; uint64_t zb_psize; uint64_t zb_count; } zdb_blkstats_t; /* * Extended object types to report deferred frees and dedup auto-ditto blocks. */ #define ZDB_OT_DEFERRED (DMU_OT_NUMTYPES + 0) #define ZDB_OT_DITTO (DMU_OT_NUMTYPES + 1) #define ZDB_OT_OTHER (DMU_OT_NUMTYPES + 2) #define ZDB_OT_TOTAL (DMU_OT_NUMTYPES + 3) static char *zdb_ot_extname[] = { "deferred free", "dedup ditto", "other", "Total", }; #define ZB_TOTAL DN_MAX_LEVELS typedef struct zdb_cb { zdb_blkstats_t zcb_type[ZB_TOTAL + 1][ZDB_OT_TOTAL + 1]; uint64_t zcb_dedup_asize; uint64_t zcb_dedup_blocks; uint64_t zcb_errors[256]; int zcb_readfails; int zcb_haderrors; spa_t *zcb_spa; } zdb_cb_t; static void zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp, dmu_object_type_t type) { uint64_t refcnt = 0; int i; ASSERT(type < ZDB_OT_TOTAL); if (zilog && zil_bp_tree_add(zilog, bp) != 0) return; for (i = 0; i < 4; i++) { int l = (i < 2) ? BP_GET_LEVEL(bp) : ZB_TOTAL; int t = (i & 1) ? type : ZDB_OT_TOTAL; zdb_blkstats_t *zb = &zcb->zcb_type[l][t]; zb->zb_asize += BP_GET_ASIZE(bp); zb->zb_lsize += BP_GET_LSIZE(bp); zb->zb_psize += BP_GET_PSIZE(bp); zb->zb_count++; } if (dump_opt['L']) return; if (BP_GET_DEDUP(bp)) { ddt_t *ddt; ddt_entry_t *dde; ddt = ddt_select(zcb->zcb_spa, bp); ddt_enter(ddt); dde = ddt_lookup(ddt, bp, B_FALSE); if (dde == NULL) { refcnt = 0; } else { ddt_phys_t *ddp = ddt_phys_select(dde, bp); ddt_phys_decref(ddp); refcnt = ddp->ddp_refcnt; if (ddt_phys_total_refcnt(dde) == 0) ddt_remove(ddt, dde); } ddt_exit(ddt); } VERIFY3U(zio_wait(zio_claim(NULL, zcb->zcb_spa, refcnt ? 0 : spa_first_txg(zcb->zcb_spa), bp, NULL, NULL, ZIO_FLAG_CANFAIL)), ==, 0); } static void zdb_blkptr_done(zio_t *zio) { spa_t *spa = zio->io_spa; blkptr_t *bp = zio->io_bp; int ioerr = zio->io_error; zdb_cb_t *zcb = zio->io_private; zbookmark_t *zb = &zio->io_bookmark; zio_data_buf_free(zio->io_data, zio->io_size); mutex_enter(&spa->spa_scrub_lock); spa->spa_scrub_inflight--; cv_broadcast(&spa->spa_scrub_io_cv); if (ioerr && !(zio->io_flags & ZIO_FLAG_SPECULATIVE)) { char blkbuf[BP_SPRINTF_LEN]; zcb->zcb_haderrors = 1; zcb->zcb_errors[ioerr]++; if (dump_opt['b'] >= 2) sprintf_blkptr(blkbuf, bp); else blkbuf[0] = '\0'; (void) printf("zdb_blkptr_cb: " "Got error %d reading " "<%llu, %llu, %lld, %llx> %s -- skipping\n", ioerr, (u_longlong_t)zb->zb_objset, (u_longlong_t)zb->zb_object, (u_longlong_t)zb->zb_level, (u_longlong_t)zb->zb_blkid, blkbuf); } mutex_exit(&spa->spa_scrub_lock); } static int zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg) { zdb_cb_t *zcb = arg; char blkbuf[BP_SPRINTF_LEN]; dmu_object_type_t type; boolean_t is_metadata; if (bp == NULL) return (0); type = BP_GET_TYPE(bp); zdb_count_block(zcb, zilog, bp, (type & DMU_OT_NEWTYPE) ? ZDB_OT_OTHER : type); is_metadata = (BP_GET_LEVEL(bp) != 0 || DMU_OT_IS_METADATA(type)); if (dump_opt['c'] > 1 || (dump_opt['c'] && is_metadata)) { size_t size = BP_GET_PSIZE(bp); void *data = zio_data_buf_alloc(size); int flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_SCRUB | ZIO_FLAG_RAW; /* If it's an intent log block, failure is expected. 
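* ZIL blocks are allocated ahead of being written, so the tail of a log chain may contain stale data; issue the read speculatively so a checksum failure there is not counted as an error.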
*/ if (zb->zb_level == ZB_ZIL_LEVEL) flags |= ZIO_FLAG_SPECULATIVE; mutex_enter(&spa->spa_scrub_lock); while (spa->spa_scrub_inflight > max_inflight) cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock); spa->spa_scrub_inflight++; mutex_exit(&spa->spa_scrub_lock); zio_nowait(zio_read(NULL, spa, bp, data, size, zdb_blkptr_done, zcb, ZIO_PRIORITY_ASYNC_READ, flags, zb)); } zcb->zcb_readfails = 0; if (dump_opt['b'] >= 4) { sprintf_blkptr(blkbuf, bp); (void) printf("objset %llu object %llu " "level %lld offset 0x%llx %s\n", (u_longlong_t)zb->zb_objset, (u_longlong_t)zb->zb_object, (longlong_t)zb->zb_level, (u_longlong_t)blkid2offset(dnp, bp, zb), blkbuf); } return (0); } static void zdb_leak(space_map_t *sm, uint64_t start, uint64_t size) { vdev_t *vd = sm->sm_ppd; (void) printf("leaked space: vdev %llu, offset 0x%llx, size %llu\n", (u_longlong_t)vd->vdev_id, (u_longlong_t)start, (u_longlong_t)size); } /* ARGSUSED */ static void zdb_space_map_load(space_map_t *sm) { } static void zdb_space_map_unload(space_map_t *sm) { space_map_vacate(sm, zdb_leak, sm); } /* ARGSUSED */ static void zdb_space_map_claim(space_map_t *sm, uint64_t start, uint64_t size) { } static space_map_ops_t zdb_space_map_ops = { zdb_space_map_load, zdb_space_map_unload, NULL, /* alloc */ zdb_space_map_claim, NULL, /* free */ NULL /* maxsize */ }; static void zdb_ddt_leak_init(spa_t *spa, zdb_cb_t *zcb) { ddt_bookmark_t ddb = { 0 }; ddt_entry_t dde; int error; int p; while ((error = ddt_walk(spa, &ddb, &dde)) == 0) { blkptr_t blk; ddt_phys_t *ddp = dde.dde_phys; if (ddb.ddb_class == DDT_CLASS_UNIQUE) return; ASSERT(ddt_phys_total_refcnt(&dde) > 1); for (p = 0; p < DDT_PHYS_TYPES; p++, ddp++) { if (ddp->ddp_phys_birth == 0) continue; ddt_bp_create(ddb.ddb_checksum, &dde.dde_key, ddp, &blk); if (p == DDT_PHYS_DITTO) { zdb_count_block(zcb, NULL, &blk, ZDB_OT_DITTO); } else { zcb->zcb_dedup_asize += BP_GET_ASIZE(&blk) * (ddp->ddp_refcnt - 1); zcb->zcb_dedup_blocks++; } } if (!dump_opt['L']) { ddt_t *ddt = spa->spa_ddt[ddb.ddb_checksum]; ddt_enter(ddt); VERIFY(ddt_lookup(ddt, &blk, B_TRUE) != NULL); ddt_exit(ddt); } } ASSERT(error == ENOENT); } static void zdb_leak_init(spa_t *spa, zdb_cb_t *zcb) { zcb->zcb_spa = spa; int c, m; if (!dump_opt['L']) { vdev_t *rvd = spa->spa_root_vdev; for (c = 0; c < rvd->vdev_children; c++) { vdev_t *vd = rvd->vdev_child[c]; for (m = 0; m < vd->vdev_ms_count; m++) { metaslab_t *msp = vd->vdev_ms[m]; mutex_enter(&msp->ms_lock); space_map_unload(msp->ms_map); VERIFY(space_map_load(msp->ms_map, &zdb_space_map_ops, SM_ALLOC, &msp->ms_smo, spa->spa_meta_objset) == 0); msp->ms_map->sm_ppd = vd; mutex_exit(&msp->ms_lock); } } } spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); zdb_ddt_leak_init(spa, zcb); spa_config_exit(spa, SCL_CONFIG, FTAG); } static void zdb_leak_fini(spa_t *spa) { int c, m; if (!dump_opt['L']) { vdev_t *rvd = spa->spa_root_vdev; for (c = 0; c < rvd->vdev_children; c++) { vdev_t *vd = rvd->vdev_child[c]; for (m = 0; m < vd->vdev_ms_count; m++) { metaslab_t *msp = vd->vdev_ms[m]; mutex_enter(&msp->ms_lock); space_map_unload(msp->ms_map); mutex_exit(&msp->ms_lock); } } } } /* ARGSUSED */ static int count_block_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx) { zdb_cb_t *zcb = arg; if (dump_opt['b'] >= 4) { char blkbuf[BP_SPRINTF_LEN]; sprintf_blkptr(blkbuf, bp); (void) printf("[%s] %s\n", "deferred free", blkbuf); } zdb_count_block(zcb, NULL, bp, ZDB_OT_DEFERRED); return (0); } static int dump_block_stats(spa_t *spa) { zdb_cb_t zcb; zdb_blkstats_t *zb, *tzb; uint64_t norm_alloc, 
norm_space, total_alloc, total_found; int flags = TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA | TRAVERSE_HARD; int leaks = 0; int e; (void) printf("\nTraversing all blocks %s%s%s%s%s...\n", (dump_opt['c'] || !dump_opt['L']) ? "to verify " : "", (dump_opt['c'] == 1) ? "metadata " : "", dump_opt['c'] ? "checksums " : "", (dump_opt['c'] && !dump_opt['L']) ? "and verify " : "", !dump_opt['L'] ? "nothing leaked " : ""); /* * Load all space maps as SM_ALLOC maps, then traverse the pool * claiming each block we discover. If the pool is perfectly * consistent, the space maps will be empty when we're done. * Anything left over is a leak; any block we can't claim (because * it's not part of any space map) is a double allocation, * reference to a freed block, or an unclaimed log block. */ bzero(&zcb, sizeof(zdb_cb_t)); zdb_leak_init(spa, &zcb); /* * If there's a deferred-free bplist, process that first. */ (void) bpobj_iterate_nofree(&spa->spa_deferred_bpobj, count_block_cb, &zcb, NULL); (void) bpobj_iterate_nofree(&spa->spa_dsl_pool->dp_free_bpobj, count_block_cb, &zcb, NULL); if (spa_feature_is_active(spa, &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY])) { VERIFY3U(0, ==, bptree_iterate(spa->spa_meta_objset, spa->spa_dsl_pool->dp_bptree_obj, B_FALSE, count_block_cb, &zcb, NULL)); } if (dump_opt['c'] > 1) flags |= TRAVERSE_PREFETCH_DATA; zcb.zcb_haderrors |= traverse_pool(spa, 0, flags, zdb_blkptr_cb, &zcb); /* * If we've traversed the data blocks then we need to wait for those * I/Os to complete. We leverage "The Godfather" zio to wait on * all async I/Os to complete. */ if (dump_opt['c']) { (void) zio_wait(spa->spa_async_zio_root); spa->spa_async_zio_root = zio_root(spa, NULL, NULL, ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE | ZIO_FLAG_GODFATHER); } if (zcb.zcb_haderrors) { (void) printf("\nError counts:\n\n"); (void) printf("\t%5s %s\n", "errno", "count"); for (e = 0; e < 256; e++) { if (zcb.zcb_errors[e] != 0) { (void) printf("\t%5d %llu\n", e, (u_longlong_t)zcb.zcb_errors[e]); } } } /* * Report any leaked segments. */ zdb_leak_fini(spa); tzb = &zcb.zcb_type[ZB_TOTAL][ZDB_OT_TOTAL]; norm_alloc = metaslab_class_get_alloc(spa_normal_class(spa)); norm_space = metaslab_class_get_space(spa_normal_class(spa)); total_alloc = norm_alloc + metaslab_class_get_alloc(spa_log_class(spa)); total_found = tzb->zb_asize - zcb.zcb_dedup_asize; if (total_found == total_alloc) { if (!dump_opt['L']) (void) printf("\n\tNo leaks (block sum matches space" " maps exactly)\n"); } else { (void) printf("block traversal size %llu != alloc %llu " "(%s %lld)\n", (u_longlong_t)total_found, (u_longlong_t)total_alloc, (dump_opt['L']) ? 
"unreachable" : "leaked", (longlong_t)(total_alloc - total_found)); leaks = 1; } if (tzb->zb_count == 0) return (2); (void) printf("\n"); (void) printf("\tbp count: %10llu\n", (u_longlong_t)tzb->zb_count); (void) printf("\tbp logical: %10llu avg: %6llu\n", (u_longlong_t)tzb->zb_lsize, (u_longlong_t)(tzb->zb_lsize / tzb->zb_count)); (void) printf("\tbp physical: %10llu avg:" " %6llu compression: %6.2f\n", (u_longlong_t)tzb->zb_psize, (u_longlong_t)(tzb->zb_psize / tzb->zb_count), (double)tzb->zb_lsize / tzb->zb_psize); (void) printf("\tbp allocated: %10llu avg:" " %6llu compression: %6.2f\n", (u_longlong_t)tzb->zb_asize, (u_longlong_t)(tzb->zb_asize / tzb->zb_count), (double)tzb->zb_lsize / tzb->zb_asize); (void) printf("\tbp deduped: %10llu ref>1:" " %6llu deduplication: %6.2f\n", (u_longlong_t)zcb.zcb_dedup_asize, (u_longlong_t)zcb.zcb_dedup_blocks, (double)zcb.zcb_dedup_asize / tzb->zb_asize + 1.0); (void) printf("\tSPA allocated: %10llu used: %5.2f%%\n", (u_longlong_t)norm_alloc, 100.0 * norm_alloc / norm_space); if (dump_opt['b'] >= 2) { int l, t, level; (void) printf("\nBlocks\tLSIZE\tPSIZE\tASIZE" "\t avg\t comp\t%%Total\tType\n"); for (t = 0; t <= ZDB_OT_TOTAL; t++) { char csize[32], lsize[32], psize[32], asize[32]; char avg[32]; char *typename; if (t < DMU_OT_NUMTYPES) typename = dmu_ot[t].ot_name; else typename = zdb_ot_extname[t - DMU_OT_NUMTYPES]; if (zcb.zcb_type[ZB_TOTAL][t].zb_asize == 0) { (void) printf("%6s\t%5s\t%5s\t%5s" "\t%5s\t%5s\t%6s\t%s\n", "-", "-", "-", "-", "-", "-", "-", typename); continue; } for (l = ZB_TOTAL - 1; l >= -1; l--) { level = (l == -1 ? ZB_TOTAL : l); zb = &zcb.zcb_type[level][t]; if (zb->zb_asize == 0) continue; if (dump_opt['b'] < 3 && level != ZB_TOTAL) continue; if (level == 0 && zb->zb_asize == zcb.zcb_type[ZB_TOTAL][t].zb_asize) continue; zdb_nicenum(zb->zb_count, csize); zdb_nicenum(zb->zb_lsize, lsize); zdb_nicenum(zb->zb_psize, psize); zdb_nicenum(zb->zb_asize, asize); zdb_nicenum(zb->zb_asize / zb->zb_count, avg); (void) printf("%6s\t%5s\t%5s\t%5s\t%5s" "\t%5.2f\t%6.2f\t", csize, lsize, psize, asize, avg, (double)zb->zb_lsize / zb->zb_psize, 100.0 * zb->zb_asize / tzb->zb_asize); if (level == ZB_TOTAL) (void) printf("%s\n", typename); else (void) printf(" L%d %s\n", level, typename); } } } (void) printf("\n"); if (leaks) return (2); if (zcb.zcb_haderrors) return (3); return (0); } typedef struct zdb_ddt_entry { ddt_key_t zdde_key; uint64_t zdde_ref_blocks; uint64_t zdde_ref_lsize; uint64_t zdde_ref_psize; uint64_t zdde_ref_dsize; avl_node_t zdde_node; } zdb_ddt_entry_t; /* ARGSUSED */ static int zdb_ddt_add_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg) { avl_tree_t *t = arg; avl_index_t where; zdb_ddt_entry_t *zdde, zdde_search; if (bp == NULL) return (0); if (dump_opt['S'] > 1 && zb->zb_level == ZB_ROOT_LEVEL) { (void) printf("traversing objset %llu, %llu objects, " "%lu blocks so far\n", (u_longlong_t)zb->zb_objset, (u_longlong_t)bp->blk_fill, avl_numnodes(t)); } if (BP_IS_HOLE(bp) || BP_GET_CHECKSUM(bp) == ZIO_CHECKSUM_OFF || BP_GET_LEVEL(bp) > 0 || DMU_OT_IS_METADATA(BP_GET_TYPE(bp))) return (0); ddt_key_fill(&zdde_search.zdde_key, bp); zdde = avl_find(t, &zdde_search, &where); if (zdde == NULL) { zdde = umem_zalloc(sizeof (*zdde), UMEM_NOFAIL); zdde->zdde_key = zdde_search.zdde_key; avl_insert(t, zdde, where); } zdde->zdde_ref_blocks += 1; zdde->zdde_ref_lsize += BP_GET_LSIZE(bp); zdde->zdde_ref_psize += BP_GET_PSIZE(bp); zdde->zdde_ref_dsize += bp_get_dsize_sync(spa, 
bp); return (0); } static void dump_simulated_ddt(spa_t *spa) { avl_tree_t t; void *cookie = NULL; zdb_ddt_entry_t *zdde; ddt_histogram_t ddh_total; ddt_stat_t dds_total; bzero(&ddh_total, sizeof (ddt_histogram_t)); bzero(&dds_total, sizeof (ddt_stat_t)); avl_create(&t, ddt_entry_compare, sizeof (zdb_ddt_entry_t), offsetof(zdb_ddt_entry_t, zdde_node)); spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); (void) traverse_pool(spa, 0, TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA, zdb_ddt_add_cb, &t); spa_config_exit(spa, SCL_CONFIG, FTAG); while ((zdde = avl_destroy_nodes(&t, &cookie)) != NULL) { ddt_stat_t dds; uint64_t refcnt = zdde->zdde_ref_blocks; ASSERT(refcnt != 0); dds.dds_blocks = zdde->zdde_ref_blocks / refcnt; dds.dds_lsize = zdde->zdde_ref_lsize / refcnt; dds.dds_psize = zdde->zdde_ref_psize / refcnt; dds.dds_dsize = zdde->zdde_ref_dsize / refcnt; dds.dds_ref_blocks = zdde->zdde_ref_blocks; dds.dds_ref_lsize = zdde->zdde_ref_lsize; dds.dds_ref_psize = zdde->zdde_ref_psize; dds.dds_ref_dsize = zdde->zdde_ref_dsize; ddt_stat_add(&ddh_total.ddh_stat[highbit(refcnt) - 1], &dds, 0); umem_free(zdde, sizeof (*zdde)); } avl_destroy(&t); ddt_histogram_stat(&dds_total, &ddh_total); (void) printf("Simulated DDT histogram:\n"); zpool_dump_ddt(&dds_total, &ddh_total); dump_dedup_ratio(&dds_total); } static void dump_zpool(spa_t *spa) { dsl_pool_t *dp = spa_get_dsl(spa); int rc = 0; if (dump_opt['S']) { dump_simulated_ddt(spa); return; } if (!dump_opt['e'] && dump_opt['C'] > 1) { (void) printf("\nCached configuration:\n"); dump_nvlist(spa->spa_config, 8); } if (dump_opt['C']) dump_config(spa); if (dump_opt['u']) dump_uberblock(&spa->spa_uberblock, "\nUberblock:\n", "\n"); if (dump_opt['D']) dump_all_ddts(spa); if (dump_opt['d'] > 2 || dump_opt['m']) dump_metaslabs(spa); if (dump_opt['d'] || dump_opt['i']) { dump_dir(dp->dp_meta_objset); if (dump_opt['d'] >= 3) { dump_bpobj(&spa->spa_deferred_bpobj, "Deferred frees"); if (spa_version(spa) >= SPA_VERSION_DEADLISTS) { dump_bpobj(&spa->spa_dsl_pool->dp_free_bpobj, "Pool snapshot frees"); } if (spa_feature_is_active(spa, &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY])) { dump_bptree(spa->spa_meta_objset, spa->spa_dsl_pool->dp_bptree_obj, "Pool dataset frees"); } dump_dtl(spa->spa_root_vdev, 0); } (void) dmu_objset_find(spa_name(spa), dump_one_dir, NULL, DS_FIND_SNAPSHOTS | DS_FIND_CHILDREN); } if (dump_opt['b'] || dump_opt['c']) rc = dump_block_stats(spa); if (dump_opt['s']) show_pool_stats(spa); if (dump_opt['h']) dump_history(spa); if (rc != 0) exit(rc); } #define ZDB_FLAG_CHECKSUM 0x0001 #define ZDB_FLAG_DECOMPRESS 0x0002 #define ZDB_FLAG_BSWAP 0x0004 #define ZDB_FLAG_GBH 0x0008 #define ZDB_FLAG_INDIRECT 0x0010 #define ZDB_FLAG_PHYS 0x0020 #define ZDB_FLAG_RAW 0x0040 #define ZDB_FLAG_PRINT_BLKPTR 0x0080 int flagbits[256]; static void zdb_print_blkptr(blkptr_t *bp, int flags) { char blkbuf[BP_SPRINTF_LEN]; if (flags & ZDB_FLAG_BSWAP) byteswap_uint64_array((void *)bp, sizeof (blkptr_t)); sprintf_blkptr(blkbuf, bp); (void) printf("%s\n", blkbuf); } static void zdb_dump_indirect(blkptr_t *bp, int nbps, int flags) { int i; for (i = 0; i < nbps; i++) zdb_print_blkptr(&bp[i], flags); } static void zdb_dump_gbh(void *buf, int flags) { zdb_dump_indirect((blkptr_t *)buf, SPA_GBH_NBLKPTRS, flags); } static void zdb_dump_block_raw(void *buf, uint64_t size, int flags) { if (flags & ZDB_FLAG_BSWAP) byteswap_uint64_array(buf, size); VERIFY(write(fileno(stdout), buf, size) == size); } static void zdb_dump_block(char *label, void *buf, uint64_t size, int flags) { 
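	/*
	 * Descriptive note (not part of the original patch): this routine
	 * dumps 'size' bytes as rows of two 64-bit words in hex followed by
	 * their ASCII rendering, sixteen bytes per row.  With ZDB_FLAG_BSWAP
	 * each word is printed byte-swapped and the nibble header above the
	 * columns is reversed to match; the ASCII column always shows the
	 * raw, unswapped bytes.
	 */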
uint64_t *d = (uint64_t *)buf; int nwords = size / sizeof (uint64_t); int do_bswap = !!(flags & ZDB_FLAG_BSWAP); int i, j; char *hdr, *c; if (do_bswap) hdr = " 7 6 5 4 3 2 1 0 f e d c b a 9 8"; else hdr = " 0 1 2 3 4 5 6 7 8 9 a b c d e f"; (void) printf("\n%s\n%6s %s 0123456789abcdef\n", label, "", hdr); for (i = 0; i < nwords; i += 2) { (void) printf("%06llx: %016llx %016llx ", (u_longlong_t)(i * sizeof (uint64_t)), (u_longlong_t)(do_bswap ? BSWAP_64(d[i]) : d[i]), (u_longlong_t)(do_bswap ? BSWAP_64(d[i + 1]) : d[i + 1])); c = (char *)&d[i]; for (j = 0; j < 2 * sizeof (uint64_t); j++) (void) printf("%c", isprint(c[j]) ? c[j] : '.'); (void) printf("\n"); } } /* * There are two acceptable formats: * leaf_name - For example: c1t0d0 or /tmp/ztest.0a * child[.child]* - For example: 0.1.1 * * The second form can be used to specify arbitrary vdevs anywhere * in the heirarchy. For example, in a pool with a mirror of * RAID-Zs, you can specify either RAID-Z vdev with 0.0 or 0.1 . */ static vdev_t * zdb_vdev_lookup(vdev_t *vdev, char *path) { char *s, *p, *q; int i; if (vdev == NULL) return (NULL); /* First, assume the x.x.x.x format */ i = (int)strtoul(path, &s, 10); if (s == path || (s && *s != '.' && *s != '\0')) goto name; if (i < 0 || i >= vdev->vdev_children) return (NULL); vdev = vdev->vdev_child[i]; if (*s == '\0') return (vdev); return (zdb_vdev_lookup(vdev, s+1)); name: for (i = 0; i < vdev->vdev_children; i++) { vdev_t *vc = vdev->vdev_child[i]; if (vc->vdev_path == NULL) { vc = zdb_vdev_lookup(vc, path); if (vc == NULL) continue; else return (vc); } p = strrchr(vc->vdev_path, '/'); p = p ? p + 1 : vc->vdev_path; q = &vc->vdev_path[strlen(vc->vdev_path) - 2]; if (strcmp(vc->vdev_path, path) == 0) return (vc); if (strcmp(p, path) == 0) return (vc); if (strcmp(q, "s0") == 0 && strncmp(p, path, q - p) == 0) return (vc); } return (NULL); } /* * Read a block from a pool and print it out. The syntax of the * block descriptor is: * * pool:vdev_specifier:offset:size[:flags] * * pool - The name of the pool you wish to read from * vdev_specifier - Which vdev (see comment for zdb_vdev_lookup) * offset - offset, in hex, in bytes * size - Amount of data to read, in hex, in bytes * flags - A string of characters specifying options * b: Decode a blkptr at given offset within block * *c: Calculate and display checksums * d: Decompress data before dumping * e: Byteswap data before dumping * g: Display data as a gang block header * i: Display as an indirect block * p: Do I/O to physical offset * r: Dump raw data to stdout * * * = not yet implemented */ static void zdb_read_block(char *thing, spa_t *spa) { blkptr_t blk, *bp = &blk; dva_t *dva = bp->blk_dva; int flags = 0; uint64_t offset = 0, size = 0, psize = 0, lsize = 0, blkptr_offset = 0; zio_t *zio; vdev_t *vd; void *pbuf, *lbuf, *buf; char *s, *p, *dup, *vdev, *flagstr; int i, error; dup = strdup(thing); s = strtok(dup, ":"); vdev = s ? s : ""; s = strtok(NULL, ":"); offset = strtoull(s ? s : "", NULL, 16); s = strtok(NULL, ":"); size = strtoull(s ? s : "", NULL, 16); s = strtok(NULL, ":"); flagstr = s ? 
s : ""; s = NULL; if (size == 0) s = "size must not be zero"; if (!IS_P2ALIGNED(size, DEV_BSIZE)) s = "size must be a multiple of sector size"; if (!IS_P2ALIGNED(offset, DEV_BSIZE)) s = "offset must be a multiple of sector size"; if (s) { (void) printf("Invalid block specifier: %s - %s\n", thing, s); free(dup); return; } for (s = strtok(flagstr, ":"); s; s = strtok(NULL, ":")) { for (i = 0; flagstr[i]; i++) { int bit = flagbits[(uchar_t)flagstr[i]]; if (bit == 0) { (void) printf("***Invalid flag: %c\n", flagstr[i]); continue; } flags |= bit; /* If it's not something with an argument, keep going */ if ((bit & (ZDB_FLAG_CHECKSUM | ZDB_FLAG_PRINT_BLKPTR)) == 0) continue; p = &flagstr[i + 1]; if (bit == ZDB_FLAG_PRINT_BLKPTR) blkptr_offset = strtoull(p, &p, 16); if (*p != ':' && *p != '\0') { (void) printf("***Invalid flag arg: '%s'\n", s); free(dup); return; } } } vd = zdb_vdev_lookup(spa->spa_root_vdev, vdev); if (vd == NULL) { (void) printf("***Invalid vdev: %s\n", vdev); free(dup); return; } else { if (vd->vdev_path) (void) fprintf(stderr, "Found vdev: %s\n", vd->vdev_path); else (void) fprintf(stderr, "Found vdev type: %s\n", vd->vdev_ops->vdev_op_type); } psize = size; lsize = size; pbuf = umem_alloc_aligned(SPA_MAXBLOCKSIZE, 512, UMEM_NOFAIL); lbuf = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL); BP_ZERO(bp); DVA_SET_VDEV(&dva[0], vd->vdev_id); DVA_SET_OFFSET(&dva[0], offset); DVA_SET_GANG(&dva[0], !!(flags & ZDB_FLAG_GBH)); DVA_SET_ASIZE(&dva[0], vdev_psize_to_asize(vd, psize)); BP_SET_BIRTH(bp, TXG_INITIAL, TXG_INITIAL); BP_SET_LSIZE(bp, lsize); BP_SET_PSIZE(bp, psize); BP_SET_COMPRESS(bp, ZIO_COMPRESS_OFF); BP_SET_CHECKSUM(bp, ZIO_CHECKSUM_OFF); BP_SET_TYPE(bp, DMU_OT_NONE); BP_SET_LEVEL(bp, 0); BP_SET_DEDUP(bp, 0); BP_SET_BYTEORDER(bp, ZFS_HOST_BYTEORDER); spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); zio = zio_root(spa, NULL, NULL, 0); if (vd == vd->vdev_top) { /* * Treat this as a normal block read. */ zio_nowait(zio_read(zio, spa, bp, pbuf, psize, NULL, NULL, ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW, NULL)); } else { /* * Treat this as a vdev child I/O. */ zio_nowait(zio_vdev_child_io(zio, bp, vd, offset, pbuf, psize, ZIO_TYPE_READ, ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_QUEUE | ZIO_FLAG_DONT_PROPAGATE | ZIO_FLAG_DONT_RETRY | ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW, NULL, NULL)); } error = zio_wait(zio); spa_config_exit(spa, SCL_STATE, FTAG); if (error) { (void) printf("Read of %s failed, error: %d\n", thing, error); goto out; } if (flags & ZDB_FLAG_DECOMPRESS) { /* * We don't know how the data was compressed, so just try * every decompress function at every inflated blocksize. 
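/*
 * Minimal sketch of the brute-force identification used below.  The helper
 * name guess_compression() is invented for illustration; zio_decompress_data()
 * is the same routine the surrounding code calls.  A (compressor, lsize) pair
 * is accepted only if two copies of the physical data, padded with different
 * random bytes, both decompress cleanly to identical output, which guards
 * against decompressors that succeed by ignoring trailing garbage.
 */
static int
guess_compression(void *pbuf, void *pbuf2, void *lbuf, void *lbuf2,
    uint64_t psize, enum zio_compress *cp, uint64_t *lsizep)
{
	enum zio_compress c;
	uint64_t lsize;

	for (lsize = SPA_MAXBLOCKSIZE; lsize > psize;
	    lsize -= SPA_MINBLOCKSIZE) {
		for (c = 0; c < ZIO_COMPRESS_FUNCTIONS; c++) {
			if (zio_decompress_data(c, pbuf, lbuf,
			    psize, lsize) == 0 &&
			    zio_decompress_data(c, pbuf2, lbuf2,
			    psize, lsize) == 0 &&
			    bcmp(lbuf, lbuf2, lsize) == 0) {
				*cp = c;	/* candidate compressor */
				*lsizep = lsize;	/* candidate lsize */
				return (0);
			}
		}
	}
	return (-1);	/* no candidate decompressed cleanly */
}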
*/ enum zio_compress c; void *pbuf2 = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL); void *lbuf2 = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL); bcopy(pbuf, pbuf2, psize); VERIFY(random_get_pseudo_bytes((uint8_t *)pbuf + psize, SPA_MAXBLOCKSIZE - psize) == 0); VERIFY(random_get_pseudo_bytes((uint8_t *)pbuf2 + psize, SPA_MAXBLOCKSIZE - psize) == 0); for (lsize = SPA_MAXBLOCKSIZE; lsize > psize; lsize -= SPA_MINBLOCKSIZE) { for (c = 0; c < ZIO_COMPRESS_FUNCTIONS; c++) { if (zio_decompress_data(c, pbuf, lbuf, psize, lsize) == 0 && zio_decompress_data(c, pbuf2, lbuf2, psize, lsize) == 0 && bcmp(lbuf, lbuf2, lsize) == 0) break; } if (c != ZIO_COMPRESS_FUNCTIONS) break; lsize -= SPA_MINBLOCKSIZE; } umem_free(pbuf2, SPA_MAXBLOCKSIZE); umem_free(lbuf2, SPA_MAXBLOCKSIZE); if (lsize <= psize) { (void) printf("Decompress of %s failed\n", thing); goto out; } buf = lbuf; size = lsize; } else { buf = pbuf; size = psize; } if (flags & ZDB_FLAG_PRINT_BLKPTR) zdb_print_blkptr((blkptr_t *)(void *) ((uintptr_t)buf + (uintptr_t)blkptr_offset), flags); else if (flags & ZDB_FLAG_RAW) zdb_dump_block_raw(buf, size, flags); else if (flags & ZDB_FLAG_INDIRECT) zdb_dump_indirect((blkptr_t *)buf, size / sizeof (blkptr_t), flags); else if (flags & ZDB_FLAG_GBH) zdb_dump_gbh(buf, flags); else zdb_dump_block(thing, buf, size, flags); out: umem_free(pbuf, SPA_MAXBLOCKSIZE); umem_free(lbuf, SPA_MAXBLOCKSIZE); free(dup); } static boolean_t pool_match(nvlist_t *cfg, char *tgt) { uint64_t v, guid = strtoull(tgt, NULL, 0); char *s; if (guid != 0) { if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_GUID, &v) == 0) return (v == guid); } else { if (nvlist_lookup_string(cfg, ZPOOL_CONFIG_POOL_NAME, &s) == 0) return (strcmp(s, tgt) == 0); } return (B_FALSE); } static char * find_zpool(char **target, nvlist_t **configp, int dirc, char **dirv) { nvlist_t *pools; nvlist_t *match = NULL; char *name = NULL; char *sepp = NULL; char sep = 0; int count = 0; importargs_t args = { 0 }; args.paths = dirc; args.path = dirv; args.can_be_active = B_TRUE; if ((sepp = strpbrk(*target, "/@")) != NULL) { sep = *sepp; *sepp = '\0'; } pools = zpool_search_import(g_zfs, &args); if (pools != NULL) { nvpair_t *elem = NULL; while ((elem = nvlist_next_nvpair(pools, elem)) != NULL) { verify(nvpair_value_nvlist(elem, configp) == 0); if (pool_match(*configp, *target)) { count++; if (match != NULL) { /* print previously found config */ if (name != NULL) { (void) printf("%s\n", name); dump_nvlist(match, 8); name = NULL; } (void) printf("%s\n", nvpair_name(elem)); dump_nvlist(*configp, 8); } else { match = *configp; name = nvpair_name(elem); } } } } if (count > 1) (void) fatal("\tMatched %d pools - use pool GUID " "instead of pool name or \n" "\tpool name part of a dataset name to select pool", count); if (sepp) *sepp = sep; /* * If pool GUID was specified for pool id, replace it with pool name */ if (name && (strstr(*target, name) != *target)) { int sz = 1 + strlen(name) + ((sepp) ? strlen(sepp) : 0); *target = umem_alloc(sz, UMEM_NOFAIL); (void) snprintf(*target, sz, "%s%s", name, sepp ? sepp : ""); } *configp = name ? 
match : NULL; return (name); } int main(int argc, char **argv) { int i, c; struct rlimit rl = { 1024, 1024 }; spa_t *spa = NULL; objset_t *os = NULL; int dump_all = 1; int verbose = 0; int error = 0; char **searchdirs = NULL; int nsearch = 0; char *target; nvlist_t *policy = NULL; uint64_t max_txg = UINT64_MAX; int rewind = ZPOOL_NEVER_REWIND; char *spa_config_path_env; (void) setrlimit(RLIMIT_NOFILE, &rl); (void) enable_extended_FILE_stdio(-1, -1); dprintf_setup(&argc, argv); /* * If there is an environment variable SPA_CONFIG_PATH it overrides * default spa_config_path setting. If -U flag is specified it will * override this environment variable settings once again. */ spa_config_path_env = getenv("SPA_CONFIG_PATH"); if (spa_config_path_env != NULL) spa_config_path = spa_config_path_env; while ((c = getopt(argc, argv, "bcdhilmM:suCDRSAFLXevp:t:U:P")) != -1) { switch (c) { case 'b': case 'c': case 'd': case 'h': case 'i': case 'l': case 'm': case 's': case 'u': case 'C': case 'D': case 'R': case 'S': dump_opt[c]++; dump_all = 0; break; case 'A': case 'F': case 'L': case 'X': case 'e': case 'P': dump_opt[c]++; break; case 'v': verbose++; break; case 'M': max_inflight = strtoull(optarg, NULL, 0); if (max_inflight == 0) { (void) fprintf(stderr, "maximum number " "of inflight I/Os must be greater " "than 0\n"); usage(); } break; case 'p': if (searchdirs == NULL) { searchdirs = umem_alloc(sizeof (char *), UMEM_NOFAIL); } else { char **tmp = umem_alloc((nsearch + 1) * sizeof (char *), UMEM_NOFAIL); bcopy(searchdirs, tmp, nsearch * sizeof (char *)); umem_free(searchdirs, nsearch * sizeof (char *)); searchdirs = tmp; } searchdirs[nsearch++] = optarg; break; case 't': max_txg = strtoull(optarg, NULL, 0); if (max_txg < TXG_INITIAL) { (void) fprintf(stderr, "incorrect txg " "specified: %s\n", optarg); usage(); } break; case 'U': spa_config_path = optarg; break; default: usage(); break; } } if (!dump_opt['e'] && searchdirs != NULL) { (void) fprintf(stderr, "-p option requires use of -e\n"); usage(); } kernel_init(FREAD); if ((g_zfs = libzfs_init()) == NULL) return (1); if (dump_all) verbose = MAX(verbose, 1); for (c = 0; c < 256; c++) { if (dump_all && !strchr("elAFLRSXP", c)) dump_opt[c] = 1; if (dump_opt[c]) dump_opt[c] += verbose; } aok = (dump_opt['A'] == 1) || (dump_opt['A'] > 2); zfs_recover = (dump_opt['A'] > 1); argc -= optind; argv += optind; if (argc < 2 && dump_opt['R']) usage(); if (argc < 1) { if (!dump_opt['e'] && dump_opt['C']) { dump_cachefile(spa_config_path); return (0); } usage(); } if (dump_opt['l']) { dump_label(argv[0]); return (0); } if (dump_opt['X'] || dump_opt['F']) rewind = ZPOOL_DO_REWIND | (dump_opt['X'] ? 
ZPOOL_EXTREME_REWIND : 0); if (nvlist_alloc(&policy, NV_UNIQUE_NAME_TYPE, 0) != 0 || nvlist_add_uint64(policy, ZPOOL_REWIND_REQUEST_TXG, max_txg) != 0 || nvlist_add_uint32(policy, ZPOOL_REWIND_REQUEST, rewind) != 0) fatal("internal error: %s", strerror(ENOMEM)); error = 0; target = argv[0]; if (dump_opt['e']) { nvlist_t *cfg = NULL; char *name = find_zpool(&target, &cfg, nsearch, searchdirs); error = ENOENT; if (name) { if (dump_opt['C'] > 1) { (void) printf("\nConfiguration for import:\n"); dump_nvlist(cfg, 8); } if (nvlist_add_nvlist(cfg, ZPOOL_REWIND_POLICY, policy) != 0) { fatal("can't open '%s': %s", target, strerror(ENOMEM)); } if ((error = spa_import(name, cfg, NULL, ZFS_IMPORT_MISSING_LOG)) != 0) { error = spa_import(name, cfg, NULL, ZFS_IMPORT_VERBATIM); } } } if (error == 0) { if (strpbrk(target, "/@") == NULL || dump_opt['R']) { error = spa_open_rewind(target, &spa, FTAG, policy, NULL); if (error) { /* * If we're missing the log device then * try opening the pool after clearing the * log state. */ mutex_enter(&spa_namespace_lock); if ((spa = spa_lookup(target)) != NULL && spa->spa_log_state == SPA_LOG_MISSING) { spa->spa_log_state = SPA_LOG_CLEAR; error = 0; } mutex_exit(&spa_namespace_lock); if (!error) { error = spa_open_rewind(target, &spa, FTAG, policy, NULL); } } } else { error = dmu_objset_own(target, DMU_OST_ANY, B_TRUE, FTAG, &os); } } nvlist_free(policy); if (error) fatal("can't open '%s': %s", target, strerror(error)); argv++; argc--; if (!dump_opt['R']) { if (argc > 0) { zopt_objects = argc; zopt_object = calloc(zopt_objects, sizeof (uint64_t)); for (i = 0; i < zopt_objects; i++) { errno = 0; zopt_object[i] = strtoull(argv[i], NULL, 0); if (zopt_object[i] == 0 && errno != 0) fatal("bad number %s: %s", argv[i], strerror(errno)); } } if (os != NULL) { dump_dir(os); } else if (zopt_objects > 0 && !dump_opt['m']) { dump_dir(spa->spa_meta_objset); } else { dump_zpool(spa); } } else { flagbits['b'] = ZDB_FLAG_PRINT_BLKPTR; flagbits['c'] = ZDB_FLAG_CHECKSUM; flagbits['d'] = ZDB_FLAG_DECOMPRESS; flagbits['e'] = ZDB_FLAG_BSWAP; flagbits['g'] = ZDB_FLAG_GBH; flagbits['i'] = ZDB_FLAG_INDIRECT; flagbits['p'] = ZDB_FLAG_PHYS; flagbits['r'] = ZDB_FLAG_RAW; for (i = 0; i < argc; i++) zdb_read_block(argv[i], spa); } (os != NULL) ? dmu_objset_disown(os, FTAG) : spa_close(spa, FTAG); fuid_table_destroy(); sa_loaded = B_FALSE; libzfs_fini(g_zfs); kernel_fini(); return (0); } diff --git a/cmd/zfs/Makefile.am b/cmd/zfs/Makefile.am index 8f381f190603..c1499a2a8971 100644 --- a/cmd/zfs/Makefile.am +++ b/cmd/zfs/Makefile.am @@ -1,21 +1,22 @@ include $(top_srcdir)/config/Rules.am DEFAULT_INCLUDES += \ -I$(top_srcdir)/include \ -I$(top_srcdir)/lib/libspl/include sbin_PROGRAMS = zfs zfs_SOURCES = \ $(top_srcdir)/cmd/zfs/zfs_iter.c \ $(top_srcdir)/cmd/zfs/zfs_iter.h \ $(top_srcdir)/cmd/zfs/zfs_main.c \ $(top_srcdir)/cmd/zfs/zfs_util.h zfs_LDADD = \ $(top_builddir)/lib/libnvpair/libnvpair.la \ $(top_builddir)/lib/libuutil/libuutil.la \ $(top_builddir)/lib/libzpool/libzpool.la \ - $(top_builddir)/lib/libzfs/libzfs.la + $(top_builddir)/lib/libzfs/libzfs.la \ + $(top_builddir)/lib/libzfs_core/libzfs_core.la zfs_LDFLAGS = -pthread -lm $(ZLIB) -lrt -ldl $(LIBUUID) $(LIBBLKID) diff --git a/cmd/zfs/zfs_main.c b/cmd/zfs/zfs_main.c index 5753ccea0d8e..7176c94169b5 100644 --- a/cmd/zfs/zfs_main.c +++ b/cmd/zfs/zfs_main.c @@ -1,6421 +1,6476 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). 
* You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright 2012 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2012 by Delphix. All rights reserved. * Copyright (c) 2012, Joyent, Inc. All rights reserved. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include #include #include #include #ifdef HAVE_IDMAP #include #include #endif /* HAVE_IDMAP */ #include "zfs_iter.h" #include "zfs_util.h" #include "zfs_comutil.h" #include "libzfs_impl.h" libzfs_handle_t *g_zfs; static FILE *mnttab_file; static char history_str[HIS_MAX_RECORD_LEN]; +static boolean_t log_history = B_TRUE; static int zfs_do_clone(int argc, char **argv); static int zfs_do_create(int argc, char **argv); static int zfs_do_destroy(int argc, char **argv); static int zfs_do_get(int argc, char **argv); static int zfs_do_inherit(int argc, char **argv); static int zfs_do_list(int argc, char **argv); static int zfs_do_mount(int argc, char **argv); static int zfs_do_rename(int argc, char **argv); static int zfs_do_rollback(int argc, char **argv); static int zfs_do_set(int argc, char **argv); static int zfs_do_upgrade(int argc, char **argv); static int zfs_do_snapshot(int argc, char **argv); static int zfs_do_unmount(int argc, char **argv); static int zfs_do_share(int argc, char **argv); static int zfs_do_unshare(int argc, char **argv); static int zfs_do_send(int argc, char **argv); static int zfs_do_receive(int argc, char **argv); static int zfs_do_promote(int argc, char **argv); static int zfs_do_userspace(int argc, char **argv); static int zfs_do_allow(int argc, char **argv); static int zfs_do_unallow(int argc, char **argv); static int zfs_do_hold(int argc, char **argv); static int zfs_do_holds(int argc, char **argv); static int zfs_do_release(int argc, char **argv); static int zfs_do_diff(int argc, char **argv); /* * Enable a reasonable set of defaults for libumem debugging on DEBUG builds. */ #ifdef DEBUG const char * _umem_debug_init(void) { return ("default,verbose"); /* $UMEM_DEBUG setting */ } const char * _umem_logging_init(void) { return ("fail,contents"); /* $UMEM_LOGGING setting */ } #endif typedef enum { HELP_CLONE, HELP_CREATE, HELP_DESTROY, HELP_GET, HELP_INHERIT, HELP_UPGRADE, HELP_LIST, HELP_MOUNT, HELP_PROMOTE, HELP_RECEIVE, HELP_RENAME, HELP_ROLLBACK, HELP_SEND, HELP_SET, HELP_SHARE, HELP_SNAPSHOT, HELP_UNMOUNT, HELP_UNSHARE, HELP_ALLOW, HELP_UNALLOW, HELP_USERSPACE, HELP_GROUPSPACE, HELP_HOLD, HELP_HOLDS, HELP_RELEASE, HELP_DIFF, } zfs_help_t; typedef struct zfs_command { const char *name; int (*func)(int argc, char **argv); zfs_help_t usage; } zfs_command_t; /* * Master command table. 
Each ZFS command has a name, associated function, and * usage message. The usage messages need to be internationalized, so we have * to have a function to return the usage message based on a command index. * * These commands are organized according to how they are displayed in the usage * message. An empty command (one with a NULL name) indicates an empty line in * the generic usage message. */ static zfs_command_t command_table[] = { { "create", zfs_do_create, HELP_CREATE }, { "destroy", zfs_do_destroy, HELP_DESTROY }, { NULL }, { "snapshot", zfs_do_snapshot, HELP_SNAPSHOT }, { "rollback", zfs_do_rollback, HELP_ROLLBACK }, { "clone", zfs_do_clone, HELP_CLONE }, { "promote", zfs_do_promote, HELP_PROMOTE }, { "rename", zfs_do_rename, HELP_RENAME }, { NULL }, { "list", zfs_do_list, HELP_LIST }, { NULL }, { "set", zfs_do_set, HELP_SET }, { "get", zfs_do_get, HELP_GET }, { "inherit", zfs_do_inherit, HELP_INHERIT }, { "upgrade", zfs_do_upgrade, HELP_UPGRADE }, { "userspace", zfs_do_userspace, HELP_USERSPACE }, { "groupspace", zfs_do_userspace, HELP_GROUPSPACE }, { NULL }, { "mount", zfs_do_mount, HELP_MOUNT }, { "unmount", zfs_do_unmount, HELP_UNMOUNT }, { "share", zfs_do_share, HELP_SHARE }, { "unshare", zfs_do_unshare, HELP_UNSHARE }, { NULL }, { "send", zfs_do_send, HELP_SEND }, { "receive", zfs_do_receive, HELP_RECEIVE }, { NULL }, { "allow", zfs_do_allow, HELP_ALLOW }, { NULL }, { "unallow", zfs_do_unallow, HELP_UNALLOW }, { NULL }, { "hold", zfs_do_hold, HELP_HOLD }, { "holds", zfs_do_holds, HELP_HOLDS }, { "release", zfs_do_release, HELP_RELEASE }, { "diff", zfs_do_diff, HELP_DIFF }, }; #define NCOMMAND (sizeof (command_table) / sizeof (command_table[0])) zfs_command_t *current_command; static const char * get_usage(zfs_help_t idx) { switch (idx) { case HELP_CLONE: return (gettext("\tclone [-p] [-o property=value] ... " " \n")); case HELP_CREATE: return (gettext("\tcreate [-p] [-o property=value] ... " "\n" "\tcreate [-ps] [-b blocksize] [-o property=value] ... " "-V \n")); case HELP_DESTROY: return (gettext("\tdestroy [-fnpRrv] \n" "\tdestroy [-dnpRrv] " "@[%][,...]\n")); case HELP_GET: return (gettext("\tget [-rHp] [-d max] " "[-o \"all\" | field[,...]] [-t type[,...]] " "[-s source[,...]]\n" "\t <\"all\" | property[,...]> " "[filesystem|volume|snapshot] ...\n")); case HELP_INHERIT: return (gettext("\tinherit [-rS] " " ...\n")); case HELP_UPGRADE: return (gettext("\tupgrade [-v]\n" "\tupgrade [-r] [-V version] <-a | filesystem ...>\n")); case HELP_LIST: return (gettext("\tlist [-rH][-d max] " "[-o property[,...]] [-t type[,...]] [-s property] ...\n" "\t [-S property] ... " "[filesystem|volume|snapshot|snap] ...\n")); case HELP_MOUNT: return (gettext("\tmount\n" "\tmount [-vO] [-o opts] <-a | filesystem>\n")); case HELP_PROMOTE: return (gettext("\tpromote \n")); case HELP_RECEIVE: return (gettext("\treceive [-vnFu] \n" "\treceive [-vnFu] [-d | -e] \n")); case HELP_RENAME: return (gettext("\trename [-f] " "\n" "\trename [-f] -p \n" "\trename -r ")); case HELP_ROLLBACK: return (gettext("\trollback [-rRf] \n")); case HELP_SEND: return (gettext("\tsend [-DnPpRrv] [-[iI] snapshot] " "\n")); case HELP_SET: return (gettext("\tset " " ...\n")); case HELP_SHARE: return (gettext("\tshare <-a | filesystem>\n")); case HELP_SNAPSHOT: return (gettext("\tsnapshot|snap [-r] [-o property=value] ... 
" - "\n")); + " ...\n")); case HELP_UNMOUNT: return (gettext("\tunmount [-f] " "<-a | filesystem|mountpoint>\n")); case HELP_UNSHARE: return (gettext("\tunshare " "<-a | filesystem|mountpoint>\n")); case HELP_ALLOW: return (gettext("\tallow \n" "\tallow [-ldug] " "<\"everyone\"|user|group>[,...] [,...]\n" "\t \n" "\tallow [-ld] -e [,...] " "\n" "\tallow -c [,...] \n" "\tallow -s @setname [,...] " "\n")); case HELP_UNALLOW: return (gettext("\tunallow [-rldug] " "<\"everyone\"|user|group>[,...]\n" "\t [[,...]] \n" "\tunallow [-rld] -e [[,...]] " "\n" "\tunallow [-r] -c [[,...]] " "\n" "\tunallow [-r] -s @setname [[,...]] " "\n")); case HELP_USERSPACE: return (gettext("\tuserspace [-Hinp] [-o field[,...]] " "[-s field] ...\n\t[-S field] ... " "[-t type[,...]] \n")); case HELP_GROUPSPACE: return (gettext("\tgroupspace [-Hinp] [-o field[,...]] " "[-s field] ...\n\t[-S field] ... " "[-t type[,...]] \n")); case HELP_HOLD: return (gettext("\thold [-r] ...\n")); case HELP_HOLDS: return (gettext("\tholds [-r] ...\n")); case HELP_RELEASE: return (gettext("\trelease [-r] ...\n")); case HELP_DIFF: return (gettext("\tdiff [-FHt] " "[snapshot|filesystem]\n")); } abort(); /* NOTREACHED */ } void nomem(void) { (void) fprintf(stderr, gettext("internal error: out of memory\n")); exit(1); } /* * Utility function to guarantee malloc() success. */ void * safe_malloc(size_t size) { void *data; if ((data = calloc(1, size)) == NULL) nomem(); return (data); } static char * safe_strdup(char *str) { char *dupstr = strdup(str); if (dupstr == NULL) nomem(); return (dupstr); } /* * Callback routine that will print out information for each of * the properties. */ static int usage_prop_cb(int prop, void *cb) { FILE *fp = cb; (void) fprintf(fp, "\t%-15s ", zfs_prop_to_name(prop)); if (zfs_prop_readonly(prop)) (void) fprintf(fp, " NO "); else (void) fprintf(fp, "YES "); if (zfs_prop_inheritable(prop)) (void) fprintf(fp, " YES "); else (void) fprintf(fp, " NO "); if (zfs_prop_values(prop) == NULL) (void) fprintf(fp, "-\n"); else (void) fprintf(fp, "%s\n", zfs_prop_values(prop)); return (ZPROP_CONT); } /* * Display usage message. If we're inside a command, display only the usage for * that command. Otherwise, iterate over the entire command table and display * a complete usage message. */ static void usage(boolean_t requested) { int i; boolean_t show_properties = B_FALSE; FILE *fp = requested ? 
stdout : stderr; if (current_command == NULL) { (void) fprintf(fp, gettext("usage: zfs command args ...\n")); (void) fprintf(fp, gettext("where 'command' is one of the following:\n\n")); for (i = 0; i < NCOMMAND; i++) { if (command_table[i].name == NULL) (void) fprintf(fp, "\n"); else (void) fprintf(fp, "%s", get_usage(command_table[i].usage)); } (void) fprintf(fp, gettext("\nEach dataset is of the form: " "pool/[dataset/]*dataset[@name]\n")); } else { (void) fprintf(fp, gettext("usage:\n")); (void) fprintf(fp, "%s", get_usage(current_command->usage)); } if (current_command != NULL && (strcmp(current_command->name, "set") == 0 || strcmp(current_command->name, "get") == 0 || strcmp(current_command->name, "inherit") == 0 || strcmp(current_command->name, "list") == 0)) show_properties = B_TRUE; if (show_properties) { (void) fprintf(fp, gettext("\nThe following properties are supported:\n")); (void) fprintf(fp, "\n\t%-14s %s %s %s\n\n", "PROPERTY", "EDIT", "INHERIT", "VALUES"); /* Iterate over all properties */ (void) zprop_iter(usage_prop_cb, fp, B_FALSE, B_TRUE, ZFS_TYPE_DATASET); (void) fprintf(fp, "\t%-15s ", "userused@..."); (void) fprintf(fp, " NO NO \n"); (void) fprintf(fp, "\t%-15s ", "groupused@..."); (void) fprintf(fp, " NO NO \n"); (void) fprintf(fp, "\t%-15s ", "userquota@..."); (void) fprintf(fp, "YES NO | none\n"); (void) fprintf(fp, "\t%-15s ", "groupquota@..."); (void) fprintf(fp, "YES NO | none\n"); (void) fprintf(fp, "\t%-15s ", "written@"); (void) fprintf(fp, " NO NO \n"); (void) fprintf(fp, gettext("\nSizes are specified in bytes " "with standard units such as K, M, G, etc.\n")); (void) fprintf(fp, gettext("\nUser-defined properties can " "be specified by using a name containing a colon (:).\n")); (void) fprintf(fp, gettext("\nThe {user|group}{used|quota}@ " "properties must be appended with\n" "a user or group specifier of one of these forms:\n" " POSIX name (eg: \"matt\")\n" " POSIX id (eg: \"126829\")\n" " SMB name@domain (eg: \"matt@sun\")\n" " SMB SID (eg: \"S-1-234-567-89\")\n")); } else { (void) fprintf(fp, gettext("\nFor the property list, run: %s\n"), "zfs set|get"); (void) fprintf(fp, gettext("\nFor the delegated permission list, run: %s\n"), "zfs allow|unallow"); } /* * See comments at end of main(). */ if (getenv("ZFS_ABORT") != NULL) { (void) printf("dumping core by request\n"); abort(); } exit(requested ? 
0 : 2); } static int parseprop(nvlist_t *props) { char *propname = optarg; char *propval, *strval; if ((propval = strchr(propname, '=')) == NULL) { (void) fprintf(stderr, gettext("missing " "'=' for -o option\n")); return (-1); } *propval = '\0'; propval++; if (nvlist_lookup_string(props, propname, &strval) == 0) { (void) fprintf(stderr, gettext("property '%s' " "specified multiple times\n"), propname); return (-1); } if (nvlist_add_string(props, propname, propval) != 0) nomem(); return (0); } static int parse_depth(char *opt, int *flags) { char *tmp; int depth; depth = (int)strtol(opt, &tmp, 0); if (*tmp) { (void) fprintf(stderr, gettext("%s is not an integer\n"), optarg); usage(B_FALSE); } if (depth < 0) { (void) fprintf(stderr, gettext("Depth can not be negative.\n")); usage(B_FALSE); } *flags |= (ZFS_ITER_DEPTH_LIMIT|ZFS_ITER_RECURSE); return (depth); } #define PROGRESS_DELAY 2 /* seconds */ static char *pt_reverse = "\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b"; static time_t pt_begin; static char *pt_header = NULL; static boolean_t pt_shown; static void start_progress_timer(void) { pt_begin = time(NULL) + PROGRESS_DELAY; pt_shown = B_FALSE; } static void set_progress_header(char *header) { assert(pt_header == NULL); pt_header = safe_strdup(header); if (pt_shown) { (void) printf("%s: ", header); (void) fflush(stdout); } } static void update_progress(char *update) { if (!pt_shown && time(NULL) > pt_begin) { int len = strlen(update); (void) printf("%s: %s%*.*s", pt_header, update, len, len, pt_reverse); (void) fflush(stdout); pt_shown = B_TRUE; } else if (pt_shown) { int len = strlen(update); (void) printf("%s%*.*s", update, len, len, pt_reverse); (void) fflush(stdout); } } static void finish_progress(char *done) { if (pt_shown) { (void) printf("%s\n", done); (void) fflush(stdout); } free(pt_header); pt_header = NULL; } /* * zfs clone [-p] [-o prop=value] ... * * Given an existing dataset, create a writable copy whose initial contents * are the same as the source. The newly created dataset maintains a * dependency on the original; the original cannot be destroyed so long as * the clone exists. * * The '-p' flag creates all the non-existing ancestors of the target first. */ static int zfs_do_clone(int argc, char **argv) { zfs_handle_t *zhp = NULL; boolean_t parents = B_FALSE; nvlist_t *props; int ret = 0; int c; if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0) nomem(); /* check options */ while ((c = getopt(argc, argv, "o:p")) != -1) { switch (c) { case 'o': if (parseprop(props)) return (1); break; case 'p': parents = B_TRUE; break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); goto usage; } } argc -= optind; argv += optind; /* check number of arguments */ if (argc < 1) { (void) fprintf(stderr, gettext("missing source dataset " "argument\n")); goto usage; } if (argc < 2) { (void) fprintf(stderr, gettext("missing target dataset " "argument\n")); goto usage; } if (argc > 2) { (void) fprintf(stderr, gettext("too many arguments\n")); goto usage; } /* open the source dataset */ if ((zhp = zfs_open(g_zfs, argv[0], ZFS_TYPE_SNAPSHOT)) == NULL) return (1); if (parents && zfs_name_valid(argv[1], ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME)) { /* * Now create the ancestors of the target dataset. If the * target already exists and '-p' option was used we should not * complain. 
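/*
 * Sketch of the in-place progress idiom update_progress() relies on above
 * (progress_step() is an invented name): print the update text and then
 * exactly as many backspaces, so the next update overwrites it in place.
 * "%*.*s" with width and precision both set to len emits len characters
 * taken from the backspace string.
 */
static void
progress_step(const char *update)
{
	static const char *backspaces =
	    "\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b";
	int len = strlen(update);

	(void) printf("%s%*.*s", update, len, len, backspaces);
	(void) fflush(stdout);
}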
*/ if (zfs_dataset_exists(g_zfs, argv[1], ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME)) return (0); if (zfs_create_ancestors(g_zfs, argv[1]) != 0) return (1); } /* pass to libzfs */ ret = zfs_clone(zhp, argv[1], props); /* create the mountpoint if necessary */ if (ret == 0) { zfs_handle_t *clone; clone = zfs_open(g_zfs, argv[1], ZFS_TYPE_DATASET); if (clone != NULL) { if (zfs_get_type(clone) != ZFS_TYPE_VOLUME) if ((ret = zfs_mount(clone, NULL, 0)) == 0) ret = zfs_share(clone); zfs_close(clone); } } zfs_close(zhp); nvlist_free(props); return (!!ret); usage: if (zhp) zfs_close(zhp); nvlist_free(props); usage(B_FALSE); return (-1); } /* * zfs create [-p] [-o prop=value] ... fs * zfs create [-ps] [-b blocksize] [-o prop=value] ... -V vol size * * Create a new dataset. This command can be used to create filesystems * and volumes. Snapshot creation is handled by 'zfs snapshot'. * For volumes, the user must specify a size to be used. * * The '-s' flag applies only to volumes, and indicates that we should not try * to set the reservation for this volume. By default we set a reservation * equal to the size for any volume. For pools with SPA_VERSION >= * SPA_VERSION_REFRESERVATION, we set a refreservation instead. * * The '-p' flag creates all the non-existing ancestors of the target first. */ static int zfs_do_create(int argc, char **argv) { zfs_type_t type = ZFS_TYPE_FILESYSTEM; zfs_handle_t *zhp = NULL; uint64_t volsize = 0; int c; boolean_t noreserve = B_FALSE; boolean_t bflag = B_FALSE; boolean_t parents = B_FALSE; int ret = 1; nvlist_t *props; uint64_t intval; int canmount = ZFS_CANMOUNT_OFF; if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0) nomem(); /* check options */ while ((c = getopt(argc, argv, ":V:b:so:p")) != -1) { switch (c) { case 'V': type = ZFS_TYPE_VOLUME; if (zfs_nicestrtonum(g_zfs, optarg, &intval) != 0) { (void) fprintf(stderr, gettext("bad volume " "size '%s': %s\n"), optarg, libzfs_error_description(g_zfs)); goto error; } if (nvlist_add_uint64(props, zfs_prop_to_name(ZFS_PROP_VOLSIZE), intval) != 0) nomem(); volsize = intval; break; case 'p': parents = B_TRUE; break; case 'b': bflag = B_TRUE; if (zfs_nicestrtonum(g_zfs, optarg, &intval) != 0) { (void) fprintf(stderr, gettext("bad volume " "block size '%s': %s\n"), optarg, libzfs_error_description(g_zfs)); goto error; } if (nvlist_add_uint64(props, zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), intval) != 0) nomem(); break; case 'o': if (parseprop(props)) goto error; break; case 's': noreserve = B_TRUE; break; case ':': (void) fprintf(stderr, gettext("missing size " "argument\n")); goto badusage; break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); goto badusage; } } if ((bflag || noreserve) && type != ZFS_TYPE_VOLUME) { (void) fprintf(stderr, gettext("'-s' and '-b' can only be " "used when creating a volume\n")); goto badusage; } argc -= optind; argv += optind; /* check number of arguments */ if (argc == 0) { (void) fprintf(stderr, gettext("missing %s argument\n"), zfs_type_to_name(type)); goto badusage; } if (argc > 1) { (void) fprintf(stderr, gettext("too many arguments\n")); goto badusage; } if (type == ZFS_TYPE_VOLUME && !noreserve) { zpool_handle_t *zpool_handle; uint64_t spa_version; char *p; zfs_prop_t resv_prop; char *strval; if ((p = strchr(argv[0], '/'))) *p = '\0'; zpool_handle = zpool_open(g_zfs, argv[0]); if (p != NULL) *p = '/'; if (zpool_handle == NULL) goto error; spa_version = zpool_get_prop_int(zpool_handle, ZPOOL_PROP_VERSION, NULL); zpool_close(zpool_handle); if (spa_version >= 
SPA_VERSION_REFRESERVATION) resv_prop = ZFS_PROP_REFRESERVATION; else resv_prop = ZFS_PROP_RESERVATION; volsize = zvol_volsize_to_reservation(volsize, props); if (nvlist_lookup_string(props, zfs_prop_to_name(resv_prop), &strval) != 0) { if (nvlist_add_uint64(props, zfs_prop_to_name(resv_prop), volsize) != 0) { nvlist_free(props); nomem(); } } } if (parents && zfs_name_valid(argv[0], type)) { /* * Now create the ancestors of target dataset. If the target * already exists and '-p' option was used we should not * complain. */ if (zfs_dataset_exists(g_zfs, argv[0], type)) { ret = 0; goto error; } if (zfs_create_ancestors(g_zfs, argv[0]) != 0) goto error; } /* pass to libzfs */ if (zfs_create(g_zfs, argv[0], type, props) != 0) goto error; if ((zhp = zfs_open(g_zfs, argv[0], ZFS_TYPE_DATASET)) == NULL) goto error; ret = 0; /* * if the user doesn't want the dataset automatically mounted, * then skip the mount/share step */ if (zfs_prop_valid_for_type(ZFS_PROP_CANMOUNT, type)) canmount = zfs_prop_get_int(zhp, ZFS_PROP_CANMOUNT); /* * Mount and/or share the new filesystem as appropriate. We provide a * verbose error message to let the user know that their filesystem was * in fact created, even if we failed to mount or share it. */ if (canmount == ZFS_CANMOUNT_ON) { if (zfs_mount(zhp, NULL, 0) != 0) { (void) fprintf(stderr, gettext("filesystem " "successfully created, but not mounted\n")); ret = 1; } else if (zfs_share(zhp) != 0) { (void) fprintf(stderr, gettext("filesystem " "successfully created, but not shared\n")); ret = 1; } } error: if (zhp) zfs_close(zhp); nvlist_free(props); return (ret); badusage: nvlist_free(props); usage(B_FALSE); return (2); } /* * zfs destroy [-rRf] * zfs destroy [-rRd] * * -r Recursively destroy all children * -R Recursively destroy all dependents, including clones * -f Force unmounting of any dependents * -d If we can't destroy now, mark for deferred destruction * * Destroys the given dataset. By default, it will unmount any filesystems, * and refuse to destroy a dataset that has any dependents. A dependent can * either be a child, or a clone of a child. */ typedef struct destroy_cbdata { boolean_t cb_first; boolean_t cb_force; boolean_t cb_recurse; boolean_t cb_error; boolean_t cb_doclones; zfs_handle_t *cb_target; boolean_t cb_defer_destroy; boolean_t cb_verbose; boolean_t cb_parsable; boolean_t cb_dryrun; nvlist_t *cb_nvl; /* first snap in contiguous run */ - zfs_handle_t *cb_firstsnap; + char *cb_firstsnap; /* previous snap in contiguous run */ - zfs_handle_t *cb_prevsnap; + char *cb_prevsnap; int64_t cb_snapused; char *cb_snapspec; } destroy_cbdata_t; /* * Check for any dependents based on the '-r' or '-R' flags. */ static int destroy_check_dependent(zfs_handle_t *zhp, void *data) { destroy_cbdata_t *cbp = data; const char *tname = zfs_get_name(cbp->cb_target); const char *name = zfs_get_name(zhp); if (strncmp(tname, name, strlen(tname)) == 0 && (name[strlen(tname)] == '/' || name[strlen(tname)] == '@')) { /* * This is a direct descendant, not a clone somewhere else in * the hierarchy. */ if (cbp->cb_recurse) goto out; if (cbp->cb_first) { (void) fprintf(stderr, gettext("cannot destroy '%s': " "%s has children\n"), zfs_get_name(cbp->cb_target), zfs_type_to_name(zfs_get_type(cbp->cb_target))); (void) fprintf(stderr, gettext("use '-r' to destroy " "the following datasets:\n")); cbp->cb_first = B_FALSE; cbp->cb_error = B_TRUE; } (void) fprintf(stderr, "%s\n", zfs_get_name(zhp)); } else { /* * This is a clone. 
We only want to report this if the '-r' * wasn't specified, or the target is a snapshot. */ if (!cbp->cb_recurse && zfs_get_type(cbp->cb_target) != ZFS_TYPE_SNAPSHOT) goto out; if (cbp->cb_first) { (void) fprintf(stderr, gettext("cannot destroy '%s': " "%s has dependent clones\n"), zfs_get_name(cbp->cb_target), zfs_type_to_name(zfs_get_type(cbp->cb_target))); (void) fprintf(stderr, gettext("use '-R' to destroy " "the following datasets:\n")); cbp->cb_first = B_FALSE; cbp->cb_error = B_TRUE; cbp->cb_dryrun = B_TRUE; } (void) fprintf(stderr, "%s\n", zfs_get_name(zhp)); } out: zfs_close(zhp); return (0); } static int destroy_callback(zfs_handle_t *zhp, void *data) { destroy_cbdata_t *cb = data; const char *name = zfs_get_name(zhp); if (cb->cb_verbose) { if (cb->cb_parsable) { (void) printf("destroy\t%s\n", name); } else if (cb->cb_dryrun) { (void) printf(gettext("would destroy %s\n"), name); } else { (void) printf(gettext("will destroy %s\n"), name); } } /* * Ignore pools (which we've already flagged as an error before getting * here). */ if (strchr(zfs_get_name(zhp), '/') == NULL && zfs_get_type(zhp) == ZFS_TYPE_FILESYSTEM) { zfs_close(zhp); return (0); } if (!cb->cb_dryrun) { if (zfs_unmount(zhp, NULL, cb->cb_force ? MS_FORCE : 0) != 0 || zfs_destroy(zhp, cb->cb_defer_destroy) != 0) { zfs_close(zhp); return (-1); } } zfs_close(zhp); return (0); } static int destroy_print_cb(zfs_handle_t *zhp, void *arg) { destroy_cbdata_t *cb = arg; const char *name = zfs_get_name(zhp); int err = 0; if (nvlist_exists(cb->cb_nvl, name)) { if (cb->cb_firstsnap == NULL) - cb->cb_firstsnap = zfs_handle_dup(zhp); + cb->cb_firstsnap = strdup(name); if (cb->cb_prevsnap != NULL) - zfs_close(cb->cb_prevsnap); + free(cb->cb_prevsnap); /* this snap continues the current range */ - cb->cb_prevsnap = zfs_handle_dup(zhp); + cb->cb_prevsnap = strdup(name); + if (cb->cb_firstsnap == NULL || cb->cb_prevsnap == NULL) + nomem(); if (cb->cb_verbose) { if (cb->cb_parsable) { (void) printf("destroy\t%s\n", name); } else if (cb->cb_dryrun) { (void) printf(gettext("would destroy %s\n"), name); } else { (void) printf(gettext("will destroy %s\n"), name); } } } else if (cb->cb_firstsnap != NULL) { /* end of this range */ uint64_t used = 0; - err = zfs_get_snapused_int(cb->cb_firstsnap, + err = lzc_snaprange_space(cb->cb_firstsnap, cb->cb_prevsnap, &used); cb->cb_snapused += used; - zfs_close(cb->cb_firstsnap); + free(cb->cb_firstsnap); cb->cb_firstsnap = NULL; - zfs_close(cb->cb_prevsnap); + free(cb->cb_prevsnap); cb->cb_prevsnap = NULL; } zfs_close(zhp); return (err); } static int destroy_print_snapshots(zfs_handle_t *fs_zhp, destroy_cbdata_t *cb) { int err; assert(cb->cb_firstsnap == NULL); assert(cb->cb_prevsnap == NULL); err = zfs_iter_snapshots_sorted(fs_zhp, destroy_print_cb, cb); if (cb->cb_firstsnap != NULL) { uint64_t used = 0; if (err == 0) { - err = zfs_get_snapused_int(cb->cb_firstsnap, + err = lzc_snaprange_space(cb->cb_firstsnap, cb->cb_prevsnap, &used); } cb->cb_snapused += used; - zfs_close(cb->cb_firstsnap); + free(cb->cb_firstsnap); cb->cb_firstsnap = NULL; - zfs_close(cb->cb_prevsnap); + free(cb->cb_prevsnap); cb->cb_prevsnap = NULL; } return (err); } static int snapshot_to_nvl_cb(zfs_handle_t *zhp, void *arg) { destroy_cbdata_t *cb = arg; int err = 0; /* Check for clones. 
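/*
 * Sketch of the range-accounting pattern destroy_print_cb() and
 * destroy_print_snapshots() implement above (estimate_reclaim() and its
 * arguments are invented for illustration): snapshots arrive oldest to
 * newest, contiguous runs of to-be-destroyed snapshots are remembered only
 * by their first and last names, and lzc_snaprange_space() from libzfs_core
 * is asked once per run how much space the whole run would free, instead of
 * once per snapshot.
 */
static int
estimate_reclaim(char **snaps, int nsnaps, nvlist_t *doomed, uint64_t *totalp)
{
	const char *first = NULL, *prev = NULL;
	uint64_t used;
	int i, err = 0;

	for (i = 0; i < nsnaps; i++) {
		if (nvlist_exists(doomed, snaps[i])) {
			if (first == NULL)
				first = snaps[i];	/* run begins */
			prev = snaps[i];		/* run continues */
		} else if (first != NULL) {
			/* run ended; one call covers the whole run */
			if ((err = lzc_snaprange_space(first, prev,
			    &used)) != 0)
				return (err);
			*totalp += used;
			first = prev = NULL;
		}
	}
	if (first != NULL && (err = lzc_snaprange_space(first, prev,
	    &used)) == 0)
		*totalp += used;
	return (err);
}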
*/ if (!cb->cb_doclones && !cb->cb_defer_destroy) { cb->cb_target = zhp; cb->cb_first = B_TRUE; err = zfs_iter_dependents(zhp, B_TRUE, destroy_check_dependent, cb); } if (err == 0) { if (nvlist_add_boolean(cb->cb_nvl, zfs_get_name(zhp))) nomem(); } zfs_close(zhp); return (err); } static int gather_snapshots(zfs_handle_t *zhp, void *arg) { destroy_cbdata_t *cb = arg; int err = 0; err = zfs_iter_snapspec(zhp, cb->cb_snapspec, snapshot_to_nvl_cb, cb); if (err == ENOENT) err = 0; if (err != 0) goto out; if (cb->cb_verbose) { err = destroy_print_snapshots(zhp, cb); if (err != 0) goto out; } if (cb->cb_recurse) err = zfs_iter_filesystems(zhp, gather_snapshots, cb); out: zfs_close(zhp); return (err); } static int destroy_clones(destroy_cbdata_t *cb) { nvpair_t *pair; for (pair = nvlist_next_nvpair(cb->cb_nvl, NULL); pair != NULL; pair = nvlist_next_nvpair(cb->cb_nvl, pair)) { zfs_handle_t *zhp = zfs_open(g_zfs, nvpair_name(pair), ZFS_TYPE_SNAPSHOT); if (zhp != NULL) { boolean_t defer = cb->cb_defer_destroy; int err; /* * We can't defer destroy non-snapshots, so set it to * false while destroying the clones. */ cb->cb_defer_destroy = B_FALSE; err = zfs_iter_dependents(zhp, B_FALSE, destroy_callback, cb); cb->cb_defer_destroy = defer; zfs_close(zhp); if (err != 0) return (err); } } return (0); } static int zfs_do_destroy(int argc, char **argv) { destroy_cbdata_t cb = { 0 }; int c; zfs_handle_t *zhp; char *at; zfs_type_t type = ZFS_TYPE_DATASET; /* check options */ while ((c = getopt(argc, argv, "vpndfrR")) != -1) { switch (c) { case 'v': cb.cb_verbose = B_TRUE; break; case 'p': cb.cb_verbose = B_TRUE; cb.cb_parsable = B_TRUE; break; case 'n': cb.cb_dryrun = B_TRUE; break; case 'd': cb.cb_defer_destroy = B_TRUE; type = ZFS_TYPE_SNAPSHOT; break; case 'f': cb.cb_force = B_TRUE; break; case 'r': cb.cb_recurse = B_TRUE; break; case 'R': cb.cb_recurse = B_TRUE; cb.cb_doclones = B_TRUE; break; case '?': default: (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); usage(B_FALSE); } } argc -= optind; argv += optind; /* check number of arguments */ if (argc == 0) { (void) fprintf(stderr, gettext("missing dataset argument\n")); usage(B_FALSE); } if (argc > 1) { (void) fprintf(stderr, gettext("too many arguments\n")); usage(B_FALSE); } at = strchr(argv[0], '@'); if (at != NULL) { int err = 0; /* Build the list of snaps to destroy in cb_nvl. 
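/*
 * Sketch of the nvlist-as-set idiom cb_nvl uses: boolean entries keyed by
 * full snapshot name, added as candidates are gathered and tested later with
 * nvlist_exists().  The dataset name below is made up for illustration.
 */
static void
nvlist_set_example(void)
{
	nvlist_t *doomed;

	if (nvlist_alloc(&doomed, NV_UNIQUE_NAME, 0) != 0)
		nomem();
	if (nvlist_add_boolean(doomed, "pool/fs@snap1") != 0)
		nomem();

	/* membership test, as destroy_print_cb() does */
	if (nvlist_exists(doomed, "pool/fs@snap1"))
		(void) printf("will destroy pool/fs@snap1\n");

	nvlist_free(doomed);
}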
*/ if (nvlist_alloc(&cb.cb_nvl, NV_UNIQUE_NAME, 0) != 0) nomem(); *at = '\0'; zhp = zfs_open(g_zfs, argv[0], ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME); if (zhp == NULL) return (1); cb.cb_snapspec = at + 1; if (gather_snapshots(zfs_handle_dup(zhp), &cb) != 0 || cb.cb_error) { zfs_close(zhp); nvlist_free(cb.cb_nvl); return (1); } if (nvlist_empty(cb.cb_nvl)) { (void) fprintf(stderr, gettext("could not find any " "snapshots to destroy; check snapshot names.\n")); zfs_close(zhp); nvlist_free(cb.cb_nvl); return (1); } if (cb.cb_verbose) { char buf[16]; zfs_nicenum(cb.cb_snapused, buf, sizeof (buf)); if (cb.cb_parsable) { (void) printf("reclaim\t%llu\n", (u_longlong_t)cb.cb_snapused); } else if (cb.cb_dryrun) { (void) printf(gettext("would reclaim %s\n"), buf); } else { (void) printf(gettext("will reclaim %s\n"), buf); } } if (!cb.cb_dryrun) { if (cb.cb_doclones) err = destroy_clones(&cb); if (err == 0) { err = zfs_destroy_snaps_nvl(zhp, cb.cb_nvl, cb.cb_defer_destroy); } } zfs_close(zhp); nvlist_free(cb.cb_nvl); if (err != 0) return (1); } else { /* Open the given dataset */ if ((zhp = zfs_open(g_zfs, argv[0], type)) == NULL) return (1); cb.cb_target = zhp; /* * Perform an explicit check for pools before going any further. */ if (!cb.cb_recurse && strchr(zfs_get_name(zhp), '/') == NULL && zfs_get_type(zhp) == ZFS_TYPE_FILESYSTEM) { (void) fprintf(stderr, gettext("cannot destroy '%s': " "operation does not apply to pools\n"), zfs_get_name(zhp)); (void) fprintf(stderr, gettext("use 'zfs destroy -r " "%s' to destroy all datasets in the pool\n"), zfs_get_name(zhp)); (void) fprintf(stderr, gettext("use 'zpool destroy %s' " "to destroy the pool itself\n"), zfs_get_name(zhp)); zfs_close(zhp); return (1); } /* * Check for any dependents and/or clones. */ cb.cb_first = B_TRUE; if (!cb.cb_doclones && zfs_iter_dependents(zhp, B_TRUE, destroy_check_dependent, &cb) != 0) { zfs_close(zhp); return (1); } if (cb.cb_error) { zfs_close(zhp); return (1); } if (zfs_iter_dependents(zhp, B_FALSE, destroy_callback, &cb) != 0) { zfs_close(zhp); return (1); } /* * Do the real thing. The callback will close the * handle regardless of whether it succeeds or not. */ if (destroy_callback(zhp, &cb) != 0) return (1); } return (0); } static boolean_t is_recvd_column(zprop_get_cbdata_t *cbp) { int i; zfs_get_column_t col; for (i = 0; i < ZFS_GET_NCOLS && (col = cbp->cb_columns[i]) != GET_COL_NONE; i++) if (col == GET_COL_RECVD) return (B_TRUE); return (B_FALSE); } /* * zfs get [-rHp] [-o all | field[,field]...] [-s source[,source]...] * < all | property[,property]... > < fs | snap | vol > ... * * -r recurse over any child datasets * -H scripted mode. Headers are stripped, and fields are separated * by tabs instead of spaces. * -o Set of fields to display. One of "name,property,value, * received,source". Default is "name,property,value,source". * "all" is an alias for all five. * -s Set of sources to allow. One of * "local,default,inherited,received,temporary,none". Default is * all six. * -p Display values in parsable (literal) format. * * Prints properties for the given datasets. The user can control which * columns to display as well as which property types to allow. */ /* * Invoked to display the properties for a single dataset. 
*/ static int get_callback(zfs_handle_t *zhp, void *data) { char buf[ZFS_MAXPROPLEN]; char rbuf[ZFS_MAXPROPLEN]; zprop_source_t sourcetype; char source[ZFS_MAXNAMELEN]; zprop_get_cbdata_t *cbp = data; nvlist_t *user_props = zfs_get_user_props(zhp); zprop_list_t *pl = cbp->cb_proplist; nvlist_t *propval; char *strval; char *sourceval; boolean_t received = is_recvd_column(cbp); for (; pl != NULL; pl = pl->pl_next) { char *recvdval = NULL; /* * Skip the special fake placeholder. This will also skip over * the name property when 'all' is specified. */ if (pl->pl_prop == ZFS_PROP_NAME && pl == cbp->cb_proplist) continue; if (pl->pl_prop != ZPROP_INVAL) { if (zfs_prop_get(zhp, pl->pl_prop, buf, sizeof (buf), &sourcetype, source, sizeof (source), cbp->cb_literal) != 0) { if (pl->pl_all) continue; if (!zfs_prop_valid_for_type(pl->pl_prop, ZFS_TYPE_DATASET)) { (void) fprintf(stderr, gettext("No such property '%s'\n"), zfs_prop_to_name(pl->pl_prop)); continue; } sourcetype = ZPROP_SRC_NONE; (void) strlcpy(buf, "-", sizeof (buf)); } if (received && (zfs_prop_get_recvd(zhp, zfs_prop_to_name(pl->pl_prop), rbuf, sizeof (rbuf), cbp->cb_literal) == 0)) recvdval = rbuf; zprop_print_one_property(zfs_get_name(zhp), cbp, zfs_prop_to_name(pl->pl_prop), buf, sourcetype, source, recvdval); } else if (zfs_prop_userquota(pl->pl_user_prop)) { sourcetype = ZPROP_SRC_LOCAL; if (zfs_prop_get_userquota(zhp, pl->pl_user_prop, buf, sizeof (buf), cbp->cb_literal) != 0) { sourcetype = ZPROP_SRC_NONE; (void) strlcpy(buf, "-", sizeof (buf)); } zprop_print_one_property(zfs_get_name(zhp), cbp, pl->pl_user_prop, buf, sourcetype, source, NULL); } else if (zfs_prop_written(pl->pl_user_prop)) { sourcetype = ZPROP_SRC_LOCAL; if (zfs_prop_get_written(zhp, pl->pl_user_prop, buf, sizeof (buf), cbp->cb_literal) != 0) { sourcetype = ZPROP_SRC_NONE; (void) strlcpy(buf, "-", sizeof (buf)); } zprop_print_one_property(zfs_get_name(zhp), cbp, pl->pl_user_prop, buf, sourcetype, source, NULL); } else { if (nvlist_lookup_nvlist(user_props, pl->pl_user_prop, &propval) != 0) { if (pl->pl_all) continue; sourcetype = ZPROP_SRC_NONE; strval = "-"; } else { verify(nvlist_lookup_string(propval, ZPROP_VALUE, &strval) == 0); verify(nvlist_lookup_string(propval, ZPROP_SOURCE, &sourceval) == 0); if (strcmp(sourceval, zfs_get_name(zhp)) == 0) { sourcetype = ZPROP_SRC_LOCAL; } else if (strcmp(sourceval, ZPROP_SOURCE_VAL_RECVD) == 0) { sourcetype = ZPROP_SRC_RECEIVED; } else { sourcetype = ZPROP_SRC_INHERITED; (void) strlcpy(source, sourceval, sizeof (source)); } } if (received && (zfs_prop_get_recvd(zhp, pl->pl_user_prop, rbuf, sizeof (rbuf), cbp->cb_literal) == 0)) recvdval = rbuf; zprop_print_one_property(zfs_get_name(zhp), cbp, pl->pl_user_prop, strval, sourcetype, source, recvdval); } } return (0); } static int zfs_do_get(int argc, char **argv) { zprop_get_cbdata_t cb = { 0 }; int i, c, flags = ZFS_ITER_ARGS_CAN_BE_PATHS; int types = ZFS_TYPE_DATASET; char *value, *fields; int ret = 0; int limit = 0; zprop_list_t fake_name = { 0 }; /* * Set up default columns and sources. 
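/*
 * The -o, -s and -t parsers below are all built on getsubopt(3).  A minimal
 * standalone sketch of that pattern (the token list and handler here are
 * hypothetical): getsubopt() consumes one comma-separated sub-option per
 * call, advances the string, and returns the token's index or -1 if it is
 * unrecognized; 'value' would receive any "=value" suffix, unused here.
 */
static void
parse_columns_example(char *arg)
{
	static char *tokens[] = { "name", "value", NULL };
	char *value;

	while (*arg != '\0') {
		switch (getsubopt(&arg, tokens, &value)) {
		case 0:	/* "name" */
			(void) printf("show NAME column\n");
			break;
		case 1:	/* "value" */
			(void) printf("show VALUE column\n");
			break;
		default:
			(void) fprintf(stderr, "unknown column\n");
			return;
		}
	}
}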
*/ cb.cb_sources = ZPROP_SRC_ALL; cb.cb_columns[0] = GET_COL_NAME; cb.cb_columns[1] = GET_COL_PROPERTY; cb.cb_columns[2] = GET_COL_VALUE; cb.cb_columns[3] = GET_COL_SOURCE; cb.cb_type = ZFS_TYPE_DATASET; /* check options */ while ((c = getopt(argc, argv, ":d:o:s:rt:Hp")) != -1) { switch (c) { case 'p': cb.cb_literal = B_TRUE; break; case 'd': limit = parse_depth(optarg, &flags); break; case 'r': flags |= ZFS_ITER_RECURSE; break; case 'H': cb.cb_scripted = B_TRUE; break; case ':': (void) fprintf(stderr, gettext("missing argument for " "'%c' option\n"), optopt); usage(B_FALSE); break; case 'o': /* * Process the set of columns to display. We zero out * the structure to give us a blank slate. */ bzero(&cb.cb_columns, sizeof (cb.cb_columns)); i = 0; while (*optarg != '\0') { static char *col_subopts[] = { "name", "property", "value", "received", "source", "all", NULL }; if (i == ZFS_GET_NCOLS) { (void) fprintf(stderr, gettext("too " "many fields given to -o " "option\n")); usage(B_FALSE); } switch (getsubopt(&optarg, col_subopts, &value)) { case 0: cb.cb_columns[i++] = GET_COL_NAME; break; case 1: cb.cb_columns[i++] = GET_COL_PROPERTY; break; case 2: cb.cb_columns[i++] = GET_COL_VALUE; break; case 3: cb.cb_columns[i++] = GET_COL_RECVD; flags |= ZFS_ITER_RECVD_PROPS; break; case 4: cb.cb_columns[i++] = GET_COL_SOURCE; break; case 5: if (i > 0) { (void) fprintf(stderr, gettext("\"all\" conflicts " "with specific fields " "given to -o option\n")); usage(B_FALSE); } cb.cb_columns[0] = GET_COL_NAME; cb.cb_columns[1] = GET_COL_PROPERTY; cb.cb_columns[2] = GET_COL_VALUE; cb.cb_columns[3] = GET_COL_RECVD; cb.cb_columns[4] = GET_COL_SOURCE; flags |= ZFS_ITER_RECVD_PROPS; i = ZFS_GET_NCOLS; break; default: (void) fprintf(stderr, gettext("invalid column name " "'%s'\n"), value); usage(B_FALSE); } } break; case 's': cb.cb_sources = 0; while (*optarg != '\0') { static char *source_subopts[] = { "local", "default", "inherited", "received", "temporary", "none", NULL }; switch (getsubopt(&optarg, source_subopts, &value)) { case 0: cb.cb_sources |= ZPROP_SRC_LOCAL; break; case 1: cb.cb_sources |= ZPROP_SRC_DEFAULT; break; case 2: cb.cb_sources |= ZPROP_SRC_INHERITED; break; case 3: cb.cb_sources |= ZPROP_SRC_RECEIVED; break; case 4: cb.cb_sources |= ZPROP_SRC_TEMPORARY; break; case 5: cb.cb_sources |= ZPROP_SRC_NONE; break; default: (void) fprintf(stderr, gettext("invalid source " "'%s'\n"), value); usage(B_FALSE); } } break; case 't': types = 0; flags &= ~ZFS_ITER_PROP_LISTSNAPS; while (*optarg != '\0') { static char *type_subopts[] = { "filesystem", "volume", "snapshot", "all", NULL }; switch (getsubopt(&optarg, type_subopts, &value)) { case 0: types |= ZFS_TYPE_FILESYSTEM; break; case 1: types |= ZFS_TYPE_VOLUME; break; case 2: types |= ZFS_TYPE_SNAPSHOT; break; case 3: types = ZFS_TYPE_DATASET; break; default: (void) fprintf(stderr, gettext("invalid type '%s'\n"), value); usage(B_FALSE); } } break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); usage(B_FALSE); } } argc -= optind; argv += optind; if (argc < 1) { (void) fprintf(stderr, gettext("missing property " "argument\n")); usage(B_FALSE); } fields = argv[0]; if (zprop_get_list(g_zfs, fields, &cb.cb_proplist, ZFS_TYPE_DATASET) != 0) usage(B_FALSE); argc--; argv++; /* * As part of zfs_expand_proplist(), we keep track of the maximum column * width for each property. For the 'NAME' (and 'SOURCE') columns, we * need to know the maximum name length. 
However, the user likely did * not specify 'name' as one of the properties to fetch, so we need to * make sure we always include at least this property for * print_get_headers() to work properly. */ if (cb.cb_proplist != NULL) { fake_name.pl_prop = ZFS_PROP_NAME; fake_name.pl_width = strlen(gettext("NAME")); fake_name.pl_next = cb.cb_proplist; cb.cb_proplist = &fake_name; } cb.cb_first = B_TRUE; /* run for each object */ ret = zfs_for_each(argc, argv, flags, types, NULL, &cb.cb_proplist, limit, get_callback, &cb); if (cb.cb_proplist == &fake_name) zprop_free_list(fake_name.pl_next); else zprop_free_list(cb.cb_proplist); return (ret); } /* * inherit [-rS] ... * * -r Recurse over all children * -S Revert to received value, if any * * For each dataset specified on the command line, inherit the given property * from its parent. Inheriting a property at the pool level will cause it to * use the default value. The '-r' flag will recurse over all children, and is * useful for setting a property on a hierarchy-wide basis, regardless of any * local modifications for each dataset. */ typedef struct inherit_cbdata { const char *cb_propname; boolean_t cb_received; } inherit_cbdata_t; static int inherit_recurse_cb(zfs_handle_t *zhp, void *data) { inherit_cbdata_t *cb = data; zfs_prop_t prop = zfs_name_to_prop(cb->cb_propname); /* * If we're doing it recursively, then ignore properties that * are not valid for this type of dataset. */ if (prop != ZPROP_INVAL && !zfs_prop_valid_for_type(prop, zfs_get_type(zhp))) return (0); return (zfs_prop_inherit(zhp, cb->cb_propname, cb->cb_received) != 0); } static int inherit_cb(zfs_handle_t *zhp, void *data) { inherit_cbdata_t *cb = data; return (zfs_prop_inherit(zhp, cb->cb_propname, cb->cb_received) != 0); } static int zfs_do_inherit(int argc, char **argv) { int c; zfs_prop_t prop; inherit_cbdata_t cb = { 0 }; char *propname; int ret = 0; int flags = 0; boolean_t received = B_FALSE; /* check options */ while ((c = getopt(argc, argv, "rS")) != -1) { switch (c) { case 'r': flags |= ZFS_ITER_RECURSE; break; case 'S': received = B_TRUE; break; case '?': default: (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); usage(B_FALSE); } } argc -= optind; argv += optind; /* check number of arguments */ if (argc < 1) { (void) fprintf(stderr, gettext("missing property argument\n")); usage(B_FALSE); } if (argc < 2) { (void) fprintf(stderr, gettext("missing dataset argument\n")); usage(B_FALSE); } propname = argv[0]; argc--; argv++; if ((prop = zfs_name_to_prop(propname)) != ZPROP_INVAL) { if (zfs_prop_readonly(prop)) { (void) fprintf(stderr, gettext( "%s property is read-only\n"), propname); return (1); } if (!zfs_prop_inheritable(prop) && !received) { (void) fprintf(stderr, gettext("'%s' property cannot " "be inherited\n"), propname); if (prop == ZFS_PROP_QUOTA || prop == ZFS_PROP_RESERVATION || prop == ZFS_PROP_REFQUOTA || prop == ZFS_PROP_REFRESERVATION) (void) fprintf(stderr, gettext("use 'zfs set " "%s=none' to clear\n"), propname); return (1); } if (received && (prop == ZFS_PROP_VOLSIZE || prop == ZFS_PROP_VERSION)) { (void) fprintf(stderr, gettext("'%s' property cannot " "be reverted to a received value\n"), propname); return (1); } } else if (!zfs_prop_user(propname)) { (void) fprintf(stderr, gettext("invalid property '%s'\n"), propname); usage(B_FALSE); } cb.cb_propname = propname; cb.cb_received = received; if (flags & ZFS_ITER_RECURSE) { ret = zfs_for_each(argc, argv, flags, ZFS_TYPE_DATASET, NULL, NULL, 0, inherit_recurse_cb, &cb); } else { ret = 
zfs_for_each(argc, argv, flags, ZFS_TYPE_DATASET, NULL, NULL, 0, inherit_cb, &cb); } return (ret); } typedef struct upgrade_cbdata { uint64_t cb_numupgraded; uint64_t cb_numsamegraded; uint64_t cb_numfailed; uint64_t cb_version; boolean_t cb_newer; boolean_t cb_foundone; char cb_lastfs[ZFS_MAXNAMELEN]; } upgrade_cbdata_t; static int same_pool(zfs_handle_t *zhp, const char *name) { int len1 = strcspn(name, "/@"); const char *zhname = zfs_get_name(zhp); int len2 = strcspn(zhname, "/@"); if (len1 != len2) return (B_FALSE); return (strncmp(name, zhname, len1) == 0); } static int upgrade_list_callback(zfs_handle_t *zhp, void *data) { upgrade_cbdata_t *cb = data; int version = zfs_prop_get_int(zhp, ZFS_PROP_VERSION); /* list if it's old/new */ if ((!cb->cb_newer && version < ZPL_VERSION) || (cb->cb_newer && version > ZPL_VERSION)) { char *str; if (cb->cb_newer) { str = gettext("The following filesystems are " "formatted using a newer software version and\n" "cannot be accessed on the current system.\n\n"); } else { str = gettext("The following filesystems are " "out of date, and can be upgraded. After being\n" "upgraded, these filesystems (and any 'zfs send' " "streams generated from\n" "subsequent snapshots) will no longer be " "accessible by older software versions.\n\n"); } if (!cb->cb_foundone) { (void) puts(str); (void) printf(gettext("VER FILESYSTEM\n")); (void) printf(gettext("--- ------------\n")); cb->cb_foundone = B_TRUE; } (void) printf("%2u %s\n", version, zfs_get_name(zhp)); } return (0); } static int upgrade_set_callback(zfs_handle_t *zhp, void *data) { upgrade_cbdata_t *cb = data; int version = zfs_prop_get_int(zhp, ZFS_PROP_VERSION); int needed_spa_version; int spa_version; if (zfs_spa_version(zhp, &spa_version) < 0) return (-1); needed_spa_version = zfs_spa_version_map(cb->cb_version); if (needed_spa_version < 0) return (-1); if (spa_version < needed_spa_version) { /* can't upgrade */ (void) printf(gettext("%s: can not be " "upgraded; the pool version needs to first " "be upgraded\nto version %d\n\n"), zfs_get_name(zhp), needed_spa_version); cb->cb_numfailed++; return (0); } /* upgrade */ if (version < cb->cb_version) { char verstr[16]; (void) snprintf(verstr, sizeof (verstr), "%llu", (u_longlong_t)cb->cb_version); if (cb->cb_lastfs[0] && !same_pool(zhp, cb->cb_lastfs)) { /* * If they did "zfs upgrade -a", then we could * be doing ioctls to different pools. We need - * to log this history once to each pool. + * to log this history once to each pool, and bypass + * the normal history logging that happens in main(). 
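/*
 * Sketch, not part of the change: the pool-prefix comparison behind
 * same_pool(), which the history-logging hunk above relies on.  strcspn()
 * measures the pool component (everything before the first '/' or '@');
 * two datasets share a pool iff those components match in length and
 * content.  The wrapper below takes plain strings instead of a zfs_handle_t.
 */
#include <stdio.h>
#include <string.h>

static int
same_pool_name(const char *a, const char *b)
{
        size_t la = strcspn(a, "/@");
        size_t lb = strcspn(b, "/@");

        return (la == lb && strncmp(a, b, la) == 0);
}

int
main(void)
{
        (void) printf("%d\n", same_pool_name("tank/a@s1", "tank/b"));   /* 1 */
        (void) printf("%d\n", same_pool_name("tank/a", "dozer/a"));     /* 0 */
        return (0);
}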
*/ - verify(zpool_stage_history(g_zfs, history_str) == 0); + (void) zpool_log_history(g_zfs, history_str); + log_history = B_FALSE; } if (zfs_prop_set(zhp, "version", verstr) == 0) cb->cb_numupgraded++; else cb->cb_numfailed++; (void) strcpy(cb->cb_lastfs, zfs_get_name(zhp)); } else if (version > cb->cb_version) { /* can't downgrade */ (void) printf(gettext("%s: can not be downgraded; " "it is already at version %u\n"), zfs_get_name(zhp), version); cb->cb_numfailed++; } else { cb->cb_numsamegraded++; } return (0); } /* * zfs upgrade * zfs upgrade -v * zfs upgrade [-r] [-V ] <-a | filesystem> */ static int zfs_do_upgrade(int argc, char **argv) { boolean_t all = B_FALSE; boolean_t showversions = B_FALSE; int ret = 0; upgrade_cbdata_t cb = { 0 }; signed char c; int flags = ZFS_ITER_ARGS_CAN_BE_PATHS; /* check options */ while ((c = getopt(argc, argv, "rvV:a")) != -1) { switch (c) { case 'r': flags |= ZFS_ITER_RECURSE; break; case 'v': showversions = B_TRUE; break; case 'V': if (zfs_prop_string_to_index(ZFS_PROP_VERSION, optarg, &cb.cb_version) != 0) { (void) fprintf(stderr, gettext("invalid version %s\n"), optarg); usage(B_FALSE); } break; case 'a': all = B_TRUE; break; case '?': default: (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); usage(B_FALSE); } } argc -= optind; argv += optind; if ((!all && !argc) && ((flags & ZFS_ITER_RECURSE) | cb.cb_version)) usage(B_FALSE); if (showversions && (flags & ZFS_ITER_RECURSE || all || cb.cb_version || argc)) usage(B_FALSE); if ((all || argc) && (showversions)) usage(B_FALSE); if (all && argc) usage(B_FALSE); if (showversions) { /* Show info on available versions. */ (void) printf(gettext("The following filesystem versions are " "supported:\n\n")); (void) printf(gettext("VER DESCRIPTION\n")); (void) printf("--- -----------------------------------------" "---------------\n"); (void) printf(gettext(" 1 Initial ZFS filesystem version\n")); (void) printf(gettext(" 2 Enhanced directory entries\n")); (void) printf(gettext(" 3 Case insensitive and filesystem " "user identifier (FUID)\n")); (void) printf(gettext(" 4 userquota, groupquota " "properties\n")); (void) printf(gettext(" 5 System attributes\n")); (void) printf(gettext("\nFor more information on a particular " "version, including supported releases,\n")); (void) printf("see the ZFS Administration Guide.\n\n"); ret = 0; } else if (argc || all) { /* Upgrade filesystems */ if (cb.cb_version == 0) cb.cb_version = ZPL_VERSION; ret = zfs_for_each(argc, argv, flags, ZFS_TYPE_FILESYSTEM, NULL, NULL, 0, upgrade_set_callback, &cb); (void) printf(gettext("%llu filesystems upgraded\n"), (u_longlong_t)cb.cb_numupgraded); if (cb.cb_numsamegraded) { (void) printf(gettext("%llu filesystems already at " "this version\n"), (u_longlong_t)cb.cb_numsamegraded); } if (cb.cb_numfailed != 0) ret = 1; } else { /* List old-version filesytems */ boolean_t found; (void) printf(gettext("This system is currently running " "ZFS filesystem version %llu.\n\n"), ZPL_VERSION); flags |= ZFS_ITER_RECURSE; ret = zfs_for_each(0, NULL, flags, ZFS_TYPE_FILESYSTEM, NULL, NULL, 0, upgrade_list_callback, &cb); found = cb.cb_foundone; cb.cb_foundone = B_FALSE; cb.cb_newer = B_TRUE; ret = zfs_for_each(0, NULL, flags, ZFS_TYPE_FILESYSTEM, NULL, NULL, 0, upgrade_list_callback, &cb); if (!cb.cb_foundone && !found) { (void) printf(gettext("All filesystems are " "formatted with the current version.\n")); } } return (ret); } /* * zfs userspace [-Hinp] [-o field[,...]] [-s field [-s field]...] * [-S field [-S field]...] 
[-t type[,...]] filesystem | snapshot * zfs groupspace [-Hinp] [-o field[,...]] [-s field [-s field]...] * [-S field [-S field]...] [-t type[,...]] filesystem | snapshot * * -H Scripted mode; elide headers and separate columns by tabs. * -i Translate SID to POSIX ID. * -n Print numeric ID instead of user/group name. * -o Control which fields to display. * -p Use exact (parseable) numeric output. * -s Specify sort columns, descending order. * -S Specify sort columns, ascending order. * -t Control which object types to display. * * Displays space consumed by, and quotas on, each user in the specified * filesystem or snapshot. */ /* us_field_types, us_field_hdr and us_field_names should be kept in sync */ enum us_field_types { USFIELD_TYPE, USFIELD_NAME, USFIELD_USED, USFIELD_QUOTA }; static char *us_field_hdr[] = { "TYPE", "NAME", "USED", "QUOTA" }; static char *us_field_names[] = { "type", "name", "used", "quota" }; #define USFIELD_LAST (sizeof (us_field_names) / sizeof (char *)) #define USTYPE_PSX_GRP (1 << 0) #define USTYPE_PSX_USR (1 << 1) #define USTYPE_SMB_GRP (1 << 2) #define USTYPE_SMB_USR (1 << 3) #define USTYPE_ALL \ (USTYPE_PSX_GRP | USTYPE_PSX_USR | USTYPE_SMB_GRP | USTYPE_SMB_USR) static int us_type_bits[] = { USTYPE_PSX_GRP, USTYPE_PSX_USR, USTYPE_SMB_GRP, USTYPE_SMB_USR, USTYPE_ALL }; static char *us_type_names[] = { "posixgroup", "posxiuser", "smbgroup", "smbuser", "all" }; typedef struct us_node { nvlist_t *usn_nvl; uu_avl_node_t usn_avlnode; uu_list_node_t usn_listnode; } us_node_t; typedef struct us_cbdata { nvlist_t **cb_nvlp; uu_avl_pool_t *cb_avl_pool; uu_avl_t *cb_avl; boolean_t cb_numname; boolean_t cb_nicenum; boolean_t cb_sid2posix; zfs_userquota_prop_t cb_prop; zfs_sort_column_t *cb_sortcol; size_t cb_width[USFIELD_LAST]; } us_cbdata_t; static boolean_t us_populated = B_FALSE; typedef struct { zfs_sort_column_t *si_sortcol; boolean_t si_numname; } us_sort_info_t; static int us_field_index(char *field) { int i; for (i = 0; i < USFIELD_LAST; i++) { if (strcmp(field, us_field_names[i]) == 0) return (i); } return (-1); } static int us_compare(const void *larg, const void *rarg, void *unused) { const us_node_t *l = larg; const us_node_t *r = rarg; us_sort_info_t *si = (us_sort_info_t *)unused; zfs_sort_column_t *sortcol = si->si_sortcol; boolean_t numname = si->si_numname; nvlist_t *lnvl = l->usn_nvl; nvlist_t *rnvl = r->usn_nvl; int rc = 0; boolean_t lvb, rvb; for (; sortcol != NULL; sortcol = sortcol->sc_next) { char *lvstr = ""; char *rvstr = ""; uint32_t lv32 = 0; uint32_t rv32 = 0; uint64_t lv64 = 0; uint64_t rv64 = 0; zfs_prop_t prop = sortcol->sc_prop; const char *propname = NULL; boolean_t reverse = sortcol->sc_reverse; switch (prop) { case ZFS_PROP_TYPE: propname = "type"; (void) nvlist_lookup_uint32(lnvl, propname, &lv32); (void) nvlist_lookup_uint32(rnvl, propname, &rv32); if (rv32 != lv32) rc = (rv32 < lv32) ? 1 : -1; break; case ZFS_PROP_NAME: propname = "name"; if (numname) { (void) nvlist_lookup_uint64(lnvl, propname, &lv64); (void) nvlist_lookup_uint64(rnvl, propname, &rv64); if (rv64 != lv64) rc = (rv64 < lv64) ? 1 : -1; } else { (void) nvlist_lookup_string(lnvl, propname, &lvstr); (void) nvlist_lookup_string(rnvl, propname, &rvstr); rc = strcmp(lvstr, rvstr); } break; case ZFS_PROP_USED: case ZFS_PROP_QUOTA: if (!us_populated) break; if (prop == ZFS_PROP_USED) propname = "used"; else propname = "quota"; (void) nvlist_lookup_uint64(lnvl, propname, &lv64); (void) nvlist_lookup_uint64(rnvl, propname, &rv64); if (rv64 != lv64) rc = (rv64 < lv64) ? 
1 : -1; break; default: break; } if (rc != 0) { if (rc < 0) return (reverse ? 1 : -1); else return (reverse ? -1 : 1); } } /* * If entries still seem to be the same, check if they are of the same * type (smbentity is added only if we are doing SID to POSIX ID * translation where we can have duplicate type/name combinations). */ if (nvlist_lookup_boolean_value(lnvl, "smbentity", &lvb) == 0 && nvlist_lookup_boolean_value(rnvl, "smbentity", &rvb) == 0 && lvb != rvb) return (lvb < rvb ? -1 : 1); return (0); } static inline const char * us_type2str(unsigned field_type) { switch (field_type) { case USTYPE_PSX_USR: return ("POSIX User"); case USTYPE_PSX_GRP: return ("POSIX Group"); case USTYPE_SMB_USR: return ("SMB User"); case USTYPE_SMB_GRP: return ("SMB Group"); default: return ("Undefined"); } } static int userspace_cb(void *arg, const char *domain, uid_t rid, uint64_t space) { us_cbdata_t *cb = (us_cbdata_t *)arg; zfs_userquota_prop_t prop = cb->cb_prop; char *name = NULL; char *propname; char sizebuf[32]; us_node_t *node; uu_avl_pool_t *avl_pool = cb->cb_avl_pool; uu_avl_t *avl = cb->cb_avl; uu_avl_index_t idx; nvlist_t *props; us_node_t *n; zfs_sort_column_t *sortcol = cb->cb_sortcol; unsigned type = 0; const char *typestr; size_t namelen; size_t typelen; size_t sizelen; int typeidx, nameidx, sizeidx; us_sort_info_t sortinfo = { sortcol, cb->cb_numname }; boolean_t smbentity = B_FALSE; if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0) nomem(); node = safe_malloc(sizeof (us_node_t)); uu_avl_node_init(node, &node->usn_avlnode, avl_pool); node->usn_nvl = props; if (domain != NULL && domain[0] != '\0') { #ifdef HAVE_IDMAP /* SMB */ char sid[ZFS_MAXNAMELEN + 32]; uid_t id; uint64_t classes; int err; directory_error_t e; smbentity = B_TRUE; (void) snprintf(sid, sizeof (sid), "%s-%u", domain, rid); if (prop == ZFS_PROP_GROUPUSED || prop == ZFS_PROP_GROUPQUOTA) { type = USTYPE_SMB_GRP; err = sid_to_id(sid, B_FALSE, &id); } else { type = USTYPE_SMB_USR; err = sid_to_id(sid, B_TRUE, &id); } if (err == 0) { rid = id; if (!cb->cb_sid2posix) { e = directory_name_from_sid(NULL, sid, &name, &classes); if (e != NULL) directory_error_free(e); if (name == NULL) name = sid; } } #else nvlist_free(props); free(node); return (-1); #endif /* HAVE_IDMAP */ } if (cb->cb_sid2posix || domain == NULL || domain[0] == '\0') { /* POSIX or -i */ if (prop == ZFS_PROP_GROUPUSED || prop == ZFS_PROP_GROUPQUOTA) { type = USTYPE_PSX_GRP; if (!cb->cb_numname) { struct group *g; if ((g = getgrgid(rid)) != NULL) name = g->gr_name; } } else { type = USTYPE_PSX_USR; if (!cb->cb_numname) { struct passwd *p; if ((p = getpwuid(rid)) != NULL) name = p->pw_name; } } } /* * Make sure that the type/name combination is unique when doing * SID to POSIX ID translation (hence changing the type from SMB to * POSIX). 
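/*
 * Reduced sketch of the multi-key comparison pattern in us_compare() above:
 * walk the sort columns in order, stop at the first key that differs, and
 * flip the sign when that column was requested in reverse order.  The row
 * and key structs are hypothetical stand-ins, not us_node_t or
 * zfs_sort_column_t, and the sign convention here is the generic ascending
 * one rather than an exact copy of us_compare().
 */
#include <string.h>

struct row { const char *name; unsigned long long used; };
struct key { int by_used; int reverse; };

static int
multi_key_cmp(const struct row *l, const struct row *r,
    const struct key *keys, int nkeys)
{
        for (int i = 0; i < nkeys; i++) {
                int rc;

                if (keys[i].by_used)
                        rc = (l->used > r->used) - (l->used < r->used);
                else
                        rc = strcmp(l->name, r->name);
                if (rc != 0)
                        return (keys[i].reverse ? -rc : rc);
        }
        return (0);     /* equal on every requested key */
}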
*/ if (cb->cb_sid2posix && nvlist_add_boolean_value(props, "smbentity", smbentity) != 0) nomem(); /* Calculate/update width of TYPE field */ typestr = us_type2str(type); typelen = strlen(gettext(typestr)); typeidx = us_field_index("type"); if (typelen > cb->cb_width[typeidx]) cb->cb_width[typeidx] = typelen; if (nvlist_add_uint32(props, "type", type) != 0) nomem(); /* Calculate/update width of NAME field */ if ((cb->cb_numname && cb->cb_sid2posix) || name == NULL) { if (nvlist_add_uint64(props, "name", rid) != 0) nomem(); namelen = snprintf(NULL, 0, "%u", rid); } else { if (nvlist_add_string(props, "name", name) != 0) nomem(); namelen = strlen(name); } nameidx = us_field_index("name"); if (namelen > cb->cb_width[nameidx]) cb->cb_width[nameidx] = namelen; /* * Check if this type/name combination is in the list and update it; * otherwise add new node to the list. */ if ((n = uu_avl_find(avl, node, &sortinfo, &idx)) == NULL) { uu_avl_insert(avl, node, idx); } else { nvlist_free(props); free(node); node = n; props = node->usn_nvl; } /* Calculate/update width of USED/QUOTA fields */ if (cb->cb_nicenum) zfs_nicenum(space, sizebuf, sizeof (sizebuf)); else (void) snprintf(sizebuf, sizeof (sizebuf), "%llu", (u_longlong_t)space); sizelen = strlen(sizebuf); if (prop == ZFS_PROP_USERUSED || prop == ZFS_PROP_GROUPUSED) { propname = "used"; if (!nvlist_exists(props, "quota")) (void) nvlist_add_uint64(props, "quota", 0); } else { propname = "quota"; if (!nvlist_exists(props, "used")) (void) nvlist_add_uint64(props, "used", 0); } sizeidx = us_field_index(propname); if (sizelen > cb->cb_width[sizeidx]) cb->cb_width[sizeidx] = sizelen; if (nvlist_add_uint64(props, propname, space) != 0) nomem(); return (0); } static void print_us_node(boolean_t scripted, boolean_t parsable, int *fields, int types, size_t *width, us_node_t *node) { nvlist_t *nvl = node->usn_nvl; char valstr[ZFS_MAXNAMELEN]; boolean_t first = B_TRUE; int cfield = 0; int field; uint32_t ustype; /* Check type */ (void) nvlist_lookup_uint32(nvl, "type", &ustype); if (!(ustype & types)) return; while ((field = fields[cfield]) != USFIELD_LAST) { nvpair_t *nvp = NULL; data_type_t type; uint32_t val32; uint64_t val64; char *strval = NULL; while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { if (strcmp(nvpair_name(nvp), us_field_names[field]) == 0) break; } type = nvpair_type(nvp); switch (type) { case DATA_TYPE_UINT32: (void) nvpair_value_uint32(nvp, &val32); break; case DATA_TYPE_UINT64: (void) nvpair_value_uint64(nvp, &val64); break; case DATA_TYPE_STRING: (void) nvpair_value_string(nvp, &strval); break; default: (void) fprintf(stderr, "invalid data type\n"); } switch (field) { case USFIELD_TYPE: strval = (char *)us_type2str(val32); break; case USFIELD_NAME: if (type == DATA_TYPE_UINT64) { (void) sprintf(valstr, "%llu", (u_longlong_t) val64); strval = valstr; } break; case USFIELD_USED: case USFIELD_QUOTA: if (type == DATA_TYPE_UINT64) { if (parsable) { (void) sprintf(valstr, "%llu", (u_longlong_t) val64); } else { zfs_nicenum(val64, valstr, sizeof (valstr)); } if (field == USFIELD_QUOTA && strcmp(valstr, "0") == 0) strval = "none"; else strval = valstr; } break; } if (!first) { if (scripted) (void) printf("\t"); else (void) printf(" "); } if (scripted) (void) printf("%s", strval); else if (field == USFIELD_TYPE || field == USFIELD_NAME) (void) printf("%-*s", (int) width[field], strval); else (void) printf("%*s", (int) width[field], strval); first = B_FALSE; cfield++; } (void) printf("\n"); } static void print_us(boolean_t scripted, boolean_t 
parsable, int *fields, int types, size_t *width, boolean_t rmnode, uu_avl_t *avl) { us_node_t *node; const char *col; int cfield = 0; int field; if (!scripted) { boolean_t first = B_TRUE; while ((field = fields[cfield]) != USFIELD_LAST) { col = gettext(us_field_hdr[field]); if (field == USFIELD_TYPE || field == USFIELD_NAME) { (void) printf(first ? "%-*s" : " %-*s", (int) width[field], col); } else { (void) printf(first ? "%*s" : " %*s", (int) width[field], col); } first = B_FALSE; cfield++; } (void) printf("\n"); } for (node = uu_avl_first(avl); node; node = uu_avl_next(avl, node)) { print_us_node(scripted, parsable, fields, types, width, node); if (rmnode) nvlist_free(node->usn_nvl); } } static int zfs_do_userspace(int argc, char **argv) { zfs_handle_t *zhp; zfs_userquota_prop_t p; uu_avl_pool_t *avl_pool; uu_avl_t *avl_tree; uu_avl_walk_t *walk; char *delim; char deffields[] = "type,name,used,quota"; char *ofield = NULL; char *tfield = NULL; int cfield = 0; int fields[256]; int i; boolean_t scripted = B_FALSE; boolean_t prtnum = B_FALSE; boolean_t parsable = B_FALSE; boolean_t sid2posix = B_FALSE; int ret = 0; int c; zfs_sort_column_t *sortcol = NULL; int types = USTYPE_PSX_USR | USTYPE_SMB_USR; us_cbdata_t cb; us_node_t *node; us_node_t *rmnode; uu_list_pool_t *listpool; uu_list_t *list; uu_avl_index_t idx = 0; uu_list_index_t idx2 = 0; if (argc < 2) usage(B_FALSE); if (strcmp(argv[0], "groupspace") == 0) /* Toggle default group types */ types = USTYPE_PSX_GRP | USTYPE_SMB_GRP; while ((c = getopt(argc, argv, "nHpo:s:S:t:i")) != -1) { switch (c) { case 'n': prtnum = B_TRUE; break; case 'H': scripted = B_TRUE; break; case 'p': parsable = B_TRUE; break; case 'o': ofield = optarg; break; case 's': case 'S': if (zfs_add_sort_column(&sortcol, optarg, c == 's' ? 
B_FALSE : B_TRUE) != 0) { (void) fprintf(stderr, gettext("invalid field '%s'\n"), optarg); usage(B_FALSE); } break; case 't': tfield = optarg; break; case 'i': sid2posix = B_TRUE; break; case ':': (void) fprintf(stderr, gettext("missing argument for " "'%c' option\n"), optopt); usage(B_FALSE); break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); usage(B_FALSE); } } argc -= optind; argv += optind; if (argc < 1) { (void) fprintf(stderr, gettext("missing dataset name\n")); usage(B_FALSE); } if (argc > 1) { (void) fprintf(stderr, gettext("too many arguments\n")); usage(B_FALSE); } /* Use default output fields if not specified using -o */ if (ofield == NULL) ofield = deffields; do { if ((delim = strchr(ofield, ',')) != NULL) *delim = '\0'; if ((fields[cfield++] = us_field_index(ofield)) == -1) { (void) fprintf(stderr, gettext("invalid type '%s' " "for -o option\n"), ofield); return (-1); } if (delim != NULL) ofield = delim + 1; } while (delim != NULL); fields[cfield] = USFIELD_LAST; /* Override output types (-t option) */ if (tfield != NULL) { types = 0; do { boolean_t found = B_FALSE; if ((delim = strchr(tfield, ',')) != NULL) *delim = '\0'; for (i = 0; i < sizeof (us_type_bits) / sizeof (int); i++) { if (strcmp(tfield, us_type_names[i]) == 0) { found = B_TRUE; types |= us_type_bits[i]; break; } } if (!found) { (void) fprintf(stderr, gettext("invalid type " "'%s' for -t option\n"), tfield); return (-1); } if (delim != NULL) tfield = delim + 1; } while (delim != NULL); } if ((zhp = zfs_open(g_zfs, argv[0], ZFS_TYPE_DATASET)) == NULL) return (1); if ((avl_pool = uu_avl_pool_create("us_avl_pool", sizeof (us_node_t), offsetof(us_node_t, usn_avlnode), us_compare, UU_DEFAULT)) == NULL) nomem(); if ((avl_tree = uu_avl_create(avl_pool, NULL, UU_DEFAULT)) == NULL) nomem(); /* Always add default sorting columns */ (void) zfs_add_sort_column(&sortcol, "type", B_FALSE); (void) zfs_add_sort_column(&sortcol, "name", B_FALSE); cb.cb_sortcol = sortcol; cb.cb_numname = prtnum; cb.cb_nicenum = !parsable; cb.cb_avl_pool = avl_pool; cb.cb_avl = avl_tree; cb.cb_sid2posix = sid2posix; for (i = 0; i < USFIELD_LAST; i++) cb.cb_width[i] = strlen(gettext(us_field_hdr[i])); for (p = 0; p < ZFS_NUM_USERQUOTA_PROPS; p++) { if (((p == ZFS_PROP_USERUSED || p == ZFS_PROP_USERQUOTA) && !(types & (USTYPE_PSX_USR | USTYPE_SMB_USR))) || ((p == ZFS_PROP_GROUPUSED || p == ZFS_PROP_GROUPQUOTA) && !(types & (USTYPE_PSX_GRP | USTYPE_SMB_GRP)))) continue; cb.cb_prop = p; if ((ret = zfs_userspace(zhp, p, userspace_cb, &cb)) != 0) return (ret); } /* Sort the list */ if ((node = uu_avl_first(avl_tree)) == NULL) return (0); us_populated = B_TRUE; listpool = uu_list_pool_create("tmplist", sizeof (us_node_t), offsetof(us_node_t, usn_listnode), NULL, UU_DEFAULT); list = uu_list_create(listpool, NULL, UU_DEFAULT); uu_list_node_init(node, &node->usn_listnode, listpool); while (node != NULL) { rmnode = node; node = uu_avl_next(avl_tree, node); uu_avl_remove(avl_tree, rmnode); if (uu_list_find(list, rmnode, NULL, &idx2) == NULL) uu_list_insert(list, rmnode, idx2); } for (node = uu_list_first(list); node != NULL; node = uu_list_next(list, node)) { us_sort_info_t sortinfo = { sortcol, cb.cb_numname }; if (uu_avl_find(avl_tree, node, &sortinfo, &idx) == NULL) uu_avl_insert(avl_tree, node, idx); } uu_list_destroy(list); uu_list_pool_destroy(listpool); /* Print and free node nvlist memory */ print_us(scripted, parsable, fields, types, cb.cb_width, B_TRUE, cb.cb_avl); zfs_free_sort_columns(sortcol); /* Clean up the AVL 
tree */ if ((walk = uu_avl_walk_start(cb.cb_avl, UU_WALK_ROBUST)) == NULL) nomem(); while ((node = uu_avl_walk_next(walk)) != NULL) { uu_avl_remove(cb.cb_avl, node); free(node); } uu_avl_walk_end(walk); uu_avl_destroy(avl_tree); uu_avl_pool_destroy(avl_pool); return (ret); } /* * list [-r][-d max] [-H] [-o property[,property]...] [-t type[,type]...] * [-s property [-s property]...] [-S property [-S property]...] * ... * * -r Recurse over all children * -d Limit recursion by depth. * -H Scripted mode; elide headers and separate columns by tabs * -o Control which fields to display. * -t Control which object types to display. * -s Specify sort columns, descending order. * -S Specify sort columns, ascending order. * * When given no arguments, lists all filesystems in the system. * Otherwise, list the specified datasets, optionally recursing down them if * '-r' is specified. */ typedef struct list_cbdata { boolean_t cb_first; boolean_t cb_scripted; zprop_list_t *cb_proplist; } list_cbdata_t; /* * Given a list of columns to display, output appropriate headers for each one. */ static void print_header(zprop_list_t *pl) { char headerbuf[ZFS_MAXPROPLEN]; const char *header; int i; boolean_t first = B_TRUE; boolean_t right_justify; for (; pl != NULL; pl = pl->pl_next) { if (!first) { (void) printf(" "); } else { first = B_FALSE; } right_justify = B_FALSE; if (pl->pl_prop != ZPROP_INVAL) { header = zfs_prop_column_name(pl->pl_prop); right_justify = zfs_prop_align_right(pl->pl_prop); } else { for (i = 0; pl->pl_user_prop[i] != '\0'; i++) headerbuf[i] = toupper(pl->pl_user_prop[i]); headerbuf[i] = '\0'; header = headerbuf; } if (pl->pl_next == NULL && !right_justify) (void) printf("%s", header); else if (right_justify) (void) printf("%*s", (int)pl->pl_width, header); else (void) printf("%-*s", (int)pl->pl_width, header); } (void) printf("\n"); } /* * Given a dataset and a list of fields, print out all the properties according * to the described layout. */ static void print_dataset(zfs_handle_t *zhp, zprop_list_t *pl, boolean_t scripted) { boolean_t first = B_TRUE; char property[ZFS_MAXPROPLEN]; nvlist_t *userprops = zfs_get_user_props(zhp); nvlist_t *propval; char *propstr; boolean_t right_justify; int width; for (; pl != NULL; pl = pl->pl_next) { if (!first) { if (scripted) (void) printf("\t"); else (void) printf(" "); } else { first = B_FALSE; } if (pl->pl_prop == ZFS_PROP_NAME) { (void) strlcpy(property, zfs_get_name(zhp), sizeof(property)); propstr = property; right_justify = zfs_prop_align_right(pl->pl_prop); } else if (pl->pl_prop != ZPROP_INVAL) { if (zfs_prop_get(zhp, pl->pl_prop, property, sizeof (property), NULL, NULL, 0, B_FALSE) != 0) propstr = "-"; else propstr = property; right_justify = zfs_prop_align_right(pl->pl_prop); } else if (zfs_prop_userquota(pl->pl_user_prop)) { if (zfs_prop_get_userquota(zhp, pl->pl_user_prop, property, sizeof (property), B_FALSE) != 0) propstr = "-"; else propstr = property; right_justify = B_TRUE; } else if (zfs_prop_written(pl->pl_user_prop)) { if (zfs_prop_get_written(zhp, pl->pl_user_prop, property, sizeof (property), B_FALSE) != 0) propstr = "-"; else propstr = property; right_justify = B_TRUE; } else { if (nvlist_lookup_nvlist(userprops, pl->pl_user_prop, &propval) != 0) propstr = "-"; else verify(nvlist_lookup_string(propval, ZPROP_VALUE, &propstr) == 0); right_justify = B_FALSE; } width = pl->pl_width; /* * If this is being called in scripted mode, or if this is the * last column and it is left-justified, don't include a width * format specifier. 
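/*
 * Illustrative sketch of the column-layout trick used by print_header(),
 * print_dataset(), and print_us_node(): printf's '*' width specifier takes
 * the computed column width as an int argument, "%-*s" left-justifies
 * (NAME, TYPE), "%*s" right-justifies (sizes), and the last left-justified
 * column is printed with plain "%s" so no trailing padding is emitted.
 * The widths and strings below are made up for demonstration.
 */
#include <stdio.h>

int
main(void)
{
        int namew = 12, usedw = 8;

        (void) printf("%-*s %*s\n", namew, "NAME", usedw, "USED");
        (void) printf("%-*s %*s\n", namew, "tank/home", usedw, "1.2G");
        (void) printf("%s\n", "last-left-justified-column-unpadded");
        return (0);
}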
*/ if (scripted || (pl->pl_next == NULL && !right_justify)) (void) printf("%s", propstr); else if (right_justify) (void) printf("%*s", width, propstr); else (void) printf("%-*s", width, propstr); } (void) printf("\n"); } /* * Generic callback function to list a dataset or snapshot. */ static int list_callback(zfs_handle_t *zhp, void *data) { list_cbdata_t *cbp = data; if (cbp->cb_first) { if (!cbp->cb_scripted) print_header(cbp->cb_proplist); cbp->cb_first = B_FALSE; } print_dataset(zhp, cbp->cb_proplist, cbp->cb_scripted); return (0); } static int zfs_do_list(int argc, char **argv) { int c; boolean_t scripted = B_FALSE; static char default_fields[] = "name,used,available,referenced,mountpoint"; int types = ZFS_TYPE_DATASET; boolean_t types_specified = B_FALSE; char *fields = NULL; list_cbdata_t cb = { 0 }; char *value; int limit = 0; int ret = 0; zfs_sort_column_t *sortcol = NULL; int flags = ZFS_ITER_PROP_LISTSNAPS | ZFS_ITER_ARGS_CAN_BE_PATHS; /* check options */ while ((c = getopt(argc, argv, ":d:o:rt:Hs:S:")) != -1) { switch (c) { case 'o': fields = optarg; break; case 'd': limit = parse_depth(optarg, &flags); break; case 'r': flags |= ZFS_ITER_RECURSE; break; case 'H': scripted = B_TRUE; break; case 's': if (zfs_add_sort_column(&sortcol, optarg, B_FALSE) != 0) { (void) fprintf(stderr, gettext("invalid property '%s'\n"), optarg); usage(B_FALSE); } break; case 'S': if (zfs_add_sort_column(&sortcol, optarg, B_TRUE) != 0) { (void) fprintf(stderr, gettext("invalid property '%s'\n"), optarg); usage(B_FALSE); } break; case 't': types = 0; types_specified = B_TRUE; flags &= ~ZFS_ITER_PROP_LISTSNAPS; while (*optarg != '\0') { static char *type_subopts[] = { "filesystem", "volume", "snapshot", "snap", "all", NULL }; switch (getsubopt(&optarg, type_subopts, &value)) { case 0: types |= ZFS_TYPE_FILESYSTEM; break; case 1: types |= ZFS_TYPE_VOLUME; break; case 2: case 3: types |= ZFS_TYPE_SNAPSHOT; break; case 4: types = ZFS_TYPE_DATASET; break; default: (void) fprintf(stderr, gettext("invalid type '%s'\n"), value); usage(B_FALSE); } } break; case ':': (void) fprintf(stderr, gettext("missing argument for " "'%c' option\n"), optopt); usage(B_FALSE); break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); usage(B_FALSE); } } argc -= optind; argv += optind; if (fields == NULL) fields = default_fields; /* * If we are only going to list snapshot names and sort by name, * then we can use faster version. */ if (strcmp(fields, "name") == 0 && zfs_sort_only_by_name(sortcol)) flags |= ZFS_ITER_SIMPLE; /* * If "-o space" and no types were specified, don't display snapshots. */ if (strcmp(fields, "space") == 0 && types_specified == B_FALSE) types &= ~ZFS_TYPE_SNAPSHOT; /* * If the user specifies '-o all', the zprop_get_list() doesn't * normally include the name of the dataset. For 'zfs list', we always * want this property to be first. */ if (zprop_get_list(g_zfs, fields, &cb.cb_proplist, ZFS_TYPE_DATASET) != 0) usage(B_FALSE); cb.cb_scripted = scripted; cb.cb_first = B_TRUE; ret = zfs_for_each(argc, argv, flags, types, sortcol, &cb.cb_proplist, limit, list_callback, &cb); zprop_free_list(cb.cb_proplist); zfs_free_sort_columns(sortcol); if (ret == 0 && cb.cb_first && !cb.cb_scripted) (void) fprintf(stderr, gettext("no datasets available\n")); return (ret); } /* * zfs rename [-f] * zfs rename [-f] -p * zfs rename -r * * Renames the given dataset to another of the same type. * * The '-p' flag creates all the non-existing ancestors of the target first. 
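/*
 * Sketch (illustration only) of the '@' handling shared by the snapshot
 * commands in this file: zfs_do_rename() below uses strchr() to insist that
 * a recursive rename source is a snapshot, while zfs_do_snapshot() and
 * zfs_do_send() additionally overwrite the '@' to split the argument into
 * dataset and snapshot halves in place.
 */
#include <stdio.h>
#include <string.h>

int
main(void)
{
        char arg[] = "tank/home@monday";
        char *atp = strchr(arg, '@');

        if (atp == NULL) {
                (void) fprintf(stderr, "argument must be a snapshot\n");
                return (1);
        }
        *atp = '\0';    /* split in place, as the real commands do */
        (void) printf("dataset '%s', snapshot '%s'\n", arg, atp + 1);
        return (0);
}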
*/ /* ARGSUSED */ static int zfs_do_rename(int argc, char **argv) { zfs_handle_t *zhp; int c; int ret = 0; boolean_t recurse = B_FALSE; boolean_t parents = B_FALSE; boolean_t force_unmount = B_FALSE; /* check options */ while ((c = getopt(argc, argv, "prf")) != -1) { switch (c) { case 'p': parents = B_TRUE; break; case 'r': recurse = B_TRUE; break; case 'f': force_unmount = B_TRUE; break; case '?': default: (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); usage(B_FALSE); } } argc -= optind; argv += optind; /* check number of arguments */ if (argc < 1) { (void) fprintf(stderr, gettext("missing source dataset " "argument\n")); usage(B_FALSE); } if (argc < 2) { (void) fprintf(stderr, gettext("missing target dataset " "argument\n")); usage(B_FALSE); } if (argc > 2) { (void) fprintf(stderr, gettext("too many arguments\n")); usage(B_FALSE); } if (recurse && parents) { (void) fprintf(stderr, gettext("-p and -r options are mutually " "exclusive\n")); usage(B_FALSE); } if (recurse && strchr(argv[0], '@') == 0) { (void) fprintf(stderr, gettext("source dataset for recursive " "rename must be a snapshot\n")); usage(B_FALSE); } if ((zhp = zfs_open(g_zfs, argv[0], parents ? ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME : ZFS_TYPE_DATASET)) == NULL) return (1); /* If we were asked and the name looks good, try to create ancestors. */ if (parents && zfs_name_valid(argv[1], zfs_get_type(zhp)) && zfs_create_ancestors(g_zfs, argv[1]) != 0) { zfs_close(zhp); return (1); } ret = (zfs_rename(zhp, argv[1], recurse, force_unmount) != 0); zfs_close(zhp); return (ret); } /* * zfs promote * * Promotes the given clone fs to be the parent */ /* ARGSUSED */ static int zfs_do_promote(int argc, char **argv) { zfs_handle_t *zhp; int ret = 0; /* check options */ if (argc > 1 && argv[1][0] == '-') { (void) fprintf(stderr, gettext("invalid option '%c'\n"), argv[1][1]); usage(B_FALSE); } /* check number of arguments */ if (argc < 2) { (void) fprintf(stderr, gettext("missing clone filesystem" " argument\n")); usage(B_FALSE); } if (argc > 2) { (void) fprintf(stderr, gettext("too many arguments\n")); usage(B_FALSE); } zhp = zfs_open(g_zfs, argv[1], ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME); if (zhp == NULL) return (1); ret = (zfs_promote(zhp) != 0); zfs_close(zhp); return (ret); } /* * zfs rollback [-rRf] * * -r Delete any intervening snapshots before doing rollback * -R Delete any snapshots and their clones * -f ignored for backwards compatability * * Given a filesystem, rollback to a specific snapshot, discarding any changes * since then and making it the active dataset. If more recent snapshots exist, * the command will complain unless the '-r' flag is given. */ typedef struct rollback_cbdata { uint64_t cb_create; boolean_t cb_first; int cb_doclones; char *cb_target; int cb_error; boolean_t cb_recurse; boolean_t cb_dependent; } rollback_cbdata_t; /* * Report any snapshots more recent than the one specified. Used when '-r' is * not specified. We reuse this same callback for the snapshot dependents - if * 'cb_dependent' is set, then this is a dependent and we should report it * without checking the transaction group. 
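/*
 * Reduced, hypothetical sketch of the policy rollback_check() enforces just
 * below: any snapshot whose creation txg is greater than the rollback
 * target's txg is "more recent" and must either be reported as blocking the
 * rollback or be destroyed via -r/-R.  Plain uint64_t values stand in for
 * the ZFS_PROP_CREATETXG lookups.
 */
#include <stdint.h>
#include <stdio.h>

static int
blocks_rollback(uint64_t snap_createtxg, uint64_t target_createtxg)
{
        return (snap_createtxg > target_createtxg);
}

int
main(void)
{
        (void) printf("%d\n", blocks_rollback(105, 100));       /* 1: newer */
        (void) printf("%d\n", blocks_rollback(95, 100));        /* 0: older */
        return (0);
}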
*/ static int rollback_check(zfs_handle_t *zhp, void *data) { rollback_cbdata_t *cbp = data; if (cbp->cb_doclones) { zfs_close(zhp); return (0); } if (!cbp->cb_dependent) { if (strcmp(zfs_get_name(zhp), cbp->cb_target) != 0 && zfs_get_type(zhp) == ZFS_TYPE_SNAPSHOT && zfs_prop_get_int(zhp, ZFS_PROP_CREATETXG) > cbp->cb_create) { if (cbp->cb_first && !cbp->cb_recurse) { (void) fprintf(stderr, gettext("cannot " "rollback to '%s': more recent snapshots " "exist\n"), cbp->cb_target); (void) fprintf(stderr, gettext("use '-r' to " "force deletion of the following " "snapshots:\n")); cbp->cb_first = 0; cbp->cb_error = 1; } if (cbp->cb_recurse) { cbp->cb_dependent = B_TRUE; if (zfs_iter_dependents(zhp, B_TRUE, rollback_check, cbp) != 0) { zfs_close(zhp); return (-1); } cbp->cb_dependent = B_FALSE; } else { (void) fprintf(stderr, "%s\n", zfs_get_name(zhp)); } } } else { if (cbp->cb_first && cbp->cb_recurse) { (void) fprintf(stderr, gettext("cannot rollback to " "'%s': clones of previous snapshots exist\n"), cbp->cb_target); (void) fprintf(stderr, gettext("use '-R' to " "force deletion of the following clones and " "dependents:\n")); cbp->cb_first = 0; cbp->cb_error = 1; } (void) fprintf(stderr, "%s\n", zfs_get_name(zhp)); } zfs_close(zhp); return (0); } static int zfs_do_rollback(int argc, char **argv) { int ret = 0; int c; boolean_t force = B_FALSE; rollback_cbdata_t cb = { 0 }; zfs_handle_t *zhp, *snap; char parentname[ZFS_MAXNAMELEN]; char *delim; /* check options */ while ((c = getopt(argc, argv, "rRf")) != -1) { switch (c) { case 'r': cb.cb_recurse = 1; break; case 'R': cb.cb_recurse = 1; cb.cb_doclones = 1; break; case 'f': force = B_TRUE; break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); usage(B_FALSE); } } argc -= optind; argv += optind; /* check number of arguments */ if (argc < 1) { (void) fprintf(stderr, gettext("missing dataset argument\n")); usage(B_FALSE); } if (argc > 1) { (void) fprintf(stderr, gettext("too many arguments\n")); usage(B_FALSE); } /* open the snapshot */ if ((snap = zfs_open(g_zfs, argv[0], ZFS_TYPE_SNAPSHOT)) == NULL) return (1); /* open the parent dataset */ (void) strlcpy(parentname, argv[0], sizeof (parentname)); verify((delim = strrchr(parentname, '@')) != NULL); *delim = '\0'; if ((zhp = zfs_open(g_zfs, parentname, ZFS_TYPE_DATASET)) == NULL) { zfs_close(snap); return (1); } /* * Check for more recent snapshots and/or clones based on the presence * of '-r' and '-R'. */ cb.cb_target = argv[0]; cb.cb_create = zfs_prop_get_int(snap, ZFS_PROP_CREATETXG); cb.cb_first = B_TRUE; cb.cb_error = 0; if ((ret = zfs_iter_children(zhp, rollback_check, &cb)) != 0) goto out; if ((ret = cb.cb_error) != 0) goto out; /* * Rollback parent to the given snapshot. */ ret = zfs_rollback(zhp, snap, force); out: zfs_close(snap); zfs_close(zhp); if (ret == 0) return (0); else return (1); } /* * zfs set property=value { fs | snap | vol } ... * * Sets the given property for all datasets specified on the command line. 
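/*
 * Sketch of the property=value validation performed by zfs_do_set() below:
 * find the '=', require non-empty text on both sides, then split the
 * argument in place so the property name and value share one buffer.  The
 * sample argument is made up; the real code keeps the halves in a
 * set_cbdata_t.
 */
#include <stdio.h>
#include <string.h>

int
main(void)
{
        char arg[] = "compression=on";
        char *value = strchr(arg, '=');

        if (value == NULL || value == arg || value[1] == '\0') {
                (void) fprintf(stderr,
                    "missing property or value in property=value argument\n");
                return (1);
        }
        *value++ = '\0';        /* terminate the property, advance to value */
        (void) printf("property '%s' value '%s'\n", arg, value);
        return (0);
}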
*/ typedef struct set_cbdata { char *cb_propname; char *cb_value; } set_cbdata_t; static int set_callback(zfs_handle_t *zhp, void *data) { set_cbdata_t *cbp = data; if (zfs_prop_set(zhp, cbp->cb_propname, cbp->cb_value) != 0) { switch (libzfs_errno(g_zfs)) { case EZFS_MOUNTFAILED: (void) fprintf(stderr, gettext("property may be set " "but unable to remount filesystem\n")); break; case EZFS_SHARENFSFAILED: (void) fprintf(stderr, gettext("property may be set " "but unable to reshare filesystem\n")); break; } return (1); } return (0); } static int zfs_do_set(int argc, char **argv) { set_cbdata_t cb; int ret = 0; /* check for options */ if (argc > 1 && argv[1][0] == '-') { (void) fprintf(stderr, gettext("invalid option '%c'\n"), argv[1][1]); usage(B_FALSE); } /* check number of arguments */ if (argc < 2) { (void) fprintf(stderr, gettext("missing property=value " "argument\n")); usage(B_FALSE); } if (argc < 3) { (void) fprintf(stderr, gettext("missing dataset name\n")); usage(B_FALSE); } /* validate property=value argument */ cb.cb_propname = argv[1]; if (((cb.cb_value = strchr(cb.cb_propname, '=')) == NULL) || (cb.cb_value[1] == '\0')) { (void) fprintf(stderr, gettext("missing value in " "property=value argument\n")); usage(B_FALSE); } *cb.cb_value = '\0'; cb.cb_value++; if (*cb.cb_propname == '\0') { (void) fprintf(stderr, gettext("missing property in property=value argument\n")); usage(B_FALSE); } ret = zfs_for_each(argc - 2, argv + 2, 0, ZFS_TYPE_DATASET, NULL, NULL, 0, set_callback, &cb); return (ret); } +typedef struct snap_cbdata { + nvlist_t *sd_nvl; + boolean_t sd_recursive; + const char *sd_snapname; +} snap_cbdata_t; + +static int +zfs_snapshot_cb(zfs_handle_t *zhp, void *arg) +{ + snap_cbdata_t *sd = arg; + char *name; + int rv = 0; + int error; + + error = asprintf(&name, "%s@%s", zfs_get_name(zhp), sd->sd_snapname); + if (error == -1) + nomem(); + fnvlist_add_boolean(sd->sd_nvl, name); + free(name); + + if (sd->sd_recursive) + rv = zfs_iter_filesystems(zhp, zfs_snapshot_cb, sd); + zfs_close(zhp); + return (rv); +} + /* * zfs snapshot [-r] [-o prop=value] ... * * Creates a snapshot with the given name. While functionally equivalent to * 'zfs create', it is a separate command to differentiate intent. 
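/*
 * Sketch of the name construction in the new zfs_snapshot_cb() above:
 * asprintf() (native on illumos/FreeBSD, _GNU_SOURCE on glibc) builds
 * "fs@snap", which the real callback records with fnvlist_add_boolean() so
 * that a single zfs_snapshot_nvl() call can create every requested snapshot
 * in one batch.  The nvlist step is omitted here to keep the sketch
 * libc-only; the fs/snap strings are placeholders.
 */
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>

int
main(void)
{
        const char *fs = "tank/home", *snap = "monday";
        char *name;

        if (asprintf(&name, "%s@%s", fs, snap) == -1)
                return (1);     /* the real code calls nomem() here */
        (void) printf("%s\n", name);
        free(name);
        return (0);
}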
*/ static int zfs_do_snapshot(int argc, char **argv) { - boolean_t recursive = B_FALSE; int ret = 0; signed char c; nvlist_t *props; + snap_cbdata_t sd = { 0 }; + boolean_t multiple_snaps = B_FALSE; if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0) nomem(); + if (nvlist_alloc(&sd.sd_nvl, NV_UNIQUE_NAME, 0) != 0) + nomem(); /* check options */ while ((c = getopt(argc, argv, "ro:")) != -1) { switch (c) { case 'o': if (parseprop(props)) return (1); break; case 'r': - recursive = B_TRUE; + sd.sd_recursive = B_TRUE; + multiple_snaps = B_TRUE; break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); goto usage; } } argc -= optind; argv += optind; /* check number of arguments */ if (argc < 1) { (void) fprintf(stderr, gettext("missing snapshot argument\n")); goto usage; } - if (argc > 1) { - (void) fprintf(stderr, gettext("too many arguments\n")); - goto usage; + + if (argc > 1) + multiple_snaps = B_TRUE; + for (; argc > 0; argc--, argv++) { + char *atp; + zfs_handle_t *zhp; + + atp = strchr(argv[0], '@'); + if (atp == NULL) + goto usage; + *atp = '\0'; + sd.sd_snapname = atp + 1; + zhp = zfs_open(g_zfs, argv[0], + ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME); + if (zhp == NULL) + goto usage; + if (zfs_snapshot_cb(zhp, &sd) != 0) + goto usage; } - ret = zfs_snapshot(g_zfs, argv[0], recursive, props); + ret = zfs_snapshot_nvl(g_zfs, sd.sd_nvl, props); + nvlist_free(sd.sd_nvl); nvlist_free(props); - if (ret && recursive) + if (ret != 0 && multiple_snaps) (void) fprintf(stderr, gettext("no snapshots were created\n")); return (ret != 0); usage: + nvlist_free(sd.sd_nvl); nvlist_free(props); usage(B_FALSE); return (-1); } /* * Send a backup stream to stdout. */ static int zfs_do_send(int argc, char **argv) { char *fromname = NULL; char *toname = NULL; char *cp; zfs_handle_t *zhp; sendflags_t flags = { 0 }; int c, err; nvlist_t *dbgnv = NULL; boolean_t extraverbose = B_FALSE; /* check options */ while ((c = getopt(argc, argv, ":i:I:RDpvnP")) != -1) { switch (c) { case 'i': if (fromname) usage(B_FALSE); fromname = optarg; break; case 'I': if (fromname) usage(B_FALSE); fromname = optarg; flags.doall = B_TRUE; break; case 'R': flags.replicate = B_TRUE; break; case 'p': flags.props = B_TRUE; break; case 'P': flags.parsable = B_TRUE; flags.verbose = B_TRUE; break; case 'v': if (flags.verbose) extraverbose = B_TRUE; flags.verbose = B_TRUE; flags.progress = B_TRUE; break; case 'D': flags.dedup = B_TRUE; break; case 'n': flags.dryrun = B_TRUE; break; case ':': (void) fprintf(stderr, gettext("missing argument for " "'%c' option\n"), optopt); usage(B_FALSE); break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); usage(B_FALSE); } } argc -= optind; argv += optind; /* check number of arguments */ if (argc < 1) { (void) fprintf(stderr, gettext("missing snapshot argument\n")); usage(B_FALSE); } if (argc > 1) { (void) fprintf(stderr, gettext("too many arguments\n")); usage(B_FALSE); } if (!flags.dryrun && isatty(STDOUT_FILENO)) { (void) fprintf(stderr, gettext("Error: Stream can not be written to a terminal.\n" "You must redirect standard output.\n")); return (1); } cp = strchr(argv[0], '@'); if (cp == NULL) { (void) fprintf(stderr, gettext("argument must be a snapshot\n")); usage(B_FALSE); } *cp = '\0'; toname = cp + 1; zhp = zfs_open(g_zfs, argv[0], ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME); if (zhp == NULL) return (1); /* * If they specified the full path to the snapshot, chop off * everything except the short name of the snapshot, but special * case if they specify the 
origin. */ if (fromname && (cp = strchr(fromname, '@')) != NULL) { char origin[ZFS_MAXNAMELEN]; zprop_source_t src; (void) zfs_prop_get(zhp, ZFS_PROP_ORIGIN, origin, sizeof (origin), &src, NULL, 0, B_FALSE); if (strcmp(origin, fromname) == 0) { fromname = NULL; flags.fromorigin = B_TRUE; } else { *cp = '\0'; if (cp != fromname && strcmp(argv[0], fromname)) { (void) fprintf(stderr, gettext("incremental source must be " "in same filesystem\n")); usage(B_FALSE); } fromname = cp + 1; if (strchr(fromname, '@') || strchr(fromname, '/')) { (void) fprintf(stderr, gettext("invalid incremental source\n")); usage(B_FALSE); } } } if (flags.replicate && fromname == NULL) flags.doall = B_TRUE; err = zfs_send(zhp, fromname, toname, &flags, STDOUT_FILENO, NULL, 0, extraverbose ? &dbgnv : NULL); if (extraverbose && dbgnv != NULL) { /* * dump_nvlist prints to stdout, but that's been * redirected to a file. Make it print to stderr * instead. */ (void) dup2(STDERR_FILENO, STDOUT_FILENO); dump_nvlist(dbgnv, 0); nvlist_free(dbgnv); } zfs_close(zhp); return (err != 0); } /* * zfs receive [-vnFu] [-d | -e] * * Restore a backup stream from stdin. */ static int zfs_do_receive(int argc, char **argv) { int c, err; recvflags_t flags = { 0 }; /* check options */ while ((c = getopt(argc, argv, ":denuvF")) != -1) { switch (c) { case 'd': flags.isprefix = B_TRUE; break; case 'e': flags.isprefix = B_TRUE; flags.istail = B_TRUE; break; case 'n': flags.dryrun = B_TRUE; break; case 'u': flags.nomount = B_TRUE; break; case 'v': flags.verbose = B_TRUE; break; case 'F': flags.force = B_TRUE; break; case ':': (void) fprintf(stderr, gettext("missing argument for " "'%c' option\n"), optopt); usage(B_FALSE); break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); usage(B_FALSE); } } argc -= optind; argv += optind; /* check number of arguments */ if (argc < 1) { (void) fprintf(stderr, gettext("missing snapshot argument\n")); usage(B_FALSE); } if (argc > 1) { (void) fprintf(stderr, gettext("too many arguments\n")); usage(B_FALSE); } if (isatty(STDIN_FILENO)) { (void) fprintf(stderr, gettext("Error: Backup stream can not be read " "from a terminal.\n" "You must redirect standard input.\n")); return (1); } err = zfs_receive(g_zfs, argv[0], &flags, STDIN_FILENO, NULL); return (err != 0); } /* * allow/unallow stuff */ /* copied from zfs/sys/dsl_deleg.h */ #define ZFS_DELEG_PERM_CREATE "create" #define ZFS_DELEG_PERM_DESTROY "destroy" #define ZFS_DELEG_PERM_SNAPSHOT "snapshot" #define ZFS_DELEG_PERM_ROLLBACK "rollback" #define ZFS_DELEG_PERM_CLONE "clone" #define ZFS_DELEG_PERM_PROMOTE "promote" #define ZFS_DELEG_PERM_RENAME "rename" #define ZFS_DELEG_PERM_MOUNT "mount" #define ZFS_DELEG_PERM_SHARE "share" #define ZFS_DELEG_PERM_SEND "send" #define ZFS_DELEG_PERM_RECEIVE "receive" #define ZFS_DELEG_PERM_ALLOW "allow" #define ZFS_DELEG_PERM_USERPROP "userprop" #define ZFS_DELEG_PERM_VSCAN "vscan" /* ??? 
*/ #define ZFS_DELEG_PERM_USERQUOTA "userquota" #define ZFS_DELEG_PERM_GROUPQUOTA "groupquota" #define ZFS_DELEG_PERM_USERUSED "userused" #define ZFS_DELEG_PERM_GROUPUSED "groupused" #define ZFS_DELEG_PERM_HOLD "hold" #define ZFS_DELEG_PERM_RELEASE "release" #define ZFS_DELEG_PERM_DIFF "diff" #define ZFS_NUM_DELEG_NOTES ZFS_DELEG_NOTE_NONE static zfs_deleg_perm_tab_t zfs_deleg_perm_tbl[] = { { ZFS_DELEG_PERM_ALLOW, ZFS_DELEG_NOTE_ALLOW }, { ZFS_DELEG_PERM_CLONE, ZFS_DELEG_NOTE_CLONE }, { ZFS_DELEG_PERM_CREATE, ZFS_DELEG_NOTE_CREATE }, { ZFS_DELEG_PERM_DESTROY, ZFS_DELEG_NOTE_DESTROY }, { ZFS_DELEG_PERM_DIFF, ZFS_DELEG_NOTE_DIFF}, { ZFS_DELEG_PERM_HOLD, ZFS_DELEG_NOTE_HOLD }, { ZFS_DELEG_PERM_MOUNT, ZFS_DELEG_NOTE_MOUNT }, { ZFS_DELEG_PERM_PROMOTE, ZFS_DELEG_NOTE_PROMOTE }, { ZFS_DELEG_PERM_RECEIVE, ZFS_DELEG_NOTE_RECEIVE }, { ZFS_DELEG_PERM_RELEASE, ZFS_DELEG_NOTE_RELEASE }, { ZFS_DELEG_PERM_RENAME, ZFS_DELEG_NOTE_RENAME }, { ZFS_DELEG_PERM_ROLLBACK, ZFS_DELEG_NOTE_ROLLBACK }, { ZFS_DELEG_PERM_SEND, ZFS_DELEG_NOTE_SEND }, { ZFS_DELEG_PERM_SHARE, ZFS_DELEG_NOTE_SHARE }, { ZFS_DELEG_PERM_SNAPSHOT, ZFS_DELEG_NOTE_SNAPSHOT }, { ZFS_DELEG_PERM_GROUPQUOTA, ZFS_DELEG_NOTE_GROUPQUOTA }, { ZFS_DELEG_PERM_GROUPUSED, ZFS_DELEG_NOTE_GROUPUSED }, { ZFS_DELEG_PERM_USERPROP, ZFS_DELEG_NOTE_USERPROP }, { ZFS_DELEG_PERM_USERQUOTA, ZFS_DELEG_NOTE_USERQUOTA }, { ZFS_DELEG_PERM_USERUSED, ZFS_DELEG_NOTE_USERUSED }, { NULL, ZFS_DELEG_NOTE_NONE } }; /* permission structure */ typedef struct deleg_perm { zfs_deleg_who_type_t dp_who_type; const char *dp_name; boolean_t dp_local; boolean_t dp_descend; } deleg_perm_t; /* */ typedef struct deleg_perm_node { deleg_perm_t dpn_perm; uu_avl_node_t dpn_avl_node; } deleg_perm_node_t; typedef struct fs_perm fs_perm_t; /* permissions set */ typedef struct who_perm { zfs_deleg_who_type_t who_type; const char *who_name; /* id */ char who_ug_name[256]; /* user/group name */ fs_perm_t *who_fsperm; /* uplink */ uu_avl_t *who_deleg_perm_avl; /* permissions */ } who_perm_t; /* */ typedef struct who_perm_node { who_perm_t who_perm; uu_avl_node_t who_avl_node; } who_perm_node_t; typedef struct fs_perm_set fs_perm_set_t; /* fs permissions */ struct fs_perm { const char *fsp_name; uu_avl_t *fsp_sc_avl; /* sets,create */ uu_avl_t *fsp_uge_avl; /* user,group,everyone */ fs_perm_set_t *fsp_set; /* uplink */ }; /* */ typedef struct fs_perm_node { fs_perm_t fspn_fsperm; uu_avl_t *fspn_avl; uu_list_node_t fspn_list_node; } fs_perm_node_t; /* top level structure */ struct fs_perm_set { uu_list_pool_t *fsps_list_pool; uu_list_t *fsps_list; /* list of fs_perms */ uu_avl_pool_t *fsps_named_set_avl_pool; uu_avl_pool_t *fsps_who_perm_avl_pool; uu_avl_pool_t *fsps_deleg_perm_avl_pool; }; static inline const char * deleg_perm_type(zfs_deleg_note_t note) { /* subcommands */ switch (note) { /* SUBCOMMANDS */ /* OTHER */ case ZFS_DELEG_NOTE_GROUPQUOTA: case ZFS_DELEG_NOTE_GROUPUSED: case ZFS_DELEG_NOTE_USERPROP: case ZFS_DELEG_NOTE_USERQUOTA: case ZFS_DELEG_NOTE_USERUSED: /* other */ return (gettext("other")); default: return (gettext("subcommand")); } } static int inline who_type2weight(zfs_deleg_who_type_t who_type) { int res; switch (who_type) { case ZFS_DELEG_NAMED_SET_SETS: case ZFS_DELEG_NAMED_SET: res = 0; break; case ZFS_DELEG_CREATE_SETS: case ZFS_DELEG_CREATE: res = 1; break; case ZFS_DELEG_USER_SETS: case ZFS_DELEG_USER: res = 2; break; case ZFS_DELEG_GROUP_SETS: case ZFS_DELEG_GROUP: res = 3; break; case ZFS_DELEG_EVERYONE_SETS: case ZFS_DELEG_EVERYONE: res = 4; break; default: res = -1; } 
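/*
 * Reduced sketch of the ordering who_perm_compare() imposes using
 * who_type2weight(): named sets sort first, then create-time permissions,
 * then user, group, and everyone entries; ties fall back to a name
 * comparison and the result is normalized to -1/0/1.  The two-field struct
 * is a hypothetical stand-in for who_perm_node_t, with the weight assumed
 * to come from who_type2weight().
 */
#include <string.h>

struct who { int weight; const char *name; };

static int
who_cmp(const struct who *l, const struct who *r)
{
        int res = l->weight - r->weight;

        if (res == 0)
                res = strcmp(l->name, r->name);
        if (res == 0)
                return (0);
        return (res > 0 ? 1 : -1);
}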
return (res); } /* ARGSUSED */ static int who_perm_compare(const void *larg, const void *rarg, void *unused) { const who_perm_node_t *l = larg; const who_perm_node_t *r = rarg; zfs_deleg_who_type_t ltype = l->who_perm.who_type; zfs_deleg_who_type_t rtype = r->who_perm.who_type; int lweight = who_type2weight(ltype); int rweight = who_type2weight(rtype); int res = lweight - rweight; if (res == 0) res = strncmp(l->who_perm.who_name, r->who_perm.who_name, ZFS_MAX_DELEG_NAME-1); if (res == 0) return (0); if (res > 0) return (1); else return (-1); } /* ARGSUSED */ static int deleg_perm_compare(const void *larg, const void *rarg, void *unused) { const deleg_perm_node_t *l = larg; const deleg_perm_node_t *r = rarg; int res = strncmp(l->dpn_perm.dp_name, r->dpn_perm.dp_name, ZFS_MAX_DELEG_NAME-1); if (res == 0) return (0); if (res > 0) return (1); else return (-1); } static inline void fs_perm_set_init(fs_perm_set_t *fspset) { bzero(fspset, sizeof (fs_perm_set_t)); if ((fspset->fsps_list_pool = uu_list_pool_create("fsps_list_pool", sizeof (fs_perm_node_t), offsetof(fs_perm_node_t, fspn_list_node), NULL, UU_DEFAULT)) == NULL) nomem(); if ((fspset->fsps_list = uu_list_create(fspset->fsps_list_pool, NULL, UU_DEFAULT)) == NULL) nomem(); if ((fspset->fsps_named_set_avl_pool = uu_avl_pool_create( "named_set_avl_pool", sizeof (who_perm_node_t), offsetof( who_perm_node_t, who_avl_node), who_perm_compare, UU_DEFAULT)) == NULL) nomem(); if ((fspset->fsps_who_perm_avl_pool = uu_avl_pool_create( "who_perm_avl_pool", sizeof (who_perm_node_t), offsetof( who_perm_node_t, who_avl_node), who_perm_compare, UU_DEFAULT)) == NULL) nomem(); if ((fspset->fsps_deleg_perm_avl_pool = uu_avl_pool_create( "deleg_perm_avl_pool", sizeof (deleg_perm_node_t), offsetof( deleg_perm_node_t, dpn_avl_node), deleg_perm_compare, UU_DEFAULT)) == NULL) nomem(); } static inline void fs_perm_fini(fs_perm_t *); static inline void who_perm_fini(who_perm_t *); static inline void fs_perm_set_fini(fs_perm_set_t *fspset) { fs_perm_node_t *node = uu_list_first(fspset->fsps_list); while (node != NULL) { fs_perm_node_t *next_node = uu_list_next(fspset->fsps_list, node); fs_perm_t *fsperm = &node->fspn_fsperm; fs_perm_fini(fsperm); uu_list_remove(fspset->fsps_list, node); free(node); node = next_node; } uu_avl_pool_destroy(fspset->fsps_named_set_avl_pool); uu_avl_pool_destroy(fspset->fsps_who_perm_avl_pool); uu_avl_pool_destroy(fspset->fsps_deleg_perm_avl_pool); } static inline void deleg_perm_init(deleg_perm_t *deleg_perm, zfs_deleg_who_type_t type, const char *name) { deleg_perm->dp_who_type = type; deleg_perm->dp_name = name; } static inline void who_perm_init(who_perm_t *who_perm, fs_perm_t *fsperm, zfs_deleg_who_type_t type, const char *name) { uu_avl_pool_t *pool; pool = fsperm->fsp_set->fsps_deleg_perm_avl_pool; bzero(who_perm, sizeof (who_perm_t)); if ((who_perm->who_deleg_perm_avl = uu_avl_create(pool, NULL, UU_DEFAULT)) == NULL) nomem(); who_perm->who_type = type; who_perm->who_name = name; who_perm->who_fsperm = fsperm; } static inline void who_perm_fini(who_perm_t *who_perm) { deleg_perm_node_t *node = uu_avl_first(who_perm->who_deleg_perm_avl); while (node != NULL) { deleg_perm_node_t *next_node = uu_avl_next(who_perm->who_deleg_perm_avl, node); uu_avl_remove(who_perm->who_deleg_perm_avl, node); free(node); node = next_node; } uu_avl_destroy(who_perm->who_deleg_perm_avl); } static inline void fs_perm_init(fs_perm_t *fsperm, fs_perm_set_t *fspset, const char *fsname) { uu_avl_pool_t *nset_pool = fspset->fsps_named_set_avl_pool; 
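/*
 * Sketch of the "find or insert" libuutil idiom repeated by userspace_cb(),
 * set_deleg_perm_node(), and parse_fs_perm() in this file: probe the tree
 * with uu_avl_find(), which also yields the insertion index, and insert only
 * when no matching node exists.  This relies on the deleg_perm_node_t type
 * and AVL pools declared earlier in the file and is illustrative only, not
 * a new helper added by this change.
 */
static deleg_perm_node_t *
deleg_find_or_insert(uu_avl_t *avl, deleg_perm_node_t *node)
{
        uu_avl_index_t idx = 0;
        deleg_perm_node_t *found;

        if ((found = uu_avl_find(avl, node, NULL, &idx)) != NULL)
                return (found);         /* reuse the existing entry */
        uu_avl_insert(avl, node, idx);
        return (node);
}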
uu_avl_pool_t *who_pool = fspset->fsps_who_perm_avl_pool; bzero(fsperm, sizeof (fs_perm_t)); if ((fsperm->fsp_sc_avl = uu_avl_create(nset_pool, NULL, UU_DEFAULT)) == NULL) nomem(); if ((fsperm->fsp_uge_avl = uu_avl_create(who_pool, NULL, UU_DEFAULT)) == NULL) nomem(); fsperm->fsp_set = fspset; fsperm->fsp_name = fsname; } static inline void fs_perm_fini(fs_perm_t *fsperm) { who_perm_node_t *node = uu_avl_first(fsperm->fsp_sc_avl); while (node != NULL) { who_perm_node_t *next_node = uu_avl_next(fsperm->fsp_sc_avl, node); who_perm_t *who_perm = &node->who_perm; who_perm_fini(who_perm); uu_avl_remove(fsperm->fsp_sc_avl, node); free(node); node = next_node; } node = uu_avl_first(fsperm->fsp_uge_avl); while (node != NULL) { who_perm_node_t *next_node = uu_avl_next(fsperm->fsp_uge_avl, node); who_perm_t *who_perm = &node->who_perm; who_perm_fini(who_perm); uu_avl_remove(fsperm->fsp_uge_avl, node); free(node); node = next_node; } uu_avl_destroy(fsperm->fsp_sc_avl); uu_avl_destroy(fsperm->fsp_uge_avl); } static void inline set_deleg_perm_node(uu_avl_t *avl, deleg_perm_node_t *node, zfs_deleg_who_type_t who_type, const char *name, char locality) { uu_avl_index_t idx = 0; deleg_perm_node_t *found_node = NULL; deleg_perm_t *deleg_perm = &node->dpn_perm; deleg_perm_init(deleg_perm, who_type, name); if ((found_node = uu_avl_find(avl, node, NULL, &idx)) == NULL) uu_avl_insert(avl, node, idx); else { node = found_node; deleg_perm = &node->dpn_perm; } switch (locality) { case ZFS_DELEG_LOCAL: deleg_perm->dp_local = B_TRUE; break; case ZFS_DELEG_DESCENDENT: deleg_perm->dp_descend = B_TRUE; break; case ZFS_DELEG_NA: break; default: assert(B_FALSE); /* invalid locality */ } } static inline int parse_who_perm(who_perm_t *who_perm, nvlist_t *nvl, char locality) { nvpair_t *nvp = NULL; fs_perm_set_t *fspset = who_perm->who_fsperm->fsp_set; uu_avl_t *avl = who_perm->who_deleg_perm_avl; zfs_deleg_who_type_t who_type = who_perm->who_type; while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { const char *name = nvpair_name(nvp); data_type_t type = nvpair_type(nvp); uu_avl_pool_t *avl_pool = fspset->fsps_deleg_perm_avl_pool; deleg_perm_node_t *node = safe_malloc(sizeof (deleg_perm_node_t)); VERIFY(type == DATA_TYPE_BOOLEAN); uu_avl_node_init(node, &node->dpn_avl_node, avl_pool); set_deleg_perm_node(avl, node, who_type, name, locality); } return (0); } static inline int parse_fs_perm(fs_perm_t *fsperm, nvlist_t *nvl) { nvpair_t *nvp = NULL; fs_perm_set_t *fspset = fsperm->fsp_set; while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { nvlist_t *nvl2 = NULL; const char *name = nvpair_name(nvp); uu_avl_t *avl = NULL; uu_avl_pool_t *avl_pool = NULL; zfs_deleg_who_type_t perm_type = name[0]; char perm_locality = name[1]; const char *perm_name = name + 3; boolean_t is_set = B_TRUE; who_perm_t *who_perm = NULL; assert('$' == name[2]); if (nvpair_value_nvlist(nvp, &nvl2) != 0) return (-1); switch (perm_type) { case ZFS_DELEG_CREATE: case ZFS_DELEG_CREATE_SETS: case ZFS_DELEG_NAMED_SET: case ZFS_DELEG_NAMED_SET_SETS: avl_pool = fspset->fsps_named_set_avl_pool; avl = fsperm->fsp_sc_avl; break; case ZFS_DELEG_USER: case ZFS_DELEG_USER_SETS: case ZFS_DELEG_GROUP: case ZFS_DELEG_GROUP_SETS: case ZFS_DELEG_EVERYONE: case ZFS_DELEG_EVERYONE_SETS: avl_pool = fspset->fsps_who_perm_avl_pool; avl = fsperm->fsp_uge_avl; break; default: break; } if (is_set) { who_perm_node_t *found_node = NULL; who_perm_node_t *node = safe_malloc( sizeof (who_perm_node_t)); who_perm = &node->who_perm; uu_avl_index_t idx = 0; uu_avl_node_init(node, 
&node->who_avl_node, avl_pool); who_perm_init(who_perm, fsperm, perm_type, perm_name); if ((found_node = uu_avl_find(avl, node, NULL, &idx)) == NULL) { if (avl == fsperm->fsp_uge_avl) { uid_t rid = 0; struct passwd *p = NULL; struct group *g = NULL; const char *nice_name = NULL; switch (perm_type) { case ZFS_DELEG_USER_SETS: case ZFS_DELEG_USER: rid = atoi(perm_name); p = getpwuid(rid); if (p) nice_name = p->pw_name; break; case ZFS_DELEG_GROUP_SETS: case ZFS_DELEG_GROUP: rid = atoi(perm_name); g = getgrgid(rid); if (g) nice_name = g->gr_name; break; default: break; } if (nice_name != NULL) (void) strlcpy( node->who_perm.who_ug_name, nice_name, 256); } uu_avl_insert(avl, node, idx); } else { node = found_node; who_perm = &node->who_perm; } } (void) parse_who_perm(who_perm, nvl2, perm_locality); } return (0); } static inline int parse_fs_perm_set(fs_perm_set_t *fspset, nvlist_t *nvl) { nvpair_t *nvp = NULL; uu_avl_index_t idx = 0; while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { nvlist_t *nvl2 = NULL; const char *fsname = nvpair_name(nvp); data_type_t type = nvpair_type(nvp); fs_perm_t *fsperm = NULL; fs_perm_node_t *node = safe_malloc(sizeof (fs_perm_node_t)); if (node == NULL) nomem(); fsperm = &node->fspn_fsperm; VERIFY(DATA_TYPE_NVLIST == type); uu_list_node_init(node, &node->fspn_list_node, fspset->fsps_list_pool); idx = uu_list_numnodes(fspset->fsps_list); fs_perm_init(fsperm, fspset, fsname); if (nvpair_value_nvlist(nvp, &nvl2) != 0) return (-1); (void) parse_fs_perm(fsperm, nvl2); uu_list_insert(fspset->fsps_list, node, idx); } return (0); } static inline const char * deleg_perm_comment(zfs_deleg_note_t note) { const char *str = ""; /* subcommands */ switch (note) { /* SUBCOMMANDS */ case ZFS_DELEG_NOTE_ALLOW: str = gettext("Must also have the permission that is being" "\n\t\t\t\tallowed"); break; case ZFS_DELEG_NOTE_CLONE: str = gettext("Must also have the 'create' ability and 'mount'" "\n\t\t\t\tability in the origin file system"); break; case ZFS_DELEG_NOTE_CREATE: str = gettext("Must also have the 'mount' ability"); break; case ZFS_DELEG_NOTE_DESTROY: str = gettext("Must also have the 'mount' ability"); break; case ZFS_DELEG_NOTE_DIFF: str = gettext("Allows lookup of paths within a dataset;" "\n\t\t\t\tgiven an object number. Ordinary users need this" "\n\t\t\t\tin order to use zfs diff"); break; case ZFS_DELEG_NOTE_HOLD: str = gettext("Allows adding a user hold to a snapshot"); break; case ZFS_DELEG_NOTE_MOUNT: str = gettext("Allows mount/umount of ZFS datasets"); break; case ZFS_DELEG_NOTE_PROMOTE: str = gettext("Must also have the 'mount'\n\t\t\t\tand" " 'promote' ability in the origin file system"); break; case ZFS_DELEG_NOTE_RECEIVE: str = gettext("Must also have the 'mount' and 'create'" " ability"); break; case ZFS_DELEG_NOTE_RELEASE: str = gettext("Allows releasing a user hold which\n\t\t\t\t" "might destroy the snapshot"); break; case ZFS_DELEG_NOTE_RENAME: str = gettext("Must also have the 'mount' and 'create'" "\n\t\t\t\tability in the new parent"); break; case ZFS_DELEG_NOTE_ROLLBACK: str = gettext(""); break; case ZFS_DELEG_NOTE_SEND: str = gettext(""); break; case ZFS_DELEG_NOTE_SHARE: str = gettext("Allows sharing file systems over NFS or SMB" "\n\t\t\t\tprotocols"); break; case ZFS_DELEG_NOTE_SNAPSHOT: str = gettext(""); break; /* * case ZFS_DELEG_NOTE_VSCAN: * str = gettext(""); * break; */ /* OTHER */ case ZFS_DELEG_NOTE_GROUPQUOTA: str = gettext("Allows accessing any groupquota@... 
property"); break; case ZFS_DELEG_NOTE_GROUPUSED: str = gettext("Allows reading any groupused@... property"); break; case ZFS_DELEG_NOTE_USERPROP: str = gettext("Allows changing any user property"); break; case ZFS_DELEG_NOTE_USERQUOTA: str = gettext("Allows accessing any userquota@... property"); break; case ZFS_DELEG_NOTE_USERUSED: str = gettext("Allows reading any userused@... property"); break; /* other */ default: str = ""; } return (str); } struct allow_opts { boolean_t local; boolean_t descend; boolean_t user; boolean_t group; boolean_t everyone; boolean_t create; boolean_t set; boolean_t recursive; /* unallow only */ boolean_t prt_usage; boolean_t prt_perms; char *who; char *perms; const char *dataset; }; static inline int prop_cmp(const void *a, const void *b) { const char *str1 = *(const char **)a; const char *str2 = *(const char **)b; return (strcmp(str1, str2)); } static void allow_usage(boolean_t un, boolean_t requested, const char *msg) { const char *opt_desc[] = { "-h", gettext("show this help message and exit"), "-l", gettext("set permission locally"), "-d", gettext("set permission for descents"), "-u", gettext("set permission for user"), "-g", gettext("set permission for group"), "-e", gettext("set permission for everyone"), "-c", gettext("set create time permission"), "-s", gettext("define permission set"), /* unallow only */ "-r", gettext("remove permissions recursively"), }; size_t unallow_size = sizeof (opt_desc) / sizeof (char *); size_t allow_size = unallow_size - 2; const char *props[ZFS_NUM_PROPS]; int i; size_t count = 0; FILE *fp = requested ? stdout : stderr; zprop_desc_t *pdtbl = zfs_prop_get_table(); const char *fmt = gettext("%-16s %-14s\t%s\n"); (void) fprintf(fp, gettext("Usage: %s\n"), get_usage(un ? HELP_UNALLOW : HELP_ALLOW)); (void) fprintf(fp, gettext("Options:\n")); for (i = 0; i < (un ? unallow_size : allow_size); i++) { const char *opt = opt_desc[i++]; const char *optdsc = opt_desc[i]; (void) fprintf(fp, gettext(" %-10s %s\n"), opt, optdsc); } (void) fprintf(fp, gettext("\nThe following permissions are " "supported:\n\n")); (void) fprintf(fp, fmt, gettext("NAME"), gettext("TYPE"), gettext("NOTES")); for (i = 0; i < ZFS_NUM_DELEG_NOTES; i++) { const char *perm_name = zfs_deleg_perm_tbl[i].z_perm; zfs_deleg_note_t perm_note = zfs_deleg_perm_tbl[i].z_note; const char *perm_type = deleg_perm_type(perm_note); const char *perm_comment = deleg_perm_comment(perm_note); (void) fprintf(fp, fmt, perm_name, perm_type, perm_comment); } for (i = 0; i < ZFS_NUM_PROPS; i++) { zprop_desc_t *pd = &pdtbl[i]; if (pd->pd_visible != B_TRUE) continue; if (pd->pd_attr == PROP_READONLY) continue; props[count++] = pd->pd_name; } props[count] = NULL; qsort(props, count, sizeof (char *), prop_cmp); for (i = 0; i < count; i++) (void) fprintf(fp, fmt, props[i], gettext("property"), ""); if (msg != NULL) (void) fprintf(fp, gettext("\nzfs: error: %s"), msg); exit(requested ? 0 : 2); } static inline const char * munge_args(int argc, char **argv, boolean_t un, size_t expected_argc, char **permsp) { if (un && argc == expected_argc - 1) *permsp = NULL; else if (argc == expected_argc) *permsp = argv[argc - 2]; else allow_usage(un, B_FALSE, gettext("wrong number of parameters\n")); return (argv[argc - 1]); } static void parse_allow_args(int argc, char **argv, boolean_t un, struct allow_opts *opts) { int uge_sum = opts->user + opts->group + opts->everyone; int csuge_sum = opts->create + opts->set + uge_sum; int ldcsuge_sum = csuge_sum + opts->local + opts->descend; int all_sum = un ? 
ldcsuge_sum + opts->recursive : ldcsuge_sum; if (uge_sum > 1) allow_usage(un, B_FALSE, gettext("-u, -g, and -e are mutually exclusive\n")); if (opts->prt_usage) { if (argc == 0 && all_sum == 0) allow_usage(un, B_TRUE, NULL); else usage(B_FALSE); } if (opts->set) { if (csuge_sum > 1) allow_usage(un, B_FALSE, gettext("invalid options combined with -s\n")); opts->dataset = munge_args(argc, argv, un, 3, &opts->perms); if (argv[0][0] != '@') allow_usage(un, B_FALSE, gettext("invalid set name: missing '@' prefix\n")); opts->who = argv[0]; } else if (opts->create) { if (ldcsuge_sum > 1) allow_usage(un, B_FALSE, gettext("invalid options combined with -c\n")); opts->dataset = munge_args(argc, argv, un, 2, &opts->perms); } else if (opts->everyone) { if (csuge_sum > 1) allow_usage(un, B_FALSE, gettext("invalid options combined with -e\n")); opts->dataset = munge_args(argc, argv, un, 2, &opts->perms); } else if (uge_sum == 0 && argc > 0 && strcmp(argv[0], "everyone") == 0) { opts->everyone = B_TRUE; argc--; argv++; opts->dataset = munge_args(argc, argv, un, 2, &opts->perms); } else if (argc == 1 && !un) { opts->prt_perms = B_TRUE; opts->dataset = argv[argc-1]; } else { opts->dataset = munge_args(argc, argv, un, 3, &opts->perms); opts->who = argv[0]; } if (!opts->local && !opts->descend) { opts->local = B_TRUE; opts->descend = B_TRUE; } } static void store_allow_perm(zfs_deleg_who_type_t type, boolean_t local, boolean_t descend, const char *who, char *perms, nvlist_t *top_nvl) { int i; char ld[2] = { '\0', '\0' }; char who_buf[ZFS_MAXNAMELEN+32]; char base_type = ZFS_DELEG_WHO_UNKNOWN; char set_type = ZFS_DELEG_WHO_UNKNOWN; nvlist_t *base_nvl = NULL; nvlist_t *set_nvl = NULL; nvlist_t *nvl; if (nvlist_alloc(&base_nvl, NV_UNIQUE_NAME, 0) != 0) nomem(); if (nvlist_alloc(&set_nvl, NV_UNIQUE_NAME, 0) != 0) nomem(); switch (type) { case ZFS_DELEG_NAMED_SET_SETS: case ZFS_DELEG_NAMED_SET: set_type = ZFS_DELEG_NAMED_SET_SETS; base_type = ZFS_DELEG_NAMED_SET; ld[0] = ZFS_DELEG_NA; break; case ZFS_DELEG_CREATE_SETS: case ZFS_DELEG_CREATE: set_type = ZFS_DELEG_CREATE_SETS; base_type = ZFS_DELEG_CREATE; ld[0] = ZFS_DELEG_NA; break; case ZFS_DELEG_USER_SETS: case ZFS_DELEG_USER: set_type = ZFS_DELEG_USER_SETS; base_type = ZFS_DELEG_USER; if (local) ld[0] = ZFS_DELEG_LOCAL; if (descend) ld[1] = ZFS_DELEG_DESCENDENT; break; case ZFS_DELEG_GROUP_SETS: case ZFS_DELEG_GROUP: set_type = ZFS_DELEG_GROUP_SETS; base_type = ZFS_DELEG_GROUP; if (local) ld[0] = ZFS_DELEG_LOCAL; if (descend) ld[1] = ZFS_DELEG_DESCENDENT; break; case ZFS_DELEG_EVERYONE_SETS: case ZFS_DELEG_EVERYONE: set_type = ZFS_DELEG_EVERYONE_SETS; base_type = ZFS_DELEG_EVERYONE; if (local) ld[0] = ZFS_DELEG_LOCAL; if (descend) ld[1] = ZFS_DELEG_DESCENDENT; default: break; } if (perms != NULL) { char *curr = perms; char *end = curr + strlen(perms); while (curr < end) { char *delim = strchr(curr, ','); if (delim == NULL) delim = end; else *delim = '\0'; if (curr[0] == '@') nvl = set_nvl; else nvl = base_nvl; (void) nvlist_add_boolean(nvl, curr); if (delim != end) *delim = ','; curr = delim + 1; } for (i = 0; i < 2; i++) { char locality = ld[i]; if (locality == 0) continue; if (!nvlist_empty(base_nvl)) { if (who != NULL) (void) snprintf(who_buf, sizeof (who_buf), "%c%c$%s", base_type, locality, who); else (void) snprintf(who_buf, sizeof (who_buf), "%c%c$", base_type, locality); (void) nvlist_add_nvlist(top_nvl, who_buf, base_nvl); } if (!nvlist_empty(set_nvl)) { if (who != NULL) (void) snprintf(who_buf, sizeof (who_buf), "%c%c$%s", set_type, locality, who); 
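/* Note (added for clarity): fsacl nvlist keys are encoded as "<who type><locality>$<who name>", matching the "%c%c$%s" formats here and the assert('$' == name[2]) in parse_fs_perm(); user and group ids are stored as decimal strings. */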
else (void) snprintf(who_buf, sizeof (who_buf), "%c%c$", set_type, locality); (void) nvlist_add_nvlist(top_nvl, who_buf, set_nvl); } } } else { for (i = 0; i < 2; i++) { char locality = ld[i]; if (locality == 0) continue; if (who != NULL) (void) snprintf(who_buf, sizeof (who_buf), "%c%c$%s", base_type, locality, who); else (void) snprintf(who_buf, sizeof (who_buf), "%c%c$", base_type, locality); (void) nvlist_add_boolean(top_nvl, who_buf); if (who != NULL) (void) snprintf(who_buf, sizeof (who_buf), "%c%c$%s", set_type, locality, who); else (void) snprintf(who_buf, sizeof (who_buf), "%c%c$", set_type, locality); (void) nvlist_add_boolean(top_nvl, who_buf); } } } static int construct_fsacl_list(boolean_t un, struct allow_opts *opts, nvlist_t **nvlp) { if (nvlist_alloc(nvlp, NV_UNIQUE_NAME, 0) != 0) nomem(); if (opts->set) { store_allow_perm(ZFS_DELEG_NAMED_SET, opts->local, opts->descend, opts->who, opts->perms, *nvlp); } else if (opts->create) { store_allow_perm(ZFS_DELEG_CREATE, opts->local, opts->descend, NULL, opts->perms, *nvlp); } else if (opts->everyone) { store_allow_perm(ZFS_DELEG_EVERYONE, opts->local, opts->descend, NULL, opts->perms, *nvlp); } else { char *curr = opts->who; char *end = curr + strlen(curr); while (curr < end) { const char *who; zfs_deleg_who_type_t who_type = ZFS_DELEG_WHO_UNKNOWN; char *endch; char *delim = strchr(curr, ','); char errbuf[256]; char id[64]; struct passwd *p = NULL; struct group *g = NULL; uid_t rid; if (delim == NULL) delim = end; else *delim = '\0'; rid = (uid_t)strtol(curr, &endch, 0); if (opts->user) { who_type = ZFS_DELEG_USER; if (*endch != '\0') p = getpwnam(curr); else p = getpwuid(rid); if (p != NULL) rid = p->pw_uid; else { (void) snprintf(errbuf, 256, gettext( "invalid user %s"), curr); allow_usage(un, B_TRUE, errbuf); } } else if (opts->group) { who_type = ZFS_DELEG_GROUP; if (*endch != '\0') g = getgrnam(curr); else g = getgrgid(rid); if (g != NULL) rid = g->gr_gid; else { (void) snprintf(errbuf, 256, gettext( "invalid group %s"), curr); allow_usage(un, B_TRUE, errbuf); } } else { if (*endch != '\0') { p = getpwnam(curr); } else { p = getpwuid(rid); } if (p == NULL) { if (*endch != '\0') { g = getgrnam(curr); } else { g = getgrgid(rid); } } if (p != NULL) { who_type = ZFS_DELEG_USER; rid = p->pw_uid; } else if (g != NULL) { who_type = ZFS_DELEG_GROUP; rid = g->gr_gid; } else { (void) snprintf(errbuf, 256, gettext( "invalid user/group %s"), curr); allow_usage(un, B_TRUE, errbuf); } } (void) sprintf(id, "%u", rid); who = id; store_allow_perm(who_type, opts->local, opts->descend, who, opts->perms, *nvlp); curr = delim + 1; } } return (0); } static void print_set_creat_perms(uu_avl_t *who_avl) { const char *sc_title[] = { gettext("Permission sets:\n"), gettext("Create time permissions:\n"), NULL }; const char **title_ptr = sc_title; who_perm_node_t *who_node = NULL; int prev_weight = -1; for (who_node = uu_avl_first(who_avl); who_node != NULL; who_node = uu_avl_next(who_avl, who_node)) { uu_avl_t *avl = who_node->who_perm.who_deleg_perm_avl; zfs_deleg_who_type_t who_type = who_node->who_perm.who_type; const char *who_name = who_node->who_perm.who_name; int weight = who_type2weight(who_type); boolean_t first = B_TRUE; deleg_perm_node_t *deleg_node; if (prev_weight != weight) { (void) printf("%s", *title_ptr++); prev_weight = weight; } if (who_name == NULL || strnlen(who_name, 1) == 0) (void) printf("\t"); else (void) printf("\t%s ", who_name); for (deleg_node = uu_avl_first(avl); deleg_node != NULL; deleg_node = uu_avl_next(avl, 
deleg_node)) { if (first) { (void) printf("%s", deleg_node->dpn_perm.dp_name); first = B_FALSE; } else (void) printf(",%s", deleg_node->dpn_perm.dp_name); } (void) printf("\n"); } } static void inline print_uge_deleg_perms(uu_avl_t *who_avl, boolean_t local, boolean_t descend, const char *title) { who_perm_node_t *who_node = NULL; boolean_t prt_title = B_TRUE; uu_avl_walk_t *walk; if ((walk = uu_avl_walk_start(who_avl, UU_WALK_ROBUST)) == NULL) nomem(); while ((who_node = uu_avl_walk_next(walk)) != NULL) { const char *who_name = who_node->who_perm.who_name; const char *nice_who_name = who_node->who_perm.who_ug_name; uu_avl_t *avl = who_node->who_perm.who_deleg_perm_avl; zfs_deleg_who_type_t who_type = who_node->who_perm.who_type; char delim = ' '; deleg_perm_node_t *deleg_node; boolean_t prt_who = B_TRUE; for (deleg_node = uu_avl_first(avl); deleg_node != NULL; deleg_node = uu_avl_next(avl, deleg_node)) { if (local != deleg_node->dpn_perm.dp_local || descend != deleg_node->dpn_perm.dp_descend) continue; if (prt_who) { const char *who = NULL; if (prt_title) { prt_title = B_FALSE; (void) printf("%s", title); } switch (who_type) { case ZFS_DELEG_USER_SETS: case ZFS_DELEG_USER: who = gettext("user"); if (nice_who_name) who_name = nice_who_name; break; case ZFS_DELEG_GROUP_SETS: case ZFS_DELEG_GROUP: who = gettext("group"); if (nice_who_name) who_name = nice_who_name; break; case ZFS_DELEG_EVERYONE_SETS: case ZFS_DELEG_EVERYONE: who = gettext("everyone"); who_name = NULL; default: break; } prt_who = B_FALSE; if (who_name == NULL) (void) printf("\t%s", who); else (void) printf("\t%s %s", who, who_name); } (void) printf("%c%s", delim, deleg_node->dpn_perm.dp_name); delim = ','; } if (!prt_who) (void) printf("\n"); } uu_avl_walk_end(walk); } static void print_fs_perms(fs_perm_set_t *fspset) { fs_perm_node_t *node = NULL; char buf[ZFS_MAXNAMELEN+32]; const char *dsname = buf; for (node = uu_list_first(fspset->fsps_list); node != NULL; node = uu_list_next(fspset->fsps_list, node)) { uu_avl_t *sc_avl = node->fspn_fsperm.fsp_sc_avl; uu_avl_t *uge_avl = node->fspn_fsperm.fsp_uge_avl; int left = 0; (void) snprintf(buf, ZFS_MAXNAMELEN+32, gettext("---- Permissions on %s "), node->fspn_fsperm.fsp_name); (void) printf("%s", dsname); left = 70 - strlen(buf); while (left-- > 0) (void) printf("-"); (void) printf("\n"); print_set_creat_perms(sc_avl); print_uge_deleg_perms(uge_avl, B_TRUE, B_FALSE, gettext("Local permissions:\n")); print_uge_deleg_perms(uge_avl, B_FALSE, B_TRUE, gettext("Descendent permissions:\n")); print_uge_deleg_perms(uge_avl, B_TRUE, B_TRUE, gettext("Local+Descendent permissions:\n")); } } static fs_perm_set_t fs_perm_set = { NULL, NULL, NULL, NULL }; struct deleg_perms { boolean_t un; nvlist_t *nvl; }; static int set_deleg_perms(zfs_handle_t *zhp, void *data) { struct deleg_perms *perms = (struct deleg_perms *)data; zfs_type_t zfs_type = zfs_get_type(zhp); if (zfs_type != ZFS_TYPE_FILESYSTEM && zfs_type != ZFS_TYPE_VOLUME) return (0); return (zfs_set_fsacl(zhp, perms->un, perms->nvl)); } static int zfs_do_allow_unallow_impl(int argc, char **argv, boolean_t un) { zfs_handle_t *zhp; nvlist_t *perm_nvl = NULL; nvlist_t *update_perm_nvl = NULL; int error = 1; int c; struct allow_opts opts = { 0 }; const char *optstr = un ? 
"ldugecsrh" : "ldugecsh"; /* check opts */ while ((c = getopt(argc, argv, optstr)) != -1) { switch (c) { case 'l': opts.local = B_TRUE; break; case 'd': opts.descend = B_TRUE; break; case 'u': opts.user = B_TRUE; break; case 'g': opts.group = B_TRUE; break; case 'e': opts.everyone = B_TRUE; break; case 's': opts.set = B_TRUE; break; case 'c': opts.create = B_TRUE; break; case 'r': opts.recursive = B_TRUE; break; case ':': (void) fprintf(stderr, gettext("missing argument for " "'%c' option\n"), optopt); usage(B_FALSE); break; case 'h': opts.prt_usage = B_TRUE; break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); usage(B_FALSE); } } argc -= optind; argv += optind; /* check arguments */ parse_allow_args(argc, argv, un, &opts); /* try to open the dataset */ if ((zhp = zfs_open(g_zfs, opts.dataset, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME)) == NULL) { (void) fprintf(stderr, "Failed to open dataset: %s\n", opts.dataset); return (-1); } if (zfs_get_fsacl(zhp, &perm_nvl) != 0) goto cleanup2; fs_perm_set_init(&fs_perm_set); if (parse_fs_perm_set(&fs_perm_set, perm_nvl) != 0) { (void) fprintf(stderr, "Failed to parse fsacl permissions\n"); goto cleanup1; } if (opts.prt_perms) print_fs_perms(&fs_perm_set); else { (void) construct_fsacl_list(un, &opts, &update_perm_nvl); if (zfs_set_fsacl(zhp, un, update_perm_nvl) != 0) goto cleanup0; if (un && opts.recursive) { struct deleg_perms data = { un, update_perm_nvl }; if (zfs_iter_filesystems(zhp, set_deleg_perms, &data) != 0) goto cleanup0; } } error = 0; cleanup0: nvlist_free(perm_nvl); if (update_perm_nvl != NULL) nvlist_free(update_perm_nvl); cleanup1: fs_perm_set_fini(&fs_perm_set); cleanup2: zfs_close(zhp); return (error); } /* * zfs allow [-r] [-t] ... * * -r Recursively hold * -t Temporary hold (hidden option) * * Apply a user-hold with the given tag to the list of snapshots. */ static int zfs_do_allow(int argc, char **argv) { return (zfs_do_allow_unallow_impl(argc, argv, B_FALSE)); } /* * zfs unallow [-r] [-t] ... * * -r Recursively hold * -t Temporary hold (hidden option) * * Apply a user-hold with the given tag to the list of snapshots. */ static int zfs_do_unallow(int argc, char **argv) { return (zfs_do_allow_unallow_impl(argc, argv, B_TRUE)); } static int zfs_do_hold_rele_impl(int argc, char **argv, boolean_t holding) { int errors = 0; int i; const char *tag; boolean_t recursive = B_FALSE; boolean_t temphold = B_FALSE; const char *opts = holding ? "rt" : "r"; int c; /* check options */ while ((c = getopt(argc, argv, opts)) != -1) { switch (c) { case 'r': recursive = B_TRUE; break; case 't': temphold = B_TRUE; break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); usage(B_FALSE); } } argc -= optind; argv += optind; /* check number of arguments */ if (argc < 2) usage(B_FALSE); tag = argv[0]; --argc; ++argv; if (holding && tag[0] == '.') { /* tags starting with '.' 
are reserved for libzfs */ (void) fprintf(stderr, gettext("tag may not start with '.'\n")); usage(B_FALSE); } for (i = 0; i < argc; ++i) { zfs_handle_t *zhp; char parent[ZFS_MAXNAMELEN]; const char *delim; char *path = argv[i]; delim = strchr(path, '@'); if (delim == NULL) { (void) fprintf(stderr, gettext("'%s' is not a snapshot\n"), path); ++errors; continue; } (void) strncpy(parent, path, delim - path); parent[delim - path] = '\0'; zhp = zfs_open(g_zfs, parent, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME); if (zhp == NULL) { ++errors; continue; } if (holding) { if (zfs_hold(zhp, delim+1, tag, recursive, temphold, B_FALSE, -1, 0, 0) != 0) ++errors; } else { if (zfs_release(zhp, delim+1, tag, recursive) != 0) ++errors; } zfs_close(zhp); } return (errors != 0); } /* * zfs hold [-r] [-t] ... * * -r Recursively hold * -t Temporary hold (hidden option) * * Apply a user-hold with the given tag to the list of snapshots. */ static int zfs_do_hold(int argc, char **argv) { return (zfs_do_hold_rele_impl(argc, argv, B_TRUE)); } /* * zfs release [-r] ... * * -r Recursively release * * Release a user-hold with the given tag from the list of snapshots. */ static int zfs_do_release(int argc, char **argv) { return (zfs_do_hold_rele_impl(argc, argv, B_FALSE)); } typedef struct holds_cbdata { boolean_t cb_recursive; const char *cb_snapname; nvlist_t **cb_nvlp; size_t cb_max_namelen; size_t cb_max_taglen; } holds_cbdata_t; #define STRFTIME_FMT_STR "%a %b %e %k:%M %Y" #define DATETIME_BUF_LEN (32) /* * */ static void print_holds(boolean_t scripted, int nwidth, int tagwidth, nvlist_t *nvl) { int i; nvpair_t *nvp = NULL; char *hdr_cols[] = { "NAME", "TAG", "TIMESTAMP" }; const char *col; if (!scripted) { for (i = 0; i < 3; i++) { col = gettext(hdr_cols[i]); if (i < 2) (void) printf("%-*s ", i ? tagwidth : nwidth, col); else (void) printf("%s\n", col); } } while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { char *zname = nvpair_name(nvp); nvlist_t *nvl2; nvpair_t *nvp2 = NULL; (void) nvpair_value_nvlist(nvp, &nvl2); while ((nvp2 = nvlist_next_nvpair(nvl2, nvp2)) != NULL) { char tsbuf[DATETIME_BUF_LEN]; char *tagname = nvpair_name(nvp2); uint64_t val = 0; time_t time; struct tm t; char sep = scripted ? '\t' : ' '; int sepnum = scripted ? 1 : 2; (void) nvpair_value_uint64(nvp2, &val); time = (time_t)val; (void) localtime_r(&time, &t); (void) strftime(tsbuf, DATETIME_BUF_LEN, gettext(STRFTIME_FMT_STR), &t); (void) printf("%-*s%*c%-*s%*c%s\n", nwidth, zname, sepnum, sep, tagwidth, tagname, sepnum, sep, tsbuf); } } } /* * Generic callback function to list a dataset or snapshot. */ static int holds_callback(zfs_handle_t *zhp, void *data) { holds_cbdata_t *cbp = data; nvlist_t *top_nvl = *cbp->cb_nvlp; nvlist_t *nvl = NULL; nvpair_t *nvp = NULL; const char *zname = zfs_get_name(zhp); size_t znamelen = strnlen(zname, ZFS_MAXNAMELEN); if (cbp->cb_recursive) { const char *snapname; char *delim = strchr(zname, '@'); if (delim == NULL) return (0); snapname = delim + 1; if (strcmp(cbp->cb_snapname, snapname)) return (0); } if (zfs_get_holds(zhp, &nvl) != 0) return (-1); if (znamelen > cbp->cb_max_namelen) cbp->cb_max_namelen = znamelen; while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { const char *tag = nvpair_name(nvp); size_t taglen = strnlen(tag, MAXNAMELEN); if (taglen > cbp->cb_max_taglen) cbp->cb_max_taglen = taglen; } return (nvlist_add_nvlist(top_nvl, zname, nvl)); } /* * zfs holds [-r] ... 
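* Lists any user holds on each of the given snapshots.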
* * -r Recursively hold */ static int zfs_do_holds(int argc, char **argv) { int errors = 0; int c; int i; boolean_t scripted = B_FALSE; boolean_t recursive = B_FALSE; const char *opts = "rH"; nvlist_t *nvl; int types = ZFS_TYPE_SNAPSHOT; holds_cbdata_t cb = { 0 }; int limit = 0; int ret = 0; int flags = 0; /* check options */ while ((c = getopt(argc, argv, opts)) != -1) { switch (c) { case 'r': recursive = B_TRUE; break; case 'H': scripted = B_TRUE; break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); usage(B_FALSE); } } if (recursive) { types |= ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME; flags |= ZFS_ITER_RECURSE; } argc -= optind; argv += optind; /* check number of arguments */ if (argc < 1) usage(B_FALSE); if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0) nomem(); for (i = 0; i < argc; ++i) { char *snapshot = argv[i]; const char *delim; const char *snapname; delim = strchr(snapshot, '@'); if (delim == NULL) { (void) fprintf(stderr, gettext("'%s' is not a snapshot\n"), snapshot); ++errors; continue; } snapname = delim + 1; if (recursive) snapshot[delim - snapshot] = '\0'; cb.cb_recursive = recursive; cb.cb_snapname = snapname; cb.cb_nvlp = &nvl; /* * 1. collect holds data, set format options */ ret = zfs_for_each(argc, argv, flags, types, NULL, NULL, limit, holds_callback, &cb); if (ret != 0) ++errors; } /* * 2. print holds data */ print_holds(scripted, cb.cb_max_namelen, cb.cb_max_taglen, nvl); if (nvlist_empty(nvl)) (void) fprintf(stderr, gettext("no datasets available\n")); nvlist_free(nvl); return (0 != errors); } #define CHECK_SPINNER 30 #define SPINNER_TIME 3 /* seconds */ #define MOUNT_TIME 5 /* seconds */ static int get_one_dataset(zfs_handle_t *zhp, void *data) { static char *spin[] = { "-", "\\", "|", "/" }; static int spinval = 0; static int spincheck = 0; static time_t last_spin_time = (time_t)0; get_all_cb_t *cbp = data; zfs_type_t type = zfs_get_type(zhp); if (cbp->cb_verbose) { if (--spincheck < 0) { time_t now = time(NULL); if (last_spin_time + SPINNER_TIME < now) { update_progress(spin[spinval++ % 4]); last_spin_time = now; } spincheck = CHECK_SPINNER; } } /* * Iterate over any nested datasets. */ if (zfs_iter_filesystems(zhp, get_one_dataset, data) != 0) { zfs_close(zhp); return (1); } /* * Skip any datasets whose type does not match. */ if ((type & ZFS_TYPE_FILESYSTEM) == 0) { zfs_close(zhp); return (0); } libzfs_add_handle(cbp, zhp); assert(cbp->cb_used <= cbp->cb_alloc); return (0); } static void get_all_datasets(zfs_handle_t ***dslist, size_t *count, boolean_t verbose) { get_all_cb_t cb = { 0 }; cb.cb_verbose = verbose; cb.cb_getone = get_one_dataset; if (verbose) set_progress_header(gettext("Reading ZFS config")); (void) zfs_iter_root(g_zfs, get_one_dataset, &cb); *dslist = cb.cb_handles; *count = cb.cb_used; if (verbose) finish_progress(gettext("done.")); } /* * Generic callback for sharing or mounting filesystems. Because the code is so * similar, we have a common function with an extra parameter to determine which * mode we are using. */ #define OP_SHARE 0x1 #define OP_MOUNT 0x2 /* * Share or mount a dataset. */ static int share_mount_one(zfs_handle_t *zhp, int op, int flags, char *protocol, boolean_t explicit, const char *options) { char mountpoint[ZFS_MAXPROPLEN]; char shareopts[ZFS_MAXPROPLEN]; char smbshareopts[ZFS_MAXPROPLEN]; const char *cmdname = op == OP_SHARE ? 
"share" : "mount"; struct mnttab mnt; uint64_t zoned, canmount; boolean_t shared_nfs, shared_smb; assert(zfs_get_type(zhp) & ZFS_TYPE_FILESYSTEM); /* * Check to make sure we can mount/share this dataset. If we * are in the global zone and the filesystem is exported to a * local zone, or if we are in a local zone and the * filesystem is not exported, then it is an error. */ zoned = zfs_prop_get_int(zhp, ZFS_PROP_ZONED); if (zoned && getzoneid() == GLOBAL_ZONEID) { if (!explicit) return (0); (void) fprintf(stderr, gettext("cannot %s '%s': " "dataset is exported to a local zone\n"), cmdname, zfs_get_name(zhp)); return (1); } else if (!zoned && getzoneid() != GLOBAL_ZONEID) { if (!explicit) return (0); (void) fprintf(stderr, gettext("cannot %s '%s': " "permission denied\n"), cmdname, zfs_get_name(zhp)); return (1); } /* * Ignore any filesystems which don't apply to us. This * includes those with a legacy mountpoint, or those with * legacy share options. */ verify(zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, mountpoint, sizeof (mountpoint), NULL, NULL, 0, B_FALSE) == 0); verify(zfs_prop_get(zhp, ZFS_PROP_SHARENFS, shareopts, sizeof (shareopts), NULL, NULL, 0, B_FALSE) == 0); verify(zfs_prop_get(zhp, ZFS_PROP_SHARESMB, smbshareopts, sizeof (smbshareopts), NULL, NULL, 0, B_FALSE) == 0); if (op == OP_SHARE && strcmp(shareopts, "off") == 0 && strcmp(smbshareopts, "off") == 0) { if (!explicit) return (0); (void) fprintf(stderr, gettext("cannot share '%s': " "legacy share\n"), zfs_get_name(zhp)); (void) fprintf(stderr, gettext("use share(1M) to " "share this filesystem, or set " "sharenfs property on\n")); return (1); } /* * We cannot share or mount legacy filesystems. If the * shareopts is non-legacy but the mountpoint is legacy, we * treat it as a legacy share. */ if (strcmp(mountpoint, "legacy") == 0) { if (!explicit) return (0); (void) fprintf(stderr, gettext("cannot %s '%s': " "legacy mountpoint\n"), cmdname, zfs_get_name(zhp)); (void) fprintf(stderr, gettext("use %s(1M) to " "%s this filesystem\n"), cmdname, cmdname); return (1); } if (strcmp(mountpoint, "none") == 0) { if (!explicit) return (0); (void) fprintf(stderr, gettext("cannot %s '%s': no " "mountpoint set\n"), cmdname, zfs_get_name(zhp)); return (1); } /* * canmount explicit outcome * on no pass through * on yes pass through * off no return 0 * off yes display error, return 1 * noauto no return 0 * noauto yes pass through */ canmount = zfs_prop_get_int(zhp, ZFS_PROP_CANMOUNT); if (canmount == ZFS_CANMOUNT_OFF) { if (!explicit) return (0); (void) fprintf(stderr, gettext("cannot %s '%s': " "'canmount' property is set to 'off'\n"), cmdname, zfs_get_name(zhp)); return (1); } else if (canmount == ZFS_CANMOUNT_NOAUTO && !explicit) { return (0); } /* * At this point, we have verified that the mountpoint and/or * shareopts are appropriate for auto management. If the * filesystem is already mounted or shared, return (failing * for explicit requests); otherwise mount or share the * filesystem. 
*/ switch (op) { case OP_SHARE: shared_nfs = zfs_is_shared_nfs(zhp, NULL); shared_smb = zfs_is_shared_smb(zhp, NULL); if ((shared_nfs && shared_smb) || ((shared_nfs && strcmp(shareopts, "on") == 0) && (strcmp(smbshareopts, "off") == 0)) || ((shared_smb && strcmp(smbshareopts, "on") == 0) && (strcmp(shareopts, "off") == 0))) { if (!explicit) return (0); (void) fprintf(stderr, gettext("cannot share " "'%s': filesystem already shared\n"), zfs_get_name(zhp)); return (1); } if (!zfs_is_mounted(zhp, NULL) && zfs_mount(zhp, NULL, 0) != 0) return (1); if (protocol == NULL) { if (zfs_shareall(zhp) != 0) return (1); } else if (strcmp(protocol, "nfs") == 0) { if (zfs_share_nfs(zhp)) return (1); } else if (strcmp(protocol, "smb") == 0) { if (zfs_share_smb(zhp)) return (1); } else { (void) fprintf(stderr, gettext("cannot share " "'%s': invalid share type '%s' " "specified\n"), zfs_get_name(zhp), protocol); return (1); } break; case OP_MOUNT: if (options == NULL) mnt.mnt_mntopts = ""; else mnt.mnt_mntopts = (char *)options; if (!hasmntopt(&mnt, MNTOPT_REMOUNT) && zfs_is_mounted(zhp, NULL)) { if (!explicit) return (0); (void) fprintf(stderr, gettext("cannot mount " "'%s': filesystem already mounted\n"), zfs_get_name(zhp)); return (1); } if (zfs_mount(zhp, options, flags) != 0) return (1); break; } return (0); } /* * Reports progress in the form "(current/total)". Not thread-safe. */ static void report_mount_progress(int current, int total) { static time_t last_progress_time = 0; time_t now = time(NULL); char info[32]; /* report 1..n instead of 0..n-1 */ ++current; /* display header if we're here for the first time */ if (current == 1) { set_progress_header(gettext("Mounting ZFS filesystems")); } else if (current != total && last_progress_time + MOUNT_TIME >= now) { /* too soon to report again */ return; } last_progress_time = now; (void) sprintf(info, "(%d/%d)", current, total); if (current == total) finish_progress(info); else update_progress(info); } static void append_options(char *mntopts, char *newopts) { int len = strlen(mntopts); /* original length plus new string to append plus 1 for the comma */ if (len + 1 + strlen(newopts) >= MNT_LINE_MAX) { (void) fprintf(stderr, gettext("the opts argument for " "'%s' option is too long (more than %d chars)\n"), "-o", MNT_LINE_MAX); usage(B_FALSE); } if (*mntopts) mntopts[len++] = ','; (void) strcpy(&mntopts[len], newopts); } static int share_mount(int op, int argc, char **argv) { int do_all = 0; boolean_t verbose = B_FALSE; int c, ret = 0; char *options = NULL; int flags = 0; /* check options */ while ((c = getopt(argc, argv, op == OP_MOUNT ? 
":avo:O" : "a")) != -1) { switch (c) { case 'a': do_all = 1; break; case 'v': verbose = B_TRUE; break; case 'o': if (*optarg == '\0') { (void) fprintf(stderr, gettext("empty mount " "options (-o) specified\n")); usage(B_FALSE); } if (options == NULL) options = safe_malloc(MNT_LINE_MAX + 1); /* option validation is done later */ append_options(options, optarg); break; case 'O': flags |= MS_OVERLAY; break; case ':': (void) fprintf(stderr, gettext("missing argument for " "'%c' option\n"), optopt); usage(B_FALSE); break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); usage(B_FALSE); } } argc -= optind; argv += optind; /* check number of arguments */ if (do_all) { zfs_handle_t **dslist = NULL; size_t i, count = 0; char *protocol = NULL; if (op == OP_SHARE && argc > 0) { if (strcmp(argv[0], "nfs") != 0 && strcmp(argv[0], "smb") != 0) { (void) fprintf(stderr, gettext("share type " "must be 'nfs' or 'smb'\n")); usage(B_FALSE); } protocol = argv[0]; argc--; argv++; } if (argc != 0) { (void) fprintf(stderr, gettext("too many arguments\n")); usage(B_FALSE); } start_progress_timer(); get_all_datasets(&dslist, &count, verbose); if (count == 0) return (0); qsort(dslist, count, sizeof (void *), libzfs_dataset_cmp); for (i = 0; i < count; i++) { if (verbose) report_mount_progress(i, count); if (share_mount_one(dslist[i], op, flags, protocol, B_FALSE, options) != 0) ret = 1; zfs_close(dslist[i]); } free(dslist); } else if (argc == 0) { struct mnttab entry; if ((op == OP_SHARE) || (options != NULL)) { (void) fprintf(stderr, gettext("missing filesystem " "argument (specify -a for all)\n")); usage(B_FALSE); } /* * When mount is given no arguments, go through /etc/mtab and * display any active ZFS mounts. We hide any snapshots, since * they are controlled automatically. */ rewind(mnttab_file); while (getmntent(mnttab_file, &entry) == 0) { if (strcmp(entry.mnt_fstype, MNTTYPE_ZFS) != 0 || strchr(entry.mnt_special, '@') != NULL) continue; (void) printf("%-30s %s\n", entry.mnt_special, entry.mnt_mountp); } } else { zfs_handle_t *zhp; if (argc > 1) { (void) fprintf(stderr, gettext("too many arguments\n")); usage(B_FALSE); } if ((zhp = zfs_open(g_zfs, argv[0], ZFS_TYPE_FILESYSTEM)) == NULL) { ret = 1; } else { ret = share_mount_one(zhp, op, flags, NULL, B_TRUE, options); zfs_close(zhp); } } return (ret); } /* * zfs mount -a [nfs] * zfs mount filesystem * * Mount all filesystems, or mount the given filesystem. */ static int zfs_do_mount(int argc, char **argv) { return (share_mount(OP_MOUNT, argc, argv)); } /* * zfs share -a [nfs | smb] * zfs share filesystem * * Share all filesystems, or share the given filesystem. */ static int zfs_do_share(int argc, char **argv) { return (share_mount(OP_SHARE, argc, argv)); } typedef struct unshare_unmount_node { zfs_handle_t *un_zhp; char *un_mountp; uu_avl_node_t un_avlnode; } unshare_unmount_node_t; /* ARGSUSED */ static int unshare_unmount_compare(const void *larg, const void *rarg, void *unused) { const unshare_unmount_node_t *l = larg; const unshare_unmount_node_t *r = rarg; return (strcmp(l->un_mountp, r->un_mountp)); } /* * Convenience routine used by zfs_do_umount() and manual_unmount(). Given an * absolute path, find the entry /etc/mtab, verify that its a ZFS filesystem, * and unmount it appropriately. */ static int unshare_unmount_path(int op, char *path, int flags, boolean_t is_manual) { zfs_handle_t *zhp; int ret = 0; struct stat64 statbuf; struct extmnttab entry; const char *cmdname = (op == OP_SHARE) ? 
"unshare" : "unmount"; ino_t path_inode; /* * Search for the path in /etc/mtab. Rather than looking for the * specific path, which can be fooled by non-standard paths (i.e. ".." * or "//"), we stat() the path and search for the corresponding * (major,minor) device pair. */ if (stat64(path, &statbuf) != 0) { (void) fprintf(stderr, gettext("cannot %s '%s': %s\n"), cmdname, path, strerror(errno)); return (1); } path_inode = statbuf.st_ino; /* * Search for the given (major,minor) pair in the mount table. */ rewind(mnttab_file); while ((ret = getextmntent(mnttab_file, &entry, 0)) == 0) { if (entry.mnt_major == major(statbuf.st_dev) && entry.mnt_minor == minor(statbuf.st_dev)) break; } if (ret != 0) { if (op == OP_SHARE) { (void) fprintf(stderr, gettext("cannot %s '%s': not " "currently mounted\n"), cmdname, path); return (1); } (void) fprintf(stderr, gettext("warning: %s not in mtab\n"), path); if ((ret = umount2(path, flags)) != 0) (void) fprintf(stderr, gettext("%s: %s\n"), path, strerror(errno)); return (ret != 0); } if (strcmp(entry.mnt_fstype, MNTTYPE_ZFS) != 0) { (void) fprintf(stderr, gettext("cannot %s '%s': not a ZFS " "filesystem\n"), cmdname, path); return (1); } if ((zhp = zfs_open(g_zfs, entry.mnt_special, ZFS_TYPE_FILESYSTEM)) == NULL) return (1); ret = 1; if (stat64(entry.mnt_mountp, &statbuf) != 0) { (void) fprintf(stderr, gettext("cannot %s '%s': %s\n"), cmdname, path, strerror(errno)); goto out; } else if (statbuf.st_ino != path_inode) { (void) fprintf(stderr, gettext("cannot " "%s '%s': not a mountpoint\n"), cmdname, path); goto out; } if (op == OP_SHARE) { char nfs_mnt_prop[ZFS_MAXPROPLEN]; char smbshare_prop[ZFS_MAXPROPLEN]; verify(zfs_prop_get(zhp, ZFS_PROP_SHARENFS, nfs_mnt_prop, sizeof (nfs_mnt_prop), NULL, NULL, 0, B_FALSE) == 0); verify(zfs_prop_get(zhp, ZFS_PROP_SHARESMB, smbshare_prop, sizeof (smbshare_prop), NULL, NULL, 0, B_FALSE) == 0); if (strcmp(nfs_mnt_prop, "off") == 0 && strcmp(smbshare_prop, "off") == 0) { (void) fprintf(stderr, gettext("cannot unshare " "'%s': legacy share\n"), path); (void) fprintf(stderr, gettext("use exportfs(8) " "or smbcontrol(1) to unshare this filesystem\n")); } else if (!zfs_is_shared(zhp)) { (void) fprintf(stderr, gettext("cannot unshare '%s': " "not currently shared\n"), path); } else { ret = zfs_unshareall_bypath(zhp, path); } } else { char mtpt_prop[ZFS_MAXPROPLEN]; verify(zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, mtpt_prop, sizeof (mtpt_prop), NULL, NULL, 0, B_FALSE) == 0); if (is_manual) { ret = zfs_unmount(zhp, NULL, flags); } else if (strcmp(mtpt_prop, "legacy") == 0) { (void) fprintf(stderr, gettext("cannot unmount " "'%s': legacy mountpoint\n"), zfs_get_name(zhp)); (void) fprintf(stderr, gettext("use umount(8) " "to unmount this filesystem\n")); } else { ret = zfs_unmountall(zhp, flags); } } out: zfs_close(zhp); return (ret != 0); } /* * Generic callback for unsharing or unmounting a filesystem. */ static int unshare_unmount(int op, int argc, char **argv) { int do_all = 0; int flags = 0; int ret = 0; int c; zfs_handle_t *zhp; char nfs_mnt_prop[ZFS_MAXPROPLEN]; char sharesmb[ZFS_MAXPROPLEN]; /* check options */ while ((c = getopt(argc, argv, op == OP_SHARE ? 
"a" : "af")) != -1) { switch (c) { case 'a': do_all = 1; break; case 'f': flags = MS_FORCE; break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); usage(B_FALSE); } } argc -= optind; argv += optind; if (do_all) { /* * We could make use of zfs_for_each() to walk all datasets in * the system, but this would be very inefficient, especially * since we would have to linearly search /etc/mtab for each * one. Instead, do one pass through /etc/mtab looking for * zfs entries and call zfs_unmount() for each one. * * Things get a little tricky if the administrator has created * mountpoints beneath other ZFS filesystems. In this case, we * have to unmount the deepest filesystems first. To accomplish * this, we place all the mountpoints in an AVL tree sorted by * the special type (dataset name), and walk the result in * reverse to make sure to get any snapshots first. */ struct mnttab entry; uu_avl_pool_t *pool; uu_avl_t *tree = NULL; unshare_unmount_node_t *node; uu_avl_index_t idx; uu_avl_walk_t *walk; if (argc != 0) { (void) fprintf(stderr, gettext("too many arguments\n")); usage(B_FALSE); } if (((pool = uu_avl_pool_create("unmount_pool", sizeof (unshare_unmount_node_t), offsetof(unshare_unmount_node_t, un_avlnode), unshare_unmount_compare, UU_DEFAULT)) == NULL) || ((tree = uu_avl_create(pool, NULL, UU_DEFAULT)) == NULL)) nomem(); rewind(mnttab_file); while (getmntent(mnttab_file, &entry) == 0) { /* ignore non-ZFS entries */ if (strcmp(entry.mnt_fstype, MNTTYPE_ZFS) != 0) continue; /* ignore snapshots */ if (strchr(entry.mnt_special, '@') != NULL) continue; if ((zhp = zfs_open(g_zfs, entry.mnt_special, ZFS_TYPE_FILESYSTEM)) == NULL) { ret = 1; continue; } switch (op) { case OP_SHARE: verify(zfs_prop_get(zhp, ZFS_PROP_SHARENFS, nfs_mnt_prop, sizeof (nfs_mnt_prop), NULL, NULL, 0, B_FALSE) == 0); if (strcmp(nfs_mnt_prop, "off") != 0) break; verify(zfs_prop_get(zhp, ZFS_PROP_SHARESMB, nfs_mnt_prop, sizeof (nfs_mnt_prop), NULL, NULL, 0, B_FALSE) == 0); if (strcmp(nfs_mnt_prop, "off") == 0) continue; break; case OP_MOUNT: /* Ignore legacy mounts */ verify(zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, nfs_mnt_prop, sizeof (nfs_mnt_prop), NULL, NULL, 0, B_FALSE) == 0); if (strcmp(nfs_mnt_prop, "legacy") == 0) continue; /* Ignore canmount=noauto mounts */ if (zfs_prop_get_int(zhp, ZFS_PROP_CANMOUNT) == ZFS_CANMOUNT_NOAUTO) continue; default: break; } node = safe_malloc(sizeof (unshare_unmount_node_t)); node->un_zhp = zhp; node->un_mountp = safe_strdup(entry.mnt_mountp); uu_avl_node_init(node, &node->un_avlnode, pool); if (uu_avl_find(tree, node, NULL, &idx) == NULL) { uu_avl_insert(tree, node, idx); } else { zfs_close(node->un_zhp); free(node->un_mountp); free(node); } } /* * Walk the AVL tree in reverse, unmounting each filesystem and * removing it from the AVL tree in the process. 
*/ if ((walk = uu_avl_walk_start(tree, UU_WALK_REVERSE | UU_WALK_ROBUST)) == NULL) nomem(); while ((node = uu_avl_walk_next(walk)) != NULL) { uu_avl_remove(tree, node); switch (op) { case OP_SHARE: if (zfs_unshareall_bypath(node->un_zhp, node->un_mountp) != 0) ret = 1; break; case OP_MOUNT: if (zfs_unmount(node->un_zhp, node->un_mountp, flags) != 0) ret = 1; break; } zfs_close(node->un_zhp); free(node->un_mountp); free(node); } uu_avl_walk_end(walk); uu_avl_destroy(tree); uu_avl_pool_destroy(pool); } else { if (argc != 1) { if (argc == 0) (void) fprintf(stderr, gettext("missing filesystem argument\n")); else (void) fprintf(stderr, gettext("too many arguments\n")); usage(B_FALSE); } /* * We have an argument, but it may be a full path or a ZFS * filesystem. Pass full paths off to unmount_path() (shared by * manual_unmount), otherwise open the filesystem and pass to * zfs_unmount(). */ if (argv[0][0] == '/') return (unshare_unmount_path(op, argv[0], flags, B_FALSE)); if ((zhp = zfs_open(g_zfs, argv[0], ZFS_TYPE_FILESYSTEM)) == NULL) return (1); verify(zfs_prop_get(zhp, op == OP_SHARE ? ZFS_PROP_SHARENFS : ZFS_PROP_MOUNTPOINT, nfs_mnt_prop, sizeof (nfs_mnt_prop), NULL, NULL, 0, B_FALSE) == 0); switch (op) { case OP_SHARE: verify(zfs_prop_get(zhp, ZFS_PROP_SHARENFS, nfs_mnt_prop, sizeof (nfs_mnt_prop), NULL, NULL, 0, B_FALSE) == 0); verify(zfs_prop_get(zhp, ZFS_PROP_SHARESMB, sharesmb, sizeof (sharesmb), NULL, NULL, 0, B_FALSE) == 0); if (strcmp(nfs_mnt_prop, "off") == 0 && strcmp(sharesmb, "off") == 0) { (void) fprintf(stderr, gettext("cannot " "unshare '%s': legacy share\n"), zfs_get_name(zhp)); (void) fprintf(stderr, gettext("use " "unshare(1M) to unshare this " "filesystem\n")); ret = 1; } else if (!zfs_is_shared(zhp)) { (void) fprintf(stderr, gettext("cannot " "unshare '%s': not currently " "shared\n"), zfs_get_name(zhp)); ret = 1; } else if (zfs_unshareall(zhp) != 0) { ret = 1; } break; case OP_MOUNT: if (strcmp(nfs_mnt_prop, "legacy") == 0) { (void) fprintf(stderr, gettext("cannot " "unmount '%s': legacy " "mountpoint\n"), zfs_get_name(zhp)); (void) fprintf(stderr, gettext("use " "umount(1M) to unmount this " "filesystem\n")); ret = 1; } else if (!zfs_is_mounted(zhp, NULL)) { (void) fprintf(stderr, gettext("cannot " "unmount '%s': not currently " "mounted\n"), zfs_get_name(zhp)); ret = 1; } else if (zfs_unmountall(zhp, flags) != 0) { ret = 1; } break; } zfs_close(zhp); } return (ret); } /* * zfs unmount -a * zfs unmount filesystem * * Unmount all filesystems, or a specific ZFS filesystem. */ static int zfs_do_unmount(int argc, char **argv) { return (unshare_unmount(OP_MOUNT, argc, argv)); } /* * zfs unshare -a * zfs unshare filesystem * * Unshare all filesystems, or a specific ZFS filesystem. 
*/ static int zfs_do_unshare(int argc, char **argv) { return (unshare_unmount(OP_SHARE, argc, argv)); } static int find_command_idx(char *command, int *idx) { int i; for (i = 0; i < NCOMMAND; i++) { if (command_table[i].name == NULL) continue; if (strcmp(command, command_table[i].name) == 0) { *idx = i; return (0); } } return (1); } static int zfs_do_diff(int argc, char **argv) { zfs_handle_t *zhp; int flags = 0; char *tosnap = NULL; char *fromsnap = NULL; char *atp, *copy; int err = 0; int c; while ((c = getopt(argc, argv, "FHt")) != -1) { switch (c) { case 'F': flags |= ZFS_DIFF_CLASSIFY; break; case 'H': flags |= ZFS_DIFF_PARSEABLE; break; case 't': flags |= ZFS_DIFF_TIMESTAMP; break; default: (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); usage(B_FALSE); } } argc -= optind; argv += optind; if (argc < 1) { (void) fprintf(stderr, gettext("must provide at least one snapshot name\n")); usage(B_FALSE); } if (argc > 2) { (void) fprintf(stderr, gettext("too many arguments\n")); usage(B_FALSE); } fromsnap = argv[0]; tosnap = (argc == 2) ? argv[1] : NULL; copy = NULL; if (*fromsnap != '@') copy = strdup(fromsnap); else if (tosnap) copy = strdup(tosnap); if (copy == NULL) usage(B_FALSE); if ((atp = strchr(copy, '@'))) *atp = '\0'; if ((zhp = zfs_open(g_zfs, copy, ZFS_TYPE_FILESYSTEM)) == NULL) return (1); free(copy); /* * Ignore SIGPIPE so that the library can give us * information on any failure */ (void) sigignore(SIGPIPE); err = zfs_show_diffs(zhp, STDOUT_FILENO, fromsnap, tosnap, flags); zfs_close(zhp); return (err != 0); } int main(int argc, char **argv) { int ret = 0; int i = 0; char *cmdname; (void) setlocale(LC_ALL, ""); (void) textdomain(TEXT_DOMAIN); opterr = 0; /* * Make sure the user has specified some command. */ if (argc < 2) { (void) fprintf(stderr, gettext("missing command\n")); usage(B_FALSE); } cmdname = argv[1]; /* * The 'umount' command is an alias for 'unmount' */ if (strcmp(cmdname, "umount") == 0) cmdname = "unmount"; /* * The 'recv' command is an alias for 'receive' */ if (strcmp(cmdname, "recv") == 0) cmdname = "receive"; /* * The 'snap' command is an alias for 'snapshot' */ if (strcmp(cmdname, "snap") == 0) cmdname = "snapshot"; /* * Special case '-?' */ if ((strcmp(cmdname, "-?") == 0) || (strcmp(cmdname, "--help") == 0)) usage(B_TRUE); if ((g_zfs = libzfs_init()) == NULL) return (1); mnttab_file = g_zfs->libzfs_mnttab; - zpool_set_history_str("zfs", argc, argv, history_str); - verify(zpool_stage_history(g_zfs, history_str) == 0); + zfs_save_arguments(argc, argv, history_str, sizeof (history_str)); libzfs_print_on_error(g_zfs, B_TRUE); /* * Run the appropriate command. */ libzfs_mnttab_cache(g_zfs, B_FALSE); if (find_command_idx(cmdname, &i) == 0) { current_command = &command_table[i]; ret = command_table[i].func(argc - 1, argv + 1); } else if (strchr(cmdname, '=') != NULL) { verify(find_command_idx("set", &i) == 0); current_command = &command_table[i]; ret = command_table[i].func(argc, argv); } else { (void) fprintf(stderr, gettext("unrecognized " "command '%s'\n"), cmdname); usage(B_FALSE); ret = 1; } libzfs_fini(g_zfs); + if (ret == 0 && log_history) + (void) zpool_log_history(g_zfs, history_str); + /* * The 'ZFS_ABORT' environment variable causes us to dump core on exit * for the purposes of running ::findleaks. 
*/ if (getenv("ZFS_ABORT") != NULL) { (void) printf("dumping core by request\n"); abort(); } return (ret); } diff --git a/cmd/zhack/zhack.c b/cmd/zhack/zhack.c index b2cf815ca57c..4f80dde6957b 100644 --- a/cmd/zhack/zhack.c +++ b/cmd/zhack/zhack.c @@ -1,533 +1,540 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2012 by Delphix. All rights reserved. */ /* * zhack is a debugging tool that can write changes to ZFS pool using libzpool * for testing purposes. Altering pools with zhack is unsupported and may * result in corrupted pools. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #undef ZFS_MAXNAMELEN #include extern boolean_t zfeature_checks_disable; const char cmdname[] = "zhack"; libzfs_handle_t *g_zfs; static importargs_t g_importargs; static char *g_pool; static boolean_t g_readonly; static void usage(void) { (void) fprintf(stderr, "Usage: %s [-c cachefile] [-d dir] ...\n" "where is one of the following:\n" "\n", cmdname); (void) fprintf(stderr, " feature stat \n" " print information about enabled features\n" " feature enable [-d desc] \n" " add a new enabled feature to the pool\n" " -d sets the feature's description\n" " feature ref [-md] \n" " change the refcount on the given feature\n" " -d decrease instead of increase the refcount\n" " -m add the feature to the label if increasing refcount\n" "\n" " : should be a feature guid\n"); exit(1); } static void fatal(const char *fmt, ...) { va_list ap; va_start(ap, fmt); (void) fprintf(stderr, "%s: ", cmdname); (void) vfprintf(stderr, fmt, ap); va_end(ap); (void) fprintf(stderr, "\n"); exit(1); } /* ARGSUSED */ static int space_delta_cb(dmu_object_type_t bonustype, void *data, uint64_t *userp, uint64_t *groupp) { /* * Is it a valid type of object to track? */ if (bonustype != DMU_OT_ZNODE && bonustype != DMU_OT_SA) return (ENOENT); (void) fprintf(stderr, "modifying object that needs user accounting"); abort(); /* NOTREACHED */ } /* * Target is the dataset whose pool we want to open. */ static void import_pool(const char *target, boolean_t readonly) { nvlist_t *config; nvlist_t *pools; int error; char *sepp; spa_t *spa; nvpair_t *elem; nvlist_t *props; const char *name; kernel_init(readonly ? FREAD : (FREAD | FWRITE)); g_zfs = libzfs_init(); ASSERT(g_zfs != NULL); dmu_objset_register_type(DMU_OST_ZFS, space_delta_cb); g_readonly = readonly; /* * If we only want readonly access, it's OK if we find * a potentially-active (ie, imported into the kernel) pool from the * default cachefile. 
*/ if (readonly && spa_open(target, &spa, FTAG) == 0) { spa_close(spa, FTAG); return; } g_importargs.unique = B_TRUE; g_importargs.can_be_active = readonly; g_pool = strdup(target); if ((sepp = strpbrk(g_pool, "/@")) != NULL) *sepp = '\0'; g_importargs.poolname = g_pool; pools = zpool_search_import(g_zfs, &g_importargs); if (pools == NULL || nvlist_next_nvpair(pools, NULL) == NULL) { if (!g_importargs.can_be_active) { g_importargs.can_be_active = B_TRUE; if (zpool_search_import(g_zfs, &g_importargs) != NULL || spa_open(target, &spa, FTAG) == 0) { fatal("cannot import '%s': pool is active; run " "\"zpool export %s\" first\n", g_pool, g_pool); } } fatal("cannot import '%s': no such pool available\n", g_pool); } elem = nvlist_next_nvpair(pools, NULL); name = nvpair_name(elem); VERIFY(nvpair_value_nvlist(elem, &config) == 0); props = NULL; if (readonly) { VERIFY(nvlist_alloc(&props, NV_UNIQUE_NAME, 0) == 0); VERIFY(nvlist_add_uint64(props, zpool_prop_to_name(ZPOOL_PROP_READONLY), 1) == 0); } zfeature_checks_disable = B_TRUE; error = spa_import(name, config, props, ZFS_IMPORT_NORMAL); zfeature_checks_disable = B_FALSE; if (error == EEXIST) error = 0; if (error) fatal("can't import '%s': %s", name, strerror(error)); } static void zhack_spa_open(const char *target, boolean_t readonly, void *tag, spa_t **spa) { int err; import_pool(target, readonly); zfeature_checks_disable = B_TRUE; err = spa_open(target, spa, tag); zfeature_checks_disable = B_FALSE; if (err != 0) fatal("cannot open '%s': %s", target, strerror(err)); if (spa_version(*spa) < SPA_VERSION_FEATURES) { fatal("'%s' has version %d, features not enabled", target, (int)spa_version(*spa)); } } static void dump_obj(objset_t *os, uint64_t obj, const char *name) { zap_cursor_t zc; zap_attribute_t za; (void) printf("%s_obj:\n", name); for (zap_cursor_init(&zc, os, obj); zap_cursor_retrieve(&zc, &za) == 0; zap_cursor_advance(&zc)) { if (za.za_integer_length == 8) { ASSERT(za.za_num_integers == 1); (void) printf("\t%s = %llu\n", za.za_name, (u_longlong_t)za.za_first_integer); } else { ASSERT(za.za_integer_length == 1); char val[1024]; VERIFY(zap_lookup(os, obj, za.za_name, 1, sizeof (val), val) == 0); (void) printf("\t%s = %s\n", za.za_name, val); } } zap_cursor_fini(&zc); } static void dump_mos(spa_t *spa) { nvlist_t *nv = spa->spa_label_features; nvpair_t *pair; (void) printf("label config:\n"); for (pair = nvlist_next_nvpair(nv, NULL); pair != NULL; pair = nvlist_next_nvpair(nv, pair)) { (void) printf("\t%s\n", nvpair_name(pair)); } } static void zhack_do_feature_stat(int argc, char **argv) { spa_t *spa; objset_t *os; char *target; argc--; argv++; if (argc < 1) { (void) fprintf(stderr, "error: missing pool name\n"); usage(); } target = argv[0]; zhack_spa_open(target, B_TRUE, FTAG, &spa); os = spa->spa_meta_objset; dump_obj(os, spa->spa_feat_for_read_obj, "for_read"); dump_obj(os, spa->spa_feat_for_write_obj, "for_write"); dump_obj(os, spa->spa_feat_desc_obj, "descriptions"); dump_mos(spa); spa_close(spa, FTAG); } static void feature_enable_sync(void *arg1, void *arg2, dmu_tx_t *tx) { spa_t *spa = arg1; zfeature_info_t *feature = arg2; spa_feature_enable(spa, feature, tx); + spa_history_log_internal(spa, "zhack enable feature", tx, + "name=%s can_readonly=%u", + feature->fi_guid, feature->fi_can_readonly); } static void zhack_do_feature_enable(int argc, char **argv) { char c; char *desc, *target; spa_t *spa; objset_t *mos; zfeature_info_t feature; zfeature_info_t *nodeps[] = { NULL }; /* * Features are not added to the pool's label until their 
refcounts * are incremented, so fi_mos can just be left as false for now. */ desc = NULL; feature.fi_uname = "zhack"; feature.fi_mos = B_FALSE; feature.fi_can_readonly = B_FALSE; feature.fi_depends = nodeps; optind = 1; while ((c = getopt(argc, argv, "rmd:")) != -1) { switch (c) { case 'r': feature.fi_can_readonly = B_TRUE; break; case 'd': desc = strdup(optarg); break; default: usage(); break; } } if (desc == NULL) desc = strdup("zhack injected"); feature.fi_desc = desc; argc -= optind; argv += optind; if (argc < 2) { (void) fprintf(stderr, "error: missing feature or pool name\n"); usage(); } target = argv[0]; feature.fi_guid = argv[1]; if (!zfeature_is_valid_guid(feature.fi_guid)) fatal("invalid feature guid: %s", feature.fi_guid); zhack_spa_open(target, B_FALSE, FTAG, &spa); mos = spa->spa_meta_objset; if (0 == zfeature_lookup_guid(feature.fi_guid, NULL)) fatal("'%s' is a real feature, will not enable", feature.fi_guid); if (0 == zap_contains(mos, spa->spa_feat_desc_obj, feature.fi_guid)) fatal("feature already enabled: %s", feature.fi_guid); VERIFY3U(0, ==, dsl_sync_task_do(spa->spa_dsl_pool, NULL, feature_enable_sync, spa, &feature, 5)); spa_close(spa, FTAG); free(desc); } static void feature_incr_sync(void *arg1, void *arg2, dmu_tx_t *tx) { spa_t *spa = arg1; zfeature_info_t *feature = arg2; spa_feature_incr(spa, feature, tx); + spa_history_log_internal(spa, "zhack feature incr", tx, + "name=%s", feature->fi_guid); } static void feature_decr_sync(void *arg1, void *arg2, dmu_tx_t *tx) { spa_t *spa = arg1; zfeature_info_t *feature = arg2; spa_feature_decr(spa, feature, tx); + spa_history_log_internal(spa, "zhack feature decr", tx, + "name=%s", feature->fi_guid); } static void zhack_do_feature_ref(int argc, char **argv) { char c; char *target; boolean_t decr = B_FALSE; spa_t *spa; objset_t *mos; zfeature_info_t feature; zfeature_info_t *nodeps[] = { NULL }; /* * fi_desc does not matter here because it was written to disk * when the feature was enabled, but we need to properly set the * feature for read or write based on the information we read off * disk later. */ feature.fi_uname = "zhack"; feature.fi_mos = B_FALSE; feature.fi_desc = NULL; feature.fi_depends = nodeps; optind = 1; while ((c = getopt(argc, argv, "md")) != -1) { switch (c) { case 'm': feature.fi_mos = B_TRUE; break; case 'd': decr = B_TRUE; break; default: usage(); break; } } argc -= optind; argv += optind; if (argc < 2) { (void) fprintf(stderr, "error: missing feature or pool name\n"); usage(); } target = argv[0]; feature.fi_guid = argv[1]; if (!zfeature_is_valid_guid(feature.fi_guid)) fatal("invalid feature guid: %s", feature.fi_guid); zhack_spa_open(target, B_FALSE, FTAG, &spa); mos = spa->spa_meta_objset; if (0 == zfeature_lookup_guid(feature.fi_guid, NULL)) fatal("'%s' is a real feature, will not change refcount", feature.fi_guid); if (0 == zap_contains(mos, spa->spa_feat_for_read_obj, feature.fi_guid)) { feature.fi_can_readonly = B_FALSE; } else if (0 == zap_contains(mos, spa->spa_feat_for_write_obj, feature.fi_guid)) { feature.fi_can_readonly = B_TRUE; } else { fatal("feature is not enabled: %s", feature.fi_guid); } if (decr && !spa_feature_is_active(spa, &feature)) fatal("feature refcount already 0: %s", feature.fi_guid); VERIFY3U(0, ==, dsl_sync_task_do(spa->spa_dsl_pool, NULL, decr ?
feature_decr_sync : feature_incr_sync, spa, &feature, 5)); spa_close(spa, FTAG); } static int zhack_do_feature(int argc, char **argv) { char *subcommand; argc--; argv++; if (argc == 0) { (void) fprintf(stderr, "error: no feature operation specified\n"); usage(); } subcommand = argv[0]; if (strcmp(subcommand, "stat") == 0) { zhack_do_feature_stat(argc, argv); } else if (strcmp(subcommand, "enable") == 0) { zhack_do_feature_enable(argc, argv); } else if (strcmp(subcommand, "ref") == 0) { zhack_do_feature_ref(argc, argv); } else { (void) fprintf(stderr, "error: unknown subcommand: %s\n", subcommand); usage(); } return (0); } #define MAX_NUM_PATHS 1024 int main(int argc, char **argv) { extern void zfs_prop_init(void); char *path[MAX_NUM_PATHS]; const char *subcommand; int rv = 0; char c; g_importargs.path = path; dprintf_setup(&argc, argv); zfs_prop_init(); while ((c = getopt(argc, argv, "c:d:")) != -1) { switch (c) { case 'c': g_importargs.cachefile = optarg; break; case 'd': assert(g_importargs.paths < MAX_NUM_PATHS); g_importargs.path[g_importargs.paths++] = optarg; break; default: usage(); break; } } argc -= optind; argv += optind; optind = 1; if (argc == 0) { (void) fprintf(stderr, "error: no command specified\n"); usage(); } subcommand = argv[0]; if (strcmp(subcommand, "feature") == 0) { rv = zhack_do_feature(argc, argv); } else { (void) fprintf(stderr, "error: unknown subcommand: %s\n", subcommand); usage(); } if (!g_readonly && spa_export(g_pool, NULL, B_TRUE, B_TRUE) != 0) { fatal("pool export failed; " "changes may not be committed to disk\n"); } libzfs_fini(g_zfs); kernel_fini(); return (rv); } diff --git a/cmd/zpool/zpool_main.c b/cmd/zpool/zpool_main.c index b96fbe4ace5b..152f289594d0 100644 --- a/cmd/zpool/zpool_main.c +++ b/cmd/zpool/zpool_main.c @@ -1,5681 +1,5705 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright 2011 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2012 by Delphix. All rights reserved. * Copyright (c) 2012 by Frederik Wessels. All rights reserved. * Copyright (c) 2012 by Cyril Plisko. All rights reserved. 
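zhack's main() collects repeated -d options into a fixed-size path array bounded by MAX_NUM_PATHS and remembers a single -c cachefile. A standalone sketch of that accumulation pattern follows, using only standard getopt; the bound and variable names are copied for illustration.

#include <assert.h>
#include <stdio.h>
#include <unistd.h>

#define MAX_NUM_PATHS   1024

int
main(int argc, char **argv)
{
    char *path[MAX_NUM_PATHS];
    int npaths = 0;
    char *cachefile = NULL;
    int c, i;

    while ((c = getopt(argc, argv, "c:d:")) != -1) {
        switch (c) {
        case 'c':
            cachefile = optarg;
            break;
        case 'd':
            /* bail out rather than overflow the fixed array */
            assert(npaths < MAX_NUM_PATHS);
            path[npaths++] = optarg;
            break;
        default:
            (void) fprintf(stderr,
                "usage: %s [-c cachefile] [-d dir] ...\n", argv[0]);
            return (2);
        }
    }

    if (cachefile != NULL)
        (void) printf("cachefile: %s\n", cachefile);
    for (i = 0; i < npaths; i++)
        (void) printf("search dir %d: %s\n", i, path[i]);
    return (0);
}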
*/ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "zpool_util.h" #include "zfs_comutil.h" #include "zfeature_common.h" #include "statcommon.h" static int zpool_do_create(int, char **); static int zpool_do_destroy(int, char **); static int zpool_do_add(int, char **); static int zpool_do_remove(int, char **); static int zpool_do_labelclear(int, char **); static int zpool_do_list(int, char **); static int zpool_do_iostat(int, char **); static int zpool_do_status(int, char **); static int zpool_do_online(int, char **); static int zpool_do_offline(int, char **); static int zpool_do_clear(int, char **); static int zpool_do_reopen(int, char **); static int zpool_do_reguid(int, char **); static int zpool_do_attach(int, char **); static int zpool_do_detach(int, char **); static int zpool_do_replace(int, char **); static int zpool_do_split(int, char **); static int zpool_do_scrub(int, char **); static int zpool_do_import(int, char **); static int zpool_do_export(int, char **); static int zpool_do_upgrade(int, char **); static int zpool_do_history(int, char **); static int zpool_do_events(int, char **); static int zpool_do_get(int, char **); static int zpool_do_set(int, char **); /* * These libumem hooks provide a reasonable set of defaults for the allocator's * debugging facilities. */ #ifdef DEBUG const char * _umem_debug_init(void) { return ("default,verbose"); /* $UMEM_DEBUG setting */ } const char * _umem_logging_init(void) { return ("fail,contents"); /* $UMEM_LOGGING setting */ } #endif typedef enum { HELP_ADD, HELP_ATTACH, HELP_CLEAR, HELP_CREATE, HELP_DESTROY, HELP_DETACH, HELP_EXPORT, HELP_HISTORY, HELP_IMPORT, HELP_IOSTAT, HELP_LABELCLEAR, HELP_LIST, HELP_OFFLINE, HELP_ONLINE, HELP_REPLACE, HELP_REMOVE, HELP_SCRUB, HELP_STATUS, HELP_UPGRADE, HELP_EVENTS, HELP_GET, HELP_SET, HELP_SPLIT, HELP_REGUID, HELP_REOPEN } zpool_help_t; typedef struct zpool_command { const char *name; int (*func)(int, char **); zpool_help_t usage; } zpool_command_t; /* * Master command table. Each ZFS command has a name, associated function, and * usage message. The usage messages need to be internationalized, so we have * to have a function to return the usage message based on a command index. * * These commands are organized according to how they are displayed in the usage * message. An empty command (one with a NULL name) indicates an empty line in * the generic usage message. 
*/ static zpool_command_t command_table[] = { { "create", zpool_do_create, HELP_CREATE }, { "destroy", zpool_do_destroy, HELP_DESTROY }, { NULL }, { "add", zpool_do_add, HELP_ADD }, { "remove", zpool_do_remove, HELP_REMOVE }, { NULL }, { "labelclear", zpool_do_labelclear, HELP_LABELCLEAR }, { NULL }, { "list", zpool_do_list, HELP_LIST }, { "iostat", zpool_do_iostat, HELP_IOSTAT }, { "status", zpool_do_status, HELP_STATUS }, { NULL }, { "online", zpool_do_online, HELP_ONLINE }, { "offline", zpool_do_offline, HELP_OFFLINE }, { "clear", zpool_do_clear, HELP_CLEAR }, { "reopen", zpool_do_reopen, HELP_REOPEN }, { NULL }, { "attach", zpool_do_attach, HELP_ATTACH }, { "detach", zpool_do_detach, HELP_DETACH }, { "replace", zpool_do_replace, HELP_REPLACE }, { "split", zpool_do_split, HELP_SPLIT }, { NULL }, { "scrub", zpool_do_scrub, HELP_SCRUB }, { NULL }, { "import", zpool_do_import, HELP_IMPORT }, { "export", zpool_do_export, HELP_EXPORT }, { "upgrade", zpool_do_upgrade, HELP_UPGRADE }, { "reguid", zpool_do_reguid, HELP_REGUID }, { NULL }, { "history", zpool_do_history, HELP_HISTORY }, { "events", zpool_do_events, HELP_EVENTS }, { NULL }, { "get", zpool_do_get, HELP_GET }, { "set", zpool_do_set, HELP_SET }, }; #define NCOMMAND (sizeof (command_table) / sizeof (command_table[0])) -zpool_command_t *current_command; +static zpool_command_t *current_command; static char history_str[HIS_MAX_RECORD_LEN]; - +static boolean_t log_history = B_TRUE; static uint_t timestamp_fmt = NODATE; static const char * get_usage(zpool_help_t idx) { switch (idx) { case HELP_ADD: return (gettext("\tadd [-fn] [-o property=value] " " ...\n")); case HELP_ATTACH: return (gettext("\tattach [-f] [-o property=value] " " \n")); case HELP_CLEAR: return (gettext("\tclear [-nF] [device]\n")); case HELP_CREATE: return (gettext("\tcreate [-fnd] [-o property=value] ... \n" "\t [-O file-system-property=value] ... \n" "\t [-m mountpoint] [-R root] ...\n")); case HELP_DESTROY: return (gettext("\tdestroy [-f] \n")); case HELP_DETACH: return (gettext("\tdetach \n")); case HELP_EXPORT: return (gettext("\texport [-f] ...\n")); case HELP_HISTORY: return (gettext("\thistory [-il] [] ...\n")); case HELP_IMPORT: return (gettext("\timport [-d dir] [-D]\n" "\timport [-d dir | -c cachefile] [-F [-n]] \n" "\timport [-o mntopts] [-o property=value] ... \n" "\t [-d dir | -c cachefile] [-D] [-f] [-m] [-N] " "[-R root] [-F [-n]] -a\n" "\timport [-o mntopts] [-o property=value] ... \n" "\t [-d dir | -c cachefile] [-D] [-f] [-m] [-N] " "[-R root] [-F [-n]]\n" "\t [newpool]\n")); case HELP_IOSTAT: return (gettext("\tiostat [-v] [-T d|u] [pool] ... [interval " "[count]]\n")); case HELP_LABELCLEAR: return (gettext("\tlabelclear [-f] \n")); case HELP_LIST: return (gettext("\tlist [-Hv] [-o property[,...]] " "[-T d|u] [pool] ... [interval [count]]\n")); case HELP_OFFLINE: return (gettext("\toffline [-t] ...\n")); case HELP_ONLINE: return (gettext("\tonline ...\n")); case HELP_REPLACE: return (gettext("\treplace [-f] " "[new-device]\n")); case HELP_REMOVE: return (gettext("\tremove ...\n")); case HELP_REOPEN: return (gettext("\treopen \n")); case HELP_SCRUB: return (gettext("\tscrub [-s] ...\n")); case HELP_STATUS: return (gettext("\tstatus [-vx] [-T d|u] [pool] ... 
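The command_table[] above pairs each subcommand name with its handler, and NULL-named entries only produce blank lines in the generic usage output. The following toy sketch shows the same lookup-and-dispatch idea in isolation; the two handlers and their behavior are purely illustrative.

#include <stdio.h>
#include <string.h>

typedef struct command {
    const char *name;
    int (*func)(int, char **);
} command_t;

static int
do_hello(int argc, char **argv)
{
    (void) argc; (void) argv;
    (void) printf("hello\n");
    return (0);
}

static int
do_bye(int argc, char **argv)
{
    (void) argc; (void) argv;
    (void) printf("bye\n");
    return (0);
}

static command_t command_table[] = {
    { "hello", do_hello },
    { NULL },           /* blank line in the usage listing */
    { "bye", do_bye },
};

#define NCOMMAND (sizeof (command_table) / sizeof (command_table[0]))

int
main(int argc, char **argv)
{
    unsigned i;

    if (argc < 2) {
        (void) fprintf(stderr, "usage: %s <command>\n", argv[0]);
        return (2);
    }
    for (i = 0; i < NCOMMAND; i++) {
        if (command_table[i].name != NULL &&
            strcmp(argv[1], command_table[i].name) == 0)
            return (command_table[i].func(argc - 1, argv + 1));
    }
    (void) fprintf(stderr, "unknown command: %s\n", argv[1]);
    return (2);
}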
[interval " "[count]]\n")); case HELP_UPGRADE: return (gettext("\tupgrade\n" "\tupgrade -v\n" "\tupgrade [-V version] <-a | pool ...>\n")); case HELP_EVENTS: return (gettext("\tevents [-vHfc]\n")); case HELP_GET: return (gettext("\tget <\"all\" | property[,...]> " " ...\n")); case HELP_SET: return (gettext("\tset \n")); case HELP_SPLIT: return (gettext("\tsplit [-n] [-R altroot] [-o mntopts]\n" "\t [-o property=value] " "[ ...]\n")); case HELP_REGUID: return (gettext("\treguid \n")); } abort(); /* NOTREACHED */ } /* * Callback routine that will print out a pool property value. */ static int print_prop_cb(int prop, void *cb) { FILE *fp = cb; (void) fprintf(fp, "\t%-15s ", zpool_prop_to_name(prop)); if (zpool_prop_readonly(prop)) (void) fprintf(fp, " NO "); else (void) fprintf(fp, " YES "); if (zpool_prop_values(prop) == NULL) (void) fprintf(fp, "-\n"); else (void) fprintf(fp, "%s\n", zpool_prop_values(prop)); return (ZPROP_CONT); } /* * Display usage message. If we're inside a command, display only the usage for * that command. Otherwise, iterate over the entire command table and display * a complete usage message. */ void usage(boolean_t requested) { FILE *fp = requested ? stdout : stderr; if (current_command == NULL) { int i; (void) fprintf(fp, gettext("usage: zpool command args ...\n")); (void) fprintf(fp, gettext("where 'command' is one of the following:\n\n")); for (i = 0; i < NCOMMAND; i++) { if (command_table[i].name == NULL) (void) fprintf(fp, "\n"); else (void) fprintf(fp, "%s", get_usage(command_table[i].usage)); } } else { (void) fprintf(fp, gettext("usage:\n")); (void) fprintf(fp, "%s", get_usage(current_command->usage)); } if (current_command != NULL && ((strcmp(current_command->name, "set") == 0) || (strcmp(current_command->name, "get") == 0) || (strcmp(current_command->name, "list") == 0))) { (void) fprintf(fp, gettext("\nthe following properties are supported:\n")); (void) fprintf(fp, "\n\t%-15s %s %s\n\n", "PROPERTY", "EDIT", "VALUES"); /* Iterate over all properties */ (void) zprop_iter(print_prop_cb, fp, B_FALSE, B_TRUE, ZFS_TYPE_POOL); (void) fprintf(fp, "\t%-15s ", "feature@..."); (void) fprintf(fp, "YES disabled | enabled | active\n"); (void) fprintf(fp, gettext("\nThe feature@ properties must be " "appended with a feature name.\nSee zpool-features(5).\n")); } /* * See comments at end of main(). */ if (getenv("ZFS_ABORT") != NULL) { (void) printf("dumping core by request\n"); abort(); } exit(requested ? 0 : 2); } void print_vdev_tree(zpool_handle_t *zhp, const char *name, nvlist_t *nv, int indent, boolean_t print_logs) { nvlist_t **child; uint_t c, children; char *vname; if (name != NULL) (void) printf("\t%*s%s\n", indent, "", name); if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, &child, &children) != 0) return; for (c = 0; c < children; c++) { uint64_t is_log = B_FALSE; (void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG, &is_log); if ((is_log && !print_logs) || (!is_log && print_logs)) continue; vname = zpool_vdev_name(g_zfs, zhp, child[c], B_FALSE); print_vdev_tree(zhp, vname, child[c], indent + 2, B_FALSE); free(vname); } } static boolean_t prop_list_contains_feature(nvlist_t *proplist) { nvpair_t *nvp; for (nvp = nvlist_next_nvpair(proplist, NULL); NULL != nvp; nvp = nvlist_next_nvpair(proplist, nvp)) { if (zpool_prop_feature(nvpair_name(nvp))) return (B_TRUE); } return (B_FALSE); } /* * Add a property pair (name, string-value) into a property nvlist. 
*/ static int add_prop_list(const char *propname, char *propval, nvlist_t **props, boolean_t poolprop) { zpool_prop_t prop = ZPROP_INVAL; zfs_prop_t fprop; nvlist_t *proplist; const char *normnm; char *strval; if (*props == NULL && nvlist_alloc(props, NV_UNIQUE_NAME, 0) != 0) { (void) fprintf(stderr, gettext("internal error: out of memory\n")); return (1); } proplist = *props; if (poolprop) { const char *vname = zpool_prop_to_name(ZPOOL_PROP_VERSION); if ((prop = zpool_name_to_prop(propname)) == ZPROP_INVAL && !zpool_prop_feature(propname)) { (void) fprintf(stderr, gettext("property '%s' is " "not a valid pool property\n"), propname); return (2); } /* * feature@ properties and version should not be specified * at the same time. */ if ((prop == ZPROP_INVAL && zpool_prop_feature(propname) && nvlist_exists(proplist, vname)) || (prop == ZPOOL_PROP_VERSION && prop_list_contains_feature(proplist))) { (void) fprintf(stderr, gettext("'feature@' and " "'version' properties cannot be specified " "together\n")); return (2); } if (zpool_prop_feature(propname)) normnm = propname; else normnm = zpool_prop_to_name(prop); } else { if ((fprop = zfs_name_to_prop(propname)) != ZPROP_INVAL) { normnm = zfs_prop_to_name(fprop); } else { normnm = propname; } } if (nvlist_lookup_string(proplist, normnm, &strval) == 0 && prop != ZPOOL_PROP_CACHEFILE) { (void) fprintf(stderr, gettext("property '%s' " "specified multiple times\n"), propname); return (2); } if (nvlist_add_string(proplist, normnm, propval) != 0) { (void) fprintf(stderr, gettext("internal " "error: out of memory\n")); return (1); } return (0); } /* * zpool add [-fn] [-o property=value] ... * * -f Force addition of devices, even if they appear in use * -n Do not add the devices, but display the resulting layout if * they were to be added. * -o Set property=value. * * Adds the given vdevs to 'pool'. As with create, the bulk of this work is * handled by get_vdev_spec(), which constructs the nvlist needed to pass to * libzfs. 
*/ int zpool_do_add(int argc, char **argv) { boolean_t force = B_FALSE; boolean_t dryrun = B_FALSE; int c; nvlist_t *nvroot; char *poolname; int ret; zpool_handle_t *zhp; nvlist_t *config; nvlist_t *props = NULL; char *propval; /* check options */ while ((c = getopt(argc, argv, "fno:")) != -1) { switch (c) { case 'f': force = B_TRUE; break; case 'n': dryrun = B_TRUE; break; case 'o': if ((propval = strchr(optarg, '=')) == NULL) { (void) fprintf(stderr, gettext("missing " "'=' for -o option\n")); usage(B_FALSE); } *propval = '\0'; propval++; if ((strcmp(optarg, ZPOOL_CONFIG_ASHIFT) != 0) || (add_prop_list(optarg, propval, &props, B_TRUE))) usage(B_FALSE); break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); usage(B_FALSE); } } argc -= optind; argv += optind; /* get pool name and check number of arguments */ if (argc < 1) { (void) fprintf(stderr, gettext("missing pool name argument\n")); usage(B_FALSE); } if (argc < 2) { (void) fprintf(stderr, gettext("missing vdev specification\n")); usage(B_FALSE); } poolname = argv[0]; argc--; argv++; if ((zhp = zpool_open(g_zfs, poolname)) == NULL) return (1); if ((config = zpool_get_config(zhp, NULL)) == NULL) { (void) fprintf(stderr, gettext("pool '%s' is unavailable\n"), poolname); zpool_close(zhp); return (1); } /* pass off to get_vdev_spec for processing */ nvroot = make_root_vdev(zhp, props, force, !force, B_FALSE, dryrun, argc, argv); if (nvroot == NULL) { zpool_close(zhp); return (1); } if (dryrun) { nvlist_t *poolnvroot; verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &poolnvroot) == 0); (void) printf(gettext("would update '%s' to the following " "configuration:\n"), zpool_get_name(zhp)); /* print original main pool and new tree */ print_vdev_tree(zhp, poolname, poolnvroot, 0, B_FALSE); print_vdev_tree(zhp, NULL, nvroot, 0, B_FALSE); /* Do the same for the logs */ if (num_logs(poolnvroot) > 0) { print_vdev_tree(zhp, "logs", poolnvroot, 0, B_TRUE); print_vdev_tree(zhp, NULL, nvroot, 0, B_TRUE); } else if (num_logs(nvroot) > 0) { print_vdev_tree(zhp, "logs", nvroot, 0, B_TRUE); } ret = 0; } else { ret = (zpool_add(zhp, nvroot) != 0); } nvlist_free(props); nvlist_free(nvroot); zpool_close(zhp); return (ret); } /* * zpool remove ... * * Removes the given vdev from the pool. Currently, this supports removing * spares, cache, and log devices from the pool. */ int zpool_do_remove(int argc, char **argv) { char *poolname; int i, ret = 0; zpool_handle_t *zhp; argc--; argv++; /* get pool name and check number of arguments */ if (argc < 1) { (void) fprintf(stderr, gettext("missing pool name argument\n")); usage(B_FALSE); } if (argc < 2) { (void) fprintf(stderr, gettext("missing device\n")); usage(B_FALSE); } poolname = argv[0]; if ((zhp = zpool_open(g_zfs, poolname)) == NULL) return (1); for (i = 1; i < argc; i++) { if (zpool_vdev_remove(zhp, argv[i]) != 0) ret = 1; } return (ret); } /* * zpool labelclear * * Verifies that the vdev is not active and zeros out the label information * on the device. 
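The -o and -O handlers that feed add_prop_list() split their argument in place at the first '=' and error out when it is missing. A standalone sketch of just that split is below; split_propval is an illustrative name, not a libzfs routine.

#include <stdio.h>
#include <string.h>

/*
 * Split "name=value" in place: terminate the name at '=' and
 * return a pointer to the value, or NULL if no '=' is present.
 */
static char *
split_propval(char *arg)
{
    char *propval = strchr(arg, '=');

    if (propval == NULL)
        return (NULL);
    *propval = '\0';
    return (propval + 1);
}

int
main(void)
{
    char arg[] = "ashift=12";
    char *propval = split_propval(arg);

    if (propval == NULL) {
        (void) fprintf(stderr, "missing '=' for -o option\n");
        return (2);
    }
    (void) printf("property '%s' = '%s'\n", arg, propval);
    return (0);
}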
*/ int zpool_do_labelclear(int argc, char **argv) { char *vdev, *name; int c, fd = -1, ret = 0; pool_state_t state; boolean_t inuse = B_FALSE; boolean_t force = B_FALSE; /* check options */ while ((c = getopt(argc, argv, "f")) != -1) { switch (c) { case 'f': force = B_TRUE; break; default: (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); usage(B_FALSE); } } argc -= optind; argv += optind; /* get vdev name */ if (argc < 1) { (void) fprintf(stderr, gettext("missing vdev device name\n")); usage(B_FALSE); } vdev = argv[0]; if ((fd = open(vdev, O_RDWR)) < 0) { (void) fprintf(stderr, gettext("Unable to open %s\n"), vdev); return (B_FALSE); } name = NULL; if (zpool_in_use(g_zfs, fd, &state, &name, &inuse) != 0) { if (force) goto wipe_label; (void) fprintf(stderr, gettext("Unable to determine pool state for %s\n" "Use -f to force the clearing any label data\n"), vdev); return (1); } if (inuse) { switch (state) { default: case POOL_STATE_ACTIVE: case POOL_STATE_SPARE: case POOL_STATE_L2CACHE: (void) fprintf(stderr, gettext("labelclear operation failed.\n" "\tVdev %s is a member (%s), of pool \"%s\".\n" "\tTo remove label information from this device, " "export or destroy\n\tthe pool, or remove %s from " "the configuration of this pool\n\tand retry the " "labelclear operation.\n"), vdev, zpool_pool_state_to_name(state), name, vdev); ret = 1; goto errout; case POOL_STATE_EXPORTED: if (force) break; (void) fprintf(stderr, gettext("labelclear operation failed.\n\tVdev " "%s is a member of the exported pool \"%s\".\n" "\tUse \"zpool labelclear -f %s\" to force the " "removal of label\n\tinformation.\n"), vdev, name, vdev); ret = 1; goto errout; case POOL_STATE_POTENTIALLY_ACTIVE: if (force) break; (void) fprintf(stderr, gettext("labelclear operation failed.\n" "\tVdev %s is a member of the pool \"%s\".\n" "\tThis pool is unknown to this system, but may " "be active on\n\tanother system. Use " "\'zpool labelclear -f %s\' to force the\n" "\tremoval of label information.\n"), vdev, name, vdev); ret = 1; goto errout; case POOL_STATE_DESTROYED: /* inuse should never be set for a destroyed pool... */ break; } } wipe_label: if (zpool_clear_label(fd) != 0) { (void) fprintf(stderr, gettext("Label clear failed on vdev %s\n"), vdev); ret = 1; } errout: close(fd); if (name != NULL) free(name); return (ret); } /* * zpool create [-fnd] [-o property=value] ... * [-O file-system-property=value] ... * [-R root] [-m mountpoint] ... * * -f Force creation, even if devices appear in use * -n Do not create the pool, but display the resulting layout if it * were to be created. * -R Create a pool under an alternate root * -m Set default mountpoint for the root dataset. By default it's * '/' * -o Set property=value. * -d Don't automatically enable all supported pool features * (individual features can be enabled with -o). * -O Set fsproperty=value in the pool's root file system * * Creates the named pool according to the given vdev specification. The * bulk of the vdev processing is done in get_vdev_spec() in zpool_vdev.c. Once * we get the nvlist back from get_vdev_spec(), we either print out the contents * (if '-n' was specified), or pass it to libzfs to do the creation. 
*/ int zpool_do_create(int argc, char **argv) { boolean_t force = B_FALSE; boolean_t dryrun = B_FALSE; boolean_t enable_all_pool_feat = B_TRUE; int c; nvlist_t *nvroot = NULL; char *poolname; int ret = 1; char *altroot = NULL; char *mountpoint = NULL; nvlist_t *fsprops = NULL; nvlist_t *props = NULL; char *propval; /* check options */ while ((c = getopt(argc, argv, ":fndR:m:o:O:")) != -1) { switch (c) { case 'f': force = B_TRUE; break; case 'n': dryrun = B_TRUE; break; case 'd': enable_all_pool_feat = B_FALSE; break; case 'R': altroot = optarg; if (add_prop_list(zpool_prop_to_name( ZPOOL_PROP_ALTROOT), optarg, &props, B_TRUE)) goto errout; if (nvlist_lookup_string(props, zpool_prop_to_name(ZPOOL_PROP_CACHEFILE), &propval) == 0) break; if (add_prop_list(zpool_prop_to_name( ZPOOL_PROP_CACHEFILE), "none", &props, B_TRUE)) goto errout; break; case 'm': mountpoint = optarg; break; case 'o': if ((propval = strchr(optarg, '=')) == NULL) { (void) fprintf(stderr, gettext("missing " "'=' for -o option\n")); goto errout; } *propval = '\0'; propval++; if (add_prop_list(optarg, propval, &props, B_TRUE)) goto errout; /* * If the user is creating a pool that doesn't support * feature flags, don't enable any features. */ if (zpool_name_to_prop(optarg) == ZPOOL_PROP_VERSION) { char *end; u_longlong_t ver; ver = strtoull(propval, &end, 10); if (*end == '\0' && ver < SPA_VERSION_FEATURES) { enable_all_pool_feat = B_FALSE; } } break; case 'O': if ((propval = strchr(optarg, '=')) == NULL) { (void) fprintf(stderr, gettext("missing " "'=' for -O option\n")); goto errout; } *propval = '\0'; propval++; if (add_prop_list(optarg, propval, &fsprops, B_FALSE)) goto errout; break; case ':': (void) fprintf(stderr, gettext("missing argument for " "'%c' option\n"), optopt); goto badusage; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); goto badusage; } } argc -= optind; argv += optind; /* get pool name and check number of arguments */ if (argc < 1) { (void) fprintf(stderr, gettext("missing pool name argument\n")); goto badusage; } if (argc < 2) { (void) fprintf(stderr, gettext("missing vdev specification\n")); goto badusage; } poolname = argv[0]; /* * As a special case, check for use of '/' in the name, and direct the * user to use 'zfs create' instead. */ if (strchr(poolname, '/') != NULL) { (void) fprintf(stderr, gettext("cannot create '%s': invalid " "character '/' in pool name\n"), poolname); (void) fprintf(stderr, gettext("use 'zfs create' to " "create a dataset\n")); goto errout; } /* pass off to get_vdev_spec for bulk processing */ nvroot = make_root_vdev(NULL, props, force, !force, B_FALSE, dryrun, argc - 1, argv + 1); if (nvroot == NULL) goto errout; /* make_root_vdev() allows 0 toplevel children if there are spares */ if (!zfs_allocatable_devs(nvroot)) { (void) fprintf(stderr, gettext("invalid vdev " "specification: at least one toplevel vdev must be " "specified\n")); goto errout; } if (altroot != NULL && altroot[0] != '/') { (void) fprintf(stderr, gettext("invalid alternate root '%s': " "must be an absolute path\n"), altroot); goto errout; } /* * Check the validity of the mountpoint and direct the user to use the * '-m' mountpoint option if it looks like its in use. 
*/ if (mountpoint == NULL || (strcmp(mountpoint, ZFS_MOUNTPOINT_LEGACY) != 0 && strcmp(mountpoint, ZFS_MOUNTPOINT_NONE) != 0)) { char buf[MAXPATHLEN]; DIR *dirp; if (mountpoint && mountpoint[0] != '/') { (void) fprintf(stderr, gettext("invalid mountpoint " "'%s': must be an absolute path, 'legacy', or " "'none'\n"), mountpoint); goto errout; } if (mountpoint == NULL) { if (altroot != NULL) (void) snprintf(buf, sizeof (buf), "%s/%s", altroot, poolname); else (void) snprintf(buf, sizeof (buf), "/%s", poolname); } else { if (altroot != NULL) (void) snprintf(buf, sizeof (buf), "%s%s", altroot, mountpoint); else (void) snprintf(buf, sizeof (buf), "%s", mountpoint); } if ((dirp = opendir(buf)) == NULL && errno != ENOENT) { (void) fprintf(stderr, gettext("mountpoint '%s' : " "%s\n"), buf, strerror(errno)); (void) fprintf(stderr, gettext("use '-m' " "option to provide a different default\n")); goto errout; } else if (dirp) { int count = 0; while (count < 3 && readdir(dirp) != NULL) count++; (void) closedir(dirp); if (count > 2) { (void) fprintf(stderr, gettext("mountpoint " "'%s' exists and is not empty\n"), buf); (void) fprintf(stderr, gettext("use '-m' " "option to provide a " "different default\n")); goto errout; } } } if (dryrun) { /* * For a dry run invocation, print out a basic message and run * through all the vdevs in the list and print out in an * appropriate hierarchy. */ (void) printf(gettext("would create '%s' with the " "following layout:\n\n"), poolname); print_vdev_tree(NULL, poolname, nvroot, 0, B_FALSE); if (num_logs(nvroot) > 0) print_vdev_tree(NULL, "logs", nvroot, 0, B_TRUE); ret = 0; } else { /* * Hand off to libzfs. */ if (enable_all_pool_feat) { int i; for (i = 0; i < SPA_FEATURES; i++) { char propname[MAXPATHLEN]; zfeature_info_t *feat = &spa_feature_table[i]; (void) snprintf(propname, sizeof (propname), "feature@%s", feat->fi_uname); /* * Skip feature if user specified it manually * on the command line. */ if (nvlist_exists(props, propname)) continue; if (add_prop_list(propname, ZFS_FEATURE_ENABLED, &props, B_TRUE) != 0) goto errout; } } if (zpool_create(g_zfs, poolname, nvroot, props, fsprops) == 0) { zfs_handle_t *pool = zfs_open(g_zfs, poolname, ZFS_TYPE_FILESYSTEM); if (pool != NULL) { if (mountpoint != NULL) verify(zfs_prop_set(pool, zfs_prop_to_name( ZFS_PROP_MOUNTPOINT), mountpoint) == 0); if (zfs_mount(pool, NULL, 0) == 0) ret = zfs_shareall(pool); zfs_close(pool); } } else if (libzfs_errno(g_zfs) == EZFS_INVALIDNAME) { (void) fprintf(stderr, gettext("pool name may have " "been omitted\n")); } } errout: nvlist_free(nvroot); nvlist_free(fsprops); nvlist_free(props); return (ret); badusage: nvlist_free(fsprops); nvlist_free(props); usage(B_FALSE); return (2); } /* * zpool destroy * * -f Forcefully unmount any datasets * * Destroy the given pool. Automatically unmounts any datasets in the pool. 
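zpool_do_create() decides whether a prospective mountpoint "exists and is not empty" by reading at most three directory entries and checking whether more than the standard '.' and '..' showed up. A standalone sketch of that check follows; dir_is_empty is an illustrative name.

#include <dirent.h>
#include <errno.h>
#include <stdio.h>

/*
 * Return 1 if the directory is empty or missing, 0 if it has real
 * entries, -1 on any other error.  Only the first three entries are
 * read, exactly like the mountpoint check above.
 */
static int
dir_is_empty(const char *path)
{
    DIR *dirp;
    int count = 0;

    if ((dirp = opendir(path)) == NULL)
        return (errno == ENOENT ? 1 : -1);

    while (count < 3 && readdir(dirp) != NULL)
        count++;
    (void) closedir(dirp);

    /* "." and ".." always exist, so more than 2 means real contents */
    return (count > 2 ? 0 : 1);
}

int
main(int argc, char **argv)
{
    const char *path = (argc > 1) ? argv[1] : "/tmp";
    int rc = dir_is_empty(path);

    if (rc < 0)
        perror(path);
    else
        (void) printf("%s is %sempty\n", path, rc ? "" : "not ");
    return (0);
}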
*/ int zpool_do_destroy(int argc, char **argv) { boolean_t force = B_FALSE; int c; char *pool; zpool_handle_t *zhp; int ret; /* check options */ while ((c = getopt(argc, argv, "f")) != -1) { switch (c) { case 'f': force = B_TRUE; break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); usage(B_FALSE); } } argc -= optind; argv += optind; /* check arguments */ if (argc < 1) { (void) fprintf(stderr, gettext("missing pool argument\n")); usage(B_FALSE); } if (argc > 1) { (void) fprintf(stderr, gettext("too many arguments\n")); usage(B_FALSE); } pool = argv[0]; if ((zhp = zpool_open_canfail(g_zfs, pool)) == NULL) { /* * As a special case, check for use of '/' in the name, and * direct the user to use 'zfs destroy' instead. */ if (strchr(pool, '/') != NULL) (void) fprintf(stderr, gettext("use 'zfs destroy' to " "destroy a dataset\n")); return (1); } if (zpool_disable_datasets(zhp, force) != 0) { (void) fprintf(stderr, gettext("could not destroy '%s': " "could not unmount datasets\n"), zpool_get_name(zhp)); return (1); } - ret = (zpool_destroy(zhp) != 0); + /* The history must be logged as part of the export */ + log_history = B_FALSE; + + ret = (zpool_destroy(zhp, history_str) != 0); zpool_close(zhp); return (ret); } /* * zpool export [-f] ... * * -f Forcefully unmount datasets * * Export the given pools. By default, the command will attempt to cleanly * unmount any active datasets within the pool. If the '-f' flag is specified, * then the datasets will be forcefully unmounted. */ int zpool_do_export(int argc, char **argv) { boolean_t force = B_FALSE; boolean_t hardforce = B_FALSE; int c; zpool_handle_t *zhp; int ret; int i; /* check options */ while ((c = getopt(argc, argv, "fF")) != -1) { switch (c) { case 'f': force = B_TRUE; break; case 'F': hardforce = B_TRUE; break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); usage(B_FALSE); } } argc -= optind; argv += optind; /* check arguments */ if (argc < 1) { (void) fprintf(stderr, gettext("missing pool argument\n")); usage(B_FALSE); } ret = 0; for (i = 0; i < argc; i++) { if ((zhp = zpool_open_canfail(g_zfs, argv[i])) == NULL) { ret = 1; continue; } if (zpool_disable_datasets(zhp, force) != 0) { ret = 1; zpool_close(zhp); continue; } + /* The history must be logged as part of the export */ + log_history = B_FALSE; + if (hardforce) { - if (zpool_export_force(zhp) != 0) + if (zpool_export_force(zhp, history_str) != 0) ret = 1; - } else if (zpool_export(zhp, force) != 0) { + } else if (zpool_export(zhp, force, history_str) != 0) { ret = 1; } zpool_close(zhp); } return (ret); } /* * Given a vdev configuration, determine the maximum width needed for the device * name column. 
*/ static int max_width(zpool_handle_t *zhp, nvlist_t *nv, int depth, int max) { char *name = zpool_vdev_name(g_zfs, zhp, nv, B_TRUE); nvlist_t **child; uint_t c, children; int ret; if (strlen(name) + depth > max) max = strlen(name) + depth; free(name); if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES, &child, &children) == 0) { for (c = 0; c < children; c++) if ((ret = max_width(zhp, child[c], depth + 2, max)) > max) max = ret; } if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE, &child, &children) == 0) { for (c = 0; c < children; c++) if ((ret = max_width(zhp, child[c], depth + 2, max)) > max) max = ret; } if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, &child, &children) == 0) { for (c = 0; c < children; c++) if ((ret = max_width(zhp, child[c], depth + 2, max)) > max) max = ret; } return (max); } typedef struct spare_cbdata { uint64_t cb_guid; zpool_handle_t *cb_zhp; } spare_cbdata_t; static boolean_t find_vdev(nvlist_t *nv, uint64_t search) { uint64_t guid; nvlist_t **child; uint_t c, children; if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) == 0 && search == guid) return (B_TRUE); if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, &child, &children) == 0) { for (c = 0; c < children; c++) if (find_vdev(child[c], search)) return (B_TRUE); } return (B_FALSE); } static int find_spare(zpool_handle_t *zhp, void *data) { spare_cbdata_t *cbp = data; nvlist_t *config, *nvroot; config = zpool_get_config(zhp, NULL); verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); if (find_vdev(nvroot, cbp->cb_guid)) { cbp->cb_zhp = zhp; return (1); } zpool_close(zhp); return (0); } /* * Print out configuration state as requested by status_callback. */ void print_status_config(zpool_handle_t *zhp, const char *name, nvlist_t *nv, int namewidth, int depth, boolean_t isspare) { nvlist_t **child; uint_t c, children; pool_scan_stat_t *ps = NULL; vdev_stat_t *vs; char rbuf[6], wbuf[6], cbuf[6]; char *vname; uint64_t notpresent; spare_cbdata_t cb; char *state; if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, &child, &children) != 0) children = 0; verify(nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&vs, &c) == 0); state = zpool_state_to_name(vs->vs_state, vs->vs_aux); if (isspare) { /* * For hot spares, we use the terms 'INUSE' and 'AVAILABLE' for * online drives. 
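find_vdev() above recurses through the ZPOOL_CONFIG_CHILDREN arrays until a node's guid matches the one being searched for. The sketch below mirrors that depth-first recursion over a plain struct tree instead of nvlists; the vnode_t type and the sample tree are purely illustrative.

#include <stdio.h>
#include <stdint.h>

typedef struct vnode {
    uint64_t guid;
    struct vnode **children;
    unsigned nchildren;
} vnode_t;

/* Depth-first search for a guid, like find_vdev() does over nvlists. */
static int
find_guid(const vnode_t *nv, uint64_t search)
{
    unsigned c;

    if (nv->guid == search)
        return (1);
    for (c = 0; c < nv->nchildren; c++) {
        if (find_guid(nv->children[c], search))
            return (1);
    }
    return (0);
}

int
main(void)
{
    vnode_t leaf1 = { 101, NULL, 0 };
    vnode_t leaf2 = { 102, NULL, 0 };
    vnode_t *kids[] = { &leaf1, &leaf2 };
    vnode_t root = { 100, kids, 2 };

    (void) printf("guid 102 %sfound\n",
        find_guid(&root, 102) ? "" : "not ");
    (void) printf("guid 999 %sfound\n",
        find_guid(&root, 999) ? "" : "not ");
    return (0);
}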
*/ if (vs->vs_aux == VDEV_AUX_SPARED) state = "INUSE"; else if (vs->vs_state == VDEV_STATE_HEALTHY) state = "AVAIL"; } (void) printf("\t%*s%-*s %-8s", depth, "", namewidth - depth, name, state); if (!isspare) { zfs_nicenum(vs->vs_read_errors, rbuf, sizeof (rbuf)); zfs_nicenum(vs->vs_write_errors, wbuf, sizeof (wbuf)); zfs_nicenum(vs->vs_checksum_errors, cbuf, sizeof (cbuf)); (void) printf(" %5s %5s %5s", rbuf, wbuf, cbuf); } if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT, ¬present) == 0) { char *path; verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0); (void) printf(" was %s", path); } else if (vs->vs_aux != 0) { (void) printf(" "); switch (vs->vs_aux) { case VDEV_AUX_OPEN_FAILED: (void) printf(gettext("cannot open")); break; case VDEV_AUX_BAD_GUID_SUM: (void) printf(gettext("missing device")); break; case VDEV_AUX_NO_REPLICAS: (void) printf(gettext("insufficient replicas")); break; case VDEV_AUX_VERSION_NEWER: (void) printf(gettext("newer version")); break; case VDEV_AUX_UNSUP_FEAT: (void) printf(gettext("unsupported feature(s)")); break; case VDEV_AUX_SPARED: verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &cb.cb_guid) == 0); if (zpool_iter(g_zfs, find_spare, &cb) == 1) { if (strcmp(zpool_get_name(cb.cb_zhp), zpool_get_name(zhp)) == 0) (void) printf(gettext("currently in " "use")); else (void) printf(gettext("in use by " "pool '%s'"), zpool_get_name(cb.cb_zhp)); zpool_close(cb.cb_zhp); } else { (void) printf(gettext("currently in use")); } break; case VDEV_AUX_ERR_EXCEEDED: (void) printf(gettext("too many errors")); break; case VDEV_AUX_IO_FAILURE: (void) printf(gettext("experienced I/O failures")); break; case VDEV_AUX_BAD_LOG: (void) printf(gettext("bad intent log")); break; case VDEV_AUX_EXTERNAL: (void) printf(gettext("external device fault")); break; case VDEV_AUX_SPLIT_POOL: (void) printf(gettext("split into new pool")); break; default: (void) printf(gettext("corrupted data")); break; } } (void) nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_SCAN_STATS, (uint64_t **)&ps, &c); if (ps && ps->pss_state == DSS_SCANNING && vs->vs_scan_processed != 0 && children == 0) { (void) printf(gettext(" (%s)"), (ps->pss_func == POOL_SCAN_RESILVER) ? "resilvering" : "repairing"); } (void) printf("\n"); for (c = 0; c < children; c++) { uint64_t islog = B_FALSE, ishole = B_FALSE; /* Don't print logs or holes here */ (void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG, &islog); (void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_HOLE, &ishole); if (islog || ishole) continue; vname = zpool_vdev_name(g_zfs, zhp, child[c], B_TRUE); print_status_config(zhp, vname, child[c], namewidth, depth + 2, isspare); free(vname); } } /* * Print the configuration of an exported pool. Iterate over all vdevs in the * pool, printing out the name and status for each one. 
*/ void print_import_config(const char *name, nvlist_t *nv, int namewidth, int depth) { nvlist_t **child; uint_t c, children; vdev_stat_t *vs; char *type, *vname; verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0); if (strcmp(type, VDEV_TYPE_MISSING) == 0 || strcmp(type, VDEV_TYPE_HOLE) == 0) return; verify(nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&vs, &c) == 0); (void) printf("\t%*s%-*s", depth, "", namewidth - depth, name); (void) printf(" %s", zpool_state_to_name(vs->vs_state, vs->vs_aux)); if (vs->vs_aux != 0) { (void) printf(" "); switch (vs->vs_aux) { case VDEV_AUX_OPEN_FAILED: (void) printf(gettext("cannot open")); break; case VDEV_AUX_BAD_GUID_SUM: (void) printf(gettext("missing device")); break; case VDEV_AUX_NO_REPLICAS: (void) printf(gettext("insufficient replicas")); break; case VDEV_AUX_VERSION_NEWER: (void) printf(gettext("newer version")); break; case VDEV_AUX_UNSUP_FEAT: (void) printf(gettext("unsupported feature(s)")); break; case VDEV_AUX_ERR_EXCEEDED: (void) printf(gettext("too many errors")); break; default: (void) printf(gettext("corrupted data")); break; } } (void) printf("\n"); if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, &child, &children) != 0) return; for (c = 0; c < children; c++) { uint64_t is_log = B_FALSE; (void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG, &is_log); if (is_log) continue; vname = zpool_vdev_name(g_zfs, NULL, child[c], B_TRUE); print_import_config(vname, child[c], namewidth, depth + 2); free(vname); } if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE, &child, &children) == 0) { (void) printf(gettext("\tcache\n")); for (c = 0; c < children; c++) { vname = zpool_vdev_name(g_zfs, NULL, child[c], B_FALSE); (void) printf("\t %s\n", vname); free(vname); } } if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES, &child, &children) == 0) { (void) printf(gettext("\tspares\n")); for (c = 0; c < children; c++) { vname = zpool_vdev_name(g_zfs, NULL, child[c], B_FALSE); (void) printf("\t %s\n", vname); free(vname); } } } /* * Print log vdevs. * Logs are recorded as top level vdevs in the main pool child array * but with "is_log" set to 1. We use either print_status_config() or * print_import_config() to print the top level logs then any log * children (eg mirrored slogs) are printed recursively - which * works because only the top level vdev is marked "is_log" */ static void print_logs(zpool_handle_t *zhp, nvlist_t *nv, int namewidth, boolean_t verbose) { uint_t c, children; nvlist_t **child; if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, &child, &children) != 0) return; (void) printf(gettext("\tlogs\n")); for (c = 0; c < children; c++) { uint64_t is_log = B_FALSE; char *name; (void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG, &is_log); if (!is_log) continue; name = zpool_vdev_name(g_zfs, zhp, child[c], B_TRUE); if (verbose) print_status_config(zhp, name, child[c], namewidth, 2, B_FALSE); else print_import_config(name, child[c], namewidth, 2); free(name); } } /* * Display the status for the given pool. 
*/ static void show_import(nvlist_t *config) { uint64_t pool_state; vdev_stat_t *vs; char *name; uint64_t guid; char *msgid; nvlist_t *nvroot; int reason; const char *health; uint_t vsc; int namewidth; char *comment; verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME, &name) == 0); verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &guid) == 0); verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE, &pool_state) == 0); verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&vs, &vsc) == 0); health = zpool_state_to_name(vs->vs_state, vs->vs_aux); reason = zpool_import_status(config, &msgid); (void) printf(gettext(" pool: %s\n"), name); (void) printf(gettext(" id: %llu\n"), (u_longlong_t)guid); (void) printf(gettext(" state: %s"), health); if (pool_state == POOL_STATE_DESTROYED) (void) printf(gettext(" (DESTROYED)")); (void) printf("\n"); switch (reason) { case ZPOOL_STATUS_MISSING_DEV_R: case ZPOOL_STATUS_MISSING_DEV_NR: case ZPOOL_STATUS_BAD_GUID_SUM: (void) printf(gettext(" status: One or more devices are " "missing from the system.\n")); break; case ZPOOL_STATUS_CORRUPT_LABEL_R: case ZPOOL_STATUS_CORRUPT_LABEL_NR: (void) printf(gettext(" status: One or more devices contains " "corrupted data.\n")); break; case ZPOOL_STATUS_CORRUPT_DATA: (void) printf( gettext(" status: The pool data is corrupted.\n")); break; case ZPOOL_STATUS_OFFLINE_DEV: (void) printf(gettext(" status: One or more devices " "are offlined.\n")); break; case ZPOOL_STATUS_CORRUPT_POOL: (void) printf(gettext(" status: The pool metadata is " "corrupted.\n")); break; case ZPOOL_STATUS_VERSION_OLDER: (void) printf(gettext(" status: The pool is formatted using a " "legacy on-disk version.\n")); break; case ZPOOL_STATUS_VERSION_NEWER: (void) printf(gettext(" status: The pool is formatted using an " "incompatible version.\n")); break; case ZPOOL_STATUS_FEAT_DISABLED: (void) printf(gettext(" status: Some supported features are " "not enabled on the pool.\n")); break; case ZPOOL_STATUS_UNSUP_FEAT_READ: (void) printf(gettext("status: The pool uses the following " "feature(s) not supported on this sytem:\n")); zpool_print_unsup_feat(config); break; case ZPOOL_STATUS_UNSUP_FEAT_WRITE: (void) printf(gettext("status: The pool can only be accessed " "in read-only mode on this system. It\n\tcannot be " "accessed in read-write mode because it uses the " "following\n\tfeature(s) not supported on this system:\n")); zpool_print_unsup_feat(config); break; case ZPOOL_STATUS_HOSTID_MISMATCH: (void) printf(gettext(" status: The pool was last accessed by " "another system.\n")); break; case ZPOOL_STATUS_FAULTED_DEV_R: case ZPOOL_STATUS_FAULTED_DEV_NR: (void) printf(gettext(" status: One or more devices are " "faulted.\n")); break; case ZPOOL_STATUS_BAD_LOG: (void) printf(gettext(" status: An intent log record cannot be " "read.\n")); break; case ZPOOL_STATUS_RESILVERING: (void) printf(gettext(" status: One or more devices were being " "resilvered.\n")); break; default: /* * No other status can be seen when importing pools. */ assert(reason == ZPOOL_STATUS_OK); } /* * Print out an action according to the overall state of the pool. 
*/ if (vs->vs_state == VDEV_STATE_HEALTHY) { if (reason == ZPOOL_STATUS_VERSION_OLDER || reason == ZPOOL_STATUS_FEAT_DISABLED) { (void) printf(gettext(" action: The pool can be " "imported using its name or numeric identifier, " "though\n\tsome features will not be available " "without an explicit 'zpool upgrade'.\n")); } else if (reason == ZPOOL_STATUS_HOSTID_MISMATCH) { (void) printf(gettext(" action: The pool can be " "imported using its name or numeric " "identifier and\n\tthe '-f' flag.\n")); } else { (void) printf(gettext(" action: The pool can be " "imported using its name or numeric " "identifier.\n")); } } else if (vs->vs_state == VDEV_STATE_DEGRADED) { (void) printf(gettext(" action: The pool can be imported " "despite missing or damaged devices. The\n\tfault " "tolerance of the pool may be compromised if imported.\n")); } else { switch (reason) { case ZPOOL_STATUS_VERSION_NEWER: (void) printf(gettext(" action: The pool cannot be " "imported. Access the pool on a system running " "newer\n\tsoftware, or recreate the pool from " "backup.\n")); break; case ZPOOL_STATUS_UNSUP_FEAT_READ: (void) printf(gettext("action: The pool cannot be " "imported. Access the pool on a system that " "supports\n\tthe required feature(s), or recreate " "the pool from backup.\n")); break; case ZPOOL_STATUS_UNSUP_FEAT_WRITE: (void) printf(gettext("action: The pool cannot be " "imported in read-write mode. Import the pool " "with\n" "\t\"-o readonly=on\", access the pool on a system " "that supports the\n\trequired feature(s), or " "recreate the pool from backup.\n")); break; case ZPOOL_STATUS_MISSING_DEV_R: case ZPOOL_STATUS_MISSING_DEV_NR: case ZPOOL_STATUS_BAD_GUID_SUM: (void) printf(gettext(" action: The pool cannot be " "imported. Attach the missing\n\tdevices and try " "again.\n")); break; default: (void) printf(gettext(" action: The pool cannot be " "imported due to damaged devices or data.\n")); } } /* Print the comment attached to the pool. */ if (nvlist_lookup_string(config, ZPOOL_CONFIG_COMMENT, &comment) == 0) (void) printf(gettext("comment: %s\n"), comment); /* * If the state is "closed" or "can't open", and the aux state * is "corrupt data": */ if (((vs->vs_state == VDEV_STATE_CLOSED) || (vs->vs_state == VDEV_STATE_CANT_OPEN)) && (vs->vs_aux == VDEV_AUX_CORRUPT_DATA)) { if (pool_state == POOL_STATE_DESTROYED) (void) printf(gettext("\tThe pool was destroyed, " "but can be imported using the '-Df' flags.\n")); else if (pool_state != POOL_STATE_EXPORTED) (void) printf(gettext("\tThe pool may be active on " "another system, but can be imported using\n\t" "the '-f' flag.\n")); } if (msgid != NULL) (void) printf(gettext(" see: http://zfsonlinux.org/msg/%s\n"), msgid); (void) printf(gettext(" config:\n\n")); namewidth = max_width(NULL, nvroot, 0, 0); if (namewidth < 10) namewidth = 10; print_import_config(name, nvroot, namewidth, 0); if (num_logs(nvroot) > 0) print_logs(NULL, nvroot, namewidth, B_FALSE); if (reason == ZPOOL_STATUS_BAD_GUID_SUM) { (void) printf(gettext("\n\tAdditional devices are known to " "be part of this pool, though their\n\texact " "configuration cannot be determined.\n")); } } /* * Perform the import for the given configuration. This passes the heavy * lifting off to zpool_import_props(), and then mounts the datasets contained * within the pool. 
*/ static int do_import(nvlist_t *config, const char *newname, const char *mntopts, nvlist_t *props, int flags) { zpool_handle_t *zhp; char *name; uint64_t state; uint64_t version; verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME, &name) == 0); verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE, &state) == 0); verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, &version) == 0); if (!SPA_VERSION_IS_SUPPORTED(version)) { (void) fprintf(stderr, gettext("cannot import '%s': pool " "is formatted using an unsupported ZFS version\n"), name); return (1); } else if (state != POOL_STATE_EXPORTED && !(flags & ZFS_IMPORT_ANY_HOST)) { uint64_t hostid; if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_HOSTID, &hostid) == 0) { unsigned long system_hostid = gethostid() & 0xffffffff; if ((unsigned long)hostid != system_hostid) { char *hostname; uint64_t timestamp; time_t t; verify(nvlist_lookup_string(config, ZPOOL_CONFIG_HOSTNAME, &hostname) == 0); verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_TIMESTAMP, ×tamp) == 0); t = timestamp; (void) fprintf(stderr, gettext("cannot import " "'%s': pool may be in use from other " "system, it was last accessed by %s " "(hostid: 0x%lx) on %s"), name, hostname, (unsigned long)hostid, asctime(localtime(&t))); (void) fprintf(stderr, gettext("use '-f' to " "import anyway\n")); return (1); } } else { (void) fprintf(stderr, gettext("cannot import '%s': " "pool may be in use from other system\n"), name); (void) fprintf(stderr, gettext("use '-f' to import " "anyway\n")); return (1); } } if (zpool_import_props(g_zfs, config, newname, props, flags) != 0) return (1); if (newname != NULL) name = (char *)newname; if ((zhp = zpool_open_canfail(g_zfs, name)) == NULL) return (1); if (zpool_get_state(zhp) != POOL_STATE_UNAVAIL && !(flags & ZFS_IMPORT_ONLY) && zpool_enable_datasets(zhp, mntopts, 0) != 0) { zpool_close(zhp); return (1); } zpool_close(zhp); return (0); } /* * zpool import [-d dir] [-D] * import [-o mntopts] [-o prop=value] ... [-R root] [-D] * [-d dir | -c cachefile] [-f] -a * import [-o mntopts] [-o prop=value] ... [-R root] [-D] * [-d dir | -c cachefile] [-f] [-n] [-F] [newpool] * * -c Read pool information from a cachefile instead of searching * devices. * * -d Scan in a specific directory, other than /dev/. More than * one directory can be specified using multiple '-d' options. * * -D Scan for previously destroyed pools or import all or only * specified destroyed pools. * * -R Temporarily import the pool, with all mountpoints relative to * the given root. The pool will remain exported when the machine * is rebooted. * * -V Import even in the presence of faulted vdevs. This is an * intentionally undocumented option for testing purposes, and * treats the pool configuration as complete, leaving any bad * vdevs in the FAULTED state. In other words, it does verbatim * import. * * -f Force import, even if it appears that the pool is active. * * -F Attempt rewind if necessary. * * -n See if rewind would work, but don't actually rewind. * * -N Import the pool but don't mount datasets. * * -T Specify a starting txg to use for import. This option is * intentionally undocumented option for testing purposes. * * -a Import all pools found. * * -o Set property=value and/or temporary mount options (without '='). * * The import command scans for pools to import, and import pools based on pool * name and GUID. The pool can also be renamed as part of the import process. 
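do_import() refuses to import a non-exported pool whose stored hostid differs from the local one unless -f (ZFS_IMPORT_ANY_HOST) was given. A standalone sketch of just that comparison is below, using gethostid() and the same 32-bit masking; pool_hostid stands in for the value the real code reads from ZPOOL_CONFIG_HOSTID.

#include <stdio.h>
#include <stdint.h>
#include <unistd.h>

/*
 * Return 1 if a pool last written by 'pool_hostid' looks like it
 * belongs to another system, 0 if it matches this host.
 */
static int
hostid_mismatch(uint64_t pool_hostid)
{
    unsigned long system_hostid = gethostid() & 0xffffffff;

    return ((unsigned long)pool_hostid != system_hostid);
}

int
main(void)
{
    /* Illustrative value only. */
    uint64_t pool_hostid = 0x00c0ffee;

    if (hostid_mismatch(pool_hostid))
        (void) printf("pool may be in use from another system; "
            "use '-f' to import anyway\n");
    else
        (void) printf("hostid matches this system\n");
    return (0);
}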
*/ int zpool_do_import(int argc, char **argv) { char **searchdirs = NULL; char *env, *envdup = NULL; int nsearch = 0; int c; int err = 0; nvlist_t *pools = NULL; boolean_t do_all = B_FALSE; boolean_t do_destroyed = B_FALSE; char *mntopts = NULL; nvpair_t *elem; nvlist_t *config; uint64_t searchguid = 0; char *searchname = NULL; char *propval; nvlist_t *found_config; nvlist_t *policy = NULL; nvlist_t *props = NULL; boolean_t first; int flags = ZFS_IMPORT_NORMAL; uint32_t rewind_policy = ZPOOL_NO_REWIND; boolean_t dryrun = B_FALSE; boolean_t do_rewind = B_FALSE; boolean_t xtreme_rewind = B_FALSE; uint64_t pool_state, txg = -1ULL; char *cachefile = NULL; importargs_t idata = { 0 }; char *endptr; /* check options */ while ((c = getopt(argc, argv, ":aCc:d:DEfFmnNo:rR:T:VX")) != -1) { switch (c) { case 'a': do_all = B_TRUE; break; case 'c': cachefile = optarg; break; case 'd': if (searchdirs == NULL) { searchdirs = safe_malloc(sizeof (char *)); } else { char **tmp = safe_malloc((nsearch + 1) * sizeof (char *)); bcopy(searchdirs, tmp, nsearch * sizeof (char *)); free(searchdirs); searchdirs = tmp; } searchdirs[nsearch++] = optarg; break; case 'D': do_destroyed = B_TRUE; break; case 'f': flags |= ZFS_IMPORT_ANY_HOST; break; case 'F': do_rewind = B_TRUE; break; case 'm': flags |= ZFS_IMPORT_MISSING_LOG; break; case 'n': dryrun = B_TRUE; break; case 'N': flags |= ZFS_IMPORT_ONLY; break; case 'o': if ((propval = strchr(optarg, '=')) != NULL) { *propval = '\0'; propval++; if (add_prop_list(optarg, propval, &props, B_TRUE)) goto error; } else { mntopts = optarg; } break; case 'R': if (add_prop_list(zpool_prop_to_name( ZPOOL_PROP_ALTROOT), optarg, &props, B_TRUE)) goto error; if (nvlist_lookup_string(props, zpool_prop_to_name(ZPOOL_PROP_CACHEFILE), &propval) == 0) break; if (add_prop_list(zpool_prop_to_name( ZPOOL_PROP_CACHEFILE), "none", &props, B_TRUE)) goto error; break; case 'T': errno = 0; txg = strtoull(optarg, &endptr, 10); if (errno != 0 || *endptr != '\0') { (void) fprintf(stderr, gettext("invalid txg value\n")); usage(B_FALSE); } rewind_policy = ZPOOL_DO_REWIND | ZPOOL_EXTREME_REWIND; break; case 'V': flags |= ZFS_IMPORT_VERBATIM; break; case 'X': xtreme_rewind = B_TRUE; break; case ':': (void) fprintf(stderr, gettext("missing argument for " "'%c' option\n"), optopt); usage(B_FALSE); break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); usage(B_FALSE); } } argc -= optind; argv += optind; if (cachefile && nsearch != 0) { (void) fprintf(stderr, gettext("-c is incompatible with -d\n")); usage(B_FALSE); } if ((dryrun || xtreme_rewind) && !do_rewind) { (void) fprintf(stderr, gettext("-n or -X only meaningful with -F\n")); usage(B_FALSE); } if (dryrun) rewind_policy = ZPOOL_TRY_REWIND; else if (do_rewind) rewind_policy = ZPOOL_DO_REWIND; if (xtreme_rewind) rewind_policy |= ZPOOL_EXTREME_REWIND; /* In the future, we can capture further policy and include it here */ if (nvlist_alloc(&policy, NV_UNIQUE_NAME, 0) != 0 || nvlist_add_uint64(policy, ZPOOL_REWIND_REQUEST_TXG, txg) != 0 || nvlist_add_uint32(policy, ZPOOL_REWIND_REQUEST, rewind_policy) != 0) goto error; /* check argument count */ if (do_all) { if (argc != 0) { (void) fprintf(stderr, gettext("too many arguments\n")); usage(B_FALSE); } } else { if (argc > 2) { (void) fprintf(stderr, gettext("too many arguments\n")); usage(B_FALSE); } /* * Check for the SYS_CONFIG privilege. We do this explicitly * here because otherwise any attempt to discover pools will * silently fail. 
*/ if (argc == 0 && !priv_ineffect(PRIV_SYS_CONFIG)) { (void) fprintf(stderr, gettext("cannot " "discover pools: permission denied\n")); if (searchdirs != NULL) free(searchdirs); nvlist_free(policy); return (1); } } /* * Depending on the arguments given, we do one of the following: * * Iterate through all pools and display information about * each one. * * -a Iterate through all pools and try to import each one. * * Find the pool that corresponds to the given GUID/pool * name and import that one. * * -D Above options applies only to destroyed pools. */ if (argc != 0) { char *endptr; errno = 0; searchguid = strtoull(argv[0], &endptr, 10); if (errno != 0 || *endptr != '\0') searchname = argv[0]; found_config = NULL; /* * User specified a name or guid. Ensure it's unique. */ idata.unique = B_TRUE; } /* * Check the environment for the preferred search path. */ if ((searchdirs == NULL) && (env = getenv("ZPOOL_IMPORT_PATH"))) { char *dir; envdup = strdup(env); dir = strtok(envdup, ":"); while (dir != NULL) { if (searchdirs == NULL) { searchdirs = safe_malloc(sizeof (char *)); } else { char **tmp = safe_malloc((nsearch + 1) * sizeof (char *)); bcopy(searchdirs, tmp, nsearch * sizeof (char *)); free(searchdirs); searchdirs = tmp; } searchdirs[nsearch++] = dir; dir = strtok(NULL, ":"); } } idata.path = searchdirs; idata.paths = nsearch; idata.poolname = searchname; idata.guid = searchguid; idata.cachefile = cachefile; pools = zpool_search_import(g_zfs, &idata); if (pools != NULL && idata.exists && (argc == 1 || strcmp(argv[0], argv[1]) == 0)) { (void) fprintf(stderr, gettext("cannot import '%s': " "a pool with that name already exists\n"), argv[0]); (void) fprintf(stderr, gettext("use the form '%s " " ' to give it a new name\n"), "zpool import"); err = 1; } else if (pools == NULL && idata.exists) { (void) fprintf(stderr, gettext("cannot import '%s': " "a pool with that name is already created/imported,\n"), argv[0]); (void) fprintf(stderr, gettext("and no additional pools " "with that name were found\n")); err = 1; } else if (pools == NULL) { if (argc != 0) { (void) fprintf(stderr, gettext("cannot import '%s': " "no such pool available\n"), argv[0]); } err = 1; } if (err == 1) { if (searchdirs != NULL) free(searchdirs); if (envdup != NULL) free(envdup); nvlist_free(policy); return (1); } /* * At this point we have a list of import candidate configs. Even if * we were searching by pool name or guid, we still need to * post-process the list to deal with pool state and possible * duplicate names. */ err = 0; elem = NULL; first = B_TRUE; while ((elem = nvlist_next_nvpair(pools, elem)) != NULL) { verify(nvpair_value_nvlist(elem, &config) == 0); verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE, &pool_state) == 0); if (!do_destroyed && pool_state == POOL_STATE_DESTROYED) continue; if (do_destroyed && pool_state != POOL_STATE_DESTROYED) continue; verify(nvlist_add_nvlist(config, ZPOOL_REWIND_POLICY, policy) == 0); if (argc == 0) { if (first) first = B_FALSE; else if (!do_all) (void) printf("\n"); if (do_all) { err |= do_import(config, NULL, mntopts, props, flags); } else { show_import(config); } } else if (searchname != NULL) { char *name; /* * We are searching for a pool based on name. 
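When no -d directories were given, the import path above falls back to the ZPOOL_IMPORT_PATH environment variable and splits it on ':' with strtok(), growing the search-directory array one slot at a time. The standalone sketch below shows that environment parsing; it grows the array with realloc() instead of the malloc()/bcopy() sequence in the original, purely for brevity.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int
main(void)
{
    char **searchdirs = NULL;
    int nsearch = 0;
    char *env, *envdup, *dir;
    int i;

    if ((env = getenv("ZPOOL_IMPORT_PATH")) == NULL) {
        (void) printf("ZPOOL_IMPORT_PATH not set\n");
        return (0);
    }

    /* strtok() writes into its argument, so work on a copy. */
    envdup = strdup(env);
    for (dir = strtok(envdup, ":"); dir != NULL;
        dir = strtok(NULL, ":")) {
        char **tmp = realloc(searchdirs,
            (nsearch + 1) * sizeof (char *));
        if (tmp == NULL)
            abort();
        searchdirs = tmp;
        searchdirs[nsearch++] = dir;
    }

    for (i = 0; i < nsearch; i++)
        (void) printf("search dir %d: %s\n", i, searchdirs[i]);

    free(searchdirs);
    free(envdup);
    return (0);
}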
*/ verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME, &name) == 0); if (strcmp(name, searchname) == 0) { if (found_config != NULL) { (void) fprintf(stderr, gettext( "cannot import '%s': more than " "one matching pool\n"), searchname); (void) fprintf(stderr, gettext( "import by numeric ID instead\n")); err = B_TRUE; } found_config = config; } } else { uint64_t guid; /* * Search for a pool by guid. */ verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &guid) == 0); if (guid == searchguid) found_config = config; } } /* * If we were searching for a specific pool, verify that we found a * pool, and then do the import. */ if (argc != 0 && err == 0) { if (found_config == NULL) { (void) fprintf(stderr, gettext("cannot import '%s': " "no such pool available\n"), argv[0]); err = B_TRUE; } else { err |= do_import(found_config, argc == 1 ? NULL : argv[1], mntopts, props, flags); } } /* * If we were just looking for pools, report an error if none were * found. */ if (argc == 0 && first) (void) fprintf(stderr, gettext("no pools available to import\n")); error: nvlist_free(props); nvlist_free(pools); nvlist_free(policy); if (searchdirs != NULL) free(searchdirs); if (envdup != NULL) free(envdup); return (err ? 1 : 0); } typedef struct iostat_cbdata { boolean_t cb_verbose; int cb_namewidth; int cb_iteration; zpool_list_t *cb_list; } iostat_cbdata_t; static void print_iostat_separator(iostat_cbdata_t *cb) { int i = 0; for (i = 0; i < cb->cb_namewidth; i++) (void) printf("-"); (void) printf(" ----- ----- ----- ----- ----- -----\n"); } static void print_iostat_header(iostat_cbdata_t *cb) { (void) printf("%*s capacity operations bandwidth\n", cb->cb_namewidth, ""); (void) printf("%-*s alloc free read write read write\n", cb->cb_namewidth, "pool"); print_iostat_separator(cb); } /* * Display a single statistic. */ static void print_one_stat(uint64_t value) { char buf[64]; zfs_nicenum(value, buf, sizeof (buf)); (void) printf(" %5s", buf); } /* * Print out all the statistics for the given vdev. This can either be the * toplevel configuration, or called recursively. If 'name' is NULL, then this * is a verbose output, and we don't want to display the toplevel pool stats. 
*/ void print_vdev_stats(zpool_handle_t *zhp, const char *name, nvlist_t *oldnv, nvlist_t *newnv, iostat_cbdata_t *cb, int depth) { nvlist_t **oldchild, **newchild; uint_t c, children; vdev_stat_t *oldvs, *newvs; vdev_stat_t zerovs = { 0 }; uint64_t tdelta; double scale; char *vname; if (oldnv != NULL) { verify(nvlist_lookup_uint64_array(oldnv, ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&oldvs, &c) == 0); } else { oldvs = &zerovs; } verify(nvlist_lookup_uint64_array(newnv, ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&newvs, &c) == 0); if (strlen(name) + depth > cb->cb_namewidth) (void) printf("%*s%s", depth, "", name); else (void) printf("%*s%s%*s", depth, "", name, (int)(cb->cb_namewidth - strlen(name) - depth), ""); tdelta = newvs->vs_timestamp - oldvs->vs_timestamp; if (tdelta == 0) scale = 1.0; else scale = (double)NANOSEC / tdelta; /* only toplevel vdevs have capacity stats */ if (newvs->vs_space == 0) { (void) printf(" - -"); } else { print_one_stat(newvs->vs_alloc); print_one_stat(newvs->vs_space - newvs->vs_alloc); } print_one_stat((uint64_t)(scale * (newvs->vs_ops[ZIO_TYPE_READ] - oldvs->vs_ops[ZIO_TYPE_READ]))); print_one_stat((uint64_t)(scale * (newvs->vs_ops[ZIO_TYPE_WRITE] - oldvs->vs_ops[ZIO_TYPE_WRITE]))); print_one_stat((uint64_t)(scale * (newvs->vs_bytes[ZIO_TYPE_READ] - oldvs->vs_bytes[ZIO_TYPE_READ]))); print_one_stat((uint64_t)(scale * (newvs->vs_bytes[ZIO_TYPE_WRITE] - oldvs->vs_bytes[ZIO_TYPE_WRITE]))); (void) printf("\n"); if (!cb->cb_verbose) return; if (nvlist_lookup_nvlist_array(newnv, ZPOOL_CONFIG_CHILDREN, &newchild, &children) != 0) return; if (oldnv && nvlist_lookup_nvlist_array(oldnv, ZPOOL_CONFIG_CHILDREN, &oldchild, &c) != 0) return; for (c = 0; c < children; c++) { uint64_t ishole = B_FALSE, islog = B_FALSE; (void) nvlist_lookup_uint64(newchild[c], ZPOOL_CONFIG_IS_HOLE, &ishole); (void) nvlist_lookup_uint64(newchild[c], ZPOOL_CONFIG_IS_LOG, &islog); if (ishole || islog) continue; vname = zpool_vdev_name(g_zfs, zhp, newchild[c], B_FALSE); print_vdev_stats(zhp, vname, oldnv ? oldchild[c] : NULL, newchild[c], cb, depth + 2); free(vname); } /* * Log device section */ if (num_logs(newnv) > 0) { (void) printf("%-*s - - - - - " "-\n", cb->cb_namewidth, "logs"); for (c = 0; c < children; c++) { uint64_t islog = B_FALSE; (void) nvlist_lookup_uint64(newchild[c], ZPOOL_CONFIG_IS_LOG, &islog); if (islog) { vname = zpool_vdev_name(g_zfs, zhp, newchild[c], B_FALSE); print_vdev_stats(zhp, vname, oldnv ? oldchild[c] : NULL, newchild[c], cb, depth + 2); free(vname); } } } /* * Include level 2 ARC devices in iostat output */ if (nvlist_lookup_nvlist_array(newnv, ZPOOL_CONFIG_L2CACHE, &newchild, &children) != 0) return; if (oldnv && nvlist_lookup_nvlist_array(oldnv, ZPOOL_CONFIG_L2CACHE, &oldchild, &c) != 0) return; if (children > 0) { (void) printf("%-*s - - - - - " "-\n", cb->cb_namewidth, "cache"); for (c = 0; c < children; c++) { vname = zpool_vdev_name(g_zfs, zhp, newchild[c], B_FALSE); print_vdev_stats(zhp, vname, oldnv ? oldchild[c] : NULL, newchild[c], cb, depth + 2); free(vname); } } } static int refresh_iostat(zpool_handle_t *zhp, void *data) { iostat_cbdata_t *cb = data; boolean_t missing; /* * If the pool has disappeared, remove it from the list and continue. */ if (zpool_refresh_stats(zhp, &missing) != 0) return (-1); if (missing) pool_list_remove(cb->cb_list, zhp); return (0); } /* * Callback to print out the iostats for the given pool. 
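print_vdev_stats() turns two cumulative counter snapshots into a per-second rate by scaling the counter delta with NANOSEC divided by the timestamp delta (falling back to a scale of 1.0 when the timestamps are equal). A standalone sketch of that arithmetic follows; NANOSEC is defined locally here, whereas the real code gets it from the system headers.

#include <stdio.h>
#include <stdint.h>

#define NANOSEC 1000000000ULL   /* nanoseconds per second */

/*
 * Convert the difference between two cumulative counters, sampled
 * tdelta nanoseconds apart, into an operations-per-second figure.
 */
static uint64_t
per_second(uint64_t oldval, uint64_t newval, uint64_t tdelta)
{
    double scale = (tdelta == 0) ? 1.0 : (double)NANOSEC / tdelta;

    return ((uint64_t)(scale * (newval - oldval)));
}

int
main(void)
{
    uint64_t old_reads = 1000, new_reads = 4000;
    uint64_t tdelta = 2 * NANOSEC;  /* two seconds between samples */

    (void) printf("%llu reads/sec\n",
        (unsigned long long)per_second(old_reads, new_reads, tdelta));
    return (0);
}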
*/ int print_iostat(zpool_handle_t *zhp, void *data) { iostat_cbdata_t *cb = data; nvlist_t *oldconfig, *newconfig; nvlist_t *oldnvroot, *newnvroot; newconfig = zpool_get_config(zhp, &oldconfig); if (cb->cb_iteration == 1) oldconfig = NULL; verify(nvlist_lookup_nvlist(newconfig, ZPOOL_CONFIG_VDEV_TREE, &newnvroot) == 0); if (oldconfig == NULL) oldnvroot = NULL; else verify(nvlist_lookup_nvlist(oldconfig, ZPOOL_CONFIG_VDEV_TREE, &oldnvroot) == 0); /* * Print out the statistics for the pool. */ print_vdev_stats(zhp, zpool_get_name(zhp), oldnvroot, newnvroot, cb, 0); if (cb->cb_verbose) print_iostat_separator(cb); return (0); } static int get_columns(void) { struct winsize ws; int columns = 80; int error; if (isatty(STDOUT_FILENO)) { error = ioctl(STDOUT_FILENO, TIOCGWINSZ, &ws); if (error == 0) columns = ws.ws_col; } else { columns = 999; } return columns; } int get_namewidth(zpool_handle_t *zhp, void *data) { iostat_cbdata_t *cb = data; nvlist_t *config, *nvroot; int columns; if ((config = zpool_get_config(zhp, NULL)) != NULL) { verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); if (!cb->cb_verbose) cb->cb_namewidth = strlen(zpool_get_name(zhp)); else cb->cb_namewidth = max_width(zhp, nvroot, 0, cb->cb_namewidth); } /* * The width must be at least 10, but may be as large as the * column width - 42 so that we can still fit in one line. */ columns = get_columns(); if (cb->cb_namewidth < 10) cb->cb_namewidth = 10; if (cb->cb_namewidth > columns - 42) cb->cb_namewidth = columns - 42; return (0); } /* * Parse the input string, get the 'interval' and 'count' value if there is one. */ static void get_interval_count(int *argcp, char **argv, unsigned long *iv, unsigned long *cnt) { unsigned long interval = 0, count = 0; int argc = *argcp; /* * Determine if the last argument is an integer or a pool name */ if (argc > 0 && isdigit(argv[argc - 1][0])) { char *end; errno = 0; interval = strtoul(argv[argc - 1], &end, 10); if (*end == '\0' && errno == 0) { if (interval == 0) { (void) fprintf(stderr, gettext("interval " "cannot be zero\n")); usage(B_FALSE); } /* * Ignore the last parameter */ argc--; } else { /* * If this is not a valid number, just plow on. The * user will get a more informative error message later * on. */ interval = 0; } } /* * If the last argument is also an integer, then we have both a count * and an interval. */ if (argc > 0 && isdigit(argv[argc - 1][0])) { char *end; errno = 0; count = interval; interval = strtoul(argv[argc - 1], &end, 10); if (*end == '\0' && errno == 0) { if (interval == 0) { (void) fprintf(stderr, gettext("interval " "cannot be zero\n")); usage(B_FALSE); } /* * Ignore the last parameter */ argc--; } else { interval = 0; } } *iv = interval; *cnt = count; *argcp = argc; } static void get_timestamp_arg(char c) { if (c == 'u') timestamp_fmt = UDATE; else if (c == 'd') timestamp_fmt = DDATE; else usage(B_FALSE); } /* * zpool iostat [-v] [-T d|u] [pool] ... [interval [count]] * * -v Display statistics for individual vdevs * -T Display a timestamp in date(1) or Unix format * * This command can be tricky because we want to be able to deal with pool * creation/destruction as well as vdev configuration changes. The bulk of this * processing is handled by the pool_list_* routines in zpool_iter.c. We rely * on pool_list_update() to detect the addition of new pools. Configuration * changes are all handled within libzfs. 
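 */

/*
 * Illustrative sketch only: get_interval_count() above consumes up to two
 * trailing numeric arguments.  The last number parsed becomes the interval;
 * if a second trailing number is present, the previously parsed value is
 * reinterpreted as the count.  The standalone fragment below (hypothetical
 * variable names, standard strtoul()) walks through "zpool iostat 5 10".
 */
static void
parse_interval_count_example(void)
{
	const char *args[] = { "5", "10" };	/* trailing operands, in order */
	int argc = 2;
	unsigned long interval = 0, count = 0;
	char *end;

	/* first pass: the rightmost operand ("10") is taken as the interval */
	interval = strtoul(args[argc - 1], &end, 10);
	argc--;

	/* second pass: "5" becomes the interval, "10" becomes the count */
	count = interval;
	interval = strtoul(args[argc - 1], &end, 10);
	argc--;

	/* result: report every 5 seconds, 10 times */
	(void) printf("interval=%lu count=%lu\n", interval, count);
}

/*
 * The real routine additionally rejects an interval of zero and leaves a
 * malformed trailing word in place so it is later treated as a pool name.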
*/ int zpool_do_iostat(int argc, char **argv) { int c; int ret; int npools; unsigned long interval = 0, count = 0; zpool_list_t *list; boolean_t verbose = B_FALSE; iostat_cbdata_t cb; /* check options */ while ((c = getopt(argc, argv, "T:v")) != -1) { switch (c) { case 'T': get_timestamp_arg(*optarg); break; case 'v': verbose = B_TRUE; break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); usage(B_FALSE); } } argc -= optind; argv += optind; get_interval_count(&argc, argv, &interval, &count); /* * Construct the list of all interesting pools. */ ret = 0; if ((list = pool_list_get(argc, argv, NULL, &ret)) == NULL) return (1); if (pool_list_count(list) == 0 && argc != 0) { pool_list_free(list); return (1); } if (pool_list_count(list) == 0 && interval == 0) { pool_list_free(list); (void) fprintf(stderr, gettext("no pools available\n")); return (1); } /* * Enter the main iostat loop. */ cb.cb_list = list; cb.cb_verbose = verbose; cb.cb_iteration = 0; cb.cb_namewidth = 0; for (;;) { pool_list_update(list); if ((npools = pool_list_count(list)) == 0) (void) fprintf(stderr, gettext("no pools available\n")); else { /* * Refresh all statistics. This is done as an * explicit step before calculating the maximum name * width, so that any * configuration changes are * properly accounted for. */ (void) pool_list_iter(list, B_FALSE, refresh_iostat, &cb); /* * Iterate over all pools to determine the maximum width * for the pool / device name column across all pools. */ cb.cb_namewidth = 0; (void) pool_list_iter(list, B_FALSE, get_namewidth, &cb); if (timestamp_fmt != NODATE) print_timestamp(timestamp_fmt); /* * If it's the first time, or verbose mode, print the * header. */ if (++cb.cb_iteration == 1 || verbose) print_iostat_header(&cb); (void) pool_list_iter(list, B_FALSE, print_iostat, &cb); /* * If there's more than one pool, and we're not in * verbose mode (which prints a separator for us), * then print a separator. */ if (npools > 1 && !verbose) print_iostat_separator(&cb); if (verbose) (void) printf("\n"); } /* * Flush the output so that redirection to a file isn't buffered * indefinitely. */ (void) fflush(stdout); if (interval == 0) break; if (count != 0 && --count == 0) break; (void) sleep(interval); } pool_list_free(list); return (ret); } typedef struct list_cbdata { boolean_t cb_verbose; int cb_namewidth; boolean_t cb_scripted; zprop_list_t *cb_proplist; } list_cbdata_t; /* * Given a list of columns to display, output appropriate headers for each one. */ static void print_header(list_cbdata_t *cb) { zprop_list_t *pl = cb->cb_proplist; char headerbuf[ZPOOL_MAXPROPLEN]; const char *header; boolean_t first = B_TRUE; boolean_t right_justify; size_t width = 0; for (; pl != NULL; pl = pl->pl_next) { width = pl->pl_width; if (first && cb->cb_verbose) { /* * Reset the width to accommodate the verbose listing * of devices. 
*/ width = cb->cb_namewidth; } if (!first) (void) printf(" "); else first = B_FALSE; right_justify = B_FALSE; if (pl->pl_prop != ZPROP_INVAL) { header = zpool_prop_column_name(pl->pl_prop); right_justify = zpool_prop_align_right(pl->pl_prop); } else { int i; for (i = 0; pl->pl_user_prop[i] != '\0'; i++) headerbuf[i] = toupper(pl->pl_user_prop[i]); headerbuf[i] = '\0'; header = headerbuf; } if (pl->pl_next == NULL && !right_justify) (void) printf("%s", header); else if (right_justify) (void) printf("%*s", (int)width, header); else (void) printf("%-*s", (int)width, header); } (void) printf("\n"); } /* * Given a pool and a list of properties, print out all the properties according * to the described layout. */ static void print_pool(zpool_handle_t *zhp, list_cbdata_t *cb) { zprop_list_t *pl = cb->cb_proplist; boolean_t first = B_TRUE; char property[ZPOOL_MAXPROPLEN]; char *propstr; boolean_t right_justify; size_t width; for (; pl != NULL; pl = pl->pl_next) { width = pl->pl_width; if (first && cb->cb_verbose) { /* * Reset the width to accommodate the verbose listing * of devices. */ width = cb->cb_namewidth; } if (!first) { if (cb->cb_scripted) (void) printf("\t"); else (void) printf(" "); } else { first = B_FALSE; } right_justify = B_FALSE; if (pl->pl_prop != ZPROP_INVAL) { if (pl->pl_prop == ZPOOL_PROP_EXPANDSZ && zpool_get_prop_int(zhp, pl->pl_prop, NULL) == 0) propstr = "-"; else if (zpool_get_prop(zhp, pl->pl_prop, property, sizeof (property), NULL) != 0) propstr = "-"; else propstr = property; right_justify = zpool_prop_align_right(pl->pl_prop); } else if ((zpool_prop_feature(pl->pl_user_prop) || zpool_prop_unsupported(pl->pl_user_prop)) && zpool_prop_get_feature(zhp, pl->pl_user_prop, property, sizeof (property)) == 0) { propstr = property; } else { propstr = "-"; } /* * If this is being called in scripted mode, or if this is the * last column and it is left-justified, don't include a width * format specifier. 
*/ if (cb->cb_scripted || (pl->pl_next == NULL && !right_justify)) (void) printf("%s", propstr); else if (right_justify) (void) printf("%*s", (int)width, propstr); else (void) printf("%-*s", (int)width, propstr); } (void) printf("\n"); } static void print_one_column(zpool_prop_t prop, uint64_t value, boolean_t scripted) { char propval[64]; boolean_t fixed; size_t width = zprop_width(prop, &fixed, ZFS_TYPE_POOL); zfs_nicenum(value, propval, sizeof (propval)); if (prop == ZPOOL_PROP_EXPANDSZ && value == 0) (void) strlcpy(propval, "-", sizeof (propval)); if (scripted) (void) printf("\t%s", propval); else (void) printf(" %*s", (int)width, propval); } void print_list_stats(zpool_handle_t *zhp, const char *name, nvlist_t *nv, list_cbdata_t *cb, int depth) { nvlist_t **child; vdev_stat_t *vs; uint_t c, children; char *vname; boolean_t scripted = cb->cb_scripted; verify(nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&vs, &c) == 0); if (name != NULL) { if (scripted) (void) printf("\t%s", name); else if (strlen(name) + depth > cb->cb_namewidth) (void) printf("%*s%s", depth, "", name); else (void) printf("%*s%s%*s", depth, "", name, (int)(cb->cb_namewidth - strlen(name) - depth), ""); /* only toplevel vdevs have capacity stats */ if (vs->vs_space == 0) { if (scripted) (void) printf("\t-\t-\t-"); else (void) printf(" - - -"); } else { print_one_column(ZPOOL_PROP_SIZE, vs->vs_space, scripted); print_one_column(ZPOOL_PROP_CAPACITY, vs->vs_alloc, scripted); print_one_column(ZPOOL_PROP_FREE, vs->vs_space - vs->vs_alloc, scripted); } print_one_column(ZPOOL_PROP_EXPANDSZ, vs->vs_esize, scripted); (void) printf("\n"); } if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, &child, &children) != 0) return; for (c = 0; c < children; c++) { uint64_t ishole = B_FALSE; if (nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_HOLE, &ishole) == 0 && ishole) continue; vname = zpool_vdev_name(g_zfs, zhp, child[c], B_FALSE); print_list_stats(zhp, vname, child[c], cb, depth + 2); free(vname); } /* * Include level 2 ARC devices in iostat output */ if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE, &child, &children) != 0) return; if (children > 0) { (void) printf("%-*s - - - - - " "-\n", cb->cb_namewidth, "cache"); for (c = 0; c < children; c++) { vname = zpool_vdev_name(g_zfs, zhp, child[c], B_FALSE); print_list_stats(zhp, vname, child[c], cb, depth + 2); free(vname); } } } /* * Generic callback function to list a pool. */ int list_callback(zpool_handle_t *zhp, void *data) { list_cbdata_t *cbp = data; nvlist_t *config; nvlist_t *nvroot; config = zpool_get_config(zhp, NULL); print_pool(zhp, cbp); if (!cbp->cb_verbose) return (0); verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); print_list_stats(zhp, NULL, nvroot, cbp, 0); return (0); } /* * zpool list [-H] [-o prop[,prop]*] [-T d|u] [pool] ... [interval [count]] * * -H Scripted mode. Don't display headers, and separate properties * by a single tab. * -o List of properties to display. Defaults to * "name,size,allocated,free,capacity,health,altroot" * -T Display a timestamp in date(1) or Unix format * * List all pools in the system, whether or not they're healthy. Output space * statistics for each one, as well as health status summary. 
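 */

/*
 * Illustrative sketch only: print_list_stats() above recurses through the
 * vdev tree by looking up the ZPOOL_CONFIG_CHILDREN nvlist array and
 * indenting two spaces per level.  The hypothetical walker below shows the
 * bare traversal without any statistics, using the same libnvpair and
 * libzfs calls already used in this file.
 */
static void
walk_vdev_names(zpool_handle_t *zhp, nvlist_t *nv, int depth)
{
	nvlist_t **child;
	uint_t c, children;
	char *vname;

	/* leaf vdevs have no ZPOOL_CONFIG_CHILDREN entry */
	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
	    &child, &children) != 0)
		return;

	for (c = 0; c < children; c++) {
		vname = zpool_vdev_name(g_zfs, zhp, child[c], B_FALSE);
		(void) printf("%*s%s\n", depth, "", vname);
		free(vname);
		walk_vdev_names(zhp, child[c], depth + 2);
	}
}

/*
 * Called with the root vdev nvlist (ZPOOL_CONFIG_VDEV_TREE) and depth 0,
 * this prints the same indented device names as the verbose listing, minus
 * the capacity columns and the hole/log/cache special cases.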
*/ int zpool_do_list(int argc, char **argv) { int c; int ret = 0; list_cbdata_t cb = { 0 }; static char default_props[] = "name,size,allocated,free,capacity,dedupratio," "health,altroot"; char *props = default_props; unsigned long interval = 0, count = 0; zpool_list_t *list; boolean_t first = B_TRUE; /* check options */ while ((c = getopt(argc, argv, ":Ho:T:v")) != -1) { switch (c) { case 'H': cb.cb_scripted = B_TRUE; break; case 'o': props = optarg; break; case 'T': get_timestamp_arg(*optarg); break; case 'v': cb.cb_verbose = B_TRUE; break; case ':': (void) fprintf(stderr, gettext("missing argument for " "'%c' option\n"), optopt); usage(B_FALSE); break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); usage(B_FALSE); } } argc -= optind; argv += optind; get_interval_count(&argc, argv, &interval, &count); if (zprop_get_list(g_zfs, props, &cb.cb_proplist, ZFS_TYPE_POOL) != 0) usage(B_FALSE); if ((list = pool_list_get(argc, argv, &cb.cb_proplist, &ret)) == NULL) return (1); if (argc == 0 && !cb.cb_scripted && pool_list_count(list) == 0) { (void) printf(gettext("no pools available\n")); zprop_free_list(cb.cb_proplist); return (0); } for (;;) { pool_list_update(list); if (pool_list_count(list) == 0) break; if (timestamp_fmt != NODATE) print_timestamp(timestamp_fmt); if (!cb.cb_scripted && (first || cb.cb_verbose)) { print_header(&cb); first = B_FALSE; } ret = pool_list_iter(list, B_TRUE, list_callback, &cb); if (interval == 0) break; if (count != 0 && --count == 0) break; (void) sleep(interval); } zprop_free_list(cb.cb_proplist); return (ret); } static int zpool_do_attach_or_replace(int argc, char **argv, int replacing) { boolean_t force = B_FALSE; int c; nvlist_t *nvroot; char *poolname, *old_disk, *new_disk; zpool_handle_t *zhp; nvlist_t *props = NULL; char *propval; int ret; /* check options */ while ((c = getopt(argc, argv, "fo:")) != -1) { switch (c) { case 'f': force = B_TRUE; break; case 'o': if ((propval = strchr(optarg, '=')) == NULL) { (void) fprintf(stderr, gettext("missing " "'=' for -o option\n")); usage(B_FALSE); } *propval = '\0'; propval++; if ((strcmp(optarg, ZPOOL_CONFIG_ASHIFT) != 0) || (add_prop_list(optarg, propval, &props, B_TRUE))) usage(B_FALSE); break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); usage(B_FALSE); } } argc -= optind; argv += optind; /* get pool name and check number of arguments */ if (argc < 1) { (void) fprintf(stderr, gettext("missing pool name argument\n")); usage(B_FALSE); } poolname = argv[0]; if (argc < 2) { (void) fprintf(stderr, gettext("missing specification\n")); usage(B_FALSE); } old_disk = argv[1]; if (argc < 3) { if (!replacing) { (void) fprintf(stderr, gettext("missing specification\n")); usage(B_FALSE); } new_disk = old_disk; argc -= 1; argv += 1; } else { new_disk = argv[2]; argc -= 2; argv += 2; } if (argc > 1) { (void) fprintf(stderr, gettext("too many arguments\n")); usage(B_FALSE); } if ((zhp = zpool_open(g_zfs, poolname)) == NULL) return (1); if (zpool_get_config(zhp, NULL) == NULL) { (void) fprintf(stderr, gettext("pool '%s' is unavailable\n"), poolname); zpool_close(zhp); return (1); } nvroot = make_root_vdev(zhp, props, force, B_FALSE, replacing, B_FALSE, argc, argv); if (nvroot == NULL) { zpool_close(zhp); return (1); } ret = zpool_vdev_attach(zhp, old_disk, new_disk, nvroot, replacing); nvlist_free(nvroot); zpool_close(zhp); return (ret); } /* * zpool replace [-f] * * -f Force attach, even if appears to be in use. * * Replace with . 
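 */

/*
 * Illustrative sketch only: zpool_do_attach_or_replace() above accepts a
 * single -o option and only for the "ashift" property.  The fragment below
 * (hypothetical helper name, standard strchr()) shows the name=value split
 * it performs before handing the pair to add_prop_list().
 */
static int
split_prop_assignment(char *arg, char **namep, char **valp)
{
	char *eq = strchr(arg, '=');

	if (eq == NULL)
		return (-1);		/* "-o foo" without '=' is rejected */

	*eq = '\0';			/* terminate the property name */
	*namep = arg;
	*valp = eq + 1;			/* value starts after the '=' */
	return (0);
}

/*
 * With arg pointing at a writable copy of "ashift=12", this yields the name
 * "ashift" and value "12"; any other property name is then refused by the
 * strcmp() against ZPOOL_CONFIG_ASHIFT.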
*/ /* ARGSUSED */ int zpool_do_replace(int argc, char **argv) { return (zpool_do_attach_or_replace(argc, argv, B_TRUE)); } /* * zpool attach [-f] [-o property=value] * * -f Force attach, even if appears to be in use. * -o Set property=value. * * Attach to the mirror containing . If is not * part of a mirror, then will be transformed into a mirror of * and . In either case, will begin life * with a DTL of [0, now], and will immediately begin to resilver itself. */ int zpool_do_attach(int argc, char **argv) { return (zpool_do_attach_or_replace(argc, argv, B_FALSE)); } /* * zpool detach [-f] * * -f Force detach of , even if DTLs argue against it * (not supported yet) * * Detach a device from a mirror. The operation will be refused if * is the last device in the mirror, or if the DTLs indicate that this device * has the only valid copy of some data. */ /* ARGSUSED */ int zpool_do_detach(int argc, char **argv) { int c; char *poolname, *path; zpool_handle_t *zhp; int ret; /* check options */ while ((c = getopt(argc, argv, "f")) != -1) { switch (c) { case 'f': case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); usage(B_FALSE); } } argc -= optind; argv += optind; /* get pool name and check number of arguments */ if (argc < 1) { (void) fprintf(stderr, gettext("missing pool name argument\n")); usage(B_FALSE); } if (argc < 2) { (void) fprintf(stderr, gettext("missing specification\n")); usage(B_FALSE); } poolname = argv[0]; path = argv[1]; if ((zhp = zpool_open(g_zfs, poolname)) == NULL) return (1); ret = zpool_vdev_detach(zhp, path); zpool_close(zhp); return (ret); } /* * zpool split [-n] [-o prop=val] ... * [-o mntopt] ... * [-R altroot] [ ...] * * -n Do not split the pool, but display the resulting layout if * it were to be split. * -o Set property=value, or set mount options. * -R Mount the split-off pool under an alternate root. * * Splits the named pool and gives it the new pool name. Devices to be split * off may be listed, provided that no more than one device is specified * per top-level vdev mirror. The newly split pool is left in an exported * state unless -R is specified. * * Restrictions: the top-level of the pool pool must only be made up of * mirrors; all devices in the pool must be healthy; no device may be * undergoing a resilvering operation. 
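 */

/*
 * Illustrative sketch only: zpool_do_detach() above reduces to a single
 * libzfs call once the pool name and device path have been validated.  The
 * hypothetical wrapper below shows that minimal open/detach/close sequence,
 * without the getopt() handling of the real command.
 */
static int
detach_device(const char *poolname, const char *path)
{
	zpool_handle_t *zhp;
	int ret;

	if ((zhp = zpool_open(g_zfs, poolname)) == NULL)
		return (1);		/* open failed; libzfs printed why */

	ret = zpool_vdev_detach(zhp, path);
	zpool_close(zhp);
	return (ret);
}

/*
 * libzfs refuses the detach when the device is the last member of its
 * mirror or holds the only valid copy of some data, as noted above.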
*/ int zpool_do_split(int argc, char **argv) { char *srcpool, *newpool, *propval; char *mntopts = NULL; splitflags_t flags; int c, ret = 0; zpool_handle_t *zhp; nvlist_t *config, *props = NULL; flags.dryrun = B_FALSE; flags.import = B_FALSE; /* check options */ while ((c = getopt(argc, argv, ":R:no:")) != -1) { switch (c) { case 'R': flags.import = B_TRUE; if (add_prop_list( zpool_prop_to_name(ZPOOL_PROP_ALTROOT), optarg, &props, B_TRUE) != 0) { if (props) nvlist_free(props); usage(B_FALSE); } break; case 'n': flags.dryrun = B_TRUE; break; case 'o': if ((propval = strchr(optarg, '=')) != NULL) { *propval = '\0'; propval++; if (add_prop_list(optarg, propval, &props, B_TRUE) != 0) { if (props) nvlist_free(props); usage(B_FALSE); } } else { mntopts = optarg; } break; case ':': (void) fprintf(stderr, gettext("missing argument for " "'%c' option\n"), optopt); usage(B_FALSE); break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); usage(B_FALSE); break; } } if (!flags.import && mntopts != NULL) { (void) fprintf(stderr, gettext("setting mntopts is only " "valid when importing the pool\n")); usage(B_FALSE); } argc -= optind; argv += optind; if (argc < 1) { (void) fprintf(stderr, gettext("Missing pool name\n")); usage(B_FALSE); } if (argc < 2) { (void) fprintf(stderr, gettext("Missing new pool name\n")); usage(B_FALSE); } srcpool = argv[0]; newpool = argv[1]; argc -= 2; argv += 2; if ((zhp = zpool_open(g_zfs, srcpool)) == NULL) return (1); config = split_mirror_vdev(zhp, newpool, props, flags, argc, argv); if (config == NULL) { ret = 1; } else { if (flags.dryrun) { (void) printf(gettext("would create '%s' with the " "following layout:\n\n"), newpool); print_vdev_tree(NULL, newpool, config, 0, B_FALSE); } nvlist_free(config); } zpool_close(zhp); if (ret != 0 || flags.dryrun || !flags.import) return (ret); /* * The split was successful. Now we need to open the new * pool and import it. */ if ((zhp = zpool_open_canfail(g_zfs, newpool)) == NULL) return (1); if (zpool_get_state(zhp) != POOL_STATE_UNAVAIL && zpool_enable_datasets(zhp, mntopts, 0) != 0) { ret = 1; (void) fprintf(stderr, gettext("Split was successful, but " "the datasets could not all be mounted\n")); (void) fprintf(stderr, gettext("Try doing '%s' with a " "different altroot\n"), "zpool import"); } zpool_close(zhp); return (ret); } /* * zpool online ... 
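 */

/*
 * Illustrative sketch only: after a successful split with -R, the new pool
 * is imported and its datasets mounted via zpool_enable_datasets(), exactly
 * as at the end of zpool_do_split() above.  The hypothetical helper below
 * isolates that final step.
 */
static int
mount_split_pool(const char *newpool, const char *mntopts)
{
	zpool_handle_t *zhp;
	int ret = 0;

	/* open the pool even if it is faulted so its state can be checked */
	if ((zhp = zpool_open_canfail(g_zfs, newpool)) == NULL)
		return (1);

	if (zpool_get_state(zhp) != POOL_STATE_UNAVAIL &&
	    zpool_enable_datasets(zhp, mntopts, 0) != 0)
		ret = 1;		/* split worked, but mounts failed */

	zpool_close(zhp);
	return (ret);
}

/*
 * Without -R the new pool is intentionally left exported, so this step is
 * skipped (flags.import stays B_FALSE).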
*/ int zpool_do_online(int argc, char **argv) { int c, i; char *poolname; zpool_handle_t *zhp; int ret = 0; vdev_state_t newstate; int flags = 0; /* check options */ while ((c = getopt(argc, argv, "et")) != -1) { switch (c) { case 'e': flags |= ZFS_ONLINE_EXPAND; break; case 't': case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); usage(B_FALSE); } } argc -= optind; argv += optind; /* get pool name and check number of arguments */ if (argc < 1) { (void) fprintf(stderr, gettext("missing pool name\n")); usage(B_FALSE); } if (argc < 2) { (void) fprintf(stderr, gettext("missing device name\n")); usage(B_FALSE); } poolname = argv[0]; if ((zhp = zpool_open(g_zfs, poolname)) == NULL) return (1); for (i = 1; i < argc; i++) { if (zpool_vdev_online(zhp, argv[i], flags, &newstate) == 0) { if (newstate != VDEV_STATE_HEALTHY) { (void) printf(gettext("warning: device '%s' " "onlined, but remains in faulted state\n"), argv[i]); if (newstate == VDEV_STATE_FAULTED) (void) printf(gettext("use 'zpool " "clear' to restore a faulted " "device\n")); else (void) printf(gettext("use 'zpool " "replace' to replace devices " "that are no longer present\n")); } } else { ret = 1; } } zpool_close(zhp); return (ret); } /* * zpool offline [-ft] ... * * -f Force the device into the offline state, even if doing * so would appear to compromise pool availability. * (not supported yet) * * -t Only take the device off-line temporarily. The offline * state will not be persistent across reboots. */ /* ARGSUSED */ int zpool_do_offline(int argc, char **argv) { int c, i; char *poolname; zpool_handle_t *zhp; int ret = 0; boolean_t istmp = B_FALSE; /* check options */ while ((c = getopt(argc, argv, "ft")) != -1) { switch (c) { case 't': istmp = B_TRUE; break; case 'f': case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); usage(B_FALSE); } } argc -= optind; argv += optind; /* get pool name and check number of arguments */ if (argc < 1) { (void) fprintf(stderr, gettext("missing pool name\n")); usage(B_FALSE); } if (argc < 2) { (void) fprintf(stderr, gettext("missing device name\n")); usage(B_FALSE); } poolname = argv[0]; if ((zhp = zpool_open(g_zfs, poolname)) == NULL) return (1); for (i = 1; i < argc; i++) { if (zpool_vdev_offline(zhp, argv[i], istmp) != 0) ret = 1; } zpool_close(zhp); return (ret); } /* * zpool clear [device] * * Clear all errors associated with a pool or a particular device. 
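 */

/*
 * Illustrative sketch only: zpool_do_online() above calls
 * zpool_vdev_online() once per device and inspects the returned vdev state
 * to decide which follow-up hint to print.  The hypothetical fragment below
 * shows the call and the state check in isolation.
 */
static int
online_one_device(zpool_handle_t *zhp, const char *dev, boolean_t expand)
{
	vdev_state_t newstate;
	int flags = expand ? ZFS_ONLINE_EXPAND : 0;

	if (zpool_vdev_online(zhp, dev, flags, &newstate) != 0)
		return (1);		/* libzfs already printed the error */

	if (newstate != VDEV_STATE_HEALTHY)
		(void) printf("device '%s' onlined, but remains in a "
		    "degraded or faulted state\n", dev);

	return (0);
}

/*
 * The real command further distinguishes VDEV_STATE_FAULTED (suggesting
 * 'zpool clear') from a device that is no longer present (suggesting
 * 'zpool replace').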
*/ int zpool_do_clear(int argc, char **argv) { int c; int ret = 0; boolean_t dryrun = B_FALSE; boolean_t do_rewind = B_FALSE; boolean_t xtreme_rewind = B_FALSE; uint32_t rewind_policy = ZPOOL_NO_REWIND; nvlist_t *policy = NULL; zpool_handle_t *zhp; char *pool, *device; /* check options */ while ((c = getopt(argc, argv, "FnX")) != -1) { switch (c) { case 'F': do_rewind = B_TRUE; break; case 'n': dryrun = B_TRUE; break; case 'X': xtreme_rewind = B_TRUE; break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); usage(B_FALSE); } } argc -= optind; argv += optind; if (argc < 1) { (void) fprintf(stderr, gettext("missing pool name\n")); usage(B_FALSE); } if (argc > 2) { (void) fprintf(stderr, gettext("too many arguments\n")); usage(B_FALSE); } if ((dryrun || xtreme_rewind) && !do_rewind) { (void) fprintf(stderr, gettext("-n or -X only meaningful with -F\n")); usage(B_FALSE); } if (dryrun) rewind_policy = ZPOOL_TRY_REWIND; else if (do_rewind) rewind_policy = ZPOOL_DO_REWIND; if (xtreme_rewind) rewind_policy |= ZPOOL_EXTREME_REWIND; /* In future, further rewind policy choices can be passed along here */ if (nvlist_alloc(&policy, NV_UNIQUE_NAME, 0) != 0 || nvlist_add_uint32(policy, ZPOOL_REWIND_REQUEST, rewind_policy) != 0) return (1); pool = argv[0]; device = argc == 2 ? argv[1] : NULL; if ((zhp = zpool_open_canfail(g_zfs, pool)) == NULL) { nvlist_free(policy); return (1); } if (zpool_clear(zhp, device, policy) != 0) ret = 1; zpool_close(zhp); nvlist_free(policy); return (ret); } /* * zpool reguid */ int zpool_do_reguid(int argc, char **argv) { int c; char *poolname; zpool_handle_t *zhp; int ret = 0; /* check options */ while ((c = getopt(argc, argv, "")) != -1) { switch (c) { case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); usage(B_FALSE); } } argc -= optind; argv += optind; /* get pool name and check number of arguments */ if (argc < 1) { (void) fprintf(stderr, gettext("missing pool name\n")); usage(B_FALSE); } if (argc > 1) { (void) fprintf(stderr, gettext("too many arguments\n")); usage(B_FALSE); } poolname = argv[0]; if ((zhp = zpool_open(g_zfs, poolname)) == NULL) return (1); ret = zpool_reguid(zhp); zpool_close(zhp); return (ret); } /* * zpool reopen * * Reopen the pool so that the kernel can update the sizes of all vdevs. */ int zpool_do_reopen(int argc, char **argv) { int c; int ret = 0; zpool_handle_t *zhp; char *pool; /* check options */ while ((c = getopt(argc, argv, "")) != -1) { switch (c) { case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); usage(B_FALSE); } } argc--; argv++; if (argc < 1) { (void) fprintf(stderr, gettext("missing pool name\n")); usage(B_FALSE); } if (argc > 1) { (void) fprintf(stderr, gettext("too many arguments\n")); usage(B_FALSE); } pool = argv[0]; if ((zhp = zpool_open_canfail(g_zfs, pool)) == NULL) return (1); ret = zpool_reopen(zhp); zpool_close(zhp); return (ret); } typedef struct scrub_cbdata { int cb_type; int cb_argc; char **cb_argv; } scrub_cbdata_t; int scrub_callback(zpool_handle_t *zhp, void *data) { scrub_cbdata_t *cb = data; int err; /* * Ignore faulted pools. */ if (zpool_get_state(zhp) == POOL_STATE_UNAVAIL) { (void) fprintf(stderr, gettext("cannot scrub '%s': pool is " "currently unavailable\n"), zpool_get_name(zhp)); return (1); } err = zpool_scan(zhp, cb->cb_type); return (err != 0); } /* * zpool scrub [-s] ... * * -s Stop. Stops any in-progress scrub. 
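 */

/*
 * Illustrative sketch only: zpool_do_clear() above encodes its -F/-n/-X
 * options as a small rewind-policy nvlist that is passed to zpool_clear().
 * The hypothetical builder below shows just that encoding.
 */
static nvlist_t *
build_rewind_policy(boolean_t do_rewind, boolean_t dryrun, boolean_t xtreme)
{
	uint32_t rewind_policy = ZPOOL_NO_REWIND;
	nvlist_t *policy = NULL;

	if (dryrun)
		rewind_policy = ZPOOL_TRY_REWIND;	/* -n: check only */
	else if (do_rewind)
		rewind_policy = ZPOOL_DO_REWIND;	/* -F: really rewind */
	if (xtreme)
		rewind_policy |= ZPOOL_EXTREME_REWIND;	/* -X modifier */

	if (nvlist_alloc(&policy, NV_UNIQUE_NAME, 0) != 0 ||
	    nvlist_add_uint32(policy, ZPOOL_REWIND_REQUEST,
	    rewind_policy) != 0) {
		nvlist_free(policy);
		return (NULL);
	}
	return (policy);
}

/*
 * The caller remains responsible for nvlist_free() once zpool_clear()
 * returns, mirroring the cleanup in zpool_do_clear().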
*/ int zpool_do_scrub(int argc, char **argv) { int c; scrub_cbdata_t cb; cb.cb_type = POOL_SCAN_SCRUB; /* check options */ while ((c = getopt(argc, argv, "s")) != -1) { switch (c) { case 's': cb.cb_type = POOL_SCAN_NONE; break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); usage(B_FALSE); } } cb.cb_argc = argc; cb.cb_argv = argv; argc -= optind; argv += optind; if (argc < 1) { (void) fprintf(stderr, gettext("missing pool name argument\n")); usage(B_FALSE); } return (for_each_pool(argc, argv, B_TRUE, NULL, scrub_callback, &cb)); } typedef struct status_cbdata { int cb_count; boolean_t cb_allpools; boolean_t cb_verbose; boolean_t cb_explain; boolean_t cb_first; boolean_t cb_dedup_stats; } status_cbdata_t; /* * Print out detailed scrub status. */ void print_scan_status(pool_scan_stat_t *ps) { time_t start, end; uint64_t elapsed, mins_left, hours_left; uint64_t pass_exam, examined, total; uint_t rate; double fraction_done; char processed_buf[7], examined_buf[7], total_buf[7], rate_buf[7]; (void) printf(gettext(" scan: ")); /* If there's never been a scan, there's not much to say. */ if (ps == NULL || ps->pss_func == POOL_SCAN_NONE || ps->pss_func >= POOL_SCAN_FUNCS) { (void) printf(gettext("none requested\n")); return; } start = ps->pss_start_time; end = ps->pss_end_time; zfs_nicenum(ps->pss_processed, processed_buf, sizeof (processed_buf)); assert(ps->pss_func == POOL_SCAN_SCRUB || ps->pss_func == POOL_SCAN_RESILVER); /* * Scan is finished or canceled. */ if (ps->pss_state == DSS_FINISHED) { uint64_t minutes_taken = (end - start) / 60; char *fmt = NULL; if (ps->pss_func == POOL_SCAN_SCRUB) { fmt = gettext("scrub repaired %s in %lluh%um with " "%llu errors on %s"); } else if (ps->pss_func == POOL_SCAN_RESILVER) { fmt = gettext("resilvered %s in %lluh%um with " "%llu errors on %s"); } /* LINTED */ (void) printf(fmt, processed_buf, (u_longlong_t)(minutes_taken / 60), (uint_t)(minutes_taken % 60), (u_longlong_t)ps->pss_errors, ctime((time_t *)&end)); return; } else if (ps->pss_state == DSS_CANCELED) { if (ps->pss_func == POOL_SCAN_SCRUB) { (void) printf(gettext("scrub canceled on %s"), ctime(&end)); } else if (ps->pss_func == POOL_SCAN_RESILVER) { (void) printf(gettext("resilver canceled on %s"), ctime(&end)); } return; } assert(ps->pss_state == DSS_SCANNING); /* * Scan is in progress. */ if (ps->pss_func == POOL_SCAN_SCRUB) { (void) printf(gettext("scrub in progress since %s"), ctime(&start)); } else if (ps->pss_func == POOL_SCAN_RESILVER) { (void) printf(gettext("resilver in progress since %s"), ctime(&start)); } examined = ps->pss_examined ? ps->pss_examined : 1; total = ps->pss_to_examine; fraction_done = (double)examined / total; /* elapsed time for this pass */ elapsed = time(NULL) - ps->pss_pass_start; elapsed = elapsed ? elapsed : 1; pass_exam = ps->pss_pass_exam ? ps->pss_pass_exam : 1; rate = pass_exam / elapsed; rate = rate ? 
rate : 1; mins_left = ((total - examined) / rate) / 60; hours_left = mins_left / 60; zfs_nicenum(examined, examined_buf, sizeof (examined_buf)); zfs_nicenum(total, total_buf, sizeof (total_buf)); zfs_nicenum(rate, rate_buf, sizeof (rate_buf)); /* * do not print estimated time if hours_left is more than 30 days */ (void) printf(gettext(" %s scanned out of %s at %s/s"), examined_buf, total_buf, rate_buf); if (hours_left < (30 * 24)) { (void) printf(gettext(", %lluh%um to go\n"), (u_longlong_t)hours_left, (uint_t)(mins_left % 60)); } else { (void) printf(gettext( ", (scan is slow, no estimated time)\n")); } if (ps->pss_func == POOL_SCAN_RESILVER) { (void) printf(gettext(" %s resilvered, %.2f%% done\n"), processed_buf, 100 * fraction_done); } else if (ps->pss_func == POOL_SCAN_SCRUB) { (void) printf(gettext(" %s repaired, %.2f%% done\n"), processed_buf, 100 * fraction_done); } } static void print_error_log(zpool_handle_t *zhp) { nvlist_t *nverrlist = NULL; nvpair_t *elem; char *pathname; size_t len = MAXPATHLEN * 2; if (zpool_get_errlog(zhp, &nverrlist) != 0) { (void) printf("errors: List of errors unavailable " "(insufficient privileges)\n"); return; } (void) printf("errors: Permanent errors have been " "detected in the following files:\n\n"); pathname = safe_malloc(len); elem = NULL; while ((elem = nvlist_next_nvpair(nverrlist, elem)) != NULL) { nvlist_t *nv; uint64_t dsobj, obj; verify(nvpair_value_nvlist(elem, &nv) == 0); verify(nvlist_lookup_uint64(nv, ZPOOL_ERR_DATASET, &dsobj) == 0); verify(nvlist_lookup_uint64(nv, ZPOOL_ERR_OBJECT, &obj) == 0); zpool_obj_to_path(zhp, dsobj, obj, pathname, len); (void) printf("%7s %s\n", "", pathname); } free(pathname); nvlist_free(nverrlist); } static void print_spares(zpool_handle_t *zhp, nvlist_t **spares, uint_t nspares, int namewidth) { uint_t i; char *name; if (nspares == 0) return; (void) printf(gettext("\tspares\n")); for (i = 0; i < nspares; i++) { name = zpool_vdev_name(g_zfs, zhp, spares[i], B_FALSE); print_status_config(zhp, name, spares[i], namewidth, 2, B_TRUE); free(name); } } static void print_l2cache(zpool_handle_t *zhp, nvlist_t **l2cache, uint_t nl2cache, int namewidth) { uint_t i; char *name; if (nl2cache == 0) return; (void) printf(gettext("\tcache\n")); for (i = 0; i < nl2cache; i++) { name = zpool_vdev_name(g_zfs, zhp, l2cache[i], B_FALSE); print_status_config(zhp, name, l2cache[i], namewidth, 2, B_FALSE); free(name); } } static void print_dedup_stats(nvlist_t *config) { ddt_histogram_t *ddh; ddt_stat_t *dds; ddt_object_t *ddo; uint_t c; /* * If the pool was faulted then we may not have been able to * obtain the config. Otherwise, if we have anything in the dedup * table continue processing the stats. */ if (nvlist_lookup_uint64_array(config, ZPOOL_CONFIG_DDT_OBJ_STATS, (uint64_t **)&ddo, &c) != 0) return; (void) printf("\n"); (void) printf(gettext(" dedup: ")); if (ddo->ddo_count == 0) { (void) printf(gettext("no DDT entries\n")); return; } (void) printf("DDT entries %llu, size %llu on disk, %llu in core\n", (u_longlong_t)ddo->ddo_count, (u_longlong_t)ddo->ddo_dspace, (u_longlong_t)ddo->ddo_mspace); verify(nvlist_lookup_uint64_array(config, ZPOOL_CONFIG_DDT_STATS, (uint64_t **)&dds, &c) == 0); verify(nvlist_lookup_uint64_array(config, ZPOOL_CONFIG_DDT_HISTOGRAM, (uint64_t **)&ddh, &c) == 0); zpool_dump_ddt(dds, ddh); } /* * Display a summary of pool status. Displays a summary such as: * * pool: tank * status: DEGRADED * reason: One or more devices ... 
* see: http://zfsonlinux.org/msg/ZFS-xxxx-01 * config: * mirror DEGRADED * c1t0d0 OK * c2t0d0 UNAVAIL * * When given the '-v' option, we print out the complete config. If the '-e' * option is specified, then we print out error rate information as well. */ int status_callback(zpool_handle_t *zhp, void *data) { status_cbdata_t *cbp = data; nvlist_t *config, *nvroot; char *msgid; int reason; const char *health; uint_t c; vdev_stat_t *vs; config = zpool_get_config(zhp, NULL); reason = zpool_get_status(zhp, &msgid); cbp->cb_count++; /* * If we were given 'zpool status -x', only report those pools with * problems. */ if (cbp->cb_explain && (reason == ZPOOL_STATUS_OK || reason == ZPOOL_STATUS_VERSION_OLDER || reason == ZPOOL_STATUS_FEAT_DISABLED)) { if (!cbp->cb_allpools) { (void) printf(gettext("pool '%s' is healthy\n"), zpool_get_name(zhp)); if (cbp->cb_first) cbp->cb_first = B_FALSE; } return (0); } if (cbp->cb_first) cbp->cb_first = B_FALSE; else (void) printf("\n"); verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&vs, &c) == 0); health = zpool_state_to_name(vs->vs_state, vs->vs_aux); (void) printf(gettext(" pool: %s\n"), zpool_get_name(zhp)); (void) printf(gettext(" state: %s\n"), health); switch (reason) { case ZPOOL_STATUS_MISSING_DEV_R: (void) printf(gettext("status: One or more devices could not " "be opened. Sufficient replicas exist for\n\tthe pool to " "continue functioning in a degraded state.\n")); (void) printf(gettext("action: Attach the missing device and " "online it using 'zpool online'.\n")); break; case ZPOOL_STATUS_MISSING_DEV_NR: (void) printf(gettext("status: One or more devices could not " "be opened. There are insufficient\n\treplicas for the " "pool to continue functioning.\n")); (void) printf(gettext("action: Attach the missing device and " "online it using 'zpool online'.\n")); break; case ZPOOL_STATUS_CORRUPT_LABEL_R: (void) printf(gettext("status: One or more devices could not " "be used because the label is missing or\n\tinvalid. " "Sufficient replicas exist for the pool to continue\n\t" "functioning in a degraded state.\n")); (void) printf(gettext("action: Replace the device using " "'zpool replace'.\n")); break; case ZPOOL_STATUS_CORRUPT_LABEL_NR: (void) printf(gettext("status: One or more devices could not " "be used because the label is missing \n\tor invalid. " "There are insufficient replicas for the pool to " "continue\n\tfunctioning.\n")); zpool_explain_recover(zpool_get_handle(zhp), zpool_get_name(zhp), reason, config); break; case ZPOOL_STATUS_FAILING_DEV: (void) printf(gettext("status: One or more devices has " "experienced an unrecoverable error. An\n\tattempt was " "made to correct the error. 
Applications are " "unaffected.\n")); (void) printf(gettext("action: Determine if the device needs " "to be replaced, and clear the errors\n\tusing " "'zpool clear' or replace the device with 'zpool " "replace'.\n")); break; case ZPOOL_STATUS_OFFLINE_DEV: (void) printf(gettext("status: One or more devices has " "been taken offline by the administrator.\n\tSufficient " "replicas exist for the pool to continue functioning in " "a\n\tdegraded state.\n")); (void) printf(gettext("action: Online the device using " "'zpool online' or replace the device with\n\t'zpool " "replace'.\n")); break; case ZPOOL_STATUS_REMOVED_DEV: (void) printf(gettext("status: One or more devices has " "been removed by the administrator.\n\tSufficient " "replicas exist for the pool to continue functioning in " "a\n\tdegraded state.\n")); (void) printf(gettext("action: Online the device using " "'zpool online' or replace the device with\n\t'zpool " "replace'.\n")); break; case ZPOOL_STATUS_RESILVERING: (void) printf(gettext("status: One or more devices is " "currently being resilvered. The pool will\n\tcontinue " "to function, possibly in a degraded state.\n")); (void) printf(gettext("action: Wait for the resilver to " "complete.\n")); break; case ZPOOL_STATUS_CORRUPT_DATA: (void) printf(gettext("status: One or more devices has " "experienced an error resulting in data\n\tcorruption. " "Applications may be affected.\n")); (void) printf(gettext("action: Restore the file in question " "if possible. Otherwise restore the\n\tentire pool from " "backup.\n")); break; case ZPOOL_STATUS_CORRUPT_POOL: (void) printf(gettext("status: The pool metadata is corrupted " "and the pool cannot be opened.\n")); zpool_explain_recover(zpool_get_handle(zhp), zpool_get_name(zhp), reason, config); break; case ZPOOL_STATUS_VERSION_OLDER: (void) printf(gettext("status: The pool is formatted using a " "legacy on-disk format. The pool can\n\tstill be used, " "but some features are unavailable.\n")); (void) printf(gettext("action: Upgrade the pool using 'zpool " "upgrade'. Once this is done, the\n\tpool will no longer " "be accessible on software that does not support\n\t" "feature flags.\n")); break; case ZPOOL_STATUS_VERSION_NEWER: (void) printf(gettext("status: The pool has been upgraded to a " "newer, incompatible on-disk version.\n\tThe pool cannot " "be accessed on this system.\n")); (void) printf(gettext("action: Access the pool from a system " "running more recent software, or\n\trestore the pool from " "backup.\n")); break; case ZPOOL_STATUS_FEAT_DISABLED: (void) printf(gettext("status: Some supported features are not " "enabled on the pool. The pool can\n\tstill be used, but " "some features are unavailable.\n")); (void) printf(gettext("action: Enable all features using " "'zpool upgrade'. Once this is done,\n\tthe pool may no " "longer be accessible by software that does not support\n\t" "the features. See zpool-features(5) for details.\n")); break; case ZPOOL_STATUS_UNSUP_FEAT_READ: (void) printf(gettext("status: The pool cannot be accessed on " "this system because it uses the\n\tfollowing feature(s) " "not supported on this system:\n")); zpool_print_unsup_feat(config); (void) printf("\n"); (void) printf(gettext("action: Access the pool from a system " "that supports the required feature(s),\n\tor restore the " "pool from backup.\n")); break; case ZPOOL_STATUS_UNSUP_FEAT_WRITE: (void) printf(gettext("status: The pool can only be accessed " "in read-only mode on this system. 
It\n\tcannot be " "accessed in read-write mode because it uses the " "following\n\tfeature(s) not supported on this system:\n")); zpool_print_unsup_feat(config); (void) printf("\n"); (void) printf(gettext("action: The pool cannot be accessed in " "read-write mode. Import the pool with\n" "\t\"-o readonly=on\", access the pool from a system that " "supports the\n\trequired feature(s), or restore the " "pool from backup.\n")); break; case ZPOOL_STATUS_FAULTED_DEV_R: (void) printf(gettext("status: One or more devices are " "faulted in response to persistent errors.\n\tSufficient " "replicas exist for the pool to continue functioning " "in a\n\tdegraded state.\n")); (void) printf(gettext("action: Replace the faulted device, " "or use 'zpool clear' to mark the device\n\trepaired.\n")); break; case ZPOOL_STATUS_FAULTED_DEV_NR: (void) printf(gettext("status: One or more devices are " "faulted in response to persistent errors. There are " "insufficient replicas for the pool to\n\tcontinue " "functioning.\n")); (void) printf(gettext("action: Destroy and re-create the pool " "from a backup source. Manually marking the device\n" "\trepaired using 'zpool clear' may allow some data " "to be recovered.\n")); break; case ZPOOL_STATUS_IO_FAILURE_WAIT: case ZPOOL_STATUS_IO_FAILURE_CONTINUE: (void) printf(gettext("status: One or more devices are " "faulted in response to IO failures.\n")); (void) printf(gettext("action: Make sure the affected devices " "are connected, then run 'zpool clear'.\n")); break; case ZPOOL_STATUS_BAD_LOG: (void) printf(gettext("status: An intent log record " "could not be read.\n" "\tWaiting for adminstrator intervention to fix the " "faulted pool.\n")); (void) printf(gettext("action: Either restore the affected " "device(s) and run 'zpool online',\n" "\tor ignore the intent log records by running " "'zpool clear'.\n")); break; default: /* * The remaining errors can't actually be generated, yet. */ assert(reason == ZPOOL_STATUS_OK); } if (msgid != NULL) (void) printf(gettext(" see: http://zfsonlinux.org/msg/%s\n"), msgid); if (config != NULL) { int namewidth; uint64_t nerr; nvlist_t **spares, **l2cache; uint_t nspares, nl2cache; pool_scan_stat_t *ps = NULL; (void) nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_SCAN_STATS, (uint64_t **)&ps, &c); print_scan_status(ps); namewidth = max_width(zhp, nvroot, 0, 0); if (namewidth < 10) namewidth = 10; (void) printf(gettext("config:\n\n")); (void) printf(gettext("\t%-*s %-8s %5s %5s %5s\n"), namewidth, "NAME", "STATE", "READ", "WRITE", "CKSUM"); print_status_config(zhp, zpool_get_name(zhp), nvroot, namewidth, 0, B_FALSE); if (num_logs(nvroot) > 0) print_logs(zhp, nvroot, namewidth, B_TRUE); if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0) print_l2cache(zhp, l2cache, nl2cache, namewidth); if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0) print_spares(zhp, spares, nspares, namewidth); if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_ERRCOUNT, &nerr) == 0) { nvlist_t *nverrlist = NULL; /* * If the approximate error count is small, get a * precise count by fetching the entire log and * uniquifying the results. 
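 *
 * For example, once zpool_get_errlog() has filled nverrlist, the precise
 * figure is just the number of nvpairs in the list; a sketch of the same
 * walk performed by the code below:
 *
 *	nvpair_t *elem = NULL;
 *	uint64_t precise = 0;
 *
 *	while ((elem = nvlist_next_nvpair(nverrlist, elem)) != NULL)
 *		precise++;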
*/ if (nerr > 0 && nerr < 100 && !cbp->cb_verbose && zpool_get_errlog(zhp, &nverrlist) == 0) { nvpair_t *elem; elem = NULL; nerr = 0; while ((elem = nvlist_next_nvpair(nverrlist, elem)) != NULL) { nerr++; } } nvlist_free(nverrlist); (void) printf("\n"); if (nerr == 0) (void) printf(gettext("errors: No known data " "errors\n")); else if (!cbp->cb_verbose) (void) printf(gettext("errors: %llu data " "errors, use '-v' for a list\n"), (u_longlong_t)nerr); else print_error_log(zhp); } if (cbp->cb_dedup_stats) print_dedup_stats(config); } else { (void) printf(gettext("config: The configuration cannot be " "determined.\n")); } return (0); } /* * zpool status [-vx] [-T d|u] [pool] ... [interval [count]] * * -v Display complete error logs * -x Display only pools with potential problems * -D Display dedup status (undocumented) * -T Display a timestamp in date(1) or Unix format * * Describes the health status of all pools or some subset. */ int zpool_do_status(int argc, char **argv) { int c; int ret; unsigned long interval = 0, count = 0; status_cbdata_t cb = { 0 }; /* check options */ while ((c = getopt(argc, argv, "vxDT:")) != -1) { switch (c) { case 'v': cb.cb_verbose = B_TRUE; break; case 'x': cb.cb_explain = B_TRUE; break; case 'D': cb.cb_dedup_stats = B_TRUE; break; case 'T': get_timestamp_arg(*optarg); break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); usage(B_FALSE); } } argc -= optind; argv += optind; get_interval_count(&argc, argv, &interval, &count); if (argc == 0) cb.cb_allpools = B_TRUE; cb.cb_first = B_TRUE; for (;;) { if (timestamp_fmt != NODATE) print_timestamp(timestamp_fmt); ret = for_each_pool(argc, argv, B_TRUE, NULL, status_callback, &cb); if (argc == 0 && cb.cb_count == 0) (void) fprintf(stderr, gettext("no pools available\n")); else if (cb.cb_explain && cb.cb_first && cb.cb_allpools) (void) printf(gettext("all pools are healthy\n")); if (ret != 0) return (ret); if (interval == 0) break; if (count != 0 && --count == 0) break; (void) sleep(interval); } return (0); } typedef struct upgrade_cbdata { int cb_first; int cb_argc; uint64_t cb_version; char **cb_argv; } upgrade_cbdata_t; static int upgrade_version(zpool_handle_t *zhp, uint64_t version) { int ret; nvlist_t *config; uint64_t oldversion; config = zpool_get_config(zhp, NULL); verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, &oldversion) == 0); assert(SPA_VERSION_IS_SUPPORTED(oldversion)); assert(oldversion < version); ret = zpool_upgrade(zhp, version); if (ret != 0) return (ret); if (version >= SPA_VERSION_FEATURES) { (void) printf(gettext("Successfully upgraded " "'%s' from version %llu to feature flags.\n"), zpool_get_name(zhp), (u_longlong_t) oldversion); } else { (void) printf(gettext("Successfully upgraded " "'%s' from version %llu to version %llu.\n"), zpool_get_name(zhp), (u_longlong_t) oldversion, (u_longlong_t) version); } return (0); } static int upgrade_enable_all(zpool_handle_t *zhp, int *countp) { int i, ret, count; boolean_t firstff = B_TRUE; nvlist_t *enabled = zpool_get_features(zhp); count = 0; for (i = 0; i < SPA_FEATURES; i++) { const char *fname = spa_feature_table[i].fi_uname; const char *fguid = spa_feature_table[i].fi_guid; if (!nvlist_exists(enabled, fguid)) { char *propname; verify(-1 != asprintf(&propname, "feature@%s", fname)); ret = zpool_set_prop(zhp, propname, ZFS_FEATURE_ENABLED); if (ret != 0) { free(propname); return (ret); } count++; if (firstff) { (void) printf(gettext("Enabled the " "following features on '%s':\n"), zpool_get_name(zhp)); firstff = 
B_FALSE; } (void) printf(gettext(" %s\n"), fname); free(propname); } } if (countp != NULL) *countp = count; return (0); } static int upgrade_cb(zpool_handle_t *zhp, void *arg) { upgrade_cbdata_t *cbp = arg; nvlist_t *config; uint64_t version; boolean_t printnl = B_FALSE; int ret; config = zpool_get_config(zhp, NULL); verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, &version) == 0); assert(SPA_VERSION_IS_SUPPORTED(version)); if (version < cbp->cb_version) { cbp->cb_first = B_FALSE; ret = upgrade_version(zhp, cbp->cb_version); if (ret != 0) return (ret); printnl = B_TRUE; -#if 0 - /* - * XXX: This code can be enabled when Illumos commit - * 4445fffbbb1ea25fd0e9ea68b9380dd7a6709025 is merged. - * It reworks the history logging among other things. - */ - /* * If they did "zpool upgrade -a", then we could * be doing ioctls to different pools. We need * to log this history once to each pool, and bypass * the normal history logging that happens in main(). */ (void) zpool_log_history(g_zfs, history_str); log_history = B_FALSE; -#endif } if (cbp->cb_version >= SPA_VERSION_FEATURES) { int count; ret = upgrade_enable_all(zhp, &count); if (ret != 0) return (ret); if (count > 0) { cbp->cb_first = B_FALSE; printnl = B_TRUE; } } if (printnl) { (void) printf(gettext("\n")); } return (0); } static int upgrade_list_older_cb(zpool_handle_t *zhp, void *arg) { upgrade_cbdata_t *cbp = arg; nvlist_t *config; uint64_t version; config = zpool_get_config(zhp, NULL); verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, &version) == 0); assert(SPA_VERSION_IS_SUPPORTED(version)); if (version < SPA_VERSION_FEATURES) { if (cbp->cb_first) { (void) printf(gettext("The following pools are " "formatted with legacy version numbers and can\n" "be upgraded to use feature flags. After " "being upgraded, these pools\nwill no " "longer be accessible by software that does not " "support feature\nflags.\n\n")); (void) printf(gettext("VER POOL\n")); (void) printf(gettext("--- ------------\n")); cbp->cb_first = B_FALSE; } (void) printf("%2llu %s\n", (u_longlong_t)version, zpool_get_name(zhp)); } return (0); } static int upgrade_list_disabled_cb(zpool_handle_t *zhp, void *arg) { upgrade_cbdata_t *cbp = arg; nvlist_t *config; uint64_t version; config = zpool_get_config(zhp, NULL); verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, &version) == 0); if (version >= SPA_VERSION_FEATURES) { int i; boolean_t poolfirst = B_TRUE; nvlist_t *enabled = zpool_get_features(zhp); for (i = 0; i < SPA_FEATURES; i++) { const char *fguid = spa_feature_table[i].fi_guid; const char *fname = spa_feature_table[i].fi_uname; if (!nvlist_exists(enabled, fguid)) { if (cbp->cb_first) { (void) printf(gettext("\nSome " "supported features are not " "enabled on the following pools. " "Once a\nfeature is enabled the " "pool may become incompatible with " "software\nthat does not support " "the feature. See " "zpool-features(5) for " "details.\n\n")); (void) printf(gettext("POOL " "FEATURE\n")); (void) printf(gettext("------" "---------\n")); cbp->cb_first = B_FALSE; } if (poolfirst) { (void) printf(gettext("%s\n"), zpool_get_name(zhp)); poolfirst = B_FALSE; } (void) printf(gettext(" %s\n"), fname); } + /* + * If they did "zpool upgrade -a", then we could + * be doing ioctls to different pools. We need + * to log this history once to each pool, and bypass + * the normal history logging that happens in main(). 
+ */ + (void) zpool_log_history(g_zfs, history_str); + log_history = B_FALSE; } } return (0); } /* ARGSUSED */ static int upgrade_one(zpool_handle_t *zhp, void *data) { boolean_t printnl = B_FALSE; upgrade_cbdata_t *cbp = data; uint64_t cur_version; int ret; if (strcmp("log", zpool_get_name(zhp)) == 0) { (void) printf(gettext("'log' is now a reserved word\n" "Pool 'log' must be renamed using export and import" " to upgrade.\n")); return (1); } cur_version = zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL); if (cur_version > cbp->cb_version) { (void) printf(gettext("Pool '%s' is already formatted " "using more current version '%llu'.\n\n"), zpool_get_name(zhp), (u_longlong_t) cur_version); return (0); } if (cbp->cb_version != SPA_VERSION && cur_version == cbp->cb_version) { (void) printf(gettext("Pool '%s' is already formatted " "using version %llu.\n\n"), zpool_get_name(zhp), (u_longlong_t) cbp->cb_version); return (0); } if (cur_version != cbp->cb_version) { printnl = B_TRUE; ret = upgrade_version(zhp, cbp->cb_version); if (ret != 0) return (ret); } if (cbp->cb_version >= SPA_VERSION_FEATURES) { int count = 0; ret = upgrade_enable_all(zhp, &count); if (ret != 0) return (ret); if (count != 0) { printnl = B_TRUE; } else if (cur_version == SPA_VERSION) { (void) printf(gettext("Pool '%s' already has all " "supported features enabled.\n"), zpool_get_name(zhp)); } } if (printnl) { (void) printf(gettext("\n")); } return (0); } /* * zpool upgrade * zpool upgrade -v * zpool upgrade [-V version] <-a | pool ...> * * With no arguments, display downrev'd ZFS pool available for upgrade. * Individual pools can be upgraded by specifying the pool, and '-a' will * upgrade all pools. */ int zpool_do_upgrade(int argc, char **argv) { int c; upgrade_cbdata_t cb = { 0 }; int ret = 0; boolean_t showversions = B_FALSE; boolean_t upgradeall = B_FALSE; char *end; /* check options */ while ((c = getopt(argc, argv, ":avV:")) != -1) { switch (c) { case 'a': upgradeall = B_TRUE; break; case 'v': showversions = B_TRUE; break; case 'V': cb.cb_version = strtoll(optarg, &end, 10); if (*end != '\0' || !SPA_VERSION_IS_SUPPORTED(cb.cb_version)) { (void) fprintf(stderr, gettext("invalid version '%s'\n"), optarg); usage(B_FALSE); } break; case ':': (void) fprintf(stderr, gettext("missing argument for " "'%c' option\n"), optopt); usage(B_FALSE); break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); usage(B_FALSE); } } cb.cb_argc = argc; cb.cb_argv = argv; argc -= optind; argv += optind; if (cb.cb_version == 0) { cb.cb_version = SPA_VERSION; } else if (!upgradeall && argc == 0) { (void) fprintf(stderr, gettext("-V option is " "incompatible with other arguments\n")); usage(B_FALSE); } if (showversions) { if (upgradeall || argc != 0) { (void) fprintf(stderr, gettext("-v option is " "incompatible with other arguments\n")); usage(B_FALSE); } } else if (upgradeall) { if (argc != 0) { (void) fprintf(stderr, gettext("-a option should not " "be used along with a pool name\n")); usage(B_FALSE); } } (void) printf(gettext("This system supports ZFS pool feature " "flags.\n\n")); if (showversions) { int i; (void) printf(gettext("The following features are " "supported:\n\n")); (void) printf(gettext("FEAT DESCRIPTION\n")); (void) printf("----------------------------------------------" "---------------\n"); for (i = 0; i < SPA_FEATURES; i++) { zfeature_info_t *fi = &spa_feature_table[i]; const char *ro = fi->fi_can_readonly ? 
" (read-only compatible)" : ""; (void) printf("%-37s%s\n", fi->fi_uname, ro); (void) printf(" %s\n", fi->fi_desc); } (void) printf("\n"); (void) printf(gettext("The following legacy versions are also " "supported:\n\n")); (void) printf(gettext("VER DESCRIPTION\n")); (void) printf("--- -----------------------------------------" "---------------\n"); (void) printf(gettext(" 1 Initial ZFS version\n")); (void) printf(gettext(" 2 Ditto blocks " "(replicated metadata)\n")); (void) printf(gettext(" 3 Hot spares and double parity " "RAID-Z\n")); (void) printf(gettext(" 4 zpool history\n")); (void) printf(gettext(" 5 Compression using the gzip " "algorithm\n")); (void) printf(gettext(" 6 bootfs pool property\n")); (void) printf(gettext(" 7 Separate intent log devices\n")); (void) printf(gettext(" 8 Delegated administration\n")); (void) printf(gettext(" 9 refquota and refreservation " "properties\n")); (void) printf(gettext(" 10 Cache devices\n")); (void) printf(gettext(" 11 Improved scrub performance\n")); (void) printf(gettext(" 12 Snapshot properties\n")); (void) printf(gettext(" 13 snapused property\n")); (void) printf(gettext(" 14 passthrough-x aclinherit\n")); (void) printf(gettext(" 15 user/group space accounting\n")); (void) printf(gettext(" 16 stmf property support\n")); (void) printf(gettext(" 17 Triple-parity RAID-Z\n")); (void) printf(gettext(" 18 Snapshot user holds\n")); (void) printf(gettext(" 19 Log device removal\n")); (void) printf(gettext(" 20 Compression using zle " "(zero-length encoding)\n")); (void) printf(gettext(" 21 Deduplication\n")); (void) printf(gettext(" 22 Received properties\n")); (void) printf(gettext(" 23 Slim ZIL\n")); (void) printf(gettext(" 24 System attributes\n")); (void) printf(gettext(" 25 Improved scrub stats\n")); (void) printf(gettext(" 26 Improved snapshot deletion " "performance\n")); (void) printf(gettext(" 27 Improved snapshot creation " "performance\n")); (void) printf(gettext(" 28 Multiple vdev replacements\n")); (void) printf(gettext("\nFor more information on a particular " "version, including supported releases,\n")); (void) printf(gettext("see the ZFS Administration Guide.\n\n")); } else if (argc == 0 && upgradeall) { cb.cb_first = B_TRUE; ret = zpool_iter(g_zfs, upgrade_cb, &cb); if (ret == 0 && cb.cb_first) { if (cb.cb_version == SPA_VERSION) { (void) printf(gettext("All pools are already " "formatted using feature flags.\n\n")); (void) printf(gettext("Every feature flags " "pool already has all supported features " "enabled.\n")); } else { (void) printf(gettext("All pools are already " "formatted with version %llu or higher.\n"), (u_longlong_t) cb.cb_version); } } } else if (argc == 0) { cb.cb_first = B_TRUE; ret = zpool_iter(g_zfs, upgrade_list_older_cb, &cb); assert(ret == 0); if (cb.cb_first) { (void) printf(gettext("All pools are formatted " "using feature flags.\n\n")); } else { (void) printf(gettext("\nUse 'zpool upgrade -v' " "for a list of available legacy versions.\n")); } cb.cb_first = B_TRUE; ret = zpool_iter(g_zfs, upgrade_list_disabled_cb, &cb); assert(ret == 0); if (cb.cb_first) { (void) printf(gettext("Every feature flags pool has " "all supported features enabled.\n")); } else { (void) printf(gettext("\n")); } } else { ret = for_each_pool(argc, argv, B_FALSE, NULL, upgrade_one, &cb); } return (ret); } typedef struct hist_cbdata { boolean_t first; - int longfmt; - int internal; + boolean_t longfmt; + boolean_t internal; } hist_cbdata_t; /* * Print out the command history for a specific pool. 
*/ static int get_history_one(zpool_handle_t *zhp, void *data) { nvlist_t *nvhis; nvlist_t **records; uint_t numrecords; - char *cmdstr; - char *pathstr; - uint64_t dst_time; - time_t tsec; - struct tm t; - char tbuf[30]; int ret, i; - uint64_t who; - struct passwd *pwd; - char *hostname; - char *zonename; - char internalstr[MAXPATHLEN]; hist_cbdata_t *cb = (hist_cbdata_t *)data; - uint64_t txg; - uint64_t ievent; cb->first = B_FALSE; (void) printf(gettext("History for '%s':\n"), zpool_get_name(zhp)); if ((ret = zpool_get_history(zhp, &nvhis)) != 0) return (ret); verify(nvlist_lookup_nvlist_array(nvhis, ZPOOL_HIST_RECORD, &records, &numrecords) == 0); for (i = 0; i < numrecords; i++) { - if (nvlist_lookup_uint64(records[i], ZPOOL_HIST_TIME, - &dst_time) != 0) - continue; + nvlist_t *rec = records[i]; + char tbuf[30] = ""; - /* is it an internal event or a standard event? */ - if (nvlist_lookup_string(records[i], ZPOOL_HIST_CMD, - &cmdstr) != 0) { - if (cb->internal == 0) - continue; + if (nvlist_exists(rec, ZPOOL_HIST_TIME)) { + time_t tsec; + struct tm t; - if (nvlist_lookup_uint64(records[i], - ZPOOL_HIST_INT_EVENT, &ievent) != 0) + tsec = fnvlist_lookup_uint64(records[i], + ZPOOL_HIST_TIME); + (void) localtime_r(&tsec, &t); + (void) strftime(tbuf, sizeof (tbuf), "%F.%T", &t); + } + + if (nvlist_exists(rec, ZPOOL_HIST_CMD)) { + (void) printf("%s %s", tbuf, + fnvlist_lookup_string(rec, ZPOOL_HIST_CMD)); + } else if (nvlist_exists(rec, ZPOOL_HIST_INT_EVENT)) { + int ievent = + fnvlist_lookup_uint64(rec, ZPOOL_HIST_INT_EVENT); + if (!cb->internal) continue; - verify(nvlist_lookup_uint64(records[i], - ZPOOL_HIST_TXG, &txg) == 0); - verify(nvlist_lookup_string(records[i], - ZPOOL_HIST_INT_STR, &pathstr) == 0); - if (ievent >= LOG_END) + if (ievent >= ZFS_NUM_LEGACY_HISTORY_EVENTS) { + (void) printf("%s unrecognized record:\n", + tbuf); + dump_nvlist(rec, 4); continue; - (void) snprintf(internalstr, - sizeof (internalstr), - "[internal %s txg:%llu] %s", - zfs_history_event_names[ievent], (u_longlong_t)txg, - pathstr); - cmdstr = internalstr; + } + (void) printf("%s [internal %s txg:%lld] %s", tbuf, + zfs_history_event_names[ievent], + (long long int)fnvlist_lookup_uint64(rec, ZPOOL_HIST_TXG), + fnvlist_lookup_string(rec, ZPOOL_HIST_INT_STR)); + } else if (nvlist_exists(rec, ZPOOL_HIST_INT_NAME)) { + if (!cb->internal) + continue; + (void) printf("%s [txg:%lld] %s", tbuf, + (long long int)fnvlist_lookup_uint64(rec, ZPOOL_HIST_TXG), + fnvlist_lookup_string(rec, ZPOOL_HIST_INT_NAME)); + if (nvlist_exists(rec, ZPOOL_HIST_DSNAME)) { + (void) printf(" %s (%llu)", + fnvlist_lookup_string(rec, + ZPOOL_HIST_DSNAME), + (long long unsigned int)fnvlist_lookup_uint64(rec, + ZPOOL_HIST_DSID)); + } + (void) printf(" %s", fnvlist_lookup_string(rec, + ZPOOL_HIST_INT_STR)); + } else if (nvlist_exists(rec, ZPOOL_HIST_IOCTL)) { + if (!cb->internal) + continue; + (void) printf("%s ioctl %s\n", tbuf, + fnvlist_lookup_string(rec, ZPOOL_HIST_IOCTL)); + if (nvlist_exists(rec, ZPOOL_HIST_INPUT_NVL)) { + (void) printf(" input:\n"); + dump_nvlist(fnvlist_lookup_nvlist(rec, + ZPOOL_HIST_INPUT_NVL), 8); + } + if (nvlist_exists(rec, ZPOOL_HIST_OUTPUT_NVL)) { + (void) printf(" output:\n"); + dump_nvlist(fnvlist_lookup_nvlist(rec, + ZPOOL_HIST_OUTPUT_NVL), 8); + } + } else { + if (!cb->internal) + continue; + (void) printf("%s unrecognized record:\n", tbuf); + dump_nvlist(rec, 4); } - tsec = dst_time; - (void) localtime_r(&tsec, &t); - (void) strftime(tbuf, sizeof (tbuf), "%F.%T", &t); - (void) printf("%s %s", tbuf, cmdstr); 
if (!cb->longfmt) { (void) printf("\n"); continue; } (void) printf(" ["); - if (nvlist_lookup_uint64(records[i], - ZPOOL_HIST_WHO, &who) == 0) { - pwd = getpwuid((uid_t)who); - if (pwd) - (void) printf("user %s on", - pwd->pw_name); - else - (void) printf("user %d on", - (int)who); - } else { - (void) printf(gettext("no info]\n")); - continue; + if (nvlist_exists(rec, ZPOOL_HIST_WHO)) { + uid_t who = fnvlist_lookup_uint64(rec, ZPOOL_HIST_WHO); + struct passwd *pwd = getpwuid(who); + (void) printf("user %d ", (int)who); + if (pwd != NULL) + (void) printf("(%s) ", pwd->pw_name); } - if (nvlist_lookup_string(records[i], - ZPOOL_HIST_HOST, &hostname) == 0) { - (void) printf(" %s", hostname); + if (nvlist_exists(rec, ZPOOL_HIST_HOST)) { + (void) printf("on %s", + fnvlist_lookup_string(rec, ZPOOL_HIST_HOST)); } - if (nvlist_lookup_string(records[i], - ZPOOL_HIST_ZONE, &zonename) == 0) { - (void) printf(":%s", zonename); + if (nvlist_exists(rec, ZPOOL_HIST_ZONE)) { + (void) printf(":%s", + fnvlist_lookup_string(rec, ZPOOL_HIST_ZONE)); } (void) printf("]"); (void) printf("\n"); } (void) printf("\n"); nvlist_free(nvhis); return (ret); } /* * zpool history * * Displays the history of commands that modified pools. */ - - int zpool_do_history(int argc, char **argv) { hist_cbdata_t cbdata = { 0 }; int ret; int c; cbdata.first = B_TRUE; /* check options */ while ((c = getopt(argc, argv, "li")) != -1) { switch (c) { case 'l': - cbdata.longfmt = 1; + cbdata.longfmt = B_TRUE; break; case 'i': - cbdata.internal = 1; + cbdata.internal = B_TRUE; break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); usage(B_FALSE); } } argc -= optind; argv += optind; ret = for_each_pool(argc, argv, B_FALSE, NULL, get_history_one, &cbdata); if (argc == 0 && cbdata.first == B_TRUE) { (void) fprintf(stderr, gettext("no pools available\n")); return (0); } return (ret); } typedef struct ev_opts { int verbose; int scripted; int follow; int clear; } ev_opts_t; static void zpool_do_events_short(nvlist_t *nvl) { char ctime_str[26], str[32], *ptr; int64_t *tv; uint_t n; verify(nvlist_lookup_int64_array(nvl, FM_EREPORT_TIME, &tv, &n) == 0); memset(str, ' ', 32); (void) ctime_r((const time_t *)&tv[0], ctime_str); (void) strncpy(str, ctime_str+4, 6); /* 'Jun 30' */ (void) strncpy(str+7, ctime_str+20, 4); /* '1993' */ (void) strncpy(str+12, ctime_str+11, 8); /* '21:49:08' */ (void) sprintf(str+20, ".%09lld", (longlong_t)tv[1]);/* '.123456789' */ (void) printf(gettext("%s "), str); verify(nvlist_lookup_string(nvl, FM_CLASS, &ptr) == 0); (void) printf(gettext("%s\n"), ptr); } static void zpool_do_events_nvprint(nvlist_t *nvl, int depth) { nvpair_t *nvp; for (nvp = nvlist_next_nvpair(nvl, NULL); nvp != NULL; nvp = nvlist_next_nvpair(nvl, nvp)) { data_type_t type = nvpair_type(nvp); const char *name = nvpair_name(nvp); boolean_t b; uint8_t i8; uint16_t i16; uint32_t i32; uint64_t i64; char *str; nvlist_t *cnv; printf(gettext("%*s%s = "), depth, "", name); switch (type) { case DATA_TYPE_BOOLEAN: printf(gettext("%s"), "1"); break; case DATA_TYPE_BOOLEAN_VALUE: (void) nvpair_value_boolean_value(nvp, &b); printf(gettext("%s"), b ? 
"1" : "0"); break; case DATA_TYPE_BYTE: (void) nvpair_value_byte(nvp, &i8); printf(gettext("0x%x"), i8); break; case DATA_TYPE_INT8: (void) nvpair_value_int8(nvp, (void *)&i8); printf(gettext("0x%x"), i8); break; case DATA_TYPE_UINT8: (void) nvpair_value_uint8(nvp, &i8); printf(gettext("0x%x"), i8); break; case DATA_TYPE_INT16: (void) nvpair_value_int16(nvp, (void *)&i16); printf(gettext("0x%x"), i16); break; case DATA_TYPE_UINT16: (void) nvpair_value_uint16(nvp, &i16); printf(gettext("0x%x"), i16); break; case DATA_TYPE_INT32: (void) nvpair_value_int32(nvp, (void *)&i32); printf(gettext("0x%x"), i32); break; case DATA_TYPE_UINT32: (void) nvpair_value_uint32(nvp, &i32); printf(gettext("0x%x"), i32); break; case DATA_TYPE_INT64: (void) nvpair_value_int64(nvp, (void *)&i64); printf(gettext("0x%llx"), (u_longlong_t)i64); break; case DATA_TYPE_UINT64: (void) nvpair_value_uint64(nvp, &i64); printf(gettext("0x%llx"), (u_longlong_t)i64); break; case DATA_TYPE_HRTIME: (void) nvpair_value_hrtime(nvp, (void *)&i64); printf(gettext("0x%llx"), (u_longlong_t)i64); break; case DATA_TYPE_STRING: (void) nvpair_value_string(nvp, &str); printf(gettext("\"%s\""), str ? str : ""); break; case DATA_TYPE_NVLIST: printf(gettext("(embedded nvlist)\n")); (void) nvpair_value_nvlist(nvp, &cnv); zpool_do_events_nvprint(cnv, depth + 8); printf(gettext("%*s(end %s)"), depth, "", name); break; case DATA_TYPE_NVLIST_ARRAY: { nvlist_t **val; uint_t i, nelem; (void) nvpair_value_nvlist_array(nvp, &val, &nelem); printf(gettext("(%d embedded nvlists)\n"), nelem); for (i = 0; i < nelem; i++) { printf(gettext("%*s%s[%d] = %s\n"), depth, "", name, i, "(embedded nvlist)"); zpool_do_events_nvprint(val[i], depth + 8); printf(gettext("%*s(end %s[%i])\n"), depth, "", name, i); } printf(gettext("%*s(end %s)\n"), depth, "", name); } break; case DATA_TYPE_INT8_ARRAY: { int8_t *val; uint_t i, nelem; (void) nvpair_value_int8_array(nvp, &val, &nelem); for (i = 0; i < nelem; i++) printf(gettext("0x%x "), val[i]); break; } case DATA_TYPE_UINT8_ARRAY: { uint8_t *val; uint_t i, nelem; (void) nvpair_value_uint8_array(nvp, &val, &nelem); for (i = 0; i < nelem; i++) printf(gettext("0x%x "), val[i]); break; } case DATA_TYPE_INT16_ARRAY: { int16_t *val; uint_t i, nelem; (void) nvpair_value_int16_array(nvp, &val, &nelem); for (i = 0; i < nelem; i++) printf(gettext("0x%x "), val[i]); break; } case DATA_TYPE_UINT16_ARRAY: { uint16_t *val; uint_t i, nelem; (void) nvpair_value_uint16_array(nvp, &val, &nelem); for (i = 0; i < nelem; i++) printf(gettext("0x%x "), val[i]); break; } case DATA_TYPE_INT32_ARRAY: { int32_t *val; uint_t i, nelem; (void) nvpair_value_int32_array(nvp, &val, &nelem); for (i = 0; i < nelem; i++) printf(gettext("0x%x "), val[i]); break; } case DATA_TYPE_UINT32_ARRAY: { uint32_t *val; uint_t i, nelem; (void) nvpair_value_uint32_array(nvp, &val, &nelem); for (i = 0; i < nelem; i++) printf(gettext("0x%x "), val[i]); break; } case DATA_TYPE_INT64_ARRAY: { int64_t *val; uint_t i, nelem; (void) nvpair_value_int64_array(nvp, &val, &nelem); for (i = 0; i < nelem; i++) printf(gettext("0x%llx "), (u_longlong_t)val[i]); break; } case DATA_TYPE_UINT64_ARRAY: { uint64_t *val; uint_t i, nelem; (void) nvpair_value_uint64_array(nvp, &val, &nelem); for (i = 0; i < nelem; i++) printf(gettext("0x%llx "), (u_longlong_t)val[i]); break; } case DATA_TYPE_STRING_ARRAY: case DATA_TYPE_BOOLEAN_ARRAY: case DATA_TYPE_BYTE_ARRAY: case DATA_TYPE_DOUBLE: case DATA_TYPE_UNKNOWN: printf(gettext("")); break; } printf(gettext("\n")); } } static int 
zpool_do_events_next(ev_opts_t *opts) { nvlist_t *nvl; int cleanup_fd, ret, dropped; cleanup_fd = open(ZFS_DEV, O_RDWR); VERIFY(cleanup_fd >= 0); if (!opts->scripted) (void) printf(gettext("%-30s %s\n"), "TIME", "CLASS"); while (1) { ret = zpool_events_next(g_zfs, &nvl, &dropped, !!opts->follow, cleanup_fd); if (ret || nvl == NULL) break; if (dropped > 0) (void) printf(gettext("dropped %d events\n"), dropped); zpool_do_events_short(nvl); if (opts->verbose) { zpool_do_events_nvprint(nvl, 8); printf(gettext("\n")); } (void) fflush(stdout); nvlist_free(nvl); } VERIFY(0 == close(cleanup_fd)); return (ret); } static int zpool_do_events_clear(ev_opts_t *opts) { int count, ret; ret = zpool_events_clear(g_zfs, &count); if (!ret) (void) printf(gettext("cleared %d events\n"), count); return (ret); } /* * zpool events [-vfc] * * Displays events logs by ZFS. */ int zpool_do_events(int argc, char **argv) { ev_opts_t opts = { 0 }; int ret; int c; /* check options */ while ((c = getopt(argc, argv, "vHfc")) != -1) { switch (c) { case 'v': opts.verbose = 1; break; case 'H': opts.scripted = 1; break; case 'f': opts.follow = 1; break; case 'c': opts.clear = 1; break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); usage(B_FALSE); } } argc -= optind; argv += optind; if (opts.clear) ret = zpool_do_events_clear(&opts); else ret = zpool_do_events_next(&opts); return ret; } static int get_callback(zpool_handle_t *zhp, void *data) { zprop_get_cbdata_t *cbp = (zprop_get_cbdata_t *)data; char value[MAXNAMELEN]; zprop_source_t srctype; zprop_list_t *pl; for (pl = cbp->cb_proplist; pl != NULL; pl = pl->pl_next) { /* * Skip the special fake placeholder. This will also skip * over the name property when 'all' is specified. */ if (pl->pl_prop == ZPOOL_PROP_NAME && pl == cbp->cb_proplist) continue; if (pl->pl_prop == ZPROP_INVAL && (zpool_prop_feature(pl->pl_user_prop) || zpool_prop_unsupported(pl->pl_user_prop))) { srctype = ZPROP_SRC_LOCAL; if (zpool_prop_get_feature(zhp, pl->pl_user_prop, value, sizeof (value)) == 0) { zprop_print_one_property(zpool_get_name(zhp), cbp, pl->pl_user_prop, value, srctype, NULL, NULL); } } else { if (zpool_get_prop(zhp, pl->pl_prop, value, sizeof (value), &srctype) != 0) continue; zprop_print_one_property(zpool_get_name(zhp), cbp, zpool_prop_to_name(pl->pl_prop), value, srctype, NULL, NULL); } } return (0); } int zpool_do_get(int argc, char **argv) { zprop_get_cbdata_t cb = { 0 }; zprop_list_t fake_name = { 0 }; int ret; if (argc < 2) { (void) fprintf(stderr, gettext("missing property " "argument\n")); usage(B_FALSE); } cb.cb_first = B_TRUE; cb.cb_sources = ZPROP_SRC_ALL; cb.cb_columns[0] = GET_COL_NAME; cb.cb_columns[1] = GET_COL_PROPERTY; cb.cb_columns[2] = GET_COL_VALUE; cb.cb_columns[3] = GET_COL_SOURCE; cb.cb_type = ZFS_TYPE_POOL; if (zprop_get_list(g_zfs, argv[1], &cb.cb_proplist, ZFS_TYPE_POOL) != 0) usage(B_FALSE); if (cb.cb_proplist != NULL) { fake_name.pl_prop = ZPOOL_PROP_NAME; fake_name.pl_width = strlen(gettext("NAME")); fake_name.pl_next = cb.cb_proplist; cb.cb_proplist = &fake_name; } ret = for_each_pool(argc - 2, argv + 2, B_TRUE, &cb.cb_proplist, get_callback, &cb); if (cb.cb_proplist == &fake_name) zprop_free_list(fake_name.pl_next); else zprop_free_list(cb.cb_proplist); return (ret); } typedef struct set_cbdata { char *cb_propname; char *cb_value; boolean_t cb_any_successful; } set_cbdata_t; int set_callback(zpool_handle_t *zhp, void *data) { int error; set_cbdata_t *cb = (set_cbdata_t *)data; error = zpool_set_prop(zhp, cb->cb_propname, 
cb->cb_value); if (!error) cb->cb_any_successful = B_TRUE; return (error); } int zpool_do_set(int argc, char **argv) { set_cbdata_t cb = { 0 }; int error; if (argc > 1 && argv[1][0] == '-') { (void) fprintf(stderr, gettext("invalid option '%c'\n"), argv[1][1]); usage(B_FALSE); } if (argc < 2) { (void) fprintf(stderr, gettext("missing property=value " "argument\n")); usage(B_FALSE); } if (argc < 3) { (void) fprintf(stderr, gettext("missing pool name\n")); usage(B_FALSE); } if (argc > 3) { (void) fprintf(stderr, gettext("too many pool names\n")); usage(B_FALSE); } cb.cb_propname = argv[1]; cb.cb_value = strchr(cb.cb_propname, '='); if (cb.cb_value == NULL) { (void) fprintf(stderr, gettext("missing value in " "property=value argument\n")); usage(B_FALSE); } *(cb.cb_value) = '\0'; cb.cb_value++; error = for_each_pool(argc - 2, argv + 2, B_TRUE, NULL, set_callback, &cb); return (error); } static int find_command_idx(char *command, int *idx) { int i; for (i = 0; i < NCOMMAND; i++) { if (command_table[i].name == NULL) continue; if (strcmp(command, command_table[i].name) == 0) { *idx = i; return (0); } } return (1); } int main(int argc, char **argv) { int ret; int i = 0; char *cmdname; (void) setlocale(LC_ALL, ""); (void) textdomain(TEXT_DOMAIN); opterr = 0; /* * Make sure the user has specified some command. */ if (argc < 2) { (void) fprintf(stderr, gettext("missing command\n")); usage(B_FALSE); } cmdname = argv[1]; /* * Special case '-?' */ if ((strcmp(cmdname, "-?") == 0) || strcmp(cmdname, "--help") == 0) usage(B_TRUE); if ((g_zfs = libzfs_init()) == NULL) return (1); libzfs_print_on_error(g_zfs, B_TRUE); - zpool_set_history_str("zpool", argc, argv, history_str); - verify(zpool_stage_history(g_zfs, history_str) == 0); + zfs_save_arguments(argc, argv, history_str, sizeof (history_str)); /* * Run the appropriate command. */ if (find_command_idx(cmdname, &i) == 0) { current_command = &command_table[i]; ret = command_table[i].func(argc - 1, argv + 1); } else if (strchr(cmdname, '=')) { verify(find_command_idx("set", &i) == 0); current_command = &command_table[i]; ret = command_table[i].func(argc, argv); } else if (strcmp(cmdname, "freeze") == 0 && argc == 3) { /* * 'freeze' is a vile debugging abomination, so we treat * it as such. */ char buf[16384]; int fd = open(ZFS_DEV, O_RDWR); (void) strcpy((void *)buf, argv[2]); return (!!ioctl(fd, ZFS_IOC_POOL_FREEZE, buf)); } else { (void) fprintf(stderr, gettext("unrecognized " "command '%s'\n"), cmdname); usage(B_FALSE); ret = 1; } + if (ret == 0 && log_history) + (void) zpool_log_history(g_zfs, history_str); + libzfs_fini(g_zfs); /* * The 'ZFS_ABORT' environment variable causes us to dump core on exit * for the purposes of running ::findleaks. */ if (getenv("ZFS_ABORT") != NULL) { (void) printf("dumping core by request\n"); abort(); } return (ret); } diff --git a/cmd/ztest/ztest.c b/cmd/ztest/ztest.c index 93a5f1ecf54a..28570a09075c 100644 --- a/cmd/ztest/ztest.c +++ b/cmd/ztest/ztest.c @@ -1,6351 +1,6344 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. 
* * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012 by Delphix. All rights reserved. * Copyright 2011 Nexenta Systems, Inc. All rights reserved. */ /* * The objective of this program is to provide a DMU/ZAP/SPA stress test * that runs entirely in userland, is easy to use, and easy to extend. * * The overall design of the ztest program is as follows: * * (1) For each major functional area (e.g. adding vdevs to a pool, * creating and destroying datasets, reading and writing objects, etc) * we have a simple routine to test that functionality. These * individual routines do not have to do anything "stressful". * * (2) We turn these simple functionality tests into a stress test by * running them all in parallel, with as many threads as desired, * and spread across as many datasets, objects, and vdevs as desired. * * (3) While all this is happening, we inject faults into the pool to * verify that self-healing data really works. * * (4) Every time we open a dataset, we change its checksum and compression * functions. Thus even individual objects vary from block to block * in which checksum they use and whether they're compressed. * * (5) To verify that we never lose on-disk consistency after a crash, * we run the entire test in a child of the main process. * At random times, the child self-immolates with a SIGKILL. * This is the software equivalent of pulling the power cord. * The parent then runs the test again, using the existing * storage pool, as many times as desired. If backwards compatibility * testing is enabled, ztest will sometimes run the "older" version * of ztest after a SIGKILL. * * (6) To verify that we don't have future leaks or temporal incursions, * many of the functional tests record the transaction group number * as part of their data. When reading old data, they verify that * the transaction group number is less than the current, open txg. * If you add a new test, please do this if applicable. * * (7) Threads are created with a reduced stack size, for sanity checking. * Therefore, it's important not to allocate huge buffers on the stack. * * When run with no arguments, ztest runs for about five minutes and * produces no output if successful. To get a little bit of information, * specify -V. To get more information, specify -VV, and so on. * * To turn this into an overnight stress test, use -T to specify run time. * * You can ask for more vdevs [-v], datasets [-d], or threads [-t] * to increase the pool capacity, fanout, and overall stress level. * * Use the -k option to set the desired frequency of kills. * * When ztest invokes itself it passes all relevant information through a * temporary file which is mmap-ed in the child process. This allows shared * memory to survive the exec syscall. The ztest_shared_hdr_t struct is always * stored at offset 0 of this file and contains information on the size and * number of shared structures in the file. The information stored in this file * must remain backwards compatible with older versions of ztest so that * ztest can invoke them during backwards compatibility testing (-B).
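/*
 * [Editorial aside, not part of the patch] A minimal sketch of the
 * "mmap()ed temporary file survives exec()" idea described above. The names
 * (shared_hdr_t, map_shared) are illustrative, not ztest's own: the parent
 * maps the file MAP_SHARED, stores sizes in a header at offset 0, and the
 * re-exec'd child maps the inherited descriptor to see the same pages.
 */
#include <stddef.h>
#include <stdint.h>
#include <sys/mman.h>

typedef struct shared_hdr {
	uint64_t sh_size;		/* total size of the shared region */
} shared_hdr_t;

static shared_hdr_t *
map_shared(int fd, size_t size)
{
	void *p = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	return (p == MAP_FAILED ? NULL : (shared_hdr_t *)p);
}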
*/ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static int ztest_fd_data = -1; static int ztest_fd_rand = -1; typedef struct ztest_shared_hdr { uint64_t zh_hdr_size; uint64_t zh_opts_size; uint64_t zh_size; uint64_t zh_stats_size; uint64_t zh_stats_count; uint64_t zh_ds_size; uint64_t zh_ds_count; } ztest_shared_hdr_t; static ztest_shared_hdr_t *ztest_shared_hdr; typedef struct ztest_shared_opts { char zo_pool[MAXNAMELEN]; char zo_dir[MAXNAMELEN]; char zo_alt_ztest[MAXNAMELEN]; char zo_alt_libpath[MAXNAMELEN]; uint64_t zo_vdevs; uint64_t zo_vdevtime; size_t zo_vdev_size; int zo_ashift; int zo_mirrors; int zo_raidz; int zo_raidz_parity; int zo_datasets; int zo_threads; uint64_t zo_passtime; uint64_t zo_killrate; int zo_verbose; int zo_init; uint64_t zo_time; uint64_t zo_maxloops; uint64_t zo_metaslab_gang_bang; } ztest_shared_opts_t; static const ztest_shared_opts_t ztest_opts_defaults = { .zo_pool = { 'z', 't', 'e', 's', 't', '\0' }, .zo_dir = { '/', 't', 'm', 'p', '\0' }, .zo_alt_ztest = { '\0' }, .zo_alt_libpath = { '\0' }, .zo_vdevs = 5, .zo_ashift = SPA_MINBLOCKSHIFT, .zo_mirrors = 2, .zo_raidz = 4, .zo_raidz_parity = 1, .zo_vdev_size = SPA_MINDEVSIZE, .zo_datasets = 7, .zo_threads = 23, .zo_passtime = 60, /* 60 seconds */ .zo_killrate = 70, /* 70% kill rate */ .zo_verbose = 0, .zo_init = 1, .zo_time = 300, /* 5 minutes */ .zo_maxloops = 50, /* max loops during spa_freeze() */ .zo_metaslab_gang_bang = 32 << 10 }; extern uint64_t metaslab_gang_bang; extern uint64_t metaslab_df_alloc_threshold; static ztest_shared_opts_t *ztest_shared_opts; static ztest_shared_opts_t ztest_opts; typedef struct ztest_shared_ds { uint64_t zd_seq; } ztest_shared_ds_t; static ztest_shared_ds_t *ztest_shared_ds; #define ZTEST_GET_SHARED_DS(d) (&ztest_shared_ds[d]) #define BT_MAGIC 0x123456789abcdefULL #define MAXFAULTS() \ (MAX(zs->zs_mirrors, 1) * (ztest_opts.zo_raidz_parity + 1) - 1) enum ztest_io_type { ZTEST_IO_WRITE_TAG, ZTEST_IO_WRITE_PATTERN, ZTEST_IO_WRITE_ZEROES, ZTEST_IO_TRUNCATE, ZTEST_IO_SETATTR, ZTEST_IO_TYPES }; typedef struct ztest_block_tag { uint64_t bt_magic; uint64_t bt_objset; uint64_t bt_object; uint64_t bt_offset; uint64_t bt_gen; uint64_t bt_txg; uint64_t bt_crtxg; } ztest_block_tag_t; typedef struct bufwad { uint64_t bw_index; uint64_t bw_txg; uint64_t bw_data; } bufwad_t; /* * XXX -- fix zfs range locks to be generic so we can use them here. */ typedef enum { RL_READER, RL_WRITER, RL_APPEND } rl_type_t; typedef struct rll { void *rll_writer; int rll_readers; kmutex_t rll_lock; kcondvar_t rll_cv; } rll_t; typedef struct rl { uint64_t rl_object; uint64_t rl_offset; uint64_t rl_size; rll_t *rl_lock; } rl_t; #define ZTEST_RANGE_LOCKS 64 #define ZTEST_OBJECT_LOCKS 64 /* * Object descriptor. Used as a template for object lookup/create/remove. */ typedef struct ztest_od { uint64_t od_dir; uint64_t od_object; dmu_object_type_t od_type; dmu_object_type_t od_crtype; uint64_t od_blocksize; uint64_t od_crblocksize; uint64_t od_gen; uint64_t od_crgen; char od_name[MAXNAMELEN]; } ztest_od_t; /* * Per-dataset state. 
*/ typedef struct ztest_ds { ztest_shared_ds_t *zd_shared; objset_t *zd_os; krwlock_t zd_zilog_lock; zilog_t *zd_zilog; ztest_od_t *zd_od; /* debugging aid */ char zd_name[MAXNAMELEN]; kmutex_t zd_dirobj_lock; rll_t zd_object_lock[ZTEST_OBJECT_LOCKS]; rll_t zd_range_lock[ZTEST_RANGE_LOCKS]; } ztest_ds_t; /* * Per-iteration state. */ typedef void ztest_func_t(ztest_ds_t *zd, uint64_t id); typedef struct ztest_info { ztest_func_t *zi_func; /* test function */ uint64_t zi_iters; /* iterations per execution */ uint64_t *zi_interval; /* execute every seconds */ } ztest_info_t; typedef struct ztest_shared_callstate { uint64_t zc_count; /* per-pass count */ uint64_t zc_time; /* per-pass time */ uint64_t zc_next; /* next time to call this function */ } ztest_shared_callstate_t; static ztest_shared_callstate_t *ztest_shared_callstate; #define ZTEST_GET_SHARED_CALLSTATE(c) (&ztest_shared_callstate[c]) /* * Note: these aren't static because we want dladdr() to work. */ ztest_func_t ztest_dmu_read_write; ztest_func_t ztest_dmu_write_parallel; ztest_func_t ztest_dmu_object_alloc_free; ztest_func_t ztest_dmu_commit_callbacks; ztest_func_t ztest_zap; ztest_func_t ztest_zap_parallel; ztest_func_t ztest_zil_commit; ztest_func_t ztest_zil_remount; ztest_func_t ztest_dmu_read_write_zcopy; ztest_func_t ztest_dmu_objset_create_destroy; ztest_func_t ztest_dmu_prealloc; ztest_func_t ztest_fzap; ztest_func_t ztest_dmu_snapshot_create_destroy; ztest_func_t ztest_dsl_prop_get_set; ztest_func_t ztest_spa_prop_get_set; ztest_func_t ztest_spa_create_destroy; ztest_func_t ztest_fault_inject; ztest_func_t ztest_ddt_repair; ztest_func_t ztest_dmu_snapshot_hold; ztest_func_t ztest_spa_rename; ztest_func_t ztest_scrub; ztest_func_t ztest_dsl_dataset_promote_busy; ztest_func_t ztest_vdev_attach_detach; ztest_func_t ztest_vdev_LUN_growth; ztest_func_t ztest_vdev_add_remove; ztest_func_t ztest_vdev_aux_add_remove; ztest_func_t ztest_split_pool; ztest_func_t ztest_reguid; ztest_func_t ztest_spa_upgrade; uint64_t zopt_always = 0ULL * NANOSEC; /* all the time */ uint64_t zopt_incessant = 1ULL * NANOSEC / 10; /* every 1/10 second */ uint64_t zopt_often = 1ULL * NANOSEC; /* every second */ uint64_t zopt_sometimes = 10ULL * NANOSEC; /* every 10 seconds */ uint64_t zopt_rarely = 60ULL * NANOSEC; /* every 60 seconds */ ztest_info_t ztest_info[] = { { ztest_dmu_read_write, 1, &zopt_always }, { ztest_dmu_write_parallel, 10, &zopt_always }, { ztest_dmu_object_alloc_free, 1, &zopt_always }, { ztest_dmu_commit_callbacks, 1, &zopt_always }, { ztest_zap, 30, &zopt_always }, { ztest_zap_parallel, 100, &zopt_always }, { ztest_split_pool, 1, &zopt_always }, { ztest_zil_commit, 1, &zopt_incessant }, { ztest_zil_remount, 1, &zopt_sometimes }, { ztest_dmu_read_write_zcopy, 1, &zopt_often }, { ztest_dmu_objset_create_destroy, 1, &zopt_often }, { ztest_dsl_prop_get_set, 1, &zopt_often }, { ztest_spa_prop_get_set, 1, &zopt_sometimes }, #if 0 { ztest_dmu_prealloc, 1, &zopt_sometimes }, #endif { ztest_fzap, 1, &zopt_sometimes }, { ztest_dmu_snapshot_create_destroy, 1, &zopt_sometimes }, { ztest_spa_create_destroy, 1, &zopt_sometimes }, { ztest_fault_inject, 1, &zopt_sometimes }, { ztest_ddt_repair, 1, &zopt_sometimes }, { ztest_dmu_snapshot_hold, 1, &zopt_sometimes }, { ztest_reguid, 1, &zopt_sometimes }, { ztest_spa_rename, 1, &zopt_rarely }, { ztest_scrub, 1, &zopt_rarely }, { ztest_spa_upgrade, 1, &zopt_rarely }, { ztest_dsl_dataset_promote_busy, 1, &zopt_rarely }, { ztest_vdev_attach_detach, 1, &zopt_rarely }, { ztest_vdev_LUN_growth, 1, 
&zopt_rarely }, { ztest_vdev_add_remove, 1, &ztest_opts.zo_vdevtime }, { ztest_vdev_aux_add_remove, 1, &ztest_opts.zo_vdevtime }, }; #define ZTEST_FUNCS (sizeof (ztest_info) / sizeof (ztest_info_t)) /* * The following struct is used to hold a list of uncalled commit callbacks. * The callbacks are ordered by txg number. */ typedef struct ztest_cb_list { kmutex_t zcl_callbacks_lock; list_t zcl_callbacks; } ztest_cb_list_t; /* * Stuff we need to share writably between parent and child. */ typedef struct ztest_shared { boolean_t zs_do_init; hrtime_t zs_proc_start; hrtime_t zs_proc_stop; hrtime_t zs_thread_start; hrtime_t zs_thread_stop; hrtime_t zs_thread_kill; uint64_t zs_enospc_count; uint64_t zs_vdev_next_leaf; uint64_t zs_vdev_aux; uint64_t zs_alloc; uint64_t zs_space; uint64_t zs_splits; uint64_t zs_mirrors; uint64_t zs_metaslab_sz; uint64_t zs_metaslab_df_alloc_threshold; uint64_t zs_guid; } ztest_shared_t; #define ID_PARALLEL -1ULL static char ztest_dev_template[] = "%s/%s.%llua"; static char ztest_aux_template[] = "%s/%s.%s.%llu"; ztest_shared_t *ztest_shared; static spa_t *ztest_spa = NULL; static ztest_ds_t *ztest_ds; static kmutex_t ztest_vdev_lock; /* * The ztest_name_lock protects the pool and dataset namespace used by * the individual tests. To modify the namespace, consumers must grab * this lock as writer. Grabbing the lock as reader will ensure that the * namespace does not change while the lock is held. */ static krwlock_t ztest_name_lock; static boolean_t ztest_dump_core = B_TRUE; static boolean_t ztest_exiting; /* Global commit callback list */ static ztest_cb_list_t zcl; /* Commit cb delay */ static uint64_t zc_min_txg_delay = UINT64_MAX; static int zc_cb_counter = 0; /* * Minimum number of commit callbacks that need to be registered for us to check * whether the minimum txg delay is acceptable. */ #define ZTEST_COMMIT_CB_MIN_REG 100 /* * If a number of txgs equal to this threshold have been created after a commit * callback has been registered but not called, then we assume there is an * implementation bug. */ #define ZTEST_COMMIT_CB_THRESH (TXG_CONCURRENT_STATES + 1000) extern uint64_t metaslab_gang_bang; extern uint64_t metaslab_df_alloc_threshold; enum ztest_object { ZTEST_META_DNODE = 0, ZTEST_DIROBJ, ZTEST_OBJECTS }; static void usage(boolean_t) __NORETURN; /* * These libumem hooks provide a reasonable set of defaults for the allocator's * debugging facilities. */ const char * _umem_debug_init(void) { return ("default,verbose"); /* $UMEM_DEBUG setting */ } const char * _umem_logging_init(void) { return ("fail,contents"); /* $UMEM_LOGGING setting */ } #define FATAL_MSG_SZ 1024 char *fatal_msg; static void fatal(int do_perror, char *message, ...) 
{ va_list args; int save_errno = errno; char *buf; (void) fflush(stdout); buf = umem_alloc(FATAL_MSG_SZ, UMEM_NOFAIL); va_start(args, message); (void) sprintf(buf, "ztest: "); /* LINTED */ (void) vsprintf(buf + strlen(buf), message, args); va_end(args); if (do_perror) { (void) snprintf(buf + strlen(buf), FATAL_MSG_SZ - strlen(buf), ": %s", strerror(save_errno)); } (void) fprintf(stderr, "%s\n", buf); fatal_msg = buf; /* to ease debugging */ if (ztest_dump_core) abort(); exit(3); } static int str2shift(const char *buf) { const char *ends = "BKMGTPEZ"; int i; if (buf[0] == '\0') return (0); for (i = 0; i < strlen(ends); i++) { if (toupper(buf[0]) == ends[i]) break; } if (i == strlen(ends)) { (void) fprintf(stderr, "ztest: invalid bytes suffix: %s\n", buf); usage(B_FALSE); } if (buf[1] == '\0' || (toupper(buf[1]) == 'B' && buf[2] == '\0')) { return (10*i); } (void) fprintf(stderr, "ztest: invalid bytes suffix: %s\n", buf); usage(B_FALSE); /* NOTREACHED */ } static uint64_t nicenumtoull(const char *buf) { char *end; uint64_t val; val = strtoull(buf, &end, 0); if (end == buf) { (void) fprintf(stderr, "ztest: bad numeric value: %s\n", buf); usage(B_FALSE); } else if (end[0] == '.') { double fval = strtod(buf, &end); fval *= pow(2, str2shift(end)); if (fval > UINT64_MAX) { (void) fprintf(stderr, "ztest: value too large: %s\n", buf); usage(B_FALSE); } val = (uint64_t)fval; } else { int shift = str2shift(end); if (shift >= 64 || (val << shift) >> shift != val) { (void) fprintf(stderr, "ztest: value too large: %s\n", buf); usage(B_FALSE); } val <<= shift; } return (val); } static void usage(boolean_t requested) { const ztest_shared_opts_t *zo = &ztest_opts_defaults; char nice_vdev_size[10]; char nice_gang_bang[10]; FILE *fp = requested ? stdout : stderr; nicenum(zo->zo_vdev_size, nice_vdev_size); nicenum(zo->zo_metaslab_gang_bang, nice_gang_bang); (void) fprintf(fp, "Usage: %s\n" "\t[-v vdevs (default: %llu)]\n" "\t[-s size_of_each_vdev (default: %s)]\n" "\t[-a alignment_shift (default: %d)] use 0 for random\n" "\t[-m mirror_copies (default: %d)]\n" "\t[-r raidz_disks (default: %d)]\n" "\t[-R raidz_parity (default: %d)]\n" "\t[-d datasets (default: %d)]\n" "\t[-t threads (default: %d)]\n" "\t[-g gang_block_threshold (default: %s)]\n" "\t[-i init_count (default: %d)] initialize pool i times\n" "\t[-k kill_percentage (default: %llu%%)]\n" "\t[-p pool_name (default: %s)]\n" "\t[-f dir (default: %s)] file directory for vdev files\n" "\t[-V] verbose (use multiple times for ever more blather)\n" "\t[-E] use existing pool instead of creating new one\n" "\t[-T time (default: %llu sec)] total run time\n" "\t[-F freezeloops (default: %llu)] max loops in spa_freeze()\n" "\t[-P passtime (default: %llu sec)] time per pass\n" "\t[-B alt_ztest (default: )] alternate ztest path\n" "\t[-h] (print help)\n" "", zo->zo_pool, (u_longlong_t)zo->zo_vdevs, /* -v */ nice_vdev_size, /* -s */ zo->zo_ashift, /* -a */ zo->zo_mirrors, /* -m */ zo->zo_raidz, /* -r */ zo->zo_raidz_parity, /* -R */ zo->zo_datasets, /* -d */ zo->zo_threads, /* -t */ nice_gang_bang, /* -g */ zo->zo_init, /* -i */ (u_longlong_t)zo->zo_killrate, /* -k */ zo->zo_pool, /* -p */ zo->zo_dir, /* -f */ (u_longlong_t)zo->zo_time, /* -T */ (u_longlong_t)zo->zo_maxloops, /* -F */ (u_longlong_t)zo->zo_passtime); exit(requested ? 
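/*
 * [Editorial aside, not part of the patch] Worked examples for the
 * str2shift()/nicenumtoull() suffix parsing above; the variable names are
 * hypothetical:
 */
uint64_t vdev_size = nicenumtoull("128K");	/* 128 << 10 == 131072 */
uint64_t run_size = nicenumtoull("1.5G");	/* 1.5 * 2^30 == 1610612736 */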
0 : 1); } static void process_options(int argc, char **argv) { char *path; ztest_shared_opts_t *zo = &ztest_opts; int opt; uint64_t value; char altdir[MAXNAMELEN] = { 0 }; bcopy(&ztest_opts_defaults, zo, sizeof (*zo)); while ((opt = getopt(argc, argv, "v:s:a:m:r:R:d:t:g:i:k:p:f:VET:P:hF:B:")) != EOF) { value = 0; switch (opt) { case 'v': case 's': case 'a': case 'm': case 'r': case 'R': case 'd': case 't': case 'g': case 'i': case 'k': case 'T': case 'P': case 'F': value = nicenumtoull(optarg); } switch (opt) { case 'v': zo->zo_vdevs = value; break; case 's': zo->zo_vdev_size = MAX(SPA_MINDEVSIZE, value); break; case 'a': zo->zo_ashift = value; break; case 'm': zo->zo_mirrors = value; break; case 'r': zo->zo_raidz = MAX(1, value); break; case 'R': zo->zo_raidz_parity = MIN(MAX(value, 1), 3); break; case 'd': zo->zo_datasets = MAX(1, value); break; case 't': zo->zo_threads = MAX(1, value); break; case 'g': zo->zo_metaslab_gang_bang = MAX(SPA_MINBLOCKSIZE << 1, value); break; case 'i': zo->zo_init = value; break; case 'k': zo->zo_killrate = value; break; case 'p': (void) strlcpy(zo->zo_pool, optarg, sizeof (zo->zo_pool)); break; case 'f': path = realpath(optarg, NULL); if (path == NULL) { (void) fprintf(stderr, "error: %s: %s\n", optarg, strerror(errno)); usage(B_FALSE); } else { (void) strlcpy(zo->zo_dir, path, sizeof (zo->zo_dir)); } break; case 'V': zo->zo_verbose++; break; case 'E': zo->zo_init = 0; break; case 'T': zo->zo_time = value; break; case 'P': zo->zo_passtime = MAX(1, value); break; case 'F': zo->zo_maxloops = MAX(1, value); break; case 'B': (void) strlcpy(altdir, optarg, sizeof (altdir)); break; case 'h': usage(B_TRUE); break; case '?': default: usage(B_FALSE); break; } } zo->zo_raidz_parity = MIN(zo->zo_raidz_parity, zo->zo_raidz - 1); zo->zo_vdevtime = (zo->zo_vdevs > 0 ? zo->zo_time * NANOSEC / zo->zo_vdevs : UINT64_MAX >> 2); if (strlen(altdir) > 0) { char *cmd; char *realaltdir; char *bin; char *ztest; char *isa; int isalen; cmd = umem_alloc(MAXPATHLEN, UMEM_NOFAIL); realaltdir = umem_alloc(MAXPATHLEN, UMEM_NOFAIL); VERIFY(NULL != realpath(getexecname(), cmd)); if (0 != access(altdir, F_OK)) { ztest_dump_core = B_FALSE; fatal(B_TRUE, "invalid alternate ztest path: %s", altdir); } VERIFY(NULL != realpath(altdir, realaltdir)); /* * 'cmd' should be of the form "/usr/bin//ztest". * We want to extract to determine if we should use * 32 or 64 bit binaries. 
*/ bin = strstr(cmd, "/usr/bin/"); ztest = strstr(bin, "/ztest"); isa = bin + 9; isalen = ztest - isa; (void) snprintf(zo->zo_alt_ztest, sizeof (zo->zo_alt_ztest), "%s/usr/bin/%.*s/ztest", realaltdir, isalen, isa); (void) snprintf(zo->zo_alt_libpath, sizeof (zo->zo_alt_libpath), "%s/usr/lib/%.*s", realaltdir, isalen, isa); if (0 != access(zo->zo_alt_ztest, X_OK)) { ztest_dump_core = B_FALSE; fatal(B_TRUE, "invalid alternate ztest: %s", zo->zo_alt_ztest); } else if (0 != access(zo->zo_alt_libpath, X_OK)) { ztest_dump_core = B_FALSE; fatal(B_TRUE, "invalid alternate lib directory %s", zo->zo_alt_libpath); } umem_free(cmd, MAXPATHLEN); umem_free(realaltdir, MAXPATHLEN); } } static void ztest_kill(ztest_shared_t *zs) { zs->zs_alloc = metaslab_class_get_alloc(spa_normal_class(ztest_spa)); zs->zs_space = metaslab_class_get_space(spa_normal_class(ztest_spa)); (void) kill(getpid(), SIGKILL); } static uint64_t ztest_random(uint64_t range) { uint64_t r; ASSERT3S(ztest_fd_rand, >=, 0); if (range == 0) return (0); if (read(ztest_fd_rand, &r, sizeof (r)) != sizeof (r)) fatal(1, "short read from /dev/urandom"); return (r % range); } /* ARGSUSED */ static void ztest_record_enospc(const char *s) { ztest_shared->zs_enospc_count++; } static uint64_t ztest_get_ashift(void) { if (ztest_opts.zo_ashift == 0) return (SPA_MINBLOCKSHIFT + ztest_random(3)); return (ztest_opts.zo_ashift); } static nvlist_t * make_vdev_file(char *path, char *aux, char *pool, size_t size, uint64_t ashift) { char *pathbuf; uint64_t vdev; nvlist_t *file; pathbuf = umem_alloc(MAXPATHLEN, UMEM_NOFAIL); if (ashift == 0) ashift = ztest_get_ashift(); if (path == NULL) { path = pathbuf; if (aux != NULL) { vdev = ztest_shared->zs_vdev_aux; (void) snprintf(path, MAXPATHLEN, ztest_aux_template, ztest_opts.zo_dir, pool == NULL ? ztest_opts.zo_pool : pool, aux, vdev); } else { vdev = ztest_shared->zs_vdev_next_leaf++; (void) snprintf(path, MAXPATHLEN, ztest_dev_template, ztest_opts.zo_dir, pool == NULL ? 
ztest_opts.zo_pool : pool, vdev); } } if (size != 0) { int fd = open(path, O_RDWR | O_CREAT | O_TRUNC, 0666); if (fd == -1) fatal(1, "can't open %s", path); if (ftruncate(fd, size) != 0) fatal(1, "can't ftruncate %s", path); (void) close(fd); } VERIFY(nvlist_alloc(&file, NV_UNIQUE_NAME, 0) == 0); VERIFY(nvlist_add_string(file, ZPOOL_CONFIG_TYPE, VDEV_TYPE_FILE) == 0); VERIFY(nvlist_add_string(file, ZPOOL_CONFIG_PATH, path) == 0); VERIFY(nvlist_add_uint64(file, ZPOOL_CONFIG_ASHIFT, ashift) == 0); umem_free(pathbuf, MAXPATHLEN); return (file); } static nvlist_t * make_vdev_raidz(char *path, char *aux, char *pool, size_t size, uint64_t ashift, int r) { nvlist_t *raidz, **child; int c; if (r < 2) return (make_vdev_file(path, aux, pool, size, ashift)); child = umem_alloc(r * sizeof (nvlist_t *), UMEM_NOFAIL); for (c = 0; c < r; c++) child[c] = make_vdev_file(path, aux, pool, size, ashift); VERIFY(nvlist_alloc(&raidz, NV_UNIQUE_NAME, 0) == 0); VERIFY(nvlist_add_string(raidz, ZPOOL_CONFIG_TYPE, VDEV_TYPE_RAIDZ) == 0); VERIFY(nvlist_add_uint64(raidz, ZPOOL_CONFIG_NPARITY, ztest_opts.zo_raidz_parity) == 0); VERIFY(nvlist_add_nvlist_array(raidz, ZPOOL_CONFIG_CHILDREN, child, r) == 0); for (c = 0; c < r; c++) nvlist_free(child[c]); umem_free(child, r * sizeof (nvlist_t *)); return (raidz); } static nvlist_t * make_vdev_mirror(char *path, char *aux, char *pool, size_t size, uint64_t ashift, int r, int m) { nvlist_t *mirror, **child; int c; if (m < 1) return (make_vdev_raidz(path, aux, pool, size, ashift, r)); child = umem_alloc(m * sizeof (nvlist_t *), UMEM_NOFAIL); for (c = 0; c < m; c++) child[c] = make_vdev_raidz(path, aux, pool, size, ashift, r); VERIFY(nvlist_alloc(&mirror, NV_UNIQUE_NAME, 0) == 0); VERIFY(nvlist_add_string(mirror, ZPOOL_CONFIG_TYPE, VDEV_TYPE_MIRROR) == 0); VERIFY(nvlist_add_nvlist_array(mirror, ZPOOL_CONFIG_CHILDREN, child, m) == 0); for (c = 0; c < m; c++) nvlist_free(child[c]); umem_free(child, m * sizeof (nvlist_t *)); return (mirror); } static nvlist_t * make_vdev_root(char *path, char *aux, char *pool, size_t size, uint64_t ashift, int log, int r, int m, int t) { nvlist_t *root, **child; int c; ASSERT(t > 0); child = umem_alloc(t * sizeof (nvlist_t *), UMEM_NOFAIL); for (c = 0; c < t; c++) { child[c] = make_vdev_mirror(path, aux, pool, size, ashift, r, m); VERIFY(nvlist_add_uint64(child[c], ZPOOL_CONFIG_IS_LOG, log) == 0); } VERIFY(nvlist_alloc(&root, NV_UNIQUE_NAME, 0) == 0); VERIFY(nvlist_add_string(root, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT) == 0); VERIFY(nvlist_add_nvlist_array(root, aux ? aux : ZPOOL_CONFIG_CHILDREN, child, t) == 0); for (c = 0; c < t; c++) nvlist_free(child[c]); umem_free(child, t * sizeof (nvlist_t *)); return (root); } /* * Find a random spa version. Returns back a random spa version in the * range [initial_version, SPA_VERSION_FEATURES]. 
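/*
 * [Editorial aside, not part of the patch] The make_vdev_*() helpers above
 * nest as file -> raidz -> mirror -> root. A hypothetical call building one
 * top-level vdev that is a zo_mirrors-way mirror of zo_raidz-disk raidz
 * groups backed by files of the configured size (passing ashift 0 lets
 * make_vdev_file() choose):
 */
nvlist_t *nvroot = make_vdev_root(NULL, NULL, NULL, ztest_opts.zo_vdev_size,
    0, 0, ztest_opts.zo_raidz, ztest_opts.zo_mirrors, 1);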
*/ static uint64_t ztest_random_spa_version(uint64_t initial_version) { uint64_t version = initial_version; if (version <= SPA_VERSION_BEFORE_FEATURES) { version = version + ztest_random(SPA_VERSION_BEFORE_FEATURES - version + 1); } if (version > SPA_VERSION_BEFORE_FEATURES) version = SPA_VERSION_FEATURES; ASSERT(SPA_VERSION_IS_SUPPORTED(version)); return (version); } static int ztest_random_blocksize(void) { return (1 << (SPA_MINBLOCKSHIFT + ztest_random(SPA_MAXBLOCKSHIFT - SPA_MINBLOCKSHIFT + 1))); } static int ztest_random_ibshift(void) { return (DN_MIN_INDBLKSHIFT + ztest_random(DN_MAX_INDBLKSHIFT - DN_MIN_INDBLKSHIFT + 1)); } static uint64_t ztest_random_vdev_top(spa_t *spa, boolean_t log_ok) { uint64_t top; vdev_t *rvd = spa->spa_root_vdev; vdev_t *tvd; ASSERT(spa_config_held(spa, SCL_ALL, RW_READER) != 0); do { top = ztest_random(rvd->vdev_children); tvd = rvd->vdev_child[top]; } while (tvd->vdev_ishole || (tvd->vdev_islog && !log_ok) || tvd->vdev_mg == NULL || tvd->vdev_mg->mg_class == NULL); return (top); } static uint64_t ztest_random_dsl_prop(zfs_prop_t prop) { uint64_t value; do { value = zfs_prop_random_value(prop, ztest_random(-1ULL)); } while (prop == ZFS_PROP_CHECKSUM && value == ZIO_CHECKSUM_OFF); return (value); } static int ztest_dsl_prop_set_uint64(char *osname, zfs_prop_t prop, uint64_t value, boolean_t inherit) { const char *propname = zfs_prop_to_name(prop); const char *valname; char *setpoint; uint64_t curval; int error; error = dsl_prop_set(osname, propname, (inherit ? ZPROP_SRC_NONE : ZPROP_SRC_LOCAL), sizeof (value), 1, &value); if (error == ENOSPC) { ztest_record_enospc(FTAG); return (error); } ASSERT0(error); setpoint = umem_alloc(MAXPATHLEN, UMEM_NOFAIL); VERIFY3U(dsl_prop_get(osname, propname, sizeof (curval), 1, &curval, setpoint), ==, 0); if (ztest_opts.zo_verbose >= 6) { VERIFY(zfs_prop_index_to_string(prop, curval, &valname) == 0); (void) printf("%s %s = %s at '%s'\n", osname, propname, valname, setpoint); } umem_free(setpoint, MAXPATHLEN); return (error); } static int ztest_spa_prop_set_uint64(zpool_prop_t prop, uint64_t value) { spa_t *spa = ztest_spa; nvlist_t *props = NULL; int error; VERIFY(nvlist_alloc(&props, NV_UNIQUE_NAME, 0) == 0); VERIFY(nvlist_add_uint64(props, zpool_prop_to_name(prop), value) == 0); error = spa_prop_set(spa, props); nvlist_free(props); if (error == ENOSPC) { ztest_record_enospc(FTAG); return (error); } ASSERT0(error); return (error); } static void ztest_rll_init(rll_t *rll) { rll->rll_writer = NULL; rll->rll_readers = 0; mutex_init(&rll->rll_lock, NULL, MUTEX_DEFAULT, NULL); cv_init(&rll->rll_cv, NULL, CV_DEFAULT, NULL); } static void ztest_rll_destroy(rll_t *rll) { ASSERT(rll->rll_writer == NULL); ASSERT(rll->rll_readers == 0); mutex_destroy(&rll->rll_lock); cv_destroy(&rll->rll_cv); } static void ztest_rll_lock(rll_t *rll, rl_type_t type) { mutex_enter(&rll->rll_lock); if (type == RL_READER) { while (rll->rll_writer != NULL) (void) cv_wait(&rll->rll_cv, &rll->rll_lock); rll->rll_readers++; } else { while (rll->rll_writer != NULL || rll->rll_readers) (void) cv_wait(&rll->rll_cv, &rll->rll_lock); rll->rll_writer = curthread; } mutex_exit(&rll->rll_lock); } static void ztest_rll_unlock(rll_t *rll) { mutex_enter(&rll->rll_lock); if (rll->rll_writer) { ASSERT(rll->rll_readers == 0); rll->rll_writer = NULL; } else { ASSERT(rll->rll_readers != 0); ASSERT(rll->rll_writer == NULL); rll->rll_readers--; } if (rll->rll_writer == NULL && rll->rll_readers == 0) cv_broadcast(&rll->rll_cv); mutex_exit(&rll->rll_lock); } static void 
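/*
 * [Editorial aside, not part of the patch] The rll_t above is a small
 * reader/writer lock built from a mutex and a condition variable.
 * Hypothetical standalone usage, mirroring how the object and range locks
 * below wrap it:
 */
rll_t rll;

ztest_rll_init(&rll);
ztest_rll_lock(&rll, RL_READER);	/* any number of readers may enter */
ztest_rll_unlock(&rll);
ztest_rll_lock(&rll, RL_WRITER);	/* writer waits for readers to drain */
ztest_rll_unlock(&rll);
ztest_rll_destroy(&rll);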
ztest_object_lock(ztest_ds_t *zd, uint64_t object, rl_type_t type) { rll_t *rll = &zd->zd_object_lock[object & (ZTEST_OBJECT_LOCKS - 1)]; ztest_rll_lock(rll, type); } static void ztest_object_unlock(ztest_ds_t *zd, uint64_t object) { rll_t *rll = &zd->zd_object_lock[object & (ZTEST_OBJECT_LOCKS - 1)]; ztest_rll_unlock(rll); } static rl_t * ztest_range_lock(ztest_ds_t *zd, uint64_t object, uint64_t offset, uint64_t size, rl_type_t type) { uint64_t hash = object ^ (offset % (ZTEST_RANGE_LOCKS + 1)); rll_t *rll = &zd->zd_range_lock[hash & (ZTEST_RANGE_LOCKS - 1)]; rl_t *rl; rl = umem_alloc(sizeof (*rl), UMEM_NOFAIL); rl->rl_object = object; rl->rl_offset = offset; rl->rl_size = size; rl->rl_lock = rll; ztest_rll_lock(rll, type); return (rl); } static void ztest_range_unlock(rl_t *rl) { rll_t *rll = rl->rl_lock; ztest_rll_unlock(rll); umem_free(rl, sizeof (*rl)); } static void ztest_zd_init(ztest_ds_t *zd, ztest_shared_ds_t *szd, objset_t *os) { zd->zd_os = os; zd->zd_zilog = dmu_objset_zil(os); zd->zd_shared = szd; dmu_objset_name(os, zd->zd_name); int l; if (zd->zd_shared != NULL) zd->zd_shared->zd_seq = 0; rw_init(&zd->zd_zilog_lock, NULL, RW_DEFAULT, NULL); mutex_init(&zd->zd_dirobj_lock, NULL, MUTEX_DEFAULT, NULL); for (l = 0; l < ZTEST_OBJECT_LOCKS; l++) ztest_rll_init(&zd->zd_object_lock[l]); for (l = 0; l < ZTEST_RANGE_LOCKS; l++) ztest_rll_init(&zd->zd_range_lock[l]); } static void ztest_zd_fini(ztest_ds_t *zd) { int l; mutex_destroy(&zd->zd_dirobj_lock); rw_destroy(&zd->zd_zilog_lock); for (l = 0; l < ZTEST_OBJECT_LOCKS; l++) ztest_rll_destroy(&zd->zd_object_lock[l]); for (l = 0; l < ZTEST_RANGE_LOCKS; l++) ztest_rll_destroy(&zd->zd_range_lock[l]); } #define TXG_MIGHTWAIT (ztest_random(10) == 0 ? TXG_NOWAIT : TXG_WAIT) static uint64_t ztest_tx_assign(dmu_tx_t *tx, uint64_t txg_how, const char *tag) { uint64_t txg; int error; /* * Attempt to assign tx to some transaction group. 
*/ error = dmu_tx_assign(tx, txg_how); if (error) { if (error == ERESTART) { ASSERT(txg_how == TXG_NOWAIT); dmu_tx_wait(tx); } else { ASSERT3U(error, ==, ENOSPC); ztest_record_enospc(tag); } dmu_tx_abort(tx); return (0); } txg = dmu_tx_get_txg(tx); ASSERT(txg != 0); return (txg); } static void ztest_pattern_set(void *buf, uint64_t size, uint64_t value) { uint64_t *ip = buf; uint64_t *ip_end = (uint64_t *)((uintptr_t)buf + (uintptr_t)size); while (ip < ip_end) *ip++ = value; } #ifndef NDEBUG static boolean_t ztest_pattern_match(void *buf, uint64_t size, uint64_t value) { uint64_t *ip = buf; uint64_t *ip_end = (uint64_t *)((uintptr_t)buf + (uintptr_t)size); uint64_t diff = 0; while (ip < ip_end) diff |= (value - *ip++); return (diff == 0); } #endif static void ztest_bt_generate(ztest_block_tag_t *bt, objset_t *os, uint64_t object, uint64_t offset, uint64_t gen, uint64_t txg, uint64_t crtxg) { bt->bt_magic = BT_MAGIC; bt->bt_objset = dmu_objset_id(os); bt->bt_object = object; bt->bt_offset = offset; bt->bt_gen = gen; bt->bt_txg = txg; bt->bt_crtxg = crtxg; } static void ztest_bt_verify(ztest_block_tag_t *bt, objset_t *os, uint64_t object, uint64_t offset, uint64_t gen, uint64_t txg, uint64_t crtxg) { ASSERT(bt->bt_magic == BT_MAGIC); ASSERT(bt->bt_objset == dmu_objset_id(os)); ASSERT(bt->bt_object == object); ASSERT(bt->bt_offset == offset); ASSERT(bt->bt_gen <= gen); ASSERT(bt->bt_txg <= txg); ASSERT(bt->bt_crtxg == crtxg); } static ztest_block_tag_t * ztest_bt_bonus(dmu_buf_t *db) { dmu_object_info_t doi; ztest_block_tag_t *bt; dmu_object_info_from_db(db, &doi); ASSERT3U(doi.doi_bonus_size, <=, db->db_size); ASSERT3U(doi.doi_bonus_size, >=, sizeof (*bt)); bt = (void *)((char *)db->db_data + doi.doi_bonus_size - sizeof (*bt)); return (bt); } /* * ZIL logging ops */ #define lrz_type lr_mode #define lrz_blocksize lr_uid #define lrz_ibshift lr_gid #define lrz_bonustype lr_rdev #define lrz_bonuslen lr_crtime[1] static void ztest_log_create(ztest_ds_t *zd, dmu_tx_t *tx, lr_create_t *lr) { char *name = (void *)(lr + 1); /* name follows lr */ size_t namesize = strlen(name) + 1; itx_t *itx; if (zil_replaying(zd->zd_zilog, tx)) return; itx = zil_itx_create(TX_CREATE, sizeof (*lr) + namesize); bcopy(&lr->lr_common + 1, &itx->itx_lr + 1, sizeof (*lr) + namesize - sizeof (lr_t)); zil_itx_assign(zd->zd_zilog, itx, tx); } static void ztest_log_remove(ztest_ds_t *zd, dmu_tx_t *tx, lr_remove_t *lr, uint64_t object) { char *name = (void *)(lr + 1); /* name follows lr */ size_t namesize = strlen(name) + 1; itx_t *itx; if (zil_replaying(zd->zd_zilog, tx)) return; itx = zil_itx_create(TX_REMOVE, sizeof (*lr) + namesize); bcopy(&lr->lr_common + 1, &itx->itx_lr + 1, sizeof (*lr) + namesize - sizeof (lr_t)); itx->itx_oid = object; zil_itx_assign(zd->zd_zilog, itx, tx); } static void ztest_log_write(ztest_ds_t *zd, dmu_tx_t *tx, lr_write_t *lr) { itx_t *itx; itx_wr_state_t write_state = ztest_random(WR_NUM_STATES); if (zil_replaying(zd->zd_zilog, tx)) return; if (lr->lr_length > ZIL_MAX_LOG_DATA) write_state = WR_INDIRECT; itx = zil_itx_create(TX_WRITE, sizeof (*lr) + (write_state == WR_COPIED ? lr->lr_length : 0)); if (write_state == WR_COPIED && dmu_read(zd->zd_os, lr->lr_foid, lr->lr_offset, lr->lr_length, ((lr_write_t *)&itx->itx_lr) + 1, DMU_READ_NO_PREFETCH) != 0) { zil_itx_destroy(itx); itx = zil_itx_create(TX_WRITE, sizeof (*lr)); write_state = WR_NEED_COPY; } itx->itx_private = zd; itx->itx_wr_state = write_state; itx->itx_sync = (ztest_random(8) == 0); itx->itx_sod += (write_state == WR_NEED_COPY ? 
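/*
 * [Editorial aside, not part of the patch] The transaction pattern the
 * replay functions below build on: ztest_tx_assign() returns 0 when the
 * assignment fails (the tx has already been aborted and ENOSPC recorded),
 * so callers simply back out. Sketch of a hypothetical caller; os, object,
 * offset, length and data are assumed from context:
 */
dmu_tx_t *tx;
uint64_t txg;

tx = dmu_tx_create(os);
dmu_tx_hold_write(tx, object, offset, length);
txg = ztest_tx_assign(tx, TXG_WAIT, FTAG);
if (txg == 0)
	return (ENOSPC);		/* tx was already aborted */
dmu_write(os, object, offset, length, data, tx);
dmu_tx_commit(tx);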
lr->lr_length : 0); bcopy(&lr->lr_common + 1, &itx->itx_lr + 1, sizeof (*lr) - sizeof (lr_t)); zil_itx_assign(zd->zd_zilog, itx, tx); } static void ztest_log_truncate(ztest_ds_t *zd, dmu_tx_t *tx, lr_truncate_t *lr) { itx_t *itx; if (zil_replaying(zd->zd_zilog, tx)) return; itx = zil_itx_create(TX_TRUNCATE, sizeof (*lr)); bcopy(&lr->lr_common + 1, &itx->itx_lr + 1, sizeof (*lr) - sizeof (lr_t)); itx->itx_sync = B_FALSE; zil_itx_assign(zd->zd_zilog, itx, tx); } static void ztest_log_setattr(ztest_ds_t *zd, dmu_tx_t *tx, lr_setattr_t *lr) { itx_t *itx; if (zil_replaying(zd->zd_zilog, tx)) return; itx = zil_itx_create(TX_SETATTR, sizeof (*lr)); bcopy(&lr->lr_common + 1, &itx->itx_lr + 1, sizeof (*lr) - sizeof (lr_t)); itx->itx_sync = B_FALSE; zil_itx_assign(zd->zd_zilog, itx, tx); } /* * ZIL replay ops */ static int ztest_replay_create(ztest_ds_t *zd, lr_create_t *lr, boolean_t byteswap) { char *name = (void *)(lr + 1); /* name follows lr */ objset_t *os = zd->zd_os; ztest_block_tag_t *bbt; dmu_buf_t *db; dmu_tx_t *tx; uint64_t txg; int error = 0; if (byteswap) byteswap_uint64_array(lr, sizeof (*lr)); ASSERT(lr->lr_doid == ZTEST_DIROBJ); ASSERT(name[0] != '\0'); tx = dmu_tx_create(os); dmu_tx_hold_zap(tx, lr->lr_doid, B_TRUE, name); if (lr->lrz_type == DMU_OT_ZAP_OTHER) { dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, B_TRUE, NULL); } else { dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT); } txg = ztest_tx_assign(tx, TXG_WAIT, FTAG); if (txg == 0) return (ENOSPC); ASSERT(dmu_objset_zil(os)->zl_replay == !!lr->lr_foid); if (lr->lrz_type == DMU_OT_ZAP_OTHER) { if (lr->lr_foid == 0) { lr->lr_foid = zap_create(os, lr->lrz_type, lr->lrz_bonustype, lr->lrz_bonuslen, tx); } else { error = zap_create_claim(os, lr->lr_foid, lr->lrz_type, lr->lrz_bonustype, lr->lrz_bonuslen, tx); } } else { if (lr->lr_foid == 0) { lr->lr_foid = dmu_object_alloc(os, lr->lrz_type, 0, lr->lrz_bonustype, lr->lrz_bonuslen, tx); } else { error = dmu_object_claim(os, lr->lr_foid, lr->lrz_type, 0, lr->lrz_bonustype, lr->lrz_bonuslen, tx); } } if (error) { ASSERT3U(error, ==, EEXIST); ASSERT(zd->zd_zilog->zl_replay); dmu_tx_commit(tx); return (error); } ASSERT(lr->lr_foid != 0); if (lr->lrz_type != DMU_OT_ZAP_OTHER) VERIFY3U(0, ==, dmu_object_set_blocksize(os, lr->lr_foid, lr->lrz_blocksize, lr->lrz_ibshift, tx)); VERIFY3U(0, ==, dmu_bonus_hold(os, lr->lr_foid, FTAG, &db)); bbt = ztest_bt_bonus(db); dmu_buf_will_dirty(db, tx); ztest_bt_generate(bbt, os, lr->lr_foid, -1ULL, lr->lr_gen, txg, txg); dmu_buf_rele(db, FTAG); VERIFY3U(0, ==, zap_add(os, lr->lr_doid, name, sizeof (uint64_t), 1, &lr->lr_foid, tx)); (void) ztest_log_create(zd, tx, lr); dmu_tx_commit(tx); return (0); } static int ztest_replay_remove(ztest_ds_t *zd, lr_remove_t *lr, boolean_t byteswap) { char *name = (void *)(lr + 1); /* name follows lr */ objset_t *os = zd->zd_os; dmu_object_info_t doi; dmu_tx_t *tx; uint64_t object, txg; if (byteswap) byteswap_uint64_array(lr, sizeof (*lr)); ASSERT(lr->lr_doid == ZTEST_DIROBJ); ASSERT(name[0] != '\0'); VERIFY3U(0, ==, zap_lookup(os, lr->lr_doid, name, sizeof (object), 1, &object)); ASSERT(object != 0); ztest_object_lock(zd, object, RL_WRITER); VERIFY3U(0, ==, dmu_object_info(os, object, &doi)); tx = dmu_tx_create(os); dmu_tx_hold_zap(tx, lr->lr_doid, B_FALSE, name); dmu_tx_hold_free(tx, object, 0, DMU_OBJECT_END); txg = ztest_tx_assign(tx, TXG_WAIT, FTAG); if (txg == 0) { ztest_object_unlock(zd, object); return (ENOSPC); } if (doi.doi_type == DMU_OT_ZAP_OTHER) { VERIFY3U(0, ==, zap_destroy(os, object, tx)); } else { VERIFY3U(0, ==, 
dmu_object_free(os, object, tx)); } VERIFY3U(0, ==, zap_remove(os, lr->lr_doid, name, tx)); (void) ztest_log_remove(zd, tx, lr, object); dmu_tx_commit(tx); ztest_object_unlock(zd, object); return (0); } static int ztest_replay_write(ztest_ds_t *zd, lr_write_t *lr, boolean_t byteswap) { objset_t *os = zd->zd_os; void *data = lr + 1; /* data follows lr */ uint64_t offset, length; ztest_block_tag_t *bt = data; ztest_block_tag_t *bbt; uint64_t gen, txg, lrtxg, crtxg; dmu_object_info_t doi; dmu_tx_t *tx; dmu_buf_t *db; arc_buf_t *abuf = NULL; rl_t *rl; if (byteswap) byteswap_uint64_array(lr, sizeof (*lr)); offset = lr->lr_offset; length = lr->lr_length; /* If it's a dmu_sync() block, write the whole block */ if (lr->lr_common.lrc_reclen == sizeof (lr_write_t)) { uint64_t blocksize = BP_GET_LSIZE(&lr->lr_blkptr); if (length < blocksize) { offset -= offset % blocksize; length = blocksize; } } if (bt->bt_magic == BSWAP_64(BT_MAGIC)) byteswap_uint64_array(bt, sizeof (*bt)); if (bt->bt_magic != BT_MAGIC) bt = NULL; ztest_object_lock(zd, lr->lr_foid, RL_READER); rl = ztest_range_lock(zd, lr->lr_foid, offset, length, RL_WRITER); VERIFY3U(0, ==, dmu_bonus_hold(os, lr->lr_foid, FTAG, &db)); dmu_object_info_from_db(db, &doi); bbt = ztest_bt_bonus(db); ASSERT3U(bbt->bt_magic, ==, BT_MAGIC); gen = bbt->bt_gen; crtxg = bbt->bt_crtxg; lrtxg = lr->lr_common.lrc_txg; tx = dmu_tx_create(os); dmu_tx_hold_write(tx, lr->lr_foid, offset, length); if (ztest_random(8) == 0 && length == doi.doi_data_block_size && P2PHASE(offset, length) == 0) abuf = dmu_request_arcbuf(db, length); txg = ztest_tx_assign(tx, TXG_WAIT, FTAG); if (txg == 0) { if (abuf != NULL) dmu_return_arcbuf(abuf); dmu_buf_rele(db, FTAG); ztest_range_unlock(rl); ztest_object_unlock(zd, lr->lr_foid); return (ENOSPC); } if (bt != NULL) { /* * Usually, verify the old data before writing new data -- * but not always, because we also want to verify correct * behavior when the data was not recently read into cache. */ ASSERT(offset % doi.doi_data_block_size == 0); if (ztest_random(4) != 0) { int prefetch = ztest_random(2) ? DMU_READ_PREFETCH : DMU_READ_NO_PREFETCH; ztest_block_tag_t rbt; VERIFY(dmu_read(os, lr->lr_foid, offset, sizeof (rbt), &rbt, prefetch) == 0); if (rbt.bt_magic == BT_MAGIC) { ztest_bt_verify(&rbt, os, lr->lr_foid, offset, gen, txg, crtxg); } } /* * Writes can appear to be newer than the bonus buffer because * the ztest_get_data() callback does a dmu_read() of the * open-context data, which may be different than the data * as it was when the write was generated. */ if (zd->zd_zilog->zl_replay) { ztest_bt_verify(bt, os, lr->lr_foid, offset, MAX(gen, bt->bt_gen), MAX(txg, lrtxg), bt->bt_crtxg); } /* * Set the bt's gen/txg to the bonus buffer's gen/txg * so that all of the usual ASSERTs will work. 
*/ ztest_bt_generate(bt, os, lr->lr_foid, offset, gen, txg, crtxg); } if (abuf == NULL) { dmu_write(os, lr->lr_foid, offset, length, data, tx); } else { bcopy(data, abuf->b_data, length); dmu_assign_arcbuf(db, offset, abuf, tx); } (void) ztest_log_write(zd, tx, lr); dmu_buf_rele(db, FTAG); dmu_tx_commit(tx); ztest_range_unlock(rl); ztest_object_unlock(zd, lr->lr_foid); return (0); } static int ztest_replay_truncate(ztest_ds_t *zd, lr_truncate_t *lr, boolean_t byteswap) { objset_t *os = zd->zd_os; dmu_tx_t *tx; uint64_t txg; rl_t *rl; if (byteswap) byteswap_uint64_array(lr, sizeof (*lr)); ztest_object_lock(zd, lr->lr_foid, RL_READER); rl = ztest_range_lock(zd, lr->lr_foid, lr->lr_offset, lr->lr_length, RL_WRITER); tx = dmu_tx_create(os); dmu_tx_hold_free(tx, lr->lr_foid, lr->lr_offset, lr->lr_length); txg = ztest_tx_assign(tx, TXG_WAIT, FTAG); if (txg == 0) { ztest_range_unlock(rl); ztest_object_unlock(zd, lr->lr_foid); return (ENOSPC); } VERIFY(dmu_free_range(os, lr->lr_foid, lr->lr_offset, lr->lr_length, tx) == 0); (void) ztest_log_truncate(zd, tx, lr); dmu_tx_commit(tx); ztest_range_unlock(rl); ztest_object_unlock(zd, lr->lr_foid); return (0); } static int ztest_replay_setattr(ztest_ds_t *zd, lr_setattr_t *lr, boolean_t byteswap) { objset_t *os = zd->zd_os; dmu_tx_t *tx; dmu_buf_t *db; ztest_block_tag_t *bbt; uint64_t txg, lrtxg, crtxg; if (byteswap) byteswap_uint64_array(lr, sizeof (*lr)); ztest_object_lock(zd, lr->lr_foid, RL_WRITER); VERIFY3U(0, ==, dmu_bonus_hold(os, lr->lr_foid, FTAG, &db)); tx = dmu_tx_create(os); dmu_tx_hold_bonus(tx, lr->lr_foid); txg = ztest_tx_assign(tx, TXG_WAIT, FTAG); if (txg == 0) { dmu_buf_rele(db, FTAG); ztest_object_unlock(zd, lr->lr_foid); return (ENOSPC); } bbt = ztest_bt_bonus(db); ASSERT3U(bbt->bt_magic, ==, BT_MAGIC); crtxg = bbt->bt_crtxg; lrtxg = lr->lr_common.lrc_txg; if (zd->zd_zilog->zl_replay) { ASSERT(lr->lr_size != 0); ASSERT(lr->lr_mode != 0); ASSERT(lrtxg != 0); } else { /* * Randomly change the size and increment the generation. */ lr->lr_size = (ztest_random(db->db_size / sizeof (*bbt)) + 1) * sizeof (*bbt); lr->lr_mode = bbt->bt_gen + 1; ASSERT(lrtxg == 0); } /* * Verify that the current bonus buffer is not newer than our txg. 
*/ ztest_bt_verify(bbt, os, lr->lr_foid, -1ULL, lr->lr_mode, MAX(txg, lrtxg), crtxg); dmu_buf_will_dirty(db, tx); ASSERT3U(lr->lr_size, >=, sizeof (*bbt)); ASSERT3U(lr->lr_size, <=, db->db_size); VERIFY0(dmu_set_bonus(db, lr->lr_size, tx)); bbt = ztest_bt_bonus(db); ztest_bt_generate(bbt, os, lr->lr_foid, -1ULL, lr->lr_mode, txg, crtxg); dmu_buf_rele(db, FTAG); (void) ztest_log_setattr(zd, tx, lr); dmu_tx_commit(tx); ztest_object_unlock(zd, lr->lr_foid); return (0); } zil_replay_func_t ztest_replay_vector[TX_MAX_TYPE] = { NULL, /* 0 no such transaction type */ (zil_replay_func_t)ztest_replay_create, /* TX_CREATE */ NULL, /* TX_MKDIR */ NULL, /* TX_MKXATTR */ NULL, /* TX_SYMLINK */ (zil_replay_func_t)ztest_replay_remove, /* TX_REMOVE */ NULL, /* TX_RMDIR */ NULL, /* TX_LINK */ NULL, /* TX_RENAME */ (zil_replay_func_t)ztest_replay_write, /* TX_WRITE */ (zil_replay_func_t)ztest_replay_truncate, /* TX_TRUNCATE */ (zil_replay_func_t)ztest_replay_setattr, /* TX_SETATTR */ NULL, /* TX_ACL */ NULL, /* TX_CREATE_ACL */ NULL, /* TX_CREATE_ATTR */ NULL, /* TX_CREATE_ACL_ATTR */ NULL, /* TX_MKDIR_ACL */ NULL, /* TX_MKDIR_ATTR */ NULL, /* TX_MKDIR_ACL_ATTR */ NULL, /* TX_WRITE2 */ }; /* * ZIL get_data callbacks */ static void ztest_get_done(zgd_t *zgd, int error) { ztest_ds_t *zd = zgd->zgd_private; uint64_t object = zgd->zgd_rl->rl_object; if (zgd->zgd_db) dmu_buf_rele(zgd->zgd_db, zgd); ztest_range_unlock(zgd->zgd_rl); ztest_object_unlock(zd, object); if (error == 0 && zgd->zgd_bp) zil_add_block(zgd->zgd_zilog, zgd->zgd_bp); umem_free(zgd, sizeof (*zgd)); } static int ztest_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio) { ztest_ds_t *zd = arg; objset_t *os = zd->zd_os; uint64_t object = lr->lr_foid; uint64_t offset = lr->lr_offset; uint64_t size = lr->lr_length; blkptr_t *bp = &lr->lr_blkptr; uint64_t txg = lr->lr_common.lrc_txg; uint64_t crtxg; dmu_object_info_t doi; dmu_buf_t *db; zgd_t *zgd; int error; ztest_object_lock(zd, object, RL_READER); error = dmu_bonus_hold(os, object, FTAG, &db); if (error) { ztest_object_unlock(zd, object); return (error); } crtxg = ztest_bt_bonus(db)->bt_crtxg; if (crtxg == 0 || crtxg > txg) { dmu_buf_rele(db, FTAG); ztest_object_unlock(zd, object); return (ENOENT); } dmu_object_info_from_db(db, &doi); dmu_buf_rele(db, FTAG); db = NULL; zgd = umem_zalloc(sizeof (*zgd), UMEM_NOFAIL); zgd->zgd_zilog = zd->zd_zilog; zgd->zgd_private = zd; if (buf != NULL) { /* immediate write */ zgd->zgd_rl = ztest_range_lock(zd, object, offset, size, RL_READER); error = dmu_read(os, object, offset, size, buf, DMU_READ_NO_PREFETCH); ASSERT(error == 0); } else { size = doi.doi_data_block_size; if (ISP2(size)) { offset = P2ALIGN(offset, size); } else { ASSERT(offset < size); offset = 0; } zgd->zgd_rl = ztest_range_lock(zd, object, offset, size, RL_READER); error = dmu_buf_hold(os, object, offset, zgd, &db, DMU_READ_NO_PREFETCH); if (error == 0) { zgd->zgd_db = db; zgd->zgd_bp = bp; ASSERT(db->db_offset == offset); ASSERT(db->db_size == size); error = dmu_sync(zio, lr->lr_common.lrc_txg, ztest_get_done, zgd); if (error == 0) return (0); } } ztest_get_done(zgd, error); return (error); } static void * ztest_lr_alloc(size_t lrsize, char *name) { char *lr; size_t namesize = name ? strlen(name) + 1 : 0; lr = umem_zalloc(lrsize + namesize, UMEM_NOFAIL); if (name) bcopy(name, lr + lrsize, namesize); return (lr); } void ztest_lr_free(void *lr, size_t lrsize, char *name) { size_t namesize = name ? strlen(name) + 1 : 0; umem_free(lr, lrsize + namesize); } /* * Lookup a bunch of objects. 
Returns the number of objects not found. */ static int ztest_lookup(ztest_ds_t *zd, ztest_od_t *od, int count) { int missing = 0; int error; int i; ASSERT(mutex_held(&zd->zd_dirobj_lock)); for (i = 0; i < count; i++, od++) { od->od_object = 0; error = zap_lookup(zd->zd_os, od->od_dir, od->od_name, sizeof (uint64_t), 1, &od->od_object); if (error) { ASSERT(error == ENOENT); ASSERT(od->od_object == 0); missing++; } else { dmu_buf_t *db; ztest_block_tag_t *bbt; dmu_object_info_t doi; ASSERT(od->od_object != 0); ASSERT(missing == 0); /* there should be no gaps */ ztest_object_lock(zd, od->od_object, RL_READER); VERIFY3U(0, ==, dmu_bonus_hold(zd->zd_os, od->od_object, FTAG, &db)); dmu_object_info_from_db(db, &doi); bbt = ztest_bt_bonus(db); ASSERT3U(bbt->bt_magic, ==, BT_MAGIC); od->od_type = doi.doi_type; od->od_blocksize = doi.doi_data_block_size; od->od_gen = bbt->bt_gen; dmu_buf_rele(db, FTAG); ztest_object_unlock(zd, od->od_object); } } return (missing); } static int ztest_create(ztest_ds_t *zd, ztest_od_t *od, int count) { int missing = 0; int i; ASSERT(mutex_held(&zd->zd_dirobj_lock)); for (i = 0; i < count; i++, od++) { if (missing) { od->od_object = 0; missing++; continue; } lr_create_t *lr = ztest_lr_alloc(sizeof (*lr), od->od_name); lr->lr_doid = od->od_dir; lr->lr_foid = 0; /* 0 to allocate, > 0 to claim */ lr->lrz_type = od->od_crtype; lr->lrz_blocksize = od->od_crblocksize; lr->lrz_ibshift = ztest_random_ibshift(); lr->lrz_bonustype = DMU_OT_UINT64_OTHER; lr->lrz_bonuslen = dmu_bonus_max(); lr->lr_gen = od->od_crgen; lr->lr_crtime[0] = time(NULL); if (ztest_replay_create(zd, lr, B_FALSE) != 0) { ASSERT(missing == 0); od->od_object = 0; missing++; } else { od->od_object = lr->lr_foid; od->od_type = od->od_crtype; od->od_blocksize = od->od_crblocksize; od->od_gen = od->od_crgen; ASSERT(od->od_object != 0); } ztest_lr_free(lr, sizeof (*lr), od->od_name); } return (missing); } static int ztest_remove(ztest_ds_t *zd, ztest_od_t *od, int count) { int missing = 0; int error; int i; ASSERT(mutex_held(&zd->zd_dirobj_lock)); od += count - 1; for (i = count - 1; i >= 0; i--, od--) { if (missing) { missing++; continue; } if (od->od_object == 0) continue; lr_remove_t *lr = ztest_lr_alloc(sizeof (*lr), od->od_name); lr->lr_doid = od->od_dir; if ((error = ztest_replay_remove(zd, lr, B_FALSE)) != 0) { ASSERT3U(error, ==, ENOSPC); missing++; } else { od->od_object = 0; } ztest_lr_free(lr, sizeof (*lr), od->od_name); } return (missing); } static int ztest_write(ztest_ds_t *zd, uint64_t object, uint64_t offset, uint64_t size, void *data) { lr_write_t *lr; int error; lr = ztest_lr_alloc(sizeof (*lr) + size, NULL); lr->lr_foid = object; lr->lr_offset = offset; lr->lr_length = size; lr->lr_blkoff = 0; BP_ZERO(&lr->lr_blkptr); bcopy(data, lr + 1, size); error = ztest_replay_write(zd, lr, B_FALSE); ztest_lr_free(lr, sizeof (*lr) + size, NULL); return (error); } static int ztest_truncate(ztest_ds_t *zd, uint64_t object, uint64_t offset, uint64_t size) { lr_truncate_t *lr; int error; lr = ztest_lr_alloc(sizeof (*lr), NULL); lr->lr_foid = object; lr->lr_offset = offset; lr->lr_length = size; error = ztest_replay_truncate(zd, lr, B_FALSE); ztest_lr_free(lr, sizeof (*lr), NULL); return (error); } static int ztest_setattr(ztest_ds_t *zd, uint64_t object) { lr_setattr_t *lr; int error; lr = ztest_lr_alloc(sizeof (*lr), NULL); lr->lr_foid = object; lr->lr_size = 0; lr->lr_mode = 0; error = ztest_replay_setattr(zd, lr, B_FALSE); ztest_lr_free(lr, sizeof (*lr), NULL); return (error); } static void 
ztest_prealloc(ztest_ds_t *zd, uint64_t object, uint64_t offset, uint64_t size) { objset_t *os = zd->zd_os; dmu_tx_t *tx; uint64_t txg; rl_t *rl; txg_wait_synced(dmu_objset_pool(os), 0); ztest_object_lock(zd, object, RL_READER); rl = ztest_range_lock(zd, object, offset, size, RL_WRITER); tx = dmu_tx_create(os); dmu_tx_hold_write(tx, object, offset, size); txg = ztest_tx_assign(tx, TXG_WAIT, FTAG); if (txg != 0) { dmu_prealloc(os, object, offset, size, tx); dmu_tx_commit(tx); txg_wait_synced(dmu_objset_pool(os), txg); } else { (void) dmu_free_long_range(os, object, offset, size); } ztest_range_unlock(rl); ztest_object_unlock(zd, object); } static void ztest_io(ztest_ds_t *zd, uint64_t object, uint64_t offset) { ztest_block_tag_t wbt; dmu_object_info_t doi; enum ztest_io_type io_type; uint64_t blocksize; void *data; VERIFY(dmu_object_info(zd->zd_os, object, &doi) == 0); blocksize = doi.doi_data_block_size; data = umem_alloc(blocksize, UMEM_NOFAIL); /* * Pick an i/o type at random, biased toward writing block tags. */ io_type = ztest_random(ZTEST_IO_TYPES); if (ztest_random(2) == 0) io_type = ZTEST_IO_WRITE_TAG; (void) rw_enter(&zd->zd_zilog_lock, RW_READER); switch (io_type) { case ZTEST_IO_WRITE_TAG: ztest_bt_generate(&wbt, zd->zd_os, object, offset, 0, 0, 0); (void) ztest_write(zd, object, offset, sizeof (wbt), &wbt); break; case ZTEST_IO_WRITE_PATTERN: (void) memset(data, 'a' + (object + offset) % 5, blocksize); if (ztest_random(2) == 0) { /* * Induce fletcher2 collisions to ensure that * zio_ddt_collision() detects and resolves them * when using fletcher2-verify for deduplication. */ ((uint64_t *)data)[0] ^= 1ULL << 63; ((uint64_t *)data)[4] ^= 1ULL << 63; } (void) ztest_write(zd, object, offset, blocksize, data); break; case ZTEST_IO_WRITE_ZEROES: bzero(data, blocksize); (void) ztest_write(zd, object, offset, blocksize, data); break; case ZTEST_IO_TRUNCATE: (void) ztest_truncate(zd, object, offset, blocksize); break; case ZTEST_IO_SETATTR: (void) ztest_setattr(zd, object); break; default: break; } (void) rw_exit(&zd->zd_zilog_lock); umem_free(data, blocksize); } /* * Initialize an object description template. */ static void ztest_od_init(ztest_od_t *od, uint64_t id, char *tag, uint64_t index, dmu_object_type_t type, uint64_t blocksize, uint64_t gen) { od->od_dir = ZTEST_DIROBJ; od->od_object = 0; od->od_crtype = type; od->od_crblocksize = blocksize ? blocksize : ztest_random_blocksize(); od->od_crgen = gen; od->od_type = DMU_OT_NONE; od->od_blocksize = 0; od->od_gen = 0; (void) snprintf(od->od_name, sizeof (od->od_name), "%s(%lld)[%llu]", tag, (longlong_t)id, (u_longlong_t)index); } /* * Lookup or create the objects for a test using the od template. * If the objects do not all exist, or if 'remove' is specified, * remove any existing objects and create new ones. Otherwise, * use the existing objects. */ static int ztest_object_init(ztest_ds_t *zd, ztest_od_t *od, size_t size, boolean_t remove) { int count = size / sizeof (*od); int rv = 0; mutex_enter(&zd->zd_dirobj_lock); if ((ztest_lookup(zd, od, count) != 0 || remove) && (ztest_remove(zd, od, count) != 0 || ztest_create(zd, od, count) != 0)) rv = -1; zd->zd_od = od; mutex_exit(&zd->zd_dirobj_lock); return (rv); } /* ARGSUSED */ void ztest_zil_commit(ztest_ds_t *zd, uint64_t id) { zilog_t *zilog = zd->zd_zilog; (void) rw_enter(&zd->zd_zilog_lock, RW_READER); zil_commit(zilog, ztest_random(ZTEST_OBJECTS)); /* * Remember the committed values in zd, which is in parent/child * shared memory. 
If we die, the next iteration of ztest_run() * will verify that the log really does contain this record. */ mutex_enter(&zilog->zl_lock); ASSERT(zd->zd_shared != NULL); ASSERT3U(zd->zd_shared->zd_seq, <=, zilog->zl_commit_lr_seq); zd->zd_shared->zd_seq = zilog->zl_commit_lr_seq; mutex_exit(&zilog->zl_lock); (void) rw_exit(&zd->zd_zilog_lock); } /* * This function is designed to simulate the operations that occur during a * mount/unmount operation. We hold the dataset across these operations in an * attempt to expose any implicit assumptions about ZIL management. */ /* ARGSUSED */ void ztest_zil_remount(ztest_ds_t *zd, uint64_t id) { objset_t *os = zd->zd_os; mutex_enter(&zd->zd_dirobj_lock); (void) rw_enter(&zd->zd_zilog_lock, RW_WRITER); /* zfs_sb_teardown() */ zil_close(zd->zd_zilog); /* zfsvfs_setup() */ VERIFY(zil_open(os, ztest_get_data) == zd->zd_zilog); zil_replay(os, zd, ztest_replay_vector); (void) rw_exit(&zd->zd_zilog_lock); mutex_exit(&zd->zd_dirobj_lock); } /* * Verify that we can't destroy an active pool, create an existing pool, * or create a pool with a bad vdev spec. */ /* ARGSUSED */ void ztest_spa_create_destroy(ztest_ds_t *zd, uint64_t id) { ztest_shared_opts_t *zo = &ztest_opts; spa_t *spa; nvlist_t *nvroot; /* * Attempt to create using a bad file. */ nvroot = make_vdev_root("/dev/bogus", NULL, NULL, 0, 0, 0, 0, 0, 1); VERIFY3U(ENOENT, ==, - spa_create("ztest_bad_file", nvroot, NULL, NULL, NULL)); + spa_create("ztest_bad_file", nvroot, NULL, NULL)); nvlist_free(nvroot); /* * Attempt to create using a bad mirror. */ nvroot = make_vdev_root("/dev/bogus", NULL, NULL, 0, 0, 0, 0, 2, 1); VERIFY3U(ENOENT, ==, - spa_create("ztest_bad_mirror", nvroot, NULL, NULL, NULL)); + spa_create("ztest_bad_mirror", nvroot, NULL, NULL)); nvlist_free(nvroot); /* * Attempt to create an existing pool. It shouldn't matter * what's in the nvroot; we should fail with EEXIST. */ (void) rw_enter(&ztest_name_lock, RW_READER); nvroot = make_vdev_root("/dev/bogus", NULL, NULL, 0, 0, 0, 0, 0, 1); - VERIFY3U(EEXIST, ==, spa_create(zo->zo_pool, nvroot, NULL, NULL, NULL)); + VERIFY3U(EEXIST, ==, spa_create(zo->zo_pool, nvroot, NULL, NULL)); nvlist_free(nvroot); VERIFY3U(0, ==, spa_open(zo->zo_pool, &spa, FTAG)); VERIFY3U(EBUSY, ==, spa_destroy(zo->zo_pool)); spa_close(spa, FTAG); (void) rw_exit(&ztest_name_lock); } /* ARGSUSED */ void ztest_spa_upgrade(ztest_ds_t *zd, uint64_t id) { spa_t *spa; uint64_t initial_version = SPA_VERSION_INITIAL; uint64_t version, newversion; nvlist_t *nvroot, *props; char *name; mutex_enter(&ztest_vdev_lock); name = kmem_asprintf("%s_upgrade", ztest_opts.zo_pool); /* * Clean up from previous runs. */ (void) spa_destroy(name); nvroot = make_vdev_root(NULL, NULL, name, ztest_opts.zo_vdev_size, 0, 0, ztest_opts.zo_raidz, ztest_opts.zo_mirrors, 1); /* * If we're configuring a RAIDZ device then make sure that the * the initial version is capable of supporting that feature. */ switch (ztest_opts.zo_raidz_parity) { case 0: case 1: initial_version = SPA_VERSION_INITIAL; break; case 2: initial_version = SPA_VERSION_RAIDZ2; break; case 3: initial_version = SPA_VERSION_RAIDZ3; break; } /* * Create a pool with a spa version that can be upgraded. Pick * a value between initial_version and SPA_VERSION_BEFORE_FEATURES. 
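 * The do/while loop below rejection-samples random versions until one
 * lands at or below SPA_VERSION_BEFORE_FEATURES; that version is then
 * handed to spa_create() through the ZPOOL_PROP_VERSION pool property.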
*/ do { version = ztest_random_spa_version(initial_version); } while (version > SPA_VERSION_BEFORE_FEATURES); props = fnvlist_alloc(); fnvlist_add_uint64(props, zpool_prop_to_name(ZPOOL_PROP_VERSION), version); - VERIFY3S(spa_create(name, nvroot, props, NULL, NULL), ==, 0); + VERIFY3S(spa_create(name, nvroot, props, NULL), ==, 0); fnvlist_free(nvroot); fnvlist_free(props); VERIFY3S(spa_open(name, &spa, FTAG), ==, 0); VERIFY3U(spa_version(spa), ==, version); newversion = ztest_random_spa_version(version + 1); if (ztest_opts.zo_verbose >= 4) { (void) printf("upgrading spa version from %llu to %llu\n", (u_longlong_t)version, (u_longlong_t)newversion); } spa_upgrade(spa, newversion); VERIFY3U(spa_version(spa), >, version); VERIFY3U(spa_version(spa), ==, fnvlist_lookup_uint64(spa->spa_config, zpool_prop_to_name(ZPOOL_PROP_VERSION))); spa_close(spa, FTAG); strfree(name); mutex_exit(&ztest_vdev_lock); } static vdev_t * vdev_lookup_by_path(vdev_t *vd, const char *path) { vdev_t *mvd; int c; if (vd->vdev_path != NULL && strcmp(path, vd->vdev_path) == 0) return (vd); for (c = 0; c < vd->vdev_children; c++) if ((mvd = vdev_lookup_by_path(vd->vdev_child[c], path)) != NULL) return (mvd); return (NULL); } /* * Find the first available hole which can be used as a top-level. */ int find_vdev_hole(spa_t *spa) { vdev_t *rvd = spa->spa_root_vdev; int c; ASSERT(spa_config_held(spa, SCL_VDEV, RW_READER) == SCL_VDEV); for (c = 0; c < rvd->vdev_children; c++) { vdev_t *cvd = rvd->vdev_child[c]; if (cvd->vdev_ishole) break; } return (c); } /* * Verify that vdev_add() works as expected. */ /* ARGSUSED */ void ztest_vdev_add_remove(ztest_ds_t *zd, uint64_t id) { ztest_shared_t *zs = ztest_shared; spa_t *spa = ztest_spa; uint64_t leaves; uint64_t guid; nvlist_t *nvroot; int error; mutex_enter(&ztest_vdev_lock); leaves = MAX(zs->zs_mirrors + zs->zs_splits, 1) * ztest_opts.zo_raidz; spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER); ztest_shared->zs_vdev_next_leaf = find_vdev_hole(spa) * leaves; /* * If we have slogs then remove them 1/4 of the time. */ if (spa_has_slogs(spa) && ztest_random(4) == 0) { /* * Grab the guid from the head of the log class rotor. */ guid = spa_log_class(spa)->mc_rotor->mg_vd->vdev_guid; spa_config_exit(spa, SCL_VDEV, FTAG); /* * We have to grab the zs_name_lock as writer to * prevent a race between removing a slog (dmu_objset_find) * and destroying a dataset. Removing the slog will * grab a reference on the dataset which may cause * dmu_objset_destroy() to fail with EBUSY thus * leaving the dataset in an inconsistent state. */ rw_enter(&ztest_name_lock, RW_WRITER); error = spa_vdev_remove(spa, guid, B_FALSE); rw_exit(&ztest_name_lock); if (error && error != EEXIST) fatal(0, "spa_vdev_remove() = %d", error); } else { spa_config_exit(spa, SCL_VDEV, FTAG); /* * Make 1/4 of the devices be log devices. */ nvroot = make_vdev_root(NULL, NULL, NULL, ztest_opts.zo_vdev_size, 0, ztest_random(4) == 0, ztest_opts.zo_raidz, zs->zs_mirrors, 1); error = spa_vdev_add(spa, nvroot); nvlist_free(nvroot); if (error == ENOSPC) ztest_record_enospc("spa_vdev_add"); else if (error != 0) fatal(0, "spa_vdev_add() = %d", error); } mutex_exit(&ztest_vdev_lock); } /* * Verify that adding/removing aux devices (l2arc, hot spare) works as expected. 
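 * The test flips a coin between the spare and l2cache aux lists, then
 * either removes a randomly chosen aux device (roughly 1/4 of the time,
 * when any exist, sometimes onlining it first to dirty its state) or
 * adds a fresh one sized at 5/4 of the configured vdev size so it is
 * always large enough to be accepted.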
*/ /* ARGSUSED */ void ztest_vdev_aux_add_remove(ztest_ds_t *zd, uint64_t id) { ztest_shared_t *zs = ztest_shared; spa_t *spa = ztest_spa; vdev_t *rvd = spa->spa_root_vdev; spa_aux_vdev_t *sav; char *aux; char *path; uint64_t guid = 0; int error; path = umem_alloc(MAXPATHLEN, UMEM_NOFAIL); if (ztest_random(2) == 0) { sav = &spa->spa_spares; aux = ZPOOL_CONFIG_SPARES; } else { sav = &spa->spa_l2cache; aux = ZPOOL_CONFIG_L2CACHE; } mutex_enter(&ztest_vdev_lock); spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER); if (sav->sav_count != 0 && ztest_random(4) == 0) { /* * Pick a random device to remove. */ guid = sav->sav_vdevs[ztest_random(sav->sav_count)]->vdev_guid; } else { /* * Find an unused device we can add. */ zs->zs_vdev_aux = 0; for (;;) { int c; (void) snprintf(path, MAXPATHLEN, ztest_aux_template, ztest_opts.zo_dir, ztest_opts.zo_pool, aux, zs->zs_vdev_aux); for (c = 0; c < sav->sav_count; c++) if (strcmp(sav->sav_vdevs[c]->vdev_path, path) == 0) break; if (c == sav->sav_count && vdev_lookup_by_path(rvd, path) == NULL) break; zs->zs_vdev_aux++; } } spa_config_exit(spa, SCL_VDEV, FTAG); if (guid == 0) { /* * Add a new device. */ nvlist_t *nvroot = make_vdev_root(NULL, aux, NULL, (ztest_opts.zo_vdev_size * 5) / 4, 0, 0, 0, 0, 1); error = spa_vdev_add(spa, nvroot); if (error != 0) fatal(0, "spa_vdev_add(%p) = %d", nvroot, error); nvlist_free(nvroot); } else { /* * Remove an existing device. Sometimes, dirty its * vdev state first to make sure we handle removal * of devices that have pending state changes. */ if (ztest_random(2) == 0) (void) vdev_online(spa, guid, 0, NULL); error = spa_vdev_remove(spa, guid, B_FALSE); if (error != 0 && error != EBUSY) fatal(0, "spa_vdev_remove(%llu) = %d", guid, error); } mutex_exit(&ztest_vdev_lock); umem_free(path, MAXPATHLEN); } /* * split a pool if it has mirror tlvdevs */ /* ARGSUSED */ void ztest_split_pool(ztest_ds_t *zd, uint64_t id) { ztest_shared_t *zs = ztest_shared; spa_t *spa = ztest_spa; vdev_t *rvd = spa->spa_root_vdev; nvlist_t *tree, **child, *config, *split, **schild; uint_t c, children, schildren = 0, lastlogid = 0; int error = 0; mutex_enter(&ztest_vdev_lock); /* ensure we have a useable config; mirrors of raidz aren't supported */ if (zs->zs_mirrors < 3 || ztest_opts.zo_raidz > 1) { mutex_exit(&ztest_vdev_lock); return; } /* clean up the old pool, if any */ (void) spa_destroy("splitp"); spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER); /* generate a config from the existing config */ mutex_enter(&spa->spa_props_lock); VERIFY(nvlist_lookup_nvlist(spa->spa_config, ZPOOL_CONFIG_VDEV_TREE, &tree) == 0); mutex_exit(&spa->spa_props_lock); VERIFY(nvlist_lookup_nvlist_array(tree, ZPOOL_CONFIG_CHILDREN, &child, &children) == 0); schild = malloc(rvd->vdev_children * sizeof (nvlist_t *)); for (c = 0; c < children; c++) { vdev_t *tvd = rvd->vdev_child[c]; nvlist_t **mchild; uint_t mchildren; if (tvd->vdev_islog || tvd->vdev_ops == &vdev_hole_ops) { VERIFY(nvlist_alloc(&schild[schildren], NV_UNIQUE_NAME, 0) == 0); VERIFY(nvlist_add_string(schild[schildren], ZPOOL_CONFIG_TYPE, VDEV_TYPE_HOLE) == 0); VERIFY(nvlist_add_uint64(schild[schildren], ZPOOL_CONFIG_IS_HOLE, 1) == 0); if (lastlogid == 0) lastlogid = schildren; ++schildren; continue; } lastlogid = 0; VERIFY(nvlist_lookup_nvlist_array(child[c], ZPOOL_CONFIG_CHILDREN, &mchild, &mchildren) == 0); VERIFY(nvlist_dup(mchild[0], &schild[schildren++], 0) == 0); } /* OK, create a config that can be used to split */ VERIFY(nvlist_alloc(&split, NV_UNIQUE_NAME, 0) == 0); VERIFY(nvlist_add_string(split, 
ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT) == 0); VERIFY(nvlist_add_nvlist_array(split, ZPOOL_CONFIG_CHILDREN, schild, lastlogid != 0 ? lastlogid : schildren) == 0); VERIFY(nvlist_alloc(&config, NV_UNIQUE_NAME, 0) == 0); VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, split) == 0); for (c = 0; c < schildren; c++) nvlist_free(schild[c]); free(schild); nvlist_free(split); spa_config_exit(spa, SCL_VDEV, FTAG); (void) rw_enter(&ztest_name_lock, RW_WRITER); error = spa_vdev_split_mirror(spa, "splitp", config, NULL, B_FALSE); (void) rw_exit(&ztest_name_lock); nvlist_free(config); if (error == 0) { (void) printf("successful split - results:\n"); mutex_enter(&spa_namespace_lock); show_pool_stats(spa); show_pool_stats(spa_lookup("splitp")); mutex_exit(&spa_namespace_lock); ++zs->zs_splits; --zs->zs_mirrors; } mutex_exit(&ztest_vdev_lock); } /* * Verify that we can attach and detach devices. */ /* ARGSUSED */ void ztest_vdev_attach_detach(ztest_ds_t *zd, uint64_t id) { ztest_shared_t *zs = ztest_shared; spa_t *spa = ztest_spa; spa_aux_vdev_t *sav = &spa->spa_spares; vdev_t *rvd = spa->spa_root_vdev; vdev_t *oldvd, *newvd, *pvd; nvlist_t *root; uint64_t leaves; uint64_t leaf, top; uint64_t ashift = ztest_get_ashift(); uint64_t oldguid, pguid; size_t oldsize, newsize; char *oldpath, *newpath; int replacing; int oldvd_has_siblings = B_FALSE; int newvd_is_spare = B_FALSE; int oldvd_is_log; int error, expected_error; oldpath = umem_alloc(MAXPATHLEN, UMEM_NOFAIL); newpath = umem_alloc(MAXPATHLEN, UMEM_NOFAIL); mutex_enter(&ztest_vdev_lock); leaves = MAX(zs->zs_mirrors, 1) * ztest_opts.zo_raidz; spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER); /* * Decide whether to do an attach or a replace. */ replacing = ztest_random(2); /* * Pick a random top-level vdev. */ top = ztest_random_vdev_top(spa, B_TRUE); /* * Pick a random leaf within it. */ leaf = ztest_random(leaves); /* * Locate this vdev. */ oldvd = rvd->vdev_child[top]; if (zs->zs_mirrors >= 1) { ASSERT(oldvd->vdev_ops == &vdev_mirror_ops); ASSERT(oldvd->vdev_children >= zs->zs_mirrors); oldvd = oldvd->vdev_child[leaf / ztest_opts.zo_raidz]; } if (ztest_opts.zo_raidz > 1) { ASSERT(oldvd->vdev_ops == &vdev_raidz_ops); ASSERT(oldvd->vdev_children == ztest_opts.zo_raidz); oldvd = oldvd->vdev_child[leaf % ztest_opts.zo_raidz]; } /* * If we're already doing an attach or replace, oldvd may be a * mirror vdev -- in which case, pick a random child. */ while (oldvd->vdev_children != 0) { oldvd_has_siblings = B_TRUE; ASSERT(oldvd->vdev_children >= 2); oldvd = oldvd->vdev_child[ztest_random(oldvd->vdev_children)]; } oldguid = oldvd->vdev_guid; oldsize = vdev_get_min_asize(oldvd); oldvd_is_log = oldvd->vdev_top->vdev_islog; (void) strcpy(oldpath, oldvd->vdev_path); pvd = oldvd->vdev_parent; pguid = pvd->vdev_guid; /* * If oldvd has siblings, then half of the time, detach it. */ if (oldvd_has_siblings && ztest_random(2) == 0) { spa_config_exit(spa, SCL_VDEV, FTAG); error = spa_vdev_detach(spa, oldguid, pguid, B_FALSE); if (error != 0 && error != ENODEV && error != EBUSY && error != ENOTSUP) fatal(0, "detach (%s) returned %d", oldpath, error); goto out; } /* * For the new vdev, choose with equal probability between the two * standard paths (ending in either 'a' or 'b') or a random hot spare. 
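 * Concretely: when spares are configured, ztest_random(3) == 0 selects an
 * existing spare about one time in three; otherwise the standard device
 * path is generated and its trailing character is flipped from 'a' to 'b'
 * half the time.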
*/ if (sav->sav_count != 0 && ztest_random(3) == 0) { newvd = sav->sav_vdevs[ztest_random(sav->sav_count)]; newvd_is_spare = B_TRUE; (void) strcpy(newpath, newvd->vdev_path); } else { (void) snprintf(newpath, MAXPATHLEN, ztest_dev_template, ztest_opts.zo_dir, ztest_opts.zo_pool, top * leaves + leaf); if (ztest_random(2) == 0) newpath[strlen(newpath) - 1] = 'b'; newvd = vdev_lookup_by_path(rvd, newpath); } if (newvd) { newsize = vdev_get_min_asize(newvd); } else { /* * Make newsize a little bigger or smaller than oldsize. * If it's smaller, the attach should fail. * If it's larger, and we're doing a replace, * we should get dynamic LUN growth when we're done. */ newsize = 10 * oldsize / (9 + ztest_random(3)); } /* * If pvd is not a mirror or root, the attach should fail with ENOTSUP, * unless it's a replace; in that case any non-replacing parent is OK. * * If newvd is already part of the pool, it should fail with EBUSY. * * If newvd is too small, it should fail with EOVERFLOW. */ if (pvd->vdev_ops != &vdev_mirror_ops && pvd->vdev_ops != &vdev_root_ops && (!replacing || pvd->vdev_ops == &vdev_replacing_ops || pvd->vdev_ops == &vdev_spare_ops)) expected_error = ENOTSUP; else if (newvd_is_spare && (!replacing || oldvd_is_log)) expected_error = ENOTSUP; else if (newvd == oldvd) expected_error = replacing ? 0 : EBUSY; else if (vdev_lookup_by_path(rvd, newpath) != NULL) expected_error = EBUSY; else if (newsize < oldsize) expected_error = EOVERFLOW; else if (ashift > oldvd->vdev_top->vdev_ashift) expected_error = EDOM; else expected_error = 0; spa_config_exit(spa, SCL_VDEV, FTAG); /* * Build the nvlist describing newpath. */ root = make_vdev_root(newpath, NULL, NULL, newvd == NULL ? newsize : 0, ashift, 0, 0, 0, 1); error = spa_vdev_attach(spa, oldguid, root, replacing); nvlist_free(root); /* * If our parent was the replacing vdev, but the replace completed, * then instead of failing with ENOTSUP we may either succeed, * fail with ENODEV, or fail with EOVERFLOW. */ if (expected_error == ENOTSUP && (error == 0 || error == ENODEV || error == EOVERFLOW)) expected_error = error; /* * If someone grew the LUN, the replacement may be too small. */ if (error == EOVERFLOW || error == EBUSY) expected_error = error; /* XXX workaround 6690467 */ if (error != expected_error && expected_error != EBUSY) { fatal(0, "attach (%s %llu, %s %llu, %d) " "returned %d, expected %d", oldpath, (longlong_t)oldsize, newpath, (longlong_t)newsize, replacing, error, expected_error); } out: mutex_exit(&ztest_vdev_lock); umem_free(oldpath, MAXPATHLEN); umem_free(newpath, MAXPATHLEN); } /* * Callback function which expands the physical size of the vdev. */ vdev_t * grow_vdev(vdev_t *vd, void *arg) { ASSERTV(spa_t *spa = vd->vdev_spa); size_t *newsize = arg; size_t fsize; int fd; ASSERT(spa_config_held(spa, SCL_STATE, RW_READER) == SCL_STATE); ASSERT(vd->vdev_ops->vdev_op_leaf); if ((fd = open(vd->vdev_path, O_RDWR)) == -1) return (vd); fsize = lseek(fd, 0, SEEK_END); VERIFY(ftruncate(fd, *newsize) == 0); if (ztest_opts.zo_verbose >= 6) { (void) printf("%s grew from %lu to %lu bytes\n", vd->vdev_path, (ulong_t)fsize, (ulong_t)*newsize); } (void) close(fd); return (NULL); } /* * Callback function which expands a given vdev by calling vdev_online(). 
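 * It temporarily drops SCL_STATE around the vdev_online(ZFS_ONLINE_EXPAND)
 * call, then checks both the returned newstate and the config generation
 * to make sure the vdev was not detached or replaced while the lock was
 * not held.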
*/ /* ARGSUSED */ vdev_t * online_vdev(vdev_t *vd, void *arg) { spa_t *spa = vd->vdev_spa; vdev_t *tvd = vd->vdev_top; uint64_t guid = vd->vdev_guid; uint64_t generation = spa->spa_config_generation + 1; vdev_state_t newstate = VDEV_STATE_UNKNOWN; int error; ASSERT(spa_config_held(spa, SCL_STATE, RW_READER) == SCL_STATE); ASSERT(vd->vdev_ops->vdev_op_leaf); /* Calling vdev_online will initialize the new metaslabs */ spa_config_exit(spa, SCL_STATE, spa); error = vdev_online(spa, guid, ZFS_ONLINE_EXPAND, &newstate); spa_config_enter(spa, SCL_STATE, spa, RW_READER); /* * If vdev_online returned an error or the underlying vdev_open * failed then we abort the expand. The only way to know that * vdev_open fails is by checking the returned newstate. */ if (error || newstate != VDEV_STATE_HEALTHY) { if (ztest_opts.zo_verbose >= 5) { (void) printf("Unable to expand vdev, state %llu, " "error %d\n", (u_longlong_t)newstate, error); } return (vd); } ASSERT3U(newstate, ==, VDEV_STATE_HEALTHY); /* * Since we dropped the lock we need to ensure that we're * still talking to the original vdev. It's possible this * vdev may have been detached/replaced while we were * trying to online it. */ if (generation != spa->spa_config_generation) { if (ztest_opts.zo_verbose >= 5) { (void) printf("vdev configuration has changed, " "guid %llu, state %llu, expected gen %llu, " "got gen %llu\n", (u_longlong_t)guid, (u_longlong_t)tvd->vdev_state, (u_longlong_t)generation, (u_longlong_t)spa->spa_config_generation); } return (vd); } return (NULL); } /* * Traverse the vdev tree calling the supplied function. * We continue to walk the tree until we either have walked all * children or we receive a non-NULL return from the callback. * If a NULL callback is passed, then we just return back the first * leaf vdev we encounter. */ vdev_t * vdev_walk_tree(vdev_t *vd, vdev_t *(*func)(vdev_t *, void *), void *arg) { uint_t c; if (vd->vdev_ops->vdev_op_leaf) { if (func == NULL) return (vd); else return (func(vd, arg)); } for (c = 0; c < vd->vdev_children; c++) { vdev_t *cvd = vd->vdev_child[c]; if ((cvd = vdev_walk_tree(cvd, func, arg)) != NULL) return (cvd); } return (NULL); } /* * Verify that dynamic LUN growth works as expected. */ /* ARGSUSED */ void ztest_vdev_LUN_growth(ztest_ds_t *zd, uint64_t id) { spa_t *spa = ztest_spa; vdev_t *vd, *tvd; metaslab_class_t *mc; metaslab_group_t *mg; size_t psize, newsize; uint64_t top; uint64_t old_class_space, new_class_space, old_ms_count, new_ms_count; mutex_enter(&ztest_vdev_lock); spa_config_enter(spa, SCL_STATE, spa, RW_READER); top = ztest_random_vdev_top(spa, B_TRUE); tvd = spa->spa_root_vdev->vdev_child[top]; mg = tvd->vdev_mg; mc = mg->mg_class; old_ms_count = tvd->vdev_ms_count; old_class_space = metaslab_class_get_space(mc); /* * Determine the size of the first leaf vdev associated with * our top-level device. */ vd = vdev_walk_tree(tvd, NULL, NULL); ASSERT3P(vd, !=, NULL); ASSERT(vd->vdev_ops->vdev_op_leaf); psize = vd->vdev_psize; /* * We only try to expand the vdev if it's healthy, less than 4x its * original size, and it has a valid psize. */ if (tvd->vdev_state != VDEV_STATE_HEALTHY || psize == 0 || psize >= 4 * ztest_opts.zo_vdev_size) { spa_config_exit(spa, SCL_STATE, spa); mutex_exit(&ztest_vdev_lock); return; } ASSERT(psize > 0); newsize = psize + psize / 8; ASSERT3U(newsize, >, psize); if (ztest_opts.zo_verbose >= 6) { (void) printf("Expanding LUN %s from %lu to %lu\n", vd->vdev_path, (ulong_t)psize, (ulong_t)newsize); } /* * Growing the vdev is a two step process: * 1). 
expand the physical size (i.e. relabel) * 2). online the vdev to create the new metaslabs */ if (vdev_walk_tree(tvd, grow_vdev, &newsize) != NULL || vdev_walk_tree(tvd, online_vdev, NULL) != NULL || tvd->vdev_state != VDEV_STATE_HEALTHY) { if (ztest_opts.zo_verbose >= 5) { (void) printf("Could not expand LUN because " "the vdev configuration changed.\n"); } spa_config_exit(spa, SCL_STATE, spa); mutex_exit(&ztest_vdev_lock); return; } spa_config_exit(spa, SCL_STATE, spa); /* * Expanding the LUN will update the config asynchronously, * thus we must wait for the async thread to complete any * pending tasks before proceeding. */ for (;;) { boolean_t done; mutex_enter(&spa->spa_async_lock); done = (spa->spa_async_thread == NULL && !spa->spa_async_tasks); mutex_exit(&spa->spa_async_lock); if (done) break; txg_wait_synced(spa_get_dsl(spa), 0); (void) poll(NULL, 0, 100); } spa_config_enter(spa, SCL_STATE, spa, RW_READER); tvd = spa->spa_root_vdev->vdev_child[top]; new_ms_count = tvd->vdev_ms_count; new_class_space = metaslab_class_get_space(mc); if (tvd->vdev_mg != mg || mg->mg_class != mc) { if (ztest_opts.zo_verbose >= 5) { (void) printf("Could not verify LUN expansion due to " "intervening vdev offline or remove.\n"); } spa_config_exit(spa, SCL_STATE, spa); mutex_exit(&ztest_vdev_lock); return; } /* * Make sure we were able to grow the vdev. */ if (new_ms_count <= old_ms_count) fatal(0, "LUN expansion failed: ms_count %llu <= %llu\n", old_ms_count, new_ms_count); /* * Make sure we were able to grow the pool. */ if (new_class_space <= old_class_space) fatal(0, "LUN expansion failed: class_space %llu <= %llu\n", old_class_space, new_class_space); if (ztest_opts.zo_verbose >= 5) { char oldnumbuf[6], newnumbuf[6]; nicenum(old_class_space, oldnumbuf); nicenum(new_class_space, newnumbuf); (void) printf("%s grew from %s to %s\n", spa->spa_name, oldnumbuf, newnumbuf); } spa_config_exit(spa, SCL_STATE, spa); mutex_exit(&ztest_vdev_lock); } /* * Verify that dmu_objset_{create,destroy,open,close} work as expected. */ /* ARGSUSED */ static void ztest_objset_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx) { /* * Create the objects common to all ztest datasets. */ VERIFY(zap_create_claim(os, ZTEST_DIROBJ, DMU_OT_ZAP_OTHER, DMU_OT_NONE, 0, tx) == 0); } static int ztest_dataset_create(char *dsname) { uint64_t zilset = ztest_random(100); int err = dmu_objset_create(dsname, DMU_OST_OTHER, 0, ztest_objset_create_cb, NULL); if (err || zilset < 80) return (err); if (ztest_opts.zo_verbose >= 5) (void) printf("Setting dataset %s to sync always\n", dsname); return (ztest_dsl_prop_set_uint64(dsname, ZFS_PROP_SYNC, ZFS_SYNC_ALWAYS, B_FALSE)); } /* ARGSUSED */ static int ztest_objset_destroy_cb(const char *name, void *arg) { objset_t *os; dmu_object_info_t doi; int error; /* * Verify that the dataset contains a directory object. */ VERIFY3U(0, ==, dmu_objset_hold(name, FTAG, &os)); error = dmu_object_info(os, ZTEST_DIROBJ, &doi); if (error != ENOENT) { /* We could have crashed in the middle of destroying it */ ASSERT0(error); ASSERT3U(doi.doi_type, ==, DMU_OT_ZAP_OTHER); ASSERT3S(doi.doi_physical_blocks_512, >=, 0); } dmu_objset_rele(os, FTAG); /* * Destroy the dataset. 
*/ VERIFY3U(0, ==, dmu_objset_destroy(name, B_FALSE)); return (0); } static boolean_t ztest_snapshot_create(char *osname, uint64_t id) { char snapname[MAXNAMELEN]; int error; (void) snprintf(snapname, MAXNAMELEN, "%s@%llu", osname, (u_longlong_t)id); - error = dmu_objset_snapshot(osname, strchr(snapname, '@') + 1, - NULL, NULL, B_FALSE, B_FALSE, -1); + error = dmu_objset_snapshot_one(osname, strchr(snapname, '@') + 1); if (error == ENOSPC) { ztest_record_enospc(FTAG); return (B_FALSE); } if (error != 0 && error != EEXIST) fatal(0, "ztest_snapshot_create(%s) = %d", snapname, error); return (B_TRUE); } static boolean_t ztest_snapshot_destroy(char *osname, uint64_t id) { char snapname[MAXNAMELEN]; int error; (void) snprintf(snapname, MAXNAMELEN, "%s@%llu", osname, (u_longlong_t)id); error = dmu_objset_destroy(snapname, B_FALSE); if (error != 0 && error != ENOENT) fatal(0, "ztest_snapshot_destroy(%s) = %d", snapname, error); return (B_TRUE); } /* ARGSUSED */ void ztest_dmu_objset_create_destroy(ztest_ds_t *zd, uint64_t id) { ztest_ds_t *zdtmp; int iters; int error; objset_t *os, *os2; char *name; zilog_t *zilog; int i; zdtmp = umem_alloc(sizeof (ztest_ds_t), UMEM_NOFAIL); name = umem_alloc(MAXNAMELEN, UMEM_NOFAIL); (void) rw_enter(&ztest_name_lock, RW_READER); (void) snprintf(name, MAXNAMELEN, "%s/temp_%llu", ztest_opts.zo_pool, (u_longlong_t)id); /* * If this dataset exists from a previous run, process its replay log * half of the time. If we don't replay it, then dmu_objset_destroy() * (invoked from ztest_objset_destroy_cb()) should just throw it away. */ if (ztest_random(2) == 0 && dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, FTAG, &os) == 0) { ztest_zd_init(zdtmp, NULL, os); zil_replay(os, zdtmp, ztest_replay_vector); ztest_zd_fini(zdtmp); dmu_objset_disown(os, FTAG); } /* * There may be an old instance of the dataset we're about to * create lying around from a previous run. If so, destroy it * and all of its snapshots. */ (void) dmu_objset_find(name, ztest_objset_destroy_cb, NULL, DS_FIND_CHILDREN | DS_FIND_SNAPSHOTS); /* * Verify that the destroyed dataset is no longer in the namespace. */ VERIFY3U(ENOENT, ==, dmu_objset_hold(name, FTAG, &os)); /* * Verify that we can create a new dataset. */ error = ztest_dataset_create(name); if (error) { if (error == ENOSPC) { ztest_record_enospc(FTAG); goto out; } fatal(0, "dmu_objset_create(%s) = %d", name, error); } VERIFY3U(0, ==, dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, FTAG, &os)); ztest_zd_init(zdtmp, NULL, os); /* * Open the intent log for it. */ zilog = zil_open(os, ztest_get_data); /* * Put some objects in there, do a little I/O to them, * and randomly take a couple of snapshots along the way. */ iters = ztest_random(5); for (i = 0; i < iters; i++) { ztest_dmu_object_alloc_free(zdtmp, id); if (ztest_random(iters) == 0) (void) ztest_snapshot_create(name, i); } /* * Verify that we cannot create an existing dataset. */ VERIFY3U(EEXIST, ==, dmu_objset_create(name, DMU_OST_OTHER, 0, NULL, NULL)); /* * Verify that we can hold an objset that is also owned. */ VERIFY3U(0, ==, dmu_objset_hold(name, FTAG, &os2)); dmu_objset_rele(os2, FTAG); /* * Verify that we cannot own an objset that is already owned. */ VERIFY3U(EBUSY, ==, dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, FTAG, &os2)); zil_close(zilog); dmu_objset_disown(os, FTAG); ztest_zd_fini(zdtmp); out: (void) rw_exit(&ztest_name_lock); umem_free(name, MAXNAMELEN); umem_free(zdtmp, sizeof (ztest_ds_t)); } /* * Verify that dmu_snapshot_{create,destroy,open,close} work as expected. 
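 * Each pass simply destroys and then recreates the snapshot named
 * <dataset>@<id> while holding ztest_name_lock as reader; the helpers
 * above already treat EEXIST on create and ENOENT on destroy as benign.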
*/ void ztest_dmu_snapshot_create_destroy(ztest_ds_t *zd, uint64_t id) { (void) rw_enter(&ztest_name_lock, RW_READER); (void) ztest_snapshot_destroy(zd->zd_name, id); (void) ztest_snapshot_create(zd->zd_name, id); (void) rw_exit(&ztest_name_lock); } /* * Cleanup non-standard snapshots and clones. */ void ztest_dsl_dataset_cleanup(char *osname, uint64_t id) { char *snap1name; char *clone1name; char *snap2name; char *clone2name; char *snap3name; int error; snap1name = umem_alloc(MAXNAMELEN, UMEM_NOFAIL); clone1name = umem_alloc(MAXNAMELEN, UMEM_NOFAIL); snap2name = umem_alloc(MAXNAMELEN, UMEM_NOFAIL); clone2name = umem_alloc(MAXNAMELEN, UMEM_NOFAIL); snap3name = umem_alloc(MAXNAMELEN, UMEM_NOFAIL); (void) snprintf(snap1name, MAXNAMELEN, "%s@s1_%llu", osname, (u_longlong_t)id); (void) snprintf(clone1name, MAXNAMELEN, "%s/c1_%llu", osname, (u_longlong_t)id); (void) snprintf(snap2name, MAXNAMELEN, "%s@s2_%llu", clone1name, (u_longlong_t)id); (void) snprintf(clone2name, MAXNAMELEN, "%s/c2_%llu", osname, (u_longlong_t)id); (void) snprintf(snap3name, MAXNAMELEN, "%s@s3_%llu", clone1name, (u_longlong_t)id); error = dmu_objset_destroy(clone2name, B_FALSE); if (error && error != ENOENT) fatal(0, "dmu_objset_destroy(%s) = %d", clone2name, error); error = dmu_objset_destroy(snap3name, B_FALSE); if (error && error != ENOENT) fatal(0, "dmu_objset_destroy(%s) = %d", snap3name, error); error = dmu_objset_destroy(snap2name, B_FALSE); if (error && error != ENOENT) fatal(0, "dmu_objset_destroy(%s) = %d", snap2name, error); error = dmu_objset_destroy(clone1name, B_FALSE); if (error && error != ENOENT) fatal(0, "dmu_objset_destroy(%s) = %d", clone1name, error); error = dmu_objset_destroy(snap1name, B_FALSE); if (error && error != ENOENT) fatal(0, "dmu_objset_destroy(%s) = %d", snap1name, error); umem_free(snap1name, MAXNAMELEN); umem_free(clone1name, MAXNAMELEN); umem_free(snap2name, MAXNAMELEN); umem_free(clone2name, MAXNAMELEN); umem_free(snap3name, MAXNAMELEN); } /* * Verify dsl_dataset_promote handles EBUSY */ void ztest_dsl_dataset_promote_busy(ztest_ds_t *zd, uint64_t id) { objset_t *clone; dsl_dataset_t *ds; char *snap1name; char *clone1name; char *snap2name; char *clone2name; char *snap3name; char *osname = zd->zd_name; int error; snap1name = umem_alloc(MAXNAMELEN, UMEM_NOFAIL); clone1name = umem_alloc(MAXNAMELEN, UMEM_NOFAIL); snap2name = umem_alloc(MAXNAMELEN, UMEM_NOFAIL); clone2name = umem_alloc(MAXNAMELEN, UMEM_NOFAIL); snap3name = umem_alloc(MAXNAMELEN, UMEM_NOFAIL); (void) rw_enter(&ztest_name_lock, RW_READER); ztest_dsl_dataset_cleanup(osname, id); (void) snprintf(snap1name, MAXNAMELEN, "%s@s1_%llu", osname, (u_longlong_t)id); (void) snprintf(clone1name, MAXNAMELEN, "%s/c1_%llu", osname, (u_longlong_t)id); (void) snprintf(snap2name, MAXNAMELEN, "%s@s2_%llu", clone1name, (u_longlong_t)id); (void) snprintf(clone2name, MAXNAMELEN, "%s/c2_%llu", osname, (u_longlong_t)id); (void) snprintf(snap3name, MAXNAMELEN, "%s@s3_%llu", clone1name, (u_longlong_t)id); - error = dmu_objset_snapshot(osname, strchr(snap1name, '@')+1, - NULL, NULL, B_FALSE, B_FALSE, -1); + error = dmu_objset_snapshot_one(osname, strchr(snap1name, '@') + 1); if (error && error != EEXIST) { if (error == ENOSPC) { ztest_record_enospc(FTAG); goto out; } fatal(0, "dmu_take_snapshot(%s) = %d", snap1name, error); } error = dmu_objset_hold(snap1name, FTAG, &clone); if (error) fatal(0, "dmu_open_snapshot(%s) = %d", snap1name, error); error = dmu_objset_clone(clone1name, dmu_objset_ds(clone), 0); dmu_objset_rele(clone, FTAG); if (error) { if 
(error == ENOSPC) { ztest_record_enospc(FTAG); goto out; } fatal(0, "dmu_objset_create(%s) = %d", clone1name, error); } - error = dmu_objset_snapshot(clone1name, strchr(snap2name, '@')+1, - NULL, NULL, B_FALSE, B_FALSE, -1); + error = dmu_objset_snapshot_one(clone1name, strchr(snap2name, '@') + 1); if (error && error != EEXIST) { if (error == ENOSPC) { ztest_record_enospc(FTAG); goto out; } fatal(0, "dmu_open_snapshot(%s) = %d", snap2name, error); } - error = dmu_objset_snapshot(clone1name, strchr(snap3name, '@')+1, - NULL, NULL, B_FALSE, B_FALSE, -1); + error = dmu_objset_snapshot_one(clone1name, strchr(snap3name, '@') + 1); if (error && error != EEXIST) { if (error == ENOSPC) { ztest_record_enospc(FTAG); goto out; } fatal(0, "dmu_open_snapshot(%s) = %d", snap3name, error); } error = dmu_objset_hold(snap3name, FTAG, &clone); if (error) fatal(0, "dmu_open_snapshot(%s) = %d", snap3name, error); error = dmu_objset_clone(clone2name, dmu_objset_ds(clone), 0); dmu_objset_rele(clone, FTAG); if (error) { if (error == ENOSPC) { ztest_record_enospc(FTAG); goto out; } fatal(0, "dmu_objset_create(%s) = %d", clone2name, error); } error = dsl_dataset_own(snap2name, B_FALSE, FTAG, &ds); if (error) fatal(0, "dsl_dataset_own(%s) = %d", snap2name, error); error = dsl_dataset_promote(clone2name, NULL); if (error != EBUSY) fatal(0, "dsl_dataset_promote(%s), %d, not EBUSY", clone2name, error); dsl_dataset_disown(ds, FTAG); out: ztest_dsl_dataset_cleanup(osname, id); (void) rw_exit(&ztest_name_lock); umem_free(snap1name, MAXNAMELEN); umem_free(clone1name, MAXNAMELEN); umem_free(snap2name, MAXNAMELEN); umem_free(clone2name, MAXNAMELEN); umem_free(snap3name, MAXNAMELEN); } #undef OD_ARRAY_SIZE #define OD_ARRAY_SIZE 4 /* * Verify that dmu_object_{alloc,free} work as expected. */ void ztest_dmu_object_alloc_free(ztest_ds_t *zd, uint64_t id) { ztest_od_t *od; int batchsize; int size; int b; size = sizeof(ztest_od_t) * OD_ARRAY_SIZE; od = umem_alloc(size, UMEM_NOFAIL); batchsize = OD_ARRAY_SIZE; for (b = 0; b < batchsize; b++) ztest_od_init(od + b, id, FTAG, b, DMU_OT_UINT64_OTHER, 0, 0); /* * Destroy the previous batch of objects, create a new batch, * and do some I/O on the new objects. */ if (ztest_object_init(zd, od, size, B_TRUE) != 0) return; while (ztest_random(4 * batchsize) != 0) ztest_io(zd, od[ztest_random(batchsize)].od_object, ztest_random(ZTEST_RANGE_LOCKS) << SPA_MAXBLOCKSHIFT); umem_free(od, size); } #undef OD_ARRAY_SIZE #define OD_ARRAY_SIZE 2 /* * Verify that dmu_{read,write} work as expected. */ void ztest_dmu_read_write(ztest_ds_t *zd, uint64_t id) { int size; ztest_od_t *od; objset_t *os = zd->zd_os; size = sizeof(ztest_od_t) * OD_ARRAY_SIZE; od = umem_alloc(size, UMEM_NOFAIL); dmu_tx_t *tx; int i, freeit, error; uint64_t n, s, txg; bufwad_t *packbuf, *bigbuf, *pack, *bigH, *bigT; uint64_t packobj, packoff, packsize, bigobj, bigoff, bigsize; uint64_t chunksize = (1000 + ztest_random(1000)) * sizeof (uint64_t); uint64_t regions = 997; uint64_t stride = 123456789ULL; uint64_t width = 40; int free_percent = 5; /* * This test uses two objects, packobj and bigobj, that are always * updated together (i.e. in the same tx) so that their contents are * in sync and can be compared. Their contents relate to each other * in a simple way: packobj is a dense array of 'bufwad' structures, * while bigobj is a sparse array of the same bufwads. 
Specifically, * for any index n, there are three bufwads that should be identical: * * packobj, at offset n * sizeof (bufwad_t) * bigobj, at the head of the nth chunk * bigobj, at the tail of the nth chunk * * The chunk size is arbitrary. It doesn't have to be a power of two, * and it doesn't have any relation to the object blocksize. * The only requirement is that it can hold at least two bufwads. * * Normally, we write the bufwad to each of these locations. * However, free_percent of the time we instead write zeroes to * packobj and perform a dmu_free_range() on bigobj. By comparing * bigobj to packobj, we can verify that the DMU is correctly * tracking which parts of an object are allocated and free, * and that the contents of the allocated blocks are correct. */ /* * Read the directory info. If it's the first time, set things up. */ ztest_od_init(od, id, FTAG, 0, DMU_OT_UINT64_OTHER, 0, chunksize); ztest_od_init(od + 1, id, FTAG, 1, DMU_OT_UINT64_OTHER, 0, chunksize); if (ztest_object_init(zd, od, size, B_FALSE) != 0) { umem_free(od, size); return; } bigobj = od[0].od_object; packobj = od[1].od_object; chunksize = od[0].od_gen; ASSERT(chunksize == od[1].od_gen); /* * Prefetch a random chunk of the big object. * Our aim here is to get some async reads in flight * for blocks that we may free below; the DMU should * handle this race correctly. */ n = ztest_random(regions) * stride + ztest_random(width); s = 1 + ztest_random(2 * width - 1); dmu_prefetch(os, bigobj, n * chunksize, s * chunksize); /* * Pick a random index and compute the offsets into packobj and bigobj. */ n = ztest_random(regions) * stride + ztest_random(width); s = 1 + ztest_random(width - 1); packoff = n * sizeof (bufwad_t); packsize = s * sizeof (bufwad_t); bigoff = n * chunksize; bigsize = s * chunksize; packbuf = umem_alloc(packsize, UMEM_NOFAIL); bigbuf = umem_alloc(bigsize, UMEM_NOFAIL); /* * free_percent of the time, free a range of bigobj rather than * overwriting it. */ freeit = (ztest_random(100) < free_percent); /* * Read the current contents of our objects. */ error = dmu_read(os, packobj, packoff, packsize, packbuf, DMU_READ_PREFETCH); ASSERT0(error); error = dmu_read(os, bigobj, bigoff, bigsize, bigbuf, DMU_READ_PREFETCH); ASSERT0(error); /* * Get a tx for the mods to both packobj and bigobj. */ tx = dmu_tx_create(os); dmu_tx_hold_write(tx, packobj, packoff, packsize); if (freeit) dmu_tx_hold_free(tx, bigobj, bigoff, bigsize); else dmu_tx_hold_write(tx, bigobj, bigoff, bigsize); txg = ztest_tx_assign(tx, TXG_MIGHTWAIT, FTAG); if (txg == 0) { umem_free(packbuf, packsize); umem_free(bigbuf, bigsize); umem_free(od, size); return; } dmu_object_set_checksum(os, bigobj, (enum zio_checksum)ztest_random_dsl_prop(ZFS_PROP_CHECKSUM), tx); dmu_object_set_compress(os, bigobj, (enum zio_compress)ztest_random_dsl_prop(ZFS_PROP_COMPRESSION), tx); /* * For each index from n to n + s, verify that the existing bufwad * in packobj matches the bufwads at the head and tail of the * corresponding chunk in bigobj. Then update all three bufwads * with the new values we want to write out. 
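 * Concretely, for each index n + i the three copies being compared are:
 *
 *	pack:	packobj, at offset (n + i) * sizeof (bufwad_t)
 *	bigH:	bigobj,  at offset (n + i) * chunksize
 *	bigT:	bigobj,  at offset (n + i + 1) * chunksize - sizeof (bufwad_t)
 *
 * All three must match before they are rewritten (or zeroed, when the
 * range is about to be freed).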
*/ for (i = 0; i < s; i++) { /* LINTED */ pack = (bufwad_t *)((char *)packbuf + i * sizeof (bufwad_t)); /* LINTED */ bigH = (bufwad_t *)((char *)bigbuf + i * chunksize); /* LINTED */ bigT = (bufwad_t *)((char *)bigH + chunksize) - 1; ASSERT((uintptr_t)bigH - (uintptr_t)bigbuf < bigsize); ASSERT((uintptr_t)bigT - (uintptr_t)bigbuf < bigsize); if (pack->bw_txg > txg) fatal(0, "future leak: got %llx, open txg is %llx", pack->bw_txg, txg); if (pack->bw_data != 0 && pack->bw_index != n + i) fatal(0, "wrong index: got %llx, wanted %llx+%llx", pack->bw_index, n, i); if (bcmp(pack, bigH, sizeof (bufwad_t)) != 0) fatal(0, "pack/bigH mismatch in %p/%p", pack, bigH); if (bcmp(pack, bigT, sizeof (bufwad_t)) != 0) fatal(0, "pack/bigT mismatch in %p/%p", pack, bigT); if (freeit) { bzero(pack, sizeof (bufwad_t)); } else { pack->bw_index = n + i; pack->bw_txg = txg; pack->bw_data = 1 + ztest_random(-2ULL); } *bigH = *pack; *bigT = *pack; } /* * We've verified all the old bufwads, and made new ones. * Now write them out. */ dmu_write(os, packobj, packoff, packsize, packbuf, tx); if (freeit) { if (ztest_opts.zo_verbose >= 7) { (void) printf("freeing offset %llx size %llx" " txg %llx\n", (u_longlong_t)bigoff, (u_longlong_t)bigsize, (u_longlong_t)txg); } VERIFY(0 == dmu_free_range(os, bigobj, bigoff, bigsize, tx)); } else { if (ztest_opts.zo_verbose >= 7) { (void) printf("writing offset %llx size %llx" " txg %llx\n", (u_longlong_t)bigoff, (u_longlong_t)bigsize, (u_longlong_t)txg); } dmu_write(os, bigobj, bigoff, bigsize, bigbuf, tx); } dmu_tx_commit(tx); /* * Sanity check the stuff we just wrote. */ { void *packcheck = umem_alloc(packsize, UMEM_NOFAIL); void *bigcheck = umem_alloc(bigsize, UMEM_NOFAIL); VERIFY(0 == dmu_read(os, packobj, packoff, packsize, packcheck, DMU_READ_PREFETCH)); VERIFY(0 == dmu_read(os, bigobj, bigoff, bigsize, bigcheck, DMU_READ_PREFETCH)); ASSERT(bcmp(packbuf, packcheck, packsize) == 0); ASSERT(bcmp(bigbuf, bigcheck, bigsize) == 0); umem_free(packcheck, packsize); umem_free(bigcheck, bigsize); } umem_free(packbuf, packsize); umem_free(bigbuf, bigsize); umem_free(od, size); } void compare_and_update_pbbufs(uint64_t s, bufwad_t *packbuf, bufwad_t *bigbuf, uint64_t bigsize, uint64_t n, uint64_t chunksize, uint64_t txg) { uint64_t i; bufwad_t *pack; bufwad_t *bigH; bufwad_t *bigT; /* * For each index from n to n + s, verify that the existing bufwad * in packobj matches the bufwads at the head and tail of the * corresponding chunk in bigobj. Then update all three bufwads * with the new values we want to write out. 
*/ for (i = 0; i < s; i++) { /* LINTED */ pack = (bufwad_t *)((char *)packbuf + i * sizeof (bufwad_t)); /* LINTED */ bigH = (bufwad_t *)((char *)bigbuf + i * chunksize); /* LINTED */ bigT = (bufwad_t *)((char *)bigH + chunksize) - 1; ASSERT((uintptr_t)bigH - (uintptr_t)bigbuf < bigsize); ASSERT((uintptr_t)bigT - (uintptr_t)bigbuf < bigsize); if (pack->bw_txg > txg) fatal(0, "future leak: got %llx, open txg is %llx", pack->bw_txg, txg); if (pack->bw_data != 0 && pack->bw_index != n + i) fatal(0, "wrong index: got %llx, wanted %llx+%llx", pack->bw_index, n, i); if (bcmp(pack, bigH, sizeof (bufwad_t)) != 0) fatal(0, "pack/bigH mismatch in %p/%p", pack, bigH); if (bcmp(pack, bigT, sizeof (bufwad_t)) != 0) fatal(0, "pack/bigT mismatch in %p/%p", pack, bigT); pack->bw_index = n + i; pack->bw_txg = txg; pack->bw_data = 1 + ztest_random(-2ULL); *bigH = *pack; *bigT = *pack; } } #undef OD_ARRAY_SIZE #define OD_ARRAY_SIZE 2 void ztest_dmu_read_write_zcopy(ztest_ds_t *zd, uint64_t id) { objset_t *os = zd->zd_os; ztest_od_t *od; dmu_tx_t *tx; uint64_t i; int error; int size; uint64_t n, s, txg; bufwad_t *packbuf, *bigbuf; uint64_t packobj, packoff, packsize, bigobj, bigoff, bigsize; uint64_t blocksize = ztest_random_blocksize(); uint64_t chunksize = blocksize; uint64_t regions = 997; uint64_t stride = 123456789ULL; uint64_t width = 9; dmu_buf_t *bonus_db; arc_buf_t **bigbuf_arcbufs; dmu_object_info_t doi; size = sizeof(ztest_od_t) * OD_ARRAY_SIZE; od = umem_alloc(size, UMEM_NOFAIL); /* * This test uses two objects, packobj and bigobj, that are always * updated together (i.e. in the same tx) so that their contents are * in sync and can be compared. Their contents relate to each other * in a simple way: packobj is a dense array of 'bufwad' structures, * while bigobj is a sparse array of the same bufwads. Specifically, * for any index n, there are three bufwads that should be identical: * * packobj, at offset n * sizeof (bufwad_t) * bigobj, at the head of the nth chunk * bigobj, at the tail of the nth chunk * * The chunk size is set equal to bigobj block size so that * dmu_assign_arcbuf() can be tested for object updates. */ /* * Read the directory info. If it's the first time, set things up. */ ztest_od_init(od, id, FTAG, 0, DMU_OT_UINT64_OTHER, blocksize, 0); ztest_od_init(od + 1, id, FTAG, 1, DMU_OT_UINT64_OTHER, 0, chunksize); if (ztest_object_init(zd, od, size, B_FALSE) != 0) { umem_free(od, size); return; } bigobj = od[0].od_object; packobj = od[1].od_object; blocksize = od[0].od_blocksize; chunksize = blocksize; ASSERT(chunksize == od[1].od_gen); VERIFY(dmu_object_info(os, bigobj, &doi) == 0); VERIFY(ISP2(doi.doi_data_block_size)); VERIFY(chunksize == doi.doi_data_block_size); VERIFY(chunksize >= 2 * sizeof (bufwad_t)); /* * Pick a random index and compute the offsets into packobj and bigobj. */ n = ztest_random(regions) * stride + ztest_random(width); s = 1 + ztest_random(width - 1); packoff = n * sizeof (bufwad_t); packsize = s * sizeof (bufwad_t); bigoff = n * chunksize; bigsize = s * chunksize; packbuf = umem_zalloc(packsize, UMEM_NOFAIL); bigbuf = umem_zalloc(bigsize, UMEM_NOFAIL); VERIFY3U(0, ==, dmu_bonus_hold(os, bigobj, FTAG, &bonus_db)); bigbuf_arcbufs = umem_zalloc(2 * s * sizeof (arc_buf_t *), UMEM_NOFAIL); /* * Iteration 0 test zcopy for DB_UNCACHED dbufs. * Iteration 1 test zcopy to already referenced dbufs. * Iteration 2 test zcopy to dirty dbuf in the same txg. * Iteration 3 test zcopy to dbuf dirty in previous txg. * Iteration 4 test zcopy when dbuf is no longer dirty. 
* Iteration 5 test zcopy when it can't be done. * Iteration 6 one more zcopy write. */ for (i = 0; i < 7; i++) { uint64_t j; uint64_t off; /* * In iteration 5 (i == 5) use arcbufs * that don't match bigobj blksz to test * dmu_assign_arcbuf() when it can't directly * assign an arcbuf to a dbuf. */ for (j = 0; j < s; j++) { if (i != 5) { bigbuf_arcbufs[j] = dmu_request_arcbuf(bonus_db, chunksize); } else { bigbuf_arcbufs[2 * j] = dmu_request_arcbuf(bonus_db, chunksize / 2); bigbuf_arcbufs[2 * j + 1] = dmu_request_arcbuf(bonus_db, chunksize / 2); } } /* * Get a tx for the mods to both packobj and bigobj. */ tx = dmu_tx_create(os); dmu_tx_hold_write(tx, packobj, packoff, packsize); dmu_tx_hold_write(tx, bigobj, bigoff, bigsize); txg = ztest_tx_assign(tx, TXG_MIGHTWAIT, FTAG); if (txg == 0) { umem_free(packbuf, packsize); umem_free(bigbuf, bigsize); for (j = 0; j < s; j++) { if (i != 5) { dmu_return_arcbuf(bigbuf_arcbufs[j]); } else { dmu_return_arcbuf( bigbuf_arcbufs[2 * j]); dmu_return_arcbuf( bigbuf_arcbufs[2 * j + 1]); } } umem_free(bigbuf_arcbufs, 2 * s * sizeof (arc_buf_t *)); umem_free(od, size); dmu_buf_rele(bonus_db, FTAG); return; } /* * 50% of the time don't read objects in the 1st iteration to * test dmu_assign_arcbuf() for the case when there're no * existing dbufs for the specified offsets. */ if (i != 0 || ztest_random(2) != 0) { error = dmu_read(os, packobj, packoff, packsize, packbuf, DMU_READ_PREFETCH); ASSERT0(error); error = dmu_read(os, bigobj, bigoff, bigsize, bigbuf, DMU_READ_PREFETCH); ASSERT0(error); } compare_and_update_pbbufs(s, packbuf, bigbuf, bigsize, n, chunksize, txg); /* * We've verified all the old bufwads, and made new ones. * Now write them out. */ dmu_write(os, packobj, packoff, packsize, packbuf, tx); if (ztest_opts.zo_verbose >= 7) { (void) printf("writing offset %llx size %llx" " txg %llx\n", (u_longlong_t)bigoff, (u_longlong_t)bigsize, (u_longlong_t)txg); } for (off = bigoff, j = 0; j < s; j++, off += chunksize) { dmu_buf_t *dbt; if (i != 5) { bcopy((caddr_t)bigbuf + (off - bigoff), bigbuf_arcbufs[j]->b_data, chunksize); } else { bcopy((caddr_t)bigbuf + (off - bigoff), bigbuf_arcbufs[2 * j]->b_data, chunksize / 2); bcopy((caddr_t)bigbuf + (off - bigoff) + chunksize / 2, bigbuf_arcbufs[2 * j + 1]->b_data, chunksize / 2); } if (i == 1) { VERIFY(dmu_buf_hold(os, bigobj, off, FTAG, &dbt, DMU_READ_NO_PREFETCH) == 0); } if (i != 5) { dmu_assign_arcbuf(bonus_db, off, bigbuf_arcbufs[j], tx); } else { dmu_assign_arcbuf(bonus_db, off, bigbuf_arcbufs[2 * j], tx); dmu_assign_arcbuf(bonus_db, off + chunksize / 2, bigbuf_arcbufs[2 * j + 1], tx); } if (i == 1) { dmu_buf_rele(dbt, FTAG); } } dmu_tx_commit(tx); /* * Sanity check the stuff we just wrote. 
*/ { void *packcheck = umem_alloc(packsize, UMEM_NOFAIL); void *bigcheck = umem_alloc(bigsize, UMEM_NOFAIL); VERIFY(0 == dmu_read(os, packobj, packoff, packsize, packcheck, DMU_READ_PREFETCH)); VERIFY(0 == dmu_read(os, bigobj, bigoff, bigsize, bigcheck, DMU_READ_PREFETCH)); ASSERT(bcmp(packbuf, packcheck, packsize) == 0); ASSERT(bcmp(bigbuf, bigcheck, bigsize) == 0); umem_free(packcheck, packsize); umem_free(bigcheck, bigsize); } if (i == 2) { txg_wait_open(dmu_objset_pool(os), 0); } else if (i == 3) { txg_wait_synced(dmu_objset_pool(os), 0); } } dmu_buf_rele(bonus_db, FTAG); umem_free(packbuf, packsize); umem_free(bigbuf, bigsize); umem_free(bigbuf_arcbufs, 2 * s * sizeof (arc_buf_t *)); umem_free(od, size); } /* ARGSUSED */ void ztest_dmu_write_parallel(ztest_ds_t *zd, uint64_t id) { ztest_od_t *od; od = umem_alloc(sizeof(ztest_od_t), UMEM_NOFAIL); uint64_t offset = (1ULL << (ztest_random(20) + 43)) + (ztest_random(ZTEST_RANGE_LOCKS) << SPA_MAXBLOCKSHIFT); /* * Have multiple threads write to large offsets in an object * to verify that parallel writes to an object -- even to the * same blocks within the object -- doesn't cause any trouble. */ ztest_od_init(od, ID_PARALLEL, FTAG, 0, DMU_OT_UINT64_OTHER, 0, 0); if (ztest_object_init(zd, od, sizeof (ztest_od_t), B_FALSE) != 0) return; while (ztest_random(10) != 0) ztest_io(zd, od->od_object, offset); umem_free(od, sizeof(ztest_od_t)); } void ztest_dmu_prealloc(ztest_ds_t *zd, uint64_t id) { ztest_od_t *od; uint64_t offset = (1ULL << (ztest_random(4) + SPA_MAXBLOCKSHIFT)) + (ztest_random(ZTEST_RANGE_LOCKS) << SPA_MAXBLOCKSHIFT); uint64_t count = ztest_random(20) + 1; uint64_t blocksize = ztest_random_blocksize(); void *data; od = umem_alloc(sizeof(ztest_od_t), UMEM_NOFAIL); ztest_od_init(od, id, FTAG, 0, DMU_OT_UINT64_OTHER, blocksize, 0); if (ztest_object_init(zd, od, sizeof (ztest_od_t), !ztest_random(2)) != 0) { umem_free(od, sizeof(ztest_od_t)); return; } if (ztest_truncate(zd, od->od_object, offset, count * blocksize) != 0) { umem_free(od, sizeof(ztest_od_t)); return; } ztest_prealloc(zd, od->od_object, offset, count * blocksize); data = umem_zalloc(blocksize, UMEM_NOFAIL); while (ztest_random(count) != 0) { uint64_t randoff = offset + (ztest_random(count) * blocksize); if (ztest_write(zd, od->od_object, randoff, blocksize, data) != 0) break; while (ztest_random(4) != 0) ztest_io(zd, od->od_object, randoff); } umem_free(data, blocksize); umem_free(od, sizeof(ztest_od_t)); } /* * Verify that zap_{create,destroy,add,remove,update} work as expected. */ #define ZTEST_ZAP_MIN_INTS 1 #define ZTEST_ZAP_MAX_INTS 4 #define ZTEST_ZAP_MAX_PROPS 1000 void ztest_zap(ztest_ds_t *zd, uint64_t id) { objset_t *os = zd->zd_os; ztest_od_t *od; uint64_t object; uint64_t txg, last_txg; uint64_t value[ZTEST_ZAP_MAX_INTS]; uint64_t zl_ints, zl_intsize, prop; int i, ints; dmu_tx_t *tx; char propname[100], txgname[100]; int error; char *hc[2] = { "s.acl.h", ".s.open.h.hyLZlg" }; od = umem_alloc(sizeof(ztest_od_t), UMEM_NOFAIL); ztest_od_init(od, id, FTAG, 0, DMU_OT_ZAP_OTHER, 0, 0); if (ztest_object_init(zd, od, sizeof (ztest_od_t), !ztest_random(2)) != 0) goto out; object = od->od_object; /* * Generate a known hash collision, and verify that * we can lookup and remove both entries. 
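 * The two names in hc[] are a known-colliding pair, so the second
 * zap_add() of each name must return EEXIST while zap_length() still
 * reports a single uint64_t value, and both zap_remove() calls succeed.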
*/ tx = dmu_tx_create(os); dmu_tx_hold_zap(tx, object, B_TRUE, NULL); txg = ztest_tx_assign(tx, TXG_MIGHTWAIT, FTAG); if (txg == 0) goto out; for (i = 0; i < 2; i++) { value[i] = i; VERIFY3U(0, ==, zap_add(os, object, hc[i], sizeof (uint64_t), 1, &value[i], tx)); } for (i = 0; i < 2; i++) { VERIFY3U(EEXIST, ==, zap_add(os, object, hc[i], sizeof (uint64_t), 1, &value[i], tx)); VERIFY3U(0, ==, zap_length(os, object, hc[i], &zl_intsize, &zl_ints)); ASSERT3U(zl_intsize, ==, sizeof (uint64_t)); ASSERT3U(zl_ints, ==, 1); } for (i = 0; i < 2; i++) { VERIFY3U(0, ==, zap_remove(os, object, hc[i], tx)); } dmu_tx_commit(tx); /* * Generate a buch of random entries. */ ints = MAX(ZTEST_ZAP_MIN_INTS, object % ZTEST_ZAP_MAX_INTS); prop = ztest_random(ZTEST_ZAP_MAX_PROPS); (void) sprintf(propname, "prop_%llu", (u_longlong_t)prop); (void) sprintf(txgname, "txg_%llu", (u_longlong_t)prop); bzero(value, sizeof (value)); last_txg = 0; /* * If these zap entries already exist, validate their contents. */ error = zap_length(os, object, txgname, &zl_intsize, &zl_ints); if (error == 0) { ASSERT3U(zl_intsize, ==, sizeof (uint64_t)); ASSERT3U(zl_ints, ==, 1); VERIFY(zap_lookup(os, object, txgname, zl_intsize, zl_ints, &last_txg) == 0); VERIFY(zap_length(os, object, propname, &zl_intsize, &zl_ints) == 0); ASSERT3U(zl_intsize, ==, sizeof (uint64_t)); ASSERT3U(zl_ints, ==, ints); VERIFY(zap_lookup(os, object, propname, zl_intsize, zl_ints, value) == 0); for (i = 0; i < ints; i++) { ASSERT3U(value[i], ==, last_txg + object + i); } } else { ASSERT3U(error, ==, ENOENT); } /* * Atomically update two entries in our zap object. * The first is named txg_%llu, and contains the txg * in which the property was last updated. The second * is named prop_%llu, and the nth element of its value * should be txg + object + n. */ tx = dmu_tx_create(os); dmu_tx_hold_zap(tx, object, B_TRUE, NULL); txg = ztest_tx_assign(tx, TXG_MIGHTWAIT, FTAG); if (txg == 0) goto out; if (last_txg > txg) fatal(0, "zap future leak: old %llu new %llu", last_txg, txg); for (i = 0; i < ints; i++) value[i] = txg + object + i; VERIFY3U(0, ==, zap_update(os, object, txgname, sizeof (uint64_t), 1, &txg, tx)); VERIFY3U(0, ==, zap_update(os, object, propname, sizeof (uint64_t), ints, value, tx)); dmu_tx_commit(tx); /* * Remove a random pair of entries. */ prop = ztest_random(ZTEST_ZAP_MAX_PROPS); (void) sprintf(propname, "prop_%llu", (u_longlong_t)prop); (void) sprintf(txgname, "txg_%llu", (u_longlong_t)prop); error = zap_length(os, object, txgname, &zl_intsize, &zl_ints); if (error == ENOENT) goto out; ASSERT0(error); tx = dmu_tx_create(os); dmu_tx_hold_zap(tx, object, B_TRUE, NULL); txg = ztest_tx_assign(tx, TXG_MIGHTWAIT, FTAG); if (txg == 0) goto out; VERIFY3U(0, ==, zap_remove(os, object, txgname, tx)); VERIFY3U(0, ==, zap_remove(os, object, propname, tx)); dmu_tx_commit(tx); out: umem_free(od, sizeof(ztest_od_t)); } /* * Testcase to test the upgrading of a microzap to fatzap. */ void ztest_fzap(ztest_ds_t *zd, uint64_t id) { objset_t *os = zd->zd_os; ztest_od_t *od; uint64_t object, txg; int i; od = umem_alloc(sizeof(ztest_od_t), UMEM_NOFAIL); ztest_od_init(od, id, FTAG, 0, DMU_OT_ZAP_OTHER, 0, 0); if (ztest_object_init(zd, od, sizeof (ztest_od_t), !ztest_random(2)) != 0) goto out; object = od->od_object; /* * Add entries to this ZAP and make sure it spills over * and gets upgraded to a fatzap. Also, since we are adding * 2050 entries we should see ptrtbl growth and leaf-block split. 
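 * (A microzap is confined to a single block, which holds on the order of
 * two thousand of its fixed-size 64-byte entries, so 2050 adds are enough
 * to force the conversion to a fatzap.)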
*/ for (i = 0; i < 2050; i++) { char name[MAXNAMELEN]; uint64_t value = i; dmu_tx_t *tx; int error; (void) snprintf(name, sizeof (name), "fzap-%llu-%llu", (u_longlong_t)id, (u_longlong_t)value); tx = dmu_tx_create(os); dmu_tx_hold_zap(tx, object, B_TRUE, name); txg = ztest_tx_assign(tx, TXG_MIGHTWAIT, FTAG); if (txg == 0) goto out; error = zap_add(os, object, name, sizeof (uint64_t), 1, &value, tx); ASSERT(error == 0 || error == EEXIST); dmu_tx_commit(tx); } out: umem_free(od, sizeof(ztest_od_t)); } /* ARGSUSED */ void ztest_zap_parallel(ztest_ds_t *zd, uint64_t id) { objset_t *os = zd->zd_os; ztest_od_t *od; uint64_t txg, object, count, wsize, wc, zl_wsize, zl_wc; dmu_tx_t *tx; int i, namelen, error; int micro = ztest_random(2); char name[20], string_value[20]; void *data; od = umem_alloc(sizeof(ztest_od_t), UMEM_NOFAIL); ztest_od_init(od, ID_PARALLEL, FTAG, micro, DMU_OT_ZAP_OTHER, 0, 0); if (ztest_object_init(zd, od, sizeof (ztest_od_t), B_FALSE) != 0) { umem_free(od, sizeof(ztest_od_t)); return; } object = od->od_object; /* * Generate a random name of the form 'xxx.....' where each * x is a random printable character and the dots are dots. * There are 94 such characters, and the name length goes from * 6 to 20, so there are 94^3 * 15 = 12,458,760 possible names. */ namelen = ztest_random(sizeof (name) - 5) + 5 + 1; for (i = 0; i < 3; i++) name[i] = '!' + ztest_random('~' - '!' + 1); for (; i < namelen - 1; i++) name[i] = '.'; name[i] = '\0'; if ((namelen & 1) || micro) { wsize = sizeof (txg); wc = 1; data = &txg; } else { wsize = 1; wc = namelen; data = string_value; } count = -1ULL; VERIFY(zap_count(os, object, &count) == 0); ASSERT(count != -1ULL); /* * Select an operation: length, lookup, add, update, remove. */ i = ztest_random(5); if (i >= 2) { tx = dmu_tx_create(os); dmu_tx_hold_zap(tx, object, B_TRUE, NULL); txg = ztest_tx_assign(tx, TXG_MIGHTWAIT, FTAG); if (txg == 0) return; bcopy(name, string_value, namelen); } else { tx = NULL; txg = 0; bzero(string_value, namelen); } switch (i) { case 0: error = zap_length(os, object, name, &zl_wsize, &zl_wc); if (error == 0) { ASSERT3U(wsize, ==, zl_wsize); ASSERT3U(wc, ==, zl_wc); } else { ASSERT3U(error, ==, ENOENT); } break; case 1: error = zap_lookup(os, object, name, wsize, wc, data); if (error == 0) { if (data == string_value && bcmp(name, data, namelen) != 0) fatal(0, "name '%s' != val '%s' len %d", name, data, namelen); } else { ASSERT3U(error, ==, ENOENT); } break; case 2: error = zap_add(os, object, name, wsize, wc, data, tx); ASSERT(error == 0 || error == EEXIST); break; case 3: VERIFY(zap_update(os, object, name, wsize, wc, data, tx) == 0); break; case 4: error = zap_remove(os, object, name, tx); ASSERT(error == 0 || error == ENOENT); break; } if (tx != NULL) dmu_tx_commit(tx); umem_free(od, sizeof(ztest_od_t)); } /* * Commit callback data. 
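 *
 * As a hedged aside before the bookkeeping structure below: the minimal
 * commit-callback pattern this test exercises looks roughly like the sketch
 * that follows.  The example_* names are hypothetical and nothing in the
 * sketch is called by ztest itself; the DMU entry points are the same ones
 * used by ztest_dmu_commit_callbacks(), and callbacks firing with ECANCELED
 * from dmu_tx_abort() is the current-implementation detail noted there.
 */

/* Hypothetical callback: error is 0 once the txg syncs, ECANCELED on abort */
static void
example_commit_cb(void *arg, int error)
{
	int *fired = arg;

	*fired = (error == 0);
}

/* Hypothetical helper showing registration and the two ways a cb can fire */
static int
example_register_cb(objset_t *os, uint64_t object, int *fired)
{
	dmu_tx_t *tx = dmu_tx_create(os);

	dmu_tx_callback_register(tx, example_commit_cb, fired);
	dmu_tx_hold_write(tx, object, 0, sizeof (uint64_t));

	if (dmu_tx_assign(tx, TXG_WAIT) != 0) {
		dmu_tx_abort(tx);	/* registered cb fires with ECANCELED */
		return (-1);
	}

	dmu_tx_commit(tx);		/* cb fires after this txg has synced */
	return (0);
}

/*
 * Per-callback bookkeeping used by the real test: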
*/ typedef struct ztest_cb_data { list_node_t zcd_node; uint64_t zcd_txg; int zcd_expected_err; boolean_t zcd_added; boolean_t zcd_called; spa_t *zcd_spa; } ztest_cb_data_t; /* This is the actual commit callback function */ static void ztest_commit_callback(void *arg, int error) { ztest_cb_data_t *data = arg; uint64_t synced_txg; VERIFY(data != NULL); VERIFY3S(data->zcd_expected_err, ==, error); VERIFY(!data->zcd_called); synced_txg = spa_last_synced_txg(data->zcd_spa); if (data->zcd_txg > synced_txg) fatal(0, "commit callback of txg %" PRIu64 " called prematurely" ", last synced txg = %" PRIu64 "\n", data->zcd_txg, synced_txg); data->zcd_called = B_TRUE; if (error == ECANCELED) { ASSERT0(data->zcd_txg); ASSERT(!data->zcd_added); /* * The private callback data should be destroyed here, but * since we are going to check the zcd_called field after * dmu_tx_abort(), we will destroy it there. */ return; } ASSERT(data->zcd_added); ASSERT3U(data->zcd_txg, !=, 0); (void) mutex_enter(&zcl.zcl_callbacks_lock); /* See if this cb was called more quickly */ if ((synced_txg - data->zcd_txg) < zc_min_txg_delay) zc_min_txg_delay = synced_txg - data->zcd_txg; /* Remove our callback from the list */ list_remove(&zcl.zcl_callbacks, data); (void) mutex_exit(&zcl.zcl_callbacks_lock); umem_free(data, sizeof (ztest_cb_data_t)); } /* Allocate and initialize callback data structure */ static ztest_cb_data_t * ztest_create_cb_data(objset_t *os, uint64_t txg) { ztest_cb_data_t *cb_data; cb_data = umem_zalloc(sizeof (ztest_cb_data_t), UMEM_NOFAIL); cb_data->zcd_txg = txg; cb_data->zcd_spa = dmu_objset_spa(os); list_link_init(&cb_data->zcd_node); return (cb_data); } /* * Commit callback test. */ void ztest_dmu_commit_callbacks(ztest_ds_t *zd, uint64_t id) { objset_t *os = zd->zd_os; ztest_od_t *od; dmu_tx_t *tx; ztest_cb_data_t *cb_data[3], *tmp_cb; uint64_t old_txg, txg; int i, error = 0; od = umem_alloc(sizeof(ztest_od_t), UMEM_NOFAIL); ztest_od_init(od, id, FTAG, 0, DMU_OT_UINT64_OTHER, 0, 0); if (ztest_object_init(zd, od, sizeof (ztest_od_t), B_FALSE) != 0) { umem_free(od, sizeof(ztest_od_t)); return; } tx = dmu_tx_create(os); cb_data[0] = ztest_create_cb_data(os, 0); dmu_tx_callback_register(tx, ztest_commit_callback, cb_data[0]); dmu_tx_hold_write(tx, od->od_object, 0, sizeof (uint64_t)); /* Every once in a while, abort the transaction on purpose */ if (ztest_random(100) == 0) error = -1; if (!error) error = dmu_tx_assign(tx, TXG_NOWAIT); txg = error ? 0 : dmu_tx_get_txg(tx); cb_data[0]->zcd_txg = txg; cb_data[1] = ztest_create_cb_data(os, txg); dmu_tx_callback_register(tx, ztest_commit_callback, cb_data[1]); if (error) { /* * It's not a strict requirement to call the registered * callbacks from inside dmu_tx_abort(), but that's what * it's supposed to happen in the current implementation * so we will check for that. */ for (i = 0; i < 2; i++) { cb_data[i]->zcd_expected_err = ECANCELED; VERIFY(!cb_data[i]->zcd_called); } dmu_tx_abort(tx); for (i = 0; i < 2; i++) { VERIFY(cb_data[i]->zcd_called); umem_free(cb_data[i], sizeof (ztest_cb_data_t)); } umem_free(od, sizeof(ztest_od_t)); return; } cb_data[2] = ztest_create_cb_data(os, txg); dmu_tx_callback_register(tx, ztest_commit_callback, cb_data[2]); /* * Read existing data to make sure there isn't a future leak. 
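 * The uint64_t at offset 0 of this object always holds the txg of the most
 * recent write (see the dmu_write() below), so reading back a value greater
 * than the currently open txg would mean a write from a "future" txg leaked
 * backwards.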
*/ VERIFY(0 == dmu_read(os, od->od_object, 0, sizeof (uint64_t), &old_txg, DMU_READ_PREFETCH)); if (old_txg > txg) fatal(0, "future leak: got %" PRIu64 ", open txg is %" PRIu64, old_txg, txg); dmu_write(os, od->od_object, 0, sizeof (uint64_t), &txg, tx); (void) mutex_enter(&zcl.zcl_callbacks_lock); /* * Since commit callbacks don't have any ordering requirement and since * it is theoretically possible for a commit callback to be called * after an arbitrary amount of time has elapsed since its txg has been * synced, it is difficult to reliably determine whether a commit * callback hasn't been called due to high load or due to a flawed * implementation. * * In practice, we will assume that if after a certain number of txgs a * commit callback hasn't been called, then most likely there's an * implementation bug.. */ tmp_cb = list_head(&zcl.zcl_callbacks); if (tmp_cb != NULL && tmp_cb->zcd_txg + ZTEST_COMMIT_CB_THRESH < txg) { fatal(0, "Commit callback threshold exceeded, oldest txg: %" PRIu64 ", open txg: %" PRIu64 "\n", tmp_cb->zcd_txg, txg); } /* * Let's find the place to insert our callbacks. * * Even though the list is ordered by txg, it is possible for the * insertion point to not be the end because our txg may already be * quiescing at this point and other callbacks in the open txg * (from other objsets) may have sneaked in. */ tmp_cb = list_tail(&zcl.zcl_callbacks); while (tmp_cb != NULL && tmp_cb->zcd_txg > txg) tmp_cb = list_prev(&zcl.zcl_callbacks, tmp_cb); /* Add the 3 callbacks to the list */ for (i = 0; i < 3; i++) { if (tmp_cb == NULL) list_insert_head(&zcl.zcl_callbacks, cb_data[i]); else list_insert_after(&zcl.zcl_callbacks, tmp_cb, cb_data[i]); cb_data[i]->zcd_added = B_TRUE; VERIFY(!cb_data[i]->zcd_called); tmp_cb = cb_data[i]; } zc_cb_counter += 3; (void) mutex_exit(&zcl.zcl_callbacks_lock); dmu_tx_commit(tx); umem_free(od, sizeof(ztest_od_t)); } /* ARGSUSED */ void ztest_dsl_prop_get_set(ztest_ds_t *zd, uint64_t id) { zfs_prop_t proplist[] = { ZFS_PROP_CHECKSUM, ZFS_PROP_COMPRESSION, ZFS_PROP_COPIES, ZFS_PROP_DEDUP }; int p; (void) rw_enter(&ztest_name_lock, RW_READER); for (p = 0; p < sizeof (proplist) / sizeof (proplist[0]); p++) (void) ztest_dsl_prop_set_uint64(zd->zd_name, proplist[p], ztest_random_dsl_prop(proplist[p]), (int)ztest_random(2)); (void) rw_exit(&ztest_name_lock); } /* ARGSUSED */ void ztest_spa_prop_get_set(ztest_ds_t *zd, uint64_t id) { nvlist_t *props = NULL; (void) rw_enter(&ztest_name_lock, RW_READER); (void) ztest_spa_prop_set_uint64(ZPOOL_PROP_DEDUPDITTO, ZIO_DEDUPDITTO_MIN + ztest_random(ZIO_DEDUPDITTO_MIN)); VERIFY0(spa_prop_get(ztest_spa, &props)); if (ztest_opts.zo_verbose >= 6) dump_nvlist(props, 4); nvlist_free(props); (void) rw_exit(&ztest_name_lock); } /* * Test snapshot hold/release and deferred destroy. */ void ztest_dmu_snapshot_hold(ztest_ds_t *zd, uint64_t id) { int error; objset_t *os = zd->zd_os; objset_t *origin; char snapname[100]; char fullname[100]; char clonename[100]; char tag[100]; char osname[MAXNAMELEN]; (void) rw_enter(&ztest_name_lock, RW_READER); dmu_objset_name(os, osname); (void) snprintf(snapname, 100, "sh1_%llu", (u_longlong_t)id); (void) snprintf(fullname, 100, "%s@%s", osname, snapname); (void) snprintf(clonename, 100, "%s/ch1_%llu",osname,(u_longlong_t)id); (void) snprintf(tag, 100, "tag_%llu", (u_longlong_t)id); /* * Clean up from any previous run. 
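 * The return values are deliberately ignored: the clone, the hold and the
 * snapshot may or may not exist, depending on how the previous pass ended.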
*/ (void) dmu_objset_destroy(clonename, B_FALSE); (void) dsl_dataset_user_release(osname, snapname, tag, B_FALSE); (void) dmu_objset_destroy(fullname, B_FALSE); /* * Create snapshot, clone it, mark snap for deferred destroy, * destroy clone, verify snap was also destroyed. */ - error = dmu_objset_snapshot(osname, snapname, NULL, NULL, FALSE, - FALSE, -1); + error = dmu_objset_snapshot_one(osname, snapname); if (error) { if (error == ENOSPC) { ztest_record_enospc("dmu_objset_snapshot"); goto out; } fatal(0, "dmu_objset_snapshot(%s) = %d", fullname, error); } error = dmu_objset_hold(fullname, FTAG, &origin); if (error) fatal(0, "dmu_objset_hold(%s) = %d", fullname, error); error = dmu_objset_clone(clonename, dmu_objset_ds(origin), 0); dmu_objset_rele(origin, FTAG); if (error) { if (error == ENOSPC) { ztest_record_enospc("dmu_objset_clone"); goto out; } fatal(0, "dmu_objset_clone(%s) = %d", clonename, error); } error = dmu_objset_destroy(fullname, B_TRUE); if (error) { fatal(0, "dmu_objset_destroy(%s, B_TRUE) = %d", fullname, error); } error = dmu_objset_destroy(clonename, B_FALSE); if (error) fatal(0, "dmu_objset_destroy(%s) = %d", clonename, error); error = dmu_objset_hold(fullname, FTAG, &origin); if (error != ENOENT) fatal(0, "dmu_objset_hold(%s) = %d", fullname, error); /* * Create snapshot, add temporary hold, verify that we can't * destroy a held snapshot, mark for deferred destroy, * release hold, verify snapshot was destroyed. */ - error = dmu_objset_snapshot(osname, snapname, NULL, NULL, FALSE, - FALSE, -1); + error = dmu_objset_snapshot_one(osname, snapname); if (error) { if (error == ENOSPC) { ztest_record_enospc("dmu_objset_snapshot"); goto out; } fatal(0, "dmu_objset_snapshot(%s) = %d", fullname, error); } error = dsl_dataset_user_hold(osname, snapname, tag, B_FALSE, B_TRUE, -1); if (error) fatal(0, "dsl_dataset_user_hold(%s)", fullname, tag); error = dmu_objset_destroy(fullname, B_FALSE); if (error != EBUSY) { fatal(0, "dmu_objset_destroy(%s, B_FALSE) = %d", fullname, error); } error = dmu_objset_destroy(fullname, B_TRUE); if (error) { fatal(0, "dmu_objset_destroy(%s, B_TRUE) = %d", fullname, error); } error = dsl_dataset_user_release(osname, snapname, tag, B_FALSE); if (error) fatal(0, "dsl_dataset_user_release(%s)", fullname, tag); VERIFY(dmu_objset_hold(fullname, FTAG, &origin) == ENOENT); out: (void) rw_exit(&ztest_name_lock); } /* * Inject random faults into the on-disk data. */ /* ARGSUSED */ void ztest_fault_inject(ztest_ds_t *zd, uint64_t id) { ztest_shared_t *zs = ztest_shared; spa_t *spa = ztest_spa; int fd; uint64_t offset; uint64_t leaves; uint64_t bad = 0x1990c0ffeedecadeull; uint64_t top, leaf; char *path0; char *pathrand; size_t fsize; int bshift = SPA_MAXBLOCKSHIFT + 2; /* don't scrog all labels */ int iters = 1000; int maxfaults; int mirror_save; vdev_t *vd0 = NULL; uint64_t guid0 = 0; boolean_t islog = B_FALSE; path0 = umem_alloc(MAXPATHLEN, UMEM_NOFAIL); pathrand = umem_alloc(MAXPATHLEN, UMEM_NOFAIL); mutex_enter(&ztest_vdev_lock); maxfaults = MAXFAULTS(); leaves = MAX(zs->zs_mirrors, 1) * ztest_opts.zo_raidz; mirror_save = zs->zs_mirrors; mutex_exit(&ztest_vdev_lock); ASSERT(leaves >= 1); /* * We need SCL_STATE here because we're going to look at vd0->vdev_tsd. */ spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); if (ztest_random(2) == 0) { /* * Inject errors on a normal data device or slog device. 
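 * Each top-level vdev has `leaves' leaf devices (mirrors times raidz width),
 * and the leaf device files are numbered top * leaves + leaf (offset by
 * zs_splits once the pool has been split).  For example, with 2-way mirrors
 * over 3-wide raidz, leaves is 6, so leaf 4 of top-level vdev 1 is device
 * number 10.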
*/ top = ztest_random_vdev_top(spa, B_TRUE); leaf = ztest_random(leaves) + zs->zs_splits; /* * Generate paths to the first leaf in this top-level vdev, * and to the random leaf we selected. We'll induce transient * write failures and random online/offline activity on leaf 0, * and we'll write random garbage to the randomly chosen leaf. */ (void) snprintf(path0, MAXPATHLEN, ztest_dev_template, ztest_opts.zo_dir, ztest_opts.zo_pool, top * leaves + zs->zs_splits); (void) snprintf(pathrand, MAXPATHLEN, ztest_dev_template, ztest_opts.zo_dir, ztest_opts.zo_pool, top * leaves + leaf); vd0 = vdev_lookup_by_path(spa->spa_root_vdev, path0); if (vd0 != NULL && vd0->vdev_top->vdev_islog) islog = B_TRUE; if (vd0 != NULL && maxfaults != 1) { /* * Make vd0 explicitly claim to be unreadable, * or unwriteable, or reach behind its back * and close the underlying fd. We can do this if * maxfaults == 0 because we'll fail and reexecute, * and we can do it if maxfaults >= 2 because we'll * have enough redundancy. If maxfaults == 1, the * combination of this with injection of random data * corruption below exceeds the pool's fault tolerance. */ vdev_file_t *vf = vd0->vdev_tsd; if (vf != NULL && ztest_random(3) == 0) { (void) close(vf->vf_vnode->v_fd); vf->vf_vnode->v_fd = -1; } else if (ztest_random(2) == 0) { vd0->vdev_cant_read = B_TRUE; } else { vd0->vdev_cant_write = B_TRUE; } guid0 = vd0->vdev_guid; } } else { /* * Inject errors on an l2cache device. */ spa_aux_vdev_t *sav = &spa->spa_l2cache; if (sav->sav_count == 0) { spa_config_exit(spa, SCL_STATE, FTAG); goto out; } vd0 = sav->sav_vdevs[ztest_random(sav->sav_count)]; guid0 = vd0->vdev_guid; (void) strcpy(path0, vd0->vdev_path); (void) strcpy(pathrand, vd0->vdev_path); leaf = 0; leaves = 1; maxfaults = INT_MAX; /* no limit on cache devices */ } spa_config_exit(spa, SCL_STATE, FTAG); /* * If we can tolerate two or more faults, or we're dealing * with a slog, randomly online/offline vd0. */ if ((maxfaults >= 2 || islog) && guid0 != 0) { if (ztest_random(10) < 6) { int flags = (ztest_random(2) == 0 ? ZFS_OFFLINE_TEMPORARY : 0); /* * We have to grab the zs_name_lock as writer to * prevent a race between offlining a slog and * destroying a dataset. Offlining the slog will * grab a reference on the dataset which may cause * dmu_objset_destroy() to fail with EBUSY thus * leaving the dataset in an inconsistent state. */ if (islog) (void) rw_enter(&ztest_name_lock, RW_WRITER); VERIFY(vdev_offline(spa, guid0, flags) != EBUSY); if (islog) (void) rw_exit(&ztest_name_lock); } else { /* * Ideally we would like to be able to randomly * call vdev_[on|off]line without holding locks * to force unpredictable failures but the side * effects of vdev_[on|off]line prevent us from * doing so. We grab the ztest_vdev_lock here to * prevent a race between injection testing and * aux_vdev removal. */ mutex_enter(&ztest_vdev_lock); (void) vdev_online(spa, guid0, 0, NULL); mutex_exit(&ztest_vdev_lock); } } if (maxfaults == 0) goto out; /* * We have at least single-fault tolerance, so inject data corruption. 
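 * The offset arithmetic below treats the device file as a sequence of
 * (leaves << bshift)-byte stripes: it picks a random stripe, steps to this
 * leaf's (1 << bshift)-byte slice of it, and then overwrites an
 * 8-byte-aligned 64-bit word somewhere in the first half of that slice.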
*/ fd = open(pathrand, O_RDWR); if (fd == -1) /* we hit a gap in the device namespace */ goto out; fsize = lseek(fd, 0, SEEK_END); while (--iters != 0) { offset = ztest_random(fsize / (leaves << bshift)) * (leaves << bshift) + (leaf << bshift) + (ztest_random(1ULL << (bshift - 1)) & -8ULL); if (offset >= fsize) continue; mutex_enter(&ztest_vdev_lock); if (mirror_save != zs->zs_mirrors) { mutex_exit(&ztest_vdev_lock); (void) close(fd); goto out; } if (pwrite(fd, &bad, sizeof (bad), offset) != sizeof (bad)) fatal(1, "can't inject bad word at 0x%llx in %s", offset, pathrand); mutex_exit(&ztest_vdev_lock); if (ztest_opts.zo_verbose >= 7) (void) printf("injected bad word into %s," " offset 0x%llx\n", pathrand, (u_longlong_t)offset); } (void) close(fd); out: umem_free(path0, MAXPATHLEN); umem_free(pathrand, MAXPATHLEN); } /* * Verify that DDT repair works as expected. */ void ztest_ddt_repair(ztest_ds_t *zd, uint64_t id) { ztest_shared_t *zs = ztest_shared; spa_t *spa = ztest_spa; objset_t *os = zd->zd_os; ztest_od_t *od; uint64_t object, blocksize, txg, pattern, psize; enum zio_checksum checksum = spa_dedup_checksum(spa); dmu_buf_t *db; dmu_tx_t *tx; void *buf; blkptr_t blk; int copies = 2 * ZIO_DEDUPDITTO_MIN; int i; blocksize = ztest_random_blocksize(); blocksize = MIN(blocksize, 2048); /* because we write so many */ od = umem_alloc(sizeof(ztest_od_t), UMEM_NOFAIL); ztest_od_init(od, id, FTAG, 0, DMU_OT_UINT64_OTHER, blocksize, 0); if (ztest_object_init(zd, od, sizeof (ztest_od_t), B_FALSE) != 0) { umem_free(od, sizeof(ztest_od_t)); return; } /* * Take the name lock as writer to prevent anyone else from changing * the pool and dataset properies we need to maintain during this test. */ (void) rw_enter(&ztest_name_lock, RW_WRITER); if (ztest_dsl_prop_set_uint64(zd->zd_name, ZFS_PROP_DEDUP, checksum, B_FALSE) != 0 || ztest_dsl_prop_set_uint64(zd->zd_name, ZFS_PROP_COPIES, 1, B_FALSE) != 0) { (void) rw_exit(&ztest_name_lock); umem_free(od, sizeof(ztest_od_t)); return; } object = od[0].od_object; blocksize = od[0].od_blocksize; pattern = zs->zs_guid ^ dmu_objset_fsid_guid(os); ASSERT(object != 0); tx = dmu_tx_create(os); dmu_tx_hold_write(tx, object, 0, copies * blocksize); txg = ztest_tx_assign(tx, TXG_WAIT, FTAG); if (txg == 0) { (void) rw_exit(&ztest_name_lock); umem_free(od, sizeof(ztest_od_t)); return; } /* * Write all the copies of our block. */ for (i = 0; i < copies; i++) { uint64_t offset = i * blocksize; VERIFY(dmu_buf_hold(os, object, offset, FTAG, &db, DMU_READ_NO_PREFETCH) == 0); ASSERT(db->db_offset == offset); ASSERT(db->db_size == blocksize); ASSERT(ztest_pattern_match(db->db_data, db->db_size, pattern) || ztest_pattern_match(db->db_data, db->db_size, 0ULL)); dmu_buf_will_fill(db, tx); ztest_pattern_set(db->db_data, db->db_size, pattern); dmu_buf_rele(db, FTAG); } dmu_tx_commit(tx); txg_wait_synced(spa_get_dsl(spa), txg); /* * Find out what block we got. */ VERIFY(dmu_buf_hold(os, object, 0, FTAG, &db, DMU_READ_NO_PREFETCH) == 0); blk = *((dmu_buf_impl_t *)db)->db_blkptr; dmu_buf_rele(db, FTAG); /* * Damage the block. Dedup-ditto will save us when we read it later. */ psize = BP_GET_PSIZE(&blk); buf = zio_buf_alloc(psize); ztest_pattern_set(buf, psize, ~pattern); (void) zio_wait(zio_rewrite(NULL, spa, 0, &blk, buf, psize, NULL, NULL, ZIO_PRIORITY_SYNC_WRITE, ZIO_FLAG_CANFAIL | ZIO_FLAG_INDUCE_DAMAGE, NULL)); zio_buf_free(buf, psize); (void) rw_exit(&ztest_name_lock); umem_free(od, sizeof(ztest_od_t)); } /* * Scrub the pool. 
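 *
 * But first, a hedged aside on the dedup-repair test above: it recognizes
 * its blocks purely by a repeating 64-bit fill pattern.  The sketch below
 * illustrates what the ztest_pattern_set()/ztest_pattern_match() helpers
 * used there are assumed to do; the example_* names are hypothetical,
 * nothing here is called by the test itself, and the real helpers live
 * earlier in this file.
 */

/* Fill a buffer (size assumed to be a multiple of 8) with a 64-bit word */
static void
example_pattern_fill(void *buf, uint64_t size, uint64_t value)
{
	uint64_t *ip = buf;
	uint64_t *ip_end = (uint64_t *)((char *)buf + size);

	while (ip < ip_end)
		*ip++ = value;
}

/* Check that every 64-bit word in the buffer matches value */
static boolean_t
example_pattern_check(const void *buf, uint64_t size, uint64_t value)
{
	const uint64_t *ip = buf;
	const uint64_t *ip_end = (const uint64_t *)((const char *)buf + size);

	while (ip < ip_end) {
		if (*ip++ != value)
			return (B_FALSE);
	}
	return (B_TRUE);
}

/*
 * Scrub the pool.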
*/ /* ARGSUSED */ void ztest_scrub(ztest_ds_t *zd, uint64_t id) { spa_t *spa = ztest_spa; (void) spa_scan(spa, POOL_SCAN_SCRUB); (void) poll(NULL, 0, 100); /* wait a moment, then force a restart */ (void) spa_scan(spa, POOL_SCAN_SCRUB); } /* * Change the guid for the pool. */ /* ARGSUSED */ void ztest_reguid(ztest_ds_t *zd, uint64_t id) { spa_t *spa = ztest_spa; uint64_t orig, load; int error; orig = spa_guid(spa); load = spa_load_guid(spa); (void) rw_enter(&ztest_name_lock, RW_WRITER); error = spa_change_guid(spa); (void) rw_exit(&ztest_name_lock); if (error != 0) return; if (ztest_opts.zo_verbose >= 4) { (void) printf("Changed guid old %llu -> %llu\n", (u_longlong_t)orig, (u_longlong_t)spa_guid(spa)); } VERIFY3U(orig, !=, spa_guid(spa)); VERIFY3U(load, ==, spa_load_guid(spa)); } /* * Rename the pool to a different name and then rename it back. */ /* ARGSUSED */ void ztest_spa_rename(ztest_ds_t *zd, uint64_t id) { char *oldname, *newname; spa_t *spa; (void) rw_enter(&ztest_name_lock, RW_WRITER); oldname = ztest_opts.zo_pool; newname = umem_alloc(strlen(oldname) + 5, UMEM_NOFAIL); (void) strcpy(newname, oldname); (void) strcat(newname, "_tmp"); /* * Do the rename */ VERIFY3U(0, ==, spa_rename(oldname, newname)); /* * Try to open it under the old name, which shouldn't exist */ VERIFY3U(ENOENT, ==, spa_open(oldname, &spa, FTAG)); /* * Open it under the new name and make sure it's still the same spa_t. */ VERIFY3U(0, ==, spa_open(newname, &spa, FTAG)); ASSERT(spa == ztest_spa); spa_close(spa, FTAG); /* * Rename it back to the original */ VERIFY3U(0, ==, spa_rename(newname, oldname)); /* * Make sure it can still be opened */ VERIFY3U(0, ==, spa_open(oldname, &spa, FTAG)); ASSERT(spa == ztest_spa); spa_close(spa, FTAG); umem_free(newname, strlen(newname) + 1); (void) rw_exit(&ztest_name_lock); } /* * Verify pool integrity by running zdb. */ static void ztest_run_zdb(char *pool) { int status; char *bin; char *zdb; char *zbuf; FILE *fp; bin = umem_alloc(MAXPATHLEN + MAXNAMELEN + 20, UMEM_NOFAIL); zdb = umem_alloc(MAXPATHLEN + MAXNAMELEN + 20, UMEM_NOFAIL); zbuf = umem_alloc(1024, UMEM_NOFAIL); VERIFY(realpath(getexecname(), bin) != NULL); if (strncmp(bin, "/usr/sbin/ztest", 15) == 0) { strcpy(bin, "/usr/sbin/zdb"); /* Installed */ } else if (strncmp(bin, "/sbin/ztest", 11) == 0) { strcpy(bin, "/sbin/zdb"); /* Installed */ } else { strstr(bin, "/ztest/")[0] = '\0'; /* In-tree */ strcat(bin, "/zdb/zdb"); } (void) sprintf(zdb, "%s -bcc%s%s -U %s %s", bin, ztest_opts.zo_verbose >= 3 ? "s" : "", ztest_opts.zo_verbose >= 4 ? 
"v" : "", spa_config_path, pool); if (ztest_opts.zo_verbose >= 5) (void) printf("Executing %s\n", strstr(zdb, "zdb ")); fp = popen(zdb, "r"); while (fgets(zbuf, 1024, fp) != NULL) if (ztest_opts.zo_verbose >= 3) (void) printf("%s", zbuf); status = pclose(fp); if (status == 0) goto out; ztest_dump_core = 0; if (WIFEXITED(status)) fatal(0, "'%s' exit code %d", zdb, WEXITSTATUS(status)); else fatal(0, "'%s' died with signal %d", zdb, WTERMSIG(status)); out: umem_free(bin, MAXPATHLEN + MAXNAMELEN + 20); umem_free(zdb, MAXPATHLEN + MAXNAMELEN + 20); umem_free(zbuf, 1024); } static void ztest_walk_pool_directory(char *header) { spa_t *spa = NULL; if (ztest_opts.zo_verbose >= 6) (void) printf("%s\n", header); mutex_enter(&spa_namespace_lock); while ((spa = spa_next(spa)) != NULL) if (ztest_opts.zo_verbose >= 6) (void) printf("\t%s\n", spa_name(spa)); mutex_exit(&spa_namespace_lock); } static void ztest_spa_import_export(char *oldname, char *newname) { nvlist_t *config, *newconfig; uint64_t pool_guid; spa_t *spa; if (ztest_opts.zo_verbose >= 4) { (void) printf("import/export: old = %s, new = %s\n", oldname, newname); } /* * Clean up from previous runs. */ (void) spa_destroy(newname); /* * Get the pool's configuration and guid. */ VERIFY3U(0, ==, spa_open(oldname, &spa, FTAG)); /* * Kick off a scrub to tickle scrub/export races. */ if (ztest_random(2) == 0) (void) spa_scan(spa, POOL_SCAN_SCRUB); pool_guid = spa_guid(spa); spa_close(spa, FTAG); ztest_walk_pool_directory("pools before export"); /* * Export it. */ VERIFY3U(0, ==, spa_export(oldname, &config, B_FALSE, B_FALSE)); ztest_walk_pool_directory("pools after export"); /* * Try to import it. */ newconfig = spa_tryimport(config); ASSERT(newconfig != NULL); nvlist_free(newconfig); /* * Import it under the new name. */ VERIFY3U(0, ==, spa_import(newname, config, NULL, 0)); ztest_walk_pool_directory("pools after import"); /* * Try to import it again -- should fail with EEXIST. */ VERIFY3U(EEXIST, ==, spa_import(newname, config, NULL, 0)); /* * Try to import it under a different name -- should fail with EEXIST. */ VERIFY3U(EEXIST, ==, spa_import(oldname, config, NULL, 0)); /* * Verify that the pool is no longer visible under the old name. */ VERIFY3U(ENOENT, ==, spa_open(oldname, &spa, FTAG)); /* * Verify that we can open and close the pool using the new name. 
*/ VERIFY3U(0, ==, spa_open(newname, &spa, FTAG)); ASSERT(pool_guid == spa_guid(spa)); spa_close(spa, FTAG); nvlist_free(config); } static void ztest_resume(spa_t *spa) { if (spa_suspended(spa) && ztest_opts.zo_verbose >= 6) (void) printf("resuming from suspended state\n"); spa_vdev_state_enter(spa, SCL_NONE); vdev_clear(spa, NULL); (void) spa_vdev_state_exit(spa, NULL, 0); (void) zio_resume(spa); } static void * ztest_resume_thread(void *arg) { spa_t *spa = arg; while (!ztest_exiting) { if (spa_suspended(spa)) ztest_resume(spa); (void) poll(NULL, 0, 100); } thread_exit(); return (NULL); } #define GRACE 300 #if 0 static void ztest_deadman_alarm(int sig) { fatal(0, "failed to complete within %d seconds of deadline", GRACE); } #endif static void ztest_execute(int test, ztest_info_t *zi, uint64_t id) { ztest_ds_t *zd = &ztest_ds[id % ztest_opts.zo_datasets]; ztest_shared_callstate_t *zc = ZTEST_GET_SHARED_CALLSTATE(test); hrtime_t functime = gethrtime(); int i; for (i = 0; i < zi->zi_iters; i++) zi->zi_func(zd, id); functime = gethrtime() - functime; atomic_add_64(&zc->zc_count, 1); atomic_add_64(&zc->zc_time, functime); if (ztest_opts.zo_verbose >= 4) { Dl_info dli; (void) dladdr((void *)zi->zi_func, &dli); (void) printf("%6.2f sec in %s\n", (double)functime / NANOSEC, dli.dli_sname); } } static void * ztest_thread(void *arg) { int rand; uint64_t id = (uintptr_t)arg; ztest_shared_t *zs = ztest_shared; uint64_t call_next; hrtime_t now; ztest_info_t *zi; ztest_shared_callstate_t *zc; while ((now = gethrtime()) < zs->zs_thread_stop) { /* * See if it's time to force a crash. */ if (now > zs->zs_thread_kill) ztest_kill(zs); /* * If we're getting ENOSPC with some regularity, stop. */ if (zs->zs_enospc_count > 10) break; /* * Pick a random function to execute. */ rand = ztest_random(ZTEST_FUNCS); zi = &ztest_info[rand]; zc = ZTEST_GET_SHARED_CALLSTATE(rand); call_next = zc->zc_next; if (now >= call_next && atomic_cas_64(&zc->zc_next, call_next, call_next + ztest_random(2 * zi->zi_interval[0] + 1)) == call_next) { ztest_execute(rand, zi, id); } } thread_exit(); return (NULL); } static void ztest_dataset_name(char *dsname, char *pool, int d) { (void) snprintf(dsname, MAXNAMELEN, "%s/ds_%d", pool, d); } static void ztest_dataset_destroy(int d) { char name[MAXNAMELEN]; int t; ztest_dataset_name(name, ztest_opts.zo_pool, d); if (ztest_opts.zo_verbose >= 3) (void) printf("Destroying %s to free up space\n", name); /* * Cleanup any non-standard clones and snapshots. In general, * ztest thread t operates on dataset (t % zopt_datasets), * so there may be more than one thing to clean up. */ for (t = d; t < ztest_opts.zo_threads; t += ztest_opts.zo_datasets) ztest_dsl_dataset_cleanup(name, t); (void) dmu_objset_find(name, ztest_objset_destroy_cb, NULL, DS_FIND_SNAPSHOTS | DS_FIND_CHILDREN); } static void ztest_dataset_dirobj_verify(ztest_ds_t *zd) { uint64_t usedobjs, dirobjs, scratch; /* * ZTEST_DIROBJ is the object directory for the entire dataset. * Therefore, the number of objects in use should equal the * number of ZTEST_DIROBJ entries, +1 for ZTEST_DIROBJ itself. * If not, we have an object leak. * * Note that we can only check this in ztest_dataset_open(), * when the open-context and syncing-context values agree. * That's because zap_count() returns the open-context value, * while dmu_objset_space() returns the rootbp fill count. 
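 * For example, a dataset whose ZTEST_DIROBJ holds 10 entries should report
 * 11 objects in use: the 10 named objects plus ZTEST_DIROBJ itself.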
*/ VERIFY3U(0, ==, zap_count(zd->zd_os, ZTEST_DIROBJ, &dirobjs)); dmu_objset_space(zd->zd_os, &scratch, &scratch, &usedobjs, &scratch); ASSERT3U(dirobjs + 1, ==, usedobjs); } static int ztest_dataset_open(int d) { ztest_ds_t *zd = &ztest_ds[d]; uint64_t committed_seq = ZTEST_GET_SHARED_DS(d)->zd_seq; objset_t *os; zilog_t *zilog; char name[MAXNAMELEN]; int error; ztest_dataset_name(name, ztest_opts.zo_pool, d); (void) rw_enter(&ztest_name_lock, RW_READER); error = ztest_dataset_create(name); if (error == ENOSPC) { (void) rw_exit(&ztest_name_lock); ztest_record_enospc(FTAG); return (error); } ASSERT(error == 0 || error == EEXIST); VERIFY0(dmu_objset_hold(name, zd, &os)); (void) rw_exit(&ztest_name_lock); ztest_zd_init(zd, ZTEST_GET_SHARED_DS(d), os); zilog = zd->zd_zilog; if (zilog->zl_header->zh_claim_lr_seq != 0 && zilog->zl_header->zh_claim_lr_seq < committed_seq) fatal(0, "missing log records: claimed %llu < committed %llu", zilog->zl_header->zh_claim_lr_seq, committed_seq); ztest_dataset_dirobj_verify(zd); zil_replay(os, zd, ztest_replay_vector); ztest_dataset_dirobj_verify(zd); if (ztest_opts.zo_verbose >= 6) (void) printf("%s replay %llu blocks, %llu records, seq %llu\n", zd->zd_name, (u_longlong_t)zilog->zl_parse_blk_count, (u_longlong_t)zilog->zl_parse_lr_count, (u_longlong_t)zilog->zl_replaying_seq); zilog = zil_open(os, ztest_get_data); if (zilog->zl_replaying_seq != 0 && zilog->zl_replaying_seq < committed_seq) fatal(0, "missing log records: replayed %llu < committed %llu", zilog->zl_replaying_seq, committed_seq); return (0); } static void ztest_dataset_close(int d) { ztest_ds_t *zd = &ztest_ds[d]; zil_close(zd->zd_zilog); dmu_objset_rele(zd->zd_os, zd); ztest_zd_fini(zd); } /* * Kick off threads to run tests on all datasets in parallel. */ static void ztest_run(ztest_shared_t *zs) { kt_did_t *tid; spa_t *spa; objset_t *os; kthread_t *resume_thread; uint64_t object; int error; int t, d; ztest_exiting = B_FALSE; /* * Initialize parent/child shared state. */ mutex_init(&ztest_vdev_lock, NULL, MUTEX_DEFAULT, NULL); rw_init(&ztest_name_lock, NULL, RW_DEFAULT, NULL); zs->zs_thread_start = gethrtime(); zs->zs_thread_stop = zs->zs_thread_start + ztest_opts.zo_passtime * NANOSEC; zs->zs_thread_stop = MIN(zs->zs_thread_stop, zs->zs_proc_stop); zs->zs_thread_kill = zs->zs_thread_stop; if (ztest_random(100) < ztest_opts.zo_killrate) { zs->zs_thread_kill -= ztest_random(ztest_opts.zo_passtime * NANOSEC); } mutex_init(&zcl.zcl_callbacks_lock, NULL, MUTEX_DEFAULT, NULL); list_create(&zcl.zcl_callbacks, sizeof (ztest_cb_data_t), offsetof(ztest_cb_data_t, zcd_node)); /* * Open our pool. */ kernel_init(FREAD | FWRITE); VERIFY(spa_open(ztest_opts.zo_pool, &spa, FTAG) == 0); spa->spa_debug = B_TRUE; ztest_spa = spa; VERIFY3U(0, ==, dmu_objset_hold(ztest_opts.zo_pool, FTAG, &os)); zs->zs_guid = dmu_objset_fsid_guid(os); dmu_objset_rele(os, FTAG); spa->spa_dedup_ditto = 2 * ZIO_DEDUPDITTO_MIN; /* * We don't expect the pool to suspend unless maxfaults == 0, * in which case ztest_fault_inject() temporarily takes away * the only valid replica. */ if (MAXFAULTS() == 0) spa->spa_failmode = ZIO_FAILURE_MODE_WAIT; else spa->spa_failmode = ZIO_FAILURE_MODE_PANIC; /* * Create a thread to periodically resume suspended I/O. */ VERIFY3P((resume_thread = zk_thread_create(NULL, 0, (thread_func_t)ztest_resume_thread, spa, TS_RUN, NULL, 0, 0, PTHREAD_CREATE_JOINABLE)), !=, NULL); #if 0 /* * Set a deadman alarm to abort() if we hang. 
*/ signal(SIGALRM, ztest_deadman_alarm); alarm((zs->zs_thread_stop - zs->zs_thread_start) / NANOSEC + GRACE); #endif /* * Verify that we can safely inquire about about any object, * whether it's allocated or not. To make it interesting, * we probe a 5-wide window around each power of two. * This hits all edge cases, including zero and the max. */ for (t = 0; t < 64; t++) { for (d = -5; d <= 5; d++) { error = dmu_object_info(spa->spa_meta_objset, (1ULL << t) + d, NULL); ASSERT(error == 0 || error == ENOENT || error == EINVAL); } } /* * If we got any ENOSPC errors on the previous run, destroy something. */ if (zs->zs_enospc_count != 0) { int d = ztest_random(ztest_opts.zo_datasets); ztest_dataset_destroy(d); } zs->zs_enospc_count = 0; tid = umem_zalloc(ztest_opts.zo_threads * sizeof (kt_did_t), UMEM_NOFAIL); if (ztest_opts.zo_verbose >= 4) (void) printf("starting main threads...\n"); /* * Kick off all the tests that run in parallel. */ for (t = 0; t < ztest_opts.zo_threads; t++) { kthread_t *thread; if (t < ztest_opts.zo_datasets && ztest_dataset_open(t) != 0) return; VERIFY3P(thread = zk_thread_create(NULL, 0, (thread_func_t)ztest_thread, (void *)(uintptr_t)t, TS_RUN, NULL, 0, 0, PTHREAD_CREATE_JOINABLE), !=, NULL); tid[t] = thread->t_tid; } /* * Wait for all of the tests to complete. We go in reverse order * so we don't close datasets while threads are still using them. */ for (t = ztest_opts.zo_threads - 1; t >= 0; t--) { thread_join(tid[t]); if (t < ztest_opts.zo_datasets) ztest_dataset_close(t); } txg_wait_synced(spa_get_dsl(spa), 0); zs->zs_alloc = metaslab_class_get_alloc(spa_normal_class(spa)); zs->zs_space = metaslab_class_get_space(spa_normal_class(spa)); umem_free(tid, ztest_opts.zo_threads * sizeof (kt_did_t)); /* Kill the resume thread */ ztest_exiting = B_TRUE; thread_join(resume_thread->t_tid); ztest_resume(spa); /* * Right before closing the pool, kick off a bunch of async I/O; * spa_close() should wait for it to complete. */ for (object = 1; object < 50; object++) dmu_prefetch(spa->spa_meta_objset, object, 0, 1ULL << 20); /* Verify that at least one commit cb was called in a timely fashion */ if (zc_cb_counter >= ZTEST_COMMIT_CB_MIN_REG) VERIFY0(zc_min_txg_delay); spa_close(spa, FTAG); /* * Verify that we can loop over all pools. */ mutex_enter(&spa_namespace_lock); for (spa = spa_next(NULL); spa != NULL; spa = spa_next(spa)) if (ztest_opts.zo_verbose > 3) (void) printf("spa_next: found %s\n", spa_name(spa)); mutex_exit(&spa_namespace_lock); /* * Verify that we can export the pool and reimport it under a * different name. */ if (ztest_random(2) == 0) { char name[MAXNAMELEN]; (void) snprintf(name, MAXNAMELEN, "%s_import", ztest_opts.zo_pool); ztest_spa_import_export(ztest_opts.zo_pool, name); ztest_spa_import_export(name, ztest_opts.zo_pool); } kernel_fini(); list_destroy(&zcl.zcl_callbacks); mutex_destroy(&zcl.zcl_callbacks_lock); rw_destroy(&ztest_name_lock); mutex_destroy(&ztest_vdev_lock); } static void ztest_freeze(void) { ztest_ds_t *zd = &ztest_ds[0]; spa_t *spa; int numloops = 0; if (ztest_opts.zo_verbose >= 3) (void) printf("testing spa_freeze()...\n"); kernel_init(FREAD | FWRITE); VERIFY3U(0, ==, spa_open(ztest_opts.zo_pool, &spa, FTAG)); VERIFY3U(0, ==, ztest_dataset_open(0)); /* * Force the first log block to be transactionally allocated. * We have to do this before we freeze the pool -- otherwise * the log chain won't be anchored. 
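 * The loop below keeps generating and committing log records until the ZIL
 * header's zh_log block pointer is no longer a hole, i.e. until the head of
 * the log chain has actually been allocated.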
*/ while (BP_IS_HOLE(&zd->zd_zilog->zl_header->zh_log)) { ztest_dmu_object_alloc_free(zd, 0); zil_commit(zd->zd_zilog, 0); } txg_wait_synced(spa_get_dsl(spa), 0); /* * Freeze the pool. This stops spa_sync() from doing anything, * so that the only way to record changes from now on is the ZIL. */ spa_freeze(spa); /* * Run tests that generate log records but don't alter the pool config * or depend on DSL sync tasks (snapshots, objset create/destroy, etc). * We do a txg_wait_synced() after each iteration to force the txg * to increase well beyond the last synced value in the uberblock. * The ZIL should be OK with that. */ while (ztest_random(10) != 0 && numloops++ < ztest_opts.zo_maxloops) { ztest_dmu_write_parallel(zd, 0); ztest_dmu_object_alloc_free(zd, 0); txg_wait_synced(spa_get_dsl(spa), 0); } /* * Commit all of the changes we just generated. */ zil_commit(zd->zd_zilog, 0); txg_wait_synced(spa_get_dsl(spa), 0); /* * Close our dataset and close the pool. */ ztest_dataset_close(0); spa_close(spa, FTAG); kernel_fini(); /* * Open and close the pool and dataset to induce log replay. */ kernel_init(FREAD | FWRITE); VERIFY3U(0, ==, spa_open(ztest_opts.zo_pool, &spa, FTAG)); ASSERT(spa_freeze_txg(spa) == UINT64_MAX); VERIFY3U(0, ==, ztest_dataset_open(0)); ztest_dataset_close(0); spa->spa_debug = B_TRUE; ztest_spa = spa; txg_wait_synced(spa_get_dsl(spa), 0); ztest_reguid(NULL, 0); spa_close(spa, FTAG); kernel_fini(); } void print_time(hrtime_t t, char *timebuf) { hrtime_t s = t / NANOSEC; hrtime_t m = s / 60; hrtime_t h = m / 60; hrtime_t d = h / 24; s -= m * 60; m -= h * 60; h -= d * 24; timebuf[0] = '\0'; if (d) (void) sprintf(timebuf, "%llud%02lluh%02llum%02llus", d, h, m, s); else if (h) (void) sprintf(timebuf, "%lluh%02llum%02llus", h, m, s); else if (m) (void) sprintf(timebuf, "%llum%02llus", m, s); else (void) sprintf(timebuf, "%llus", s); } static nvlist_t * make_random_props(void) { nvlist_t *props; VERIFY(nvlist_alloc(&props, NV_UNIQUE_NAME, 0) == 0); if (ztest_random(2) == 0) return (props); VERIFY(nvlist_add_uint64(props, "autoreplace", 1) == 0); return (props); } /* * Create a storage pool with the given name and initial vdev size. * Then test spa_freeze() functionality. */ static void ztest_init(ztest_shared_t *zs) { spa_t *spa; nvlist_t *nvroot, *props; int i; mutex_init(&ztest_vdev_lock, NULL, MUTEX_DEFAULT, NULL); rw_init(&ztest_name_lock, NULL, RW_DEFAULT, NULL); kernel_init(FREAD | FWRITE); /* * Create the storage pool. 
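 * Any pool left over from a previous run is destroyed first, and every
 * known feature@ property is included in the creation props with a value
 * of 0, apparently so the new pool starts with all features disabled.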
*/ (void) spa_destroy(ztest_opts.zo_pool); ztest_shared->zs_vdev_next_leaf = 0; zs->zs_splits = 0; zs->zs_mirrors = ztest_opts.zo_mirrors; nvroot = make_vdev_root(NULL, NULL, NULL, ztest_opts.zo_vdev_size, 0, 0, ztest_opts.zo_raidz, zs->zs_mirrors, 1); props = make_random_props(); for (i = 0; i < SPA_FEATURES; i++) { char *buf; VERIFY3S(-1, !=, asprintf(&buf, "feature@%s", spa_feature_table[i].fi_uname)); VERIFY3U(0, ==, nvlist_add_uint64(props, buf, 0)); free(buf); } - VERIFY3U(0, ==, spa_create(ztest_opts.zo_pool, nvroot, props, - NULL, NULL)); + VERIFY3U(0, ==, spa_create(ztest_opts.zo_pool, nvroot, props, NULL)); nvlist_free(nvroot); VERIFY3U(0, ==, spa_open(ztest_opts.zo_pool, &spa, FTAG)); zs->zs_metaslab_sz = 1ULL << spa->spa_root_vdev->vdev_child[0]->vdev_ms_shift; spa_close(spa, FTAG); kernel_fini(); ztest_run_zdb(ztest_opts.zo_pool); ztest_freeze(); ztest_run_zdb(ztest_opts.zo_pool); rw_destroy(&ztest_name_lock); mutex_destroy(&ztest_vdev_lock); } static void setup_data_fd(void) { static char ztest_name_data[] = "/tmp/ztest.data.XXXXXX"; ztest_fd_data = mkstemp(ztest_name_data); ASSERT3S(ztest_fd_data, >=, 0); (void) unlink(ztest_name_data); } static int shared_data_size(ztest_shared_hdr_t *hdr) { int size; size = hdr->zh_hdr_size; size += hdr->zh_opts_size; size += hdr->zh_size; size += hdr->zh_stats_size * hdr->zh_stats_count; size += hdr->zh_ds_size * hdr->zh_ds_count; return (size); } static void setup_hdr(void) { int size; ztest_shared_hdr_t *hdr; hdr = (void *)mmap(0, P2ROUNDUP(sizeof (*hdr), getpagesize()), PROT_READ | PROT_WRITE, MAP_SHARED, ztest_fd_data, 0); ASSERT(hdr != MAP_FAILED); VERIFY3U(0, ==, ftruncate(ztest_fd_data, sizeof (ztest_shared_hdr_t))); hdr->zh_hdr_size = sizeof (ztest_shared_hdr_t); hdr->zh_opts_size = sizeof (ztest_shared_opts_t); hdr->zh_size = sizeof (ztest_shared_t); hdr->zh_stats_size = sizeof (ztest_shared_callstate_t); hdr->zh_stats_count = ZTEST_FUNCS; hdr->zh_ds_size = sizeof (ztest_shared_ds_t); hdr->zh_ds_count = ztest_opts.zo_datasets; size = shared_data_size(hdr); VERIFY3U(0, ==, ftruncate(ztest_fd_data, size)); (void) munmap((caddr_t)hdr, P2ROUNDUP(sizeof (*hdr), getpagesize())); } static void setup_data(void) { int size, offset; ztest_shared_hdr_t *hdr; uint8_t *buf; hdr = (void *)mmap(0, P2ROUNDUP(sizeof (*hdr), getpagesize()), PROT_READ, MAP_SHARED, ztest_fd_data, 0); ASSERT(hdr != MAP_FAILED); size = shared_data_size(hdr); (void) munmap((caddr_t)hdr, P2ROUNDUP(sizeof (*hdr), getpagesize())); hdr = ztest_shared_hdr = (void *)mmap(0, P2ROUNDUP(size, getpagesize()), PROT_READ | PROT_WRITE, MAP_SHARED, ztest_fd_data, 0); ASSERT(hdr != MAP_FAILED); buf = (uint8_t *)hdr; offset = hdr->zh_hdr_size; ztest_shared_opts = (void *)&buf[offset]; offset += hdr->zh_opts_size; ztest_shared = (void *)&buf[offset]; offset += hdr->zh_size; ztest_shared_callstate = (void *)&buf[offset]; offset += hdr->zh_stats_size * hdr->zh_stats_count; ztest_shared_ds = (void *)&buf[offset]; } static boolean_t exec_child(char *cmd, char *libpath, boolean_t ignorekill, int *statusp) { pid_t pid; int status; char *cmdbuf = NULL; pid = fork(); if (cmd == NULL) { cmdbuf = umem_alloc(MAXPATHLEN, UMEM_NOFAIL); (void) strlcpy(cmdbuf, getexecname(), MAXPATHLEN); cmd = cmdbuf; } if (pid == -1) fatal(1, "fork failed"); if (pid == 0) { /* child */ char *emptyargv[2] = { cmd, NULL }; char fd_data_str[12]; struct rlimit rl = { 1024, 1024 }; (void) setrlimit(RLIMIT_NOFILE, &rl); (void) close(ztest_fd_rand); VERIFY(11 >= snprintf(fd_data_str, 12, "%d", ztest_fd_data)); VERIFY(0 == 
setenv("ZTEST_FD_DATA", fd_data_str, 1)); (void) enable_extended_FILE_stdio(-1, -1); if (libpath != NULL) VERIFY(0 == setenv("LD_LIBRARY_PATH", libpath, 1)); (void) execv(cmd, emptyargv); ztest_dump_core = B_FALSE; fatal(B_TRUE, "exec failed: %s", cmd); } if (cmdbuf != NULL) { umem_free(cmdbuf, MAXPATHLEN); cmd = NULL; } while (waitpid(pid, &status, 0) != pid) continue; if (statusp != NULL) *statusp = status; if (WIFEXITED(status)) { if (WEXITSTATUS(status) != 0) { (void) fprintf(stderr, "child exited with code %d\n", WEXITSTATUS(status)); exit(2); } return (B_FALSE); } else if (WIFSIGNALED(status)) { if (!ignorekill || WTERMSIG(status) != SIGKILL) { (void) fprintf(stderr, "child died with signal %d\n", WTERMSIG(status)); exit(3); } return (B_TRUE); } else { (void) fprintf(stderr, "something strange happened to child\n"); exit(4); /* NOTREACHED */ } } static void ztest_run_init(void) { int i; ztest_shared_t *zs = ztest_shared; ASSERT(ztest_opts.zo_init != 0); /* * Blow away any existing copy of zpool.cache */ (void) remove(spa_config_path); /* * Create and initialize our storage pool. */ for (i = 1; i <= ztest_opts.zo_init; i++) { bzero(zs, sizeof (ztest_shared_t)); if (ztest_opts.zo_verbose >= 3 && ztest_opts.zo_init != 1) { (void) printf("ztest_init(), pass %d\n", i); } ztest_init(zs); } } int main(int argc, char **argv) { int kills = 0; int iters = 0; int older = 0; int newer = 0; ztest_shared_t *zs; ztest_info_t *zi; ztest_shared_callstate_t *zc; char timebuf[100]; char numbuf[6]; spa_t *spa; char *cmd; boolean_t hasalt; int f; char *fd_data_str = getenv("ZTEST_FD_DATA"); (void) setvbuf(stdout, NULL, _IOLBF, 0); ztest_fd_rand = open("/dev/urandom", O_RDONLY); ASSERT3S(ztest_fd_rand, >=, 0); if (!fd_data_str) { dprintf_setup(&argc, argv); process_options(argc, argv); setup_data_fd(); setup_hdr(); setup_data(); bcopy(&ztest_opts, ztest_shared_opts, sizeof (*ztest_shared_opts)); } else { ztest_fd_data = atoi(fd_data_str); setup_data(); bcopy(ztest_shared_opts, &ztest_opts, sizeof (ztest_opts)); } ASSERT3U(ztest_opts.zo_datasets, ==, ztest_shared_hdr->zh_ds_count); /* Override location of zpool.cache */ VERIFY(asprintf((char **)&spa_config_path, "%s/zpool.cache", ztest_opts.zo_dir) != -1); ztest_ds = umem_alloc(ztest_opts.zo_datasets * sizeof (ztest_ds_t), UMEM_NOFAIL); zs = ztest_shared; if (fd_data_str) { metaslab_gang_bang = ztest_opts.zo_metaslab_gang_bang; metaslab_df_alloc_threshold = zs->zs_metaslab_df_alloc_threshold; if (zs->zs_do_init) ztest_run_init(); else ztest_run(zs); exit(0); } hasalt = (strlen(ztest_opts.zo_alt_ztest) != 0); if (ztest_opts.zo_verbose >= 1) { (void) printf("%llu vdevs, %d datasets, %d threads," " %llu seconds...\n", (u_longlong_t)ztest_opts.zo_vdevs, ztest_opts.zo_datasets, ztest_opts.zo_threads, (u_longlong_t)ztest_opts.zo_time); } cmd = umem_alloc(MAXNAMELEN, UMEM_NOFAIL); (void) strlcpy(cmd, getexecname(), MAXNAMELEN); zs->zs_do_init = B_TRUE; if (strlen(ztest_opts.zo_alt_ztest) != 0) { if (ztest_opts.zo_verbose >= 1) { (void) printf("Executing older ztest for " "initialization: %s\n", ztest_opts.zo_alt_ztest); } VERIFY(!exec_child(ztest_opts.zo_alt_ztest, ztest_opts.zo_alt_libpath, B_FALSE, NULL)); } else { VERIFY(!exec_child(NULL, NULL, B_FALSE, NULL)); } zs->zs_do_init = B_FALSE; zs->zs_proc_start = gethrtime(); zs->zs_proc_stop = zs->zs_proc_start + ztest_opts.zo_time * NANOSEC; for (f = 0; f < ZTEST_FUNCS; f++) { zi = &ztest_info[f]; zc = ZTEST_GET_SHARED_CALLSTATE(f); if (zs->zs_proc_start + zi->zi_interval[0] > zs->zs_proc_stop) zc->zc_next = 
UINT64_MAX; else zc->zc_next = zs->zs_proc_start + ztest_random(2 * zi->zi_interval[0] + 1); } /* * Run the tests in a loop. These tests include fault injection * to verify that self-healing data works, and forced crashes * to verify that we never lose on-disk consistency. */ while (gethrtime() < zs->zs_proc_stop) { int status; boolean_t killed; /* * Initialize the workload counters for each function. */ for (f = 0; f < ZTEST_FUNCS; f++) { zc = ZTEST_GET_SHARED_CALLSTATE(f); zc->zc_count = 0; zc->zc_time = 0; } /* Set the allocation switch size */ zs->zs_metaslab_df_alloc_threshold = ztest_random(zs->zs_metaslab_sz / 4) + 1; if (!hasalt || ztest_random(2) == 0) { if (hasalt && ztest_opts.zo_verbose >= 1) { (void) printf("Executing newer ztest: %s\n", cmd); } newer++; killed = exec_child(cmd, NULL, B_TRUE, &status); } else { if (hasalt && ztest_opts.zo_verbose >= 1) { (void) printf("Executing older ztest: %s\n", ztest_opts.zo_alt_ztest); } older++; killed = exec_child(ztest_opts.zo_alt_ztest, ztest_opts.zo_alt_libpath, B_TRUE, &status); } if (killed) kills++; iters++; if (ztest_opts.zo_verbose >= 1) { hrtime_t now = gethrtime(); now = MIN(now, zs->zs_proc_stop); print_time(zs->zs_proc_stop - now, timebuf); nicenum(zs->zs_space, numbuf); (void) printf("Pass %3d, %8s, %3llu ENOSPC, " "%4.1f%% of %5s used, %3.0f%% done, %8s to go\n", iters, WIFEXITED(status) ? "Complete" : "SIGKILL", (u_longlong_t)zs->zs_enospc_count, 100.0 * zs->zs_alloc / zs->zs_space, numbuf, 100.0 * (now - zs->zs_proc_start) / (ztest_opts.zo_time * NANOSEC), timebuf); } if (ztest_opts.zo_verbose >= 2) { (void) printf("\nWorkload summary:\n\n"); (void) printf("%7s %9s %s\n", "Calls", "Time", "Function"); (void) printf("%7s %9s %s\n", "-----", "----", "--------"); for (f = 0; f < ZTEST_FUNCS; f++) { Dl_info dli; zi = &ztest_info[f]; zc = ZTEST_GET_SHARED_CALLSTATE(f); print_time(zc->zc_time, timebuf); (void) dladdr((void *)zi->zi_func, &dli); (void) printf("%7llu %9s %s\n", (u_longlong_t)zc->zc_count, timebuf, dli.dli_sname); } (void) printf("\n"); } /* * It's possible that we killed a child during a rename test, * in which case we'll have a 'ztest_tmp' pool lying around * instead of 'ztest'. Do a blind rename in case this happened. */ kernel_init(FREAD); if (spa_open(ztest_opts.zo_pool, &spa, FTAG) == 0) { spa_close(spa, FTAG); } else { char tmpname[MAXNAMELEN]; kernel_fini(); kernel_init(FREAD | FWRITE); (void) snprintf(tmpname, sizeof (tmpname), "%s_tmp", ztest_opts.zo_pool); (void) spa_rename(tmpname, ztest_opts.zo_pool); } kernel_fini(); ztest_run_zdb(ztest_opts.zo_pool); } if (ztest_opts.zo_verbose >= 1) { if (hasalt) { (void) printf("%d runs of older ztest: %s\n", older, ztest_opts.zo_alt_ztest); (void) printf("%d runs of newer ztest: %s\n", newer, cmd); } (void) printf("%d killed, %d completed, %.0f%% kill rate\n", kills, iters - kills, (100.0 * kills) / MAX(1, iters)); } umem_free(cmd, MAXNAMELEN); return (0); } diff --git a/configure.ac b/configure.ac index 58e21587051a..08b807e3f16c 100644 --- a/configure.ac +++ b/configure.ac @@ -1,137 +1,138 @@ /* * This file is part of the ZFS Linux port. * * Copyright (c) 2009 Lawrence Livermore National Security, LLC. * Produced at Lawrence Livermore National Laboratory * Written by: * Brian Behlendorf , * Herb Wartens , * Jim Garlick * LLNL-CODE-403049 * * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License, Version 1.0 only * (the "License"). 
You may not use this file except in compliance * with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ AC_INIT AC_LANG(C) ZFS_AC_META AC_CONFIG_AUX_DIR([config]) AC_CONFIG_MACRO_DIR([config]) AC_CANONICAL_SYSTEM AM_MAINTAINER_MODE m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])]) AM_INIT_AUTOMAKE([$ZFS_META_NAME], [$ZFS_META_VERSION]) AC_CONFIG_HEADERS([zfs_config.h], [ (mv zfs_config.h zfs_config.h.tmp && awk -f ${ac_srcdir}/config/config.awk zfs_config.h.tmp >zfs_config.h && rm zfs_config.h.tmp) || exit 1]) AC_PROG_INSTALL AC_PROG_CC AC_PROG_LIBTOOL AM_PROG_AS ZFS_AC_LICENSE ZFS_AC_PACKAGE ZFS_AC_CONFIG ZFS_AC_DEBUG ZFS_AC_DEBUG_DMU_TX AC_CONFIG_FILES([ Makefile dracut/Makefile dracut/90zfs/Makefile udev/Makefile udev/rules.d/Makefile etc/Makefile etc/init.d/Makefile etc/zfs/Makefile man/Makefile man/man1/Makefile man/man5/Makefile man/man8/Makefile lib/Makefile lib/libspl/Makefile lib/libspl/asm-generic/Makefile lib/libspl/asm-i386/Makefile lib/libspl/asm-x86_64/Makefile lib/libspl/include/Makefile lib/libspl/include/ia32/Makefile lib/libspl/include/ia32/sys/Makefile lib/libspl/include/rpc/Makefile lib/libspl/include/sys/Makefile lib/libspl/include/sys/sysevent/Makefile lib/libspl/include/sys/dktp/Makefile lib/libspl/include/util/Makefile lib/libavl/Makefile lib/libefi/Makefile lib/libnvpair/Makefile lib/libunicode/Makefile lib/libuutil/Makefile lib/libzpool/Makefile lib/libzfs/Makefile + lib/libzfs_core/Makefile lib/libshare/Makefile cmd/Makefile cmd/zdb/Makefile cmd/zhack/Makefile cmd/zfs/Makefile cmd/zinject/Makefile cmd/zpool/Makefile cmd/zstreamdump/Makefile cmd/ztest/Makefile cmd/zpios/Makefile cmd/mount_zfs/Makefile cmd/fsck_zfs/Makefile cmd/zvol_id/Makefile cmd/vdev_id/Makefile cmd/arcstat/Makefile module/Makefile module/avl/Makefile module/nvpair/Makefile module/unicode/Makefile module/zcommon/Makefile module/zfs/Makefile module/zpios/Makefile include/Makefile include/linux/Makefile include/sys/Makefile include/sys/fs/Makefile include/sys/fm/Makefile include/sys/fm/fs/Makefile scripts/Makefile scripts/zpios-profile/Makefile scripts/zpios-test/Makefile scripts/zpool-config/Makefile scripts/common.sh rpm/Makefile rpm/fedora/Makefile rpm/fedora/zfs.spec rpm/fedora/zfs-kmod.spec rpm/fedora/zfs-dkms.spec rpm/generic/Makefile rpm/generic/zfs.spec rpm/generic/zfs-kmod.spec rpm/generic/zfs-dkms.spec zfs-script-config.sh zfs.release ]) AC_OUTPUT diff --git a/include/Makefile.am b/include/Makefile.am index 64141d9bce30..2e1c31aaf81c 100644 --- a/include/Makefile.am +++ b/include/Makefile.am @@ -1,33 +1,34 @@ SUBDIRS = linux sys COMMON_H = \ $(top_srcdir)/include/zfeature_common.h \ $(top_srcdir)/include/zfs_comutil.h \ $(top_srcdir)/include/zfs_deleg.h \ $(top_srcdir)/include/zfs_fletcher.h \ $(top_srcdir)/include/zfs_namecheck.h \ $(top_srcdir)/include/zfs_prop.h \ $(top_srcdir)/include/zpios-ctl.h KERNEL_H = \ $(top_srcdir)/include/zpios-internal.h USER_H = \ $(top_srcdir)/include/libnvpair.h \ $(top_srcdir)/include/libuutil_common.h \ 
$(top_srcdir)/include/libuutil.h \ $(top_srcdir)/include/libuutil_impl.h \ $(top_srcdir)/include/libzfs.h \ + $(top_srcdir)/include/libzfs_core.h \ $(top_srcdir)/include/libzfs_impl.h EXTRA_DIST = $(COMMON_H) $(KERNEL_H) $(USER_H) if CONFIG_USER libzfsdir = $(includedir)/libzfs libzfs_HEADERS = $(COMMON_H) $(USER_H) endif if CONFIG_KERNEL kerneldir = /usr/src/zfs-$(VERSION)/include kernel_HEADERS = $(COMMON_H) $(KERNEL_H) endif diff --git a/include/libzfs.h b/include/libzfs.h index 3472b7699360..d51a71d7598b 100644 --- a/include/libzfs.h +++ b/include/libzfs.h @@ -1,769 +1,770 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright 2011 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2012 by Delphix. All rights reserved. * Copyright (c) 2012, Joyent, Inc. All rights reserved. */ #ifndef _LIBZFS_H #define _LIBZFS_H #include #include #include #include #include #include #include #include #include #ifdef __cplusplus extern "C" { #endif /* * Miscellaneous ZFS constants */ #define ZFS_MAXNAMELEN MAXNAMELEN #define ZPOOL_MAXNAMELEN MAXNAMELEN #define ZFS_MAXPROPLEN MAXPATHLEN #define ZPOOL_MAXPROPLEN MAXPATHLEN /* * Default device paths */ #define DISK_ROOT "/dev" #define UDISK_ROOT "/dev/disk" #define DEFAULT_IMPORT_PATH_SIZE 7 extern char *zpool_default_import_path[DEFAULT_IMPORT_PATH_SIZE]; /* * libzfs errors */ -enum { +typedef enum zfs_error { + EZFS_SUCCESS = 0, /* no error -- success */ EZFS_NOMEM = 2000, /* out of memory */ EZFS_BADPROP, /* invalid property value */ EZFS_PROPREADONLY, /* cannot set readonly property */ EZFS_PROPTYPE, /* property does not apply to dataset type */ EZFS_PROPNONINHERIT, /* property is not inheritable */ EZFS_PROPSPACE, /* bad quota or reservation */ EZFS_BADTYPE, /* dataset is not of appropriate type */ EZFS_BUSY, /* pool or dataset is busy */ EZFS_EXISTS, /* pool or dataset already exists */ EZFS_NOENT, /* no such pool or dataset */ EZFS_BADSTREAM, /* bad backup stream */ EZFS_DSREADONLY, /* dataset is readonly */ EZFS_VOLTOOBIG, /* volume is too large for 32-bit system */ EZFS_INVALIDNAME, /* invalid dataset name */ EZFS_BADRESTORE, /* unable to restore to destination */ EZFS_BADBACKUP, /* backup failed */ EZFS_BADTARGET, /* bad attach/detach/replace target */ EZFS_NODEVICE, /* no such device in pool */ EZFS_BADDEV, /* invalid device to add */ EZFS_NOREPLICAS, /* no valid replicas */ EZFS_RESILVERING, /* currently resilvering */ EZFS_BADVERSION, /* unsupported version */ EZFS_POOLUNAVAIL, /* pool is currently unavailable */ EZFS_DEVOVERFLOW, /* too many devices in one vdev */ EZFS_BADPATH, /* must be an absolute path */ EZFS_CROSSTARGET, /* rename or clone across pool or dataset */ EZFS_ZONED, /* used improperly in local 
zone */ EZFS_MOUNTFAILED, /* failed to mount dataset */ EZFS_UMOUNTFAILED, /* failed to unmount dataset */ EZFS_UNSHARENFSFAILED, /* unshare(1M) failed */ EZFS_SHARENFSFAILED, /* share(1M) failed */ EZFS_PERM, /* permission denied */ EZFS_NOSPC, /* out of space */ EZFS_FAULT, /* bad address */ EZFS_IO, /* I/O error */ EZFS_INTR, /* signal received */ EZFS_ISSPARE, /* device is a hot spare */ EZFS_INVALCONFIG, /* invalid vdev configuration */ EZFS_RECURSIVE, /* recursive dependency */ EZFS_NOHISTORY, /* no history object */ EZFS_POOLPROPS, /* couldn't retrieve pool props */ EZFS_POOL_NOTSUP, /* ops not supported for this type of pool */ EZFS_POOL_INVALARG, /* invalid argument for this pool operation */ EZFS_NAMETOOLONG, /* dataset name is too long */ EZFS_OPENFAILED, /* open of device failed */ EZFS_NOCAP, /* couldn't get capacity */ EZFS_LABELFAILED, /* write of label failed */ EZFS_BADWHO, /* invalid permission who */ EZFS_BADPERM, /* invalid permission */ EZFS_BADPERMSET, /* invalid permission set name */ EZFS_NODELEGATION, /* delegated administration is disabled */ EZFS_UNSHARESMBFAILED, /* failed to unshare over smb */ EZFS_SHARESMBFAILED, /* failed to share over smb */ EZFS_BADCACHE, /* bad cache file */ EZFS_ISL2CACHE, /* device is for the level 2 ARC */ EZFS_VDEVNOTSUP, /* unsupported vdev type */ EZFS_NOTSUP, /* ops not supported on this dataset */ EZFS_ACTIVE_SPARE, /* pool has active shared spare devices */ EZFS_UNPLAYED_LOGS, /* log device has unplayed logs */ EZFS_REFTAG_RELE, /* snapshot release: tag not found */ EZFS_REFTAG_HOLD, /* snapshot hold: tag already exists */ EZFS_TAGTOOLONG, /* snapshot hold/rele: tag too long */ EZFS_PIPEFAILED, /* pipe create failed */ EZFS_THREADCREATEFAILED, /* thread create failed */ EZFS_POSTSPLIT_ONLINE, /* onlining a disk after splitting it */ EZFS_SCRUBBING, /* currently scrubbing */ EZFS_NO_SCRUB, /* no active scrub */ EZFS_DIFF, /* general failure of zfs diff */ EZFS_DIFFDATA, /* bad zfs diff data */ EZFS_POOLREADONLY, /* pool is in read-only mode */ EZFS_UNKNOWN -}; +} zfs_error_t; /* * The following data structures are all part * of the zfs_allow_t data structure which is * used for printing 'allow' permissions. * It is a linked list of zfs_allow_t's which * then contain avl tree's for user/group/sets/... * and each one of the entries in those trees have * avl tree's for the permissions they belong to and * whether they are local,descendent or local+descendent * permissions. The AVL trees are used primarily for * sorting purposes, but also so that we can quickly find * a given user and or permission. 
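 *
 * For example (illustrative only), an allow rule granting user "joe" the
 * "create" and "mount" permissions on tank/home would be represented by a
 * zfs_allow_t whose z_setpoint is "tank/home", whose z_user tree holds a
 * zfs_allow_node_t keyed on "joe", and whose z_local / z_descend /
 * z_localdescend trees under that node hold zfs_perm_node_t entries named
 * "create" and "mount", depending on the scope of the grant.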
*/ typedef struct zfs_perm_node { avl_node_t z_node; char z_pname[MAXPATHLEN]; } zfs_perm_node_t; typedef struct zfs_allow_node { avl_node_t z_node; char z_key[MAXPATHLEN]; /* name, such as joe */ avl_tree_t z_localdescend; /* local+descendent perms */ avl_tree_t z_local; /* local permissions */ avl_tree_t z_descend; /* descendent permissions */ } zfs_allow_node_t; typedef struct zfs_allow { struct zfs_allow *z_next; char z_setpoint[MAXPATHLEN]; avl_tree_t z_sets; avl_tree_t z_crperms; avl_tree_t z_user; avl_tree_t z_group; avl_tree_t z_everyone; } zfs_allow_t; /* * Basic handle types */ typedef struct zfs_handle zfs_handle_t; typedef struct zpool_handle zpool_handle_t; typedef struct libzfs_handle libzfs_handle_t; /* * Library initialization */ extern libzfs_handle_t *libzfs_init(void); extern void libzfs_fini(libzfs_handle_t *); extern libzfs_handle_t *zpool_get_handle(zpool_handle_t *); extern libzfs_handle_t *zfs_get_handle(zfs_handle_t *); extern void libzfs_print_on_error(libzfs_handle_t *, boolean_t); +extern void zfs_save_arguments(int argc, char **, char *, int); +extern int zpool_log_history(libzfs_handle_t *, const char *); + extern int libzfs_errno(libzfs_handle_t *); extern const char *libzfs_error_action(libzfs_handle_t *); extern const char *libzfs_error_description(libzfs_handle_t *); extern void libzfs_mnttab_init(libzfs_handle_t *); extern void libzfs_mnttab_fini(libzfs_handle_t *); extern void libzfs_mnttab_cache(libzfs_handle_t *, boolean_t); extern int libzfs_mnttab_find(libzfs_handle_t *, const char *, struct mnttab *); extern void libzfs_mnttab_add(libzfs_handle_t *, const char *, const char *, const char *); extern void libzfs_mnttab_remove(libzfs_handle_t *, const char *); /* * Basic handle functions */ extern zpool_handle_t *zpool_open(libzfs_handle_t *, const char *); extern zpool_handle_t *zpool_open_canfail(libzfs_handle_t *, const char *); extern void zpool_close(zpool_handle_t *); extern const char *zpool_get_name(zpool_handle_t *); extern int zpool_get_state(zpool_handle_t *); extern char *zpool_state_to_name(vdev_state_t, vdev_aux_t); extern const char *zpool_pool_state_to_name(pool_state_t); extern void zpool_free_handles(libzfs_handle_t *); /* * Iterate over all active pools in the system. 
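 *
 * Illustrative sketch only (not part of this change): a caller supplies a
 * zpool_iter_f callback, which receives an open zpool_handle_t for each
 * imported pool.  Whether the callback keeps or closes each handle is up to
 * the caller; this sketch closes it.
 *
 *     static int
 *     print_pool(zpool_handle_t *zhp, void *arg)
 *     {
 *         (void) printf("%s\n", zpool_get_name(zhp));
 *         zpool_close(zhp);
 *         return (0);
 *     }
 *
 *     libzfs_handle_t *hdl = libzfs_init();
 *     (void) zpool_iter(hdl, print_pool, NULL);
 *     libzfs_fini(hdl);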
*/ typedef int (*zpool_iter_f)(zpool_handle_t *, void *); extern int zpool_iter(libzfs_handle_t *, zpool_iter_f, void *); /* * Functions to create and destroy pools */ extern int zpool_create(libzfs_handle_t *, const char *, nvlist_t *, nvlist_t *, nvlist_t *); -extern int zpool_destroy(zpool_handle_t *); +extern int zpool_destroy(zpool_handle_t *, const char *); extern int zpool_add(zpool_handle_t *, nvlist_t *); typedef struct splitflags { /* do not split, but return the config that would be split off */ int dryrun : 1; /* after splitting, import the pool */ int import : 1; } splitflags_t; /* * Functions to manipulate pool and vdev state */ extern int zpool_scan(zpool_handle_t *, pool_scan_func_t); extern int zpool_clear(zpool_handle_t *, const char *, nvlist_t *); extern int zpool_reguid(zpool_handle_t *); extern int zpool_reopen(zpool_handle_t *); extern int zpool_vdev_online(zpool_handle_t *, const char *, int, vdev_state_t *); extern int zpool_vdev_offline(zpool_handle_t *, const char *, boolean_t); extern int zpool_vdev_attach(zpool_handle_t *, const char *, const char *, nvlist_t *, int); extern int zpool_vdev_detach(zpool_handle_t *, const char *); extern int zpool_vdev_remove(zpool_handle_t *, const char *); extern int zpool_vdev_split(zpool_handle_t *, char *, nvlist_t **, nvlist_t *, splitflags_t); extern int zpool_vdev_fault(zpool_handle_t *, uint64_t, vdev_aux_t); extern int zpool_vdev_degrade(zpool_handle_t *, uint64_t, vdev_aux_t); extern int zpool_vdev_clear(zpool_handle_t *, uint64_t); extern nvlist_t *zpool_find_vdev(zpool_handle_t *, const char *, boolean_t *, boolean_t *, boolean_t *); extern nvlist_t *zpool_find_vdev_by_physpath(zpool_handle_t *, const char *, boolean_t *, boolean_t *, boolean_t *); extern int zpool_label_disk_wait(char *, int); extern int zpool_label_disk(libzfs_handle_t *, zpool_handle_t *, char *); /* * Functions to manage pool properties */ extern int zpool_set_prop(zpool_handle_t *, const char *, const char *); extern int zpool_get_prop(zpool_handle_t *, zpool_prop_t, char *, size_t proplen, zprop_source_t *); extern uint64_t zpool_get_prop_int(zpool_handle_t *, zpool_prop_t, zprop_source_t *); extern const char *zpool_prop_to_name(zpool_prop_t); extern const char *zpool_prop_values(zpool_prop_t); /* * Pool health statistics. */ typedef enum { /* * The following correspond to faults as defined in the (fault.fs.zfs.*) * event namespace. Each is associated with a corresponding message ID. */ ZPOOL_STATUS_CORRUPT_CACHE, /* corrupt /kernel/drv/zpool.cache */ ZPOOL_STATUS_MISSING_DEV_R, /* missing device with replicas */ ZPOOL_STATUS_MISSING_DEV_NR, /* missing device with no replicas */ ZPOOL_STATUS_CORRUPT_LABEL_R, /* bad device label with replicas */ ZPOOL_STATUS_CORRUPT_LABEL_NR, /* bad device label with no replicas */ ZPOOL_STATUS_BAD_GUID_SUM, /* sum of device guids didn't match */ ZPOOL_STATUS_CORRUPT_POOL, /* pool metadata is corrupted */ ZPOOL_STATUS_CORRUPT_DATA, /* data errors in user (meta)data */ ZPOOL_STATUS_FAILING_DEV, /* device experiencing errors */ ZPOOL_STATUS_VERSION_NEWER, /* newer on-disk version */ ZPOOL_STATUS_HOSTID_MISMATCH, /* last accessed by another system */ ZPOOL_STATUS_IO_FAILURE_WAIT, /* failed I/O, failmode 'wait' */ ZPOOL_STATUS_IO_FAILURE_CONTINUE, /* failed I/O, failmode 'continue' */ ZPOOL_STATUS_BAD_LOG, /* cannot read log chain(s) */ /* * If the pool has unsupported features but can still be opened in * read-only mode, its status is ZPOOL_STATUS_UNSUP_FEAT_WRITE. 
If the * pool has unsupported features but cannot be opened at all, its * status is ZPOOL_STATUS_UNSUP_FEAT_READ. */ ZPOOL_STATUS_UNSUP_FEAT_READ, /* unsupported features for read */ ZPOOL_STATUS_UNSUP_FEAT_WRITE, /* unsupported features for write */ /* * These faults have no corresponding message ID. At the time we are * checking the status, the original reason for the FMA fault (I/O or * checksum errors) has been lost. */ ZPOOL_STATUS_FAULTED_DEV_R, /* faulted device with replicas */ ZPOOL_STATUS_FAULTED_DEV_NR, /* faulted device with no replicas */ /* * The following are not faults per se, but still an error possibly * requiring administrative attention. There is no corresponding * message ID. */ ZPOOL_STATUS_VERSION_OLDER, /* older legacy on-disk version */ ZPOOL_STATUS_FEAT_DISABLED, /* supported features are disabled */ ZPOOL_STATUS_RESILVERING, /* device being resilvered */ ZPOOL_STATUS_OFFLINE_DEV, /* device online */ ZPOOL_STATUS_REMOVED_DEV, /* removed device */ /* * Finally, the following indicates a healthy pool. */ ZPOOL_STATUS_OK } zpool_status_t; extern zpool_status_t zpool_get_status(zpool_handle_t *, char **); extern zpool_status_t zpool_import_status(nvlist_t *, char **); extern void zpool_dump_ddt(const ddt_stat_t *dds, const ddt_histogram_t *ddh); /* * Statistics and configuration functions. */ extern nvlist_t *zpool_get_config(zpool_handle_t *, nvlist_t **); extern nvlist_t *zpool_get_features(zpool_handle_t *); extern int zpool_refresh_stats(zpool_handle_t *, boolean_t *); extern int zpool_get_errlog(zpool_handle_t *, nvlist_t **); /* * Import and export functions */ -extern int zpool_export(zpool_handle_t *, boolean_t); -extern int zpool_export_force(zpool_handle_t *); +extern int zpool_export(zpool_handle_t *, boolean_t, const char *); +extern int zpool_export_force(zpool_handle_t *, const char *); extern int zpool_import(libzfs_handle_t *, nvlist_t *, const char *, char *altroot); extern int zpool_import_props(libzfs_handle_t *, nvlist_t *, const char *, nvlist_t *, int); extern void zpool_print_unsup_feat(nvlist_t *config); /* * Search for pools to import */ typedef struct importargs { char **path; /* a list of paths to search */ int paths; /* number of paths to search */ char *poolname; /* name of a pool to find */ uint64_t guid; /* guid of a pool to find */ char *cachefile; /* cachefile to use for import */ int can_be_active : 1; /* can the pool be active? */ int unique : 1; /* does 'poolname' already exist? 
*/ int exists : 1; /* set on return if pool already exists */ } importargs_t; extern nvlist_t *zpool_search_import(libzfs_handle_t *, importargs_t *); /* legacy pool search routines */ extern nvlist_t *zpool_find_import(libzfs_handle_t *, int, char **); extern nvlist_t *zpool_find_import_cached(libzfs_handle_t *, const char *, char *, uint64_t); /* * Miscellaneous pool functions */ struct zfs_cmd; -extern const char *zfs_history_event_names[LOG_END]; +extern const char *zfs_history_event_names[]; extern char *zpool_vdev_name(libzfs_handle_t *, zpool_handle_t *, nvlist_t *, boolean_t verbose); extern int zpool_upgrade(zpool_handle_t *, uint64_t); extern int zpool_get_history(zpool_handle_t *, nvlist_t **); extern int zpool_history_unpack(char *, uint64_t, uint64_t *, nvlist_t ***, uint_t *); -extern void zpool_set_history_str(const char *subcommand, int argc, - char **argv, char *history_str); -extern int zpool_stage_history(libzfs_handle_t *, const char *); extern int zpool_events_next(libzfs_handle_t *, nvlist_t **, int *, int, int); extern int zpool_events_clear(libzfs_handle_t *, int *); extern void zpool_obj_to_path(zpool_handle_t *, uint64_t, uint64_t, char *, size_t len); extern int zfs_ioctl(libzfs_handle_t *, int, struct zfs_cmd *); extern int zpool_get_physpath(zpool_handle_t *, char *, size_t); extern void zpool_explain_recover(libzfs_handle_t *, const char *, int, nvlist_t *); /* * Basic handle manipulations. These functions do not create or destroy the * underlying datasets, only the references to them. */ extern zfs_handle_t *zfs_open(libzfs_handle_t *, const char *, int); extern zfs_handle_t *zfs_handle_dup(zfs_handle_t *); extern void zfs_close(zfs_handle_t *); extern zfs_type_t zfs_get_type(const zfs_handle_t *); extern const char *zfs_get_name(const zfs_handle_t *); extern zpool_handle_t *zfs_get_pool_handle(const zfs_handle_t *); /* * Property management functions. Some functions are shared with the kernel, * and are found in sys/fs/zfs.h. 
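 *
 * Typical getter pattern, as an illustrative sketch only ("hdl" is assumed
 * to come from libzfs_init() and the dataset name is made up):
 *
 *     char mnt[ZFS_MAXPROPLEN];
 *     zfs_handle_t *zhp = zfs_open(hdl, "tank/home", ZFS_TYPE_FILESYSTEM);
 *
 *     if (zhp != NULL) {
 *         if (zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, mnt, sizeof (mnt),
 *             NULL, NULL, 0, B_FALSE) == 0)
 *             (void) printf("mountpoint = %s\n", mnt);
 *         zfs_close(zhp);
 *     }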
*/ /* * zfs dataset property management */ extern const char *zfs_prop_default_string(zfs_prop_t); extern uint64_t zfs_prop_default_numeric(zfs_prop_t); extern const char *zfs_prop_column_name(zfs_prop_t); extern boolean_t zfs_prop_align_right(zfs_prop_t); extern nvlist_t *zfs_valid_proplist(libzfs_handle_t *, zfs_type_t, nvlist_t *, uint64_t, zfs_handle_t *, const char *); extern const char *zfs_prop_to_name(zfs_prop_t); extern int zfs_prop_set(zfs_handle_t *, const char *, const char *); extern int zfs_prop_get(zfs_handle_t *, zfs_prop_t, char *, size_t, zprop_source_t *, char *, size_t, boolean_t); extern int zfs_prop_get_recvd(zfs_handle_t *, const char *, char *, size_t, boolean_t); extern int zfs_prop_get_numeric(zfs_handle_t *, zfs_prop_t, uint64_t *, zprop_source_t *, char *, size_t); extern int zfs_prop_get_userquota_int(zfs_handle_t *zhp, const char *propname, uint64_t *propvalue); extern int zfs_prop_get_userquota(zfs_handle_t *zhp, const char *propname, char *propbuf, int proplen, boolean_t literal); extern int zfs_prop_get_written_int(zfs_handle_t *zhp, const char *propname, uint64_t *propvalue); extern int zfs_prop_get_written(zfs_handle_t *zhp, const char *propname, char *propbuf, int proplen, boolean_t literal); extern int zfs_prop_get_feature(zfs_handle_t *zhp, const char *propname, char *buf, size_t len); -extern int zfs_get_snapused_int(zfs_handle_t *firstsnap, zfs_handle_t *lastsnap, - uint64_t *usedp); extern uint64_t getprop_uint64(zfs_handle_t *, zfs_prop_t, char **); extern uint64_t zfs_prop_get_int(zfs_handle_t *, zfs_prop_t); extern int zfs_prop_inherit(zfs_handle_t *, const char *, boolean_t); extern const char *zfs_prop_values(zfs_prop_t); extern int zfs_prop_is_string(zfs_prop_t prop); extern nvlist_t *zfs_get_user_props(zfs_handle_t *); extern nvlist_t *zfs_get_recvd_props(zfs_handle_t *); extern nvlist_t *zfs_get_clones_nvl(zfs_handle_t *); typedef struct zprop_list { int pl_prop; char *pl_user_prop; struct zprop_list *pl_next; boolean_t pl_all; size_t pl_width; size_t pl_recvd_width; boolean_t pl_fixed; } zprop_list_t; extern int zfs_expand_proplist(zfs_handle_t *, zprop_list_t **, boolean_t); extern void zfs_prune_proplist(zfs_handle_t *, uint8_t *); #define ZFS_MOUNTPOINT_NONE "none" #define ZFS_MOUNTPOINT_LEGACY "legacy" #define ZFS_FEATURE_DISABLED "disabled" #define ZFS_FEATURE_ENABLED "enabled" #define ZFS_FEATURE_ACTIVE "active" #define ZFS_UNSUPPORTED_INACTIVE "inactive" #define ZFS_UNSUPPORTED_READONLY "readonly" /* * zpool property management */ extern int zpool_expand_proplist(zpool_handle_t *, zprop_list_t **); extern int zpool_prop_get_feature(zpool_handle_t *, const char *, char *, size_t); extern const char *zpool_prop_default_string(zpool_prop_t); extern uint64_t zpool_prop_default_numeric(zpool_prop_t); extern const char *zpool_prop_column_name(zpool_prop_t); extern boolean_t zpool_prop_align_right(zpool_prop_t); /* * Functions shared by zfs and zpool property management. 
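 *
 * For example (sketch only; "hdl" comes from libzfs_init()), a
 * comma-separated property list from a command line is expanded into a
 * zprop_list_t and later freed:
 *
 *     zprop_list_t *pl = NULL;
 *
 *     if (zprop_get_list(hdl, "name,used,available", &pl,
 *         ZFS_TYPE_FILESYSTEM) != 0)
 *         return (1);
 *     ...                    (walk pl, looking at pl_prop or pl_user_prop)
 *     zprop_free_list(pl);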
*/ extern int zprop_iter(zprop_func func, void *cb, boolean_t show_all, boolean_t ordered, zfs_type_t type); extern int zprop_get_list(libzfs_handle_t *, char *, zprop_list_t **, zfs_type_t); extern void zprop_free_list(zprop_list_t *); #define ZFS_GET_NCOLS 5 typedef enum { GET_COL_NONE, GET_COL_NAME, GET_COL_PROPERTY, GET_COL_VALUE, GET_COL_RECVD, GET_COL_SOURCE } zfs_get_column_t; /* * Functions for printing zfs or zpool properties */ typedef struct zprop_get_cbdata { int cb_sources; zfs_get_column_t cb_columns[ZFS_GET_NCOLS]; int cb_colwidths[ZFS_GET_NCOLS + 1]; boolean_t cb_scripted; boolean_t cb_literal; boolean_t cb_first; zprop_list_t *cb_proplist; zfs_type_t cb_type; } zprop_get_cbdata_t; void zprop_print_one_property(const char *, zprop_get_cbdata_t *, const char *, const char *, zprop_source_t, const char *, const char *); /* * Iterator functions. */ typedef int (*zfs_iter_f)(zfs_handle_t *, void *); extern int zfs_iter_root(libzfs_handle_t *, zfs_iter_f, void *); extern int zfs_iter_children(zfs_handle_t *, zfs_iter_f, void *); extern int zfs_iter_dependents(zfs_handle_t *, boolean_t, zfs_iter_f, void *); extern int zfs_iter_filesystems(zfs_handle_t *, zfs_iter_f, void *); extern int zfs_iter_snapshots(zfs_handle_t *, boolean_t, zfs_iter_f, void *); extern int zfs_iter_snapshots_sorted(zfs_handle_t *, zfs_iter_f, void *); extern int zfs_iter_snapspec(zfs_handle_t *, const char *, zfs_iter_f, void *); typedef struct get_all_cb { zfs_handle_t **cb_handles; size_t cb_alloc; size_t cb_used; boolean_t cb_verbose; int (*cb_getone)(zfs_handle_t *, void *); } get_all_cb_t; void libzfs_add_handle(get_all_cb_t *, zfs_handle_t *); int libzfs_dataset_cmp(const void *, const void *); /* * Functions to create and destroy datasets. */ extern int zfs_create(libzfs_handle_t *, const char *, zfs_type_t, nvlist_t *); extern int zfs_create_ancestors(libzfs_handle_t *, const char *); extern int zfs_destroy(zfs_handle_t *, boolean_t); extern int zfs_destroy_snaps(zfs_handle_t *, char *, boolean_t); extern int zfs_destroy_snaps_nvl(zfs_handle_t *, nvlist_t *, boolean_t); extern int zfs_clone(zfs_handle_t *, const char *, nvlist_t *); extern int zfs_snapshot(libzfs_handle_t *, const char *, boolean_t, nvlist_t *); +extern int zfs_snapshot_nvl(libzfs_handle_t *hdl, nvlist_t *snaps, + nvlist_t *props); extern int zfs_rollback(zfs_handle_t *, zfs_handle_t *, boolean_t); extern int zfs_rename(zfs_handle_t *, const char *, boolean_t, boolean_t); typedef struct sendflags { /* print informational messages (ie, -v was specified) */ boolean_t verbose; /* recursive send (ie, -R) */ boolean_t replicate; /* for incrementals, do all intermediate snapshots */ boolean_t doall; /* if dataset is a clone, do incremental from its origin */ boolean_t fromorigin; /* do deduplication */ boolean_t dedup; /* send properties (ie, -p) */ boolean_t props; /* do not send (no-op, ie. -n) */ boolean_t dryrun; /* parsable verbose output (ie. -P) */ boolean_t parsable; /* show progress (ie. 
-v) */ boolean_t progress; } sendflags_t; typedef boolean_t (snapfilter_cb_t)(zfs_handle_t *, void *); extern int zfs_send(zfs_handle_t *, const char *, const char *, sendflags_t *, int, snapfilter_cb_t, void *, nvlist_t **); extern int zfs_promote(zfs_handle_t *); extern int zfs_hold(zfs_handle_t *, const char *, const char *, boolean_t, boolean_t, boolean_t, int, uint64_t, uint64_t); extern int zfs_release(zfs_handle_t *, const char *, const char *, boolean_t); extern int zfs_get_holds(zfs_handle_t *, nvlist_t **); extern uint64_t zvol_volsize_to_reservation(uint64_t, nvlist_t *); typedef int (*zfs_userspace_cb_t)(void *arg, const char *domain, uid_t rid, uint64_t space); extern int zfs_userspace(zfs_handle_t *, zfs_userquota_prop_t, zfs_userspace_cb_t, void *); extern int zfs_get_fsacl(zfs_handle_t *, nvlist_t **); extern int zfs_set_fsacl(zfs_handle_t *, boolean_t, nvlist_t *); typedef struct recvflags { /* print informational messages (ie, -v was specified) */ boolean_t verbose; /* the destination is a prefix, not the exact fs (ie, -d) */ boolean_t isprefix; /* * Only the tail of the sent snapshot path is appended to the * destination to determine the received snapshot name (ie, -e). */ boolean_t istail; /* do not actually do the recv, just check if it would work (ie, -n) */ boolean_t dryrun; /* rollback/destroy filesystems as necessary (eg, -F) */ boolean_t force; /* set "canmount=off" on all modified filesystems */ boolean_t canmountoff; /* byteswap flag is used internally; callers need not specify */ boolean_t byteswap; /* do not mount file systems as they are extracted (private) */ boolean_t nomount; } recvflags_t; extern int zfs_receive(libzfs_handle_t *, const char *, recvflags_t *, int, avl_tree_t *); typedef enum diff_flags { ZFS_DIFF_PARSEABLE = 0x1, ZFS_DIFF_TIMESTAMP = 0x2, ZFS_DIFF_CLASSIFY = 0x4 } diff_flags_t; extern int zfs_show_diffs(zfs_handle_t *, int, const char *, const char *, int); /* * Miscellaneous functions. */ extern const char *zfs_type_to_name(zfs_type_t); extern void zfs_refresh_properties(zfs_handle_t *); extern int zfs_name_valid(const char *, zfs_type_t); extern zfs_handle_t *zfs_path_to_zhandle(libzfs_handle_t *, char *, zfs_type_t); extern boolean_t zfs_dataset_exists(libzfs_handle_t *, const char *, zfs_type_t); extern int zfs_spa_version(zfs_handle_t *, int *); extern int zfs_append_partition(char *path, size_t max_len); extern int zfs_resolve_shortname(const char *name, char *path, size_t pathlen); extern int zfs_strcmp_pathname(char *name, char *cmp_name, int wholedisk); /* * Mount support functions. */ extern boolean_t is_mounted(libzfs_handle_t *, const char *special, char **); extern boolean_t zfs_is_mounted(zfs_handle_t *, char **); extern int zfs_mount(zfs_handle_t *, const char *, int); extern int zfs_unmount(zfs_handle_t *, const char *, int); extern int zfs_unmountall(zfs_handle_t *, int); /* * Share support functions. */ extern boolean_t zfs_is_shared(zfs_handle_t *); extern int zfs_share(zfs_handle_t *); extern int zfs_unshare(zfs_handle_t *); /* * Protocol-specific share support functions. 
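 *
 * Minimal sketch of the generic share calls above ("zhp" is assumed to be an
 * open filesystem handle):
 *
 *     if (zfs_is_mounted(zhp, NULL) && !zfs_is_shared(zhp))
 *         (void) zfs_share(zhp);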
*/ extern boolean_t zfs_is_shared_nfs(zfs_handle_t *, char **); extern boolean_t zfs_is_shared_smb(zfs_handle_t *, char **); extern int zfs_share_nfs(zfs_handle_t *); extern int zfs_share_smb(zfs_handle_t *); extern int zfs_shareall(zfs_handle_t *); extern int zfs_unshare_nfs(zfs_handle_t *, const char *); extern int zfs_unshare_smb(zfs_handle_t *, const char *); extern int zfs_unshareall_nfs(zfs_handle_t *); extern int zfs_unshareall_smb(zfs_handle_t *); extern int zfs_unshareall_bypath(zfs_handle_t *, const char *); extern int zfs_unshareall(zfs_handle_t *); extern int zfs_deleg_share_nfs(libzfs_handle_t *, char *, char *, char *, void *, void *, int, zfs_share_op_t); /* * Utility function to convert a number to a human-readable form. */ extern void zfs_nicenum(uint64_t, char *, size_t); extern int zfs_nicestrtonum(libzfs_handle_t *, const char *, uint64_t *); /* * Utility functions to run an external process. */ #define STDOUT_VERBOSE 0x01 #define STDERR_VERBOSE 0x02 int libzfs_run_process(const char *, char **, int flags); int libzfs_load_module(const char *); /* * Given a device or file, determine if it is part of a pool. */ extern int zpool_in_use(libzfs_handle_t *, int, pool_state_t *, char **, boolean_t *); /* * Label manipulation. */ extern int zpool_read_label(int, nvlist_t **); extern int zpool_clear_label(int); /* * Management interfaces for SMB ACL files */ int zfs_smb_acl_add(libzfs_handle_t *, char *, char *, char *); int zfs_smb_acl_remove(libzfs_handle_t *, char *, char *, char *); int zfs_smb_acl_purge(libzfs_handle_t *, char *, char *); int zfs_smb_acl_rename(libzfs_handle_t *, char *, char *, char *, char *); /* * Enable and disable datasets within a pool by mounting/unmounting and * sharing/unsharing them. */ extern int zpool_enable_datasets(zpool_handle_t *, const char *, int); extern int zpool_disable_datasets(zpool_handle_t *, boolean_t); /* * Mappings between vdev and FRU. */ extern void libzfs_fru_refresh(libzfs_handle_t *); extern const char *libzfs_fru_lookup(libzfs_handle_t *, const char *); extern const char *libzfs_fru_devpath(libzfs_handle_t *, const char *); extern boolean_t libzfs_fru_compare(libzfs_handle_t *, const char *, const char *); extern boolean_t libzfs_fru_notself(libzfs_handle_t *, const char *); extern int zpool_fru_set(zpool_handle_t *, uint64_t, const char *); #ifdef __cplusplus } #endif #endif /* _LIBZFS_H */ diff --git a/include/zfs_comutil.h b/include/libzfs_core.h similarity index 50% copy from include/zfs_comutil.h copy to include/libzfs_core.h index 61327f9aa909..9edc884a14d1 100644 --- a/include/zfs_comutil.h +++ b/include/libzfs_core.h @@ -1,46 +1,62 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ + /* - * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. 
+ * Copyright (c) 2012 by Delphix. All rights reserved. */ -#ifndef _ZFS_COMUTIL_H -#define _ZFS_COMUTIL_H +#ifndef _LIBZFS_CORE_H +#define _LIBZFS_CORE_H -#include +#include +#include #include +#include #ifdef __cplusplus extern "C" { #endif -extern boolean_t zfs_allocatable_devs(nvlist_t *); -extern void zpool_get_rewind_policy(nvlist_t *, zpool_rewind_policy_t *); +int libzfs_core_init(void); +void libzfs_core_fini(void); + +int lzc_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t **errlist); +int lzc_create(const char *fsname, dmu_objset_type_t type, nvlist_t *props); +int lzc_clone(const char *fsname, const char *origin, nvlist_t *props); +int lzc_destroy_snaps(nvlist_t *snaps, boolean_t defer, nvlist_t **errlist); + +int lzc_snaprange_space(const char *firstsnap, const char *lastsnap, + uint64_t *usedp); + +int lzc_send(const char *snapname, const char *fromsnap, int fd); +int lzc_receive(const char *snapname, nvlist_t *props, const char *origin, + boolean_t force, int fd); +int lzc_send_space(const char *snapname, const char *fromsnap, + uint64_t *result); + +boolean_t lzc_exists(const char *dataset); -extern int zfs_zpl_version_map(int spa_version); -extern int zfs_spa_version_map(int zpl_version); -extern const char *zfs_history_event_names[LOG_END]; #ifdef __cplusplus } #endif -#endif /* _ZFS_COMUTIL_H */ +#endif /* _LIBZFS_CORE_H */ diff --git a/include/libzfs_impl.h b/include/libzfs_impl.h index fabcb1183144..7daf8348b228 --- a/include/libzfs_impl.h +++ b/include/libzfs_impl.h @@ -1,222 +1,222 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011 by Delphix. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved.
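 *
 * Returning to the libzfs_core interface introduced above: the calls are
 * thin, nvlist-based wrappers around the ZFS ioctls.  A minimal sketch of
 * taking two snapshots in one request follows; it is not part of this
 * change, the pool and dataset names are made up, and the convention that
 * "snaps" is keyed by full snapshot names is inferred from the
 * lzc_snapshot()/zfs_snapshot_nvl() plumbing elsewhere in this patch:
 *
 *     nvlist_t *snaps, *errlist = NULL;
 *
 *     if (libzfs_core_init() != 0)
 *         return (1);
 *     (void) nvlist_alloc(&snaps, NV_UNIQUE_NAME, 0);
 *     (void) nvlist_add_boolean(snaps, "tank/home@backup");
 *     (void) nvlist_add_boolean(snaps, "tank/src@backup");
 *     if (lzc_snapshot(snaps, NULL, &errlist) != 0) {
 *         ...                (errlist, when set, maps each failed snapshot
 *                            name to an errno value)
 *         nvlist_free(errlist);
 *     }
 *     nvlist_free(snaps);
 *     libzfs_core_fini();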
*/ -#ifndef _LIBFS_IMPL_H -#define _LIBFS_IMPL_H +#ifndef _LIBZFS_IMPL_H +#define _LIBZFS_IMPL_H #include #include #include #include #include #include #include #include +#include #if defined(HAVE_LIBTOPO) #include #endif /* HAVE_LIBTOPO */ #ifdef __cplusplus extern "C" { #endif #ifdef VERIFY #undef VERIFY #endif #define VERIFY verify typedef struct libzfs_fru { char *zf_device; char *zf_fru; struct libzfs_fru *zf_chain; struct libzfs_fru *zf_next; } libzfs_fru_t; struct libzfs_handle { int libzfs_error; int libzfs_fd; FILE *libzfs_mnttab; FILE *libzfs_sharetab; zpool_handle_t *libzfs_pool_handles; uu_avl_pool_t *libzfs_ns_avlpool; uu_avl_t *libzfs_ns_avl; uint64_t libzfs_ns_gen; int libzfs_desc_active; char libzfs_action[1024]; char libzfs_desc[1024]; - char *libzfs_log_str; int libzfs_printerr; int libzfs_storeerr; /* stuff error messages into buffer */ void *libzfs_sharehdl; /* libshare handle */ uint_t libzfs_shareflags; boolean_t libzfs_mnttab_enable; avl_tree_t libzfs_mnttab_cache; int libzfs_pool_iter; #if defined(HAVE_LIBTOPO) topo_hdl_t *libzfs_topo_hdl; libzfs_fru_t **libzfs_fru_hash; libzfs_fru_t *libzfs_fru_list; #endif /* HAVE_LIBTOPO */ char libzfs_chassis_id[256]; }; #define ZFSSHARE_MISS 0x01 /* Didn't find entry in cache */ struct zfs_handle { libzfs_handle_t *zfs_hdl; zpool_handle_t *zpool_hdl; char zfs_name[ZFS_MAXNAMELEN]; zfs_type_t zfs_type; /* type including snapshot */ zfs_type_t zfs_head_type; /* type excluding snapshot */ dmu_objset_stats_t zfs_dmustats; nvlist_t *zfs_props; nvlist_t *zfs_user_props; nvlist_t *zfs_recvd_props; boolean_t zfs_mntcheck; char *zfs_mntopts; uint8_t *zfs_props_table; }; /* * This is different from checking zfs_type, because it will also catch * snapshots of volumes. */ #define ZFS_IS_VOLUME(zhp) ((zhp)->zfs_head_type == ZFS_TYPE_VOLUME) struct zpool_handle { libzfs_handle_t *zpool_hdl; zpool_handle_t *zpool_next; char zpool_name[ZPOOL_MAXNAMELEN]; int zpool_state; size_t zpool_config_size; nvlist_t *zpool_config; nvlist_t *zpool_old_config; nvlist_t *zpool_props; diskaddr_t zpool_start_block; }; typedef enum { PROTO_NFS = 0, PROTO_SMB = 1, PROTO_END = 2 } zfs_share_proto_t; /* * The following can be used as a bitmask and any new values * added must preserve that capability. 
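 *
 * For example (sketch only), a dataset shared over both protocols carries
 * both bits:
 *
 *     zfs_share_type_t how = SHARED_NFS | SHARED_SMB;
 *
 *     if (how & SHARED_SMB)
 *         ...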
*/ typedef enum { SHARED_NOT_SHARED = 0x0, SHARED_NFS = 0x2, SHARED_SMB = 0x4 } zfs_share_type_t; int zfs_error(libzfs_handle_t *, int, const char *); int zfs_error_fmt(libzfs_handle_t *, int, const char *, ...); void zfs_error_aux(libzfs_handle_t *, const char *, ...); void *zfs_alloc(libzfs_handle_t *, size_t); void *zfs_realloc(libzfs_handle_t *, void *, size_t, size_t); char *zfs_asprintf(libzfs_handle_t *, const char *, ...); char *zfs_strdup(libzfs_handle_t *, const char *); int no_memory(libzfs_handle_t *); int zfs_standard_error(libzfs_handle_t *, int, const char *); int zfs_standard_error_fmt(libzfs_handle_t *, int, const char *, ...); int zpool_standard_error(libzfs_handle_t *, int, const char *); int zpool_standard_error_fmt(libzfs_handle_t *, int, const char *, ...); int get_dependents(libzfs_handle_t *, boolean_t, const char *, char ***, size_t *); zfs_handle_t *make_dataset_handle_zc(libzfs_handle_t *, zfs_cmd_t *); zfs_handle_t *make_dataset_simple_handle_zc(zfs_handle_t *, zfs_cmd_t *); int zprop_parse_value(libzfs_handle_t *, nvpair_t *, int, zfs_type_t, nvlist_t *, char **, uint64_t *, const char *); int zprop_expand_list(libzfs_handle_t *hdl, zprop_list_t **plp, zfs_type_t type); /* * Use this changelist_gather() flag to force attempting mounts * on each change node regardless of whether or not it is currently * mounted. */ #define CL_GATHER_MOUNT_ALWAYS 1 typedef struct prop_changelist prop_changelist_t; int zcmd_alloc_dst_nvlist(libzfs_handle_t *, zfs_cmd_t *, size_t); int zcmd_write_src_nvlist(libzfs_handle_t *, zfs_cmd_t *, nvlist_t *); int zcmd_write_conf_nvlist(libzfs_handle_t *, zfs_cmd_t *, nvlist_t *); int zcmd_expand_dst_nvlist(libzfs_handle_t *, zfs_cmd_t *); int zcmd_read_dst_nvlist(libzfs_handle_t *, zfs_cmd_t *, nvlist_t **); void zcmd_free_nvlists(zfs_cmd_t *); int changelist_prefix(prop_changelist_t *); int changelist_postfix(prop_changelist_t *); void changelist_rename(prop_changelist_t *, const char *, const char *); void changelist_remove(prop_changelist_t *, const char *); void changelist_free(prop_changelist_t *); prop_changelist_t *changelist_gather(zfs_handle_t *, zfs_prop_t, int, int); int changelist_unshare(prop_changelist_t *, zfs_share_proto_t *); int changelist_haszonedchild(prop_changelist_t *); void remove_mountpoint(zfs_handle_t *); int create_parents(libzfs_handle_t *, char *, int); boolean_t isa_child_of(const char *dataset, const char *parent); zfs_handle_t *make_dataset_handle(libzfs_handle_t *, const char *); int zpool_open_silent(libzfs_handle_t *, const char *, zpool_handle_t **); int zvol_create_link(libzfs_handle_t *, const char *); int zvol_remove_link(libzfs_handle_t *, const char *); boolean_t zpool_name_valid(libzfs_handle_t *, boolean_t, const char *); int zfs_validate_name(libzfs_handle_t *hdl, const char *path, int type, boolean_t modifying); void namespace_clear(libzfs_handle_t *); /* * libshare (sharemgr) interfaces used internally. 
*/ extern int zfs_init_libshare(libzfs_handle_t *, int); extern void zfs_uninit_libshare(libzfs_handle_t *); extern int zfs_parse_options(char *, zfs_share_proto_t); extern int zfs_unshare_proto(zfs_handle_t *, const char *, zfs_share_proto_t *); extern void libzfs_fru_clear(libzfs_handle_t *, boolean_t); #ifdef __cplusplus } #endif -#endif /* _LIBFS_IMPL_H */ +#endif /* _LIBZFS_IMPL_H */ diff --git a/include/sys/dmu.h b/include/sys/dmu.h index adaab4c6bb61..c50df391ecb3 100644 --- a/include/sys/dmu.h +++ b/include/sys/dmu.h @@ -1,837 +1,829 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012 by Delphix. All rights reserved. * Copyright (c) 2012, Joyent, Inc. All rights reserved. */ /* Portions Copyright 2010 Robert Milkowski */ #ifndef _SYS_DMU_H #define _SYS_DMU_H /* * This file describes the interface that the DMU provides for its * consumers. * * The DMU also interacts with the SPA. That interface is described in * dmu_spa.h. */ #include #include #include #include #include +#include #include #ifdef __cplusplus extern "C" { #endif struct page; struct vnode; struct spa; struct zilog; struct zio; struct blkptr; struct zap_cursor; struct dsl_dataset; struct dsl_pool; struct dnode; struct drr_begin; struct drr_end; struct zbookmark; struct spa; struct nvlist; struct arc_buf; struct zio_prop; struct sa_handle; typedef struct objset objset_t; typedef struct dmu_tx dmu_tx_t; typedef struct dsl_dir dsl_dir_t; typedef enum dmu_object_byteswap { DMU_BSWAP_UINT8, DMU_BSWAP_UINT16, DMU_BSWAP_UINT32, DMU_BSWAP_UINT64, DMU_BSWAP_ZAP, DMU_BSWAP_DNODE, DMU_BSWAP_OBJSET, DMU_BSWAP_ZNODE, DMU_BSWAP_OLDACL, DMU_BSWAP_ACL, /* * Allocating a new byteswap type number makes the on-disk format * incompatible with any other format that uses the same number. * * Data can usually be structured to work with one of the * DMU_BSWAP_UINT* or DMU_BSWAP_ZAP types. */ DMU_BSWAP_NUMFUNCS } dmu_object_byteswap_t; #define DMU_OT_NEWTYPE 0x80 #define DMU_OT_METADATA 0x40 #define DMU_OT_BYTESWAP_MASK 0x3f /* * Defines a uint8_t object type. Object types specify if the data * in the object is metadata (boolean) and how to byteswap the data * (dmu_object_byteswap_t). */ #define DMU_OT(byteswap, metadata) \ (DMU_OT_NEWTYPE | \ ((metadata) ? DMU_OT_METADATA : 0) | \ ((byteswap) & DMU_OT_BYTESWAP_MASK)) #define DMU_OT_IS_VALID(ot) (((ot) & DMU_OT_NEWTYPE) ? \ ((ot) & DMU_OT_BYTESWAP_MASK) < DMU_BSWAP_NUMFUNCS : \ (ot) < DMU_OT_NUMTYPES) #define DMU_OT_IS_METADATA(ot) (((ot) & DMU_OT_NEWTYPE) ? \ ((ot) & DMU_OT_METADATA) : \ dmu_ot[(int)(ot)].ot_metadata) #define DMU_OT_BYTESWAP(ot) (((ot) & DMU_OT_NEWTYPE) ? 
\ ((ot) & DMU_OT_BYTESWAP_MASK) : \ dmu_ot[(int)(ot)].ot_byteswap) typedef enum dmu_object_type { DMU_OT_NONE, /* general: */ DMU_OT_OBJECT_DIRECTORY, /* ZAP */ DMU_OT_OBJECT_ARRAY, /* UINT64 */ DMU_OT_PACKED_NVLIST, /* UINT8 (XDR by nvlist_pack/unpack) */ DMU_OT_PACKED_NVLIST_SIZE, /* UINT64 */ DMU_OT_BPOBJ, /* UINT64 */ DMU_OT_BPOBJ_HDR, /* UINT64 */ /* spa: */ DMU_OT_SPACE_MAP_HEADER, /* UINT64 */ DMU_OT_SPACE_MAP, /* UINT64 */ /* zil: */ DMU_OT_INTENT_LOG, /* UINT64 */ /* dmu: */ DMU_OT_DNODE, /* DNODE */ DMU_OT_OBJSET, /* OBJSET */ /* dsl: */ DMU_OT_DSL_DIR, /* UINT64 */ DMU_OT_DSL_DIR_CHILD_MAP, /* ZAP */ DMU_OT_DSL_DS_SNAP_MAP, /* ZAP */ DMU_OT_DSL_PROPS, /* ZAP */ DMU_OT_DSL_DATASET, /* UINT64 */ /* zpl: */ DMU_OT_ZNODE, /* ZNODE */ DMU_OT_OLDACL, /* Old ACL */ DMU_OT_PLAIN_FILE_CONTENTS, /* UINT8 */ DMU_OT_DIRECTORY_CONTENTS, /* ZAP */ DMU_OT_MASTER_NODE, /* ZAP */ DMU_OT_UNLINKED_SET, /* ZAP */ /* zvol: */ DMU_OT_ZVOL, /* UINT8 */ DMU_OT_ZVOL_PROP, /* ZAP */ /* other; for testing only! */ DMU_OT_PLAIN_OTHER, /* UINT8 */ DMU_OT_UINT64_OTHER, /* UINT64 */ DMU_OT_ZAP_OTHER, /* ZAP */ /* new object types: */ DMU_OT_ERROR_LOG, /* ZAP */ DMU_OT_SPA_HISTORY, /* UINT8 */ DMU_OT_SPA_HISTORY_OFFSETS, /* spa_his_phys_t */ DMU_OT_POOL_PROPS, /* ZAP */ DMU_OT_DSL_PERMS, /* ZAP */ DMU_OT_ACL, /* ACL */ DMU_OT_SYSACL, /* SYSACL */ DMU_OT_FUID, /* FUID table (Packed NVLIST UINT8) */ DMU_OT_FUID_SIZE, /* FUID table size UINT64 */ DMU_OT_NEXT_CLONES, /* ZAP */ DMU_OT_SCAN_QUEUE, /* ZAP */ DMU_OT_USERGROUP_USED, /* ZAP */ DMU_OT_USERGROUP_QUOTA, /* ZAP */ DMU_OT_USERREFS, /* ZAP */ DMU_OT_DDT_ZAP, /* ZAP */ DMU_OT_DDT_STATS, /* ZAP */ DMU_OT_SA, /* System attr */ DMU_OT_SA_MASTER_NODE, /* ZAP */ DMU_OT_SA_ATTR_REGISTRATION, /* ZAP */ DMU_OT_SA_ATTR_LAYOUTS, /* ZAP */ DMU_OT_SCAN_XLATE, /* ZAP */ DMU_OT_DEDUP, /* fake dedup BP from ddt_bp_create() */ DMU_OT_DEADLIST, /* ZAP */ DMU_OT_DEADLIST_HDR, /* UINT64 */ DMU_OT_DSL_CLONES, /* ZAP */ DMU_OT_BPOBJ_SUBOBJ, /* UINT64 */ /* * Do not allocate new object types here. Doing so makes the on-disk * format incompatible with any other format that uses the same object * type number. * * When creating an object which does not have one of the above types * use the DMU_OTN_* type with the correct byteswap and metadata * values. * * The DMU_OTN_* types do not have entries in the dmu_ot table, * use the DMU_OT_IS_METDATA() and DMU_OT_BYTESWAP() macros instead * of indexing into dmu_ot directly (this works for both DMU_OT_* types * and DMU_OTN_* types). */ DMU_OT_NUMTYPES, /* * Names for valid types declared with DMU_OT(). */ DMU_OTN_UINT8_DATA = DMU_OT(DMU_BSWAP_UINT8, B_FALSE), DMU_OTN_UINT8_METADATA = DMU_OT(DMU_BSWAP_UINT8, B_TRUE), DMU_OTN_UINT16_DATA = DMU_OT(DMU_BSWAP_UINT16, B_FALSE), DMU_OTN_UINT16_METADATA = DMU_OT(DMU_BSWAP_UINT16, B_TRUE), DMU_OTN_UINT32_DATA = DMU_OT(DMU_BSWAP_UINT32, B_FALSE), DMU_OTN_UINT32_METADATA = DMU_OT(DMU_BSWAP_UINT32, B_TRUE), DMU_OTN_UINT64_DATA = DMU_OT(DMU_BSWAP_UINT64, B_FALSE), DMU_OTN_UINT64_METADATA = DMU_OT(DMU_BSWAP_UINT64, B_TRUE), DMU_OTN_ZAP_DATA = DMU_OT(DMU_BSWAP_ZAP, B_FALSE), DMU_OTN_ZAP_METADATA = DMU_OT(DMU_BSWAP_ZAP, B_TRUE), } dmu_object_type_t; -typedef enum dmu_objset_type { - DMU_OST_NONE, - DMU_OST_META, - DMU_OST_ZFS, - DMU_OST_ZVOL, - DMU_OST_OTHER, /* For testing only! */ - DMU_OST_ANY, /* Be careful! 
*/ - DMU_OST_NUMTYPES -} dmu_objset_type_t; - void byteswap_uint64_array(void *buf, size_t size); void byteswap_uint32_array(void *buf, size_t size); void byteswap_uint16_array(void *buf, size_t size); void byteswap_uint8_array(void *buf, size_t size); void zap_byteswap(void *buf, size_t size); void zfs_oldacl_byteswap(void *buf, size_t size); void zfs_acl_byteswap(void *buf, size_t size); void zfs_znode_byteswap(void *buf, size_t size); #define DS_FIND_SNAPSHOTS (1<<0) #define DS_FIND_CHILDREN (1<<1) /* * The maximum number of bytes that can be accessed as part of one * operation, including metadata. */ #define DMU_MAX_ACCESS (10<<20) /* 10MB */ #define DMU_MAX_DELETEBLKCNT (20480) /* ~5MB of indirect blocks */ #define DMU_USERUSED_OBJECT (-1ULL) #define DMU_GROUPUSED_OBJECT (-2ULL) #define DMU_DEADLIST_OBJECT (-3ULL) /* * artificial blkids for bonus buffer and spill blocks */ #define DMU_BONUS_BLKID (-1ULL) #define DMU_SPILL_BLKID (-2ULL) /* * Public routines to create, destroy, open, and close objsets. */ int dmu_objset_hold(const char *name, void *tag, objset_t **osp); int dmu_objset_own(const char *name, dmu_objset_type_t type, boolean_t readonly, void *tag, objset_t **osp); void dmu_objset_rele(objset_t *os, void *tag); void dmu_objset_disown(objset_t *os, void *tag); int dmu_objset_open_ds(struct dsl_dataset *ds, objset_t **osp); int dmu_objset_evict_dbufs(objset_t *os); int dmu_objset_create(const char *name, dmu_objset_type_t type, uint64_t flags, void (*func)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx), void *arg); int dmu_objset_clone(const char *name, struct dsl_dataset *clone_origin, uint64_t flags); int dmu_objset_destroy(const char *name, boolean_t defer); -int dmu_snapshots_destroy_nvl(struct nvlist *snaps, boolean_t defer, char *); -int dmu_objset_snapshot(char *fsname, char *snapname, char *tag, - struct nvlist *props, boolean_t recursive, boolean_t temporary, int fd); +int dmu_snapshots_destroy_nvl(struct nvlist *snaps, boolean_t defer, + struct nvlist *errlist); +int dmu_objset_snapshot(struct nvlist *snaps, struct nvlist *, struct nvlist *); +int dmu_objset_snapshot_one(const char *fsname, const char *snapname); +int dmu_objset_snapshot_tmp(const char *, const char *, int); int dmu_objset_rename(const char *name, const char *newname, boolean_t recursive); int dmu_objset_find(char *name, int func(const char *, void *), void *arg, int flags); void dmu_objset_byteswap(void *buf, size_t size); typedef struct dmu_buf { uint64_t db_object; /* object that this buffer is part of */ uint64_t db_offset; /* byte offset in this object */ uint64_t db_size; /* size of buffer in bytes */ void *db_data; /* data in buffer */ } dmu_buf_t; typedef void dmu_buf_evict_func_t(struct dmu_buf *db, void *user_ptr); /* * The names of zap entries in the DIRECTORY_OBJECT of the MOS. 
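 *
 * These names are looked up in the MOS directory object through the regular
 * ZAP interfaces, e.g. (sketch only, in-kernel context assumed; "mos" is the
 * meta objset and "obj" receives the referenced object number):
 *
 *     uint64_t obj;
 *     int error = zap_lookup(mos, DMU_POOL_DIRECTORY_OBJECT,
 *         DMU_POOL_CONFIG, sizeof (uint64_t), 1, &obj);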
*/ #define DMU_POOL_DIRECTORY_OBJECT 1 #define DMU_POOL_CONFIG "config" #define DMU_POOL_FEATURES_FOR_WRITE "features_for_write" #define DMU_POOL_FEATURES_FOR_READ "features_for_read" #define DMU_POOL_FEATURE_DESCRIPTIONS "feature_descriptions" #define DMU_POOL_ROOT_DATASET "root_dataset" #define DMU_POOL_SYNC_BPOBJ "sync_bplist" #define DMU_POOL_ERRLOG_SCRUB "errlog_scrub" #define DMU_POOL_ERRLOG_LAST "errlog_last" #define DMU_POOL_SPARES "spares" #define DMU_POOL_DEFLATE "deflate" #define DMU_POOL_HISTORY "history" #define DMU_POOL_PROPS "pool_props" #define DMU_POOL_L2CACHE "l2cache" #define DMU_POOL_TMP_USERREFS "tmp_userrefs" #define DMU_POOL_DDT "DDT-%s-%s-%s" #define DMU_POOL_DDT_STATS "DDT-statistics" #define DMU_POOL_CREATION_VERSION "creation_version" #define DMU_POOL_SCAN "scan" #define DMU_POOL_FREE_BPOBJ "free_bpobj" #define DMU_POOL_BPTREE_OBJ "bptree_obj" #define DMU_POOL_EMPTY_BPOBJ "empty_bpobj" /* * Allocate an object from this objset. The range of object numbers * available is (0, DN_MAX_OBJECT). Object 0 is the meta-dnode. * * The transaction must be assigned to a txg. The newly allocated * object will be "held" in the transaction (ie. you can modify the * newly allocated object in this transaction). * * dmu_object_alloc() chooses an object and returns it in *objectp. * * dmu_object_claim() allocates a specific object number. If that * number is already allocated, it fails and returns EEXIST. * * Return 0 on success, or ENOSPC or EEXIST as specified above. */ uint64_t dmu_object_alloc(objset_t *os, dmu_object_type_t ot, int blocksize, dmu_object_type_t bonus_type, int bonus_len, dmu_tx_t *tx); int dmu_object_claim(objset_t *os, uint64_t object, dmu_object_type_t ot, int blocksize, dmu_object_type_t bonus_type, int bonus_len, dmu_tx_t *tx); int dmu_object_reclaim(objset_t *os, uint64_t object, dmu_object_type_t ot, int blocksize, dmu_object_type_t bonustype, int bonuslen); /* * Free an object from this objset. * * The object's data will be freed as well (ie. you don't need to call * dmu_free(object, 0, -1, tx)). * * The object need not be held in the transaction. * * If there are any holds on this object's buffers (via dmu_buf_hold()), * or tx holds on the object (via dmu_tx_hold_object()), you can not * free it; it fails and returns EBUSY. * * If the object is not allocated, it fails and returns ENOENT. * * Return 0 on success, or EBUSY or ENOENT as specified above. */ int dmu_object_free(objset_t *os, uint64_t object, dmu_tx_t *tx); /* * Find the next allocated or free object. * * The objectp parameter is in-out. It will be updated to be the next * object which is allocated. Ignore objects which have not been * modified since txg. * * XXX Can only be called on a objset with no dirty data. * * Returns 0 on success, or ENOENT if there are no more objects. */ int dmu_object_next(objset_t *os, uint64_t *objectp, boolean_t hole, uint64_t txg); /* * Set the data blocksize for an object. * * The object cannot have any blocks allcated beyond the first. If * the first block is allocated already, the new size must be greater * than the current block size. If these conditions are not met, * ENOTSUP will be returned. * * Returns 0 on success, or EBUSY if there are any holds on the object * contents, or ENOTSUP as described above. */ int dmu_object_set_blocksize(objset_t *os, uint64_t object, uint64_t size, int ibs, dmu_tx_t *tx); /* * Set the checksum property on a dnode. The new checksum algorithm will * apply to all newly written blocks; existing blocks will not be affected. 
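 *
 * (Returning to the allocation interfaces above: a new object is created
 * inside an assigned transaction.  Sketch only, in-kernel context assumed;
 * "os" is an open objset.)
 *
 *     dmu_tx_t *tx = dmu_tx_create(os);
 *     uint64_t object;
 *
 *     dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT);
 *     if (dmu_tx_assign(tx, TXG_WAIT) != 0) {
 *         dmu_tx_abort(tx);
 *         return;
 *     }
 *     object = dmu_object_alloc(os, DMU_OT_UINT64_OTHER, 0,
 *         DMU_OT_NONE, 0, tx);
 *     dmu_tx_commit(tx);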
*/ void dmu_object_set_checksum(objset_t *os, uint64_t object, uint8_t checksum, dmu_tx_t *tx); /* * Set the compress property on a dnode. The new compression algorithm will * apply to all newly written blocks; existing blocks will not be affected. */ void dmu_object_set_compress(objset_t *os, uint64_t object, uint8_t compress, dmu_tx_t *tx); /* * Decide how to write a block: checksum, compression, number of copies, etc. */ #define WP_NOFILL 0x1 #define WP_DMU_SYNC 0x2 #define WP_SPILL 0x4 void dmu_write_policy(objset_t *os, struct dnode *dn, int level, int wp, struct zio_prop *zp); /* * The bonus data is accessed more or less like a regular buffer. * You must dmu_bonus_hold() to get the buffer, which will give you a * dmu_buf_t with db_offset==-1ULL, and db_size = the size of the bonus * data. As with any normal buffer, you must call dmu_buf_read() to * read db_data, dmu_buf_will_dirty() before modifying it, and the * object must be held in an assigned transaction before calling * dmu_buf_will_dirty. You may use dmu_buf_set_user() on the bonus * buffer as well. You must release what you hold with dmu_buf_rele(). */ int dmu_bonus_hold(objset_t *os, uint64_t object, void *tag, dmu_buf_t **); int dmu_bonus_max(void); int dmu_set_bonus(dmu_buf_t *, int, dmu_tx_t *); int dmu_set_bonustype(dmu_buf_t *, dmu_object_type_t, dmu_tx_t *); dmu_object_type_t dmu_get_bonustype(dmu_buf_t *); int dmu_rm_spill(objset_t *, uint64_t, dmu_tx_t *); /* * Special spill buffer support used by "SA" framework */ int dmu_spill_hold_by_bonus(dmu_buf_t *bonus, void *tag, dmu_buf_t **dbp); int dmu_spill_hold_by_dnode(struct dnode *dn, uint32_t flags, void *tag, dmu_buf_t **dbp); int dmu_spill_hold_existing(dmu_buf_t *bonus, void *tag, dmu_buf_t **dbp); /* * Obtain the DMU buffer from the specified object which contains the * specified offset. dmu_buf_hold() puts a "hold" on the buffer, so * that it will remain in memory. You must release the hold with * dmu_buf_rele(). You must not access the dmu_buf_t after releasing * what you hold. You must have a hold on any dmu_buf_t* you pass to the DMU. * * You must call dmu_buf_read, dmu_buf_will_dirty, or dmu_buf_will_fill * on the returned buffer before reading or writing the buffer's * db_data. The comments for those routines describe what particular * operations are valid after calling them. * * The object number must be a valid, allocated object number. */ int dmu_buf_hold(objset_t *os, uint64_t object, uint64_t offset, void *tag, dmu_buf_t **, int flags); void dmu_buf_add_ref(dmu_buf_t *db, void* tag); void dmu_buf_rele(dmu_buf_t *db, void *tag); uint64_t dmu_buf_refcount(dmu_buf_t *db); /* * dmu_buf_hold_array holds the DMU buffers which contain all bytes in a * range of an object. A pointer to an array of dmu_buf_t*'s is * returned (in *dbpp). * * dmu_buf_rele_array releases the hold on an array of dmu_buf_t*'s, and * frees the array. The hold on the array of buffers MUST be released * with dmu_buf_rele_array. You can NOT release the hold on each buffer * individually with dmu_buf_rele. */ int dmu_buf_hold_array_by_bonus(dmu_buf_t *db, uint64_t offset, uint64_t length, int read, void *tag, int *numbufsp, dmu_buf_t ***dbpp); void dmu_buf_rele_array(dmu_buf_t **, int numbufs, void *tag); /* * Returns NULL on success, or the existing user ptr if it's already * been set. * * user_ptr is for use by the user and can be obtained via dmu_buf_get_user(). 
* * user_data_ptr_ptr should be NULL, or a pointer to a pointer which * will be set to db->db_data when you are allowed to access it. Note * that db->db_data (the pointer) can change when you do dmu_buf_read(), * dmu_buf_tryupgrade(), dmu_buf_will_dirty(), or dmu_buf_will_fill(). * *user_data_ptr_ptr will be set to the new value when it changes. * * If non-NULL, pageout func will be called when this buffer is being * excised from the cache, so that you can clean up the data structure * pointed to by user_ptr. * * dmu_evict_user() will call the pageout func for all buffers in a * objset with a given pageout func. */ void *dmu_buf_set_user(dmu_buf_t *db, void *user_ptr, void *user_data_ptr_ptr, dmu_buf_evict_func_t *pageout_func); /* * set_user_ie is the same as set_user, but request immediate eviction * when hold count goes to zero. */ void *dmu_buf_set_user_ie(dmu_buf_t *db, void *user_ptr, void *user_data_ptr_ptr, dmu_buf_evict_func_t *pageout_func); void *dmu_buf_update_user(dmu_buf_t *db_fake, void *old_user_ptr, void *user_ptr, void *user_data_ptr_ptr, dmu_buf_evict_func_t *pageout_func); void dmu_evict_user(objset_t *os, dmu_buf_evict_func_t *func); /* * Returns the user_ptr set with dmu_buf_set_user(), or NULL if not set. */ void *dmu_buf_get_user(dmu_buf_t *db); /* * Indicate that you are going to modify the buffer's data (db_data). * * The transaction (tx) must be assigned to a txg (ie. you've called * dmu_tx_assign()). The buffer's object must be held in the tx * (ie. you've called dmu_tx_hold_object(tx, db->db_object)). */ void dmu_buf_will_dirty(dmu_buf_t *db, dmu_tx_t *tx); /* * Tells if the given dbuf is freeable. */ boolean_t dmu_buf_freeable(dmu_buf_t *); /* * You must create a transaction, then hold the objects which you will * (or might) modify as part of this transaction. Then you must assign * the transaction to a transaction group. Once the transaction has * been assigned, you can modify buffers which belong to held objects as * part of this transaction. You can't modify buffers before the * transaction has been assigned; you can't modify buffers which don't * belong to objects which this transaction holds; you can't hold * objects once the transaction has been assigned. You may hold an * object which you are going to free (with dmu_object_free()), but you * don't have to. * * You can abort the transaction before it has been assigned. * * Note that you may hold buffers (with dmu_buf_hold) at any time, * regardless of transaction state. */ #define DMU_NEW_OBJECT (-1ULL) #define DMU_OBJECT_END (-1ULL) dmu_tx_t *dmu_tx_create(objset_t *os); void dmu_tx_hold_write(dmu_tx_t *tx, uint64_t object, uint64_t off, int len); void dmu_tx_hold_free(dmu_tx_t *tx, uint64_t object, uint64_t off, uint64_t len); void dmu_tx_hold_zap(dmu_tx_t *tx, uint64_t object, int add, const char *name); void dmu_tx_hold_bonus(dmu_tx_t *tx, uint64_t object); void dmu_tx_hold_spill(dmu_tx_t *tx, uint64_t object); void dmu_tx_hold_sa(dmu_tx_t *tx, struct sa_handle *hdl, boolean_t may_grow); void dmu_tx_hold_sa_create(dmu_tx_t *tx, int total_size); void dmu_tx_abort(dmu_tx_t *tx); int dmu_tx_assign(dmu_tx_t *tx, uint64_t txg_how); void dmu_tx_wait(dmu_tx_t *tx); void dmu_tx_commit(dmu_tx_t *tx); /* * To register a commit callback, dmu_tx_callback_register() must be called. * * dcb_data is a pointer to caller private data that is passed on as a * callback parameter. The caller is responsible for properly allocating and * freeing it. 
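 *
 * Registration sketch (in-kernel context assumed; "my_done" and its argument
 * are hypothetical):
 *
 *     static void
 *     my_done(void *arg, int error)
 *     {
 *         if (error != 0)
 *             ...            (the tx did not make it to stable storage)
 *         kmem_free(arg, sizeof (my_state_t));
 *     }
 *
 *     dmu_tx_callback_register(tx, my_done, arg);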
* * When registering a callback, the transaction must be already created, but * it cannot be committed or aborted. It can be assigned to a txg or not. * * The callback will be called after the transaction has been safely written * to stable storage and will also be called if the dmu_tx is aborted. * If there is any error which prevents the transaction from being committed to * disk, the callback will be called with a value of error != 0. */ typedef void dmu_tx_callback_func_t(void *dcb_data, int error); void dmu_tx_callback_register(dmu_tx_t *tx, dmu_tx_callback_func_t *dcb_func, void *dcb_data); /* * Free up the data blocks for a defined range of a file. If size is * -1, the range from offset to end-of-file is freed. */ int dmu_free_range(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, dmu_tx_t *tx); int dmu_free_long_range(objset_t *os, uint64_t object, uint64_t offset, uint64_t size); int dmu_free_object(objset_t *os, uint64_t object); /* * Convenience functions. * * Canfail routines will return 0 on success, or an errno if there is a * nonrecoverable I/O error. */ #define DMU_READ_PREFETCH 0 /* prefetch */ #define DMU_READ_NO_PREFETCH 1 /* don't prefetch */ int dmu_read(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, void *buf, uint32_t flags); void dmu_write(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, const void *buf, dmu_tx_t *tx); void dmu_prealloc(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, dmu_tx_t *tx); #ifdef _KERNEL #include int dmu_read_req(objset_t *os, uint64_t object, struct request *req); int dmu_write_req(objset_t *os, uint64_t object, struct request *req, dmu_tx_t *tx); int dmu_read_uio(objset_t *os, uint64_t object, struct uio *uio, uint64_t size); int dmu_write_uio(objset_t *os, uint64_t object, struct uio *uio, uint64_t size, dmu_tx_t *tx); int dmu_write_uio_dbuf(dmu_buf_t *zdb, struct uio *uio, uint64_t size, dmu_tx_t *tx); #endif struct arc_buf *dmu_request_arcbuf(dmu_buf_t *handle, int size); void dmu_return_arcbuf(struct arc_buf *buf); void dmu_assign_arcbuf(dmu_buf_t *handle, uint64_t offset, struct arc_buf *buf, dmu_tx_t *tx); int dmu_xuio_init(struct xuio *uio, int niov); void dmu_xuio_fini(struct xuio *uio); int dmu_xuio_add(struct xuio *uio, struct arc_buf *abuf, offset_t off, size_t n); int dmu_xuio_cnt(struct xuio *uio); struct arc_buf *dmu_xuio_arcbuf(struct xuio *uio, int i); void dmu_xuio_clear(struct xuio *uio, int i); void xuio_stat_wbuf_copied(void); void xuio_stat_wbuf_nocopy(void); extern int zfs_prefetch_disable; /* * Asynchronously try to read in the data. */ void dmu_prefetch(objset_t *os, uint64_t object, uint64_t offset, uint64_t len); typedef struct dmu_object_info { /* All sizes are in bytes unless otherwise indicated. 
*/ uint32_t doi_data_block_size; uint32_t doi_metadata_block_size; dmu_object_type_t doi_type; dmu_object_type_t doi_bonus_type; uint64_t doi_bonus_size; uint8_t doi_indirection; /* 2 = dnode->indirect->data */ uint8_t doi_checksum; uint8_t doi_compress; uint8_t doi_pad[5]; uint64_t doi_physical_blocks_512; /* data + metadata, 512b blks */ uint64_t doi_max_offset; uint64_t doi_fill_count; /* number of non-empty blocks */ } dmu_object_info_t; typedef void (*const arc_byteswap_func_t)(void *buf, size_t size); typedef struct dmu_object_type_info { dmu_object_byteswap_t ot_byteswap; boolean_t ot_metadata; char *ot_name; } dmu_object_type_info_t; typedef const struct dmu_object_byteswap_info { arc_byteswap_func_t ob_func; char *ob_name; } dmu_object_byteswap_info_t; extern const dmu_object_type_info_t dmu_ot[DMU_OT_NUMTYPES]; extern const dmu_object_byteswap_info_t dmu_ot_byteswap[DMU_BSWAP_NUMFUNCS]; /* * Get information on a DMU object. * * Return 0 on success or ENOENT if object is not allocated. * * If doi is NULL, just indicates whether the object exists. */ int dmu_object_info(objset_t *os, uint64_t object, dmu_object_info_t *doi); void dmu_object_info_from_dnode(struct dnode *dn, dmu_object_info_t *doi); void dmu_object_info_from_db(dmu_buf_t *db, dmu_object_info_t *doi); void dmu_object_size_from_db(dmu_buf_t *db, uint32_t *blksize, u_longlong_t *nblk512); typedef struct dmu_objset_stats { uint64_t dds_num_clones; /* number of clones of this */ uint64_t dds_creation_txg; uint64_t dds_guid; dmu_objset_type_t dds_type; uint8_t dds_is_snapshot; uint8_t dds_inconsistent; char dds_origin[MAXNAMELEN]; } dmu_objset_stats_t; /* * Get stats on a dataset. */ void dmu_objset_fast_stat(objset_t *os, dmu_objset_stats_t *stat); /* * Add entries to the nvlist for all the objset's properties. See * zfs_prop_table[] and zfs(1m) for details on the properties. */ void dmu_objset_stats(objset_t *os, struct nvlist *nv); /* * Get the space usage statistics for statvfs(). * * refdbytes is the amount of space "referenced" by this objset. * availbytes is the amount of space available to this objset, taking * into account quotas & reservations, assuming that no other objsets * use the space first. These values correspond to the 'referenced' and * 'available' properties, described in the zfs(1m) manpage. * * usedobjs and availobjs are the number of objects currently allocated, * and available. */ void dmu_objset_space(objset_t *os, uint64_t *refdbytesp, uint64_t *availbytesp, uint64_t *usedobjsp, uint64_t *availobjsp); /* * The fsid_guid is a 56-bit ID that can change to avoid collisions. * (Contrast with the ds_guid which is a 64-bit ID that will never * change, so there is a small probability that it will collide.) 
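 *
 * (As an example of dmu_object_info() above, sketch only; "os" and "object"
 * are assumed:
 *
 *     dmu_object_info_t doi;
 *
 *     if (dmu_object_info(os, object, &doi) == 0)
 *         blksz = doi.doi_data_block_size;
 *
 * Passing a NULL doi simply tests whether the object is allocated.)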
*/ uint64_t dmu_objset_fsid_guid(objset_t *os); /* * Get the [cm]time for an objset's snapshot dir */ timestruc_t dmu_objset_snap_cmtime(objset_t *os); int dmu_objset_is_snapshot(objset_t *os); extern struct spa *dmu_objset_spa(objset_t *os); extern struct zilog *dmu_objset_zil(objset_t *os); extern struct dsl_pool *dmu_objset_pool(objset_t *os); extern struct dsl_dataset *dmu_objset_ds(objset_t *os); extern void dmu_objset_name(objset_t *os, char *buf); extern dmu_objset_type_t dmu_objset_type(objset_t *os); extern uint64_t dmu_objset_id(objset_t *os); extern uint64_t dmu_objset_syncprop(objset_t *os); extern uint64_t dmu_objset_logbias(objset_t *os); extern int dmu_snapshot_list_next(objset_t *os, int namelen, char *name, uint64_t *id, uint64_t *offp, boolean_t *case_conflict); extern int dmu_snapshot_lookup(objset_t *os, const char *name, uint64_t *val); extern int dmu_snapshot_realname(objset_t *os, char *name, char *real, int maxlen, boolean_t *conflict); extern int dmu_dir_list_next(objset_t *os, int namelen, char *name, uint64_t *idp, uint64_t *offp); typedef int objset_used_cb_t(dmu_object_type_t bonustype, void *bonus, uint64_t *userp, uint64_t *groupp); extern void dmu_objset_register_type(dmu_objset_type_t ost, objset_used_cb_t *cb); extern void dmu_objset_set_user(objset_t *os, void *user_ptr); extern void *dmu_objset_get_user(objset_t *os); /* * Return the txg number for the given assigned transaction. */ uint64_t dmu_tx_get_txg(dmu_tx_t *tx); /* * Synchronous write. * If a parent zio is provided this function initiates a write on the * provided buffer as a child of the parent zio. * In the absence of a parent zio, the write is completed synchronously. * At write completion, blk is filled with the bp of the written block. * Note that while the data covered by this function will be on stable * storage when the write completes this new data does not become a * permanent part of the file until the associated transaction commits. */ /* * {zfs,zvol,ztest}_get_done() args */ typedef struct zgd { struct zilog *zgd_zilog; struct blkptr *zgd_bp; dmu_buf_t *zgd_db; struct rl *zgd_rl; void *zgd_private; } zgd_t; typedef void dmu_sync_cb_t(zgd_t *arg, int error); int dmu_sync(struct zio *zio, uint64_t txg, dmu_sync_cb_t *done, zgd_t *zgd); /* * Find the next hole or data block in file starting at *off * Return found offset in *off. Return ESRCH for end of file. */ int dmu_offset_next(objset_t *os, uint64_t object, boolean_t hole, uint64_t *off); /* * Initial setup and final teardown. */ extern void dmu_init(void); extern void dmu_fini(void); typedef void (*dmu_traverse_cb_t)(objset_t *os, void *arg, struct blkptr *bp, uint64_t object, uint64_t offset, int len); void dmu_traverse_objset(objset_t *os, uint64_t txg_start, dmu_traverse_cb_t cb, void *arg); -int dmu_send(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin, +int dmu_send(objset_t *tosnap, objset_t *fromsnap, int outfd, struct vnode *vp, offset_t *off); -int dmu_send_estimate(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorign, - uint64_t *sizep); +int dmu_send_estimate(objset_t *tosnap, objset_t *fromsnap, uint64_t *sizep); typedef struct dmu_recv_cookie { /* * This structure is opaque! * * If logical and real are different, we are recving the stream * into the "real" temporary clone, and then switching it with * the "logical" target. 
*/ struct dsl_dataset *drc_logical_ds; struct dsl_dataset *drc_real_ds; struct drr_begin *drc_drrb; char *drc_tosnap; char *drc_top_ds; boolean_t drc_newfs; boolean_t drc_force; struct avl_tree *drc_guid_to_ds_map; } dmu_recv_cookie_t; int dmu_recv_begin(char *tofs, char *tosnap, char *topds, struct drr_begin *, boolean_t force, objset_t *origin, dmu_recv_cookie_t *); int dmu_recv_stream(dmu_recv_cookie_t *drc, struct vnode *vp, offset_t *voffp, int cleanup_fd, uint64_t *action_handlep); int dmu_recv_end(dmu_recv_cookie_t *drc); int dmu_diff(objset_t *tosnap, objset_t *fromsnap, struct vnode *vp, offset_t *off); /* CRC64 table */ #define ZFS_CRC64_POLY 0xC96C5795D7870F42ULL /* ECMA-182, reflected form */ extern uint64_t zfs_crc64_table[256]; extern int zfs_mdcomp_disable; #ifdef __cplusplus } #endif #endif /* _SYS_DMU_H */ diff --git a/include/sys/dmu_objset.h b/include/sys/dmu_objset.h index 507f73222895..79d3a6bc05a9 100644 --- a/include/sys/dmu_objset.h +++ b/include/sys/dmu_objset.h @@ -1,185 +1,176 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2013 by Saso Kiselkov. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. */ /* Portions Copyright 2010 Robert Milkowski */ #ifndef _SYS_DMU_OBJSET_H #define _SYS_DMU_OBJSET_H #include #include #include #include #include #include #include #include #ifdef __cplusplus extern "C" { #endif extern krwlock_t os_lock; struct dsl_dataset; struct dmu_tx; #define OBJSET_PHYS_SIZE 2048 #define OBJSET_OLD_PHYS_SIZE 1024 #define OBJSET_BUF_HAS_USERUSED(buf) \ (arc_buf_size(buf) > OBJSET_OLD_PHYS_SIZE) #define OBJSET_FLAG_USERACCOUNTING_COMPLETE (1ULL<<0) typedef struct objset_phys { dnode_phys_t os_meta_dnode; zil_header_t os_zil_header; uint64_t os_type; uint64_t os_flags; char os_pad[OBJSET_PHYS_SIZE - sizeof (dnode_phys_t)*3 - sizeof (zil_header_t) - sizeof (uint64_t)*2]; dnode_phys_t os_userused_dnode; dnode_phys_t os_groupused_dnode; } objset_phys_t; struct objset { /* Immutable: */ struct dsl_dataset *os_dsl_dataset; spa_t *os_spa; arc_buf_t *os_phys_buf; objset_phys_t *os_phys; /* * The following "special" dnodes have no parent and are exempt from * dnode_move(), but they root their descendents in this objset using * handles anyway, so that all access to dnodes from dbufs consistently * uses handles. 
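/*
 * A sketch of the receive sequence implied by the three entry points above:
 * dmu_recv_begin() fills the opaque cookie, dmu_recv_stream() consumes the
 * record stream, and dmu_recv_end() commits the result. All arguments are
 * assumed to be supplied by the caller (e.g. the ioctl path), and any abort
 * path taken on a stream error is omitted here.
 */
static int
example_recv(char *tofs, char *tosnap, char *topds, struct drr_begin *drrb,
    boolean_t force, objset_t *origin, struct vnode *vp, offset_t *voffp,
    int cleanup_fd, uint64_t *action_handlep)
{
        dmu_recv_cookie_t drc;
        int err;

        err = dmu_recv_begin(tofs, tosnap, topds, drrb, force, origin, &drc);
        if (err != 0)
                return (err);

        err = dmu_recv_stream(&drc, vp, voffp, cleanup_fd, action_handlep);
        if (err != 0)
                return (err);

        return (dmu_recv_end(&drc));
}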
*/ dnode_handle_t os_meta_dnode; dnode_handle_t os_userused_dnode; dnode_handle_t os_groupused_dnode; zilog_t *os_zil; /* can change, under dsl_dir's locks: */ uint8_t os_checksum; uint8_t os_compress; uint8_t os_copies; uint8_t os_dedup_checksum; uint8_t os_dedup_verify; uint8_t os_logbias; uint8_t os_primary_cache; uint8_t os_secondary_cache; uint8_t os_sync; /* no lock needed: */ struct dmu_tx *os_synctx; /* XXX sketchy */ blkptr_t *os_rootbp; zil_header_t os_zil_header; list_t os_synced_dnodes; uint64_t os_flags; /* Protected by os_obj_lock */ kmutex_t os_obj_lock; uint64_t os_obj_next; /* Protected by os_lock */ kmutex_t os_lock; list_t os_dirty_dnodes[TXG_SIZE]; list_t os_free_dnodes[TXG_SIZE]; list_t os_dnodes; list_t os_downgraded_dbufs; /* stuff we store for the user */ kmutex_t os_user_ptr_lock; void *os_user_ptr; /* SA layout/attribute registration */ sa_os_t *os_sa; }; #define DMU_META_OBJSET 0 #define DMU_META_DNODE_OBJECT 0 #define DMU_OBJECT_IS_SPECIAL(obj) ((int64_t)(obj) <= 0) #define DMU_META_DNODE(os) ((os)->os_meta_dnode.dnh_dnode) #define DMU_USERUSED_DNODE(os) ((os)->os_userused_dnode.dnh_dnode) #define DMU_GROUPUSED_DNODE(os) ((os)->os_groupused_dnode.dnh_dnode) #define DMU_OS_IS_L2CACHEABLE(os) \ ((os)->os_secondary_cache == ZFS_CACHE_ALL || \ (os)->os_secondary_cache == ZFS_CACHE_METADATA) #define DMU_OS_IS_L2COMPRESSIBLE(os) (zfs_mdcomp_disable == B_FALSE) /* called from zpl */ int dmu_objset_hold(const char *name, void *tag, objset_t **osp); int dmu_objset_own(const char *name, dmu_objset_type_t type, boolean_t readonly, void *tag, objset_t **osp); void dmu_objset_rele(objset_t *os, void *tag); void dmu_objset_disown(objset_t *os, void *tag); int dmu_objset_from_ds(struct dsl_dataset *ds, objset_t **osp); -int dmu_objset_create(const char *name, dmu_objset_type_t type, uint64_t flags, - void (*func)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx), void *arg); -int dmu_objset_clone(const char *name, struct dsl_dataset *clone_origin, - uint64_t flags); -int dmu_objset_destroy(const char *name, boolean_t defer); -int dmu_objset_snapshot(char *fsname, char *snapname, char *tag, - struct nvlist *props, boolean_t recursive, boolean_t temporary, int fd); void dmu_objset_stats(objset_t *os, nvlist_t *nv); void dmu_objset_fast_stat(objset_t *os, dmu_objset_stats_t *stat); void dmu_objset_space(objset_t *os, uint64_t *refdbytesp, uint64_t *availbytesp, uint64_t *usedobjsp, uint64_t *availobjsp); uint64_t dmu_objset_fsid_guid(objset_t *os); -int dmu_objset_find(char *name, int func(const char *, void *), void *arg, - int flags); int dmu_objset_find_spa(spa_t *spa, const char *name, int func(spa_t *, uint64_t, const char *, void *), void *arg, int flags); int dmu_objset_prefetch(const char *name, void *arg); -void dmu_objset_byteswap(void *buf, size_t size); int dmu_objset_evict_dbufs(objset_t *os); timestruc_t dmu_objset_snap_cmtime(objset_t *os); /* called from dsl */ void dmu_objset_sync(objset_t *os, zio_t *zio, dmu_tx_t *tx); boolean_t dmu_objset_is_dirty(objset_t *os, uint64_t txg); objset_t *dmu_objset_create_impl(spa_t *spa, struct dsl_dataset *ds, blkptr_t *bp, dmu_objset_type_t type, dmu_tx_t *tx); int dmu_objset_open_impl(spa_t *spa, struct dsl_dataset *ds, blkptr_t *bp, objset_t **osp); void dmu_objset_evict(objset_t *os); void dmu_objset_do_userquota_updates(objset_t *os, dmu_tx_t *tx); void dmu_objset_userquota_get_ids(dnode_t *dn, boolean_t before, dmu_tx_t *tx); boolean_t dmu_objset_userused_enabled(objset_t *os); int dmu_objset_userspace_upgrade(objset_t 
*os); boolean_t dmu_objset_userspace_present(objset_t *os); void dmu_objset_init(void); void dmu_objset_fini(void); #ifdef __cplusplus } #endif #endif /* _SYS_DMU_OBJSET_H */ diff --git a/include/sys/dsl_dataset.h b/include/sys/dsl_dataset.h index afcf2b7946f0..735ccbbd3058 100644 --- a/include/sys/dsl_dataset.h +++ b/include/sys/dsl_dataset.h @@ -1,300 +1,299 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012 by Delphix. All rights reserved. * Copyright (c) 2012, Joyent, Inc. All rights reserved. */ #ifndef _SYS_DSL_DATASET_H #define _SYS_DSL_DATASET_H #include #include #include #include #include #include #include #include #ifdef __cplusplus extern "C" { #endif struct dsl_dataset; struct dsl_dir; struct dsl_pool; #define DS_FLAG_INCONSISTENT (1ULL<<0) #define DS_IS_INCONSISTENT(ds) \ ((ds)->ds_phys->ds_flags & DS_FLAG_INCONSISTENT) /* * NB: nopromote can not yet be set, but we want support for it in this * on-disk version, so that we don't need to upgrade for it later. It * will be needed when we implement 'zfs split' (where the split off * clone should not be promoted). */ #define DS_FLAG_NOPROMOTE (1ULL<<1) /* * DS_FLAG_UNIQUE_ACCURATE is set if ds_unique_bytes has been correctly * calculated for head datasets (starting with SPA_VERSION_UNIQUE_ACCURATE, * refquota/refreservations). */ #define DS_FLAG_UNIQUE_ACCURATE (1ULL<<2) /* * DS_FLAG_DEFER_DESTROY is set after 'zfs destroy -d' has been called * on a dataset. This allows the dataset to be destroyed using 'zfs release'. */ #define DS_FLAG_DEFER_DESTROY (1ULL<<3) #define DS_IS_DEFER_DESTROY(ds) \ ((ds)->ds_phys->ds_flags & DS_FLAG_DEFER_DESTROY) /* * DS_FLAG_CI_DATASET is set if the dataset contains a file system whose * name lookups should be performed case-insensitively. */ #define DS_FLAG_CI_DATASET (1ULL<<16) typedef struct dsl_dataset_phys { uint64_t ds_dir_obj; /* DMU_OT_DSL_DIR */ uint64_t ds_prev_snap_obj; /* DMU_OT_DSL_DATASET */ uint64_t ds_prev_snap_txg; uint64_t ds_next_snap_obj; /* DMU_OT_DSL_DATASET */ uint64_t ds_snapnames_zapobj; /* DMU_OT_DSL_DS_SNAP_MAP 0 for snaps */ uint64_t ds_num_children; /* clone/snap children; ==0 for head */ uint64_t ds_creation_time; /* seconds since 1970 */ uint64_t ds_creation_txg; uint64_t ds_deadlist_obj; /* DMU_OT_DEADLIST */ /* * ds_referenced_bytes, ds_compressed_bytes, and ds_uncompressed_bytes * include all blocks referenced by this dataset, including those * shared with any other datasets. */ uint64_t ds_referenced_bytes; uint64_t ds_compressed_bytes; uint64_t ds_uncompressed_bytes; uint64_t ds_unique_bytes; /* only relevant to snapshots */ /* * The ds_fsid_guid is a 56-bit ID that can change to avoid * collisions. 
The ds_guid is a 64-bit ID that will never * change, so there is a small probability that it will collide. */ uint64_t ds_fsid_guid; uint64_t ds_guid; uint64_t ds_flags; /* DS_FLAG_* */ blkptr_t ds_bp; uint64_t ds_next_clones_obj; /* DMU_OT_DSL_CLONES */ uint64_t ds_props_obj; /* DMU_OT_DSL_PROPS for snaps */ uint64_t ds_userrefs_obj; /* DMU_OT_USERREFS */ uint64_t ds_pad[5]; /* pad out to 320 bytes for good measure */ } dsl_dataset_phys_t; typedef struct dsl_dataset { /* Immutable: */ struct dsl_dir *ds_dir; dsl_dataset_phys_t *ds_phys; dmu_buf_t *ds_dbuf; uint64_t ds_object; uint64_t ds_fsid_guid; /* only used in syncing context, only valid for non-snapshots: */ struct dsl_dataset *ds_prev; /* has internal locking: */ dsl_deadlist_t ds_deadlist; bplist_t ds_pending_deadlist; /* to protect against multiple concurrent incremental recv */ kmutex_t ds_recvlock; /* protected by lock on pool's dp_dirty_datasets list */ txg_node_t ds_dirty_link; list_node_t ds_synced_link; /* * ds_phys->ds_ is also protected by ds_lock. * Protected by ds_lock: */ kmutex_t ds_lock; objset_t *ds_objset; uint64_t ds_userrefs; /* * ds_owner is protected by the ds_rwlock and the ds_lock */ krwlock_t ds_rwlock; kcondvar_t ds_exclusive_cv; void *ds_owner; /* no locking; only for making guesses */ uint64_t ds_trysnap_txg; /* for objset_open() */ kmutex_t ds_opening_lock; uint64_t ds_reserved; /* cached refreservation */ uint64_t ds_quota; /* cached refquota */ kmutex_t ds_sendstream_lock; list_t ds_sendstreams; /* Protected by ds_lock; keep at end of struct for better locality */ char ds_snapname[MAXNAMELEN]; } dsl_dataset_t; struct dsl_ds_destroyarg { dsl_dataset_t *ds; /* ds to destroy */ dsl_dataset_t *rm_origin; /* also remove our origin? */ boolean_t is_origin_rm; /* set if removing origin snap */ boolean_t defer; /* destroy -d requested? */ boolean_t releasing; /* destroying due to release? */ boolean_t need_prep; /* do we need to retry due to EBUSY? */ }; /* * The max length of a temporary tag prefix is the number of hex digits * required to express UINT64_MAX plus one for the hyphen. 
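 * (UINT64_MAX is 0xFFFFFFFFFFFFFFFF, i.e. 16 hex digits, so the prefix
 * needs at most 16 + 1 = 17 characters, matching MAX_TAG_PREFIX_LEN below.)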
*/ #define MAX_TAG_PREFIX_LEN 17 struct dsl_ds_holdarg { dsl_sync_task_group_t *dstg; - char *htag; + const char *htag; char *snapname; boolean_t recursive; boolean_t gotone; boolean_t temphold; char failed[MAXPATHLEN]; }; #define dsl_dataset_is_snapshot(ds) \ ((ds)->ds_phys->ds_num_children != 0) #define DS_UNIQUE_IS_ACCURATE(ds) \ (((ds)->ds_phys->ds_flags & DS_FLAG_UNIQUE_ACCURATE) != 0) int dsl_dataset_hold(const char *name, void *tag, dsl_dataset_t **dsp); int dsl_dataset_hold_obj(struct dsl_pool *dp, uint64_t dsobj, void *tag, dsl_dataset_t **); int dsl_dataset_own(const char *name, boolean_t inconsistentok, void *tag, dsl_dataset_t **dsp); int dsl_dataset_own_obj(struct dsl_pool *dp, uint64_t dsobj, boolean_t inconsistentok, void *tag, dsl_dataset_t **dsp); void dsl_dataset_name(dsl_dataset_t *ds, char *name); void dsl_dataset_rele(dsl_dataset_t *ds, void *tag); void dsl_dataset_disown(dsl_dataset_t *ds, void *tag); void dsl_dataset_drop_ref(dsl_dataset_t *ds, void *tag); boolean_t dsl_dataset_tryown(dsl_dataset_t *ds, boolean_t inconsistentok, void *tag); void dsl_dataset_make_exclusive(dsl_dataset_t *ds, void *tag); void dsl_register_onexit_hold_cleanup(dsl_dataset_t *ds, const char *htag, minor_t minor); uint64_t dsl_dataset_create_sync(dsl_dir_t *pds, const char *lastname, dsl_dataset_t *origin, uint64_t flags, cred_t *, dmu_tx_t *); uint64_t dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin, uint64_t flags, dmu_tx_t *tx); int dsl_dataset_destroy(dsl_dataset_t *ds, void *tag, boolean_t defer); -int dsl_snapshots_destroy(char *fsname, char *snapname, boolean_t defer); dsl_checkfunc_t dsl_dataset_destroy_check; dsl_syncfunc_t dsl_dataset_destroy_sync; -dsl_checkfunc_t dsl_dataset_snapshot_check; -dsl_syncfunc_t dsl_dataset_snapshot_sync; dsl_syncfunc_t dsl_dataset_user_hold_sync; +int dsl_dataset_snapshot_check(dsl_dataset_t *ds, const char *, dmu_tx_t *tx); +void dsl_dataset_snapshot_sync(dsl_dataset_t *ds, const char *, dmu_tx_t *tx); int dsl_dataset_rename(char *name, const char *newname, boolean_t recursive); int dsl_dataset_promote(const char *name, char *conflsnap); int dsl_dataset_clone_swap(dsl_dataset_t *clone, dsl_dataset_t *origin_head, boolean_t force); int dsl_dataset_user_hold(char *dsname, char *snapname, char *htag, boolean_t recursive, boolean_t temphold, int cleanup_fd); int dsl_dataset_user_hold_for_send(dsl_dataset_t *ds, char *htag, boolean_t temphold); int dsl_dataset_user_release(char *dsname, char *snapname, char *htag, boolean_t recursive); int dsl_dataset_user_release_tmp(struct dsl_pool *dp, uint64_t dsobj, char *htag, boolean_t retry); int dsl_dataset_get_holds(const char *dsname, nvlist_t **nvp); blkptr_t *dsl_dataset_get_blkptr(dsl_dataset_t *ds); void dsl_dataset_set_blkptr(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx); spa_t *dsl_dataset_get_spa(dsl_dataset_t *ds); boolean_t dsl_dataset_modified_since_lastsnap(dsl_dataset_t *ds); void dsl_dataset_sync(dsl_dataset_t *os, zio_t *zio, dmu_tx_t *tx); void dsl_dataset_block_born(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx); int dsl_dataset_block_kill(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx, boolean_t async); boolean_t dsl_dataset_block_freeable(dsl_dataset_t *ds, const blkptr_t *bp, uint64_t blk_birth); uint64_t dsl_dataset_prev_snap_txg(dsl_dataset_t *ds); int dsl_dataset_snap_lookup(dsl_dataset_t *ds, const char *name, uint64_t *value); void dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx); void dsl_dataset_stats(dsl_dataset_t *os, nvlist_t *nv); void 
dsl_dataset_fast_stat(dsl_dataset_t *ds, dmu_objset_stats_t *stat); void dsl_dataset_space(dsl_dataset_t *ds, uint64_t *refdbytesp, uint64_t *availbytesp, uint64_t *usedobjsp, uint64_t *availobjsp); uint64_t dsl_dataset_fsid_guid(dsl_dataset_t *ds); int dsl_dataset_space_written(dsl_dataset_t *oldsnap, dsl_dataset_t *new, uint64_t *usedp, uint64_t *compp, uint64_t *uncompp); int dsl_dataset_space_wouldfree(dsl_dataset_t *firstsnap, dsl_dataset_t *last, uint64_t *usedp, uint64_t *compp, uint64_t *uncompp); boolean_t dsl_dataset_is_dirty(dsl_dataset_t *ds); int dsl_dsobj_to_dsname(char *pname, uint64_t obj, char *buf); int dsl_dataset_check_quota(dsl_dataset_t *ds, boolean_t check_quota, uint64_t asize, uint64_t inflight, uint64_t *used, uint64_t *ref_rsrv); int dsl_dataset_set_quota(const char *dsname, zprop_source_t source, uint64_t quota); dsl_syncfunc_t dsl_dataset_set_quota_sync; int dsl_dataset_set_reservation(const char *dsname, zprop_source_t source, uint64_t reservation); int dsl_destroy_inconsistent(const char *dsname, void *arg); #ifdef ZFS_DEBUG #define dprintf_ds(ds, fmt, ...) do { \ if (zfs_flags & ZFS_DEBUG_DPRINTF) { \ char *__ds_name = kmem_alloc(MAXNAMELEN, KM_PUSHPAGE); \ dsl_dataset_name(ds, __ds_name); \ dprintf("ds=%s " fmt, __ds_name, __VA_ARGS__); \ kmem_free(__ds_name, MAXNAMELEN); \ } \ _NOTE(CONSTCOND) } while (0) #else #define dprintf_ds(dd, fmt, ...) #endif #ifdef __cplusplus } #endif #endif /* _SYS_DSL_DATASET_H */ diff --git a/include/sys/dsl_deleg.h b/include/sys/dsl_deleg.h index 9db6d07e87e7..5842639aafba 100644 --- a/include/sys/dsl_deleg.h +++ b/include/sys/dsl_deleg.h @@ -1,80 +1,79 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011 by Delphix. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. 
*/ #ifndef _SYS_DSL_DELEG_H #define _SYS_DSL_DELEG_H #include #include #include #ifdef __cplusplus extern "C" { #endif #define ZFS_DELEG_PERM_NONE "" #define ZFS_DELEG_PERM_CREATE "create" #define ZFS_DELEG_PERM_DESTROY "destroy" #define ZFS_DELEG_PERM_SNAPSHOT "snapshot" #define ZFS_DELEG_PERM_ROLLBACK "rollback" #define ZFS_DELEG_PERM_CLONE "clone" #define ZFS_DELEG_PERM_PROMOTE "promote" #define ZFS_DELEG_PERM_RENAME "rename" #define ZFS_DELEG_PERM_MOUNT "mount" #define ZFS_DELEG_PERM_SHARE "share" #define ZFS_DELEG_PERM_SEND "send" #define ZFS_DELEG_PERM_RECEIVE "receive" #define ZFS_DELEG_PERM_ALLOW "allow" #define ZFS_DELEG_PERM_USERPROP "userprop" #define ZFS_DELEG_PERM_VSCAN "vscan" #define ZFS_DELEG_PERM_USERQUOTA "userquota" #define ZFS_DELEG_PERM_GROUPQUOTA "groupquota" #define ZFS_DELEG_PERM_USERUSED "userused" #define ZFS_DELEG_PERM_GROUPUSED "groupused" #define ZFS_DELEG_PERM_HOLD "hold" #define ZFS_DELEG_PERM_RELEASE "release" #define ZFS_DELEG_PERM_DIFF "diff" /* * Note: the names of properties that are marked delegatable are also * valid delegated permissions */ int dsl_deleg_get(const char *ddname, nvlist_t **nvp); int dsl_deleg_set(const char *ddname, nvlist_t *nvp, boolean_t unset); int dsl_deleg_access(const char *ddname, const char *perm, cred_t *cr); -int dsl_deleg_access_impl(struct dsl_dataset *ds, boolean_t descendent, - const char *perm, cred_t *cr); +int dsl_deleg_access_impl(struct dsl_dataset *ds, const char *perm, cred_t *cr); void dsl_deleg_set_create_perms(dsl_dir_t *dd, dmu_tx_t *tx, cred_t *cr); int dsl_deleg_can_allow(char *ddname, nvlist_t *nvp, cred_t *cr); int dsl_deleg_can_unallow(char *ddname, nvlist_t *nvp, cred_t *cr); int dsl_deleg_destroy(objset_t *os, uint64_t zapobj, dmu_tx_t *tx); boolean_t dsl_delegation_on(objset_t *os); #ifdef __cplusplus } #endif #endif /* _SYS_DSL_DELEG_H */ diff --git a/include/sys/dsl_prop.h b/include/sys/dsl_prop.h index a636ad35096b..b0d9a52cdfd7 100644 --- a/include/sys/dsl_prop.h +++ b/include/sys/dsl_prop.h @@ -1,119 +1,118 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. */ #ifndef _SYS_DSL_PROP_H #define _SYS_DSL_PROP_H #include #include #include #include #ifdef __cplusplus extern "C" { #endif struct dsl_dataset; struct dsl_dir; /* The callback func may not call into the DMU or DSL! 
*/ typedef void (dsl_prop_changed_cb_t)(void *arg, uint64_t newval); typedef struct dsl_prop_cb_record { list_node_t cbr_node; /* link on dd_prop_cbs */ struct dsl_dataset *cbr_ds; const char *cbr_propname; dsl_prop_changed_cb_t *cbr_func; void *cbr_arg; } dsl_prop_cb_record_t; typedef struct dsl_props_arg { nvlist_t *pa_props; zprop_source_t pa_source; } dsl_props_arg_t; typedef struct dsl_prop_set_arg { const char *psa_name; zprop_source_t psa_source; int psa_intsz; int psa_numints; const void *psa_value; /* * Used to handle the special requirements of the quota and reservation * properties. */ uint64_t psa_effective_value; } dsl_prop_setarg_t; int dsl_prop_register(struct dsl_dataset *ds, const char *propname, dsl_prop_changed_cb_t *callback, void *cbarg); int dsl_prop_unregister(struct dsl_dataset *ds, const char *propname, dsl_prop_changed_cb_t *callback, void *cbarg); int dsl_prop_numcb(struct dsl_dataset *ds); int dsl_prop_get(const char *ddname, const char *propname, int intsz, int numints, void *buf, char *setpoint); int dsl_prop_get_integer(const char *ddname, const char *propname, uint64_t *valuep, char *setpoint); int dsl_prop_get_all(objset_t *os, nvlist_t **nvp); int dsl_prop_get_received(objset_t *os, nvlist_t **nvp); int dsl_prop_get_ds(struct dsl_dataset *ds, const char *propname, int intsz, int numints, void *buf, char *setpoint); int dsl_prop_get_dd(struct dsl_dir *dd, const char *propname, int intsz, int numints, void *buf, char *setpoint, boolean_t snapshot); dsl_syncfunc_t dsl_props_set_sync; int dsl_prop_set(const char *ddname, const char *propname, zprop_source_t source, int intsz, int numints, const void *buf); int dsl_props_set(const char *dsname, zprop_source_t source, nvlist_t *nvl); -void dsl_dir_prop_set_uint64_sync(dsl_dir_t *dd, const char *name, uint64_t val, - dmu_tx_t *tx); void dsl_prop_setarg_init_uint64(dsl_prop_setarg_t *psa, const char *propname, zprop_source_t source, uint64_t *value); int dsl_prop_predict_sync(dsl_dir_t *dd, dsl_prop_setarg_t *psa); #ifdef ZFS_DEBUG void dsl_prop_check_prediction(dsl_dir_t *dd, dsl_prop_setarg_t *psa); #define DSL_PROP_CHECK_PREDICTION(dd, psa) \ dsl_prop_check_prediction((dd), (psa)) #else #define DSL_PROP_CHECK_PREDICTION(dd, psa) /* nothing */ #endif /* flag first receive on or after SPA_VERSION_RECVD_PROPS */ boolean_t dsl_prop_get_hasrecvd(objset_t *os); void dsl_prop_set_hasrecvd(objset_t *os); void dsl_prop_unset_hasrecvd(objset_t *os); void dsl_prop_nvlist_add_uint64(nvlist_t *nv, zfs_prop_t prop, uint64_t value); void dsl_prop_nvlist_add_string(nvlist_t *nv, zfs_prop_t prop, const char *value); #ifdef __cplusplus } #endif #endif /* _SYS_DSL_PROP_H */ diff --git a/include/sys/fs/zfs.h b/include/sys/fs/zfs.h index 26c24fc003aa..164bf3591cb6 100644 --- a/include/sys/fs/zfs.h +++ b/include/sys/fs/zfs.h @@ -1,967 +1,937 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
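/*
 * An illustrative use of the property-callback interface declared above.
 * The callback only caches the new value, since it may not call back into
 * the DMU or DSL; the "compression" property and the example_* names are
 * illustrative, not part of this header.
 */
typedef struct example_cache {
        uint64_t ec_compress;
} example_cache_t;

static void
example_compress_changed(void *arg, uint64_t newval)
{
        ((example_cache_t *)arg)->ec_compress = newval;
}

static int
example_watch_compress(struct dsl_dataset *ds, example_cache_t *ec)
{
        /* Unregister later with the same (ds, name, callback, arg) tuple. */
        return (dsl_prop_register(ds, "compression",
            example_compress_changed, ec));
}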
* If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012 by Delphix. All rights reserved. * Copyright 2011 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2012, Joyent, Inc. All rights reserved. */ /* Portions Copyright 2010 Robert Milkowski */ #ifndef _SYS_FS_ZFS_H #define _SYS_FS_ZFS_H #include #ifdef __cplusplus extern "C" { #endif /* * Types and constants shared between userland and the kernel. */ /* * Each dataset can be one of the following types. These constants can be * combined into masks that can be passed to various functions. */ typedef enum { ZFS_TYPE_FILESYSTEM = 0x1, ZFS_TYPE_SNAPSHOT = 0x2, ZFS_TYPE_VOLUME = 0x4, ZFS_TYPE_POOL = 0x8 } zfs_type_t; +typedef enum dmu_objset_type { + DMU_OST_NONE, + DMU_OST_META, + DMU_OST_ZFS, + DMU_OST_ZVOL, + DMU_OST_OTHER, /* For testing only! */ + DMU_OST_ANY, /* Be careful! */ + DMU_OST_NUMTYPES +} dmu_objset_type_t; + #define ZFS_TYPE_DATASET \ (ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME | ZFS_TYPE_SNAPSHOT) #define ZAP_MAXNAMELEN 256 #define ZAP_MAXVALUELEN (1024 * 8) #define ZAP_OLDMAXVALUELEN 1024 /* * Dataset properties are identified by these constants and must be added to * the end of this list to ensure that external consumers are not affected * by the change. If you make any changes to this list, be sure to update * the property table in module/zcommon/zfs_prop.c. */ typedef enum { ZFS_PROP_TYPE, ZFS_PROP_CREATION, ZFS_PROP_USED, ZFS_PROP_AVAILABLE, ZFS_PROP_REFERENCED, ZFS_PROP_COMPRESSRATIO, ZFS_PROP_MOUNTED, ZFS_PROP_ORIGIN, ZFS_PROP_QUOTA, ZFS_PROP_RESERVATION, ZFS_PROP_VOLSIZE, ZFS_PROP_VOLBLOCKSIZE, ZFS_PROP_RECORDSIZE, ZFS_PROP_MOUNTPOINT, ZFS_PROP_SHARENFS, ZFS_PROP_CHECKSUM, ZFS_PROP_COMPRESSION, ZFS_PROP_ATIME, ZFS_PROP_DEVICES, ZFS_PROP_EXEC, ZFS_PROP_SETUID, ZFS_PROP_READONLY, ZFS_PROP_ZONED, ZFS_PROP_SNAPDIR, ZFS_PROP_PRIVATE, /* not exposed to user, temporary */ ZFS_PROP_ACLINHERIT, ZFS_PROP_CREATETXG, /* not exposed to the user */ ZFS_PROP_NAME, /* not exposed to the user */ ZFS_PROP_CANMOUNT, ZFS_PROP_ISCSIOPTIONS, /* not exposed to the user */ ZFS_PROP_XATTR, ZFS_PROP_NUMCLONES, /* not exposed to the user */ ZFS_PROP_COPIES, ZFS_PROP_VERSION, ZFS_PROP_UTF8ONLY, ZFS_PROP_NORMALIZE, ZFS_PROP_CASE, ZFS_PROP_VSCAN, ZFS_PROP_NBMAND, ZFS_PROP_SHARESMB, ZFS_PROP_REFQUOTA, ZFS_PROP_REFRESERVATION, ZFS_PROP_GUID, ZFS_PROP_PRIMARYCACHE, ZFS_PROP_SECONDARYCACHE, ZFS_PROP_USEDSNAP, ZFS_PROP_USEDDS, ZFS_PROP_USEDCHILD, ZFS_PROP_USEDREFRESERV, ZFS_PROP_USERACCOUNTING, /* not exposed to the user */ ZFS_PROP_STMF_SHAREINFO, /* not exposed to the user */ ZFS_PROP_DEFER_DESTROY, ZFS_PROP_USERREFS, ZFS_PROP_LOGBIAS, ZFS_PROP_UNIQUE, /* not exposed to the user */ ZFS_PROP_OBJSETID, /* not exposed to the user */ ZFS_PROP_DEDUP, ZFS_PROP_MLSLABEL, ZFS_PROP_SYNC, ZFS_PROP_REFRATIO, ZFS_PROP_WRITTEN, ZFS_PROP_CLONES, ZFS_PROP_SNAPDEV, ZFS_NUM_PROPS } zfs_prop_t; typedef enum { ZFS_PROP_USERUSED, ZFS_PROP_USERQUOTA, ZFS_PROP_GROUPUSED, ZFS_PROP_GROUPQUOTA, ZFS_NUM_USERQUOTA_PROPS } zfs_userquota_prop_t; extern const char *zfs_userquota_prop_prefixes[ZFS_NUM_USERQUOTA_PROPS]; /* * Pool properties are identified by these constants and must be added to the * end of this list to ensure that external consumers are not affected * by the change. 
If you make any changes to this list, be sure to update * the property table in module/zcommon/zpool_prop.c. */ typedef enum { ZPOOL_PROP_NAME, ZPOOL_PROP_SIZE, ZPOOL_PROP_CAPACITY, ZPOOL_PROP_ALTROOT, ZPOOL_PROP_HEALTH, ZPOOL_PROP_GUID, ZPOOL_PROP_VERSION, ZPOOL_PROP_BOOTFS, ZPOOL_PROP_DELEGATION, ZPOOL_PROP_AUTOREPLACE, ZPOOL_PROP_CACHEFILE, ZPOOL_PROP_FAILUREMODE, ZPOOL_PROP_LISTSNAPS, ZPOOL_PROP_AUTOEXPAND, ZPOOL_PROP_DEDUPDITTO, ZPOOL_PROP_DEDUPRATIO, ZPOOL_PROP_FREE, ZPOOL_PROP_ALLOCATED, ZPOOL_PROP_READONLY, ZPOOL_PROP_ASHIFT, ZPOOL_PROP_COMMENT, ZPOOL_PROP_EXPANDSZ, ZPOOL_PROP_FREEING, ZPOOL_NUM_PROPS } zpool_prop_t; /* Small enough to not hog a whole line of printout in zpool(1M). */ #define ZPROP_MAX_COMMENT 32 #define ZPROP_CONT -2 #define ZPROP_INVAL -1 #define ZPROP_VALUE "value" #define ZPROP_SOURCE "source" typedef enum { ZPROP_SRC_NONE = 0x1, ZPROP_SRC_DEFAULT = 0x2, ZPROP_SRC_TEMPORARY = 0x4, ZPROP_SRC_LOCAL = 0x8, ZPROP_SRC_INHERITED = 0x10, ZPROP_SRC_RECEIVED = 0x20 } zprop_source_t; #define ZPROP_SRC_ALL 0x3f #define ZPROP_SOURCE_VAL_RECVD "$recvd" #define ZPROP_N_MORE_ERRORS "N_MORE_ERRORS" /* * Dataset flag implemented as a special entry in the props zap object * indicating that the dataset has received properties on or after * SPA_VERSION_RECVD_PROPS. The first such receive blows away local properties * just as it did in earlier versions, and thereafter, local properties are * preserved. */ #define ZPROP_HAS_RECVD "$hasrecvd" typedef enum { ZPROP_ERR_NOCLEAR = 0x1, /* failure to clear existing props */ ZPROP_ERR_NORESTORE = 0x2 /* failure to restore props on error */ } zprop_errflags_t; typedef int (*zprop_func)(int, void *); /* * Properties to be set on the root file system of a new pool * are stuffed into their own nvlist, which is then included in * the properties nvlist with the pool properties. */ #define ZPOOL_ROOTFS_PROPS "root-props-nvl" /* * Dataset property functions shared between libzfs and kernel. */ const char *zfs_prop_default_string(zfs_prop_t); uint64_t zfs_prop_default_numeric(zfs_prop_t); boolean_t zfs_prop_readonly(zfs_prop_t); boolean_t zfs_prop_inheritable(zfs_prop_t); boolean_t zfs_prop_setonce(zfs_prop_t); const char *zfs_prop_to_name(zfs_prop_t); zfs_prop_t zfs_name_to_prop(const char *); boolean_t zfs_prop_user(const char *); boolean_t zfs_prop_userquota(const char *); boolean_t zfs_prop_written(const char *); int zfs_prop_index_to_string(zfs_prop_t, uint64_t, const char **); int zfs_prop_string_to_index(zfs_prop_t, const char *, uint64_t *); uint64_t zfs_prop_random_value(zfs_prop_t, uint64_t seed); boolean_t zfs_prop_valid_for_type(int, zfs_type_t); /* * Pool property functions shared between libzfs and kernel. */ zpool_prop_t zpool_name_to_prop(const char *); const char *zpool_prop_to_name(zpool_prop_t); const char *zpool_prop_default_string(zpool_prop_t); uint64_t zpool_prop_default_numeric(zpool_prop_t); boolean_t zpool_prop_readonly(zpool_prop_t); boolean_t zpool_prop_feature(const char *); boolean_t zpool_prop_unsupported(const char *); int zpool_prop_index_to_string(zpool_prop_t, uint64_t, const char **); int zpool_prop_string_to_index(zpool_prop_t, const char *, uint64_t *); uint64_t zpool_prop_random_value(zpool_prop_t, uint64_t seed); /* * Definitions for the Delegation. 
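/*
 * A small sketch of the shared name/id property lookups declared above,
 * assuming that zfs_name_to_prop() returns ZPROP_INVAL (defined above as -1)
 * for names not in the static property table -- an assumption here, not
 * something this header states. The helper name is hypothetical.
 */
static boolean_t
example_is_known_dataset_prop(const char *name)
{
        if (zfs_name_to_prop(name) != ZPROP_INVAL)
                return (B_TRUE);

        /* Not a native property; it may still be a user or quota property. */
        return (zfs_prop_user(name) || zfs_prop_userquota(name));
}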
*/ typedef enum { ZFS_DELEG_WHO_UNKNOWN = 0, ZFS_DELEG_USER = 'u', ZFS_DELEG_USER_SETS = 'U', ZFS_DELEG_GROUP = 'g', ZFS_DELEG_GROUP_SETS = 'G', ZFS_DELEG_EVERYONE = 'e', ZFS_DELEG_EVERYONE_SETS = 'E', ZFS_DELEG_CREATE = 'c', ZFS_DELEG_CREATE_SETS = 'C', ZFS_DELEG_NAMED_SET = 's', ZFS_DELEG_NAMED_SET_SETS = 'S' } zfs_deleg_who_type_t; typedef enum { ZFS_DELEG_NONE = 0, ZFS_DELEG_PERM_LOCAL = 1, ZFS_DELEG_PERM_DESCENDENT = 2, ZFS_DELEG_PERM_LOCALDESCENDENT = 3, ZFS_DELEG_PERM_CREATE = 4 } zfs_deleg_inherit_t; #define ZFS_DELEG_PERM_UID "uid" #define ZFS_DELEG_PERM_GID "gid" #define ZFS_DELEG_PERM_GROUPS "groups" #define ZFS_MLSLABEL_DEFAULT "none" #define ZFS_SMB_ACL_SRC "src" #define ZFS_SMB_ACL_TARGET "target" typedef enum { ZFS_CANMOUNT_OFF = 0, ZFS_CANMOUNT_ON = 1, ZFS_CANMOUNT_NOAUTO = 2 } zfs_canmount_type_t; typedef enum { ZFS_LOGBIAS_LATENCY = 0, ZFS_LOGBIAS_THROUGHPUT = 1 } zfs_logbias_op_t; typedef enum zfs_share_op { ZFS_SHARE_NFS = 0, ZFS_UNSHARE_NFS = 1, ZFS_SHARE_SMB = 2, ZFS_UNSHARE_SMB = 3 } zfs_share_op_t; typedef enum zfs_smb_acl_op { ZFS_SMB_ACL_ADD, ZFS_SMB_ACL_REMOVE, ZFS_SMB_ACL_RENAME, ZFS_SMB_ACL_PURGE } zfs_smb_acl_op_t; typedef enum zfs_cache_type { ZFS_CACHE_NONE = 0, ZFS_CACHE_METADATA = 1, ZFS_CACHE_ALL = 2 } zfs_cache_type_t; typedef enum { ZFS_SYNC_STANDARD = 0, ZFS_SYNC_ALWAYS = 1, ZFS_SYNC_DISABLED = 2 } zfs_sync_type_t; typedef enum { ZFS_XATTR_OFF = 0, ZFS_XATTR_DIR = 1, ZFS_XATTR_SA = 2 } zfs_xattr_type_t; /* * On-disk version number. */ #define SPA_VERSION_1 1ULL #define SPA_VERSION_2 2ULL #define SPA_VERSION_3 3ULL #define SPA_VERSION_4 4ULL #define SPA_VERSION_5 5ULL #define SPA_VERSION_6 6ULL #define SPA_VERSION_7 7ULL #define SPA_VERSION_8 8ULL #define SPA_VERSION_9 9ULL #define SPA_VERSION_10 10ULL #define SPA_VERSION_11 11ULL #define SPA_VERSION_12 12ULL #define SPA_VERSION_13 13ULL #define SPA_VERSION_14 14ULL #define SPA_VERSION_15 15ULL #define SPA_VERSION_16 16ULL #define SPA_VERSION_17 17ULL #define SPA_VERSION_18 18ULL #define SPA_VERSION_19 19ULL #define SPA_VERSION_20 20ULL #define SPA_VERSION_21 21ULL #define SPA_VERSION_22 22ULL #define SPA_VERSION_23 23ULL #define SPA_VERSION_24 24ULL #define SPA_VERSION_25 25ULL #define SPA_VERSION_26 26ULL #define SPA_VERSION_27 27ULL #define SPA_VERSION_28 28ULL #define SPA_VERSION_5000 5000ULL /* * When bumping up SPA_VERSION, make sure GRUB ZFS understands the on-disk * format change. Go to usr/src/grub/grub-0.97/stage2/{zfs-include/, fsys_zfs*}, * and do the appropriate changes. Also bump the version number in * usr/src/grub/capability. */ #define SPA_VERSION SPA_VERSION_5000 #define SPA_VERSION_STRING "5000" /* * Symbolic names for the changes that caused a SPA_VERSION switch. * Used in the code when checking for presence or absence of a feature. * Feel free to define multiple symbolic names for each version if there * were multiple changes to on-disk structures during that version. * * NOTE: When checking the current SPA_VERSION in your code, be sure * to use spa_version() since it reports the version of the * last synced uberblock. Checking the in-flight version can * be dangerous in some cases. 
*/ #define SPA_VERSION_INITIAL SPA_VERSION_1 #define SPA_VERSION_DITTO_BLOCKS SPA_VERSION_2 #define SPA_VERSION_SPARES SPA_VERSION_3 #define SPA_VERSION_RAIDZ2 SPA_VERSION_3 #define SPA_VERSION_BPOBJ_ACCOUNT SPA_VERSION_3 #define SPA_VERSION_RAIDZ_DEFLATE SPA_VERSION_3 #define SPA_VERSION_DNODE_BYTES SPA_VERSION_3 #define SPA_VERSION_ZPOOL_HISTORY SPA_VERSION_4 #define SPA_VERSION_GZIP_COMPRESSION SPA_VERSION_5 #define SPA_VERSION_BOOTFS SPA_VERSION_6 #define SPA_VERSION_SLOGS SPA_VERSION_7 #define SPA_VERSION_DELEGATED_PERMS SPA_VERSION_8 #define SPA_VERSION_FUID SPA_VERSION_9 #define SPA_VERSION_REFRESERVATION SPA_VERSION_9 #define SPA_VERSION_REFQUOTA SPA_VERSION_9 #define SPA_VERSION_UNIQUE_ACCURATE SPA_VERSION_9 #define SPA_VERSION_L2CACHE SPA_VERSION_10 #define SPA_VERSION_NEXT_CLONES SPA_VERSION_11 #define SPA_VERSION_ORIGIN SPA_VERSION_11 #define SPA_VERSION_DSL_SCRUB SPA_VERSION_11 #define SPA_VERSION_SNAP_PROPS SPA_VERSION_12 #define SPA_VERSION_USED_BREAKDOWN SPA_VERSION_13 #define SPA_VERSION_PASSTHROUGH_X SPA_VERSION_14 #define SPA_VERSION_USERSPACE SPA_VERSION_15 #define SPA_VERSION_STMF_PROP SPA_VERSION_16 #define SPA_VERSION_RAIDZ3 SPA_VERSION_17 #define SPA_VERSION_USERREFS SPA_VERSION_18 #define SPA_VERSION_HOLES SPA_VERSION_19 #define SPA_VERSION_ZLE_COMPRESSION SPA_VERSION_20 #define SPA_VERSION_DEDUP SPA_VERSION_21 #define SPA_VERSION_RECVD_PROPS SPA_VERSION_22 #define SPA_VERSION_SLIM_ZIL SPA_VERSION_23 #define SPA_VERSION_SA SPA_VERSION_24 #define SPA_VERSION_SCAN SPA_VERSION_25 #define SPA_VERSION_DIR_CLONES SPA_VERSION_26 #define SPA_VERSION_DEADLISTS SPA_VERSION_26 #define SPA_VERSION_FAST_SNAP SPA_VERSION_27 #define SPA_VERSION_MULTI_REPLACE SPA_VERSION_28 #define SPA_VERSION_BEFORE_FEATURES SPA_VERSION_28 #define SPA_VERSION_FEATURES SPA_VERSION_5000 #define SPA_VERSION_IS_SUPPORTED(v) \ (((v) >= SPA_VERSION_INITIAL && (v) <= SPA_VERSION_BEFORE_FEATURES) || \ ((v) >= SPA_VERSION_FEATURES && (v) <= SPA_VERSION)) /* * ZPL version - rev'd whenever an incompatible on-disk format change * occurs. This is independent of SPA/DMU/ZAP versioning. You must * also update the version_table[] and help message in zfs_prop.c. * * When changing, be sure to teach GRUB how to read the new format! 
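/*
 * With the constants above, SPA_VERSION_IS_SUPPORTED() accepts the legacy
 * on-disk versions 1 through 28 (SPA_VERSION_INITIAL through
 * SPA_VERSION_BEFORE_FEATURES) plus the feature-flags version 5000; the gap
 * 29..4999 is rejected. A trivial wrapper, for illustration only:
 */
static boolean_t
example_spa_version_ok(uint64_t v)
{
        return (SPA_VERSION_IS_SUPPORTED(v) ? B_TRUE : B_FALSE);
}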
* See usr/src/grub/grub-0.97/stage2/{zfs-include/,fsys_zfs*} */ #define ZPL_VERSION_1 1ULL #define ZPL_VERSION_2 2ULL #define ZPL_VERSION_3 3ULL #define ZPL_VERSION_4 4ULL #define ZPL_VERSION_5 5ULL #define ZPL_VERSION ZPL_VERSION_5 #define ZPL_VERSION_STRING "5" #define ZPL_VERSION_INITIAL ZPL_VERSION_1 #define ZPL_VERSION_DIRENT_TYPE ZPL_VERSION_2 #define ZPL_VERSION_FUID ZPL_VERSION_3 #define ZPL_VERSION_NORMALIZATION ZPL_VERSION_3 #define ZPL_VERSION_SYSATTR ZPL_VERSION_3 #define ZPL_VERSION_USERSPACE ZPL_VERSION_4 #define ZPL_VERSION_SA ZPL_VERSION_5 /* Rewind request information */ #define ZPOOL_NO_REWIND 1 /* No policy - default behavior */ #define ZPOOL_NEVER_REWIND 2 /* Do not search for best txg or rewind */ #define ZPOOL_TRY_REWIND 4 /* Search for best txg, but do not rewind */ #define ZPOOL_DO_REWIND 8 /* Rewind to best txg w/in deferred frees */ #define ZPOOL_EXTREME_REWIND 16 /* Allow extreme measures to find best txg */ #define ZPOOL_REWIND_MASK 28 /* All the possible rewind bits */ #define ZPOOL_REWIND_POLICIES 31 /* All the possible policy bits */ typedef struct zpool_rewind_policy { uint32_t zrp_request; /* rewind behavior requested */ uint64_t zrp_maxmeta; /* max acceptable meta-data errors */ uint64_t zrp_maxdata; /* max acceptable data errors */ uint64_t zrp_txg; /* specific txg to load */ } zpool_rewind_policy_t; /* * The following are configuration names used in the nvlist describing a pool's * configuration. */ #define ZPOOL_CONFIG_VERSION "version" #define ZPOOL_CONFIG_POOL_NAME "name" #define ZPOOL_CONFIG_POOL_STATE "state" #define ZPOOL_CONFIG_POOL_TXG "txg" #define ZPOOL_CONFIG_POOL_GUID "pool_guid" #define ZPOOL_CONFIG_CREATE_TXG "create_txg" #define ZPOOL_CONFIG_TOP_GUID "top_guid" #define ZPOOL_CONFIG_VDEV_TREE "vdev_tree" #define ZPOOL_CONFIG_TYPE "type" #define ZPOOL_CONFIG_CHILDREN "children" #define ZPOOL_CONFIG_ID "id" #define ZPOOL_CONFIG_GUID "guid" #define ZPOOL_CONFIG_PATH "path" #define ZPOOL_CONFIG_DEVID "devid" #define ZPOOL_CONFIG_METASLAB_ARRAY "metaslab_array" #define ZPOOL_CONFIG_METASLAB_SHIFT "metaslab_shift" #define ZPOOL_CONFIG_ASHIFT "ashift" #define ZPOOL_CONFIG_ASIZE "asize" #define ZPOOL_CONFIG_DTL "DTL" #define ZPOOL_CONFIG_SCAN_STATS "scan_stats" /* not stored on disk */ #define ZPOOL_CONFIG_VDEV_STATS "vdev_stats" /* not stored on disk */ #define ZPOOL_CONFIG_WHOLE_DISK "whole_disk" #define ZPOOL_CONFIG_ERRCOUNT "error_count" #define ZPOOL_CONFIG_NOT_PRESENT "not_present" #define ZPOOL_CONFIG_SPARES "spares" #define ZPOOL_CONFIG_IS_SPARE "is_spare" #define ZPOOL_CONFIG_NPARITY "nparity" #define ZPOOL_CONFIG_HOSTID "hostid" #define ZPOOL_CONFIG_HOSTNAME "hostname" #define ZPOOL_CONFIG_LOADED_TIME "initial_load_time" #define ZPOOL_CONFIG_UNSPARE "unspare" #define ZPOOL_CONFIG_PHYS_PATH "phys_path" #define ZPOOL_CONFIG_IS_LOG "is_log" #define ZPOOL_CONFIG_L2CACHE "l2cache" #define ZPOOL_CONFIG_HOLE_ARRAY "hole_array" #define ZPOOL_CONFIG_VDEV_CHILDREN "vdev_children" #define ZPOOL_CONFIG_IS_HOLE "is_hole" #define ZPOOL_CONFIG_DDT_HISTOGRAM "ddt_histogram" #define ZPOOL_CONFIG_DDT_OBJ_STATS "ddt_object_stats" #define ZPOOL_CONFIG_DDT_STATS "ddt_stats" #define ZPOOL_CONFIG_SPLIT "splitcfg" #define ZPOOL_CONFIG_ORIG_GUID "orig_guid" #define ZPOOL_CONFIG_SPLIT_GUID "split_guid" #define ZPOOL_CONFIG_SPLIT_LIST "guid_list" #define ZPOOL_CONFIG_REMOVING "removing" #define ZPOOL_CONFIG_RESILVERING "resilvering" #define ZPOOL_CONFIG_COMMENT "comment" #define ZPOOL_CONFIG_SUSPENDED "suspended" /* not stored on disk */ #define 
ZPOOL_CONFIG_TIMESTAMP "timestamp" /* not stored on disk */ #define ZPOOL_CONFIG_BOOTFS "bootfs" /* not stored on disk */ #define ZPOOL_CONFIG_MISSING_DEVICES "missing_vdevs" /* not stored on disk */ #define ZPOOL_CONFIG_LOAD_INFO "load_info" /* not stored on disk */ #define ZPOOL_CONFIG_REWIND_INFO "rewind_info" /* not stored on disk */ #define ZPOOL_CONFIG_UNSUP_FEAT "unsup_feat" /* not stored on disk */ #define ZPOOL_CONFIG_ENABLED_FEAT "enabled_feat" /* not stored on disk */ #define ZPOOL_CONFIG_CAN_RDONLY "can_rdonly" /* not stored on disk */ #define ZPOOL_CONFIG_FEATURES_FOR_READ "features_for_read" #define ZPOOL_CONFIG_FEATURE_STATS "feature_stats" /* not stored on disk */ /* * The persistent vdev state is stored as separate values rather than a single * 'vdev_state' entry. This is because a device can be in multiple states, such * as offline and degraded. */ #define ZPOOL_CONFIG_OFFLINE "offline" #define ZPOOL_CONFIG_FAULTED "faulted" #define ZPOOL_CONFIG_DEGRADED "degraded" #define ZPOOL_CONFIG_REMOVED "removed" #define ZPOOL_CONFIG_FRU "fru" #define ZPOOL_CONFIG_AUX_STATE "aux_state" /* Rewind policy parameters */ #define ZPOOL_REWIND_POLICY "rewind-policy" #define ZPOOL_REWIND_REQUEST "rewind-request" #define ZPOOL_REWIND_REQUEST_TXG "rewind-request-txg" #define ZPOOL_REWIND_META_THRESH "rewind-meta-thresh" #define ZPOOL_REWIND_DATA_THRESH "rewind-data-thresh" /* Rewind data discovered */ #define ZPOOL_CONFIG_LOAD_TIME "rewind_txg_ts" #define ZPOOL_CONFIG_LOAD_DATA_ERRORS "verify_data_errors" #define ZPOOL_CONFIG_REWIND_TIME "seconds_of_rewind" #define VDEV_TYPE_ROOT "root" #define VDEV_TYPE_MIRROR "mirror" #define VDEV_TYPE_REPLACING "replacing" #define VDEV_TYPE_RAIDZ "raidz" #define VDEV_TYPE_DISK "disk" #define VDEV_TYPE_FILE "file" #define VDEV_TYPE_MISSING "missing" #define VDEV_TYPE_HOLE "hole" #define VDEV_TYPE_SPARE "spare" #define VDEV_TYPE_LOG "log" #define VDEV_TYPE_L2CACHE "l2cache" /* * This is needed in userland to report the minimum necessary device size. */ #define SPA_MINDEVSIZE (64ULL << 20) /* * The location of the pool configuration repository, shared between kernel and * userland. */ #define ZPOOL_CACHE "/etc/zfs/zpool.cache" /* * vdev states are ordered from least to most healthy. * A vdev that's CANT_OPEN or below is considered unusable. */ typedef enum vdev_state { VDEV_STATE_UNKNOWN = 0, /* Uninitialized vdev */ VDEV_STATE_CLOSED, /* Not currently open */ VDEV_STATE_OFFLINE, /* Not allowed to open */ VDEV_STATE_REMOVED, /* Explicitly removed from system */ VDEV_STATE_CANT_OPEN, /* Tried to open, but failed */ VDEV_STATE_FAULTED, /* External request to fault device */ VDEV_STATE_DEGRADED, /* Replicated vdev with unhealthy kids */ VDEV_STATE_HEALTHY /* Presumed good */ } vdev_state_t; #define VDEV_STATE_ONLINE VDEV_STATE_HEALTHY /* * vdev aux states. When a vdev is in the CANT_OPEN state, the aux field * of the vdev stats structure uses these constants to distinguish why. 
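/*
 * A hedged sketch of how the rewind request bits defined earlier combine:
 * ZPOOL_REWIND_MASK (28) is TRY (4) | DO (8) | EXTREME (16), and
 * ZPOOL_REWIND_POLICIES (31) additionally covers NO (1) and NEVER (2). The
 * threshold values chosen below are illustrative only.
 */
static void
example_fill_rewind_policy(zpool_rewind_policy_t *zrp)
{
        zrp->zrp_request = ZPOOL_DO_REWIND | ZPOOL_EXTREME_REWIND;
        zrp->zrp_maxmeta = 0;           /* tolerate no metadata errors */
        zrp->zrp_maxdata = UINT64_MAX;  /* data errors do not block rewind */
        zrp->zrp_txg = 0;               /* no specific txg requested */
}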
*/ typedef enum vdev_aux { VDEV_AUX_NONE, /* no error */ VDEV_AUX_OPEN_FAILED, /* ldi_open_*() or vn_open() failed */ VDEV_AUX_CORRUPT_DATA, /* bad label or disk contents */ VDEV_AUX_NO_REPLICAS, /* insufficient number of replicas */ VDEV_AUX_BAD_GUID_SUM, /* vdev guid sum doesn't match */ VDEV_AUX_TOO_SMALL, /* vdev size is too small */ VDEV_AUX_BAD_LABEL, /* the label is OK but invalid */ VDEV_AUX_VERSION_NEWER, /* on-disk version is too new */ VDEV_AUX_VERSION_OLDER, /* on-disk version is too old */ VDEV_AUX_UNSUP_FEAT, /* unsupported features */ VDEV_AUX_SPARED, /* hot spare used in another pool */ VDEV_AUX_ERR_EXCEEDED, /* too many errors */ VDEV_AUX_IO_FAILURE, /* experienced I/O failure */ VDEV_AUX_BAD_LOG, /* cannot read log chain(s) */ VDEV_AUX_EXTERNAL, /* external diagnosis */ VDEV_AUX_SPLIT_POOL /* vdev was split off into another pool */ } vdev_aux_t; /* * pool state. The following states are written to disk as part of the normal * SPA lifecycle: ACTIVE, EXPORTED, DESTROYED, SPARE, L2CACHE. The remaining * states are software abstractions used at various levels to communicate * pool state. */ typedef enum pool_state { POOL_STATE_ACTIVE = 0, /* In active use */ POOL_STATE_EXPORTED, /* Explicitly exported */ POOL_STATE_DESTROYED, /* Explicitly destroyed */ POOL_STATE_SPARE, /* Reserved for hot spare use */ POOL_STATE_L2CACHE, /* Level 2 ARC device */ POOL_STATE_UNINITIALIZED, /* Internal spa_t state */ POOL_STATE_UNAVAIL, /* Internal libzfs state */ POOL_STATE_POTENTIALLY_ACTIVE /* Internal libzfs state */ } pool_state_t; /* * Scan Functions. */ typedef enum pool_scan_func { POOL_SCAN_NONE, POOL_SCAN_SCRUB, POOL_SCAN_RESILVER, POOL_SCAN_FUNCS } pool_scan_func_t; /* * ZIO types. Needed to interpret vdev statistics below. */ typedef enum zio_type { ZIO_TYPE_NULL = 0, ZIO_TYPE_READ, ZIO_TYPE_WRITE, ZIO_TYPE_FREE, ZIO_TYPE_CLAIM, ZIO_TYPE_IOCTL, ZIO_TYPES } zio_type_t; /* * Pool statistics. Note: all fields should be 64-bit because this * is passed between kernel and userland as an nvlist uint64 array. */ typedef struct pool_scan_stat { /* values stored on disk */ uint64_t pss_func; /* pool_scan_func_t */ uint64_t pss_state; /* dsl_scan_state_t */ uint64_t pss_start_time; /* scan start time */ uint64_t pss_end_time; /* scan end time */ uint64_t pss_to_examine; /* total bytes to scan */ uint64_t pss_examined; /* total examined bytes */ uint64_t pss_to_process; /* total bytes to process */ uint64_t pss_processed; /* total processed bytes */ uint64_t pss_errors; /* scan errors */ /* values not stored on disk */ uint64_t pss_pass_exam; /* examined bytes per scan pass */ uint64_t pss_pass_start; /* start time of a scan pass */ } pool_scan_stat_t; typedef enum dsl_scan_state { DSS_NONE, DSS_SCANNING, DSS_FINISHED, DSS_CANCELED, DSS_NUM_STATES } dsl_scan_state_t; /* * Vdev statistics. Note: all fields should be 64-bit because this * is passed between kernel and userland as an nvlist uint64 array. 
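/*
 * A minimal consumer of pool_scan_stat_t above (the struct is passed to
 * userland as an nvlist uint64 array): the completed fraction of a scrub or
 * resilver, in whole percent. The zero-denominator guard and the helper
 * name are illustrative.
 */
static uint64_t
example_scan_pct(const pool_scan_stat_t *pss)
{
        if (pss->pss_to_examine == 0)
                return (0);

        return (pss->pss_examined * 100 / pss->pss_to_examine);
}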
*/ typedef struct vdev_stat { hrtime_t vs_timestamp; /* time since vdev load */ uint64_t vs_state; /* vdev state */ uint64_t vs_aux; /* see vdev_aux_t */ uint64_t vs_alloc; /* space allocated */ uint64_t vs_space; /* total capacity */ uint64_t vs_dspace; /* deflated capacity */ uint64_t vs_rsize; /* replaceable dev size */ uint64_t vs_esize; /* expandable dev size */ uint64_t vs_ops[ZIO_TYPES]; /* operation count */ uint64_t vs_bytes[ZIO_TYPES]; /* bytes read/written */ uint64_t vs_read_errors; /* read errors */ uint64_t vs_write_errors; /* write errors */ uint64_t vs_checksum_errors; /* checksum errors */ uint64_t vs_self_healed; /* self-healed bytes */ uint64_t vs_scan_removing; /* removing? */ uint64_t vs_scan_processed; /* scan processed bytes */ } vdev_stat_t; /* * DDT statistics. Note: all fields should be 64-bit because this * is passed between kernel and userland as an nvlist uint64 array. */ typedef struct ddt_object { uint64_t ddo_count; /* number of elments in ddt */ uint64_t ddo_dspace; /* size of ddt on disk */ uint64_t ddo_mspace; /* size of ddt in-core */ } ddt_object_t; typedef struct ddt_stat { uint64_t dds_blocks; /* blocks */ uint64_t dds_lsize; /* logical size */ uint64_t dds_psize; /* physical size */ uint64_t dds_dsize; /* deflated allocated size */ uint64_t dds_ref_blocks; /* referenced blocks */ uint64_t dds_ref_lsize; /* referenced lsize * refcnt */ uint64_t dds_ref_psize; /* referenced psize * refcnt */ uint64_t dds_ref_dsize; /* referenced dsize * refcnt */ } ddt_stat_t; typedef struct ddt_histogram { ddt_stat_t ddh_stat[64]; /* power-of-two histogram buckets */ } ddt_histogram_t; #define ZVOL_DRIVER "zvol" #define ZFS_DRIVER "zfs" #define ZFS_DEV "/dev/zfs" /* general zvol path */ #define ZVOL_DIR "/dev" #define ZVOL_MAJOR 230 #define ZVOL_MINOR_BITS 4 #define ZVOL_MINOR_MASK ((1U << ZVOL_MINOR_BITS) - 1) #define ZVOL_MINORS (1 << 4) #define ZVOL_DEV_NAME "zd" #define ZVOL_PROP_NAME "name" #define ZVOL_DEFAULT_BLOCKSIZE 8192 /* * /dev/zfs ioctl numbers. 
*/ -#define ZFS_IOC ('Z' << 8) - typedef enum zfs_ioc { - ZFS_IOC_POOL_CREATE = ZFS_IOC, + ZFS_IOC_FIRST = ('Z' << 8), + ZFS_IOC = ZFS_IOC_FIRST, + ZFS_IOC_POOL_CREATE = ZFS_IOC_FIRST, ZFS_IOC_POOL_DESTROY, ZFS_IOC_POOL_IMPORT, ZFS_IOC_POOL_EXPORT, ZFS_IOC_POOL_CONFIGS, ZFS_IOC_POOL_STATS, ZFS_IOC_POOL_TRYIMPORT, ZFS_IOC_POOL_SCAN, ZFS_IOC_POOL_FREEZE, ZFS_IOC_POOL_UPGRADE, ZFS_IOC_POOL_GET_HISTORY, ZFS_IOC_VDEV_ADD, ZFS_IOC_VDEV_REMOVE, ZFS_IOC_VDEV_SET_STATE, ZFS_IOC_VDEV_ATTACH, ZFS_IOC_VDEV_DETACH, ZFS_IOC_VDEV_SETPATH, ZFS_IOC_VDEV_SETFRU, ZFS_IOC_OBJSET_STATS, ZFS_IOC_OBJSET_ZPLPROPS, ZFS_IOC_DATASET_LIST_NEXT, ZFS_IOC_SNAPSHOT_LIST_NEXT, ZFS_IOC_SET_PROP, ZFS_IOC_CREATE_MINOR, ZFS_IOC_REMOVE_MINOR, ZFS_IOC_CREATE, ZFS_IOC_DESTROY, ZFS_IOC_ROLLBACK, ZFS_IOC_RENAME, ZFS_IOC_RECV, ZFS_IOC_SEND, ZFS_IOC_INJECT_FAULT, ZFS_IOC_CLEAR_FAULT, ZFS_IOC_INJECT_LIST_NEXT, ZFS_IOC_ERROR_LOG, ZFS_IOC_CLEAR, ZFS_IOC_PROMOTE, - ZFS_IOC_DESTROY_SNAPS_NVL, + ZFS_IOC_DESTROY_SNAPS, ZFS_IOC_SNAPSHOT, ZFS_IOC_DSOBJ_TO_DSNAME, ZFS_IOC_OBJ_TO_PATH, ZFS_IOC_POOL_SET_PROPS, ZFS_IOC_POOL_GET_PROPS, ZFS_IOC_SET_FSACL, ZFS_IOC_GET_FSACL, ZFS_IOC_SHARE, ZFS_IOC_INHERIT_PROP, ZFS_IOC_SMB_ACL, ZFS_IOC_USERSPACE_ONE, ZFS_IOC_USERSPACE_MANY, ZFS_IOC_USERSPACE_UPGRADE, ZFS_IOC_HOLD, ZFS_IOC_RELEASE, ZFS_IOC_GET_HOLDS, ZFS_IOC_OBJSET_RECVD_PROPS, ZFS_IOC_VDEV_SPLIT, ZFS_IOC_NEXT_OBJ, ZFS_IOC_DIFF, ZFS_IOC_TMP_SNAPSHOT, ZFS_IOC_OBJ_TO_STATS, ZFS_IOC_EVENTS_NEXT, ZFS_IOC_EVENTS_CLEAR, ZFS_IOC_POOL_REGUID, ZFS_IOC_SPACE_WRITTEN, ZFS_IOC_SPACE_SNAPS, ZFS_IOC_POOL_REOPEN, ZFS_IOC_SEND_PROGRESS, + ZFS_IOC_LOG_HISTORY, + ZFS_IOC_SEND_NEW, + ZFS_IOC_SEND_SPACE, + ZFS_IOC_CLONE, + ZFS_IOC_LAST } zfs_ioc_t; /* * zvol ioctl to get dataset name */ #define BLKZNAME _IOR(0x12,125,char[ZFS_MAXNAMELEN]) /* * Internal SPA load state. Used by FMA diagnosis engine. */ typedef enum { SPA_LOAD_NONE, /* no load in progress */ SPA_LOAD_OPEN, /* normal open */ SPA_LOAD_IMPORT, /* import in progress */ SPA_LOAD_TRYIMPORT, /* tryimport in progress */ SPA_LOAD_RECOVER, /* recovery requested */ SPA_LOAD_ERROR /* load failed */ } spa_load_state_t; /* * Bookmark name values. */ #define ZPOOL_ERR_LIST "error list" #define ZPOOL_ERR_DATASET "dataset" #define ZPOOL_ERR_OBJECT "object" #define HIS_MAX_RECORD_LEN (MAXPATHLEN + MAXPATHLEN + 1) /* * The following are names used in the nvlist describing * the pool's history log. */ #define ZPOOL_HIST_RECORD "history record" #define ZPOOL_HIST_TIME "history time" #define ZPOOL_HIST_CMD "history command" #define ZPOOL_HIST_WHO "history who" #define ZPOOL_HIST_ZONE "history zone" #define ZPOOL_HIST_HOST "history hostname" #define ZPOOL_HIST_TXG "history txg" #define ZPOOL_HIST_INT_EVENT "history internal event" #define ZPOOL_HIST_INT_STR "history internal str" +#define ZPOOL_HIST_INT_NAME "internal_name" +#define ZPOOL_HIST_IOCTL "ioctl" +#define ZPOOL_HIST_INPUT_NVL "in_nvl" +#define ZPOOL_HIST_OUTPUT_NVL "out_nvl" +#define ZPOOL_HIST_DSNAME "dsname" +#define ZPOOL_HIST_DSID "dsid" /* * Flags for ZFS_IOC_VDEV_SET_STATE */ #define ZFS_ONLINE_CHECKREMOVE 0x1 #define ZFS_ONLINE_UNSPARE 0x2 #define ZFS_ONLINE_FORCEFAULT 0x4 #define ZFS_ONLINE_EXPAND 0x8 #define ZFS_OFFLINE_TEMPORARY 0x1 /* * Flags for ZFS_IOC_POOL_IMPORT */ #define ZFS_IMPORT_NORMAL 0x0 #define ZFS_IMPORT_VERBATIM 0x1 #define ZFS_IMPORT_ANY_HOST 0x2 #define ZFS_IMPORT_MISSING_LOG 0x4 #define ZFS_IMPORT_ONLY 0x8 /* * Sysevent payload members. 
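/*
 * A small illustration of the ioctl numbering above: ZFS_IOC_FIRST is
 * ('Z' << 8) == 0x5a00 and the remaining zfs_ioc_t values follow
 * consecutively, so an ioctl command can be turned into a zero-based index
 * by subtracting ZFS_IOC_FIRST, with ZFS_IOC_LAST bounding the range. The
 * helper name is hypothetical.
 */
static int
example_ioc_to_index(zfs_ioc_t ioc)
{
        if (ioc < ZFS_IOC_FIRST || ioc >= ZFS_IOC_LAST)
                return (-1);

        return (ioc - ZFS_IOC_FIRST);
}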
ZFS will generate the following sysevents with the * given payloads: * * ESC_ZFS_RESILVER_START * ESC_ZFS_RESILVER_END * ESC_ZFS_POOL_DESTROY * ESC_ZFS_POOL_REGUID * * ZFS_EV_POOL_NAME DATA_TYPE_STRING * ZFS_EV_POOL_GUID DATA_TYPE_UINT64 * * ESC_ZFS_VDEV_REMOVE * ESC_ZFS_VDEV_CLEAR * ESC_ZFS_VDEV_CHECK * * ZFS_EV_POOL_NAME DATA_TYPE_STRING * ZFS_EV_POOL_GUID DATA_TYPE_UINT64 * ZFS_EV_VDEV_PATH DATA_TYPE_STRING (optional) * ZFS_EV_VDEV_GUID DATA_TYPE_UINT64 */ #define ZFS_EV_POOL_NAME "pool_name" #define ZFS_EV_POOL_GUID "pool_guid" #define ZFS_EV_VDEV_PATH "vdev_path" #define ZFS_EV_VDEV_GUID "vdev_guid" -/* - * Note: This is encoded on-disk, so new events must be added to the - * end, and unused events can not be removed. Be sure to edit - * libzfs_pool.c: hist_event_table[]. - */ -typedef enum history_internal_events { - LOG_NO_EVENT = 0, - LOG_POOL_CREATE, - LOG_POOL_VDEV_ADD, - LOG_POOL_REMOVE, - LOG_POOL_DESTROY, - LOG_POOL_EXPORT, - LOG_POOL_IMPORT, - LOG_POOL_VDEV_ATTACH, - LOG_POOL_VDEV_REPLACE, - LOG_POOL_VDEV_DETACH, - LOG_POOL_VDEV_ONLINE, - LOG_POOL_VDEV_OFFLINE, - LOG_POOL_UPGRADE, - LOG_POOL_CLEAR, - LOG_POOL_SCAN, - LOG_POOL_PROPSET, - LOG_DS_CREATE, - LOG_DS_CLONE, - LOG_DS_DESTROY, - LOG_DS_DESTROY_BEGIN, - LOG_DS_INHERIT, - LOG_DS_PROPSET, - LOG_DS_QUOTA, - LOG_DS_PERM_UPDATE, - LOG_DS_PERM_REMOVE, - LOG_DS_PERM_WHO_REMOVE, - LOG_DS_PROMOTE, - LOG_DS_RECEIVE, - LOG_DS_RENAME, - LOG_DS_RESERVATION, - LOG_DS_REPLAY_INC_SYNC, - LOG_DS_REPLAY_FULL_SYNC, - LOG_DS_ROLLBACK, - LOG_DS_SNAPSHOT, - LOG_DS_UPGRADE, - LOG_DS_REFQUOTA, - LOG_DS_REFRESERV, - LOG_POOL_SCAN_DONE, - LOG_DS_USER_HOLD, - LOG_DS_USER_RELEASE, - LOG_POOL_SPLIT, - LOG_POOL_GUID_CHANGE, - LOG_END -} history_internal_events_t; - #ifdef __cplusplus } #endif #endif /* _SYS_FS_ZFS_H */ diff --git a/include/sys/rrwlock.h b/include/sys/rrwlock.h index 798a015d19e2..8fde3a3beb58 100644 --- a/include/sys/rrwlock.h +++ b/include/sys/rrwlock.h @@ -1,80 +1,84 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ +/* + * Copyright (c) 2012 by Delphix. All rights reserved. + */ #ifndef _SYS_RR_RW_LOCK_H #define _SYS_RR_RW_LOCK_H #ifdef __cplusplus extern "C" { #endif #include #include #include /* * A reader-writer lock implementation that allows re-entrant reads, but * still gives writers priority on "new" reads. * * See rrwlock.c for more details about the implementation. 
* * Fields of the rrwlock_t structure: * - rr_lock: protects modification and reading of rrwlock_t fields * - rr_cv: cv for waking up readers or waiting writers * - rr_writer: thread id of the current writer * - rr_anon_rount: number of active anonymous readers * - rr_linked_rcount: total number of non-anonymous active readers * - rr_writer_wanted: a writer wants the lock */ typedef struct rrwlock { kmutex_t rr_lock; kcondvar_t rr_cv; kthread_t *rr_writer; refcount_t rr_anon_rcount; refcount_t rr_linked_rcount; boolean_t rr_writer_wanted; } rrwlock_t; /* * 'tag' is used in reference counting tracking. The * 'tag' must be the same in a rrw_enter() as in its * corresponding rrw_exit(). */ void rrw_init(rrwlock_t *rrl); void rrw_destroy(rrwlock_t *rrl); void rrw_enter(rrwlock_t *rrl, krw_t rw, void *tag); void rrw_exit(rrwlock_t *rrl, void *tag); boolean_t rrw_held(rrwlock_t *rrl, krw_t rw); +void rrw_tsd_destroy(void *arg); #define RRW_READ_HELD(x) rrw_held(x, RW_READER) #define RRW_WRITE_HELD(x) rrw_held(x, RW_WRITER) #ifdef __cplusplus } #endif #endif /* _SYS_RR_RW_LOCK_H */ diff --git a/include/sys/spa.h b/include/sys/spa.h index 8f2af8af2b70..1af9137f8d00 100644 --- a/include/sys/spa.h +++ b/include/sys/spa.h @@ -1,717 +1,707 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012 by Delphix. All rights reserved. * Copyright 2011 Nexenta Systems, Inc. All rights reserved. */ #ifndef _SYS_SPA_H #define _SYS_SPA_H #include #include #include #include #include #include #ifdef __cplusplus extern "C" { #endif /* * Forward references that lots of things need. */ typedef struct spa spa_t; typedef struct vdev vdev_t; typedef struct metaslab metaslab_t; typedef struct metaslab_group metaslab_group_t; typedef struct metaslab_class metaslab_class_t; typedef struct zio zio_t; typedef struct zilog zilog_t; typedef struct spa_aux_vdev spa_aux_vdev_t; typedef struct ddt ddt_t; typedef struct ddt_entry ddt_entry_t; struct dsl_pool; +struct dsl_dataset; /* * General-purpose 32-bit and 64-bit bitfield encodings. 
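/*
 * An illustrative read-side use of the re-entrant lock above. Per the
 * comment on 'tag', the same tag must be passed to rrw_enter() and the
 * matching rrw_exit(); the example_* name is hypothetical.
 */
static void
example_rrw_read(rrwlock_t *rrl, void *tag)
{
        rrw_enter(rrl, RW_READER, tag);

        /* ... read under the lock; RRW_READ_HELD(rrl) is true here ... */

        rrw_exit(rrl, tag);
}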
*/ #define BF32_DECODE(x, low, len) P2PHASE((x) >> (low), 1U << (len)) #define BF64_DECODE(x, low, len) P2PHASE((x) >> (low), 1ULL << (len)) #define BF32_ENCODE(x, low, len) (P2PHASE((x), 1U << (len)) << (low)) #define BF64_ENCODE(x, low, len) (P2PHASE((x), 1ULL << (len)) << (low)) #define BF32_GET(x, low, len) BF32_DECODE(x, low, len) #define BF64_GET(x, low, len) BF64_DECODE(x, low, len) #define BF32_SET(x, low, len, val) \ ((x) ^= BF32_ENCODE((x >> low) ^ (val), low, len)) #define BF64_SET(x, low, len, val) \ ((x) ^= BF64_ENCODE((x >> low) ^ (val), low, len)) #define BF32_GET_SB(x, low, len, shift, bias) \ ((BF32_GET(x, low, len) + (bias)) << (shift)) #define BF64_GET_SB(x, low, len, shift, bias) \ ((BF64_GET(x, low, len) + (bias)) << (shift)) #define BF32_SET_SB(x, low, len, shift, bias, val) \ BF32_SET(x, low, len, ((val) >> (shift)) - (bias)) #define BF64_SET_SB(x, low, len, shift, bias, val) \ BF64_SET(x, low, len, ((val) >> (shift)) - (bias)) /* * We currently support nine block sizes, from 512 bytes to 128K. * We could go higher, but the benefits are near-zero and the cost * of COWing a giant block to modify one byte would become excessive. */ #define SPA_MINBLOCKSHIFT 9 #define SPA_MAXBLOCKSHIFT 17 #define SPA_MINBLOCKSIZE (1ULL << SPA_MINBLOCKSHIFT) #define SPA_MAXBLOCKSIZE (1ULL << SPA_MAXBLOCKSHIFT) #define SPA_BLOCKSIZES (SPA_MAXBLOCKSHIFT - SPA_MINBLOCKSHIFT + 1) /* * Size of block to hold the configuration data (a packed nvlist) */ #define SPA_CONFIG_BLOCKSIZE (1ULL << 14) /* * The DVA size encodings for LSIZE and PSIZE support blocks up to 32MB. * The ASIZE encoding should be at least 64 times larger (6 more bits) * to support up to 4-way RAID-Z mirror mode with worst-case gang block * overhead, three DVAs per bp, plus one more bit in case we do anything * else that expands the ASIZE. */ #define SPA_LSIZEBITS 16 /* LSIZE up to 32M (2^16 * 512) */ #define SPA_PSIZEBITS 16 /* PSIZE up to 32M (2^16 * 512) */ #define SPA_ASIZEBITS 24 /* ASIZE up to 64 times larger */ /* * All SPA data is represented by 128-bit data virtual addresses (DVAs). * The members of the dva_t should be considered opaque outside the SPA. */ typedef struct dva { uint64_t dva_word[2]; } dva_t; /* * Each block has a 256-bit checksum -- strong enough for cryptographic hashes. */ typedef struct zio_cksum { uint64_t zc_word[4]; } zio_cksum_t; /* * Each block is described by its DVAs, time of birth, checksum, etc. 
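A minimal sketch of the BF64 helpers defined above (assuming the usual ASSERT3U macro is available): a plain field round-trips through BF64_SET()/BF64_GET(), and the _SB variants apply a shift so sizes can be stored as counts of 512-byte sectors.

static void
bf64_roundtrip_sketch(void)
{
        uint64_t word = 0;

        /* Pack an 8-bit value into bits [32, 40) and read it back. */
        BF64_SET(word, 32, 8, 0x5aULL);
        ASSERT3U(BF64_GET(word, 32, 8), ==, 0x5aULL);

        /* Store 8K as a sector count, shifted by SPA_MINBLOCKSHIFT. */
        BF64_SET_SB(word, 0, 24, SPA_MINBLOCKSHIFT, 0, 8192ULL);
        ASSERT3U(BF64_GET_SB(word, 0, 24, SPA_MINBLOCKSHIFT, 0), ==, 8192ULL);
}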
* The word-by-word, bit-by-bit layout of the blkptr is as follows: * * 64 56 48 40 32 24 16 8 0 * +-------+-------+-------+-------+-------+-------+-------+-------+ * 0 | vdev1 | GRID | ASIZE | * +-------+-------+-------+-------+-------+-------+-------+-------+ * 1 |G| offset1 | * +-------+-------+-------+-------+-------+-------+-------+-------+ * 2 | vdev2 | GRID | ASIZE | * +-------+-------+-------+-------+-------+-------+-------+-------+ * 3 |G| offset2 | * +-------+-------+-------+-------+-------+-------+-------+-------+ * 4 | vdev3 | GRID | ASIZE | * +-------+-------+-------+-------+-------+-------+-------+-------+ * 5 |G| offset3 | * +-------+-------+-------+-------+-------+-------+-------+-------+ * 6 |BDX|lvl| type | cksum | comp | PSIZE | LSIZE | * +-------+-------+-------+-------+-------+-------+-------+-------+ * 7 | padding | * +-------+-------+-------+-------+-------+-------+-------+-------+ * 8 | padding | * +-------+-------+-------+-------+-------+-------+-------+-------+ * 9 | physical birth txg | * +-------+-------+-------+-------+-------+-------+-------+-------+ * a | logical birth txg | * +-------+-------+-------+-------+-------+-------+-------+-------+ * b | fill count | * +-------+-------+-------+-------+-------+-------+-------+-------+ * c | checksum[0] | * +-------+-------+-------+-------+-------+-------+-------+-------+ * d | checksum[1] | * +-------+-------+-------+-------+-------+-------+-------+-------+ * e | checksum[2] | * +-------+-------+-------+-------+-------+-------+-------+-------+ * f | checksum[3] | * +-------+-------+-------+-------+-------+-------+-------+-------+ * * Legend: * * vdev virtual device ID * offset offset into virtual device * LSIZE logical size * PSIZE physical size (after compression) * ASIZE allocated size (including RAID-Z parity and gang block headers) * GRID RAID-Z layout information (reserved for future use) * cksum checksum function * comp compression function * G gang block indicator * B byteorder (endianness) * D dedup * X unused * lvl level of indirection * type DMU object type * phys birth txg of block allocation; zero if same as logical birth txg * log. birth transaction group in which the block was logically born * fill count number of non-zero blocks under this bp * checksum[4] 256-bit checksum of the data this bp describes */ #define SPA_BLKPTRSHIFT 7 /* blkptr_t is 128 bytes */ #define SPA_DVAS_PER_BP 3 /* Number of DVAs in a bp */ typedef struct blkptr { dva_t blk_dva[SPA_DVAS_PER_BP]; /* Data Virtual Addresses */ uint64_t blk_prop; /* size, compression, type, etc */ uint64_t blk_pad[2]; /* Extra space for the future */ uint64_t blk_phys_birth; /* txg when block was allocated */ uint64_t blk_birth; /* transaction group at birth */ uint64_t blk_fill; /* fill count */ zio_cksum_t blk_cksum; /* 256-bit checksum */ } blkptr_t; /* * Macros to get and set fields in a bp or DVA. 
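As a concrete reading of the layout above, the sketch below (an assumed userland helper in the zdb style, not part of this patch) decodes the identity DVA of a block pointer with the accessor macros that follow.

static void
print_bp_identity_sketch(const blkptr_t *bp)
{
        const dva_t *dva = BP_IDENTITY(bp);

        if (BP_IS_HOLE(bp))
                return;                 /* a hole has no allocated DVAs */

        (void) printf("vdev=%llu offset=%llx asize=%llx lsize=%llx\n",
            (u_longlong_t)DVA_GET_VDEV(dva),
            (u_longlong_t)DVA_GET_OFFSET(dva),
            (u_longlong_t)DVA_GET_ASIZE(dva),
            (u_longlong_t)BP_GET_LSIZE(bp));
}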
*/ #define DVA_GET_ASIZE(dva) \ BF64_GET_SB((dva)->dva_word[0], 0, 24, SPA_MINBLOCKSHIFT, 0) #define DVA_SET_ASIZE(dva, x) \ BF64_SET_SB((dva)->dva_word[0], 0, 24, SPA_MINBLOCKSHIFT, 0, x) #define DVA_GET_GRID(dva) BF64_GET((dva)->dva_word[0], 24, 8) #define DVA_SET_GRID(dva, x) BF64_SET((dva)->dva_word[0], 24, 8, x) #define DVA_GET_VDEV(dva) BF64_GET((dva)->dva_word[0], 32, 32) #define DVA_SET_VDEV(dva, x) BF64_SET((dva)->dva_word[0], 32, 32, x) #define DVA_GET_OFFSET(dva) \ BF64_GET_SB((dva)->dva_word[1], 0, 63, SPA_MINBLOCKSHIFT, 0) #define DVA_SET_OFFSET(dva, x) \ BF64_SET_SB((dva)->dva_word[1], 0, 63, SPA_MINBLOCKSHIFT, 0, x) #define DVA_GET_GANG(dva) BF64_GET((dva)->dva_word[1], 63, 1) #define DVA_SET_GANG(dva, x) BF64_SET((dva)->dva_word[1], 63, 1, x) #define BP_GET_LSIZE(bp) \ BF64_GET_SB((bp)->blk_prop, 0, 16, SPA_MINBLOCKSHIFT, 1) #define BP_SET_LSIZE(bp, x) \ BF64_SET_SB((bp)->blk_prop, 0, 16, SPA_MINBLOCKSHIFT, 1, x) #define BP_GET_PSIZE(bp) \ BF64_GET_SB((bp)->blk_prop, 16, 16, SPA_MINBLOCKSHIFT, 1) #define BP_SET_PSIZE(bp, x) \ BF64_SET_SB((bp)->blk_prop, 16, 16, SPA_MINBLOCKSHIFT, 1, x) #define BP_GET_COMPRESS(bp) BF64_GET((bp)->blk_prop, 32, 8) #define BP_SET_COMPRESS(bp, x) BF64_SET((bp)->blk_prop, 32, 8, x) #define BP_GET_CHECKSUM(bp) BF64_GET((bp)->blk_prop, 40, 8) #define BP_SET_CHECKSUM(bp, x) BF64_SET((bp)->blk_prop, 40, 8, x) #define BP_GET_TYPE(bp) BF64_GET((bp)->blk_prop, 48, 8) #define BP_SET_TYPE(bp, x) BF64_SET((bp)->blk_prop, 48, 8, x) #define BP_GET_LEVEL(bp) BF64_GET((bp)->blk_prop, 56, 5) #define BP_SET_LEVEL(bp, x) BF64_SET((bp)->blk_prop, 56, 5, x) #define BP_GET_PROP_BIT_61(bp) BF64_GET((bp)->blk_prop, 61, 1) #define BP_SET_PROP_BIT_61(bp, x) BF64_SET((bp)->blk_prop, 61, 1, x) #define BP_GET_DEDUP(bp) BF64_GET((bp)->blk_prop, 62, 1) #define BP_SET_DEDUP(bp, x) BF64_SET((bp)->blk_prop, 62, 1, x) #define BP_GET_BYTEORDER(bp) (0 - BF64_GET((bp)->blk_prop, 63, 1)) #define BP_SET_BYTEORDER(bp, x) BF64_SET((bp)->blk_prop, 63, 1, x) #define BP_PHYSICAL_BIRTH(bp) \ ((bp)->blk_phys_birth ? (bp)->blk_phys_birth : (bp)->blk_birth) #define BP_SET_BIRTH(bp, logical, physical) \ { \ (bp)->blk_birth = (logical); \ (bp)->blk_phys_birth = ((logical) == (physical) ? 0 : (physical)); \ } #define BP_GET_ASIZE(bp) \ (DVA_GET_ASIZE(&(bp)->blk_dva[0]) + DVA_GET_ASIZE(&(bp)->blk_dva[1]) + \ DVA_GET_ASIZE(&(bp)->blk_dva[2])) #define BP_GET_UCSIZE(bp) \ ((BP_GET_LEVEL(bp) > 0 || DMU_OT_IS_METADATA(BP_GET_TYPE(bp))) ? 
\ BP_GET_PSIZE(bp) : BP_GET_LSIZE(bp)) #define BP_GET_NDVAS(bp) \ (!!DVA_GET_ASIZE(&(bp)->blk_dva[0]) + \ !!DVA_GET_ASIZE(&(bp)->blk_dva[1]) + \ !!DVA_GET_ASIZE(&(bp)->blk_dva[2])) #define BP_COUNT_GANG(bp) \ (DVA_GET_GANG(&(bp)->blk_dva[0]) + \ DVA_GET_GANG(&(bp)->blk_dva[1]) + \ DVA_GET_GANG(&(bp)->blk_dva[2])) #define DVA_EQUAL(dva1, dva2) \ ((dva1)->dva_word[1] == (dva2)->dva_word[1] && \ (dva1)->dva_word[0] == (dva2)->dva_word[0]) #define BP_EQUAL(bp1, bp2) \ (BP_PHYSICAL_BIRTH(bp1) == BP_PHYSICAL_BIRTH(bp2) && \ DVA_EQUAL(&(bp1)->blk_dva[0], &(bp2)->blk_dva[0]) && \ DVA_EQUAL(&(bp1)->blk_dva[1], &(bp2)->blk_dva[1]) && \ DVA_EQUAL(&(bp1)->blk_dva[2], &(bp2)->blk_dva[2])) #define ZIO_CHECKSUM_EQUAL(zc1, zc2) \ (0 == (((zc1).zc_word[0] - (zc2).zc_word[0]) | \ ((zc1).zc_word[1] - (zc2).zc_word[1]) | \ ((zc1).zc_word[2] - (zc2).zc_word[2]) | \ ((zc1).zc_word[3] - (zc2).zc_word[3]))) #define DVA_IS_VALID(dva) (DVA_GET_ASIZE(dva) != 0) #define ZIO_SET_CHECKSUM(zcp, w0, w1, w2, w3) \ { \ (zcp)->zc_word[0] = w0; \ (zcp)->zc_word[1] = w1; \ (zcp)->zc_word[2] = w2; \ (zcp)->zc_word[3] = w3; \ } #define BP_IDENTITY(bp) (&(bp)->blk_dva[0]) #define BP_IS_GANG(bp) DVA_GET_GANG(BP_IDENTITY(bp)) #define BP_IS_HOLE(bp) ((bp)->blk_birth == 0) /* BP_IS_RAIDZ(bp) assumes no block compression */ #define BP_IS_RAIDZ(bp) (DVA_GET_ASIZE(&(bp)->blk_dva[0]) > \ BP_GET_PSIZE(bp)) #define BP_ZERO(bp) \ { \ (bp)->blk_dva[0].dva_word[0] = 0; \ (bp)->blk_dva[0].dva_word[1] = 0; \ (bp)->blk_dva[1].dva_word[0] = 0; \ (bp)->blk_dva[1].dva_word[1] = 0; \ (bp)->blk_dva[2].dva_word[0] = 0; \ (bp)->blk_dva[2].dva_word[1] = 0; \ (bp)->blk_prop = 0; \ (bp)->blk_pad[0] = 0; \ (bp)->blk_pad[1] = 0; \ (bp)->blk_phys_birth = 0; \ (bp)->blk_birth = 0; \ (bp)->blk_fill = 0; \ ZIO_SET_CHECKSUM(&(bp)->blk_cksum, 0, 0, 0, 0); \ } /* * Note: the byteorder is either 0 or -1, both of which are palindromes. * This simplifies the endianness handling a bit. */ #ifdef _BIG_ENDIAN #define ZFS_HOST_BYTEORDER (0ULL) #else #define ZFS_HOST_BYTEORDER (-1ULL) #endif #define BP_SHOULD_BYTESWAP(bp) (BP_GET_BYTEORDER(bp) != ZFS_HOST_BYTEORDER) #define BP_SPRINTF_LEN 320 /* * This macro allows code sharing between zfs, libzpool, and mdb. * 'func' is either snprintf() or mdb_snprintf(). * 'ws' (whitespace) can be ' ' for single-line format, '\n' for multi-line. */ #define SPRINTF_BLKPTR(func, ws, buf, bp, type, checksum, compress) \ { \ static const char *copyname[] = \ { "zero", "single", "double", "triple" }; \ int size = BP_SPRINTF_LEN; \ int len = 0; \ int copies = 0; \ int d; \ \ if (bp == NULL) { \ len = func(buf + len, size - len, ""); \ } else if (BP_IS_HOLE(bp)) { \ len = func(buf + len, size - len, ""); \ } else { \ for (d = 0; d < BP_GET_NDVAS(bp); d++) { \ const dva_t *dva = &bp->blk_dva[d]; \ if (DVA_IS_VALID(dva)) \ copies++; \ len += func(buf + len, size - len, \ "DVA[%d]=<%llu:%llx:%llx>%c", d, \ (u_longlong_t)DVA_GET_VDEV(dva), \ (u_longlong_t)DVA_GET_OFFSET(dva), \ (u_longlong_t)DVA_GET_ASIZE(dva), \ ws); \ } \ if (BP_IS_GANG(bp) && \ DVA_GET_ASIZE(&bp->blk_dva[2]) <= \ DVA_GET_ASIZE(&bp->blk_dva[1]) / 2) \ copies--; \ len += func(buf + len, size - len, \ "[L%llu %s] %s %s %s %s %s %s%c" \ "size=%llxL/%llxP birth=%lluL/%lluP fill=%llu%c" \ "cksum=%llx:%llx:%llx:%llx", \ (u_longlong_t)BP_GET_LEVEL(bp), \ type, \ checksum, \ compress, \ BP_GET_BYTEORDER(bp) == 0 ? "BE" : "LE", \ BP_IS_GANG(bp) ? "gang" : "contiguous", \ BP_GET_DEDUP(bp) ? 
"dedup" : "unique", \ copyname[copies], \ ws, \ (u_longlong_t)BP_GET_LSIZE(bp), \ (u_longlong_t)BP_GET_PSIZE(bp), \ (u_longlong_t)bp->blk_birth, \ (u_longlong_t)BP_PHYSICAL_BIRTH(bp), \ (u_longlong_t)bp->blk_fill, \ ws, \ (u_longlong_t)bp->blk_cksum.zc_word[0], \ (u_longlong_t)bp->blk_cksum.zc_word[1], \ (u_longlong_t)bp->blk_cksum.zc_word[2], \ (u_longlong_t)bp->blk_cksum.zc_word[3]); \ } \ ASSERT(len < size); \ } #include #define BP_GET_BUFC_TYPE(bp) \ (((BP_GET_LEVEL(bp) > 0) || (DMU_OT_IS_METADATA(BP_GET_TYPE(bp)))) ? \ ARC_BUFC_METADATA : ARC_BUFC_DATA) typedef enum spa_import_type { SPA_IMPORT_EXISTING, SPA_IMPORT_ASSEMBLE } spa_import_type_t; /* state manipulation functions */ extern int spa_open(const char *pool, spa_t **, void *tag); extern int spa_open_rewind(const char *pool, spa_t **, void *tag, nvlist_t *policy, nvlist_t **config); extern int spa_get_stats(const char *pool, nvlist_t **config, char *altroot, size_t buflen); extern int spa_create(const char *pool, nvlist_t *config, nvlist_t *props, - const char *history_str, nvlist_t *zplprops); + nvlist_t *zplprops); extern int spa_import_rootpool(char *devpath, char *devid); extern int spa_import(const char *pool, nvlist_t *config, nvlist_t *props, uint64_t flags); extern nvlist_t *spa_tryimport(nvlist_t *tryconfig); extern int spa_destroy(char *pool); extern int spa_export(char *pool, nvlist_t **oldconfig, boolean_t force, boolean_t hardforce); extern int spa_reset(char *pool); extern void spa_async_request(spa_t *spa, int flag); extern void spa_async_unrequest(spa_t *spa, int flag); extern void spa_async_suspend(spa_t *spa); extern void spa_async_resume(spa_t *spa); extern spa_t *spa_inject_addref(char *pool); extern void spa_inject_delref(spa_t *spa); extern void spa_scan_stat_init(spa_t *spa); extern int spa_scan_get_stats(spa_t *spa, pool_scan_stat_t *ps); #define SPA_ASYNC_CONFIG_UPDATE 0x01 #define SPA_ASYNC_REMOVE 0x02 #define SPA_ASYNC_PROBE 0x04 #define SPA_ASYNC_RESILVER_DONE 0x08 #define SPA_ASYNC_RESILVER 0x10 #define SPA_ASYNC_AUTOEXPAND 0x20 #define SPA_ASYNC_REMOVE_DONE 0x40 #define SPA_ASYNC_REMOVE_STOP 0x80 /* * Controls the behavior of spa_vdev_remove(). 
*/ #define SPA_REMOVE_UNSPARE 0x01 #define SPA_REMOVE_DONE 0x02 /* device manipulation */ extern int spa_vdev_add(spa_t *spa, nvlist_t *nvroot); extern int spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing); extern int spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid, int replace_done); extern int spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare); extern boolean_t spa_vdev_remove_active(spa_t *spa); extern int spa_vdev_setpath(spa_t *spa, uint64_t guid, const char *newpath); extern int spa_vdev_setfru(spa_t *spa, uint64_t guid, const char *newfru); extern int spa_vdev_split_mirror(spa_t *spa, char *newname, nvlist_t *config, nvlist_t *props, boolean_t exp); /* spare state (which is global across all pools) */ extern void spa_spare_add(vdev_t *vd); extern void spa_spare_remove(vdev_t *vd); extern boolean_t spa_spare_exists(uint64_t guid, uint64_t *pool, int *refcnt); extern void spa_spare_activate(vdev_t *vd); /* L2ARC state (which is global across all pools) */ extern void spa_l2cache_add(vdev_t *vd); extern void spa_l2cache_remove(vdev_t *vd); extern boolean_t spa_l2cache_exists(uint64_t guid, uint64_t *pool); extern void spa_l2cache_activate(vdev_t *vd); extern void spa_l2cache_drop(spa_t *spa); /* scanning */ extern int spa_scan(spa_t *spa, pool_scan_func_t func); extern int spa_scan_stop(spa_t *spa); /* spa syncing */ extern void spa_sync(spa_t *spa, uint64_t txg); /* only for DMU use */ extern void spa_sync_allpools(void); extern int zfs_sync_pass_deferred_free; /* spa namespace global mutex */ extern kmutex_t spa_namespace_lock; /* * SPA configuration functions in spa_config.c */ #define SPA_CONFIG_UPDATE_POOL 0 #define SPA_CONFIG_UPDATE_VDEVS 1 extern void spa_config_sync(spa_t *, boolean_t, boolean_t); extern void spa_config_load(void); extern nvlist_t *spa_all_configs(uint64_t *); extern void spa_config_set(spa_t *spa, nvlist_t *config); extern nvlist_t *spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg, int getstats); extern void spa_config_update(spa_t *spa, int what); /* * Miscellaneous SPA routines in spa_misc.c */ /* Namespace manipulation */ extern spa_t *spa_lookup(const char *name); extern spa_t *spa_add(const char *name, nvlist_t *config, const char *altroot); extern void spa_remove(spa_t *spa); extern spa_t *spa_next(spa_t *prev); /* Refcount functions */ extern void spa_open_ref(spa_t *spa, void *tag); extern void spa_close(spa_t *spa, void *tag); extern boolean_t spa_refcount_zero(spa_t *spa); #define SCL_NONE 0x00 #define SCL_CONFIG 0x01 #define SCL_STATE 0x02 #define SCL_L2ARC 0x04 /* hack until L2ARC 2.0 */ #define SCL_ALLOC 0x08 #define SCL_ZIO 0x10 #define SCL_FREE 0x20 #define SCL_VDEV 0x40 #define SCL_LOCKS 7 #define SCL_ALL ((1 << SCL_LOCKS) - 1) #define SCL_STATE_ALL (SCL_STATE | SCL_L2ARC | SCL_ZIO) /* Pool configuration locks */ extern int spa_config_tryenter(spa_t *spa, int locks, void *tag, krw_t rw); extern void spa_config_enter(spa_t *spa, int locks, void *tag, krw_t rw); extern void spa_config_exit(spa_t *spa, int locks, void *tag); extern int spa_config_held(spa_t *spa, int locks, krw_t rw); /* Pool vdev add/remove lock */ extern uint64_t spa_vdev_enter(spa_t *spa); extern uint64_t spa_vdev_config_enter(spa_t *spa); extern void spa_vdev_config_exit(spa_t *spa, vdev_t *vd, uint64_t txg, int error, char *tag); extern int spa_vdev_exit(spa_t *spa, vdev_t *vd, uint64_t txg, int error); /* Pool vdev state change lock */ extern void spa_vdev_state_enter(spa_t *spa, int oplock); extern int 
spa_vdev_state_exit(spa_t *spa, vdev_t *vd, int error); /* Log state */ typedef enum spa_log_state { SPA_LOG_UNKNOWN = 0, /* unknown log state */ SPA_LOG_MISSING, /* missing log(s) */ SPA_LOG_CLEAR, /* clear the log(s) */ SPA_LOG_GOOD, /* log(s) are good */ } spa_log_state_t; extern spa_log_state_t spa_get_log_state(spa_t *spa); extern void spa_set_log_state(spa_t *spa, spa_log_state_t state); extern int spa_offline_log(spa_t *spa); /* Log claim callback */ extern void spa_claim_notify(zio_t *zio); extern void spa_deadman(void *); /* Accessor functions */ extern boolean_t spa_shutting_down(spa_t *spa); extern struct dsl_pool *spa_get_dsl(spa_t *spa); extern boolean_t spa_is_initializing(spa_t *spa); extern blkptr_t *spa_get_rootblkptr(spa_t *spa); extern void spa_set_rootblkptr(spa_t *spa, const blkptr_t *bp); extern void spa_altroot(spa_t *, char *, size_t); extern int spa_sync_pass(spa_t *spa); extern char *spa_name(spa_t *spa); extern uint64_t spa_guid(spa_t *spa); extern uint64_t spa_load_guid(spa_t *spa); extern uint64_t spa_last_synced_txg(spa_t *spa); extern uint64_t spa_first_txg(spa_t *spa); extern uint64_t spa_syncing_txg(spa_t *spa); extern uint64_t spa_version(spa_t *spa); extern pool_state_t spa_state(spa_t *spa); extern spa_load_state_t spa_load_state(spa_t *spa); extern uint64_t spa_freeze_txg(spa_t *spa); extern uint64_t spa_get_asize(spa_t *spa, uint64_t lsize); extern uint64_t spa_get_dspace(spa_t *spa); extern void spa_update_dspace(spa_t *spa); extern uint64_t spa_version(spa_t *spa); extern boolean_t spa_deflate(spa_t *spa); extern metaslab_class_t *spa_normal_class(spa_t *spa); extern metaslab_class_t *spa_log_class(spa_t *spa); extern int spa_max_replication(spa_t *spa); extern int spa_prev_software_version(spa_t *spa); extern int spa_busy(void); extern uint8_t spa_get_failmode(spa_t *spa); extern boolean_t spa_suspended(spa_t *spa); extern uint64_t spa_bootfs(spa_t *spa); extern uint64_t spa_delegation(spa_t *spa); extern objset_t *spa_meta_objset(spa_t *spa); extern uint64_t spa_deadman_synctime(spa_t *spa); /* Miscellaneous support routines */ extern void spa_activate_mos_feature(spa_t *spa, const char *feature); extern void spa_deactivate_mos_feature(spa_t *spa, const char *feature); extern int spa_rename(const char *oldname, const char *newname); extern spa_t *spa_by_guid(uint64_t pool_guid, uint64_t device_guid); extern boolean_t spa_guid_exists(uint64_t pool_guid, uint64_t device_guid); extern char *spa_strdup(const char *); extern void spa_strfree(char *); extern uint64_t spa_get_random(uint64_t range); extern uint64_t spa_generate_guid(spa_t *spa); extern void sprintf_blkptr(char *buf, const blkptr_t *bp); extern void spa_freeze(spa_t *spa); extern int spa_change_guid(spa_t *spa); extern void spa_upgrade(spa_t *spa, uint64_t version); extern void spa_evict_all(void); extern vdev_t *spa_lookup_by_guid(spa_t *spa, uint64_t guid, boolean_t l2cache); extern boolean_t spa_has_spare(spa_t *, uint64_t guid); extern uint64_t dva_get_dsize_sync(spa_t *spa, const dva_t *dva); extern uint64_t bp_get_dsize_sync(spa_t *spa, const blkptr_t *bp); extern uint64_t bp_get_dsize(spa_t *spa, const blkptr_t *bp); extern boolean_t spa_has_slogs(spa_t *spa); extern boolean_t spa_is_root(spa_t *spa); extern boolean_t spa_writeable(spa_t *spa); extern int spa_mode(spa_t *spa); extern uint64_t strtonum(const char *str, char **nptr); -/* history logging */ -typedef enum history_log_type { - LOG_CMD_POOL_CREATE, - LOG_CMD_NORMAL, - LOG_INTERNAL -} history_log_type_t; - -typedef struct 
history_arg { - char *ha_history_str; - history_log_type_t ha_log_type; - history_internal_events_t ha_event; - char *ha_zone; - uid_t ha_uid; -} history_arg_t; - extern char *spa_his_ievent_table[]; extern void spa_history_create_obj(spa_t *spa, dmu_tx_t *tx); extern int spa_history_get(spa_t *spa, uint64_t *offset, uint64_t *len_read, char *his_buf); -extern int spa_history_log(spa_t *spa, const char *his_buf, - history_log_type_t what); -extern void spa_history_log_internal(history_internal_events_t event, - spa_t *spa, dmu_tx_t *tx, const char *fmt, ...); -extern void spa_history_log_version(spa_t *spa, history_internal_events_t evt); +extern int spa_history_log(spa_t *spa, const char *his_buf); +extern int spa_history_log_nvl(spa_t *spa, nvlist_t *nvl); +extern void spa_history_log_version(spa_t *spa, const char *operation); +extern void spa_history_log_internal(spa_t *spa, const char *operation, + dmu_tx_t *tx, const char *fmt, ...); +extern void spa_history_log_internal_ds(struct dsl_dataset *ds, const char *op, + dmu_tx_t *tx, const char *fmt, ...); +extern void spa_history_log_internal_dd(dsl_dir_t *dd, const char *operation, + dmu_tx_t *tx, const char *fmt, ...); /* error handling */ struct zbookmark; extern void spa_log_error(spa_t *spa, zio_t *zio); extern void zfs_ereport_post(const char *class, spa_t *spa, vdev_t *vd, zio_t *zio, uint64_t stateoroffset, uint64_t length); extern void zfs_post_remove(spa_t *spa, vdev_t *vd); extern void zfs_post_state_change(spa_t *spa, vdev_t *vd); extern void zfs_post_autoreplace(spa_t *spa, vdev_t *vd); extern uint64_t spa_get_errlog_size(spa_t *spa); extern int spa_get_errlog(spa_t *spa, void *uaddr, size_t *count); extern void spa_errlog_rotate(spa_t *spa); extern void spa_errlog_drain(spa_t *spa); extern void spa_errlog_sync(spa_t *spa, uint64_t txg); extern void spa_get_errlists(spa_t *spa, avl_tree_t *last, avl_tree_t *scrub); /* vdev cache */ extern void vdev_cache_stat_init(void); extern void vdev_cache_stat_fini(void); /* Initialization and termination */ extern void spa_init(int flags); extern void spa_fini(void); extern void spa_boot_init(void); /* properties */ extern int spa_prop_set(spa_t *spa, nvlist_t *nvp); extern int spa_prop_get(spa_t *spa, nvlist_t **nvp); extern void spa_prop_clear_bootfs(spa_t *spa, uint64_t obj, dmu_tx_t *tx); extern void spa_configfile_set(spa_t *, nvlist_t *, boolean_t); /* asynchronous event notification */ extern void spa_event_notify(spa_t *spa, vdev_t *vdev, const char *name); #ifdef ZFS_DEBUG #define dprintf_bp(bp, fmt, ...) do { \ if (zfs_flags & ZFS_DEBUG_DPRINTF) { \ char *__blkbuf = kmem_alloc(BP_SPRINTF_LEN, KM_PUSHPAGE); \ sprintf_blkptr(__blkbuf, (bp)); \ dprintf(fmt " %s\n", __VA_ARGS__, __blkbuf); \ kmem_free(__blkbuf, BP_SPRINTF_LEN); \ } \ _NOTE(CONSTCOND) } while (0) #else #define dprintf_bp(bp, fmt, ...) #endif extern boolean_t spa_debug_enabled(spa_t *spa); #define spa_dbgmsg(spa, ...) \ { \ if (spa_debug_enabled(spa)) \ zfs_dbgmsg(__VA_ARGS__); \ } extern int spa_mode_global; /* mode, e.g. FREAD | FWRITE */ #ifdef __cplusplus } #endif #endif /* _SYS_SPA_H */ diff --git a/include/sys/zfs_context.h b/include/sys/zfs_context.h index 4b342595f335..599b97a9b6c9 100644 --- a/include/sys/zfs_context.h +++ b/include/sys/zfs_context.h @@ -1,658 +1,659 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. 
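Referring back to the reworked history interfaces declared in spa.h above: internal events are now keyed by operation strings rather than the removed history_internal_events_t enum. A minimal sketch of a sync-context caller (the names and message format are illustrative, not taken from this patch):

static void
log_guid_change_sketch(spa_t *spa, uint64_t oldguid, uint64_t newguid,
    dmu_tx_t *tx)
{
        spa_history_log_internal(spa, "guid change", tx,
            "old=%llu new=%llu",
            (u_longlong_t)oldguid, (u_longlong_t)newguid);
}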
* * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* * Copyright 2011 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2012, Joyent, Inc. All rights reserved. * Copyright (c) 2012 by Delphix. All rights reserved. */ #ifndef _SYS_ZFS_CONTEXT_H #define _SYS_ZFS_CONTEXT_H #ifdef __KERNEL__ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #else /* _KERNEL */ #define _SYS_MUTEX_H #define _SYS_RWLOCK_H #define _SYS_CONDVAR_H #define _SYS_SYSTM_H #define _SYS_T_LOCK_H #define _SYS_VNODE_H #define _SYS_VFS_H #define _SYS_SUNDDI_H #define _SYS_CALLB_H #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * Stack */ #define noinline __attribute__((noinline)) /* * Debugging */ /* * Note that we are not using the debugging levels. */ #define CE_CONT 0 /* continuation */ #define CE_NOTE 1 /* notice */ #define CE_WARN 2 /* warning */ #define CE_PANIC 3 /* panic */ #define CE_IGNORE 4 /* print nothing */ extern int aok; /* * ZFS debugging */ extern void dprintf_setup(int *argc, char **argv); extern void __dprintf(const char *file, const char *func, int line, const char *fmt, ...); extern void cmn_err(int, const char *, ...); extern void vcmn_err(int, const char *, __va_list); extern void panic(const char *, ...); extern void vpanic(const char *, __va_list); #define fm_panic panic /* * DTrace SDT probes have different signatures in userland than they do in * kernel. If they're being used in kernel code, re-define them out of * existence for their counterparts in libzpool. 
*/ #ifdef DTRACE_PROBE #undef DTRACE_PROBE #define DTRACE_PROBE(a) ((void)0) #endif /* DTRACE_PROBE */ #ifdef DTRACE_PROBE1 #undef DTRACE_PROBE1 #define DTRACE_PROBE1(a, b, c) ((void)0) #endif /* DTRACE_PROBE1 */ #ifdef DTRACE_PROBE2 #undef DTRACE_PROBE2 #define DTRACE_PROBE2(a, b, c, d, e) ((void)0) #endif /* DTRACE_PROBE2 */ #ifdef DTRACE_PROBE3 #undef DTRACE_PROBE3 #define DTRACE_PROBE3(a, b, c, d, e, f, g) ((void)0) #endif /* DTRACE_PROBE3 */ #ifdef DTRACE_PROBE4 #undef DTRACE_PROBE4 #define DTRACE_PROBE4(a, b, c, d, e, f, g, h, i) ((void)0) #endif /* DTRACE_PROBE4 */ /* * Threads */ #define TS_MAGIC 0x72f158ab4261e538ull #define TS_RUN 0x00000002 #ifdef __linux__ #define STACK_SIZE 8192 /* Linux x86 and amd64 */ #else #define STACK_SIZE 24576 /* Solaris */ #endif /* in libzpool, p0 exists only to have its address taken */ typedef struct proc { uintptr_t this_is_never_used_dont_dereference_it; } proc_t; extern struct proc p0; #define curproc (&p0) typedef void (*thread_func_t)(void *); typedef void (*thread_func_arg_t)(void *); typedef pthread_t kt_did_t; typedef struct kthread { kt_did_t t_tid; thread_func_t t_func; void * t_arg; } kthread_t; #define tsd_get(key) pthread_getspecific(key) #define tsd_set(key, val) pthread_setspecific(key, val) #define curthread zk_thread_current() #define thread_exit zk_thread_exit #define thread_create(stk, stksize, func, arg, len, pp, state, pri) \ zk_thread_create(stk, stksize, (thread_func_t)func, arg, \ len, NULL, state, pri, PTHREAD_CREATE_DETACHED) #define thread_join(t) zk_thread_join(t) #define newproc(f,a,cid,pri,ctp,pid) (ENOSYS) extern kthread_t *zk_thread_current(void); extern void zk_thread_exit(void); extern kthread_t *zk_thread_create(caddr_t stk, size_t stksize, thread_func_t func, void *arg, size_t len, proc_t *pp, int state, pri_t pri, int detachstate); extern void zk_thread_join(kt_did_t tid); #define kpreempt_disable() ((void)0) #define kpreempt_enable() ((void)0) #define PS_NONE -1 #define issig(why) (FALSE) #define ISSIG(thr, why) (FALSE) /* * Mutexes */ #define MTX_MAGIC 0x9522f51362a6e326ull #define MTX_INIT ((void *)NULL) #define MTX_DEST ((void *)-1UL) typedef struct kmutex { void *m_owner; uint64_t m_magic; pthread_mutex_t m_lock; } kmutex_t; #define MUTEX_DEFAULT 0 #define MUTEX_HELD(m) ((m)->m_owner == curthread) #define MUTEX_NOT_HELD(m) (!MUTEX_HELD(m)) extern void mutex_init(kmutex_t *mp, char *name, int type, void *cookie); extern void mutex_destroy(kmutex_t *mp); extern void mutex_enter(kmutex_t *mp); extern void mutex_exit(kmutex_t *mp); extern int mutex_tryenter(kmutex_t *mp); extern void *mutex_owner(kmutex_t *mp); extern int mutex_held(kmutex_t *mp); /* * RW locks */ #define RW_MAGIC 0x4d31fb123648e78aull #define RW_INIT ((void *)NULL) #define RW_DEST ((void *)-1UL) typedef struct krwlock { void *rw_owner; void *rw_wr_owner; uint64_t rw_magic; pthread_rwlock_t rw_lock; uint_t rw_readers; } krwlock_t; typedef int krw_t; #define RW_READER 0 #define RW_WRITER 1 #define RW_DEFAULT RW_READER #define RW_READ_HELD(x) ((x)->rw_readers > 0) #define RW_WRITE_HELD(x) ((x)->rw_wr_owner == curthread) #define RW_LOCK_HELD(x) (RW_READ_HELD(x) || RW_WRITE_HELD(x)) extern void rw_init(krwlock_t *rwlp, char *name, int type, void *arg); extern void rw_destroy(krwlock_t *rwlp); extern void rw_enter(krwlock_t *rwlp, krw_t rw); extern int rw_tryenter(krwlock_t *rwlp, krw_t rw); extern int rw_tryupgrade(krwlock_t *rwlp); extern void rw_exit(krwlock_t *rwlp); #define rw_downgrade(rwlp) do { } while (0) extern uid_t crgetuid(cred_t *cr); 
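The kernel-style locking shims declared above let the same call sites build in libzpool, where they are backed by pthreads. A small sketch with hypothetical names; the caller is assumed to have run mutex_init(lock, NULL, MUTEX_DEFAULT, NULL) first.

static uint64_t demo_counter;           /* hypothetical shared state */

static void
demo_bump(kmutex_t *lock)
{
        mutex_enter(lock);
        ASSERT(MUTEX_HELD(lock));       /* owner tracking works in userland too */
        demo_counter++;
        mutex_exit(lock);
}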
+extern uid_t crgetruid(cred_t *cr); extern gid_t crgetgid(cred_t *cr); extern int crgetngroups(cred_t *cr); extern gid_t *crgetgroups(cred_t *cr); /* * Condition variables */ #define CV_MAGIC 0xd31ea9a83b1b30c4ull typedef struct kcondvar { uint64_t cv_magic; pthread_cond_t cv; } kcondvar_t; #define CV_DEFAULT 0 extern void cv_init(kcondvar_t *cv, char *name, int type, void *arg); extern void cv_destroy(kcondvar_t *cv); extern void cv_wait(kcondvar_t *cv, kmutex_t *mp); extern clock_t cv_timedwait(kcondvar_t *cv, kmutex_t *mp, clock_t abstime); extern void cv_signal(kcondvar_t *cv); extern void cv_broadcast(kcondvar_t *cv); #define cv_timedwait_interruptible(cv, mp, at) cv_timedwait(cv, mp, at) #define cv_wait_interruptible(cv, mp) cv_wait(cv, mp) #define cv_wait_io(cv, mp) cv_wait(cv, mp) /* * kstat creation, installation and deletion */ extern kstat_t *kstat_create(char *, int, char *, char *, uchar_t, ulong_t, uchar_t); extern void kstat_install(kstat_t *); extern void kstat_delete(kstat_t *); /* * Kernel memory */ #define KM_SLEEP UMEM_NOFAIL #define KM_PUSHPAGE KM_SLEEP #define KM_NOSLEEP UMEM_DEFAULT #define KM_NODEBUG 0x0 #define KMC_NODEBUG UMC_NODEBUG #define KMC_KMEM 0x0 #define KMC_VMEM 0x0 #define kmem_alloc(_s, _f) umem_alloc(_s, _f) #define kmem_zalloc(_s, _f) umem_zalloc(_s, _f) #define kmem_free(_b, _s) umem_free(_b, _s) #define vmem_alloc(_s, _f) kmem_alloc(_s, _f) #define vmem_zalloc(_s, _f) kmem_zalloc(_s, _f) #define vmem_free(_b, _s) kmem_free(_b, _s) #define kmem_cache_create(_a, _b, _c, _d, _e, _f, _g, _h, _i) \ umem_cache_create(_a, _b, _c, _d, _e, _f, _g, _h, _i) #define kmem_cache_destroy(_c) umem_cache_destroy(_c) #define kmem_cache_alloc(_c, _f) umem_cache_alloc(_c, _f) #define kmem_cache_free(_c, _b) umem_cache_free(_c, _b) #define kmem_debugging() 0 #define kmem_cache_reap_now(_c) /* nothing */ #define kmem_cache_set_move(_c, _cb) /* nothing */ #define POINTER_INVALIDATE(_pp) /* nothing */ #define POINTER_IS_VALID(_p) 0 typedef umem_cache_t kmem_cache_t; typedef enum kmem_cbrc { KMEM_CBRC_YES, KMEM_CBRC_NO, KMEM_CBRC_LATER, KMEM_CBRC_DONT_NEED, KMEM_CBRC_DONT_KNOW } kmem_cbrc_t; /* * Task queues */ typedef struct taskq taskq_t; typedef uintptr_t taskqid_t; typedef void (task_func_t)(void *); typedef struct taskq_ent { struct taskq_ent *tqent_next; struct taskq_ent *tqent_prev; task_func_t *tqent_func; void *tqent_arg; uintptr_t tqent_flags; } taskq_ent_t; #define TQENT_FLAG_PREALLOC 0x1 /* taskq_dispatch_ent used */ #define TASKQ_PREPOPULATE 0x0001 #define TASKQ_CPR_SAFE 0x0002 /* Use CPR safe protocol */ #define TASKQ_DYNAMIC 0x0004 /* Use dynamic thread scheduling */ #define TASKQ_THREADS_CPU_PCT 0x0008 /* Scale # threads by # cpus */ #define TASKQ_DC_BATCH 0x0010 /* Mark threads as batch */ #define TQ_SLEEP KM_SLEEP /* Can block for memory */ #define TQ_NOSLEEP KM_NOSLEEP /* cannot block for memory; may fail */ #define TQ_PUSHPAGE KM_PUSHPAGE /* Cannot perform I/O */ #define TQ_NOQUEUE 0x02 /* Do not enqueue if can't dispatch */ #define TQ_FRONT 0x08 /* Queue in front */ extern taskq_t *system_taskq; extern taskq_t *taskq_create(const char *, int, pri_t, int, int, uint_t); #define taskq_create_proc(a, b, c, d, e, p, f) \ (taskq_create(a, b, c, d, e, f)) #define taskq_create_sysdc(a, b, d, e, p, dc, f) \ (taskq_create(a, b, maxclsyspri, d, e, f)) extern taskqid_t taskq_dispatch(taskq_t *, task_func_t, void *, uint_t); extern taskqid_t taskq_dispatch_delay(taskq_t *, task_func_t, void *, uint_t, clock_t); extern void taskq_dispatch_ent(taskq_t *, task_func_t, 
void *, uint_t, taskq_ent_t *); extern int taskq_empty_ent(taskq_ent_t *); extern void taskq_init_ent(taskq_ent_t *); extern void taskq_destroy(taskq_t *); extern void taskq_wait(taskq_t *); extern void taskq_wait_id(taskq_t *, taskqid_t); extern int taskq_member(taskq_t *, kthread_t *); extern int taskq_cancel_id(taskq_t *, taskqid_t); extern void system_taskq_init(void); extern void system_taskq_fini(void); #define XVA_MAPSIZE 3 #define XVA_MAGIC 0x78766174 /* * vnodes */ typedef struct vnode { uint64_t v_size; int v_fd; char *v_path; } vnode_t; #define AV_SCANSTAMP_SZ 32 /* length of anti-virus scanstamp */ typedef struct xoptattr { timestruc_t xoa_createtime; /* Create time of file */ uint8_t xoa_archive; uint8_t xoa_system; uint8_t xoa_readonly; uint8_t xoa_hidden; uint8_t xoa_nounlink; uint8_t xoa_immutable; uint8_t xoa_appendonly; uint8_t xoa_nodump; uint8_t xoa_settable; uint8_t xoa_opaque; uint8_t xoa_av_quarantined; uint8_t xoa_av_modified; uint8_t xoa_av_scanstamp[AV_SCANSTAMP_SZ]; uint8_t xoa_reparse; uint8_t xoa_offline; uint8_t xoa_sparse; } xoptattr_t; typedef struct vattr { uint_t va_mask; /* bit-mask of attributes */ u_offset_t va_size; /* file size in bytes */ } vattr_t; typedef struct xvattr { vattr_t xva_vattr; /* Embedded vattr structure */ uint32_t xva_magic; /* Magic Number */ uint32_t xva_mapsize; /* Size of attr bitmap (32-bit words) */ uint32_t *xva_rtnattrmapp; /* Ptr to xva_rtnattrmap[] */ uint32_t xva_reqattrmap[XVA_MAPSIZE]; /* Requested attrs */ uint32_t xva_rtnattrmap[XVA_MAPSIZE]; /* Returned attrs */ xoptattr_t xva_xoptattrs; /* Optional attributes */ } xvattr_t; typedef struct vsecattr { uint_t vsa_mask; /* See below */ int vsa_aclcnt; /* ACL entry count */ void *vsa_aclentp; /* pointer to ACL entries */ int vsa_dfaclcnt; /* default ACL entry count */ void *vsa_dfaclentp; /* pointer to default ACL entries */ size_t vsa_aclentsz; /* ACE size in bytes of vsa_aclentp */ } vsecattr_t; #define AT_TYPE 0x00001 #define AT_MODE 0x00002 #define AT_UID 0x00004 #define AT_GID 0x00008 #define AT_FSID 0x00010 #define AT_NODEID 0x00020 #define AT_NLINK 0x00040 #define AT_SIZE 0x00080 #define AT_ATIME 0x00100 #define AT_MTIME 0x00200 #define AT_CTIME 0x00400 #define AT_RDEV 0x00800 #define AT_BLKSIZE 0x01000 #define AT_NBLOCKS 0x02000 #define AT_SEQ 0x08000 #define AT_XVATTR 0x10000 #define CRCREAT 0 extern int fop_getattr(vnode_t *vp, vattr_t *vap); #define VOP_CLOSE(vp, f, c, o, cr, ct) vn_close(vp) #define VOP_PUTPAGE(vp, of, sz, fl, cr, ct) 0 #define VOP_GETATTR(vp, vap, fl, cr, ct) fop_getattr((vp), (vap)); #define VOP_FSYNC(vp, f, cr, ct) fsync((vp)->v_fd) #define VN_RELE(vp) vn_close(vp) extern int vn_open(char *path, int x1, int oflags, int mode, vnode_t **vpp, int x2, int x3); extern int vn_openat(char *path, int x1, int oflags, int mode, vnode_t **vpp, int x2, int x3, vnode_t *vp, int fd); extern int vn_rdwr(int uio, vnode_t *vp, void *addr, ssize_t len, offset_t offset, int x1, int x2, rlim64_t x3, void *x4, ssize_t *residp); extern void vn_close(vnode_t *vp); #define vn_remove(path, x1, x2) remove(path) #define vn_rename(from, to, seg) rename((from), (to)) #define vn_is_readonly(vp) B_FALSE extern vnode_t *rootdir; #include /* for FREAD, FWRITE, etc */ /* * Random stuff */ #define ddi_get_lbolt() (gethrtime() >> 23) #define ddi_get_lbolt64() (gethrtime() >> 23) #define hz 119 /* frequency when using gethrtime() >> 23 for lbolt */ extern void delay(clock_t ticks); #define SEC_TO_TICK(sec) ((sec) * hz) #define MSEC_TO_TICK(msec) ((msec) / (MILLISEC / hz)) 
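A hedged sketch of the userland taskq shim declared above (names are illustrative): create a queue, dispatch a job, then drain and destroy it. A return of 0 from taskq_dispatch() would indicate a failed dispatch.

static void
demo_task(void *arg)
{
        (void) arg;                     /* real work would go here */
}

static void
taskq_usage_sketch(void)
{
        taskq_t *tq;

        tq = taskq_create("demo_taskq", 4, maxclsyspri, 4, 8,
            TASKQ_PREPOPULATE);
        VERIFY(taskq_dispatch(tq, demo_task, NULL, TQ_SLEEP) != 0);
        taskq_wait(tq);
        taskq_destroy(tq);
}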
#define USEC_TO_TICK(usec) ((usec) / (MICROSEC / hz)) #define NSEC_TO_TICK(usec) ((usec) / (NANOSEC / hz)) #define gethrestime_sec() time(NULL) #define gethrestime(t) \ do {\ (t)->tv_sec = gethrestime_sec();\ (t)->tv_nsec = 0;\ } while (0); #define max_ncpus 64 #define minclsyspri 60 #define maxclsyspri 99 #define CPU_SEQID (pthread_self() & (max_ncpus - 1)) #define kcred NULL #define CRED() NULL #define ptob(x) ((x) * PAGESIZE) extern uint64_t physmem; extern int highbit(ulong_t i); extern int random_get_bytes(uint8_t *ptr, size_t len); extern int random_get_pseudo_bytes(uint8_t *ptr, size_t len); extern void kernel_init(int); extern void kernel_fini(void); struct spa; extern void nicenum(uint64_t num, char *buf); extern void show_pool_stats(struct spa *); typedef struct callb_cpr { kmutex_t *cc_lockp; } callb_cpr_t; #define CALLB_CPR_INIT(cp, lockp, func, name) { \ (cp)->cc_lockp = lockp; \ } #define CALLB_CPR_SAFE_BEGIN(cp) { \ ASSERT(MUTEX_HELD((cp)->cc_lockp)); \ } #define CALLB_CPR_SAFE_END(cp, lockp) { \ ASSERT(MUTEX_HELD((cp)->cc_lockp)); \ } #define CALLB_CPR_EXIT(cp) { \ ASSERT(MUTEX_HELD((cp)->cc_lockp)); \ mutex_exit((cp)->cc_lockp); \ } #define zone_dataset_visible(x, y) (1) #define INGLOBALZONE(z) (1) extern char *kmem_vasprintf(const char *fmt, va_list adx); extern char *kmem_asprintf(const char *fmt, ...); #define strfree(str) kmem_free((str), strlen(str)+1) /* * Hostname information */ extern char hw_serial[]; /* for userland-emulated hostid access */ extern int ddi_strtoul(const char *str, char **nptr, int base, unsigned long *result); extern int ddi_strtoull(const char *str, char **nptr, int base, u_longlong_t *result); /* ZFS Boot Related stuff. */ struct _buf { intptr_t _fd; }; struct bootstat { uint64_t st_size; }; typedef struct ace_object { uid_t a_who; uint32_t a_access_mask; uint16_t a_flags; uint16_t a_type; uint8_t a_obj_type[16]; uint8_t a_inherit_obj_type[16]; } ace_object_t; #define ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE 0x05 #define ACE_ACCESS_DENIED_OBJECT_ACE_TYPE 0x06 #define ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE 0x07 #define ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE 0x08 extern struct _buf *kobj_open_file(char *name); extern int kobj_read_file(struct _buf *file, char *buf, unsigned size, unsigned off); extern void kobj_close_file(struct _buf *file); extern int kobj_get_filesize(struct _buf *file, uint64_t *size); extern int zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr); extern int zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr); extern int zfs_secpolicy_destroy_perms(const char *name, cred_t *cr); extern zoneid_t getzoneid(void); /* SID stuff */ typedef struct ksiddomain { uint_t kd_ref; uint_t kd_len; char *kd_name; } ksiddomain_t; ksiddomain_t *ksid_lookupdomain(const char *); void ksiddomain_rele(ksiddomain_t *); #define DDI_SLEEP KM_SLEEP #define ddi_log_sysevent(_a, _b, _c, _d, _e, _f, _g) \ sysevent_post_event(_c, _d, _b, "libzpool", _e, _f) #endif /* _KERNEL */ #endif /* _SYS_ZFS_CONTEXT_H */ diff --git a/include/sys/zfs_ioctl.h b/include/sys/zfs_ioctl.h index c0cb4705d487..21bfe2b8f9b0 100644 --- a/include/sys/zfs_ioctl.h +++ b/include/sys/zfs_ioctl.h @@ -1,364 +1,381 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. 
* See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. */ #ifndef _SYS_ZFS_IOCTL_H #define _SYS_ZFS_IOCTL_H #include #include #include #include #include #include #ifdef _KERNEL #include #endif /* _KERNEL */ #ifdef __cplusplus extern "C" { #endif +/* + * The structures in this file are passed between userland and the + * kernel. Userland may be running a 32-bit process, while the kernel + * is 64-bit. Therefore, these structures need to compile the same in + * 32-bit and 64-bit. This means not using type "long", and adding + * explicit padding so that the 32-bit structure will not be packed more + * tightly than the 64-bit structure (which requires 64-bit alignment). + */ + /* * Property values for snapdir */ #define ZFS_SNAPDIR_HIDDEN 0 #define ZFS_SNAPDIR_VISIBLE 1 /* * Property values for snapdev */ #define ZFS_SNAPDEV_HIDDEN 0 #define ZFS_SNAPDEV_VISIBLE 1 /* * Field manipulation macros for the drr_versioninfo field of the * send stream header. */ /* * Header types for zfs send streams. */ typedef enum drr_headertype { DMU_SUBSTREAM = 0x1, DMU_COMPOUNDSTREAM = 0x2 } drr_headertype_t; #define DMU_GET_STREAM_HDRTYPE(vi) BF64_GET((vi), 0, 2) #define DMU_SET_STREAM_HDRTYPE(vi, x) BF64_SET((vi), 0, 2, x) #define DMU_GET_FEATUREFLAGS(vi) BF64_GET((vi), 2, 30) #define DMU_SET_FEATUREFLAGS(vi, x) BF64_SET((vi), 2, 30, x) /* * Feature flags for zfs send streams (flags in drr_versioninfo) */ #define DMU_BACKUP_FEATURE_DEDUP (0x1) #define DMU_BACKUP_FEATURE_DEDUPPROPS (0x2) #define DMU_BACKUP_FEATURE_SA_SPILL (0x4) /* * Mask of all supported backup features */ #define DMU_BACKUP_FEATURE_MASK (DMU_BACKUP_FEATURE_DEDUP | \ DMU_BACKUP_FEATURE_DEDUPPROPS | DMU_BACKUP_FEATURE_SA_SPILL) /* Are all features in the given flag word currently supported? */ #define DMU_STREAM_SUPPORTED(x) (!((x) & ~DMU_BACKUP_FEATURE_MASK)) /* * The drr_versioninfo field of the dmu_replay_record has the * following layout: * * 64 56 48 40 32 24 16 8 0 * +-------+-------+-------+-------+-------+-------+-------+-------+ * | reserved | feature-flags |C|S| * +-------+-------+-------+-------+-------+-------+-------+-------+ * * The low order two bits indicate the header type: SUBSTREAM (0x1) * or COMPOUNDSTREAM (0x2). Using two bits for this is historical: * this field used to be a version number, where the two version types * were 1 and 2. Using two bits for this allows earlier versions of * the code to be able to recognize send streams that don't use any * of the features indicated by feature flags. 
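A minimal sketch (assuming the usual ASSERT macros) of composing a drr_versioninfo word with the accessors above: mark the stream as a compound stream and advertise the SA spill feature, then confirm the flags fall within the supported mask.

static void
versioninfo_sketch(void)
{
        uint64_t vi = 0;

        DMU_SET_STREAM_HDRTYPE(vi, DMU_COMPOUNDSTREAM);
        DMU_SET_FEATUREFLAGS(vi, DMU_BACKUP_FEATURE_SA_SPILL);

        ASSERT3U(DMU_GET_STREAM_HDRTYPE(vi), ==, DMU_COMPOUNDSTREAM);
        ASSERT(DMU_STREAM_SUPPORTED(DMU_GET_FEATUREFLAGS(vi)));
}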
*/ #define DMU_BACKUP_MAGIC 0x2F5bacbacULL #define DRR_FLAG_CLONE (1<<0) #define DRR_FLAG_CI_DATA (1<<1) /* * flags in the drr_checksumflags field in the DRR_WRITE and * DRR_WRITE_BYREF blocks */ #define DRR_CHECKSUM_DEDUP (1<<0) #define DRR_IS_DEDUP_CAPABLE(flags) ((flags) & DRR_CHECKSUM_DEDUP) /* * zfs ioctl command structure */ typedef struct dmu_replay_record { enum { DRR_BEGIN, DRR_OBJECT, DRR_FREEOBJECTS, DRR_WRITE, DRR_FREE, DRR_END, DRR_WRITE_BYREF, DRR_SPILL, DRR_NUMTYPES } drr_type; uint32_t drr_payloadlen; union { struct drr_begin { uint64_t drr_magic; uint64_t drr_versioninfo; /* was drr_version */ uint64_t drr_creation_time; dmu_objset_type_t drr_type; uint32_t drr_flags; uint64_t drr_toguid; uint64_t drr_fromguid; char drr_toname[MAXNAMELEN]; } drr_begin; struct drr_end { zio_cksum_t drr_checksum; uint64_t drr_toguid; } drr_end; struct drr_object { uint64_t drr_object; dmu_object_type_t drr_type; dmu_object_type_t drr_bonustype; uint32_t drr_blksz; uint32_t drr_bonuslen; uint8_t drr_checksumtype; uint8_t drr_compress; uint8_t drr_pad[6]; uint64_t drr_toguid; /* bonus content follows */ } drr_object; struct drr_freeobjects { uint64_t drr_firstobj; uint64_t drr_numobjs; uint64_t drr_toguid; } drr_freeobjects; struct drr_write { uint64_t drr_object; dmu_object_type_t drr_type; uint32_t drr_pad; uint64_t drr_offset; uint64_t drr_length; uint64_t drr_toguid; uint8_t drr_checksumtype; uint8_t drr_checksumflags; uint8_t drr_pad2[6]; ddt_key_t drr_key; /* deduplication key */ /* content follows */ } drr_write; struct drr_free { uint64_t drr_object; uint64_t drr_offset; uint64_t drr_length; uint64_t drr_toguid; } drr_free; struct drr_write_byref { /* where to put the data */ uint64_t drr_object; uint64_t drr_offset; uint64_t drr_length; uint64_t drr_toguid; /* where to find the prior copy of the data */ uint64_t drr_refguid; uint64_t drr_refobject; uint64_t drr_refoffset; /* properties of the data */ uint8_t drr_checksumtype; uint8_t drr_checksumflags; uint8_t drr_pad2[6]; ddt_key_t drr_key; /* deduplication key */ } drr_write_byref; struct drr_spill { uint64_t drr_object; uint64_t drr_length; uint64_t drr_toguid; uint64_t drr_pad[4]; /* needed for crypto */ /* spill data follows */ } drr_spill; } drr_u; } dmu_replay_record_t; /* diff record range types */ typedef enum diff_type { DDR_NONE = 0x1, DDR_INUSE = 0x2, DDR_FREE = 0x4 } diff_type_t; /* * The diff reports back ranges of free or in-use objects. */ typedef struct dmu_diff_record { uint64_t ddr_type; uint64_t ddr_first; uint64_t ddr_last; } dmu_diff_record_t; typedef struct zinject_record { uint64_t zi_objset; uint64_t zi_object; uint64_t zi_start; uint64_t zi_end; uint64_t zi_guid; uint32_t zi_level; uint32_t zi_error; uint64_t zi_type; uint32_t zi_freq; uint32_t zi_failfast; char zi_func[MAXNAMELEN]; uint32_t zi_iotype; int32_t zi_duration; uint64_t zi_timer; uint32_t zi_cmd; uint32_t zi_pad; } zinject_record_t; #define ZINJECT_NULL 0x1 #define ZINJECT_FLUSH_ARC 0x2 #define ZINJECT_UNLOAD_SPA 0x4 #define ZEVENT_NONBLOCK 0x1 #define ZEVENT_SIZE 1024 typedef enum zinject_type { ZINJECT_UNINITIALIZED, ZINJECT_DATA_FAULT, ZINJECT_DEVICE_FAULT, ZINJECT_LABEL_FAULT, ZINJECT_IGNORED_WRITES, ZINJECT_PANIC, ZINJECT_DELAY_IO, } zinject_type_t; typedef struct zfs_share { uint64_t z_exportdata; uint64_t z_sharedata; uint64_t z_sharetype; /* 0 = share, 1 = unshare */ uint64_t z_sharemax; /* max length of share string */ } zfs_share_t; /* * ZFS file systems may behave the usual, POSIX-compliant way, where * name lookups are case-sensitive. 
They may also be set up so that * all the name lookups are case-insensitive, or so that only some * lookups, the ones that set an FIGNORECASE flag, are case-insensitive. */ typedef enum zfs_case { ZFS_CASE_SENSITIVE, ZFS_CASE_INSENSITIVE, ZFS_CASE_MIXED } zfs_case_t; typedef struct zfs_cmd { - char zc_name[MAXPATHLEN]; + char zc_name[MAXPATHLEN]; /* name of pool or dataset */ + uint64_t zc_nvlist_src; /* really (char *) */ + uint64_t zc_nvlist_src_size; + uint64_t zc_nvlist_dst; /* really (char *) */ + uint64_t zc_nvlist_dst_size; + boolean_t zc_nvlist_dst_filled; /* put an nvlist in dst? */ + int zc_pad2; + + /* + * The following members are for legacy ioctls which haven't been + * converted to the new method. + */ + uint64_t zc_history; /* really (char *) */ char zc_value[MAXPATHLEN * 2]; char zc_string[MAXNAMELEN]; char zc_top_ds[MAXPATHLEN]; uint64_t zc_guid; uint64_t zc_nvlist_conf; /* really (char *) */ uint64_t zc_nvlist_conf_size; - uint64_t zc_nvlist_src; /* really (char *) */ - uint64_t zc_nvlist_src_size; - uint64_t zc_nvlist_dst; /* really (char *) */ - uint64_t zc_nvlist_dst_size; uint64_t zc_cookie; uint64_t zc_objset_type; uint64_t zc_perm_action; - uint64_t zc_history; /* really (char *) */ - uint64_t zc_history_len; + uint64_t zc_history_len; uint64_t zc_history_offset; uint64_t zc_obj; uint64_t zc_iflags; /* internal to zfs(7fs) */ zfs_share_t zc_share; dmu_objset_stats_t zc_objset_stats; struct drr_begin zc_begin_record; zinject_record_t zc_inject_record; boolean_t zc_defer_destroy; boolean_t zc_temphold; uint64_t zc_action_handle; int zc_cleanup_fd; uint8_t zc_simple; uint8_t zc_pad[3]; /* alignment */ uint64_t zc_sendobj; uint64_t zc_fromobj; uint64_t zc_createtxg; zfs_stat_t zc_stat; } zfs_cmd_t; typedef struct zfs_useracct { char zu_domain[256]; uid_t zu_rid; uint32_t zu_pad; uint64_t zu_space; } zfs_useracct_t; #define ZFSDEV_MAX_MINOR (1 << 16) #define ZFS_MIN_MINOR (ZFSDEV_MAX_MINOR + 1) #define ZPOOL_EXPORT_AFTER_SPLIT 0x1 #ifdef _KERNEL typedef struct zfs_creat { nvlist_t *zct_zplprops; nvlist_t *zct_props; } zfs_creat_t; extern int zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr); extern int zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr); extern int zfs_secpolicy_destroy_perms(const char *name, cred_t *cr); extern int zfs_unmount_snap(const char *, void *); enum zfsdev_state_type { ZST_ONEXIT, ZST_ZEVENT, ZST_ALL, }; typedef struct zfsdev_state { list_node_t zs_next; /* next zfsdev_state_t link */ struct file *zs_file; /* associated file struct */ minor_t zs_minor; /* made up minor number */ void *zs_onexit; /* onexit data */ void *zs_zevent; /* zevent data */ } zfsdev_state_t; extern void *zfsdev_get_state(minor_t minor, enum zfsdev_state_type which); extern minor_t zfsdev_getminor(struct file *filp); extern minor_t zfsdev_minor_alloc(void); #endif /* _KERNEL */ #ifdef __cplusplus } #endif #endif /* _SYS_ZFS_IOCTL_H */ diff --git a/include/zfs_comutil.h b/include/zfs_comutil.h index 61327f9aa909..f89054388a4d 100644 --- a/include/zfs_comutil.h +++ b/include/zfs_comutil.h @@ -1,46 +1,48 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. 
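Looking back at the reordered zfs_cmd_t above: the nvlist source/destination members are fixed-width uint64_t rather than pointers so a 32-bit userland and a 64-bit kernel agree on the layout. A sketch of how a userland caller might fill them (assumed helper, not part of this patch):

static void
set_src_nvlist_sketch(zfs_cmd_t *zc, char *packed, size_t size)
{
        /* Store the buffer address in a fixed-width field. */
        zc->zc_nvlist_src = (uint64_t)(uintptr_t)packed;
        zc->zc_nvlist_src_size = size;
}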
* * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. */ #ifndef _ZFS_COMUTIL_H #define _ZFS_COMUTIL_H #include #include #ifdef __cplusplus extern "C" { #endif extern boolean_t zfs_allocatable_devs(nvlist_t *); extern void zpool_get_rewind_policy(nvlist_t *, zpool_rewind_policy_t *); extern int zfs_zpl_version_map(int spa_version); extern int zfs_spa_version_map(int zpl_version); -extern const char *zfs_history_event_names[LOG_END]; +#define ZFS_NUM_LEGACY_HISTORY_EVENTS 41 +extern const char *zfs_history_event_names[ZFS_NUM_LEGACY_HISTORY_EVENTS]; #ifdef __cplusplus } #endif #endif /* _ZFS_COMUTIL_H */ diff --git a/lib/Makefile.am b/lib/Makefile.am index 09139d58c40a..8e7caf2a16e9 100644 --- a/lib/Makefile.am +++ b/lib/Makefile.am @@ -1,7 +1,7 @@ # NB: GNU Automake Manual, Chapter 8.3.5: Libtool Convenience Libraries # These five libraries are intermediary build components. SUBDIRS = libspl libavl libefi libshare libunicode # These four libraries, which are installed as the final build product, # incorporate the five convenience libraries given above. -SUBDIRS += libuutil libnvpair libzpool libzfs +SUBDIRS += libuutil libnvpair libzpool libzfs_core libzfs diff --git a/lib/libzfs/Makefile.am b/lib/libzfs/Makefile.am index 524efaa601f7..14abd7ab3cb8 100644 --- a/lib/libzfs/Makefile.am +++ b/lib/libzfs/Makefile.am @@ -1,29 +1,30 @@ include $(top_srcdir)/config/Rules.am DEFAULT_INCLUDES += \ -I$(top_srcdir)/include \ -I$(top_srcdir)/lib/libspl/include lib_LTLIBRARIES = libzfs.la libzfs_la_SOURCES = \ $(top_srcdir)/lib/libzfs/libzfs_changelist.c \ $(top_srcdir)/lib/libzfs/libzfs_config.c \ $(top_srcdir)/lib/libzfs/libzfs_dataset.c \ $(top_srcdir)/lib/libzfs/libzfs_diff.c \ $(top_srcdir)/lib/libzfs/libzfs_fru.c \ $(top_srcdir)/lib/libzfs/libzfs_graph.c \ $(top_srcdir)/lib/libzfs/libzfs_import.c \ $(top_srcdir)/lib/libzfs/libzfs_iter.c \ $(top_srcdir)/lib/libzfs/libzfs_mount.c \ $(top_srcdir)/lib/libzfs/libzfs_pool.c \ $(top_srcdir)/lib/libzfs/libzfs_sendrecv.c \ $(top_srcdir)/lib/libzfs/libzfs_status.c \ $(top_srcdir)/lib/libzfs/libzfs_util.c libzfs_la_LIBADD = \ + $(top_builddir)/lib/libzfs_core/libzfs_core.la \ $(top_builddir)/lib/libshare/libshare.la \ $(top_builddir)/lib/libnvpair/libnvpair.la \ $(top_builddir)/lib/libzpool/libzpool.la libzfs_la_LDFLAGS = -lm -ldl -version-info 1:1:0 $(LIBSELINUX) diff --git a/lib/libzfs/libzfs_config.c b/lib/libzfs/libzfs_config.c index ee94fe106d73..99b6e67c37c2 100644 --- a/lib/libzfs/libzfs_config.c +++ b/lib/libzfs/libzfs_config.c @@ -1,453 +1,453 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
* If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* * Copyright (c) 2012 by Delphix. All rights reserved. */ /* * The pool configuration repository is stored in /etc/zfs/zpool.cache as a * single packed nvlist. While it would be nice to just read in this * file from userland, this wouldn't work from a local zone. So we have to have * a zpool ioctl to return the complete configuration for all pools. In the * global zone, this will be identical to reading the file and unpacking it in * userland. */ #include #include #include #include #include #include #include #include #include "libzfs_impl.h" typedef struct config_node { char *cn_name; nvlist_t *cn_config; uu_avl_node_t cn_avl; } config_node_t; /* ARGSUSED */ static int config_node_compare(const void *a, const void *b, void *unused) { int ret; const config_node_t *ca = (config_node_t *)a; const config_node_t *cb = (config_node_t *)b; ret = strcmp(ca->cn_name, cb->cn_name); if (ret < 0) return (-1); else if (ret > 0) return (1); else return (0); } void namespace_clear(libzfs_handle_t *hdl) { if (hdl->libzfs_ns_avl) { config_node_t *cn; void *cookie = NULL; while ((cn = uu_avl_teardown(hdl->libzfs_ns_avl, &cookie)) != NULL) { nvlist_free(cn->cn_config); free(cn->cn_name); free(cn); } uu_avl_destroy(hdl->libzfs_ns_avl); hdl->libzfs_ns_avl = NULL; } if (hdl->libzfs_ns_avlpool) { uu_avl_pool_destroy(hdl->libzfs_ns_avlpool); hdl->libzfs_ns_avlpool = NULL; } } /* * Loads the pool namespace, or re-loads it if the cache has changed. */ static int namespace_reload(libzfs_handle_t *hdl) { nvlist_t *config; config_node_t *cn; nvpair_t *elem; - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; void *cookie; if (hdl->libzfs_ns_gen == 0) { /* * This is the first time we've accessed the configuration * cache. Initialize the AVL tree and then fall through to the * common code. */ if ((hdl->libzfs_ns_avlpool = uu_avl_pool_create("config_pool", sizeof (config_node_t), offsetof(config_node_t, cn_avl), config_node_compare, UU_DEFAULT)) == NULL) return (no_memory(hdl)); if ((hdl->libzfs_ns_avl = uu_avl_create(hdl->libzfs_ns_avlpool, NULL, UU_DEFAULT)) == NULL) return (no_memory(hdl)); } if (zcmd_alloc_dst_nvlist(hdl, &zc, 0) != 0) return (-1); for (;;) { zc.zc_cookie = hdl->libzfs_ns_gen; if (ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_CONFIGS, &zc) != 0) { switch (errno) { case EEXIST: /* * The namespace hasn't changed. */ zcmd_free_nvlists(&zc); return (0); case ENOMEM: if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) { zcmd_free_nvlists(&zc); return (-1); } break; default: zcmd_free_nvlists(&zc); return (zfs_standard_error(hdl, errno, dgettext(TEXT_DOMAIN, "failed to read " "pool configuration"))); } } else { hdl->libzfs_ns_gen = zc.zc_cookie; break; } } if (zcmd_read_dst_nvlist(hdl, &zc, &config) != 0) { zcmd_free_nvlists(&zc); return (-1); } zcmd_free_nvlists(&zc); /* * Clear out any existing configuration information. 
*/ cookie = NULL; while ((cn = uu_avl_teardown(hdl->libzfs_ns_avl, &cookie)) != NULL) { nvlist_free(cn->cn_config); free(cn->cn_name); free(cn); } elem = NULL; while ((elem = nvlist_next_nvpair(config, elem)) != NULL) { nvlist_t *child; uu_avl_index_t where; if ((cn = zfs_alloc(hdl, sizeof (config_node_t))) == NULL) { nvlist_free(config); return (-1); } if ((cn->cn_name = zfs_strdup(hdl, nvpair_name(elem))) == NULL) { free(cn); nvlist_free(config); return (-1); } verify(nvpair_value_nvlist(elem, &child) == 0); if (nvlist_dup(child, &cn->cn_config, 0) != 0) { free(cn->cn_name); free(cn); nvlist_free(config); return (no_memory(hdl)); } verify(uu_avl_find(hdl->libzfs_ns_avl, cn, NULL, &where) == NULL); uu_avl_insert(hdl->libzfs_ns_avl, cn, where); } nvlist_free(config); return (0); } /* * Retrieve the configuration for the given pool. The configuration is a nvlist * describing the vdevs, as well as the statistics associated with each one. */ nvlist_t * zpool_get_config(zpool_handle_t *zhp, nvlist_t **oldconfig) { if (oldconfig) *oldconfig = zhp->zpool_old_config; return (zhp->zpool_config); } /* * Retrieves a list of enabled features and their refcounts and caches it in * the pool handle. */ nvlist_t * zpool_get_features(zpool_handle_t *zhp) { nvlist_t *config, *features; config = zpool_get_config(zhp, NULL); if (config == NULL || !nvlist_exists(config, ZPOOL_CONFIG_FEATURE_STATS)) { int error; boolean_t missing = B_FALSE; error = zpool_refresh_stats(zhp, &missing); if (error != 0 || missing) return (NULL); config = zpool_get_config(zhp, NULL); } verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_FEATURE_STATS, &features) == 0); return (features); } /* * Refresh the vdev statistics associated with the given pool. This is used in * iostat to show configuration changes and determine the delta from the last * time the function was called. This function can fail, in case the pool has * been destroyed. */ int zpool_refresh_stats(zpool_handle_t *zhp, boolean_t *missing) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; int error; nvlist_t *config; libzfs_handle_t *hdl = zhp->zpool_hdl; *missing = B_FALSE; (void) strcpy(zc.zc_name, zhp->zpool_name); if (zhp->zpool_config_size == 0) zhp->zpool_config_size = 1 << 16; if (zcmd_alloc_dst_nvlist(hdl, &zc, zhp->zpool_config_size) != 0) return (-1); for (;;) { if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_POOL_STATS, &zc) == 0) { /* * The real error is returned in the zc_cookie field. 
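/*
 * Editor's sketch, not part of this patch: the tail of namespace_reload()
 * above walks the unpacked config nvlist pair by pair and duplicates each
 * pool's sub-nvlist into the AVL cache.  The bare libnvpair iteration
 * pattern, with the cache hand-off elided:
 */
#include <libnvpair.h>

static int
copy_pool_configs(nvlist_t *config)
{
	nvpair_t *elem = NULL;

	while ((elem = nvlist_next_nvpair(config, elem)) != NULL) {
		nvlist_t *child, *copy;

		/* each pair maps a pool name to that pool's config nvlist */
		if (nvpair_value_nvlist(elem, &child) != 0)
			return (-1);
		if (nvlist_dup(child, &copy, 0) != 0)
			return (-1);
		/* a real consumer would file 'copy' under nvpair_name(elem) */
		nvlist_free(copy);
	}
	return (0);
}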
*/ error = zc.zc_cookie; break; } if (errno == ENOMEM) { if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) { zcmd_free_nvlists(&zc); return (-1); } } else { zcmd_free_nvlists(&zc); if (errno == ENOENT || errno == EINVAL) *missing = B_TRUE; zhp->zpool_state = POOL_STATE_UNAVAIL; return (0); } } if (zcmd_read_dst_nvlist(hdl, &zc, &config) != 0) { zcmd_free_nvlists(&zc); return (-1); } zcmd_free_nvlists(&zc); zhp->zpool_config_size = zc.zc_nvlist_dst_size; if (zhp->zpool_config != NULL) { uint64_t oldtxg, newtxg; verify(nvlist_lookup_uint64(zhp->zpool_config, ZPOOL_CONFIG_POOL_TXG, &oldtxg) == 0); verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG, &newtxg) == 0); if (zhp->zpool_old_config != NULL) nvlist_free(zhp->zpool_old_config); if (oldtxg != newtxg) { nvlist_free(zhp->zpool_config); zhp->zpool_old_config = NULL; } else { zhp->zpool_old_config = zhp->zpool_config; } } zhp->zpool_config = config; if (error) zhp->zpool_state = POOL_STATE_UNAVAIL; else zhp->zpool_state = POOL_STATE_ACTIVE; return (0); } /* * If the __ZFS_POOL_RESTRICT environment variable is set we only iterate over * pools it lists. * * This is an undocumented feature for use during testing only. * * This function returns B_TRUE if the pool should be skipped * during iteration. */ static boolean_t check_restricted(const char *poolname) { static boolean_t initialized = B_FALSE; static char *restricted = NULL; const char *cur, *end; int len, namelen; if (!initialized) { initialized = B_TRUE; restricted = getenv("__ZFS_POOL_RESTRICT"); } if (NULL == restricted) return (B_FALSE); cur = restricted; namelen = strlen(poolname); do { end = strchr(cur, ' '); len = (NULL == end) ? strlen(cur) : (end - cur); if (len == namelen && 0 == strncmp(cur, poolname, len)) { return (B_FALSE); } cur += (len + 1); } while (NULL != end); return (B_TRUE); } /* * Iterate over all pools in the system. */ int zpool_iter(libzfs_handle_t *hdl, zpool_iter_f func, void *data) { config_node_t *cn; zpool_handle_t *zhp; int ret; /* * If someone makes a recursive call to zpool_iter(), we want to avoid * refreshing the namespace because that will invalidate the parent * context. We allow recursive calls, but simply re-use the same * namespace AVL tree. */ if (!hdl->libzfs_pool_iter && namespace_reload(hdl) != 0) return (-1); hdl->libzfs_pool_iter++; for (cn = uu_avl_first(hdl->libzfs_ns_avl); cn != NULL; cn = uu_avl_next(hdl->libzfs_ns_avl, cn)) { if (check_restricted(cn->cn_name)) continue; if (zpool_open_silent(hdl, cn->cn_name, &zhp) != 0) { hdl->libzfs_pool_iter--; return (-1); } if (zhp == NULL) continue; if ((ret = func(zhp, data)) != 0) { hdl->libzfs_pool_iter--; return (ret); } } hdl->libzfs_pool_iter--; return (0); } /* * Iterate over root datasets, calling the given function for each. The zfs * handle passed each time must be explicitly closed by the callback. 
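/*
 * Editor's sketch, not part of this patch: check_restricted() above consults
 * the space-separated pool names in the __ZFS_POOL_RESTRICT test hook.  A
 * standalone membership test over the same format:
 */
#include <stdlib.h>
#include <string.h>

static int
pool_is_listed(const char *poolname)
{
	const char *list = getenv("__ZFS_POOL_RESTRICT");
	size_t namelen = strlen(poolname);

	while (list != NULL && *list != '\0') {
		const char *end = strchr(list, ' ');
		size_t len = (end == NULL) ? strlen(list) :
		    (size_t)(end - list);

		if (len == namelen && strncmp(list, poolname, len) == 0)
			return (1);
		if (end == NULL)
			break;
		list = end + 1;
	}
	return (0);
}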
*/ int zfs_iter_root(libzfs_handle_t *hdl, zfs_iter_f func, void *data) { config_node_t *cn; zfs_handle_t *zhp; int ret; if (namespace_reload(hdl) != 0) return (-1); for (cn = uu_avl_first(hdl->libzfs_ns_avl); cn != NULL; cn = uu_avl_next(hdl->libzfs_ns_avl, cn)) { if (check_restricted(cn->cn_name)) continue; if ((zhp = make_dataset_handle(hdl, cn->cn_name)) == NULL) continue; if ((ret = func(zhp, data)) != 0) return (ret); } return (0); } diff --git a/lib/libzfs/libzfs_dataset.c b/lib/libzfs/libzfs_dataset.c index 244b687eddd5..041750bca8c3 100644 --- a/lib/libzfs/libzfs_dataset.c +++ b/lib/libzfs/libzfs_dataset.c @@ -1,4710 +1,4734 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012 by Delphix. All rights reserved. * Copyright (c) 2012 DEY Storage Systems, Inc. All rights reserved. * Copyright (c) 2012 Pawel Jakub Dawidek . * Copyright 2012 Nexenta Systems, Inc. All rights reserved. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef HAVE_IDMAP #include #include #include #endif /* HAVE_IDMAP */ #include #include #include #include #include "zfs_namecheck.h" #include "zfs_prop.h" #include "libzfs_impl.h" #include "zfs_deleg.h" static int zvol_create_link_common(libzfs_handle_t *, const char *, int); static int userquota_propname_decode(const char *propname, boolean_t zoned, zfs_userquota_prop_t *typep, char *domain, int domainlen, uint64_t *ridp); /* * Given a single type (not a mask of types), return the type in a human * readable form. */ const char * zfs_type_to_name(zfs_type_t type) { switch (type) { case ZFS_TYPE_FILESYSTEM: return (dgettext(TEXT_DOMAIN, "filesystem")); case ZFS_TYPE_SNAPSHOT: return (dgettext(TEXT_DOMAIN, "snapshot")); case ZFS_TYPE_VOLUME: return (dgettext(TEXT_DOMAIN, "volume")); default: break; } return (NULL); } /* * Validate a ZFS path. This is used even before trying to open the dataset, to * provide a more meaningful error message. We call zfs_error_aux() to * explain exactly why the name was not valid. 
*/ int zfs_validate_name(libzfs_handle_t *hdl, const char *path, int type, boolean_t modifying) { namecheck_err_t why; char what; (void) zfs_prop_get_table(); if (dataset_namecheck(path, &why, &what) != 0) { if (hdl != NULL) { switch (why) { case NAME_ERR_TOOLONG: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "name is too long")); break; case NAME_ERR_LEADING_SLASH: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "leading slash in name")); break; case NAME_ERR_EMPTY_COMPONENT: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "empty component in name")); break; case NAME_ERR_TRAILING_SLASH: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "trailing slash in name")); break; case NAME_ERR_INVALCHAR: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid character " "'%c' in name"), what); break; case NAME_ERR_MULTIPLE_AT: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "multiple '@' delimiters in name")); break; case NAME_ERR_NOLETTER: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool doesn't begin with a letter")); break; case NAME_ERR_RESERVED: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "name is reserved")); break; case NAME_ERR_DISKLIKE: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "reserved disk name")); break; default: break; } } return (0); } if (!(type & ZFS_TYPE_SNAPSHOT) && strchr(path, '@') != NULL) { if (hdl != NULL) zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "snapshot delimiter '@' in filesystem name")); return (0); } if (type == ZFS_TYPE_SNAPSHOT && strchr(path, '@') == NULL) { if (hdl != NULL) zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "missing '@' delimiter in snapshot name")); return (0); } if (modifying && strchr(path, '%') != NULL) { if (hdl != NULL) zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid character %c in name"), '%'); return (0); } return (-1); } int zfs_name_valid(const char *name, zfs_type_t type) { if (type == ZFS_TYPE_POOL) return (zpool_name_valid(NULL, B_FALSE, name)); return (zfs_validate_name(NULL, name, type, B_FALSE)); } /* * This function takes the raw DSL properties, and filters out the user-defined * properties into a separate nvlist. */ static nvlist_t * process_user_props(zfs_handle_t *zhp, nvlist_t *props) { libzfs_handle_t *hdl = zhp->zfs_hdl; nvpair_t *elem; nvlist_t *propval; nvlist_t *nvl; if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0) { (void) no_memory(hdl); return (NULL); } elem = NULL; while ((elem = nvlist_next_nvpair(props, elem)) != NULL) { if (!zfs_prop_user(nvpair_name(elem))) continue; verify(nvpair_value_nvlist(elem, &propval) == 0); if (nvlist_add_nvlist(nvl, nvpair_name(elem), propval) != 0) { nvlist_free(nvl); (void) no_memory(hdl); return (NULL); } } return (nvl); } static zpool_handle_t * zpool_add_handle(zfs_handle_t *zhp, const char *pool_name) { libzfs_handle_t *hdl = zhp->zfs_hdl; zpool_handle_t *zph; if ((zph = zpool_open_canfail(hdl, pool_name)) != NULL) { if (hdl->libzfs_pool_handles != NULL) zph->zpool_next = hdl->libzfs_pool_handles; hdl->libzfs_pool_handles = zph; } return (zph); } static zpool_handle_t * zpool_find_handle(zfs_handle_t *zhp, const char *pool_name, int len) { libzfs_handle_t *hdl = zhp->zfs_hdl; zpool_handle_t *zph = hdl->libzfs_pool_handles; while ((zph != NULL) && (strncmp(pool_name, zpool_get_name(zph), len) != 0)) zph = zph->zpool_next; return (zph); } /* * Returns a handle to the pool that contains the provided dataset. * If a handle to that pool already exists then that handle is returned. * Otherwise, a new handle is created and added to the list of handles. 
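/*
 * Editor's sketch, not part of this patch: zpool_handle() below derives the
 * pool name by taking everything up to the first '/' or '@' of the dataset
 * name.  The same extraction with only standard C, e.g. "tank/fs@snap"
 * becomes "tank":
 */
#include <stdio.h>
#include <string.h>

static void
dataset_to_pool(const char *dsname, char *pool, size_t poollen)
{
	size_t len = strcspn(dsname, "/@");	/* pool component length */

	(void) snprintf(pool, poollen, "%.*s", (int)len, dsname);
}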
*/ static zpool_handle_t * zpool_handle(zfs_handle_t *zhp) { char *pool_name; int len; zpool_handle_t *zph; len = strcspn(zhp->zfs_name, "/@") + 1; pool_name = zfs_alloc(zhp->zfs_hdl, len); (void) strlcpy(pool_name, zhp->zfs_name, len); zph = zpool_find_handle(zhp, pool_name, len); if (zph == NULL) zph = zpool_add_handle(zhp, pool_name); free(pool_name); return (zph); } void zpool_free_handles(libzfs_handle_t *hdl) { zpool_handle_t *next, *zph = hdl->libzfs_pool_handles; while (zph != NULL) { next = zph->zpool_next; zpool_close(zph); zph = next; } hdl->libzfs_pool_handles = NULL; } /* * Utility function to gather stats (objset and zpl) for the given object. */ static int get_stats_ioctl(zfs_handle_t *zhp, zfs_cmd_t *zc) { libzfs_handle_t *hdl = zhp->zfs_hdl; (void) strlcpy(zc->zc_name, zhp->zfs_name, sizeof (zc->zc_name)); while (ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, zc) != 0) { if (errno == ENOMEM) { if (zcmd_expand_dst_nvlist(hdl, zc) != 0) { return (-1); } } else { return (-1); } } return (0); } /* * Utility function to get the received properties of the given object. */ static int get_recvd_props_ioctl(zfs_handle_t *zhp) { libzfs_handle_t *hdl = zhp->zfs_hdl; nvlist_t *recvdprops; - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; int err; if (zcmd_alloc_dst_nvlist(hdl, &zc, 0) != 0) return (-1); (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); while (ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_RECVD_PROPS, &zc) != 0) { if (errno == ENOMEM) { if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) { return (-1); } } else { zcmd_free_nvlists(&zc); return (-1); } } err = zcmd_read_dst_nvlist(zhp->zfs_hdl, &zc, &recvdprops); zcmd_free_nvlists(&zc); if (err != 0) return (-1); nvlist_free(zhp->zfs_recvd_props); zhp->zfs_recvd_props = recvdprops; return (0); } static int put_stats_zhdl(zfs_handle_t *zhp, zfs_cmd_t *zc) { nvlist_t *allprops, *userprops; zhp->zfs_dmustats = zc->zc_objset_stats; /* structure assignment */ if (zcmd_read_dst_nvlist(zhp->zfs_hdl, zc, &allprops) != 0) { return (-1); } /* * XXX Why do we store the user props separately, in addition to * storing them in zfs_props? */ if ((userprops = process_user_props(zhp, allprops)) == NULL) { nvlist_free(allprops); return (-1); } nvlist_free(zhp->zfs_props); nvlist_free(zhp->zfs_user_props); zhp->zfs_props = allprops; zhp->zfs_user_props = userprops; return (0); } static int get_stats(zfs_handle_t *zhp) { int rc = 0; - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; if (zcmd_alloc_dst_nvlist(zhp->zfs_hdl, &zc, 0) != 0) return (-1); if (get_stats_ioctl(zhp, &zc) != 0) rc = -1; else if (put_stats_zhdl(zhp, &zc) != 0) rc = -1; zcmd_free_nvlists(&zc); return (rc); } /* * Refresh the properties currently stored in the handle. */ void zfs_refresh_properties(zfs_handle_t *zhp) { (void) get_stats(zhp); } /* * Makes a handle from the given dataset name. Used by zfs_open() and * zfs_iter_* to create child handles on the fly. */ static int make_dataset_handle_common(zfs_handle_t *zhp, zfs_cmd_t *zc) { if (put_stats_zhdl(zhp, zc) != 0) return (-1); /* * We've managed to open the dataset and gather statistics. Determine * the high-level type. 
*/ if (zhp->zfs_dmustats.dds_type == DMU_OST_ZVOL) zhp->zfs_head_type = ZFS_TYPE_VOLUME; else if (zhp->zfs_dmustats.dds_type == DMU_OST_ZFS) zhp->zfs_head_type = ZFS_TYPE_FILESYSTEM; else if (zhp->zfs_dmustats.dds_type == DMU_OST_OTHER) return (-1); /* zpios' and other testing datasets are of this type, ignore if encountered */ else abort(); if (zhp->zfs_dmustats.dds_is_snapshot) zhp->zfs_type = ZFS_TYPE_SNAPSHOT; else if (zhp->zfs_dmustats.dds_type == DMU_OST_ZVOL) zhp->zfs_type = ZFS_TYPE_VOLUME; else if (zhp->zfs_dmustats.dds_type == DMU_OST_ZFS) zhp->zfs_type = ZFS_TYPE_FILESYSTEM; else abort(); /* we should never see any other types */ if ((zhp->zpool_hdl = zpool_handle(zhp)) == NULL) return (-1); return (0); } zfs_handle_t * make_dataset_handle(libzfs_handle_t *hdl, const char *path) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; zfs_handle_t *zhp = calloc(sizeof (zfs_handle_t), 1); if (zhp == NULL) return (NULL); zhp->zfs_hdl = hdl; (void) strlcpy(zhp->zfs_name, path, sizeof (zhp->zfs_name)); if (zcmd_alloc_dst_nvlist(hdl, &zc, 0) != 0) { free(zhp); return (NULL); } if (get_stats_ioctl(zhp, &zc) == -1) { zcmd_free_nvlists(&zc); free(zhp); return (NULL); } if (make_dataset_handle_common(zhp, &zc) == -1) { free(zhp); zhp = NULL; } zcmd_free_nvlists(&zc); return (zhp); } zfs_handle_t * make_dataset_handle_zc(libzfs_handle_t *hdl, zfs_cmd_t *zc) { zfs_handle_t *zhp = calloc(sizeof (zfs_handle_t), 1); if (zhp == NULL) return (NULL); zhp->zfs_hdl = hdl; (void) strlcpy(zhp->zfs_name, zc->zc_name, sizeof (zhp->zfs_name)); if (make_dataset_handle_common(zhp, zc) == -1) { free(zhp); return (NULL); } return (zhp); } zfs_handle_t * make_dataset_simple_handle_zc(zfs_handle_t *pzhp, zfs_cmd_t *zc) { zfs_handle_t *zhp = calloc(sizeof (zfs_handle_t), 1); if (zhp == NULL) return (NULL); zhp->zfs_hdl = pzhp->zfs_hdl; (void) strlcpy(zhp->zfs_name, zc->zc_name, sizeof (zhp->zfs_name)); zhp->zfs_head_type = pzhp->zfs_type; zhp->zfs_type = ZFS_TYPE_SNAPSHOT; zhp->zpool_hdl = zpool_handle(zhp); return (zhp); } zfs_handle_t * zfs_handle_dup(zfs_handle_t *zhp_orig) { zfs_handle_t *zhp = calloc(sizeof (zfs_handle_t), 1); if (zhp == NULL) return (NULL); zhp->zfs_hdl = zhp_orig->zfs_hdl; zhp->zpool_hdl = zhp_orig->zpool_hdl; (void) strlcpy(zhp->zfs_name, zhp_orig->zfs_name, sizeof (zhp->zfs_name)); zhp->zfs_type = zhp_orig->zfs_type; zhp->zfs_head_type = zhp_orig->zfs_head_type; zhp->zfs_dmustats = zhp_orig->zfs_dmustats; if (zhp_orig->zfs_props != NULL) { if (nvlist_dup(zhp_orig->zfs_props, &zhp->zfs_props, 0) != 0) { (void) no_memory(zhp->zfs_hdl); zfs_close(zhp); return (NULL); } } if (zhp_orig->zfs_user_props != NULL) { if (nvlist_dup(zhp_orig->zfs_user_props, &zhp->zfs_user_props, 0) != 0) { (void) no_memory(zhp->zfs_hdl); zfs_close(zhp); return (NULL); } } if (zhp_orig->zfs_recvd_props != NULL) { if (nvlist_dup(zhp_orig->zfs_recvd_props, &zhp->zfs_recvd_props, 0)) { (void) no_memory(zhp->zfs_hdl); zfs_close(zhp); return (NULL); } } zhp->zfs_mntcheck = zhp_orig->zfs_mntcheck; if (zhp_orig->zfs_mntopts != NULL) { zhp->zfs_mntopts = zfs_strdup(zhp_orig->zfs_hdl, zhp_orig->zfs_mntopts); } zhp->zfs_props_table = zhp_orig->zfs_props_table; return (zhp); } /* * Opens the given snapshot, filesystem, or volume. The 'types' * argument is a mask of acceptable types. The function will print an * appropriate error message and return NULL if it can't be opened. 
*/ zfs_handle_t * zfs_open(libzfs_handle_t *hdl, const char *path, int types) { zfs_handle_t *zhp; char errbuf[1024]; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot open '%s'"), path); /* * Validate the name before we even try to open it. */ if (!zfs_validate_name(hdl, path, ZFS_TYPE_DATASET, B_FALSE)) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid dataset name")); (void) zfs_error(hdl, EZFS_INVALIDNAME, errbuf); return (NULL); } /* * Try to get stats for the dataset, which will tell us if it exists. */ errno = 0; if ((zhp = make_dataset_handle(hdl, path)) == NULL) { (void) zfs_standard_error(hdl, errno, errbuf); return (NULL); } if (!(types & zhp->zfs_type)) { (void) zfs_error(hdl, EZFS_BADTYPE, errbuf); zfs_close(zhp); return (NULL); } return (zhp); } /* * Release a ZFS handle. Nothing to do but free the associated memory. */ void zfs_close(zfs_handle_t *zhp) { if (zhp->zfs_mntopts) free(zhp->zfs_mntopts); nvlist_free(zhp->zfs_props); nvlist_free(zhp->zfs_user_props); nvlist_free(zhp->zfs_recvd_props); free(zhp); } typedef struct mnttab_node { struct mnttab mtn_mt; avl_node_t mtn_node; } mnttab_node_t; static int libzfs_mnttab_cache_compare(const void *arg1, const void *arg2) { const mnttab_node_t *mtn1 = arg1; const mnttab_node_t *mtn2 = arg2; int rv; rv = strcmp(mtn1->mtn_mt.mnt_special, mtn2->mtn_mt.mnt_special); if (rv == 0) return (0); return (rv > 0 ? 1 : -1); } void libzfs_mnttab_init(libzfs_handle_t *hdl) { assert(avl_numnodes(&hdl->libzfs_mnttab_cache) == 0); avl_create(&hdl->libzfs_mnttab_cache, libzfs_mnttab_cache_compare, sizeof (mnttab_node_t), offsetof(mnttab_node_t, mtn_node)); } int libzfs_mnttab_update(libzfs_handle_t *hdl) { struct mnttab entry; /* Reopen MNTTAB to prevent reading stale data from open file */ if (freopen(MNTTAB, "r", hdl->libzfs_mnttab) == NULL) return (ENOENT); while (getmntent(hdl->libzfs_mnttab, &entry) == 0) { mnttab_node_t *mtn; if (strcmp(entry.mnt_fstype, MNTTYPE_ZFS) != 0) continue; mtn = zfs_alloc(hdl, sizeof (mnttab_node_t)); mtn->mtn_mt.mnt_special = zfs_strdup(hdl, entry.mnt_special); mtn->mtn_mt.mnt_mountp = zfs_strdup(hdl, entry.mnt_mountp); mtn->mtn_mt.mnt_fstype = zfs_strdup(hdl, entry.mnt_fstype); mtn->mtn_mt.mnt_mntopts = zfs_strdup(hdl, entry.mnt_mntopts); avl_add(&hdl->libzfs_mnttab_cache, mtn); } return (0); } void libzfs_mnttab_fini(libzfs_handle_t *hdl) { void *cookie = NULL; mnttab_node_t *mtn; while ((mtn = avl_destroy_nodes(&hdl->libzfs_mnttab_cache, &cookie))) { free(mtn->mtn_mt.mnt_special); free(mtn->mtn_mt.mnt_mountp); free(mtn->mtn_mt.mnt_fstype); free(mtn->mtn_mt.mnt_mntopts); free(mtn); } avl_destroy(&hdl->libzfs_mnttab_cache); } void libzfs_mnttab_cache(libzfs_handle_t *hdl, boolean_t enable) { hdl->libzfs_mnttab_enable = enable; } int libzfs_mnttab_find(libzfs_handle_t *hdl, const char *fsname, struct mnttab *entry) { mnttab_node_t find; mnttab_node_t *mtn; int error; if (!hdl->libzfs_mnttab_enable) { struct mnttab srch = { 0 }; if (avl_numnodes(&hdl->libzfs_mnttab_cache)) libzfs_mnttab_fini(hdl); /* Reopen MNTTAB to prevent reading stale data from open file */ if (freopen(MNTTAB, "r", hdl->libzfs_mnttab) == NULL) return (ENOENT); srch.mnt_special = (char *)fsname; srch.mnt_fstype = MNTTYPE_ZFS; if (getmntany(hdl->libzfs_mnttab, entry, &srch) == 0) return (0); else return (ENOENT); } if (avl_numnodes(&hdl->libzfs_mnttab_cache) == 0) if ((error = libzfs_mnttab_update(hdl)) != 0) return (error); find.mtn_mt.mnt_special = (char *)fsname; mtn = avl_find(&hdl->libzfs_mnttab_cache, &find, NULL); 
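/*
 * Editor's sketch, not part of this patch: libzfs_mnttab_update() and
 * libzfs_mnttab_find() above cache the ZFS entries of MNTTAB and look
 * datasets up by their mnt_special name.  A minimal uncached lookup using
 * the glibc getmntent() interface (rather than the struct mnttab variant
 * used in this file):
 */
#include <mntent.h>
#include <stdio.h>
#include <string.h>

static int
find_zfs_mount(const char *fsname, char *mountp, size_t len)
{
	FILE *fp = setmntent("/proc/self/mounts", "r");
	struct mntent *me;
	int found = 0;

	if (fp == NULL)
		return (0);
	while ((me = getmntent(fp)) != NULL) {
		if (strcmp(me->mnt_type, "zfs") != 0)
			continue;	/* only ZFS filesystems */
		if (strcmp(me->mnt_fsname, fsname) == 0) {
			(void) snprintf(mountp, len, "%s", me->mnt_dir);
			found = 1;
			break;
		}
	}
	(void) endmntent(fp);
	return (found);
}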
if (mtn) { *entry = mtn->mtn_mt; return (0); } return (ENOENT); } void libzfs_mnttab_add(libzfs_handle_t *hdl, const char *special, const char *mountp, const char *mntopts) { mnttab_node_t *mtn; if (avl_numnodes(&hdl->libzfs_mnttab_cache) == 0) return; mtn = zfs_alloc(hdl, sizeof (mnttab_node_t)); mtn->mtn_mt.mnt_special = zfs_strdup(hdl, special); mtn->mtn_mt.mnt_mountp = zfs_strdup(hdl, mountp); mtn->mtn_mt.mnt_fstype = zfs_strdup(hdl, MNTTYPE_ZFS); mtn->mtn_mt.mnt_mntopts = zfs_strdup(hdl, mntopts); avl_add(&hdl->libzfs_mnttab_cache, mtn); } void libzfs_mnttab_remove(libzfs_handle_t *hdl, const char *fsname) { mnttab_node_t find; mnttab_node_t *ret; find.mtn_mt.mnt_special = (char *)fsname; if ((ret = avl_find(&hdl->libzfs_mnttab_cache, (void *)&find, NULL))) { avl_remove(&hdl->libzfs_mnttab_cache, ret); free(ret->mtn_mt.mnt_special); free(ret->mtn_mt.mnt_mountp); free(ret->mtn_mt.mnt_fstype); free(ret->mtn_mt.mnt_mntopts); free(ret); } } int zfs_spa_version(zfs_handle_t *zhp, int *spa_version) { zpool_handle_t *zpool_handle = zhp->zpool_hdl; if (zpool_handle == NULL) return (-1); *spa_version = zpool_get_prop_int(zpool_handle, ZPOOL_PROP_VERSION, NULL); return (0); } /* * The choice of reservation property depends on the SPA version. */ static int zfs_which_resv_prop(zfs_handle_t *zhp, zfs_prop_t *resv_prop) { int spa_version; if (zfs_spa_version(zhp, &spa_version) < 0) return (-1); if (spa_version >= SPA_VERSION_REFRESERVATION) *resv_prop = ZFS_PROP_REFRESERVATION; else *resv_prop = ZFS_PROP_RESERVATION; return (0); } /* * Given an nvlist of properties to set, validates that they are correct, and * parses any numeric properties (index, boolean, etc) if they are specified as * strings. */ nvlist_t * zfs_valid_proplist(libzfs_handle_t *hdl, zfs_type_t type, nvlist_t *nvl, uint64_t zoned, zfs_handle_t *zhp, const char *errbuf) { nvpair_t *elem; uint64_t intval; char *strval; zfs_prop_t prop; nvlist_t *ret; int chosen_normal = -1; int chosen_utf = -1; if (nvlist_alloc(&ret, NV_UNIQUE_NAME, 0) != 0) { (void) no_memory(hdl); return (NULL); } /* * Make sure this property is valid and applies to this type. */ elem = NULL; while ((elem = nvlist_next_nvpair(nvl, elem)) != NULL) { const char *propname = nvpair_name(elem); prop = zfs_name_to_prop(propname); if (prop == ZPROP_INVAL && zfs_prop_user(propname)) { /* * This is a user property: make sure it's a * string, and that it's less than ZAP_MAXNAMELEN. */ if (nvpair_type(elem) != DATA_TYPE_STRING) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' must be a string"), propname); (void) zfs_error(hdl, EZFS_BADPROP, errbuf); goto error; } if (strlen(nvpair_name(elem)) >= ZAP_MAXNAMELEN) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "property name '%s' is too long"), propname); (void) zfs_error(hdl, EZFS_BADPROP, errbuf); goto error; } (void) nvpair_value_string(elem, &strval); if (nvlist_add_string(ret, propname, strval) != 0) { (void) no_memory(hdl); goto error; } continue; } /* * Currently, only user properties can be modified on * snapshots. 
*/ if (type == ZFS_TYPE_SNAPSHOT) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "this property can not be modified for snapshots")); (void) zfs_error(hdl, EZFS_PROPTYPE, errbuf); goto error; } if (prop == ZPROP_INVAL && zfs_prop_userquota(propname)) { zfs_userquota_prop_t uqtype; char newpropname[128]; char domain[128]; uint64_t rid; uint64_t valary[3]; if (userquota_propname_decode(propname, zoned, &uqtype, domain, sizeof (domain), &rid) != 0) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' has an invalid user/group name"), propname); (void) zfs_error(hdl, EZFS_BADPROP, errbuf); goto error; } if (uqtype != ZFS_PROP_USERQUOTA && uqtype != ZFS_PROP_GROUPQUOTA) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' is readonly"), propname); (void) zfs_error(hdl, EZFS_PROPREADONLY, errbuf); goto error; } if (nvpair_type(elem) == DATA_TYPE_STRING) { (void) nvpair_value_string(elem, &strval); if (strcmp(strval, "none") == 0) { intval = 0; } else if (zfs_nicestrtonum(hdl, strval, &intval) != 0) { (void) zfs_error(hdl, EZFS_BADPROP, errbuf); goto error; } } else if (nvpair_type(elem) == DATA_TYPE_UINT64) { (void) nvpair_value_uint64(elem, &intval); if (intval == 0) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "use 'none' to disable " "userquota/groupquota")); goto error; } } else { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' must be a number"), propname); (void) zfs_error(hdl, EZFS_BADPROP, errbuf); goto error; } /* * Encode the prop name as * userquota@-domain, to make it easy * for the kernel to decode. */ (void) snprintf(newpropname, sizeof (newpropname), "%s%llx-%s", zfs_userquota_prop_prefixes[uqtype], (longlong_t)rid, domain); valary[0] = uqtype; valary[1] = rid; valary[2] = intval; if (nvlist_add_uint64_array(ret, newpropname, valary, 3) != 0) { (void) no_memory(hdl); goto error; } continue; } else if (prop == ZPROP_INVAL && zfs_prop_written(propname)) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' is readonly"), propname); (void) zfs_error(hdl, EZFS_PROPREADONLY, errbuf); goto error; } if (prop == ZPROP_INVAL) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid property '%s'"), propname); (void) zfs_error(hdl, EZFS_BADPROP, errbuf); goto error; } if (!zfs_prop_valid_for_type(prop, type)) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' does not " "apply to datasets of this type"), propname); (void) zfs_error(hdl, EZFS_PROPTYPE, errbuf); goto error; } if (zfs_prop_readonly(prop) && (!zfs_prop_setonce(prop) || zhp != NULL)) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' is readonly"), propname); (void) zfs_error(hdl, EZFS_PROPREADONLY, errbuf); goto error; } if (zprop_parse_value(hdl, elem, prop, type, ret, &strval, &intval, errbuf) != 0) goto error; /* * Perform some additional checks for specific properties. 
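/*
 * Editor's sketch, not part of this patch: the hunk above flattens a user or
 * group quota property into "<prefix><rid-in-hex>-<domain>" so the kernel
 * can take it apart cheaply.  A minimal encode/decode pair for that layout;
 * the prefix string is whatever zfs_userquota_prop_prefixes[] supplies:
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static void
encode_quota_prop(char *buf, size_t len, const char *prefix,
    unsigned long long rid, const char *domain)
{
	(void) snprintf(buf, len, "%s%llx-%s", prefix, rid, domain);
}

static int
decode_quota_prop(const char *buf, const char *prefix,
    unsigned long long *ridp, const char **domainp)
{
	char *end;

	if (strncmp(buf, prefix, strlen(prefix)) != 0)
		return (-1);
	*ridp = strtoull(buf + strlen(prefix), &end, 16);
	if (*end != '-')
		return (-1);	/* hex rid must be followed by '-' */
	*domainp = end + 1;
	return (0);
}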
*/ switch (prop) { case ZFS_PROP_VERSION: { int version; if (zhp == NULL) break; version = zfs_prop_get_int(zhp, ZFS_PROP_VERSION); if (intval < version) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "Can not downgrade; already at version %u"), version); (void) zfs_error(hdl, EZFS_BADPROP, errbuf); goto error; } break; } case ZFS_PROP_RECORDSIZE: case ZFS_PROP_VOLBLOCKSIZE: /* must be power of two within SPA_{MIN,MAX}BLOCKSIZE */ if (intval < SPA_MINBLOCKSIZE || intval > SPA_MAXBLOCKSIZE || !ISP2(intval)) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' must be power of 2 from %u " "to %uk"), propname, (uint_t)SPA_MINBLOCKSIZE, (uint_t)SPA_MAXBLOCKSIZE >> 10); (void) zfs_error(hdl, EZFS_BADPROP, errbuf); goto error; } break; case ZFS_PROP_MLSLABEL: { #ifdef HAVE_MLSLABEL /* * Verify the mlslabel string and convert to * internal hex label string. */ m_label_t *new_sl; char *hex = NULL; /* internal label string */ /* Default value is already OK. */ if (strcasecmp(strval, ZFS_MLSLABEL_DEFAULT) == 0) break; /* Verify the label can be converted to binary form */ if (((new_sl = m_label_alloc(MAC_LABEL)) == NULL) || (str_to_label(strval, &new_sl, MAC_LABEL, L_NO_CORRECTION, NULL) == -1)) { goto badlabel; } /* Now translate to hex internal label string */ if (label_to_str(new_sl, &hex, M_INTERNAL, DEF_NAMES) != 0) { if (hex) free(hex); goto badlabel; } m_label_free(new_sl); /* If string is already in internal form, we're done. */ if (strcmp(strval, hex) == 0) { free(hex); break; } /* Replace the label string with the internal form. */ (void) nvlist_remove(ret, zfs_prop_to_name(prop), DATA_TYPE_STRING); verify(nvlist_add_string(ret, zfs_prop_to_name(prop), hex) == 0); free(hex); break; badlabel: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid mlslabel '%s'"), strval); (void) zfs_error(hdl, EZFS_BADPROP, errbuf); m_label_free(new_sl); /* OK if null */ goto error; #else zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "mlslabels are unsupported")); (void) zfs_error(hdl, EZFS_BADPROP, errbuf); goto error; #endif /* HAVE_MLSLABEL */ } case ZFS_PROP_MOUNTPOINT: { namecheck_err_t why; if (strcmp(strval, ZFS_MOUNTPOINT_NONE) == 0 || strcmp(strval, ZFS_MOUNTPOINT_LEGACY) == 0) break; if (mountpoint_namecheck(strval, &why)) { switch (why) { case NAME_ERR_LEADING_SLASH: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' must be an absolute path, " "'none', or 'legacy'"), propname); break; case NAME_ERR_TOOLONG: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "component of '%s' is too long"), propname); break; default: break; } (void) zfs_error(hdl, EZFS_BADPROP, errbuf); goto error; } } /*FALLTHRU*/ case ZFS_PROP_SHARESMB: case ZFS_PROP_SHARENFS: /* * For the mountpoint and sharenfs or sharesmb * properties, check if it can be set in a * global/non-global zone based on * the zoned property value: * * global zone non-global zone * -------------------------------------------------- * zoned=on mountpoint (no) mountpoint (yes) * sharenfs (no) sharenfs (no) * sharesmb (no) sharesmb (no) * * zoned=off mountpoint (yes) N/A * sharenfs (yes) * sharesmb (yes) */ if (zoned) { if (getzoneid() == GLOBAL_ZONEID) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' cannot be set on " "dataset in a non-global zone"), propname); (void) zfs_error(hdl, EZFS_ZONED, errbuf); goto error; } else if (prop == ZFS_PROP_SHARENFS || prop == ZFS_PROP_SHARESMB) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' cannot be set in " "a non-global zone"), propname); (void) zfs_error(hdl, EZFS_ZONED, errbuf); goto error; } } else if (getzoneid() != GLOBAL_ZONEID) { /* 
* If zoned property is 'off', this must be in * a global zone. If not, something is wrong. */ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' cannot be set while dataset " "'zoned' property is set"), propname); (void) zfs_error(hdl, EZFS_ZONED, errbuf); goto error; } /* * At this point, it is legitimate to set the * property. Now we want to make sure that the * property value is valid if it is sharenfs. */ if ((prop == ZFS_PROP_SHARENFS || prop == ZFS_PROP_SHARESMB) && strcmp(strval, "on") != 0 && strcmp(strval, "off") != 0) { zfs_share_proto_t proto; if (prop == ZFS_PROP_SHARESMB) proto = PROTO_SMB; else proto = PROTO_NFS; /* * Must be an valid sharing protocol * option string so init the libshare * in order to enable the parser and * then parse the options. We use the * control API since we don't care about * the current configuration and don't * want the overhead of loading it * until we actually do something. */ if (zfs_init_libshare(hdl, SA_INIT_CONTROL_API) != SA_OK) { /* * An error occurred so we can't do * anything */ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' cannot be set: problem " "in share initialization"), propname); (void) zfs_error(hdl, EZFS_BADPROP, errbuf); goto error; } if (zfs_parse_options(strval, proto) != SA_OK) { /* * There was an error in parsing so * deal with it by issuing an error * message and leaving after * uninitializing the the libshare * interface. */ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' cannot be set to invalid " "options"), propname); (void) zfs_error(hdl, EZFS_BADPROP, errbuf); zfs_uninit_libshare(hdl); goto error; } zfs_uninit_libshare(hdl); } break; case ZFS_PROP_UTF8ONLY: chosen_utf = (int)intval; break; case ZFS_PROP_NORMALIZE: chosen_normal = (int)intval; break; default: break; } /* * For changes to existing volumes, we have some additional * checks to enforce. */ if (type == ZFS_TYPE_VOLUME && zhp != NULL) { uint64_t volsize = zfs_prop_get_int(zhp, ZFS_PROP_VOLSIZE); uint64_t blocksize = zfs_prop_get_int(zhp, ZFS_PROP_VOLBLOCKSIZE); char buf[64]; switch (prop) { case ZFS_PROP_RESERVATION: case ZFS_PROP_REFRESERVATION: if (intval > volsize) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' is greater than current " "volume size"), propname); (void) zfs_error(hdl, EZFS_BADPROP, errbuf); goto error; } break; case ZFS_PROP_VOLSIZE: if (intval % blocksize != 0) { zfs_nicenum(blocksize, buf, sizeof (buf)); zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' must be a multiple of " "volume block size (%s)"), propname, buf); (void) zfs_error(hdl, EZFS_BADPROP, errbuf); goto error; } if (intval == 0) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' cannot be zero"), propname); (void) zfs_error(hdl, EZFS_BADPROP, errbuf); goto error; } break; default: break; } } } /* * If normalization was chosen, but no UTF8 choice was made, * enforce rejection of non-UTF8 names. * * If normalization was chosen, but rejecting non-UTF8 names * was explicitly not chosen, it is an error. 
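/*
 * Editor's sketch, not part of this patch: the volume checks above reduce to
 * three arithmetic rules.  Stated on their own; the SPA_* fallbacks mirror
 * the 2012-era sys/spa.h limits and exist only to keep the sketch
 * self-contained:
 */
#include <stdint.h>

#ifndef SPA_MINBLOCKSIZE
#define	SPA_MINBLOCKSIZE	(1ULL << 9)	/* 512 bytes */
#endif
#ifndef SPA_MAXBLOCKSIZE
#define	SPA_MAXBLOCKSIZE	(1ULL << 17)	/* 128K */
#endif

static int
volume_props_ok(uint64_t volsize, uint64_t blocksize, uint64_t reservation)
{
	/* block size must be a power of two within the SPA limits */
	if (blocksize < SPA_MINBLOCKSIZE || blocksize > SPA_MAXBLOCKSIZE ||
	    (blocksize & (blocksize - 1)) != 0)
		return (0);
	/* volume size must be a nonzero multiple of the block size */
	if (volsize == 0 || volsize % blocksize != 0)
		return (0);
	/* a reservation or refreservation may not exceed the volume size */
	if (reservation > volsize)
		return (0);
	return (1);
}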
*/ if (chosen_normal > 0 && chosen_utf < 0) { if (nvlist_add_uint64(ret, zfs_prop_to_name(ZFS_PROP_UTF8ONLY), 1) != 0) { (void) no_memory(hdl); goto error; } } else if (chosen_normal > 0 && chosen_utf == 0) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' must be set 'on' if normalization chosen"), zfs_prop_to_name(ZFS_PROP_UTF8ONLY)); (void) zfs_error(hdl, EZFS_BADPROP, errbuf); goto error; } return (ret); error: nvlist_free(ret); return (NULL); } int zfs_add_synthetic_resv(zfs_handle_t *zhp, nvlist_t *nvl) { uint64_t old_volsize; uint64_t new_volsize; uint64_t old_reservation; uint64_t new_reservation; zfs_prop_t resv_prop; /* * If this is an existing volume, and someone is setting the volsize, * make sure that it matches the reservation, or add it if necessary. */ old_volsize = zfs_prop_get_int(zhp, ZFS_PROP_VOLSIZE); if (zfs_which_resv_prop(zhp, &resv_prop) < 0) return (-1); old_reservation = zfs_prop_get_int(zhp, resv_prop); if ((zvol_volsize_to_reservation(old_volsize, zhp->zfs_props) != old_reservation) || nvlist_lookup_uint64(nvl, zfs_prop_to_name(resv_prop), &new_reservation) != ENOENT) { return (0); } if (nvlist_lookup_uint64(nvl, zfs_prop_to_name(ZFS_PROP_VOLSIZE), &new_volsize) != 0) return (-1); new_reservation = zvol_volsize_to_reservation(new_volsize, zhp->zfs_props); if (nvlist_add_uint64(nvl, zfs_prop_to_name(resv_prop), new_reservation) != 0) { (void) no_memory(zhp->zfs_hdl); return (-1); } return (1); } void zfs_setprop_error(libzfs_handle_t *hdl, zfs_prop_t prop, int err, char *errbuf) { switch (err) { case ENOSPC: /* * For quotas and reservations, ENOSPC indicates * something different; setting a quota or reservation * doesn't use any disk space. */ switch (prop) { case ZFS_PROP_QUOTA: case ZFS_PROP_REFQUOTA: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "size is less than current used or " "reserved space")); (void) zfs_error(hdl, EZFS_PROPSPACE, errbuf); break; case ZFS_PROP_RESERVATION: case ZFS_PROP_REFRESERVATION: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "size is greater than available space")); (void) zfs_error(hdl, EZFS_PROPSPACE, errbuf); break; default: (void) zfs_standard_error(hdl, err, errbuf); break; } break; case EBUSY: (void) zfs_standard_error(hdl, EBUSY, errbuf); break; case EROFS: (void) zfs_error(hdl, EZFS_DSREADONLY, errbuf); break; case ENOTSUP: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool and or dataset must be upgraded to set this " "property or value")); (void) zfs_error(hdl, EZFS_BADVERSION, errbuf); break; case ERANGE: if (prop == ZFS_PROP_COMPRESSION) { (void) zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "property setting is not allowed on " "bootable datasets")); (void) zfs_error(hdl, EZFS_NOTSUP, errbuf); } else { (void) zfs_standard_error(hdl, err, errbuf); } break; case EINVAL: if (prop == ZPROP_INVAL) { (void) zfs_error(hdl, EZFS_BADPROP, errbuf); } else { (void) zfs_standard_error(hdl, err, errbuf); } break; case EOVERFLOW: /* * This platform can't address a volume this big. */ #ifdef _ILP32 if (prop == ZFS_PROP_VOLSIZE) { (void) zfs_error(hdl, EZFS_VOLTOOBIG, errbuf); break; } #endif /* FALLTHROUGH */ default: (void) zfs_standard_error(hdl, err, errbuf); } } static boolean_t zfs_is_namespace_prop(zfs_prop_t prop) { switch (prop) { case ZFS_PROP_ATIME: case ZFS_PROP_DEVICES: case ZFS_PROP_EXEC: case ZFS_PROP_SETUID: case ZFS_PROP_READONLY: case ZFS_PROP_XATTR: case ZFS_PROP_NBMAND: return (B_TRUE); default: return (B_FALSE); } } /* * Given a property name and value, set the property for the given dataset. 
*/ int zfs_prop_set(zfs_handle_t *zhp, const char *propname, const char *propval) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; int ret = -1; prop_changelist_t *cl = NULL; char errbuf[1024]; libzfs_handle_t *hdl = zhp->zfs_hdl; nvlist_t *nvl = NULL, *realprops; zfs_prop_t prop; - boolean_t do_prefix; - uint64_t idx; + boolean_t do_prefix = B_TRUE; int added_resv = 0; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot set property for '%s'"), zhp->zfs_name); if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0 || nvlist_add_string(nvl, propname, propval) != 0) { (void) no_memory(hdl); goto error; } if ((realprops = zfs_valid_proplist(hdl, zhp->zfs_type, nvl, zfs_prop_get_int(zhp, ZFS_PROP_ZONED), zhp, errbuf)) == NULL) goto error; nvlist_free(nvl); nvl = realprops; prop = zfs_name_to_prop(propname); if (prop == ZFS_PROP_VOLSIZE) { if ((added_resv = zfs_add_synthetic_resv(zhp, nvl)) == -1) goto error; } if ((cl = changelist_gather(zhp, prop, 0, 0)) == NULL) goto error; if (prop == ZFS_PROP_MOUNTPOINT && changelist_haszonedchild(cl)) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "child dataset with inherited mountpoint is used " "in a non-global zone")); ret = zfs_error(hdl, EZFS_ZONED, errbuf); goto error; } /* - * If the dataset's canmount property is being set to noauto, - * then we want to prevent unmounting & remounting it. + * We don't want to unmount & remount the dataset when changing + * its canmount property to 'on' or 'noauto'. We only use + * the changelist logic to unmount when setting canmount=off. */ - do_prefix = !((prop == ZFS_PROP_CANMOUNT) && - (zprop_string_to_index(prop, propval, &idx, - ZFS_TYPE_DATASET) == 0) && (idx == ZFS_CANMOUNT_NOAUTO)); + if (prop == ZFS_PROP_CANMOUNT) { + uint64_t idx; + int err = zprop_string_to_index(prop, propval, &idx, + ZFS_TYPE_DATASET); + if (err == 0 && idx != ZFS_CANMOUNT_OFF) + do_prefix = B_FALSE; + } if (do_prefix && (ret = changelist_prefix(cl)) != 0) goto error; /* * Execute the corresponding ioctl() to set this property. */ (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); if (zcmd_write_src_nvlist(hdl, &zc, nvl) != 0) goto error; ret = zfs_ioctl(hdl, ZFS_IOC_SET_PROP, &zc); if (ret != 0) { zfs_setprop_error(hdl, prop, errno, errbuf); if (added_resv && errno == ENOSPC) { /* clean up the volsize property we tried to set */ uint64_t old_volsize = zfs_prop_get_int(zhp, ZFS_PROP_VOLSIZE); nvlist_free(nvl); zcmd_free_nvlists(&zc); if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0) goto error; if (nvlist_add_uint64(nvl, zfs_prop_to_name(ZFS_PROP_VOLSIZE), old_volsize) != 0) goto error; if (zcmd_write_src_nvlist(hdl, &zc, nvl) != 0) goto error; (void) zfs_ioctl(hdl, ZFS_IOC_SET_PROP, &zc); } } else { if (do_prefix) ret = changelist_postfix(cl); if (ret == 0) { /* * Refresh the statistics so the new property * value is reflected. */ (void) get_stats(zhp); /* * Remount the filesystem to propagate the change * if one of the options handled by the generic * Linux namespace layer has been modified. */ if (zfs_is_namespace_prop(prop) && zfs_is_mounted(zhp, NULL)) ret = zfs_mount(zhp, MNTOPT_REMOUNT, 0); } } error: nvlist_free(nvl); zcmd_free_nvlists(&zc); if (cl) changelist_free(cl); return (ret); } /* * Given a property, inherit the value from the parent dataset, or if received * is TRUE, revert to the received value, if any. 
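/*
 * Editor's sketch, not part of this patch: the do_prefix hunk above flips
 * the default to "run the changelist unmount/remount cycle" and opts out
 * only when canmount is being set to something other than 'off'.  The
 * decision in isolation, using ZFS_PROP_CANMOUNT and ZFS_CANMOUNT_OFF as
 * declared elsewhere in this codebase:
 */
static boolean_t
needs_unmount_cycle(zfs_prop_t prop, int idx_valid, uint64_t idx)
{
	if (prop == ZFS_PROP_CANMOUNT && idx_valid && idx != ZFS_CANMOUNT_OFF)
		return (B_FALSE);	/* canmount=on or canmount=noauto */
	return (B_TRUE);		/* canmount=off and everything else */
}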
*/ int zfs_prop_inherit(zfs_handle_t *zhp, const char *propname, boolean_t received) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; int ret; prop_changelist_t *cl; libzfs_handle_t *hdl = zhp->zfs_hdl; char errbuf[1024]; zfs_prop_t prop; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot inherit %s for '%s'"), propname, zhp->zfs_name); zc.zc_cookie = received; if ((prop = zfs_name_to_prop(propname)) == ZPROP_INVAL) { /* * For user properties, the amount of work we have to do is very * small, so just do it here. */ if (!zfs_prop_user(propname)) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid property")); return (zfs_error(hdl, EZFS_BADPROP, errbuf)); } (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); (void) strlcpy(zc.zc_value, propname, sizeof (zc.zc_value)); if (zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_INHERIT_PROP, &zc) != 0) return (zfs_standard_error(hdl, errno, errbuf)); return (0); } /* * Verify that this property is inheritable. */ if (zfs_prop_readonly(prop)) return (zfs_error(hdl, EZFS_PROPREADONLY, errbuf)); if (!zfs_prop_inheritable(prop) && !received) return (zfs_error(hdl, EZFS_PROPNONINHERIT, errbuf)); /* * Check to see if the value applies to this type */ if (!zfs_prop_valid_for_type(prop, zhp->zfs_type)) return (zfs_error(hdl, EZFS_PROPTYPE, errbuf)); /* * Normalize the name, to get rid of shorthand abbreviations. */ propname = zfs_prop_to_name(prop); (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); (void) strlcpy(zc.zc_value, propname, sizeof (zc.zc_value)); if (prop == ZFS_PROP_MOUNTPOINT && getzoneid() == GLOBAL_ZONEID && zfs_prop_get_int(zhp, ZFS_PROP_ZONED)) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "dataset is used in a non-global zone")); return (zfs_error(hdl, EZFS_ZONED, errbuf)); } /* * Determine datasets which will be affected by this change, if any. */ if ((cl = changelist_gather(zhp, prop, 0, 0)) == NULL) return (-1); if (prop == ZFS_PROP_MOUNTPOINT && changelist_haszonedchild(cl)) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "child dataset with inherited mountpoint is used " "in a non-global zone")); ret = zfs_error(hdl, EZFS_ZONED, errbuf); goto error; } if ((ret = changelist_prefix(cl)) != 0) goto error; if ((ret = zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_INHERIT_PROP, &zc)) != 0) { return (zfs_standard_error(hdl, errno, errbuf)); } else { if ((ret = changelist_postfix(cl)) != 0) goto error; /* * Refresh the statistics so the new property is reflected. */ (void) get_stats(zhp); } error: changelist_free(cl); return (ret); } /* * True DSL properties are stored in an nvlist. The following two functions * extract them appropriately. 
*/ uint64_t getprop_uint64(zfs_handle_t *zhp, zfs_prop_t prop, char **source) { nvlist_t *nv; uint64_t value; *source = NULL; if (nvlist_lookup_nvlist(zhp->zfs_props, zfs_prop_to_name(prop), &nv) == 0) { verify(nvlist_lookup_uint64(nv, ZPROP_VALUE, &value) == 0); (void) nvlist_lookup_string(nv, ZPROP_SOURCE, source); } else { verify(!zhp->zfs_props_table || zhp->zfs_props_table[prop] == B_TRUE); value = zfs_prop_default_numeric(prop); *source = ""; } return (value); } static char * getprop_string(zfs_handle_t *zhp, zfs_prop_t prop, char **source) { nvlist_t *nv; char *value; *source = NULL; if (nvlist_lookup_nvlist(zhp->zfs_props, zfs_prop_to_name(prop), &nv) == 0) { verify(nvlist_lookup_string(nv, ZPROP_VALUE, &value) == 0); (void) nvlist_lookup_string(nv, ZPROP_SOURCE, source); } else { verify(!zhp->zfs_props_table || zhp->zfs_props_table[prop] == B_TRUE); if ((value = (char *)zfs_prop_default_string(prop)) == NULL) value = ""; *source = ""; } return (value); } static boolean_t zfs_is_recvd_props_mode(zfs_handle_t *zhp) { return (zhp->zfs_props == zhp->zfs_recvd_props); } static void zfs_set_recvd_props_mode(zfs_handle_t *zhp, uint64_t *cookie) { *cookie = (uint64_t)(uintptr_t)zhp->zfs_props; zhp->zfs_props = zhp->zfs_recvd_props; } static void zfs_unset_recvd_props_mode(zfs_handle_t *zhp, uint64_t *cookie) { zhp->zfs_props = (nvlist_t *)(uintptr_t)*cookie; *cookie = 0; } /* * Internal function for getting a numeric property. Both zfs_prop_get() and * zfs_prop_get_int() are built using this interface. * * Certain properties can be overridden using 'mount -o'. In this case, scan * the contents of the /etc/mtab entry, searching for the appropriate options. * If they differ from the on-disk values, report the current values and mark * the source "temporary". */ static int get_numeric_property(zfs_handle_t *zhp, zfs_prop_t prop, zprop_source_t *src, char **source, uint64_t *val) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; nvlist_t *zplprops = NULL; struct mnttab mnt; char *mntopt_on = NULL; char *mntopt_off = NULL; boolean_t received = zfs_is_recvd_props_mode(zhp); *source = NULL; switch (prop) { case ZFS_PROP_ATIME: mntopt_on = MNTOPT_ATIME; mntopt_off = MNTOPT_NOATIME; break; case ZFS_PROP_DEVICES: mntopt_on = MNTOPT_DEVICES; mntopt_off = MNTOPT_NODEVICES; break; case ZFS_PROP_EXEC: mntopt_on = MNTOPT_EXEC; mntopt_off = MNTOPT_NOEXEC; break; case ZFS_PROP_READONLY: mntopt_on = MNTOPT_RO; mntopt_off = MNTOPT_RW; break; case ZFS_PROP_SETUID: mntopt_on = MNTOPT_SETUID; mntopt_off = MNTOPT_NOSETUID; break; case ZFS_PROP_XATTR: mntopt_on = MNTOPT_XATTR; mntopt_off = MNTOPT_NOXATTR; break; case ZFS_PROP_NBMAND: mntopt_on = MNTOPT_NBMAND; mntopt_off = MNTOPT_NONBMAND; break; default: break; } /* * Because looking up the mount options is potentially expensive * (iterating over all of /etc/mtab), we defer its calculation until * we're looking up a property which requires its presence. 
*/ if (!zhp->zfs_mntcheck && (mntopt_on != NULL || prop == ZFS_PROP_MOUNTED)) { libzfs_handle_t *hdl = zhp->zfs_hdl; struct mnttab entry; if (libzfs_mnttab_find(hdl, zhp->zfs_name, &entry) == 0) { zhp->zfs_mntopts = zfs_strdup(hdl, entry.mnt_mntopts); if (zhp->zfs_mntopts == NULL) return (-1); } zhp->zfs_mntcheck = B_TRUE; } if (zhp->zfs_mntopts == NULL) mnt.mnt_mntopts = ""; else mnt.mnt_mntopts = zhp->zfs_mntopts; switch (prop) { case ZFS_PROP_ATIME: case ZFS_PROP_DEVICES: case ZFS_PROP_EXEC: case ZFS_PROP_READONLY: case ZFS_PROP_SETUID: case ZFS_PROP_XATTR: case ZFS_PROP_NBMAND: *val = getprop_uint64(zhp, prop, source); if (received) break; if (hasmntopt(&mnt, mntopt_on) && !*val) { *val = B_TRUE; if (src) *src = ZPROP_SRC_TEMPORARY; } else if (hasmntopt(&mnt, mntopt_off) && *val) { *val = B_FALSE; if (src) *src = ZPROP_SRC_TEMPORARY; } break; case ZFS_PROP_CANMOUNT: case ZFS_PROP_VOLSIZE: case ZFS_PROP_QUOTA: case ZFS_PROP_REFQUOTA: case ZFS_PROP_RESERVATION: case ZFS_PROP_REFRESERVATION: *val = getprop_uint64(zhp, prop, source); if (*source == NULL) { /* not default, must be local */ *source = zhp->zfs_name; } break; case ZFS_PROP_MOUNTED: *val = (zhp->zfs_mntopts != NULL); break; case ZFS_PROP_NUMCLONES: *val = zhp->zfs_dmustats.dds_num_clones; break; case ZFS_PROP_VERSION: case ZFS_PROP_NORMALIZE: case ZFS_PROP_UTF8ONLY: case ZFS_PROP_CASE: if (!zfs_prop_valid_for_type(prop, zhp->zfs_head_type) || zcmd_alloc_dst_nvlist(zhp->zfs_hdl, &zc, 0) != 0) return (-1); (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); if (zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_OBJSET_ZPLPROPS, &zc)) { zcmd_free_nvlists(&zc); return (-1); } if (zcmd_read_dst_nvlist(zhp->zfs_hdl, &zc, &zplprops) != 0 || nvlist_lookup_uint64(zplprops, zfs_prop_to_name(prop), val) != 0) { zcmd_free_nvlists(&zc); return (-1); } if (zplprops) nvlist_free(zplprops); zcmd_free_nvlists(&zc); break; default: switch (zfs_prop_get_type(prop)) { case PROP_TYPE_NUMBER: case PROP_TYPE_INDEX: *val = getprop_uint64(zhp, prop, source); /* * If we tried to use a default value for a * readonly property, it means that it was not * present. */ if (zfs_prop_readonly(prop) && *source != NULL && (*source)[0] == '\0') { *source = NULL; } break; case PROP_TYPE_STRING: default: zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, "cannot get non-numeric property")); return (zfs_error(zhp->zfs_hdl, EZFS_BADPROP, dgettext(TEXT_DOMAIN, "internal error"))); } } return (0); } /* * Calculate the source type, given the raw source string. 
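/*
 * Editor's sketch, not part of this patch: get_numeric_property() above lets
 * a 'mount -o' option temporarily override the stored value of boolean
 * properties such as atime, tagging the source ZPROP_SRC_TEMPORARY.  A
 * self-contained version of that override, scanning a comma-separated
 * option string instead of struct mnttab:
 */
#include <string.h>

static int
opt_present(const char *opts, const char *opt)
{
	size_t optlen = strlen(opt);

	while (opts != NULL && *opts != '\0') {
		const char *end = strchr(opts, ',');
		size_t len = (end == NULL) ? strlen(opts) :
		    (size_t)(end - opts);

		if (len == optlen && strncmp(opts, opt, len) == 0)
			return (1);
		if (end == NULL)
			break;
		opts = end + 1;
	}
	return (0);
}

/* on-disk value 'val' overridden by e.g. "atime"/"noatime" in 'opts' */
static int
effective_bool(const char *opts, const char *on, const char *off, int val,
    int *temporary)
{
	*temporary = 0;
	if (opt_present(opts, on) && !val) {
		*temporary = 1;
		return (1);
	}
	if (opt_present(opts, off) && val) {
		*temporary = 1;
		return (0);
	}
	return (val);
}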
*/ static void get_source(zfs_handle_t *zhp, zprop_source_t *srctype, char *source, char *statbuf, size_t statlen) { if (statbuf == NULL || *srctype == ZPROP_SRC_TEMPORARY) return; if (source == NULL) { *srctype = ZPROP_SRC_NONE; } else if (source[0] == '\0') { *srctype = ZPROP_SRC_DEFAULT; } else if (strstr(source, ZPROP_SOURCE_VAL_RECVD) != NULL) { *srctype = ZPROP_SRC_RECEIVED; } else { if (strcmp(source, zhp->zfs_name) == 0) { *srctype = ZPROP_SRC_LOCAL; } else { (void) strlcpy(statbuf, source, statlen); *srctype = ZPROP_SRC_INHERITED; } } } int zfs_prop_get_recvd(zfs_handle_t *zhp, const char *propname, char *propbuf, size_t proplen, boolean_t literal) { zfs_prop_t prop; int err = 0; if (zhp->zfs_recvd_props == NULL) if (get_recvd_props_ioctl(zhp) != 0) return (-1); prop = zfs_name_to_prop(propname); if (prop != ZPROP_INVAL) { uint64_t cookie; if (!nvlist_exists(zhp->zfs_recvd_props, propname)) return (-1); zfs_set_recvd_props_mode(zhp, &cookie); err = zfs_prop_get(zhp, prop, propbuf, proplen, NULL, NULL, 0, literal); zfs_unset_recvd_props_mode(zhp, &cookie); } else { nvlist_t *propval; char *recvdval; if (nvlist_lookup_nvlist(zhp->zfs_recvd_props, propname, &propval) != 0) return (-1); verify(nvlist_lookup_string(propval, ZPROP_VALUE, &recvdval) == 0); (void) strlcpy(propbuf, recvdval, proplen); } return (err == 0 ? 0 : -1); } static int get_clones_string(zfs_handle_t *zhp, char *propbuf, size_t proplen) { nvlist_t *value; nvpair_t *pair; value = zfs_get_clones_nvl(zhp); if (value == NULL) return (-1); propbuf[0] = '\0'; for (pair = nvlist_next_nvpair(value, NULL); pair != NULL; pair = nvlist_next_nvpair(value, pair)) { if (propbuf[0] != '\0') (void) strlcat(propbuf, ",", proplen); (void) strlcat(propbuf, nvpair_name(pair), proplen); } return (0); } struct get_clones_arg { uint64_t numclones; nvlist_t *value; const char *origin; char buf[ZFS_MAXNAMELEN]; }; int get_clones_cb(zfs_handle_t *zhp, void *arg) { struct get_clones_arg *gca = arg; if (gca->numclones == 0) { zfs_close(zhp); return (0); } if (zfs_prop_get(zhp, ZFS_PROP_ORIGIN, gca->buf, sizeof (gca->buf), NULL, NULL, 0, B_TRUE) != 0) goto out; if (strcmp(gca->buf, gca->origin) == 0) { if (nvlist_add_boolean(gca->value, zfs_get_name(zhp)) != 0) { zfs_close(zhp); return (no_memory(zhp->zfs_hdl)); } gca->numclones--; } out: (void) zfs_iter_children(zhp, get_clones_cb, gca); zfs_close(zhp); return (0); } nvlist_t * zfs_get_clones_nvl(zfs_handle_t *zhp) { nvlist_t *nv, *value; if (nvlist_lookup_nvlist(zhp->zfs_props, zfs_prop_to_name(ZFS_PROP_CLONES), &nv) != 0) { struct get_clones_arg gca; /* * if this is a snapshot, then the kernel wasn't able * to get the clones. Do it by slowly iterating. 
*/ if (zhp->zfs_type != ZFS_TYPE_SNAPSHOT) return (NULL); if (nvlist_alloc(&nv, NV_UNIQUE_NAME, 0) != 0) return (NULL); if (nvlist_alloc(&value, NV_UNIQUE_NAME, 0) != 0) { nvlist_free(nv); return (NULL); } gca.numclones = zfs_prop_get_int(zhp, ZFS_PROP_NUMCLONES); gca.value = value; gca.origin = zhp->zfs_name; if (gca.numclones != 0) { zfs_handle_t *root; char pool[ZFS_MAXNAMELEN]; char *cp = pool; /* get the pool name */ (void) strlcpy(pool, zhp->zfs_name, sizeof (pool)); (void) strsep(&cp, "/@"); root = zfs_open(zhp->zfs_hdl, pool, ZFS_TYPE_FILESYSTEM); (void) get_clones_cb(root, &gca); } if (gca.numclones != 0 || nvlist_add_nvlist(nv, ZPROP_VALUE, value) != 0 || nvlist_add_nvlist(zhp->zfs_props, zfs_prop_to_name(ZFS_PROP_CLONES), nv) != 0) { nvlist_free(nv); nvlist_free(value); return (NULL); } nvlist_free(nv); nvlist_free(value); verify(0 == nvlist_lookup_nvlist(zhp->zfs_props, zfs_prop_to_name(ZFS_PROP_CLONES), &nv)); } verify(nvlist_lookup_nvlist(nv, ZPROP_VALUE, &value) == 0); return (value); } /* * Retrieve a property from the given object. If 'literal' is specified, then * numbers are left as exact values. Otherwise, numbers are converted to a * human-readable form. * * Returns 0 on success, or -1 on error. */ int zfs_prop_get(zfs_handle_t *zhp, zfs_prop_t prop, char *propbuf, size_t proplen, zprop_source_t *src, char *statbuf, size_t statlen, boolean_t literal) { char *source = NULL; uint64_t val; char *str; const char *strval; boolean_t received = zfs_is_recvd_props_mode(zhp); /* * Check to see if this property applies to our object */ if (!zfs_prop_valid_for_type(prop, zhp->zfs_type)) return (-1); if (received && zfs_prop_readonly(prop)) return (-1); if (src) *src = ZPROP_SRC_NONE; switch (prop) { case ZFS_PROP_CREATION: /* * 'creation' is a time_t stored in the statistics. We convert * this into a string unless 'literal' is specified. */ { val = getprop_uint64(zhp, prop, &source); time_t time = (time_t)val; struct tm t; if (literal || localtime_r(&time, &t) == NULL || strftime(propbuf, proplen, "%a %b %e %k:%M %Y", &t) == 0) (void) snprintf(propbuf, proplen, "%llu", (u_longlong_t) val); } break; case ZFS_PROP_MOUNTPOINT: /* * Getting the precise mountpoint can be tricky. * * - for 'none' or 'legacy', return those values. * - for inherited mountpoints, we want to take everything * after our ancestor and append it to the inherited value. * * If the pool has an alternate root, we want to prepend that * root to any values we return. */ str = getprop_string(zhp, prop, &source); if (str[0] == '/') { char buf[MAXPATHLEN]; char *root = buf; const char *relpath; /* * If we inherit the mountpoint, even from a dataset * with a received value, the source will be the path of * the dataset we inherit from. If source is * ZPROP_SOURCE_VAL_RECVD, the received value is not * inherited. */ if (strcmp(source, ZPROP_SOURCE_VAL_RECVD) == 0) { relpath = ""; } else { relpath = zhp->zfs_name + strlen(source); if (relpath[0] == '/') relpath++; } if ((zpool_get_prop(zhp->zpool_hdl, ZPOOL_PROP_ALTROOT, buf, MAXPATHLEN, NULL)) || (strcmp(root, "-") == 0)) root[0] = '\0'; /* * Special case an alternate root of '/'. This will * avoid having multiple leading slashes in the * mountpoint path. */ if (strcmp(root, "/") == 0) root++; /* * If the mountpoint is '/' then skip over this * if we are obtaining either an alternate root or * an inherited mountpoint. 
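/*
 * Editor's sketch, not part of this patch: the ZFS_PROP_CREATION case above
 * renders the stored time_t with localtime_r()/strftime() and falls back to
 * the raw number when conversion fails or literal output was requested.
 * The same shape as a standalone helper ("%k" is the non-standard strftime
 * specifier the file already uses):
 */
#include <stdio.h>
#include <time.h>

static void
format_creation(unsigned long long val, int literal, char *buf, size_t len)
{
	time_t when = (time_t)val;
	struct tm t;

	if (literal || localtime_r(&when, &t) == NULL ||
	    strftime(buf, len, "%a %b %e %k:%M %Y", &t) == 0)
		(void) snprintf(buf, len, "%llu", val);
}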
*/ if (str[1] == '\0' && (root[0] != '\0' || relpath[0] != '\0')) str++; if (relpath[0] == '\0') (void) snprintf(propbuf, proplen, "%s%s", root, str); else (void) snprintf(propbuf, proplen, "%s%s%s%s", root, str, relpath[0] == '@' ? "" : "/", relpath); } else { /* 'legacy' or 'none' */ (void) strlcpy(propbuf, str, proplen); } break; case ZFS_PROP_ORIGIN: (void) strlcpy(propbuf, getprop_string(zhp, prop, &source), proplen); /* * If there is no parent at all, return failure to indicate that * it doesn't apply to this dataset. */ if (propbuf[0] == '\0') return (-1); break; case ZFS_PROP_CLONES: if (get_clones_string(zhp, propbuf, proplen) != 0) return (-1); break; case ZFS_PROP_QUOTA: case ZFS_PROP_REFQUOTA: case ZFS_PROP_RESERVATION: case ZFS_PROP_REFRESERVATION: if (get_numeric_property(zhp, prop, src, &source, &val) != 0) return (-1); /* * If quota or reservation is 0, we translate this into 'none' * (unless literal is set), and indicate that it's the default * value. Otherwise, we print the number nicely and indicate * that its set locally. */ if (val == 0) { if (literal) (void) strlcpy(propbuf, "0", proplen); else (void) strlcpy(propbuf, "none", proplen); } else { if (literal) (void) snprintf(propbuf, proplen, "%llu", (u_longlong_t)val); else zfs_nicenum(val, propbuf, proplen); } break; case ZFS_PROP_REFRATIO: case ZFS_PROP_COMPRESSRATIO: if (get_numeric_property(zhp, prop, src, &source, &val) != 0) return (-1); (void) snprintf(propbuf, proplen, "%llu.%02llux", (u_longlong_t)(val / 100), (u_longlong_t)(val % 100)); break; case ZFS_PROP_TYPE: switch (zhp->zfs_type) { case ZFS_TYPE_FILESYSTEM: str = "filesystem"; break; case ZFS_TYPE_VOLUME: str = "volume"; break; case ZFS_TYPE_SNAPSHOT: str = "snapshot"; break; default: abort(); } (void) snprintf(propbuf, proplen, "%s", str); break; case ZFS_PROP_MOUNTED: /* * The 'mounted' property is a pseudo-property that described * whether the filesystem is currently mounted. Even though * it's a boolean value, the typical values of "on" and "off" * don't make sense, so we translate to "yes" and "no". */ if (get_numeric_property(zhp, ZFS_PROP_MOUNTED, src, &source, &val) != 0) return (-1); if (val) (void) strlcpy(propbuf, "yes", proplen); else (void) strlcpy(propbuf, "no", proplen); break; case ZFS_PROP_NAME: /* * The 'name' property is a pseudo-property derived from the * dataset name. It is presented as a real property to simplify * consumers. */ (void) strlcpy(propbuf, zhp->zfs_name, proplen); break; case ZFS_PROP_MLSLABEL: { #ifdef HAVE_MLSLABEL m_label_t *new_sl = NULL; char *ascii = NULL; /* human readable label */ (void) strlcpy(propbuf, getprop_string(zhp, prop, &source), proplen); if (literal || (strcasecmp(propbuf, ZFS_MLSLABEL_DEFAULT) == 0)) break; /* * Try to translate the internal hex string to * human-readable output. If there are any * problems just use the hex string. */ if (str_to_label(propbuf, &new_sl, MAC_LABEL, L_NO_CORRECTION, NULL) == -1) { m_label_free(new_sl); break; } if (label_to_str(new_sl, &ascii, M_LABEL, DEF_NAMES) != 0) { if (ascii) free(ascii); m_label_free(new_sl); break; } m_label_free(new_sl); (void) strlcpy(propbuf, ascii, proplen); free(ascii); #else (void) strlcpy(propbuf, getprop_string(zhp, prop, &source), proplen); #endif /* HAVE_MLSLABEL */ } break; case ZFS_PROP_GUID: /* * GUIDs are stored as numbers, but they are identifiers. * We don't want them to be pretty printed, because pretty * printing mangles the ID into a truncated and useless value. 
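/*
 * Editor's sketch, not part of this patch: compressratio and refratio above
 * arrive scaled by 100 (154 means 1.54x) and are printed by splitting the
 * integer into quotient and remainder:
 */
#include <stdio.h>

static void
format_ratio(unsigned long long val, char *buf, size_t len)
{
	/* 154 -> "1.54x", 100 -> "1.00x" */
	(void) snprintf(buf, len, "%llu.%02llux", val / 100, val % 100);
}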
*/ if (get_numeric_property(zhp, prop, src, &source, &val) != 0) return (-1); (void) snprintf(propbuf, proplen, "%llu", (u_longlong_t)val); break; default: switch (zfs_prop_get_type(prop)) { case PROP_TYPE_NUMBER: if (get_numeric_property(zhp, prop, src, &source, &val) != 0) return (-1); if (literal) (void) snprintf(propbuf, proplen, "%llu", (u_longlong_t)val); else zfs_nicenum(val, propbuf, proplen); break; case PROP_TYPE_STRING: (void) strlcpy(propbuf, getprop_string(zhp, prop, &source), proplen); break; case PROP_TYPE_INDEX: if (get_numeric_property(zhp, prop, src, &source, &val) != 0) return (-1); if (zfs_prop_index_to_string(prop, val, &strval) != 0) return (-1); (void) strlcpy(propbuf, strval, proplen); break; default: abort(); } } get_source(zhp, src, source, statbuf, statlen); return (0); } /* * Utility function to get the given numeric property. Does no validation that * the given property is the appropriate type; should only be used with * hard-coded property types. */ uint64_t zfs_prop_get_int(zfs_handle_t *zhp, zfs_prop_t prop) { char *source; uint64_t val; (void) get_numeric_property(zhp, prop, NULL, &source, &val); return (val); } int zfs_prop_set_int(zfs_handle_t *zhp, zfs_prop_t prop, uint64_t val) { char buf[64]; (void) snprintf(buf, sizeof (buf), "%llu", (longlong_t)val); return (zfs_prop_set(zhp, zfs_prop_to_name(prop), buf)); } /* * Similar to zfs_prop_get(), but returns the value as an integer. */ int zfs_prop_get_numeric(zfs_handle_t *zhp, zfs_prop_t prop, uint64_t *value, zprop_source_t *src, char *statbuf, size_t statlen) { char *source; /* * Check to see if this property applies to our object */ if (!zfs_prop_valid_for_type(prop, zhp->zfs_type)) { return (zfs_error_fmt(zhp->zfs_hdl, EZFS_PROPTYPE, dgettext(TEXT_DOMAIN, "cannot get property '%s'"), zfs_prop_to_name(prop))); } if (src) *src = ZPROP_SRC_NONE; if (get_numeric_property(zhp, prop, src, &source, value) != 0) return (-1); get_source(zhp, src, source, statbuf, statlen); return (0); } #ifdef HAVE_IDMAP static int idmap_id_to_numeric_domain_rid(uid_t id, boolean_t isuser, char **domainp, idmap_rid_t *ridp) { idmap_get_handle_t *get_hdl = NULL; idmap_stat status; int err = EINVAL; if (idmap_get_create(&get_hdl) != IDMAP_SUCCESS) goto out; if (isuser) { err = idmap_get_sidbyuid(get_hdl, id, IDMAP_REQ_FLG_USE_CACHE, domainp, ridp, &status); } else { err = idmap_get_sidbygid(get_hdl, id, IDMAP_REQ_FLG_USE_CACHE, domainp, ridp, &status); } if (err == IDMAP_SUCCESS && idmap_get_mappings(get_hdl) == IDMAP_SUCCESS && status == IDMAP_SUCCESS) err = 0; else err = EINVAL; out: if (get_hdl) idmap_get_destroy(get_hdl); return (err); } #endif /* HAVE_IDMAP */ /* * convert the propname into parameters needed by kernel * Eg: userquota@ahrens -> ZFS_PROP_USERQUOTA, "", 126829 * Eg: userused@matt@domain -> ZFS_PROP_USERUSED, "S-1-123-456", 789 * Eg: groupquota@staff -> ZFS_PROP_GROUPQUOTA, "", 1234 * Eg: groupused@staff -> ZFS_PROP_GROUPUSED, "", 1234 */ static int userquota_propname_decode(const char *propname, boolean_t zoned, zfs_userquota_prop_t *typep, char *domain, int domainlen, uint64_t *ridp) { zfs_userquota_prop_t type; char *cp; boolean_t isuser; boolean_t isgroup; struct passwd *pw; struct group *gr; domain[0] = '\0'; /* Figure out the property type ({user|group}{quota|space}) */ for (type = 0; type < ZFS_NUM_USERQUOTA_PROPS; type++) { if (strncmp(propname, zfs_userquota_prop_prefixes[type], strlen(zfs_userquota_prop_prefixes[type])) == 0) break; } if (type == ZFS_NUM_USERQUOTA_PROPS) return (EINVAL); *typep = 
type; isuser = (type == ZFS_PROP_USERQUOTA || type == ZFS_PROP_USERUSED); isgroup = (type == ZFS_PROP_GROUPQUOTA || type == ZFS_PROP_GROUPUSED); cp = strchr(propname, '@') + 1; if (isuser && (pw = getpwnam(cp)) != NULL) { if (zoned && getzoneid() == GLOBAL_ZONEID) return (ENOENT); *ridp = pw->pw_uid; } else if (isgroup && (gr = getgrnam(cp)) != NULL) { if (zoned && getzoneid() == GLOBAL_ZONEID) return (ENOENT); *ridp = gr->gr_gid; } else if (strchr(cp, '@')) { #ifdef HAVE_IDMAP /* * It's a SID name (eg "user@domain") that needs to be * turned into S-1-domainID-RID. */ directory_error_t e; char *numericsid = NULL; char *end; if (zoned && getzoneid() == GLOBAL_ZONEID) return (ENOENT); if (isuser) { e = directory_sid_from_user_name(NULL, cp, &numericsid); } else { e = directory_sid_from_group_name(NULL, cp, &numericsid); } if (e != NULL) { directory_error_free(e); return (ENOENT); } if (numericsid == NULL) return (ENOENT); cp = numericsid; (void) strlcpy(domain, cp, domainlen); cp = strrchr(domain, '-'); *cp = '\0'; cp++; errno = 0; *ridp = strtoull(cp, &end, 10); free(numericsid); if (errno != 0 || *end != '\0') return (EINVAL); #else return (ENOSYS); #endif /* HAVE_IDMAP */ } else { /* It's a user/group ID (eg "12345"). */ uid_t id; char *end; id = strtoul(cp, &end, 10); if (*end != '\0') return (EINVAL); if (id > MAXUID) { #ifdef HAVE_IDMAP /* It's an ephemeral ID. */ idmap_rid_t rid; char *mapdomain; if (idmap_id_to_numeric_domain_rid(id, isuser, &mapdomain, &rid) != 0) return (ENOENT); (void) strlcpy(domain, mapdomain, domainlen); *ridp = rid; #else return (ENOSYS); #endif /* HAVE_IDMAP */ } else { *ridp = id; } } return (0); } static int zfs_prop_get_userquota_common(zfs_handle_t *zhp, const char *propname, uint64_t *propvalue, zfs_userquota_prop_t *typep) { int err; - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); err = userquota_propname_decode(propname, zfs_prop_get_int(zhp, ZFS_PROP_ZONED), typep, zc.zc_value, sizeof (zc.zc_value), &zc.zc_guid); zc.zc_objset_type = *typep; if (err) return (err); err = ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_USERSPACE_ONE, &zc); if (err) return (err); *propvalue = zc.zc_cookie; return (0); } int zfs_prop_get_userquota_int(zfs_handle_t *zhp, const char *propname, uint64_t *propvalue) { zfs_userquota_prop_t type; return (zfs_prop_get_userquota_common(zhp, propname, propvalue, &type)); } int zfs_prop_get_userquota(zfs_handle_t *zhp, const char *propname, char *propbuf, int proplen, boolean_t literal) { int err; uint64_t propvalue; zfs_userquota_prop_t type; err = zfs_prop_get_userquota_common(zhp, propname, &propvalue, &type); if (err) return (err); if (literal) { (void) snprintf(propbuf, proplen, "%llu", (u_longlong_t)propvalue); } else if (propvalue == 0 && (type == ZFS_PROP_USERQUOTA || type == ZFS_PROP_GROUPQUOTA)) { (void) strlcpy(propbuf, "none", proplen); } else { zfs_nicenum(propvalue, propbuf, proplen); } return (0); } int zfs_prop_get_written_int(zfs_handle_t *zhp, const char *propname, uint64_t *propvalue) { int err; - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; const char *snapname; (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); snapname = strchr(propname, '@') + 1; if (strchr(snapname, '@')) { (void) strlcpy(zc.zc_value, snapname, sizeof (zc.zc_value)); } else { /* snapname is the short name, append it to zhp's fsname */ char *cp; 
(void) strlcpy(zc.zc_value, zhp->zfs_name, sizeof (zc.zc_value)); cp = strchr(zc.zc_value, '@'); if (cp != NULL) *cp = '\0'; (void) strlcat(zc.zc_value, "@", sizeof (zc.zc_value)); (void) strlcat(zc.zc_value, snapname, sizeof (zc.zc_value)); } err = ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_SPACE_WRITTEN, &zc); if (err) return (err); *propvalue = zc.zc_cookie; return (0); } int zfs_prop_get_written(zfs_handle_t *zhp, const char *propname, char *propbuf, int proplen, boolean_t literal) { int err; uint64_t propvalue; err = zfs_prop_get_written_int(zhp, propname, &propvalue); if (err) return (err); if (literal) { (void) snprintf(propbuf, proplen, "%llu", (long long unsigned int)propvalue); } else { zfs_nicenum(propvalue, propbuf, proplen); } return (0); } -int -zfs_get_snapused_int(zfs_handle_t *firstsnap, zfs_handle_t *lastsnap, - uint64_t *usedp) -{ - int err; - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; - - (void) strlcpy(zc.zc_name, lastsnap->zfs_name, sizeof (zc.zc_name)); - (void) strlcpy(zc.zc_value, firstsnap->zfs_name, sizeof (zc.zc_value)); - - err = ioctl(lastsnap->zfs_hdl->libzfs_fd, ZFS_IOC_SPACE_SNAPS, &zc); - if (err) - return (err); - - *usedp = zc.zc_cookie; - - return (0); -} - /* * Returns the name of the given zfs handle. */ const char * zfs_get_name(const zfs_handle_t *zhp) { return (zhp->zfs_name); } /* * Returns the type of the given zfs handle. */ zfs_type_t zfs_get_type(const zfs_handle_t *zhp) { return (zhp->zfs_type); } /* * Is one dataset name a child dataset of another? * * Needs to handle these cases: * Dataset 1 "a/foo" "a/foo" "a/foo" "a/foo" * Dataset 2 "a/fo" "a/foobar" "a/bar/baz" "a/foo/bar" * Descendant? No. No. No. Yes. */ static boolean_t is_descendant(const char *ds1, const char *ds2) { size_t d1len = strlen(ds1); /* ds2 can't be a descendant if it's smaller */ if (strlen(ds2) < d1len) return (B_FALSE); /* otherwise, compare strings and verify that there's a '/' char */ return (ds2[d1len] == '/' && (strncmp(ds1, ds2, d1len) == 0)); } /* * Given a complete name, return just the portion that refers to the parent. * Will return -1 if there is no parent (path is just the name of the * pool). */ static int parent_name(const char *path, char *buf, size_t buflen) { char *slashp; (void) strlcpy(buf, path, buflen); if ((slashp = strrchr(buf, '/')) == NULL) return (-1); *slashp = '\0'; return (0); } /* * If accept_ancestor is false, then check to make sure that the given path has * a parent, and that it exists. If accept_ancestor is true, then find the * closest existing ancestor for the given path. In prefixlen return the * length of already existing prefix of the given path. We also fetch the * 'zoned' property, which is used to validate property settings when creating * new datasets. 
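+ *
+ * Hypothetical example: for path "tank/a/b/c" where only "tank" and
+ * "tank/a" exist, accept_ancestor == B_FALSE fails because the
+ * parent "tank/a/b" is missing, while accept_ancestor == B_TRUE
+ * succeeds and returns strlen("tank/a") in *prefixlen.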
*/ static int check_parents(libzfs_handle_t *hdl, const char *path, uint64_t *zoned, boolean_t accept_ancestor, int *prefixlen) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; char parent[ZFS_MAXNAMELEN]; char *slash; zfs_handle_t *zhp; char errbuf[1024]; uint64_t is_zoned; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot create '%s'"), path); /* get parent, and check to see if this is just a pool */ if (parent_name(path, parent, sizeof (parent)) != 0) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "missing dataset name")); return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); } /* check to see if the pool exists */ if ((slash = strchr(parent, '/')) == NULL) slash = parent + strlen(parent); (void) strncpy(zc.zc_name, parent, slash - parent); zc.zc_name[slash - parent] = '\0'; if (ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0 && errno == ENOENT) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "no such pool '%s'"), zc.zc_name); return (zfs_error(hdl, EZFS_NOENT, errbuf)); } /* check to see if the parent dataset exists */ while ((zhp = make_dataset_handle(hdl, parent)) == NULL) { if (errno == ENOENT && accept_ancestor) { /* * Go deeper to find an ancestor, give up on top level. */ if (parent_name(parent, parent, sizeof (parent)) != 0) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "no such pool '%s'"), zc.zc_name); return (zfs_error(hdl, EZFS_NOENT, errbuf)); } } else if (errno == ENOENT) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "parent does not exist")); return (zfs_error(hdl, EZFS_NOENT, errbuf)); } else return (zfs_standard_error(hdl, errno, errbuf)); } is_zoned = zfs_prop_get_int(zhp, ZFS_PROP_ZONED); if (zoned != NULL) *zoned = is_zoned; /* we are in a non-global zone, but parent is in the global zone */ if (getzoneid() != GLOBAL_ZONEID && !is_zoned) { (void) zfs_standard_error(hdl, EPERM, errbuf); zfs_close(zhp); return (-1); } /* make sure parent is a filesystem */ if (zfs_get_type(zhp) != ZFS_TYPE_FILESYSTEM) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "parent is not a filesystem")); (void) zfs_error(hdl, EZFS_BADTYPE, errbuf); zfs_close(zhp); return (-1); } zfs_close(zhp); if (prefixlen != NULL) *prefixlen = strlen(parent); return (0); } /* * Finds whether the dataset of the given type(s) exists. */ boolean_t zfs_dataset_exists(libzfs_handle_t *hdl, const char *path, zfs_type_t types) { zfs_handle_t *zhp; if (!zfs_validate_name(hdl, path, types, B_FALSE)) return (B_FALSE); /* * Try to get stats for the dataset, which will tell us if it exists. */ if ((zhp = make_dataset_handle(hdl, path)) != NULL) { int ds_type = zhp->zfs_type; zfs_close(zhp); if (types & ds_type) return (B_TRUE); } return (B_FALSE); } /* * Given a path to 'target', create all the ancestors between * the prefixlen portion of the path, and the target itself. * Fail if the initial prefixlen-ancestor does not already exist. */ int create_parents(libzfs_handle_t *hdl, char *target, int prefixlen) { zfs_handle_t *h; char *cp; const char *opname; /* make sure prefix exists */ cp = target + prefixlen; if (*cp != '/') { assert(strchr(cp, '/') == NULL); h = zfs_open(hdl, target, ZFS_TYPE_FILESYSTEM); } else { *cp = '\0'; h = zfs_open(hdl, target, ZFS_TYPE_FILESYSTEM); *cp = '/'; } if (h == NULL) return (-1); zfs_close(h); /* * Attempt to create, mount, and share any ancestor filesystems, * up to the prefixlen-long one. 
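+ *
+ * For a hypothetical target of "tank/a/b/c" with prefixlen equal to
+ * strlen("tank/a"), the loop below creates, mounts and shares
+ * "tank/a/b" and then stops; creating the final component
+ * "tank/a/b/c" itself is left to the caller (see
+ * zfs_create_ancestors() and zfs_create()).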
*/ for (cp = target + prefixlen + 1; (cp = strchr(cp, '/')); *cp = '/', cp++) { - char *logstr; *cp = '\0'; h = make_dataset_handle(hdl, target); if (h) { /* it already exists, nothing to do here */ zfs_close(h); continue; } - logstr = hdl->libzfs_log_str; - hdl->libzfs_log_str = NULL; if (zfs_create(hdl, target, ZFS_TYPE_FILESYSTEM, NULL) != 0) { - hdl->libzfs_log_str = logstr; opname = dgettext(TEXT_DOMAIN, "create"); goto ancestorerr; } - hdl->libzfs_log_str = logstr; h = zfs_open(hdl, target, ZFS_TYPE_FILESYSTEM); if (h == NULL) { opname = dgettext(TEXT_DOMAIN, "open"); goto ancestorerr; } if (zfs_mount(h, NULL, 0) != 0) { opname = dgettext(TEXT_DOMAIN, "mount"); goto ancestorerr; } if (zfs_share(h) != 0) { opname = dgettext(TEXT_DOMAIN, "share"); goto ancestorerr; } zfs_close(h); } return (0); ancestorerr: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "failed to %s ancestor '%s'"), opname, target); return (-1); } /* * Creates non-existing ancestors of the given path. */ int zfs_create_ancestors(libzfs_handle_t *hdl, const char *path) { int prefix; char *path_copy; int rc = 0; if (check_parents(hdl, path, NULL, B_TRUE, &prefix) != 0) return (-1); if ((path_copy = strdup(path)) != NULL) { rc = create_parents(hdl, path_copy, prefix); free(path_copy); } if (path_copy == NULL || rc != 0) return (-1); return (0); } /* * Create a new filesystem or volume. */ int zfs_create(libzfs_handle_t *hdl, const char *path, zfs_type_t type, nvlist_t *props) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; int ret; uint64_t size = 0; uint64_t blocksize = zfs_prop_default_numeric(ZFS_PROP_VOLBLOCKSIZE); char errbuf[1024]; uint64_t zoned; + dmu_objset_type_t ost; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot create '%s'"), path); /* validate the path, taking care to note the extended error message */ if (!zfs_validate_name(hdl, path, type, B_TRUE)) return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); /* validate parents exist */ if (check_parents(hdl, path, &zoned, B_FALSE, NULL) != 0) return (-1); /* * The failure modes when creating a dataset of a different type over * one that already exists is a little strange. In particular, if you * try to create a dataset on top of an existing dataset, the ioctl() * will return ENOENT, not EEXIST. To prevent this from happening, we * first try to see if the dataset exists. */ - (void) strlcpy(zc.zc_name, path, sizeof (zc.zc_name)); - if (zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) { + if (zfs_dataset_exists(hdl, path, ZFS_TYPE_DATASET)) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "dataset already exists")); return (zfs_error(hdl, EZFS_EXISTS, errbuf)); } if (type == ZFS_TYPE_VOLUME) - zc.zc_objset_type = DMU_OST_ZVOL; + ost = DMU_OST_ZVOL; else - zc.zc_objset_type = DMU_OST_ZFS; + ost = DMU_OST_ZFS; if (props && (props = zfs_valid_proplist(hdl, type, props, zoned, NULL, errbuf)) == 0) return (-1); if (type == ZFS_TYPE_VOLUME) { /* * If we are creating a volume, the size and block size must * satisfy a few restraints. First, the blocksize must be a * valid block size between SPA_{MIN,MAX}BLOCKSIZE. Second, the * volsize must be a multiple of the block size, and cannot be * zero. 
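+ *
+ * A minimal sketch of a conforming caller, using hypothetical names
+ * and the fnvlist_*() convenience wrappers:
+ *
+ *	nvlist_t *props = fnvlist_alloc();
+ *	fnvlist_add_uint64(props,
+ *	    zfs_prop_to_name(ZFS_PROP_VOLSIZE), 1ULL << 30);
+ *	fnvlist_add_uint64(props,
+ *	    zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), 8192);
+ *	error = zfs_create(hdl, "tank/vol1", ZFS_TYPE_VOLUME, props);
+ *	fnvlist_free(props);
+ *
+ * Here 1G is a multiple of the 8K block size, so both checks below
+ * pass.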
*/ if (props == NULL || nvlist_lookup_uint64(props, zfs_prop_to_name(ZFS_PROP_VOLSIZE), &size) != 0) { nvlist_free(props); zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "missing volume size")); return (zfs_error(hdl, EZFS_BADPROP, errbuf)); } if ((ret = nvlist_lookup_uint64(props, zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), &blocksize)) != 0) { if (ret == ENOENT) { blocksize = zfs_prop_default_numeric( ZFS_PROP_VOLBLOCKSIZE); } else { nvlist_free(props); zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "missing volume block size")); return (zfs_error(hdl, EZFS_BADPROP, errbuf)); } } if (size == 0) { nvlist_free(props); zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "volume size cannot be zero")); return (zfs_error(hdl, EZFS_BADPROP, errbuf)); } if (size % blocksize != 0) { nvlist_free(props); zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "volume size must be a multiple of volume block " "size")); return (zfs_error(hdl, EZFS_BADPROP, errbuf)); } } - if (props && zcmd_write_src_nvlist(hdl, &zc, props) != 0) - return (-1); - nvlist_free(props); - /* create the dataset */ - ret = zfs_ioctl(hdl, ZFS_IOC_CREATE, &zc); + ret = lzc_create(path, ost, props); + nvlist_free(props); if (ret == 0 && type == ZFS_TYPE_VOLUME) { ret = zvol_create_link(hdl, path); if (ret) { (void) zfs_standard_error(hdl, errno, dgettext(TEXT_DOMAIN, "Volume successfully created, but device links " "were not created")); - zcmd_free_nvlists(&zc); return (-1); } } - zcmd_free_nvlists(&zc); - /* check for failure */ if (ret != 0) { char parent[ZFS_MAXNAMELEN]; (void) parent_name(path, parent, sizeof (parent)); switch (errno) { case ENOENT: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "no such parent '%s'"), parent); return (zfs_error(hdl, EZFS_NOENT, errbuf)); case EINVAL: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "parent '%s' is not a filesystem"), parent); return (zfs_error(hdl, EZFS_BADTYPE, errbuf)); case EDOM: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "volume block size must be power of 2 from " "%u to %uk"), (uint_t)SPA_MINBLOCKSIZE, (uint_t)SPA_MAXBLOCKSIZE >> 10); return (zfs_error(hdl, EZFS_BADPROP, errbuf)); case ENOTSUP: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be upgraded to set this " "property or value")); return (zfs_error(hdl, EZFS_BADVERSION, errbuf)); #ifdef _ILP32 case EOVERFLOW: /* * This platform can't address a volume this big. */ if (type == ZFS_TYPE_VOLUME) return (zfs_error(hdl, EZFS_VOLTOOBIG, errbuf)); #endif /* FALLTHROUGH */ default: return (zfs_standard_error(hdl, errno, errbuf)); } } return (0); } /* * Destroys the given dataset. The caller must make sure that the filesystem * isn't mounted, and that there are no active dependents. If the file system * does not exist this function does nothing. 
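+ *
+ * One way a caller might use this (dataset name hypothetical):
+ *
+ *	if ((zhp = zfs_open(hdl, "tank/scratch",
+ *	    ZFS_TYPE_FILESYSTEM)) != NULL) {
+ *		error = zfs_destroy(zhp, B_FALSE);
+ *		zfs_close(zhp);
+ *	}
+ *
+ * where B_FALSE asks for an immediate rather than deferred destroy.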
*/ int zfs_destroy(zfs_handle_t *zhp, boolean_t defer) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); if (ZFS_IS_VOLUME(zhp)) { if (zvol_remove_link(zhp->zfs_hdl, zhp->zfs_name) != 0) return (-1); zc.zc_objset_type = DMU_OST_ZVOL; } else { zc.zc_objset_type = DMU_OST_ZFS; } zc.zc_defer_destroy = defer; if (zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_DESTROY, &zc) != 0 && errno != ENOENT) { return (zfs_standard_error_fmt(zhp->zfs_hdl, errno, dgettext(TEXT_DOMAIN, "cannot destroy '%s'"), zhp->zfs_name)); } remove_mountpoint(zhp); return (0); } struct destroydata { nvlist_t *nvl; const char *snapname; }; static int zfs_check_snap_cb(zfs_handle_t *zhp, void *arg) { struct destroydata *dd = arg; zfs_handle_t *szhp; char name[ZFS_MAXNAMELEN]; int rv = 0; (void) snprintf(name, sizeof (name), "%s@%s", zhp->zfs_name, dd->snapname); szhp = make_dataset_handle(zhp->zfs_hdl, name); if (szhp) { verify(nvlist_add_boolean(dd->nvl, name) == 0); zfs_close(szhp); } if (zhp->zfs_type == ZFS_TYPE_VOLUME) { (void) zvol_remove_link(zhp->zfs_hdl, name); /* * NB: this is simply a best-effort. We don't want to * return an error, because then we wouldn't visit all * the volumes. */ } rv = zfs_iter_filesystems(zhp, zfs_check_snap_cb, dd); zfs_close(zhp); return (rv); } /* * Destroys all snapshots with the given name in zhp & descendants. */ int zfs_destroy_snaps(zfs_handle_t *zhp, char *snapname, boolean_t defer) { int ret; struct destroydata dd = { 0 }; dd.snapname = snapname; verify(nvlist_alloc(&dd.nvl, NV_UNIQUE_NAME, 0) == 0); (void) zfs_check_snap_cb(zfs_handle_dup(zhp), &dd); if (nvlist_next_nvpair(dd.nvl, NULL) == NULL) { ret = zfs_standard_error_fmt(zhp->zfs_hdl, ENOENT, dgettext(TEXT_DOMAIN, "cannot destroy '%s@%s'"), zhp->zfs_name, snapname); } else { ret = zfs_destroy_snaps_nvl(zhp, dd.nvl, defer); } nvlist_free(dd.nvl); return (ret); } /* * Destroys all the snapshots named in the nvlist. They must be underneath * the zhp (either snapshots of it, or snapshots of its descendants). 
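+ *
+ * A sketch of a caller, with hypothetical snapshot names:
+ *
+ *	nvlist_t *snaps = fnvlist_alloc();
+ *	fnvlist_add_boolean(snaps, "tank/fs@old");
+ *	fnvlist_add_boolean(snaps, "tank/fs/child@old");
+ *	error = zfs_destroy_snaps_nvl(zhp, snaps, B_FALSE);
+ *	fnvlist_free(snaps);
+ *
+ * Per-snapshot failures come back in the errlist filled in by
+ * lzc_destroy_snaps() and are reported individually below.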
*/ int zfs_destroy_snaps_nvl(zfs_handle_t *zhp, nvlist_t *snaps, boolean_t defer) { int ret; - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + nvlist_t *errlist; - (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); - if (zcmd_write_src_nvlist(zhp->zfs_hdl, &zc, snaps) != 0) - return (-1); - zc.zc_defer_destroy = defer; + ret = lzc_destroy_snaps(snaps, defer, &errlist); - ret = zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_DESTROY_SNAPS_NVL, &zc); if (ret != 0) { - char errbuf[1024]; - - (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, - "cannot destroy snapshots in %s"), zc.zc_name); - - switch (errno) { - case EEXIST: - zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, - "snapshot is cloned")); - return (zfs_error(zhp->zfs_hdl, EZFS_EXISTS, errbuf)); + nvpair_t *pair; + for (pair = nvlist_next_nvpair(errlist, NULL); + pair != NULL; pair = nvlist_next_nvpair(errlist, pair)) { + char errbuf[1024]; + (void) snprintf(errbuf, sizeof (errbuf), + dgettext(TEXT_DOMAIN, "cannot destroy snapshot %s"), + nvpair_name(pair)); - default: - return (zfs_standard_error(zhp->zfs_hdl, errno, - errbuf)); + switch (fnvpair_value_int32(pair)) { + case EEXIST: + zfs_error_aux(zhp->zfs_hdl, + dgettext(TEXT_DOMAIN, + "snapshot is cloned")); + ret = zfs_error(zhp->zfs_hdl, EZFS_EXISTS, + errbuf); + break; + default: + ret = zfs_standard_error(zhp->zfs_hdl, errno, + errbuf); + break; + } } } - return (0); + return (ret); } /* * Clones the given dataset. The target must be of the same type as the source. */ int zfs_clone(zfs_handle_t *zhp, const char *target, nvlist_t *props) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; char parent[ZFS_MAXNAMELEN]; int ret; char errbuf[1024]; libzfs_handle_t *hdl = zhp->zfs_hdl; - zfs_type_t type; uint64_t zoned; assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT); (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot create '%s'"), target); /* validate the target/clone name */ if (!zfs_validate_name(hdl, target, ZFS_TYPE_FILESYSTEM, B_TRUE)) return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); /* validate parents exist */ if (check_parents(hdl, target, &zoned, B_FALSE, NULL) != 0) return (-1); (void) parent_name(target, parent, sizeof (parent)); /* do the clone */ - if (ZFS_IS_VOLUME(zhp)) { - zc.zc_objset_type = DMU_OST_ZVOL; - type = ZFS_TYPE_VOLUME; - } else { - zc.zc_objset_type = DMU_OST_ZFS; - type = ZFS_TYPE_FILESYSTEM; - } if (props) { + zfs_type_t type; + if (ZFS_IS_VOLUME(zhp)) { + type = ZFS_TYPE_VOLUME; + } else { + type = ZFS_TYPE_FILESYSTEM; + } if ((props = zfs_valid_proplist(hdl, type, props, zoned, zhp, errbuf)) == NULL) return (-1); - - if (zcmd_write_src_nvlist(hdl, &zc, props) != 0) { - nvlist_free(props); - return (-1); - } - - nvlist_free(props); } - (void) strlcpy(zc.zc_name, target, sizeof (zc.zc_name)); - (void) strlcpy(zc.zc_value, zhp->zfs_name, sizeof (zc.zc_value)); - ret = zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_CREATE, &zc); - - zcmd_free_nvlists(&zc); + ret = lzc_clone(target, zhp->zfs_name, props); + nvlist_free(props); if (ret != 0) { switch (errno) { case ENOENT: /* * The parent doesn't exist. We should have caught this * above, but there may a race condition that has since * destroyed the parent. * * At this point, we don't know whether it's the source * that doesn't exist anymore, or whether the target * dataset doesn't exist. 
*/ zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, "no such parent '%s'"), parent); return (zfs_error(zhp->zfs_hdl, EZFS_NOENT, errbuf)); case EXDEV: zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, "source and target pools differ")); return (zfs_error(zhp->zfs_hdl, EZFS_CROSSTARGET, errbuf)); default: return (zfs_standard_error(zhp->zfs_hdl, errno, errbuf)); } } else if (ZFS_IS_VOLUME(zhp)) { ret = zvol_create_link(zhp->zfs_hdl, target); } return (ret); } typedef struct promote_data { char cb_mountpoint[MAXPATHLEN]; const char *cb_target; const char *cb_errbuf; uint64_t cb_pivot_txg; } promote_data_t; static int promote_snap_cb(zfs_handle_t *zhp, void *data) { promote_data_t *pd = data; zfs_handle_t *szhp; char snapname[MAXPATHLEN]; int rv = 0; /* We don't care about snapshots after the pivot point */ if (zfs_prop_get_int(zhp, ZFS_PROP_CREATETXG) > pd->cb_pivot_txg) { zfs_close(zhp); return (0); } /* Remove the device link if it's a zvol. */ if (ZFS_IS_VOLUME(zhp)) (void) zvol_remove_link(zhp->zfs_hdl, zhp->zfs_name); /* Check for conflicting names */ (void) strlcpy(snapname, pd->cb_target, sizeof (snapname)); (void) strlcat(snapname, strchr(zhp->zfs_name, '@'), sizeof (snapname)); szhp = make_dataset_handle(zhp->zfs_hdl, snapname); if (szhp != NULL) { zfs_close(szhp); zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, "snapshot name '%s' from origin \n" "conflicts with '%s' from target"), zhp->zfs_name, snapname); rv = zfs_error(zhp->zfs_hdl, EZFS_EXISTS, pd->cb_errbuf); } zfs_close(zhp); return (rv); } static int promote_snap_done_cb(zfs_handle_t *zhp, void *data) { promote_data_t *pd = data; /* We don't care about snapshots after the pivot point */ if (zfs_prop_get_int(zhp, ZFS_PROP_CREATETXG) <= pd->cb_pivot_txg) { /* Create the device link if it's a zvol. */ if (ZFS_IS_VOLUME(zhp)) (void) zvol_create_link(zhp->zfs_hdl, zhp->zfs_name); } zfs_close(zhp); return (0); } /* * Promotes the given clone fs to be the clone parent. 
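+ *
+ * For example (names hypothetical), after "tank/clone" was cloned
+ * from "tank/fs@snap":
+ *
+ *	if ((zhp = zfs_open(hdl, "tank/clone",
+ *	    ZFS_TYPE_FILESYSTEM)) != NULL) {
+ *		error = zfs_promote(zhp);
+ *		zfs_close(zhp);
+ *	}
+ *
+ * On success the snapshots up to and including the origin move to
+ * "tank/clone", and "tank/fs" becomes a clone of it.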
*/ int zfs_promote(zfs_handle_t *zhp) { libzfs_handle_t *hdl = zhp->zfs_hdl; - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; char parent[MAXPATHLEN]; char *cp; int ret; zfs_handle_t *pzhp; promote_data_t pd; char errbuf[1024]; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot promote '%s'"), zhp->zfs_name); if (zhp->zfs_type == ZFS_TYPE_SNAPSHOT) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "snapshots can not be promoted")); return (zfs_error(hdl, EZFS_BADTYPE, errbuf)); } (void) strlcpy(parent, zhp->zfs_dmustats.dds_origin, sizeof (parent)); if (parent[0] == '\0') { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "not a cloned filesystem")); return (zfs_error(hdl, EZFS_BADTYPE, errbuf)); } cp = strchr(parent, '@'); *cp = '\0'; /* Walk the snapshots we will be moving */ pzhp = zfs_open(hdl, zhp->zfs_dmustats.dds_origin, ZFS_TYPE_SNAPSHOT); if (pzhp == NULL) return (-1); pd.cb_pivot_txg = zfs_prop_get_int(pzhp, ZFS_PROP_CREATETXG); zfs_close(pzhp); pd.cb_target = zhp->zfs_name; pd.cb_errbuf = errbuf; pzhp = zfs_open(hdl, parent, ZFS_TYPE_DATASET); if (pzhp == NULL) return (-1); (void) zfs_prop_get(pzhp, ZFS_PROP_MOUNTPOINT, pd.cb_mountpoint, sizeof (pd.cb_mountpoint), NULL, NULL, 0, FALSE); ret = zfs_iter_snapshots(pzhp, B_FALSE, promote_snap_cb, &pd); if (ret != 0) { zfs_close(pzhp); return (-1); } /* issue the ioctl */ (void) strlcpy(zc.zc_value, zhp->zfs_dmustats.dds_origin, sizeof (zc.zc_value)); (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); ret = zfs_ioctl(hdl, ZFS_IOC_PROMOTE, &zc); if (ret != 0) { int save_errno = errno; (void) zfs_iter_snapshots(pzhp, B_FALSE, promote_snap_done_cb, &pd); zfs_close(pzhp); switch (save_errno) { case EEXIST: /* * There is a conflicting snapshot name. We * should have caught this above, but they could * have renamed something in the mean time. */ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "conflicting snapshot '%s' from parent '%s'"), zc.zc_string, parent); return (zfs_error(hdl, EZFS_EXISTS, errbuf)); default: return (zfs_standard_error(hdl, save_errno, errbuf)); } } else { (void) zfs_iter_snapshots(zhp, B_FALSE, promote_snap_done_cb, &pd); } zfs_close(pzhp); return (ret); } struct createdata { const char *cd_snapname; int cd_ifexists; }; static int zfs_create_link_cb(zfs_handle_t *zhp, void *arg) { struct createdata *cd = arg; int ret; if (zhp->zfs_type == ZFS_TYPE_VOLUME) { char name[MAXPATHLEN]; (void) strlcpy(name, zhp->zfs_name, sizeof (name)); (void) strlcat(name, "@", sizeof (name)); (void) strlcat(name, cd->cd_snapname, sizeof (name)); (void) zvol_create_link_common(zhp->zfs_hdl, name, cd->cd_ifexists); /* * NB: this is simply a best-effort. We don't want to * return an error, because then we wouldn't visit all * the volumes. */ } ret = zfs_iter_filesystems(zhp, zfs_create_link_cb, cd); zfs_close(zhp); return (ret); } +typedef struct snapdata { + nvlist_t *sd_nvl; + const char *sd_snapname; +} snapdata_t; + +static int +zfs_snapshot_cb(zfs_handle_t *zhp, void *arg) +{ + snapdata_t *sd = arg; + char name[ZFS_MAXNAMELEN]; + int rv = 0; + + (void) snprintf(name, sizeof (name), + "%s@%s", zfs_get_name(zhp), sd->sd_snapname); + + fnvlist_add_boolean(sd->sd_nvl, name); + + rv = zfs_iter_filesystems(zhp, zfs_snapshot_cb, sd); + zfs_close(zhp); + return (rv); +} + /* - * Takes a snapshot of the given dataset. + * Creates snapshots. The keys in the snaps nvlist are the snapshots to be + * created. 
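+ * They are passed to a single lzc_snapshot() call, so they are taken
+ * atomically and each filesystem or volume may be named at most once
+ * (EXDEV otherwise).  A sketch of a caller, names hypothetical:
+ *
+ *	nvlist_t *snaps = fnvlist_alloc();
+ *	fnvlist_add_boolean(snaps, "tank/fs@backup");
+ *	fnvlist_add_boolean(snaps, "tank/vol@backup");
+ *	error = zfs_snapshot_nvl(hdl, snaps, NULL);
+ *	fnvlist_free(snaps);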
*/ int -zfs_snapshot(libzfs_handle_t *hdl, const char *path, boolean_t recursive, - nvlist_t *props) +zfs_snapshot_nvl(libzfs_handle_t *hdl, nvlist_t *snaps, nvlist_t *props) { - const char *delim; - char parent[ZFS_MAXNAMELEN]; - zfs_handle_t *zhp; - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; int ret; char errbuf[1024]; + nvpair_t *elem; + nvlist_t *errors; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, - "cannot snapshot '%s'"), path); - - /* validate the target name */ - if (!zfs_validate_name(hdl, path, ZFS_TYPE_SNAPSHOT, B_TRUE)) - return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); + "cannot create snapshots ")); - if (props) { - if ((props = zfs_valid_proplist(hdl, ZFS_TYPE_SNAPSHOT, - props, B_FALSE, NULL, errbuf)) == NULL) - return (-1); + elem = NULL; + while ((elem = nvlist_next_nvpair(snaps, elem)) != NULL) { + const char *snapname = nvpair_name(elem); - if (zcmd_write_src_nvlist(hdl, &zc, props) != 0) { - nvlist_free(props); - return (-1); + /* validate the target name */ + if (!zfs_validate_name(hdl, snapname, ZFS_TYPE_SNAPSHOT, + B_TRUE)) { + (void) snprintf(errbuf, sizeof (errbuf), + dgettext(TEXT_DOMAIN, + "cannot create snapshot '%s'"), snapname); + return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); } - - nvlist_free(props); } - /* make sure the parent exists and is of the appropriate type */ - delim = strchr(path, '@'); - (void) strncpy(parent, path, delim - path); - parent[delim - path] = '\0'; - - if ((zhp = zfs_open(hdl, parent, ZFS_TYPE_FILESYSTEM | - ZFS_TYPE_VOLUME)) == NULL) { - zcmd_free_nvlists(&zc); + if (props != NULL && + (props = zfs_valid_proplist(hdl, ZFS_TYPE_SNAPSHOT, + props, B_FALSE, NULL, errbuf)) == NULL) { return (-1); } - (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); - (void) strlcpy(zc.zc_value, delim+1, sizeof (zc.zc_value)); - if (ZFS_IS_VOLUME(zhp)) - zc.zc_objset_type = DMU_OST_ZVOL; - else - zc.zc_objset_type = DMU_OST_ZFS; - zc.zc_cookie = recursive; - ret = zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_SNAPSHOT, &zc); + ret = lzc_snapshot(snaps, props, &errors); - zcmd_free_nvlists(&zc); + if (ret != 0) { + boolean_t printed = B_FALSE; + for (elem = nvlist_next_nvpair(errors, NULL); + elem != NULL; + elem = nvlist_next_nvpair(errors, elem)) { + (void) snprintf(errbuf, sizeof (errbuf), + dgettext(TEXT_DOMAIN, + "cannot create snapshot '%s'"), nvpair_name(elem)); + (void) zfs_standard_error(hdl, + fnvpair_value_int32(elem), errbuf); + printed = B_TRUE; + } + if (!printed) { + switch (ret) { + case EXDEV: + zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, + "multiple snapshots of same " + "fs not allowed")); + (void) zfs_error(hdl, EZFS_EXISTS, errbuf); - /* - * if it was recursive, the one that actually failed will be in - * zc.zc_name. 
- */ - if (ret != 0) - (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, - "cannot create snapshot '%s@%s'"), zc.zc_name, zc.zc_value); + break; + default: + (void) zfs_standard_error(hdl, ret, errbuf); + } + } + } else { + zfs_handle_t *zhp; + int linktries = 0, linkok = 0, linkfail = 0; + nvpair_t *snap; - if (ret == 0 && recursive) { - struct createdata cd; + for (snap = nvlist_next_nvpair(snaps, NULL); snap != NULL; + snap = nvlist_next_nvpair(snaps, snap)) { + char *cp, *snapname; - cd.cd_snapname = delim + 1; - cd.cd_ifexists = B_FALSE; - (void) zfs_iter_filesystems(zhp, zfs_create_link_cb, &cd); - } - if (ret == 0 && zhp->zfs_type == ZFS_TYPE_VOLUME) { - ret = zvol_create_link(zhp->zfs_hdl, path); - if (ret != 0) { - (void) zfs_standard_error(hdl, errno, - dgettext(TEXT_DOMAIN, - "Volume successfully snapshotted, but device links " - "were not created")); - zfs_close(zhp); - return (-1); + snapname = nvpair_name(snap); + cp = strchr(snapname, '@'); + *cp = '\0'; + + if ((zhp = zfs_open(hdl, snapname, ZFS_TYPE_FILESYSTEM | + ZFS_TYPE_VOLUME)) != NULL) { + if (zhp->zfs_type == ZFS_TYPE_VOLUME) { + ++linktries; + *cp = '@'; + if (zvol_create_link(zhp->zfs_hdl, nvpair_name(snap))) + ++linkfail; + else + ++linkok; + } + } } } - if (ret != 0) - (void) zfs_standard_error(hdl, errno, errbuf); + nvlist_free(props); + nvlist_free(errors); + return (ret); +} - zfs_close(zhp); +int +zfs_snapshot(libzfs_handle_t *hdl, const char *path, boolean_t recursive, + nvlist_t *props) +{ + int ret; + snapdata_t sd = { 0 }; + char fsname[ZFS_MAXNAMELEN]; + char *cp; + zfs_handle_t *zhp; + char errbuf[1024]; + + (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, + "cannot snapshot %s"), path); + + if (!zfs_validate_name(hdl, path, ZFS_TYPE_SNAPSHOT, B_TRUE)) + return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); + + (void) strlcpy(fsname, path, sizeof (fsname)); + cp = strchr(fsname, '@'); + *cp = '\0'; + sd.sd_snapname = cp + 1; + if ((zhp = zfs_open(hdl, fsname, ZFS_TYPE_FILESYSTEM | + ZFS_TYPE_VOLUME)) == NULL) { + return (-1); + } + + verify(nvlist_alloc(&sd.sd_nvl, NV_UNIQUE_NAME, 0) == 0); + if (recursive) { + (void) zfs_snapshot_cb(zfs_handle_dup(zhp), &sd); + } else { + fnvlist_add_boolean(sd.sd_nvl, path); + } + + ret = zfs_snapshot_nvl(hdl, sd.sd_nvl, props); + nvlist_free(sd.sd_nvl); + zfs_close(zhp); return (ret); } /* * Destroy any more recent snapshots. We invoke this callback on any dependents * of the snapshot first. If the 'cb_dependent' member is non-zero, then this * is a dependent and we should just destroy it without checking the transaction * group. 
*/ typedef struct rollback_data { const char *cb_target; /* the snapshot */ uint64_t cb_create; /* creation time reference */ boolean_t cb_error; boolean_t cb_dependent; boolean_t cb_force; } rollback_data_t; static int rollback_destroy(zfs_handle_t *zhp, void *data) { rollback_data_t *cbp = data; if (!cbp->cb_dependent) { if (strcmp(zhp->zfs_name, cbp->cb_target) != 0 && zfs_get_type(zhp) == ZFS_TYPE_SNAPSHOT && zfs_prop_get_int(zhp, ZFS_PROP_CREATETXG) > cbp->cb_create) { - char *logstr; cbp->cb_dependent = B_TRUE; cbp->cb_error |= zfs_iter_dependents(zhp, B_FALSE, rollback_destroy, cbp); cbp->cb_dependent = B_FALSE; - logstr = zhp->zfs_hdl->libzfs_log_str; - zhp->zfs_hdl->libzfs_log_str = NULL; cbp->cb_error |= zfs_destroy(zhp, B_FALSE); - zhp->zfs_hdl->libzfs_log_str = logstr; } } else { /* We must destroy this clone; first unmount it */ prop_changelist_t *clp; clp = changelist_gather(zhp, ZFS_PROP_NAME, 0, cbp->cb_force ? MS_FORCE: 0); if (clp == NULL || changelist_prefix(clp) != 0) { cbp->cb_error = B_TRUE; zfs_close(zhp); return (0); } if (zfs_destroy(zhp, B_FALSE) != 0) cbp->cb_error = B_TRUE; else changelist_remove(clp, zhp->zfs_name); (void) changelist_postfix(clp); changelist_free(clp); } zfs_close(zhp); return (0); } /* * Given a dataset, rollback to a specific snapshot, discarding any * data changes since then and making it the active dataset. * * Any snapshots more recent than the target are destroyed, along with * their dependents. */ int zfs_rollback(zfs_handle_t *zhp, zfs_handle_t *snap, boolean_t force) { rollback_data_t cb = { 0 }; int err; - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; boolean_t restore_resv = 0; uint64_t old_volsize = 0, new_volsize; zfs_prop_t resv_prop = { 0 }; assert(zhp->zfs_type == ZFS_TYPE_FILESYSTEM || zhp->zfs_type == ZFS_TYPE_VOLUME); /* * Destroy all recent snapshots and their dependents. */ cb.cb_force = force; cb.cb_target = snap->zfs_name; cb.cb_create = zfs_prop_get_int(snap, ZFS_PROP_CREATETXG); (void) zfs_iter_children(zhp, rollback_destroy, &cb); if (cb.cb_error) return (-1); /* * Now that we have verified that the snapshot is the latest, * rollback to the given snapshot. */ if (zhp->zfs_type == ZFS_TYPE_VOLUME) { if (zvol_remove_link(zhp->zfs_hdl, zhp->zfs_name) != 0) return (-1); if (zfs_which_resv_prop(zhp, &resv_prop) < 0) return (-1); old_volsize = zfs_prop_get_int(zhp, ZFS_PROP_VOLSIZE); restore_resv = (old_volsize == zfs_prop_get_int(zhp, resv_prop)); } (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); if (ZFS_IS_VOLUME(zhp)) zc.zc_objset_type = DMU_OST_ZVOL; else zc.zc_objset_type = DMU_OST_ZFS; /* * We rely on zfs_iter_children() to verify that there are no * newer snapshots for the given dataset. Therefore, we can * simply pass the name on to the ioctl() call. There is still * an unlikely race condition where the user has taken a * snapshot since we verified that this was the most recent. * */ if ((err = zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_ROLLBACK, &zc)) != 0) { (void) zfs_standard_error_fmt(zhp->zfs_hdl, errno, dgettext(TEXT_DOMAIN, "cannot rollback '%s'"), zhp->zfs_name); return (err); } /* * For volumes, if the pre-rollback volsize matched the pre- * rollback reservation and the volsize has changed then set * the reservation property to the post-rollback volsize. * Make a new handle since the rollback closed the dataset. 
*/ if ((zhp->zfs_type == ZFS_TYPE_VOLUME) && (zhp = make_dataset_handle(zhp->zfs_hdl, zhp->zfs_name))) { if ((err = zvol_create_link(zhp->zfs_hdl, zhp->zfs_name))) { zfs_close(zhp); return (err); } if (restore_resv) { new_volsize = zfs_prop_get_int(zhp, ZFS_PROP_VOLSIZE); if (old_volsize != new_volsize) err = zfs_prop_set_int(zhp, resv_prop, new_volsize); } zfs_close(zhp); } return (err); } /* * Renames the given dataset. */ int zfs_rename(zfs_handle_t *zhp, const char *target, boolean_t recursive, boolean_t force_unmount) { int ret; - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; char *delim; prop_changelist_t *cl = NULL; zfs_handle_t *zhrp = NULL; char *parentname = NULL; char parent[ZFS_MAXNAMELEN]; libzfs_handle_t *hdl = zhp->zfs_hdl; char errbuf[1024]; /* if we have the same exact name, just return success */ if (strcmp(zhp->zfs_name, target) == 0) return (0); (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot rename to '%s'"), target); /* * Make sure the target name is valid */ if (zhp->zfs_type == ZFS_TYPE_SNAPSHOT) { if ((strchr(target, '@') == NULL) || *target == '@') { /* * Snapshot target name is abbreviated, * reconstruct full dataset name */ (void) strlcpy(parent, zhp->zfs_name, sizeof (parent)); delim = strchr(parent, '@'); if (strchr(target, '@') == NULL) *(++delim) = '\0'; else *delim = '\0'; (void) strlcat(parent, target, sizeof (parent)); target = parent; } else { /* * Make sure we're renaming within the same dataset. */ delim = strchr(target, '@'); if (strncmp(zhp->zfs_name, target, delim - target) != 0 || zhp->zfs_name[delim - target] != '@') { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "snapshots must be part of same " "dataset")); return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf)); } } if (!zfs_validate_name(hdl, target, zhp->zfs_type, B_TRUE)) return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); } else { if (recursive) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "recursive rename must be a snapshot")); return (zfs_error(hdl, EZFS_BADTYPE, errbuf)); } if (!zfs_validate_name(hdl, target, zhp->zfs_type, B_TRUE)) return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); /* validate parents */ if (check_parents(hdl, target, NULL, B_FALSE, NULL) != 0) return (-1); /* make sure we're in the same pool */ verify((delim = strchr(target, '/')) != NULL); if (strncmp(zhp->zfs_name, target, delim - target) != 0 || zhp->zfs_name[delim - target] != '/') { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "datasets must be within same pool")); return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf)); } /* new name cannot be a child of the current dataset name */ if (is_descendant(zhp->zfs_name, target)) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "New dataset name cannot be a descendant of " "current dataset name")); return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); } } (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot rename '%s'"), zhp->zfs_name); if (getzoneid() == GLOBAL_ZONEID && zfs_prop_get_int(zhp, ZFS_PROP_ZONED)) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "dataset is used in a non-global zone")); return (zfs_error(hdl, EZFS_ZONED, errbuf)); } if (recursive) { struct destroydata dd; parentname = zfs_strdup(zhp->zfs_hdl, zhp->zfs_name); if (parentname == NULL) { ret = -1; goto error; } delim = strchr(parentname, '@'); *delim = '\0'; zhrp = zfs_open(zhp->zfs_hdl, parentname, ZFS_TYPE_DATASET); if (zhrp == NULL) { ret = -1; goto error; } dd.snapname = delim + 1; /* We remove any zvol links prior to 
renaming them */ verify(nvlist_alloc(&dd.nvl, NV_UNIQUE_NAME, 0) == 0); ret = zfs_iter_filesystems(zhrp, zfs_check_snap_cb, &dd); nvlist_free(dd.nvl); if (ret) { goto error; } } else { if ((cl = changelist_gather(zhp, ZFS_PROP_NAME, 0, force_unmount ? MS_FORCE : 0)) == NULL) return (-1); if (changelist_haszonedchild(cl)) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "child dataset with inherited mountpoint is used " "in a non-global zone")); (void) zfs_error(hdl, EZFS_ZONED, errbuf); ret = -1; goto error; } if ((ret = changelist_prefix(cl)) != 0) goto error; } if (ZFS_IS_VOLUME(zhp)) zc.zc_objset_type = DMU_OST_ZVOL; else zc.zc_objset_type = DMU_OST_ZFS; (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); (void) strlcpy(zc.zc_value, target, sizeof (zc.zc_value)); zc.zc_cookie = recursive; if ((ret = zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_RENAME, &zc)) != 0) { /* * if it was recursive, the one that actually failed will * be in zc.zc_name */ (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot rename '%s'"), zc.zc_name); if (recursive && errno == EEXIST) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "a child dataset already has a snapshot " "with the new name")); (void) zfs_error(hdl, EZFS_EXISTS, errbuf); } else { (void) zfs_standard_error(zhp->zfs_hdl, errno, errbuf); } /* * On failure, we still want to remount any filesystems that * were previously mounted, so we don't alter the system state. */ if (recursive) { struct createdata cd; /* only create links for datasets that had existed */ cd.cd_snapname = delim + 1; cd.cd_ifexists = B_TRUE; (void) zfs_iter_filesystems(zhrp, zfs_create_link_cb, &cd); } else { (void) changelist_postfix(cl); } } else { if (recursive) { struct createdata cd; /* only create links for datasets that had existed */ cd.cd_snapname = strchr(target, '@') + 1; cd.cd_ifexists = B_TRUE; ret = zfs_iter_filesystems(zhrp, zfs_create_link_cb, &cd); } else { changelist_rename(cl, zfs_get_name(zhp), target); ret = changelist_postfix(cl); } } error: if (parentname) { free(parentname); } if (zhrp) { zfs_close(zhrp); } if (cl) { changelist_free(cl); } return (ret); } /* * Given a zvol dataset, issue the ioctl to create the appropriate minor node, * and wait briefly for udev to create the /dev link. */ int zvol_create_link(libzfs_handle_t *hdl, const char *dataset) { return (zvol_create_link_common(hdl, dataset, B_FALSE)); } static int zvol_create_link_common(libzfs_handle_t *hdl, const char *dataset, int ifexists) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; char path[MAXPATHLEN]; int error; (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name)); /* * Issue the appropriate ioctl. */ if (ioctl(hdl->libzfs_fd, ZFS_IOC_CREATE_MINOR, &zc) != 0) { switch (errno) { case EEXIST: /* * Silently ignore the case where the link already * exists. This allows 'zfs volinit' to be run multiple * times without errors. */ return (0); case ENODEV: /* * snapdev set to hidden : * device creation was not permitted (see zvol.c) * ignore error quietly */ return (0); case ENOENT: /* * Dataset does not exist in the kernel. If we * don't care (see zfs_rename), then ignore the * error quietly. */ if (ifexists) { return (0); } /* FALLTHROUGH */ default: return (zfs_standard_error_fmt(hdl, errno, dgettext(TEXT_DOMAIN, "cannot create device links " "for '%s'"), dataset)); } } /* * Wait up to 10 seconds for udev to create the device. 
*/ (void) snprintf(path, sizeof (path), "%s/%s", ZVOL_DIR, dataset); error = zpool_label_disk_wait(path, 10000); if (error) (void) printf(gettext("%s may not be immediately " "available\n"), path); return (0); } /* * Remove a minor node for the given zvol and the associated /dev links. */ int zvol_remove_link(libzfs_handle_t *hdl, const char *dataset) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; int timeout = 3000; /* in milliseconds */ int error = 0; int i; (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name)); /* * Due to concurrent updates by udev the device may be reported as * busy. In this case don't immediately fail. Instead briefly delay * and retry the ioctl() which is now likely to succeed. If unable * remove the link after timeout milliseconds return the failure. */ for (i = 0; i < timeout; i++) { error = ioctl(hdl->libzfs_fd, ZFS_IOC_REMOVE_MINOR, &zc); if (error && errno == EBUSY) { usleep(1000); continue; } else { break; } } if (error) { switch (errno) { case ENXIO: /* * Silently ignore the case where the link no longer * exists, so that 'zfs volfini' can be run multiple * times without errors. */ return (0); default: return (zfs_standard_error_fmt(hdl, errno, dgettext(TEXT_DOMAIN, "cannot remove device " "links for '%s': %s"), dataset, strerror(errno))); } } return (0); } nvlist_t * zfs_get_user_props(zfs_handle_t *zhp) { return (zhp->zfs_user_props); } /* * This function is used by 'zfs list' to determine the exact set of columns to * display, and their maximum widths. This does two main things: * * - If this is a list of all properties, then expand the list to include * all native properties, and set a flag so that for each dataset we look * for new unique user properties and add them to the list. * * - For non fixed-width properties, keep track of the maximum width seen * so that we can size the column appropriately. If the user has * requested received property values, we also need to compute the width * of the RECEIVED column. */ int zfs_expand_proplist(zfs_handle_t *zhp, zprop_list_t **plp, boolean_t received) { libzfs_handle_t *hdl = zhp->zfs_hdl; zprop_list_t *entry; zprop_list_t **last, **start; nvlist_t *userprops, *propval; nvpair_t *elem; char *strval; char buf[ZFS_MAXPROPLEN]; if (zprop_expand_list(hdl, plp, ZFS_TYPE_DATASET) != 0) return (-1); userprops = zfs_get_user_props(zhp); entry = *plp; if (entry->pl_all && nvlist_next_nvpair(userprops, NULL) != NULL) { /* * Go through and add any user properties as necessary. We * start by incrementing our list pointer to the first * non-native property. */ start = plp; while (*start != NULL) { if ((*start)->pl_prop == ZPROP_INVAL) break; start = &(*start)->pl_next; } elem = NULL; while ((elem = nvlist_next_nvpair(userprops, elem)) != NULL) { /* * See if we've already found this property in our list. 
*/ for (last = start; *last != NULL; last = &(*last)->pl_next) { if (strcmp((*last)->pl_user_prop, nvpair_name(elem)) == 0) break; } if (*last == NULL) { if ((entry = zfs_alloc(hdl, sizeof (zprop_list_t))) == NULL || ((entry->pl_user_prop = zfs_strdup(hdl, nvpair_name(elem)))) == NULL) { free(entry); return (-1); } entry->pl_prop = ZPROP_INVAL; entry->pl_width = strlen(nvpair_name(elem)); entry->pl_all = B_TRUE; *last = entry; } } } /* * Now go through and check the width of any non-fixed columns */ for (entry = *plp; entry != NULL; entry = entry->pl_next) { if (entry->pl_fixed) continue; if (entry->pl_prop != ZPROP_INVAL) { if (zfs_prop_get(zhp, entry->pl_prop, buf, sizeof (buf), NULL, NULL, 0, B_FALSE) == 0) { if (strlen(buf) > entry->pl_width) entry->pl_width = strlen(buf); } if (received && zfs_prop_get_recvd(zhp, zfs_prop_to_name(entry->pl_prop), buf, sizeof (buf), B_FALSE) == 0) if (strlen(buf) > entry->pl_recvd_width) entry->pl_recvd_width = strlen(buf); } else { if (nvlist_lookup_nvlist(userprops, entry->pl_user_prop, &propval) == 0) { verify(nvlist_lookup_string(propval, ZPROP_VALUE, &strval) == 0); if (strlen(strval) > entry->pl_width) entry->pl_width = strlen(strval); } if (received && zfs_prop_get_recvd(zhp, entry->pl_user_prop, buf, sizeof (buf), B_FALSE) == 0) if (strlen(buf) > entry->pl_recvd_width) entry->pl_recvd_width = strlen(buf); } } return (0); } void zfs_prune_proplist(zfs_handle_t *zhp, uint8_t *props) { nvpair_t *curr; /* * Keep a reference to the props-table against which we prune the * properties. */ zhp->zfs_props_table = props; curr = nvlist_next_nvpair(zhp->zfs_props, NULL); while (curr) { zfs_prop_t zfs_prop = zfs_name_to_prop(nvpair_name(curr)); nvpair_t *next = nvlist_next_nvpair(zhp->zfs_props, curr); /* * User properties will result in ZPROP_INVAL, and since we * only know how to prune standard ZFS properties, we always * leave these in the list. This can also happen if we * encounter an unknown DSL property (when running older * software, for example). 
*/ if (zfs_prop != ZPROP_INVAL && props[zfs_prop] == B_FALSE) (void) nvlist_remove(zhp->zfs_props, nvpair_name(curr), nvpair_type(curr)); curr = next; } } static int zfs_smb_acl_mgmt(libzfs_handle_t *hdl, char *dataset, char *path, zfs_smb_acl_op_t cmd, char *resource1, char *resource2) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; nvlist_t *nvlist = NULL; int error; (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name)); (void) strlcpy(zc.zc_value, path, sizeof (zc.zc_value)); zc.zc_cookie = (uint64_t)cmd; if (cmd == ZFS_SMB_ACL_RENAME) { if (nvlist_alloc(&nvlist, NV_UNIQUE_NAME, 0) != 0) { (void) no_memory(hdl); return (-1); } } switch (cmd) { case ZFS_SMB_ACL_ADD: case ZFS_SMB_ACL_REMOVE: (void) strlcpy(zc.zc_string, resource1, sizeof (zc.zc_string)); break; case ZFS_SMB_ACL_RENAME: if (nvlist_add_string(nvlist, ZFS_SMB_ACL_SRC, resource1) != 0) { (void) no_memory(hdl); return (-1); } if (nvlist_add_string(nvlist, ZFS_SMB_ACL_TARGET, resource2) != 0) { (void) no_memory(hdl); return (-1); } if (zcmd_write_src_nvlist(hdl, &zc, nvlist) != 0) { nvlist_free(nvlist); return (-1); } break; case ZFS_SMB_ACL_PURGE: break; default: return (-1); } error = ioctl(hdl->libzfs_fd, ZFS_IOC_SMB_ACL, &zc); if (nvlist) nvlist_free(nvlist); return (error); } int zfs_smb_acl_add(libzfs_handle_t *hdl, char *dataset, char *path, char *resource) { return (zfs_smb_acl_mgmt(hdl, dataset, path, ZFS_SMB_ACL_ADD, resource, NULL)); } int zfs_smb_acl_remove(libzfs_handle_t *hdl, char *dataset, char *path, char *resource) { return (zfs_smb_acl_mgmt(hdl, dataset, path, ZFS_SMB_ACL_REMOVE, resource, NULL)); } int zfs_smb_acl_purge(libzfs_handle_t *hdl, char *dataset, char *path) { return (zfs_smb_acl_mgmt(hdl, dataset, path, ZFS_SMB_ACL_PURGE, NULL, NULL)); } int zfs_smb_acl_rename(libzfs_handle_t *hdl, char *dataset, char *path, char *oldname, char *newname) { return (zfs_smb_acl_mgmt(hdl, dataset, path, ZFS_SMB_ACL_RENAME, oldname, newname)); } int zfs_userspace(zfs_handle_t *zhp, zfs_userquota_prop_t type, zfs_userspace_cb_t func, void *arg) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; zfs_useracct_t buf[100]; libzfs_handle_t *hdl = zhp->zfs_hdl; int ret; (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); zc.zc_objset_type = type; zc.zc_nvlist_dst = (uintptr_t)buf; for (;;) { zfs_useracct_t *zua = buf; zc.zc_nvlist_dst_size = sizeof (buf); if (zfs_ioctl(hdl, ZFS_IOC_USERSPACE_MANY, &zc) != 0) { char errbuf[ZFS_MAXNAMELEN + 32]; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot get used/quota for %s"), zc.zc_name); return (zfs_standard_error_fmt(hdl, errno, errbuf)); } if (zc.zc_nvlist_dst_size == 0) break; while (zc.zc_nvlist_dst_size > 0) { if ((ret = func(arg, zua->zu_domain, zua->zu_rid, zua->zu_space)) != 0) return (ret); zua++; zc.zc_nvlist_dst_size -= sizeof (zfs_useracct_t); } } return (0); } int zfs_hold(zfs_handle_t *zhp, const char *snapname, const char *tag, boolean_t recursive, boolean_t temphold, boolean_t enoent_ok, int cleanup_fd, uint64_t dsobj, uint64_t createtxg) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; libzfs_handle_t *hdl = zhp->zfs_hdl; ASSERT(!recursive || dsobj == 0); (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); (void) strlcpy(zc.zc_value, snapname, sizeof (zc.zc_value)); if (strlcpy(zc.zc_string, tag, sizeof (zc.zc_string)) >= sizeof 
(zc.zc_string)) return (zfs_error(hdl, EZFS_TAGTOOLONG, tag)); zc.zc_cookie = recursive; zc.zc_temphold = temphold; zc.zc_cleanup_fd = cleanup_fd; zc.zc_sendobj = dsobj; zc.zc_createtxg = createtxg; if (zfs_ioctl(hdl, ZFS_IOC_HOLD, &zc) != 0) { char errbuf[ZFS_MAXNAMELEN+32]; /* * if it was recursive, the one that actually failed will be in * zc.zc_name. */ (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot hold '%s@%s'"), zc.zc_name, snapname); switch (errno) { case E2BIG: /* * Temporary tags wind up having the ds object id * prepended. So even if we passed the length check * above, it's still possible for the tag to wind * up being slightly too long. */ return (zfs_error(hdl, EZFS_TAGTOOLONG, errbuf)); case ENOTSUP: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be upgraded")); return (zfs_error(hdl, EZFS_BADVERSION, errbuf)); case EINVAL: return (zfs_error(hdl, EZFS_BADTYPE, errbuf)); case EEXIST: return (zfs_error(hdl, EZFS_REFTAG_HOLD, errbuf)); case ENOENT: if (enoent_ok) return (ENOENT); /* FALLTHROUGH */ default: return (zfs_standard_error_fmt(hdl, errno, errbuf)); } } return (0); } int zfs_release(zfs_handle_t *zhp, const char *snapname, const char *tag, boolean_t recursive) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; libzfs_handle_t *hdl = zhp->zfs_hdl; (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); (void) strlcpy(zc.zc_value, snapname, sizeof (zc.zc_value)); if (strlcpy(zc.zc_string, tag, sizeof (zc.zc_string)) >= sizeof (zc.zc_string)) return (zfs_error(hdl, EZFS_TAGTOOLONG, tag)); zc.zc_cookie = recursive; if (zfs_ioctl(hdl, ZFS_IOC_RELEASE, &zc) != 0) { char errbuf[ZFS_MAXNAMELEN+32]; /* * if it was recursive, the one that actually failed will be in * zc.zc_name. 
*/ (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot release '%s' from '%s@%s'"), tag, zc.zc_name, snapname); switch (errno) { case ESRCH: return (zfs_error(hdl, EZFS_REFTAG_RELE, errbuf)); case ENOTSUP: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be upgraded")); return (zfs_error(hdl, EZFS_BADVERSION, errbuf)); case EINVAL: return (zfs_error(hdl, EZFS_BADTYPE, errbuf)); default: return (zfs_standard_error_fmt(hdl, errno, errbuf)); } } return (0); } int zfs_get_fsacl(zfs_handle_t *zhp, nvlist_t **nvl) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; libzfs_handle_t *hdl = zhp->zfs_hdl; int nvsz = 2048; void *nvbuf; int err = 0; char errbuf[ZFS_MAXNAMELEN+32]; assert(zhp->zfs_type == ZFS_TYPE_VOLUME || zhp->zfs_type == ZFS_TYPE_FILESYSTEM); tryagain: nvbuf = malloc(nvsz); if (nvbuf == NULL) { err = (zfs_error(hdl, EZFS_NOMEM, strerror(errno))); goto out; } zc.zc_nvlist_dst_size = nvsz; zc.zc_nvlist_dst = (uintptr_t)nvbuf; (void) strlcpy(zc.zc_name, zhp->zfs_name, ZFS_MAXNAMELEN); if (ioctl(hdl->libzfs_fd, ZFS_IOC_GET_FSACL, &zc) != 0) { (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot get permissions on '%s'"), zc.zc_name); switch (errno) { case ENOMEM: free(nvbuf); nvsz = zc.zc_nvlist_dst_size; goto tryagain; case ENOTSUP: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be upgraded")); err = zfs_error(hdl, EZFS_BADVERSION, errbuf); break; case EINVAL: err = zfs_error(hdl, EZFS_BADTYPE, errbuf); break; case ENOENT: err = zfs_error(hdl, EZFS_NOENT, errbuf); break; default: err = zfs_standard_error_fmt(hdl, errno, errbuf); break; } } else { /* success */ int rc = nvlist_unpack(nvbuf, zc.zc_nvlist_dst_size, nvl, 0); if (rc) { (void) snprintf(errbuf, sizeof (errbuf), dgettext( TEXT_DOMAIN, "cannot get permissions on '%s'"), zc.zc_name); err = zfs_standard_error_fmt(hdl, rc, errbuf); } } free(nvbuf); out: return (err); } int zfs_set_fsacl(zfs_handle_t *zhp, boolean_t un, nvlist_t *nvl) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; libzfs_handle_t *hdl = zhp->zfs_hdl; char *nvbuf; char errbuf[ZFS_MAXNAMELEN+32]; size_t nvsz; int err; assert(zhp->zfs_type == ZFS_TYPE_VOLUME || zhp->zfs_type == ZFS_TYPE_FILESYSTEM); err = nvlist_size(nvl, &nvsz, NV_ENCODE_NATIVE); assert(err == 0); nvbuf = malloc(nvsz); err = nvlist_pack(nvl, &nvbuf, &nvsz, NV_ENCODE_NATIVE, 0); assert(err == 0); zc.zc_nvlist_src_size = nvsz; zc.zc_nvlist_src = (uintptr_t)nvbuf; zc.zc_perm_action = un; (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); if (zfs_ioctl(hdl, ZFS_IOC_SET_FSACL, &zc) != 0) { (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot set permissions on '%s'"), zc.zc_name); switch (errno) { case ENOTSUP: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be upgraded")); err = zfs_error(hdl, EZFS_BADVERSION, errbuf); break; case EINVAL: err = zfs_error(hdl, EZFS_BADTYPE, errbuf); break; case ENOENT: err = zfs_error(hdl, EZFS_NOENT, errbuf); break; default: err = zfs_standard_error_fmt(hdl, errno, errbuf); break; } } free(nvbuf); return (err); } int zfs_get_holds(zfs_handle_t *zhp, nvlist_t **nvl) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; libzfs_handle_t *hdl = zhp->zfs_hdl; int nvsz = 2048; void *nvbuf; int err = 0; char errbuf[ZFS_MAXNAMELEN+32]; assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT); tryagain: nvbuf = malloc(nvsz); if 
(nvbuf == NULL) { err = (zfs_error(hdl, EZFS_NOMEM, strerror(errno))); goto out; } zc.zc_nvlist_dst_size = nvsz; zc.zc_nvlist_dst = (uintptr_t)nvbuf; (void) strlcpy(zc.zc_name, zhp->zfs_name, ZFS_MAXNAMELEN); if (zfs_ioctl(hdl, ZFS_IOC_GET_HOLDS, &zc) != 0) { (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot get holds for '%s'"), zc.zc_name); switch (errno) { case ENOMEM: free(nvbuf); nvsz = zc.zc_nvlist_dst_size; goto tryagain; case ENOTSUP: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be upgraded")); err = zfs_error(hdl, EZFS_BADVERSION, errbuf); break; case EINVAL: err = zfs_error(hdl, EZFS_BADTYPE, errbuf); break; case ENOENT: err = zfs_error(hdl, EZFS_NOENT, errbuf); break; default: err = zfs_standard_error_fmt(hdl, errno, errbuf); break; } } else { /* success */ int rc = nvlist_unpack(nvbuf, zc.zc_nvlist_dst_size, nvl, 0); if (rc) { (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot get holds for '%s'"), zc.zc_name); err = zfs_standard_error_fmt(hdl, rc, errbuf); } } free(nvbuf); out: return (err); } uint64_t zvol_volsize_to_reservation(uint64_t volsize, nvlist_t *props) { uint64_t numdb; uint64_t nblocks, volblocksize; int ncopies; char *strval; if (nvlist_lookup_string(props, zfs_prop_to_name(ZFS_PROP_COPIES), &strval) == 0) ncopies = atoi(strval); else ncopies = 1; if (nvlist_lookup_uint64(props, zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), &volblocksize) != 0) volblocksize = ZVOL_DEFAULT_BLOCKSIZE; nblocks = volsize/volblocksize; /* start with metadnode L0-L6 */ numdb = 7; /* calculate number of indirects */ while (nblocks > 1) { nblocks += DNODES_PER_LEVEL - 1; nblocks /= DNODES_PER_LEVEL; numdb += nblocks; } numdb *= MIN(SPA_DVAS_PER_BP, ncopies + 1); volsize *= ncopies; /* * this is exactly DN_MAX_INDBLKSHIFT when metadata isn't * compressed, but in practice they compress down to about * 1100 bytes */ numdb *= 1ULL << DN_MAX_INDBLKSHIFT; volsize += numdb; return (volsize); } diff --git a/lib/libzfs/libzfs_diff.c b/lib/libzfs/libzfs_diff.c index 77d5a09ec63c..d8ef6ff025b1 100644 --- a/lib/libzfs/libzfs_diff.c +++ b/lib/libzfs/libzfs_diff.c @@ -1,830 +1,830 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. 
*/ /* * zfs diff support */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "libzfs_impl.h" #define ZDIFF_SNAPDIR "/.zfs/snapshot/" #define ZDIFF_SHARESDIR "/.zfs/shares/" #define ZDIFF_PREFIX "zfs-diff-%d" #define ZDIFF_ADDED '+' #define ZDIFF_MODIFIED 'M' #define ZDIFF_REMOVED '-' #define ZDIFF_RENAMED 'R' static boolean_t do_name_cmp(const char *fpath, const char *tpath) { char *fname, *tname; fname = strrchr(fpath, '/') + 1; tname = strrchr(tpath, '/') + 1; return (strcmp(fname, tname) == 0); } typedef struct differ_info { zfs_handle_t *zhp; char *fromsnap; char *frommnt; char *tosnap; char *tomnt; char *ds; char *dsmnt; char *tmpsnap; char errbuf[1024]; boolean_t isclone; boolean_t scripted; boolean_t classify; boolean_t timestamped; uint64_t shares; int zerr; int cleanupfd; int outputfd; int datafd; } differ_info_t; /* * Given a {dsname, object id}, get the object path */ static int get_stats_for_obj(differ_info_t *di, const char *dsname, uint64_t obj, char *pn, int maxlen, zfs_stat_t *sb) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; int error; (void) strlcpy(zc.zc_name, dsname, sizeof (zc.zc_name)); zc.zc_obj = obj; errno = 0; error = ioctl(di->zhp->zfs_hdl->libzfs_fd, ZFS_IOC_OBJ_TO_STATS, &zc); di->zerr = errno; /* we can get stats even if we failed to get a path */ (void) memcpy(sb, &zc.zc_stat, sizeof (zfs_stat_t)); if (error == 0) { ASSERT(di->zerr == 0); (void) strlcpy(pn, zc.zc_value, maxlen); return (0); } if (di->zerr == EPERM) { (void) snprintf(di->errbuf, sizeof (di->errbuf), dgettext(TEXT_DOMAIN, "The sys_config privilege or diff delegated permission " "is needed\nto discover path names")); return (-1); } else { (void) snprintf(di->errbuf, sizeof (di->errbuf), dgettext(TEXT_DOMAIN, "Unable to determine path or stats for " "object %lld in %s"), (longlong_t)obj, dsname); return (-1); } } /* * stream_bytes * * Prints a file name out a character at a time. If the character is * not in the range of what we consider "printable" ASCII, display it * as an escaped 3-digit octal value. ASCII values less than a space * are all control characters and we declare the upper end as the * DELete character. This also is the last 7-bit ASCII character. * We choose to treat all 8-bit ASCII as not printable for this * application. 
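 *
 * For example (illustrative only): a name containing a tab (octal 011)
 * and a literal backslash (octal 134), say "a<TAB>\b", is streamed as
 * "a\011\134b", while plain printable ASCII passes through unchanged.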
*/ static void stream_bytes(FILE *fp, const char *string) { while (*string) { if (*string > ' ' && *string != '\\' && *string < '\177') (void) fprintf(fp, "%c", *string++); else (void) fprintf(fp, "\\%03o", (unsigned char)*string++); } } static void print_what(FILE *fp, mode_t what) { char symbol; switch (what & S_IFMT) { case S_IFBLK: symbol = 'B'; break; case S_IFCHR: symbol = 'C'; break; case S_IFDIR: symbol = '/'; break; #ifdef S_IFDOOR case S_IFDOOR: symbol = '>'; break; #endif case S_IFIFO: symbol = '|'; break; case S_IFLNK: symbol = '@'; break; #ifdef S_IFPORT case S_IFPORT: symbol = 'P'; break; #endif case S_IFSOCK: symbol = '='; break; case S_IFREG: symbol = 'F'; break; default: symbol = '?'; break; } (void) fprintf(fp, "%c", symbol); } static void print_cmn(FILE *fp, differ_info_t *di, const char *file) { stream_bytes(fp, di->dsmnt); stream_bytes(fp, file); } static void print_rename(FILE *fp, differ_info_t *di, const char *old, const char *new, zfs_stat_t *isb) { if (di->timestamped) (void) fprintf(fp, "%10lld.%09lld\t", (longlong_t)isb->zs_ctime[0], (longlong_t)isb->zs_ctime[1]); (void) fprintf(fp, "%c\t", ZDIFF_RENAMED); if (di->classify) { print_what(fp, isb->zs_mode); (void) fprintf(fp, "\t"); } print_cmn(fp, di, old); if (di->scripted) (void) fprintf(fp, "\t"); else (void) fprintf(fp, " -> "); print_cmn(fp, di, new); (void) fprintf(fp, "\n"); } static void print_link_change(FILE *fp, differ_info_t *di, int delta, const char *file, zfs_stat_t *isb) { if (di->timestamped) (void) fprintf(fp, "%10lld.%09lld\t", (longlong_t)isb->zs_ctime[0], (longlong_t)isb->zs_ctime[1]); (void) fprintf(fp, "%c\t", ZDIFF_MODIFIED); if (di->classify) { print_what(fp, isb->zs_mode); (void) fprintf(fp, "\t"); } print_cmn(fp, di, file); (void) fprintf(fp, "\t(%+d)", delta); (void) fprintf(fp, "\n"); } static void print_file(FILE *fp, differ_info_t *di, char type, const char *file, zfs_stat_t *isb) { if (di->timestamped) (void) fprintf(fp, "%10lld.%09lld\t", (longlong_t)isb->zs_ctime[0], (longlong_t)isb->zs_ctime[1]); (void) fprintf(fp, "%c\t", type); if (di->classify) { print_what(fp, isb->zs_mode); (void) fprintf(fp, "\t"); } print_cmn(fp, di, file); (void) fprintf(fp, "\n"); } static int write_inuse_diffs_one(FILE *fp, differ_info_t *di, uint64_t dobj) { struct zfs_stat fsb, tsb; boolean_t same_name; mode_t fmode, tmode; char fobjname[MAXPATHLEN], tobjname[MAXPATHLEN]; int fobjerr, tobjerr; int change; if (dobj == di->shares) return (0); /* * Check the from and to snapshots for info on the object. If * we get ENOENT, then the object just didn't exist in that * snapshot. If we get ENOTSUP, then we tried to get * info on a non-ZPL object, which we don't care about anyway. 
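 *
 * Roughly: an object present only in the 'to' snapshot is reported as
 * added, one present only in 'from' as removed, and one present in
 * both is classified below as a modification, rename, link-count
 * change, or a remove/add pair when the generation numbers differ.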
*/ fobjerr = get_stats_for_obj(di, di->fromsnap, dobj, fobjname, MAXPATHLEN, &fsb); if (fobjerr && di->zerr != ENOENT && di->zerr != ENOTSUP) return (-1); tobjerr = get_stats_for_obj(di, di->tosnap, dobj, tobjname, MAXPATHLEN, &tsb); if (tobjerr && di->zerr != ENOENT && di->zerr != ENOTSUP) return (-1); /* * Unallocated object sharing the same meta dnode block */ if (fobjerr && tobjerr) { ASSERT(di->zerr == ENOENT || di->zerr == ENOTSUP); di->zerr = 0; return (0); } di->zerr = 0; /* negate get_stats_for_obj() from side that failed */ fmode = fsb.zs_mode & S_IFMT; tmode = tsb.zs_mode & S_IFMT; if (fmode == S_IFDIR || tmode == S_IFDIR || fsb.zs_links == 0 || tsb.zs_links == 0) change = 0; else change = tsb.zs_links - fsb.zs_links; if (fobjerr) { if (change) { print_link_change(fp, di, change, tobjname, &tsb); return (0); } print_file(fp, di, ZDIFF_ADDED, tobjname, &tsb); return (0); } else if (tobjerr) { if (change) { print_link_change(fp, di, change, fobjname, &fsb); return (0); } print_file(fp, di, ZDIFF_REMOVED, fobjname, &fsb); return (0); } if (fmode != tmode && fsb.zs_gen == tsb.zs_gen) tsb.zs_gen++; /* Force a generational difference */ same_name = do_name_cmp(fobjname, tobjname); /* Simple modification or no change */ if (fsb.zs_gen == tsb.zs_gen) { /* No apparent changes. Could we assert !this? */ if (fsb.zs_ctime[0] == tsb.zs_ctime[0] && fsb.zs_ctime[1] == tsb.zs_ctime[1]) return (0); if (change) { print_link_change(fp, di, change, change > 0 ? fobjname : tobjname, &tsb); } else if (same_name) { print_file(fp, di, ZDIFF_MODIFIED, fobjname, &tsb); } else { print_rename(fp, di, fobjname, tobjname, &tsb); } return (0); } else { /* file re-created or object re-used */ print_file(fp, di, ZDIFF_REMOVED, fobjname, &fsb); print_file(fp, di, ZDIFF_ADDED, tobjname, &tsb); return (0); } } static int write_inuse_diffs(FILE *fp, differ_info_t *di, dmu_diff_record_t *dr) { uint64_t o; int err; for (o = dr->ddr_first; o <= dr->ddr_last; o++) { if ((err = write_inuse_diffs_one(fp, di, o))) return (err); } return (0); } static int describe_free(FILE *fp, differ_info_t *di, uint64_t object, char *namebuf, int maxlen) { struct zfs_stat sb; if (get_stats_for_obj(di, di->fromsnap, object, namebuf, maxlen, &sb) != 0) { /* Let it slide, if in the delete queue on from side */ if (di->zerr == ENOENT && sb.zs_links == 0) { di->zerr = 0; return (0); } return (-1); } print_file(fp, di, ZDIFF_REMOVED, namebuf, &sb); return (0); } static int write_free_diffs(FILE *fp, differ_info_t *di, dmu_diff_record_t *dr) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; libzfs_handle_t *lhdl = di->zhp->zfs_hdl; char fobjname[MAXPATHLEN]; (void) strlcpy(zc.zc_name, di->fromsnap, sizeof (zc.zc_name)); zc.zc_obj = dr->ddr_first - 1; ASSERT(di->zerr == 0); while (zc.zc_obj < dr->ddr_last) { int err; err = ioctl(lhdl->libzfs_fd, ZFS_IOC_NEXT_OBJ, &zc); if (err == 0) { if (zc.zc_obj == di->shares) { zc.zc_obj++; continue; } if (zc.zc_obj > dr->ddr_last) { break; } err = describe_free(fp, di, zc.zc_obj, fobjname, MAXPATHLEN); if (err) break; } else if (errno == ESRCH) { break; } else { (void) snprintf(di->errbuf, sizeof (di->errbuf), dgettext(TEXT_DOMAIN, "next allocated object (> %lld) find failure"), (longlong_t)zc.zc_obj); di->zerr = errno; break; } } if (di->zerr) return (-1); return (0); } static void * differ(void *arg) { differ_info_t *di = arg; dmu_diff_record_t dr; FILE *ofp; int err = 0; if ((ofp = fdopen(di->outputfd, "w")) == NULL) { di->zerr = errno; 
strncpy(di->errbuf, strerror(errno), sizeof (di->errbuf)); (void) close(di->datafd); return ((void *)-1); } for (;;) { char *cp = (char *)&dr; int len = sizeof (dr); int rv; do { rv = read(di->datafd, cp, len); cp += rv; len -= rv; } while (len > 0 && rv > 0); if (rv < 0 || (rv == 0 && len != sizeof (dr))) { di->zerr = EPIPE; break; } else if (rv == 0) { /* end of file at a natural breaking point */ break; } switch (dr.ddr_type) { case DDR_FREE: err = write_free_diffs(ofp, di, &dr); break; case DDR_INUSE: err = write_inuse_diffs(ofp, di, &dr); break; default: di->zerr = EPIPE; break; } if (err || di->zerr) break; } (void) fclose(ofp); (void) close(di->datafd); if (err) return ((void *)-1); if (di->zerr) { ASSERT(di->zerr == EINVAL); (void) snprintf(di->errbuf, sizeof (di->errbuf), dgettext(TEXT_DOMAIN, "Internal error: bad data from diff IOCTL")); return ((void *)-1); } return ((void *)0); } static int find_shares_object(differ_info_t *di) { char fullpath[MAXPATHLEN]; struct stat64 sb = { 0 }; (void) strlcpy(fullpath, di->dsmnt, MAXPATHLEN); (void) strlcat(fullpath, ZDIFF_SHARESDIR, MAXPATHLEN); if (stat64(fullpath, &sb) != 0) { (void) snprintf(di->errbuf, sizeof (di->errbuf), dgettext(TEXT_DOMAIN, "Cannot stat %s"), fullpath); return (zfs_error(di->zhp->zfs_hdl, EZFS_DIFF, di->errbuf)); } di->shares = (uint64_t)sb.st_ino; return (0); } static int make_temp_snapshot(differ_info_t *di) { libzfs_handle_t *hdl = di->zhp->zfs_hdl; - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; (void) snprintf(zc.zc_value, sizeof (zc.zc_value), ZDIFF_PREFIX, getpid()); (void) strlcpy(zc.zc_name, di->ds, sizeof (zc.zc_name)); zc.zc_cleanup_fd = di->cleanupfd; if (ioctl(hdl->libzfs_fd, ZFS_IOC_TMP_SNAPSHOT, &zc) != 0) { int err = errno; if (err == EPERM) { (void) snprintf(di->errbuf, sizeof (di->errbuf), dgettext(TEXT_DOMAIN, "The diff delegated " "permission is needed in order\nto create a " "just-in-time snapshot for diffing\n")); return (zfs_error(hdl, EZFS_DIFF, di->errbuf)); } else { (void) snprintf(di->errbuf, sizeof (di->errbuf), dgettext(TEXT_DOMAIN, "Cannot create just-in-time " "snapshot of '%s'"), zc.zc_name); return (zfs_standard_error(hdl, err, di->errbuf)); } } di->tmpsnap = zfs_strdup(hdl, zc.zc_value); di->tosnap = zfs_asprintf(hdl, "%s@%s", di->ds, di->tmpsnap); return (0); } static void teardown_differ_info(differ_info_t *di) { free(di->ds); free(di->dsmnt); free(di->fromsnap); free(di->frommnt); free(di->tosnap); free(di->tmpsnap); free(di->tomnt); (void) close(di->cleanupfd); } static int get_snapshot_names(differ_info_t *di, const char *fromsnap, const char *tosnap) { libzfs_handle_t *hdl = di->zhp->zfs_hdl; char *atptrf = NULL; char *atptrt = NULL; int fdslen, fsnlen; int tdslen, tsnlen; /* * Can accept * dataset@snap1 * dataset@snap1 dataset@snap2 * dataset@snap1 @snap2 * dataset@snap1 dataset * @snap1 dataset@snap2 */ if (tosnap == NULL) { /* only a from snapshot given, must be valid */ (void) snprintf(di->errbuf, sizeof (di->errbuf), dgettext(TEXT_DOMAIN, "Badly formed snapshot name %s"), fromsnap); if (!zfs_validate_name(hdl, fromsnap, ZFS_TYPE_SNAPSHOT, B_FALSE)) { return (zfs_error(hdl, EZFS_INVALIDNAME, di->errbuf)); } atptrf = strchr(fromsnap, '@'); ASSERT(atptrf != NULL); fdslen = atptrf - fromsnap; di->fromsnap = zfs_strdup(hdl, fromsnap); di->ds = zfs_strdup(hdl, fromsnap); di->ds[fdslen] = '\0'; /* the to snap will be a just-in-time snap of the head */ return (make_temp_snapshot(di)); } (void) snprintf(di->errbuf, 
sizeof (di->errbuf), dgettext(TEXT_DOMAIN, "Unable to determine which snapshots to compare")); atptrf = strchr(fromsnap, '@'); atptrt = strchr(tosnap, '@'); fdslen = atptrf ? atptrf - fromsnap : strlen(fromsnap); tdslen = atptrt ? atptrt - tosnap : strlen(tosnap); fsnlen = strlen(fromsnap) - fdslen; /* includes @ sign */ tsnlen = strlen(tosnap) - tdslen; /* includes @ sign */ if (fsnlen <= 1 || tsnlen == 1 || (fdslen == 0 && tdslen == 0) || (fsnlen == 0 && tsnlen == 0)) { return (zfs_error(hdl, EZFS_INVALIDNAME, di->errbuf)); } else if ((fdslen > 0 && tdslen > 0) && ((tdslen != fdslen || strncmp(fromsnap, tosnap, fdslen) != 0))) { /* * not the same dataset name, might be okay if * tosnap is a clone of a fromsnap descendant. */ char origin[ZFS_MAXNAMELEN]; zprop_source_t src; zfs_handle_t *zhp; di->ds = zfs_alloc(di->zhp->zfs_hdl, tdslen + 1); (void) strncpy(di->ds, tosnap, tdslen); di->ds[tdslen] = '\0'; zhp = zfs_open(hdl, di->ds, ZFS_TYPE_FILESYSTEM); while (zhp != NULL) { (void) zfs_prop_get(zhp, ZFS_PROP_ORIGIN, origin, sizeof (origin), &src, NULL, 0, B_FALSE); if (strncmp(origin, fromsnap, fsnlen) == 0) break; (void) zfs_close(zhp); zhp = zfs_open(hdl, origin, ZFS_TYPE_FILESYSTEM); } if (zhp == NULL) { (void) snprintf(di->errbuf, sizeof (di->errbuf), dgettext(TEXT_DOMAIN, "Not an earlier snapshot from the same fs")); return (zfs_error(hdl, EZFS_INVALIDNAME, di->errbuf)); } else { (void) zfs_close(zhp); } di->isclone = B_TRUE; di->fromsnap = zfs_strdup(hdl, fromsnap); if (tsnlen) { di->tosnap = zfs_strdup(hdl, tosnap); } else { return (make_temp_snapshot(di)); } } else { int dslen = fdslen ? fdslen : tdslen; di->ds = zfs_alloc(hdl, dslen + 1); (void) strncpy(di->ds, fdslen ? fromsnap : tosnap, dslen); di->ds[dslen] = '\0'; di->fromsnap = zfs_asprintf(hdl, "%s%s", di->ds, atptrf); if (tsnlen) { di->tosnap = zfs_asprintf(hdl, "%s%s", di->ds, atptrt); } else { return (make_temp_snapshot(di)); } } return (0); } static int get_mountpoint(differ_info_t *di, char *dsnm, char **mntpt) { boolean_t mounted; mounted = is_mounted(di->zhp->zfs_hdl, dsnm, mntpt); if (mounted == B_FALSE) { (void) snprintf(di->errbuf, sizeof (di->errbuf), dgettext(TEXT_DOMAIN, "Cannot diff an unmounted snapshot")); return (zfs_error(di->zhp->zfs_hdl, EZFS_BADTYPE, di->errbuf)); } /* Avoid a double slash at the beginning of root-mounted datasets */ if (**mntpt == '/' && *(*mntpt + 1) == '\0') **mntpt = '\0'; return (0); } static int get_mountpoints(differ_info_t *di) { char *strptr; char *frommntpt; /* * first get the mountpoint for the parent dataset */ if (get_mountpoint(di, di->ds, &di->dsmnt) != 0) return (-1); strptr = strchr(di->tosnap, '@'); ASSERT3P(strptr, !=, NULL); di->tomnt = zfs_asprintf(di->zhp->zfs_hdl, "%s%s%s", di->dsmnt, ZDIFF_SNAPDIR, ++strptr); strptr = strchr(di->fromsnap, '@'); ASSERT3P(strptr, !=, NULL); frommntpt = di->dsmnt; if (di->isclone) { char *mntpt; int err; *strptr = '\0'; err = get_mountpoint(di, di->fromsnap, &mntpt); *strptr = '@'; if (err != 0) return (-1); frommntpt = mntpt; } di->frommnt = zfs_asprintf(di->zhp->zfs_hdl, "%s%s%s", frommntpt, ZDIFF_SNAPDIR, ++strptr); if (di->isclone) free(frommntpt); return (0); } static int setup_differ_info(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap, differ_info_t *di) { di->zhp = zhp; di->cleanupfd = open(ZFS_DEV, O_RDWR); VERIFY(di->cleanupfd >= 0); if (get_snapshot_names(di, fromsnap, tosnap) != 0) return (-1); if (get_mountpoints(di) != 0) return (-1); if (find_shares_object(di) != 0) return (-1); return (0); } int 
zfs_show_diffs(zfs_handle_t *zhp, int outfd, const char *fromsnap, const char *tosnap, int flags) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; char errbuf[1024]; differ_info_t di = { 0 }; pthread_t tid; int pipefd[2]; int iocerr; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "zfs diff failed")); if (setup_differ_info(zhp, fromsnap, tosnap, &di)) { teardown_differ_info(&di); return (-1); } if (pipe(pipefd)) { zfs_error_aux(zhp->zfs_hdl, strerror(errno)); teardown_differ_info(&di); return (zfs_error(zhp->zfs_hdl, EZFS_PIPEFAILED, errbuf)); } di.scripted = (flags & ZFS_DIFF_PARSEABLE); di.classify = (flags & ZFS_DIFF_CLASSIFY); di.timestamped = (flags & ZFS_DIFF_TIMESTAMP); di.outputfd = outfd; di.datafd = pipefd[0]; if (pthread_create(&tid, NULL, differ, &di)) { zfs_error_aux(zhp->zfs_hdl, strerror(errno)); (void) close(pipefd[0]); (void) close(pipefd[1]); teardown_differ_info(&di); return (zfs_error(zhp->zfs_hdl, EZFS_THREADCREATEFAILED, errbuf)); } /* do the ioctl() */ (void) strlcpy(zc.zc_value, di.fromsnap, strlen(di.fromsnap) + 1); (void) strlcpy(zc.zc_name, di.tosnap, strlen(di.tosnap) + 1); zc.zc_cookie = pipefd[1]; iocerr = ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_DIFF, &zc); if (iocerr != 0) { (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "Unable to obtain diffs")); if (errno == EPERM) { zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, "\n The sys_mount privilege or diff delegated " "permission is needed\n to execute the " "diff ioctl")); } else if (errno == EXDEV) { zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, "\n Not an earlier snapshot from the same fs")); } else if (errno != EPIPE || di.zerr == 0) { zfs_error_aux(zhp->zfs_hdl, strerror(errno)); } (void) close(pipefd[1]); (void) pthread_cancel(tid); (void) pthread_join(tid, NULL); teardown_differ_info(&di); if (di.zerr != 0 && di.zerr != EPIPE) { zfs_error_aux(zhp->zfs_hdl, strerror(di.zerr)); return (zfs_error(zhp->zfs_hdl, EZFS_DIFF, di.errbuf)); } else { return (zfs_error(zhp->zfs_hdl, EZFS_DIFFDATA, errbuf)); } } (void) close(pipefd[1]); (void) pthread_join(tid, NULL); if (di.zerr != 0) { zfs_error_aux(zhp->zfs_hdl, strerror(di.zerr)); return (zfs_error(zhp->zfs_hdl, EZFS_DIFF, di.errbuf)); } teardown_differ_info(&di); return (0); } diff --git a/lib/libzfs/libzfs_fru.c b/lib/libzfs/libzfs_fru.c index 78f2f9c371ae..aa84aa30deb5 100644 --- a/lib/libzfs/libzfs_fru.c +++ b/lib/libzfs/libzfs_fru.c @@ -1,469 +1,469 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ #include #include #include #include #include #include #include #include #if defined(HAVE_LIBTOPO) #include #include #include #include "libzfs_impl.h" /* * This file is responsible for determining the relationship between I/O * devices paths and physical locations. In the world of MPxIO and external * enclosures, the device path is not synonymous with the physical location. * If you remove a drive and insert it into a different slot, it will end up * with the same path under MPxIO. If you recable storage enclosures, the * device paths may change. All of this makes it difficult to implement the * 'autoreplace' property, which is supposed to automatically manage disk * replacement based on physical slot. * * In order to work around these limitations, we have a per-vdev FRU property * that is the libtopo path (minus disk-specific authority information) to the * physical location of the device on the system. This is an optional * property, and is only needed when using the 'autoreplace' property or when * generating FMA faults against vdevs. */ /* * Because the FMA packages depend on ZFS, we have to dlopen() libtopo in case * it is not present. We only need this once per library instance, so it is * not part of the libzfs handle. */ static void *_topo_dlhandle; static topo_hdl_t *(*_topo_open)(int, const char *, int *); static void (*_topo_close)(topo_hdl_t *); static char *(*_topo_snap_hold)(topo_hdl_t *, const char *, int *); static void (*_topo_snap_release)(topo_hdl_t *); static topo_walk_t *(*_topo_walk_init)(topo_hdl_t *, const char *, topo_walk_cb_t, void *, int *); static int (*_topo_walk_step)(topo_walk_t *, int); static void (*_topo_walk_fini)(topo_walk_t *); static void (*_topo_hdl_strfree)(topo_hdl_t *, char *); static char *(*_topo_node_name)(tnode_t *); static int (*_topo_prop_get_string)(tnode_t *, const char *, const char *, char **, int *); static int (*_topo_node_fru)(tnode_t *, nvlist_t **, nvlist_t *, int *); static int (*_topo_fmri_nvl2str)(topo_hdl_t *, nvlist_t *, char **, int *); static int (*_topo_fmri_strcmp_noauth)(topo_hdl_t *, const char *, const char *); #define ZFS_FRU_HASH_SIZE 257 static size_t fru_strhash(const char *key) { ulong_t g, h = 0; const char *p; for (p = key; *p != '\0'; p++) { h = (h << 4) + *p; if ((g = (h & 0xf0000000)) != 0) { h ^= (g >> 24); h ^= g; } } return (h % ZFS_FRU_HASH_SIZE); } static int libzfs_fru_gather(topo_hdl_t *thp, tnode_t *tn, void *arg) { libzfs_handle_t *hdl = arg; nvlist_t *fru; char *devpath, *frustr; int err; libzfs_fru_t *frup; size_t idx; /* * If this is the chassis node, and we don't yet have the system * chassis ID, then fill in this value now. */ if (hdl->libzfs_chassis_id[0] == '\0' && strcmp(_topo_node_name(tn), "chassis") == 0) { if (_topo_prop_get_string(tn, FM_FMRI_AUTHORITY, FM_FMRI_AUTH_CHASSIS, &devpath, &err) == 0) (void) strlcpy(hdl->libzfs_chassis_id, devpath, sizeof (hdl->libzfs_chassis_id)); } /* * Skip non-disk nodes. */ if (strcmp(_topo_node_name(tn), "disk") != 0) return (TOPO_WALK_NEXT); /* * Get the devfs path and FRU. */ if (_topo_prop_get_string(tn, "io", "devfs-path", &devpath, &err) != 0) return (TOPO_WALK_NEXT); if (libzfs_fru_lookup(hdl, devpath) != NULL) { _topo_hdl_strfree(thp, devpath); return (TOPO_WALK_NEXT); } if (_topo_node_fru(tn, &fru, NULL, &err) != 0) { _topo_hdl_strfree(thp, devpath); return (TOPO_WALK_NEXT); } /* * Convert the FRU into a string. 
*/ if (_topo_fmri_nvl2str(thp, fru, &frustr, &err) != 0) { nvlist_free(fru); _topo_hdl_strfree(thp, devpath); return (TOPO_WALK_NEXT); } nvlist_free(fru); /* * Finally, we have a FRU string and device path. Add it to the hash. */ if ((frup = calloc(sizeof (libzfs_fru_t), 1)) == NULL) { _topo_hdl_strfree(thp, devpath); _topo_hdl_strfree(thp, frustr); return (TOPO_WALK_NEXT); } if ((frup->zf_device = strdup(devpath)) == NULL || (frup->zf_fru = strdup(frustr)) == NULL) { free(frup->zf_device); free(frup); _topo_hdl_strfree(thp, devpath); _topo_hdl_strfree(thp, frustr); return (TOPO_WALK_NEXT); } _topo_hdl_strfree(thp, devpath); _topo_hdl_strfree(thp, frustr); idx = fru_strhash(frup->zf_device); frup->zf_chain = hdl->libzfs_fru_hash[idx]; hdl->libzfs_fru_hash[idx] = frup; frup->zf_next = hdl->libzfs_fru_list; hdl->libzfs_fru_list = frup; return (TOPO_WALK_NEXT); } /* * Called during initialization to setup the dynamic libtopo connection. */ #pragma init(libzfs_init_fru) static void libzfs_init_fru(void) { char path[MAXPATHLEN]; char isa[257]; #if defined(_LP64) if (sysinfo(SI_ARCHITECTURE_64, isa, sizeof (isa)) < 0) isa[0] = '\0'; #else isa[0] = '\0'; #endif (void) snprintf(path, sizeof (path), "/usr/lib/fm/%s/libtopo.so", isa); if ((_topo_dlhandle = dlopen(path, RTLD_LAZY)) == NULL) return; _topo_open = (topo_hdl_t *(*)()) dlsym(_topo_dlhandle, "topo_open"); _topo_close = (void (*)()) dlsym(_topo_dlhandle, "topo_close"); _topo_snap_hold = (char *(*)()) dlsym(_topo_dlhandle, "topo_snap_hold"); _topo_snap_release = (void (*)()) dlsym(_topo_dlhandle, "topo_snap_release"); _topo_walk_init = (topo_walk_t *(*)()) dlsym(_topo_dlhandle, "topo_walk_init"); _topo_walk_step = (int (*)()) dlsym(_topo_dlhandle, "topo_walk_step"); _topo_walk_fini = (void (*)()) dlsym(_topo_dlhandle, "topo_walk_fini"); _topo_hdl_strfree = (void (*)()) dlsym(_topo_dlhandle, "topo_hdl_strfree"); _topo_node_name = (char *(*)()) dlsym(_topo_dlhandle, "topo_node_name"); _topo_prop_get_string = (int (*)()) dlsym(_topo_dlhandle, "topo_prop_get_string"); _topo_node_fru = (int (*)()) dlsym(_topo_dlhandle, "topo_node_fru"); _topo_fmri_nvl2str = (int (*)()) dlsym(_topo_dlhandle, "topo_fmri_nvl2str"); _topo_fmri_strcmp_noauth = (int (*)()) dlsym(_topo_dlhandle, "topo_fmri_strcmp_noauth"); if (_topo_open == NULL || _topo_close == NULL || _topo_snap_hold == NULL || _topo_snap_release == NULL || _topo_walk_init == NULL || _topo_walk_step == NULL || _topo_walk_fini == NULL || _topo_hdl_strfree == NULL || _topo_node_name == NULL || _topo_prop_get_string == NULL || _topo_node_fru == NULL || _topo_fmri_nvl2str == NULL || _topo_fmri_strcmp_noauth == NULL) { (void) dlclose(_topo_dlhandle); _topo_dlhandle = NULL; } } /* * Refresh the mappings from device path -> FMRI. We do this by walking the * hc topology looking for disk nodes, and recording the io/devfs-path and FRU. * Note that we strip out the disk-specific authority information (serial, * part, revision, etc) so that we are left with only the identifying * characteristics of the slot (hc path and chassis-id). */ void libzfs_fru_refresh(libzfs_handle_t *hdl) { int err; char *uuid; topo_hdl_t *thp; topo_walk_t *twp; if (_topo_dlhandle == NULL) return; /* * Clear the FRU hash and initialize our basic structures. 
*/ libzfs_fru_clear(hdl, B_FALSE); if ((hdl->libzfs_topo_hdl = _topo_open(TOPO_VERSION, NULL, &err)) == NULL) return; thp = hdl->libzfs_topo_hdl; if ((uuid = _topo_snap_hold(thp, NULL, &err)) == NULL) return; _topo_hdl_strfree(thp, uuid); if (hdl->libzfs_fru_hash == NULL && (hdl->libzfs_fru_hash = calloc(ZFS_FRU_HASH_SIZE * sizeof (void *), 1)) == NULL) return; /* * We now have a topo snapshot, so iterate over the hc topology looking * for disks to add to the hash. */ twp = _topo_walk_init(thp, FM_FMRI_SCHEME_HC, libzfs_fru_gather, hdl, &err); if (twp != NULL) { (void) _topo_walk_step(twp, TOPO_WALK_CHILD); _topo_walk_fini(twp); } } /* * Given a devfs path, return the FRU for the device, if known. This will * automatically call libzfs_fru_refresh() if it hasn't already been called by * the consumer. The string returned is valid until the next call to * libzfs_fru_refresh(). */ const char * libzfs_fru_lookup(libzfs_handle_t *hdl, const char *devpath) { size_t idx = fru_strhash(devpath); libzfs_fru_t *frup; if (hdl->libzfs_fru_hash == NULL) libzfs_fru_refresh(hdl); if (hdl->libzfs_fru_hash == NULL) return (NULL); for (frup = hdl->libzfs_fru_hash[idx]; frup != NULL; frup = frup->zf_chain) { if (strcmp(devpath, frup->zf_device) == 0) return (frup->zf_fru); } return (NULL); } /* * Given a fru path, return the device path. This will automatically call * libzfs_fru_refresh() if it hasn't already been called by the consumer. The * string returned is valid until the next call to libzfs_fru_refresh(). */ const char * libzfs_fru_devpath(libzfs_handle_t *hdl, const char *fru) { libzfs_fru_t *frup; size_t idx; if (hdl->libzfs_fru_hash == NULL) libzfs_fru_refresh(hdl); if (hdl->libzfs_fru_hash == NULL) return (NULL); for (idx = 0; idx < ZFS_FRU_HASH_SIZE; idx++) { for (frup = hdl->libzfs_fru_hash[idx]; frup != NULL; frup = frup->zf_next) { if (_topo_fmri_strcmp_noauth(hdl->libzfs_topo_hdl, fru, frup->zf_fru)) return (frup->zf_device); } } return (NULL); } /* * Change the stored FRU for the given vdev. */ int zpool_fru_set(zpool_handle_t *zhp, uint64_t vdev_guid, const char *fru) { - zfs_cmd_t zc = { 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; (void) strncpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); (void) strncpy(zc.zc_value, fru, sizeof (zc.zc_value)); zc.zc_guid = vdev_guid; if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_VDEV_SETFRU, &zc) != 0) return (zpool_standard_error_fmt(zhp->zpool_hdl, errno, dgettext(TEXT_DOMAIN, "cannot set FRU"))); return (0); } /* * Compare to two FRUs, ignoring any authority information. */ boolean_t libzfs_fru_compare(libzfs_handle_t *hdl, const char *a, const char *b) { if (hdl->libzfs_fru_hash == NULL) libzfs_fru_refresh(hdl); if (hdl->libzfs_fru_hash == NULL) return (strcmp(a, b) == 0); return (_topo_fmri_strcmp_noauth(hdl->libzfs_topo_hdl, a, b)); } /* * This special function checks to see whether the FRU indicates it's supposed * to be in the system chassis, but the chassis-id doesn't match. This can * happen in a clustered case, where both head nodes have the same logical * disk, but opening the device on the other head node is meaningless. 
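 *
 * Illustrative example (FMRI syntax approximate): for a FRU such as
 * "hc://:chassis-id=OTHERBOX/chassis=0/bay=7", the "/chassis=0/"
 * component says the disk belongs in the system chassis, but the
 * embedded chassis-id differs from ours, so we report "not self".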
*/ boolean_t libzfs_fru_notself(libzfs_handle_t *hdl, const char *fru) { const char *chassisid; size_t len; if (hdl->libzfs_fru_hash == NULL) libzfs_fru_refresh(hdl); if (hdl->libzfs_chassis_id[0] == '\0') return (B_FALSE); if (strstr(fru, "/chassis=0/") == NULL) return (B_FALSE); if ((chassisid = strstr(fru, ":chassis-id=")) == NULL) return (B_FALSE); chassisid += 12; len = strlen(hdl->libzfs_chassis_id); if (strncmp(chassisid, hdl->libzfs_chassis_id, len) == 0 && (chassisid[len] == '/' || chassisid[len] == ':')) return (B_FALSE); return (B_TRUE); } /* * Clear memory associated with the FRU hash. */ void libzfs_fru_clear(libzfs_handle_t *hdl, boolean_t final) { libzfs_fru_t *frup; while ((frup = hdl->libzfs_fru_list) != NULL) { hdl->libzfs_fru_list = frup->zf_next; free(frup->zf_device); free(frup->zf_fru); free(frup); } hdl->libzfs_fru_list = NULL; if (hdl->libzfs_topo_hdl != NULL) { _topo_snap_release(hdl->libzfs_topo_hdl); _topo_close(hdl->libzfs_topo_hdl); hdl->libzfs_topo_hdl = NULL; } if (final) { free(hdl->libzfs_fru_hash); } else if (hdl->libzfs_fru_hash != NULL) { bzero(hdl->libzfs_fru_hash, ZFS_FRU_HASH_SIZE * sizeof (void *)); } } #else /* HAVE_LIBTOPO */ /* * Clear memory associated with the FRU hash. */ void libzfs_fru_clear(libzfs_handle_t *hdl, boolean_t final) { return; } #endif /* HAVE_LIBTOPO */ diff --git a/lib/libzfs/libzfs_graph.c b/lib/libzfs/libzfs_graph.c index 0e538e3ded84..3c5bdcc6767c 100644 --- a/lib/libzfs/libzfs_graph.c +++ b/lib/libzfs/libzfs_graph.c @@ -1,653 +1,653 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* * Iterate over all children of the current object. This includes the normal * dataset hierarchy, but also arbitrary hierarchies due to clones. We want to * walk all datasets in the pool, and construct a directed graph of the form: * * home * | * +----+----+ * | | * v v ws * bar baz | * | | * v v * @yesterday ----> foo * * In order to construct this graph, we have to walk every dataset in the pool, * because the clone parent is stored as a property of the child, not the * parent. The parent only keeps track of the number of clones. * * In the normal case (without clones) this would be rather expensive. To avoid * unnecessary computation, we first try a walk of the subtree hierarchy * starting from the initial node. At each dataset, we construct a node in the * graph and an edge leading from its parent. If we don't see any snapshots * with a non-zero clone count, then we are finished. * * If we do find a cloned snapshot, then we finish the walk of the current * subtree, but indicate that we need to do a complete walk. 
We then perform a * global walk of all datasets, avoiding the subtree we already processed. * * At the end of this, we'll end up with a directed graph of all relevant (and * possible some irrelevant) datasets in the system. We need to both find our * limiting subgraph and determine a safe ordering in which to destroy the * datasets. We do a topological ordering of our graph starting at our target * dataset, and then walk the results in reverse. * * It's possible for the graph to have cycles if, for example, the user renames * a clone to be the parent of its origin snapshot. The user can request to * generate an error in this case, or ignore the cycle and continue. * * When removing datasets, we want to destroy the snapshots in chronological * order (because this is the most efficient method). In order to accomplish * this, we store the creation transaction group with each vertex and keep each * vertex's edges sorted according to this value. The topological sort will * automatically walk the snapshots in the correct order. */ #include #include #include #include #include #include #include #include #include "libzfs_impl.h" #include "zfs_namecheck.h" #define MIN_EDGECOUNT 4 /* * Vertex structure. Indexed by dataset name, this structure maintains a list * of edges to other vertices. */ struct zfs_edge; typedef struct zfs_vertex { char zv_dataset[ZFS_MAXNAMELEN]; struct zfs_vertex *zv_next; int zv_visited; uint64_t zv_txg; struct zfs_edge **zv_edges; int zv_edgecount; int zv_edgealloc; } zfs_vertex_t; enum { VISIT_SEEN = 1, VISIT_SORT_PRE, VISIT_SORT_POST }; /* * Edge structure. Simply maintains a pointer to the destination vertex. There * is no need to store the source vertex, since we only use edges in the context * of the source vertex. */ typedef struct zfs_edge { zfs_vertex_t *ze_dest; struct zfs_edge *ze_next; } zfs_edge_t; #define ZFS_GRAPH_SIZE 1027 /* this could be dynamic some day */ /* * Graph structure. Vertices are maintained in a hash indexed by dataset name. */ typedef struct zfs_graph { zfs_vertex_t **zg_hash; size_t zg_size; size_t zg_nvertex; const char *zg_root; int zg_clone_count; } zfs_graph_t; /* * Allocate a new edge pointing to the target vertex. */ static zfs_edge_t * zfs_edge_create(libzfs_handle_t *hdl, zfs_vertex_t *dest) { zfs_edge_t *zep = zfs_alloc(hdl, sizeof (zfs_edge_t)); if (zep == NULL) return (NULL); zep->ze_dest = dest; return (zep); } /* * Destroy an edge. */ static void zfs_edge_destroy(zfs_edge_t *zep) { free(zep); } /* * Allocate a new vertex with the given name. */ static zfs_vertex_t * zfs_vertex_create(libzfs_handle_t *hdl, const char *dataset) { zfs_vertex_t *zvp = zfs_alloc(hdl, sizeof (zfs_vertex_t)); if (zvp == NULL) return (NULL); assert(strlen(dataset) < ZFS_MAXNAMELEN); (void) strlcpy(zvp->zv_dataset, dataset, sizeof (zvp->zv_dataset)); if ((zvp->zv_edges = zfs_alloc(hdl, MIN_EDGECOUNT * sizeof (void *))) == NULL) { free(zvp); return (NULL); } zvp->zv_edgealloc = MIN_EDGECOUNT; return (zvp); } /* * Destroy a vertex. Frees up any associated edges. */ static void zfs_vertex_destroy(zfs_vertex_t *zvp) { int i; for (i = 0; i < zvp->zv_edgecount; i++) zfs_edge_destroy(zvp->zv_edges[i]); free(zvp->zv_edges); free(zvp); } /* * Given a vertex, add an edge to the destination vertex. 
*/ static int zfs_vertex_add_edge(libzfs_handle_t *hdl, zfs_vertex_t *zvp, zfs_vertex_t *dest) { zfs_edge_t *zep = zfs_edge_create(hdl, dest); if (zep == NULL) return (-1); if (zvp->zv_edgecount == zvp->zv_edgealloc) { void *ptr; if ((ptr = zfs_realloc(hdl, zvp->zv_edges, zvp->zv_edgealloc * sizeof (void *), zvp->zv_edgealloc * 2 * sizeof (void *))) == NULL) return (-1); zvp->zv_edges = ptr; zvp->zv_edgealloc *= 2; } zvp->zv_edges[zvp->zv_edgecount++] = zep; return (0); } static int zfs_edge_compare(const void *a, const void *b) { const zfs_edge_t *ea = *((zfs_edge_t **)a); const zfs_edge_t *eb = *((zfs_edge_t **)b); if (ea->ze_dest->zv_txg < eb->ze_dest->zv_txg) return (-1); if (ea->ze_dest->zv_txg > eb->ze_dest->zv_txg) return (1); return (0); } /* * Sort the given vertex edges according to the creation txg of each vertex. */ static void zfs_vertex_sort_edges(zfs_vertex_t *zvp) { if (zvp->zv_edgecount == 0) return; qsort(zvp->zv_edges, zvp->zv_edgecount, sizeof (void *), zfs_edge_compare); } /* * Construct a new graph object. We allow the size to be specified as a * parameter so in the future we can size the hash according to the number of * datasets in the pool. */ static zfs_graph_t * zfs_graph_create(libzfs_handle_t *hdl, const char *dataset, size_t size) { zfs_graph_t *zgp = zfs_alloc(hdl, sizeof (zfs_graph_t)); if (zgp == NULL) return (NULL); zgp->zg_size = size; if ((zgp->zg_hash = zfs_alloc(hdl, size * sizeof (zfs_vertex_t *))) == NULL) { free(zgp); return (NULL); } zgp->zg_root = dataset; zgp->zg_clone_count = 0; return (zgp); } /* * Destroy a graph object. We have to iterate over all the hash chains, * destroying each vertex in the process. */ static void zfs_graph_destroy(zfs_graph_t *zgp) { int i; zfs_vertex_t *current, *next; for (i = 0; i < zgp->zg_size; i++) { current = zgp->zg_hash[i]; while (current != NULL) { next = current->zv_next; zfs_vertex_destroy(current); current = next; } } free(zgp->zg_hash); free(zgp); } /* * Graph hash function. Classic bernstein k=33 hash function, taken from * usr/src/cmd/sgs/tools/common/strhash.c */ static size_t zfs_graph_hash(zfs_graph_t *zgp, const char *str) { size_t hash = 5381; int c; while ((c = *str++) != 0) hash = ((hash << 5) + hash) + c; /* hash * 33 + c */ return (hash % zgp->zg_size); } /* * Given a dataset name, finds the associated vertex, creating it if necessary. */ static zfs_vertex_t * zfs_graph_lookup(libzfs_handle_t *hdl, zfs_graph_t *zgp, const char *dataset, uint64_t txg) { size_t idx = zfs_graph_hash(zgp, dataset); zfs_vertex_t *zvp; for (zvp = zgp->zg_hash[idx]; zvp != NULL; zvp = zvp->zv_next) { if (strcmp(zvp->zv_dataset, dataset) == 0) { if (zvp->zv_txg == 0) zvp->zv_txg = txg; return (zvp); } } if ((zvp = zfs_vertex_create(hdl, dataset)) == NULL) return (NULL); zvp->zv_next = zgp->zg_hash[idx]; zvp->zv_txg = txg; zgp->zg_hash[idx] = zvp; zgp->zg_nvertex++; return (zvp); } /* * Given two dataset names, create an edge between them. For the source vertex, * mark 'zv_visited' to indicate that we have seen this vertex, and not simply * created it as a destination of another edge. If 'dest' is NULL, then this * is an individual vertex (i.e. the starting vertex), so don't add an edge. 
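 *
 * For example (hypothetical names, mirroring the clone case in
 * iterate_children()): zfs_graph_add(hdl, zgp, "pool/home@yesterday",
 * "pool/ws/foo", txg) records the clone "pool/ws/foo" as a dependent
 * of the snapshot "pool/home@yesterday".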
*/ static int zfs_graph_add(libzfs_handle_t *hdl, zfs_graph_t *zgp, const char *source, const char *dest, uint64_t txg) { zfs_vertex_t *svp, *dvp; if ((svp = zfs_graph_lookup(hdl, zgp, source, 0)) == NULL) return (-1); svp->zv_visited = VISIT_SEEN; if (dest != NULL) { dvp = zfs_graph_lookup(hdl, zgp, dest, txg); if (dvp == NULL) return (-1); if (zfs_vertex_add_edge(hdl, svp, dvp) != 0) return (-1); } return (0); } /* * Iterate over all children of the given dataset, adding any vertices * as necessary. Returns -1 if there was an error, or 0 otherwise. * This is a simple recursive algorithm - the ZFS namespace typically * is very flat. We manually invoke the necessary ioctl() calls to * avoid the overhead and additional semantics of zfs_open(). */ static int iterate_children(libzfs_handle_t *hdl, zfs_graph_t *zgp, const char *dataset) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; zfs_vertex_t *zvp; /* * Look up the source vertex, and avoid it if we've seen it before. */ zvp = zfs_graph_lookup(hdl, zgp, dataset, 0); if (zvp == NULL) return (-1); if (zvp->zv_visited == VISIT_SEEN) return (0); /* * Iterate over all children */ for ((void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name)); ioctl(hdl->libzfs_fd, ZFS_IOC_DATASET_LIST_NEXT, &zc) == 0; (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name))) { /* * Get statistics for this dataset, to determine the type of the * dataset and clone statistics. If this fails, the dataset has * since been removed, and we're pretty much screwed anyway. */ zc.zc_objset_stats.dds_origin[0] = '\0'; if (ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0) continue; if (zc.zc_objset_stats.dds_origin[0] != '\0') { if (zfs_graph_add(hdl, zgp, zc.zc_objset_stats.dds_origin, zc.zc_name, zc.zc_objset_stats.dds_creation_txg) != 0) return (-1); /* * Count origins only if they are contained in the graph */ if (isa_child_of(zc.zc_objset_stats.dds_origin, zgp->zg_root)) zgp->zg_clone_count--; } /* * Add an edge between the parent and the child. */ if (zfs_graph_add(hdl, zgp, dataset, zc.zc_name, zc.zc_objset_stats.dds_creation_txg) != 0) return (-1); /* * Recursively visit child */ if (iterate_children(hdl, zgp, zc.zc_name)) return (-1); } /* * Now iterate over all snapshots. */ bzero(&zc, sizeof (zc)); for ((void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name)); ioctl(hdl->libzfs_fd, ZFS_IOC_SNAPSHOT_LIST_NEXT, &zc) == 0; (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name))) { /* * Get statistics for this dataset, to determine the type of the * dataset and clone statistics. If this fails, the dataset has * since been removed, and we're pretty much screwed anyway. */ if (ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0) continue; /* * Add an edge between the parent and the child. */ if (zfs_graph_add(hdl, zgp, dataset, zc.zc_name, zc.zc_objset_stats.dds_creation_txg) != 0) return (-1); zgp->zg_clone_count += zc.zc_objset_stats.dds_num_clones; } zvp->zv_visited = VISIT_SEEN; return (0); } /* * Returns false if there are no snapshots with dependent clones in this * subtree or if all of those clones are also in this subtree. Returns * true if there is an error or there are external dependents. 
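 *
 * For example, if a snapshot beneath 'dataset' has been cloned into a
 * filesystem outside that subtree, the clone count remaining after the
 * subtree walk is non-zero and we report external dependents.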
*/ static boolean_t external_dependents(libzfs_handle_t *hdl, zfs_graph_t *zgp, const char *dataset) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; /* * Check whether this dataset is a clone or has clones since * iterate_children() only checks the children. */ (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name)); if (ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0) return (B_TRUE); if (zc.zc_objset_stats.dds_origin[0] != '\0') { if (zfs_graph_add(hdl, zgp, zc.zc_objset_stats.dds_origin, zc.zc_name, zc.zc_objset_stats.dds_creation_txg) != 0) return (B_TRUE); if (isa_child_of(zc.zc_objset_stats.dds_origin, dataset)) zgp->zg_clone_count--; } if ((zc.zc_objset_stats.dds_num_clones) || iterate_children(hdl, zgp, dataset)) return (B_TRUE); return (zgp->zg_clone_count != 0); } /* * Construct a complete graph of all necessary vertices. First, iterate over * only our object's children. If no cloned snapshots are found, or all of * the cloned snapshots are in this subtree then return a graph of the subtree. * Otherwise, start at the root of the pool and iterate over all datasets. */ static zfs_graph_t * construct_graph(libzfs_handle_t *hdl, const char *dataset) { zfs_graph_t *zgp = zfs_graph_create(hdl, dataset, ZFS_GRAPH_SIZE); int ret = 0; if (zgp == NULL) return (zgp); if ((strchr(dataset, '/') == NULL) || (external_dependents(hdl, zgp, dataset))) { /* * Determine pool name and try again. */ int len = strcspn(dataset, "/@") + 1; char *pool = zfs_alloc(hdl, len); if (pool == NULL) { zfs_graph_destroy(zgp); return (NULL); } (void) strlcpy(pool, dataset, len); if (iterate_children(hdl, zgp, pool) == -1 || zfs_graph_add(hdl, zgp, pool, NULL, 0) != 0) { free(pool); zfs_graph_destroy(zgp); return (NULL); } free(pool); } if (ret == -1 || zfs_graph_add(hdl, zgp, dataset, NULL, 0) != 0) { zfs_graph_destroy(zgp); return (NULL); } return (zgp); } /* * Given a graph, do a recursive topological sort into the given array. This is * really just a depth first search, so that the deepest nodes appear first. * hijack the 'zv_visited' marker to avoid visiting the same vertex twice. */ static int topo_sort(libzfs_handle_t *hdl, boolean_t allowrecursion, char **result, size_t *idx, zfs_vertex_t *zgv) { int i; if (zgv->zv_visited == VISIT_SORT_PRE && !allowrecursion) { /* * If we've already seen this vertex as part of our depth-first * search, then we have a cyclic dependency, and we must return * an error. */ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "recursive dependency at '%s'"), zgv->zv_dataset); return (zfs_error(hdl, EZFS_RECURSIVE, dgettext(TEXT_DOMAIN, "cannot determine dependent datasets"))); } else if (zgv->zv_visited >= VISIT_SORT_PRE) { /* * If we've already processed this as part of the topological * sort, then don't bother doing so again. */ return (0); } zgv->zv_visited = VISIT_SORT_PRE; /* avoid doing a search if we don't have to */ zfs_vertex_sort_edges(zgv); for (i = 0; i < zgv->zv_edgecount; i++) { if (topo_sort(hdl, allowrecursion, result, idx, zgv->zv_edges[i]->ze_dest) != 0) return (-1); } /* we may have visited this in the course of the above */ if (zgv->zv_visited == VISIT_SORT_POST) return (0); if ((result[*idx] = zfs_alloc(hdl, strlen(zgv->zv_dataset) + 1)) == NULL) return (-1); (void) strcpy(result[*idx], zgv->zv_dataset); *idx += 1; zgv->zv_visited = VISIT_SORT_POST; return (0); } /* * The only public interface for this file. Do the dirty work of constructing a * child list for the given object. 
Construct the graph, do the toplogical * sort, and then return the array of strings to the caller. * * The 'allowrecursion' parameter controls behavior when cycles are found. If * it is set, the the cycle is ignored and the results returned as if the cycle * did not exist. If it is not set, then the routine will generate an error if * a cycle is found. */ int get_dependents(libzfs_handle_t *hdl, boolean_t allowrecursion, const char *dataset, char ***result, size_t *count) { zfs_graph_t *zgp; zfs_vertex_t *zvp; if ((zgp = construct_graph(hdl, dataset)) == NULL) return (-1); if ((*result = zfs_alloc(hdl, zgp->zg_nvertex * sizeof (char *))) == NULL) { zfs_graph_destroy(zgp); return (-1); } if ((zvp = zfs_graph_lookup(hdl, zgp, dataset, 0)) == NULL) { free(*result); zfs_graph_destroy(zgp); return (-1); } *count = 0; if (topo_sort(hdl, allowrecursion, *result, count, zvp) != 0) { free(*result); zfs_graph_destroy(zgp); return (-1); } /* * Get rid of the last entry, which is our starting vertex and not * strictly a dependent. */ assert(*count > 0); free((*result)[*count - 1]); (*count)--; zfs_graph_destroy(zgp); return (0); } diff --git a/lib/libzfs/libzfs_import.c b/lib/libzfs/libzfs_import.c index 9e79bd934899..af6a43d8388f 100644 --- a/lib/libzfs/libzfs_import.c +++ b/lib/libzfs/libzfs_import.c @@ -1,1624 +1,1624 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright 2011 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2012 by Delphix. All rights reserved. */ /* * Pool import support functions. * * To import a pool, we rely on reading the configuration information from the * ZFS label of each device. If we successfully read the label, then we * organize the configuration information in the following hierarchy: * * pool guid -> toplevel vdev guid -> label txg * * Duplicate entries matching this same tuple will be discarded. Once we have * examined every device, we pick the best label txg config for each toplevel * vdev. We then arrange these toplevel vdevs into a complete pool config, and * update any paths that have changed. Finally, we attempt to import the pool * using our derived config, and record the results. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef HAVE_LIBBLKID #include #endif #include "libzfs.h" #include "libzfs_impl.h" /* * Intermediate structures used to gather configuration information. 
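 *
 * The nesting mirrors the hierarchy described above: each pool_entry_t
 * (pool guid) holds a list of vdev_entry_t (top-level vdev guid), each
 * of which holds a list of config_entry_t (one label config per txg).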
*/ typedef struct config_entry { uint64_t ce_txg; nvlist_t *ce_config; struct config_entry *ce_next; } config_entry_t; typedef struct vdev_entry { uint64_t ve_guid; config_entry_t *ve_configs; struct vdev_entry *ve_next; } vdev_entry_t; typedef struct pool_entry { uint64_t pe_guid; vdev_entry_t *pe_vdevs; struct pool_entry *pe_next; } pool_entry_t; typedef struct name_entry { char *ne_name; uint64_t ne_guid; uint64_t ne_order; struct name_entry *ne_next; } name_entry_t; typedef struct pool_list { pool_entry_t *pools; name_entry_t *names; } pool_list_t; static char * get_devid(const char *path) { int fd; ddi_devid_t devid; char *minor, *ret; if ((fd = open(path, O_RDONLY)) < 0) return (NULL); minor = NULL; ret = NULL; if (devid_get(fd, &devid) == 0) { if (devid_get_minor_name(fd, &minor) == 0) ret = devid_str_encode(devid, minor); if (minor != NULL) devid_str_free(minor); devid_free(devid); } (void) close(fd); return (ret); } /* * Go through and fix up any path and/or devid information for the given vdev * configuration. */ static int fix_paths(nvlist_t *nv, name_entry_t *names) { nvlist_t **child; uint_t c, children; uint64_t guid; name_entry_t *ne, *best; char *path, *devid; if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, &child, &children) == 0) { for (c = 0; c < children; c++) if (fix_paths(child[c], names) != 0) return (-1); return (0); } /* * This is a leaf (file or disk) vdev. In either case, go through * the name list and see if we find a matching guid. If so, replace * the path and see if we can calculate a new devid. * * There may be multiple names associated with a particular guid, in * which case we have overlapping partitions or multiple paths to the * same disk. In this case we prefer to use the path name which * matches the ZPOOL_CONFIG_PATH. If no matching entry is found we * use the lowest order device which corresponds to the first match * while traversing the ZPOOL_IMPORT_PATH search path. */ verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) == 0); if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) != 0) path = NULL; best = NULL; for (ne = names; ne != NULL; ne = ne->ne_next) { if (ne->ne_guid == guid) { if (path == NULL) { best = ne; break; } if ((strlen(path) == strlen(ne->ne_name)) && !strncmp(path, ne->ne_name, strlen(path))) { best = ne; break; } if (best == NULL || ne->ne_order < best->ne_order) best = ne; } } if (best == NULL) return (0); if (nvlist_add_string(nv, ZPOOL_CONFIG_PATH, best->ne_name) != 0) return (-1); if ((devid = get_devid(best->ne_name)) == NULL) { (void) nvlist_remove_all(nv, ZPOOL_CONFIG_DEVID); } else { if (nvlist_add_string(nv, ZPOOL_CONFIG_DEVID, devid) != 0) return (-1); devid_str_free(devid); } return (0); } /* * Add the given configuration to the list of known devices. */ static int add_config(libzfs_handle_t *hdl, pool_list_t *pl, const char *path, int order, nvlist_t *config) { uint64_t pool_guid, vdev_guid, top_guid, txg, state; pool_entry_t *pe; vdev_entry_t *ve; config_entry_t *ce; name_entry_t *ne; /* * If this is a hot spare not currently in use or level 2 cache * device, add it to the list of names to translate, but don't do * anything else. 
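 *
 * (Such devices are recorded only in the name list, which fix_paths()
 * later uses to translate vdev guids back to device paths.)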
*/ if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE, &state) == 0 && (state == POOL_STATE_SPARE || state == POOL_STATE_L2CACHE) && nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, &vdev_guid) == 0) { if ((ne = zfs_alloc(hdl, sizeof (name_entry_t))) == NULL) return (-1); if ((ne->ne_name = zfs_strdup(hdl, path)) == NULL) { free(ne); return (-1); } ne->ne_guid = vdev_guid; ne->ne_order = order; ne->ne_next = pl->names; pl->names = ne; return (0); } /* * If we have a valid config but cannot read any of these fields, then * it means we have a half-initialized label. In vdev_label_init() * we write a label with txg == 0 so that we can identify the device * in case the user refers to the same disk later on. If we fail to * create the pool, we'll be left with a label in this state * which should not be considered part of a valid pool. */ if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &pool_guid) != 0 || nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, &vdev_guid) != 0 || nvlist_lookup_uint64(config, ZPOOL_CONFIG_TOP_GUID, &top_guid) != 0 || nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG, &txg) != 0 || txg == 0) { nvlist_free(config); return (0); } /* * First, see if we know about this pool. If not, then add it to the * list of known pools. */ for (pe = pl->pools; pe != NULL; pe = pe->pe_next) { if (pe->pe_guid == pool_guid) break; } if (pe == NULL) { if ((pe = zfs_alloc(hdl, sizeof (pool_entry_t))) == NULL) { nvlist_free(config); return (-1); } pe->pe_guid = pool_guid; pe->pe_next = pl->pools; pl->pools = pe; } /* * Second, see if we know about this toplevel vdev. Add it if its * missing. */ for (ve = pe->pe_vdevs; ve != NULL; ve = ve->ve_next) { if (ve->ve_guid == top_guid) break; } if (ve == NULL) { if ((ve = zfs_alloc(hdl, sizeof (vdev_entry_t))) == NULL) { nvlist_free(config); return (-1); } ve->ve_guid = top_guid; ve->ve_next = pe->pe_vdevs; pe->pe_vdevs = ve; } /* * Third, see if we have a config with a matching transaction group. If * so, then we do nothing. Otherwise, add it to the list of known * configs. */ for (ce = ve->ve_configs; ce != NULL; ce = ce->ce_next) { if (ce->ce_txg == txg) break; } if (ce == NULL) { if ((ce = zfs_alloc(hdl, sizeof (config_entry_t))) == NULL) { nvlist_free(config); return (-1); } ce->ce_txg = txg; ce->ce_config = config; ce->ce_next = ve->ve_configs; ve->ve_configs = ce; } else { nvlist_free(config); } /* * At this point we've successfully added our config to the list of * known configs. The last thing to do is add the vdev guid -> path * mappings so that we can fix up the configuration as necessary before * doing the import. */ if ((ne = zfs_alloc(hdl, sizeof (name_entry_t))) == NULL) return (-1); if ((ne->ne_name = zfs_strdup(hdl, path)) == NULL) { free(ne); return (-1); } ne->ne_guid = vdev_guid; ne->ne_order = order; ne->ne_next = pl->names; pl->names = ne; return (0); } /* * Returns true if the named pool matches the given GUID. 
*/ static int pool_active(libzfs_handle_t *hdl, const char *name, uint64_t guid, boolean_t *isactive) { zpool_handle_t *zhp; uint64_t theguid; if (zpool_open_silent(hdl, name, &zhp) != 0) return (-1); if (zhp == NULL) { *isactive = B_FALSE; return (0); } verify(nvlist_lookup_uint64(zhp->zpool_config, ZPOOL_CONFIG_POOL_GUID, &theguid) == 0); zpool_close(zhp); *isactive = (theguid == guid); return (0); } static nvlist_t * refresh_config(libzfs_handle_t *hdl, nvlist_t *config) { nvlist_t *nvl; - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; int err; if (zcmd_write_conf_nvlist(hdl, &zc, config) != 0) return (NULL); if (zcmd_alloc_dst_nvlist(hdl, &zc, zc.zc_nvlist_conf_size * 2) != 0) { zcmd_free_nvlists(&zc); return (NULL); } while ((err = ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_TRYIMPORT, &zc)) != 0 && errno == ENOMEM) { if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) { zcmd_free_nvlists(&zc); return (NULL); } } if (err) { zcmd_free_nvlists(&zc); return (NULL); } if (zcmd_read_dst_nvlist(hdl, &zc, &nvl) != 0) { zcmd_free_nvlists(&zc); return (NULL); } zcmd_free_nvlists(&zc); return (nvl); } /* * Determine if the vdev id is a hole in the namespace. */ boolean_t vdev_is_hole(uint64_t *hole_array, uint_t holes, uint_t id) { int c; for (c = 0; c < holes; c++) { /* Top-level is a hole */ if (hole_array[c] == id) return (B_TRUE); } return (B_FALSE); } /* * Convert our list of pools into the definitive set of configurations. We * start by picking the best config for each toplevel vdev. Once that's done, * we assemble the toplevel vdevs into a full config for the pool. We make a * pass to fix up any incorrect paths, and then add it to the main list to * return to the user. */ static nvlist_t * get_configs(libzfs_handle_t *hdl, pool_list_t *pl, boolean_t active_ok) { pool_entry_t *pe; vdev_entry_t *ve; config_entry_t *ce; nvlist_t *ret = NULL, *config = NULL, *tmp = NULL, *nvtop, *nvroot; nvlist_t **spares, **l2cache; uint_t i, nspares, nl2cache; boolean_t config_seen; uint64_t best_txg; char *name, *hostname = NULL; uint64_t guid; uint_t children = 0; nvlist_t **child = NULL; uint_t holes; uint64_t *hole_array, max_id; uint_t c; boolean_t isactive; uint64_t hostid; nvlist_t *nvl; boolean_t found_one = B_FALSE; boolean_t valid_top_config = B_FALSE; if (nvlist_alloc(&ret, 0, 0) != 0) goto nomem; for (pe = pl->pools; pe != NULL; pe = pe->pe_next) { uint64_t id, max_txg = 0; if (nvlist_alloc(&config, NV_UNIQUE_NAME, 0) != 0) goto nomem; config_seen = B_FALSE; /* * Iterate over all toplevel vdevs. Grab the pool configuration * from the first one we find, and then go through the rest and * add them as necessary to the 'vdevs' member of the config. */ for (ve = pe->pe_vdevs; ve != NULL; ve = ve->ve_next) { /* * Determine the best configuration for this vdev by * selecting the config with the latest transaction * group. */ best_txg = 0; for (ce = ve->ve_configs; ce != NULL; ce = ce->ce_next) { if (ce->ce_txg > best_txg) { tmp = ce->ce_config; best_txg = ce->ce_txg; } } /* * We rely on the fact that the max txg for the * pool will contain the most up-to-date information * about the valid top-levels in the vdev namespace. 
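refresh_config() above retries ZFS_IOC_POOL_TRYIMPORT whenever the kernel reports ENOMEM, growing the destination nvlist buffer between attempts. The sketch below shows only the generic grow-and-retry shape with placeholder fields; the real zfs_cmd_t layout and the zcmd_*() helpers are internal to libzfs.

#include <errno.h>
#include <stdlib.h>
#include <sys/ioctl.h>

struct grow_req {
	void	*buf;	/* stand-in for the packed destination nvlist */
	size_t	 len;
};

/* Issue the ioctl, doubling the reply buffer while ENOMEM comes back. */
static int
ioctl_with_growing_buffer(int fd, unsigned long request, struct grow_req *req)
{
	void *nbuf;

	for (;;) {
		if (ioctl(fd, request, req) == 0)
			return (0);
		if (errno != ENOMEM)
			return (-1);		/* genuine failure */
		if ((nbuf = realloc(req->buf, req->len * 2)) == NULL)
			return (-1);
		req->buf = nbuf;
		req->len *= 2;			/* retry with more room */
	}
}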
*/ if (best_txg > max_txg) { (void) nvlist_remove(config, ZPOOL_CONFIG_VDEV_CHILDREN, DATA_TYPE_UINT64); (void) nvlist_remove(config, ZPOOL_CONFIG_HOLE_ARRAY, DATA_TYPE_UINT64_ARRAY); max_txg = best_txg; hole_array = NULL; holes = 0; max_id = 0; valid_top_config = B_FALSE; if (nvlist_lookup_uint64(tmp, ZPOOL_CONFIG_VDEV_CHILDREN, &max_id) == 0) { verify(nvlist_add_uint64(config, ZPOOL_CONFIG_VDEV_CHILDREN, max_id) == 0); valid_top_config = B_TRUE; } if (nvlist_lookup_uint64_array(tmp, ZPOOL_CONFIG_HOLE_ARRAY, &hole_array, &holes) == 0) { verify(nvlist_add_uint64_array(config, ZPOOL_CONFIG_HOLE_ARRAY, hole_array, holes) == 0); } } if (!config_seen) { /* * Copy the relevant pieces of data to the pool * configuration: * * version * pool guid * name * comment (if available) * pool state * hostid (if available) * hostname (if available) */ uint64_t state, version; char *comment = NULL; version = fnvlist_lookup_uint64(tmp, ZPOOL_CONFIG_VERSION); fnvlist_add_uint64(config, ZPOOL_CONFIG_VERSION, version); guid = fnvlist_lookup_uint64(tmp, ZPOOL_CONFIG_POOL_GUID); fnvlist_add_uint64(config, ZPOOL_CONFIG_POOL_GUID, guid); name = fnvlist_lookup_string(tmp, ZPOOL_CONFIG_POOL_NAME); fnvlist_add_string(config, ZPOOL_CONFIG_POOL_NAME, name); if (nvlist_lookup_string(tmp, ZPOOL_CONFIG_COMMENT, &comment) == 0) fnvlist_add_string(config, ZPOOL_CONFIG_COMMENT, comment); state = fnvlist_lookup_uint64(tmp, ZPOOL_CONFIG_POOL_STATE); fnvlist_add_uint64(config, ZPOOL_CONFIG_POOL_STATE, state); hostid = 0; if (nvlist_lookup_uint64(tmp, ZPOOL_CONFIG_HOSTID, &hostid) == 0) { fnvlist_add_uint64(config, ZPOOL_CONFIG_HOSTID, hostid); hostname = fnvlist_lookup_string(tmp, ZPOOL_CONFIG_HOSTNAME); fnvlist_add_string(config, ZPOOL_CONFIG_HOSTNAME, hostname); } config_seen = B_TRUE; } /* * Add this top-level vdev to the child array. */ verify(nvlist_lookup_nvlist(tmp, ZPOOL_CONFIG_VDEV_TREE, &nvtop) == 0); verify(nvlist_lookup_uint64(nvtop, ZPOOL_CONFIG_ID, &id) == 0); if (id >= children) { nvlist_t **newchild; newchild = zfs_alloc(hdl, (id + 1) * sizeof (nvlist_t *)); if (newchild == NULL) goto nomem; for (c = 0; c < children; c++) newchild[c] = child[c]; free(child); child = newchild; children = id + 1; } if (nvlist_dup(nvtop, &child[id], 0) != 0) goto nomem; } /* * If we have information about all the top-levels then * clean up the nvlist which we've constructed. This * means removing any extraneous devices that are * beyond the valid range or adding devices to the end * of our array which appear to be missing. */ if (valid_top_config) { if (max_id < children) { for (c = max_id; c < children; c++) nvlist_free(child[c]); children = max_id; } else if (max_id > children) { nvlist_t **newchild; newchild = zfs_alloc(hdl, (max_id) * sizeof (nvlist_t *)); if (newchild == NULL) goto nomem; for (c = 0; c < children; c++) newchild[c] = child[c]; free(child); child = newchild; children = max_id; } } verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &guid) == 0); /* * The vdev namespace may contain holes as a result of * device removal. We must add them back into the vdev * tree before we process any missing devices. */ if (holes > 0) { ASSERT(valid_top_config); for (c = 0; c < children; c++) { nvlist_t *holey; if (child[c] != NULL || !vdev_is_hole(hole_array, holes, c)) continue; if (nvlist_alloc(&holey, NV_UNIQUE_NAME, 0) != 0) goto nomem; /* * Holes in the namespace are treated as * "hole" top-level vdevs and have a * special flag set on them. 
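When a top-level vdev's id is at or beyond the current child count, get_configs() grows the child array so the vdev can sit at its own index. A stand-alone sketch of that growth step, using calloc() in place of zfs_alloc() and an opaque element type:

#include <stdlib.h>

/*
 * Ensure *childp has at least id+1 slots; freshly added slots stay NULL so a
 * later pass can spot ids that never appeared in any label.
 */
static int
grow_child_array(void ***childp, unsigned int *childrenp, unsigned int id)
{
	void **newchild;
	unsigned int c;

	if (id < *childrenp)
		return (0);			/* already large enough */

	if ((newchild = calloc(id + 1, sizeof (void *))) == NULL)
		return (-1);
	for (c = 0; c < *childrenp; c++)
		newchild[c] = (*childp)[c];	/* keep existing entries */
	free(*childp);
	*childp = newchild;
	*childrenp = id + 1;
	return (0);
}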
*/ if (nvlist_add_string(holey, ZPOOL_CONFIG_TYPE, VDEV_TYPE_HOLE) != 0 || nvlist_add_uint64(holey, ZPOOL_CONFIG_ID, c) != 0 || nvlist_add_uint64(holey, ZPOOL_CONFIG_GUID, 0ULL) != 0) goto nomem; child[c] = holey; } } /* * Look for any missing top-level vdevs. If this is the case, * create a faked up 'missing' vdev as a placeholder. We cannot * simply compress the child array, because the kernel performs * certain checks to make sure the vdev IDs match their location * in the configuration. */ for (c = 0; c < children; c++) { if (child[c] == NULL) { nvlist_t *missing; if (nvlist_alloc(&missing, NV_UNIQUE_NAME, 0) != 0) goto nomem; if (nvlist_add_string(missing, ZPOOL_CONFIG_TYPE, VDEV_TYPE_MISSING) != 0 || nvlist_add_uint64(missing, ZPOOL_CONFIG_ID, c) != 0 || nvlist_add_uint64(missing, ZPOOL_CONFIG_GUID, 0ULL) != 0) { nvlist_free(missing); goto nomem; } child[c] = missing; } } /* * Put all of this pool's top-level vdevs into a root vdev. */ if (nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) != 0) goto nomem; if (nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT) != 0 || nvlist_add_uint64(nvroot, ZPOOL_CONFIG_ID, 0ULL) != 0 || nvlist_add_uint64(nvroot, ZPOOL_CONFIG_GUID, guid) != 0 || nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, child, children) != 0) { nvlist_free(nvroot); goto nomem; } for (c = 0; c < children; c++) nvlist_free(child[c]); free(child); children = 0; child = NULL; /* * Go through and fix up any paths and/or devids based on our * known list of vdev GUID -> path mappings. */ if (fix_paths(nvroot, pl->names) != 0) { nvlist_free(nvroot); goto nomem; } /* * Add the root vdev to this pool's configuration. */ if (nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, nvroot) != 0) { nvlist_free(nvroot); goto nomem; } nvlist_free(nvroot); /* * zdb uses this path to report on active pools that were * imported or created using -R. */ if (active_ok) goto add_pool; /* * Determine if this pool is currently active, in which case we * can't actually import it. */ verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME, &name) == 0); verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &guid) == 0); if (pool_active(hdl, name, guid, &isactive) != 0) goto error; if (isactive) { nvlist_free(config); config = NULL; continue; } if ((nvl = refresh_config(hdl, config)) == NULL) { nvlist_free(config); config = NULL; continue; } nvlist_free(config); config = nvl; /* * Go through and update the paths for spares, now that we have * them. */ verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0) { for (i = 0; i < nspares; i++) { if (fix_paths(spares[i], pl->names) != 0) goto nomem; } } /* * Update the paths for l2cache devices. */ if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0) { for (i = 0; i < nl2cache; i++) { if (fix_paths(l2cache[i], pl->names) != 0) goto nomem; } } /* * Restore the original information read from the actual label. */ (void) nvlist_remove(config, ZPOOL_CONFIG_HOSTID, DATA_TYPE_UINT64); (void) nvlist_remove(config, ZPOOL_CONFIG_HOSTNAME, DATA_TYPE_STRING); if (hostid != 0) { verify(nvlist_add_uint64(config, ZPOOL_CONFIG_HOSTID, hostid) == 0); verify(nvlist_add_string(config, ZPOOL_CONFIG_HOSTNAME, hostname) == 0); } add_pool: /* * Add this pool to the list of configs. 
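Both the hole and the missing placeholders above are bare nvlists carrying only a type, an id, and a zero guid. A minimal sketch of that construction, assuming ZPOOL_CONFIG_* and VDEV_TYPE_* come from sys/fs/zfs.h:

#include <stdint.h>
#include <libnvpair.h>
#include <sys/fs/zfs.h>		/* ZPOOL_CONFIG_*, VDEV_TYPE_* (assumed) */

/*
 * Build a synthetic vdev entry for child slot 'c'; the caller passes
 * VDEV_TYPE_HOLE or VDEV_TYPE_MISSING as the type.
 */
static nvlist_t *
make_placeholder_vdev(const char *type, uint64_t c)
{
	nvlist_t *nv;

	if (nvlist_alloc(&nv, NV_UNIQUE_NAME, 0) != 0)
		return (NULL);
	if (nvlist_add_string(nv, ZPOOL_CONFIG_TYPE, type) != 0 ||
	    nvlist_add_uint64(nv, ZPOOL_CONFIG_ID, c) != 0 ||
	    nvlist_add_uint64(nv, ZPOOL_CONFIG_GUID, 0ULL) != 0) {
		nvlist_free(nv);
		return (NULL);
	}
	return (nv);
}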
*/ verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME, &name) == 0); if (nvlist_add_nvlist(ret, name, config) != 0) goto nomem; found_one = B_TRUE; nvlist_free(config); config = NULL; } if (!found_one) { nvlist_free(ret); ret = NULL; } return (ret); nomem: (void) no_memory(hdl); error: nvlist_free(config); nvlist_free(ret); for (c = 0; c < children; c++) nvlist_free(child[c]); free(child); return (NULL); } /* * Return the offset of the given label. */ static uint64_t label_offset(uint64_t size, int l) { ASSERT(P2PHASE_TYPED(size, sizeof (vdev_label_t), uint64_t) == 0); return (l * sizeof (vdev_label_t) + (l < VDEV_LABELS / 2 ? 0 : size - VDEV_LABELS * sizeof (vdev_label_t))); } /* * Given a file descriptor, read the label information and return an nvlist * describing the configuration, if there is one. */ int zpool_read_label(int fd, nvlist_t **config) { struct stat64 statbuf; int l; vdev_label_t *label; uint64_t state, txg, size; *config = NULL; if (fstat64_blk(fd, &statbuf) == -1) return (0); size = P2ALIGN_TYPED(statbuf.st_size, sizeof (vdev_label_t), uint64_t); if ((label = malloc(sizeof (vdev_label_t))) == NULL) return (-1); for (l = 0; l < VDEV_LABELS; l++) { if (pread64(fd, label, sizeof (vdev_label_t), label_offset(size, l)) != sizeof (vdev_label_t)) continue; if (nvlist_unpack(label->vl_vdev_phys.vp_nvlist, sizeof (label->vl_vdev_phys.vp_nvlist), config, 0) != 0) continue; if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_STATE, &state) != 0 || state > POOL_STATE_L2CACHE) { nvlist_free(*config); continue; } if (state != POOL_STATE_SPARE && state != POOL_STATE_L2CACHE && (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_TXG, &txg) != 0 || txg == 0)) { nvlist_free(*config); continue; } free(label); return (0); } free(label); *config = NULL; return (0); } /* * Given a file descriptor, clear (zero) the label information. This function * is used in the appliance stack as part of the ZFS sysevent module and * to implement the "zpool labelclear" command. 
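label_offset() above encodes the on-disk layout: two labels at the front of the device and two packed against the end of the aligned size. A small worked example, assuming a 256 KiB label (the real value is sizeof (vdev_label_t)) and four labels:

#include <stdio.h>
#include <stdint.h>

#define	EX_LABEL_SIZE	(256ULL * 1024)	/* assumed sizeof (vdev_label_t) */
#define	EX_NLABELS	4		/* assumed VDEV_LABELS */

static uint64_t
example_label_offset(uint64_t size, int l)
{
	return (l * EX_LABEL_SIZE + (l < EX_NLABELS / 2 ?
	    0 : size - EX_NLABELS * EX_LABEL_SIZE));
}

int
main(void)
{
	uint64_t size = 1024ULL * 1024 * 1024;	/* 1 GiB, already aligned */
	int l;

	for (l = 0; l < EX_NLABELS; l++)
		(void) printf("label %d at byte %llu\n", l,
		    (unsigned long long)example_label_offset(size, l));
	return (0);
}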
*/ int zpool_clear_label(int fd) { struct stat64 statbuf; int l; vdev_label_t *label; uint64_t size; if (fstat64_blk(fd, &statbuf) == -1) return (0); size = P2ALIGN_TYPED(statbuf.st_size, sizeof (vdev_label_t), uint64_t); if ((label = calloc(sizeof (vdev_label_t), 1)) == NULL) return (-1); for (l = 0; l < VDEV_LABELS; l++) { if (pwrite64(fd, label, sizeof (vdev_label_t), label_offset(size, l)) != sizeof (vdev_label_t)) return (-1); } free(label); return (0); } #ifdef HAVE_LIBBLKID /* * Use libblkid to quickly search for zfs devices */ static int zpool_find_import_blkid(libzfs_handle_t *hdl, pool_list_t *pools) { blkid_cache cache; blkid_dev_iterate iter; blkid_dev dev; const char *devname; nvlist_t *config; int fd, err; err = blkid_get_cache(&cache, NULL); if (err != 0) { (void) zfs_error_fmt(hdl, EZFS_BADCACHE, dgettext(TEXT_DOMAIN, "blkid_get_cache() %d"), err); goto err_blkid1; } err = blkid_probe_all(cache); if (err != 0) { (void) zfs_error_fmt(hdl, EZFS_BADCACHE, dgettext(TEXT_DOMAIN, "blkid_probe_all() %d"), err); goto err_blkid2; } iter = blkid_dev_iterate_begin(cache); if (iter == NULL) { (void) zfs_error_fmt(hdl, EZFS_BADCACHE, dgettext(TEXT_DOMAIN, "blkid_dev_iterate_begin()")); goto err_blkid2; } err = blkid_dev_set_search(iter, "TYPE", "zfs"); if (err != 0) { (void) zfs_error_fmt(hdl, EZFS_BADCACHE, dgettext(TEXT_DOMAIN, "blkid_dev_set_search() %d"), err); goto err_blkid3; } while (blkid_dev_next(iter, &dev) == 0) { devname = blkid_dev_devname(dev); if ((fd = open64(devname, O_RDONLY)) < 0) continue; err = zpool_read_label(fd, &config); (void) close(fd); if (err != 0) { (void) no_memory(hdl); goto err_blkid3; } if (config != NULL) { err = add_config(hdl, pools, devname, 0, config); if (err != 0) goto err_blkid3; } } err_blkid3: blkid_dev_iterate_end(iter); err_blkid2: blkid_put_cache(cache); err_blkid1: return err; } #endif /* HAVE_LIBBLKID */ char * zpool_default_import_path[DEFAULT_IMPORT_PATH_SIZE] = { "/dev/disk/by-vdev", /* Custom rules, use first if they exist */ "/dev/mapper", /* Use multipath devices before components */ "/dev/disk/by-uuid", /* Single unique entry and persistent */ "/dev/disk/by-id", /* May be multiple entries and persistent */ "/dev/disk/by-path", /* Encodes physical location and persistent */ "/dev/disk/by-label", /* Custom persistent labels */ "/dev" /* UNSAFE device names will change */ }; /* * Given a list of directories to search, find all pools stored on disk. This * includes partial pools which are not available to import. If no args are * given (argc is 0), then the default directory (/dev/dsk) is searched. * poolname or guid (but not both) are provided by the caller when trying * to import a specific pool. 
*/ static nvlist_t * zpool_find_import_impl(libzfs_handle_t *hdl, importargs_t *iarg) { int i, dirs = iarg->paths; DIR *dirp = NULL; struct dirent64 *dp; char path[MAXPATHLEN]; char *end, **dir = iarg->path; size_t pathleft; struct stat64 statbuf; nvlist_t *ret = NULL, *config; int fd; pool_list_t pools = { 0 }; pool_entry_t *pe, *penext; vdev_entry_t *ve, *venext; config_entry_t *ce, *cenext; name_entry_t *ne, *nenext; verify(iarg->poolname == NULL || iarg->guid == 0); if (dirs == 0) { #ifdef HAVE_LIBBLKID /* Use libblkid to scan all device for their type */ if (zpool_find_import_blkid(hdl, &pools) == 0) goto skip_scanning; (void) zfs_error_fmt(hdl, EZFS_BADCACHE, dgettext(TEXT_DOMAIN, "blkid failure falling back " "to manual probing")); #endif /* HAVE_LIBBLKID */ dir = zpool_default_import_path; dirs = DEFAULT_IMPORT_PATH_SIZE; } /* * Go through and read the label configuration information from every * possible device, organizing the information according to pool GUID * and toplevel GUID. */ for (i = 0; i < dirs; i++) { char *rdsk; int dfd; /* use realpath to normalize the path */ if (realpath(dir[i], path) == 0) { /* it is safe to skip missing search paths */ if (errno == ENOENT) continue; zfs_error_aux(hdl, strerror(errno)); (void) zfs_error_fmt(hdl, EZFS_BADPATH, dgettext(TEXT_DOMAIN, "cannot open '%s'"), dir[i]); goto error; } end = &path[strlen(path)]; *end++ = '/'; *end = 0; pathleft = &path[sizeof (path)] - end; /* * Using raw devices instead of block devices when we're * reading the labels skips a bunch of slow operations during * close(2) processing, so we replace /dev/dsk with /dev/rdsk. */ if (strcmp(path, "/dev/dsk/") == 0) rdsk = "/dev/rdsk/"; else rdsk = path; if ((dfd = open64(rdsk, O_RDONLY)) < 0 || (dirp = fdopendir(dfd)) == NULL) { zfs_error_aux(hdl, strerror(errno)); (void) zfs_error_fmt(hdl, EZFS_BADPATH, dgettext(TEXT_DOMAIN, "cannot open '%s'"), rdsk); goto error; } /* * This is not MT-safe, but we have no MT consumers of libzfs */ while ((dp = readdir64(dirp)) != NULL) { const char *name = dp->d_name; if (name[0] == '.' && (name[1] == 0 || (name[1] == '.' && name[2] == 0))) continue; /* * Skip checking devices with well known prefixes: * watchdog - A special close is required to avoid * triggering it and resetting the system. * fuse - Fuse control device. * ppp - Generic PPP driver. * tty* - Generic serial interface. * vcs* - Virtual console memory. * parport* - Parallel port interface. * lp* - Printer interface. * fd* - Floppy interface. * hpet - High Precision Event Timer, crashes qemu * when accessed from a virtual machine. * core - Symlink to /proc/kcore, causes a crash * when access from Xen dom0. */ if ((strncmp(name, "watchdog", 8) == 0) || (strncmp(name, "fuse", 4) == 0) || (strncmp(name, "ppp", 3) == 0) || (strncmp(name, "tty", 3) == 0) || (strncmp(name, "vcs", 3) == 0) || (strncmp(name, "parport", 7) == 0) || (strncmp(name, "lp", 2) == 0) || (strncmp(name, "fd", 2) == 0) || (strncmp(name, "hpet", 4) == 0) || (strncmp(name, "core", 4) == 0)) continue; /* * Ignore failed stats. We only want regular * files and block devices. 
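The prefix checks above lend themselves to a table-driven form; this sketch mirrors the same list but is not the code the scanner actually uses.

#include <string.h>

static const char *skip_prefixes[] = {
	"watchdog", "fuse", "ppp", "tty", "vcs",
	"parport", "lp", "fd", "hpet", "core",
};

/*
 * Return nonzero when the device node name starts with a prefix that the
 * import scan should never open.
 */
static int
should_skip_device(const char *name)
{
	size_t i;

	for (i = 0; i < sizeof (skip_prefixes) / sizeof (skip_prefixes[0]); i++)
		if (strncmp(name, skip_prefixes[i],
		    strlen(skip_prefixes[i])) == 0)
			return (1);
	return (0);
}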
*/ if ((fstatat64(dfd, name, &statbuf, 0) != 0) || (!S_ISREG(statbuf.st_mode) && !S_ISBLK(statbuf.st_mode))) continue; if ((fd = openat64(dfd, name, O_RDONLY)) < 0) continue; if ((zpool_read_label(fd, &config)) != 0) { (void) close(fd); (void) no_memory(hdl); goto error; } (void) close(fd); if (config != NULL) { boolean_t matched = B_TRUE; char *pname; if ((iarg->poolname != NULL) && (nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME, &pname) == 0)) { if (strcmp(iarg->poolname, pname)) matched = B_FALSE; } else if (iarg->guid != 0) { uint64_t this_guid; matched = nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &this_guid) == 0 && iarg->guid == this_guid; } if (!matched) { nvlist_free(config); config = NULL; continue; } /* use the non-raw path for the config */ (void) strlcpy(end, name, pathleft); if (add_config(hdl, &pools, path, i+1, config)) goto error; } } (void) closedir(dirp); dirp = NULL; } #ifdef HAVE_LIBBLKID skip_scanning: #endif ret = get_configs(hdl, &pools, iarg->can_be_active); error: for (pe = pools.pools; pe != NULL; pe = penext) { penext = pe->pe_next; for (ve = pe->pe_vdevs; ve != NULL; ve = venext) { venext = ve->ve_next; for (ce = ve->ve_configs; ce != NULL; ce = cenext) { cenext = ce->ce_next; if (ce->ce_config) nvlist_free(ce->ce_config); free(ce); } free(ve); } free(pe); } for (ne = pools.names; ne != NULL; ne = nenext) { nenext = ne->ne_next; if (ne->ne_name) free(ne->ne_name); free(ne); } if (dirp) (void) closedir(dirp); return (ret); } nvlist_t * zpool_find_import(libzfs_handle_t *hdl, int argc, char **argv) { importargs_t iarg = { 0 }; iarg.paths = argc; iarg.path = argv; return (zpool_find_import_impl(hdl, &iarg)); } /* * Given a cache file, return the contents as a list of importable pools. * poolname or guid (but not both) are provided by the caller when trying * to import a specific pool. */ nvlist_t * zpool_find_import_cached(libzfs_handle_t *hdl, const char *cachefile, char *poolname, uint64_t guid) { char *buf; int fd; struct stat64 statbuf; nvlist_t *raw, *src, *dst; nvlist_t *pools; nvpair_t *elem; char *name; uint64_t this_guid; boolean_t active; verify(poolname == NULL || guid == 0); if ((fd = open(cachefile, O_RDONLY)) < 0) { zfs_error_aux(hdl, "%s", strerror(errno)); (void) zfs_error(hdl, EZFS_BADCACHE, dgettext(TEXT_DOMAIN, "failed to open cache file")); return (NULL); } if (fstat64(fd, &statbuf) != 0) { zfs_error_aux(hdl, "%s", strerror(errno)); (void) close(fd); (void) zfs_error(hdl, EZFS_BADCACHE, dgettext(TEXT_DOMAIN, "failed to get size of cache file")); return (NULL); } if ((buf = zfs_alloc(hdl, statbuf.st_size)) == NULL) { (void) close(fd); return (NULL); } if (read(fd, buf, statbuf.st_size) != statbuf.st_size) { (void) close(fd); free(buf); (void) zfs_error(hdl, EZFS_BADCACHE, dgettext(TEXT_DOMAIN, "failed to read cache file contents")); return (NULL); } (void) close(fd); if (nvlist_unpack(buf, statbuf.st_size, &raw, 0) != 0) { free(buf); (void) zfs_error(hdl, EZFS_BADCACHE, dgettext(TEXT_DOMAIN, "invalid or corrupt cache file contents")); return (NULL); } free(buf); /* * Go through and get the current state of the pools and refresh their * state. 
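The nvlist returned by zpool_find_import() maps each pool name to its assembled config, so listing importable pools is a matter of walking its pairs. A hypothetical caller, using only the public libzfs and libnvpair calls already shown in this file:

#include <stdio.h>
#include <libnvpair.h>
#include <libzfs.h>

static void
list_importable(libzfs_handle_t *hdl, int dirc, char **dirv)
{
	nvlist_t *pools = zpool_find_import(hdl, dirc, dirv);
	nvpair_t *elem = NULL;

	if (pools == NULL)
		return;		/* nothing found, or an error was reported */
	while ((elem = nvlist_next_nvpair(pools, elem)) != NULL)
		(void) printf("importable pool: %s\n", nvpair_name(elem));
	nvlist_free(pools);
}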
*/ if (nvlist_alloc(&pools, 0, 0) != 0) { (void) no_memory(hdl); nvlist_free(raw); return (NULL); } elem = NULL; while ((elem = nvlist_next_nvpair(raw, elem)) != NULL) { verify(nvpair_value_nvlist(elem, &src) == 0); verify(nvlist_lookup_string(src, ZPOOL_CONFIG_POOL_NAME, &name) == 0); if (poolname != NULL && strcmp(poolname, name) != 0) continue; verify(nvlist_lookup_uint64(src, ZPOOL_CONFIG_POOL_GUID, &this_guid) == 0); if (guid != 0) { verify(nvlist_lookup_uint64(src, ZPOOL_CONFIG_POOL_GUID, &this_guid) == 0); if (guid != this_guid) continue; } if (pool_active(hdl, name, this_guid, &active) != 0) { nvlist_free(raw); nvlist_free(pools); return (NULL); } if (active) continue; if ((dst = refresh_config(hdl, src)) == NULL) { nvlist_free(raw); nvlist_free(pools); return (NULL); } if (nvlist_add_nvlist(pools, nvpair_name(elem), dst) != 0) { (void) no_memory(hdl); nvlist_free(dst); nvlist_free(raw); nvlist_free(pools); return (NULL); } nvlist_free(dst); } nvlist_free(raw); return (pools); } static int name_or_guid_exists(zpool_handle_t *zhp, void *data) { importargs_t *import = data; int found = 0; if (import->poolname != NULL) { char *pool_name; verify(nvlist_lookup_string(zhp->zpool_config, ZPOOL_CONFIG_POOL_NAME, &pool_name) == 0); if (strcmp(pool_name, import->poolname) == 0) found = 1; } else { uint64_t pool_guid; verify(nvlist_lookup_uint64(zhp->zpool_config, ZPOOL_CONFIG_POOL_GUID, &pool_guid) == 0); if (pool_guid == import->guid) found = 1; } zpool_close(zhp); return (found); } nvlist_t * zpool_search_import(libzfs_handle_t *hdl, importargs_t *import) { verify(import->poolname == NULL || import->guid == 0); if (import->unique) import->exists = zpool_iter(hdl, name_or_guid_exists, import); if (import->cachefile != NULL) return (zpool_find_import_cached(hdl, import->cachefile, import->poolname, import->guid)); return (zpool_find_import_impl(hdl, import)); } boolean_t find_guid(nvlist_t *nv, uint64_t guid) { uint64_t tmp; nvlist_t **child; uint_t c, children; verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &tmp) == 0); if (tmp == guid) return (B_TRUE); if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, &child, &children) == 0) { for (c = 0; c < children; c++) if (find_guid(child[c], guid)) return (B_TRUE); } return (B_FALSE); } typedef struct aux_cbdata { const char *cb_type; uint64_t cb_guid; zpool_handle_t *cb_zhp; } aux_cbdata_t; static int find_aux(zpool_handle_t *zhp, void *data) { aux_cbdata_t *cbp = data; nvlist_t **list; uint_t i, count; uint64_t guid; nvlist_t *nvroot; verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); if (nvlist_lookup_nvlist_array(nvroot, cbp->cb_type, &list, &count) == 0) { for (i = 0; i < count; i++) { verify(nvlist_lookup_uint64(list[i], ZPOOL_CONFIG_GUID, &guid) == 0); if (guid == cbp->cb_guid) { cbp->cb_zhp = zhp; return (1); } } } zpool_close(zhp); return (0); } /* * Determines if the pool is in use. If so, it returns true and the state of * the pool as well as the name of the pool. Both strings are allocated and * must be freed by the caller. 
*/ int zpool_in_use(libzfs_handle_t *hdl, int fd, pool_state_t *state, char **namestr, boolean_t *inuse) { nvlist_t *config; char *name; boolean_t ret; uint64_t guid, vdev_guid; zpool_handle_t *zhp; nvlist_t *pool_config; uint64_t stateval, isspare; aux_cbdata_t cb = { 0 }; boolean_t isactive; *inuse = B_FALSE; if (zpool_read_label(fd, &config) != 0) { (void) no_memory(hdl); return (-1); } if (config == NULL) return (0); verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE, &stateval) == 0); verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, &vdev_guid) == 0); if (stateval != POOL_STATE_SPARE && stateval != POOL_STATE_L2CACHE) { verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME, &name) == 0); verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &guid) == 0); } switch (stateval) { case POOL_STATE_EXPORTED: /* * A pool with an exported state may in fact be imported * read-only, so check the in-core state to see if it's * active and imported read-only. If it is, set * its state to active. */ if (pool_active(hdl, name, guid, &isactive) == 0 && isactive && (zhp = zpool_open_canfail(hdl, name)) != NULL && zpool_get_prop_int(zhp, ZPOOL_PROP_READONLY, NULL)) stateval = POOL_STATE_ACTIVE; ret = B_TRUE; break; case POOL_STATE_ACTIVE: /* * For an active pool, we have to determine if it's really part * of a currently active pool (in which case the pool will exist * and the guid will be the same), or whether it's part of an * active pool that was disconnected without being explicitly * exported. */ if (pool_active(hdl, name, guid, &isactive) != 0) { nvlist_free(config); return (-1); } if (isactive) { /* * Because the device may have been removed while * offlined, we only report it as active if the vdev is * still present in the config. Otherwise, pretend like * it's not in use. */ if ((zhp = zpool_open_canfail(hdl, name)) != NULL && (pool_config = zpool_get_config(zhp, NULL)) != NULL) { nvlist_t *nvroot; verify(nvlist_lookup_nvlist(pool_config, ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); ret = find_guid(nvroot, vdev_guid); } else { ret = B_FALSE; } /* * If this is an active spare within another pool, we * treat it like an unused hot spare. This allows the * user to create a pool with a hot spare that currently * in use within another pool. Since we return B_TRUE, * libdiskmgt will continue to prevent generic consumers * from using the device. */ if (ret && nvlist_lookup_uint64(config, ZPOOL_CONFIG_IS_SPARE, &isspare) == 0 && isspare) stateval = POOL_STATE_SPARE; if (zhp != NULL) zpool_close(zhp); } else { stateval = POOL_STATE_POTENTIALLY_ACTIVE; ret = B_TRUE; } break; case POOL_STATE_SPARE: /* * For a hot spare, it can be either definitively in use, or * potentially active. To determine if it's in use, we iterate * over all pools in the system and search for one with a spare * with a matching guid. * * Due to the shared nature of spares, we don't actually report * the potentially active case as in use. This means the user * can freely create pools on the hot spares of exported pools, * but to do otherwise makes the resulting code complicated, and * we end up having to deal with this case anyway. */ cb.cb_zhp = NULL; cb.cb_guid = vdev_guid; cb.cb_type = ZPOOL_CONFIG_SPARES; if (zpool_iter(hdl, find_aux, &cb) == 1) { name = (char *)zpool_get_name(cb.cb_zhp); ret = TRUE; } else { ret = FALSE; } break; case POOL_STATE_L2CACHE: /* * Check if any pool is currently using this l2cache device. 
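A hypothetical caller of zpool_in_use(), whose prototype appears above: it opens a device, asks whether any pool claims it, and frees the returned name string as the comment requires. The device path handling and output format are only illustrative.

#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <libzfs.h>

static void
check_device(libzfs_handle_t *hdl, const char *dev)
{
	pool_state_t state;
	boolean_t inuse = B_FALSE;
	char *name = NULL;
	int fd;

	if ((fd = open(dev, O_RDONLY)) < 0)
		return;
	if (zpool_in_use(hdl, fd, &state, &name, &inuse) == 0 && inuse) {
		(void) printf("%s belongs to pool '%s' (%s)\n", dev, name,
		    zpool_pool_state_to_name(state));
		free(name);
	}
	(void) close(fd);
}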
*/ cb.cb_zhp = NULL; cb.cb_guid = vdev_guid; cb.cb_type = ZPOOL_CONFIG_L2CACHE; if (zpool_iter(hdl, find_aux, &cb) == 1) { name = (char *)zpool_get_name(cb.cb_zhp); ret = TRUE; } else { ret = FALSE; } break; default: ret = B_FALSE; } if (ret) { if ((*namestr = zfs_strdup(hdl, name)) == NULL) { if (cb.cb_zhp) zpool_close(cb.cb_zhp); nvlist_free(config); return (-1); } *state = (pool_state_t)stateval; } if (cb.cb_zhp) zpool_close(cb.cb_zhp); nvlist_free(config); *inuse = ret; return (0); } diff --git a/lib/libzfs/libzfs_iter.c b/lib/libzfs/libzfs_iter.c index 8215d3cb17af..ff76f9f367e2 100644 --- a/lib/libzfs/libzfs_iter.c +++ b/lib/libzfs/libzfs_iter.c @@ -1,468 +1,468 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright 2010 Nexenta Systems, Inc. All rights reserved. - * Copyright (c) 2011 by Delphix. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. */ #include #include #include #include #include #include #include #include "libzfs_impl.h" int zfs_iter_clones(zfs_handle_t *zhp, zfs_iter_f func, void *data) { nvlist_t *nvl = zfs_get_clones_nvl(zhp); nvpair_t *pair; if (nvl == NULL) return (0); for (pair = nvlist_next_nvpair(nvl, NULL); pair != NULL; pair = nvlist_next_nvpair(nvl, pair)) { zfs_handle_t *clone = zfs_open(zhp->zfs_hdl, nvpair_name(pair), ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME); if (clone != NULL) { int err = func(clone, data); if (err != 0) return (err); } } return (0); } static int zfs_do_list_ioctl(zfs_handle_t *zhp, int arg, zfs_cmd_t *zc) { int rc; uint64_t orig_cookie; orig_cookie = zc->zc_cookie; top: (void) strlcpy(zc->zc_name, zhp->zfs_name, sizeof (zc->zc_name)); rc = ioctl(zhp->zfs_hdl->libzfs_fd, arg, zc); if (rc == -1) { switch (errno) { case ENOMEM: /* expand nvlist memory and try again */ if (zcmd_expand_dst_nvlist(zhp->zfs_hdl, zc) != 0) { zcmd_free_nvlists(zc); return (-1); } zc->zc_cookie = orig_cookie; goto top; /* * An errno value of ESRCH indicates normal completion. * If ENOENT is returned, then the underlying dataset * has been removed since we obtained the handle. 
*/ case ESRCH: case ENOENT: rc = 1; break; default: rc = zfs_standard_error(zhp->zfs_hdl, errno, dgettext(TEXT_DOMAIN, "cannot iterate filesystems")); break; } } return (rc); } /* * Iterate over all child filesystems */ int zfs_iter_filesystems(zfs_handle_t *zhp, zfs_iter_f func, void *data) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; zfs_handle_t *nzhp; int ret; if (zhp->zfs_type != ZFS_TYPE_FILESYSTEM) return (0); if (zcmd_alloc_dst_nvlist(zhp->zfs_hdl, &zc, 0) != 0) return (-1); while ((ret = zfs_do_list_ioctl(zhp, ZFS_IOC_DATASET_LIST_NEXT, &zc)) == 0) { /* * Silently ignore errors, as the only plausible explanation is * that the pool has since been removed. */ if ((nzhp = make_dataset_handle_zc(zhp->zfs_hdl, &zc)) == NULL) { continue; } if ((ret = func(nzhp, data)) != 0) { zcmd_free_nvlists(&zc); return (ret); } } zcmd_free_nvlists(&zc); return ((ret < 0) ? ret : 0); } /* * Iterate over all snapshots */ int zfs_iter_snapshots(zfs_handle_t *zhp, boolean_t simple, zfs_iter_f func, void *data) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; zfs_handle_t *nzhp; int ret; if (zhp->zfs_type == ZFS_TYPE_SNAPSHOT) return (0); zc.zc_simple = simple; if (zcmd_alloc_dst_nvlist(zhp->zfs_hdl, &zc, 0) != 0) return (-1); while ((ret = zfs_do_list_ioctl(zhp, ZFS_IOC_SNAPSHOT_LIST_NEXT, &zc)) == 0) { if (simple) nzhp = make_dataset_simple_handle_zc(zhp, &zc); else nzhp = make_dataset_handle_zc(zhp->zfs_hdl, &zc); if (nzhp == NULL) continue; if ((ret = func(nzhp, data)) != 0) { zcmd_free_nvlists(&zc); return (ret); } } zcmd_free_nvlists(&zc); return ((ret < 0) ? ret : 0); } /* * Routines for dealing with the sorted snapshot functionality */ typedef struct zfs_node { zfs_handle_t *zn_handle; avl_node_t zn_avlnode; } zfs_node_t; static int zfs_sort_snaps(zfs_handle_t *zhp, void *data) { avl_tree_t *avl = data; zfs_node_t *node; zfs_node_t search; search.zn_handle = zhp; node = avl_find(avl, &search, NULL); if (node) { /* * If this snapshot was renamed while we were creating the * AVL tree, it's possible that we already inserted it under * its old name. Remove the old handle before adding the new * one. */ zfs_close(node->zn_handle); avl_remove(avl, node); free(node); } node = zfs_alloc(zhp->zfs_hdl, sizeof (zfs_node_t)); node->zn_handle = zhp; avl_add(avl, node); return (0); } static int zfs_snapshot_compare(const void *larg, const void *rarg) { zfs_handle_t *l = ((zfs_node_t *)larg)->zn_handle; zfs_handle_t *r = ((zfs_node_t *)rarg)->zn_handle; uint64_t lcreate, rcreate; /* * Sort them according to creation time. We use the hidden * CREATETXG property to get an absolute ordering of snapshots. 
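The iterators above hand each child to a zfs_iter_f callback; by convention the callback owns the handle it receives and closes it when done. A minimal callback sketch (the printing is just an example):

#include <stdio.h>
#include <libzfs.h>

/* Print the dataset name and continue; a nonzero return stops the walk. */
static int
print_child(zfs_handle_t *zhp, void *data)
{
	(void) data;
	(void) printf("%s\n", zfs_get_name(zhp));
	zfs_close(zhp);
	return (0);
}

/* Typical use: zfs_iter_filesystems(parent, print_child, NULL); */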
*/ lcreate = zfs_prop_get_int(l, ZFS_PROP_CREATETXG); rcreate = zfs_prop_get_int(r, ZFS_PROP_CREATETXG); if (lcreate < rcreate) return (-1); else if (lcreate > rcreate) return (+1); else return (0); } int zfs_iter_snapshots_sorted(zfs_handle_t *zhp, zfs_iter_f callback, void *data) { int ret = 0; zfs_node_t *node; avl_tree_t avl; void *cookie = NULL; avl_create(&avl, zfs_snapshot_compare, sizeof (zfs_node_t), offsetof(zfs_node_t, zn_avlnode)); ret = zfs_iter_snapshots(zhp, B_FALSE, zfs_sort_snaps, &avl); for (node = avl_first(&avl); node != NULL; node = AVL_NEXT(&avl, node)) ret |= callback(node->zn_handle, data); while ((node = avl_destroy_nodes(&avl, &cookie)) != NULL) free(node); avl_destroy(&avl); return (ret); } typedef struct { char *ssa_first; char *ssa_last; boolean_t ssa_seenfirst; boolean_t ssa_seenlast; zfs_iter_f ssa_func; void *ssa_arg; } snapspec_arg_t; static int snapspec_cb(zfs_handle_t *zhp, void *arg) { snapspec_arg_t *ssa = arg; char *shortsnapname; int err = 0; if (ssa->ssa_seenlast) return (0); shortsnapname = zfs_strdup(zhp->zfs_hdl, strchr(zfs_get_name(zhp), '@') + 1); if (!ssa->ssa_seenfirst && strcmp(shortsnapname, ssa->ssa_first) == 0) ssa->ssa_seenfirst = B_TRUE; if (ssa->ssa_seenfirst) { err = ssa->ssa_func(zhp, ssa->ssa_arg); } else { zfs_close(zhp); } if (strcmp(shortsnapname, ssa->ssa_last) == 0) ssa->ssa_seenlast = B_TRUE; free(shortsnapname); return (err); } /* * spec is a string like "A,B%C,D" * * , where can be: * (single snapshot) * % (range of snapshots, inclusive) * % (range of snapshots, starting with earliest) * % (range of snapshots, ending with last) * % (all snapshots) * [,...] (comma separated list of the above) * * If a snapshot can not be opened, continue trying to open the others, but * return ENOENT at the end. */ int zfs_iter_snapspec(zfs_handle_t *fs_zhp, const char *spec_orig, zfs_iter_f func, void *arg) { - char buf[ZFS_MAXNAMELEN]; - char *comma_separated, *cp; + char *buf, *comma_separated, *cp; int err = 0; int ret = 0; - (void) strlcpy(buf, spec_orig, sizeof (buf)); + buf = zfs_strdup(fs_zhp->zfs_hdl, spec_orig); cp = buf; while ((comma_separated = strsep(&cp, ",")) != NULL) { char *pct = strchr(comma_separated, '%'); if (pct != NULL) { snapspec_arg_t ssa = { 0 }; ssa.ssa_func = func; ssa.ssa_arg = arg; if (pct == comma_separated) ssa.ssa_seenfirst = B_TRUE; else ssa.ssa_first = comma_separated; *pct = '\0'; ssa.ssa_last = pct + 1; /* * If there is a lastname specified, make sure it * exists. 
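zfs_iter_snapspec() tokenizes the spec with strsep(), which writes into its argument; that is why the hunk above switches from an on-stack copy to a heap duplicate that is freed at the end. A stand-alone sketch of the same tokenizing, outside libzfs:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static void
parse_spec(const char *spec)
{
	char *buf, *cp, *tok;

	if ((buf = strdup(spec)) == NULL)	/* strsep() modifies its input */
		return;
	cp = buf;
	while ((tok = strsep(&cp, ",")) != NULL) {
		char *pct = strchr(tok, '%');

		if (pct == NULL) {
			(void) printf("single snapshot: %s\n", tok);
			continue;
		}
		*pct = '\0';
		(void) printf("range '%s' .. '%s' (empty end is open)\n",
		    tok, pct + 1);
	}
	free(buf);
}

/* parse_spec("A,B%C,%,D%"); */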
*/ if (ssa.ssa_last[0] != '\0') { char snapname[ZFS_MAXNAMELEN]; (void) snprintf(snapname, sizeof (snapname), "%s@%s", zfs_get_name(fs_zhp), ssa.ssa_last); if (!zfs_dataset_exists(fs_zhp->zfs_hdl, snapname, ZFS_TYPE_SNAPSHOT)) { ret = ENOENT; continue; } } err = zfs_iter_snapshots_sorted(fs_zhp, snapspec_cb, &ssa); if (ret == 0) ret = err; if (ret == 0 && (!ssa.ssa_seenfirst || (ssa.ssa_last[0] != '\0' && !ssa.ssa_seenlast))) { ret = ENOENT; } } else { char snapname[ZFS_MAXNAMELEN]; zfs_handle_t *snap_zhp; (void) snprintf(snapname, sizeof (snapname), "%s@%s", zfs_get_name(fs_zhp), comma_separated); snap_zhp = make_dataset_handle(fs_zhp->zfs_hdl, snapname); if (snap_zhp == NULL) { ret = ENOENT; continue; } err = func(snap_zhp, arg); if (ret == 0) ret = err; } } + free(buf); return (ret); } /* * Iterate over all children, snapshots and filesystems */ int zfs_iter_children(zfs_handle_t *zhp, zfs_iter_f func, void *data) { int ret; if ((ret = zfs_iter_filesystems(zhp, func, data)) != 0) return (ret); return (zfs_iter_snapshots(zhp, B_FALSE, func, data)); } typedef struct iter_stack_frame { struct iter_stack_frame *next; zfs_handle_t *zhp; } iter_stack_frame_t; typedef struct iter_dependents_arg { boolean_t first; boolean_t allowrecursion; iter_stack_frame_t *stack; zfs_iter_f func; void *data; } iter_dependents_arg_t; static int iter_dependents_cb(zfs_handle_t *zhp, void *arg) { iter_dependents_arg_t *ida = arg; int err; boolean_t first = ida->first; ida->first = B_FALSE; if (zhp->zfs_type == ZFS_TYPE_SNAPSHOT) { err = zfs_iter_clones(zhp, iter_dependents_cb, ida); } else { iter_stack_frame_t isf; iter_stack_frame_t *f; /* * check if there is a cycle by seeing if this fs is already * on the stack. */ for (f = ida->stack; f != NULL; f = f->next) { if (f->zhp->zfs_dmustats.dds_guid == zhp->zfs_dmustats.dds_guid) { if (ida->allowrecursion) { zfs_close(zhp); return (0); } else { zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, "recursive dependency at '%s'"), zfs_get_name(zhp)); err = zfs_error(zhp->zfs_hdl, EZFS_RECURSIVE, dgettext(TEXT_DOMAIN, "cannot determine dependent " "datasets")); zfs_close(zhp); return (err); } } } isf.zhp = zhp; isf.next = ida->stack; ida->stack = &isf; err = zfs_iter_filesystems(zhp, iter_dependents_cb, ida); if (err == 0) err = zfs_iter_snapshots(zhp, B_FALSE, iter_dependents_cb, ida); ida->stack = isf.next; } if (!first && err == 0) err = ida->func(zhp, ida->data); return (err); } int zfs_iter_dependents(zfs_handle_t *zhp, boolean_t allowrecursion, zfs_iter_f func, void *data) { iter_dependents_arg_t ida; ida.allowrecursion = allowrecursion; ida.stack = NULL; ida.func = func; ida.data = data; ida.first = B_TRUE; return (iter_dependents_cb(zfs_handle_dup(zhp), &ida)); } diff --git a/lib/libzfs/libzfs_pool.c b/lib/libzfs/libzfs_pool.c index a6cacd370c2c..45c39cc0facd 100644 --- a/lib/libzfs/libzfs_pool.c +++ b/lib/libzfs/libzfs_pool.c @@ -1,4184 +1,4180 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
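iter_dependents_cb() detects clone cycles by keeping a linked stack of frames that live on the C call stack and checking each dataset's guid against it before recursing. A generic sketch of that check, with node_id standing in for dds_guid:

#include <stdint.h>
#include <stddef.h>

typedef struct frame {
	struct frame	*next;
	uint64_t	 node_id;
} frame_t;

/* Nonzero means node_id is already being visited somewhere up the stack. */
static int
seen_on_stack(const frame_t *stack, uint64_t node_id)
{
	const frame_t *f;

	for (f = stack; f != NULL; f = f->next)
		if (f->node_id == node_id)
			return (1);
	return (0);
}

/*
 * In the walker: push a frame on entry (frame.next = *stackp; *stackp =
 * &frame;), recurse, then pop (*stackp = frame.next;) before returning,
 * just as iter_dependents_cb() does with its iter_stack_frame_t.
 */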
* If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright 2011 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2012 by Delphix. All rights reserved. */ #include #include #include #include #include #include #include #include #include +#include #include #include #include #include #include #include #include "zfs_namecheck.h" #include "zfs_prop.h" #include "libzfs_impl.h" #include "zfs_comutil.h" #include "zfeature_common.h" static int read_efi_label(nvlist_t *config, diskaddr_t *sb); typedef struct prop_flags { int create:1; /* Validate property on creation */ int import:1; /* Validate property on import */ } prop_flags_t; /* * ==================================================================== * zpool property functions * ==================================================================== */ static int zpool_get_all_props(zpool_handle_t *zhp) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; libzfs_handle_t *hdl = zhp->zpool_hdl; (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); if (zcmd_alloc_dst_nvlist(hdl, &zc, 0) != 0) return (-1); while (ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_GET_PROPS, &zc) != 0) { if (errno == ENOMEM) { if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) { zcmd_free_nvlists(&zc); return (-1); } } else { zcmd_free_nvlists(&zc); return (-1); } } if (zcmd_read_dst_nvlist(hdl, &zc, &zhp->zpool_props) != 0) { zcmd_free_nvlists(&zc); return (-1); } zcmd_free_nvlists(&zc); return (0); } static int zpool_props_refresh(zpool_handle_t *zhp) { nvlist_t *old_props; old_props = zhp->zpool_props; if (zpool_get_all_props(zhp) != 0) return (-1); nvlist_free(old_props); return (0); } static char * zpool_get_prop_string(zpool_handle_t *zhp, zpool_prop_t prop, zprop_source_t *src) { nvlist_t *nv, *nvl; uint64_t ival; char *value; zprop_source_t source; nvl = zhp->zpool_props; if (nvlist_lookup_nvlist(nvl, zpool_prop_to_name(prop), &nv) == 0) { verify(nvlist_lookup_uint64(nv, ZPROP_SOURCE, &ival) == 0); source = ival; verify(nvlist_lookup_string(nv, ZPROP_VALUE, &value) == 0); } else { source = ZPROP_SRC_DEFAULT; if ((value = (char *)zpool_prop_default_string(prop)) == NULL) value = "-"; } if (src) *src = source; return (value); } uint64_t zpool_get_prop_int(zpool_handle_t *zhp, zpool_prop_t prop, zprop_source_t *src) { nvlist_t *nv, *nvl; uint64_t value; zprop_source_t source; if (zhp->zpool_props == NULL && zpool_get_all_props(zhp)) { /* * zpool_get_all_props() has most likely failed because * the pool is faulted, but if all we need is the top level * vdev's guid then get it from the zhp config nvlist. */ if ((prop == ZPOOL_PROP_GUID) && (nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE, &nv) == 0) && (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &value) == 0)) { return (value); } return (zpool_prop_default_numeric(prop)); } nvl = zhp->zpool_props; if (nvlist_lookup_nvlist(nvl, zpool_prop_to_name(prop), &nv) == 0) { verify(nvlist_lookup_uint64(nv, ZPROP_SOURCE, &value) == 0); source = value; verify(nvlist_lookup_uint64(nv, ZPROP_VALUE, &value) == 0); } else { source = ZPROP_SRC_DEFAULT; value = zpool_prop_default_numeric(prop); } if (src) *src = source; return (value); } /* * Map VDEV STATE to printed strings. 
*/ char * zpool_state_to_name(vdev_state_t state, vdev_aux_t aux) { switch (state) { default: break; case VDEV_STATE_CLOSED: case VDEV_STATE_OFFLINE: return (gettext("OFFLINE")); case VDEV_STATE_REMOVED: return (gettext("REMOVED")); case VDEV_STATE_CANT_OPEN: if (aux == VDEV_AUX_CORRUPT_DATA || aux == VDEV_AUX_BAD_LOG) return (gettext("FAULTED")); else if (aux == VDEV_AUX_SPLIT_POOL) return (gettext("SPLIT")); else return (gettext("UNAVAIL")); case VDEV_STATE_FAULTED: return (gettext("FAULTED")); case VDEV_STATE_DEGRADED: return (gettext("DEGRADED")); case VDEV_STATE_HEALTHY: return (gettext("ONLINE")); } return (gettext("UNKNOWN")); } /* * Map POOL STATE to printed strings. */ const char * zpool_pool_state_to_name(pool_state_t state) { switch (state) { default: break; case POOL_STATE_ACTIVE: return (gettext("ACTIVE")); case POOL_STATE_EXPORTED: return (gettext("EXPORTED")); case POOL_STATE_DESTROYED: return (gettext("DESTROYED")); case POOL_STATE_SPARE: return (gettext("SPARE")); case POOL_STATE_L2CACHE: return (gettext("L2CACHE")); case POOL_STATE_UNINITIALIZED: return (gettext("UNINITIALIZED")); case POOL_STATE_UNAVAIL: return (gettext("UNAVAIL")); case POOL_STATE_POTENTIALLY_ACTIVE: return (gettext("POTENTIALLY_ACTIVE")); } return (gettext("UNKNOWN")); } /* * Get a zpool property value for 'prop' and return the value in * a pre-allocated buffer. */ int zpool_get_prop(zpool_handle_t *zhp, zpool_prop_t prop, char *buf, size_t len, zprop_source_t *srctype) { uint64_t intval; const char *strval; zprop_source_t src = ZPROP_SRC_NONE; nvlist_t *nvroot; vdev_stat_t *vs; uint_t vsc; if (zpool_get_state(zhp) == POOL_STATE_UNAVAIL) { switch (prop) { case ZPOOL_PROP_NAME: (void) strlcpy(buf, zpool_get_name(zhp), len); break; case ZPOOL_PROP_HEALTH: (void) strlcpy(buf, "FAULTED", len); break; case ZPOOL_PROP_GUID: intval = zpool_get_prop_int(zhp, prop, &src); (void) snprintf(buf, len, "%llu", (u_longlong_t)intval); break; case ZPOOL_PROP_ALTROOT: case ZPOOL_PROP_CACHEFILE: case ZPOOL_PROP_COMMENT: if (zhp->zpool_props != NULL || zpool_get_all_props(zhp) == 0) { (void) strlcpy(buf, zpool_get_prop_string(zhp, prop, &src), len); if (srctype != NULL) *srctype = src; return (0); } /* FALLTHROUGH */ default: (void) strlcpy(buf, "-", len); break; } if (srctype != NULL) *srctype = src; return (0); } if (zhp->zpool_props == NULL && zpool_get_all_props(zhp) && prop != ZPOOL_PROP_NAME) return (-1); switch (zpool_prop_get_type(prop)) { case PROP_TYPE_STRING: (void) strlcpy(buf, zpool_get_prop_string(zhp, prop, &src), len); break; case PROP_TYPE_NUMBER: intval = zpool_get_prop_int(zhp, prop, &src); switch (prop) { case ZPOOL_PROP_SIZE: case ZPOOL_PROP_ALLOCATED: case ZPOOL_PROP_FREE: case ZPOOL_PROP_FREEING: case ZPOOL_PROP_EXPANDSZ: case ZPOOL_PROP_ASHIFT: (void) zfs_nicenum(intval, buf, len); break; case ZPOOL_PROP_CAPACITY: (void) snprintf(buf, len, "%llu%%", (u_longlong_t)intval); break; case ZPOOL_PROP_DEDUPRATIO: (void) snprintf(buf, len, "%llu.%02llux", (u_longlong_t)(intval / 100), (u_longlong_t)(intval % 100)); break; case ZPOOL_PROP_HEALTH: verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL), ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&vs, &vsc) == 0); (void) strlcpy(buf, zpool_state_to_name(intval, vs->vs_aux), len); break; case ZPOOL_PROP_VERSION: if (intval >= SPA_VERSION_FEATURES) { (void) snprintf(buf, len, "-"); break; } /* FALLTHROUGH */ default: (void) snprintf(buf, len, "%llu", (u_longlong_t)intval); } break; 
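A hypothetical consumer of zpool_get_prop() above: the function formats the requested property into a caller-supplied buffer, so fetching a couple of properties is enough for a one-line summary. Buffer sizes here are arbitrary.

#include <stdio.h>
#include <libzfs.h>

static void
show_pool_summary(zpool_handle_t *zhp)
{
	char health[32], size[32];

	if (zpool_get_prop(zhp, ZPOOL_PROP_HEALTH, health,
	    sizeof (health), NULL) == 0 &&
	    zpool_get_prop(zhp, ZPOOL_PROP_SIZE, size,
	    sizeof (size), NULL) == 0)
		(void) printf("%s: %s, %s\n", zpool_get_name(zhp),
		    health, size);
}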
case PROP_TYPE_INDEX: intval = zpool_get_prop_int(zhp, prop, &src); if (zpool_prop_index_to_string(prop, intval, &strval) != 0) return (-1); (void) strlcpy(buf, strval, len); break; default: abort(); } if (srctype) *srctype = src; return (0); } /* * Check if the bootfs name has the same pool name as it is set to. * Assuming bootfs is a valid dataset name. */ static boolean_t bootfs_name_valid(const char *pool, char *bootfs) { int len = strlen(pool); if (!zfs_name_valid(bootfs, ZFS_TYPE_FILESYSTEM|ZFS_TYPE_SNAPSHOT)) return (B_FALSE); if (strncmp(pool, bootfs, len) == 0 && (bootfs[len] == '/' || bootfs[len] == '\0')) return (B_TRUE); return (B_FALSE); } #if defined(__sun__) || defined(__sun) /* * Inspect the configuration to determine if any of the devices contain * an EFI label. */ static boolean_t pool_uses_efi(nvlist_t *config) { nvlist_t **child; uint_t c, children; if (nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_CHILDREN, &child, &children) != 0) return (read_efi_label(config, NULL) >= 0); for (c = 0; c < children; c++) { if (pool_uses_efi(child[c])) return (B_TRUE); } return (B_FALSE); } #endif boolean_t zpool_is_bootable(zpool_handle_t *zhp) { char bootfs[ZPOOL_MAXNAMELEN]; return (zpool_get_prop(zhp, ZPOOL_PROP_BOOTFS, bootfs, sizeof (bootfs), NULL) == 0 && strncmp(bootfs, "-", sizeof (bootfs)) != 0); } /* * Given an nvlist of zpool properties to be set, validate that they are * correct, and parse any numeric properties (index, boolean, etc) if they are * specified as strings. */ static nvlist_t * zpool_valid_proplist(libzfs_handle_t *hdl, const char *poolname, nvlist_t *props, uint64_t version, prop_flags_t flags, char *errbuf) { nvpair_t *elem; nvlist_t *retprops; zpool_prop_t prop; char *strval; uint64_t intval; char *slash, *check; struct stat64 statbuf; zpool_handle_t *zhp; nvlist_t *nvroot; if (nvlist_alloc(&retprops, NV_UNIQUE_NAME, 0) != 0) { (void) no_memory(hdl); return (NULL); } elem = NULL; while ((elem = nvlist_next_nvpair(props, elem)) != NULL) { const char *propname = nvpair_name(elem); prop = zpool_name_to_prop(propname); if (prop == ZPROP_INVAL && zpool_prop_feature(propname)) { int err; zfeature_info_t *feature; char *fname = strchr(propname, '@') + 1; err = zfeature_lookup_name(fname, &feature); if (err != 0) { ASSERT3U(err, ==, ENOENT); zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid feature '%s'"), fname); (void) zfs_error(hdl, EZFS_BADPROP, errbuf); goto error; } if (nvpair_type(elem) != DATA_TYPE_STRING) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' must be a string"), propname); (void) zfs_error(hdl, EZFS_BADPROP, errbuf); goto error; } (void) nvpair_value_string(elem, &strval); if (strcmp(strval, ZFS_FEATURE_ENABLED) != 0) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "property '%s' can only be set to " "'enabled'"), propname); (void) zfs_error(hdl, EZFS_BADPROP, errbuf); goto error; } if (nvlist_add_uint64(retprops, propname, 0) != 0) { (void) no_memory(hdl); goto error; } continue; } /* * Make sure this property is valid and applies to this type. */ if (prop == ZPROP_INVAL) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid property '%s'"), propname); (void) zfs_error(hdl, EZFS_BADPROP, errbuf); goto error; } if (zpool_prop_readonly(prop)) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' " "is readonly"), propname); (void) zfs_error(hdl, EZFS_PROPREADONLY, errbuf); goto error; } if (zprop_parse_value(hdl, elem, prop, ZFS_TYPE_POOL, retprops, &strval, &intval, errbuf) != 0) goto error; /* * Perform additional checking for specific properties. 
*/ switch (prop) { default: break; case ZPOOL_PROP_VERSION: if (intval < version || !SPA_VERSION_IS_SUPPORTED(intval)) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "property '%s' number %d is invalid."), propname, intval); (void) zfs_error(hdl, EZFS_BADVERSION, errbuf); goto error; } break; case ZPOOL_PROP_ASHIFT: if (!flags.create) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "property '%s' can only be set at " "creation time"), propname); (void) zfs_error(hdl, EZFS_BADPROP, errbuf); goto error; } if (intval != 0 && (intval < 9 || intval > 13)) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "property '%s' number %d is invalid."), propname, intval); (void) zfs_error(hdl, EZFS_BADPROP, errbuf); goto error; } break; case ZPOOL_PROP_BOOTFS: if (flags.create || flags.import) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "property '%s' cannot be set at creation " "or import time"), propname); (void) zfs_error(hdl, EZFS_BADPROP, errbuf); goto error; } if (version < SPA_VERSION_BOOTFS) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be upgraded to support " "'%s' property"), propname); (void) zfs_error(hdl, EZFS_BADVERSION, errbuf); goto error; } /* * bootfs property value has to be a dataset name and * the dataset has to be in the same pool as it sets to. */ if (strval[0] != '\0' && !bootfs_name_valid(poolname, strval)) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' " "is an invalid name"), strval); (void) zfs_error(hdl, EZFS_INVALIDNAME, errbuf); goto error; } if ((zhp = zpool_open_canfail(hdl, poolname)) == NULL) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "could not open pool '%s'"), poolname); (void) zfs_error(hdl, EZFS_OPENFAILED, errbuf); goto error; } verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL), ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); #if defined(__sun__) || defined(__sun) /* * bootfs property cannot be set on a disk which has * been EFI labeled. 
*/ if (pool_uses_efi(nvroot)) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "property '%s' not supported on " "EFI labeled devices"), propname); (void) zfs_error(hdl, EZFS_POOL_NOTSUP, errbuf); zpool_close(zhp); goto error; } #endif zpool_close(zhp); break; case ZPOOL_PROP_ALTROOT: if (!flags.create && !flags.import) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "property '%s' can only be set during pool " "creation or import"), propname); (void) zfs_error(hdl, EZFS_BADPROP, errbuf); goto error; } if (strval[0] != '/') { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "bad alternate root '%s'"), strval); (void) zfs_error(hdl, EZFS_BADPATH, errbuf); goto error; } break; case ZPOOL_PROP_CACHEFILE: if (strval[0] == '\0') break; if (strcmp(strval, "none") == 0) break; if (strval[0] != '/') { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "property '%s' must be empty, an " "absolute path, or 'none'"), propname); (void) zfs_error(hdl, EZFS_BADPATH, errbuf); goto error; } slash = strrchr(strval, '/'); if (slash[1] == '\0' || strcmp(slash, "/.") == 0 || strcmp(slash, "/..") == 0) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' is not a valid file"), strval); (void) zfs_error(hdl, EZFS_BADPATH, errbuf); goto error; } *slash = '\0'; if (strval[0] != '\0' && (stat64(strval, &statbuf) != 0 || !S_ISDIR(statbuf.st_mode))) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' is not a valid directory"), strval); (void) zfs_error(hdl, EZFS_BADPATH, errbuf); goto error; } *slash = '/'; break; case ZPOOL_PROP_COMMENT: for (check = strval; *check != '\0'; check++) { if (!isprint(*check)) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "comment may only have printable " "characters")); (void) zfs_error(hdl, EZFS_BADPROP, errbuf); goto error; } } if (strlen(strval) > ZPROP_MAX_COMMENT) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "comment must not exceed %d characters"), ZPROP_MAX_COMMENT); (void) zfs_error(hdl, EZFS_BADPROP, errbuf); goto error; } break; case ZPOOL_PROP_READONLY: if (!flags.import) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "property '%s' can only be set at " "import time"), propname); (void) zfs_error(hdl, EZFS_BADPROP, errbuf); goto error; } break; } } return (retprops); error: nvlist_free(retprops); return (NULL); } /* * Set zpool property : propname=propval. */ int zpool_set_prop(zpool_handle_t *zhp, const char *propname, const char *propval) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; int ret = -1; char errbuf[1024]; nvlist_t *nvl = NULL; nvlist_t *realprops; uint64_t version; prop_flags_t flags = { 0 }; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot set property for '%s'"), zhp->zpool_name); if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0) return (no_memory(zhp->zpool_hdl)); if (nvlist_add_string(nvl, propname, propval) != 0) { nvlist_free(nvl); return (no_memory(zhp->zpool_hdl)); } version = zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL); if ((realprops = zpool_valid_proplist(zhp->zpool_hdl, zhp->zpool_name, nvl, version, flags, errbuf)) == NULL) { nvlist_free(nvl); return (-1); } nvlist_free(nvl); nvl = realprops; /* * Execute the corresponding ioctl() to set this property. 
*/ (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); if (zcmd_write_src_nvlist(zhp->zpool_hdl, &zc, nvl) != 0) { nvlist_free(nvl); return (-1); } ret = zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_POOL_SET_PROPS, &zc); zcmd_free_nvlists(&zc); nvlist_free(nvl); if (ret) (void) zpool_standard_error(zhp->zpool_hdl, errno, errbuf); else (void) zpool_props_refresh(zhp); return (ret); } int zpool_expand_proplist(zpool_handle_t *zhp, zprop_list_t **plp) { libzfs_handle_t *hdl = zhp->zpool_hdl; zprop_list_t *entry; char buf[ZFS_MAXPROPLEN]; nvlist_t *features = NULL; nvpair_t *nvp; zprop_list_t **last; boolean_t firstexpand = (NULL == *plp); int i; if (zprop_expand_list(hdl, plp, ZFS_TYPE_POOL) != 0) return (-1); last = plp; while (*last != NULL) last = &(*last)->pl_next; if ((*plp)->pl_all) features = zpool_get_features(zhp); if ((*plp)->pl_all && firstexpand) { for (i = 0; i < SPA_FEATURES; i++) { zprop_list_t *entry = zfs_alloc(hdl, sizeof (zprop_list_t)); entry->pl_prop = ZPROP_INVAL; entry->pl_user_prop = zfs_asprintf(hdl, "feature@%s", spa_feature_table[i].fi_uname); entry->pl_width = strlen(entry->pl_user_prop); entry->pl_all = B_TRUE; *last = entry; last = &entry->pl_next; } } /* add any unsupported features */ for (nvp = nvlist_next_nvpair(features, NULL); nvp != NULL; nvp = nvlist_next_nvpair(features, nvp)) { char *propname; boolean_t found; zprop_list_t *entry; if (zfeature_is_supported(nvpair_name(nvp))) continue; propname = zfs_asprintf(hdl, "unsupported@%s", nvpair_name(nvp)); /* * Before adding the property to the list make sure that no * other pool already added the same property. */ found = B_FALSE; entry = *plp; while (entry != NULL) { if (entry->pl_user_prop != NULL && strcmp(propname, entry->pl_user_prop) == 0) { found = B_TRUE; break; } entry = entry->pl_next; } if (found) { free(propname); continue; } entry = zfs_alloc(hdl, sizeof (zprop_list_t)); entry->pl_prop = ZPROP_INVAL; entry->pl_user_prop = propname; entry->pl_width = strlen(entry->pl_user_prop); entry->pl_all = B_TRUE; *last = entry; last = &entry->pl_next; } for (entry = *plp; entry != NULL; entry = entry->pl_next) { if (entry->pl_fixed) continue; if (entry->pl_prop != ZPROP_INVAL && zpool_get_prop(zhp, entry->pl_prop, buf, sizeof (buf), NULL) == 0) { if (strlen(buf) > entry->pl_width) entry->pl_width = strlen(buf); } } return (0); } /* * Get the state for the given feature on the given ZFS pool. */ int zpool_prop_get_feature(zpool_handle_t *zhp, const char *propname, char *buf, size_t len) { uint64_t refcount; boolean_t found = B_FALSE; nvlist_t *features = zpool_get_features(zhp); boolean_t supported; const char *feature = strchr(propname, '@') + 1; supported = zpool_prop_feature(propname); ASSERT(supported || zpool_prop_unsupported(propname)); /* * Convert from feature name to feature guid. This conversion is * unecessary for unsupported@... properties because they already * use guids. 
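For a supported feature, the state mapping that zpool_prop_get_feature() applies just below condenses to three cases: absent from the label's feature nvlist means disabled, present with a zero refcount means enabled, and present with a nonzero refcount means active (unsupported@ properties take a separate branch). A sketch of that mapping on its own; the strings mirror the ZFS_FEATURE_* values:

#include <stdint.h>

static const char *
feature_state(int found, uint64_t refcount)
{
	if (!found)
		return ("disabled");	/* not in the label's feature nvlist */
	return (refcount == 0 ? "enabled" : "active");
}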
*/ if (supported) { int ret; zfeature_info_t *fi; ret = zfeature_lookup_name(feature, &fi); if (ret != 0) { (void) strlcpy(buf, "-", len); return (ENOTSUP); } feature = fi->fi_guid; } if (nvlist_lookup_uint64(features, feature, &refcount) == 0) found = B_TRUE; if (supported) { if (!found) { (void) strlcpy(buf, ZFS_FEATURE_DISABLED, len); } else { if (refcount == 0) (void) strlcpy(buf, ZFS_FEATURE_ENABLED, len); else (void) strlcpy(buf, ZFS_FEATURE_ACTIVE, len); } } else { if (found) { if (refcount == 0) { (void) strcpy(buf, ZFS_UNSUPPORTED_INACTIVE); } else { (void) strcpy(buf, ZFS_UNSUPPORTED_READONLY); } } else { (void) strlcpy(buf, "-", len); return (ENOTSUP); } } return (0); } /* * Don't start the slice at the default block of 34; many storage * devices will use a stripe width of 128k, other vendors prefer a 1m * alignment. It is best to play it safe and ensure a 1m alignment * given 512B blocks. When the block size is larger by a power of 2 * we will still be 1m aligned. Some devices are sensitive to the * partition ending alignment as well. */ #define NEW_START_BLOCK 2048 #define PARTITION_END_ALIGNMENT 2048 /* * Validate the given pool name, optionally putting an extended error message in * 'buf'. */ boolean_t zpool_name_valid(libzfs_handle_t *hdl, boolean_t isopen, const char *pool) { namecheck_err_t why; char what; int ret; ret = pool_namecheck(pool, &why, &what); /* * The rules for reserved pool names were extended at a later point. * But we need to support users with existing pools that may now be * invalid. So we only check for this expanded set of names during a * create (or import), and only in userland. */ if (ret == 0 && !isopen && (strncmp(pool, "mirror", 6) == 0 || strncmp(pool, "raidz", 5) == 0 || strncmp(pool, "spare", 5) == 0 || strcmp(pool, "log") == 0)) { if (hdl != NULL) zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "name is reserved")); return (B_FALSE); } if (ret != 0) { if (hdl != NULL) { switch (why) { case NAME_ERR_TOOLONG: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "name is too long")); break; case NAME_ERR_INVALCHAR: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid character " "'%c' in pool name"), what); break; case NAME_ERR_NOLETTER: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "name must begin with a letter")); break; case NAME_ERR_RESERVED: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "name is reserved")); break; case NAME_ERR_DISKLIKE: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool name is reserved")); break; case NAME_ERR_LEADING_SLASH: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "leading slash in name")); break; case NAME_ERR_EMPTY_COMPONENT: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "empty component in name")); break; case NAME_ERR_TRAILING_SLASH: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "trailing slash in name")); break; case NAME_ERR_MULTIPLE_AT: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "multiple '@' delimiters in name")); break; case NAME_ERR_NO_AT: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "permission set is missing '@'")); break; } } return (B_FALSE); } return (B_TRUE); } /* * Open a handle to the given pool, even if the pool is currently in the FAULTED * state. */ zpool_handle_t * zpool_open_canfail(libzfs_handle_t *hdl, const char *pool) { zpool_handle_t *zhp; boolean_t missing; /* * Make sure the pool name is valid. 
*/ if (!zpool_name_valid(hdl, B_TRUE, pool)) { (void) zfs_error_fmt(hdl, EZFS_INVALIDNAME, dgettext(TEXT_DOMAIN, "cannot open '%s'"), pool); return (NULL); } if ((zhp = zfs_alloc(hdl, sizeof (zpool_handle_t))) == NULL) return (NULL); zhp->zpool_hdl = hdl; (void) strlcpy(zhp->zpool_name, pool, sizeof (zhp->zpool_name)); if (zpool_refresh_stats(zhp, &missing) != 0) { zpool_close(zhp); return (NULL); } if (missing) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "no such pool")); (void) zfs_error_fmt(hdl, EZFS_NOENT, dgettext(TEXT_DOMAIN, "cannot open '%s'"), pool); zpool_close(zhp); return (NULL); } return (zhp); } /* * Like the above, but silent on error. Used when iterating over pools (because * the configuration cache may be out of date). */ int zpool_open_silent(libzfs_handle_t *hdl, const char *pool, zpool_handle_t **ret) { zpool_handle_t *zhp; boolean_t missing; if ((zhp = zfs_alloc(hdl, sizeof (zpool_handle_t))) == NULL) return (-1); zhp->zpool_hdl = hdl; (void) strlcpy(zhp->zpool_name, pool, sizeof (zhp->zpool_name)); if (zpool_refresh_stats(zhp, &missing) != 0) { zpool_close(zhp); return (-1); } if (missing) { zpool_close(zhp); *ret = NULL; return (0); } *ret = zhp; return (0); } /* * Similar to zpool_open_canfail(), but refuses to open pools in the faulted * state. */ zpool_handle_t * zpool_open(libzfs_handle_t *hdl, const char *pool) { zpool_handle_t *zhp; if ((zhp = zpool_open_canfail(hdl, pool)) == NULL) return (NULL); if (zhp->zpool_state == POOL_STATE_UNAVAIL) { (void) zfs_error_fmt(hdl, EZFS_POOLUNAVAIL, dgettext(TEXT_DOMAIN, "cannot open '%s'"), zhp->zpool_name); zpool_close(zhp); return (NULL); } return (zhp); } /* * Close the handle. Simply frees the memory associated with the handle. */ void zpool_close(zpool_handle_t *zhp) { if (zhp->zpool_config) nvlist_free(zhp->zpool_config); if (zhp->zpool_old_config) nvlist_free(zhp->zpool_old_config); if (zhp->zpool_props) nvlist_free(zhp->zpool_props); free(zhp); } /* * Return the name of the pool. */ const char * zpool_get_name(zpool_handle_t *zhp) { return (zhp->zpool_name); } /* * Return the state of the pool (ACTIVE or UNAVAILABLE) */ int zpool_get_state(zpool_handle_t *zhp) { return (zhp->zpool_state); } /* * Create the named pool, using the provided vdev list. It is assumed * that the consumer has already validated the contents of the nvlist, so we * don't have to worry about error semantics. 
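 *
 * A minimal caller sketch, assuming 'nvroot' has already been built and
 * validated by the caller; the pool name is an illustrative assumption:
 *
 *     static int
 *     create_plain_pool(libzfs_handle_t *hdl, nvlist_t *nvroot)
 *     {
 *             return (zpool_create(hdl, "tank", nvroot, NULL, NULL));
 *     }
 *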
*/ int zpool_create(libzfs_handle_t *hdl, const char *pool, nvlist_t *nvroot, nvlist_t *props, nvlist_t *fsprops) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; nvlist_t *zc_fsprops = NULL; nvlist_t *zc_props = NULL; char msg[1024]; char *altroot; int ret = -1; (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN, "cannot create '%s'"), pool); if (!zpool_name_valid(hdl, B_FALSE, pool)) return (zfs_error(hdl, EZFS_INVALIDNAME, msg)); if (zcmd_write_conf_nvlist(hdl, &zc, nvroot) != 0) return (-1); if (props) { prop_flags_t flags = { .create = B_TRUE, .import = B_FALSE }; if ((zc_props = zpool_valid_proplist(hdl, pool, props, SPA_VERSION_1, flags, msg)) == NULL) { goto create_failed; } } if (fsprops) { uint64_t zoned; char *zonestr; zoned = ((nvlist_lookup_string(fsprops, zfs_prop_to_name(ZFS_PROP_ZONED), &zonestr) == 0) && strcmp(zonestr, "on") == 0); if ((zc_fsprops = zfs_valid_proplist(hdl, ZFS_TYPE_FILESYSTEM, fsprops, zoned, NULL, msg)) == NULL) { goto create_failed; } if (!zc_props && (nvlist_alloc(&zc_props, NV_UNIQUE_NAME, 0) != 0)) { goto create_failed; } if (nvlist_add_nvlist(zc_props, ZPOOL_ROOTFS_PROPS, zc_fsprops) != 0) { goto create_failed; } } if (zc_props && zcmd_write_src_nvlist(hdl, &zc, zc_props) != 0) goto create_failed; (void) strlcpy(zc.zc_name, pool, sizeof (zc.zc_name)); if ((ret = zfs_ioctl(hdl, ZFS_IOC_POOL_CREATE, &zc)) != 0) { zcmd_free_nvlists(&zc); nvlist_free(zc_props); nvlist_free(zc_fsprops); switch (errno) { case EBUSY: /* * This can happen if the user has specified the same * device multiple times. We can't reliably detect this * until we try to add it and see we already have a * label. This can also happen under if the device is * part of an active md or lvm device. */ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "one or more vdevs refer to the same device, or one of\n" "the devices is part of an active md or lvm device")); return (zfs_error(hdl, EZFS_BADDEV, msg)); case EOVERFLOW: /* * This occurs when one of the devices is below * SPA_MINDEVSIZE. Unfortunately, we can't detect which * device was the problem device since there's no * reliable way to determine device size from userland. */ { char buf[64]; zfs_nicenum(SPA_MINDEVSIZE, buf, sizeof (buf)); zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "one or more devices is less than the " "minimum size (%s)"), buf); } return (zfs_error(hdl, EZFS_BADDEV, msg)); case ENOSPC: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "one or more devices is out of space")); return (zfs_error(hdl, EZFS_BADDEV, msg)); case ENOTBLK: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cache device must be a disk or disk slice")); return (zfs_error(hdl, EZFS_BADDEV, msg)); default: return (zpool_standard_error(hdl, errno, msg)); } } /* * If this is an alternate root pool, then we automatically set the * mountpoint of the root dataset to be '/'. */ if (nvlist_lookup_string(props, zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot) == 0) { zfs_handle_t *zhp; verify((zhp = zfs_open(hdl, pool, ZFS_TYPE_DATASET)) != NULL); verify(zfs_prop_set(zhp, zfs_prop_to_name(ZFS_PROP_MOUNTPOINT), "/") == 0); zfs_close(zhp); } create_failed: zcmd_free_nvlists(&zc); nvlist_free(zc_props); nvlist_free(zc_fsprops); return (ret); } /* * Destroy the given pool. It is up to the caller to ensure that there are no * datasets left in the pool. 
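 *
 * A caller sketch for the updated interface; the 'history' string is an
 * assumption (typically the command line being executed), recorded via
 * zc_history in the ioctl below:
 *
 *     static int
 *     destroy_pool(libzfs_handle_t *hdl, const char *name,
 *         const char *history)
 *     {
 *             zpool_handle_t *zhp;
 *             int err;
 *
 *             if ((zhp = zpool_open_canfail(hdl, name)) == NULL)
 *                     return (-1);
 *             err = zpool_destroy(zhp, history);
 *             zpool_close(zhp);
 *             return (err);
 *     }
 *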
*/ int -zpool_destroy(zpool_handle_t *zhp) +zpool_destroy(zpool_handle_t *zhp, const char *log_str) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; zfs_handle_t *zfp = NULL; libzfs_handle_t *hdl = zhp->zpool_hdl; char msg[1024]; if (zhp->zpool_state == POOL_STATE_ACTIVE && (zfp = zfs_open(hdl, zhp->zpool_name, ZFS_TYPE_FILESYSTEM)) == NULL) return (-1); (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); + zc.zc_history = (uint64_t)(uintptr_t)log_str; if (zfs_ioctl(hdl, ZFS_IOC_POOL_DESTROY, &zc) != 0) { (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN, "cannot destroy '%s'"), zhp->zpool_name); if (errno == EROFS) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "one or more devices is read only")); (void) zfs_error(hdl, EZFS_BADDEV, msg); } else { (void) zpool_standard_error(hdl, errno, msg); } if (zfp) zfs_close(zfp); return (-1); } if (zfp) { remove_mountpoint(zfp); zfs_close(zfp); } return (0); } /* * Add the given vdevs to the pool. The caller must have already performed the * necessary verification to ensure that the vdev specification is well-formed. */ int zpool_add(zpool_handle_t *zhp, nvlist_t *nvroot) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; int ret; libzfs_handle_t *hdl = zhp->zpool_hdl; char msg[1024]; nvlist_t **spares, **l2cache; uint_t nspares, nl2cache; (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN, "cannot add to '%s'"), zhp->zpool_name); if (zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL) < SPA_VERSION_SPARES && nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be " "upgraded to add hot spares")); return (zfs_error(hdl, EZFS_BADVERSION, msg)); } #if defined(__sun__) || defined(__sun) if (zpool_is_bootable(zhp) && nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0) { uint64_t s; for (s = 0; s < nspares; s++) { char *path; if (nvlist_lookup_string(spares[s], ZPOOL_CONFIG_PATH, &path) == 0 && pool_uses_efi(spares[s])) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "device '%s' contains an EFI label and " "cannot be used on root pools."), zpool_vdev_name(hdl, NULL, spares[s], B_FALSE)); return (zfs_error(hdl, EZFS_POOL_NOTSUP, msg)); } } } #endif if (zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL) < SPA_VERSION_L2CACHE && nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be " "upgraded to add cache devices")); return (zfs_error(hdl, EZFS_BADVERSION, msg)); } if (zcmd_write_conf_nvlist(hdl, &zc, nvroot) != 0) return (-1); (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); if (zfs_ioctl(hdl, ZFS_IOC_VDEV_ADD, &zc) != 0) { switch (errno) { case EBUSY: /* * This can happen if the user has specified the same * device multiple times. We can't reliably detect this * until we try to add it and see we already have a * label. */ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "one or more vdevs refer to the same device")); (void) zfs_error(hdl, EZFS_BADDEV, msg); break; case EOVERFLOW: /* * This occurrs when one of the devices is below * SPA_MINDEVSIZE. Unfortunately, we can't detect which * device was the problem device since there's no * reliable way to determine device size from userland. 
*/ { char buf[64]; zfs_nicenum(SPA_MINDEVSIZE, buf, sizeof (buf)); zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "device is less than the minimum " "size (%s)"), buf); } (void) zfs_error(hdl, EZFS_BADDEV, msg); break; case ENOTSUP: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be upgraded to add these vdevs")); (void) zfs_error(hdl, EZFS_BADVERSION, msg); break; case EDOM: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "root pool can not have multiple vdevs" " or separate logs")); (void) zfs_error(hdl, EZFS_POOL_NOTSUP, msg); break; case ENOTBLK: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cache device must be a disk or disk slice")); (void) zfs_error(hdl, EZFS_BADDEV, msg); break; default: (void) zpool_standard_error(hdl, errno, msg); } ret = -1; } else { ret = 0; } zcmd_free_nvlists(&zc); return (ret); } /* * Exports the pool from the system. The caller must ensure that there are no * mounted datasets in the pool. */ -int -zpool_export_common(zpool_handle_t *zhp, boolean_t force, boolean_t hardforce) +static int +zpool_export_common(zpool_handle_t *zhp, boolean_t force, boolean_t hardforce, + const char *log_str) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; char msg[1024]; (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN, "cannot export '%s'"), zhp->zpool_name); (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); zc.zc_cookie = force; zc.zc_guid = hardforce; + zc.zc_history = (uint64_t)(uintptr_t)log_str; if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_POOL_EXPORT, &zc) != 0) { switch (errno) { case EXDEV: zfs_error_aux(zhp->zpool_hdl, dgettext(TEXT_DOMAIN, "use '-f' to override the following errors:\n" "'%s' has an active shared spare which could be" " used by other pools once '%s' is exported."), zhp->zpool_name, zhp->zpool_name); return (zfs_error(zhp->zpool_hdl, EZFS_ACTIVE_SPARE, msg)); default: return (zpool_standard_error_fmt(zhp->zpool_hdl, errno, msg)); } } return (0); } int -zpool_export(zpool_handle_t *zhp, boolean_t force) +zpool_export(zpool_handle_t *zhp, boolean_t force, const char *log_str) { - return (zpool_export_common(zhp, force, B_FALSE)); + return (zpool_export_common(zhp, force, B_FALSE, log_str)); } int -zpool_export_force(zpool_handle_t *zhp) +zpool_export_force(zpool_handle_t *zhp, const char *log_str) { - return (zpool_export_common(zhp, B_TRUE, B_TRUE)); + return (zpool_export_common(zhp, B_TRUE, B_TRUE, log_str)); } static void zpool_rewind_exclaim(libzfs_handle_t *hdl, const char *name, boolean_t dryrun, nvlist_t *config) { nvlist_t *nv = NULL; uint64_t rewindto; int64_t loss = -1; struct tm t; char timestr[128]; if (!hdl->libzfs_printerr || config == NULL) return; if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO, &nv) != 0 || nvlist_lookup_nvlist(nv, ZPOOL_CONFIG_REWIND_INFO, &nv) != 0) { return; } if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_LOAD_TIME, &rewindto) != 0) return; (void) nvlist_lookup_int64(nv, ZPOOL_CONFIG_REWIND_TIME, &loss); if (localtime_r((time_t *)&rewindto, &t) != NULL && strftime(timestr, 128, "%c", &t) != 0) { if (dryrun) { (void) printf(dgettext(TEXT_DOMAIN, "Would be able to return %s " "to its state as of %s.\n"), name, timestr); } else { (void) printf(dgettext(TEXT_DOMAIN, "Pool %s returned to its state as of %s.\n"), name, timestr); } if (loss > 120) { (void) printf(dgettext(TEXT_DOMAIN, "%s approximately %lld "), dryrun ? 
"Would discard" : "Discarded", ((longlong_t)loss + 30) / 60); (void) printf(dgettext(TEXT_DOMAIN, "minutes of transactions.\n")); } else if (loss > 0) { (void) printf(dgettext(TEXT_DOMAIN, "%s approximately %lld "), dryrun ? "Would discard" : "Discarded", (longlong_t)loss); (void) printf(dgettext(TEXT_DOMAIN, "seconds of transactions.\n")); } } } void zpool_explain_recover(libzfs_handle_t *hdl, const char *name, int reason, nvlist_t *config) { nvlist_t *nv = NULL; int64_t loss = -1; uint64_t edata = UINT64_MAX; uint64_t rewindto; struct tm t; char timestr[128]; if (!hdl->libzfs_printerr) return; if (reason >= 0) (void) printf(dgettext(TEXT_DOMAIN, "action: ")); else (void) printf(dgettext(TEXT_DOMAIN, "\t")); /* All attempted rewinds failed if ZPOOL_CONFIG_LOAD_TIME missing */ if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO, &nv) != 0 || nvlist_lookup_nvlist(nv, ZPOOL_CONFIG_REWIND_INFO, &nv) != 0 || nvlist_lookup_uint64(nv, ZPOOL_CONFIG_LOAD_TIME, &rewindto) != 0) goto no_info; (void) nvlist_lookup_int64(nv, ZPOOL_CONFIG_REWIND_TIME, &loss); (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_LOAD_DATA_ERRORS, &edata); (void) printf(dgettext(TEXT_DOMAIN, "Recovery is possible, but will result in some data loss.\n")); if (localtime_r((time_t *)&rewindto, &t) != NULL && strftime(timestr, 128, "%c", &t) != 0) { (void) printf(dgettext(TEXT_DOMAIN, "\tReturning the pool to its state as of %s\n" "\tshould correct the problem. "), timestr); } else { (void) printf(dgettext(TEXT_DOMAIN, "\tReverting the pool to an earlier state " "should correct the problem.\n\t")); } if (loss > 120) { (void) printf(dgettext(TEXT_DOMAIN, "Approximately %lld minutes of data\n" "\tmust be discarded, irreversibly. "), ((longlong_t)loss + 30) / 60); } else if (loss > 0) { (void) printf(dgettext(TEXT_DOMAIN, "Approximately %lld seconds of data\n" "\tmust be discarded, irreversibly. "), (longlong_t)loss); } if (edata != 0 && edata != UINT64_MAX) { if (edata == 1) { (void) printf(dgettext(TEXT_DOMAIN, "After rewind, at least\n" "\tone persistent user-data error will remain. ")); } else { (void) printf(dgettext(TEXT_DOMAIN, "After rewind, several\n" "\tpersistent user-data errors will remain. ")); } } (void) printf(dgettext(TEXT_DOMAIN, "Recovery can be attempted\n\tby executing 'zpool %s -F %s'. "), reason >= 0 ? "clear" : "import", name); (void) printf(dgettext(TEXT_DOMAIN, "A scrub of the pool\n" "\tis strongly recommended after recovery.\n")); return; no_info: (void) printf(dgettext(TEXT_DOMAIN, "Destroy and re-create the pool from\n\ta backup source.\n")); } /* * zpool_import() is a contracted interface. Should be kept the same * if possible. * * Applications should use zpool_import_props() to import a pool with * new properties value to be set. 
*/ int zpool_import(libzfs_handle_t *hdl, nvlist_t *config, const char *newname, char *altroot) { nvlist_t *props = NULL; int ret; if (altroot != NULL) { if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0) { return (zfs_error_fmt(hdl, EZFS_NOMEM, dgettext(TEXT_DOMAIN, "cannot import '%s'"), newname)); } if (nvlist_add_string(props, zpool_prop_to_name(ZPOOL_PROP_ALTROOT), altroot) != 0 || nvlist_add_string(props, zpool_prop_to_name(ZPOOL_PROP_CACHEFILE), "none") != 0) { nvlist_free(props); return (zfs_error_fmt(hdl, EZFS_NOMEM, dgettext(TEXT_DOMAIN, "cannot import '%s'"), newname)); } } ret = zpool_import_props(hdl, config, newname, props, ZFS_IMPORT_NORMAL); if (props) nvlist_free(props); return (ret); } static void print_vdev_tree(libzfs_handle_t *hdl, const char *name, nvlist_t *nv, int indent) { nvlist_t **child; uint_t c, children; char *vname; uint64_t is_log = 0; (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_IS_LOG, &is_log); if (name != NULL) (void) printf("\t%*s%s%s\n", indent, "", name, is_log ? " [log]" : ""); if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, &child, &children) != 0) return; for (c = 0; c < children; c++) { vname = zpool_vdev_name(hdl, NULL, child[c], B_TRUE); print_vdev_tree(hdl, vname, child[c], indent + 2); free(vname); } } void zpool_print_unsup_feat(nvlist_t *config) { nvlist_t *nvinfo, *unsup_feat; nvpair_t *nvp; verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO, &nvinfo) == 0); verify(nvlist_lookup_nvlist(nvinfo, ZPOOL_CONFIG_UNSUP_FEAT, &unsup_feat) == 0); for (nvp = nvlist_next_nvpair(unsup_feat, NULL); nvp != NULL; nvp = nvlist_next_nvpair(unsup_feat, nvp)) { char *desc; verify(nvpair_type(nvp) == DATA_TYPE_STRING); verify(nvpair_value_string(nvp, &desc) == 0); if (strlen(desc) > 0) (void) printf("\t%s (%s)\n", nvpair_name(nvp), desc); else (void) printf("\t%s\n", nvpair_name(nvp)); } } /* * Import the given pool using the known configuration and a list of * properties to be set. The configuration should have come from * zpool_find_import(). The 'newname' parameters control whether the pool * is imported with a different name. 
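 *
 * A caller sketch for a rename-on-import with no additional properties;
 * the new pool name is an illustrative assumption:
 *
 *     if (zpool_import_props(hdl, config, "newtank", NULL,
 *         ZFS_IMPORT_NORMAL) != 0)
 *             return (-1);
 *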
*/ int zpool_import_props(libzfs_handle_t *hdl, nvlist_t *config, const char *newname, nvlist_t *props, int flags) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; zpool_rewind_policy_t policy; nvlist_t *nv = NULL; nvlist_t *nvinfo = NULL; nvlist_t *missing = NULL; char *thename; char *origname; int ret; int error = 0; char errbuf[1024]; verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME, &origname) == 0); (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot import pool '%s'"), origname); if (newname != NULL) { if (!zpool_name_valid(hdl, B_FALSE, newname)) return (zfs_error_fmt(hdl, EZFS_INVALIDNAME, dgettext(TEXT_DOMAIN, "cannot import '%s'"), newname)); thename = (char *)newname; } else { thename = origname; } if (props) { uint64_t version; prop_flags_t flags = { .create = B_FALSE, .import = B_TRUE }; verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, &version) == 0); if ((props = zpool_valid_proplist(hdl, origname, props, version, flags, errbuf)) == NULL) { return (-1); } else if (zcmd_write_src_nvlist(hdl, &zc, props) != 0) { nvlist_free(props); return (-1); } } (void) strlcpy(zc.zc_name, thename, sizeof (zc.zc_name)); verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &zc.zc_guid) == 0); if (zcmd_write_conf_nvlist(hdl, &zc, config) != 0) { nvlist_free(props); return (-1); } if (zcmd_alloc_dst_nvlist(hdl, &zc, zc.zc_nvlist_conf_size * 2) != 0) { nvlist_free(props); return (-1); } zc.zc_cookie = flags; while ((ret = zfs_ioctl(hdl, ZFS_IOC_POOL_IMPORT, &zc)) != 0 && errno == ENOMEM) { if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) { zcmd_free_nvlists(&zc); return (-1); } } if (ret != 0) error = errno; (void) zcmd_read_dst_nvlist(hdl, &zc, &nv); zpool_get_rewind_policy(config, &policy); if (error) { char desc[1024]; /* * Dry-run failed, but we print out what success * looks like if we found a best txg */ if (policy.zrp_request & ZPOOL_TRY_REWIND) { zpool_rewind_exclaim(hdl, newname ? origname : thename, B_TRUE, nv); nvlist_free(nv); return (-1); } if (newname == NULL) (void) snprintf(desc, sizeof (desc), dgettext(TEXT_DOMAIN, "cannot import '%s'"), thename); else (void) snprintf(desc, sizeof (desc), dgettext(TEXT_DOMAIN, "cannot import '%s' as '%s'"), origname, thename); switch (error) { case ENOTSUP: if (nv != NULL && nvlist_lookup_nvlist(nv, ZPOOL_CONFIG_LOAD_INFO, &nvinfo) == 0 && nvlist_exists(nvinfo, ZPOOL_CONFIG_UNSUP_FEAT)) { (void) printf(dgettext(TEXT_DOMAIN, "This " "pool uses the following feature(s) not " "supported by this system:\n")); zpool_print_unsup_feat(nv); if (nvlist_exists(nvinfo, ZPOOL_CONFIG_CAN_RDONLY)) { (void) printf(dgettext(TEXT_DOMAIN, "All unsupported features are only " "required for writing to the pool." "\nThe pool can be imported using " "'-o readonly=on'.\n")); } } /* * Unsupported version. 
*/ (void) zfs_error(hdl, EZFS_BADVERSION, desc); break; case EINVAL: (void) zfs_error(hdl, EZFS_INVALCONFIG, desc); break; case EROFS: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "one or more devices is read only")); (void) zfs_error(hdl, EZFS_BADDEV, desc); break; case ENXIO: if (nv && nvlist_lookup_nvlist(nv, ZPOOL_CONFIG_LOAD_INFO, &nvinfo) == 0 && nvlist_lookup_nvlist(nvinfo, ZPOOL_CONFIG_MISSING_DEVICES, &missing) == 0) { (void) printf(dgettext(TEXT_DOMAIN, "The devices below are missing, use " "'-m' to import the pool anyway:\n")); print_vdev_tree(hdl, NULL, missing, 2); (void) printf("\n"); } (void) zpool_standard_error(hdl, error, desc); break; case EEXIST: (void) zpool_standard_error(hdl, error, desc); break; case EBUSY: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "one or more devices are already in use\n")); (void) zfs_error(hdl, EZFS_BADDEV, desc); break; default: (void) zpool_standard_error(hdl, error, desc); zpool_explain_recover(hdl, newname ? origname : thename, -error, nv); break; } nvlist_free(nv); ret = -1; } else { zpool_handle_t *zhp; /* * This should never fail, but play it safe anyway. */ if (zpool_open_silent(hdl, thename, &zhp) != 0) ret = -1; else if (zhp != NULL) zpool_close(zhp); if (policy.zrp_request & (ZPOOL_DO_REWIND | ZPOOL_TRY_REWIND)) { zpool_rewind_exclaim(hdl, newname ? origname : thename, ((policy.zrp_request & ZPOOL_TRY_REWIND) != 0), nv); } nvlist_free(nv); return (0); } zcmd_free_nvlists(&zc); nvlist_free(props); return (ret); } /* * Scan the pool. */ int zpool_scan(zpool_handle_t *zhp, pool_scan_func_t func) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; char msg[1024]; libzfs_handle_t *hdl = zhp->zpool_hdl; (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); zc.zc_cookie = func; if (zfs_ioctl(hdl, ZFS_IOC_POOL_SCAN, &zc) == 0 || (errno == ENOENT && func != POOL_SCAN_NONE)) return (0); if (func == POOL_SCAN_SCRUB) { (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN, "cannot scrub %s"), zc.zc_name); } else if (func == POOL_SCAN_NONE) { (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN, "cannot cancel scrubbing %s"), zc.zc_name); } else { assert(!"unexpected result"); } if (errno == EBUSY) { nvlist_t *nvroot; pool_scan_stat_t *ps = NULL; uint_t psc; verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); (void) nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_SCAN_STATS, (uint64_t **)&ps, &psc); if (ps && ps->pss_func == POOL_SCAN_SCRUB) return (zfs_error(hdl, EZFS_SCRUBBING, msg)); else return (zfs_error(hdl, EZFS_RESILVERING, msg)); } else if (errno == ENOENT) { return (zfs_error(hdl, EZFS_NO_SCRUB, msg)); } else { return (zpool_standard_error(hdl, errno, msg)); } } /* * Find a vdev that matches the search criteria specified. We use the * the nvpair name to determine how we should look for the device. * 'avail_spare' is set to TRUE if the provided guid refers to an AVAIL * spare; but FALSE if its an INUSE spare. 
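 *
 * Most consumers reach this iterator through zpool_find_vdev(); a
 * typical lookup (the device path is an illustrative assumption) is:
 *
 *     boolean_t spare, l2cache, islog;
 *     nvlist_t *tgt;
 *
 *     if ((tgt = zpool_find_vdev(zhp, "/dev/sda", &spare, &l2cache,
 *         &islog)) == NULL)
 *             return (zfs_error(hdl, EZFS_NODEVICE, msg));
 *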
*/ static nvlist_t * vdev_to_nvlist_iter(nvlist_t *nv, nvlist_t *search, boolean_t *avail_spare, boolean_t *l2cache, boolean_t *log) { uint_t c, children; nvlist_t **child; nvlist_t *ret; uint64_t is_log; char *srchkey; nvpair_t *pair = nvlist_next_nvpair(search, NULL); /* Nothing to look for */ if (search == NULL || pair == NULL) return (NULL); /* Obtain the key we will use to search */ srchkey = nvpair_name(pair); switch (nvpair_type(pair)) { case DATA_TYPE_UINT64: if (strcmp(srchkey, ZPOOL_CONFIG_GUID) == 0) { uint64_t srchval, theguid; verify(nvpair_value_uint64(pair, &srchval) == 0); verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &theguid) == 0); if (theguid == srchval) return (nv); } break; case DATA_TYPE_STRING: { char *srchval, *val; verify(nvpair_value_string(pair, &srchval) == 0); if (nvlist_lookup_string(nv, srchkey, &val) != 0) break; /* * Search for the requested value. Special cases: * * - ZPOOL_CONFIG_PATH for whole disk entries. These end in * "-part1", or "p1". The suffix is hidden from the user, * but included in the string, so this matches around it. * - ZPOOL_CONFIG_PATH for short names zfs_strcmp_shortname() * is used to check all possible expanded paths. * - looking for a top-level vdev name (i.e. ZPOOL_CONFIG_TYPE). * * Otherwise, all other searches are simple string compares. */ if (strcmp(srchkey, ZPOOL_CONFIG_PATH) == 0) { uint64_t wholedisk = 0; (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK, &wholedisk); if (zfs_strcmp_pathname(srchval, val, wholedisk) == 0) return (nv); } else if (strcmp(srchkey, ZPOOL_CONFIG_TYPE) == 0 && val) { char *type, *idx, *end, *p; uint64_t id, vdev_id; /* * Determine our vdev type, keeping in mind * that the srchval is composed of a type and * vdev id pair (i.e. mirror-4). */ if ((type = strdup(srchval)) == NULL) return (NULL); if ((p = strrchr(type, '-')) == NULL) { free(type); break; } idx = p + 1; *p = '\0'; /* * If the types don't match then keep looking. */ if (strncmp(val, type, strlen(val)) != 0) { free(type); break; } verify(strncmp(type, VDEV_TYPE_RAIDZ, strlen(VDEV_TYPE_RAIDZ)) == 0 || strncmp(type, VDEV_TYPE_MIRROR, strlen(VDEV_TYPE_MIRROR)) == 0); verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ID, &id) == 0); errno = 0; vdev_id = strtoull(idx, &end, 10); free(type); if (errno != 0) return (NULL); /* * Now verify that we have the correct vdev id. */ if (vdev_id == id) return (nv); } /* * Common case */ if (strcmp(srchval, val) == 0) return (nv); break; } default: break; } if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, &child, &children) != 0) return (NULL); for (c = 0; c < children; c++) { if ((ret = vdev_to_nvlist_iter(child[c], search, avail_spare, l2cache, NULL)) != NULL) { /* * The 'is_log' value is only set for the toplevel * vdev, not the leaf vdevs. So we always lookup the * log device from the root of the vdev tree (where * 'log' is non-NULL). 
*/ if (log != NULL && nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG, &is_log) == 0 && is_log) { *log = B_TRUE; } return (ret); } } if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES, &child, &children) == 0) { for (c = 0; c < children; c++) { if ((ret = vdev_to_nvlist_iter(child[c], search, avail_spare, l2cache, NULL)) != NULL) { *avail_spare = B_TRUE; return (ret); } } } if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE, &child, &children) == 0) { for (c = 0; c < children; c++) { if ((ret = vdev_to_nvlist_iter(child[c], search, avail_spare, l2cache, NULL)) != NULL) { *l2cache = B_TRUE; return (ret); } } } return (NULL); } /* * Given a physical path (minus the "/devices" prefix), find the * associated vdev. */ nvlist_t * zpool_find_vdev_by_physpath(zpool_handle_t *zhp, const char *ppath, boolean_t *avail_spare, boolean_t *l2cache, boolean_t *log) { nvlist_t *search, *nvroot, *ret; verify(nvlist_alloc(&search, NV_UNIQUE_NAME, KM_SLEEP) == 0); verify(nvlist_add_string(search, ZPOOL_CONFIG_PHYS_PATH, ppath) == 0); verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); *avail_spare = B_FALSE; *l2cache = B_FALSE; if (log != NULL) *log = B_FALSE; ret = vdev_to_nvlist_iter(nvroot, search, avail_spare, l2cache, log); nvlist_free(search); return (ret); } /* * Determine if we have an "interior" top-level vdev (i.e mirror/raidz). */ boolean_t zpool_vdev_is_interior(const char *name) { if (strncmp(name, VDEV_TYPE_RAIDZ, strlen(VDEV_TYPE_RAIDZ)) == 0 || strncmp(name, VDEV_TYPE_MIRROR, strlen(VDEV_TYPE_MIRROR)) == 0) return (B_TRUE); return (B_FALSE); } nvlist_t * zpool_find_vdev(zpool_handle_t *zhp, const char *path, boolean_t *avail_spare, boolean_t *l2cache, boolean_t *log) { char *end; nvlist_t *nvroot, *search, *ret; uint64_t guid; verify(nvlist_alloc(&search, NV_UNIQUE_NAME, KM_SLEEP) == 0); guid = strtoull(path, &end, 10); if (guid != 0 && *end == '\0') { verify(nvlist_add_uint64(search, ZPOOL_CONFIG_GUID, guid) == 0); } else if (zpool_vdev_is_interior(path)) { verify(nvlist_add_string(search, ZPOOL_CONFIG_TYPE, path) == 0); } else { verify(nvlist_add_string(search, ZPOOL_CONFIG_PATH, path) == 0); } verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); *avail_spare = B_FALSE; *l2cache = B_FALSE; if (log != NULL) *log = B_FALSE; ret = vdev_to_nvlist_iter(nvroot, search, avail_spare, l2cache, log); nvlist_free(search); return (ret); } static int vdev_online(nvlist_t *nv) { uint64_t ival; if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_OFFLINE, &ival) == 0 || nvlist_lookup_uint64(nv, ZPOOL_CONFIG_FAULTED, &ival) == 0 || nvlist_lookup_uint64(nv, ZPOOL_CONFIG_REMOVED, &ival) == 0) return (0); return (1); } /* * Helper function for zpool_get_physpaths(). */ static int vdev_get_one_physpath(nvlist_t *config, char *physpath, size_t physpath_size, size_t *bytes_written) { size_t bytes_left, pos, rsz; char *tmppath; const char *format; if (nvlist_lookup_string(config, ZPOOL_CONFIG_PHYS_PATH, &tmppath) != 0) return (EZFS_NODEVICE); pos = *bytes_written; bytes_left = physpath_size - pos; format = (pos == 0) ? 
"%s" : " %s"; rsz = snprintf(physpath + pos, bytes_left, format, tmppath); *bytes_written += rsz; if (rsz >= bytes_left) { /* if physpath was not copied properly, clear it */ if (bytes_left != 0) { physpath[pos] = 0; } return (EZFS_NOSPC); } return (0); } static int vdev_get_physpaths(nvlist_t *nv, char *physpath, size_t phypath_size, size_t *rsz, boolean_t is_spare) { char *type; int ret; if (nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) != 0) return (EZFS_INVALCONFIG); if (strcmp(type, VDEV_TYPE_DISK) == 0) { /* * An active spare device has ZPOOL_CONFIG_IS_SPARE set. * For a spare vdev, we only want to boot from the active * spare device. */ if (is_spare) { uint64_t spare = 0; (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_IS_SPARE, &spare); if (!spare) return (EZFS_INVALCONFIG); } if (vdev_online(nv)) { if ((ret = vdev_get_one_physpath(nv, physpath, phypath_size, rsz)) != 0) return (ret); } } else if (strcmp(type, VDEV_TYPE_MIRROR) == 0 || strcmp(type, VDEV_TYPE_REPLACING) == 0 || (is_spare = (strcmp(type, VDEV_TYPE_SPARE) == 0))) { nvlist_t **child; uint_t count; int i, ret; if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, &child, &count) != 0) return (EZFS_INVALCONFIG); for (i = 0; i < count; i++) { ret = vdev_get_physpaths(child[i], physpath, phypath_size, rsz, is_spare); if (ret == EZFS_NOSPC) return (ret); } } return (EZFS_POOL_INVALARG); } /* * Get phys_path for a root pool config. * Return 0 on success; non-zero on failure. */ static int zpool_get_config_physpath(nvlist_t *config, char *physpath, size_t phypath_size) { size_t rsz; nvlist_t *vdev_root; nvlist_t **child; uint_t count; char *type; rsz = 0; if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &vdev_root) != 0) return (EZFS_INVALCONFIG); if (nvlist_lookup_string(vdev_root, ZPOOL_CONFIG_TYPE, &type) != 0 || nvlist_lookup_nvlist_array(vdev_root, ZPOOL_CONFIG_CHILDREN, &child, &count) != 0) return (EZFS_INVALCONFIG); #if defined(__sun__) || defined(__sun) /* * root pool can not have EFI labeled disks and can only have * a single top-level vdev. */ if (strcmp(type, VDEV_TYPE_ROOT) != 0 || count != 1 || pool_uses_efi(vdev_root)) return (EZFS_POOL_INVALARG); #endif (void) vdev_get_physpaths(child[0], physpath, phypath_size, &rsz, B_FALSE); /* No online devices */ if (rsz == 0) return (EZFS_NODEVICE); return (0); } /* * Get phys_path for a root pool * Return 0 on success; non-zero on failure. */ int zpool_get_physpath(zpool_handle_t *zhp, char *physpath, size_t phypath_size) { return (zpool_get_config_physpath(zhp->zpool_config, physpath, phypath_size)); } /* * If the device has being dynamically expanded then we need to relabel * the disk to use the new unallocated space. */ static int zpool_relabel_disk(libzfs_handle_t *hdl, const char *path, const char *msg) { int fd, error; if ((fd = open(path, O_RDWR|O_DIRECT)) < 0) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot " "relabel '%s': unable to open device: %d"), path, errno); return (zfs_error(hdl, EZFS_OPENFAILED, msg)); } /* * It's possible that we might encounter an error if the device * does not have any unallocated space left. If so, we simply * ignore that error and continue on. * * Also, we don't call efi_rescan() - that would just return EBUSY. * The module will do it for us in vdev_disk_open(). 
*/ error = efi_use_whole_disk(fd); (void) close(fd); if (error && error != VT_ENOSPC) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot " "relabel '%s': unable to read disk capacity"), path); return (zfs_error(hdl, EZFS_NOCAP, msg)); } return (0); } /* * Bring the specified vdev online. The 'flags' parameter is a set of the * ZFS_ONLINE_* flags. */ int zpool_vdev_online(zpool_handle_t *zhp, const char *path, int flags, vdev_state_t *newstate) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; char msg[1024]; nvlist_t *tgt; boolean_t avail_spare, l2cache, islog; libzfs_handle_t *hdl = zhp->zpool_hdl; int error; if (flags & ZFS_ONLINE_EXPAND) { (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN, "cannot expand %s"), path); } else { (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN, "cannot online %s"), path); } (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache, &islog)) == NULL) return (zfs_error(hdl, EZFS_NODEVICE, msg)); verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0); if (avail_spare) return (zfs_error(hdl, EZFS_ISSPARE, msg)); if (flags & ZFS_ONLINE_EXPAND || zpool_get_prop_int(zhp, ZPOOL_PROP_AUTOEXPAND, NULL)) { uint64_t wholedisk = 0; (void) nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_WHOLE_DISK, &wholedisk); /* * XXX - L2ARC 1.0 devices can't support expansion. */ if (l2cache) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot expand cache devices")); return (zfs_error(hdl, EZFS_VDEVNOTSUP, msg)); } if (wholedisk) { const char *fullpath = path; char buf[MAXPATHLEN]; if (path[0] != '/') { error = zfs_resolve_shortname(path, buf, sizeof(buf)); if (error != 0) return (zfs_error(hdl, EZFS_NODEVICE, msg)); fullpath = buf; } error = zpool_relabel_disk(hdl, fullpath, msg); if (error != 0) return (error); } } zc.zc_cookie = VDEV_STATE_ONLINE; zc.zc_obj = flags; if (zfs_ioctl(hdl, ZFS_IOC_VDEV_SET_STATE, &zc) != 0) { if (errno == EINVAL) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "was split " "from this pool into a new one. Use '%s' " "instead"), "zpool detach"); return (zfs_error(hdl, EZFS_POSTSPLIT_ONLINE, msg)); } return (zpool_standard_error(hdl, errno, msg)); } *newstate = zc.zc_cookie; return (0); } /* * Take the specified vdev offline */ int zpool_vdev_offline(zpool_handle_t *zhp, const char *path, boolean_t istmp) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; char msg[1024]; nvlist_t *tgt; boolean_t avail_spare, l2cache; libzfs_handle_t *hdl = zhp->zpool_hdl; (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN, "cannot offline %s"), path); (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache, NULL)) == NULL) return (zfs_error(hdl, EZFS_NODEVICE, msg)); verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0); if (avail_spare) return (zfs_error(hdl, EZFS_ISSPARE, msg)); zc.zc_cookie = VDEV_STATE_OFFLINE; zc.zc_obj = istmp ? ZFS_OFFLINE_TEMPORARY : 0; if (zfs_ioctl(hdl, ZFS_IOC_VDEV_SET_STATE, &zc) == 0) return (0); switch (errno) { case EBUSY: /* * There are no other replicas of this device. */ return (zfs_error(hdl, EZFS_NOREPLICAS, msg)); case EEXIST: /* * The log device has unplayed logs */ return (zfs_error(hdl, EZFS_UNPLAYED_LOGS, msg)); default: return (zpool_standard_error(hdl, errno, msg)); } } /* * Mark the given vdev faulted. 
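 *
 * A possible caller sketch: resolve the device path to its guid first,
 * then fault it; 'aux' carries the vdev_aux_t reason supplied by the
 * caller (the helper below is hypothetical):
 *
 *     static int
 *     fault_by_path(zpool_handle_t *zhp, const char *path, vdev_aux_t aux)
 *     {
 *             boolean_t spare, l2cache;
 *             nvlist_t *tgt;
 *             uint64_t guid;
 *
 *             if ((tgt = zpool_find_vdev(zhp, path, &spare, &l2cache,
 *                 NULL)) == NULL)
 *                     return (-1);
 *             verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID,
 *                 &guid) == 0);
 *             return (zpool_vdev_fault(zhp, guid, aux));
 *     }
 *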
*/ int zpool_vdev_fault(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; char msg[1024]; libzfs_handle_t *hdl = zhp->zpool_hdl; (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN, "cannot fault %llu"), (u_longlong_t)guid); (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); zc.zc_guid = guid; zc.zc_cookie = VDEV_STATE_FAULTED; zc.zc_obj = aux; if (ioctl(hdl->libzfs_fd, ZFS_IOC_VDEV_SET_STATE, &zc) == 0) return (0); switch (errno) { case EBUSY: /* * There are no other replicas of this device. */ return (zfs_error(hdl, EZFS_NOREPLICAS, msg)); default: return (zpool_standard_error(hdl, errno, msg)); } } /* * Mark the given vdev degraded. */ int zpool_vdev_degrade(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; char msg[1024]; libzfs_handle_t *hdl = zhp->zpool_hdl; (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN, "cannot degrade %llu"), (u_longlong_t)guid); (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); zc.zc_guid = guid; zc.zc_cookie = VDEV_STATE_DEGRADED; zc.zc_obj = aux; if (ioctl(hdl->libzfs_fd, ZFS_IOC_VDEV_SET_STATE, &zc) == 0) return (0); return (zpool_standard_error(hdl, errno, msg)); } /* * Returns TRUE if the given nvlist is a vdev that was originally swapped in as * a hot spare. */ static boolean_t is_replacing_spare(nvlist_t *search, nvlist_t *tgt, int which) { nvlist_t **child; uint_t c, children; char *type; if (nvlist_lookup_nvlist_array(search, ZPOOL_CONFIG_CHILDREN, &child, &children) == 0) { verify(nvlist_lookup_string(search, ZPOOL_CONFIG_TYPE, &type) == 0); if (strcmp(type, VDEV_TYPE_SPARE) == 0 && children == 2 && child[which] == tgt) return (B_TRUE); for (c = 0; c < children; c++) if (is_replacing_spare(child[c], tgt, which)) return (B_TRUE); } return (B_FALSE); } /* * Attach new_disk (fully described by nvroot) to old_disk. * If 'replacing' is specified, the new disk will replace the old one. */ int zpool_vdev_attach(zpool_handle_t *zhp, const char *old_disk, const char *new_disk, nvlist_t *nvroot, int replacing) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; char msg[1024]; int ret; nvlist_t *tgt; boolean_t avail_spare, l2cache, islog; uint64_t val; char *newname; nvlist_t **child; uint_t children; nvlist_t *config_root; libzfs_handle_t *hdl = zhp->zpool_hdl; boolean_t rootpool = zpool_is_bootable(zhp); if (replacing) (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN, "cannot replace %s with %s"), old_disk, new_disk); else (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN, "cannot attach %s to %s"), new_disk, old_disk); #if defined(__sun__) || defined(__sun) /* * If this is a root pool, make sure that we're not attaching an * EFI labeled device. 
*/ if (rootpool && pool_uses_efi(nvroot)) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "EFI labeled devices are not supported on root pools.")); return (zfs_error(hdl, EZFS_POOL_NOTSUP, msg)); } #endif (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); if ((tgt = zpool_find_vdev(zhp, old_disk, &avail_spare, &l2cache, &islog)) == 0) return (zfs_error(hdl, EZFS_NODEVICE, msg)); if (avail_spare) return (zfs_error(hdl, EZFS_ISSPARE, msg)); if (l2cache) return (zfs_error(hdl, EZFS_ISL2CACHE, msg)); verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0); zc.zc_cookie = replacing; if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, &child, &children) != 0 || children != 1) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "new device must be a single disk")); return (zfs_error(hdl, EZFS_INVALCONFIG, msg)); } verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL), ZPOOL_CONFIG_VDEV_TREE, &config_root) == 0); if ((newname = zpool_vdev_name(NULL, NULL, child[0], B_FALSE)) == NULL) return (-1); /* * If the target is a hot spare that has been swapped in, we can only * replace it with another hot spare. */ if (replacing && nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_IS_SPARE, &val) == 0 && (zpool_find_vdev(zhp, newname, &avail_spare, &l2cache, NULL) == NULL || !avail_spare) && is_replacing_spare(config_root, tgt, 1)) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "can only be replaced by another hot spare")); free(newname); return (zfs_error(hdl, EZFS_BADTARGET, msg)); } free(newname); if (zcmd_write_conf_nvlist(hdl, &zc, nvroot) != 0) return (-1); ret = zfs_ioctl(hdl, ZFS_IOC_VDEV_ATTACH, &zc); zcmd_free_nvlists(&zc); if (ret == 0) { if (rootpool) { /* * XXX need a better way to prevent user from * booting up a half-baked vdev. */ (void) fprintf(stderr, dgettext(TEXT_DOMAIN, "Make " "sure to wait until resilver is done " "before rebooting.\n")); } return (0); } switch (errno) { case ENOTSUP: /* * Can't attach to or replace this type of vdev. */ if (replacing) { uint64_t version = zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL); if (islog) zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot replace a log with a spare")); else if (version >= SPA_VERSION_MULTI_REPLACE) zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "already in replacing/spare config; wait " "for completion or use 'zpool detach'")); else zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot replace a replacing device")); } else { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "can only attach to mirrors and top-level " "disks")); } (void) zfs_error(hdl, EZFS_BADTARGET, msg); break; case EINVAL: /* * The new device must be a single disk. */ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "new device must be a single disk")); (void) zfs_error(hdl, EZFS_INVALCONFIG, msg); break; case EBUSY: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "%s is busy"), new_disk); (void) zfs_error(hdl, EZFS_BADDEV, msg); break; case EOVERFLOW: /* * The new device is too small. */ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "device is too small")); (void) zfs_error(hdl, EZFS_BADDEV, msg); break; case EDOM: /* * The new device has a different alignment requirement. */ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "devices have different sector alignment")); (void) zfs_error(hdl, EZFS_BADDEV, msg); break; case ENAMETOOLONG: /* * The resulting top-level vdev spec won't fit in the label. */ (void) zfs_error(hdl, EZFS_DEVOVERFLOW, msg); break; default: (void) zpool_standard_error(hdl, errno, msg); } return (-1); } /* * Detach the specified device. 
*/ int zpool_vdev_detach(zpool_handle_t *zhp, const char *path) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; char msg[1024]; nvlist_t *tgt; boolean_t avail_spare, l2cache; libzfs_handle_t *hdl = zhp->zpool_hdl; (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN, "cannot detach %s"), path); (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache, NULL)) == 0) return (zfs_error(hdl, EZFS_NODEVICE, msg)); if (avail_spare) return (zfs_error(hdl, EZFS_ISSPARE, msg)); if (l2cache) return (zfs_error(hdl, EZFS_ISL2CACHE, msg)); verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0); if (zfs_ioctl(hdl, ZFS_IOC_VDEV_DETACH, &zc) == 0) return (0); switch (errno) { case ENOTSUP: /* * Can't detach from this type of vdev. */ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "only " "applicable to mirror and replacing vdevs")); (void) zfs_error(hdl, EZFS_BADTARGET, msg); break; case EBUSY: /* * There are no other replicas of this device. */ (void) zfs_error(hdl, EZFS_NOREPLICAS, msg); break; default: (void) zpool_standard_error(hdl, errno, msg); } return (-1); } /* * Find a mirror vdev in the source nvlist. * * The mchild array contains a list of disks in one of the top-level mirrors * of the source pool. The schild array contains a list of disks that the * user specified on the command line. We loop over the mchild array to * see if any entry in the schild array matches. * * If a disk in the mchild array is found in the schild array, we return * the index of that entry. Otherwise we return -1. */ static int find_vdev_entry(zpool_handle_t *zhp, nvlist_t **mchild, uint_t mchildren, nvlist_t **schild, uint_t schildren) { uint_t mc; for (mc = 0; mc < mchildren; mc++) { uint_t sc; char *mpath = zpool_vdev_name(zhp->zpool_hdl, zhp, mchild[mc], B_FALSE); for (sc = 0; sc < schildren; sc++) { char *spath = zpool_vdev_name(zhp->zpool_hdl, zhp, schild[sc], B_FALSE); boolean_t result = (strcmp(mpath, spath) == 0); free(spath); if (result) { free(mpath); return (mc); } } free(mpath); } return (-1); } /* * Split a mirror pool. If newroot points to null, then a new nvlist * is generated and it is the responsibility of the caller to free it. 
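 *
 * A caller sketch for a dry-run split; the new pool name is an
 * illustrative assumption, and the generated tree is freed by the
 * caller as described above:
 *
 *     splitflags_t flags = { 0 };
 *     nvlist_t *newroot = NULL;
 *     char newname[] = "tanksplit";
 *
 *     flags.dryrun = B_TRUE;
 *     if (zpool_vdev_split(zhp, newname, &newroot, NULL, flags) == 0 &&
 *         newroot != NULL)
 *             nvlist_free(newroot);
 *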
*/ int zpool_vdev_split(zpool_handle_t *zhp, char *newname, nvlist_t **newroot, nvlist_t *props, splitflags_t flags) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; char msg[1024]; nvlist_t *tree, *config, **child, **newchild, *newconfig = NULL; nvlist_t **varray = NULL, *zc_props = NULL; uint_t c, children, newchildren, lastlog = 0, vcount, found = 0; libzfs_handle_t *hdl = zhp->zpool_hdl; uint64_t vers; boolean_t freelist = B_FALSE, memory_err = B_TRUE; int retval = 0; (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN, "Unable to split %s"), zhp->zpool_name); if (!zpool_name_valid(hdl, B_FALSE, newname)) return (zfs_error(hdl, EZFS_INVALIDNAME, msg)); if ((config = zpool_get_config(zhp, NULL)) == NULL) { (void) fprintf(stderr, gettext("Internal error: unable to " "retrieve pool configuration\n")); return (-1); } verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &tree) == 0); verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, &vers) == 0); if (props) { prop_flags_t flags = { .create = B_FALSE, .import = B_TRUE }; if ((zc_props = zpool_valid_proplist(hdl, zhp->zpool_name, props, vers, flags, msg)) == NULL) return (-1); } if (nvlist_lookup_nvlist_array(tree, ZPOOL_CONFIG_CHILDREN, &child, &children) != 0) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "Source pool is missing vdev tree")); if (zc_props) nvlist_free(zc_props); return (-1); } varray = zfs_alloc(hdl, children * sizeof (nvlist_t *)); vcount = 0; if (*newroot == NULL || nvlist_lookup_nvlist_array(*newroot, ZPOOL_CONFIG_CHILDREN, &newchild, &newchildren) != 0) newchildren = 0; for (c = 0; c < children; c++) { uint64_t is_log = B_FALSE, is_hole = B_FALSE; char *type; nvlist_t **mchild, *vdev; uint_t mchildren; int entry; /* * Unlike cache & spares, slogs are stored in the * ZPOOL_CONFIG_CHILDREN array. We filter them out here. */ (void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG, &is_log); (void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_HOLE, &is_hole); if (is_log || is_hole) { /* * Create a hole vdev and put it in the config. */ if (nvlist_alloc(&vdev, NV_UNIQUE_NAME, 0) != 0) goto out; if (nvlist_add_string(vdev, ZPOOL_CONFIG_TYPE, VDEV_TYPE_HOLE) != 0) goto out; if (nvlist_add_uint64(vdev, ZPOOL_CONFIG_IS_HOLE, 1) != 0) goto out; if (lastlog == 0) lastlog = vcount; varray[vcount++] = vdev; continue; } lastlog = 0; verify(nvlist_lookup_string(child[c], ZPOOL_CONFIG_TYPE, &type) == 0); if (strcmp(type, VDEV_TYPE_MIRROR) != 0) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "Source pool must be composed only of mirrors\n")); retval = zfs_error(hdl, EZFS_INVALCONFIG, msg); goto out; } verify(nvlist_lookup_nvlist_array(child[c], ZPOOL_CONFIG_CHILDREN, &mchild, &mchildren) == 0); /* find or add an entry for this top-level vdev */ if (newchildren > 0 && (entry = find_vdev_entry(zhp, mchild, mchildren, newchild, newchildren)) >= 0) { /* We found a disk that the user specified. */ vdev = mchild[entry]; ++found; } else { /* User didn't specify a disk for this vdev. */ vdev = mchild[mchildren - 1]; } if (nvlist_dup(vdev, &varray[vcount++], 0) != 0) goto out; } /* did we find every disk the user specified? */ if (found != newchildren) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "Device list must " "include at most one disk from each mirror")); retval = zfs_error(hdl, EZFS_INVALCONFIG, msg); goto out; } /* Prepare the nvlist for populating. 
*/ if (*newroot == NULL) { if (nvlist_alloc(newroot, NV_UNIQUE_NAME, 0) != 0) goto out; freelist = B_TRUE; if (nvlist_add_string(*newroot, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT) != 0) goto out; } else { verify(nvlist_remove_all(*newroot, ZPOOL_CONFIG_CHILDREN) == 0); } /* Add all the children we found */ if (nvlist_add_nvlist_array(*newroot, ZPOOL_CONFIG_CHILDREN, varray, lastlog == 0 ? vcount : lastlog) != 0) goto out; /* * If we're just doing a dry run, exit now with success. */ if (flags.dryrun) { memory_err = B_FALSE; freelist = B_FALSE; goto out; } /* now build up the config list & call the ioctl */ if (nvlist_alloc(&newconfig, NV_UNIQUE_NAME, 0) != 0) goto out; if (nvlist_add_nvlist(newconfig, ZPOOL_CONFIG_VDEV_TREE, *newroot) != 0 || nvlist_add_string(newconfig, ZPOOL_CONFIG_POOL_NAME, newname) != 0 || nvlist_add_uint64(newconfig, ZPOOL_CONFIG_VERSION, vers) != 0) goto out; /* * The new pool is automatically part of the namespace unless we * explicitly export it. */ if (!flags.import) zc.zc_cookie = ZPOOL_EXPORT_AFTER_SPLIT; (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); (void) strlcpy(zc.zc_string, newname, sizeof (zc.zc_string)); if (zcmd_write_conf_nvlist(hdl, &zc, newconfig) != 0) goto out; if (zc_props != NULL && zcmd_write_src_nvlist(hdl, &zc, zc_props) != 0) goto out; if (zfs_ioctl(hdl, ZFS_IOC_VDEV_SPLIT, &zc) != 0) { retval = zpool_standard_error(hdl, errno, msg); goto out; } freelist = B_FALSE; memory_err = B_FALSE; out: if (varray != NULL) { int v; for (v = 0; v < vcount; v++) nvlist_free(varray[v]); free(varray); } zcmd_free_nvlists(&zc); if (zc_props) nvlist_free(zc_props); if (newconfig) nvlist_free(newconfig); if (freelist) { nvlist_free(*newroot); *newroot = NULL; } if (retval != 0) return (retval); if (memory_err) return (no_memory(hdl)); return (0); } /* * Remove the given device. Currently, this is supported only for hot spares * and level 2 cache devices. */ int zpool_vdev_remove(zpool_handle_t *zhp, const char *path) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; char msg[1024]; nvlist_t *tgt; boolean_t avail_spare, l2cache, islog; libzfs_handle_t *hdl = zhp->zpool_hdl; uint64_t version; (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN, "cannot remove %s"), path); (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache, &islog)) == 0) return (zfs_error(hdl, EZFS_NODEVICE, msg)); /* * XXX - this should just go away. */ if (!avail_spare && !l2cache && !islog) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "only inactive hot spares, cache, top-level, " "or log devices can be removed")); return (zfs_error(hdl, EZFS_NODEVICE, msg)); } version = zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL); if (islog && version < SPA_VERSION_HOLES) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be upgrade to support log removal")); return (zfs_error(hdl, EZFS_BADVERSION, msg)); } verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0); if (zfs_ioctl(hdl, ZFS_IOC_VDEV_REMOVE, &zc) == 0) return (0); return (zpool_standard_error(hdl, errno, msg)); } /* * Clear the errors for the pool, or the particular device if specified. 
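 *
 * A caller sketch that clears the whole pool.  The empty nvlist below
 * is an assumption standing in for the caller's rewind-policy nvlist;
 * with no rewind keys set, the default (no-rewind) policy should apply:
 *
 *     nvlist_t *policy;
 *
 *     if (nvlist_alloc(&policy, NV_UNIQUE_NAME, 0) != 0)
 *             return (-1);
 *     if (zpool_clear(zhp, NULL, policy) != 0) {
 *             nvlist_free(policy);
 *             return (-1);
 *     }
 *     nvlist_free(policy);
 *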
*/ int zpool_clear(zpool_handle_t *zhp, const char *path, nvlist_t *rewindnvl) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; char msg[1024]; nvlist_t *tgt; zpool_rewind_policy_t policy; boolean_t avail_spare, l2cache; libzfs_handle_t *hdl = zhp->zpool_hdl; nvlist_t *nvi = NULL; int error; if (path) (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN, "cannot clear errors for %s"), path); else (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN, "cannot clear errors for %s"), zhp->zpool_name); (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); if (path) { if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache, NULL)) == 0) return (zfs_error(hdl, EZFS_NODEVICE, msg)); /* * Don't allow error clearing for hot spares. Do allow * error clearing for l2cache devices. */ if (avail_spare) return (zfs_error(hdl, EZFS_ISSPARE, msg)); verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0); } zpool_get_rewind_policy(rewindnvl, &policy); zc.zc_cookie = policy.zrp_request; if (zcmd_alloc_dst_nvlist(hdl, &zc, zhp->zpool_config_size * 2) != 0) return (-1); if (zcmd_write_src_nvlist(hdl, &zc, rewindnvl) != 0) return (-1); while ((error = zfs_ioctl(hdl, ZFS_IOC_CLEAR, &zc)) != 0 && errno == ENOMEM) { if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) { zcmd_free_nvlists(&zc); return (-1); } } if (!error || ((policy.zrp_request & ZPOOL_TRY_REWIND) && errno != EPERM && errno != EACCES)) { if (policy.zrp_request & (ZPOOL_DO_REWIND | ZPOOL_TRY_REWIND)) { (void) zcmd_read_dst_nvlist(hdl, &zc, &nvi); zpool_rewind_exclaim(hdl, zc.zc_name, ((policy.zrp_request & ZPOOL_TRY_REWIND) != 0), nvi); nvlist_free(nvi); } zcmd_free_nvlists(&zc); return (0); } zcmd_free_nvlists(&zc); return (zpool_standard_error(hdl, errno, msg)); } /* * Similar to zpool_clear(), but takes a GUID (used by fmd). */ int zpool_vdev_clear(zpool_handle_t *zhp, uint64_t guid) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; char msg[1024]; libzfs_handle_t *hdl = zhp->zpool_hdl; (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN, "cannot clear errors for %llx"), (u_longlong_t)guid); (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); zc.zc_guid = guid; zc.zc_cookie = ZPOOL_NO_REWIND; if (ioctl(hdl->libzfs_fd, ZFS_IOC_CLEAR, &zc) == 0) return (0); return (zpool_standard_error(hdl, errno, msg)); } /* * Change the GUID for a pool. */ int zpool_reguid(zpool_handle_t *zhp) { char msg[1024]; libzfs_handle_t *hdl = zhp->zpool_hdl; - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN, "cannot reguid '%s'"), zhp->zpool_name); (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); if (zfs_ioctl(hdl, ZFS_IOC_POOL_REGUID, &zc) == 0) return (0); return (zpool_standard_error(hdl, errno, msg)); } /* * Reopen the pool. */ int zpool_reopen(zpool_handle_t *zhp) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; char msg[1024]; libzfs_handle_t *hdl = zhp->zpool_hdl; (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN, "cannot reopen '%s'"), zhp->zpool_name); (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); if (zfs_ioctl(hdl, ZFS_IOC_POOL_REOPEN, &zc) == 0) return (0); return (zpool_standard_error(hdl, errno, msg)); } /* * Convert from a devid string to a path. 
*/ static char * devid_to_path(char *devid_str) { ddi_devid_t devid; char *minor; char *path; devid_nmlist_t *list = NULL; int ret; if (devid_str_decode(devid_str, &devid, &minor) != 0) return (NULL); ret = devid_deviceid_to_nmlist("/dev", devid, minor, &list); devid_str_free(minor); devid_free(devid); if (ret != 0) return (NULL); if ((path = strdup(list[0].devname)) == NULL) return (NULL); devid_free_nmlist(list); return (path); } /* * Convert from a path to a devid string. */ static char * path_to_devid(const char *path) { int fd; ddi_devid_t devid; char *minor, *ret; if ((fd = open(path, O_RDONLY)) < 0) return (NULL); minor = NULL; ret = NULL; if (devid_get(fd, &devid) == 0) { if (devid_get_minor_name(fd, &minor) == 0) ret = devid_str_encode(devid, minor); if (minor != NULL) devid_str_free(minor); devid_free(devid); } (void) close(fd); return (ret); } /* * Issue the necessary ioctl() to update the stored path value for the vdev. We * ignore any failure here, since a common case is for an unprivileged user to * type 'zpool status', and we'll display the correct information anyway. */ static void set_path(zpool_handle_t *zhp, nvlist_t *nv, const char *path) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; (void) strncpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); (void) strncpy(zc.zc_value, path, sizeof (zc.zc_value)); verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0); (void) ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_VDEV_SETPATH, &zc); } /* * Remove partition suffix from a vdev path. Partition suffixes may take three * forms: "-partX", "pX", or "X", where X is a string of digits. The second * case only occurs when the suffix is preceded by a digit, i.e. "md0p0" The * third case only occurs when preceded by a string matching the regular * expression "^[hs]d[a-z]+", i.e. a scsi or ide disk. */ static char * strip_partition(libzfs_handle_t *hdl, char *path) { char *tmp = zfs_strdup(hdl, path); char *part = NULL, *d = NULL; if ((part = strstr(tmp, "-part")) && part != tmp) { d = part + 5; } else if ((part = strrchr(tmp, 'p')) && part > tmp + 1 && isdigit(*(part-1))) { d = part + 1; } else if ((tmp[0] == 'h' || tmp[0] == 's') && tmp[1] == 'd') { for (d = &tmp[2]; isalpha(*d); part = ++d); } if (part && d && *d != '\0') { for (; isdigit(*d); d++); if (*d == '\0') *part = '\0'; } return (tmp); } #define PATH_BUF_LEN 64 /* * Given a vdev, return the name to display in iostat. If the vdev has a path, * we use that, stripping off any leading "/dev/dsk/"; if not, we use the type. * We also check if this is a whole disk, in which case we strip off the * trailing 's0' slice name. * * This routine is also responsible for identifying when disks have been * reconfigured in a new location. The kernel will have opened the device by * devid, but the path will still refer to the old location. To catch this, we * first do a path -> devid translation (which is fast for the common case). If * the devid matches, we're done. If not, we do a reverse devid -> path * translation and issue the appropriate ioctl() to update the path of the vdev. * If 'zhp' is NULL, then this is an exported pool, and we don't need to do any * of these checks. 
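 *
 * In outline, the reconciliation performed below is (restating the code
 * that follows, for readability):
 *
 *      newdevid = path_to_devid(path);
 *      if (newdevid == NULL || strcmp(devid, newdevid) != 0) {
 *              if ((newpath = devid_to_path(devid)) != NULL) {
 *                      set_path(zhp, nv, newpath);     ZFS_IOC_VDEV_SETPATH
 *                      ... replace ZPOOL_CONFIG_PATH in 'nv' with newpath ...
 *              }
 *      }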
*/ char * zpool_vdev_name(libzfs_handle_t *hdl, zpool_handle_t *zhp, nvlist_t *nv, boolean_t verbose) { char *path, *devid, *type; uint64_t value; char buf[PATH_BUF_LEN]; char tmpbuf[PATH_BUF_LEN]; vdev_stat_t *vs; uint_t vsc; if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT, &value) == 0) { verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &value) == 0); (void) snprintf(buf, sizeof (buf), "%llu", (u_longlong_t)value); path = buf; } else if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0) { /* * If the device is dead (faulted, offline, etc) then don't * bother opening it. Otherwise we may be forcing the user to * open a misbehaving device, which can have undesirable * effects. */ if ((nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&vs, &vsc) != 0 || vs->vs_state >= VDEV_STATE_DEGRADED) && zhp != NULL && nvlist_lookup_string(nv, ZPOOL_CONFIG_DEVID, &devid) == 0) { /* * Determine if the current path is correct. */ char *newdevid = path_to_devid(path); if (newdevid == NULL || strcmp(devid, newdevid) != 0) { char *newpath; if ((newpath = devid_to_path(devid)) != NULL) { /* * Update the path appropriately. */ set_path(zhp, nv, newpath); if (nvlist_add_string(nv, ZPOOL_CONFIG_PATH, newpath) == 0) verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0); free(newpath); } } if (newdevid) devid_str_free(newdevid); } /* * For a block device only use the name. */ verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0); if (strcmp(type, VDEV_TYPE_DISK) == 0) { path = strrchr(path, '/'); path++; } /* * Remove the partition from the path it this is a whole disk. */ if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK, &value) == 0 && value) { return strip_partition(hdl, path); } } else { verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &path) == 0); /* * If it's a raidz device, we need to stick in the parity level. */ if (strcmp(path, VDEV_TYPE_RAIDZ) == 0) { verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NPARITY, &value) == 0); (void) snprintf(buf, sizeof (buf), "%s%llu", path, (u_longlong_t)value); path = buf; } /* * We identify each top-level vdev by using a * naming convention. */ if (verbose) { uint64_t id; verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ID, &id) == 0); (void) snprintf(tmpbuf, sizeof (tmpbuf), "%s-%llu", path, (u_longlong_t)id); path = tmpbuf; } } return (zfs_strdup(hdl, path)); } static int zbookmark_compare(const void *a, const void *b) { return (memcmp(a, b, sizeof (zbookmark_t))); } /* * Retrieve the persistent error log, uniquify the members, and return to the * caller. */ int zpool_get_errlog(zpool_handle_t *zhp, nvlist_t **nverrlistp) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; uint64_t count; zbookmark_t *zb = NULL; int i; /* * Retrieve the raw error list from the kernel. If the number of errors * has increased, allocate more space and continue until we get the * entire list. 
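 *
 * The pattern used below is: size the destination buffer from the
 * ZPOOL_CONFIG_ERRCOUNT hint, issue ZFS_IOC_ERROR_LOG, and on ENOMEM
 * re-allocate using the size the kernel reports back in
 * zc_nvlist_dst_size, then retry:
 *
 *      for (;;) {
 *              if (ioctl(fd, ZFS_IOC_ERROR_LOG, &zc) == 0)
 *                      break;
 *              if (errno != ENOMEM)
 *                      return (-1);
 *              count = zc.zc_nvlist_dst_size;          kernel's updated hint
 *              ... re-allocate zc_nvlist_dst for 'count' bookmarks ...
 *      }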
*/ verify(nvlist_lookup_uint64(zhp->zpool_config, ZPOOL_CONFIG_ERRCOUNT, &count) == 0); if (count == 0) return (0); if ((zc.zc_nvlist_dst = (uintptr_t)zfs_alloc(zhp->zpool_hdl, count * sizeof (zbookmark_t))) == (uintptr_t)NULL) return (-1); zc.zc_nvlist_dst_size = count; (void) strcpy(zc.zc_name, zhp->zpool_name); for (;;) { if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_ERROR_LOG, &zc) != 0) { free((void *)(uintptr_t)zc.zc_nvlist_dst); if (errno == ENOMEM) { count = zc.zc_nvlist_dst_size; if ((zc.zc_nvlist_dst = (uintptr_t) zfs_alloc(zhp->zpool_hdl, count * sizeof (zbookmark_t))) == (uintptr_t)NULL) return (-1); } else { return (-1); } } else { break; } } /* * Sort the resulting bookmarks. This is a little confusing due to the * implementation of ZFS_IOC_ERROR_LOG. The bookmarks are copied last * to first, and 'zc_nvlist_dst_size' indicates the number of boomarks * _not_ copied as part of the process. So we point the start of our * array appropriate and decrement the total number of elements. */ zb = ((zbookmark_t *)(uintptr_t)zc.zc_nvlist_dst) + zc.zc_nvlist_dst_size; count -= zc.zc_nvlist_dst_size; qsort(zb, count, sizeof (zbookmark_t), zbookmark_compare); verify(nvlist_alloc(nverrlistp, 0, KM_SLEEP) == 0); /* * Fill in the nverrlistp with nvlist's of dataset and object numbers. */ for (i = 0; i < count; i++) { nvlist_t *nv; /* ignoring zb_blkid and zb_level for now */ if (i > 0 && zb[i-1].zb_objset == zb[i].zb_objset && zb[i-1].zb_object == zb[i].zb_object) continue; if (nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) != 0) goto nomem; if (nvlist_add_uint64(nv, ZPOOL_ERR_DATASET, zb[i].zb_objset) != 0) { nvlist_free(nv); goto nomem; } if (nvlist_add_uint64(nv, ZPOOL_ERR_OBJECT, zb[i].zb_object) != 0) { nvlist_free(nv); goto nomem; } if (nvlist_add_nvlist(*nverrlistp, "ejk", nv) != 0) { nvlist_free(nv); goto nomem; } nvlist_free(nv); } free((void *)(uintptr_t)zc.zc_nvlist_dst); return (0); nomem: free((void *)(uintptr_t)zc.zc_nvlist_dst); return (no_memory(zhp->zpool_hdl)); } /* * Upgrade a ZFS pool to the latest on-disk version. */ int zpool_upgrade(zpool_handle_t *zhp, uint64_t new_version) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; libzfs_handle_t *hdl = zhp->zpool_hdl; (void) strcpy(zc.zc_name, zhp->zpool_name); zc.zc_cookie = new_version; if (zfs_ioctl(hdl, ZFS_IOC_POOL_UPGRADE, &zc) != 0) return (zpool_standard_error_fmt(hdl, errno, dgettext(TEXT_DOMAIN, "cannot upgrade '%s'"), zhp->zpool_name)); return (0); } void -zpool_set_history_str(const char *subcommand, int argc, char **argv, - char *history_str) +zfs_save_arguments(int argc, char **argv, char *string, int len) { int i; - (void) strlcpy(history_str, subcommand, HIS_MAX_RECORD_LEN); + (void) strlcpy(string, basename(argv[0]), len); for (i = 1; i < argc; i++) { - if (strlen(history_str) + 1 + strlen(argv[i]) > - HIS_MAX_RECORD_LEN) - break; - (void) strlcat(history_str, " ", HIS_MAX_RECORD_LEN); - (void) strlcat(history_str, argv[i], HIS_MAX_RECORD_LEN); + (void) strlcat(string, " ", len); + (void) strlcat(string, argv[i], len); } } -/* - * Stage command history for logging. 
- */ int -zpool_stage_history(libzfs_handle_t *hdl, const char *history_str) +zpool_log_history(libzfs_handle_t *hdl, const char *message) { - if (history_str == NULL) - return (EINVAL); - - if (strlen(history_str) > HIS_MAX_RECORD_LEN) - return (EINVAL); - - if (hdl->libzfs_log_str != NULL) - free(hdl->libzfs_log_str); - - if ((hdl->libzfs_log_str = strdup(history_str)) == NULL) - return (no_memory(hdl)); - - return (0); + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; + nvlist_t *args; + int err; + + args = fnvlist_alloc(); + fnvlist_add_string(args, "message", message); + err = zcmd_write_src_nvlist(hdl, &zc, args); + if (err == 0) + err = ioctl(hdl->libzfs_fd, ZFS_IOC_LOG_HISTORY, &zc); + nvlist_free(args); + zcmd_free_nvlists(&zc); + return (err); } /* * Perform ioctl to get some command history of a pool. * * 'buf' is the buffer to fill up to 'len' bytes. 'off' is the * logical offset of the history buffer to start reading from. * * Upon return, 'off' is the next logical offset to read from and * 'len' is the actual amount of bytes read into 'buf'. */ static int get_history(zpool_handle_t *zhp, char *buf, uint64_t *off, uint64_t *len) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; libzfs_handle_t *hdl = zhp->zpool_hdl; (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); zc.zc_history = (uint64_t)(uintptr_t)buf; zc.zc_history_len = *len; zc.zc_history_offset = *off; if (ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_GET_HISTORY, &zc) != 0) { switch (errno) { case EPERM: return (zfs_error_fmt(hdl, EZFS_PERM, dgettext(TEXT_DOMAIN, "cannot show history for pool '%s'"), zhp->zpool_name)); case ENOENT: return (zfs_error_fmt(hdl, EZFS_NOHISTORY, dgettext(TEXT_DOMAIN, "cannot get history for pool " "'%s'"), zhp->zpool_name)); case ENOTSUP: return (zfs_error_fmt(hdl, EZFS_BADVERSION, dgettext(TEXT_DOMAIN, "cannot get history for pool " "'%s', pool must be upgraded"), zhp->zpool_name)); default: return (zpool_standard_error_fmt(hdl, errno, dgettext(TEXT_DOMAIN, "cannot get history for '%s'"), zhp->zpool_name)); } } *len = zc.zc_history_len; *off = zc.zc_history_offset; return (0); } /* * Process the buffer of nvlists, unpacking and storing each nvlist record * into 'records'. 'leftover' is set to the number of bytes that weren't * processed as there wasn't a complete record. */ int zpool_history_unpack(char *buf, uint64_t bytes_read, uint64_t *leftover, nvlist_t ***records, uint_t *numrecords) { uint64_t reclen; nvlist_t *nv; int i; while (bytes_read > sizeof (reclen)) { /* get length of packed record (stored as little endian) */ for (i = 0, reclen = 0; i < sizeof (reclen); i++) reclen += (uint64_t)(((uchar_t *)buf)[i]) << (8*i); if (bytes_read < sizeof (reclen) + reclen) break; /* unpack record */ if (nvlist_unpack(buf + sizeof (reclen), reclen, &nv, 0) != 0) return (ENOMEM); bytes_read -= sizeof (reclen) + reclen; buf += sizeof (reclen) + reclen; /* add record to nvlist array */ (*numrecords)++; if (ISP2(*numrecords + 1)) { *records = realloc(*records, *numrecords * 2 * sizeof (nvlist_t *)); } (*records)[*numrecords - 1] = nv; } *leftover = bytes_read; return (0); } #define HIS_BUF_LEN (128*1024) /* * Retrieve the command history of a pool. 
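 *
 * Each record in the buffer returned by ZFS_IOC_POOL_GET_HISTORY is framed
 * as a little-endian 64-bit length followed by that many bytes of packed
 * nvlist; zpool_history_unpack() reports any trailing partial record in
 * 'leftover' so the loop below can rewind 'off' and re-read it on the next
 * pass.  Illustrative consumer sketch (not part of this change):
 *
 *      nvlist_t *nvhis, **records;
 *      uint_t nrec;
 *      if (zpool_get_history(zhp, &nvhis) == 0) {
 *              verify(nvlist_lookup_nvlist_array(nvhis, ZPOOL_HIST_RECORD,
 *                  &records, &nrec) == 0);
 *              ... format records[0 .. nrec-1] ...
 *              nvlist_free(nvhis);
 *      }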
*/ int zpool_get_history(zpool_handle_t *zhp, nvlist_t **nvhisp) { char buf[HIS_BUF_LEN]; uint64_t off = 0; nvlist_t **records = NULL; uint_t numrecords = 0; int err, i; do { uint64_t bytes_read = sizeof (buf); uint64_t leftover; if ((err = get_history(zhp, buf, &off, &bytes_read)) != 0) break; /* if nothing else was read in, we're at EOF, just return */ if (!bytes_read) break; if ((err = zpool_history_unpack(buf, bytes_read, &leftover, &records, &numrecords)) != 0) break; off -= leftover; /* CONSTCOND */ } while (1); if (!err) { verify(nvlist_alloc(nvhisp, NV_UNIQUE_NAME, 0) == 0); verify(nvlist_add_nvlist_array(*nvhisp, ZPOOL_HIST_RECORD, records, numrecords) == 0); } for (i = 0; i < numrecords; i++) nvlist_free(records[i]); free(records); return (err); } /* * Retrieve the next event. If there is a new event available 'nvp' will * contain a newly allocated nvlist and 'dropped' will be set to the number * of missed events since the last call to this function. When 'nvp' is * set to NULL it indicates no new events are available. In either case * the function returns 0 and it is up to the caller to free 'nvp'. In * the case of a fatal error the function will return a non-zero value. * When the function is called in blocking mode it will not return until * a new event is available. */ int zpool_events_next(libzfs_handle_t *hdl, nvlist_t **nvp, int *dropped, int block, int cleanup_fd) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; int error = 0; *nvp = NULL; *dropped = 0; zc.zc_cleanup_fd = cleanup_fd; if (!block) zc.zc_guid = ZEVENT_NONBLOCK; if (zcmd_alloc_dst_nvlist(hdl, &zc, ZEVENT_SIZE) != 0) return (-1); retry: if (zfs_ioctl(hdl, ZFS_IOC_EVENTS_NEXT, &zc) != 0) { switch (errno) { case ESHUTDOWN: error = zfs_error_fmt(hdl, EZFS_POOLUNAVAIL, dgettext(TEXT_DOMAIN, "zfs shutdown")); goto out; case ENOENT: /* Blocking error case should not occur */ if (block) error = zpool_standard_error_fmt(hdl, errno, dgettext(TEXT_DOMAIN, "cannot get event")); goto out; case ENOMEM: if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) { error = zfs_error_fmt(hdl, EZFS_NOMEM, dgettext(TEXT_DOMAIN, "cannot get event")); goto out; } else { goto retry; } default: error = zpool_standard_error_fmt(hdl, errno, dgettext(TEXT_DOMAIN, "cannot get event")); goto out; } } error = zcmd_read_dst_nvlist(hdl, &zc, nvp); if (error != 0) goto out; *dropped = (int)zc.zc_cookie; out: zcmd_free_nvlists(&zc); return (error); } /* * Clear all events. 
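 *
 * The number of events discarded from the kernel's ring buffer is returned
 * through zc_cookie, mirroring the 'dropped' count that zpool_events_next()
 * above reports for events missed between calls.  A consumer such as the
 * 'zpool events' command might simply do (illustrative):
 *
 *      int count;
 *      if (zpool_events_clear(hdl, &count) == 0)
 *              (void) printf("cleared %d events\n", count);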
*/ int zpool_events_clear(libzfs_handle_t *hdl, int *count) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; char msg[1024]; (void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN, "cannot clear events")); if (zfs_ioctl(hdl, ZFS_IOC_EVENTS_CLEAR, &zc) != 0) return (zpool_standard_error_fmt(hdl, errno, msg)); if (count != NULL) *count = (int)zc.zc_cookie; /* # of events cleared */ return (0); } void zpool_obj_to_path(zpool_handle_t *zhp, uint64_t dsobj, uint64_t obj, char *pathname, size_t len) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; boolean_t mounted = B_FALSE; char *mntpnt = NULL; char dsname[MAXNAMELEN]; if (dsobj == 0) { /* special case for the MOS */ (void) snprintf(pathname, len, ":<0x%llx>", (longlong_t)obj); return; } /* get the dataset's name */ (void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name)); zc.zc_obj = dsobj; if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_DSOBJ_TO_DSNAME, &zc) != 0) { /* just write out a path of two object numbers */ (void) snprintf(pathname, len, "<0x%llx>:<0x%llx>", (longlong_t)dsobj, (longlong_t)obj); return; } (void) strlcpy(dsname, zc.zc_value, sizeof (dsname)); /* find out if the dataset is mounted */ mounted = is_mounted(zhp->zpool_hdl, dsname, &mntpnt); /* get the corrupted object's path */ (void) strlcpy(zc.zc_name, dsname, sizeof (zc.zc_name)); zc.zc_obj = obj; if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_OBJ_TO_PATH, &zc) == 0) { if (mounted) { (void) snprintf(pathname, len, "%s%s", mntpnt, zc.zc_value); } else { (void) snprintf(pathname, len, "%s:%s", dsname, zc.zc_value); } } else { (void) snprintf(pathname, len, "%s:<0x%llx>", dsname, (longlong_t)obj); } free(mntpnt); } /* * Read the EFI label from the config, if a label does not exist then * pass back the error to the caller. If the caller has passed a non-NULL * diskaddr argument then we set it to the starting address of the EFI * partition. */ static int read_efi_label(nvlist_t *config, diskaddr_t *sb) { char *path; int fd; char diskname[MAXPATHLEN]; int err = -1; if (nvlist_lookup_string(config, ZPOOL_CONFIG_PATH, &path) != 0) return (err); (void) snprintf(diskname, sizeof (diskname), "%s%s", DISK_ROOT, strrchr(path, '/')); if ((fd = open(diskname, O_RDWR|O_DIRECT)) >= 0) { struct dk_gpt *vtoc; if ((err = efi_alloc_and_read(fd, &vtoc)) >= 0) { if (sb != NULL) *sb = vtoc->efi_parts[0].p_start; efi_free(vtoc); } (void) close(fd); } return (err); } /* * determine where a partition starts on a disk in the current * configuration */ static diskaddr_t find_start_block(nvlist_t *config) { nvlist_t **child; uint_t c, children; diskaddr_t sb = MAXOFFSET_T; uint64_t wholedisk; if (nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_CHILDREN, &child, &children) != 0) { if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_WHOLE_DISK, &wholedisk) != 0 || !wholedisk) { return (MAXOFFSET_T); } if (read_efi_label(config, &sb) < 0) sb = MAXOFFSET_T; return (sb); } for (c = 0; c < children; c++) { sb = find_start_block(child[c]); if (sb != MAXOFFSET_T) { return (sb); } } return (MAXOFFSET_T); } int zpool_label_disk_wait(char *path, int timeout) { struct stat64 statbuf; int i; /* * Wait timeout miliseconds for a newly created device to be available * from the given path. There is a small window when a /dev/ device * will exist and the udev link will not, so we must wait for the * symlink. Depending on the udev rules this may take a few seconds. 
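 *
 * zpool_label_disk() below relies on this after writing a new EFI label,
 * polling once per millisecond for up to 3000 ms:
 *
 *      (void) zfs_append_partition(path, MAXPATHLEN);
 *      if (zpool_label_disk_wait(path, 3000) != 0)
 *              ... the expected partition link never appeared ...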
*/ for (i = 0; i < timeout; i++) { usleep(1000); errno = 0; if ((stat64(path, &statbuf) == 0) && (errno == 0)) return (0); } return (ENOENT); } int zpool_label_disk_check(char *path) { struct dk_gpt *vtoc; int fd, err; if ((fd = open(path, O_RDWR|O_DIRECT)) < 0) return errno; if ((err = efi_alloc_and_read(fd, &vtoc)) != 0) { (void) close(fd); return err; } if (vtoc->efi_flags & EFI_GPT_PRIMARY_CORRUPT) { efi_free(vtoc); (void) close(fd); return EIDRM; } efi_free(vtoc); (void) close(fd); return 0; } /* * Label an individual disk. The name provided is the short name, * stripped of any leading /dev path. */ int zpool_label_disk(libzfs_handle_t *hdl, zpool_handle_t *zhp, char *name) { char path[MAXPATHLEN]; struct dk_gpt *vtoc; int rval, fd; size_t resv = EFI_MIN_RESV_SIZE; uint64_t slice_size; diskaddr_t start_block; char errbuf[1024]; /* prepare an error message just in case */ (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot label '%s'"), name); if (zhp) { nvlist_t *nvroot; #if defined(__sun__) || defined(__sun) if (zpool_is_bootable(zhp)) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "EFI labeled devices are not supported on root " "pools.")); return (zfs_error(hdl, EZFS_POOL_NOTSUP, errbuf)); } #endif verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0); if (zhp->zpool_start_block == 0) start_block = find_start_block(nvroot); else start_block = zhp->zpool_start_block; zhp->zpool_start_block = start_block; } else { /* new pool */ start_block = NEW_START_BLOCK; } (void) snprintf(path, sizeof (path), "%s/%s", DISK_ROOT, name); if ((fd = open(path, O_RDWR|O_DIRECT)) < 0) { /* * This shouldn't happen. We've long since verified that this * is a valid device. */ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot " "label '%s': unable to open device: %d"), path, errno); return (zfs_error(hdl, EZFS_OPENFAILED, errbuf)); } if (efi_alloc_and_init(fd, EFI_NUMPAR, &vtoc) != 0) { /* * The only way this can fail is if we run out of memory, or we * were unable to read the disk's capacity */ if (errno == ENOMEM) (void) no_memory(hdl); (void) close(fd); zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot " "label '%s': unable to read disk capacity"), path); return (zfs_error(hdl, EZFS_NOCAP, errbuf)); } slice_size = vtoc->efi_last_u_lba + 1; slice_size -= EFI_MIN_RESV_SIZE; if (start_block == MAXOFFSET_T) start_block = NEW_START_BLOCK; slice_size -= start_block; slice_size = P2ALIGN(slice_size, PARTITION_END_ALIGNMENT); vtoc->efi_parts[0].p_start = start_block; vtoc->efi_parts[0].p_size = slice_size; /* * Why we use V_USR: V_BACKUP confuses users, and is considered * disposable by some EFI utilities (since EFI doesn't have a backup * slice). V_UNASSIGNED is supposed to be used only for zero size * partitions, and efi_write() will fail if we use it. V_ROOT, V_BOOT, * etc. were all pretty specific. V_USR is as close to reality as we * can get, in the absence of V_OTHER. */ vtoc->efi_parts[0].p_tag = V_USR; (void) strcpy(vtoc->efi_parts[0].p_name, "zfs"); vtoc->efi_parts[8].p_start = slice_size + start_block; vtoc->efi_parts[8].p_size = resv; vtoc->efi_parts[8].p_tag = V_RESERVED; if ((rval = efi_write(fd, vtoc)) != 0 || (rval = efi_rescan(fd)) != 0) { /* * Some block drivers (like pcata) may not support EFI * GPT labels. Print out a helpful error message dir- * ecting the user to manually label the disk and give * a specific slice. 
*/ (void) close(fd); efi_free(vtoc); zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "try using " "parted(8) and then provide a specific slice: %d"), rval); return (zfs_error(hdl, EZFS_LABELFAILED, errbuf)); } (void) close(fd); efi_free(vtoc); /* Wait for the first expected partition to appear. */ (void) snprintf(path, sizeof (path), "%s/%s", DISK_ROOT, name); (void) zfs_append_partition(path, MAXPATHLEN); rval = zpool_label_disk_wait(path, 3000); if (rval) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "failed to " "detect device partitions on '%s': %d"), path, rval); return (zfs_error(hdl, EZFS_LABELFAILED, errbuf)); } /* We can't be to paranoid. Read the label back and verify it. */ (void) snprintf(path, sizeof (path), "%s/%s", DISK_ROOT, name); rval = zpool_label_disk_check(path); if (rval) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "freshly written " "EFI label on '%s' is damaged. Ensure\nthis device " "is not in in use, and is functioning properly: %d"), path, rval); return (zfs_error(hdl, EZFS_LABELFAILED, errbuf)); } return 0; } diff --git a/lib/libzfs/libzfs_sendrecv.c b/lib/libzfs/libzfs_sendrecv.c index 9dbfb1641ac6..5d0ab0eb4b71 100644 --- a/lib/libzfs/libzfs_sendrecv.c +++ b/lib/libzfs/libzfs_sendrecv.c @@ -1,3215 +1,3215 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012 by Delphix. All rights reserved. * Copyright (c) 2012 Pawel Jakub Dawidek . * Copyright (c) 2012, Joyent, Inc. All rights reserved. 
* All rights reserved */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "zfs_namecheck.h" #include "zfs_prop.h" #include "zfs_fletcher.h" #include "libzfs_impl.h" #include #include #include /* in libzfs_dataset.c */ extern void zfs_setprop_error(libzfs_handle_t *, zfs_prop_t, int, char *); static int zfs_receive_impl(libzfs_handle_t *, const char *, recvflags_t *, int, const char *, nvlist_t *, avl_tree_t *, char **, int, uint64_t *); static const zio_cksum_t zero_cksum = { { 0 } }; typedef struct dedup_arg { int inputfd; int outputfd; libzfs_handle_t *dedup_hdl; } dedup_arg_t; typedef struct progress_arg { zfs_handle_t *pa_zhp; int pa_fd; boolean_t pa_parsable; } progress_arg_t; typedef struct dataref { uint64_t ref_guid; uint64_t ref_object; uint64_t ref_offset; } dataref_t; typedef struct dedup_entry { struct dedup_entry *dde_next; zio_cksum_t dde_chksum; uint64_t dde_prop; dataref_t dde_ref; } dedup_entry_t; #define MAX_DDT_PHYSMEM_PERCENT 20 #define SMALLEST_POSSIBLE_MAX_DDT_MB 128 typedef struct dedup_table { dedup_entry_t **dedup_hash_array; umem_cache_t *ddecache; uint64_t max_ddt_size; /* max dedup table size in bytes */ uint64_t cur_ddt_size; /* current dedup table size in bytes */ uint64_t ddt_count; int numhashbits; boolean_t ddt_full; } dedup_table_t; static int high_order_bit(uint64_t n) { int count; for (count = 0; n != 0; count++) n >>= 1; return (count); } static size_t ssread(void *buf, size_t len, FILE *stream) { size_t outlen; if ((outlen = fread(buf, len, 1, stream)) == 0) return (0); return (outlen); } static void ddt_hash_append(libzfs_handle_t *hdl, dedup_table_t *ddt, dedup_entry_t **ddepp, zio_cksum_t *cs, uint64_t prop, dataref_t *dr) { dedup_entry_t *dde; if (ddt->cur_ddt_size >= ddt->max_ddt_size) { if (ddt->ddt_full == B_FALSE) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "Dedup table full. Deduplication will continue " "with existing table entries")); ddt->ddt_full = B_TRUE; } return; } if ((dde = umem_cache_alloc(ddt->ddecache, UMEM_DEFAULT)) != NULL) { assert(*ddepp == NULL); dde->dde_next = NULL; dde->dde_chksum = *cs; dde->dde_prop = prop; dde->dde_ref = *dr; *ddepp = dde; ddt->cur_ddt_size += sizeof (dedup_entry_t); ddt->ddt_count++; } } /* * Using the specified dedup table, do a lookup for an entry with * the checksum cs. If found, return the block's reference info * in *dr. Otherwise, insert a new entry in the dedup table, using * the reference information specified by *dr. * * return value: true - entry was found * false - entry was not found */ static boolean_t ddt_update(libzfs_handle_t *hdl, dedup_table_t *ddt, zio_cksum_t *cs, uint64_t prop, dataref_t *dr) { uint32_t hashcode; dedup_entry_t **ddepp; hashcode = BF64_GET(cs->zc_word[0], 0, ddt->numhashbits); for (ddepp = &(ddt->dedup_hash_array[hashcode]); *ddepp != NULL; ddepp = &((*ddepp)->dde_next)) { if (ZIO_CHECKSUM_EQUAL(((*ddepp)->dde_chksum), *cs) && (*ddepp)->dde_prop == prop) { *dr = (*ddepp)->dde_ref; return (B_TRUE); } } ddt_hash_append(hdl, ddt, ddepp, cs, prop, dr); return (B_FALSE); } static int cksum_and_write(const void *buf, uint64_t len, zio_cksum_t *zc, int outfd) { fletcher_4_incremental_native(buf, len, zc); return (write(outfd, buf, len)); } /* * This function is started in a separate thread when the dedup option * has been requested. 
The main send thread determines the list of * snapshots to be included in the send stream and makes the ioctl calls * for each one. But instead of having the ioctl send the output to the * the output fd specified by the caller of zfs_send()), the * ioctl is told to direct the output to a pipe, which is read by the * alternate thread running THIS function. This function does the * dedup'ing by: * 1. building a dedup table (the DDT) * 2. doing checksums on each data block and inserting a record in the DDT * 3. looking for matching checksums, and * 4. sending a DRR_WRITE_BYREF record instead of a write record whenever * a duplicate block is found. * The output of this function then goes to the output fd requested * by the caller of zfs_send(). */ static void * cksummer(void *arg) { dedup_arg_t *dda = arg; char *buf = malloc(1<<20); dmu_replay_record_t thedrr; dmu_replay_record_t *drr = &thedrr; struct drr_begin *drrb = &thedrr.drr_u.drr_begin; struct drr_end *drre = &thedrr.drr_u.drr_end; struct drr_object *drro = &thedrr.drr_u.drr_object; struct drr_write *drrw = &thedrr.drr_u.drr_write; struct drr_spill *drrs = &thedrr.drr_u.drr_spill; FILE *ofp; int outfd; dmu_replay_record_t wbr_drr = {0}; struct drr_write_byref *wbr_drrr = &wbr_drr.drr_u.drr_write_byref; dedup_table_t ddt; zio_cksum_t stream_cksum; uint64_t physmem = sysconf(_SC_PHYS_PAGES) * sysconf(_SC_PAGESIZE); uint64_t numbuckets; ddt.max_ddt_size = MAX((physmem * MAX_DDT_PHYSMEM_PERCENT)/100, SMALLEST_POSSIBLE_MAX_DDT_MB<<20); numbuckets = ddt.max_ddt_size/(sizeof (dedup_entry_t)); /* * numbuckets must be a power of 2. Increase number to * a power of 2 if necessary. */ if (!ISP2(numbuckets)) numbuckets = 1 << high_order_bit(numbuckets); ddt.dedup_hash_array = calloc(numbuckets, sizeof (dedup_entry_t *)); ddt.ddecache = umem_cache_create("dde", sizeof (dedup_entry_t), 0, NULL, NULL, NULL, NULL, NULL, 0); ddt.cur_ddt_size = numbuckets * sizeof (dedup_entry_t *); ddt.numhashbits = high_order_bit(numbuckets) - 1; ddt.ddt_full = B_FALSE; /* Initialize the write-by-reference block. 
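 * A single template record is reused for every duplicate: when ddt_update()
 * reports that a block's checksum was already seen, the reference fields
 * are overwritten with the first occurrence's location and the record is
 * sent instead of the DRR_WRITE record and its payload (see the DRR_WRITE
 * case below):
 *
 *      wbr_drrr->drr_refguid   = dataref.ref_guid;
 *      wbr_drrr->drr_refobject = dataref.ref_object;
 *      wbr_drrr->drr_refoffset = dataref.ref_offset;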
*/ wbr_drr.drr_type = DRR_WRITE_BYREF; wbr_drr.drr_payloadlen = 0; outfd = dda->outputfd; ofp = fdopen(dda->inputfd, "r"); while (ssread(drr, sizeof (dmu_replay_record_t), ofp) != 0) { switch (drr->drr_type) { case DRR_BEGIN: { int fflags; ZIO_SET_CHECKSUM(&stream_cksum, 0, 0, 0, 0); /* set the DEDUP feature flag for this stream */ fflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo); fflags |= (DMU_BACKUP_FEATURE_DEDUP | DMU_BACKUP_FEATURE_DEDUPPROPS); DMU_SET_FEATUREFLAGS(drrb->drr_versioninfo, fflags); if (cksum_and_write(drr, sizeof (dmu_replay_record_t), &stream_cksum, outfd) == -1) goto out; if (DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) == DMU_COMPOUNDSTREAM && drr->drr_payloadlen != 0) { int sz = drr->drr_payloadlen; if (sz > 1<<20) { free(buf); buf = malloc(sz); } (void) ssread(buf, sz, ofp); if (ferror(stdin)) perror("fread"); if (cksum_and_write(buf, sz, &stream_cksum, outfd) == -1) goto out; } break; } case DRR_END: { /* use the recalculated checksum */ ZIO_SET_CHECKSUM(&drre->drr_checksum, stream_cksum.zc_word[0], stream_cksum.zc_word[1], stream_cksum.zc_word[2], stream_cksum.zc_word[3]); if ((write(outfd, drr, sizeof (dmu_replay_record_t))) == -1) goto out; break; } case DRR_OBJECT: { if (cksum_and_write(drr, sizeof (dmu_replay_record_t), &stream_cksum, outfd) == -1) goto out; if (drro->drr_bonuslen > 0) { (void) ssread(buf, P2ROUNDUP((uint64_t)drro->drr_bonuslen, 8), ofp); if (cksum_and_write(buf, P2ROUNDUP((uint64_t)drro->drr_bonuslen, 8), &stream_cksum, outfd) == -1) goto out; } break; } case DRR_SPILL: { if (cksum_and_write(drr, sizeof (dmu_replay_record_t), &stream_cksum, outfd) == -1) goto out; (void) ssread(buf, drrs->drr_length, ofp); if (cksum_and_write(buf, drrs->drr_length, &stream_cksum, outfd) == -1) goto out; break; } case DRR_FREEOBJECTS: { if (cksum_and_write(drr, sizeof (dmu_replay_record_t), &stream_cksum, outfd) == -1) goto out; break; } case DRR_WRITE: { dataref_t dataref; (void) ssread(buf, drrw->drr_length, ofp); /* * Use the existing checksum if it's dedup-capable, * else calculate a SHA256 checksum for it. 
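 *
 * "Dedup-capable" means the record already carries a non-zero checksum
 * with DRR_CHECKSUM_DEDUP set in drr_checksumflags; anything else gets a
 * fresh SHA256 over the payload, stored as big-endian words so that the
 * comparison in ddt_update() is byte-order independent:
 *
 *      zio_checksum_SHA256(buf, drrw->drr_length, &tmpsha256);
 *      drrw->drr_key.ddk_cksum.zc_word[0] = BE_64(tmpsha256.zc_word[0]);
 *      ...
 *      drrw->drr_checksumtype  = ZIO_CHECKSUM_SHA256;
 *      drrw->drr_checksumflags = DRR_CHECKSUM_DEDUP;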
*/ if (ZIO_CHECKSUM_EQUAL(drrw->drr_key.ddk_cksum, zero_cksum) || !DRR_IS_DEDUP_CAPABLE(drrw->drr_checksumflags)) { zio_cksum_t tmpsha256; zio_checksum_SHA256(buf, drrw->drr_length, &tmpsha256); drrw->drr_key.ddk_cksum.zc_word[0] = BE_64(tmpsha256.zc_word[0]); drrw->drr_key.ddk_cksum.zc_word[1] = BE_64(tmpsha256.zc_word[1]); drrw->drr_key.ddk_cksum.zc_word[2] = BE_64(tmpsha256.zc_word[2]); drrw->drr_key.ddk_cksum.zc_word[3] = BE_64(tmpsha256.zc_word[3]); drrw->drr_checksumtype = ZIO_CHECKSUM_SHA256; drrw->drr_checksumflags = DRR_CHECKSUM_DEDUP; } dataref.ref_guid = drrw->drr_toguid; dataref.ref_object = drrw->drr_object; dataref.ref_offset = drrw->drr_offset; if (ddt_update(dda->dedup_hdl, &ddt, &drrw->drr_key.ddk_cksum, drrw->drr_key.ddk_prop, &dataref)) { /* block already present in stream */ wbr_drrr->drr_object = drrw->drr_object; wbr_drrr->drr_offset = drrw->drr_offset; wbr_drrr->drr_length = drrw->drr_length; wbr_drrr->drr_toguid = drrw->drr_toguid; wbr_drrr->drr_refguid = dataref.ref_guid; wbr_drrr->drr_refobject = dataref.ref_object; wbr_drrr->drr_refoffset = dataref.ref_offset; wbr_drrr->drr_checksumtype = drrw->drr_checksumtype; wbr_drrr->drr_checksumflags = drrw->drr_checksumtype; wbr_drrr->drr_key.ddk_cksum = drrw->drr_key.ddk_cksum; wbr_drrr->drr_key.ddk_prop = drrw->drr_key.ddk_prop; if (cksum_and_write(&wbr_drr, sizeof (dmu_replay_record_t), &stream_cksum, outfd) == -1) goto out; } else { /* block not previously seen */ if (cksum_and_write(drr, sizeof (dmu_replay_record_t), &stream_cksum, outfd) == -1) goto out; if (cksum_and_write(buf, drrw->drr_length, &stream_cksum, outfd) == -1) goto out; } break; } case DRR_FREE: { if (cksum_and_write(drr, sizeof (dmu_replay_record_t), &stream_cksum, outfd) == -1) goto out; break; } default: (void) printf("INVALID record type 0x%x\n", drr->drr_type); /* should never happen, so assert */ assert(B_FALSE); } } out: umem_cache_destroy(ddt.ddecache); free(ddt.dedup_hash_array); free(buf); (void) fclose(ofp); return (NULL); } /* * Routines for dealing with the AVL tree of fs-nvlists */ typedef struct fsavl_node { avl_node_t fn_node; nvlist_t *fn_nvfs; char *fn_snapname; uint64_t fn_guid; } fsavl_node_t; static int fsavl_compare(const void *arg1, const void *arg2) { const fsavl_node_t *fn1 = arg1; const fsavl_node_t *fn2 = arg2; if (fn1->fn_guid > fn2->fn_guid) return (+1); else if (fn1->fn_guid < fn2->fn_guid) return (-1); else return (0); } /* * Given the GUID of a snapshot, find its containing filesystem and * (optionally) name. 
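 *
 * Callers in the send path use this to map a guid recorded in the stream's
 * fs-nvlists back to its filesystem entry, e.g. (from dump_filesystems):
 *
 *      char *snapname;
 *      nvlist_t *origin_nv = fsavl_find(sdd->fsavl, origin_guid, &snapname);
 *      if (origin_nv != NULL)
 *              ... mark 'snapname' in origin_nv's "snapprops" ...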
*/ static nvlist_t * fsavl_find(avl_tree_t *avl, uint64_t snapguid, char **snapname) { fsavl_node_t fn_find; fsavl_node_t *fn; fn_find.fn_guid = snapguid; fn = avl_find(avl, &fn_find, NULL); if (fn) { if (snapname) *snapname = fn->fn_snapname; return (fn->fn_nvfs); } return (NULL); } static void fsavl_destroy(avl_tree_t *avl) { fsavl_node_t *fn; void *cookie; if (avl == NULL) return; cookie = NULL; while ((fn = avl_destroy_nodes(avl, &cookie)) != NULL) free(fn); avl_destroy(avl); free(avl); } /* * Given an nvlist, produce an avl tree of snapshots, ordered by guid */ static avl_tree_t * fsavl_create(nvlist_t *fss) { avl_tree_t *fsavl; nvpair_t *fselem = NULL; if ((fsavl = malloc(sizeof (avl_tree_t))) == NULL) return (NULL); avl_create(fsavl, fsavl_compare, sizeof (fsavl_node_t), offsetof(fsavl_node_t, fn_node)); while ((fselem = nvlist_next_nvpair(fss, fselem)) != NULL) { nvlist_t *nvfs, *snaps; nvpair_t *snapelem = NULL; VERIFY(0 == nvpair_value_nvlist(fselem, &nvfs)); VERIFY(0 == nvlist_lookup_nvlist(nvfs, "snaps", &snaps)); while ((snapelem = nvlist_next_nvpair(snaps, snapelem)) != NULL) { fsavl_node_t *fn; uint64_t guid; VERIFY(0 == nvpair_value_uint64(snapelem, &guid)); if ((fn = malloc(sizeof (fsavl_node_t))) == NULL) { fsavl_destroy(fsavl); return (NULL); } fn->fn_nvfs = nvfs; fn->fn_snapname = nvpair_name(snapelem); fn->fn_guid = guid; /* * Note: if there are multiple snaps with the * same GUID, we ignore all but one. */ if (avl_find(fsavl, fn, NULL) == NULL) avl_add(fsavl, fn); else free(fn); } } return (fsavl); } /* * Routines for dealing with the giant nvlist of fs-nvlists, etc. */ typedef struct send_data { uint64_t parent_fromsnap_guid; nvlist_t *parent_snaps; nvlist_t *fss; nvlist_t *snapprops; const char *fromsnap; const char *tosnap; boolean_t recursive; /* * The header nvlist is of the following format: * { * "tosnap" -> string * "fromsnap" -> string (if incremental) * "fss" -> { * id -> { * * "name" -> string (full name; for debugging) * "parentfromsnap" -> number (guid of fromsnap in parent) * * "props" -> { name -> value (only if set here) } * "snaps" -> { name (lastname) -> number (guid) } * "snapprops" -> { name (lastname) -> { name -> value } } * * "origin" -> number (guid) (if clone) * "sent" -> boolean (not on-disk) * } * } * } * */ } send_data_t; static void send_iterate_prop(zfs_handle_t *zhp, nvlist_t *nv); static int send_iterate_snap(zfs_handle_t *zhp, void *arg) { send_data_t *sd = arg; uint64_t guid = zhp->zfs_dmustats.dds_guid; char *snapname; nvlist_t *nv; snapname = strrchr(zhp->zfs_name, '@')+1; VERIFY(0 == nvlist_add_uint64(sd->parent_snaps, snapname, guid)); /* * NB: if there is no fromsnap here (it's a newly created fs in * an incremental replication), we will substitute the tosnap. */ if ((sd->fromsnap && strcmp(snapname, sd->fromsnap) == 0) || (sd->parent_fromsnap_guid == 0 && sd->tosnap && strcmp(snapname, sd->tosnap) == 0)) { sd->parent_fromsnap_guid = guid; } VERIFY(0 == nvlist_alloc(&nv, NV_UNIQUE_NAME, 0)); send_iterate_prop(zhp, nv); VERIFY(0 == nvlist_add_nvlist(sd->snapprops, snapname, nv)); nvlist_free(nv); zfs_close(zhp); return (0); } static void send_iterate_prop(zfs_handle_t *zhp, nvlist_t *nv) { nvpair_t *elem = NULL; while ((elem = nvlist_next_nvpair(zhp->zfs_props, elem)) != NULL) { char *propname = nvpair_name(elem); zfs_prop_t prop = zfs_name_to_prop(propname); nvlist_t *propnv; if (!zfs_prop_user(propname)) { /* * Realistically, this should never happen. 
However, * we want the ability to add DSL properties without * needing to make incompatible version changes. We * need to ignore unknown properties to allow older * software to still send datasets containing these * properties, with the unknown properties elided. */ if (prop == ZPROP_INVAL) continue; if (zfs_prop_readonly(prop)) continue; } verify(nvpair_value_nvlist(elem, &propnv) == 0); if (prop == ZFS_PROP_QUOTA || prop == ZFS_PROP_RESERVATION || prop == ZFS_PROP_REFQUOTA || prop == ZFS_PROP_REFRESERVATION) { char *source; uint64_t value; verify(nvlist_lookup_uint64(propnv, ZPROP_VALUE, &value) == 0); if (zhp->zfs_type == ZFS_TYPE_SNAPSHOT) continue; /* * May have no source before SPA_VERSION_RECVD_PROPS, * but is still modifiable. */ if (nvlist_lookup_string(propnv, ZPROP_SOURCE, &source) == 0) { if ((strcmp(source, zhp->zfs_name) != 0) && (strcmp(source, ZPROP_SOURCE_VAL_RECVD) != 0)) continue; } } else { char *source; if (nvlist_lookup_string(propnv, ZPROP_SOURCE, &source) != 0) continue; if ((strcmp(source, zhp->zfs_name) != 0) && (strcmp(source, ZPROP_SOURCE_VAL_RECVD) != 0)) continue; } if (zfs_prop_user(propname) || zfs_prop_get_type(prop) == PROP_TYPE_STRING) { char *value; verify(nvlist_lookup_string(propnv, ZPROP_VALUE, &value) == 0); VERIFY(0 == nvlist_add_string(nv, propname, value)); } else { uint64_t value; verify(nvlist_lookup_uint64(propnv, ZPROP_VALUE, &value) == 0); VERIFY(0 == nvlist_add_uint64(nv, propname, value)); } } } /* * recursively generate nvlists describing datasets. See comment * for the data structure send_data_t above for description of contents * of the nvlist. */ static int send_iterate_fs(zfs_handle_t *zhp, void *arg) { send_data_t *sd = arg; nvlist_t *nvfs, *nv; int rv = 0; uint64_t parent_fromsnap_guid_save = sd->parent_fromsnap_guid; uint64_t guid = zhp->zfs_dmustats.dds_guid; char guidstring[64]; VERIFY(0 == nvlist_alloc(&nvfs, NV_UNIQUE_NAME, 0)); VERIFY(0 == nvlist_add_string(nvfs, "name", zhp->zfs_name)); VERIFY(0 == nvlist_add_uint64(nvfs, "parentfromsnap", sd->parent_fromsnap_guid)); if (zhp->zfs_dmustats.dds_origin[0]) { zfs_handle_t *origin = zfs_open(zhp->zfs_hdl, zhp->zfs_dmustats.dds_origin, ZFS_TYPE_SNAPSHOT); if (origin == NULL) return (-1); VERIFY(0 == nvlist_add_uint64(nvfs, "origin", origin->zfs_dmustats.dds_guid)); } /* iterate over props */ VERIFY(0 == nvlist_alloc(&nv, NV_UNIQUE_NAME, 0)); send_iterate_prop(zhp, nv); VERIFY(0 == nvlist_add_nvlist(nvfs, "props", nv)); nvlist_free(nv); /* iterate over snaps, and set sd->parent_fromsnap_guid */ sd->parent_fromsnap_guid = 0; VERIFY(0 == nvlist_alloc(&sd->parent_snaps, NV_UNIQUE_NAME, 0)); VERIFY(0 == nvlist_alloc(&sd->snapprops, NV_UNIQUE_NAME, 0)); (void) zfs_iter_snapshots(zhp, B_FALSE, send_iterate_snap, sd); VERIFY(0 == nvlist_add_nvlist(nvfs, "snaps", sd->parent_snaps)); VERIFY(0 == nvlist_add_nvlist(nvfs, "snapprops", sd->snapprops)); nvlist_free(sd->parent_snaps); nvlist_free(sd->snapprops); /* add this fs to nvlist */ (void) snprintf(guidstring, sizeof (guidstring), "0x%llx", (longlong_t)guid); VERIFY(0 == nvlist_add_nvlist(sd->fss, guidstring, nvfs)); nvlist_free(nvfs); /* iterate over children */ if (sd->recursive) rv = zfs_iter_filesystems(zhp, send_iterate_fs, sd); sd->parent_fromsnap_guid = parent_fromsnap_guid_save; zfs_close(zhp); return (rv); } static int gather_nvlist(libzfs_handle_t *hdl, const char *fsname, const char *fromsnap, const char *tosnap, boolean_t recursive, nvlist_t **nvlp, avl_tree_t **avlp) { zfs_handle_t *zhp; send_data_t sd = { 0 }; int error; zhp = 
zfs_open(hdl, fsname, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME); if (zhp == NULL) return (EZFS_BADTYPE); VERIFY(0 == nvlist_alloc(&sd.fss, NV_UNIQUE_NAME, 0)); sd.fromsnap = fromsnap; sd.tosnap = tosnap; sd.recursive = recursive; if ((error = send_iterate_fs(zhp, &sd)) != 0) { nvlist_free(sd.fss); if (avlp != NULL) *avlp = NULL; *nvlp = NULL; return (error); } if (avlp != NULL && (*avlp = fsavl_create(sd.fss)) == NULL) { nvlist_free(sd.fss); *nvlp = NULL; return (EZFS_NOMEM); } *nvlp = sd.fss; return (0); } /* * Routines specific to "zfs send" */ typedef struct send_dump_data { /* these are all just the short snapname (the part after the @) */ const char *fromsnap; const char *tosnap; char prevsnap[ZFS_MAXNAMELEN]; uint64_t prevsnap_obj; boolean_t seenfrom, seento, replicate, doall, fromorigin; boolean_t verbose, dryrun, parsable, progress; int outfd; boolean_t err; nvlist_t *fss; avl_tree_t *fsavl; snapfilter_cb_t *filter_cb; void *filter_cb_arg; nvlist_t *debugnv; char holdtag[ZFS_MAXNAMELEN]; int cleanup_fd; uint64_t size; } send_dump_data_t; static int estimate_ioctl(zfs_handle_t *zhp, uint64_t fromsnap_obj, boolean_t fromorigin, uint64_t *sizep) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; libzfs_handle_t *hdl = zhp->zfs_hdl; assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT); assert(fromsnap_obj == 0 || !fromorigin); (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); zc.zc_obj = fromorigin; zc.zc_sendobj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID); zc.zc_fromobj = fromsnap_obj; zc.zc_guid = 1; /* estimate flag */ if (zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_SEND, &zc) != 0) { char errbuf[1024]; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "warning: cannot estimate space for '%s'"), zhp->zfs_name); switch (errno) { case EXDEV: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "not an earlier snapshot from the same fs")); return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf)); case ENOENT: if (zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_SNAPSHOT)) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "incremental source (@%s) does not exist"), zc.zc_value); } return (zfs_error(hdl, EZFS_NOENT, errbuf)); case EDQUOT: case EFBIG: case EIO: case ENOLINK: case ENOSPC: case ENOSTR: case ENXIO: case EPIPE: case ERANGE: case EFAULT: case EROFS: zfs_error_aux(hdl, strerror(errno)); return (zfs_error(hdl, EZFS_BADBACKUP, errbuf)); default: return (zfs_standard_error(hdl, errno, errbuf)); } } *sizep = zc.zc_objset_type; return (0); } /* * Dumps a backup of the given snapshot (incremental from fromsnap if it's not * NULL) to the file descriptor specified by outfd. 
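 *
 * The ioctl plumbing used below, for reference:
 *
 *      zc.zc_cookie  = outfd;                          where the stream goes
 *      zc.zc_sendobj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID);
 *      zc.zc_fromobj = fromsnap_obj;                   0 for a full stream
 *      zc.zc_obj     = fromorigin;                     send from clone origin
 *
 * Per-snapshot debug information, including any errno from a failed send,
 * is added to 'debugnv' under the dataset's name when the caller passes one.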
*/ static int dump_ioctl(zfs_handle_t *zhp, const char *fromsnap, uint64_t fromsnap_obj, boolean_t fromorigin, int outfd, nvlist_t *debugnv) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; libzfs_handle_t *hdl = zhp->zfs_hdl; nvlist_t *thisdbg; assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT); assert(fromsnap_obj == 0 || !fromorigin); (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); zc.zc_cookie = outfd; zc.zc_obj = fromorigin; zc.zc_sendobj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID); zc.zc_fromobj = fromsnap_obj; VERIFY(0 == nvlist_alloc(&thisdbg, NV_UNIQUE_NAME, 0)); if (fromsnap && fromsnap[0] != '\0') { VERIFY(0 == nvlist_add_string(thisdbg, "fromsnap", fromsnap)); } if (zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_SEND, &zc) != 0) { char errbuf[1024]; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "warning: cannot send '%s'"), zhp->zfs_name); VERIFY(0 == nvlist_add_uint64(thisdbg, "error", errno)); if (debugnv) { VERIFY(0 == nvlist_add_nvlist(debugnv, zhp->zfs_name, thisdbg)); } nvlist_free(thisdbg); switch (errno) { case EXDEV: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "not an earlier snapshot from the same fs")); return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf)); case ENOENT: if (zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_SNAPSHOT)) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "incremental source (@%s) does not exist"), zc.zc_value); } return (zfs_error(hdl, EZFS_NOENT, errbuf)); case EDQUOT: case EFBIG: case EIO: case ENOLINK: case ENOSPC: case ENOSTR: case ENXIO: case EPIPE: case ERANGE: case EFAULT: case EROFS: zfs_error_aux(hdl, strerror(errno)); return (zfs_error(hdl, EZFS_BADBACKUP, errbuf)); default: return (zfs_standard_error(hdl, errno, errbuf)); } } if (debugnv) VERIFY(0 == nvlist_add_nvlist(debugnv, zhp->zfs_name, thisdbg)); nvlist_free(thisdbg); return (0); } static int hold_for_send(zfs_handle_t *zhp, send_dump_data_t *sdd) { zfs_handle_t *pzhp; int error = 0; char *thissnap; assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT); if (sdd->dryrun) return (0); /* * zfs_send() only opens a cleanup_fd for sends that need it, * e.g. replication and doall. */ if (sdd->cleanup_fd == -1) return (0); thissnap = strchr(zhp->zfs_name, '@') + 1; *(thissnap - 1) = '\0'; pzhp = zfs_open(zhp->zfs_hdl, zhp->zfs_name, ZFS_TYPE_DATASET); *(thissnap - 1) = '@'; /* * It's OK if the parent no longer exists. The send code will * handle that error. */ if (pzhp) { error = zfs_hold(pzhp, thissnap, sdd->holdtag, B_FALSE, B_TRUE, B_TRUE, sdd->cleanup_fd, zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID), zfs_prop_get_int(zhp, ZFS_PROP_CREATETXG)); zfs_close(pzhp); } return (error); } static void * send_progress_thread(void *arg) { progress_arg_t *pa = arg; - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; zfs_handle_t *zhp = pa->pa_zhp; libzfs_handle_t *hdl = zhp->zfs_hdl; unsigned long long bytes; char buf[16]; time_t t; struct tm *tm; assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT); (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); if (!pa->pa_parsable) (void) fprintf(stderr, "TIME SENT SNAPSHOT\n"); /* * Print the progress from ZFS_IOC_SEND_PROGRESS every second. 
*/ for (;;) { (void) sleep(1); zc.zc_cookie = pa->pa_fd; if (zfs_ioctl(hdl, ZFS_IOC_SEND_PROGRESS, &zc) != 0) return ((void *)-1); (void) time(&t); tm = localtime(&t); bytes = zc.zc_cookie; if (pa->pa_parsable) { (void) fprintf(stderr, "%02d:%02d:%02d\t%llu\t%s\n", tm->tm_hour, tm->tm_min, tm->tm_sec, bytes, zhp->zfs_name); } else { zfs_nicenum(bytes, buf, sizeof (buf)); (void) fprintf(stderr, "%02d:%02d:%02d %5s %s\n", tm->tm_hour, tm->tm_min, tm->tm_sec, buf, zhp->zfs_name); } } } static int dump_snapshot(zfs_handle_t *zhp, void *arg) { send_dump_data_t *sdd = arg; progress_arg_t pa = { 0 }; pthread_t tid; char *thissnap; int err; boolean_t isfromsnap, istosnap, fromorigin; boolean_t exclude = B_FALSE; thissnap = strchr(zhp->zfs_name, '@') + 1; isfromsnap = (sdd->fromsnap != NULL && strcmp(sdd->fromsnap, thissnap) == 0); if (!sdd->seenfrom && isfromsnap) { err = hold_for_send(zhp, sdd); if (err == 0) { sdd->seenfrom = B_TRUE; (void) strcpy(sdd->prevsnap, thissnap); sdd->prevsnap_obj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID); } else if (err == ENOENT) { err = 0; } zfs_close(zhp); return (err); } if (sdd->seento || !sdd->seenfrom) { zfs_close(zhp); return (0); } istosnap = (strcmp(sdd->tosnap, thissnap) == 0); if (istosnap) sdd->seento = B_TRUE; if (!sdd->doall && !isfromsnap && !istosnap) { if (sdd->replicate) { char *snapname; nvlist_t *snapprops; /* * Filter out all intermediate snapshots except origin * snapshots needed to replicate clones. */ nvlist_t *nvfs = fsavl_find(sdd->fsavl, zhp->zfs_dmustats.dds_guid, &snapname); VERIFY(0 == nvlist_lookup_nvlist(nvfs, "snapprops", &snapprops)); VERIFY(0 == nvlist_lookup_nvlist(snapprops, thissnap, &snapprops)); exclude = !nvlist_exists(snapprops, "is_clone_origin"); } else { exclude = B_TRUE; } } /* * If a filter function exists, call it to determine whether * this snapshot will be sent. */ if (exclude || (sdd->filter_cb != NULL && sdd->filter_cb(zhp, sdd->filter_cb_arg) == B_FALSE)) { /* * This snapshot is filtered out. Don't send it, and don't * set prevsnap_obj, so it will be as if this snapshot didn't * exist, and the next accepted snapshot will be sent as * an incremental from the last accepted one, or as the * first (and full) snapshot in the case of a replication, * non-incremental send. */ zfs_close(zhp); return (0); } err = hold_for_send(zhp, sdd); if (err) { if (err == ENOENT) err = 0; zfs_close(zhp); return (err); } fromorigin = sdd->prevsnap[0] == '\0' && (sdd->fromorigin || sdd->replicate); if (sdd->verbose) { uint64_t size; err = estimate_ioctl(zhp, sdd->prevsnap_obj, fromorigin, &size); if (sdd->parsable) { if (sdd->prevsnap[0] != '\0') { (void) fprintf(stderr, "incremental\t%s\t%s", sdd->prevsnap, zhp->zfs_name); } else { (void) fprintf(stderr, "full\t%s", zhp->zfs_name); } } else { (void) fprintf(stderr, dgettext(TEXT_DOMAIN, "send from @%s to %s"), sdd->prevsnap, zhp->zfs_name); } if (err == 0) { if (sdd->parsable) { (void) fprintf(stderr, "\t%llu\n", (longlong_t)size); } else { char buf[16]; zfs_nicenum(size, buf, sizeof (buf)); (void) fprintf(stderr, dgettext(TEXT_DOMAIN, " estimated size is %s\n"), buf); } sdd->size += size; } else { (void) fprintf(stderr, "\n"); } } if (!sdd->dryrun) { /* * If progress reporting is requested, spawn a new thread to * poll ZFS_IOC_SEND_PROGRESS at a regular interval. 
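 *
 * The thread runs send_progress_thread() above, which wakes once a second
 * and issues ZFS_IOC_SEND_PROGRESS with zc_cookie set to the stream fd; it
 * is cancelled and joined as soon as dump_ioctl() returns:
 *
 *      (void) pthread_cancel(tid);
 *      (void) pthread_join(tid, NULL);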
*/ if (sdd->progress) { pa.pa_zhp = zhp; pa.pa_fd = sdd->outfd; pa.pa_parsable = sdd->parsable; if ((err = pthread_create(&tid, NULL, send_progress_thread, &pa))) { zfs_close(zhp); return (err); } } err = dump_ioctl(zhp, sdd->prevsnap, sdd->prevsnap_obj, fromorigin, sdd->outfd, sdd->debugnv); if (sdd->progress) { (void) pthread_cancel(tid); (void) pthread_join(tid, NULL); } } (void) strcpy(sdd->prevsnap, thissnap); sdd->prevsnap_obj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID); zfs_close(zhp); return (err); } static int dump_filesystem(zfs_handle_t *zhp, void *arg) { int rv = 0; send_dump_data_t *sdd = arg; boolean_t missingfrom = B_FALSE; - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; (void) snprintf(zc.zc_name, sizeof (zc.zc_name), "%s@%s", zhp->zfs_name, sdd->tosnap); if (ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0) { (void) fprintf(stderr, dgettext(TEXT_DOMAIN, "WARNING: could not send %s@%s: does not exist\n"), zhp->zfs_name, sdd->tosnap); sdd->err = B_TRUE; return (0); } if (sdd->replicate && sdd->fromsnap) { /* * If this fs does not have fromsnap, and we're doing * recursive, we need to send a full stream from the * beginning (or an incremental from the origin if this * is a clone). If we're doing non-recursive, then let * them get the error. */ (void) snprintf(zc.zc_name, sizeof (zc.zc_name), "%s@%s", zhp->zfs_name, sdd->fromsnap); if (ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0) { missingfrom = B_TRUE; } } sdd->seenfrom = sdd->seento = sdd->prevsnap[0] = 0; sdd->prevsnap_obj = 0; if (sdd->fromsnap == NULL || missingfrom) sdd->seenfrom = B_TRUE; rv = zfs_iter_snapshots_sorted(zhp, dump_snapshot, arg); if (!sdd->seenfrom) { (void) fprintf(stderr, dgettext(TEXT_DOMAIN, "WARNING: could not send %s@%s:\n" "incremental source (%s@%s) does not exist\n"), zhp->zfs_name, sdd->tosnap, zhp->zfs_name, sdd->fromsnap); sdd->err = B_TRUE; } else if (!sdd->seento) { if (sdd->fromsnap) { (void) fprintf(stderr, dgettext(TEXT_DOMAIN, "WARNING: could not send %s@%s:\n" "incremental source (%s@%s) " "is not earlier than it\n"), zhp->zfs_name, sdd->tosnap, zhp->zfs_name, sdd->fromsnap); } else { (void) fprintf(stderr, dgettext(TEXT_DOMAIN, "WARNING: " "could not send %s@%s: does not exist\n"), zhp->zfs_name, sdd->tosnap); } sdd->err = B_TRUE; } return (rv); } static int dump_filesystems(zfs_handle_t *rzhp, void *arg) { send_dump_data_t *sdd = arg; nvpair_t *fspair; boolean_t needagain, progress; if (!sdd->replicate) return (dump_filesystem(rzhp, sdd)); /* Mark the clone origin snapshots. 
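 * The loop that follows is then a simple work-list topological sort: a
 * filesystem is dumped only once the entries for its parent (by the
 * "parentfromsnap" guid) and its clone origin are marked "sent"; a pass
 * that defers anything sets 'needagain' and the list is walked again.
 * In outline:
 *
 *      again:
 *              needagain = progress = B_FALSE;
 *              for each fs-nvlist in sdd->fss:
 *                      if its parent or origin is not yet "sent":
 *                              needagain = B_TRUE; continue;
 *                      dump_filesystem(fs); mark "sent"; progress = B_TRUE;
 *              if (needagain) goto again;      progress must have been made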
*/ for (fspair = nvlist_next_nvpair(sdd->fss, NULL); fspair; fspair = nvlist_next_nvpair(sdd->fss, fspair)) { nvlist_t *nvfs; uint64_t origin_guid = 0; VERIFY(0 == nvpair_value_nvlist(fspair, &nvfs)); (void) nvlist_lookup_uint64(nvfs, "origin", &origin_guid); if (origin_guid != 0) { char *snapname; nvlist_t *origin_nv = fsavl_find(sdd->fsavl, origin_guid, &snapname); if (origin_nv != NULL) { nvlist_t *snapprops; VERIFY(0 == nvlist_lookup_nvlist(origin_nv, "snapprops", &snapprops)); VERIFY(0 == nvlist_lookup_nvlist(snapprops, snapname, &snapprops)); VERIFY(0 == nvlist_add_boolean( snapprops, "is_clone_origin")); } } } again: needagain = progress = B_FALSE; for (fspair = nvlist_next_nvpair(sdd->fss, NULL); fspair; fspair = nvlist_next_nvpair(sdd->fss, fspair)) { nvlist_t *fslist, *parent_nv; char *fsname; zfs_handle_t *zhp; int err; uint64_t origin_guid = 0; uint64_t parent_guid = 0; VERIFY(nvpair_value_nvlist(fspair, &fslist) == 0); if (nvlist_lookup_boolean(fslist, "sent") == 0) continue; VERIFY(nvlist_lookup_string(fslist, "name", &fsname) == 0); (void) nvlist_lookup_uint64(fslist, "origin", &origin_guid); (void) nvlist_lookup_uint64(fslist, "parentfromsnap", &parent_guid); if (parent_guid != 0) { parent_nv = fsavl_find(sdd->fsavl, parent_guid, NULL); if (!nvlist_exists(parent_nv, "sent")) { /* parent has not been sent; skip this one */ needagain = B_TRUE; continue; } } if (origin_guid != 0) { nvlist_t *origin_nv = fsavl_find(sdd->fsavl, origin_guid, NULL); if (origin_nv != NULL && !nvlist_exists(origin_nv, "sent")) { /* * origin has not been sent yet; * skip this clone. */ needagain = B_TRUE; continue; } } zhp = zfs_open(rzhp->zfs_hdl, fsname, ZFS_TYPE_DATASET); if (zhp == NULL) return (-1); err = dump_filesystem(zhp, sdd); VERIFY(nvlist_add_boolean(fslist, "sent") == 0); progress = B_TRUE; zfs_close(zhp); if (err) return (err); } if (needagain) { assert(progress); goto again; } /* clean out the sent flags in case we reuse this fss */ for (fspair = nvlist_next_nvpair(sdd->fss, NULL); fspair; fspair = nvlist_next_nvpair(sdd->fss, fspair)) { nvlist_t *fslist; VERIFY(nvpair_value_nvlist(fspair, &fslist) == 0); (void) nvlist_remove_all(fslist, "sent"); } return (0); } /* * Generate a send stream for the dataset identified by the argument zhp. * * The content of the send stream is the snapshot identified by * 'tosnap'. Incremental streams are requested in two ways: * - from the snapshot identified by "fromsnap" (if non-null) or * - from the origin of the dataset identified by zhp, which must * be a clone. In this case, "fromsnap" is null and "fromorigin" * is TRUE. * * The send stream is recursive (i.e. dumps a hierarchy of snapshots) and * uses a special header (with a hdrtype field of DMU_COMPOUNDSTREAM) * if "replicate" is set. If "doall" is set, dump all the intermediate * snapshots. The DMU_COMPOUNDSTREAM header is used in the "doall" * case too. If "props" is set, send properties. 
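 *
 * Illustrative caller sketch (an assumption about typical use, roughly what
 * a recursive, property-preserving send invocation sets up; not part of
 * this change):
 *
 *      sendflags_t flags = { 0 };
 *      flags.replicate = B_TRUE;       send a hierarchy of datasets
 *      flags.props = B_TRUE;           include properties
 *      flags.doall = B_TRUE;           all intermediate snapshots
 *      err = zfs_send(zhp, "from_snap", "to_snap", &flags, STDOUT_FILENO,
 *          NULL, NULL, NULL);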
*/ int zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap, sendflags_t *flags, int outfd, snapfilter_cb_t filter_func, void *cb_arg, nvlist_t **debugnvp) { char errbuf[1024]; send_dump_data_t sdd = { 0 }; int err = 0; nvlist_t *fss = NULL; avl_tree_t *fsavl = NULL; static uint64_t holdseq; int spa_version; pthread_t tid; int pipefd[2]; dedup_arg_t dda = { 0 }; int featureflags = 0; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot send '%s'"), zhp->zfs_name); if (fromsnap && fromsnap[0] == '\0') { zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, "zero-length incremental source")); return (zfs_error(zhp->zfs_hdl, EZFS_NOENT, errbuf)); } if (zhp->zfs_type == ZFS_TYPE_FILESYSTEM) { uint64_t version; version = zfs_prop_get_int(zhp, ZFS_PROP_VERSION); if (version >= ZPL_VERSION_SA) { featureflags |= DMU_BACKUP_FEATURE_SA_SPILL; } } if (flags->dedup && !flags->dryrun) { featureflags |= (DMU_BACKUP_FEATURE_DEDUP | DMU_BACKUP_FEATURE_DEDUPPROPS); if ((err = socketpair(AF_UNIX, SOCK_STREAM, 0, pipefd))) { zfs_error_aux(zhp->zfs_hdl, strerror(errno)); return (zfs_error(zhp->zfs_hdl, EZFS_PIPEFAILED, errbuf)); } dda.outputfd = outfd; dda.inputfd = pipefd[1]; dda.dedup_hdl = zhp->zfs_hdl; if ((err = pthread_create(&tid, NULL, cksummer, &dda))) { (void) close(pipefd[0]); (void) close(pipefd[1]); zfs_error_aux(zhp->zfs_hdl, strerror(errno)); return (zfs_error(zhp->zfs_hdl, EZFS_THREADCREATEFAILED, errbuf)); } } if (flags->replicate || flags->doall || flags->props) { dmu_replay_record_t drr = { 0 }; char *packbuf = NULL; size_t buflen = 0; zio_cksum_t zc = { { 0 } }; if (flags->replicate || flags->props) { nvlist_t *hdrnv; VERIFY(0 == nvlist_alloc(&hdrnv, NV_UNIQUE_NAME, 0)); if (fromsnap) { VERIFY(0 == nvlist_add_string(hdrnv, "fromsnap", fromsnap)); } VERIFY(0 == nvlist_add_string(hdrnv, "tosnap", tosnap)); if (!flags->replicate) { VERIFY(0 == nvlist_add_boolean(hdrnv, "not_recursive")); } err = gather_nvlist(zhp->zfs_hdl, zhp->zfs_name, fromsnap, tosnap, flags->replicate, &fss, &fsavl); if (err) goto err_out; VERIFY(0 == nvlist_add_nvlist(hdrnv, "fss", fss)); err = nvlist_pack(hdrnv, &packbuf, &buflen, NV_ENCODE_XDR, 0); if (debugnvp) *debugnvp = hdrnv; else nvlist_free(hdrnv); if (err) { fsavl_destroy(fsavl); nvlist_free(fss); goto stderr_out; } } if (!flags->dryrun) { /* write first begin record */ drr.drr_type = DRR_BEGIN; drr.drr_u.drr_begin.drr_magic = DMU_BACKUP_MAGIC; DMU_SET_STREAM_HDRTYPE(drr.drr_u.drr_begin. drr_versioninfo, DMU_COMPOUNDSTREAM); DMU_SET_FEATUREFLAGS(drr.drr_u.drr_begin. 
drr_versioninfo, featureflags); (void) snprintf(drr.drr_u.drr_begin.drr_toname, sizeof (drr.drr_u.drr_begin.drr_toname), "%s@%s", zhp->zfs_name, tosnap); drr.drr_payloadlen = buflen; err = cksum_and_write(&drr, sizeof (drr), &zc, outfd); /* write header nvlist */ if (err != -1 && packbuf != NULL) { err = cksum_and_write(packbuf, buflen, &zc, outfd); } free(packbuf); if (err == -1) { fsavl_destroy(fsavl); nvlist_free(fss); err = errno; goto stderr_out; } /* write end record */ bzero(&drr, sizeof (drr)); drr.drr_type = DRR_END; drr.drr_u.drr_end.drr_checksum = zc; err = write(outfd, &drr, sizeof (drr)); if (err == -1) { fsavl_destroy(fsavl); nvlist_free(fss); err = errno; goto stderr_out; } err = 0; } } /* dump each stream */ sdd.fromsnap = fromsnap; sdd.tosnap = tosnap; if (flags->dedup) sdd.outfd = pipefd[0]; else sdd.outfd = outfd; sdd.replicate = flags->replicate; sdd.doall = flags->doall; sdd.fromorigin = flags->fromorigin; sdd.fss = fss; sdd.fsavl = fsavl; sdd.verbose = flags->verbose; sdd.parsable = flags->parsable; sdd.progress = flags->progress; sdd.dryrun = flags->dryrun; sdd.filter_cb = filter_func; sdd.filter_cb_arg = cb_arg; if (debugnvp) sdd.debugnv = *debugnvp; /* * Some flags require that we place user holds on the datasets that are * being sent so they don't get destroyed during the send. We can skip * this step if the pool is imported read-only since the datasets cannot * be destroyed. */ if (!flags->dryrun && !zpool_get_prop_int(zfs_get_pool_handle(zhp), ZPOOL_PROP_READONLY, NULL) && zfs_spa_version(zhp, &spa_version) == 0 && spa_version >= SPA_VERSION_USERREFS && (flags->doall || flags->replicate)) { ++holdseq; (void) snprintf(sdd.holdtag, sizeof (sdd.holdtag), ".send-%d-%llu", getpid(), (u_longlong_t)holdseq); sdd.cleanup_fd = open(ZFS_DEV, O_RDWR); if (sdd.cleanup_fd < 0) { err = errno; goto stderr_out; } } else { sdd.cleanup_fd = -1; } if (flags->verbose) { /* * Do a verbose no-op dry run to get all the verbose output * before generating any data. Then do a non-verbose real * run to generate the streams. */ sdd.dryrun = B_TRUE; err = dump_filesystems(zhp, &sdd); sdd.dryrun = flags->dryrun; sdd.verbose = B_FALSE; if (flags->parsable) { (void) fprintf(stderr, "size\t%llu\n", (longlong_t)sdd.size); } else { char buf[16]; zfs_nicenum(sdd.size, buf, sizeof (buf)); (void) fprintf(stderr, dgettext(TEXT_DOMAIN, "total estimated size is %s\n"), buf); } } err = dump_filesystems(zhp, &sdd); fsavl_destroy(fsavl); nvlist_free(fss); if (flags->dedup) { (void) close(pipefd[0]); (void) pthread_join(tid, NULL); } if (sdd.cleanup_fd != -1) { VERIFY(0 == close(sdd.cleanup_fd)); sdd.cleanup_fd = -1; } if (!flags->dryrun && (flags->replicate || flags->doall || flags->props)) { /* * write final end record. NB: want to do this even if * there was some error, because it might not be totally * failed. 
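 *
 * For reference, and only as a rough sketch, the compound stream
 * assembled above is laid out as:
 *
 *	DRR_BEGIN (DMU_COMPOUNDSTREAM) + packed "fss" nvlist payload
 *	DRR_END carrying the checksum of that header
 *	one substream per snapshot, each with its own BEGIN/END
 *	a final, zeroed DRR_END record (written just below)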
*/ dmu_replay_record_t drr = { 0 }; drr.drr_type = DRR_END; if (write(outfd, &drr, sizeof (drr)) == -1) { return (zfs_standard_error(zhp->zfs_hdl, errno, errbuf)); } } return (err || sdd.err); stderr_out: err = zfs_standard_error(zhp->zfs_hdl, err, errbuf); err_out: if (sdd.cleanup_fd != -1) VERIFY(0 == close(sdd.cleanup_fd)); if (flags->dedup) { (void) pthread_cancel(tid); (void) pthread_join(tid, NULL); (void) close(pipefd[0]); } return (err); } /* * Routines specific to "zfs recv" */ static int recv_read(libzfs_handle_t *hdl, int fd, void *buf, int ilen, boolean_t byteswap, zio_cksum_t *zc) { char *cp = buf; int rv; int len = ilen; do { rv = read(fd, cp, len); cp += rv; len -= rv; } while (rv > 0); if (rv < 0 || len != 0) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "failed to read from stream")); return (zfs_error(hdl, EZFS_BADSTREAM, dgettext(TEXT_DOMAIN, "cannot receive"))); } if (zc) { if (byteswap) fletcher_4_incremental_byteswap(buf, ilen, zc); else fletcher_4_incremental_native(buf, ilen, zc); } return (0); } static int recv_read_nvlist(libzfs_handle_t *hdl, int fd, int len, nvlist_t **nvp, boolean_t byteswap, zio_cksum_t *zc) { char *buf; int err; buf = zfs_alloc(hdl, len); if (buf == NULL) return (ENOMEM); err = recv_read(hdl, fd, buf, len, byteswap, zc); if (err != 0) { free(buf); return (err); } err = nvlist_unpack(buf, len, nvp, 0); free(buf); if (err != 0) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid " "stream (malformed nvlist)")); return (EINVAL); } return (0); } static int recv_rename(libzfs_handle_t *hdl, const char *name, const char *tryname, int baselen, char *newname, recvflags_t *flags) { static int seq; - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; int err; prop_changelist_t *clp; zfs_handle_t *zhp; zhp = zfs_open(hdl, name, ZFS_TYPE_DATASET); if (zhp == NULL) return (-1); clp = changelist_gather(zhp, ZFS_PROP_NAME, 0, flags->force ? 
MS_FORCE : 0); zfs_close(zhp); if (clp == NULL) return (-1); err = changelist_prefix(clp); if (err) return (err); zc.zc_objset_type = DMU_OST_ZFS; (void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name)); if (tryname) { (void) strcpy(newname, tryname); (void) strlcpy(zc.zc_value, tryname, sizeof (zc.zc_value)); if (flags->verbose) { (void) printf("attempting rename %s to %s\n", zc.zc_name, zc.zc_value); } err = ioctl(hdl->libzfs_fd, ZFS_IOC_RENAME, &zc); if (err == 0) changelist_rename(clp, name, tryname); } else { err = ENOENT; } if (err != 0 && strncmp(name+baselen, "recv-", 5) != 0) { seq++; (void) strncpy(newname, name, baselen); (void) snprintf(newname+baselen, ZFS_MAXNAMELEN-baselen, "recv-%ld-%u", (long) getpid(), seq); (void) strlcpy(zc.zc_value, newname, sizeof (zc.zc_value)); if (flags->verbose) { (void) printf("failed - trying rename %s to %s\n", zc.zc_name, zc.zc_value); } err = ioctl(hdl->libzfs_fd, ZFS_IOC_RENAME, &zc); if (err == 0) changelist_rename(clp, name, newname); if (err && flags->verbose) { (void) printf("failed (%u) - " "will try again on next pass\n", errno); } err = EAGAIN; } else if (flags->verbose) { if (err == 0) (void) printf("success\n"); else (void) printf("failed (%u)\n", errno); } (void) changelist_postfix(clp); changelist_free(clp); return (err); } static int recv_destroy(libzfs_handle_t *hdl, const char *name, int baselen, char *newname, recvflags_t *flags) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; int err = 0; prop_changelist_t *clp; zfs_handle_t *zhp; boolean_t defer = B_FALSE; int spa_version; zhp = zfs_open(hdl, name, ZFS_TYPE_DATASET); if (zhp == NULL) return (-1); clp = changelist_gather(zhp, ZFS_PROP_NAME, 0, flags->force ? MS_FORCE : 0); if (zfs_get_type(zhp) == ZFS_TYPE_SNAPSHOT && zfs_spa_version(zhp, &spa_version) == 0 && spa_version >= SPA_VERSION_USERREFS) defer = B_TRUE; zfs_close(zhp); if (clp == NULL) return (-1); err = changelist_prefix(clp); if (err) return (err); zc.zc_objset_type = DMU_OST_ZFS; zc.zc_defer_destroy = defer; (void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name)); if (flags->verbose) (void) printf("attempting destroy %s\n", zc.zc_name); err = ioctl(hdl->libzfs_fd, ZFS_IOC_DESTROY, &zc); if (err == 0) { if (flags->verbose) (void) printf("success\n"); changelist_remove(clp, zc.zc_name); } (void) changelist_postfix(clp); changelist_free(clp); /* * Deferred destroy might destroy the snapshot or only mark it to be * destroyed later, and it returns success in either case. */ if (err != 0 || (defer && zfs_dataset_exists(hdl, name, ZFS_TYPE_SNAPSHOT))) { err = recv_rename(hdl, name, NULL, baselen, newname, flags); } return (err); } typedef struct guid_to_name_data { uint64_t guid; char *name; char *skip; } guid_to_name_data_t; static int guid_to_name_cb(zfs_handle_t *zhp, void *arg) { guid_to_name_data_t *gtnd = arg; int err; if (gtnd->skip != NULL && strcmp(zhp->zfs_name, gtnd->skip) == 0) { return (0); } if (zhp->zfs_dmustats.dds_guid == gtnd->guid) { (void) strcpy(gtnd->name, zhp->zfs_name); zfs_close(zhp); return (EEXIST); } err = zfs_iter_children(zhp, guid_to_name_cb, gtnd); zfs_close(zhp); return (err); } /* * Attempt to find the local dataset associated with this guid. In the case of * multiple matches, we attempt to find the "best" match by searching * progressively larger portions of the hierarchy. 
This allows one to send a * tree of datasets individually and guarantee that we will find the source * guid within that hierarchy, even if there are multiple matches elsewhere. */ static int guid_to_name(libzfs_handle_t *hdl, const char *parent, uint64_t guid, char *name) { /* exhaustive search all local snapshots */ char pname[ZFS_MAXNAMELEN]; guid_to_name_data_t gtnd; int err = 0; zfs_handle_t *zhp; char *cp; gtnd.guid = guid; gtnd.name = name; gtnd.skip = NULL; (void) strlcpy(pname, parent, sizeof (pname)); /* * Search progressively larger portions of the hierarchy. This will * select the "most local" version of the origin snapshot in the case * that there are multiple matching snapshots in the system. */ while ((cp = strrchr(pname, '/')) != NULL) { /* Chop off the last component and open the parent */ *cp = '\0'; zhp = make_dataset_handle(hdl, pname); if (zhp == NULL) continue; err = zfs_iter_children(zhp, guid_to_name_cb, &gtnd); zfs_close(zhp); if (err == EEXIST) return (0); /* * Remember the dataset that we already searched, so we * skip it next time through. */ gtnd.skip = pname; } return (ENOENT); } /* * Return +1 if guid1 is before guid2, 0 if they are the same, and -1 if * guid1 is after guid2. */ static int created_before(libzfs_handle_t *hdl, avl_tree_t *avl, uint64_t guid1, uint64_t guid2) { nvlist_t *nvfs; char *fsname, *snapname; char buf[ZFS_MAXNAMELEN]; int rv; zfs_handle_t *guid1hdl, *guid2hdl; uint64_t create1, create2; if (guid2 == 0) return (0); if (guid1 == 0) return (1); nvfs = fsavl_find(avl, guid1, &snapname); VERIFY(0 == nvlist_lookup_string(nvfs, "name", &fsname)); (void) snprintf(buf, sizeof (buf), "%s@%s", fsname, snapname); guid1hdl = zfs_open(hdl, buf, ZFS_TYPE_SNAPSHOT); if (guid1hdl == NULL) return (-1); nvfs = fsavl_find(avl, guid2, &snapname); VERIFY(0 == nvlist_lookup_string(nvfs, "name", &fsname)); (void) snprintf(buf, sizeof (buf), "%s@%s", fsname, snapname); guid2hdl = zfs_open(hdl, buf, ZFS_TYPE_SNAPSHOT); if (guid2hdl == NULL) { zfs_close(guid1hdl); return (-1); } create1 = zfs_prop_get_int(guid1hdl, ZFS_PROP_CREATETXG); create2 = zfs_prop_get_int(guid2hdl, ZFS_PROP_CREATETXG); if (create1 < create2) rv = -1; else if (create1 > create2) rv = +1; else rv = 0; zfs_close(guid1hdl); zfs_close(guid2hdl); return (rv); } static int recv_incremental_replication(libzfs_handle_t *hdl, const char *tofs, recvflags_t *flags, nvlist_t *stream_nv, avl_tree_t *stream_avl, nvlist_t *renamed) { nvlist_t *local_nv; avl_tree_t *local_avl; nvpair_t *fselem, *nextfselem; char *fromsnap; char newname[ZFS_MAXNAMELEN]; int error; boolean_t needagain, progress, recursive; char *s1, *s2; VERIFY(0 == nvlist_lookup_string(stream_nv, "fromsnap", &fromsnap)); recursive = (nvlist_lookup_boolean(stream_nv, "not_recursive") == ENOENT); if (flags->dryrun) return (0); again: needagain = progress = B_FALSE; if ((error = gather_nvlist(hdl, tofs, fromsnap, NULL, recursive, &local_nv, &local_avl)) != 0) return (error); /* * Process deletes and renames */ for (fselem = nvlist_next_nvpair(local_nv, NULL); fselem; fselem = nextfselem) { nvlist_t *nvfs, *snaps; nvlist_t *stream_nvfs = NULL; nvpair_t *snapelem, *nextsnapelem; uint64_t fromguid = 0; uint64_t originguid = 0; uint64_t stream_originguid = 0; uint64_t parent_fromsnap_guid, stream_parent_fromsnap_guid; char *fsname, *stream_fsname; nextfselem = nvlist_next_nvpair(local_nv, fselem); VERIFY(0 == nvpair_value_nvlist(fselem, &nvfs)); VERIFY(0 == nvlist_lookup_nvlist(nvfs, "snaps", &snaps)); VERIFY(0 == nvlist_lookup_string(nvfs, "name",
&fsname)); VERIFY(0 == nvlist_lookup_uint64(nvfs, "parentfromsnap", &parent_fromsnap_guid)); (void) nvlist_lookup_uint64(nvfs, "origin", &originguid); /* * First find the stream's fs, so we can check for * a different origin (due to "zfs promote") */ for (snapelem = nvlist_next_nvpair(snaps, NULL); snapelem; snapelem = nvlist_next_nvpair(snaps, snapelem)) { uint64_t thisguid; VERIFY(0 == nvpair_value_uint64(snapelem, &thisguid)); stream_nvfs = fsavl_find(stream_avl, thisguid, NULL); if (stream_nvfs != NULL) break; } /* check for promote */ (void) nvlist_lookup_uint64(stream_nvfs, "origin", &stream_originguid); if (stream_nvfs && originguid != stream_originguid) { switch (created_before(hdl, local_avl, stream_originguid, originguid)) { case 1: { /* promote it! */ - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; nvlist_t *origin_nvfs; char *origin_fsname; if (flags->verbose) (void) printf("promoting %s\n", fsname); origin_nvfs = fsavl_find(local_avl, originguid, NULL); VERIFY(0 == nvlist_lookup_string(origin_nvfs, "name", &origin_fsname)); (void) strlcpy(zc.zc_value, origin_fsname, sizeof (zc.zc_value)); (void) strlcpy(zc.zc_name, fsname, sizeof (zc.zc_name)); error = zfs_ioctl(hdl, ZFS_IOC_PROMOTE, &zc); if (error == 0) progress = B_TRUE; break; } default: break; case -1: fsavl_destroy(local_avl); nvlist_free(local_nv); return (-1); } /* * We had/have the wrong origin, therefore our * list of snapshots is wrong. Need to handle * them on the next pass. */ needagain = B_TRUE; continue; } for (snapelem = nvlist_next_nvpair(snaps, NULL); snapelem; snapelem = nextsnapelem) { uint64_t thisguid; char *stream_snapname; nvlist_t *found, *props; nextsnapelem = nvlist_next_nvpair(snaps, snapelem); VERIFY(0 == nvpair_value_uint64(snapelem, &thisguid)); found = fsavl_find(stream_avl, thisguid, &stream_snapname); /* check for delete */ if (found == NULL) { char name[ZFS_MAXNAMELEN]; if (!flags->force) continue; (void) snprintf(name, sizeof (name), "%s@%s", fsname, nvpair_name(snapelem)); error = recv_destroy(hdl, name, strlen(fsname)+1, newname, flags); if (error) needagain = B_TRUE; else progress = B_TRUE; continue; } stream_nvfs = found; if (0 == nvlist_lookup_nvlist(stream_nvfs, "snapprops", &props) && 0 == nvlist_lookup_nvlist(props, stream_snapname, &props)) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; zc.zc_cookie = B_TRUE; /* received */ (void) snprintf(zc.zc_name, sizeof (zc.zc_name), "%s@%s", fsname, nvpair_name(snapelem)); if (zcmd_write_src_nvlist(hdl, &zc, props) == 0) { (void) zfs_ioctl(hdl, ZFS_IOC_SET_PROP, &zc); zcmd_free_nvlists(&zc); } } /* check for different snapname */ if (strcmp(nvpair_name(snapelem), stream_snapname) != 0) { char name[ZFS_MAXNAMELEN]; char tryname[ZFS_MAXNAMELEN]; (void) snprintf(name, sizeof (name), "%s@%s", fsname, nvpair_name(snapelem)); (void) snprintf(tryname, sizeof (name), "%s@%s", fsname, stream_snapname); error = recv_rename(hdl, name, tryname, strlen(fsname)+1, newname, flags); if (error) needagain = B_TRUE; else progress = B_TRUE; } if (strcmp(stream_snapname, fromsnap) == 0) fromguid = thisguid; } /* check for delete */ if (stream_nvfs == NULL) { if (!flags->force) continue; error = recv_destroy(hdl, fsname, strlen(tofs)+1, newname, flags); if (error) needagain = B_TRUE; else progress = B_TRUE; continue; } if (fromguid == 0) { if (flags->verbose) { (void) printf("local fs %s does not have " "fromsnap (%s in stream); must 
have " "been deleted locally; ignoring\n", fsname, fromsnap); } continue; } VERIFY(0 == nvlist_lookup_string(stream_nvfs, "name", &stream_fsname)); VERIFY(0 == nvlist_lookup_uint64(stream_nvfs, "parentfromsnap", &stream_parent_fromsnap_guid)); s1 = strrchr(fsname, '/'); s2 = strrchr(stream_fsname, '/'); /* * Check for rename. If the exact receive path is specified, it * does not count as a rename, but we still need to check the * datasets beneath it. */ if ((stream_parent_fromsnap_guid != 0 && parent_fromsnap_guid != 0 && stream_parent_fromsnap_guid != parent_fromsnap_guid) || ((flags->isprefix || strcmp(tofs, fsname) != 0) && (s1 != NULL) && (s2 != NULL) && strcmp(s1, s2) != 0)) { nvlist_t *parent; char tryname[ZFS_MAXNAMELEN]; parent = fsavl_find(local_avl, stream_parent_fromsnap_guid, NULL); /* * NB: parent might not be found if we used the * tosnap for stream_parent_fromsnap_guid, * because the parent is a newly-created fs; * we'll be able to rename it after we recv the * new fs. */ if (parent != NULL) { char *pname; VERIFY(0 == nvlist_lookup_string(parent, "name", &pname)); (void) snprintf(tryname, sizeof (tryname), "%s%s", pname, strrchr(stream_fsname, '/')); } else { tryname[0] = '\0'; if (flags->verbose) { (void) printf("local fs %s new parent " "not found\n", fsname); } } newname[0] = '\0'; error = recv_rename(hdl, fsname, tryname, strlen(tofs)+1, newname, flags); if (renamed != NULL && newname[0] != '\0') { VERIFY(0 == nvlist_add_boolean(renamed, newname)); } if (error) needagain = B_TRUE; else progress = B_TRUE; } } fsavl_destroy(local_avl); nvlist_free(local_nv); if (needagain && progress) { /* do another pass to fix up temporary names */ if (flags->verbose) (void) printf("another pass:\n"); goto again; } return (needagain); } static int zfs_receive_package(libzfs_handle_t *hdl, int fd, const char *destname, recvflags_t *flags, dmu_replay_record_t *drr, zio_cksum_t *zc, char **top_zfs, int cleanup_fd, uint64_t *action_handlep) { nvlist_t *stream_nv = NULL; avl_tree_t *stream_avl = NULL; char *fromsnap = NULL; char *cp; char tofs[ZFS_MAXNAMELEN]; char sendfs[ZFS_MAXNAMELEN]; char errbuf[1024]; dmu_replay_record_t drre; int error; boolean_t anyerr = B_FALSE; boolean_t softerr = B_FALSE; boolean_t recursive; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot receive")); assert(drr->drr_type == DRR_BEGIN); assert(drr->drr_u.drr_begin.drr_magic == DMU_BACKUP_MAGIC); assert(DMU_GET_STREAM_HDRTYPE(drr->drr_u.drr_begin.drr_versioninfo) == DMU_COMPOUNDSTREAM); /* * Read in the nvlist from the stream. */ if (drr->drr_payloadlen != 0) { error = recv_read_nvlist(hdl, fd, drr->drr_payloadlen, &stream_nv, flags->byteswap, zc); if (error) { error = zfs_error(hdl, EZFS_BADSTREAM, errbuf); goto out; } } recursive = (nvlist_lookup_boolean(stream_nv, "not_recursive") == ENOENT); if (recursive && strchr(destname, '@')) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot specify snapshot name for multi-snapshot stream")); error = zfs_error(hdl, EZFS_BADSTREAM, errbuf); goto out; } /* * Read in the end record and verify checksum. 
*/ if (0 != (error = recv_read(hdl, fd, &drre, sizeof (drre), flags->byteswap, NULL))) goto out; if (flags->byteswap) { drre.drr_type = BSWAP_32(drre.drr_type); drre.drr_u.drr_end.drr_checksum.zc_word[0] = BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[0]); drre.drr_u.drr_end.drr_checksum.zc_word[1] = BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[1]); drre.drr_u.drr_end.drr_checksum.zc_word[2] = BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[2]); drre.drr_u.drr_end.drr_checksum.zc_word[3] = BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[3]); } if (drre.drr_type != DRR_END) { error = zfs_error(hdl, EZFS_BADSTREAM, errbuf); goto out; } if (!ZIO_CHECKSUM_EQUAL(drre.drr_u.drr_end.drr_checksum, *zc)) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "incorrect header checksum")); error = zfs_error(hdl, EZFS_BADSTREAM, errbuf); goto out; } (void) nvlist_lookup_string(stream_nv, "fromsnap", &fromsnap); if (drr->drr_payloadlen != 0) { nvlist_t *stream_fss; VERIFY(0 == nvlist_lookup_nvlist(stream_nv, "fss", &stream_fss)); if ((stream_avl = fsavl_create(stream_fss)) == NULL) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "couldn't allocate avl tree")); error = zfs_error(hdl, EZFS_NOMEM, errbuf); goto out; } if (fromsnap != NULL) { nvlist_t *renamed = NULL; nvpair_t *pair = NULL; (void) strlcpy(tofs, destname, ZFS_MAXNAMELEN); if (flags->isprefix) { struct drr_begin *drrb = &drr->drr_u.drr_begin; int i; if (flags->istail) { cp = strrchr(drrb->drr_toname, '/'); if (cp == NULL) { (void) strlcat(tofs, "/", ZFS_MAXNAMELEN); i = 0; } else { i = (cp - drrb->drr_toname); } } else { i = strcspn(drrb->drr_toname, "/@"); } /* zfs_receive_one() will create_parents() */ (void) strlcat(tofs, &drrb->drr_toname[i], ZFS_MAXNAMELEN); *strchr(tofs, '@') = '\0'; } if (recursive && !flags->dryrun && !flags->nomount) { VERIFY(0 == nvlist_alloc(&renamed, NV_UNIQUE_NAME, 0)); } softerr = recv_incremental_replication(hdl, tofs, flags, stream_nv, stream_avl, renamed); /* Unmount renamed filesystems before receiving. */ while ((pair = nvlist_next_nvpair(renamed, pair)) != NULL) { zfs_handle_t *zhp; prop_changelist_t *clp = NULL; zhp = zfs_open(hdl, nvpair_name(pair), ZFS_TYPE_FILESYSTEM); if (zhp != NULL) { clp = changelist_gather(zhp, ZFS_PROP_MOUNTPOINT, 0, 0); zfs_close(zhp); if (clp != NULL) { softerr |= changelist_prefix(clp); changelist_free(clp); } } } nvlist_free(renamed); } } /* * Get the fs specified by the first path in the stream (the top level * specified by 'zfs send') and pass it to each invocation of * zfs_receive_one(). */ (void) strlcpy(sendfs, drr->drr_u.drr_begin.drr_toname, ZFS_MAXNAMELEN); if ((cp = strchr(sendfs, '@')) != NULL) *cp = '\0'; /* Finally, receive each contained stream */ do { /* * we should figure out if it has a recoverable * error, in which case do a recv_skip() and drive on. * Note, if we fail due to already having this guid, * zfs_receive_one() will take care of it (ie, * recv_skip() and return 0). */ error = zfs_receive_impl(hdl, destname, flags, fd, sendfs, stream_nv, stream_avl, top_zfs, cleanup_fd, action_handlep); if (error == ENODATA) { error = 0; break; } anyerr |= error; } while (error == 0); if (drr->drr_payloadlen != 0 && fromsnap != NULL) { /* * Now that we have the fs's they sent us, try the * renames again. 
*/ softerr = recv_incremental_replication(hdl, tofs, flags, stream_nv, stream_avl, NULL); } out: fsavl_destroy(stream_avl); if (stream_nv) nvlist_free(stream_nv); if (softerr) error = -2; if (anyerr) error = -1; return (error); } static void trunc_prop_errs(int truncated) { ASSERT(truncated != 0); if (truncated == 1) (void) fprintf(stderr, dgettext(TEXT_DOMAIN, "1 more property could not be set\n")); else (void) fprintf(stderr, dgettext(TEXT_DOMAIN, "%d more properties could not be set\n"), truncated); } static int recv_skip(libzfs_handle_t *hdl, int fd, boolean_t byteswap) { dmu_replay_record_t *drr; void *buf = malloc(1<<20); char errbuf[1024]; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot receive:")); /* XXX would be great to use lseek if possible... */ drr = buf; while (recv_read(hdl, fd, drr, sizeof (dmu_replay_record_t), byteswap, NULL) == 0) { if (byteswap) drr->drr_type = BSWAP_32(drr->drr_type); switch (drr->drr_type) { case DRR_BEGIN: /* NB: not to be used on v2 stream packages */ if (drr->drr_payloadlen != 0) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid substream header")); return (zfs_error(hdl, EZFS_BADSTREAM, errbuf)); } break; case DRR_END: free(buf); return (0); case DRR_OBJECT: if (byteswap) { drr->drr_u.drr_object.drr_bonuslen = BSWAP_32(drr->drr_u.drr_object. drr_bonuslen); } (void) recv_read(hdl, fd, buf, P2ROUNDUP(drr->drr_u.drr_object.drr_bonuslen, 8), B_FALSE, NULL); break; case DRR_WRITE: if (byteswap) { drr->drr_u.drr_write.drr_length = BSWAP_64(drr->drr_u.drr_write.drr_length); } (void) recv_read(hdl, fd, buf, drr->drr_u.drr_write.drr_length, B_FALSE, NULL); break; case DRR_SPILL: if (byteswap) { drr->drr_u.drr_write.drr_length = BSWAP_64(drr->drr_u.drr_spill.drr_length); } (void) recv_read(hdl, fd, buf, drr->drr_u.drr_spill.drr_length, B_FALSE, NULL); break; case DRR_WRITE_BYREF: case DRR_FREEOBJECTS: case DRR_FREE: break; default: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid record type")); return (zfs_error(hdl, EZFS_BADSTREAM, errbuf)); } } free(buf); return (-1); } /* * Restores a backup of tosnap from the file descriptor specified by infd. 
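 *
 * Purely as an illustration of how the destination name is composed
 * below (dataset names are hypothetical): for a stream generated from
 * "poola/a/b@snap" (so sendfs is "poola/a/b") received with tosnap set
 * to "tank/dest", the resulting destination is
 *
 *	default		"tank/dest@snap"
 *	-d (isprefix)	"tank/dest/a/b@snap"
 *	-e (istail)	"tank/dest/b@snap"
 *
 * Child substreams of a replicated send keep whatever trails sendfs,
 * e.g. "poola/a/b/c@snap" becomes "tank/dest/c@snap" in the default
 * case.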
*/ static int zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, recvflags_t *flags, dmu_replay_record_t *drr, dmu_replay_record_t *drr_noswap, const char *sendfs, nvlist_t *stream_nv, avl_tree_t *stream_avl, char **top_zfs, int cleanup_fd, uint64_t *action_handlep) { - zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; time_t begin_time; int ioctl_err, ioctl_errno, err; char *cp; struct drr_begin *drrb = &drr->drr_u.drr_begin; char errbuf[1024]; char prop_errbuf[1024]; const char *chopprefix; boolean_t newfs = B_FALSE; boolean_t stream_wantsnewfs; uint64_t parent_snapguid = 0; prop_changelist_t *clp = NULL; nvlist_t *snapprops_nvlist = NULL; zprop_errflags_t prop_errflags; boolean_t recursive; begin_time = time(NULL); (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot receive")); recursive = (nvlist_lookup_boolean(stream_nv, "not_recursive") == ENOENT); if (stream_avl != NULL) { char *snapname; nvlist_t *fs = fsavl_find(stream_avl, drrb->drr_toguid, &snapname); nvlist_t *props; int ret; (void) nvlist_lookup_uint64(fs, "parentfromsnap", &parent_snapguid); err = nvlist_lookup_nvlist(fs, "props", &props); if (err) VERIFY(0 == nvlist_alloc(&props, NV_UNIQUE_NAME, 0)); if (flags->canmountoff) { VERIFY(0 == nvlist_add_uint64(props, zfs_prop_to_name(ZFS_PROP_CANMOUNT), 0)); } ret = zcmd_write_src_nvlist(hdl, &zc, props); if (err) nvlist_free(props); if (0 == nvlist_lookup_nvlist(fs, "snapprops", &props)) { VERIFY(0 == nvlist_lookup_nvlist(props, snapname, &snapprops_nvlist)); } if (ret != 0) return (-1); } cp = NULL; /* * Determine how much of the snapshot name stored in the stream * we are going to tack on to the name they specified on the * command line, and how much we are going to chop off. * * If they specified a snapshot, chop the entire name stored in * the stream. */ if (flags->istail) { /* * A filesystem was specified with -e. We want to tack on only * the tail of the sent snapshot path. */ if (strchr(tosnap, '@')) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid " "argument - snapshot not allowed with -e")); return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); } chopprefix = strrchr(sendfs, '/'); if (chopprefix == NULL) { /* * The tail is the poolname, so we need to * prepend a path separator. */ int len = strlen(drrb->drr_toname); cp = malloc(len + 2); cp[0] = '/'; (void) strcpy(&cp[1], drrb->drr_toname); chopprefix = cp; } else { chopprefix = drrb->drr_toname + (chopprefix - sendfs); } } else if (flags->isprefix) { /* * A filesystem was specified with -d. We want to tack on * everything but the first element of the sent snapshot path * (all but the pool name). */ if (strchr(tosnap, '@')) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid " "argument - snapshot not allowed with -d")); return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); } chopprefix = strchr(drrb->drr_toname, '/'); if (chopprefix == NULL) chopprefix = strchr(drrb->drr_toname, '@'); } else if (strchr(tosnap, '@') == NULL) { /* * If a filesystem was specified without -d or -e, we want to * tack on everything after the fs specified by 'zfs send'. */ chopprefix = drrb->drr_toname + strlen(sendfs); } else { /* A snapshot was specified as an exact path (no -d or -e). 
*/ if (recursive) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot specify snapshot name for multi-snapshot " "stream")); return (zfs_error(hdl, EZFS_BADSTREAM, errbuf)); } chopprefix = drrb->drr_toname + strlen(drrb->drr_toname); } ASSERT(strstr(drrb->drr_toname, sendfs) == drrb->drr_toname); ASSERT(chopprefix > drrb->drr_toname); ASSERT(chopprefix <= drrb->drr_toname + strlen(drrb->drr_toname)); ASSERT(chopprefix[0] == '/' || chopprefix[0] == '@' || chopprefix[0] == '\0'); /* * Determine name of destination snapshot, store in zc_value. */ (void) strcpy(zc.zc_top_ds, tosnap); (void) strcpy(zc.zc_value, tosnap); (void) strlcat(zc.zc_value, chopprefix, sizeof (zc.zc_value)); free(cp); if (!zfs_name_valid(zc.zc_value, ZFS_TYPE_SNAPSHOT)) { zcmd_free_nvlists(&zc); return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); } /* * Determine the name of the origin snapshot, store in zc_string. */ if (drrb->drr_flags & DRR_FLAG_CLONE) { if (guid_to_name(hdl, zc.zc_value, drrb->drr_fromguid, zc.zc_string) != 0) { zcmd_free_nvlists(&zc); zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "local origin for clone %s does not exist"), zc.zc_value); return (zfs_error(hdl, EZFS_NOENT, errbuf)); } if (flags->verbose) (void) printf("found clone origin %s\n", zc.zc_string); } stream_wantsnewfs = (drrb->drr_fromguid == 0 || (drrb->drr_flags & DRR_FLAG_CLONE)); if (stream_wantsnewfs) { /* * if the parent fs does not exist, look for it based on * the parent snap GUID */ (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot receive new filesystem stream")); (void) strcpy(zc.zc_name, zc.zc_value); cp = strrchr(zc.zc_name, '/'); if (cp) *cp = '\0'; if (cp && !zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) { char suffix[ZFS_MAXNAMELEN]; (void) strcpy(suffix, strrchr(zc.zc_value, '/')); if (guid_to_name(hdl, zc.zc_name, parent_snapguid, zc.zc_value) == 0) { *strchr(zc.zc_value, '@') = '\0'; (void) strcat(zc.zc_value, suffix); } } } else { /* * if the fs does not exist, look for it based on the * fromsnap GUID */ (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot receive incremental stream")); (void) strcpy(zc.zc_name, zc.zc_value); *strchr(zc.zc_name, '@') = '\0'; /* * If the exact receive path was specified and this is the * topmost path in the stream, then if the fs does not exist we * should look no further. */ if ((flags->isprefix || (*(chopprefix = drrb->drr_toname + strlen(sendfs)) != '\0' && *chopprefix != '@')) && !zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) { char snap[ZFS_MAXNAMELEN]; (void) strcpy(snap, strchr(zc.zc_value, '@')); if (guid_to_name(hdl, zc.zc_name, drrb->drr_fromguid, zc.zc_value) == 0) { *strchr(zc.zc_value, '@') = '\0'; (void) strcat(zc.zc_value, snap); } } } (void) strcpy(zc.zc_name, zc.zc_value); *strchr(zc.zc_name, '@') = '\0'; if (zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) { zfs_handle_t *zhp; /* * Destination fs exists. Therefore this should either * be an incremental, or the stream specifies a new fs * (full stream or clone) and they want us to blow it * away (and have therefore specified -F and removed any * snapshots). */ if (stream_wantsnewfs) { if (!flags->force) { zcmd_free_nvlists(&zc); zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "destination '%s' exists\n" "must specify -F to overwrite it"), zc.zc_name); return (zfs_error(hdl, EZFS_EXISTS, errbuf)); } if (ioctl(hdl->libzfs_fd, ZFS_IOC_SNAPSHOT_LIST_NEXT, &zc) == 0) { zcmd_free_nvlists(&zc); zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "destination has snapshots (eg. 
%s)\n" "must destroy them to overwrite it"), zc.zc_name); return (zfs_error(hdl, EZFS_EXISTS, errbuf)); } } if ((zhp = zfs_open(hdl, zc.zc_name, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME)) == NULL) { zcmd_free_nvlists(&zc); return (-1); } if (stream_wantsnewfs && zhp->zfs_dmustats.dds_origin[0]) { zcmd_free_nvlists(&zc); zfs_close(zhp); zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "destination '%s' is a clone\n" "must destroy it to overwrite it"), zc.zc_name); return (zfs_error(hdl, EZFS_EXISTS, errbuf)); } if (!flags->dryrun && zhp->zfs_type == ZFS_TYPE_FILESYSTEM && stream_wantsnewfs) { /* We can't do online recv in this case */ clp = changelist_gather(zhp, ZFS_PROP_NAME, 0, 0); if (clp == NULL) { zfs_close(zhp); zcmd_free_nvlists(&zc); return (-1); } if (changelist_prefix(clp) != 0) { changelist_free(clp); zfs_close(zhp); zcmd_free_nvlists(&zc); return (-1); } } if (!flags->dryrun && zhp->zfs_type == ZFS_TYPE_VOLUME && zvol_remove_link(hdl, zhp->zfs_name) != 0) { zfs_close(zhp); zcmd_free_nvlists(&zc); return (-1); } zfs_close(zhp); } else { /* * Destination filesystem does not exist. Therefore we better * be creating a new filesystem (either from a full backup, or * a clone). It would therefore be invalid if the user * specified only the pool name (i.e. if the destination name * contained no slash character). */ if (!stream_wantsnewfs || (cp = strrchr(zc.zc_name, '/')) == NULL) { zcmd_free_nvlists(&zc); zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "destination '%s' does not exist"), zc.zc_name); return (zfs_error(hdl, EZFS_NOENT, errbuf)); } /* * Trim off the final dataset component so we perform the * recvbackup ioctl to the filesystems's parent. */ *cp = '\0'; if (flags->isprefix && !flags->istail && !flags->dryrun && create_parents(hdl, zc.zc_value, strlen(tosnap)) != 0) { zcmd_free_nvlists(&zc); return (zfs_error(hdl, EZFS_BADRESTORE, errbuf)); } newfs = B_TRUE; } zc.zc_begin_record = drr_noswap->drr_u.drr_begin; zc.zc_cookie = infd; zc.zc_guid = flags->force; if (flags->verbose) { (void) printf("%s %s stream of %s into %s\n", flags->dryrun ? "would receive" : "receiving", drrb->drr_fromguid ? 
"incremental" : "full", drrb->drr_toname, zc.zc_value); (void) fflush(stdout); } if (flags->dryrun) { zcmd_free_nvlists(&zc); return (recv_skip(hdl, infd, flags->byteswap)); } zc.zc_nvlist_dst = (uint64_t)(uintptr_t)prop_errbuf; zc.zc_nvlist_dst_size = sizeof (prop_errbuf); zc.zc_cleanup_fd = cleanup_fd; zc.zc_action_handle = *action_handlep; err = ioctl_err = zfs_ioctl(hdl, ZFS_IOC_RECV, &zc); ioctl_errno = errno; prop_errflags = (zprop_errflags_t)zc.zc_obj; if (err == 0) { nvlist_t *prop_errors; VERIFY(0 == nvlist_unpack((void *)(uintptr_t)zc.zc_nvlist_dst, zc.zc_nvlist_dst_size, &prop_errors, 0)); nvpair_t *prop_err = NULL; while ((prop_err = nvlist_next_nvpair(prop_errors, prop_err)) != NULL) { char tbuf[1024]; zfs_prop_t prop; int intval; prop = zfs_name_to_prop(nvpair_name(prop_err)); (void) nvpair_value_int32(prop_err, &intval); if (strcmp(nvpair_name(prop_err), ZPROP_N_MORE_ERRORS) == 0) { trunc_prop_errs(intval); break; } else { (void) snprintf(tbuf, sizeof (tbuf), dgettext(TEXT_DOMAIN, "cannot receive %s property on %s"), nvpair_name(prop_err), zc.zc_name); zfs_setprop_error(hdl, prop, intval, tbuf); } } nvlist_free(prop_errors); } zc.zc_nvlist_dst = 0; zc.zc_nvlist_dst_size = 0; zcmd_free_nvlists(&zc); if (err == 0 && snapprops_nvlist) { - zfs_cmd_t zc2 = { "\0", "\0", "\0", "\0", 0 }; + zfs_cmd_t zc2 = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; (void) strcpy(zc2.zc_name, zc.zc_value); zc2.zc_cookie = B_TRUE; /* received */ if (zcmd_write_src_nvlist(hdl, &zc2, snapprops_nvlist) == 0) { (void) zfs_ioctl(hdl, ZFS_IOC_SET_PROP, &zc2); zcmd_free_nvlists(&zc2); } } if (err && (ioctl_errno == ENOENT || ioctl_errno == EEXIST)) { /* * It may be that this snapshot already exists, * in which case we want to consume & ignore it * rather than failing. */ avl_tree_t *local_avl; nvlist_t *local_nv, *fs; cp = strchr(zc.zc_value, '@'); /* * XXX Do this faster by just iterating over snaps in * this fs. Also if zc_value does not exist, we will * get a strange "does not exist" error message. 
*/ *cp = '\0'; if (gather_nvlist(hdl, zc.zc_value, NULL, NULL, B_FALSE, &local_nv, &local_avl) == 0) { *cp = '@'; fs = fsavl_find(local_avl, drrb->drr_toguid, NULL); fsavl_destroy(local_avl); nvlist_free(local_nv); if (fs != NULL) { if (flags->verbose) { (void) printf("snap %s already exists; " "ignoring\n", zc.zc_value); } err = ioctl_err = recv_skip(hdl, infd, flags->byteswap); } } *cp = '@'; } if (ioctl_err != 0) { switch (ioctl_errno) { case ENODEV: cp = strchr(zc.zc_value, '@'); *cp = '\0'; zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "most recent snapshot of %s does not\n" "match incremental source"), zc.zc_value); (void) zfs_error(hdl, EZFS_BADRESTORE, errbuf); *cp = '@'; break; case ETXTBSY: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "destination %s has been modified\n" "since most recent snapshot"), zc.zc_name); (void) zfs_error(hdl, EZFS_BADRESTORE, errbuf); break; case EEXIST: cp = strchr(zc.zc_value, '@'); if (newfs) { /* it's the containing fs that exists */ *cp = '\0'; } zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "destination already exists")); (void) zfs_error_fmt(hdl, EZFS_EXISTS, dgettext(TEXT_DOMAIN, "cannot restore to %s"), zc.zc_value); *cp = '@'; break; case EINVAL: (void) zfs_error(hdl, EZFS_BADSTREAM, errbuf); break; case ECKSUM: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid stream (checksum mismatch)")); (void) zfs_error(hdl, EZFS_BADSTREAM, errbuf); break; case ENOTSUP: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be upgraded to receive this stream.")); (void) zfs_error(hdl, EZFS_BADVERSION, errbuf); break; case EDQUOT: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "destination %s space quota exceeded"), zc.zc_name); (void) zfs_error(hdl, EZFS_NOSPC, errbuf); break; default: (void) zfs_standard_error(hdl, ioctl_errno, errbuf); } } /* * Mount the target filesystem (if created). Also mount any * children of the target filesystem if we did a replication * receive (indicated by stream_avl being non-NULL). */ cp = strchr(zc.zc_value, '@'); if (cp && (ioctl_err == 0 || !newfs)) { zfs_handle_t *h; *cp = '\0'; h = zfs_open(hdl, zc.zc_value, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME); if (h != NULL) { if (h->zfs_type == ZFS_TYPE_VOLUME) { *cp = '@'; err = zvol_create_link(hdl, h->zfs_name); if (err == 0 && ioctl_err == 0) err = zvol_create_link(hdl, zc.zc_value); } else if (newfs || stream_avl) { /* * Track the first/top of hierarchy fs, * for mounting and sharing later. 
*/ if (top_zfs && *top_zfs == NULL) *top_zfs = zfs_strdup(hdl, zc.zc_value); } zfs_close(h); } *cp = '@'; } if (clp) { err |= changelist_postfix(clp); changelist_free(clp); } if (prop_errflags & ZPROP_ERR_NOCLEAR) { (void) fprintf(stderr, dgettext(TEXT_DOMAIN, "Warning: " "failed to clear unreceived properties on %s"), zc.zc_name); (void) fprintf(stderr, "\n"); } if (prop_errflags & ZPROP_ERR_NORESTORE) { (void) fprintf(stderr, dgettext(TEXT_DOMAIN, "Warning: " "failed to restore original properties on %s"), zc.zc_name); (void) fprintf(stderr, "\n"); } if (err || ioctl_err) return (-1); *action_handlep = zc.zc_action_handle; if (flags->verbose) { char buf1[64]; char buf2[64]; uint64_t bytes = zc.zc_cookie; time_t delta = time(NULL) - begin_time; if (delta == 0) delta = 1; zfs_nicenum(bytes, buf1, sizeof (buf1)); zfs_nicenum(bytes/delta, buf2, sizeof (buf1)); (void) printf("received %sB stream in %lu seconds (%sB/sec)\n", buf1, delta, buf2); } return (0); } static int zfs_receive_impl(libzfs_handle_t *hdl, const char *tosnap, recvflags_t *flags, int infd, const char *sendfs, nvlist_t *stream_nv, avl_tree_t *stream_avl, char **top_zfs, int cleanup_fd, uint64_t *action_handlep) { int err; dmu_replay_record_t drr, drr_noswap; struct drr_begin *drrb = &drr.drr_u.drr_begin; char errbuf[1024]; zio_cksum_t zcksum = { { 0 } }; uint64_t featureflags; int hdrtype; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot receive")); if (flags->isprefix && !zfs_dataset_exists(hdl, tosnap, ZFS_TYPE_DATASET)) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "specified fs " "(%s) does not exist"), tosnap); return (zfs_error(hdl, EZFS_NOENT, errbuf)); } /* read in the BEGIN record */ if (0 != (err = recv_read(hdl, infd, &drr, sizeof (drr), B_FALSE, &zcksum))) return (err); if (drr.drr_type == DRR_END || drr.drr_type == BSWAP_32(DRR_END)) { /* It's the double end record at the end of a package */ return (ENODATA); } /* the kernel needs the non-byteswapped begin record */ drr_noswap = drr; flags->byteswap = B_FALSE; if (drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC)) { /* * We computed the checksum in the wrong byteorder in * recv_read() above; do it again correctly. 
*/ bzero(&zcksum, sizeof (zio_cksum_t)); fletcher_4_incremental_byteswap(&drr, sizeof (drr), &zcksum); flags->byteswap = B_TRUE; drr.drr_type = BSWAP_32(drr.drr_type); drr.drr_payloadlen = BSWAP_32(drr.drr_payloadlen); drrb->drr_magic = BSWAP_64(drrb->drr_magic); drrb->drr_versioninfo = BSWAP_64(drrb->drr_versioninfo); drrb->drr_creation_time = BSWAP_64(drrb->drr_creation_time); drrb->drr_type = BSWAP_32(drrb->drr_type); drrb->drr_flags = BSWAP_32(drrb->drr_flags); drrb->drr_toguid = BSWAP_64(drrb->drr_toguid); drrb->drr_fromguid = BSWAP_64(drrb->drr_fromguid); } if (drrb->drr_magic != DMU_BACKUP_MAGIC || drr.drr_type != DRR_BEGIN) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid " "stream (bad magic number)")); return (zfs_error(hdl, EZFS_BADSTREAM, errbuf)); } featureflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo); hdrtype = DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo); if (!DMU_STREAM_SUPPORTED(featureflags) || (hdrtype != DMU_SUBSTREAM && hdrtype != DMU_COMPOUNDSTREAM)) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "stream has unsupported feature, feature flags = %lx"), featureflags); return (zfs_error(hdl, EZFS_BADSTREAM, errbuf)); } if (strchr(drrb->drr_toname, '@') == NULL) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid " "stream (bad snapshot name)")); return (zfs_error(hdl, EZFS_BADSTREAM, errbuf)); } if (DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) == DMU_SUBSTREAM) { char nonpackage_sendfs[ZFS_MAXNAMELEN]; if (sendfs == NULL) { /* * We were not called from zfs_receive_package(). Get * the fs specified by 'zfs send'. */ char *cp; (void) strlcpy(nonpackage_sendfs, drr.drr_u.drr_begin.drr_toname, ZFS_MAXNAMELEN); if ((cp = strchr(nonpackage_sendfs, '@')) != NULL) *cp = '\0'; sendfs = nonpackage_sendfs; } return (zfs_receive_one(hdl, infd, tosnap, flags, &drr, &drr_noswap, sendfs, stream_nv, stream_avl, top_zfs, cleanup_fd, action_handlep)); } else { assert(DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) == DMU_COMPOUNDSTREAM); return (zfs_receive_package(hdl, infd, tosnap, flags, &drr, &zcksum, top_zfs, cleanup_fd, action_handlep)); } } /* * Restores a backup of tosnap from the file descriptor specified by infd. * Return 0 on total success, -2 if some things couldn't be * destroyed/renamed/promoted, -1 if some things couldn't be received. * (-1 will override -2). */ int zfs_receive(libzfs_handle_t *hdl, const char *tosnap, recvflags_t *flags, int infd, avl_tree_t *stream_avl) { char *top_zfs = NULL; int err; int cleanup_fd; uint64_t action_handle = 0; cleanup_fd = open(ZFS_DEV, O_RDWR); VERIFY(cleanup_fd >= 0); err = zfs_receive_impl(hdl, tosnap, flags, infd, NULL, NULL, stream_avl, &top_zfs, cleanup_fd, &action_handle); VERIFY(0 == close(cleanup_fd)); if (err == 0 && !flags->nomount && top_zfs) { zfs_handle_t *zhp; prop_changelist_t *clp; zhp = zfs_open(hdl, top_zfs, ZFS_TYPE_FILESYSTEM); if (zhp != NULL) { clp = changelist_gather(zhp, ZFS_PROP_MOUNTPOINT, CL_GATHER_MOUNT_ALWAYS, 0); zfs_close(zhp); if (clp != NULL) { /* mount and share received datasets */ err = changelist_postfix(clp); changelist_free(clp); } } if (zhp == NULL || clp == NULL || err) err = -1; } if (top_zfs) free(top_zfs); return (err); } diff --git a/lib/libzfs/libzfs_util.c b/lib/libzfs/libzfs_util.c index 5bb88e94677b..54dc2afc5b07 100644 --- a/lib/libzfs/libzfs_util.c +++ b/lib/libzfs/libzfs_util.c @@ -1,1744 +1,1742 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). 
* You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012 by Delphix. All rights reserved. */ /* * Internal utility routines for the ZFS library. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include #include "libzfs_impl.h" #include "zfs_prop.h" #include "zfeature_common.h" int libzfs_errno(libzfs_handle_t *hdl) { return (hdl->libzfs_error); } const char * libzfs_error_action(libzfs_handle_t *hdl) { return (hdl->libzfs_action); } const char * libzfs_error_description(libzfs_handle_t *hdl) { if (hdl->libzfs_desc[0] != '\0') return (hdl->libzfs_desc); switch (hdl->libzfs_error) { case EZFS_NOMEM: return (dgettext(TEXT_DOMAIN, "out of memory")); case EZFS_BADPROP: return (dgettext(TEXT_DOMAIN, "invalid property value")); case EZFS_PROPREADONLY: return (dgettext(TEXT_DOMAIN, "read-only property")); case EZFS_PROPTYPE: return (dgettext(TEXT_DOMAIN, "property doesn't apply to " "datasets of this type")); case EZFS_PROPNONINHERIT: return (dgettext(TEXT_DOMAIN, "property cannot be inherited")); case EZFS_PROPSPACE: return (dgettext(TEXT_DOMAIN, "invalid quota or reservation")); case EZFS_BADTYPE: return (dgettext(TEXT_DOMAIN, "operation not applicable to " "datasets of this type")); case EZFS_BUSY: return (dgettext(TEXT_DOMAIN, "pool or dataset is busy")); case EZFS_EXISTS: return (dgettext(TEXT_DOMAIN, "pool or dataset exists")); case EZFS_NOENT: return (dgettext(TEXT_DOMAIN, "no such pool or dataset")); case EZFS_BADSTREAM: return (dgettext(TEXT_DOMAIN, "invalid backup stream")); case EZFS_DSREADONLY: return (dgettext(TEXT_DOMAIN, "dataset is read-only")); case EZFS_VOLTOOBIG: return (dgettext(TEXT_DOMAIN, "volume size exceeds limit for " "this system")); case EZFS_INVALIDNAME: return (dgettext(TEXT_DOMAIN, "invalid name")); case EZFS_BADRESTORE: return (dgettext(TEXT_DOMAIN, "unable to restore to " "destination")); case EZFS_BADBACKUP: return (dgettext(TEXT_DOMAIN, "backup failed")); case EZFS_BADTARGET: return (dgettext(TEXT_DOMAIN, "invalid target vdev")); case EZFS_NODEVICE: return (dgettext(TEXT_DOMAIN, "no such device in pool")); case EZFS_BADDEV: return (dgettext(TEXT_DOMAIN, "invalid device")); case EZFS_NOREPLICAS: return (dgettext(TEXT_DOMAIN, "no valid replicas")); case EZFS_RESILVERING: return (dgettext(TEXT_DOMAIN, "currently resilvering")); case EZFS_BADVERSION: return (dgettext(TEXT_DOMAIN, "unsupported version or " "feature")); case EZFS_POOLUNAVAIL: return (dgettext(TEXT_DOMAIN, "pool is unavailable")); case EZFS_DEVOVERFLOW: return (dgettext(TEXT_DOMAIN, "too many devices in one vdev")); case EZFS_BADPATH: return (dgettext(TEXT_DOMAIN, "must be an absolute path")); case EZFS_CROSSTARGET: return (dgettext(TEXT_DOMAIN, "operation crosses datasets or " "pools")); case EZFS_ZONED: return 
(dgettext(TEXT_DOMAIN, "dataset in use by local zone")); case EZFS_MOUNTFAILED: return (dgettext(TEXT_DOMAIN, "mount failed")); case EZFS_UMOUNTFAILED: return (dgettext(TEXT_DOMAIN, "umount failed")); case EZFS_UNSHARENFSFAILED: return (dgettext(TEXT_DOMAIN, "unshare(1M) failed")); case EZFS_SHARENFSFAILED: return (dgettext(TEXT_DOMAIN, "share(1M) failed")); case EZFS_UNSHARESMBFAILED: return (dgettext(TEXT_DOMAIN, "smb remove share failed")); case EZFS_SHARESMBFAILED: return (dgettext(TEXT_DOMAIN, "smb add share failed")); case EZFS_PERM: return (dgettext(TEXT_DOMAIN, "permission denied")); case EZFS_NOSPC: return (dgettext(TEXT_DOMAIN, "out of space")); case EZFS_FAULT: return (dgettext(TEXT_DOMAIN, "bad address")); case EZFS_IO: return (dgettext(TEXT_DOMAIN, "I/O error")); case EZFS_INTR: return (dgettext(TEXT_DOMAIN, "signal received")); case EZFS_ISSPARE: return (dgettext(TEXT_DOMAIN, "device is reserved as a hot " "spare")); case EZFS_INVALCONFIG: return (dgettext(TEXT_DOMAIN, "invalid vdev configuration")); case EZFS_RECURSIVE: return (dgettext(TEXT_DOMAIN, "recursive dataset dependency")); case EZFS_NOHISTORY: return (dgettext(TEXT_DOMAIN, "no history available")); case EZFS_POOLPROPS: return (dgettext(TEXT_DOMAIN, "failed to retrieve " "pool properties")); case EZFS_POOL_NOTSUP: return (dgettext(TEXT_DOMAIN, "operation not supported " "on this type of pool")); case EZFS_POOL_INVALARG: return (dgettext(TEXT_DOMAIN, "invalid argument for " "this pool operation")); case EZFS_NAMETOOLONG: return (dgettext(TEXT_DOMAIN, "dataset name is too long")); case EZFS_OPENFAILED: return (dgettext(TEXT_DOMAIN, "open failed")); case EZFS_NOCAP: return (dgettext(TEXT_DOMAIN, "disk capacity information could not be retrieved")); case EZFS_LABELFAILED: return (dgettext(TEXT_DOMAIN, "write of label failed")); case EZFS_BADWHO: return (dgettext(TEXT_DOMAIN, "invalid user/group")); case EZFS_BADPERM: return (dgettext(TEXT_DOMAIN, "invalid permission")); case EZFS_BADPERMSET: return (dgettext(TEXT_DOMAIN, "invalid permission set name")); case EZFS_NODELEGATION: return (dgettext(TEXT_DOMAIN, "delegated administration is " "disabled on pool")); case EZFS_BADCACHE: return (dgettext(TEXT_DOMAIN, "invalid or missing cache file")); case EZFS_ISL2CACHE: return (dgettext(TEXT_DOMAIN, "device is in use as a cache")); case EZFS_VDEVNOTSUP: return (dgettext(TEXT_DOMAIN, "vdev specification is not " "supported")); case EZFS_NOTSUP: return (dgettext(TEXT_DOMAIN, "operation not supported " "on this dataset")); case EZFS_ACTIVE_SPARE: return (dgettext(TEXT_DOMAIN, "pool has active shared spare " "device")); case EZFS_UNPLAYED_LOGS: return (dgettext(TEXT_DOMAIN, "log device has unplayed intent " "logs")); case EZFS_REFTAG_RELE: return (dgettext(TEXT_DOMAIN, "no such tag on this dataset")); case EZFS_REFTAG_HOLD: return (dgettext(TEXT_DOMAIN, "tag already exists on this " "dataset")); case EZFS_TAGTOOLONG: return (dgettext(TEXT_DOMAIN, "tag too long")); case EZFS_PIPEFAILED: return (dgettext(TEXT_DOMAIN, "pipe create failed")); case EZFS_THREADCREATEFAILED: return (dgettext(TEXT_DOMAIN, "thread create failed")); case EZFS_POSTSPLIT_ONLINE: return (dgettext(TEXT_DOMAIN, "disk was split from this pool " "into a new one")); case EZFS_SCRUBBING: return (dgettext(TEXT_DOMAIN, "currently scrubbing; " "use 'zpool scrub -s' to cancel current scrub")); case EZFS_NO_SCRUB: return (dgettext(TEXT_DOMAIN, "there is no active scrub")); case EZFS_DIFF: return (dgettext(TEXT_DOMAIN, "unable to generate diffs")); case EZFS_DIFFDATA: 
return (dgettext(TEXT_DOMAIN, "invalid diff data")); case EZFS_POOLREADONLY: return (dgettext(TEXT_DOMAIN, "pool is read-only")); case EZFS_UNKNOWN: return (dgettext(TEXT_DOMAIN, "unknown error")); default: assert(hdl->libzfs_error == 0); return (dgettext(TEXT_DOMAIN, "no error")); } } /*PRINTFLIKE2*/ void zfs_error_aux(libzfs_handle_t *hdl, const char *fmt, ...) { va_list ap; va_start(ap, fmt); (void) vsnprintf(hdl->libzfs_desc, sizeof (hdl->libzfs_desc), fmt, ap); hdl->libzfs_desc_active = 1; va_end(ap); } static void zfs_verror(libzfs_handle_t *hdl, int error, const char *fmt, va_list ap) { (void) vsnprintf(hdl->libzfs_action, sizeof (hdl->libzfs_action), fmt, ap); hdl->libzfs_error = error; if (hdl->libzfs_desc_active) hdl->libzfs_desc_active = 0; else hdl->libzfs_desc[0] = '\0'; if (hdl->libzfs_printerr) { if (error == EZFS_UNKNOWN) { (void) fprintf(stderr, dgettext(TEXT_DOMAIN, "internal " "error: %s\n"), libzfs_error_description(hdl)); abort(); } (void) fprintf(stderr, "%s: %s\n", hdl->libzfs_action, libzfs_error_description(hdl)); if (error == EZFS_NOMEM) exit(1); } } int zfs_error(libzfs_handle_t *hdl, int error, const char *msg) { return (zfs_error_fmt(hdl, error, "%s", msg)); } /*PRINTFLIKE3*/ int zfs_error_fmt(libzfs_handle_t *hdl, int error, const char *fmt, ...) { va_list ap; va_start(ap, fmt); zfs_verror(hdl, error, fmt, ap); va_end(ap); return (-1); } static int zfs_common_error(libzfs_handle_t *hdl, int error, const char *fmt, va_list ap) { switch (error) { case EPERM: case EACCES: zfs_verror(hdl, EZFS_PERM, fmt, ap); return (-1); case ECANCELED: zfs_verror(hdl, EZFS_NODELEGATION, fmt, ap); return (-1); case EIO: zfs_verror(hdl, EZFS_IO, fmt, ap); return (-1); case EFAULT: zfs_verror(hdl, EZFS_FAULT, fmt, ap); return (-1); case EINTR: zfs_verror(hdl, EZFS_INTR, fmt, ap); return (-1); } return (0); } int zfs_standard_error(libzfs_handle_t *hdl, int error, const char *msg) { return (zfs_standard_error_fmt(hdl, error, "%s", msg)); } /*PRINTFLIKE3*/ int zfs_standard_error_fmt(libzfs_handle_t *hdl, int error, const char *fmt, ...) { va_list ap; va_start(ap, fmt); if (zfs_common_error(hdl, error, fmt, ap) != 0) { va_end(ap); return (-1); } switch (error) { case ENXIO: case ENODEV: case EPIPE: zfs_verror(hdl, EZFS_IO, fmt, ap); break; case ENOENT: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "dataset does not exist")); zfs_verror(hdl, EZFS_NOENT, fmt, ap); break; case ENOSPC: case EDQUOT: zfs_verror(hdl, EZFS_NOSPC, fmt, ap); return (-1); case EEXIST: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "dataset already exists")); zfs_verror(hdl, EZFS_EXISTS, fmt, ap); break; case EBUSY: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "dataset is busy")); zfs_verror(hdl, EZFS_BUSY, fmt, ap); break; case EROFS: zfs_verror(hdl, EZFS_POOLREADONLY, fmt, ap); break; case ENAMETOOLONG: zfs_verror(hdl, EZFS_NAMETOOLONG, fmt, ap); break; case ENOTSUP: zfs_verror(hdl, EZFS_BADVERSION, fmt, ap); break; case EAGAIN: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool I/O is currently suspended")); zfs_verror(hdl, EZFS_POOLUNAVAIL, fmt, ap); break; default: zfs_error_aux(hdl, strerror(error)); zfs_verror(hdl, EZFS_UNKNOWN, fmt, ap); break; } va_end(ap); return (-1); } int zpool_standard_error(libzfs_handle_t *hdl, int error, const char *msg) { return (zpool_standard_error_fmt(hdl, error, "%s", msg)); } /*PRINTFLIKE3*/ int zpool_standard_error_fmt(libzfs_handle_t *hdl, int error, const char *fmt, ...) 
{ va_list ap; va_start(ap, fmt); if (zfs_common_error(hdl, error, fmt, ap) != 0) { va_end(ap); return (-1); } switch (error) { case ENODEV: zfs_verror(hdl, EZFS_NODEVICE, fmt, ap); break; case ENOENT: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "no such pool or dataset")); zfs_verror(hdl, EZFS_NOENT, fmt, ap); break; case EEXIST: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool already exists")); zfs_verror(hdl, EZFS_EXISTS, fmt, ap); break; case EBUSY: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool is busy")); zfs_verror(hdl, EZFS_BUSY, fmt, ap); break; case ENXIO: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "one or more devices is currently unavailable")); zfs_verror(hdl, EZFS_BADDEV, fmt, ap); break; case ENAMETOOLONG: zfs_verror(hdl, EZFS_DEVOVERFLOW, fmt, ap); break; case ENOTSUP: zfs_verror(hdl, EZFS_POOL_NOTSUP, fmt, ap); break; case EINVAL: zfs_verror(hdl, EZFS_POOL_INVALARG, fmt, ap); break; case ENOSPC: case EDQUOT: zfs_verror(hdl, EZFS_NOSPC, fmt, ap); return (-1); case EAGAIN: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool I/O is currently suspended")); zfs_verror(hdl, EZFS_POOLUNAVAIL, fmt, ap); break; case EROFS: zfs_verror(hdl, EZFS_POOLREADONLY, fmt, ap); break; default: zfs_error_aux(hdl, strerror(error)); zfs_verror(hdl, EZFS_UNKNOWN, fmt, ap); } va_end(ap); return (-1); } /* * Display an out of memory error message and abort the current program. */ int no_memory(libzfs_handle_t *hdl) { return (zfs_error(hdl, EZFS_NOMEM, "internal error")); } /* * A safe form of malloc() which will die if the allocation fails. */ void * zfs_alloc(libzfs_handle_t *hdl, size_t size) { void *data; if ((data = calloc(1, size)) == NULL) (void) no_memory(hdl); return (data); } /* * A safe form of asprintf() which will die if the allocation fails. */ /*PRINTFLIKE2*/ char * zfs_asprintf(libzfs_handle_t *hdl, const char *fmt, ...) { va_list ap; char *ret; int err; va_start(ap, fmt); err = vasprintf(&ret, fmt, ap); va_end(ap); if (err < 0) (void) no_memory(hdl); return (ret); } /* * A safe form of realloc(), which also zeroes newly allocated space. */ void * zfs_realloc(libzfs_handle_t *hdl, void *ptr, size_t oldsize, size_t newsize) { void *ret; if ((ret = realloc(ptr, newsize)) == NULL) { (void) no_memory(hdl); return (NULL); } bzero((char *)ret + oldsize, (newsize - oldsize)); return (ret); } /* * A safe form of strdup() which will die if the allocation fails. */ char * zfs_strdup(libzfs_handle_t *hdl, const char *str) { char *ret; if ((ret = strdup(str)) == NULL) (void) no_memory(hdl); return (ret); } /* * Convert a number to an appropriately human-readable output. */ void zfs_nicenum(uint64_t num, char *buf, size_t buflen) { uint64_t n = num; int index = 0; char u; while (n >= 1024) { n /= 1024; index++; } u = " KMGTPE"[index]; if (index == 0) { (void) snprintf(buf, buflen, "%llu", (u_longlong_t) n); } else if ((num & ((1ULL << 10 * index) - 1)) == 0) { /* * If this is an even multiple of the base, always display * without any decimal precision. */ (void) snprintf(buf, buflen, "%llu%c", (u_longlong_t) n, u); } else { /* * We want to choose a precision that reflects the best choice * for fitting in 5 characters. This can get rather tricky when * we have numbers that are very close to an order of magnitude. * For example, when displaying 10239 (which is really 9.999K), * we want only a single place of precision for 10.0K. We could * develop some complex heuristics for this, but it's much * easier just to try each combination in turn. 
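 *
 * A few illustrative inputs (values chosen only as examples):
 *
 *	1536	-> "1.50K"	(two decimal places fit in 5 chars)
 *	15872	-> "15.5K"	(only one decimal place fits)
 *	10239	-> "10.0K"	(9.999K rounds up; one place fits)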
*/ int i; for (i = 2; i >= 0; i--) { if (snprintf(buf, buflen, "%.*f%c", i, (double)num / (1ULL << 10 * index), u) <= 5) break; } } } void libzfs_print_on_error(libzfs_handle_t *hdl, boolean_t printerr) { hdl->libzfs_printerr = printerr; } static int libzfs_module_loaded(const char *module) { const char path_prefix[] = "/sys/module/"; char path[256]; memcpy(path, path_prefix, sizeof(path_prefix) - 1); strcpy(path + sizeof(path_prefix) - 1, module); return (access(path, F_OK) == 0); } int libzfs_run_process(const char *path, char *argv[], int flags) { pid_t pid; int rc, devnull_fd; pid = vfork(); if (pid == 0) { devnull_fd = open("/dev/null", O_WRONLY); if (devnull_fd < 0) _exit(-1); if (!(flags & STDOUT_VERBOSE)) (void) dup2(devnull_fd, STDOUT_FILENO); if (!(flags & STDERR_VERBOSE)) (void) dup2(devnull_fd, STDERR_FILENO); close(devnull_fd); (void) execvp(path, argv); _exit(-1); } else if (pid > 0) { int status; while ((rc = waitpid(pid, &status, 0)) == -1 && errno == EINTR); if (rc < 0 || !WIFEXITED(status)) return -1; return WEXITSTATUS(status); } return -1; } int libzfs_load_module(const char *module) { char *argv[4] = {"/sbin/modprobe", "-q", (char *)module, (char *)0}; if (libzfs_module_loaded(module)) return 0; return libzfs_run_process("/sbin/modprobe", argv, 0); } libzfs_handle_t * libzfs_init(void) { libzfs_handle_t *hdl; if (libzfs_load_module("zfs") != 0) { (void) fprintf(stderr, gettext("Failed to load ZFS module " "stack.\nLoad the module manually by running " "'insmod /zfs.ko' as root.\n")); return (NULL); } if ((hdl = calloc(1, sizeof (libzfs_handle_t))) == NULL) { return (NULL); } if ((hdl->libzfs_fd = open(ZFS_DEV, O_RDWR)) < 0) { (void) fprintf(stderr, gettext("Unable to open %s: %s.\n"), ZFS_DEV, strerror(errno)); if (errno == ENOENT) (void) fprintf(stderr, gettext("Verify the ZFS module stack is " "loaded by running '/sbin/modprobe zfs'.\n")); free(hdl); return (NULL); } #ifdef HAVE_SETMNTENT if ((hdl->libzfs_mnttab = setmntent(MNTTAB, "r")) == NULL) { #else if ((hdl->libzfs_mnttab = fopen(MNTTAB, "r")) == NULL) { #endif (void) close(hdl->libzfs_fd); (void) fprintf(stderr, gettext("mtab is not present at %s.\n"), MNTTAB); free(hdl); return (NULL); } hdl->libzfs_sharetab = fopen("/etc/dfs/sharetab", "r"); + if (libzfs_core_init() != 0) { + (void) close(hdl->libzfs_fd); + (void) fclose(hdl->libzfs_mnttab); + (void) fclose(hdl->libzfs_sharetab); + free(hdl); + return (NULL); + } + zfs_prop_init(); zpool_prop_init(); zpool_feature_init(); libzfs_mnttab_init(hdl); return (hdl); } void libzfs_fini(libzfs_handle_t *hdl) { (void) close(hdl->libzfs_fd); if (hdl->libzfs_mnttab) #ifdef HAVE_SETMNTENT (void) endmntent(hdl->libzfs_mnttab); #else (void) fclose(hdl->libzfs_mnttab); #endif if (hdl->libzfs_sharetab) (void) fclose(hdl->libzfs_sharetab); zfs_uninit_libshare(hdl); - if (hdl->libzfs_log_str) - (void) free(hdl->libzfs_log_str); zpool_free_handles(hdl); libzfs_fru_clear(hdl, B_TRUE); namespace_clear(hdl); libzfs_mnttab_fini(hdl); + libzfs_core_fini(); free(hdl); } libzfs_handle_t * zpool_get_handle(zpool_handle_t *zhp) { return (zhp->zpool_hdl); } libzfs_handle_t * zfs_get_handle(zfs_handle_t *zhp) { return (zhp->zfs_hdl); } zpool_handle_t * zfs_get_pool_handle(const zfs_handle_t *zhp) { return (zhp->zpool_hdl); } /* * Given a name, determine whether or not it's a valid path * (starts with '/' or "./"). If so, walk the mnttab trying * to match the device number. If not, treat the path as an * fs/vol/snap name. 
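A hedged usage sketch for zfs_path_to_zhandle(), defined next: the dataset name "tank/home" and its mountpoint "/tank/home" are hypothetical, and writable buffers are used because the function takes a non-const path:

#include <libzfs.h>

static void
path_or_name_example(libzfs_handle_t *hdl)
{
	char mountpoint[] = "/tank/home";	/* leading '/': mnttab lookup */
	char dataset[] = "tank/home";		/* otherwise: open by name */
	zfs_handle_t *zhp;

	if ((zhp = zfs_path_to_zhandle(hdl, mountpoint,
	    ZFS_TYPE_FILESYSTEM)) != NULL)
		zfs_close(zhp);

	if ((zhp = zfs_path_to_zhandle(hdl, dataset,
	    ZFS_TYPE_FILESYSTEM)) != NULL)
		zfs_close(zhp);
}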
*/ zfs_handle_t * zfs_path_to_zhandle(libzfs_handle_t *hdl, char *path, zfs_type_t argtype) { struct stat64 statbuf; struct extmnttab entry; int ret; if (path[0] != '/' && strncmp(path, "./", strlen("./")) != 0) { /* * It's not a valid path, assume it's a name of type 'argtype'. */ return (zfs_open(hdl, path, argtype)); } if (stat64(path, &statbuf) != 0) { (void) fprintf(stderr, "%s: %s\n", path, strerror(errno)); return (NULL); } rewind(hdl->libzfs_mnttab); while ((ret = getextmntent(hdl->libzfs_mnttab, &entry, 0)) == 0) { if (makedevice(entry.mnt_major, entry.mnt_minor) == statbuf.st_dev) { break; } } if (ret != 0) { return (NULL); } if (strcmp(entry.mnt_fstype, MNTTYPE_ZFS) != 0) { (void) fprintf(stderr, gettext("'%s': not a ZFS filesystem\n"), path); return (NULL); } return (zfs_open(hdl, entry.mnt_special, ZFS_TYPE_FILESYSTEM)); } /* * Append partition suffix to an otherwise fully qualified device path. * This is used to generate the name the full path as its stored in * ZPOOL_CONFIG_PATH for whole disk devices. On success the new length * of 'path' will be returned on error a negative value is returned. */ int zfs_append_partition(char *path, size_t max_len) { int len = strlen(path); if (strncmp(path, UDISK_ROOT, strlen(UDISK_ROOT)) == 0) { if (len + 6 >= max_len) return (-1); (void) strcat(path, "-part1"); len += 6; } else { if (len + 2 >= max_len) return (-1); if (isdigit(path[len-1])) { (void) strcat(path, "p1"); len += 2; } else { (void) strcat(path, "1"); len += 1; } } return (len); } /* * Given a shorthand device name check if a file by that name exists in any * of the 'zpool_default_import_path' or ZPOOL_IMPORT_PATH directories. If * one is found, store its fully qualified path in the 'path' buffer passed * by the caller and return 0, otherwise return an error. */ int zfs_resolve_shortname(const char *name, char *path, size_t len) { int i, error = -1; char *dir, *env, *envdup; env = getenv("ZPOOL_IMPORT_PATH"); errno = ENOENT; if (env) { envdup = strdup(env); dir = strtok(envdup, ":"); while (dir && error) { (void) snprintf(path, len, "%s/%s", dir, name); error = access(path, F_OK); dir = strtok(NULL, ":"); } free(envdup); } else { for (i = 0; i < DEFAULT_IMPORT_PATH_SIZE && error < 0; i++) { (void) snprintf(path, len, "%s/%s", zpool_default_import_path[i], name); error = access(path, F_OK); } } return (error ? ENOENT : 0); } /* * Given a shorthand device name look for a match against 'cmp_name'. This * is done by checking all prefix expansions using either the default * 'zpool_default_import_paths' or the ZPOOL_IMPORT_PATH environment * variable. Proper partition suffixes will be appended if this is a * whole disk. When a match is found 0 is returned otherwise ENOENT. 
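A hedged sketch of the shorthand expansion described above, using the public zfs_resolve_shortname() defined earlier in this section; "sdb" and the ZPOOL_IMPORT_PATH value shown in the comment are hypothetical:

#include <stdio.h>
#include <sys/param.h>
#include <libzfs.h>

static void
shortname_example(void)
{
	char path[MAXPATHLEN];

	/* e.g. with ZPOOL_IMPORT_PATH="/dev/disk/by-id:/dev" in the environment */
	if (zfs_resolve_shortname("sdb", path, sizeof (path)) == 0)
		(void) printf("sdb -> %s\n", path);	/* perhaps "/dev/sdb" */
	else
		(void) printf("sdb: not found in any import path\n");
}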
*/ static int zfs_strcmp_shortname(char *name, char *cmp_name, int wholedisk) { int path_len, cmp_len, i = 0, error = ENOENT; char *dir, *env, *envdup = NULL; char path_name[MAXPATHLEN]; cmp_len = strlen(cmp_name); env = getenv("ZPOOL_IMPORT_PATH"); if (env) { envdup = strdup(env); dir = strtok(envdup, ":"); } else { dir = zpool_default_import_path[i]; } while (dir) { /* Trim trailing directory slashes from ZPOOL_IMPORT_PATH */ while (dir[strlen(dir)-1] == '/') dir[strlen(dir)-1] = '\0'; path_len = snprintf(path_name, MAXPATHLEN, "%s/%s", dir, name); if (wholedisk) path_len = zfs_append_partition(path_name, MAXPATHLEN); if ((path_len == cmp_len) && !strcmp(path_name, cmp_name)) { error = 0; break; } if (env) { dir = strtok(NULL, ":"); } else if (++i < DEFAULT_IMPORT_PATH_SIZE) { dir = zpool_default_import_path[i]; } else { dir = NULL; } } if (env) free(envdup); return (error); } /* * Given either a shorthand or fully qualified path name look for a match * against 'cmp'. The passed name will be expanded as needed for comparison * purposes and redundant slashes stripped to ensure an accurate match. */ int zfs_strcmp_pathname(char *name, char *cmp, int wholedisk) { int path_len, cmp_len; char path_name[MAXPATHLEN]; char cmp_name[MAXPATHLEN]; char *dir; /* Strip redundant slashes if one exists due to ZPOOL_IMPORT_PATH */ memset(cmp_name, 0, MAXPATHLEN); dir = strtok(cmp, "/"); while (dir) { strcat(cmp_name, "/"); strcat(cmp_name, dir); dir = strtok(NULL, "/"); } if (name[0] != '/') return zfs_strcmp_shortname(name, cmp_name, wholedisk); strncpy(path_name, name, MAXPATHLEN); path_len = strlen(path_name); cmp_len = strlen(cmp_name); if (wholedisk) { path_len = zfs_append_partition(path_name, MAXPATHLEN); if (path_len == -1) return (ENOMEM); } if ((path_len != cmp_len) || strcmp(path_name, cmp_name)) return (ENOENT); return (0); } /* * Initialize the zc_nvlist_dst member to prepare for receiving an nvlist from * an ioctl(). */ int zcmd_alloc_dst_nvlist(libzfs_handle_t *hdl, zfs_cmd_t *zc, size_t len) { if (len == 0) len = 16 * 1024; zc->zc_nvlist_dst_size = len; if ((zc->zc_nvlist_dst = (uint64_t)(uintptr_t) zfs_alloc(hdl, zc->zc_nvlist_dst_size)) == 0) return (-1); return (0); } /* * Called when an ioctl() which returns an nvlist fails with ENOMEM. This will * expand the nvlist to the size specified in 'zc_nvlist_dst_size', which was * filled in by the kernel to indicate the actual required size. */ int zcmd_expand_dst_nvlist(libzfs_handle_t *hdl, zfs_cmd_t *zc) { free((void *)(uintptr_t)zc->zc_nvlist_dst); if ((zc->zc_nvlist_dst = (uint64_t)(uintptr_t) zfs_alloc(hdl, zc->zc_nvlist_dst_size)) == 0) return (-1); return (0); } /* * Called to free the src and dst nvlists stored in the command structure. 
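A hedged sketch of the zc_nvlist_dst pattern these helpers implement, assuming the private libzfs_impl.h declarations used throughout this file; the dataset name is hypothetical and error reporting is abbreviated:

static int
objset_stats_sketch(libzfs_handle_t *hdl, nvlist_t **nvp)
{
	zfs_cmd_t zc = {"\0"};

	(void) strlcpy(zc.zc_name, "tank/fs", sizeof (zc.zc_name));

	if (zcmd_alloc_dst_nvlist(hdl, &zc, 0) != 0)
		return (-1);

	/* retry while the kernel asks for a larger destination buffer */
	while (zfs_ioctl(hdl, ZFS_IOC_OBJSET_STATS, &zc) != 0) {
		if (errno != ENOMEM || zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
			zcmd_free_nvlists(&zc);
			return (-1);
		}
	}

	if (zcmd_read_dst_nvlist(hdl, &zc, nvp) != 0) {
		zcmd_free_nvlists(&zc);
		return (-1);
	}
	zcmd_free_nvlists(&zc);
	return (0);
}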
*/ void zcmd_free_nvlists(zfs_cmd_t *zc) { free((void *)(uintptr_t)zc->zc_nvlist_conf); free((void *)(uintptr_t)zc->zc_nvlist_src); free((void *)(uintptr_t)zc->zc_nvlist_dst); } static int zcmd_write_nvlist_com(libzfs_handle_t *hdl, uint64_t *outnv, uint64_t *outlen, nvlist_t *nvl) { char *packed; size_t len; verify(nvlist_size(nvl, &len, NV_ENCODE_NATIVE) == 0); if ((packed = zfs_alloc(hdl, len)) == NULL) return (-1); verify(nvlist_pack(nvl, &packed, &len, NV_ENCODE_NATIVE, 0) == 0); *outnv = (uint64_t)(uintptr_t)packed; *outlen = len; return (0); } int zcmd_write_conf_nvlist(libzfs_handle_t *hdl, zfs_cmd_t *zc, nvlist_t *nvl) { return (zcmd_write_nvlist_com(hdl, &zc->zc_nvlist_conf, &zc->zc_nvlist_conf_size, nvl)); } int zcmd_write_src_nvlist(libzfs_handle_t *hdl, zfs_cmd_t *zc, nvlist_t *nvl) { return (zcmd_write_nvlist_com(hdl, &zc->zc_nvlist_src, &zc->zc_nvlist_src_size, nvl)); } /* * Unpacks an nvlist from the ZFS ioctl command structure. */ int zcmd_read_dst_nvlist(libzfs_handle_t *hdl, zfs_cmd_t *zc, nvlist_t **nvlp) { if (nvlist_unpack((void *)(uintptr_t)zc->zc_nvlist_dst, zc->zc_nvlist_dst_size, nvlp, 0) != 0) return (no_memory(hdl)); return (0); } int zfs_ioctl(libzfs_handle_t *hdl, int request, zfs_cmd_t *zc) { - int error; - - zc->zc_history = (uint64_t)(uintptr_t)hdl->libzfs_log_str; - error = ioctl(hdl->libzfs_fd, request, zc); - if (hdl->libzfs_log_str) { - free(hdl->libzfs_log_str); - hdl->libzfs_log_str = NULL; - } - zc->zc_history = 0; - - return (error); + return (ioctl(hdl->libzfs_fd, request, zc)); } /* * ================================================================ * API shared by zfs and zpool property management * ================================================================ */ static void zprop_print_headers(zprop_get_cbdata_t *cbp, zfs_type_t type) { zprop_list_t *pl = cbp->cb_proplist; int i; char *title; size_t len; cbp->cb_first = B_FALSE; if (cbp->cb_scripted) return; /* * Start with the length of the column headers. */ cbp->cb_colwidths[GET_COL_NAME] = strlen(dgettext(TEXT_DOMAIN, "NAME")); cbp->cb_colwidths[GET_COL_PROPERTY] = strlen(dgettext(TEXT_DOMAIN, "PROPERTY")); cbp->cb_colwidths[GET_COL_VALUE] = strlen(dgettext(TEXT_DOMAIN, "VALUE")); cbp->cb_colwidths[GET_COL_RECVD] = strlen(dgettext(TEXT_DOMAIN, "RECEIVED")); cbp->cb_colwidths[GET_COL_SOURCE] = strlen(dgettext(TEXT_DOMAIN, "SOURCE")); /* first property is always NAME */ assert(cbp->cb_proplist->pl_prop == ((type == ZFS_TYPE_POOL) ? ZPOOL_PROP_NAME : ZFS_PROP_NAME)); /* * Go through and calculate the widths for each column. For the * 'source' column, we kludge it up by taking the worst-case scenario of * inheriting from the longest name. This is acceptable because in the * majority of cases 'SOURCE' is the last column displayed, and we don't * use the width anyway. Note that the 'VALUE' column can be oversized, * if the name of the property is much longer than any values we find. */ for (pl = cbp->cb_proplist; pl != NULL; pl = pl->pl_next) { /* * 'PROPERTY' column */ if (pl->pl_prop != ZPROP_INVAL) { const char *propname = (type == ZFS_TYPE_POOL) ? zpool_prop_to_name(pl->pl_prop) : zfs_prop_to_name(pl->pl_prop); len = strlen(propname); if (len > cbp->cb_colwidths[GET_COL_PROPERTY]) cbp->cb_colwidths[GET_COL_PROPERTY] = len; } else { len = strlen(pl->pl_user_prop); if (len > cbp->cb_colwidths[GET_COL_PROPERTY]) cbp->cb_colwidths[GET_COL_PROPERTY] = len; } /* * 'VALUE' column. 
The first property is always the 'name' * property that was tacked on either by /sbin/zfs's * zfs_do_get() or when calling zprop_expand_list(), so we * ignore its width. If the user specified the name property * to display, then it will be later in the list in any case. */ if (pl != cbp->cb_proplist && pl->pl_width > cbp->cb_colwidths[GET_COL_VALUE]) cbp->cb_colwidths[GET_COL_VALUE] = pl->pl_width; /* 'RECEIVED' column. */ if (pl != cbp->cb_proplist && pl->pl_recvd_width > cbp->cb_colwidths[GET_COL_RECVD]) cbp->cb_colwidths[GET_COL_RECVD] = pl->pl_recvd_width; /* * 'NAME' and 'SOURCE' columns */ if (pl->pl_prop == (type == ZFS_TYPE_POOL ? ZPOOL_PROP_NAME : ZFS_PROP_NAME) && pl->pl_width > cbp->cb_colwidths[GET_COL_NAME]) { cbp->cb_colwidths[GET_COL_NAME] = pl->pl_width; cbp->cb_colwidths[GET_COL_SOURCE] = pl->pl_width + strlen(dgettext(TEXT_DOMAIN, "inherited from")); } } /* * Now go through and print the headers. */ for (i = 0; i < ZFS_GET_NCOLS; i++) { switch (cbp->cb_columns[i]) { case GET_COL_NAME: title = dgettext(TEXT_DOMAIN, "NAME"); break; case GET_COL_PROPERTY: title = dgettext(TEXT_DOMAIN, "PROPERTY"); break; case GET_COL_VALUE: title = dgettext(TEXT_DOMAIN, "VALUE"); break; case GET_COL_RECVD: title = dgettext(TEXT_DOMAIN, "RECEIVED"); break; case GET_COL_SOURCE: title = dgettext(TEXT_DOMAIN, "SOURCE"); break; default: title = NULL; } if (title != NULL) { if (i == (ZFS_GET_NCOLS - 1) || cbp->cb_columns[i + 1] == GET_COL_NONE) (void) printf("%s", title); else (void) printf("%-*s ", cbp->cb_colwidths[cbp->cb_columns[i]], title); } } (void) printf("\n"); } /* * Display a single line of output, according to the settings in the callback * structure. */ void zprop_print_one_property(const char *name, zprop_get_cbdata_t *cbp, const char *propname, const char *value, zprop_source_t sourcetype, const char *source, const char *recvd_value) { int i; const char *str = NULL; char buf[128]; /* * Ignore those source types that the user has chosen to ignore. */ if ((sourcetype & cbp->cb_sources) == 0) return; if (cbp->cb_first) zprop_print_headers(cbp, cbp->cb_type); for (i = 0; i < ZFS_GET_NCOLS; i++) { switch (cbp->cb_columns[i]) { case GET_COL_NAME: str = name; break; case GET_COL_PROPERTY: str = propname; break; case GET_COL_VALUE: str = value; break; case GET_COL_SOURCE: switch (sourcetype) { case ZPROP_SRC_NONE: str = "-"; break; case ZPROP_SRC_DEFAULT: str = "default"; break; case ZPROP_SRC_LOCAL: str = "local"; break; case ZPROP_SRC_TEMPORARY: str = "temporary"; break; case ZPROP_SRC_INHERITED: (void) snprintf(buf, sizeof (buf), "inherited from %s", source); str = buf; break; case ZPROP_SRC_RECEIVED: str = "received"; break; } break; case GET_COL_RECVD: str = (recvd_value == NULL ? "-" : recvd_value); break; default: continue; } if (cbp->cb_columns[i + 1] == GET_COL_NONE) (void) printf("%s", str); else if (cbp->cb_scripted) (void) printf("%s\t", str); else (void) printf("%-*s ", cbp->cb_colwidths[cbp->cb_columns[i]], str); } (void) printf("\n"); } /* * Given a numeric suffix, convert the value into a number of bits that the * resulting value must be shifted. */ static int str2shift(libzfs_handle_t *hdl, const char *buf) { const char *ends = "BKMGTPEZ"; int i; if (buf[0] == '\0') return (0); for (i = 0; i < strlen(ends); i++) { if (toupper(buf[0]) == ends[i]) break; } if (i == strlen(ends)) { if (hdl) zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid numeric suffix '%s'"), buf); return (-1); } /* * Allow 'G' = 'GB' = 'GiB', case-insensitively. * However, 'BB' and 'BiB' are disallowed. 
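A hedged illustration of the suffix rules just stated, expressed through the public zfs_nicestrtonum() wrapper defined below; the libzfs handle may be NULL here since it is only used for error text:

static void
suffix_examples(void)
{
	uint64_t v;
	int err;

	(void) zfs_nicestrtonum(NULL, "1G", &v);	/* v == 1073741824 */
	(void) zfs_nicestrtonum(NULL, "1GB", &v);	/* same value */
	(void) zfs_nicestrtonum(NULL, "1gib", &v);	/* same value */

	/* rejected: 'BB' and 'BiB' are not valid suffixes */
	err = zfs_nicestrtonum(NULL, "1BB", &v);	/* err == -1 */
	(void) err;
}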
*/ if (buf[1] == '\0' || (toupper(buf[0]) != 'B' && ((toupper(buf[1]) == 'B' && buf[2] == '\0') || (toupper(buf[1]) == 'I' && toupper(buf[2]) == 'B' && buf[3] == '\0')))) return (10*i); if (hdl) zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid numeric suffix '%s'"), buf); return (-1); } /* * Convert a string of the form '100G' into a real number. Used when setting * properties or creating a volume. 'buf' is used to place an extended error * message for the caller to use. */ int zfs_nicestrtonum(libzfs_handle_t *hdl, const char *value, uint64_t *num) { char *end; int shift; *num = 0; /* Check to see if this looks like a number. */ if ((value[0] < '0' || value[0] > '9') && value[0] != '.') { if (hdl) zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "bad numeric value '%s'"), value); return (-1); } /* Rely on strtoull() to process the numeric portion. */ errno = 0; *num = strtoull(value, &end, 10); /* * Check for ERANGE, which indicates that the value is too large to fit * in a 64-bit value. */ if (errno == ERANGE) { if (hdl) zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "numeric value is too large")); return (-1); } /* * If we have a decimal value, then do the computation with floating * point arithmetic. Otherwise, use standard arithmetic. */ if (*end == '.') { double fval = strtod(value, &end); if ((shift = str2shift(hdl, end)) == -1) return (-1); fval *= pow(2, shift); if (fval > UINT64_MAX) { if (hdl) zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "numeric value is too large")); return (-1); } *num = (uint64_t)fval; } else { if ((shift = str2shift(hdl, end)) == -1) return (-1); /* Check for overflow */ if (shift >= 64 || (*num << shift) >> shift != *num) { if (hdl) zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "numeric value is too large")); return (-1); } *num <<= shift; } return (0); } /* * Given a propname=value nvpair to set, parse any numeric properties * (index, boolean, etc) if they are specified as strings and add the * resulting nvpair to the returned nvlist. * * At the DSL layer, all properties are either 64-bit numbers or strings. * We want the user to be able to ignore this fact and specify properties * as native values (numbers, for example) or as strings (to simplify * command line utilities). This also handles converting index types * (compression, checksum, etc) from strings to their on-disk index. */ int zprop_parse_value(libzfs_handle_t *hdl, nvpair_t *elem, int prop, zfs_type_t type, nvlist_t *ret, char **svalp, uint64_t *ivalp, const char *errbuf) { data_type_t datatype = nvpair_type(elem); zprop_type_t proptype; const char *propname; char *value; boolean_t isnone = B_FALSE; if (type == ZFS_TYPE_POOL) { proptype = zpool_prop_get_type(prop); propname = zpool_prop_to_name(prop); } else { proptype = zfs_prop_get_type(prop); propname = zfs_prop_to_name(prop); } /* * Convert any properties to the internal DSL value types. 
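A hedged sketch of the index-type conversion mentioned above, using the zprop_string_to_index() call that appears in the body below; the property and value are only examples:

static void
index_example(void)
{
	uint64_t idx;

	/* "compression=on" is stored on disk as a numeric index */
	if (zprop_string_to_index(ZFS_PROP_COMPRESSION, "on", &idx,
	    ZFS_TYPE_FILESYSTEM) == 0) {
		/* idx now holds the on-disk index for the "on" value */
	}
}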
*/ *svalp = NULL; *ivalp = 0; switch (proptype) { case PROP_TYPE_STRING: if (datatype != DATA_TYPE_STRING) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' must be a string"), nvpair_name(elem)); goto error; } (void) nvpair_value_string(elem, svalp); if (strlen(*svalp) >= ZFS_MAXPROPLEN) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' is too long"), nvpair_name(elem)); goto error; } break; case PROP_TYPE_NUMBER: if (datatype == DATA_TYPE_STRING) { (void) nvpair_value_string(elem, &value); if (strcmp(value, "none") == 0) { isnone = B_TRUE; } else if (zfs_nicestrtonum(hdl, value, ivalp) != 0) { goto error; } } else if (datatype == DATA_TYPE_UINT64) { (void) nvpair_value_uint64(elem, ivalp); } else { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' must be a number"), nvpair_name(elem)); goto error; } /* * Quota special: force 'none' and don't allow 0. */ if ((type & ZFS_TYPE_DATASET) && *ivalp == 0 && !isnone && (prop == ZFS_PROP_QUOTA || prop == ZFS_PROP_REFQUOTA)) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "use 'none' to disable quota/refquota")); goto error; } break; case PROP_TYPE_INDEX: if (datatype != DATA_TYPE_STRING) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' must be a string"), nvpair_name(elem)); goto error; } (void) nvpair_value_string(elem, &value); if (zprop_string_to_index(prop, value, ivalp, type) != 0) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' must be one of '%s'"), propname, zprop_values(prop, type)); goto error; } break; default: abort(); } /* * Add the result to our return set of properties. */ if (*svalp != NULL) { if (nvlist_add_string(ret, propname, *svalp) != 0) { (void) no_memory(hdl); return (-1); } } else { if (nvlist_add_uint64(ret, propname, *ivalp) != 0) { (void) no_memory(hdl); return (-1); } } return (0); error: (void) zfs_error(hdl, EZFS_BADPROP, errbuf); return (-1); } static int addlist(libzfs_handle_t *hdl, char *propname, zprop_list_t **listp, zfs_type_t type) { int prop; zprop_list_t *entry; prop = zprop_name_to_prop(propname, type); if (prop != ZPROP_INVAL && !zprop_valid_for_type(prop, type)) prop = ZPROP_INVAL; /* * When no property table entry can be found, return failure if * this is a pool property or if this isn't a user-defined * dataset property, */ if (prop == ZPROP_INVAL && ((type == ZFS_TYPE_POOL && !zpool_prop_feature(propname) && !zpool_prop_unsupported(propname)) || (type == ZFS_TYPE_DATASET && !zfs_prop_user(propname) && !zfs_prop_userquota(propname) && !zfs_prop_written(propname)))) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid property '%s'"), propname); return (zfs_error(hdl, EZFS_BADPROP, dgettext(TEXT_DOMAIN, "bad property list"))); } if ((entry = zfs_alloc(hdl, sizeof (zprop_list_t))) == NULL) return (-1); entry->pl_prop = prop; if (prop == ZPROP_INVAL) { if ((entry->pl_user_prop = zfs_strdup(hdl, propname)) == NULL) { free(entry); return (-1); } entry->pl_width = strlen(propname); } else { entry->pl_width = zprop_width(prop, &entry->pl_fixed, type); } *listp = entry; return (0); } /* * Given a comma-separated list of properties, construct a property list * containing both user-defined and native properties. This function will * return a NULL list if 'all' is specified, which can later be expanded * by zprop_expand_list(). */ int zprop_get_list(libzfs_handle_t *hdl, char *props, zprop_list_t **listp, zfs_type_t type) { *listp = NULL; /* * If 'all' is specified, return a NULL list. */ if (strcmp(props, "all") == 0) return (0); /* * If no props were specified, return an error. 
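A hedged usage sketch for zprop_get_list() (whose body continues below) and zprop_free_list(): the property string must be writable, because the parser NUL-terminates each comma-separated token in place; the property names are only examples:

static void
proplist_example(libzfs_handle_t *hdl)
{
	zprop_list_t *pl = NULL;
	char props[] = "name,used,compression";

	if (zprop_get_list(hdl, props, &pl, ZFS_TYPE_DATASET) == 0) {
		/* one zprop_list_t entry per requested property */
		zprop_free_list(pl);
	}

	/*
	 * "all" would instead yield a NULL list, to be filled in later
	 * by zprop_expand_list().
	 */
}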
*/ if (props[0] == '\0') { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "no properties specified")); return (zfs_error(hdl, EZFS_BADPROP, dgettext(TEXT_DOMAIN, "bad property list"))); } /* * It would be nice to use getsubopt() here, but the inclusion of column * aliases makes this more effort than it's worth. */ while (*props != '\0') { size_t len; char *p; char c; if ((p = strchr(props, ',')) == NULL) { len = strlen(props); p = props + len; } else { len = p - props; } /* * Check for empty options. */ if (len == 0) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "empty property name")); return (zfs_error(hdl, EZFS_BADPROP, dgettext(TEXT_DOMAIN, "bad property list"))); } /* * Check all regular property names. */ c = props[len]; props[len] = '\0'; if (strcmp(props, "space") == 0) { static char *spaceprops[] = { "name", "avail", "used", "usedbysnapshots", "usedbydataset", "usedbyrefreservation", "usedbychildren", NULL }; int i; for (i = 0; spaceprops[i]; i++) { if (addlist(hdl, spaceprops[i], listp, type)) return (-1); listp = &(*listp)->pl_next; } } else { if (addlist(hdl, props, listp, type)) return (-1); listp = &(*listp)->pl_next; } props = p; if (c == ',') props++; } return (0); } void zprop_free_list(zprop_list_t *pl) { zprop_list_t *next; while (pl != NULL) { next = pl->pl_next; free(pl->pl_user_prop); free(pl); pl = next; } } typedef struct expand_data { zprop_list_t **last; libzfs_handle_t *hdl; zfs_type_t type; } expand_data_t; int zprop_expand_list_cb(int prop, void *cb) { zprop_list_t *entry; expand_data_t *edp = cb; if ((entry = zfs_alloc(edp->hdl, sizeof (zprop_list_t))) == NULL) return (ZPROP_INVAL); entry->pl_prop = prop; entry->pl_width = zprop_width(prop, &entry->pl_fixed, edp->type); entry->pl_all = B_TRUE; *(edp->last) = entry; edp->last = &entry->pl_next; return (ZPROP_CONT); } int zprop_expand_list(libzfs_handle_t *hdl, zprop_list_t **plp, zfs_type_t type) { zprop_list_t *entry; zprop_list_t **last; expand_data_t exp; if (*plp == NULL) { /* * If this is the very first time we've been called for an 'all' * specification, expand the list to include all native * properties. */ last = plp; exp.last = last; exp.hdl = hdl; exp.type = type; if (zprop_iter_common(zprop_expand_list_cb, &exp, B_FALSE, B_FALSE, type) == ZPROP_INVAL) return (-1); /* * Add 'name' to the beginning of the list, which is handled * specially. */ if ((entry = zfs_alloc(hdl, sizeof (zprop_list_t))) == NULL) return (-1); entry->pl_prop = (type == ZFS_TYPE_POOL) ? 
ZPOOL_PROP_NAME : ZFS_PROP_NAME; entry->pl_width = zprop_width(entry->pl_prop, &entry->pl_fixed, type); entry->pl_all = B_TRUE; entry->pl_next = *plp; *plp = entry; } return (0); } int zprop_iter(zprop_func func, void *cb, boolean_t show_all, boolean_t ordered, zfs_type_t type) { return (zprop_iter_common(func, cb, show_all, ordered, type)); } diff --git a/lib/libzfs_core/Makefile.am b/lib/libzfs_core/Makefile.am new file mode 100644 index 000000000000..180292de1407 --- /dev/null +++ b/lib/libzfs_core/Makefile.am @@ -0,0 +1,15 @@ +include $(top_srcdir)/config/Rules.am + +DEFAULT_INCLUDES += \ + -I$(top_srcdir)/include \ + -I$(top_srcdir)/lib/libspl/include + +lib_LTLIBRARIES = libzfs_core.la + +libzfs_core_la_SOURCES = \ + $(top_srcdir)/lib/libzfs_core/libzfs_core.c + +libzfs_core_la_LIBADD = \ + $(top_builddir)/lib/libnvpair/libnvpair.la + +libzfs_core_la_LDFLAGS = -pthread -version-info 1:1:0 diff --git a/lib/libzfs_core/libzfs_core.c b/lib/libzfs_core/libzfs_core.c new file mode 100644 index 000000000000..bff6902caaf9 --- /dev/null +++ b/lib/libzfs_core/libzfs_core.c @@ -0,0 +1,477 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2012 by Delphix. All rights reserved. + */ + +/* + * LibZFS_Core (lzc) is intended to replace most functionality in libzfs. + * It has the following characteristics: + * + * - Thread Safe. libzfs_core is accessible concurrently from multiple + * threads. This is accomplished primarily by avoiding global data + * (e.g. caching). Since it's thread-safe, there is no reason for a + * process to have multiple libzfs "instances". Therefore, we store + * our few pieces of data (e.g. the file descriptor) in global + * variables. The fd is reference-counted so that the libzfs_core + * library can be "initialized" multiple times (e.g. by different + * consumers within the same process). + * + * - Committed Interface. The libzfs_core interface will be committed, + * therefore consumers can compile against it and be confident that + * their code will continue to work on future releases of this code. + * Currently, the interface is Evolving (not Committed), but we intend + * to commit to it once it is more complete and we determine that it + * meets the needs of all consumers. + * + * - Programatic Error Handling. libzfs_core communicates errors with + * defined error numbers, and doesn't print anything to stdout/stderr. + * + * - Thin Layer. libzfs_core is a thin layer, marshaling arguments + * to/from the kernel ioctls. There is generally a 1:1 correspondence + * between libzfs_core functions and ioctls to /dev/zfs. + * + * - Clear Atomicity. 
Because libzfs_core functions are generally 1:1 + * with kernel ioctls, and kernel ioctls are general atomic, each + * libzfs_core function is atomic. For example, creating multiple + * snapshots with a single call to lzc_snapshot() is atomic -- it + * can't fail with only some of the requested snapshots created, even + * in the event of power loss or system crash. + * + * - Continued libzfs Support. Some higher-level operations (e.g. + * support for "zfs send -R") are too complicated to fit the scope of + * libzfs_core. This functionality will continue to live in libzfs. + * Where appropriate, libzfs will use the underlying atomic operations + * of libzfs_core. For example, libzfs may implement "zfs send -R | + * zfs receive" by using individual "send one snapshot", rename, + * destroy, and "receive one snapshot" operations in libzfs_core. + * /sbin/zfs and /zbin/zpool will link with both libzfs and + * libzfs_core. Other consumers should aim to use only libzfs_core, + * since that will be the supported, stable interface going forwards. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static int g_fd; +static pthread_mutex_t g_lock = PTHREAD_MUTEX_INITIALIZER; +static int g_refcount; + +int +libzfs_core_init(void) +{ + (void) pthread_mutex_lock(&g_lock); + if (g_refcount == 0) { + g_fd = open("/dev/zfs", O_RDWR); + if (g_fd < 0) { + (void) pthread_mutex_unlock(&g_lock); + return (errno); + } + } + g_refcount++; + (void) pthread_mutex_unlock(&g_lock); + return (0); +} + +void +libzfs_core_fini(void) +{ + (void) pthread_mutex_lock(&g_lock); + ASSERT3S(g_refcount, >, 0); + g_refcount--; + if (g_refcount == 0) + (void) close(g_fd); + (void) pthread_mutex_unlock(&g_lock); +} + +static int +lzc_ioctl(zfs_ioc_t ioc, const char *name, + nvlist_t *source, nvlist_t **resultp) +{ + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; + int error = 0; + char *packed; + size_t size; + + ASSERT3S(g_refcount, >, 0); + + (void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name)); + + packed = fnvlist_pack(source, &size); + zc.zc_nvlist_src = (uint64_t)(uintptr_t)packed; + zc.zc_nvlist_src_size = size; + + if (resultp != NULL) { + zc.zc_nvlist_dst_size = MAX(size * 2, 128 * 1024); + zc.zc_nvlist_dst = (uint64_t)(uintptr_t) + malloc(zc.zc_nvlist_dst_size); + if (zc.zc_nvlist_dst == (uint64_t)0) { + error = ENOMEM; + goto out; + } + } + + while (ioctl(g_fd, ioc, &zc) != 0) { + if (errno == ENOMEM && resultp != NULL) { + free((void *)(uintptr_t)zc.zc_nvlist_dst); + zc.zc_nvlist_dst_size *= 2; + zc.zc_nvlist_dst = (uint64_t)(uintptr_t) + malloc(zc.zc_nvlist_dst_size); + if (zc.zc_nvlist_dst == (uint64_t)0) { + error = ENOMEM; + goto out; + } + } else { + error = errno; + break; + } + } + if (zc.zc_nvlist_dst_filled) { + *resultp = fnvlist_unpack((void *)(uintptr_t)zc.zc_nvlist_dst, + zc.zc_nvlist_dst_size); + } else if (resultp != NULL) { + *resultp = NULL; + } + +out: + fnvlist_pack_free(packed, size); + free((void *)(uintptr_t)zc.zc_nvlist_dst); + return (error); +} + +int +lzc_create(const char *fsname, dmu_objset_type_t type, nvlist_t *props) +{ + int error; + nvlist_t *args = fnvlist_alloc(); + fnvlist_add_int32(args, "type", type); + if (props != NULL) + fnvlist_add_nvlist(args, "props", props); + error = lzc_ioctl(ZFS_IOC_CREATE, fsname, args, NULL); + nvlist_free(args); + return (error); +} + +int +lzc_clone(const char *fsname, const char *origin, + nvlist_t *props) +{ + int error; + nvlist_t *args = 
fnvlist_alloc(); + fnvlist_add_string(args, "origin", origin); + if (props != NULL) + fnvlist_add_nvlist(args, "props", props); + error = lzc_ioctl(ZFS_IOC_CLONE, fsname, args, NULL); + nvlist_free(args); + return (error); +} + +/* + * Creates snapshots. + * + * The keys in the snaps nvlist are the snapshots to be created. + * They must all be in the same pool. + * + * The props nvlist is properties to set. Currently only user properties + * are supported. { user:prop_name -> string value } + * + * The returned results nvlist will have an entry for each snapshot that failed. + * The value will be the (int32) error code. + * + * The return value will be 0 if all snapshots were created, otherwise it will + * be the errno of a (undetermined) snapshot that failed. + */ +int +lzc_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t **errlist) +{ + nvpair_t *elem; + nvlist_t *args; + int error; + char pool[MAXNAMELEN]; + + *errlist = NULL; + + /* determine the pool name */ + elem = nvlist_next_nvpair(snaps, NULL); + if (elem == NULL) + return (0); + (void) strlcpy(pool, nvpair_name(elem), sizeof (pool)); + pool[strcspn(pool, "/@")] = '\0'; + + args = fnvlist_alloc(); + fnvlist_add_nvlist(args, "snaps", snaps); + if (props != NULL) + fnvlist_add_nvlist(args, "props", props); + + error = lzc_ioctl(ZFS_IOC_SNAPSHOT, pool, args, errlist); + nvlist_free(args); + + return (error); +} + +/* + * Destroys snapshots. + * + * The keys in the snaps nvlist are the snapshots to be destroyed. + * They must all be in the same pool. + * + * Snapshots that do not exist will be silently ignored. + * + * If 'defer' is not set, and a snapshot has user holds or clones, the + * destroy operation will fail and none of the snapshots will be + * destroyed. + * + * If 'defer' is set, and a snapshot has user holds or clones, it will be + * marked for deferred destruction, and will be destroyed when the last hold + * or clone is removed/destroyed. + * + * The return value will be 0 if all snapshots were destroyed (or marked for + * later destruction if 'defer' is set) or didn't exist to begin with. + * + * Otherwise the return value will be the errno of a (undetermined) snapshot + * that failed, no snapshots will be destroyed, and the errlist will have an + * entry for each snapshot that failed. The value in the errlist will be + * the (int32) error code. 
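A hedged sketch of a minimal libzfs_core consumer built around the lzc_snapshot() call documented above, assuming the libzfs_core.h header added alongside this library; the snapshot names are hypothetical and, as required, live in the same pool:

#include <libzfs_core.h>

static int
snapshot_sketch(void)
{
	nvlist_t *snaps, *errlist = NULL;
	int err;

	if ((err = libzfs_core_init()) != 0)
		return (err);

	snaps = fnvlist_alloc();
	fnvlist_add_boolean(snaps, "tank/fs@today");	/* keys are the snapshots */
	fnvlist_add_boolean(snaps, "tank/vol@today");

	err = lzc_snapshot(snaps, NULL, &errlist);
	if (err != 0 && errlist != NULL) {
		/* one (int32) errno entry per snapshot that failed */
		nvlist_free(errlist);
	}

	fnvlist_free(snaps);
	libzfs_core_fini();
	return (err);
}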
+ */ +int +lzc_destroy_snaps(nvlist_t *snaps, boolean_t defer, nvlist_t **errlist) +{ + nvpair_t *elem; + nvlist_t *args; + int error; + char pool[MAXNAMELEN]; + + /* determine the pool name */ + elem = nvlist_next_nvpair(snaps, NULL); + if (elem == NULL) + return (0); + (void) strlcpy(pool, nvpair_name(elem), sizeof (pool)); + pool[strcspn(pool, "/@")] = '\0'; + + args = fnvlist_alloc(); + fnvlist_add_nvlist(args, "snaps", snaps); + if (defer) + fnvlist_add_boolean(args, "defer"); + + error = lzc_ioctl(ZFS_IOC_DESTROY_SNAPS, pool, args, errlist); + nvlist_free(args); + + return (error); + +} + +int +lzc_snaprange_space(const char *firstsnap, const char *lastsnap, + uint64_t *usedp) +{ + nvlist_t *args; + nvlist_t *result; + int err; + char fs[MAXNAMELEN]; + char *atp; + + /* determine the fs name */ + (void) strlcpy(fs, firstsnap, sizeof (fs)); + atp = strchr(fs, '@'); + if (atp == NULL) + return (EINVAL); + *atp = '\0'; + + args = fnvlist_alloc(); + fnvlist_add_string(args, "firstsnap", firstsnap); + + err = lzc_ioctl(ZFS_IOC_SPACE_SNAPS, lastsnap, args, &result); + nvlist_free(args); + if (err == 0) + *usedp = fnvlist_lookup_uint64(result, "used"); + fnvlist_free(result); + + return (err); +} + +boolean_t +lzc_exists(const char *dataset) +{ + /* + * The objset_stats ioctl is still legacy, so we need to construct our + * own zfs_cmd_t rather than using zfsc_ioctl(). + */ + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; + + (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name)); + return (ioctl(g_fd, ZFS_IOC_OBJSET_STATS, &zc) == 0); +} + +/* + * If fromsnap is NULL, a full (non-incremental) stream will be sent. + */ +int +lzc_send(const char *snapname, const char *fromsnap, int fd) +{ + nvlist_t *args; + int err; + + args = fnvlist_alloc(); + fnvlist_add_int32(args, "fd", fd); + if (fromsnap != NULL) + fnvlist_add_string(args, "fromsnap", fromsnap); + err = lzc_ioctl(ZFS_IOC_SEND_NEW, snapname, args, NULL); + nvlist_free(args); + return (err); +} + +/* + * If fromsnap is NULL, a full (non-incremental) stream will be estimated. + */ +int +lzc_send_space(const char *snapname, const char *fromsnap, uint64_t *spacep) +{ + nvlist_t *args; + nvlist_t *result; + int err; + + args = fnvlist_alloc(); + if (fromsnap != NULL) + fnvlist_add_string(args, "fromsnap", fromsnap); + err = lzc_ioctl(ZFS_IOC_SEND_SPACE, snapname, args, &result); + nvlist_free(args); + if (err == 0) + *spacep = fnvlist_lookup_uint64(result, "space"); + nvlist_free(result); + return (err); +} + +static int +recv_read(int fd, void *buf, int ilen) +{ + char *cp = buf; + int rv; + int len = ilen; + + do { + rv = read(fd, cp, len); + cp += rv; + len -= rv; + } while (rv > 0); + + if (rv < 0 || len != 0) + return (EIO); + + return (0); +} + +/* + * The simplest receive case: receive from the specified fd, creating the + * specified snapshot. Apply the specified properties a "received" properties + * (which can be overridden by locally-set properties). If the stream is a + * clone, its origin snapshot must be specified by 'origin'. The 'force' + * flag will cause the target filesystem to be rolled back or destroyed if + * necessary to receive. + * + * Return 0 on success or an errno on failure. + * + * Note: this interface does not work on dedup'd streams + * (those with DMU_BACKUP_FEATURE_DEDUP). 
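A hedged usage sketch for lzc_receive() as documented above; the stream on 'fd' would typically have been produced by lzc_send() on the sending side, the snapshot name is hypothetical, and neither a clone origin nor a forced rollback is requested:

static int
receive_sketch(int fd)
{
	/* snapname, props, origin, force, fd */
	return (lzc_receive("tank/fs@backup", NULL, NULL, B_FALSE, fd));
}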
+ */ +int +lzc_receive(const char *snapname, nvlist_t *props, const char *origin, + boolean_t force, int fd) +{ + /* + * The receive ioctl is still legacy, so we need to construct our own + * zfs_cmd_t rather than using zfsc_ioctl(). + */ + zfs_cmd_t zc = {"\0", 0, 0, 0, 0, 0, 0, 0, "\0", "\0", "\0"}; + char *atp; + char *packed = NULL; + size_t size; + dmu_replay_record_t drr; + int error; + + ASSERT3S(g_refcount, >, 0); + + /* zc_name is name of containing filesystem */ + (void) strlcpy(zc.zc_name, snapname, sizeof (zc.zc_name)); + atp = strchr(zc.zc_name, '@'); + if (atp == NULL) + return (EINVAL); + *atp = '\0'; + + /* if the fs does not exist, try its parent. */ + if (!lzc_exists(zc.zc_name)) { + char *slashp = strrchr(zc.zc_name, '/'); + if (slashp == NULL) + return (ENOENT); + *slashp = '\0'; + + } + + /* zc_value is full name of the snapshot to create */ + (void) strlcpy(zc.zc_value, snapname, sizeof (zc.zc_value)); + + if (props != NULL) { + /* zc_nvlist_src is props to set */ + packed = fnvlist_pack(props, &size); + zc.zc_nvlist_src = (uint64_t)(uintptr_t)packed; + zc.zc_nvlist_src_size = size; + } + + /* zc_string is name of clone origin (if DRR_FLAG_CLONE) */ + if (origin != NULL) + (void) strlcpy(zc.zc_string, origin, sizeof (zc.zc_string)); + + /* zc_begin_record is non-byteswapped BEGIN record */ + error = recv_read(fd, &drr, sizeof (drr)); + if (error != 0) + goto out; + zc.zc_begin_record = drr.drr_u.drr_begin; + + /* zc_cookie is fd to read from */ + zc.zc_cookie = fd; + + /* zc guid is force flag */ + zc.zc_guid = force; + + /* zc_cleanup_fd is unused */ + zc.zc_cleanup_fd = -1; + + error = ioctl(g_fd, ZFS_IOC_RECV, &zc); + if (error != 0) + error = errno; + +out: + if (packed != NULL) + fnvlist_pack_free(packed, size); + free((void*)(uintptr_t)zc.zc_nvlist_dst); + return (error); +} diff --git a/lib/libzpool/kernel.c b/lib/libzpool/kernel.c index f7aeeb440c06..0293b5eb5a2d 100644 --- a/lib/libzpool/kernel.c +++ b/lib/libzpool/kernel.c @@ -1,1198 +1,1204 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * Emulation of kernel services in userland. 
*/ int aok; uint64_t physmem; vnode_t *rootdir = (vnode_t *)0xabcd1234; char hw_serial[HW_HOSTID_LEN]; struct utsname utsname = { "userland", "libzpool", "1", "1", "na" }; /* this only exists to have its address taken */ struct proc p0; /* * ========================================================================= * threads * ========================================================================= */ pthread_cond_t kthread_cond = PTHREAD_COND_INITIALIZER; pthread_mutex_t kthread_lock = PTHREAD_MUTEX_INITIALIZER; pthread_key_t kthread_key; int kthread_nr = 0; static void thread_init(void) { kthread_t *kt; VERIFY3S(pthread_key_create(&kthread_key, NULL), ==, 0); /* Create entry for primary kthread */ kt = umem_zalloc(sizeof(kthread_t), UMEM_NOFAIL); kt->t_tid = pthread_self(); kt->t_func = NULL; VERIFY3S(pthread_setspecific(kthread_key, kt), ==, 0); /* Only the main thread should be running at the moment */ ASSERT3S(kthread_nr, ==, 0); kthread_nr = 1; } static void thread_fini(void) { kthread_t *kt = curthread; ASSERT(pthread_equal(kt->t_tid, pthread_self())); ASSERT3P(kt->t_func, ==, NULL); umem_free(kt, sizeof(kthread_t)); /* Wait for all threads to exit via thread_exit() */ VERIFY3S(pthread_mutex_lock(&kthread_lock), ==, 0); kthread_nr--; /* Main thread is exiting */ while (kthread_nr > 0) VERIFY3S(pthread_cond_wait(&kthread_cond, &kthread_lock), ==, 0); ASSERT3S(kthread_nr, ==, 0); VERIFY3S(pthread_mutex_unlock(&kthread_lock), ==, 0); VERIFY3S(pthread_key_delete(kthread_key), ==, 0); } kthread_t * zk_thread_current(void) { kthread_t *kt = pthread_getspecific(kthread_key); ASSERT3P(kt, !=, NULL); return kt; } void * zk_thread_helper(void *arg) { kthread_t *kt = (kthread_t *) arg; VERIFY3S(pthread_setspecific(kthread_key, kt), ==, 0); VERIFY3S(pthread_mutex_lock(&kthread_lock), ==, 0); kthread_nr++; VERIFY3S(pthread_mutex_unlock(&kthread_lock), ==, 0); kt->t_tid = pthread_self(); ((thread_func_arg_t) kt->t_func)(kt->t_arg); /* Unreachable, thread must exit with thread_exit() */ abort(); return NULL; } kthread_t * zk_thread_create(caddr_t stk, size_t stksize, thread_func_t func, void *arg, size_t len, proc_t *pp, int state, pri_t pri, int detachstate) { kthread_t *kt; pthread_attr_t attr; size_t stack; ASSERT3S(state & ~TS_RUN, ==, 0); kt = umem_zalloc(sizeof(kthread_t), UMEM_NOFAIL); kt->t_func = func; kt->t_arg = arg; /* * The Solaris kernel stack size is 24k for x86/x86_64. * The Linux kernel stack size is 8k for x86/x86_64. * * We reduce the default stack size in userspace, to ensure * we observe stack overruns in user space as well as in * kernel space. In practice we can't set the userspace stack * size to 8k because differences in stack usage between kernel * space and userspace could lead to spurious stack overflows * (especially when debugging is enabled). Nevertheless, we try * to set it to the lowest value that works (currently 8k*4). * PTHREAD_STACK_MIN is the minimum stack required for a NULL * procedure in user space and is added in to the stack * requirements. * * Some buggy NPTL threading implementations include the * guard area within the stack size allocations. In * this case we allocate an extra page to account for the * guard area since we only have two pages of usable stack * on Linux. 
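A hedged arithmetic sketch of the sizing rule described above: the helper mirrors the PTHREAD_STACK_MIN + MAX(stksize, STACK_SIZE) * 4 expression used below, with an assumed 8 KiB floor standing in for whatever STACK_SIZE actually expands to:

#include <limits.h>

static size_t
stack_size_sketch(size_t requested)
{
	size_t minsz = 8192;	/* assumed stand-in for STACK_SIZE */
	size_t base = (requested > minsz) ? requested : minsz;

	/*
	 * A one-page guard area is configured separately via
	 * pthread_attr_setguardsize().
	 */
	return ((size_t)PTHREAD_STACK_MIN + base * 4);
}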
*/ stack = PTHREAD_STACK_MIN + MAX(stksize, STACK_SIZE) * 4; VERIFY3S(pthread_attr_init(&attr), ==, 0); VERIFY3S(pthread_attr_setstacksize(&attr, stack), ==, 0); VERIFY3S(pthread_attr_setguardsize(&attr, PAGESIZE), ==, 0); VERIFY3S(pthread_attr_setdetachstate(&attr, detachstate), ==, 0); VERIFY3S(pthread_create(&kt->t_tid, &attr, &zk_thread_helper, kt), ==, 0); VERIFY3S(pthread_attr_destroy(&attr), ==, 0); return kt; } void zk_thread_exit(void) { kthread_t *kt = curthread; ASSERT(pthread_equal(kt->t_tid, pthread_self())); umem_free(kt, sizeof(kthread_t)); pthread_mutex_lock(&kthread_lock); kthread_nr--; pthread_mutex_unlock(&kthread_lock); pthread_cond_broadcast(&kthread_cond); pthread_exit((void *)TS_MAGIC); } void zk_thread_join(kt_did_t tid) { void *ret; pthread_join((pthread_t)tid, &ret); VERIFY3P(ret, ==, (void *)TS_MAGIC); } /* * ========================================================================= * kstats * ========================================================================= */ /*ARGSUSED*/ kstat_t * kstat_create(char *module, int instance, char *name, char *class, uchar_t type, ulong_t ndata, uchar_t ks_flag) { return (NULL); } /*ARGSUSED*/ void kstat_install(kstat_t *ksp) {} /*ARGSUSED*/ void kstat_delete(kstat_t *ksp) {} /* * ========================================================================= * mutexes * ========================================================================= */ void mutex_init(kmutex_t *mp, char *name, int type, void *cookie) { ASSERT3S(type, ==, MUTEX_DEFAULT); ASSERT3P(cookie, ==, NULL); mp->m_owner = MTX_INIT; mp->m_magic = MTX_MAGIC; VERIFY3S(pthread_mutex_init(&mp->m_lock, NULL), ==, 0); } void mutex_destroy(kmutex_t *mp) { ASSERT3U(mp->m_magic, ==, MTX_MAGIC); ASSERT3P(mp->m_owner, ==, MTX_INIT); VERIFY3S(pthread_mutex_destroy(&(mp)->m_lock), ==, 0); mp->m_owner = MTX_DEST; mp->m_magic = 0; } void mutex_enter(kmutex_t *mp) { ASSERT3U(mp->m_magic, ==, MTX_MAGIC); ASSERT3P(mp->m_owner, !=, MTX_DEST); ASSERT3P(mp->m_owner, !=, curthread); VERIFY3S(pthread_mutex_lock(&mp->m_lock), ==, 0); ASSERT3P(mp->m_owner, ==, MTX_INIT); mp->m_owner = curthread; } int mutex_tryenter(kmutex_t *mp) { ASSERT3U(mp->m_magic, ==, MTX_MAGIC); ASSERT3P(mp->m_owner, !=, MTX_DEST); if (0 == pthread_mutex_trylock(&mp->m_lock)) { ASSERT3P(mp->m_owner, ==, MTX_INIT); mp->m_owner = curthread; return (1); } else { return (0); } } void mutex_exit(kmutex_t *mp) { ASSERT3U(mp->m_magic, ==, MTX_MAGIC); ASSERT3P(mutex_owner(mp), ==, curthread); mp->m_owner = MTX_INIT; VERIFY3S(pthread_mutex_unlock(&mp->m_lock), ==, 0); } void * mutex_owner(kmutex_t *mp) { ASSERT3U(mp->m_magic, ==, MTX_MAGIC); return (mp->m_owner); } int mutex_held(kmutex_t *mp) { return (mp->m_owner == curthread); } /* * ========================================================================= * rwlocks * ========================================================================= */ void rw_init(krwlock_t *rwlp, char *name, int type, void *arg) { ASSERT3S(type, ==, RW_DEFAULT); ASSERT3P(arg, ==, NULL); VERIFY3S(pthread_rwlock_init(&rwlp->rw_lock, NULL), ==, 0); rwlp->rw_owner = RW_INIT; rwlp->rw_wr_owner = RW_INIT; rwlp->rw_readers = 0; rwlp->rw_magic = RW_MAGIC; } void rw_destroy(krwlock_t *rwlp) { ASSERT3U(rwlp->rw_magic, ==, RW_MAGIC); VERIFY3S(pthread_rwlock_destroy(&rwlp->rw_lock), ==, 0); rwlp->rw_magic = 0; } void rw_enter(krwlock_t *rwlp, krw_t rw) { ASSERT3U(rwlp->rw_magic, ==, RW_MAGIC); ASSERT3P(rwlp->rw_owner, !=, curthread); ASSERT3P(rwlp->rw_wr_owner, !=, curthread); if (rw == RW_READER) { 
VERIFY3S(pthread_rwlock_rdlock(&rwlp->rw_lock), ==, 0); ASSERT3P(rwlp->rw_wr_owner, ==, RW_INIT); atomic_inc_uint(&rwlp->rw_readers); } else { VERIFY3S(pthread_rwlock_wrlock(&rwlp->rw_lock), ==, 0); ASSERT3P(rwlp->rw_wr_owner, ==, RW_INIT); ASSERT3U(rwlp->rw_readers, ==, 0); rwlp->rw_wr_owner = curthread; } rwlp->rw_owner = curthread; } void rw_exit(krwlock_t *rwlp) { ASSERT3U(rwlp->rw_magic, ==, RW_MAGIC); ASSERT(RW_LOCK_HELD(rwlp)); if (RW_READ_HELD(rwlp)) atomic_dec_uint(&rwlp->rw_readers); else rwlp->rw_wr_owner = RW_INIT; rwlp->rw_owner = RW_INIT; VERIFY3S(pthread_rwlock_unlock(&rwlp->rw_lock), ==, 0); } int rw_tryenter(krwlock_t *rwlp, krw_t rw) { int rv; ASSERT3U(rwlp->rw_magic, ==, RW_MAGIC); if (rw == RW_READER) rv = pthread_rwlock_tryrdlock(&rwlp->rw_lock); else rv = pthread_rwlock_trywrlock(&rwlp->rw_lock); if (rv == 0) { ASSERT3P(rwlp->rw_wr_owner, ==, RW_INIT); if (rw == RW_READER) atomic_inc_uint(&rwlp->rw_readers); else { ASSERT3U(rwlp->rw_readers, ==, 0); rwlp->rw_wr_owner = curthread; } rwlp->rw_owner = curthread; return (1); } VERIFY3S(rv, ==, EBUSY); return (0); } int rw_tryupgrade(krwlock_t *rwlp) { ASSERT3U(rwlp->rw_magic, ==, RW_MAGIC); return (0); } /* * ========================================================================= * condition variables * ========================================================================= */ void cv_init(kcondvar_t *cv, char *name, int type, void *arg) { ASSERT3S(type, ==, CV_DEFAULT); cv->cv_magic = CV_MAGIC; VERIFY3S(pthread_cond_init(&cv->cv, NULL), ==, 0); } void cv_destroy(kcondvar_t *cv) { ASSERT3U(cv->cv_magic, ==, CV_MAGIC); VERIFY3S(pthread_cond_destroy(&cv->cv), ==, 0); cv->cv_magic = 0; } void cv_wait(kcondvar_t *cv, kmutex_t *mp) { ASSERT3U(cv->cv_magic, ==, CV_MAGIC); ASSERT3P(mutex_owner(mp), ==, curthread); mp->m_owner = MTX_INIT; int ret = pthread_cond_wait(&cv->cv, &mp->m_lock); if (ret != 0) VERIFY3S(ret, ==, EINTR); mp->m_owner = curthread; } clock_t cv_timedwait(kcondvar_t *cv, kmutex_t *mp, clock_t abstime) { int error; struct timeval tv; timestruc_t ts; clock_t delta; ASSERT3U(cv->cv_magic, ==, CV_MAGIC); top: delta = abstime - ddi_get_lbolt(); if (delta <= 0) return (-1); VERIFY(gettimeofday(&tv, NULL) == 0); ts.tv_sec = tv.tv_sec + delta / hz; ts.tv_nsec = tv.tv_usec * 1000 + (delta % hz) * (NANOSEC / hz); if (ts.tv_nsec >= NANOSEC) { ts.tv_sec++; ts.tv_nsec -= NANOSEC; } ASSERT3P(mutex_owner(mp), ==, curthread); mp->m_owner = MTX_INIT; error = pthread_cond_timedwait(&cv->cv, &mp->m_lock, &ts); mp->m_owner = curthread; if (error == ETIMEDOUT) return (-1); if (error == EINTR) goto top; VERIFY3S(error, ==, 0); return (1); } void cv_signal(kcondvar_t *cv) { ASSERT3U(cv->cv_magic, ==, CV_MAGIC); VERIFY3S(pthread_cond_signal(&cv->cv), ==, 0); } void cv_broadcast(kcondvar_t *cv) { ASSERT3U(cv->cv_magic, ==, CV_MAGIC); VERIFY3S(pthread_cond_broadcast(&cv->cv), ==, 0); } /* * ========================================================================= * vnode operations * ========================================================================= */ /* * Note: for the xxxat() versions of these functions, we assume that the * starting vp is always rootdir (which is true for spa_directory.c, the only * ZFS consumer of these interfaces). We assert this is true, and then emulate * them by adding '/' in front of the path. 
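A hedged illustration of the emulation just described, assuming the zfs_context.h definitions used by this file; the path is hypothetical and the starting vnode must be rootdir:

static int
openat_sketch(vnode_t **vpp)
{
	/* equivalent to vn_open("/etc/hostid", UIO_SYSSPACE, FREAD, 0, vpp, 0, 0) */
	return (vn_openat((char *)"etc/hostid", UIO_SYSSPACE, FREAD, 0,
	    vpp, 0, 0, rootdir, -1));
}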
*/ /*ARGSUSED*/ int vn_open(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, int x3) { int fd; vnode_t *vp; int old_umask = 0; char *realpath; struct stat64 st; int err; realpath = umem_alloc(MAXPATHLEN, UMEM_NOFAIL); /* * If we're accessing a real disk from userland, we need to use * the character interface to avoid caching. This is particularly * important if we're trying to look at a real in-kernel storage * pool from userland, e.g. via zdb, because otherwise we won't * see the changes occurring under the segmap cache. * On the other hand, the stupid character device returns zero * for its size. So -- gag -- we open the block device to get * its size, and remember it for subsequent VOP_GETATTR(). */ #if defined(__sun__) || defined(__sun) if (strncmp(path, "/dev/", 5) == 0) { #else if (0) { #endif char *dsk; fd = open64(path, O_RDONLY); if (fd == -1) { err = errno; free(realpath); return (err); } if (fstat64(fd, &st) == -1) { err = errno; close(fd); free(realpath); return (err); } close(fd); (void) sprintf(realpath, "%s", path); dsk = strstr(path, "/dsk/"); if (dsk != NULL) (void) sprintf(realpath + (dsk - path) + 1, "r%s", dsk + 1); } else { (void) sprintf(realpath, "%s", path); if (!(flags & FCREAT) && stat64(realpath, &st) == -1) { err = errno; free(realpath); return (err); } } if (!(flags & FCREAT) && S_ISBLK(st.st_mode)) { #ifdef __linux__ flags |= O_DIRECT; #endif /* We shouldn't be writing to block devices in userspace */ VERIFY(!(flags & FWRITE)); } if (flags & FCREAT) old_umask = umask(0); /* * The construct 'flags - FREAD' conveniently maps combinations of * FREAD and FWRITE to the corresponding O_RDONLY, O_WRONLY, and O_RDWR. */ fd = open64(realpath, flags - FREAD, mode); free(realpath); if (flags & FCREAT) (void) umask(old_umask); if (fd == -1) return (errno); if (fstat64_blk(fd, &st) == -1) { err = errno; close(fd); return (err); } (void) fcntl(fd, F_SETFD, FD_CLOEXEC); *vpp = vp = umem_zalloc(sizeof (vnode_t), UMEM_NOFAIL); vp->v_fd = fd; vp->v_size = st.st_size; vp->v_path = spa_strdup(path); return (0); } /*ARGSUSED*/ int vn_openat(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, int x3, vnode_t *startvp, int fd) { char *realpath = umem_alloc(strlen(path) + 2, UMEM_NOFAIL); int ret; ASSERT(startvp == rootdir); (void) sprintf(realpath, "/%s", path); /* fd ignored for now, need if want to simulate nbmand support */ ret = vn_open(realpath, x1, flags, mode, vpp, x2, x3); umem_free(realpath, strlen(path) + 2); return (ret); } /*ARGSUSED*/ int vn_rdwr(int uio, vnode_t *vp, void *addr, ssize_t len, offset_t offset, int x1, int x2, rlim64_t x3, void *x4, ssize_t *residp) { ssize_t rc, done = 0, split; if (uio == UIO_READ) { rc = pread64(vp->v_fd, addr, len, offset); } else { /* * To simulate partial disk writes, we split writes into two * system calls so that the process can be killed in between. */ int sectors = len >> SPA_MINBLOCKSHIFT; split = (sectors > 0 ? rand() % sectors : 0) << SPA_MINBLOCKSHIFT; rc = pwrite64(vp->v_fd, addr, split, offset); if (rc != -1) { done = rc; rc = pwrite64(vp->v_fd, (char *)addr + split, len - split, offset + split); } } #ifdef __linux__ if (rc == -1 && errno == EINVAL) { /* * Under Linux, this most likely means an alignment issue * (memory or disk) due to O_DIRECT, so we abort() in order to * catch the offender. 
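A hedged sketch of the partial-write simulation performed by vn_rdwr() above: a write of 'len' bytes is issued as two pwrite64() calls split at a random 512-byte-aligned offset, so killing the process between them behaves like a torn write on real hardware; SPA_MINBLOCKSHIFT and rand() come from the headers this file already uses:

static size_t
pick_split_point(size_t len)
{
	int sectors = len >> SPA_MINBLOCKSHIFT;		/* 512-byte units */

	/* the first pwrite64() covers [0, split), the second [split, len) */
	return ((size_t)(sectors > 0 ? rand() % sectors : 0)
	    << SPA_MINBLOCKSHIFT);
}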
*/ abort(); } #endif if (rc == -1) return (errno); done += rc; if (residp) *residp = len - done; else if (done != len) return (EIO); return (0); } void vn_close(vnode_t *vp) { close(vp->v_fd); spa_strfree(vp->v_path); umem_free(vp, sizeof (vnode_t)); } /* * At a minimum we need to update the size since vdev_reopen() * will no longer call vn_openat(). */ int fop_getattr(vnode_t *vp, vattr_t *vap) { struct stat64 st; int err; if (fstat64_blk(vp->v_fd, &st) == -1) { err = errno; close(vp->v_fd); return (err); } vap->va_size = st.st_size; return (0); } /* * ========================================================================= * Figure out which debugging statements to print * ========================================================================= */ static char *dprintf_string; static int dprintf_print_all; int dprintf_find_string(const char *string) { char *tmp_str = dprintf_string; int len = strlen(string); /* * Find out if this is a string we want to print. * String format: file1.c,function_name1,file2.c,file3.c */ while (tmp_str != NULL) { if (strncmp(tmp_str, string, len) == 0 && (tmp_str[len] == ',' || tmp_str[len] == '\0')) return (1); tmp_str = strchr(tmp_str, ','); if (tmp_str != NULL) tmp_str++; /* Get rid of , */ } return (0); } void dprintf_setup(int *argc, char **argv) { int i, j; /* * Debugging can be specified two ways: by setting the * environment variable ZFS_DEBUG, or by including a * "debug=..." argument on the command line. The command * line setting overrides the environment variable. */ for (i = 1; i < *argc; i++) { int len = strlen("debug="); /* First look for a command line argument */ if (strncmp("debug=", argv[i], len) == 0) { dprintf_string = argv[i] + len; /* Remove from args */ for (j = i; j < *argc; j++) argv[j] = argv[j+1]; argv[j] = NULL; (*argc)--; } } if (dprintf_string == NULL) { /* Look for ZFS_DEBUG environment variable */ dprintf_string = getenv("ZFS_DEBUG"); } /* * Are we just turning on all debugging? */ if (dprintf_find_string("on")) dprintf_print_all = 1; } /* * ========================================================================= * debug printfs * ========================================================================= */ void __dprintf(const char *file, const char *func, int line, const char *fmt, ...) { const char *newfile; va_list adx; /* * Get rid of annoying "../common/" prefix to filename. 
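To make the filter format above concrete (the file and function names here are only examples): setting ZFS_DEBUG="spa.c,vdev_open,long" in the environment, or passing debug=spa.c,vdev_open,long on the command line (which overrides the environment variable), makes dprintf_find_string("spa.c") and dprintf_find_string("vdev_open") return 1 while calls from other files stay silent; the keywords "on", "pid", "tid", "cpu", "time", and "long" recognized by the surrounding code adjust what gets printed.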
*/ newfile = strrchr(file, '/'); if (newfile != NULL) { newfile = newfile + 1; /* Get rid of leading / */ } else { newfile = file; } if (dprintf_print_all || dprintf_find_string(newfile) || dprintf_find_string(func)) { /* Print out just the function name if requested */ flockfile(stdout); if (dprintf_find_string("pid")) (void) printf("%d ", getpid()); if (dprintf_find_string("tid")) (void) printf("%u ", (uint_t) pthread_self()); if (dprintf_find_string("cpu")) (void) printf("%u ", getcpuid()); if (dprintf_find_string("time")) (void) printf("%llu ", gethrtime()); if (dprintf_find_string("long")) (void) printf("%s, line %d: ", newfile, line); (void) printf("%s: ", func); va_start(adx, fmt); (void) vprintf(fmt, adx); va_end(adx); funlockfile(stdout); } } /* * ========================================================================= * cmn_err() and panic() * ========================================================================= */ static char ce_prefix[CE_IGNORE][10] = { "", "NOTICE: ", "WARNING: ", "" }; static char ce_suffix[CE_IGNORE][2] = { "", "\n", "\n", "" }; void vpanic(const char *fmt, va_list adx) { (void) fprintf(stderr, "error: "); (void) vfprintf(stderr, fmt, adx); (void) fprintf(stderr, "\n"); abort(); /* think of it as a "user-level crash dump" */ } void panic(const char *fmt, ...) { va_list adx; va_start(adx, fmt); vpanic(fmt, adx); va_end(adx); } void vcmn_err(int ce, const char *fmt, va_list adx) { if (ce == CE_PANIC) vpanic(fmt, adx); if (ce != CE_NOTE) { /* suppress noise in userland stress testing */ (void) fprintf(stderr, "%s", ce_prefix[ce]); (void) vfprintf(stderr, fmt, adx); (void) fprintf(stderr, "%s", ce_suffix[ce]); } } /*PRINTFLIKE2*/ void cmn_err(int ce, const char *fmt, ...) { va_list adx; va_start(adx, fmt); vcmn_err(ce, fmt, adx); va_end(adx); } /* * ========================================================================= * kobj interfaces * ========================================================================= */ struct _buf * kobj_open_file(char *name) { struct _buf *file; vnode_t *vp; /* set vp as the _fd field of the file */ if (vn_openat(name, UIO_SYSSPACE, FREAD, 0, &vp, 0, 0, rootdir, -1) != 0) return ((void *)-1UL); file = umem_zalloc(sizeof (struct _buf), UMEM_NOFAIL); file->_fd = (intptr_t)vp; return (file); } int kobj_read_file(struct _buf *file, char *buf, unsigned size, unsigned off) { ssize_t resid; vn_rdwr(UIO_READ, (vnode_t *)file->_fd, buf, size, (offset_t)off, UIO_SYSSPACE, 0, 0, 0, &resid); return (size - resid); } void kobj_close_file(struct _buf *file) { vn_close((vnode_t *)file->_fd); umem_free(file, sizeof (struct _buf)); } int kobj_get_filesize(struct _buf *file, uint64_t *size) { struct stat64 st; vnode_t *vp = (vnode_t *)file->_fd; if (fstat64(vp->v_fd, &st) == -1) { vn_close(vp); return (errno); } *size = st.st_size; return (0); } /* * ========================================================================= * misc routines * ========================================================================= */ void delay(clock_t ticks) { poll(0, 0, ticks * (1000 / hz)); } /* * Find highest one bit set. * Returns bit number + 1 of highest bit that is set, otherwise returns 0. * High order bit is 31 (or 63 in _LP64 kernel). 
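A few concrete values implied by the contract above (computed from the implementation that follows):

	highbit(0)          == 0
	highbit(1)          == 1	/* bit 0 set */
	highbit(0x80)       == 8	/* bit 7 set */
	highbit(0x80000000) == 32	/* bit 31 set */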
*/ int highbit(ulong_t i) { register int h = 1; if (i == 0) return (0); #ifdef _LP64 if (i & 0xffffffff00000000ul) { h += 32; i >>= 32; } #endif if (i & 0xffff0000) { h += 16; i >>= 16; } if (i & 0xff00) { h += 8; i >>= 8; } if (i & 0xf0) { h += 4; i >>= 4; } if (i & 0xc) { h += 2; i >>= 2; } if (i & 0x2) { h += 1; } return (h); } static int random_fd = -1, urandom_fd = -1; static int random_get_bytes_common(uint8_t *ptr, size_t len, int fd) { size_t resid = len; ssize_t bytes; ASSERT(fd != -1); while (resid != 0) { bytes = read(fd, ptr, resid); ASSERT3S(bytes, >=, 0); ptr += bytes; resid -= bytes; } return (0); } int random_get_bytes(uint8_t *ptr, size_t len) { return (random_get_bytes_common(ptr, len, random_fd)); } int random_get_pseudo_bytes(uint8_t *ptr, size_t len) { return (random_get_bytes_common(ptr, len, urandom_fd)); } int ddi_strtoul(const char *hw_serial, char **nptr, int base, unsigned long *result) { char *end; *result = strtoul(hw_serial, &end, base); if (*result == 0) return (errno); return (0); } int ddi_strtoull(const char *str, char **nptr, int base, u_longlong_t *result) { char *end; *result = strtoull(str, &end, base); if (*result == 0) return (errno); return (0); } /* * ========================================================================= * kernel emulation setup & teardown * ========================================================================= */ static int umem_out_of_memory(void) { char errmsg[] = "out of memory -- generating core dump\n"; (void) fprintf(stderr, "%s", errmsg); abort(); return (0); } void kernel_init(int mode) { umem_nofail_callback(umem_out_of_memory); physmem = sysconf(_SC_PHYS_PAGES); dprintf("physmem = %llu pages (%.2f GB)\n", physmem, (double)physmem * sysconf(_SC_PAGE_SIZE) / (1ULL << 30)); (void) snprintf(hw_serial, sizeof (hw_serial), "%ld", (mode & FWRITE) ? gethostid() : 0); VERIFY((random_fd = open("/dev/random", O_RDONLY)) != -1); VERIFY((urandom_fd = open("/dev/urandom", O_RDONLY)) != -1); thread_init(); system_taskq_init(); spa_init(mode); } void kernel_fini(void) { spa_fini(); system_taskq_fini(); thread_fini(); close(random_fd); close(urandom_fd); random_fd = -1; urandom_fd = -1; } uid_t crgetuid(cred_t *cr) { return (0); } +uid_t +crgetruid(cred_t *cr) +{ + return (0); +} + gid_t crgetgid(cred_t *cr) { return (0); } int crgetngroups(cred_t *cr) { return (0); } gid_t * crgetgroups(cred_t *cr) { return (NULL); } int zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr) { return (0); } int zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr) { return (0); } int zfs_secpolicy_destroy_perms(const char *name, cred_t *cr) { return (0); } ksiddomain_t * ksid_lookupdomain(const char *dom) { ksiddomain_t *kd; kd = umem_zalloc(sizeof (ksiddomain_t), UMEM_NOFAIL); kd->kd_name = spa_strdup(dom); return (kd); } void ksiddomain_rele(ksiddomain_t *ksid) { spa_strfree(ksid->kd_name); umem_free(ksid, sizeof (ksiddomain_t)); } char * kmem_vasprintf(const char *fmt, va_list adx) { char *buf = NULL; va_list adx_copy; va_copy(adx_copy, adx); VERIFY(vasprintf(&buf, fmt, adx_copy) != -1); va_end(adx_copy); return (buf); } char * kmem_asprintf(const char *fmt, ...) 
{ char *buf = NULL; va_list adx; va_start(adx, fmt); VERIFY(vasprintf(&buf, fmt, adx) != -1); va_end(adx); return (buf); } /* ARGSUSED */ int zfs_onexit_fd_hold(int fd, minor_t *minorp) { *minorp = 0; return (0); } /* ARGSUSED */ void zfs_onexit_fd_rele(int fd) { } /* ARGSUSED */ int zfs_onexit_add_cb(minor_t minor, void (*func)(void *), void *data, uint64_t *action_handle) { return (0); } /* ARGSUSED */ int zfs_onexit_del_cb(minor_t minor, uint64_t action_handle, boolean_t fire) { return (0); } /* ARGSUSED */ int zfs_onexit_cb_data(minor_t minor, uint64_t action_handle, void **data) { return (0); } diff --git a/man/man8/zfs.8 b/man/man8/zfs.8 index a0089e699c21..bd8bbfdcf69a 100644 --- a/man/man8/zfs.8 +++ b/man/man8/zfs.8 @@ -1,3461 +1,3461 @@ '\" t .\" .\" CDDL HEADER START .\" .\" The contents of this file are subject to the terms of the .\" Common Development and Distribution License (the "License"). .\" You may not use this file except in compliance with the License. .\" .\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE .\" or http://www.opensolaris.org/os/licensing. .\" See the License for the specific language governing permissions .\" and limitations under the License. .\" .\" When distributing Covered Code, include this CDDL HEADER in each .\" file and include the License file at usr/src/OPENSOLARIS.LICENSE. .\" If applicable, add the following below this CDDL HEADER, with the .\" fields enclosed by brackets "[]" replaced with your own identifying .\" information: Portions Copyright [yyyy] [name of copyright owner] .\" .\" CDDL HEADER END .\" .\" .\" Copyright (c) 2009 Sun Microsystems, Inc. All Rights Reserved. .\" Copyright 2011 Joshua M. Clulow .\" Copyright (c) 2012 by Delphix. All rights reserved. .\" Copyright (c) 2012, Joyent, Inc. All rights reserved. .\" Copyright 2012 Nexenta Systems, Inc. All Rights Reserved. .\" Copyright (c) 2013 by Saso Kiselkov. All rights reserved. .\" .TH zfs 8 "Jan 10, 2013" "ZFS pool 28, filesystem 5" "System Administration Commands" .SH NAME zfs \- configures ZFS file systems .SH SYNOPSIS .LP .nf \fBzfs\fR [\fB-?\fR] .fi .LP .nf \fBzfs\fR \fBcreate\fR [\fB-p\fR] [\fB-o\fR \fIproperty\fR=\fIvalue\fR] ... \fIfilesystem\fR .fi .LP .nf \fBzfs\fR \fBcreate\fR [\fB-ps\fR] [\fB-b\fR \fIblocksize\fR] [\fB-o\fR \fIproperty\fR=\fIvalue\fR] ... \fB-V\fR \fIsize\fR \fIvolume\fR .fi .LP .nf \fBzfs\fR \fBdestroy\fR [\fB-fnpRrv\fR] \fIfilesystem\fR|\fIvolume\fR .fi .LP .nf \fBzfs\fR \fBdestroy\fR [\fB-dnpRrv\fR] \fIfilesystem\fR|\fIvolume\fR@\fIsnap\fR[%\fIsnap\fR][,...] .fi .LP .nf -\fBzfs\fR \fBsnapshot | snap\fR [\fB-r\fR] [\fB-o\fR \fIproperty\fR=\fIvalue\fR]... - \fIfilesystem@snapname\fR|\fIvolume@snapname\fR +\fBzfs\fR \fBsnapshot | snap\fR [\fB-r\fR] [\fB-o\fR \fIproperty\fR=\fIvalue\fR] ... + \fIfilesystem@snapname\fR|\fIvolume@snapname\fR ... .fi .LP .nf \fBzfs\fR \fBrollback\fR [\fB-rRf\fR] \fIsnapshot\fR .fi .LP .nf \fBzfs\fR \fBclone\fR [\fB-p\fR] [\fB-o\fR \fIproperty\fR=\fIvalue\fR] ... 
\fIsnapshot\fR \fIfilesystem\fR|\fIvolume\fR .fi .LP .nf \fBzfs\fR \fBpromote\fR \fIclone-filesystem\fR .fi .LP .nf \fBzfs\fR \fBrename\fR [\fB-f\fR] \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR .fi .LP .nf \fBzfs\fR \fBrename\fR [\fB-fp\fR] \fIfilesystem\fR|\fIvolume\fR \fIfilesystem\fR|\fIvolume\fR .fi .LP .nf \fBzfs\fR \fBrename\fR \fB-r\fR \fIsnapshot\fR \fIsnapshot\fR .fi .LP .nf \fBzfs\fR \fBlist\fR [\fB-r\fR|\fB-d\fR \fIdepth\fR][\fB-H\fR][\fB-o\fR \fIproperty\fR[,...]] [\fB-t\fR \fItype\fR[,...]] [\fB-s\fR \fIproperty\fR] ... [\fB-S\fR \fIproperty\fR] ... [\fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR|\fIsnap\fR] ... .fi .LP .nf \fBzfs\fR \fBset\fR \fIproperty\fR=\fIvalue\fR \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR ... .fi .LP .nf \fBzfs\fR \fBget\fR [\fB-r\fR|\fB-d\fR \fIdepth\fR][\fB-Hp\fR][\fB-o\fR \fIfield\fR[,...]] [\fB-t\fR \fItype\fR[,...]] [\fB-s\fR \fIsource\fR[,...]] "\fIall\fR" | \fIproperty\fR[,...] \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR ... .fi .LP .nf \fBzfs\fR \fBinherit\fR [\fB-r\fR] \fIproperty\fR \fIfilesystem\fR|\fIvolume|snapshot\fR ... .fi .LP .nf \fBzfs\fR \fBupgrade\fR [\fB-v\fR] .fi .LP .nf \fBzfs\fR \fBupgrade\fR [\fB-r\fR] [\fB-V\fR \fIversion\fR] \fB-a\fR | \fIfilesystem\fR .fi .LP .nf \fBzfs\fR \fBuserspace\fR [\fB-Hinp\fR] [\fB-o\fR \fIfield\fR[,...]] [\fB-s\fR \fIfield\fR] ... [\fB-S\fR \fIfield\fR] ... [\fB-t\fR \fItype\fR[,...]] \fIfilesystem\fR|\fIsnapshot\fR .fi .LP .nf \fBzfs\fR \fBgroupspace\fR [\fB-Hinp\fR] [\fB-o\fR \fIfield\fR[,...]] [\fB-s\fR \fIfield\fR] ... [\fB-S\fR \fIfield\fR] ... [\fB-t\fR \fItype\fR[,...]] \fIfilesystem\fR|\fIsnapshot\fR .fi .LP .nf \fBzfs\fR \fBmount\fR .fi .LP .nf \fBzfs\fR \fBmount\fR [\fB-vO\fR] [\fB-o \fIoptions\fR\fR] \fB-a\fR | \fIfilesystem\fR .fi .LP .nf \fBzfs\fR \fBunmount | umount\fR [\fB-f\fR] \fB-a\fR | \fIfilesystem\fR|\fImountpoint\fR .fi .LP .nf \fBzfs\fR \fBshare\fR \fB-a\fR | \fIfilesystem\fR .fi .LP .nf \fBzfs\fR \fBunshare\fR \fB-a\fR \fIfilesystem\fR|\fImountpoint\fR .fi .LP .nf \fBzfs\fR \fBsend\fR [\fB-DnPpRv\fR] [\fB-\fR[\fBiI\fR] \fIsnapshot\fR] \fIsnapshot\fR .fi .LP .nf \fBzfs\fR \fBreceive | recv\fR [\fB-vnFu\fR] \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR .fi .LP .nf \fBzfs\fR \fBreceive | recv\fR [\fB-vnFu\fR] [\fB-d\fR|\fB-e\fR] \fIfilesystem\fR .fi .LP .nf \fBzfs\fR \fBallow\fR \fIfilesystem\fR|\fIvolume\fR .fi .LP .nf \fBzfs\fR \fBallow\fR [\fB-ldug\fR] "\fIeveryone\fR"|\fIuser\fR|\fIgroup\fR[,...] \fIperm\fR|\fI@setname\fR[,...] \fIfilesystem\fR|\fIvolume\fR .fi .LP .nf \fBzfs\fR \fBallow\fR [\fB-ld\fR] \fB-e\fR \fIperm\fR|@\fIsetname\fR[,...] \fIfilesystem\fR|\fIvolume\fR .fi .LP .nf \fBzfs\fR \fBallow\fR \fB-c\fR \fIperm\fR|@\fIsetname\fR[,...] \fIfilesystem\fR|\fIvolume\fR .fi .LP .nf \fBzfs\fR \fBallow\fR \fB-s\fR @\fIsetname\fR \fIperm\fR|@\fIsetname\fR[,...] \fIfilesystem\fR|\fIvolume\fR .fi .LP .nf \fBzfs\fR \fBunallow\fR [\fB-rldug\fR] "\fIeveryone\fR"|\fIuser\fR|\fIgroup\fR[,...] [\fIperm\fR|@\fIsetname\fR[,... ]] \fIfilesystem\fR|\fIvolume\fR .fi .LP .nf \fBzfs\fR \fBunallow\fR [\fB-rld\fR] \fB-e\fR [\fIperm\fR|@\fIsetname\fR[,... ]] \fIfilesystem\fR|\fIvolume\fR .fi .LP .nf \fBzfs\fR \fBunallow\fR [\fB-r\fR] \fB-c\fR [\fIperm\fR|@\fIsetname\fR[ ... ]] \fIfilesystem\fR|\fIvolume\fR .fi .LP .nf \fBzfs\fR \fBunallow\fR [\fB-r\fR] \fB-s\fR @\fIsetname\fR [\fIperm\fR|@\fIsetname\fR[,... ]] \fIfilesystem\fR|\fIvolume\fR .fi .LP .nf \fBzfs\fR \fBhold\fR [\fB-r\fR] \fItag\fR \fIsnapshot\fR... 
.fi .LP .nf \fBzfs\fR \fBholds\fR [\fB-r\fR] \fIsnapshot\fR... .fi .LP .nf \fBzfs\fR \fBrelease\fR [\fB-r\fR] \fItag\fR \fIsnapshot\fR... .fi .LP .nf \fBzfs\fR \fBdiff\fR [\fB-FHt\fR] \fIsnapshot\fR \fIsnapshot|filesystem\fR .SH DESCRIPTION .sp .LP The \fBzfs\fR command configures \fBZFS\fR datasets within a \fBZFS\fR storage pool, as described in \fBzpool\fR(8). A dataset is identified by a unique path within the \fBZFS\fR namespace. For example: .sp .in +2 .nf pool/{filesystem,volume,snapshot} .fi .in -2 .sp .sp .LP where the maximum length of a dataset name is \fBMAXNAMELEN\fR (256 bytes). .sp .LP A dataset can be one of the following: .sp .ne 2 .mk .na \fB\fIfile system\fR\fR .ad .sp .6 .RS 4n A \fBZFS\fR dataset of type \fBfilesystem\fR can be mounted within the standard system namespace and behaves like other file systems. While \fBZFS\fR file systems are designed to be \fBPOSIX\fR compliant, known issues exist that prevent compliance in some cases. Applications that depend on standards conformance might fail due to nonstandard behavior when checking file system free space. .RE .sp .ne 2 .mk .na \fB\fIvolume\fR\fR .ad .sp .6 .RS 4n A logical volume exported as a raw or block device. This type of dataset should only be used under special circumstances. File systems are typically used in most environments. .RE .sp .ne 2 .mk .na \fB\fIsnapshot\fR\fR .ad .sp .6 .RS 4n A read-only version of a file system or volume at a given point in time. It is specified as \fIfilesystem@name\fR or \fIvolume@name\fR. .RE .SS "ZFS File System Hierarchy" .sp .LP A \fBZFS\fR storage pool is a logical collection of devices that provide space for datasets. A storage pool is also the root of the \fBZFS\fR file system hierarchy. .sp .LP The root of the pool can be accessed as a file system, such as mounting and unmounting, taking snapshots, and setting properties. The physical storage characteristics, however, are managed by the \fBzpool\fR(8) command. .sp .LP See \fBzpool\fR(8) for more information on creating and administering pools. .SS "Snapshots" .sp .LP A snapshot is a read-only copy of a file system or volume. Snapshots can be created extremely quickly, and initially consume no additional space within the pool. As data within the active dataset changes, the snapshot consumes more data than would otherwise be shared with the active dataset. .sp .LP Snapshots can have arbitrary names. Snapshots of volumes can be cloned or rolled back, but cannot be accessed independently. .sp .LP File system snapshots can be accessed under the \fB\&.zfs/snapshot\fR directory in the root of the file system. Snapshots are automatically mounted on demand and may be unmounted at regular intervals. The visibility of the \fB\&.zfs\fR directory can be controlled by the \fBsnapdir\fR property. .SS "Clones" .sp .LP A clone is a writable volume or file system whose initial contents are the same as another dataset. As with snapshots, creating a clone is nearly instantaneous, and initially consumes no additional space. .sp .LP Clones can only be created from a snapshot. When a snapshot is cloned, it creates an implicit dependency between the parent and child. Even though the clone is created somewhere else in the dataset hierarchy, the original snapshot cannot be destroyed as long as a clone exists. The \fBorigin\fR property exposes this dependency, and the \fBdestroy\fR command lists any such dependencies, if they exist. .sp .LP The clone parent-child dependency relationship can be reversed by using the \fBpromote\fR subcommand. 
This causes the "origin" file system to become a clone of the specified file system, which makes it possible to destroy the file system that the clone was created from. .SS "Mount Points" .sp .LP Creating a \fBZFS\fR file system is a simple operation, so the number of file systems per system is likely to be numerous. To cope with this, \fBZFS\fR automatically manages mounting and unmounting file systems without the need to edit the \fB/etc/fstab\fR file. All automatically managed file systems are mounted by \fBZFS\fR at boot time. .sp .LP By default, file systems are mounted under \fB/\fIpath\fR\fR, where \fIpath\fR is the name of the file system in the \fBZFS\fR namespace. Directories are created and destroyed as needed. .sp .LP A file system can also have a mount point set in the \fBmountpoint\fR property. This directory is created as needed, and \fBZFS\fR automatically mounts the file system when the \fBzfs mount -a\fR command is invoked (without editing \fB/etc/fstab\fR). The \fBmountpoint\fR property can be inherited, so if \fBpool/home\fR has a mount point of \fB/export/stuff\fR, then \fBpool/home/user\fR automatically inherits a mount point of \fB/export/stuff/user\fR. .sp .LP A file system \fBmountpoint\fR property of \fBnone\fR prevents the file system from being mounted. .sp .LP If needed, \fBZFS\fR file systems can also be managed with traditional tools (\fBmount\fR, \fBumount\fR, \fB/etc/fstab\fR). If a file system's mount point is set to \fBlegacy\fR, \fBZFS\fR makes no attempt to manage the file system, and the administrator is responsible for mounting and unmounting the file system. .SS "Deduplication" .sp .LP Deduplication is the process for removing redundant data at the block-level, reducing the total amount of data stored. If a file system has the \fBdedup\fR property enabled, duplicate data blocks are removed synchronously. The result is that only unique data is stored and common components are shared among files. .SS "Native Properties" .sp .LP Properties are divided into two types, native properties and user-defined (or "user") properties. Native properties either export internal statistics or control \fBZFS\fR behavior. In addition, native properties are either editable or read-only. User properties have no effect on \fBZFS\fR behavior, but you can use them to annotate datasets in a way that is meaningful in your environment. For more information about user properties, see the "User Properties" section, below. .sp .LP Every dataset has a set of properties that export statistics about the dataset as well as control various behaviors. Properties are inherited from the parent unless overridden by the child. Some properties apply only to certain types of datasets (file systems, volumes, or snapshots). .sp .LP The values of numeric properties can be specified using human-readable suffixes (for example, \fBk\fR, \fBKB\fR, \fBM\fR, \fBGb\fR, and so forth, up to \fBZ\fR for zettabyte). The following are all valid (and equal) specifications: .sp .in +2 .nf 1536M, 1.5g, 1.50GB .fi .in -2 .sp .sp .LP The values of non-numeric properties are case sensitive and must be lowercase, except for \fBmountpoint\fR, \fBsharenfs\fR, and \fBsharesmb\fR. .sp .LP The following native properties consist of read-only statistics about the dataset. These properties can be neither set, nor inherited. Native properties apply to all dataset types unless otherwise noted. 
.sp .ne 2 .mk .na \fB\fBavailable\fR\fR .ad .sp .6 .RS 4n The amount of space available to the dataset and all its children, assuming that there is no other activity in the pool. Because space is shared within a pool, availability can be limited by any number of factors, including physical pool size, quotas, reservations, or other datasets within the pool. .sp This property can also be referred to by its shortened column name, \fBavail\fR. .RE .sp .ne 2 .mk .na \fB\fBcompressratio\fR\fR .ad .sp .6 .RS 4n For non-snapshots, the compression ratio achieved for the \fBused\fR space of this dataset, expressed as a multiplier. The \fBused\fR property includes descendant datasets, and, for clones, does not include the space shared with the origin snapshot. For snapshots, the \fBcompressratio\fR is the same as the \fBrefcompressratio\fR property. Compression can be turned on by running: \fBzfs set compression=on \fIdataset\fR\fR. The default value is \fBoff\fR. .RE .sp .ne 2 .mk .na \fB\fBcreation\fR\fR .ad .sp .6 .RS 4n The time this dataset was created. .RE .sp .ne 2 .mk .na \fB\fBclones\fR\fR .ad .sp .6 .RS 4n For snapshots, this property is a comma-separated list of filesystems or volumes which are clones of this snapshot. The clones' \fBorigin\fR property is this snapshot. If the \fBclones\fR property is not empty, then this snapshot can not be destroyed (even with the \fB-r\fR or \fB-f\fR options). .RE .sp .ne 2 .na \fB\fBdefer_destroy\fR\fR .ad .sp .6 .RS 4n This property is \fBon\fR if the snapshot has been marked for deferred destruction by using the \fBzfs destroy\fR \fB-d\fR command. Otherwise, the property is \fBoff\fR. .RE .sp .ne 2 .mk .na \fB\fBmounted\fR\fR .ad .sp .6 .RS 4n For file systems, indicates whether the file system is currently mounted. This property can be either \fByes\fR or \fBno\fR. .RE .sp .ne 2 .mk .na \fB\fBorigin\fR\fR .ad .sp .6 .RS 4n For cloned file systems or volumes, the snapshot from which the clone was created. See also the \fBclones\fR property. .RE .sp .ne 2 .mk .na \fB\fBreferenced\fR\fR .ad .sp .6 .RS 4n The amount of data that is accessible by this dataset, which may or may not be shared with other datasets in the pool. When a snapshot or clone is created, it initially references the same amount of space as the file system or snapshot it was created from, since its contents are identical. .sp This property can also be referred to by its shortened column name, \fBrefer\fR. .RE .sp .ne 2 .mk .na \fB\fBrefcompressratio\fR\fR .ad .sp .6 .RS 4n The compression ratio achieved for the \fBreferenced\fR space of this dataset, expressed as a multiplier. See also the \fBcompressratio\fR property. .RE .sp .ne 2 .mk .na \fB\fBtype\fR\fR .ad .sp .6 .RS 4n The type of dataset: \fBfilesystem\fR, \fBvolume\fR, or \fBsnapshot\fR. .RE .sp .ne 2 .mk .na \fB\fBused\fR\fR .ad .sp .6 .RS 4n The amount of space consumed by this dataset and all its descendents. This is the value that is checked against this dataset's quota and reservation. The space used does not include this dataset's reservation, but does take into account the reservations of any descendent datasets. The amount of space that a dataset consumes from its parent, as well as the amount of space that are freed if this dataset is recursively destroyed, is the greater of its space used and its reservation. .sp When snapshots (see the "Snapshots" section) are created, their space is initially shared between the snapshot and the file system, and possibly with previous snapshots. 
As the file system changes, space that was previously shared becomes unique to the snapshot, and counted in the snapshot's space used. Additionally, deleting snapshots can increase the amount of space unique to (and used by) other snapshots. .sp The amount of space used, available, or referenced does not take into account pending changes. Pending changes are generally accounted for within a few seconds. Committing a change to a disk using \fBfsync\fR(2) or \fBO_SYNC\fR does not necessarily guarantee that the space usage information is updated immediately. .RE .sp .ne 2 .mk .na \fB\fBusedby*\fR\fR .ad .sp .6 .RS 4n The \fBusedby*\fR properties decompose the \fBused\fR properties into the various reasons that space is used. Specifically, \fBused\fR = \fBusedbychildren\fR + \fBusedbydataset\fR + \fBusedbyrefreservation\fR +, \fBusedbysnapshots\fR. These properties are only available for datasets created on \fBzpool\fR "version 13" pools. .RE .sp .ne 2 .mk .na \fB\fBusedbychildren\fR\fR .ad .sp .6 .RS 4n The amount of space used by children of this dataset, which would be freed if all the dataset's children were destroyed. .RE .sp .ne 2 .mk .na \fB\fBusedbydataset\fR\fR .ad .sp .6 .RS 4n The amount of space used by this dataset itself, which would be freed if the dataset were destroyed (after first removing any \fBrefreservation\fR and destroying any necessary snapshots or descendents). .RE .sp .ne 2 .mk .na \fB\fBusedbyrefreservation\fR\fR .ad .sp .6 .RS 4n The amount of space used by a \fBrefreservation\fR set on this dataset, which would be freed if the \fBrefreservation\fR was removed. .RE .sp .ne 2 .mk .na \fB\fBusedbysnapshots\fR\fR .ad .sp .6 .RS 4n The amount of space consumed by snapshots of this dataset. In particular, it is the amount of space that would be freed if all of this dataset's snapshots were destroyed. Note that this is not simply the sum of the snapshots' \fBused\fR properties because space can be shared by multiple snapshots. .RE .sp .ne 2 .mk .na \fB\fBuserused@\fR\fIuser\fR\fR .ad .sp .6 .RS 4n The amount of space consumed by the specified user in this dataset. Space is charged to the owner of each file, as displayed by \fBls\fR \fB-l\fR. The amount of space charged is displayed by \fBdu\fR and \fBls\fR \fB-s\fR. See the \fBzfs userspace\fR subcommand for more information. .sp Unprivileged users can access only their own space usage. The root user, or a user who has been granted the \fBuserused\fR privilege with \fBzfs allow\fR, can access everyone's usage. .sp The \fBuserused@\fR... properties are not displayed by \fBzfs get all\fR. The user's name must be appended after the \fB@\fR symbol, using one of the following forms: .RS +4 .TP .ie t \(bu .el o \fIPOSIX name\fR (for example, \fBjoe\fR) .RE .RS +4 .TP .ie t \(bu .el o \fIPOSIX numeric ID\fR (for example, \fB789\fR) .RE .RS +4 .TP .ie t \(bu .el o \fISID name\fR (for example, \fBjoe.smith@mydomain\fR) .RE .RS +4 .TP .ie t \(bu .el o \fISID numeric ID\fR (for example, \fBS-1-123-456-789\fR) .RE .RE .sp .ne 2 .mk .na \fB\fBuserrefs\fR\fR .ad .sp .6 .RS 4n This property is set to the number of user holds on this snapshot. User holds are set by using the \fBzfs hold\fR command. .RE .sp .ne 2 .mk .na \fB\fBgroupused@\fR\fIgroup\fR\fR .ad .sp .6 .RS 4n The amount of space consumed by the specified group in this dataset. Space is charged to the group of each file, as displayed by \fBls\fR \fB-l\fR. See the \fBuserused@\fR\fIuser\fR property for more information. 
.sp Unprivileged users can only access their own groups' space usage. The root user, or a user who has been granted the \fBgroupused\fR privilege with \fBzfs allow\fR, can access all groups' usage. .RE .sp .ne 2 .mk .na \fB\fBvolblocksize\fR=\fIblocksize\fR\fR .ad .sp .6 .RS 4n For volumes, specifies the block size of the volume. The \fBblocksize\fR cannot be changed once the volume has been written, so it should be set at volume creation time. The default \fBblocksize\fR for volumes is 8 Kbytes. Any power of 2 from 512 bytes to 128 Kbytes is valid. .sp This property can also be referred to by its shortened column name, \fBvolblock\fR. .RE .sp .ne 2 .na \fB\fBwritten\fR\fR .ad .sp .6 .RS 4n The amount of \fBreferenced\fR space written to this dataset since the previous snapshot. .RE .sp .ne 2 .na \fB\fBwritten@\fR\fIsnapshot\fR\fR .ad .sp .6 .RS 4n The amount of \fBreferenced\fR space written to this dataset since the specified snapshot. This is the space that is referenced by this dataset but was not referenced by the specified snapshot. .sp The \fIsnapshot\fR may be specified as a short snapshot name (just the part after the \fB@\fR), in which case it will be interpreted as a snapshot in the same filesystem as this dataset. The \fIsnapshot\fR be a full snapshot name (\fIfilesystem\fR@\fIsnapshot\fR), which for clones may be a snapshot in the origin's filesystem (or the origin of the origin's filesystem, etc). .RE .sp .LP The following native properties can be used to change the behavior of a \fBZFS\fR dataset. .sp .ne 2 .mk .na \fB\fBaclinherit\fR=\fBdiscard\fR | \fBnoallow\fR | \fBrestricted\fR | \fBpassthrough\fR | \fBpassthrough-x\fR\fR .ad .sp .6 .RS 4n Controls how \fBACL\fR entries are inherited when files and directories are created. A file system with an \fBaclinherit\fR property of \fBdiscard\fR does not inherit any \fBACL\fR entries. A file system with an \fBaclinherit\fR property value of \fBnoallow\fR only inherits inheritable \fBACL\fR entries that specify "deny" permissions. The property value \fBrestricted\fR (the default) removes the \fBwrite_acl\fR and \fBwrite_owner\fR permissions when the \fBACL\fR entry is inherited. A file system with an \fBaclinherit\fR property value of \fBpassthrough\fR inherits all inheritable \fBACL\fR entries without any modifications made to the \fBACL\fR entries when they are inherited. A file system with an \fBaclinherit\fR property value of \fBpassthrough-x\fR has the same meaning as \fBpassthrough\fR, except that the \fBowner@\fR, \fBgroup@\fR, and \fBeveryone@\fR \fBACE\fRs inherit the execute permission only if the file creation mode also requests the execute bit. .sp When the property value is set to \fBpassthrough\fR, files are created with a mode determined by the inheritable \fBACE\fRs. If no inheritable \fBACE\fRs exist that affect the mode, then the mode is set in accordance to the requested mode from the application. .RE .sp .ne 2 .mk .na \fB\fBaclmode\fR=\fBdiscard\fR | \fBgroupmask\fR | \fBpassthrough\fR\fR .ad .sp .6 .RS 4n Controls how an \fBACL\fR is modified during \fBchmod\fR(2). A file system with an \fBaclmode\fR property of \fBdiscard\fR deletes all \fBACL\fR entries that do not represent the mode of the file. An \fBaclmode\fR property of \fBgroupmask\fR (the default) reduces user or group permissions. The permissions are reduced, such that they are no greater than the group permission bits, unless it is a user entry that has the same \fBUID\fR as the owner of the file or directory. 
In this case, the \fBACL\fR permissions are reduced so that they are no greater than owner permission bits. A file system with an \fBaclmode\fR property of \fBpassthrough\fR indicates that no changes are made to the \fBACL\fR other than generating the necessary \fBACL\fR entries to represent the new mode of the file or directory. .RE .sp .ne 2 .mk .na \fB\fBatime\fR=\fBon\fR | \fBoff\fR\fR .ad .sp .6 .RS 4n Controls whether the access time for files is updated when they are read. Turning this property off avoids producing write traffic when reading files and can result in significant performance gains, though it might confuse mailers and other similar utilities. The default value is \fBon\fR. .RE .sp .ne 2 .mk .na \fB\fBcanmount\fR=\fBon\fR | \fBoff\fR | \fBnoauto\fR\fR .ad .sp .6 .RS 4n If this property is set to \fBoff\fR, the file system cannot be mounted, and is ignored by \fBzfs mount -a\fR. Setting this property to \fBoff\fR is similar to setting the \fBmountpoint\fR property to \fBnone\fR, except that the dataset still has a normal \fBmountpoint\fR property, which can be inherited. Setting this property to \fBoff\fR allows datasets to be used solely as a mechanism to inherit properties. One example of setting \fBcanmount=\fR\fBoff\fR is to have two datasets with the same \fBmountpoint\fR, so that the children of both datasets appear in the same directory, but might have different inherited characteristics. .sp When the \fBnoauto\fR option is set, a dataset can only be mounted and unmounted explicitly. The dataset is not mounted automatically when the dataset is created or imported, nor is it mounted by the \fBzfs mount -a\fR command or unmounted by the \fBzfs unmount -a\fR command. .sp This property is not inherited. .RE .sp .ne 2 .mk .na \fB\fBchecksum\fR=\fBon\fR | \fBoff\fR | \fBfletcher2,\fR| \fBfletcher4\fR | \fBsha256\fR\fR .ad .sp .6 .RS 4n Controls the checksum used to verify data integrity. The default value is \fBon\fR, which automatically selects an appropriate algorithm (currently, \fBfletcher4\fR, but this may change in future releases). The value \fBoff\fR disables integrity checking on user data. Disabling checksums is \fBNOT\fR a recommended practice. .sp Changing this property affects only newly-written data. .RE .sp .ne 2 .mk .na \fBcompression\fR=\fBon\fR | \fBoff\fR | \fBlzjb\fR | \fBgzip\fR | \fBgzip-\fR\fIN\fR | \fBzle\fR | \fBlz4\fR .ad .sp .6 .RS 4n Controls the compression algorithm used for this dataset. The \fBlzjb\fR compression algorithm is optimized for performance while providing decent data compression. Setting compression to \fBon\fR uses the \fBlzjb\fR compression algorithm. .sp The \fBgzip\fR compression algorithm uses the same compression as the \fBgzip\fR(1) command. You can specify the \fBgzip\fR level by using the value \fBgzip-\fR\fIN\fR where \fIN\fR is an integer from 1 (fastest) to 9 (best compression ratio). Currently, \fBgzip\fR is equivalent to \fBgzip-6\fR (which is also the default for \fBgzip\fR(1)). .sp The \fBzle\fR (zero-length encoding) compression algorithm is a fast and simple algorithm to eliminate runs of zeroes. .sp The \fBlz4\fR compression algorithm is a high-performance replacement for the \fBlzjb\fR algorithm. It features significantly faster compression and decompression, as well as a moderately higher compression ratio than \fBlzjb\fR, but can only be used on pools with the \fBlz4_compress\fR feature set to \fIenabled\fR. See \fBzpool-features\fR(5) for details on ZFS feature flags and the \fBlz4_compress\fR feature. 
.sp This property can also be referred to by its shortened column name \fBcompress\fR. Changing this property affects only newly-written data. .RE .sp .ne 2 .mk .na \fB\fBcopies\fR=\fB1\fR | \fB2\fR | \fB3\fR\fR .ad .sp .6 .RS 4n Controls the number of copies of data stored for this dataset. These copies are in addition to any redundancy provided by the pool, for example, mirroring or RAID-Z. The copies are stored on different disks, if possible. The space used by multiple copies is charged to the associated file and dataset, changing the \fBused\fR property and counting against quotas and reservations. .sp Changing this property only affects newly-written data. Therefore, set this property at file system creation time by using the \fB-o\fR \fBcopies=\fR\fIN\fR option. .RE .sp .ne 2 .mk .na \fB\fBdedup\fR=\fBon\fR | \fBoff\fR | \fBverify\fR | \fBsha256\fR[,\fBverify\fR]\fR .ad .sp .6 .RS 4n Controls whether deduplication is in effect for a dataset. The default value is \fBoff\fR. The default checksum used for deduplication is \fBsha256\fR (subject to change). When \fBdedup\fR is enabled, the \fBdedup\fR checksum algorithm overrides the \fBchecksum\fR property. Setting the value to \fBverify\fR is equivalent to specifying \fBsha256,verify\fR. .sp If the property is set to \fBverify\fR, then, whenever two blocks have the same signature, ZFS will do a byte-for-byte comparison with the existing block to ensure that the contents are identical. .RE .sp .ne 2 .mk .na \fB\fBdevices\fR=\fBon\fR | \fBoff\fR\fR .ad .sp .6 .RS 4n Controls whether device nodes can be opened on this file system. The default value is \fBon\fR. .RE .sp .ne 2 .mk .na \fB\fBexec\fR=\fBon\fR | \fBoff\fR\fR .ad .sp .6 .RS 4n Controls whether processes can be executed from within this file system. The default value is \fBon\fR. .RE .sp .ne 2 .mk .na \fB\fBmlslabel\fR=\fIlabel\fR | \fBnone\fR\fR .ad .sp .6 .RS 4n The \fBmlslabel\fR property is a sensitivity label that determines if a dataset can be mounted in a zone on a system with Trusted Extensions enabled. If the labeled dataset matches the labeled zone, the dataset can be mounted and accessed from the labeled zone. .sp When the \fBmlslabel\fR property is not set, the default value is \fBnone\fR. Setting the \fBmlslabel\fR property to \fBnone\fR is equivalent to removing the property. .sp The \fBmlslabel\fR property can be modified only when Trusted Extensions is enabled and only with appropriate privilege. Rights to modify it cannot be delegated. When changing a label to a higher label or setting the initial dataset label, the \fB{PRIV_FILE_UPGRADE_SL}\fR privilege is required. When changing a label to a lower label or the default (\fBnone\fR), the \fB{PRIV_FILE_DOWNGRADE_SL}\fR privilege is required. Changing the dataset to labels other than the default can be done only when the dataset is not mounted. When a dataset with the default label is mounted into a labeled-zone, the mount operation automatically sets the \fBmlslabel\fR property to the label of that zone. .sp When Trusted Extensions is \fBnot\fR enabled, only datasets with the default label (\fBnone\fR) can be mounted. .sp Zones are a Solaris feature and are not relevant on Linux. .RE .sp .ne 2 .mk .na \fB\fBmountpoint\fR=\fIpath\fR | \fBnone\fR | \fBlegacy\fR\fR .ad .sp .6 .RS 4n Controls the mount point used for this file system. See the "Mount Points" section for more information on how this property is used. 
.sp When the \fBmountpoint\fR property is changed for a file system, the file system and any children that inherit the mount point are unmounted. If the new value is \fBlegacy\fR, then they remain unmounted. Otherwise, they are automatically remounted in the new location if the property was previously \fBlegacy\fR or \fBnone\fR, or if they were mounted before the property was changed. In addition, any shared file systems are unshared and shared in the new location. .RE .sp .ne 2 .mk .na \fB\fBnbmand\fR=\fBon\fR | \fBoff\fR\fR .ad .sp .6 .RS 4n Controls whether the file system should be mounted with \fBnbmand\fR (Non Blocking mandatory locks). This is used for \fBCIFS\fR clients. Changes to this property only take effect when the file system is umounted and remounted. See \fBmount\fR(8) for more information on \fBnbmand\fR mounts. .RE .sp .ne 2 .mk .na \fB\fBprimarycache\fR=\fBall\fR | \fBnone\fR | \fBmetadata\fR\fR .ad .sp .6 .RS 4n Controls what is cached in the primary cache (ARC). If this property is set to \fBall\fR, then both user data and metadata is cached. If this property is set to \fBnone\fR, then neither user data nor metadata is cached. If this property is set to \fBmetadata\fR, then only metadata is cached. The default value is \fBall\fR. .RE .sp .ne 2 .mk .na \fB\fBquota\fR=\fIsize\fR | \fBnone\fR\fR .ad .sp .6 .RS 4n Limits the amount of space a dataset and its descendents can consume. This property enforces a hard limit on the amount of space used. This includes all space consumed by descendents, including file systems and snapshots. Setting a quota on a descendent of a dataset that already has a quota does not override the ancestor's quota, but rather imposes an additional limit. .sp Quotas cannot be set on volumes, as the \fBvolsize\fR property acts as an implicit quota. .RE .sp .ne 2 .mk .na \fB\fBuserquota@\fR\fIuser\fR=\fIsize\fR | \fBnone\fR\fR .ad .sp .6 .RS 4n Limits the amount of space consumed by the specified user. Similar to the \fBrefquota\fR property, the \fBuserquota\fR space calculation does not include space that is used by descendent datasets, such as snapshots and clones. User space consumption is identified by the \fBuserspace@\fR\fIuser\fR property. .sp Enforcement of user quotas may be delayed by several seconds. This delay means that a user might exceed their quota before the system notices that they are over quota and begins to refuse additional writes with the \fBEDQUOT\fR error message . See the \fBzfs userspace\fR subcommand for more information. .sp Unprivileged users can only access their own groups' space usage. The root user, or a user who has been granted the \fBuserquota\fR privilege with \fBzfs allow\fR, can get and set everyone's quota. .sp This property is not available on volumes, on file systems before version 4, or on pools before version 15. The \fBuserquota@\fR... properties are not displayed by \fBzfs get all\fR. The user's name must be appended after the \fB@\fR symbol, using one of the following forms: .RS +4 .TP .ie t \(bu .el o \fIPOSIX name\fR (for example, \fBjoe\fR) .RE .RS +4 .TP .ie t \(bu .el o \fIPOSIX numeric ID\fR (for example, \fB789\fR) .RE .RS +4 .TP .ie t \(bu .el o \fISID name\fR (for example, \fBjoe.smith@mydomain\fR) .RE .RS +4 .TP .ie t \(bu .el o \fISID numeric ID\fR (for example, \fBS-1-123-456-789\fR) .RE .RE .sp .ne 2 .mk .na \fB\fBgroupquota@\fR\fIgroup\fR=\fIsize\fR | \fBnone\fR\fR .ad .sp .6 .RS 4n Limits the amount of space consumed by the specified group. 
Group space consumption is identified by the \fBuserquota@\fR\fIuser\fR property. .sp Unprivileged users can access only their own groups' space usage. The root user, or a user who has been granted the \fBgroupquota\fR privilege with \fBzfs allow\fR, can get and set all groups' quotas. .RE .sp .ne 2 .mk .na \fB\fBreadonly\fR=\fBon\fR | \fBoff\fR\fR .ad .sp .6 .RS 4n Controls whether this dataset can be modified. The default value is \fBoff\fR. .sp This property can also be referred to by its shortened column name, \fBrdonly\fR. .RE .sp .ne 2 .mk .na \fB\fBrecordsize\fR=\fIsize\fR\fR .ad .sp .6 .RS 4n Specifies a suggested block size for files in the file system. This property is designed solely for use with database workloads that access files in fixed-size records. \fBZFS\fR automatically tunes block sizes according to internal algorithms optimized for typical access patterns. .sp For databases that create very large files but access them in small random chunks, these algorithms may be suboptimal. Specifying a \fBrecordsize\fR greater than or equal to the record size of the database can result in significant performance gains. Use of this property for general purpose file systems is strongly discouraged, and may adversely affect performance. .sp The size specified must be a power of two greater than or equal to 512 and less than or equal to 128 Kbytes. .sp Changing the file system's \fBrecordsize\fR affects only files created afterward; existing files are unaffected. .sp This property can also be referred to by its shortened column name, \fBrecsize\fR. .RE .sp .ne 2 .mk .na \fB\fBrefquota\fR=\fIsize\fR | \fBnone\fR\fR .ad .sp .6 .RS 4n Limits the amount of space a dataset can consume. This property enforces a hard limit on the amount of space used. This hard limit does not include space used by descendents, including file systems and snapshots. .RE .sp .ne 2 .mk .na \fB\fBrefreservation\fR=\fIsize\fR | \fBnone\fR\fR .ad .sp .6 .RS 4n The minimum amount of space guaranteed to a dataset, not including its descendents. When the amount of space used is below this value, the dataset is treated as if it were taking up the amount of space specified by \fBrefreservation\fR. The \fBrefreservation\fR reservation is accounted for in the parent datasets' space used, and counts against the parent datasets' quotas and reservations. .sp If \fBrefreservation\fR is set, a snapshot is only allowed if there is enough free pool space outside of this reservation to accommodate the current number of "referenced" bytes in the dataset. .sp This property can also be referred to by its shortened column name, \fBrefreserv\fR. .RE .sp .ne 2 .mk .na \fB\fBreservation\fR=\fIsize\fR | \fBnone\fR\fR .ad .sp .6 .RS 4n The minimum amount of space guaranteed to a dataset and its descendents. When the amount of space used is below this value, the dataset is treated as if it were taking up the amount of space specified by its reservation. Reservations are accounted for in the parent datasets' space used, and count against the parent datasets' quotas and reservations. .sp This property can also be referred to by its shortened column name, \fBreserv\fR. .RE .sp .ne 2 .mk .na \fB\fBsecondarycache\fR=\fBall\fR | \fBnone\fR | \fBmetadata\fR\fR .ad .sp .6 .RS 4n Controls what is cached in the secondary cache (L2ARC). If this property is set to \fBall\fR, then both user data and metadata is cached. If this property is set to \fBnone\fR, then neither user data nor metadata is cached. 
If this property is set to \fBmetadata\fR, then only metadata is cached. The default value is \fBall\fR. .RE .sp .ne 2 .mk .na \fB\fBsetuid\fR=\fBon\fR | \fBoff\fR\fR .ad .sp .6 .RS 4n Controls whether the set-\fBUID\fR bit is respected for the file system. The default value is \fBon\fR. .RE .sp .ne 2 .mk .na \fB\fBshareiscsi\fR=\fBon\fR | \fBoff\fR\fR .ad .sp .6 .RS 4n Like the \fBsharenfs\fR property, \fBshareiscsi\fR indicates whether a \fBZFS\fR volume is exported as an \fBiSCSI\fR target. The acceptable values for this property are \fBon\fR, \fBoff\fR, and \fBtype=disk\fR. The default value is \fBoff\fR. In the future, other target types might be supported. For example, \fBtape\fR. .sp You might want to set \fBshareiscsi=on\fR for a file system so that all \fBZFS\fR volumes within the file system are shared by default. However, setting this property on a file system has no direct effect. .RE .sp .ne 2 .mk .na \fB\fBsharesmb\fR=\fBon\fR | \fBoff\fR\fR .ad .sp .6 .RS 4n Controls whether the file system is shared by using \fBSamba USERSHARES\fR, and what options are to be used. Otherwise, the file system is automatically shared and unshared with the \fBzfs share\fR and \fBzfs unshare\fR commands. If the property is set to \fBon\fR, the \fBnet\fR(8) command is invoked to create a \fBUSERSHARE\fR. .sp Because \fBSMB\fR shares require a resource name, a unique resource name is constructed from the dataset name. The constructed name is a copy of the dataset name except that the characters in the dataset name, which would be illegal in the resource name, are replaced with underscore (\fB_\fR) characters. The ZFS On Linux driver does not (yet) support additional options which might be available in the Solaris version. .sp If the \fBsharesmb\fR property is set to \fBoff\fR, the file systems are unshared. .sp In Linux, the share is created with the ACL (Access Control List) "Everyone:F" ("F" stands for "full permissions", i.e. read and write permissions) and no guest access by default (which means Samba must be able to authenticate a real user against the system passwd/shadow files, LDAP, or smbpasswd). This means that any additional access control (disallowing access for specific users, for example) must be done on the underlying filesystem. .sp .in +2 Example of mounting an SMB filesystem shared through ZFS (share/tmp): .mk Note that a user name and password \fBmust\fR be given! .sp .in +2 smbmount //127.0.0.1/share_tmp /mnt/tmp -o user=workgroup/turbo,password=obrut,uid=1000 .in -2 .in -2 .sp .ne 2 .mk .na \fBMinimal /etc/samba/smb.conf configuration\fR .sp .in +2 * Samba will need to listen to 'localhost' (127.0.0.1) for the zfs utilities to communicate with Samba. This is the default behavior for most Linux distributions. .sp * Samba must be able to authenticate a user. This can be done in a number of ways, depending on whether you are using the system password file, LDAP, or the Samba-specific smbpasswd file. How to do this is outside the scope of this manual. Please refer to the \fBsmb.conf\fR(5) man page for more information. .sp * See the \fBUSERSHARE\fR section of the \fBsmb.conf\fR(5) man page for all configuration options, in case you need to modify any options of the share afterwards. Do note that any changes done with the \fBnet\fR(8) command will be undone if the share is ever unshared (such as at a reboot).
In the future, ZoL will be able to set specific options directly using sharesmb=/dev/null " \ " 2>/dev/null; " \ "umount -t zfs -n %s'%s'" static int __zfsctl_unmount_snapshot(zfs_snapentry_t *sep, int flags) { char *argv[] = { "/bin/sh", "-c", NULL, NULL }; char *envp[] = { NULL }; int error; argv[2] = kmem_asprintf(SET_UNMOUNT_CMD, flags & MNT_FORCE ? "-f " : "", sep->se_path); error = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC); strfree(argv[2]); /* * The umount system utility will return 256 on error. We must * assume this error is because the file system is busy so it is * converted to the more sensible EBUSY. */ if (error) error = EBUSY; /* * This was the result of a manual unmount, cancel the delayed work * to prevent zfsctl_expire_snapshot() from attempting a unmount. */ if ((error == 0) && !(flags & MNT_EXPIRE)) taskq_cancel_id(zfs_expire_taskq, sep->se_taskqid); return (error); } int zfsctl_unmount_snapshot(zfs_sb_t *zsb, char *name, int flags) { zfs_snapentry_t search; zfs_snapentry_t *sep; int error = 0; mutex_enter(&zsb->z_ctldir_lock); search.se_name = name; sep = avl_find(&zsb->z_ctldir_snaps, &search, NULL); if (sep) { avl_remove(&zsb->z_ctldir_snaps, sep); mutex_exit(&zsb->z_ctldir_lock); error = __zfsctl_unmount_snapshot(sep, flags); mutex_enter(&zsb->z_ctldir_lock); if (error == EBUSY) avl_add(&zsb->z_ctldir_snaps, sep); else zfsctl_sep_free(sep); } else { error = ENOENT; } mutex_exit(&zsb->z_ctldir_lock); ASSERT3S(error, >=, 0); return (error); } /* * Traverse all mounted snapshots and attempt to unmount them. This * is best effort, on failure EEXIST is returned and count will be set * to the number of file snapshots which could not be unmounted. */ int zfsctl_unmount_snapshots(zfs_sb_t *zsb, int flags, int *count) { zfs_snapentry_t *sep, *next; int error = 0; *count = 0; ASSERT(zsb->z_ctldir != NULL); mutex_enter(&zsb->z_ctldir_lock); sep = avl_first(&zsb->z_ctldir_snaps); while (sep != NULL) { next = AVL_NEXT(&zsb->z_ctldir_snaps, sep); avl_remove(&zsb->z_ctldir_snaps, sep); mutex_exit(&zsb->z_ctldir_lock); error = __zfsctl_unmount_snapshot(sep, flags); mutex_enter(&zsb->z_ctldir_lock); if (error == EBUSY) { avl_add(&zsb->z_ctldir_snaps, sep); (*count)++; } else { zfsctl_sep_free(sep); } sep = next; } mutex_exit(&zsb->z_ctldir_lock); return ((*count > 0) ? EEXIST : 0); } #define SET_MOUNT_CMD \ "exec 0/dev/null " \ " 2>/dev/null; " \ "mount -t zfs -n '%s' '%s'" int zfsctl_mount_snapshot(struct path *path, int flags) { struct dentry *dentry = path->dentry; struct inode *ip = dentry->d_inode; zfs_sb_t *zsb = ITOZSB(ip); char *full_name, *full_path; zfs_snapentry_t *sep; zfs_snapentry_t search; char *argv[] = { "/bin/sh", "-c", NULL, NULL }; char *envp[] = { NULL }; int error; ZFS_ENTER(zsb); full_name = kmem_zalloc(MAXNAMELEN, KM_SLEEP); full_path = kmem_zalloc(PATH_MAX, KM_SLEEP); error = zfsctl_snapshot_zname(ip, dname(dentry), MAXNAMELEN, full_name); if (error) goto error; error = zfsctl_snapshot_zpath(path, PATH_MAX, full_path); if (error) goto error; /* * Attempt to mount the snapshot from user space. Normally this * would be done using the vfs_kern_mount() function, however that * function is marked GPL-only and cannot be used. On error we * careful to log the real error to the console and return EISDIR * to safely abort the automount. This should be very rare. 
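 * For reference, the command handed to /bin/sh here expands to roughly
 * "mount -t zfs -n '<dataset>@<snapshot>' '<mountpoint>/.zfs/snapshot/<snapshot>'";
 * the exact path is whatever zfsctl_snapshot_zpath() resolved above.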
*/ argv[2] = kmem_asprintf(SET_MOUNT_CMD, full_name, full_path); error = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC); strfree(argv[2]); if (error) { printk("ZFS: Unable to automount %s at %s: %d\n", full_name, full_path, error); error = EISDIR; goto error; } mutex_enter(&zsb->z_ctldir_lock); /* * Ensure a previous entry does not exist, if it does safely remove * it any cancel the outstanding expiration. This can occur when a * snapshot is manually unmounted and then an automount is triggered. */ search.se_name = full_name; sep = avl_find(&zsb->z_ctldir_snaps, &search, NULL); if (sep) { avl_remove(&zsb->z_ctldir_snaps, sep); taskq_cancel_id(zfs_expire_taskq, sep->se_taskqid); zfsctl_sep_free(sep); } sep = zfsctl_sep_alloc(); sep->se_name = full_name; sep->se_path = full_path; sep->se_inode = ip; avl_add(&zsb->z_ctldir_snaps, sep); sep->se_taskqid = taskq_dispatch_delay(zfs_expire_taskq, zfsctl_expire_snapshot, sep, TQ_SLEEP, ddi_get_lbolt() + zfs_expire_snapshot * HZ); mutex_exit(&zsb->z_ctldir_lock); error: if (error) { kmem_free(full_name, MAXNAMELEN); kmem_free(full_path, PATH_MAX); } ZFS_EXIT(zsb); return (error); } /* * Check if this super block has a matching objset id. */ static int zfsctl_test_super(struct super_block *sb, void *objsetidp) { zfs_sb_t *zsb = sb->s_fs_info; uint64_t objsetid = *(uint64_t *)objsetidp; return (dmu_objset_id(zsb->z_os) == objsetid); } /* * Prevent a new super block from being allocated if an existing one * could not be located. We only want to preform a lookup operation. */ static int zfsctl_set_super(struct super_block *sb, void *objsetidp) { return (-EEXIST); } int zfsctl_lookup_objset(struct super_block *sb, uint64_t objsetid, zfs_sb_t **zsbp) { zfs_sb_t *zsb = sb->s_fs_info; struct super_block *sbp; zfs_snapentry_t *sep; uint64_t id; int error; ASSERT(zsb->z_ctldir != NULL); mutex_enter(&zsb->z_ctldir_lock); /* * Verify that the snapshot is mounted. */ sep = avl_first(&zsb->z_ctldir_snaps); while (sep != NULL) { error = dmu_snapshot_lookup(zsb->z_os, sep->se_name, &id); if (error) goto out; if (id == objsetid) break; sep = AVL_NEXT(&zsb->z_ctldir_snaps, sep); } if (sep != NULL) { /* * Lookup the mounted root rather than the covered mount * point. This may fail if the snapshot has just been * unmounted by an unrelated user space process. This * race cannot occur to an expired mount point because * we hold the zsb->z_ctldir_lock to prevent the race. */ sbp = zpl_sget(&zpl_fs_type, zfsctl_test_super, zfsctl_set_super, 0, &id); if (IS_ERR(sbp)) { error = -PTR_ERR(sbp); } else { *zsbp = sbp->s_fs_info; deactivate_super(sbp); } } else { error = EINVAL; } out: mutex_exit(&zsb->z_ctldir_lock); ASSERT3S(error, >=, 0); return (error); } /* ARGSUSED */ int zfsctl_shares_lookup(struct inode *dip, char *name, struct inode **ipp, int flags, cred_t *cr, int *direntflags, pathname_t *realpnp) { zfs_sb_t *zsb = ITOZSB(dip); struct inode *ip; znode_t *dzp; int error; ZFS_ENTER(zsb); if (zsb->z_shares_dir == 0) { ZFS_EXIT(zsb); return (ENOTSUP); } error = zfs_zget(zsb, zsb->z_shares_dir, &dzp); if (error) { ZFS_EXIT(zsb); return (error); } error = zfs_lookup(ZTOI(dzp), name, &ip, 0, cr, NULL, NULL); iput(ZTOI(dzp)); ZFS_EXIT(zsb); return (error); } /* * Initialize the various pieces we'll need to create and manipulate .zfs * directories. Currently this is unused but available. */ void zfsctl_init(void) { zfs_expire_taskq = taskq_create("z_unmount", 1, maxclsyspri, 1, 8, TASKQ_PREPOPULATE); } /* * Cleanup the various pieces we needed for .zfs directories. 
In particular * ensure the expiry timer is canceled safely. */ void zfsctl_fini(void) { taskq_destroy(zfs_expire_taskq); } module_param(zfs_expire_snapshot, int, 0644); MODULE_PARM_DESC(zfs_expire_snapshot, "Seconds to expire .zfs/snapshot"); diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c index a9184a1b61bd..e64d6a1f04eb 100644 --- a/module/zfs/zfs_ioctl.c +++ b/module/zfs/zfs_ioctl.c @@ -1,5379 +1,5967 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Portions Copyright 2011 Martin Matuska * Portions Copyright 2012 Pawel Jakub Dawidek * Copyright (c) 2012, Joyent, Inc. All rights reserved. * Copyright 2011 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2012 by Delphix. All rights reserved. * Copyright (c) 2012, Joyent, Inc. All rights reserved. * Copyright (c) 2013 by Saso Kiselkov. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. + */ + +/* + * ZFS ioctls. + * + * This file handles the ioctls to /dev/zfs, used for configuring ZFS storage + * pools and filesystems, e.g. with /sbin/zfs and /sbin/zpool. + * + * There are two ways that we handle ioctls: the legacy way where almost + * all of the logic is in the ioctl callback, and the new way where most + * of the marshalling is handled in the common entry point, zfsdev_ioctl(). + * + * Non-legacy ioctls should be registered by calling + * zfs_ioctl_register() from zfs_ioctl_init(). The ioctl is invoked + * from userland by lzc_ioctl(). + * + * The registration arguments are as follows: + * + * const char *name + * The name of the ioctl. This is used for history logging. If the + * ioctl returns successfully (the callback returns 0), and allow_log + * is true, then a history log entry will be recorded with the input & + * output nvlists. The log entry can be printed with "zpool history -i". + * + * zfs_ioc_t ioc + * The ioctl request number, which userland will pass to ioctl(2). + * The ioctl numbers can change from release to release, because + * the caller (libzfs) must be matched to the kernel. + * + * zfs_secpolicy_func_t *secpolicy + * This function will be called before the zfs_ioc_func_t, to + * determine if this operation is permitted. It should return EPERM + * on failure, and 0 on success. Checks include determining if the + * dataset is visible in this zone, and if the user has either all + * zfs privileges in the zone (SYS_MOUNT), or has been granted permission + * to do this operation on this dataset with "zfs allow". + * + * zfs_ioc_namecheck_t namecheck + * This specifies what to expect in the zfs_cmd_t:zc_name -- a pool + * name, a dataset name, or nothing. If the name is not well-formed, + * the ioctl will fail and the callback will not be called. 
+ * Therefore, the callback can assume that the name is well-formed + * (e.g. is null-terminated, doesn't have more than one '@' character, + * doesn't have invalid characters). + * + * zfs_ioc_poolcheck_t pool_check + * This specifies requirements on the pool state. If the pool does + * not meet them (is suspended or is readonly), the ioctl will fail + * and the callback will not be called. If any checks are specified + * (i.e. it is not POOL_CHECK_NONE), namecheck must not be NO_NAME. + * Multiple checks can be or-ed together (e.g. POOL_CHECK_SUSPENDED | + * POOL_CHECK_READONLY). + * + * boolean_t smush_outnvlist + * If smush_outnvlist is true, then the output is presumed to be a + * list of errors, and it will be "smushed" down to fit into the + * caller's buffer, by removing some entries and replacing them with a + * single "N_MORE_ERRORS" entry indicating how many were removed. See + * nvlist_smush() for details. If smush_outnvlist is false, and the + * outnvlist does not fit into the userland-provided buffer, then the + * ioctl will fail with ENOMEM. + * + * zfs_ioc_func_t *func + * The callback function that will perform the operation. + * + * The callback should return 0 on success, or an error number on + * failure. If the function fails, the userland ioctl will return -1, + * and errno will be set to the callback's return value. The callback + * will be called with the following arguments: + * + * const char *name + * The name of the pool or dataset to operate on, from + * zfs_cmd_t:zc_name. The 'namecheck' argument specifies the + * expected type (pool, dataset, or none). + * + * nvlist_t *innvl + * The input nvlist, deserialized from zfs_cmd_t:zc_nvlist_src. Or + * NULL if no input nvlist was provided. Changes to this nvlist are + * ignored. If the input nvlist could not be deserialized, the + * ioctl will fail and the callback will not be called. + * + * nvlist_t *outnvl + * The output nvlist, initially empty. The callback can fill it in, + * and it will be returned to userland by serializing it into + * zfs_cmd_t:zc_nvlist_dst. If it is non-empty, and serialization + * fails (e.g. because the caller didn't supply a large enough + * buffer), then the overall ioctl will fail. See the + * 'smush_nvlist' argument above for additional behaviors. + * + * There are two typical uses of the output nvlist: + * - To return state, e.g. property values. In this case, + * smush_outnvlist should be false. If the buffer was not large + * enough, the caller will reallocate a larger buffer and try + * the ioctl again. + * + * - To return multiple errors from an ioctl which makes on-disk + * changes. In this case, smush_outnvlist should be true. + * Ioctls which make on-disk modifications should generally not + * use the outnvl if they succeed, because the caller can not + * distinguish between the operation failing, and + * deserialization failing. 
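+ *
+ * For example (illustrative only; see zfs_ioctl_init() for the
+ * authoritative registrations and the exact argument order):
+ *
+ *   zfs_ioctl_register("snapshot", ZFS_IOC_SNAPSHOT,
+ *       zfs_ioc_snapshot, zfs_secpolicy_snapshot, POOL_NAME,
+ *       POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);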
*/ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "zfs_namecheck.h" #include "zfs_prop.h" #include "zfs_deleg.h" #include "zfs_comutil.h" kmutex_t zfsdev_state_lock; list_t zfsdev_state_list; extern void zfs_init(void); extern void zfs_fini(void); -typedef int zfs_ioc_func_t(zfs_cmd_t *); -typedef int zfs_secpolicy_func_t(zfs_cmd_t *, cred_t *); +uint_t zfs_fsyncer_key; +extern uint_t rrw_tsd_key; +static uint_t zfs_allow_log_key; + +typedef int zfs_ioc_legacy_func_t(zfs_cmd_t *); +typedef int zfs_ioc_func_t(const char *, nvlist_t *, nvlist_t *); +typedef int zfs_secpolicy_func_t(zfs_cmd_t *, nvlist_t *, cred_t *); typedef enum { NO_NAME, POOL_NAME, DATASET_NAME } zfs_ioc_namecheck_t; typedef enum { POOL_CHECK_NONE = 1 << 0, POOL_CHECK_SUSPENDED = 1 << 1, - POOL_CHECK_READONLY = 1 << 2 + POOL_CHECK_READONLY = 1 << 2, } zfs_ioc_poolcheck_t; typedef struct zfs_ioc_vec { + zfs_ioc_legacy_func_t *zvec_legacy_func; zfs_ioc_func_t *zvec_func; zfs_secpolicy_func_t *zvec_secpolicy; zfs_ioc_namecheck_t zvec_namecheck; - boolean_t zvec_his_log; + boolean_t zvec_allow_log; zfs_ioc_poolcheck_t zvec_pool_check; + boolean_t zvec_smush_outnvlist; + const char *zvec_name; } zfs_ioc_vec_t; /* This array is indexed by zfs_userquota_prop_t */ static const char *userquota_perms[] = { ZFS_DELEG_PERM_USERUSED, ZFS_DELEG_PERM_USERQUOTA, ZFS_DELEG_PERM_GROUPUSED, ZFS_DELEG_PERM_GROUPQUOTA, }; static int zfs_ioc_userspace_upgrade(zfs_cmd_t *zc); static int zfs_check_settable(const char *name, nvpair_t *property, cred_t *cr); static int zfs_check_clearable(char *dataset, nvlist_t *props, nvlist_t **errors); static int zfs_fill_zplprops_root(uint64_t, nvlist_t *, nvlist_t *, boolean_t *); -int zfs_set_prop_nvlist(const char *, zprop_source_t, nvlist_t *, nvlist_t **); +int zfs_set_prop_nvlist(const char *, zprop_source_t, nvlist_t *, nvlist_t *); +static int get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp); static int zfs_prop_activate_feature(dsl_pool_t *dp, zfeature_info_t *feature); static int zfs_prop_activate_feature_check(void *arg1, void *arg2, dmu_tx_t *tx); static void zfs_prop_activate_feature_sync(void *arg1, void *arg2, dmu_tx_t *tx); static void history_str_free(char *buf) { kmem_free(buf, HIS_MAX_RECORD_LEN); } static char * history_str_get(zfs_cmd_t *zc) { char *buf; if (zc->zc_history == 0) return (NULL); buf = kmem_alloc(HIS_MAX_RECORD_LEN, KM_SLEEP | KM_NODEBUG); if (copyinstr((void *)(uintptr_t)zc->zc_history, buf, HIS_MAX_RECORD_LEN, NULL) != 0) { history_str_free(buf); return (NULL); } buf[HIS_MAX_RECORD_LEN -1] = '\0'; return (buf); } /* * Check to see if the named dataset is currently defined as bootable */ static boolean_t zfs_is_bootfs(const char *name) { objset_t *os; if (dmu_objset_hold(name, FTAG, &os) == 0) { boolean_t ret; ret = (dmu_objset_id(os) == spa_bootfs(dmu_objset_spa(os))); dmu_objset_rele(os, FTAG); return (ret); } return (B_FALSE); } /* * zfs_earlier_version * * Return non-zero if the spa version is less than requested version. 
*/ static int zfs_earlier_version(const char *name, int version) { spa_t *spa; if (spa_open(name, &spa, FTAG) == 0) { if (spa_version(spa) < version) { spa_close(spa, FTAG); return (1); } spa_close(spa, FTAG); } return (0); } /* * zpl_earlier_version * * Return TRUE if the ZPL version is less than requested version. */ static boolean_t zpl_earlier_version(const char *name, int version) { objset_t *os; boolean_t rc = B_TRUE; if (dmu_objset_hold(name, FTAG, &os) == 0) { uint64_t zplversion; if (dmu_objset_type(os) != DMU_OST_ZFS) { dmu_objset_rele(os, FTAG); return (B_TRUE); } /* XXX reading from non-owned objset */ if (zfs_get_zplprop(os, ZFS_PROP_VERSION, &zplversion) == 0) rc = zplversion < version; dmu_objset_rele(os, FTAG); } return (rc); } static void zfs_log_history(zfs_cmd_t *zc) { spa_t *spa; char *buf; if ((buf = history_str_get(zc)) == NULL) return; if (spa_open(zc->zc_name, &spa, FTAG) == 0) { if (spa_version(spa) >= SPA_VERSION_ZPOOL_HISTORY) - (void) spa_history_log(spa, buf, LOG_CMD_NORMAL); + (void) spa_history_log(spa, buf); spa_close(spa, FTAG); } history_str_free(buf); } /* * Policy for top-level read operations (list pools). Requires no privileges, * and can be used in the local zone, as there is no associated dataset. */ /* ARGSUSED */ static int -zfs_secpolicy_none(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_none(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { return (0); } /* * Policy for dataset read operations (list children, get statistics). Requires * no privileges, but must be visible in the local zone. */ /* ARGSUSED */ static int -zfs_secpolicy_read(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_read(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { if (INGLOBALZONE(curproc) || zone_dataset_visible(zc->zc_name, NULL)) return (0); return (ENOENT); } static int zfs_dozonecheck_impl(const char *dataset, uint64_t zoned, cred_t *cr) { int writable = 1; /* * The dataset must be visible by this zone -- check this first * so they don't see EPERM on something they shouldn't know about. */ if (!INGLOBALZONE(curproc) && !zone_dataset_visible(dataset, &writable)) return (ENOENT); if (INGLOBALZONE(curproc)) { /* * If the fs is zoned, only root can access it from the * global zone. */ if (secpolicy_zfs(cr) && zoned) return (EPERM); } else { /* * If we are in a local zone, the 'zoned' property must be set. */ if (!zoned) return (EPERM); /* must be writable by this zone */ if (!writable) return (EPERM); } return (0); } static int zfs_dozonecheck(const char *dataset, cred_t *cr) { uint64_t zoned; if (dsl_prop_get_integer(dataset, "zoned", &zoned, NULL)) return (ENOENT); return (zfs_dozonecheck_impl(dataset, zoned, cr)); } static int zfs_dozonecheck_ds(const char *dataset, dsl_dataset_t *ds, cred_t *cr) { uint64_t zoned; rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER); if (dsl_prop_get_ds(ds, "zoned", 8, 1, &zoned, NULL)) { rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock); return (ENOENT); } rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock); return (zfs_dozonecheck_impl(dataset, zoned, cr)); } -/* - * If name ends in a '@', then require recursive permissions. 
- */ -int +static int zfs_secpolicy_write_perms(const char *name, const char *perm, cred_t *cr) { int error; - boolean_t descendent = B_FALSE; dsl_dataset_t *ds; - char *at; - - at = strchr(name, '@'); - if (at != NULL && at[1] == '\0') { - *at = '\0'; - descendent = B_TRUE; - } error = dsl_dataset_hold(name, FTAG, &ds); - if (at != NULL) - *at = '@'; if (error != 0) return (error); error = zfs_dozonecheck_ds(name, ds, cr); if (error == 0) { error = secpolicy_zfs(cr); if (error) - error = dsl_deleg_access_impl(ds, descendent, perm, cr); + error = dsl_deleg_access_impl(ds, perm, cr); } dsl_dataset_rele(ds, FTAG); return (error); } -int +static int zfs_secpolicy_write_perms_ds(const char *name, dsl_dataset_t *ds, const char *perm, cred_t *cr) { int error; error = zfs_dozonecheck_ds(name, ds, cr); if (error == 0) { error = secpolicy_zfs(cr); if (error) - error = dsl_deleg_access_impl(ds, B_FALSE, perm, cr); + error = dsl_deleg_access_impl(ds, perm, cr); } return (error); } /* * Policy for setting the security label property. * * Returns 0 for success, non-zero for access and other errors. */ static int zfs_set_slabel_policy(const char *name, char *strval, cred_t *cr) { #ifdef HAVE_MLSLABEL char ds_hexsl[MAXNAMELEN]; bslabel_t ds_sl, new_sl; boolean_t new_default = FALSE; uint64_t zoned; int needed_priv = -1; int error; /* First get the existing dataset label. */ error = dsl_prop_get(name, zfs_prop_to_name(ZFS_PROP_MLSLABEL), 1, sizeof (ds_hexsl), &ds_hexsl, NULL); if (error) return (EPERM); if (strcasecmp(strval, ZFS_MLSLABEL_DEFAULT) == 0) new_default = TRUE; /* The label must be translatable */ if (!new_default && (hexstr_to_label(strval, &new_sl) != 0)) return (EINVAL); /* * In a non-global zone, disallow attempts to set a label that * doesn't match that of the zone; otherwise no other checks * are needed. */ if (!INGLOBALZONE(curproc)) { if (new_default || !blequal(&new_sl, CR_SL(CRED()))) return (EPERM); return (0); } /* * For global-zone datasets (i.e., those whose zoned property is * "off", verify that the specified new label is valid for the * global zone. */ if (dsl_prop_get_integer(name, zfs_prop_to_name(ZFS_PROP_ZONED), &zoned, NULL)) return (EPERM); if (!zoned) { if (zfs_check_global_label(name, strval) != 0) return (EPERM); } /* * If the existing dataset label is nondefault, check if the * dataset is mounted (label cannot be changed while mounted). * Get the zfs_sb_t; if there isn't one, then the dataset isn't * mounted (or isn't a dataset, doesn't exist, ...). */ if (strcasecmp(ds_hexsl, ZFS_MLSLABEL_DEFAULT) != 0) { objset_t *os; static char *setsl_tag = "setsl_tag"; /* * Try to own the dataset; abort if there is any error, * (e.g., already mounted, in use, or other error). 
*/ error = dmu_objset_own(name, DMU_OST_ZFS, B_TRUE, setsl_tag, &os); if (error) return (EPERM); dmu_objset_disown(os, setsl_tag); if (new_default) { needed_priv = PRIV_FILE_DOWNGRADE_SL; goto out_check; } if (hexstr_to_label(strval, &new_sl) != 0) return (EPERM); if (blstrictdom(&ds_sl, &new_sl)) needed_priv = PRIV_FILE_DOWNGRADE_SL; else if (blstrictdom(&new_sl, &ds_sl)) needed_priv = PRIV_FILE_UPGRADE_SL; } else { /* dataset currently has a default label */ if (!new_default) needed_priv = PRIV_FILE_UPGRADE_SL; } out_check: if (needed_priv != -1) return (PRIV_POLICY(cr, needed_priv, B_FALSE, EPERM, NULL)); return (0); #else return ENOTSUP; #endif /* HAVE_MLSLABEL */ } static int zfs_secpolicy_setprop(const char *dsname, zfs_prop_t prop, nvpair_t *propval, cred_t *cr) { char *strval; /* * Check permissions for special properties. */ switch (prop) { default: break; case ZFS_PROP_ZONED: /* * Disallow setting of 'zoned' from within a local zone. */ if (!INGLOBALZONE(curproc)) return (EPERM); break; case ZFS_PROP_QUOTA: if (!INGLOBALZONE(curproc)) { uint64_t zoned; char setpoint[MAXNAMELEN]; /* * Unprivileged users are allowed to modify the * quota on things *under* (ie. contained by) * the thing they own. */ if (dsl_prop_get_integer(dsname, "zoned", &zoned, setpoint)) return (EPERM); if (!zoned || strlen(dsname) <= strlen(setpoint)) return (EPERM); } break; case ZFS_PROP_MLSLABEL: if (!is_system_labeled()) return (EPERM); if (nvpair_value_string(propval, &strval) == 0) { int err; err = zfs_set_slabel_policy(dsname, strval, CRED()); if (err != 0) return (err); } break; } return (zfs_secpolicy_write_perms(dsname, zfs_prop_to_name(prop), cr)); } -int -zfs_secpolicy_fsacl(zfs_cmd_t *zc, cred_t *cr) +/* ARGSUSED */ +static int +zfs_secpolicy_set_fsacl(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { int error; error = zfs_dozonecheck(zc->zc_name, cr); if (error) return (error); /* * permission to set permissions will be evaluated later in * dsl_deleg_can_allow() */ return (0); } -int -zfs_secpolicy_rollback(zfs_cmd_t *zc, cred_t *cr) +/* ARGSUSED */ +static int +zfs_secpolicy_rollback(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { return (zfs_secpolicy_write_perms(zc->zc_name, ZFS_DELEG_PERM_ROLLBACK, cr)); } -int -zfs_secpolicy_send(zfs_cmd_t *zc, cred_t *cr) +/* ARGSUSED */ +static int +zfs_secpolicy_send(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { spa_t *spa; dsl_pool_t *dp; dsl_dataset_t *ds; char *cp; int error; /* * Generate the current snapshot name from the given objsetid, then * use that name for the secpolicy/zone checks. 
*/ cp = strchr(zc->zc_name, '@'); if (cp == NULL) return (EINVAL); error = spa_open(zc->zc_name, &spa, FTAG); if (error) return (error); dp = spa_get_dsl(spa); rw_enter(&dp->dp_config_rwlock, RW_READER); error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &ds); rw_exit(&dp->dp_config_rwlock); spa_close(spa, FTAG); if (error) return (error); dsl_dataset_name(ds, zc->zc_name); error = zfs_secpolicy_write_perms_ds(zc->zc_name, ds, ZFS_DELEG_PERM_SEND, cr); dsl_dataset_rele(ds, FTAG); return (error); } +/* ARGSUSED */ +static int +zfs_secpolicy_send_new(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) +{ + return (zfs_secpolicy_write_perms(zc->zc_name, + ZFS_DELEG_PERM_SEND, cr)); +} + #ifdef HAVE_SMB_SHARE +/* ARGSUSED */ static int -zfs_secpolicy_deleg_share(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_deleg_share(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { vnode_t *vp; int error; if ((error = lookupname(zc->zc_value, UIO_SYSSPACE, NO_FOLLOW, NULL, &vp)) != 0) return (error); /* Now make sure mntpnt and dataset are ZFS */ if (vp->v_vfsp->vfs_fstype != zfsfstype || (strcmp((char *)refstr_value(vp->v_vfsp->vfs_resource), zc->zc_name) != 0)) { VN_RELE(vp); return (EPERM); } VN_RELE(vp); return (dsl_deleg_access(zc->zc_name, ZFS_DELEG_PERM_SHARE, cr)); } #endif /* HAVE_SMB_SHARE */ int -zfs_secpolicy_share(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_share(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { #ifdef HAVE_SMB_SHARE if (!INGLOBALZONE(curproc)) return (EPERM); if (secpolicy_nfs(cr) == 0) { return (0); } else { - return (zfs_secpolicy_deleg_share(zc, cr)); + return (zfs_secpolicy_deleg_share(zc, innvl, cr)); } #else return (ENOTSUP); #endif /* HAVE_SMB_SHARE */ } int -zfs_secpolicy_smb_acl(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_smb_acl(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { #ifdef HAVE_SMB_SHARE if (!INGLOBALZONE(curproc)) return (EPERM); if (secpolicy_smb(cr) == 0) { return (0); } else { - return (zfs_secpolicy_deleg_share(zc, cr)); + return (zfs_secpolicy_deleg_share(zc, innvl, cr)); } #else return (ENOTSUP); #endif /* HAVE_SMB_SHARE */ } static int zfs_get_parent(const char *datasetname, char *parent, int parentsize) { char *cp; /* * Remove the @bla or /bla from the end of the name to get the parent. */ (void) strncpy(parent, datasetname, parentsize); cp = strrchr(parent, '@'); if (cp != NULL) { cp[0] = '\0'; } else { cp = strrchr(parent, '/'); if (cp == NULL) return (ENOENT); cp[0] = '\0'; } return (0); } int zfs_secpolicy_destroy_perms(const char *name, cred_t *cr) { int error; if ((error = zfs_secpolicy_write_perms(name, ZFS_DELEG_PERM_MOUNT, cr)) != 0) return (error); return (zfs_secpolicy_write_perms(name, ZFS_DELEG_PERM_DESTROY, cr)); } +/* ARGSUSED */ static int -zfs_secpolicy_destroy(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_destroy(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { return (zfs_secpolicy_destroy_perms(zc->zc_name, cr)); } /* * Destroying snapshots with delegated permissions requires - * descendent mount and destroy permissions. + * descendant mount and destroy permissions. 
*/ +/* ARGSUSED */ static int -zfs_secpolicy_destroy_recursive(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_destroy_snaps(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { - int error; - char *dsname; + nvlist_t *snaps; + nvpair_t *pair, *nextpair; + int error = 0; - dsname = kmem_asprintf("%s@", zc->zc_name); + if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0) + return (EINVAL); + for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL; + pair = nextpair) { + dsl_dataset_t *ds; - error = zfs_secpolicy_destroy_perms(dsname, cr); - if (error == ENOENT) - error = zfs_secpolicy_destroy_perms(zc->zc_name, cr); + nextpair = nvlist_next_nvpair(snaps, pair); + error = dsl_dataset_hold(nvpair_name(pair), FTAG, &ds); + if (error == 0) { + dsl_dataset_rele(ds, FTAG); + } else if (error == ENOENT) { + /* + * Ignore any snapshots that don't exist (we consider + * them "already destroyed"). Remove the name from the + * nvl here in case the snapshot is created between + * now and when we try to destroy it (in which case + * we don't want to destroy it since we haven't + * checked for permission). + */ + fnvlist_remove_nvpair(snaps, pair); + error = 0; + continue; + } else { + break; + } + error = zfs_secpolicy_destroy_perms(nvpair_name(pair), cr); + if (error != 0) + break; + } - strfree(dsname); return (error); } int zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr) { char parentname[MAXNAMELEN]; int error; if ((error = zfs_secpolicy_write_perms(from, ZFS_DELEG_PERM_RENAME, cr)) != 0) return (error); if ((error = zfs_secpolicy_write_perms(from, ZFS_DELEG_PERM_MOUNT, cr)) != 0) return (error); if ((error = zfs_get_parent(to, parentname, sizeof (parentname))) != 0) return (error); if ((error = zfs_secpolicy_write_perms(parentname, ZFS_DELEG_PERM_CREATE, cr)) != 0) return (error); if ((error = zfs_secpolicy_write_perms(parentname, ZFS_DELEG_PERM_MOUNT, cr)) != 0) return (error); return (error); } +/* ARGSUSED */ static int -zfs_secpolicy_rename(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_rename(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { return (zfs_secpolicy_rename_perms(zc->zc_name, zc->zc_value, cr)); } +/* ARGSUSED */ static int -zfs_secpolicy_promote(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_promote(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { char parentname[MAXNAMELEN]; objset_t *clone; int error; error = zfs_secpolicy_write_perms(zc->zc_name, ZFS_DELEG_PERM_PROMOTE, cr); if (error) return (error); error = dmu_objset_hold(zc->zc_name, FTAG, &clone); if (error == 0) { dsl_dataset_t *pclone = NULL; dsl_dir_t *dd; dd = clone->os_dsl_dataset->ds_dir; rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER); error = dsl_dataset_hold_obj(dd->dd_pool, dd->dd_phys->dd_origin_obj, FTAG, &pclone); rw_exit(&dd->dd_pool->dp_config_rwlock); if (error) { dmu_objset_rele(clone, FTAG); return (error); } error = zfs_secpolicy_write_perms(zc->zc_name, ZFS_DELEG_PERM_MOUNT, cr); dsl_dataset_name(pclone, parentname); dmu_objset_rele(clone, FTAG); dsl_dataset_rele(pclone, FTAG); if (error == 0) error = zfs_secpolicy_write_perms(parentname, ZFS_DELEG_PERM_PROMOTE, cr); } return (error); } +/* ARGSUSED */ static int -zfs_secpolicy_receive(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_recv(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { int error; if ((error = zfs_secpolicy_write_perms(zc->zc_name, ZFS_DELEG_PERM_RECEIVE, cr)) != 0) return (error); if ((error = zfs_secpolicy_write_perms(zc->zc_name, ZFS_DELEG_PERM_MOUNT, cr)) != 0) return (error); return (zfs_secpolicy_write_perms(zc->zc_name, 
ZFS_DELEG_PERM_CREATE, cr)); } int zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr) { return (zfs_secpolicy_write_perms(name, ZFS_DELEG_PERM_SNAPSHOT, cr)); } +/* + * Check for permission to create each snapshot in the nvlist. + */ +/* ARGSUSED */ static int -zfs_secpolicy_snapshot(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_snapshot(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { + nvlist_t *snaps; + int error = 0; + nvpair_t *pair; + + if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0) + return (EINVAL); + for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL; + pair = nvlist_next_nvpair(snaps, pair)) { + char *name = nvpair_name(pair); + char *atp = strchr(name, '@'); - return (zfs_secpolicy_snapshot_perms(zc->zc_name, cr)); + if (atp == NULL) { + error = EINVAL; + break; + } + *atp = '\0'; + error = zfs_secpolicy_snapshot_perms(name, cr); + *atp = '@'; + if (error != 0) + break; + } + return (error); +} + +/* ARGSUSED */ +static int +zfs_secpolicy_log_history(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) +{ + /* + * Even root must have a proper TSD so that we know what pool + * to log to. + */ + if (tsd_get(zfs_allow_log_key) == NULL) + return (EPERM); + return (0); } static int -zfs_secpolicy_create(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_create_clone(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { char parentname[MAXNAMELEN]; int error; + char *origin; if ((error = zfs_get_parent(zc->zc_name, parentname, sizeof (parentname))) != 0) return (error); - if (zc->zc_value[0] != '\0') { - if ((error = zfs_secpolicy_write_perms(zc->zc_value, - ZFS_DELEG_PERM_CLONE, cr)) != 0) - return (error); - } + if (nvlist_lookup_string(innvl, "origin", &origin) == 0 && + (error = zfs_secpolicy_write_perms(origin, + ZFS_DELEG_PERM_CLONE, cr)) != 0) + return (error); if ((error = zfs_secpolicy_write_perms(parentname, ZFS_DELEG_PERM_CREATE, cr)) != 0) return (error); - error = zfs_secpolicy_write_perms(parentname, - ZFS_DELEG_PERM_MOUNT, cr); - - return (error); + return (zfs_secpolicy_write_perms(parentname, + ZFS_DELEG_PERM_MOUNT, cr)); } /* * Policy for pool operations - create/destroy pools, add vdevs, etc. Requires * SYS_CONFIG privilege, which is not available in a local zone. */ /* ARGSUSED */ static int -zfs_secpolicy_config(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_config(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { if (secpolicy_sys_config(cr, B_FALSE) != 0) return (EPERM); return (0); } /* * Policy for object to name lookups. */ /* ARGSUSED */ static int -zfs_secpolicy_diff(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_diff(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { int error; if ((error = secpolicy_sys_config(cr, B_FALSE)) == 0) return (0); error = zfs_secpolicy_write_perms(zc->zc_name, ZFS_DELEG_PERM_DIFF, cr); return (error); } /* * Policy for fault injection. Requires all privileges. 
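Both zfs_secpolicy_destroy_snaps() and zfs_secpolicy_snapshot() above walk a "snaps" nvlist in which each pair's name is a full snapshot name and the value is unused. A hedged sketch of how a userland caller might assemble that input with libnvpair; the pool and snapshot names are examples only.

#include <libnvpair.h>

/*
 * Sketch: build the { "snaps" -> { "pool/fs@snap", ... } } input nvlist
 * consumed by the nvlist-based snapshot/destroy ioctls.  Returns NULL on
 * allocation failure.  Snapshot names are placeholders.
 */
static nvlist_t *
build_snaps_innvl(void)
{
	nvlist_t *innvl = NULL, *snaps = NULL;

	if (nvlist_alloc(&snaps, NV_UNIQUE_NAME, 0) != 0 ||
	    nvlist_add_boolean(snaps, "tank/fs@monday") != 0 ||
	    nvlist_add_boolean(snaps, "tank/fs@tuesday") != 0 ||
	    nvlist_alloc(&innvl, NV_UNIQUE_NAME, 0) != 0 ||
	    nvlist_add_nvlist(innvl, "snaps", snaps) != 0) {
		nvlist_free(snaps);
		nvlist_free(innvl);
		return (NULL);
	}
	nvlist_free(snaps);	/* nvlist_add_nvlist() copied it */
	return (innvl);
}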
*/ /* ARGSUSED */ static int -zfs_secpolicy_inject(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_inject(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { return (secpolicy_zinject(cr)); } +/* ARGSUSED */ static int -zfs_secpolicy_inherit(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_inherit_prop(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { zfs_prop_t prop = zfs_name_to_prop(zc->zc_value); if (prop == ZPROP_INVAL) { if (!zfs_prop_user(zc->zc_value)) return (EINVAL); return (zfs_secpolicy_write_perms(zc->zc_name, ZFS_DELEG_PERM_USERPROP, cr)); } else { return (zfs_secpolicy_setprop(zc->zc_name, prop, NULL, cr)); } } static int -zfs_secpolicy_userspace_one(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_userspace_one(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { - int err = zfs_secpolicy_read(zc, cr); + int err = zfs_secpolicy_read(zc, innvl, cr); if (err) return (err); if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS) return (EINVAL); if (zc->zc_value[0] == 0) { /* * They are asking about a posix uid/gid. If it's * themself, allow it. */ if (zc->zc_objset_type == ZFS_PROP_USERUSED || zc->zc_objset_type == ZFS_PROP_USERQUOTA) { if (zc->zc_guid == crgetuid(cr)) return (0); } else { if (groupmember(zc->zc_guid, cr)) return (0); } } return (zfs_secpolicy_write_perms(zc->zc_name, userquota_perms[zc->zc_objset_type], cr)); } static int -zfs_secpolicy_userspace_many(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_userspace_many(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { - int err = zfs_secpolicy_read(zc, cr); + int err = zfs_secpolicy_read(zc, innvl, cr); if (err) return (err); if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS) return (EINVAL); return (zfs_secpolicy_write_perms(zc->zc_name, userquota_perms[zc->zc_objset_type], cr)); } +/* ARGSUSED */ static int -zfs_secpolicy_userspace_upgrade(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_userspace_upgrade(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { return (zfs_secpolicy_setprop(zc->zc_name, ZFS_PROP_VERSION, NULL, cr)); } +/* ARGSUSED */ static int -zfs_secpolicy_hold(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_hold(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { return (zfs_secpolicy_write_perms(zc->zc_name, ZFS_DELEG_PERM_HOLD, cr)); } +/* ARGSUSED */ static int -zfs_secpolicy_release(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_release(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { return (zfs_secpolicy_write_perms(zc->zc_name, ZFS_DELEG_PERM_RELEASE, cr)); } /* * Policy for allowing temporary snapshots to be taken or released */ static int -zfs_secpolicy_tmp_snapshot(zfs_cmd_t *zc, cred_t *cr) +zfs_secpolicy_tmp_snapshot(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { /* * A temporary snapshot is the same as a snapshot, * hold, destroy and release all rolled into one. * Delegated diff alone is sufficient that we allow this. */ int error; if ((error = zfs_secpolicy_write_perms(zc->zc_name, ZFS_DELEG_PERM_DIFF, cr)) == 0) return (0); - error = zfs_secpolicy_snapshot(zc, cr); + error = zfs_secpolicy_snapshot_perms(zc->zc_name, cr); if (!error) - error = zfs_secpolicy_hold(zc, cr); + error = zfs_secpolicy_hold(zc, innvl, cr); if (!error) - error = zfs_secpolicy_release(zc, cr); + error = zfs_secpolicy_release(zc, innvl, cr); if (!error) - error = zfs_secpolicy_destroy(zc, cr); + error = zfs_secpolicy_destroy(zc, innvl, cr); return (error); } /* * Returns the nvlist as specified by the user in the zfs_cmd_t. 
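get_nvlist(), which follows, copies in and unpacks a buffer that userland has already serialized. A hedged sketch of the sending side, roughly as a library such as libzfs would prepare it before issuing the ioctl; the zfs_cmd_t field names match those used in this file, everything else is illustrative.

#include <stdlib.h>
#include <stdint.h>
#include <errno.h>
#include <libnvpair.h>
#include <sys/zfs_ioctl.h>

/*
 * Sketch: serialize an nvlist into zc_nvlist_src/zc_nvlist_src_size so
 * the kernel's get_nvlist() can reconstruct it.  Returns 0 or an errno;
 * the caller frees 'packed' after the ioctl completes.
 */
static int
pack_src_nvlist(zfs_cmd_t *zc, nvlist_t *nvl)
{
	char *packed = NULL;
	size_t size;
	int err;

	if ((err = nvlist_size(nvl, &size, NV_ENCODE_NATIVE)) != 0)
		return (err);
	if ((packed = malloc(size)) == NULL)
		return (ENOMEM);
	if ((err = nvlist_pack(nvl, &packed, &size,
	    NV_ENCODE_NATIVE, 0)) != 0) {
		free(packed);
		return (err);
	}
	zc->zc_nvlist_src = (uint64_t)(uintptr_t)packed;
	zc->zc_nvlist_src_size = size;
	return (0);
}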
*/ static int get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp) { char *packed; int error; nvlist_t *list = NULL; /* * Read in and unpack the user-supplied nvlist. */ if (size == 0) return (EINVAL); packed = kmem_alloc(size, KM_SLEEP | KM_NODEBUG); if ((error = ddi_copyin((void *)(uintptr_t)nvl, packed, size, iflag)) != 0) { kmem_free(packed, size); return (error); } if ((error = nvlist_unpack(packed, size, &list, 0)) != 0) { kmem_free(packed, size); return (error); } kmem_free(packed, size); *nvp = list; return (0); } +/* + * Reduce the size of this nvlist until it can be serialized in 'max' bytes. + * Entries will be removed from the end of the nvlist, and one int32 entry + * named "N_MORE_ERRORS" will be added indicating how many entries were + * removed. + */ static int -fit_error_list(zfs_cmd_t *zc, nvlist_t **errors) +nvlist_smush(nvlist_t *errors, size_t max) { size_t size; - VERIFY(nvlist_size(*errors, &size, NV_ENCODE_NATIVE) == 0); + size = fnvlist_size(errors); - if (size > zc->zc_nvlist_dst_size) { + if (size > max) { nvpair_t *more_errors; int n = 0; - if (zc->zc_nvlist_dst_size < 1024) + if (max < 1024) return (ENOMEM); - VERIFY(nvlist_add_int32(*errors, ZPROP_N_MORE_ERRORS, 0) == 0); - more_errors = nvlist_prev_nvpair(*errors, NULL); + fnvlist_add_int32(errors, ZPROP_N_MORE_ERRORS, 0); + more_errors = nvlist_prev_nvpair(errors, NULL); do { - nvpair_t *pair = nvlist_prev_nvpair(*errors, + nvpair_t *pair = nvlist_prev_nvpair(errors, more_errors); - VERIFY(nvlist_remove_nvpair(*errors, pair) == 0); + fnvlist_remove_nvpair(errors, pair); n++; - VERIFY(nvlist_size(*errors, &size, - NV_ENCODE_NATIVE) == 0); - } while (size > zc->zc_nvlist_dst_size); + size = fnvlist_size(errors); + } while (size > max); - VERIFY(nvlist_remove_nvpair(*errors, more_errors) == 0); - VERIFY(nvlist_add_int32(*errors, ZPROP_N_MORE_ERRORS, n) == 0); - ASSERT(nvlist_size(*errors, &size, NV_ENCODE_NATIVE) == 0); - ASSERT(size <= zc->zc_nvlist_dst_size); + fnvlist_remove_nvpair(errors, more_errors); + fnvlist_add_int32(errors, ZPROP_N_MORE_ERRORS, n); + ASSERT3U(fnvlist_size(errors), <=, max); } return (0); } static int put_nvlist(zfs_cmd_t *zc, nvlist_t *nvl) { char *packed = NULL; int error = 0; size_t size; - VERIFY(nvlist_size(nvl, &size, NV_ENCODE_NATIVE) == 0); + size = fnvlist_size(nvl); if (size > zc->zc_nvlist_dst_size) { error = ENOMEM; } else { - packed = kmem_alloc(size, KM_SLEEP | KM_NODEBUG); - VERIFY(nvlist_pack(nvl, &packed, &size, NV_ENCODE_NATIVE, - KM_SLEEP) == 0); + packed = fnvlist_pack(nvl, &size); if (ddi_copyout(packed, (void *)(uintptr_t)zc->zc_nvlist_dst, size, zc->zc_iflags) != 0) error = EFAULT; - kmem_free(packed, size); + fnvlist_pack_free(packed, size); } zc->zc_nvlist_dst_size = size; + zc->zc_nvlist_dst_filled = B_TRUE; return (error); } static int get_zfs_sb(const char *dsname, zfs_sb_t **zsbp) { objset_t *os; int error; error = dmu_objset_hold(dsname, FTAG, &os); if (error) return (error); if (dmu_objset_type(os) != DMU_OST_ZFS) { dmu_objset_rele(os, FTAG); return (EINVAL); } mutex_enter(&os->os_user_ptr_lock); *zsbp = dmu_objset_get_user(os); if (*zsbp && (*zsbp)->z_sb) { atomic_inc(&((*zsbp)->z_sb->s_active)); } else { error = ESRCH; } mutex_exit(&os->os_user_ptr_lock); dmu_objset_rele(os, FTAG); return (error); } /* * Find a zfs_sb_t for a mounted filesystem, or create our own, in which * case its z_sb will be NULL, and it will be opened as the owner. 
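Returning to nvlist_smush() above: when the error list is trimmed to fit the caller's buffer, consumers see a trailing ZPROP_N_MORE_ERRORS count rather than the dropped entries. A hedged userland sketch of walking such a list; the printing is illustrative.

#include <stdio.h>
#include <string.h>
#include <libnvpair.h>
#include <sys/fs/zfs.h>	/* ZPROP_N_MORE_ERRORS */

/*
 * Sketch: report per-property errors from an nvlist that may have been
 * truncated ("smushed") by the kernel to fit the caller's buffer.
 */
static void
print_prop_errors(nvlist_t *errors)
{
	nvpair_t *pair = NULL;

	while ((pair = nvlist_next_nvpair(errors, pair)) != NULL) {
		int32_t err;

		if (nvpair_value_int32(pair, &err) != 0)
			continue;
		if (strcmp(nvpair_name(pair), ZPROP_N_MORE_ERRORS) == 0)
			(void) printf("... and %d more error(s)\n", (int)err);
		else
			(void) printf("%s: error %d\n",
			    nvpair_name(pair), (int)err);
	}
}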
* If 'writer' is set, the z_teardown_lock will be held for RW_WRITER, * which prevents all inode ops from running. */ static int zfs_sb_hold(const char *name, void *tag, zfs_sb_t **zsbp, boolean_t writer) { int error = 0; if (get_zfs_sb(name, zsbp) != 0) error = zfs_sb_create(name, zsbp); if (error == 0) { rrw_enter(&(*zsbp)->z_teardown_lock, (writer) ? RW_WRITER : RW_READER, tag); if ((*zsbp)->z_unmounted) { /* * XXX we could probably try again, since the unmounting * thread should be just about to disassociate the * objset from the zfsvfs. */ rrw_exit(&(*zsbp)->z_teardown_lock, tag); return (EBUSY); } } return (error); } static void zfs_sb_rele(zfs_sb_t *zsb, void *tag) { rrw_exit(&zsb->z_teardown_lock, tag); if (zsb->z_sb) { deactivate_super(zsb->z_sb); } else { dmu_objset_disown(zsb->z_os, zsb); zfs_sb_free(zsb); } } static int zfs_ioc_pool_create(zfs_cmd_t *zc) { int error; nvlist_t *config, *props = NULL; nvlist_t *rootprops = NULL; nvlist_t *zplprops = NULL; - char *buf; if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size, zc->zc_iflags, &config))) return (error); if (zc->zc_nvlist_src_size != 0 && (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, zc->zc_iflags, &props))) { nvlist_free(config); return (error); } if (props) { nvlist_t *nvl = NULL; uint64_t version = SPA_VERSION; (void) nvlist_lookup_uint64(props, zpool_prop_to_name(ZPOOL_PROP_VERSION), &version); if (!SPA_VERSION_IS_SUPPORTED(version)) { error = EINVAL; goto pool_props_bad; } (void) nvlist_lookup_nvlist(props, ZPOOL_ROOTFS_PROPS, &nvl); if (nvl) { error = nvlist_dup(nvl, &rootprops, KM_SLEEP); if (error != 0) { nvlist_free(config); nvlist_free(props); return (error); } (void) nvlist_remove_all(props, ZPOOL_ROOTFS_PROPS); } VERIFY(nvlist_alloc(&zplprops, NV_UNIQUE_NAME, KM_SLEEP) == 0); error = zfs_fill_zplprops_root(version, rootprops, zplprops, NULL); if (error) goto pool_props_bad; } - buf = history_str_get(zc); - - error = spa_create(zc->zc_name, config, props, buf, zplprops); + error = spa_create(zc->zc_name, config, props, zplprops); /* * Set the remaining root properties */ if (!error && (error = zfs_set_prop_nvlist(zc->zc_name, ZPROP_SRC_LOCAL, rootprops, NULL)) != 0) (void) spa_destroy(zc->zc_name); - if (buf != NULL) - history_str_free(buf); - pool_props_bad: nvlist_free(rootprops); nvlist_free(zplprops); nvlist_free(config); nvlist_free(props); return (error); } static int zfs_ioc_pool_destroy(zfs_cmd_t *zc) { int error; zfs_log_history(zc); error = spa_destroy(zc->zc_name); if (error == 0) zvol_remove_minors(zc->zc_name); return (error); } static int zfs_ioc_pool_import(zfs_cmd_t *zc) { nvlist_t *config, *props = NULL; uint64_t guid; int error; if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size, zc->zc_iflags, &config)) != 0) return (error); if (zc->zc_nvlist_src_size != 0 && (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, zc->zc_iflags, &props))) { nvlist_free(config); return (error); } if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &guid) != 0 || guid != zc->zc_guid) error = EINVAL; else error = spa_import(zc->zc_name, config, props, zc->zc_cookie); if (zc->zc_nvlist_dst != 0) { int err; if ((err = put_nvlist(zc, config)) != 0) error = err; } nvlist_free(config); if (props) nvlist_free(props); return (error); } static int zfs_ioc_pool_export(zfs_cmd_t *zc) { int error; boolean_t force = (boolean_t)zc->zc_cookie; boolean_t hardforce = (boolean_t)zc->zc_guid; zfs_log_history(zc); error = spa_export(zc->zc_name, NULL, force, hardforce); 
if (error == 0) zvol_remove_minors(zc->zc_name); return (error); } static int zfs_ioc_pool_configs(zfs_cmd_t *zc) { nvlist_t *configs; int error; if ((configs = spa_all_configs(&zc->zc_cookie)) == NULL) return (EEXIST); error = put_nvlist(zc, configs); nvlist_free(configs); return (error); } /* * inputs: * zc_name name of the pool * * outputs: * zc_cookie real errno * zc_nvlist_dst config nvlist * zc_nvlist_dst_size size of config nvlist */ static int zfs_ioc_pool_stats(zfs_cmd_t *zc) { nvlist_t *config; int error; int ret = 0; error = spa_get_stats(zc->zc_name, &config, zc->zc_value, sizeof (zc->zc_value)); if (config != NULL) { ret = put_nvlist(zc, config); nvlist_free(config); /* * The config may be present even if 'error' is non-zero. * In this case we return success, and preserve the real errno * in 'zc_cookie'. */ zc->zc_cookie = error; } else { ret = error; } return (ret); } /* * Try to import the given pool, returning pool stats as appropriate so that * user land knows which devices are available and overall pool health. */ static int zfs_ioc_pool_tryimport(zfs_cmd_t *zc) { nvlist_t *tryconfig, *config; int error; if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size, zc->zc_iflags, &tryconfig)) != 0) return (error); config = spa_tryimport(tryconfig); nvlist_free(tryconfig); if (config == NULL) return (EINVAL); error = put_nvlist(zc, config); nvlist_free(config); return (error); } /* * inputs: * zc_name name of the pool * zc_cookie scan func (pool_scan_func_t) */ static int zfs_ioc_pool_scan(zfs_cmd_t *zc) { spa_t *spa; int error; if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) return (error); if (zc->zc_cookie == POOL_SCAN_NONE) error = spa_scan_stop(spa); else error = spa_scan(spa, zc->zc_cookie); spa_close(spa, FTAG); return (error); } static int zfs_ioc_pool_freeze(zfs_cmd_t *zc) { spa_t *spa; int error; error = spa_open(zc->zc_name, &spa, FTAG); if (error == 0) { spa_freeze(spa); spa_close(spa, FTAG); } return (error); } static int zfs_ioc_pool_upgrade(zfs_cmd_t *zc) { spa_t *spa; int error; if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) return (error); if (zc->zc_cookie < spa_version(spa) || !SPA_VERSION_IS_SUPPORTED(zc->zc_cookie)) { spa_close(spa, FTAG); return (EINVAL); } spa_upgrade(spa, zc->zc_cookie); spa_close(spa, FTAG); return (error); } static int zfs_ioc_pool_get_history(zfs_cmd_t *zc) { spa_t *spa; char *hist_buf; uint64_t size; int error; if ((size = zc->zc_history_len) == 0) return (EINVAL); if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) return (error); if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY) { spa_close(spa, FTAG); return (ENOTSUP); } hist_buf = vmem_alloc(size, KM_SLEEP); if ((error = spa_history_get(spa, &zc->zc_history_offset, &zc->zc_history_len, hist_buf)) == 0) { error = ddi_copyout(hist_buf, (void *)(uintptr_t)zc->zc_history, zc->zc_history_len, zc->zc_iflags); } spa_close(spa, FTAG); vmem_free(hist_buf, size); return (error); } static int zfs_ioc_pool_reguid(zfs_cmd_t *zc) { spa_t *spa; int error; error = spa_open(zc->zc_name, &spa, FTAG); if (error == 0) { error = spa_change_guid(spa); spa_close(spa, FTAG); } return (error); } static int zfs_ioc_dsobj_to_dsname(zfs_cmd_t *zc) { int error; if ((error = dsl_dsobj_to_dsname(zc->zc_name,zc->zc_obj,zc->zc_value))) return (error); return (0); } /* * inputs: * zc_name name of filesystem * zc_obj object to find * * outputs: * zc_value name of object */ static int zfs_ioc_obj_to_path(zfs_cmd_t *zc) { objset_t *os; int error; /* XXX reading from objset not owned */ if 
((error = dmu_objset_hold(zc->zc_name, FTAG, &os)) != 0) return (error); if (dmu_objset_type(os) != DMU_OST_ZFS) { dmu_objset_rele(os, FTAG); return (EINVAL); } error = zfs_obj_to_path(os, zc->zc_obj, zc->zc_value, sizeof (zc->zc_value)); dmu_objset_rele(os, FTAG); return (error); } /* * inputs: * zc_name name of filesystem * zc_obj object to find * * outputs: * zc_stat stats on object * zc_value path to object */ static int zfs_ioc_obj_to_stats(zfs_cmd_t *zc) { objset_t *os; int error; /* XXX reading from objset not owned */ if ((error = dmu_objset_hold(zc->zc_name, FTAG, &os)) != 0) return (error); if (dmu_objset_type(os) != DMU_OST_ZFS) { dmu_objset_rele(os, FTAG); return (EINVAL); } error = zfs_obj_to_stats(os, zc->zc_obj, &zc->zc_stat, zc->zc_value, sizeof (zc->zc_value)); dmu_objset_rele(os, FTAG); return (error); } static int zfs_ioc_vdev_add(zfs_cmd_t *zc) { spa_t *spa; int error; nvlist_t *config, **l2cache, **spares; uint_t nl2cache = 0, nspares = 0; error = spa_open(zc->zc_name, &spa, FTAG); if (error != 0) return (error); error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size, zc->zc_iflags, &config); (void) nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache); (void) nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_SPARES, &spares, &nspares); /* * A root pool with concatenated devices is not supported. * Thus, can not add a device to a root pool. * * Intent log device can not be added to a rootpool because * during mountroot, zil is replayed, a seperated log device * can not be accessed during the mountroot time. * * l2cache and spare devices are ok to be added to a rootpool. */ if (spa_bootfs(spa) != 0 && nl2cache == 0 && nspares == 0) { nvlist_free(config); spa_close(spa, FTAG); return (EDOM); } if (error == 0) { error = spa_vdev_add(spa, config); nvlist_free(config); } spa_close(spa, FTAG); return (error); } /* * inputs: * zc_name name of the pool * zc_nvlist_conf nvlist of devices to remove * zc_cookie to stop the remove? 
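The inputs comment just above belongs to zfs_ioc_vdev_remove(), which follows; like the other legacy handlers here it takes its arguments from fixed zfs_cmd_t fields (the pool name in zc_name, the vdev guid in zc_guid) rather than from an input nvlist. A hedged sketch of driving it directly; the device-node path and error handling are illustrative, and a real caller would go through libzfs.

#include <string.h>
#include <errno.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/zfs_ioctl.h>

/*
 * Sketch: drive a legacy-style handler by filling fixed zfs_cmd_t
 * fields and issuing the ioctl against /dev/zfs.
 */
static int
vdev_remove_example(const char *pool, uint64_t vdev_guid)
{
	zfs_cmd_t zc = {"\0"};
	int fd, err = 0;

	if ((fd = open("/dev/zfs", O_RDWR)) < 0)
		return (errno);

	(void) strlcpy(zc.zc_name, pool, sizeof (zc.zc_name));
	zc.zc_guid = vdev_guid;

	if (ioctl(fd, ZFS_IOC_VDEV_REMOVE, &zc) != 0)
		err = errno;
	(void) close(fd);
	return (err);
}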
*/ static int zfs_ioc_vdev_remove(zfs_cmd_t *zc) { spa_t *spa; int error; error = spa_open(zc->zc_name, &spa, FTAG); if (error != 0) return (error); error = spa_vdev_remove(spa, zc->zc_guid, B_FALSE); spa_close(spa, FTAG); return (error); } static int zfs_ioc_vdev_set_state(zfs_cmd_t *zc) { spa_t *spa; int error; vdev_state_t newstate = VDEV_STATE_UNKNOWN; if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) return (error); switch (zc->zc_cookie) { case VDEV_STATE_ONLINE: error = vdev_online(spa, zc->zc_guid, zc->zc_obj, &newstate); break; case VDEV_STATE_OFFLINE: error = vdev_offline(spa, zc->zc_guid, zc->zc_obj); break; case VDEV_STATE_FAULTED: if (zc->zc_obj != VDEV_AUX_ERR_EXCEEDED && zc->zc_obj != VDEV_AUX_EXTERNAL) zc->zc_obj = VDEV_AUX_ERR_EXCEEDED; error = vdev_fault(spa, zc->zc_guid, zc->zc_obj); break; case VDEV_STATE_DEGRADED: if (zc->zc_obj != VDEV_AUX_ERR_EXCEEDED && zc->zc_obj != VDEV_AUX_EXTERNAL) zc->zc_obj = VDEV_AUX_ERR_EXCEEDED; error = vdev_degrade(spa, zc->zc_guid, zc->zc_obj); break; default: error = EINVAL; } zc->zc_cookie = newstate; spa_close(spa, FTAG); return (error); } static int zfs_ioc_vdev_attach(zfs_cmd_t *zc) { spa_t *spa; int replacing = zc->zc_cookie; nvlist_t *config; int error; if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) return (error); if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size, zc->zc_iflags, &config)) == 0) { error = spa_vdev_attach(spa, zc->zc_guid, config, replacing); nvlist_free(config); } spa_close(spa, FTAG); return (error); } static int zfs_ioc_vdev_detach(zfs_cmd_t *zc) { spa_t *spa; int error; if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) return (error); error = spa_vdev_detach(spa, zc->zc_guid, 0, B_FALSE); spa_close(spa, FTAG); return (error); } static int zfs_ioc_vdev_split(zfs_cmd_t *zc) { spa_t *spa; nvlist_t *config, *props = NULL; int error; boolean_t exp = !!(zc->zc_cookie & ZPOOL_EXPORT_AFTER_SPLIT); if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) return (error); if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size, zc->zc_iflags, &config))) { spa_close(spa, FTAG); return (error); } if (zc->zc_nvlist_src_size != 0 && (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, zc->zc_iflags, &props))) { spa_close(spa, FTAG); nvlist_free(config); return (error); } error = spa_vdev_split_mirror(spa, zc->zc_string, config, props, exp); spa_close(spa, FTAG); nvlist_free(config); nvlist_free(props); return (error); } static int zfs_ioc_vdev_setpath(zfs_cmd_t *zc) { spa_t *spa; char *path = zc->zc_value; uint64_t guid = zc->zc_guid; int error; error = spa_open(zc->zc_name, &spa, FTAG); if (error != 0) return (error); error = spa_vdev_setpath(spa, guid, path); spa_close(spa, FTAG); return (error); } static int zfs_ioc_vdev_setfru(zfs_cmd_t *zc) { spa_t *spa; char *fru = zc->zc_value; uint64_t guid = zc->zc_guid; int error; error = spa_open(zc->zc_name, &spa, FTAG); if (error != 0) return (error); error = spa_vdev_setfru(spa, guid, fru); spa_close(spa, FTAG); return (error); } static int zfs_ioc_objset_stats_impl(zfs_cmd_t *zc, objset_t *os) { int error = 0; nvlist_t *nv; dmu_objset_fast_stat(os, &zc->zc_objset_stats); if (zc->zc_nvlist_dst != 0 && (error = dsl_prop_get_all(os, &nv)) == 0) { dmu_objset_stats(os, nv); /* * NB: zvol_get_stats() will read the objset contents, * which we aren't supposed to do with a * DS_MODE_USER hold, because it could be * inconsistent. So this is a bit of a workaround... 
* XXX reading with out owning */ if (!zc->zc_objset_stats.dds_inconsistent && dmu_objset_type(os) == DMU_OST_ZVOL) { error = zvol_get_stats(os, nv); if (error == EIO) return (error); VERIFY0(error); } if (error == 0) error = put_nvlist(zc, nv); nvlist_free(nv); } return (error); } /* * inputs: * zc_name name of filesystem * zc_nvlist_dst_size size of buffer for property nvlist * * outputs: * zc_objset_stats stats * zc_nvlist_dst property nvlist * zc_nvlist_dst_size size of property nvlist */ static int zfs_ioc_objset_stats(zfs_cmd_t *zc) { objset_t *os = NULL; int error; if ((error = dmu_objset_hold(zc->zc_name, FTAG, &os))) return (error); error = zfs_ioc_objset_stats_impl(zc, os); dmu_objset_rele(os, FTAG); return (error); } /* * inputs: * zc_name name of filesystem * zc_nvlist_dst_size size of buffer for property nvlist * * outputs: * zc_nvlist_dst received property nvlist * zc_nvlist_dst_size size of received property nvlist * * Gets received properties (distinct from local properties on or after * SPA_VERSION_RECVD_PROPS) for callers who want to differentiate received from * local property values. */ static int zfs_ioc_objset_recvd_props(zfs_cmd_t *zc) { objset_t *os = NULL; int error; nvlist_t *nv; if ((error = dmu_objset_hold(zc->zc_name, FTAG, &os))) return (error); /* * Without this check, we would return local property values if the * caller has not already received properties on or after * SPA_VERSION_RECVD_PROPS. */ if (!dsl_prop_get_hasrecvd(os)) { dmu_objset_rele(os, FTAG); return (ENOTSUP); } if (zc->zc_nvlist_dst != 0 && (error = dsl_prop_get_received(os, &nv)) == 0) { error = put_nvlist(zc, nv); nvlist_free(nv); } dmu_objset_rele(os, FTAG); return (error); } static int nvl_add_zplprop(objset_t *os, nvlist_t *props, zfs_prop_t prop) { uint64_t value; int error; /* * zfs_get_zplprop() will either find a value or give us * the default value (if there is one). */ if ((error = zfs_get_zplprop(os, prop, &value)) != 0) return (error); VERIFY(nvlist_add_uint64(props, zfs_prop_to_name(prop), value) == 0); return (0); } /* * inputs: * zc_name name of filesystem * zc_nvlist_dst_size size of buffer for zpl property nvlist * * outputs: * zc_nvlist_dst zpl property nvlist * zc_nvlist_dst_size size of zpl property nvlist */ static int zfs_ioc_objset_zplprops(zfs_cmd_t *zc) { objset_t *os; int err; /* XXX reading without owning */ if ((err = dmu_objset_hold(zc->zc_name, FTAG, &os))) return (err); dmu_objset_fast_stat(os, &zc->zc_objset_stats); /* * NB: nvl_add_zplprop() will read the objset contents, * which we aren't supposed to do with a DS_MODE_USER * hold, because it could be inconsistent. */ if (zc->zc_nvlist_dst != 0 && !zc->zc_objset_stats.dds_inconsistent && dmu_objset_type(os) == DMU_OST_ZFS) { nvlist_t *nv; VERIFY(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) == 0); if ((err = nvl_add_zplprop(os, nv, ZFS_PROP_VERSION)) == 0 && (err = nvl_add_zplprop(os, nv, ZFS_PROP_NORMALIZE)) == 0 && (err = nvl_add_zplprop(os, nv, ZFS_PROP_UTF8ONLY)) == 0 && (err = nvl_add_zplprop(os, nv, ZFS_PROP_CASE)) == 0) err = put_nvlist(zc, nv); nvlist_free(nv); } else { err = ENOENT; } dmu_objset_rele(os, FTAG); return (err); } static boolean_t dataset_name_hidden(const char *name) { /* * Skip over datasets that are not visible in this zone, * internal datasets (which have a $ in their name), and * temporary datasets (which have a % in their name). 
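The listing ioctls that follow (zfs_ioc_dataset_list_next(), zfs_ioc_snapshot_list_next()) expose a simple cursor protocol: the caller resets zc_name to the parent before every call, zc_cookie carries the cursor, and ESRCH marks the end of the iteration. A hedged sketch of the userland loop; error handling is reduced to the essentials and a real caller would use libzfs.

#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/zfs_ioctl.h>

/*
 * Sketch: list the immediate children of 'parent' using the cursor
 * protocol of ZFS_IOC_DATASET_LIST_NEXT.
 */
static int
list_children_example(const char *parent)
{
	zfs_cmd_t zc = {"\0"};
	int fd, err;

	if ((fd = open("/dev/zfs", O_RDWR)) < 0)
		return (errno);

	for (;;) {
		(void) strlcpy(zc.zc_name, parent, sizeof (zc.zc_name));
		if (ioctl(fd, ZFS_IOC_DATASET_LIST_NEXT, &zc) != 0)
			break;
		/* zc_name now holds the child that was just returned. */
		(void) printf("%s\n", zc.zc_name);
	}
	err = (errno == ESRCH) ? 0 : errno;
	(void) close(fd);
	return (err);
}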
*/ if (strchr(name, '$') != NULL) return (B_TRUE); if (strchr(name, '%') != NULL) return (B_TRUE); if (!INGLOBALZONE(curproc) && !zone_dataset_visible(name, NULL)) return (B_TRUE); return (B_FALSE); } /* * inputs: * zc_name name of filesystem * zc_cookie zap cursor * zc_nvlist_dst_size size of buffer for property nvlist * * outputs: * zc_name name of next filesystem * zc_cookie zap cursor * zc_objset_stats stats * zc_nvlist_dst property nvlist * zc_nvlist_dst_size size of property nvlist */ static int zfs_ioc_dataset_list_next(zfs_cmd_t *zc) { objset_t *os; int error; char *p; size_t orig_len = strlen(zc->zc_name); top: if ((error = dmu_objset_hold(zc->zc_name, FTAG, &os))) { if (error == ENOENT) error = ESRCH; return (error); } p = strrchr(zc->zc_name, '/'); if (p == NULL || p[1] != '\0') (void) strlcat(zc->zc_name, "/", sizeof (zc->zc_name)); p = zc->zc_name + strlen(zc->zc_name); /* * Pre-fetch the datasets. dmu_objset_prefetch() always returns 0 * but is not declared void because its called by dmu_objset_find(). */ if (zc->zc_cookie == 0) { uint64_t cookie = 0; int len = sizeof (zc->zc_name) - (p - zc->zc_name); while (dmu_dir_list_next(os, len, p, NULL, &cookie) == 0) { if (!dataset_name_hidden(zc->zc_name)) (void) dmu_objset_prefetch(zc->zc_name, NULL); } } do { error = dmu_dir_list_next(os, sizeof (zc->zc_name) - (p - zc->zc_name), p, NULL, &zc->zc_cookie); if (error == ENOENT) error = ESRCH; } while (error == 0 && dataset_name_hidden(zc->zc_name)); dmu_objset_rele(os, FTAG); /* * If it's an internal dataset (ie. with a '$' in its name), * don't try to get stats for it, otherwise we'll return ENOENT. */ if (error == 0 && strchr(zc->zc_name, '$') == NULL) { error = zfs_ioc_objset_stats(zc); /* fill in the stats */ if (error == ENOENT) { /* We lost a race with destroy, get the next one. */ zc->zc_name[orig_len] = '\0'; goto top; } } return (error); } /* * inputs: * zc_name name of filesystem * zc_cookie zap cursor * zc_nvlist_dst_size size of buffer for property nvlist * * outputs: * zc_name name of next snapshot * zc_objset_stats stats * zc_nvlist_dst property nvlist * zc_nvlist_dst_size size of property nvlist */ static int zfs_ioc_snapshot_list_next(zfs_cmd_t *zc) { objset_t *os; int error; top: if (zc->zc_cookie == 0 && !zc->zc_simple) (void) dmu_objset_find(zc->zc_name, dmu_objset_prefetch, NULL, DS_FIND_SNAPSHOTS); error = dmu_objset_hold(zc->zc_name, FTAG, &os); if (error) return (error == ENOENT ? ESRCH : error); /* * A dataset name of maximum length cannot have any snapshots, * so exit immediately. */ if (strlcat(zc->zc_name, "@", sizeof (zc->zc_name)) >= MAXNAMELEN) { dmu_objset_rele(os, FTAG); return (ESRCH); } error = dmu_snapshot_list_next(os, sizeof (zc->zc_name) - strlen(zc->zc_name), zc->zc_name + strlen(zc->zc_name), &zc->zc_obj, &zc->zc_cookie, NULL); if (error == 0 && !zc->zc_simple) { dsl_dataset_t *ds; dsl_pool_t *dp = os->os_dsl_dataset->ds_dir->dd_pool; /* * Since we probably don't have a hold on this snapshot, * it's possible that the objsetid could have been destroyed * and reused for a new objset. It's OK if this happens during * a zfs send operation, since the new createtxg will be * beyond the range we're interested in. */ rw_enter(&dp->dp_config_rwlock, RW_READER); error = dsl_dataset_hold_obj(dp, zc->zc_obj, FTAG, &ds); rw_exit(&dp->dp_config_rwlock); if (error) { if (error == ENOENT) { /* Racing with destroy, get the next one. 
*/ *strchr(zc->zc_name, '@') = '\0'; dmu_objset_rele(os, FTAG); goto top; } } else { objset_t *ossnap; error = dmu_objset_from_ds(ds, &ossnap); if (error == 0) error = zfs_ioc_objset_stats_impl(zc, ossnap); dsl_dataset_rele(ds, FTAG); } } else if (error == ENOENT) { error = ESRCH; } dmu_objset_rele(os, FTAG); /* if we failed, undo the @ that we tacked on to zc_name */ if (error) *strchr(zc->zc_name, '@') = '\0'; return (error); } static int zfs_prop_set_userquota(const char *dsname, nvpair_t *pair) { const char *propname = nvpair_name(pair); uint64_t *valary; unsigned int vallen; const char *domain; char *dash; zfs_userquota_prop_t type; uint64_t rid; uint64_t quota; zfs_sb_t *zsb; int err; if (nvpair_type(pair) == DATA_TYPE_NVLIST) { nvlist_t *attrs; VERIFY(nvpair_value_nvlist(pair, &attrs) == 0); if (nvlist_lookup_nvpair(attrs, ZPROP_VALUE, &pair) != 0) return (EINVAL); } /* * A correctly constructed propname is encoded as * userquota@-. */ if ((dash = strchr(propname, '-')) == NULL || nvpair_value_uint64_array(pair, &valary, &vallen) != 0 || vallen != 3) return (EINVAL); domain = dash + 1; type = valary[0]; rid = valary[1]; quota = valary[2]; err = zfs_sb_hold(dsname, FTAG, &zsb, B_FALSE); if (err == 0) { err = zfs_set_userquota(zsb, type, domain, rid, quota); zfs_sb_rele(zsb, FTAG); } return (err); } /* * If the named property is one that has a special function to set its value, * return 0 on success and a positive error code on failure; otherwise if it is * not one of the special properties handled by this function, return -1. * * XXX: It would be better for callers of the property interface if we handled * these special cases in dsl_prop.c (in the dsl layer). */ static int zfs_prop_set_special(const char *dsname, zprop_source_t source, nvpair_t *pair) { const char *propname = nvpair_name(pair); zfs_prop_t prop = zfs_name_to_prop(propname); uint64_t intval; int err; if (prop == ZPROP_INVAL) { if (zfs_prop_userquota(propname)) return (zfs_prop_set_userquota(dsname, pair)); return (-1); } if (nvpair_type(pair) == DATA_TYPE_NVLIST) { nvlist_t *attrs; VERIFY(nvpair_value_nvlist(pair, &attrs) == 0); VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE, &pair) == 0); } if (zfs_prop_get_type(prop) == PROP_TYPE_STRING) return (-1); VERIFY(0 == nvpair_value_uint64(pair, &intval)); switch (prop) { case ZFS_PROP_QUOTA: err = dsl_dir_set_quota(dsname, source, intval); break; case ZFS_PROP_REFQUOTA: err = dsl_dataset_set_quota(dsname, source, intval); break; case ZFS_PROP_RESERVATION: err = dsl_dir_set_reservation(dsname, source, intval); break; case ZFS_PROP_REFRESERVATION: err = dsl_dataset_set_reservation(dsname, source, intval); break; case ZFS_PROP_VOLSIZE: err = zvol_set_volsize(dsname, intval); break; case ZFS_PROP_SNAPDEV: err = zvol_set_snapdev(dsname, intval); break; case ZFS_PROP_VERSION: { zfs_sb_t *zsb; if ((err = zfs_sb_hold(dsname, FTAG, &zsb, B_TRUE)) != 0) break; err = zfs_set_version(zsb, intval); zfs_sb_rele(zsb, FTAG); if (err == 0 && intval >= ZPL_VERSION_USERSPACE) { zfs_cmd_t *zc; zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP | KM_NODEBUG); (void) strcpy(zc->zc_name, dsname); (void) zfs_ioc_userspace_upgrade(zc); kmem_free(zc, sizeof (zfs_cmd_t)); } break; } case ZFS_PROP_COMPRESSION: { if (intval == ZIO_COMPRESS_LZ4) { zfeature_info_t *feature = &spa_feature_table[SPA_FEATURE_LZ4_COMPRESS]; spa_t *spa; dsl_pool_t *dp; if ((err = spa_open(dsname, &spa, FTAG)) != 0) return (err); dp = spa->spa_dsl_pool; /* * Setting the LZ4 compression algorithm activates * the feature. 
*/ if (!spa_feature_is_active(spa, feature)) { if ((err = zfs_prop_activate_feature(dp, feature)) != 0) { spa_close(spa, FTAG); return (err); } } spa_close(spa, FTAG); } /* * We still want the default set action to be performed in the * caller, we only performed zfeature settings here. */ err = -1; break; } default: err = -1; } return (err); } /* * This function is best effort. If it fails to set any of the given properties, - * it continues to set as many as it can and returns the first error - * encountered. If the caller provides a non-NULL errlist, it also gives the - * complete list of names of all the properties it failed to set along with the - * corresponding error numbers. The caller is responsible for freeing the - * returned errlist. + * it continues to set as many as it can and returns the last error + * encountered. If the caller provides a non-NULL errlist, it will be filled in + * with the list of names of all the properties that failed along with the + * corresponding error numbers. * - * If every property is set successfully, zero is returned and the list pointed - * at by errlist is NULL. + * If every property is set successfully, zero is returned and errlist is not + * modified. */ int zfs_set_prop_nvlist(const char *dsname, zprop_source_t source, nvlist_t *nvl, - nvlist_t **errlist) + nvlist_t *errlist) { nvpair_t *pair; nvpair_t *propval; int rv = 0; uint64_t intval; char *strval; - nvlist_t *genericnvl; - nvlist_t *errors; - nvlist_t *retrynvl; - - VERIFY(nvlist_alloc(&genericnvl, NV_UNIQUE_NAME, KM_SLEEP) == 0); - VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0); - VERIFY(nvlist_alloc(&retrynvl, NV_UNIQUE_NAME, KM_SLEEP) == 0); + nvlist_t *genericnvl = fnvlist_alloc(); + nvlist_t *retrynvl = fnvlist_alloc(); retry: pair = NULL; while ((pair = nvlist_next_nvpair(nvl, pair)) != NULL) { const char *propname = nvpair_name(pair); zfs_prop_t prop = zfs_name_to_prop(propname); int err = 0; /* decode the property value */ propval = pair; if (nvpair_type(pair) == DATA_TYPE_NVLIST) { nvlist_t *attrs; - VERIFY(nvpair_value_nvlist(pair, &attrs) == 0); + attrs = fnvpair_value_nvlist(pair); if (nvlist_lookup_nvpair(attrs, ZPROP_VALUE, &propval) != 0) err = EINVAL; } /* Validate value type */ if (err == 0 && prop == ZPROP_INVAL) { if (zfs_prop_user(propname)) { if (nvpair_type(propval) != DATA_TYPE_STRING) err = EINVAL; } else if (zfs_prop_userquota(propname)) { if (nvpair_type(propval) != DATA_TYPE_UINT64_ARRAY) err = EINVAL; } else { err = EINVAL; } } else if (err == 0) { if (nvpair_type(propval) == DATA_TYPE_STRING) { if (zfs_prop_get_type(prop) != PROP_TYPE_STRING) err = EINVAL; } else if (nvpair_type(propval) == DATA_TYPE_UINT64) { const char *unused; - VERIFY(nvpair_value_uint64(propval, - &intval) == 0); + intval = fnvpair_value_uint64(propval); switch (zfs_prop_get_type(prop)) { case PROP_TYPE_NUMBER: break; case PROP_TYPE_STRING: err = EINVAL; break; case PROP_TYPE_INDEX: if (zfs_prop_index_to_string(prop, intval, &unused) != 0) err = EINVAL; break; default: cmn_err(CE_PANIC, "unknown property type"); } } else { err = EINVAL; } } /* Validate permissions */ if (err == 0) err = zfs_check_settable(dsname, pair, CRED()); if (err == 0) { err = zfs_prop_set_special(dsname, source, pair); if (err == -1) { /* * For better performance we build up a list of * properties to set in a single transaction. 
*/ err = nvlist_add_nvpair(genericnvl, pair); } else if (err != 0 && nvl != retrynvl) { /* * This may be a spurious error caused by * receiving quota and reservation out of order. * Try again in a second pass. */ err = nvlist_add_nvpair(retrynvl, pair); } } - if (err != 0) - VERIFY(nvlist_add_int32(errors, propname, err) == 0); + if (err != 0) { + if (errlist != NULL) + fnvlist_add_int32(errlist, propname, err); + rv = err; + } } if (nvl != retrynvl && !nvlist_empty(retrynvl)) { nvl = retrynvl; goto retry; } if (!nvlist_empty(genericnvl) && dsl_props_set(dsname, source, genericnvl) != 0) { /* * If this fails, we still want to set as many properties as we * can, so try setting them individually. */ pair = NULL; while ((pair = nvlist_next_nvpair(genericnvl, pair)) != NULL) { const char *propname = nvpair_name(pair); int err = 0; propval = pair; if (nvpair_type(pair) == DATA_TYPE_NVLIST) { nvlist_t *attrs; - VERIFY(nvpair_value_nvlist(pair, &attrs) == 0); - VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE, - &propval) == 0); + attrs = fnvpair_value_nvlist(pair); + propval = fnvlist_lookup_nvpair(attrs, + ZPROP_VALUE); } if (nvpair_type(propval) == DATA_TYPE_STRING) { - VERIFY(nvpair_value_string(propval, - &strval) == 0); + strval = fnvpair_value_string(propval); err = dsl_prop_set(dsname, propname, source, 1, strlen(strval) + 1, strval); } else { - VERIFY(nvpair_value_uint64(propval, - &intval) == 0); + intval = fnvpair_value_uint64(propval); err = dsl_prop_set(dsname, propname, source, 8, 1, &intval); } if (err != 0) { - VERIFY(nvlist_add_int32(errors, propname, - err) == 0); + if (errlist != NULL) { + fnvlist_add_int32(errlist, propname, + err); + } + rv = err; } } } nvlist_free(genericnvl); nvlist_free(retrynvl); - if ((pair = nvlist_next_nvpair(errors, NULL)) == NULL) { - nvlist_free(errors); - errors = NULL; - } else { - VERIFY(nvpair_value_int32(pair, &rv) == 0); - } - - if (errlist == NULL) - nvlist_free(errors); - else - *errlist = errors; - return (rv); } /* * Check that all the properties are valid user properties. */ static int -zfs_check_userprops(char *fsname, nvlist_t *nvl) +zfs_check_userprops(const char *fsname, nvlist_t *nvl) { nvpair_t *pair = NULL; int error = 0; while ((pair = nvlist_next_nvpair(nvl, pair)) != NULL) { const char *propname = nvpair_name(pair); char *valstr; if (!zfs_prop_user(propname) || nvpair_type(pair) != DATA_TYPE_STRING) return (EINVAL); if ((error = zfs_secpolicy_write_perms(fsname, ZFS_DELEG_PERM_USERPROP, CRED()))) return (error); if (strlen(propname) >= ZAP_MAXNAMELEN) return (ENAMETOOLONG); VERIFY(nvpair_value_string(pair, &valstr) == 0); if (strlen(valstr) >= ZAP_MAXVALUELEN) return (E2BIG); } return (0); } static void props_skip(nvlist_t *props, nvlist_t *skipped, nvlist_t **newprops) { nvpair_t *pair; VERIFY(nvlist_alloc(newprops, NV_UNIQUE_NAME, KM_SLEEP) == 0); pair = NULL; while ((pair = nvlist_next_nvpair(props, pair)) != NULL) { if (nvlist_exists(skipped, nvpair_name(pair))) continue; VERIFY(nvlist_add_nvpair(*newprops, pair) == 0); } } static int clear_received_props(objset_t *os, const char *fs, nvlist_t *props, nvlist_t *skipped) { int err = 0; nvlist_t *cleared_props = NULL; props_skip(props, skipped, &cleared_props); if (!nvlist_empty(cleared_props)) { /* * Acts on local properties until the dataset has received * properties at least once on or after SPA_VERSION_RECVD_PROPS. */ zprop_source_t flags = (ZPROP_SRC_NONE | (dsl_prop_get_hasrecvd(os) ? 
ZPROP_SRC_RECEIVED : 0)); err = zfs_set_prop_nvlist(fs, flags, cleared_props, NULL); } nvlist_free(cleared_props); return (err); } /* * inputs: * zc_name name of filesystem * zc_value name of property to set * zc_nvlist_src{_size} nvlist of properties to apply * zc_cookie received properties flag * * outputs: * zc_nvlist_dst{_size} error for each unapplied received property */ static int zfs_ioc_set_prop(zfs_cmd_t *zc) { nvlist_t *nvl; boolean_t received = zc->zc_cookie; zprop_source_t source = (received ? ZPROP_SRC_RECEIVED : ZPROP_SRC_LOCAL); - nvlist_t *errors = NULL; + nvlist_t *errors; int error; if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, zc->zc_iflags, &nvl)) != 0) return (error); if (received) { nvlist_t *origprops; objset_t *os; if (dmu_objset_hold(zc->zc_name, FTAG, &os) == 0) { if (dsl_prop_get_received(os, &origprops) == 0) { (void) clear_received_props(os, zc->zc_name, origprops, nvl); nvlist_free(origprops); } dsl_prop_set_hasrecvd(os); dmu_objset_rele(os, FTAG); } } - error = zfs_set_prop_nvlist(zc->zc_name, source, nvl, &errors); + errors = fnvlist_alloc(); + error = zfs_set_prop_nvlist(zc->zc_name, source, nvl, errors); if (zc->zc_nvlist_dst != 0 && errors != NULL) { (void) put_nvlist(zc, errors); } nvlist_free(errors); nvlist_free(nvl); return (error); } /* * inputs: * zc_name name of filesystem * zc_value name of property to inherit * zc_cookie revert to received value if TRUE * * outputs: none */ static int zfs_ioc_inherit_prop(zfs_cmd_t *zc) { const char *propname = zc->zc_value; zfs_prop_t prop = zfs_name_to_prop(propname); boolean_t received = zc->zc_cookie; zprop_source_t source = (received ? ZPROP_SRC_NONE /* revert to received value, if any */ : ZPROP_SRC_INHERITED); /* explicitly inherit */ if (received) { nvlist_t *dummy; nvpair_t *pair; zprop_type_t type; int err; /* * zfs_prop_set_special() expects properties in the form of an * nvpair with type info. */ if (prop == ZPROP_INVAL) { if (!zfs_prop_user(propname)) return (EINVAL); type = PROP_TYPE_STRING; } else if (prop == ZFS_PROP_VOLSIZE || prop == ZFS_PROP_VERSION) { return (EINVAL); } else { type = zfs_prop_get_type(prop); } VERIFY(nvlist_alloc(&dummy, NV_UNIQUE_NAME, KM_SLEEP) == 0); switch (type) { case PROP_TYPE_STRING: VERIFY(0 == nvlist_add_string(dummy, propname, "")); break; case PROP_TYPE_NUMBER: case PROP_TYPE_INDEX: VERIFY(0 == nvlist_add_uint64(dummy, propname, 0)); break; default: nvlist_free(dummy); return (EINVAL); } pair = nvlist_next_nvpair(dummy, NULL); err = zfs_prop_set_special(zc->zc_name, source, pair); nvlist_free(dummy); if (err != -1) return (err); /* special property already handled */ } else { /* * Only check this in the non-received case. We want to allow * 'inherit -S' to revert non-inheritable properties like quota * and reservation to the received or default values even though * they are not considered inheritable. */ if (prop != ZPROP_INVAL && !zfs_prop_inheritable(prop)) return (EINVAL); } - /* the property name has been validated by zfs_secpolicy_inherit() */ + /* property name has been validated by zfs_secpolicy_inherit_prop() */ return (dsl_prop_set(zc->zc_name, zc->zc_value, source, 0, 0, NULL)); } static int zfs_ioc_pool_set_props(zfs_cmd_t *zc) { nvlist_t *props; spa_t *spa; int error; nvpair_t *pair; if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, zc->zc_iflags, &props))) return (error); /* * If the only property is the configfile, then just do a spa_lookup() * to handle the faulted case. 
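 * A minimal sketch of a props nvlist that takes this shortcut
 * (illustrative; the cache file path is only an example value):
 *
 *     nvlist_t *props = fnvlist_alloc();
 *     fnvlist_add_string(props,
 *         zpool_prop_to_name(ZPOOL_PROP_CACHEFILE),
 *         "/etc/zfs/zpool.cache");
 *
 * With exactly that one pair present, the code below only needs
 * spa_lookup() and spa_configfile_set()/spa_config_sync(), which work
 * even when the pool is faulted and a full spa_open() would fail.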
*/ pair = nvlist_next_nvpair(props, NULL); if (pair != NULL && strcmp(nvpair_name(pair), zpool_prop_to_name(ZPOOL_PROP_CACHEFILE)) == 0 && nvlist_next_nvpair(props, pair) == NULL) { mutex_enter(&spa_namespace_lock); if ((spa = spa_lookup(zc->zc_name)) != NULL) { spa_configfile_set(spa, props, B_FALSE); spa_config_sync(spa, B_FALSE, B_TRUE); } mutex_exit(&spa_namespace_lock); if (spa != NULL) { nvlist_free(props); return (0); } } if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) { nvlist_free(props); return (error); } error = spa_prop_set(spa, props); nvlist_free(props); spa_close(spa, FTAG); return (error); } static int zfs_ioc_pool_get_props(zfs_cmd_t *zc) { spa_t *spa; int error; nvlist_t *nvp = NULL; if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) { /* * If the pool is faulted, there may be properties we can still * get (such as altroot and cachefile), so attempt to get them * anyway. */ mutex_enter(&spa_namespace_lock); if ((spa = spa_lookup(zc->zc_name)) != NULL) error = spa_prop_get(spa, &nvp); mutex_exit(&spa_namespace_lock); } else { error = spa_prop_get(spa, &nvp); spa_close(spa, FTAG); } if (error == 0 && zc->zc_nvlist_dst != 0) error = put_nvlist(zc, nvp); else error = EFAULT; nvlist_free(nvp); return (error); } /* * inputs: * zc_name name of volume * * outputs: none */ static int zfs_ioc_create_minor(zfs_cmd_t *zc) { return (zvol_create_minor(zc->zc_name)); } /* * inputs: * zc_name name of volume * * outputs: none */ static int zfs_ioc_remove_minor(zfs_cmd_t *zc) { return (zvol_remove_minor(zc->zc_name)); } /* * inputs: * zc_name name of filesystem * zc_nvlist_src{_size} nvlist of delegated permissions * zc_perm_action allow/unallow flag * * outputs: none */ static int zfs_ioc_set_fsacl(zfs_cmd_t *zc) { int error; nvlist_t *fsaclnv = NULL; if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, zc->zc_iflags, &fsaclnv)) != 0) return (error); /* * Verify nvlist is constructed correctly */ if ((error = zfs_deleg_verify_nvlist(fsaclnv)) != 0) { nvlist_free(fsaclnv); return (EINVAL); } /* * If we don't have PRIV_SYS_MOUNT, then validate * that user is allowed to hand out each permission in * the nvlist(s) */ error = secpolicy_zfs(CRED()); if (error) { if (zc->zc_perm_action == B_FALSE) { error = dsl_deleg_can_allow(zc->zc_name, fsaclnv, CRED()); } else { error = dsl_deleg_can_unallow(zc->zc_name, fsaclnv, CRED()); } } if (error == 0) error = dsl_deleg_set(zc->zc_name, fsaclnv, zc->zc_perm_action); nvlist_free(fsaclnv); return (error); } /* * inputs: * zc_name name of filesystem * * outputs: * zc_nvlist_src{_size} nvlist of delegated permissions */ static int zfs_ioc_get_fsacl(zfs_cmd_t *zc) { nvlist_t *nvp; int error; if ((error = dsl_deleg_get(zc->zc_name, &nvp)) == 0) { error = put_nvlist(zc, nvp); nvlist_free(nvp); } return (error); } /* ARGSUSED */ static void zfs_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx) { zfs_creat_t *zct = arg; zfs_create_fs(os, cr, zct->zct_zplprops, tx); } #define ZFS_PROP_UNDEFINED ((uint64_t)-1) /* * inputs: * createprops list of properties requested by creator * default_zplver zpl version to use if unspecified in createprops * fuids_ok fuids allowed in this version of the spa? * os parent objset pointer (NULL if root fs) * * outputs: * zplprops values for the zplprops we attach to the master node object * is_ci true if requested file system will be purely case-insensitive * * Determine the settings for utf8only, normalization and * casesensitivity. 
Specific values may have been requested by the * creator and/or we can inherit values from the parent dataset. If * the file system is of too early a vintage, a creator can not * request settings for these properties, even if the requested * setting is the default value. We don't actually want to create dsl * properties for these, so remove them from the source nvlist after * processing. */ static int zfs_fill_zplprops_impl(objset_t *os, uint64_t zplver, boolean_t fuids_ok, boolean_t sa_ok, nvlist_t *createprops, nvlist_t *zplprops, boolean_t *is_ci) { uint64_t sense = ZFS_PROP_UNDEFINED; uint64_t norm = ZFS_PROP_UNDEFINED; uint64_t u8 = ZFS_PROP_UNDEFINED; int error; ASSERT(zplprops != NULL); /* * Pull out creator prop choices, if any. */ if (createprops) { (void) nvlist_lookup_uint64(createprops, zfs_prop_to_name(ZFS_PROP_VERSION), &zplver); (void) nvlist_lookup_uint64(createprops, zfs_prop_to_name(ZFS_PROP_NORMALIZE), &norm); (void) nvlist_remove_all(createprops, zfs_prop_to_name(ZFS_PROP_NORMALIZE)); (void) nvlist_lookup_uint64(createprops, zfs_prop_to_name(ZFS_PROP_UTF8ONLY), &u8); (void) nvlist_remove_all(createprops, zfs_prop_to_name(ZFS_PROP_UTF8ONLY)); (void) nvlist_lookup_uint64(createprops, zfs_prop_to_name(ZFS_PROP_CASE), &sense); (void) nvlist_remove_all(createprops, zfs_prop_to_name(ZFS_PROP_CASE)); } /* * If the zpl version requested is whacky or the file system * or pool is version is too "young" to support normalization * and the creator tried to set a value for one of the props, * error out. */ if ((zplver < ZPL_VERSION_INITIAL || zplver > ZPL_VERSION) || (zplver >= ZPL_VERSION_FUID && !fuids_ok) || (zplver >= ZPL_VERSION_SA && !sa_ok) || (zplver < ZPL_VERSION_NORMALIZATION && (norm != ZFS_PROP_UNDEFINED || u8 != ZFS_PROP_UNDEFINED || sense != ZFS_PROP_UNDEFINED))) return (ENOTSUP); /* * Put the version in the zplprops */ VERIFY(nvlist_add_uint64(zplprops, zfs_prop_to_name(ZFS_PROP_VERSION), zplver) == 0); if (norm == ZFS_PROP_UNDEFINED && (error = zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &norm)) != 0) return (error); VERIFY(nvlist_add_uint64(zplprops, zfs_prop_to_name(ZFS_PROP_NORMALIZE), norm) == 0); /* * If we're normalizing, names must always be valid UTF-8 strings. */ if (norm) u8 = 1; if (u8 == ZFS_PROP_UNDEFINED && (error = zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &u8)) != 0) return (error); VERIFY(nvlist_add_uint64(zplprops, zfs_prop_to_name(ZFS_PROP_UTF8ONLY), u8) == 0); if (sense == ZFS_PROP_UNDEFINED && (error = zfs_get_zplprop(os, ZFS_PROP_CASE, &sense)) != 0) return (error); VERIFY(nvlist_add_uint64(zplprops, zfs_prop_to_name(ZFS_PROP_CASE), sense) == 0); if (is_ci) *is_ci = (sense == ZFS_CASE_INSENSITIVE); return (0); } static int zfs_fill_zplprops(const char *dataset, nvlist_t *createprops, nvlist_t *zplprops, boolean_t *is_ci) { boolean_t fuids_ok, sa_ok; uint64_t zplver = ZPL_VERSION; objset_t *os = NULL; char parentname[MAXNAMELEN]; char *cp; spa_t *spa; uint64_t spa_vers; int error; (void) strlcpy(parentname, dataset, sizeof (parentname)); cp = strrchr(parentname, '/'); ASSERT(cp != NULL); cp[0] = '\0'; if ((error = spa_open(dataset, &spa, FTAG)) != 0) return (error); spa_vers = spa_version(spa); spa_close(spa, FTAG); zplver = zfs_zpl_version_map(spa_vers); fuids_ok = (zplver >= ZPL_VERSION_FUID); sa_ok = (zplver >= ZPL_VERSION_SA); /* * Open parent object set so we can inherit zplprop values. 
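 * For example (illustrative): a creator asking for a case-insensitive
 * file system would have passed something like
 *
 *     fnvlist_add_uint64(createprops,
 *         zfs_prop_to_name(ZFS_PROP_CASE), ZFS_CASE_INSENSITIVE);
 *
 * which zfs_fill_zplprops_impl() folds into zplprops and strips from
 * createprops; any of these props not supplied by the creator is
 * inherited from the parent objset held below.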
*/ if ((error = dmu_objset_hold(parentname, FTAG, &os)) != 0) return (error); error = zfs_fill_zplprops_impl(os, zplver, fuids_ok, sa_ok, createprops, zplprops, is_ci); dmu_objset_rele(os, FTAG); return (error); } static int zfs_fill_zplprops_root(uint64_t spa_vers, nvlist_t *createprops, nvlist_t *zplprops, boolean_t *is_ci) { boolean_t fuids_ok; boolean_t sa_ok; uint64_t zplver = ZPL_VERSION; int error; zplver = zfs_zpl_version_map(spa_vers); fuids_ok = (zplver >= ZPL_VERSION_FUID); sa_ok = (zplver >= ZPL_VERSION_SA); error = zfs_fill_zplprops_impl(NULL, zplver, fuids_ok, sa_ok, createprops, zplprops, is_ci); return (error); } /* - * inputs: - * zc_objset_type type of objset to create (fs vs zvol) - * zc_name name of new objset - * zc_value name of snapshot to clone from (may be empty) - * zc_nvlist_src{_size} nvlist of properties to apply + * innvl: { + * "type" -> dmu_objset_type_t (int32) + * (optional) "props" -> { prop -> value } + * } * - * outputs: none + * outnvl: propname -> error code (int32) */ static int -zfs_ioc_create(zfs_cmd_t *zc) +zfs_ioc_create(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl) { - objset_t *clone; int error = 0; - zfs_creat_t zct; + zfs_creat_t zct = { 0 }; nvlist_t *nvprops = NULL; void (*cbfunc)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx); - dmu_objset_type_t type = zc->zc_objset_type; + int32_t type32; + dmu_objset_type_t type; + boolean_t is_insensitive = B_FALSE; - switch (type) { + if (nvlist_lookup_int32(innvl, "type", &type32) != 0) + return (EINVAL); + type = type32; + (void) nvlist_lookup_nvlist(innvl, "props", &nvprops); + switch (type) { case DMU_OST_ZFS: cbfunc = zfs_create_cb; break; case DMU_OST_ZVOL: cbfunc = zvol_create_cb; break; default: cbfunc = NULL; break; } - if (strchr(zc->zc_name, '@') || - strchr(zc->zc_name, '%')) + if (strchr(fsname, '@') || + strchr(fsname, '%')) return (EINVAL); - if (zc->zc_nvlist_src != 0 && - (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, - zc->zc_iflags, &nvprops)) != 0) - return (error); - - zct.zct_zplprops = NULL; zct.zct_props = nvprops; - if (zc->zc_value[0] != '\0') { - /* - * We're creating a clone of an existing snapshot. 
- */ - zc->zc_value[sizeof (zc->zc_value) - 1] = '\0'; - if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0) { - nvlist_free(nvprops); - return (EINVAL); - } - - error = dmu_objset_hold(zc->zc_value, FTAG, &clone); - if (error) { - nvlist_free(nvprops); - return (error); - } + if (cbfunc == NULL) + return (EINVAL); - error = dmu_objset_clone(zc->zc_name, dmu_objset_ds(clone), 0); - dmu_objset_rele(clone, FTAG); - if (error) { - nvlist_free(nvprops); - return (error); - } - } else { - boolean_t is_insensitive = B_FALSE; + if (type == DMU_OST_ZVOL) { + uint64_t volsize, volblocksize; - if (cbfunc == NULL) { - nvlist_free(nvprops); + if (nvprops == NULL) + return (EINVAL); + if (nvlist_lookup_uint64(nvprops, + zfs_prop_to_name(ZFS_PROP_VOLSIZE), &volsize) != 0) return (EINVAL); - } - - if (type == DMU_OST_ZVOL) { - uint64_t volsize, volblocksize; - - if (nvprops == NULL || - nvlist_lookup_uint64(nvprops, - zfs_prop_to_name(ZFS_PROP_VOLSIZE), - &volsize) != 0) { - nvlist_free(nvprops); - return (EINVAL); - } - if ((error = nvlist_lookup_uint64(nvprops, - zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), - &volblocksize)) != 0 && error != ENOENT) { - nvlist_free(nvprops); - return (EINVAL); - } + if ((error = nvlist_lookup_uint64(nvprops, + zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), + &volblocksize)) != 0 && error != ENOENT) + return (EINVAL); - if (error != 0) - volblocksize = zfs_prop_default_numeric( - ZFS_PROP_VOLBLOCKSIZE); + if (error != 0) + volblocksize = zfs_prop_default_numeric( + ZFS_PROP_VOLBLOCKSIZE); - if ((error = zvol_check_volblocksize( - volblocksize)) != 0 || - (error = zvol_check_volsize(volsize, - volblocksize)) != 0) { - nvlist_free(nvprops); - return (error); - } - } else if (type == DMU_OST_ZFS) { - int error; + if ((error = zvol_check_volblocksize( + volblocksize)) != 0 || + (error = zvol_check_volsize(volsize, + volblocksize)) != 0) + return (error); + } else if (type == DMU_OST_ZFS) { + int error; - /* - * We have to have normalization and - * case-folding flags correct when we do the - * file system creation, so go figure them out - * now. - */ - VERIFY(nvlist_alloc(&zct.zct_zplprops, - NV_UNIQUE_NAME, KM_SLEEP) == 0); - error = zfs_fill_zplprops(zc->zc_name, nvprops, - zct.zct_zplprops, &is_insensitive); - if (error != 0) { - nvlist_free(nvprops); - nvlist_free(zct.zct_zplprops); - return (error); - } + /* + * We have to have normalization and + * case-folding flags correct when we do the + * file system creation, so go figure them out + * now. + */ + VERIFY(nvlist_alloc(&zct.zct_zplprops, + NV_UNIQUE_NAME, KM_SLEEP) == 0); + error = zfs_fill_zplprops(fsname, nvprops, + zct.zct_zplprops, &is_insensitive); + if (error != 0) { + nvlist_free(zct.zct_zplprops); + return (error); } - error = dmu_objset_create(zc->zc_name, type, - is_insensitive ? DS_FLAG_CI_DATASET : 0, cbfunc, &zct); - nvlist_free(zct.zct_zplprops); } + error = dmu_objset_create(fsname, type, + is_insensitive ? DS_FLAG_CI_DATASET : 0, cbfunc, &zct); + nvlist_free(zct.zct_zplprops); + /* * It would be nice to do this atomically. 
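 * For reference, a sketch of the innvl that drives this new-style
 * ioctl (illustrative values; the 1G volsize is only an example):
 *
 *     nvlist_t *innvl = fnvlist_alloc();
 *     nvlist_t *props = fnvlist_alloc();
 *     fnvlist_add_uint64(props,
 *         zfs_prop_to_name(ZFS_PROP_VOLSIZE), 1ULL << 30);
 *     fnvlist_add_int32(innvl, "type", DMU_OST_ZVOL);
 *     fnvlist_add_nvlist(innvl, "props", props);
 *
 * Any properties that fail to set after creation are reported back to
 * the caller as int32 error codes in outnvl.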
*/ if (error == 0) { - error = zfs_set_prop_nvlist(zc->zc_name, ZPROP_SRC_LOCAL, - nvprops, NULL); + error = zfs_set_prop_nvlist(fsname, ZPROP_SRC_LOCAL, + nvprops, outnvl); if (error != 0) - (void) dmu_objset_destroy(zc->zc_name, B_FALSE); + (void) dmu_objset_destroy(fsname, B_FALSE); } - nvlist_free(nvprops); return (error); } /* - * inputs: - * zc_name name of filesystem - * zc_value short name of snapshot - * zc_cookie recursive flag - * zc_nvlist_src[_size] property list + * innvl: { + * "origin" -> name of origin snapshot + * (optional) "props" -> { prop -> value } + * } * * outputs: - * zc_value short snapname (i.e. part after the '@') + * outnvl: propname -> error code (int32) */ static int -zfs_ioc_snapshot(zfs_cmd_t *zc) +zfs_ioc_clone(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl) { + int error = 0; nvlist_t *nvprops = NULL; - int error; - boolean_t recursive = zc->zc_cookie; + char *origin_name; + dsl_dataset_t *origin; - if (snapshot_namecheck(zc->zc_value, NULL, NULL) != 0) + if (nvlist_lookup_string(innvl, "origin", &origin_name) != 0) return (EINVAL); + (void) nvlist_lookup_nvlist(innvl, "props", &nvprops); - if (zc->zc_nvlist_src != 0 && - (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, - zc->zc_iflags, &nvprops)) != 0) + if (strchr(fsname, '@') || + strchr(fsname, '%')) + return (EINVAL); + + if (dataset_namecheck(origin_name, NULL, NULL) != 0) + return (EINVAL); + + error = dsl_dataset_hold(origin_name, FTAG, &origin); + if (error) return (error); - error = zfs_check_userprops(zc->zc_name, nvprops); + error = dmu_objset_clone(fsname, origin, 0); + dsl_dataset_rele(origin, FTAG); if (error) - goto out; + return (error); - if (!nvlist_empty(nvprops) && - zfs_earlier_version(zc->zc_name, SPA_VERSION_SNAP_PROPS)) { - error = ENOTSUP; - goto out; + /* + * It would be nice to do this atomically. + */ + if (error == 0) { + error = zfs_set_prop_nvlist(fsname, ZPROP_SRC_LOCAL, + nvprops, outnvl); + if (error != 0) + (void) dmu_objset_destroy(fsname, B_FALSE); } + return (error); +} - error = dmu_objset_snapshot(zc->zc_name, zc->zc_value, NULL, - nvprops, recursive, B_FALSE, -1); +/* + * innvl: { + * "snaps" -> { snapshot1, snapshot2 } + * (optional) "props" -> { prop -> value (string) } + * } + * + * outnvl: snapshot -> error code (int32) + * + */ +static int +zfs_ioc_snapshot(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl) +{ + nvlist_t *snaps; + nvlist_t *props = NULL; + int error, poollen; + nvpair_t *pair, *pair2; -out: - nvlist_free(nvprops); + (void) nvlist_lookup_nvlist(innvl, "props", &props); + if ((error = zfs_check_userprops(poolname, props)) != 0) + return (error); + + if (!nvlist_empty(props) && + zfs_earlier_version(poolname, SPA_VERSION_SNAP_PROPS)) + return (ENOTSUP); + + if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0) + return (EINVAL); + poollen = strlen(poolname); + for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL; + pair = nvlist_next_nvpair(snaps, pair)) { + const char *name = nvpair_name(pair); + const char *cp = strchr(name, '@'); + + /* + * The snap name must contain an @, and the part after it must + * contain only valid characters. + */ + if (cp == NULL || snapshot_namecheck(cp + 1, NULL, NULL) != 0) + return (EINVAL); + + /* + * The snap must be in the specified pool. + */ + if (strncmp(name, poolname, poollen) != 0 || + (name[poollen] != '/' && name[poollen] != '@')) + return (EXDEV); + + /* This must be the only snap of this fs. 
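 * For example (illustrative), with poolname "tank":
 *     { "tank/a@now", "tank/b@now" }  passes (one snap per fs)
 *     { "tank/a@s1", "tank/a@s2" }    fails with EXDEV (same fs twice)
 *     { "other/a@now" }               fails with EXDEV (wrong pool)
 *     { "tank/a" }                    fails with EINVAL (no '@')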
*/ + for (pair2 = nvlist_next_nvpair(snaps, pair); + pair2 != NULL; pair2 = nvlist_next_nvpair(snaps, pair2)) { + if (strncmp(name, nvpair_name(pair2), cp - name + 1) + == 0) { + return (EXDEV); + } + } + } + + error = dmu_objset_snapshot(snaps, props, outnvl); + return (error); +} + +/* + * innvl: "message" -> string + */ +/* ARGSUSED */ +static int +zfs_ioc_log_history(const char *unused, nvlist_t *innvl, nvlist_t *outnvl) +{ + char *message; + spa_t *spa; + int error; + char *poolname; + + /* + * The poolname in the ioctl is not set, we get it from the TSD, + * which was set at the end of the last successful ioctl that allows + * logging. The secpolicy func already checked that it is set. + * Only one log ioctl is allowed after each successful ioctl, so + * we clear the TSD here. + */ + poolname = tsd_get(zfs_allow_log_key); + (void) tsd_set(zfs_allow_log_key, NULL); + error = spa_open(poolname, &spa, FTAG); + strfree(poolname); + if (error != 0) + return (error); + + if (nvlist_lookup_string(innvl, "message", &message) != 0) { + spa_close(spa, FTAG); + return (EINVAL); + } + + if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY) { + spa_close(spa, FTAG); + return (ENOTSUP); + } + + error = spa_history_log(spa, message); + spa_close(spa, FTAG); return (error); } /* * inputs: * name dataset name, or when 'arg == NULL' the full snapshot name * arg short snapshot name (i.e. part after the '@') */ +/* ARGSUSED */ int zfs_unmount_snap(const char *name, void *arg) { zfs_sb_t *zsb = NULL; char *dsname; char *snapname; char *fullname; char *ptr; int error; - if (arg) { - dsname = strdup(name); - snapname = strdup(arg); - } else { - ptr = strchr(name, '@'); - if (ptr) { - dsname = strdup(name); - dsname[ptr - name] = '\0'; - snapname = strdup(ptr + 1); - } else { - return (0); - } - } + if ((ptr = strchr(name, '@')) == NULL) + return (0); + dsname = strdup(name); + dsname[ptr - name] = '\0'; + snapname = strdup(ptr + 1); fullname = kmem_asprintf("%s@%s", dsname, snapname); - error = zfs_sb_hold(dsname, FTAG, &zsb, B_FALSE); if (error == 0) { error = zfsctl_unmount_snapshot(zsb, fullname, MNT_FORCE); zfs_sb_rele(zsb, FTAG); /* Allow ENOENT for consistency with upstream */ if (error == ENOENT) error = 0; } strfree(dsname); strfree(snapname); strfree(fullname); return (error); } /* - * inputs: - * zc_name name of filesystem, snaps must be under it - * zc_nvlist_src[_size] full names of snapshots to destroy - * zc_defer_destroy mark for deferred destroy + * innvl: { + * "snaps" -> { snapshot1, snapshot2 } + * (optional boolean) "defer" + * } * - * outputs: - * zc_name on failure, name of failed snapshot + * outnvl: snapshot -> error code (int32) */ static int -zfs_ioc_destroy_snaps_nvl(zfs_cmd_t *zc) +zfs_ioc_destroy_snaps(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl) { - int err, len; - nvlist_t *nvl; + int poollen; + nvlist_t *snaps; nvpair_t *pair; + boolean_t defer; - if ((err = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, - zc->zc_iflags, &nvl)) != 0) - return (err); + if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0) + return (EINVAL); + defer = nvlist_exists(innvl, "defer"); - len = strlen(zc->zc_name); - for (pair = nvlist_next_nvpair(nvl, NULL); pair != NULL; - pair = nvlist_next_nvpair(nvl, pair)) { + poollen = strlen(poolname); + for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL; + pair = nvlist_next_nvpair(snaps, pair)) { const char *name = nvpair_name(pair); + /* - * The snap name must be underneath the zc_name. 
This ensures - * that our permission checks were legitimate. + * The snap must be in the specified pool. */ - if (strncmp(zc->zc_name, name, len) != 0 || - (name[len] != '@' && name[len] != '/')) { - nvlist_free(nvl); - return (EINVAL); - } + if (strncmp(name, poolname, poollen) != 0 || + (name[poollen] != '/' && name[poollen] != '@')) + return (EXDEV); + /* + * Ignore failures to unmount; dmu_snapshots_destroy_nvl() + * will deal with this gracefully (by filling in outnvl). + */ (void) zfs_unmount_snap(name, NULL); (void) zvol_remove_minor(name); } - err = dmu_snapshots_destroy_nvl(nvl, zc->zc_defer_destroy, - zc->zc_name); - nvlist_free(nvl); - return (err); + return (dmu_snapshots_destroy_nvl(snaps, defer, outnvl)); } /* * inputs: * zc_name name of dataset to destroy * zc_objset_type type of objset * zc_defer_destroy mark for deferred destroy * * outputs: none */ static int zfs_ioc_destroy(zfs_cmd_t *zc) { int err; if (strchr(zc->zc_name, '@') && zc->zc_objset_type == DMU_OST_ZFS) { err = zfs_unmount_snap(zc->zc_name, NULL); if (err) return (err); } err = dmu_objset_destroy(zc->zc_name, zc->zc_defer_destroy); if (zc->zc_objset_type == DMU_OST_ZVOL && err == 0) (void) zvol_remove_minor(zc->zc_name); return (err); } /* * inputs: * zc_name name of dataset to rollback (to most recent snapshot) * * outputs: none */ static int zfs_ioc_rollback(zfs_cmd_t *zc) { dsl_dataset_t *ds, *clone; int error; zfs_sb_t *zsb; char *clone_name; error = dsl_dataset_hold(zc->zc_name, FTAG, &ds); if (error) return (error); /* must not be a snapshot */ if (dsl_dataset_is_snapshot(ds)) { dsl_dataset_rele(ds, FTAG); return (EINVAL); } /* must have a most recent snapshot */ if (ds->ds_phys->ds_prev_snap_txg < TXG_INITIAL) { dsl_dataset_rele(ds, FTAG); return (EINVAL); } /* * Create clone of most recent snapshot. */ clone_name = kmem_asprintf("%s/%%rollback", zc->zc_name); error = dmu_objset_clone(clone_name, ds->ds_prev, DS_FLAG_INCONSISTENT); if (error) goto out; error = dsl_dataset_own(clone_name, B_TRUE, FTAG, &clone); if (error) goto out; /* * Do clone swap. */ if (get_zfs_sb(zc->zc_name, &zsb) == 0) { error = zfs_suspend_fs(zsb); if (error == 0) { int resume_err; if (dsl_dataset_tryown(ds, B_FALSE, FTAG)) { error = dsl_dataset_clone_swap(clone, ds, B_TRUE); dsl_dataset_disown(ds, FTAG); ds = NULL; } else { error = EBUSY; } resume_err = zfs_resume_fs(zsb, zc->zc_name); error = error ? error : resume_err; } deactivate_super(zsb->z_sb); } else { if (dsl_dataset_tryown(ds, B_FALSE, FTAG)) { error = dsl_dataset_clone_swap(clone, ds, B_TRUE); dsl_dataset_disown(ds, FTAG); ds = NULL; } else { error = EBUSY; } } /* * Destroy clone (which also closes it). */ (void) dsl_dataset_destroy(clone, FTAG, B_FALSE); out: strfree(clone_name); if (ds) dsl_dataset_rele(ds, FTAG); return (error); } /* * inputs: * zc_name old name of dataset * zc_value new name of dataset * zc_cookie recursive flag (only valid for snapshots) * * outputs: none */ static int zfs_ioc_rename(zfs_cmd_t *zc) { boolean_t recursive = zc->zc_cookie & 1; int err; zc->zc_value[sizeof (zc->zc_value) - 1] = '\0'; if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 || strchr(zc->zc_value, '%')) return (EINVAL); /* * Unmount snapshot unless we're doing a recursive rename, * in which case the dataset code figures out which snapshots * to unmount. 
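 * For example (illustrative): renaming "tank/fs@old" to "tank/fs@new"
 * without the recursive flag unmounts the old snapshot here first;
 * a recursive rename (zfs rename -r tank@old tank@new) skips this and
 * lets the dataset code unmount the descendent snapshots it renames.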
*/ if (!recursive && strchr(zc->zc_name, '@') != NULL && zc->zc_objset_type == DMU_OST_ZFS) { err = zfs_unmount_snap(zc->zc_name, NULL); if (err) return (err); } err = dmu_objset_rename(zc->zc_name, zc->zc_value, recursive); if ((err == 0) && (zc->zc_objset_type == DMU_OST_ZVOL)) { (void) zvol_remove_minor(zc->zc_name); (void) zvol_create_minor(zc->zc_value); } return (err); } static int zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr) { const char *propname = nvpair_name(pair); boolean_t issnap = (strchr(dsname, '@') != NULL); zfs_prop_t prop = zfs_name_to_prop(propname); uint64_t intval; int err; if (prop == ZPROP_INVAL) { if (zfs_prop_user(propname)) { if ((err = zfs_secpolicy_write_perms(dsname, ZFS_DELEG_PERM_USERPROP, cr))) return (err); return (0); } if (!issnap && zfs_prop_userquota(propname)) { const char *perm = NULL; const char *uq_prefix = zfs_userquota_prop_prefixes[ZFS_PROP_USERQUOTA]; const char *gq_prefix = zfs_userquota_prop_prefixes[ZFS_PROP_GROUPQUOTA]; if (strncmp(propname, uq_prefix, strlen(uq_prefix)) == 0) { perm = ZFS_DELEG_PERM_USERQUOTA; } else if (strncmp(propname, gq_prefix, strlen(gq_prefix)) == 0) { perm = ZFS_DELEG_PERM_GROUPQUOTA; } else { /* USERUSED and GROUPUSED are read-only */ return (EINVAL); } if ((err = zfs_secpolicy_write_perms(dsname, perm, cr))) return (err); return (0); } return (EINVAL); } if (issnap) return (EINVAL); if (nvpair_type(pair) == DATA_TYPE_NVLIST) { /* * dsl_prop_get_all_impl() returns properties in this * format. */ nvlist_t *attrs; VERIFY(nvpair_value_nvlist(pair, &attrs) == 0); VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE, &pair) == 0); } /* * Check that this value is valid for this pool version */ switch (prop) { case ZFS_PROP_COMPRESSION: /* * If the user specified gzip compression, make sure * the SPA supports it. We ignore any errors here since * we'll catch them later. */ if (nvpair_type(pair) == DATA_TYPE_UINT64 && nvpair_value_uint64(pair, &intval) == 0) { if (intval >= ZIO_COMPRESS_GZIP_1 && intval <= ZIO_COMPRESS_GZIP_9 && zfs_earlier_version(dsname, SPA_VERSION_GZIP_COMPRESSION)) { return (ENOTSUP); } if (intval == ZIO_COMPRESS_ZLE && zfs_earlier_version(dsname, SPA_VERSION_ZLE_COMPRESSION)) return (ENOTSUP); if (intval == ZIO_COMPRESS_LZ4) { zfeature_info_t *feature = &spa_feature_table[ SPA_FEATURE_LZ4_COMPRESS]; spa_t *spa; if ((err = spa_open(dsname, &spa, FTAG)) != 0) return (err); if (!spa_feature_is_enabled(spa, feature)) { spa_close(spa, FTAG); return (ENOTSUP); } spa_close(spa, FTAG); } /* * If this is a bootable dataset then * verify that the compression algorithm * is supported for booting. We must return * something other than ENOTSUP since it * implies a downrev pool version. */ if (zfs_is_bootfs(dsname) && !BOOTFS_COMPRESS_VALID(intval)) { return (ERANGE); } } break; case ZFS_PROP_COPIES: if (zfs_earlier_version(dsname, SPA_VERSION_DITTO_BLOCKS)) return (ENOTSUP); break; case ZFS_PROP_DEDUP: if (zfs_earlier_version(dsname, SPA_VERSION_DEDUP)) return (ENOTSUP); break; case ZFS_PROP_SHARESMB: if (zpl_earlier_version(dsname, ZPL_VERSION_FUID)) return (ENOTSUP); break; case ZFS_PROP_ACLINHERIT: if (nvpair_type(pair) == DATA_TYPE_UINT64 && nvpair_value_uint64(pair, &intval) == 0) { if (intval == ZFS_ACL_PASSTHROUGH_X && zfs_earlier_version(dsname, SPA_VERSION_PASSTHROUGH_X)) return (ENOTSUP); } break; default: break; } return (zfs_secpolicy_setprop(dsname, prop, pair, CRED())); } /* * Activates a feature on a pool in response to a property setting. 
This * creates a new sync task which modifies the pool to reflect the feature * as being active. */ static int zfs_prop_activate_feature(dsl_pool_t *dp, zfeature_info_t *feature) { int err; /* EBUSY here indicates that the feature is already active */ err = dsl_sync_task_do(dp, zfs_prop_activate_feature_check, zfs_prop_activate_feature_sync, dp->dp_spa, feature, 2); if (err != 0 && err != EBUSY) return (err); else return (0); } /* * Checks for a race condition to make sure we don't increment a feature flag * multiple times. */ /*ARGSUSED*/ static int zfs_prop_activate_feature_check(void *arg1, void *arg2, dmu_tx_t *tx) { spa_t *spa = arg1; zfeature_info_t *feature = arg2; if (!spa_feature_is_active(spa, feature)) return (0); else return (EBUSY); } /* * The callback invoked on feature activation in the sync task caused by * zfs_prop_activate_feature. */ static void zfs_prop_activate_feature_sync(void *arg1, void *arg2, dmu_tx_t *tx) { spa_t *spa = arg1; zfeature_info_t *feature = arg2; spa_feature_incr(spa, feature, tx); } /* * Removes properties from the given props list that fail permission checks * needed to clear them and to restore them in case of a receive error. For each * property, make sure we have both set and inherit permissions. * * Returns the first error encountered if any permission checks fail. If the * caller provides a non-NULL errlist, it also gives the complete list of names * of all the properties that failed a permission check along with the * corresponding error numbers. The caller is responsible for freeing the * returned errlist. * * If every property checks out successfully, zero is returned and the list * pointed at by errlist is NULL. */ static int zfs_check_clearable(char *dataset, nvlist_t *props, nvlist_t **errlist) { zfs_cmd_t *zc; nvpair_t *pair, *next_pair; nvlist_t *errors; int err, rv = 0; if (props == NULL) return (0); VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0); zc = kmem_alloc(sizeof (zfs_cmd_t), KM_SLEEP | KM_NODEBUG); (void) strcpy(zc->zc_name, dataset); pair = nvlist_next_nvpair(props, NULL); while (pair != NULL) { next_pair = nvlist_next_nvpair(props, pair); (void) strcpy(zc->zc_value, nvpair_name(pair)); if ((err = zfs_check_settable(dataset, pair, CRED())) != 0 || - (err = zfs_secpolicy_inherit(zc, CRED())) != 0) { + (err = zfs_secpolicy_inherit_prop(zc, NULL, CRED())) != 0) { VERIFY(nvlist_remove_nvpair(props, pair) == 0); VERIFY(nvlist_add_int32(errors, zc->zc_value, err) == 0); } pair = next_pair; } kmem_free(zc, sizeof (zfs_cmd_t)); if ((pair = nvlist_next_nvpair(errors, NULL)) == NULL) { nvlist_free(errors); errors = NULL; } else { VERIFY(nvpair_value_int32(pair, &rv) == 0); } if (errlist == NULL) nvlist_free(errors); else *errlist = errors; return (rv); } static boolean_t propval_equals(nvpair_t *p1, nvpair_t *p2) { if (nvpair_type(p1) == DATA_TYPE_NVLIST) { /* dsl_prop_get_all_impl() format */ nvlist_t *attrs; VERIFY(nvpair_value_nvlist(p1, &attrs) == 0); VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE, &p1) == 0); } if (nvpair_type(p2) == DATA_TYPE_NVLIST) { nvlist_t *attrs; VERIFY(nvpair_value_nvlist(p2, &attrs) == 0); VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE, &p2) == 0); } if (nvpair_type(p1) != nvpair_type(p2)) return (B_FALSE); if (nvpair_type(p1) == DATA_TYPE_STRING) { char *valstr1, *valstr2; VERIFY(nvpair_value_string(p1, (char **)&valstr1) == 0); VERIFY(nvpair_value_string(p2, (char **)&valstr2) == 0); return (strcmp(valstr1, valstr2) == 0); } else { uint64_t intval1, intval2; VERIFY(nvpair_value_uint64(p1, 
&intval1) == 0); VERIFY(nvpair_value_uint64(p2, &intval2) == 0); return (intval1 == intval2); } } /* * Remove properties from props if they are not going to change (as determined * by comparison with origprops). Remove them from origprops as well, since we * do not need to clear or restore properties that won't change. */ static void props_reduce(nvlist_t *props, nvlist_t *origprops) { nvpair_t *pair, *next_pair; if (origprops == NULL) return; /* all props need to be received */ pair = nvlist_next_nvpair(props, NULL); while (pair != NULL) { const char *propname = nvpair_name(pair); nvpair_t *match; next_pair = nvlist_next_nvpair(props, pair); if ((nvlist_lookup_nvpair(origprops, propname, &match) != 0) || !propval_equals(pair, match)) goto next; /* need to set received value */ /* don't clear the existing received value */ (void) nvlist_remove_nvpair(origprops, match); /* don't bother receiving the property */ (void) nvlist_remove_nvpair(props, pair); next: pair = next_pair; } } #ifdef DEBUG static boolean_t zfs_ioc_recv_inject_err; #endif /* * inputs: * zc_name name of containing filesystem * zc_nvlist_src{_size} nvlist of properties to apply * zc_value name of snapshot to create * zc_string name of clone origin (if DRR_FLAG_CLONE) * zc_cookie file descriptor to recv from * zc_begin_record the BEGIN record of the stream (not byteswapped) * zc_guid force flag * zc_cleanup_fd cleanup-on-exit file descriptor * zc_action_handle handle for this guid/ds mapping (or zero on first call) * * outputs: * zc_cookie number of bytes read * zc_nvlist_dst{_size} error for each unapplied received property * zc_obj zprop_errflags_t * zc_action_handle handle for this guid/ds mapping */ static int zfs_ioc_recv(zfs_cmd_t *zc) { file_t *fp; objset_t *os; dmu_recv_cookie_t drc; boolean_t force = (boolean_t)zc->zc_guid; int fd; int error = 0; int props_error = 0; nvlist_t *errors; offset_t off; nvlist_t *props = NULL; /* sent properties */ nvlist_t *origprops = NULL; /* existing properties */ objset_t *origin = NULL; char *tosnap; char tofs[ZFS_MAXNAMELEN]; boolean_t first_recvd_props = B_FALSE; if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 || strchr(zc->zc_value, '@') == NULL || strchr(zc->zc_value, '%')) return (EINVAL); (void) strcpy(tofs, zc->zc_value); tosnap = strchr(tofs, '@'); *tosnap++ = '\0'; if (zc->zc_nvlist_src != 0 && (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, zc->zc_iflags, &props)) != 0) return (error); fd = zc->zc_cookie; fp = getf(fd); if (fp == NULL) { nvlist_free(props); return (EBADF); } VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0); if (props && dmu_objset_hold(tofs, FTAG, &os) == 0) { if ((spa_version(os->os_spa) >= SPA_VERSION_RECVD_PROPS) && !dsl_prop_get_hasrecvd(os)) { first_recvd_props = B_TRUE; } /* * If new received properties are supplied, they are to * completely replace the existing received properties, so stash * away the existing ones. */ if (dsl_prop_get_received(os, &origprops) == 0) { nvlist_t *errlist = NULL; /* * Don't bother writing a property if its value won't * change (and avoid the unnecessary security checks). * * The first receive after SPA_VERSION_RECVD_PROPS is a * special case where we blow away all local properties * regardless. 
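 * For example (illustrative), when this is not the first receive after
 * SPA_VERSION_RECVD_PROPS: if the stream sends compression=on and the
 * dataset already has a received compression=on, props_reduce() below
 * drops that pair from both props and origprops, so it is neither
 * re-set nor cleared/restored on error; a pair whose value differs
 * stays in both lists and is handled normally.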
*/ if (!first_recvd_props) props_reduce(props, origprops); if (zfs_check_clearable(tofs, origprops, &errlist) != 0) (void) nvlist_merge(errors, errlist, 0); nvlist_free(errlist); } dmu_objset_rele(os, FTAG); } if (zc->zc_string[0]) { error = dmu_objset_hold(zc->zc_string, FTAG, &origin); if (error) goto out; } error = dmu_recv_begin(tofs, tosnap, zc->zc_top_ds, &zc->zc_begin_record, force, origin, &drc); if (origin) dmu_objset_rele(origin, FTAG); if (error) goto out; /* * Set properties before we receive the stream so that they are applied * to the new data. Note that we must call dmu_recv_stream() if * dmu_recv_begin() succeeds. */ if (props) { - nvlist_t *errlist; - if (dmu_objset_from_ds(drc.drc_logical_ds, &os) == 0) { if (drc.drc_newfs) { if (spa_version(os->os_spa) >= SPA_VERSION_RECVD_PROPS) first_recvd_props = B_TRUE; } else if (origprops != NULL) { if (clear_received_props(os, tofs, origprops, first_recvd_props ? NULL : props) != 0) zc->zc_obj |= ZPROP_ERR_NOCLEAR; } else { zc->zc_obj |= ZPROP_ERR_NOCLEAR; } dsl_prop_set_hasrecvd(os); } else if (!drc.drc_newfs) { zc->zc_obj |= ZPROP_ERR_NOCLEAR; } (void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_RECEIVED, - props, &errlist); - (void) nvlist_merge(errors, errlist, 0); - nvlist_free(errlist); + props, errors); } - if (fit_error_list(zc, &errors) != 0 || put_nvlist(zc, errors) != 0) { + if (zc->zc_nvlist_dst_size != 0 && + (nvlist_smush(errors, zc->zc_nvlist_dst_size) != 0 || + put_nvlist(zc, errors) != 0)) { /* * Caller made zc->zc_nvlist_dst less than the minimum expected * size or supplied an invalid address. */ props_error = EINVAL; } off = fp->f_offset; error = dmu_recv_stream(&drc, fp->f_vnode, &off, zc->zc_cleanup_fd, &zc->zc_action_handle); if (error == 0) { zfs_sb_t *zsb = NULL; if (get_zfs_sb(tofs, &zsb) == 0) { /* online recv */ int end_err; error = zfs_suspend_fs(zsb); /* * If the suspend fails, then the recv_end will * likely also fail, and clean up after itself. */ end_err = dmu_recv_end(&drc); if (error == 0) error = zfs_resume_fs(zsb, tofs); error = error ? error : end_err; deactivate_super(zsb->z_sb); } else { error = dmu_recv_end(&drc); } } zc->zc_cookie = off - fp->f_offset; if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0) fp->f_offset = off; #ifdef DEBUG if (zfs_ioc_recv_inject_err) { zfs_ioc_recv_inject_err = B_FALSE; error = 1; } #endif /* * On error, restore the original props. */ if (error && props) { if (dmu_objset_hold(tofs, FTAG, &os) == 0) { if (clear_received_props(os, tofs, props, NULL) != 0) { /* * We failed to clear the received properties. * Since we may have left a $recvd value on the * system, we can't clear the $hasrecvd flag. */ zc->zc_obj |= ZPROP_ERR_NORESTORE; } else if (first_recvd_props) { dsl_prop_unset_hasrecvd(os); } dmu_objset_rele(os, FTAG); } else if (!drc.drc_newfs) { /* We failed to clear the received properties. */ zc->zc_obj |= ZPROP_ERR_NORESTORE; } if (origprops == NULL && !drc.drc_newfs) { /* We failed to stash the original properties. */ zc->zc_obj |= ZPROP_ERR_NORESTORE; } /* * dsl_props_set() will not convert RECEIVED to LOCAL on or * after SPA_VERSION_RECVD_PROPS, so we need to specify LOCAL * explictly if we're restoring local properties cleared in the * first new-style receive. */ if (origprops != NULL && zfs_set_prop_nvlist(tofs, (first_recvd_props ? ZPROP_SRC_LOCAL : ZPROP_SRC_RECEIVED), origprops, NULL) != 0) { /* * We stashed the original properties but failed to * restore them. 
*/ zc->zc_obj |= ZPROP_ERR_NORESTORE; } } out: nvlist_free(props); nvlist_free(origprops); nvlist_free(errors); releasef(fd); if (error == 0) error = props_error; return (error); } /* * inputs: * zc_name name of snapshot to send * zc_cookie file descriptor to send stream to * zc_obj fromorigin flag (mutually exclusive with zc_fromobj) * zc_sendobj objsetid of snapshot to send * zc_fromobj objsetid of incremental fromsnap (may be zero) * zc_guid if set, estimate size of stream only. zc_cookie is ignored. * output size in zc_objset_type. * * outputs: none */ static int zfs_ioc_send(zfs_cmd_t *zc) { objset_t *fromsnap = NULL; objset_t *tosnap; int error; offset_t off; dsl_dataset_t *ds; dsl_dataset_t *dsfrom = NULL; spa_t *spa; dsl_pool_t *dp; boolean_t estimate = (zc->zc_guid != 0); error = spa_open(zc->zc_name, &spa, FTAG); if (error) return (error); dp = spa_get_dsl(spa); rw_enter(&dp->dp_config_rwlock, RW_READER); error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &ds); rw_exit(&dp->dp_config_rwlock); - if (error) { - spa_close(spa, FTAG); + spa_close(spa, FTAG); + if (error) return (error); - } error = dmu_objset_from_ds(ds, &tosnap); if (error) { dsl_dataset_rele(ds, FTAG); - spa_close(spa, FTAG); return (error); } if (zc->zc_fromobj != 0) { rw_enter(&dp->dp_config_rwlock, RW_READER); error = dsl_dataset_hold_obj(dp, zc->zc_fromobj, FTAG, &dsfrom); rw_exit(&dp->dp_config_rwlock); - spa_close(spa, FTAG); if (error) { dsl_dataset_rele(ds, FTAG); return (error); } error = dmu_objset_from_ds(dsfrom, &fromsnap); if (error) { dsl_dataset_rele(dsfrom, FTAG); dsl_dataset_rele(ds, FTAG); return (error); } - } else { - spa_close(spa, FTAG); + } + + if (zc->zc_obj) { + dsl_pool_t *dp = ds->ds_dir->dd_pool; + + if (fromsnap != NULL) { + dsl_dataset_rele(dsfrom, FTAG); + dsl_dataset_rele(ds, FTAG); + return (EINVAL); + } + + if (dsl_dir_is_clone(ds->ds_dir)) { + rw_enter(&dp->dp_config_rwlock, RW_READER); + error = dsl_dataset_hold_obj(dp, + ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &dsfrom); + rw_exit(&dp->dp_config_rwlock); + if (error) { + dsl_dataset_rele(ds, FTAG); + return (error); + } + error = dmu_objset_from_ds(dsfrom, &fromsnap); + if (error) { + dsl_dataset_rele(dsfrom, FTAG); + dsl_dataset_rele(ds, FTAG); + return (error); + } + } } if (estimate) { - error = dmu_send_estimate(tosnap, fromsnap, zc->zc_obj, + error = dmu_send_estimate(tosnap, fromsnap, &zc->zc_objset_type); } else { file_t *fp = getf(zc->zc_cookie); if (fp == NULL) { dsl_dataset_rele(ds, FTAG); if (dsfrom) dsl_dataset_rele(dsfrom, FTAG); return (EBADF); } off = fp->f_offset; - error = dmu_send(tosnap, fromsnap, zc->zc_obj, + error = dmu_send(tosnap, fromsnap, zc->zc_cookie, fp->f_vnode, &off); if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0) fp->f_offset = off; releasef(zc->zc_cookie); } if (dsfrom) dsl_dataset_rele(dsfrom, FTAG); dsl_dataset_rele(ds, FTAG); return (error); } /* * inputs: * zc_name name of snapshot on which to report progress * zc_cookie file descriptor of send stream * * outputs: * zc_cookie number of bytes written in send stream thus far */ static int zfs_ioc_send_progress(zfs_cmd_t *zc) { dsl_dataset_t *ds; dmu_sendarg_t *dsp = NULL; int error; if ((error = dsl_dataset_hold(zc->zc_name, FTAG, &ds)) != 0) return (error); mutex_enter(&ds->ds_sendstream_lock); /* * Iterate over all the send streams currently active on this dataset. * If there's one which matches the specified file descriptor _and_ the * stream was started by the current process, return the progress of * that stream. 
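 * A minimal caller-side sketch (illustrative; assumes the libzfs
 * zfs_ioctl() wrapper, a handle "hdl", and the ZFS_IOC_SEND_PROGRESS
 * request that reaches this function):
 *
 *     zfs_cmd_t zc = { "\0" };
 *     (void) strlcpy(zc.zc_name, "tank/fs@snap", sizeof (zc.zc_name));
 *     zc.zc_cookie = send_fd;
 *     if (zfs_ioctl(hdl, ZFS_IOC_SEND_PROGRESS, &zc) == 0)
 *         bytes_written = zc.zc_cookie;
 *
 * where send_fd is the descriptor passed to the in-progress send and
 * bytes_written receives the running byte count.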
*/ for (dsp = list_head(&ds->ds_sendstreams); dsp != NULL; dsp = list_next(&ds->ds_sendstreams, dsp)) { if (dsp->dsa_outfd == zc->zc_cookie && dsp->dsa_proc->group_leader == curproc->group_leader) break; } if (dsp != NULL) zc->zc_cookie = *(dsp->dsa_off); else error = ENOENT; mutex_exit(&ds->ds_sendstream_lock); dsl_dataset_rele(ds, FTAG); return (error); } static int zfs_ioc_inject_fault(zfs_cmd_t *zc) { int id, error; error = zio_inject_fault(zc->zc_name, (int)zc->zc_guid, &id, &zc->zc_inject_record); if (error == 0) zc->zc_guid = (uint64_t)id; return (error); } static int zfs_ioc_clear_fault(zfs_cmd_t *zc) { return (zio_clear_fault((int)zc->zc_guid)); } static int zfs_ioc_inject_list_next(zfs_cmd_t *zc) { int id = (int)zc->zc_guid; int error; error = zio_inject_list_next(&id, zc->zc_name, sizeof (zc->zc_name), &zc->zc_inject_record); zc->zc_guid = id; return (error); } static int zfs_ioc_error_log(zfs_cmd_t *zc) { spa_t *spa; int error; size_t count = (size_t)zc->zc_nvlist_dst_size; if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) return (error); error = spa_get_errlog(spa, (void *)(uintptr_t)zc->zc_nvlist_dst, &count); if (error == 0) zc->zc_nvlist_dst_size = count; else zc->zc_nvlist_dst_size = spa_get_errlog_size(spa); spa_close(spa, FTAG); return (error); } static int zfs_ioc_clear(zfs_cmd_t *zc) { spa_t *spa; vdev_t *vd; int error; /* * On zpool clear we also fix up missing slogs */ mutex_enter(&spa_namespace_lock); spa = spa_lookup(zc->zc_name); if (spa == NULL) { mutex_exit(&spa_namespace_lock); return (EIO); } if (spa_get_log_state(spa) == SPA_LOG_MISSING) { /* we need to let spa_open/spa_load clear the chains */ spa_set_log_state(spa, SPA_LOG_CLEAR); } spa->spa_last_open_failed = 0; mutex_exit(&spa_namespace_lock); if (zc->zc_cookie & ZPOOL_NO_REWIND) { error = spa_open(zc->zc_name, &spa, FTAG); } else { nvlist_t *policy; nvlist_t *config = NULL; if (zc->zc_nvlist_src == 0) return (EINVAL); if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, zc->zc_iflags, &policy)) == 0) { error = spa_open_rewind(zc->zc_name, &spa, FTAG, policy, &config); if (config != NULL) { int err; if ((err = put_nvlist(zc, config)) != 0) error = err; nvlist_free(config); } nvlist_free(policy); } } if (error) return (error); spa_vdev_state_enter(spa, SCL_NONE); if (zc->zc_guid == 0) { vd = NULL; } else { vd = spa_lookup_by_guid(spa, zc->zc_guid, B_TRUE); if (vd == NULL) { (void) spa_vdev_state_exit(spa, NULL, ENODEV); spa_close(spa, FTAG); return (ENODEV); } } vdev_clear(spa, vd); (void) spa_vdev_state_exit(spa, NULL, 0); /* * Resume any suspended I/Os. */ if (zio_resume(spa) != 0) error = EIO; spa_close(spa, FTAG); return (error); } static int zfs_ioc_pool_reopen(zfs_cmd_t *zc) { spa_t *spa; int error; error = spa_open(zc->zc_name, &spa, FTAG); if (error) return (error); spa_vdev_state_enter(spa, SCL_NONE); /* * If a resilver is already in progress then set the * spa_scrub_reopen flag to B_TRUE so that we don't restart * the scan as a side effect of the reopen. Otherwise, let * vdev_open() decided if a resilver is required. 
*/ spa->spa_scrub_reopen = dsl_scan_resilvering(spa->spa_dsl_pool); vdev_reopen(spa->spa_root_vdev); spa->spa_scrub_reopen = B_FALSE; (void) spa_vdev_state_exit(spa, NULL, 0); spa_close(spa, FTAG); return (0); } /* * inputs: * zc_name name of filesystem * zc_value name of origin snapshot * * outputs: * zc_string name of conflicting snapshot, if there is one */ static int zfs_ioc_promote(zfs_cmd_t *zc) { char *cp; /* * We don't need to unmount *all* the origin fs's snapshots, but * it's easier. */ cp = strchr(zc->zc_value, '@'); if (cp) *cp = '\0'; (void) dmu_objset_find(zc->zc_value, zfs_unmount_snap, NULL, DS_FIND_SNAPSHOTS); return (dsl_dataset_promote(zc->zc_name, zc->zc_string)); } /* * Retrieve a single {user|group}{used|quota}@... property. * * inputs: * zc_name name of filesystem * zc_objset_type zfs_userquota_prop_t * zc_value domain name (eg. "S-1-234-567-89") * zc_guid RID/UID/GID * * outputs: * zc_cookie property value */ static int zfs_ioc_userspace_one(zfs_cmd_t *zc) { zfs_sb_t *zsb; int error; if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS) return (EINVAL); error = zfs_sb_hold(zc->zc_name, FTAG, &zsb, B_FALSE); if (error) return (error); error = zfs_userspace_one(zsb, zc->zc_objset_type, zc->zc_value, zc->zc_guid, &zc->zc_cookie); zfs_sb_rele(zsb, FTAG); return (error); } /* * inputs: * zc_name name of filesystem * zc_cookie zap cursor * zc_objset_type zfs_userquota_prop_t * zc_nvlist_dst[_size] buffer to fill (not really an nvlist) * * outputs: * zc_nvlist_dst[_size] data buffer (array of zfs_useracct_t) * zc_cookie zap cursor */ static int zfs_ioc_userspace_many(zfs_cmd_t *zc) { zfs_sb_t *zsb; int bufsize = zc->zc_nvlist_dst_size; int error; void *buf; if (bufsize <= 0) return (ENOMEM); error = zfs_sb_hold(zc->zc_name, FTAG, &zsb, B_FALSE); if (error) return (error); buf = vmem_alloc(bufsize, KM_SLEEP); error = zfs_userspace_many(zsb, zc->zc_objset_type, &zc->zc_cookie, buf, &zc->zc_nvlist_dst_size); if (error == 0) { error = xcopyout(buf, (void *)(uintptr_t)zc->zc_nvlist_dst, zc->zc_nvlist_dst_size); } vmem_free(buf, bufsize); zfs_sb_rele(zsb, FTAG); return (error); } /* * inputs: * zc_name name of filesystem * * outputs: * none */ static int zfs_ioc_userspace_upgrade(zfs_cmd_t *zc) { objset_t *os; int error = 0; zfs_sb_t *zsb; if (get_zfs_sb(zc->zc_name, &zsb) == 0) { if (!dmu_objset_userused_enabled(zsb->z_os)) { /* * If userused is not enabled, it may be because the * objset needs to be closed & reopened (to grow the * objset_phys_t). Suspend/resume the fs will do that. 
*/ error = zfs_suspend_fs(zsb); if (error == 0) error = zfs_resume_fs(zsb, zc->zc_name); } if (error == 0) error = dmu_objset_userspace_upgrade(zsb->z_os); deactivate_super(zsb->z_sb); } else { /* XXX kind of reading contents without owning */ error = dmu_objset_hold(zc->zc_name, FTAG, &os); if (error) return (error); error = dmu_objset_userspace_upgrade(os); dmu_objset_rele(os, FTAG); } return (error); } static int zfs_ioc_share(zfs_cmd_t *zc) { return (ENOSYS); } ace_t full_access[] = { {(uid_t)-1, ACE_ALL_PERMS, ACE_EVERYONE, 0} }; /* * inputs: * zc_name name of containing filesystem * zc_obj object # beyond which we want next in-use object # * * outputs: * zc_obj next in-use object # */ static int zfs_ioc_next_obj(zfs_cmd_t *zc) { objset_t *os = NULL; int error; error = dmu_objset_hold(zc->zc_name, FTAG, &os); if (error) return (error); error = dmu_object_next(os, &zc->zc_obj, B_FALSE, os->os_dsl_dataset->ds_phys->ds_prev_snap_txg); dmu_objset_rele(os, FTAG); return (error); } /* * inputs: * zc_name name of filesystem * zc_value prefix name for snapshot * zc_cleanup_fd cleanup-on-exit file descriptor for calling process * * outputs: + * zc_value short name of new snapshot */ static int zfs_ioc_tmp_snapshot(zfs_cmd_t *zc) { char *snap_name; int error; - snap_name = kmem_asprintf("%s-%016llx", zc->zc_value, + snap_name = kmem_asprintf("%s@%s-%016llx", zc->zc_name, zc->zc_value, (u_longlong_t)ddi_get_lbolt64()); - if (strlen(snap_name) >= MAXNAMELEN) { + if (strlen(snap_name) >= MAXPATHLEN) { strfree(snap_name); return (E2BIG); } - error = dmu_objset_snapshot(zc->zc_name, snap_name, snap_name, - NULL, B_FALSE, B_TRUE, zc->zc_cleanup_fd); + error = dmu_objset_snapshot_tmp(snap_name, "%temp", zc->zc_cleanup_fd); if (error != 0) { strfree(snap_name); return (error); } - (void) strcpy(zc->zc_value, snap_name); + (void) strcpy(zc->zc_value, strchr(snap_name, '@') + 1); strfree(snap_name); return (0); } /* * inputs: * zc_name name of "to" snapshot * zc_value name of "from" snapshot * zc_cookie file descriptor to write diff data on * * outputs: * dmu_diff_record_t's to the file descriptor */ static int zfs_ioc_diff(zfs_cmd_t *zc) { objset_t *fromsnap; objset_t *tosnap; file_t *fp; offset_t off; int error; error = dmu_objset_hold(zc->zc_name, FTAG, &tosnap); if (error) return (error); error = dmu_objset_hold(zc->zc_value, FTAG, &fromsnap); if (error) { dmu_objset_rele(tosnap, FTAG); return (error); } fp = getf(zc->zc_cookie); if (fp == NULL) { dmu_objset_rele(fromsnap, FTAG); dmu_objset_rele(tosnap, FTAG); return (EBADF); } off = fp->f_offset; error = dmu_diff(tosnap, fromsnap, fp->f_vnode, &off); if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0) fp->f_offset = off; releasef(zc->zc_cookie); dmu_objset_rele(fromsnap, FTAG); dmu_objset_rele(tosnap, FTAG); return (error); } /* * Remove all ACL files in shares dir */ #ifdef HAVE_SMB_SHARE static int zfs_smb_acl_purge(znode_t *dzp) { zap_cursor_t zc; zap_attribute_t zap; zfs_sb_t *zsb = ZTOZSB(dzp); int error; for (zap_cursor_init(&zc, zsb->z_os, dzp->z_id); (error = zap_cursor_retrieve(&zc, &zap)) == 0; zap_cursor_advance(&zc)) { if ((error = VOP_REMOVE(ZTOV(dzp), zap.za_name, kcred, NULL, 0)) != 0) break; } zap_cursor_fini(&zc); return (error); } #endif /* HAVE_SMB_SHARE */ static int zfs_ioc_smb_acl(zfs_cmd_t *zc) { #ifdef HAVE_SMB_SHARE vnode_t *vp; znode_t *dzp; vnode_t *resourcevp = NULL; znode_t *sharedir; zfs_sb_t *zsb; nvlist_t *nvlist; char *src, *target; vattr_t vattr; vsecattr_t vsec; int error = 0; if ((error = 
lookupname(zc->zc_value, UIO_SYSSPACE, NO_FOLLOW, NULL, &vp)) != 0) return (error); /* Now make sure mntpnt and dataset are ZFS */ if (vp->v_vfsp->vfs_fstype != zfsfstype || (strcmp((char *)refstr_value(vp->v_vfsp->vfs_resource), zc->zc_name) != 0)) { VN_RELE(vp); return (EINVAL); } dzp = VTOZ(vp); zsb = ZTOZSB(dzp); ZFS_ENTER(zsb); /* * Create share dir if its missing. */ mutex_enter(&zsb->z_lock); if (zsb->z_shares_dir == 0) { dmu_tx_t *tx; tx = dmu_tx_create(zsb->z_os); dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, TRUE, ZFS_SHARES_DIR); dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL); error = dmu_tx_assign(tx, TXG_WAIT); if (error) { dmu_tx_abort(tx); } else { error = zfs_create_share_dir(zsb, tx); dmu_tx_commit(tx); } if (error) { mutex_exit(&zsb->z_lock); VN_RELE(vp); ZFS_EXIT(zsb); return (error); } } mutex_exit(&zsb->z_lock); ASSERT(zsb->z_shares_dir); if ((error = zfs_zget(zsb, zsb->z_shares_dir, &sharedir)) != 0) { VN_RELE(vp); ZFS_EXIT(zsb); return (error); } switch (zc->zc_cookie) { case ZFS_SMB_ACL_ADD: vattr.va_mask = AT_MODE|AT_UID|AT_GID|AT_TYPE; vattr.va_mode = S_IFREG|0777; vattr.va_uid = 0; vattr.va_gid = 0; vsec.vsa_mask = VSA_ACE; vsec.vsa_aclentp = &full_access; vsec.vsa_aclentsz = sizeof (full_access); vsec.vsa_aclcnt = 1; error = VOP_CREATE(ZTOV(sharedir), zc->zc_string, &vattr, EXCL, 0, &resourcevp, kcred, 0, NULL, &vsec); if (resourcevp) VN_RELE(resourcevp); break; case ZFS_SMB_ACL_REMOVE: error = VOP_REMOVE(ZTOV(sharedir), zc->zc_string, kcred, NULL, 0); break; case ZFS_SMB_ACL_RENAME: if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, zc->zc_iflags, &nvlist)) != 0) { VN_RELE(vp); ZFS_EXIT(zsb); return (error); } if (nvlist_lookup_string(nvlist, ZFS_SMB_ACL_SRC, &src) || nvlist_lookup_string(nvlist, ZFS_SMB_ACL_TARGET, &target)) { VN_RELE(vp); VN_RELE(ZTOV(sharedir)); ZFS_EXIT(zsb); nvlist_free(nvlist); return (error); } error = VOP_RENAME(ZTOV(sharedir), src, ZTOV(sharedir), target, kcred, NULL, 0); nvlist_free(nvlist); break; case ZFS_SMB_ACL_PURGE: error = zfs_smb_acl_purge(sharedir); break; default: error = EINVAL; break; } VN_RELE(vp); VN_RELE(ZTOV(sharedir)); ZFS_EXIT(zsb); return (error); #else return (ENOTSUP); #endif /* HAVE_SMB_SHARE */ } /* * inputs: * zc_name name of filesystem * zc_value short name of snap * zc_string user-supplied tag for this hold * zc_cookie recursive flag * zc_temphold set if hold is temporary * zc_cleanup_fd cleanup-on-exit file descriptor for calling process * zc_sendobj if non-zero, the objid for zc_name@zc_value * zc_createtxg if zc_sendobj is non-zero, snap must have zc_createtxg * * outputs: none */ static int zfs_ioc_hold(zfs_cmd_t *zc) { boolean_t recursive = zc->zc_cookie; spa_t *spa; dsl_pool_t *dp; dsl_dataset_t *ds; int error; minor_t minor = 0; if (snapshot_namecheck(zc->zc_value, NULL, NULL) != 0) return (EINVAL); if (zc->zc_sendobj == 0) { return (dsl_dataset_user_hold(zc->zc_name, zc->zc_value, zc->zc_string, recursive, zc->zc_temphold, zc->zc_cleanup_fd)); } if (recursive) return (EINVAL); error = spa_open(zc->zc_name, &spa, FTAG); if (error) return (error); dp = spa_get_dsl(spa); rw_enter(&dp->dp_config_rwlock, RW_READER); error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &ds); rw_exit(&dp->dp_config_rwlock); spa_close(spa, FTAG); if (error) return (error); /* * Until we have a hold on this snapshot, it's possible that * zc_sendobj could've been destroyed and reused as part * of a later txg. Make sure we're looking at the right object. 
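 * For example (illustrative numbers): userland resolved "tank/fs@snap"
 * to zc_sendobj 0x56 with zc_createtxg 1000, but before this ioctl ran
 * the snapshot was destroyed and object 0x56 was reused by a dataset
 * created in txg 1042.  The ds_creation_txg comparison below detects
 * the mismatch and returns ENOENT rather than holding the wrong
 * dataset.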
*/ if (zc->zc_createtxg != ds->ds_phys->ds_creation_txg) { dsl_dataset_rele(ds, FTAG); return (ENOENT); } if (zc->zc_cleanup_fd != -1 && zc->zc_temphold) { error = zfs_onexit_fd_hold(zc->zc_cleanup_fd, &minor); if (error) { dsl_dataset_rele(ds, FTAG); return (error); } } error = dsl_dataset_user_hold_for_send(ds, zc->zc_string, zc->zc_temphold); if (minor != 0) { if (error == 0) { dsl_register_onexit_hold_cleanup(ds, zc->zc_string, minor); } zfs_onexit_fd_rele(zc->zc_cleanup_fd); } dsl_dataset_rele(ds, FTAG); return (error); } /* * inputs: * zc_name name of dataset from which we're releasing a user hold * zc_value short name of snap * zc_string user-supplied tag for this hold * zc_cookie recursive flag * * outputs: none */ static int zfs_ioc_release(zfs_cmd_t *zc) { boolean_t recursive = zc->zc_cookie; if (snapshot_namecheck(zc->zc_value, NULL, NULL) != 0) return (EINVAL); return (dsl_dataset_user_release(zc->zc_name, zc->zc_value, zc->zc_string, recursive)); } /* * inputs: * zc_name name of filesystem * * outputs: * zc_nvlist_src{_size} nvlist of snapshot holds */ static int zfs_ioc_get_holds(zfs_cmd_t *zc) { nvlist_t *nvp; int error; if ((error = dsl_dataset_get_holds(zc->zc_name, &nvp)) == 0) { error = put_nvlist(zc, nvp); nvlist_free(nvp); } return (error); } /* * inputs: * zc_guid flags (ZEVENT_NONBLOCK) * * outputs: * zc_nvlist_dst next nvlist event * zc_cookie dropped events since last get * zc_cleanup_fd cleanup-on-exit file descriptor */ static int zfs_ioc_events_next(zfs_cmd_t *zc) { zfs_zevent_t *ze; nvlist_t *event = NULL; minor_t minor; uint64_t dropped = 0; int error; error = zfs_zevent_fd_hold(zc->zc_cleanup_fd, &minor, &ze); if (error != 0) return (error); do { error = zfs_zevent_next(ze, &event, &zc->zc_nvlist_dst_size, &dropped); if (event != NULL) { zc->zc_cookie = dropped; error = put_nvlist(zc, event); nvlist_free(event); } if (zc->zc_guid & ZEVENT_NONBLOCK) break; if ((error == 0) || (error != ENOENT)) break; error = zfs_zevent_wait(ze); if (error) break; } while (1); zfs_zevent_fd_rele(zc->zc_cleanup_fd); return (error); } /* * outputs: * zc_cookie cleared events count */ static int zfs_ioc_events_clear(zfs_cmd_t *zc) { int count; zfs_zevent_drain_all(&count); zc->zc_cookie = count; return 0; } /* * inputs: * zc_name name of new filesystem or snapshot * zc_value full name of old snapshot * * outputs: * zc_cookie space in bytes * zc_objset_type compressed space in bytes * zc_perm_action uncompressed space in bytes */ static int zfs_ioc_space_written(zfs_cmd_t *zc) { int error; dsl_dataset_t *new, *old; error = dsl_dataset_hold(zc->zc_name, FTAG, &new); if (error != 0) return (error); error = dsl_dataset_hold(zc->zc_value, FTAG, &old); if (error != 0) { dsl_dataset_rele(new, FTAG); return (error); } error = dsl_dataset_space_written(old, new, &zc->zc_cookie, &zc->zc_objset_type, &zc->zc_perm_action); dsl_dataset_rele(old, FTAG); dsl_dataset_rele(new, FTAG); return (error); } /* - * inputs: - * zc_name full name of last snapshot - * zc_value full name of first snapshot + * innvl: { + * "firstsnap" -> snapshot name + * } * - * outputs: - * zc_cookie space in bytes - * zc_objset_type compressed space in bytes - * zc_perm_action uncompressed space in bytes + * outnvl: { + * "used" -> space in bytes + * "compressed" -> compressed space in bytes + * "uncompressed" -> uncompressed space in bytes + * } */ static int -zfs_ioc_space_snaps(zfs_cmd_t *zc) +zfs_ioc_space_snaps(const char *lastsnap, nvlist_t *innvl, nvlist_t *outnvl) { int error; dsl_dataset_t *new, *old; + char 
*firstsnap; + uint64_t used, comp, uncomp; - error = dsl_dataset_hold(zc->zc_name, FTAG, &new); + if (nvlist_lookup_string(innvl, "firstsnap", &firstsnap) != 0) + return (EINVAL); + + error = dsl_dataset_hold(lastsnap, FTAG, &new); if (error != 0) return (error); - error = dsl_dataset_hold(zc->zc_value, FTAG, &old); + error = dsl_dataset_hold(firstsnap, FTAG, &old); if (error != 0) { dsl_dataset_rele(new, FTAG); return (error); } - error = dsl_dataset_space_wouldfree(old, new, &zc->zc_cookie, - &zc->zc_objset_type, &zc->zc_perm_action); + error = dsl_dataset_space_wouldfree(old, new, &used, &comp, &uncomp); dsl_dataset_rele(old, FTAG); dsl_dataset_rele(new, FTAG); + fnvlist_add_uint64(outnvl, "used", used); + fnvlist_add_uint64(outnvl, "compressed", comp); + fnvlist_add_uint64(outnvl, "uncompressed", uncomp); return (error); } /* - * pool create, destroy, and export don't log the history as part of - * zfsdev_ioctl, but rather zfs_ioc_pool_create, and zfs_ioc_pool_export - * do the logging of those commands. + * innvl: { + * "fd" -> file descriptor to write stream to (int32) + * (optional) "fromsnap" -> full snap name to send an incremental from + * } + * + * outnvl is unused */ -static zfs_ioc_vec_t zfs_ioc_vec[] = { - { zfs_ioc_pool_create, zfs_secpolicy_config, POOL_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_pool_destroy, zfs_secpolicy_config, POOL_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_pool_import, zfs_secpolicy_config, POOL_NAME, B_TRUE, - POOL_CHECK_NONE }, - { zfs_ioc_pool_export, zfs_secpolicy_config, POOL_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_pool_configs, zfs_secpolicy_none, NO_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_pool_stats, zfs_secpolicy_read, POOL_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_pool_tryimport, zfs_secpolicy_config, NO_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_pool_scan, zfs_secpolicy_config, POOL_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_pool_freeze, zfs_secpolicy_config, NO_NAME, B_FALSE, - POOL_CHECK_READONLY }, - { zfs_ioc_pool_upgrade, zfs_secpolicy_config, POOL_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_pool_get_history, zfs_secpolicy_config, POOL_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_vdev_add, zfs_secpolicy_config, POOL_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_vdev_remove, zfs_secpolicy_config, POOL_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_vdev_set_state, zfs_secpolicy_config, POOL_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_vdev_attach, zfs_secpolicy_config, POOL_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_vdev_detach, zfs_secpolicy_config, POOL_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_vdev_setpath, zfs_secpolicy_config, POOL_NAME, B_FALSE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_vdev_setfru, zfs_secpolicy_config, POOL_NAME, B_FALSE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_objset_stats, zfs_secpolicy_read, DATASET_NAME, B_FALSE, - POOL_CHECK_SUSPENDED }, - { zfs_ioc_objset_zplprops, zfs_secpolicy_read, DATASET_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_dataset_list_next, zfs_secpolicy_read, DATASET_NAME, B_FALSE, - POOL_CHECK_SUSPENDED }, - { zfs_ioc_snapshot_list_next, zfs_secpolicy_read, DATASET_NAME, B_FALSE, - POOL_CHECK_SUSPENDED }, - { zfs_ioc_set_prop, zfs_secpolicy_none, DATASET_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | 
POOL_CHECK_READONLY }, - { zfs_ioc_create_minor, zfs_secpolicy_config, DATASET_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_remove_minor, zfs_secpolicy_config, DATASET_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_create, zfs_secpolicy_create, DATASET_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_destroy, zfs_secpolicy_destroy, DATASET_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_rollback, zfs_secpolicy_rollback, DATASET_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_rename, zfs_secpolicy_rename, DATASET_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_recv, zfs_secpolicy_receive, DATASET_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_send, zfs_secpolicy_send, DATASET_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_inject_fault, zfs_secpolicy_inject, NO_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_clear_fault, zfs_secpolicy_inject, NO_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_inject_list_next, zfs_secpolicy_inject, NO_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_error_log, zfs_secpolicy_inject, POOL_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_clear, zfs_secpolicy_config, POOL_NAME, B_TRUE, - POOL_CHECK_NONE }, - { zfs_ioc_promote, zfs_secpolicy_promote, DATASET_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_destroy_snaps_nvl, zfs_secpolicy_destroy_recursive, - DATASET_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_snapshot, zfs_secpolicy_snapshot, DATASET_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_dsobj_to_dsname, zfs_secpolicy_diff, POOL_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_obj_to_path, zfs_secpolicy_diff, DATASET_NAME, B_FALSE, - POOL_CHECK_SUSPENDED }, - { zfs_ioc_pool_set_props, zfs_secpolicy_config, POOL_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_pool_get_props, zfs_secpolicy_read, POOL_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_set_fsacl, zfs_secpolicy_fsacl, DATASET_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_get_fsacl, zfs_secpolicy_read, DATASET_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_share, zfs_secpolicy_share, DATASET_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_inherit_prop, zfs_secpolicy_inherit, DATASET_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_smb_acl, zfs_secpolicy_smb_acl, DATASET_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_userspace_one, zfs_secpolicy_userspace_one, DATASET_NAME, - B_FALSE, POOL_CHECK_NONE }, - { zfs_ioc_userspace_many, zfs_secpolicy_userspace_many, DATASET_NAME, - B_FALSE, POOL_CHECK_NONE }, - { zfs_ioc_userspace_upgrade, zfs_secpolicy_userspace_upgrade, - DATASET_NAME, B_FALSE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_hold, zfs_secpolicy_hold, DATASET_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_release, zfs_secpolicy_release, DATASET_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_get_holds, zfs_secpolicy_read, DATASET_NAME, B_FALSE, - POOL_CHECK_SUSPENDED }, - { zfs_ioc_objset_recvd_props, zfs_secpolicy_read, DATASET_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_vdev_split, zfs_secpolicy_config, POOL_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_next_obj, zfs_secpolicy_read, DATASET_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_diff, zfs_secpolicy_diff, DATASET_NAME, B_FALSE, - 
POOL_CHECK_NONE }, - { zfs_ioc_tmp_snapshot, zfs_secpolicy_tmp_snapshot, DATASET_NAME, - B_FALSE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_obj_to_stats, zfs_secpolicy_diff, DATASET_NAME, B_FALSE, - POOL_CHECK_SUSPENDED }, - { zfs_ioc_events_next, zfs_secpolicy_config, NO_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_events_clear, zfs_secpolicy_config, NO_NAME, B_FALSE, - POOL_CHECK_NONE }, - { zfs_ioc_pool_reguid, zfs_secpolicy_config, POOL_NAME, B_TRUE, - POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY }, - { zfs_ioc_space_written, zfs_secpolicy_read, DATASET_NAME, B_FALSE, - POOL_CHECK_SUSPENDED }, - { zfs_ioc_space_snaps, zfs_secpolicy_read, DATASET_NAME, B_FALSE, - POOL_CHECK_SUSPENDED }, - { zfs_ioc_pool_reopen, zfs_secpolicy_config, POOL_NAME, B_TRUE, - POOL_CHECK_SUSPENDED }, - { zfs_ioc_send_progress, zfs_secpolicy_read, DATASET_NAME, B_FALSE, - POOL_CHECK_NONE } -}; +/* ARGSUSED */ +static int +zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl) +{ + objset_t *fromsnap = NULL; + objset_t *tosnap; + int error; + offset_t off; + char *fromname; + int fd; + + error = nvlist_lookup_int32(innvl, "fd", &fd); + if (error != 0) + return (EINVAL); + + error = dmu_objset_hold(snapname, FTAG, &tosnap); + if (error) + return (error); + + error = nvlist_lookup_string(innvl, "fromsnap", &fromname); + if (error == 0) { + error = dmu_objset_hold(fromname, FTAG, &fromsnap); + if (error) { + dmu_objset_rele(tosnap, FTAG); + return (error); + } + } + + { + file_t *fp = getf(fd); + if (fp == NULL) { + dmu_objset_rele(tosnap, FTAG); + if (fromsnap != NULL) + dmu_objset_rele(fromsnap, FTAG); + return (EBADF); + } + + off = fp->f_offset; + error = dmu_send(tosnap, fromsnap, fd, fp->f_vnode, &off); + + if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0) + fp->f_offset = off; + } + releasef(fd); + if (fromsnap != NULL) + dmu_objset_rele(fromsnap, FTAG); + dmu_objset_rele(tosnap, FTAG); + return (error); +} + +/* + * Determine approximately how large a zfs send stream will be -- the number + * of bytes that will be written to the fd supplied to zfs_ioc_send_new(). 
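Caller-side illustration of the new nvlist convention used by zfs_ioc_send_new() above and zfs_ioc_space_snaps() earlier (the innvl/outnvl layout for this size-estimate ioctl continues just below). In practice the input nvlist is packed in userspace and arrives through zc_nvlist_src; the helper names here are hypothetical, and fnvlist_add_int32() is assumed to be available alongside the other fnvlist_add_*() helpers used in this patch.

/* Illustrative sketch only -- not part of this change. */
static nvlist_t *
example_build_send_innvl(int fd, const char *fromsnap)
{
	nvlist_t *innvl = fnvlist_alloc();

	fnvlist_add_int32(innvl, "fd", fd);
	if (fromsnap != NULL)
		fnvlist_add_string(innvl, "fromsnap", fromsnap);
	/* caller frees with fnvlist_free() after the ioctl completes */
	return (innvl);
}

static nvlist_t *
example_build_space_snaps_innvl(const char *firstsnap)
{
	nvlist_t *innvl = fnvlist_alloc();

	fnvlist_add_string(innvl, "firstsnap", firstsnap);
	return (innvl);
}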
+ * + * innvl: { + * (optional) "fromsnap" -> full snap name to send an incremental from + * } + * + * outnvl: { + * "space" -> bytes of space (uint64) + * } + */ +static int +zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl) +{ + objset_t *fromsnap = NULL; + objset_t *tosnap; + int error; + char *fromname; + uint64_t space; + + error = dmu_objset_hold(snapname, FTAG, &tosnap); + if (error) + return (error); + + error = nvlist_lookup_string(innvl, "fromsnap", &fromname); + if (error == 0) { + error = dmu_objset_hold(fromname, FTAG, &fromsnap); + if (error) { + dmu_objset_rele(tosnap, FTAG); + return (error); + } + } + + error = dmu_send_estimate(tosnap, fromsnap, &space); + fnvlist_add_uint64(outnvl, "space", space); + + if (fromsnap != NULL) + dmu_objset_rele(fromsnap, FTAG); + dmu_objset_rele(tosnap, FTAG); + return (error); +} + + +static zfs_ioc_vec_t zfs_ioc_vec[ZFS_IOC_LAST - ZFS_IOC_FIRST]; + +static void +zfs_ioctl_register_legacy(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func, + zfs_secpolicy_func_t *secpolicy, zfs_ioc_namecheck_t namecheck, + boolean_t log_history, zfs_ioc_poolcheck_t pool_check) +{ + zfs_ioc_vec_t *vec = &zfs_ioc_vec[ioc - ZFS_IOC_FIRST]; + + ASSERT3U(ioc, >=, ZFS_IOC_FIRST); + ASSERT3U(ioc, <, ZFS_IOC_LAST); + ASSERT3P(vec->zvec_legacy_func, ==, NULL); + ASSERT3P(vec->zvec_func, ==, NULL); + + vec->zvec_legacy_func = func; + vec->zvec_secpolicy = secpolicy; + vec->zvec_namecheck = namecheck; + vec->zvec_allow_log = log_history; + vec->zvec_pool_check = pool_check; +} + +/* + * See the block comment at the beginning of this file for details on + * each argument to this function. + */ +static void +zfs_ioctl_register(const char *name, zfs_ioc_t ioc, zfs_ioc_func_t *func, + zfs_secpolicy_func_t *secpolicy, zfs_ioc_namecheck_t namecheck, + zfs_ioc_poolcheck_t pool_check, boolean_t smush_outnvlist, + boolean_t allow_log) +{ + zfs_ioc_vec_t *vec = &zfs_ioc_vec[ioc - ZFS_IOC_FIRST]; + + ASSERT3U(ioc, >=, ZFS_IOC_FIRST); + ASSERT3U(ioc, <, ZFS_IOC_LAST); + ASSERT3P(vec->zvec_legacy_func, ==, NULL); + ASSERT3P(vec->zvec_func, ==, NULL); + + /* if we are logging, the name must be valid */ + ASSERT(!allow_log || namecheck != NO_NAME); + + vec->zvec_name = name; + vec->zvec_func = func; + vec->zvec_secpolicy = secpolicy; + vec->zvec_namecheck = namecheck; + vec->zvec_pool_check = pool_check; + vec->zvec_smush_outnvlist = smush_outnvlist; + vec->zvec_allow_log = allow_log; +} + +static void +zfs_ioctl_register_pool(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func, + zfs_secpolicy_func_t *secpolicy, boolean_t log_history, + zfs_ioc_poolcheck_t pool_check) +{ + zfs_ioctl_register_legacy(ioc, func, secpolicy, + POOL_NAME, log_history, pool_check); +} + +static void +zfs_ioctl_register_dataset_nolog(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func, + zfs_secpolicy_func_t *secpolicy, zfs_ioc_poolcheck_t pool_check) +{ + zfs_ioctl_register_legacy(ioc, func, secpolicy, + DATASET_NAME, B_FALSE, pool_check); +} + +static void +zfs_ioctl_register_pool_modify(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func) +{ + zfs_ioctl_register_legacy(ioc, func, zfs_secpolicy_config, + POOL_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY); +} + +static void +zfs_ioctl_register_pool_meta(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func, + zfs_secpolicy_func_t *secpolicy) +{ + zfs_ioctl_register_legacy(ioc, func, secpolicy, + NO_NAME, B_FALSE, POOL_CHECK_NONE); +} + +static void +zfs_ioctl_register_dataset_read_secpolicy(zfs_ioc_t ioc, + zfs_ioc_legacy_func_t *func, zfs_secpolicy_func_t 
*secpolicy) +{ + zfs_ioctl_register_legacy(ioc, func, secpolicy, + DATASET_NAME, B_FALSE, POOL_CHECK_SUSPENDED); +} + +static void +zfs_ioctl_register_dataset_read(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func) +{ + zfs_ioctl_register_dataset_read_secpolicy(ioc, func, + zfs_secpolicy_read); +} + +static void +zfs_ioctl_register_dataset_modify(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func, + zfs_secpolicy_func_t *secpolicy) +{ + zfs_ioctl_register_legacy(ioc, func, secpolicy, + DATASET_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY); +} + +static void +zfs_ioctl_init(void) +{ + zfs_ioctl_register("snapshot", ZFS_IOC_SNAPSHOT, + zfs_ioc_snapshot, zfs_secpolicy_snapshot, POOL_NAME, + POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE); + + zfs_ioctl_register("log_history", ZFS_IOC_LOG_HISTORY, + zfs_ioc_log_history, zfs_secpolicy_log_history, NO_NAME, + POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE); + + zfs_ioctl_register("space_snaps", ZFS_IOC_SPACE_SNAPS, + zfs_ioc_space_snaps, zfs_secpolicy_read, DATASET_NAME, + POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE); + + zfs_ioctl_register("send", ZFS_IOC_SEND_NEW, + zfs_ioc_send_new, zfs_secpolicy_send_new, DATASET_NAME, + POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE); + + zfs_ioctl_register("send_space", ZFS_IOC_SEND_SPACE, + zfs_ioc_send_space, zfs_secpolicy_read, DATASET_NAME, + POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE); + + zfs_ioctl_register("create", ZFS_IOC_CREATE, + zfs_ioc_create, zfs_secpolicy_create_clone, DATASET_NAME, + POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE); + + zfs_ioctl_register("clone", ZFS_IOC_CLONE, + zfs_ioc_clone, zfs_secpolicy_create_clone, DATASET_NAME, + POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE); + + zfs_ioctl_register("destroy_snaps", ZFS_IOC_DESTROY_SNAPS, + zfs_ioc_destroy_snaps, zfs_secpolicy_destroy_snaps, POOL_NAME, + POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE); + + /* IOCTLS that use the legacy function signature */ + + zfs_ioctl_register_legacy(ZFS_IOC_POOL_FREEZE, zfs_ioc_pool_freeze, + zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_READONLY); + + zfs_ioctl_register_pool(ZFS_IOC_POOL_CREATE, zfs_ioc_pool_create, + zfs_secpolicy_config, B_TRUE, POOL_CHECK_NONE); + zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_SCAN, + zfs_ioc_pool_scan); + zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_UPGRADE, + zfs_ioc_pool_upgrade); + zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_ADD, + zfs_ioc_vdev_add); + zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_REMOVE, + zfs_ioc_vdev_remove); + zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SET_STATE, + zfs_ioc_vdev_set_state); + zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_ATTACH, + zfs_ioc_vdev_attach); + zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_DETACH, + zfs_ioc_vdev_detach); + zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SETPATH, + zfs_ioc_vdev_setpath); + zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SETFRU, + zfs_ioc_vdev_setfru); + zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_SET_PROPS, + zfs_ioc_pool_set_props); + zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SPLIT, + zfs_ioc_vdev_split); + zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_REGUID, + zfs_ioc_pool_reguid); + + zfs_ioctl_register_pool_meta(ZFS_IOC_POOL_CONFIGS, + zfs_ioc_pool_configs, zfs_secpolicy_none); + zfs_ioctl_register_pool_meta(ZFS_IOC_POOL_TRYIMPORT, + zfs_ioc_pool_tryimport, zfs_secpolicy_config); + zfs_ioctl_register_pool_meta(ZFS_IOC_INJECT_FAULT, + zfs_ioc_inject_fault, zfs_secpolicy_inject); + zfs_ioctl_register_pool_meta(ZFS_IOC_CLEAR_FAULT, + 
zfs_ioc_clear_fault, zfs_secpolicy_inject); + zfs_ioctl_register_pool_meta(ZFS_IOC_INJECT_LIST_NEXT, + zfs_ioc_inject_list_next, zfs_secpolicy_inject); + + /* + * pool destroy, and export don't log the history as part of + * zfsdev_ioctl, but rather zfs_ioc_pool_export + * does the logging of those commands. + */ + zfs_ioctl_register_pool(ZFS_IOC_POOL_DESTROY, zfs_ioc_pool_destroy, + zfs_secpolicy_config, B_FALSE, POOL_CHECK_NONE); + zfs_ioctl_register_pool(ZFS_IOC_POOL_EXPORT, zfs_ioc_pool_export, + zfs_secpolicy_config, B_FALSE, POOL_CHECK_NONE); + + zfs_ioctl_register_pool(ZFS_IOC_POOL_STATS, zfs_ioc_pool_stats, + zfs_secpolicy_read, B_FALSE, POOL_CHECK_NONE); + zfs_ioctl_register_pool(ZFS_IOC_POOL_GET_PROPS, zfs_ioc_pool_get_props, + zfs_secpolicy_read, B_FALSE, POOL_CHECK_NONE); + + zfs_ioctl_register_pool(ZFS_IOC_ERROR_LOG, zfs_ioc_error_log, + zfs_secpolicy_inject, B_FALSE, POOL_CHECK_SUSPENDED); + zfs_ioctl_register_pool(ZFS_IOC_DSOBJ_TO_DSNAME, + zfs_ioc_dsobj_to_dsname, + zfs_secpolicy_diff, B_FALSE, POOL_CHECK_SUSPENDED); + zfs_ioctl_register_pool(ZFS_IOC_POOL_GET_HISTORY, + zfs_ioc_pool_get_history, + zfs_secpolicy_config, B_FALSE, POOL_CHECK_SUSPENDED); + + zfs_ioctl_register_pool(ZFS_IOC_POOL_IMPORT, zfs_ioc_pool_import, + zfs_secpolicy_config, B_TRUE, POOL_CHECK_NONE); + + zfs_ioctl_register_pool(ZFS_IOC_CLEAR, zfs_ioc_clear, + zfs_secpolicy_config, B_TRUE, POOL_CHECK_SUSPENDED); + zfs_ioctl_register_pool(ZFS_IOC_POOL_REOPEN, zfs_ioc_pool_reopen, + zfs_secpolicy_config, B_TRUE, POOL_CHECK_SUSPENDED); + + zfs_ioctl_register_dataset_read(ZFS_IOC_SPACE_WRITTEN, + zfs_ioc_space_written); + zfs_ioctl_register_dataset_read(ZFS_IOC_GET_HOLDS, + zfs_ioc_get_holds); + zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_RECVD_PROPS, + zfs_ioc_objset_recvd_props); + zfs_ioctl_register_dataset_read(ZFS_IOC_NEXT_OBJ, + zfs_ioc_next_obj); + zfs_ioctl_register_dataset_read(ZFS_IOC_GET_FSACL, + zfs_ioc_get_fsacl); + zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_STATS, + zfs_ioc_objset_stats); + zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_ZPLPROPS, + zfs_ioc_objset_zplprops); + zfs_ioctl_register_dataset_read(ZFS_IOC_DATASET_LIST_NEXT, + zfs_ioc_dataset_list_next); + zfs_ioctl_register_dataset_read(ZFS_IOC_SNAPSHOT_LIST_NEXT, + zfs_ioc_snapshot_list_next); + zfs_ioctl_register_dataset_read(ZFS_IOC_SEND_PROGRESS, + zfs_ioc_send_progress); + + zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_DIFF, + zfs_ioc_diff, zfs_secpolicy_diff); + zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_OBJ_TO_STATS, + zfs_ioc_obj_to_stats, zfs_secpolicy_diff); + zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_OBJ_TO_PATH, + zfs_ioc_obj_to_path, zfs_secpolicy_diff); + zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_USERSPACE_ONE, + zfs_ioc_userspace_one, zfs_secpolicy_userspace_one); + zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_USERSPACE_MANY, + zfs_ioc_userspace_many, zfs_secpolicy_userspace_many); + zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_SEND, + zfs_ioc_send, zfs_secpolicy_send); + + zfs_ioctl_register_dataset_modify(ZFS_IOC_SET_PROP, zfs_ioc_set_prop, + zfs_secpolicy_none); + zfs_ioctl_register_dataset_modify(ZFS_IOC_DESTROY, zfs_ioc_destroy, + zfs_secpolicy_destroy); + zfs_ioctl_register_dataset_modify(ZFS_IOC_ROLLBACK, zfs_ioc_rollback, + zfs_secpolicy_rollback); + zfs_ioctl_register_dataset_modify(ZFS_IOC_RENAME, zfs_ioc_rename, + zfs_secpolicy_rename); + zfs_ioctl_register_dataset_modify(ZFS_IOC_RECV, zfs_ioc_recv, + zfs_secpolicy_recv); + 
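A quick aside on the registration helpers used throughout this function (the remaining dataset-modify registrations continue immediately below): each wrapper simply pins a few arguments of zfs_ioctl_register_legacy(). For comparison, this is roughly what wiring a new nvlist-style handler through the full zfs_ioctl_register() form looks like. ZFS_IOC_EXAMPLE and zfs_ioc_example are made-up names, shown only as a sketch, not something added by this patch.

/* Hypothetical illustration only -- ZFS_IOC_EXAMPLE does not exist. */
static int
zfs_ioc_example(const char *dsname, nvlist_t *innvl, nvlist_t *outnvl)
{
	/* read inputs from innvl, put results in outnvl */
	fnvlist_add_uint64(outnvl, "answer", 42);
	return (0);
}

static void
zfs_ioctl_register_example(void)
{
	zfs_ioctl_register("example", ZFS_IOC_EXAMPLE,
	    zfs_ioc_example, zfs_secpolicy_read, DATASET_NAME,
	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);
}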
zfs_ioctl_register_dataset_modify(ZFS_IOC_PROMOTE, zfs_ioc_promote, + zfs_secpolicy_promote); + zfs_ioctl_register_dataset_modify(ZFS_IOC_HOLD, zfs_ioc_hold, + zfs_secpolicy_hold); + zfs_ioctl_register_dataset_modify(ZFS_IOC_RELEASE, zfs_ioc_release, + zfs_secpolicy_release); + zfs_ioctl_register_dataset_modify(ZFS_IOC_INHERIT_PROP, + zfs_ioc_inherit_prop, zfs_secpolicy_inherit_prop); + zfs_ioctl_register_dataset_modify(ZFS_IOC_SET_FSACL, zfs_ioc_set_fsacl, + zfs_secpolicy_set_fsacl); + + zfs_ioctl_register_dataset_nolog(ZFS_IOC_SHARE, zfs_ioc_share, + zfs_secpolicy_share, POOL_CHECK_NONE); + zfs_ioctl_register_dataset_nolog(ZFS_IOC_SMB_ACL, zfs_ioc_smb_acl, + zfs_secpolicy_smb_acl, POOL_CHECK_NONE); + zfs_ioctl_register_dataset_nolog(ZFS_IOC_USERSPACE_UPGRADE, + zfs_ioc_userspace_upgrade, zfs_secpolicy_userspace_upgrade, + POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY); + zfs_ioctl_register_dataset_nolog(ZFS_IOC_TMP_SNAPSHOT, + zfs_ioc_tmp_snapshot, zfs_secpolicy_tmp_snapshot, + POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY); + + /* + * ZoL functions + */ + zfs_ioctl_register_legacy(ZFS_IOC_CREATE_MINOR, zfs_ioc_create_minor, + zfs_secpolicy_config, DATASET_NAME, B_FALSE, POOL_CHECK_NONE); + zfs_ioctl_register_legacy(ZFS_IOC_REMOVE_MINOR, zfs_ioc_remove_minor, + zfs_secpolicy_config, DATASET_NAME, B_FALSE, POOL_CHECK_NONE); + zfs_ioctl_register_legacy(ZFS_IOC_EVENTS_NEXT, zfs_ioc_events_next, + zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_NONE); + zfs_ioctl_register_legacy(ZFS_IOC_EVENTS_CLEAR, zfs_ioc_events_clear, + zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_NONE); +} int pool_status_check(const char *name, zfs_ioc_namecheck_t type, zfs_ioc_poolcheck_t check) { spa_t *spa; int error; ASSERT(type == POOL_NAME || type == DATASET_NAME); if (check & POOL_CHECK_NONE) return (0); error = spa_open(name, &spa, FTAG); if (error == 0) { if ((check & POOL_CHECK_SUSPENDED) && spa_suspended(spa)) error = EAGAIN; else if ((check & POOL_CHECK_READONLY) && !spa_writeable(spa)) error = EROFS; spa_close(spa, FTAG); } return (error); } static void * zfsdev_get_state_impl(minor_t minor, enum zfsdev_state_type which) { zfsdev_state_t *zs; ASSERT(MUTEX_HELD(&zfsdev_state_lock)); for (zs = list_head(&zfsdev_state_list); zs != NULL; zs = list_next(&zfsdev_state_list, zs)) { if (zs->zs_minor == minor) { switch (which) { case ZST_ONEXIT: return (zs->zs_onexit); case ZST_ZEVENT: return (zs->zs_zevent); case ZST_ALL: return (zs); } } } return NULL; } void * zfsdev_get_state(minor_t minor, enum zfsdev_state_type which) { void *ptr; mutex_enter(&zfsdev_state_lock); ptr = zfsdev_get_state_impl(minor, which); mutex_exit(&zfsdev_state_lock); return ptr; } minor_t zfsdev_getminor(struct file *filp) { ASSERT(filp != NULL); ASSERT(filp->private_data != NULL); return (((zfsdev_state_t *)filp->private_data)->zs_minor); } /* * Find a free minor number. The zfsdev_state_list is expected to * be short since it is only a list of currently open file handles. 
*/ minor_t zfsdev_minor_alloc(void) { static minor_t last_minor = 0; minor_t m; ASSERT(MUTEX_HELD(&zfsdev_state_lock)); for (m = last_minor + 1; m != last_minor; m++) { if (m > ZFSDEV_MAX_MINOR) m = 1; if (zfsdev_get_state_impl(m, ZST_ALL) == NULL) { last_minor = m; return (m); } } return (0); } static int zfsdev_state_init(struct file *filp) { zfsdev_state_t *zs; minor_t minor; ASSERT(MUTEX_HELD(&zfsdev_state_lock)); minor = zfsdev_minor_alloc(); if (minor == 0) return (ENXIO); zs = kmem_zalloc( sizeof(zfsdev_state_t), KM_SLEEP); zs->zs_file = filp; zs->zs_minor = minor; filp->private_data = zs; zfs_onexit_init((zfs_onexit_t **)&zs->zs_onexit); zfs_zevent_init((zfs_zevent_t **)&zs->zs_zevent); list_insert_tail(&zfsdev_state_list, zs); return (0); } static int zfsdev_state_destroy(struct file *filp) { zfsdev_state_t *zs; ASSERT(MUTEX_HELD(&zfsdev_state_lock)); ASSERT(filp->private_data != NULL); zs = filp->private_data; zfs_onexit_destroy(zs->zs_onexit); zfs_zevent_destroy(zs->zs_zevent); list_remove(&zfsdev_state_list, zs); kmem_free(zs, sizeof(zfsdev_state_t)); return 0; } static int zfsdev_open(struct inode *ino, struct file *filp) { int error; mutex_enter(&zfsdev_state_lock); error = zfsdev_state_init(filp); mutex_exit(&zfsdev_state_lock); return (-error); } static int zfsdev_release(struct inode *ino, struct file *filp) { int error; mutex_enter(&zfsdev_state_lock); error = zfsdev_state_destroy(filp); mutex_exit(&zfsdev_state_lock); return (-error); } static long zfsdev_ioctl(struct file *filp, unsigned cmd, unsigned long arg) { zfs_cmd_t *zc; - uint_t vec; - int error, rc, flag = 0; - - vec = cmd - ZFS_IOC; - if (vec >= sizeof (zfs_ioc_vec) / sizeof (zfs_ioc_vec[0])) + uint_t vecnum; + int error, rc, len, flag = 0; + const zfs_ioc_vec_t *vec; + char saved_poolname[MAXNAMELEN]; + nvlist_t *innvl = NULL; + + vecnum = cmd - ZFS_IOC_FIRST; + if (vecnum >= sizeof (zfs_ioc_vec) / sizeof (zfs_ioc_vec[0])) return (-EINVAL); + vec = &zfs_ioc_vec[vecnum]; zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP | KM_NODEBUG); error = ddi_copyin((void *)arg, zc, sizeof (zfs_cmd_t), flag); - if (error != 0) + if (error != 0) { error = EFAULT; + goto out; + } - if ((error == 0) && !(flag & FKIOCTL)) - error = zfs_ioc_vec[vec].zvec_secpolicy(zc, CRED()); + zc->zc_iflags = flag & FKIOCTL; + if (zc->zc_nvlist_src_size != 0) { + error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, + zc->zc_iflags, &innvl); + if (error != 0) + goto out; + } /* * Ensure that all pool/dataset names are valid before we pass down to * the lower layers. 
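Before the name-validation switch, which continues right after this aside: the rewritten dispatcher above turns the ioctl number into a table index with vecnum = cmd - ZFS_IOC_FIRST. A minimal sketch of that lookup and its bounds check (the helper name is hypothetical):

/*
 * Illustration, not part of the change: anything outside
 * [ZFS_IOC_FIRST, ZFS_IOC_LAST) is rejected before the table is consulted.
 */
static const zfs_ioc_vec_t *
example_lookup_vec(unsigned cmd)
{
	uint_t vecnum = cmd - ZFS_IOC_FIRST;

	if (vecnum >= ZFS_IOC_LAST - ZFS_IOC_FIRST)
		return (NULL);	/* caller maps this to -EINVAL */
	return (&zfs_ioc_vec[vecnum]);
}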
*/ - if (error == 0) { - zc->zc_name[sizeof (zc->zc_name) - 1] = '\0'; - zc->zc_iflags = flag & FKIOCTL; - switch (zfs_ioc_vec[vec].zvec_namecheck) { - case POOL_NAME: - if (pool_namecheck(zc->zc_name, NULL, NULL) != 0) - error = EINVAL; + zc->zc_name[sizeof (zc->zc_name) - 1] = '\0'; + switch (vec->zvec_namecheck) { + case POOL_NAME: + if (pool_namecheck(zc->zc_name, NULL, NULL) != 0) + error = EINVAL; + else error = pool_status_check(zc->zc_name, - zfs_ioc_vec[vec].zvec_namecheck, - zfs_ioc_vec[vec].zvec_pool_check); - break; + vec->zvec_namecheck, vec->zvec_pool_check); + break; - case DATASET_NAME: - if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0) - error = EINVAL; + case DATASET_NAME: + if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0) + error = EINVAL; + else error = pool_status_check(zc->zc_name, - zfs_ioc_vec[vec].zvec_namecheck, - zfs_ioc_vec[vec].zvec_pool_check); - break; + vec->zvec_namecheck, vec->zvec_pool_check); + break; - case NO_NAME: - break; - } + case NO_NAME: + break; } - if (error == 0) - error = zfs_ioc_vec[vec].zvec_func(zc); + if (error == 0 && !(flag & FKIOCTL)) + error = vec->zvec_secpolicy(zc, innvl, CRED()); + + if (error != 0) + goto out; + + /* legacy ioctls can modify zc_name */ + (void) strlcpy(saved_poolname, zc->zc_name, sizeof(saved_poolname)); + len = strcspn(saved_poolname, "/@") + 1; + saved_poolname[len] = '\0'; + + if (vec->zvec_func != NULL) { + nvlist_t *outnvl; + int puterror = 0; + spa_t *spa; + nvlist_t *lognv = NULL; + + ASSERT(vec->zvec_legacy_func == NULL); + + /* + * Add the innvl to the lognv before calling the func, + * in case the func changes the innvl. + */ + if (vec->zvec_allow_log) { + lognv = fnvlist_alloc(); + fnvlist_add_string(lognv, ZPOOL_HIST_IOCTL, + vec->zvec_name); + if (!nvlist_empty(innvl)) { + fnvlist_add_nvlist(lognv, ZPOOL_HIST_INPUT_NVL, + innvl); + } + } + + outnvl = fnvlist_alloc(); + error = vec->zvec_func(zc->zc_name, innvl, outnvl); + + if (error == 0 && vec->zvec_allow_log && + spa_open(zc->zc_name, &spa, FTAG) == 0) { + if (!nvlist_empty(outnvl)) { + fnvlist_add_nvlist(lognv, ZPOOL_HIST_OUTPUT_NVL, + outnvl); + } + (void) spa_history_log_nvl(spa, lognv); + spa_close(spa, FTAG); + } + fnvlist_free(lognv); + + if (!nvlist_empty(outnvl) || zc->zc_nvlist_dst_size != 0) { + int smusherror = 0; + if (vec->zvec_smush_outnvlist) { + smusherror = nvlist_smush(outnvl, + zc->zc_nvlist_dst_size); + } + if (smusherror == 0) + puterror = put_nvlist(zc, outnvl); + } + + if (puterror != 0) + error = puterror; + + nvlist_free(outnvl); + } else { + error = vec->zvec_legacy_func(zc); + } + +out: + nvlist_free(innvl); rc = ddi_copyout(zc, (void *)arg, sizeof (zfs_cmd_t), flag); - if (error == 0) { - if (rc != 0) - error = EFAULT; - if (zfs_ioc_vec[vec].zvec_his_log) - zfs_log_history(zc); + if (error == 0 && rc != 0) + error = EFAULT; + if (error == 0 && vec->zvec_allow_log) { + char *s = tsd_get(zfs_allow_log_key); + if (s != NULL) + strfree(s); + (void) tsd_set(zfs_allow_log_key, strdup(saved_poolname)); } kmem_free(zc, sizeof (zfs_cmd_t)); return (-error); } #ifdef CONFIG_COMPAT static long zfsdev_compat_ioctl(struct file *filp, unsigned cmd, unsigned long arg) { return zfsdev_ioctl(filp, cmd, arg); } #else #define zfsdev_compat_ioctl NULL #endif static const struct file_operations zfsdev_fops = { .open = zfsdev_open, .release = zfsdev_release, .unlocked_ioctl = zfsdev_ioctl, .compat_ioctl = zfsdev_compat_ioctl, .owner = THIS_MODULE, }; static struct miscdevice zfs_misc = { .minor = MISC_DYNAMIC_MINOR, .name = 
ZFS_DRIVER, .fops = &zfsdev_fops, }; static int zfs_attach(void) { int error; mutex_init(&zfsdev_state_lock, NULL, MUTEX_DEFAULT, NULL); list_create(&zfsdev_state_list, sizeof (zfsdev_state_t), offsetof(zfsdev_state_t, zs_next)); error = misc_register(&zfs_misc); if (error) { printk(KERN_INFO "ZFS: misc_register() failed %d\n", error); return (error); } return (0); } static void zfs_detach(void) { int error; error = misc_deregister(&zfs_misc); if (error) printk(KERN_INFO "ZFS: misc_deregister() failed %d\n", error); mutex_destroy(&zfsdev_state_lock); list_destroy(&zfsdev_state_list); } -uint_t zfs_fsyncer_key; -extern uint_t rrw_tsd_key; +static void +zfs_allow_log_destroy(void *arg) +{ + char *poolname = arg; + strfree(poolname); +} #ifdef DEBUG #define ZFS_DEBUG_STR " (DEBUG mode)" #else #define ZFS_DEBUG_STR "" #endif int _init(void) { int error; spa_init(FREAD | FWRITE); zfs_init(); if ((error = zvol_init()) != 0) goto out1; + zfs_ioctl_init(); + if ((error = zfs_attach()) != 0) goto out2; tsd_create(&zfs_fsyncer_key, NULL); - tsd_create(&rrw_tsd_key, NULL); + tsd_create(&rrw_tsd_key, rrw_tsd_destroy); + tsd_create(&zfs_allow_log_key, zfs_allow_log_destroy); printk(KERN_NOTICE "ZFS: Loaded module v%s-%s%s, " "ZFS pool version %s, ZFS filesystem version %s\n", ZFS_META_VERSION, ZFS_META_RELEASE, ZFS_DEBUG_STR, SPA_VERSION_STRING, ZPL_VERSION_STRING); return (0); out2: (void) zvol_fini(); out1: zfs_fini(); spa_fini(); printk(KERN_NOTICE "ZFS: Failed to Load ZFS Filesystem v%s-%s%s" ", rc = %d\n", ZFS_META_VERSION, ZFS_META_RELEASE, ZFS_DEBUG_STR, error); return (error); } int _fini(void) { zfs_detach(); zvol_fini(); zfs_fini(); spa_fini(); tsd_destroy(&zfs_fsyncer_key); tsd_destroy(&rrw_tsd_key); + tsd_destroy(&zfs_allow_log_key); printk(KERN_NOTICE "ZFS: Unloaded module v%s-%s%s\n", ZFS_META_VERSION, ZFS_META_RELEASE, ZFS_DEBUG_STR); return (0); } #ifdef HAVE_SPL spl_module_init(_init); spl_module_exit(_fini); MODULE_DESCRIPTION("ZFS"); MODULE_AUTHOR(ZFS_META_AUTHOR); MODULE_LICENSE(ZFS_META_LICENSE); #endif /* HAVE_SPL */ diff --git a/module/zfs/zfs_vfsops.c b/module/zfs/zfs_vfsops.c index 9ae7ab500942..8fee441b14e5 100644 --- a/module/zfs/zfs_vfsops.c +++ b/module/zfs/zfs_vfsops.c @@ -1,1627 +1,1627 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. 
*/ /* Portions Copyright 2010 Robert Milkowski */ #include #include #include #include #include #include #include #include #include #include #include #include #include "fs/fs_subr.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "zfs_comutil.h" /*ARGSUSED*/ int zfs_sync(struct super_block *sb, int wait, cred_t *cr) { zfs_sb_t *zsb = sb->s_fs_info; /* * Data integrity is job one. We don't want a compromised kernel * writing to the storage pool, so we never sync during panic. */ if (unlikely(oops_in_progress)) return (0); /* * Semantically, the only requirement is that the sync be initiated. * The DMU syncs out txgs frequently, so there's nothing to do. */ if (!wait) return (0); if (zsb != NULL) { /* * Sync a specific filesystem. */ dsl_pool_t *dp; ZFS_ENTER(zsb); dp = dmu_objset_pool(zsb->z_os); /* * If the system is shutting down, then skip any * filesystems which may exist on a suspended pool. */ if (spa_suspended(dp->dp_spa)) { ZFS_EXIT(zsb); return (0); } if (zsb->z_log != NULL) zil_commit(zsb->z_log, 0); ZFS_EXIT(zsb); } else { /* * Sync all ZFS filesystems. This is what happens when you * run sync(1M). Unlike other filesystems, ZFS honors the * request by waiting for all pools to commit all dirty data. */ spa_sync_allpools(); } return (0); } EXPORT_SYMBOL(zfs_sync); boolean_t zfs_is_readonly(zfs_sb_t *zsb) { return (!!(zsb->z_sb->s_flags & MS_RDONLY)); } EXPORT_SYMBOL(zfs_is_readonly); static void atime_changed_cb(void *arg, uint64_t newval) { ((zfs_sb_t *)arg)->z_atime = newval; } static void xattr_changed_cb(void *arg, uint64_t newval) { zfs_sb_t *zsb = arg; if (newval == ZFS_XATTR_OFF) { zsb->z_flags &= ~ZSB_XATTR; } else { zsb->z_flags |= ZSB_XATTR; if (newval == ZFS_XATTR_SA) zsb->z_xattr_sa = B_TRUE; else zsb->z_xattr_sa = B_FALSE; } } static void blksz_changed_cb(void *arg, uint64_t newval) { zfs_sb_t *zsb = arg; if (newval < SPA_MINBLOCKSIZE || newval > SPA_MAXBLOCKSIZE || !ISP2(newval)) newval = SPA_MAXBLOCKSIZE; zsb->z_max_blksz = newval; } static void readonly_changed_cb(void *arg, uint64_t newval) { zfs_sb_t *zsb = arg; struct super_block *sb = zsb->z_sb; if (sb == NULL) return; if (newval) sb->s_flags |= MS_RDONLY; else sb->s_flags &= ~MS_RDONLY; } static void devices_changed_cb(void *arg, uint64_t newval) { } static void setuid_changed_cb(void *arg, uint64_t newval) { } static void exec_changed_cb(void *arg, uint64_t newval) { } static void nbmand_changed_cb(void *arg, uint64_t newval) { zfs_sb_t *zsb = arg; struct super_block *sb = zsb->z_sb; if (sb == NULL) return; if (newval == TRUE) sb->s_flags |= MS_MANDLOCK; else sb->s_flags &= ~MS_MANDLOCK; } static void snapdir_changed_cb(void *arg, uint64_t newval) { ((zfs_sb_t *)arg)->z_show_ctldir = newval; } static void vscan_changed_cb(void *arg, uint64_t newval) { ((zfs_sb_t *)arg)->z_vscan = newval; } static void acl_inherit_changed_cb(void *arg, uint64_t newval) { ((zfs_sb_t *)arg)->z_acl_inherit = newval; } int zfs_register_callbacks(zfs_sb_t *zsb) { struct dsl_dataset *ds = NULL; objset_t *os = zsb->z_os; boolean_t do_readonly = B_FALSE; int error = 0; if (zfs_is_readonly(zsb) || !spa_writeable(dmu_objset_spa(os))) do_readonly = B_TRUE; /* * Register property callbacks. * * It would probably be fine to just check for i/o error from * the first prop_register(), but I guess I like to go * overboard... 
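The registration chain that follows relies on the error = error ? error : next() idiom: once a call fails, the later registrations are not evaluated and the first error is preserved for the unregister path. A self-contained sketch of the idiom with made-up step functions:

/* Illustrative only; step_a/step_b/step_c are hypothetical. */
static int step_a(void) { return (0); }
static int step_b(void) { return (EINVAL); }	/* pretend failure */
static int step_c(void) { return (0); }

static int
example_chain(void)
{
	int error;

	error = step_a();
	error = error ? error : step_b();	/* first failure is kept */
	error = error ? error : step_c();	/* not evaluated once error != 0 */
	return (error);
}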
*/ ds = dmu_objset_ds(os); error = dsl_prop_register(ds, "atime", atime_changed_cb, zsb); error = error ? error : dsl_prop_register(ds, "xattr", xattr_changed_cb, zsb); error = error ? error : dsl_prop_register(ds, "recordsize", blksz_changed_cb, zsb); error = error ? error : dsl_prop_register(ds, "readonly", readonly_changed_cb, zsb); error = error ? error : dsl_prop_register(ds, "devices", devices_changed_cb, zsb); error = error ? error : dsl_prop_register(ds, "setuid", setuid_changed_cb, zsb); error = error ? error : dsl_prop_register(ds, "exec", exec_changed_cb, zsb); error = error ? error : dsl_prop_register(ds, "snapdir", snapdir_changed_cb, zsb); error = error ? error : dsl_prop_register(ds, "aclinherit", acl_inherit_changed_cb, zsb); error = error ? error : dsl_prop_register(ds, "vscan", vscan_changed_cb, zsb); error = error ? error : dsl_prop_register(ds, "nbmand", nbmand_changed_cb, zsb); if (error) goto unregister; if (do_readonly) readonly_changed_cb(zsb, B_TRUE); return (0); unregister: /* * We may attempt to unregister some callbacks that are not * registered, but this is OK; it will simply return ENOMSG, * which we will ignore. */ (void) dsl_prop_unregister(ds, "atime", atime_changed_cb, zsb); (void) dsl_prop_unregister(ds, "xattr", xattr_changed_cb, zsb); (void) dsl_prop_unregister(ds, "recordsize", blksz_changed_cb, zsb); (void) dsl_prop_unregister(ds, "readonly", readonly_changed_cb, zsb); (void) dsl_prop_unregister(ds, "devices", devices_changed_cb, zsb); (void) dsl_prop_unregister(ds, "setuid", setuid_changed_cb, zsb); (void) dsl_prop_unregister(ds, "exec", exec_changed_cb, zsb); (void) dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb, zsb); (void) dsl_prop_unregister(ds, "aclinherit", acl_inherit_changed_cb, zsb); (void) dsl_prop_unregister(ds, "vscan", vscan_changed_cb, zsb); (void) dsl_prop_unregister(ds, "nbmand", nbmand_changed_cb, zsb); return (error); } EXPORT_SYMBOL(zfs_register_callbacks); static int zfs_space_delta_cb(dmu_object_type_t bonustype, void *data, uint64_t *userp, uint64_t *groupp) { int error = 0; /* * Is it a valid type of object to track? */ if (bonustype != DMU_OT_ZNODE && bonustype != DMU_OT_SA) return (ENOENT); /* * If we have a NULL data pointer * then assume the id's aren't changing and * return EEXIST to the dmu to let it know to * use the same ids */ if (data == NULL) return (EEXIST); if (bonustype == DMU_OT_ZNODE) { znode_phys_t *znp = data; *userp = znp->zp_uid; *groupp = znp->zp_gid; } else { int hdrsize; sa_hdr_phys_t *sap = data; sa_hdr_phys_t sa = *sap; boolean_t swap = B_FALSE; ASSERT(bonustype == DMU_OT_SA); if (sa.sa_magic == 0) { /* * This should only happen for newly created * files that haven't had the znode data filled * in yet. 
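Immediately after this early-return case, the code decides whether the SA header was written by a peer of the opposite endianness by comparing sa_magic against SA_MAGIC and against BSWAP_32(SA_MAGIC). A standalone sketch of that detection idiom (names are illustrative, not part of this change):

/*
 * A magic value written in the writer's native byte order reads back
 * either as the expected value or as its byte-swapped form, which tells
 * the reader whether the following fields need to be byte-swapped.
 */
static int
example_detect_swap(uint32_t magic_on_disk, uint32_t expected_magic,
    boolean_t *need_swap)
{
	if (magic_on_disk == expected_magic) {
		*need_swap = B_FALSE;
		return (0);
	}
	if (magic_on_disk == BSWAP_32(expected_magic)) {
		*need_swap = B_TRUE;
		return (0);
	}
	return (EINVAL);	/* neither form: corrupt or wrong object */
}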
*/ *userp = 0; *groupp = 0; return (0); } if (sa.sa_magic == BSWAP_32(SA_MAGIC)) { sa.sa_magic = SA_MAGIC; sa.sa_layout_info = BSWAP_16(sa.sa_layout_info); swap = B_TRUE; } else { VERIFY3U(sa.sa_magic, ==, SA_MAGIC); } hdrsize = sa_hdrsize(&sa); VERIFY3U(hdrsize, >=, sizeof (sa_hdr_phys_t)); *userp = *((uint64_t *)((uintptr_t)data + hdrsize + SA_UID_OFFSET)); *groupp = *((uint64_t *)((uintptr_t)data + hdrsize + SA_GID_OFFSET)); if (swap) { *userp = BSWAP_64(*userp); *groupp = BSWAP_64(*groupp); } } return (error); } static void fuidstr_to_sid(zfs_sb_t *zsb, const char *fuidstr, char *domainbuf, int buflen, uid_t *ridp) { uint64_t fuid; const char *domain; fuid = strtonum(fuidstr, NULL); domain = zfs_fuid_find_by_idx(zsb, FUID_INDEX(fuid)); if (domain) (void) strlcpy(domainbuf, domain, buflen); else domainbuf[0] = '\0'; *ridp = FUID_RID(fuid); } static uint64_t zfs_userquota_prop_to_obj(zfs_sb_t *zsb, zfs_userquota_prop_t type) { switch (type) { case ZFS_PROP_USERUSED: return (DMU_USERUSED_OBJECT); case ZFS_PROP_GROUPUSED: return (DMU_GROUPUSED_OBJECT); case ZFS_PROP_USERQUOTA: return (zsb->z_userquota_obj); case ZFS_PROP_GROUPQUOTA: return (zsb->z_groupquota_obj); default: return (ENOTSUP); } return (0); } int zfs_userspace_many(zfs_sb_t *zsb, zfs_userquota_prop_t type, uint64_t *cookiep, void *vbuf, uint64_t *bufsizep) { int error; zap_cursor_t zc; zap_attribute_t za; zfs_useracct_t *buf = vbuf; uint64_t obj; if (!dmu_objset_userspace_present(zsb->z_os)) return (ENOTSUP); obj = zfs_userquota_prop_to_obj(zsb, type); if (obj == 0) { *bufsizep = 0; return (0); } for (zap_cursor_init_serialized(&zc, zsb->z_os, obj, *cookiep); (error = zap_cursor_retrieve(&zc, &za)) == 0; zap_cursor_advance(&zc)) { if ((uintptr_t)buf - (uintptr_t)vbuf + sizeof (zfs_useracct_t) > *bufsizep) break; fuidstr_to_sid(zsb, za.za_name, buf->zu_domain, sizeof (buf->zu_domain), &buf->zu_rid); buf->zu_space = za.za_first_integer; buf++; } if (error == ENOENT) error = 0; ASSERT3U((uintptr_t)buf - (uintptr_t)vbuf, <=, *bufsizep); *bufsizep = (uintptr_t)buf - (uintptr_t)vbuf; *cookiep = zap_cursor_serialize(&zc); zap_cursor_fini(&zc); return (error); } EXPORT_SYMBOL(zfs_userspace_many); /* * buf must be big enough (eg, 32 bytes) */ static int id_to_fuidstr(zfs_sb_t *zsb, const char *domain, uid_t rid, char *buf, boolean_t addok) { uint64_t fuid; int domainid = 0; if (domain && domain[0]) { domainid = zfs_fuid_find_by_domain(zsb, domain, NULL, addok); if (domainid == -1) return (ENOENT); } fuid = FUID_ENCODE(domainid, rid); (void) sprintf(buf, "%llx", (longlong_t)fuid); return (0); } int zfs_userspace_one(zfs_sb_t *zsb, zfs_userquota_prop_t type, const char *domain, uint64_t rid, uint64_t *valp) { char buf[32]; int err; uint64_t obj; *valp = 0; if (!dmu_objset_userspace_present(zsb->z_os)) return (ENOTSUP); obj = zfs_userquota_prop_to_obj(zsb, type); if (obj == 0) return (0); err = id_to_fuidstr(zsb, domain, rid, buf, B_FALSE); if (err) return (err); err = zap_lookup(zsb->z_os, obj, buf, 8, 1, valp); if (err == ENOENT) err = 0; return (err); } EXPORT_SYMBOL(zfs_userspace_one); int zfs_set_userquota(zfs_sb_t *zsb, zfs_userquota_prop_t type, const char *domain, uint64_t rid, uint64_t quota) { char buf[32]; int err; dmu_tx_t *tx; uint64_t *objp; boolean_t fuid_dirtied; if (type != ZFS_PROP_USERQUOTA && type != ZFS_PROP_GROUPQUOTA) return (EINVAL); if (zsb->z_version < ZPL_VERSION_USERSPACE) return (ENOTSUP); objp = (type == ZFS_PROP_USERQUOTA) ? 
&zsb->z_userquota_obj : &zsb->z_groupquota_obj; err = id_to_fuidstr(zsb, domain, rid, buf, B_TRUE); if (err) return (err); fuid_dirtied = zsb->z_fuid_dirty; tx = dmu_tx_create(zsb->z_os); dmu_tx_hold_zap(tx, *objp ? *objp : DMU_NEW_OBJECT, B_TRUE, NULL); if (*objp == 0) { dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_TRUE, zfs_userquota_prop_prefixes[type]); } if (fuid_dirtied) zfs_fuid_txhold(zsb, tx); err = dmu_tx_assign(tx, TXG_WAIT); if (err) { dmu_tx_abort(tx); return (err); } mutex_enter(&zsb->z_lock); if (*objp == 0) { *objp = zap_create(zsb->z_os, DMU_OT_USERGROUP_QUOTA, DMU_OT_NONE, 0, tx); VERIFY(0 == zap_add(zsb->z_os, MASTER_NODE_OBJ, zfs_userquota_prop_prefixes[type], 8, 1, objp, tx)); } mutex_exit(&zsb->z_lock); if (quota == 0) { err = zap_remove(zsb->z_os, *objp, buf, tx); if (err == ENOENT) err = 0; } else { err = zap_update(zsb->z_os, *objp, buf, 8, 1, "a, tx); } ASSERT(err == 0); if (fuid_dirtied) zfs_fuid_sync(zsb, tx); dmu_tx_commit(tx); return (err); } EXPORT_SYMBOL(zfs_set_userquota); boolean_t zfs_fuid_overquota(zfs_sb_t *zsb, boolean_t isgroup, uint64_t fuid) { char buf[32]; uint64_t used, quota, usedobj, quotaobj; int err; usedobj = isgroup ? DMU_GROUPUSED_OBJECT : DMU_USERUSED_OBJECT; quotaobj = isgroup ? zsb->z_groupquota_obj : zsb->z_userquota_obj; if (quotaobj == 0 || zsb->z_replay) return (B_FALSE); (void) sprintf(buf, "%llx", (longlong_t)fuid); err = zap_lookup(zsb->z_os, quotaobj, buf, 8, 1, "a); if (err != 0) return (B_FALSE); err = zap_lookup(zsb->z_os, usedobj, buf, 8, 1, &used); if (err != 0) return (B_FALSE); return (used >= quota); } EXPORT_SYMBOL(zfs_fuid_overquota); boolean_t zfs_owner_overquota(zfs_sb_t *zsb, znode_t *zp, boolean_t isgroup) { uint64_t fuid; uint64_t quotaobj; quotaobj = isgroup ? zsb->z_groupquota_obj : zsb->z_userquota_obj; fuid = isgroup ? zp->z_gid : zp->z_uid; if (quotaobj == 0 || zsb->z_replay) return (B_FALSE); return (zfs_fuid_overquota(zsb, isgroup, fuid)); } EXPORT_SYMBOL(zfs_owner_overquota); int zfs_sb_create(const char *osname, zfs_sb_t **zsbp) { objset_t *os; zfs_sb_t *zsb; uint64_t zval; int i, error; uint64_t sa_obj; zsb = kmem_zalloc(sizeof (zfs_sb_t), KM_SLEEP | KM_NODEBUG); /* * We claim to always be readonly so we can open snapshots; * other ZPL code will prevent us from writing to snapshots. */ error = dmu_objset_own(osname, DMU_OST_ZFS, B_TRUE, zsb, &os); if (error) { kmem_free(zsb, sizeof (zfs_sb_t)); return (error); } /* * Initialize the zfs-specific filesystem structure. * Should probably make this a kmem cache, shuffle fields, * and just bzero up to z_hold_mtx[]. */ zsb->z_sb = NULL; zsb->z_parent = zsb; zsb->z_max_blksz = SPA_MAXBLOCKSIZE; zsb->z_show_ctldir = ZFS_SNAPDIR_VISIBLE; zsb->z_os = os; error = zfs_get_zplprop(os, ZFS_PROP_VERSION, &zsb->z_version); if (error) { goto out; } else if (zsb->z_version > zfs_zpl_version_map(spa_version(dmu_objset_spa(os)))) { (void) printk("Can't mount a version %lld file system " "on a version %lld pool\n. Pool must be upgraded to mount " "this file system.", (u_longlong_t)zsb->z_version, (u_longlong_t)spa_version(dmu_objset_spa(os))); error = ENOTSUP; goto out; } if ((error = zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &zval)) != 0) goto out; zsb->z_norm = (int)zval; if ((error = zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &zval)) != 0) goto out; zsb->z_utf8 = (zval != 0); if ((error = zfs_get_zplprop(os, ZFS_PROP_CASE, &zval)) != 0) goto out; zsb->z_case = (uint_t)zval; /* * Fold case on file systems that are always or sometimes case * insensitive. 
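Looking back briefly at the user/group quota helpers above: both the quota and the used objects are keyed by the fuid rendered as a hex string, so reads and writes are single 8-byte ZAP entries on that key. An illustrative helper mirroring id_to_fuidstr() and zfs_fuid_overquota(), not part of this change:

/* Illustration only. */
static int
example_quota_lookup(zfs_sb_t *zsb, uint64_t quotaobj, uint64_t fuid,
    uint64_t *quotap)
{
	char buf[32];

	(void) sprintf(buf, "%llx", (longlong_t)fuid);
	return (zap_lookup(zsb->z_os, quotaobj, buf, 8, 1, quotap));
}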
*/ if (zsb->z_case == ZFS_CASE_INSENSITIVE || zsb->z_case == ZFS_CASE_MIXED) zsb->z_norm |= U8_TEXTPREP_TOUPPER; zsb->z_use_fuids = USE_FUIDS(zsb->z_version, zsb->z_os); zsb->z_use_sa = USE_SA(zsb->z_version, zsb->z_os); if (zsb->z_use_sa) { /* should either have both of these objects or none */ error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 8, 1, &sa_obj); if (error) goto out; error = zfs_get_zplprop(os, ZFS_PROP_XATTR, &zval); if ((error == 0) && (zval == ZFS_XATTR_SA)) zsb->z_xattr_sa = B_TRUE; } else { /* * Pre SA versions file systems should never touch * either the attribute registration or layout objects. */ sa_obj = 0; } error = sa_setup(os, sa_obj, zfs_attr_table, ZPL_END, &zsb->z_attr_table); if (error) goto out; if (zsb->z_version >= ZPL_VERSION_SA) sa_register_update_callback(os, zfs_sa_upgrade); error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_ROOT_OBJ, 8, 1, &zsb->z_root); if (error) goto out; ASSERT(zsb->z_root != 0); error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_UNLINKED_SET, 8, 1, &zsb->z_unlinkedobj); if (error) goto out; error = zap_lookup(os, MASTER_NODE_OBJ, zfs_userquota_prop_prefixes[ZFS_PROP_USERQUOTA], 8, 1, &zsb->z_userquota_obj); if (error && error != ENOENT) goto out; error = zap_lookup(os, MASTER_NODE_OBJ, zfs_userquota_prop_prefixes[ZFS_PROP_GROUPQUOTA], 8, 1, &zsb->z_groupquota_obj); if (error && error != ENOENT) goto out; error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_FUID_TABLES, 8, 1, &zsb->z_fuid_obj); if (error && error != ENOENT) goto out; error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SHARES_DIR, 8, 1, &zsb->z_shares_dir); if (error && error != ENOENT) goto out; mutex_init(&zsb->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&zsb->z_lock, NULL, MUTEX_DEFAULT, NULL); list_create(&zsb->z_all_znodes, sizeof (znode_t), offsetof(znode_t, z_link_node)); rrw_init(&zsb->z_teardown_lock); rw_init(&zsb->z_teardown_inactive_lock, NULL, RW_DEFAULT, NULL); rw_init(&zsb->z_fuid_lock, NULL, RW_DEFAULT, NULL); for (i = 0; i != ZFS_OBJ_MTX_SZ; i++) mutex_init(&zsb->z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL); avl_create(&zsb->z_ctldir_snaps, snapentry_compare, sizeof (zfs_snapentry_t), offsetof(zfs_snapentry_t, se_node)); mutex_init(&zsb->z_ctldir_lock, NULL, MUTEX_DEFAULT, NULL); *zsbp = zsb; return (0); out: dmu_objset_disown(os, zsb); *zsbp = NULL; kmem_free(zsb, sizeof (zfs_sb_t)); return (error); } EXPORT_SYMBOL(zfs_sb_create); int zfs_sb_setup(zfs_sb_t *zsb, boolean_t mounting) { int error; error = zfs_register_callbacks(zsb); if (error) return (error); /* * Set the objset user_ptr to track its zsb. */ mutex_enter(&zsb->z_os->os_user_ptr_lock); dmu_objset_set_user(zsb->z_os, zsb); mutex_exit(&zsb->z_os->os_user_ptr_lock); zsb->z_log = zil_open(zsb->z_os, zfs_get_data); /* * If we are not mounting (ie: online recv), then we don't * have to worry about replaying the log as we blocked all * operations out since we closed the ZIL. */ if (mounting) { boolean_t readonly; /* * During replay we remove the read only flag to * allow replays to succeed. */ readonly = zfs_is_readonly(zsb); if (readonly != 0) readonly_changed_cb(zsb, B_FALSE); else zfs_unlinked_drain(zsb); /* * Parse and replay the intent log. * * Because of ziltest, this must be done after * zfs_unlinked_drain(). (Further note: ziltest * doesn't use readonly mounts, where * zfs_unlinked_drain() isn't called.) This is because * ziltest causes spa_sync() to think it's committed, * but actually it is not, so the intent log contains * many txg's worth of changes. 
* * In particular, if object N is in the unlinked set in * the last txg to actually sync, then it could be * actually freed in a later txg and then reallocated * in a yet later txg. This would write a "create * object N" record to the intent log. Normally, this * would be fine because the spa_sync() would have * written out the fact that object N is free, before * we could write the "create object N" intent log * record. * * But when we are in ziltest mode, we advance the "open * txg" without actually spa_sync()-ing the changes to * disk. So we would see that object N is still * allocated and in the unlinked set, and there is an * intent log record saying to allocate it. */ if (spa_writeable(dmu_objset_spa(zsb->z_os))) { if (zil_replay_disable) { zil_destroy(zsb->z_log, B_FALSE); } else { zsb->z_replay = B_TRUE; zil_replay(zsb->z_os, zsb, zfs_replay_vector); zsb->z_replay = B_FALSE; } } /* restore readonly bit */ if (readonly != 0) readonly_changed_cb(zsb, B_TRUE); } return (0); } EXPORT_SYMBOL(zfs_sb_setup); void zfs_sb_free(zfs_sb_t *zsb) { int i; zfs_fuid_destroy(zsb); mutex_destroy(&zsb->z_znodes_lock); mutex_destroy(&zsb->z_lock); list_destroy(&zsb->z_all_znodes); rrw_destroy(&zsb->z_teardown_lock); rw_destroy(&zsb->z_teardown_inactive_lock); rw_destroy(&zsb->z_fuid_lock); for (i = 0; i != ZFS_OBJ_MTX_SZ; i++) mutex_destroy(&zsb->z_hold_mtx[i]); mutex_destroy(&zsb->z_ctldir_lock); avl_destroy(&zsb->z_ctldir_snaps); kmem_free(zsb, sizeof (zfs_sb_t)); } EXPORT_SYMBOL(zfs_sb_free); static void zfs_set_fuid_feature(zfs_sb_t *zsb) { zsb->z_use_fuids = USE_FUIDS(zsb->z_version, zsb->z_os); zsb->z_use_sa = USE_SA(zsb->z_version, zsb->z_os); } void zfs_unregister_callbacks(zfs_sb_t *zsb) { objset_t *os = zsb->z_os; struct dsl_dataset *ds; /* * Unregister properties. */ if (!dmu_objset_is_snapshot(os)) { ds = dmu_objset_ds(os); VERIFY(dsl_prop_unregister(ds, "atime", atime_changed_cb, zsb) == 0); VERIFY(dsl_prop_unregister(ds, "xattr", xattr_changed_cb, zsb) == 0); VERIFY(dsl_prop_unregister(ds, "recordsize", blksz_changed_cb, zsb) == 0); VERIFY(dsl_prop_unregister(ds, "readonly", readonly_changed_cb, zsb) == 0); VERIFY(dsl_prop_unregister(ds, "devices", devices_changed_cb, zsb) == 0); VERIFY(dsl_prop_unregister(ds, "setuid", setuid_changed_cb, zsb) == 0); VERIFY(dsl_prop_unregister(ds, "exec", exec_changed_cb, zsb) == 0); VERIFY(dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb, zsb) == 0); VERIFY(dsl_prop_unregister(ds, "aclinherit", acl_inherit_changed_cb, zsb) == 0); VERIFY(dsl_prop_unregister(ds, "vscan", vscan_changed_cb, zsb) == 0); VERIFY(dsl_prop_unregister(ds, "nbmand", nbmand_changed_cb, zsb) == 0); } } EXPORT_SYMBOL(zfs_unregister_callbacks); #ifdef HAVE_MLSLABEL /* * zfs_check_global_label: * Check that the hex label string is appropriate for the dataset * being mounted into the global_zone proper. * * Return an error if the hex label string is not default or * admin_low/admin_high. For admin_low labels, the corresponding * dataset must be readonly. */ int zfs_check_global_label(const char *dsname, const char *hexsl) { if (strcasecmp(hexsl, ZFS_MLSLABEL_DEFAULT) == 0) return (0); if (strcasecmp(hexsl, ADMIN_HIGH) == 0) return (0); if (strcasecmp(hexsl, ADMIN_LOW) == 0) { /* must be readonly */ uint64_t rdonly; if (dsl_prop_get_integer(dsname, zfs_prop_to_name(ZFS_PROP_READONLY), &rdonly, NULL)) return (EACCES); return (rdonly ? 
0 : EACCES); } return (EACCES); } EXPORT_SYMBOL(zfs_check_global_label); #endif /* HAVE_MLSLABEL */ int zfs_statvfs(struct dentry *dentry, struct kstatfs *statp) { zfs_sb_t *zsb = dentry->d_sb->s_fs_info; uint64_t refdbytes, availbytes, usedobjs, availobjs; uint64_t fsid; uint32_t bshift; ZFS_ENTER(zsb); dmu_objset_space(zsb->z_os, &refdbytes, &availbytes, &usedobjs, &availobjs); fsid = dmu_objset_fsid_guid(zsb->z_os); /* * The underlying storage pool actually uses multiple block * size. Under Solaris frsize (fragment size) is reported as * the smallest block size we support, and bsize (block size) * as the filesystem's maximum block size. Unfortunately, * under Linux the fragment size and block size are often used * interchangeably. Thus we are forced to report both of them * as the filesystem's maximum block size. */ statp->f_frsize = zsb->z_max_blksz; statp->f_bsize = zsb->z_max_blksz; bshift = fls(statp->f_bsize) - 1; /* * The following report "total" blocks of various kinds in * the file system, but reported in terms of f_bsize - the * "preferred" size. */ statp->f_blocks = (refdbytes + availbytes) >> bshift; statp->f_bfree = availbytes >> bshift; statp->f_bavail = statp->f_bfree; /* no root reservation */ /* * statvfs() should really be called statufs(), because it assumes * static metadata. ZFS doesn't preallocate files, so the best * we can do is report the max that could possibly fit in f_files, * and that minus the number actually used in f_ffree. * For f_ffree, report the smaller of the number of object available * and the number of blocks (each object will take at least a block). */ statp->f_ffree = MIN(availobjs, availbytes >> DNODE_SHIFT); statp->f_files = statp->f_ffree + usedobjs; statp->f_fsid.val[0] = (uint32_t)fsid; statp->f_fsid.val[1] = (uint32_t)(fsid >> 32); statp->f_type = ZFS_SUPER_MAGIC; statp->f_namelen = ZFS_MAXNAMELEN; /* * We have all of 40 characters to stuff a string here. * Is there anything useful we could/should provide? */ bzero(statp->f_spare, sizeof (statp->f_spare)); ZFS_EXIT(zsb); return (0); } EXPORT_SYMBOL(zfs_statvfs); int zfs_root(zfs_sb_t *zsb, struct inode **ipp) { znode_t *rootzp; int error; ZFS_ENTER(zsb); error = zfs_zget(zsb, zsb->z_root, &rootzp); if (error == 0) *ipp = ZTOI(rootzp); ZFS_EXIT(zsb); return (error); } EXPORT_SYMBOL(zfs_root); #ifdef HAVE_SHRINK int zfs_sb_prune(struct super_block *sb, unsigned long nr_to_scan, int *objects) { zfs_sb_t *zsb = sb->s_fs_info; struct shrinker *shrinker = &sb->s_shrink; struct shrink_control sc = { .nr_to_scan = nr_to_scan, .gfp_mask = GFP_KERNEL, }; ZFS_ENTER(zsb); *objects = (*shrinker->shrink)(shrinker, &sc); ZFS_EXIT(zsb); return (0); } EXPORT_SYMBOL(zfs_sb_prune); #endif /* HAVE_SHRINK */ /* * Teardown the zfs_sb_t. * * Note, if 'unmounting' if FALSE, we return with the 'z_teardown_lock' * and 'z_teardown_inactive_lock' held. */ int zfs_sb_teardown(zfs_sb_t *zsb, boolean_t unmounting) { znode_t *zp; rrw_enter(&zsb->z_teardown_lock, RW_WRITER, FTAG); if (!unmounting) { /* * We purge the parent filesystem's super block as the * parent filesystem and all of its snapshots have their * inode's super block set to the parent's filesystem's * super block. Note, 'z_parent' is self referential * for non-snapshots. */ shrink_dcache_sb(zsb->z_parent->z_sb); } /* * If someone has not already unmounted this file system, * drain the iput_taskq to ensure all active references to the * zfs_sb_t have been handled only then can it be safely destroyed. 
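A short aside on the zfs_statvfs() accounting shown a little earlier in this hunk: byte counts are reported in units of the filesystem's maximum block size by shifting with bshift = fls(f_bsize) - 1, where fls() is the Linux find-last-set helper already used by that code. A worked sketch, illustrative only:

/*
 * With the default 128K maximum block size, fls(131072) is 18, so
 * bshift is 17 and byte counts are shifted down into 128K "blocks".
 */
static void
example_statvfs_blocks(uint64_t refdbytes, uint64_t availbytes,
    uint32_t max_blksz, uint64_t *blocksp, uint64_t *bfreep)
{
	uint32_t bshift = fls(max_blksz) - 1;

	*blocksp = (refdbytes + availbytes) >> bshift;
	*bfreep = availbytes >> bshift;
}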
*/ if (zsb->z_os) taskq_wait(dsl_pool_iput_taskq(dmu_objset_pool(zsb->z_os))); /* * Close the zil. NB: Can't close the zil while zfs_inactive * threads are blocked as zil_close can call zfs_inactive. */ if (zsb->z_log) { zil_close(zsb->z_log); zsb->z_log = NULL; } rw_enter(&zsb->z_teardown_inactive_lock, RW_WRITER); /* * If we are not unmounting (ie: online recv) and someone already * unmounted this file system while we were doing the switcheroo, * or a reopen of z_os failed then just bail out now. */ if (!unmounting && (zsb->z_unmounted || zsb->z_os == NULL)) { rw_exit(&zsb->z_teardown_inactive_lock); rrw_exit(&zsb->z_teardown_lock, FTAG); return (EIO); } /* * At this point there are no VFS ops active, and any new VFS ops * will fail with EIO since we have z_teardown_lock for writer (only * relevant for forced unmount). * * Release all holds on dbufs. */ mutex_enter(&zsb->z_znodes_lock); for (zp = list_head(&zsb->z_all_znodes); zp != NULL; zp = list_next(&zsb->z_all_znodes, zp)) { if (zp->z_sa_hdl) { ASSERT(atomic_read(&ZTOI(zp)->i_count) > 0); zfs_znode_dmu_fini(zp); } } mutex_exit(&zsb->z_znodes_lock); /* * If we are unmounting, set the unmounted flag and let new VFS ops * unblock. zfs_inactive will have the unmounted behavior, and all * other VFS ops will fail with EIO. */ if (unmounting) { zsb->z_unmounted = B_TRUE; rrw_exit(&zsb->z_teardown_lock, FTAG); rw_exit(&zsb->z_teardown_inactive_lock); } /* * z_os will be NULL if there was an error in attempting to reopen * zsb, so just return as the properties had already been * * unregistered and cached data had been evicted before. */ if (zsb->z_os == NULL) return (0); /* * Unregister properties. */ zfs_unregister_callbacks(zsb); /* * Evict cached data */ if (dsl_dataset_is_dirty(dmu_objset_ds(zsb->z_os)) && !zfs_is_readonly(zsb)) txg_wait_synced(dmu_objset_pool(zsb->z_os), 0); (void) dmu_objset_evict_dbufs(zsb->z_os); return (0); } EXPORT_SYMBOL(zfs_sb_teardown); #if defined(HAVE_BDI) && !defined(HAVE_BDI_SETUP_AND_REGISTER) atomic_long_t zfs_bdi_seq = ATOMIC_LONG_INIT(0); #endif /* HAVE_BDI && !HAVE_BDI_SETUP_AND_REGISTER */ int zfs_domount(struct super_block *sb, void *data, int silent) { zpl_mount_data_t *zmd = data; const char *osname = zmd->z_osname; zfs_sb_t *zsb; struct inode *root_inode; uint64_t recordsize; int error; error = zfs_sb_create(osname, &zsb); if (error) return (error); if ((error = dsl_prop_get_integer(osname, "recordsize", &recordsize, NULL))) goto out; zsb->z_sb = sb; sb->s_fs_info = zsb; sb->s_magic = ZFS_SUPER_MAGIC; sb->s_maxbytes = MAX_LFS_FILESIZE; sb->s_time_gran = 1; sb->s_blocksize = recordsize; sb->s_blocksize_bits = ilog2(recordsize); #ifdef HAVE_BDI /* * 2.6.32 API change, * Added backing_device_info (BDI) per super block interfaces. A BDI * must be configured when using a non-device backed filesystem for * proper writeback. This is not required for older pdflush kernels. * * NOTE: Linux read-ahead is disabled in favor of zfs read-ahead. */ zsb->z_bdi.ra_pages = 0; sb->s_bdi = &zsb->z_bdi; error = -bdi_setup_and_register(&zsb->z_bdi, "zfs", BDI_CAP_MAP_COPY); if (error) goto out; #endif /* HAVE_BDI */ /* Set callback operations for the file system. */ sb->s_op = &zpl_super_operations; sb->s_xattr = zpl_xattr_handlers; sb->s_export_op = &zpl_export_operations; #ifdef HAVE_S_D_OP sb->s_d_op = &zpl_dentry_operations; #endif /* HAVE_S_D_OP */ /* Set features for file system. 
	zfs_set_fuid_feature(zsb);

	if (dmu_objset_is_snapshot(zsb->z_os)) {
		uint64_t pval;

		atime_changed_cb(zsb, B_FALSE);
		readonly_changed_cb(zsb, B_TRUE);
		if ((error = dsl_prop_get_integer(osname,
		    "xattr", &pval, NULL)))
			goto out;
		xattr_changed_cb(zsb, pval);
		zsb->z_issnap = B_TRUE;
		zsb->z_os->os_sync = ZFS_SYNC_DISABLED;

		mutex_enter(&zsb->z_os->os_user_ptr_lock);
		dmu_objset_set_user(zsb->z_os, zsb);
		mutex_exit(&zsb->z_os->os_user_ptr_lock);
	} else {
		error = zfs_sb_setup(zsb, B_TRUE);
	}

	/* Allocate a root inode for the filesystem. */
	error = zfs_root(zsb, &root_inode);
	if (error) {
		(void) zfs_umount(sb);
		goto out;
	}

	/* Allocate a root dentry for the filesystem. */
	sb->s_root = d_make_root(root_inode);
	if (sb->s_root == NULL) {
		(void) zfs_umount(sb);
		error = ENOMEM;
		goto out;
	}

	if (!zsb->z_issnap)
		zfsctl_create(zsb);
out:
	if (error) {
		dmu_objset_disown(zsb->z_os, zsb);
		zfs_sb_free(zsb);
	}

	return (error);
}
EXPORT_SYMBOL(zfs_domount);

/*
 * Called when an unmount is requested and certain sanity checks have
 * already passed.  At this point no dentries or inodes have been reclaimed
 * from their respective caches.  We drop the extra reference on the .zfs
 * control directory to allow everything to be reclaimed.  All snapshots
 * must already have been unmounted to reach this point.
 */
void
zfs_preumount(struct super_block *sb)
{
	zfs_sb_t *zsb = sb->s_fs_info;

	if (zsb != NULL && zsb->z_ctldir != NULL)
		zfsctl_destroy(zsb);
}
EXPORT_SYMBOL(zfs_preumount);

/*
 * Called once all other unmount-related tear down has occurred.
 * It is our responsibility to release any remaining infrastructure.
 */
/*ARGSUSED*/
int
zfs_umount(struct super_block *sb)
{
	zfs_sb_t *zsb = sb->s_fs_info;
	objset_t *os;

	VERIFY(zfs_sb_teardown(zsb, B_TRUE) == 0);
	os = zsb->z_os;

#ifdef HAVE_BDI
	bdi_destroy(sb->s_bdi);
#endif /* HAVE_BDI */

	/*
	 * z_os will be NULL if there was an error in
	 * attempting to reopen zsb.
	 */
	if (os != NULL) {
		/*
		 * Unset the objset user_ptr.
		 */
		mutex_enter(&os->os_user_ptr_lock);
		dmu_objset_set_user(os, NULL);
		mutex_exit(&os->os_user_ptr_lock);

		/*
		 * Finally release the objset
		 */
		dmu_objset_disown(os, zsb);
	}

	zfs_sb_free(zsb);
	return (0);
}
EXPORT_SYMBOL(zfs_umount);

int
zfs_remount(struct super_block *sb, int *flags, char *data)
{
	/*
	 * All namespace flags (MNT_*) and super block flags (MS_*) will
	 * be handled by the Linux VFS.  Only handle custom options here.
	 */
	return (0);
}
EXPORT_SYMBOL(zfs_remount);

int
zfs_vget(struct super_block *sb, struct inode **ipp, fid_t *fidp)
{
	zfs_sb_t *zsb = sb->s_fs_info;
	znode_t *zp;
	uint64_t object = 0;
	uint64_t fid_gen = 0;
	uint64_t gen_mask;
	uint64_t zp_gen;
	int i, err;

	*ipp = NULL;

	ZFS_ENTER(zsb);

	if (fidp->fid_len == LONG_FID_LEN) {
		zfid_long_t *zlfid = (zfid_long_t *)fidp;
		uint64_t objsetid = 0;
		uint64_t setgen = 0;

		for (i = 0; i < sizeof (zlfid->zf_setid); i++)
			objsetid |= ((uint64_t)zlfid->zf_setid[i]) << (8 * i);

		for (i = 0; i < sizeof (zlfid->zf_setgen); i++)
			setgen |= ((uint64_t)zlfid->zf_setgen[i]) << (8 * i);

		ZFS_EXIT(zsb);

		err = zfsctl_lookup_objset(sb, objsetid, &zsb);
		if (err)
			return (EINVAL);

		ZFS_ENTER(zsb);
	}

	if (fidp->fid_len == SHORT_FID_LEN || fidp->fid_len == LONG_FID_LEN) {
		zfid_short_t *zfid = (zfid_short_t *)fidp;

		for (i = 0; i < sizeof (zfid->zf_object); i++)
			object |= ((uint64_t)zfid->zf_object[i]) << (8 * i);

		for (i = 0; i < sizeof (zfid->zf_gen); i++)
			fid_gen |= ((uint64_t)zfid->zf_gen[i]) << (8 * i);
	} else {
		ZFS_EXIT(zsb);
		return (EINVAL);
	}

	/* A zero fid_gen means we are in the .zfs control directories */
	if (fid_gen == 0 &&
	    (object == ZFSCTL_INO_ROOT || object == ZFSCTL_INO_SNAPDIR)) {
		*ipp = zsb->z_ctldir;
		ASSERT(*ipp != NULL);
		if (object == ZFSCTL_INO_SNAPDIR) {
			VERIFY(zfsctl_root_lookup(*ipp, "snapshot", ipp,
			    0, kcred, NULL, NULL) == 0);
		} else {
			igrab(*ipp);
		}
		ZFS_EXIT(zsb);
		return (0);
	}

	gen_mask = -1ULL >> (64 - 8 * i);

	dprintf("getting %llu [%u mask %llx]\n", object, fid_gen, gen_mask);
	if ((err = zfs_zget(zsb, object, &zp))) {
		ZFS_EXIT(zsb);
		return (err);
	}
	(void) sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zsb), &zp_gen,
	    sizeof (uint64_t));
	zp_gen = zp_gen & gen_mask;
	if (zp_gen == 0)
		zp_gen = 1;
	if (zp->z_unlinked || zp_gen != fid_gen) {
		dprintf("znode gen (%u) != fid gen (%u)\n", zp_gen, fid_gen);
		iput(ZTOI(zp));
		ZFS_EXIT(zsb);
		return (EINVAL);
	}

	*ipp = ZTOI(zp);
	if (*ipp)
		zfs_inode_update(ITOZ(*ipp));

	ZFS_EXIT(zsb);
	return (0);
}
EXPORT_SYMBOL(zfs_vget);

/*
 * Block out VFS ops and close zfs_sb_t
 *
 * Note, if successful, then we return with the 'z_teardown_lock' and
 * 'z_teardown_inactive_lock' write held.
 */
int
zfs_suspend_fs(zfs_sb_t *zsb)
{
	int error;

	if ((error = zfs_sb_teardown(zsb, B_FALSE)) != 0)
		return (error);
	dmu_objset_disown(zsb->z_os, zsb);

	return (0);
}
EXPORT_SYMBOL(zfs_suspend_fs);

/*
 * Reopen zfs_sb_t and release VFS ops.
 */
int
zfs_resume_fs(zfs_sb_t *zsb, const char *osname)
{
	int err, err2;

	ASSERT(RRW_WRITE_HELD(&zsb->z_teardown_lock));
	ASSERT(RW_WRITE_HELD(&zsb->z_teardown_inactive_lock));

	err = dmu_objset_own(osname, DMU_OST_ZFS, B_FALSE, zsb, &zsb->z_os);
	if (err) {
		zsb->z_os = NULL;
	} else {
		znode_t *zp;
		uint64_t sa_obj = 0;

		err2 = zap_lookup(zsb->z_os, MASTER_NODE_OBJ,
		    ZFS_SA_ATTRS, 8, 1, &sa_obj);

		if ((err || err2) && zsb->z_version >= ZPL_VERSION_SA)
			goto bail;

		if ((err = sa_setup(zsb->z_os, sa_obj,
		    zfs_attr_table, ZPL_END, &zsb->z_attr_table)) != 0)
			goto bail;

		VERIFY(zfs_sb_setup(zsb, B_FALSE) == 0);
		zsb->z_rollback_time = jiffies;

		/*
		 * Attempt to re-establish all the active inodes with their
		 * dbufs.  If a zfs_rezget() fails, then we unhash the inode
		 * and mark it stale.  This prevents a collision if a new
		 * inode/object is created which must use the same inode
		 * number.  The stale inode will be released when the
		 * VFS prunes the dentry holding the remaining references
		 * on the stale inode.
		 */
		mutex_enter(&zsb->z_znodes_lock);
		for (zp = list_head(&zsb->z_all_znodes); zp;
		    zp = list_next(&zsb->z_all_znodes, zp)) {
			err2 = zfs_rezget(zp);
			if (err2) {
				remove_inode_hash(ZTOI(zp));
				zp->z_is_stale = B_TRUE;
			}
		}
		mutex_exit(&zsb->z_znodes_lock);
	}

bail:
	/* release the VFS ops */
	rw_exit(&zsb->z_teardown_inactive_lock);
	rrw_exit(&zsb->z_teardown_lock, FTAG);

	if (err) {
		/*
		 * Since we couldn't reopen zfs_sb_t, or setup the
		 * sa framework, force unmount this file system.
		 */
		if (zsb->z_os)
			(void) zfs_umount(zsb->z_sb);
	}
	return (err);
}
EXPORT_SYMBOL(zfs_resume_fs);

int
zfs_set_version(zfs_sb_t *zsb, uint64_t newvers)
{
	int error;
	objset_t *os = zsb->z_os;
	dmu_tx_t *tx;

	if (newvers < ZPL_VERSION_INITIAL || newvers > ZPL_VERSION)
		return (EINVAL);

	if (newvers < zsb->z_version)
		return (EINVAL);

	if (zfs_spa_version_map(newvers) >
	    spa_version(dmu_objset_spa(zsb->z_os)))
		return (ENOTSUP);

	tx = dmu_tx_create(os);
	dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_FALSE, ZPL_VERSION_STR);
	if (newvers >= ZPL_VERSION_SA && !zsb->z_use_sa) {
		dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_TRUE, ZFS_SA_ATTRS);
		dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
	}
	error = dmu_tx_assign(tx, TXG_WAIT);
	if (error) {
		dmu_tx_abort(tx);
		return (error);
	}

	error = zap_update(os, MASTER_NODE_OBJ, ZPL_VERSION_STR,
	    8, 1, &newvers, tx);

	if (error) {
		dmu_tx_commit(tx);
		return (error);
	}

	if (newvers >= ZPL_VERSION_SA && !zsb->z_use_sa) {
		uint64_t sa_obj;

		ASSERT3U(spa_version(dmu_objset_spa(zsb->z_os)), >=,
		    SPA_VERSION_SA);
		sa_obj = zap_create(os, DMU_OT_SA_MASTER_NODE,
		    DMU_OT_NONE, 0, tx);

		error = zap_add(os, MASTER_NODE_OBJ,
		    ZFS_SA_ATTRS, 8, 1, &sa_obj, tx);
		ASSERT0(error);

		VERIFY(0 == sa_set_sa_object(os, sa_obj));
		sa_register_update_callback(os, zfs_sa_upgrade);
	}

-	spa_history_log_internal(LOG_DS_UPGRADE,
-	    dmu_objset_spa(os), tx, "oldver=%llu newver=%llu dataset = %llu",
-	    zsb->z_version, newvers, dmu_objset_id(os));
+	spa_history_log_internal_ds(dmu_objset_ds(os), "upgrade", tx,
+	    "from %llu to %llu", zsb->z_version, newvers);

	dmu_tx_commit(tx);

	zsb->z_version = newvers;

	if (zsb->z_version >= ZPL_VERSION_FUID)
		zfs_set_fuid_feature(zsb);

	return (0);
}
EXPORT_SYMBOL(zfs_set_version);

/*
 * Read a property stored within the master node.
 */
int
zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value)
{
	const char *pname;
	int error = ENOENT;

	/*
	 * Look up the file system's value for the property.  For the
	 * version property, we look up a slightly different string.
	 */
	if (prop == ZFS_PROP_VERSION)
		pname = ZPL_VERSION_STR;
	else
		pname = zfs_prop_to_name(prop);

	if (os != NULL)
		error = zap_lookup(os, MASTER_NODE_OBJ, pname, 8, 1, value);

	if (error == ENOENT) {
		/* No value set, use the default value */
		switch (prop) {
		case ZFS_PROP_VERSION:
			*value = ZPL_VERSION;
			break;
		case ZFS_PROP_NORMALIZE:
		case ZFS_PROP_UTF8ONLY:
			*value = 0;
			break;
		case ZFS_PROP_CASE:
			*value = ZFS_CASE_SENSITIVE;
			break;
		default:
			return (error);
		}
		error = 0;
	}
	return (error);
}
EXPORT_SYMBOL(zfs_get_zplprop);

void
zfs_init(void)
{
	zfsctl_init();
	zfs_znode_init();
	dmu_objset_register_type(DMU_OST_ZFS, zfs_space_delta_cb);
	register_filesystem(&zpl_fs_type);
	(void) arc_add_prune_callback(zpl_prune_sbs, NULL);
}

void
zfs_fini(void)
{
	unregister_filesystem(&zpl_fs_type);
	zfs_znode_fini();
	zfsctl_fini();
}