diff --git a/sys/contrib/openzfs/.github/ISSUE_TEMPLATE/bug_report.md b/sys/contrib/openzfs/.github/ISSUE_TEMPLATE/bug_report.md
index 1dbb5f6edb55..92d0e03a9b9c 100644
--- a/sys/contrib/openzfs/.github/ISSUE_TEMPLATE/bug_report.md
+++ b/sys/contrib/openzfs/.github/ISSUE_TEMPLATE/bug_report.md
@@ -1,53 +1,55 @@
 ---
 name: Bug report
 about: Create a report to help us improve OpenZFS
 title: ''
-labels: 'Type: Defect, Status: Triage Needed'
+labels: 'Type: Defect'
 assignees: ''
 
 ---
 
 <!-- Please fill out the following template, which will help other contributors address your issue. -->
 
 <!--
 Thank you for reporting an issue.
 
 *IMPORTANT* - Please check our issue tracker before opening a new issue.
 Additional valuable information can be found in the OpenZFS documentation
 and mailing list archives.
 
 Please fill in as much of the template as possible.
 -->
 
 ### System information
 <!--  add version after "|" character -->
 Type | Version/Name
  --- | ---
 Distribution Name	|
 Distribution Version	|
-Linux Kernel	|
+Kernel Version	|
 Architecture	|
-ZFS Version	|
-SPL Version	|
+OpenZFS Version	|
 <!--
-Commands to find ZFS/SPL versions:
-modinfo zfs | grep -iw version
-modinfo spl | grep -iw version
+Command to find OpenZFS version:
+zfs version
+
+Commands to find kernel version:
+uname -r		# Linux
+freebsd-version -r	# FreeBSD
 -->
 
 ### Describe the problem you're observing
 
 ### Describe how to reproduce the problem
 
 ### Include any warnings/errors/backtraces from the system logs
 <!--
 *IMPORTANT* - Please mark logs and text output from terminal commands
 or else GitHub will not display them correctly.
 An example is provided below.
 
 Example:
 ```
 this is an example of how log text should be marked (wrap it with ```)
 ```
 -->
 
diff --git a/sys/contrib/openzfs/include/sys/dbuf.h b/sys/contrib/openzfs/include/sys/dbuf.h
index 6ae079c6a64b..d2c175af649c 100644
--- a/sys/contrib/openzfs/include/sys/dbuf.h
+++ b/sys/contrib/openzfs/include/sys/dbuf.h
@@ -1,503 +1,503 @@
 /*
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
  * Common Development and Distribution License (the "License").
  * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
  * See the License for the specific language governing permissions
  * and limitations under the License.
  *
  * When distributing Covered Code, include this CDDL HEADER in each
  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  * If applicable, add the following below this CDDL HEADER, with the
  * fields enclosed by brackets "[]" replaced with your own identifying
  * information: Portions Copyright [yyyy] [name of copyright owner]
  *
  * CDDL HEADER END
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2012, 2020 by Delphix. All rights reserved.
  * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
  * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
  */
 
 #ifndef	_SYS_DBUF_H
 #define	_SYS_DBUF_H
 
 #include <sys/dmu.h>
 #include <sys/spa.h>
 #include <sys/txg.h>
 #include <sys/zio.h>
 #include <sys/arc.h>
 #include <sys/zfs_context.h>
 #include <sys/zfs_refcount.h>
 #include <sys/zrlock.h>
 #include <sys/multilist.h>
 
 #ifdef	__cplusplus
 extern "C" {
 #endif
 
 #define	IN_DMU_SYNC 2
 
 /*
  * define flags for dbuf_read
  */
 
 #define	DB_RF_MUST_SUCCEED	(1 << 0)
 #define	DB_RF_CANFAIL		(1 << 1)
 #define	DB_RF_HAVESTRUCT	(1 << 2)
 #define	DB_RF_NOPREFETCH	(1 << 3)
 #define	DB_RF_NEVERWAIT		(1 << 4)
 #define	DB_RF_CACHED		(1 << 5)
 #define	DB_RF_NO_DECRYPT	(1 << 6)
 
 /*
  * The simplified state transition diagram for dbufs looks like:
  *
  *		+----> READ ----+
  *		|		|
  *		|		V
  *  (alloc)-->UNCACHED	     CACHED-->EVICTING-->(free)
  *		|		^	 ^
  *		|		|	 |
  *		+----> FILL ----+	 |
  *		|			 |
  *		|			 |
  *		+--------> NOFILL -------+
  *
  * DB_SEARCH is an invalid state for a dbuf. It is used by dbuf_free_range
  * to find all dbufs in a range of a dnode and must be less than any other
  * dbuf_states_t (see comment on dn_dbufs in dnode.h).
  */
 typedef enum dbuf_states {
 	DB_SEARCH	= -1,	/* search key only; never a real dbuf state */
 	DB_UNCACHED	= 0,	/* state entered on allocation */
 	DB_FILL		= 1,
 	DB_NOFILL	= 2,
 	DB_READ		= 3,
 	DB_CACHED	= 4,
 	DB_EVICTING	= 5	/* last state before the dbuf is freed */
 } dbuf_states_t;
 
 typedef enum dbuf_cached_state {
 	DB_NO_CACHE		= -1,	/* presumably: in neither dbuf cache */
 	DB_DBUF_CACHE		= 0,
 	DB_DBUF_METADATA_CACHE	= 1,
 	DB_CACHE_MAX		= 2	/* equals the number of caches above */
 } dbuf_cached_state_t;
 
 struct dnode;
 struct dmu_tx;
 
 /*
  * level = 0 means the user data
  * level = 1 means the single indirect block
  * etc.
  */
 
 struct dmu_buf_impl;
 
 /* Values taken by dr_override_state in a leaf dirty record. */
 typedef enum override_states {
 	DR_NOT_OVERRIDDEN	= 0,
 	DR_IN_DMU_SYNC		= 1,
 	DR_OVERRIDDEN		= 2
 } override_states_t;
 
 /*
  * Result of dmu_buf_lock_parent(); the same value is handed back to
  * dmu_buf_unlock_parent().
  */
 typedef enum db_lock_type {
 	DLT_NONE	= 0,
 	DLT_PARENT	= 1,
 	DLT_OBJSET	= 2
 } db_lock_type_t;
 
 typedef struct dbuf_dirty_record {
 	/* link on our parents dirty list */
 	list_node_t dr_dirty_node;
 
 	/* transaction group this data will sync in */
 	uint64_t dr_txg;
 
 	/* zio of outstanding write IO */
 	zio_t *dr_zio;
 
 	/* pointer back to our dbuf */
 	struct dmu_buf_impl *dr_dbuf;
 
 	/* list link for dbuf dirty records */
 	list_node_t dr_dbuf_node;
 
 	/*
 	 * The dnode we are part of.  Note that the dnode can not be moved or
 	 * evicted due to the hold that's added by dnode_setdirty() or
 	 * dmu_objset_sync_dnodes(), and released by dnode_rele_task() or
 	 * userquota_updates_task().  This hold is necessary for
 	 * dirty_lightweight_leaf-type dirty records, which don't have a hold
 	 * on a dbuf.
 	 */
 	dnode_t *dr_dnode;
 
 	/* pointer to parent dirty record */
 	struct dbuf_dirty_record *dr_parent;
 
 	/* How much space was charged to dsl_pool_dirty_space() for this? */
 	unsigned int dr_accounted;
 
 	/* A copy of the bp that points to us */
 	blkptr_t dr_bp_copy;
 
 	union dirty_types {
 		struct dirty_indirect {
 
 			/* protect access to list */
 			kmutex_t dr_mtx;
 
 			/* Our list of dirty children */
 			list_t dr_children;
 		} di;
 		struct dirty_leaf {
 
 			/*
 			 * dr_data is set when we dirty the buffer
 			 * so that we can retain the pointer even if it
 			 * gets COW'd in a subsequent transaction group.
 			 */
 			arc_buf_t *dr_data;
 			blkptr_t dr_overridden_by;
 			override_states_t dr_override_state;
 			uint8_t dr_copies;
 			boolean_t dr_nopwrite;
 			boolean_t dr_has_raw_params;
 
 			/*
 			 * If dr_has_raw_params is set, the following crypt
 			 * params will be set on the BP that's written.
 			 */
 			boolean_t dr_byteorder;
 			uint8_t	dr_salt[ZIO_DATA_SALT_LEN];
 			uint8_t	dr_iv[ZIO_DATA_IV_LEN];
 			uint8_t	dr_mac[ZIO_DATA_MAC_LEN];
 		} dl;
 		struct dirty_lightweight_leaf {
 			/*
 			 * This dirty record refers to a leaf (level=0)
 			 * block, whose dbuf has not been instantiated for
 			 * performance reasons.
 			 */
 			uint64_t dr_blkid;
 			abd_t *dr_abd;
 			zio_prop_t dr_props;
 			enum zio_flag dr_flags;
 		} dll;
 	} dt;
 } dbuf_dirty_record_t;
 
 typedef struct dmu_buf_impl {
 	/*
 	 * The following members are immutable, with the exception of
 	 * db.db_data, which is protected by db_mtx.
 	 */
 
 	/* the publicly visible structure */
 	dmu_buf_t db;
 
 	/* the objset we belong to */
 	struct objset *db_objset;
 
 	/*
 	 * handle to safely access the dnode we belong to (NULL when evicted)
 	 */
 	struct dnode_handle *db_dnode_handle;
 
 	/*
 	 * our parent buffer; if the dnode points to us directly,
 	 * db_parent == db_dnode_handle->dnh_dnode->dn_dbuf
 	 * only accessed by sync thread ???
 	 * (NULL when evicted)
 	 * May change from NULL to non-NULL under the protection of db_mtx
 	 * (see dbuf_check_blkptr())
 	 */
 	struct dmu_buf_impl *db_parent;
 
 	/*
 	 * link for hash table of all dmu_buf_impl_t's
 	 */
 	struct dmu_buf_impl *db_hash_next;
 
 	/*
 	 * Our link on the owner dnode's dn_dbufs list.
 	 * Protected by its dn_dbufs_mtx.  Should be on the same cache line
 	 * as db_level and db_blkid for the best avl_add() performance.
 	 */
 	avl_node_t db_link;
 
 	/* our block number */
 	uint64_t db_blkid;
 
 	/*
 	 * Pointer to the blkptr_t which points to us. May be NULL if we
 	 * don't have one yet. (NULL when evicted)
 	 */
 	blkptr_t *db_blkptr;
 
 	/*
 	 * Our indirection level.  Data buffers have db_level==0.
 	 * Indirect buffers which point to data buffers have
 	 * db_level==1. etc.  Buffers which contain dnodes have
 	 * db_level==0, since the dnodes are stored in a file.
 	 */
 	uint8_t db_level;
 
 	/*
 	 * Protects db_buf's contents if they contain an indirect block or data
 	 * block of the meta-dnode. We use this lock to protect the structure of
 	 * the block tree. This means that when modifying this dbuf's data, we
 	 * grab its rwlock. When modifying its parent's data (including the
 	 * blkptr to this dbuf), we grab the parent's rwlock. The lock ordering
 	 * for this lock is:
 	 * 1) dn_struct_rwlock
 	 * 2) db_rwlock
 	 * We don't currently grab multiple dbufs' db_rwlocks at once.
 	 */
 	krwlock_t db_rwlock;
 
 	/* buffer holding our data */
 	arc_buf_t *db_buf;
 
 	/* db_mtx protects the members below */
 	kmutex_t db_mtx;
 
 	/*
 	 * Current state of the buffer
 	 */
 	dbuf_states_t db_state;
 
 	/*
 	 * Refcount accessed by dmu_buf_{hold,rele}.
 	 * If nonzero, the buffer can't be destroyed.
 	 * Protected by db_mtx.
 	 */
 	zfs_refcount_t db_holds;
 
 	kcondvar_t db_changed;
 	dbuf_dirty_record_t *db_data_pending;
 
 	/* List of dirty records for the buffer sorted newest to oldest. */
 	list_t db_dirty_records;
 
 	/* Link in dbuf_cache or dbuf_metadata_cache */
 	multilist_node_t db_cache_link;
 
 	/* Tells us which dbuf cache this dbuf is in, if any */
 	dbuf_cached_state_t db_caching_status;
 
 	/* Data which is unique to data (leaf) blocks: */
 
 	/* User callback information. */
 	dmu_buf_user_t *db_user;
 
 	/*
 	 * Evict user data as soon as the dirty and reference
 	 * counts are equal.
 	 */
 	uint8_t db_user_immediate_evict;
 
 	/*
 	 * This block was freed while a read or write was
 	 * active.
 	 */
 	uint8_t db_freed_in_flight;
 
 	/*
 	 * dnode_evict_dbufs() or dnode_evict_bonus() tried to
 	 * evict this dbuf, but couldn't due to outstanding
 	 * references.  Evict once the refcount drops to 0.
 	 */
 	uint8_t db_pending_evict;
 
 	uint8_t db_dirtycnt;
 } dmu_buf_impl_t;
 
 /* Note: the dbuf hash table is exposed only for the mdb module */
 #define	DBUF_MUTEXES 2048
 #define	DBUF_HASH_MUTEX(h, idx) (&(h)->hash_mutexes[(idx) & (DBUF_MUTEXES-1)])
 typedef struct dbuf_hash_table {
 	uint64_t hash_table_mask;
 	dmu_buf_impl_t **hash_table;
 	kmutex_t hash_mutexes[DBUF_MUTEXES] ____cacheline_aligned;
 } dbuf_hash_table_t;
 
 typedef void (*dbuf_prefetch_fn)(void *, boolean_t);
 
 uint64_t dbuf_whichblock(const struct dnode *di, const int64_t level,
     const uint64_t offset);
 
 void dbuf_create_bonus(struct dnode *dn);
 int dbuf_spill_set_blksz(dmu_buf_t *db, uint64_t blksz, dmu_tx_t *tx);
 
 void dbuf_rm_spill(struct dnode *dn, dmu_tx_t *tx);
 
 dmu_buf_impl_t *dbuf_hold(struct dnode *dn, uint64_t blkid, void *tag);
 dmu_buf_impl_t *dbuf_hold_level(struct dnode *dn, int level, uint64_t blkid,
     void *tag);
 int dbuf_hold_impl(struct dnode *dn, uint8_t level, uint64_t blkid,
     boolean_t fail_sparse, boolean_t fail_uncached,
     void *tag, dmu_buf_impl_t **dbp);
 
 int dbuf_prefetch_impl(struct dnode *dn, int64_t level, uint64_t blkid,
     zio_priority_t prio, arc_flags_t aflags, dbuf_prefetch_fn cb,
     void *arg);
 int dbuf_prefetch(struct dnode *dn, int64_t level, uint64_t blkid,
     zio_priority_t prio, arc_flags_t aflags);
 
 void dbuf_add_ref(dmu_buf_impl_t *db, void *tag);
 boolean_t dbuf_try_add_ref(dmu_buf_t *db, objset_t *os, uint64_t obj,
     uint64_t blkid, void *tag);
 uint64_t dbuf_refcount(dmu_buf_impl_t *db);
 
 void dbuf_rele(dmu_buf_impl_t *db, void *tag);
 void dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag, boolean_t evicting);
 
 dmu_buf_impl_t *dbuf_find(struct objset *os, uint64_t object, uint8_t level,
     uint64_t blkid);
 
 int dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags);
 void dmu_buf_will_not_fill(dmu_buf_t *db, dmu_tx_t *tx);
 void dmu_buf_will_fill(dmu_buf_t *db, dmu_tx_t *tx);
 void dmu_buf_fill_done(dmu_buf_t *db, dmu_tx_t *tx);
 void dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx);
 dbuf_dirty_record_t *dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx);
 dbuf_dirty_record_t *dbuf_dirty_lightweight(dnode_t *dn, uint64_t blkid,
     dmu_tx_t *tx);
 arc_buf_t *dbuf_loan_arcbuf(dmu_buf_impl_t *db);
 void dmu_buf_write_embedded(dmu_buf_t *dbuf, void *data,
     bp_embedded_type_t etype, enum zio_compress comp,
     int uncompressed_size, int compressed_size, int byteorder, dmu_tx_t *tx);
 
 int dmu_lightweight_write_by_dnode(dnode_t *dn, uint64_t offset, abd_t *abd,
     const struct zio_prop *zp, enum zio_flag flags, dmu_tx_t *tx);
 
 void dmu_buf_redact(dmu_buf_t *dbuf, dmu_tx_t *tx);
 void dbuf_destroy(dmu_buf_impl_t *db);
 
 void dbuf_unoverride(dbuf_dirty_record_t *dr);
 void dbuf_sync_list(list_t *list, int level, dmu_tx_t *tx);
 void dbuf_release_bp(dmu_buf_impl_t *db);
 db_lock_type_t dmu_buf_lock_parent(dmu_buf_impl_t *db, krw_t rw, void *tag);
 void dmu_buf_unlock_parent(dmu_buf_impl_t *db, db_lock_type_t type, void *tag);
 
 void dbuf_free_range(struct dnode *dn, uint64_t start, uint64_t end,
     struct dmu_tx *);
 
 void dbuf_new_size(dmu_buf_impl_t *db, int size, dmu_tx_t *tx);
 
 void dbuf_stats_init(dbuf_hash_table_t *hash);
 void dbuf_stats_destroy(void);
 
 int dbuf_dnode_findbp(dnode_t *dn, uint64_t level, uint64_t blkid,
     blkptr_t *bp, uint16_t *datablkszsec, uint8_t *indblkshift);
 
 #define	DB_DNODE(_db)		((_db)->db_dnode_handle->dnh_dnode)
 #define	DB_DNODE_LOCK(_db)	((_db)->db_dnode_handle->dnh_zrlock)
 #define	DB_DNODE_ENTER(_db)	(zrl_add(&DB_DNODE_LOCK(_db)))
 #define	DB_DNODE_EXIT(_db)	(zrl_remove(&DB_DNODE_LOCK(_db)))
 #define	DB_DNODE_HELD(_db)	(!zrl_is_zero(&DB_DNODE_LOCK(_db)))
 
 void dbuf_init(void);
 void dbuf_fini(void);
 
 boolean_t dbuf_is_metadata(dmu_buf_impl_t *db);
 
 /*
  * Return the first entry of db->db_dirty_records whose dr_txg is not greater
  * than txg, or NULL when no entry qualifies (including an empty list).
  * With the list kept newest-to-oldest, that is the newest record <= txg.
  */
 static inline dbuf_dirty_record_t *
 dbuf_find_dirty_lte(dmu_buf_impl_t *db, uint64_t txg)
 {
 	dbuf_dirty_record_t *rec = list_head(&db->db_dirty_records);
 
 	/* Skip every record that belongs to a txg newer than the target. */
 	while (rec != NULL && rec->dr_txg > txg)
 		rec = list_next(&db->db_dirty_records, rec);
 
 	return (rec);
 }
 
 /*
  * Return db's dirty record for exactly txg, or NULL if it has none.
  */
 static inline dbuf_dirty_record_t *
 dbuf_find_dirty_eq(dmu_buf_impl_t *db, uint64_t txg)
 {
 	dbuf_dirty_record_t *match = dbuf_find_dirty_lte(db, txg);
 
 	/* The lookup may land on an older txg; only an exact hit counts. */
 	if (match == NULL || match->dr_txg != txg)
 		return (NULL);
 	return (match);
 }
 
 #define	DBUF_GET_BUFC_TYPE(_db)	\
 	(dbuf_is_metadata(_db) ? ARC_BUFC_METADATA : ARC_BUFC_DATA)
 
 #define	DBUF_IS_CACHEABLE(_db)						\
 	((_db)->db_objset->os_primary_cache == ZFS_CACHE_ALL ||		\
 	(dbuf_is_metadata(_db) &&					\
 	((_db)->db_objset->os_primary_cache == ZFS_CACHE_METADATA)))
 
 #define	DBUF_IS_L2CACHEABLE(_db)					\
 	((_db)->db_objset->os_secondary_cache == ZFS_CACHE_ALL ||	\
 	(dbuf_is_metadata(_db) &&					\
 	((_db)->db_objset->os_secondary_cache == ZFS_CACHE_METADATA)))
 
 #define	DNODE_LEVEL_IS_L2CACHEABLE(_dn, _level)				\
 	((_dn)->dn_objset->os_secondary_cache == ZFS_CACHE_ALL ||	\
 	(((_level) > 0 ||						\
 	DMU_OT_IS_METADATA((_dn)->dn_handle->dnh_dnode->dn_type)) &&	\
 	((_dn)->dn_objset->os_secondary_cache == ZFS_CACHE_METADATA)))
 
 #ifdef ZFS_DEBUG
 
 /*
  * There should be a ## between the string literal and fmt, to make it
  * clear that we're joining two strings together, but gcc does not
  * support that preprocessor token.
  */
 #define	dprintf_dbuf(dbuf, fmt, ...) do { \
 	if (zfs_flags & ZFS_DEBUG_DPRINTF) { \
 	char __db_buf[32]; \
 	uint64_t __db_obj = (dbuf)->db.db_object; \
 	if (__db_obj == DMU_META_DNODE_OBJECT) \
-		(void) strcpy(__db_buf, "mdn"); \
+		(void) strlcpy(__db_buf, "mdn", sizeof (__db_buf));	\
 	else \
 		(void) snprintf(__db_buf, sizeof (__db_buf), "%lld", \
 		    (u_longlong_t)__db_obj); \
 	dprintf_ds((dbuf)->db_objset->os_dsl_dataset, \
 	    "obj=%s lvl=%u blkid=%lld " fmt, \
 	    __db_buf, (dbuf)->db_level, \
 	    (u_longlong_t)(dbuf)->db_blkid, __VA_ARGS__); \
 	} \
 _NOTE(CONSTCOND) } while (0)
 
 #define	dprintf_dbuf_bp(db, bp, fmt, ...) do {			\
 	if (zfs_flags & ZFS_DEBUG_DPRINTF) {			\
 	char *__blkbuf = kmem_alloc(BP_SPRINTF_LEN, KM_SLEEP);	\
 	snprintf_blkptr(__blkbuf, BP_SPRINTF_LEN, bp);		\
 	dprintf_dbuf(db, fmt " %s\n", __VA_ARGS__, __blkbuf);	\
 	kmem_free(__blkbuf, BP_SPRINTF_LEN);			\
 	}							\
 _NOTE(CONSTCOND) } while (0)
 
 #define	DBUF_VERIFY(db)	dbuf_verify(db)
 
 #else
 
 #define	dprintf_dbuf(db, fmt, ...)
 #define	dprintf_dbuf_bp(db, bp, fmt, ...)
 #define	DBUF_VERIFY(db)
 
 #endif
 
 
 #ifdef	__cplusplus
 }
 #endif
 
 #endif /* _SYS_DBUF_H */
diff --git a/sys/contrib/openzfs/include/sys/dnode.h b/sys/contrib/openzfs/include/sys/dnode.h
index de6492bb7618..2cdc5b8798ad 100644
--- a/sys/contrib/openzfs/include/sys/dnode.h
+++ b/sys/contrib/openzfs/include/sys/dnode.h
@@ -1,627 +1,627 @@
 /*
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
  * Common Development and Distribution License (the "License").
  * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
  * See the License for the specific language governing permissions
  * and limitations under the License.
  *
  * When distributing Covered Code, include this CDDL HEADER in each
  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  * If applicable, add the following below this CDDL HEADER, with the
  * fields enclosed by brackets "[]" replaced with your own identifying
  * information: Portions Copyright [yyyy] [name of copyright owner]
  *
  * CDDL HEADER END
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
  * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
  */
 
 #ifndef	_SYS_DNODE_H
 #define	_SYS_DNODE_H
 
 #include <sys/zfs_context.h>
 #include <sys/avl.h>
 #include <sys/spa.h>
 #include <sys/txg.h>
 #include <sys/zio.h>
 #include <sys/zfs_refcount.h>
 #include <sys/dmu_zfetch.h>
 #include <sys/zrlock.h>
 #include <sys/multilist.h>
 
 #ifdef	__cplusplus
 extern "C" {
 #endif
 
 /*
  * dnode_hold() flags.
  */
 #define	DNODE_MUST_BE_ALLOCATED	1
 #define	DNODE_MUST_BE_FREE	2
 #define	DNODE_DRY_RUN		4
 
 /*
  * dnode_next_offset() flags.
  */
 #define	DNODE_FIND_HOLE		1
 #define	DNODE_FIND_BACKWARDS	2
 #define	DNODE_FIND_HAVELOCK	4
 
 /*
  * Fixed constants.
  */
 #define	DNODE_SHIFT		9	/* 512 bytes */
 #define	DN_MIN_INDBLKSHIFT	12	/* 4k */
 /*
  * If we ever increase this value beyond 20, we need to revisit all logic that
  * does x << level * ebps to handle overflow.  With a 1M indirect block size,
  * 4 levels of indirect blocks would not be able to guarantee addressing an
  * entire object, so 5 levels will be used, but 5 * (20 - 7) = 65.
  */
 #define	DN_MAX_INDBLKSHIFT	17	/* 128k */
 #define	DNODE_BLOCK_SHIFT	14	/* 16k */
 #define	DNODE_CORE_SIZE		64	/* 64 bytes for dnode sans blkptrs */
 #define	DN_MAX_OBJECT_SHIFT	48	/* 256 trillion (zfs_fid_t limit) */
 #define	DN_MAX_OFFSET_SHIFT	64	/* 2^64 bytes in a dnode */
 
 /*
  * dnode id flags
  *
  * Note: a file will never ever have its ids moved from bonus->spill
  */
 #define	DN_ID_CHKED_BONUS	0x1
 #define	DN_ID_CHKED_SPILL	0x2
 #define	DN_ID_OLD_EXIST		0x4
 #define	DN_ID_NEW_EXIST		0x8
 
 /*
  * Derived constants.
  */
 #define	DNODE_MIN_SIZE		(1 << DNODE_SHIFT)
 #define	DNODE_MAX_SIZE		(1 << DNODE_BLOCK_SHIFT)
 #define	DNODE_BLOCK_SIZE	(1 << DNODE_BLOCK_SHIFT)
 #define	DNODE_MIN_SLOTS		(DNODE_MIN_SIZE >> DNODE_SHIFT)
 #define	DNODE_MAX_SLOTS		(DNODE_MAX_SIZE >> DNODE_SHIFT)
 #define	DN_BONUS_SIZE(dnsize)	((dnsize) - DNODE_CORE_SIZE - \
 	(1 << SPA_BLKPTRSHIFT))
 #define	DN_SLOTS_TO_BONUSLEN(slots)	DN_BONUS_SIZE((slots) << DNODE_SHIFT)
 #define	DN_OLD_MAX_BONUSLEN	(DN_BONUS_SIZE(DNODE_MIN_SIZE))
 #define	DN_MAX_NBLKPTR	((DNODE_MIN_SIZE - DNODE_CORE_SIZE) >> SPA_BLKPTRSHIFT)
 #define	DN_MAX_OBJECT	(1ULL << DN_MAX_OBJECT_SHIFT)
 #define	DN_ZERO_BONUSLEN	(DN_BONUS_SIZE(DNODE_MAX_SIZE) + 1)
 #define	DN_KILL_SPILLBLK (1)
 
 /*
  * Sentinel pointer values (small integers cast to void *) describing a
  * slot's state, plus predicates over a slot value.  The predicate arguments
  * are parenthesized so that an expression argument (e.g. "a ? b : c") is
  * cast as a whole instead of only its first operand.
  */
 #define	DN_SLOT_UNINIT		((void *)NULL)	/* Uninitialized */
 #define	DN_SLOT_FREE		((void *)1UL)	/* Free slot */
 #define	DN_SLOT_ALLOCATED	((void *)2UL)	/* Allocated slot */
 #define	DN_SLOT_INTERIOR	((void *)3UL)	/* Interior allocated slot */
 /* True when dn compares above every sentinel, i.e. is none of the four. */
 #define	DN_SLOT_IS_PTR(dn)	((void *)(dn) > DN_SLOT_INTERIOR)
 /* True for anything other than DN_SLOT_UNINIT (NULL). */
 #define	DN_SLOT_IS_VALID(dn)	((void *)(dn) != NULL)
 
 #define	DNODES_PER_BLOCK_SHIFT	(DNODE_BLOCK_SHIFT - DNODE_SHIFT)
 #define	DNODES_PER_BLOCK	(1ULL << DNODES_PER_BLOCK_SHIFT)
 
 /*
  * This is inaccurate if the indblkshift of the particular object is not the
  * max.  But it's only used by userland to calculate the zvol reservation.
  */
 #define	DNODES_PER_LEVEL_SHIFT	(DN_MAX_INDBLKSHIFT - SPA_BLKPTRSHIFT)
 #define	DNODES_PER_LEVEL	(1ULL << DNODES_PER_LEVEL_SHIFT)
 
 #define	DN_MAX_LEVELS	(DIV_ROUND_UP(DN_MAX_OFFSET_SHIFT - SPA_MINBLOCKSHIFT, \
 	DN_MIN_INDBLKSHIFT - SPA_BLKPTRSHIFT) + 1)
 
 #define	DN_BONUS(dnp)	((void*)((dnp)->dn_bonus + \
 	(((dnp)->dn_nblkptr - 1) * sizeof (blkptr_t))))
 /*
  * Byte distance from DN_BONUS(dnp) to the spill block pointer when
  * DNODE_FLAG_SPILL_BLKPTR is set, otherwise to the end of the last slot the
  * dnode occupies (dn_extra_slots + 1 slots past dnp).  dnp is parenthesized
  * so that expression arguments such as "dnp + 1" expand correctly; note that
  * it is still evaluated more than once.
  */
 #define	DN_MAX_BONUS_LEN(dnp) \
 	(((dnp)->dn_flags & DNODE_FLAG_SPILL_BLKPTR) ? \
 	(uint8_t *)DN_SPILL_BLKPTR(dnp) - (uint8_t *)DN_BONUS(dnp) : \
 	(uint8_t *)((dnp) + ((dnp)->dn_extra_slots + 1)) - \
 	(uint8_t *)DN_BONUS(dnp))
 
 #define	DN_USED_BYTES(dnp) (((dnp)->dn_flags & DNODE_FLAG_USED_BYTES) ? \
 	(dnp)->dn_used : (dnp)->dn_used << SPA_MINBLOCKSHIFT)
 
 /*
  * Number of 2^typeshift-byte items in a 2^blkshift-byte block (presumably
  * "entries per block").  Both arguments are parenthesized so that expression
  * arguments such as "3 + 4" are subtracted as a whole.
  */
 #define	EPB(blkshift, typeshift)	(1 << ((blkshift) - (typeshift)))
 
 struct dmu_buf_impl;
 struct objset;
 struct zio;
 
 /* Values stored in a dnode's dn_dirtyctx field. */
 enum dnode_dirtycontext {
 	DN_UNDIRTIED	= 0,
 	DN_DIRTY_OPEN	= 1,
 	DN_DIRTY_SYNC	= 2
 };
 
 /* Is dn_used in bytes?  if not, it's in multiples of SPA_MINBLOCKSIZE */
 #define	DNODE_FLAG_USED_BYTES			(1 << 0)
 #define	DNODE_FLAG_USERUSED_ACCOUNTED		(1 << 1)
 
 /* Does dnode have a SA spill blkptr in bonus? */
 #define	DNODE_FLAG_SPILL_BLKPTR			(1 << 2)
 
 /* User/Group/Project dnode accounting */
 #define	DNODE_FLAG_USEROBJUSED_ACCOUNTED	(1 << 3)
 
 /*
  * This mask defines the set of flags which are "portable", meaning
  * that they can be preserved when doing a raw encrypted zfs send.
  * Flags included in this mask will be protected by AAD when the block
  * of dnodes is encrypted.
  */
 #define	DNODE_CRYPT_PORTABLE_FLAGS_MASK		(DNODE_FLAG_SPILL_BLKPTR)
 
 /*
  * VARIABLE-LENGTH (LARGE) DNODES
  *
  * The motivation for variable-length dnodes is to eliminate the overhead
  * associated with using spill blocks.  Spill blocks are used to store
  * system attribute data (i.e. file metadata) that does not fit in the
  * dnode's bonus buffer. By allowing a larger bonus buffer area the use of
  * a spill block can be avoided.  Spill blocks potentially incur an
  * additional read I/O for every dnode in a dnode block. As a worst case
  * example, reading 32 dnodes from a 16k dnode block and all of the spill
  * blocks could issue 33 separate reads. Now suppose those dnodes have size
  * 1024 and therefore don't need spill blocks. Then the worst case number
  * of blocks read is reduced from 33 to two--one per dnode block.
  *
  * ZFS-on-Linux systems that make heavy use of extended attributes benefit
  * from this feature. In particular, ZFS-on-Linux supports the xattr=sa
  * dataset property which allows file extended attribute data to be stored
  * in the dnode bonus buffer as an alternative to the traditional
  * directory-based format. Workloads such as SELinux and the Lustre
  * distributed filesystem often store enough xattr data to force spill
  * blocks when xattr=sa is in effect. Large dnodes may therefore provide a
  * performance benefit to such systems. Other use cases that benefit from
  * this feature include files with large ACLs and symbolic links with long
  * target names.
  *
  * The size of a dnode may be a multiple of 512 bytes up to the size of a
  * dnode block (currently 16384 bytes). The dn_extra_slots field of the
  * on-disk dnode_phys_t structure describes the size of the physical dnode
  * on disk. The field represents how many "extra" dnode_phys_t slots a
  * dnode consumes in its dnode block. This convention results in a value of
  * 0 for 512 byte dnodes which preserves on-disk format compatibility with
  * older software which doesn't support large dnodes.
  *
  * Similarly, the in-memory dnode_t structure has a dn_num_slots field
  * to represent the total number of dnode_phys_t slots consumed on disk.
  * Thus dn->dn_num_slots is 1 greater than the corresponding
  * dnp->dn_extra_slots. This difference in convention was adopted
  * because, unlike on-disk structures, backward compatibility is not a
  * concern for in-memory objects, so we used a more natural way to
  * represent size for a dnode_t.
  *
  * The default size for newly created dnodes is determined by the value of
  * the "dnodesize" dataset property. By default the property is set to
  * "legacy" which is compatible with older software. Setting the property
  * to "auto" will allow the filesystem to choose the most suitable dnode
  * size. Currently this just sets the default dnode size to 1k, but future
  * code improvements could dynamically choose a size based on observed
  * workload patterns. Dnodes of varying sizes can coexist within the same
  * dataset and even within the same dnode block.
  */
 
 typedef struct dnode_phys {
 	uint8_t dn_type;		/* dmu_object_type_t */
 	uint8_t dn_indblkshift;		/* ln2(indirect block size) */
 	uint8_t dn_nlevels;		/* 1=dn_blkptr->data blocks */
 	uint8_t dn_nblkptr;		/* length of dn_blkptr */
 	uint8_t dn_bonustype;		/* type of data in bonus buffer */
 	uint8_t	dn_checksum;		/* ZIO_CHECKSUM type */
 	uint8_t	dn_compress;		/* ZIO_COMPRESS type */
 	uint8_t dn_flags;		/* DNODE_FLAG_* */
 	uint16_t dn_datablkszsec;	/* data block size in 512b sectors */
 	uint16_t dn_bonuslen;		/* length of dn_bonus */
 	uint8_t dn_extra_slots;		/* # of subsequent slots consumed */
 	uint8_t dn_pad2[3];
 
 	/* accounting is protected by dn_dirty_mtx */
 	uint64_t dn_maxblkid;		/* largest allocated block ID */
 	uint64_t dn_used;		/* bytes (or sectors) of disk space */
 
 	/*
 	 * Both dn_pad2 and dn_pad3 are protected by the block's MAC. This
 	 * allows us to protect any fields that might be added here in the
 	 * future. In either case, developers will want to check
 	 * zio_crypt_init_uios_dnode() and zio_crypt_do_dnode_hmac_updates()
 	 * to ensure the new field is being protected and updated properly.
 	 */
 	uint64_t dn_pad3[4];
 
 	/*
 	 * The tail region is 448 bytes for a 512 byte dnode, and
 	 * correspondingly larger for larger dnode sizes. The spill
 	 * block pointer, when present, is always at the end of the tail
 	 * region. There are three ways this space may be used, using
 	 * a 512 byte dnode for this diagram:
 	 *
 	 * 0       64      128     192     256     320     384     448 (offset)
 	 * +---------------+---------------+---------------+-------+
 	 * | dn_blkptr[0]  | dn_blkptr[1]  | dn_blkptr[2]  | /     |
 	 * +---------------+---------------+---------------+-------+
 	 * | dn_blkptr[0]  | dn_bonus[0..319]                      |
 	 * +---------------+-----------------------+---------------+
 	 * | dn_blkptr[0]  | dn_bonus[0..191]      | dn_spill      |
 	 * +---------------+-----------------------+---------------+
 	 */
 	union {
 		blkptr_t dn_blkptr[1+DN_OLD_MAX_BONUSLEN/sizeof (blkptr_t)];
 		struct {
 			blkptr_t __dn_ignore1;
 			uint8_t dn_bonus[DN_OLD_MAX_BONUSLEN];
 		};
 		struct {
 			blkptr_t __dn_ignore2;
 			uint8_t __dn_ignore3[DN_OLD_MAX_BONUSLEN -
 			    sizeof (blkptr_t)];
 			blkptr_t dn_spill;
 		};
 	};
 } dnode_phys_t;
 
 #define	DN_SPILL_BLKPTR(dnp)	((blkptr_t *)((char *)(dnp) + \
 	(((dnp)->dn_extra_slots + 1) << DNODE_SHIFT) - (1 << SPA_BLKPTRSHIFT)))
 
 struct dnode {
 	/*
 	 * Protects the structure of the dnode, including the number of levels
 	 * of indirection (dn_nlevels), dn_maxblkid, and dn_next_*
 	 */
 	krwlock_t dn_struct_rwlock;
 
 	/* Our link on dn_objset->os_dnodes list; protected by os_lock.  */
 	list_node_t dn_link;
 
 	/* immutable: */
 	struct objset *dn_objset;
 	uint64_t dn_object;
 	struct dmu_buf_impl *dn_dbuf;
 	struct dnode_handle *dn_handle;
 	dnode_phys_t *dn_phys; /* pointer into dn->dn_dbuf->db.db_data */
 
 	/*
 	 * Copies of stuff in dn_phys.  They're valid in the open
 	 * context (eg. even before the dnode is first synced).
 	 * Where necessary, these are protected by dn_struct_rwlock.
 	 */
 	dmu_object_type_t dn_type;	/* object type */
 	uint16_t dn_bonuslen;		/* bonus length */
 	uint8_t dn_bonustype;		/* bonus type */
 	uint8_t dn_nblkptr;		/* number of blkptrs (immutable) */
 	uint8_t dn_checksum;		/* ZIO_CHECKSUM type */
 	uint8_t dn_compress;		/* ZIO_COMPRESS type */
 	uint8_t dn_nlevels;
 	uint8_t dn_indblkshift;
 	uint8_t dn_datablkshift;	/* zero if blksz not power of 2! */
 	uint8_t dn_moved;		/* Has this dnode been moved? */
 	uint16_t dn_datablkszsec;	/* in 512b sectors */
 	uint32_t dn_datablksz;		/* in bytes */
 	uint64_t dn_maxblkid;
 	uint8_t dn_next_type[TXG_SIZE];
 	uint8_t dn_num_slots;		/* metadnode slots consumed on disk */
 	uint8_t dn_next_nblkptr[TXG_SIZE];
 	uint8_t dn_next_nlevels[TXG_SIZE];
 	uint8_t dn_next_indblkshift[TXG_SIZE];
 	uint8_t dn_next_bonustype[TXG_SIZE];
 	uint8_t dn_rm_spillblk[TXG_SIZE];	/* for removing spill blk */
 	uint16_t dn_next_bonuslen[TXG_SIZE];
 	uint32_t dn_next_blksz[TXG_SIZE];	/* next block size in bytes */
 	uint64_t dn_next_maxblkid[TXG_SIZE];	/* next maxblkid (a block ID) */
 
 	/* protected by dn_dbufs_mtx; declared here to fill 32-bit hole */
 	uint32_t dn_dbufs_count;	/* count of dn_dbufs */
 
 	/* protected by os_lock: */
 	multilist_node_t dn_dirty_link[TXG_SIZE]; /* next on dataset's dirty */
 
 	/* protected by dn_mtx: */
 	kmutex_t dn_mtx;
 	list_t dn_dirty_records[TXG_SIZE];
 	struct range_tree *dn_free_ranges[TXG_SIZE];
 	uint64_t dn_allocated_txg;
 	uint64_t dn_free_txg;
 	uint64_t dn_assigned_txg;
 	uint64_t dn_dirty_txg;			/* txg dnode was last dirtied */
 	kcondvar_t dn_notxholds;
 	kcondvar_t dn_nodnholds;
 	enum dnode_dirtycontext dn_dirtyctx;
 	void *dn_dirtyctx_firstset;		/* dbg: contents meaningless */
 
 	/* protected by own devices */
 	zfs_refcount_t dn_tx_holds;
 	zfs_refcount_t dn_holds;
 
 	kmutex_t dn_dbufs_mtx;
 	/*
 	 * Descendent dbufs, ordered by dbuf_compare. Note that dn_dbufs
 	 * can contain multiple dbufs of the same (level, blkid) when a
 	 * dbuf is marked DB_EVICTING without being removed from
 	 * dn_dbufs. To maintain the avl invariant that there cannot be
 	 * duplicate entries, we order the dbufs by an arbitrary value -
 	 * their address in memory. This means that dn_dbufs cannot be used to
 	 * directly look up a dbuf. Instead, callers must use avl_walk, have
 	 * a reference to the dbuf, or look up a non-existent node with
 	 * db_state = DB_SEARCH (see dbuf_free_range for an example).
 	 */
 	avl_tree_t dn_dbufs;
 
 	/* protected by dn_struct_rwlock */
 	struct dmu_buf_impl *dn_bonus;	/* bonus buffer dbuf */
 
 	boolean_t dn_have_spill;	/* have spill or are spilling */
 
 	/* parent IO for current sync write */
 	zio_t *dn_zio;
 
 	/* used in syncing context */
 	uint64_t dn_oldused;	/* old phys used bytes */
 	uint64_t dn_oldflags;	/* old phys dn_flags */
 	uint64_t dn_olduid, dn_oldgid, dn_oldprojid;
 	uint64_t dn_newuid, dn_newgid, dn_newprojid;
 	int dn_id_flags;
 
 	/* holds prefetch structure */
 	struct zfetch	dn_zfetch;
 };
 
 /*
  * Since the AVL tree already keeps its own element count, use dn_dbufs_count
  * only for dbufs not counted there (bonus buffers) and just add them.
  */
 #define	DN_DBUFS_COUNT(dn)	((dn)->dn_dbufs_count + \
     avl_numnodes(&(dn)->dn_dbufs))
 
 /*
  * We use this (otherwise unused) bit to indicate if the value of
  * dn_next_maxblkid[txgoff] is valid to use in dnode_sync().
  */
 #define	DMU_NEXT_MAXBLKID_SET		(1ULL << 63)
 
 /*
  * Adds a level of indirection between the dbuf and the dnode to avoid
  * iterating descendent dbufs in dnode_move(). Handles are not allocated
  * individually, but as an array of child dnodes in dnode_hold_impl().
  */
 typedef struct dnode_handle {
 	/* Protects dnh_dnode from modification by dnode_move(). */
 	zrlock_t dnh_zrlock;
 	dnode_t *dnh_dnode;
 } dnode_handle_t;
 
 typedef struct dnode_children {
 	dmu_buf_user_t dnc_dbu;		/* User evict data */
 	size_t dnc_count;		/* number of children */
 	dnode_handle_t dnc_children[];	/* sized dynamically */
 } dnode_children_t;
 
 typedef struct free_range {
 	avl_node_t fr_node;
 	uint64_t fr_blkid;
 	uint64_t fr_nblks;
 } free_range_t;
 
 void dnode_special_open(struct objset *dd, dnode_phys_t *dnp,
     uint64_t object, dnode_handle_t *dnh);
 void dnode_special_close(dnode_handle_t *dnh);
 
 void dnode_setbonuslen(dnode_t *dn, int newsize, dmu_tx_t *tx);
 void dnode_setbonus_type(dnode_t *dn, dmu_object_type_t, dmu_tx_t *tx);
 void dnode_rm_spill(dnode_t *dn, dmu_tx_t *tx);
 
 int dnode_hold(struct objset *dd, uint64_t object,
     void *ref, dnode_t **dnp);
 int dnode_hold_impl(struct objset *dd, uint64_t object, int flag, int dn_slots,
     void *ref, dnode_t **dnp);
 boolean_t dnode_add_ref(dnode_t *dn, void *ref);
 void dnode_rele(dnode_t *dn, void *ref);
 void dnode_rele_and_unlock(dnode_t *dn, void *tag, boolean_t evicting);
 int dnode_try_claim(objset_t *os, uint64_t object, int slots);
 void dnode_setdirty(dnode_t *dn, dmu_tx_t *tx);
 void dnode_set_dirtyctx(dnode_t *dn, dmu_tx_t *tx, void *tag);
 void dnode_sync(dnode_t *dn, dmu_tx_t *tx);
 void dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs,
     dmu_object_type_t bonustype, int bonuslen, int dn_slots, dmu_tx_t *tx);
 void dnode_reallocate(dnode_t *dn, dmu_object_type_t ot, int blocksize,
     dmu_object_type_t bonustype, int bonuslen, int dn_slots,
     boolean_t keep_spill, dmu_tx_t *tx);
 void dnode_free(dnode_t *dn, dmu_tx_t *tx);
 void dnode_byteswap(dnode_phys_t *dnp);
 void dnode_buf_byteswap(void *buf, size_t size);
 void dnode_verify(dnode_t *dn);
 int dnode_set_nlevels(dnode_t *dn, int nlevels, dmu_tx_t *tx);
 int dnode_set_blksz(dnode_t *dn, uint64_t size, int ibs, dmu_tx_t *tx);
 void dnode_free_range(dnode_t *dn, uint64_t off, uint64_t len, dmu_tx_t *tx);
 void dnode_diduse_space(dnode_t *dn, int64_t space);
 void dnode_new_blkid(dnode_t *dn, uint64_t blkid, dmu_tx_t *tx,
     boolean_t have_read, boolean_t force);
 uint64_t dnode_block_freed(dnode_t *dn, uint64_t blkid);
 void dnode_init(void);
 void dnode_fini(void);
 int dnode_next_offset(dnode_t *dn, int flags, uint64_t *off,
     int minlvl, uint64_t blkfill, uint64_t txg);
 void dnode_evict_dbufs(dnode_t *dn);
 void dnode_evict_bonus(dnode_t *dn);
 void dnode_free_interior_slots(dnode_t *dn);
 
 #define	DNODE_IS_DIRTY(_dn)						\
 	((_dn)->dn_dirty_txg >= spa_syncing_txg((_dn)->dn_objset->os_spa))
 
 #define	DNODE_IS_CACHEABLE(_dn)						\
 	((_dn)->dn_objset->os_primary_cache == ZFS_CACHE_ALL ||		\
 	(DMU_OT_IS_METADATA((_dn)->dn_type) &&				\
 	(_dn)->dn_objset->os_primary_cache == ZFS_CACHE_METADATA))
 
 #define	DNODE_META_IS_CACHEABLE(_dn)					\
 	((_dn)->dn_objset->os_primary_cache == ZFS_CACHE_ALL ||		\
 	(_dn)->dn_objset->os_primary_cache == ZFS_CACHE_METADATA)
 
 /*
  * Used for dnodestats kstat.
  */
 typedef struct dnode_stats {
 	/*
 	 * Number of failed attempts to hold a meta dnode dbuf.
 	 */
 	kstat_named_t dnode_hold_dbuf_hold;
 	/*
 	 * Number of failed attempts to read a meta dnode dbuf.
 	 */
 	kstat_named_t dnode_hold_dbuf_read;
 	/*
 	 * Number of times dnode_hold(..., DNODE_MUST_BE_ALLOCATED) was able
 	 * to hold the requested object number which was allocated.  This is
 	 * the common case when looking up any allocated object number.
 	 */
 	kstat_named_t dnode_hold_alloc_hits;
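 	/*
-	 * Number of times dnode_hold(..., DNODE_MUST_BE_ALLOCATED) was not
-	 * able to hold the request object number because it was not allocated.
+	 * Number of times dnode_hold(..., DNODE_MUST_BE_ALLOCATED) was not able
+	 * to hold the requested object number because it was not allocated.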
 	 */
 	kstat_named_t dnode_hold_alloc_misses;
 	/*
 	 * Number of times dnode_hold(..., DNODE_MUST_BE_ALLOCATED) was not
-	 * able to hold the request object number because the object number
+	 * able to hold the requested object number because the object number
 	 * refers to an interior large dnode slot.
 	 */
 	kstat_named_t dnode_hold_alloc_interior;
 	/*
 	 * Number of times dnode_hold(..., DNODE_MUST_BE_ALLOCATED) needed
 	 * to retry acquiring slot zrl locks due to contention.
 	 */
 	kstat_named_t dnode_hold_alloc_lock_retry;
 	/*
 	 * Number of times dnode_hold(..., DNODE_MUST_BE_ALLOCATED) did not
 	 * need to create the dnode because another thread did so after
 	 * dropping the read lock but before acquiring the write lock.
 	 */
 	kstat_named_t dnode_hold_alloc_lock_misses;
 	/*
 	 * Number of times dnode_hold(..., DNODE_MUST_BE_ALLOCATED) found
 	 * a free dnode instantiated by dnode_create() but not yet allocated
 	 * by dnode_allocate().
 	 */
 	kstat_named_t dnode_hold_alloc_type_none;
 	/*
 	 * Number of times dnode_hold(..., DNODE_MUST_BE_FREE) was able
 	 * to hold the requested range of free dnode slots.
 	 */
 	kstat_named_t dnode_hold_free_hits;
 	/*
 	 * Number of times dnode_hold(..., DNODE_MUST_BE_FREE) was not
 	 * able to hold the requested range of free dnode slots because
 	 * at least one slot was allocated.
 	 */
 	kstat_named_t dnode_hold_free_misses;
 	/*
 	 * Number of times dnode_hold(..., DNODE_MUST_BE_FREE) was not
 	 * able to hold the requested range of free dnode slots because
 	 * after acquiring the zrl lock at least one slot was allocated.
 	 */
 	kstat_named_t dnode_hold_free_lock_misses;
 	/*
 	 * Number of times dnode_hold(..., DNODE_MUST_BE_FREE) needed
 	 * to retry acquiring slot zrl locks due to contention.
 	 */
 	kstat_named_t dnode_hold_free_lock_retry;
 	/*
 	 * Number of times dnode_hold(..., DNODE_MUST_BE_FREE) requested
 	 * a range of dnode slots which were held by another thread.
 	 */
 	kstat_named_t dnode_hold_free_refcount;
 	/*
 	 * Number of times dnode_hold(..., DNODE_MUST_BE_FREE) requested
 	 * a range of dnode slots which would overflow the dnode_phys_t.
 	 */
 	kstat_named_t dnode_hold_free_overflow;
 	/*
 	 * Number of times dnode_free_interior_slots() needed to retry
 	 * acquiring a slot zrl lock due to contention.
 	 */
 	kstat_named_t dnode_free_interior_lock_retry;
 	/*
 	 * Number of new dnodes allocated by dnode_allocate().
 	 */
 	kstat_named_t dnode_allocate;
 	/*
 	 * Number of dnodes re-allocated by dnode_reallocate().
 	 */
 	kstat_named_t dnode_reallocate;
 	/*
 	 * Number of meta dnode dbufs evicted.
 	 */
 	kstat_named_t dnode_buf_evict;
 	/*
 	 * Number of times dmu_object_alloc*() reached the end of the existing
 	 * object ID chunk and advanced to a new one.
 	 */
 	kstat_named_t dnode_alloc_next_chunk;
 	/*
 	 * Number of times multiple threads attempted to allocate a dnode
 	 * from the same block of free dnodes.
 	 */
 	kstat_named_t dnode_alloc_race;
 	/*
 	 * Number of times dmu_object_alloc*() was forced to advance to the
-	 * next meta dnode dbuf due to an error from  dmu_object_next().
+	 * next meta dnode dbuf due to an error from dmu_object_next().
 	 */
 	kstat_named_t dnode_alloc_next_block;
 	/*
 	 * Statistics for tracking dnodes which have been moved.
 	 */
 	kstat_named_t dnode_move_invalid;
 	kstat_named_t dnode_move_recheck1;
 	kstat_named_t dnode_move_recheck2;
 	kstat_named_t dnode_move_special;
 	kstat_named_t dnode_move_handle;
 	kstat_named_t dnode_move_rwlock;
 	kstat_named_t dnode_move_active;
 } dnode_stats_t;
 
 extern dnode_stats_t dnode_stats;
 
 #define	DNODE_STAT_INCR(stat, val) \
-    atomic_add_64(&dnode_stats.stat.value.ui64, (val));
-#define	DNODE_STAT_BUMP(stat) \
-    DNODE_STAT_INCR(stat, 1);
+    atomic_add_64(&dnode_stats.stat.value.ui64, (val))
+/* Add 1 to the counter; neither macro ends in ';' (callers add it). */
+#define	DNODE_STAT_BUMP(stat)	DNODE_STAT_INCR(stat, 1)
 
 #ifdef ZFS_DEBUG
 
 #define	dprintf_dnode(dn, fmt, ...) do { \
 	if (zfs_flags & ZFS_DEBUG_DPRINTF) { \
-	char __db_buf[32]; \
+	char __db_buf[32];	/* "mdn" or the decimal object number */ \
 	uint64_t __db_obj = (dn)->dn_object; \
 	if (__db_obj == DMU_META_DNODE_OBJECT) \
-		(void) strcpy(__db_buf, "mdn"); \
+		(void) strlcpy(__db_buf, "mdn", sizeof (__db_buf));	\
 	else \
-		(void) snprintf(__db_buf, sizeof (__db_buf), "%lld", \
+		(void) snprintf(__db_buf, sizeof (__db_buf), "%llu", \
 		    (u_longlong_t)__db_obj);\
 	dprintf_ds((dn)->dn_objset->os_dsl_dataset, "obj=%s " fmt, \
 	    __db_buf, __VA_ARGS__); \
 	} \
 _NOTE(CONSTCOND) } while (0)
 
 #define	DNODE_VERIFY(dn)		dnode_verify(dn)
 #define	FREE_VERIFY(db, start, end, tx)	free_verify(db, start, end, tx)
 
 #else
 
-#define	dprintf_dnode(db, fmt, ...)
+#define	dprintf_dnode(dn, fmt, ...)
 #define	DNODE_VERIFY(dn)
 #define	FREE_VERIFY(db, start, end, tx)
 
 #endif
 
 #ifdef	__cplusplus
 }
 #endif
 
 #endif	/* _SYS_DNODE_H */
diff --git a/sys/contrib/openzfs/include/sys/fm/util.h b/sys/contrib/openzfs/include/sys/fm/util.h
index 56ba8798beb0..5fb6d1d6072b 100644
--- a/sys/contrib/openzfs/include/sys/fm/util.h
+++ b/sys/contrib/openzfs/include/sys/fm/util.h
@@ -1,120 +1,121 @@
 /*
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
  * Common Development and Distribution License (the "License").
  * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
  * See the License for the specific language governing permissions
  * and limitations under the License.
  *
  * When distributing Covered Code, include this CDDL HEADER in each
  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  * If applicable, add the following below this CDDL HEADER, with the
  * fields enclosed by brackets "[]" replaced with your own identifying
  * information: Portions Copyright [yyyy] [name of copyright owner]
  *
  * CDDL HEADER END
  */
 
 /*
  * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 #ifndef	_SYS_FM_UTIL_H
 #define	_SYS_FM_UTIL_H
 
 #ifdef	__cplusplus
 extern "C" {
 #endif
 
 #include <sys/nvpair.h>
+#include <sys/zfs_file.h>
 
 /*
  * Shared user/kernel definitions for class length, error channel name,
  * and kernel event publisher string.
  */
 #define	FM_MAX_CLASS 100
 #define	FM_ERROR_CHAN	"com.sun:fm:error"
 #define	FM_PUB		"fm"
 
 /*
  * ereport dump device transport support
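  *
- * Ereports are written out to the dump device at a proscribed offset from the
- * end, similar to in-transit log messages.  The ereports are represented as a
+ * Ereports are written out to the dump device at a prescribed offset from the
+ * end, similar to in-transit log messages.  The ereports are represented as an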
  * erpt_dump_t header followed by ed_size bytes of packed native nvlist data.
  *
  * NOTE: All of these constants and the header must be defined so they have the
  * same representation for *both* 32-bit and 64-bit producers and consumers.
  */
 #define	ERPT_MAGIC	0xf00d4eddU
 #define	ERPT_MAX_ERRS	16
 #define	ERPT_DATA_SZ	(6 * 1024)
 #define	ERPT_EVCH_MAX	256
 #define	ERPT_HIWAT	64
 
 typedef struct erpt_dump {
 	uint32_t ed_magic;	/* ERPT_MAGIC or zero to indicate end */
 	uint32_t ed_chksum;	/* checksum32() of packed nvlist data */
 	uint32_t ed_size;	/* ereport (nvl) fixed buf size */
 	uint32_t ed_pad;	/* reserved for future use */
 	hrtime_t ed_hrt_nsec;	/* hrtime of this ereport */
 	hrtime_t ed_hrt_base;	/* hrtime sample corresponding to ed_tod_base */
 	struct {
 		uint64_t sec;	/* seconds since gettimeofday() Epoch */
 		uint64_t nsec;	/* nanoseconds past ed_tod_base.sec */
 	} ed_tod_base;
 } erpt_dump_t;
 
 #ifdef _KERNEL
 
 #define	ZEVENT_SHUTDOWN		0x1
 
 typedef void zevent_cb_t(nvlist_t *, nvlist_t *);
 
 typedef struct zevent_s {
 	nvlist_t	*ev_nvl;	/* protected by the zevent_lock */
 	nvlist_t	*ev_detector;	/* " */
 	list_t		ev_ze_list;	/* " */
 	list_node_t	ev_node;	/* " */
 	zevent_cb_t	*ev_cb;		/* " */
 	uint64_t	ev_eid;
 } zevent_t;
 
 typedef struct zfs_zevent {
 	zevent_t	*ze_zevent;	/* protected by the zevent_lock */
 	list_node_t	ze_node;	/* " */
 	uint64_t	ze_dropped;	/* " */
 } zfs_zevent_t;
 
 extern void fm_init(void);
 extern void fm_fini(void);
 extern void zfs_zevent_post_cb(nvlist_t *nvl, nvlist_t *detector);
 extern int zfs_zevent_post(nvlist_t *, nvlist_t *, zevent_cb_t *);
 extern void zfs_zevent_drain_all(int *);
-extern int zfs_zevent_fd_hold(int, minor_t *, zfs_zevent_t **);
-extern void zfs_zevent_fd_rele(int);
+extern zfs_file_t *zfs_zevent_fd_hold(int, minor_t *, zfs_zevent_t **);
+extern void zfs_zevent_fd_rele(zfs_file_t *);
 extern int zfs_zevent_next(zfs_zevent_t *, nvlist_t **, uint64_t *, uint64_t *);
 extern int zfs_zevent_wait(zfs_zevent_t *);
 extern int zfs_zevent_seek(zfs_zevent_t *, uint64_t);
 extern void zfs_zevent_init(zfs_zevent_t **);
 extern void zfs_zevent_destroy(zfs_zevent_t *);
 
 extern void zfs_zevent_track_duplicate(void);
 extern void zfs_ereport_init(void);
 extern void zfs_ereport_fini(void);
 #else
 
 static inline void fm_init(void) { }
 static inline void fm_fini(void) { }
 
 #endif  /* _KERNEL */
 
 #ifdef	__cplusplus
 }
 #endif
 
 #endif /* _SYS_FM_UTIL_H */
diff --git a/sys/contrib/openzfs/include/sys/zfs_file.h b/sys/contrib/openzfs/include/sys/zfs_file.h
index d117933a6e4c..02cd1a6f041a 100644
--- a/sys/contrib/openzfs/include/sys/zfs_file.h
+++ b/sys/contrib/openzfs/include/sys/zfs_file.h
@@ -1,62 +1,64 @@
 /*
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
  * Common Development and Distribution License (the "License").
  * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
  * See the License for the specific language governing permissions
  * and limitations under the License.
  *
  * When distributing Covered Code, include this CDDL HEADER in each
  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  * If applicable, add the following below this CDDL HEADER, with the
  * fields enclosed by brackets "[]" replaced with your own identifying
  * information: Portions Copyright [yyyy] [name of copyright owner]
  *
  * CDDL HEADER END
  */
 
 #ifndef	_SYS_ZFS_FILE_H
 #define	_SYS_ZFS_FILE_H
 
+#include <sys/zfs_context.h>
+
 #ifndef _KERNEL
 typedef struct zfs_file {
 	int f_fd;
 	int f_dump_fd;
 } zfs_file_t;
 #elif defined(__linux__) || defined(__FreeBSD__)
 typedef struct file zfs_file_t;
 #else
 #error "unknown OS"
 #endif
 
 typedef struct zfs_file_attr {
 	uint64_t	zfa_size;	/* file size */
 	mode_t		zfa_mode;	/* file type */
 } zfs_file_attr_t;
 
 int zfs_file_open(const char *path, int flags, int mode, zfs_file_t **fp);
 void zfs_file_close(zfs_file_t *fp);
 
 int zfs_file_write(zfs_file_t *fp, const void *buf, size_t len, ssize_t *resid);
 int zfs_file_pwrite(zfs_file_t *fp, const void *buf, size_t len, loff_t off,
     ssize_t *resid);
 int zfs_file_read(zfs_file_t *fp, void *buf, size_t len, ssize_t *resid);
 int zfs_file_pread(zfs_file_t *fp, void *buf, size_t len, loff_t off,
     ssize_t *resid);
 
 int zfs_file_seek(zfs_file_t *fp, loff_t *offp, int whence);
 int zfs_file_getattr(zfs_file_t *fp, zfs_file_attr_t *zfattr);
 int zfs_file_fsync(zfs_file_t *fp, int flags);
 int zfs_file_fallocate(zfs_file_t *fp, int mode, loff_t offset, loff_t len);
 loff_t zfs_file_off(zfs_file_t *fp);
 int zfs_file_unlink(const char *);
 
-int zfs_file_get(int fd, zfs_file_t **fp);
-void zfs_file_put(int fd);
+zfs_file_t *zfs_file_get(int fd);
+void zfs_file_put(zfs_file_t *fp);
 void *zfs_file_private(zfs_file_t *fp);
 
 #endif /* _SYS_ZFS_FILE_H */
diff --git a/sys/contrib/openzfs/include/sys/zfs_ioctl.h b/sys/contrib/openzfs/include/sys/zfs_ioctl.h
index 41c978a3fff5..4fb15636ecb8 100644
--- a/sys/contrib/openzfs/include/sys/zfs_ioctl.h
+++ b/sys/contrib/openzfs/include/sys/zfs_ioctl.h
@@ -1,580 +1,580 @@
 /*
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
  * Common Development and Distribution License (the "License").
  * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
  * See the License for the specific language governing permissions
  * and limitations under the License.
  *
  * When distributing Covered Code, include this CDDL HEADER in each
  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  * If applicable, add the following below this CDDL HEADER, with the
  * fields enclosed by brackets "[]" replaced with your own identifying
  * information: Portions Copyright [yyyy] [name of copyright owner]
  *
  * CDDL HEADER END
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2012, 2020 by Delphix. All rights reserved.
  * Copyright 2016 RackTop Systems.
  * Copyright (c) 2017, Intel Corporation.
  */
 
 #ifndef	_SYS_ZFS_IOCTL_H
 #define	_SYS_ZFS_IOCTL_H
 
 #include <sys/cred.h>
 #include <sys/dmu.h>
 #include <sys/zio.h>
 #include <sys/dsl_deleg.h>
 #include <sys/spa.h>
 #include <sys/zfs_stat.h>
 
 #ifdef _KERNEL
 #include <sys/nvpair.h>
 #endif	/* _KERNEL */
 
 #ifdef	__cplusplus
 extern "C" {
 #endif
 
 /*
  * The structures in this file are passed between userland and the
  * kernel.  Userland may be running a 32-bit process, while the kernel
  * is 64-bit.  Therefore, these structures need to compile the same in
  * 32-bit and 64-bit.  This means not using type "long", and adding
  * explicit padding so that the 32-bit structure will not be packed more
  * tightly than the 64-bit structure (which requires 64-bit alignment).
  */
 
 /*
  * Property values for snapdir
  */
 #define	ZFS_SNAPDIR_HIDDEN		0
 #define	ZFS_SNAPDIR_VISIBLE		1
 
 /*
  * Property values for snapdev
  */
 #define	ZFS_SNAPDEV_HIDDEN		0
 #define	ZFS_SNAPDEV_VISIBLE		1
 /*
  * Property values for acltype
  */
 #define	ZFS_ACLTYPE_OFF			0
 #define	ZFS_ACLTYPE_POSIX		1
 #define	ZFS_ACLTYPE_NFSV4		2
 
 /*
  * Field manipulation macros for the drr_versioninfo field of the
  * send stream header.
  */
 
 /*
  * Header types for zfs send streams.
  */
 typedef enum drr_headertype {
 	DMU_SUBSTREAM = 0x1,
 	DMU_COMPOUNDSTREAM = 0x2
 } drr_headertype_t;
 
 #define	DMU_GET_STREAM_HDRTYPE(vi)	BF64_GET((vi), 0, 2)
 #define	DMU_SET_STREAM_HDRTYPE(vi, x)	BF64_SET((vi), 0, 2, x)
 
 #define	DMU_GET_FEATUREFLAGS(vi)	BF64_GET((vi), 2, 30)
 #define	DMU_SET_FEATUREFLAGS(vi, x)	BF64_SET((vi), 2, 30, x)
 
 /*
  * Feature flags for zfs send streams (flags in drr_versioninfo)
  */
 
 #define	DMU_BACKUP_FEATURE_DEDUP		(1 << 0)
 #define	DMU_BACKUP_FEATURE_DEDUPPROPS		(1 << 1)
 #define	DMU_BACKUP_FEATURE_SA_SPILL		(1 << 2)
 /* flags #3 - #15 are reserved for incompatible closed-source implementations */
 #define	DMU_BACKUP_FEATURE_EMBED_DATA		(1 << 16)
 #define	DMU_BACKUP_FEATURE_LZ4			(1 << 17)
 /* flag #18 is reserved for a Delphix feature */
 #define	DMU_BACKUP_FEATURE_LARGE_BLOCKS		(1 << 19)
 #define	DMU_BACKUP_FEATURE_RESUMING		(1 << 20)
 #define	DMU_BACKUP_FEATURE_REDACTED		(1 << 21)
 #define	DMU_BACKUP_FEATURE_COMPRESSED		(1 << 22)
 #define	DMU_BACKUP_FEATURE_LARGE_DNODE		(1 << 23)
 #define	DMU_BACKUP_FEATURE_RAW			(1 << 24)
 #define	DMU_BACKUP_FEATURE_ZSTD			(1 << 25)
 #define	DMU_BACKUP_FEATURE_HOLDS		(1 << 26)
 /*
  * The SWITCH_TO_LARGE_BLOCKS feature indicates that we can receive
  * incremental LARGE_BLOCKS streams (those with WRITE records of >128KB) even
  * if the previous send did not use LARGE_BLOCKS, and thus its large blocks
  * were split into multiple 128KB WRITE records.  (See
  * flush_write_batch_impl() and receive_object()).  Older software that does
  * not support this flag may encounter a bug when switching to large blocks,
  * which causes files to incorrectly be zeroed.
  *
  * This flag is currently not set on any send streams.  In the future, we
  * intend for incremental send streams of snapshots that have large blocks to
  * use LARGE_BLOCKS by default, and these streams will also have the
  * SWITCH_TO_LARGE_BLOCKS feature set. This ensures that streams from the
  * default use of "zfs send" won't encounter the bug mentioned above.
  */
 #define	DMU_BACKUP_FEATURE_SWITCH_TO_LARGE_BLOCKS (1 << 27)
 
 /*
  * Mask of all supported backup features
  */
 #define	DMU_BACKUP_FEATURE_MASK	(DMU_BACKUP_FEATURE_SA_SPILL | \
     DMU_BACKUP_FEATURE_EMBED_DATA | DMU_BACKUP_FEATURE_LZ4 | \
     DMU_BACKUP_FEATURE_RESUMING | DMU_BACKUP_FEATURE_LARGE_BLOCKS | \
     DMU_BACKUP_FEATURE_COMPRESSED | DMU_BACKUP_FEATURE_LARGE_DNODE | \
     DMU_BACKUP_FEATURE_RAW | DMU_BACKUP_FEATURE_HOLDS | \
     DMU_BACKUP_FEATURE_REDACTED | DMU_BACKUP_FEATURE_SWITCH_TO_LARGE_BLOCKS | \
     DMU_BACKUP_FEATURE_ZSTD)
 
 /* Are all features in the given flag word currently supported? */
 #define	DMU_STREAM_SUPPORTED(x)	(!((x) & ~DMU_BACKUP_FEATURE_MASK))
 
 typedef enum dmu_send_resume_token_version {
 	ZFS_SEND_RESUME_TOKEN_VERSION = 1
 } dmu_send_resume_token_version_t;
 
 /*
  * The drr_versioninfo field of the dmu_replay_record has the
  * following layout:
  *
  *	64	56	48	40	32	24	16	8	0
  *	+-------+-------+-------+-------+-------+-------+-------+-------+
  *	|		reserved	|        feature-flags	    |C|S|
  *	+-------+-------+-------+-------+-------+-------+-------+-------+
  *
  * The low order two bits indicate the header type: SUBSTREAM (0x1)
  * or COMPOUNDSTREAM (0x2).  Using two bits for this is historical:
  * this field used to be a version number, where the two version types
  * were 1 and 2.  Using two bits for this allows earlier versions of
  * the code to be able to recognize send streams that don't use any
  * of the features indicated by feature flags.
  */
 
 #define	DMU_BACKUP_MAGIC 0x2F5bacbacULL
 
 /*
  * Send stream flags.  Bits 24-31 are reserved for vendor-specific
  * implementations and should not be used.
  */
 #define	DRR_FLAG_CLONE		(1<<0)
 #define	DRR_FLAG_CI_DATA	(1<<1)
 /*
  * This send stream, if it is a full send, includes the FREE and FREEOBJECT
  * records that are created by the sending process.  This means that the send
  * stream can be received as a clone, even though it is not an incremental.
  * This is not implemented as a feature flag, because the receiving side does
  * not need to have implemented it to receive this stream; it is fully backwards
  * compatible.  We need a flag, though, because full send streams without it
  * cannot necessarily be received as a clone correctly.
  */
 #define	DRR_FLAG_FREERECORDS	(1<<2)
 /*
  * When DRR_FLAG_SPILL_BLOCK is set it indicates the DRR_OBJECT_SPILL
  * and DRR_SPILL_UNMODIFIED flags are meaningful in the send stream.
  *
  * When DRR_FLAG_SPILL_BLOCK is set, DRR_OBJECT records will have
  * DRR_OBJECT_SPILL set if and only if they should have a spill block
  * (either an existing one, or a new one in the send stream).  When clear
  * the object does not have a spill block and any existing spill block
  * should be freed.
  *
  * Similarly, when DRR_FLAG_SPILL_BLOCK is set, DRR_SPILL records will
  * have DRR_SPILL_UNMODIFIED set if and only if they were included for
  * backward compatibility purposes, and can be safely ignored by new versions
  * of zfs receive.  Previous versions of ZFS which do not understand the
  * DRR_FLAG_SPILL_BLOCK will process this record and recreate any missing
  * spill blocks.
  */
 #define	DRR_FLAG_SPILL_BLOCK	(1<<3)
 
 /*
  * flags in the drr_flags field in the DRR_WRITE, DRR_SPILL, DRR_OBJECT,
  * DRR_WRITE_BYREF, and DRR_OBJECT_RANGE blocks
  */
 #define	DRR_CHECKSUM_DEDUP	(1<<0) /* not used for SPILL records */
 #define	DRR_RAW_BYTESWAP	(1<<1)
 #define	DRR_OBJECT_SPILL	(1<<2) /* OBJECT record has a spill block */
 #define	DRR_SPILL_UNMODIFIED	(1<<2) /* SPILL record for unmodified block */
 
 #define	DRR_IS_DEDUP_CAPABLE(flags)	((flags) & DRR_CHECKSUM_DEDUP)
 #define	DRR_IS_RAW_BYTESWAPPED(flags)	((flags) & DRR_RAW_BYTESWAP)
 #define	DRR_OBJECT_HAS_SPILL(flags)	((flags) & DRR_OBJECT_SPILL)
 #define	DRR_SPILL_IS_UNMODIFIED(flags)	((flags) & DRR_SPILL_UNMODIFIED)
 
 /* deal with compressed drr_write replay records */
 #define	DRR_WRITE_COMPRESSED(drrw)	((drrw)->drr_compressiontype != 0)
 #define	DRR_WRITE_PAYLOAD_SIZE(drrw) \
 	(DRR_WRITE_COMPRESSED(drrw) ? (drrw)->drr_compressed_size : \
 	(drrw)->drr_logical_size)
 #define	DRR_SPILL_PAYLOAD_SIZE(drrs) \
 	((drrs)->drr_compressed_size ? \
 	(drrs)->drr_compressed_size : (drrs)->drr_length)
 #define	DRR_OBJECT_PAYLOAD_SIZE(drro) \
 	((drro)->drr_raw_bonuslen != 0 ? \
 	(drro)->drr_raw_bonuslen : P2ROUNDUP((drro)->drr_bonuslen, 8))
 
 /*
  * zfs ioctl command structure
  */
 
 /* Header is used in C++ so can't forward declare untagged struct */
 struct drr_begin {
 	uint64_t drr_magic;
 	uint64_t drr_versioninfo; /* was drr_version */
 	uint64_t drr_creation_time;
 	dmu_objset_type_t drr_type;
 	uint32_t drr_flags;
 	uint64_t drr_toguid;
 	uint64_t drr_fromguid;
 	char drr_toname[MAXNAMELEN];
 };
 
 typedef struct dmu_replay_record {
 	enum {
 		DRR_BEGIN, DRR_OBJECT, DRR_FREEOBJECTS,
 		DRR_WRITE, DRR_FREE, DRR_END, DRR_WRITE_BYREF,
 		DRR_SPILL, DRR_WRITE_EMBEDDED, DRR_OBJECT_RANGE, DRR_REDACT,
 		DRR_NUMTYPES
 	} drr_type;
 	uint32_t drr_payloadlen;
 	union {
 		struct drr_begin drr_begin;
 		struct drr_end {
 			zio_cksum_t drr_checksum;
 			uint64_t drr_toguid;
 		} drr_end;
 		struct drr_object {
 			uint64_t drr_object;
 			dmu_object_type_t drr_type;
 			dmu_object_type_t drr_bonustype;
 			uint32_t drr_blksz;
 			uint32_t drr_bonuslen;
 			uint8_t drr_checksumtype;
 			uint8_t drr_compress;
 			uint8_t drr_dn_slots;
 			uint8_t drr_flags;
 			uint32_t drr_raw_bonuslen;
 			uint64_t drr_toguid;
 			/* only (possibly) nonzero for raw streams */
 			uint8_t drr_indblkshift;
 			uint8_t drr_nlevels;
 			uint8_t drr_nblkptr;
 			uint8_t drr_pad[5];
 			uint64_t drr_maxblkid;
 			/* bonus content follows */
 		} drr_object;
 		struct drr_freeobjects {
 			uint64_t drr_firstobj;
 			uint64_t drr_numobjs;
 			uint64_t drr_toguid;
 		} drr_freeobjects;
 		struct drr_write {
 			uint64_t drr_object;
 			dmu_object_type_t drr_type;
 			uint32_t drr_pad;
 			uint64_t drr_offset;
 			uint64_t drr_logical_size;
 			uint64_t drr_toguid;
 			uint8_t drr_checksumtype;
 			uint8_t drr_flags;
 			uint8_t drr_compressiontype;
 			uint8_t drr_pad2[5];
 			/* deduplication key */
 			ddt_key_t drr_key;
 			/* only nonzero if drr_compressiontype is not 0 */
 			uint64_t drr_compressed_size;
 			/* only nonzero for raw streams */
 			uint8_t drr_salt[ZIO_DATA_SALT_LEN];
 			uint8_t drr_iv[ZIO_DATA_IV_LEN];
 			uint8_t drr_mac[ZIO_DATA_MAC_LEN];
 			/* content follows */
 		} drr_write;
 		struct drr_free {
 			uint64_t drr_object;
 			uint64_t drr_offset;
 			uint64_t drr_length;
 			uint64_t drr_toguid;
 		} drr_free;
 		struct drr_write_byref {
 			/* where to put the data */
 			uint64_t drr_object;
 			uint64_t drr_offset;
 			uint64_t drr_length;
 			uint64_t drr_toguid;
 			/* where to find the prior copy of the data */
 			uint64_t drr_refguid;
 			uint64_t drr_refobject;
 			uint64_t drr_refoffset;
 			/* properties of the data */
 			uint8_t drr_checksumtype;
 			uint8_t drr_flags;
 			uint8_t drr_pad2[6];
 			ddt_key_t drr_key; /* deduplication key */
 		} drr_write_byref;
 		struct drr_spill {
 			uint64_t drr_object;
 			uint64_t drr_length;
 			uint64_t drr_toguid;
 			uint8_t drr_flags;
 			uint8_t drr_compressiontype;
 			uint8_t drr_pad[6];
 			/* only nonzero for raw streams */
 			uint64_t drr_compressed_size;
 			uint8_t drr_salt[ZIO_DATA_SALT_LEN];
 			uint8_t drr_iv[ZIO_DATA_IV_LEN];
 			uint8_t drr_mac[ZIO_DATA_MAC_LEN];
 			dmu_object_type_t drr_type;
 			/* spill data follows */
 		} drr_spill;
 		struct drr_write_embedded {
 			uint64_t drr_object;
 			uint64_t drr_offset;
 			/* logical length, should equal blocksize */
 			uint64_t drr_length;
 			uint64_t drr_toguid;
 			uint8_t drr_compression;
 			uint8_t drr_etype;
 			uint8_t drr_pad[6];
 			uint32_t drr_lsize; /* uncompressed size of payload */
 			uint32_t drr_psize; /* compr. (real) size of payload */
 			/* (possibly compressed) content follows */
 		} drr_write_embedded;
 		struct drr_object_range {
 			uint64_t drr_firstobj;
 			uint64_t drr_numslots;
 			uint64_t drr_toguid;
 			uint8_t drr_salt[ZIO_DATA_SALT_LEN];
 			uint8_t drr_iv[ZIO_DATA_IV_LEN];
 			uint8_t drr_mac[ZIO_DATA_MAC_LEN];
 			uint8_t drr_flags;
 			uint8_t drr_pad[3];
 		} drr_object_range;
 		struct drr_redact {
 			uint64_t drr_object;
 			uint64_t drr_offset;
 			uint64_t drr_length;
 			uint64_t drr_toguid;
 		} drr_redact;
 
 		/*
 		 * Note: drr_checksum is overlaid with all record types
 		 * except DRR_BEGIN.  Therefore its (non-pad) members
 		 * must not overlap with members from the other structs.
 		 * We accomplish this by putting its members at the very
 		 * end of the struct.
 		 */
 		struct drr_checksum {
 			uint64_t drr_pad[34];
 			/*
 			 * fletcher-4 checksum of everything preceding the
 			 * checksum.
 			 */
 			zio_cksum_t drr_checksum;
 		} drr_checksum;
 	} drr_u;
 } dmu_replay_record_t;
 
 /* diff record range types */
 typedef enum diff_type {
 	DDR_NONE = 0x1,
 	DDR_INUSE = 0x2,
 	DDR_FREE = 0x4
 } diff_type_t;
 
 /*
  * The diff reports back ranges of free or in-use objects.
  */
 typedef struct dmu_diff_record {
 	uint64_t ddr_type;
 	uint64_t ddr_first;
 	uint64_t ddr_last;
 } dmu_diff_record_t;
 
 typedef struct zinject_record {
 	uint64_t	zi_objset;
 	uint64_t	zi_object;
 	uint64_t	zi_start;
 	uint64_t	zi_end;
 	uint64_t	zi_guid;
 	uint32_t	zi_level;
 	uint32_t	zi_error;
 	uint64_t	zi_type;
 	uint32_t	zi_freq;
 	uint32_t	zi_failfast;
 	char		zi_func[MAXNAMELEN];
 	uint32_t	zi_iotype;
 	int32_t		zi_duration;
 	uint64_t	zi_timer;
 	uint64_t	zi_nlanes;
 	uint32_t	zi_cmd;
 	uint32_t	zi_dvas;
 } zinject_record_t;
 
 #define	ZINJECT_NULL		0x1
 #define	ZINJECT_FLUSH_ARC	0x2
 #define	ZINJECT_UNLOAD_SPA	0x4
 #define	ZINJECT_CALC_RANGE	0x8
 
 #define	ZEVENT_NONE		0x0
 #define	ZEVENT_NONBLOCK		0x1
 #define	ZEVENT_SIZE		1024
 
 #define	ZEVENT_SEEK_START	0
 #define	ZEVENT_SEEK_END		UINT64_MAX
 
 /* scaled frequency ranges */
 #define	ZI_PERCENTAGE_MIN	4294UL
 #define	ZI_PERCENTAGE_MAX	UINT32_MAX
 
 #define	ZI_NO_DVA		(-1)
 
 typedef enum zinject_type {
 	ZINJECT_UNINITIALIZED,
 	ZINJECT_DATA_FAULT,
 	ZINJECT_DEVICE_FAULT,
 	ZINJECT_LABEL_FAULT,
 	ZINJECT_IGNORED_WRITES,
 	ZINJECT_PANIC,
 	ZINJECT_DELAY_IO,
 	ZINJECT_DECRYPT_FAULT,
 } zinject_type_t;
 
 typedef struct zfs_share {
 	uint64_t	z_exportdata;
 	uint64_t	z_sharedata;
 	uint64_t	z_sharetype;	/* 0 = share, 1 = unshare */
 	uint64_t	z_sharemax;  /* max length of share string */
 } zfs_share_t;
 
 /*
  * ZFS file systems may behave the usual, POSIX-compliant way, where
  * name lookups are case-sensitive.  They may also be set up so that
  * all the name lookups are case-insensitive, or so that only some
  * lookups, the ones that set an FIGNORECASE flag, are case-insensitive.
  */
 typedef enum zfs_case {
 	ZFS_CASE_SENSITIVE,
 	ZFS_CASE_INSENSITIVE,
 	ZFS_CASE_MIXED
 } zfs_case_t;
 
 /*
  * Note: this struct must have the same layout in 32-bit and 64-bit, so
  * that 32-bit processes (like /sbin/zfs) can pass it to the 64-bit
  * kernel.  Therefore, we add padding to it so that no "hidden" padding
  * is automatically added on 64-bit (but not on 32-bit).
  */
 typedef struct zfs_cmd {
 	char		zc_name[MAXPATHLEN];	/* name of pool or dataset */
 	uint64_t	zc_nvlist_src;		/* really (char *) */
 	uint64_t	zc_nvlist_src_size;
 	uint64_t	zc_nvlist_dst;		/* really (char *) */
 	uint64_t	zc_nvlist_dst_size;
 	boolean_t	zc_nvlist_dst_filled;	/* put an nvlist in dst? */
 	int		zc_pad2;
 
 	/*
 	 * The following members are for legacy ioctls which haven't been
 	 * converted to the new method.
 	 */
 	uint64_t	zc_history;		/* really (char *) */
 	char		zc_value[MAXPATHLEN * 2];
 	char		zc_string[MAXNAMELEN];
 	uint64_t	zc_guid;
 	uint64_t	zc_nvlist_conf;		/* really (char *) */
 	uint64_t	zc_nvlist_conf_size;
 	uint64_t	zc_cookie;
 	uint64_t	zc_objset_type;
 	uint64_t	zc_perm_action;
 	uint64_t	zc_history_len;
 	uint64_t	zc_history_offset;
 	uint64_t	zc_obj;
 	uint64_t	zc_iflags;		/* internal to zfs(7fs) */
 	zfs_share_t	zc_share;
 	dmu_objset_stats_t zc_objset_stats;
 	struct drr_begin zc_begin_record;
 	zinject_record_t zc_inject_record;
 	uint32_t	zc_defer_destroy;
 	uint32_t	zc_flags;
 	uint64_t	zc_action_handle;
 	int		zc_cleanup_fd;
 	uint8_t		zc_simple;
 	uint8_t		zc_pad[3];		/* alignment */
 	uint64_t	zc_sendobj;
 	uint64_t	zc_fromobj;
 	uint64_t	zc_createtxg;
 	zfs_stat_t	zc_stat;
 	uint64_t	zc_zoneid;
 } zfs_cmd_t;
 
 typedef struct zfs_useracct {
 	char zu_domain[256];
 	uid_t zu_rid;
 	uint32_t zu_pad;
 	uint64_t zu_space;
 } zfs_useracct_t;
 
 #define	ZFSDEV_MAX_MINOR	(1 << 16)
 
 #define	ZPOOL_EXPORT_AFTER_SPLIT 0x1
 
 #ifdef _KERNEL
 struct objset;
 struct zfsvfs;
 
 typedef struct zfs_creat {
 	nvlist_t	*zct_zplprops;
 	nvlist_t	*zct_props;
 } zfs_creat_t;
 
 extern int zfs_secpolicy_snapshot_perms(const char *, cred_t *);
 extern int zfs_secpolicy_rename_perms(const char *, const char *, cred_t *);
 extern int zfs_secpolicy_destroy_perms(const char *, cred_t *);
 extern void zfs_unmount_snap(const char *);
 extern void zfs_destroy_unmount_origin(const char *);
 extern int getzfsvfs_impl(struct objset *, struct zfsvfs **);
 extern int getzfsvfs(const char *, struct zfsvfs **);
 
 enum zfsdev_state_type {
 	ZST_ONEXIT,
 	ZST_ZEVENT,
 	ZST_ALL,
 };
 
 /*
  * The zfsdev_state_t structure is managed as a singly-linked list
  * from which items are never deleted.  This allows for lock-free
  * reading of the list so long as assignments to the zs_next and
  * reads from zs_minor are performed atomically.  Empty items are
  * indicated by storing -1 into zs_minor.
  */
 typedef struct zfsdev_state {
 	struct zfsdev_state	*zs_next;	/* next zfsdev_state_t link */
 	minor_t			zs_minor;	/* made up minor number */
 	void			*zs_onexit;	/* onexit data */
 	void			*zs_zevent;	/* zevent data */
 } zfsdev_state_t;
 
 extern void *zfsdev_get_state(minor_t minor, enum zfsdev_state_type which);
-extern int zfsdev_getminor(int fd, minor_t *minorp);
+extern int zfsdev_getminor(zfs_file_t *fp, minor_t *minorp);
 
 extern uint_t zfs_fsyncer_key;
 extern uint_t zfs_allow_log_key;
 
 #endif	/* _KERNEL */
 
 #ifdef	__cplusplus
 }
 #endif
 
 #endif	/* _SYS_ZFS_IOCTL_H */
diff --git a/sys/contrib/openzfs/include/sys/zfs_onexit.h b/sys/contrib/openzfs/include/sys/zfs_onexit.h
index 0fab23ff849b..fd3030e3ac2d 100644
--- a/sys/contrib/openzfs/include/sys/zfs_onexit.h
+++ b/sys/contrib/openzfs/include/sys/zfs_onexit.h
@@ -1,63 +1,63 @@
 /*
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
  * Common Development and Distribution License (the "License").
  * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
  * See the License for the specific language governing permissions
  * and limitations under the License.
  *
  * When distributing Covered Code, include this CDDL HEADER in each
  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  * If applicable, add the following below this CDDL HEADER, with the
  * fields enclosed by brackets "[]" replaced with your own identifying
  * information: Portions Copyright [yyyy] [name of copyright owner]
  *
  * CDDL HEADER END
  */
 
 /*
  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2020 by Delphix. All rights reserved.
  */
 
 #ifndef	_SYS_ZFS_ONEXIT_H
 #define	_SYS_ZFS_ONEXIT_H
 
 #include <sys/zfs_context.h>
 
 #ifdef	__cplusplus
 extern "C" {
 #endif
 
 #ifdef _KERNEL
 
 /*
  * Per-instance on-exit state: a mutex and a list.  Instances are set
  * up by zfs_onexit_init() and torn down by zfs_onexit_destroy().
  * NOTE(review): zo_lock presumably protects zo_actions, and the list
  * presumably holds zfs_onexit_action_node_t entries; confirm against
  * the implementation.
  */
 typedef struct zfs_onexit {
 	kmutex_t	zo_lock;
 	list_t		zo_actions;
 } zfs_onexit_t;
 
 /*
  * One registered callback: a list linkage, a function taking a single
  * void pointer, and a void pointer.
  * NOTE(review): za_data is presumably the argument handed to za_func,
  * and za_link presumably threads the node onto zfs_onexit_t's
  * zo_actions list; confirm against the implementation.
  */
 typedef struct zfs_onexit_action_node {
 	list_node_t	za_link;
 	void		(*za_func)(void *);
 	void		*za_data;
 } zfs_onexit_action_node_t;
 
 extern void zfs_onexit_init(zfs_onexit_t **zo);
 extern void zfs_onexit_destroy(zfs_onexit_t *zo);
 
 #endif
 
-extern int zfs_onexit_fd_hold(int fd, minor_t *minorp);
-extern void zfs_onexit_fd_rele(int fd);
+extern zfs_file_t *zfs_onexit_fd_hold(int fd, minor_t *minorp);
+extern void zfs_onexit_fd_rele(zfs_file_t *);
 extern int zfs_onexit_add_cb(minor_t minor, void (*func)(void *), void *data,
     uint64_t *action_handle);
 
 #ifdef	__cplusplus
 }
 #endif
 
 #endif	/* _SYS_ZFS_ONEXIT_H */
diff --git a/sys/contrib/openzfs/lib/libzfs/libzfs_pool.c b/sys/contrib/openzfs/lib/libzfs/libzfs_pool.c
index c0bf9d067d42..7338b9d72cad 100644
--- a/sys/contrib/openzfs/lib/libzfs/libzfs_pool.c
+++ b/sys/contrib/openzfs/lib/libzfs/libzfs_pool.c
@@ -1,4948 +1,4950 @@
 /*
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
  * Common Development and Distribution License (the "License").
  * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
  * See the License for the specific language governing permissions
  * and limitations under the License.
  *
  * When distributing Covered Code, include this CDDL HEADER in each
  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  * If applicable, add the following below this CDDL HEADER, with the
  * fields enclosed by brackets "[]" replaced with your own identifying
  * information: Portions Copyright [yyyy] [name of copyright owner]
  *
  * CDDL HEADER END
  */
 
 /*
  * Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2011, 2020 by Delphix. All rights reserved.
  * Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com>
  * Copyright (c) 2018 Datto Inc.
  * Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
  * Copyright (c) 2017, Intel Corporation.
  * Copyright (c) 2018, loli10K <ezomori.nozomu@gmail.com>
  * Copyright (c) 2021, Colm Buckley <colm@tuatha.org>
  */
 
 #include <errno.h>
 #include <libintl.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <strings.h>
 #include <unistd.h>
 #include <libgen.h>
 #include <zone.h>
 #include <sys/stat.h>
 #include <sys/efi_partition.h>
 #include <sys/systeminfo.h>
 #include <sys/zfs_ioctl.h>
 #include <sys/zfs_sysfs.h>
 #include <sys/vdev_disk.h>
 #include <sys/types.h>
 #include <dlfcn.h>
 #include <libzutil.h>
 #include <fcntl.h>
 
 #include "zfs_namecheck.h"
 #include "zfs_prop.h"
 #include "libzfs_impl.h"
 #include "zfs_comutil.h"
 #include "zfeature_common.h"
 
 static boolean_t zpool_vdev_is_interior(const char *name);
 
 /*
  * Context flags handed to zpool_valid_proplist() describing the
  * operation the properties are being validated for.
  *
  * The bit-fields are explicitly unsigned: a one-bit signed field can
  * only represent 0 and -1, so storing 1 (B_TRUE) into it would be an
  * implementation-defined conversion.
  */
 typedef struct prop_flags {
 	unsigned int create:1;	/* Validate property on creation */
 	unsigned int import:1;	/* Validate property on import */
 } prop_flags_t;
 
 /*
  * ====================================================================
  *   zpool property functions
  * ====================================================================
  */
 
 /*
  * Fetch the complete property nvlist for the pool via
  * ZFS_IOC_POOL_GET_PROPS and store it in zhp->zpool_props.  The ioctl
  * is retried with a larger destination buffer for as long as it fails
  * with ENOMEM.  Returns 0 on success and -1 on any failure.
  */
 static int
 zpool_get_all_props(zpool_handle_t *zhp)
 {
 	libzfs_handle_t *lzh = zhp->zpool_hdl;
 	zfs_cmd_t cmd = {"\0"};
 	int rv = -1;
 
 	(void) strlcpy(cmd.zc_name, zhp->zpool_name, sizeof (cmd.zc_name));
 
 	if (zcmd_alloc_dst_nvlist(lzh, &cmd, 0) != 0)
 		return (-1);
 
 	for (;;) {
 		if (zfs_ioctl(lzh, ZFS_IOC_POOL_GET_PROPS, &cmd) == 0)
 			break;
 
 		/* Only a too-small buffer is worth another attempt. */
 		if (errno != ENOMEM ||
 		    zcmd_expand_dst_nvlist(lzh, &cmd) != 0)
 			goto out;
 	}
 
 	if (zcmd_read_dst_nvlist(lzh, &cmd, &zhp->zpool_props) == 0)
 		rv = 0;
 
 out:
 	zcmd_free_nvlists(&cmd);
 	return (rv);
 }
 
 /*
  * Reload the pool's properties.  On success the property list that
  * was attached to the handle beforehand is freed and 0 is returned;
  * on failure -1 is returned and the old list is not freed here.
  */
 int
 zpool_props_refresh(zpool_handle_t *zhp)
 {
 	nvlist_t *stale = zhp->zpool_props;
 
 	if (zpool_get_all_props(zhp) == 0) {
 		nvlist_free(stale);
 		return (0);
 	}
 
 	return (-1);
 }
 
 /*
  * Look up a string-valued pool property in zhp->zpool_props.  When the
  * property is absent, the property's default string (or "-" if it has
  * none) is returned and the source is ZPROP_SRC_DEFAULT.  The source
  * is stored through 'src' when 'src' is non-NULL.
  */
 static const char *
 zpool_get_prop_string(zpool_handle_t *zhp, zpool_prop_t prop,
     zprop_source_t *src)
 {
 	zprop_source_t origin = ZPROP_SRC_DEFAULT;
 	nvlist_t *propnv;
 	char *str;
 
 	if (nvlist_lookup_nvlist(zhp->zpool_props, zpool_prop_to_name(prop),
 	    &propnv) != 0) {
 		str = (char *)zpool_prop_default_string(prop);
 		if (str == NULL)
 			str = "-";
 	} else {
 		uint64_t raw;
 
 		verify(nvlist_lookup_uint64(propnv, ZPROP_SOURCE, &raw) == 0);
 		origin = raw;
 		verify(nvlist_lookup_string(propnv, ZPROP_VALUE, &str) == 0);
 	}
 
 	if (src != NULL)
 		*src = origin;
 
 	return (str);
 }
 
 /*
  * Return the numeric value of a pool property, loading the property
  * list on first use.  When the property is not present its numeric
  * default is returned with source ZPROP_SRC_DEFAULT.  The source is
  * stored through 'src' (when non-NULL) only if the property list is
  * available.
  */
 uint64_t
 zpool_get_prop_int(zpool_handle_t *zhp, zpool_prop_t prop, zprop_source_t *src)
 {
 	zprop_source_t origin;
 	nvlist_t *entry;
 	uint64_t val;
 
 	if (zhp->zpool_props == NULL && zpool_get_all_props(zhp) != 0) {
 		/*
 		 * The property list could not be loaded, most likely
 		 * because the pool is faulted.  The guid can still be
 		 * recovered from the top level vdev in the handle's config
 		 * nvlist; everything else falls back to its default.
 		 */
 		if (prop != ZPOOL_PROP_GUID)
 			return (zpool_prop_default_numeric(prop));
 
 		if (nvlist_lookup_nvlist(zhp->zpool_config,
 		    ZPOOL_CONFIG_VDEV_TREE, &entry) != 0)
 			return (zpool_prop_default_numeric(prop));
 
 		if (nvlist_lookup_uint64(entry, ZPOOL_CONFIG_GUID, &val) != 0)
 			return (zpool_prop_default_numeric(prop));
 
 		return (val);
 	}
 
 	if (nvlist_lookup_nvlist(zhp->zpool_props, zpool_prop_to_name(prop),
 	    &entry) != 0) {
 		origin = ZPROP_SRC_DEFAULT;
 		val = zpool_prop_default_numeric(prop);
 	} else {
 		/* 'val' briefly carries the source before the value. */
 		verify(nvlist_lookup_uint64(entry, ZPROP_SOURCE, &val) == 0);
 		origin = val;
 		verify(nvlist_lookup_uint64(entry, ZPROP_VALUE, &val) == 0);
 	}
 
 	if (src != NULL)
 		*src = origin;
 
 	return (val);
 }
 
 /*
  * Translate a vdev state (refined by 'aux' for VDEV_STATE_CANT_OPEN)
  * into its localized display string.  Unrecognized states map to
  * "UNKNOWN".
  */
 const char *
 zpool_state_to_name(vdev_state_t state, vdev_aux_t aux)
 {
 	if (state == VDEV_STATE_CLOSED || state == VDEV_STATE_OFFLINE)
 		return (gettext("OFFLINE"));
 
 	if (state == VDEV_STATE_REMOVED)
 		return (gettext("REMOVED"));
 
 	if (state == VDEV_STATE_CANT_OPEN) {
 		if (aux == VDEV_AUX_CORRUPT_DATA || aux == VDEV_AUX_BAD_LOG)
 			return (gettext("FAULTED"));
 		if (aux == VDEV_AUX_SPLIT_POOL)
 			return (gettext("SPLIT"));
 		return (gettext("UNAVAIL"));
 	}
 
 	if (state == VDEV_STATE_FAULTED)
 		return (gettext("FAULTED"));
 
 	if (state == VDEV_STATE_DEGRADED)
 		return (gettext("DEGRADED"));
 
 	if (state == VDEV_STATE_HEALTHY)
 		return (gettext("ONLINE"));
 
 	return (gettext("UNKNOWN"));
 }
 
 /*
  * Translate a pool state into its localized display string.
  * Unrecognized states map to "UNKNOWN".
  */
 const char *
 zpool_pool_state_to_name(pool_state_t state)
 {
 	const char *label;
 
 	switch (state) {
 	case POOL_STATE_ACTIVE:
 		label = gettext("ACTIVE");
 		break;
 	case POOL_STATE_EXPORTED:
 		label = gettext("EXPORTED");
 		break;
 	case POOL_STATE_DESTROYED:
 		label = gettext("DESTROYED");
 		break;
 	case POOL_STATE_SPARE:
 		label = gettext("SPARE");
 		break;
 	case POOL_STATE_L2CACHE:
 		label = gettext("L2CACHE");
 		break;
 	case POOL_STATE_UNINITIALIZED:
 		label = gettext("UNINITIALIZED");
 		break;
 	case POOL_STATE_UNAVAIL:
 		label = gettext("UNAVAIL");
 		break;
 	case POOL_STATE_POTENTIALLY_ACTIVE:
 		label = gettext("POTENTIALLY_ACTIVE");
 		break;
 	default:
 		label = gettext("UNKNOWN");
 		break;
 	}
 
 	return (label);
 }
 
 /*
  * Given a pool handle, return the pool health string ("ONLINE",
  * "DEGRADED", "SUSPENDED", etc).  An unavailable pool reports
  * "FAULTED", a pool whose status is an I/O failure (wait or MMP)
  * reports "SUSPENDED", and otherwise the string is derived from the
  * stats of the root vdev in the pool config.
  */
 const char *
 zpool_get_state_str(zpool_handle_t *zhp)
 {
 	zpool_errata_t errata;
 	zpool_status_t status = zpool_get_status(zhp, NULL, &errata);
 	nvlist_t *root;
 	vdev_stat_t *stats;
 	uint_t nstats;
 
 	if (zpool_get_state(zhp) == POOL_STATE_UNAVAIL)
 		return (gettext("FAULTED"));
 
 	if (status == ZPOOL_STATUS_IO_FAILURE_WAIT ||
 	    status == ZPOOL_STATUS_IO_FAILURE_MMP)
 		return (gettext("SUSPENDED"));
 
 	verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL),
 	    ZPOOL_CONFIG_VDEV_TREE, &root) == 0);
 	verify(nvlist_lookup_uint64_array(root, ZPOOL_CONFIG_VDEV_STATS,
 	    (uint64_t **)&stats, &nstats) == 0);
 
 	return (zpool_state_to_name(stats->vs_state, stats->vs_aux));
 }
 
 /*
  * Get a zpool property value for 'prop' and return the value in
  * a pre-allocated buffer.
  *
  *   zhp      pool handle to query
  *   prop     property to format
  *   buf/len  caller-supplied output buffer and its size
  *   srctype  if non-NULL, receives the source of the value
  *   literal  if true, numbers are printed as plain integers rather
  *            than in their human-readable form
  *
  * Returns 0 on success.  Returns -1 if the property list cannot be
  * loaded (for anything other than ZPOOL_PROP_NAME) or if an index
  * property's value has no string form.  An unknown property type
  * aborts the process.
  */
 int
 zpool_get_prop(zpool_handle_t *zhp, zpool_prop_t prop, char *buf,
     size_t len, zprop_source_t *srctype, boolean_t literal)
 {
 	uint64_t intval;
 	const char *strval;
 	zprop_source_t src = ZPROP_SRC_NONE;
 
 	/*
 	 * For an unavailable pool only a small set of properties is
 	 * reported; everything else is shown as "-".  This path always
 	 * succeeds.
 	 */
 	if (zpool_get_state(zhp) == POOL_STATE_UNAVAIL) {
 		switch (prop) {
 		case ZPOOL_PROP_NAME:
 			(void) strlcpy(buf, zpool_get_name(zhp), len);
 			break;
 
 		case ZPOOL_PROP_HEALTH:
 			(void) strlcpy(buf, zpool_get_state_str(zhp), len);
 			break;
 
 		case ZPOOL_PROP_GUID:
 			intval = zpool_get_prop_int(zhp, prop, &src);
 			(void) snprintf(buf, len, "%llu", (u_longlong_t)intval);
 			break;
 
 		case ZPOOL_PROP_ALTROOT:
 		case ZPOOL_PROP_CACHEFILE:
 		case ZPOOL_PROP_COMMENT:
 		case ZPOOL_PROP_COMPATIBILITY:
 			/*
 			 * These string properties are reported only if the
 			 * property list is already loaded or can be loaded
 			 * now; otherwise fall through to "-".
 			 */
 			if (zhp->zpool_props != NULL ||
 			    zpool_get_all_props(zhp) == 0) {
 				(void) strlcpy(buf,
 				    zpool_get_prop_string(zhp, prop, &src),
 				    len);
 				break;
 			}
 			/* FALLTHROUGH */
 		default:
 			(void) strlcpy(buf, "-", len);
 			break;
 		}
 
 		if (srctype != NULL)
 			*srctype = src;
 		return (0);
 	}
 
 	/*
 	 * Load the property list on first use.  A failed load is fatal
 	 * for every property except ZPOOL_PROP_NAME.
 	 */
 	if (zhp->zpool_props == NULL && zpool_get_all_props(zhp) &&
 	    prop != ZPOOL_PROP_NAME)
 		return (-1);
 
 	switch (zpool_prop_get_type(prop)) {
 	case PROP_TYPE_STRING:
 		(void) strlcpy(buf, zpool_get_prop_string(zhp, prop, &src),
 		    len);
 		break;
 
 	case PROP_TYPE_NUMBER:
 		intval = zpool_get_prop_int(zhp, prop, &src);
 
 		switch (prop) {
 		/* Plain integer when literal, zfs_nicenum() otherwise. */
 		case ZPOOL_PROP_SIZE:
 		case ZPOOL_PROP_ALLOCATED:
 		case ZPOOL_PROP_FREE:
 		case ZPOOL_PROP_FREEING:
 		case ZPOOL_PROP_LEAKED:
 		case ZPOOL_PROP_ASHIFT:
 			if (literal)
 				(void) snprintf(buf, len, "%llu",
 				    (u_longlong_t)intval);
 			else
 				(void) zfs_nicenum(intval, buf, len);
 			break;
 
 		/* A value of zero is displayed as "-". */
 		case ZPOOL_PROP_EXPANDSZ:
 		case ZPOOL_PROP_CHECKPOINT:
 			if (intval == 0) {
 				(void) strlcpy(buf, "-", len);
 			} else if (literal) {
 				(void) snprintf(buf, len, "%llu",
 				    (u_longlong_t)intval);
 			} else {
 				(void) zfs_nicebytes(intval, buf, len);
 			}
 			break;
 
 		/* Non-literal output carries a trailing '%'. */
 		case ZPOOL_PROP_CAPACITY:
 			if (literal) {
 				(void) snprintf(buf, len, "%llu",
 				    (u_longlong_t)intval);
 			} else {
 				(void) snprintf(buf, len, "%llu%%",
 				    (u_longlong_t)intval);
 			}
 			break;
 
 		/* UINT64_MAX is displayed as "-". */
 		case ZPOOL_PROP_FRAGMENTATION:
 			if (intval == UINT64_MAX) {
 				(void) strlcpy(buf, "-", len);
 			} else if (literal) {
 				(void) snprintf(buf, len, "%llu",
 				    (u_longlong_t)intval);
 			} else {
 				(void) snprintf(buf, len, "%llu%%",
 				    (u_longlong_t)intval);
 			}
 			break;
 
 		/*
 		 * The stored value is the ratio scaled by 100; print it
 		 * with two decimals, plus an 'x' suffix when not literal.
 		 */
 		case ZPOOL_PROP_DEDUPRATIO:
 			if (literal)
 				(void) snprintf(buf, len, "%llu.%02llu",
 				    (u_longlong_t)(intval / 100),
 				    (u_longlong_t)(intval % 100));
 			else
 				(void) snprintf(buf, len, "%llu.%02llux",
 				    (u_longlong_t)(intval / 100),
 				    (u_longlong_t)(intval % 100));
 			break;
 
 		case ZPOOL_PROP_HEALTH:
 			(void) strlcpy(buf, zpool_get_state_str(zhp), len);
 			break;
 		case ZPOOL_PROP_VERSION:
 			/*
 			 * Versions at or above SPA_VERSION_FEATURES are
 			 * displayed as "-"; older ones print as a number.
 			 */
 			if (intval >= SPA_VERSION_FEATURES) {
 				(void) snprintf(buf, len, "-");
 				break;
 			}
 			/* FALLTHROUGH */
 		default:
 			(void) snprintf(buf, len, "%llu", (u_longlong_t)intval);
 		}
 		break;
 
 	case PROP_TYPE_INDEX:
 		/* Map the stored index to its string form. */
 		intval = zpool_get_prop_int(zhp, prop, &src);
 		if (zpool_prop_index_to_string(prop, intval, &strval)
 		    != 0)
 			return (-1);
 		(void) strlcpy(buf, strval, len);
 		break;
 
 	default:
 		abort();
 	}
 
 	if (srctype)
 		*srctype = src;
 
 	return (0);
 }
 
 /*
  * Check if the bootfs name has the same pool name as it is set to.
  * Assuming bootfs is a valid dataset name.
  *
  * An empty bootfs value is always accepted.  Otherwise the value must
  * be a valid filesystem or snapshot name that is either the pool name
  * itself or starts with the pool name followed by '/'.
  */
 static boolean_t
 bootfs_name_valid(const char *pool, const char *bootfs)
 {
 	/* strlen() returns size_t; keep the length in its natural type. */
 	size_t len = strlen(pool);
 
 	if (bootfs[0] == '\0')
 		return (B_TRUE);
 
 	if (!zfs_name_valid(bootfs, ZFS_TYPE_FILESYSTEM|ZFS_TYPE_SNAPSHOT))
 		return (B_FALSE);
 
 	/*
 	 * A successful strncmp() over 'len' bytes guarantees bootfs has at
 	 * least 'len' characters, so bootfs[len] is within the string.
 	 */
 	if (strncmp(pool, bootfs, len) == 0 &&
 	    (bootfs[len] == '/' || bootfs[len] == '\0'))
 		return (B_TRUE);
 
 	return (B_FALSE);
 }
 
 /*
  * Given an nvlist of zpool properties to be set, validate that they are
  * correct, and parse any numeric properties (index, boolean, etc) if they are
  * specified as strings.
  *
  *   hdl       library handle, used for error reporting
  *   poolname  name of the pool the properties apply to
  *   props     properties to validate
  *   version   pool version the properties are checked against
  *   flags     whether this is a create and/or an import
  *   errbuf    message prefix passed to zfs_error() on failure
  *
  * Returns a newly allocated nvlist of validated properties, or NULL
  * after reporting an error through 'hdl'.
  */
 static nvlist_t *
 zpool_valid_proplist(libzfs_handle_t *hdl, const char *poolname,
     nvlist_t *props, uint64_t version, prop_flags_t flags, char *errbuf)
 {
 	nvpair_t *elem;
 	nvlist_t *retprops;
 	zpool_prop_t prop;
 	char *strval;
 	uint64_t intval;
 	char *slash, *check;
 	struct stat64 statbuf;
 	zpool_handle_t *zhp;
 	char report[1024];
 
 	if (nvlist_alloc(&retprops, NV_UNIQUE_NAME, 0) != 0) {
 		(void) no_memory(hdl);
 		return (NULL);
 	}
 
 	elem = NULL;
 	while ((elem = nvlist_next_nvpair(props, elem)) != NULL) {
 		const char *propname = nvpair_name(elem);
 
 		prop = zpool_name_to_prop(propname);
 		if (prop == ZPOOL_PROP_INVAL && zpool_prop_feature(propname)) {
 			int err;
 			char *fname = strchr(propname, '@') + 1;
 
 			err = zfeature_lookup_name(fname, NULL);
 			if (err != 0) {
 				ASSERT3U(err, ==, ENOENT);
 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 				    "feature '%s' unsupported by kernel"),
 				    fname);
 				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
 				goto error;
 			}
 
 			if (nvpair_type(elem) != DATA_TYPE_STRING) {
 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 				    "'%s' must be a string"), propname);
 				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
 				goto error;
 			}
 
 			(void) nvpair_value_string(elem, &strval);
 			if (strcmp(strval, ZFS_FEATURE_ENABLED) != 0 &&
 			    strcmp(strval, ZFS_FEATURE_DISABLED) != 0) {
 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 				    "property '%s' can only be set to "
 				    "'enabled' or 'disabled'"), propname);
 				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
 				goto error;
 			}
 
 			if (!flags.create &&
 			    strcmp(strval, ZFS_FEATURE_DISABLED) == 0) {
 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 				    "property '%s' can only be set to "
 				    "'disabled' at creation time"), propname);
 				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
 				goto error;
 			}
 
 			/* Feature properties are recorded with the value 0. */
 			if (nvlist_add_uint64(retprops, propname, 0) != 0) {
 				(void) no_memory(hdl);
 				goto error;
 			}
 			continue;
 		}
 
 		/*
 		 * Make sure this property is valid and applies to this type.
 		 */
 		if (prop == ZPOOL_PROP_INVAL) {
 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 			    "invalid property '%s'"), propname);
 			(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
 			goto error;
 		}
 
 		if (zpool_prop_readonly(prop)) {
 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' "
 			    "is readonly"), propname);
 			(void) zfs_error(hdl, EZFS_PROPREADONLY, errbuf);
 			goto error;
 		}
 
 		if (!flags.create && zpool_prop_setonce(prop)) {
 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 			    "property '%s' can only be set at "
 			    "creation time"), propname);
 			(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
 			goto error;
 		}
 
 		if (zprop_parse_value(hdl, elem, prop, ZFS_TYPE_POOL, retprops,
 		    &strval, &intval, errbuf) != 0)
 			goto error;
 
 		/*
 		 * Perform additional checking for specific properties.
 		 */
 		switch (prop) {
 		case ZPOOL_PROP_VERSION:
 			if (intval < version ||
 			    !SPA_VERSION_IS_SUPPORTED(intval)) {
 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 				    "property '%s' number %llu is invalid."),
 				    propname, (unsigned long long)intval);
 				(void) zfs_error(hdl, EZFS_BADVERSION, errbuf);
 				goto error;
 			}
 			break;
 
 		case ZPOOL_PROP_ASHIFT:
 			if (intval != 0 &&
 			    (intval < ASHIFT_MIN || intval > ASHIFT_MAX)) {
 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 				    "property '%s' number %llu is invalid, "
 				    "only values between %" PRId32 " and %"
 				    PRId32 " are allowed."),
 				    propname, (unsigned long long)intval,
 				    ASHIFT_MIN, ASHIFT_MAX);
 				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
 				goto error;
 			}
 			break;
 
 		case ZPOOL_PROP_BOOTFS:
 			if (flags.create || flags.import) {
 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 				    "property '%s' cannot be set at creation "
 				    "or import time"), propname);
 				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
 				goto error;
 			}
 
 			if (version < SPA_VERSION_BOOTFS) {
 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 				    "pool must be upgraded to support "
 				    "'%s' property"), propname);
 				(void) zfs_error(hdl, EZFS_BADVERSION, errbuf);
 				goto error;
 			}
 
 			/*
 			 * bootfs property value has to be a dataset name and
 			 * the dataset has to be in the same pool as it sets to.
 			 */
 			if (!bootfs_name_valid(poolname, strval)) {
 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' "
 				    "is an invalid name"), strval);
 				(void) zfs_error(hdl, EZFS_INVALIDNAME, errbuf);
 				goto error;
 			}
 
 			/* The pool only needs to be openable; close it again. */
 			if ((zhp = zpool_open_canfail(hdl, poolname)) == NULL) {
 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 				    "could not open pool '%s'"), poolname);
 				(void) zfs_error(hdl, EZFS_OPENFAILED, errbuf);
 				goto error;
 			}
 			zpool_close(zhp);
 			break;
 
 		case ZPOOL_PROP_ALTROOT:
 			if (!flags.create && !flags.import) {
 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 				    "property '%s' can only be set during pool "
 				    "creation or import"), propname);
 				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
 				goto error;
 			}
 
 			if (strval[0] != '/') {
 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 				    "bad alternate root '%s'"), strval);
 				(void) zfs_error(hdl, EZFS_BADPATH, errbuf);
 				goto error;
 			}
 			break;
 
 		case ZPOOL_PROP_CACHEFILE:
 			if (strval[0] == '\0')
 				break;
 
 			if (strcmp(strval, "none") == 0)
 				break;
 
 			if (strval[0] != '/') {
 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 				    "property '%s' must be empty, an "
 				    "absolute path, or 'none'"), propname);
 				(void) zfs_error(hdl, EZFS_BADPATH, errbuf);
 				goto error;
 			}
 
 			/* strval starts with '/', so a slash is always found. */
 			slash = strrchr(strval, '/');
 
 			if (slash[1] == '\0' || strcmp(slash, "/.") == 0 ||
 			    strcmp(slash, "/..") == 0) {
 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 				    "'%s' is not a valid file"), strval);
 				(void) zfs_error(hdl, EZFS_BADPATH, errbuf);
 				goto error;
 			}
 
 			/*
 			 * Temporarily cut the string at the last slash so
 			 * that strval names the containing directory.
 			 */
 			*slash = '\0';
 
 			if (strval[0] != '\0' &&
 			    (stat64(strval, &statbuf) != 0 ||
 			    !S_ISDIR(statbuf.st_mode))) {
 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 				    "'%s' is not a valid directory"),
 				    strval);
 				(void) zfs_error(hdl, EZFS_BADPATH, errbuf);
 				goto error;
 			}
 
 			*slash = '/';
 			break;
 
 		case ZPOOL_PROP_COMPATIBILITY:
 			/* Size the report buffer from its declaration. */
 			switch (zpool_load_compat(strval, NULL, report,
 			    sizeof (report))) {
 			case ZPOOL_COMPATIBILITY_OK:
 			case ZPOOL_COMPATIBILITY_WARNTOKEN:
 				break;
 			case ZPOOL_COMPATIBILITY_BADFILE:
 			case ZPOOL_COMPATIBILITY_BADTOKEN:
 			case ZPOOL_COMPATIBILITY_NOFILES:
 				zfs_error_aux(hdl, "%s", report);
 				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
 				goto error;
 			}
 			break;
 
 		case ZPOOL_PROP_COMMENT:
 			for (check = strval; *check != '\0'; check++) {
 				/*
 				 * isprint() is only defined for values
 				 * representable as unsigned char (or EOF),
 				 * so a plain char must be converted first.
 				 */
 				if (!isprint((unsigned char)*check)) {
 					zfs_error_aux(hdl,
 					    dgettext(TEXT_DOMAIN,
 					    "comment may only have printable "
 					    "characters"));
 					(void) zfs_error(hdl, EZFS_BADPROP,
 					    errbuf);
 					goto error;
 				}
 			}
 			if (strlen(strval) > ZPROP_MAX_COMMENT) {
 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 				    "comment must not exceed %d characters"),
 				    ZPROP_MAX_COMMENT);
 				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
 				goto error;
 			}
 			break;
 		case ZPOOL_PROP_READONLY:
 			if (!flags.import) {
 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 				    "property '%s' can only be set at "
 				    "import time"), propname);
 				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
 				goto error;
 			}
 			break;
 		case ZPOOL_PROP_MULTIHOST:
 			if (get_system_hostid() == 0) {
 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 				    "requires a non-zero system hostid"));
 				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
 				goto error;
 			}
 			break;
 		case ZPOOL_PROP_DEDUPDITTO:
 			printf("Note: property '%s' no longer has "
 			    "any effect\n", propname);
 			break;
 
 		default:
 			break;
 		}
 	}
 
 	return (retprops);
 error:
 	nvlist_free(retprops);
 	return (NULL);
 }
 
 /*
  * Set zpool property : propname=propval.
  *
  * The name/value pair is validated with zpool_valid_proplist() as a
  * plain "set" (neither the create nor the import flag is raised) and
  * then sent to the kernel with ZFS_IOC_POOL_SET_PROPS.  On success the
  * handle's cached properties are refreshed.
  *
  * Returns 0 on success and non-zero on failure, after reporting the
  * error through the pool's library handle.
  */
 int
 zpool_set_prop(zpool_handle_t *zhp, const char *propname, const char *propval)
 {
 	zfs_cmd_t zc = {"\0"};
 	int ret = -1;
 	int ioc_errno;
 	char errbuf[1024];
 	nvlist_t *nvl = NULL;
 	nvlist_t *realprops;
 	uint64_t version;
 	prop_flags_t flags = { 0 };
 
 	(void) snprintf(errbuf, sizeof (errbuf),
 	    dgettext(TEXT_DOMAIN, "cannot set property for '%s'"),
 	    zhp->zpool_name);
 
 	if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0)
 		return (no_memory(zhp->zpool_hdl));
 
 	if (nvlist_add_string(nvl, propname, propval) != 0) {
 		nvlist_free(nvl);
 		return (no_memory(zhp->zpool_hdl));
 	}
 
 	version = zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL);
 	if ((realprops = zpool_valid_proplist(zhp->zpool_hdl,
 	    zhp->zpool_name, nvl, version, flags, errbuf)) == NULL) {
 		nvlist_free(nvl);
 		return (-1);
 	}
 
 	/* From here on only the validated copy is needed. */
 	nvlist_free(nvl);
 	nvl = realprops;
 
 	/*
 	 * Execute the corresponding ioctl() to set this property.
 	 */
 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
 
 	if (zcmd_write_src_nvlist(zhp->zpool_hdl, &zc, nvl) != 0) {
 		nvlist_free(nvl);
 		return (-1);
 	}
 
 	ret = zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_POOL_SET_PROPS, &zc);
 
 	/*
 	 * Capture errno before the cleanup calls below get a chance to
 	 * overwrite it; it is what gets reported if the ioctl failed.
 	 */
 	ioc_errno = errno;
 
 	zcmd_free_nvlists(&zc);
 	nvlist_free(nvl);
 
 	if (ret)
 		(void) zpool_standard_error(zhp->zpool_hdl, ioc_errno,
 		    errbuf);
 	else
 		(void) zpool_props_refresh(zhp);
 
 	return (ret);
 }
 
 /*
  * Expand the property list '*plp' for display of pool 'zhp'.
  *
  * After the generic expansion done by zprop_expand_list(), an "all
  * properties" list additionally gets:
  *   - one "feature@<name>" entry per known feature, but only when the
  *     list was empty on entry (i.e. on its first expansion), and
  *   - one "unsupported@<guid>" entry for every feature present on the
  *     pool that this library does not support, unless an entry with
  *     that name is already on the list.
  * Finally the column width of every native property is widened to fit
  * this pool's value.
  *
  * Returns 0 on success, or -1 if the generic expansion fails or memory
  * for a new entry cannot be allocated.  Entries appended before a
  * failure remain on the list.
  */
 int
 zpool_expand_proplist(zpool_handle_t *zhp, zprop_list_t **plp,
     boolean_t literal)
 {
 	libzfs_handle_t *hdl = zhp->zpool_hdl;
 	zprop_list_t *entry;
 	char buf[ZFS_MAXPROPLEN];
 	nvlist_t *features = NULL;
 	nvpair_t *nvp;
 	zprop_list_t **last;
 	boolean_t firstexpand = (NULL == *plp);
 	int i;
 
 	if (zprop_expand_list(hdl, plp, ZFS_TYPE_POOL) != 0)
 		return (-1);
 
 	/* Find the tail link so new entries can be appended. */
 	last = plp;
 	while (*last != NULL)
 		last = &(*last)->pl_next;
 
 	if ((*plp)->pl_all)
 		features = zpool_get_features(zhp);
 
 	if ((*plp)->pl_all && firstexpand) {
 		for (i = 0; i < SPA_FEATURES; i++) {
 			entry = zfs_alloc(hdl, sizeof (zprop_list_t));
 			if (entry == NULL)
 				return (-1);
 
 			entry->pl_prop = ZPROP_INVAL;
 			entry->pl_user_prop = zfs_asprintf(hdl, "feature@%s",
 			    spa_feature_table[i].fi_uname);
 			if (entry->pl_user_prop == NULL) {
 				free(entry);
 				return (-1);
 			}
 			entry->pl_width = strlen(entry->pl_user_prop);
 			entry->pl_all = B_TRUE;
 
 			*last = entry;
 			last = &entry->pl_next;
 		}
 	}
 
 	/* add any unsupported features */
 	for (nvp = nvlist_next_nvpair(features, NULL);
 	    nvp != NULL; nvp = nvlist_next_nvpair(features, nvp)) {
 		char *propname;
 		boolean_t found;
 
 		if (zfeature_is_supported(nvpair_name(nvp)))
 			continue;
 
 		propname = zfs_asprintf(hdl, "unsupported@%s",
 		    nvpair_name(nvp));
 		if (propname == NULL)
 			return (-1);
 
 		/*
 		 * Before adding the property to the list make sure that no
 		 * other pool already added the same property.
 		 */
 		found = B_FALSE;
 		for (entry = *plp; entry != NULL; entry = entry->pl_next) {
 			if (entry->pl_user_prop != NULL &&
 			    strcmp(propname, entry->pl_user_prop) == 0) {
 				found = B_TRUE;
 				break;
 			}
 		}
 		if (found) {
 			free(propname);
 			continue;
 		}
 
 		entry = zfs_alloc(hdl, sizeof (zprop_list_t));
 		if (entry == NULL) {
 			free(propname);
 			return (-1);
 		}
 		entry->pl_prop = ZPROP_INVAL;
 		entry->pl_user_prop = propname;
 		entry->pl_width = strlen(entry->pl_user_prop);
 		entry->pl_all = B_TRUE;
 
 		*last = entry;
 		last = &entry->pl_next;
 	}
 
 	/*
 	 * Widen each native property's column to fit this pool's value.
 	 * Fixed-width entries are only re-measured for literal output.
 	 */
 	for (entry = *plp; entry != NULL; entry = entry->pl_next) {
 		if (entry->pl_fixed && !literal)
 			continue;
 
 		if (entry->pl_prop != ZPROP_INVAL &&
 		    zpool_get_prop(zhp, entry->pl_prop, buf, sizeof (buf),
 		    NULL, literal) == 0) {
 			if (strlen(buf) > entry->pl_width)
 				entry->pl_width = strlen(buf);
 		}
 	}
 
 	return (0);
 }
 
 /*
  * Get the state for the given feature on the given ZFS pool.
  *
  *   zhp       pool handle
  *   propname  "feature@<name>" or "unsupported@<guid>"
  *   buf/len   caller-supplied output buffer and its size
  *
  * For a supported feature 'buf' receives the disabled, enabled or
  * active string depending on whether the feature is present on the
  * pool and whether its reference count is zero.  For an unsupported
  * feature 'buf' receives the inactive or readonly string.
  *
  * Returns 0 on success.  Returns ENOTSUP, with "-" in 'buf', when the
  * feature name is unknown, when an unsupported feature is not present
  * on the pool, or when 'propname' has no '@' separator.
  */
 int
 zpool_prop_get_feature(zpool_handle_t *zhp, const char *propname, char *buf,
     size_t len)
 {
 	uint64_t refcount;
 	boolean_t found = B_FALSE;
 	nvlist_t *features = zpool_get_features(zhp);
 	boolean_t supported;
 	const char *at = strchr(propname, '@');
 	const char *feature;
 
 	supported = zpool_prop_feature(propname);
 	ASSERT(supported || zpool_prop_unsupported(propname));
 
 	/*
 	 * Without a separator there is no feature part to look up; do not
 	 * compute a pointer past a NULL strchr() result.
 	 */
 	if (at == NULL) {
 		(void) strlcpy(buf, "-", len);
 		return (ENOTSUP);
 	}
 	feature = at + 1;
 
 	/*
 	 * Convert from feature name to feature guid. This conversion is
 	 * unnecessary for unsupported@... properties because they already
 	 * use guids.
 	 */
 	if (supported) {
 		int ret;
 		spa_feature_t fid;
 
 		ret = zfeature_lookup_name(feature, &fid);
 		if (ret != 0) {
 			(void) strlcpy(buf, "-", len);
 			return (ENOTSUP);
 		}
 		feature = spa_feature_table[fid].fi_guid;
 	}
 
 	if (nvlist_lookup_uint64(features, feature, &refcount) == 0)
 		found = B_TRUE;
 
 	if (supported) {
 		if (!found) {
 			(void) strlcpy(buf, ZFS_FEATURE_DISABLED, len);
 		} else  {
 			if (refcount == 0)
 				(void) strlcpy(buf, ZFS_FEATURE_ENABLED, len);
 			else
 				(void) strlcpy(buf, ZFS_FEATURE_ACTIVE, len);
 		}
 	} else {
 		if (found) {
 			/*
 			 * Bounded copies, like every other write to 'buf'
 			 * in this function: the caller's buffer is only
 			 * 'len' bytes long.
 			 */
 			if (refcount == 0) {
 				(void) strlcpy(buf, ZFS_UNSUPPORTED_INACTIVE,
 				    len);
 			} else {
 				(void) strlcpy(buf, ZFS_UNSUPPORTED_READONLY,
 				    len);
 			}
 		} else {
 			(void) strlcpy(buf, "-", len);
 			return (ENOTSUP);
 		}
 	}
 
 	return (0);
 }
 
 /*
  * Validate the given pool name.  When 'hdl' is non-NULL, an extended
  * error message describing the problem is recorded on it with
  * zfs_error_aux().  'isopen' indicates the name refers to an existing
  * pool, which relaxes the reserved-name check below.
  */
 boolean_t
 zpool_name_valid(libzfs_handle_t *hdl, boolean_t isopen, const char *pool)
 {
 	namecheck_err_t why;
 	char what;
 	int ret = pool_namecheck(pool, &why, &what);
 
 	/*
 	 * The rules for reserved pool names were extended at a later point.
 	 * But we need to support users with existing pools that may now be
 	 * invalid.  So we only check for this expanded set of names during a
 	 * create (or import), and only in userland.
 	 */
 	if (ret == 0 && !isopen) {
 		boolean_t reserved =
 		    strncmp(pool, "mirror", 6) == 0 ||
 		    strncmp(pool, "raidz", 5) == 0 ||
 		    strncmp(pool, "draid", 5) == 0 ||
 		    strncmp(pool, "spare", 5) == 0 ||
 		    strcmp(pool, "log") == 0;
 
 		if (reserved) {
 			if (hdl != NULL)
 				zfs_error_aux(hdl,
 				    dgettext(TEXT_DOMAIN, "name is reserved"));
 			return (B_FALSE);
 		}
 	}
 
 	if (ret == 0)
 		return (B_TRUE);
 
 	/* The name is invalid; without a handle there is nothing to log. */
 	if (hdl == NULL)
 		return (B_FALSE);
 
 	switch (why) {
 	case NAME_ERR_TOOLONG:
 		zfs_error_aux(hdl,
 		    dgettext(TEXT_DOMAIN, "name is too long"));
 		break;
 
 	case NAME_ERR_INVALCHAR:
 		zfs_error_aux(hdl,
 		    dgettext(TEXT_DOMAIN, "invalid character "
 		    "'%c' in pool name"), what);
 		break;
 
 	case NAME_ERR_NOLETTER:
 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 		    "name must begin with a letter"));
 		break;
 
 	case NAME_ERR_RESERVED:
 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 		    "name is reserved"));
 		break;
 
 	case NAME_ERR_DISKLIKE:
 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 		    "pool name is reserved"));
 		break;
 
 	case NAME_ERR_LEADING_SLASH:
 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 		    "leading slash in name"));
 		break;
 
 	case NAME_ERR_EMPTY_COMPONENT:
 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 		    "empty component in name"));
 		break;
 
 	case NAME_ERR_TRAILING_SLASH:
 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 		    "trailing slash in name"));
 		break;
 
 	case NAME_ERR_MULTIPLE_DELIMITERS:
 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 		    "multiple '@' and/or '#' delimiters in "
 		    "name"));
 		break;
 
 	case NAME_ERR_NO_AT:
 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 		    "permission set is missing '@'"));
 		break;
 
 	default:
 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 		    "(%d) not defined"), why);
 		break;
 	}
 
 	return (B_FALSE);
 }
 
 /*
  * Open a handle to the named pool.  Unlike zpool_open(), a pool in
  * the FAULTED state is still returned.  NULL is returned for an
  * invalid name, an allocation failure, a failed stats refresh, or a
  * pool that does not exist.
  */
 zpool_handle_t *
 zpool_open_canfail(libzfs_handle_t *hdl, const char *pool)
 {
 	zpool_handle_t *zhp;
 	boolean_t missing;
 
 	/* Reject malformed names before allocating anything. */
 	if (!zpool_name_valid(hdl, B_TRUE, pool)) {
 		(void) zfs_error_fmt(hdl, EZFS_INVALIDNAME,
 		    dgettext(TEXT_DOMAIN, "cannot open '%s'"),
 		    pool);
 		return (NULL);
 	}
 
 	zhp = zfs_alloc(hdl, sizeof (zpool_handle_t));
 	if (zhp == NULL)
 		return (NULL);
 
 	zhp->zpool_hdl = hdl;
 	(void) strlcpy(zhp->zpool_name, pool, sizeof (zhp->zpool_name));
 
 	if (zpool_refresh_stats(zhp, &missing) != 0)
 		goto fail;
 
 	if (missing) {
 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "no such pool"));
 		(void) zfs_error_fmt(hdl, EZFS_NOENT,
 		    dgettext(TEXT_DOMAIN, "cannot open '%s'"), pool);
 		goto fail;
 	}
 
 	return (zhp);
 
 fail:
 	zpool_close(zhp);
 	return (NULL);
 }
 
 /*
  * Variant of zpool_open_canfail() that reports no error for a missing
  * pool.  Used when iterating over pools (because the configuration
  * cache may be out of date).  Returns -1 on allocation or refresh
  * failure, leaving '*ret' unset; otherwise returns 0 with '*ret' set
  * to the new handle, or to NULL if the pool does not exist.
  */
 int
 zpool_open_silent(libzfs_handle_t *hdl, const char *pool, zpool_handle_t **ret)
 {
 	boolean_t missing;
 	zpool_handle_t *zhp = zfs_alloc(hdl, sizeof (zpool_handle_t));
 
 	if (zhp == NULL)
 		return (-1);
 
 	zhp->zpool_hdl = hdl;
 	(void) strlcpy(zhp->zpool_name, pool, sizeof (zhp->zpool_name));
 
 	if (zpool_refresh_stats(zhp, &missing) != 0) {
 		zpool_close(zhp);
 		return (-1);
 	}
 
 	/* A vanished pool is not an error: hand back a NULL handle. */
 	if (missing) {
 		zpool_close(zhp);
 		zhp = NULL;
 	}
 
 	*ret = zhp;
 	return (0);
 }
 
 /*
  * Open a pool handle like zpool_open_canfail(), but fail with
  * EZFS_POOLUNAVAIL (returning NULL) if the pool's state is
  * POOL_STATE_UNAVAIL.
  */
 zpool_handle_t *
 zpool_open(libzfs_handle_t *hdl, const char *pool)
 {
 	zpool_handle_t *zhp = zpool_open_canfail(hdl, pool);
 
 	if (zhp != NULL && zhp->zpool_state == POOL_STATE_UNAVAIL) {
 		(void) zfs_error_fmt(hdl, EZFS_POOLUNAVAIL,
 		    dgettext(TEXT_DOMAIN, "cannot open '%s'"), zhp->zpool_name);
 		zpool_close(zhp);
 		zhp = NULL;
 	}
 
 	return (zhp);
 }
 
 /*
  * Close the handle: release the property and config nvlists attached
  * to it, then the handle itself.
  */
 void
 zpool_close(zpool_handle_t *zhp)
 {
 	nvlist_free(zhp->zpool_props);
 	nvlist_free(zhp->zpool_old_config);
 	nvlist_free(zhp->zpool_config);
 
 	free(zhp);
 }
 
 /*
  * Return the pool name stored in the handle.  The string belongs to
  * the handle and is not copied.
  */
 const char *
 zpool_get_name(zpool_handle_t *zhp)
 {
 	const char *name = zhp->zpool_name;
 
 	return (name);
 }
 
 
 /*
  * Return the pool state recorded in the handle (ACTIVE or UNAVAILABLE).
  */
 int
 zpool_get_state(zpool_handle_t *zhp)
 {
 	int state = zhp->zpool_state;
 
 	return (state);
 }
 
 /*
  * Return B_TRUE if any direct child of 'nvroot' carries an allocation bias
  * of VDEV_ALLOC_BIAS_SPECIAL, B_FALSE otherwise (including when 'nvroot'
  * has no children array).
  */
 static boolean_t
 zpool_has_special_vdev(nvlist_t *nvroot)
 {
 	nvlist_t **kids;
 	uint_t nkids;
 	uint_t i;
 
 	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, &kids,
 	    &nkids) != 0)
 		return (B_FALSE);
 
 	for (i = 0; i < nkids; i++) {
 		char *bias;
 
 		/* Children without an allocation bias cannot be special. */
 		if (nvlist_lookup_string(kids[i],
 		    ZPOOL_CONFIG_ALLOCATION_BIAS, &bias) != 0)
 			continue;
 
 		if (strcmp(bias, VDEV_ALLOC_BIAS_SPECIAL) == 0)
 			return (B_TRUE);
 	}
 
 	return (B_FALSE);
 }
 
 /*
  * Return B_TRUE if any direct child of 'nvroot' has the vdev type
  * VDEV_TYPE_DRAID, B_FALSE otherwise (including when 'nvroot' has no
  * children array).
  */
 static boolean_t
 zpool_has_draid_vdev(nvlist_t *nvroot)
 {
 	nvlist_t **kids;
 	uint_t nkids;
 	uint_t i;
 
 	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
 	    &kids, &nkids) != 0)
 		return (B_FALSE);
 
 	for (i = 0; i < nkids; i++) {
 		char *type;
 
 		/* Skip children that do not declare a type. */
 		if (nvlist_lookup_string(kids[i],
 		    ZPOOL_CONFIG_TYPE, &type) != 0)
 			continue;
 
 		if (strcmp(type, VDEV_TYPE_DRAID) == 0)
 			return (B_TRUE);
 	}
 
 	return (B_FALSE);
 }
 
 /*
  * Output a dRAID top-level vdev name in to the provided buffer.
  */
 static char *
 zpool_draid_name(char *name, int len, uint64_t data, uint64_t parity,
     uint64_t spares, uint64_t children)
 {
 	snprintf(name, len, "%s%llu:%llud:%lluc:%llus",
 	    VDEV_TYPE_DRAID, (u_longlong_t)parity, (u_longlong_t)data,
 	    (u_longlong_t)children, (u_longlong_t)spares);
 
 	return (name);
 }
 
 /*
  * Return B_TRUE if the provided name is a dRAID spare name.
  */
 boolean_t
 zpool_is_draid_spare(const char *name)
 {
 	uint64_t spare_id, parity, vdev_id;
 
 	if (sscanf(name, VDEV_TYPE_DRAID "%llu-%llu-%llu",
 	    (u_longlong_t *)&parity, (u_longlong_t *)&vdev_id,
 	    (u_longlong_t *)&spare_id) == 3) {
 		return (B_TRUE);
 	}
 
 	return (B_FALSE);
 }
 
 /*
  * Create the named pool, using the provided vdev list.  It is assumed
  * that the consumer has already validated the contents of the nvlist, so we
  * don't have to worry about error semantics.
  *
  *	hdl	library handle used for the ioctl and for error reporting
  *	pool	name of the pool to create
  *	nvroot	vdev tree describing the new pool
  *	props	pool properties to apply, or NULL
  *	fsprops	properties for the root dataset, or NULL
  *
  * Returns 0 on success.  On failure returns a nonzero value: the result of
  * zfs_error()/zpool_standard_error() once an error has been recorded on
  * 'hdl', or -1 when an intermediate step fails.
  */
 int
 zpool_create(libzfs_handle_t *hdl, const char *pool, nvlist_t *nvroot,
     nvlist_t *props, nvlist_t *fsprops)
 {
 	zfs_cmd_t zc = {"\0"};
 	nvlist_t *zc_fsprops = NULL;
 	nvlist_t *zc_props = NULL;
 	nvlist_t *hidden_args = NULL;
 	uint8_t *wkeydata = NULL;
 	uint_t wkeylen = 0;
 	char msg[1024];
 	int ret = -1;
 	int err;
 
 	(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
 	    "cannot create '%s'"), pool);
 
 	if (!zpool_name_valid(hdl, B_FALSE, pool))
 		return (zfs_error(hdl, EZFS_INVALIDNAME, msg));
 
 	if (zcmd_write_conf_nvlist(hdl, &zc, nvroot) != 0)
 		return (-1);
 
 	if (props) {
 		prop_flags_t flags = { .create = B_TRUE, .import = B_FALSE };
 
 		if ((zc_props = zpool_valid_proplist(hdl, pool, props,
 		    SPA_VERSION_1, flags, msg)) == NULL) {
 			goto out;
 		}
 	}
 
 	if (fsprops) {
 		uint64_t zoned;
 		char *zonestr;
 
 		zoned = ((nvlist_lookup_string(fsprops,
 		    zfs_prop_to_name(ZFS_PROP_ZONED), &zonestr) == 0) &&
 		    strcmp(zonestr, "on") == 0);
 
 		if ((zc_fsprops = zfs_valid_proplist(hdl, ZFS_TYPE_FILESYSTEM,
 		    fsprops, zoned, NULL, NULL, B_TRUE, msg)) == NULL) {
 			goto out;
 		}
 
 		if (nvlist_exists(zc_fsprops,
 		    zfs_prop_to_name(ZFS_PROP_SPECIAL_SMALL_BLOCKS)) &&
 		    !zpool_has_special_vdev(nvroot)) {
 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 			    "%s property requires a special vdev"),
 			    zfs_prop_to_name(ZFS_PROP_SPECIAL_SMALL_BLOCKS));
 			(void) zfs_error(hdl, EZFS_BADPROP, msg);
 			goto out;
 		}
 
 		/* The root dataset properties travel inside the pool props. */
 		if (!zc_props &&
 		    (nvlist_alloc(&zc_props, NV_UNIQUE_NAME, 0) != 0)) {
 			goto out;
 		}
 		if (zfs_crypto_create(hdl, NULL, zc_fsprops, props, B_TRUE,
 		    &wkeydata, &wkeylen) != 0) {
 			zfs_error(hdl, EZFS_CRYPTOFAILED, msg);
 			goto out;
 		}
 		if (nvlist_add_nvlist(zc_props,
 		    ZPOOL_ROOTFS_PROPS, zc_fsprops) != 0) {
 			goto out;
 		}
 		if (wkeydata != NULL) {
 			if (nvlist_alloc(&hidden_args, NV_UNIQUE_NAME, 0) != 0)
 				goto out;
 
 			if (nvlist_add_uint8_array(hidden_args, "wkeydata",
 			    wkeydata, wkeylen) != 0)
 				goto out;
 
 			if (nvlist_add_nvlist(zc_props, ZPOOL_HIDDEN_ARGS,
 			    hidden_args) != 0)
 				goto out;
 		}
 	}
 
 	if (zc_props && zcmd_write_src_nvlist(hdl, &zc, zc_props) != 0)
 		goto out;
 
 	(void) strlcpy(zc.zc_name, pool, sizeof (zc.zc_name));
 
 	ret = zfs_ioctl(hdl, ZFS_IOC_POOL_CREATE, &zc);
 	if (ret == 0)
 		goto out;
 
 	/*
 	 * Capture errno immediately: the helper and library calls made
 	 * while reporting the failure and releasing resources are free to
 	 * overwrite it.
 	 */
 	err = errno;
 
 	switch (err) {
 	case EBUSY:
 		/*
 		 * This can happen if the user has specified the same
 		 * device multiple times.  We can't reliably detect this
 		 * until we try to add it and see we already have a
 		 * label.  This can also happen if the device is
 		 * part of an active md or lvm device.
 		 */
 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 		    "one or more vdevs refer to the same device, or "
 		    "one of\nthe devices is part of an active md or "
 		    "lvm device"));
 		ret = zfs_error(hdl, EZFS_BADDEV, msg);
 		break;
 
 	case ERANGE:
 		/*
 		 * This happens if the record size is smaller or larger
 		 * than the allowed size range, or not a power of 2.
 		 *
 		 * NOTE: although zfs_valid_proplist is called earlier,
 		 * this case may have slipped through since the
 		 * pool does not exist yet and it is therefore
 		 * impossible to read properties e.g. max blocksize
 		 * from the pool.
 		 */
 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 		    "record size invalid"));
 		ret = zfs_error(hdl, EZFS_BADPROP, msg);
 		break;
 
 	case EOVERFLOW:
 		/*
 		 * This occurs when one of the devices is below
 		 * SPA_MINDEVSIZE.  Unfortunately, we can't detect which
 		 * device was the problem device since there's no
 		 * reliable way to determine device size from userland.
 		 */
 		{
 			char buf[64];
 
 			zfs_nicebytes(SPA_MINDEVSIZE, buf,
 			    sizeof (buf));
 
 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 			    "one or more devices is less than the "
 			    "minimum size (%s)"), buf);
 		}
 		ret = zfs_error(hdl, EZFS_BADDEV, msg);
 		break;
 
 	case ENOSPC:
 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 		    "one or more devices is out of space"));
 		ret = zfs_error(hdl, EZFS_BADDEV, msg);
 		break;
 
 	case EINVAL:
 		if (zpool_has_draid_vdev(nvroot) &&
 		    zfeature_lookup_name("draid", NULL) != 0) {
 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 			    "dRAID vdevs are unsupported by the "
 			    "kernel"));
 			ret = zfs_error(hdl, EZFS_BADDEV, msg);
 		} else {
 			ret = zpool_standard_error(hdl, err, msg);
 		}
 		break;
 
 	default:
 		ret = zpool_standard_error(hdl, err, msg);
 		break;
 	}
 
 out:
 	/* Single exit path for success and failure alike. */
 	zcmd_free_nvlists(&zc);
 	nvlist_free(zc_props);
 	nvlist_free(zc_fsprops);
 	nvlist_free(hidden_args);
 	free(wkeydata);
 	return (ret);
 }
 
 /*
  * Destroy the given pool.  It is up to the caller to ensure that there are no
  * datasets left in the pool.
  *
  * 'log_str' is passed to the kernel through zc_history.  For an active
  * pool the root filesystem is opened first so that its mountpoint can be
  * removed once the pool is gone.  Returns 0 on success, -1 on failure.
  */
 int
 zpool_destroy(zpool_handle_t *zhp, const char *log_str)
 {
 	libzfs_handle_t *hdl = zhp->zpool_hdl;
 	zfs_cmd_t zc = {"\0"};
 	zfs_handle_t *zfp = NULL;
 	char msg[1024];
 
 	if (zhp->zpool_state == POOL_STATE_ACTIVE) {
 		zfp = zfs_open(hdl, zhp->zpool_name, ZFS_TYPE_FILESYSTEM);
 		if (zfp == NULL)
 			return (-1);
 	}
 
 	zc.zc_history = (uint64_t)(uintptr_t)log_str;
 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
 
 	if (zfs_ioctl(hdl, ZFS_IOC_POOL_DESTROY, &zc) == 0) {
 		if (zfp != NULL) {
 			remove_mountpoint(zfp);
 			zfs_close(zfp);
 		}
 		return (0);
 	}
 
 	(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
 	    "cannot destroy '%s'"), zhp->zpool_name);
 
 	if (errno == EROFS) {
 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 		    "one or more devices is read only"));
 		(void) zfs_error(hdl, EZFS_BADDEV, msg);
 	} else {
 		(void) zpool_standard_error(hdl, errno, msg);
 	}
 
 	if (zfp != NULL)
 		zfs_close(zfp);
 
 	return (-1);
 }
 
 /*
  * Create a checkpoint in the given pool.  Returns 0 on success; on failure
  * records a "cannot checkpoint" error on the library handle and returns -1.
  */
 int
 zpool_checkpoint(zpool_handle_t *zhp)
 {
 	char msg[1024];
 	int error = lzc_pool_checkpoint(zhp->zpool_name);
 
 	if (error == 0)
 		return (0);
 
 	(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
 	    "cannot checkpoint '%s'"), zhp->zpool_name);
 	(void) zpool_standard_error(zhp->zpool_hdl, error, msg);
 
 	return (-1);
 }
 
 /*
  * Discard the checkpoint from the given pool.  Returns 0 on success; on
  * failure records a "cannot discard checkpoint" error on the library handle
  * and returns -1.
  */
 int
 zpool_discard_checkpoint(zpool_handle_t *zhp)
 {
 	char msg[1024];
 	int error = lzc_pool_checkpoint_discard(zhp->zpool_name);
 
 	if (error == 0)
 		return (0);
 
 	(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
 	    "cannot discard checkpoint in '%s'"), zhp->zpool_name);
 	(void) zpool_standard_error(zhp->zpool_hdl, error, msg);
 
 	return (-1);
 }
 
 /*
  * Add the given vdevs to the pool.  The caller must have already performed the
  * necessary verification to ensure that the vdev specification is well-formed.
  *
  * Hot spares and cache devices are rejected up front when the pool version
  * predates their introduction.  Returns 0 on success; otherwise an error is
  * recorded on the library handle and a nonzero value is returned.
  */
 int
 zpool_add(zpool_handle_t *zhp, nvlist_t *nvroot)
 {
 	libzfs_handle_t *hdl = zhp->zpool_hdl;
 	zfs_cmd_t zc = {"\0"};
 	nvlist_t **spares, **l2cache;
 	uint_t nspares, nl2cache;
 	char msg[1024];
 
 	(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
 	    "cannot add to '%s'"), zhp->zpool_name);
 
 	if (zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL) <
 	    SPA_VERSION_SPARES &&
 	    nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
 	    &spares, &nspares) == 0) {
 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be "
 		    "upgraded to add hot spares"));
 		return (zfs_error(hdl, EZFS_BADVERSION, msg));
 	}
 
 	if (zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL) <
 	    SPA_VERSION_L2CACHE &&
 	    nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
 	    &l2cache, &nl2cache) == 0) {
 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be "
 		    "upgraded to add cache devices"));
 		return (zfs_error(hdl, EZFS_BADVERSION, msg));
 	}
 
 	if (zcmd_write_conf_nvlist(hdl, &zc, nvroot) != 0)
 		return (-1);
 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
 
 	if (zfs_ioctl(hdl, ZFS_IOC_VDEV_ADD, &zc) == 0) {
 		zcmd_free_nvlists(&zc);
 		return (0);
 	}
 
 	/* The ioctl failed: translate errno into a library error. */
 	switch (errno) {
 	case EBUSY:
 		/*
 		 * This can happen if the user has specified the same
 		 * device multiple times.  We can't reliably detect this
 		 * until we try to add it and see we already have a
 		 * label.
 		 */
 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 		    "one or more vdevs refer to the same device"));
 		(void) zfs_error(hdl, EZFS_BADDEV, msg);
 		break;
 
 	case EINVAL:
 		if (zpool_has_draid_vdev(nvroot) &&
 		    zfeature_lookup_name("draid", NULL) != 0) {
 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 			    "dRAID vdevs are unsupported by the "
 			    "kernel"));
 		} else {
 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 			    "invalid config; a pool with removing/"
 			    "removed vdevs does not support adding "
 			    "raidz or dRAID vdevs"));
 		}
 
 		(void) zfs_error(hdl, EZFS_BADDEV, msg);
 		break;
 
 	case EOVERFLOW: {
 		/*
 		 * This occurs when one of the devices is below
 		 * SPA_MINDEVSIZE.  Unfortunately, we can't detect which
 		 * device was the problem device since there's no
 		 * reliable way to determine device size from userland.
 		 */
 		char minsize[64];
 
 		zfs_nicebytes(SPA_MINDEVSIZE, minsize, sizeof (minsize));
 
 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 		    "device is less than the minimum "
 		    "size (%s)"), minsize);
 		(void) zfs_error(hdl, EZFS_BADDEV, msg);
 		break;
 	}
 
 	case ENOTSUP:
 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 		    "pool must be upgraded to add these vdevs"));
 		(void) zfs_error(hdl, EZFS_BADVERSION, msg);
 		break;
 
 	default:
 		(void) zpool_standard_error(hdl, errno, msg);
 		break;
 	}
 
 	zcmd_free_nvlists(&zc);
 
 	return (-1);
 }
 
 /*
  * Exports the pool from the system.  The caller must ensure that there are no
  * mounted datasets in the pool.
  *
  * 'force' and 'hardforce' are handed to the kernel in zc_cookie and zc_guid
  * respectively, and 'log_str' in zc_history.  Returns 0 on success, or the
  * value returned by the error-reporting routine on failure.
  */
 static int
 zpool_export_common(zpool_handle_t *zhp, boolean_t force, boolean_t hardforce,
     const char *log_str)
 {
 	libzfs_handle_t *hdl = zhp->zpool_hdl;
 	zfs_cmd_t zc = {"\0"};
 
 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
 	zc.zc_history = (uint64_t)(uintptr_t)log_str;
 	zc.zc_guid = hardforce;
 	zc.zc_cookie = force;
 
 	if (zfs_ioctl(hdl, ZFS_IOC_POOL_EXPORT, &zc) == 0)
 		return (0);
 
 	if (errno == EXDEV) {
 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 		    "use '-f' to override the following errors:\n"
 		    "'%s' has an active shared spare which could be"
 		    " used by other pools once '%s' is exported."),
 		    zhp->zpool_name, zhp->zpool_name);
 		return (zfs_error_fmt(hdl, EZFS_ACTIVE_SPARE,
 		    dgettext(TEXT_DOMAIN, "cannot export '%s'"),
 		    zhp->zpool_name));
 	}
 
 	return (zpool_standard_error_fmt(hdl, errno,
 	    dgettext(TEXT_DOMAIN, "cannot export '%s'"),
 	    zhp->zpool_name));
 }
 
 /*
  * Export the pool, forcing only if the caller asks for it; the hard-force
  * flag is never set on this path.
  */
 int
 zpool_export(zpool_handle_t *zhp, boolean_t force, const char *log_str)
 {
 	const boolean_t hardforce = B_FALSE;
 
 	return (zpool_export_common(zhp, force, hardforce, log_str));
 }
 
 /*
  * Export the pool with both the force and hard-force flags set.
  */
 int
 zpool_export_force(zpool_handle_t *zhp, const char *log_str)
 {
 	const boolean_t force = B_TRUE;
 	const boolean_t hardforce = B_TRUE;
 
 	return (zpool_export_common(zhp, force, hardforce, log_str));
 }
 
 /*
  * Tell the user, on stdout, which point in time a pool rewind returned
  * (or, for a dry run, would return) the pool to, and roughly how many
  * seconds or minutes of transactions were (or would be) discarded.
  *
  *	hdl	library handle; nothing is printed unless libzfs_printerr
  *		is set
  *	name	pool name used in the messages
  *	dryrun	B_TRUE to phrase the messages as "would ..."
  *	config	nvlist carrying the load/rewind info; may be NULL
  *
  * Silently returns if the rewind information or load time is absent.
  */
 static void
 zpool_rewind_exclaim(libzfs_handle_t *hdl, const char *name, boolean_t dryrun,
     nvlist_t *config)
 {
 	nvlist_t *nv = NULL;
 	uint64_t rewindto;
 	int64_t loss = -1;
 	time_t rewind_time;
 	struct tm t;
 	char timestr[128];
 
 	if (!hdl->libzfs_printerr || config == NULL)
 		return;
 
 	if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO, &nv) != 0 ||
 	    nvlist_lookup_nvlist(nv, ZPOOL_CONFIG_REWIND_INFO, &nv) != 0) {
 		return;
 	}
 
 	if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_LOAD_TIME, &rewindto) != 0)
 		return;
 	/* The rewind time is optional; 'loss' stays -1 when it is absent. */
 	(void) nvlist_lookup_int64(nv, ZPOOL_CONFIG_REWIND_TIME, &loss);
 
 	/*
 	 * Convert by value rather than reinterpreting the uint64_t through
 	 * a time_t pointer, which is only correct when time_t happens to be
 	 * a 64-bit type.
 	 */
 	rewind_time = (time_t)rewindto;
 
 	if (localtime_r(&rewind_time, &t) != NULL &&
 	    strftime(timestr, sizeof (timestr), "%c", &t) != 0) {
 		if (dryrun) {
 			(void) printf(dgettext(TEXT_DOMAIN,
 			    "Would be able to return %s "
 			    "to its state as of %s.\n"),
 			    name, timestr);
 		} else {
 			(void) printf(dgettext(TEXT_DOMAIN,
 			    "Pool %s returned to its state as of %s.\n"),
 			    name, timestr);
 		}
 		if (loss > 120) {
 			/* Report in minutes, rounded to the nearest one. */
 			(void) printf(dgettext(TEXT_DOMAIN,
 			    "%s approximately %lld "),
 			    dryrun ? "Would discard" : "Discarded",
 			    ((longlong_t)loss + 30) / 60);
 			(void) printf(dgettext(TEXT_DOMAIN,
 			    "minutes of transactions.\n"));
 		} else if (loss > 0) {
 			(void) printf(dgettext(TEXT_DOMAIN,
 			    "%s approximately %lld "),
 			    dryrun ? "Would discard" : "Discarded",
 			    (longlong_t)loss);
 			(void) printf(dgettext(TEXT_DOMAIN,
 			    "seconds of transactions.\n"));
 		}
 	}
 }
 
 /*
  * Print, on stdout, advice on recovering a pool by rewinding it.
  *
  *	hdl	library handle; nothing is printed unless libzfs_printerr
  *		is set
  *	name	pool name used in the suggested command
  *	reason	when >= 0 the text is prefixed with "action: " and
  *		suggests 'zpool clear -F'; when negative it is prefixed
  *		with a tab and suggests 'zpool import -F'
  *	config	nvlist carrying the load/rewind info
  *
  * If no rewind information (or no load time) is available, the user is
  * told to destroy and re-create the pool from a backup instead.
  */
 void
 zpool_explain_recover(libzfs_handle_t *hdl, const char *name, int reason,
     nvlist_t *config)
 {
 	nvlist_t *nv = NULL;
 	int64_t loss = -1;
 	uint64_t edata = UINT64_MAX;
 	uint64_t rewindto;
 	time_t rewind_time;
 	struct tm t;
 	char timestr[128];
 
 	if (!hdl->libzfs_printerr)
 		return;
 
 	if (reason >= 0)
 		(void) printf(dgettext(TEXT_DOMAIN, "action: "));
 	else
 		(void) printf(dgettext(TEXT_DOMAIN, "\t"));
 
 	/* All attempted rewinds failed if ZPOOL_CONFIG_LOAD_TIME missing */
 	if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO, &nv) != 0 ||
 	    nvlist_lookup_nvlist(nv, ZPOOL_CONFIG_REWIND_INFO, &nv) != 0 ||
 	    nvlist_lookup_uint64(nv, ZPOOL_CONFIG_LOAD_TIME, &rewindto) != 0)
 		goto no_info;
 
 	/* Both are optional; the defaults above mean "not reported". */
 	(void) nvlist_lookup_int64(nv, ZPOOL_CONFIG_REWIND_TIME, &loss);
 	(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_LOAD_DATA_ERRORS,
 	    &edata);
 
 	(void) printf(dgettext(TEXT_DOMAIN,
 	    "Recovery is possible, but will result in some data loss.\n"));
 
 	/*
 	 * Convert by value rather than reinterpreting the uint64_t through
 	 * a time_t pointer, which is only correct when time_t happens to be
 	 * a 64-bit type.
 	 */
 	rewind_time = (time_t)rewindto;
 
 	if (localtime_r(&rewind_time, &t) != NULL &&
 	    strftime(timestr, sizeof (timestr), "%c", &t) != 0) {
 		(void) printf(dgettext(TEXT_DOMAIN,
 		    "\tReturning the pool to its state as of %s\n"
 		    "\tshould correct the problem.  "),
 		    timestr);
 	} else {
 		(void) printf(dgettext(TEXT_DOMAIN,
 		    "\tReverting the pool to an earlier state "
 		    "should correct the problem.\n\t"));
 	}
 
 	if (loss > 120) {
 		/* Report in minutes, rounded to the nearest one. */
 		(void) printf(dgettext(TEXT_DOMAIN,
 		    "Approximately %lld minutes of data\n"
 		    "\tmust be discarded, irreversibly.  "),
 		    ((longlong_t)loss + 30) / 60);
 	} else if (loss > 0) {
 		(void) printf(dgettext(TEXT_DOMAIN,
 		    "Approximately %lld seconds of data\n"
 		    "\tmust be discarded, irreversibly.  "),
 		    (longlong_t)loss);
 	}
 	if (edata != 0 && edata != UINT64_MAX) {
 		if (edata == 1) {
 			(void) printf(dgettext(TEXT_DOMAIN,
 			    "After rewind, at least\n"
 			    "\tone persistent user-data error will remain.  "));
 		} else {
 			(void) printf(dgettext(TEXT_DOMAIN,
 			    "After rewind, several\n"
 			    "\tpersistent user-data errors will remain.  "));
 		}
 	}
 	(void) printf(dgettext(TEXT_DOMAIN,
 	    "Recovery can be attempted\n\tby executing 'zpool %s -F %s'.  "),
 	    reason >= 0 ? "clear" : "import", name);
 
 	(void) printf(dgettext(TEXT_DOMAIN,
 	    "A scrub of the pool\n"
 	    "\tis strongly recommended after recovery.\n"));
 	return;
 
 no_info:
 	(void) printf(dgettext(TEXT_DOMAIN,
 	    "Destroy and re-create the pool from\n\ta backup source.\n"));
 }
 
 /*
  * zpool_import() is a contracted interface. Should be kept the same
  * if possible.
  *
  * Applications should use zpool_import_props() to import a pool with
  * new properties value to be set.
  *
  *	hdl	library handle
  *	config	pool configuration to import
  *	newname	name to import the pool under, or NULL to keep its own
  *	altroot	alternate root, or NULL; when given, the pool is imported
  *		with that altroot and with cachefile set to "none"
  *
  * Returns the result of zpool_import_props(), or the result of
  * zfs_error_fmt() if the property list cannot be built.
  */
 int
 zpool_import(libzfs_handle_t *hdl, nvlist_t *config, const char *newname,
     char *altroot)
 {
 	nvlist_t *props = NULL;
 	int ret;
 
 	if (altroot != NULL) {
 		const char *name = newname;
 		char *origname;
 
 		/*
 		 * 'newname' is NULL when the pool keeps its own name, and
 		 * passing NULL to a "%s" conversion is undefined.  Use the
 		 * name recorded in the config for the messages instead.
 		 */
 		if (name == NULL) {
 			if (nvlist_lookup_string(config,
 			    ZPOOL_CONFIG_POOL_NAME, &origname) == 0)
 				name = origname;
 			else
 				name = "<unknown>";
 		}
 
 		if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0) {
 			return (zfs_error_fmt(hdl, EZFS_NOMEM,
 			    dgettext(TEXT_DOMAIN, "cannot import '%s'"),
 			    name));
 		}
 
 		if (nvlist_add_string(props,
 		    zpool_prop_to_name(ZPOOL_PROP_ALTROOT), altroot) != 0 ||
 		    nvlist_add_string(props,
 		    zpool_prop_to_name(ZPOOL_PROP_CACHEFILE), "none") != 0) {
 			nvlist_free(props);
 			return (zfs_error_fmt(hdl, EZFS_NOMEM,
 			    dgettext(TEXT_DOMAIN, "cannot import '%s'"),
 			    name));
 		}
 	}
 
 	ret = zpool_import_props(hdl, config, newname, props,
 	    ZFS_IMPORT_NORMAL);
 	nvlist_free(props);
 	return (ret);
 }
 
 /*
  * Recursively print a vdev tree to stdout, one vdev per line.  When 'name'
  * is non-NULL it is printed for 'nv' itself, indented by 'indent' columns
  * after a leading tab and tagged " [log]" if the vdev is a log device.
  * Children are printed two columns deeper, using names obtained from
  * zpool_vdev_name().
  */
 static void
 print_vdev_tree(libzfs_handle_t *hdl, const char *name, nvlist_t *nv,
     int indent)
 {
 	nvlist_t **kids;
 	uint_t nkids;
 	uint_t i;
 	uint64_t is_log = 0;
 
 	/* Absent key leaves is_log at 0, i.e. not a log device. */
 	(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_IS_LOG,
 	    &is_log);
 
 	if (name != NULL) {
 		const char *tag = is_log ? " [log]" : "";
 
 		(void) printf("\t%*s%s%s\n", indent, "", name, tag);
 	}
 
 	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
 	    &kids, &nkids) != 0)
 		return;
 
 	for (i = 0; i < nkids; i++) {
 		char *kidname = zpool_vdev_name(hdl, NULL, kids[i],
 		    VDEV_NAME_TYPE_ID);
 
 		print_vdev_tree(hdl, kidname, kids[i], indent + 2);
 		free(kidname);
 	}
 }
 
 /*
  * Print, one per line on stdout, the unsupported features listed under
  * ZPOOL_CONFIG_LOAD_INFO / ZPOOL_CONFIG_UNSUP_FEAT in 'config'.  A feature's
  * description is appended in parentheses when it is non-empty.  Both
  * nvlists must be present and every entry must be a string; this is
  * enforced with verify().
  */
 void
 zpool_print_unsup_feat(nvlist_t *config)
 {
 	nvlist_t *nvinfo, *unsup_feat;
 	nvpair_t *nvp;
 
 	verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO, &nvinfo) ==
 	    0);
 	verify(nvlist_lookup_nvlist(nvinfo, ZPOOL_CONFIG_UNSUP_FEAT,
 	    &unsup_feat) == 0);
 
 	nvp = nvlist_next_nvpair(unsup_feat, NULL);
 	while (nvp != NULL) {
 		char *desc;
 
 		verify(nvpair_type(nvp) == DATA_TYPE_STRING);
 		verify(nvpair_value_string(nvp, &desc) == 0);
 
 		if (desc[0] != '\0')
 			(void) printf("\t%s (%s)\n", nvpair_name(nvp), desc);
 		else
 			(void) printf("\t%s\n", nvpair_name(nvp));
 
 		nvp = nvlist_next_nvpair(unsup_feat, nvp);
 	}
 }
 
 /*
  * Import the given pool using the known configuration and a list of
  * properties to be set. The configuration should have come from
  * zpool_find_import(). The 'newname' parameters control whether the pool
  * is imported with a different name.
  *
  *	hdl	library handle
  *	config	pool configuration; must contain the pool name and guid
  *	newname	name to import under, or NULL to keep the original name
  *	props	pool properties to set on import, or NULL
  *	flags	import flags, passed to the kernel in zc_cookie
  *
  * Returns 0 on success.  On failure an error is recorded on 'hdl' (and
  * explanatory text may be printed to stdout) and a nonzero value is
  * returned.
  */
 int
 zpool_import_props(libzfs_handle_t *hdl, nvlist_t *config, const char *newname,
     nvlist_t *props, int flags)
 {
 	zfs_cmd_t zc = {"\0"};
 	zpool_load_policy_t policy;
 	nvlist_t *nv = NULL;
 	nvlist_t *nvinfo = NULL;
 	nvlist_t *missing = NULL;
 	const char *thename;
 	char *origname;
 	int ret;
 	int error = 0;
 	char errbuf[1024];
 
 	verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
 	    &origname) == 0);
 
 	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
 	    "cannot import pool '%s'"), origname);
 
 	if (newname != NULL) {
 		if (!zpool_name_valid(hdl, B_FALSE, newname))
 			return (zfs_error_fmt(hdl, EZFS_INVALIDNAME,
 			    dgettext(TEXT_DOMAIN, "cannot import '%s'"),
 			    newname));
 		thename = newname;
 	} else {
 		thename = origname;
 	}
 
 	if (props != NULL) {
 		uint64_t version;
 		/* Named to avoid shadowing the 'flags' parameter. */
 		prop_flags_t pflags = { .create = B_FALSE, .import = B_TRUE };
 
 		verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION,
 		    &version) == 0);
 
 		/*
 		 * From here on 'props' refers to the validated list
 		 * returned by zpool_valid_proplist(), which is freed below;
 		 * the caller's list is not touched.
 		 */
 		if ((props = zpool_valid_proplist(hdl, origname,
 		    props, version, pflags, errbuf)) == NULL)
 			return (-1);
 		if (zcmd_write_src_nvlist(hdl, &zc, props) != 0) {
 			nvlist_free(props);
 			return (-1);
 		}
 		nvlist_free(props);
 	}
 
 	(void) strlcpy(zc.zc_name, thename, sizeof (zc.zc_name));
 
 	verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
 	    &zc.zc_guid) == 0);
 
 	if (zcmd_write_conf_nvlist(hdl, &zc, config) != 0) {
 		zcmd_free_nvlists(&zc);
 		return (-1);
 	}
 	if (zcmd_alloc_dst_nvlist(hdl, &zc, zc.zc_nvlist_conf_size * 2) != 0) {
 		zcmd_free_nvlists(&zc);
 		return (-1);
 	}
 
 	zc.zc_cookie = flags;
 	/* ENOMEM means the destination buffer was too small: grow, retry. */
 	while ((ret = zfs_ioctl(hdl, ZFS_IOC_POOL_IMPORT, &zc)) != 0 &&
 	    errno == ENOMEM) {
 		if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
 			zcmd_free_nvlists(&zc);
 			return (-1);
 		}
 	}
 	/* Save errno before any further library call can change it. */
 	if (ret != 0)
 		error = errno;
 
 	(void) zcmd_read_dst_nvlist(hdl, &zc, &nv);
 
 	zcmd_free_nvlists(&zc);
 
 	zpool_get_load_policy(config, &policy);
 
 	if (error) {
 		char desc[1024];
 		char aux[256];
 
 		/*
 		 * Dry-run failed, but we print out what success
 		 * looks like if we found a best txg
 		 */
 		if (policy.zlp_rewind & ZPOOL_TRY_REWIND) {
 			zpool_rewind_exclaim(hdl, newname ? origname : thename,
 			    B_TRUE, nv);
 			nvlist_free(nv);
 			return (-1);
 		}
 
 		if (newname == NULL)
 			(void) snprintf(desc, sizeof (desc),
 			    dgettext(TEXT_DOMAIN, "cannot import '%s'"),
 			    thename);
 		else
 			(void) snprintf(desc, sizeof (desc),
 			    dgettext(TEXT_DOMAIN, "cannot import '%s' as '%s'"),
 			    origname, thename);
 
 		switch (error) {
 		case ENOTSUP:
 			if (nv != NULL && nvlist_lookup_nvlist(nv,
 			    ZPOOL_CONFIG_LOAD_INFO, &nvinfo) == 0 &&
 			    nvlist_exists(nvinfo, ZPOOL_CONFIG_UNSUP_FEAT)) {
 				(void) printf(dgettext(TEXT_DOMAIN, "This "
 				    "pool uses the following feature(s) not "
 				    "supported by this system:\n"));
 				zpool_print_unsup_feat(nv);
 				if (nvlist_exists(nvinfo,
 				    ZPOOL_CONFIG_CAN_RDONLY)) {
 					(void) printf(dgettext(TEXT_DOMAIN,
 					    "All unsupported features are only "
 					    "required for writing to the pool."
 					    "\nThe pool can be imported using "
 					    "'-o readonly=on'.\n"));
 				}
 			}
 			/*
 			 * Unsupported version.
 			 */
 			(void) zfs_error(hdl, EZFS_BADVERSION, desc);
 			break;
 
 		case EREMOTEIO:
 			if (nv != NULL && nvlist_lookup_nvlist(nv,
 			    ZPOOL_CONFIG_LOAD_INFO, &nvinfo) == 0) {
 				char *hostname = "<unknown>";
 				uint64_t hostid = 0;
 				mmp_state_t mmp_state;
 
 				mmp_state = fnvlist_lookup_uint64(nvinfo,
 				    ZPOOL_CONFIG_MMP_STATE);
 
 				if (nvlist_exists(nvinfo,
 				    ZPOOL_CONFIG_MMP_HOSTNAME))
 					hostname = fnvlist_lookup_string(nvinfo,
 					    ZPOOL_CONFIG_MMP_HOSTNAME);
 
 				if (nvlist_exists(nvinfo,
 				    ZPOOL_CONFIG_MMP_HOSTID))
 					hostid = fnvlist_lookup_uint64(nvinfo,
 					    ZPOOL_CONFIG_MMP_HOSTID);
 
 				/*
 				 * Start from an empty string so that an
 				 * unexpected MMP state never causes an
 				 * uninitialized buffer to be reported.
 				 */
 				aux[0] = '\0';
 
 				if (mmp_state == MMP_STATE_ACTIVE) {
 					(void) snprintf(aux, sizeof (aux),
 					    dgettext(TEXT_DOMAIN, "pool is imp"
 					    "orted on host '%s' (hostid=%lx).\n"
 					    "Export the pool on the other "
 					    "system, then run 'zpool import'."),
 					    hostname, (unsigned long) hostid);
 				} else if (mmp_state == MMP_STATE_NO_HOSTID) {
 					(void) snprintf(aux, sizeof (aux),
 					    dgettext(TEXT_DOMAIN, "pool has "
 					    "the multihost property on and "
 					    "the\nsystem's hostid is not set. "
 					    "Set a unique system hostid with "
 					    "the zgenhostid(8) command.\n"));
 				}
 
 				if (aux[0] != '\0')
 					(void) zfs_error_aux(hdl, "%s", aux);
 			}
 			(void) zfs_error(hdl, EZFS_ACTIVE_POOL, desc);
 			break;
 
 		case EINVAL:
 			(void) zfs_error(hdl, EZFS_INVALCONFIG, desc);
 			break;
 
 		case EROFS:
 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 			    "one or more devices is read only"));
 			(void) zfs_error(hdl, EZFS_BADDEV, desc);
 			break;
 
 		case ENXIO:
 			if (nv && nvlist_lookup_nvlist(nv,
 			    ZPOOL_CONFIG_LOAD_INFO, &nvinfo) == 0 &&
 			    nvlist_lookup_nvlist(nvinfo,
 			    ZPOOL_CONFIG_MISSING_DEVICES, &missing) == 0) {
 				(void) printf(dgettext(TEXT_DOMAIN,
 				    "The devices below are missing or "
 				    "corrupted, use '-m' to import the pool "
 				    "anyway:\n"));
 				print_vdev_tree(hdl, NULL, missing, 2);
 				(void) printf("\n");
 			}
 			(void) zpool_standard_error(hdl, error, desc);
 			break;
 
 		case EEXIST:
 			(void) zpool_standard_error(hdl, error, desc);
 			break;
 
 		case EBUSY:
 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 			    "one or more devices are already in use\n"));
 			(void) zfs_error(hdl, EZFS_BADDEV, desc);
 			break;
 		case ENAMETOOLONG:
 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 			    "new name of at least one dataset is longer than "
 			    "the maximum allowable length"));
 			(void) zfs_error(hdl, EZFS_NAMETOOLONG, desc);
 			break;
 		default:
 			(void) zpool_standard_error(hdl, error, desc);
 			zpool_explain_recover(hdl,
 			    newname ? origname : thename, -error, nv);
 			break;
 		}
 
 		nvlist_free(nv);
 		ret = -1;
 	} else {
 		zpool_handle_t *zhp;
 
 		/*
 		 * This should never fail, but play it safe anyway.
 		 *
 		 * NOTE(review): 'ret' is set to -1 on failure here, but this
 		 * branch returns 0 unconditionally below, so a failed open
 		 * is not reported to the caller.  Left as is to preserve the
 		 * existing return value; confirm whether that is intended.
 		 */
 		if (zpool_open_silent(hdl, thename, &zhp) != 0)
 			ret = -1;
 		else if (zhp != NULL)
 			zpool_close(zhp);
 		if (policy.zlp_rewind &
 		    (ZPOOL_DO_REWIND | ZPOOL_TRY_REWIND)) {
 			zpool_rewind_exclaim(hdl, newname ? origname : thename,
 			    ((policy.zlp_rewind & ZPOOL_TRY_REWIND) != 0), nv);
 		}
 		nvlist_free(nv);
 		return (0);
 	}
 
 	return (ret);
 }
 
 /*
  * Translate vdev names to guids.  Each name in 'vds' is looked up in the
  * pool; on success its guid is stored in 'vdev_guids' (keyed by path) and
  * the reverse mapping, keyed by the guid in decimal, in 'guids_to_paths'.
  *
  * A path that cannot be found, or that names a spare or cache device, is
  * recorded in an error list together with an EZFS_* code.  If any such
  * path was seen, EINVAL is returned and the error list is handed back
  * through 'vd_errlist' (or freed when 'vd_errlist' is NULL).  Returns 0
  * when every path was translated.
  */
 static int
 zpool_translate_vdev_guids(zpool_handle_t *zhp, nvlist_t *vds,
     nvlist_t *vdev_guids, nvlist_t *guids_to_paths, nvlist_t **vd_errlist)
 {
 	nvlist_t *errlist = NULL;
 	nvpair_t *elem = NULL;
 	int error = 0;
 
 	while ((elem = nvlist_next_nvpair(vds, elem)) != NULL) {
 		boolean_t spare, cache;
 		char guidstr[MAXNAMELEN];
 		uint64_t guid;
 
 		char *vd_path = nvpair_name(elem);
 		nvlist_t *tgt = zpool_find_vdev(zhp, vd_path, &spare, &cache,
 		    NULL);
 
 		if ((tgt == NULL) || cache || spare) {
 			int64_t code;
 
 			/* The list is created on the first bad path. */
 			if (errlist == NULL) {
 				errlist = fnvlist_alloc();
 				error = EINVAL;
 			}
 
 			if (tgt == NULL)
 				code = EZFS_NODEVICE;
 			else if (spare)
 				code = EZFS_ISSPARE;
 			else
 				code = EZFS_ISL2CACHE;
 
 			fnvlist_add_int64(errlist, vd_path, code);
 			continue;
 		}
 
 		guid = fnvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID);
 		fnvlist_add_uint64(vdev_guids, vd_path, guid);
 
 		(void) snprintf(guidstr, sizeof (guidstr), "%llu",
 		    (u_longlong_t)guid);
 		fnvlist_add_string(guids_to_paths, guidstr, vd_path);
 	}
 
 	if (error != 0) {
 		verify(errlist != NULL);
 		if (vd_errlist != NULL)
 			*vd_errlist = errlist;
 		else
 			fnvlist_free(errlist);
 	}
 
 	return (error);
 }
 
 /*
  * Map an errno reported for a vdev initialize request to the matching
  * EZFS_* code.  Values with no mapping are returned unchanged.
  */
 static int
 xlate_init_err(int err)
 {
 	if (err == ENODEV)
 		return (EZFS_NODEVICE);
 	if (err == EINVAL || err == EROFS)
 		return (EZFS_BADDEV);
 	if (err == EBUSY)
 		return (EZFS_INITIALIZING);
 	if (err == ESRCH)
 		return (EZFS_NO_INITIALIZE);
 
 	return (err);
 }
 
 /*
  * Begin, suspend, or cancel the initialization (initializing of all free
  * blocks) for the given vdevs in the given pool.
  *
  *	zhp		pool handle
  *	cmd_type	initialize command passed to lzc_initialize()
  *	vds		nvlist whose pair names are the vdev paths
  *	wait		if B_TRUE, wait for each vdev to finish initializing
  *
  * Per-vdev failures are reported individually through zfs_error_fmt().
  * Returns 0 on success and -1 on any failure.
  */
 static int
 zpool_initialize_impl(zpool_handle_t *zhp, pool_initialize_func_t cmd_type,
     nvlist_t *vds, boolean_t wait)
 {
 	int err;
 
 	nvlist_t *vdev_guids = fnvlist_alloc();
 	nvlist_t *guids_to_paths = fnvlist_alloc();
 	nvlist_t *xlate_errlist = NULL;	/* owned: from guid translation */
 	nvlist_t *errlist = NULL;	/* owned: from lzc_initialize() */
 	nvlist_t *vd_errlist = NULL;	/* borrowed: the list to report */
 	nvpair_t *elem;
 
 	err = zpool_translate_vdev_guids(zhp, vds, vdev_guids,
 	    guids_to_paths, &xlate_errlist);
 
 	if (err != 0) {
 		verify(xlate_errlist != NULL);
 		vd_errlist = xlate_errlist;
 		goto list_errors;
 	}
 
 	err = lzc_initialize(zhp->zpool_name, cmd_type,
 	    vdev_guids, &errlist);
 
 	if (err != 0) {
 		/*
 		 * The per-vdev list, when present, is embedded in
 		 * 'errlist'; the lookup returns a pointer into it rather
 		 * than a copy, so it must not be freed on its own.
 		 */
 		if (errlist != NULL && nvlist_lookup_nvlist(errlist,
 		    ZPOOL_INITIALIZE_VDEVS, &vd_errlist) == 0) {
 			goto list_errors;
 		}
 		(void) zpool_standard_error(zhp->zpool_hdl, err,
 		    dgettext(TEXT_DOMAIN, "operation failed"));
 		goto out;
 	}
 
 	if (wait) {
 		for (elem = nvlist_next_nvpair(vdev_guids, NULL); elem != NULL;
 		    elem = nvlist_next_nvpair(vdev_guids, elem)) {
 
 			uint64_t guid = fnvpair_value_uint64(elem);
 
 			err = lzc_wait_tag(zhp->zpool_name,
 			    ZPOOL_WAIT_INITIALIZE, guid, NULL);
 			if (err != 0) {
 				(void) zpool_standard_error_fmt(zhp->zpool_hdl,
 				    err, dgettext(TEXT_DOMAIN, "error "
 				    "waiting for '%s' to initialize"),
 				    nvpair_name(elem));
 
 				goto out;
 			}
 		}
 	}
 	goto out;
 
 list_errors:
 	for (elem = nvlist_next_nvpair(vd_errlist, NULL); elem != NULL;
 	    elem = nvlist_next_nvpair(vd_errlist, elem)) {
 		int64_t vd_error = xlate_init_err(fnvpair_value_int64(elem));
 		char *path;
 
 		/* Prefer the user's path; fall back to the pair name. */
 		if (nvlist_lookup_string(guids_to_paths, nvpair_name(elem),
 		    &path) != 0)
 			path = nvpair_name(elem);
 
 		(void) zfs_error_fmt(zhp->zpool_hdl, vd_error,
 		    "cannot initialize '%s'", path);
 	}
 
 out:
 	fnvlist_free(vdev_guids);
 	fnvlist_free(guids_to_paths);
 
 	/*
 	 * Release only the lists this function owns.  Freeing 'errlist'
 	 * also releases the per-vdev list embedded in it.
 	 */
 	nvlist_free(xlate_errlist);
 	nvlist_free(errlist);
 
 	return (err == 0 ? 0 : -1);
 }
 
 /*
  * Issue an initialize command for the given vdevs without waiting for the
  * initialization to complete.
  */
 int
 zpool_initialize(zpool_handle_t *zhp, pool_initialize_func_t cmd_type,
     nvlist_t *vds)
 {
 	const boolean_t wait = B_FALSE;
 
 	return (zpool_initialize_impl(zhp, cmd_type, vds, wait));
 }
 
 /*
  * Issue an initialize command for the given vdevs and wait for each of
  * them to finish initializing.
  */
 int
 zpool_initialize_wait(zpool_handle_t *zhp, pool_initialize_func_t cmd_type,
     nvlist_t *vds)
 {
 	const boolean_t wait = B_TRUE;
 
 	return (zpool_initialize_impl(zhp, cmd_type, vds, wait));
 }
 
 /*
  * Map an errno reported for a vdev TRIM request to the matching EZFS_*
  * code.  Values with no mapping are returned unchanged.
  */
 static int
 xlate_trim_err(int err)
 {
 	if (err == ENODEV)
 		return (EZFS_NODEVICE);
 	if (err == EINVAL || err == EROFS)
 		return (EZFS_BADDEV);
 	if (err == EBUSY)
 		return (EZFS_TRIMMING);
 	if (err == ESRCH)
 		return (EZFS_NO_TRIM);
 	if (err == EOPNOTSUPP)
 		return (EZFS_TRIM_NOTSUP);
 
 	return (err);
 }
 
 /*
  * Wait, one vdev at a time, for TRIM to finish on every guid listed in
  * 'vdev_guids'.  Stops at the first failure, reports it on the library
  * handle and returns the error from lzc_wait_tag(); returns 0 if every
  * wait succeeds.
  */
 static int
 zpool_trim_wait(zpool_handle_t *zhp, nvlist_t *vdev_guids)
 {
 	nvpair_t *elem = NULL;
 
 	while ((elem = nvlist_next_nvpair(vdev_guids, elem)) != NULL) {
 		uint64_t guid = fnvpair_value_uint64(elem);
 		int err = lzc_wait_tag(zhp->zpool_name,
 		    ZPOOL_WAIT_TRIM, guid, NULL);
 
 		if (err == 0)
 			continue;
 
 		(void) zpool_standard_error_fmt(zhp->zpool_hdl,
 		    err, dgettext(TEXT_DOMAIN, "error "
 		    "waiting to trim '%s'"), nvpair_name(elem));
 
 		return (err);
 	}
 
 	return (0);
 }
 
 /*
  * Walk 'errlist' and report each per-vdev TRIM error, except those that
  * should be suppressed.  If the number of suppressed errors equals the
  * number of entries in 'vds', a single "no devices in pool support trim
  * operations" error is reported instead.  Returns B_TRUE if any errors
  * were reported.
  */
 static boolean_t
 check_trim_errs(zpool_handle_t *zhp, trimflags_t *trim_flags,
     nvlist_t *guids_to_paths, nvlist_t *vds, nvlist_t *errlist)
 {
 	nvpair_t *elem;
 	boolean_t reported_errs = B_FALSE;
 	int num_vds = 0;
 	int num_suppressed_errs = 0;
 
 	/* Count the vdevs that were requested. */
 	elem = NULL;
 	while ((elem = nvlist_next_nvpair(vds, elem)) != NULL)
 		num_vds++;
 
 	elem = NULL;
 	while ((elem = nvlist_next_nvpair(errlist, elem)) != NULL) {
 		int64_t vd_error = xlate_trim_err(fnvpair_value_int64(elem));
 		char *path;
 
 		/*
 		 * If only the pool was specified, and it was not a secure
 		 * trim then suppress warnings for individual vdevs which
 		 * do not support trimming.
 		 */
 		if (vd_error == EZFS_TRIM_NOTSUP &&
 		    trim_flags->fullpool &&
 		    !trim_flags->secure) {
 			num_suppressed_errs++;
 			continue;
 		}
 
 		/* Prefer the user's path; fall back to the pair name. */
 		if (nvlist_lookup_string(guids_to_paths, nvpair_name(elem),
 		    &path) != 0)
 			path = nvpair_name(elem);
 
 		(void) zfs_error_fmt(zhp->zpool_hdl, vd_error,
 		    "cannot trim '%s'", path);
 		reported_errs = B_TRUE;
 	}
 
 	if (num_suppressed_errs == num_vds) {
 		(void) zfs_error_aux(zhp->zpool_hdl, dgettext(TEXT_DOMAIN,
 		    "no devices in pool support trim operations"));
 		(void) (zfs_error(zhp->zpool_hdl, EZFS_TRIM_NOTSUP,
 		    dgettext(TEXT_DOMAIN, "cannot trim")));
 		reported_errs = B_TRUE;
 	}
 
 	return (reported_errs);
 }
 
 /*
  * Begin, suspend, or cancel the TRIM (discarding of all free blocks) for
  * the given vdevs in the given pool.
  *
  * The vdev names in 'vds' are first translated to guids, then the request
  * is submitted through lzc_trim().  Failures are reported to the user and
  * yield -1.  On success the result is 0, unless trim_flags->wait is set, in
  * which case the result of zpool_trim_wait() is returned.
  */
 int
 zpool_trim(zpool_handle_t *zhp, pool_trim_func_t cmd_type, nvlist_t *vds,
     trimflags_t *trim_flags)
 {
 	nvlist_t *guid_list = fnvlist_alloc();
 	nvlist_t *path_map = fnvlist_alloc();
 	nvlist_t *errlist = NULL;
 	int rv = -1;	/* assume failure until the request went through */
 	int err;
 
 	err = zpool_translate_vdev_guids(zhp, vds, guid_list, path_map,
 	    &errlist);
 	if (err != 0) {
 		check_trim_errs(zhp, trim_flags, path_map, vds, errlist);
 		goto done;
 	}
 
 	err = lzc_trim(zhp->zpool_name, cmd_type, trim_flags->rate,
 	    trim_flags->secure, guid_list, &errlist);
 	if (err != 0) {
 		nvlist_t *per_vdev;
 
 		if (errlist == NULL || nvlist_lookup_nvlist(errlist,
 		    ZPOOL_TRIM_VDEVS, &per_vdev) != 0) {
 			/* no per-vdev detail available: generic failure */
 			char msg[1024];
 
 			(void) snprintf(msg, sizeof (msg),
 			    dgettext(TEXT_DOMAIN, "operation failed"));
 			zpool_standard_error(zhp->zpool_hdl, err, msg);
 			goto done;
 		}
 
 		/* only fail if something was actually reported */
 		if (check_trim_errs(zhp, trim_flags, path_map, vds, per_vdev))
 			goto done;
 	}
 
 	rv = 0;
 	if (trim_flags->wait)
 		rv = zpool_trim_wait(zhp, guid_list);
 
 done:
 	if (errlist != NULL)
 		fnvlist_free(errlist);
 	fnvlist_free(guid_list);
 	fnvlist_free(path_map);
 	return (rv);
 }
 
 /*
  * Scan the pool.
  *
  * Issues ZFS_IOC_POOL_SCAN with 'func' in zc_cookie and 'cmd' in zc_flags.
  * Returns 0 when the ioctl succeeds and in the two tolerated failure cases
  * noted below; otherwise the failure is reported through zfs_error() or
  * zpool_standard_error() and that call's result is returned.
  */
 int
 zpool_scan(zpool_handle_t *zhp, pool_scan_func_t func, pool_scrub_cmd_t cmd)
 {
 	zfs_cmd_t zc = {"\0"};
 	char msg[1024];
 	int err;
 	libzfs_handle_t *hdl = zhp->zpool_hdl;
 
 	/*
 	 * Keep 'msg' a valid (empty) string even when none of the branches
 	 * below formats it, which can happen for an unexpected 'func' when
 	 * assert() is compiled out.
 	 */
 	msg[0] = '\0';
 
 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
 	zc.zc_cookie = func;
 	zc.zc_flags = cmd;
 
 	if (zfs_ioctl(hdl, ZFS_IOC_POOL_SCAN, &zc) == 0)
 		return (0);
 
 	/* save errno before any further library call can disturb it */
 	err = errno;
 
 	/* ECANCELED on a scrub means we resumed a paused scrub */
 	if (err == ECANCELED && func == POOL_SCAN_SCRUB &&
 	    cmd == POOL_SCRUB_NORMAL)
 		return (0);
 
 	/* ENOENT on a normal (non-cancel) request is not treated as failure */
 	if (err == ENOENT && func != POOL_SCAN_NONE && cmd == POOL_SCRUB_NORMAL)
 		return (0);
 
 	/* Build the message prefix describing what was being attempted. */
 	if (func == POOL_SCAN_SCRUB) {
 		if (cmd == POOL_SCRUB_PAUSE) {
 			(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
 			    "cannot pause scrubbing %s"), zc.zc_name);
 		} else {
 			assert(cmd == POOL_SCRUB_NORMAL);
 			(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
 			    "cannot scrub %s"), zc.zc_name);
 		}
 	} else if (func == POOL_SCAN_RESILVER) {
 		assert(cmd == POOL_SCRUB_NORMAL);
 		(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
 		    "cannot restart resilver on %s"), zc.zc_name);
 	} else if (func == POOL_SCAN_NONE) {
 		(void) snprintf(msg, sizeof (msg),
 		    dgettext(TEXT_DOMAIN, "cannot cancel scrubbing %s"),
 		    zc.zc_name);
 	} else {
 		assert(!"unexpected result");
 	}
 
 	if (err == EBUSY) {
 		nvlist_t *nvroot;
 		pool_scan_stat_t *ps = NULL;
 		uint_t psc;
 
 		/*
 		 * Consult the scan statistics stored in the pool config to
 		 * tell a running scrub apart from anything else; 'ps' stays
 		 * NULL when no statistics are present.
 		 */
 		verify(nvlist_lookup_nvlist(zhp->zpool_config,
 		    ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
 		(void) nvlist_lookup_uint64_array(nvroot,
 		    ZPOOL_CONFIG_SCAN_STATS, (uint64_t **)&ps, &psc);
 		if (ps && ps->pss_func == POOL_SCAN_SCRUB &&
 		    ps->pss_state == DSS_SCANNING) {
 			if (cmd == POOL_SCRUB_PAUSE)
 				return (zfs_error(hdl, EZFS_SCRUB_PAUSED, msg));
 			else
 				return (zfs_error(hdl, EZFS_SCRUBBING, msg));
 		} else {
 			return (zfs_error(hdl, EZFS_RESILVERING, msg));
 		}
 	} else if (err == ENOENT) {
 		return (zfs_error(hdl, EZFS_NO_SCRUB, msg));
 	} else if (err == ENOTSUP && func == POOL_SCAN_RESILVER) {
 		return (zfs_error(hdl, EZFS_NO_RESILVER_DEFER, msg));
 	} else {
 		return (zpool_standard_error(hdl, err, msg));
 	}
 }
 
 /*
  * Find a vdev that matches the search criteria specified. We use the
  * nvpair name to determine how we should look for the device.
  * 'avail_spare' is set to TRUE if the provided guid refers to an AVAIL
  * spare; but FALSE if it's an INUSE spare.
  *
  * Only the first nvpair of 'search' is used as the criterion.  The node
  * 'nv' itself is tested first; if it does not match, the search recurses
  * into its ZPOOL_CONFIG_CHILDREN, then ZPOOL_CONFIG_SPARES, then
  * ZPOOL_CONFIG_L2CACHE arrays.
  *
  * This function only ever sets *avail_spare, *l2cache and *log to B_TRUE;
  * it never clears them, so the caller must initialize them.  'log' may be
  * NULL, while 'avail_spare' and 'l2cache' are dereferenced unconditionally
  * when a match is found under the corresponding array.
  *
  * Returns the matching vdev nvlist, or NULL if nothing matched.
  */
 static nvlist_t *
 vdev_to_nvlist_iter(nvlist_t *nv, nvlist_t *search, boolean_t *avail_spare,
     boolean_t *l2cache, boolean_t *log)
 {
 	uint_t c, children;
 	nvlist_t **child;
 	nvlist_t *ret;
 	uint64_t is_log;
 	char *srchkey;
 	nvpair_t *pair = nvlist_next_nvpair(search, NULL);
 
 	/* Nothing to look for */
 	if (search == NULL || pair == NULL)
 		return (NULL);
 
 	/* Obtain the key we will use to search */
 	srchkey = nvpair_name(pair);
 
 	switch (nvpair_type(pair)) {
 	case DATA_TYPE_UINT64:
 		/* Numeric criteria are only honored for the guid key. */
 		if (strcmp(srchkey, ZPOOL_CONFIG_GUID) == 0) {
 			uint64_t srchval, theguid;
 
 			verify(nvpair_value_uint64(pair, &srchval) == 0);
 			verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID,
 			    &theguid) == 0);
 			if (theguid == srchval)
 				return (nv);
 		}
 		break;
 
 	case DATA_TYPE_STRING: {
 		char *srchval, *val;
 
 		verify(nvpair_value_string(pair, &srchval) == 0);
 		/* This node lacks the key entirely; go look at its children. */
 		if (nvlist_lookup_string(nv, srchkey, &val) != 0)
 			break;
 
 		/*
 		 * Search for the requested value. Special cases:
 		 *
 		 * - ZPOOL_CONFIG_PATH for whole disk entries.  These end in
 		 *   "-part1", or "p1".  The suffix is hidden from the user,
 		 *   but included in the string, so this matches around it.
 		 * - ZPOOL_CONFIG_PATH for short names zfs_strcmp_shortname()
 		 *   is used to check all possible expanded paths.
 		 * - looking for a top-level vdev name (i.e. ZPOOL_CONFIG_TYPE).
 		 *
 		 * Otherwise, all other searches are simple string compares.
 		 */
 		if (strcmp(srchkey, ZPOOL_CONFIG_PATH) == 0) {
 			uint64_t wholedisk = 0;
 
 			(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
 			    &wholedisk);
 			if (zfs_strcmp_pathname(srchval, val, wholedisk) == 0)
 				return (nv);
 
 		} else if (strcmp(srchkey, ZPOOL_CONFIG_TYPE) == 0 && val) {
 			char *type, *idx, *end, *p;
 			uint64_t id, vdev_id;
 
 			/*
 			 * Determine our vdev type, keeping in mind
 			 * that the srchval is composed of a type and
 			 * vdev id pair (i.e. mirror-4).
 			 */
 			if ((type = strdup(srchval)) == NULL)
 				return (NULL);
 
 			/* Split the private copy at the last '-'. */
 			if ((p = strrchr(type, '-')) == NULL) {
 				free(type);
 				break;
 			}
 			idx = p + 1;
 			*p = '\0';
 
 			/*
 			 * If the types don't match then keep looking.
 			 * Only strlen(val) bytes are compared, so the
 			 * requested type may carry extra trailing characters
 			 * (such as the parity digit handled below).
 			 */
 			if (strncmp(val, type, strlen(val)) != 0) {
 				free(type);
 				break;
 			}
 
 			verify(zpool_vdev_is_interior(type));
 			verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ID,
 			    &id) == 0);
 
 			/* errno is cleared here and inspected further below. */
 			errno = 0;
 			vdev_id = strtoull(idx, &end, 10);
 
 			/*
 			 * If we are looking for a raidz and a parity is
 			 * specified, make sure it matches.
 			 */
 			int rzlen = strlen(VDEV_TYPE_RAIDZ);
 			assert(rzlen == strlen(VDEV_TYPE_DRAID));
 			int typlen = strlen(type);
 			if ((strncmp(type, VDEV_TYPE_RAIDZ, rzlen) == 0 ||
 			    strncmp(type, VDEV_TYPE_DRAID, rzlen) == 0) &&
 			    typlen != rzlen) {
 				uint64_t vdev_parity;
 				/* the character right after the type name */
 				int parity = *(type + rzlen) - '0';
 
 				if (parity <= 0 || parity > 3 ||
 				    (typlen - rzlen) != 1) {
 					/*
 					 * Nonsense parity specified, can
 					 * never match
 					 */
 					free(type);
 					return (NULL);
 				}
 				verify(nvlist_lookup_uint64(nv,
 				    ZPOOL_CONFIG_NPARITY, &vdev_parity) == 0);
 				if ((int)vdev_parity != parity) {
 					free(type);
 					break;
 				}
 			}
 
 			free(type);
 			/* A non-zero errno is treated as a failed id parse. */
 			if (errno != 0)
 				return (NULL);
 
 			/*
 			 * Now verify that we have the correct vdev id.
 			 */
 			if (vdev_id == id)
 				return (nv);
 		}
 
 		/*
 		 * Common case
 		 */
 		if (strcmp(srchval, val) == 0)
 			return (nv);
 		break;
 	}
 
 	default:
 		break;
 	}
 
 	/* This node did not match; a node without children ends the search. */
 	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
 	    &child, &children) != 0)
 		return (NULL);
 
 	for (c = 0; c < children; c++) {
 		if ((ret = vdev_to_nvlist_iter(child[c], search,
 		    avail_spare, l2cache, NULL)) != NULL) {
 			/*
 			 * The 'is_log' value is only set for the toplevel
 			 * vdev, not the leaf vdevs.  So we always lookup the
 			 * log device from the root of the vdev tree (where
 			 * 'log' is non-NULL).
 			 */
 			if (log != NULL &&
 			    nvlist_lookup_uint64(child[c],
 			    ZPOOL_CONFIG_IS_LOG, &is_log) == 0 &&
 			    is_log) {
 				*log = B_TRUE;
 			}
 			return (ret);
 		}
 	}
 
 	/* A match found beneath the spares array flags *avail_spare. */
 	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES,
 	    &child, &children) == 0) {
 		for (c = 0; c < children; c++) {
 			if ((ret = vdev_to_nvlist_iter(child[c], search,
 			    avail_spare, l2cache, NULL)) != NULL) {
 				*avail_spare = B_TRUE;
 				return (ret);
 			}
 		}
 	}
 
 	/* A match found beneath the l2cache array flags *l2cache. */
 	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE,
 	    &child, &children) == 0) {
 		for (c = 0; c < children; c++) {
 			if ((ret = vdev_to_nvlist_iter(child[c], search,
 			    avail_spare, l2cache, NULL)) != NULL) {
 				*l2cache = B_TRUE;
 				return (ret);
 			}
 		}
 	}
 
 	return (NULL);
 }
 
 /*
  * Locate a vdev in the pool's config from either a guid or a physical path.
  *
  * 'ppath' is treated as a guid when strtoull() (base auto-detected) consumes
  * the whole string and yields a non-zero value; otherwise it is matched
  * against ZPOOL_CONFIG_PHYS_PATH.  *avail_spare and *l2cache are always
  * reset to B_FALSE before the search; *log is reset only when 'log' is not
  * NULL.  Returns the matching vdev nvlist or NULL.
  */
 nvlist_t *
 zpool_find_vdev_by_physpath(zpool_handle_t *zhp, const char *ppath,
     boolean_t *avail_spare, boolean_t *l2cache, boolean_t *log)
 {
 	nvlist_t *criteria, *vdev_tree, *match;
 	uint64_t parsed;
 	char *tail;
 
 	verify(nvlist_alloc(&criteria, NV_UNIQUE_NAME, KM_SLEEP) == 0);
 
 	parsed = strtoull(ppath, &tail, 0);
 	if (parsed == 0 || *tail != '\0') {
 		/* not a clean non-zero number: search by physical path */
 		verify(nvlist_add_string(criteria, ZPOOL_CONFIG_PHYS_PATH,
 		    ppath) == 0);
 	} else {
 		verify(nvlist_add_uint64(criteria, ZPOOL_CONFIG_GUID,
 		    parsed) == 0);
 	}
 
 	verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
 	    &vdev_tree) == 0);
 
 	*avail_spare = B_FALSE;
 	*l2cache = B_FALSE;
 	if (log != NULL)
 		*log = B_FALSE;
 
 	match = vdev_to_nvlist_iter(vdev_tree, criteria, avail_spare, l2cache,
 	    log);
 	nvlist_free(criteria);
 
 	return (match);
 }
 
 /*
  * Determine if we have an "interior" top-level vdev (i.e. mirror/raidz).
  *
  * A name qualifies when it starts with the raidz, spare, replacing or
  * mirror type string, or when it starts with the draid type string and
  * zpool_is_draid_spare() does not claim it.
  */
 static boolean_t
 zpool_vdev_is_interior(const char *name)
 {
 	const char *const prefixes[] = {
 		VDEV_TYPE_RAIDZ,
 		VDEV_TYPE_SPARE,
 		VDEV_TYPE_REPLACING,
 		VDEV_TYPE_MIRROR
 	};
 	size_t i;
 
 	for (i = 0; i < sizeof (prefixes) / sizeof (prefixes[0]); i++) {
 		if (strncmp(name, prefixes[i], strlen(prefixes[i])) == 0)
 			return (B_TRUE);
 	}
 
 	if (strncmp(name, VDEV_TYPE_DRAID, strlen(VDEV_TYPE_DRAID)) != 0)
 		return (B_FALSE);
 
 	/* draid-prefixed: interior unless it names a draid spare */
 	if (zpool_is_draid_spare(name))
 		return (B_FALSE);
 
 	return (B_TRUE);
 }
 
 /*
  * Locate a vdev in the pool's config from a user-supplied name.
  *
  * 'path' is interpreted, in order of preference, as a guid (when strtoull()
  * consumes the whole string and yields a non-zero value), as an interior
  * vdev name (when zpool_vdev_is_interior() accepts it), or as a device path.
  * *avail_spare and *l2cache are always reset to B_FALSE before the search;
  * *log is reset only when 'log' is not NULL.  Returns the matching vdev
  * nvlist or NULL.
  */
 nvlist_t *
 zpool_find_vdev(zpool_handle_t *zhp, const char *path, boolean_t *avail_spare,
     boolean_t *l2cache, boolean_t *log)
 {
 	nvlist_t *criteria, *vdev_tree, *match;
 	uint64_t parsed;
 	char *tail;
 
 	verify(nvlist_alloc(&criteria, NV_UNIQUE_NAME, KM_SLEEP) == 0);
 
 	parsed = strtoull(path, &tail, 0);
 	if (parsed != 0 && *tail == '\0') {
 		verify(nvlist_add_uint64(criteria, ZPOOL_CONFIG_GUID,
 		    parsed) == 0);
 	} else {
 		/* a string search: pick the key the name should match */
 		const char *key = zpool_vdev_is_interior(path) ?
 		    ZPOOL_CONFIG_TYPE : ZPOOL_CONFIG_PATH;
 
 		verify(nvlist_add_string(criteria, key, path) == 0);
 	}
 
 	verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
 	    &vdev_tree) == 0);
 
 	*avail_spare = B_FALSE;
 	*l2cache = B_FALSE;
 	if (log != NULL)
 		*log = B_FALSE;
 
 	match = vdev_to_nvlist_iter(vdev_tree, criteria, avail_spare, l2cache,
 	    log);
 	nvlist_free(criteria);
 
 	return (match);
 }
 
 /*
  * Return 0 if the vdev config carries any of the offline, faulted or
  * removed keys, and 1 otherwise.  Only the presence of a key is tested;
  * its value is not examined.
  */
 static int
 vdev_is_online(nvlist_t *nv)
 {
 	const char *const down_keys[] = {
 		ZPOOL_CONFIG_OFFLINE,
 		ZPOOL_CONFIG_FAULTED,
 		ZPOOL_CONFIG_REMOVED
 	};
 	uint64_t unused;
 	size_t k;
 
 	for (k = 0; k < sizeof (down_keys) / sizeof (down_keys[0]); k++) {
 		if (nvlist_lookup_uint64(nv, down_keys[k], &unused) == 0)
 			return (0);
 	}
 
 	return (1);
 }
 
 /*
  * Helper function for zpool_get_physpaths().
  *
  * Appends the ZPOOL_CONFIG_PHYS_PATH string of 'config' to 'physpath' at
  * offset *bytes_written, preceded by a single space unless the offset is 0.
  * *bytes_written is advanced by the length snprintf() reports, whether or
  * not the text fit.
  *
  * Returns EZFS_NODEVICE if the config has no physical path, EZFS_NOSPC if
  * the text did not fit (the partial copy is cut off again), 0 otherwise.
  */
 static int
 vdev_get_one_physpath(nvlist_t *config, char *physpath, size_t physpath_size,
     size_t *bytes_written)
 {
 	char *devpath;
 	size_t offset, room, needed;
 
 	if (nvlist_lookup_string(config, ZPOOL_CONFIG_PHYS_PATH,
 	    &devpath) != 0)
 		return (EZFS_NODEVICE);
 
 	offset = *bytes_written;
 	room = physpath_size - offset;
 
 	if (offset == 0) {
 		needed = snprintf(physpath + offset, room, "%s", devpath);
 	} else {
 		needed = snprintf(physpath + offset, room, " %s", devpath);
 	}
 	*bytes_written += needed;
 
 	if (needed < room)
 		return (0);
 
 	/* if physpath was not copied properly, clear it */
 	if (room != 0)
 		physpath[offset] = 0;
 
 	return (EZFS_NOSPC);
 }
 
 /*
  * Collect the physical paths of the online disks below 'nv' into 'physpath'
  * (capacity 'phypath_size'); '*rsz' tracks the running length and is
  * advanced by vdev_get_one_physpath().
  *
  * 'is_spare' tells a disk node that it sits below a spare vdev, in which
  * case only a disk flagged with ZPOOL_CONFIG_IS_SPARE is accepted.  The
  * flag is inherited unchanged through mirror, raidz and replacing vdevs
  * and is switched on when a spare vdev is entered.
  *
  * Returns 0 once the subtree has been walked, EZFS_INVALCONFIG for a
  * malformed node or a non-active disk below a spare, EZFS_NOSPC when the
  * buffer is exhausted, the error of vdev_get_one_physpath() for a disk
  * whose path cannot be added, and EZFS_POOL_INVALARG for any other vdev
  * type.  While walking children only EZFS_NOSPC aborts the walk; other
  * per-child errors are deliberately skipped over.
  */
 static int
 vdev_get_physpaths(nvlist_t *nv, char *physpath, size_t phypath_size,
     size_t *rsz, boolean_t is_spare)
 {
 	nvlist_t **child;
 	uint_t count, i;
 	char *type;
 	int ret;
 
 	if (nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) != 0)
 		return (EZFS_INVALCONFIG);
 
 	if (strcmp(type, VDEV_TYPE_DISK) == 0) {
 		/*
 		 * An active spare device has ZPOOL_CONFIG_IS_SPARE set.
 		 * For a spare vdev, we only want to boot from the active
 		 * spare device.
 		 */
 		if (is_spare) {
 			uint64_t spare = 0;
 			(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_IS_SPARE,
 			    &spare);
 			if (!spare)
 				return (EZFS_INVALCONFIG);
 		}
 
 		/* Offline disks contribute nothing but are not an error. */
 		if (vdev_is_online(nv)) {
 			if ((ret = vdev_get_one_physpath(nv, physpath,
 			    phypath_size, rsz)) != 0)
 				return (ret);
 		}
 		return (0);
 	}
 
 	if (strcmp(type, VDEV_TYPE_SPARE) == 0) {
 		/* everything below a spare vdev is subject to the check */
 		is_spare = B_TRUE;
 	} else if (strcmp(type, VDEV_TYPE_MIRROR) != 0 &&
 	    strcmp(type, VDEV_TYPE_RAIDZ) != 0 &&
 	    strcmp(type, VDEV_TYPE_REPLACING) != 0) {
 		/* not a vdev type this walk knows how to descend into */
 		return (EZFS_POOL_INVALARG);
 	}
 
 	if (nvlist_lookup_nvlist_array(nv,
 	    ZPOOL_CONFIG_CHILDREN, &child, &count) != 0)
 		return (EZFS_INVALCONFIG);
 
 	for (i = 0; i < count; i++) {
 		ret = vdev_get_physpaths(child[i], physpath,
 		    phypath_size, rsz, is_spare);
 		if (ret == EZFS_NOSPC)
 			return (ret);
 	}
 
 	return (0);
 }
 
 /*
  * Get phys_path for a root pool config.
  * Return 0 on success; non-zero on failure.
  *
  * The config must hold a vdev tree whose root is of type VDEV_TYPE_ROOT
  * with exactly one child.  The result of walking that child is ignored on
  * purpose: success is judged solely by whether any path text was produced.
  */
 static int
 zpool_get_config_physpath(nvlist_t *config, char *physpath, size_t phypath_size)
 {
 	nvlist_t *root;
 	nvlist_t **kids;
 	uint_t nkids;
 	char *root_type;
 	size_t used = 0;
 
 	if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &root) != 0)
 		return (EZFS_INVALCONFIG);
 
 	if (nvlist_lookup_string(root, ZPOOL_CONFIG_TYPE, &root_type) != 0)
 		return (EZFS_INVALCONFIG);
 
 	if (nvlist_lookup_nvlist_array(root, ZPOOL_CONFIG_CHILDREN,
 	    &kids, &nkids) != 0)
 		return (EZFS_INVALCONFIG);
 
 	/*
 	 * root pool can only have a single top-level vdev.
 	 */
 	if (nkids != 1 || strcmp(root_type, VDEV_TYPE_ROOT) != 0)
 		return (EZFS_POOL_INVALARG);
 
 	(void) vdev_get_physpaths(kids[0], physpath, phypath_size, &used,
 	    B_FALSE);
 
 	/* No online devices */
 	return (used == 0 ? EZFS_NODEVICE : 0);
 }
 
 /*
  * Get phys_path for a root pool
  * Return 0 on success; non-zero on failure.
  *
  * Operates on the config stored in the pool handle.
  */
 int
 zpool_get_physpath(zpool_handle_t *zhp, char *physpath, size_t phypath_size)
 {
 	nvlist_t *config = zhp->zpool_config;
 
 	return (zpool_get_config_physpath(config, physpath, phypath_size));
 }
 
 /*
  * Convert a vdev path to a GUID.  Returns GUID or 0 on error.
  *
  * Each of is_spare, is_l2cache and is_log may be NULL.  For every non-NULL
  * pointer the corresponding flag produced by zpool_find_vdev() is stored
  * through it, but only when the vdev was found; on failure the pointers
  * are left untouched.
  */
 static uint64_t
 zpool_vdev_path_to_guid_impl(zpool_handle_t *zhp, const char *path,
     boolean_t *is_spare, boolean_t *is_l2cache, boolean_t *is_log)
 {
 	boolean_t found_spare = B_FALSE;
 	boolean_t found_l2cache = B_FALSE;
 	boolean_t found_log = B_FALSE;
 	uint64_t vdev_guid;
 	nvlist_t *vdev;
 
 	vdev = zpool_find_vdev(zhp, path, &found_spare, &found_l2cache,
 	    &found_log);
 	if (vdev == NULL)
 		return (0);
 
 	verify(nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_GUID, &vdev_guid) == 0);
 
 	if (is_spare != NULL)
 		*is_spare = found_spare;
 	if (is_l2cache != NULL)
 		*is_l2cache = found_l2cache;
 	if (is_log != NULL)
 		*is_log = found_log;
 
 	return (vdev_guid);
 }
 
 /*
  * Convert a vdev path to a GUID.  Returns GUID or 0 on error.
  * The spare/l2cache/log classification of the vdev is not requested.
  */
 uint64_t
 zpool_vdev_path_to_guid(zpool_handle_t *zhp, const char *path)
 {
 	uint64_t vdev_guid;
 
 	vdev_guid = zpool_vdev_path_to_guid_impl(zhp, path, NULL, NULL, NULL);
 
 	return (vdev_guid);
 }
 
 /*
  * Bring the specified vdev online.   The 'flags' parameter is a set of the
  * ZFS_ONLINE_* flags.
  *
  * When expansion is requested (ZFS_ONLINE_EXPAND) or the pool's autoexpand
  * property is set, and the vdev has a path, a whole-disk vdev is relabeled
  * first; cache devices are refused in that case.  On success the state
  * reported back by the ioctl is stored in *newstate and 0 is returned;
  * otherwise the error is reported and the reporting call's result returned.
  */
 int
 zpool_vdev_online(zpool_handle_t *zhp, const char *path, int flags,
     vdev_state_t *newstate)
 {
 	libzfs_handle_t *hdl = zhp->zpool_hdl;
 	zfs_cmd_t zc = {"\0"};
 	boolean_t is_avail_spare, is_l2cache, is_log;
 	nvlist_t *vdev;
 	char *vdev_path;
 	char msg[1024];
 
 	if (flags & ZFS_ONLINE_EXPAND) {
 		(void) snprintf(msg, sizeof (msg),
 		    dgettext(TEXT_DOMAIN, "cannot expand %s"), path);
 	} else {
 		(void) snprintf(msg, sizeof (msg),
 		    dgettext(TEXT_DOMAIN, "cannot online %s"), path);
 	}
 
 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
 
 	vdev = zpool_find_vdev(zhp, path, &is_avail_spare, &is_l2cache,
 	    &is_log);
 	if (vdev == NULL)
 		return (zfs_error(hdl, EZFS_NODEVICE, msg));
 
 	verify(nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
 
 	if (is_avail_spare)
 		return (zfs_error(hdl, EZFS_ISSPARE, msg));
 
 	if (((flags & ZFS_ONLINE_EXPAND) ||
 	    zpool_get_prop_int(zhp, ZPOOL_PROP_AUTOEXPAND, NULL)) &&
 	    nvlist_lookup_string(vdev, ZPOOL_CONFIG_PATH, &vdev_path) == 0) {
 		uint64_t whole_disk = 0;
 
 		(void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_WHOLE_DISK,
 		    &whole_disk);
 
 		/*
 		 * XXX - L2ARC 1.0 devices can't support expansion.
 		 */
 		if (is_l2cache) {
 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 			    "cannot expand cache devices"));
 			return (zfs_error(hdl, EZFS_VDEVNOTSUP, msg));
 		}
 
 		if (whole_disk) {
 			char resolved[MAXPATHLEN];
 			const char *devpath = path;
 			int rc;
 
 			/* a relative name has to be expanded first */
 			if (path[0] != '/') {
 				rc = zfs_resolve_shortname(path, resolved,
 				    sizeof (resolved));
 				if (rc != 0)
 					return (zfs_error(hdl, EZFS_NODEVICE,
 					    msg));
 
 				devpath = resolved;
 			}
 
 			rc = zpool_relabel_disk(hdl, devpath, msg);
 			if (rc != 0)
 				return (rc);
 		}
 	}
 
 	zc.zc_cookie = VDEV_STATE_ONLINE;
 	zc.zc_obj = flags;
 
 	if (zfs_ioctl(hdl, ZFS_IOC_VDEV_SET_STATE, &zc) == 0) {
 		*newstate = zc.zc_cookie;
 		return (0);
 	}
 
 	if (errno != EINVAL)
 		return (zpool_standard_error(hdl, errno, msg));
 
 	zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "was split "
 	    "from this pool into a new one.  Use '%s' "
 	    "instead"), "zpool detach");
 	return (zfs_error(hdl, EZFS_POSTSPLIT_ONLINE, msg));
 }
 
 /*
  * Take the specified vdev offline
  *
  * 'istmp' selects ZFS_OFFLINE_TEMPORARY for the request.  Available spares
  * are refused.  Returns 0 on success; on failure the error is reported and
  * the reporting call's result is returned.
  */
 int
 zpool_vdev_offline(zpool_handle_t *zhp, const char *path, boolean_t istmp)
 {
 	libzfs_handle_t *hdl = zhp->zpool_hdl;
 	zfs_cmd_t zc = {"\0"};
 	boolean_t is_avail_spare, is_l2cache;
 	nvlist_t *vdev;
 	char msg[1024];
 	int err;
 
 	(void) snprintf(msg, sizeof (msg),
 	    dgettext(TEXT_DOMAIN, "cannot offline %s"), path);
 
 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
 
 	vdev = zpool_find_vdev(zhp, path, &is_avail_spare, &is_l2cache, NULL);
 	if (vdev == NULL)
 		return (zfs_error(hdl, EZFS_NODEVICE, msg));
 
 	verify(nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
 
 	if (is_avail_spare)
 		return (zfs_error(hdl, EZFS_ISSPARE, msg));
 
 	zc.zc_cookie = VDEV_STATE_OFFLINE;
 	zc.zc_obj = istmp ? ZFS_OFFLINE_TEMPORARY : 0;
 
 	if (zfs_ioctl(hdl, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
 		return (0);
 
 	err = errno;
 
 	/* There are no other replicas of this device. */
 	if (err == EBUSY)
 		return (zfs_error(hdl, EZFS_NOREPLICAS, msg));
 
 	/* The log device has unplayed logs */
 	if (err == EEXIST)
 		return (zfs_error(hdl, EZFS_UNPLAYED_LOGS, msg));
 
 	return (zpool_standard_error(hdl, err, msg));
 }
 
 /*
  * Mark the given vdev faulted.
  *
  * The vdev is addressed by guid; 'aux' is passed along in zc_obj.  Returns
  * 0 on success; on failure the error is reported and the reporting call's
  * result is returned.
  */
 int
 zpool_vdev_fault(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux)
 {
 	libzfs_handle_t *hdl = zhp->zpool_hdl;
 	zfs_cmd_t zc = {"\0"};
 	char msg[1024];
 	int err;
 
 	(void) snprintf(msg, sizeof (msg),
 	    dgettext(TEXT_DOMAIN, "cannot fault %llu"), (u_longlong_t)guid);
 
 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
 	zc.zc_guid = guid;
 	zc.zc_cookie = VDEV_STATE_FAULTED;
 	zc.zc_obj = aux;
 
 	if (zfs_ioctl(hdl, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
 		return (0);
 
 	err = errno;
 
 	/* There are no other replicas of this device. */
 	if (err == EBUSY)
 		return (zfs_error(hdl, EZFS_NOREPLICAS, msg));
 
 	return (zpool_standard_error(hdl, err, msg));
 }
 
 /*
  * Mark the given vdev degraded.
  *
  * The vdev is addressed by guid; 'aux' is passed along in zc_obj.  Returns
  * 0 on success, otherwise the result of zpool_standard_error().
  */
 int
 zpool_vdev_degrade(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux)
 {
 	libzfs_handle_t *hdl = zhp->zpool_hdl;
 	zfs_cmd_t zc = {"\0"};
 	char msg[1024];
 
 	(void) snprintf(msg, sizeof (msg),
 	    dgettext(TEXT_DOMAIN, "cannot degrade %llu"), (u_longlong_t)guid);
 
 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
 	zc.zc_guid = guid;
 	zc.zc_cookie = VDEV_STATE_DEGRADED;
 	zc.zc_obj = aux;
 
 	if (zfs_ioctl(hdl, ZFS_IOC_VDEV_SET_STATE, &zc) != 0)
 		return (zpool_standard_error(hdl, errno, msg));
 
 	return (0);
 }
 
 /*
  * Returns TRUE if the given nvlist is a vdev that was originally swapped in as
  * a hot spare.
  *
  * The tree rooted at 'search' is walked depth-first looking for a spare (or
  * draid spare) vdev with exactly two children whose child at index 'which'
  * is 'tgt' (compared by pointer).
  */
 static boolean_t
 is_replacing_spare(nvlist_t *search, nvlist_t *tgt, int which)
 {
 	nvlist_t **kids;
 	uint_t nkids, i;
 	char *kind;
 
 	/* a node without children cannot contain the target */
 	if (nvlist_lookup_nvlist_array(search, ZPOOL_CONFIG_CHILDREN, &kids,
 	    &nkids) != 0)
 		return (B_FALSE);
 
 	verify(nvlist_lookup_string(search, ZPOOL_CONFIG_TYPE, &kind) == 0);
 
 	if (nkids == 2 && kids[which] == tgt &&
 	    (strcmp(kind, VDEV_TYPE_SPARE) == 0 ||
 	    strcmp(kind, VDEV_TYPE_DRAID_SPARE) == 0))
 		return (B_TRUE);
 
 	for (i = 0; i < nkids; i++) {
 		if (is_replacing_spare(kids[i], tgt, which))
 			return (B_TRUE);
 	}
 
 	return (B_FALSE);
 }
 
 /*
  * Attach new_disk (fully described by nvroot) to old_disk.
  * If 'replacing' is specified, the new disk will replace the old one.
  *
  * 'rebuild' is passed to the kernel in zc_simple and is refused up front
  * when the "org.openzfs:device_rebuild" feature cannot be looked up.
  * 'nvroot' must contain exactly one child describing the new device.
  *
  * Returns 0 on success.  On failure the error is reported through the
  * libzfs handle and a non-zero value is returned (-1, or the result of
  * zfs_error() for the checks performed before the ioctl).
  */
 int
 zpool_vdev_attach(zpool_handle_t *zhp, const char *old_disk,
     const char *new_disk, nvlist_t *nvroot, int replacing, boolean_t rebuild)
 {
 	zfs_cmd_t zc = {"\0"};
 	char msg[1024];
 	int ret;
 	int ioctl_errno;
 	nvlist_t *tgt;
 	boolean_t avail_spare, l2cache, islog;
 	uint64_t val;
 	char *newname;
 	nvlist_t **child;
 	uint_t children;
 	nvlist_t *config_root;
 	libzfs_handle_t *hdl = zhp->zpool_hdl;
 
 	if (replacing)
 		(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
 		    "cannot replace %s with %s"), old_disk, new_disk);
 	else
 		(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
 		    "cannot attach %s to %s"), new_disk, old_disk);
 
 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
 	if ((tgt = zpool_find_vdev(zhp, old_disk, &avail_spare, &l2cache,
 	    &islog)) == NULL)
 		return (zfs_error(hdl, EZFS_NODEVICE, msg));
 
 	if (avail_spare)
 		return (zfs_error(hdl, EZFS_ISSPARE, msg));
 
 	if (l2cache)
 		return (zfs_error(hdl, EZFS_ISL2CACHE, msg));
 
 	verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
 	zc.zc_cookie = replacing;
 	zc.zc_simple = rebuild;
 
 	if (rebuild &&
 	    zfeature_lookup_guid("org.openzfs:device_rebuild", NULL) != 0) {
 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 		    "the loaded zfs module doesn't support device rebuilds"));
 		return (zfs_error(hdl, EZFS_POOL_NOTSUP, msg));
 	}
 
 	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
 	    &child, &children) != 0 || children != 1) {
 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 		    "new device must be a single disk"));
 		return (zfs_error(hdl, EZFS_INVALCONFIG, msg));
 	}
 
 	verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL),
 	    ZPOOL_CONFIG_VDEV_TREE, &config_root) == 0);
 
 	if ((newname = zpool_vdev_name(NULL, NULL, child[0], 0)) == NULL)
 		return (-1);
 
 	/*
 	 * If the target is a hot spare that has been swapped in, we can only
 	 * replace it with another hot spare.
 	 */
 	if (replacing &&
 	    nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_IS_SPARE, &val) == 0 &&
 	    (zpool_find_vdev(zhp, newname, &avail_spare, &l2cache,
 	    NULL) == NULL || !avail_spare) &&
 	    is_replacing_spare(config_root, tgt, 1)) {
 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 		    "can only be replaced by another hot spare"));
 		free(newname);
 		return (zfs_error(hdl, EZFS_BADTARGET, msg));
 	}
 
 	free(newname);
 
 	if (zcmd_write_conf_nvlist(hdl, &zc, nvroot) != 0)
 		return (-1);
 
 	ret = zfs_ioctl(hdl, ZFS_IOC_VDEV_ATTACH, &zc);
 	/*
 	 * Capture errno right away: the cleanup below may overwrite it
 	 * before the failure is classified.
 	 */
 	ioctl_errno = errno;
 
 	zcmd_free_nvlists(&zc);
 
 	if (ret == 0)
 		return (0);
 
 	switch (ioctl_errno) {
 	case ENOTSUP:
 		/*
 		 * Can't attach to or replace this type of vdev.
 		 */
 		if (replacing) {
 			uint64_t version = zpool_get_prop_int(zhp,
 			    ZPOOL_PROP_VERSION, NULL);
 
 			if (islog) {
 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 				    "cannot replace a log with a spare"));
 			} else if (rebuild) {
 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 				    "only mirror and dRAID vdevs support "
 				    "sequential reconstruction"));
 			} else if (zpool_is_draid_spare(new_disk)) {
 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 				    "dRAID spares can only replace child "
 				    "devices in their parent's dRAID vdev"));
 			} else if (version >= SPA_VERSION_MULTI_REPLACE) {
 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 				    "already in replacing/spare config; wait "
 				    "for completion or use 'zpool detach'"));
 			} else {
 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 				    "cannot replace a replacing device"));
 			}
 		} else {
 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 			    "can only attach to mirrors and top-level "
 			    "disks"));
 		}
 		(void) zfs_error(hdl, EZFS_BADTARGET, msg);
 		break;
 
 	case EINVAL:
 		/*
 		 * The new device must be a single disk.
 		 */
 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 		    "new device must be a single disk"));
 		(void) zfs_error(hdl, EZFS_INVALCONFIG, msg);
 		break;
 
 	case EBUSY:
 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "%s is busy, "
 		    "or device removal is in progress"),
 		    new_disk);
 		(void) zfs_error(hdl, EZFS_BADDEV, msg);
 		break;
 
 	case EOVERFLOW:
 		/*
 		 * The new device is too small.
 		 */
 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 		    "device is too small"));
 		(void) zfs_error(hdl, EZFS_BADDEV, msg);
 		break;
 
 	case EDOM:
 		/*
 		 * The new device has a different optimal sector size.
 		 */
 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 		    "new device has a different optimal sector size; use the "
 		    "option '-o ashift=N' to override the optimal size"));
 		(void) zfs_error(hdl, EZFS_BADDEV, msg);
 		break;
 
 	case ENAMETOOLONG:
 		/*
 		 * The resulting top-level vdev spec won't fit in the label.
 		 */
 		(void) zfs_error(hdl, EZFS_DEVOVERFLOW, msg);
 		break;
 
 	default:
 		(void) zpool_standard_error(hdl, ioctl_errno, msg);
 	}
 
 	return (-1);
 }
 
 /*
  * Detach the specified device.
  *
  * Available spares and cache devices are refused before the ioctl is
  * issued.  Returns 0 on success; on failure the error is reported and a
  * non-zero value is returned.
  */
 int
 zpool_vdev_detach(zpool_handle_t *zhp, const char *path)
 {
 	libzfs_handle_t *hdl = zhp->zpool_hdl;
 	zfs_cmd_t zc = {"\0"};
 	boolean_t is_avail_spare, is_l2cache;
 	nvlist_t *vdev;
 	char msg[1024];
 	int err;
 
 	(void) snprintf(msg, sizeof (msg),
 	    dgettext(TEXT_DOMAIN, "cannot detach %s"), path);
 
 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
 
 	vdev = zpool_find_vdev(zhp, path, &is_avail_spare, &is_l2cache, NULL);
 	if (vdev == NULL)
 		return (zfs_error(hdl, EZFS_NODEVICE, msg));
 
 	if (is_avail_spare)
 		return (zfs_error(hdl, EZFS_ISSPARE, msg));
 
 	if (is_l2cache)
 		return (zfs_error(hdl, EZFS_ISL2CACHE, msg));
 
 	verify(nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
 
 	if (zfs_ioctl(hdl, ZFS_IOC_VDEV_DETACH, &zc) == 0)
 		return (0);
 
 	err = errno;
 
 	if (err == ENOTSUP) {
 		/* Can't detach from this type of vdev. */
 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 		    "only applicable to mirror and replacing vdevs"));
 		(void) zfs_error(hdl, EZFS_BADTARGET, msg);
 	} else if (err == EBUSY) {
 		/* There are no other replicas of this device. */
 		(void) zfs_error(hdl, EZFS_NOREPLICAS, msg);
 	} else {
 		(void) zpool_standard_error(hdl, err, msg);
 	}
 
 	return (-1);
 }
 
 /*
  * Find a mirror vdev in the source nvlist.
  *
  * 'mchild' holds the disks of one top-level mirror of the source pool and
  * 'schild' the disks the user named on the command line.  Disks are
  * compared by the name zpool_vdev_name() produces for them.
  *
  * Returns the index into 'mchild' of the first disk that also appears in
  * 'schild', or -1 if the two arrays have no disk in common.
  */
 static int
 find_vdev_entry(zpool_handle_t *zhp, nvlist_t **mchild, uint_t mchildren,
     nvlist_t **schild, uint_t schildren)
 {
 	uint_t mi, si;
 
 	for (mi = 0; mi < mchildren; mi++) {
 		char *mname = zpool_vdev_name(zhp->zpool_hdl, zhp,
 		    mchild[mi], 0);
 		boolean_t hit = B_FALSE;
 
 		/* stop scanning the user's list at the first match */
 		for (si = 0; si < schildren && !hit; si++) {
 			char *sname = zpool_vdev_name(zhp->zpool_hdl, zhp,
 			    schild[si], 0);
 
 			hit = (strcmp(mname, sname) == 0);
 			free(sname);
 		}
 
 		free(mname);
 		if (hit)
 			return (mi);
 	}
 
 	return (-1);
 }
 
 /*
  * Split a mirror pool.  If newroot points to null, then a new nvlist
  * is generated and it is the responsibility of the caller to free it.
  *
  * For every top-level mirror one child is selected for the new pool named
  * 'newname': the device listed in *newroot when one matches, otherwise the
  * last child of the mirror.  Log and hole vdevs are represented by hole
  * placeholders and indirect vdevs are copied as-is; any other non-mirror
  * top-level vdev is rejected with EZFS_INVALCONFIG.  'props', when non-NULL,
  * is validated and sent along with the split request.
  *
  * With flags.dryrun set, the new vdev tree is only assembled in *newroot and
  * no ioctl is issued.  Unless flags.import is set, the request asks for the
  * new pool to be exported after the split.
  *
  * Returns 0 on success.  On failure returns the value produced by the libzfs
  * error routine that reported the problem, or -1.
  */
 int
 zpool_vdev_split(zpool_handle_t *zhp, char *newname, nvlist_t **newroot,
     nvlist_t *props, splitflags_t flags)
 {
 	zfs_cmd_t zc = {"\0"};
 	char msg[1024], *bias;
 	nvlist_t *tree, *config, **child, **newchild, *newconfig = NULL;
 	nvlist_t **varray = NULL, *zc_props = NULL;
 	uint_t c, children, newchildren, lastlog = 0, vcount, found = 0;
 	libzfs_handle_t *hdl = zhp->zpool_hdl;
 	uint64_t vers, readonly = B_FALSE;
 	boolean_t freelist = B_FALSE, memory_err = B_TRUE;
 	int retval = 0;
 
 	(void) snprintf(msg, sizeof (msg),
 	    dgettext(TEXT_DOMAIN, "Unable to split %s"), zhp->zpool_name);
 
 	if (!zpool_name_valid(hdl, B_FALSE, newname))
 		return (zfs_error(hdl, EZFS_INVALIDNAME, msg));
 
 	if ((config = zpool_get_config(zhp, NULL)) == NULL) {
 		(void) fprintf(stderr, gettext("Internal error: unable to "
 		    "retrieve pool configuration\n"));
 		return (-1);
 	}
 
 	verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &tree)
 	    == 0);
 	verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, &vers) == 0);
 
 	if (props) {
 		/*
 		 * Named 'pflags' so it does not shadow the splitflags_t
 		 * 'flags' parameter.
 		 */
 		prop_flags_t pflags = { .create = B_FALSE, .import = B_TRUE };
 		if ((zc_props = zpool_valid_proplist(hdl, zhp->zpool_name,
 		    props, vers, pflags, msg)) == NULL)
 			return (-1);
 		(void) nvlist_lookup_uint64(zc_props,
 		    zpool_prop_to_name(ZPOOL_PROP_READONLY), &readonly);
 		if (readonly) {
 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 			    "property %s can only be set at import time"),
 			    zpool_prop_to_name(ZPOOL_PROP_READONLY));
 			/* The validated property list is ours to release. */
 			nvlist_free(zc_props);
 			return (-1);
 		}
 	}
 
 	if (nvlist_lookup_nvlist_array(tree, ZPOOL_CONFIG_CHILDREN, &child,
 	    &children) != 0) {
 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 		    "Source pool is missing vdev tree"));
 		nvlist_free(zc_props);
 		return (-1);
 	}
 
 	/* One slot per top-level vdev of the source pool. */
 	varray = zfs_alloc(hdl, children * sizeof (nvlist_t *));
 	vcount = 0;
 
 	/* No usable device list from the caller means "pick for me". */
 	if (*newroot == NULL ||
 	    nvlist_lookup_nvlist_array(*newroot, ZPOOL_CONFIG_CHILDREN,
 	    &newchild, &newchildren) != 0)
 		newchildren = 0;
 
 	for (c = 0; c < children; c++) {
 		uint64_t is_log = B_FALSE, is_hole = B_FALSE;
 		boolean_t is_special = B_FALSE, is_dedup = B_FALSE;
 		char *type;
 		nvlist_t **mchild, *vdev;
 		uint_t mchildren;
 		int entry;
 
 		/*
 		 * Unlike cache & spares, slogs are stored in the
 		 * ZPOOL_CONFIG_CHILDREN array.  We filter them out here.
 		 */
 		(void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG,
 		    &is_log);
 		(void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_HOLE,
 		    &is_hole);
 		if (is_log || is_hole) {
 			/*
 			 * Create a hole vdev and put it in the config.
 			 */
 			if (nvlist_alloc(&vdev, NV_UNIQUE_NAME, 0) != 0)
 				goto out;
 			if (nvlist_add_string(vdev, ZPOOL_CONFIG_TYPE,
 			    VDEV_TYPE_HOLE) != 0 ||
 			    nvlist_add_uint64(vdev, ZPOOL_CONFIG_IS_HOLE,
 			    1) != 0) {
 				/* Not yet owned by varray; free it here. */
 				nvlist_free(vdev);
 				goto out;
 			}
 			/*
 			 * Remember where the current run of holes starts so
 			 * that a trailing run can be left out below.
 			 */
 			if (lastlog == 0)
 				lastlog = vcount;
 			varray[vcount++] = vdev;
 			continue;
 		}
 		lastlog = 0;
 		verify(nvlist_lookup_string(child[c], ZPOOL_CONFIG_TYPE, &type)
 		    == 0);
 
 		if (strcmp(type, VDEV_TYPE_INDIRECT) == 0) {
 			vdev = child[c];
 			if (nvlist_dup(vdev, &varray[vcount++], 0) != 0)
 				goto out;
 			continue;
 		} else if (strcmp(type, VDEV_TYPE_MIRROR) != 0) {
 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 			    "Source pool must be composed only of mirrors\n"));
 			retval = zfs_error(hdl, EZFS_INVALCONFIG, msg);
 			goto out;
 		}
 
 		if (nvlist_lookup_string(child[c],
 		    ZPOOL_CONFIG_ALLOCATION_BIAS, &bias) == 0) {
 			if (strcmp(bias, VDEV_ALLOC_BIAS_SPECIAL) == 0)
 				is_special = B_TRUE;
 			else if (strcmp(bias, VDEV_ALLOC_BIAS_DEDUP) == 0)
 				is_dedup = B_TRUE;
 		}
 		verify(nvlist_lookup_nvlist_array(child[c],
 		    ZPOOL_CONFIG_CHILDREN, &mchild, &mchildren) == 0);
 
 		/* find or add an entry for this top-level vdev */
 		if (newchildren > 0 &&
 		    (entry = find_vdev_entry(zhp, mchild, mchildren,
 		    newchild, newchildren)) >= 0) {
 			/* We found a disk that the user specified. */
 			vdev = mchild[entry];
 			++found;
 		} else {
 			/* User didn't specify a disk for this vdev. */
 			vdev = mchild[mchildren - 1];
 		}
 
 		if (nvlist_dup(vdev, &varray[vcount++], 0) != 0)
 			goto out;
 
 		/*
 		 * For a dry run, carry the mirror's allocation bias over to
 		 * the copied child.
 		 */
 		if (flags.dryrun != 0) {
 			if (is_dedup == B_TRUE) {
 				if (nvlist_add_string(varray[vcount - 1],
 				    ZPOOL_CONFIG_ALLOCATION_BIAS,
 				    VDEV_ALLOC_BIAS_DEDUP) != 0)
 					goto out;
 			} else if (is_special == B_TRUE) {
 				if (nvlist_add_string(varray[vcount - 1],
 				    ZPOOL_CONFIG_ALLOCATION_BIAS,
 				    VDEV_ALLOC_BIAS_SPECIAL) != 0)
 					goto out;
 			}
 		}
 	}
 
 	/* did we find every disk the user specified? */
 	if (found != newchildren) {
 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "Device list must "
 		    "include at most one disk from each mirror"));
 		retval = zfs_error(hdl, EZFS_INVALCONFIG, msg);
 		goto out;
 	}
 
 	/* Prepare the nvlist for populating. */
 	if (*newroot == NULL) {
 		if (nvlist_alloc(newroot, NV_UNIQUE_NAME, 0) != 0)
 			goto out;
 		freelist = B_TRUE;
 		if (nvlist_add_string(*newroot, ZPOOL_CONFIG_TYPE,
 		    VDEV_TYPE_ROOT) != 0)
 			goto out;
 	} else {
 		verify(nvlist_remove_all(*newroot, ZPOOL_CONFIG_CHILDREN) == 0);
 	}
 
 	/* Add all the children we found */
 	if (nvlist_add_nvlist_array(*newroot, ZPOOL_CONFIG_CHILDREN, varray,
 	    lastlog == 0 ? vcount : lastlog) != 0)
 		goto out;
 
 	/*
 	 * If we're just doing a dry run, exit now with success.
 	 */
 	if (flags.dryrun) {
 		memory_err = B_FALSE;
 		freelist = B_FALSE;
 		goto out;
 	}
 
 	/* now build up the config list & call the ioctl */
 	if (nvlist_alloc(&newconfig, NV_UNIQUE_NAME, 0) != 0)
 		goto out;
 
 	if (nvlist_add_nvlist(newconfig,
 	    ZPOOL_CONFIG_VDEV_TREE, *newroot) != 0 ||
 	    nvlist_add_string(newconfig,
 	    ZPOOL_CONFIG_POOL_NAME, newname) != 0 ||
 	    nvlist_add_uint64(newconfig, ZPOOL_CONFIG_VERSION, vers) != 0)
 		goto out;
 
 	/*
 	 * The new pool is automatically part of the namespace unless we
 	 * explicitly export it.
 	 */
 	if (!flags.import)
 		zc.zc_cookie = ZPOOL_EXPORT_AFTER_SPLIT;
 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
 	(void) strlcpy(zc.zc_string, newname, sizeof (zc.zc_string));
 	if (zcmd_write_conf_nvlist(hdl, &zc, newconfig) != 0)
 		goto out;
 	if (zc_props != NULL && zcmd_write_src_nvlist(hdl, &zc, zc_props) != 0)
 		goto out;
 
 	if (zfs_ioctl(hdl, ZFS_IOC_VDEV_SPLIT, &zc) != 0) {
 		retval = zpool_standard_error(hdl, errno, msg);
 		goto out;
 	}
 
 	freelist = B_FALSE;
 	memory_err = B_FALSE;
 
 out:
 	/*
 	 * Common exit: release everything built above.  Any path that got
 	 * here without clearing memory_err or setting retval is reported as
 	 * an out-of-memory failure.
 	 */
 	if (varray != NULL) {
 		uint_t v;
 
 		for (v = 0; v < vcount; v++)
 			nvlist_free(varray[v]);
 		free(varray);
 	}
 	zcmd_free_nvlists(&zc);
 	nvlist_free(zc_props);
 	nvlist_free(newconfig);
 	if (freelist) {
 		nvlist_free(*newroot);
 		*newroot = NULL;
 	}
 
 	if (retval != 0)
 		return (retval);
 
 	if (memory_err)
 		return (no_memory(hdl));
 
 	return (0);
 }
 
 /*
  * Remove the given device.
  *
  * Looks up 'path' in the pool and asks the kernel to remove the vdev with
  * the matching GUID.  dRAID spares are refused up front, and log devices are
  * refused when the pool version is older than SPA_VERSION_HOLES.
  *
  * Returns 0 on success.  Failures detected before the ioctl return the
  * result of zfs_error(); a failed ioctl is reported through the libzfs
  * handle and yields -1.
  */
 int
 zpool_vdev_remove(zpool_handle_t *zhp, const char *path)
 {
 	libzfs_handle_t *hdl = zhp->zpool_hdl;
 	zfs_cmd_t zc = {"\0"};
 	boolean_t avail_spare, l2cache, islog;
 	nvlist_t *tgt;
 	uint64_t version;
 	char msg[1024];
 
 	(void) snprintf(msg, sizeof (msg),
 	    dgettext(TEXT_DOMAIN, "cannot remove %s"), path);
 
 	if (zpool_is_draid_spare(path)) {
 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 		    "dRAID spares cannot be removed"));
 		return (zfs_error(hdl, EZFS_NODEVICE, msg));
 	}
 
 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
 
 	tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache, &islog);
 	if (tgt == NULL)
 		return (zfs_error(hdl, EZFS_NODEVICE, msg));
 
 	version = zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL);
 	if (islog && version < SPA_VERSION_HOLES) {
 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 		    "pool must be upgraded to support log removal"));
 		return (zfs_error(hdl, EZFS_BADVERSION, msg));
 	}
 
 	zc.zc_guid = fnvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID);
 
 	if (zfs_ioctl(hdl, ZFS_IOC_VDEV_REMOVE, &zc) == 0)
 		return (0);
 
 	/* Translate the ioctl failure into a libzfs error report. */
 	if (errno == EINVAL) {
 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 		    "invalid config; all top-level vdevs must "
 		    "have the same sector size and not be raidz."));
 		(void) zfs_error(hdl, EZFS_INVALCONFIG, msg);
 	} else if (islog && (errno == EBUSY || errno == EACCES)) {
 		/* Both errnos get the same report for a log device. */
 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 		    "Mount encrypted datasets to replay logs."));
 		(void) zfs_error(hdl, EZFS_BUSY, msg);
 	} else if (errno == EBUSY) {
 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 		    "Pool busy; removal may already be in progress"));
 		(void) zfs_error(hdl, EZFS_BUSY, msg);
 	} else {
 		(void) zpool_standard_error(hdl, errno, msg);
 	}
 
 	return (-1);
 }
 
 /*
  * Cancel a device removal in the pool.  Issues ZFS_IOC_VDEV_REMOVE with
  * zc_cookie set to 1 and no target GUID.  Returns 0 on success, otherwise
  * the result of zpool_standard_error().
  */
 int
 zpool_vdev_remove_cancel(zpool_handle_t *zhp)
 {
 	libzfs_handle_t *hdl = zhp->zpool_hdl;
 	char msg[1024];
 	zfs_cmd_t zc;
 
 	bzero(&zc, sizeof (zc));
 	zc.zc_cookie = 1;
 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
 
 	(void) snprintf(msg, sizeof (msg),
 	    dgettext(TEXT_DOMAIN, "cannot cancel removal"));
 
 	if (zfs_ioctl(hdl, ZFS_IOC_VDEV_REMOVE, &zc) != 0)
 		return (zpool_standard_error(hdl, errno, msg));
 
 	return (0);
 }
 
 /*
  * Look up the ZPOOL_CONFIG_INDIRECT_SIZE value recorded for the vdev named
  * by 'path' and store it in *sizep.  Available spares, l2cache devices and
  * log devices report a size of 0.
  *
  * Returns 0 on success, or the result of zfs_error() when the device cannot
  * be found or carries no indirect size.
  */
 int
 zpool_vdev_indirect_size(zpool_handle_t *zhp, const char *path,
     uint64_t *sizep)
 {
 	libzfs_handle_t *hdl = zhp->zpool_hdl;
 	boolean_t avail_spare, l2cache, islog;
 	nvlist_t *tgt;
 	char msg[1024];
 
 	(void) snprintf(msg, sizeof (msg),
 	    dgettext(TEXT_DOMAIN, "cannot determine indirect size of %s"),
 	    path);
 
 	tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache, &islog);
 	if (tgt == NULL)
 		return (zfs_error(hdl, EZFS_NODEVICE, msg));
 
 	if (avail_spare || l2cache || islog) {
 		*sizep = 0;
 		return (0);
 	}
 
 	if (nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_INDIRECT_SIZE, sizep) == 0)
 		return (0);
 
 	/*
 	 * NOTE(review): EINVAL is passed where the other callers in this
 	 * file pass an EZFS_* code -- confirm that this is intended.
 	 */
 	zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 	    "indirect size not available"));
 	return (zfs_error(hdl, EINVAL, msg));
 }
 
 /*
  * Clear the errors for the pool, or the particular device if specified.
  *
  * 'path' may be NULL to clear errors for the whole pool.  'rewindnvl'
  * supplies the load policy; its rewind setting is passed to the kernel and,
  * when a rewind was requested, the outcome is reported through
  * zpool_rewind_exclaim().
  *
  * Returns 0 on success (and also when a ZPOOL_TRY_REWIND request fails with
  * anything other than EPERM or EACCES).  Otherwise returns -1 or the result
  * of the libzfs error routine that reported the failure.
  */
 int
 zpool_clear(zpool_handle_t *zhp, const char *path, nvlist_t *rewindnvl)
 {
 	zfs_cmd_t zc = {"\0"};
 	char msg[1024];
 	nvlist_t *tgt;
 	zpool_load_policy_t policy;
 	boolean_t avail_spare, l2cache;
 	libzfs_handle_t *hdl = zhp->zpool_hdl;
 	nvlist_t *nvi = NULL;
 	int error, saved_errno;
 
 	/* The message names the device when given, else the pool. */
 	(void) snprintf(msg, sizeof (msg),
 	    dgettext(TEXT_DOMAIN, "cannot clear errors for %s"),
 	    path ? path : zhp->zpool_name);
 
 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
 	if (path) {
 		if ((tgt = zpool_find_vdev(zhp, path, &avail_spare,
 		    &l2cache, NULL)) == NULL)
 			return (zfs_error(hdl, EZFS_NODEVICE, msg));
 
 		/*
 		 * Don't allow error clearing for hot spares.  Do allow
 		 * error clearing for l2cache devices.
 		 */
 		if (avail_spare)
 			return (zfs_error(hdl, EZFS_ISSPARE, msg));
 
 		verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID,
 		    &zc.zc_guid) == 0);
 	}
 
 	zpool_get_load_policy(rewindnvl, &policy);
 	zc.zc_cookie = policy.zlp_rewind;
 
 	if (zcmd_alloc_dst_nvlist(hdl, &zc, zhp->zpool_config_size * 2) != 0)
 		return (-1);
 
 	if (zcmd_write_src_nvlist(hdl, &zc, rewindnvl) != 0) {
 		/* Release the destination buffer allocated just above. */
 		zcmd_free_nvlists(&zc);
 		return (-1);
 	}
 
 	/* Retry with a larger destination buffer while the kernel asks. */
 	while ((error = zfs_ioctl(hdl, ZFS_IOC_CLEAR, &zc)) != 0 &&
 	    errno == ENOMEM) {
 		if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
 			zcmd_free_nvlists(&zc);
 			return (-1);
 		}
 	}
 
 	/* Capture errno before any cleanup call gets a chance to alter it. */
 	saved_errno = errno;
 
 	if (!error || ((policy.zlp_rewind & ZPOOL_TRY_REWIND) &&
 	    saved_errno != EPERM && saved_errno != EACCES)) {
 		if (policy.zlp_rewind &
 		    (ZPOOL_DO_REWIND | ZPOOL_TRY_REWIND)) {
 			(void) zcmd_read_dst_nvlist(hdl, &zc, &nvi);
 			zpool_rewind_exclaim(hdl, zc.zc_name,
 			    ((policy.zlp_rewind & ZPOOL_TRY_REWIND) != 0),
 			    nvi);
 			nvlist_free(nvi);
 		}
 		zcmd_free_nvlists(&zc);
 		return (0);
 	}
 
 	zcmd_free_nvlists(&zc);
 	return (zpool_standard_error(hdl, saved_errno, msg));
 }
 
 /*
  * Similar to zpool_clear(), but takes a GUID (used by fmd).
  *
  * The request is issued with ZPOOL_NO_REWIND.  Returns 0 on success,
  * otherwise the result of zpool_standard_error().
  */
 int
 zpool_vdev_clear(zpool_handle_t *zhp, uint64_t guid)
 {
 	libzfs_handle_t *hdl = zhp->zpool_hdl;
 	zfs_cmd_t zc = {"\0"};
 	char msg[1024];
 
 	(void) snprintf(msg, sizeof (msg),
 	    dgettext(TEXT_DOMAIN, "cannot clear errors for %llx"),
 	    (u_longlong_t)guid);
 
 	zc.zc_cookie = ZPOOL_NO_REWIND;
 	zc.zc_guid = guid;
 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
 
 	if (zfs_ioctl(hdl, ZFS_IOC_CLEAR, &zc) != 0)
 		return (zpool_standard_error(hdl, errno, msg));
 
 	return (0);
 }
 
 /*
  * Change the GUID for a pool.
  *
  * Returns 0 on success, otherwise the result of zpool_standard_error().
  */
 int
 zpool_reguid(zpool_handle_t *zhp)
 {
 	libzfs_handle_t *hdl = zhp->zpool_hdl;
 	zfs_cmd_t zc = {"\0"};
 	char msg[1024];
 
 	(void) snprintf(msg, sizeof (msg),
 	    dgettext(TEXT_DOMAIN, "cannot reguid '%s'"), zhp->zpool_name);
 
 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
 
 	if (zfs_ioctl(hdl, ZFS_IOC_POOL_REGUID, &zc) != 0)
 		return (zpool_standard_error(hdl, errno, msg));
 
 	return (0);
 }
 
 /*
  * Reopen the pool.
  *
  * 'data' points to a boolean_t that is handed to lzc_reopen() as the
  * scrub-restart flag.  Returns 0 on success, otherwise the result of
  * zpool_standard_error_fmt().
  */
 int
 zpool_reopen_one(zpool_handle_t *zhp, void *data)
 {
 	const char *pool_name = zpool_get_name(zhp);
 	libzfs_handle_t *hdl = zpool_get_handle(zhp);
 	boolean_t *scrub_restart = data;
 	int error = lzc_reopen(pool_name, *scrub_restart);
 
 	if (error == 0)
 		return (0);
 
 	return (zpool_standard_error_fmt(hdl, error,
 	    dgettext(TEXT_DOMAIN, "cannot reopen '%s'"), pool_name));
 }
 
 /*
  * Call into libzfs_core to execute the sync ioctl for one pool.  'data'
  * points to a boolean_t that is sent as the "force" argument.  Returns 0 on
  * success, otherwise the result of zpool_standard_error_fmt().
  */
 int
 zpool_sync_one(zpool_handle_t *zhp, void *data)
 {
 	libzfs_handle_t *hdl = zpool_get_handle(zhp);
 	const char *pool_name = zpool_get_name(zhp);
 	boolean_t *force = data;
 	nvlist_t *innvl;
 	int ret;
 
 	innvl = fnvlist_alloc();
 	fnvlist_add_boolean_value(innvl, "force", *force);
 
 	/* The argument list is no longer needed once the call returns. */
 	ret = lzc_sync(pool_name, innvl, NULL);
 	nvlist_free(innvl);
 
 	if (ret == 0)
 		return (0);
 
 	return (zpool_standard_error_fmt(hdl, ret,
 	    dgettext(TEXT_DOMAIN, "sync '%s' failed"), pool_name));
 }
 
 /* Size of the on-stack scratch buffers used to build a vdev name. */
 #define	PATH_BUF_LEN	64
 
 /*
  * Given a vdev, return the name to display in iostat.  If the vdev has a path,
  * we use that, stripping off any leading "/dev/dsk/"; if not, we use the type.
  * We also check if this is a whole disk, in which case we strip off the
  * trailing 's0' slice name.
  *
  * This routine is also responsible for identifying when disks have been
  * reconfigured in a new location.  The kernel will have opened the device by
  * devid, but the path will still refer to the old location.  To catch this, we
  * first do a path -> devid translation (which is fast for the common case).  If
  * the devid matches, we're done.  If not, we do a reverse devid -> path
  * translation and issue the appropriate ioctl() to update the path of the vdev.
  * If 'zhp' is NULL, then this is an exported pool, and we don't need to do any
  * of these checks.
  *
  * NOTE(review): the devid <-> path translation described above is not
  * performed anywhere in this function body; the paragraph appears to be
  * historical.
  *
  * 'name_flags' is a mask of VDEV_NAME_* bits.  The environment variables
  * ZPOOL_VDEV_NAME_PATH, ZPOOL_VDEV_NAME_GUID and ZPOOL_VDEV_NAME_FOLLOW_LINKS
  * can switch the corresponding bits on in addition to what the caller passed.
  *
  * The result comes from zfs_strdup() or zfs_strip_partition(); it is
  * presumably heap-allocated and owned by the caller (callers in this file
  * free() it) -- confirm against those helpers.
  */
 char *
 zpool_vdev_name(libzfs_handle_t *hdl, zpool_handle_t *zhp, nvlist_t *nv,
     int name_flags)
 {
 	char *path, *type, *env;
 	uint64_t value;
 	char buf[PATH_BUF_LEN];
 	char tmpbuf[PATH_BUF_LEN];
 
 	/*
 	 * vdev_name will be "root"/"root-0" for the root vdev, but it is the
 	 * zpool name that will be displayed to the user.
 	 */
 	verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0);
 	if (zhp != NULL && strcmp(type, "root") == 0)
 		return (zfs_strdup(hdl, zpool_get_name(zhp)));
 
 	/*
 	 * Each variable below enables its flag when the value parses as a
 	 * number greater than zero or starts with "YES" or "ON" (compared
 	 * case-insensitively).
 	 */
 	env = getenv("ZPOOL_VDEV_NAME_PATH");
 	if (env && (strtoul(env, NULL, 0) > 0 ||
 	    !strncasecmp(env, "YES", 3) || !strncasecmp(env, "ON", 2)))
 		name_flags |= VDEV_NAME_PATH;
 
 	env = getenv("ZPOOL_VDEV_NAME_GUID");
 	if (env && (strtoul(env, NULL, 0) > 0 ||
 	    !strncasecmp(env, "YES", 3) || !strncasecmp(env, "ON", 2)))
 		name_flags |= VDEV_NAME_GUID;
 
 	env = getenv("ZPOOL_VDEV_NAME_FOLLOW_LINKS");
 	if (env && (strtoul(env, NULL, 0) > 0 ||
 	    !strncasecmp(env, "YES", 3) || !strncasecmp(env, "ON", 2)))
 		name_flags |= VDEV_NAME_FOLLOW_LINKS;
 
 	/*
 	 * Name the vdev by its GUID when the config carries
 	 * ZPOOL_CONFIG_NOT_PRESENT or GUID naming was requested.  The result
 	 * of the GUID lookup is ignored, so 'value' keeps whatever it held
 	 * before if that lookup fails.
 	 */
 	if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT, &value) == 0 ||
 	    name_flags & VDEV_NAME_GUID) {
 		(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &value);
 		(void) snprintf(buf, sizeof (buf), "%llu", (u_longlong_t)value);
 		path = buf;
 	} else if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0) {
 		/*
 		 * Optionally resolve symlinks.  The resolved path is copied
 		 * into 'buf' with strlcpy(), so anything that does not fit in
 		 * PATH_BUF_LEN bytes is cut short.  When realpath() fails the
 		 * configured path is used unchanged.
 		 */
 		if (name_flags & VDEV_NAME_FOLLOW_LINKS) {
 			char *rp = realpath(path, NULL);
 			if (rp) {
 				strlcpy(buf, rp, sizeof (buf));
 				path = buf;
 				free(rp);
 			}
 		}
 
 		/*
 		 * For a block device only use the name.
 		 */
 		if ((strcmp(type, VDEV_TYPE_DISK) == 0) &&
 		    !(name_flags & VDEV_NAME_PATH)) {
 			path = zfs_strip_path(path);
 		}
 
 		/*
 		 * Remove the partition from the path if this is a whole disk.
 		 * dRAID spares are excluded, as is any request for full paths.
 		 */
 		if (strcmp(type, VDEV_TYPE_DRAID_SPARE) != 0 &&
 		    nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK, &value)
 		    == 0 && value && !(name_flags & VDEV_NAME_PATH)) {
 			return (zfs_strip_partition(path));
 		}
 	} else {
 		/* No path in the config: fall back to the vdev type string. */
 		path = type;
 
 		/*
 		 * If it's a raidz device, we need to stick in the parity level.
 		 */
 		if (strcmp(path, VDEV_TYPE_RAIDZ) == 0) {
 			verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NPARITY,
 			    &value) == 0);
 			(void) snprintf(buf, sizeof (buf), "%s%llu", path,
 			    (u_longlong_t)value);
 			path = buf;
 		}
 
 		/*
 		 * If it's a dRAID device, we add parity, groups, and spares.
 		 */
 		if (strcmp(path, VDEV_TYPE_DRAID) == 0) {
 			uint64_t ndata, nparity, nspares;
 			nvlist_t **child;
 			uint_t children;
 
 			verify(nvlist_lookup_nvlist_array(nv,
 			    ZPOOL_CONFIG_CHILDREN, &child, &children) == 0);
 			verify(nvlist_lookup_uint64(nv,
 			    ZPOOL_CONFIG_NPARITY, &nparity) == 0);
 			verify(nvlist_lookup_uint64(nv,
 			    ZPOOL_CONFIG_DRAID_NDATA, &ndata) == 0);
 			verify(nvlist_lookup_uint64(nv,
 			    ZPOOL_CONFIG_DRAID_NSPARES, &nspares) == 0);
 
 			path = zpool_draid_name(buf, sizeof (buf), ndata,
 			    nparity, nspares, children);
 		}
 
 		/*
 		 * We identify each top-level vdev by using a <type-id>
 		 * naming convention.  The result is built in 'tmpbuf' because
 		 * 'path' may already point into 'buf' at this point.
 		 */
 		if (name_flags & VDEV_NAME_TYPE_ID) {
 			uint64_t id;
 			verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ID,
 			    &id) == 0);
 			(void) snprintf(tmpbuf, sizeof (tmpbuf), "%s-%llu",
 			    path, (u_longlong_t)id);
 			path = tmpbuf;
 		}
 	}
 
 	/* 'path' may point at a stack buffer, so hand back a copy. */
 	return (zfs_strdup(hdl, path));
 }
 
 /*
  * qsort() comparator: orders two zbookmark_phys_t records by comparing
  * their raw bytes.
  */
 static int
 zbookmark_mem_compare(const void *a, const void *b)
 {
 	const size_t len = sizeof (zbookmark_phys_t);
 
 	return (memcmp(a, b, len));
 }
 
 /*
  * Retrieve the persistent error log, uniquify the members, and return to the
  * caller.
  *
  * On success *nverrlistp receives a newly allocated nvlist holding one
  * nested nvlist (ZPOOL_ERR_DATASET, ZPOOL_ERR_OBJECT) per distinct
  * dataset/object pair.  When the pool config reports an error count of
  * zero the function returns 0 without touching *nverrlistp.
  *
  * Returns 0 on success, or the result of the libzfs error routine that
  * reported the failure.  If building the list fails, the partial list is
  * freed and *nverrlistp is set to NULL.
  */
 int
 zpool_get_errlog(zpool_handle_t *zhp, nvlist_t **nverrlistp)
 {
 	zfs_cmd_t zc = {"\0"};
 	libzfs_handle_t *hdl = zhp->zpool_hdl;
 	uint64_t count;
 	zbookmark_phys_t *zb = NULL;
 	uint64_t i;
 
 	/*
 	 * Retrieve the raw error list from the kernel.  If the number of errors
 	 * has increased, allocate more space and continue until we get the
 	 * entire list.
 	 */
 	verify(nvlist_lookup_uint64(zhp->zpool_config, ZPOOL_CONFIG_ERRCOUNT,
 	    &count) == 0);
 	if (count == 0)
 		return (0);
 	zc.zc_nvlist_dst = (uintptr_t)zfs_alloc(zhp->zpool_hdl,
 	    count * sizeof (zbookmark_phys_t));
 	zc.zc_nvlist_dst_size = count;
 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
 	for (;;) {
 		int err;
 
 		if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_ERROR_LOG,
 		    &zc) == 0)
 			break;
 
 		/* Read errno before free() gets a chance to change it. */
 		err = errno;
 		free((void *)(uintptr_t)zc.zc_nvlist_dst);
 		if (err != ENOMEM) {
 			return (zpool_standard_error_fmt(hdl, err,
 			    dgettext(TEXT_DOMAIN, "errors: List of "
 			    "errors unavailable")));
 		}
 
 		/* Retry with the size left in zc_nvlist_dst_size. */
 		count = zc.zc_nvlist_dst_size;
 		zc.zc_nvlist_dst = (uintptr_t)zfs_alloc(zhp->zpool_hdl,
 		    count * sizeof (zbookmark_phys_t));
 	}
 
 	/*
 	 * Sort the resulting bookmarks.  This is a little confusing due to the
 	 * implementation of ZFS_IOC_ERROR_LOG.  The bookmarks are copied last
 	 * to first, and 'zc_nvlist_dst_size' indicates the number of bookmarks
 	 * _not_ copied as part of the process.  So we point the start of our
 	 * array appropriate and decrement the total number of elements.
 	 */
 	zb = ((zbookmark_phys_t *)(uintptr_t)zc.zc_nvlist_dst) +
 	    zc.zc_nvlist_dst_size;
 	count -= zc.zc_nvlist_dst_size;
 
 	qsort(zb, count, sizeof (zbookmark_phys_t), zbookmark_mem_compare);
 
 	verify(nvlist_alloc(nverrlistp, 0, KM_SLEEP) == 0);
 
 	/*
 	 * Fill in the nverrlistp with nvlist's of dataset and object numbers.
 	 */
 	for (i = 0; i < count; i++) {
 		nvlist_t *nv;
 
 		/* ignoring zb_blkid and zb_level for now */
 		if (i > 0 && zb[i-1].zb_objset == zb[i].zb_objset &&
 		    zb[i-1].zb_object == zb[i].zb_object)
 			continue;
 
 		if (nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) != 0)
 			goto nomem;
 		if (nvlist_add_uint64(nv, ZPOOL_ERR_DATASET,
 		    zb[i].zb_objset) != 0 ||
 		    nvlist_add_uint64(nv, ZPOOL_ERR_OBJECT,
 		    zb[i].zb_object) != 0 ||
 		    nvlist_add_nvlist(*nverrlistp, "ejk", nv) != 0) {
 			nvlist_free(nv);
 			goto nomem;
 		}
 		/* Release the local copy now that it has been added. */
 		nvlist_free(nv);
 	}
 
 	free((void *)(uintptr_t)zc.zc_nvlist_dst);
 	return (0);
 
 nomem:
 	/* Do not hand a half-built list back to the caller. */
 	nvlist_free(*nverrlistp);
 	*nverrlistp = NULL;
 	free((void *)(uintptr_t)zc.zc_nvlist_dst);
 	return (no_memory(zhp->zpool_hdl));
 }
 
 /*
  * Upgrade a ZFS pool to the on-disk version given by 'new_version'.
  *
  * Returns 0 on success, otherwise the result of
  * zpool_standard_error_fmt().
  */
 int
 zpool_upgrade(zpool_handle_t *zhp, uint64_t new_version)
 {
 	zfs_cmd_t zc = {"\0"};
 	libzfs_handle_t *hdl = zhp->zpool_hdl;
 
 	/* Bounded copy, matching the other ioctl wrappers in this file. */
 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
 	zc.zc_cookie = new_version;
 
 	if (zfs_ioctl(hdl, ZFS_IOC_POOL_UPGRADE, &zc) != 0)
 		return (zpool_standard_error_fmt(hdl, errno,
 		    dgettext(TEXT_DOMAIN, "cannot upgrade '%s'"),
 		    zhp->zpool_name));
 	return (0);
 }
 
 /*
  * Rebuild a command line in 'string' (capacity 'len'): the basename of
  * argv[0] followed by every remaining argument, separated by single spaces.
  * Output that does not fit is truncated by strlcpy()/strlcat().
  */
 void
 zfs_save_arguments(int argc, char **argv, char *string, int len)
 {
 	int arg = 1;
 
 	(void) strlcpy(string, zfs_basename(argv[0]), len);
 
 	while (arg < argc) {
 		(void) strlcat(string, " ", len);
 		(void) strlcat(string, argv[arg], len);
 		arg++;
 	}
 }
 
 /*
  * Send 'message' to the kernel through ZFS_IOC_LOG_HISTORY, packed as the
  * "message" entry of an nvlist.  Returns 0 on success, or the non-zero
  * result of whichever step failed.
  */
 int
 zpool_log_history(libzfs_handle_t *hdl, const char *message)
 {
 	zfs_cmd_t zc = {"\0"};
 	nvlist_t *args = fnvlist_alloc();
 	int err;
 
 	fnvlist_add_string(args, "message", message);
 
 	/* Only issue the ioctl when the arguments were packed. */
 	if ((err = zcmd_write_src_nvlist(hdl, &zc, args)) == 0)
 		err = zfs_ioctl(hdl, ZFS_IOC_LOG_HISTORY, &zc);
 
 	nvlist_free(args);
 	zcmd_free_nvlists(&zc);
 
 	return (err);
 }
 
 /*
  * Perform ioctl to get some command history of a pool.
  *
  * 'buf' is the buffer to fill up to 'len' bytes.  'off' is the
  * logical offset of the history buffer to start reading from.
  *
  * Upon return, 'off' is the next logical offset to read from and
  * 'len' is the actual amount of bytes read into 'buf'.
  *
  * Returns 0 on success; on failure 'off' and 'len' are left untouched and
  * the result of the libzfs error routine is returned.
  */
 static int
 get_history(zpool_handle_t *zhp, char *buf, uint64_t *off, uint64_t *len)
 {
 	libzfs_handle_t *hdl = zhp->zpool_hdl;
 	zfs_cmd_t zc = {"\0"};
 
 	zc.zc_history = (uint64_t)(uintptr_t)buf;
 	zc.zc_history_len = *len;
 	zc.zc_history_offset = *off;
 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
 
 	if (zfs_ioctl(hdl, ZFS_IOC_POOL_GET_HISTORY, &zc) == 0) {
 		*len = zc.zc_history_len;
 		*off = zc.zc_history_offset;
 		return (0);
 	}
 
 	/* Map the kernel's errno onto the matching libzfs error. */
 	switch (errno) {
 	case EPERM:
 		return (zfs_error_fmt(hdl, EZFS_PERM,
 		    dgettext(TEXT_DOMAIN,
 		    "cannot show history for pool '%s'"),
 		    zhp->zpool_name));
 	case ENOENT:
 		return (zfs_error_fmt(hdl, EZFS_NOHISTORY,
 		    dgettext(TEXT_DOMAIN, "cannot get history for pool "
 		    "'%s'"), zhp->zpool_name));
 	case ENOTSUP:
 		return (zfs_error_fmt(hdl, EZFS_BADVERSION,
 		    dgettext(TEXT_DOMAIN, "cannot get history for pool "
 		    "'%s', pool must be upgraded"), zhp->zpool_name));
 	default:
 		return (zpool_standard_error_fmt(hdl, errno,
 		    dgettext(TEXT_DOMAIN,
 		    "cannot get history for '%s'"), zhp->zpool_name));
 	}
 }
 
 /*
  * Retrieve the command history of a pool.
  *
  * Reads history starting at logical offset *off, about 1MB per call, and
  * advances *off past the records that were unpacked.  *eof is set to B_TRUE
  * when a read returns no data; it is not modified otherwise.
  *
  * On success returns 0 and stores in *nvhisp a newly allocated nvlist whose
  * ZPOOL_HIST_RECORD entry is the array of unpacked records.  On failure
  * returns a non-zero error (ENOMEM when a read buffer cannot be allocated)
  * and leaves *nvhisp untouched.
  */
 int
 zpool_get_history(zpool_handle_t *zhp, nvlist_t **nvhisp, uint64_t *off,
     boolean_t *eof)
 {
 	char *buf;
 	int buflen = 128 * 1024;
 	nvlist_t **records = NULL;
 	uint_t numrecords = 0;
 	uint_t i;
 	int err = 0;
 	uint64_t start = *off;
 
 	buf = malloc(buflen);
 	if (buf == NULL)
 		return (ENOMEM);
 	/* process about 1MB a time */
 	while (*off - start < 1024 * 1024) {
 		uint64_t bytes_read = buflen;
 		uint64_t leftover;
 
 		if ((err = get_history(zhp, buf, off, &bytes_read)) != 0)
 			break;
 
 		/* if nothing else was read in, we're at EOF, just return */
 		if (!bytes_read) {
 			*eof = B_TRUE;
 			break;
 		}
 
 		if ((err = zpool_history_unpack(buf, bytes_read,
 		    &leftover, &records, &numrecords)) != 0)
 			break;
 		/* Re-read the bytes of any trailing partial record. */
 		*off -= leftover;
 		if (leftover == bytes_read) {
 			/*
 			 * no progress made, because buffer is not big enough
 			 * to hold this record; resize and retry.
 			 */
 			buflen *= 2;
 			free(buf);
 			buf = malloc(buflen);
 			if (buf == NULL) {
 				/*
 				 * Leave through the common exit so that the
 				 * records unpacked so far are released.
 				 */
 				err = ENOMEM;
 				break;
 			}
 		}
 	}
 
 	free(buf);
 
 	if (!err) {
 		verify(nvlist_alloc(nvhisp, NV_UNIQUE_NAME, 0) == 0);
 		verify(nvlist_add_nvlist_array(*nvhisp, ZPOOL_HIST_RECORD,
 		    records, numrecords) == 0);
 	}
 	for (i = 0; i < numrecords; i++)
 		nvlist_free(records[i]);
 	free(records);
 
 	return (err);
 }
 
 /*
  * Retrieve the next event given the passed 'zevent_fd' file descriptor.
  * If there is a new event available 'nvp' will contain a newly allocated
  * nvlist and 'dropped' will be set to the number of missed events since
  * the last call to this function.  When 'nvp' is set to NULL it indicates
  * no new events are available.  In either case the function returns 0 and
  * it is up to the caller to free 'nvp'.  In the case of a fatal error the
  * function will return a non-zero value.  When the function is called in
  * blocking mode (the default, unless the ZEVENT_NONBLOCK flag is passed),
  * it will not return until a new event is available.
  */
 int
 zpool_events_next(libzfs_handle_t *hdl, nvlist_t **nvp,
     int *dropped, unsigned flags, int zevent_fd)
 {
 	zfs_cmd_t zc = {"\0"};
 	int error = 0;
 
 	*dropped = 0;
 	*nvp = NULL;
 
 	zc.zc_cleanup_fd = zevent_fd;
 	if (flags & ZEVENT_NONBLOCK)
 		zc.zc_guid = ZEVENT_NONBLOCK;
 
 	if (zcmd_alloc_dst_nvlist(hdl, &zc, ZEVENT_SIZE) != 0)
 		return (-1);
 
 	/* Keep asking until the ioctl succeeds or fails for good. */
 	while (zfs_ioctl(hdl, ZFS_IOC_EVENTS_NEXT, &zc) != 0) {
 		switch (errno) {
 		case ENOMEM:
 			/* Buffer too small: grow it and try again. */
 			if (zcmd_expand_dst_nvlist(hdl, &zc) == 0)
 				continue;
 			error = zfs_error_fmt(hdl, EZFS_NOMEM,
 			    dgettext(TEXT_DOMAIN, "cannot get event"));
 			break;
 		case ESHUTDOWN:
 			error = zfs_error_fmt(hdl, EZFS_POOLUNAVAIL,
 			    dgettext(TEXT_DOMAIN, "zfs shutdown"));
 			break;
 		case ENOENT:
 			/* Blocking error case should not occur */
 			if (!(flags & ZEVENT_NONBLOCK))
 				error = zpool_standard_error_fmt(hdl, errno,
 				    dgettext(TEXT_DOMAIN, "cannot get event"));
 			break;
 		default:
 			error = zpool_standard_error_fmt(hdl, errno,
 			    dgettext(TEXT_DOMAIN, "cannot get event"));
 			break;
 		}
 		goto out;
 	}
 
 	error = zcmd_read_dst_nvlist(hdl, &zc, nvp);
 	if (error == 0)
 		*dropped = (int)zc.zc_cookie;
 
 out:
 	zcmd_free_nvlists(&zc);
 
 	return (error);
 }
 
 /*
  * Clear all events.
  *
  * When 'count' is not NULL it receives the number of events cleared.
  * Returns 0 on success, otherwise the result of zpool_standard_error().
  */
 int
 zpool_events_clear(libzfs_handle_t *hdl, int *count)
 {
 	zfs_cmd_t zc = {"\0"};
 
 	if (zfs_ioctl(hdl, ZFS_IOC_EVENTS_CLEAR, &zc) == 0) {
 		/* # of events cleared */
 		if (count != NULL)
 			*count = (int)zc.zc_cookie;
 		return (0);
 	}
 
 	return (zpool_standard_error(hdl, errno,
 	    dgettext(TEXT_DOMAIN, "cannot clear events")));
 }
 
 /*
  * Seek to a specific EID, ZEVENT_SEEK_START, or ZEVENT_SEEK_END for
  * the passed zevent_fd file handle.  On success zero is returned,
  * otherwise the result of the libzfs error routine that reported the
  * failure is returned.
  */
 int
 zpool_events_seek(libzfs_handle_t *hdl, uint64_t eid, int zevent_fd)
 {
 	zfs_cmd_t zc = {"\0"};
 
 	zc.zc_cleanup_fd = zevent_fd;
 	zc.zc_guid = eid;
 
 	if (zfs_ioctl(hdl, ZFS_IOC_EVENTS_SEEK, &zc) == 0)
 		return (0);
 
 	if (errno == ENOENT) {
 		return (zfs_error_fmt(hdl, EZFS_NOENT,
 		    dgettext(TEXT_DOMAIN, "cannot get event")));
 	}
 
 	if (errno == ENOMEM) {
 		return (zfs_error_fmt(hdl, EZFS_NOMEM,
 		    dgettext(TEXT_DOMAIN, "cannot get event")));
 	}
 
 	return (zpool_standard_error_fmt(hdl, errno,
 	    dgettext(TEXT_DOMAIN, "cannot get event")));
 }
 
 /*
  * Write a printable name for object 'obj' of dataset object 'dsobj' into
  * 'pathname' (at most 'len' bytes).  Depending on what can be resolved the
  * result is one of:
  *   <metadata>:<0xobj>    dsobj is 0 (the MOS)
  *   <0xdsobj>:<0xobj>     the dataset name lookup failed
  *   dataset:<0xobj>       the object's path lookup failed
  *   dataset:path          path resolved, dataset treated as unmounted
  *   mountpoint + path     path resolved and dataset is mounted
  * With 'always_unmounted' set the mount state is not consulted.
  */
 static void
 zpool_obj_to_path_impl(zpool_handle_t *zhp, uint64_t dsobj, uint64_t obj,
     char *pathname, size_t len, boolean_t always_unmounted)
 {
 	char dsname[ZFS_MAX_DATASET_NAME_LEN];
 	zfs_cmd_t zc = {"\0"};
 	char *mntpnt = NULL;
 	boolean_t mounted = B_FALSE;
 
 	/* special case for the MOS */
 	if (dsobj == 0) {
 		(void) snprintf(pathname, len, "<metadata>:<0x%llx>",
 		    (longlong_t)obj);
 		return;
 	}
 
 	/* get the dataset's name */
 	zc.zc_obj = dsobj;
 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
 	if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_DSOBJ_TO_DSNAME, &zc) != 0) {
 		/* just write out a path of two object numbers */
 		(void) snprintf(pathname, len, "<0x%llx>:<0x%llx>",
 		    (longlong_t)dsobj, (longlong_t)obj);
 		return;
 	}
 	(void) strlcpy(dsname, zc.zc_value, sizeof (dsname));
 
 	/* find out if the dataset is mounted */
 	mounted = !always_unmounted && is_mounted(zhp->zpool_hdl, dsname,
 	    &mntpnt);
 
 	/* get the corrupted object's path */
 	zc.zc_obj = obj;
 	(void) strlcpy(zc.zc_name, dsname, sizeof (zc.zc_name));
 	if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_OBJ_TO_PATH, &zc) != 0) {
 		(void) snprintf(pathname, len, "%s:<0x%llx>", dsname,
 		    (longlong_t)obj);
 	} else if (mounted) {
 		(void) snprintf(pathname, len, "%s%s", mntpnt,
 		    zc.zc_value);
 	} else {
 		(void) snprintf(pathname, len, "%s:%s",
 		    dsname, zc.zc_value);
 	}
 
 	free(mntpnt);
 }
 
 /*
  * Format a name for the given object into 'pathname', using the dataset's
  * mountpoint as the prefix when the dataset is mounted.
  */
 void
 zpool_obj_to_path(zpool_handle_t *zhp, uint64_t dsobj, uint64_t obj,
     char *pathname, size_t len)
 {
 	const boolean_t always_unmounted = B_FALSE;
 
 	zpool_obj_to_path_impl(zhp, dsobj, obj, pathname, len,
 	    always_unmounted);
 }
 
 /*
  * Like zpool_obj_to_path(), but never consults the mount state, so a
  * resolved path is always written in "dataset:path" form.
  */
 void
 zpool_obj_to_path_ds(zpool_handle_t *zhp, uint64_t dsobj, uint64_t obj,
     char *pathname, size_t len)
 {
 	const boolean_t always_unmounted = B_TRUE;
 
 	zpool_obj_to_path_impl(zhp, dsobj, obj, pathname, len,
 	    always_unmounted);
 }
 /*
  * Wait while the specified activity is in progress in the pool.
  *
  * Unlike zpool_wait_status(), a pool that no longer exists is treated as an
  * error here: it is reported and ENOENT is returned.  Otherwise the result
  * of zpool_wait_status() is returned unchanged.
  */
 int
 zpool_wait(zpool_handle_t *zhp, zpool_wait_activity_t activity)
 {
 	boolean_t missing;
 	int error;
 
 	error = zpool_wait_status(zhp, activity, &missing, NULL);
 	if (!missing)
 		return (error);
 
 	(void) zpool_standard_error_fmt(zhp->zpool_hdl, ENOENT,
 	    dgettext(TEXT_DOMAIN, "error waiting in pool '%s'"),
 	    zhp->zpool_name);
 	return (ENOENT);
 }
 
 /*
  * Wait for the given activity and return the status of the wait (whether or not
  * any waiting was done) in the 'waited' parameter. Non-existent pools are
  * reported via the 'missing' parameter, rather than by printing an error
  * message. This is convenient when this function is called in a loop over a
  * long period of time (as it is, for example, by zpool's wait cmd). In that
  * scenario, a pool being exported or destroyed should be considered a normal
  * event, so we don't want to print an error when we find that the pool doesn't
  * exist.
  *
  * Returns 0 on success or when the pool is missing; any other failure is
  * reported through the libzfs handle and its error code is returned.
  */
 int
 zpool_wait_status(zpool_handle_t *zhp, zpool_wait_activity_t activity,
     boolean_t *missing, boolean_t *waited)
 {
 	int error = lzc_wait(zhp->zpool_name, activity, waited);
 
 	/* A vanished pool is signalled through 'missing', not as an error. */
 	if (error == ENOENT) {
 		*missing = B_TRUE;
 		return (0);
 	}
 	*missing = B_FALSE;
 
 	if (error == 0)
 		return (0);
 
 	(void) zpool_standard_error_fmt(zhp->zpool_hdl, error,
 	    dgettext(TEXT_DOMAIN, "error waiting in pool '%s'"),
 	    zhp->zpool_name);
 	return (error);
 }
 
 /*
  * Store 'envmap' as the pool's boot environment via lzc_set_bootenv().
  * Returns 0 on success; on failure the error is reported through the
  * libzfs handle and the lzc_set_bootenv() error code is returned.
  */
 int
 zpool_set_bootenv(zpool_handle_t *zhp, const nvlist_t *envmap)
 {
 	int error = lzc_set_bootenv(zhp->zpool_name, envmap);
 
 	if (error == 0)
 		return (0);
 
 	(void) zpool_standard_error_fmt(zhp->zpool_hdl, error,
 	    dgettext(TEXT_DOMAIN,
 	    "error setting bootenv in pool '%s'"), zhp->zpool_name);
 	return (error);
 }
 
 /*
  * Fetch the pool's boot environment via lzc_get_bootenv().  On success
  * returns 0 and stores the resulting nvlist in *nvlp.  On failure the error
  * is reported through the libzfs handle, *nvlp is left untouched, and the
  * lzc_get_bootenv() error code is returned.
  */
 int
 zpool_get_bootenv(zpool_handle_t *zhp, nvlist_t **nvlp)
 {
 	nvlist_t *nvl = NULL;
 	int error = lzc_get_bootenv(zhp->zpool_name, &nvl);
 
 	if (error == 0) {
 		*nvlp = nvl;
 		return (0);
 	}
 
 	(void) zpool_standard_error_fmt(zhp->zpool_hdl, error,
 	    dgettext(TEXT_DOMAIN,
 	    "error getting bootenv in pool '%s'"), zhp->zpool_name);
 	return (error);
 }
 
 /*
  * Attempt to read and parse feature file(s) (from "compatibility" property).
  * Files contain zpool feature names, comma or whitespace-separated.
  * Comments (# character to next newline) are discarded.
  *
  * Arguments:
  *  compatibility : string containing feature filenames
  *  features : either NULL or pointer to array of boolean
  *  report : either NULL or pointer to string buffer
  *  rlen : length of "report" buffer
  *
  * compatibility is NULL (unset), "", "off", "legacy", or list of
  * comma-separated filenames. filenames should either be absolute,
  * or relative to:
  *   1) ZPOOL_SYSCONF_COMPAT_D (eg: /etc/zfs/compatibility.d) or
  *   2) ZPOOL_DATA_COMPAT_D (eg: /usr/share/zfs/compatibility.d).
  * (Unset), "" or "off" => enable all features
  * "legacy" => disable all features
  *
  * Any feature names read from files which match unames in spa_feature_table
  * will have the corresponding boolean set in the features array (if non-NULL).
  * If more than one feature set specified, only features present in *all* of
  * them will be set.
  *
  * "report" if not NULL will be populated with a suitable status message.
  *
  * Return values:
  *   ZPOOL_COMPATIBILITY_OK : files read and parsed ok
  *   ZPOOL_COMPATIBILITY_BADFILE : file too big or not a text file
  *   ZPOOL_COMPATIBILITY_BADTOKEN : SYSCONF file contains invalid feature name
  *   ZPOOL_COMPATIBILITY_WARNTOKEN : DATA file contains invalid feature name
  *   ZPOOL_COMPATIBILITY_NOFILES : no feature files found
  */
 zpool_compat_status_t
 zpool_load_compat(const char *compat, boolean_t *features, char *report,
     size_t rlen)
 {
 	int sdirfd, ddirfd, featfd;
 	struct stat fs;
 	char *fc;
 	char *ps, *ls, *ws;
 	char *file, *line, *word;
 
 	char l_compat[ZFS_MAXPROPLEN];
 
 	boolean_t ret_nofiles = B_TRUE;
 	boolean_t ret_badfile = B_FALSE;
 	boolean_t ret_badtoken = B_FALSE;
 	boolean_t ret_warntoken = B_FALSE;
 
 	/* special cases (unset), "" and "off" => enable all features */
 	if (compat == NULL || compat[0] == '\0' ||
 	    strcmp(compat, ZPOOL_COMPAT_OFF) == 0) {
 		if (features != NULL)
 			for (uint_t i = 0; i < SPA_FEATURES; i++)
 				features[i] = B_TRUE;
 		if (report != NULL)
 			strlcpy(report, gettext("all features enabled"), rlen);
 		return (ZPOOL_COMPATIBILITY_OK);
 	}
 
 	/* Final special case "legacy" => disable all features */
 	if (strcmp(compat, ZPOOL_COMPAT_LEGACY) == 0) {
 		if (features != NULL)
 			for (uint_t i = 0; i < SPA_FEATURES; i++)
 				features[i] = B_FALSE;
 		if (report != NULL)
 			strlcpy(report, gettext("all features disabled"), rlen);
 		return (ZPOOL_COMPATIBILITY_OK);
 	}
 
 	/*
 	 * Start with all true; will be ANDed with results from each file
 	 */
 	if (features != NULL)
 		for (uint_t i = 0; i < SPA_FEATURES; i++)
 			features[i] = B_TRUE;
 
 	/*
 	 * Accumulated lists of offending file names / tokens for the report.
 	 * Every append below is bounded by the size of the buffer itself.
 	 */
 	char err_badfile[1024] = "";
 	char err_badtoken[1024] = "";
 
 	/*
 	 * We ignore errors from the directory open()
 	 * as they're only needed if the filename is relative
 	 * which will be checked during the openat().
 	 */
 
 /* O_PATH safer than O_RDONLY if system allows it */
 #if defined(O_PATH)
 #define	ZC_DIR_FLAGS (O_DIRECTORY | O_CLOEXEC | O_PATH)
 #else
 #define	ZC_DIR_FLAGS (O_DIRECTORY | O_CLOEXEC | O_RDONLY)
 #endif
 
 	sdirfd = open(ZPOOL_SYSCONF_COMPAT_D, ZC_DIR_FLAGS);
 	ddirfd = open(ZPOOL_DATA_COMPAT_D, ZC_DIR_FLAGS);
 
 	/* strtok_r() modifies its input, so tokenize a private copy */
 	(void) strlcpy(l_compat, compat, ZFS_MAXPROPLEN);
 
 	for (file = strtok_r(l_compat, ",", &ps);
 	    file != NULL;
 	    file = strtok_r(NULL, ",", &ps)) {
 
 		boolean_t l_features[SPA_FEATURES];
 
 		enum { Z_SYSCONF, Z_DATA } source;
 
 		/* try sysconfdir first, then datadir */
 		source = Z_SYSCONF;
 		if ((featfd = openat(sdirfd, file, O_RDONLY | O_CLOEXEC)) < 0) {
 			featfd = openat(ddirfd, file, O_RDONLY | O_CLOEXEC);
 			source = Z_DATA;
 		}
 
 		/* File readable and correct size? */
 		if (featfd < 0 ||
 		    fstat(featfd, &fs) < 0 ||
 		    fs.st_size < 1 ||
 		    fs.st_size > ZPOOL_COMPAT_MAXSIZE) {
 			/* only close a descriptor that was actually opened */
 			if (featfd >= 0)
 				(void) close(featfd);
 			strlcat(err_badfile, file, sizeof (err_badfile));
 			strlcat(err_badfile, " ", sizeof (err_badfile));
 			ret_badfile = B_TRUE;
 			continue;
 		}
 
 /* Prefault the file if system allows */
 #if defined(MAP_POPULATE)
 #define	ZC_MMAP_FLAGS (MAP_PRIVATE | MAP_POPULATE)
 #elif defined(MAP_PREFAULT_READ)
 #define	ZC_MMAP_FLAGS (MAP_PRIVATE | MAP_PREFAULT_READ)
 #else
 #define	ZC_MMAP_FLAGS (MAP_PRIVATE)
 #endif
 
 		/* private mmap() so we can strtok safely */
 		fc = (char *)mmap(NULL, fs.st_size, PROT_READ | PROT_WRITE,
 		    ZC_MMAP_FLAGS, featfd, 0);
 		(void) close(featfd);
 
 		/* map ok, and last character == newline? */
 		if (fc == MAP_FAILED || fc[fs.st_size - 1] != '\n') {
 			/* there is nothing to unmap when mmap() itself failed */
 			if (fc != MAP_FAILED)
 				(void) munmap((void *) fc, fs.st_size);
 			strlcat(err_badfile, file, sizeof (err_badfile));
 			strlcat(err_badfile, " ", sizeof (err_badfile));
 			ret_badfile = B_TRUE;
 			continue;
 		}
 
 		ret_nofiles = B_FALSE;
 
 		for (uint_t i = 0; i < SPA_FEATURES; i++)
 			l_features[i] = B_FALSE;
 
 		/* replace final newline with NULL to ensure string ends */
 		fc[fs.st_size - 1] = '\0';
 
 		for (line = strtok_r(fc, "\n", &ls);
 		    line != NULL;
 		    line = strtok_r(NULL, "\n", &ls)) {
 			/* discard comments */
-			*(strchrnul(line, '#')) = '\0';
+			char *r = strchr(line, '#');
+			if (r != NULL)
+				*r = '\0';
 
 			for (word = strtok_r(line, ", \t", &ws);
 			    word != NULL;
 			    word = strtok_r(NULL, ", \t", &ws)) {
 				/* Find matching feature name */
 				uint_t f;
 				for (f = 0; f < SPA_FEATURES; f++) {
 					zfeature_info_t *fi =
 					    &spa_feature_table[f];
 					if (strcmp(word, fi->fi_uname) == 0) {
 						l_features[f] = B_TRUE;
 						break;
 					}
 				}
 				if (f < SPA_FEATURES)
 					continue;
 
 				/* found an unrecognized word */
 				/* lightly sanitize it */
 				if (strlen(word) > 32)
 					word[32] = '\0';
 				/* <ctype.h> requires an unsigned char value */
 				for (char *c = word; *c != '\0'; c++)
 					if (!isprint((unsigned char)*c))
 						*c = '?';
 
 				strlcat(err_badtoken, word,
 				    sizeof (err_badtoken));
 				strlcat(err_badtoken, " ",
 				    sizeof (err_badtoken));
 				if (source == Z_SYSCONF)
 					ret_badtoken = B_TRUE;
 				else
 					ret_warntoken = B_TRUE;
 			}
 		}
 		(void) munmap((void *) fc, fs.st_size);
 
 		if (features != NULL)
 			for (uint_t i = 0; i < SPA_FEATURES; i++)
 				features[i] &= l_features[i];
 	}
 	/* the directory opens were best-effort and may have failed */
 	if (sdirfd >= 0)
 		(void) close(sdirfd);
 	if (ddirfd >= 0)
 		(void) close(ddirfd);
 
 	/* Return the most serious error */
 	if (ret_badfile) {
 		if (report != NULL)
 			snprintf(report, rlen, gettext("could not read/"
 			    "parse feature file(s): %s"), err_badfile);
 		return (ZPOOL_COMPATIBILITY_BADFILE);
 	}
 	if (ret_nofiles) {
 		if (report != NULL)
 			strlcpy(report,
 			    gettext("no valid compatibility files specified"),
 			    rlen);
 		return (ZPOOL_COMPATIBILITY_NOFILES);
 	}
 	if (ret_badtoken) {
 		if (report != NULL)
 			snprintf(report, rlen, gettext("invalid feature "
 			    "name(s) in local compatibility files: %s"),
 			    err_badtoken);
 		return (ZPOOL_COMPATIBILITY_BADTOKEN);
 	}
 	if (ret_warntoken) {
 		if (report != NULL)
 			snprintf(report, rlen, gettext("unrecognized feature "
 			    "name(s) in distribution compatibility files: %s"),
 			    err_badtoken);
 		return (ZPOOL_COMPATIBILITY_WARNTOKEN);
 	}
 	if (report != NULL)
 		strlcpy(report, gettext("compatibility set ok"), rlen);
 	return (ZPOOL_COMPATIBILITY_OK);
 }
diff --git a/sys/contrib/openzfs/lib/libzpool/kernel.c b/sys/contrib/openzfs/lib/libzpool/kernel.c
index b6d836f414ee..25f58f156bf9 100644
--- a/sys/contrib/openzfs/lib/libzpool/kernel.c
+++ b/sys/contrib/openzfs/lib/libzpool/kernel.c
@@ -1,1379 +1,1377 @@
 /*
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
  * Common Development and Distribution License (the "License").
  * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
  * See the License for the specific language governing permissions
  * and limitations under the License.
  *
  * When distributing Covered Code, include this CDDL HEADER in each
  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  * If applicable, add the following below this CDDL HEADER, with the
  * fields enclosed by brackets "[]" replaced with your own identifying
  * information: Portions Copyright [yyyy] [name of copyright owner]
  *
  * CDDL HEADER END
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
  * Copyright (c) 2016 Actifio, Inc. All rights reserved.
  */
 
 #include <assert.h>
 #include <fcntl.h>
 #include <libgen.h>
 #include <poll.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <libzutil.h>
 #include <sys/crypto/icp.h>
 #include <sys/processor.h>
 #include <sys/rrwlock.h>
 #include <sys/spa.h>
 #include <sys/stat.h>
 #include <sys/systeminfo.h>
 #include <sys/time.h>
 #include <sys/utsname.h>
 #include <sys/zfs_context.h>
 #include <sys/zfs_onexit.h>
 #include <sys/zfs_vfsops.h>
 #include <sys/zstd/zstd.h>
 #include <sys/zvol.h>
 #include <zfs_fletcher.h>
 #include <zlib.h>
 
 /*
  * Emulation of kernel services in userland.
  */
 
 uint64_t physmem;
 char hw_serial[HW_HOSTID_LEN];
 struct utsname hw_utsname;
 
 /* If set, all blocks read will be copied to the specified directory. */
 char *vn_dumpdir = NULL;
 
 /* this only exists to have its address taken */
 struct proc p0;
 
 /*
  * =========================================================================
  * threads
  * =========================================================================
  *
  * TS_STACK_MIN is dictated by the minimum allowed pthread stack size.  While
  * TS_STACK_MAX is somewhat arbitrary, it was selected to be large enough for
  * the expected stack depth while small enough to avoid exhausting address
  * space with high thread counts.
  */
 #define	TS_STACK_MIN	MAX(PTHREAD_STACK_MIN, 32768)
 #define	TS_STACK_MAX	(256 * 1024)
 
 /*
  * Create a pthread running func(arg) and return its id cast to kthread_t *.
  * The thread is detached unless 'state' has TS_JOINABLE set.  A 'stksize'
  * of 0 selects a default stack size (see below).  Any pthread failure is
  * fatal (VERIFY0).
  */
 /*ARGSUSED*/
 kthread_t *
 zk_thread_create(void (*func)(void *), void *arg, size_t stksize, int state)
 {
 	pthread_attr_t attr;
 	pthread_t tid;
 	int detachstate;
 
 	VERIFY0(pthread_attr_init(&attr));
 
 	detachstate = (state & TS_JOINABLE) ?
 	    PTHREAD_CREATE_JOINABLE : PTHREAD_CREATE_DETACHED;
 	VERIFY0(pthread_attr_setdetachstate(&attr, detachstate));
 
 	/*
 	 * When the caller gives no stack size, the ZFS_STACK_SIZE environment
 	 * variable may supply one (never less than TS_STACK_MIN); otherwise
 	 * TS_STACK_MAX is used.  This makes it convenient to observe and
 	 * debug stack overruns in user space.  Explicitly specified stack
 	 * sizes are honored.  ZFS_STACK_SIZE is discussed further in the
 	 * ENVIRONMENT VARIABLES sections of the ztest(1) man page.
 	 */
 	if (stksize == 0) {
 		const char *env_stack = getenv("ZFS_STACK_SIZE");
 
 		if (env_stack != NULL)
 			stksize = MAX(atoi(env_stack), TS_STACK_MIN);
 		else
 			stksize = TS_STACK_MAX;
 	}
 
 	VERIFY3S(stksize, >, 0);
 	stksize = P2ROUNDUP(MAX(stksize, TS_STACK_MIN), PAGESIZE);
 
 	/*
 	 * If setting the stack size ever fails, it may be because the size
 	 * is not a multiple of the system page size.
 	 */
 	VERIFY0(pthread_attr_setstacksize(&attr, stksize));
 	VERIFY0(pthread_attr_setguardsize(&attr, PAGESIZE));
 
 	VERIFY0(pthread_create(&tid, &attr, (void *(*)(void *))func, arg));
 	VERIFY0(pthread_attr_destroy(&attr));
 
 	return ((void *)(uintptr_t)tid);
 }
 
 /*
  * =========================================================================
  * kstats
  * =========================================================================
  */
 /* Stub: ignores every argument and always returns NULL. */
 /*ARGSUSED*/
 kstat_t *
 kstat_create(const char *module, int instance, const char *name,
     const char *class, uchar_t type, ulong_t ndata, uchar_t ks_flag)
 {
 	return (NULL);
 }
 
 /* Stub: does nothing. */
 /*ARGSUSED*/
 void
 kstat_install(kstat_t *ksp)
 {}
 
 /* Stub: does nothing. */
 /*ARGSUSED*/
 void
 kstat_delete(kstat_t *ksp)
 {}
 
 /* Stub: the supplied callbacks are ignored. */
 void
 kstat_set_raw_ops(kstat_t *ksp,
     int (*headers)(char *buf, size_t size),
     int (*data)(char *buf, size_t size, void *data),
     void *(*addr)(kstat_t *ksp, loff_t index))
 {}
 
 /*
  * =========================================================================
  * mutexes
  * =========================================================================
  */
 
 /*
  * Initialize the pthread mutex behind 'mp' with default attributes and
  * clear the recorded owner.  'name', 'type' and 'cookie' are unused.
  */
 void
 mutex_init(kmutex_t *mp, char *name, int type, void *cookie)
 {
 	VERIFY0(pthread_mutex_init(&mp->m_lock, NULL));
 	memset(&mp->m_owner, 0, sizeof (pthread_t));
 }
 
 /* Destroy the pthread mutex behind 'mp'; failure is fatal (VERIFY0). */
 void
 mutex_destroy(kmutex_t *mp)
 {
 	VERIFY0(pthread_mutex_destroy(&mp->m_lock));
 }
 
 /* Acquire 'mp' and record the calling thread as its owner. */
 void
 mutex_enter(kmutex_t *mp)
 {
 	VERIFY0(pthread_mutex_lock(&mp->m_lock));
 	mp->m_owner = pthread_self();
 }
 
 /*
  * Try to acquire 'mp' without blocking.  Returns 1 (and records the caller
  * as owner) on success, 0 if the mutex is busy.  Any trylock failure other
  * than EBUSY is fatal.
  */
 int
 mutex_tryenter(kmutex_t *mp)
 {
 	int rc = pthread_mutex_trylock(&mp->m_lock);
 
 	if (rc != 0) {
 		VERIFY3S(rc, ==, EBUSY);
 		return (0);
 	}
 
 	mp->m_owner = pthread_self();
 	return (1);
 }
 
 /* Clear the recorded owner, then release 'mp'. */
 void
 mutex_exit(kmutex_t *mp)
 {
 	memset(&mp->m_owner, 0, sizeof (pthread_t));
 	VERIFY0(pthread_mutex_unlock(&mp->m_lock));
 }
 
 /*
  * =========================================================================
  * rwlocks
  * =========================================================================
  */
 
 /*
  * Initialize the pthread rwlock behind 'rwlp' with default attributes and
  * reset the reader count and writer owner.  'name', 'type' and 'arg' are
  * unused.
  */
 void
 rw_init(krwlock_t *rwlp, char *name, int type, void *arg)
 {
 	VERIFY0(pthread_rwlock_init(&rwlp->rw_lock, NULL));
 	rwlp->rw_readers = 0;
 	rwlp->rw_owner = 0;
 }
 
 /* Destroy the pthread rwlock behind 'rwlp'; failure is fatal (VERIFY0). */
 void
 rw_destroy(krwlock_t *rwlp)
 {
 	VERIFY0(pthread_rwlock_destroy(&rwlp->rw_lock));
 }
 
 /*
  * Acquire 'rwlp'.  RW_READER takes a shared lock and bumps the reader
  * count; any other 'rw' value takes the exclusive lock and records the
  * caller as owner.
  */
 void
 rw_enter(krwlock_t *rwlp, krw_t rw)
 {
 	if (rw != RW_READER) {
 		VERIFY0(pthread_rwlock_wrlock(&rwlp->rw_lock));
 		rwlp->rw_owner = pthread_self();
 		return;
 	}
 
 	VERIFY0(pthread_rwlock_rdlock(&rwlp->rw_lock));
 	atomic_inc_uint(&rwlp->rw_readers);
 }
 
 /*
  * Release 'rwlp'.  The bookkeeping (reader count or writer owner) is
  * updated first, while the lock is still held, then the lock is dropped.
  */
 void
 rw_exit(krwlock_t *rwlp)
 {
 	if (!RW_READ_HELD(rwlp))
 		rwlp->rw_owner = 0;
 	else
 		atomic_dec_uint(&rwlp->rw_readers);
 
 	VERIFY0(pthread_rwlock_unlock(&rwlp->rw_lock));
 }
 
 /*
  * Non-blocking variant of rw_enter().  Returns 1 when the lock was taken
  * and 0 when it is busy; any failure other than EBUSY is fatal.
  */
 int
 rw_tryenter(krwlock_t *rwlp, krw_t rw)
 {
 	int rc;
 
 	rc = (rw == RW_READER) ?
 	    pthread_rwlock_tryrdlock(&rwlp->rw_lock) :
 	    pthread_rwlock_trywrlock(&rwlp->rw_lock);
 
 	if (rc != 0) {
 		VERIFY3S(rc, ==, EBUSY);
 		return (0);
 	}
 
 	if (rw == RW_READER)
 		atomic_inc_uint(&rwlp->rw_readers);
 	else
 		rwlp->rw_owner = pthread_self();
 
 	return (1);
 }
 
 /*
  * Return the emulated hostid: the hw_serial string parsed as a base-10
  * unsigned long.  'zonep' is ignored.
  */
 /* ARGSUSED */
 uint32_t
 zone_get_hostid(void *zonep)
 {
 	/*
 	 * We're emulating the system's hostid in userland.
 	 */
 	return (strtoul(hw_serial, NULL, 10));
 }
 
 /* Stub: never upgrades the lock; always returns 0. */
 int
 rw_tryupgrade(krwlock_t *rwlp)
 {
 	return (0);
 }
 
 /*
  * =========================================================================
  * condition variables
  * =========================================================================
  */
 
 /*
  * Initialize 'cv' as a pthread condition variable with default attributes.
  * 'name', 'type' and 'arg' are unused.
  */
 void
 cv_init(kcondvar_t *cv, char *name, int type, void *arg)
 {
 	VERIFY0(pthread_cond_init(cv, NULL));
 }
 
 /* Destroy the condition variable; failure is fatal (VERIFY0). */
 void
 cv_destroy(kcondvar_t *cv)
 {
 	VERIFY0(pthread_cond_destroy(cv));
 }
 
 /*
  * Wait on 'cv' with 'mp'.  The recorded owner of 'mp' is cleared for the
  * duration of the wait and set back to the caller once
  * pthread_cond_wait() returns.
  */
 void
 cv_wait(kcondvar_t *cv, kmutex_t *mp)
 {
 	memset(&mp->m_owner, 0, sizeof (pthread_t));
 	VERIFY0(pthread_cond_wait(cv, &mp->m_lock));
 	mp->m_owner = pthread_self();
 }
 
 /* Same as cv_wait(); always returns 1. */
 int
 cv_wait_sig(kcondvar_t *cv, kmutex_t *mp)
 {
 	cv_wait(cv, mp);
 	return (1);
 }
 
 /*
  * Wait on 'cv' until signalled or until the lbolt value 'abstime' is
  * reached.  Returns -1 if the deadline has already passed or the wait timed
  * out, 1 otherwise.  Any other pthread_cond_timedwait() error is fatal.
  */
 int
 cv_timedwait(kcondvar_t *cv, kmutex_t *mp, clock_t abstime)
 {
 	struct timeval now;
 	struct timespec deadline;
 	clock_t remaining;
 	int rc;
 
 	remaining = abstime - ddi_get_lbolt();
 	if (remaining <= 0)
 		return (-1);
 
 	VERIFY(gettimeofday(&now, NULL) == 0);
 
 	/* Convert the remaining ticks into an absolute wall-clock deadline. */
 	deadline.tv_sec = now.tv_sec + remaining / hz;
 	deadline.tv_nsec = now.tv_usec * NSEC_PER_USEC +
 	    (remaining % hz) * (NANOSEC / hz);
 	if (deadline.tv_nsec >= NANOSEC) {
 		deadline.tv_sec++;
 		deadline.tv_nsec -= NANOSEC;
 	}
 
 	memset(&mp->m_owner, 0, sizeof (pthread_t));
 	rc = pthread_cond_timedwait(cv, &mp->m_lock, &deadline);
 	mp->m_owner = pthread_self();
 
 	if (rc == ETIMEDOUT)
 		return (-1);
 
 	VERIFY0(rc);
 
 	return (1);
 }
 
 /*
  * Nanosecond-resolution timed wait.  'tim' is a relative interval, or an
  * absolute gethrtime() value when 'flag' has CALLOUT_FLAG_ABSOLUTE set.
  * 'res' is unused.  Returns -1 if the time has already passed or the wait
  * timed out, 1 otherwise.
  */
 /*ARGSUSED*/
 int
 cv_timedwait_hires(kcondvar_t *cv, kmutex_t *mp, hrtime_t tim, hrtime_t res,
     int flag)
 {
 	struct timeval now;
 	struct timespec deadline;
 	hrtime_t remaining;
 	int rc;
 
 	ASSERT(flag == 0 || flag == CALLOUT_FLAG_ABSOLUTE);
 
 	remaining = tim;
 	if (flag & CALLOUT_FLAG_ABSOLUTE)
 		remaining -= gethrtime();
 
 	if (remaining <= 0)
 		return (-1);
 
 	VERIFY0(gettimeofday(&now, NULL));
 
 	/* Convert the remaining nanoseconds into a wall-clock deadline. */
 	deadline.tv_sec = now.tv_sec + remaining / NANOSEC;
 	deadline.tv_nsec = now.tv_usec * NSEC_PER_USEC +
 	    (remaining % NANOSEC);
 	if (deadline.tv_nsec >= NANOSEC) {
 		deadline.tv_sec++;
 		deadline.tv_nsec -= NANOSEC;
 	}
 
 	memset(&mp->m_owner, 0, sizeof (pthread_t));
 	rc = pthread_cond_timedwait(cv, &mp->m_lock, &deadline);
 	mp->m_owner = pthread_self();
 
 	if (rc == ETIMEDOUT)
 		return (-1);
 
 	VERIFY0(rc);
 
 	return (1);
 }
 
 /* Wake one waiter on 'cv' (pthread_cond_signal); failure is fatal. */
 void
 cv_signal(kcondvar_t *cv)
 {
 	VERIFY0(pthread_cond_signal(cv));
 }
 
 /* Wake all waiters on 'cv' (pthread_cond_broadcast); failure is fatal. */
 void
 cv_broadcast(kcondvar_t *cv)
 {
 	VERIFY0(pthread_cond_broadcast(cv));
 }
 
 /*
  * =========================================================================
  * procfs list
  * =========================================================================
  */
 
 /* Stub: produces no output. */
 void
 seq_printf(struct seq_file *m, const char *fmt, ...)
 {}
 
 /*
  * Initialize 'procfs_list': its lock, its list (whose link lives in the
  * procfs_list_node_t found at 'procfs_list_node_off' inside each object),
  * its id counter (starting at 1) and its node offset.  The name arguments,
  * 'mode' and the three callbacks are not used here.
  */
 void
 procfs_list_install(const char *module,
     const char *submodule,
     const char *name,
     mode_t mode,
     procfs_list_t *procfs_list,
     int (*show)(struct seq_file *f, void *p),
     int (*show_header)(struct seq_file *f),
     int (*clear)(procfs_list_t *procfs_list),
     size_t procfs_list_node_off)
 {
 	mutex_init(&procfs_list->pl_lock, NULL, MUTEX_DEFAULT, NULL);
 	list_create(&procfs_list->pl_list,
 	    procfs_list_node_off + sizeof (procfs_list_node_t),
 	    procfs_list_node_off + offsetof(procfs_list_node_t, pln_link));
 	procfs_list->pl_next_id = 1;
 	procfs_list->pl_node_offset = procfs_list_node_off;
 }
 
 /* Stub: does nothing. */
 void
 procfs_list_uninstall(procfs_list_t *procfs_list)
 {}
 
 /*
  * Destroy the list and lock of 'procfs_list'.  The list is asserted to be
  * empty.
  */
 void
 procfs_list_destroy(procfs_list_t *procfs_list)
 {
 	ASSERT(list_is_empty(&procfs_list->pl_list));
 	list_destroy(&procfs_list->pl_list);
 	mutex_destroy(&procfs_list->pl_lock);
 }
 
 /*
  * Lvalue for the pln_id of the procfs_list_node_t embedded in 'obj' at the
  * list's pl_node_offset.
  */
 #define	NODE_ID(procfs_list, obj) \
 		(((procfs_list_node_t *)(((char *)obj) + \
 		(procfs_list)->pl_node_offset))->pln_id)
 
 /*
  * Assign the next id to 'p' and append it to the list.  The list's pl_lock
  * is asserted to be held.
  */
 void
 procfs_list_add(procfs_list_t *procfs_list, void *p)
 {
 	ASSERT(MUTEX_HELD(&procfs_list->pl_lock));
 	NODE_ID(procfs_list, p) = procfs_list->pl_next_id++;
 	list_insert_tail(&procfs_list->pl_list, p);
 }
 
 /*
  * =========================================================================
  * vnode operations
  * =========================================================================
  */
 
 /*
  * =========================================================================
  * Figure out which debugging statements to print
  * =========================================================================
  */
 
 /* Comma-separated debug selector list, or NULL when none was given. */
 static char *dprintf_string;
 /* Non-zero when every dprintf should be printed. */
 static int dprintf_print_all;
 
 /*
  * Return 1 if 'string' is one of the comma-separated entries of
  * dprintf_string (exact match of a whole entry), 0 otherwise.
  * String format: file1.c,function_name1,file2.c,file3.c
  */
 int
 dprintf_find_string(const char *string)
 {
 	const int len = strlen(string);
 	const char *entry;
 
 	for (entry = dprintf_string; entry != NULL; ) {
 		/* The entry must end exactly where 'string' ends. */
 		if (strncmp(entry, string, len) == 0 &&
 		    (entry[len] == ',' || entry[len] == '\0'))
 			return (1);
 
 		/* Advance to the character after the next comma, if any. */
 		entry = strchr(entry, ',');
 		if (entry != NULL)
 			entry++;
 	}
 
 	return (0);
 }
 
 /*
  * Configure debug printing from the command line and the environment.
  *
  * Debugging can be specified two ways: by setting the environment variable
  * ZFS_DEBUG, or by including a "debug=..." argument on the command line.
  * The command line setting overrides the environment variable.
  *
  * Every "debug=..." argument is removed from argv and *argc is decremented
  * accordingly; when several are present the last one wins.
  */
 void
 dprintf_setup(int *argc, char **argv)
 {
 	const int len = strlen("debug=");
 	int i, j;
 
 	/* First look for a command line argument */
 	for (i = 1; i < *argc; i++) {
 		if (strncmp("debug=", argv[i], len) != 0)
 			continue;
 
 		dprintf_string = argv[i] + len;
 		/* Remove from args */
 		for (j = i; j < *argc; j++)
 			argv[j] = argv[j+1];
 		argv[j] = NULL;
 		(*argc)--;
 		/*
 		 * The following argument now occupies slot i; step back so
 		 * the loop increment re-examines it instead of skipping it.
 		 */
 		i--;
 	}
 
 	if (dprintf_string == NULL) {
 		/* Look for ZFS_DEBUG environment variable */
 		dprintf_string = getenv("ZFS_DEBUG");
 	}
 
 	/*
 	 * Are we just turning on all debugging?
 	 */
 	if (dprintf_find_string("on"))
 		dprintf_print_all = 1;
 
 	if (dprintf_string != NULL)
 		zfs_flags |= ZFS_DEBUG_DPRINTF;
 }
 
 /*
  * =========================================================================
  * debug printfs
  * =========================================================================
  */
 /*
  * Backend for debug messages.
  *
  * dprint - B_TRUE: print immediately to stdout, but only if all debugging
  *          is on or the file or function name is selected in the debug
  *          string; B_FALSE: format into a buffer and hand it to
  *          __zfs_dbgmsg().
  * file, func, line - call site; only the basename of 'file' is used
  * fmt, ... - printf-style message
  */
 void
 __dprintf(boolean_t dprint, const char *file, const char *func,
     int line, const char *fmt, ...)
 {
 	/* Get rid of annoying "../common/" prefix to filename. */
 	const char *newfile = zfs_basename(file);
 
 	va_list adx;
 	if (dprint) {
 		/* dprintf messages are printed immediately */
 
 		if (!dprintf_print_all &&
 		    !dprintf_find_string(newfile) &&
 		    !dprintf_find_string(func))
 			return;
 
 		/* Print out just the function name if requested */
 		/* stdout is locked so the pieces of one message stay together */
 		flockfile(stdout);
 		if (dprintf_find_string("pid"))
 			(void) printf("%d ", getpid());
 		if (dprintf_find_string("tid"))
 			(void) printf("%ju ",
 			    (uintmax_t)(uintptr_t)pthread_self());
 		if (dprintf_find_string("cpu"))
 			(void) printf("%u ", getcpuid());
 		if (dprintf_find_string("time"))
 			(void) printf("%llu ", gethrtime());
 		if (dprintf_find_string("long"))
 			(void) printf("%s, line %d: ", newfile, line);
 		(void) printf("dprintf: %s: ", func);
 		va_start(adx, fmt);
 		(void) vprintf(fmt, adx);
 		va_end(adx);
 		funlockfile(stdout);
 	} else {
 		/* zfs_dbgmsg is logged for dumping later */
 		size_t size;
 		char *buf;
 		int i;
 
 		size = 1024;
 		buf = umem_alloc(size, UMEM_NOFAIL);
 		i = snprintf(buf, size, "%s:%d:%s(): ", newfile, line, func);
 
 		/* append the message only if the prefix left room for it */
 		if (i < size) {
 			va_start(adx, fmt);
 			(void) vsnprintf(buf + i, size - i, fmt, adx);
 			va_end(adx);
 		}
 
 		__zfs_dbgmsg(buf);
 
 		umem_free(buf, size);
 	}
 }
 
 /*
  * =========================================================================
  * cmn_err() and panic()
  * =========================================================================
  */
 /*
  * Per-level message prefix and suffix, indexed by the cmn_err() level.
  * Valid indices are 0 .. CE_IGNORE - 1.
  */
 static char ce_prefix[CE_IGNORE][10] = { "", "NOTICE: ", "WARNING: ", "" };
 static char ce_suffix[CE_IGNORE][2] = { "", "\n", "\n", "" };
 
 /*
  * Print "error: <message>" to stderr and abort().  Does not return.
  */
 void
 vpanic(const char *fmt, va_list adx)
 {
 	(void) fprintf(stderr, "error: ");
 	(void) vfprintf(stderr, fmt, adx);
 	(void) fprintf(stderr, "\n");
 
 	abort();	/* think of it as a "user-level crash dump" */
 }
 
 /* Variadic front end for vpanic(). */
 void
 panic(const char *fmt, ...)
 {
 	va_list adx;
 
 	va_start(adx, fmt);
 	vpanic(fmt, adx);
 	va_end(adx);
 }
 
 void
 vcmn_err(int ce, const char *fmt, va_list adx)
 {
 	if (ce == CE_PANIC)
 		vpanic(fmt, adx);
 	if (ce != CE_NOTE) {	/* suppress noise in userland stress testing */
 		(void) fprintf(stderr, "%s", ce_prefix[ce]);
 		(void) vfprintf(stderr, fmt, adx);
 		(void) fprintf(stderr, "%s", ce_suffix[ce]);
 	}
 }
 
 /* Variadic front end for vcmn_err(). */
 /*PRINTFLIKE2*/
 void
 cmn_err(int ce, const char *fmt, ...)
 {
 	va_list adx;
 
 	va_start(adx, fmt);
 	vcmn_err(ce, fmt, adx);
 	va_end(adx);
 }
 
 /*
  * =========================================================================
  * misc routines
  * =========================================================================
  */
 
 /*
  * Sleep for 'ticks' clock ticks, using poll() with no descriptors as a
  * millisecond timer (one tick is taken as 1000 / hz milliseconds).
  */
 void
 delay(clock_t ticks)
 {
 	(void) poll(0, 0, ticks * (1000 / hz));
 }
 
 /*
  * Find the highest one bit set.
  * Returns the bit number + 1 of the highest set bit, or 0 when no bit is
  * set.  Relies on __builtin_clzll(), which both GCC and Clang provide.
  */
 int
 highbit64(uint64_t i)
 {
 	const int width = NBBY * sizeof (uint64_t);
 
 	/* __builtin_clzll() must not be called with 0. */
 	return (i != 0 ? width - __builtin_clzll(i) : 0);
 }
 
 /*
  * Find the lowest one bit set.
  * Returns the bit number + 1 of the lowest set bit, or 0 when no bit is
  * set.  Relies on __builtin_ffsll(), which both GCC and Clang provide.
  */
 int
 lowbit64(uint64_t i)
 {
 	return (i != 0 ? __builtin_ffsll(i) : 0);
 }
 
 /* Device paths and cached descriptors used by the random_* routines. */
 const char *random_path = "/dev/random";
 const char *urandom_path = "/dev/urandom";
 static int random_fd = -1, urandom_fd = -1;
 
 /*
  * Open both random devices read-only (close-on-exec).  Failure to open
  * either one is fatal (VERIFY).
  */
 void
 random_init(void)
 {
 	VERIFY((random_fd = open(random_path, O_RDONLY | O_CLOEXEC)) != -1);
 	VERIFY((urandom_fd = open(urandom_path, O_RDONLY | O_CLOEXEC)) != -1);
 }
 
 /* Close both random devices and mark their descriptors as unset (-1). */
 void
 random_fini(void)
 {
 	close(random_fd);
 	close(urandom_fd);
 
 	random_fd = -1;
 	urandom_fd = -1;
 }
 
 /*
  * Fill ptr[0 .. len) with bytes read from 'fd'.
  *
  * Short reads are continued and reads interrupted by a signal (EINTR) are
  * retried.  Returns 0 once 'len' bytes have been read, the errno of a
  * failed read(), or EIO if the descriptor reports end-of-file before the
  * buffer is full.  A failed read never moves 'ptr' or the remaining count.
  */
 static int
 random_get_bytes_common(uint8_t *ptr, size_t len, int fd)
 {
 	size_t resid = len;
 	ssize_t bytes;
 
 	ASSERT(fd != -1);
 
 	while (resid != 0) {
 		bytes = read(fd, ptr, resid);
 		if (bytes < 0) {
 			if (errno == EINTR)
 				continue;
 			return (errno);
 		}
 		/* End-of-file would otherwise make this loop spin forever. */
 		if (bytes == 0)
 			return (EIO);
 		ptr += bytes;
 		resid -= bytes;
 	}
 
 	return (0);
 }
 
 /* Fill 'ptr' with 'len' bytes read from the random_path device. */
 int
 random_get_bytes(uint8_t *ptr, size_t len)
 {
 	return (random_get_bytes_common(ptr, len, random_fd));
 }
 
 /* Fill 'ptr' with 'len' bytes read from the urandom_path device. */
 int
 random_get_pseudo_bytes(uint8_t *ptr, size_t len)
 {
 	return (random_get_bytes_common(ptr, len, urandom_fd));
 }
 
 /*
  * Convert the string 'hw_serial' to an unsigned long using strtoul().
  *
  * hw_serial - string to convert
  * nptr - if not NULL, receives a pointer to the first unconverted character
  * base - numeric base, as for strtoul()
  * result - receives the converted value
  *
  * Returns 0 on success, or the errno set by strtoul() (e.g. ERANGE on
  * overflow).  errno is cleared before the conversion so that a stale value
  * from an earlier call is never reported, in particular when the converted
  * value is legitimately 0.
  */
 int
 ddi_strtoul(const char *hw_serial, char **nptr, int base, unsigned long *result)
 {
 	char *end;
 
 	errno = 0;
 	*result = strtoul(hw_serial, &end, base);
 	if (nptr != NULL)
 		*nptr = end;
 
 	return (errno);
 }
 
 /*
  * Convert the string 'str' to a u_longlong_t using strtoull().
  *
  * str - string to convert
  * nptr - if not NULL, receives a pointer to the first unconverted character
  * base - numeric base, as for strtoull()
  * result - receives the converted value
  *
  * Returns 0 on success, or the errno set by strtoull() (e.g. ERANGE on
  * overflow).  errno is cleared before the conversion so that a stale value
  * from an earlier call is never reported, in particular when the converted
  * value is legitimately 0.
  */
 int
 ddi_strtoull(const char *str, char **nptr, int base, u_longlong_t *result)
 {
 	char *end;
 
 	errno = 0;
 	*result = strtoull(str, &end, base);
 	if (nptr != NULL)
 		*nptr = end;
 
 	return (errno);
 }
 
 /* Return the address of the global hw_utsname. */
 utsname_t *
 utsname(void)
 {
 	return (&hw_utsname);
 }
 
 /*
  * =========================================================================
  * kernel emulation setup & teardown
  * =========================================================================
  */
 /*
  * Callback registered with umem_nofail_callback() by kernel_init(): print a
  * message to stderr and abort().  The return statement is never reached.
  */
 static int
 umem_out_of_memory(void)
 {
 	char errmsg[] = "out of memory -- generating core dump\n";
 
 	(void) fprintf(stderr, "%s", errmsg);
 	abort();
 	return (0);
 }
 
 /*
  * Bring up the userland kernel emulation.  'mode' is the SPA open mode; it
  * is passed to spa_init() and decides what hostid is recorded.
  */
 void
 kernel_init(int mode)
 {
 	extern uint_t rrw_tsd_key;
 
 	umem_nofail_callback(umem_out_of_memory);
 
 	physmem = sysconf(_SC_PHYS_PAGES);
 
 	dprintf("physmem = %llu pages (%.2f GB)\n", (u_longlong_t)physmem,
 	    (double)physmem * sysconf(_SC_PAGE_SIZE) / (1ULL << 30));
 
 	/* the real hostid is recorded only when opening for write, else 0 */
 	(void) snprintf(hw_serial, sizeof (hw_serial), "%ld",
 	    (mode & SPA_MODE_WRITE) ? get_system_hostid() : 0);
 
 	random_init();
 
 	VERIFY0(uname(&hw_utsname));
 
 	system_taskq_init();
 	icp_init();
 
 	zstd_init();
 
 	spa_init((spa_mode_t)mode);
 
 	fletcher_4_init();
 
 	tsd_create(&rrw_tsd_key, rrw_tsd_destroy);
 }
 
 /*
  * Tear down the subsystems started by kernel_init(), in the reverse of the
  * order in which kernel_init() started them.
  */
 void
 kernel_fini(void)
 {
 	fletcher_4_fini();
 	spa_fini();
 
 	zstd_fini();
 
 	icp_fini();
 	system_taskq_fini();
 
 	random_fini();
 }
 
 /* Stub: 'cr' is ignored; always returns uid 0. */
 uid_t
 crgetuid(cred_t *cr)
 {
 	return (0);
 }
 
 /* Stub: 'cr' is ignored; always returns uid 0. */
 uid_t
 crgetruid(cred_t *cr)
 {
 	return (0);
 }
 
 /* Stub: 'cr' is ignored; always returns gid 0. */
 gid_t
 crgetgid(cred_t *cr)
 {
 	return (0);
 }
 
 /* Stub: 'cr' is ignored; always reports 0 groups. */
 int
 crgetngroups(cred_t *cr)
 {
 	return (0);
 }
 
 /* Stub: 'cr' is ignored; always returns NULL. */
 gid_t *
 crgetgroups(cred_t *cr)
 {
 	return (NULL);
 }
 
 /* Stub: performs no check; always returns 0. */
 int
 zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr)
 {
 	return (0);
 }
 
 /* Stub: performs no check; always returns 0. */
 int
 zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr)
 {
 	return (0);
 }
 
 /* Stub: performs no check; always returns 0. */
 int
 zfs_secpolicy_destroy_perms(const char *name, cred_t *cr)
 {
 	return (0);
 }
 
 /* Stub: performs no check; always returns 0. */
 int
 secpolicy_zfs(const cred_t *cr)
 {
 	return (0);
 }
 
 /* Stub: performs no check; always returns 0. */
 int
 secpolicy_zfs_proc(const cred_t *cr, proc_t *proc)
 {
 	return (0);
 }
 
 /*
  * Allocate a zeroed ksiddomain_t whose kd_name is a copy of 'dom'.
  * Release it with ksiddomain_rele().
  */
 ksiddomain_t *
 ksid_lookupdomain(const char *dom)
 {
 	ksiddomain_t *domain = umem_zalloc(sizeof (ksiddomain_t), UMEM_NOFAIL);
 
 	domain->kd_name = spa_strdup(dom);
 
 	return (domain);
 }
 
 /* Free the name string of 'ksid' and then the structure itself. */
 void
 ksiddomain_rele(ksiddomain_t *ksid)
 {
 	spa_strfree(ksid->kd_name);
 	umem_free(ksid, sizeof (ksiddomain_t));
 }
 
 /*
  * Format into a buffer allocated by vasprintf() and return it.  Works on a
  * copy of 'adx', so the caller's va_list is left untouched.  Allocation or
  * formatting failure is fatal (VERIFY).
  */
 char *
 kmem_vasprintf(const char *fmt, va_list adx)
 {
 	char *buf = NULL;
 	va_list adx_copy;
 
 	va_copy(adx_copy, adx);
 	VERIFY(vasprintf(&buf, fmt, adx_copy) != -1);
 	va_end(adx_copy);
 
 	return (buf);
 }
 
 /*
  * Variadic form of kmem_vasprintf(): format into a buffer allocated by
  * vasprintf() and return it.  Failure is fatal (VERIFY).
  */
 char *
 kmem_asprintf(const char *fmt, ...)
 {
 	char *buf = NULL;
 	va_list adx;
 
 	va_start(adx, fmt);
 	VERIFY(vasprintf(&buf, fmt, adx) != -1);
 	va_end(adx);
 
 	return (buf);
 }
 
 /* Stub: 'fd' is ignored; *minorp is set to 0. */
 /* ARGSUSED */
-int
+zfs_file_t *
 zfs_onexit_fd_hold(int fd, minor_t *minorp)
 {
 	*minorp = 0;
-	return (0);
+	return (NULL);
 }
 
 /* Stub: does nothing. */
 /* ARGSUSED */
 void
-zfs_onexit_fd_rele(int fd)
+zfs_onexit_fd_rele(zfs_file_t *fp)
 {
 }
 
 /* Stub: the callback is not registered; always returns 0. */
 /* ARGSUSED */
 int
 zfs_onexit_add_cb(minor_t minor, void (*func)(void *), void *data,
     uint64_t *action_handle)
 {
 	return (0);
 }
 
 /* Stub: always returns a zero cookie. */
 fstrans_cookie_t
 spl_fstrans_mark(void)
 {
 	return ((fstrans_cookie_t)0);
 }
 
 /* Stub: does nothing. */
 void
 spl_fstrans_unmark(fstrans_cookie_t cookie)
 {
 }
 
 /* Stub: always returns 0. */
 int
 __spl_pf_fstrans_check(void)
 {
 	return (0);
 }
 
 /* Stub: always returns 0. */
 int
 kmem_cache_reap_active(void)
 {
 	return (0);
 }
 
 /* Global tag pointer; here it simply points at a string literal. */
 void *zvol_tag = "zvol_tag";
 
 /* Stub: does nothing. */
 void
 zvol_create_minor(const char *name)
 {
 }
 
 /* Stub: does nothing. */
 void
 zvol_create_minors_recursive(const char *name)
 {
 }
 
 /* Stub: does nothing. */
 void
 zvol_remove_minors(spa_t *spa, const char *name, boolean_t async)
 {
 }
 
 /* Stub: does nothing. */
 void
 zvol_rename_minors(spa_t *spa, const char *oldname, const char *newname,
     boolean_t async)
 {
 }
 
 /*
  * Open file
  *
  * path - fully qualified path to file
  * flags - open(2) flags, passed through to open64()
  * mode - permission bits, passed through to open64()
  * fpp - pointer to return file pointer
  *
  * Without O_CREAT the path must already exist, and block devices are opened
  * with O_DIRECT added.  With O_CREAT the umask is cleared for the duration
  * of the open64() call and restored afterwards, whether or not the open
  * succeeded.  When vn_dumpdir is set, a companion dump file named after the
  * basename of 'path' is opened (created if needed) in that directory.
  *
  * Returns 0 on success underlying error on failure.
  */
 int
 zfs_file_open(const char *path, int flags, int mode, zfs_file_t **fpp)
 {
 	int fd = -1;
 	int dump_fd = -1;
 	int err;
 	int old_umask = 0;
 	zfs_file_t *fp;
 	struct stat64 st;
 
 	if (!(flags & O_CREAT) && stat64(path, &st) == -1)
 		return (errno);
 
 	if (!(flags & O_CREAT) && S_ISBLK(st.st_mode))
 		flags |= O_DIRECT;
 
 	if (flags & O_CREAT)
 		old_umask = umask(0);
 
 	fd = open64(path, flags, mode);
 	err = errno;
 
 	/*
 	 * Restore the umask before acting on the result, so that a failed
 	 * open cannot leave the process running with a umask of 0.
 	 */
 	if (flags & O_CREAT)
 		(void) umask(old_umask);
 
 	if (fd == -1)
 		return (err);
 
 	if (vn_dumpdir != NULL) {
 		char *dumppath = umem_zalloc(MAXPATHLEN, UMEM_NOFAIL);
 		const char *inpath = zfs_basename(path);
 
 		(void) snprintf(dumppath, MAXPATHLEN,
 		    "%s/%s", vn_dumpdir, inpath);
 		dump_fd = open64(dumppath, O_CREAT | O_WRONLY, 0666);
 		umem_free(dumppath, MAXPATHLEN);
 		if (dump_fd == -1) {
 			/* save errno: close() may overwrite it */
 			err = errno;
 			close(fd);
 			return (err);
 		}
 	}
 
 	(void) fcntl(fd, F_SETFD, FD_CLOEXEC);
 
 	fp = umem_zalloc(sizeof (zfs_file_t), UMEM_NOFAIL);
 	fp->f_fd = fd;
 	fp->f_dump_fd = dump_fd;
 	*fpp = fp;
 
 	return (0);
 }
 
 /*
  * Close the file descriptor of 'fp', its dump descriptor if one was opened,
  * and free 'fp' itself.
  */
 void
 zfs_file_close(zfs_file_t *fp)
 {
 	close(fp->f_fd);
 	if (fp->f_dump_fd != -1)
 		close(fp->f_dump_fd);
 
 	umem_free(fp, sizeof (zfs_file_t));
 }
 
 /*
  * Stateful write - uses the OS file offset of the descriptor to decide
  * where to write; the OS advances it on successful completion.
  *
  * fp -  pointer to file (pipe, socket, etc) to write to
  * buf - buffer to write
  * count - # of bytes to write
  * resid -  if not NULL, receives the count of unwritten bytes (short write)
  *
  * Returns 0 on success, errno on failure.  A short write is reported as EIO
  * when 'resid' is NULL.
  */
 int
 zfs_file_write(zfs_file_t *fp, const void *buf, size_t count, ssize_t *resid)
 {
 	ssize_t written = write(fp->f_fd, buf, count);
 
 	if (written < 0)
 		return (errno);
 
 	if (resid) {
 		*resid = count - written;
 		return (0);
 	}
 
 	return (written != count ? EIO : 0);
 }
 
 /*
  * Stateless write - os internal file pointer is not updated.
  *
  * fp -  pointer to file (pipe, socket, etc) to write to
  * buf - buffer to write
  * count - # of bytes to write
  * pos - file offset to write to (only valid for seekable types)
  * resid -  pointer to count of unwritten bytes
  *
  * Returns 0 on success errno on failure.  A short write is reported as EIO
  * when 'resid' is NULL.
  */
 int
 zfs_file_pwrite(zfs_file_t *fp, const void *buf,
     size_t count, loff_t pos, ssize_t *resid)
 {
 	ssize_t rc, split, done;
 	int sectors;
 
 	/*
 	 * To simulate partial disk writes, we split writes into two
 	 * system calls so that the process can be killed in between.
 	 * This is used by ztest to simulate realistic failure modes.
 	 */
 	/* the split point is a random whole number of minimum-size blocks */
 	sectors = count >> SPA_MINBLOCKSHIFT;
 	split = (sectors > 0 ? rand() % sectors : 0) << SPA_MINBLOCKSHIFT;
 	rc = pwrite64(fp->f_fd, buf, split, pos);
 	if (rc != -1) {
 		/* 'done' is assigned only when the first write succeeded */
 		done = rc;
 		rc = pwrite64(fp->f_fd, (char *)buf + split,
 		    count - split, pos + split);
 	}
 #ifdef __linux__
 	if (rc == -1 && errno == EINVAL) {
 		/*
 		 * Under Linux, this most likely means an alignment issue
 		 * (memory or disk) due to O_DIRECT, so we abort() in order
 		 * to catch the offender.
 		 */
 		abort();
 	}
 #endif
 
 	/* a failure of either write returns here, before 'done' is read */
 	if (rc < 0)
 		return (errno);
 
 	done += rc;
 
 	if (resid) {
 		*resid = count - done;
 	} else if (done != count) {
 		return (EIO);
 	}
 
 	return (0);
 }
 
 /*
  * Stateful read - use os internal file pointer to determine where to
  * read and update on successful completion.
  *
  * fp -  pointer to file (pipe, socket, etc) to read from
  * buf - buffer to read into
  * count - # of bytes to read
  * resid -  pointer to count of unread bytes (if short read)
  *
  * Returns 0 on success errno on failure.  A short read is reported as EIO
  * when 'resid' is NULL.
  */
 int
 zfs_file_read(zfs_file_t *fp, void *buf, size_t count, ssize_t *resid)
 {
 	/* read() returns ssize_t; an int would truncate large transfers */
 	ssize_t rc;
 
 	rc = read(fp->f_fd, buf, count);
 	if (rc < 0)
 		return (errno);
 
 	if (resid) {
 		*resid = count - rc;
 	} else if (rc != count) {
 		return (EIO);
 	}
 
 	return (0);
 }
 
 /*
  * Stateless read - the OS file position is not used or updated.
  *
  * fp - pointer to file (pipe, socket, etc) to read from
  * buf - buffer to read into
  * count - # of bytes to read
  * off - file offset to read from (only valid for seekable types)
  * resid - if non-NULL, receives the count of unread bytes
  *
  * Returns 0 on success, errno on failure.  When resid is NULL a short
  * read is reported as EIO.
  */
 int
 zfs_file_pread(zfs_file_t *fp, void *buf, size_t count, loff_t off,
     ssize_t *resid)
 {
 	ssize_t nread = pread64(fp->f_fd, buf, count, off);
 
 	if (nread < 0) {
 #ifdef __linux__
 		/*
 		 * Under Linux, EINVAL most likely means an alignment issue
 		 * (memory or disk) due to O_DIRECT, so we abort() in order to
 		 * catch the offender.
 		 */
 		if (errno == EINVAL)
 			abort();
 #endif
 		return (errno);
 	}
 
 	/* Mirror whatever was read into the dump file, when one is open. */
 	if (fp->f_dump_fd != -1) {
 		int status;
 
 		status = pwrite64(fp->f_dump_fd, buf, nread, off);
 		ASSERT(status != -1);
 	}
 
 	if (resid != NULL) {
 		*resid = count - nread;
 		return (0);
 	}
 
 	return (nread != count ? EIO : 0);
 }
 
 /*
  * lseek - set / get file pointer
  *
  * fp - pointer to file to reposition
  * offp - in: offset passed to lseek(); out: resulting file offset
  * whence - see man pages for standard lseek whence values
  *
  * Returns 0 on success, errno on failure; *offp is only written on
  * success.
  */
 int
 zfs_file_seek(zfs_file_t *fp, loff_t *offp, int whence)
 {
 	loff_t newpos = lseek(fp->f_fd, *offp, whence);
 
 	if (newpos < 0)
 		return (errno);
 
 	*offp = newpos;
 	return (0);
 }
 
 /*
  * Get file attributes
  *
  * fp - file pointer
  * zfattr - attribute structure to fill in
  *
  * Only the size and the file mode are filled in.
  *
  * Returns 0 on success or errno from the underlying stat call on failure.
  */
 int
 zfs_file_getattr(zfs_file_t *fp, zfs_file_attr_t *zfattr)
 {
 	struct stat64 sb;
 
 	if (fstat64_blk(fp->f_fd, &sb) == -1)
 		return (errno);
 
 	zfattr->zfa_mode = sb.st_mode;
 	zfattr->zfa_size = sb.st_size;
 
 	return (0);
 }
 
 /*
  * Sync file to disk
  *
  * fp - file pointer
  * flags - O_SYNC and or O_DSYNC (not consulted by this implementation,
  *         which always issues a plain fsync())
  *
  * Returns 0 on success or errno from the underlying fsync call on failure.
  */
 int
 zfs_file_fsync(zfs_file_t *fp, int flags)
 {
 	if (fsync(fp->f_fd) < 0)
 		return (errno);
 
 	return (0);
 }
 
 /*
  * fallocate - allocate or free space on disk
  *
  * fp - file pointer
  * mode (non-standard options for hole punching etc)
  * offset - offset to start allocating or freeing from
  * len - length to free / allocate
  *
  * Returns 0 on success or an errno value on failure (EOPNOTSUPP where
  * fallocate() is not available).
  *
  * OPTIONAL
  */
 int
 zfs_file_fallocate(zfs_file_t *fp, int mode, loff_t offset, loff_t len)
 {
 #ifdef __linux__
 	/*
 	 * fallocate() reports failure as -1 with the cause in errno;
 	 * translate that so callers get an errno value like they do from
 	 * the other zfs_file_*() routines.
 	 */
 	if (fallocate(fp->f_fd, mode, offset, len) == -1)
 		return (errno);
 
 	return (0);
 #else
 	return (EOPNOTSUPP);
 #endif
 }
 
 /*
  * Request current file pointer offset
  *
  * fp - pointer to file
  *
  * Returns current file offset (or the lseek() failure value).
  */
 loff_t
 zfs_file_off(zfs_file_t *fp)
 {
 	/*
 	 * lseek() takes (fd, offset, whence): a zero offset relative to
 	 * SEEK_CUR queries the position without moving it.
 	 */
 	return (lseek(fp->f_fd, 0, SEEK_CUR));
 }
 
 /*
  * unlink file
  *
  * path - fully qualified file path
  *
  * Returns 0 on success; on failure the result of remove() is passed
  * through unchanged.
  *
  * OPTIONAL
  */
 int
 zfs_file_unlink(const char *path)
 {
 	int rc = remove(path);
 
 	return (rc);
 }
 
 /*
  * Get reference to file pointer
  *
  * fd - input file descriptor
- * fpp - pointer to file pointer
  *
- * Returns 0 on success EBADF on failure.
+ * Returns pointer to file struct or NULL.
  * Unsupported in user space.
  */
-int
-zfs_file_get(int fd, zfs_file_t **fpp)
+zfs_file_t *
+zfs_file_get(int fd)
 {
 	abort();
 
-	return (EOPNOTSUPP);
+	return (NULL);
 }
-
 /*
  * Drop reference to file pointer
  *
- * fd - input file descriptor
+ * fp - pointer to file struct
  *
  * Unsupported in user space.
  */
 void
-zfs_file_put(int fd)
+zfs_file_put(zfs_file_t *fp)
 {
 	abort();
 }
 
 /*
  * No-op: neither name is used by this implementation.
  */
 void
 zfsvfs_update_fromname(const char *oldname, const char *newname)
 {
 	(void) oldname;
 	(void) newname;
 }
diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/vdev_geom.c b/sys/contrib/openzfs/module/os/freebsd/zfs/vdev_geom.c
index 3853b2b5c900..b1407e4bd61d 100644
--- a/sys/contrib/openzfs/module/os/freebsd/zfs/vdev_geom.c
+++ b/sys/contrib/openzfs/module/os/freebsd/zfs/vdev_geom.c
@@ -1,1214 +1,1317 @@
 /*
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
  * Common Development and Distribution License (the "License").
  * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
  * See the License for the specific language governing permissions
  * and limitations under the License.
  *
  * When distributing Covered Code, include this CDDL HEADER in each
  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  * If applicable, add the following below this CDDL HEADER, with the
  * fields enclosed by brackets "[]" replaced with your own identifying
  * information: Portions Copyright [yyyy] [name of copyright owner]
  *
  * CDDL HEADER END
  */
 /*
  * Copyright (c) 2006 Pawel Jakub Dawidek <pjd@FreeBSD.org>
  * All rights reserved.
  *
  * Portions Copyright (c) 2012 Martin Matuska <mm@FreeBSD.org>
  */
 
 #include <sys/zfs_context.h>
 #include <sys/param.h>
 #include <sys/kernel.h>
 #include <sys/bio.h>
+#include <sys/buf.h>
 #include <sys/file.h>
 #include <sys/spa.h>
 #include <sys/spa_impl.h>
 #include <sys/vdev_impl.h>
 #include <sys/vdev_os.h>
 #include <sys/fs/zfs.h>
 #include <sys/zio.h>
+#include <vm/vm_page.h>
 #include <geom/geom.h>
 #include <geom/geom_disk.h>
 #include <geom/geom_int.h>
 
 #ifndef g_topology_locked
 #define	g_topology_locked()	sx_xlocked(&topology_lock)
 #endif
 
 /*
  * Virtual device vector for GEOM.
  */
 
 /* Forward declaration: the class initializer below refers to it. */
 static g_attrchanged_t vdev_geom_attrchanged;
 /*
  * GEOM class under which the "zfs::vdev" geom and its consumers are
  * created (see vdev_geom_attach()).
  */
 struct g_class zfs_vdev_class = {
 	.name = "ZFS::VDEV",
 	.version = G_VERSION,
 	.attrchanged = vdev_geom_attrchanged,
 };
 
 /*
  * List node recording one vdev that is using a GEOM consumer.  The nodes
  * are chained on a list whose head lives in the consumer's private field.
  */
 struct consumer_vdev_elem {
 	SLIST_ENTRY(consumer_vdev_elem)	elems;	/* list linkage */
 	vdev_t	*vd;				/* vdev using the consumer */
 };
 
 /* Head type for the per-consumer list of consumer_vdev_elem nodes. */
 SLIST_HEAD(consumer_priv_t, consumer_vdev_elem);
 /* BEGIN CSTYLED */
 /*
  * The code casts &cp->private to a list head pointer, so the private
  * field must be exactly as large as a pointer to the head type.
  */
 _Static_assert(sizeof (((struct g_consumer *)NULL)->private)
 	== sizeof (struct consumer_priv_t*),
 	"consumer_priv_t* can't be stored in g_consumer.private");
 
 DECLARE_GEOM_CLASS(zfs_vdev_class, zfs_vdev);
 
 SYSCTL_DECL(_vfs_zfs_vdev);
 /* Don't send BIO_FLUSH. */
 static int vdev_geom_bio_flush_disable;
 SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, bio_flush_disable, CTLFLAG_RWTUN,
     &vdev_geom_bio_flush_disable, 0, "Disable BIO_FLUSH");
 /* Don't send BIO_DELETE. */
 static int vdev_geom_bio_delete_disable;
 SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, bio_delete_disable, CTLFLAG_RWTUN,
     &vdev_geom_bio_delete_disable, 0, "Disable BIO_DELETE");
 /* END CSTYLED */
 
 /* Declare local functions */
 static void vdev_geom_detach(struct g_consumer *cp, boolean_t open_for_read);
 
 /*
  * Thread local storage used to indicate when a thread is probing geoms
  * for their guids.  If NULL, this thread is not tasting geoms.  If non NULL,
  * it is looking for a replacement for the vdev_t* that is its value.
  */
 uint_t zfs_geom_probe_vdev_key;
 
 /*
  * Refresh vd->vdev_physpath from the consumer's "GEOM::physpath"
  * attribute and request an async config update when the value changed.
  *
  * do_null_update - whether going from "no physpath" to a physpath also
  * counts as a change worth a config update.
  *
  * Nothing is modified when the attribute cannot be fetched.
  */
 static void
 vdev_geom_set_physpath(vdev_t *vd, struct g_consumer *cp,
     boolean_t do_null_update)
 {
 	boolean_t changed = B_FALSE;
 	int buflen = MAXPATHLEN;
 	char *buf;
 
 	buf = g_malloc(buflen, M_WAITOK|M_ZERO);
 	if (g_io_getattr("GEOM::physpath", cp, &buflen, buf) == 0) {
 		char *prev;
 
 		/* g_topology lock ensures that vdev has not been closed */
 		g_topology_assert();
 		prev = vd->vdev_physpath;
 		vd->vdev_physpath = spa_strdup(buf);
 
 		if (prev == NULL) {
 			changed = do_null_update;
 		} else {
 			changed = (strcmp(prev, vd->vdev_physpath) != 0);
 			spa_strfree(prev);
 		}
 	}
 	g_free(buf);
 
 	/*
 	 * Only a real change (or a first-time value the caller asked to
 	 * be reported) triggers a config update.
 	 */
 	if (changed)
 		spa_async_request(vd->vdev_spa, SPA_ASYNC_CONFIG_UPDATE);
 
 }
 
 static void
 vdev_geom_attrchanged(struct g_consumer *cp, const char *attr)
 {
 	struct consumer_priv_t *priv;
 	struct consumer_vdev_elem *elem;
 
 	priv = (struct consumer_priv_t *)&cp->private;
 	if (SLIST_EMPTY(priv))
 		return;
 
 	SLIST_FOREACH(elem, priv, elems) {
 		vdev_t *vd = elem->vd;
 		if (strcmp(attr, "GEOM::physpath") == 0) {
 			vdev_geom_set_physpath(vd, cp, /* null_update */B_TRUE);
 			return;
 		}
 	}
 }
 
 /*
  * GEOM resize callback: for every healthy vdev on this consumer whose
  * pool has autoexpand set, bring the vdev online with ZFS_ONLINE_EXPAND.
  */
 static void
 vdev_geom_resize(struct g_consumer *cp)
 {
 	struct consumer_priv_t *priv;
 	struct consumer_vdev_elem *elem;
 
 	priv = (struct consumer_priv_t *)&cp->private;
 
 	/* An empty list simply yields no iterations. */
 	SLIST_FOREACH(elem, priv, elems) {
 		vdev_t *vd = elem->vd;
 		spa_t *spa;
 
 		if (vd->vdev_state == VDEV_STATE_HEALTHY) {
 			spa = vd->vdev_spa;
 			if (spa->spa_autoexpand) {
 				vdev_online(spa, vd->vdev_guid,
 				    ZFS_ONLINE_EXPAND, NULL);
 			}
 		}
 	}
 }
 
 /*
  * GEOM orphan callback: flag every vdev using this consumer for removal
  * and ask the SPA to process the removal asynchronously.  Must be called
  * with the GEOM topology lock held.
  */
 static void
 vdev_geom_orphan(struct g_consumer *cp)
 {
 	struct consumer_priv_t *priv;
 	// cppcheck-suppress uninitvar
 	struct consumer_vdev_elem *elem;
 
 	g_topology_assert();
 
 	priv = (struct consumer_priv_t *)&cp->private;
 	if (SLIST_EMPTY(priv))
 		/* Vdev close in progress.  Ignore the event. */
 		return;
 
 	/*
 	 * Orphan callbacks occur from the GEOM event thread.
 	 * Concurrent with this call, new I/O requests may be
 	 * working their way through GEOM about to find out
 	 * (only once executed by the g_down thread) that we've
 	 * been orphaned from our disk provider.  These I/Os
 	 * must be retired before we can detach our consumer.
 	 * This is most easily achieved by acquiring the
 	 * SPA ZIO configuration lock as a writer, but doing
 	 * so with the GEOM topology lock held would cause
 	 * a lock order reversal.  Instead, rely on the SPA's
 	 * async removal support to invoke a close on this
 	 * vdev once it is safe to do so.
 	 */
 	// cppcheck-suppress All
 	SLIST_FOREACH(elem, priv, elems) {
 		// cppcheck-suppress uninitvar
 		vdev_t *vd = elem->vd;
 
 		vd->vdev_remove_wanted = B_TRUE;
 		spa_async_request(vd->vdev_spa, SPA_ASYNC_REMOVE);
 	}
 }
 
 /*
  * Attach a consumer of the "zfs::vdev" geom to provider pp and take a
  * read + exclusive access reference on it.
  *
  * pp - provider to attach to
  * vd - if non-NULL, the consumer is recorded in vd->vdev_tsd
  * sanity - when set, reject providers whose sector size is not a power
  *          of two or exceeds VDEV_PAD_SIZE, or whose media is smaller
  *          than SPA_MINDEVSIZE
  *
  * The geom and the consumer are created on demand; an existing consumer
  * for the same provider is reused.  Returns the consumer, or NULL on
  * failure.  Must be called with the GEOM topology lock held.
  */
 static struct g_consumer *
 vdev_geom_attach(struct g_provider *pp, vdev_t *vd, boolean_t sanity)
 {
 	struct g_geom *gp;
 	struct g_consumer *cp;
 	int error;
 
 	g_topology_assert();
 
 	ZFS_LOG(1, "Attaching to %s.", pp->name);
 
 	if (sanity) {
 		if (pp->sectorsize > VDEV_PAD_SIZE || !ISP2(pp->sectorsize)) {
 			ZFS_LOG(1, "Failing attach of %s. "
 			    "Incompatible sectorsize %d\n",
 			    pp->name, pp->sectorsize);
 			return (NULL);
 		} else if (pp->mediasize < SPA_MINDEVSIZE) {
 			ZFS_LOG(1, "Failing attach of %s. "
 			    "Incompatible mediasize %ju\n",
 			    pp->name, pp->mediasize);
 			return (NULL);
 		}
 	}
 
 	/* Do we have geom already? No? Create one. */
 	LIST_FOREACH(gp, &zfs_vdev_class.geom, geom) {
 		if (gp->flags & G_GEOM_WITHER)
 			continue;
 		if (strcmp(gp->name, "zfs::vdev") != 0)
 			continue;
 		break;
 	}
 	if (gp == NULL) {
 		/* No usable geom: create geom and a first consumer. */
 		gp = g_new_geomf(&zfs_vdev_class, "zfs::vdev");
 		gp->orphan = vdev_geom_orphan;
 		gp->attrchanged = vdev_geom_attrchanged;
 		gp->resize = vdev_geom_resize;
 		cp = g_new_consumer(gp);
 		error = g_attach(cp, pp);
 		if (error != 0) {
 			ZFS_LOG(1, "%s(%d): g_attach failed: %d\n", __func__,
 			    __LINE__, error);
 			vdev_geom_detach(cp, B_FALSE);
 			return (NULL);
 		}
 		error = g_access(cp, 1, 0, 1);
 		if (error != 0) {
 			ZFS_LOG(1, "%s(%d): g_access failed: %d\n", __func__,
 			    __LINE__, error);
 			vdev_geom_detach(cp, B_FALSE);
 			return (NULL);
 		}
 		ZFS_LOG(1, "Created geom and consumer for %s.", pp->name);
 	} else {
 		/* Check if we are already connected to this provider. */
 		LIST_FOREACH(cp, &gp->consumer, consumer) {
 			if (cp->provider == pp) {
 				ZFS_LOG(1, "Found consumer for %s.", pp->name);
 				break;
 			}
 		}
 		if (cp == NULL) {
 			/* Geom exists but has no consumer for pp yet. */
 			cp = g_new_consumer(gp);
 			error = g_attach(cp, pp);
 			if (error != 0) {
 				ZFS_LOG(1, "%s(%d): g_attach failed: %d\n",
 				    __func__, __LINE__, error);
 				vdev_geom_detach(cp, B_FALSE);
 				return (NULL);
 			}
 			error = g_access(cp, 1, 0, 1);
 			if (error != 0) {
 				ZFS_LOG(1, "%s(%d): g_access failed: %d\n",
 				    __func__, __LINE__, error);
 				vdev_geom_detach(cp, B_FALSE);
 				return (NULL);
 			}
 			ZFS_LOG(1, "Created consumer for %s.", pp->name);
 		} else {
 			/*
 			 * Reuse the existing consumer.  On failure it is
 			 * left in place, since it was not created here.
 			 */
 			error = g_access(cp, 1, 0, 1);
 			if (error != 0) {
 				ZFS_LOG(1, "%s(%d): g_access failed: %d\n",
 				    __func__, __LINE__, error);
 				return (NULL);
 			}
 			ZFS_LOG(1, "Used existing consumer for %s.", pp->name);
 		}
 	}
 
 	if (vd != NULL)
 		vd->vdev_tsd = cp;
 
 	cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
 	return (cp);
 }
 
 /*
  * Release a consumer obtained from vdev_geom_attach().
  *
  * open_for_read - set when the caller holds the read + exclusive access
  * reference taken by vdev_geom_attach(); it is dropped first.
  *
  * Once no read or exclusive references remain, any write references are
  * dropped and the consumer is detached and destroyed.  The geom is
  * withered when it has no consumers left.  Must be called with the GEOM
  * topology lock held.
  */
 static void
 vdev_geom_detach(struct g_consumer *cp, boolean_t open_for_read)
 {
 	struct g_geom *gp;
 	boolean_t last_close;
 
 	g_topology_assert();
 
 	ZFS_LOG(1, "Detaching from %s.",
 	    cp->provider && cp->provider->name ? cp->provider->name : "NULL");
 
 	gp = cp->geom;
 	if (open_for_read)
 		g_access(cp, -1, 0, -1);
 
 	/* Destroy consumer on last close. */
 	last_close = (cp->acr == 0 && cp->ace == 0);
 	if (last_close) {
 		if (cp->acw > 0)
 			g_access(cp, 0, -cp->acw, 0);
 		if (cp->provider != NULL) {
 			ZFS_LOG(1, "Destroying consumer for %s.",
 			    cp->provider->name ? cp->provider->name : "NULL");
 			g_detach(cp);
 		}
 		g_destroy_consumer(cp);
 	}
 
 	/* Destroy geom if there are no consumers left. */
 	if (LIST_EMPTY(&gp->consumer)) {
 		ZFS_LOG(1, "Destroyed geom %s.", gp->name);
 		g_wither_geom(gp, ENXIO);
 	}
 }
 
 /*
  * Close the vdev's consumer: clear vd->vdev_tsd, remove the vdev from the
  * consumer's list and drop the consumer reference.  The delayed-close
  * flag is always cleared, even when no consumer is attached.  Must be
  * called with the GEOM topology lock held.
  */
 static void
 vdev_geom_close_locked(vdev_t *vd)
 {
 	struct g_consumer *cp;
 	struct consumer_priv_t *priv;
 	struct consumer_vdev_elem *cur, *next;
 
 	g_topology_assert();
 
 	cp = vd->vdev_tsd;
 	vd->vdev_delayed_close = B_FALSE;
 	if (cp == NULL)
 		return;
 
 	ZFS_LOG(1, "Closing access to %s.", cp->provider->name);
 	KASSERT(cp->private != NULL, ("%s: cp->private is NULL", __func__));
 	priv = (struct consumer_priv_t *)&cp->private;
 	vd->vdev_tsd = NULL;
 
 	/* Safe iteration: matching nodes are unlinked and freed in place. */
 	SLIST_FOREACH_SAFE(cur, priv, elems, next) {
 		if (cur->vd != vd)
 			continue;
 		SLIST_REMOVE(priv, cur, consumer_vdev_elem, elems);
 		g_free(cur);
 	}
 
 	vdev_geom_detach(cp, B_TRUE);
 }
 
 /*
  * Issue one or more bios to the vdev in parallel
  * cmds, datas, offsets, errors, and sizes are arrays of length ncmds.  Each IO
  * operation is described by parallel entries from each array.  There may be
  * more bios actually issued than entries in the array
  */
 static void
 vdev_geom_io(struct g_consumer *cp, int *cmds, void **datas, off_t *offsets,
     off_t *sizes, int *errors, int ncmds)
 {
 	struct bio **bios;
 	uint8_t *p;
 	off_t off, maxio, s, end;
 	int i, n_bios, j;
 	size_t bios_size;
 
 	maxio = maxphys - (maxphys % cp->provider->sectorsize);
 	n_bios = 0;
 
 	/* How many bios are required for all commands ? */
 	for (i = 0; i < ncmds; i++)
 		n_bios += (sizes[i] + maxio - 1) / maxio;
 
 	/* Allocate memory for the bios */
 	bios_size = n_bios * sizeof (struct bio *);
 	bios = kmem_zalloc(bios_size, KM_SLEEP);
 
 	/* Prepare and issue all of the bios */
 	for (i = j = 0; i < ncmds; i++) {
 		off = offsets[i];
 		p = datas[i];
 		s = sizes[i];
 		end = off + s;
 		ASSERT0(off % cp->provider->sectorsize);
 		ASSERT0(s % cp->provider->sectorsize);
 
 		for (; off < end; off += maxio, p += maxio, s -= maxio, j++) {
 			bios[j] = g_alloc_bio();
 			bios[j]->bio_cmd = cmds[i];
 			bios[j]->bio_done = NULL;
 			bios[j]->bio_offset = off;
 			bios[j]->bio_length = MIN(s, maxio);
 			bios[j]->bio_data = (caddr_t)p;
 			g_io_request(bios[j], cp);
 		}
 	}
 	ASSERT3S(j, ==, n_bios);
 
 	/* Wait for all of the bios to complete, and clean them up */
 	for (i = j = 0; i < ncmds; i++) {
 		off = offsets[i];
 		s = sizes[i];
 		end = off + s;
 
 		for (; off < end; off += maxio, s -= maxio, j++) {
 			errors[i] = biowait(bios[j], "vdev_geom_io") ||
 			    errors[i];
 			g_destroy_bio(bios[j]);
 		}
 	}
 	kmem_free(bios, bios_size);
 }
 
 /*
  * Read the vdev config from a device.  Return the number of valid labels that
  * were found.  The vdev config will be returned in config if and only if at
  * least one valid label was found.
  *
  * When several labels are valid, *configp ends up holding the config
  * unpacked from the last valid one; earlier ones are freed.  Must be
  * called without the GEOM topology lock held.
  */
 static int
 vdev_geom_read_config(struct g_consumer *cp, nvlist_t **configp)
 {
 	struct g_provider *pp;
 	nvlist_t *config;
 	vdev_phys_t *vdev_lists[VDEV_LABELS];
 	char *buf;
 	size_t buflen;
 	uint64_t psize, state, txg;
 	off_t offsets[VDEV_LABELS];
 	off_t size;
 	off_t sizes[VDEV_LABELS];
 	int cmds[VDEV_LABELS];
 	int errors[VDEV_LABELS];
 	int l, nlabels;
 
 	g_topology_assert_not();
 
 	pp = cp->provider;
 	ZFS_LOG(1, "Reading config from %s...", pp->name);
 
 	psize = pp->mediasize;
 	psize = P2ALIGN(psize, (uint64_t)sizeof (vdev_label_t));
 
 	/* sizeof (vdev_phys_t) rounded up to a whole number of sectors. */
 	size = sizeof (*vdev_lists[0]) + pp->sectorsize -
 	    ((sizeof (*vdev_lists[0]) - 1) % pp->sectorsize) - 1;
 
 	buflen = sizeof (vdev_lists[0]->vp_nvlist);
 
 	/* Create all of the IO requests */
 	for (l = 0; l < VDEV_LABELS; l++) {
 		cmds[l] = BIO_READ;
 		vdev_lists[l] = kmem_alloc(size, KM_SLEEP);
 		offsets[l] = vdev_label_offset(psize, l, 0) + VDEV_SKIP_SIZE;
 		sizes[l] = size;
 		errors[l] = 0;
 		ASSERT0(offsets[l] % pp->sectorsize);
 	}
 
 	/* Issue the IO requests */
 	vdev_geom_io(cp, cmds, (void**)vdev_lists, offsets, sizes, errors,
 	    VDEV_LABELS);
 
 	/* Parse the labels */
 	config = *configp = NULL;
 	nlabels = 0;
 	for (l = 0; l < VDEV_LABELS; l++) {
 		/* Skip labels whose read failed. */
 		if (errors[l] != 0)
 			continue;
 
 		buf = vdev_lists[l]->vp_nvlist;
 
 		if (nvlist_unpack(buf, buflen, &config, 0) != 0)
 			continue;
 
 		/* The pool state must be present and within range. */
 		if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE,
 		    &state) != 0 || state > POOL_STATE_L2CACHE) {
 			nvlist_free(config);
 			continue;
 		}
 
 		/* Except for spares and L2ARC, a non-zero txg is required. */
 		if (state != POOL_STATE_SPARE &&
 		    state != POOL_STATE_L2CACHE &&
 		    (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG,
 		    &txg) != 0 || txg == 0)) {
 			nvlist_free(config);
 			continue;
 		}
 
 		/* Valid label: it replaces any previously accepted config. */
 		if (*configp != NULL)
 			nvlist_free(*configp);
 		*configp = config;
 		nlabels++;
 	}
 
 	/* Free the label storage */
 	for (l = 0; l < VDEV_LABELS; l++)
 		kmem_free(vdev_lists[l], size);
 
 	return (nlabels);
 }
 
 /*
  * Make sure the *configs array has a slot for index id.  When it is too
  * short, a zero-filled array of id + 1 entries replaces it, the existing
  * entries are carried over and *count is updated.  Nothing happens when
  * id already fits.
  */
 static void
 resize_configs(nvlist_t ***configs, uint64_t *count, uint64_t id)
 {
 	nvlist_t **grown;
 	uint64_t idx, new_count;
 
 	if (id < *count)
 		return;
 
 	new_count = id + 1;
 	grown = kmem_zalloc(new_count * sizeof (nvlist_t *), KM_SLEEP);
 
 	for (idx = 0; idx < *count; idx++)
 		grown[idx] = (*configs)[idx];
 
 	if (*configs != NULL)
 		kmem_free(*configs, *count * sizeof (void *));
 
 	*configs = grown;
 	*count = new_count;
 }
 
 /*
  * Consider one label config for inclusion in the per-top-level-vdev
  * config array of pool "name".
  *
  * configs/count - array indexed by the vdev tree id, grown as needed
  * cfg - config to consider; ownership passes to this function, which
  *       either stores it in the array or frees it
  * name - pool name the config must carry
  * known_pool_guid - in/out: 0 until the first config is accepted, then
  *       the pool guid every later config must match
  *
  * A config replaces an existing entry for the same id only when its txg
  * is strictly greater.
  */
 static void
 process_vdev_config(nvlist_t ***configs, uint64_t *count, nvlist_t *cfg,
     const char *name, uint64_t *known_pool_guid)
 {
 	nvlist_t *vdev_tree;
 	uint64_t pool_guid;
 	uint64_t vdev_guid;
 	uint64_t id, txg, known_txg;
 	char *pname;
 
 	if (nvlist_lookup_string(cfg, ZPOOL_CONFIG_POOL_NAME, &pname) != 0 ||
 	    strcmp(pname, name) != 0)
 		goto ignore;
 
 	if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_GUID, &pool_guid) != 0)
 		goto ignore;
 
 	/* Looked up only to require its presence; the value is not used. */
 	if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_TOP_GUID, &vdev_guid) != 0)
 		goto ignore;
 
 	if (nvlist_lookup_nvlist(cfg, ZPOOL_CONFIG_VDEV_TREE, &vdev_tree) != 0)
 		goto ignore;
 
 	if (nvlist_lookup_uint64(vdev_tree, ZPOOL_CONFIG_ID, &id) != 0)
 		goto ignore;
 
 	txg = fnvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_TXG);
 
 	/* The first config seen fixes the pool guid for all the others. */
 	if (*known_pool_guid != 0) {
 		if (pool_guid != *known_pool_guid)
 			goto ignore;
 	} else
 		*known_pool_guid = pool_guid;
 
 	resize_configs(configs, count, id);
 
 	/* Keep whichever config for this id has the newer txg. */
 	if ((*configs)[id] != NULL) {
 		known_txg = fnvlist_lookup_uint64((*configs)[id],
 		    ZPOOL_CONFIG_POOL_TXG);
 		if (txg <= known_txg)
 			goto ignore;
 		nvlist_free((*configs)[id]);
 	}
 
 	(*configs)[id] = cfg;
 	return;
 
 ignore:
 	nvlist_free(cfg);
 }
 
 /*
  * Taste every provider of every GEOM class other than our own, looking
  * for labels that belong to pool "name".
  *
  * name - pool name to look for
  * configs - out: array of configs indexed by vdev tree id (NULL if none)
  * count - out: number of entries in *configs
  *
  * Withering geoms and providers are skipped.  The topology lock is
  * dropped around each label read.  Returns 0 if at least one slot was
  * allocated, ENOENT otherwise.
  */
 int
 vdev_geom_read_pool_label(const char *name,
     nvlist_t ***configs, uint64_t *count)
 {
 	struct g_class *mp;
 	struct g_geom *gp;
 	struct g_provider *pp;
 	struct g_consumer *zcp;
 	nvlist_t *vdev_cfg;
 	uint64_t pool_guid;
 	int nlabels;
 
 	DROP_GIANT();
 	g_topology_lock();
 
 	*configs = NULL;
 	*count = 0;
 	pool_guid = 0;
 	LIST_FOREACH(mp, &g_classes, class) {
 		if (mp == &zfs_vdev_class)
 			continue;
 		LIST_FOREACH(gp, &mp->geom, geom) {
 			if (gp->flags & G_GEOM_WITHER)
 				continue;
 			LIST_FOREACH(pp, &gp->provider, provider) {
 				if (pp->flags & G_PF_WITHER)
 					continue;
 				zcp = vdev_geom_attach(pp, NULL, B_TRUE);
 				if (zcp == NULL)
 					continue;
 				/* Label I/O must run without the topology lock. */
 				g_topology_unlock();
 				nlabels = vdev_geom_read_config(zcp, &vdev_cfg);
 				g_topology_lock();
 				vdev_geom_detach(zcp, B_TRUE);
 				if (nlabels == 0)
 					continue;
 				ZFS_LOG(1, "successfully read vdev config");
 
 				/* Takes ownership of vdev_cfg. */
 				process_vdev_config(configs, count,
 				    vdev_cfg, name, &pool_guid);
 			}
 		}
 	}
 	g_topology_unlock();
 	PICKUP_GIANT();
 
 	return (*count > 0 ? 0 : ENOENT);
 }
 
 /*
  * Quality of a provider's labels as a candidate for a vdev, ordered from
  * worst to best.  The *_MATCH values above ZERO_MATCH are ZERO_MATCH plus
  * the number of labels matching the vdev guid.
  */
 enum match {
 	/* No matching labels found */
 	NO_MATCH = 0,
 	/* Labels match top guid, not vdev guid */
 	TOPGUID_MATCH = NO_MATCH + 1,
 	/* Should never be returned */
 	ZERO_MATCH = TOPGUID_MATCH,
 	/* 1 label matching the vdev_guid */
 	ONE_MATCH = ZERO_MATCH + 1,
 	/* 2 label matching the vdev_guid */
 	TWO_MATCH = ZERO_MATCH + 2,
 	/* 3 label matching the vdev_guid */
 	THREE_MATCH = ZERO_MATCH + 3,
 	/* all labels match the vdev_guid */
 	FULL_MATCH = ZERO_MATCH + 4
 };
 
 /*
  * Taste provider pp and rate how well its labels match vdev vd.
  *
  * Returns NO_MATCH when the provider cannot be attached or read, or when
  * the pool or vdev guid does not match; ZERO_MATCH plus the number of
  * valid labels when the label's vdev guid matches; TOPGUID_MATCH when
  * only the label's top-level guid matches and vd is a top-level vdev.
  *
  * Called with the GEOM topology lock held; the lock is dropped while the
  * labels are read.
  */
 static enum match
 vdev_attach_ok(vdev_t *vd, struct g_provider *pp)
 {
 	nvlist_t *config;
 	uint64_t pool_guid, top_guid, vdev_guid;
 	struct g_consumer *cp;
 	int nlabels;
 
 	cp = vdev_geom_attach(pp, NULL, B_TRUE);
 	if (cp == NULL) {
 		ZFS_LOG(1, "Unable to attach tasting instance to %s.",
 		    pp->name);
 		return (NO_MATCH);
 	}
 	g_topology_unlock();
 	nlabels = vdev_geom_read_config(cp, &config);
 	g_topology_lock();
 	vdev_geom_detach(cp, B_TRUE);
 	if (nlabels == 0) {
 		ZFS_LOG(1, "Unable to read config from %s.", pp->name);
 		return (NO_MATCH);
 	}
 
 	/* A guid missing from the label is treated as 0. */
 	pool_guid = 0;
 	(void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &pool_guid);
 	top_guid = 0;
 	(void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_TOP_GUID, &top_guid);
 	vdev_guid = 0;
 	(void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, &vdev_guid);
 	nvlist_free(config);
 
 	/*
 	 * Check that the label's pool guid matches the desired guid.
 	 * Inactive spares and L2ARCs do not have any pool guid in the label.
 	 */
 	if (pool_guid != 0 && pool_guid != spa_guid(vd->vdev_spa)) {
 		ZFS_LOG(1, "pool guid mismatch for provider %s: %ju != %ju.",
 		    pp->name,
 		    (uintmax_t)spa_guid(vd->vdev_spa), (uintmax_t)pool_guid);
 		return (NO_MATCH);
 	}
 
 	/*
 	 * Check that the label's vdev guid matches the desired guid.
 	 * The second condition handles possible race on vdev detach, when
 	 * remaining vdev receives GUID of destroyed top level mirror vdev.
 	 */
 	if (vdev_guid == vd->vdev_guid) {
 		ZFS_LOG(1, "guids match for provider %s.", pp->name);
 		return (ZERO_MATCH + nlabels);
 	} else if (top_guid == vd->vdev_guid && vd == vd->vdev_top) {
 		ZFS_LOG(1, "top vdev guid match for provider %s.", pp->name);
 		return (TOPGUID_MATCH);
 	}
 	ZFS_LOG(1, "vdev guid mismatch for provider %s: %ju != %ju.",
 	    pp->name, (uintmax_t)vd->vdev_guid, (uintmax_t)vdev_guid);
 	return (NO_MATCH);
 }
 
 /*
  * Search the providers of every GEOM class other than our own for the
  * one whose labels best match vd, and attach to it.
  *
  * The search stops early at the first FULL_MATCH.  Among equally good
  * candidates, a provider whose name equals the device name in
  * vd->vdev_path (after the "/dev/" prefix) is preferred.  Returns the
  * attached consumer, or NULL when nothing matched or the attach failed.
  * Must be called with the GEOM topology lock held.
  */
 static struct g_consumer *
 vdev_geom_attach_by_guids(vdev_t *vd)
 {
 	struct g_class *mp;
 	struct g_geom *gp;
 	struct g_provider *pp, *best_pp;
 	struct g_consumer *cp;
 	const char *vdpath;
 	enum match match, best_match;
 
 	g_topology_assert();
 
 	/* Provider names carry no "/dev/" prefix; skip it for comparisons. */
 	vdpath = vd->vdev_path + sizeof ("/dev/") - 1;
 	cp = NULL;
 	best_pp = NULL;
 	best_match = NO_MATCH;
 	LIST_FOREACH(mp, &g_classes, class) {
 		if (mp == &zfs_vdev_class)
 			continue;
 		LIST_FOREACH(gp, &mp->geom, geom) {
 			if (gp->flags & G_GEOM_WITHER)
 				continue;
 			LIST_FOREACH(pp, &gp->provider, provider) {
 				match = vdev_attach_ok(vd, pp);
 				if (match > best_match) {
 					best_match = match;
 					best_pp = pp;
 				} else if (match == best_match) {
 					/* Tie: prefer the recorded path. */
 					if (strcmp(pp->name, vdpath) == 0) {
 						best_pp = pp;
 					}
 				}
 				if (match == FULL_MATCH)
 					goto out;
 			}
 		}
 	}
 
 out:
 	if (best_pp) {
 		cp = vdev_geom_attach(best_pp, vd, B_TRUE);
 		if (cp == NULL) {
 			printf("ZFS WARNING: Unable to attach to %s.\n",
 			    best_pp->name);
 		}
 	}
 	return (cp);
 }
 
 /*
  * Open the vdev by searching all providers for matching guids.  On
  * success vd->vdev_path is replaced with "/dev/<provider name>" of the
  * provider that was found.  Returns the consumer, or NULL when the
  * search failed.  Must be called with the GEOM topology lock held.
  */
 static struct g_consumer *
 vdev_geom_open_by_guids(vdev_t *vd)
 {
 	struct g_consumer *cp;
 	char *newpath;
 	size_t pathlen;
 
 	g_topology_assert();
 
 	ZFS_LOG(1, "Searching by guids [%ju:%ju].",
 	    (uintmax_t)spa_guid(vd->vdev_spa), (uintmax_t)vd->vdev_guid);
 
 	cp = vdev_geom_attach_by_guids(vd);
 	if (cp == NULL) {
 		ZFS_LOG(1, "Search by guid [%ju:%ju] failed.",
 		    (uintmax_t)spa_guid(vd->vdev_spa),
 		    (uintmax_t)vd->vdev_guid);
 		return (cp);
 	}
 
 	/* Room for the prefix, the provider name and the terminator. */
 	pathlen = strlen(cp->provider->name) + strlen("/dev/") + 1;
 	newpath = kmem_alloc(pathlen, KM_SLEEP);
 	snprintf(newpath, pathlen, "/dev/%s", cp->provider->name);
 
 	spa_strfree(vd->vdev_path);
 	vd->vdev_path = newpath;
 
 	ZFS_LOG(1, "Attach by guid [%ju:%ju] succeeded, provider %s.",
 	    (uintmax_t)spa_guid(vd->vdev_spa),
 	    (uintmax_t)vd->vdev_guid, cp->provider->name);
 
 	return (cp);
 }
 
 /*
  * Open the vdev through the provider named by vd->vdev_path (without its
  * "/dev/" prefix).
  *
  * check_guid - when non-zero, the provider is only accepted if its labels
  * are a FULL_MATCH for vd.
  *
  * Returns the consumer, or NULL when the provider does not exist, fails
  * the guid check, or cannot be attached.  Must be called with the GEOM
  * topology lock held.
  */
 static struct g_consumer *
 vdev_geom_open_by_path(vdev_t *vd, int check_guid)
 {
 	struct g_provider *pp;
 
 	g_topology_assert();
 
 	pp = g_provider_by_name(vd->vdev_path + sizeof ("/dev/") - 1);
 	if (pp == NULL)
 		return (NULL);
 
 	ZFS_LOG(1, "Found provider by name %s.", vd->vdev_path);
 	if (check_guid && vdev_attach_ok(vd, pp) != FULL_MATCH)
 		return (NULL);
 
 	return (vdev_geom_attach(pp, vd, B_FALSE));
 }
 
 /*
  * Open (or, during a reopen, re-examine) the GEOM provider backing vd.
  *
  * vd - vdev to open; vd->vdev_path must start with "/dev/"
  * psize, max_psize - out: provider media size
  * logical_ashift - out: log2 of the larger of sector size and
  *                  SPA_MINBLOCKSIZE
  * physical_ashift - out: log2 of the stripe size when it is a usable
  *                   power of two larger than the logical size, else 0
  *
  * Also refreshes vdev_nowritecache, vdev_nonrot, vdev_has_trim and
  * vdev_has_securetrim.  Returns 0 on success or an errno value.
  */
 static int
 vdev_geom_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize,
     uint64_t *logical_ashift, uint64_t *physical_ashift)
 {
 	struct g_provider *pp;
 	struct g_consumer *cp;
 	int error, has_trim;
 	uint16_t rate;
 
 	/*
 	 * We must have a pathname, and it must be absolute.
 	 */
 	if (vd->vdev_path == NULL || strncmp(vd->vdev_path, "/dev/", 5) != 0) {
 		vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
 		return (EINVAL);
 	}
 
 	/*
 	 * Reopen the device if it's not currently open. Otherwise,
 	 * just update the physical size of the device.
 	 */
 	if ((cp = vd->vdev_tsd) != NULL) {
 		ASSERT(vd->vdev_reopening);
 		goto skip_open;
 	}
 
 	/*
 	 * Set the TLS to indicate downstack that we
 	 * should not access zvols.  This is done only once we know that
 	 * tasting will take place, so that every path that sets the key
 	 * also reaches the point below where it is cleared again.
 	 */
 	VERIFY0(tsd_set(zfs_geom_probe_vdev_key, vd));
 
 	DROP_GIANT();
 	g_topology_lock();
 	error = 0;
 
 	if (vd->vdev_spa->spa_is_splitting ||
 	    ((vd->vdev_prevstate == VDEV_STATE_UNKNOWN &&
 	    (vd->vdev_spa->spa_load_state == SPA_LOAD_NONE ||
 	    vd->vdev_spa->spa_load_state == SPA_LOAD_CREATE)))) {
 		/*
 		 * We are dealing with a vdev that hasn't been previously
 		 * opened (since boot), and we are not loading an
 		 * existing pool configuration.  This looks like a
 		 * vdev add operation to a new or existing pool.
 		 * Assume the user really wants to do this, and find
 		 * GEOM provider by its name, ignoring GUID mismatches.
 		 *
 		 * XXPOLICY: It would be safer to only allow a device
 		 *           that is unlabeled or labeled but missing
 		 *           GUID information to be opened in this fashion,
 		 *           unless we are doing a split, in which case we
 		 *           should allow any guid.
 		 */
 		cp = vdev_geom_open_by_path(vd, 0);
 	} else {
 		/*
 		 * Try using the recorded path for this device, but only
 		 * accept it if its label data contains the expected GUIDs.
 		 */
 		cp = vdev_geom_open_by_path(vd, 1);
 		if (cp == NULL) {
 			/*
 			 * The device at vd->vdev_path doesn't have the
 			 * expected GUIDs. The disks might have merely
 			 * moved around so try all other GEOM providers
 			 * to find one with the right GUIDs.
 			 */
 			cp = vdev_geom_open_by_guids(vd);
 		}
 	}
 
 	/* Clear the TLS now that tasting is done */
 	VERIFY0(tsd_set(zfs_geom_probe_vdev_key, NULL));
 
 	if (cp == NULL) {
 		ZFS_LOG(1, "Vdev %s not found.", vd->vdev_path);
 		error = ENOENT;
 	} else {
 		struct consumer_priv_t *priv;
 		struct consumer_vdev_elem *elem;
 		int spamode;
 
 		/* Record this vdev on the consumer's list of users. */
 		priv = (struct consumer_priv_t *)&cp->private;
 		if (cp->private == NULL)
 			SLIST_INIT(priv);
 		elem = g_malloc(sizeof (*elem), M_WAITOK|M_ZERO);
 		elem->vd = vd;
 		SLIST_INSERT_HEAD(priv, elem, elems);
 
 		spamode = spa_mode(vd->vdev_spa);
 		if (cp->provider->sectorsize > VDEV_PAD_SIZE ||
 		    !ISP2(cp->provider->sectorsize)) {
 			ZFS_LOG(1, "Provider %s has unsupported sectorsize.",
 			    cp->provider->name);
 
 			vdev_geom_close_locked(vd);
 			error = EINVAL;
 			cp = NULL;
 		} else if (cp->acw == 0 && (spamode & FWRITE) != 0) {
 			int i;
 
 			/*
 			 * Try a few times to get write access, sleeping
 			 * without the topology lock between attempts.
 			 */
 			for (i = 0; i < 5; i++) {
 				error = g_access(cp, 0, 1, 0);
 				if (error == 0)
 					break;
 				g_topology_unlock();
 				tsleep(vd, 0, "vdev", hz / 2);
 				g_topology_lock();
 			}
 			if (error != 0) {
 				printf("ZFS WARNING: Unable to open %s for "
 				    "writing (error=%d).\n",
 				    cp->provider->name, error);
 				vdev_geom_close_locked(vd);
 				cp = NULL;
 			}
 		}
 	}
 
 	/* Fetch initial physical path information for this device. */
 	if (cp != NULL) {
 		vdev_geom_attrchanged(cp, "GEOM::physpath");
 
 		/* Set other GEOM characteristics */
 		vdev_geom_set_physpath(vd, cp, /* do_null_update */B_FALSE);
 	}
 
 	g_topology_unlock();
 	PICKUP_GIANT();
 	if (cp == NULL) {
 		vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
 		vdev_dbgmsg(vd, "vdev_geom_open: failed to open [error=%d]",
 		    error);
 		return (error);
 	}
 skip_open:
 	pp = cp->provider;
 
 	/*
 	 * Determine the actual size of the device.
 	 */
 	*max_psize = *psize = pp->mediasize;
 
 	/*
 	 * Determine the device's minimum transfer size and preferred
 	 * transfer size.
 	 */
 	*logical_ashift = highbit(MAX(pp->sectorsize, SPA_MINBLOCKSIZE)) - 1;
 	*physical_ashift = 0;
 	if (pp->stripesize && pp->stripesize > (1 << *logical_ashift) &&
 	    ISP2(pp->stripesize) && pp->stripesize <= (1 << ASHIFT_MAX) &&
 	    pp->stripeoffset == 0)
 		*physical_ashift = highbit(pp->stripesize) - 1;
 
 	/*
 	 * Clear the nowritecache settings, so that on a vdev_reopen()
 	 * we will try again.
 	 */
 	vd->vdev_nowritecache = B_FALSE;
 
 	/* Inform the ZIO pipeline that we are non-rotational. */
 	error = g_getattr("GEOM::rotation_rate", cp, &rate);
 	if (error == 0 && rate == DISK_RR_NON_ROTATING)
 		vd->vdev_nonrot = B_TRUE;
 	else
 		vd->vdev_nonrot = B_FALSE;
 
 	/* Set when device reports it supports TRIM. */
 	error = g_getattr("GEOM::candelete", cp, &has_trim);
 	vd->vdev_has_trim = (error == 0 && has_trim);
 
 	/* Set when device reports it supports secure TRIM. */
 	/* unavailable on FreeBSD */
 	vd->vdev_has_securetrim = B_FALSE;
 
 	return (0);
 }
 
 /*
  * Close the vdev's consumer, taking the GEOM topology lock unless the
  * caller already holds it.  During a reopen the consumer is kept, unless
  * it has been orphaned or its provider reports an error.
  */
 static void
 vdev_geom_close(vdev_t *vd)
 {
 	struct g_consumer *cp = vd->vdev_tsd;
 	boolean_t was_locked, must_close;
 
 	DROP_GIANT();
 	was_locked = g_topology_locked();
 	if (!was_locked)
 		g_topology_lock();
 
 	must_close = !vd->vdev_reopening;
 	if (!must_close && cp != NULL) {
 		if ((cp->flags & G_CF_ORPHAN) != 0)
 			must_close = B_TRUE;
 		else if (cp->provider != NULL && cp->provider->error != 0)
 			must_close = B_TRUE;
 	}
 	if (must_close)
 		vdev_geom_close_locked(vd);
 
 	if (!was_locked)
 		g_topology_unlock();
 	PICKUP_GIANT();
 }
 
 /*
  * bio completion handler: copy the bio's outcome into the zio found in
  * bp->bio_caller1, react to ENOTSUP and ENXIO, and hand the zio on via
  * zio_delay_interrupt().  A bio that completed without error but with a
  * non-zero residual is reported as EIO.
  */
 static void
 vdev_geom_io_intr(struct bio *bp)
 {
 	vdev_t *vd;
 	zio_t *zio;
 
 	zio = bp->bio_caller1;
 	vd = zio->io_vd;
 	zio->io_error = bp->bio_error;
 	if (zio->io_error == 0 && bp->bio_resid != 0)
 		zio->io_error = SET_ERROR(EIO);
 
 	switch (zio->io_error) {
 	case ENOTSUP:
 		/*
 		 * If we get ENOTSUP for BIO_FLUSH or BIO_DELETE we know
 		 * that future attempts will never succeed. In this case
 		 * we set a persistent flag so that we don't bother with
 		 * requests in the future.
 		 */
 		switch (bp->bio_cmd) {
 		case BIO_FLUSH:
 			vd->vdev_nowritecache = B_TRUE;
 			break;
 		case BIO_DELETE:
 			/* No flag is recorded for BIO_DELETE here. */
 			break;
 		}
 		break;
 	case ENXIO:
 		if (!vd->vdev_remove_wanted) {
 			/*
 			 * If provider's error is set we assume it is being
 			 * removed.
 			 */
 			if (bp->bio_to->error != 0) {
 				vd->vdev_remove_wanted = B_TRUE;
 				spa_async_request(zio->io_spa,
 				    SPA_ASYNC_REMOVE);
 			} else if (!vd->vdev_delayed_close) {
 				vd->vdev_delayed_close = B_TRUE;
 			}
 		}
 		break;
 	}
 
 	/*
 	 * We have to split bio freeing into two parts, because the ABD code
 	 * cannot be called in this context and vdev_op_io_done is not called
 	 * for ZIO_TYPE_IOCTL zio-s.
 	 */
 	if (zio->io_type != ZIO_TYPE_READ && zio->io_type != ZIO_TYPE_WRITE) {
 		g_destroy_bio(bp);
 		zio->io_bio = NULL;
 	}
 	zio_delay_interrupt(zio);
 }
 
+/* Accumulator passed to vdev_geom_check_unmapped_cb() via its priv argument. */
+struct vdev_geom_check_unmapped_cb_state {
+	int	pages;	/* pages counted over the segments seen so far */
+	uint_t	end;	/* in-page offset where the previous segment ended */
+};
+
+/*
+ * ABD iteration callback that validates one segment for unmapped I/O and
+ * adds its pages to the running count.  GEOM needs the data buffer to look
+ * virtually contiguous: only the first segment may begin off a page
+ * boundary and only the last one may end off a page boundary, every page
+ * in between has to be full.  Returns 0 when the segment is acceptable
+ * and 1 when it is not.
+ */
+static int
+vdev_geom_check_unmapped_cb(void *buf, size_t len, void *priv)
+{
+	struct vdev_geom_check_unmapped_cb_state *st = priv;
+	vm_offset_t pgoff = (vm_offset_t)buf & PAGE_MASK;
+
+	/*
+	 * Refuse a non-first segment that starts inside a page, and any
+	 * segment that follows one which ended inside a page.
+	 */
+	if ((st->pages != 0 && pgoff != 0) || st->end != 0)
+		return (1);
+
+	st->pages += (pgoff + len + PAGE_MASK) >> PAGE_SHIFT;
+	st->end = (pgoff + len) & PAGE_MASK;
+	return (0);
+}
+
+/*
+ * Decide whether this ZIO can be sent to the device as unmapped I/O, which
+ * avoids copying a scattered and/or gang ABD buffer into a linear one.
+ * Returns the number of pages the buffer spans, or 0 when unmapped I/O
+ * is not applicable.
+ */
+static int
+vdev_geom_check_unmapped(zio_t *zio, struct g_consumer *cp)
+{
+	struct vdev_geom_check_unmapped_cb_state state;
+
+	/*
+	 * A linear buffer needs no help, and a GEOM provider that does not
+	 * accept unmapped requests leaves us no choice but to copy.
+	 */
+	if (abd_is_linear(zio->io_abd) ||
+	    (cp->provider->flags & G_PF_ACCEPT_UNMAPPED) == 0)
+		return (0);
+
+	/* Walk the buffer chunks, checking sizes/alignments, counting pages. */
+	state.pages = 0;
+	state.end = 0;
+	if (abd_iterate_func(zio->io_abd, 0, zio->io_size,
+	    vdev_geom_check_unmapped_cb, &state) != 0)
+		return (0);
+
+	return (state.pages);
+}
+
+/*
+ * Callback to translate the ABD segment into array of physical pages.
+ *
+ * buf/len describe one segment, priv is the bio being filled.  One entry
+ * is appended to bio_ma for every page the segment touches.  Always
+ * returns 0.
+ */
+static int
+vdev_geom_fill_unmap_cb(void *buf, size_t len, void *priv)
+{
+	struct bio *bp = priv;
+	vm_offset_t addr = (vm_offset_t)buf;
+	vm_offset_t end = addr + len;
+
+	if (bp->bio_ma_n == 0) {
+		/*
+		 * Only the first segment may start inside a page.  Record
+		 * that offset and walk from the page start; stepping by
+		 * PAGE_SIZE from the unaligned address would skip the last
+		 * page whenever the segment crosses a page boundary.
+		 */
+		bp->bio_ma_offset = addr & PAGE_MASK;
+		addr &= ~(vm_offset_t)PAGE_MASK;
+	}
+	do {
+		bp->bio_ma[bp->bio_ma_n++] =
+		    PHYS_TO_VM_PAGE(pmap_kextract(addr));
+		addr += PAGE_SIZE;
+	} while (addr < end);
+	return (0);
+}
+
 static void
 vdev_geom_io_start(zio_t *zio)
 {
 	vdev_t *vd;
 	struct g_consumer *cp;
 	struct bio *bp;
 
 	vd = zio->io_vd;
 
 	switch (zio->io_type) {
 	case ZIO_TYPE_IOCTL:
 		/* XXPOLICY */
 		if (!vdev_readable(vd)) {
 			zio->io_error = SET_ERROR(ENXIO);
 			zio_interrupt(zio);
 			return;
 		} else {
 			switch (zio->io_cmd) {
 			case DKIOCFLUSHWRITECACHE:
 				if (zfs_nocacheflush ||
 				    vdev_geom_bio_flush_disable)
 					break;
 				if (vd->vdev_nowritecache) {
 					zio->io_error = SET_ERROR(ENOTSUP);
 					break;
 				}
 				goto sendreq;
 			default:
 				zio->io_error = SET_ERROR(ENOTSUP);
 			}
 		}
 
 		zio_execute(zio);
 		return;
 	case ZIO_TYPE_TRIM:
 		if (!vdev_geom_bio_delete_disable) {
 			goto sendreq;
 		}
 		zio_execute(zio);
 		return;
 	default:
 			;
 		/* PASSTHROUGH --- placate compiler */
 	}
 sendreq:
 	ASSERT(zio->io_type == ZIO_TYPE_READ ||
 	    zio->io_type == ZIO_TYPE_WRITE ||
 	    zio->io_type == ZIO_TYPE_TRIM ||
 	    zio->io_type == ZIO_TYPE_IOCTL);
 
 	cp = vd->vdev_tsd;
 	if (cp == NULL) {
 		zio->io_error = SET_ERROR(ENXIO);
 		zio_interrupt(zio);
 		return;
 	}
 	bp = g_alloc_bio();
 	bp->bio_caller1 = zio;
 	switch (zio->io_type) {
 	case ZIO_TYPE_READ:
 	case ZIO_TYPE_WRITE:
 		zio->io_target_timestamp = zio_handle_io_delay(zio);
 		bp->bio_offset = zio->io_offset;
 		bp->bio_length = zio->io_size;
-		if (zio->io_type == ZIO_TYPE_READ) {
+		if (zio->io_type == ZIO_TYPE_READ)
 			bp->bio_cmd = BIO_READ;
-			bp->bio_data =
-			    abd_borrow_buf(zio->io_abd, zio->io_size);
-		} else {
+		else
 			bp->bio_cmd = BIO_WRITE;
-			bp->bio_data =
-			    abd_borrow_buf_copy(zio->io_abd, zio->io_size);
+
+		/*
+		 * If possible, represent scattered and/or gang ABD buffer to
+		 * GEOM as an array of physical pages.  It allows to satisfy
+		 * requirement of virtually contiguous buffer without copying.
+		 */
+		int pgs = vdev_geom_check_unmapped(zio, cp);
+		if (pgs > 0) {
+			bp->bio_ma = malloc(sizeof (struct vm_page *) * pgs,
+			    M_DEVBUF, M_WAITOK);
+			bp->bio_ma_n = 0;
+			bp->bio_ma_offset = 0;
+			abd_iterate_func(zio->io_abd, 0, zio->io_size,
+			    vdev_geom_fill_unmap_cb, bp);
+			bp->bio_data = unmapped_buf;
+			bp->bio_flags |= BIO_UNMAPPED;
+		} else {
+			if (zio->io_type == ZIO_TYPE_READ) {
+				bp->bio_data = abd_borrow_buf(zio->io_abd,
+				    zio->io_size);
+			} else {
+				bp->bio_data = abd_borrow_buf_copy(zio->io_abd,
+				    zio->io_size);
+			}
 		}
 		break;
 	case ZIO_TYPE_TRIM:
 		bp->bio_cmd = BIO_DELETE;
 		bp->bio_data = NULL;
 		bp->bio_offset = zio->io_offset;
 		bp->bio_length = zio->io_size;
 		break;
 	case ZIO_TYPE_IOCTL:
 		bp->bio_cmd = BIO_FLUSH;
 		bp->bio_data = NULL;
 		bp->bio_offset = cp->provider->mediasize;
 		bp->bio_length = 0;
 		break;
 	default:
 		panic("invalid zio->io_type: %d\n", zio->io_type);
 	}
 	bp->bio_done = vdev_geom_io_intr;
 	zio->io_bio = bp;
 
 	g_io_request(bp, cp);
 }
 
 static void
 vdev_geom_io_done(zio_t *zio)
 {
 	struct bio *bp = zio->io_bio;
 
 	if (zio->io_type != ZIO_TYPE_READ && zio->io_type != ZIO_TYPE_WRITE) {
 		ASSERT3P(bp, ==, NULL);
 		return;
 	}
 
 	if (bp == NULL) {
 		ASSERT3S(zio->io_error, ==, ENXIO);
 		return;
 	}
 
-	if (zio->io_type == ZIO_TYPE_READ)
-		abd_return_buf_copy(zio->io_abd, bp->bio_data, zio->io_size);
-	else
-		abd_return_buf(zio->io_abd, bp->bio_data, zio->io_size);
+	if (bp->bio_ma != NULL) {
+		free(bp->bio_ma, M_DEVBUF);
+	} else {
+		if (zio->io_type == ZIO_TYPE_READ) {
+			abd_return_buf_copy(zio->io_abd, bp->bio_data,
+			    zio->io_size);
+		} else {
+			abd_return_buf(zio->io_abd, bp->bio_data,
+			    zio->io_size);
+		}
+	}
 
 	g_destroy_bio(bp);
 	zio->io_bio = NULL;
 }
 
 /* vdev_op_hold hook for GEOM-backed vdevs; intentionally does nothing. */
 static void
 vdev_geom_hold(vdev_t *vd)
 {
 }
 
 /* vdev_op_rele hook for GEOM-backed vdevs; intentionally does nothing. */
 static void
 vdev_geom_rele(vdev_t *vd)
 {
 }
 
 /*
  * Operations vector for disk vdevs.  Open, close, I/O start/done and
  * hold/rele are served by the vdev_geom_* functions in this file; the
  * size and xlate hooks use the vdev_default_* implementations and the
  * remaining hooks are left NULL.
  */
 vdev_ops_t vdev_disk_ops = {
 	.vdev_op_init = NULL,
 	.vdev_op_fini = NULL,
 	.vdev_op_open = vdev_geom_open,
 	.vdev_op_close = vdev_geom_close,
 	.vdev_op_asize = vdev_default_asize,
 	.vdev_op_min_asize = vdev_default_min_asize,
 	.vdev_op_min_alloc = NULL,
 	.vdev_op_io_start = vdev_geom_io_start,
 	.vdev_op_io_done = vdev_geom_io_done,
 	.vdev_op_state_change = NULL,
 	.vdev_op_need_resilver = NULL,
 	.vdev_op_hold = vdev_geom_hold,
 	.vdev_op_rele = vdev_geom_rele,
 	.vdev_op_remap = NULL,
 	.vdev_op_xlate = vdev_default_xlate,
 	.vdev_op_rebuild_asize = NULL,
 	.vdev_op_metaslab_init = NULL,
 	.vdev_op_config_generate = NULL,
 	.vdev_op_nparity = NULL,
 	.vdev_op_ndisks = NULL,
 	.vdev_op_type = VDEV_TYPE_DISK,		/* name of this vdev type */
 	.vdev_op_leaf = B_TRUE			/* leaf vdev */
 };
diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_file_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_file_os.c
index 908cff6810eb..a3d67aaa11ba 100644
--- a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_file_os.c
+++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_file_os.c
@@ -1,306 +1,299 @@
 /*
  * Copyright (c) 2020 iXsystems, Inc.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/dmu.h>
 #include <sys/dmu_impl.h>
 #include <sys/dmu_recv.h>
 #include <sys/dmu_tx.h>
 #include <sys/dbuf.h>
 #include <sys/dnode.h>
 #include <sys/zfs_context.h>
 #include <sys/dmu_objset.h>
 #include <sys/dmu_traverse.h>
 #include <sys/dsl_dataset.h>
 #include <sys/dsl_dir.h>
 #include <sys/dsl_pool.h>
 #include <sys/dsl_synctask.h>
 #include <sys/zfs_ioctl.h>
 #include <sys/zap.h>
 #include <sys/zio_checksum.h>
 #include <sys/zfs_znode.h>
 #include <sys/zfs_file.h>
 #include <sys/buf.h>
 #include <sys/stat.h>
 
 /*
  * Open the file named by path and hand back a referenced file in *fpp.
  *
  * path  - path of the file to open (kernel address space)
  * flags - open flags passed through to kern_openat()
  * mode  - mode passed through to kern_openat()
  * fpp   - where the file pointer is stored on success
  *
  * Returns 0 on success or the error from kern_openat()/fget().  When
  * fget() fails the freshly opened descriptor is closed and *fpp is not
  * valid.  On success the descriptor itself is left as it was.
  */
 int
 zfs_file_open(const char *path, int flags, int mode, zfs_file_t **fpp)
 {
 	struct thread *td;
 	int rc, fd;
 
 	td = curthread;
 	pwd_ensure_dirs();
 	/* 12.x doesn't take a const char * */
 	rc = kern_openat(td, AT_FDCWD, __DECONST(char *, path),
 	    UIO_SYSSPACE, flags, mode);
 	if (rc)
 		return (SET_ERROR(rc));
 	/* kern_openat() reports the new descriptor through td_retval[0]. */
 	fd = td->td_retval[0];
 	td->td_retval[0] = 0;
 	rc = fget(td, fd, &cap_no_rights, fpp);
 	if (rc != 0) {
 		/* Do not report success without a usable file pointer. */
 		(void) kern_close(td, fd);
 		return (SET_ERROR(rc));
 	}
 	return (0);
 }
 
 /* Close fp by invoking its fo_close file operation for the current thread. */
 void
 zfs_file_close(zfs_file_t *fp)
 {
 	fo_close(fp, curthread);
 }
 
 /*
  * Common write path: write count bytes from the kernel buffer buf to fp
  * at offset *offp, advancing *offp by the number of bytes written.
  *
  * When resid is non-NULL it receives the count of bytes left unwritten;
  * when it is NULL any short write is reported as EIO.
  *
  * Returns 0 on success, EBADF if fp is not open for writing, or the
  * error from fo_write().
  */
 static int
 zfs_file_write_impl(zfs_file_t *fp, const void *buf, size_t count, loff_t *offp,
     ssize_t *resid)
 {
 	struct thread *td = curthread;
 	struct iovec iov;
 	struct uio uio;
 	ssize_t error;
 
 	if ((fp->f_flag & FWRITE) == 0)
 		return (SET_ERROR(EBADF));
 
 	/* Describe the single kernel-space segment being written. */
 	iov.iov_base = (void *)(uintptr_t)buf;
 	iov.iov_len = count;
 	uio.uio_iov = &iov;
 	uio.uio_iovcnt = 1;
 	uio.uio_segflg = UIO_SYSSPACE;
 	uio.uio_resid = count;
 	uio.uio_rw = UIO_WRITE;
 	uio.uio_td = td;
 	uio.uio_offset = *offp;
 
 	if (fp->f_type == DTYPE_VNODE)
 		bwillwrite();
 
 	error = fo_write(fp, &uio, td->td_ucred, FOF_OFFSET, td);
 	if (error != 0)
 		return (SET_ERROR(error));
 
 	if (resid != NULL)
 		*resid = uio.uio_resid;
 	else if (uio.uio_resid != 0)
 		return (SET_ERROR(EIO));
 
 	*offp += count - uio.uio_resid;
 	return (0);
 }
 
 /*
  * Stateful write: writes at the file's current offset (f_offset) and
  * stores the advanced offset back only when the write succeeded.
  */
 int
 zfs_file_write(zfs_file_t *fp, const void *buf, size_t count, ssize_t *resid)
 {
 	ssize_t error;
 	loff_t pos;
 
 	pos = fp->f_offset;
 	error = zfs_file_write_impl(fp, buf, count, &pos, resid);
 	if (error == 0)
 		fp->f_offset = pos;
 
 	return (SET_ERROR(error));
 }
 
 /*
  * Stateless write at the explicit offset off; fp->f_offset is not touched
  * because only the local copy of off is advanced.
  */
 int
 zfs_file_pwrite(zfs_file_t *fp, const void *buf, size_t count, loff_t off,
     ssize_t *resid)
 {
 	return (zfs_file_write_impl(fp, buf, count, &off, resid));
 }
 
 /*
  * Common read path: read up to count bytes from fp at offset *offp into
  * the kernel buffer buf, advancing *offp by the number of bytes read.
  *
  * When resid is non-NULL it receives the count of bytes not read.
  *
  * Returns 0 on success, EBADF if fp is not open for reading, or the
  * error from fo_read().
  */
 static int
 zfs_file_read_impl(zfs_file_t *fp, void *buf, size_t count, loff_t *offp,
     ssize_t *resid)
 {
 	ssize_t rc;
 	struct uio auio;
 	struct thread *td;
 	struct iovec aiov;
 
 	td = curthread;
 	aiov.iov_base = (void *)(uintptr_t)buf;
 	aiov.iov_len = count;
 	auio.uio_iov = &aiov;
 	auio.uio_iovcnt = 1;
 	auio.uio_segflg = UIO_SYSSPACE;
 	auio.uio_resid = count;
 	auio.uio_rw = UIO_READ;
 	auio.uio_td = td;
 	auio.uio_offset = *offp;
 
 	if ((fp->f_flag & FREAD) == 0)
 		return (SET_ERROR(EBADF));
 
 	rc = fo_read(fp, &auio, td->td_ucred, FOF_OFFSET, td);
 	if (rc)
 		return (SET_ERROR(rc));
 	if (resid)
 		*resid = auio.uio_resid;
 	*offp += count - auio.uio_resid;
 	/* Success is not an error; keep it out of SET_ERROR(). */
 	return (0);
 }
 
 /*
  * Stateful read: reads at the file's current offset (f_offset) and stores
  * the advanced offset back only when the read succeeded.
  */
 int
 zfs_file_read(zfs_file_t *fp, void *buf, size_t count, ssize_t *resid)
 {
 	ssize_t error;
 	loff_t pos;
 
 	pos = fp->f_offset;
 	error = zfs_file_read_impl(fp, buf, count, &pos, resid);
 	if (error == 0)
 		fp->f_offset = pos;
 
 	return (error);
 }
 
 /*
  * Stateless read at the explicit offset off; fp->f_offset is not touched
  * because only the local copy of off is advanced.
  */
 int
 zfs_file_pread(zfs_file_t *fp, void *buf, size_t count, loff_t off,
     ssize_t *resid)
 {
 	return (zfs_file_read_impl(fp, buf, count, &off, resid));
 }
 
 /*
  * Reposition fp.  *offp supplies the offset to seek to (interpreted
  * according to whence) and receives the resulting offset on success.
  *
  * Returns ESPIPE when the file type is not seekable, otherwise the
  * result of fo_seek().
  */
 int
 zfs_file_seek(zfs_file_t *fp, loff_t *offp, int whence)
 {
 	struct thread *td = curthread;
 	int error;
 
 	if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0)
 		return (SET_ERROR(ESPIPE));
 
 	error = fo_seek(fp, *offp, whence, td);
 	/* The new position is read back from the thread's return slot. */
 	if (error == 0)
 		*offp = td->td_uretoff.tdu_off;
 
 	return (SET_ERROR(error));
 }
 
 /*
  * Fetch the size and mode of fp into zfattr using fo_stat().
  *
  * Returns 0 on success or the error from fo_stat(), in which case
  * zfattr is left unmodified.
  */
 int
 zfs_file_getattr(zfs_file_t *fp, zfs_file_attr_t *zfattr)
 {
 	struct thread *td = curthread;
 	struct stat st;
 	int error;
 
 	error = fo_stat(fp, &st, td->td_ucred, td);
 	if (error != 0)
 		return (SET_ERROR(error));
 
 	zfattr->zfa_size = st.st_size;
 	zfattr->zfa_mode = st.st_mode;
 	return (0);
 }
 
 /*
  * Synchronously flush the vnode vp: enter a write on its mount, take the
  * vnode lock exclusively, call VOP_FSYNC() with MNT_WAIT, then undo both.
  *
  * Returns 0 on success or the error from vn_start_write()/VOP_FSYNC().
  */
 static __inline int
 zfs_vop_fsync(vnode_t *vp)
 {
 	struct mount *mp;
 	int error;
 
 	/* If the write cannot be started, skip straight to the return. */
 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
 		goto drop;
 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 	error = VOP_FSYNC(vp, MNT_WAIT, curthread);
 	VOP_UNLOCK1(vp);
 	vn_finished_write(mp);
 drop:
 	return (SET_ERROR(error));
 }
 
 /*
  * Sync fp to stable storage.  Only vnode-backed files are supported; any
  * other file type yields EINVAL.  The flags argument is not used here.
  */
 int
 zfs_file_fsync(zfs_file_t *fp, int flags)
 {
 	if (fp->f_type != DTYPE_VNODE)
 		return (EINVAL);
 
 	return (zfs_vop_fsync(fp->f_vnode));
 }
 
-int
-zfs_file_get(int fd, zfs_file_t **fpp)
+zfs_file_t *
+zfs_file_get(int fd)
 {
 	struct file *fp;
 
 	if (fget(curthread, fd, &cap_no_rights, &fp))
-		return (SET_ERROR(EBADF));
+		return (NULL);
 
-	*fpp = fp;
-	return (0);
+	return (fp);
 }
 
 void
-zfs_file_put(int fd)
+zfs_file_put(zfs_file_t *fp)
 {
-	struct file *fp;
-
-	/* No CAP_ rights required, as we're only releasing. */
-	if (fget(curthread, fd, &cap_no_rights, &fp) == 0) {
-		fdrop(fp, curthread);
-		fdrop(fp, curthread);
-	}
+	fdrop(fp, curthread);
 }
 
 /* Return the current file offset (f_offset) of fp. */
 loff_t
 zfs_file_off(zfs_file_t *fp)
 {
 	return (fp->f_offset);
 }
 
 /*
  * Return the cdev private data obtained through devfs_get_cdevpriv()
  * for fp, or NULL when that call fails.
  */
 void *
 zfs_file_private(zfs_file_t *fp)
 {
 	file_t *tmpfp;
 	void *data;
 	int error;
 
 	/*
 	 * devfs_get_cdevpriv() takes no file argument, so point the thread's
 	 * td_fpop at fp around the call and restore the old value afterwards.
 	 * NOTE(review): presumably the lookup keys off td_fpop - confirm.
 	 */
 	tmpfp = curthread->td_fpop;
 	curthread->td_fpop = fp;
 	error = devfs_get_cdevpriv(&data);
 	curthread->td_fpop = tmpfp;
 	if (error != 0)
 		return (NULL);
 	return (data);
 }
 
 /*
  * Unlink the file named by the kernel-space path fnamep, relative to
  * AT_FDCWD.  Returns the result of the underlying kern_*unlinkat() call.
  */
 int
 zfs_file_unlink(const char *fnamep)
 {
 	zfs_uio_seg_t seg = UIO_SYSSPACE;
 	int rc;
 
 	/* The kernel entry point and its arguments vary by FreeBSD version. */
 #if __FreeBSD_version >= 1300018
 	rc = kern_funlinkat(curthread, AT_FDCWD, fnamep, FD_NONE, seg, 0, 0);
 #elif __FreeBSD_version >= 1202504 || defined(AT_BENEATH)
 	rc = kern_unlinkat(curthread, AT_FDCWD, __DECONST(char *, fnamep),
 	    seg, 0, 0);
 #else
 	rc = kern_unlinkat(curthread, AT_FDCWD, __DECONST(char *, fnamep),
 	    seg, 0);
 #endif
 	return (SET_ERROR(rc));
 }
diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_file_os.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_file_os.c
index 35e647375d9d..e12f7c3ced43 100644
--- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_file_os.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_file_os.c
@@ -1,442 +1,428 @@
 /*
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
  * Common Development and Distribution License (the "License").
  * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
  * See the License for the specific language governing permissions
  * and limitations under the License.
  *
  * When distributing Covered Code, include this CDDL HEADER in each
  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  * If applicable, add the following below this CDDL HEADER, with the
  * fields enclosed by brackets "[]" replaced with your own identifying
  * information: Portions Copyright [yyyy] [name of copyright owner]
  *
  * CDDL HEADER END
  */
 
 #include <sys/zfs_context.h>
 #include <sys/zfs_file.h>
 #include <sys/stat.h>
 #include <sys/file.h>
 #include <linux/falloc.h>
 #include <linux/fs.h>
 #include <linux/uaccess.h>
 #ifdef HAVE_FDTABLE_HEADER
 #include <linux/fdtable.h>
 #endif
 
 /*
  * Open file
  *
  * path - fully qualified path to file
  * flags - open flags passed to filp_open() (O_EXCL may be added, see below)
  * mode - mode passed to filp_open()
  * fpp - pointer to return file pointer
  *
  * Returns 0 on success underlying error on failure.
  */
 int
 zfs_file_open(const char *path, int flags, int mode, zfs_file_t **fpp)
 {
 	struct file *filp;
 	int saved_umask;
 
 	/* A write-only open that does not create gets O_EXCL added. */
 	if (!(flags & O_CREAT) && (flags & O_WRONLY))
 		flags |= O_EXCL;
 
 	/* While creating, run with a umask of 0; it is restored below. */
 	if (flags & O_CREAT)
 		saved_umask = xchg(&current->fs->umask, 0);
 
 	filp = filp_open(path, flags, mode);
 
 	if (flags & O_CREAT)
 		(void) xchg(&current->fs->umask, saved_umask);
 
 	/* filp_open() encodes a negative errno in the pointer on failure. */
 	if (IS_ERR(filp))
 		return (-PTR_ERR(filp));
 
 	*fpp = filp;
 	return (0);
 }
 
 /* Close fp with filp_close(); its return value is not checked. */
 void
 zfs_file_close(zfs_file_t *fp)
 {
 	filp_close(fp, 0);
 }
 
 /*
  * Write count bytes from the kernel buffer buf to fp at *off.
  *
  * Returns the raw result of kernel_write()/vfs_write(); callers treat a
  * negative value as an error and a non-negative one as the byte count.
  */
 static ssize_t
 zfs_file_write_impl(zfs_file_t *fp, const void *buf, size_t count, loff_t *off)
 {
 #if defined(HAVE_KERNEL_WRITE_PPOS)
 	return (kernel_write(fp, buf, count, off));
 #else
 	mm_segment_t saved_fs;
 	ssize_t rc;
 
 	/* Switch to KERNEL_DS around vfs_write(), then restore. */
 	saved_fs = get_fs();
 	set_fs(KERNEL_DS);
 
 	rc = vfs_write(fp, (__force const char __user __user *)buf, count, off);
 
 	set_fs(saved_fs);
 
 	return (rc);
 #endif
 }
 
 /*
  * Stateful write - starts at the file's own position (f_pos) and stores
  * the advanced position back once the write did not fail.
  *
  * fp -  pointer to file (pipe, socket, etc) to write to
  * buf - buffer to write
  * count - # of bytes to write
  * resid -  pointer to count of unwritten bytes; when NULL a short write
  *          is reported as EIO
  *
  * Returns 0 on success errno on failure.
  */
 int
 zfs_file_write(zfs_file_t *fp, const void *buf, size_t count, ssize_t *resid)
 {
 	loff_t pos = fp->f_pos;
 	ssize_t done = zfs_file_write_impl(fp, buf, count, &pos);
 
 	if (done < 0)
 		return (-done);
 
 	fp->f_pos = pos;
 
 	/* Without a resid pointer the caller cannot see a short write. */
 	if (resid == NULL)
 		return ((done != count) ? EIO : 0);
 
 	*resid = count - done;
 	return (0);
 }
 
 /*
  * Stateless write - the file's own position (f_pos) is not updated.
  *
  * fp -  pointer to file (pipe, socket, etc) to write to
  * buf - buffer to write
  * count - # of bytes to write
  * off - file offset to write to (only valid for seekable types)
  * resid -  pointer to count of unwritten bytes; when NULL a short write
  *          is reported as EIO
  *
  * Returns 0 on success errno on failure.
  */
 int
 zfs_file_pwrite(zfs_file_t *fp, const void *buf, size_t count, loff_t off,
     ssize_t *resid)
 {
 	ssize_t done = zfs_file_write_impl(fp, buf, count, &off);
 
 	if (done < 0)
 		return (-done);
 
 	if (resid == NULL)
 		return ((done != count) ? EIO : 0);
 
 	*resid = count - done;
 	return (0);
 }
 
 /*
  * Read up to count bytes from fp at *off into the kernel buffer buf.
  *
  * Returns the raw result of kernel_read()/vfs_read(); callers treat a
  * negative value as an error and a non-negative one as the byte count.
  */
 static ssize_t
 zfs_file_read_impl(zfs_file_t *fp, void *buf, size_t count, loff_t *off)
 {
 #if defined(HAVE_KERNEL_READ_PPOS)
 	return (kernel_read(fp, buf, count, off));
 #else
 	mm_segment_t saved_fs;
 	ssize_t rc;
 
 	/* Switch to KERNEL_DS around vfs_read(), then restore. */
 	saved_fs = get_fs();
 	set_fs(KERNEL_DS);
 
 	rc = vfs_read(fp, (void __user *)buf, count, off);
 	set_fs(saved_fs);
 
 	return (rc);
 #endif
 }
 
 /*
  * Stateful read - starts at the file's own position (f_pos) and stores
  * the advanced position back once the read did not fail.
  *
  * fp -  pointer to file (pipe, socket, etc) to read from
  * buf - buffer to read into
  * count - # of bytes to read
  * resid -  pointer to count of unread bytes; when NULL a short read
  *          is reported as EIO
  *
  * Returns 0 on success errno on failure.
  */
 int
 zfs_file_read(zfs_file_t *fp, void *buf, size_t count, ssize_t *resid)
 {
 	loff_t pos = fp->f_pos;
 	ssize_t done = zfs_file_read_impl(fp, buf, count, &pos);
 
 	if (done < 0)
 		return (-done);
 
 	fp->f_pos = pos;
 
 	/* Without a resid pointer the caller cannot see a short read. */
 	if (resid == NULL)
 		return ((done != count) ? EIO : 0);
 
 	*resid = count - done;
 	return (0);
 }
 
 /*
  * Stateless read - the file's own position (f_pos) is not updated.
  *
  * fp -  pointer to file (pipe, socket, etc) to read from
  * buf - buffer to read into
  * count - # of bytes to read
  * off - file offset to read from (only valid for seekable types)
  * resid -  pointer to count of unread bytes; when NULL a short read
  *          is reported as EIO
  *
  * Returns 0 on success errno on failure.
  */
 int
 zfs_file_pread(zfs_file_t *fp, void *buf, size_t count, loff_t off,
     ssize_t *resid)
 {
 	ssize_t done = zfs_file_read_impl(fp, buf, count, &off);
 
 	if (done < 0)
 		return (-done);
 
 	if (resid == NULL)
 		return ((done != count) ? EIO : 0);
 
 	*resid = count - done;
 	return (0);
 }
 
 /*
  * lseek - set / get file pointer
  *
  * fp -  pointer to file (pipe, socket, etc) to read from
  * offp - value to seek to, returns current value plus passed offset
  * whence - see man pages for standard lseek whence values
  *
  * Returns 0 on success errno on failure (ESPIPE for non seekable types)
  */
 int
 zfs_file_seek(zfs_file_t *fp, loff_t *offp, int whence)
 {
 	loff_t rc;
 
 	/* Out-of-range offsets are rejected regardless of whence. */
 	if (*offp < 0 || *offp > MAXOFFSET_T)
 		return (EINVAL);
 
 	/* vfs_llseek() yields the new offset, or a negative errno. */
 	rc = vfs_llseek(fp, *offp, whence);
 	if (rc < 0)
 		return (-rc);
 
 	*offp = rc;
 
 	return (0);
 }
 
 /*
  * Get file attributes
  *
  * filp - file pointer
  * zfattr - pointer to file attr structure
  *
  * Currently only used for fetching size and file mode.
  *
  * Returns 0 on success or error code of underlying getattr call on failure.
  */
 int
 zfs_file_getattr(zfs_file_t *filp, zfs_file_attr_t *zfattr)
 {
 	struct kstat stat;
 	int rc;
 
 	/* Pick the vfs_getattr() signature detected at configure time. */
 #if defined(HAVE_4ARGS_VFS_GETATTR)
 	rc = vfs_getattr(&filp->f_path, &stat, STATX_BASIC_STATS,
 	    AT_STATX_SYNC_AS_STAT);
 #elif defined(HAVE_2ARGS_VFS_GETATTR)
 	rc = vfs_getattr(&filp->f_path, &stat);
 #elif defined(HAVE_3ARGS_VFS_GETATTR)
 	rc = vfs_getattr(filp->f_path.mnt, filp->f_dentry, &stat);
 #else
 #error "No available vfs_getattr()"
 #endif
 	/* The sign is flipped so a positive errno goes back to the caller. */
 	if (rc)
 		return (-rc);
 
 	zfattr->zfa_size = stat.size;
 	zfattr->zfa_mode = stat.mode;
 
 	return (0);
 }
 
 /*
  * Sync file to disk
  *
  * filp - file pointer
  * flags - O_SYNC and or O_DSYNC; O_DSYNC requests a data-only sync
  *
  * Returns 0 on success or error code of underlying sync call on failure.
  */
 int
 zfs_file_fsync(zfs_file_t *filp, int flags)
 {
 	const int datasync = (flags & O_DSYNC) ? 1 : 0;
 	const int had_fstrans = __spl_pf_fstrans_check();
 	int error;
 
 	/*
 	 * May enter XFS which generates a warning when PF_FSTRANS is set.
 	 * To avoid this the flag is cleared over vfs_sync() and then reset.
 	 */
 	if (had_fstrans)
 		current->flags &= ~(__SPL_PF_FSTRANS);
 
 	error = -vfs_fsync(filp, datasync);
 
 	if (had_fstrans)
 		current->flags |= __SPL_PF_FSTRANS;
 
 	return (error);
 }
 
 /*
  * fallocate - allocate or free space on disk
  *
  * fp - file pointer
  * mode (non-standard options for hole punching etc)
  * offset - offset to start allocating or freeing from
  * len - length to free / allocate
  *
  * Returns 0 on success, EOPNOTSUPP when the file has no fallocate()
  * callback, or the positive errno of the underlying callback.
  *
  * OPTIONAL
  */
 int
 zfs_file_fallocate(zfs_file_t *fp, int mode, loff_t offset, loff_t len)
 {
 	/*
 	 * May enter XFS which generates a warning when PF_FSTRANS is set.
 	 * To avoid this the flag is cleared over vfs_sync() and then reset.
 	 */
 	int fstrans = __spl_pf_fstrans_check();
 	if (fstrans)
 		current->flags &= ~(__SPL_PF_FSTRANS);
 
 	/*
 	 * When supported by the underlying file system preferentially
 	 * use the fallocate() callback to preallocate the space.
 	 * The callback reports failure as a negative errno, so flip the
 	 * sign to match the positive EOPNOTSUPP default and the rest of
 	 * this file.
 	 */
 	int error = EOPNOTSUPP;
 	if (fp->f_op->fallocate)
 		error = -fp->f_op->fallocate(fp, mode, offset, len);
 
 	if (fstrans)
 		current->flags |= __SPL_PF_FSTRANS;
 
 	return (error);
 }
 
 /*
  * Request current file pointer offset
  *
  * fp - pointer to file
  *
  * Returns current file offset.
  */
 loff_t
 zfs_file_off(zfs_file_t *fp)
 {
 	/* The position is read directly from the struct file. */
 	return (fp->f_pos);
 }
 
 /*
  * Request file pointer private data
  *
  * fp - pointer to file
  *
  * Returns pointer to file private data.
  */
 void *
 zfs_file_private(zfs_file_t *fp)
 {
 	/* Plain field read; the pointer is returned as stored. */
 	return (fp->private_data);
 }
 
 /*
  * unlink file
  *
  * path - fully qualified file path
  *
  * Returns 0 on success.
  *
  * OPTIONAL
  */
 int
 zfs_file_unlink(const char *path)
 {
 	/* Not implemented here: path is ignored and nothing is removed. */
 	return (EOPNOTSUPP);
 }
 
 /*
  * Get reference to file pointer
  *
  * fd - input file descriptor
- * fpp - pointer to file pointer
  *
- * Returns 0 on success EBADF on failure.
+ * Returns pointer to file struct or NULL
  */
-int
-zfs_file_get(int fd, zfs_file_t **fpp)
+zfs_file_t *
+zfs_file_get(int fd)
 {
-	zfs_file_t *fp;
-
-	fp = fget(fd);
-	if (fp == NULL)
-		return (EBADF);
-
-	*fpp = fp;
-
-	return (0);
+	return (fget(fd));
 }
 
 /*
  * Drop reference to file pointer
  *
- * fd - input file descriptor
+ * fp - input file struct pointer
  */
 void
-zfs_file_put(int fd)
+zfs_file_put(zfs_file_t *fp)
 {
-	struct file *fp;
-
-	if ((fp = fget(fd)) != NULL) {
-		fput(fp);
-		fput(fp);
-	}
+	fput(fp);
 }
diff --git a/sys/contrib/openzfs/module/zfs/fm.c b/sys/contrib/openzfs/module/zfs/fm.c
index dff7d8ece4be..b8a1c7c8a5ca 100644
--- a/sys/contrib/openzfs/module/zfs/fm.c
+++ b/sys/contrib/openzfs/module/zfs/fm.c
@@ -1,1368 +1,1372 @@
 /*
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
  * Common Development and Distribution License (the "License").
  * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
  * See the License for the specific language governing permissions
  * and limitations under the License.
  *
  * When distributing Covered Code, include this CDDL HEADER in each
  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  * If applicable, add the following below this CDDL HEADER, with the
  * fields enclosed by brackets "[]" replaced with your own identifying
  * information: Portions Copyright [yyyy] [name of copyright owner]
  *
  * CDDL HEADER END
  */
 /*
  * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 /*
  * Fault Management Architecture (FMA) Resource and Protocol Support
  *
  * The routines contained herein provide services to support kernel subsystems
  * in publishing fault management telemetry (see PSARC 2002/412 and 2003/089).
  *
  * Name-Value Pair Lists
  *
  * The embodiment of an FMA protocol element (event, fmri or authority) is a
  * name-value pair list (nvlist_t).  FMA-specific nvlist constructor and
  * destructor functions, fm_nvlist_create() and fm_nvlist_destroy(), are used
  * to create an nvpair list using custom allocators.  Callers may choose to
  * allocate either from the kernel memory allocator, or from a preallocated
  * buffer, useful in constrained contexts like high-level interrupt routines.
  *
  * Protocol Event and FMRI Construction
  *
  * Convenience routines are provided to construct nvlist events according to
  * the FMA Event Protocol and Naming Schema specification for ereports and
  * FMRIs for the dev, cpu, hc, mem, legacy hc and de schemes.
  *
  * ENA Manipulation
  *
  * Routines to generate ENA formats 0, 1 and 2 are available as well as
  * routines to increment formats 1 and 2.  Individual fields within the
  * ENA are extractable via fm_ena_time_get(), fm_ena_id_get(),
  * fm_ena_format_get() and fm_ena_gen_get().
  */
 
 #include <sys/types.h>
 #include <sys/time.h>
 #include <sys/list.h>
 #include <sys/nvpair.h>
 #include <sys/cmn_err.h>
 #include <sys/sysmacros.h>
 #include <sys/sunddi.h>
 #include <sys/systeminfo.h>
 #include <sys/fm/util.h>
 #include <sys/fm/protocol.h>
 #include <sys/kstat.h>
 #include <sys/zfs_context.h>
 #ifdef _KERNEL
 #include <sys/atomic.h>
 #include <sys/condvar.h>
 #include <sys/zfs_ioctl.h>
 
 /*
  * Queue length limit: zfs_zevent_insert() drains the tail of zevent_list
  * once zevent_len_cur has reached this value.
  */
 int zfs_zevent_len_max = 512;
 
 /* Queue length as counted by zfs_zevent_insert(); reset on drain-all/init */
 static int zevent_len_cur = 0;
 /* Number of threads currently inside zfs_zevent_wait() */
 static int zevent_waiters = 0;
 /* fm_fini() sets ZEVENT_SHUTDOWN here; zfs_zevent_wait() checks for it */
 static int zevent_flags = 0;
 
 /* Num events rate limited since the last time zfs_zevent_next() was called */
 static uint64_t ratelimit_dropped = 0;
 
 /*
  * The EID (Event IDentifier) is used to uniquely tag a zevent when it is
  * posted.  The posted EIDs are monotonically increasing but not persistent.
  * They will be reset to the initial value (1) each time the kernel module is
  * loaded.
  */
 static uint64_t zevent_eid = 0;
 
 /* Taken around the zevent_list and per-event cursor list updates below */
 static kmutex_t zevent_lock;
 /* Posted zevent_t entries; new events are inserted at the head */
 static list_t zevent_list;
 /* Broadcast by zfs_zevent_post() and fm_fini(); slept on in zfs_zevent_wait() */
 static kcondvar_t zevent_cv;
 #endif /* _KERNEL */
 
 
 /*
  * Common fault management kstats to record event generation failures
  */
 
 struct erpt_kstat {
 	kstat_named_t	erpt_dropped;		/* num erpts dropped on post */
 	kstat_named_t	erpt_set_failed;	/* num erpt set failures */
 	kstat_named_t	fmri_set_failed;	/* num fmri set failures */
 	kstat_named_t	payload_set_failed;	/* num payload set failures */
 	kstat_named_t	erpt_duplicates;	/* num duplicate erpts */
 };
 
 /*
  * Backing storage for the counters above; the initializers are listed in
  * the same order as the struct members.  The routines in this file bump
  * the value.ui64 field of each entry with atomic_inc_64().
  */
 static struct erpt_kstat erpt_kstat_data = {
 	{ "erpt-dropped", KSTAT_DATA_UINT64 },
 	{ "erpt-set-failed", KSTAT_DATA_UINT64 },
 	{ "fmri-set-failed", KSTAT_DATA_UINT64 },
 	{ "payload-set-failed", KSTAT_DATA_UINT64 },
 	{ "erpt-duplicates", KSTAT_DATA_UINT64 }
 };
 
 /* kstat handle: created in fm_init(), deleted and set to NULL in fm_fini() */
 kstat_t *fm_ksp;
 
 #ifdef _KERNEL
 
 /*
  * Allocate a zeroed zevent_t and initialize its own list linkage and its
  * (empty) list of attached zfs_zevent_t cursors.
  */
 static zevent_t *
 zfs_zevent_alloc(void)
 {
 	zevent_t *new_ev = kmem_zalloc(sizeof (*new_ev), KM_SLEEP);
 
 	list_link_init(&new_ev->ev_node);
 	list_create(&new_ev->ev_ze_list, sizeof (zfs_zevent_t),
 	    offsetof(zfs_zevent_t, ze_node));
 
 	return (new_ev);
 }
 
 /*
  * Release a zevent_t: invoke the callback supplied at post time on the
  * event's nvlist and detector, then destroy the cursor list and free the
  * structure itself.
  */
 static void
 zfs_zevent_free(zevent_t *ev)
 {
 	/* Hand nvl and detector back through the poster's callback */
 	(*ev->ev_cb)(ev->ev_nvl, ev->ev_detector);
 
 	list_destroy(&ev->ev_ze_list);
 	kmem_free(ev, sizeof (*ev));
 }
 
 /*
  * Unlink one event from zevent_list, detach every cursor that still points
  * at it (bumping that cursor's dropped count), and free the event.
  * The caller must hold zevent_lock.
  */
 static void
 zfs_zevent_drain(zevent_t *ev)
 {
 	zfs_zevent_t *cursor;
 
 	ASSERT(MUTEX_HELD(&zevent_lock));
 	list_remove(&zevent_list, ev);
 
 	/* Clear this event out of all private file data referencing it */
 	for (cursor = list_head(&ev->ev_ze_list); cursor != NULL;
 	    cursor = list_head(&ev->ev_ze_list)) {
 		list_remove(&ev->ev_ze_list, cursor);
 		cursor->ze_zevent = NULL;
 		cursor->ze_dropped++;
 	}
 
 	zfs_zevent_free(ev);
 }
 
 /*
  * Drain every queued event.  On return *count holds the queue length
  * counter as it stood before being reset to zero.
  */
 void
 zfs_zevent_drain_all(int *count)
 {
 	mutex_enter(&zevent_lock);
 
 	for (;;) {
 		zevent_t *head_ev = list_head(&zevent_list);
 
 		if (head_ev == NULL)
 			break;
 		zfs_zevent_drain(head_ev);
 	}
 
 	*count = zevent_len_cur;
 	zevent_len_cur = 0;
 
 	mutex_exit(&zevent_lock);
 }
 
 /*
  * Queue a new zevent at the head of zevent_list.  While the queue is below
  * zfs_zevent_len_max the length counter simply grows; once the limit has
  * been reached the event at the tail is drained instead, which also sets
  * to NULL the reference held by any user space cursor parked on it.
  * The caller must hold zevent_lock.
  */
 static void
 zfs_zevent_insert(zevent_t *ev)
 {
 	ASSERT(MUTEX_HELD(&zevent_lock));
 	list_insert_head(&zevent_list, ev);
 
 	if (zevent_len_cur < zfs_zevent_len_max) {
 		zevent_len_cur++;
 		return;
 	}
 
 	zfs_zevent_drain(list_tail(&zevent_list));
 }
 
 /*
  * Post a zevent.  A timestamp and a fresh EID are added to nvl before it is
  * queued.  Ownership of nvl and detector is handed back through cb once
  * they are no longer needed, which happens in one of two ways:
  * - Posting fails: cb is called before zfs_zevent_post() returns and the
  *   error is returned to the caller.
  * - The queued event is later drained and freed.
  *
  * Returns 0 on success, EOVERFLOW when the encoded nvlist is empty or
  * larger than ERPT_DATA_SZ, ENOMEM when no zevent could be allocated, or
  * the error reported by the failing nvlist call.
  */
 int
 zfs_zevent_post(nvlist_t *nvl, nvlist_t *detector, zevent_cb_t *cb)
 {
 	uint64_t *set_failedp = &erpt_kstat_data.erpt_set_failed.value.ui64;
 	uint64_t *droppedp = &erpt_kstat_data.erpt_dropped.value.ui64;
 	inode_timespec_t now;
 	int64_t stamp[2];
 	uint64_t eid;
 	size_t encoded_len = 0;
 	zevent_t *ev;
 	int error;
 
 	ASSERT(cb != NULL);
 
 	gethrestime(&now);
 	stamp[0] = now.tv_sec;
 	stamp[1] = now.tv_nsec;
 
 	error = nvlist_add_int64_array(nvl, FM_EREPORT_TIME, stamp, 2);
 	if (error != 0) {
 		atomic_inc_64(set_failedp);
 		goto out;
 	}
 
 	eid = atomic_inc_64_nv(&zevent_eid);
 	error = nvlist_add_uint64(nvl, FM_EREPORT_EID, eid);
 	if (error != 0) {
 		atomic_inc_64(set_failedp);
 		goto out;
 	}
 
 	/* An unsizable, empty or oversized report is counted as dropped */
 	error = nvlist_size(nvl, &encoded_len, NV_ENCODE_NATIVE);
 	if (error == 0 && (encoded_len == 0 || encoded_len > ERPT_DATA_SZ))
 		error = EOVERFLOW;
 	if (error != 0) {
 		atomic_inc_64(droppedp);
 		goto out;
 	}
 
 	ev = zfs_zevent_alloc();
 	if (ev == NULL) {
 		atomic_inc_64(droppedp);
 		error = ENOMEM;
 		goto out;
 	}
 
 	ev->ev_eid = eid;
 	ev->ev_cb = cb;
 	ev->ev_detector = detector;
 	ev->ev_nvl = nvl;
 
 	mutex_enter(&zevent_lock);
 	zfs_zevent_insert(ev);
 	cv_broadcast(&zevent_cv);
 	mutex_exit(&zevent_lock);
 
 out:
 	if (error != 0)
 		cb(nvl, detector);
 
 	return (error);
 }
 
 /* Count one duplicate ereport in the "erpt-duplicates" kstat. */
 void
 zfs_zevent_track_duplicate(void)
 {
 	uint64_t *dup_countp = &erpt_kstat_data.erpt_duplicates.value.ui64;
 
 	atomic_inc_64(dup_countp);
 }
 
 /*
  * Look up the ZST_ZEVENT state registered for a minor number and store it
  * in *ze.  Returns 0 when a state was found, EBADF (with *ze set to NULL)
  * otherwise.
  */
 static int
 zfs_zevent_minor_to_state(minor_t minor, zfs_zevent_t **ze)
 {
 	zfs_zevent_t *state = zfsdev_get_state(minor, ZST_ZEVENT);
 
 	*ze = state;
 	if (state != NULL)
 		return (0);
 
 	return (SET_ERROR(EBADF));
 }
 
 /*
  * Hold the file behind 'fd' and resolve it to its zevent state.
  *
  * The new version takes a reference with zfs_file_get(), fills in *minorp
  * via zfsdev_getminor() and *ze via zfs_zevent_minor_to_state().  If either
  * lookup fails the reference is released again and NULL is returned;
  * otherwise the held file pointer is returned and the caller is expected
  * to release it with zfs_zevent_fd_rele().
  */
-int
+zfs_file_t *
 zfs_zevent_fd_hold(int fd, minor_t *minorp, zfs_zevent_t **ze)
 {
-	int error;
+	zfs_file_t *fp = zfs_file_get(fd);
+	if (fp == NULL)
+		return (NULL);
 
-	error = zfsdev_getminor(fd, minorp);
+	int error = zfsdev_getminor(fp, minorp);
 	if (error == 0)
 		error = zfs_zevent_minor_to_state(*minorp, ze);
 
-	if (error)
-		zfs_zevent_fd_rele(fd);
+	if (error) {
+		zfs_zevent_fd_rele(fp);
+		fp = NULL;
+	}
 
-	return (error);
+	return (fp);
 }
 
 /*
  * Release a file reference obtained from zfs_zevent_fd_hold(); this simply
  * forwards to zfs_file_put().
  */
 void
-zfs_zevent_fd_rele(int fd)
+zfs_zevent_fd_rele(zfs_file_t *fp)
 {
-	zfs_file_put(fd);
+	zfs_file_put(fp);
 }
 
 /*
  * Advance the cursor 'ze' to the next zevent in the stream and place a
  * copy of that event's nvlist in 'event'.
  *
  * Returns ENOENT when there is no further event.  Returns ENOMEM when the
  * encoded nvlist size exceeds the passed 'event_size'; in that case the
  * cursor is not advanced and 'event_size' is set to the minimum required
  * buffer size.  On success 'dropped' receives the cursor's dropped count
  * plus the events rate limited since the previous successful call, and
  * both counters are reset.
  */
 int
 zfs_zevent_next(zfs_zevent_t *ze, nvlist_t **event, uint64_t *event_size,
     uint64_t *dropped)
 {
 	zevent_t *next_ev;
 	size_t encoded_len;
 	int error = 0;
 
 	mutex_enter(&zevent_lock);
 
 	/*
 	 * A cursor that is already positioned continues with the element
 	 * before its current one; a new stream starts at the tail.
 	 */
 	if (ze->ze_zevent != NULL)
 		next_ev = list_prev(&zevent_list, ze->ze_zevent);
 	else
 		next_ev = list_tail(&zevent_list);
 
 	if (next_ev == NULL) {
 		error = ENOENT;
 		goto out;
 	}
 
 	VERIFY(nvlist_size(next_ev->ev_nvl, &encoded_len,
 	    NV_ENCODE_NATIVE) == 0);
 	if (encoded_len > *event_size) {
 		*event_size = encoded_len;
 		error = ENOMEM;
 		goto out;
 	}
 
 	/* Leave the previous element's cursor list before joining the new */
 	if (ze->ze_zevent != NULL)
 		list_remove(&ze->ze_zevent->ev_ze_list, ze);
 
 	ze->ze_zevent = next_ev;
 	list_insert_head(&next_ev->ev_ze_list, ze);
 	(void) nvlist_dup(next_ev->ev_nvl, event, KM_SLEEP);
 	*dropped = ze->ze_dropped;
 
 #ifdef _KERNEL
 	/* Include events dropped due to rate limiting */
 	*dropped += atomic_swap_64(&ratelimit_dropped, 0);
 #endif
 	ze->ze_dropped = 0;
 out:
 	mutex_exit(&zevent_lock);
 
 	return (error);
 }
 
 /*
  * Sleep interruptibly on zevent_cv until events are available.
  *
  * Returns 0 once a wakeup finds zevent_list non-empty, EINTR when a signal
  * is pending after a wakeup, or ESHUTDOWN when ZEVENT_SHUTDOWN is set.
  * The caller is counted in zevent_waiters for the duration of the call.
  */
 int
 zfs_zevent_wait(zfs_zevent_t *ze)
 {
 	int error = 0;
 
 	mutex_enter(&zevent_lock);
 	zevent_waiters++;
 
 	for (;;) {
 		if (zevent_flags & ZEVENT_SHUTDOWN) {
 			error = SET_ERROR(ESHUTDOWN);
 			break;
 		}
 
 		/* The outcome is decided by the checks below */
 		(void) cv_wait_sig(&zevent_cv, &zevent_lock);
 
 		if (signal_pending(current)) {
 			error = SET_ERROR(EINTR);
 			break;
 		}
 
 		if (!list_is_empty(&zevent_list)) {
 			error = 0;
 			break;
 		}
 	}
 
 	zevent_waiters--;
 	mutex_exit(&zevent_lock);
 
 	return (error);
 }
 
 /*
  * Position the cursor 'ze' at the event carrying the given EID.  If that
  * EID is still in the posted list the cursor is attached to it; otherwise
  * ENOENT is returned and the cursor is left where it was.
  *
  * Two reserved EIDs never fail: ZEVENT_SEEK_START detaches the cursor so
  * the next read starts from the beginning of the stream, and
  * ZEVENT_SEEK_END attaches it to the event at the head of the list (or
  * detaches it when the list is empty).
  */
 int
 zfs_zevent_seek(zfs_zevent_t *ze, uint64_t eid)
 {
 	zevent_t *target;
 	int error = 0;
 
 	mutex_enter(&zevent_lock);
 
 	if (eid == ZEVENT_SEEK_START || eid == ZEVENT_SEEK_END) {
 		target = (eid == ZEVENT_SEEK_START) ?
 		    NULL : list_head(&zevent_list);
 	} else {
 		/* Walk from the tail towards the head looking for the EID */
 		target = list_tail(&zevent_list);
 		while (target != NULL && target->ev_eid != eid)
 			target = list_prev(&zevent_list, target);
 
 		if (target == NULL) {
 			error = ENOENT;
 			goto out;
 		}
 	}
 
 	if (ze->ze_zevent != NULL)
 		list_remove(&ze->ze_zevent->ev_ze_list, ze);
 
 	ze->ze_zevent = target;
 	if (target != NULL)
 		list_insert_head(&target->ev_ze_list, ze);
 
 out:
 	mutex_exit(&zevent_lock);
 
 	return (error);
 }
 
 /*
  * Allocate a zeroed cursor with its list linkage initialized and return it
  * through *zep.
  */
 void
 zfs_zevent_init(zfs_zevent_t **zep)
 {
 	zfs_zevent_t *cursor = kmem_zalloc(sizeof (*cursor), KM_SLEEP);
 
 	list_link_init(&cursor->ze_node);
 	*zep = cursor;
 }
 
 /*
  * Detach a cursor from the event it is parked on, if any, and free it.
  */
 void
 zfs_zevent_destroy(zfs_zevent_t *ze)
 {
 	mutex_enter(&zevent_lock);
 	if (ze->ze_zevent != NULL)
 		list_remove(&ze->ze_zevent->ev_ze_list, ze);
 	mutex_exit(&zevent_lock);
 
 	kmem_free(ze, sizeof (*ze));
 }
 #endif /* _KERNEL */
 
 /*
  * Allocation hooks used by the default FM nvlist allocator
  */
 /* ARGSUSED */
 static void *
 i_fm_alloc(nv_alloc_t *nva, size_t size)
 {
 	void *buf = kmem_zalloc(size, KM_SLEEP);
 
 	return (buf);
 }
 
 /* Free hook paired with i_fm_alloc(); nva is not consulted. */
 /* ARGSUSED */
 static void
 i_fm_free(nv_alloc_t *nva, void *buf, size_t size)
 {
 	(void) nva;
 	kmem_free(buf, size);
 }
 
 /*
  * nv_alloc operations installed by fm_nvlist_create() when the caller does
  * not supply an allocator: only the alloc and free hooks are provided.
  */
 const nv_alloc_ops_t fm_mem_alloc_ops = {
 	.nv_ao_init = NULL,
 	.nv_ao_fini = NULL,
 	.nv_ao_alloc = i_fm_alloc,
 	.nv_ao_free = i_fm_free,
 	.nv_ao_reset = NULL
 };
 
 /*
  * Build an nv_alloc_t that hands out memory from the fixed buffer buf of
  * bufsz bytes.  Returns the new handle, or NULL when bufsz is zero or the
  * allocator cannot be initialized over the buffer.
  */
 nv_alloc_t *
 fm_nva_xcreate(char *buf, size_t bufsz)
 {
 	nv_alloc_t *hdl = kmem_zalloc(sizeof (*hdl), KM_SLEEP);
 
 	if (bufsz != 0 && nv_alloc_init(hdl, nv_fixed_ops, buf, bufsz) == 0)
 		return (hdl);
 
 	kmem_free(hdl, sizeof (*hdl));
 	return (NULL);
 }
 
 /*
  * Tear down and free an nv_alloc structure.  Only the handle is released
  * here; a fixed buffer associated with nva remains the caller's to free.
  */
 void
 fm_nva_xdestroy(nv_alloc_t *nva)
 {
 	nv_alloc_fini(nva);
 	kmem_free(nva, sizeof (*nva));
 }
 
 /*
  * Create a new nv list managed by the operations installed in nva.  When
  * nva is NULL a private allocator using the default FMA operations is
  * created for the list.  Returns the new nv list, or NULL if it could not
  * be created.
  *
  * When called from the kernel with nva == NULL, this function must be
  * called from passive kernel context with no locks held that can prevent
  * a sleeping memory allocation from occurring.  Otherwise, it may be
  * called from other kernel contexts as long as a valid nva created via
  * fm_nva_xcreate() is supplied.
  */
 nvlist_t *
 fm_nvlist_create(nv_alloc_t *nva)
 {
 	nv_alloc_t *hdl = nva;
 	nvlist_t *nvl;
 
 	if (hdl == NULL) {
 		hdl = kmem_zalloc(sizeof (*hdl), KM_SLEEP);
 
 		if (nv_alloc_init(hdl, &fm_mem_alloc_ops, NULL, 0) != 0) {
 			kmem_free(hdl, sizeof (*hdl));
 			return (NULL);
 		}
 	}
 
 	if (nvlist_xalloc(&nvl, NV_UNIQUE_NAME, hdl) == 0)
 		return (nvl);
 
 	/* Only a handle allocated above is torn down on failure */
 	if (nva == NULL) {
 		nv_alloc_fini(hdl);
 		kmem_free(hdl, sizeof (*hdl));
 	}
 
 	return (NULL);
 }
 
 /*
  * Free an nvlist.  flag selects what happens to the nv alloc structure the
  * list was created with: FM_NVA_FREE destroys it as well, any other value
  * (FM_NVA_RETAIN) keeps it so that it can be re-used for future nvlist
  * creation operations.
  */
 void
 fm_nvlist_destroy(nvlist_t *nvl, int flag)
 {
 	/* The allocator must be looked up before the list goes away */
 	nv_alloc_t *hdl = nvlist_lookup_nv_alloc(nvl);
 
 	nvlist_free(nvl);
 
 	if (flag == FM_NVA_FREE && hdl != NULL)
 		fm_nva_xdestroy(hdl);
 }
 
 /*
  * Add a NULL-terminated sequence of members to an nvlist.
  *
  * payload - nvlist receiving the members
  * name    - name of the first member, or NULL for an empty sequence
  * ap      - remaining arguments: for each member a data_type_t tag,
  *           then (for array types) an int element count, then the value;
  *           each member is followed by the next name or a terminating NULL
  *
  * Returns 0 when every member was added, the error from the failing
  * nvlist_add_*() call, or EINVAL for an unsupported type tag.  Processing
  * stops at the first failure and no further arguments are read: after an
  * unsupported tag the pending value was never consumed, so the following
  * argument cannot safely be interpreted as a name.
  */
 int
 i_fm_payload_set(nvlist_t *payload, const char *name, va_list ap)
 {
 	int nelem, ret = 0;
 	data_type_t type;
 
 	while (name != NULL) {
 		type = va_arg(ap, data_type_t);
 		switch (type) {
 		case DATA_TYPE_BYTE:
 			ret = nvlist_add_byte(payload, name,
 			    va_arg(ap, uint_t));
 			break;
 		case DATA_TYPE_BYTE_ARRAY:
 			nelem = va_arg(ap, int);
 			ret = nvlist_add_byte_array(payload, name,
 			    va_arg(ap, uchar_t *), nelem);
 			break;
 		case DATA_TYPE_BOOLEAN_VALUE:
 			ret = nvlist_add_boolean_value(payload, name,
 			    va_arg(ap, boolean_t));
 			break;
 		case DATA_TYPE_BOOLEAN_ARRAY:
 			nelem = va_arg(ap, int);
 			ret = nvlist_add_boolean_array(payload, name,
 			    va_arg(ap, boolean_t *), nelem);
 			break;
 		case DATA_TYPE_INT8:
 			ret = nvlist_add_int8(payload, name,
 			    va_arg(ap, int));
 			break;
 		case DATA_TYPE_INT8_ARRAY:
 			nelem = va_arg(ap, int);
 			ret = nvlist_add_int8_array(payload, name,
 			    va_arg(ap, int8_t *), nelem);
 			break;
 		case DATA_TYPE_UINT8:
 			ret = nvlist_add_uint8(payload, name,
 			    va_arg(ap, uint_t));
 			break;
 		case DATA_TYPE_UINT8_ARRAY:
 			nelem = va_arg(ap, int);
 			ret = nvlist_add_uint8_array(payload, name,
 			    va_arg(ap, uint8_t *), nelem);
 			break;
 		case DATA_TYPE_INT16:
 			ret = nvlist_add_int16(payload, name,
 			    va_arg(ap, int));
 			break;
 		case DATA_TYPE_INT16_ARRAY:
 			nelem = va_arg(ap, int);
 			ret = nvlist_add_int16_array(payload, name,
 			    va_arg(ap, int16_t *), nelem);
 			break;
 		case DATA_TYPE_UINT16:
 			ret = nvlist_add_uint16(payload, name,
 			    va_arg(ap, uint_t));
 			break;
 		case DATA_TYPE_UINT16_ARRAY:
 			nelem = va_arg(ap, int);
 			ret = nvlist_add_uint16_array(payload, name,
 			    va_arg(ap, uint16_t *), nelem);
 			break;
 		case DATA_TYPE_INT32:
 			ret = nvlist_add_int32(payload, name,
 			    va_arg(ap, int32_t));
 			break;
 		case DATA_TYPE_INT32_ARRAY:
 			nelem = va_arg(ap, int);
 			ret = nvlist_add_int32_array(payload, name,
 			    va_arg(ap, int32_t *), nelem);
 			break;
 		case DATA_TYPE_UINT32:
 			ret = nvlist_add_uint32(payload, name,
 			    va_arg(ap, uint32_t));
 			break;
 		case DATA_TYPE_UINT32_ARRAY:
 			nelem = va_arg(ap, int);
 			ret = nvlist_add_uint32_array(payload, name,
 			    va_arg(ap, uint32_t *), nelem);
 			break;
 		case DATA_TYPE_INT64:
 			ret = nvlist_add_int64(payload, name,
 			    va_arg(ap, int64_t));
 			break;
 		case DATA_TYPE_INT64_ARRAY:
 			nelem = va_arg(ap, int);
 			ret = nvlist_add_int64_array(payload, name,
 			    va_arg(ap, int64_t *), nelem);
 			break;
 		case DATA_TYPE_UINT64:
 			ret = nvlist_add_uint64(payload, name,
 			    va_arg(ap, uint64_t));
 			break;
 		case DATA_TYPE_UINT64_ARRAY:
 			nelem = va_arg(ap, int);
 			ret = nvlist_add_uint64_array(payload, name,
 			    va_arg(ap, uint64_t *), nelem);
 			break;
 		case DATA_TYPE_STRING:
 			ret = nvlist_add_string(payload, name,
 			    va_arg(ap, char *));
 			break;
 		case DATA_TYPE_STRING_ARRAY:
 			nelem = va_arg(ap, int);
 			ret = nvlist_add_string_array(payload, name,
 			    va_arg(ap, char **), nelem);
 			break;
 		case DATA_TYPE_NVLIST:
 			ret = nvlist_add_nvlist(payload, name,
 			    va_arg(ap, nvlist_t *));
 			break;
 		case DATA_TYPE_NVLIST_ARRAY:
 			nelem = va_arg(ap, int);
 			ret = nvlist_add_nvlist_array(payload, name,
 			    va_arg(ap, nvlist_t **), nelem);
 			break;
 		default:
 			ret = EINVAL;
 			break;
 		}
 
 		/* Do not touch the argument list again after a failure */
 		if (ret != 0)
 			break;
 
 		name = va_arg(ap, char *);
 	}
 	return (ret);
 }
 
 /*
  * Variadic front end to i_fm_payload_set(): the arguments after payload
  * are a NULL-terminated sequence of name, type and value entries.  A
  * failure is only recorded in the payload-set-failed kstat.
  */
 void
 fm_payload_set(nvlist_t *payload, ...)
 {
 	va_list args;
 	const char *first_name;
 	int err;
 
 	va_start(args, payload);
 	first_name = va_arg(args, char *);
 	err = i_fm_payload_set(payload, first_name, args);
 	va_end(args);
 
 	if (err != 0)
 		atomic_inc_64(&erpt_kstat_data.payload_set_failed.value.ui64);
 }
 
 /*
  * Set-up and validate the members of an ereport event according to:
  *
  *	Member name		Type		Value
  *	====================================================
  *	class			string		ereport
  *	version			uint8_t		0
  *	ena			uint64_t	<ena>
  *	detector		nvlist_t	<detector>
  *	ereport-payload		nvlist_t	<var args>
  *
  * We don't actually add a 'version' member to the payload.  Really,
  * the version quoted to us by our caller is that of the category 1
  * "ereport" event class (and we require FM_EREPORT_VERS0) but
  * the payload version of the actual leaf class event under construction
  * may be something else.  Callers should supply a version in the varargs,
  * or (better) we could take two version arguments - one for the
  * ereport category 1 classification (expect FM_EREPORT_VERS0) and one
  * for the leaf class.
  */
 void
 fm_ereport_set(nvlist_t *ereport, int version, const char *erpt_class,
     uint64_t ena, const nvlist_t *detector, ...)
 {
 	uint64_t *failedp = &erpt_kstat_data.erpt_set_failed.value.ui64;
 	char full_class[FM_MAX_CLASS];
 	const char *first_name;
 	va_list args;
 	int err;
 
 	/* Only the version 0 ereport category is accepted */
 	if (version != FM_EREPORT_VERS0) {
 		atomic_inc_64(failedp);
 		return;
 	}
 
 	/* The class member is "<FM_EREPORT_CLASS>.<erpt_class>" */
 	(void) snprintf(full_class, sizeof (full_class), "%s.%s",
 	    FM_EREPORT_CLASS, erpt_class);
 	if (nvlist_add_string(ereport, FM_CLASS, full_class) != 0) {
 		atomic_inc_64(failedp);
 		return;
 	}
 
 	/* Failures from here on are counted but do not stop the set-up */
 	if (nvlist_add_uint64(ereport, FM_EREPORT_ENA, ena) != 0)
 		atomic_inc_64(failedp);
 
 	if (nvlist_add_nvlist(ereport, FM_EREPORT_DETECTOR,
 	    (nvlist_t *)detector) != 0)
 		atomic_inc_64(failedp);
 
 	va_start(args, detector);
 	first_name = va_arg(args, const char *);
 	err = i_fm_payload_set(ereport, first_name, args);
 	va_end(args);
 
 	if (err != 0)
 		atomic_inc_64(failedp);
 }
 
 /*
  * Set-up and validate the members of an hc fmri according to:
  *
  *	Member name		Type		Value
  *	===================================================
  *	version			uint8_t		0
  *	auth			nvlist_t	<auth>
  *	hc-name			string		<name>
  *	hc-id			string		<id>
  *
  * Note that auth and hc-id are optional members.
  */
 
 #define	HC_MAXPAIRS	20
 #define	HC_MAXNAMELEN	50
 
 /*
  * Add the members shared by all hc fmris: version, scheme and, when auth
  * is non-NULL, the authority.  Returns 1 on success.  Returns 0, after
  * counting an fmri set failure, when the version is not
  * FM_HC_SCHEME_VERSION or any member cannot be added.
  */
 static int
 fm_fmri_hc_set_common(nvlist_t *fmri, int version, const nvlist_t *auth)
 {
 	if (version != FM_HC_SCHEME_VERSION)
 		goto fail;
 
 	if (nvlist_add_uint8(fmri, FM_VERSION, version) != 0 ||
 	    nvlist_add_string(fmri, FM_FMRI_SCHEME, FM_FMRI_SCHEME_HC) != 0)
 		goto fail;
 
 	if (auth != NULL && nvlist_add_nvlist(fmri, FM_FMRI_AUTHORITY,
 	    (nvlist_t *)auth) != 0)
 		goto fail;
 
 	return (1);
 
 fail:
 	atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
 	return (0);
 }
 
 /*
  * Fill in an hc fmri.  The variable arguments are npairs pairs of
  * (const char *name, uint32_t id); at most HC_MAXPAIRS pairs are used.
  * Each pair becomes an nvlist holding the name and the id printed as a
  * decimal string, and the pairs are stored as the fmri's hc list.  snvl,
  * when non-NULL, is added as the hc-specific member.  Failures are only
  * recorded in the fmri-set-failed kstat.
  */
 void
 fm_fmri_hc_set(nvlist_t *fmri, int version, const nvlist_t *auth,
     nvlist_t *snvl, int npairs, ...)
 {
 	uint64_t *failedp = &erpt_kstat_data.fmri_set_failed.value.ui64;
 	nv_alloc_t *nva = nvlist_lookup_nv_alloc(fmri);
 	nvlist_t *hc_pairs[HC_MAXPAIRS];
 	va_list args;
 	int idx;
 
 	if (fm_fmri_hc_set_common(fmri, version, auth) == 0)
 		return;
 
 	if (npairs > HC_MAXPAIRS)
 		npairs = HC_MAXPAIRS;
 
 	va_start(args, npairs);
 	idx = 0;
 	while (idx < npairs) {
 		const char *hc_name = va_arg(args, const char *);
 		uint32_t hc_id = va_arg(args, uint32_t);
 		char id_text[11];
 		nvlist_t *pair;
 
 		(void) snprintf(id_text, sizeof (id_text), "%u", hc_id);
 
 		/* Pairs are allocated from the same allocator as the fmri */
 		pair = fm_nvlist_create(nva);
 		hc_pairs[idx] = pair;
 		if (nvlist_add_string(pair, FM_FMRI_HC_NAME, hc_name) != 0 ||
 		    nvlist_add_string(pair, FM_FMRI_HC_ID, id_text) != 0)
 			atomic_inc_64(failedp);
 
 		idx++;
 	}
 	va_end(args);
 
 	if (nvlist_add_nvlist_array(fmri, FM_FMRI_HC_LIST, hc_pairs,
 	    npairs) != 0)
 		atomic_inc_64(failedp);
 
 	for (idx = 0; idx < npairs; idx++)
 		fm_nvlist_destroy(hc_pairs[idx], FM_NVA_RETAIN);
 
 	if (snvl != NULL &&
 	    nvlist_add_nvlist(fmri, FM_FMRI_HC_SPECIFIC, snvl) != 0)
 		atomic_inc_64(failedp);
 }
 
 /*
  * Fill in an hc fmri whose hc list is the list found in bboard followed
  * by the pairs passed as variable arguments.
  *
  * fmri    - nvlist to fill in
  * version - must be FM_HC_SCHEME_VERSION
  * auth    - optional authority nvlist
  * snvl    - optional hc-specific nvlist
  * bboard  - nvlist supplying the leading FM_FMRI_HC_LIST entries
  * npairs  - number of (const char *name, uint32_t id) pairs that follow
  *
  * The combined list is limited to HC_MAXPAIRS entries: a bboard list that
  * is longer than that is treated as a failure, and the caller's pairs are
  * truncated to whatever room remains.  Every failure is recorded in the
  * fmri-set-failed kstat, and all temporary pair nvlists are destroyed on
  * every exit path.
  */
 void
 fm_fmri_hc_create(nvlist_t *fmri, int version, const nvlist_t *auth,
     nvlist_t *snvl, nvlist_t *bboard, int npairs, ...)
 {
 	uint64_t *failedp = &erpt_kstat_data.fmri_set_failed.value.ui64;
 	nv_alloc_t *nva = nvlist_lookup_nv_alloc(fmri);
 	nvlist_t *pairs[HC_MAXPAIRS];
 	nvlist_t **hcl;
 	uint_t n, i;
 	uint_t total = 0;	/* entries of pairs[] created so far */
 	int failed = 1;
 	va_list ap;
 	char *hcname, *hcid;
 
 	if (!fm_fmri_hc_set_common(fmri, version, auth))
 		return;
 
 	/*
 	 * The bboard pairs are copied first, so they must fit in pairs[].
 	 */
 	if (nvlist_lookup_nvlist_array(bboard, FM_FMRI_HC_LIST, &hcl, &n)
 	    != 0 || n > HC_MAXPAIRS) {
 		atomic_inc_64(failedp);
 		return;
 	}
 
 	/*
 	 * Limit the passed in pairs to the space left after the bboard's.
 	 */
 	if (npairs < 0)
 		npairs = 0;
 	npairs = MIN(npairs, HC_MAXPAIRS - (int)n);
 
 	/*
 	 * copy the bboard nvpairs to the pairs array
 	 */
 	for (i = 0; i < n; i++) {
 		if (nvlist_lookup_string(hcl[i], FM_FMRI_HC_NAME,
 		    &hcname) != 0 ||
 		    nvlist_lookup_string(hcl[i], FM_FMRI_HC_ID, &hcid) != 0)
 			goto out;
 
 		pairs[i] = fm_nvlist_create(nva);
 		total = i + 1;
 		if (nvlist_add_string(pairs[i], FM_FMRI_HC_NAME, hcname) != 0 ||
 		    nvlist_add_string(pairs[i], FM_FMRI_HC_ID, hcid) != 0)
 			goto out;
 	}
 
 	/*
 	 * create the pairs from passed in pairs
 	 */
 	va_start(ap, npairs);
 	for (i = 0; i < (uint_t)npairs; i++) {
 		const char *name = va_arg(ap, const char *);
 		uint32_t id = va_arg(ap, uint32_t);
 		char idstr[11];
 		nvlist_t *pair;
 
 		(void) snprintf(idstr, sizeof (idstr), "%u", id);
 		pair = fm_nvlist_create(nva);
 		pairs[total++] = pair;
 		if (nvlist_add_string(pair, FM_FMRI_HC_NAME, name) != 0 ||
 		    nvlist_add_string(pair, FM_FMRI_HC_ID, idstr) != 0)
 			break;
 	}
 	va_end(ap);
 
 	/* The loop above only stops early when a pair could not be built */
 	if (i < (uint_t)npairs)
 		goto out;
 
 	/*
 	 * Create the fmri hc list
 	 */
 	if (nvlist_add_nvlist_array(fmri, FM_FMRI_HC_LIST, pairs,
 	    total) != 0)
 		goto out;
 
 	if (snvl != NULL &&
 	    nvlist_add_nvlist(fmri, FM_FMRI_HC_SPECIFIC, snvl) != 0)
 		goto out;
 
 	failed = 0;
 
 out:
 	/* A pair is NULL here if fm_nvlist_create() failed for it */
 	for (i = 0; i < total; i++) {
 		if (pairs[i] != NULL)
 			fm_nvlist_destroy(pairs[i], FM_NVA_RETAIN);
 	}
 
 	if (failed)
 		atomic_inc_64(failedp);
 }
 
 /*
  * Set-up and validate the members of a dev fmri according to:
  *
  *	Member name		Type		Value
  *	====================================================
  *	version			uint8_t		0
  *	auth			nvlist_t	<auth>
  *	devpath			string		<devpath>
  *	[devid]			string		<devid>
  *	[target-port-l0id]	string		<target-port-lun0-id>
  *
  * Note that auth and devid are optional members.
  */
 void
 fm_fmri_dev_set(nvlist_t *fmri_dev, int version, const nvlist_t *auth,
     const char *devpath, const char *devid, const char *tpl0)
 {
 	uint64_t *failedp = &erpt_kstat_data.fmri_set_failed.value.ui64;
 	int status;
 
 	if (version != DEV_SCHEME_VERSION0) {
 		atomic_inc_64(failedp);
 		return;
 	}
 
 	/*
 	 * Every member is attempted even after an earlier one fails; the
 	 * results are OR-ed together and a failure is counted once.
 	 */
 	status = nvlist_add_uint8(fmri_dev, FM_VERSION, version);
 	status |= nvlist_add_string(fmri_dev, FM_FMRI_SCHEME,
 	    FM_FMRI_SCHEME_DEV);
 
 	if (auth != NULL) {
 		status |= nvlist_add_nvlist(fmri_dev, FM_FMRI_AUTHORITY,
 		    (nvlist_t *)auth);
 	}
 
 	status |= nvlist_add_string(fmri_dev, FM_FMRI_DEV_PATH, devpath);
 
 	if (devid != NULL) {
 		status |= nvlist_add_string(fmri_dev, FM_FMRI_DEV_ID,
 		    devid);
 	}
 
 	if (tpl0 != NULL) {
 		status |= nvlist_add_string(fmri_dev, FM_FMRI_DEV_TGTPTLUN0,
 		    tpl0);
 	}
 
 	if (status != 0)
 		atomic_inc_64(failedp);
 }
 
 /*
  * Set-up and validate the members of a cpu fmri according to:
  *
  *	Member name		Type		Value
  *	====================================================
  *	version			uint8_t		0
  *	auth			nvlist_t	<auth>
  *	cpuid			uint32_t	<cpu_id>
  *	cpumask			uint8_t		<cpu_mask>
  *	serial			uint64_t	<serial_id>
  *
  * Note that auth, cpumask, serial are optional members.
  *
  */
 /*
  * fmri_cpu   - nvlist to fill in
  * version    - scheme version, at least CPU_SCHEME_VERSION1
  * auth       - optional authority nvlist (may be NULL)
  * cpu_id     - value of the cpuid member
  * cpu_maskp  - optional pointer to the cpumask value (may be NULL)
  * serial_idp - optional serial id string (may be NULL)
  *
  * An unsupported version, or a failure to add the version or scheme,
  * abandons the set-up.  Failures on the remaining members are counted in
  * the fmri-set-failed kstat and the set-up continues.  Omitting an
  * optional member is not a failure.
  */
 void
 fm_fmri_cpu_set(nvlist_t *fmri_cpu, int version, const nvlist_t *auth,
     uint32_t cpu_id, uint8_t *cpu_maskp, const char *serial_idp)
 {
 	uint64_t *failedp = &erpt_kstat_data.fmri_set_failed.value.ui64;
 
 	if (version < CPU_SCHEME_VERSION1) {
 		atomic_inc_64(failedp);
 		return;
 	}
 
 	if (nvlist_add_uint8(fmri_cpu, FM_VERSION, version) != 0) {
 		atomic_inc_64(failedp);
 		return;
 	}
 
 	if (nvlist_add_string(fmri_cpu, FM_FMRI_SCHEME,
 	    FM_FMRI_SCHEME_CPU) != 0) {
 		atomic_inc_64(failedp);
 		return;
 	}
 
 	if (auth != NULL && nvlist_add_nvlist(fmri_cpu, FM_FMRI_AUTHORITY,
 	    (nvlist_t *)auth) != 0)
 		atomic_inc_64(failedp);
 
 	if (nvlist_add_uint32(fmri_cpu, FM_FMRI_CPU_ID, cpu_id) != 0)
 		atomic_inc_64(failedp);
 
 	if (cpu_maskp != NULL && nvlist_add_uint8(fmri_cpu, FM_FMRI_CPU_MASK,
 	    *cpu_maskp) != 0)
 		atomic_inc_64(failedp);
 
 	/* serial is optional: only a failed add counts, not its absence */
 	if (serial_idp != NULL && nvlist_add_string(fmri_cpu,
 	    FM_FMRI_CPU_SERIAL_ID, (char *)serial_idp) != 0)
 		atomic_inc_64(failedp);
 }
 
 /*
  * Set-up and validate the members of a mem according to:
  *
  *	Member name		Type		Value
  *	====================================================
  *	version			uint8_t		0
  *	auth			nvlist_t	<auth>		[optional]
  *	unum			string		<unum>
  *	serial			string		<serial>	[optional*]
  *	offset			uint64_t	<offset>	[optional]
  *
  *	* serial is required if offset is present
  */
 void
 fm_fmri_mem_set(nvlist_t *fmri, int version, const nvlist_t *auth,
     const char *unum, const char *serial, uint64_t offset)
 {
 	uint64_t *failedp = &erpt_kstat_data.fmri_set_failed.value.ui64;
 	const uint64_t no_offset = (uint64_t)-1;
 
 	if (version != MEM_SCHEME_VERSION0) {
 		atomic_inc_64(failedp);
 		return;
 	}
 
 	/* An offset may only be given together with a serial */
 	if (serial == NULL && offset != no_offset) {
 		atomic_inc_64(failedp);
 		return;
 	}
 
 	/* Without version and scheme the fmri is abandoned */
 	if (nvlist_add_uint8(fmri, FM_VERSION, version) != 0 ||
 	    nvlist_add_string(fmri, FM_FMRI_SCHEME, FM_FMRI_SCHEME_MEM) != 0) {
 		atomic_inc_64(failedp);
 		return;
 	}
 
 	if (auth != NULL && nvlist_add_nvlist(fmri, FM_FMRI_AUTHORITY,
 	    (nvlist_t *)auth) != 0)
 		atomic_inc_64(failedp);
 
 	if (nvlist_add_string(fmri, FM_FMRI_MEM_UNUM, unum) != 0)
 		atomic_inc_64(failedp);
 
 	if (serial == NULL)
 		return;
 
 	/* The serial is stored as a one-element string array */
 	if (nvlist_add_string_array(fmri, FM_FMRI_MEM_SERIAL_ID,
 	    (char **)&serial, 1) != 0)
 		atomic_inc_64(failedp);
 
 	if (offset != no_offset && nvlist_add_uint64(fmri,
 	    FM_FMRI_MEM_OFFSET, offset) != 0)
 		atomic_inc_64(failedp);
 }
 
 /*
  * Fill in a zfs fmri: version, scheme, pool guid and, when vdev_guid is
  * non-zero, the vdev guid.  An unsupported version or a failure to add
  * the version or scheme abandons the set-up; every failure is counted in
  * the fmri-set-failed kstat.
  */
 void
 fm_fmri_zfs_set(nvlist_t *fmri, int version, uint64_t pool_guid,
     uint64_t vdev_guid)
 {
 	uint64_t *failedp = &erpt_kstat_data.fmri_set_failed.value.ui64;
 
 	if (version != ZFS_SCHEME_VERSION0 ||
 	    nvlist_add_uint8(fmri, FM_VERSION, version) != 0 ||
 	    nvlist_add_string(fmri, FM_FMRI_SCHEME, FM_FMRI_SCHEME_ZFS) != 0) {
 		atomic_inc_64(failedp);
 		return;
 	}
 
 	if (nvlist_add_uint64(fmri, FM_FMRI_ZFS_POOL, pool_guid) != 0)
 		atomic_inc_64(failedp);
 
 	if (vdev_guid != 0 &&
 	    nvlist_add_uint64(fmri, FM_FMRI_ZFS_VDEV, vdev_guid) != 0)
 		atomic_inc_64(failedp);
 }
 
 /*
  * Return ena with its generation field advanced by one for format 1 and
  * format 2 ENAs; any other format yields 0.
  */
 uint64_t
 fm_ena_increment(uint64_t ena)
 {
 	switch (ENA_FORMAT(ena)) {
 	case FM_ENA_FMT1:
 		return (ena + (1 << ENA_FMT1_GEN_SHFT));
 	case FM_ENA_FMT2:
 		return (ena + (1 << ENA_FMT2_GEN_SHFT));
 	default:
 		return (0);
 	}
 }
 
 /*
  * Build an ENA of the requested format.  Format 1 combines the format,
  * the cpu id and a time value, using gethrtime() when timestamp is zero;
  * format 2 combines the format and timestamp only.  Any other format
  * yields 0.
  */
 uint64_t
 fm_ena_generate_cpu(uint64_t timestamp, processorid_t cpuid, uchar_t format)
 {
 	if (format == FM_ENA_FMT2) {
 		return ((uint64_t)((format & ENA_FORMAT_MASK) |
 		    ((timestamp << ENA_FMT2_TIME_SHFT) & ENA_FMT2_TIME_MASK)));
 	}
 
 	if (format != FM_ENA_FMT1)
 		return (0);
 
 	if (timestamp) {
 		return ((uint64_t)((format & ENA_FORMAT_MASK) |
 		    ((cpuid << ENA_FMT1_CPUID_SHFT) &
 		    ENA_FMT1_CPUID_MASK) |
 		    ((timestamp << ENA_FMT1_TIME_SHFT) &
 		    ENA_FMT1_TIME_MASK)));
 	}
 
 	/* No timestamp supplied: fall back to the current gethrtime() */
 	return ((uint64_t)((format & ENA_FORMAT_MASK) |
 	    ((cpuid << ENA_FMT1_CPUID_SHFT) &
 	    ENA_FMT1_CPUID_MASK) |
 	    ((gethrtime() << ENA_FMT1_TIME_SHFT) &
 	    ENA_FMT1_TIME_MASK)));
 }
 
 /*
  * Build an ENA for the cpu this thread is running on; preemption is
  * disabled around the cpu id lookup and the ENA construction.
  */
 uint64_t
 fm_ena_generate(uint64_t timestamp, uchar_t format)
 {
 	uint64_t new_ena;
 	processorid_t this_cpu;
 
 	kpreempt_disable();
 	this_cpu = getcpuid();
 	new_ena = fm_ena_generate_cpu(timestamp, this_cpu, format);
 	kpreempt_enable();
 
 	return (new_ena);
 }
 
 /*
  * Extract the generation field of a format 1 or format 2 ENA; any other
  * format yields 0.
  */
 uint64_t
 fm_ena_generation_get(uint64_t ena)
 {
 	switch (ENA_FORMAT(ena)) {
 	case FM_ENA_FMT1:
 		return ((ena & ENA_FMT1_GEN_MASK) >> ENA_FMT1_GEN_SHFT);
 	case FM_ENA_FMT2:
 		return ((ena & ENA_FMT2_GEN_MASK) >> ENA_FMT2_GEN_SHFT);
 	default:
 		return (0);
 	}
 }
 
 /* Extract the format field of an ENA. */
 uchar_t
 fm_ena_format_get(uint64_t ena)
 {
 	uchar_t fmt = ENA_FORMAT(ena);
 
 	return (fmt);
 }
 
 /*
  * Extract the id field of a format 1 or format 2 ENA; any other format
  * yields 0.
  */
 uint64_t
 fm_ena_id_get(uint64_t ena)
 {
 	switch (ENA_FORMAT(ena)) {
 	case FM_ENA_FMT1:
 		return ((ena & ENA_FMT1_ID_MASK) >> ENA_FMT1_ID_SHFT);
 	case FM_ENA_FMT2:
 		return ((ena & ENA_FMT2_ID_MASK) >> ENA_FMT2_ID_SHFT);
 	default:
 		return (0);
 	}
 }
 
 /*
  * Extract the time field of a format 1 or format 2 ENA; any other format
  * yields 0.
  */
 uint64_t
 fm_ena_time_get(uint64_t ena)
 {
 	switch (ENA_FORMAT(ena)) {
 	case FM_ENA_FMT1:
 		return ((ena & ENA_FMT1_TIME_MASK) >> ENA_FMT1_TIME_SHFT);
 	case FM_ENA_FMT2:
 		return ((ena & ENA_FMT2_TIME_MASK) >> ENA_FMT2_TIME_SHFT);
 	default:
 		return (0);
 	}
 }
 
 #ifdef _KERNEL
 /*
  * Record one rate limited ereport.  The event rate limiting code calls
  * this so that zfs_zevent_next() can fold the number of rate limited
  * events into the 'dropped' count it reports to the user.
  */
 void
 fm_erpt_dropped_increment(void)
 {
 	uint64_t *limitedp = &ratelimit_dropped;
 
 	atomic_inc_64(limitedp);
 }
 
 /*
  * Set up the zevent machinery: reset the queue state, publish the
  * "zfs:0:fm" kstat (a creation failure is only logged), initialize the
  * lock, event list and condition variable, then start ereport support.
  */
 void
 fm_init(void)
 {
 	zevent_flags = 0;
 	zevent_len_cur = 0;
 
 	/* Initialize zevent allocation and generation kstats */
 	fm_ksp = kstat_create("zfs", 0, "fm", "misc", KSTAT_TYPE_NAMED,
 	    sizeof (struct erpt_kstat) / sizeof (kstat_named_t),
 	    KSTAT_FLAG_VIRTUAL);
 
 	if (fm_ksp == NULL) {
 		cmn_err(CE_NOTE, "failed to create fm/misc kstat\n");
 	} else {
 		fm_ksp->ks_data = &erpt_kstat_data;
 		kstat_install(fm_ksp);
 	}
 
 	mutex_init(&zevent_lock, NULL, MUTEX_DEFAULT, NULL);
 	list_create(&zevent_list, sizeof (zevent_t),
 	    offsetof(zevent_t, ev_node));
 	cv_init(&zevent_cv, NULL, CV_DEFAULT, NULL);
 
 	zfs_ereport_init();
 }
 
 /*
  * Tear down the zevent machinery: stop ereport support, drain all queued
  * events, flag shutdown and wake every waiter, wait until no thread is
  * left in zfs_zevent_wait(), then destroy the synchronization objects and
  * remove the kstat.
  */
 void
 fm_fini(void)
 {
 	int drained;
 
 	zfs_ereport_fini();
 
 	zfs_zevent_drain_all(&drained);
 
 	mutex_enter(&zevent_lock);
 	cv_broadcast(&zevent_cv);
 	zevent_flags |= ZEVENT_SHUTDOWN;
 
 	/* Yield with the lock dropped until every waiter has left */
 	for (;;) {
 		if (zevent_waiters <= 0)
 			break;
 		mutex_exit(&zevent_lock);
 		schedule();
 		mutex_enter(&zevent_lock);
 	}
 	mutex_exit(&zevent_lock);
 
 	cv_destroy(&zevent_cv);
 	list_destroy(&zevent_list);
 	mutex_destroy(&zevent_lock);
 
 	if (fm_ksp == NULL)
 		return;
 
 	kstat_delete(fm_ksp);
 	fm_ksp = NULL;
 }
 #endif /* _KERNEL */
 
 /*
  * Expose zfs_zevent_len_max as an INT module parameter.
  * NOTE(review): ZMOD_RW presumably makes it writable at runtime - confirm.
  */
 ZFS_MODULE_PARAM(zfs_zevent, zfs_zevent_, len_max, INT, ZMOD_RW,
 	"Max event queue length");
diff --git a/sys/contrib/openzfs/module/zfs/zfs_ioctl.c b/sys/contrib/openzfs/module/zfs/zfs_ioctl.c
index 0d5536cf7cb0..96a021acbc95 100644
--- a/sys/contrib/openzfs/module/zfs/zfs_ioctl.c
+++ b/sys/contrib/openzfs/module/zfs/zfs_ioctl.c
@@ -1,7778 +1,7775 @@
 /*
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
  * Common Development and Distribution License (the "License").
  * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
  * See the License for the specific language governing permissions
  * and limitations under the License.
  *
  * When distributing Covered Code, include this CDDL HEADER in each
  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  * If applicable, add the following below this CDDL HEADER, with the
  * fields enclosed by brackets "[]" replaced with your own identifying
  * information: Portions Copyright [yyyy] [name of copyright owner]
  *
  * CDDL HEADER END
  */
 
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  * Portions Copyright 2011 Martin Matuska
  * Copyright 2015, OmniTI Computer Consulting, Inc. All rights reserved.
  * Portions Copyright 2012 Pawel Jakub Dawidek <pawel@dawidek.net>
  * Copyright (c) 2014, 2016 Joyent, Inc. All rights reserved.
  * Copyright 2016 Nexenta Systems, Inc.  All rights reserved.
  * Copyright (c) 2014, Joyent, Inc. All rights reserved.
  * Copyright (c) 2011, 2020 by Delphix. All rights reserved.
  * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
  * Copyright (c) 2013 Steven Hartland. All rights reserved.
  * Copyright (c) 2014 Integros [integros.com]
  * Copyright 2016 Toomas Soome <tsoome@me.com>
  * Copyright (c) 2016 Actifio, Inc. All rights reserved.
  * Copyright (c) 2018, loli10K <ezomori.nozomu@gmail.com>. All rights reserved.
  * Copyright 2017 RackTop Systems.
  * Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
  * Copyright (c) 2019 Datto Inc.
  * Copyright (c) 2019, 2020 by Christian Schwarz. All rights reserved.
  * Copyright (c) 2019, Klara Inc.
  * Copyright (c) 2019, Allan Jude
  */
 
 /*
  * ZFS ioctls.
  *
  * This file handles the ioctls to /dev/zfs, used for configuring ZFS storage
  * pools and filesystems, e.g. with /sbin/zfs and /sbin/zpool.
  *
  * There are two ways that we handle ioctls: the legacy way where almost
  * all of the logic is in the ioctl callback, and the new way where most
  * of the marshalling is handled in the common entry point, zfsdev_ioctl().
  *
  * Non-legacy ioctls should be registered by calling
  * zfs_ioctl_register() from zfs_ioctl_init().  The ioctl is invoked
  * from userland by lzc_ioctl().
  *
  * The registration arguments are as follows:
  *
  * const char *name
  *   The name of the ioctl.  This is used for history logging.  If the
  *   ioctl returns successfully (the callback returns 0), and allow_log
  *   is true, then a history log entry will be recorded with the input &
  *   output nvlists.  The log entry can be printed with "zpool history -i".
  *
  * zfs_ioc_t ioc
  *   The ioctl request number, which userland will pass to ioctl(2).
  *   We want newer versions of libzfs and libzfs_core to run against
  *   existing zfs kernel modules (i.e. a deferred reboot after an update).
  *   Therefore the ioctl numbers cannot change from release to release.
  *
  * zfs_secpolicy_func_t *secpolicy
  *   This function will be called before the zfs_ioc_func_t, to
  *   determine if this operation is permitted.  It should return EPERM
  *   on failure, and 0 on success.  Checks include determining if the
  *   dataset is visible in this zone, and if the user has either all
  *   zfs privileges in the zone (SYS_MOUNT), or has been granted permission
  *   to do this operation on this dataset with "zfs allow".
  *
  * zfs_ioc_namecheck_t namecheck
  *   This specifies what to expect in the zfs_cmd_t:zc_name -- a pool
  *   name, a dataset name, or nothing.  If the name is not well-formed,
  *   the ioctl will fail and the callback will not be called.
  *   Therefore, the callback can assume that the name is well-formed
  *   (e.g. is null-terminated, doesn't have more than one '@' character,
  *   doesn't have invalid characters).
  *
  * zfs_ioc_poolcheck_t pool_check
  *   This specifies requirements on the pool state.  If the pool does
  *   not meet them (is suspended or is readonly), the ioctl will fail
  *   and the callback will not be called.  If any checks are specified
  *   (i.e. it is not POOL_CHECK_NONE), namecheck must not be NO_NAME.
  *   Multiple checks can be or-ed together (e.g. POOL_CHECK_SUSPENDED |
  *   POOL_CHECK_READONLY).
  *
  * zfs_ioc_key_t *nvl_keys
  *  The list of expected/allowable innvl input keys. This list is used
  *  to validate the nvlist input to the ioctl.
  *
  * boolean_t smush_outnvlist
  *   If smush_outnvlist is true, then the output is presumed to be a
  *   list of errors, and it will be "smushed" down to fit into the
  *   caller's buffer, by removing some entries and replacing them with a
  *   single "N_MORE_ERRORS" entry indicating how many were removed.  See
  *   nvlist_smush() for details.  If smush_outnvlist is false, and the
  *   outnvlist does not fit into the userland-provided buffer, then the
  *   ioctl will fail with ENOMEM.
  *
  * zfs_ioc_func_t *func
  *   The callback function that will perform the operation.
  *
  *   The callback should return 0 on success, or an error number on
  *   failure.  If the function fails, the userland ioctl will return -1,
  *   and errno will be set to the callback's return value.  The callback
  *   will be called with the following arguments:
  *
  *   const char *name
  *     The name of the pool or dataset to operate on, from
  *     zfs_cmd_t:zc_name.  The 'namecheck' argument specifies the
  *     expected type (pool, dataset, or none).
  *
  *   nvlist_t *innvl
  *     The input nvlist, deserialized from zfs_cmd_t:zc_nvlist_src.  Or
  *     NULL if no input nvlist was provided.  Changes to this nvlist are
  *     ignored.  If the input nvlist could not be deserialized, the
  *     ioctl will fail and the callback will not be called.
  *
  *   nvlist_t *outnvl
  *     The output nvlist, initially empty.  The callback can fill it in,
  *     and it will be returned to userland by serializing it into
  *     zfs_cmd_t:zc_nvlist_dst.  If it is non-empty, and serialization
  *     fails (e.g. because the caller didn't supply a large enough
  *     buffer), then the overall ioctl will fail.  See the
  *     'smush_outnvlist' argument above for additional behaviors.
  *
  *     There are two typical uses of the output nvlist:
  *       - To return state, e.g. property values.  In this case,
  *         smush_outnvlist should be false.  If the buffer was not large
  *         enough, the caller will reallocate a larger buffer and try
  *         the ioctl again.
  *
  *       - To return multiple errors from an ioctl which makes on-disk
  *         changes.  In this case, smush_outnvlist should be true.
  *         Ioctls which make on-disk modifications should generally not
  *         use the outnvl if they succeed, because the caller can not
  *         distinguish between the operation failing, and
  *         deserialization failing.
  *
  * IOCTL Interface Errors
  *
  * The following ioctl input errors can be returned:
  *   ZFS_ERR_IOC_CMD_UNAVAIL	the ioctl number is not supported by kernel
  *   ZFS_ERR_IOC_ARG_UNAVAIL	an input argument is not supported by kernel
  *   ZFS_ERR_IOC_ARG_REQUIRED	a required input argument is missing
  *   ZFS_ERR_IOC_ARG_BADTYPE	an input argument has an invalid type
  */
 
 #include <sys/types.h>
 #include <sys/param.h>
 #include <sys/errno.h>
 #include <sys/uio_impl.h>
 #include <sys/file.h>
 #include <sys/kmem.h>
 #include <sys/cmn_err.h>
 #include <sys/stat.h>
 #include <sys/zfs_ioctl.h>
 #include <sys/zfs_quota.h>
 #include <sys/zfs_vfsops.h>
 #include <sys/zfs_znode.h>
 #include <sys/zap.h>
 #include <sys/spa.h>
 #include <sys/spa_impl.h>
 #include <sys/vdev.h>
 #include <sys/vdev_impl.h>
 #include <sys/dmu.h>
 #include <sys/dsl_dir.h>
 #include <sys/dsl_dataset.h>
 #include <sys/dsl_prop.h>
 #include <sys/dsl_deleg.h>
 #include <sys/dmu_objset.h>
 #include <sys/dmu_impl.h>
 #include <sys/dmu_redact.h>
 #include <sys/dmu_tx.h>
 #include <sys/sunddi.h>
 #include <sys/policy.h>
 #include <sys/zone.h>
 #include <sys/nvpair.h>
 #include <sys/pathname.h>
 #include <sys/fs/zfs.h>
 #include <sys/zfs_ctldir.h>
 #include <sys/zfs_dir.h>
 #include <sys/zfs_onexit.h>
 #include <sys/zvol.h>
 #include <sys/dsl_scan.h>
 #include <sys/fm/util.h>
 #include <sys/dsl_crypt.h>
 #include <sys/rrwlock.h>
 #include <sys/zfs_file.h>
 
 #include <sys/dmu_recv.h>
 #include <sys/dmu_send.h>
 #include <sys/dmu_recv.h>
 #include <sys/dsl_destroy.h>
 #include <sys/dsl_bookmark.h>
 #include <sys/dsl_userhold.h>
 #include <sys/zfeature.h>
 #include <sys/zcp.h>
 #include <sys/zio_checksum.h>
 #include <sys/vdev_removal.h>
 #include <sys/vdev_impl.h>
 #include <sys/vdev_initialize.h>
 #include <sys/vdev_trim.h>
 
 #include "zfs_namecheck.h"
 #include "zfs_prop.h"
 #include "zfs_deleg.h"
 #include "zfs_comutil.h"
 
 #include <sys/lua/lua.h>
 #include <sys/lua/lauxlib.h>
 #include <sys/zfs_ioctl_impl.h>
 
 kmutex_t zfsdev_state_lock;
 zfsdev_state_t *zfsdev_state_list;
 
 /*
  * Limit maximum nvlist size.  We don't want users passing in insane values
  * for zc->zc_nvlist_src_size, since we will need to allocate that much memory.
  * Defaults to 0=auto which is handled by platform code.
  */
 unsigned long zfs_max_nvlist_src_size = 0;
 
 /*
  * When logging the output nvlist of an ioctl in the on-disk history, limit
  * the logged size to this many bytes.  This must be less than DMU_MAX_ACCESS.
  * This applies primarily to zfs_ioc_channel_program().
  */
 unsigned long zfs_history_output_max = 1024 * 1024;
 
 uint_t zfs_fsyncer_key;
 uint_t zfs_allow_log_key;
 
 /* DATA_TYPE_ANY is used when zkey_type can vary. */
 #define	DATA_TYPE_ANY	DATA_TYPE_UNKNOWN
 
 /*
  * Per-ioctl registration record.  The fields mirror the registration
  * arguments described in the comment at the top of this file.
  */
 typedef struct zfs_ioc_vec {
 	/* callback for an ioctl handled the legacy way */
 	zfs_ioc_legacy_func_t	*zvec_legacy_func;
 	/* callback for an ioctl handled the new (nvlist based) way */
 	zfs_ioc_func_t		*zvec_func;
 	/* permission check, called before the callback */
 	zfs_secpolicy_func_t	*zvec_secpolicy;
 	/* what zc_name is expected to hold (pool, dataset, nothing) */
 	zfs_ioc_namecheck_t	zvec_namecheck;
 	/* whether a successful call may be recorded in the history log */
 	boolean_t		zvec_allow_log;
 	/* required pool state (suspended / readonly checks) */
 	zfs_ioc_poolcheck_t	zvec_pool_check;
 	/* whether an oversized error outnvlist is "smushed" to fit */
 	boolean_t		zvec_smush_outnvlist;
 	/* ioctl name, used for history logging */
 	const char		*zvec_name;
 	/* expected/allowable innvl keys and the number of entries */
 	const zfs_ioc_key_t	*zvec_nvl_keys;
 	size_t			zvec_nvl_key_count;
 } zfs_ioc_vec_t;
 
 /*
  * Delegated-permission name for each user/group/project quota property.
  * This array is indexed by zfs_userquota_prop_t, so the order of the
  * entries has to follow that enum.  Users of the table check the index
  * against ZFS_NUM_USERQUOTA_PROPS before dereferencing it.
  */
 static const char *userquota_perms[] = {
 	ZFS_DELEG_PERM_USERUSED,
 	ZFS_DELEG_PERM_USERQUOTA,
 	ZFS_DELEG_PERM_GROUPUSED,
 	ZFS_DELEG_PERM_GROUPQUOTA,
 	ZFS_DELEG_PERM_USEROBJUSED,
 	ZFS_DELEG_PERM_USEROBJQUOTA,
 	ZFS_DELEG_PERM_GROUPOBJUSED,
 	ZFS_DELEG_PERM_GROUPOBJQUOTA,
 	ZFS_DELEG_PERM_PROJECTUSED,
 	ZFS_DELEG_PERM_PROJECTQUOTA,
 	ZFS_DELEG_PERM_PROJECTOBJUSED,
 	ZFS_DELEG_PERM_PROJECTOBJQUOTA,
 };
 
 static int zfs_ioc_userspace_upgrade(zfs_cmd_t *zc);
 static int zfs_ioc_id_quota_upgrade(zfs_cmd_t *zc);
 static int zfs_check_settable(const char *name, nvpair_t *property,
     cred_t *cr);
 static int zfs_check_clearable(const char *dataset, nvlist_t *props,
     nvlist_t **errors);
 static int zfs_fill_zplprops_root(uint64_t, nvlist_t *, nvlist_t *,
     boolean_t *);
 int zfs_set_prop_nvlist(const char *, zprop_source_t, nvlist_t *, nvlist_t *);
 static int get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp);
 
 /*
  * Release a history buffer obtained from history_str_get().  Such buffers
  * are always HIS_MAX_RECORD_LEN bytes, which is the size freed here.
  */
 static void
 history_str_free(char *buf)
 {
 	kmem_free(buf, HIS_MAX_RECORD_LEN);
 }
 
 /*
  * Copy the history string referenced by zc->zc_history into a freshly
  * allocated HIS_MAX_RECORD_LEN byte buffer.
  *
  * Returns NULL when no history string was supplied (zc_history == 0) or
  * when the copy-in fails; otherwise returns the NUL-terminated buffer, which
  * the caller must release with history_str_free().
  */
 static char *
 history_str_get(zfs_cmd_t *zc)
 {
 	char *record;
 	int rc;
 
 	if (zc->zc_history == 0)
 		return (NULL);
 
 	record = kmem_alloc(HIS_MAX_RECORD_LEN, KM_SLEEP);
 	rc = copyinstr((void *)(uintptr_t)zc->zc_history, record,
 	    HIS_MAX_RECORD_LEN, NULL);
 	if (rc != 0) {
 		history_str_free(record);
 		return (NULL);
 	}
 
 	/* Force termination regardless of what was copied in. */
 	record[HIS_MAX_RECORD_LEN - 1] = '\0';
 	return (record);
 }
 
 /*
  * Return non-zero if the spa version of pool 'name' is less than the
  * requested version.  Returns 0 as well when the pool cannot be opened.
  */
 static int
 zfs_earlier_version(const char *name, int version)
 {
 	spa_t *spa;
 	int earlier;
 
 	if (spa_open(name, &spa, FTAG) != 0)
 		return (0);
 
 	earlier = (spa_version(spa) < version) ? 1 : 0;
 	spa_close(spa, FTAG);
 
 	return (earlier);
 }
 
 /*
  * Return TRUE if the ZPL version of dataset 'name' is less than the
  * requested version.  TRUE is also the answer when the objset cannot be
  * held, is not a DMU_OST_ZFS objset, or its version cannot be read.
  */
 static boolean_t
 zpl_earlier_version(const char *name, int version)
 {
 	objset_t *os;
 	uint64_t zplversion;
 	boolean_t earlier = B_TRUE;
 
 	if (dmu_objset_hold(name, FTAG, &os) != 0)
 		return (B_TRUE);
 
 	if (dmu_objset_type(os) == DMU_OST_ZFS) {
 		/* XXX reading from non-owned objset */
 		if (zfs_get_zplprop(os, ZFS_PROP_VERSION, &zplversion) == 0)
 			earlier = zplversion < version;
 	}
 
 	dmu_objset_rele(os, FTAG);
 	return (earlier);
 }
 
 /*
  * Append the history string supplied with the ioctl (if any) to the history
  * of the pool named in zc->zc_name.  Nothing is logged when no string was
  * supplied, the pool cannot be opened, or the pool predates
  * SPA_VERSION_ZPOOL_HISTORY.
  */
 static void
 zfs_log_history(zfs_cmd_t *zc)
 {
 	char *record = history_str_get(zc);
 	spa_t *spa;
 
 	if (record == NULL)
 		return;
 
 	if (spa_open(zc->zc_name, &spa, FTAG) == 0) {
 		if (spa_version(spa) >= SPA_VERSION_ZPOOL_HISTORY)
 			(void) spa_history_log(spa, record);
 		spa_close(spa, FTAG);
 	}
 
 	history_str_free(record);
 }
 
 /*
  * Policy for top-level read operations (list pools).  Requires no privileges,
  * and can be used in the local zone, as there is no associated dataset.
  *
  * All three arguments are ignored; the result is always 0 (permitted).
  */
 /* ARGSUSED */
 static int
 zfs_secpolicy_none(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 {
 	return (0);
 }
 
 /*
  * Policy for dataset read operations (list children, get statistics).
  * No privilege is required, but outside the global zone the dataset named
  * by zc->zc_name has to be visible in the caller's zone; otherwise ENOENT
  * is returned.
  */
 /* ARGSUSED */
 static int
 zfs_secpolicy_read(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 {
 	if (!INGLOBALZONE(curproc) &&
 	    !zone_dataset_visible(zc->zc_name, NULL))
 		return (SET_ERROR(ENOENT));
 
 	return (0);
 }
 
 /*
  * Zone check for write-type access to 'dataset', given the value of its
  * 'zoned' property.
  *
  * Returns ENOENT if the dataset is not visible from a local zone, EPERM if
  * the zone rules below forbid the access, and 0 otherwise.
  */
 static int
 zfs_dozonecheck_impl(const char *dataset, uint64_t zoned, cred_t *cr)
 {
 	int writable = 1;
 
 	if (INGLOBALZONE(curproc)) {
 		/*
 		 * If the fs is zoned, only root can access it from the
 		 * global zone.
 		 */
 		if (secpolicy_zfs(cr) && zoned)
 			return (SET_ERROR(EPERM));
 		return (0);
 	}
 
 	/*
 	 * The dataset must be visible by this zone -- check this before
 	 * anything else so they don't see EPERM on something they
 	 * shouldn't know about.
 	 */
 	if (!zone_dataset_visible(dataset, &writable))
 		return (SET_ERROR(ENOENT));
 
 	/*
 	 * In a local zone the 'zoned' property must be set and the dataset
 	 * must be writable by this zone.
 	 */
 	if (!zoned || !writable)
 		return (SET_ERROR(EPERM));
 
 	return (0);
 }
 
 /*
  * Zone check by dataset name: look up the 'zoned' property and hand off to
  * zfs_dozonecheck_impl().  A failed property lookup is reported as ENOENT.
  */
 static int
 zfs_dozonecheck(const char *dataset, cred_t *cr)
 {
 	uint64_t zoned;
 	int err;
 
 	err = dsl_prop_get_integer(dataset, zfs_prop_to_name(ZFS_PROP_ZONED),
 	    &zoned, NULL);
 	if (err != 0)
 		return (SET_ERROR(ENOENT));
 
 	return (zfs_dozonecheck_impl(dataset, zoned, cr));
 }
 
 /*
  * Same as zfs_dozonecheck(), but for a dataset the caller already holds:
  * the 'zoned' property is read through 'ds' rather than looked up by name.
  * A failed property lookup is reported as ENOENT.
  */
 static int
 zfs_dozonecheck_ds(const char *dataset, dsl_dataset_t *ds, cred_t *cr)
 {
 	uint64_t zoned;
 	int err;
 
 	err = dsl_prop_get_int_ds(ds, zfs_prop_to_name(ZFS_PROP_ZONED),
 	    &zoned);
 	if (err != 0)
 		return (SET_ERROR(ENOENT));
 
 	return (zfs_dozonecheck_impl(dataset, zoned, cr));
 }
 
 /*
  * Check whether 'cr' may perform the operation named by 'perm' on the held
  * dataset 'ds' (whose name is 'name').
  *
  * The zone check runs first.  If it passes, the operation is allowed when
  * secpolicy_zfs() succeeds; otherwise the decision is left to the
  * delegation check in dsl_deleg_access_impl().
  */
 static int
 zfs_secpolicy_write_perms_ds(const char *name, dsl_dataset_t *ds,
     const char *perm, cred_t *cr)
 {
 	int error = zfs_dozonecheck_ds(name, ds, cr);
 
 	if (error != 0)
 		return (error);
 
 	if (secpolicy_zfs(cr) == 0)
 		return (0);
 
 	return (dsl_deleg_access_impl(ds, perm, cr));
 }
 
 /*
  * Check whether 'cr' may perform the operation named by 'perm' on the
  * dataset 'name'.  Returns 0 if permitted, otherwise the error from the
  * pool/dataset hold or from zfs_secpolicy_write_perms_ds().
  */
 static int
 zfs_secpolicy_write_perms(const char *name, const char *perm, cred_t *cr)
 {
 	dsl_pool_t *dp;
 	dsl_dataset_t *ds;
 	int error;
 
 	/*
 	 * First do a quick check for root in the global zone, which
 	 * is allowed to do all write_perms.  This ensures that zfs_ioc_*
 	 * will get to handle nonexistent datasets.
 	 */
 	if (INGLOBALZONE(curproc) && secpolicy_zfs(cr) == 0)
 		return (0);
 
 	if ((error = dsl_pool_hold(name, FTAG, &dp)) != 0)
 		return (error);
 
 	error = dsl_dataset_hold(dp, name, FTAG, &ds);
 	if (error == 0) {
 		error = zfs_secpolicy_write_perms_ds(name, ds, perm, cr);
 		dsl_dataset_rele(ds, FTAG);
 	}
 
 	dsl_pool_rele(dp, FTAG);
 	return (error);
 }
 
 /*
  * Policy for setting the security label property.
  *
  * name   - dataset whose label is being set
  * strval - new label, either ZFS_MLSLABEL_DEFAULT or a hex label string
  * cr     - credential checked for the upgrade/downgrade privilege
  *
  * Returns 0 for success, non-zero for access and other errors.  Without
  * HAVE_MLSLABEL the function always fails with ENOTSUP.
  */
 static int
 zfs_set_slabel_policy(const char *name, const char *strval, cred_t *cr)
 {
 #ifdef HAVE_MLSLABEL
 	char		ds_hexsl[MAXNAMELEN];
 	bslabel_t	ds_sl, new_sl;
 	boolean_t	new_default = FALSE;
 	uint64_t	zoned;
 	int		needed_priv = -1;
 	int		error;
 
 	/* First get the existing dataset label. */
 	error = dsl_prop_get(name, zfs_prop_to_name(ZFS_PROP_MLSLABEL),
 	    1, sizeof (ds_hexsl), &ds_hexsl, NULL);
 	if (error != 0)
 		return (SET_ERROR(EPERM));
 
 	if (strcasecmp(strval, ZFS_MLSLABEL_DEFAULT) == 0)
 		new_default = TRUE;
 
 	/* The label must be translatable */
 	if (!new_default && (hexstr_to_label(strval, &new_sl) != 0))
 		return (SET_ERROR(EINVAL));
 
 	/*
 	 * In a non-global zone, disallow attempts to set a label that
 	 * doesn't match that of the zone; otherwise no other checks
 	 * are needed.
 	 */
 	if (!INGLOBALZONE(curproc)) {
 		if (new_default || !blequal(&new_sl, CR_SL(CRED())))
 			return (SET_ERROR(EPERM));
 		return (0);
 	}
 
 	/*
 	 * For global-zone datasets (i.e., those whose zoned property is
 	 * "off"), verify that the specified new label is valid for the
 	 * global zone.
 	 */
 	if (dsl_prop_get_integer(name,
 	    zfs_prop_to_name(ZFS_PROP_ZONED), &zoned, NULL))
 		return (SET_ERROR(EPERM));
 	if (!zoned) {
 		if (zfs_check_global_label(name, strval) != 0)
 			return (SET_ERROR(EPERM));
 	}
 
 	/*
 	 * If the existing dataset label is nondefault, check if the
 	 * dataset is mounted (label cannot be changed while mounted).
 	 * Get the zfsvfs_t; if there isn't one, then the dataset isn't
 	 * mounted (or isn't a dataset, doesn't exist, ...).
 	 */
 	if (strcasecmp(ds_hexsl, ZFS_MLSLABEL_DEFAULT) != 0) {
 		objset_t *os;
 		static const char *setsl_tag = "setsl_tag";
 
 		/*
 		 * Try to own the dataset; abort if there is any error,
 		 * (e.g., already mounted, in use, or other error).
 		 */
 		error = dmu_objset_own(name, DMU_OST_ZFS, B_TRUE, B_TRUE,
 		    setsl_tag, &os);
 		if (error != 0)
 			return (SET_ERROR(EPERM));
 
 		dmu_objset_disown(os, B_TRUE, setsl_tag);
 
 		if (new_default) {
 			needed_priv = PRIV_FILE_DOWNGRADE_SL;
 			goto out_check;
 		}
 
 		/*
 		 * Translate both labels before comparing them.  The existing
 		 * label has so far only been read as a hex string, so ds_sl
 		 * must be filled in here; comparing it untranslated would
 		 * read an uninitialized bslabel_t.
 		 */
 		if (hexstr_to_label(ds_hexsl, &ds_sl) != 0 ||
 		    hexstr_to_label(strval, &new_sl) != 0)
 			return (SET_ERROR(EPERM));
 
 		if (blstrictdom(&ds_sl, &new_sl))
 			needed_priv = PRIV_FILE_DOWNGRADE_SL;
 		else if (blstrictdom(&new_sl, &ds_sl))
 			needed_priv = PRIV_FILE_UPGRADE_SL;
 	} else {
 		/* dataset currently has a default label */
 		if (!new_default)
 			needed_priv = PRIV_FILE_UPGRADE_SL;
 	}
 
 out_check:
 	/* needed_priv stays -1 when no privilege is needed for the change. */
 	if (needed_priv != -1)
 		return (PRIV_POLICY(cr, needed_priv, B_FALSE, EPERM, NULL));
 	return (0);
 #else
 	return (SET_ERROR(ENOTSUP));
 #endif /* HAVE_MLSLABEL */
 }
 
 /*
  * Policy for setting (or inheriting) native property 'prop' on 'dsname'.
  *
  * A few properties get an extra check first; every property then has to
  * pass zfs_secpolicy_write_perms() with the property's own name as the
  * permission.  'propval' is only consulted for ZFS_PROP_MLSLABEL.
  */
 static int
 zfs_secpolicy_setprop(const char *dsname, zfs_prop_t prop, nvpair_t *propval,
     cred_t *cr)
 {
 	char *label;
 
 	/*
 	 * Check permissions for special properties.
 	 */
 	switch (prop) {
 	case ZFS_PROP_ZONED:
 		/*
 		 * Disallow setting of 'zoned' from within a local zone.
 		 */
 		if (!INGLOBALZONE(curproc))
 			return (SET_ERROR(EPERM));
 		break;
 
 	case ZFS_PROP_QUOTA:
 	case ZFS_PROP_FILESYSTEM_LIMIT:
 	case ZFS_PROP_SNAPSHOT_LIMIT:
 		if (!INGLOBALZONE(curproc)) {
 			char setpoint[ZFS_MAX_DATASET_NAME_LEN];
 			uint64_t zoned;
 
 			/*
 			 * Unprivileged users are allowed to modify the
 			 * limit on things *under* (ie. contained by)
 			 * the thing they own.
 			 */
 			if (dsl_prop_get_integer(dsname,
 			    zfs_prop_to_name(ZFS_PROP_ZONED), &zoned,
 			    setpoint) != 0)
 				return (SET_ERROR(EPERM));
 			if (!zoned || strlen(dsname) <= strlen(setpoint))
 				return (SET_ERROR(EPERM));
 		}
 		break;
 
 	case ZFS_PROP_MLSLABEL:
 		if (!is_system_labeled())
 			return (SET_ERROR(EPERM));
 
 		/* The label policy is only checked for a string value. */
 		if (nvpair_value_string(propval, &label) == 0) {
 			int err = zfs_set_slabel_policy(dsname, label, CRED());
 
 			if (err != 0)
 				return (err);
 		}
 		break;
 
 	default:
 		break;
 	}
 
 	return (zfs_secpolicy_write_perms(dsname, zfs_prop_to_name(prop), cr));
 }
 
 /*
  * Policy for setting delegated permissions on zc->zc_name.  Only the zone
  * check is done here.
  */
 /* ARGSUSED */
 static int
 zfs_secpolicy_set_fsacl(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 {
 	/*
 	 * permission to set permissions will be evaluated later in
 	 * dsl_deleg_can_allow()
 	 */
 	return (zfs_dozonecheck(zc->zc_name, cr));
 }
 
 /*
  * Policy for rollback: requires ZFS_DELEG_PERM_ROLLBACK on zc->zc_name.
  */
 /* ARGSUSED */
 static int
 zfs_secpolicy_rollback(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 {
 	const char *dsname = zc->zc_name;
 
 	return (zfs_secpolicy_write_perms(dsname, ZFS_DELEG_PERM_ROLLBACK, cr));
 }
 
 /*
  * Policy for the legacy send ioctl: requires ZFS_DELEG_PERM_SEND on the
  * snapshot identified by zc->zc_sendobj.
  *
  * zc->zc_name must contain a '@' (EINVAL otherwise) and is used to find the
  * pool.  It is then overwritten with the current name of the snapshot
  * object, and that name is what the permission and zone checks see.
  */
 /* ARGSUSED */
 static int
 zfs_secpolicy_send(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 {
 	dsl_pool_t *dp;
 	dsl_dataset_t *ds;
 	int error;
 
 	if (strchr(zc->zc_name, '@') == NULL)
 		return (SET_ERROR(EINVAL));
 
 	if ((error = dsl_pool_hold(zc->zc_name, FTAG, &dp)) != 0)
 		return (error);
 
 	error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &ds);
 	if (error == 0) {
 		/*
 		 * Generate the current snapshot name from the given
 		 * objsetid, then use that name for the secpolicy/zone
 		 * checks.
 		 */
 		dsl_dataset_name(ds, zc->zc_name);
 
 		error = zfs_secpolicy_write_perms_ds(zc->zc_name, ds,
 		    ZFS_DELEG_PERM_SEND, cr);
 		dsl_dataset_rele(ds, FTAG);
 	}
 
 	dsl_pool_rele(dp, FTAG);
 	return (error);
 }
 
 /*
  * Policy for the nvlist-based send ioctl: requires ZFS_DELEG_PERM_SEND on
  * zc->zc_name.
  */
 /* ARGSUSED */
 static int
 zfs_secpolicy_send_new(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 {
 	const char *dsname = zc->zc_name;
 
 	return (zfs_secpolicy_write_perms(dsname, ZFS_DELEG_PERM_SEND, cr));
 }
 
 /*
  * Policy stub for the share ioctl: always fails with ENOTSUP.  None of the
  * arguments are examined.
  */
 /* ARGSUSED */
 static int
 zfs_secpolicy_share(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 {
 	return (SET_ERROR(ENOTSUP));
 }
 
 /*
  * Policy stub for the SMB ACL ioctl: always fails with ENOTSUP.  None of
  * the arguments are examined.
  */
 /* ARGSUSED */
 static int
 zfs_secpolicy_smb_acl(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 {
 	return (SET_ERROR(ENOTSUP));
 }
 
 /*
  * Compute the parent of 'datasetname' into 'parent'.
  *
  * datasetname - snapshot ("fs@snap") or dataset ("a/b") name
  * parent      - output buffer
  * parentsize  - size of 'parent' in bytes, including the terminator
  *
  * The name is copied (truncated to fit if necessary) and cut at the last
  * '@', or failing that at the last '/'.  Returns 0 on success, ENOENT when
  * the copied name has neither separator (e.g. a pool name), and EINVAL when
  * parentsize leaves no room for even an empty string.
  */
 static int
 zfs_get_parent(const char *datasetname, char *parent, int parentsize)
 {
 	char *cp;
 
 	if (parentsize <= 0)
 		return (SET_ERROR(EINVAL));
 
 	/*
 	 * Remove the @bla or /bla from the end of the name to get the parent.
 	 *
 	 * strncpy() does not terminate the destination when the source is
 	 * parentsize bytes or longer, so terminate explicitly before the
 	 * buffer is searched.
 	 */
 	(void) strncpy(parent, datasetname, parentsize);
 	parent[parentsize - 1] = '\0';
 
 	cp = strrchr(parent, '@');
 	if (cp != NULL) {
 		cp[0] = '\0';
 	} else {
 		cp = strrchr(parent, '/');
 		if (cp == NULL)
 			return (SET_ERROR(ENOENT));
 		cp[0] = '\0';
 	}
 
 	return (0);
 }
 
 /*
  * Destroying 'name' requires both ZFS_DELEG_PERM_MOUNT and
  * ZFS_DELEG_PERM_DESTROY on it; the mount permission is checked first.
  */
 int
 zfs_secpolicy_destroy_perms(const char *name, cred_t *cr)
 {
 	int error;
 
 	error = zfs_secpolicy_write_perms(name, ZFS_DELEG_PERM_MOUNT, cr);
 	if (error == 0) {
 		error = zfs_secpolicy_write_perms(name,
 		    ZFS_DELEG_PERM_DESTROY, cr);
 	}
 
 	return (error);
 }
 
 /*
  * Policy for destroying zc->zc_name; see zfs_secpolicy_destroy_perms().
  */
 /* ARGSUSED */
 static int
 zfs_secpolicy_destroy(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 {
 	const char *dsname = zc->zc_name;
 
 	return (zfs_secpolicy_destroy_perms(dsname, cr));
 }
 
 /*
  * Destroying snapshots with delegated permissions requires
  * descendant mount and destroy permissions.
  *
  * Each name in the "snaps" nvlist of 'innvl' is checked in turn.  The first
  * failure other than ENOENT ends the walk and is returned; names that do
  * not exist are removed from the list.
  */
 /* ARGSUSED */
 static int
 zfs_secpolicy_destroy_snaps(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 {
 	nvlist_t *snaps = fnvlist_lookup_nvlist(innvl, "snaps");
 	nvpair_t *pair = nvlist_next_nvpair(snaps, NULL);
 
 	while (pair != NULL) {
 		/* Fetch the successor first: 'pair' may be removed below. */
 		nvpair_t *next = nvlist_next_nvpair(snaps, pair);
 		int error;
 
 		error = zfs_secpolicy_destroy_perms(nvpair_name(pair), cr);
 		if (error == ENOENT) {
 			/*
 			 * Ignore any snapshots that don't exist (we consider
 			 * them "already destroyed").  Remove the name from the
 			 * nvl here in case the snapshot is created between
 			 * now and when we try to destroy it (in which case
 			 * we don't want to destroy it since we haven't
 			 * checked for permission).
 			 */
 			fnvlist_remove_nvpair(snaps, pair);
 		} else if (error != 0) {
 			return (error);
 		}
 
 		pair = next;
 	}
 
 	return (0);
 }
 
 /*
  * Renaming 'from' to 'to' requires the rename and mount permissions on
  * 'from', and the create and mount permissions on the parent of 'to'.
  * The checks run in that order and the first error is returned.
  */
 int
 zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr)
 {
 	char	parentname[ZFS_MAX_DATASET_NAME_LEN];
 	int	error;
 
 	error = zfs_secpolicy_write_perms(from, ZFS_DELEG_PERM_RENAME, cr);
 	if (error == 0) {
 		error = zfs_secpolicy_write_perms(from,
 		    ZFS_DELEG_PERM_MOUNT, cr);
 	}
 	if (error == 0)
 		error = zfs_get_parent(to, parentname, sizeof (parentname));
 	if (error == 0) {
 		error = zfs_secpolicy_write_perms(parentname,
 		    ZFS_DELEG_PERM_CREATE, cr);
 	}
 	if (error == 0) {
 		error = zfs_secpolicy_write_perms(parentname,
 		    ZFS_DELEG_PERM_MOUNT, cr);
 	}
 
 	return (error);
 }
 
 /*
  * Policy for renaming zc->zc_name to zc->zc_value; see
  * zfs_secpolicy_rename_perms().
  */
 /* ARGSUSED */
 static int
 zfs_secpolicy_rename(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 {
 	const char *from = zc->zc_name;
 	const char *to = zc->zc_value;
 
 	return (zfs_secpolicy_rename_perms(from, to, cr));
 }
 
 /*
  * Policy for promoting the clone zc->zc_name.  Requires the promote
  * permission on the clone, the mount permission on the clone, and the
  * promote permission on the clone's origin, checked in that order.
  */
 /* ARGSUSED */
 static int
 zfs_secpolicy_promote(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 {
 	char parentname[ZFS_MAX_DATASET_NAME_LEN];
 	dsl_pool_t *dp;
 	dsl_dataset_t *clone;
 	dsl_dataset_t *origin = NULL;
 	dsl_dir_t *dd;
 	int error;
 
 	error = zfs_secpolicy_write_perms(zc->zc_name,
 	    ZFS_DELEG_PERM_PROMOTE, cr);
 	if (error != 0)
 		return (error);
 
 	error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
 	if (error != 0)
 		return (error);
 
 	error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &clone);
 	if (error != 0) {
 		dsl_pool_rele(dp, FTAG);
 		return (error);
 	}
 
 	/* The origin is found through the clone's dsl_dir. */
 	dd = clone->ds_dir;
 	error = dsl_dataset_hold_obj(dd->dd_pool,
 	    dsl_dir_phys(dd)->dd_origin_obj, FTAG, &origin);
 	if (error != 0) {
 		dsl_dataset_rele(clone, FTAG);
 		dsl_pool_rele(dp, FTAG);
 		return (error);
 	}
 
 	error = zfs_secpolicy_write_perms_ds(zc->zc_name, clone,
 	    ZFS_DELEG_PERM_MOUNT, cr);
 
 	dsl_dataset_name(origin, parentname);
 	if (error == 0) {
 		error = zfs_secpolicy_write_perms_ds(parentname, origin,
 		    ZFS_DELEG_PERM_PROMOTE, cr);
 	}
 
 	dsl_dataset_rele(clone, FTAG);
 	dsl_dataset_rele(origin, FTAG);
 	dsl_pool_rele(dp, FTAG);
 	return (error);
 }
 
 /*
  * Policy for receive: requires the receive, mount and create permissions
  * on zc->zc_name, checked in that order; the first error is returned.
  */
 /* ARGSUSED */
 static int
 zfs_secpolicy_recv(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 {
 	int error;
 
 	error = zfs_secpolicy_write_perms(zc->zc_name,
 	    ZFS_DELEG_PERM_RECEIVE, cr);
 	if (error == 0) {
 		error = zfs_secpolicy_write_perms(zc->zc_name,
 		    ZFS_DELEG_PERM_MOUNT, cr);
 	}
 	if (error == 0) {
 		error = zfs_secpolicy_write_perms(zc->zc_name,
 		    ZFS_DELEG_PERM_CREATE, cr);
 	}
 
 	return (error);
 }
 
 /*
  * Policy for the nvlist-based receive ioctl; identical to
  * zfs_secpolicy_recv().
  */
 /* ARGSUSED */
 static int
 zfs_secpolicy_recv_new(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 {
 	int error = zfs_secpolicy_recv(zc, innvl, cr);
 
 	return (error);
 }
 
 /*
  * Taking a snapshot of dataset 'name' requires ZFS_DELEG_PERM_SNAPSHOT
  * on it.
  */
 int
 zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr)
 {
 	int error;
 
 	error = zfs_secpolicy_write_perms(name, ZFS_DELEG_PERM_SNAPSHOT, cr);
 	return (error);
 }
 
 /*
  * Check for permission to create each snapshot in the nvlist.
  *
  * The names come from the "snaps" nvlist of 'innvl'.  A name without a '@'
  * yields EINVAL; otherwise the first permission failure is returned.
  */
 /* ARGSUSED */
 static int
 zfs_secpolicy_snapshot(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 {
 	nvlist_t *snaps = fnvlist_lookup_nvlist(innvl, "snaps");
 	nvpair_t *pair = NULL;
 
 	while ((pair = nvlist_next_nvpair(snaps, pair)) != NULL) {
 		char *snapname = nvpair_name(pair);
 		char *at = strchr(snapname, '@');
 		int error;
 
 		if (at == NULL)
 			return (SET_ERROR(EINVAL));
 
 		/*
 		 * Temporarily cut the name at the '@' so the check sees only
 		 * the dataset part, then restore it.
 		 */
 		*at = '\0';
 		error = zfs_secpolicy_snapshot_perms(snapname, cr);
 		*at = '@';
 
 		if (error != 0)
 			return (error);
 	}
 
 	return (0);
 }
 
 /*
  * Check for permission to create each bookmark in the nvlist.
  *
  * The bookmark names are the pair names of 'innvl'.  A name without a '#'
  * yields EINVAL; otherwise the first permission failure is returned.
  */
 /* ARGSUSED */
 static int
 zfs_secpolicy_bookmark(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 {
 	nvpair_t *pair = NULL;
 
 	while ((pair = nvlist_next_nvpair(innvl, pair)) != NULL) {
 		char *bmname = nvpair_name(pair);
 		char *hash = strchr(bmname, '#');
 		int error;
 
 		if (hash == NULL)
 			return (SET_ERROR(EINVAL));
 
 		/*
 		 * Temporarily cut the name at the '#' so the check sees only
 		 * the dataset part, then restore it.
 		 */
 		*hash = '\0';
 		error = zfs_secpolicy_write_perms(bmname,
 		    ZFS_DELEG_PERM_BOOKMARK, cr);
 		*hash = '#';
 
 		if (error != 0)
 			return (error);
 	}
 
 	return (0);
 }
 
 /*
  * Check for permission to destroy each bookmark named in 'innvl'; the
  * destroy permission is required on the bookmark's dataset.
  *
  * A name without a '#' yields EINVAL.  Entries whose dataset does not exist
  * are removed from 'innvl'; any other failure ends the walk and is returned.
  */
 /* ARGSUSED */
 static int
 zfs_secpolicy_destroy_bookmarks(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 {
 	nvpair_t *pair = nvlist_next_nvpair(innvl, NULL);
 
 	while (pair != NULL) {
 		char *bmname = nvpair_name(pair);
 		char *hash = strchr(bmname, '#');
 		/* Fetch the successor first: 'pair' may be removed below. */
 		nvpair_t *next = nvlist_next_nvpair(innvl, pair);
 		int error;
 
 		if (hash == NULL)
 			return (SET_ERROR(EINVAL));
 
 		*hash = '\0';
 		error = zfs_secpolicy_write_perms(bmname,
 		    ZFS_DELEG_PERM_DESTROY, cr);
 		*hash = '#';
 
 		if (error == ENOENT) {
 			/*
 			 * Ignore any filesystems that don't exist (we consider
 			 * their bookmarks "already destroyed").  Remove
 			 * the name from the nvl here in case the filesystem
 			 * is created between now and when we try to destroy
 			 * the bookmark (in which case we don't want to
 			 * destroy it since we haven't checked for permission).
 			 */
 			fnvlist_remove_nvpair(innvl, pair);
 		} else if (error != 0) {
 			return (error);
 		}
 
 		pair = next;
 	}
 
 	return (0);
 }
 
 /*
  * Policy for logging a history record: permitted only when the calling
  * thread has a value stored under zfs_allow_log_key; EPERM otherwise.
  */
 /* ARGSUSED */
 static int
 zfs_secpolicy_log_history(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 {
 	/*
 	 * Even root must have a proper TSD so that we know what pool
 	 * to log to.
 	 */
 	if (tsd_get(zfs_allow_log_key) != NULL)
 		return (0);
 
 	return (SET_ERROR(EPERM));
 }
 
 /*
  * Policy for creating a dataset or clone named zc->zc_name.
  *
  * When 'innvl' carries an "origin" string, the clone permission is required
  * on that origin.  In every case the create and mount permissions are
  * required on the parent of the new dataset.
  */
 static int
 zfs_secpolicy_create_clone(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 {
 	char	parentname[ZFS_MAX_DATASET_NAME_LEN];
 	char	*origin;
 	int	error;
 
 	error = zfs_get_parent(zc->zc_name, parentname, sizeof (parentname));
 	if (error != 0)
 		return (error);
 
 	if (nvlist_lookup_string(innvl, "origin", &origin) == 0) {
 		error = zfs_secpolicy_write_perms(origin,
 		    ZFS_DELEG_PERM_CLONE, cr);
 		if (error != 0)
 			return (error);
 	}
 
 	error = zfs_secpolicy_write_perms(parentname,
 	    ZFS_DELEG_PERM_CREATE, cr);
 	if (error != 0)
 		return (error);
 
 	return (zfs_secpolicy_write_perms(parentname,
 	    ZFS_DELEG_PERM_MOUNT, cr));
 }
 
 /*
  * Policy for pool operations - create/destroy pools, add vdevs, etc.  Requires
  * SYS_CONFIG privilege, which is not available in a local zone.
  *
  * Any failure of the privilege check is reported as EPERM.
  */
 /* ARGSUSED */
 int
 zfs_secpolicy_config(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 {
 	if (secpolicy_sys_config(cr, B_FALSE) == 0)
 		return (0);
 
 	return (SET_ERROR(EPERM));
 }
 
 /*
  * Policy for object to name lookups.
  *
  * Permitted outright when the sys_config privilege check succeeds;
  * otherwise ZFS_DELEG_PERM_DIFF is required on zc->zc_name.
  */
 /* ARGSUSED */
 static int
 zfs_secpolicy_diff(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 {
 	if (secpolicy_sys_config(cr, B_FALSE) == 0)
 		return (0);
 
 	return (zfs_secpolicy_write_perms(zc->zc_name,
 	    ZFS_DELEG_PERM_DIFF, cr));
 }
 
 /*
  * Policy for fault injection.  Requires all privileges.
  *
  * The decision is delegated entirely to secpolicy_zinject(); 'zc' and
  * 'innvl' are not examined.
  */
 /* ARGSUSED */
 static int
 zfs_secpolicy_inject(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 {
 	return (secpolicy_zinject(cr));
 }
 
 /*
  * Policy for inheriting the property named in zc->zc_value on zc->zc_name.
  *
  * Native properties go through zfs_secpolicy_setprop() with no value.  A
  * name that is not a native property must be a user property (EINVAL
  * otherwise) and requires ZFS_DELEG_PERM_USERPROP.
  */
 /* ARGSUSED */
 static int
 zfs_secpolicy_inherit_prop(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 {
 	zfs_prop_t prop = zfs_name_to_prop(zc->zc_value);
 
 	if (prop != ZPROP_INVAL)
 		return (zfs_secpolicy_setprop(zc->zc_name, prop, NULL, cr));
 
 	if (!zfs_prop_user(zc->zc_value))
 		return (SET_ERROR(EINVAL));
 
 	return (zfs_secpolicy_write_perms(zc->zc_name,
 	    ZFS_DELEG_PERM_USERPROP, cr));
 }
 
 /*
  * Policy for looking up a single user/group/project quota or usage value.
  *
  * zc->zc_objset_type carries the userquota property and must be below
  * ZFS_NUM_USERQUOTA_PROPS (EINVAL otherwise).  A caller asking about its own
  * uid, or a group it is a member of, is allowed after the read check alone;
  * everyone else needs the matching entry of userquota_perms[].
  */
 static int
 zfs_secpolicy_userspace_one(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 {
 	int error;
 
 	error = zfs_secpolicy_read(zc, innvl, cr);
 	if (error != 0)
 		return (error);
 
 	if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
 		return (SET_ERROR(EINVAL));
 
 	if (zc->zc_value[0] == 0) {
 		boolean_t user_prop =
 		    zc->zc_objset_type == ZFS_PROP_USERUSED ||
 		    zc->zc_objset_type == ZFS_PROP_USERQUOTA ||
 		    zc->zc_objset_type == ZFS_PROP_USEROBJUSED ||
 		    zc->zc_objset_type == ZFS_PROP_USEROBJQUOTA;
 		boolean_t group_prop =
 		    zc->zc_objset_type == ZFS_PROP_GROUPUSED ||
 		    zc->zc_objset_type == ZFS_PROP_GROUPQUOTA ||
 		    zc->zc_objset_type == ZFS_PROP_GROUPOBJUSED ||
 		    zc->zc_objset_type == ZFS_PROP_GROUPOBJQUOTA;
 
 		/*
 		 * They are asking about a posix uid/gid.  If it is
 		 * their own, allow it.
 		 */
 		if (user_prop) {
 			if (zc->zc_guid == crgetuid(cr))
 				return (0);
 		} else if (group_prop) {
 			if (groupmember(zc->zc_guid, cr))
 				return (0);
 		}
 		/* anything else is project quota/used */
 	}
 
 	return (zfs_secpolicy_write_perms(zc->zc_name,
 	    userquota_perms[zc->zc_objset_type], cr));
 }
 
 /*
  * Policy for bulk user/group/project space queries: the read policy must
  * pass, the property type in zc_objset_type must be in range, and the
  * caller needs the permission userquota_perms[] lists for that type.
  */
 static int
 zfs_secpolicy_userspace_many(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 {
 	int error;
 
 	error = zfs_secpolicy_read(zc, innvl, cr);
 	if (error != 0)
 		return (error);
 
 	/* Reject types that would index past userquota_perms[]. */
 	if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
 		return (SET_ERROR(EINVAL));
 
 	return (zfs_secpolicy_write_perms(zc->zc_name,
 	    userquota_perms[zc->zc_objset_type], cr));
 }
 
 /*
  * Policy for the userspace upgrade: identical to the policy for setting
  * ZFS_PROP_VERSION on the dataset named in zc_name.
  */
 /* ARGSUSED */
 static int
 zfs_secpolicy_userspace_upgrade(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 {
 	int rc;
 
 	rc = zfs_secpolicy_setprop(zc->zc_name, ZFS_PROP_VERSION, NULL, cr);
 	return (rc);
 }
 
 /*
  * Policy for placing holds.  Each entry name in the "holds" nvlist of
  * innvl is reduced to a filesystem name with dmu_fsname(), and the caller
  * must pass the ZFS_DELEG_PERM_HOLD check on that name.  The first
  * failure ends the walk and is returned.
  */
 /* ARGSUSED */
 static int
 zfs_secpolicy_hold(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 {
 	nvlist_t *holds = fnvlist_lookup_nvlist(innvl, "holds");
 	nvpair_t *pair = NULL;
 
 	while ((pair = nvlist_next_nvpair(holds, pair)) != NULL) {
 		char fsname[ZFS_MAX_DATASET_NAME_LEN];
 		int error;
 
 		error = dmu_fsname(nvpair_name(pair), fsname);
 		if (error == 0) {
 			error = zfs_secpolicy_write_perms(fsname,
 			    ZFS_DELEG_PERM_HOLD, cr);
 		}
 		if (error != 0)
 			return (error);
 	}
 
 	return (0);
 }
 
 /*
  * Policy for releasing holds.  Each top-level entry name in innvl is
  * reduced to a filesystem name with dmu_fsname(), and the caller must
  * pass the ZFS_DELEG_PERM_RELEASE check on that name.  The first failure
  * ends the walk and is returned.
  */
 /* ARGSUSED */
 static int
 zfs_secpolicy_release(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 {
 	nvpair_t *pair = NULL;
 
 	while ((pair = nvlist_next_nvpair(innvl, pair)) != NULL) {
 		char fsname[ZFS_MAX_DATASET_NAME_LEN];
 		int error;
 
 		error = dmu_fsname(nvpair_name(pair), fsname);
 		if (error == 0) {
 			error = zfs_secpolicy_write_perms(fsname,
 			    ZFS_DELEG_PERM_RELEASE, cr);
 		}
 		if (error != 0)
 			return (error);
 	}
 
 	return (0);
 }
 
 /*
  * Policy for allowing temporary snapshots to be taken or released
  *
  * A temporary snapshot is the same as a snapshot, hold, destroy and
  * release all rolled into one, so each of those policies is consulted in
  * turn.  Delegated diff alone is sufficient that we allow this.
  */
 static int
 zfs_secpolicy_tmp_snapshot(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 {
 	int error;
 
 	if (zfs_secpolicy_write_perms(zc->zc_name,
 	    ZFS_DELEG_PERM_DIFF, cr) == 0)
 		return (0);
 
 	error = zfs_secpolicy_snapshot_perms(zc->zc_name, cr);
 
 	/* Without an input nvlist only the snapshot check applies. */
 	if (innvl == NULL || error != 0)
 		return (error);
 
 	error = zfs_secpolicy_hold(zc, innvl, cr);
 	if (error != 0)
 		return (error);
 
 	error = zfs_secpolicy_release(zc, innvl, cr);
 	if (error != 0)
 		return (error);
 
 	return (zfs_secpolicy_destroy(zc, innvl, cr));
 }
 
 /*
  * Policy for loading a key: the caller must pass the
  * ZFS_DELEG_PERM_LOAD_KEY check on the dataset named in zc_name.
  */
 static int
 zfs_secpolicy_load_key(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 {
 	const char *dsname = zc->zc_name;
 
 	return (zfs_secpolicy_write_perms(dsname, ZFS_DELEG_PERM_LOAD_KEY,
 	    cr));
 }
 
 /*
  * Policy for changing a key: the caller must pass the
  * ZFS_DELEG_PERM_CHANGE_KEY check on the dataset named in zc_name.
  */
 static int
 zfs_secpolicy_change_key(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 {
 	const char *dsname = zc->zc_name;
 
 	return (zfs_secpolicy_write_perms(dsname, ZFS_DELEG_PERM_CHANGE_KEY,
 	    cr));
 }
 
 /*
  * Returns the nvlist as specified by the user in the zfs_cmd_t.
  *
  * 'nvl' is the user address of a packed nvlist of 'size' bytes and
  * 'iflag' is handed to ddi_copyin().  On success the unpacked list is
  * stored in *nvp.  Fails with EINVAL for a zero size, EFAULT if the
  * copy-in fails, or with whatever nvlist_unpack() reports.
  */
 static int
 get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp)
 {
 	nvlist_t *list = NULL;
 	boolean_t copied;
 	char *packed;
 	int error = 0;
 
 	if (size == 0)
 		return (SET_ERROR(EINVAL));
 
 	/* Stage the packed bytes in a temporary kernel buffer. */
 	packed = vmem_alloc(size, KM_SLEEP);
 
 	copied = (ddi_copyin((void *)(uintptr_t)nvl, packed, size,
 	    iflag) == 0);
 	if (copied)
 		error = nvlist_unpack(packed, size, &list, 0);
 
 	/* The staging buffer is released on every path. */
 	vmem_free(packed, size);
 
 	if (!copied)
 		return (SET_ERROR(EFAULT));
 	if (error != 0)
 		return (error);
 
 	*nvp = list;
 	return (0);
 }
 
 /*
  * Shrink 'errors' until its serialized form fits in 'max' bytes.
  * Entries are dropped from the tail of the nvlist, and a single int32
  * entry named "N_MORE_ERRORS" records how many were dropped.  Lists that
  * already fit are left untouched.  Returns ENOMEM when trimming is needed
  * but 'max' is below 1024 bytes.
  */
 static int
 nvlist_smush(nvlist_t *errors, size_t max)
 {
 	nvpair_t *more_errors;
 	int n = 0;
 
 	if (fnvlist_size(errors) <= max)
 		return (0);
 
 	if (max < 1024)
 		return (SET_ERROR(ENOMEM));
 
 	/*
 	 * Add a placeholder counter first so that its size is accounted for
 	 * while trimming; it becomes the last pair in the list.
 	 */
 	fnvlist_add_int32(errors, ZPROP_N_MORE_ERRORS, 0);
 	more_errors = nvlist_prev_nvpair(errors, NULL);
 
 	/* Repeatedly drop the pair just before the placeholder. */
 	do {
 		fnvlist_remove_nvpair(errors,
 		    nvlist_prev_nvpair(errors, more_errors));
 		n++;
 	} while (fnvlist_size(errors) > max);
 
 	/* Swap the placeholder for the real count. */
 	fnvlist_remove_nvpair(errors, more_errors);
 	fnvlist_add_int32(errors, ZPROP_N_MORE_ERRORS, n);
 	ASSERT3U(fnvlist_size(errors), <=, max);
 
 	return (0);
 }
 
 /*
  * Pack 'nvl' and copy it out to the user buffer described by
  * zc_nvlist_dst / zc_nvlist_dst_size.
  *
  * Returns ENOMEM when the buffer is too small and EFAULT when the
  * copy-out fails.  In every case zc_nvlist_dst_size is updated with the
  * size of the list and zc_nvlist_dst_filled is set.
  */
 static int
 put_nvlist(zfs_cmd_t *zc, nvlist_t *nvl)
 {
 	size_t size = fnvlist_size(nvl);
 	int error = 0;
 
 	if (size <= zc->zc_nvlist_dst_size) {
 		char *packed = fnvlist_pack(nvl, &size);
 
 		if (ddi_copyout(packed, (void *)(uintptr_t)zc->zc_nvlist_dst,
 		    size, zc->zc_iflags) != 0)
 			error = SET_ERROR(EFAULT);
 		fnvlist_pack_free(packed, size);
 	} else {
 		error = SET_ERROR(ENOMEM);
 	}
 
 	zc->zc_nvlist_dst_size = size;
 	zc->zc_nvlist_dst_filled = B_TRUE;
 	return (error);
 }
 
 /*
  * Fetch the zfsvfs_t registered as the user pointer of 'os' and take a
  * reference on it via zfs_vfs_ref().  Only DMU_OST_ZFS objsets are
  * accepted; anything else yields EINVAL.
  */
 int
 getzfsvfs_impl(objset_t *os, zfsvfs_t **zfvp)
 {
 	int error;
 
 	if (dmu_objset_type(os) != DMU_OST_ZFS)
 		return (SET_ERROR(EINVAL));
 
 	mutex_enter(&os->os_user_ptr_lock);
 	*zfvp = dmu_objset_get_user(os);
 	/* bump s_active only when non-zero to prevent umount race */
 	error = zfs_vfs_ref(zfvp);
 	mutex_exit(&os->os_user_ptr_lock);
 
 	return (error);
 }
 
 /*
  * Look up the objset named 'dsname' and return its referenced zfsvfs_t
  * through getzfsvfs_impl().  The objset hold is dropped before returning.
  */
 int
 getzfsvfs(const char *dsname, zfsvfs_t **zfvp)
 {
 	objset_t *os;
 	int error;
 
 	error = dmu_objset_hold(dsname, FTAG, &os);
 	if (error == 0) {
 		error = getzfsvfs_impl(os, zfvp);
 		dmu_objset_rele(os, FTAG);
 	}
 
 	return (error);
 }
 
 /*
  * Find a zfsvfs_t for a mounted filesystem, or create our own, in which
  * case its z_sb will be NULL, and it will be opened as the owner.
  * If 'writer' is set, the z_teardown_lock will be held for RW_WRITER,
  * which prevents all inode ops from running.
  *
  * Returns EBUSY (after dropping the teardown lock) if the filesystem
  * turns out to be unmounted.
  */
 static int
 zfsvfs_hold(const char *name, void *tag, zfsvfs_t **zfvp, boolean_t writer)
 {
 	if (getzfsvfs(name, zfvp) != 0) {
 		int error = zfsvfs_create(name, B_FALSE, zfvp);
 
 		if (error != 0)
 			return (error);
 	}
 
 	if (writer)
 		ZFS_TEARDOWN_ENTER_WRITE(*zfvp, tag);
 	else
 		ZFS_TEARDOWN_ENTER_READ(*zfvp, tag);
 
 	if ((*zfvp)->z_unmounted) {
 		/*
 		 * XXX we could probably try again, since the unmounting
 		 * thread should be just about to disassociate the
 		 * objset from the zfsvfs.
 		 */
 		ZFS_TEARDOWN_EXIT(*zfvp, tag);
 		return (SET_ERROR(EBUSY));
 	}
 
 	return (0);
 }
 
 /*
  * Drop the teardown lock on 'zfsvfs'.  If zfs_vfs_held() reports a held
  * vfs, only that reference is released; otherwise the objset is disowned
  * and the zfsvfs_t is freed.
  */
 static void
 zfsvfs_rele(zfsvfs_t *zfsvfs, void *tag)
 {
 	ZFS_TEARDOWN_EXIT(zfsvfs, tag);
 
 	if (!zfs_vfs_held(zfsvfs)) {
 		dmu_objset_disown(zfsvfs->z_os, B_TRUE, zfsvfs);
 		zfsvfs_free(zfsvfs);
 		return;
 	}
 
 	zfs_vfs_rele(zfsvfs);
 }
 
 /*
  * Create a pool.
  *
  * inputs:
  * zc_name		name of the pool
  * zc_nvlist_conf	pool config nvlist (required)
  * zc_nvlist_src	pool property nvlist (optional, used when
  *			zc_nvlist_src_size is non-zero)
  *
  * When properties are supplied, the ZPOOL_ROOTFS_PROPS and
  * ZPOOL_HIDDEN_ARGS entries are pulled out of them before the remainder is
  * handed to spa_create().  After a successful create the root properties
  * are applied with zfs_set_prop_nvlist(); if that fails the pool is
  * destroyed again.
  */
 static int
 zfs_ioc_pool_create(zfs_cmd_t *zc)
 {
 	int error;
 	nvlist_t *config, *props = NULL;
 	nvlist_t *rootprops = NULL;
 	nvlist_t *zplprops = NULL;
 	dsl_crypto_params_t *dcp = NULL;
 	const char *spa_name = zc->zc_name;
 	boolean_t unload_wkey = B_TRUE;
 
 	if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
 	    zc->zc_iflags, &config)))
 		return (error);
 
 	/* The property list is optional; a zero size means none was given. */
 	if (zc->zc_nvlist_src_size != 0 && (error =
 	    get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
 	    zc->zc_iflags, &props))) {
 		nvlist_free(config);
 		return (error);
 	}
 
 	if (props) {
 		nvlist_t *nvl = NULL;
 		nvlist_t *hidden_args = NULL;
 		uint64_t version = SPA_VERSION;
 		char *tname;
 
 		/* A missing version entry leaves the SPA_VERSION default. */
 		(void) nvlist_lookup_uint64(props,
 		    zpool_prop_to_name(ZPOOL_PROP_VERSION), &version);
 		if (!SPA_VERSION_IS_SUPPORTED(version)) {
 			error = SET_ERROR(EINVAL);
 			goto pool_props_bad;
 		}
 		/*
 		 * Take a private copy of the root filesystem properties and
 		 * drop them from 'props'.
 		 */
 		(void) nvlist_lookup_nvlist(props, ZPOOL_ROOTFS_PROPS, &nvl);
 		if (nvl) {
 			error = nvlist_dup(nvl, &rootprops, KM_SLEEP);
 			if (error != 0)
 				goto pool_props_bad;
 			(void) nvlist_remove_all(props, ZPOOL_ROOTFS_PROPS);
 		}
 
 		/*
 		 * hidden_args still points into 'props' here, so it is only
 		 * removed from 'props' after the crypto params are built.
 		 */
 		(void) nvlist_lookup_nvlist(props, ZPOOL_HIDDEN_ARGS,
 		    &hidden_args);
 		error = dsl_crypto_params_create_nvlist(DCP_CMD_NONE,
 		    rootprops, hidden_args, &dcp);
 		if (error != 0)
 			goto pool_props_bad;
 		(void) nvlist_remove_all(props, ZPOOL_HIDDEN_ARGS);
 
 		VERIFY(nvlist_alloc(&zplprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
 		error = zfs_fill_zplprops_root(version, rootprops,
 		    zplprops, NULL);
 		if (error != 0)
 			goto pool_props_bad;
 
 		/*
 		 * If a ZPOOL_PROP_TNAME string is present, later operations
 		 * on the new pool use it instead of zc_name.  'tname' points
 		 * into 'props', which stays allocated until the cleanup
 		 * below.
 		 */
 		if (nvlist_lookup_string(props,
 		    zpool_prop_to_name(ZPOOL_PROP_TNAME), &tname) == 0)
 			spa_name = tname;
 	}
 
 	error = spa_create(zc->zc_name, config, props, zplprops, dcp);
 
 	/*
 	 * Set the remaining root properties
 	 */
 	if (!error && (error = zfs_set_prop_nvlist(spa_name,
 	    ZPROP_SRC_LOCAL, rootprops, NULL)) != 0) {
 		(void) spa_destroy(spa_name);
 		unload_wkey = B_FALSE; /* spa_destroy() unloads wrapping keys */
 	}
 
 	/* Common exit: every nvlist may be NULL here except 'config'. */
 pool_props_bad:
 	nvlist_free(rootprops);
 	nvlist_free(zplprops);
 	nvlist_free(config);
 	nvlist_free(props);
 	/*
 	 * The second argument is true only when an error occurred and
 	 * spa_destroy() was not called above.
 	 */
 	dsl_crypto_params_free(dcp, unload_wkey && !!error);
 
 	return (error);
 }
 
 /*
  * Destroy the pool named in zc_name.  The request is logged with
  * zfs_log_history() before the pool is destroyed.
  */
 static int
 zfs_ioc_pool_destroy(zfs_cmd_t *zc)
 {
 	zfs_log_history(zc);
 
 	return (spa_destroy(zc->zc_name));
 }
 
 /*
  * Import a pool.
  *
  * inputs:
  * zc_name		name of the pool
  * zc_guid		expected pool guid; must match the config
  * zc_cookie		passed to spa_import() as its last argument
  * zc_nvlist_conf	pool config nvlist (required)
  * zc_nvlist_src	property nvlist (optional)
  *
  * outputs:
  * zc_nvlist_dst	config nvlist, when a destination buffer is given
  */
 static int
 zfs_ioc_pool_import(zfs_cmd_t *zc)
 {
 	nvlist_t *config;
 	nvlist_t *props = NULL;
 	uint64_t guid;
 	int error;
 
 	error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
 	    zc->zc_iflags, &config);
 	if (error != 0)
 		return (error);
 
 	if (zc->zc_nvlist_src_size != 0) {
 		error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
 		    zc->zc_iflags, &props);
 		if (error != 0) {
 			nvlist_free(config);
 			return (error);
 		}
 	}
 
 	/* The config must carry the guid the caller asked for. */
 	if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &guid) == 0 &&
 	    guid == zc->zc_guid)
 		error = spa_import(zc->zc_name, config, props, zc->zc_cookie);
 	else
 		error = SET_ERROR(EINVAL);
 
 	/* A copy-out failure takes precedence over the import result. */
 	if (zc->zc_nvlist_dst != 0) {
 		int put_err = put_nvlist(zc, config);
 
 		if (put_err != 0)
 			error = put_err;
 	}
 
 	nvlist_free(config);
 	nvlist_free(props);
 
 	return (error);
 }
 
 /*
  * Export the pool named in zc_name.
  *
  * inputs:
  * zc_name		name of the pool
  * zc_cookie		'force' flag for spa_export()
  * zc_guid		'hardforce' flag for spa_export()
  */
 static int
 zfs_ioc_pool_export(zfs_cmd_t *zc)
 {
 	boolean_t force = (boolean_t)zc->zc_cookie;
 	boolean_t hardforce = (boolean_t)zc->zc_guid;
 
 	zfs_log_history(zc);
 
 	return (spa_export(zc->zc_name, NULL, force, hardforce));
 }
 
 /*
  * Copy out the nvlist produced by spa_all_configs(), which is handed
  * &zc_cookie.  Returns EEXIST when spa_all_configs() yields no list.
  */
 static int
 zfs_ioc_pool_configs(zfs_cmd_t *zc)
 {
 	nvlist_t *configs = spa_all_configs(&zc->zc_cookie);
 	int error;
 
 	if (configs == NULL)
 		return (SET_ERROR(EEXIST));
 
 	error = put_nvlist(zc, configs);
 	nvlist_free(configs);
 
 	return (error);
 }
 
 /*
  * inputs:
  * zc_name		name of the pool
  *
  * outputs:
  * zc_cookie		real errno
  * zc_nvlist_dst	config nvlist
  * zc_nvlist_dst_size	size of config nvlist
  */
 static int
 zfs_ioc_pool_stats(zfs_cmd_t *zc)
 {
 	nvlist_t *config;
 	int error;
 	int ret;
 
 	error = spa_get_stats(zc->zc_name, &config, zc->zc_value,
 	    sizeof (zc->zc_value));
 
 	/* With no config to report, the spa_get_stats() result is final. */
 	if (config == NULL)
 		return (error);
 
 	ret = put_nvlist(zc, config);
 	nvlist_free(config);
 
 	/*
 	 * The config may be present even if 'error' is non-zero.
 	 * In this case we return the copy-out status, and preserve the
 	 * real errno in 'zc_cookie'.
 	 */
 	zc->zc_cookie = error;
 
 	return (ret);
 }
 
 /*
  * Try to import the given pool, returning pool stats as appropriate so that
  * user land knows which devices are available and overall pool health.
  *
  * The candidate config is read from zc_nvlist_conf and the config
  * returned by spa_tryimport() is copied out to zc_nvlist_dst.  EINVAL is
  * returned when spa_tryimport() produces nothing.
  */
 static int
 zfs_ioc_pool_tryimport(zfs_cmd_t *zc)
 {
 	nvlist_t *tryconfig;
 	nvlist_t *config;
 	int error;
 
 	error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
 	    zc->zc_iflags, &tryconfig);
 	if (error != 0)
 		return (error);
 
 	config = spa_tryimport(tryconfig);
 	nvlist_free(tryconfig);
 
 	if (config == NULL)
 		return (SET_ERROR(EINVAL));
 
 	error = put_nvlist(zc, config);
 	nvlist_free(config);
 
 	return (error);
 }
 
 /*
  * inputs:
  * zc_name              name of the pool
  * zc_cookie            scan func (pool_scan_func_t)
  * zc_flags             scrub pause/resume flag (pool_scrub_cmd_t)
  *
  * A pause request takes priority; otherwise POOL_SCAN_NONE stops the
  * current scan and any other scan func starts one.
  */
 static int
 zfs_ioc_pool_scan(zfs_cmd_t *zc)
 {
 	spa_t *spa;
 	int error;
 
 	if (zc->zc_flags >= POOL_SCRUB_FLAGS_END)
 		return (SET_ERROR(EINVAL));
 
 	error = spa_open(zc->zc_name, &spa, FTAG);
 	if (error != 0)
 		return (error);
 
 	if (zc->zc_flags == POOL_SCRUB_PAUSE) {
 		error = spa_scrub_pause_resume(spa, POOL_SCRUB_PAUSE);
 	} else if (zc->zc_cookie == POOL_SCAN_NONE) {
 		error = spa_scan_stop(spa);
 	} else {
 		error = spa_scan(spa, zc->zc_cookie);
 	}
 
 	spa_close(spa, FTAG);
 
 	return (error);
 }
 
 /*
  * Open the pool named in zc_name and call spa_freeze() on it.  Only a
  * failure to open the pool is reported.
  */
 static int
 zfs_ioc_pool_freeze(zfs_cmd_t *zc)
 {
 	spa_t *spa;
 	int error;
 
 	error = spa_open(zc->zc_name, &spa, FTAG);
 	if (error != 0)
 		return (error);
 
 	spa_freeze(spa);
 	spa_close(spa, FTAG);
 
 	return (0);
 }
 
 /*
  * Upgrade the pool named in zc_name to the version given in zc_cookie.
  * The requested version must be supported and must not be lower than the
  * pool's current version, otherwise EINVAL is returned.
  */
 static int
 zfs_ioc_pool_upgrade(zfs_cmd_t *zc)
 {
 	spa_t *spa;
 	int error;
 
 	error = spa_open(zc->zc_name, &spa, FTAG);
 	if (error != 0)
 		return (error);
 
 	if (zc->zc_cookie < spa_version(spa) ||
 	    !SPA_VERSION_IS_SUPPORTED(zc->zc_cookie)) {
 		spa_close(spa, FTAG);
 		return (SET_ERROR(EINVAL));
 	}
 
 	spa_upgrade(spa, zc->zc_cookie);
 	spa_close(spa, FTAG);
 
 	return (0);
 }
 
 /*
  * Read pool history into the user buffer at zc_history.
  *
  * inputs:
  * zc_name		name of the pool
  * zc_history		user address of the destination buffer
  * zc_history_len	size of that buffer; must be non-zero
  * zc_history_offset	passed by reference to spa_history_get()
  *
  * zc_history_offset and zc_history_len are both handed to
  * spa_history_get() by reference, and the length copied out is the value
  * of zc_history_len afterwards.
  */
 static int
 zfs_ioc_pool_get_history(zfs_cmd_t *zc)
 {
 	uint64_t size = zc->zc_history_len;
 	char *hist_buf;
 	spa_t *spa;
 	int error;
 
 	if (size == 0)
 		return (SET_ERROR(EINVAL));
 
 	error = spa_open(zc->zc_name, &spa, FTAG);
 	if (error != 0)
 		return (error);
 
 	if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY) {
 		spa_close(spa, FTAG);
 		return (SET_ERROR(ENOTSUP));
 	}
 
 	/* 'size' is kept separately: zc_history_len may change below. */
 	hist_buf = vmem_alloc(size, KM_SLEEP);
 
 	error = spa_history_get(spa, &zc->zc_history_offset,
 	    &zc->zc_history_len, hist_buf);
 	if (error == 0) {
 		error = ddi_copyout(hist_buf,
 		    (void *)(uintptr_t)zc->zc_history,
 		    zc->zc_history_len, zc->zc_iflags);
 	}
 
 	spa_close(spa, FTAG);
 	vmem_free(hist_buf, size);
 
 	return (error);
 }
 
 /*
  * Open the pool named in zc_name and return the result of
  * spa_change_guid() on it.
  */
 static int
 zfs_ioc_pool_reguid(zfs_cmd_t *zc)
 {
 	spa_t *spa;
 	int error;
 
 	error = spa_open(zc->zc_name, &spa, FTAG);
 	if (error != 0)
 		return (error);
 
 	error = spa_change_guid(spa);
 	spa_close(spa, FTAG);
 
 	return (error);
 }
 
 /*
  * Thin wrapper around dsl_dsobj_to_dsname(), which receives zc_name,
  * zc_obj and zc_value in that order.
  */
 static int
 zfs_ioc_dsobj_to_dsname(zfs_cmd_t *zc)
 {
 	int error;
 
 	error = dsl_dsobj_to_dsname(zc->zc_name, zc->zc_obj, zc->zc_value);
 	return (error);
 }
 
 /*
  * inputs:
  * zc_name		name of filesystem
  * zc_obj		object to find
  *
  * outputs:
  * zc_value		name of object
  *
  * Returns EINVAL when the objset is not of type DMU_OST_ZFS.
  */
 static int
 zfs_ioc_obj_to_path(zfs_cmd_t *zc)
 {
 	boolean_t is_zfs;
 	objset_t *os;
 	int error;
 
 	/* XXX reading from objset not owned */
 	error = dmu_objset_hold_flags(zc->zc_name, B_TRUE, FTAG, &os);
 	if (error != 0)
 		return (error);
 
 	is_zfs = (dmu_objset_type(os) == DMU_OST_ZFS);
 	if (is_zfs) {
 		error = zfs_obj_to_path(os, zc->zc_obj, zc->zc_value,
 		    sizeof (zc->zc_value));
 	}
 	dmu_objset_rele_flags(os, B_TRUE, FTAG);
 
 	return (is_zfs ? error : SET_ERROR(EINVAL));
 }
 
 /*
  * inputs:
  * zc_name		name of filesystem
  * zc_obj		object to find
  *
  * outputs:
  * zc_stat		stats on object
  * zc_value		path to object
  *
  * Returns EINVAL when the objset is not of type DMU_OST_ZFS.
  */
 static int
 zfs_ioc_obj_to_stats(zfs_cmd_t *zc)
 {
 	boolean_t is_zfs;
 	objset_t *os;
 	int error;
 
 	/* XXX reading from objset not owned */
 	error = dmu_objset_hold_flags(zc->zc_name, B_TRUE, FTAG, &os);
 	if (error != 0)
 		return (error);
 
 	is_zfs = (dmu_objset_type(os) == DMU_OST_ZFS);
 	if (is_zfs) {
 		error = zfs_obj_to_stats(os, zc->zc_obj, &zc->zc_stat,
 		    zc->zc_value, sizeof (zc->zc_value));
 	}
 	dmu_objset_rele_flags(os, B_TRUE, FTAG);
 
 	return (is_zfs ? error : SET_ERROR(EINVAL));
 }
 
 /*
  * Add vdevs to the pool named in zc_name.  The vdev configuration is read
  * from zc_nvlist_conf and handed to spa_vdev_add().
  */
 static int
 zfs_ioc_vdev_add(zfs_cmd_t *zc)
 {
 	nvlist_t *config;
 	spa_t *spa;
 	int error;
 
 	error = spa_open(zc->zc_name, &spa, FTAG);
 	if (error != 0)
 		return (error);
 
 	error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
 	    zc->zc_iflags, &config);
 	if (error != 0) {
 		spa_close(spa, FTAG);
 		return (error);
 	}
 
 	error = spa_vdev_add(spa, config);
 	nvlist_free(config);
 	spa_close(spa, FTAG);
 
 	return (error);
 }
 
 /*
  * inputs:
  * zc_name		name of the pool
  * zc_guid		guid of vdev to remove
  * zc_cookie		cancel removal
  *
  * A non-zero zc_cookie cancels a removal instead of starting one.
  */
 static int
 zfs_ioc_vdev_remove(zfs_cmd_t *zc)
 {
 	spa_t *spa;
 	int error;
 
 	error = spa_open(zc->zc_name, &spa, FTAG);
 	if (error != 0)
 		return (error);
 
 	if (zc->zc_cookie == 0)
 		error = spa_vdev_remove(spa, zc->zc_guid, B_FALSE);
 	else
 		error = spa_vdev_remove_cancel(spa);
 
 	spa_close(spa, FTAG);
 
 	return (error);
 }
 
 /*
  * Change the state of a vdev.
  *
  * inputs:
  * zc_name		name of the pool
  * zc_guid		guid of the vdev
  * zc_cookie		requested state (ONLINE, OFFLINE, FAULTED, DEGRADED)
  * zc_obj		extra argument for the state change; for FAULTED and
  *			DEGRADED it is an aux code, and unrecognized codes
  *			are replaced by VDEV_AUX_ERR_EXCEEDED
  *
  * outputs:
  * zc_cookie		the state reported by vdev_online(), or
  *			VDEV_STATE_UNKNOWN for every other request
  */
 static int
 zfs_ioc_vdev_set_state(zfs_cmd_t *zc)
 {
 	vdev_state_t newstate = VDEV_STATE_UNKNOWN;
 	spa_t *spa;
 	int error;
 
 	error = spa_open(zc->zc_name, &spa, FTAG);
 	if (error != 0)
 		return (error);
 
 	switch (zc->zc_cookie) {
 	case VDEV_STATE_ONLINE:
 		error = vdev_online(spa, zc->zc_guid, zc->zc_obj, &newstate);
 		break;
 
 	case VDEV_STATE_OFFLINE:
 		error = vdev_offline(spa, zc->zc_guid, zc->zc_obj);
 		break;
 
 	case VDEV_STATE_FAULTED:
 		if (!(zc->zc_obj == VDEV_AUX_ERR_EXCEEDED ||
 		    zc->zc_obj == VDEV_AUX_EXTERNAL ||
 		    zc->zc_obj == VDEV_AUX_EXTERNAL_PERSIST)) {
 			zc->zc_obj = VDEV_AUX_ERR_EXCEEDED;
 		}
 		error = vdev_fault(spa, zc->zc_guid, zc->zc_obj);
 		break;
 
 	case VDEV_STATE_DEGRADED:
 		if (!(zc->zc_obj == VDEV_AUX_ERR_EXCEEDED ||
 		    zc->zc_obj == VDEV_AUX_EXTERNAL)) {
 			zc->zc_obj = VDEV_AUX_ERR_EXCEEDED;
 		}
 		error = vdev_degrade(spa, zc->zc_guid, zc->zc_obj);
 		break;
 
 	default:
 		error = SET_ERROR(EINVAL);
 		break;
 	}
 
 	zc->zc_cookie = newstate;
 	spa_close(spa, FTAG);
 
 	return (error);
 }
 
 /*
  * Attach a new device to an existing vdev.
  *
  * inputs:
  * zc_name		name of the pool
  * zc_guid		guid of the existing vdev
  * zc_cookie		'replacing' flag for spa_vdev_attach()
  * zc_simple		'rebuild' flag for spa_vdev_attach()
  * zc_nvlist_conf	config nvlist for the new device
  */
 static int
 zfs_ioc_vdev_attach(zfs_cmd_t *zc)
 {
 	int replacing = zc->zc_cookie;
 	int rebuild = zc->zc_simple;
 	nvlist_t *config;
 	spa_t *spa;
 	int error;
 
 	error = spa_open(zc->zc_name, &spa, FTAG);
 	if (error != 0)
 		return (error);
 
 	error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
 	    zc->zc_iflags, &config);
 	if (error == 0) {
 		error = spa_vdev_attach(spa, zc->zc_guid, config, replacing,
 		    rebuild);
 		nvlist_free(config);
 	}
 
 	spa_close(spa, FTAG);
 
 	return (error);
 }
 
 /*
  * Detach the vdev identified by zc_guid from the pool named in zc_name.
  */
 static int
 zfs_ioc_vdev_detach(zfs_cmd_t *zc)
 {
 	spa_t *spa;
 	int error;
 
 	error = spa_open(zc->zc_name, &spa, FTAG);
 	if (error != 0)
 		return (error);
 
 	error = spa_vdev_detach(spa, zc->zc_guid, 0, B_FALSE);
 	spa_close(spa, FTAG);
 
 	return (error);
 }
 
 /*
  * Split a mirrored pool via spa_vdev_split_mirror().
  *
  * inputs:
  * zc_name		name of the existing pool
  * zc_string		passed to spa_vdev_split_mirror() as its second
  *			argument
  * zc_cookie		ZPOOL_EXPORT_AFTER_SPLIT bit selects the 'exp' flag
  * zc_nvlist_conf	config nvlist (required)
  * zc_nvlist_src	property nvlist (optional)
  */
 static int
 zfs_ioc_vdev_split(zfs_cmd_t *zc)
 {
 	boolean_t exp = !!(zc->zc_cookie & ZPOOL_EXPORT_AFTER_SPLIT);
 	nvlist_t *config;
 	nvlist_t *props = NULL;
 	spa_t *spa;
 	int error;
 
 	error = spa_open(zc->zc_name, &spa, FTAG);
 	if (error != 0)
 		return (error);
 
 	error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
 	    zc->zc_iflags, &config);
 	if (error != 0) {
 		spa_close(spa, FTAG);
 		return (error);
 	}
 
 	if (zc->zc_nvlist_src_size != 0) {
 		error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
 		    zc->zc_iflags, &props);
 		if (error != 0) {
 			spa_close(spa, FTAG);
 			nvlist_free(config);
 			return (error);
 		}
 	}
 
 	error = spa_vdev_split_mirror(spa, zc->zc_string, config, props, exp);
 
 	spa_close(spa, FTAG);
 	nvlist_free(config);
 	nvlist_free(props);
 
 	return (error);
 }
 
 /*
  * Set the path of the vdev identified by zc_guid to the string in
  * zc_value, in the pool named in zc_name.
  */
 static int
 zfs_ioc_vdev_setpath(zfs_cmd_t *zc)
 {
 	spa_t *spa;
 	int error;
 
 	error = spa_open(zc->zc_name, &spa, FTAG);
 	if (error == 0) {
 		error = spa_vdev_setpath(spa, zc->zc_guid, zc->zc_value);
 		spa_close(spa, FTAG);
 	}
 
 	return (error);
 }
 
 /*
  * Set the FRU of the vdev identified by zc_guid to the string in
  * zc_value, in the pool named in zc_name.
  */
 static int
 zfs_ioc_vdev_setfru(zfs_cmd_t *zc)
 {
 	spa_t *spa;
 	int error;
 
 	error = spa_open(zc->zc_name, &spa, FTAG);
 	if (error == 0) {
 		error = spa_vdev_setfru(spa, zc->zc_guid, zc->zc_value);
 		spa_close(spa, FTAG);
 	}
 
 	return (error);
 }
 
 /*
  * Fill zc_objset_stats for 'os' and, when the caller supplied a
  * destination buffer (zc_nvlist_dst != 0), copy out the property nvlist
  * as well.  For consistent zvols the zvol stats are added to the list
  * first.
  */
 static int
 zfs_ioc_objset_stats_impl(zfs_cmd_t *zc, objset_t *os)
 {
 	nvlist_t *nv;
 	int error;
 
 	dmu_objset_fast_stat(os, &zc->zc_objset_stats);
 
 	/* No destination buffer: the fast stats are all that is wanted. */
 	if (zc->zc_nvlist_dst == 0)
 		return (0);
 
 	error = dsl_prop_get_all(os, &nv);
 	if (error != 0)
 		return (error);
 
 	dmu_objset_stats(os, nv);
 
 	/*
 	 * NB: zvol_get_stats() will read the objset contents,
 	 * which we aren't supposed to do with a
 	 * DS_MODE_USER hold, because it could be
 	 * inconsistent.  So this is a bit of a workaround...
 	 * XXX reading without owning
 	 */
 	if (!zc->zc_objset_stats.dds_inconsistent &&
 	    dmu_objset_type(os) == DMU_OST_ZVOL) {
 		error = zvol_get_stats(os, nv);
 		if (error == EIO) {
 			nvlist_free(nv);
 			return (error);
 		}
 		/* EIO is the only failure tolerated here. */
 		VERIFY0(error);
 	}
 
 	if (error == 0)
 		error = put_nvlist(zc, nv);
 	nvlist_free(nv);
 
 	return (error);
 }
 
 /*
  * inputs:
  * zc_name		name of filesystem
  * zc_nvlist_dst_size	size of buffer for property nvlist
  *
  * outputs:
  * zc_objset_stats	stats
  * zc_nvlist_dst	property nvlist
  * zc_nvlist_dst_size	size of property nvlist
  */
 static int
 zfs_ioc_objset_stats(zfs_cmd_t *zc)
 {
 	objset_t *os;
 	int error;
 
 	error = dmu_objset_hold(zc->zc_name, FTAG, &os);
 	if (error != 0)
 		return (error);
 
 	/* The hold only needs to last for the stats collection. */
 	error = zfs_ioc_objset_stats_impl(zc, os);
 	dmu_objset_rele(os, FTAG);
 
 	return (error);
 }
 
 /*
  * inputs:
  * zc_name		name of filesystem
  * zc_nvlist_dst_size	size of buffer for property nvlist
  *
  * outputs:
  * zc_nvlist_dst	received property nvlist
  * zc_nvlist_dst_size	size of received property nvlist
  *
  * Gets received properties (distinct from local properties on or after
  * SPA_VERSION_RECVD_PROPS) for callers who want to differentiate received from
  * local property values.
  */
 static int
 zfs_ioc_objset_recvd_props(zfs_cmd_t *zc)
 {
 	nvlist_t *nv;
 	int error;
 
 	/*
 	 * Without this check, we would return local property values if the
 	 * caller has not already received properties on or after
 	 * SPA_VERSION_RECVD_PROPS.
 	 */
 	if (!dsl_prop_get_hasrecvd(zc->zc_name))
 		return (SET_ERROR(ENOTSUP));
 
 	/* Nothing to copy out when no destination buffer was supplied. */
 	if (zc->zc_nvlist_dst == 0)
 		return (0);
 
 	error = dsl_prop_get_received(zc->zc_name, &nv);
 	if (error != 0)
 		return (error);
 
 	error = put_nvlist(zc, nv);
 	nvlist_free(nv);
 
 	return (error);
 }
 
 /*
  * Look up the value of zpl property 'prop' on 'os' and add it to 'props'
  * as a uint64 keyed by the property's name.  Returns the lookup error, if
  * any; a failure to add the pair is fatal (VERIFY).
  */
 static int
 nvl_add_zplprop(objset_t *os, nvlist_t *props, zfs_prop_t prop)
 {
 	uint64_t value;
 	int error;
 
 	/*
 	 * zfs_get_zplprop() will either find a value or give us
 	 * the default value (if there is one).
 	 */
 	error = zfs_get_zplprop(os, prop, &value);
 	if (error == 0) {
 		VERIFY(nvlist_add_uint64(props, zfs_prop_to_name(prop),
 		    value) == 0);
 	}
 
 	return (error);
 }
 
 /*
  * inputs:
  * zc_name		name of filesystem
  * zc_nvlist_dst_size	size of buffer for zpl property nvlist
  *
  * outputs:
  * zc_nvlist_dst	zpl property nvlist
  * zc_nvlist_dst_size	size of zpl property nvlist
  *
  * Returns ENOENT when no destination buffer was given, the dataset is
  * flagged inconsistent, or the objset is not of type DMU_OST_ZFS.
  */
 static int
 zfs_ioc_objset_zplprops(zfs_cmd_t *zc)
 {
 	objset_t *os;
 	int err;
 
 	/* XXX reading without owning */
 	err = dmu_objset_hold(zc->zc_name, FTAG, &os);
 	if (err != 0)
 		return (err);
 
 	dmu_objset_fast_stat(os, &zc->zc_objset_stats);
 
 	/*
 	 * NB: nvl_add_zplprop() will read the objset contents,
 	 * which we aren't supposed to do with a DS_MODE_USER
 	 * hold, because it could be inconsistent.
 	 */
 	if (zc->zc_nvlist_dst == 0 ||
 	    zc->zc_objset_stats.dds_inconsistent ||
 	    dmu_objset_type(os) != DMU_OST_ZFS) {
 		err = SET_ERROR(ENOENT);
 	} else {
 		nvlist_t *nv;
 
 		VERIFY(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) == 0);
 
 		/* Stop at the first property that cannot be fetched. */
 		err = nvl_add_zplprop(os, nv, ZFS_PROP_VERSION);
 		if (err == 0)
 			err = nvl_add_zplprop(os, nv, ZFS_PROP_NORMALIZE);
 		if (err == 0)
 			err = nvl_add_zplprop(os, nv, ZFS_PROP_UTF8ONLY);
 		if (err == 0)
 			err = nvl_add_zplprop(os, nv, ZFS_PROP_CASE);
 		if (err == 0)
 			err = put_nvlist(zc, nv);
 
 		nvlist_free(nv);
 	}
 
 	dmu_objset_rele(os, FTAG);
 
 	return (err);
 }
 
 /*
  * inputs:
  * zc_name		name of filesystem
  * zc_cookie		zap cursor
  * zc_nvlist_dst_size	size of buffer for property nvlist
  *
  * outputs:
  * zc_name		name of next filesystem
  * zc_cookie		zap cursor
  * zc_objset_stats	stats
  * zc_nvlist_dst	property nvlist
  * zc_nvlist_dst_size	size of property nvlist
  *
  * Returns ESRCH when the parent does not exist or there are no more
  * children to list.
  */
 static int
 zfs_ioc_dataset_list_next(zfs_cmd_t *zc)
 {
 	objset_t *os;
 	int error;
 	char *p;
 	/* Length of the parent name, so it can be restored before a retry. */
 	size_t orig_len = strlen(zc->zc_name);
 
 top:
 	if ((error = dmu_objset_hold(zc->zc_name, FTAG, &os))) {
 		if (error == ENOENT)
 			error = SET_ERROR(ESRCH);
 		return (error);
 	}
 
 	/* Append a '/' separator unless the name already ends in one. */
 	p = strrchr(zc->zc_name, '/');
 	if (p == NULL || p[1] != '\0')
 		(void) strlcat(zc->zc_name, "/", sizeof (zc->zc_name));
 	/* 'p' now marks where the child's name component gets written. */
 	p = zc->zc_name + strlen(zc->zc_name);
 
 	/* Advance the cursor until a non-hidden child name is found. */
 	do {
 		error = dmu_dir_list_next(os,
 		    sizeof (zc->zc_name) - (p - zc->zc_name), p,
 		    NULL, &zc->zc_cookie);
 		if (error == ENOENT)
 			error = SET_ERROR(ESRCH);
 	} while (error == 0 && zfs_dataset_name_hidden(zc->zc_name));
 	dmu_objset_rele(os, FTAG);
 
 	/*
 	 * If it's an internal dataset (ie. with a '$' in its name),
 	 * don't try to get stats for it, otherwise we'll return ENOENT.
 	 */
 	if (error == 0 && strchr(zc->zc_name, '$') == NULL) {
 		error = zfs_ioc_objset_stats(zc); /* fill in the stats */
 		if (error == ENOENT) {
 			/* We lost a race with destroy, get the next one. */
 			zc->zc_name[orig_len] = '\0';
 			goto top;
 		}
 	}
 	return (error);
 }
 
 /*
  * inputs:
  * zc_name		name of filesystem
  * zc_cookie		zap cursor
  * zc_nvlist_src	iteration range nvlist
  * zc_nvlist_src_size	size of iteration range nvlist
  *
  * outputs:
  * zc_name		name of next snapshot
  * zc_objset_stats	stats
  * zc_nvlist_dst	property nvlist
  * zc_nvlist_dst_size	size of property nvlist
  *
  * The optional range nvlist may carry SNAP_ITER_MIN_TXG and/or
  * SNAP_ITER_MAX_TXG; snapshots whose creation txg falls outside a given
  * bound are skipped.  When zc_simple is set, only the name is produced
  * and no stats are gathered.  Returns ESRCH when the dataset does not
  * exist or there are no more snapshots, and EINTR when a signal is
  * pending.
  */
 static int
 zfs_ioc_snapshot_list_next(zfs_cmd_t *zc)
 {
 	int error;
 	objset_t *os, *ossnap;
 	dsl_dataset_t *ds;
 	/* A bound of 0 means "no limit" in the filter below. */
 	uint64_t min_txg = 0, max_txg = 0;
 
 	if (zc->zc_nvlist_src_size != 0) {
 		nvlist_t *props = NULL;
 		error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
 		    zc->zc_iflags, &props);
 		if (error != 0)
 			return (error);
 		/* Both keys are optional; a failed lookup leaves the 0. */
 		(void) nvlist_lookup_uint64(props, SNAP_ITER_MIN_TXG,
 		    &min_txg);
 		(void) nvlist_lookup_uint64(props, SNAP_ITER_MAX_TXG,
 		    &max_txg);
 		nvlist_free(props);
 	}
 
 	error = dmu_objset_hold(zc->zc_name, FTAG, &os);
 	if (error != 0) {
 		return (error == ENOENT ? SET_ERROR(ESRCH) : error);
 	}
 
 	/*
 	 * A dataset name of maximum length cannot have any snapshots,
 	 * so exit immediately.
 	 */
 	if (strlcat(zc->zc_name, "@", sizeof (zc->zc_name)) >=
 	    ZFS_MAX_DATASET_NAME_LEN) {
 		dmu_objset_rele(os, FTAG);
 		return (SET_ERROR(ESRCH));
 	}
 
 	/*
 	 * Each pass fetches one snapshot name; the loop only repeats when a
 	 * snapshot is filtered out by the txg range.
 	 */
 	while (error == 0) {
 		if (issig(JUSTLOOKING) && issig(FORREAL)) {
 			error = SET_ERROR(EINTR);
 			break;
 		}
 
 		/* The snapshot name is written right after the '@'. */
 		error = dmu_snapshot_list_next(os,
 		    sizeof (zc->zc_name) - strlen(zc->zc_name),
 		    zc->zc_name + strlen(zc->zc_name), &zc->zc_obj,
 		    &zc->zc_cookie, NULL);
 		if (error == ENOENT) {
 			error = SET_ERROR(ESRCH);
 			break;
 		} else if (error != 0) {
 			break;
 		}
 
 		error = dsl_dataset_hold_obj(dmu_objset_pool(os), zc->zc_obj,
 		    FTAG, &ds);
 		if (error != 0)
 			break;
 
 		if ((min_txg != 0 && dsl_get_creationtxg(ds) < min_txg) ||
 		    (max_txg != 0 && dsl_get_creationtxg(ds) > max_txg)) {
 			dsl_dataset_rele(ds, FTAG);
 			/* undo snapshot name append */
 			*(strchr(zc->zc_name, '@') + 1) = '\0';
 			/* skip snapshot */
 			continue;
 		}
 
 		/* Name-only listing: done once a matching name is found. */
 		if (zc->zc_simple) {
 			dsl_dataset_rele(ds, FTAG);
 			break;
 		}
 
 		if ((error = dmu_objset_from_ds(ds, &ossnap)) != 0) {
 			dsl_dataset_rele(ds, FTAG);
 			break;
 		}
 		if ((error = zfs_ioc_objset_stats_impl(zc, ossnap)) != 0) {
 			dsl_dataset_rele(ds, FTAG);
 			break;
 		}
 		dsl_dataset_rele(ds, FTAG);
 		break;
 	}
 
 	dmu_objset_rele(os, FTAG);
 	/* if we failed, undo the @ that we tacked on to zc_name */
 	if (error != 0)
 		*strchr(zc->zc_name, '@') = '\0';
 	return (error);
 }
 
 /*
  * Apply a userquota-style property to dataset 'dsname'.
  *
  * The pair's name must contain a '-' (the text after it is used as the
  * domain) and its value must be a uint64 array of exactly three elements:
  * { type, rid, quota }.  The value may also be wrapped in an nvlist under
  * ZPROP_VALUE.  Returns EINVAL for malformed input, otherwise the result
  * of zfsvfs_hold() / zfs_set_userquota().
  */
 static int
 zfs_prop_set_userquota(const char *dsname, nvpair_t *pair)
 {
 	const char *propname = nvpair_name(pair);
 	const char *dash, *domain;
 	zfs_userquota_prop_t type;
 	unsigned int vallen;
 	uint64_t *valary;
 	uint64_t rid, quota;
 	zfsvfs_t *zfsvfs;
 	int err;
 
 	/* Unwrap a value that was nested under ZPROP_VALUE. */
 	if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
 		nvlist_t *attrs;
 
 		VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
 		if (nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
 		    &pair) != 0)
 			return (SET_ERROR(EINVAL));
 	}
 
 	/*
 	 * A correctly constructed propname is encoded as
 	 * userquota@<rid>-<domain>.
 	 */
 	dash = strchr(propname, '-');
 	if (dash == NULL)
 		return (SET_ERROR(EINVAL));
 	if (nvpair_value_uint64_array(pair, &valary, &vallen) != 0 ||
 	    vallen != 3)
 		return (SET_ERROR(EINVAL));
 
 	domain = dash + 1;
 	type = valary[0];
 	rid = valary[1];
 	quota = valary[2];
 
 	err = zfsvfs_hold(dsname, FTAG, &zfsvfs, B_FALSE);
 	if (err != 0)
 		return (err);
 
 	err = zfs_set_userquota(zfsvfs, type, domain, rid, quota);
 	zfsvfs_rele(zfsvfs, FTAG);
 
 	return (err);
 }
 
 /*
  * If the named property is one that has a special function to set its value,
  * return 0 on success and a positive error code on failure; otherwise if it is
  * not one of the special properties handled by this function, return -1.
  *
  * Some properties (the fs/snapshot limits, keylocation and compression) are
  * only validated or prepared here: on success they still return -1 so that
  * the caller also stores the value through its default path.
  *
  * XXX: It would be better for callers of the property interface if we handled
  * these special cases in dsl_prop.c (in the dsl layer).
  */
 static int
 zfs_prop_set_special(const char *dsname, zprop_source_t source,
     nvpair_t *pair)
 {
 	const char *propname = nvpair_name(pair);
 	zfs_prop_t prop = zfs_name_to_prop(propname);
 	uint64_t intval = 0;
 	const char *strval = NULL;
 	int err = -1;
 
 	/* Of the non-native names, only userquota-style ones are special. */
 	if (prop == ZPROP_INVAL) {
 		if (zfs_prop_userquota(propname))
 			return (zfs_prop_set_userquota(dsname, pair));
 		return (-1);
 	}
 
 	/* Unwrap a value that was nested under ZPROP_VALUE. */
 	if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
 		nvlist_t *attrs;
 		VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
 		VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
 		    &pair) == 0);
 	}
 
 	/* all special properties are numeric except for keylocation */
 	if (zfs_prop_get_type(prop) == PROP_TYPE_STRING) {
 		strval = fnvpair_value_string(pair);
 	} else {
 		intval = fnvpair_value_uint64(pair);
 	}
 
 	switch (prop) {
 	case ZFS_PROP_QUOTA:
 		err = dsl_dir_set_quota(dsname, source, intval);
 		break;
 	case ZFS_PROP_REFQUOTA:
 		err = dsl_dataset_set_refquota(dsname, source, intval);
 		break;
 	case ZFS_PROP_FILESYSTEM_LIMIT:
 	case ZFS_PROP_SNAPSHOT_LIMIT:
 		if (intval == UINT64_MAX) {
 			/* clearing the limit, just do it */
 			err = 0;
 		} else {
 			err = dsl_dir_activate_fs_ss_limit(dsname);
 		}
 		/*
 		 * Set err to -1 to force the zfs_set_prop_nvlist code down the
 		 * default path to set the value in the nvlist.
 		 */
 		if (err == 0)
 			err = -1;
 		break;
 	case ZFS_PROP_KEYLOCATION:
 		err = dsl_crypto_can_set_keylocation(dsname, strval);
 
 		/*
 		 * Set err to -1 to force the zfs_set_prop_nvlist code down the
 		 * default path to set the value in the nvlist.
 		 */
 		if (err == 0)
 			err = -1;
 		break;
 	case ZFS_PROP_RESERVATION:
 		err = dsl_dir_set_reservation(dsname, source, intval);
 		break;
 	case ZFS_PROP_REFRESERVATION:
 		err = dsl_dataset_set_refreservation(dsname, source, intval);
 		break;
 	case ZFS_PROP_COMPRESSION:
 		err = dsl_dataset_set_compression(dsname, source, intval);
 		/*
 		 * Set err to -1 to force the zfs_set_prop_nvlist code down the
 		 * default path to set the value in the nvlist.
 		 */
 		if (err == 0)
 			err = -1;
 		break;
 	case ZFS_PROP_VOLSIZE:
 		err = zvol_set_volsize(dsname, intval);
 		break;
 	case ZFS_PROP_SNAPDEV:
 		err = zvol_set_snapdev(dsname, source, intval);
 		break;
 	case ZFS_PROP_VOLMODE:
 		err = zvol_set_volmode(dsname, source, intval);
 		break;
 	case ZFS_PROP_VERSION:
 	{
 		zfsvfs_t *zfsvfs;
 
 		/* Hold as writer (B_TRUE) while the version is changed. */
 		if ((err = zfsvfs_hold(dsname, FTAG, &zfsvfs, B_TRUE)) != 0)
 			break;
 
 		err = zfs_set_version(zfsvfs, intval);
 		zfsvfs_rele(zfsvfs, FTAG);
 
 		/*
 		 * Once the version is at least ZPL_VERSION_USERSPACE, run
 		 * the userspace and id-quota upgrades using a scratch
 		 * zfs_cmd_t that carries only the dataset name.  Their
 		 * results are deliberately ignored.
 		 */
 		if (err == 0 && intval >= ZPL_VERSION_USERSPACE) {
 			zfs_cmd_t *zc;
 
 			zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
 			(void) strlcpy(zc->zc_name, dsname,
 			    sizeof (zc->zc_name));
 			(void) zfs_ioc_userspace_upgrade(zc);
 			(void) zfs_ioc_id_quota_upgrade(zc);
 			kmem_free(zc, sizeof (zfs_cmd_t));
 		}
 		break;
 	}
 	default:
 		/* Not a special property; the caller handles it. */
 		err = -1;
 	}
 
 	return (err);
 }
 
 /*
  * Identify the properties for which zfs_set_prop_nvlist() refreshes the
  * mount cache after it has processed a property list.
  *
  * Returns B_TRUE for atime, relatime, devices, exec, setuid, readonly,
  * xattr and nbmand, and B_FALSE for every other property.
  */
 static boolean_t
 zfs_is_namespace_prop(zfs_prop_t prop)
 {
 	boolean_t is_namespace = B_FALSE;
 
 	switch (prop) {
 	case ZFS_PROP_ATIME:
 	case ZFS_PROP_RELATIME:
 	case ZFS_PROP_DEVICES:
 	case ZFS_PROP_EXEC:
 	case ZFS_PROP_SETUID:
 	case ZFS_PROP_READONLY:
 	case ZFS_PROP_XATTR:
 	case ZFS_PROP_NBMAND:
 		is_namespace = B_TRUE;
 		break;
 	default:
 		break;
 	}
 
 	return (is_namespace);
 }
 
 /*
  * This function is best effort. If it fails to set any of the given properties,
  * it continues to set as many as it can and returns the last error
  * encountered. If the caller provides a non-NULL errlist, it will be filled in
  * with the list of names of all the properties that failed along with the
  * corresponding error numbers.
  *
  * If every property is set successfully, zero is returned and errlist is not
  * modified.
  *
  * Processing happens in up to three stages:
  *  1. Every pair in nvl is type-checked, permission-checked and, unless
  *     source is ZPROP_SRC_INHERITED, offered to zfs_prop_set_special().
  *     Pairs that the special handler declines (-1) and all inherited pairs
  *     are collected in genericnvl.
  *  2. Pairs whose special handler failed are collected in retrynvl and run
  *     through stage 1 one more time.
  *  3. genericnvl is applied with a single dsl_props_set() call; if that
  *     fails, each of its pairs is applied individually.
  */
 int
 zfs_set_prop_nvlist(const char *dsname, zprop_source_t source, nvlist_t *nvl,
     nvlist_t *errlist)
 {
 	nvpair_t *pair;
 	nvpair_t *propval;
 	int rv = 0;
 	uint64_t intval;
 	const char *strval;
 	boolean_t should_update_mount_cache = B_FALSE;
 
 	/* Pairs to be set in one batch, and pairs to be given a second try. */
 	nvlist_t *genericnvl = fnvlist_alloc();
 	nvlist_t *retrynvl = fnvlist_alloc();
 retry:
 	/* On the second pass nvl has been redirected to retrynvl. */
 	pair = NULL;
 	while ((pair = nvlist_next_nvpair(nvl, pair)) != NULL) {
 		const char *propname = nvpair_name(pair);
 		zfs_prop_t prop = zfs_name_to_prop(propname);
 		int err = 0;
 
 		/* decode the property value */
 		propval = pair;
 		if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
 			nvlist_t *attrs;
 			attrs = fnvpair_value_nvlist(pair);
 			if (nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
 			    &propval) != 0)
 				err = SET_ERROR(EINVAL);
 		}
 
 		/* Validate value type */
 		if (err == 0 && source == ZPROP_SRC_INHERITED) {
 			/* inherited properties are expected to be booleans */
 			if (nvpair_type(propval) != DATA_TYPE_BOOLEAN)
 				err = SET_ERROR(EINVAL);
 		} else if (err == 0 && prop == ZPROP_INVAL) {
 			/* Not a native property: user or userquota only. */
 			if (zfs_prop_user(propname)) {
 				if (nvpair_type(propval) != DATA_TYPE_STRING)
 					err = SET_ERROR(EINVAL);
 			} else if (zfs_prop_userquota(propname)) {
 				if (nvpair_type(propval) !=
 				    DATA_TYPE_UINT64_ARRAY)
 					err = SET_ERROR(EINVAL);
 			} else {
 				err = SET_ERROR(EINVAL);
 			}
 		} else if (err == 0) {
 			/* Native property: value type must match its type. */
 			if (nvpair_type(propval) == DATA_TYPE_STRING) {
 				if (zfs_prop_get_type(prop) != PROP_TYPE_STRING)
 					err = SET_ERROR(EINVAL);
 			} else if (nvpair_type(propval) == DATA_TYPE_UINT64) {
 				const char *unused;
 
 				intval = fnvpair_value_uint64(propval);
 
 				switch (zfs_prop_get_type(prop)) {
 				case PROP_TYPE_NUMBER:
 					break;
 				case PROP_TYPE_STRING:
 					err = SET_ERROR(EINVAL);
 					break;
 				case PROP_TYPE_INDEX:
 					if (zfs_prop_index_to_string(prop,
 					    intval, &unused) != 0)
 						err =
 						    SET_ERROR(ZFS_ERR_BADPROP);
 					break;
 				default:
 					cmn_err(CE_PANIC,
 					    "unknown property type");
 				}
 			} else {
 				err = SET_ERROR(EINVAL);
 			}
 		}
 
 		/* Validate permissions */
 		if (err == 0)
 			err = zfs_check_settable(dsname, pair, CRED());
 
 		if (err == 0) {
 			if (source == ZPROP_SRC_INHERITED)
 				err = -1; /* does not need special handling */
 			else
 				err = zfs_prop_set_special(dsname, source,
 				    pair);
 			if (err == -1) {
 				/*
 				 * For better performance we build up a list of
 				 * properties to set in a single transaction.
 				 */
 				err = nvlist_add_nvpair(genericnvl, pair);
 			} else if (err != 0 && nvl != retrynvl) {
 				/*
 				 * This may be a spurious error caused by
 				 * receiving quota and reservation out of order.
 				 * Try again in a second pass.
 				 */
 				err = nvlist_add_nvpair(retrynvl, pair);
 			}
 		}
 
 		/*
 		 * A pair that was queued successfully (in either list) leaves
 		 * err at 0 here, so nothing is reported for it in this pass.
 		 */
 		if (err != 0) {
 			if (errlist != NULL)
 				fnvlist_add_int32(errlist, propname, err);
 			rv = err;
 		}
 
 		if (zfs_is_namespace_prop(prop))
 			should_update_mount_cache = B_TRUE;
 	}
 
 	/* Run the deferred pairs once; the second pass cannot defer again. */
 	if (nvl != retrynvl && !nvlist_empty(retrynvl)) {
 		nvl = retrynvl;
 		goto retry;
 	}
 
 	if (!nvlist_empty(genericnvl) &&
 	    dsl_props_set(dsname, source, genericnvl) != 0) {
 		/*
 		 * If this fails, we still want to set as many properties as we
 		 * can, so try setting them individually.
 		 */
 		pair = NULL;
 		while ((pair = nvlist_next_nvpair(genericnvl, pair)) != NULL) {
 			const char *propname = nvpair_name(pair);
 			int err = 0;
 
 			propval = pair;
 			if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
 				nvlist_t *attrs;
 				attrs = fnvpair_value_nvlist(pair);
 				propval = fnvlist_lookup_nvpair(attrs,
 				    ZPROP_VALUE);
 			}
 
 			/* Dispatch on the value type: string, boolean, other. */
 			if (nvpair_type(propval) == DATA_TYPE_STRING) {
 				strval = fnvpair_value_string(propval);
 				err = dsl_prop_set_string(dsname, propname,
 				    source, strval);
 			} else if (nvpair_type(propval) == DATA_TYPE_BOOLEAN) {
 				err = dsl_prop_inherit(dsname, propname,
 				    source);
 			} else {
 				intval = fnvpair_value_uint64(propval);
 				err = dsl_prop_set_int(dsname, propname, source,
 				    intval);
 			}
 
 			if (err != 0) {
 				if (errlist != NULL) {
 					fnvlist_add_int32(errlist, propname,
 					    err);
 				}
 				rv = err;
 			}
 		}
 	}
 	if (should_update_mount_cache)
 		zfs_ioctl_update_mount_cache(dsname);
 
 	nvlist_free(genericnvl);
 	nvlist_free(retrynvl);
 
 	return (rv);
 }
 
 /*
  * Verify that every entry of nvl is a well-formed user property.
  *
  * Returns EINVAL for an entry whose name is not a user property name or
  * whose value is not a string, ENAMETOOLONG for a name of ZAP_MAXNAMELEN
  * characters or more, E2BIG for a value of ZAP_MAXVALUELEN characters or
  * more, and 0 when all entries pass.  Checking stops at the first failure.
  */
 static int
 zfs_check_userprops(nvlist_t *nvl)
 {
 	for (nvpair_t *elem = nvlist_next_nvpair(nvl, NULL); elem != NULL;
 	    elem = nvlist_next_nvpair(nvl, elem)) {
 		const char *name = nvpair_name(elem);
 
 		if (!zfs_prop_user(name))
 			return (SET_ERROR(EINVAL));
 		if (nvpair_type(elem) != DATA_TYPE_STRING)
 			return (SET_ERROR(EINVAL));
 
 		if (strlen(name) >= ZAP_MAXNAMELEN)
 			return (SET_ERROR(ENAMETOOLONG));
 
 		if (strlen(fnvpair_value_string(elem)) >= ZAP_MAXVALUELEN)
 			return (SET_ERROR(E2BIG));
 	}
 
 	return (0);
 }
 
 /*
  * Allocate *newprops and fill it with every pair of props whose name does
  * not appear in skipped.  Allocation and insertion failures are fatal
  * (VERIFY).  The caller owns the new list.
  */
 static void
 props_skip(nvlist_t *props, nvlist_t *skipped, nvlist_t **newprops)
 {
 	VERIFY(nvlist_alloc(newprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
 
 	for (nvpair_t *elem = nvlist_next_nvpair(props, NULL); elem != NULL;
 	    elem = nvlist_next_nvpair(props, elem)) {
 		if (!nvlist_exists(skipped, nvpair_name(elem))) {
 			VERIFY(nvlist_add_nvpair(*newprops, elem) == 0);
 		}
 	}
 }
 
 /*
  * Pass every property of props that is not named in skipped to
  * zfs_set_prop_nvlist() with a source of ZPROP_SRC_NONE, combined with
  * ZPROP_SRC_RECEIVED when dsl_prop_get_hasrecvd() reports true.
  *
  * Returns 0 when nothing is left after filtering, otherwise the result of
  * zfs_set_prop_nvlist().
  */
 static int
 clear_received_props(const char *dsname, nvlist_t *props,
     nvlist_t *skipped)
 {
 	nvlist_t *to_clear = NULL;
 	int rc = 0;
 
 	props_skip(props, skipped, &to_clear);
 
 	if (!nvlist_empty(to_clear)) {
 		zprop_source_t src;
 
 		/*
 		 * Acts on local properties until the dataset has received
 		 * properties at least once on or after SPA_VERSION_RECVD_PROPS.
 		 */
 		if (dsl_prop_get_hasrecvd(dsname))
 			src = (ZPROP_SRC_NONE | ZPROP_SRC_RECEIVED);
 		else
 			src = (ZPROP_SRC_NONE | 0);
 
 		rc = zfs_set_prop_nvlist(dsname, src, to_clear, NULL);
 	}
 
 	nvlist_free(to_clear);
 	return (rc);
 }
 
 /*
  * inputs:
  * zc_name		name of filesystem
  * zc_value		name of property to set
  * zc_nvlist_src{_size}	nvlist of properties to apply
  * zc_cookie		received properties flag
  *
  * outputs:
  * zc_nvlist_dst{_size} error for each unapplied received property
  *
  * When the received flag is set, previously received properties that are
  * not part of the new list are cleared first (failures there are ignored)
  * and the dataset is marked as having received properties.
  */
 static int
 zfs_ioc_set_prop(zfs_cmd_t *zc)
 {
 	boolean_t is_received = zc->zc_cookie;
 	zprop_source_t source;
 	nvlist_t *props;
 	nvlist_t *errlist;
 	int error;
 
 	if (is_received)
 		source = ZPROP_SRC_RECEIVED;
 	else
 		source = ZPROP_SRC_LOCAL;
 
 	error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
 	    zc->zc_iflags, &props);
 	if (error != 0)
 		return (error);
 
 	if (is_received) {
 		nvlist_t *oldprops;
 
 		if (dsl_prop_get_received(zc->zc_name, &oldprops) == 0) {
 			(void) clear_received_props(zc->zc_name, oldprops,
 			    props);
 			nvlist_free(oldprops);
 		}
 
 		error = dsl_prop_set_hasrecvd(zc->zc_name);
 	}
 
 	errlist = fnvlist_alloc();
 	if (error == 0) {
 		error = zfs_set_prop_nvlist(zc->zc_name, source, props,
 		    errlist);
 	}
 
 	/* The per-property error list is copied out on a best-effort basis. */
 	if (zc->zc_nvlist_dst != 0 && errlist != NULL)
 		(void) put_nvlist(zc, errlist);
 
 	nvlist_free(errlist);
 	nvlist_free(props);
 	return (error);
 }
 
 /*
  * inputs:
  * zc_name		name of filesystem
  * zc_value		name of property to inherit
  * zc_cookie		revert to received value if TRUE
  *
  * outputs:		none
  *
  * Returns EINVAL for a non-inheritable native property (plain inherit
  * only), for a name that is neither native nor a user property, for
  * volsize and version, and for a property of unexpected type.
  */
 static int
 zfs_ioc_inherit_prop(zfs_cmd_t *zc)
 {
 	const char *propname = zc->zc_value;
 	zfs_prop_t prop = zfs_name_to_prop(propname);
 	boolean_t received = zc->zc_cookie;
 	zprop_source_t source;
 	zprop_type_t type;
 	nvlist_t *dummy;
 	nvpair_t *pair;
 	int err;
 
 	if (received)
 		source = ZPROP_SRC_NONE; /* revert to received value, if any */
 	else
 		source = ZPROP_SRC_INHERITED; /* explicitly inherit */
 
 	/*
 	 * Only check this in the non-received case. We want to allow
 	 * 'inherit -S' to revert non-inheritable properties like quota
 	 * and reservation to the received or default values even though
 	 * they are not considered inheritable.
 	 */
 	if (!received && prop != ZPROP_INVAL && !zfs_prop_inheritable(prop))
 		return (SET_ERROR(EINVAL));
 
 	if (prop == ZPROP_INVAL) {
 		if (!zfs_prop_user(propname))
 			return (SET_ERROR(EINVAL));
 
 		type = PROP_TYPE_STRING;
 	} else if (prop == ZFS_PROP_VOLSIZE || prop == ZFS_PROP_VERSION) {
 		return (SET_ERROR(EINVAL));
 	} else {
 		type = zfs_prop_get_type(prop);
 	}
 
 	/*
 	 * zfs_prop_set_special() expects properties in the form of an
 	 * nvpair with type info.
 	 */
 	dummy = fnvlist_alloc();
 
 	switch (type) {
 	case PROP_TYPE_STRING:
 		VERIFY(0 == nvlist_add_string(dummy, propname, ""));
 		break;
 	case PROP_TYPE_NUMBER:
 	case PROP_TYPE_INDEX:
 		VERIFY(0 == nvlist_add_uint64(dummy, propname, 0));
 		break;
 	default:
 		err = SET_ERROR(EINVAL);
 		nvlist_free(dummy);
 		return (err);
 	}
 
 	pair = nvlist_next_nvpair(dummy, NULL);
 	if (pair != NULL) {
 		err = zfs_prop_set_special(zc->zc_name, source, pair);
 		if (err == -1) {
 			/* property is not "special", needs handling */
 			err = dsl_prop_inherit(zc->zc_name, zc->zc_value,
 			    source);
 		}
 	} else {
 		err = SET_ERROR(EINVAL);
 	}
 
 	nvlist_free(dummy);
 	return (err);
 }
 
 /*
  * Set pool properties from the nvlist passed in zc_nvlist_src{_size} on
  * the pool named by zc_name.
  *
  * A request consisting solely of the cachefile property is applied
  * through spa_lookup() under spa_namespace_lock when the pool is found
  * that way; everything else goes through spa_open() and spa_prop_set().
  */
 static int
 zfs_ioc_pool_set_props(zfs_cmd_t *zc)
 {
 	nvlist_t *props;
 	nvpair_t *first;
 	spa_t *spa;
 	int error;
 
 	error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
 	    zc->zc_iflags, &props);
 	if (error)
 		return (error);
 
 	/*
 	 * If the only property is the configfile, then just do a spa_lookup()
 	 * to handle the faulted case.
 	 */
 	first = nvlist_next_nvpair(props, NULL);
 	if (first != NULL &&
 	    strcmp(nvpair_name(first),
 	    zpool_prop_to_name(ZPOOL_PROP_CACHEFILE)) == 0 &&
 	    nvlist_next_nvpair(props, first) == NULL) {
 		mutex_enter(&spa_namespace_lock);
 		spa = spa_lookup(zc->zc_name);
 		if (spa != NULL) {
 			spa_configfile_set(spa, props, B_FALSE);
 			spa_write_cachefile(spa, B_FALSE, B_TRUE);
 		}
 		mutex_exit(&spa_namespace_lock);
 
 		if (spa != NULL) {
 			nvlist_free(props);
 			return (0);
 		}
 	}
 
 	error = spa_open(zc->zc_name, &spa, FTAG);
 	if (error == 0) {
 		error = spa_prop_set(spa, props);
 		nvlist_free(props);
 		spa_close(spa, FTAG);
 	} else {
 		nvlist_free(props);
 	}
 
 	return (error);
 }
 
 /*
  * Fetch the properties of the pool named by zc_name and copy them out
  * with put_nvlist().
  *
  * Returns the error from spa_open() or spa_prop_get() when the properties
  * cannot be obtained, EFAULT when they were obtained but zc_nvlist_dst is
  * not set, otherwise the result of put_nvlist().
  */
 static int
 zfs_ioc_pool_get_props(zfs_cmd_t *zc)
 {
 	spa_t *spa;
 	int error;
 	nvlist_t *nvp = NULL;
 
 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) {
 		/*
 		 * If the pool is faulted, there may be properties we can still
 		 * get (such as altroot and cachefile), so attempt to get them
 		 * anyway.
 		 */
 		mutex_enter(&spa_namespace_lock);
 		if ((spa = spa_lookup(zc->zc_name)) != NULL)
 			error = spa_prop_get(spa, &nvp);
 		mutex_exit(&spa_namespace_lock);
 	} else {
 		error = spa_prop_get(spa, &nvp);
 		spa_close(spa, FTAG);
 	}
 
 	/*
 	 * Keep a real lookup failure intact; EFAULT is reserved for the case
 	 * where there is nowhere to copy the properties to.
 	 */
 	if (error == 0) {
 		if (zc->zc_nvlist_dst != 0)
 			error = put_nvlist(zc, nvp);
 		else
 			error = SET_ERROR(EFAULT);
 	}
 
 	nvlist_free(nvp);
 	return (error);
 }
 
 /*
  * inputs:
  * zc_name		name of filesystem
  * zc_nvlist_src{_size}	nvlist of delegated permissions
  * zc_perm_action	allow/unallow flag
  *
  * outputs:		none
  *
  * Returns EINVAL when zfs_deleg_verify_nvlist() rejects the list,
  * otherwise the first error from the permission checks or from
  * dsl_deleg_set().
  */
 static int
 zfs_ioc_set_fsacl(zfs_cmd_t *zc)
 {
 	nvlist_t *fsaclnv = NULL;
 	int error;
 
 	error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
 	    zc->zc_iflags, &fsaclnv);
 	if (error != 0)
 		return (error);
 
 	/*
 	 * Verify nvlist is constructed correctly
 	 */
 	if (zfs_deleg_verify_nvlist(fsaclnv) != 0) {
 		nvlist_free(fsaclnv);
 		return (SET_ERROR(EINVAL));
 	}
 
 	/*
 	 * If we don't have PRIV_SYS_MOUNT, then validate
 	 * that user is allowed to hand out each permission in
 	 * the nvlist(s)
 	 */
 	error = secpolicy_zfs(CRED());
 	if (error != 0) {
 		error = (zc->zc_perm_action == B_FALSE) ?
 		    dsl_deleg_can_allow(zc->zc_name, fsaclnv, CRED()) :
 		    dsl_deleg_can_unallow(zc->zc_name, fsaclnv, CRED());
 	}
 
 	if (error == 0) {
 		error = dsl_deleg_set(zc->zc_name, fsaclnv,
 		    zc->zc_perm_action);
 	}
 
 	nvlist_free(fsaclnv);
 	return (error);
 }
 
 /*
  * inputs:
  * zc_name		name of filesystem
  *
  * outputs:
  * nvlist of delegated permissions, copied out with put_nvlist()
  *
  * NOTE(review): this comment previously named zc_nvlist_src{_size} as the
  * output; other handlers in this file document put_nvlist() output as
  * zc_nvlist_dst{_size} -- confirm which is intended.
  */
 static int
 zfs_ioc_get_fsacl(zfs_cmd_t *zc)
 {
 	nvlist_t *perms;
 	int error = dsl_deleg_get(zc->zc_name, &perms);
 
 	if (error != 0)
 		return (error);
 
 	error = put_nvlist(zc, perms);
 	nvlist_free(perms);
 	return (error);
 }
 
 /*
  * Objset creation callback for filesystems: arg is a zfs_creat_t whose
  * zct_zplprops are handed to zfs_create_fs().
  */
 /* ARGSUSED */
 static void
 zfs_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx)
 {
 	zfs_create_fs(os, cr, ((zfs_creat_t *)arg)->zct_zplprops, tx);
 }
 
 /* Marks a property for which the creator supplied no value. */
 #define	ZFS_PROP_UNDEFINED	((uint64_t)-1)
 
 /*
  * inputs:
  * os			parent objset pointer (NULL if root fs)
  * zplver		ZPL version to use unless createprops carries a
  *			"version" entry, which overrides it
  * fuids_ok		fuids allowed in this version of the spa?
  * sa_ok		SAs allowed in this version of the spa?
  * createprops		list of properties requested by creator
  *
  * outputs:
  * zplprops	values for the zplprops we attach to the master node object
  * is_ci	true if requested file system will be purely case-insensitive
  *
  * Determine the settings for utf8only, normalization and
  * casesensitivity.  Specific values may have been requested by the
  * creator and/or we can inherit values from the parent dataset.  If
  * the file system is of too early a vintage, a creator can not
  * request settings for these properties, even if the requested
  * setting is the default value.  We don't actually want to create dsl
  * properties for these, so remove them from the source nvlist after
  * processing.
  *
  * Returns ZFS_ERR_WRONG_PARENT if the parent objset is not a filesystem,
  * ENOTSUP if the version checks below fail, an error from
  * zfs_get_zplprop(), or 0 on success.
  */
 static int
 zfs_fill_zplprops_impl(objset_t *os, uint64_t zplver,
     boolean_t fuids_ok, boolean_t sa_ok, nvlist_t *createprops,
     nvlist_t *zplprops, boolean_t *is_ci)
 {
 	uint64_t sense = ZFS_PROP_UNDEFINED;
 	uint64_t norm = ZFS_PROP_UNDEFINED;
 	uint64_t u8 = ZFS_PROP_UNDEFINED;
 	int error;
 
 	ASSERT(zplprops != NULL);
 
 	/* parent dataset must be a filesystem */
 	if (os != NULL && os->os_phys->os_type != DMU_OST_ZFS)
 		return (SET_ERROR(ZFS_ERR_WRONG_PARENT));
 
 	/*
 	 * Pull out creator prop choices, if any.
 	 * The version entry is only read; the other three are also removed
 	 * from createprops.
 	 */
 	if (createprops) {
 		(void) nvlist_lookup_uint64(createprops,
 		    zfs_prop_to_name(ZFS_PROP_VERSION), &zplver);
 		(void) nvlist_lookup_uint64(createprops,
 		    zfs_prop_to_name(ZFS_PROP_NORMALIZE), &norm);
 		(void) nvlist_remove_all(createprops,
 		    zfs_prop_to_name(ZFS_PROP_NORMALIZE));
 		(void) nvlist_lookup_uint64(createprops,
 		    zfs_prop_to_name(ZFS_PROP_UTF8ONLY), &u8);
 		(void) nvlist_remove_all(createprops,
 		    zfs_prop_to_name(ZFS_PROP_UTF8ONLY));
 		(void) nvlist_lookup_uint64(createprops,
 		    zfs_prop_to_name(ZFS_PROP_CASE), &sense);
 		(void) nvlist_remove_all(createprops,
 		    zfs_prop_to_name(ZFS_PROP_CASE));
 	}
 
 	/*
 	 * If the zpl version requested is whacky or the file system
 	 * or pool version is too "young" to support normalization
 	 * and the creator tried to set a value for one of the props,
 	 * error out.
 	 */
 	if ((zplver < ZPL_VERSION_INITIAL || zplver > ZPL_VERSION) ||
 	    (zplver >= ZPL_VERSION_FUID && !fuids_ok) ||
 	    (zplver >= ZPL_VERSION_SA && !sa_ok) ||
 	    (zplver < ZPL_VERSION_NORMALIZATION &&
 	    (norm != ZFS_PROP_UNDEFINED || u8 != ZFS_PROP_UNDEFINED ||
 	    sense != ZFS_PROP_UNDEFINED)))
 		return (SET_ERROR(ENOTSUP));
 
 	/*
 	 * Put the version in the zplprops
 	 */
 	VERIFY(nvlist_add_uint64(zplprops,
 	    zfs_prop_to_name(ZFS_PROP_VERSION), zplver) == 0);
 
 	/* Values the creator did not supply come from zfs_get_zplprop(). */
 	if (norm == ZFS_PROP_UNDEFINED &&
 	    (error = zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &norm)) != 0)
 		return (error);
 	VERIFY(nvlist_add_uint64(zplprops,
 	    zfs_prop_to_name(ZFS_PROP_NORMALIZE), norm) == 0);
 
 	/*
 	 * If we're normalizing, names must always be valid UTF-8 strings.
 	 */
 	if (norm)
 		u8 = 1;
 	if (u8 == ZFS_PROP_UNDEFINED &&
 	    (error = zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &u8)) != 0)
 		return (error);
 	VERIFY(nvlist_add_uint64(zplprops,
 	    zfs_prop_to_name(ZFS_PROP_UTF8ONLY), u8) == 0);
 
 	if (sense == ZFS_PROP_UNDEFINED &&
 	    (error = zfs_get_zplprop(os, ZFS_PROP_CASE, &sense)) != 0)
 		return (error);
 	VERIFY(nvlist_add_uint64(zplprops,
 	    zfs_prop_to_name(ZFS_PROP_CASE), sense) == 0);
 
 	/* is_ci is optional. */
 	if (is_ci)
 		*is_ci = (sense == ZFS_CASE_INSENSITIVE);
 
 	return (0);
 }
 
 /*
  * Compute the zplprops for a new, non-root filesystem named dataset.
  *
  * The ZPL version is derived from the version of the pool that holds
  * dataset, and the parent dataset's objset is held so that
  * zfs_fill_zplprops_impl() can use it.
  *
  * Returns an error from spa_open() or dmu_objset_hold(), otherwise the
  * result of zfs_fill_zplprops_impl().
  */
 static int
 zfs_fill_zplprops(const char *dataset, nvlist_t *createprops,
     nvlist_t *zplprops, boolean_t *is_ci)
 {
 	char parentname[ZFS_MAX_DATASET_NAME_LEN];
 	objset_t *parent_os = NULL;
 	boolean_t fuids_ok, sa_ok;
 	uint64_t spa_vers, zplver;
 	spa_t *spa;
 	int error;
 
 	zfs_get_parent(dataset, parentname, sizeof (parentname));
 
 	error = spa_open(dataset, &spa, FTAG);
 	if (error != 0)
 		return (error);
 	spa_vers = spa_version(spa);
 	spa_close(spa, FTAG);
 
 	zplver = zfs_zpl_version_map(spa_vers);
 	fuids_ok = (zplver >= ZPL_VERSION_FUID);
 	sa_ok = (zplver >= ZPL_VERSION_SA);
 
 	/*
 	 * Open parent object set so we can inherit zplprop values.
 	 */
 	error = dmu_objset_hold(parentname, FTAG, &parent_os);
 	if (error != 0)
 		return (error);
 
 	error = zfs_fill_zplprops_impl(parent_os, zplver, fuids_ok, sa_ok,
 	    createprops, zplprops, is_ci);
 	dmu_objset_rele(parent_os, FTAG);
 
 	return (error);
 }
 
 /*
  * Compute the zplprops for a root filesystem: the ZPL version is mapped
  * from spa_vers and zfs_fill_zplprops_impl() is called without a parent
  * objset.
  */
 static int
 zfs_fill_zplprops_root(uint64_t spa_vers, nvlist_t *createprops,
     nvlist_t *zplprops, boolean_t *is_ci)
 {
 	uint64_t zplver = zfs_zpl_version_map(spa_vers);
 	boolean_t fuids_ok = (zplver >= ZPL_VERSION_FUID);
 	boolean_t sa_ok = (zplver >= ZPL_VERSION_SA);
 
 	return (zfs_fill_zplprops_impl(NULL, zplver, fuids_ok, sa_ok,
 	    createprops, zplprops, is_ci));
 }
 
 /*
  * innvl: {
  *     "type" -> dmu_objset_type_t (int32)
  *     (optional) "props" -> { prop -> value }
  *     (optional) "hidden_args" -> { "wkeydata" -> value }
  *         raw uint8_t array of encryption wrapping key data (32 bytes)
  * }
  *
  * outnvl: propname -> error code (int32)
  */
 
 /*
  * Keys of the innvl described above: "type" is listed with no flags, while
  * "props" and "hidden_args" are flagged ZK_OPTIONAL.
  */
 static const zfs_ioc_key_t zfs_keys_create[] = {
 	{"type",	DATA_TYPE_INT32,	0},
 	{"props",	DATA_TYPE_NVLIST,	ZK_OPTIONAL},
 	{"hidden_args",	DATA_TYPE_NVLIST,	ZK_OPTIONAL},
 };
 
 /*
  * Create the filesystem or volume named fsname.
  *
  * innvl supplies the objset "type" and, optionally, "props" and the hidden
  * encryption arguments; outnvl receives per-property errors from
  * zfs_set_prop_nvlist().
  *
  * Returns EINVAL for a name containing '@' or '%', for a type other than
  * DMU_OST_ZFS/DMU_OST_ZVOL, and for a volume without props or volsize;
  * otherwise the first error from the validation, creation or
  * property-setting steps.  If setting the properties fails, the newly
  * created dataset is destroyed again and the property error is returned.
  */
 static int
 zfs_ioc_create(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
 {
 	int error = 0;
 	zfs_creat_t zct = { 0 };
 	nvlist_t *nvprops = NULL;
 	nvlist_t *hidden_args = NULL;
 	void (*cbfunc)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx);
 	dmu_objset_type_t type;
 	boolean_t is_insensitive = B_FALSE;
 	dsl_crypto_params_t *dcp = NULL;
 
 	type = (dmu_objset_type_t)fnvlist_lookup_int32(innvl, "type");
 	(void) nvlist_lookup_nvlist(innvl, "props", &nvprops);
 	(void) nvlist_lookup_nvlist(innvl, ZPOOL_HIDDEN_ARGS, &hidden_args);
 
 	/* Pick the creation callback; unsupported types are rejected below. */
 	switch (type) {
 	case DMU_OST_ZFS:
 		cbfunc = zfs_create_cb;
 		break;
 
 	case DMU_OST_ZVOL:
 		cbfunc = zvol_create_cb;
 		break;
 
 	default:
 		cbfunc = NULL;
 		break;
 	}
 	if (strchr(fsname, '@') ||
 	    strchr(fsname, '%'))
 		return (SET_ERROR(EINVAL));
 
 	zct.zct_props = nvprops;
 
 	if (cbfunc == NULL)
 		return (SET_ERROR(EINVAL));
 
 	if (type == DMU_OST_ZVOL) {
 		uint64_t volsize, volblocksize;
 
 		/* A volume must come with props that include volsize. */
 		if (nvprops == NULL)
 			return (SET_ERROR(EINVAL));
 		if (nvlist_lookup_uint64(nvprops,
 		    zfs_prop_to_name(ZFS_PROP_VOLSIZE), &volsize) != 0)
 			return (SET_ERROR(EINVAL));
 
 		/* volblocksize is optional: ENOENT selects the default. */
 		if ((error = nvlist_lookup_uint64(nvprops,
 		    zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE),
 		    &volblocksize)) != 0 && error != ENOENT)
 			return (SET_ERROR(EINVAL));
 
 		if (error != 0)
 			volblocksize = zfs_prop_default_numeric(
 			    ZFS_PROP_VOLBLOCKSIZE);
 
 		if ((error = zvol_check_volblocksize(fsname,
 		    volblocksize)) != 0 ||
 		    (error = zvol_check_volsize(volsize,
 		    volblocksize)) != 0)
 			return (error);
 	} else if (type == DMU_OST_ZFS) {
 		/*
 		 * NOTE(review): this declaration shadows the function-level
 		 * 'error'; it is only used within this branch.
 		 */
 		int error;
 
 		/*
 		 * We have to have normalization and
 		 * case-folding flags correct when we do the
 		 * file system creation, so go figure them out
 		 * now.
 		 */
 		VERIFY(nvlist_alloc(&zct.zct_zplprops,
 		    NV_UNIQUE_NAME, KM_SLEEP) == 0);
 		error = zfs_fill_zplprops(fsname, nvprops,
 		    zct.zct_zplprops, &is_insensitive);
 		if (error != 0) {
 			nvlist_free(zct.zct_zplprops);
 			return (error);
 		}
 	}
 
 	/*
 	 * From here on zct.zct_zplprops is only non-NULL for filesystems;
 	 * for volumes it still holds the NULL from the initializer.
 	 */
 	error = dsl_crypto_params_create_nvlist(DCP_CMD_NONE, nvprops,
 	    hidden_args, &dcp);
 	if (error != 0) {
 		nvlist_free(zct.zct_zplprops);
 		return (error);
 	}
 
 	error = dmu_objset_create(fsname, type,
 	    is_insensitive ? DS_FLAG_CI_DATASET : 0, dcp, cbfunc, &zct);
 
 	nvlist_free(zct.zct_zplprops);
 	dsl_crypto_params_free(dcp, !!error);
 
 	/*
 	 * It would be nice to do this atomically.
 	 */
 	if (error == 0) {
 		error = zfs_set_prop_nvlist(fsname, ZPROP_SRC_LOCAL,
 		    nvprops, outnvl);
 		if (error != 0) {
 			spa_t *spa;
 			int error2;
 
 			/*
 			 * Volumes will return EBUSY and cannot be destroyed
 			 * until all asynchronous minor handling (e.g. from
 			 * setting the volmode property) has completed. Wait for
 			 * the spa_zvol_taskq to drain then retry.
 			 */
 			error2 = dsl_destroy_head(fsname);
 			while ((error2 == EBUSY) && (type == DMU_OST_ZVOL)) {
 				error2 = spa_open(fsname, &spa, FTAG);
 				if (error2 == 0) {
 					taskq_wait(spa->spa_zvol_taskq);
 					spa_close(spa, FTAG);
 				}
 				error2 = dsl_destroy_head(fsname);
 			}
 			/* error2 is dropped; the property error is returned. */
 		}
 	}
 	return (error);
 }
 
 /*
  * innvl: {
  *     "origin" -> name of origin snapshot
  *     (optional) "props" -> { prop -> value }
  *     (optional) "hidden_args" -> { "wkeydata" -> value }
  *         raw uint8_t array of encryption wrapping key data (32 bytes)
  * }
  *
  * outputs:
  * outnvl: propname -> error code (int32)
  *
  * In the key table below "origin" is listed with no flags, while "props"
  * and "hidden_args" are flagged ZK_OPTIONAL.
  */
 static const zfs_ioc_key_t zfs_keys_clone[] = {
 	{"origin",	DATA_TYPE_STRING,	0},
 	{"props",	DATA_TYPE_NVLIST,	ZK_OPTIONAL},
 	{"hidden_args",	DATA_TYPE_NVLIST,	ZK_OPTIONAL},
 };
 
 /*
  * Clone the snapshot named by innvl "origin" into the new dataset fsname
  * and apply the optional "props" to it.
  *
  * Returns EINVAL when fsname contains '@' or '%' or when the origin name
  * fails dataset_namecheck(); otherwise the error from dmu_objset_clone()
  * or zfs_set_prop_nvlist().  If the properties cannot be set, the new
  * clone is destroyed again (the result of that destroy is ignored).
  */
 static int
 zfs_ioc_clone(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
 {
 	const char *origin_name = fnvlist_lookup_string(innvl, "origin");
 	nvlist_t *nvprops = NULL;
 	int error;
 
 	(void) nvlist_lookup_nvlist(innvl, "props", &nvprops);
 
 	if (strchr(fsname, '@') != NULL || strchr(fsname, '%') != NULL)
 		return (SET_ERROR(EINVAL));
 
 	if (dataset_namecheck(origin_name, NULL, NULL) != 0)
 		return (SET_ERROR(EINVAL));
 
 	error = dmu_objset_clone(fsname, origin_name);
 	if (error != 0)
 		return (error);
 
 	/*
 	 * It would be nice to do this atomically.
 	 */
 	error = zfs_set_prop_nvlist(fsname, ZPROP_SRC_LOCAL, nvprops, outnvl);
 	if (error != 0)
 		(void) dsl_destroy_head(fsname);
 
 	return (error);
 }
 
 /* Empty key table: no input nvlist keys are defined for remap. */
 static const zfs_ioc_key_t zfs_keys_remap[] = {
 	/* no nvl keys */
 };
 
 /*
  * Handler kept for the retired remap ioctl: every argument is ignored and
  * 0 is always returned.
  */
 /* ARGSUSED */
 static int
 zfs_ioc_remap(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
 {
 	/* This IOCTL is no longer supported. */
 	return (0);
 }
 
 /*
  * innvl: {
  *     "snaps" -> { snapshot1, snapshot2 }
  *     (optional) "props" -> { prop -> value (string) }
  * }
  *
  * outnvl: snapshot -> error code (int32)
  *
  * In the key table below "snaps" is listed with no flags and "props" is
  * flagged ZK_OPTIONAL.
  */
 static const zfs_ioc_key_t zfs_keys_snapshot[] = {
 	{"snaps",	DATA_TYPE_NVLIST,	0},
 	{"props",	DATA_TYPE_NVLIST,	ZK_OPTIONAL},
 };
 
 /*
  * Validate a snapshot request and hand it to dsl_dataset_snapshot().
  *
  * Returns ENOTSUP when props are given but zfs_earlier_version() reports
  * that the pool predates SPA_VERSION_SNAP_PROPS, EINVAL for a malformed
  * snapshot name, EXDEV for a snapshot outside poolname or for a second
  * snapshot of the same filesystem, any error from zfs_check_userprops()
  * or zfs_secpolicy_write_perms(), otherwise the result of
  * dsl_dataset_snapshot().
  */
 static int
 zfs_ioc_snapshot(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
 {
 	nvlist_t *snaps;
 	nvlist_t *props = NULL;
 	int error, poollen;
 	nvpair_t *pair;
 
 	(void) nvlist_lookup_nvlist(innvl, "props", &props);
 	if (!nvlist_empty(props) &&
 	    zfs_earlier_version(poolname, SPA_VERSION_SNAP_PROPS))
 		return (SET_ERROR(ENOTSUP));
 	if ((error = zfs_check_userprops(props)) != 0)
 		return (error);
 
 	snaps = fnvlist_lookup_nvlist(innvl, "snaps");
 	poollen = strlen(poolname);
 	for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
 	    pair = nvlist_next_nvpair(snaps, pair)) {
 		const char *name = nvpair_name(pair);
 		char *cp = strchr(name, '@');
 
 		/*
 		 * The snap name must contain an @, and the part after it must
 		 * contain only valid characters.
 		 */
 		if (cp == NULL ||
 		    zfs_component_namecheck(cp + 1, NULL, NULL) != 0)
 			return (SET_ERROR(EINVAL));
 
 		/*
 		 * The snap must be in the specified pool.
 		 */
 		if (strncmp(name, poolname, poollen) != 0 ||
 		    (name[poollen] != '/' && name[poollen] != '@'))
 			return (SET_ERROR(EXDEV));
 
 		/*
 		 * Check for permission to set the properties on the fs.
 		 * The name is cut at the '@' in place for the duration of the
 		 * check so that only the filesystem part is passed, and the
 		 * '@' is put back immediately afterwards.
 		 */
 		if (!nvlist_empty(props)) {
 			*cp = '\0';
 			error = zfs_secpolicy_write_perms(name,
 			    ZFS_DELEG_PERM_USERPROP, CRED());
 			*cp = '@';
 			if (error != 0)
 				return (error);
 		}
 
 		/*
 		 * This must be the only snap of this fs.
 		 * Later entries are compared over the filesystem part up to
 		 * and including the '@'.
 		 */
 		for (nvpair_t *pair2 = nvlist_next_nvpair(snaps, pair);
 		    pair2 != NULL; pair2 = nvlist_next_nvpair(snaps, pair2)) {
 			if (strncmp(name, nvpair_name(pair2), cp - name + 1)
 			    == 0) {
 				return (SET_ERROR(EXDEV));
 			}
 		}
 	}
 
 	error = dsl_dataset_snapshot(snaps, props, outnvl);
 
 	return (error);
 }
 
 /*
  * innvl: "message" -> string
  *
  * "message" is the only key in the table below and carries no flags.
  */
 static const zfs_ioc_key_t zfs_keys_log_history[] = {
 	{"message",	DATA_TYPE_STRING,	0},
 };
 
 /*
  * Append the innvl "message" string to the history of the pool recorded
  * in thread-specific data.
  *
  * Returns EINVAL when no pool name is recorded, the error from
  * spa_open(), ENOTSUP when the pool version is older than
  * SPA_VERSION_ZPOOL_HISTORY, otherwise the result of spa_history_log().
  */
 /* ARGSUSED */
 static int
 zfs_ioc_log_history(const char *unused, nvlist_t *innvl, nvlist_t *outnvl)
 {
 	const char *message;
 	char *logpool;
 	spa_t *spa;
 	int error;
 
 	/*
 	 * The poolname in the ioctl is not set, we get it from the TSD,
 	 * which was set at the end of the last successful ioctl that allows
 	 * logging.  The secpolicy func already checked that it is set.
 	 * Only one log ioctl is allowed after each successful ioctl, so
 	 * we clear the TSD here.
 	 */
 	logpool = tsd_get(zfs_allow_log_key);
 	if (logpool == NULL)
 		return (SET_ERROR(EINVAL));
 	(void) tsd_set(zfs_allow_log_key, NULL);
 
 	/* The saved name is released as soon as the pool has been opened. */
 	error = spa_open(logpool, &spa, FTAG);
 	kmem_strfree(logpool);
 	if (error != 0)
 		return (error);
 
 	message = fnvlist_lookup_string(innvl, "message");
 
 	if (spa_version(spa) >= SPA_VERSION_ZPOOL_HISTORY)
 		error = spa_history_log(spa, message);
 	else
 		error = SET_ERROR(ENOTSUP);
 
 	spa_close(spa, FTAG);
 	return (error);
 }
 
 /*
  * This ioctl is used to set the bootenv configuration on the current
  * pool. This configuration is stored in the second padding area of the label,
  * and it is used by the bootloader(s) to store the bootloader and/or system
  * specific data.
  * The data is stored as nvlist data stream, and is protected by
  * an embedded checksum.
  * The version can have two possible values:
  * VB_RAW: nvlist should have key GRUB_ENVMAP, value DATA_TYPE_STRING.
  * VB_NVLIST: nvlist with arbitrary <key, value> pairs.
  *
  * In the key table below "version" is listed with no flags; the
  * "<keys>" entry accepts DATA_TYPE_ANY and is flagged
  * ZK_OPTIONAL | ZK_WILDCARDLIST.
  */
 static const zfs_ioc_key_t zfs_keys_set_bootenv[] = {
 	{"version",	DATA_TYPE_UINT64,	0},
 	{"<keys>",	DATA_TYPE_ANY, ZK_OPTIONAL | ZK_WILDCARDLIST},
 };
 
 /*
  * Write innvl as the bootenv of pool "name" through
  * vdev_label_write_bootenv() on the pool's root vdev, bracketed by
  * spa_vdev_state_enter()/spa_vdev_state_exit().
  *
  * Returns the error from spa_open() or from vdev_label_write_bootenv().
  */
 static int
 zfs_ioc_set_bootenv(const char *name, nvlist_t *innvl, nvlist_t *outnvl)
 {
 	spa_t *spa;
 	int rc;
 
 	rc = spa_open(name, &spa, FTAG);
 	if (rc != 0)
 		return (rc);
 
 	spa_vdev_state_enter(spa, SCL_ALL);
 	rc = vdev_label_write_bootenv(spa->spa_root_vdev, innvl);
 	(void) spa_vdev_state_exit(spa, NULL, 0);
 
 	spa_close(spa, FTAG);
 	return (rc);
 }
 
 /* Empty key table: no input nvlist keys are defined for get_bootenv. */
 static const zfs_ioc_key_t zfs_keys_get_bootenv[] = {
 	/* no nvl keys */
 };
 
 /*
  * Read the bootenv of pool "name" into outnvl through
  * vdev_label_read_bootenv() on the pool's root vdev, bracketed by
  * spa_vdev_state_enter()/spa_vdev_state_exit().  innvl is not used.
  *
  * Returns the error from spa_open() or from vdev_label_read_bootenv().
  */
 static int
 zfs_ioc_get_bootenv(const char *name, nvlist_t *innvl, nvlist_t *outnvl)
 {
 	spa_t *spa;
 	int rc;
 
 	rc = spa_open(name, &spa, FTAG);
 	if (rc != 0)
 		return (rc);
 
 	spa_vdev_state_enter(spa, SCL_ALL);
 	rc = vdev_label_read_bootenv(spa->spa_root_vdev, outnvl);
 	(void) spa_vdev_state_exit(spa, NULL, 0);
 
 	spa_close(spa, FTAG);
 	return (rc);
 }
 
 /*
  * The dp_config_rwlock must not be held when calling this, because the
  * unmount may need to write out data.
  *
  * This function is best-effort.  Callers must deal gracefully if it
  * remains mounted (or is remounted after this call).
  *
  * Nothing is returned: a name without an '@' is ignored, and the result
  * of the forced unmount attempt (zfsctl_snapshot_unmount() with
  * MNT_FORCE) is discarded.
  */
 void
 zfs_unmount_snap(const char *snapname)
 {
 	if (strchr(snapname, '@') == NULL)
 		return;
 
 	(void) zfsctl_snapshot_unmount(snapname, MNT_FORCE);
 }
 
 /*
  * Callback-shaped wrapper around zfs_unmount_snap(): arg is ignored and 0
  * is always returned.
  */
 /* ARGSUSED */
 static int
 zfs_unmount_snap_cb(const char *snapname, void *arg)
 {
 	zfs_unmount_snap(snapname);
 	return (0);
 }
 
 /*
  * When a clone is destroyed, its origin may also need to be destroyed,
  * in which case it must be unmounted.  This routine will do that unmount
  * if necessary.
  *
  * Nothing happens when fsname cannot be held, is not a clone, or its
  * origin is not marked for deferred destroy.  The objset hold is dropped
  * before the unmount is attempted.
  */
 void
 zfs_destroy_unmount_origin(const char *fsname)
 {
 	char originname[ZFS_MAX_DATASET_NAME_LEN];
 	dsl_dataset_t *ds;
 	objset_t *os;
 
 	if (dmu_objset_hold(fsname, FTAG, &os) != 0)
 		return;
 
 	ds = dmu_objset_ds(os);
 	if (!dsl_dir_is_clone(ds->ds_dir) ||
 	    !DS_IS_DEFER_DESTROY(ds->ds_prev)) {
 		dmu_objset_rele(os, FTAG);
 		return;
 	}
 
 	dsl_dataset_name(ds->ds_prev, originname);
 	dmu_objset_rele(os, FTAG);
 	zfs_unmount_snap(originname);
 }
 
 /*
  * innvl: {
  *     "snaps" -> { snapshot1, snapshot2 }
  *     (optional boolean) "defer"
  * }
  *
  * outnvl: snapshot -> error code (int32)
  *
  * In the key table below "snaps" is listed with no flags and "defer" is
  * flagged ZK_OPTIONAL.
  */
 static const zfs_ioc_key_t zfs_keys_destroy_snaps[] = {
 	{"snaps",	DATA_TYPE_NVLIST,	0},
 	{"defer",	DATA_TYPE_BOOLEAN,	ZK_OPTIONAL},
 };
 
 /*
  * Destroy the snapshots listed in innvl "snaps"; the presence of a
  * "defer" key is passed on as the defer flag.
  *
  * Each snapshot is unmounted and has its zvol minors removed before the
  * whole list is handed to dsl_destroy_snapshots_nvl().  Returns EXDEV as
  * soon as a name outside poolname is found (entries before it have
  * already been unmounted), otherwise the result of
  * dsl_destroy_snapshots_nvl().
  */
 /* ARGSUSED */
 static int
 zfs_ioc_destroy_snaps(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
 {
 	nvlist_t *snaps = fnvlist_lookup_nvlist(innvl, "snaps");
 	boolean_t defer = nvlist_exists(innvl, "defer");
 	int poollen = strlen(poolname);
 	nvpair_t *elem = NULL;
 	spa_t *spa;
 
 	while ((elem = nvlist_next_nvpair(snaps, elem)) != NULL) {
 		const char *snapname = nvpair_name(elem);
 
 		/*
 		 * The snap must be in the specified pool to prevent the
 		 * invalid removal of zvol minors below.
 		 */
 		if (strncmp(snapname, poolname, poollen) != 0)
 			return (SET_ERROR(EXDEV));
 		if (snapname[poollen] != '/' && snapname[poollen] != '@')
 			return (SET_ERROR(EXDEV));
 
 		zfs_unmount_snap(snapname);
 		if (spa_open(snapname, &spa, FTAG) == 0) {
 			zvol_remove_minors(spa, snapname, B_TRUE);
 			spa_close(spa, FTAG);
 		}
 	}
 
 	return (dsl_destroy_snapshots_nvl(snaps, defer, outnvl));
 }
 
 /*
  * Create bookmarks. The bookmark names are of the form <fs>#<bmark>.
  * All bookmarks and snapshots must be in the same pool.
  * dsl_bookmark_create_nvl_validate describes the nvlist schema in more detail.
  *
  * innvl: {
  *     new_bookmark1 -> existing_snapshot,
  *     new_bookmark2 -> existing_bookmark,
  * }
  *
  * outnvl: bookmark -> error code (int32)
  *
  * The key table below has a single string-valued entry flagged
  * ZK_WILDCARDLIST.
  */
 static const zfs_ioc_key_t zfs_keys_bookmark[] = {
 	{"<bookmark>...",	DATA_TYPE_STRING,	ZK_WILDCARDLIST},
 };
 
 /*
  * Create the bookmarks described by innvl by passing innvl and outnvl
  * straight to dsl_bookmark_create(); poolname is not used here.
  */
 /* ARGSUSED */
 static int
 zfs_ioc_bookmark(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
 {
 	return (dsl_bookmark_create(innvl, outnvl));
 }
 
 /*
  * innvl: {
  *     property 1, property 2, ...
  * }
  *
  * outnvl: {
  *     bookmark name 1 -> { property 1, property 2, ... },
  *     bookmark name 2 -> { property 1, property 2, ... }
  * }
  *
  * The key table below has a single boolean-valued entry flagged
  * ZK_WILDCARDLIST | ZK_OPTIONAL.
  */
 static const zfs_ioc_key_t zfs_keys_get_bookmarks[] = {
 	{"<property>...", DATA_TYPE_BOOLEAN, ZK_WILDCARDLIST | ZK_OPTIONAL},
 };
 
 static int
 zfs_ioc_get_bookmarks(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
 {
 	return (dsl_get_bookmarks(fsname, innvl, outnvl));
 }
 
 /*
  * innvl is not used.
  *
  * outnvl: {
  *     property 1, property 2, ...
  * }
  *
  */
 static const zfs_ioc_key_t zfs_keys_get_bookmark_props[] = {
 	/* no nvl keys */
 };
 
 /*
  * Split a full bookmark name of the form <fs>#<bmark> into its dataset and
  * short-name parts and fetch the bookmark's properties into outnvl.
  *
  * Returns EINVAL if the name contains no '#', ENAMETOOLONG if the dataset
  * part does not fit in a ZFS_MAX_DATASET_NAME_LEN buffer, and otherwise
  * whatever dsl_get_bookmark_props() returns.  innvl is not used.
  */
 /* ARGSUSED */
 static int
 zfs_ioc_get_bookmark_props(const char *bookmark, nvlist_t *innvl,
     nvlist_t *outnvl)
 {
 	char fsname[ZFS_MAX_DATASET_NAME_LEN];
 	char *bmname;
 	size_t fslen;
 
 	bmname = strchr(bookmark, '#');
 	if (bmname == NULL)
 		return (SET_ERROR(EINVAL));
 
 	/*
 	 * Copy only the part in front of the '#'.  Checking the length first
 	 * avoids truncating the name and then searching the truncated copy
 	 * for a '#' that is no longer there.
 	 */
 	fslen = bmname - bookmark;
 	if (fslen >= sizeof (fsname))
 		return (SET_ERROR(ENAMETOOLONG));
 	(void) strlcpy(fsname, bookmark, fslen + 1);
 
 	/* Step over the '#' so bmname is the bookmark's short name */
 	bmname++;
 
 	return (dsl_get_bookmark_props(fsname, bmname, outnvl));
 }
 
 /*
  * innvl: {
  *     bookmark name 1, bookmark name 2
  * }
  *
  * outnvl: bookmark -> error code (int32)
  *
  */
 static const zfs_ioc_key_t zfs_keys_destroy_bookmarks[] = {
 	{"<bookmark>...",	DATA_TYPE_BOOLEAN,	ZK_WILDCARDLIST},
 };
 
 /*
  * Validate every bookmark name in innvl, then destroy them all through
  * dsl_bookmark_destroy().  Nothing is destroyed if any name is rejected.
  */
 static int
 zfs_ioc_destroy_bookmarks(const char *poolname, nvlist_t *innvl,
     nvlist_t *outnvl)
 {
 	int plen = strlen(poolname);
 	nvpair_t *elem = NULL;
 
 	while ((elem = nvlist_next_nvpair(innvl, elem)) != NULL) {
 		const char *bmark = nvpair_name(elem);
 		const char *hash = strchr(bmark, '#');
 
 		/*
 		 * A '#' is mandatory, and what follows it must be a valid
 		 * name component.
 		 */
 		if (hash == NULL)
 			return (SET_ERROR(EINVAL));
 		if (zfs_component_namecheck(hash + 1, NULL, NULL) != 0)
 			return (SET_ERROR(EINVAL));
 
 		/*
 		 * The name must start with the pool name, immediately
 		 * followed by '/' or '#'.
 		 */
 		if (strncmp(bmark, poolname, plen) != 0)
 			return (SET_ERROR(EXDEV));
 		if (bmark[plen] != '/' && bmark[plen] != '#')
 			return (SET_ERROR(EXDEV));
 	}
 
 	return (dsl_bookmark_destroy(innvl, outnvl));
 }
 
 static const zfs_ioc_key_t zfs_keys_channel_program[] = {
 	{"program",	DATA_TYPE_STRING,		0},
 	{"arg",		DATA_TYPE_ANY,			0},
 	{"sync",	DATA_TYPE_BOOLEAN_VALUE,	ZK_OPTIONAL},
 	{"instrlimit",	DATA_TYPE_UINT64,		ZK_OPTIONAL},
 	{"memlimit",	DATA_TYPE_UINT64,		ZK_OPTIONAL},
 };
 
 /*
  * Run a channel program against poolname.  Optional arguments that are
  * absent fall back to sync mode and the ZCP default limits; limits of zero
  * or above the zfs_lua_max_* tunables are rejected with EINVAL.
  */
 static int
 zfs_ioc_channel_program(const char *poolname, nvlist_t *innvl,
     nvlist_t *outnvl)
 {
 	char *prog = fnvlist_lookup_string(innvl, ZCP_ARG_PROGRAM);
 	boolean_t run_sync;
 	uint64_t max_instr, max_mem;
 	nvpair_t *arglist;
 
 	if (nvlist_lookup_boolean_value(innvl, ZCP_ARG_SYNC, &run_sync) != 0)
 		run_sync = B_TRUE;
 	if (nvlist_lookup_uint64(innvl, ZCP_ARG_INSTRLIMIT, &max_instr) != 0)
 		max_instr = ZCP_DEFAULT_INSTRLIMIT;
 	if (nvlist_lookup_uint64(innvl, ZCP_ARG_MEMLIMIT, &max_mem) != 0)
 		max_mem = ZCP_DEFAULT_MEMLIMIT;
 
 	arglist = fnvlist_lookup_nvpair(innvl, ZCP_ARG_ARGLIST);
 
 	/* Both limits must be non-zero and within the configured maximum */
 	if (max_instr == 0 || max_instr > zfs_lua_max_instrlimit ||
 	    max_mem == 0 || max_mem > zfs_lua_max_memlimit)
 		return (SET_ERROR(EINVAL));
 
 	return (zcp_eval(poolname, prog, run_sync, max_instr, max_mem,
 	    arglist, outnvl));
 }
 
 /*
  * innvl: unused
  * outnvl: empty
  */
 static const zfs_ioc_key_t zfs_keys_pool_checkpoint[] = {
 	/* no nvl keys */
 };
 
 /*
  * Thin ioctl entry point: ignores both nvlists and returns the result of
  * spa_checkpoint() for the named pool.
  */
 /* ARGSUSED */
 static int
 zfs_ioc_pool_checkpoint(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
 {
 	return (spa_checkpoint(poolname));
 }
 
 /*
  * innvl: unused
  * outnvl: empty
  */
 static const zfs_ioc_key_t zfs_keys_pool_discard_checkpoint[] = {
 	/* no nvl keys */
 };
 
 /*
  * Thin ioctl entry point: ignores both nvlists and returns the result of
  * spa_checkpoint_discard() for the named pool.
  */
 /* ARGSUSED */
 static int
 zfs_ioc_pool_discard_checkpoint(const char *poolname, nvlist_t *innvl,
     nvlist_t *outnvl)
 {
 	return (spa_checkpoint_discard(poolname));
 }
 
 /*
  * inputs:
  * zc_name		name of dataset to destroy
  * zc_defer_destroy	mark for deferred destroy
  *
  * outputs:		none
  */
 static int
 zfs_ioc_destroy(zfs_cmd_t *zc)
 {
 	/* dataset name plus 6 extra bytes for /%recv */
 	char recvname[ZFS_MAX_DATASET_NAME_LEN + 6];
 	objset_t *os;
 	dmu_objset_type_t type;
 	int error;
 
 	/* Peek at the objset only long enough to learn its type */
 	error = dmu_objset_hold(zc->zc_name, FTAG, &os);
 	if (error != 0)
 		return (error);
 	type = dmu_objset_type(os);
 	dmu_objset_rele(os, FTAG);
 
 	if (type == DMU_OST_ZFS)
 		zfs_unmount_snap(zc->zc_name);
 
 	/* Snapshots take their own destroy path */
 	if (strchr(zc->zc_name, '@') != NULL) {
 		return (dsl_destroy_snapshot(zc->zc_name,
 		    zc->zc_defer_destroy));
 	}
 
 	error = dsl_destroy_head(zc->zc_name);
 	if (error != EEXIST)
 		return (error);
 
 	/*
 	 * EEXIST may be caused by a hidden child (%recv) dataset left
 	 * behind by a previously interrupted 'zfs receive'.  Build its
 	 * name so it can be removed first.
 	 */
 	if (snprintf(recvname, sizeof (recvname), "%s/%s",
 	    zc->zc_name, recv_clone_name) >=
 	    sizeof (recvname))
 		return (SET_ERROR(EINVAL));
 
 	/*
 	 * Remove the hidden child, then retry the target dataset.  When
 	 * there is no hidden child the original EEXIST is reported; any
 	 * other failure is returned as is.
 	 */
 	error = dsl_destroy_head(recvname);
 	if (error == 0)
 		return (dsl_destroy_head(zc->zc_name));
 	if (error == ENOENT)
 		return (SET_ERROR(EEXIST));
 
 	return (error);
 }
 
 /*
  * innvl: {
  *     "initialize_command" -> POOL_INITIALIZE_{CANCEL|START|SUSPEND} (uint64)
  *     "initialize_vdevs": { -> guids to initialize (nvlist)
  *         "vdev_path_1": vdev_guid_1, (uint64),
  *         "vdev_path_2": vdev_guid_2, (uint64),
  *         ...
  *     },
  * }
  *
  * outnvl: {
  *     "initialize_vdevs": { -> initialization errors (nvlist)
  *         "vdev_path_1": errno, see function body for possible errnos (uint64)
  *         "vdev_path_2": errno, ... (uint64)
  *         ...
  *     }
  * }
  *
  * EINVAL is returned for an unknown command or if any of the provided vdev
  * guids have been specified with a type other than uint64.
  */
 static const zfs_ioc_key_t zfs_keys_pool_initialize[] = {
 	{ZPOOL_INITIALIZE_COMMAND,	DATA_TYPE_UINT64,	0},
 	{ZPOOL_INITIALIZE_VDEVS,	DATA_TYPE_NVLIST,	0}
 };
 
 /*
  * Validate the initialize command and vdev guid list, then apply the
  * command through spa_vdev_initialize().  Per-vdev errors, if any, are
  * returned in outnvl and turn the overall result into EINVAL.
  */
 static int
 zfs_ioc_pool_initialize(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
 {
 	uint64_t cmd;
 	nvlist_t *guids;
 	nvlist_t *errlist;
 	nvpair_t *elem = NULL;
 	spa_t *spa;
 	int error, nerrors;
 
 	if (nvlist_lookup_uint64(innvl, ZPOOL_INITIALIZE_COMMAND, &cmd) != 0)
 		return (SET_ERROR(EINVAL));
 
 	switch (cmd) {
 	case POOL_INITIALIZE_CANCEL:
 	case POOL_INITIALIZE_START:
 	case POOL_INITIALIZE_SUSPEND:
 		break;
 	default:
 		return (SET_ERROR(EINVAL));
 	}
 
 	if (nvlist_lookup_nvlist(innvl, ZPOOL_INITIALIZE_VDEVS, &guids) != 0)
 		return (SET_ERROR(EINVAL));
 
 	/* Every entry must carry a uint64 guid; the value is not used here */
 	while ((elem = nvlist_next_nvpair(guids, elem)) != NULL) {
 		uint64_t guid;
 
 		if (nvpair_value_uint64(elem, &guid) != 0)
 			return (SET_ERROR(EINVAL));
 	}
 
 	error = spa_open(poolname, &spa, FTAG);
 	if (error != 0)
 		return (error);
 
 	errlist = fnvlist_alloc();
 	nerrors = spa_vdev_initialize(spa, guids, cmd, errlist);
 
 	/* Only report the error list when it has something in it */
 	if (fnvlist_size(errlist) > 0)
 		fnvlist_add_nvlist(outnvl, ZPOOL_INITIALIZE_VDEVS, errlist);
 	fnvlist_free(errlist);
 
 	spa_close(spa, FTAG);
 
 	if (nerrors > 0)
 		return (SET_ERROR(EINVAL));
 	return (0);
 }
 
 /*
  * innvl: {
  *     "trim_command" -> POOL_TRIM_{CANCEL|START|SUSPEND} (uint64)
  *     "trim_vdevs": { -> guids to TRIM (nvlist)
  *         "vdev_path_1": vdev_guid_1, (uint64),
  *         "vdev_path_2": vdev_guid_2, (uint64),
  *         ...
  *     },
  *     "trim_rate" -> Target TRIM rate in bytes/sec.
  *     "trim_secure" -> Set to request a secure TRIM.
  * }
  *
  * outnvl: {
  *     "trim_vdevs": { -> TRIM errors (nvlist)
  *         "vdev_path_1": errno, see function body for possible errnos (uint64)
  *         "vdev_path_2": errno, ... (uint64)
  *         ...
  *     }
  * }
  *
  * EINVAL is returned for an unknown command or if any of the provided vdev
  * guids have been specified with a type other than uint64.
  */
 static const zfs_ioc_key_t zfs_keys_pool_trim[] = {
 	{ZPOOL_TRIM_COMMAND,	DATA_TYPE_UINT64,		0},
 	{ZPOOL_TRIM_VDEVS,	DATA_TYPE_NVLIST,		0},
 	{ZPOOL_TRIM_RATE,	DATA_TYPE_UINT64,		ZK_OPTIONAL},
 	{ZPOOL_TRIM_SECURE,	DATA_TYPE_BOOLEAN_VALUE,	ZK_OPTIONAL},
 };
 
 /*
  * Validate the TRIM command, vdev guid list and optional rate/secure
  * settings, then apply the command through spa_vdev_trim().  Per-vdev
  * errors, if any, are returned in outnvl and turn the overall result into
  * EINVAL.
  */
 static int
 zfs_ioc_pool_trim(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
 {
 	uint64_t cmd, trim_rate;
 	boolean_t secure_trim;
 	nvlist_t *guids;
 	nvlist_t *errlist;
 	nvpair_t *elem = NULL;
 	spa_t *spa;
 	int error, nerrors;
 
 	if (nvlist_lookup_uint64(innvl, ZPOOL_TRIM_COMMAND, &cmd) != 0)
 		return (SET_ERROR(EINVAL));
 
 	switch (cmd) {
 	case POOL_TRIM_CANCEL:
 	case POOL_TRIM_START:
 	case POOL_TRIM_SUSPEND:
 		break;
 	default:
 		return (SET_ERROR(EINVAL));
 	}
 
 	if (nvlist_lookup_nvlist(innvl, ZPOOL_TRIM_VDEVS, &guids) != 0)
 		return (SET_ERROR(EINVAL));
 
 	/* Every entry must carry a uint64 guid; the value is not used here */
 	while ((elem = nvlist_next_nvpair(guids, elem)) != NULL) {
 		uint64_t guid;
 
 		if (nvpair_value_uint64(elem, &guid) != 0)
 			return (SET_ERROR(EINVAL));
 	}
 
 	/* No rate given: pass 0, which means the maximum rate */
 	if (nvlist_lookup_uint64(innvl, ZPOOL_TRIM_RATE, &trim_rate) != 0)
 		trim_rate = 0;
 
 	/* No secure flag given: perform a standard TRIM */
 	if (nvlist_lookup_boolean_value(innvl, ZPOOL_TRIM_SECURE,
 	    &secure_trim) != 0)
 		secure_trim = B_FALSE;
 
 	error = spa_open(poolname, &spa, FTAG);
 	if (error != 0)
 		return (error);
 
 	errlist = fnvlist_alloc();
 	nerrors = spa_vdev_trim(spa, guids, cmd, trim_rate,
 	    !!zfs_trim_metaslab_skip, secure_trim, errlist);
 
 	/* Only report the error list when it has something in it */
 	if (fnvlist_size(errlist) > 0)
 		fnvlist_add_nvlist(outnvl, ZPOOL_TRIM_VDEVS, errlist);
 	fnvlist_free(errlist);
 
 	spa_close(spa, FTAG);
 
 	if (nerrors > 0)
 		return (SET_ERROR(EINVAL));
 	return (0);
 }
 
 /*
  * This ioctl waits for activity of a particular type to complete. If there is
  * no activity of that type in progress, it returns immediately, and the
  * returned value "waited" is false. If there is activity in progress, and no
  * tag is passed in, the ioctl blocks until all activity of that type is
  * complete, and then returns with "waited" set to true.
  *
  * If a tag is provided, it identifies a particular instance of an activity to
  * wait for. Currently, this is only valid for use with 'initialize', because
  * that is the only activity for which there can be multiple instances running
  * concurrently. In the case of 'initialize', the tag corresponds to the guid of
  * the vdev on which to wait.
  *
  * If a thread waiting in the ioctl receives a signal, the call will return
  * immediately, and the return value will be EINTR.
  *
  * innvl: {
  *     "wait_activity" -> int32_t
  *     (optional) "wait_tag" -> uint64_t
  * }
  *
  * outnvl: "waited" -> boolean_t
  */
 static const zfs_ioc_key_t zfs_keys_pool_wait[] = {
 	{ZPOOL_WAIT_ACTIVITY,	DATA_TYPE_INT32,		0},
 	{ZPOOL_WAIT_TAG,	DATA_TYPE_UINT64,		ZK_OPTIONAL},
 };
 
 /*
  * Wait for pool activity.  The activity type is mandatory; when a tag is
  * also supplied the wait is delegated to spa_wait_tag(), otherwise to
  * spa_wait().  On success the "waited" result is added to outnvl.
  *
  * Returns EINVAL if the activity type is missing, otherwise the result of
  * the spa wait call.
  */
 static int
 zfs_ioc_wait(const char *name, nvlist_t *innvl, nvlist_t *outnvl)
 {
 	int32_t activity;
 	uint64_t tag;
 	boolean_t waited = B_FALSE;
 	int error;
 
 	/* Use SET_ERROR() like the other handlers so the failure is traced */
 	if (nvlist_lookup_int32(innvl, ZPOOL_WAIT_ACTIVITY, &activity) != 0)
 		return (SET_ERROR(EINVAL));
 
 	if (nvlist_lookup_uint64(innvl, ZPOOL_WAIT_TAG, &tag) == 0)
 		error = spa_wait_tag(name, activity, tag, &waited);
 	else
 		error = spa_wait(name, activity, &waited);
 
 	if (error == 0)
 		fnvlist_add_boolean_value(outnvl, ZPOOL_WAIT_WAITED, waited);
 
 	return (error);
 }
 
 /*
  * This ioctl waits for activity of a particular type to complete. If there is
  * no activity of that type in progress, it returns immediately, and the
  * returned value "waited" is false. If there is activity in progress, the
  * ioctl blocks until all activity of that type is complete, and then returns
  * with "waited" set to true.
  *
  * If a thread waiting in the ioctl receives a signal, the call will return
  * immediately, and the return value will be EINTR.
  *
  * innvl: {
  *     "wait_activity" -> int32_t
  * }
  *
  * outnvl: "waited" -> boolean_t
  */
 static const zfs_ioc_key_t zfs_keys_fs_wait[] = {
 	{ZFS_WAIT_ACTIVITY,	DATA_TYPE_INT32,		0},
 };
 
 /*
  * Wait for activity on the dataset called name.
  *
  * Returns EINVAL when the activity type is missing or outside
  * [0, ZFS_WAIT_NUM_ACTIVITIES), the error from dsl_pool_hold() or
  * dsl_dataset_hold() when the dataset cannot be held, and otherwise the
  * result of dsl_dir_wait().  On success "waited" is added to outnvl.
  */
 static int
 zfs_ioc_wait_fs(const char *name, nvlist_t *innvl, nvlist_t *outnvl)
 {
 	int32_t activity;
 	boolean_t waited = B_FALSE;
 	int error;
 	dsl_pool_t *dp;
 	dsl_dir_t *dd;
 	dsl_dataset_t *ds;
 
 	if (nvlist_lookup_int32(innvl, ZFS_WAIT_ACTIVITY, &activity) != 0)
 		return (SET_ERROR(EINVAL));
 
 	if (activity >= ZFS_WAIT_NUM_ACTIVITIES || activity < 0)
 		return (SET_ERROR(EINVAL));
 
 	if ((error = dsl_pool_hold(name, FTAG, &dp)) != 0)
 		return (error);
 
 	if ((error = dsl_dataset_hold(dp, name, FTAG, &ds)) != 0) {
 		dsl_pool_rele(dp, FTAG);
 		return (error);
 	}
 
 	/* Register as a waiter on the dataset's dsl_dir, under its lock */
 	dd = ds->ds_dir;
 	mutex_enter(&dd->dd_activity_lock);
 	dd->dd_activity_waiters++;
 
 	/*
 	 * We get a long-hold here so that the dsl_dataset_t and dsl_dir_t
 	 * aren't evicted while we're waiting. Normally this is prevented by
 	 * holding the pool, but we can't do that while we're waiting since
 	 * that would prevent TXGs from syncing out. Some of the functionality
 	 * of long-holds (e.g. preventing deletion) is unnecessary for this
 	 * case, since we would cancel the waiters before proceeding with a
 	 * deletion. An alternative mechanism for keeping the dataset around
 	 * could be developed but this is simpler.
 	 */
 	dsl_dataset_long_hold(ds, FTAG);
 	dsl_pool_rele(dp, FTAG);
 
 	/*
 	 * Called with dd_activity_lock held.
 	 * NOTE(review): presumably dsl_dir_wait() drops the lock while it
 	 * sleeps -- confirm against its implementation.
 	 */
 	error = dsl_dir_wait(dd, ds, activity, &waited);
 
 	/* Deregister; the last waiter to leave signals dd_activity_cv */
 	dsl_dataset_long_rele(ds, FTAG);
 	dd->dd_activity_waiters--;
 	if (dd->dd_activity_waiters == 0)
 		cv_signal(&dd->dd_activity_cv);
 	mutex_exit(&dd->dd_activity_lock);
 
 	dsl_dataset_rele(ds, FTAG);
 
 	if (error == 0)
 		fnvlist_add_boolean_value(outnvl, ZFS_WAIT_WAITED, waited);
 
 	return (error);
 }
 
 /*
  * fsname is name of dataset to rollback (to most recent snapshot)
  *
  * innvl may contain name of expected target snapshot
  *
  * outnvl: {
  *     "target" -> name of most recent snapshot
  * }
  */
 static const zfs_ioc_key_t zfs_keys_rollback[] = {
 	{"target",	DATA_TYPE_STRING,	ZK_OPTIONAL},
 };
 
 /*
  * Roll fsname back via dsl_dataset_rollback().  An optional "target"
  * snapshot name from innvl is syntax-checked and passed along.  The
  * rollback is performed in one of three ways, tried in this order:
  *
  *   - fsname has a zfsvfs: the filesystem is suspended for the rollback
  *     and resumed afterwards;
  *   - fsname is a zvol that zvol_suspend() can suspend: the zvol is
  *     suspended for the rollback and resumed afterwards;
  *   - neither: the rollback is performed with no owner tag.
  *
  * Returns EINVAL for a malformed target name, otherwise the first error
  * from suspend, rollback or resume.
  */
 /* ARGSUSED */
 static int
 zfs_ioc_rollback(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
 {
 	zfsvfs_t *zfsvfs;
 	zvol_state_handle_t *zv;
 	char *target = NULL;
 	int error;
 
 	/* "target" is optional; target stays NULL when it is absent */
 	(void) nvlist_lookup_string(innvl, "target", &target);
 	if (target != NULL) {
 		const char *cp = strchr(target, '@');
 
 		/*
 		 * The snap name must contain an @, and the part after it must
 		 * contain only valid characters.
 		 */
 		if (cp == NULL ||
 		    zfs_component_namecheck(cp + 1, NULL, NULL) != 0)
 			return (SET_ERROR(EINVAL));
 	}
 
 	if (getzfsvfs(fsname, &zfsvfs) == 0) {
 		dsl_dataset_t *ds;
 
 		/* Capture the dataset before suspending; resume needs it */
 		ds = dmu_objset_ds(zfsvfs->z_os);
 		error = zfs_suspend_fs(zfsvfs);
 		if (error == 0) {
 			int resume_err;
 
 			error = dsl_dataset_rollback(fsname, target, zfsvfs,
 			    outnvl);
 			/*
 			 * Always resume, even if the rollback failed; a
 			 * rollback error takes precedence over a resume error.
 			 */
 			resume_err = zfs_resume_fs(zfsvfs, ds);
 			error = error ? error : resume_err;
 		}
 		zfs_vfs_rele(zfsvfs);
 	} else if ((zv = zvol_suspend(fsname)) != NULL) {
 		error = dsl_dataset_rollback(fsname, target, zvol_tag(zv),
 		    outnvl);
 		zvol_resume(zv);
 	} else {
 		error = dsl_dataset_rollback(fsname, target, NULL, outnvl);
 	}
 	return (error);
 }
 
 /*
  * dmu_objset_find() callback: unmount the snapshot "<fsname>@<arg>", where
  * arg is the snapshot's short name.  Always returns 0.
  */
 static int
 recursive_unmount(const char *fsname, void *arg)
 {
 	char *snap = kmem_asprintf("%s@%s", fsname, (const char *)arg);
 
 	zfs_unmount_snap(snap);
 	kmem_strfree(snap);
 	return (0);
 }
 
 /*
  *
  * snapname is the snapshot to redact.
  * innvl: {
  *     "bookname" -> (string)
  *         shortname of the redaction bookmark to generate
  *     "snapnv" -> (nvlist, values ignored)
  *         snapshots to redact snapname with respect to
  * }
  *
  * outnvl is unused
  */
 
 static const zfs_ioc_key_t zfs_keys_redact[] = {
 	{"bookname",		DATA_TYPE_STRING,	0},
 	{"snapnv",		DATA_TYPE_NVLIST,	0},
 };
 
 /*
  * Redact snapname with respect to the snapshots listed in "snapnv" and
  * record the result under the bookmark short name "bookname".  Returns
  * EINVAL when either argument is missing and ENXIO when the snapshot list
  * is empty; otherwise the result of dmu_redact_snap().
  */
 /* ARGSUSED */
 static int
 zfs_ioc_redact(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
 {
 	nvlist_t *snaps = NULL;
 	char *bookname = NULL;
 	int missing;
 
 	missing = nvlist_lookup_nvlist(innvl, "snapnv", &snaps);
 	if (missing != 0)
 		return (SET_ERROR(EINVAL));
 
 	/* An empty snapshot list is rejected before bookname is looked up */
 	if (fnvlist_num_pairs(snaps) == 0)
 		return (SET_ERROR(ENXIO));
 
 	missing = nvlist_lookup_string(innvl, "bookname", &bookname);
 	if (missing != 0)
 		return (SET_ERROR(EINVAL));
 
 	return (dmu_redact_snap(snapname, snaps, bookname));
 }
 
 /*
  * inputs:
  * zc_name	old name of dataset
  * zc_value	new name of dataset
  * zc_cookie	recursive flag (only valid for snapshots)
  *
  * outputs:	none
  */
 /*
  * Rename a dataset or snapshot.  Bit 0 of zc_cookie requests a recursive
  * snapshot rename; bit 1 suppresses the unmount of affected snapshots.
  *
  * Returns EINVAL if either name fails dataset_namecheck() or contains '%',
  * EXDEV if a snapshot would move to a different filesystem, and otherwise
  * the error from the hold, unmount walk or rename operation.
  */
 static int
 zfs_ioc_rename(zfs_cmd_t *zc)
 {
 	objset_t *os;
 	dmu_objset_type_t ost;
 	boolean_t recursive = zc->zc_cookie & 1;
 	boolean_t nounmount = !!(zc->zc_cookie & 2);
 	char *at;
 	int err;
 
 	/* "zfs rename" from and to ...%recv datasets should both fail */
 	/* Force termination of both buffers before treating them as strings */
 	zc->zc_name[sizeof (zc->zc_name) - 1] = '\0';
 	zc->zc_value[sizeof (zc->zc_value) - 1] = '\0';
 	if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0 ||
 	    dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
 	    strchr(zc->zc_name, '%') || strchr(zc->zc_value, '%'))
 		return (SET_ERROR(EINVAL));
 
 	/* Hold the objset only long enough to learn its type */
 	err = dmu_objset_hold(zc->zc_name, FTAG, &os);
 	if (err != 0)
 		return (err);
 	ost = dmu_objset_type(os);
 	dmu_objset_rele(os, FTAG);
 
 	at = strchr(zc->zc_name, '@');
 	if (at != NULL) {
 		/* snaps must be in same fs */
 		int error;
 
 		/*
 		 * Compare everything up to and including the '@'.  A match
 		 * also guarantees zc_value contains an '@', which the
 		 * strchr() further down relies on.
 		 */
 		if (strncmp(zc->zc_name, zc->zc_value, at - zc->zc_name + 1))
 			return (SET_ERROR(EXDEV));
 		/*
 		 * Temporarily split zc_name in place into the filesystem name
 		 * and the snapshot short name (at + 1); the '@' is restored on
 		 * every path out of this branch.
 		 */
 		*at = '\0';
 		if (ost == DMU_OST_ZFS && !nounmount) {
 			error = dmu_objset_find(zc->zc_name,
 			    recursive_unmount, at + 1,
 			    recursive ? DS_FIND_CHILDREN : 0);
 			if (error != 0) {
 				*at = '@';
 				return (error);
 			}
 		}
 		error = dsl_dataset_rename_snapshot(zc->zc_name,
 		    at + 1, strchr(zc->zc_value, '@') + 1, recursive);
 		*at = '@';
 
 		return (error);
 	} else {
 		return (dsl_dir_rename(zc->zc_name, zc->zc_value));
 	}
 }
 
 static int
 zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr)
 {
 	const char *propname = nvpair_name(pair);
 	boolean_t issnap = (strchr(dsname, '@') != NULL);
 	zfs_prop_t prop = zfs_name_to_prop(propname);
 	uint64_t intval, compval;
 	int err;
 
 	if (prop == ZPROP_INVAL) {
 		if (zfs_prop_user(propname)) {
 			if ((err = zfs_secpolicy_write_perms(dsname,
 			    ZFS_DELEG_PERM_USERPROP, cr)))
 				return (err);
 			return (0);
 		}
 
 		if (!issnap && zfs_prop_userquota(propname)) {
 			const char *perm = NULL;
 			const char *uq_prefix =
 			    zfs_userquota_prop_prefixes[ZFS_PROP_USERQUOTA];
 			const char *gq_prefix =
 			    zfs_userquota_prop_prefixes[ZFS_PROP_GROUPQUOTA];
 			const char *uiq_prefix =
 			    zfs_userquota_prop_prefixes[ZFS_PROP_USEROBJQUOTA];
 			const char *giq_prefix =
 			    zfs_userquota_prop_prefixes[ZFS_PROP_GROUPOBJQUOTA];
 			const char *pq_prefix =
 			    zfs_userquota_prop_prefixes[ZFS_PROP_PROJECTQUOTA];
 			const char *piq_prefix = zfs_userquota_prop_prefixes[\
 			    ZFS_PROP_PROJECTOBJQUOTA];
 
 			if (strncmp(propname, uq_prefix,
 			    strlen(uq_prefix)) == 0) {
 				perm = ZFS_DELEG_PERM_USERQUOTA;
 			} else if (strncmp(propname, uiq_prefix,
 			    strlen(uiq_prefix)) == 0) {
 				perm = ZFS_DELEG_PERM_USEROBJQUOTA;
 			} else if (strncmp(propname, gq_prefix,
 			    strlen(gq_prefix)) == 0) {
 				perm = ZFS_DELEG_PERM_GROUPQUOTA;
 			} else if (strncmp(propname, giq_prefix,
 			    strlen(giq_prefix)) == 0) {
 				perm = ZFS_DELEG_PERM_GROUPOBJQUOTA;
 			} else if (strncmp(propname, pq_prefix,
 			    strlen(pq_prefix)) == 0) {
 				perm = ZFS_DELEG_PERM_PROJECTQUOTA;
 			} else if (strncmp(propname, piq_prefix,
 			    strlen(piq_prefix)) == 0) {
 				perm = ZFS_DELEG_PERM_PROJECTOBJQUOTA;
 			} else {
 				/* {USER|GROUP|PROJECT}USED are read-only */
 				return (SET_ERROR(EINVAL));
 			}
 
 			if ((err = zfs_secpolicy_write_perms(dsname, perm, cr)))
 				return (err);
 			return (0);
 		}
 
 		return (SET_ERROR(EINVAL));
 	}
 
 	if (issnap)
 		return (SET_ERROR(EINVAL));
 
 	if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
 		/*
 		 * dsl_prop_get_all_impl() returns properties in this
 		 * format.
 		 */
 		nvlist_t *attrs;
 		VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
 		VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
 		    &pair) == 0);
 	}
 
 	/*
 	 * Check that this value is valid for this pool version
 	 */
 	switch (prop) {
 	case ZFS_PROP_COMPRESSION:
 		/*
 		 * If the user specified gzip compression, make sure
 		 * the SPA supports it. We ignore any errors here since
 		 * we'll catch them later.
 		 */
 		if (nvpair_value_uint64(pair, &intval) == 0) {
 			compval = ZIO_COMPRESS_ALGO(intval);
 			if (compval >= ZIO_COMPRESS_GZIP_1 &&
 			    compval <= ZIO_COMPRESS_GZIP_9 &&
 			    zfs_earlier_version(dsname,
 			    SPA_VERSION_GZIP_COMPRESSION)) {
 				return (SET_ERROR(ENOTSUP));
 			}
 
 			if (compval == ZIO_COMPRESS_ZLE &&
 			    zfs_earlier_version(dsname,
 			    SPA_VERSION_ZLE_COMPRESSION))
 				return (SET_ERROR(ENOTSUP));
 
 			if (compval == ZIO_COMPRESS_LZ4) {
 				spa_t *spa;
 
 				if ((err = spa_open(dsname, &spa, FTAG)) != 0)
 					return (err);
 
 				if (!spa_feature_is_enabled(spa,
 				    SPA_FEATURE_LZ4_COMPRESS)) {
 					spa_close(spa, FTAG);
 					return (SET_ERROR(ENOTSUP));
 				}
 				spa_close(spa, FTAG);
 			}
 
 			if (compval == ZIO_COMPRESS_ZSTD) {
 				spa_t *spa;
 
 				if ((err = spa_open(dsname, &spa, FTAG)) != 0)
 					return (err);
 
 				if (!spa_feature_is_enabled(spa,
 				    SPA_FEATURE_ZSTD_COMPRESS)) {
 					spa_close(spa, FTAG);
 					return (SET_ERROR(ENOTSUP));
 				}
 				spa_close(spa, FTAG);
 			}
 		}
 		break;
 
 	case ZFS_PROP_COPIES:
 		if (zfs_earlier_version(dsname, SPA_VERSION_DITTO_BLOCKS))
 			return (SET_ERROR(ENOTSUP));
 		break;
 
 	case ZFS_PROP_VOLBLOCKSIZE:
 	case ZFS_PROP_RECORDSIZE:
 		/* Record sizes above 128k need the feature to be enabled */
 		if (nvpair_value_uint64(pair, &intval) == 0 &&
 		    intval > SPA_OLD_MAXBLOCKSIZE) {
 			spa_t *spa;
 
 			/*
 			 * We don't allow setting the property above 1MB,
 			 * unless the tunable has been changed.
 			 */
 			if (intval > zfs_max_recordsize ||
 			    intval > SPA_MAXBLOCKSIZE)
 				return (SET_ERROR(ERANGE));
 
 			if ((err = spa_open(dsname, &spa, FTAG)) != 0)
 				return (err);
 
 			if (!spa_feature_is_enabled(spa,
 			    SPA_FEATURE_LARGE_BLOCKS)) {
 				spa_close(spa, FTAG);
 				return (SET_ERROR(ENOTSUP));
 			}
 			spa_close(spa, FTAG);
 		}
 		break;
 
 	case ZFS_PROP_DNODESIZE:
 		/* Dnode sizes above 512 need the feature to be enabled */
 		if (nvpair_value_uint64(pair, &intval) == 0 &&
 		    intval != ZFS_DNSIZE_LEGACY) {
 			spa_t *spa;
 
 			if ((err = spa_open(dsname, &spa, FTAG)) != 0)
 				return (err);
 
 			if (!spa_feature_is_enabled(spa,
 			    SPA_FEATURE_LARGE_DNODE)) {
 				spa_close(spa, FTAG);
 				return (SET_ERROR(ENOTSUP));
 			}
 			spa_close(spa, FTAG);
 		}
 		break;
 
 	case ZFS_PROP_SPECIAL_SMALL_BLOCKS:
 		/*
 		 * This property could require the allocation classes
 		 * feature to be active for setting, however we allow
 		 * it so that tests of settable properties succeed.
 		 * The CLI will issue a warning in this case.
 		 */
 		break;
 
 	case ZFS_PROP_SHARESMB:
 		if (zpl_earlier_version(dsname, ZPL_VERSION_FUID))
 			return (SET_ERROR(ENOTSUP));
 		break;
 
 	case ZFS_PROP_ACLINHERIT:
 		if (nvpair_type(pair) == DATA_TYPE_UINT64 &&
 		    nvpair_value_uint64(pair, &intval) == 0) {
 			if (intval == ZFS_ACL_PASSTHROUGH_X &&
 			    zfs_earlier_version(dsname,
 			    SPA_VERSION_PASSTHROUGH_X))
 				return (SET_ERROR(ENOTSUP));
 		}
 		break;
 	case ZFS_PROP_CHECKSUM:
 	case ZFS_PROP_DEDUP:
 	{
 		spa_feature_t feature;
 		spa_t *spa;
 		int err;
 
 		/* dedup feature version checks */
 		if (prop == ZFS_PROP_DEDUP &&
 		    zfs_earlier_version(dsname, SPA_VERSION_DEDUP))
 			return (SET_ERROR(ENOTSUP));
 
 		if (nvpair_type(pair) == DATA_TYPE_UINT64 &&
 		    nvpair_value_uint64(pair, &intval) == 0) {
 			/* check prop value is enabled in features */
 			feature = zio_checksum_to_feature(
 			    intval & ZIO_CHECKSUM_MASK);
 			if (feature == SPA_FEATURE_NONE)
 				break;
 
 			if ((err = spa_open(dsname, &spa, FTAG)) != 0)
 				return (err);
 
 			if (!spa_feature_is_enabled(spa, feature)) {
 				spa_close(spa, FTAG);
 				return (SET_ERROR(ENOTSUP));
 			}
 			spa_close(spa, FTAG);
 		}
 		break;
 	}
 
 	default:
 		break;
 	}
 
 	return (zfs_secpolicy_setprop(dsname, prop, pair, CRED()));
 }
 
 /*
  * Removes properties from the given props list that fail permission checks
  * needed to clear them and to restore them in case of a receive error. For each
  * property, make sure we have both set and inherit permissions.
  *
  * Returns the first error encountered if any permission checks fail. If the
  * caller provides a non-NULL errlist, it also gives the complete list of names
  * of all the properties that failed a permission check along with the
  * corresponding error numbers. The caller is responsible for freeing the
  * returned errlist.
  *
  * If every property checks out successfully, zero is returned and the list
  * pointed at by errlist is NULL.
  */
 static int
 zfs_check_clearable(const char *dataset, nvlist_t *props, nvlist_t **errlist)
 {
 	zfs_cmd_t *zc;
 	nvpair_t *pair, *next_pair;
 	nvlist_t *errors;
 	int err, rv = 0;
 
 	if (props == NULL) {
 		/*
 		 * Nothing to check.  Still honor the documented contract
 		 * that *errlist is NULL whenever zero is returned, so a
 		 * caller never frees an uninitialized pointer.
 		 */
 		if (errlist != NULL)
 			*errlist = NULL;
 		return (0);
 	}
 
 	VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0);
 
 	/*
 	 * zfs_secpolicy_inherit_prop() takes a zfs_cmd_t, so build a scratch
 	 * one carrying the dataset name; zc_value is refilled with each
 	 * property name in turn.
 	 */
 	zc = kmem_alloc(sizeof (zfs_cmd_t), KM_SLEEP);
 	(void) strlcpy(zc->zc_name, dataset, sizeof (zc->zc_name));
 	pair = nvlist_next_nvpair(props, NULL);
 	while (pair != NULL) {
 		/* Fetch the successor first: pair may be removed below */
 		next_pair = nvlist_next_nvpair(props, pair);
 
 		(void) strlcpy(zc->zc_value, nvpair_name(pair),
 		    sizeof (zc->zc_value));
 		if ((err = zfs_check_settable(dataset, pair, CRED())) != 0 ||
 		    (err = zfs_secpolicy_inherit_prop(zc, NULL, CRED())) != 0) {
 			VERIFY(nvlist_remove_nvpair(props, pair) == 0);
 			VERIFY(nvlist_add_int32(errors,
 			    zc->zc_value, err) == 0);
 		}
 		pair = next_pair;
 	}
 	kmem_free(zc, sizeof (zfs_cmd_t));
 
 	if ((pair = nvlist_next_nvpair(errors, NULL)) == NULL) {
 		/* No failures: report success with a NULL error list */
 		nvlist_free(errors);
 		errors = NULL;
 	} else {
 		/* The return value is the first recorded error */
 		VERIFY(nvpair_value_int32(pair, &rv) == 0);
 	}
 
 	if (errlist == NULL)
 		nvlist_free(errors);
 	else
 		*errlist = errors;
 
 	return (rv);
 }
 
 /*
  * Compare the values of two property nvpairs.  Either pair may be in the
  * dsl_prop_get_all_impl() format (an nvlist wrapping the value under
  * ZPROP_VALUE), in which case the wrapped value is compared.  Pairs of
  * different types are never equal; string values are compared with
  * strcmp() and anything else is read as a uint64.
  */
 static boolean_t
 propval_equals(nvpair_t *p1, nvpair_t *p2)
 {
 	nvlist_t *wrapper;
 	uint64_t num1, num2;
 
 	/* Unwrap p1 if it is in dsl_prop_get_all_impl() format */
 	if (nvpair_type(p1) == DATA_TYPE_NVLIST) {
 		VERIFY(nvpair_value_nvlist(p1, &wrapper) == 0);
 		VERIFY(nvlist_lookup_nvpair(wrapper, ZPROP_VALUE,
 		    &p1) == 0);
 	}
 
 	/* Likewise for p2 */
 	if (nvpair_type(p2) == DATA_TYPE_NVLIST) {
 		VERIFY(nvpair_value_nvlist(p2, &wrapper) == 0);
 		VERIFY(nvlist_lookup_nvpair(wrapper, ZPROP_VALUE,
 		    &p2) == 0);
 	}
 
 	if (nvpair_type(p1) != nvpair_type(p2))
 		return (B_FALSE);
 
 	if (nvpair_type(p1) == DATA_TYPE_STRING) {
 		char *str1, *str2;
 
 		VERIFY(nvpair_value_string(p1, (char **)&str1) == 0);
 		VERIFY(nvpair_value_string(p2, (char **)&str2) == 0);
 		return (strcmp(str1, str2) == 0);
 	}
 
 	VERIFY(nvpair_value_uint64(p1, &num1) == 0);
 	VERIFY(nvpair_value_uint64(p2, &num2) == 0);
 	return (num1 == num2);
 }
 
 /*
  * Remove properties from props if they are not going to change (as determined
  * by comparison with origprops). Remove them from origprops as well, since we
  * do not need to clear or restore properties that won't change.
  */
 static void
 props_reduce(nvlist_t *props, nvlist_t *origprops)
 {
 	nvpair_t *cur, *following;
 
 	/* Without existing properties, everything has to be received */
 	if (origprops == NULL)
 		return;
 
 	for (cur = nvlist_next_nvpair(props, NULL); cur != NULL;
 	    cur = following) {
 		nvpair_t *orig;
 
 		/* Fetch the successor first: cur may be removed below */
 		following = nvlist_next_nvpair(props, cur);
 
 		/*
 		 * An unchanged value needs neither clearing nor receiving,
 		 * so drop it from both lists.  Anything else stays put so
 		 * the received value gets set.
 		 */
 		if (nvlist_lookup_nvpair(origprops, nvpair_name(cur),
 		    &orig) == 0 && propval_equals(cur, orig)) {
 			(void) nvlist_remove_nvpair(origprops, orig);
 			(void) nvlist_remove_nvpair(props, cur);
 		}
 	}
 }
 
 /*
  * Extract properties that cannot be set PRIOR to the receipt of a dataset.
  * For example, refquota cannot be set until after the receipt of a dataset,
  * because in replication streams, an older/earlier snapshot may exceed the
  * refquota.  We want to receive the older/earlier snapshot, but setting
  * refquota pre-receipt will set the dsl's ACTUAL quota, which will prevent
  * the older/earlier snapshot from being received (with EDQUOT).
  *
  * The ZFS test "zfs_receive_011_pos" demonstrates such a scenario.
  *
  * libzfs will need to be judicious handling errors encountered by props
  * extracted by this function.
  */
 /*
  * Move every property listed in delayable[] out of props and into a newly
  * allocated nvlist.  Returns that list, or NULL when none of the delayable
  * properties were present (the empty list is freed here in that case).
  */
 static nvlist_t *
 extract_delay_props(nvlist_t *props)
 {
 	nvlist_t *delayprops;
 	nvpair_t *nvp, *tmp;
 	/* Properties to delay; the trailing 0 terminates the table */
 	static const zfs_prop_t delayable[] = {
 		ZFS_PROP_REFQUOTA,
 		ZFS_PROP_KEYLOCATION,
 		0
 	};
 	int i;
 
 	VERIFY(nvlist_alloc(&delayprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
 
 	for (nvp = nvlist_next_nvpair(props, NULL); nvp != NULL;
 	    nvp = nvlist_next_nvpair(props, nvp)) {
 		/*
 		 * strcmp() is safe because zfs_prop_to_name() always returns
 		 * a bounded string.
 		 */
 		for (i = 0; delayable[i] != 0; i++) {
 			if (strcmp(zfs_prop_to_name(delayable[i]),
 			    nvpair_name(nvp)) == 0) {
 				break;
 			}
 		}
 		/* Stopped before the terminator: nvp is a delayable prop */
 		if (delayable[i] != 0) {
 			/*
 			 * Remember the predecessor before removing nvp so the
 			 * loop's next step continues from there.  A NULL
 			 * predecessor makes nvlist_next_nvpair() start again
 			 * from the head of the list.
 			 */
 			tmp = nvlist_prev_nvpair(props, nvp);
 			VERIFY(nvlist_add_nvpair(delayprops, nvp) == 0);
 			VERIFY(nvlist_remove_nvpair(props, nvp) == 0);
 			nvp = tmp;
 		}
 	}
 
 	if (nvlist_empty(delayprops)) {
 		nvlist_free(delayprops);
 		delayprops = NULL;
 	}
 	return (delayprops);
 }
 
 /*
  * Destructor callback: release the pool name string passed as arg, if any.
  */
 static void
 zfs_allow_log_destroy(void *arg)
 {
 	if (arg == NULL)
 		return;
 
 	kmem_strfree((char *)arg);
 }
 
 #ifdef	ZFS_DEBUG
 static boolean_t zfs_ioc_recv_inject_err;
 #endif
 
 /*
  * nvlist 'errors' is always allocated. It will contain descriptions of
  * encountered errors, if any. It's the caller's responsibility to free it.
  */
 static int
 zfs_ioc_recv_impl(char *tofs, char *tosnap, char *origin, nvlist_t *recvprops,
     nvlist_t *localprops, nvlist_t *hidden_args, boolean_t force,
     boolean_t resumable, int input_fd,
     dmu_replay_record_t *begin_record, uint64_t *read_bytes,
     uint64_t *errflags, nvlist_t **errors)
 {
 	dmu_recv_cookie_t drc;
 	int error = 0;
 	int props_error = 0;
 	offset_t off, noff;
 	nvlist_t *local_delayprops = NULL;
 	nvlist_t *recv_delayprops = NULL;
 	nvlist_t *origprops = NULL; /* existing properties */
 	nvlist_t *origrecvd = NULL; /* existing received properties */
 	boolean_t first_recvd_props = B_FALSE;
 	boolean_t tofs_was_redacted;
 	zfs_file_t *input_fp;
 
 	*read_bytes = 0;
 	*errflags = 0;
 	*errors = fnvlist_alloc();
 	off = 0;
 
-	if ((error = zfs_file_get(input_fd, &input_fp)))
-		return (error);
+	if ((input_fp = zfs_file_get(input_fd)) == NULL)
+		return (SET_ERROR(EBADF));
 
 	noff = off = zfs_file_off(input_fp);
 	error = dmu_recv_begin(tofs, tosnap, begin_record, force,
 	    resumable, localprops, hidden_args, origin, &drc, input_fp,
 	    &off);
 	if (error != 0)
 		goto out;
 	tofs_was_redacted = dsl_get_redacted(drc.drc_ds);
 
 	/*
 	 * Set properties before we receive the stream so that they are applied
 	 * to the new data. Note that we must call dmu_recv_stream() if
 	 * dmu_recv_begin() succeeds.
 	 */
 	if (recvprops != NULL && !drc.drc_newfs) {
 		if (spa_version(dsl_dataset_get_spa(drc.drc_ds)) >=
 		    SPA_VERSION_RECVD_PROPS &&
 		    !dsl_prop_get_hasrecvd(tofs))
 			first_recvd_props = B_TRUE;
 
 		/*
 		 * If new received properties are supplied, they are to
 		 * completely replace the existing received properties,
 		 * so stash away the existing ones.
 		 */
 		if (dsl_prop_get_received(tofs, &origrecvd) == 0) {
 			nvlist_t *errlist = NULL;
 			/*
 			 * Don't bother writing a property if its value won't
 			 * change (and avoid the unnecessary security checks).
 			 *
 			 * The first receive after SPA_VERSION_RECVD_PROPS is a
 			 * special case where we blow away all local properties
 			 * regardless.
 			 */
 			if (!first_recvd_props)
 				props_reduce(recvprops, origrecvd);
 			if (zfs_check_clearable(tofs, origrecvd, &errlist) != 0)
 				(void) nvlist_merge(*errors, errlist, 0);
 			nvlist_free(errlist);
 
 			if (clear_received_props(tofs, origrecvd,
 			    first_recvd_props ? NULL : recvprops) != 0)
 				*errflags |= ZPROP_ERR_NOCLEAR;
 		} else {
 			*errflags |= ZPROP_ERR_NOCLEAR;
 		}
 	}
 
 	/*
 	 * Stash away existing properties so we can restore them on error unless
 	 * we're doing the first receive after SPA_VERSION_RECVD_PROPS, in which
 	 * case "origrecvd" will take care of that.
 	 */
 	if (localprops != NULL && !drc.drc_newfs && !first_recvd_props) {
 		objset_t *os;
 		if (dmu_objset_hold(tofs, FTAG, &os) == 0) {
 			if (dsl_prop_get_all(os, &origprops) != 0) {
 				*errflags |= ZPROP_ERR_NOCLEAR;
 			}
 			dmu_objset_rele(os, FTAG);
 		} else {
 			*errflags |= ZPROP_ERR_NOCLEAR;
 		}
 	}
 
 	if (recvprops != NULL) {
 		props_error = dsl_prop_set_hasrecvd(tofs);
 
 		if (props_error == 0) {
 			recv_delayprops = extract_delay_props(recvprops);
 			(void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_RECEIVED,
 			    recvprops, *errors);
 		}
 	}
 
 	if (localprops != NULL) {
 		nvlist_t *oprops = fnvlist_alloc();
 		nvlist_t *xprops = fnvlist_alloc();
 		nvpair_t *nvp = NULL;
 
 		while ((nvp = nvlist_next_nvpair(localprops, nvp)) != NULL) {
 			if (nvpair_type(nvp) == DATA_TYPE_BOOLEAN) {
 				/* -x property */
 				const char *name = nvpair_name(nvp);
 				zfs_prop_t prop = zfs_name_to_prop(name);
 				if (prop != ZPROP_INVAL) {
 					if (!zfs_prop_inheritable(prop))
 						continue;
 				} else if (!zfs_prop_user(name))
 					continue;
 				fnvlist_add_boolean(xprops, name);
 			} else {
 				/* -o property=value */
 				fnvlist_add_nvpair(oprops, nvp);
 			}
 		}
 
 		local_delayprops = extract_delay_props(oprops);
 		(void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_LOCAL,
 		    oprops, *errors);
 		(void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_INHERITED,
 		    xprops, *errors);
 
 		nvlist_free(oprops);
 		nvlist_free(xprops);
 	}
 
 	error = dmu_recv_stream(&drc, &off);
 
 	if (error == 0) {
 		zfsvfs_t *zfsvfs = NULL;
 		zvol_state_handle_t *zv = NULL;
 
 		if (getzfsvfs(tofs, &zfsvfs) == 0) {
 			/* online recv */
 			dsl_dataset_t *ds;
 			int end_err;
 			boolean_t stream_is_redacted = DMU_GET_FEATUREFLAGS(
 			    begin_record->drr_u.drr_begin.
 			    drr_versioninfo) & DMU_BACKUP_FEATURE_REDACTED;
 
 			ds = dmu_objset_ds(zfsvfs->z_os);
 			error = zfs_suspend_fs(zfsvfs);
 			/*
 			 * If the suspend fails, then the recv_end will
 			 * likely also fail, and clean up after itself.
 			 */
 			end_err = dmu_recv_end(&drc, zfsvfs);
 			/*
 			 * If the dataset was not redacted, but we received a
 			 * redacted stream onto it, we need to unmount the
 			 * dataset.  Otherwise, resume the filesystem.
 			 */
 			if (error == 0 && !drc.drc_newfs &&
 			    stream_is_redacted && !tofs_was_redacted) {
 				error = zfs_end_fs(zfsvfs, ds);
 			} else if (error == 0) {
 				error = zfs_resume_fs(zfsvfs, ds);
 			}
 			error = error ? error : end_err;
 			zfs_vfs_rele(zfsvfs);
 		} else if ((zv = zvol_suspend(tofs)) != NULL) {
 			error = dmu_recv_end(&drc, zvol_tag(zv));
 			zvol_resume(zv);
 		} else {
 			error = dmu_recv_end(&drc, NULL);
 		}
 
 		/* Set delayed properties now, after we're done receiving. */
 		if (recv_delayprops != NULL && error == 0) {
 			(void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_RECEIVED,
 			    recv_delayprops, *errors);
 		}
 		if (local_delayprops != NULL && error == 0) {
 			(void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_LOCAL,
 			    local_delayprops, *errors);
 		}
 	}
 
 	/*
 	 * Merge delayed props back in with initial props, in case
 	 * we're DEBUG and zfs_ioc_recv_inject_err is set (which means
 	 * we have to make sure clear_received_props() includes
 	 * the delayed properties).
 	 *
 	 * Since zfs_ioc_recv_inject_err is only in DEBUG kernels,
 	 * using ASSERT() will be just like a VERIFY.
 	 */
 	if (recv_delayprops != NULL) {
 		ASSERT(nvlist_merge(recvprops, recv_delayprops, 0) == 0);
 		nvlist_free(recv_delayprops);
 	}
 	if (local_delayprops != NULL) {
 		ASSERT(nvlist_merge(localprops, local_delayprops, 0) == 0);
 		nvlist_free(local_delayprops);
 	}
 	*read_bytes = off - noff;
 
 #ifdef	ZFS_DEBUG
 	if (zfs_ioc_recv_inject_err) {
 		zfs_ioc_recv_inject_err = B_FALSE;
 		error = 1;
 	}
 #endif
 
 	/*
 	 * On error, restore the original props.
 	 */
 	if (error != 0 && recvprops != NULL && !drc.drc_newfs) {
 		if (clear_received_props(tofs, recvprops, NULL) != 0) {
 			/*
 			 * We failed to clear the received properties.
 			 * Since we may have left a $recvd value on the
 			 * system, we can't clear the $hasrecvd flag.
 			 */
 			*errflags |= ZPROP_ERR_NORESTORE;
 		} else if (first_recvd_props) {
 			dsl_prop_unset_hasrecvd(tofs);
 		}
 
 		if (origrecvd == NULL && !drc.drc_newfs) {
 			/* We failed to stash the original properties. */
 			*errflags |= ZPROP_ERR_NORESTORE;
 		}
 
 		/*
 		 * dsl_props_set() will not convert RECEIVED to LOCAL on or
 		 * after SPA_VERSION_RECVD_PROPS, so we need to specify LOCAL
 		 * explicitly if we're restoring local properties cleared in the
 		 * first new-style receive.
 		 */
 		if (origrecvd != NULL &&
 		    zfs_set_prop_nvlist(tofs, (first_recvd_props ?
 		    ZPROP_SRC_LOCAL : ZPROP_SRC_RECEIVED),
 		    origrecvd, NULL) != 0) {
 			/*
 			 * We stashed the original properties but failed to
 			 * restore them.
 			 */
 			*errflags |= ZPROP_ERR_NORESTORE;
 		}
 	}
 	if (error != 0 && localprops != NULL && !drc.drc_newfs &&
 	    !first_recvd_props) {
 		nvlist_t *setprops;
 		nvlist_t *inheritprops;
 		nvpair_t *nvp;
 
 		if (origprops == NULL) {
 			/* We failed to stash the original properties. */
 			*errflags |= ZPROP_ERR_NORESTORE;
 			goto out;
 		}
 
 		/* Restore original props */
 		setprops = fnvlist_alloc();
 		inheritprops = fnvlist_alloc();
 		nvp = NULL;
 		while ((nvp = nvlist_next_nvpair(localprops, nvp)) != NULL) {
 			const char *name = nvpair_name(nvp);
 			const char *source;
 			nvlist_t *attrs;
 
 			if (!nvlist_exists(origprops, name)) {
 				/*
 				 * Property was not present or was explicitly
 				 * inherited before the receive, restore this.
 				 */
 				fnvlist_add_boolean(inheritprops, name);
 				continue;
 			}
 			attrs = fnvlist_lookup_nvlist(origprops, name);
 			source = fnvlist_lookup_string(attrs, ZPROP_SOURCE);
 
 			/* Skip received properties */
 			if (strcmp(source, ZPROP_SOURCE_VAL_RECVD) == 0)
 				continue;
 
 			if (strcmp(source, tofs) == 0) {
 				/* Property was locally set */
 				fnvlist_add_nvlist(setprops, name, attrs);
 			} else {
 				/* Property was implicitly inherited */
 				fnvlist_add_boolean(inheritprops, name);
 			}
 		}
 
 		if (zfs_set_prop_nvlist(tofs, ZPROP_SRC_LOCAL, setprops,
 		    NULL) != 0)
 			*errflags |= ZPROP_ERR_NORESTORE;
 		if (zfs_set_prop_nvlist(tofs, ZPROP_SRC_INHERITED, inheritprops,
 		    NULL) != 0)
 			*errflags |= ZPROP_ERR_NORESTORE;
 
 		nvlist_free(setprops);
 		nvlist_free(inheritprops);
 	}
 out:
-	zfs_file_put(input_fd);
+	zfs_file_put(input_fp);
 	nvlist_free(origrecvd);
 	nvlist_free(origprops);
 
 	if (error == 0)
 		error = props_error;
 
 	return (error);
 }
 
 /*
  * Legacy (zfs_cmd_t based) receive entry point.  Validates the target
  * snapshot name, unpacks the optional property nvlists and hands the work
  * to zfs_ioc_recv_impl().
  *
  * inputs:
  * zc_name		name of containing filesystem (unused)
  * zc_nvlist_src{_size}	nvlist of properties to apply
  * zc_nvlist_conf{_size}	nvlist of properties to exclude
  *			(DATA_TYPE_BOOLEAN) and override (everything else)
  * zc_value		name of snapshot to create
  * zc_string		name of clone origin (if DRR_FLAG_CLONE)
  * zc_cookie		file descriptor to recv from
  * zc_begin_record	the BEGIN record of the stream (not byteswapped)
  * zc_guid		force flag
  *
  * outputs:
  * zc_cookie		number of bytes read
  * zc_obj		zprop_errflags_t
  * zc_nvlist_dst{_size} error for each unapplied received property
  */
 static int
 zfs_ioc_recv(zfs_cmd_t *zc)
 {
 	dmu_replay_record_t begin_record;
 	nvlist_t *errors = NULL;
 	nvlist_t *recvdprops = NULL;
 	nvlist_t *localprops = NULL;
 	char *origin = NULL;
 	char *tosnap;
 	char tofs[ZFS_MAX_DATASET_NAME_LEN];
 	int error = 0;
 
 	/* The target must be a valid "fs@snap" name without a '%'. */
 	if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
 	    strchr(zc->zc_value, '@') == NULL ||
 	    strchr(zc->zc_value, '%'))
 		return (SET_ERROR(EINVAL));
 
 	/* Split "fs@snap" in place: tofs keeps the fs, tosnap the snap. */
 	(void) strlcpy(tofs, zc->zc_value, sizeof (tofs));
 	tosnap = strchr(tofs, '@');
 	*tosnap++ = '\0';
 
 	if (zc->zc_nvlist_src != 0 &&
 	    (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
 	    zc->zc_iflags, &recvdprops)) != 0)
 		return (error);
 
 	if (zc->zc_nvlist_conf != 0 &&
 	    (error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
 	    zc->zc_iflags, &localprops)) != 0) {
 		/* Don't leak the received props unpacked just above. */
 		nvlist_free(recvdprops);
 		return (error);
 	}
 
 	if (zc->zc_string[0])
 		origin = zc->zc_string;
 
 	begin_record.drr_type = DRR_BEGIN;
 	begin_record.drr_payloadlen = 0;
 	begin_record.drr_u.drr_begin = zc->zc_begin_record;
 
 	error = zfs_ioc_recv_impl(tofs, tosnap, origin, recvdprops, localprops,
 	    NULL, zc->zc_guid, B_FALSE, zc->zc_cookie, &begin_record,
 	    &zc->zc_cookie, &zc->zc_obj, &errors);
 	nvlist_free(recvdprops);
 	nvlist_free(localprops);
 
 	/*
 	 * Now that all props, initial and delayed, are set, report the prop
 	 * errors to the caller.
 	 */
 	if (zc->zc_nvlist_dst_size != 0 && errors != NULL &&
 	    (nvlist_smush(errors, zc->zc_nvlist_dst_size) != 0 ||
 	    put_nvlist(zc, errors) != 0)) {
 		/*
 		 * Caller made zc->zc_nvlist_dst less than the minimum expected
 		 * size or supplied an invalid address.
 		 */
 		error = SET_ERROR(EINVAL);
 	}
 
 	nvlist_free(errors);
 
 	return (error);
 }
 
 /*
  * nvlist based receive entry point.
  *
  * innvl: {
  *     "snapname" -> full name of the snapshot to create
  *     (optional) "props" -> received properties to set (nvlist)
  *     (optional) "localprops" -> override and exclude properties (nvlist)
  *     (optional) "origin" -> name of clone origin (DRR_FLAG_CLONE)
  *     "begin_record" -> non-byteswapped dmu_replay_record_t
  *     "input_fd" -> file descriptor to read stream from (int32)
  *     (optional) "force" -> force flag (value ignored)
  *     (optional) "resumable" -> resumable flag (value ignored)
  *     (optional) "cleanup_fd" -> unused
  *     (optional) "action_handle" -> unused
  *     (optional) "hidden_args" -> { "wkeydata" -> value }
  * }
  *
  * outnvl: {
  *     "read_bytes" -> number of bytes read
  *     "error_flags" -> zprop_errflags_t
  *     "errors" -> error for each unapplied received property (nvlist)
  * }
  */
 static const zfs_ioc_key_t zfs_keys_recv_new[] = {
 	{"snapname",		DATA_TYPE_STRING,	0},
 	{"props",		DATA_TYPE_NVLIST,	ZK_OPTIONAL},
 	{"localprops",		DATA_TYPE_NVLIST,	ZK_OPTIONAL},
 	{"origin",		DATA_TYPE_STRING,	ZK_OPTIONAL},
 	{"begin_record",	DATA_TYPE_BYTE_ARRAY,	0},
 	{"input_fd",		DATA_TYPE_INT32,	0},
 	{"force",		DATA_TYPE_BOOLEAN,	ZK_OPTIONAL},
 	{"resumable",		DATA_TYPE_BOOLEAN,	ZK_OPTIONAL},
 	{"cleanup_fd",		DATA_TYPE_INT32,	ZK_OPTIONAL},
 	{"action_handle",	DATA_TYPE_UINT64,	ZK_OPTIONAL},
 	{"hidden_args",		DATA_TYPE_NVLIST,	ZK_OPTIONAL},
 };
 
 static int
 zfs_ioc_recv_new(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
 {
 	char tofs[ZFS_MAX_DATASET_NAME_LEN];
 	dmu_replay_record_t *begin_record;
 	uint_t begin_record_size;
 	nvlist_t *hidden_args = NULL;
 	nvlist_t *localprops = NULL;
 	nvlist_t *recvprops = NULL;
 	nvlist_t *errors = NULL;
 	uint64_t errflags = 0;
 	uint64_t read_bytes = 0;
 	boolean_t resumable;
 	boolean_t force;
 	char *origin = NULL;
 	char *snapname;
 	char *tosnap;
 	char *at;
 	int input_fd;
 	int error;
 
 	snapname = fnvlist_lookup_string(innvl, "snapname");
 
 	/* The target must be a valid "fs@snap" name without a '%'. */
 	if (dataset_namecheck(snapname, NULL, NULL) != 0)
 		return (SET_ERROR(EINVAL));
 	if (strchr(snapname, '@') == NULL || strchr(snapname, '%') != NULL)
 		return (SET_ERROR(EINVAL));
 
 	/* Cut the copy at the '@': tofs is the fs, tosnap the snapshot. */
 	(void) strlcpy(tofs, snapname, sizeof (tofs));
 	at = strchr(tofs, '@');
 	*at = '\0';
 	tosnap = at + 1;
 
 	error = nvlist_lookup_string(innvl, "origin", &origin);
 	if (error != 0 && error != ENOENT)
 		return (error);
 
 	error = nvlist_lookup_byte_array(innvl, "begin_record",
 	    (uchar_t **)&begin_record, &begin_record_size);
 	if (error != 0)
 		return (SET_ERROR(EINVAL));
 	if (begin_record_size != sizeof (*begin_record))
 		return (SET_ERROR(EINVAL));
 
 	input_fd = fnvlist_lookup_int32(innvl, "input_fd");
 
 	/* Only the presence of these keys matters, not their value. */
 	force = nvlist_exists(innvl, "force");
 	resumable = nvlist_exists(innvl, "resumable");
 
 	/* we still use "props" here for backwards compatibility */
 	error = nvlist_lookup_nvlist(innvl, "props", &recvprops);
 	if (error != 0 && error != ENOENT)
 		return (error);
 
 	error = nvlist_lookup_nvlist(innvl, "localprops", &localprops);
 	if (error != 0 && error != ENOENT)
 		return (error);
 
 	error = nvlist_lookup_nvlist(innvl, ZPOOL_HIDDEN_ARGS, &hidden_args);
 	if (error != 0 && error != ENOENT)
 		return (error);
 
 	error = zfs_ioc_recv_impl(tofs, tosnap, origin, recvprops, localprops,
 	    hidden_args, force, resumable, input_fd, begin_record,
 	    &read_bytes, &errflags, &errors);
 
 	/* The results are reported whether or not the receive succeeded. */
 	fnvlist_add_uint64(outnvl, "read_bytes", read_bytes);
 	fnvlist_add_uint64(outnvl, "error_flags", errflags);
 	fnvlist_add_nvlist(outnvl, "errors", errors);
 
 	nvlist_free(errors);
 	nvlist_free(recvprops);
 	nvlist_free(localprops);
 
 	return (error);
 }
 
 /*
  * Argument bundle handed from dump_bytes() to dump_bytes_cb().
  */
 typedef struct dump_bytes_io {
 	zfs_file_t	*dbi_fp;	/* file to write to */
 	caddr_t		dbi_buf;	/* data to write */
 	int		dbi_len;	/* number of bytes to write */
 	int		dbi_err;	/* result of zfs_file_write() */
 } dump_bytes_io_t;
 
 /*
  * Write the buffer described by the dump_bytes_io_t in 'arg' to its file
  * and record the result in dbi_err.
  */
 static void
 dump_bytes_cb(void *arg)
 {
 	dump_bytes_io_t *io = (dump_bytes_io_t *)arg;
 
 	io->dbi_err = zfs_file_write(io->dbi_fp, io->dbi_buf, io->dbi_len,
 	    NULL);
 }
 
 /*
  * Output callback for send streams: write 'len' bytes from 'buf' to the
  * zfs_file_t passed as 'arg' and return the write's result.
  */
 static int
 dump_bytes(objset_t *os, void *buf, int len, void *arg)
 {
 	dump_bytes_io_t io;
 
 	io.dbi_fp = arg;
 	io.dbi_buf = buf;
 	io.dbi_len = len;
 
 #if defined(HAVE_LARGE_STACKS)
 	/* The stack is large enough to do the write directly. */
 	dump_bytes_cb(&io);
 #else
 	/*
 	 * The write is performed in a taskq to ensure that there is always
 	 * enough stack space to write safely to the target filesystem.
 	 * The ZIO_TYPE_FREE threads are used because there can be a lot of
 	 * them and they are used in vdev_file.c for a similar purpose.
 	 */
 	spa_taskq_dispatch_sync(dmu_objset_spa(os), ZIO_TYPE_FREE,
 	    ZIO_TASKQ_ISSUE, dump_bytes_cb, &io, TQ_SLEEP);
 #endif /* HAVE_LARGE_STACKS */
 
 	return (io.dbi_err);
 }
 
 /*
  * Legacy (zfs_cmd_t based) send entry point: either estimate the size of
  * a send stream or write the stream to a file descriptor.
  *
  * inputs:
  * zc_name	name of snapshot to send
  * zc_cookie	file descriptor to send stream to
  * zc_obj	fromorigin flag (mutually exclusive with zc_fromobj)
  * zc_sendobj	objsetid of snapshot to send
  * zc_fromobj	objsetid of incremental fromsnap (may be zero)
  * zc_guid	if set, estimate size of stream only.  zc_cookie is ignored.
  *		output size in zc_objset_type.
  * zc_flags	lzc_send_flags
  *
  * outputs:
  * zc_objset_type	estimated size, if zc_guid is set
  *
  * NOTE: This is no longer the preferred interface, any new functionality
  *	  should be added to zfs_ioc_send_new() instead.
  */
 static int
 zfs_ioc_send(zfs_cmd_t *zc)
 {
 	int error;
 	offset_t off;
 	boolean_t estimate = (zc->zc_guid != 0);
 	/* Individual bits of zc_flags select optional stream features. */
 	boolean_t embedok = (zc->zc_flags & 0x1);
 	boolean_t large_block_ok = (zc->zc_flags & 0x2);
 	boolean_t compressok = (zc->zc_flags & 0x4);
 	boolean_t rawok = (zc->zc_flags & 0x8);
 	boolean_t savedok = (zc->zc_flags & 0x10);
 
 	/*
 	 * fromorigin: if the snapshot belongs to a clone, use the clone's
 	 * origin as the incremental source.
 	 */
 	if (zc->zc_obj != 0) {
 		dsl_pool_t *dp;
 		dsl_dataset_t *tosnap;
 
 		error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
 		if (error != 0)
 			return (error);
 
 		error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &tosnap);
 		if (error != 0) {
 			dsl_pool_rele(dp, FTAG);
 			return (error);
 		}
 
 		if (dsl_dir_is_clone(tosnap->ds_dir))
 			zc->zc_fromobj =
 			    dsl_dir_phys(tosnap->ds_dir)->dd_origin_obj;
 		dsl_dataset_rele(tosnap, FTAG);
 		dsl_pool_rele(dp, FTAG);
 	}
 
 	if (estimate) {
 		dsl_pool_t *dp;
 		dsl_dataset_t *tosnap;
 		dsl_dataset_t *fromsnap = NULL;
 
 		error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
 		if (error != 0)
 			return (error);
 
 		error = dsl_dataset_hold_obj(dp, zc->zc_sendobj,
 		    FTAG, &tosnap);
 		if (error != 0) {
 			dsl_pool_rele(dp, FTAG);
 			return (error);
 		}
 
 		if (zc->zc_fromobj != 0) {
 			error = dsl_dataset_hold_obj(dp, zc->zc_fromobj,
 			    FTAG, &fromsnap);
 			if (error != 0) {
 				dsl_dataset_rele(tosnap, FTAG);
 				dsl_pool_rele(dp, FTAG);
 				return (error);
 			}
 		}
 
 		/* The estimate is returned to the caller in zc_objset_type. */
 		error = dmu_send_estimate_fast(tosnap, fromsnap, NULL,
 		    compressok || rawok, savedok, &zc->zc_objset_type);
 
 		if (fromsnap != NULL)
 			dsl_dataset_rele(fromsnap, FTAG);
 		dsl_dataset_rele(tosnap, FTAG);
 		dsl_pool_rele(dp, FTAG);
 	} else {
 		zfs_file_t *fp;
 		dmu_send_outparams_t out = {0};
 
-		if ((error = zfs_file_get(zc->zc_cookie, &fp)))
-			return (error);
+		if ((fp = zfs_file_get(zc->zc_cookie)) == NULL)
+			return (SET_ERROR(EBADF));
 
 		/* Stream bytes are written to fp through dump_bytes(). */
 		off = zfs_file_off(fp);
 		out.dso_outfunc = dump_bytes;
 		out.dso_arg = fp;
 		out.dso_dryrun = B_FALSE;
 		error = dmu_send_obj(zc->zc_name, zc->zc_sendobj,
 		    zc->zc_fromobj, embedok, large_block_ok, compressok,
 		    rawok, savedok, zc->zc_cookie, &off, &out);
 
-		zfs_file_put(zc->zc_cookie);
+		zfs_file_put(fp);
 	}
 	return (error);
 }
 
 /*
  * Report the progress of a send stream that the calling process started
  * on the given file descriptor.
  *
  * inputs:
  * zc_name		name of snapshot on which to report progress
  * zc_cookie		file descriptor of send stream
  *
  * outputs:
  * zc_cookie		number of bytes written in send stream thus far
  * zc_objset_type	logical size of data traversed by send thus far
  */
 static int
 zfs_ioc_send_progress(zfs_cmd_t *zc)
 {
 	dmu_sendstatus_t *dsp;
 	dsl_dataset_t *ds;
 	dsl_pool_t *dp;
 	int error;
 
 	error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
 	if (error != 0)
 		return (error);
 
 	error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &ds);
 	if (error != 0) {
 		dsl_pool_rele(dp, FTAG);
 		return (error);
 	}
 
 	mutex_enter(&ds->ds_sendstream_lock);
 
 	/*
 	 * Walk the send streams currently active on this dataset looking
 	 * for the one that matches the specified file descriptor _and_ was
 	 * started by the current process.
 	 */
 	dsp = list_head(&ds->ds_sendstreams);
 	while (dsp != NULL) {
 		if (dsp->dss_outfd == zc->zc_cookie &&
 		    zfs_proc_is_caller(dsp->dss_proc))
 			break;
 		dsp = list_next(&ds->ds_sendstreams, dsp);
 	}
 
 	if (dsp == NULL) {
 		error = SET_ERROR(ENOENT);
 	} else {
 		/* This is the closest thing we have to atomic_read_64. */
 		zc->zc_cookie = atomic_cas_64((volatile uint64_t *)dsp->dss_off,
 		    0, 0);
 		zc->zc_objset_type = atomic_cas_64(&dsp->dss_blocks, 0, 0);
 	}
 
 	mutex_exit(&ds->ds_sendstream_lock);
 	dsl_dataset_rele(ds, FTAG);
 	dsl_pool_rele(dp, FTAG);
 	return (error);
 }
 
 /*
  * Register the injection record in zc_inject_record via
  * zio_inject_fault(); on success the id it hands back is returned to the
  * caller in zc_guid.
  */
 static int
 zfs_ioc_inject_fault(zfs_cmd_t *zc)
 {
 	int id;
 	int error = zio_inject_fault(zc->zc_name, (int)zc->zc_guid, &id,
 	    &zc->zc_inject_record);
 
 	if (error != 0)
 		return (error);
 
 	zc->zc_guid = (uint64_t)id;
 	return (0);
 }
 
 /*
  * Clear the injected fault whose id is passed in zc_guid.
  */
 static int
 zfs_ioc_clear_fault(zfs_cmd_t *zc)
 {
 	int id = (int)zc->zc_guid;
 
 	return (zio_clear_fault(id));
 }
 
 /*
  * Step through the injection records: zc_guid supplies the current id
  * and receives the id as updated by zio_inject_list_next(), whether or
  * not the call succeeded.
  */
 static int
 zfs_ioc_inject_list_next(zfs_cmd_t *zc)
 {
 	int next_id = (int)zc->zc_guid;
 	int rc;
 
 	rc = zio_inject_list_next(&next_id, zc->zc_name, sizeof (zc->zc_name),
 	    &zc->zc_inject_record);
 	zc->zc_guid = next_id;
 
 	return (rc);
 }
 
 /*
  * Copy the pool's error log out to the buffer at zc_nvlist_dst.  On
  * success zc_nvlist_dst_size is set to the count left by
  * spa_get_errlog(); on failure it is set to spa_get_errlog_size().
  */
 static int
 zfs_ioc_error_log(zfs_cmd_t *zc)
 {
 	size_t count = (size_t)zc->zc_nvlist_dst_size;
 	spa_t *spa;
 	int error;
 
 	error = spa_open(zc->zc_name, &spa, FTAG);
 	if (error != 0)
 		return (error);
 
 	error = spa_get_errlog(spa, (void *)(uintptr_t)zc->zc_nvlist_dst,
 	    &count);
 	if (error != 0)
 		zc->zc_nvlist_dst_size = spa_get_errlog_size(spa);
 	else
 		zc->zc_nvlist_dst_size = count;
 
 	spa_close(spa, FTAG);
 
 	return (error);
 }
 
 /*
  * Clear the error state of a pool, or of one vdev in it, and resume any
  * suspended I/O.
  *
  * inputs:
  * zc_name		name of the pool
  * zc_guid		guid of the vdev to clear; 0 passes no specific vdev
  *			to vdev_clear()
  * zc_cookie		ZPOOL_NO_REWIND to open the pool without a rewind
  *			policy
  * zc_nvlist_src{_size}	rewind policy (required unless ZPOOL_NO_REWIND)
  *
  * outputs:
  * zc_nvlist_dst{_size}	config handed back by spa_open_rewind(), if any
  */
 static int
 zfs_ioc_clear(zfs_cmd_t *zc)
 {
 	spa_t *spa;
 	vdev_t *vd;
 	int error;
 
 	/*
 	 * On zpool clear we also fix up missing slogs
 	 */
 	mutex_enter(&spa_namespace_lock);
 	spa = spa_lookup(zc->zc_name);
 	if (spa == NULL) {
 		mutex_exit(&spa_namespace_lock);
 		return (SET_ERROR(EIO));
 	}
 	if (spa_get_log_state(spa) == SPA_LOG_MISSING) {
 		/* we need to let spa_open/spa_load clear the chains */
 		spa_set_log_state(spa, SPA_LOG_CLEAR);
 	}
 	spa->spa_last_open_failed = 0;
 	mutex_exit(&spa_namespace_lock);
 
 	if (zc->zc_cookie & ZPOOL_NO_REWIND) {
 		error = spa_open(zc->zc_name, &spa, FTAG);
 	} else {
 		nvlist_t *policy;
 		nvlist_t *config = NULL;
 
 		if (zc->zc_nvlist_src == 0)
 			return (SET_ERROR(EINVAL));
 
 		if ((error = get_nvlist(zc->zc_nvlist_src,
 		    zc->zc_nvlist_src_size, zc->zc_iflags, &policy)) == 0) {
 			error = spa_open_rewind(zc->zc_name, &spa, FTAG,
 			    policy, &config);
 			if (config != NULL) {
 				int err;
 
 				if ((err = put_nvlist(zc, config)) != 0)
 					error = err;
 				nvlist_free(config);
 			}
 			nvlist_free(policy);
 		}
 	}
 
 	if (error != 0)
 		return (error);
 
 	/*
 	 * If multihost is enabled, resuming I/O is unsafe as another
 	 * host may have imported the pool.
 	 */
 	if (spa_multihost(spa) && spa_suspended(spa)) {
 		/* Drop the reference taken by the open above. */
 		spa_close(spa, FTAG);
 		return (SET_ERROR(EINVAL));
 	}
 
 	spa_vdev_state_enter(spa, SCL_NONE);
 
 	if (zc->zc_guid == 0) {
 		vd = NULL;
 	} else {
 		vd = spa_lookup_by_guid(spa, zc->zc_guid, B_TRUE);
 		if (vd == NULL) {
 			error = SET_ERROR(ENODEV);
 			(void) spa_vdev_state_exit(spa, NULL, error);
 			spa_close(spa, FTAG);
 			return (error);
 		}
 	}
 
 	vdev_clear(spa, vd);
 
 	(void) spa_vdev_state_exit(spa, spa_suspended(spa) ?
 	    NULL : spa->spa_root_vdev, 0);
 
 	/*
 	 * Resume any suspended I/Os.
 	 */
 	if (zio_resume(spa) != 0)
 		error = SET_ERROR(EIO);
 
 	spa_close(spa, FTAG);
 
 	return (error);
 }
 
 /*
  * Reopen all the vdevs associated with the pool.
  *
  * innvl: {
  *  "scrub_restart" -> when true and a scrub is running, allow the scrub
  *              to be restarted as a side effect of the reopen (boolean).
  * }
  *
  * outnvl is unused
  */
 static const zfs_ioc_key_t zfs_keys_pool_reopen[] = {
 	{"scrub_restart",	DATA_TYPE_BOOLEAN_VALUE,	ZK_OPTIONAL},
 };
 
 /* ARGSUSED */
 static int
 zfs_ioc_pool_reopen(const char *pool, nvlist_t *innvl, nvlist_t *outnvl)
 {
 	boolean_t restart = B_TRUE;
 	boolean_t requested;
 	spa_t *spa;
 	int error;
 
 	/* A missing or unreadable "scrub_restart" leaves the default (true). */
 	if (innvl != NULL &&
 	    nvlist_lookup_boolean_value(innvl, "scrub_restart",
 	    &requested) == 0)
 		restart = requested;
 
 	error = spa_open(pool, &spa, FTAG);
 	if (error != 0)
 		return (error);
 
 	spa_vdev_state_enter(spa, SCL_NONE);
 
 	/*
 	 * If the caller asked for no scrub restart and a scrub is already
 	 * in progress, set spa_scrub_reopen for the duration of the reopen
 	 * so that the scrub is not restarted as a side effect.
 	 * Otherwise, let vdev_open() decide if a resilver is required.
 	 */
 	spa->spa_scrub_reopen = (!restart &&
 	    dsl_scan_scrubbing(spa->spa_dsl_pool));
 	vdev_reopen(spa->spa_root_vdev);
 	spa->spa_scrub_reopen = B_FALSE;
 
 	(void) spa_vdev_state_exit(spa, NULL, 0);
 	spa_close(spa, FTAG);
 	return (0);
 }
 
 /*
  * Promote a clone.
  *
  * inputs:
  * zc_name	name of filesystem
  *
  * outputs:
  * zc_string	name of conflicting snapshot, if there is one
  */
 static int
 zfs_ioc_promote(zfs_cmd_t *zc)
 {
 	char origin[ZFS_MAX_DATASET_NAME_LEN];
 	dsl_dataset_t *ds, *origin_ds;
 	dsl_pool_t *dp;
 	char *at;
 	int error;
 
 	/* Force termination before the name is parsed. */
 	zc->zc_name[sizeof (zc->zc_name) - 1] = '\0';
 	if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0 ||
 	    strchr(zc->zc_name, '%'))
 		return (SET_ERROR(EINVAL));
 
 	error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
 	if (error != 0)
 		return (error);
 
 	error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &ds);
 	if (error != 0) {
 		dsl_pool_rele(dp, FTAG);
 		return (error);
 	}
 
 	/* Only a clone can be promoted; look up its origin snapshot. */
 	if (!dsl_dir_is_clone(ds->ds_dir)) {
 		error = SET_ERROR(EINVAL);
 	} else {
 		error = dsl_dataset_hold_obj(dp,
 		    dsl_dir_phys(ds->ds_dir)->dd_origin_obj, FTAG, &origin_ds);
 	}
 	if (error != 0) {
 		dsl_dataset_rele(ds, FTAG);
 		dsl_pool_rele(dp, FTAG);
 		return (error);
 	}
 
 	dsl_dataset_name(origin_ds, origin);
 	dsl_dataset_rele(origin_ds, FTAG);
 	dsl_dataset_rele(ds, FTAG);
 	dsl_pool_rele(dp, FTAG);
 
 	/*
 	 * We don't need to unmount *all* the origin fs's snapshots, but
 	 * it's easier.
 	 */
 	at = strchr(origin, '@');
 	if (at != NULL)
 		*at = '\0';
 	(void) dmu_objset_find(origin,
 	    zfs_unmount_snap_cb, NULL, DS_FIND_SNAPSHOTS);
 	return (dsl_dataset_promote(zc->zc_name, zc->zc_string));
 }
 
 /*
  * Retrieve a single {user|group|project}{used|quota}@... property.
  *
  * inputs:
  * zc_name	name of filesystem
  * zc_objset_type zfs_userquota_prop_t
  * zc_value	domain name (eg. "S-1-234-567-89")
  * zc_guid	RID/UID/GID
  *
  * outputs:
  * zc_cookie	property value
  */
 static int
 zfs_ioc_userspace_one(zfs_cmd_t *zc)
 {
 	zfsvfs_t *zfsvfs;
 	int error;
 
 	/* Reject property types outside the known range. */
 	if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
 		return (SET_ERROR(EINVAL));
 
 	error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs, B_FALSE);
 	if (error == 0) {
 		error = zfs_userspace_one(zfsvfs, zc->zc_objset_type,
 		    zc->zc_value, zc->zc_guid, &zc->zc_cookie);
 		zfsvfs_rele(zfsvfs, FTAG);
 	}
 
 	return (error);
 }
 
 /*
  * Retrieve a batch of user/group/project accounting records.
  *
  * inputs:
  * zc_name		name of filesystem
  * zc_cookie		zap cursor
  * zc_objset_type	zfs_userquota_prop_t
  * zc_nvlist_dst[_size] buffer to fill (not really an nvlist)
  *
  * outputs:
  * zc_nvlist_dst[_size]	data buffer (array of zfs_useracct_t)
  * zc_cookie	zap cursor
  */
 static int
 zfs_ioc_userspace_many(zfs_cmd_t *zc)
 {
 	zfsvfs_t *zfsvfs;
 	int bufsize = zc->zc_nvlist_dst_size;
 
 	/*
 	 * The kernel buffer below is sized by the int 'bufsize', while
 	 * zfs_userspace_many() and xcopyout() work from the 64-bit
 	 * zc_nvlist_dst_size.  Reject any size that does not survive the
 	 * conversion to int, so that both always describe the same buffer.
 	 */
 	if (bufsize <= 0 || (uint64_t)bufsize != zc->zc_nvlist_dst_size)
 		return (SET_ERROR(ENOMEM));
 
 	int error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs, B_FALSE);
 	if (error != 0)
 		return (error);
 
 	void *buf = vmem_alloc(bufsize, KM_SLEEP);
 
 	error = zfs_userspace_many(zfsvfs, zc->zc_objset_type, &zc->zc_cookie,
 	    buf, &zc->zc_nvlist_dst_size);
 
 	/* Copy out as many bytes as zc_nvlist_dst_size now reports. */
 	if (error == 0) {
 		error = xcopyout(buf,
 		    (void *)(uintptr_t)zc->zc_nvlist_dst,
 		    zc->zc_nvlist_dst_size);
 	}
 	vmem_free(buf, bufsize);
 	zfsvfs_rele(zfsvfs, FTAG);
 
 	return (error);
 }
 
 /*
  * Start (or join) the userspace accounting upgrade of a filesystem and
  * wait for the upgrade task to finish.
  *
  * inputs:
  * zc_name		name of filesystem
  *
  * outputs:
  * none
  */
 static int
 zfs_ioc_userspace_upgrade(zfs_cmd_t *zc)
 {
 	int error = 0;
 	zfsvfs_t *zfsvfs;
 
 	if (getzfsvfs(zc->zc_name, &zfsvfs) == 0) {
 		/* A zfsvfs was found for the name: upgrade through it. */
 		if (!dmu_objset_userused_enabled(zfsvfs->z_os)) {
 			/*
 			 * If userused is not enabled, it may be because the
 			 * objset needs to be closed & reopened (to grow the
 			 * objset_phys_t).  Suspend/resume the fs will do that.
 			 */
 			dsl_dataset_t *ds, *newds;
 
 			ds = dmu_objset_ds(zfsvfs->z_os);
 			error = zfs_suspend_fs(zfsvfs);
 			if (error == 0) {
 				dmu_objset_refresh_ownership(ds, &newds,
 				    B_TRUE, zfsvfs);
 				error = zfs_resume_fs(zfsvfs, newds);
 			}
 		}
 		if (error == 0) {
 			mutex_enter(&zfsvfs->z_os->os_upgrade_lock);
 			if (zfsvfs->z_os->os_upgrade_id == 0) {
 				/* clear potential error code and retry */
 				zfsvfs->z_os->os_upgrade_status = 0;
 				mutex_exit(&zfsvfs->z_os->os_upgrade_lock);
 
 				dsl_pool_config_enter(
 				    dmu_objset_pool(zfsvfs->z_os), FTAG);
 				dmu_objset_userspace_upgrade(zfsvfs->z_os);
 				dsl_pool_config_exit(
 				    dmu_objset_pool(zfsvfs->z_os), FTAG);
 			} else {
 				mutex_exit(&zfsvfs->z_os->os_upgrade_lock);
 			}
 
 			/* Wait for the upgrade task, then report its status. */
 			taskq_wait_id(zfsvfs->z_os->os_spa->spa_upgrade_taskq,
 			    zfsvfs->z_os->os_upgrade_id);
 			error = zfsvfs->z_os->os_upgrade_status;
 		}
 		zfs_vfs_rele(zfsvfs);
 	} else {
 		objset_t *os;
 
 		/* XXX kind of reading contents without owning */
 		error = dmu_objset_hold_flags(zc->zc_name, B_TRUE, FTAG, &os);
 		if (error != 0)
 			return (error);
 
 		mutex_enter(&os->os_upgrade_lock);
 		if (os->os_upgrade_id == 0) {
 			/* clear potential error code and retry */
 			os->os_upgrade_status = 0;
 			mutex_exit(&os->os_upgrade_lock);
 
 			dmu_objset_userspace_upgrade(os);
 		} else {
 			mutex_exit(&os->os_upgrade_lock);
 		}
 
 		/* The pool hold is dropped before waiting; the dataset hold is kept. */
 		dsl_pool_rele(dmu_objset_pool(os), FTAG);
 
 		taskq_wait_id(os->os_spa->spa_upgrade_taskq, os->os_upgrade_id);
 		error = os->os_upgrade_status;
 
 		dsl_dataset_rele_flags(dmu_objset_ds(os), DS_HOLD_FLAG_DECRYPT,
 		    FTAG);
 	}
 	return (error);
 }
 
 /*
  * Start (or join) the id quota upgrade of a filesystem, if it is
  * upgradable, and wait for the upgrade task to finish.
  *
  * inputs:
  * zc_name		name of filesystem
  *
  * outputs:
  * none
  */
 static int
 zfs_ioc_id_quota_upgrade(zfs_cmd_t *zc)
 {
 	objset_t *os;
 	int error;
 
 	error = dmu_objset_hold_flags(zc->zc_name, B_TRUE, FTAG, &os);
 	if (error != 0)
 		return (error);
 
 	if (dmu_objset_userobjspace_upgradable(os) ||
 	    dmu_objset_projectquota_upgradable(os)) {
 		mutex_enter(&os->os_upgrade_lock);
 		if (os->os_upgrade_id == 0) {
 			/* clear potential error code and retry */
 			os->os_upgrade_status = 0;
 			mutex_exit(&os->os_upgrade_lock);
 
 			dmu_objset_id_quota_upgrade(os);
 		} else {
 			mutex_exit(&os->os_upgrade_lock);
 		}
 
 		/* The pool hold is dropped before waiting; the dataset hold is kept. */
 		dsl_pool_rele(dmu_objset_pool(os), FTAG);
 
 		taskq_wait_id(os->os_spa->spa_upgrade_taskq, os->os_upgrade_id);
 		error = os->os_upgrade_status;
 	} else {
 		/* Nothing to upgrade: error is still 0 from the hold above. */
 		dsl_pool_rele(dmu_objset_pool(os), FTAG);
 	}
 
 	dsl_dataset_rele_flags(dmu_objset_ds(os), DS_HOLD_FLAG_DECRYPT, FTAG);
 
 	return (error);
 }
 
 /*
  * This ioctl is not implemented here; it always fails with ENOSYS.
  */
 static int
 zfs_ioc_share(zfs_cmd_t *zc)
 {
 	int error = SET_ERROR(ENOSYS);
 
 	return (error);
 }
 
 /*
  * ACL made of a single entry built from (uid_t)-1, ACE_ALL_PERMS and
  * ACE_EVERYONE.
  * NOTE(review): the initializer order is presumably who / access mask /
  * flags / type -- confirm against the ace_t definition.
  */
 ace_t full_access[] = {
 	{(uid_t)-1, ACE_ALL_PERMS, ACE_EVERYONE, 0}
 };
 
 /*
  * Find the next in-use object number after a given one.
  *
  * inputs:
  * zc_name		name of containing filesystem
  * zc_obj		object # beyond which we want next in-use object #
  *
  * outputs:
  * zc_obj		next in-use object #
  */
 static int
 zfs_ioc_next_obj(zfs_cmd_t *zc)
 {
 	objset_t *os = NULL;
 	int error;
 
 	error = dmu_objset_hold(zc->zc_name, FTAG, &os);
 	if (error == 0) {
 		/* zc_obj is updated in place by dmu_object_next(). */
 		error = dmu_object_next(os, &zc->zc_obj, B_FALSE, 0);
 		dmu_objset_rele(os, FTAG);
 	}
 
 	return (error);
 }
 
 /*
  * Create a temporary snapshot whose name is the given prefix followed by
  * a 16-digit hex timestamp, tied to the caller's cleanup-on-exit fd.
  *
  * inputs:
  * zc_name		name of filesystem
  * zc_value		prefix name for snapshot
  * zc_cleanup_fd	cleanup-on-exit file descriptor for calling process
  *
  * outputs:
  * zc_value		short name of new snapshot
  */
 static int
 zfs_ioc_tmp_snapshot(zfs_cmd_t *zc)
 {
 	char *snap_name;
 	char *hold_name;
-	int error;
 	minor_t minor;
 
-	error = zfs_onexit_fd_hold(zc->zc_cleanup_fd, &minor);
-	if (error != 0)
-		return (error);
+	zfs_file_t *fp = zfs_onexit_fd_hold(zc->zc_cleanup_fd, &minor);
+	if (fp == NULL)
+		return (SET_ERROR(EBADF));
 
 	/* Snapshot name: "<prefix>-<lbolt as 16 hex digits>"; hold: "%<prefix>". */
 	snap_name = kmem_asprintf("%s-%016llx", zc->zc_value,
 	    (u_longlong_t)ddi_get_lbolt64());
 	hold_name = kmem_asprintf("%%%s", zc->zc_value);
 
-	error = dsl_dataset_snapshot_tmp(zc->zc_name, snap_name, minor,
+	int error = dsl_dataset_snapshot_tmp(zc->zc_name, snap_name, minor,
 	    hold_name);
 	/* On success, hand the generated snapshot name back in zc_value. */
 	if (error == 0)
 		(void) strlcpy(zc->zc_value, snap_name,
 		    sizeof (zc->zc_value));
 	kmem_strfree(snap_name);
 	kmem_strfree(hold_name);
-	zfs_onexit_fd_rele(zc->zc_cleanup_fd);
+	zfs_onexit_fd_rele(fp);
 	return (error);
 }
 
 /*
  * Write the differences between two snapshots to a file descriptor.
  *
  * inputs:
  * zc_name		name of "to" snapshot
  * zc_value		name of "from" snapshot
  * zc_cookie		file descriptor to write diff data on
  *
  * outputs:
  * dmu_diff_record_t's to the file descriptor
  */
 static int
 zfs_ioc_diff(zfs_cmd_t *zc)
 {
 	zfs_file_t *fp;
 	offset_t off;
 	int error;
 
-	if ((error = zfs_file_get(zc->zc_cookie, &fp)))
-		return (error);
+	if ((fp = zfs_file_get(zc->zc_cookie)) == NULL)
+		return (SET_ERROR(EBADF));
 
 	/* Start writing at the file's current offset. */
 	off = zfs_file_off(fp);
 	error = dmu_diff(zc->zc_name, zc->zc_value, fp, &off);
 
-	zfs_file_put(zc->zc_cookie);
+	zfs_file_put(fp);
 
 	return (error);
 }
 
 /*
  * This ioctl is not supported here; it always fails with ENOTSUP.
  */
 static int
 zfs_ioc_smb_acl(zfs_cmd_t *zc)
 {
 	int error = SET_ERROR(ENOTSUP);
 
 	return (error);
 }
 
 /*
  * innvl: {
  *     "holds" -> { snapname -> holdname (string), ... }
  *     (optional) "cleanup_fd" -> fd (int32)
  * }
  *
  * outnvl: {
  *     snapname -> error value (int32)
  *     ...
  * }
  */
 static const zfs_ioc_key_t zfs_keys_hold[] = {
 	{"holds",		DATA_TYPE_NVLIST,	0},
 	{"cleanup_fd",		DATA_TYPE_INT32,	ZK_OPTIONAL},
 };
 
 /*
  * Apply the holds described by the "holds" nvlist via
  * dsl_dataset_user_hold().  Every hold tag must be a non-empty string.
  * When the optional "cleanup_fd" is present it is held for the duration
  * of the call and the minor obtained from it is passed along; otherwise
  * minor stays 0.  The pool argument is not used here.
  */
 /* ARGSUSED */
 static int
 zfs_ioc_hold(const char *pool, nvlist_t *args, nvlist_t *errlist)
 {
 	nvpair_t *pair;
 	nvlist_t *holds;
 	int cleanup_fd = -1;
 	int error;
 	minor_t minor = 0;
+	zfs_file_t *fp = NULL;
 
 	holds = fnvlist_lookup_nvlist(args, "holds");
 
 	/* make sure the user didn't pass us any invalid (empty) tags */
 	for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
 	    pair = nvlist_next_nvpair(holds, pair)) {
 		char *htag;
 
 		error = nvpair_value_string(pair, &htag);
 		if (error != 0)
 			return (SET_ERROR(error));
 
 		if (strlen(htag) == 0)
 			return (SET_ERROR(EINVAL));
 	}
 
 	if (nvlist_lookup_int32(args, "cleanup_fd", &cleanup_fd) == 0) {
-		error = zfs_onexit_fd_hold(cleanup_fd, &minor);
-		if (error != 0)
-			return (SET_ERROR(error));
+		fp = zfs_onexit_fd_hold(cleanup_fd, &minor);
+		if (fp == NULL)
+			return (SET_ERROR(EBADF));
 	}
 
 	error = dsl_dataset_user_hold(holds, minor, errlist);
 	/* fp is non-NULL only if a cleanup fd was supplied and held above. */
-	if (minor != 0)
-		zfs_onexit_fd_rele(cleanup_fd);
+	if (fp != NULL) {
+		ASSERT3U(minor, !=, 0);
+		zfs_onexit_fd_rele(fp);
+	}
 	return (SET_ERROR(error));
 }
 
 /*
  * innvl is not used.
  *
  * outnvl: {
  *    holdname -> time added (uint64 seconds since epoch)
  *    ...
  * }
  */
 /* Key table for the "get_holds" ioctl: the input nvlist carries no keys. */
 static const zfs_ioc_key_t zfs_keys_get_holds[] = {
 	/* no nvl keys */
 };
 
 /*
  * Thin wrapper: passes snapname and outnvl straight to
  * dsl_dataset_get_holds() and returns its result.  args is not consulted.
  */
 /* ARGSUSED */
 static int
 zfs_ioc_get_holds(const char *snapname, nvlist_t *args, nvlist_t *outnvl)
 {
 	return (dsl_dataset_get_holds(snapname, outnvl));
 }
 
 /*
  * innvl: {
  *     snapname -> { holdname, ... }
  *     ...
  * }
  *
  * outnvl: {
  *     snapname -> error value (int32)
  *     ...
  * }
  */
 /*
  * Key table for the "release" ioctl: a wildcard list (ZK_WILDCARDLIST) of
  * nvlist-valued entries.
  */
 static const zfs_ioc_key_t zfs_keys_release[] = {
 	{"<snapname>...",	DATA_TYPE_NVLIST,	ZK_WILDCARDLIST},
 };
 
 /*
  * Thin wrapper: passes the holds nvlist and errlist straight to
  * dsl_dataset_user_release() and returns its result.  The pool argument
  * is not used here.
  */
 /* ARGSUSED */
 static int
 zfs_ioc_release(const char *pool, nvlist_t *holds, nvlist_t *errlist)
 {
 	return (dsl_dataset_user_release(holds, errlist));
 }
 
 /*
  * inputs:
  * zc_guid		flags (ZEVENT_NONBLOCK)
  * zc_cleanup_fd	zevent file descriptor
  *
  * outputs:
  * zc_nvlist_dst	next nvlist event
  * zc_cookie		dropped events since last get
  */
 static int
 zfs_ioc_events_next(zfs_cmd_t *zc)
 {
 	zfs_zevent_t *ze;
 	nvlist_t *event = NULL;
 	minor_t minor;
 	uint64_t dropped = 0;
 	int error;
 
-	error = zfs_zevent_fd_hold(zc->zc_cleanup_fd, &minor, &ze);
-	if (error != 0)
-		return (error);
+	zfs_file_t *fp = zfs_zevent_fd_hold(zc->zc_cleanup_fd, &minor, &ze);
+	if (fp == NULL)
+		return (SET_ERROR(EBADF));
 
 	/*
 	 * With ZEVENT_NONBLOCK set in zc_guid the loop body runs exactly once.
 	 * Otherwise only an ENOENT result leads to zfs_zevent_wait() and
 	 * another pass; success or any other error ends the loop, as does a
 	 * failing wait.  When an event is returned, the error reported is the
 	 * result of copying it out with put_nvlist().
 	 */
 	do {
 		error = zfs_zevent_next(ze, &event,
 		    &zc->zc_nvlist_dst_size, &dropped);
 		if (event != NULL) {
 			zc->zc_cookie = dropped;
 			error = put_nvlist(zc, event);
 			nvlist_free(event);
 		}
 
 		if (zc->zc_guid & ZEVENT_NONBLOCK)
 			break;
 
 		if ((error == 0) || (error != ENOENT))
 			break;
 
 		error = zfs_zevent_wait(ze);
 		if (error != 0)
 			break;
 	} while (1);
 
-	zfs_zevent_fd_rele(zc->zc_cleanup_fd);
+	zfs_zevent_fd_rele(fp);
 
 	return (error);
 }
 
 /*
  * outputs:
  * zc_cookie		cleared events count
  */
 static int
 zfs_ioc_events_clear(zfs_cmd_t *zc)
 {
 	int drained;
 
 	/*
 	 * zfs_zevent_drain_all() reports a count through its argument; that
 	 * count is returned to the caller in zc_cookie.  This handler has no
 	 * failure path.
 	 */
 	zfs_zevent_drain_all(&drained);
 	zc->zc_cookie = drained;
 
 	return (0);
 }
 
 /*
  * inputs:
  * zc_guid		eid | ZEVENT_SEEK_START | ZEVENT_SEEK_END
  * zc_cleanup_fd	zevent file descriptor
  */
 static int
 zfs_ioc_events_seek(zfs_cmd_t *zc)
 {
 	zfs_zevent_t *ze;
 	minor_t minor;
 	int error;
 
-	error = zfs_zevent_fd_hold(zc->zc_cleanup_fd, &minor, &ze);
-	if (error != 0)
-		return (error);
+	zfs_file_t *fp = zfs_zevent_fd_hold(zc->zc_cleanup_fd, &minor, &ze);
+	if (fp == NULL)
+		return (SET_ERROR(EBADF));
 
 	/*
 	 * Seek the zevent handle obtained from zc_cleanup_fd using zc_guid
 	 * as the target, then drop the fd hold regardless of the outcome.
 	 */
 	error = zfs_zevent_seek(ze, zc->zc_guid);
-	zfs_zevent_fd_rele(zc->zc_cleanup_fd);
+	zfs_zevent_fd_rele(fp);
 
 	return (error);
 }
 
 /*
  * inputs:
  * zc_name		name of later filesystem or snapshot
  * zc_value		full name of old snapshot or bookmark
  *
  * outputs:
  * zc_cookie		space in bytes
  * zc_objset_type	compressed space in bytes
  * zc_perm_action	uncompressed space in bytes
  */
 static int
 zfs_ioc_space_written(zfs_cmd_t *zc)
 {
 	dsl_pool_t *pool;
 	dsl_dataset_t *later;
 	int err;
 
 	err = dsl_pool_hold(zc->zc_name, FTAG, &pool);
 	if (err != 0)
 		return (err);
 
 	err = dsl_dataset_hold(pool, zc->zc_name, FTAG, &later);
 	if (err != 0) {
 		dsl_pool_rele(pool, FTAG);
 		return (err);
 	}
 
 	/*
 	 * zc_value names either a bookmark (contains '#') or a dataset.
 	 * Each form has its own lookup and its own space-written routine;
 	 * both write the three results into zc_cookie, zc_objset_type and
 	 * zc_perm_action.
 	 */
 	if (strchr(zc->zc_value, '#') == NULL) {
 		dsl_dataset_t *earlier;
 
 		err = dsl_dataset_hold(pool, zc->zc_value, FTAG, &earlier);
 		if (err == 0) {
 			err = dsl_dataset_space_written(earlier, later,
 			    &zc->zc_cookie,
 			    &zc->zc_objset_type, &zc->zc_perm_action);
 			dsl_dataset_rele(earlier, FTAG);
 		}
 	} else {
 		zfs_bookmark_phys_t bmp;
 
 		err = dsl_bookmark_lookup(pool, zc->zc_value, later, &bmp);
 		if (err == 0) {
 			err = dsl_dataset_space_written_bookmark(&bmp, later,
 			    &zc->zc_cookie,
 			    &zc->zc_objset_type, &zc->zc_perm_action);
 		}
 	}
 
 	/* Drop the holds taken above in reverse order of acquisition. */
 	dsl_dataset_rele(later, FTAG);
 	dsl_pool_rele(pool, FTAG);
 	return (err);
 }
 
 /*
  * innvl: {
  *     "firstsnap" -> snapshot name
  * }
  *
  * outnvl: {
  *     "used" -> space in bytes
  *     "compressed" -> compressed space in bytes
  *     "uncompressed" -> uncompressed space in bytes
  * }
  */
 static const zfs_ioc_key_t zfs_keys_space_snaps[] = {
 	{"firstsnap",	DATA_TYPE_STRING,	0},
 };
 
 /*
  * Compute the space figures for the range of snapshots from "firstsnap"
  * (taken from innvl) to lastsnap using dsl_dataset_space_wouldfree(), and
  * publish them in outnvl as "used", "compressed" and "uncompressed".
  *
  * Both names must resolve to snapshots; otherwise EINVAL is returned.
  * Errors from the pool/dataset holds are returned before anything is added
  * to outnvl.  The three values are added to outnvl even when
  * dsl_dataset_space_wouldfree() itself fails (its error is still returned).
  */
 static int
 zfs_ioc_space_snaps(const char *lastsnap, nvlist_t *innvl, nvlist_t *outnvl)
 {
 	int error;
 	dsl_pool_t *dp;
 	dsl_dataset_t *new, *old;
 	char *firstsnap;
 	/*
 	 * Start from zero: the values are added to outnvl unconditionally
 	 * below, and nothing visible here guarantees that
 	 * dsl_dataset_space_wouldfree() writes them when it returns an error.
 	 * Without the initializers that path would read indeterminate values.
 	 */
 	uint64_t used = 0, comp = 0, uncomp = 0;
 
 	firstsnap = fnvlist_lookup_string(innvl, "firstsnap");
 
 	error = dsl_pool_hold(lastsnap, FTAG, &dp);
 	if (error != 0)
 		return (error);
 
 	/* lastsnap must exist and must be a snapshot. */
 	error = dsl_dataset_hold(dp, lastsnap, FTAG, &new);
 	if (error == 0 && !new->ds_is_snapshot) {
 		dsl_dataset_rele(new, FTAG);
 		error = SET_ERROR(EINVAL);
 	}
 	if (error != 0) {
 		dsl_pool_rele(dp, FTAG);
 		return (error);
 	}
 
 	/* Same requirement for firstsnap. */
 	error = dsl_dataset_hold(dp, firstsnap, FTAG, &old);
 	if (error == 0 && !old->ds_is_snapshot) {
 		dsl_dataset_rele(old, FTAG);
 		error = SET_ERROR(EINVAL);
 	}
 	if (error != 0) {
 		dsl_dataset_rele(new, FTAG);
 		dsl_pool_rele(dp, FTAG);
 		return (error);
 	}
 
 	error = dsl_dataset_space_wouldfree(old, new, &used, &comp, &uncomp);
 	dsl_dataset_rele(old, FTAG);
 	dsl_dataset_rele(new, FTAG);
 	dsl_pool_rele(dp, FTAG);
 	fnvlist_add_uint64(outnvl, "used", used);
 	fnvlist_add_uint64(outnvl, "compressed", comp);
 	fnvlist_add_uint64(outnvl, "uncompressed", uncomp);
 	return (error);
 }
 
 /*
  * innvl: {
  *     "fd" -> file descriptor to write stream to (int32)
  *     (optional) "fromsnap" -> full snap name to send an incremental from
  *     (optional) "largeblockok" -> (value ignored)
  *         indicates that blocks > 128KB are permitted
  *     (optional) "embedok" -> (value ignored)
  *         presence indicates DRR_WRITE_EMBEDDED records are permitted
  *     (optional) "compressok" -> (value ignored)
  *         presence indicates compressed DRR_WRITE records are permitted
  *     (optional) "rawok" -> (value ignored)
  *         presence indicates raw encrypted records should be used.
  *     (optional) "savedok" -> (value ignored)
  *         presence indicates we should send a partially received snapshot
  *     (optional) "resume_object" and "resume_offset" -> (uint64)
  *         if present, resume send stream from specified object and offset.
  *     (optional) "redactbook" -> (string)
  *         if present, use this bookmark's redaction list to generate a redacted
  *         send stream
  * }
  *
  * outnvl is unused
  */
 static const zfs_ioc_key_t zfs_keys_send_new[] = {
 	{"fd",			DATA_TYPE_INT32,	0},
 	{"fromsnap",		DATA_TYPE_STRING,	ZK_OPTIONAL},
 	{"largeblockok",	DATA_TYPE_BOOLEAN,	ZK_OPTIONAL},
 	{"embedok",		DATA_TYPE_BOOLEAN,	ZK_OPTIONAL},
 	{"compressok",		DATA_TYPE_BOOLEAN,	ZK_OPTIONAL},
 	{"rawok",		DATA_TYPE_BOOLEAN,	ZK_OPTIONAL},
 	{"savedok",		DATA_TYPE_BOOLEAN,	ZK_OPTIONAL},
 	{"resume_object",	DATA_TYPE_UINT64,	ZK_OPTIONAL},
 	{"resume_offset",	DATA_TYPE_UINT64,	ZK_OPTIONAL},
 	{"redactbook",		DATA_TYPE_STRING,	ZK_OPTIONAL},
 };
 
 /*
  * Generate a send stream for snapname and write it to the descriptor given
  * by "fd".  All arguments are gathered from innvl first; the descriptor is
  * resolved last and released after dmu_send() returns.  outnvl is unused.
  */
 /* ARGSUSED */
 static int
 zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
 {
 	int error;
 	offset_t off;
 	char *fromname = NULL;
 	int fd;
 	zfs_file_t *fp;
 	boolean_t largeblockok;
 	boolean_t embedok;
 	boolean_t compressok;
 	boolean_t rawok;
 	boolean_t savedok;
 	uint64_t resumeobj = 0;
 	uint64_t resumeoff = 0;
 	char *redactbook = NULL;
 
 	fd = fnvlist_lookup_int32(innvl, "fd");
 
 	/* Optional: fromname stays NULL when "fromsnap" is absent. */
 	(void) nvlist_lookup_string(innvl, "fromsnap", &fromname);
 
 	/* These flags are conveyed by key presence alone. */
 	largeblockok = nvlist_exists(innvl, "largeblockok");
 	embedok = nvlist_exists(innvl, "embedok");
 	compressok = nvlist_exists(innvl, "compressok");
 	rawok = nvlist_exists(innvl, "rawok");
 	savedok = nvlist_exists(innvl, "savedok");
 
 	/* Optional: both remain 0 when the resume keys are absent. */
 	(void) nvlist_lookup_uint64(innvl, "resume_object", &resumeobj);
 	(void) nvlist_lookup_uint64(innvl, "resume_offset", &resumeoff);
 
 	(void) nvlist_lookup_string(innvl, "redactbook", &redactbook);
 
-	if ((error = zfs_file_get(fd, &fp)))
-		return (error);
+	if ((fp = zfs_file_get(fd)) == NULL)
+		return (SET_ERROR(EBADF));
 
 	/* The offset handed to dmu_send() is seeded from zfs_file_off(). */
 	off = zfs_file_off(fp);
 
 	/* Real (non-dry-run) send: dump_bytes receives fp as its argument. */
 	dmu_send_outparams_t out = {0};
 	out.dso_outfunc = dump_bytes;
 	out.dso_arg = fp;
 	out.dso_dryrun = B_FALSE;
 	error = dmu_send(snapname, fromname, embedok, largeblockok,
 	    compressok, rawok, savedok, resumeobj, resumeoff,
 	    redactbook, fd, &off, &out);
 
-	zfs_file_put(fd);
+	zfs_file_put(fp);
 	return (error);
 }
 
 /*
  * Output callback that only counts bytes: adds len to the uint64_t that
  * arg points to and reports success.  os and buf are ignored.
  */
 /* ARGSUSED */
 static int
 send_space_sum(objset_t *os, void *buf, int len, void *arg)
 {
 	uint64_t *total = arg;
 
 	*total = *total + len;
 	return (0);
 }
 
 /*
  * Determine approximately how large a zfs send stream will be -- the number
  * of bytes that will be written to the fd supplied to zfs_ioc_send_new().
  *
  * innvl: {
  *     (optional) "from" -> full snap or bookmark name to send an incremental
  *                          from
  *     (optional) "largeblockok" -> (value ignored)
  *         indicates that blocks > 128KB are permitted
  *     (optional) "embedok" -> (value ignored)
  *         presence indicates DRR_WRITE_EMBEDDED records are permitted
  *     (optional) "compressok" -> (value ignored)
  *         presence indicates compressed DRR_WRITE records are permitted
  *     (optional) "rawok" -> (value ignored)
  *         presence indicates raw encrypted records should be used.
  *     (optional) "resume_object" and "resume_offset" -> (uint64)
  *         if present, resume send stream from specified object and offset.
  *     (optional) "fd" -> file descriptor to use as a cookie for progress
  *         tracking (int32)
  * }
  *
  * outnvl: {
  *     "space" -> bytes of space (uint64)
  * }
  */
 /*
  * Keys accepted by the "send_space" ioctl; every key is optional.
  *
  * NOTE(review): zfs_ioc_send_space() probes "savedok" with nvlist_exists(),
  * but that key is not listed here, while "fromsnap" is listed although the
  * handler only looks up "from".  Confirm against the key validation code
  * whether this mismatch is intentional.
  */
 static const zfs_ioc_key_t zfs_keys_send_space[] = {
 	{"from",		DATA_TYPE_STRING,	ZK_OPTIONAL},
 	{"fromsnap",		DATA_TYPE_STRING,	ZK_OPTIONAL},
 	{"largeblockok",	DATA_TYPE_BOOLEAN,	ZK_OPTIONAL},
 	{"embedok",		DATA_TYPE_BOOLEAN,	ZK_OPTIONAL},
 	{"compressok",		DATA_TYPE_BOOLEAN,	ZK_OPTIONAL},
 	{"rawok",		DATA_TYPE_BOOLEAN,	ZK_OPTIONAL},
 	{"fd",			DATA_TYPE_INT32,	ZK_OPTIONAL},
 	{"redactbook",		DATA_TYPE_STRING,	ZK_OPTIONAL},
 	{"resume_object",	DATA_TYPE_UINT64,	ZK_OPTIONAL},
 	{"resume_offset",	DATA_TYPE_UINT64,	ZK_OPTIONAL},
 	{"bytes",		DATA_TYPE_UINT64,	ZK_OPTIONAL},
 };
 
 /*
  * Estimate the size of a send stream for snapname and return it in outnvl
  * as "space".  Two strategies are used:
  *   - a "full" estimate, which runs dmu_send() in dry-run mode with
  *     send_space_sum() as the output function, and
  *   - a "fast" estimate via dmu_send_estimate_fast(), from which the
  *     optional "bytes" value is subtracted.
  * The pool and tosnap holds taken here are released before dmu_send() is
  * called, or after the fast estimate completes.
  */
 static int
 zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
 {
 	dsl_pool_t *dp;
 	dsl_dataset_t *tosnap;
 	dsl_dataset_t *fromsnap = NULL;
 	int error;
 	char *fromname = NULL;
 	char *redactlist_book = NULL;
 	boolean_t largeblockok;
 	boolean_t embedok;
 	boolean_t compressok;
 	boolean_t rawok;
 	boolean_t savedok;
 	uint64_t space = 0;
 	boolean_t full_estimate = B_FALSE;
 	uint64_t resumeobj = 0;
 	uint64_t resumeoff = 0;
 	uint64_t resume_bytes = 0;
 	int32_t fd = -1;
 	zfs_bookmark_phys_t zbm = {0};
 
 	error = dsl_pool_hold(snapname, FTAG, &dp);
 	if (error != 0)
 		return (error);
 
 	error = dsl_dataset_hold(dp, snapname, FTAG, &tosnap);
 	if (error != 0) {
 		dsl_pool_rele(dp, FTAG);
 		return (error);
 	}
 	/* Optional: fd keeps its default of -1 when the key is absent. */
 	(void) nvlist_lookup_int32(innvl, "fd", &fd);
 
 	/* These flags are conveyed by key presence alone. */
 	largeblockok = nvlist_exists(innvl, "largeblockok");
 	embedok = nvlist_exists(innvl, "embedok");
 	compressok = nvlist_exists(innvl, "compressok");
 	rawok = nvlist_exists(innvl, "rawok");
 	savedok = nvlist_exists(innvl, "savedok");
 	boolean_t from = (nvlist_lookup_string(innvl, "from", &fromname) == 0);
 	boolean_t altbook = (nvlist_lookup_string(innvl, "redactbook",
 	    &redactlist_book) == 0);
 
 	(void) nvlist_lookup_uint64(innvl, "resume_object", &resumeobj);
 	(void) nvlist_lookup_uint64(innvl, "resume_offset", &resumeoff);
 	(void) nvlist_lookup_uint64(innvl, "bytes", &resume_bytes);
 
 	/*
 	 * Decide which estimate to use.  A redaction bookmark always forces
 	 * the full estimate; otherwise the decision depends on what "from"
 	 * names (bookmark with '#', snapshot with '@').
 	 */
 	if (altbook) {
 		full_estimate = B_TRUE;
 	} else if (from) {
 		if (strchr(fromname, '#')) {
 			error = dsl_bookmark_lookup(dp, fromname, tosnap, &zbm);
 
 			/*
 			 * dsl_bookmark_lookup() will fail with EXDEV if
 			 * the from-bookmark and tosnap are at the same txg.
 			 * However, it's valid to do a send (and therefore,
 			 * a send estimate) from and to the same time point,
 			 * if the bookmark is redacted (the incremental send
 			 * can change what's redacted on the target).  In
 			 * this case, dsl_bookmark_lookup() fills in zbm
 			 * but returns EXDEV.  Ignore this error.
 			 */
 			if (error == EXDEV && zbm.zbm_redaction_obj != 0 &&
 			    zbm.zbm_guid ==
 			    dsl_dataset_phys(tosnap)->ds_guid)
 				error = 0;
 
 			if (error != 0) {
 				dsl_dataset_rele(tosnap, FTAG);
 				dsl_pool_rele(dp, FTAG);
 				return (error);
 			}
 			/*
 			 * A redacted bookmark, or one without the
 			 * ZBM_FLAG_HAS_FBN flag, needs the full estimate.
 			 */
 			if (zbm.zbm_redaction_obj != 0 || !(zbm.zbm_flags &
 			    ZBM_FLAG_HAS_FBN)) {
 				full_estimate = B_TRUE;
 			}
 		} else if (strchr(fromname, '@')) {
 			error = dsl_dataset_hold(dp, fromname, FTAG, &fromsnap);
 			if (error != 0) {
 				dsl_dataset_rele(tosnap, FTAG);
 				dsl_pool_rele(dp, FTAG);
 				return (error);
 			}
 
 			/*
 			 * The full-estimate path below does not release
 			 * fromsnap, so the hold is dropped here when that
 			 * path is chosen.
 			 */
 			if (!dsl_dataset_is_before(tosnap, fromsnap, 0)) {
 				full_estimate = B_TRUE;
 				dsl_dataset_rele(fromsnap, FTAG);
 			}
 		} else {
 			/*
 			 * from is not properly formatted as a snapshot or
 			 * bookmark
 			 */
 			dsl_dataset_rele(tosnap, FTAG);
 			dsl_pool_rele(dp, FTAG);
 			return (SET_ERROR(EINVAL));
 		}
 	}
 
 	if (full_estimate) {
 		dmu_send_outparams_t out = {0};
 		offset_t off = 0;
 		out.dso_outfunc = send_space_sum;
 		out.dso_arg = &space;
 		out.dso_dryrun = B_TRUE;
 		/*
 		 * We have to release these holds so dmu_send can take them.  It
 		 * will do all the error checking we need.
 		 */
 		dsl_dataset_rele(tosnap, FTAG);
 		dsl_pool_rele(dp, FTAG);
 		error = dmu_send(snapname, fromname, embedok, largeblockok,
 		    compressok, rawok, savedok, resumeobj, resumeoff,
 		    redactlist_book, fd, &off, &out);
 	} else {
 		/* zbm is passed only when "from" named a bookmark. */
 		error = dmu_send_estimate_fast(tosnap, fromsnap,
 		    (from && strchr(fromname, '#') != NULL ? &zbm : NULL),
 		    compressok || rawok, savedok, &space);
 		/*
 		 * NOTE(review): unsigned subtraction, performed even when the
 		 * estimate failed; it wraps if resume_bytes exceeds space.
 		 * Confirm callers never pass a larger "bytes" value.
 		 */
 		space -= resume_bytes;
 		if (fromsnap != NULL)
 			dsl_dataset_rele(fromsnap, FTAG);
 		dsl_dataset_rele(tosnap, FTAG);
 		dsl_pool_rele(dp, FTAG);
 	}
 
 	/* "space" is reported regardless of the value of error. */
 	fnvlist_add_uint64(outnvl, "space", space);
 
 	return (error);
 }
 
 /*
  * Sync the currently open TXG to disk for the specified pool.
  * This is somewhat similar to 'zfs_sync()'.
  * For cases that do not result in error this ioctl will wait for
  * the currently open TXG to commit before returning back to the caller.
  *
  * innvl: {
  *  "force" -> when true, force uberblock update even if there is no dirty data.
  *             In addition this will cause the vdev configuration to be written
  *             out including updating the zpool cache file. (boolean_t)
  * }
  *
  * onvl is unused
  */
 static const zfs_ioc_key_t zfs_keys_pool_sync[] = {
 	{"force",	DATA_TYPE_BOOLEAN_VALUE,	0},
 };
 
 /*
  * Open the named pool, optionally dirty the root vdev's configuration when
  * "force" is true, wait for the open txg to sync, and close the pool.
  * Only a spa_open() failure is reported; every later step returns 0.
  */
 /* ARGSUSED */
 static int
 zfs_ioc_pool_sync(const char *pool, nvlist_t *innvl, nvlist_t *onvl)
 {
 	spa_t *spa;
 	boolean_t force = B_FALSE;
 	int err;
 
 	err = spa_open(pool, &spa, FTAG);
 	if (err != 0)
 		return (err);
 
 	/* No nvlist, or a failed "force" lookup, both mean "not forced". */
 	if (innvl != NULL) {
 		boolean_t requested;
 
 		err = nvlist_lookup_boolean_value(innvl, "force", &requested);
 		if (err == 0)
 			force = requested;
 	}
 
 	if (force) {
 		/* Dirty the root vdev config while holding SCL_CONFIG. */
 		spa_config_enter(spa, SCL_CONFIG, FTAG, RW_WRITER);
 		vdev_config_dirty(spa->spa_root_vdev);
 		spa_config_exit(spa, SCL_CONFIG, FTAG);
 	}
 
 	txg_wait_synced(spa_get_dsl(spa), 0);
 	spa_close(spa, FTAG);
 
 	return (0);
 }
 
 /*
  * Load a user's wrapping key into the kernel.
  * innvl: {
  *     "hidden_args" -> { "wkeydata" -> value }
  *         raw uint8_t array of encryption wrapping key data (32 bytes)
  *     (optional) "noop" -> (value ignored)
  *         presence indicates the key should only be verified, not loaded
  * }
  */
 static const zfs_ioc_key_t zfs_keys_load_key[] = {
 	{"hidden_args",	DATA_TYPE_NVLIST,	0},
 	{"noop",	DATA_TYPE_BOOLEAN,	ZK_OPTIONAL},
 };
 
 /*
  * Build crypto params from the hidden arguments and pass them to
  * spa_keystore_load_wkey().  Names containing '@' or '%' are rejected with
  * EINVAL.  The params are always freed before returning: the second
  * argument to dsl_crypto_params_free() is `noop' on success and B_TRUE on
  * any failure.  outnvl is unused.
  */
 /* ARGSUSED */
 static int
 zfs_ioc_load_key(const char *dsname, nvlist_t *innvl, nvlist_t *outnvl)
 {
 	dsl_crypto_params_t *dcp = NULL;
 	boolean_t noop = nvlist_exists(innvl, "noop");
 	boolean_t free_arg = B_TRUE;
 	int ret;
 
 	if (strchr(dsname, '@') != NULL || strchr(dsname, '%') != NULL) {
 		ret = SET_ERROR(EINVAL);
 		goto out;
 	}
 
 	ret = dsl_crypto_params_create_nvlist(DCP_CMD_NONE, NULL,
 	    fnvlist_lookup_nvlist(innvl, ZPOOL_HIDDEN_ARGS), &dcp);
 	if (ret == 0)
 		ret = spa_keystore_load_wkey(dsname, dcp, noop);
 
 	/* Only a fully successful load frees the params with `noop'. */
 	if (ret == 0)
 		free_arg = noop;
 
 out:
 	dsl_crypto_params_free(dcp, free_arg);
 	return (ret);
 }
 
 /*
  * Unload a user's wrapping key from the kernel.
  * Both innvl and outnvl are unused.
  */
 static const zfs_ioc_key_t zfs_keys_unload_key[] = {
 	/* no nvl keys */
 };
 
 /*
  * Reject names containing '@' or '%' with EINVAL; otherwise return whatever
  * spa_keystore_unload_wkey() returns.  innvl and outnvl are unused.
  */
 /* ARGSUSED */
 static int
 zfs_ioc_unload_key(const char *dsname, nvlist_t *innvl, nvlist_t *outnvl)
 {
 	if (strchr(dsname, '@') != NULL || strchr(dsname, '%') != NULL)
 		return (SET_ERROR(EINVAL));
 
 	return (spa_keystore_unload_wkey(dsname));
 }
 
 /*
  * Changes a user's wrapping key used to decrypt a dataset. The keyformat,
  * keylocation, pbkdf2salt, and pbkdf2iters properties can also be specified
  * here to change how the key is derived in userspace.
  *
  * innvl: {
  *    "hidden_args" (optional) -> { "wkeydata" -> value }
  *         raw uint8_t array of new encryption wrapping key data (32 bytes)
  *    "props" (optional) -> { prop -> value }
  * }
  *
  * outnvl is unused
  */
 static const zfs_ioc_key_t zfs_keys_change_key[] = {
 	{"crypt_cmd",	DATA_TYPE_UINT64,	ZK_OPTIONAL},
 	{"hidden_args",	DATA_TYPE_NVLIST,	ZK_OPTIONAL},
 	{"props",	DATA_TYPE_NVLIST,	ZK_OPTIONAL},
 };
 
 /*
  * Build crypto params from the optional "crypt_cmd", "props" and hidden
  * arguments, then pass them to spa_keystore_change_key().  Names containing
  * '@' or '%' are rejected with EINVAL.  The params are always freed before
  * returning: the second argument to dsl_crypto_params_free() is B_FALSE on
  * success and B_TRUE on any failure.  outnvl is unused.
  */
 /* ARGSUSED */
 static int
 zfs_ioc_change_key(const char *dsname, nvlist_t *innvl, nvlist_t *outnvl)
 {
 	dsl_crypto_params_t *dcp = NULL;
 	nvlist_t *props = NULL;
 	nvlist_t *hidden = NULL;
 	uint64_t cmd = DCP_CMD_NONE;
 	int ret;
 
 	if (strchr(dsname, '@') != NULL || strchr(dsname, '%') != NULL) {
 		ret = (SET_ERROR(EINVAL));
 	} else {
 		/* All three keys are optional; defaults stand if absent. */
 		(void) nvlist_lookup_uint64(innvl, "crypt_cmd", &cmd);
 		(void) nvlist_lookup_nvlist(innvl, "props", &props);
 		(void) nvlist_lookup_nvlist(innvl, ZPOOL_HIDDEN_ARGS, &hidden);
 
 		ret = dsl_crypto_params_create_nvlist(cmd, props, hidden,
 		    &dcp);
 		if (ret == 0)
 			ret = spa_keystore_change_key(dsname, dcp);
 	}
 
 	dsl_crypto_params_free(dcp, (ret != 0) ? B_TRUE : B_FALSE);
 	return (ret);
 }
 
 /*
  * Registration table for all ioctls, one slot per ioctl number; the
  * registration functions index it with (ioc - ZFS_IOC_FIRST).
  */
 static zfs_ioc_vec_t zfs_ioc_vec[ZFS_IOC_LAST - ZFS_IOC_FIRST];
 
 /*
  * Record a legacy-signature handler and its policy settings in the slot
  * for `ioc'.  Debug builds assert that the number is in range and that the
  * slot has no handler of either flavor yet.
  */
 static void
 zfs_ioctl_register_legacy(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
     zfs_secpolicy_func_t *secpolicy, zfs_ioc_namecheck_t namecheck,
     boolean_t log_history, zfs_ioc_poolcheck_t pool_check)
 {
 	zfs_ioc_vec_t *slot;
 
 	ASSERT3U(ioc, >=, ZFS_IOC_FIRST);
 	ASSERT3U(ioc, <, ZFS_IOC_LAST);
 
 	slot = &zfs_ioc_vec[ioc - ZFS_IOC_FIRST];
 
 	ASSERT3P(slot->zvec_legacy_func, ==, NULL);
 	ASSERT3P(slot->zvec_func, ==, NULL);
 
 	slot->zvec_legacy_func = func;
 	slot->zvec_secpolicy = secpolicy;
 	slot->zvec_namecheck = namecheck;
 	slot->zvec_pool_check = pool_check;
 	slot->zvec_allow_log = log_history;
 }
 
 /*
  * See the block comment at the beginning of this file for details on
  * each argument to this function.
  */
 void
 zfs_ioctl_register(const char *name, zfs_ioc_t ioc, zfs_ioc_func_t *func,
     zfs_secpolicy_func_t *secpolicy, zfs_ioc_namecheck_t namecheck,
     zfs_ioc_poolcheck_t pool_check, boolean_t smush_outnvlist,
     boolean_t allow_log, const zfs_ioc_key_t *nvl_keys, size_t num_keys)
 {
 	zfs_ioc_vec_t *slot;
 
 	ASSERT3U(ioc, >=, ZFS_IOC_FIRST);
 	ASSERT3U(ioc, <, ZFS_IOC_LAST);
 
 	slot = &zfs_ioc_vec[ioc - ZFS_IOC_FIRST];
 
 	/* The slot must not already hold a handler of either flavor. */
 	ASSERT3P(slot->zvec_legacy_func, ==, NULL);
 	ASSERT3P(slot->zvec_func, ==, NULL);
 
 	/* if we are logging, the name must be valid */
 	ASSERT(!allow_log || namecheck != NO_NAME);
 
 	/* Handler identity. */
 	slot->zvec_name = name;
 	slot->zvec_func = func;
 
 	/* Checks applied to the request. */
 	slot->zvec_secpolicy = secpolicy;
 	slot->zvec_namecheck = namecheck;
 	slot->zvec_pool_check = pool_check;
 
 	/* Accepted input keys. */
 	slot->zvec_nvl_keys = nvl_keys;
 	slot->zvec_nvl_key_count = num_keys;
 
 	/* Output and logging flags. */
 	slot->zvec_smush_outnvlist = smush_outnvlist;
 	slot->zvec_allow_log = allow_log;
 }
 
 /*
  * Register a legacy-signature ioctl whose name check is POOL_NAME; the
  * remaining settings are passed through unchanged.
  */
 static void
 zfs_ioctl_register_pool(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
     zfs_secpolicy_func_t *secpolicy, boolean_t log_history,
     zfs_ioc_poolcheck_t pool_check)
 {
 	zfs_ioctl_register_legacy(ioc, func, secpolicy,
 	    POOL_NAME, log_history, pool_check);
 }
 
 /*
  * Register a legacy-signature ioctl whose name check is DATASET_NAME and
  * whose history logging is disabled (B_FALSE).
  */
 void
 zfs_ioctl_register_dataset_nolog(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
     zfs_secpolicy_func_t *secpolicy, zfs_ioc_poolcheck_t pool_check)
 {
 	zfs_ioctl_register_legacy(ioc, func, secpolicy,
 	    DATASET_NAME, B_FALSE, pool_check);
 }
 
 /*
  * Register a legacy-signature pool ioctl with fixed settings:
  * zfs_secpolicy_config as the security policy, POOL_NAME name check,
  * history logging enabled, and both the suspended and read-only pool
  * checks.
  */
 static void
 zfs_ioctl_register_pool_modify(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func)
 {
 	zfs_ioctl_register_legacy(ioc, func, zfs_secpolicy_config,
 	    POOL_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
 }
 
 /*
  * Register a legacy-signature ioctl with no name check (NO_NAME), no
  * history logging, and no pool check (POOL_CHECK_NONE).
  */
 static void
 zfs_ioctl_register_pool_meta(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
     zfs_secpolicy_func_t *secpolicy)
 {
 	zfs_ioctl_register_legacy(ioc, func, secpolicy,
 	    NO_NAME, B_FALSE, POOL_CHECK_NONE);
 }
 
 /*
  * Register a legacy-signature dataset ioctl (DATASET_NAME name check) with
  * a caller-chosen security policy, no history logging, and only the
  * suspended-pool check.
  */
 static void
 zfs_ioctl_register_dataset_read_secpolicy(zfs_ioc_t ioc,
     zfs_ioc_legacy_func_t *func, zfs_secpolicy_func_t *secpolicy)
 {
 	zfs_ioctl_register_legacy(ioc, func, secpolicy,
 	    DATASET_NAME, B_FALSE, POOL_CHECK_SUSPENDED);
 }
 
 /*
  * Same as zfs_ioctl_register_dataset_read_secpolicy(), with
  * zfs_secpolicy_read supplied as the security policy.
  */
 static void
 zfs_ioctl_register_dataset_read(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func)
 {
 	zfs_ioctl_register_dataset_read_secpolicy(ioc, func,
 	    zfs_secpolicy_read);
 }
 
 /*
  * Register a legacy-signature dataset ioctl (DATASET_NAME name check) with
  * a caller-chosen security policy, history logging enabled, and both the
  * suspended and read-only pool checks.
  */
 static void
 zfs_ioctl_register_dataset_modify(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
     zfs_secpolicy_func_t *secpolicy)
 {
 	zfs_ioctl_register_legacy(ioc, func, secpolicy,
 	    DATASET_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
 }
 
 static void
 zfs_ioctl_init(void)
 {
 	zfs_ioctl_register("snapshot", ZFS_IOC_SNAPSHOT,
 	    zfs_ioc_snapshot, zfs_secpolicy_snapshot, POOL_NAME,
 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
 	    zfs_keys_snapshot, ARRAY_SIZE(zfs_keys_snapshot));
 
 	zfs_ioctl_register("log_history", ZFS_IOC_LOG_HISTORY,
 	    zfs_ioc_log_history, zfs_secpolicy_log_history, NO_NAME,
 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE,
 	    zfs_keys_log_history, ARRAY_SIZE(zfs_keys_log_history));
 
 	zfs_ioctl_register("space_snaps", ZFS_IOC_SPACE_SNAPS,
 	    zfs_ioc_space_snaps, zfs_secpolicy_read, DATASET_NAME,
 	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE,
 	    zfs_keys_space_snaps, ARRAY_SIZE(zfs_keys_space_snaps));
 
 	zfs_ioctl_register("send", ZFS_IOC_SEND_NEW,
 	    zfs_ioc_send_new, zfs_secpolicy_send_new, DATASET_NAME,
 	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE,
 	    zfs_keys_send_new, ARRAY_SIZE(zfs_keys_send_new));
 
 	zfs_ioctl_register("send_space", ZFS_IOC_SEND_SPACE,
 	    zfs_ioc_send_space, zfs_secpolicy_read, DATASET_NAME,
 	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE,
 	    zfs_keys_send_space, ARRAY_SIZE(zfs_keys_send_space));
 
 	zfs_ioctl_register("create", ZFS_IOC_CREATE,
 	    zfs_ioc_create, zfs_secpolicy_create_clone, DATASET_NAME,
 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
 	    zfs_keys_create, ARRAY_SIZE(zfs_keys_create));
 
 	zfs_ioctl_register("clone", ZFS_IOC_CLONE,
 	    zfs_ioc_clone, zfs_secpolicy_create_clone, DATASET_NAME,
 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
 	    zfs_keys_clone, ARRAY_SIZE(zfs_keys_clone));
 
 	zfs_ioctl_register("remap", ZFS_IOC_REMAP,
 	    zfs_ioc_remap, zfs_secpolicy_none, DATASET_NAME,
 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_TRUE,
 	    zfs_keys_remap, ARRAY_SIZE(zfs_keys_remap));
 
 	zfs_ioctl_register("destroy_snaps", ZFS_IOC_DESTROY_SNAPS,
 	    zfs_ioc_destroy_snaps, zfs_secpolicy_destroy_snaps, POOL_NAME,
 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
 	    zfs_keys_destroy_snaps, ARRAY_SIZE(zfs_keys_destroy_snaps));
 
 	zfs_ioctl_register("hold", ZFS_IOC_HOLD,
 	    zfs_ioc_hold, zfs_secpolicy_hold, POOL_NAME,
 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
 	    zfs_keys_hold, ARRAY_SIZE(zfs_keys_hold));
 	zfs_ioctl_register("release", ZFS_IOC_RELEASE,
 	    zfs_ioc_release, zfs_secpolicy_release, POOL_NAME,
 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
 	    zfs_keys_release, ARRAY_SIZE(zfs_keys_release));
 
 	zfs_ioctl_register("get_holds", ZFS_IOC_GET_HOLDS,
 	    zfs_ioc_get_holds, zfs_secpolicy_read, DATASET_NAME,
 	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE,
 	    zfs_keys_get_holds, ARRAY_SIZE(zfs_keys_get_holds));
 
 	zfs_ioctl_register("rollback", ZFS_IOC_ROLLBACK,
 	    zfs_ioc_rollback, zfs_secpolicy_rollback, DATASET_NAME,
 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_TRUE,
 	    zfs_keys_rollback, ARRAY_SIZE(zfs_keys_rollback));
 
 	zfs_ioctl_register("bookmark", ZFS_IOC_BOOKMARK,
 	    zfs_ioc_bookmark, zfs_secpolicy_bookmark, POOL_NAME,
 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
 	    zfs_keys_bookmark, ARRAY_SIZE(zfs_keys_bookmark));
 
 	zfs_ioctl_register("get_bookmarks", ZFS_IOC_GET_BOOKMARKS,
 	    zfs_ioc_get_bookmarks, zfs_secpolicy_read, DATASET_NAME,
 	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE,
 	    zfs_keys_get_bookmarks, ARRAY_SIZE(zfs_keys_get_bookmarks));
 
 	zfs_ioctl_register("get_bookmark_props", ZFS_IOC_GET_BOOKMARK_PROPS,
 	    zfs_ioc_get_bookmark_props, zfs_secpolicy_read, ENTITY_NAME,
 	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE, zfs_keys_get_bookmark_props,
 	    ARRAY_SIZE(zfs_keys_get_bookmark_props));
 
 	zfs_ioctl_register("destroy_bookmarks", ZFS_IOC_DESTROY_BOOKMARKS,
 	    zfs_ioc_destroy_bookmarks, zfs_secpolicy_destroy_bookmarks,
 	    POOL_NAME,
 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
 	    zfs_keys_destroy_bookmarks,
 	    ARRAY_SIZE(zfs_keys_destroy_bookmarks));
 
 	zfs_ioctl_register("receive", ZFS_IOC_RECV_NEW,
 	    zfs_ioc_recv_new, zfs_secpolicy_recv_new, DATASET_NAME,
 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
 	    zfs_keys_recv_new, ARRAY_SIZE(zfs_keys_recv_new));
 	zfs_ioctl_register("load-key", ZFS_IOC_LOAD_KEY,
 	    zfs_ioc_load_key, zfs_secpolicy_load_key,
 	    DATASET_NAME, POOL_CHECK_SUSPENDED, B_TRUE, B_TRUE,
 	    zfs_keys_load_key, ARRAY_SIZE(zfs_keys_load_key));
 	zfs_ioctl_register("unload-key", ZFS_IOC_UNLOAD_KEY,
 	    zfs_ioc_unload_key, zfs_secpolicy_load_key,
 	    DATASET_NAME, POOL_CHECK_SUSPENDED, B_TRUE, B_TRUE,
 	    zfs_keys_unload_key, ARRAY_SIZE(zfs_keys_unload_key));
 	zfs_ioctl_register("change-key", ZFS_IOC_CHANGE_KEY,
 	    zfs_ioc_change_key, zfs_secpolicy_change_key,
 	    DATASET_NAME, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY,
 	    B_TRUE, B_TRUE, zfs_keys_change_key,
 	    ARRAY_SIZE(zfs_keys_change_key));
 
 	zfs_ioctl_register("sync", ZFS_IOC_POOL_SYNC,
 	    zfs_ioc_pool_sync, zfs_secpolicy_none, POOL_NAME,
 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE,
 	    zfs_keys_pool_sync, ARRAY_SIZE(zfs_keys_pool_sync));
 	zfs_ioctl_register("reopen", ZFS_IOC_POOL_REOPEN, zfs_ioc_pool_reopen,
 	    zfs_secpolicy_config, POOL_NAME, POOL_CHECK_SUSPENDED, B_TRUE,
 	    B_TRUE, zfs_keys_pool_reopen, ARRAY_SIZE(zfs_keys_pool_reopen));
 
 	zfs_ioctl_register("channel_program", ZFS_IOC_CHANNEL_PROGRAM,
 	    zfs_ioc_channel_program, zfs_secpolicy_config,
 	    POOL_NAME, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE,
 	    B_TRUE, zfs_keys_channel_program,
 	    ARRAY_SIZE(zfs_keys_channel_program));
 
 	zfs_ioctl_register("redact", ZFS_IOC_REDACT,
 	    zfs_ioc_redact, zfs_secpolicy_config, DATASET_NAME,
 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
 	    zfs_keys_redact, ARRAY_SIZE(zfs_keys_redact));
 
 	zfs_ioctl_register("zpool_checkpoint", ZFS_IOC_POOL_CHECKPOINT,
 	    zfs_ioc_pool_checkpoint, zfs_secpolicy_config, POOL_NAME,
 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
 	    zfs_keys_pool_checkpoint, ARRAY_SIZE(zfs_keys_pool_checkpoint));
 
 	zfs_ioctl_register("zpool_discard_checkpoint",
 	    ZFS_IOC_POOL_DISCARD_CHECKPOINT, zfs_ioc_pool_discard_checkpoint,
 	    zfs_secpolicy_config, POOL_NAME,
 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
 	    zfs_keys_pool_discard_checkpoint,
 	    ARRAY_SIZE(zfs_keys_pool_discard_checkpoint));
 
 	zfs_ioctl_register("initialize", ZFS_IOC_POOL_INITIALIZE,
 	    zfs_ioc_pool_initialize, zfs_secpolicy_config, POOL_NAME,
 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
 	    zfs_keys_pool_initialize, ARRAY_SIZE(zfs_keys_pool_initialize));
 
 	zfs_ioctl_register("trim", ZFS_IOC_POOL_TRIM,
 	    zfs_ioc_pool_trim, zfs_secpolicy_config, POOL_NAME,
 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
 	    zfs_keys_pool_trim, ARRAY_SIZE(zfs_keys_pool_trim));
 
 	zfs_ioctl_register("wait", ZFS_IOC_WAIT,
 	    zfs_ioc_wait, zfs_secpolicy_none, POOL_NAME,
 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE,
 	    zfs_keys_pool_wait, ARRAY_SIZE(zfs_keys_pool_wait));
 
 	zfs_ioctl_register("wait_fs", ZFS_IOC_WAIT_FS,
 	    zfs_ioc_wait_fs, zfs_secpolicy_none, DATASET_NAME,
 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE,
 	    zfs_keys_fs_wait, ARRAY_SIZE(zfs_keys_fs_wait));
 
 	zfs_ioctl_register("set_bootenv", ZFS_IOC_SET_BOOTENV,
 	    zfs_ioc_set_bootenv, zfs_secpolicy_config, POOL_NAME,
 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_TRUE,
 	    zfs_keys_set_bootenv, ARRAY_SIZE(zfs_keys_set_bootenv));
 
 	zfs_ioctl_register("get_bootenv", ZFS_IOC_GET_BOOTENV,
 	    zfs_ioc_get_bootenv, zfs_secpolicy_none, POOL_NAME,
 	    POOL_CHECK_SUSPENDED, B_FALSE, B_TRUE,
 	    zfs_keys_get_bootenv, ARRAY_SIZE(zfs_keys_get_bootenv));
 
 	/* IOCTLS that use the legacy function signature */
 
 	zfs_ioctl_register_legacy(ZFS_IOC_POOL_FREEZE, zfs_ioc_pool_freeze,
 	    zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_READONLY);
 
 	zfs_ioctl_register_pool(ZFS_IOC_POOL_CREATE, zfs_ioc_pool_create,
 	    zfs_secpolicy_config, B_TRUE, POOL_CHECK_NONE);
 	zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_SCAN,
 	    zfs_ioc_pool_scan);
 	zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_UPGRADE,
 	    zfs_ioc_pool_upgrade);
 	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_ADD,
 	    zfs_ioc_vdev_add);
 	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_REMOVE,
 	    zfs_ioc_vdev_remove);
 	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SET_STATE,
 	    zfs_ioc_vdev_set_state);
 	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_ATTACH,
 	    zfs_ioc_vdev_attach);
 	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_DETACH,
 	    zfs_ioc_vdev_detach);
 	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SETPATH,
 	    zfs_ioc_vdev_setpath);
 	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SETFRU,
 	    zfs_ioc_vdev_setfru);
 	zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_SET_PROPS,
 	    zfs_ioc_pool_set_props);
 	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SPLIT,
 	    zfs_ioc_vdev_split);
 	zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_REGUID,
 	    zfs_ioc_pool_reguid);
 
 	zfs_ioctl_register_pool_meta(ZFS_IOC_POOL_CONFIGS,
 	    zfs_ioc_pool_configs, zfs_secpolicy_none);
 	zfs_ioctl_register_pool_meta(ZFS_IOC_POOL_TRYIMPORT,
 	    zfs_ioc_pool_tryimport, zfs_secpolicy_config);
 	zfs_ioctl_register_pool_meta(ZFS_IOC_INJECT_FAULT,
 	    zfs_ioc_inject_fault, zfs_secpolicy_inject);
 	zfs_ioctl_register_pool_meta(ZFS_IOC_CLEAR_FAULT,
 	    zfs_ioc_clear_fault, zfs_secpolicy_inject);
 	zfs_ioctl_register_pool_meta(ZFS_IOC_INJECT_LIST_NEXT,
 	    zfs_ioc_inject_list_next, zfs_secpolicy_inject);
 
 	/*
 	 * Pool destroy and export don't log the history as part of
 	 * zfsdev_ioctl; rather, zfs_ioc_pool_export does the logging
 	 * of those commands.
 	 */
 	zfs_ioctl_register_pool(ZFS_IOC_POOL_DESTROY, zfs_ioc_pool_destroy,
 	    zfs_secpolicy_config, B_FALSE, POOL_CHECK_SUSPENDED);
 	zfs_ioctl_register_pool(ZFS_IOC_POOL_EXPORT, zfs_ioc_pool_export,
 	    zfs_secpolicy_config, B_FALSE, POOL_CHECK_SUSPENDED);
 
 	zfs_ioctl_register_pool(ZFS_IOC_POOL_STATS, zfs_ioc_pool_stats,
 	    zfs_secpolicy_read, B_FALSE, POOL_CHECK_NONE);
 	zfs_ioctl_register_pool(ZFS_IOC_POOL_GET_PROPS, zfs_ioc_pool_get_props,
 	    zfs_secpolicy_read, B_FALSE, POOL_CHECK_NONE);
 
 	zfs_ioctl_register_pool(ZFS_IOC_ERROR_LOG, zfs_ioc_error_log,
 	    zfs_secpolicy_inject, B_FALSE, POOL_CHECK_SUSPENDED);
 	zfs_ioctl_register_pool(ZFS_IOC_DSOBJ_TO_DSNAME,
 	    zfs_ioc_dsobj_to_dsname,
 	    zfs_secpolicy_diff, B_FALSE, POOL_CHECK_SUSPENDED);
 	zfs_ioctl_register_pool(ZFS_IOC_POOL_GET_HISTORY,
 	    zfs_ioc_pool_get_history,
 	    zfs_secpolicy_config, B_FALSE, POOL_CHECK_SUSPENDED);
 
 	zfs_ioctl_register_pool(ZFS_IOC_POOL_IMPORT, zfs_ioc_pool_import,
 	    zfs_secpolicy_config, B_TRUE, POOL_CHECK_NONE);
 
 	zfs_ioctl_register_pool(ZFS_IOC_CLEAR, zfs_ioc_clear,
 	    zfs_secpolicy_config, B_TRUE, POOL_CHECK_READONLY);
 
 	zfs_ioctl_register_dataset_read(ZFS_IOC_SPACE_WRITTEN,
 	    zfs_ioc_space_written);
 	zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_RECVD_PROPS,
 	    zfs_ioc_objset_recvd_props);
 	zfs_ioctl_register_dataset_read(ZFS_IOC_NEXT_OBJ,
 	    zfs_ioc_next_obj);
 	zfs_ioctl_register_dataset_read(ZFS_IOC_GET_FSACL,
 	    zfs_ioc_get_fsacl);
 	zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_STATS,
 	    zfs_ioc_objset_stats);
 	zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_ZPLPROPS,
 	    zfs_ioc_objset_zplprops);
 	zfs_ioctl_register_dataset_read(ZFS_IOC_DATASET_LIST_NEXT,
 	    zfs_ioc_dataset_list_next);
 	zfs_ioctl_register_dataset_read(ZFS_IOC_SNAPSHOT_LIST_NEXT,
 	    zfs_ioc_snapshot_list_next);
 	zfs_ioctl_register_dataset_read(ZFS_IOC_SEND_PROGRESS,
 	    zfs_ioc_send_progress);
 
 	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_DIFF,
 	    zfs_ioc_diff, zfs_secpolicy_diff);
 	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_OBJ_TO_STATS,
 	    zfs_ioc_obj_to_stats, zfs_secpolicy_diff);
 	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_OBJ_TO_PATH,
 	    zfs_ioc_obj_to_path, zfs_secpolicy_diff);
 	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_USERSPACE_ONE,
 	    zfs_ioc_userspace_one, zfs_secpolicy_userspace_one);
 	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_USERSPACE_MANY,
 	    zfs_ioc_userspace_many, zfs_secpolicy_userspace_many);
 	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_SEND,
 	    zfs_ioc_send, zfs_secpolicy_send);
 
 	zfs_ioctl_register_dataset_modify(ZFS_IOC_SET_PROP, zfs_ioc_set_prop,
 	    zfs_secpolicy_none);
 	zfs_ioctl_register_dataset_modify(ZFS_IOC_DESTROY, zfs_ioc_destroy,
 	    zfs_secpolicy_destroy);
 	zfs_ioctl_register_dataset_modify(ZFS_IOC_RENAME, zfs_ioc_rename,
 	    zfs_secpolicy_rename);
 	zfs_ioctl_register_dataset_modify(ZFS_IOC_RECV, zfs_ioc_recv,
 	    zfs_secpolicy_recv);
 	zfs_ioctl_register_dataset_modify(ZFS_IOC_PROMOTE, zfs_ioc_promote,
 	    zfs_secpolicy_promote);
 	zfs_ioctl_register_dataset_modify(ZFS_IOC_INHERIT_PROP,
 	    zfs_ioc_inherit_prop, zfs_secpolicy_inherit_prop);
 	zfs_ioctl_register_dataset_modify(ZFS_IOC_SET_FSACL, zfs_ioc_set_fsacl,
 	    zfs_secpolicy_set_fsacl);
 
 	zfs_ioctl_register_dataset_nolog(ZFS_IOC_SHARE, zfs_ioc_share,
 	    zfs_secpolicy_share, POOL_CHECK_NONE);
 	zfs_ioctl_register_dataset_nolog(ZFS_IOC_SMB_ACL, zfs_ioc_smb_acl,
 	    zfs_secpolicy_smb_acl, POOL_CHECK_NONE);
 	zfs_ioctl_register_dataset_nolog(ZFS_IOC_USERSPACE_UPGRADE,
 	    zfs_ioc_userspace_upgrade, zfs_secpolicy_userspace_upgrade,
 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
 	zfs_ioctl_register_dataset_nolog(ZFS_IOC_TMP_SNAPSHOT,
 	    zfs_ioc_tmp_snapshot, zfs_secpolicy_tmp_snapshot,
 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
 
 	zfs_ioctl_register_legacy(ZFS_IOC_EVENTS_NEXT, zfs_ioc_events_next,
 	    zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_NONE);
 	zfs_ioctl_register_legacy(ZFS_IOC_EVENTS_CLEAR, zfs_ioc_events_clear,
 	    zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_NONE);
 	zfs_ioctl_register_legacy(ZFS_IOC_EVENTS_SEEK, zfs_ioc_events_seek,
 	    zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_NONE);
 
 	zfs_ioctl_init_os();
 }
 
 /*
  * Validate the input nvlist of a non-legacy ioctl against the key
  * table registered for it in 'vec'.
  *
  * Returns 0 when every pair is acceptable, otherwise one of:
  * ZFS_ERR_IOC_ARG_UNAVAIL	A pair matched none of the registered keys
  * ZFS_ERR_IOC_ARG_REQUIRED	A non-optional key was not supplied
  * ZFS_ERR_IOC_ARG_BADTYPE	A pair's type differs from its key's type
  */
 static int
 zfs_check_input_nvpairs(nvlist_t *innvl, const zfs_ioc_vec_t *vec)
 {
 	const zfs_ioc_key_t *keys = vec->zvec_nvl_keys;
 	boolean_t have_required = B_FALSE;
 	nvpair_t *elem = nvlist_next_nvpair(innvl, NULL);
 
 	/*
 	 * First pass: classify every pair the caller supplied.
 	 */
 	while (elem != NULL) {
 		char *pname = nvpair_name(elem);
 		data_type_t ptype = nvpair_type(elem);
 		boolean_t known = B_FALSE;
 
 		for (int i = 0; i < vec->zvec_nvl_key_count; i++) {
 			const zfs_ioc_key_t *key = &keys[i];
 			boolean_t wildcard =
 			    (key->zkey_flags & ZK_WILDCARDLIST) != 0;
 
 			/* non-wildcard keys only match by exact name */
 			if (!wildcard && strcmp(key->zkey_name, pname) != 0)
 				continue;
 
 			known = B_TRUE;
 
 			if (key->zkey_type != DATA_TYPE_ANY &&
 			    key->zkey_type != ptype)
 				return (SET_ERROR(ZFS_ERR_IOC_ARG_BADTYPE));
 
 			/*
 			 * A match on a required key ends the search for
 			 * this pair; an optional match keeps scanning.
 			 */
 			if ((key->zkey_flags & ZK_OPTIONAL) == 0) {
 				have_required = B_TRUE;
 				break;
 			}
 		}
 
 		/* the only unregistered pair tolerated is an 'optional' nvlist */
 		if (!known) {
 			if (strcmp(pname, "optional") != 0 ||
 			    ptype != DATA_TYPE_NVLIST)
 				return (SET_ERROR(ZFS_ERR_IOC_ARG_UNAVAIL));
 		}
 
 		elem = nvlist_next_nvpair(innvl, elem);
 	}
 
 	/*
 	 * Second pass: make sure nothing mandatory is missing.
 	 */
 	for (int i = 0; i < vec->zvec_nvl_key_count; i++) {
 		const zfs_ioc_key_t *key = &keys[i];
 
 		if ((key->zkey_flags & ZK_OPTIONAL) != 0)
 			continue;
 
 		if ((key->zkey_flags & ZK_WILDCARDLIST) != 0) {
 			/* a required wildcard needs at least one required hit */
 			if (!have_required)
 				return (SET_ERROR(ZFS_ERR_IOC_ARG_REQUIRED));
 		} else if (!nvlist_exists(innvl, key->zkey_name)) {
 			return (SET_ERROR(ZFS_ERR_IOC_ARG_REQUIRED));
 		}
 	}
 
 	return (0);
 }
 
 /*
  * Apply the pool-state checks requested in 'check' to the pool that
  * 'name' resolves to via spa_open().
  *
  * Returns 0 when POOL_CHECK_NONE is set or all requested checks pass,
  * the spa_open() error if the pool cannot be opened, EAGAIN if
  * POOL_CHECK_SUSPENDED is set and the pool is suspended, or EROFS if
  * POOL_CHECK_READONLY is set and the pool is not writeable.
  */
 static int
 pool_status_check(const char *name, zfs_ioc_namecheck_t type,
     zfs_ioc_poolcheck_t check)
 {
 	spa_t *pool;
 	int rc;
 
 	ASSERT(type == POOL_NAME || type == DATASET_NAME ||
 	    type == ENTITY_NAME);
 
 	if ((check & POOL_CHECK_NONE) != 0)
 		return (0);
 
 	rc = spa_open(name, &pool, FTAG);
 	if (rc != 0)
 		return (rc);
 
 	/* the suspended check takes precedence over the read-only check */
 	if ((check & POOL_CHECK_SUSPENDED) != 0 && spa_suspended(pool)) {
 		rc = SET_ERROR(EAGAIN);
 	} else if ((check & POOL_CHECK_READONLY) != 0 &&
 	    !spa_writeable(pool)) {
 		rc = SET_ERROR(EROFS);
 	}
 
 	spa_close(pool, FTAG);
 	return (rc);
 }
 
 /*
  * Report the minor number of the zfsdev state attached to a file.
  *
  * The file's private data is fetched with zfs_file_private() and then
  * compared, under zfsdev_state_lock, against every entry of
  * zfsdev_state_list whose zs_minor is not -1.  On a match the entry's
  * minor is stored in *minorp and 0 is returned.  EBADF is returned when
  * the file has no private data or no active entry matches.
  *
  * With this change the caller passes an already-held zfs_file_t; the
  * fd lookup through zfs_file_get() no longer happens here.
  */
 int
-zfsdev_getminor(int fd, minor_t *minorp)
+zfsdev_getminor(zfs_file_t *fp, minor_t *minorp)
 {
 	zfsdev_state_t *zs, *fpd;
-	zfs_file_t *fp;
-	int rc;
 
 	ASSERT(!MUTEX_HELD(&zfsdev_state_lock));
 
-	if ((rc = zfs_file_get(fd, &fp)))
-		return (rc);
-
 	fpd = zfs_file_private(fp);
 	if (fpd == NULL)
 		return (SET_ERROR(EBADF));
 
 	mutex_enter(&zfsdev_state_lock);
 
 	for (zs = zfsdev_state_list; zs != NULL; zs = zs->zs_next) {
 
 		/* entries with zs_minor == -1 are not in use; skip them */
 		if (zs->zs_minor == -1)
 			continue;
 
 		if (fpd == zs) {
 			*minorp = fpd->zs_minor;
 			mutex_exit(&zfsdev_state_lock);
 			return (0);
 		}
 	}
 
 	mutex_exit(&zfsdev_state_lock);
 
 	return (SET_ERROR(EBADF));
 }
 
 /*
  * Find the state entry whose zs_minor equals 'minor' and return the
  * part of it selected by 'which': the onexit data (ZST_ONEXIT), the
  * zevent data (ZST_ZEVENT) or the entry itself (ZST_ALL).  Returns NULL
  * when no entry matches.
  *
  * The list is walked without taking zfsdev_state_lock; see the ordering
  * notes in zfsdev_state_init().
  */
 void *
 zfsdev_get_state(minor_t minor, enum zfsdev_state_type which)
 {
 	zfsdev_state_t *entry = zfsdev_state_list;
 
 	while (entry != NULL) {
 		if (entry->zs_minor == minor) {
 			/* read barrier after the zs_minor match */
 			smp_rmb();
 			if (which == ZST_ONEXIT)
 				return (entry->zs_onexit);
 			if (which == ZST_ZEVENT)
 				return (entry->zs_zevent);
 			if (which == ZST_ALL)
 				return (entry);
 		}
 		entry = entry->zs_next;
 	}
 
 	return (NULL);
 }
 
 /*
  * Find a free minor number.  The zfsdev_state_list is expected to
  * be short since it is only a list of currently open file handles.
  *
  * The search starts just after the minor handed out by the previous
  * successful call (remembered in the static last_minor) and wraps back
  * to 1 once it passes ZFSDEV_MAX_MINOR.  A candidate is free when
  * zfsdev_get_state() finds no entry for it.  Returns the minor found,
  * or 0 if the scan comes back around to last_minor without finding one.
  *
  * The caller must hold zfsdev_state_lock.
  */
 static minor_t
 zfsdev_minor_alloc(void)
 {
 	static minor_t last_minor = 0;
 	minor_t m;
 
 	ASSERT(MUTEX_HELD(&zfsdev_state_lock));
 
 	for (m = last_minor + 1; m != last_minor; m++) {
 		/* wrap to 1, not 0: 0 is the "nothing free" return value */
 		if (m > ZFSDEV_MAX_MINOR)
 			m = 1;
 		if (zfsdev_get_state(m, ZST_ALL) == NULL) {
 			last_minor = m;
 			return (m);
 		}
 	}
 
 	return (0);
 }
 
 /*
  * Set up a zfsdev state entry for 'priv' and give it a fresh minor.
  *
  * A minor is obtained from zfsdev_minor_alloc(); ENXIO is returned if
  * none is available.  The first list entry with zs_minor == -1 is
  * reused if there is one, otherwise a new zeroed entry is allocated and
  * appended to the list.  The entry is attached to 'priv' with
  * zfsdev_private_set_state() and its onexit and zevent data are
  * initialized before the minor is published.  Returns 0 on success.
  *
  * The caller must hold zfsdev_state_lock.
  */
 int
 zfsdev_state_init(void *priv)
 {
 	zfsdev_state_t *zs, *zsprev = NULL;
 	minor_t minor;
 	boolean_t newzs = B_FALSE;
 
 	ASSERT(MUTEX_HELD(&zfsdev_state_lock));
 
 	minor = zfsdev_minor_alloc();
 	if (minor == 0)
 		return (SET_ERROR(ENXIO));
 
 	/* look for a reusable entry; zsprev ends up at the list tail if none */
 	for (zs = zfsdev_state_list; zs != NULL; zs = zs->zs_next) {
 		if (zs->zs_minor == -1)
 			break;
 		zsprev = zs;
 	}
 
 	if (!zs) {
 		zs = kmem_zalloc(sizeof (zfsdev_state_t), KM_SLEEP);
 		newzs = B_TRUE;
 	}
 
 	zfsdev_private_set_state(priv, zs);
 
 	zfs_onexit_init((zfs_onexit_t **)&zs->zs_onexit);
 	zfs_zevent_init((zfs_zevent_t **)&zs->zs_zevent);
 
 	/*
 	 * In order to provide for lock-free concurrent read access
 	 * to the minor list in zfsdev_get_state(), new entries
 	 * must be completely written before linking them into the
 	 * list whereas existing entries are already linked; the last
 	 * operation must be updating zs_minor (from -1 to the new
 	 * value).
 	 */
 	if (newzs) {
 		zs->zs_minor = minor;
 		membar_producer();
 		zsprev->zs_next = zs;
 	} else {
 		membar_producer();
 		zs->zs_minor = minor;
 	}
 
 	return (0);
 }
 
 /*
  * Release the onexit and zevent data of the state entry attached to
  * 'priv' and mark the entry unused by setting its zs_minor to -1.  The
  * entry itself is not freed or unlinked here.
  */
 void
 zfsdev_state_destroy(void *priv)
 {
 	zfsdev_state_t *zs = zfsdev_private_get_state(priv);
 
 	ASSERT(zs != NULL);
 	ASSERT3S(zs->zs_minor, >, 0);
 
 	/*
 	 * The last reference to this zfsdev file descriptor is being dropped.
 	 * We don't have to worry about lookup grabbing this state object, and
 	 * zfsdev_state_init() will not try to reuse this object until it is
 	 * invalidated by setting zs_minor to -1.  Invalidation must be done
 	 * last, with a memory barrier to ensure ordering.  This lets us avoid
 	 * taking the global zfsdev state lock around destruction.
 	 */
 	zfs_onexit_destroy(zs->zs_onexit);
 	zfs_zevent_destroy(zs->zs_zevent);
 	zs->zs_onexit = NULL;
 	zs->zs_zevent = NULL;
 	membar_producer();
 	zs->zs_minor = -1;
 }
 
 /*
  * Dispatch one ioctl through the zfs_ioc_vec table.
  *
  * vecnum	index of the ioctl in zfs_ioc_vec
  * zc		the ioctl's zfs_cmd_t
  * flag		ioctl flags; only FKIOCTL is kept (in zc->zc_iflags)
  *
  * The steps are: reject unknown or unregistered vectors, copy in the
  * source nvlist (bounded by zfs_max_nvlist_src_size_os()), validate
  * zc_name according to the vector's namecheck and pool checks, validate
  * the input pairs for non-legacy vectors, run the vector's security
  * policy, and finally call the handler.  For non-legacy vectors the
  * call is optionally recorded in the pool history and the output nvlist
  * is copied out.
  *
  * Returns 0 on success, otherwise an errno or ZFS_ERR_* value.
  */
 long
 zfsdev_ioctl_common(uint_t vecnum, zfs_cmd_t *zc, int flag)
 {
 	int error, cmd;
 	const zfs_ioc_vec_t *vec;
 	char *saved_poolname = NULL;
 	uint64_t max_nvlist_src_size;
 	size_t saved_poolname_len = 0;
 	nvlist_t *innvl = NULL;
 	fstrans_cookie_t cookie;
 	hrtime_t start_time = gethrtime();
 
 	cmd = vecnum;
 	error = 0;
 	if (vecnum >= sizeof (zfs_ioc_vec) / sizeof (zfs_ioc_vec[0]))
 		return (SET_ERROR(ZFS_ERR_IOC_CMD_UNAVAIL));
 
 	vec = &zfs_ioc_vec[vecnum];
 
 	/*
 	 * The registered ioctl list may be sparse, verify that either
 	 * a normal or legacy handler are registered.
 	 */
 	if (vec->zvec_func == NULL && vec->zvec_legacy_func == NULL)
 		return (SET_ERROR(ZFS_ERR_IOC_CMD_UNAVAIL));
 
 	zc->zc_iflags = flag & FKIOCTL;
 	max_nvlist_src_size = zfs_max_nvlist_src_size_os();
 	if (zc->zc_nvlist_src_size > max_nvlist_src_size) {
 		/*
 		 * Make sure the user doesn't pass in an insane value for
 		 * zc_nvlist_src_size.  We have to check, since we will end
 		 * up allocating that much memory inside of get_nvlist().  This
 		 * prevents a nefarious user from allocating tons of kernel
 		 * memory.
 		 *
 		 * Also, we return EINVAL instead of ENOMEM here.  The reason
 		 * being that returning ENOMEM from an ioctl() has a special
 		 * connotation; that the user's size value is too small and
 		 * needs to be expanded to hold the nvlist.  See
 		 * zcmd_expand_dst_nvlist() for details.
 		 *
 		 * Bail out right away: the name checks below assign to
 		 * 'error' unconditionally and would otherwise discard this
 		 * EINVAL, letting the request continue without its input.
 		 */
 		error = SET_ERROR(EINVAL);	/* User's size too big */
 		goto out;
 
 	} else if (zc->zc_nvlist_src_size != 0) {
 		error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
 		    zc->zc_iflags, &innvl);
 		if (error != 0)
 			goto out;
 	}
 
 	/*
 	 * Ensure that all pool/dataset names are valid before we pass down to
 	 * the lower layers.
 	 */
 	zc->zc_name[sizeof (zc->zc_name) - 1] = '\0';
 	switch (vec->zvec_namecheck) {
 	case POOL_NAME:
 		if (pool_namecheck(zc->zc_name, NULL, NULL) != 0)
 			error = SET_ERROR(EINVAL);
 		else
 			error = pool_status_check(zc->zc_name,
 			    vec->zvec_namecheck, vec->zvec_pool_check);
 		break;
 
 	case DATASET_NAME:
 		if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0)
 			error = SET_ERROR(EINVAL);
 		else
 			error = pool_status_check(zc->zc_name,
 			    vec->zvec_namecheck, vec->zvec_pool_check);
 		break;
 
 	case ENTITY_NAME:
 		if (entity_namecheck(zc->zc_name, NULL, NULL) != 0) {
 			error = SET_ERROR(EINVAL);
 		} else {
 			error = pool_status_check(zc->zc_name,
 			    vec->zvec_namecheck, vec->zvec_pool_check);
 		}
 		break;
 
 	case NO_NAME:
 		break;
 	}
 	/*
 	 * Ensure that all input pairs are valid before we pass them down
 	 * to the lower layers.
 	 *
 	 * The vectored functions can use fnvlist_lookup_{type} for any
 	 * required pairs since zfs_check_input_nvpairs() confirmed that
 	 * they exist and are of the correct type.
 	 */
 	if (error == 0 && vec->zvec_func != NULL) {
 		error = zfs_check_input_nvpairs(innvl, vec);
 		if (error != 0)
 			goto out;
 	}
 
 	if (error == 0) {
 		cookie = spl_fstrans_mark();
 		error = vec->zvec_secpolicy(zc, innvl, CRED());
 		spl_fstrans_unmark(cookie);
 	}
 
 	if (error != 0)
 		goto out;
 
 	/* legacy ioctls can modify zc_name */
 	/*
 	 * Can't use kmem_strdup() as we might truncate the string and
 	 * kmem_strfree() would then free with incorrect size.
 	 */
 	saved_poolname_len = strlen(zc->zc_name) + 1;
 	saved_poolname = kmem_alloc(saved_poolname_len, KM_SLEEP);
 
 	/* keep only the pool component: cut at the first '/', '@' or '#' */
 	strlcpy(saved_poolname, zc->zc_name, saved_poolname_len);
 	saved_poolname[strcspn(saved_poolname, "/@#")] = '\0';
 
 	if (vec->zvec_func != NULL) {
 		nvlist_t *outnvl;
 		int puterror = 0;
 		spa_t *spa;
 		nvlist_t *lognv = NULL;
 
 		ASSERT(vec->zvec_legacy_func == NULL);
 
 		/*
 		 * Add the innvl to the lognv before calling the func,
 		 * in case the func changes the innvl.
 		 */
 		if (vec->zvec_allow_log) {
 			lognv = fnvlist_alloc();
 			fnvlist_add_string(lognv, ZPOOL_HIST_IOCTL,
 			    vec->zvec_name);
 			if (!nvlist_empty(innvl)) {
 				fnvlist_add_nvlist(lognv, ZPOOL_HIST_INPUT_NVL,
 				    innvl);
 			}
 		}
 
 		outnvl = fnvlist_alloc();
 		cookie = spl_fstrans_mark();
 		error = vec->zvec_func(zc->zc_name, innvl, outnvl);
 		spl_fstrans_unmark(cookie);
 
 		/*
 		 * Some commands can partially execute, modify state, and still
 		 * return an error.  In these cases, attempt to record what
 		 * was modified.
 		 */
 		if ((error == 0 ||
 		    (cmd == ZFS_IOC_CHANNEL_PROGRAM && error != EINVAL)) &&
 		    vec->zvec_allow_log &&
 		    spa_open(zc->zc_name, &spa, FTAG) == 0) {
 			if (!nvlist_empty(outnvl)) {
 				size_t out_size = fnvlist_size(outnvl);
 				/* oversized output is logged by size only */
 				if (out_size > zfs_history_output_max) {
 					fnvlist_add_int64(lognv,
 					    ZPOOL_HIST_OUTPUT_SIZE, out_size);
 				} else {
 					fnvlist_add_nvlist(lognv,
 					    ZPOOL_HIST_OUTPUT_NVL, outnvl);
 				}
 			}
 			if (error != 0) {
 				fnvlist_add_int64(lognv, ZPOOL_HIST_ERRNO,
 				    error);
 			}
 			fnvlist_add_int64(lognv, ZPOOL_HIST_ELAPSED_NS,
 			    gethrtime() - start_time);
 			(void) spa_history_log_nvl(spa, lognv);
 			spa_close(spa, FTAG);
 		}
 		fnvlist_free(lognv);
 
 		if (!nvlist_empty(outnvl) || zc->zc_nvlist_dst_size != 0) {
 			int smusherror = 0;
 			if (vec->zvec_smush_outnvlist) {
 				smusherror = nvlist_smush(outnvl,
 				    zc->zc_nvlist_dst_size);
 			}
 			if (smusherror == 0)
 				puterror = put_nvlist(zc, outnvl);
 		}
 
 		/* a copy-out failure overrides the handler's result */
 		if (puterror != 0)
 			error = puterror;
 
 		nvlist_free(outnvl);
 	} else {
 		cookie = spl_fstrans_mark();
 		error = vec->zvec_legacy_func(zc);
 		spl_fstrans_unmark(cookie);
 	}
 
 out:
 	nvlist_free(innvl);
 	/*
 	 * On success of a loggable ioctl, replace the thread's
 	 * zfs_allow_log_key value with a copy of the saved pool name.
 	 */
 	if (error == 0 && vec->zvec_allow_log) {
 		char *s = tsd_get(zfs_allow_log_key);
 		if (s != NULL)
 			kmem_strfree(s);
 		(void) tsd_set(zfs_allow_log_key, kmem_strdup(saved_poolname));
 	}
 	if (saved_poolname != NULL)
 		kmem_free(saved_poolname, saved_poolname_len);
 
 	return (error);
 }
 
 /*
  * Module-wide initialization: zvol, spa and zfs layers, the ioctl
  * table, the zfsdev state list (a single unused head entry) and its
  * lock, the device attachment, and the thread-specific-data keys.
  *
  * Returns 0 on success.  If zvol_init() fails its error is returned
  * with nothing else set up; if zfsdev_attach() fails, everything
  * initialized up to that point is undone before its error is returned.
  */
 int
 zfs_kmod_init(void)
 {
 	int error;
 
 	if ((error = zvol_init()) != 0)
 		return (error);
 
 	spa_init(SPA_MODE_READ | SPA_MODE_WRITE);
 	zfs_init();
 
 	zfs_ioctl_init();
 
 	mutex_init(&zfsdev_state_lock, NULL, MUTEX_DEFAULT, NULL);
 	zfsdev_state_list = kmem_zalloc(sizeof (zfsdev_state_t), KM_SLEEP);
 	zfsdev_state_list->zs_minor = -1;
 
 	if ((error = zfsdev_attach()) != 0)
 		goto out;
 
 	tsd_create(&zfs_fsyncer_key, NULL);
 	tsd_create(&rrw_tsd_key, rrw_tsd_destroy);
 	tsd_create(&zfs_allow_log_key, zfs_allow_log_destroy);
 
 	return (0);
 out:
 	/*
 	 * Release the list head and lock created above so a failed
 	 * attach does not leak them.
 	 */
 	kmem_free(zfsdev_state_list, sizeof (zfsdev_state_t));
 	zfsdev_state_list = NULL;
 	mutex_destroy(&zfsdev_state_lock);
 
 	zfs_fini();
 	spa_fini();
 	zvol_fini();
 
 	return (error);
 }
 
 /*
  * Module-wide teardown: detach the device, destroy the state lock,
  * free every entry of zfsdev_state_list (running the destroy routine
  * for any onexit/zevent data still attached), shut down the zfs, spa
  * and zvol layers, and destroy the thread-specific-data keys.
  */
 void
 zfs_kmod_fini(void)
 {
 	zfsdev_state_t *cur, *following;
 
 	zfsdev_detach();
 
 	mutex_destroy(&zfsdev_state_lock);
 
 	cur = zfsdev_state_list;
 	while (cur != NULL) {
 		/* grab the successor first; 'cur' is freed below */
 		following = cur->zs_next;
 		if (cur->zs_onexit != NULL)
 			zfs_onexit_destroy(cur->zs_onexit);
 		if (cur->zs_zevent != NULL)
 			zfs_zevent_destroy(cur->zs_zevent);
 		kmem_free(cur, sizeof (zfsdev_state_t));
 		cur = following;
 	}
 
 	zfs_ereport_taskq_fini();	/* run before zfs_fini() on Linux */
 	zfs_fini();
 	spa_fini();
 	zvol_fini();
 
 	tsd_destroy(&zfs_fsyncer_key);
 	tsd_destroy(&rrw_tsd_key);
 	tsd_destroy(&zfs_allow_log_key);
 }
 
 /*
  * Tunables declared through ZFS_MODULE_PARAM.  zfs_history_output_max
  * is the limit zfsdev_ioctl_common() compares an ioctl's output nvlist
  * size against before adding it to the history record.
  * NOTE(review): zfs_max_nvlist_src_size is presumably the value behind
  * zfs_max_nvlist_src_size_os() -- confirm against the OS-specific code.
  */
 /* BEGIN CSTYLED */
 ZFS_MODULE_PARAM(zfs, zfs_, max_nvlist_src_size, ULONG, ZMOD_RW,
     "Maximum size in bytes allowed for src nvlist passed with ZFS ioctls");
 
 ZFS_MODULE_PARAM(zfs, zfs_, history_output_max, ULONG, ZMOD_RW,
     "Maximum size in bytes of ZFS ioctl output that will be logged");
 /* END CSTYLED */
diff --git a/sys/contrib/openzfs/module/zfs/zfs_onexit.c b/sys/contrib/openzfs/module/zfs/zfs_onexit.c
index 2a1332e715ee..7c56dd9c97f5 100644
--- a/sys/contrib/openzfs/module/zfs/zfs_onexit.c
+++ b/sys/contrib/openzfs/module/zfs/zfs_onexit.c
@@ -1,173 +1,176 @@
 /*
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
  * Common Development and Distribution License (the "License").
  * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
  * See the License for the specific language governing permissions
  * and limitations under the License.
  *
  * When distributing Covered Code, include this CDDL HEADER in each
  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  * If applicable, add the following below this CDDL HEADER, with the
  * fields enclosed by brackets "[]" replaced with your own identifying
  * information: Portions Copyright [yyyy] [name of copyright owner]
  *
  * CDDL HEADER END
  */
 /*
  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2013, 2020 by Delphix. All rights reserved.
  */
 
 #include <sys/types.h>
 #include <sys/param.h>
 #include <sys/errno.h>
 #include <sys/kmem.h>
 #include <sys/sunddi.h>
 #include <sys/zfs_ioctl.h>
 #include <sys/zfs_onexit.h>
 #include <sys/zvol.h>
 
 /*
  * ZFS kernel routines may add/delete callback routines to be invoked
  * upon process exit (triggered via the close operation from the /dev/zfs
  * driver).
  *
  * These cleanup callbacks are intended to allow for the accumulation
  * of kernel state across multiple ioctls.  User processes participate
  * simply by opening ZFS_DEV. This causes the ZFS driver to create
  * some private data for the file descriptor and generate a unique
  * minor number. The process then passes along that file descriptor to
  * each ioctl that might have a cleanup operation.
  *
  * Consumers of the onexit routines should call zfs_onexit_fd_hold() early
  * on to validate the given fd and add a reference to its file table entry.
  * This allows the consumer to do its work and then add a callback, knowing
  * that zfs_onexit_add_cb() won't fail with EBADF.  When finished, consumers
  * should call zfs_onexit_fd_rele().
  *
  * A simple example is zfs_ioc_recv(), where we might create an AVL tree
  * with dataset/GUID mappings and then reuse that tree on subsequent
  * zfs_ioc_recv() calls.
  *
  * On the first zfs_ioc_recv() call, dmu_recv_stream() will kmem_alloc()
  * the AVL tree and pass it along with a callback function to
  * zfs_onexit_add_cb(). The zfs_onexit_add_cb() routine will register the
  * callback and return an action handle.
  *
  * The action handle is then passed from user space to subsequent
  * zfs_ioc_recv() calls, so that dmu_recv_stream() can fetch its AVL tree
  * by calling zfs_onexit_cb_data() with the device minor number and
  * action handle.
  *
  * If the user process exits abnormally, the callback is invoked implicitly
  * as part of the driver close operation.  Once the user space process is
  * finished with the accumulated kernel state, it can also just call close(2)
  * on the cleanup fd to trigger the cleanup callback.
  */
 
 /*
  * Allocate a zeroed zfs_onexit_t, store it in *zop, and initialize its
  * lock and its (empty) list of callback actions.
  */
 void
 zfs_onexit_init(zfs_onexit_t **zop)
 {
 	zfs_onexit_t *state;
 
 	state = kmem_zalloc(sizeof (zfs_onexit_t), KM_SLEEP);
 	*zop = state;
 
 	mutex_init(&state->zo_lock, NULL, MUTEX_DEFAULT, NULL);
 	list_create(&state->zo_actions, sizeof (zfs_onexit_action_node_t),
 	    offsetof(zfs_onexit_action_node_t, za_link));
 }
 
 /*
  * Run and free every callback action queued on 'zo', in list order,
  * then destroy the list and lock and free 'zo' itself.
  */
 void
 zfs_onexit_destroy(zfs_onexit_t *zo)
 {
 	zfs_onexit_action_node_t *node;
 
 	mutex_enter(&zo->zo_lock);
 	for (;;) {
 		node = list_head(&zo->zo_actions);
 		if (node == NULL)
 			break;
 
 		list_remove(&zo->zo_actions, node);
 
 		/* the callback is invoked with zo_lock dropped */
 		mutex_exit(&zo->zo_lock);
 		node->za_func(node->za_data);
 		kmem_free(node, sizeof (zfs_onexit_action_node_t));
 		mutex_enter(&zo->zo_lock);
 	}
 	mutex_exit(&zo->zo_lock);
 
 	list_destroy(&zo->zo_actions);
 	mutex_destroy(&zo->zo_lock);
 	kmem_free(zo, sizeof (zfs_onexit_t));
 }
 
 /*
  * Consumers might need to operate by minor number instead of fd, since
  * they might be running in another thread (e.g. txg_sync_thread). Callers
  * of this function must call zfs_onexit_fd_rele() when they're finished
  * using the minor number.
  *
  * With this change the function returns the held zfs_file_t for 'fd'
  * rather than an error code.  NULL is returned, with the file reference
  * already dropped, when the fd cannot be resolved, when
  * zfsdev_getminor() fails, or when the minor has no onexit state.  On
  * success *minorp holds the minor number.
  */
-int
+zfs_file_t *
 zfs_onexit_fd_hold(int fd, minor_t *minorp)
 {
 	zfs_onexit_t *zo = NULL;
-	int error;
 
-	error = zfsdev_getminor(fd, minorp);
+	zfs_file_t *fp = zfs_file_get(fd);
+	if (fp == NULL)
+		return (NULL);
+
+	int error = zfsdev_getminor(fp, minorp);
 	if (error) {
-		zfs_onexit_fd_rele(fd);
-		return (error);
+		zfs_onexit_fd_rele(fp);
+		return (NULL);
 	}
 
 	/* only the presence of onexit state is checked; 'zo' is not used */
 	zo = zfsdev_get_state(*minorp, ZST_ONEXIT);
 	if (zo == NULL) {
-		zfs_onexit_fd_rele(fd);
-		return (SET_ERROR(EBADF));
+		zfs_onexit_fd_rele(fp);
+		return (NULL);
 	}
-	return (0);
+	return (fp);
 }
 
 /*
  * Drop a file reference taken by zfs_onexit_fd_hold().  With this
  * change the argument is the zfs_file_t that the hold returned rather
  * than the fd; it is passed straight to zfs_file_put().
  */
 void
-zfs_onexit_fd_rele(int fd)
+zfs_onexit_fd_rele(zfs_file_t *fp)
 {
-	zfs_file_put(fd);
+	zfs_file_put(fp);
 }
 
 /*
  * Store the onexit state registered for 'minor' in *zo.  Returns 0 if
  * such state exists, or EBADF (with *zo set to NULL) if it does not.
  */
 static int
 zfs_onexit_minor_to_state(minor_t minor, zfs_onexit_t **zo)
 {
 	zfs_onexit_t *state = zfsdev_get_state(minor, ZST_ONEXIT);
 
 	*zo = state;
 	if (state != NULL)
 		return (0);
 
 	return (SET_ERROR(EBADF));
 }
 
 /*
  * Queue func(data) on the onexit state belonging to 'minor'; queued
  * callbacks are run by zfs_onexit_destroy().
  *
  * Returns 0 on success, or the error from the minor lookup (EBADF when
  * the minor has no onexit state).  If 'action_handle' is non-NULL it
  * receives an opaque handle for the new action (the node's address).
  */
 int
 zfs_onexit_add_cb(minor_t minor, void (*func)(void *), void *data,
     uint64_t *action_handle)
 {
 	zfs_onexit_t *state;
 	zfs_onexit_action_node_t *node;
 	int rc;
 
 	rc = zfs_onexit_minor_to_state(minor, &state);
 	if (rc != 0)
 		return (rc);
 
 	node = kmem_alloc(sizeof (zfs_onexit_action_node_t), KM_SLEEP);
 	list_link_init(&node->za_link);
 	node->za_func = func;
 	node->za_data = data;
 
 	/* append under the lock so actions keep registration order */
 	mutex_enter(&state->zo_lock);
 	list_insert_tail(&state->zo_actions, node);
 	mutex_exit(&state->zo_lock);
 
 	if (action_handle != NULL)
 		*action_handle = (uint64_t)(uintptr_t)node;
 
 	return (0);
 }
diff --git a/sys/modules/zfs/zfs_config.h b/sys/modules/zfs/zfs_config.h
index ddb2527e56fd..87b0ae33fb81 100644
--- a/sys/modules/zfs/zfs_config.h
+++ b/sys/modules/zfs/zfs_config.h
@@ -1,846 +1,846 @@
 /*
  * $FreeBSD$
  */
 
 /* zfs_config.h.  Generated from zfs_config.h.in by configure.  */
 /* zfs_config.h.in.  Generated from configure.ac by autoheader.  */
 
 /* Define to 1 if translation of program messages to the user's native
    language is requested. */
 /* #undef ENABLE_NLS */
 
 /* bio_end_io_t wants 1 arg */
 /* #undef HAVE_1ARG_BIO_END_IO_T */
 
 /* lookup_bdev() wants 1 arg */
 /* #undef HAVE_1ARG_LOOKUP_BDEV */
 
 /* submit_bio() wants 1 arg */
 /* #undef HAVE_1ARG_SUBMIT_BIO */
 
 /* bdi_setup_and_register() wants 2 args */
 /* #undef HAVE_2ARGS_BDI_SETUP_AND_REGISTER */
 
 /* vfs_getattr wants 2 args */
 /* #undef HAVE_2ARGS_VFS_GETATTR */
 
 /* zlib_deflate_workspacesize() wants 2 args */
 /* #undef HAVE_2ARGS_ZLIB_DEFLATE_WORKSPACESIZE */
 
 /* bdi_setup_and_register() wants 3 args */
 /* #undef HAVE_3ARGS_BDI_SETUP_AND_REGISTER */
 
 /* vfs_getattr wants 3 args */
 /* #undef HAVE_3ARGS_VFS_GETATTR */
 
 /* vfs_getattr wants 4 args */
 /* #undef HAVE_4ARGS_VFS_GETATTR */
 
 /* kernel has access_ok with 'type' parameter */
 /* #undef HAVE_ACCESS_OK_TYPE */
 
 /* posix_acl has refcount_t */
 /* #undef HAVE_ACL_REFCOUNT */
 
 /* Define if host toolchain supports AES */
 #define HAVE_AES 1
 
 #ifdef __amd64__
 #ifndef RESCUE
 /* Define if host toolchain supports AVX */
 #define HAVE_AVX 1
 #endif
 
 /* Define if host toolchain supports AVX2 */
 #define HAVE_AVX2 1
 
 /* Define if host toolchain supports AVX512BW */
 #define HAVE_AVX512BW 1
 
 /* Define if host toolchain supports AVX512CD */
 #define HAVE_AVX512CD 1
 
 /* Define if host toolchain supports AVX512DQ */
 #define HAVE_AVX512DQ 1
 
 /* Define if host toolchain supports AVX512ER */
 #define HAVE_AVX512ER 1
 
 /* Define if host toolchain supports AVX512F */
 #define HAVE_AVX512F 1
 
 /* Define if host toolchain supports AVX512IFMA */
 #define HAVE_AVX512IFMA 1
 
 /* Define if host toolchain supports AVX512PF */
 #define HAVE_AVX512PF 1
 
 /* Define if host toolchain supports AVX512VBMI */
 #define HAVE_AVX512VBMI 1
 
 /* Define if host toolchain supports AVX512VL */
 #define HAVE_AVX512VL 1
 #endif
 
 /* bdev_check_media_change() exists */
 /* #undef HAVE_BDEV_CHECK_MEDIA_CHANGE */
 
 /* bdev_whole() is available */
 /* #undef HAVE_BDEV_WHOLE */
 
 /* bio->bi_bdev->bd_disk exists */
 /* #undef HAVE_BIO_BDEV_DISK */
 
 /* bio->bi_opf is defined */
 /* #undef HAVE_BIO_BI_OPF */
 
 /* bio->bi_status exists */
 /* #undef HAVE_BIO_BI_STATUS */
 
 /* bio has bi_iter */
 /* #undef HAVE_BIO_BVEC_ITER */
 
 /* bio_*_io_acct() available */
 /* #undef HAVE_BIO_IO_ACCT */
 
 /* bio_max_segs() is implemented */
 /* #undef HAVE_BIO_MAX_SEGS */
 
 /* bio_set_dev() is available */
 /* #undef HAVE_BIO_SET_DEV */
 
 /* bio_set_dev() GPL-only */
 /* #undef HAVE_BIO_SET_DEV_GPL_ONLY */
 
 /* bio_set_op_attrs is available */
 /* #undef HAVE_BIO_SET_OP_ATTRS */
 
 /* blkdev_reread_part() exists */
 /* #undef HAVE_BLKDEV_REREAD_PART */
 
 /* blkg_tryget() is available */
 /* #undef HAVE_BLKG_TRYGET */
 
 /* blkg_tryget() GPL-only */
 /* #undef HAVE_BLKG_TRYGET_GPL_ONLY */
 
 /* blk_alloc_queue() expects request function */
 /* #undef HAVE_BLK_ALLOC_QUEUE_REQUEST_FN */
 
 /* blk_alloc_queue_rh() expects request function */
 /* #undef HAVE_BLK_ALLOC_QUEUE_REQUEST_FN_RH */
 
 /* blk queue backing_dev_info is dynamic */
 /* #undef HAVE_BLK_QUEUE_BDI_DYNAMIC */
 
 /* blk_queue_flag_clear() exists */
 /* #undef HAVE_BLK_QUEUE_FLAG_CLEAR */
 
 /* blk_queue_flag_set() exists */
 /* #undef HAVE_BLK_QUEUE_FLAG_SET */
 
 /* blk_queue_flush() is available */
 /* #undef HAVE_BLK_QUEUE_FLUSH */
 
 /* blk_queue_flush() is GPL-only */
 /* #undef HAVE_BLK_QUEUE_FLUSH_GPL_ONLY */
 
 /* blk_queue_secdiscard() is available */
 /* #undef HAVE_BLK_QUEUE_SECDISCARD */
 
 /* blk_queue_secure_erase() is available */
 /* #undef HAVE_BLK_QUEUE_SECURE_ERASE */
 
 /* blk_queue_write_cache() exists */
 /* #undef HAVE_BLK_QUEUE_WRITE_CACHE */
 
 /* blk_queue_write_cache() is GPL-only */
 /* #undef HAVE_BLK_QUEUE_WRITE_CACHE_GPL_ONLY */
 
 /* Define if revalidate_disk() in block_device_operations */
 /* #undef HAVE_BLOCK_DEVICE_OPERATIONS_REVALIDATE_DISK */
 
 /* Define to 1 if you have the Mac OS X function CFLocaleCopyCurrent in the
    CoreFoundation framework. */
 /* #undef HAVE_CFLOCALECOPYCURRENT */
 
 /* Define to 1 if you have the Mac OS X function
    CFLocaleCopyPreferredLanguages in the CoreFoundation framework. */
 /* #undef HAVE_CFLOCALECOPYPREFERREDLANGUAGES */
 
 /* Define to 1 if you have the Mac OS X function CFPreferencesCopyAppValue in
    the CoreFoundation framework. */
 /* #undef HAVE_CFPREFERENCESCOPYAPPVALUE */
 
 /* check_disk_change() exists */
 /* #undef HAVE_CHECK_DISK_CHANGE */
 
 /* clear_inode() is available */
 /* #undef HAVE_CLEAR_INODE */
 
 /* dentry uses const struct dentry_operations */
 /* #undef HAVE_CONST_DENTRY_OPERATIONS */
 
 /* copy_from_iter() is available */
 /* #undef HAVE_COPY_FROM_ITER */
 
 /* copy_to_iter() is available */
 /* #undef HAVE_COPY_TO_ITER */
 
 /* yes */
 /* #undef HAVE_CPU_HOTPLUG */
 
 /* current_time() exists */
 /* #undef HAVE_CURRENT_TIME */
 
 /* Define if the GNU dcgettext() function is already present or preinstalled.
    */
 /* #undef HAVE_DCGETTEXT */
 
 /* DECLARE_EVENT_CLASS() is available */
 /* #undef HAVE_DECLARE_EVENT_CLASS */
 
 /* lookup_bdev() wants dev_t arg */
 /* #undef HAVE_DEVT_LOOKUP_BDEV */
 
 /* sops->dirty_inode() wants flags */
 /* #undef HAVE_DIRTY_INODE_WITH_FLAGS */
 
 /* disk_*_io_acct() available */
 /* #undef HAVE_DISK_IO_ACCT */
 
 /* Define to 1 if you have the <dlfcn.h> header file. */
 #define HAVE_DLFCN_H 1
 
 /* d_make_root() is available */
 /* #undef HAVE_D_MAKE_ROOT */
 
 /* d_prune_aliases() is available */
 /* #undef HAVE_D_PRUNE_ALIASES */
 
 /* dops->d_revalidate() operation takes nameidata */
 /* #undef HAVE_D_REVALIDATE_NAMEIDATA */
 
 /* eops->encode_fh() wants child and parent inodes */
 /* #undef HAVE_ENCODE_FH_WITH_INODE */
 
 /* sops->evict_inode() exists */
 /* #undef HAVE_EVICT_INODE */
 
 /* fops->aio_fsync() exists */
 /* #undef HAVE_FILE_AIO_FSYNC */
 
 /* file_dentry() is available */
 /* #undef HAVE_FILE_DENTRY */
 
 /* file_inode() is available */
 /* #undef HAVE_FILE_INODE */
 
 /* iops->follow_link() cookie */
 /* #undef HAVE_FOLLOW_LINK_COOKIE */
 
 /* iops->follow_link() nameidata */
 /* #undef HAVE_FOLLOW_LINK_NAMEIDATA */
 
 /* fops->fsync() with range */
 /* #undef HAVE_FSYNC_RANGE */
 
 /* fops->fsync() without dentry */
 /* #undef HAVE_FSYNC_WITHOUT_DENTRY */
 
 /* generic_fillattr requires struct user_namespace* */
 /* #undef HAVE_GENERIC_FILLATTR_USERNS */
 
 /* generic_*_io_acct() 3 arg available */
 /* #undef HAVE_GENERIC_IO_ACCT_3ARG */
 
 /* generic_*_io_acct() 4 arg available */
 /* #undef HAVE_GENERIC_IO_ACCT_4ARG */
 
 /* generic_readlink is global */
 /* #undef HAVE_GENERIC_READLINK */
 
 /* generic_setxattr() exists */
 /* #undef HAVE_GENERIC_SETXATTR */
 
 /* generic_write_checks() takes kiocb */
 /* #undef HAVE_GENERIC_WRITE_CHECKS_KIOCB */
 
 /* Define if the GNU gettext() function is already present or preinstalled. */
 /* #undef HAVE_GETTEXT */
 
 /* iops->get_link() cookie */
 /* #undef HAVE_GET_LINK_COOKIE */
 
 /* iops->get_link() delayed */
 /* #undef HAVE_GET_LINK_DELAYED */
 
 /* group_info->gid exists */
 /* #undef HAVE_GROUP_INFO_GID */
 
 /* has_capability() is available */
 /* #undef HAVE_HAS_CAPABILITY */
 
 /* Define if you have the iconv() function and it works. */
 #define HAVE_ICONV 1
 
 /* yes */
 /* #undef HAVE_INODE_LOCK_SHARED */
 
 /* inode_owner_or_capable() exists */
 /* #undef HAVE_INODE_OWNER_OR_CAPABLE */
 
 /* inode_owner_or_capable() takes user_ns */
 /* #undef HAVE_INODE_OWNER_OR_CAPABLE_IDMAPPED */
 
 /* inode_set_flags() exists */
 /* #undef HAVE_INODE_SET_FLAGS */
 
 /* inode_set_iversion() exists */
 /* #undef HAVE_INODE_SET_IVERSION */
 
 /* inode->i_*time's are timespec64 */
 /* #undef HAVE_INODE_TIMESPEC64_TIMES */
 
 /* timestamp_truncate() exists */
 /* #undef HAVE_INODE_TIMESTAMP_TRUNCATE */
 
 /* Define to 1 if you have the <inttypes.h> header file. */
 #define HAVE_INTTYPES_H 1
 
 /* in_compat_syscall() is available */
 /* #undef HAVE_IN_COMPAT_SYSCALL */
 
 /* iops->create() takes struct user_namespace* */
 /* #undef HAVE_IOPS_CREATE_USERNS */
 
 /* iops->mkdir() takes struct user_namespace* */
 /* #undef HAVE_IOPS_MKDIR_USERNS */
 
 /* iops->mknod() takes struct user_namespace* */
 /* #undef HAVE_IOPS_MKNOD_USERNS */
 
 /* iops->rename() takes struct user_namespace* */
 /* #undef HAVE_IOPS_RENAME_USERNS */
 
 /* iops->symlink() takes struct user_namespace* */
 /* #undef HAVE_IOPS_SYMLINK_USERNS */
 
 /* iov_iter_advance() is available */
 /* #undef HAVE_IOV_ITER_ADVANCE */
 
 /* iov_iter_count() is available */
 /* #undef HAVE_IOV_ITER_COUNT */
 
 /* iov_iter_fault_in_readable() is available */
 /* #undef HAVE_IOV_ITER_FAULT_IN_READABLE */
 
 /* iov_iter_revert() is available */
 /* #undef HAVE_IOV_ITER_REVERT */
 
 /* iov_iter types are available */
 /* #undef HAVE_IOV_ITER_TYPES */
 
 /* yes */
 /* #undef HAVE_IO_SCHEDULE_TIMEOUT */
 
 /* Define to 1 if you have the `issetugid' function. */
 #define HAVE_ISSETUGID 1
 
 /* kernel has kernel_fpu_* functions */
 /* #undef HAVE_KERNEL_FPU */
 
 /* kernel has asm/fpu/api.h */
 /* #undef HAVE_KERNEL_FPU_API_HEADER */
 
 /* kernel fpu internal */
 /* #undef HAVE_KERNEL_FPU_INTERNAL */
 
 /* uncached_acl_sentinel() exists */
 /* #undef HAVE_KERNEL_GET_ACL_HANDLE_CACHE */
 
 /* kernel does stack verification */
 /* #undef HAVE_KERNEL_OBJTOOL */
 
 /* kernel has linux/objtool.h */
 /* #undef HAVE_KERNEL_OBJTOOL_HEADER */
 
-/* kernel_read() take loff_t pointer */
+/* kernel_read() takes a loff_t pointer */
 /* #undef HAVE_KERNEL_READ_PPOS */
 
 /* timer_list.function gets a timer_list */
 /* #undef HAVE_KERNEL_TIMER_FUNCTION_TIMER_LIST */
 
 /* struct timer_list has a flags member */
 /* #undef HAVE_KERNEL_TIMER_LIST_FLAGS */
 
 /* timer_setup() is available */
 /* #undef HAVE_KERNEL_TIMER_SETUP */
 
-/* kernel_write() take loff_t pointer */
+/* kernel_write() takes a loff_t pointer */
 /* #undef HAVE_KERNEL_WRITE_PPOS */
 
 /* kmem_cache_create_usercopy() exists */
 /* #undef HAVE_KMEM_CACHE_CREATE_USERCOPY */
 
 /* kstrtoul() exists */
 /* #undef HAVE_KSTRTOUL */
 
 /* ktime_get_coarse_real_ts64() exists */
 /* #undef HAVE_KTIME_GET_COARSE_REAL_TS64 */
 
 /* ktime_get_raw_ts64() exists */
 /* #undef HAVE_KTIME_GET_RAW_TS64 */
 
 /* kvmalloc exists */
 /* #undef HAVE_KVMALLOC */
 
 /* kernel has large stacks */
 /* #undef HAVE_LARGE_STACKS */
 
 /* Define if you have [aio] */
 /* #undef HAVE_LIBAIO */
 
 /* Define if you have [blkid] */
 /* #undef HAVE_LIBBLKID */
 
 /* Define if you have [crypto] */
 #define HAVE_LIBCRYPTO 1
 
 /* Define if you have [tirpc] */
 /* #undef HAVE_LIBTIRPC */
 
 /* Define if you have [udev] */
 /* #undef HAVE_LIBUDEV */
 
 /* Define if you have [uuid] */
 /* #undef HAVE_LIBUUID */
 
 /* lseek_execute() is available */
 /* #undef HAVE_LSEEK_EXECUTE */
 
 /* makedev() is declared in sys/mkdev.h */
 /* #undef HAVE_MAKEDEV_IN_MKDEV */
 
 /* makedev() is declared in sys/sysmacros.h */
 /* #undef HAVE_MAKEDEV_IN_SYSMACROS */
 
 /* Noting that make_request_fn() returns blk_qc_t */
 /* #undef HAVE_MAKE_REQUEST_FN_RET_QC */
 
 /* Noting that make_request_fn() returns void */
 /* #undef HAVE_MAKE_REQUEST_FN_RET_VOID */
 
 /* Define to 1 if you have the <memory.h> header file. */
 #define HAVE_MEMORY_H 1
 
 /* iops->mkdir() takes umode_t */
 /* #undef HAVE_MKDIR_UMODE_T */
 
 /* Define to 1 if you have the `mlockall' function. */
 #define HAVE_MLOCKALL 1
 
 /* lookup_bdev() wants mode arg */
 /* #undef HAVE_MODE_LOOKUP_BDEV */
 
 /* Define if host toolchain supports MOVBE */
 #define HAVE_MOVBE 1
 
 /* new_sync_read()/new_sync_write() are available */
 /* #undef HAVE_NEW_SYNC_READ */
 
 /* iops->getattr() takes a path */
 /* #undef HAVE_PATH_IOPS_GETATTR */
 
 /* Define if host toolchain supports PCLMULQDQ */
 #define HAVE_PCLMULQDQ 1
 
 /* percpu_counter_add_batch() is defined */
 /* #undef HAVE_PERCPU_COUNTER_ADD_BATCH */
 
 /* percpu_counter_init() wants gfp_t */
 /* #undef HAVE_PERCPU_COUNTER_INIT_WITH_GFP */
 
 /* posix_acl_chmod() exists */
 /* #undef HAVE_POSIX_ACL_CHMOD */
 
 /* posix_acl_from_xattr() needs user_ns */
 /* #undef HAVE_POSIX_ACL_FROM_XATTR_USERNS */
 
 /* posix_acl_release() is available */
 /* #undef HAVE_POSIX_ACL_RELEASE */
 
 /* posix_acl_release() is GPL-only */
 /* #undef HAVE_POSIX_ACL_RELEASE_GPL_ONLY */
 
 /* posix_acl_valid() wants user namespace */
 /* #undef HAVE_POSIX_ACL_VALID_WITH_NS */
 
 /* proc_ops structure exists */
 /* #undef HAVE_PROC_OPS_STRUCT */
 
 /* iops->put_link() cookie */
 /* #undef HAVE_PUT_LINK_COOKIE */
 
 /* iops->put_link() delayed */
 /* #undef HAVE_PUT_LINK_DELAYED */
 
 /* iops->put_link() nameidata */
 /* #undef HAVE_PUT_LINK_NAMEIDATA */
 
 /* If available, contains the Python version number currently in use. */
 #define HAVE_PYTHON "3.7"
 
 /* qat is enabled and existed */
 /* #undef HAVE_QAT */
 
 /* iops->rename() wants flags */
 /* #undef HAVE_RENAME_WANTS_FLAGS */
 
 /* REQ_DISCARD is defined */
 /* #undef HAVE_REQ_DISCARD */
 
 /* REQ_FLUSH is defined */
 /* #undef HAVE_REQ_FLUSH */
 
 /* REQ_OP_DISCARD is defined */
 /* #undef HAVE_REQ_OP_DISCARD */
 
 /* REQ_OP_FLUSH is defined */
 /* #undef HAVE_REQ_OP_FLUSH */
 
 /* REQ_OP_SECURE_ERASE is defined */
 /* #undef HAVE_REQ_OP_SECURE_ERASE */
 
 /* REQ_PREFLUSH is defined */
 /* #undef HAVE_REQ_PREFLUSH */
 
 /* revalidate_disk() is available */
 /* #undef HAVE_REVALIDATE_DISK */
 
 /* revalidate_disk_size() is available */
 /* #undef HAVE_REVALIDATE_DISK_SIZE */
 
 /* struct rw_semaphore has member activity */
 /* #undef HAVE_RWSEM_ACTIVITY */
 
 /* struct rw_semaphore has atomic_long_t member count */
 /* #undef HAVE_RWSEM_ATOMIC_LONG_COUNT */
 
 /* linux/sched/signal.h exists */
 /* #undef HAVE_SCHED_SIGNAL_HEADER */
 
 /* Define to 1 if you have the <security/pam_modules.h> header file. */
 #define HAVE_SECURITY_PAM_MODULES_H 1
 
 /* setattr_prepare() is available, doesn't accept user_namespace */
 /* #undef HAVE_SETATTR_PREPARE_NO_USERNS */
 
 /* setattr_prepare() accepts user_namespace */
 /* #undef HAVE_SETATTR_PREPARE_USERNS */
 
 /* iops->set_acl() exists, takes 3 args */
 /* #undef HAVE_SET_ACL */
 
 /* iops->set_acl() takes 4 args */
 /* #undef HAVE_SET_ACL_USERNS */
 
 /* set_cached_acl() is usable */
 /* #undef HAVE_SET_CACHED_ACL_USABLE */
 
 /* set_special_state() exists */
 /* #undef HAVE_SET_SPECIAL_STATE */
 
 /* struct shrink_control exists */
 /* #undef HAVE_SHRINK_CONTROL_STRUCT */
 
 /* kernel_siginfo_t exists */
 /* #undef HAVE_SIGINFO */
 
 /* signal_stop() exists */
 /* #undef HAVE_SIGNAL_STOP */
 
 /* new shrinker callback wants 2 args */
 /* #undef HAVE_SINGLE_SHRINKER_CALLBACK */
 
 /* ->count_objects exists */
 /* #undef HAVE_SPLIT_SHRINKER_CALLBACK */
 
 #if defined(__amd64__) || defined(__i386__)
 /* Define if host toolchain supports SSE */
 #define HAVE_SSE 1
 
 /* Define if host toolchain supports SSE2 */
 #define HAVE_SSE2 1
 
 /* Define if host toolchain supports SSE3 */
 #define HAVE_SSE3 1
 
 /* Define if host toolchain supports SSE4.1 */
 #define HAVE_SSE4_1 1
 
 /* Define if host toolchain supports SSE4.2 */
 #define HAVE_SSE4_2 1
 
 /* Define if host toolchain supports SSSE3 */
 #define HAVE_SSSE3 1
 #endif
 
 /* STACK_FRAME_NON_STANDARD is defined */
 /* #undef HAVE_STACK_FRAME_NON_STANDARD */
 
 /* Define to 1 if you have the <stdint.h> header file. */
 #define HAVE_STDINT_H 1
 
 /* Define to 1 if you have the <stdlib.h> header file. */
 #define HAVE_STDLIB_H 1
 
 /* Define to 1 if you have the <strings.h> header file. */
 #define HAVE_STRINGS_H 1
 
 /* Define to 1 if you have the <string.h> header file. */
 #define HAVE_STRING_H 1
 
 /* Define to 1 if you have the `strlcat' function. */
 #define HAVE_STRLCAT 1
 
 /* Define to 1 if you have the `strlcpy' function. */
 #define HAVE_STRLCPY 1
 
 /* submit_bio is member of struct block_device_operations */
 /* #undef HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS */
 
-/* super_setup_bdi_name() exits */
+/* super_setup_bdi_name() exists */
 /* #undef HAVE_SUPER_SETUP_BDI_NAME */
 
 /* super_block->s_user_ns exists */
 /* #undef HAVE_SUPER_USER_NS */
 
 /* Define to 1 if you have the <sys/stat.h> header file. */
 #define HAVE_SYS_STAT_H 1
 
 /* Define to 1 if you have the <sys/types.h> header file. */
 #define HAVE_SYS_TYPES_H 1
 
 /* i_op->tmpfile() exists */
 /* #undef HAVE_TMPFILE */
 
 /* i_op->tmpfile() has userns */
 /* #undef HAVE_TMPFILE_USERNS */
 
 /* totalhigh_pages() exists */
 /* #undef HAVE_TOTALHIGH_PAGES */
 
 /* kernel has totalram_pages() */
 /* #undef HAVE_TOTALRAM_PAGES_FUNC */
 
 /* Define to 1 if you have the `udev_device_get_is_initialized' function. */
 /* #undef HAVE_UDEV_DEVICE_GET_IS_INITIALIZED */
 
 /* kernel has __kernel_fpu_* functions */
 /* #undef HAVE_UNDERSCORE_KERNEL_FPU */
 
 /* Define to 1 if you have the <unistd.h> header file. */
 #define HAVE_UNISTD_H 1
 
 /* iops->getattr() takes struct user_namespace* */
 /* #undef HAVE_USERNS_IOPS_GETATTR */
 
 /* iops->getattr() takes a vfsmount */
 /* #undef HAVE_VFSMOUNT_IOPS_GETATTR */
 
 /* aops->direct_IO() uses iovec */
 /* #undef HAVE_VFS_DIRECT_IO_IOVEC */
 
 /* aops->direct_IO() uses iov_iter without rw */
 /* #undef HAVE_VFS_DIRECT_IO_ITER */
 
 /* aops->direct_IO() uses iov_iter with offset */
 /* #undef HAVE_VFS_DIRECT_IO_ITER_OFFSET */
 
 /* aops->direct_IO() uses iov_iter with rw and offset */
 /* #undef HAVE_VFS_DIRECT_IO_ITER_RW_OFFSET */
 
 /* All required iov_iter interfaces are available */
 /* #undef HAVE_VFS_IOV_ITER */
 
 /* fops->iterate() is available */
 /* #undef HAVE_VFS_ITERATE */
 
 /* fops->iterate_shared() is available */
 /* #undef HAVE_VFS_ITERATE_SHARED */
 
 /* fops->readdir() is available */
 /* #undef HAVE_VFS_READDIR */
 
 /* fops->read/write_iter() are available */
 /* #undef HAVE_VFS_RW_ITERATE */
 
 /* __vmalloc page flags exists */
 /* #undef HAVE_VMALLOC_PAGE_KERNEL */
 
 /* yes */
 /* #undef HAVE_WAIT_ON_BIT_ACTION */
 
 /* wait_queue_entry_t exists */
 /* #undef HAVE_WAIT_QUEUE_ENTRY_T */
 
 /* wq_head->head and wq_entry->entry exist */
 /* #undef HAVE_WAIT_QUEUE_HEAD_ENTRY */
 
 /* xattr_handler->get() wants dentry */
 /* #undef HAVE_XATTR_GET_DENTRY */
 
 /* xattr_handler->get() wants both dentry and inode */
 /* #undef HAVE_XATTR_GET_DENTRY_INODE */
 
 /* xattr_handler->get() wants xattr_handler */
 /* #undef HAVE_XATTR_GET_HANDLER */
 
 /* xattr_handler has name */
 /* #undef HAVE_XATTR_HANDLER_NAME */
 
 /* xattr_handler->list() wants dentry */
 /* #undef HAVE_XATTR_LIST_DENTRY */
 
 /* xattr_handler->list() wants xattr_handler */
 /* #undef HAVE_XATTR_LIST_HANDLER */
 
 /* xattr_handler->list() wants simple */
 /* #undef HAVE_XATTR_LIST_SIMPLE */
 
 /* xattr_handler->set() wants dentry */
 /* #undef HAVE_XATTR_SET_DENTRY */
 
 /* xattr_handler->set() wants both dentry and inode */
 /* #undef HAVE_XATTR_SET_DENTRY_INODE */
 
 /* xattr_handler->set() wants xattr_handler */
 /* #undef HAVE_XATTR_SET_HANDLER */
 
 /* xattr_handler->set() takes user_namespace */
 /* #undef HAVE_XATTR_SET_USERNS */
 
 /* Define if you have [z] */
 #define HAVE_ZLIB 1
 
 /* __posix_acl_chmod() exists */
 /* #undef HAVE___POSIX_ACL_CHMOD */
 
 /* kernel exports FPU functions */
 /* #undef KERNEL_EXPORTS_X86_FPU */
 
 /* TBD: fetch(3) support */
 #if 0
 /* whether the chosen libfetch is to be loaded at run-time */
 #define LIBFETCH_DYNAMIC 1
 
 /* libfetch is fetch(3) */
 #define LIBFETCH_IS_FETCH 1
 
 /* libfetch is libcurl */
 #define LIBFETCH_IS_LIBCURL 0
 
 /* soname of chosen libfetch */
 #define LIBFETCH_SONAME "libfetch.so.6"
 #endif
 
 /* Define to the sub-directory where libtool stores uninstalled libraries. */
 #define LT_OBJDIR ".libs/"
 
 /* make_request_fn() return type */
 /* #undef MAKE_REQUEST_FN_RET */
 
 /* hardened module_param_call */
 /* #undef MODULE_PARAM_CALL_CONST */
 
 /* struct shrink_control has nid */
 /* #undef SHRINK_CONTROL_HAS_NID */
 
 /* Defined for legacy compatibility. */
 #define SPL_META_ALIAS ZFS_META_ALIAS
 
 /* Defined for legacy compatibility. */
 #define SPL_META_RELEASE ZFS_META_RELEASE
 
 /* Defined for legacy compatibility. */
 #define SPL_META_VERSION ZFS_META_VERSION
 
 /* True if ZFS is to be compiled for a FreeBSD system */
 #define SYSTEM_FREEBSD 1
 
 /* True if ZFS is to be compiled for a Linux system */
 /* #undef SYSTEM_LINUX */
 
 /* zfs debugging enabled */
 /* #undef ZFS_DEBUG */
 
 /* /dev/zfs minor */
 /* #undef ZFS_DEVICE_MINOR */
 
 /* enum node_stat_item contains NR_FILE_PAGES */
 /* #undef ZFS_ENUM_NODE_STAT_ITEM_NR_FILE_PAGES */
 
 /* enum node_stat_item contains NR_INACTIVE_ANON */
 /* #undef ZFS_ENUM_NODE_STAT_ITEM_NR_INACTIVE_ANON */
 
 /* enum node_stat_item contains NR_INACTIVE_FILE */
 /* #undef ZFS_ENUM_NODE_STAT_ITEM_NR_INACTIVE_FILE */
 
 /* enum zone_stat_item contains NR_FILE_PAGES */
 /* #undef ZFS_ENUM_ZONE_STAT_ITEM_NR_FILE_PAGES */
 
 /* enum zone_stat_item contains NR_INACTIVE_ANON */
 /* #undef ZFS_ENUM_ZONE_STAT_ITEM_NR_INACTIVE_ANON */
 
 /* enum zone_stat_item contains NR_INACTIVE_FILE */
 /* #undef ZFS_ENUM_ZONE_STAT_ITEM_NR_INACTIVE_FILE */
 
 /* global_node_page_state() exists */
 /* #undef ZFS_GLOBAL_NODE_PAGE_STATE */
 
 /* global_zone_page_state() exists */
 /* #undef ZFS_GLOBAL_ZONE_PAGE_STATE */
 
 /* Define to 1 if GPL-only symbols can be used */
 /* #undef ZFS_IS_GPL_COMPATIBLE */
 
 /* Define the project alias string. */
-#define ZFS_META_ALIAS "zfs-2.1.99-FreeBSD_gbdd11cbb9"
+#define ZFS_META_ALIAS "zfs-2.1.99-FreeBSD_g07a4c76e9"
 
 /* Define the project author. */
 #define ZFS_META_AUTHOR "OpenZFS"
 
 /* Define the project release date. */
 /* #undef ZFS_META_DATA */
 
 /* Define the maximum compatible kernel version. */
 #define ZFS_META_KVER_MAX "5.13"
 
 /* Define the minimum compatible kernel version. */
 #define ZFS_META_KVER_MIN "3.10"
 
 /* Define the project license. */
 #define ZFS_META_LICENSE "CDDL"
 
 /* Define the libtool library 'age' version information. */
 /* #undef ZFS_META_LT_AGE */
 
 /* Define the libtool library 'current' version information. */
 /* #undef ZFS_META_LT_CURRENT */
 
 /* Define the libtool library 'revision' version information. */
 /* #undef ZFS_META_LT_REVISION */
 
 /* Define the project name. */
 #define ZFS_META_NAME "zfs"
 
 /* Define the project release. */
-#define ZFS_META_RELEASE "FreeBSD_gbdd11cbb9"
+#define ZFS_META_RELEASE "FreeBSD_g07a4c76e9"
 
 /* Define the project version. */
 #define ZFS_META_VERSION "2.1.99"
 
 /* count is located in percpu_ref.data */
 /* #undef ZFS_PERCPU_REF_COUNT_IN_DATA */