diff --git a/cmd/zfs/zfs_main.c b/cmd/zfs/zfs_main.c
index 6a1e37a2e362..74bdd796e88f 100644
--- a/cmd/zfs/zfs_main.c
+++ b/cmd/zfs/zfs_main.c
@@ -1,8823 +1,8823 @@
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or https://opensource.org/licenses/CDDL-1.0.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2011, 2020 by Delphix. All rights reserved.
 * Copyright 2012 Milan Jurik. All rights reserved.
 * Copyright (c) 2012, Joyent, Inc. All rights reserved.
 * Copyright (c) 2013 Steven Hartland. All rights reserved.
 * Copyright 2016 Igor Kozhukhov .
 * Copyright 2016 Nexenta Systems, Inc.
 * Copyright (c) 2019 Datto Inc.
 * Copyright (c) 2019, loli10K
 * Copyright 2019 Joyent, Inc.
 * Copyright (c) 2019, 2020 by Christian Schwarz. All rights reserved.
 */

#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include
#ifdef HAVE_IDMAP
#include #include
#endif /* HAVE_IDMAP */

#include "zfs_iter.h"
#include "zfs_util.h"
#include "zfs_comutil.h"
#include "zfs_projectutil.h"

libzfs_handle_t *g_zfs;

static char history_str[HIS_MAX_RECORD_LEN];
static boolean_t log_history = B_TRUE;

static int zfs_do_clone(int argc, char **argv);
static int zfs_do_create(int argc, char **argv);
static int zfs_do_destroy(int argc, char **argv);
static int zfs_do_get(int argc, char **argv);
static int zfs_do_inherit(int argc, char **argv);
static int zfs_do_list(int argc, char **argv);
static int zfs_do_mount(int argc, char **argv);
static int zfs_do_rename(int argc, char **argv);
static int zfs_do_rollback(int argc, char **argv);
static int zfs_do_set(int argc, char **argv);
static int zfs_do_upgrade(int argc, char **argv);
static int zfs_do_snapshot(int argc, char **argv);
static int zfs_do_unmount(int argc, char **argv);
static int zfs_do_share(int argc, char **argv);
static int zfs_do_unshare(int argc, char **argv);
static int zfs_do_send(int argc, char **argv);
static int zfs_do_receive(int argc, char **argv);
static int zfs_do_promote(int argc, char **argv);
static int zfs_do_userspace(int argc, char **argv);
static int zfs_do_allow(int argc, char **argv);
static int zfs_do_unallow(int argc, char **argv);
static int zfs_do_hold(int argc, char **argv);
static int zfs_do_holds(int argc, char **argv);
static int zfs_do_release(int argc, char **argv);
static int zfs_do_diff(int argc, char **argv);
static int zfs_do_bookmark(int argc, char **argv);
static int zfs_do_channel_program(int argc, char **argv);
static int zfs_do_load_key(int argc, char **argv);
static int zfs_do_unload_key(int argc, char **argv);
static int zfs_do_change_key(int argc, char **argv);
static int zfs_do_project(int argc, char **argv);
static int zfs_do_version(int argc, char **argv);
static int zfs_do_redact(int argc, char **argv);
static int zfs_do_wait(int argc, char **argv);

#ifdef __FreeBSD__
static int zfs_do_jail(int argc, char **argv);
static int zfs_do_unjail(int argc, char **argv);
#endif

#ifdef __linux__
static int zfs_do_zone(int argc, char **argv);
static int zfs_do_unzone(int argc, char **argv);
#endif

/*
 * Enable a reasonable set of defaults for libumem debugging on DEBUG builds.
 */
#ifdef DEBUG
const char *
_umem_debug_init(void)
{
	return ("default,verbose"); /* $UMEM_DEBUG setting */
}

const char *
_umem_logging_init(void)
{
	return ("fail,contents"); /* $UMEM_LOGGING setting */
}
#endif

typedef enum {
	HELP_CLONE,
	HELP_CREATE,
	HELP_DESTROY,
	HELP_GET,
	HELP_INHERIT,
	HELP_UPGRADE,
	HELP_LIST,
	HELP_MOUNT,
	HELP_PROMOTE,
	HELP_RECEIVE,
	HELP_RENAME,
	HELP_ROLLBACK,
	HELP_SEND,
	HELP_SET,
	HELP_SHARE,
	HELP_SNAPSHOT,
	HELP_UNMOUNT,
	HELP_UNSHARE,
	HELP_ALLOW,
	HELP_UNALLOW,
	HELP_USERSPACE,
	HELP_GROUPSPACE,
	HELP_PROJECTSPACE,
	HELP_PROJECT,
	HELP_HOLD,
	HELP_HOLDS,
	HELP_RELEASE,
	HELP_DIFF,
	HELP_BOOKMARK,
	HELP_CHANNEL_PROGRAM,
	HELP_LOAD_KEY,
	HELP_UNLOAD_KEY,
	HELP_CHANGE_KEY,
	HELP_VERSION,
	HELP_REDACT,
	HELP_JAIL,
	HELP_UNJAIL,
	HELP_WAIT,
	HELP_ZONE,
	HELP_UNZONE,
} zfs_help_t;

typedef struct zfs_command {
	const char *name;
	int (*func)(int argc, char **argv);
	zfs_help_t usage;
} zfs_command_t;

/*
 * Master command table. Each ZFS command has a name, associated function, and
 * usage message. The usage messages need to be internationalized, so we have
 * to have a function to return the usage message based on a command index.
 *
 * These commands are organized according to how they are displayed in the
 * usage message. An empty command (one with a NULL name) indicates an empty
 * line in the generic usage message.
*/ static zfs_command_t command_table[] = { { "version", zfs_do_version, HELP_VERSION }, { NULL }, { "create", zfs_do_create, HELP_CREATE }, { "destroy", zfs_do_destroy, HELP_DESTROY }, { NULL }, { "snapshot", zfs_do_snapshot, HELP_SNAPSHOT }, { "rollback", zfs_do_rollback, HELP_ROLLBACK }, { "clone", zfs_do_clone, HELP_CLONE }, { "promote", zfs_do_promote, HELP_PROMOTE }, { "rename", zfs_do_rename, HELP_RENAME }, { "bookmark", zfs_do_bookmark, HELP_BOOKMARK }, { "program", zfs_do_channel_program, HELP_CHANNEL_PROGRAM }, { NULL }, { "list", zfs_do_list, HELP_LIST }, { NULL }, { "set", zfs_do_set, HELP_SET }, { "get", zfs_do_get, HELP_GET }, { "inherit", zfs_do_inherit, HELP_INHERIT }, { "upgrade", zfs_do_upgrade, HELP_UPGRADE }, { NULL }, { "userspace", zfs_do_userspace, HELP_USERSPACE }, { "groupspace", zfs_do_userspace, HELP_GROUPSPACE }, { "projectspace", zfs_do_userspace, HELP_PROJECTSPACE }, { NULL }, { "project", zfs_do_project, HELP_PROJECT }, { NULL }, { "mount", zfs_do_mount, HELP_MOUNT }, { "unmount", zfs_do_unmount, HELP_UNMOUNT }, { "share", zfs_do_share, HELP_SHARE }, { "unshare", zfs_do_unshare, HELP_UNSHARE }, { NULL }, { "send", zfs_do_send, HELP_SEND }, { "receive", zfs_do_receive, HELP_RECEIVE }, { NULL }, { "allow", zfs_do_allow, HELP_ALLOW }, { NULL }, { "unallow", zfs_do_unallow, HELP_UNALLOW }, { NULL }, { "hold", zfs_do_hold, HELP_HOLD }, { "holds", zfs_do_holds, HELP_HOLDS }, { "release", zfs_do_release, HELP_RELEASE }, { "diff", zfs_do_diff, HELP_DIFF }, { "load-key", zfs_do_load_key, HELP_LOAD_KEY }, { "unload-key", zfs_do_unload_key, HELP_UNLOAD_KEY }, { "change-key", zfs_do_change_key, HELP_CHANGE_KEY }, { "redact", zfs_do_redact, HELP_REDACT }, { "wait", zfs_do_wait, HELP_WAIT }, #ifdef __FreeBSD__ { "jail", zfs_do_jail, HELP_JAIL }, { "unjail", zfs_do_unjail, HELP_UNJAIL }, #endif #ifdef __linux__ { "zone", zfs_do_zone, HELP_ZONE }, { "unzone", zfs_do_unzone, HELP_UNZONE }, #endif }; #define NCOMMAND (sizeof (command_table) / sizeof (command_table[0])) zfs_command_t *current_command; static const char * get_usage(zfs_help_t idx) { switch (idx) { case HELP_CLONE: return (gettext("\tclone [-p] [-o property=value] ... " " \n")); case HELP_CREATE: return (gettext("\tcreate [-Pnpuv] [-o property=value] ... " "\n" "\tcreate [-Pnpsv] [-b blocksize] [-o property=value] ... " "-V \n")); case HELP_DESTROY: return (gettext("\tdestroy [-fnpRrv] \n" "\tdestroy [-dnpRrv] " "@[%][,...]\n" "\tdestroy #\n")); case HELP_GET: return (gettext("\tget [-rHp] [-d max] " "[-o \"all\" | field[,...]]\n" "\t [-t type[,...]] [-s source[,...]]\n" "\t <\"all\" | property[,...]> " "[filesystem|volume|snapshot|bookmark] ...\n")); case HELP_INHERIT: return (gettext("\tinherit [-rS] " " ...\n")); case HELP_UPGRADE: return (gettext("\tupgrade [-v]\n" "\tupgrade [-r] [-V version] <-a | filesystem ...>\n")); case HELP_LIST: return (gettext("\tlist [-Hp] [-r|-d max] [-o property[,...]] " "[-s property]...\n\t [-S property]... [-t type[,...]] " "[filesystem|volume|snapshot] ...\n")); case HELP_MOUNT: return (gettext("\tmount\n" "\tmount [-flvO] [-o opts] <-a | filesystem>\n")); case HELP_PROMOTE: return (gettext("\tpromote \n")); case HELP_RECEIVE: return (gettext("\treceive [-vMnsFhu] " "[-o =] ... [-x ] ...\n" "\t \n" "\treceive [-vMnsFhu] [-o =] ... " "[-x ] ... 
\n" "\t [-d | -e] \n" "\treceive -A \n")); case HELP_RENAME: return (gettext("\trename [-f] " "\n" "\trename -p [-f] \n" "\trename -u [-f] \n" "\trename -r \n")); case HELP_ROLLBACK: return (gettext("\trollback [-rRf] \n")); case HELP_SEND: return (gettext("\tsend [-DLPbcehnpsvw] " "[-i|-I snapshot]\n" "\t [-R [-X dataset[,dataset]...]] \n" "\tsend [-DnvPLecw] [-i snapshot|bookmark] " "\n" "\tsend [-DnPpvLec] [-i bookmark|snapshot] " "--redact \n" "\tsend [-nvPe] -t \n" "\tsend [-Pnv] --saved filesystem\n")); case HELP_SET: return (gettext("\tset ... " " ...\n")); case HELP_SHARE: return (gettext("\tshare [-l] <-a [nfs|smb] | filesystem>\n")); case HELP_SNAPSHOT: return (gettext("\tsnapshot [-r] [-o property=value] ... " "@ ...\n")); case HELP_UNMOUNT: return (gettext("\tunmount [-fu] " "<-a | filesystem|mountpoint>\n")); case HELP_UNSHARE: return (gettext("\tunshare " "<-a [nfs|smb] | filesystem|mountpoint>\n")); case HELP_ALLOW: return (gettext("\tallow \n" "\tallow [-ldug] " "<\"everyone\"|user|group>[,...] [,...]\n" "\t \n" "\tallow [-ld] -e [,...] " "\n" "\tallow -c [,...] \n" "\tallow -s @setname [,...] " "\n")); case HELP_UNALLOW: return (gettext("\tunallow [-rldug] " "<\"everyone\"|user|group>[,...]\n" "\t [[,...]] \n" "\tunallow [-rld] -e [[,...]] " "\n" "\tunallow [-r] -c [[,...]] " "\n" "\tunallow [-r] -s @setname [[,...]] " "\n")); case HELP_USERSPACE: return (gettext("\tuserspace [-Hinp] [-o field[,...]] " "[-s field] ...\n" "\t [-S field] ... [-t type[,...]] " "\n")); case HELP_GROUPSPACE: return (gettext("\tgroupspace [-Hinp] [-o field[,...]] " "[-s field] ...\n" "\t [-S field] ... [-t type[,...]] " "\n")); case HELP_PROJECTSPACE: return (gettext("\tprojectspace [-Hp] [-o field[,...]] " "[-s field] ... \n" "\t [-S field] ... \n")); case HELP_PROJECT: return (gettext("\tproject [-d|-r] \n" "\tproject -c [-0] [-d|-r] [-p id] \n" "\tproject -C [-k] [-r] \n" "\tproject [-p id] [-r] [-s] \n")); case HELP_HOLD: return (gettext("\thold [-r] ...\n")); case HELP_HOLDS: return (gettext("\tholds [-rH] ...\n")); case HELP_RELEASE: return (gettext("\trelease [-r] ...\n")); case HELP_DIFF: return (gettext("\tdiff [-FHt] " "[snapshot|filesystem]\n")); case HELP_BOOKMARK: return (gettext("\tbookmark " "\n")); case HELP_CHANNEL_PROGRAM: return (gettext("\tprogram [-jn] [-t ] " "[-m ]\n" "\t [lua args...]\n")); case HELP_LOAD_KEY: return (gettext("\tload-key [-rn] [-L ] " "<-a | filesystem|volume>\n")); case HELP_UNLOAD_KEY: return (gettext("\tunload-key [-r] " "<-a | filesystem|volume>\n")); case HELP_CHANGE_KEY: return (gettext("\tchange-key [-l] [-o keyformat=]\n" "\t [-o keylocation=] [-o pbkdf2iters=]\n" "\t \n" "\tchange-key -i [-l] \n")); case HELP_VERSION: return (gettext("\tversion\n")); case HELP_REDACT: return (gettext("\tredact " " ...\n")); case HELP_JAIL: return (gettext("\tjail \n")); case HELP_UNJAIL: return (gettext("\tunjail \n")); case HELP_WAIT: return (gettext("\twait [-t ] \n")); case HELP_ZONE: return (gettext("\tzone \n")); case HELP_UNZONE: return (gettext("\tunzone \n")); default: __builtin_unreachable(); } } void nomem(void) { (void) fprintf(stderr, gettext("internal error: out of memory\n")); exit(1); } /* * Utility function to guarantee malloc() success. 
*/ void * safe_malloc(size_t size) { void *data; if ((data = calloc(1, size)) == NULL) nomem(); return (data); } static void * safe_realloc(void *data, size_t size) { void *newp; if ((newp = realloc(data, size)) == NULL) { free(data); nomem(); } return (newp); } static char * safe_strdup(const char *str) { char *dupstr = strdup(str); if (dupstr == NULL) nomem(); return (dupstr); } /* * Callback routine that will print out information for each of * the properties. */ static int usage_prop_cb(int prop, void *cb) { FILE *fp = cb; (void) fprintf(fp, "\t%-15s ", zfs_prop_to_name(prop)); if (zfs_prop_readonly(prop)) (void) fprintf(fp, " NO "); else (void) fprintf(fp, "YES "); if (zfs_prop_inheritable(prop)) (void) fprintf(fp, " YES "); else (void) fprintf(fp, " NO "); (void) fprintf(fp, "%s\n", zfs_prop_values(prop) ?: "-"); return (ZPROP_CONT); } /* * Display usage message. If we're inside a command, display only the usage for * that command. Otherwise, iterate over the entire command table and display * a complete usage message. */ static __attribute__((noreturn)) void usage(boolean_t requested) { int i; boolean_t show_properties = B_FALSE; FILE *fp = requested ? stdout : stderr; if (current_command == NULL) { (void) fprintf(fp, gettext("usage: zfs command args ...\n")); (void) fprintf(fp, gettext("where 'command' is one of the following:\n\n")); for (i = 0; i < NCOMMAND; i++) { if (command_table[i].name == NULL) (void) fprintf(fp, "\n"); else (void) fprintf(fp, "%s", get_usage(command_table[i].usage)); } (void) fprintf(fp, gettext("\nEach dataset is of the form: " "pool/[dataset/]*dataset[@name]\n")); } else { (void) fprintf(fp, gettext("usage:\n")); (void) fprintf(fp, "%s", get_usage(current_command->usage)); } if (current_command != NULL && (strcmp(current_command->name, "set") == 0 || strcmp(current_command->name, "get") == 0 || strcmp(current_command->name, "inherit") == 0 || strcmp(current_command->name, "list") == 0)) show_properties = B_TRUE; if (show_properties) { (void) fprintf(fp, gettext("\nThe following properties are supported:\n")); (void) fprintf(fp, "\n\t%-14s %s %s %s\n\n", "PROPERTY", "EDIT", "INHERIT", "VALUES"); /* Iterate over all properties */ (void) zprop_iter(usage_prop_cb, fp, B_FALSE, B_TRUE, ZFS_TYPE_DATASET); (void) fprintf(fp, "\t%-15s ", "userused@..."); (void) fprintf(fp, " NO NO \n"); (void) fprintf(fp, "\t%-15s ", "groupused@..."); (void) fprintf(fp, " NO NO \n"); (void) fprintf(fp, "\t%-15s ", "projectused@..."); (void) fprintf(fp, " NO NO \n"); (void) fprintf(fp, "\t%-15s ", "userobjused@..."); (void) fprintf(fp, " NO NO \n"); (void) fprintf(fp, "\t%-15s ", "groupobjused@..."); (void) fprintf(fp, " NO NO \n"); (void) fprintf(fp, "\t%-15s ", "projectobjused@..."); (void) fprintf(fp, " NO NO \n"); (void) fprintf(fp, "\t%-15s ", "userquota@..."); (void) fprintf(fp, "YES NO | none\n"); (void) fprintf(fp, "\t%-15s ", "groupquota@..."); (void) fprintf(fp, "YES NO | none\n"); (void) fprintf(fp, "\t%-15s ", "projectquota@..."); (void) fprintf(fp, "YES NO | none\n"); (void) fprintf(fp, "\t%-15s ", "userobjquota@..."); (void) fprintf(fp, "YES NO | none\n"); (void) fprintf(fp, "\t%-15s ", "groupobjquota@..."); (void) fprintf(fp, "YES NO | none\n"); (void) fprintf(fp, "\t%-15s ", "projectobjquota@..."); (void) fprintf(fp, "YES NO | none\n"); (void) fprintf(fp, "\t%-15s ", "written@"); (void) fprintf(fp, " NO NO \n"); (void) fprintf(fp, "\t%-15s ", "written#"); (void) fprintf(fp, " NO NO \n"); (void) fprintf(fp, gettext("\nSizes are specified in bytes " "with standard 
units such as K, M, G, etc.\n")); (void) fprintf(fp, "%s", gettext("\nUser-defined properties " "can be specified by using a name containing a colon " "(:).\n")); (void) fprintf(fp, gettext("\nThe {user|group|project}" "[obj]{used|quota}@ properties must be appended with\n" "a user|group|project specifier of one of these forms:\n" " POSIX name (eg: \"matt\")\n" " POSIX id (eg: \"126829\")\n" " SMB name@domain (eg: \"matt@sun\")\n" " SMB SID (eg: \"S-1-234-567-89\")\n")); } else { (void) fprintf(fp, gettext("\nFor the property list, run: %s\n"), "zfs set|get"); (void) fprintf(fp, gettext("\nFor the delegated permission list, run: %s\n"), "zfs allow|unallow"); } /* * See comments at end of main(). */ if (getenv("ZFS_ABORT") != NULL) { (void) printf("dumping core by request\n"); abort(); } exit(requested ? 0 : 2); } /* * Take a property=value argument string and add it to the given nvlist. * Modifies the argument inplace. */ static boolean_t parseprop(nvlist_t *props, char *propname) { char *propval; if ((propval = strchr(propname, '=')) == NULL) { (void) fprintf(stderr, gettext("missing " "'=' for property=value argument\n")); return (B_FALSE); } *propval = '\0'; propval++; if (nvlist_exists(props, propname)) { (void) fprintf(stderr, gettext("property '%s' " "specified multiple times\n"), propname); return (B_FALSE); } if (nvlist_add_string(props, propname, propval) != 0) nomem(); return (B_TRUE); } /* * Take a property name argument and add it to the given nvlist. * Modifies the argument inplace. */ static boolean_t parsepropname(nvlist_t *props, char *propname) { if (strchr(propname, '=') != NULL) { (void) fprintf(stderr, gettext("invalid character " "'=' in property argument\n")); return (B_FALSE); } if (nvlist_exists(props, propname)) { (void) fprintf(stderr, gettext("property '%s' " "specified multiple times\n"), propname); return (B_FALSE); } if (nvlist_add_boolean(props, propname) != 0) nomem(); return (B_TRUE); } static int parse_depth(char *opt, int *flags) { char *tmp; int depth; depth = (int)strtol(opt, &tmp, 0); if (*tmp) { (void) fprintf(stderr, gettext("%s is not an integer\n"), optarg); usage(B_FALSE); } if (depth < 0) { (void) fprintf(stderr, gettext("Depth can not be negative.\n")); usage(B_FALSE); } *flags |= (ZFS_ITER_DEPTH_LIMIT|ZFS_ITER_RECURSE); return (depth); } #define PROGRESS_DELAY 2 /* seconds */ static const char *pt_reverse = "\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b"; static time_t pt_begin; static char *pt_header = NULL; static boolean_t pt_shown; static void start_progress_timer(void) { pt_begin = time(NULL) + PROGRESS_DELAY; pt_shown = B_FALSE; } static void set_progress_header(const char *header) { assert(pt_header == NULL); pt_header = safe_strdup(header); if (pt_shown) { (void) printf("%s: ", header); (void) fflush(stdout); } } static void update_progress(const char *update) { if (!pt_shown && time(NULL) > pt_begin) { int len = strlen(update); (void) printf("%s: %s%*.*s", pt_header, update, len, len, pt_reverse); (void) fflush(stdout); pt_shown = B_TRUE; } else if (pt_shown) { int len = strlen(update); (void) printf("%s%*.*s", update, len, len, pt_reverse); (void) fflush(stdout); } } static void finish_progress(const char *done) { if (pt_shown) { (void) puts(done); (void) fflush(stdout); } free(pt_header); pt_header = NULL; } static int zfs_mount_and_share(libzfs_handle_t *hdl, const char *dataset, zfs_type_t type) { zfs_handle_t *zhp = NULL; int ret = 0; zhp = zfs_open(hdl, dataset, type); if (zhp == NULL) return (1); /* * Volumes may neither be 
mounted or shared. Potentially in the * future filesystems detected on these volumes could be mounted. */ if (zfs_get_type(zhp) == ZFS_TYPE_VOLUME) { zfs_close(zhp); return (0); } /* * Mount and/or share the new filesystem as appropriate. We provide a * verbose error message to let the user know that their filesystem was * in fact created, even if we failed to mount or share it. * * If the user doesn't want the dataset automatically mounted, then * skip the mount/share step */ if (zfs_prop_valid_for_type(ZFS_PROP_CANMOUNT, type, B_FALSE) && zfs_prop_get_int(zhp, ZFS_PROP_CANMOUNT) == ZFS_CANMOUNT_ON) { if (zfs_mount_delegation_check()) { (void) fprintf(stderr, gettext("filesystem " "successfully created, but it may only be " "mounted by root\n")); ret = 1; } else if (zfs_mount(zhp, NULL, 0) != 0) { (void) fprintf(stderr, gettext("filesystem " "successfully created, but not mounted\n")); ret = 1; } else if (zfs_share(zhp, NULL) != 0) { (void) fprintf(stderr, gettext("filesystem " "successfully created, but not shared\n")); ret = 1; } zfs_commit_shares(NULL); } zfs_close(zhp); return (ret); } /* * zfs clone [-p] [-o prop=value] ... * * Given an existing dataset, create a writable copy whose initial contents * are the same as the source. The newly created dataset maintains a * dependency on the original; the original cannot be destroyed so long as * the clone exists. * * The '-p' flag creates all the non-existing ancestors of the target first. */ static int zfs_do_clone(int argc, char **argv) { zfs_handle_t *zhp = NULL; boolean_t parents = B_FALSE; nvlist_t *props; int ret = 0; int c; if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0) nomem(); /* check options */ while ((c = getopt(argc, argv, "o:p")) != -1) { switch (c) { case 'o': if (!parseprop(props, optarg)) { nvlist_free(props); return (1); } break; case 'p': parents = B_TRUE; break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); goto usage; } } argc -= optind; argv += optind; /* check number of arguments */ if (argc < 1) { (void) fprintf(stderr, gettext("missing source dataset " "argument\n")); goto usage; } if (argc < 2) { (void) fprintf(stderr, gettext("missing target dataset " "argument\n")); goto usage; } if (argc > 2) { (void) fprintf(stderr, gettext("too many arguments\n")); goto usage; } /* open the source dataset */ if ((zhp = zfs_open(g_zfs, argv[0], ZFS_TYPE_SNAPSHOT)) == NULL) { nvlist_free(props); return (1); } if (parents && zfs_name_valid(argv[1], ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME)) { /* * Now create the ancestors of the target dataset. If the * target already exists and '-p' option was used we should not * complain. */ if (zfs_dataset_exists(g_zfs, argv[1], ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME)) { zfs_close(zhp); nvlist_free(props); return (0); } if (zfs_create_ancestors(g_zfs, argv[1]) != 0) { zfs_close(zhp); nvlist_free(props); return (1); } } /* pass to libzfs */ ret = zfs_clone(zhp, argv[1], props); /* create the mountpoint if necessary */ if (ret == 0) { if (log_history) { (void) zpool_log_history(g_zfs, history_str); log_history = B_FALSE; } ret = zfs_mount_and_share(g_zfs, argv[1], ZFS_TYPE_DATASET); } zfs_close(zhp); nvlist_free(props); return (!!ret); usage: ASSERT3P(zhp, ==, NULL); nvlist_free(props); usage(B_FALSE); return (-1); } /* * Return a default volblocksize for the pool which always uses more than * half of the data sectors. This primarily applies to dRAID which always * writes full stripe widths. 
*/ static uint64_t default_volblocksize(zpool_handle_t *zhp, nvlist_t *props) { uint64_t volblocksize, asize = SPA_MINBLOCKSIZE; nvlist_t *tree, **vdevs; uint_t nvdevs; nvlist_t *config = zpool_get_config(zhp, NULL); if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &tree) != 0 || nvlist_lookup_nvlist_array(tree, ZPOOL_CONFIG_CHILDREN, &vdevs, &nvdevs) != 0) { return (ZVOL_DEFAULT_BLOCKSIZE); } for (int i = 0; i < nvdevs; i++) { nvlist_t *nv = vdevs[i]; uint64_t ashift, ndata, nparity; if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ASHIFT, &ashift) != 0) continue; if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_DRAID_NDATA, &ndata) == 0) { /* dRAID minimum allocation width */ asize = MAX(asize, ndata * (1ULL << ashift)); } else if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NPARITY, &nparity) == 0) { /* raidz minimum allocation width */ if (nparity == 1) asize = MAX(asize, 2 * (1ULL << ashift)); else asize = MAX(asize, 4 * (1ULL << ashift)); } else { /* mirror or (non-redundant) leaf vdev */ asize = MAX(asize, 1ULL << ashift); } } /* * Calculate the target volblocksize such that more than half * of the asize is used. The following table is for 4k sectors. * * n asize blksz used | n asize blksz used * -------------------------+--------------------------------- * 1 4,096 8,192 100% | 9 36,864 32,768 88% * 2 8,192 8,192 100% | 10 40,960 32,768 80% * 3 12,288 8,192 66% | 11 45,056 32,768 72% * 4 16,384 16,384 100% | 12 49,152 32,768 66% * 5 20,480 16,384 80% | 13 53,248 32,768 61% * 6 24,576 16,384 66% | 14 57,344 32,768 57% * 7 28,672 16,384 57% | 15 61,440 32,768 53% * 8 32,768 32,768 100% | 16 65,536 65,636 100% * * This is primarily a concern for dRAID which always allocates * a full stripe width. For dRAID the default stripe width is * n=8 in which case the volblocksize is set to 32k. Ignoring * compression there are no unused sectors. This same reasoning * applies to raidz[2,3] so target 4 sectors to minimize waste. */ uint64_t tgt_volblocksize = ZVOL_DEFAULT_BLOCKSIZE; while (tgt_volblocksize * 2 <= asize) tgt_volblocksize *= 2; const char *prop = zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE); if (nvlist_lookup_uint64(props, prop, &volblocksize) == 0) { /* Issue a warning when a non-optimal size is requested. */ if (volblocksize < ZVOL_DEFAULT_BLOCKSIZE) { (void) fprintf(stderr, gettext("Warning: " "volblocksize (%llu) is less than the default " "minimum block size (%llu).\nTo reduce wasted " "space a volblocksize of %llu is recommended.\n"), (u_longlong_t)volblocksize, (u_longlong_t)ZVOL_DEFAULT_BLOCKSIZE, (u_longlong_t)tgt_volblocksize); } else if (volblocksize < tgt_volblocksize) { (void) fprintf(stderr, gettext("Warning: " "volblocksize (%llu) is much less than the " "minimum allocation\nunit (%llu), which wastes " "at least %llu%% of space. To reduce wasted " "space,\nuse a larger volblocksize (%llu is " "recommended), fewer dRAID data disks\n" "per group, or smaller sector size (ashift).\n"), (u_longlong_t)volblocksize, (u_longlong_t)asize, (u_longlong_t)((100 * (asize - volblocksize)) / asize), (u_longlong_t)tgt_volblocksize); } } else { volblocksize = tgt_volblocksize; fnvlist_add_uint64(props, prop, volblocksize); } return (volblocksize); } /* * zfs create [-Pnpv] [-o prop=value] ... fs * zfs create [-Pnpsv] [-b blocksize] [-o prop=value] ... -V vol size * * Create a new dataset. This command can be used to create filesystems * and volumes. Snapshot creation is handled by 'zfs snapshot'. * For volumes, the user must specify a size to be used. 
* * The '-s' flag applies only to volumes, and indicates that we should not try * to set the reservation for this volume. By default we set a reservation * equal to the size for any volume. For pools with SPA_VERSION >= * SPA_VERSION_REFRESERVATION, we set a refreservation instead. * * The '-p' flag creates all the non-existing ancestors of the target first. * * The '-n' flag is no-op (dry run) mode. This will perform a user-space sanity * check of arguments and properties, but does not check for permissions, * available space, etc. * * The '-u' flag prevents the newly created file system from being mounted. * * The '-v' flag is for verbose output. * * The '-P' flag is used for parseable output. It implies '-v'. */ static int zfs_do_create(int argc, char **argv) { zfs_type_t type = ZFS_TYPE_FILESYSTEM; zpool_handle_t *zpool_handle = NULL; nvlist_t *real_props = NULL; uint64_t volsize = 0; int c; boolean_t noreserve = B_FALSE; boolean_t bflag = B_FALSE; boolean_t parents = B_FALSE; boolean_t dryrun = B_FALSE; boolean_t nomount = B_FALSE; boolean_t verbose = B_FALSE; boolean_t parseable = B_FALSE; int ret = 1; nvlist_t *props; uint64_t intval; char *strval; if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0) nomem(); /* check options */ while ((c = getopt(argc, argv, ":PV:b:nso:puv")) != -1) { switch (c) { case 'V': type = ZFS_TYPE_VOLUME; if (zfs_nicestrtonum(g_zfs, optarg, &intval) != 0) { (void) fprintf(stderr, gettext("bad volume " "size '%s': %s\n"), optarg, libzfs_error_description(g_zfs)); goto error; } if (nvlist_add_uint64(props, zfs_prop_to_name(ZFS_PROP_VOLSIZE), intval) != 0) nomem(); volsize = intval; break; case 'P': verbose = B_TRUE; parseable = B_TRUE; break; case 'p': parents = B_TRUE; break; case 'b': bflag = B_TRUE; if (zfs_nicestrtonum(g_zfs, optarg, &intval) != 0) { (void) fprintf(stderr, gettext("bad volume " "block size '%s': %s\n"), optarg, libzfs_error_description(g_zfs)); goto error; } if (nvlist_add_uint64(props, zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), intval) != 0) nomem(); break; case 'n': dryrun = B_TRUE; break; case 'o': if (!parseprop(props, optarg)) goto error; break; case 's': noreserve = B_TRUE; break; case 'u': nomount = B_TRUE; break; case 'v': verbose = B_TRUE; break; case ':': (void) fprintf(stderr, gettext("missing size " "argument\n")); goto badusage; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); goto badusage; } } if ((bflag || noreserve) && type != ZFS_TYPE_VOLUME) { (void) fprintf(stderr, gettext("'-s' and '-b' can only be " "used when creating a volume\n")); goto badusage; } if (nomount && type != ZFS_TYPE_FILESYSTEM) { (void) fprintf(stderr, gettext("'-u' can only be " "used when creating a filesystem\n")); goto badusage; } argc -= optind; argv += optind; /* check number of arguments */ if (argc == 0) { (void) fprintf(stderr, gettext("missing %s argument\n"), zfs_type_to_name(type)); goto badusage; } if (argc > 1) { (void) fprintf(stderr, gettext("too many arguments\n")); goto badusage; } if (dryrun || type == ZFS_TYPE_VOLUME) { char msg[ZFS_MAX_DATASET_NAME_LEN * 2]; char *p; if ((p = strchr(argv[0], '/')) != NULL) *p = '\0'; zpool_handle = zpool_open(g_zfs, argv[0]); if (p != NULL) *p = '/'; if (zpool_handle == NULL) goto error; (void) snprintf(msg, sizeof (msg), dryrun ? 
gettext("cannot verify '%s'") : gettext("cannot create '%s'"), argv[0]); if (props && (real_props = zfs_valid_proplist(g_zfs, type, props, 0, NULL, zpool_handle, B_TRUE, msg)) == NULL) { zpool_close(zpool_handle); goto error; } } if (type == ZFS_TYPE_VOLUME) { const char *prop = zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE); uint64_t volblocksize = default_volblocksize(zpool_handle, real_props); if (volblocksize != ZVOL_DEFAULT_BLOCKSIZE && nvlist_lookup_string(props, prop, &strval) != 0) { if (asprintf(&strval, "%llu", (u_longlong_t)volblocksize) == -1) nomem(); nvlist_add_string(props, prop, strval); free(strval); } /* * If volsize is not a multiple of volblocksize, round it * up to the nearest multiple of the volblocksize. */ if (volsize % volblocksize) { volsize = P2ROUNDUP_TYPED(volsize, volblocksize, uint64_t); if (nvlist_add_uint64(props, zfs_prop_to_name(ZFS_PROP_VOLSIZE), volsize) != 0) { nvlist_free(props); nomem(); } } } if (type == ZFS_TYPE_VOLUME && !noreserve) { uint64_t spa_version; zfs_prop_t resv_prop; spa_version = zpool_get_prop_int(zpool_handle, ZPOOL_PROP_VERSION, NULL); if (spa_version >= SPA_VERSION_REFRESERVATION) resv_prop = ZFS_PROP_REFRESERVATION; else resv_prop = ZFS_PROP_RESERVATION; volsize = zvol_volsize_to_reservation(zpool_handle, volsize, real_props); if (nvlist_lookup_string(props, zfs_prop_to_name(resv_prop), &strval) != 0) { if (nvlist_add_uint64(props, zfs_prop_to_name(resv_prop), volsize) != 0) { nvlist_free(props); nomem(); } } } if (zpool_handle != NULL) { zpool_close(zpool_handle); nvlist_free(real_props); } if (parents && zfs_name_valid(argv[0], type)) { /* * Now create the ancestors of target dataset. If the target * already exists and '-p' option was used we should not * complain. */ if (zfs_dataset_exists(g_zfs, argv[0], type)) { ret = 0; goto error; } if (verbose) { (void) printf(parseable ? "create_ancestors\t%s\n" : dryrun ? "would create ancestors of %s\n" : "create ancestors of %s\n", argv[0]); } if (!dryrun) { if (zfs_create_ancestors(g_zfs, argv[0]) != 0) { goto error; } } } if (verbose) { nvpair_t *nvp = NULL; (void) printf(parseable ? "create\t%s\n" : dryrun ? "would create %s\n" : "create %s\n", argv[0]); while ((nvp = nvlist_next_nvpair(props, nvp)) != NULL) { uint64_t uval; char *sval; switch (nvpair_type(nvp)) { case DATA_TYPE_UINT64: VERIFY0(nvpair_value_uint64(nvp, &uval)); (void) printf(parseable ? "property\t%s\t%llu\n" : "\t%s=%llu\n", nvpair_name(nvp), (u_longlong_t)uval); break; case DATA_TYPE_STRING: VERIFY0(nvpair_value_string(nvp, &sval)); (void) printf(parseable ? "property\t%s\t%s\n" : "\t%s=%s\n", nvpair_name(nvp), sval); break; default: (void) fprintf(stderr, "property '%s' " "has illegal type %d\n", nvpair_name(nvp), nvpair_type(nvp)); abort(); } } } if (dryrun) { ret = 0; goto error; } /* pass to libzfs */ if (zfs_create(g_zfs, argv[0], type, props) != 0) goto error; if (log_history) { (void) zpool_log_history(g_zfs, history_str); log_history = B_FALSE; } if (nomount) { ret = 0; goto error; } ret = zfs_mount_and_share(g_zfs, argv[0], ZFS_TYPE_DATASET); error: nvlist_free(props); return (ret); badusage: nvlist_free(props); usage(B_FALSE); return (2); } /* * zfs destroy [-rRf] * zfs destroy [-rRd] * * -r Recursively destroy all children * -R Recursively destroy all dependents, including clones * -f Force unmounting of any dependents * -d If we can't destroy now, mark for deferred destruction * * Destroys the given dataset. 
By default, it will unmount any filesystems, * and refuse to destroy a dataset that has any dependents. A dependent can * either be a child, or a clone of a child. */ typedef struct destroy_cbdata { boolean_t cb_first; boolean_t cb_force; boolean_t cb_recurse; boolean_t cb_error; boolean_t cb_doclones; zfs_handle_t *cb_target; boolean_t cb_defer_destroy; boolean_t cb_verbose; boolean_t cb_parsable; boolean_t cb_dryrun; nvlist_t *cb_nvl; nvlist_t *cb_batchedsnaps; /* first snap in contiguous run */ char *cb_firstsnap; /* previous snap in contiguous run */ char *cb_prevsnap; int64_t cb_snapused; char *cb_snapspec; char *cb_bookmark; uint64_t cb_snap_count; } destroy_cbdata_t; /* * Check for any dependents based on the '-r' or '-R' flags. */ static int destroy_check_dependent(zfs_handle_t *zhp, void *data) { destroy_cbdata_t *cbp = data; const char *tname = zfs_get_name(cbp->cb_target); const char *name = zfs_get_name(zhp); if (strncmp(tname, name, strlen(tname)) == 0 && (name[strlen(tname)] == '/' || name[strlen(tname)] == '@')) { /* * This is a direct descendant, not a clone somewhere else in * the hierarchy. */ if (cbp->cb_recurse) goto out; if (cbp->cb_first) { (void) fprintf(stderr, gettext("cannot destroy '%s': " "%s has children\n"), zfs_get_name(cbp->cb_target), zfs_type_to_name(zfs_get_type(cbp->cb_target))); (void) fprintf(stderr, gettext("use '-r' to destroy " "the following datasets:\n")); cbp->cb_first = B_FALSE; cbp->cb_error = B_TRUE; } (void) fprintf(stderr, "%s\n", zfs_get_name(zhp)); } else { /* * This is a clone. We only want to report this if the '-r' * wasn't specified, or the target is a snapshot. */ if (!cbp->cb_recurse && zfs_get_type(cbp->cb_target) != ZFS_TYPE_SNAPSHOT) goto out; if (cbp->cb_first) { (void) fprintf(stderr, gettext("cannot destroy '%s': " "%s has dependent clones\n"), zfs_get_name(cbp->cb_target), zfs_type_to_name(zfs_get_type(cbp->cb_target))); (void) fprintf(stderr, gettext("use '-R' to destroy " "the following datasets:\n")); cbp->cb_first = B_FALSE; cbp->cb_error = B_TRUE; cbp->cb_dryrun = B_TRUE; } (void) fprintf(stderr, "%s\n", zfs_get_name(zhp)); } out: zfs_close(zhp); return (0); } static int destroy_batched(destroy_cbdata_t *cb) { int error = zfs_destroy_snaps_nvl(g_zfs, cb->cb_batchedsnaps, B_FALSE); fnvlist_free(cb->cb_batchedsnaps); cb->cb_batchedsnaps = fnvlist_alloc(); return (error); } static int destroy_callback(zfs_handle_t *zhp, void *data) { destroy_cbdata_t *cb = data; const char *name = zfs_get_name(zhp); int error; if (cb->cb_verbose) { if (cb->cb_parsable) { (void) printf("destroy\t%s\n", name); } else if (cb->cb_dryrun) { (void) printf(gettext("would destroy %s\n"), name); } else { (void) printf(gettext("will destroy %s\n"), name); } } /* * Ignore pools (which we've already flagged as an error before getting * here). */ if (strchr(zfs_get_name(zhp), '/') == NULL && zfs_get_type(zhp) == ZFS_TYPE_FILESYSTEM) { zfs_close(zhp); return (0); } if (cb->cb_dryrun) { zfs_close(zhp); return (0); } /* * We batch up all contiguous snapshots (even of different * filesystems) and destroy them with one ioctl. We can't * simply do all snap deletions and then all fs deletions, * because we must delete a clone before its origin. */ if (zfs_get_type(zhp) == ZFS_TYPE_SNAPSHOT) { cb->cb_snap_count++; fnvlist_add_boolean(cb->cb_batchedsnaps, name); if (cb->cb_snap_count % 10 == 0 && cb->cb_defer_destroy) error = destroy_batched(cb); } else { error = destroy_batched(cb); if (error != 0 || zfs_unmount(zhp, NULL, cb->cb_force ? 
MS_FORCE : 0) != 0 || zfs_destroy(zhp, cb->cb_defer_destroy) != 0) { zfs_close(zhp); /* * When performing a recursive destroy we ignore errors * so that the recursive destroy could continue * destroying past problem datasets */ if (cb->cb_recurse) { cb->cb_error = B_TRUE; return (0); } return (-1); } } zfs_close(zhp); return (0); } static int destroy_print_cb(zfs_handle_t *zhp, void *arg) { destroy_cbdata_t *cb = arg; const char *name = zfs_get_name(zhp); int err = 0; if (nvlist_exists(cb->cb_nvl, name)) { if (cb->cb_firstsnap == NULL) cb->cb_firstsnap = strdup(name); if (cb->cb_prevsnap != NULL) free(cb->cb_prevsnap); /* this snap continues the current range */ cb->cb_prevsnap = strdup(name); if (cb->cb_firstsnap == NULL || cb->cb_prevsnap == NULL) nomem(); if (cb->cb_verbose) { if (cb->cb_parsable) { (void) printf("destroy\t%s\n", name); } else if (cb->cb_dryrun) { (void) printf(gettext("would destroy %s\n"), name); } else { (void) printf(gettext("will destroy %s\n"), name); } } } else if (cb->cb_firstsnap != NULL) { /* end of this range */ uint64_t used = 0; err = lzc_snaprange_space(cb->cb_firstsnap, cb->cb_prevsnap, &used); cb->cb_snapused += used; free(cb->cb_firstsnap); cb->cb_firstsnap = NULL; free(cb->cb_prevsnap); cb->cb_prevsnap = NULL; } zfs_close(zhp); return (err); } static int destroy_print_snapshots(zfs_handle_t *fs_zhp, destroy_cbdata_t *cb) { int err; assert(cb->cb_firstsnap == NULL); assert(cb->cb_prevsnap == NULL); err = zfs_iter_snapshots_sorted(fs_zhp, destroy_print_cb, cb, 0, 0); if (cb->cb_firstsnap != NULL) { uint64_t used = 0; if (err == 0) { err = lzc_snaprange_space(cb->cb_firstsnap, cb->cb_prevsnap, &used); } cb->cb_snapused += used; free(cb->cb_firstsnap); cb->cb_firstsnap = NULL; free(cb->cb_prevsnap); cb->cb_prevsnap = NULL; } return (err); } static int snapshot_to_nvl_cb(zfs_handle_t *zhp, void *arg) { destroy_cbdata_t *cb = arg; int err = 0; /* Check for clones. */ if (!cb->cb_doclones && !cb->cb_defer_destroy) { cb->cb_target = zhp; cb->cb_first = B_TRUE; err = zfs_iter_dependents(zhp, B_TRUE, destroy_check_dependent, cb); } if (err == 0) { if (nvlist_add_boolean(cb->cb_nvl, zfs_get_name(zhp))) nomem(); } zfs_close(zhp); return (err); } static int gather_snapshots(zfs_handle_t *zhp, void *arg) { destroy_cbdata_t *cb = arg; int err = 0; err = zfs_iter_snapspec(zhp, cb->cb_snapspec, snapshot_to_nvl_cb, cb); if (err == ENOENT) err = 0; if (err != 0) goto out; if (cb->cb_verbose) { err = destroy_print_snapshots(zhp, cb); if (err != 0) goto out; } if (cb->cb_recurse) err = zfs_iter_filesystems(zhp, gather_snapshots, cb); out: zfs_close(zhp); return (err); } static int destroy_clones(destroy_cbdata_t *cb) { nvpair_t *pair; for (pair = nvlist_next_nvpair(cb->cb_nvl, NULL); pair != NULL; pair = nvlist_next_nvpair(cb->cb_nvl, pair)) { zfs_handle_t *zhp = zfs_open(g_zfs, nvpair_name(pair), ZFS_TYPE_SNAPSHOT); if (zhp != NULL) { boolean_t defer = cb->cb_defer_destroy; int err; /* * We can't defer destroy non-snapshots, so set it to * false while destroying the clones. 
*/ cb->cb_defer_destroy = B_FALSE; err = zfs_iter_dependents(zhp, B_FALSE, destroy_callback, cb); cb->cb_defer_destroy = defer; zfs_close(zhp); if (err != 0) return (err); } } return (0); } static int zfs_do_destroy(int argc, char **argv) { destroy_cbdata_t cb = { 0 }; int rv = 0; int err = 0; int c; zfs_handle_t *zhp = NULL; char *at, *pound; zfs_type_t type = ZFS_TYPE_DATASET; /* check options */ while ((c = getopt(argc, argv, "vpndfrR")) != -1) { switch (c) { case 'v': cb.cb_verbose = B_TRUE; break; case 'p': cb.cb_verbose = B_TRUE; cb.cb_parsable = B_TRUE; break; case 'n': cb.cb_dryrun = B_TRUE; break; case 'd': cb.cb_defer_destroy = B_TRUE; type = ZFS_TYPE_SNAPSHOT; break; case 'f': cb.cb_force = B_TRUE; break; case 'r': cb.cb_recurse = B_TRUE; break; case 'R': cb.cb_recurse = B_TRUE; cb.cb_doclones = B_TRUE; break; case '?': default: (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); usage(B_FALSE); } } argc -= optind; argv += optind; /* check number of arguments */ if (argc == 0) { (void) fprintf(stderr, gettext("missing dataset argument\n")); usage(B_FALSE); } if (argc > 1) { (void) fprintf(stderr, gettext("too many arguments\n")); usage(B_FALSE); } at = strchr(argv[0], '@'); pound = strchr(argv[0], '#'); if (at != NULL) { /* Build the list of snaps to destroy in cb_nvl. */ cb.cb_nvl = fnvlist_alloc(); *at = '\0'; zhp = zfs_open(g_zfs, argv[0], ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME); if (zhp == NULL) { nvlist_free(cb.cb_nvl); return (1); } cb.cb_snapspec = at + 1; if (gather_snapshots(zfs_handle_dup(zhp), &cb) != 0 || cb.cb_error) { rv = 1; goto out; } if (nvlist_empty(cb.cb_nvl)) { (void) fprintf(stderr, gettext("could not find any " "snapshots to destroy; check snapshot names.\n")); rv = 1; goto out; } if (cb.cb_verbose) { char buf[16]; zfs_nicebytes(cb.cb_snapused, buf, sizeof (buf)); if (cb.cb_parsable) { (void) printf("reclaim\t%llu\n", (u_longlong_t)cb.cb_snapused); } else if (cb.cb_dryrun) { (void) printf(gettext("would reclaim %s\n"), buf); } else { (void) printf(gettext("will reclaim %s\n"), buf); } } if (!cb.cb_dryrun) { if (cb.cb_doclones) { cb.cb_batchedsnaps = fnvlist_alloc(); err = destroy_clones(&cb); if (err == 0) { err = zfs_destroy_snaps_nvl(g_zfs, cb.cb_batchedsnaps, B_FALSE); } if (err != 0) { rv = 1; goto out; } } if (err == 0) { err = zfs_destroy_snaps_nvl(g_zfs, cb.cb_nvl, cb.cb_defer_destroy); } } if (err != 0) rv = 1; } else if (pound != NULL) { int err; nvlist_t *nvl; if (cb.cb_dryrun) { (void) fprintf(stderr, "dryrun is not supported with bookmark\n"); return (-1); } if (cb.cb_defer_destroy) { (void) fprintf(stderr, "defer destroy is not supported with bookmark\n"); return (-1); } if (cb.cb_recurse) { (void) fprintf(stderr, "recursive is not supported with bookmark\n"); return (-1); } /* * Unfortunately, zfs_bookmark() doesn't honor the * casesensitivity setting. However, we can't simply * remove this check, because lzc_destroy_bookmarks() * ignores non-existent bookmarks, so this is necessary * to get a proper error message. 
*/ if (!zfs_bookmark_exists(argv[0])) { (void) fprintf(stderr, gettext("bookmark '%s' " "does not exist.\n"), argv[0]); return (1); } nvl = fnvlist_alloc(); fnvlist_add_boolean(nvl, argv[0]); err = lzc_destroy_bookmarks(nvl, NULL); if (err != 0) { (void) zfs_standard_error(g_zfs, err, "cannot destroy bookmark"); } nvlist_free(nvl); return (err); } else { /* Open the given dataset */ if ((zhp = zfs_open(g_zfs, argv[0], type)) == NULL) return (1); cb.cb_target = zhp; /* * Perform an explicit check for pools before going any further. */ if (!cb.cb_recurse && strchr(zfs_get_name(zhp), '/') == NULL && zfs_get_type(zhp) == ZFS_TYPE_FILESYSTEM) { (void) fprintf(stderr, gettext("cannot destroy '%s': " "operation does not apply to pools\n"), zfs_get_name(zhp)); (void) fprintf(stderr, gettext("use 'zfs destroy -r " "%s' to destroy all datasets in the pool\n"), zfs_get_name(zhp)); (void) fprintf(stderr, gettext("use 'zpool destroy %s' " "to destroy the pool itself\n"), zfs_get_name(zhp)); rv = 1; goto out; } /* * Check for any dependents and/or clones. */ cb.cb_first = B_TRUE; if (!cb.cb_doclones && zfs_iter_dependents(zhp, B_TRUE, destroy_check_dependent, &cb) != 0) { rv = 1; goto out; } if (cb.cb_error) { rv = 1; goto out; } cb.cb_batchedsnaps = fnvlist_alloc(); if (zfs_iter_dependents(zhp, B_FALSE, destroy_callback, &cb) != 0) { rv = 1; goto out; } /* * Do the real thing. The callback will close the * handle regardless of whether it succeeds or not. */ err = destroy_callback(zhp, &cb); zhp = NULL; if (err == 0) { err = zfs_destroy_snaps_nvl(g_zfs, cb.cb_batchedsnaps, cb.cb_defer_destroy); } if (err != 0 || cb.cb_error == B_TRUE) rv = 1; } out: fnvlist_free(cb.cb_batchedsnaps); fnvlist_free(cb.cb_nvl); if (zhp != NULL) zfs_close(zhp); return (rv); } static boolean_t is_recvd_column(zprop_get_cbdata_t *cbp) { int i; zfs_get_column_t col; for (i = 0; i < ZFS_GET_NCOLS && (col = cbp->cb_columns[i]) != GET_COL_NONE; i++) if (col == GET_COL_RECVD) return (B_TRUE); return (B_FALSE); } /* * zfs get [-rHp] [-o all | field[,field]...] [-s source[,source]...] * < all | property[,property]... > < fs | snap | vol > ... * * -r recurse over any child datasets * -H scripted mode. Headers are stripped, and fields are separated * by tabs instead of spaces. * -o Set of fields to display. One of "name,property,value, * received,source". Default is "name,property,value,source". * "all" is an alias for all five. * -s Set of sources to allow. One of * "local,default,inherited,received,temporary,none". Default is * all six. * -p Display values in parsable (literal) format. * * Prints properties for the given datasets. The user can control which * columns to display as well as which property types to allow. */ /* * Invoked to display the properties for a single dataset. */ static int get_callback(zfs_handle_t *zhp, void *data) { char buf[ZFS_MAXPROPLEN]; char rbuf[ZFS_MAXPROPLEN]; zprop_source_t sourcetype; char source[ZFS_MAX_DATASET_NAME_LEN]; zprop_get_cbdata_t *cbp = data; nvlist_t *user_props = zfs_get_user_props(zhp); zprop_list_t *pl = cbp->cb_proplist; nvlist_t *propval; const char *strval; const char *sourceval; boolean_t received = is_recvd_column(cbp); for (; pl != NULL; pl = pl->pl_next) { char *recvdval = NULL; /* * Skip the special fake placeholder. This will also skip over * the name property when 'all' is specified. 
*/ if (pl->pl_prop == ZFS_PROP_NAME && pl == cbp->cb_proplist) continue; if (pl->pl_prop != ZPROP_USERPROP) { if (zfs_prop_get(zhp, pl->pl_prop, buf, sizeof (buf), &sourcetype, source, sizeof (source), cbp->cb_literal) != 0) { if (pl->pl_all) continue; if (!zfs_prop_valid_for_type(pl->pl_prop, ZFS_TYPE_DATASET, B_FALSE)) { (void) fprintf(stderr, gettext("No such property '%s'\n"), zfs_prop_to_name(pl->pl_prop)); continue; } sourcetype = ZPROP_SRC_NONE; (void) strlcpy(buf, "-", sizeof (buf)); } if (received && (zfs_prop_get_recvd(zhp, zfs_prop_to_name(pl->pl_prop), rbuf, sizeof (rbuf), cbp->cb_literal) == 0)) recvdval = rbuf; zprop_print_one_property(zfs_get_name(zhp), cbp, zfs_prop_to_name(pl->pl_prop), buf, sourcetype, source, recvdval); } else if (zfs_prop_userquota(pl->pl_user_prop)) { sourcetype = ZPROP_SRC_LOCAL; if (zfs_prop_get_userquota(zhp, pl->pl_user_prop, buf, sizeof (buf), cbp->cb_literal) != 0) { sourcetype = ZPROP_SRC_NONE; (void) strlcpy(buf, "-", sizeof (buf)); } zprop_print_one_property(zfs_get_name(zhp), cbp, pl->pl_user_prop, buf, sourcetype, source, NULL); } else if (zfs_prop_written(pl->pl_user_prop)) { sourcetype = ZPROP_SRC_LOCAL; if (zfs_prop_get_written(zhp, pl->pl_user_prop, buf, sizeof (buf), cbp->cb_literal) != 0) { sourcetype = ZPROP_SRC_NONE; (void) strlcpy(buf, "-", sizeof (buf)); } zprop_print_one_property(zfs_get_name(zhp), cbp, pl->pl_user_prop, buf, sourcetype, source, NULL); } else { if (nvlist_lookup_nvlist(user_props, pl->pl_user_prop, &propval) != 0) { if (pl->pl_all) continue; sourcetype = ZPROP_SRC_NONE; strval = "-"; } else { strval = fnvlist_lookup_string(propval, ZPROP_VALUE); sourceval = fnvlist_lookup_string(propval, ZPROP_SOURCE); if (strcmp(sourceval, zfs_get_name(zhp)) == 0) { sourcetype = ZPROP_SRC_LOCAL; } else if (strcmp(sourceval, ZPROP_SOURCE_VAL_RECVD) == 0) { sourcetype = ZPROP_SRC_RECEIVED; } else { sourcetype = ZPROP_SRC_INHERITED; (void) strlcpy(source, sourceval, sizeof (source)); } } if (received && (zfs_prop_get_recvd(zhp, pl->pl_user_prop, rbuf, sizeof (rbuf), cbp->cb_literal) == 0)) recvdval = rbuf; zprop_print_one_property(zfs_get_name(zhp), cbp, pl->pl_user_prop, strval, sourcetype, source, recvdval); } } return (0); } static int zfs_do_get(int argc, char **argv) { zprop_get_cbdata_t cb = { 0 }; int i, c, flags = ZFS_ITER_ARGS_CAN_BE_PATHS; int types = ZFS_TYPE_DATASET | ZFS_TYPE_BOOKMARK; char *fields; int ret = 0; int limit = 0; zprop_list_t fake_name = { 0 }; /* * Set up default columns and sources. */ cb.cb_sources = ZPROP_SRC_ALL; cb.cb_columns[0] = GET_COL_NAME; cb.cb_columns[1] = GET_COL_PROPERTY; cb.cb_columns[2] = GET_COL_VALUE; cb.cb_columns[3] = GET_COL_SOURCE; cb.cb_type = ZFS_TYPE_DATASET; /* check options */ while ((c = getopt(argc, argv, ":d:o:s:rt:Hp")) != -1) { switch (c) { case 'p': cb.cb_literal = B_TRUE; break; case 'd': limit = parse_depth(optarg, &flags); break; case 'r': flags |= ZFS_ITER_RECURSE; break; case 'H': cb.cb_scripted = B_TRUE; break; case ':': (void) fprintf(stderr, gettext("missing argument for " "'%c' option\n"), optopt); usage(B_FALSE); break; case 'o': /* * Process the set of columns to display. We zero out * the structure to give us a blank slate. 
*/ memset(&cb.cb_columns, 0, sizeof (cb.cb_columns)); i = 0; for (char *tok; (tok = strsep(&optarg, ",")); ) { static const char *const col_subopts[] = { "name", "property", "value", "received", "source", "all" }; static const zfs_get_column_t col_subopt_col[] = { GET_COL_NAME, GET_COL_PROPERTY, GET_COL_VALUE, GET_COL_RECVD, GET_COL_SOURCE }; static const int col_subopt_flags[] = { 0, 0, 0, ZFS_ITER_RECVD_PROPS, 0 }; if (i == ZFS_GET_NCOLS) { (void) fprintf(stderr, gettext("too " "many fields given to -o " "option\n")); usage(B_FALSE); } for (c = 0; c < ARRAY_SIZE(col_subopts); ++c) if (strcmp(tok, col_subopts[c]) == 0) goto found; (void) fprintf(stderr, gettext("invalid column name '%s'\n"), tok); usage(B_FALSE); found: if (c >= 5) { if (i > 0) { (void) fprintf(stderr, gettext("\"all\" conflicts " "with specific fields " "given to -o option\n")); usage(B_FALSE); } memcpy(cb.cb_columns, col_subopt_col, sizeof (col_subopt_col)); flags |= ZFS_ITER_RECVD_PROPS; i = ZFS_GET_NCOLS; } else { cb.cb_columns[i++] = col_subopt_col[c]; flags |= col_subopt_flags[c]; } } break; case 's': cb.cb_sources = 0; for (char *tok; (tok = strsep(&optarg, ",")); ) { static const char *const source_opt[] = { "local", "default", "inherited", "received", "temporary", "none" }; static const int source_flg[] = { ZPROP_SRC_LOCAL, ZPROP_SRC_DEFAULT, ZPROP_SRC_INHERITED, ZPROP_SRC_RECEIVED, ZPROP_SRC_TEMPORARY, ZPROP_SRC_NONE }; for (i = 0; i < ARRAY_SIZE(source_opt); ++i) if (strcmp(tok, source_opt[i]) == 0) { cb.cb_sources |= source_flg[i]; goto found2; } (void) fprintf(stderr, gettext("invalid source '%s'\n"), tok); usage(B_FALSE); found2:; } break; case 't': types = 0; flags &= ~ZFS_ITER_PROP_LISTSNAPS; for (char *tok; (tok = strsep(&optarg, ",")); ) { static const char *const type_opts[] = { "filesystem", "volume", "snapshot", "snap", "bookmark", "all" }; static const int type_types[] = { ZFS_TYPE_FILESYSTEM, ZFS_TYPE_VOLUME, ZFS_TYPE_SNAPSHOT, ZFS_TYPE_SNAPSHOT, ZFS_TYPE_BOOKMARK, ZFS_TYPE_DATASET | ZFS_TYPE_BOOKMARK }; for (i = 0; i < ARRAY_SIZE(type_opts); ++i) if (strcmp(tok, type_opts[i]) == 0) { types |= type_types[i]; goto found3; } (void) fprintf(stderr, gettext("invalid type '%s'\n"), tok); usage(B_FALSE); found3:; } break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); usage(B_FALSE); } } argc -= optind; argv += optind; if (argc < 1) { (void) fprintf(stderr, gettext("missing property " "argument\n")); usage(B_FALSE); } fields = argv[0]; /* * Handle users who want to get all snapshots or bookmarks * of a dataset (ex. 'zfs get -t snapshot refer '). */ if ((types == ZFS_TYPE_SNAPSHOT || types == ZFS_TYPE_BOOKMARK) && argc > 1 && (flags & ZFS_ITER_RECURSE) == 0 && limit == 0) { flags |= (ZFS_ITER_DEPTH_LIMIT | ZFS_ITER_RECURSE); limit = 1; } if (zprop_get_list(g_zfs, fields, &cb.cb_proplist, ZFS_TYPE_DATASET) != 0) usage(B_FALSE); argc--; argv++; /* * As part of zfs_expand_proplist(), we keep track of the maximum column * width for each property. For the 'NAME' (and 'SOURCE') columns, we * need to know the maximum name length. However, the user likely did * not specify 'name' as one of the properties to fetch, so we need to * make sure we always include at least this property for * print_get_headers() to work properly. 
*/ if (cb.cb_proplist != NULL) { fake_name.pl_prop = ZFS_PROP_NAME; fake_name.pl_width = strlen(gettext("NAME")); fake_name.pl_next = cb.cb_proplist; cb.cb_proplist = &fake_name; } cb.cb_first = B_TRUE; /* run for each object */ ret = zfs_for_each(argc, argv, flags, types, NULL, &cb.cb_proplist, limit, get_callback, &cb); if (cb.cb_proplist == &fake_name) zprop_free_list(fake_name.pl_next); else zprop_free_list(cb.cb_proplist); return (ret); } /* * inherit [-rS] ... * * -r Recurse over all children * -S Revert to received value, if any * * For each dataset specified on the command line, inherit the given property * from its parent. Inheriting a property at the pool level will cause it to * use the default value. The '-r' flag will recurse over all children, and is * useful for setting a property on a hierarchy-wide basis, regardless of any * local modifications for each dataset. */ typedef struct inherit_cbdata { const char *cb_propname; boolean_t cb_received; } inherit_cbdata_t; static int inherit_recurse_cb(zfs_handle_t *zhp, void *data) { inherit_cbdata_t *cb = data; zfs_prop_t prop = zfs_name_to_prop(cb->cb_propname); /* * If we're doing it recursively, then ignore properties that * are not valid for this type of dataset. */ if (prop != ZPROP_INVAL && !zfs_prop_valid_for_type(prop, zfs_get_type(zhp), B_FALSE)) return (0); return (zfs_prop_inherit(zhp, cb->cb_propname, cb->cb_received) != 0); } static int inherit_cb(zfs_handle_t *zhp, void *data) { inherit_cbdata_t *cb = data; return (zfs_prop_inherit(zhp, cb->cb_propname, cb->cb_received) != 0); } static int zfs_do_inherit(int argc, char **argv) { int c; zfs_prop_t prop; inherit_cbdata_t cb = { 0 }; char *propname; int ret = 0; int flags = 0; boolean_t received = B_FALSE; /* check options */ while ((c = getopt(argc, argv, "rS")) != -1) { switch (c) { case 'r': flags |= ZFS_ITER_RECURSE; break; case 'S': received = B_TRUE; break; case '?': default: (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); usage(B_FALSE); } } argc -= optind; argv += optind; /* check number of arguments */ if (argc < 1) { (void) fprintf(stderr, gettext("missing property argument\n")); usage(B_FALSE); } if (argc < 2) { (void) fprintf(stderr, gettext("missing dataset argument\n")); usage(B_FALSE); } propname = argv[0]; argc--; argv++; if ((prop = zfs_name_to_prop(propname)) != ZPROP_USERPROP) { if (zfs_prop_readonly(prop)) { (void) fprintf(stderr, gettext( "%s property is read-only\n"), propname); return (1); } if (!zfs_prop_inheritable(prop) && !received) { (void) fprintf(stderr, gettext("'%s' property cannot " "be inherited\n"), propname); if (prop == ZFS_PROP_QUOTA || prop == ZFS_PROP_RESERVATION || prop == ZFS_PROP_REFQUOTA || prop == ZFS_PROP_REFRESERVATION) { (void) fprintf(stderr, gettext("use 'zfs set " "%s=none' to clear\n"), propname); (void) fprintf(stderr, gettext("use 'zfs " "inherit -S %s' to revert to received " "value\n"), propname); } return (1); } if (received && (prop == ZFS_PROP_VOLSIZE || prop == ZFS_PROP_VERSION)) { (void) fprintf(stderr, gettext("'%s' property cannot " "be reverted to a received value\n"), propname); return (1); } } else if (!zfs_prop_user(propname)) { (void) fprintf(stderr, gettext("invalid property '%s'\n"), propname); usage(B_FALSE); } cb.cb_propname = propname; cb.cb_received = received; if (flags & ZFS_ITER_RECURSE) { ret = zfs_for_each(argc, argv, flags, ZFS_TYPE_DATASET, NULL, NULL, 0, inherit_recurse_cb, &cb); } else { ret = zfs_for_each(argc, argv, flags, ZFS_TYPE_DATASET, NULL, NULL, 0, inherit_cb, 
&cb); } return (ret); } typedef struct upgrade_cbdata { uint64_t cb_numupgraded; uint64_t cb_numsamegraded; uint64_t cb_numfailed; uint64_t cb_version; boolean_t cb_newer; boolean_t cb_foundone; char cb_lastfs[ZFS_MAX_DATASET_NAME_LEN]; } upgrade_cbdata_t; static int same_pool(zfs_handle_t *zhp, const char *name) { int len1 = strcspn(name, "/@"); const char *zhname = zfs_get_name(zhp); int len2 = strcspn(zhname, "/@"); if (len1 != len2) return (B_FALSE); return (strncmp(name, zhname, len1) == 0); } static int upgrade_list_callback(zfs_handle_t *zhp, void *data) { upgrade_cbdata_t *cb = data; int version = zfs_prop_get_int(zhp, ZFS_PROP_VERSION); /* list if it's old/new */ if ((!cb->cb_newer && version < ZPL_VERSION) || (cb->cb_newer && version > ZPL_VERSION)) { char *str; if (cb->cb_newer) { str = gettext("The following filesystems are " "formatted using a newer software version and\n" "cannot be accessed on the current system.\n\n"); } else { str = gettext("The following filesystems are " "out of date, and can be upgraded. After being\n" "upgraded, these filesystems (and any 'zfs send' " "streams generated from\n" "subsequent snapshots) will no longer be " "accessible by older software versions.\n\n"); } if (!cb->cb_foundone) { (void) puts(str); (void) printf(gettext("VER FILESYSTEM\n")); (void) printf(gettext("--- ------------\n")); cb->cb_foundone = B_TRUE; } (void) printf("%2u %s\n", version, zfs_get_name(zhp)); } return (0); } static int upgrade_set_callback(zfs_handle_t *zhp, void *data) { upgrade_cbdata_t *cb = data; int version = zfs_prop_get_int(zhp, ZFS_PROP_VERSION); int needed_spa_version; int spa_version; if (zfs_spa_version(zhp, &spa_version) < 0) return (-1); needed_spa_version = zfs_spa_version_map(cb->cb_version); if (needed_spa_version < 0) return (-1); if (spa_version < needed_spa_version) { /* can't upgrade */ (void) printf(gettext("%s: can not be " "upgraded; the pool version needs to first " "be upgraded\nto version %d\n\n"), zfs_get_name(zhp), needed_spa_version); cb->cb_numfailed++; return (0); } /* upgrade */ if (version < cb->cb_version) { char verstr[24]; (void) snprintf(verstr, sizeof (verstr), "%llu", (u_longlong_t)cb->cb_version); if (cb->cb_lastfs[0] && !same_pool(zhp, cb->cb_lastfs)) { /* * If they did "zfs upgrade -a", then we could * be doing ioctls to different pools. We need * to log this history once to each pool, and bypass * the normal history logging that happens in main(). 
*/ (void) zpool_log_history(g_zfs, history_str); log_history = B_FALSE; } if (zfs_prop_set(zhp, "version", verstr) == 0) cb->cb_numupgraded++; else cb->cb_numfailed++; (void) strcpy(cb->cb_lastfs, zfs_get_name(zhp)); } else if (version > cb->cb_version) { /* can't downgrade */ (void) printf(gettext("%s: can not be downgraded; " "it is already at version %u\n"), zfs_get_name(zhp), version); cb->cb_numfailed++; } else { cb->cb_numsamegraded++; } return (0); } /* * zfs upgrade * zfs upgrade -v * zfs upgrade [-r] [-V ] <-a | filesystem> */ static int zfs_do_upgrade(int argc, char **argv) { boolean_t all = B_FALSE; boolean_t showversions = B_FALSE; int ret = 0; upgrade_cbdata_t cb = { 0 }; int c; int flags = ZFS_ITER_ARGS_CAN_BE_PATHS; /* check options */ while ((c = getopt(argc, argv, "rvV:a")) != -1) { switch (c) { case 'r': flags |= ZFS_ITER_RECURSE; break; case 'v': showversions = B_TRUE; break; case 'V': if (zfs_prop_string_to_index(ZFS_PROP_VERSION, optarg, &cb.cb_version) != 0) { (void) fprintf(stderr, gettext("invalid version %s\n"), optarg); usage(B_FALSE); } break; case 'a': all = B_TRUE; break; case '?': default: (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); usage(B_FALSE); } } argc -= optind; argv += optind; if ((!all && !argc) && ((flags & ZFS_ITER_RECURSE) | cb.cb_version)) usage(B_FALSE); if (showversions && (flags & ZFS_ITER_RECURSE || all || cb.cb_version || argc)) usage(B_FALSE); if ((all || argc) && (showversions)) usage(B_FALSE); if (all && argc) usage(B_FALSE); if (showversions) { /* Show info on available versions. */ (void) printf(gettext("The following filesystem versions are " "supported:\n\n")); (void) printf(gettext("VER DESCRIPTION\n")); (void) printf("--- -----------------------------------------" "---------------\n"); (void) printf(gettext(" 1 Initial ZFS filesystem version\n")); (void) printf(gettext(" 2 Enhanced directory entries\n")); (void) printf(gettext(" 3 Case insensitive and filesystem " "user identifier (FUID)\n")); (void) printf(gettext(" 4 userquota, groupquota " "properties\n")); (void) printf(gettext(" 5 System attributes\n")); (void) printf(gettext("\nFor more information on a particular " "version, including supported releases,\n")); (void) printf("see the ZFS Administration Guide.\n\n"); ret = 0; } else if (argc || all) { /* Upgrade filesystems */ if (cb.cb_version == 0) cb.cb_version = ZPL_VERSION; ret = zfs_for_each(argc, argv, flags, ZFS_TYPE_FILESYSTEM, NULL, NULL, 0, upgrade_set_callback, &cb); (void) printf(gettext("%llu filesystems upgraded\n"), (u_longlong_t)cb.cb_numupgraded); if (cb.cb_numsamegraded) { (void) printf(gettext("%llu filesystems already at " "this version\n"), (u_longlong_t)cb.cb_numsamegraded); } if (cb.cb_numfailed != 0) ret = 1; } else { /* List old-version filesystems */ boolean_t found; (void) printf(gettext("This system is currently running " "ZFS filesystem version %llu.\n\n"), ZPL_VERSION); flags |= ZFS_ITER_RECURSE; ret = zfs_for_each(0, NULL, flags, ZFS_TYPE_FILESYSTEM, NULL, NULL, 0, upgrade_list_callback, &cb); found = cb.cb_foundone; cb.cb_foundone = B_FALSE; cb.cb_newer = B_TRUE; ret = zfs_for_each(0, NULL, flags, ZFS_TYPE_FILESYSTEM, NULL, NULL, 0, upgrade_list_callback, &cb); if (!cb.cb_foundone && !found) { (void) printf(gettext("All filesystems are " "formatted with the current version.\n")); } } return (ret); } /* * zfs userspace [-Hinp] [-o field[,...]] [-s field [-s field]...] * [-S field [-S field]...] 
[-t type[,...]] * filesystem | snapshot | path * zfs groupspace [-Hinp] [-o field[,...]] [-s field [-s field]...] * [-S field [-S field]...] [-t type[,...]] * filesystem | snapshot | path * zfs projectspace [-Hp] [-o field[,...]] [-s field [-s field]...] * [-S field [-S field]...] filesystem | snapshot | path * * -H Scripted mode; elide headers and separate columns by tabs. * -i Translate SID to POSIX ID. * -n Print numeric ID instead of user/group name. * -o Control which fields to display. * -p Use exact (parsable) numeric output. * -s Specify sort columns, descending order. * -S Specify sort columns, ascending order. * -t Control which object types to display. * * Displays space consumed by, and quotas on, each user in the specified * filesystem or snapshot. */ /* us_field_types, us_field_hdr and us_field_names should be kept in sync */ enum us_field_types { USFIELD_TYPE, USFIELD_NAME, USFIELD_USED, USFIELD_QUOTA, USFIELD_OBJUSED, USFIELD_OBJQUOTA }; static const char *const us_field_hdr[] = { "TYPE", "NAME", "USED", "QUOTA", "OBJUSED", "OBJQUOTA" }; static const char *const us_field_names[] = { "type", "name", "used", "quota", "objused", "objquota" }; #define USFIELD_LAST (sizeof (us_field_names) / sizeof (char *)) #define USTYPE_PSX_GRP (1 << 0) #define USTYPE_PSX_USR (1 << 1) #define USTYPE_SMB_GRP (1 << 2) #define USTYPE_SMB_USR (1 << 3) #define USTYPE_PROJ (1 << 4) #define USTYPE_ALL \ (USTYPE_PSX_GRP | USTYPE_PSX_USR | USTYPE_SMB_GRP | USTYPE_SMB_USR | \ USTYPE_PROJ) static int us_type_bits[] = { USTYPE_PSX_GRP, USTYPE_PSX_USR, USTYPE_SMB_GRP, USTYPE_SMB_USR, USTYPE_ALL }; static const char *const us_type_names[] = { "posixgroup", "posixuser", "smbgroup", "smbuser", "all" }; typedef struct us_node { nvlist_t *usn_nvl; uu_avl_node_t usn_avlnode; uu_list_node_t usn_listnode; } us_node_t; typedef struct us_cbdata { nvlist_t **cb_nvlp; uu_avl_pool_t *cb_avl_pool; uu_avl_t *cb_avl; boolean_t cb_numname; boolean_t cb_nicenum; boolean_t cb_sid2posix; zfs_userquota_prop_t cb_prop; zfs_sort_column_t *cb_sortcol; size_t cb_width[USFIELD_LAST]; } us_cbdata_t; static boolean_t us_populated = B_FALSE; typedef struct { zfs_sort_column_t *si_sortcol; boolean_t si_numname; } us_sort_info_t; static int us_field_index(const char *field) { for (int i = 0; i < USFIELD_LAST; i++) { if (strcmp(field, us_field_names[i]) == 0) return (i); } return (-1); } static int us_compare(const void *larg, const void *rarg, void *unused) { const us_node_t *l = larg; const us_node_t *r = rarg; us_sort_info_t *si = (us_sort_info_t *)unused; zfs_sort_column_t *sortcol = si->si_sortcol; boolean_t numname = si->si_numname; nvlist_t *lnvl = l->usn_nvl; nvlist_t *rnvl = r->usn_nvl; int rc = 0; boolean_t lvb, rvb; for (; sortcol != NULL; sortcol = sortcol->sc_next) { char *lvstr = (char *)""; char *rvstr = (char *)""; uint32_t lv32 = 0; uint32_t rv32 = 0; uint64_t lv64 = 0; uint64_t rv64 = 0; zfs_prop_t prop = sortcol->sc_prop; const char *propname = NULL; boolean_t reverse = sortcol->sc_reverse; switch (prop) { case ZFS_PROP_TYPE: propname = "type"; (void) nvlist_lookup_uint32(lnvl, propname, &lv32); (void) nvlist_lookup_uint32(rnvl, propname, &rv32); if (rv32 != lv32) rc = (rv32 < lv32) ? 1 : -1; break; case ZFS_PROP_NAME: propname = "name"; if (numname) { compare_nums: (void) nvlist_lookup_uint64(lnvl, propname, &lv64); (void) nvlist_lookup_uint64(rnvl, propname, &rv64); if (rv64 != lv64) rc = (rv64 < lv64) ? 
1 : -1; } else { if ((nvlist_lookup_string(lnvl, propname, &lvstr) == ENOENT) || (nvlist_lookup_string(rnvl, propname, &rvstr) == ENOENT)) { goto compare_nums; } rc = strcmp(lvstr, rvstr); } break; case ZFS_PROP_USED: case ZFS_PROP_QUOTA: if (!us_populated) break; if (prop == ZFS_PROP_USED) propname = "used"; else propname = "quota"; (void) nvlist_lookup_uint64(lnvl, propname, &lv64); (void) nvlist_lookup_uint64(rnvl, propname, &rv64); if (rv64 != lv64) rc = (rv64 < lv64) ? 1 : -1; break; default: break; } if (rc != 0) { if (rc < 0) return (reverse ? 1 : -1); else return (reverse ? -1 : 1); } } /* * If entries still seem to be the same, check if they are of the same * type (smbentity is added only if we are doing SID to POSIX ID * translation where we can have duplicate type/name combinations). */ if (nvlist_lookup_boolean_value(lnvl, "smbentity", &lvb) == 0 && nvlist_lookup_boolean_value(rnvl, "smbentity", &rvb) == 0 && lvb != rvb) return (lvb < rvb ? -1 : 1); return (0); } static boolean_t zfs_prop_is_user(unsigned p) { return (p == ZFS_PROP_USERUSED || p == ZFS_PROP_USERQUOTA || p == ZFS_PROP_USEROBJUSED || p == ZFS_PROP_USEROBJQUOTA); } static boolean_t zfs_prop_is_group(unsigned p) { return (p == ZFS_PROP_GROUPUSED || p == ZFS_PROP_GROUPQUOTA || p == ZFS_PROP_GROUPOBJUSED || p == ZFS_PROP_GROUPOBJQUOTA); } static boolean_t zfs_prop_is_project(unsigned p) { return (p == ZFS_PROP_PROJECTUSED || p == ZFS_PROP_PROJECTQUOTA || p == ZFS_PROP_PROJECTOBJUSED || p == ZFS_PROP_PROJECTOBJQUOTA); } static inline const char * us_type2str(unsigned field_type) { switch (field_type) { case USTYPE_PSX_USR: return ("POSIX User"); case USTYPE_PSX_GRP: return ("POSIX Group"); case USTYPE_SMB_USR: return ("SMB User"); case USTYPE_SMB_GRP: return ("SMB Group"); case USTYPE_PROJ: return ("Project"); default: return ("Undefined"); } } static int userspace_cb(void *arg, const char *domain, uid_t rid, uint64_t space) { us_cbdata_t *cb = (us_cbdata_t *)arg; zfs_userquota_prop_t prop = cb->cb_prop; char *name = NULL; const char *propname; char sizebuf[32]; us_node_t *node; uu_avl_pool_t *avl_pool = cb->cb_avl_pool; uu_avl_t *avl = cb->cb_avl; uu_avl_index_t idx; nvlist_t *props; us_node_t *n; zfs_sort_column_t *sortcol = cb->cb_sortcol; unsigned type = 0; const char *typestr; size_t namelen; size_t typelen; size_t sizelen; int typeidx, nameidx, sizeidx; us_sort_info_t sortinfo = { sortcol, cb->cb_numname }; boolean_t smbentity = B_FALSE; if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0) nomem(); node = safe_malloc(sizeof (us_node_t)); uu_avl_node_init(node, &node->usn_avlnode, avl_pool); node->usn_nvl = props; if (domain != NULL && domain[0] != '\0') { #ifdef HAVE_IDMAP /* SMB */ char sid[MAXNAMELEN + 32]; uid_t id; uint64_t classes; int err; directory_error_t e; smbentity = B_TRUE; (void) snprintf(sid, sizeof (sid), "%s-%u", domain, rid); if (prop == ZFS_PROP_GROUPUSED || prop == ZFS_PROP_GROUPQUOTA) { type = USTYPE_SMB_GRP; err = sid_to_id(sid, B_FALSE, &id); } else { type = USTYPE_SMB_USR; err = sid_to_id(sid, B_TRUE, &id); } if (err == 0) { rid = id; if (!cb->cb_sid2posix) { e = directory_name_from_sid(NULL, sid, &name, &classes); if (e != NULL) directory_error_free(e); if (name == NULL) name = sid; } } #else nvlist_free(props); free(node); return (-1); #endif /* HAVE_IDMAP */ } if (cb->cb_sid2posix || domain == NULL || domain[0] == '\0') { /* POSIX or -i */ if (zfs_prop_is_group(prop)) { type = USTYPE_PSX_GRP; if (!cb->cb_numname) { struct group *g; if ((g = getgrgid(rid)) != NULL) name = g->gr_name; } } 
else if (zfs_prop_is_user(prop)) { type = USTYPE_PSX_USR; if (!cb->cb_numname) { struct passwd *p; if ((p = getpwuid(rid)) != NULL) name = p->pw_name; } } else { type = USTYPE_PROJ; } } /* * Make sure that the type/name combination is unique when doing * SID to POSIX ID translation (hence changing the type from SMB to * POSIX). */ if (cb->cb_sid2posix && nvlist_add_boolean_value(props, "smbentity", smbentity) != 0) nomem(); /* Calculate/update width of TYPE field */ typestr = us_type2str(type); typelen = strlen(gettext(typestr)); typeidx = us_field_index("type"); if (typelen > cb->cb_width[typeidx]) cb->cb_width[typeidx] = typelen; if (nvlist_add_uint32(props, "type", type) != 0) nomem(); /* Calculate/update width of NAME field */ if ((cb->cb_numname && cb->cb_sid2posix) || name == NULL) { if (nvlist_add_uint64(props, "name", rid) != 0) nomem(); namelen = snprintf(NULL, 0, "%u", rid); } else { if (nvlist_add_string(props, "name", name) != 0) nomem(); namelen = strlen(name); } nameidx = us_field_index("name"); if (nameidx >= 0 && namelen > cb->cb_width[nameidx]) cb->cb_width[nameidx] = namelen; /* * Check if this type/name combination is in the list and update it; * otherwise add new node to the list. */ if ((n = uu_avl_find(avl, node, &sortinfo, &idx)) == NULL) { uu_avl_insert(avl, node, idx); } else { nvlist_free(props); free(node); node = n; props = node->usn_nvl; } /* Calculate/update width of USED/QUOTA fields */ if (cb->cb_nicenum) { if (prop == ZFS_PROP_USERUSED || prop == ZFS_PROP_GROUPUSED || prop == ZFS_PROP_USERQUOTA || prop == ZFS_PROP_GROUPQUOTA || prop == ZFS_PROP_PROJECTUSED || prop == ZFS_PROP_PROJECTQUOTA) { zfs_nicebytes(space, sizebuf, sizeof (sizebuf)); } else { zfs_nicenum(space, sizebuf, sizeof (sizebuf)); } } else { (void) snprintf(sizebuf, sizeof (sizebuf), "%llu", (u_longlong_t)space); } sizelen = strlen(sizebuf); if (prop == ZFS_PROP_USERUSED || prop == ZFS_PROP_GROUPUSED || prop == ZFS_PROP_PROJECTUSED) { propname = "used"; if (!nvlist_exists(props, "quota")) (void) nvlist_add_uint64(props, "quota", 0); } else if (prop == ZFS_PROP_USERQUOTA || prop == ZFS_PROP_GROUPQUOTA || prop == ZFS_PROP_PROJECTQUOTA) { propname = "quota"; if (!nvlist_exists(props, "used")) (void) nvlist_add_uint64(props, "used", 0); } else if (prop == ZFS_PROP_USEROBJUSED || prop == ZFS_PROP_GROUPOBJUSED || prop == ZFS_PROP_PROJECTOBJUSED) { propname = "objused"; if (!nvlist_exists(props, "objquota")) (void) nvlist_add_uint64(props, "objquota", 0); } else if (prop == ZFS_PROP_USEROBJQUOTA || prop == ZFS_PROP_GROUPOBJQUOTA || prop == ZFS_PROP_PROJECTOBJQUOTA) { propname = "objquota"; if (!nvlist_exists(props, "objused")) (void) nvlist_add_uint64(props, "objused", 0); } else { return (-1); } sizeidx = us_field_index(propname); if (sizeidx >= 0 && sizelen > cb->cb_width[sizeidx]) cb->cb_width[sizeidx] = sizelen; if (nvlist_add_uint64(props, propname, space) != 0) nomem(); return (0); } static void print_us_node(boolean_t scripted, boolean_t parsable, int *fields, int types, size_t *width, us_node_t *node) { nvlist_t *nvl = node->usn_nvl; char valstr[MAXNAMELEN]; boolean_t first = B_TRUE; int cfield = 0; int field; uint32_t ustype; /* Check type */ (void) nvlist_lookup_uint32(nvl, "type", &ustype); if (!(ustype & types)) return; while ((field = fields[cfield]) != USFIELD_LAST) { nvpair_t *nvp = NULL; data_type_t type; uint32_t val32 = -1; uint64_t val64 = -1; const char *strval = "-"; while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) if (strcmp(nvpair_name(nvp), us_field_names[field]) == 0) 
break; type = nvp == NULL ? DATA_TYPE_UNKNOWN : nvpair_type(nvp); switch (type) { case DATA_TYPE_UINT32: val32 = fnvpair_value_uint32(nvp); break; case DATA_TYPE_UINT64: val64 = fnvpair_value_uint64(nvp); break; case DATA_TYPE_STRING: strval = fnvpair_value_string(nvp); break; case DATA_TYPE_UNKNOWN: break; default: (void) fprintf(stderr, "invalid data type\n"); } switch (field) { case USFIELD_TYPE: if (type == DATA_TYPE_UINT32) strval = us_type2str(val32); break; case USFIELD_NAME: if (type == DATA_TYPE_UINT64) { (void) sprintf(valstr, "%llu", (u_longlong_t)val64); strval = valstr; } break; case USFIELD_USED: case USFIELD_QUOTA: if (type == DATA_TYPE_UINT64) { if (parsable) { (void) sprintf(valstr, "%llu", (u_longlong_t)val64); strval = valstr; } else if (field == USFIELD_QUOTA && val64 == 0) { strval = "none"; } else { zfs_nicebytes(val64, valstr, sizeof (valstr)); strval = valstr; } } break; case USFIELD_OBJUSED: case USFIELD_OBJQUOTA: if (type == DATA_TYPE_UINT64) { if (parsable) { (void) sprintf(valstr, "%llu", (u_longlong_t)val64); strval = valstr; } else if (field == USFIELD_OBJQUOTA && val64 == 0) { strval = "none"; } else { zfs_nicenum(val64, valstr, sizeof (valstr)); strval = valstr; } } break; } if (!first) { if (scripted) (void) putchar('\t'); else (void) fputs(" ", stdout); } if (scripted) (void) fputs(strval, stdout); else if (field == USFIELD_TYPE || field == USFIELD_NAME) (void) printf("%-*s", (int)width[field], strval); else (void) printf("%*s", (int)width[field], strval); first = B_FALSE; cfield++; } (void) putchar('\n'); } static void print_us(boolean_t scripted, boolean_t parsable, int *fields, int types, size_t *width, boolean_t rmnode, uu_avl_t *avl) { us_node_t *node; const char *col; int cfield = 0; int field; if (!scripted) { boolean_t first = B_TRUE; while ((field = fields[cfield]) != USFIELD_LAST) { col = gettext(us_field_hdr[field]); if (field == USFIELD_TYPE || field == USFIELD_NAME) { (void) printf(first ? "%-*s" : " %-*s", (int)width[field], col); } else { (void) printf(first ? 
"%*s" : " %*s", (int)width[field], col); } first = B_FALSE; cfield++; } (void) printf("\n"); } for (node = uu_avl_first(avl); node; node = uu_avl_next(avl, node)) { print_us_node(scripted, parsable, fields, types, width, node); if (rmnode) nvlist_free(node->usn_nvl); } } static int zfs_do_userspace(int argc, char **argv) { zfs_handle_t *zhp; zfs_userquota_prop_t p; uu_avl_pool_t *avl_pool; uu_avl_t *avl_tree; uu_avl_walk_t *walk; char *delim; char deffields[] = "type,name,used,quota,objused,objquota"; char *ofield = NULL; char *tfield = NULL; int cfield = 0; int fields[256]; int i; boolean_t scripted = B_FALSE; boolean_t prtnum = B_FALSE; boolean_t parsable = B_FALSE; boolean_t sid2posix = B_FALSE; int ret = 0; int c; zfs_sort_column_t *sortcol = NULL; int types = USTYPE_PSX_USR | USTYPE_SMB_USR; us_cbdata_t cb; us_node_t *node; us_node_t *rmnode; uu_list_pool_t *listpool; uu_list_t *list; uu_avl_index_t idx = 0; uu_list_index_t idx2 = 0; if (argc < 2) usage(B_FALSE); if (strcmp(argv[0], "groupspace") == 0) { /* Toggle default group types */ types = USTYPE_PSX_GRP | USTYPE_SMB_GRP; } else if (strcmp(argv[0], "projectspace") == 0) { types = USTYPE_PROJ; prtnum = B_TRUE; } while ((c = getopt(argc, argv, "nHpo:s:S:t:i")) != -1) { switch (c) { case 'n': if (types == USTYPE_PROJ) { (void) fprintf(stderr, gettext("invalid option 'n'\n")); usage(B_FALSE); } prtnum = B_TRUE; break; case 'H': scripted = B_TRUE; break; case 'p': parsable = B_TRUE; break; case 'o': ofield = optarg; break; case 's': case 'S': if (zfs_add_sort_column(&sortcol, optarg, c == 's' ? B_FALSE : B_TRUE) != 0) { (void) fprintf(stderr, gettext("invalid field '%s'\n"), optarg); usage(B_FALSE); } break; case 't': if (types == USTYPE_PROJ) { (void) fprintf(stderr, gettext("invalid option 't'\n")); usage(B_FALSE); } tfield = optarg; break; case 'i': if (types == USTYPE_PROJ) { (void) fprintf(stderr, gettext("invalid option 'i'\n")); usage(B_FALSE); } sid2posix = B_TRUE; break; case ':': (void) fprintf(stderr, gettext("missing argument for " "'%c' option\n"), optopt); usage(B_FALSE); break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); usage(B_FALSE); } } argc -= optind; argv += optind; if (argc < 1) { (void) fprintf(stderr, gettext("missing dataset name\n")); usage(B_FALSE); } if (argc > 1) { (void) fprintf(stderr, gettext("too many arguments\n")); usage(B_FALSE); } /* Use default output fields if not specified using -o */ if (ofield == NULL) ofield = deffields; do { if ((delim = strchr(ofield, ',')) != NULL) *delim = '\0'; if ((fields[cfield++] = us_field_index(ofield)) == -1) { (void) fprintf(stderr, gettext("invalid type '%s' " "for -o option\n"), ofield); return (-1); } if (delim != NULL) ofield = delim + 1; } while (delim != NULL); fields[cfield] = USFIELD_LAST; /* Override output types (-t option) */ if (tfield != NULL) { types = 0; do { boolean_t found = B_FALSE; if ((delim = strchr(tfield, ',')) != NULL) *delim = '\0'; for (i = 0; i < sizeof (us_type_bits) / sizeof (int); i++) { if (strcmp(tfield, us_type_names[i]) == 0) { found = B_TRUE; types |= us_type_bits[i]; break; } } if (!found) { (void) fprintf(stderr, gettext("invalid type " "'%s' for -t option\n"), tfield); return (-1); } if (delim != NULL) tfield = delim + 1; } while (delim != NULL); } if ((zhp = zfs_path_to_zhandle(g_zfs, argv[0], ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT)) == NULL) return (1); if (zfs_get_underlying_type(zhp) != ZFS_TYPE_FILESYSTEM) { (void) fprintf(stderr, gettext("operation is only applicable " "to filesystems and 
their snapshots\n")); zfs_close(zhp); return (1); } if ((avl_pool = uu_avl_pool_create("us_avl_pool", sizeof (us_node_t), offsetof(us_node_t, usn_avlnode), us_compare, UU_DEFAULT)) == NULL) nomem(); if ((avl_tree = uu_avl_create(avl_pool, NULL, UU_DEFAULT)) == NULL) nomem(); /* Always add default sorting columns */ (void) zfs_add_sort_column(&sortcol, "type", B_FALSE); (void) zfs_add_sort_column(&sortcol, "name", B_FALSE); cb.cb_sortcol = sortcol; cb.cb_numname = prtnum; cb.cb_nicenum = !parsable; cb.cb_avl_pool = avl_pool; cb.cb_avl = avl_tree; cb.cb_sid2posix = sid2posix; for (i = 0; i < USFIELD_LAST; i++) cb.cb_width[i] = strlen(gettext(us_field_hdr[i])); for (p = 0; p < ZFS_NUM_USERQUOTA_PROPS; p++) { if ((zfs_prop_is_user(p) && !(types & (USTYPE_PSX_USR | USTYPE_SMB_USR))) || (zfs_prop_is_group(p) && !(types & (USTYPE_PSX_GRP | USTYPE_SMB_GRP))) || (zfs_prop_is_project(p) && types != USTYPE_PROJ)) continue; cb.cb_prop = p; if ((ret = zfs_userspace(zhp, p, userspace_cb, &cb)) != 0) { zfs_close(zhp); return (ret); } } zfs_close(zhp); /* Sort the list */ if ((node = uu_avl_first(avl_tree)) == NULL) return (0); us_populated = B_TRUE; listpool = uu_list_pool_create("tmplist", sizeof (us_node_t), offsetof(us_node_t, usn_listnode), NULL, UU_DEFAULT); list = uu_list_create(listpool, NULL, UU_DEFAULT); uu_list_node_init(node, &node->usn_listnode, listpool); while (node != NULL) { rmnode = node; node = uu_avl_next(avl_tree, node); uu_avl_remove(avl_tree, rmnode); if (uu_list_find(list, rmnode, NULL, &idx2) == NULL) uu_list_insert(list, rmnode, idx2); } for (node = uu_list_first(list); node != NULL; node = uu_list_next(list, node)) { us_sort_info_t sortinfo = { sortcol, cb.cb_numname }; if (uu_avl_find(avl_tree, node, &sortinfo, &idx) == NULL) uu_avl_insert(avl_tree, node, idx); } uu_list_destroy(list); uu_list_pool_destroy(listpool); /* Print and free node nvlist memory */ print_us(scripted, parsable, fields, types, cb.cb_width, B_TRUE, cb.cb_avl); zfs_free_sort_columns(sortcol); /* Clean up the AVL tree */ if ((walk = uu_avl_walk_start(cb.cb_avl, UU_WALK_ROBUST)) == NULL) nomem(); while ((node = uu_avl_walk_next(walk)) != NULL) { uu_avl_remove(cb.cb_avl, node); free(node); } uu_avl_walk_end(walk); uu_avl_destroy(avl_tree); uu_avl_pool_destroy(avl_pool); return (ret); } /* * list [-Hp][-r|-d max] [-o property[,...]] [-s property] ... [-S property] * [-t type[,...]] [filesystem|volume|snapshot] ... * * -H Scripted mode; elide headers and separate columns by tabs * -p Display values in parsable (literal) format. * -r Recurse over all children * -d Limit recursion by depth. * -o Control which fields to display. * -s Specify sort columns, descending order. * -S Specify sort columns, ascending order. * -t Control which object types to display. * * When given no arguments, list all filesystems in the system. * Otherwise, list the specified datasets, optionally recursing down them if * '-r' is specified. */ typedef struct list_cbdata { boolean_t cb_first; boolean_t cb_literal; boolean_t cb_scripted; zprop_list_t *cb_proplist; } list_cbdata_t; /* * Given a list of columns to display, output appropriate headers for each one. 
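 *
 * Illustrative example (not normative): with the default 'zfs list' field
 * set "name,used,available,referenced,mountpoint", the header line printed
 * here looks roughly like
 *
 *	NAME   USED  AVAIL  REFER  MOUNTPOINT
 *
 * with each column left- or right-justified according to
 * zfs_prop_align_right() and padded to its pl_width.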
*/ static void print_header(list_cbdata_t *cb) { zprop_list_t *pl = cb->cb_proplist; char headerbuf[ZFS_MAXPROPLEN]; const char *header; int i; boolean_t first = B_TRUE; boolean_t right_justify; for (; pl != NULL; pl = pl->pl_next) { if (!first) { (void) printf(" "); } else { first = B_FALSE; } right_justify = B_FALSE; if (pl->pl_prop != ZPROP_USERPROP) { header = zfs_prop_column_name(pl->pl_prop); right_justify = zfs_prop_align_right(pl->pl_prop); } else { for (i = 0; pl->pl_user_prop[i] != '\0'; i++) headerbuf[i] = toupper(pl->pl_user_prop[i]); headerbuf[i] = '\0'; header = headerbuf; } if (pl->pl_next == NULL && !right_justify) (void) printf("%s", header); else if (right_justify) (void) printf("%*s", (int)pl->pl_width, header); else (void) printf("%-*s", (int)pl->pl_width, header); } (void) printf("\n"); } /* * Given a dataset and a list of fields, print out all the properties according * to the described layout. */ static void print_dataset(zfs_handle_t *zhp, list_cbdata_t *cb) { zprop_list_t *pl = cb->cb_proplist; boolean_t first = B_TRUE; char property[ZFS_MAXPROPLEN]; nvlist_t *userprops = zfs_get_user_props(zhp); nvlist_t *propval; const char *propstr; boolean_t right_justify; for (; pl != NULL; pl = pl->pl_next) { if (!first) { if (cb->cb_scripted) (void) putchar('\t'); else (void) fputs(" ", stdout); } else { first = B_FALSE; } if (pl->pl_prop == ZFS_PROP_NAME) { (void) strlcpy(property, zfs_get_name(zhp), sizeof (property)); propstr = property; right_justify = zfs_prop_align_right(pl->pl_prop); } else if (pl->pl_prop != ZPROP_USERPROP) { if (zfs_prop_get(zhp, pl->pl_prop, property, sizeof (property), NULL, NULL, 0, cb->cb_literal) != 0) propstr = "-"; else propstr = property; right_justify = zfs_prop_align_right(pl->pl_prop); } else if (zfs_prop_userquota(pl->pl_user_prop)) { if (zfs_prop_get_userquota(zhp, pl->pl_user_prop, property, sizeof (property), cb->cb_literal) != 0) propstr = "-"; else propstr = property; right_justify = B_TRUE; } else if (zfs_prop_written(pl->pl_user_prop)) { if (zfs_prop_get_written(zhp, pl->pl_user_prop, property, sizeof (property), cb->cb_literal) != 0) propstr = "-"; else propstr = property; right_justify = B_TRUE; } else { if (nvlist_lookup_nvlist(userprops, pl->pl_user_prop, &propval) != 0) propstr = "-"; else propstr = fnvlist_lookup_string(propval, ZPROP_VALUE); right_justify = B_FALSE; } /* * If this is being called in scripted mode, or if this is the * last column and it is left-justified, don't include a width * format specifier. */ if (cb->cb_scripted || (pl->pl_next == NULL && !right_justify)) (void) fputs(propstr, stdout); else if (right_justify) (void) printf("%*s", (int)pl->pl_width, propstr); else (void) printf("%-*s", (int)pl->pl_width, propstr); } (void) putchar('\n'); } /* * Generic callback function to list a dataset or snapshot. 
*/ static int list_callback(zfs_handle_t *zhp, void *data) { list_cbdata_t *cbp = data; if (cbp->cb_first) { if (!cbp->cb_scripted) print_header(cbp); cbp->cb_first = B_FALSE; } print_dataset(zhp, cbp); return (0); } static int zfs_do_list(int argc, char **argv) { int c; char default_fields[] = "name,used,available,referenced,mountpoint"; int types = ZFS_TYPE_DATASET; boolean_t types_specified = B_FALSE; char *fields = default_fields; list_cbdata_t cb = { 0 }; int limit = 0; int ret = 0; zfs_sort_column_t *sortcol = NULL; int flags = ZFS_ITER_PROP_LISTSNAPS | ZFS_ITER_ARGS_CAN_BE_PATHS; /* check options */ while ((c = getopt(argc, argv, "HS:d:o:prs:t:")) != -1) { switch (c) { case 'o': fields = optarg; break; case 'p': cb.cb_literal = B_TRUE; flags |= ZFS_ITER_LITERAL_PROPS; break; case 'd': limit = parse_depth(optarg, &flags); break; case 'r': flags |= ZFS_ITER_RECURSE; break; case 'H': cb.cb_scripted = B_TRUE; break; case 's': if (zfs_add_sort_column(&sortcol, optarg, B_FALSE) != 0) { (void) fprintf(stderr, gettext("invalid property '%s'\n"), optarg); usage(B_FALSE); } break; case 'S': if (zfs_add_sort_column(&sortcol, optarg, B_TRUE) != 0) { (void) fprintf(stderr, gettext("invalid property '%s'\n"), optarg); usage(B_FALSE); } break; case 't': types = 0; types_specified = B_TRUE; flags &= ~ZFS_ITER_PROP_LISTSNAPS; for (char *tok; (tok = strsep(&optarg, ",")); ) { static const char *const type_subopts[] = { "filesystem", "volume", "snapshot", "snap", "bookmark", "all" }; static const int type_types[] = { ZFS_TYPE_FILESYSTEM, ZFS_TYPE_VOLUME, ZFS_TYPE_SNAPSHOT, ZFS_TYPE_SNAPSHOT, ZFS_TYPE_BOOKMARK, ZFS_TYPE_DATASET | ZFS_TYPE_BOOKMARK }; for (c = 0; c < ARRAY_SIZE(type_subopts); ++c) if (strcmp(tok, type_subopts[c]) == 0) { types |= type_types[c]; goto found3; } (void) fprintf(stderr, gettext("invalid type '%s'\n"), tok); usage(B_FALSE); found3:; } break; case ':': (void) fprintf(stderr, gettext("missing argument for " "'%c' option\n"), optopt); usage(B_FALSE); break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); usage(B_FALSE); } } argc -= optind; argv += optind; /* * If we are only going to list snapshot names and sort by name or * by createtxg, then we can use faster version. */ if (strcmp(fields, "name") == 0 && (zfs_sort_only_by_name(sortcol) || zfs_sort_only_by_createtxg(sortcol))) { flags |= ZFS_ITER_SIMPLE; } /* * If "-o space" and no types were specified, don't display snapshots. */ if (strcmp(fields, "space") == 0 && types_specified == B_FALSE) types &= ~ZFS_TYPE_SNAPSHOT; /* * Handle users who want to list all snapshots or bookmarks * of the current dataset (ex. 'zfs list -t snapshot '). */ if ((types == ZFS_TYPE_SNAPSHOT || types == ZFS_TYPE_BOOKMARK) && argc > 0 && (flags & ZFS_ITER_RECURSE) == 0 && limit == 0) { flags |= (ZFS_ITER_DEPTH_LIMIT | ZFS_ITER_RECURSE); limit = 1; } /* * If the user specifies '-o all', the zprop_get_list() doesn't * normally include the name of the dataset. For 'zfs list', we always * want this property to be first. 
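 *
 * Illustrative example ('tank' is a placeholder): a request such as
 *
 *	# zfs list -o all tank
 *
 * is still expected to print the NAME column first, since "all" by itself
 * would not put the name property at the head of the list.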
*/ if (zprop_get_list(g_zfs, fields, &cb.cb_proplist, ZFS_TYPE_DATASET) != 0) usage(B_FALSE); cb.cb_first = B_TRUE; ret = zfs_for_each(argc, argv, flags, types, sortcol, &cb.cb_proplist, limit, list_callback, &cb); zprop_free_list(cb.cb_proplist); zfs_free_sort_columns(sortcol); if (ret == 0 && cb.cb_first && !cb.cb_scripted) (void) fprintf(stderr, gettext("no datasets available\n")); return (ret); } /* * zfs rename [-fu] * zfs rename [-f] -p * zfs rename [-u] -r * * Renames the given dataset to another of the same type. * * The '-p' flag creates all the non-existing ancestors of the target first. * The '-u' flag prevents file systems from being remounted during rename. */ static int zfs_do_rename(int argc, char **argv) { zfs_handle_t *zhp; renameflags_t flags = { 0 }; int c; int ret = 0; int types; boolean_t parents = B_FALSE; /* check options */ while ((c = getopt(argc, argv, "pruf")) != -1) { switch (c) { case 'p': parents = B_TRUE; break; case 'r': flags.recursive = B_TRUE; break; case 'u': flags.nounmount = B_TRUE; break; case 'f': flags.forceunmount = B_TRUE; break; case '?': default: (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); usage(B_FALSE); } } argc -= optind; argv += optind; /* check number of arguments */ if (argc < 1) { (void) fprintf(stderr, gettext("missing source dataset " "argument\n")); usage(B_FALSE); } if (argc < 2) { (void) fprintf(stderr, gettext("missing target dataset " "argument\n")); usage(B_FALSE); } if (argc > 2) { (void) fprintf(stderr, gettext("too many arguments\n")); usage(B_FALSE); } if (flags.recursive && parents) { (void) fprintf(stderr, gettext("-p and -r options are mutually " "exclusive\n")); usage(B_FALSE); } if (flags.nounmount && parents) { (void) fprintf(stderr, gettext("-u and -p options are mutually " "exclusive\n")); usage(B_FALSE); } if (flags.recursive && strchr(argv[0], '@') == 0) { (void) fprintf(stderr, gettext("source dataset for recursive " "rename must be a snapshot\n")); usage(B_FALSE); } if (flags.nounmount) types = ZFS_TYPE_FILESYSTEM; else if (parents) types = ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME; else types = ZFS_TYPE_DATASET; if ((zhp = zfs_open(g_zfs, argv[0], types)) == NULL) return (1); /* If we were asked and the name looks good, try to create ancestors. 
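 *
 * Illustrative example (dataset names are placeholders): with
 *
 *	# zfs rename -p tank/data tank/projects/archive/data
 *
 * zfs_create_ancestors() is expected to create tank/projects and
 * tank/projects/archive first, if they do not already exist, before the
 * rename itself is attempted.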
*/ if (parents && zfs_name_valid(argv[1], zfs_get_type(zhp)) && zfs_create_ancestors(g_zfs, argv[1]) != 0) { zfs_close(zhp); return (1); } ret = (zfs_rename(zhp, argv[1], flags) != 0); zfs_close(zhp); return (ret); } /* * zfs promote * * Promotes the given clone fs to be the parent */ static int zfs_do_promote(int argc, char **argv) { zfs_handle_t *zhp; int ret = 0; /* check options */ if (argc > 1 && argv[1][0] == '-') { (void) fprintf(stderr, gettext("invalid option '%c'\n"), argv[1][1]); usage(B_FALSE); } /* check number of arguments */ if (argc < 2) { (void) fprintf(stderr, gettext("missing clone filesystem" " argument\n")); usage(B_FALSE); } if (argc > 2) { (void) fprintf(stderr, gettext("too many arguments\n")); usage(B_FALSE); } zhp = zfs_open(g_zfs, argv[1], ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME); if (zhp == NULL) return (1); ret = (zfs_promote(zhp) != 0); zfs_close(zhp); return (ret); } static int zfs_do_redact(int argc, char **argv) { char *snap = NULL; char *bookname = NULL; char **rsnaps = NULL; int numrsnaps = 0; argv++; argc--; if (argc < 3) { (void) fprintf(stderr, gettext("too few arguments\n")); usage(B_FALSE); } snap = argv[0]; bookname = argv[1]; rsnaps = argv + 2; numrsnaps = argc - 2; nvlist_t *rsnapnv = fnvlist_alloc(); for (int i = 0; i < numrsnaps; i++) { fnvlist_add_boolean(rsnapnv, rsnaps[i]); } int err = lzc_redact(snap, bookname, rsnapnv); fnvlist_free(rsnapnv); switch (err) { case 0: break; case ENOENT: (void) fprintf(stderr, gettext("provided snapshot %s does not exist\n"), snap); break; case EEXIST: (void) fprintf(stderr, gettext("specified redaction bookmark " "(%s) provided already exists\n"), bookname); break; case ENAMETOOLONG: (void) fprintf(stderr, gettext("provided bookmark name cannot " "be used, final name would be too long\n")); break; case E2BIG: (void) fprintf(stderr, gettext("too many redaction snapshots " "specified\n")); break; case EINVAL: if (strchr(bookname, '#') != NULL) (void) fprintf(stderr, gettext( "redaction bookmark name must not contain '#'\n")); else (void) fprintf(stderr, gettext( "redaction snapshot must be descendent of " "snapshot being redacted\n")); break; case EALREADY: (void) fprintf(stderr, gettext("attempted to redact redacted " "dataset or with respect to redacted dataset\n")); break; case ENOTSUP: (void) fprintf(stderr, gettext("redaction bookmarks feature " "not enabled\n")); break; case EXDEV: (void) fprintf(stderr, gettext("potentially invalid redaction " "snapshot; full dataset names required\n")); break; default: (void) fprintf(stderr, gettext("internal error: %s\n"), strerror(errno)); } return (err); } /* * zfs rollback [-rRf] * * -r Delete any intervening snapshots before doing rollback * -R Delete any snapshots and their clones * -f ignored for backwards compatibility * * Given a filesystem, rollback to a specific snapshot, discarding any changes * since then and making it the active dataset. If more recent snapshots exist, * the command will complain unless the '-r' flag is given. 
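 *
 * Minimal sketch of the "more recent" test applied by rollback_check()
 * below (variable names are illustrative): a snapshot or bookmark blocks a
 * plain rollback when its creation transaction group is newer than that of
 * the rollback target.
 *
 *	uint64_t target_txg = zfs_prop_get_int(snap, ZFS_PROP_CREATETXG);
 *	if (zfs_prop_get_int(other, ZFS_PROP_CREATETXG) > target_txg)
 *		... 'other' is younger, so -r (or -R for clones) is required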
*/ typedef struct rollback_cbdata { uint64_t cb_create; uint8_t cb_younger_ds_printed; boolean_t cb_first; int cb_doclones; char *cb_target; int cb_error; boolean_t cb_recurse; } rollback_cbdata_t; static int rollback_check_dependent(zfs_handle_t *zhp, void *data) { rollback_cbdata_t *cbp = data; if (cbp->cb_first && cbp->cb_recurse) { (void) fprintf(stderr, gettext("cannot rollback to " "'%s': clones of previous snapshots exist\n"), cbp->cb_target); (void) fprintf(stderr, gettext("use '-R' to " "force deletion of the following clones and " "dependents:\n")); cbp->cb_first = 0; cbp->cb_error = 1; } (void) fprintf(stderr, "%s\n", zfs_get_name(zhp)); zfs_close(zhp); return (0); } /* * Report some snapshots/bookmarks more recent than the one specified. * Used when '-r' is not specified. We reuse this same callback for the * snapshot dependents - if 'cb_dependent' is set, then this is a * dependent and we should report it without checking the transaction group. */ static int rollback_check(zfs_handle_t *zhp, void *data) { rollback_cbdata_t *cbp = data; /* * Max number of younger snapshots and/or bookmarks to display before * we stop the iteration. */ const uint8_t max_younger = 32; if (cbp->cb_doclones) { zfs_close(zhp); return (0); } if (zfs_prop_get_int(zhp, ZFS_PROP_CREATETXG) > cbp->cb_create) { if (cbp->cb_first && !cbp->cb_recurse) { (void) fprintf(stderr, gettext("cannot " "rollback to '%s': more recent snapshots " "or bookmarks exist\n"), cbp->cb_target); (void) fprintf(stderr, gettext("use '-r' to " "force deletion of the following " "snapshots and bookmarks:\n")); cbp->cb_first = 0; cbp->cb_error = 1; } if (cbp->cb_recurse) { if (zfs_iter_dependents(zhp, B_TRUE, rollback_check_dependent, cbp) != 0) { zfs_close(zhp); return (-1); } } else { (void) fprintf(stderr, "%s\n", zfs_get_name(zhp)); cbp->cb_younger_ds_printed++; } } zfs_close(zhp); if (cbp->cb_younger_ds_printed == max_younger) { /* * This non-recursive rollback is going to fail due to the * presence of snapshots and/or bookmarks that are younger than * the rollback target. 
* We printed some of the offending objects, now we stop * zfs_iter_snapshot/bookmark iteration so we can fail fast and * avoid iterating over the rest of the younger objects */ (void) fprintf(stderr, gettext("Output limited to %d " "snapshots/bookmarks\n"), max_younger); return (-1); } return (0); } static int zfs_do_rollback(int argc, char **argv) { int ret = 0; int c; boolean_t force = B_FALSE; rollback_cbdata_t cb = { 0 }; zfs_handle_t *zhp, *snap; char parentname[ZFS_MAX_DATASET_NAME_LEN]; char *delim; uint64_t min_txg = 0; /* check options */ while ((c = getopt(argc, argv, "rRf")) != -1) { switch (c) { case 'r': cb.cb_recurse = 1; break; case 'R': cb.cb_recurse = 1; cb.cb_doclones = 1; break; case 'f': force = B_TRUE; break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); usage(B_FALSE); } } argc -= optind; argv += optind; /* check number of arguments */ if (argc < 1) { (void) fprintf(stderr, gettext("missing dataset argument\n")); usage(B_FALSE); } if (argc > 1) { (void) fprintf(stderr, gettext("too many arguments\n")); usage(B_FALSE); } /* open the snapshot */ if ((snap = zfs_open(g_zfs, argv[0], ZFS_TYPE_SNAPSHOT)) == NULL) return (1); /* open the parent dataset */ (void) strlcpy(parentname, argv[0], sizeof (parentname)); verify((delim = strrchr(parentname, '@')) != NULL); *delim = '\0'; if ((zhp = zfs_open(g_zfs, parentname, ZFS_TYPE_DATASET)) == NULL) { zfs_close(snap); return (1); } /* * Check for more recent snapshots and/or clones based on the presence * of '-r' and '-R'. */ cb.cb_target = argv[0]; cb.cb_create = zfs_prop_get_int(snap, ZFS_PROP_CREATETXG); cb.cb_first = B_TRUE; cb.cb_error = 0; if (cb.cb_create > 0) min_txg = cb.cb_create; if ((ret = zfs_iter_snapshots(zhp, B_FALSE, rollback_check, &cb, min_txg, 0)) != 0) goto out; if ((ret = zfs_iter_bookmarks(zhp, rollback_check, &cb)) != 0) goto out; if ((ret = cb.cb_error) != 0) goto out; /* * Rollback parent to the given snapshot. */ ret = zfs_rollback(zhp, snap, force); out: zfs_close(snap); zfs_close(zhp); if (ret == 0) return (0); else return (1); } /* * zfs set property=value ... { fs | snap | vol } ... * * Sets the given properties for all datasets specified on the command line. 
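 *
 * Minimal sketch of what one dataset's update reduces to (property name and
 * value are placeholders; error handling trimmed), using the same calls as
 * set_callback() below:
 *
 *	nvlist_t *props;
 *	if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0)
 *		nomem();
 *	if (nvlist_add_string(props, "compression", "on") != 0)
 *		nomem();
 *	if (zfs_prop_set_list(zhp, props) != 0)
 *		... report the error via libzfs_errno(g_zfs)
 *	nvlist_free(props);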
*/ static int set_callback(zfs_handle_t *zhp, void *data) { nvlist_t *props = data; if (zfs_prop_set_list(zhp, props) != 0) { switch (libzfs_errno(g_zfs)) { case EZFS_MOUNTFAILED: (void) fprintf(stderr, gettext("property may be set " "but unable to remount filesystem\n")); break; case EZFS_SHARENFSFAILED: (void) fprintf(stderr, gettext("property may be set " "but unable to reshare filesystem\n")); break; } return (1); } return (0); } static int zfs_do_set(int argc, char **argv) { nvlist_t *props = NULL; int ds_start = -1; /* argv idx of first dataset arg */ int ret = 0; int i; /* check for options */ if (argc > 1 && argv[1][0] == '-') { (void) fprintf(stderr, gettext("invalid option '%c'\n"), argv[1][1]); usage(B_FALSE); } /* check number of arguments */ if (argc < 2) { (void) fprintf(stderr, gettext("missing arguments\n")); usage(B_FALSE); } if (argc < 3) { if (strchr(argv[1], '=') == NULL) { (void) fprintf(stderr, gettext("missing property=value " "argument(s)\n")); } else { (void) fprintf(stderr, gettext("missing dataset " "name(s)\n")); } usage(B_FALSE); } /* validate argument order: prop=val args followed by dataset args */ for (i = 1; i < argc; i++) { if (strchr(argv[i], '=') != NULL) { if (ds_start > 0) { /* out-of-order prop=val argument */ (void) fprintf(stderr, gettext("invalid " "argument order\n")); usage(B_FALSE); } } else if (ds_start < 0) { ds_start = i; } } if (ds_start < 0) { (void) fprintf(stderr, gettext("missing dataset name(s)\n")); usage(B_FALSE); } /* Populate a list of property settings */ if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0) nomem(); for (i = 1; i < ds_start; i++) { if (!parseprop(props, argv[i])) { ret = -1; goto error; } } ret = zfs_for_each(argc - ds_start, argv + ds_start, 0, ZFS_TYPE_DATASET, NULL, NULL, 0, set_callback, props); error: nvlist_free(props); return (ret); } typedef struct snap_cbdata { nvlist_t *sd_nvl; boolean_t sd_recursive; const char *sd_snapname; } snap_cbdata_t; static int zfs_snapshot_cb(zfs_handle_t *zhp, void *arg) { snap_cbdata_t *sd = arg; char *name; int rv = 0; int error; if (sd->sd_recursive && zfs_prop_get_int(zhp, ZFS_PROP_INCONSISTENT) != 0) { zfs_close(zhp); return (0); } error = asprintf(&name, "%s@%s", zfs_get_name(zhp), sd->sd_snapname); if (error == -1) nomem(); fnvlist_add_boolean(sd->sd_nvl, name); free(name); if (sd->sd_recursive) rv = zfs_iter_filesystems(zhp, zfs_snapshot_cb, sd); zfs_close(zhp); return (rv); } /* * zfs snapshot [-r] [-o prop=value] ... * * Creates a snapshot with the given name. While functionally equivalent to * 'zfs create', it is a separate command to differentiate intent. 
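 *
 * Illustrative example (dataset names are placeholders): for
 *
 *	# zfs snapshot -r tank/home@backup
 *
 * zfs_snapshot_cb() walks the hierarchy with zfs_iter_filesystems() and
 * collects one entry per snapshot to create, e.g.
 *
 *	tank/home@backup
 *	tank/home/alice@backup
 *	tank/home/bob@backup
 *
 * all of which are then passed to a single zfs_snapshot_nvl() call.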
*/ static int zfs_do_snapshot(int argc, char **argv) { int ret = 0; int c; nvlist_t *props; snap_cbdata_t sd = { 0 }; boolean_t multiple_snaps = B_FALSE; if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0) nomem(); if (nvlist_alloc(&sd.sd_nvl, NV_UNIQUE_NAME, 0) != 0) nomem(); /* check options */ while ((c = getopt(argc, argv, "ro:")) != -1) { switch (c) { case 'o': if (!parseprop(props, optarg)) { nvlist_free(sd.sd_nvl); nvlist_free(props); return (1); } break; case 'r': sd.sd_recursive = B_TRUE; multiple_snaps = B_TRUE; break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); goto usage; } } argc -= optind; argv += optind; /* check number of arguments */ if (argc < 1) { (void) fprintf(stderr, gettext("missing snapshot argument\n")); goto usage; } if (argc > 1) multiple_snaps = B_TRUE; for (; argc > 0; argc--, argv++) { char *atp; zfs_handle_t *zhp; atp = strchr(argv[0], '@'); if (atp == NULL) goto usage; *atp = '\0'; sd.sd_snapname = atp + 1; zhp = zfs_open(g_zfs, argv[0], ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME); if (zhp == NULL) goto usage; if (zfs_snapshot_cb(zhp, &sd) != 0) goto usage; } ret = zfs_snapshot_nvl(g_zfs, sd.sd_nvl, props); nvlist_free(sd.sd_nvl); nvlist_free(props); if (ret != 0 && multiple_snaps) (void) fprintf(stderr, gettext("no snapshots were created\n")); return (ret != 0); usage: nvlist_free(sd.sd_nvl); nvlist_free(props); usage(B_FALSE); return (-1); } /* * Array of prefixes to exclude – * a linear search, even if executed for each dataset, * is plenty good enough. */ typedef struct zfs_send_exclude_arg { size_t count; const char **list; } zfs_send_exclude_arg_t; static boolean_t zfs_do_send_exclude(zfs_handle_t *zhp, void *context) { zfs_send_exclude_arg_t *excludes = context; const char *name = zfs_get_name(zhp); for (size_t i = 0; i < excludes->count; ++i) { size_t len = strlen(excludes->list[i]); if (strncmp(name, excludes->list[i], len) == 0 && memchr("/@", name[len], sizeof ("/@"))) return (B_FALSE); } return (B_TRUE); } /* * Send a backup stream to stdout. 
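 *
 * Illustrative usage (names are placeholders); the stream always goes to
 * standard output, so it is normally redirected or piped:
 *
 *	# zfs send tank/fs@monday > /backup/fs-monday.stream
 *	# zfs send -i tank/fs@monday tank/fs@tuesday | zfs receive backup/fs
 *
 * Writing the stream to a terminal is rejected below unless -n (dry run)
 * is given.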
*/ static int zfs_do_send(int argc, char **argv) { char *fromname = NULL; char *toname = NULL; char *resume_token = NULL; char *cp; zfs_handle_t *zhp; sendflags_t flags = { 0 }; int c, err; nvlist_t *dbgnv = NULL; char *redactbook = NULL; zfs_send_exclude_arg_t excludes = { 0 }; struct option long_options[] = { {"replicate", no_argument, NULL, 'R'}, {"skip-missing", no_argument, NULL, 's'}, {"redact", required_argument, NULL, 'd'}, {"props", no_argument, NULL, 'p'}, {"parsable", no_argument, NULL, 'P'}, {"dedup", no_argument, NULL, 'D'}, {"verbose", no_argument, NULL, 'v'}, {"dryrun", no_argument, NULL, 'n'}, {"large-block", no_argument, NULL, 'L'}, {"embed", no_argument, NULL, 'e'}, {"resume", required_argument, NULL, 't'}, {"compressed", no_argument, NULL, 'c'}, {"raw", no_argument, NULL, 'w'}, {"backup", no_argument, NULL, 'b'}, {"holds", no_argument, NULL, 'h'}, {"saved", no_argument, NULL, 'S'}, {"exclude", required_argument, NULL, 'X'}, {0, 0, 0, 0} }; /* check options */ while ((c = getopt_long(argc, argv, ":i:I:RsDpvnPLeht:cwbd:SX:", long_options, NULL)) != -1) { switch (c) { case 'X': for (char *ds; (ds = strsep(&optarg, ",")) != NULL; ) { if (!zfs_name_valid(ds, ZFS_TYPE_DATASET) || strchr(ds, '/') == NULL) { (void) fprintf(stderr, gettext("-X %s: " "not a valid non-root dataset name" ".\n"), ds); usage(B_FALSE); } excludes.list = safe_realloc(excludes.list, sizeof (char *) * (excludes.count + 1)); excludes.list[excludes.count++] = ds; } break; case 'i': if (fromname) usage(B_FALSE); fromname = optarg; break; case 'I': if (fromname) usage(B_FALSE); fromname = optarg; flags.doall = B_TRUE; break; case 'R': flags.replicate = B_TRUE; break; case 's': flags.skipmissing = B_TRUE; break; case 'd': redactbook = optarg; break; case 'p': flags.props = B_TRUE; break; case 'b': flags.backup = B_TRUE; break; case 'h': flags.holds = B_TRUE; break; case 'P': flags.parsable = B_TRUE; break; case 'v': flags.verbosity++; flags.progress = B_TRUE; break; case 'D': (void) fprintf(stderr, gettext("WARNING: deduplicated send is no " "longer supported. A regular,\n" "non-deduplicated stream will be generated.\n\n")); break; case 'n': flags.dryrun = B_TRUE; break; case 'L': flags.largeblock = B_TRUE; break; case 'e': flags.embed_data = B_TRUE; break; case 't': resume_token = optarg; break; case 'c': flags.compress = B_TRUE; break; case 'w': flags.raw = B_TRUE; flags.compress = B_TRUE; flags.embed_data = B_TRUE; flags.largeblock = B_TRUE; break; case 'S': flags.saved = B_TRUE; break; case ':': /* * If a parameter was not passed, optopt contains the * value that would normally lead us into the * appropriate case statement. If it's > 256, then this * must be a longopt and we should look at argv to get * the string. Otherwise it's just the character, so we * should use it directly. */ if (optopt <= UINT8_MAX) { (void) fprintf(stderr, gettext("missing argument for '%c' " "option\n"), optopt); } else { (void) fprintf(stderr, gettext("missing argument for '%s' " "option\n"), argv[optind - 1]); } usage(B_FALSE); break; case '?': default: /* * If an invalid flag was passed, optopt contains the * character if it was a short flag, or 0 if it was a * longopt. 
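 *
 * Illustrative example ('--frobnicate' is a made-up option): for
 * 'zfs send --frobnicate', getopt_long() leaves optopt at 0, so the
 * message below prints the offending string from argv[optind - 1] rather
 * than a single character.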
*/ if (optopt != 0) { (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); } else { (void) fprintf(stderr, gettext("invalid option '%s'\n"), argv[optind - 1]); } usage(B_FALSE); } } if (flags.parsable && flags.verbosity == 0) flags.verbosity = 1; if (excludes.count > 0 && !flags.replicate) { (void) fprintf(stderr, gettext("Cannot specify " "dataset exclusion (-X) on a non-recursive " "send.\n")); return (1); } argc -= optind; argv += optind; if (resume_token != NULL) { if (fromname != NULL || flags.replicate || flags.props || flags.backup || flags.holds || flags.saved || redactbook != NULL) { (void) fprintf(stderr, gettext("invalid flags combined with -t\n")); usage(B_FALSE); } if (argc > 0) { (void) fprintf(stderr, gettext("too many arguments\n")); usage(B_FALSE); } } else { if (argc < 1) { (void) fprintf(stderr, gettext("missing snapshot argument\n")); usage(B_FALSE); } if (argc > 1) { (void) fprintf(stderr, gettext("too many arguments\n")); usage(B_FALSE); } } if (flags.saved) { if (fromname != NULL || flags.replicate || flags.props || flags.doall || flags.backup || flags.holds || flags.largeblock || flags.embed_data || flags.compress || flags.raw || redactbook != NULL) { (void) fprintf(stderr, gettext("incompatible flags " "combined with saved send flag\n")); usage(B_FALSE); } if (strchr(argv[0], '@') != NULL) { (void) fprintf(stderr, gettext("saved send must " "specify the dataset with partially-received " "state\n")); usage(B_FALSE); } } if (flags.raw && redactbook != NULL) { (void) fprintf(stderr, gettext("Error: raw sends may not be redacted.\n")); return (1); } if (!flags.dryrun && isatty(STDOUT_FILENO)) { (void) fprintf(stderr, gettext("Error: Stream can not be written to a terminal.\n" "You must redirect standard output.\n")); return (1); } if (flags.saved) { zhp = zfs_open(g_zfs, argv[0], ZFS_TYPE_DATASET); if (zhp == NULL) return (1); err = zfs_send_saved(zhp, &flags, STDOUT_FILENO, resume_token); zfs_close(zhp); return (err != 0); } else if (resume_token != NULL) { return (zfs_send_resume(g_zfs, &flags, STDOUT_FILENO, resume_token)); } if (flags.skipmissing && !flags.replicate) { (void) fprintf(stderr, gettext("skip-missing flag can only be used in " "conjunction with replicate\n")); usage(B_FALSE); } /* * For everything except -R and -I, use the new, cleaner code path. */ if (!(flags.replicate || flags.doall)) { char frombuf[ZFS_MAX_DATASET_NAME_LEN]; if (fromname != NULL && (strchr(fromname, '#') == NULL && strchr(fromname, '@') == NULL)) { /* * Neither bookmark or snapshot was specified. Print a * warning, and assume snapshot. */ (void) fprintf(stderr, "Warning: incremental source " "didn't specify type, assuming snapshot. Use '@' " "or '#' prefix to avoid ambiguity.\n"); (void) snprintf(frombuf, sizeof (frombuf), "@%s", fromname); fromname = frombuf; } if (fromname != NULL && (fromname[0] == '#' || fromname[0] == '@')) { /* * Incremental source name begins with # or @. * Default to same fs as target. 
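 *
 * Illustrative example (names are placeholders): with a target snapshot of
 * 'tank/fs@tuesday', an incremental source given as '-i @monday' is
 * expanded to 'tank/fs@monday', and '-i #mark' to 'tank/fs#mark'.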
*/ char tmpbuf[ZFS_MAX_DATASET_NAME_LEN]; (void) strlcpy(tmpbuf, fromname, sizeof (tmpbuf)); (void) strlcpy(frombuf, argv[0], sizeof (frombuf)); cp = strchr(frombuf, '@'); if (cp != NULL) *cp = '\0'; (void) strlcat(frombuf, tmpbuf, sizeof (frombuf)); fromname = frombuf; } zhp = zfs_open(g_zfs, argv[0], ZFS_TYPE_DATASET); if (zhp == NULL) return (1); err = zfs_send_one(zhp, fromname, STDOUT_FILENO, &flags, redactbook); zfs_close(zhp); return (err != 0); } if (fromname != NULL && strchr(fromname, '#')) { (void) fprintf(stderr, gettext("Error: multiple snapshots cannot be " "sent from a bookmark.\n")); return (1); } if (redactbook != NULL) { (void) fprintf(stderr, gettext("Error: multiple snapshots " "cannot be sent redacted.\n")); return (1); } if ((cp = strchr(argv[0], '@')) == NULL) { (void) fprintf(stderr, gettext("Error: " "Unsupported flag with filesystem or bookmark.\n")); return (1); } *cp = '\0'; toname = cp + 1; zhp = zfs_open(g_zfs, argv[0], ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME); if (zhp == NULL) return (1); /* * If they specified the full path to the snapshot, chop off * everything except the short name of the snapshot, but special * case if they specify the origin. */ if (fromname && (cp = strchr(fromname, '@')) != NULL) { char origin[ZFS_MAX_DATASET_NAME_LEN]; zprop_source_t src; (void) zfs_prop_get(zhp, ZFS_PROP_ORIGIN, origin, sizeof (origin), &src, NULL, 0, B_FALSE); if (strcmp(origin, fromname) == 0) { fromname = NULL; flags.fromorigin = B_TRUE; } else { *cp = '\0'; if (cp != fromname && strcmp(argv[0], fromname)) { (void) fprintf(stderr, gettext("incremental source must be " "in same filesystem\n")); usage(B_FALSE); } fromname = cp + 1; if (strchr(fromname, '@') || strchr(fromname, '/')) { (void) fprintf(stderr, gettext("invalid incremental source\n")); usage(B_FALSE); } } } if (flags.replicate && fromname == NULL) flags.doall = B_TRUE; err = zfs_send(zhp, fromname, toname, &flags, STDOUT_FILENO, excludes.count > 0 ? zfs_do_send_exclude : NULL, &excludes, flags.verbosity >= 3 ? &dbgnv : NULL); if (flags.verbosity >= 3 && dbgnv != NULL) { /* * dump_nvlist prints to stdout, but that's been * redirected to a file. Make it print to stderr * instead. */ (void) dup2(STDERR_FILENO, STDOUT_FILENO); dump_nvlist(dbgnv, 0); nvlist_free(dbgnv); } zfs_close(zhp); free(excludes.list); return (err != 0); } /* * Restore a backup stream from stdin. 
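 *
 * Illustrative usage (names are placeholders); the stream is always read
 * from standard input, so it is normally piped or redirected:
 *
 *	# zfs send tank/fs@monday | zfs receive -u backup/fs
 *	# zfs receive -s backup/fs < /backup/fs-monday.stream
 *
 * Reading the stream from a terminal is rejected below.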
*/ static int zfs_do_receive(int argc, char **argv) { int c, err = 0; recvflags_t flags = { 0 }; boolean_t abort_resumable = B_FALSE; nvlist_t *props; if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0) nomem(); /* check options */ while ((c = getopt(argc, argv, ":o:x:dehMnuvFsAc")) != -1) { switch (c) { case 'o': if (!parseprop(props, optarg)) { nvlist_free(props); usage(B_FALSE); } break; case 'x': if (!parsepropname(props, optarg)) { nvlist_free(props); usage(B_FALSE); } break; case 'd': if (flags.istail) { (void) fprintf(stderr, gettext("invalid option " "combination: -d and -e are mutually " "exclusive\n")); usage(B_FALSE); } flags.isprefix = B_TRUE; break; case 'e': if (flags.isprefix) { (void) fprintf(stderr, gettext("invalid option " "combination: -d and -e are mutually " "exclusive\n")); usage(B_FALSE); } flags.istail = B_TRUE; break; case 'h': flags.skipholds = B_TRUE; break; case 'M': flags.forceunmount = B_TRUE; break; case 'n': flags.dryrun = B_TRUE; break; case 'u': flags.nomount = B_TRUE; break; case 'v': flags.verbose = B_TRUE; break; case 's': flags.resumable = B_TRUE; break; case 'F': flags.force = B_TRUE; break; case 'A': abort_resumable = B_TRUE; break; case 'c': flags.heal = B_TRUE; break; case ':': (void) fprintf(stderr, gettext("missing argument for " "'%c' option\n"), optopt); usage(B_FALSE); break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); usage(B_FALSE); } } argc -= optind; argv += optind; /* zfs recv -e (use "tail" name) implies -d (remove dataset "head") */ if (flags.istail) flags.isprefix = B_TRUE; /* check number of arguments */ if (argc < 1) { (void) fprintf(stderr, gettext("missing snapshot argument\n")); usage(B_FALSE); } if (argc > 1) { (void) fprintf(stderr, gettext("too many arguments\n")); usage(B_FALSE); } if (abort_resumable) { if (flags.isprefix || flags.istail || flags.dryrun || flags.resumable || flags.nomount) { (void) fprintf(stderr, gettext("invalid option\n")); usage(B_FALSE); } char namebuf[ZFS_MAX_DATASET_NAME_LEN]; (void) snprintf(namebuf, sizeof (namebuf), "%s/%%recv", argv[0]); if (zfs_dataset_exists(g_zfs, namebuf, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME)) { zfs_handle_t *zhp = zfs_open(g_zfs, namebuf, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME); if (zhp == NULL) { nvlist_free(props); return (1); } err = zfs_destroy(zhp, B_FALSE); zfs_close(zhp); } else { zfs_handle_t *zhp = zfs_open(g_zfs, argv[0], ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME); if (zhp == NULL) usage(B_FALSE); if (!zfs_prop_get_int(zhp, ZFS_PROP_INCONSISTENT) || zfs_prop_get(zhp, ZFS_PROP_RECEIVE_RESUME_TOKEN, NULL, 0, NULL, NULL, 0, B_TRUE) == -1) { (void) fprintf(stderr, gettext("'%s' does not have any " "resumable receive state to abort\n"), argv[0]); nvlist_free(props); zfs_close(zhp); return (1); } err = zfs_destroy(zhp, B_FALSE); zfs_close(zhp); } nvlist_free(props); return (err != 0); } if (isatty(STDIN_FILENO)) { (void) fprintf(stderr, gettext("Error: Backup stream can not be read " "from a terminal.\n" "You must redirect standard input.\n")); nvlist_free(props); return (1); } err = zfs_receive(g_zfs, argv[0], props, &flags, STDIN_FILENO, NULL); nvlist_free(props); return (err != 0); } /* * allow/unallow stuff */ /* copied from zfs/sys/dsl_deleg.h */ #define ZFS_DELEG_PERM_CREATE "create" #define ZFS_DELEG_PERM_DESTROY "destroy" #define ZFS_DELEG_PERM_SNAPSHOT "snapshot" #define ZFS_DELEG_PERM_ROLLBACK "rollback" #define ZFS_DELEG_PERM_CLONE "clone" #define ZFS_DELEG_PERM_PROMOTE "promote" #define ZFS_DELEG_PERM_RENAME "rename" #define 
ZFS_DELEG_PERM_MOUNT "mount" #define ZFS_DELEG_PERM_SHARE "share" #define ZFS_DELEG_PERM_SEND "send" #define ZFS_DELEG_PERM_RECEIVE "receive" #define ZFS_DELEG_PERM_ALLOW "allow" #define ZFS_DELEG_PERM_USERPROP "userprop" #define ZFS_DELEG_PERM_VSCAN "vscan" /* ??? */ #define ZFS_DELEG_PERM_USERQUOTA "userquota" #define ZFS_DELEG_PERM_GROUPQUOTA "groupquota" #define ZFS_DELEG_PERM_USERUSED "userused" #define ZFS_DELEG_PERM_GROUPUSED "groupused" #define ZFS_DELEG_PERM_USEROBJQUOTA "userobjquota" #define ZFS_DELEG_PERM_GROUPOBJQUOTA "groupobjquota" #define ZFS_DELEG_PERM_USEROBJUSED "userobjused" #define ZFS_DELEG_PERM_GROUPOBJUSED "groupobjused" #define ZFS_DELEG_PERM_HOLD "hold" #define ZFS_DELEG_PERM_RELEASE "release" #define ZFS_DELEG_PERM_DIFF "diff" #define ZFS_DELEG_PERM_BOOKMARK "bookmark" #define ZFS_DELEG_PERM_LOAD_KEY "load-key" #define ZFS_DELEG_PERM_CHANGE_KEY "change-key" #define ZFS_DELEG_PERM_PROJECTUSED "projectused" #define ZFS_DELEG_PERM_PROJECTQUOTA "projectquota" #define ZFS_DELEG_PERM_PROJECTOBJUSED "projectobjused" #define ZFS_DELEG_PERM_PROJECTOBJQUOTA "projectobjquota" #define ZFS_NUM_DELEG_NOTES ZFS_DELEG_NOTE_NONE static zfs_deleg_perm_tab_t zfs_deleg_perm_tbl[] = { { ZFS_DELEG_PERM_ALLOW, ZFS_DELEG_NOTE_ALLOW }, { ZFS_DELEG_PERM_CLONE, ZFS_DELEG_NOTE_CLONE }, { ZFS_DELEG_PERM_CREATE, ZFS_DELEG_NOTE_CREATE }, { ZFS_DELEG_PERM_DESTROY, ZFS_DELEG_NOTE_DESTROY }, { ZFS_DELEG_PERM_DIFF, ZFS_DELEG_NOTE_DIFF}, { ZFS_DELEG_PERM_HOLD, ZFS_DELEG_NOTE_HOLD }, { ZFS_DELEG_PERM_MOUNT, ZFS_DELEG_NOTE_MOUNT }, { ZFS_DELEG_PERM_PROMOTE, ZFS_DELEG_NOTE_PROMOTE }, { ZFS_DELEG_PERM_RECEIVE, ZFS_DELEG_NOTE_RECEIVE }, { ZFS_DELEG_PERM_RELEASE, ZFS_DELEG_NOTE_RELEASE }, { ZFS_DELEG_PERM_RENAME, ZFS_DELEG_NOTE_RENAME }, { ZFS_DELEG_PERM_ROLLBACK, ZFS_DELEG_NOTE_ROLLBACK }, { ZFS_DELEG_PERM_SEND, ZFS_DELEG_NOTE_SEND }, { ZFS_DELEG_PERM_SHARE, ZFS_DELEG_NOTE_SHARE }, { ZFS_DELEG_PERM_SNAPSHOT, ZFS_DELEG_NOTE_SNAPSHOT }, { ZFS_DELEG_PERM_BOOKMARK, ZFS_DELEG_NOTE_BOOKMARK }, { ZFS_DELEG_PERM_LOAD_KEY, ZFS_DELEG_NOTE_LOAD_KEY }, { ZFS_DELEG_PERM_CHANGE_KEY, ZFS_DELEG_NOTE_CHANGE_KEY }, { ZFS_DELEG_PERM_GROUPQUOTA, ZFS_DELEG_NOTE_GROUPQUOTA }, { ZFS_DELEG_PERM_GROUPUSED, ZFS_DELEG_NOTE_GROUPUSED }, { ZFS_DELEG_PERM_USERPROP, ZFS_DELEG_NOTE_USERPROP }, { ZFS_DELEG_PERM_USERQUOTA, ZFS_DELEG_NOTE_USERQUOTA }, { ZFS_DELEG_PERM_USERUSED, ZFS_DELEG_NOTE_USERUSED }, { ZFS_DELEG_PERM_USEROBJQUOTA, ZFS_DELEG_NOTE_USEROBJQUOTA }, { ZFS_DELEG_PERM_USEROBJUSED, ZFS_DELEG_NOTE_USEROBJUSED }, { ZFS_DELEG_PERM_GROUPOBJQUOTA, ZFS_DELEG_NOTE_GROUPOBJQUOTA }, { ZFS_DELEG_PERM_GROUPOBJUSED, ZFS_DELEG_NOTE_GROUPOBJUSED }, { ZFS_DELEG_PERM_PROJECTUSED, ZFS_DELEG_NOTE_PROJECTUSED }, { ZFS_DELEG_PERM_PROJECTQUOTA, ZFS_DELEG_NOTE_PROJECTQUOTA }, { ZFS_DELEG_PERM_PROJECTOBJUSED, ZFS_DELEG_NOTE_PROJECTOBJUSED }, { ZFS_DELEG_PERM_PROJECTOBJQUOTA, ZFS_DELEG_NOTE_PROJECTOBJQUOTA }, { NULL, ZFS_DELEG_NOTE_NONE } }; /* permission structure */ typedef struct deleg_perm { zfs_deleg_who_type_t dp_who_type; const char *dp_name; boolean_t dp_local; boolean_t dp_descend; } deleg_perm_t; /* */ typedef struct deleg_perm_node { deleg_perm_t dpn_perm; uu_avl_node_t dpn_avl_node; } deleg_perm_node_t; typedef struct fs_perm fs_perm_t; /* permissions set */ typedef struct who_perm { zfs_deleg_who_type_t who_type; const char *who_name; /* id */ char who_ug_name[256]; /* user/group name */ fs_perm_t *who_fsperm; /* uplink */ uu_avl_t *who_deleg_perm_avl; /* permissions */ } who_perm_t; /* */ typedef struct who_perm_node { who_perm_t 
who_perm; uu_avl_node_t who_avl_node; } who_perm_node_t; typedef struct fs_perm_set fs_perm_set_t; /* fs permissions */ struct fs_perm { const char *fsp_name; uu_avl_t *fsp_sc_avl; /* sets,create */ uu_avl_t *fsp_uge_avl; /* user,group,everyone */ fs_perm_set_t *fsp_set; /* uplink */ }; /* */ typedef struct fs_perm_node { fs_perm_t fspn_fsperm; uu_avl_t *fspn_avl; uu_list_node_t fspn_list_node; } fs_perm_node_t; /* top level structure */ struct fs_perm_set { uu_list_pool_t *fsps_list_pool; uu_list_t *fsps_list; /* list of fs_perms */ uu_avl_pool_t *fsps_named_set_avl_pool; uu_avl_pool_t *fsps_who_perm_avl_pool; uu_avl_pool_t *fsps_deleg_perm_avl_pool; }; static inline const char * deleg_perm_type(zfs_deleg_note_t note) { /* subcommands */ switch (note) { /* SUBCOMMANDS */ /* OTHER */ case ZFS_DELEG_NOTE_GROUPQUOTA: case ZFS_DELEG_NOTE_GROUPUSED: case ZFS_DELEG_NOTE_USERPROP: case ZFS_DELEG_NOTE_USERQUOTA: case ZFS_DELEG_NOTE_USERUSED: case ZFS_DELEG_NOTE_USEROBJQUOTA: case ZFS_DELEG_NOTE_USEROBJUSED: case ZFS_DELEG_NOTE_GROUPOBJQUOTA: case ZFS_DELEG_NOTE_GROUPOBJUSED: case ZFS_DELEG_NOTE_PROJECTUSED: case ZFS_DELEG_NOTE_PROJECTQUOTA: case ZFS_DELEG_NOTE_PROJECTOBJUSED: case ZFS_DELEG_NOTE_PROJECTOBJQUOTA: /* other */ return (gettext("other")); default: return (gettext("subcommand")); } } static int who_type2weight(zfs_deleg_who_type_t who_type) { int res; switch (who_type) { case ZFS_DELEG_NAMED_SET_SETS: case ZFS_DELEG_NAMED_SET: res = 0; break; case ZFS_DELEG_CREATE_SETS: case ZFS_DELEG_CREATE: res = 1; break; case ZFS_DELEG_USER_SETS: case ZFS_DELEG_USER: res = 2; break; case ZFS_DELEG_GROUP_SETS: case ZFS_DELEG_GROUP: res = 3; break; case ZFS_DELEG_EVERYONE_SETS: case ZFS_DELEG_EVERYONE: res = 4; break; default: res = -1; } return (res); } static int who_perm_compare(const void *larg, const void *rarg, void *unused) { (void) unused; const who_perm_node_t *l = larg; const who_perm_node_t *r = rarg; zfs_deleg_who_type_t ltype = l->who_perm.who_type; zfs_deleg_who_type_t rtype = r->who_perm.who_type; int lweight = who_type2weight(ltype); int rweight = who_type2weight(rtype); int res = lweight - rweight; if (res == 0) res = strncmp(l->who_perm.who_name, r->who_perm.who_name, ZFS_MAX_DELEG_NAME-1); if (res == 0) return (0); if (res > 0) return (1); else return (-1); } static int deleg_perm_compare(const void *larg, const void *rarg, void *unused) { (void) unused; const deleg_perm_node_t *l = larg; const deleg_perm_node_t *r = rarg; int res = strncmp(l->dpn_perm.dp_name, r->dpn_perm.dp_name, ZFS_MAX_DELEG_NAME-1); if (res == 0) return (0); if (res > 0) return (1); else return (-1); } static inline void fs_perm_set_init(fs_perm_set_t *fspset) { memset(fspset, 0, sizeof (fs_perm_set_t)); if ((fspset->fsps_list_pool = uu_list_pool_create("fsps_list_pool", sizeof (fs_perm_node_t), offsetof(fs_perm_node_t, fspn_list_node), NULL, UU_DEFAULT)) == NULL) nomem(); if ((fspset->fsps_list = uu_list_create(fspset->fsps_list_pool, NULL, UU_DEFAULT)) == NULL) nomem(); if ((fspset->fsps_named_set_avl_pool = uu_avl_pool_create( "named_set_avl_pool", sizeof (who_perm_node_t), offsetof( who_perm_node_t, who_avl_node), who_perm_compare, UU_DEFAULT)) == NULL) nomem(); if ((fspset->fsps_who_perm_avl_pool = uu_avl_pool_create( "who_perm_avl_pool", sizeof (who_perm_node_t), offsetof( who_perm_node_t, who_avl_node), who_perm_compare, UU_DEFAULT)) == NULL) nomem(); if ((fspset->fsps_deleg_perm_avl_pool = uu_avl_pool_create( "deleg_perm_avl_pool", sizeof (deleg_perm_node_t), offsetof( deleg_perm_node_t, dpn_avl_node), 
deleg_perm_compare, UU_DEFAULT)) == NULL) nomem(); } static inline void fs_perm_fini(fs_perm_t *); static inline void who_perm_fini(who_perm_t *); static inline void fs_perm_set_fini(fs_perm_set_t *fspset) { fs_perm_node_t *node = uu_list_first(fspset->fsps_list); while (node != NULL) { fs_perm_node_t *next_node = uu_list_next(fspset->fsps_list, node); fs_perm_t *fsperm = &node->fspn_fsperm; fs_perm_fini(fsperm); uu_list_remove(fspset->fsps_list, node); free(node); node = next_node; } uu_avl_pool_destroy(fspset->fsps_named_set_avl_pool); uu_avl_pool_destroy(fspset->fsps_who_perm_avl_pool); uu_avl_pool_destroy(fspset->fsps_deleg_perm_avl_pool); } static inline void deleg_perm_init(deleg_perm_t *deleg_perm, zfs_deleg_who_type_t type, const char *name) { deleg_perm->dp_who_type = type; deleg_perm->dp_name = name; } static inline void who_perm_init(who_perm_t *who_perm, fs_perm_t *fsperm, zfs_deleg_who_type_t type, const char *name) { uu_avl_pool_t *pool; pool = fsperm->fsp_set->fsps_deleg_perm_avl_pool; memset(who_perm, 0, sizeof (who_perm_t)); if ((who_perm->who_deleg_perm_avl = uu_avl_create(pool, NULL, UU_DEFAULT)) == NULL) nomem(); who_perm->who_type = type; who_perm->who_name = name; who_perm->who_fsperm = fsperm; } static inline void who_perm_fini(who_perm_t *who_perm) { deleg_perm_node_t *node = uu_avl_first(who_perm->who_deleg_perm_avl); while (node != NULL) { deleg_perm_node_t *next_node = uu_avl_next(who_perm->who_deleg_perm_avl, node); uu_avl_remove(who_perm->who_deleg_perm_avl, node); free(node); node = next_node; } uu_avl_destroy(who_perm->who_deleg_perm_avl); } static inline void fs_perm_init(fs_perm_t *fsperm, fs_perm_set_t *fspset, const char *fsname) { uu_avl_pool_t *nset_pool = fspset->fsps_named_set_avl_pool; uu_avl_pool_t *who_pool = fspset->fsps_who_perm_avl_pool; memset(fsperm, 0, sizeof (fs_perm_t)); if ((fsperm->fsp_sc_avl = uu_avl_create(nset_pool, NULL, UU_DEFAULT)) == NULL) nomem(); if ((fsperm->fsp_uge_avl = uu_avl_create(who_pool, NULL, UU_DEFAULT)) == NULL) nomem(); fsperm->fsp_set = fspset; fsperm->fsp_name = fsname; } static inline void fs_perm_fini(fs_perm_t *fsperm) { who_perm_node_t *node = uu_avl_first(fsperm->fsp_sc_avl); while (node != NULL) { who_perm_node_t *next_node = uu_avl_next(fsperm->fsp_sc_avl, node); who_perm_t *who_perm = &node->who_perm; who_perm_fini(who_perm); uu_avl_remove(fsperm->fsp_sc_avl, node); free(node); node = next_node; } node = uu_avl_first(fsperm->fsp_uge_avl); while (node != NULL) { who_perm_node_t *next_node = uu_avl_next(fsperm->fsp_uge_avl, node); who_perm_t *who_perm = &node->who_perm; who_perm_fini(who_perm); uu_avl_remove(fsperm->fsp_uge_avl, node); free(node); node = next_node; } uu_avl_destroy(fsperm->fsp_sc_avl); uu_avl_destroy(fsperm->fsp_uge_avl); } static void set_deleg_perm_node(uu_avl_t *avl, deleg_perm_node_t *node, zfs_deleg_who_type_t who_type, const char *name, char locality) { uu_avl_index_t idx = 0; deleg_perm_node_t *found_node = NULL; deleg_perm_t *deleg_perm = &node->dpn_perm; deleg_perm_init(deleg_perm, who_type, name); if ((found_node = uu_avl_find(avl, node, NULL, &idx)) == NULL) uu_avl_insert(avl, node, idx); else { node = found_node; deleg_perm = &node->dpn_perm; } switch (locality) { case ZFS_DELEG_LOCAL: deleg_perm->dp_local = B_TRUE; break; case ZFS_DELEG_DESCENDENT: deleg_perm->dp_descend = B_TRUE; break; case ZFS_DELEG_NA: break; default: assert(B_FALSE); /* invalid locality */ } } static inline int parse_who_perm(who_perm_t *who_perm, nvlist_t *nvl, char locality) { nvpair_t *nvp = NULL; 
fs_perm_set_t *fspset = who_perm->who_fsperm->fsp_set; uu_avl_t *avl = who_perm->who_deleg_perm_avl; zfs_deleg_who_type_t who_type = who_perm->who_type; while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { const char *name = nvpair_name(nvp); data_type_t type = nvpair_type(nvp); uu_avl_pool_t *avl_pool = fspset->fsps_deleg_perm_avl_pool; deleg_perm_node_t *node = safe_malloc(sizeof (deleg_perm_node_t)); VERIFY(type == DATA_TYPE_BOOLEAN); uu_avl_node_init(node, &node->dpn_avl_node, avl_pool); set_deleg_perm_node(avl, node, who_type, name, locality); } return (0); } static inline int parse_fs_perm(fs_perm_t *fsperm, nvlist_t *nvl) { nvpair_t *nvp = NULL; fs_perm_set_t *fspset = fsperm->fsp_set; while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { nvlist_t *nvl2 = NULL; const char *name = nvpair_name(nvp); uu_avl_t *avl = NULL; uu_avl_pool_t *avl_pool = NULL; zfs_deleg_who_type_t perm_type = name[0]; char perm_locality = name[1]; const char *perm_name = name + 3; who_perm_t *who_perm = NULL; assert('$' == name[2]); if (nvpair_value_nvlist(nvp, &nvl2) != 0) return (-1); switch (perm_type) { case ZFS_DELEG_CREATE: case ZFS_DELEG_CREATE_SETS: case ZFS_DELEG_NAMED_SET: case ZFS_DELEG_NAMED_SET_SETS: avl_pool = fspset->fsps_named_set_avl_pool; avl = fsperm->fsp_sc_avl; break; case ZFS_DELEG_USER: case ZFS_DELEG_USER_SETS: case ZFS_DELEG_GROUP: case ZFS_DELEG_GROUP_SETS: case ZFS_DELEG_EVERYONE: case ZFS_DELEG_EVERYONE_SETS: avl_pool = fspset->fsps_who_perm_avl_pool; avl = fsperm->fsp_uge_avl; break; default: assert(!"unhandled zfs_deleg_who_type_t"); } who_perm_node_t *found_node = NULL; who_perm_node_t *node = safe_malloc( sizeof (who_perm_node_t)); who_perm = &node->who_perm; uu_avl_index_t idx = 0; uu_avl_node_init(node, &node->who_avl_node, avl_pool); who_perm_init(who_perm, fsperm, perm_type, perm_name); if ((found_node = uu_avl_find(avl, node, NULL, &idx)) == NULL) { if (avl == fsperm->fsp_uge_avl) { uid_t rid = 0; struct passwd *p = NULL; struct group *g = NULL; const char *nice_name = NULL; switch (perm_type) { case ZFS_DELEG_USER_SETS: case ZFS_DELEG_USER: rid = atoi(perm_name); p = getpwuid(rid); if (p) nice_name = p->pw_name; break; case ZFS_DELEG_GROUP_SETS: case ZFS_DELEG_GROUP: rid = atoi(perm_name); g = getgrgid(rid); if (g) nice_name = g->gr_name; break; default: break; } if (nice_name != NULL) { (void) strlcpy( node->who_perm.who_ug_name, nice_name, 256); } else { /* User or group unknown */ (void) snprintf( node->who_perm.who_ug_name, sizeof (node->who_perm.who_ug_name), "(unknown: %d)", rid); } } uu_avl_insert(avl, node, idx); } else { node = found_node; who_perm = &node->who_perm; } assert(who_perm != NULL); (void) parse_who_perm(who_perm, nvl2, perm_locality); } return (0); } static inline int parse_fs_perm_set(fs_perm_set_t *fspset, nvlist_t *nvl) { nvpair_t *nvp = NULL; uu_avl_index_t idx = 0; while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { nvlist_t *nvl2 = NULL; const char *fsname = nvpair_name(nvp); data_type_t type = nvpair_type(nvp); fs_perm_t *fsperm = NULL; fs_perm_node_t *node = safe_malloc(sizeof (fs_perm_node_t)); fsperm = &node->fspn_fsperm; VERIFY(DATA_TYPE_NVLIST == type); uu_list_node_init(node, &node->fspn_list_node, fspset->fsps_list_pool); idx = uu_list_numnodes(fspset->fsps_list); fs_perm_init(fsperm, fspset, fsname); if (nvpair_value_nvlist(nvp, &nvl2) != 0) return (-1); (void) parse_fs_perm(fsperm, nvl2); uu_list_insert(fspset->fsps_list, node, idx); } return (0); } static inline const char * deleg_perm_comment(zfs_deleg_note_t note) { const 
char *str = ""; /* subcommands */ switch (note) { /* SUBCOMMANDS */ case ZFS_DELEG_NOTE_ALLOW: str = gettext("Must also have the permission that is being" "\n\t\t\t\tallowed"); break; case ZFS_DELEG_NOTE_CLONE: str = gettext("Must also have the 'create' ability and 'mount'" "\n\t\t\t\tability in the origin file system"); break; case ZFS_DELEG_NOTE_CREATE: str = gettext("Must also have the 'mount' ability"); break; case ZFS_DELEG_NOTE_DESTROY: str = gettext("Must also have the 'mount' ability"); break; case ZFS_DELEG_NOTE_DIFF: str = gettext("Allows lookup of paths within a dataset;" "\n\t\t\t\tgiven an object number. Ordinary users need this" "\n\t\t\t\tin order to use zfs diff"); break; case ZFS_DELEG_NOTE_HOLD: str = gettext("Allows adding a user hold to a snapshot"); break; case ZFS_DELEG_NOTE_MOUNT: str = gettext("Allows mount/umount of ZFS datasets"); break; case ZFS_DELEG_NOTE_PROMOTE: str = gettext("Must also have the 'mount'\n\t\t\t\tand" " 'promote' ability in the origin file system"); break; case ZFS_DELEG_NOTE_RECEIVE: str = gettext("Must also have the 'mount' and 'create'" " ability"); break; case ZFS_DELEG_NOTE_RELEASE: str = gettext("Allows releasing a user hold which\n\t\t\t\t" "might destroy the snapshot"); break; case ZFS_DELEG_NOTE_RENAME: str = gettext("Must also have the 'mount' and 'create'" "\n\t\t\t\tability in the new parent"); break; case ZFS_DELEG_NOTE_ROLLBACK: str = gettext(""); break; case ZFS_DELEG_NOTE_SEND: str = gettext(""); break; case ZFS_DELEG_NOTE_SHARE: str = gettext("Allows sharing file systems over NFS or SMB" "\n\t\t\t\tprotocols"); break; case ZFS_DELEG_NOTE_SNAPSHOT: str = gettext(""); break; case ZFS_DELEG_NOTE_LOAD_KEY: str = gettext("Allows loading or unloading an encryption key"); break; case ZFS_DELEG_NOTE_CHANGE_KEY: str = gettext("Allows changing or adding an encryption key"); break; /* * case ZFS_DELEG_NOTE_VSCAN: * str = gettext(""); * break; */ /* OTHER */ case ZFS_DELEG_NOTE_GROUPQUOTA: str = gettext("Allows accessing any groupquota@... property"); break; case ZFS_DELEG_NOTE_GROUPUSED: str = gettext("Allows reading any groupused@... property"); break; case ZFS_DELEG_NOTE_USERPROP: str = gettext("Allows changing any user property"); break; case ZFS_DELEG_NOTE_USERQUOTA: str = gettext("Allows accessing any userquota@... property"); break; case ZFS_DELEG_NOTE_USERUSED: str = gettext("Allows reading any userused@... property"); break; case ZFS_DELEG_NOTE_USEROBJQUOTA: str = gettext("Allows accessing any userobjquota@... property"); break; case ZFS_DELEG_NOTE_GROUPOBJQUOTA: str = gettext("Allows accessing any \n\t\t\t\t" "groupobjquota@... property"); break; case ZFS_DELEG_NOTE_GROUPOBJUSED: str = gettext("Allows reading any groupobjused@... property"); break; case ZFS_DELEG_NOTE_USEROBJUSED: str = gettext("Allows reading any userobjused@... property"); break; case ZFS_DELEG_NOTE_PROJECTQUOTA: str = gettext("Allows accessing any projectquota@... property"); break; case ZFS_DELEG_NOTE_PROJECTOBJQUOTA: str = gettext("Allows accessing any \n\t\t\t\t" "projectobjquota@... property"); break; case ZFS_DELEG_NOTE_PROJECTUSED: str = gettext("Allows reading any projectused@... property"); break; case ZFS_DELEG_NOTE_PROJECTOBJUSED: str = gettext("Allows accessing any \n\t\t\t\t" "projectobjused@... 
property"); break; /* other */ default: str = ""; } return (str); } struct allow_opts { boolean_t local; boolean_t descend; boolean_t user; boolean_t group; boolean_t everyone; boolean_t create; boolean_t set; boolean_t recursive; /* unallow only */ boolean_t prt_usage; boolean_t prt_perms; char *who; char *perms; const char *dataset; }; static inline int prop_cmp(const void *a, const void *b) { const char *str1 = *(const char **)a; const char *str2 = *(const char **)b; return (strcmp(str1, str2)); } static void allow_usage(boolean_t un, boolean_t requested, const char *msg) { const char *opt_desc[] = { "-h", gettext("show this help message and exit"), "-l", gettext("set permission locally"), "-d", gettext("set permission for descents"), "-u", gettext("set permission for user"), "-g", gettext("set permission for group"), "-e", gettext("set permission for everyone"), "-c", gettext("set create time permission"), "-s", gettext("define permission set"), /* unallow only */ "-r", gettext("remove permissions recursively"), }; size_t unallow_size = sizeof (opt_desc) / sizeof (char *); size_t allow_size = unallow_size - 2; const char *props[ZFS_NUM_PROPS]; int i; size_t count = 0; FILE *fp = requested ? stdout : stderr; zprop_desc_t *pdtbl = zfs_prop_get_table(); const char *fmt = gettext("%-16s %-14s\t%s\n"); (void) fprintf(fp, gettext("Usage: %s\n"), get_usage(un ? HELP_UNALLOW : HELP_ALLOW)); (void) fprintf(fp, gettext("Options:\n")); for (i = 0; i < (un ? unallow_size : allow_size); i += 2) { const char *opt = opt_desc[i]; const char *optdsc = opt_desc[i + 1]; (void) fprintf(fp, gettext(" %-10s %s\n"), opt, optdsc); } (void) fprintf(fp, gettext("\nThe following permissions are " "supported:\n\n")); (void) fprintf(fp, fmt, gettext("NAME"), gettext("TYPE"), gettext("NOTES")); for (i = 0; i < ZFS_NUM_DELEG_NOTES; i++) { const char *perm_name = zfs_deleg_perm_tbl[i].z_perm; zfs_deleg_note_t perm_note = zfs_deleg_perm_tbl[i].z_note; const char *perm_type = deleg_perm_type(perm_note); const char *perm_comment = deleg_perm_comment(perm_note); (void) fprintf(fp, fmt, perm_name, perm_type, perm_comment); } for (i = 0; i < ZFS_NUM_PROPS; i++) { zprop_desc_t *pd = &pdtbl[i]; if (pd->pd_visible != B_TRUE) continue; if (pd->pd_attr == PROP_READONLY) continue; props[count++] = pd->pd_name; } props[count] = NULL; qsort(props, count, sizeof (char *), prop_cmp); for (i = 0; i < count; i++) (void) fprintf(fp, fmt, props[i], gettext("property"), ""); if (msg != NULL) (void) fprintf(fp, gettext("\nzfs: error: %s"), msg); exit(requested ? 0 : 2); } static inline const char * munge_args(int argc, char **argv, boolean_t un, size_t expected_argc, char **permsp) { if (un && argc == expected_argc - 1) *permsp = NULL; else if (argc == expected_argc) *permsp = argv[argc - 2]; else allow_usage(un, B_FALSE, gettext("wrong number of parameters\n")); return (argv[argc - 1]); } static void parse_allow_args(int argc, char **argv, boolean_t un, struct allow_opts *opts) { int uge_sum = opts->user + opts->group + opts->everyone; int csuge_sum = opts->create + opts->set + uge_sum; int ldcsuge_sum = csuge_sum + opts->local + opts->descend; int all_sum = un ? 
ldcsuge_sum + opts->recursive : ldcsuge_sum; if (uge_sum > 1) allow_usage(un, B_FALSE, gettext("-u, -g, and -e are mutually exclusive\n")); if (opts->prt_usage) { if (argc == 0 && all_sum == 0) allow_usage(un, B_TRUE, NULL); else usage(B_FALSE); } if (opts->set) { if (csuge_sum > 1) allow_usage(un, B_FALSE, gettext("invalid options combined with -s\n")); opts->dataset = munge_args(argc, argv, un, 3, &opts->perms); if (argv[0][0] != '@') allow_usage(un, B_FALSE, gettext("invalid set name: missing '@' prefix\n")); opts->who = argv[0]; } else if (opts->create) { if (ldcsuge_sum > 1) allow_usage(un, B_FALSE, gettext("invalid options combined with -c\n")); opts->dataset = munge_args(argc, argv, un, 2, &opts->perms); } else if (opts->everyone) { if (csuge_sum > 1) allow_usage(un, B_FALSE, gettext("invalid options combined with -e\n")); opts->dataset = munge_args(argc, argv, un, 2, &opts->perms); } else if (uge_sum == 0 && argc > 0 && strcmp(argv[0], "everyone") == 0) { opts->everyone = B_TRUE; argc--; argv++; opts->dataset = munge_args(argc, argv, un, 2, &opts->perms); } else if (argc == 1 && !un) { opts->prt_perms = B_TRUE; opts->dataset = argv[argc-1]; } else { opts->dataset = munge_args(argc, argv, un, 3, &opts->perms); opts->who = argv[0]; } if (!opts->local && !opts->descend) { opts->local = B_TRUE; opts->descend = B_TRUE; } } static void store_allow_perm(zfs_deleg_who_type_t type, boolean_t local, boolean_t descend, const char *who, char *perms, nvlist_t *top_nvl) { int i; char ld[2] = { '\0', '\0' }; char who_buf[MAXNAMELEN + 32]; char base_type = '\0'; char set_type = '\0'; nvlist_t *base_nvl = NULL; nvlist_t *set_nvl = NULL; nvlist_t *nvl; if (nvlist_alloc(&base_nvl, NV_UNIQUE_NAME, 0) != 0) nomem(); if (nvlist_alloc(&set_nvl, NV_UNIQUE_NAME, 0) != 0) nomem(); switch (type) { case ZFS_DELEG_NAMED_SET_SETS: case ZFS_DELEG_NAMED_SET: set_type = ZFS_DELEG_NAMED_SET_SETS; base_type = ZFS_DELEG_NAMED_SET; ld[0] = ZFS_DELEG_NA; break; case ZFS_DELEG_CREATE_SETS: case ZFS_DELEG_CREATE: set_type = ZFS_DELEG_CREATE_SETS; base_type = ZFS_DELEG_CREATE; ld[0] = ZFS_DELEG_NA; break; case ZFS_DELEG_USER_SETS: case ZFS_DELEG_USER: set_type = ZFS_DELEG_USER_SETS; base_type = ZFS_DELEG_USER; if (local) ld[0] = ZFS_DELEG_LOCAL; if (descend) ld[1] = ZFS_DELEG_DESCENDENT; break; case ZFS_DELEG_GROUP_SETS: case ZFS_DELEG_GROUP: set_type = ZFS_DELEG_GROUP_SETS; base_type = ZFS_DELEG_GROUP; if (local) ld[0] = ZFS_DELEG_LOCAL; if (descend) ld[1] = ZFS_DELEG_DESCENDENT; break; case ZFS_DELEG_EVERYONE_SETS: case ZFS_DELEG_EVERYONE: set_type = ZFS_DELEG_EVERYONE_SETS; base_type = ZFS_DELEG_EVERYONE; if (local) ld[0] = ZFS_DELEG_LOCAL; if (descend) ld[1] = ZFS_DELEG_DESCENDENT; break; default: assert(set_type != '\0' && base_type != '\0'); } if (perms != NULL) { char *curr = perms; char *end = curr + strlen(perms); while (curr < end) { char *delim = strchr(curr, ','); if (delim == NULL) delim = end; else *delim = '\0'; if (curr[0] == '@') nvl = set_nvl; else nvl = base_nvl; (void) nvlist_add_boolean(nvl, curr); if (delim != end) *delim = ','; curr = delim + 1; } for (i = 0; i < 2; i++) { char locality = ld[i]; if (locality == 0) continue; if (!nvlist_empty(base_nvl)) { if (who != NULL) (void) snprintf(who_buf, sizeof (who_buf), "%c%c$%s", base_type, locality, who); else (void) snprintf(who_buf, sizeof (who_buf), "%c%c$", base_type, locality); (void) nvlist_add_nvlist(top_nvl, who_buf, base_nvl); } if (!nvlist_empty(set_nvl)) { if (who != NULL) (void) snprintf(who_buf, sizeof (who_buf), "%c%c$%s", set_type, 
locality, who); else (void) snprintf(who_buf, sizeof (who_buf), "%c%c$", set_type, locality); (void) nvlist_add_nvlist(top_nvl, who_buf, set_nvl); } } } else { for (i = 0; i < 2; i++) { char locality = ld[i]; if (locality == 0) continue; if (who != NULL) (void) snprintf(who_buf, sizeof (who_buf), "%c%c$%s", base_type, locality, who); else (void) snprintf(who_buf, sizeof (who_buf), "%c%c$", base_type, locality); (void) nvlist_add_boolean(top_nvl, who_buf); if (who != NULL) (void) snprintf(who_buf, sizeof (who_buf), "%c%c$%s", set_type, locality, who); else (void) snprintf(who_buf, sizeof (who_buf), "%c%c$", set_type, locality); (void) nvlist_add_boolean(top_nvl, who_buf); } } } static int construct_fsacl_list(boolean_t un, struct allow_opts *opts, nvlist_t **nvlp) { if (nvlist_alloc(nvlp, NV_UNIQUE_NAME, 0) != 0) nomem(); if (opts->set) { store_allow_perm(ZFS_DELEG_NAMED_SET, opts->local, opts->descend, opts->who, opts->perms, *nvlp); } else if (opts->create) { store_allow_perm(ZFS_DELEG_CREATE, opts->local, opts->descend, NULL, opts->perms, *nvlp); } else if (opts->everyone) { store_allow_perm(ZFS_DELEG_EVERYONE, opts->local, opts->descend, NULL, opts->perms, *nvlp); } else { char *curr = opts->who; char *end = curr + strlen(curr); while (curr < end) { const char *who; zfs_deleg_who_type_t who_type = ZFS_DELEG_WHO_UNKNOWN; char *endch; char *delim = strchr(curr, ','); char errbuf[256]; char id[64]; struct passwd *p = NULL; struct group *g = NULL; uid_t rid; if (delim == NULL) delim = end; else *delim = '\0'; rid = (uid_t)strtol(curr, &endch, 0); if (opts->user) { who_type = ZFS_DELEG_USER; if (*endch != '\0') p = getpwnam(curr); else p = getpwuid(rid); if (p != NULL) rid = p->pw_uid; else if (*endch != '\0') { (void) snprintf(errbuf, 256, gettext( "invalid user %s\n"), curr); allow_usage(un, B_TRUE, errbuf); } } else if (opts->group) { who_type = ZFS_DELEG_GROUP; if (*endch != '\0') g = getgrnam(curr); else g = getgrgid(rid); if (g != NULL) rid = g->gr_gid; else if (*endch != '\0') { (void) snprintf(errbuf, 256, gettext( "invalid group %s\n"), curr); allow_usage(un, B_TRUE, errbuf); } } else { if (*endch != '\0') { p = getpwnam(curr); } else { p = getpwuid(rid); } if (p == NULL) { if (*endch != '\0') { g = getgrnam(curr); } else { g = getgrgid(rid); } } if (p != NULL) { who_type = ZFS_DELEG_USER; rid = p->pw_uid; } else if (g != NULL) { who_type = ZFS_DELEG_GROUP; rid = g->gr_gid; } else { (void) snprintf(errbuf, 256, gettext( "invalid user/group %s\n"), curr); allow_usage(un, B_TRUE, errbuf); } } (void) sprintf(id, "%u", rid); who = id; store_allow_perm(who_type, opts->local, opts->descend, who, opts->perms, *nvlp); curr = delim + 1; } } return (0); } static void print_set_creat_perms(uu_avl_t *who_avl) { const char *sc_title[] = { gettext("Permission sets:\n"), gettext("Create time permissions:\n"), NULL }; who_perm_node_t *who_node = NULL; int prev_weight = -1; for (who_node = uu_avl_first(who_avl); who_node != NULL; who_node = uu_avl_next(who_avl, who_node)) { uu_avl_t *avl = who_node->who_perm.who_deleg_perm_avl; zfs_deleg_who_type_t who_type = who_node->who_perm.who_type; const char *who_name = who_node->who_perm.who_name; int weight = who_type2weight(who_type); boolean_t first = B_TRUE; deleg_perm_node_t *deleg_node; if (prev_weight != weight) { (void) printf("%s", sc_title[weight]); prev_weight = weight; } if (who_name == NULL || strnlen(who_name, 1) == 0) (void) printf("\t"); else (void) printf("\t%s ", who_name); for (deleg_node = uu_avl_first(avl); deleg_node != NULL; deleg_node 
= uu_avl_next(avl, deleg_node)) { if (first) { (void) printf("%s", deleg_node->dpn_perm.dp_name); first = B_FALSE; } else (void) printf(",%s", deleg_node->dpn_perm.dp_name); } (void) printf("\n"); } } static void print_uge_deleg_perms(uu_avl_t *who_avl, boolean_t local, boolean_t descend, const char *title) { who_perm_node_t *who_node = NULL; boolean_t prt_title = B_TRUE; uu_avl_walk_t *walk; if ((walk = uu_avl_walk_start(who_avl, UU_WALK_ROBUST)) == NULL) nomem(); while ((who_node = uu_avl_walk_next(walk)) != NULL) { const char *who_name = who_node->who_perm.who_name; const char *nice_who_name = who_node->who_perm.who_ug_name; uu_avl_t *avl = who_node->who_perm.who_deleg_perm_avl; zfs_deleg_who_type_t who_type = who_node->who_perm.who_type; char delim = ' '; deleg_perm_node_t *deleg_node; boolean_t prt_who = B_TRUE; for (deleg_node = uu_avl_first(avl); deleg_node != NULL; deleg_node = uu_avl_next(avl, deleg_node)) { if (local != deleg_node->dpn_perm.dp_local || descend != deleg_node->dpn_perm.dp_descend) continue; if (prt_who) { const char *who = NULL; if (prt_title) { prt_title = B_FALSE; (void) printf("%s", title); } switch (who_type) { case ZFS_DELEG_USER_SETS: case ZFS_DELEG_USER: who = gettext("user"); if (nice_who_name) who_name = nice_who_name; break; case ZFS_DELEG_GROUP_SETS: case ZFS_DELEG_GROUP: who = gettext("group"); if (nice_who_name) who_name = nice_who_name; break; case ZFS_DELEG_EVERYONE_SETS: case ZFS_DELEG_EVERYONE: who = gettext("everyone"); who_name = NULL; break; default: assert(who != NULL); } prt_who = B_FALSE; if (who_name == NULL) (void) printf("\t%s", who); else (void) printf("\t%s %s", who, who_name); } (void) printf("%c%s", delim, deleg_node->dpn_perm.dp_name); delim = ','; } if (!prt_who) (void) printf("\n"); } uu_avl_walk_end(walk); } static void print_fs_perms(fs_perm_set_t *fspset) { fs_perm_node_t *node = NULL; char buf[MAXNAMELEN + 32]; const char *dsname = buf; for (node = uu_list_first(fspset->fsps_list); node != NULL; node = uu_list_next(fspset->fsps_list, node)) { uu_avl_t *sc_avl = node->fspn_fsperm.fsp_sc_avl; uu_avl_t *uge_avl = node->fspn_fsperm.fsp_uge_avl; int left = 0; (void) snprintf(buf, sizeof (buf), gettext("---- Permissions on %s "), node->fspn_fsperm.fsp_name); (void) printf("%s", dsname); left = 70 - strlen(buf); while (left-- > 0) (void) printf("-"); (void) printf("\n"); print_set_creat_perms(sc_avl); print_uge_deleg_perms(uge_avl, B_TRUE, B_FALSE, gettext("Local permissions:\n")); print_uge_deleg_perms(uge_avl, B_FALSE, B_TRUE, gettext("Descendent permissions:\n")); print_uge_deleg_perms(uge_avl, B_TRUE, B_TRUE, gettext("Local+Descendent permissions:\n")); } } static fs_perm_set_t fs_perm_set = { NULL, NULL, NULL, NULL }; struct deleg_perms { boolean_t un; nvlist_t *nvl; }; static int set_deleg_perms(zfs_handle_t *zhp, void *data) { struct deleg_perms *perms = (struct deleg_perms *)data; zfs_type_t zfs_type = zfs_get_type(zhp); if (zfs_type != ZFS_TYPE_FILESYSTEM && zfs_type != ZFS_TYPE_VOLUME) return (0); return (zfs_set_fsacl(zhp, perms->un, perms->nvl)); } static int zfs_do_allow_unallow_impl(int argc, char **argv, boolean_t un) { zfs_handle_t *zhp; nvlist_t *perm_nvl = NULL; nvlist_t *update_perm_nvl = NULL; int error = 1; int c; struct allow_opts opts = { 0 }; const char *optstr = un ? 
"ldugecsrh" : "ldugecsh"; /* check opts */ while ((c = getopt(argc, argv, optstr)) != -1) { switch (c) { case 'l': opts.local = B_TRUE; break; case 'd': opts.descend = B_TRUE; break; case 'u': opts.user = B_TRUE; break; case 'g': opts.group = B_TRUE; break; case 'e': opts.everyone = B_TRUE; break; case 's': opts.set = B_TRUE; break; case 'c': opts.create = B_TRUE; break; case 'r': opts.recursive = B_TRUE; break; case ':': (void) fprintf(stderr, gettext("missing argument for " "'%c' option\n"), optopt); usage(B_FALSE); break; case 'h': opts.prt_usage = B_TRUE; break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); usage(B_FALSE); } } argc -= optind; argv += optind; /* check arguments */ parse_allow_args(argc, argv, un, &opts); /* try to open the dataset */ if ((zhp = zfs_open(g_zfs, opts.dataset, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME)) == NULL) { (void) fprintf(stderr, "Failed to open dataset: %s\n", opts.dataset); return (-1); } if (zfs_get_fsacl(zhp, &perm_nvl) != 0) goto cleanup2; fs_perm_set_init(&fs_perm_set); if (parse_fs_perm_set(&fs_perm_set, perm_nvl) != 0) { (void) fprintf(stderr, "Failed to parse fsacl permissions\n"); goto cleanup1; } if (opts.prt_perms) print_fs_perms(&fs_perm_set); else { (void) construct_fsacl_list(un, &opts, &update_perm_nvl); if (zfs_set_fsacl(zhp, un, update_perm_nvl) != 0) goto cleanup0; if (un && opts.recursive) { struct deleg_perms data = { un, update_perm_nvl }; if (zfs_iter_filesystems(zhp, set_deleg_perms, &data) != 0) goto cleanup0; } } error = 0; cleanup0: nvlist_free(perm_nvl); nvlist_free(update_perm_nvl); cleanup1: fs_perm_set_fini(&fs_perm_set); cleanup2: zfs_close(zhp); return (error); } static int zfs_do_allow(int argc, char **argv) { return (zfs_do_allow_unallow_impl(argc, argv, B_FALSE)); } static int zfs_do_unallow(int argc, char **argv) { return (zfs_do_allow_unallow_impl(argc, argv, B_TRUE)); } static int zfs_do_hold_rele_impl(int argc, char **argv, boolean_t holding) { int errors = 0; int i; const char *tag; boolean_t recursive = B_FALSE; const char *opts = holding ? "rt" : "r"; int c; /* check options */ while ((c = getopt(argc, argv, opts)) != -1) { switch (c) { case 'r': recursive = B_TRUE; break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); usage(B_FALSE); } } argc -= optind; argv += optind; /* check number of arguments */ if (argc < 2) usage(B_FALSE); tag = argv[0]; --argc; ++argv; if (holding && tag[0] == '.') { /* tags starting with '.' are reserved for libzfs */ (void) fprintf(stderr, gettext("tag may not start with '.'\n")); usage(B_FALSE); } for (i = 0; i < argc; ++i) { zfs_handle_t *zhp; char parent[ZFS_MAX_DATASET_NAME_LEN]; const char *delim; char *path = argv[i]; delim = strchr(path, '@'); if (delim == NULL) { (void) fprintf(stderr, gettext("'%s' is not a snapshot\n"), path); ++errors; continue; } - (void) strncpy(parent, path, delim - path); - parent[delim - path] = '\0'; + (void) strlcpy(parent, path, MIN(sizeof (parent), + delim - path + 1)); zhp = zfs_open(g_zfs, parent, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME); if (zhp == NULL) { ++errors; continue; } if (holding) { if (zfs_hold(zhp, delim+1, tag, recursive, -1) != 0) ++errors; } else { if (zfs_release(zhp, delim+1, tag, recursive) != 0) ++errors; } zfs_close(zhp); } return (errors != 0); } /* * zfs hold [-r] [-t] ... * * -r Recursively hold * * Apply a user-hold with the given tag to the list of snapshots. 
*/ static int zfs_do_hold(int argc, char **argv) { return (zfs_do_hold_rele_impl(argc, argv, B_TRUE)); } /* * zfs release [-r] ... * * -r Recursively release * * Release a user-hold with the given tag from the list of snapshots. */ static int zfs_do_release(int argc, char **argv) { return (zfs_do_hold_rele_impl(argc, argv, B_FALSE)); } typedef struct holds_cbdata { boolean_t cb_recursive; const char *cb_snapname; nvlist_t **cb_nvlp; size_t cb_max_namelen; size_t cb_max_taglen; } holds_cbdata_t; #define STRFTIME_FMT_STR "%a %b %e %H:%M %Y" #define DATETIME_BUF_LEN (32) /* * */ static void print_holds(boolean_t scripted, int nwidth, int tagwidth, nvlist_t *nvl) { int i; nvpair_t *nvp = NULL; const char *const hdr_cols[] = { "NAME", "TAG", "TIMESTAMP" }; const char *col; if (!scripted) { for (i = 0; i < 3; i++) { col = gettext(hdr_cols[i]); if (i < 2) (void) printf("%-*s ", i ? tagwidth : nwidth, col); else (void) printf("%s\n", col); } } while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { char *zname = nvpair_name(nvp); nvlist_t *nvl2; nvpair_t *nvp2 = NULL; (void) nvpair_value_nvlist(nvp, &nvl2); while ((nvp2 = nvlist_next_nvpair(nvl2, nvp2)) != NULL) { char tsbuf[DATETIME_BUF_LEN]; const char *tagname = nvpair_name(nvp2); uint64_t val = 0; time_t time; struct tm t; (void) nvpair_value_uint64(nvp2, &val); time = (time_t)val; (void) localtime_r(&time, &t); (void) strftime(tsbuf, DATETIME_BUF_LEN, gettext(STRFTIME_FMT_STR), &t); if (scripted) { (void) printf("%s\t%s\t%s\n", zname, tagname, tsbuf); } else { (void) printf("%-*s %-*s %s\n", nwidth, zname, tagwidth, tagname, tsbuf); } } } } /* * Generic callback function to list a dataset or snapshot. */ static int holds_callback(zfs_handle_t *zhp, void *data) { holds_cbdata_t *cbp = data; nvlist_t *top_nvl = *cbp->cb_nvlp; nvlist_t *nvl = NULL; nvpair_t *nvp = NULL; const char *zname = zfs_get_name(zhp); size_t znamelen = strlen(zname); if (cbp->cb_recursive) { const char *snapname; char *delim = strchr(zname, '@'); if (delim == NULL) return (0); snapname = delim + 1; if (strcmp(cbp->cb_snapname, snapname)) return (0); } if (zfs_get_holds(zhp, &nvl) != 0) return (-1); if (znamelen > cbp->cb_max_namelen) cbp->cb_max_namelen = znamelen; while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { const char *tag = nvpair_name(nvp); size_t taglen = strlen(tag); if (taglen > cbp->cb_max_taglen) cbp->cb_max_taglen = taglen; } return (nvlist_add_nvlist(top_nvl, zname, nvl)); } /* * zfs holds [-rH] ... * * -r Lists holds that are set on the named snapshots recursively. * -H Scripted mode; elide headers and separate columns by tabs. 
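 *
 * For example (again with a hypothetical snapshot name), this lists
 * the holds on tank/home@monday and on same-named snapshots of all
 * descendant datasets, in tab-separated scripted form:
 *
 *	# zfs holds -rH tank/home@monday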
*/ static int zfs_do_holds(int argc, char **argv) { int c; boolean_t errors = B_FALSE; boolean_t scripted = B_FALSE; boolean_t recursive = B_FALSE; int types = ZFS_TYPE_SNAPSHOT; holds_cbdata_t cb = { 0 }; int limit = 0; int ret = 0; int flags = 0; /* check options */ while ((c = getopt(argc, argv, "rH")) != -1) { switch (c) { case 'r': recursive = B_TRUE; break; case 'H': scripted = B_TRUE; break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); usage(B_FALSE); } } if (recursive) { types |= ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME; flags |= ZFS_ITER_RECURSE; } argc -= optind; argv += optind; /* check number of arguments */ if (argc < 1) usage(B_FALSE); nvlist_t *nvl = fnvlist_alloc(); for (int i = 0; i < argc; ++i) { char *snapshot = argv[i]; const char *delim; const char *snapname; delim = strchr(snapshot, '@'); if (delim == NULL) { (void) fprintf(stderr, gettext("'%s' is not a snapshot\n"), snapshot); errors = B_TRUE; continue; } snapname = delim + 1; if (recursive) snapshot[delim - snapshot] = '\0'; cb.cb_recursive = recursive; cb.cb_snapname = snapname; cb.cb_nvlp = &nvl; /* * 1. collect holds data, set format options */ ret = zfs_for_each(1, argv + i, flags, types, NULL, NULL, limit, holds_callback, &cb); if (ret != 0) errors = B_TRUE; } /* * 2. print holds data */ print_holds(scripted, cb.cb_max_namelen, cb.cb_max_taglen, nvl); if (nvlist_empty(nvl)) (void) fprintf(stderr, gettext("no datasets available\n")); nvlist_free(nvl); return (errors); } #define CHECK_SPINNER 30 #define SPINNER_TIME 3 /* seconds */ #define MOUNT_TIME 1 /* seconds */ typedef struct get_all_state { boolean_t ga_verbose; get_all_cb_t *ga_cbp; } get_all_state_t; static int get_one_dataset(zfs_handle_t *zhp, void *data) { static const char *const spin[] = { "-", "\\", "|", "/" }; static int spinval = 0; static int spincheck = 0; static time_t last_spin_time = (time_t)0; get_all_state_t *state = data; zfs_type_t type = zfs_get_type(zhp); if (state->ga_verbose) { if (--spincheck < 0) { time_t now = time(NULL); if (last_spin_time + SPINNER_TIME < now) { update_progress(spin[spinval++ % 4]); last_spin_time = now; } spincheck = CHECK_SPINNER; } } /* * Iterate over any nested datasets. */ if (zfs_iter_filesystems(zhp, get_one_dataset, data) != 0) { zfs_close(zhp); return (1); } /* * Skip any datasets whose type does not match. */ if ((type & ZFS_TYPE_FILESYSTEM) == 0) { zfs_close(zhp); return (0); } libzfs_add_handle(state->ga_cbp, zhp); assert(state->ga_cbp->cb_used <= state->ga_cbp->cb_alloc); return (0); } static void get_all_datasets(get_all_cb_t *cbp, boolean_t verbose) { get_all_state_t state = { .ga_verbose = verbose, .ga_cbp = cbp }; if (verbose) set_progress_header(gettext("Reading ZFS config")); (void) zfs_iter_root(g_zfs, get_one_dataset, &state); if (verbose) finish_progress(gettext("done.")); } /* * Generic callback for sharing or mounting filesystems. Because the code is so * similar, we have a common function with an extra parameter to determine which * mode we are using. 
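 *
 * The mode is passed as OP_SHARE or OP_MOUNT (see the enum and
 * share_mount_one() below). A minimal caller sketch, assuming 'zhp'
 * is an already-open ZFS_TYPE_FILESYSTEM handle, that explicitly
 * mounts one dataset with default options and no share protocol:
 *
 *	ret = share_mount_one(zhp, OP_MOUNT, 0, SA_NO_PROTOCOL,
 *	    B_TRUE, NULL);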
*/ typedef enum { OP_SHARE, OP_MOUNT } share_mount_op_t; typedef struct share_mount_state { share_mount_op_t sm_op; boolean_t sm_verbose; int sm_flags; char *sm_options; enum sa_protocol sm_proto; /* only valid for OP_SHARE */ pthread_mutex_t sm_lock; /* protects the remaining fields */ uint_t sm_total; /* number of filesystems to process */ uint_t sm_done; /* number of filesystems processed */ int sm_status; /* -1 if any of the share/mount operations failed */ } share_mount_state_t; /* * Share or mount a dataset. */ static int share_mount_one(zfs_handle_t *zhp, int op, int flags, enum sa_protocol protocol, boolean_t explicit, const char *options) { char mountpoint[ZFS_MAXPROPLEN]; char shareopts[ZFS_MAXPROPLEN]; char smbshareopts[ZFS_MAXPROPLEN]; const char *cmdname = op == OP_SHARE ? "share" : "mount"; struct mnttab mnt; uint64_t zoned, canmount; boolean_t shared_nfs, shared_smb; assert(zfs_get_type(zhp) & ZFS_TYPE_FILESYSTEM); /* * Check to make sure we can mount/share this dataset. If we * are in the global zone and the filesystem is exported to a * local zone, or if we are in a local zone and the * filesystem is not exported, then it is an error. */ zoned = zfs_prop_get_int(zhp, ZFS_PROP_ZONED); if (zoned && getzoneid() == GLOBAL_ZONEID) { if (!explicit) return (0); (void) fprintf(stderr, gettext("cannot %s '%s': " "dataset is exported to a local zone\n"), cmdname, zfs_get_name(zhp)); return (1); } else if (!zoned && getzoneid() != GLOBAL_ZONEID) { if (!explicit) return (0); (void) fprintf(stderr, gettext("cannot %s '%s': " "permission denied\n"), cmdname, zfs_get_name(zhp)); return (1); } /* * Ignore any filesystems which don't apply to us. This * includes those with a legacy mountpoint, or those with * legacy share options. */ verify(zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, mountpoint, sizeof (mountpoint), NULL, NULL, 0, B_FALSE) == 0); verify(zfs_prop_get(zhp, ZFS_PROP_SHARENFS, shareopts, sizeof (shareopts), NULL, NULL, 0, B_FALSE) == 0); verify(zfs_prop_get(zhp, ZFS_PROP_SHARESMB, smbshareopts, sizeof (smbshareopts), NULL, NULL, 0, B_FALSE) == 0); if (op == OP_SHARE && strcmp(shareopts, "off") == 0 && strcmp(smbshareopts, "off") == 0) { if (!explicit) return (0); (void) fprintf(stderr, gettext("cannot share '%s': " "legacy share\n"), zfs_get_name(zhp)); (void) fprintf(stderr, gettext("use exports(5) or " "smb.conf(5) to share this filesystem, or set " "the sharenfs or sharesmb property\n")); return (1); } /* * We cannot share or mount legacy filesystems. If the * shareopts is non-legacy but the mountpoint is legacy, we * treat it as a legacy share. 
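 *
 * For example, a dataset with sharenfs=on but mountpoint=legacy is
 * still rejected here; it has to be handled with the legacy mount and
 * export tools rather than with 'zfs mount' or 'zfs share'.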
*/ if (strcmp(mountpoint, "legacy") == 0) { if (!explicit) return (0); (void) fprintf(stderr, gettext("cannot %s '%s': " "legacy mountpoint\n"), cmdname, zfs_get_name(zhp)); (void) fprintf(stderr, gettext("use %s(8) to " "%s this filesystem\n"), cmdname, cmdname); return (1); } if (strcmp(mountpoint, "none") == 0) { if (!explicit) return (0); (void) fprintf(stderr, gettext("cannot %s '%s': no " "mountpoint set\n"), cmdname, zfs_get_name(zhp)); return (1); } /* * canmount explicit outcome * on no pass through * on yes pass through * off no return 0 * off yes display error, return 1 * noauto no return 0 * noauto yes pass through */ canmount = zfs_prop_get_int(zhp, ZFS_PROP_CANMOUNT); if (canmount == ZFS_CANMOUNT_OFF) { if (!explicit) return (0); (void) fprintf(stderr, gettext("cannot %s '%s': " "'canmount' property is set to 'off'\n"), cmdname, zfs_get_name(zhp)); return (1); } else if (canmount == ZFS_CANMOUNT_NOAUTO && !explicit) { /* * When performing a 'zfs mount -a', we skip any mounts for * datasets that have 'noauto' set. Sharing a dataset with * 'noauto' set is only allowed if it's mounted. */ if (op == OP_MOUNT) return (0); if (op == OP_SHARE && !zfs_is_mounted(zhp, NULL)) { /* also purge it from existing exports */ zfs_unshare(zhp, mountpoint, NULL); return (0); } } /* * If this filesystem is encrypted and does not have * a loaded key, we can not mount it. */ if ((flags & MS_CRYPT) == 0 && zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION) != ZIO_CRYPT_OFF && zfs_prop_get_int(zhp, ZFS_PROP_KEYSTATUS) == ZFS_KEYSTATUS_UNAVAILABLE) { if (!explicit) return (0); (void) fprintf(stderr, gettext("cannot %s '%s': " "encryption key not loaded\n"), cmdname, zfs_get_name(zhp)); return (1); } /* * If this filesystem is inconsistent and has a receive resume * token, we can not mount it. */ if (zfs_prop_get_int(zhp, ZFS_PROP_INCONSISTENT) && zfs_prop_get(zhp, ZFS_PROP_RECEIVE_RESUME_TOKEN, NULL, 0, NULL, NULL, 0, B_TRUE) == 0) { if (!explicit) return (0); (void) fprintf(stderr, gettext("cannot %s '%s': " "Contains partially-completed state from " "\"zfs receive -s\", which can be resumed with " "\"zfs send -t\"\n"), cmdname, zfs_get_name(zhp)); return (1); } if (zfs_prop_get_int(zhp, ZFS_PROP_REDACTED) && !(flags & MS_FORCE)) { if (!explicit) return (0); (void) fprintf(stderr, gettext("cannot %s '%s': " "Dataset is not complete, was created by receiving " "a redacted zfs send stream.\n"), cmdname, zfs_get_name(zhp)); return (1); } /* * At this point, we have verified that the mountpoint and/or * shareopts are appropriate for auto management. If the * filesystem is already mounted or shared, return (failing * for explicit requests); otherwise mount or share the * filesystem. */ switch (op) { case OP_SHARE: { enum sa_protocol prot[] = {SA_PROTOCOL_NFS, SA_NO_PROTOCOL}; shared_nfs = zfs_is_shared(zhp, NULL, prot); *prot = SA_PROTOCOL_SMB; shared_smb = zfs_is_shared(zhp, NULL, prot); if ((shared_nfs && shared_smb) || (shared_nfs && strcmp(shareopts, "on") == 0 && strcmp(smbshareopts, "off") == 0) || (shared_smb && strcmp(smbshareopts, "on") == 0 && strcmp(shareopts, "off") == 0)) { if (!explicit) return (0); (void) fprintf(stderr, gettext("cannot share " "'%s': filesystem already shared\n"), zfs_get_name(zhp)); return (1); } if (!zfs_is_mounted(zhp, NULL) && zfs_mount(zhp, NULL, flags) != 0) return (1); *prot = protocol; if (zfs_share(zhp, protocol == SA_NO_PROTOCOL ? 
NULL : prot)) return (1); } break; case OP_MOUNT: mnt.mnt_mntopts = (char *)(options ?: ""); if (!hasmntopt(&mnt, MNTOPT_REMOUNT) && zfs_is_mounted(zhp, NULL)) { if (!explicit) return (0); (void) fprintf(stderr, gettext("cannot mount " "'%s': filesystem already mounted\n"), zfs_get_name(zhp)); return (1); } if (zfs_mount(zhp, options, flags) != 0) return (1); break; } return (0); } /* * Reports progress in the form "(current/total)". Not thread-safe. */ static void report_mount_progress(int current, int total) { static time_t last_progress_time = 0; time_t now = time(NULL); char info[32]; /* display header if we're here for the first time */ if (current == 1) { set_progress_header(gettext("Mounting ZFS filesystems")); } else if (current != total && last_progress_time + MOUNT_TIME >= now) { /* too soon to report again */ return; } last_progress_time = now; (void) sprintf(info, "(%d/%d)", current, total); if (current == total) finish_progress(info); else update_progress(info); } /* * zfs_foreach_mountpoint() callback that mounts or shares one filesystem and * updates the progress meter. */ static int share_mount_one_cb(zfs_handle_t *zhp, void *arg) { share_mount_state_t *sms = arg; int ret; ret = share_mount_one(zhp, sms->sm_op, sms->sm_flags, sms->sm_proto, B_FALSE, sms->sm_options); pthread_mutex_lock(&sms->sm_lock); if (ret != 0) sms->sm_status = ret; sms->sm_done++; if (sms->sm_verbose) report_mount_progress(sms->sm_done, sms->sm_total); pthread_mutex_unlock(&sms->sm_lock); return (ret); } static void append_options(char *mntopts, char *newopts) { int len = strlen(mntopts); /* original length plus new string to append plus 1 for the comma */ if (len + 1 + strlen(newopts) >= MNT_LINE_MAX) { (void) fprintf(stderr, gettext("the opts argument for " "'%s' option is too long (more than %d chars)\n"), "-o", MNT_LINE_MAX); usage(B_FALSE); } if (*mntopts) mntopts[len++] = ','; (void) strcpy(&mntopts[len], newopts); } static enum sa_protocol sa_protocol_decode(const char *protocol) { for (enum sa_protocol i = 0; i < ARRAY_SIZE(sa_protocol_names); ++i) if (strcmp(protocol, sa_protocol_names[i]) == 0) return (i); (void) fputs(gettext("share type must be one of: "), stderr); for (enum sa_protocol i = 0; i < ARRAY_SIZE(sa_protocol_names); ++i) (void) fprintf(stderr, "%s%s", i != 0 ? ", " : "", sa_protocol_names[i]); (void) fputc('\n', stderr); usage(B_FALSE); } static int share_mount(int op, int argc, char **argv) { int do_all = 0; boolean_t verbose = B_FALSE; int c, ret = 0; char *options = NULL; int flags = 0; /* check options */ while ((c = getopt(argc, argv, op == OP_MOUNT ? 
":alvo:Of" : "al")) != -1) { switch (c) { case 'a': do_all = 1; break; case 'v': verbose = B_TRUE; break; case 'l': flags |= MS_CRYPT; break; case 'o': if (*optarg == '\0') { (void) fprintf(stderr, gettext("empty mount " "options (-o) specified\n")); usage(B_FALSE); } if (options == NULL) options = safe_malloc(MNT_LINE_MAX + 1); /* option validation is done later */ append_options(options, optarg); break; case 'O': flags |= MS_OVERLAY; break; case 'f': flags |= MS_FORCE; break; case ':': (void) fprintf(stderr, gettext("missing argument for " "'%c' option\n"), optopt); usage(B_FALSE); break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); usage(B_FALSE); } } argc -= optind; argv += optind; /* check number of arguments */ if (do_all) { enum sa_protocol protocol = SA_NO_PROTOCOL; if (op == OP_SHARE && argc > 0) { protocol = sa_protocol_decode(argv[0]); argc--; argv++; } if (argc != 0) { (void) fprintf(stderr, gettext("too many arguments\n")); usage(B_FALSE); } start_progress_timer(); get_all_cb_t cb = { 0 }; get_all_datasets(&cb, verbose); if (cb.cb_used == 0) { free(options); return (0); } share_mount_state_t share_mount_state = { 0 }; share_mount_state.sm_op = op; share_mount_state.sm_verbose = verbose; share_mount_state.sm_flags = flags; share_mount_state.sm_options = options; share_mount_state.sm_proto = protocol; share_mount_state.sm_total = cb.cb_used; pthread_mutex_init(&share_mount_state.sm_lock, NULL); /* For a 'zfs share -a' operation start with a clean slate. */ zfs_truncate_shares(NULL); /* * libshare isn't mt-safe, so only do the operation in parallel * if we're mounting. Additionally, the key-loading option must * be serialized so that we can prompt the user for their keys * in a consistent manner. */ zfs_foreach_mountpoint(g_zfs, cb.cb_handles, cb.cb_used, share_mount_one_cb, &share_mount_state, op == OP_MOUNT && !(flags & MS_CRYPT)); zfs_commit_shares(NULL); ret = share_mount_state.sm_status; for (int i = 0; i < cb.cb_used; i++) zfs_close(cb.cb_handles[i]); free(cb.cb_handles); } else if (argc == 0) { FILE *mnttab; struct mnttab entry; if ((op == OP_SHARE) || (options != NULL)) { (void) fprintf(stderr, gettext("missing filesystem " "argument (specify -a for all)\n")); usage(B_FALSE); } /* * When mount is given no arguments, go through * /proc/self/mounts and display any active ZFS mounts. * We hide any snapshots, since they are controlled * automatically. */ if ((mnttab = fopen(MNTTAB, "re")) == NULL) { free(options); return (ENOENT); } while (getmntent(mnttab, &entry) == 0) { if (strcmp(entry.mnt_fstype, MNTTYPE_ZFS) != 0 || strchr(entry.mnt_special, '@') != NULL) continue; (void) printf("%-30s %s\n", entry.mnt_special, entry.mnt_mountp); } (void) fclose(mnttab); } else { zfs_handle_t *zhp; if (argc > 1) { (void) fprintf(stderr, gettext("too many arguments\n")); usage(B_FALSE); } if ((zhp = zfs_open(g_zfs, argv[0], ZFS_TYPE_FILESYSTEM)) == NULL) { ret = 1; } else { ret = share_mount_one(zhp, op, flags, SA_NO_PROTOCOL, B_TRUE, options); zfs_commit_shares(NULL); zfs_close(zhp); } } free(options); return (ret); } /* * zfs mount -a * zfs mount filesystem * * Mount all filesystems, or mount the given filesystem. */ static int zfs_do_mount(int argc, char **argv) { return (share_mount(OP_MOUNT, argc, argv)); } /* * zfs share -a [nfs | smb] * zfs share filesystem * * Share all filesystems, or share the given filesystem. 
*/ static int zfs_do_share(int argc, char **argv) { return (share_mount(OP_SHARE, argc, argv)); } typedef struct unshare_unmount_node { zfs_handle_t *un_zhp; char *un_mountp; uu_avl_node_t un_avlnode; } unshare_unmount_node_t; static int unshare_unmount_compare(const void *larg, const void *rarg, void *unused) { (void) unused; const unshare_unmount_node_t *l = larg; const unshare_unmount_node_t *r = rarg; return (strcmp(l->un_mountp, r->un_mountp)); } /* * Convenience routine used by zfs_do_umount() and manual_unmount(). Given an * absolute path, find the entry /proc/self/mounts, verify that it's a * ZFS filesystem, and unmount it appropriately. */ static int unshare_unmount_path(int op, char *path, int flags, boolean_t is_manual) { zfs_handle_t *zhp; int ret = 0; struct stat64 statbuf; struct extmnttab entry; const char *cmdname = (op == OP_SHARE) ? "unshare" : "unmount"; ino_t path_inode; /* * Search for the given (major,minor) pair in the mount table. */ if (getextmntent(path, &entry, &statbuf) != 0) { if (op == OP_SHARE) { (void) fprintf(stderr, gettext("cannot %s '%s': not " "currently mounted\n"), cmdname, path); return (1); } (void) fprintf(stderr, gettext("warning: %s not in" "/proc/self/mounts\n"), path); if ((ret = umount2(path, flags)) != 0) (void) fprintf(stderr, gettext("%s: %s\n"), path, strerror(errno)); return (ret != 0); } path_inode = statbuf.st_ino; if (strcmp(entry.mnt_fstype, MNTTYPE_ZFS) != 0) { (void) fprintf(stderr, gettext("cannot %s '%s': not a ZFS " "filesystem\n"), cmdname, path); return (1); } if ((zhp = zfs_open(g_zfs, entry.mnt_special, ZFS_TYPE_FILESYSTEM)) == NULL) return (1); ret = 1; if (stat64(entry.mnt_mountp, &statbuf) != 0) { (void) fprintf(stderr, gettext("cannot %s '%s': %s\n"), cmdname, path, strerror(errno)); goto out; } else if (statbuf.st_ino != path_inode) { (void) fprintf(stderr, gettext("cannot " "%s '%s': not a mountpoint\n"), cmdname, path); goto out; } if (op == OP_SHARE) { char nfs_mnt_prop[ZFS_MAXPROPLEN]; char smbshare_prop[ZFS_MAXPROPLEN]; verify(zfs_prop_get(zhp, ZFS_PROP_SHARENFS, nfs_mnt_prop, sizeof (nfs_mnt_prop), NULL, NULL, 0, B_FALSE) == 0); verify(zfs_prop_get(zhp, ZFS_PROP_SHARESMB, smbshare_prop, sizeof (smbshare_prop), NULL, NULL, 0, B_FALSE) == 0); if (strcmp(nfs_mnt_prop, "off") == 0 && strcmp(smbshare_prop, "off") == 0) { (void) fprintf(stderr, gettext("cannot unshare " "'%s': legacy share\n"), path); (void) fprintf(stderr, gettext("use exportfs(8) " "or smbcontrol(1) to unshare this filesystem\n")); } else if (!zfs_is_shared(zhp, NULL, NULL)) { (void) fprintf(stderr, gettext("cannot unshare '%s': " "not currently shared\n"), path); } else { ret = zfs_unshare(zhp, path, NULL); zfs_commit_shares(NULL); } } else { char mtpt_prop[ZFS_MAXPROPLEN]; verify(zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, mtpt_prop, sizeof (mtpt_prop), NULL, NULL, 0, B_FALSE) == 0); if (is_manual) { ret = zfs_unmount(zhp, NULL, flags); } else if (strcmp(mtpt_prop, "legacy") == 0) { (void) fprintf(stderr, gettext("cannot unmount " "'%s': legacy mountpoint\n"), zfs_get_name(zhp)); (void) fprintf(stderr, gettext("use umount(8) " "to unmount this filesystem\n")); } else { ret = zfs_unmountall(zhp, flags); } } out: zfs_close(zhp); return (ret != 0); } /* * Generic callback for unsharing or unmounting a filesystem. 
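 *
 * Handles both 'zfs unmount' (called with OP_MOUNT) and 'zfs unshare'
 * (called with OP_SHARE); see zfs_do_unmount() and zfs_do_unshare()
 * below.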
*/ static int unshare_unmount(int op, int argc, char **argv) { int do_all = 0; int flags = 0; int ret = 0; int c; zfs_handle_t *zhp; char nfs_mnt_prop[ZFS_MAXPROPLEN]; char sharesmb[ZFS_MAXPROPLEN]; /* check options */ while ((c = getopt(argc, argv, op == OP_SHARE ? ":a" : "afu")) != -1) { switch (c) { case 'a': do_all = 1; break; case 'f': flags |= MS_FORCE; break; case 'u': flags |= MS_CRYPT; break; case ':': (void) fprintf(stderr, gettext("missing argument for " "'%c' option\n"), optopt); usage(B_FALSE); break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); usage(B_FALSE); } } argc -= optind; argv += optind; if (do_all) { /* * We could make use of zfs_for_each() to walk all datasets in * the system, but this would be very inefficient, especially * since we would have to linearly search /proc/self/mounts for * each one. Instead, do one pass through /proc/self/mounts * looking for zfs entries and call zfs_unmount() for each one. * * Things get a little tricky if the administrator has created * mountpoints beneath other ZFS filesystems. In this case, we * have to unmount the deepest filesystems first. To accomplish * this, we place all the mountpoints in an AVL tree sorted by * the special type (dataset name), and walk the result in * reverse to make sure to get any snapshots first. */ FILE *mnttab; struct mnttab entry; uu_avl_pool_t *pool; uu_avl_t *tree = NULL; unshare_unmount_node_t *node; uu_avl_index_t idx; uu_avl_walk_t *walk; enum sa_protocol *protocol = NULL, single_protocol[] = {SA_NO_PROTOCOL, SA_NO_PROTOCOL}; if (op == OP_SHARE && argc > 0) { *single_protocol = sa_protocol_decode(argv[0]); protocol = single_protocol; argc--; argv++; } if (argc != 0) { (void) fprintf(stderr, gettext("too many arguments\n")); usage(B_FALSE); } if (((pool = uu_avl_pool_create("unmount_pool", sizeof (unshare_unmount_node_t), offsetof(unshare_unmount_node_t, un_avlnode), unshare_unmount_compare, UU_DEFAULT)) == NULL) || ((tree = uu_avl_create(pool, NULL, UU_DEFAULT)) == NULL)) nomem(); if ((mnttab = fopen(MNTTAB, "re")) == NULL) { uu_avl_destroy(tree); uu_avl_pool_destroy(pool); return (ENOENT); } while (getmntent(mnttab, &entry) == 0) { /* ignore non-ZFS entries */ if (strcmp(entry.mnt_fstype, MNTTYPE_ZFS) != 0) continue; /* ignore snapshots */ if (strchr(entry.mnt_special, '@') != NULL) continue; if ((zhp = zfs_open(g_zfs, entry.mnt_special, ZFS_TYPE_FILESYSTEM)) == NULL) { ret = 1; continue; } /* * Ignore datasets that are excluded/restricted by * parent pool name. 
*/ if (zpool_skip_pool(zfs_get_pool_name(zhp))) { zfs_close(zhp); continue; } switch (op) { case OP_SHARE: verify(zfs_prop_get(zhp, ZFS_PROP_SHARENFS, nfs_mnt_prop, sizeof (nfs_mnt_prop), NULL, NULL, 0, B_FALSE) == 0); if (strcmp(nfs_mnt_prop, "off") != 0) break; verify(zfs_prop_get(zhp, ZFS_PROP_SHARESMB, nfs_mnt_prop, sizeof (nfs_mnt_prop), NULL, NULL, 0, B_FALSE) == 0); if (strcmp(nfs_mnt_prop, "off") == 0) continue; break; case OP_MOUNT: /* Ignore legacy mounts */ verify(zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, nfs_mnt_prop, sizeof (nfs_mnt_prop), NULL, NULL, 0, B_FALSE) == 0); if (strcmp(nfs_mnt_prop, "legacy") == 0) continue; /* Ignore canmount=noauto mounts */ if (zfs_prop_get_int(zhp, ZFS_PROP_CANMOUNT) == ZFS_CANMOUNT_NOAUTO) continue; break; default: break; } node = safe_malloc(sizeof (unshare_unmount_node_t)); node->un_zhp = zhp; node->un_mountp = safe_strdup(entry.mnt_mountp); uu_avl_node_init(node, &node->un_avlnode, pool); if (uu_avl_find(tree, node, NULL, &idx) == NULL) { uu_avl_insert(tree, node, idx); } else { zfs_close(node->un_zhp); free(node->un_mountp); free(node); } } (void) fclose(mnttab); /* * Walk the AVL tree in reverse, unmounting each filesystem and * removing it from the AVL tree in the process. */ if ((walk = uu_avl_walk_start(tree, UU_WALK_REVERSE | UU_WALK_ROBUST)) == NULL) nomem(); while ((node = uu_avl_walk_next(walk)) != NULL) { const char *mntarg = NULL; uu_avl_remove(tree, node); switch (op) { case OP_SHARE: if (zfs_unshare(node->un_zhp, node->un_mountp, protocol) != 0) ret = 1; break; case OP_MOUNT: if (zfs_unmount(node->un_zhp, mntarg, flags) != 0) ret = 1; break; } zfs_close(node->un_zhp); free(node->un_mountp); free(node); } if (op == OP_SHARE) zfs_commit_shares(protocol); uu_avl_walk_end(walk); uu_avl_destroy(tree); uu_avl_pool_destroy(pool); } else { if (argc != 1) { if (argc == 0) (void) fprintf(stderr, gettext("missing filesystem argument\n")); else (void) fprintf(stderr, gettext("too many arguments\n")); usage(B_FALSE); } /* * We have an argument, but it may be a full path or a ZFS * filesystem. Pass full paths off to unmount_path() (shared by * manual_unmount), otherwise open the filesystem and pass to * zfs_unmount(). */ if (argv[0][0] == '/') return (unshare_unmount_path(op, argv[0], flags, B_FALSE)); if ((zhp = zfs_open(g_zfs, argv[0], ZFS_TYPE_FILESYSTEM)) == NULL) return (1); verify(zfs_prop_get(zhp, op == OP_SHARE ? 
ZFS_PROP_SHARENFS : ZFS_PROP_MOUNTPOINT, nfs_mnt_prop, sizeof (nfs_mnt_prop), NULL, NULL, 0, B_FALSE) == 0); switch (op) { case OP_SHARE: verify(zfs_prop_get(zhp, ZFS_PROP_SHARENFS, nfs_mnt_prop, sizeof (nfs_mnt_prop), NULL, NULL, 0, B_FALSE) == 0); verify(zfs_prop_get(zhp, ZFS_PROP_SHARESMB, sharesmb, sizeof (sharesmb), NULL, NULL, 0, B_FALSE) == 0); if (strcmp(nfs_mnt_prop, "off") == 0 && strcmp(sharesmb, "off") == 0) { (void) fprintf(stderr, gettext("cannot " "unshare '%s': legacy share\n"), zfs_get_name(zhp)); (void) fprintf(stderr, gettext("use " "exports(5) or smb.conf(5) to unshare " "this filesystem\n")); ret = 1; } else if (!zfs_is_shared(zhp, NULL, NULL)) { (void) fprintf(stderr, gettext("cannot " "unshare '%s': not currently " "shared\n"), zfs_get_name(zhp)); ret = 1; } else if (zfs_unshareall(zhp, NULL) != 0) { ret = 1; } break; case OP_MOUNT: if (strcmp(nfs_mnt_prop, "legacy") == 0) { (void) fprintf(stderr, gettext("cannot " "unmount '%s': legacy " "mountpoint\n"), zfs_get_name(zhp)); (void) fprintf(stderr, gettext("use " "umount(8) to unmount this " "filesystem\n")); ret = 1; } else if (!zfs_is_mounted(zhp, NULL)) { (void) fprintf(stderr, gettext("cannot " "unmount '%s': not currently " "mounted\n"), zfs_get_name(zhp)); ret = 1; } else if (zfs_unmountall(zhp, flags) != 0) { ret = 1; } break; } zfs_close(zhp); } return (ret); } /* * zfs unmount [-fu] -a * zfs unmount [-fu] filesystem * * Unmount all filesystems, or a specific ZFS filesystem. */ static int zfs_do_unmount(int argc, char **argv) { return (unshare_unmount(OP_MOUNT, argc, argv)); } /* * zfs unshare -a * zfs unshare filesystem * * Unshare all filesystems, or a specific ZFS filesystem. */ static int zfs_do_unshare(int argc, char **argv) { return (unshare_unmount(OP_SHARE, argc, argv)); } static int find_command_idx(const char *command, int *idx) { int i; for (i = 0; i < NCOMMAND; i++) { if (command_table[i].name == NULL) continue; if (strcmp(command, command_table[i].name) == 0) { *idx = i; return (0); } } return (1); } static int zfs_do_diff(int argc, char **argv) { zfs_handle_t *zhp; int flags = 0; char *tosnap = NULL; char *fromsnap = NULL; char *atp, *copy; int err = 0; int c; struct sigaction sa; while ((c = getopt(argc, argv, "FHth")) != -1) { switch (c) { case 'F': flags |= ZFS_DIFF_CLASSIFY; break; case 'H': flags |= ZFS_DIFF_PARSEABLE; break; case 't': flags |= ZFS_DIFF_TIMESTAMP; break; case 'h': flags |= ZFS_DIFF_NO_MANGLE; break; default: (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); usage(B_FALSE); } } argc -= optind; argv += optind; if (argc < 1) { (void) fprintf(stderr, gettext("must provide at least one snapshot name\n")); usage(B_FALSE); } if (argc > 2) { (void) fprintf(stderr, gettext("too many arguments\n")); usage(B_FALSE); } fromsnap = argv[0]; tosnap = (argc == 2) ? 
argv[1] : NULL; copy = NULL; if (*fromsnap != '@') copy = strdup(fromsnap); else if (tosnap) copy = strdup(tosnap); if (copy == NULL) usage(B_FALSE); if ((atp = strchr(copy, '@')) != NULL) *atp = '\0'; if ((zhp = zfs_open(g_zfs, copy, ZFS_TYPE_FILESYSTEM)) == NULL) { free(copy); return (1); } free(copy); /* * Ignore SIGPIPE so that the library can give us * information on any failure */ if (sigemptyset(&sa.sa_mask) == -1) { err = errno; goto out; } sa.sa_flags = 0; sa.sa_handler = SIG_IGN; if (sigaction(SIGPIPE, &sa, NULL) == -1) { err = errno; goto out; } err = zfs_show_diffs(zhp, STDOUT_FILENO, fromsnap, tosnap, flags); out: zfs_close(zhp); return (err != 0); } /* * zfs bookmark | * * Creates a bookmark with the given name from the source snapshot * or creates a copy of an existing source bookmark. */ static int zfs_do_bookmark(int argc, char **argv) { char *source, *bookname; char expbuf[ZFS_MAX_DATASET_NAME_LEN]; int source_type; nvlist_t *nvl; int ret = 0; int c; /* check options */ while ((c = getopt(argc, argv, "")) != -1) { switch (c) { case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); goto usage; } } argc -= optind; argv += optind; /* check number of arguments */ if (argc < 1) { (void) fprintf(stderr, gettext("missing source argument\n")); goto usage; } if (argc < 2) { (void) fprintf(stderr, gettext("missing bookmark argument\n")); goto usage; } source = argv[0]; bookname = argv[1]; if (strchr(source, '@') == NULL && strchr(source, '#') == NULL) { (void) fprintf(stderr, gettext("invalid source name '%s': " "must contain a '@' or '#'\n"), source); goto usage; } if (strchr(bookname, '#') == NULL) { (void) fprintf(stderr, gettext("invalid bookmark name '%s': " "must contain a '#'\n"), bookname); goto usage; } /* * expand source or bookname to full path: * one of them may be specified as short name */ { char **expand; char *source_short, *bookname_short; source_short = strpbrk(source, "@#"); bookname_short = strpbrk(bookname, "#"); if (source_short == source && bookname_short == bookname) { (void) fprintf(stderr, gettext( "either source or bookmark must be specified as " "full dataset paths")); goto usage; } else if (source_short != source && bookname_short != bookname) { expand = NULL; } else if (source_short != source) { strlcpy(expbuf, source, sizeof (expbuf)); expand = &bookname; } else if (bookname_short != bookname) { strlcpy(expbuf, bookname, sizeof (expbuf)); expand = &source; } else { abort(); } if (expand != NULL) { *strpbrk(expbuf, "@#") = '\0'; /* dataset name in buf */ (void) strlcat(expbuf, *expand, sizeof (expbuf)); *expand = expbuf; } } /* determine source type */ switch (*strpbrk(source, "@#")) { case '@': source_type = ZFS_TYPE_SNAPSHOT; break; case '#': source_type = ZFS_TYPE_BOOKMARK; break; default: abort(); } /* test the source exists */ zfs_handle_t *zhp; zhp = zfs_open(g_zfs, source, source_type); if (zhp == NULL) goto usage; zfs_close(zhp); nvl = fnvlist_alloc(); fnvlist_add_string(nvl, bookname, source); ret = lzc_bookmark(nvl, NULL); fnvlist_free(nvl); if (ret != 0) { const char *err_msg = NULL; char errbuf[1024]; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot create bookmark '%s'"), bookname); switch (ret) { case EXDEV: err_msg = "bookmark is in a different pool"; break; case ZFS_ERR_BOOKMARK_SOURCE_NOT_ANCESTOR: err_msg = "source is not an ancestor of the " "new bookmark's dataset"; break; case EEXIST: err_msg = "bookmark exists"; break; case EINVAL: err_msg = "invalid argument"; break; case ENOTSUP: 
err_msg = "bookmark feature not enabled"; break; case ENOSPC: err_msg = "out of space"; break; case ENOENT: err_msg = "dataset does not exist"; break; default: (void) zfs_standard_error(g_zfs, ret, errbuf); break; } if (err_msg != NULL) { (void) fprintf(stderr, "%s: %s\n", errbuf, dgettext(TEXT_DOMAIN, err_msg)); } } return (ret != 0); usage: usage(B_FALSE); return (-1); } static int zfs_do_channel_program(int argc, char **argv) { int ret, fd, c; size_t progsize, progread; nvlist_t *outnvl = NULL; uint64_t instrlimit = ZCP_DEFAULT_INSTRLIMIT; uint64_t memlimit = ZCP_DEFAULT_MEMLIMIT; boolean_t sync_flag = B_TRUE, json_output = B_FALSE; zpool_handle_t *zhp; /* check options */ while ((c = getopt(argc, argv, "nt:m:j")) != -1) { switch (c) { case 't': case 'm': { uint64_t arg; char *endp; errno = 0; arg = strtoull(optarg, &endp, 0); if (errno != 0 || *endp != '\0') { (void) fprintf(stderr, gettext( "invalid argument " "'%s': expected integer\n"), optarg); goto usage; } if (c == 't') { instrlimit = arg; } else { ASSERT3U(c, ==, 'm'); memlimit = arg; } break; } case 'n': { sync_flag = B_FALSE; break; } case 'j': { json_output = B_TRUE; break; } case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); goto usage; } } argc -= optind; argv += optind; if (argc < 2) { (void) fprintf(stderr, gettext("invalid number of arguments\n")); goto usage; } const char *poolname = argv[0]; const char *filename = argv[1]; if (strcmp(filename, "-") == 0) { fd = 0; filename = "standard input"; } else if ((fd = open(filename, O_RDONLY)) < 0) { (void) fprintf(stderr, gettext("cannot open '%s': %s\n"), filename, strerror(errno)); return (1); } if ((zhp = zpool_open(g_zfs, poolname)) == NULL) { (void) fprintf(stderr, gettext("cannot open pool '%s'\n"), poolname); if (fd != 0) (void) close(fd); return (1); } zpool_close(zhp); /* * Read in the channel program, expanding the program buffer as * necessary. */ progread = 0; progsize = 1024; char *progbuf = safe_malloc(progsize); do { ret = read(fd, progbuf + progread, progsize - progread); progread += ret; if (progread == progsize && ret > 0) { progsize *= 2; progbuf = safe_realloc(progbuf, progsize); } } while (ret > 0); if (fd != 0) (void) close(fd); if (ret < 0) { free(progbuf); (void) fprintf(stderr, gettext("cannot read '%s': %s\n"), filename, strerror(errno)); return (1); } progbuf[progread] = '\0'; /* * Any remaining arguments are passed as arguments to the lua script as * a string array: * { * "argv" -> [ "arg 1", ... "arg n" ], * } */ nvlist_t *argnvl = fnvlist_alloc(); fnvlist_add_string_array(argnvl, ZCP_ARG_CLIARGV, (const char **)argv + 2, argc - 2); if (sync_flag) { ret = lzc_channel_program(poolname, progbuf, instrlimit, memlimit, argnvl, &outnvl); } else { ret = lzc_channel_program_nosync(poolname, progbuf, instrlimit, memlimit, argnvl, &outnvl); } if (ret != 0) { /* * On error, report the error message handed back by lua if one * exists. Otherwise, generate an appropriate error message, * falling back on strerror() for an unexpected return code. 
*/ const char *errstring = NULL; const char *msg = gettext("Channel program execution failed"); uint64_t instructions = 0; if (outnvl != NULL && nvlist_exists(outnvl, ZCP_RET_ERROR)) { char *es = NULL; (void) nvlist_lookup_string(outnvl, ZCP_RET_ERROR, &es); if (es == NULL) errstring = strerror(ret); else errstring = es; if (ret == ETIME) { (void) nvlist_lookup_uint64(outnvl, ZCP_ARG_INSTRLIMIT, &instructions); } } else { switch (ret) { case EINVAL: errstring = "Invalid instruction or memory limit."; break; case ENOMEM: errstring = "Return value too large."; break; case ENOSPC: errstring = "Memory limit exhausted."; break; case ETIME: errstring = "Timed out."; break; case EPERM: errstring = "Permission denied. Channel " "programs must be run as root."; break; default: (void) zfs_standard_error(g_zfs, ret, msg); } } if (errstring != NULL) (void) fprintf(stderr, "%s:\n%s\n", msg, errstring); if (ret == ETIME && instructions != 0) (void) fprintf(stderr, gettext("%llu Lua instructions\n"), (u_longlong_t)instructions); } else { if (json_output) { (void) nvlist_print_json(stdout, outnvl); } else if (nvlist_empty(outnvl)) { (void) fprintf(stdout, gettext("Channel program fully " "executed and did not produce output.\n")); } else { (void) fprintf(stdout, gettext("Channel program fully " "executed and produced output:\n")); dump_nvlist(outnvl, 4); } } free(progbuf); fnvlist_free(outnvl); fnvlist_free(argnvl); return (ret != 0); usage: usage(B_FALSE); return (-1); } typedef struct loadkey_cbdata { boolean_t cb_loadkey; boolean_t cb_recursive; boolean_t cb_noop; char *cb_keylocation; uint64_t cb_numfailed; uint64_t cb_numattempted; } loadkey_cbdata_t; static int load_key_callback(zfs_handle_t *zhp, void *data) { int ret; boolean_t is_encroot; loadkey_cbdata_t *cb = data; uint64_t keystatus = zfs_prop_get_int(zhp, ZFS_PROP_KEYSTATUS); /* * If we are working recursively, we want to skip loading / unloading * keys for non-encryption roots and datasets whose keys are already * in the desired end-state. 
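 * For example, 'zfs load-key -a' should quietly skip an encryption root
 * whose key is already loaded rather than count it as a failure, and
 * 'zfs unload-key -r pool/fs' should skip children that merely inherit
 * their key from pool/fs (illustrative dataset name).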
*/ if (cb->cb_recursive) { ret = zfs_crypto_get_encryption_root(zhp, &is_encroot, NULL); if (ret != 0) return (ret); if (!is_encroot) return (0); if ((cb->cb_loadkey && keystatus == ZFS_KEYSTATUS_AVAILABLE) || (!cb->cb_loadkey && keystatus == ZFS_KEYSTATUS_UNAVAILABLE)) return (0); } cb->cb_numattempted++; if (cb->cb_loadkey) ret = zfs_crypto_load_key(zhp, cb->cb_noop, cb->cb_keylocation); else ret = zfs_crypto_unload_key(zhp); if (ret != 0) { cb->cb_numfailed++; return (ret); } return (0); } static int load_unload_keys(int argc, char **argv, boolean_t loadkey) { int c, ret = 0, flags = 0; boolean_t do_all = B_FALSE; loadkey_cbdata_t cb = { 0 }; cb.cb_loadkey = loadkey; while ((c = getopt(argc, argv, "anrL:")) != -1) { /* noop and alternate keylocations only apply to zfs load-key */ if (loadkey) { switch (c) { case 'n': cb.cb_noop = B_TRUE; continue; case 'L': cb.cb_keylocation = optarg; continue; default: break; } } switch (c) { case 'a': do_all = B_TRUE; cb.cb_recursive = B_TRUE; break; case 'r': flags |= ZFS_ITER_RECURSE; cb.cb_recursive = B_TRUE; break; default: (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); usage(B_FALSE); } } argc -= optind; argv += optind; if (!do_all && argc == 0) { (void) fprintf(stderr, gettext("Missing dataset argument or -a option\n")); usage(B_FALSE); } if (do_all && argc != 0) { (void) fprintf(stderr, gettext("Cannot specify dataset with -a option\n")); usage(B_FALSE); } if (cb.cb_recursive && cb.cb_keylocation != NULL && strcmp(cb.cb_keylocation, "prompt") != 0) { (void) fprintf(stderr, gettext("alternate keylocation may only " "be 'prompt' with -r or -a\n")); usage(B_FALSE); } ret = zfs_for_each(argc, argv, flags, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, NULL, NULL, 0, load_key_callback, &cb); if (cb.cb_noop || (cb.cb_recursive && cb.cb_numattempted != 0)) { (void) printf(gettext("%llu / %llu key(s) successfully %s\n"), (u_longlong_t)(cb.cb_numattempted - cb.cb_numfailed), (u_longlong_t)cb.cb_numattempted, loadkey ? (cb.cb_noop ? 
"verified" : "loaded") : "unloaded"); } if (cb.cb_numfailed != 0) ret = -1; return (ret); } static int zfs_do_load_key(int argc, char **argv) { return (load_unload_keys(argc, argv, B_TRUE)); } static int zfs_do_unload_key(int argc, char **argv) { return (load_unload_keys(argc, argv, B_FALSE)); } static int zfs_do_change_key(int argc, char **argv) { int c, ret; uint64_t keystatus; boolean_t loadkey = B_FALSE, inheritkey = B_FALSE; zfs_handle_t *zhp = NULL; nvlist_t *props = fnvlist_alloc(); while ((c = getopt(argc, argv, "lio:")) != -1) { switch (c) { case 'l': loadkey = B_TRUE; break; case 'i': inheritkey = B_TRUE; break; case 'o': if (!parseprop(props, optarg)) { nvlist_free(props); return (1); } break; default: (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); usage(B_FALSE); } } if (inheritkey && !nvlist_empty(props)) { (void) fprintf(stderr, gettext("Properties not allowed for inheriting\n")); usage(B_FALSE); } argc -= optind; argv += optind; if (argc < 1) { (void) fprintf(stderr, gettext("Missing dataset argument\n")); usage(B_FALSE); } if (argc > 1) { (void) fprintf(stderr, gettext("Too many arguments\n")); usage(B_FALSE); } zhp = zfs_open(g_zfs, argv[argc - 1], ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME); if (zhp == NULL) usage(B_FALSE); if (loadkey) { keystatus = zfs_prop_get_int(zhp, ZFS_PROP_KEYSTATUS); if (keystatus != ZFS_KEYSTATUS_AVAILABLE) { ret = zfs_crypto_load_key(zhp, B_FALSE, NULL); if (ret != 0) { nvlist_free(props); zfs_close(zhp); return (-1); } } /* refresh the properties so the new keystatus is visible */ zfs_refresh_properties(zhp); } ret = zfs_crypto_rewrap(zhp, props, inheritkey); if (ret != 0) { nvlist_free(props); zfs_close(zhp); return (-1); } nvlist_free(props); zfs_close(zhp); return (0); } /* * 1) zfs project [-d|-r] * List project ID and inherit flag of file(s) or directories. * -d: List the directory itself, not its children. * -r: List subdirectories recursively. * * 2) zfs project -C [-k] [-r] * Clear project inherit flag and/or ID on the file(s) or directories. * -k: Keep the project ID unchanged. If not specified, the project ID * will be reset as zero. * -r: Clear on subdirectories recursively. * * 3) zfs project -c [-0] [-d|-r] [-p id] * Check project ID and inherit flag on the file(s) or directories, * report the outliers. * -0: Print file name followed by a NUL instead of newline. * -d: Check the directory itself, not its children. * -p: Specify the referenced ID for comparing with the target file(s) * or directories' project IDs. If not specified, the target (top) * directory's project ID will be used as the referenced one. * -r: Check subdirectories recursively. * * 4) zfs project [-p id] [-r] [-s] * Set project ID and/or inherit flag on the file(s) or directories. * -p: Set the project ID as the given id. * -r: Set on subdirectories recursively. If not specify "-p" option, * it will use top-level directory's project ID as the given id, * then set both project ID and inherit flag on all descendants * of the top-level directory. * -s: Set project inherit flag. 
*/ static int zfs_do_project(int argc, char **argv) { zfs_project_control_t zpc = { .zpc_expected_projid = ZFS_INVALID_PROJID, .zpc_op = ZFS_PROJECT_OP_DEFAULT, .zpc_dironly = B_FALSE, .zpc_keep_projid = B_FALSE, .zpc_newline = B_TRUE, .zpc_recursive = B_FALSE, .zpc_set_flag = B_FALSE, }; int ret = 0, c; if (argc < 2) usage(B_FALSE); while ((c = getopt(argc, argv, "0Ccdkp:rs")) != -1) { switch (c) { case '0': zpc.zpc_newline = B_FALSE; break; case 'C': if (zpc.zpc_op != ZFS_PROJECT_OP_DEFAULT) { (void) fprintf(stderr, gettext("cannot " "specify '-C' '-c' '-s' together\n")); usage(B_FALSE); } zpc.zpc_op = ZFS_PROJECT_OP_CLEAR; break; case 'c': if (zpc.zpc_op != ZFS_PROJECT_OP_DEFAULT) { (void) fprintf(stderr, gettext("cannot " "specify '-C' '-c' '-s' together\n")); usage(B_FALSE); } zpc.zpc_op = ZFS_PROJECT_OP_CHECK; break; case 'd': zpc.zpc_dironly = B_TRUE; /* overwrite "-r" option */ zpc.zpc_recursive = B_FALSE; break; case 'k': zpc.zpc_keep_projid = B_TRUE; break; case 'p': { char *endptr; errno = 0; zpc.zpc_expected_projid = strtoull(optarg, &endptr, 0); if (errno != 0 || *endptr != '\0') { (void) fprintf(stderr, gettext("project ID must be less than " "%u\n"), UINT32_MAX); usage(B_FALSE); } if (zpc.zpc_expected_projid >= UINT32_MAX) { (void) fprintf(stderr, gettext("invalid project ID\n")); usage(B_FALSE); } break; } case 'r': zpc.zpc_recursive = B_TRUE; /* overwrite "-d" option */ zpc.zpc_dironly = B_FALSE; break; case 's': if (zpc.zpc_op != ZFS_PROJECT_OP_DEFAULT) { (void) fprintf(stderr, gettext("cannot " "specify '-C' '-c' '-s' together\n")); usage(B_FALSE); } zpc.zpc_set_flag = B_TRUE; zpc.zpc_op = ZFS_PROJECT_OP_SET; break; default: (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); usage(B_FALSE); } } if (zpc.zpc_op == ZFS_PROJECT_OP_DEFAULT) { if (zpc.zpc_expected_projid != ZFS_INVALID_PROJID) zpc.zpc_op = ZFS_PROJECT_OP_SET; else zpc.zpc_op = ZFS_PROJECT_OP_LIST; } switch (zpc.zpc_op) { case ZFS_PROJECT_OP_LIST: if (zpc.zpc_keep_projid) { (void) fprintf(stderr, gettext("'-k' is only valid together with '-C'\n")); usage(B_FALSE); } if (!zpc.zpc_newline) { (void) fprintf(stderr, gettext("'-0' is only valid together with '-c'\n")); usage(B_FALSE); } break; case ZFS_PROJECT_OP_CHECK: if (zpc.zpc_keep_projid) { (void) fprintf(stderr, gettext("'-k' is only valid together with '-C'\n")); usage(B_FALSE); } break; case ZFS_PROJECT_OP_CLEAR: if (zpc.zpc_dironly) { (void) fprintf(stderr, gettext("'-d' is useless together with '-C'\n")); usage(B_FALSE); } if (!zpc.zpc_newline) { (void) fprintf(stderr, gettext("'-0' is only valid together with '-c'\n")); usage(B_FALSE); } if (zpc.zpc_expected_projid != ZFS_INVALID_PROJID) { (void) fprintf(stderr, gettext("'-p' is useless together with '-C'\n")); usage(B_FALSE); } break; case ZFS_PROJECT_OP_SET: if (zpc.zpc_dironly) { (void) fprintf(stderr, gettext("'-d' is useless for set project ID and/or " "inherit flag\n")); usage(B_FALSE); } if (zpc.zpc_keep_projid) { (void) fprintf(stderr, gettext("'-k' is only valid together with '-C'\n")); usage(B_FALSE); } if (!zpc.zpc_newline) { (void) fprintf(stderr, gettext("'-0' is only valid together with '-c'\n")); usage(B_FALSE); } break; default: ASSERT(0); break; } argv += optind; argc -= optind; if (argc == 0) { (void) fprintf(stderr, gettext("missing file or directory target(s)\n")); usage(B_FALSE); } for (int i = 0; i < argc; i++) { int err; err = zfs_project_handle(argv[i], &zpc); if (err && !ret) ret = err; } return (ret); } static int zfs_do_wait(int argc, char **argv) { boolean_t 
enabled[ZFS_WAIT_NUM_ACTIVITIES]; int error, i; int c; /* By default, wait for all types of activity. */ for (i = 0; i < ZFS_WAIT_NUM_ACTIVITIES; i++) enabled[i] = B_TRUE; while ((c = getopt(argc, argv, "t:")) != -1) { switch (c) { case 't': /* Reset activities array */ memset(&enabled, 0, sizeof (enabled)); for (char *tok; (tok = strsep(&optarg, ",")); ) { static const char *const col_subopts[ ZFS_WAIT_NUM_ACTIVITIES] = { "deleteq" }; for (i = 0; i < ARRAY_SIZE(col_subopts); ++i) if (strcmp(tok, col_subopts[i]) == 0) { enabled[i] = B_TRUE; goto found; } (void) fprintf(stderr, gettext("invalid activity '%s'\n"), tok); usage(B_FALSE); found:; } break; case '?': (void) fprintf(stderr, gettext("invalid option '%c'\n"), optopt); usage(B_FALSE); } } argv += optind; argc -= optind; if (argc < 1) { (void) fprintf(stderr, gettext("missing 'filesystem' " "argument\n")); usage(B_FALSE); } if (argc > 1) { (void) fprintf(stderr, gettext("too many arguments\n")); usage(B_FALSE); } zfs_handle_t *zhp = zfs_open(g_zfs, argv[0], ZFS_TYPE_FILESYSTEM); if (zhp == NULL) return (1); for (;;) { boolean_t missing = B_FALSE; boolean_t any_waited = B_FALSE; for (int i = 0; i < ZFS_WAIT_NUM_ACTIVITIES; i++) { boolean_t waited; if (!enabled[i]) continue; error = zfs_wait_status(zhp, i, &missing, &waited); if (error != 0 || missing) break; any_waited = (any_waited || waited); } if (error != 0 || missing || !any_waited) break; } zfs_close(zhp); return (error); } /* * Display version message */ static int zfs_do_version(int argc, char **argv) { (void) argc, (void) argv; return (zfs_version_print() != 0); } int main(int argc, char **argv) { int ret = 0; int i = 0; const char *cmdname; char **newargv; (void) setlocale(LC_ALL, ""); (void) setlocale(LC_NUMERIC, "C"); (void) textdomain(TEXT_DOMAIN); opterr = 0; /* * Make sure the user has specified some command. */ if (argc < 2) { (void) fprintf(stderr, gettext("missing command\n")); usage(B_FALSE); } cmdname = argv[1]; /* * The 'umount' command is an alias for 'unmount' */ if (strcmp(cmdname, "umount") == 0) cmdname = "unmount"; /* * The 'recv' command is an alias for 'receive' */ if (strcmp(cmdname, "recv") == 0) cmdname = "receive"; /* * The 'snap' command is an alias for 'snapshot' */ if (strcmp(cmdname, "snap") == 0) cmdname = "snapshot"; /* * Special case '-?' */ if ((strcmp(cmdname, "-?") == 0) || (strcmp(cmdname, "--help") == 0)) usage(B_TRUE); /* * Special case '-V|--version' */ if ((strcmp(cmdname, "-V") == 0) || (strcmp(cmdname, "--version") == 0)) return (zfs_do_version(argc, argv)); if ((g_zfs = libzfs_init()) == NULL) { (void) fprintf(stderr, "%s\n", libzfs_error_init(errno)); return (1); } zfs_save_arguments(argc, argv, history_str, sizeof (history_str)); libzfs_print_on_error(g_zfs, B_TRUE); /* * Many commands modify input strings for string parsing reasons. * We create a copy to protect the original argv. */ newargv = safe_malloc((argc + 1) * sizeof (newargv[0])); for (i = 0; i < argc; i++) newargv[i] = strdup(argv[i]); newargv[argc] = NULL; /* * Run the appropriate command. 
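 * As a special case (see the strchr(cmdname, '=') branch below), a first
 * argument containing '=' is dispatched to the 'set' command, so
 * 'zfs compression=on pool/fs' behaves like 'zfs set compression=on pool/fs'
 * (illustrative property and dataset).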
*/ libzfs_mnttab_cache(g_zfs, B_TRUE); if (find_command_idx(cmdname, &i) == 0) { current_command = &command_table[i]; ret = command_table[i].func(argc - 1, newargv + 1); } else if (strchr(cmdname, '=') != NULL) { verify(find_command_idx("set", &i) == 0); current_command = &command_table[i]; ret = command_table[i].func(argc, newargv); } else { (void) fprintf(stderr, gettext("unrecognized " "command '%s'\n"), cmdname); usage(B_FALSE); ret = 1; } for (i = 0; i < argc; i++) free(newargv[i]); free(newargv); if (ret == 0 && log_history) (void) zpool_log_history(g_zfs, history_str); libzfs_fini(g_zfs); /* * The 'ZFS_ABORT' environment variable causes us to dump core on exit * for the purposes of running ::findleaks. */ if (getenv("ZFS_ABORT") != NULL) { (void) printf("dumping core by request\n"); abort(); } return (ret); } /* * zfs zone nsfile filesystem * * Add or delete the given dataset to/from the namespace. */ #ifdef __linux__ static int zfs_do_zone_impl(int argc, char **argv, boolean_t attach) { zfs_handle_t *zhp; int ret; if (argc < 3) { (void) fprintf(stderr, gettext("missing argument(s)\n")); usage(B_FALSE); } if (argc > 3) { (void) fprintf(stderr, gettext("too many arguments\n")); usage(B_FALSE); } zhp = zfs_open(g_zfs, argv[2], ZFS_TYPE_FILESYSTEM); if (zhp == NULL) return (1); ret = (zfs_userns(zhp, argv[1], attach) != 0); zfs_close(zhp); return (ret); } static int zfs_do_zone(int argc, char **argv) { return (zfs_do_zone_impl(argc, argv, B_TRUE)); } static int zfs_do_unzone(int argc, char **argv) { return (zfs_do_zone_impl(argc, argv, B_FALSE)); } #endif #ifdef __FreeBSD__ #include #include /* * Attach/detach the given dataset to/from the given jail */ static int zfs_do_jail_impl(int argc, char **argv, boolean_t attach) { zfs_handle_t *zhp; int jailid, ret; /* check number of arguments */ if (argc < 3) { (void) fprintf(stderr, gettext("missing argument(s)\n")); usage(B_FALSE); } if (argc > 3) { (void) fprintf(stderr, gettext("too many arguments\n")); usage(B_FALSE); } jailid = jail_getid(argv[1]); if (jailid < 0) { (void) fprintf(stderr, gettext("invalid jail id or name\n")); usage(B_FALSE); } zhp = zfs_open(g_zfs, argv[2], ZFS_TYPE_FILESYSTEM); if (zhp == NULL) return (1); ret = (zfs_jail(zhp, jailid, attach) != 0); zfs_close(zhp); return (ret); } /* * zfs jail jailid filesystem * * Attach the given dataset to the given jail */ static int zfs_do_jail(int argc, char **argv) { return (zfs_do_jail_impl(argc, argv, B_TRUE)); } /* * zfs unjail jailid filesystem * * Detach the given dataset from the given jail */ static int zfs_do_unjail(int argc, char **argv) { return (zfs_do_jail_impl(argc, argv, B_FALSE)); } #endif diff --git a/cmd/ztest.c b/cmd/ztest.c index b5a3d1810a5a..55020805d14f 100644 --- a/cmd/ztest.c +++ b/cmd/ztest.c @@ -1,8288 +1,8289 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or https://opensource.org/licenses/CDDL-1.0. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
* If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2011, 2018 by Delphix. All rights reserved. * Copyright 2011 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2013 Steven Hartland. All rights reserved. * Copyright (c) 2014 Integros [integros.com] * Copyright 2017 Joyent, Inc. * Copyright (c) 2017, Intel Corporation. */ /* * The objective of this program is to provide a DMU/ZAP/SPA stress test * that runs entirely in userland, is easy to use, and easy to extend. * * The overall design of the ztest program is as follows: * * (1) For each major functional area (e.g. adding vdevs to a pool, * creating and destroying datasets, reading and writing objects, etc) * we have a simple routine to test that functionality. These * individual routines do not have to do anything "stressful". * * (2) We turn these simple functionality tests into a stress test by * running them all in parallel, with as many threads as desired, * and spread across as many datasets, objects, and vdevs as desired. * * (3) While all this is happening, we inject faults into the pool to * verify that self-healing data really works. * * (4) Every time we open a dataset, we change its checksum and compression * functions. Thus even individual objects vary from block to block * in which checksum they use and whether they're compressed. * * (5) To verify that we never lose on-disk consistency after a crash, * we run the entire test in a child of the main process. * At random times, the child self-immolates with a SIGKILL. * This is the software equivalent of pulling the power cord. * The parent then runs the test again, using the existing * storage pool, as many times as desired. If backwards compatibility * testing is enabled ztest will sometimes run the "older" version * of ztest after a SIGKILL. * * (6) To verify that we don't have future leaks or temporal incursions, * many of the functional tests record the transaction group number * as part of their data. When reading old data, they verify that * the transaction group number is less than the current, open txg. * If you add a new test, please do this if applicable. * * (7) Threads are created with a reduced stack size, for sanity checking. * Therefore, it's important not to allocate huge buffers on the stack. * * When run with no arguments, ztest runs for about five minutes and * produces no output if successful. To get a little bit of information, * specify -V. To get more information, specify -VV, and so on. * * To turn this into an overnight stress test, use -T to specify run time. * * You can ask more vdevs [-v], datasets [-d], or threads [-t] * to increase the pool capacity, fanout, and overall stress level. * * Use the -k option to set the desired frequency of kills. * * When ztest invokes itself it passes all relevant information through a * temporary file which is mmap-ed in the child process. This allows shared * memory to survive the exec syscall. The ztest_shared_hdr_t struct is always * stored at offset 0 of this file and contains information on the size and * number of shared structures in the file. The information stored in this file * must remain backwards compatible with older versions of ztest so that * ztest can invoke them during backwards compatibility testing (-B). 
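 * Illustrative invocations (options are summarized above):
 *   ztest                                    default ~5 minute run, silent on success
 *   ztest -VV -T 3600                        one hour run with extra verbosity
 *   ztest -v 10 -d 20 -t 50 -k 90 -T 86400   heavier overnight stress run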
*/ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if (__GLIBC__ && !__UCLIBC__) #include /* for backtrace() */ #endif static int ztest_fd_data = -1; static int ztest_fd_rand = -1; typedef struct ztest_shared_hdr { uint64_t zh_hdr_size; uint64_t zh_opts_size; uint64_t zh_size; uint64_t zh_stats_size; uint64_t zh_stats_count; uint64_t zh_ds_size; uint64_t zh_ds_count; } ztest_shared_hdr_t; static ztest_shared_hdr_t *ztest_shared_hdr; enum ztest_class_state { ZTEST_VDEV_CLASS_OFF, ZTEST_VDEV_CLASS_ON, ZTEST_VDEV_CLASS_RND }; #define ZO_GVARS_MAX_ARGLEN ((size_t)64) #define ZO_GVARS_MAX_COUNT ((size_t)10) typedef struct ztest_shared_opts { char zo_pool[ZFS_MAX_DATASET_NAME_LEN]; char zo_dir[ZFS_MAX_DATASET_NAME_LEN]; char zo_alt_ztest[MAXNAMELEN]; char zo_alt_libpath[MAXNAMELEN]; uint64_t zo_vdevs; uint64_t zo_vdevtime; size_t zo_vdev_size; int zo_ashift; int zo_mirrors; int zo_raid_children; int zo_raid_parity; char zo_raid_type[8]; int zo_draid_data; int zo_draid_spares; int zo_datasets; int zo_threads; uint64_t zo_passtime; uint64_t zo_killrate; int zo_verbose; int zo_init; uint64_t zo_time; uint64_t zo_maxloops; uint64_t zo_metaslab_force_ganging; int zo_mmp_test; int zo_special_vdevs; int zo_dump_dbgmsg; int zo_gvars_count; char zo_gvars[ZO_GVARS_MAX_COUNT][ZO_GVARS_MAX_ARGLEN]; } ztest_shared_opts_t; /* Default values for command line options. */ #define DEFAULT_POOL "ztest" #define DEFAULT_VDEV_DIR "/tmp" #define DEFAULT_VDEV_COUNT 5 #define DEFAULT_VDEV_SIZE (SPA_MINDEVSIZE * 4) /* 256m default size */ #define DEFAULT_VDEV_SIZE_STR "256M" #define DEFAULT_ASHIFT SPA_MINBLOCKSHIFT #define DEFAULT_MIRRORS 2 #define DEFAULT_RAID_CHILDREN 4 #define DEFAULT_RAID_PARITY 1 #define DEFAULT_DRAID_DATA 4 #define DEFAULT_DRAID_SPARES 1 #define DEFAULT_DATASETS_COUNT 7 #define DEFAULT_THREADS 23 #define DEFAULT_RUN_TIME 300 /* 300 seconds */ #define DEFAULT_RUN_TIME_STR "300 sec" #define DEFAULT_PASS_TIME 60 /* 60 seconds */ #define DEFAULT_PASS_TIME_STR "60 sec" #define DEFAULT_KILL_RATE 70 /* 70% kill rate */ #define DEFAULT_KILLRATE_STR "70%" #define DEFAULT_INITS 1 #define DEFAULT_MAX_LOOPS 50 /* 5 minutes */ #define DEFAULT_FORCE_GANGING (64 << 10) #define DEFAULT_FORCE_GANGING_STR "64K" /* Simplifying assumption: -1 is not a valid default. 
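 * Every real default in option_table below is a non-negative count or size,
 * so -1 can safely serve as the "no default" sentinel that usage() tests for.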
*/ #define NO_DEFAULT -1 static const ztest_shared_opts_t ztest_opts_defaults = { .zo_pool = DEFAULT_POOL, .zo_dir = DEFAULT_VDEV_DIR, .zo_alt_ztest = { '\0' }, .zo_alt_libpath = { '\0' }, .zo_vdevs = DEFAULT_VDEV_COUNT, .zo_ashift = DEFAULT_ASHIFT, .zo_mirrors = DEFAULT_MIRRORS, .zo_raid_children = DEFAULT_RAID_CHILDREN, .zo_raid_parity = DEFAULT_RAID_PARITY, .zo_raid_type = VDEV_TYPE_RAIDZ, .zo_vdev_size = DEFAULT_VDEV_SIZE, .zo_draid_data = DEFAULT_DRAID_DATA, /* data drives */ .zo_draid_spares = DEFAULT_DRAID_SPARES, /* distributed spares */ .zo_datasets = DEFAULT_DATASETS_COUNT, .zo_threads = DEFAULT_THREADS, .zo_passtime = DEFAULT_PASS_TIME, .zo_killrate = DEFAULT_KILL_RATE, .zo_verbose = 0, .zo_mmp_test = 0, .zo_init = DEFAULT_INITS, .zo_time = DEFAULT_RUN_TIME, .zo_maxloops = DEFAULT_MAX_LOOPS, /* max loops during spa_freeze() */ .zo_metaslab_force_ganging = DEFAULT_FORCE_GANGING, .zo_special_vdevs = ZTEST_VDEV_CLASS_RND, .zo_gvars_count = 0, }; extern uint64_t metaslab_force_ganging; extern uint64_t metaslab_df_alloc_threshold; extern unsigned long zfs_deadman_synctime_ms; extern int metaslab_preload_limit; extern int zfs_compressed_arc_enabled; extern int zfs_abd_scatter_enabled; extern int dmu_object_alloc_chunk_shift; extern boolean_t zfs_force_some_double_word_sm_entries; extern unsigned long zio_decompress_fail_fraction; extern unsigned long zfs_reconstruct_indirect_damage_fraction; static ztest_shared_opts_t *ztest_shared_opts; static ztest_shared_opts_t ztest_opts; static const char *const ztest_wkeydata = "abcdefghijklmnopqrstuvwxyz012345"; typedef struct ztest_shared_ds { uint64_t zd_seq; } ztest_shared_ds_t; static ztest_shared_ds_t *ztest_shared_ds; #define ZTEST_GET_SHARED_DS(d) (&ztest_shared_ds[d]) #define BT_MAGIC 0x123456789abcdefULL #define MAXFAULTS(zs) \ (MAX((zs)->zs_mirrors, 1) * (ztest_opts.zo_raid_parity + 1) - 1) enum ztest_io_type { ZTEST_IO_WRITE_TAG, ZTEST_IO_WRITE_PATTERN, ZTEST_IO_WRITE_ZEROES, ZTEST_IO_TRUNCATE, ZTEST_IO_SETATTR, ZTEST_IO_REWRITE, ZTEST_IO_TYPES }; typedef struct ztest_block_tag { uint64_t bt_magic; uint64_t bt_objset; uint64_t bt_object; uint64_t bt_dnodesize; uint64_t bt_offset; uint64_t bt_gen; uint64_t bt_txg; uint64_t bt_crtxg; } ztest_block_tag_t; typedef struct bufwad { uint64_t bw_index; uint64_t bw_txg; uint64_t bw_data; } bufwad_t; /* * It would be better to use a rangelock_t per object. Unfortunately * the rangelock_t is not a drop-in replacement for rl_t, because we * still need to map from object ID to rangelock_t. */ typedef enum { RL_READER, RL_WRITER, RL_APPEND } rl_type_t; typedef struct rll { void *rll_writer; int rll_readers; kmutex_t rll_lock; kcondvar_t rll_cv; } rll_t; typedef struct rl { uint64_t rl_object; uint64_t rl_offset; uint64_t rl_size; rll_t *rl_lock; } rl_t; #define ZTEST_RANGE_LOCKS 64 #define ZTEST_OBJECT_LOCKS 64 /* * Object descriptor. Used as a template for object lookup/create/remove. */ typedef struct ztest_od { uint64_t od_dir; uint64_t od_object; dmu_object_type_t od_type; dmu_object_type_t od_crtype; uint64_t od_blocksize; uint64_t od_crblocksize; uint64_t od_crdnodesize; uint64_t od_gen; uint64_t od_crgen; char od_name[ZFS_MAX_DATASET_NAME_LEN]; } ztest_od_t; /* * Per-dataset state. 
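 * One of these exists per dataset (-d); zd_shared points at the matching
 * ztest_shared_ds_t in the mmap-ed shared file, so the per-dataset sequence
 * number survives the SIGKILL/re-exec cycle described above.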
*/ typedef struct ztest_ds { ztest_shared_ds_t *zd_shared; objset_t *zd_os; pthread_rwlock_t zd_zilog_lock; zilog_t *zd_zilog; ztest_od_t *zd_od; /* debugging aid */ char zd_name[ZFS_MAX_DATASET_NAME_LEN]; kmutex_t zd_dirobj_lock; rll_t zd_object_lock[ZTEST_OBJECT_LOCKS]; rll_t zd_range_lock[ZTEST_RANGE_LOCKS]; } ztest_ds_t; /* * Per-iteration state. */ typedef void ztest_func_t(ztest_ds_t *zd, uint64_t id); typedef struct ztest_info { ztest_func_t *zi_func; /* test function */ uint64_t zi_iters; /* iterations per execution */ uint64_t *zi_interval; /* execute every seconds */ const char *zi_funcname; /* name of test function */ } ztest_info_t; typedef struct ztest_shared_callstate { uint64_t zc_count; /* per-pass count */ uint64_t zc_time; /* per-pass time */ uint64_t zc_next; /* next time to call this function */ } ztest_shared_callstate_t; static ztest_shared_callstate_t *ztest_shared_callstate; #define ZTEST_GET_SHARED_CALLSTATE(c) (&ztest_shared_callstate[c]) ztest_func_t ztest_dmu_read_write; ztest_func_t ztest_dmu_write_parallel; ztest_func_t ztest_dmu_object_alloc_free; ztest_func_t ztest_dmu_object_next_chunk; ztest_func_t ztest_dmu_commit_callbacks; ztest_func_t ztest_zap; ztest_func_t ztest_zap_parallel; ztest_func_t ztest_zil_commit; ztest_func_t ztest_zil_remount; ztest_func_t ztest_dmu_read_write_zcopy; ztest_func_t ztest_dmu_objset_create_destroy; ztest_func_t ztest_dmu_prealloc; ztest_func_t ztest_fzap; ztest_func_t ztest_dmu_snapshot_create_destroy; ztest_func_t ztest_dsl_prop_get_set; ztest_func_t ztest_spa_prop_get_set; ztest_func_t ztest_spa_create_destroy; ztest_func_t ztest_fault_inject; ztest_func_t ztest_dmu_snapshot_hold; ztest_func_t ztest_mmp_enable_disable; ztest_func_t ztest_scrub; ztest_func_t ztest_dsl_dataset_promote_busy; ztest_func_t ztest_vdev_attach_detach; ztest_func_t ztest_vdev_LUN_growth; ztest_func_t ztest_vdev_add_remove; ztest_func_t ztest_vdev_class_add; ztest_func_t ztest_vdev_aux_add_remove; ztest_func_t ztest_split_pool; ztest_func_t ztest_reguid; ztest_func_t ztest_spa_upgrade; ztest_func_t ztest_device_removal; ztest_func_t ztest_spa_checkpoint_create_discard; ztest_func_t ztest_initialize; ztest_func_t ztest_trim; ztest_func_t ztest_blake3; ztest_func_t ztest_fletcher; ztest_func_t ztest_fletcher_incr; ztest_func_t ztest_verify_dnode_bt; uint64_t zopt_always = 0ULL * NANOSEC; /* all the time */ uint64_t zopt_incessant = 1ULL * NANOSEC / 10; /* every 1/10 second */ uint64_t zopt_often = 1ULL * NANOSEC; /* every second */ uint64_t zopt_sometimes = 10ULL * NANOSEC; /* every 10 seconds */ uint64_t zopt_rarely = 60ULL * NANOSEC; /* every 60 seconds */ #define ZTI_INIT(func, iters, interval) \ { .zi_func = (func), \ .zi_iters = (iters), \ .zi_interval = (interval), \ .zi_funcname = # func } ztest_info_t ztest_info[] = { ZTI_INIT(ztest_dmu_read_write, 1, &zopt_always), ZTI_INIT(ztest_dmu_write_parallel, 10, &zopt_always), ZTI_INIT(ztest_dmu_object_alloc_free, 1, &zopt_always), ZTI_INIT(ztest_dmu_object_next_chunk, 1, &zopt_sometimes), ZTI_INIT(ztest_dmu_commit_callbacks, 1, &zopt_always), ZTI_INIT(ztest_zap, 30, &zopt_always), ZTI_INIT(ztest_zap_parallel, 100, &zopt_always), ZTI_INIT(ztest_split_pool, 1, &zopt_always), ZTI_INIT(ztest_zil_commit, 1, &zopt_incessant), ZTI_INIT(ztest_zil_remount, 1, &zopt_sometimes), ZTI_INIT(ztest_dmu_read_write_zcopy, 1, &zopt_often), ZTI_INIT(ztest_dmu_objset_create_destroy, 1, &zopt_often), ZTI_INIT(ztest_dsl_prop_get_set, 1, &zopt_often), ZTI_INIT(ztest_spa_prop_get_set, 1, &zopt_sometimes), #if 0 
ZTI_INIT(ztest_dmu_prealloc, 1, &zopt_sometimes), #endif ZTI_INIT(ztest_fzap, 1, &zopt_sometimes), ZTI_INIT(ztest_dmu_snapshot_create_destroy, 1, &zopt_sometimes), ZTI_INIT(ztest_spa_create_destroy, 1, &zopt_sometimes), ZTI_INIT(ztest_fault_inject, 1, &zopt_sometimes), ZTI_INIT(ztest_dmu_snapshot_hold, 1, &zopt_sometimes), ZTI_INIT(ztest_mmp_enable_disable, 1, &zopt_sometimes), ZTI_INIT(ztest_reguid, 1, &zopt_rarely), ZTI_INIT(ztest_scrub, 1, &zopt_rarely), ZTI_INIT(ztest_spa_upgrade, 1, &zopt_rarely), ZTI_INIT(ztest_dsl_dataset_promote_busy, 1, &zopt_rarely), ZTI_INIT(ztest_vdev_attach_detach, 1, &zopt_sometimes), ZTI_INIT(ztest_vdev_LUN_growth, 1, &zopt_rarely), ZTI_INIT(ztest_vdev_add_remove, 1, &ztest_opts.zo_vdevtime), ZTI_INIT(ztest_vdev_class_add, 1, &ztest_opts.zo_vdevtime), ZTI_INIT(ztest_vdev_aux_add_remove, 1, &ztest_opts.zo_vdevtime), ZTI_INIT(ztest_device_removal, 1, &zopt_sometimes), ZTI_INIT(ztest_spa_checkpoint_create_discard, 1, &zopt_rarely), ZTI_INIT(ztest_initialize, 1, &zopt_sometimes), ZTI_INIT(ztest_trim, 1, &zopt_sometimes), ZTI_INIT(ztest_blake3, 1, &zopt_rarely), ZTI_INIT(ztest_fletcher, 1, &zopt_rarely), ZTI_INIT(ztest_fletcher_incr, 1, &zopt_rarely), ZTI_INIT(ztest_verify_dnode_bt, 1, &zopt_sometimes), }; #define ZTEST_FUNCS (sizeof (ztest_info) / sizeof (ztest_info_t)) /* * The following struct is used to hold a list of uncalled commit callbacks. * The callbacks are ordered by txg number. */ typedef struct ztest_cb_list { kmutex_t zcl_callbacks_lock; list_t zcl_callbacks; } ztest_cb_list_t; /* * Stuff we need to share writably between parent and child. */ typedef struct ztest_shared { boolean_t zs_do_init; hrtime_t zs_proc_start; hrtime_t zs_proc_stop; hrtime_t zs_thread_start; hrtime_t zs_thread_stop; hrtime_t zs_thread_kill; uint64_t zs_enospc_count; uint64_t zs_vdev_next_leaf; uint64_t zs_vdev_aux; uint64_t zs_alloc; uint64_t zs_space; uint64_t zs_splits; uint64_t zs_mirrors; uint64_t zs_metaslab_sz; uint64_t zs_metaslab_df_alloc_threshold; uint64_t zs_guid; } ztest_shared_t; #define ID_PARALLEL -1ULL static char ztest_dev_template[] = "%s/%s.%llua"; static char ztest_aux_template[] = "%s/%s.%s.%llu"; ztest_shared_t *ztest_shared; static spa_t *ztest_spa = NULL; static ztest_ds_t *ztest_ds; static kmutex_t ztest_vdev_lock; static boolean_t ztest_device_removal_active = B_FALSE; static boolean_t ztest_pool_scrubbed = B_FALSE; static kmutex_t ztest_checkpoint_lock; /* * The ztest_name_lock protects the pool and dataset namespace used by * the individual tests. To modify the namespace, consumers must grab * this lock as writer. Grabbing the lock as reader will ensure that the * namespace does not change while the lock is held. */ static pthread_rwlock_t ztest_name_lock; static boolean_t ztest_dump_core = B_TRUE; static boolean_t ztest_exiting; /* Global commit callback list */ static ztest_cb_list_t zcl; /* Commit cb delay */ static uint64_t zc_min_txg_delay = UINT64_MAX; static int zc_cb_counter = 0; /* * Minimum number of commit callbacks that need to be registered for us to check * whether the minimum txg delay is acceptable. */ #define ZTEST_COMMIT_CB_MIN_REG 100 /* * If a number of txgs equal to this threshold have been created after a commit * callback has been registered but not called, then we assume there is an * implementation bug. 
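 * (With TXG_CONCURRENT_STATES == 3 this threshold works out to 1003 txgs,
 * far beyond the handful of in-flight txgs a callback can legitimately
 * wait behind.)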
*/ #define ZTEST_COMMIT_CB_THRESH (TXG_CONCURRENT_STATES + 1000) enum ztest_object { ZTEST_META_DNODE = 0, ZTEST_DIROBJ, ZTEST_OBJECTS }; static __attribute__((noreturn)) void usage(boolean_t requested); static int ztest_scrub_impl(spa_t *spa); /* * These libumem hooks provide a reasonable set of defaults for the allocator's * debugging facilities. */ const char * _umem_debug_init(void) { return ("default,verbose"); /* $UMEM_DEBUG setting */ } const char * _umem_logging_init(void) { return ("fail,contents"); /* $UMEM_LOGGING setting */ } static void dump_debug_buffer(void) { ssize_t ret __attribute__((unused)); if (!ztest_opts.zo_dump_dbgmsg) return; /* * We use write() instead of printf() so that this function * is safe to call from a signal handler. */ ret = write(STDOUT_FILENO, "\n", 1); zfs_dbgmsg_print("ztest"); } #define BACKTRACE_SZ 100 static void sig_handler(int signo) { struct sigaction action; #if (__GLIBC__ && !__UCLIBC__) /* backtrace() is a GNU extension */ int nptrs; void *buffer[BACKTRACE_SZ]; nptrs = backtrace(buffer, BACKTRACE_SZ); backtrace_symbols_fd(buffer, nptrs, STDERR_FILENO); #endif dump_debug_buffer(); /* * Restore default action and re-raise signal so SIGSEGV and * SIGABRT can trigger a core dump. */ action.sa_handler = SIG_DFL; sigemptyset(&action.sa_mask); action.sa_flags = 0; (void) sigaction(signo, &action, NULL); raise(signo); } #define FATAL_MSG_SZ 1024 static const char *fatal_msg; static __attribute__((format(printf, 2, 3))) __attribute__((noreturn)) void fatal(int do_perror, const char *message, ...) { va_list args; int save_errno = errno; char *buf; (void) fflush(stdout); buf = umem_alloc(FATAL_MSG_SZ, UMEM_NOFAIL); if (buf == NULL) goto out; va_start(args, message); (void) sprintf(buf, "ztest: "); /* LINTED */ (void) vsprintf(buf + strlen(buf), message, args); va_end(args); if (do_perror) { (void) snprintf(buf + strlen(buf), FATAL_MSG_SZ - strlen(buf), ": %s", strerror(save_errno)); } (void) fprintf(stderr, "%s\n", buf); fatal_msg = buf; /* to ease debugging */ out: if (ztest_dump_core) abort(); else dump_debug_buffer(); exit(3); } static int str2shift(const char *buf) { const char *ends = "BKMGTPEZ"; int i; if (buf[0] == '\0') return (0); for (i = 0; i < strlen(ends); i++) { if (toupper(buf[0]) == ends[i]) break; } if (i == strlen(ends)) { (void) fprintf(stderr, "ztest: invalid bytes suffix: %s\n", buf); usage(B_FALSE); } if (buf[1] == '\0' || (toupper(buf[1]) == 'B' && buf[2] == '\0')) { return (10*i); } (void) fprintf(stderr, "ztest: invalid bytes suffix: %s\n", buf); usage(B_FALSE); } static uint64_t nicenumtoull(const char *buf) { char *end; uint64_t val; val = strtoull(buf, &end, 0); if (end == buf) { (void) fprintf(stderr, "ztest: bad numeric value: %s\n", buf); usage(B_FALSE); } else if (end[0] == '.') { double fval = strtod(buf, &end); fval *= pow(2, str2shift(end)); /* * UINT64_MAX is not exactly representable as a double. * The closest representation is UINT64_MAX + 1, so we * use a >= comparison instead of > for the bounds check. 
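 * For example (illustrative), an argument of "16.0E" evaluates to exactly
 * 2^64 == UINT64_MAX + 1 here; the >= test rejects it, whereas a plain >
 * against (double)UINT64_MAX (which also rounds to 2^64) would let it
 * slip through to the cast below and overflow.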
*/ if (fval >= (double)UINT64_MAX) { (void) fprintf(stderr, "ztest: value too large: %s\n", buf); usage(B_FALSE); } val = (uint64_t)fval; } else { int shift = str2shift(end); if (shift >= 64 || (val << shift) >> shift != val) { (void) fprintf(stderr, "ztest: value too large: %s\n", buf); usage(B_FALSE); } val <<= shift; } return (val); } typedef struct ztest_option { const char short_opt; const char *long_opt; const char *long_opt_param; const char *comment; unsigned int default_int; const char *default_str; } ztest_option_t; /* * The following option_table is used for generating the usage info as well as * the long and short option information for calling getopt_long(). */ static ztest_option_t option_table[] = { { 'v', "vdevs", "INTEGER", "Number of vdevs", DEFAULT_VDEV_COUNT, NULL}, { 's', "vdev-size", "INTEGER", "Size of each vdev", NO_DEFAULT, DEFAULT_VDEV_SIZE_STR}, { 'a', "alignment-shift", "INTEGER", "Alignment shift; use 0 for random", DEFAULT_ASHIFT, NULL}, { 'm', "mirror-copies", "INTEGER", "Number of mirror copies", DEFAULT_MIRRORS, NULL}, { 'r', "raid-disks", "INTEGER", "Number of raidz/draid disks", DEFAULT_RAID_CHILDREN, NULL}, { 'R', "raid-parity", "INTEGER", "Raid parity", DEFAULT_RAID_PARITY, NULL}, { 'K', "raid-kind", "raidz|draid|random", "Raid kind", NO_DEFAULT, "random"}, { 'D', "draid-data", "INTEGER", "Number of draid data drives", DEFAULT_DRAID_DATA, NULL}, { 'S', "draid-spares", "INTEGER", "Number of draid spares", DEFAULT_DRAID_SPARES, NULL}, { 'd', "datasets", "INTEGER", "Number of datasets", DEFAULT_DATASETS_COUNT, NULL}, { 't', "threads", "INTEGER", "Number of ztest threads", DEFAULT_THREADS, NULL}, { 'g', "gang-block-threshold", "INTEGER", "Metaslab gang block threshold", NO_DEFAULT, DEFAULT_FORCE_GANGING_STR}, { 'i', "init-count", "INTEGER", "Number of times to initialize pool", DEFAULT_INITS, NULL}, { 'k', "kill-percentage", "INTEGER", "Kill percentage", NO_DEFAULT, DEFAULT_KILLRATE_STR}, { 'p', "pool-name", "STRING", "Pool name", NO_DEFAULT, DEFAULT_POOL}, { 'f', "vdev-file-directory", "PATH", "File directory for vdev files", NO_DEFAULT, DEFAULT_VDEV_DIR}, { 'M', "multi-host", NULL, "Multi-host; simulate pool imported on remote host", NO_DEFAULT, NULL}, { 'E', "use-existing-pool", NULL, "Use existing pool instead of creating new one", NO_DEFAULT, NULL}, { 'T', "run-time", "INTEGER", "Total run time", NO_DEFAULT, DEFAULT_RUN_TIME_STR}, { 'P', "pass-time", "INTEGER", "Time per pass", NO_DEFAULT, DEFAULT_PASS_TIME_STR}, { 'F', "freeze-loops", "INTEGER", "Max loops in spa_freeze()", DEFAULT_MAX_LOOPS, NULL}, { 'B', "alt-ztest", "PATH", "Alternate ztest path", NO_DEFAULT, NULL}, { 'C', "vdev-class-state", "on|off|random", "vdev class state", NO_DEFAULT, "random"}, { 'o', "option", "\"OPTION=INTEGER\"", "Set global variable to an unsigned 32-bit integer value", NO_DEFAULT, NULL}, { 'G', "dump-debug-msg", NULL, "Dump zfs_dbgmsg buffer before exiting due to an error", NO_DEFAULT, NULL}, { 'V', "verbose", NULL, "Verbose (use multiple times for ever more verbosity)", NO_DEFAULT, NULL}, { 'h', "help", NULL, "Show this help", NO_DEFAULT, NULL}, {0, 0, 0, 0, 0, 0} }; static struct option *long_opts = NULL; static char *short_opts = NULL; static void init_options(void) { ASSERT3P(long_opts, ==, NULL); ASSERT3P(short_opts, ==, NULL); int count = sizeof (option_table) / sizeof (option_table[0]); long_opts = umem_alloc(sizeof (struct option) * count, UMEM_NOFAIL); short_opts = umem_alloc(sizeof (char) * 2 * count, UMEM_NOFAIL); int short_opt_index = 0; for (int i = 0; i < 
count; i++) { long_opts[i].val = option_table[i].short_opt; long_opts[i].name = option_table[i].long_opt; long_opts[i].has_arg = option_table[i].long_opt_param != NULL ? required_argument : no_argument; long_opts[i].flag = NULL; short_opts[short_opt_index++] = option_table[i].short_opt; if (option_table[i].long_opt_param != NULL) { short_opts[short_opt_index++] = ':'; } } } static void fini_options(void) { int count = sizeof (option_table) / sizeof (option_table[0]); umem_free(long_opts, sizeof (struct option) * count); umem_free(short_opts, sizeof (char) * 2 * count); long_opts = NULL; short_opts = NULL; } static __attribute__((noreturn)) void usage(boolean_t requested) { char option[80]; FILE *fp = requested ? stdout : stderr; (void) fprintf(fp, "Usage: %s [OPTIONS...]\n", DEFAULT_POOL); for (int i = 0; option_table[i].short_opt != 0; i++) { if (option_table[i].long_opt_param != NULL) { (void) sprintf(option, " -%c --%s=%s", option_table[i].short_opt, option_table[i].long_opt, option_table[i].long_opt_param); } else { (void) sprintf(option, " -%c --%s", option_table[i].short_opt, option_table[i].long_opt); } (void) fprintf(fp, " %-40s%s", option, option_table[i].comment); if (option_table[i].long_opt_param != NULL) { if (option_table[i].default_str != NULL) { (void) fprintf(fp, " (default: %s)", option_table[i].default_str); } else if (option_table[i].default_int != NO_DEFAULT) { (void) fprintf(fp, " (default: %u)", option_table[i].default_int); } } (void) fprintf(fp, "\n"); } exit(requested ? 0 : 1); } static uint64_t ztest_random(uint64_t range) { uint64_t r; ASSERT3S(ztest_fd_rand, >=, 0); if (range == 0) return (0); if (read(ztest_fd_rand, &r, sizeof (r)) != sizeof (r)) fatal(B_TRUE, "short read from /dev/urandom"); return (r % range); } static void ztest_parse_name_value(const char *input, ztest_shared_opts_t *zo) { char name[32]; char *value; int state = ZTEST_VDEV_CLASS_RND; (void) strlcpy(name, input, sizeof (name)); value = strchr(name, '='); if (value == NULL) { (void) fprintf(stderr, "missing value in property=value " "'-C' argument (%s)\n", input); usage(B_FALSE); } *(value) = '\0'; value++; if (strcmp(value, "on") == 0) { state = ZTEST_VDEV_CLASS_ON; } else if (strcmp(value, "off") == 0) { state = ZTEST_VDEV_CLASS_OFF; } else if (strcmp(value, "random") == 0) { state = ZTEST_VDEV_CLASS_RND; } else { (void) fprintf(stderr, "invalid property value '%s'\n", value); usage(B_FALSE); } if (strcmp(name, "special") == 0) { zo->zo_special_vdevs = state; } else { (void) fprintf(stderr, "invalid property name '%s'\n", name); usage(B_FALSE); } if (zo->zo_verbose >= 3) (void) printf("%s vdev state is '%s'\n", name, value); } static void process_options(int argc, char **argv) { char *path; ztest_shared_opts_t *zo = &ztest_opts; int opt; uint64_t value; const char *raid_kind = "random"; memcpy(zo, &ztest_opts_defaults, sizeof (*zo)); init_options(); while ((opt = getopt_long(argc, argv, short_opts, long_opts, NULL)) != EOF) { value = 0; switch (opt) { case 'v': case 's': case 'a': case 'm': case 'r': case 'R': case 'D': case 'S': case 'd': case 't': case 'g': case 'i': case 'k': case 'T': case 'P': case 'F': value = nicenumtoull(optarg); } switch (opt) { case 'v': zo->zo_vdevs = value; break; case 's': zo->zo_vdev_size = MAX(SPA_MINDEVSIZE, value); break; case 'a': zo->zo_ashift = value; break; case 'm': zo->zo_mirrors = value; break; case 'r': zo->zo_raid_children = MAX(1, value); break; case 'R': zo->zo_raid_parity = MIN(MAX(value, 1), 3); break; case 'K': raid_kind = optarg; break; case 
'D': zo->zo_draid_data = MAX(1, value); break; case 'S': zo->zo_draid_spares = MAX(1, value); break; case 'd': zo->zo_datasets = MAX(1, value); break; case 't': zo->zo_threads = MAX(1, value); break; case 'g': zo->zo_metaslab_force_ganging = MAX(SPA_MINBLOCKSIZE << 1, value); break; case 'i': zo->zo_init = value; break; case 'k': zo->zo_killrate = value; break; case 'p': (void) strlcpy(zo->zo_pool, optarg, sizeof (zo->zo_pool)); break; case 'f': path = realpath(optarg, NULL); if (path == NULL) { (void) fprintf(stderr, "error: %s: %s\n", optarg, strerror(errno)); usage(B_FALSE); } else { (void) strlcpy(zo->zo_dir, path, sizeof (zo->zo_dir)); free(path); } break; case 'M': zo->zo_mmp_test = 1; break; case 'V': zo->zo_verbose++; break; case 'E': zo->zo_init = 0; break; case 'T': zo->zo_time = value; break; case 'P': zo->zo_passtime = MAX(1, value); break; case 'F': zo->zo_maxloops = MAX(1, value); break; case 'B': (void) strlcpy(zo->zo_alt_ztest, optarg, sizeof (zo->zo_alt_ztest)); break; case 'C': ztest_parse_name_value(optarg, zo); break; case 'o': if (zo->zo_gvars_count >= ZO_GVARS_MAX_COUNT) { (void) fprintf(stderr, "max global var count (%zu) exceeded\n", ZO_GVARS_MAX_COUNT); usage(B_FALSE); } char *v = zo->zo_gvars[zo->zo_gvars_count]; if (strlcpy(v, optarg, ZO_GVARS_MAX_ARGLEN) >= ZO_GVARS_MAX_ARGLEN) { (void) fprintf(stderr, "global var option '%s' is too long\n", optarg); usage(B_FALSE); } zo->zo_gvars_count++; break; case 'G': zo->zo_dump_dbgmsg = 1; break; case 'h': usage(B_TRUE); break; case '?': default: usage(B_FALSE); break; } } fini_options(); /* When raid choice is 'random' add a draid pool 50% of the time */ if (strcmp(raid_kind, "random") == 0) { raid_kind = (ztest_random(2) == 0) ? "draid" : "raidz"; if (ztest_opts.zo_verbose >= 3) (void) printf("choosing RAID type '%s'\n", raid_kind); } if (strcmp(raid_kind, "draid") == 0) { uint64_t min_devsize; /* With fewer disk use 256M, otherwise 128M is OK */ min_devsize = (ztest_opts.zo_raid_children < 16) ? (256ULL << 20) : (128ULL << 20); /* No top-level mirrors with dRAID for now */ zo->zo_mirrors = 0; /* Use more appropriate defaults for dRAID */ if (zo->zo_vdevs == ztest_opts_defaults.zo_vdevs) zo->zo_vdevs = 1; if (zo->zo_raid_children == ztest_opts_defaults.zo_raid_children) zo->zo_raid_children = 16; if (zo->zo_ashift < 12) zo->zo_ashift = 12; if (zo->zo_vdev_size < min_devsize) zo->zo_vdev_size = min_devsize; if (zo->zo_draid_data + zo->zo_raid_parity > zo->zo_raid_children - zo->zo_draid_spares) { (void) fprintf(stderr, "error: too few draid " "children (%d) for stripe width (%d)\n", zo->zo_raid_children, zo->zo_draid_data + zo->zo_raid_parity); usage(B_FALSE); } (void) strlcpy(zo->zo_raid_type, VDEV_TYPE_DRAID, sizeof (zo->zo_raid_type)); } else /* using raidz */ { ASSERT0(strcmp(raid_kind, "raidz")); zo->zo_raid_parity = MIN(zo->zo_raid_parity, zo->zo_raid_children - 1); } zo->zo_vdevtime = (zo->zo_vdevs > 0 ? 
zo->zo_time * NANOSEC / zo->zo_vdevs : UINT64_MAX >> 2); if (*zo->zo_alt_ztest) { const char *invalid_what = "ztest"; char *val = zo->zo_alt_ztest; if (0 != access(val, X_OK) || (strrchr(val, '/') == NULL && (errno = EINVAL))) goto invalid; int dirlen = strrchr(val, '/') - val; - strncpy(zo->zo_alt_libpath, val, dirlen); + strlcpy(zo->zo_alt_libpath, val, + MIN(sizeof (zo->zo_alt_libpath), dirlen + 1)); invalid_what = "library path", val = zo->zo_alt_libpath; if (strrchr(val, '/') == NULL && (errno = EINVAL)) goto invalid; *strrchr(val, '/') = '\0'; strlcat(val, "/lib", sizeof (zo->zo_alt_libpath)); if (0 != access(zo->zo_alt_libpath, X_OK)) goto invalid; return; invalid: ztest_dump_core = B_FALSE; fatal(B_TRUE, "invalid alternate %s %s", invalid_what, val); } } static void ztest_kill(ztest_shared_t *zs) { zs->zs_alloc = metaslab_class_get_alloc(spa_normal_class(ztest_spa)); zs->zs_space = metaslab_class_get_space(spa_normal_class(ztest_spa)); /* * Before we kill ourselves, make sure that the config is updated. * See comment above spa_write_cachefile(). */ mutex_enter(&spa_namespace_lock); spa_write_cachefile(ztest_spa, B_FALSE, B_FALSE); mutex_exit(&spa_namespace_lock); (void) raise(SIGKILL); } static void ztest_record_enospc(const char *s) { (void) s; ztest_shared->zs_enospc_count++; } static uint64_t ztest_get_ashift(void) { if (ztest_opts.zo_ashift == 0) return (SPA_MINBLOCKSHIFT + ztest_random(5)); return (ztest_opts.zo_ashift); } static boolean_t ztest_is_draid_spare(const char *name) { uint64_t spare_id = 0, parity = 0, vdev_id = 0; if (sscanf(name, VDEV_TYPE_DRAID "%"PRIu64"-%"PRIu64"-%"PRIu64"", &parity, &vdev_id, &spare_id) == 3) { return (B_TRUE); } return (B_FALSE); } static nvlist_t * make_vdev_file(const char *path, const char *aux, const char *pool, size_t size, uint64_t ashift) { char *pathbuf = NULL; uint64_t vdev; nvlist_t *file; boolean_t draid_spare = B_FALSE; if (ashift == 0) ashift = ztest_get_ashift(); if (path == NULL) { pathbuf = umem_alloc(MAXPATHLEN, UMEM_NOFAIL); path = pathbuf; if (aux != NULL) { vdev = ztest_shared->zs_vdev_aux; (void) snprintf(pathbuf, MAXPATHLEN, ztest_aux_template, ztest_opts.zo_dir, pool == NULL ? ztest_opts.zo_pool : pool, aux, vdev); } else { vdev = ztest_shared->zs_vdev_next_leaf++; (void) snprintf(pathbuf, MAXPATHLEN, ztest_dev_template, ztest_opts.zo_dir, pool == NULL ? ztest_opts.zo_pool : pool, vdev); } } else { draid_spare = ztest_is_draid_spare(path); } if (size != 0 && !draid_spare) { int fd = open(path, O_RDWR | O_CREAT | O_TRUNC, 0666); if (fd == -1) fatal(B_TRUE, "can't open %s", path); if (ftruncate(fd, size) != 0) fatal(B_TRUE, "can't ftruncate %s", path); (void) close(fd); } file = fnvlist_alloc(); fnvlist_add_string(file, ZPOOL_CONFIG_TYPE, draid_spare ? 
VDEV_TYPE_DRAID_SPARE : VDEV_TYPE_FILE); fnvlist_add_string(file, ZPOOL_CONFIG_PATH, path); fnvlist_add_uint64(file, ZPOOL_CONFIG_ASHIFT, ashift); umem_free(pathbuf, MAXPATHLEN); return (file); } static nvlist_t * make_vdev_raid(const char *path, const char *aux, const char *pool, size_t size, uint64_t ashift, int r) { nvlist_t *raid, **child; int c; if (r < 2) return (make_vdev_file(path, aux, pool, size, ashift)); child = umem_alloc(r * sizeof (nvlist_t *), UMEM_NOFAIL); for (c = 0; c < r; c++) child[c] = make_vdev_file(path, aux, pool, size, ashift); raid = fnvlist_alloc(); fnvlist_add_string(raid, ZPOOL_CONFIG_TYPE, ztest_opts.zo_raid_type); fnvlist_add_uint64(raid, ZPOOL_CONFIG_NPARITY, ztest_opts.zo_raid_parity); fnvlist_add_nvlist_array(raid, ZPOOL_CONFIG_CHILDREN, (const nvlist_t **)child, r); if (strcmp(ztest_opts.zo_raid_type, VDEV_TYPE_DRAID) == 0) { uint64_t ndata = ztest_opts.zo_draid_data; uint64_t nparity = ztest_opts.zo_raid_parity; uint64_t nspares = ztest_opts.zo_draid_spares; uint64_t children = ztest_opts.zo_raid_children; uint64_t ngroups = 1; /* * Calculate the minimum number of groups required to fill a * slice. This is the LCM of the stripe width (data + parity) * and the number of data drives (children - spares). */ while (ngroups * (ndata + nparity) % (children - nspares) != 0) ngroups++; /* Store the basic dRAID configuration. */ fnvlist_add_uint64(raid, ZPOOL_CONFIG_DRAID_NDATA, ndata); fnvlist_add_uint64(raid, ZPOOL_CONFIG_DRAID_NSPARES, nspares); fnvlist_add_uint64(raid, ZPOOL_CONFIG_DRAID_NGROUPS, ngroups); } for (c = 0; c < r; c++) fnvlist_free(child[c]); umem_free(child, r * sizeof (nvlist_t *)); return (raid); } static nvlist_t * make_vdev_mirror(const char *path, const char *aux, const char *pool, size_t size, uint64_t ashift, int r, int m) { nvlist_t *mirror, **child; int c; if (m < 1) return (make_vdev_raid(path, aux, pool, size, ashift, r)); child = umem_alloc(m * sizeof (nvlist_t *), UMEM_NOFAIL); for (c = 0; c < m; c++) child[c] = make_vdev_raid(path, aux, pool, size, ashift, r); mirror = fnvlist_alloc(); fnvlist_add_string(mirror, ZPOOL_CONFIG_TYPE, VDEV_TYPE_MIRROR); fnvlist_add_nvlist_array(mirror, ZPOOL_CONFIG_CHILDREN, (const nvlist_t **)child, m); for (c = 0; c < m; c++) fnvlist_free(child[c]); umem_free(child, m * sizeof (nvlist_t *)); return (mirror); } static nvlist_t * make_vdev_root(const char *path, const char *aux, const char *pool, size_t size, uint64_t ashift, const char *class, int r, int m, int t) { nvlist_t *root, **child; int c; boolean_t log; ASSERT3S(t, >, 0); log = (class != NULL && strcmp(class, "log") == 0); child = umem_alloc(t * sizeof (nvlist_t *), UMEM_NOFAIL); for (c = 0; c < t; c++) { child[c] = make_vdev_mirror(path, aux, pool, size, ashift, r, m); fnvlist_add_uint64(child[c], ZPOOL_CONFIG_IS_LOG, log); if (class != NULL && class[0] != '\0') { ASSERT(m > 1 || log); /* expecting a mirror */ fnvlist_add_string(child[c], ZPOOL_CONFIG_ALLOCATION_BIAS, class); } } root = fnvlist_alloc(); fnvlist_add_string(root, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT); fnvlist_add_nvlist_array(root, aux ? aux : ZPOOL_CONFIG_CHILDREN, (const nvlist_t **)child, t); for (c = 0; c < t; c++) fnvlist_free(child[c]); umem_free(child, t * sizeof (nvlist_t *)); return (root); } /* * Find a random spa version. Returns back a random spa version in the * range [initial_version, SPA_VERSION_FEATURES]. 
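 * Versions at or below SPA_VERSION_BEFORE_FEATURES are chosen uniformly from [initial_version, SPA_VERSION_BEFORE_FEATURES]; anything newer collapses to SPA_VERSION_FEATURES, since no supported version lies between the two.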
*/ static uint64_t ztest_random_spa_version(uint64_t initial_version) { uint64_t version = initial_version; if (version <= SPA_VERSION_BEFORE_FEATURES) { version = version + ztest_random(SPA_VERSION_BEFORE_FEATURES - version + 1); } if (version > SPA_VERSION_BEFORE_FEATURES) version = SPA_VERSION_FEATURES; ASSERT(SPA_VERSION_IS_SUPPORTED(version)); return (version); } static int ztest_random_blocksize(void) { ASSERT3U(ztest_spa->spa_max_ashift, !=, 0); /* * Choose a block size >= the ashift. * If the SPA supports new MAXBLOCKSIZE, test up to 1MB blocks. */ int maxbs = SPA_OLD_MAXBLOCKSHIFT; if (spa_maxblocksize(ztest_spa) == SPA_MAXBLOCKSIZE) maxbs = 20; uint64_t block_shift = ztest_random(maxbs - ztest_spa->spa_max_ashift + 1); return (1 << (SPA_MINBLOCKSHIFT + block_shift)); } static int ztest_random_dnodesize(void) { int slots; int max_slots = spa_maxdnodesize(ztest_spa) >> DNODE_SHIFT; if (max_slots == DNODE_MIN_SLOTS) return (DNODE_MIN_SIZE); /* * Weight the random distribution more heavily toward smaller * dnode sizes since that is more likely to reflect real-world * usage. */ ASSERT3U(max_slots, >, 4); switch (ztest_random(10)) { case 0: slots = 5 + ztest_random(max_slots - 4); break; case 1 ... 4: slots = 2 + ztest_random(3); break; default: slots = 1; break; } return (slots << DNODE_SHIFT); } static int ztest_random_ibshift(void) { return (DN_MIN_INDBLKSHIFT + ztest_random(DN_MAX_INDBLKSHIFT - DN_MIN_INDBLKSHIFT + 1)); } static uint64_t ztest_random_vdev_top(spa_t *spa, boolean_t log_ok) { uint64_t top; vdev_t *rvd = spa->spa_root_vdev; vdev_t *tvd; ASSERT3U(spa_config_held(spa, SCL_ALL, RW_READER), !=, 0); do { top = ztest_random(rvd->vdev_children); tvd = rvd->vdev_child[top]; } while (!vdev_is_concrete(tvd) || (tvd->vdev_islog && !log_ok) || tvd->vdev_mg == NULL || tvd->vdev_mg->mg_class == NULL); return (top); } static uint64_t ztest_random_dsl_prop(zfs_prop_t prop) { uint64_t value; do { value = zfs_prop_random_value(prop, ztest_random(-1ULL)); } while (prop == ZFS_PROP_CHECKSUM && value == ZIO_CHECKSUM_OFF); return (value); } static int ztest_dsl_prop_set_uint64(char *osname, zfs_prop_t prop, uint64_t value, boolean_t inherit) { const char *propname = zfs_prop_to_name(prop); const char *valname; char *setpoint; uint64_t curval; int error; error = dsl_prop_set_int(osname, propname, (inherit ? 
ZPROP_SRC_NONE : ZPROP_SRC_LOCAL), value); if (error == ENOSPC) { ztest_record_enospc(FTAG); return (error); } ASSERT0(error); setpoint = umem_alloc(MAXPATHLEN, UMEM_NOFAIL); VERIFY0(dsl_prop_get_integer(osname, propname, &curval, setpoint)); if (ztest_opts.zo_verbose >= 6) { int err; err = zfs_prop_index_to_string(prop, curval, &valname); if (err) (void) printf("%s %s = %llu at '%s'\n", osname, propname, (unsigned long long)curval, setpoint); else (void) printf("%s %s = %s at '%s'\n", osname, propname, valname, setpoint); } umem_free(setpoint, MAXPATHLEN); return (error); } static int ztest_spa_prop_set_uint64(zpool_prop_t prop, uint64_t value) { spa_t *spa = ztest_spa; nvlist_t *props = NULL; int error; props = fnvlist_alloc(); fnvlist_add_uint64(props, zpool_prop_to_name(prop), value); error = spa_prop_set(spa, props); fnvlist_free(props); if (error == ENOSPC) { ztest_record_enospc(FTAG); return (error); } ASSERT0(error); return (error); } static int ztest_dmu_objset_own(const char *name, dmu_objset_type_t type, boolean_t readonly, boolean_t decrypt, const void *tag, objset_t **osp) { int err; char *cp = NULL; char ddname[ZFS_MAX_DATASET_NAME_LEN]; strcpy(ddname, name); cp = strchr(ddname, '@'); if (cp != NULL) *cp = '\0'; err = dmu_objset_own(name, type, readonly, decrypt, tag, osp); while (decrypt && err == EACCES) { dsl_crypto_params_t *dcp; nvlist_t *crypto_args = fnvlist_alloc(); fnvlist_add_uint8_array(crypto_args, "wkeydata", (uint8_t *)ztest_wkeydata, WRAPPING_KEY_LEN); VERIFY0(dsl_crypto_params_create_nvlist(DCP_CMD_NONE, NULL, crypto_args, &dcp)); err = spa_keystore_load_wkey(ddname, dcp, B_FALSE); /* * Note: if there was an error loading, the wkey was not * consumed, and needs to be freed. */ dsl_crypto_params_free(dcp, (err != 0)); fnvlist_free(crypto_args); if (err == EINVAL) { /* * We couldn't load a key for this dataset so try * the parent. This loop will eventually hit the * encryption root since ztest only makes clones * as children of their origin datasets. 
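 * Each EINVAL retry strips the last '/child' component from ddname and reattempts the wrapping-key load one level closer to the encryption root.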
*/ cp = strrchr(ddname, '/'); if (cp == NULL) return (err); *cp = '\0'; err = EACCES; continue; } else if (err != 0) { break; } err = dmu_objset_own(name, type, readonly, decrypt, tag, osp); break; } return (err); } static void ztest_rll_init(rll_t *rll) { rll->rll_writer = NULL; rll->rll_readers = 0; mutex_init(&rll->rll_lock, NULL, MUTEX_DEFAULT, NULL); cv_init(&rll->rll_cv, NULL, CV_DEFAULT, NULL); } static void ztest_rll_destroy(rll_t *rll) { ASSERT3P(rll->rll_writer, ==, NULL); ASSERT0(rll->rll_readers); mutex_destroy(&rll->rll_lock); cv_destroy(&rll->rll_cv); } static void ztest_rll_lock(rll_t *rll, rl_type_t type) { mutex_enter(&rll->rll_lock); if (type == RL_READER) { while (rll->rll_writer != NULL) (void) cv_wait(&rll->rll_cv, &rll->rll_lock); rll->rll_readers++; } else { while (rll->rll_writer != NULL || rll->rll_readers) (void) cv_wait(&rll->rll_cv, &rll->rll_lock); rll->rll_writer = curthread; } mutex_exit(&rll->rll_lock); } static void ztest_rll_unlock(rll_t *rll) { mutex_enter(&rll->rll_lock); if (rll->rll_writer) { ASSERT0(rll->rll_readers); rll->rll_writer = NULL; } else { ASSERT3S(rll->rll_readers, >, 0); ASSERT3P(rll->rll_writer, ==, NULL); rll->rll_readers--; } if (rll->rll_writer == NULL && rll->rll_readers == 0) cv_broadcast(&rll->rll_cv); mutex_exit(&rll->rll_lock); } static void ztest_object_lock(ztest_ds_t *zd, uint64_t object, rl_type_t type) { rll_t *rll = &zd->zd_object_lock[object & (ZTEST_OBJECT_LOCKS - 1)]; ztest_rll_lock(rll, type); } static void ztest_object_unlock(ztest_ds_t *zd, uint64_t object) { rll_t *rll = &zd->zd_object_lock[object & (ZTEST_OBJECT_LOCKS - 1)]; ztest_rll_unlock(rll); } static rl_t * ztest_range_lock(ztest_ds_t *zd, uint64_t object, uint64_t offset, uint64_t size, rl_type_t type) { uint64_t hash = object ^ (offset % (ZTEST_RANGE_LOCKS + 1)); rll_t *rll = &zd->zd_range_lock[hash & (ZTEST_RANGE_LOCKS - 1)]; rl_t *rl; rl = umem_alloc(sizeof (*rl), UMEM_NOFAIL); rl->rl_object = object; rl->rl_offset = offset; rl->rl_size = size; rl->rl_lock = rll; ztest_rll_lock(rll, type); return (rl); } static void ztest_range_unlock(rl_t *rl) { rll_t *rll = rl->rl_lock; ztest_rll_unlock(rll); umem_free(rl, sizeof (*rl)); } static void ztest_zd_init(ztest_ds_t *zd, ztest_shared_ds_t *szd, objset_t *os) { zd->zd_os = os; zd->zd_zilog = dmu_objset_zil(os); zd->zd_shared = szd; dmu_objset_name(os, zd->zd_name); int l; if (zd->zd_shared != NULL) zd->zd_shared->zd_seq = 0; VERIFY0(pthread_rwlock_init(&zd->zd_zilog_lock, NULL)); mutex_init(&zd->zd_dirobj_lock, NULL, MUTEX_DEFAULT, NULL); for (l = 0; l < ZTEST_OBJECT_LOCKS; l++) ztest_rll_init(&zd->zd_object_lock[l]); for (l = 0; l < ZTEST_RANGE_LOCKS; l++) ztest_rll_init(&zd->zd_range_lock[l]); } static void ztest_zd_fini(ztest_ds_t *zd) { int l; mutex_destroy(&zd->zd_dirobj_lock); (void) pthread_rwlock_destroy(&zd->zd_zilog_lock); for (l = 0; l < ZTEST_OBJECT_LOCKS; l++) ztest_rll_destroy(&zd->zd_object_lock[l]); for (l = 0; l < ZTEST_RANGE_LOCKS; l++) ztest_rll_destroy(&zd->zd_range_lock[l]); } #define TXG_MIGHTWAIT (ztest_random(10) == 0 ? TXG_NOWAIT : TXG_WAIT) static uint64_t ztest_tx_assign(dmu_tx_t *tx, uint64_t txg_how, const char *tag) { uint64_t txg; int error; /* * Attempt to assign tx to some transaction group. 
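 * Returns the assigned txg on success. On failure the tx is aborted and 0 is returned: ERESTART (possible only with TXG_NOWAIT) waits for the next txg before giving up, while ENOSPC is tallied via ztest_record_enospc().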
*/ error = dmu_tx_assign(tx, txg_how); if (error) { if (error == ERESTART) { ASSERT3U(txg_how, ==, TXG_NOWAIT); dmu_tx_wait(tx); } else { ASSERT3U(error, ==, ENOSPC); ztest_record_enospc(tag); } dmu_tx_abort(tx); return (0); } txg = dmu_tx_get_txg(tx); ASSERT3U(txg, !=, 0); return (txg); } static void ztest_bt_generate(ztest_block_tag_t *bt, objset_t *os, uint64_t object, uint64_t dnodesize, uint64_t offset, uint64_t gen, uint64_t txg, uint64_t crtxg) { bt->bt_magic = BT_MAGIC; bt->bt_objset = dmu_objset_id(os); bt->bt_object = object; bt->bt_dnodesize = dnodesize; bt->bt_offset = offset; bt->bt_gen = gen; bt->bt_txg = txg; bt->bt_crtxg = crtxg; } static void ztest_bt_verify(ztest_block_tag_t *bt, objset_t *os, uint64_t object, uint64_t dnodesize, uint64_t offset, uint64_t gen, uint64_t txg, uint64_t crtxg) { ASSERT3U(bt->bt_magic, ==, BT_MAGIC); ASSERT3U(bt->bt_objset, ==, dmu_objset_id(os)); ASSERT3U(bt->bt_object, ==, object); ASSERT3U(bt->bt_dnodesize, ==, dnodesize); ASSERT3U(bt->bt_offset, ==, offset); ASSERT3U(bt->bt_gen, <=, gen); ASSERT3U(bt->bt_txg, <=, txg); ASSERT3U(bt->bt_crtxg, ==, crtxg); } static ztest_block_tag_t * ztest_bt_bonus(dmu_buf_t *db) { dmu_object_info_t doi; ztest_block_tag_t *bt; dmu_object_info_from_db(db, &doi); ASSERT3U(doi.doi_bonus_size, <=, db->db_size); ASSERT3U(doi.doi_bonus_size, >=, sizeof (*bt)); bt = (void *)((char *)db->db_data + doi.doi_bonus_size - sizeof (*bt)); return (bt); } /* * Generate a token to fill up unused bonus buffer space. Try to make * it unique to the object, generation, and offset to verify that data * is not getting overwritten by data from other dnodes. */ #define ZTEST_BONUS_FILL_TOKEN(obj, ds, gen, offset) \ (((ds) << 48) | ((gen) << 32) | ((obj) << 8) | (offset)) /* * Fill up the unused bonus buffer region before the block tag with a * verifiable pattern. Filling the whole bonus area with non-zero data * helps ensure that all dnode traversal code properly skips the * interior regions of large dnodes. */ static void ztest_fill_unused_bonus(dmu_buf_t *db, void *end, uint64_t obj, objset_t *os, uint64_t gen) { uint64_t *bonusp; ASSERT(IS_P2ALIGNED((char *)end - (char *)db->db_data, 8)); for (bonusp = db->db_data; bonusp < (uint64_t *)end; bonusp++) { uint64_t token = ZTEST_BONUS_FILL_TOKEN(obj, dmu_objset_id(os), gen, bonusp - (uint64_t *)db->db_data); *bonusp = token; } } /* * Verify that the unused area of a bonus buffer is filled with the * expected tokens. 
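 * Each 64-bit word is compared against the ZTEST_BONUS_FILL_TOKEN() value recomputed from the object, objset id, generation, and word offset.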
*/ static void ztest_verify_unused_bonus(dmu_buf_t *db, void *end, uint64_t obj, objset_t *os, uint64_t gen) { uint64_t *bonusp; for (bonusp = db->db_data; bonusp < (uint64_t *)end; bonusp++) { uint64_t token = ZTEST_BONUS_FILL_TOKEN(obj, dmu_objset_id(os), gen, bonusp - (uint64_t *)db->db_data); VERIFY3U(*bonusp, ==, token); } } /* * ZIL logging ops */ #define lrz_type lr_mode #define lrz_blocksize lr_uid #define lrz_ibshift lr_gid #define lrz_bonustype lr_rdev #define lrz_dnodesize lr_crtime[1] static void ztest_log_create(ztest_ds_t *zd, dmu_tx_t *tx, lr_create_t *lr) { char *name = (void *)(lr + 1); /* name follows lr */ size_t namesize = strlen(name) + 1; itx_t *itx; if (zil_replaying(zd->zd_zilog, tx)) return; itx = zil_itx_create(TX_CREATE, sizeof (*lr) + namesize); memcpy(&itx->itx_lr + 1, &lr->lr_common + 1, sizeof (*lr) + namesize - sizeof (lr_t)); zil_itx_assign(zd->zd_zilog, itx, tx); } static void ztest_log_remove(ztest_ds_t *zd, dmu_tx_t *tx, lr_remove_t *lr, uint64_t object) { char *name = (void *)(lr + 1); /* name follows lr */ size_t namesize = strlen(name) + 1; itx_t *itx; if (zil_replaying(zd->zd_zilog, tx)) return; itx = zil_itx_create(TX_REMOVE, sizeof (*lr) + namesize); memcpy(&itx->itx_lr + 1, &lr->lr_common + 1, sizeof (*lr) + namesize - sizeof (lr_t)); itx->itx_oid = object; zil_itx_assign(zd->zd_zilog, itx, tx); } static void ztest_log_write(ztest_ds_t *zd, dmu_tx_t *tx, lr_write_t *lr) { itx_t *itx; itx_wr_state_t write_state = ztest_random(WR_NUM_STATES); if (zil_replaying(zd->zd_zilog, tx)) return; if (lr->lr_length > zil_max_log_data(zd->zd_zilog)) write_state = WR_INDIRECT; itx = zil_itx_create(TX_WRITE, sizeof (*lr) + (write_state == WR_COPIED ? lr->lr_length : 0)); if (write_state == WR_COPIED && dmu_read(zd->zd_os, lr->lr_foid, lr->lr_offset, lr->lr_length, ((lr_write_t *)&itx->itx_lr) + 1, DMU_READ_NO_PREFETCH) != 0) { zil_itx_destroy(itx); itx = zil_itx_create(TX_WRITE, sizeof (*lr)); write_state = WR_NEED_COPY; } itx->itx_private = zd; itx->itx_wr_state = write_state; itx->itx_sync = (ztest_random(8) == 0); memcpy(&itx->itx_lr + 1, &lr->lr_common + 1, sizeof (*lr) - sizeof (lr_t)); zil_itx_assign(zd->zd_zilog, itx, tx); } static void ztest_log_truncate(ztest_ds_t *zd, dmu_tx_t *tx, lr_truncate_t *lr) { itx_t *itx; if (zil_replaying(zd->zd_zilog, tx)) return; itx = zil_itx_create(TX_TRUNCATE, sizeof (*lr)); memcpy(&itx->itx_lr + 1, &lr->lr_common + 1, sizeof (*lr) - sizeof (lr_t)); itx->itx_sync = B_FALSE; zil_itx_assign(zd->zd_zilog, itx, tx); } static void ztest_log_setattr(ztest_ds_t *zd, dmu_tx_t *tx, lr_setattr_t *lr) { itx_t *itx; if (zil_replaying(zd->zd_zilog, tx)) return; itx = zil_itx_create(TX_SETATTR, sizeof (*lr)); memcpy(&itx->itx_lr + 1, &lr->lr_common + 1, sizeof (*lr) - sizeof (lr_t)); itx->itx_sync = B_FALSE; zil_itx_assign(zd->zd_zilog, itx, tx); } /* * ZIL replay ops */ static int ztest_replay_create(void *arg1, void *arg2, boolean_t byteswap) { ztest_ds_t *zd = arg1; lr_create_t *lr = arg2; char *name = (void *)(lr + 1); /* name follows lr */ objset_t *os = zd->zd_os; ztest_block_tag_t *bbt; dmu_buf_t *db; dmu_tx_t *tx; uint64_t txg; int error = 0; int bonuslen; if (byteswap) byteswap_uint64_array(lr, sizeof (*lr)); ASSERT3U(lr->lr_doid, ==, ZTEST_DIROBJ); ASSERT3S(name[0], !=, '\0'); tx = dmu_tx_create(os); dmu_tx_hold_zap(tx, lr->lr_doid, B_TRUE, name); if (lr->lrz_type == DMU_OT_ZAP_OTHER) { dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, B_TRUE, NULL); } else { dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT); } txg = ztest_tx_assign(tx, TXG_WAIT, 
FTAG); if (txg == 0) return (ENOSPC); ASSERT3U(dmu_objset_zil(os)->zl_replay, ==, !!lr->lr_foid); bonuslen = DN_BONUS_SIZE(lr->lrz_dnodesize); if (lr->lrz_type == DMU_OT_ZAP_OTHER) { if (lr->lr_foid == 0) { lr->lr_foid = zap_create_dnsize(os, lr->lrz_type, lr->lrz_bonustype, bonuslen, lr->lrz_dnodesize, tx); } else { error = zap_create_claim_dnsize(os, lr->lr_foid, lr->lrz_type, lr->lrz_bonustype, bonuslen, lr->lrz_dnodesize, tx); } } else { if (lr->lr_foid == 0) { lr->lr_foid = dmu_object_alloc_dnsize(os, lr->lrz_type, 0, lr->lrz_bonustype, bonuslen, lr->lrz_dnodesize, tx); } else { error = dmu_object_claim_dnsize(os, lr->lr_foid, lr->lrz_type, 0, lr->lrz_bonustype, bonuslen, lr->lrz_dnodesize, tx); } } if (error) { ASSERT3U(error, ==, EEXIST); ASSERT(zd->zd_zilog->zl_replay); dmu_tx_commit(tx); return (error); } ASSERT3U(lr->lr_foid, !=, 0); if (lr->lrz_type != DMU_OT_ZAP_OTHER) VERIFY0(dmu_object_set_blocksize(os, lr->lr_foid, lr->lrz_blocksize, lr->lrz_ibshift, tx)); VERIFY0(dmu_bonus_hold(os, lr->lr_foid, FTAG, &db)); bbt = ztest_bt_bonus(db); dmu_buf_will_dirty(db, tx); ztest_bt_generate(bbt, os, lr->lr_foid, lr->lrz_dnodesize, -1ULL, lr->lr_gen, txg, txg); ztest_fill_unused_bonus(db, bbt, lr->lr_foid, os, lr->lr_gen); dmu_buf_rele(db, FTAG); VERIFY0(zap_add(os, lr->lr_doid, name, sizeof (uint64_t), 1, &lr->lr_foid, tx)); (void) ztest_log_create(zd, tx, lr); dmu_tx_commit(tx); return (0); } static int ztest_replay_remove(void *arg1, void *arg2, boolean_t byteswap) { ztest_ds_t *zd = arg1; lr_remove_t *lr = arg2; char *name = (void *)(lr + 1); /* name follows lr */ objset_t *os = zd->zd_os; dmu_object_info_t doi; dmu_tx_t *tx; uint64_t object, txg; if (byteswap) byteswap_uint64_array(lr, sizeof (*lr)); ASSERT3U(lr->lr_doid, ==, ZTEST_DIROBJ); ASSERT3S(name[0], !=, '\0'); VERIFY0( zap_lookup(os, lr->lr_doid, name, sizeof (object), 1, &object)); ASSERT3U(object, !=, 0); ztest_object_lock(zd, object, RL_WRITER); VERIFY0(dmu_object_info(os, object, &doi)); tx = dmu_tx_create(os); dmu_tx_hold_zap(tx, lr->lr_doid, B_FALSE, name); dmu_tx_hold_free(tx, object, 0, DMU_OBJECT_END); txg = ztest_tx_assign(tx, TXG_WAIT, FTAG); if (txg == 0) { ztest_object_unlock(zd, object); return (ENOSPC); } if (doi.doi_type == DMU_OT_ZAP_OTHER) { VERIFY0(zap_destroy(os, object, tx)); } else { VERIFY0(dmu_object_free(os, object, tx)); } VERIFY0(zap_remove(os, lr->lr_doid, name, tx)); (void) ztest_log_remove(zd, tx, lr, object); dmu_tx_commit(tx); ztest_object_unlock(zd, object); return (0); } static int ztest_replay_write(void *arg1, void *arg2, boolean_t byteswap) { ztest_ds_t *zd = arg1; lr_write_t *lr = arg2; objset_t *os = zd->zd_os; void *data = lr + 1; /* data follows lr */ uint64_t offset, length; ztest_block_tag_t *bt = data; ztest_block_tag_t *bbt; uint64_t gen, txg, lrtxg, crtxg; dmu_object_info_t doi; dmu_tx_t *tx; dmu_buf_t *db; arc_buf_t *abuf = NULL; rl_t *rl; if (byteswap) byteswap_uint64_array(lr, sizeof (*lr)); offset = lr->lr_offset; length = lr->lr_length; /* If it's a dmu_sync() block, write the whole block */ if (lr->lr_common.lrc_reclen == sizeof (lr_write_t)) { uint64_t blocksize = BP_GET_LSIZE(&lr->lr_blkptr); if (length < blocksize) { offset -= offset % blocksize; length = blocksize; } } if (bt->bt_magic == BSWAP_64(BT_MAGIC)) byteswap_uint64_array(bt, sizeof (*bt)); if (bt->bt_magic != BT_MAGIC) bt = NULL; ztest_object_lock(zd, lr->lr_foid, RL_READER); rl = ztest_range_lock(zd, lr->lr_foid, offset, length, RL_WRITER); VERIFY0(dmu_bonus_hold(os, lr->lr_foid, FTAG, &db)); 
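/* The bonus buffer's block tag supplies the gen and crtxg used below to validate the data about to be overwritten. */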
dmu_object_info_from_db(db, &doi); bbt = ztest_bt_bonus(db); ASSERT3U(bbt->bt_magic, ==, BT_MAGIC); gen = bbt->bt_gen; crtxg = bbt->bt_crtxg; lrtxg = lr->lr_common.lrc_txg; tx = dmu_tx_create(os); dmu_tx_hold_write(tx, lr->lr_foid, offset, length); if (ztest_random(8) == 0 && length == doi.doi_data_block_size && P2PHASE(offset, length) == 0) abuf = dmu_request_arcbuf(db, length); txg = ztest_tx_assign(tx, TXG_WAIT, FTAG); if (txg == 0) { if (abuf != NULL) dmu_return_arcbuf(abuf); dmu_buf_rele(db, FTAG); ztest_range_unlock(rl); ztest_object_unlock(zd, lr->lr_foid); return (ENOSPC); } if (bt != NULL) { /* * Usually, verify the old data before writing new data -- * but not always, because we also want to verify correct * behavior when the data was not recently read into cache. */ ASSERT0(offset % doi.doi_data_block_size); if (ztest_random(4) != 0) { int prefetch = ztest_random(2) ? DMU_READ_PREFETCH : DMU_READ_NO_PREFETCH; ztest_block_tag_t rbt; VERIFY(dmu_read(os, lr->lr_foid, offset, sizeof (rbt), &rbt, prefetch) == 0); if (rbt.bt_magic == BT_MAGIC) { ztest_bt_verify(&rbt, os, lr->lr_foid, 0, offset, gen, txg, crtxg); } } /* * Writes can appear to be newer than the bonus buffer because * the ztest_get_data() callback does a dmu_read() of the * open-context data, which may be different than the data * as it was when the write was generated. */ if (zd->zd_zilog->zl_replay) { ztest_bt_verify(bt, os, lr->lr_foid, 0, offset, MAX(gen, bt->bt_gen), MAX(txg, lrtxg), bt->bt_crtxg); } /* * Set the bt's gen/txg to the bonus buffer's gen/txg * so that all of the usual ASSERTs will work. */ ztest_bt_generate(bt, os, lr->lr_foid, 0, offset, gen, txg, crtxg); } if (abuf == NULL) { dmu_write(os, lr->lr_foid, offset, length, data, tx); } else { memcpy(abuf->b_data, data, length); VERIFY0(dmu_assign_arcbuf_by_dbuf(db, offset, abuf, tx)); } (void) ztest_log_write(zd, tx, lr); dmu_buf_rele(db, FTAG); dmu_tx_commit(tx); ztest_range_unlock(rl); ztest_object_unlock(zd, lr->lr_foid); return (0); } static int ztest_replay_truncate(void *arg1, void *arg2, boolean_t byteswap) { ztest_ds_t *zd = arg1; lr_truncate_t *lr = arg2; objset_t *os = zd->zd_os; dmu_tx_t *tx; uint64_t txg; rl_t *rl; if (byteswap) byteswap_uint64_array(lr, sizeof (*lr)); ztest_object_lock(zd, lr->lr_foid, RL_READER); rl = ztest_range_lock(zd, lr->lr_foid, lr->lr_offset, lr->lr_length, RL_WRITER); tx = dmu_tx_create(os); dmu_tx_hold_free(tx, lr->lr_foid, lr->lr_offset, lr->lr_length); txg = ztest_tx_assign(tx, TXG_WAIT, FTAG); if (txg == 0) { ztest_range_unlock(rl); ztest_object_unlock(zd, lr->lr_foid); return (ENOSPC); } VERIFY0(dmu_free_range(os, lr->lr_foid, lr->lr_offset, lr->lr_length, tx)); (void) ztest_log_truncate(zd, tx, lr); dmu_tx_commit(tx); ztest_range_unlock(rl); ztest_object_unlock(zd, lr->lr_foid); return (0); } static int ztest_replay_setattr(void *arg1, void *arg2, boolean_t byteswap) { ztest_ds_t *zd = arg1; lr_setattr_t *lr = arg2; objset_t *os = zd->zd_os; dmu_tx_t *tx; dmu_buf_t *db; ztest_block_tag_t *bbt; uint64_t txg, lrtxg, crtxg, dnodesize; if (byteswap) byteswap_uint64_array(lr, sizeof (*lr)); ztest_object_lock(zd, lr->lr_foid, RL_WRITER); VERIFY0(dmu_bonus_hold(os, lr->lr_foid, FTAG, &db)); tx = dmu_tx_create(os); dmu_tx_hold_bonus(tx, lr->lr_foid); txg = ztest_tx_assign(tx, TXG_WAIT, FTAG); if (txg == 0) { dmu_buf_rele(db, FTAG); ztest_object_unlock(zd, lr->lr_foid); return (ENOSPC); } bbt = ztest_bt_bonus(db); ASSERT3U(bbt->bt_magic, ==, BT_MAGIC); crtxg = bbt->bt_crtxg; lrtxg = lr->lr_common.lrc_txg; dnodesize = 
bbt->bt_dnodesize; if (zd->zd_zilog->zl_replay) { ASSERT3U(lr->lr_size, !=, 0); ASSERT3U(lr->lr_mode, !=, 0); ASSERT3U(lrtxg, !=, 0); } else { /* * Randomly change the size and increment the generation. */ lr->lr_size = (ztest_random(db->db_size / sizeof (*bbt)) + 1) * sizeof (*bbt); lr->lr_mode = bbt->bt_gen + 1; ASSERT0(lrtxg); } /* * Verify that the current bonus buffer is not newer than our txg. */ ztest_bt_verify(bbt, os, lr->lr_foid, dnodesize, -1ULL, lr->lr_mode, MAX(txg, lrtxg), crtxg); dmu_buf_will_dirty(db, tx); ASSERT3U(lr->lr_size, >=, sizeof (*bbt)); ASSERT3U(lr->lr_size, <=, db->db_size); VERIFY0(dmu_set_bonus(db, lr->lr_size, tx)); bbt = ztest_bt_bonus(db); ztest_bt_generate(bbt, os, lr->lr_foid, dnodesize, -1ULL, lr->lr_mode, txg, crtxg); ztest_fill_unused_bonus(db, bbt, lr->lr_foid, os, bbt->bt_gen); dmu_buf_rele(db, FTAG); (void) ztest_log_setattr(zd, tx, lr); dmu_tx_commit(tx); ztest_object_unlock(zd, lr->lr_foid); return (0); } zil_replay_func_t *ztest_replay_vector[TX_MAX_TYPE] = { NULL, /* 0 no such transaction type */ ztest_replay_create, /* TX_CREATE */ NULL, /* TX_MKDIR */ NULL, /* TX_MKXATTR */ NULL, /* TX_SYMLINK */ ztest_replay_remove, /* TX_REMOVE */ NULL, /* TX_RMDIR */ NULL, /* TX_LINK */ NULL, /* TX_RENAME */ ztest_replay_write, /* TX_WRITE */ ztest_replay_truncate, /* TX_TRUNCATE */ ztest_replay_setattr, /* TX_SETATTR */ NULL, /* TX_ACL */ NULL, /* TX_CREATE_ACL */ NULL, /* TX_CREATE_ATTR */ NULL, /* TX_CREATE_ACL_ATTR */ NULL, /* TX_MKDIR_ACL */ NULL, /* TX_MKDIR_ATTR */ NULL, /* TX_MKDIR_ACL_ATTR */ NULL, /* TX_WRITE2 */ NULL, /* TX_SETSAXATTR */ }; /* * ZIL get_data callbacks */ static void ztest_get_done(zgd_t *zgd, int error) { (void) error; ztest_ds_t *zd = zgd->zgd_private; uint64_t object = ((rl_t *)zgd->zgd_lr)->rl_object; if (zgd->zgd_db) dmu_buf_rele(zgd->zgd_db, zgd); ztest_range_unlock((rl_t *)zgd->zgd_lr); ztest_object_unlock(zd, object); umem_free(zgd, sizeof (*zgd)); } static int ztest_get_data(void *arg, uint64_t arg2, lr_write_t *lr, char *buf, struct lwb *lwb, zio_t *zio) { (void) arg2; ztest_ds_t *zd = arg; objset_t *os = zd->zd_os; uint64_t object = lr->lr_foid; uint64_t offset = lr->lr_offset; uint64_t size = lr->lr_length; uint64_t txg = lr->lr_common.lrc_txg; uint64_t crtxg; dmu_object_info_t doi; dmu_buf_t *db; zgd_t *zgd; int error; ASSERT3P(lwb, !=, NULL); ASSERT3P(zio, !=, NULL); ASSERT3U(size, !=, 0); ztest_object_lock(zd, object, RL_READER); error = dmu_bonus_hold(os, object, FTAG, &db); if (error) { ztest_object_unlock(zd, object); return (error); } crtxg = ztest_bt_bonus(db)->bt_crtxg; if (crtxg == 0 || crtxg > txg) { dmu_buf_rele(db, FTAG); ztest_object_unlock(zd, object); return (ENOENT); } dmu_object_info_from_db(db, &doi); dmu_buf_rele(db, FTAG); db = NULL; zgd = umem_zalloc(sizeof (*zgd), UMEM_NOFAIL); zgd->zgd_lwb = lwb; zgd->zgd_private = zd; if (buf != NULL) { /* immediate write */ zgd->zgd_lr = (struct zfs_locked_range *)ztest_range_lock(zd, object, offset, size, RL_READER); error = dmu_read(os, object, offset, size, buf, DMU_READ_NO_PREFETCH); ASSERT0(error); } else { size = doi.doi_data_block_size; if (ISP2(size)) { offset = P2ALIGN(offset, size); } else { ASSERT3U(offset, <, size); offset = 0; } zgd->zgd_lr = (struct zfs_locked_range *)ztest_range_lock(zd, object, offset, size, RL_READER); error = dmu_buf_hold(os, object, offset, zgd, &db, DMU_READ_NO_PREFETCH); if (error == 0) { blkptr_t *bp = &lr->lr_blkptr; zgd->zgd_db = db; zgd->zgd_bp = bp; ASSERT3U(db->db_offset, ==, offset); ASSERT3U(db->db_size, ==, size); 
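/* Hand the dbuf to dmu_sync(); ztest_get_done() releases the dbuf hold and the range and object locks whether the sync completes asynchronously or fails immediately. */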
error = dmu_sync(zio, lr->lr_common.lrc_txg, ztest_get_done, zgd); if (error == 0) return (0); } } ztest_get_done(zgd, error); return (error); } static void * ztest_lr_alloc(size_t lrsize, char *name) { char *lr; size_t namesize = name ? strlen(name) + 1 : 0; lr = umem_zalloc(lrsize + namesize, UMEM_NOFAIL); if (name) memcpy(lr + lrsize, name, namesize); return (lr); } static void ztest_lr_free(void *lr, size_t lrsize, char *name) { size_t namesize = name ? strlen(name) + 1 : 0; umem_free(lr, lrsize + namesize); } /* * Lookup a bunch of objects. Returns the number of objects not found. */ static int ztest_lookup(ztest_ds_t *zd, ztest_od_t *od, int count) { int missing = 0; int error; int i; ASSERT(MUTEX_HELD(&zd->zd_dirobj_lock)); for (i = 0; i < count; i++, od++) { od->od_object = 0; error = zap_lookup(zd->zd_os, od->od_dir, od->od_name, sizeof (uint64_t), 1, &od->od_object); if (error) { ASSERT3S(error, ==, ENOENT); ASSERT0(od->od_object); missing++; } else { dmu_buf_t *db; ztest_block_tag_t *bbt; dmu_object_info_t doi; ASSERT3U(od->od_object, !=, 0); ASSERT0(missing); /* there should be no gaps */ ztest_object_lock(zd, od->od_object, RL_READER); VERIFY0(dmu_bonus_hold(zd->zd_os, od->od_object, FTAG, &db)); dmu_object_info_from_db(db, &doi); bbt = ztest_bt_bonus(db); ASSERT3U(bbt->bt_magic, ==, BT_MAGIC); od->od_type = doi.doi_type; od->od_blocksize = doi.doi_data_block_size; od->od_gen = bbt->bt_gen; dmu_buf_rele(db, FTAG); ztest_object_unlock(zd, od->od_object); } } return (missing); } static int ztest_create(ztest_ds_t *zd, ztest_od_t *od, int count) { int missing = 0; int i; ASSERT(MUTEX_HELD(&zd->zd_dirobj_lock)); for (i = 0; i < count; i++, od++) { if (missing) { od->od_object = 0; missing++; continue; } lr_create_t *lr = ztest_lr_alloc(sizeof (*lr), od->od_name); lr->lr_doid = od->od_dir; lr->lr_foid = 0; /* 0 to allocate, > 0 to claim */ lr->lrz_type = od->od_crtype; lr->lrz_blocksize = od->od_crblocksize; lr->lrz_ibshift = ztest_random_ibshift(); lr->lrz_bonustype = DMU_OT_UINT64_OTHER; lr->lrz_dnodesize = od->od_crdnodesize; lr->lr_gen = od->od_crgen; lr->lr_crtime[0] = time(NULL); if (ztest_replay_create(zd, lr, B_FALSE) != 0) { ASSERT0(missing); od->od_object = 0; missing++; } else { od->od_object = lr->lr_foid; od->od_type = od->od_crtype; od->od_blocksize = od->od_crblocksize; od->od_gen = od->od_crgen; ASSERT3U(od->od_object, !=, 0); } ztest_lr_free(lr, sizeof (*lr), od->od_name); } return (missing); } static int ztest_remove(ztest_ds_t *zd, ztest_od_t *od, int count) { int missing = 0; int error; int i; ASSERT(MUTEX_HELD(&zd->zd_dirobj_lock)); od += count - 1; for (i = count - 1; i >= 0; i--, od--) { if (missing) { missing++; continue; } /* * No object was found. 
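 * There is nothing to remove for this entry, so skip it and check the next one.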
*/ if (od->od_object == 0) continue; lr_remove_t *lr = ztest_lr_alloc(sizeof (*lr), od->od_name); lr->lr_doid = od->od_dir; if ((error = ztest_replay_remove(zd, lr, B_FALSE)) != 0) { ASSERT3U(error, ==, ENOSPC); missing++; } else { od->od_object = 0; } ztest_lr_free(lr, sizeof (*lr), od->od_name); } return (missing); } static int ztest_write(ztest_ds_t *zd, uint64_t object, uint64_t offset, uint64_t size, void *data) { lr_write_t *lr; int error; lr = ztest_lr_alloc(sizeof (*lr) + size, NULL); lr->lr_foid = object; lr->lr_offset = offset; lr->lr_length = size; lr->lr_blkoff = 0; BP_ZERO(&lr->lr_blkptr); memcpy(lr + 1, data, size); error = ztest_replay_write(zd, lr, B_FALSE); ztest_lr_free(lr, sizeof (*lr) + size, NULL); return (error); } static int ztest_truncate(ztest_ds_t *zd, uint64_t object, uint64_t offset, uint64_t size) { lr_truncate_t *lr; int error; lr = ztest_lr_alloc(sizeof (*lr), NULL); lr->lr_foid = object; lr->lr_offset = offset; lr->lr_length = size; error = ztest_replay_truncate(zd, lr, B_FALSE); ztest_lr_free(lr, sizeof (*lr), NULL); return (error); } static int ztest_setattr(ztest_ds_t *zd, uint64_t object) { lr_setattr_t *lr; int error; lr = ztest_lr_alloc(sizeof (*lr), NULL); lr->lr_foid = object; lr->lr_size = 0; lr->lr_mode = 0; error = ztest_replay_setattr(zd, lr, B_FALSE); ztest_lr_free(lr, sizeof (*lr), NULL); return (error); } static void ztest_prealloc(ztest_ds_t *zd, uint64_t object, uint64_t offset, uint64_t size) { objset_t *os = zd->zd_os; dmu_tx_t *tx; uint64_t txg; rl_t *rl; txg_wait_synced(dmu_objset_pool(os), 0); ztest_object_lock(zd, object, RL_READER); rl = ztest_range_lock(zd, object, offset, size, RL_WRITER); tx = dmu_tx_create(os); dmu_tx_hold_write(tx, object, offset, size); txg = ztest_tx_assign(tx, TXG_WAIT, FTAG); if (txg != 0) { dmu_prealloc(os, object, offset, size, tx); dmu_tx_commit(tx); txg_wait_synced(dmu_objset_pool(os), txg); } else { (void) dmu_free_long_range(os, object, offset, size); } ztest_range_unlock(rl); ztest_object_unlock(zd, object); } static void ztest_io(ztest_ds_t *zd, uint64_t object, uint64_t offset) { int err; ztest_block_tag_t wbt; dmu_object_info_t doi; enum ztest_io_type io_type; uint64_t blocksize; void *data; VERIFY0(dmu_object_info(zd->zd_os, object, &doi)); blocksize = doi.doi_data_block_size; data = umem_alloc(blocksize, UMEM_NOFAIL); /* * Pick an i/o type at random, biased toward writing block tags. */ io_type = ztest_random(ZTEST_IO_TYPES); if (ztest_random(2) == 0) io_type = ZTEST_IO_WRITE_TAG; (void) pthread_rwlock_rdlock(&zd->zd_zilog_lock); switch (io_type) { case ZTEST_IO_WRITE_TAG: ztest_bt_generate(&wbt, zd->zd_os, object, doi.doi_dnodesize, offset, 0, 0, 0); (void) ztest_write(zd, object, offset, sizeof (wbt), &wbt); break; case ZTEST_IO_WRITE_PATTERN: (void) memset(data, 'a' + (object + offset) % 5, blocksize); if (ztest_random(2) == 0) { /* * Induce fletcher2 collisions to ensure that * zio_ddt_collision() detects and resolves them * when using fletcher2-verify for deduplication. 
*/ ((uint64_t *)data)[0] ^= 1ULL << 63; ((uint64_t *)data)[4] ^= 1ULL << 63; } (void) ztest_write(zd, object, offset, blocksize, data); break; case ZTEST_IO_WRITE_ZEROES: memset(data, 0, blocksize); (void) ztest_write(zd, object, offset, blocksize, data); break; case ZTEST_IO_TRUNCATE: (void) ztest_truncate(zd, object, offset, blocksize); break; case ZTEST_IO_SETATTR: (void) ztest_setattr(zd, object); break; default: break; case ZTEST_IO_REWRITE: (void) pthread_rwlock_rdlock(&ztest_name_lock); err = ztest_dsl_prop_set_uint64(zd->zd_name, ZFS_PROP_CHECKSUM, spa_dedup_checksum(ztest_spa), B_FALSE); VERIFY(err == 0 || err == ENOSPC); err = ztest_dsl_prop_set_uint64(zd->zd_name, ZFS_PROP_COMPRESSION, ztest_random_dsl_prop(ZFS_PROP_COMPRESSION), B_FALSE); VERIFY(err == 0 || err == ENOSPC); (void) pthread_rwlock_unlock(&ztest_name_lock); VERIFY0(dmu_read(zd->zd_os, object, offset, blocksize, data, DMU_READ_NO_PREFETCH)); (void) ztest_write(zd, object, offset, blocksize, data); break; } (void) pthread_rwlock_unlock(&zd->zd_zilog_lock); umem_free(data, blocksize); } /* * Initialize an object description template. */ static void ztest_od_init(ztest_od_t *od, uint64_t id, const char *tag, uint64_t index, dmu_object_type_t type, uint64_t blocksize, uint64_t dnodesize, uint64_t gen) { od->od_dir = ZTEST_DIROBJ; od->od_object = 0; od->od_crtype = type; od->od_crblocksize = blocksize ? blocksize : ztest_random_blocksize(); od->od_crdnodesize = dnodesize ? dnodesize : ztest_random_dnodesize(); od->od_crgen = gen; od->od_type = DMU_OT_NONE; od->od_blocksize = 0; od->od_gen = 0; (void) snprintf(od->od_name, sizeof (od->od_name), "%s(%"PRId64")[%"PRIu64"]", tag, id, index); } /* * Lookup or create the objects for a test using the od template. * If the objects do not all exist, or if 'remove' is specified, * remove any existing objects and create new ones. Otherwise, * use the existing objects. */ static int ztest_object_init(ztest_ds_t *zd, ztest_od_t *od, size_t size, boolean_t remove) { int count = size / sizeof (*od); int rv = 0; mutex_enter(&zd->zd_dirobj_lock); if ((ztest_lookup(zd, od, count) != 0 || remove) && (ztest_remove(zd, od, count) != 0 || ztest_create(zd, od, count) != 0)) rv = -1; zd->zd_od = od; mutex_exit(&zd->zd_dirobj_lock); return (rv); } void ztest_zil_commit(ztest_ds_t *zd, uint64_t id) { (void) id; zilog_t *zilog = zd->zd_zilog; (void) pthread_rwlock_rdlock(&zd->zd_zilog_lock); zil_commit(zilog, ztest_random(ZTEST_OBJECTS)); /* * Remember the committed values in zd, which is in parent/child * shared memory. If we die, the next iteration of ztest_run() * will verify that the log really does contain this record. */ mutex_enter(&zilog->zl_lock); ASSERT3P(zd->zd_shared, !=, NULL); ASSERT3U(zd->zd_shared->zd_seq, <=, zilog->zl_commit_lr_seq); zd->zd_shared->zd_seq = zilog->zl_commit_lr_seq; mutex_exit(&zilog->zl_lock); (void) pthread_rwlock_unlock(&zd->zd_zilog_lock); } /* * This function is designed to simulate the operations that occur during a * mount/unmount operation. We hold the dataset across these operations in an * attempt to expose any implicit assumptions about ZIL management. */ void ztest_zil_remount(ztest_ds_t *zd, uint64_t id) { (void) id; objset_t *os = zd->zd_os; /* * We hold the ztest_vdev_lock so we don't cause problems with * other threads that wish to remove a log device, such as * ztest_device_removal(). */ mutex_enter(&ztest_vdev_lock); /* * We grab the zd_dirobj_lock to ensure that no other thread is * updating the zil (i.e. 
adding in-memory log records) and the * zd_zilog_lock to block any I/O. */ mutex_enter(&zd->zd_dirobj_lock); (void) pthread_rwlock_wrlock(&zd->zd_zilog_lock); /* zfsvfs_teardown() */ zil_close(zd->zd_zilog); /* zfsvfs_setup() */ VERIFY3P(zil_open(os, ztest_get_data, NULL), ==, zd->zd_zilog); zil_replay(os, zd, ztest_replay_vector); (void) pthread_rwlock_unlock(&zd->zd_zilog_lock); mutex_exit(&zd->zd_dirobj_lock); mutex_exit(&ztest_vdev_lock); } /* * Verify that we can't destroy an active pool, create an existing pool, * or create a pool with a bad vdev spec. */ void ztest_spa_create_destroy(ztest_ds_t *zd, uint64_t id) { (void) zd, (void) id; ztest_shared_opts_t *zo = &ztest_opts; spa_t *spa; nvlist_t *nvroot; if (zo->zo_mmp_test) return; /* * Attempt to create using a bad file. */ nvroot = make_vdev_root("/dev/bogus", NULL, NULL, 0, 0, NULL, 0, 0, 1); VERIFY3U(ENOENT, ==, spa_create("ztest_bad_file", nvroot, NULL, NULL, NULL)); fnvlist_free(nvroot); /* * Attempt to create using a bad mirror. */ nvroot = make_vdev_root("/dev/bogus", NULL, NULL, 0, 0, NULL, 0, 2, 1); VERIFY3U(ENOENT, ==, spa_create("ztest_bad_mirror", nvroot, NULL, NULL, NULL)); fnvlist_free(nvroot); /* * Attempt to create an existing pool. It shouldn't matter * what's in the nvroot; we should fail with EEXIST. */ (void) pthread_rwlock_rdlock(&ztest_name_lock); nvroot = make_vdev_root("/dev/bogus", NULL, NULL, 0, 0, NULL, 0, 0, 1); VERIFY3U(EEXIST, ==, spa_create(zo->zo_pool, nvroot, NULL, NULL, NULL)); fnvlist_free(nvroot); /* * We open a reference to the spa and then we try to export it * expecting one of the following errors: * * EBUSY * Because of the reference we just opened. * * ZFS_ERR_EXPORT_IN_PROGRESS * For the case that there is another ztest thread doing * an export concurrently. */ VERIFY0(spa_open(zo->zo_pool, &spa, FTAG)); int error = spa_destroy(zo->zo_pool); if (error != EBUSY && error != ZFS_ERR_EXPORT_IN_PROGRESS) { fatal(B_FALSE, "spa_destroy(%s) returned unexpected value %d", spa->spa_name, error); } spa_close(spa, FTAG); (void) pthread_rwlock_unlock(&ztest_name_lock); } /* * Start and then stop the MMP threads to ensure the startup and shutdown code * works properly. Actual protection and property-related code tested via ZTS. */ void ztest_mmp_enable_disable(ztest_ds_t *zd, uint64_t id) { (void) zd, (void) id; ztest_shared_opts_t *zo = &ztest_opts; spa_t *spa = ztest_spa; if (zo->zo_mmp_test) return; /* * Since enabling MMP involves setting a property, it could not be done * while the pool is suspended. */ if (spa_suspended(spa)) return; spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); mutex_enter(&spa->spa_props_lock); zfs_multihost_fail_intervals = 0; if (!spa_multihost(spa)) { spa->spa_multihost = B_TRUE; mmp_thread_start(spa); } mutex_exit(&spa->spa_props_lock); spa_config_exit(spa, SCL_CONFIG, FTAG); txg_wait_synced(spa_get_dsl(spa), 0); mmp_signal_all_threads(); txg_wait_synced(spa_get_dsl(spa), 0); spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); mutex_enter(&spa->spa_props_lock); if (spa_multihost(spa)) { mmp_thread_stop(spa); spa->spa_multihost = B_FALSE; } mutex_exit(&spa->spa_props_lock); spa_config_exit(spa, SCL_CONFIG, FTAG); } void ztest_spa_upgrade(ztest_ds_t *zd, uint64_t id) { (void) zd, (void) id; spa_t *spa; uint64_t initial_version = SPA_VERSION_INITIAL; uint64_t version, newversion; nvlist_t *nvroot, *props; char *name; if (ztest_opts.zo_mmp_test) return; /* dRAID added after feature flags, skip upgrade test. 
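 * (dRAID vdevs cannot exist on the pre-feature-flag pool versions this test creates.)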
*/ if (strcmp(ztest_opts.zo_raid_type, VDEV_TYPE_DRAID) == 0) return; mutex_enter(&ztest_vdev_lock); name = kmem_asprintf("%s_upgrade", ztest_opts.zo_pool); /* * Clean up from previous runs. */ (void) spa_destroy(name); nvroot = make_vdev_root(NULL, NULL, name, ztest_opts.zo_vdev_size, 0, NULL, ztest_opts.zo_raid_children, ztest_opts.zo_mirrors, 1); /* * If we're configuring a RAIDZ device then make sure that the * initial version is capable of supporting that feature. */ switch (ztest_opts.zo_raid_parity) { case 0: case 1: initial_version = SPA_VERSION_INITIAL; break; case 2: initial_version = SPA_VERSION_RAIDZ2; break; case 3: initial_version = SPA_VERSION_RAIDZ3; break; } /* * Create a pool with a spa version that can be upgraded. Pick * a value between initial_version and SPA_VERSION_BEFORE_FEATURES. */ do { version = ztest_random_spa_version(initial_version); } while (version > SPA_VERSION_BEFORE_FEATURES); props = fnvlist_alloc(); fnvlist_add_uint64(props, zpool_prop_to_name(ZPOOL_PROP_VERSION), version); VERIFY0(spa_create(name, nvroot, props, NULL, NULL)); fnvlist_free(nvroot); fnvlist_free(props); VERIFY0(spa_open(name, &spa, FTAG)); VERIFY3U(spa_version(spa), ==, version); newversion = ztest_random_spa_version(version + 1); if (ztest_opts.zo_verbose >= 4) { (void) printf("upgrading spa version from " "%"PRIu64" to %"PRIu64"\n", version, newversion); } spa_upgrade(spa, newversion); VERIFY3U(spa_version(spa), >, version); VERIFY3U(spa_version(spa), ==, fnvlist_lookup_uint64(spa->spa_config, zpool_prop_to_name(ZPOOL_PROP_VERSION))); spa_close(spa, FTAG); kmem_strfree(name); mutex_exit(&ztest_vdev_lock); } static void ztest_spa_checkpoint(spa_t *spa) { ASSERT(MUTEX_HELD(&ztest_checkpoint_lock)); int error = spa_checkpoint(spa->spa_name); switch (error) { case 0: case ZFS_ERR_DEVRM_IN_PROGRESS: case ZFS_ERR_DISCARDING_CHECKPOINT: case ZFS_ERR_CHECKPOINT_EXISTS: break; case ENOSPC: ztest_record_enospc(FTAG); break; default: fatal(B_FALSE, "spa_checkpoint(%s) = %d", spa->spa_name, error); } } static void ztest_spa_discard_checkpoint(spa_t *spa) { ASSERT(MUTEX_HELD(&ztest_checkpoint_lock)); int error = spa_checkpoint_discard(spa->spa_name); switch (error) { case 0: case ZFS_ERR_DISCARDING_CHECKPOINT: case ZFS_ERR_NO_CHECKPOINT: break; default: fatal(B_FALSE, "spa_discard_checkpoint(%s) = %d", spa->spa_name, error); } } void ztest_spa_checkpoint_create_discard(ztest_ds_t *zd, uint64_t id) { (void) zd, (void) id; spa_t *spa = ztest_spa; mutex_enter(&ztest_checkpoint_lock); if (ztest_random(2) == 0) { ztest_spa_checkpoint(spa); } else { ztest_spa_discard_checkpoint(spa); } mutex_exit(&ztest_checkpoint_lock); } static vdev_t * vdev_lookup_by_path(vdev_t *vd, const char *path) { vdev_t *mvd; int c; if (vd->vdev_path != NULL && strcmp(path, vd->vdev_path) == 0) return (vd); for (c = 0; c < vd->vdev_children; c++) if ((mvd = vdev_lookup_by_path(vd->vdev_child[c], path)) != NULL) return (mvd); return (NULL); } static int spa_num_top_vdevs(spa_t *spa) { vdev_t *rvd = spa->spa_root_vdev; ASSERT3U(spa_config_held(spa, SCL_VDEV, RW_READER), ==, SCL_VDEV); return (rvd->vdev_children); } /* * Verify that vdev_add() works as expected. 
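 * When the pool has slogs, one is removed about 1/4 of the time; otherwise a new top-level vdev is added, and roughly 1/4 of those additions are log devices.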
*/ void ztest_vdev_add_remove(ztest_ds_t *zd, uint64_t id) { (void) zd, (void) id; ztest_shared_t *zs = ztest_shared; spa_t *spa = ztest_spa; uint64_t leaves; uint64_t guid; nvlist_t *nvroot; int error; if (ztest_opts.zo_mmp_test) return; mutex_enter(&ztest_vdev_lock); leaves = MAX(zs->zs_mirrors + zs->zs_splits, 1) * ztest_opts.zo_raid_children; spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER); ztest_shared->zs_vdev_next_leaf = spa_num_top_vdevs(spa) * leaves; /* * If we have slogs then remove them 1/4 of the time. */ if (spa_has_slogs(spa) && ztest_random(4) == 0) { metaslab_group_t *mg; /* * find the first real slog in log allocation class */ mg = spa_log_class(spa)->mc_allocator[0].mca_rotor; while (!mg->mg_vd->vdev_islog) mg = mg->mg_next; guid = mg->mg_vd->vdev_guid; spa_config_exit(spa, SCL_VDEV, FTAG); /* * We have to grab the zs_name_lock as writer to * prevent a race between removing a slog (dmu_objset_find) * and destroying a dataset. Removing the slog will * grab a reference on the dataset which may cause * dsl_destroy_head() to fail with EBUSY thus * leaving the dataset in an inconsistent state. */ pthread_rwlock_wrlock(&ztest_name_lock); error = spa_vdev_remove(spa, guid, B_FALSE); pthread_rwlock_unlock(&ztest_name_lock); switch (error) { case 0: case EEXIST: /* Generic zil_reset() error */ case EBUSY: /* Replay required */ case EACCES: /* Crypto key not loaded */ case ZFS_ERR_CHECKPOINT_EXISTS: case ZFS_ERR_DISCARDING_CHECKPOINT: break; default: fatal(B_FALSE, "spa_vdev_remove() = %d", error); } } else { spa_config_exit(spa, SCL_VDEV, FTAG); /* * Make 1/4 of the devices be log devices */ nvroot = make_vdev_root(NULL, NULL, NULL, ztest_opts.zo_vdev_size, 0, (ztest_random(4) == 0) ? "log" : NULL, ztest_opts.zo_raid_children, zs->zs_mirrors, 1); error = spa_vdev_add(spa, nvroot); fnvlist_free(nvroot); switch (error) { case 0: break; case ENOSPC: ztest_record_enospc("spa_vdev_add"); break; default: fatal(B_FALSE, "spa_vdev_add() = %d", error); } } mutex_exit(&ztest_vdev_lock); } void ztest_vdev_class_add(ztest_ds_t *zd, uint64_t id) { (void) zd, (void) id; ztest_shared_t *zs = ztest_shared; spa_t *spa = ztest_spa; uint64_t leaves; nvlist_t *nvroot; const char *class = (ztest_random(2) == 0) ? 
VDEV_ALLOC_BIAS_SPECIAL : VDEV_ALLOC_BIAS_DEDUP; int error; /* * By default add a special vdev 50% of the time */ if ((ztest_opts.zo_special_vdevs == ZTEST_VDEV_CLASS_OFF) || (ztest_opts.zo_special_vdevs == ZTEST_VDEV_CLASS_RND && ztest_random(2) == 0)) { return; } mutex_enter(&ztest_vdev_lock); /* Only test with mirrors */ if (zs->zs_mirrors < 2) { mutex_exit(&ztest_vdev_lock); return; } /* requires feature@allocation_classes */ if (!spa_feature_is_enabled(spa, SPA_FEATURE_ALLOCATION_CLASSES)) { mutex_exit(&ztest_vdev_lock); return; } leaves = MAX(zs->zs_mirrors + zs->zs_splits, 1) * ztest_opts.zo_raid_children; spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER); ztest_shared->zs_vdev_next_leaf = spa_num_top_vdevs(spa) * leaves; spa_config_exit(spa, SCL_VDEV, FTAG); nvroot = make_vdev_root(NULL, NULL, NULL, ztest_opts.zo_vdev_size, 0, class, ztest_opts.zo_raid_children, zs->zs_mirrors, 1); error = spa_vdev_add(spa, nvroot); fnvlist_free(nvroot); if (error == ENOSPC) ztest_record_enospc("spa_vdev_add"); else if (error != 0) fatal(B_FALSE, "spa_vdev_add() = %d", error); /* * 50% of the time allow small blocks in the special class */ if (error == 0 && spa_special_class(spa)->mc_groups == 1 && ztest_random(2) == 0) { if (ztest_opts.zo_verbose >= 3) (void) printf("Enabling special VDEV small blocks\n"); (void) ztest_dsl_prop_set_uint64(zd->zd_name, ZFS_PROP_SPECIAL_SMALL_BLOCKS, 32768, B_FALSE); } mutex_exit(&ztest_vdev_lock); if (ztest_opts.zo_verbose >= 3) { metaslab_class_t *mc; if (strcmp(class, VDEV_ALLOC_BIAS_SPECIAL) == 0) mc = spa_special_class(spa); else mc = spa_dedup_class(spa); (void) printf("Added a %s mirrored vdev (of %d)\n", class, (int)mc->mc_groups); } } /* * Verify that adding/removing aux devices (l2arc, hot spare) works as expected. */ void ztest_vdev_aux_add_remove(ztest_ds_t *zd, uint64_t id) { (void) zd, (void) id; ztest_shared_t *zs = ztest_shared; spa_t *spa = ztest_spa; vdev_t *rvd = spa->spa_root_vdev; spa_aux_vdev_t *sav; const char *aux; char *path; uint64_t guid = 0; int error, ignore_err = 0; if (ztest_opts.zo_mmp_test) return; path = umem_alloc(MAXPATHLEN, UMEM_NOFAIL); if (ztest_random(2) == 0) { sav = &spa->spa_spares; aux = ZPOOL_CONFIG_SPARES; } else { sav = &spa->spa_l2cache; aux = ZPOOL_CONFIG_L2CACHE; } mutex_enter(&ztest_vdev_lock); spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER); if (sav->sav_count != 0 && ztest_random(4) == 0) { /* * Pick a random device to remove. */ vdev_t *svd = sav->sav_vdevs[ztest_random(sav->sav_count)]; /* dRAID spares cannot be removed; try anyways to see ENOTSUP */ if (strstr(svd->vdev_path, VDEV_TYPE_DRAID) != NULL) ignore_err = ENOTSUP; guid = svd->vdev_guid; } else { /* * Find an unused device we can add. */ zs->zs_vdev_aux = 0; for (;;) { int c; (void) snprintf(path, MAXPATHLEN, ztest_aux_template, ztest_opts.zo_dir, ztest_opts.zo_pool, aux, zs->zs_vdev_aux); for (c = 0; c < sav->sav_count; c++) if (strcmp(sav->sav_vdevs[c]->vdev_path, path) == 0) break; if (c == sav->sav_count && vdev_lookup_by_path(rvd, path) == NULL) break; zs->zs_vdev_aux++; } } spa_config_exit(spa, SCL_VDEV, FTAG); if (guid == 0) { /* * Add a new device. */ nvlist_t *nvroot = make_vdev_root(NULL, aux, NULL, (ztest_opts.zo_vdev_size * 5) / 4, 0, NULL, 0, 0, 1); error = spa_vdev_add(spa, nvroot); switch (error) { case 0: break; default: fatal(B_FALSE, "spa_vdev_add(%p) = %d", nvroot, error); } fnvlist_free(nvroot); } else { /* * Remove an existing device. 
Sometimes, dirty its * vdev state first to make sure we handle removal * of devices that have pending state changes. */ if (ztest_random(2) == 0) (void) vdev_online(spa, guid, 0, NULL); error = spa_vdev_remove(spa, guid, B_FALSE); switch (error) { case 0: case EBUSY: case ZFS_ERR_CHECKPOINT_EXISTS: case ZFS_ERR_DISCARDING_CHECKPOINT: break; default: if (error != ignore_err) fatal(B_FALSE, "spa_vdev_remove(%"PRIu64") = %d", guid, error); } } mutex_exit(&ztest_vdev_lock); umem_free(path, MAXPATHLEN); } /* * split a pool if it has mirror tlvdevs */ void ztest_split_pool(ztest_ds_t *zd, uint64_t id) { (void) zd, (void) id; ztest_shared_t *zs = ztest_shared; spa_t *spa = ztest_spa; vdev_t *rvd = spa->spa_root_vdev; nvlist_t *tree, **child, *config, *split, **schild; uint_t c, children, schildren = 0, lastlogid = 0; int error = 0; if (ztest_opts.zo_mmp_test) return; mutex_enter(&ztest_vdev_lock); /* ensure we have a usable config; mirrors of raidz aren't supported */ if (zs->zs_mirrors < 3 || ztest_opts.zo_raid_children > 1) { mutex_exit(&ztest_vdev_lock); return; } /* clean up the old pool, if any */ (void) spa_destroy("splitp"); spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER); /* generate a config from the existing config */ mutex_enter(&spa->spa_props_lock); tree = fnvlist_lookup_nvlist(spa->spa_config, ZPOOL_CONFIG_VDEV_TREE); mutex_exit(&spa->spa_props_lock); VERIFY0(nvlist_lookup_nvlist_array(tree, ZPOOL_CONFIG_CHILDREN, &child, &children)); schild = malloc(rvd->vdev_children * sizeof (nvlist_t *)); for (c = 0; c < children; c++) { vdev_t *tvd = rvd->vdev_child[c]; nvlist_t **mchild; uint_t mchildren; if (tvd->vdev_islog || tvd->vdev_ops == &vdev_hole_ops) { schild[schildren] = fnvlist_alloc(); fnvlist_add_string(schild[schildren], ZPOOL_CONFIG_TYPE, VDEV_TYPE_HOLE); fnvlist_add_uint64(schild[schildren], ZPOOL_CONFIG_IS_HOLE, 1); if (lastlogid == 0) lastlogid = schildren; ++schildren; continue; } lastlogid = 0; VERIFY0(nvlist_lookup_nvlist_array(child[c], ZPOOL_CONFIG_CHILDREN, &mchild, &mchildren)); schild[schildren++] = fnvlist_dup(mchild[0]); } /* OK, create a config that can be used to split */ split = fnvlist_alloc(); fnvlist_add_string(split, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT); fnvlist_add_nvlist_array(split, ZPOOL_CONFIG_CHILDREN, (const nvlist_t **)schild, lastlogid != 0 ? lastlogid : schildren); config = fnvlist_alloc(); fnvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, split); for (c = 0; c < schildren; c++) fnvlist_free(schild[c]); free(schild); fnvlist_free(split); spa_config_exit(spa, SCL_VDEV, FTAG); (void) pthread_rwlock_wrlock(&ztest_name_lock); error = spa_vdev_split_mirror(spa, "splitp", config, NULL, B_FALSE); (void) pthread_rwlock_unlock(&ztest_name_lock); fnvlist_free(config); if (error == 0) { (void) printf("successful split - results:\n"); mutex_enter(&spa_namespace_lock); show_pool_stats(spa); show_pool_stats(spa_lookup("splitp")); mutex_exit(&spa_namespace_lock); ++zs->zs_splits; --zs->zs_mirrors; } mutex_exit(&ztest_vdev_lock); } /* * Verify that we can attach and detach devices. 
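 * A randomly chosen leaf is detached, attached to, or replaced, and the result is compared against the error the vdev layer is expected to return for that configuration.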
*/ void ztest_vdev_attach_detach(ztest_ds_t *zd, uint64_t id) { (void) zd, (void) id; ztest_shared_t *zs = ztest_shared; spa_t *spa = ztest_spa; spa_aux_vdev_t *sav = &spa->spa_spares; vdev_t *rvd = spa->spa_root_vdev; vdev_t *oldvd, *newvd, *pvd; nvlist_t *root; uint64_t leaves; uint64_t leaf, top; uint64_t ashift = ztest_get_ashift(); uint64_t oldguid, pguid; uint64_t oldsize, newsize; char *oldpath, *newpath; int replacing; int oldvd_has_siblings = B_FALSE; int newvd_is_spare = B_FALSE; int newvd_is_dspare = B_FALSE; int oldvd_is_log; int error, expected_error; if (ztest_opts.zo_mmp_test) return; oldpath = umem_alloc(MAXPATHLEN, UMEM_NOFAIL); newpath = umem_alloc(MAXPATHLEN, UMEM_NOFAIL); mutex_enter(&ztest_vdev_lock); leaves = MAX(zs->zs_mirrors, 1) * ztest_opts.zo_raid_children; spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); /* * If a vdev is in the process of being removed, its removal may * finish while we are in progress, leading to an unexpected error * value. Don't bother trying to attach while we are in the middle * of removal. */ if (ztest_device_removal_active) { spa_config_exit(spa, SCL_ALL, FTAG); goto out; } /* * Decide whether to do an attach or a replace. */ replacing = ztest_random(2); /* * Pick a random top-level vdev. */ top = ztest_random_vdev_top(spa, B_TRUE); /* * Pick a random leaf within it. */ leaf = ztest_random(leaves); /* * Locate this vdev. */ oldvd = rvd->vdev_child[top]; /* pick a child from the mirror */ if (zs->zs_mirrors >= 1) { ASSERT3P(oldvd->vdev_ops, ==, &vdev_mirror_ops); ASSERT3U(oldvd->vdev_children, >=, zs->zs_mirrors); oldvd = oldvd->vdev_child[leaf / ztest_opts.zo_raid_children]; } /* pick a child out of the raidz group */ if (ztest_opts.zo_raid_children > 1) { if (strcmp(oldvd->vdev_ops->vdev_op_type, "raidz") == 0) ASSERT3P(oldvd->vdev_ops, ==, &vdev_raidz_ops); else ASSERT3P(oldvd->vdev_ops, ==, &vdev_draid_ops); ASSERT3U(oldvd->vdev_children, ==, ztest_opts.zo_raid_children); oldvd = oldvd->vdev_child[leaf % ztest_opts.zo_raid_children]; } /* * If we're already doing an attach or replace, oldvd may be a * mirror vdev -- in which case, pick a random child. */ while (oldvd->vdev_children != 0) { oldvd_has_siblings = B_TRUE; ASSERT3U(oldvd->vdev_children, >=, 2); oldvd = oldvd->vdev_child[ztest_random(oldvd->vdev_children)]; } oldguid = oldvd->vdev_guid; oldsize = vdev_get_min_asize(oldvd); oldvd_is_log = oldvd->vdev_top->vdev_islog; (void) strcpy(oldpath, oldvd->vdev_path); pvd = oldvd->vdev_parent; pguid = pvd->vdev_guid; /* * If oldvd has siblings, then half of the time, detach it. Prior * to the detach the pool is scrubbed in order to prevent creating * unrepairable blocks as a result of the data corruption injection. */ if (oldvd_has_siblings && ztest_random(2) == 0) { spa_config_exit(spa, SCL_ALL, FTAG); error = ztest_scrub_impl(spa); if (error) goto out; error = spa_vdev_detach(spa, oldguid, pguid, B_FALSE); if (error != 0 && error != ENODEV && error != EBUSY && error != ENOTSUP && error != ZFS_ERR_CHECKPOINT_EXISTS && error != ZFS_ERR_DISCARDING_CHECKPOINT) fatal(B_FALSE, "detach (%s) returned %d", oldpath, error); goto out; } /* * For the new vdev, choose with equal probability between the two * standard paths (ending in either 'a' or 'b') or a random hot spare. 
*/ if (sav->sav_count != 0 && ztest_random(3) == 0) { newvd = sav->sav_vdevs[ztest_random(sav->sav_count)]; newvd_is_spare = B_TRUE; if (newvd->vdev_ops == &vdev_draid_spare_ops) newvd_is_dspare = B_TRUE; (void) strcpy(newpath, newvd->vdev_path); } else { (void) snprintf(newpath, MAXPATHLEN, ztest_dev_template, ztest_opts.zo_dir, ztest_opts.zo_pool, top * leaves + leaf); if (ztest_random(2) == 0) newpath[strlen(newpath) - 1] = 'b'; newvd = vdev_lookup_by_path(rvd, newpath); } if (newvd) { /* * Reopen to ensure the vdev's asize field isn't stale. */ vdev_reopen(newvd); newsize = vdev_get_min_asize(newvd); } else { /* * Make newsize a little bigger or smaller than oldsize. * If it's smaller, the attach should fail. * If it's larger, and we're doing a replace, * we should get dynamic LUN growth when we're done. */ newsize = 10 * oldsize / (9 + ztest_random(3)); } /* * If pvd is not a mirror or root, the attach should fail with ENOTSUP, * unless it's a replace; in that case any non-replacing parent is OK. * * If newvd is already part of the pool, it should fail with EBUSY. * * If newvd is too small, it should fail with EOVERFLOW. * * If newvd is a distributed spare and it's being attached to a * dRAID which is not its parent it should fail with EINVAL. */ if (pvd->vdev_ops != &vdev_mirror_ops && pvd->vdev_ops != &vdev_root_ops && (!replacing || pvd->vdev_ops == &vdev_replacing_ops || pvd->vdev_ops == &vdev_spare_ops)) expected_error = ENOTSUP; else if (newvd_is_spare && (!replacing || oldvd_is_log)) expected_error = ENOTSUP; else if (newvd == oldvd) expected_error = replacing ? 0 : EBUSY; else if (vdev_lookup_by_path(rvd, newpath) != NULL) expected_error = EBUSY; else if (!newvd_is_dspare && newsize < oldsize) expected_error = EOVERFLOW; else if (ashift > oldvd->vdev_top->vdev_ashift) expected_error = EDOM; else if (newvd_is_dspare && pvd != vdev_draid_spare_get_parent(newvd)) expected_error = ENOTSUP; else expected_error = 0; spa_config_exit(spa, SCL_ALL, FTAG); /* * Build the nvlist describing newpath. */ root = make_vdev_root(newpath, NULL, NULL, newvd == NULL ? newsize : 0, ashift, NULL, 0, 0, 1); /* * When supported select either a healing or sequential resilver. */ boolean_t rebuilding = B_FALSE; if (pvd->vdev_ops == &vdev_mirror_ops || pvd->vdev_ops == &vdev_root_ops) { rebuilding = !!ztest_random(2); } error = spa_vdev_attach(spa, oldguid, root, replacing, rebuilding); fnvlist_free(root); /* * If our parent was the replacing vdev, but the replace completed, * then instead of failing with ENOTSUP we may either succeed, * fail with ENODEV, or fail with EOVERFLOW. */ if (expected_error == ENOTSUP && (error == 0 || error == ENODEV || error == EOVERFLOW)) expected_error = error; /* * If someone grew the LUN, the replacement may be too small. 
*/ if (error == EOVERFLOW || error == EBUSY) expected_error = error; if (error == ZFS_ERR_CHECKPOINT_EXISTS || error == ZFS_ERR_DISCARDING_CHECKPOINT || error == ZFS_ERR_RESILVER_IN_PROGRESS || error == ZFS_ERR_REBUILD_IN_PROGRESS) expected_error = error; if (error != expected_error && expected_error != EBUSY) { fatal(B_FALSE, "attach (%s %"PRIu64", %s %"PRIu64", %d) " "returned %d, expected %d", oldpath, oldsize, newpath, newsize, replacing, error, expected_error); } out: mutex_exit(&ztest_vdev_lock); umem_free(oldpath, MAXPATHLEN); umem_free(newpath, MAXPATHLEN); } void ztest_device_removal(ztest_ds_t *zd, uint64_t id) { (void) zd, (void) id; spa_t *spa = ztest_spa; vdev_t *vd; uint64_t guid; int error; mutex_enter(&ztest_vdev_lock); if (ztest_device_removal_active) { mutex_exit(&ztest_vdev_lock); return; } /* * Remove a random top-level vdev and wait for removal to finish. */ spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER); vd = vdev_lookup_top(spa, ztest_random_vdev_top(spa, B_FALSE)); guid = vd->vdev_guid; spa_config_exit(spa, SCL_VDEV, FTAG); error = spa_vdev_remove(spa, guid, B_FALSE); if (error == 0) { ztest_device_removal_active = B_TRUE; mutex_exit(&ztest_vdev_lock); /* * spa->spa_vdev_removal is created in a sync task that * is initiated via dsl_sync_task_nowait(). Since the * task may not run before spa_vdev_remove() returns, we * must wait at least 1 txg to ensure that the removal * struct has been created. */ txg_wait_synced(spa_get_dsl(spa), 0); while (spa->spa_removing_phys.sr_state == DSS_SCANNING) txg_wait_synced(spa_get_dsl(spa), 0); } else { mutex_exit(&ztest_vdev_lock); return; } /* * The pool needs to be scrubbed after completing device removal. * Failure to do so may result in checksum errors due to the * strategy employed by ztest_fault_inject() when selecting which * offset are redundant and can be damaged. */ error = spa_scan(spa, POOL_SCAN_SCRUB); if (error == 0) { while (dsl_scan_scrubbing(spa_get_dsl(spa))) txg_wait_synced(spa_get_dsl(spa), 0); } mutex_enter(&ztest_vdev_lock); ztest_device_removal_active = B_FALSE; mutex_exit(&ztest_vdev_lock); } /* * Callback function which expands the physical size of the vdev. */ static vdev_t * grow_vdev(vdev_t *vd, void *arg) { spa_t *spa __maybe_unused = vd->vdev_spa; size_t *newsize = arg; size_t fsize; int fd; ASSERT3S(spa_config_held(spa, SCL_STATE, RW_READER), ==, SCL_STATE); ASSERT(vd->vdev_ops->vdev_op_leaf); if ((fd = open(vd->vdev_path, O_RDWR)) == -1) return (vd); fsize = lseek(fd, 0, SEEK_END); VERIFY0(ftruncate(fd, *newsize)); if (ztest_opts.zo_verbose >= 6) { (void) printf("%s grew from %lu to %lu bytes\n", vd->vdev_path, (ulong_t)fsize, (ulong_t)*newsize); } (void) close(fd); return (NULL); } /* * Callback function which expands a given vdev by calling vdev_online(). */ static vdev_t * online_vdev(vdev_t *vd, void *arg) { (void) arg; spa_t *spa = vd->vdev_spa; vdev_t *tvd = vd->vdev_top; uint64_t guid = vd->vdev_guid; uint64_t generation = spa->spa_config_generation + 1; vdev_state_t newstate = VDEV_STATE_UNKNOWN; int error; ASSERT3S(spa_config_held(spa, SCL_STATE, RW_READER), ==, SCL_STATE); ASSERT(vd->vdev_ops->vdev_op_leaf); /* Calling vdev_online will initialize the new metaslabs */ spa_config_exit(spa, SCL_STATE, spa); error = vdev_online(spa, guid, ZFS_ONLINE_EXPAND, &newstate); spa_config_enter(spa, SCL_STATE, spa, RW_READER); /* * If vdev_online returned an error or the underlying vdev_open * failed then we abort the expand. 
The only way to know that * vdev_open fails is by checking the returned newstate. */ if (error || newstate != VDEV_STATE_HEALTHY) { if (ztest_opts.zo_verbose >= 5) { (void) printf("Unable to expand vdev, state %u, " "error %d\n", newstate, error); } return (vd); } ASSERT3U(newstate, ==, VDEV_STATE_HEALTHY); /* * Since we dropped the lock we need to ensure that we're * still talking to the original vdev. It's possible this * vdev may have been detached/replaced while we were * trying to online it. */ if (generation != spa->spa_config_generation) { if (ztest_opts.zo_verbose >= 5) { (void) printf("vdev configuration has changed, " "guid %"PRIu64", state %"PRIu64", " "expected gen %"PRIu64", got gen %"PRIu64"\n", guid, tvd->vdev_state, generation, spa->spa_config_generation); } return (vd); } return (NULL); } /* * Traverse the vdev tree calling the supplied function. * We continue to walk the tree until we either have walked all * children or we receive a non-NULL return from the callback. * If a NULL callback is passed, then we just return back the first * leaf vdev we encounter. */ static vdev_t * vdev_walk_tree(vdev_t *vd, vdev_t *(*func)(vdev_t *, void *), void *arg) { uint_t c; if (vd->vdev_ops->vdev_op_leaf) { if (func == NULL) return (vd); else return (func(vd, arg)); } for (c = 0; c < vd->vdev_children; c++) { vdev_t *cvd = vd->vdev_child[c]; if ((cvd = vdev_walk_tree(cvd, func, arg)) != NULL) return (cvd); } return (NULL); } /* * Verify that dynamic LUN growth works as expected. */ void ztest_vdev_LUN_growth(ztest_ds_t *zd, uint64_t id) { (void) zd, (void) id; spa_t *spa = ztest_spa; vdev_t *vd, *tvd; metaslab_class_t *mc; metaslab_group_t *mg; size_t psize, newsize; uint64_t top; uint64_t old_class_space, new_class_space, old_ms_count, new_ms_count; mutex_enter(&ztest_checkpoint_lock); mutex_enter(&ztest_vdev_lock); spa_config_enter(spa, SCL_STATE, spa, RW_READER); /* * If there is a vdev removal in progress, it could complete while * we are running, in which case we would not be able to verify * that the metaslab_class space increased (because it decreases * when the device removal completes). */ if (ztest_device_removal_active) { spa_config_exit(spa, SCL_STATE, spa); mutex_exit(&ztest_vdev_lock); mutex_exit(&ztest_checkpoint_lock); return; } top = ztest_random_vdev_top(spa, B_TRUE); tvd = spa->spa_root_vdev->vdev_child[top]; mg = tvd->vdev_mg; mc = mg->mg_class; old_ms_count = tvd->vdev_ms_count; old_class_space = metaslab_class_get_space(mc); /* * Determine the size of the first leaf vdev associated with * our top-level device. */ vd = vdev_walk_tree(tvd, NULL, NULL); ASSERT3P(vd, !=, NULL); ASSERT(vd->vdev_ops->vdev_op_leaf); psize = vd->vdev_psize; /* * We only try to expand the vdev if it's healthy, less than 4x its * original size, and it has a valid psize. */ if (tvd->vdev_state != VDEV_STATE_HEALTHY || psize == 0 || psize >= 4 * ztest_opts.zo_vdev_size) { spa_config_exit(spa, SCL_STATE, spa); mutex_exit(&ztest_vdev_lock); mutex_exit(&ztest_checkpoint_lock); return; } ASSERT3U(psize, >, 0); newsize = psize + MAX(psize / 8, SPA_MAXBLOCKSIZE); ASSERT3U(newsize, >, psize); if (ztest_opts.zo_verbose >= 6) { (void) printf("Expanding LUN %s from %lu to %lu\n", vd->vdev_path, (ulong_t)psize, (ulong_t)newsize); } /* * Growing the vdev is a two step process: * 1). expand the physical size (i.e. relabel) * 2). 
online the vdev to create the new metaslabs */ if (vdev_walk_tree(tvd, grow_vdev, &newsize) != NULL || vdev_walk_tree(tvd, online_vdev, NULL) != NULL || tvd->vdev_state != VDEV_STATE_HEALTHY) { if (ztest_opts.zo_verbose >= 5) { (void) printf("Could not expand LUN because " "the vdev configuration changed.\n"); } spa_config_exit(spa, SCL_STATE, spa); mutex_exit(&ztest_vdev_lock); mutex_exit(&ztest_checkpoint_lock); return; } spa_config_exit(spa, SCL_STATE, spa); /* * Expanding the LUN will update the config asynchronously, * thus we must wait for the async thread to complete any * pending tasks before proceeding. */ for (;;) { boolean_t done; mutex_enter(&spa->spa_async_lock); done = (spa->spa_async_thread == NULL && !spa->spa_async_tasks); mutex_exit(&spa->spa_async_lock); if (done) break; txg_wait_synced(spa_get_dsl(spa), 0); (void) poll(NULL, 0, 100); } spa_config_enter(spa, SCL_STATE, spa, RW_READER); tvd = spa->spa_root_vdev->vdev_child[top]; new_ms_count = tvd->vdev_ms_count; new_class_space = metaslab_class_get_space(mc); if (tvd->vdev_mg != mg || mg->mg_class != mc) { if (ztest_opts.zo_verbose >= 5) { (void) printf("Could not verify LUN expansion due to " "intervening vdev offline or remove.\n"); } spa_config_exit(spa, SCL_STATE, spa); mutex_exit(&ztest_vdev_lock); mutex_exit(&ztest_checkpoint_lock); return; } /* * Make sure we were able to grow the vdev. */ if (new_ms_count <= old_ms_count) { fatal(B_FALSE, "LUN expansion failed: ms_count %"PRIu64" < %"PRIu64"\n", old_ms_count, new_ms_count); } /* * Make sure we were able to grow the pool. */ if (new_class_space <= old_class_space) { fatal(B_FALSE, "LUN expansion failed: class_space %"PRIu64" < %"PRIu64"\n", old_class_space, new_class_space); } if (ztest_opts.zo_verbose >= 5) { char oldnumbuf[NN_NUMBUF_SZ], newnumbuf[NN_NUMBUF_SZ]; nicenum(old_class_space, oldnumbuf, sizeof (oldnumbuf)); nicenum(new_class_space, newnumbuf, sizeof (newnumbuf)); (void) printf("%s grew from %s to %s\n", spa->spa_name, oldnumbuf, newnumbuf); } spa_config_exit(spa, SCL_STATE, spa); mutex_exit(&ztest_vdev_lock); mutex_exit(&ztest_checkpoint_lock); } /* * Verify that dmu_objset_{create,destroy,open,close} work as expected. */ static void ztest_objset_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx) { (void) arg, (void) cr; /* * Create the objects common to all ztest datasets. */ VERIFY0(zap_create_claim(os, ZTEST_DIROBJ, DMU_OT_ZAP_OTHER, DMU_OT_NONE, 0, tx)); } static int ztest_dataset_create(char *dsname) { int err; uint64_t rand; dsl_crypto_params_t *dcp = NULL; /* * 50% of the time, we create encrypted datasets * using a random cipher suite and a hard-coded * wrapping key. */ rand = ztest_random(2); if (rand != 0) { nvlist_t *crypto_args = fnvlist_alloc(); nvlist_t *props = fnvlist_alloc(); /* slight bias towards the default cipher suite */ rand = ztest_random(ZIO_CRYPT_FUNCTIONS); if (rand < ZIO_CRYPT_AES_128_CCM) rand = ZIO_CRYPT_ON; fnvlist_add_uint64(props, zfs_prop_to_name(ZFS_PROP_ENCRYPTION), rand); fnvlist_add_uint8_array(crypto_args, "wkeydata", (uint8_t *)ztest_wkeydata, WRAPPING_KEY_LEN); /* * These parameters aren't really used by the kernel. They * are simply stored so that userspace knows how to load * the wrapping key. 
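 * Because the raw wrapping key is passed directly in wkeydata, the salt and
 * iteration count below are effectively placeholders; they would only matter
 * for a passphrase-derived key, which this test does not use.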
*/ fnvlist_add_uint64(props, zfs_prop_to_name(ZFS_PROP_KEYFORMAT), ZFS_KEYFORMAT_RAW); fnvlist_add_string(props, zfs_prop_to_name(ZFS_PROP_KEYLOCATION), "prompt"); fnvlist_add_uint64(props, zfs_prop_to_name(ZFS_PROP_PBKDF2_SALT), 0ULL); fnvlist_add_uint64(props, zfs_prop_to_name(ZFS_PROP_PBKDF2_ITERS), 0ULL); VERIFY0(dsl_crypto_params_create_nvlist(DCP_CMD_NONE, props, crypto_args, &dcp)); /* * Cycle through all available encryption implementations * to verify interoperability. */ VERIFY0(gcm_impl_set("cycle")); VERIFY0(aes_impl_set("cycle")); fnvlist_free(crypto_args); fnvlist_free(props); } err = dmu_objset_create(dsname, DMU_OST_OTHER, 0, dcp, ztest_objset_create_cb, NULL); dsl_crypto_params_free(dcp, !!err); rand = ztest_random(100); if (err || rand < 80) return (err); if (ztest_opts.zo_verbose >= 5) (void) printf("Setting dataset %s to sync always\n", dsname); return (ztest_dsl_prop_set_uint64(dsname, ZFS_PROP_SYNC, ZFS_SYNC_ALWAYS, B_FALSE)); } static int ztest_objset_destroy_cb(const char *name, void *arg) { (void) arg; objset_t *os; dmu_object_info_t doi; int error; /* * Verify that the dataset contains a directory object. */ VERIFY0(ztest_dmu_objset_own(name, DMU_OST_OTHER, B_TRUE, B_TRUE, FTAG, &os)); error = dmu_object_info(os, ZTEST_DIROBJ, &doi); if (error != ENOENT) { /* We could have crashed in the middle of destroying it */ ASSERT0(error); ASSERT3U(doi.doi_type, ==, DMU_OT_ZAP_OTHER); ASSERT3S(doi.doi_physical_blocks_512, >=, 0); } dmu_objset_disown(os, B_TRUE, FTAG); /* * Destroy the dataset. */ if (strchr(name, '@') != NULL) { error = dsl_destroy_snapshot(name, B_TRUE); if (error != ECHRNG) { /* * The program was executed, but encountered a runtime * error, such as insufficient slop, or a hold on the * dataset. */ ASSERT0(error); } } else { error = dsl_destroy_head(name); if (error == ENOSPC) { /* There could be checkpoint or insufficient slop */ ztest_record_enospc(FTAG); } else if (error != EBUSY) { /* There could be a hold on this dataset */ ASSERT0(error); } } return (0); } static boolean_t ztest_snapshot_create(char *osname, uint64_t id) { char snapname[ZFS_MAX_DATASET_NAME_LEN]; int error; (void) snprintf(snapname, sizeof (snapname), "%"PRIu64"", id); error = dmu_objset_snapshot_one(osname, snapname); if (error == ENOSPC) { ztest_record_enospc(FTAG); return (B_FALSE); } if (error != 0 && error != EEXIST) { fatal(B_FALSE, "ztest_snapshot_create(%s@%s) = %d", osname, snapname, error); } return (B_TRUE); } static boolean_t ztest_snapshot_destroy(char *osname, uint64_t id) { char snapname[ZFS_MAX_DATASET_NAME_LEN]; int error; (void) snprintf(snapname, sizeof (snapname), "%s@%"PRIu64"", osname, id); error = dsl_destroy_snapshot(snapname, B_FALSE); if (error != 0 && error != ENOENT) fatal(B_FALSE, "ztest_snapshot_destroy(%s) = %d", snapname, error); return (B_TRUE); } void ztest_dmu_objset_create_destroy(ztest_ds_t *zd, uint64_t id) { (void) zd; ztest_ds_t *zdtmp; int iters; int error; objset_t *os, *os2; char name[ZFS_MAX_DATASET_NAME_LEN]; zilog_t *zilog; int i; zdtmp = umem_alloc(sizeof (ztest_ds_t), UMEM_NOFAIL); (void) pthread_rwlock_rdlock(&ztest_name_lock); (void) snprintf(name, sizeof (name), "%s/temp_%"PRIu64"", ztest_opts.zo_pool, id); /* * If this dataset exists from a previous run, process its replay log * half of the time. If we don't replay it, then dsl_destroy_head() * (invoked from ztest_objset_destroy_cb()) should just throw it away. 
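 * Either outcome must leave the dataset usable: an unreplayed ZIL is simply
 * discarded when the dataset is destroyed, so skipping replay is harmless.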
*/ if (ztest_random(2) == 0 && ztest_dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, B_TRUE, FTAG, &os) == 0) { ztest_zd_init(zdtmp, NULL, os); zil_replay(os, zdtmp, ztest_replay_vector); ztest_zd_fini(zdtmp); dmu_objset_disown(os, B_TRUE, FTAG); } /* * There may be an old instance of the dataset we're about to * create lying around from a previous run. If so, destroy it * and all of its snapshots. */ (void) dmu_objset_find(name, ztest_objset_destroy_cb, NULL, DS_FIND_CHILDREN | DS_FIND_SNAPSHOTS); /* * Verify that the destroyed dataset is no longer in the namespace. */ VERIFY3U(ENOENT, ==, ztest_dmu_objset_own(name, DMU_OST_OTHER, B_TRUE, B_TRUE, FTAG, &os)); /* * Verify that we can create a new dataset. */ error = ztest_dataset_create(name); if (error) { if (error == ENOSPC) { ztest_record_enospc(FTAG); goto out; } fatal(B_FALSE, "dmu_objset_create(%s) = %d", name, error); } VERIFY0(ztest_dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, B_TRUE, FTAG, &os)); ztest_zd_init(zdtmp, NULL, os); /* * Open the intent log for it. */ zilog = zil_open(os, ztest_get_data, NULL); /* * Put some objects in there, do a little I/O to them, * and randomly take a couple of snapshots along the way. */ iters = ztest_random(5); for (i = 0; i < iters; i++) { ztest_dmu_object_alloc_free(zdtmp, id); if (ztest_random(iters) == 0) (void) ztest_snapshot_create(name, i); } /* * Verify that we cannot create an existing dataset. */ VERIFY3U(EEXIST, ==, dmu_objset_create(name, DMU_OST_OTHER, 0, NULL, NULL, NULL)); /* * Verify that we can hold an objset that is also owned. */ VERIFY0(dmu_objset_hold(name, FTAG, &os2)); dmu_objset_rele(os2, FTAG); /* * Verify that we cannot own an objset that is already owned. */ VERIFY3U(EBUSY, ==, ztest_dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, B_TRUE, FTAG, &os2)); zil_close(zilog); dmu_objset_disown(os, B_TRUE, FTAG); ztest_zd_fini(zdtmp); out: (void) pthread_rwlock_unlock(&ztest_name_lock); umem_free(zdtmp, sizeof (ztest_ds_t)); } /* * Verify that dmu_snapshot_{create,destroy,open,close} work as expected. */ void ztest_dmu_snapshot_create_destroy(ztest_ds_t *zd, uint64_t id) { (void) pthread_rwlock_rdlock(&ztest_name_lock); (void) ztest_snapshot_destroy(zd->zd_name, id); (void) ztest_snapshot_create(zd->zd_name, id); (void) pthread_rwlock_unlock(&ztest_name_lock); } /* * Cleanup non-standard snapshots and clones. 
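 * These are the s1/c1/s2/c2/s3 datasets created by
 * ztest_dsl_dataset_promote_busy(); they are destroyed in reverse dependency
 * order (clone2, snap3, snap2, clone1, snap1) so each destroy can succeed.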
*/ static void ztest_dsl_dataset_cleanup(char *osname, uint64_t id) { char *snap1name; char *clone1name; char *snap2name; char *clone2name; char *snap3name; int error; snap1name = umem_alloc(ZFS_MAX_DATASET_NAME_LEN, UMEM_NOFAIL); clone1name = umem_alloc(ZFS_MAX_DATASET_NAME_LEN, UMEM_NOFAIL); snap2name = umem_alloc(ZFS_MAX_DATASET_NAME_LEN, UMEM_NOFAIL); clone2name = umem_alloc(ZFS_MAX_DATASET_NAME_LEN, UMEM_NOFAIL); snap3name = umem_alloc(ZFS_MAX_DATASET_NAME_LEN, UMEM_NOFAIL); (void) snprintf(snap1name, ZFS_MAX_DATASET_NAME_LEN, "%s@s1_%"PRIu64"", osname, id); (void) snprintf(clone1name, ZFS_MAX_DATASET_NAME_LEN, "%s/c1_%"PRIu64"", osname, id); (void) snprintf(snap2name, ZFS_MAX_DATASET_NAME_LEN, "%s@s2_%"PRIu64"", clone1name, id); (void) snprintf(clone2name, ZFS_MAX_DATASET_NAME_LEN, "%s/c2_%"PRIu64"", osname, id); (void) snprintf(snap3name, ZFS_MAX_DATASET_NAME_LEN, "%s@s3_%"PRIu64"", clone1name, id); error = dsl_destroy_head(clone2name); if (error && error != ENOENT) fatal(B_FALSE, "dsl_destroy_head(%s) = %d", clone2name, error); error = dsl_destroy_snapshot(snap3name, B_FALSE); if (error && error != ENOENT) fatal(B_FALSE, "dsl_destroy_snapshot(%s) = %d", snap3name, error); error = dsl_destroy_snapshot(snap2name, B_FALSE); if (error && error != ENOENT) fatal(B_FALSE, "dsl_destroy_snapshot(%s) = %d", snap2name, error); error = dsl_destroy_head(clone1name); if (error && error != ENOENT) fatal(B_FALSE, "dsl_destroy_head(%s) = %d", clone1name, error); error = dsl_destroy_snapshot(snap1name, B_FALSE); if (error && error != ENOENT) fatal(B_FALSE, "dsl_destroy_snapshot(%s) = %d", snap1name, error); umem_free(snap1name, ZFS_MAX_DATASET_NAME_LEN); umem_free(clone1name, ZFS_MAX_DATASET_NAME_LEN); umem_free(snap2name, ZFS_MAX_DATASET_NAME_LEN); umem_free(clone2name, ZFS_MAX_DATASET_NAME_LEN); umem_free(snap3name, ZFS_MAX_DATASET_NAME_LEN); } /* * Verify dsl_dataset_promote handles EBUSY */ void ztest_dsl_dataset_promote_busy(ztest_ds_t *zd, uint64_t id) { objset_t *os; char *snap1name; char *clone1name; char *snap2name; char *clone2name; char *snap3name; char *osname = zd->zd_name; int error; snap1name = umem_alloc(ZFS_MAX_DATASET_NAME_LEN, UMEM_NOFAIL); clone1name = umem_alloc(ZFS_MAX_DATASET_NAME_LEN, UMEM_NOFAIL); snap2name = umem_alloc(ZFS_MAX_DATASET_NAME_LEN, UMEM_NOFAIL); clone2name = umem_alloc(ZFS_MAX_DATASET_NAME_LEN, UMEM_NOFAIL); snap3name = umem_alloc(ZFS_MAX_DATASET_NAME_LEN, UMEM_NOFAIL); (void) pthread_rwlock_rdlock(&ztest_name_lock); ztest_dsl_dataset_cleanup(osname, id); (void) snprintf(snap1name, ZFS_MAX_DATASET_NAME_LEN, "%s@s1_%"PRIu64"", osname, id); (void) snprintf(clone1name, ZFS_MAX_DATASET_NAME_LEN, "%s/c1_%"PRIu64"", osname, id); (void) snprintf(snap2name, ZFS_MAX_DATASET_NAME_LEN, "%s@s2_%"PRIu64"", clone1name, id); (void) snprintf(clone2name, ZFS_MAX_DATASET_NAME_LEN, "%s/c2_%"PRIu64"", osname, id); (void) snprintf(snap3name, ZFS_MAX_DATASET_NAME_LEN, "%s@s3_%"PRIu64"", clone1name, id); error = dmu_objset_snapshot_one(osname, strchr(snap1name, '@') + 1); if (error && error != EEXIST) { if (error == ENOSPC) { ztest_record_enospc(FTAG); goto out; } fatal(B_FALSE, "dmu_take_snapshot(%s) = %d", snap1name, error); } error = dmu_objset_clone(clone1name, snap1name); if (error) { if (error == ENOSPC) { ztest_record_enospc(FTAG); goto out; } fatal(B_FALSE, "dmu_objset_create(%s) = %d", clone1name, error); } error = dmu_objset_snapshot_one(clone1name, strchr(snap2name, '@') + 1); if (error && error != EEXIST) { if (error == ENOSPC) { ztest_record_enospc(FTAG); goto out; } 
fatal(B_FALSE, "dmu_open_snapshot(%s) = %d", snap2name, error); } error = dmu_objset_snapshot_one(clone1name, strchr(snap3name, '@') + 1); if (error && error != EEXIST) { if (error == ENOSPC) { ztest_record_enospc(FTAG); goto out; } fatal(B_FALSE, "dmu_open_snapshot(%s) = %d", snap3name, error); } error = dmu_objset_clone(clone2name, snap3name); if (error) { if (error == ENOSPC) { ztest_record_enospc(FTAG); goto out; } fatal(B_FALSE, "dmu_objset_create(%s) = %d", clone2name, error); } error = ztest_dmu_objset_own(snap2name, DMU_OST_ANY, B_TRUE, B_TRUE, FTAG, &os); if (error) fatal(B_FALSE, "dmu_objset_own(%s) = %d", snap2name, error); error = dsl_dataset_promote(clone2name, NULL); if (error == ENOSPC) { dmu_objset_disown(os, B_TRUE, FTAG); ztest_record_enospc(FTAG); goto out; } if (error != EBUSY) fatal(B_FALSE, "dsl_dataset_promote(%s), %d, not EBUSY", clone2name, error); dmu_objset_disown(os, B_TRUE, FTAG); out: ztest_dsl_dataset_cleanup(osname, id); (void) pthread_rwlock_unlock(&ztest_name_lock); umem_free(snap1name, ZFS_MAX_DATASET_NAME_LEN); umem_free(clone1name, ZFS_MAX_DATASET_NAME_LEN); umem_free(snap2name, ZFS_MAX_DATASET_NAME_LEN); umem_free(clone2name, ZFS_MAX_DATASET_NAME_LEN); umem_free(snap3name, ZFS_MAX_DATASET_NAME_LEN); } #undef OD_ARRAY_SIZE #define OD_ARRAY_SIZE 4 /* * Verify that dmu_object_{alloc,free} work as expected. */ void ztest_dmu_object_alloc_free(ztest_ds_t *zd, uint64_t id) { ztest_od_t *od; int batchsize; int size; int b; size = sizeof (ztest_od_t) * OD_ARRAY_SIZE; od = umem_alloc(size, UMEM_NOFAIL); batchsize = OD_ARRAY_SIZE; for (b = 0; b < batchsize; b++) ztest_od_init(od + b, id, FTAG, b, DMU_OT_UINT64_OTHER, 0, 0, 0); /* * Destroy the previous batch of objects, create a new batch, * and do some I/O on the new objects. */ if (ztest_object_init(zd, od, size, B_TRUE) != 0) return; while (ztest_random(4 * batchsize) != 0) ztest_io(zd, od[ztest_random(batchsize)].od_object, ztest_random(ZTEST_RANGE_LOCKS) << SPA_MAXBLOCKSHIFT); umem_free(od, size); } /* * Rewind the global allocator to verify object allocation backfilling. */ void ztest_dmu_object_next_chunk(ztest_ds_t *zd, uint64_t id) { (void) id; objset_t *os = zd->zd_os; int dnodes_per_chunk = 1 << dmu_object_alloc_chunk_shift; uint64_t object; /* * Rewind the global allocator randomly back to a lower object number * to force backfilling and reclamation of recently freed dnodes. */ mutex_enter(&os->os_obj_lock); object = ztest_random(os->os_obj_next_chunk); os->os_obj_next_chunk = P2ALIGN(object, dnodes_per_chunk); mutex_exit(&os->os_obj_lock); } #undef OD_ARRAY_SIZE #define OD_ARRAY_SIZE 2 /* * Verify that dmu_{read,write} work as expected. */ void ztest_dmu_read_write(ztest_ds_t *zd, uint64_t id) { int size; ztest_od_t *od; objset_t *os = zd->zd_os; size = sizeof (ztest_od_t) * OD_ARRAY_SIZE; od = umem_alloc(size, UMEM_NOFAIL); dmu_tx_t *tx; int freeit, error; uint64_t i, n, s, txg; bufwad_t *packbuf, *bigbuf, *pack, *bigH, *bigT; uint64_t packobj, packoff, packsize, bigobj, bigoff, bigsize; uint64_t chunksize = (1000 + ztest_random(1000)) * sizeof (uint64_t); uint64_t regions = 997; uint64_t stride = 123456789ULL; uint64_t width = 40; int free_percent = 5; /* * This test uses two objects, packobj and bigobj, that are always * updated together (i.e. in the same tx) so that their contents are * in sync and can be compared. Their contents relate to each other * in a simple way: packobj is a dense array of 'bufwad' structures, * while bigobj is a sparse array of the same bufwads. 
Specifically, * for any index n, there are three bufwads that should be identical: * * packobj, at offset n * sizeof (bufwad_t) * bigobj, at the head of the nth chunk * bigobj, at the tail of the nth chunk * * The chunk size is arbitrary. It doesn't have to be a power of two, * and it doesn't have any relation to the object blocksize. * The only requirement is that it can hold at least two bufwads. * * Normally, we write the bufwad to each of these locations. * However, free_percent of the time we instead write zeroes to * packobj and perform a dmu_free_range() on bigobj. By comparing * bigobj to packobj, we can verify that the DMU is correctly * tracking which parts of an object are allocated and free, * and that the contents of the allocated blocks are correct. */ /* * Read the directory info. If it's the first time, set things up. */ ztest_od_init(od, id, FTAG, 0, DMU_OT_UINT64_OTHER, 0, 0, chunksize); ztest_od_init(od + 1, id, FTAG, 1, DMU_OT_UINT64_OTHER, 0, 0, chunksize); if (ztest_object_init(zd, od, size, B_FALSE) != 0) { umem_free(od, size); return; } bigobj = od[0].od_object; packobj = od[1].od_object; chunksize = od[0].od_gen; ASSERT3U(chunksize, ==, od[1].od_gen); /* * Prefetch a random chunk of the big object. * Our aim here is to get some async reads in flight * for blocks that we may free below; the DMU should * handle this race correctly. */ n = ztest_random(regions) * stride + ztest_random(width); s = 1 + ztest_random(2 * width - 1); dmu_prefetch(os, bigobj, 0, n * chunksize, s * chunksize, ZIO_PRIORITY_SYNC_READ); /* * Pick a random index and compute the offsets into packobj and bigobj. */ n = ztest_random(regions) * stride + ztest_random(width); s = 1 + ztest_random(width - 1); packoff = n * sizeof (bufwad_t); packsize = s * sizeof (bufwad_t); bigoff = n * chunksize; bigsize = s * chunksize; packbuf = umem_alloc(packsize, UMEM_NOFAIL); bigbuf = umem_alloc(bigsize, UMEM_NOFAIL); /* * free_percent of the time, free a range of bigobj rather than * overwriting it. */ freeit = (ztest_random(100) < free_percent); /* * Read the current contents of our objects. */ error = dmu_read(os, packobj, packoff, packsize, packbuf, DMU_READ_PREFETCH); ASSERT0(error); error = dmu_read(os, bigobj, bigoff, bigsize, bigbuf, DMU_READ_PREFETCH); ASSERT0(error); /* * Get a tx for the mods to both packobj and bigobj. */ tx = dmu_tx_create(os); dmu_tx_hold_write(tx, packobj, packoff, packsize); if (freeit) dmu_tx_hold_free(tx, bigobj, bigoff, bigsize); else dmu_tx_hold_write(tx, bigobj, bigoff, bigsize); /* This accounts for setting the checksum/compression. */ dmu_tx_hold_bonus(tx, bigobj); txg = ztest_tx_assign(tx, TXG_MIGHTWAIT, FTAG); if (txg == 0) { umem_free(packbuf, packsize); umem_free(bigbuf, bigsize); umem_free(od, size); return; } enum zio_checksum cksum; do { cksum = (enum zio_checksum) ztest_random_dsl_prop(ZFS_PROP_CHECKSUM); } while (cksum >= ZIO_CHECKSUM_LEGACY_FUNCTIONS); dmu_object_set_checksum(os, bigobj, cksum, tx); enum zio_compress comp; do { comp = (enum zio_compress) ztest_random_dsl_prop(ZFS_PROP_COMPRESSION); } while (comp >= ZIO_COMPRESS_LEGACY_FUNCTIONS); dmu_object_set_compress(os, bigobj, comp, tx); /* * For each index from n to n + s, verify that the existing bufwad * in packobj matches the bufwads at the head and tail of the * corresponding chunk in bigobj. Then update all three bufwads * with the new values we want to write out. 
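 * As a concrete illustration (assuming a hypothetical chunksize of 8192
 * bytes and a 24-byte bufwad_t): for i = 3, 'pack' is packbuf[3], 'bigH' is
 * the bufwad starting at byte 3 * 8192 of bigbuf, and 'bigT' is the bufwad
 * occupying the final 24 bytes of that same chunk.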
*/ for (i = 0; i < s; i++) { /* LINTED */ pack = (bufwad_t *)((char *)packbuf + i * sizeof (bufwad_t)); /* LINTED */ bigH = (bufwad_t *)((char *)bigbuf + i * chunksize); /* LINTED */ bigT = (bufwad_t *)((char *)bigH + chunksize) - 1; ASSERT3U((uintptr_t)bigH - (uintptr_t)bigbuf, <, bigsize); ASSERT3U((uintptr_t)bigT - (uintptr_t)bigbuf, <, bigsize); if (pack->bw_txg > txg) fatal(B_FALSE, "future leak: got %"PRIx64", open txg is %"PRIx64"", pack->bw_txg, txg); if (pack->bw_data != 0 && pack->bw_index != n + i) fatal(B_FALSE, "wrong index: " "got %"PRIx64", wanted %"PRIx64"+%"PRIx64"", pack->bw_index, n, i); if (memcmp(pack, bigH, sizeof (bufwad_t)) != 0) fatal(B_FALSE, "pack/bigH mismatch in %p/%p", pack, bigH); if (memcmp(pack, bigT, sizeof (bufwad_t)) != 0) fatal(B_FALSE, "pack/bigT mismatch in %p/%p", pack, bigT); if (freeit) { memset(pack, 0, sizeof (bufwad_t)); } else { pack->bw_index = n + i; pack->bw_txg = txg; pack->bw_data = 1 + ztest_random(-2ULL); } *bigH = *pack; *bigT = *pack; } /* * We've verified all the old bufwads, and made new ones. * Now write them out. */ dmu_write(os, packobj, packoff, packsize, packbuf, tx); if (freeit) { if (ztest_opts.zo_verbose >= 7) { (void) printf("freeing offset %"PRIx64" size %"PRIx64"" " txg %"PRIx64"\n", bigoff, bigsize, txg); } VERIFY0(dmu_free_range(os, bigobj, bigoff, bigsize, tx)); } else { if (ztest_opts.zo_verbose >= 7) { (void) printf("writing offset %"PRIx64" size %"PRIx64"" " txg %"PRIx64"\n", bigoff, bigsize, txg); } dmu_write(os, bigobj, bigoff, bigsize, bigbuf, tx); } dmu_tx_commit(tx); /* * Sanity check the stuff we just wrote. */ { void *packcheck = umem_alloc(packsize, UMEM_NOFAIL); void *bigcheck = umem_alloc(bigsize, UMEM_NOFAIL); VERIFY0(dmu_read(os, packobj, packoff, packsize, packcheck, DMU_READ_PREFETCH)); VERIFY0(dmu_read(os, bigobj, bigoff, bigsize, bigcheck, DMU_READ_PREFETCH)); ASSERT0(memcmp(packbuf, packcheck, packsize)); ASSERT0(memcmp(bigbuf, bigcheck, bigsize)); umem_free(packcheck, packsize); umem_free(bigcheck, bigsize); } umem_free(packbuf, packsize); umem_free(bigbuf, bigsize); umem_free(od, size); } static void compare_and_update_pbbufs(uint64_t s, bufwad_t *packbuf, bufwad_t *bigbuf, uint64_t bigsize, uint64_t n, uint64_t chunksize, uint64_t txg) { uint64_t i; bufwad_t *pack; bufwad_t *bigH; bufwad_t *bigT; /* * For each index from n to n + s, verify that the existing bufwad * in packobj matches the bufwads at the head and tail of the * corresponding chunk in bigobj. Then update all three bufwads * with the new values we want to write out. 
*/ for (i = 0; i < s; i++) { /* LINTED */ pack = (bufwad_t *)((char *)packbuf + i * sizeof (bufwad_t)); /* LINTED */ bigH = (bufwad_t *)((char *)bigbuf + i * chunksize); /* LINTED */ bigT = (bufwad_t *)((char *)bigH + chunksize) - 1; ASSERT3U((uintptr_t)bigH - (uintptr_t)bigbuf, <, bigsize); ASSERT3U((uintptr_t)bigT - (uintptr_t)bigbuf, <, bigsize); if (pack->bw_txg > txg) fatal(B_FALSE, "future leak: got %"PRIx64", open txg is %"PRIx64"", pack->bw_txg, txg); if (pack->bw_data != 0 && pack->bw_index != n + i) fatal(B_FALSE, "wrong index: " "got %"PRIx64", wanted %"PRIx64"+%"PRIx64"", pack->bw_index, n, i); if (memcmp(pack, bigH, sizeof (bufwad_t)) != 0) fatal(B_FALSE, "pack/bigH mismatch in %p/%p", pack, bigH); if (memcmp(pack, bigT, sizeof (bufwad_t)) != 0) fatal(B_FALSE, "pack/bigT mismatch in %p/%p", pack, bigT); pack->bw_index = n + i; pack->bw_txg = txg; pack->bw_data = 1 + ztest_random(-2ULL); *bigH = *pack; *bigT = *pack; } } #undef OD_ARRAY_SIZE #define OD_ARRAY_SIZE 2 void ztest_dmu_read_write_zcopy(ztest_ds_t *zd, uint64_t id) { objset_t *os = zd->zd_os; ztest_od_t *od; dmu_tx_t *tx; uint64_t i; int error; int size; uint64_t n, s, txg; bufwad_t *packbuf, *bigbuf; uint64_t packobj, packoff, packsize, bigobj, bigoff, bigsize; uint64_t blocksize = ztest_random_blocksize(); uint64_t chunksize = blocksize; uint64_t regions = 997; uint64_t stride = 123456789ULL; uint64_t width = 9; dmu_buf_t *bonus_db; arc_buf_t **bigbuf_arcbufs; dmu_object_info_t doi; size = sizeof (ztest_od_t) * OD_ARRAY_SIZE; od = umem_alloc(size, UMEM_NOFAIL); /* * This test uses two objects, packobj and bigobj, that are always * updated together (i.e. in the same tx) so that their contents are * in sync and can be compared. Their contents relate to each other * in a simple way: packobj is a dense array of 'bufwad' structures, * while bigobj is a sparse array of the same bufwads. Specifically, * for any index n, there are three bufwads that should be identical: * * packobj, at offset n * sizeof (bufwad_t) * bigobj, at the head of the nth chunk * bigobj, at the tail of the nth chunk * * The chunk size is set equal to bigobj block size so that * dmu_assign_arcbuf_by_dbuf() can be tested for object updates. */ /* * Read the directory info. If it's the first time, set things up. */ ztest_od_init(od, id, FTAG, 0, DMU_OT_UINT64_OTHER, blocksize, 0, 0); ztest_od_init(od + 1, id, FTAG, 1, DMU_OT_UINT64_OTHER, 0, 0, chunksize); if (ztest_object_init(zd, od, size, B_FALSE) != 0) { umem_free(od, size); return; } bigobj = od[0].od_object; packobj = od[1].od_object; blocksize = od[0].od_blocksize; chunksize = blocksize; ASSERT3U(chunksize, ==, od[1].od_gen); VERIFY0(dmu_object_info(os, bigobj, &doi)); VERIFY(ISP2(doi.doi_data_block_size)); VERIFY3U(chunksize, ==, doi.doi_data_block_size); VERIFY3U(chunksize, >=, 2 * sizeof (bufwad_t)); /* * Pick a random index and compute the offsets into packobj and bigobj. */ n = ztest_random(regions) * stride + ztest_random(width); s = 1 + ztest_random(width - 1); packoff = n * sizeof (bufwad_t); packsize = s * sizeof (bufwad_t); bigoff = n * chunksize; bigsize = s * chunksize; packbuf = umem_zalloc(packsize, UMEM_NOFAIL); bigbuf = umem_zalloc(bigsize, UMEM_NOFAIL); VERIFY0(dmu_bonus_hold(os, bigobj, FTAG, &bonus_db)); bigbuf_arcbufs = umem_zalloc(2 * s * sizeof (arc_buf_t *), UMEM_NOFAIL); /* * Iteration 0 test zcopy for DB_UNCACHED dbufs. * Iteration 1 test zcopy to already referenced dbufs. * Iteration 2 test zcopy to dirty dbuf in the same txg. 
* Iteration 3 test zcopy to dbuf dirty in previous txg. * Iteration 4 test zcopy when dbuf is no longer dirty. * Iteration 5 test zcopy when it can't be done. * Iteration 6 one more zcopy write. */ for (i = 0; i < 7; i++) { uint64_t j; uint64_t off; /* * In iteration 5 (i == 5) use arcbufs * that don't match bigobj blksz to test * dmu_assign_arcbuf_by_dbuf() when it can't directly * assign an arcbuf to a dbuf. */ for (j = 0; j < s; j++) { if (i != 5 || chunksize < (SPA_MINBLOCKSIZE * 2)) { bigbuf_arcbufs[j] = dmu_request_arcbuf(bonus_db, chunksize); } else { bigbuf_arcbufs[2 * j] = dmu_request_arcbuf(bonus_db, chunksize / 2); bigbuf_arcbufs[2 * j + 1] = dmu_request_arcbuf(bonus_db, chunksize / 2); } } /* * Get a tx for the mods to both packobj and bigobj. */ tx = dmu_tx_create(os); dmu_tx_hold_write(tx, packobj, packoff, packsize); dmu_tx_hold_write(tx, bigobj, bigoff, bigsize); txg = ztest_tx_assign(tx, TXG_MIGHTWAIT, FTAG); if (txg == 0) { umem_free(packbuf, packsize); umem_free(bigbuf, bigsize); for (j = 0; j < s; j++) { if (i != 5 || chunksize < (SPA_MINBLOCKSIZE * 2)) { dmu_return_arcbuf(bigbuf_arcbufs[j]); } else { dmu_return_arcbuf( bigbuf_arcbufs[2 * j]); dmu_return_arcbuf( bigbuf_arcbufs[2 * j + 1]); } } umem_free(bigbuf_arcbufs, 2 * s * sizeof (arc_buf_t *)); umem_free(od, size); dmu_buf_rele(bonus_db, FTAG); return; } /* * 50% of the time don't read objects in the 1st iteration to * test dmu_assign_arcbuf_by_dbuf() for the case when there are * no existing dbufs for the specified offsets. */ if (i != 0 || ztest_random(2) != 0) { error = dmu_read(os, packobj, packoff, packsize, packbuf, DMU_READ_PREFETCH); ASSERT0(error); error = dmu_read(os, bigobj, bigoff, bigsize, bigbuf, DMU_READ_PREFETCH); ASSERT0(error); } compare_and_update_pbbufs(s, packbuf, bigbuf, bigsize, n, chunksize, txg); /* * We've verified all the old bufwads, and made new ones. * Now write them out. */ dmu_write(os, packobj, packoff, packsize, packbuf, tx); if (ztest_opts.zo_verbose >= 7) { (void) printf("writing offset %"PRIx64" size %"PRIx64"" " txg %"PRIx64"\n", bigoff, bigsize, txg); } for (off = bigoff, j = 0; j < s; j++, off += chunksize) { dmu_buf_t *dbt; if (i != 5 || chunksize < (SPA_MINBLOCKSIZE * 2)) { memcpy(bigbuf_arcbufs[j]->b_data, (caddr_t)bigbuf + (off - bigoff), chunksize); } else { memcpy(bigbuf_arcbufs[2 * j]->b_data, (caddr_t)bigbuf + (off - bigoff), chunksize / 2); memcpy(bigbuf_arcbufs[2 * j + 1]->b_data, (caddr_t)bigbuf + (off - bigoff) + chunksize / 2, chunksize / 2); } if (i == 1) { VERIFY(dmu_buf_hold(os, bigobj, off, FTAG, &dbt, DMU_READ_NO_PREFETCH) == 0); } if (i != 5 || chunksize < (SPA_MINBLOCKSIZE * 2)) { VERIFY0(dmu_assign_arcbuf_by_dbuf(bonus_db, off, bigbuf_arcbufs[j], tx)); } else { VERIFY0(dmu_assign_arcbuf_by_dbuf(bonus_db, off, bigbuf_arcbufs[2 * j], tx)); VERIFY0(dmu_assign_arcbuf_by_dbuf(bonus_db, off + chunksize / 2, bigbuf_arcbufs[2 * j + 1], tx)); } if (i == 1) { dmu_buf_rele(dbt, FTAG); } } dmu_tx_commit(tx); /* * Sanity check the stuff we just wrote. 
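 * Both objects are read back through the DMU and compared byte-for-byte with
 * the buffers we just wrote; any mismatch indicates a lost or misdirected
 * write.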
*/ { void *packcheck = umem_alloc(packsize, UMEM_NOFAIL); void *bigcheck = umem_alloc(bigsize, UMEM_NOFAIL); VERIFY0(dmu_read(os, packobj, packoff, packsize, packcheck, DMU_READ_PREFETCH)); VERIFY0(dmu_read(os, bigobj, bigoff, bigsize, bigcheck, DMU_READ_PREFETCH)); ASSERT0(memcmp(packbuf, packcheck, packsize)); ASSERT0(memcmp(bigbuf, bigcheck, bigsize)); umem_free(packcheck, packsize); umem_free(bigcheck, bigsize); } if (i == 2) { txg_wait_open(dmu_objset_pool(os), 0, B_TRUE); } else if (i == 3) { txg_wait_synced(dmu_objset_pool(os), 0); } } dmu_buf_rele(bonus_db, FTAG); umem_free(packbuf, packsize); umem_free(bigbuf, bigsize); umem_free(bigbuf_arcbufs, 2 * s * sizeof (arc_buf_t *)); umem_free(od, size); } void ztest_dmu_write_parallel(ztest_ds_t *zd, uint64_t id) { (void) id; ztest_od_t *od; od = umem_alloc(sizeof (ztest_od_t), UMEM_NOFAIL); uint64_t offset = (1ULL << (ztest_random(20) + 43)) + (ztest_random(ZTEST_RANGE_LOCKS) << SPA_MAXBLOCKSHIFT); /* * Have multiple threads write to large offsets in an object * to verify that parallel writes to an object -- even to the * same blocks within the object -- doesn't cause any trouble. */ ztest_od_init(od, ID_PARALLEL, FTAG, 0, DMU_OT_UINT64_OTHER, 0, 0, 0); if (ztest_object_init(zd, od, sizeof (ztest_od_t), B_FALSE) != 0) return; while (ztest_random(10) != 0) ztest_io(zd, od->od_object, offset); umem_free(od, sizeof (ztest_od_t)); } void ztest_dmu_prealloc(ztest_ds_t *zd, uint64_t id) { ztest_od_t *od; uint64_t offset = (1ULL << (ztest_random(4) + SPA_MAXBLOCKSHIFT)) + (ztest_random(ZTEST_RANGE_LOCKS) << SPA_MAXBLOCKSHIFT); uint64_t count = ztest_random(20) + 1; uint64_t blocksize = ztest_random_blocksize(); void *data; od = umem_alloc(sizeof (ztest_od_t), UMEM_NOFAIL); ztest_od_init(od, id, FTAG, 0, DMU_OT_UINT64_OTHER, blocksize, 0, 0); if (ztest_object_init(zd, od, sizeof (ztest_od_t), !ztest_random(2)) != 0) { umem_free(od, sizeof (ztest_od_t)); return; } if (ztest_truncate(zd, od->od_object, offset, count * blocksize) != 0) { umem_free(od, sizeof (ztest_od_t)); return; } ztest_prealloc(zd, od->od_object, offset, count * blocksize); data = umem_zalloc(blocksize, UMEM_NOFAIL); while (ztest_random(count) != 0) { uint64_t randoff = offset + (ztest_random(count) * blocksize); if (ztest_write(zd, od->od_object, randoff, blocksize, data) != 0) break; while (ztest_random(4) != 0) ztest_io(zd, od->od_object, randoff); } umem_free(data, blocksize); umem_free(od, sizeof (ztest_od_t)); } /* * Verify that zap_{create,destroy,add,remove,update} work as expected. */ #define ZTEST_ZAP_MIN_INTS 1 #define ZTEST_ZAP_MAX_INTS 4 #define ZTEST_ZAP_MAX_PROPS 1000 void ztest_zap(ztest_ds_t *zd, uint64_t id) { objset_t *os = zd->zd_os; ztest_od_t *od; uint64_t object; uint64_t txg, last_txg; uint64_t value[ZTEST_ZAP_MAX_INTS]; uint64_t zl_ints, zl_intsize, prop; int i, ints; dmu_tx_t *tx; char propname[100], txgname[100]; int error; const char *const hc[2] = { "s.acl.h", ".s.open.h.hyLZlg" }; od = umem_alloc(sizeof (ztest_od_t), UMEM_NOFAIL); ztest_od_init(od, id, FTAG, 0, DMU_OT_ZAP_OTHER, 0, 0, 0); if (ztest_object_init(zd, od, sizeof (ztest_od_t), !ztest_random(2)) != 0) goto out; object = od->od_object; /* * Generate a known hash collision, and verify that * we can lookup and remove both entries. 
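 * The two names in hc[] are believed to hash to the same value under the ZAP
 * hash function, so this exercises the collision-handling path rather than
 * the common single-entry case.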
*/ tx = dmu_tx_create(os); dmu_tx_hold_zap(tx, object, B_TRUE, NULL); txg = ztest_tx_assign(tx, TXG_MIGHTWAIT, FTAG); if (txg == 0) goto out; for (i = 0; i < 2; i++) { value[i] = i; VERIFY0(zap_add(os, object, hc[i], sizeof (uint64_t), 1, &value[i], tx)); } for (i = 0; i < 2; i++) { VERIFY3U(EEXIST, ==, zap_add(os, object, hc[i], sizeof (uint64_t), 1, &value[i], tx)); VERIFY0( zap_length(os, object, hc[i], &zl_intsize, &zl_ints)); ASSERT3U(zl_intsize, ==, sizeof (uint64_t)); ASSERT3U(zl_ints, ==, 1); } for (i = 0; i < 2; i++) { VERIFY0(zap_remove(os, object, hc[i], tx)); } dmu_tx_commit(tx); /* * Generate a bunch of random entries. */ ints = MAX(ZTEST_ZAP_MIN_INTS, object % ZTEST_ZAP_MAX_INTS); prop = ztest_random(ZTEST_ZAP_MAX_PROPS); (void) sprintf(propname, "prop_%"PRIu64"", prop); (void) sprintf(txgname, "txg_%"PRIu64"", prop); memset(value, 0, sizeof (value)); last_txg = 0; /* * If these zap entries already exist, validate their contents. */ error = zap_length(os, object, txgname, &zl_intsize, &zl_ints); if (error == 0) { ASSERT3U(zl_intsize, ==, sizeof (uint64_t)); ASSERT3U(zl_ints, ==, 1); VERIFY0(zap_lookup(os, object, txgname, zl_intsize, zl_ints, &last_txg)); VERIFY0(zap_length(os, object, propname, &zl_intsize, &zl_ints)); ASSERT3U(zl_intsize, ==, sizeof (uint64_t)); ASSERT3U(zl_ints, ==, ints); VERIFY0(zap_lookup(os, object, propname, zl_intsize, zl_ints, value)); for (i = 0; i < ints; i++) { ASSERT3U(value[i], ==, last_txg + object + i); } } else { ASSERT3U(error, ==, ENOENT); } /* * Atomically update two entries in our zap object. * The first is named txg_%llu, and contains the txg * in which the property was last updated. The second * is named prop_%llu, and the nth element of its value * should be txg + object + n. */ tx = dmu_tx_create(os); dmu_tx_hold_zap(tx, object, B_TRUE, NULL); txg = ztest_tx_assign(tx, TXG_MIGHTWAIT, FTAG); if (txg == 0) goto out; if (last_txg > txg) fatal(B_FALSE, "zap future leak: old %"PRIu64" new %"PRIu64"", last_txg, txg); for (i = 0; i < ints; i++) value[i] = txg + object + i; VERIFY0(zap_update(os, object, txgname, sizeof (uint64_t), 1, &txg, tx)); VERIFY0(zap_update(os, object, propname, sizeof (uint64_t), ints, value, tx)); dmu_tx_commit(tx); /* * Remove a random pair of entries. */ prop = ztest_random(ZTEST_ZAP_MAX_PROPS); (void) sprintf(propname, "prop_%"PRIu64"", prop); (void) sprintf(txgname, "txg_%"PRIu64"", prop); error = zap_length(os, object, txgname, &zl_intsize, &zl_ints); if (error == ENOENT) goto out; ASSERT0(error); tx = dmu_tx_create(os); dmu_tx_hold_zap(tx, object, B_TRUE, NULL); txg = ztest_tx_assign(tx, TXG_MIGHTWAIT, FTAG); if (txg == 0) goto out; VERIFY0(zap_remove(os, object, txgname, tx)); VERIFY0(zap_remove(os, object, propname, tx)); dmu_tx_commit(tx); out: umem_free(od, sizeof (ztest_od_t)); } /* * Test case to test the upgrading of a microzap to fatzap. */ void ztest_fzap(ztest_ds_t *zd, uint64_t id) { objset_t *os = zd->zd_os; ztest_od_t *od; uint64_t object, txg, value; od = umem_alloc(sizeof (ztest_od_t), UMEM_NOFAIL); ztest_od_init(od, id, FTAG, 0, DMU_OT_ZAP_OTHER, 0, 0, 0); if (ztest_object_init(zd, od, sizeof (ztest_od_t), !ztest_random(2)) != 0) goto out; object = od->od_object; /* * Add entries to this ZAP and make sure it spills over * and gets upgraded to a fatzap. Also, since we are adding * 2050 entries we should see ptrtbl growth and leaf-block split. 
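 * A microzap is confined to a single block of fixed-size (64-byte) entries,
 * which even at the largest block size allows only about 2047 of them, so
 * 2050 insertions are enough to force the fatzap conversion.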
*/ for (value = 0; value < 2050; value++) { char name[ZFS_MAX_DATASET_NAME_LEN]; dmu_tx_t *tx; int error; (void) snprintf(name, sizeof (name), "fzap-%"PRIu64"-%"PRIu64"", id, value); tx = dmu_tx_create(os); dmu_tx_hold_zap(tx, object, B_TRUE, name); txg = ztest_tx_assign(tx, TXG_MIGHTWAIT, FTAG); if (txg == 0) goto out; error = zap_add(os, object, name, sizeof (uint64_t), 1, &value, tx); ASSERT(error == 0 || error == EEXIST); dmu_tx_commit(tx); } out: umem_free(od, sizeof (ztest_od_t)); } void ztest_zap_parallel(ztest_ds_t *zd, uint64_t id) { (void) id; objset_t *os = zd->zd_os; ztest_od_t *od; uint64_t txg, object, count, wsize, wc, zl_wsize, zl_wc; dmu_tx_t *tx; int i, namelen, error; int micro = ztest_random(2); char name[20], string_value[20]; void *data; od = umem_alloc(sizeof (ztest_od_t), UMEM_NOFAIL); ztest_od_init(od, ID_PARALLEL, FTAG, micro, DMU_OT_ZAP_OTHER, 0, 0, 0); if (ztest_object_init(zd, od, sizeof (ztest_od_t), B_FALSE) != 0) { umem_free(od, sizeof (ztest_od_t)); return; } object = od->od_object; /* * Generate a random name of the form 'xxx.....' where each * x is a random printable character and the dots are dots. * There are 94 such characters, and the name length goes from * 6 to 20, so there are 94^3 * 15 = 12,458,760 possible names. */ namelen = ztest_random(sizeof (name) - 5) + 5 + 1; for (i = 0; i < 3; i++) name[i] = '!' + ztest_random('~' - '!' + 1); for (; i < namelen - 1; i++) name[i] = '.'; name[i] = '\0'; if ((namelen & 1) || micro) { wsize = sizeof (txg); wc = 1; data = &txg; } else { wsize = 1; wc = namelen; data = string_value; } count = -1ULL; VERIFY0(zap_count(os, object, &count)); ASSERT3S(count, !=, -1ULL); /* * Select an operation: length, lookup, add, update, remove. */ i = ztest_random(5); if (i >= 2) { tx = dmu_tx_create(os); dmu_tx_hold_zap(tx, object, B_TRUE, NULL); txg = ztest_tx_assign(tx, TXG_MIGHTWAIT, FTAG); if (txg == 0) { umem_free(od, sizeof (ztest_od_t)); return; } memcpy(string_value, name, namelen); } else { tx = NULL; txg = 0; memset(string_value, 0, namelen); } switch (i) { case 0: error = zap_length(os, object, name, &zl_wsize, &zl_wc); if (error == 0) { ASSERT3U(wsize, ==, zl_wsize); ASSERT3U(wc, ==, zl_wc); } else { ASSERT3U(error, ==, ENOENT); } break; case 1: error = zap_lookup(os, object, name, wsize, wc, data); if (error == 0) { if (data == string_value && memcmp(name, data, namelen) != 0) fatal(B_FALSE, "name '%s' != val '%s' len %d", name, (char *)data, namelen); } else { ASSERT3U(error, ==, ENOENT); } break; case 2: error = zap_add(os, object, name, wsize, wc, data, tx); ASSERT(error == 0 || error == EEXIST); break; case 3: VERIFY0(zap_update(os, object, name, wsize, wc, data, tx)); break; case 4: error = zap_remove(os, object, name, tx); ASSERT(error == 0 || error == ENOENT); break; } if (tx != NULL) dmu_tx_commit(tx); umem_free(od, sizeof (ztest_od_t)); } /* * Commit callback data. 
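 * One of these is allocated per registered callback; it records the txg the
 * callback was registered in so that ztest_commit_callback() can verify the
 * callback never fires before that txg has been synced.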
*/ typedef struct ztest_cb_data { list_node_t zcd_node; uint64_t zcd_txg; int zcd_expected_err; boolean_t zcd_added; boolean_t zcd_called; spa_t *zcd_spa; } ztest_cb_data_t; /* This is the actual commit callback function */ static void ztest_commit_callback(void *arg, int error) { ztest_cb_data_t *data = arg; uint64_t synced_txg; VERIFY3P(data, !=, NULL); VERIFY3S(data->zcd_expected_err, ==, error); VERIFY(!data->zcd_called); synced_txg = spa_last_synced_txg(data->zcd_spa); if (data->zcd_txg > synced_txg) fatal(B_FALSE, "commit callback of txg %"PRIu64" called prematurely, " "last synced txg = %"PRIu64"\n", data->zcd_txg, synced_txg); data->zcd_called = B_TRUE; if (error == ECANCELED) { ASSERT0(data->zcd_txg); ASSERT(!data->zcd_added); /* * The private callback data should be destroyed here, but * since we are going to check the zcd_called field after * dmu_tx_abort(), we will destroy it there. */ return; } ASSERT(data->zcd_added); ASSERT3U(data->zcd_txg, !=, 0); (void) mutex_enter(&zcl.zcl_callbacks_lock); /* See if this cb was called more quickly */ if ((synced_txg - data->zcd_txg) < zc_min_txg_delay) zc_min_txg_delay = synced_txg - data->zcd_txg; /* Remove our callback from the list */ list_remove(&zcl.zcl_callbacks, data); (void) mutex_exit(&zcl.zcl_callbacks_lock); umem_free(data, sizeof (ztest_cb_data_t)); } /* Allocate and initialize callback data structure */ static ztest_cb_data_t * ztest_create_cb_data(objset_t *os, uint64_t txg) { ztest_cb_data_t *cb_data; cb_data = umem_zalloc(sizeof (ztest_cb_data_t), UMEM_NOFAIL); cb_data->zcd_txg = txg; cb_data->zcd_spa = dmu_objset_spa(os); list_link_init(&cb_data->zcd_node); return (cb_data); } /* * Commit callback test. */ void ztest_dmu_commit_callbacks(ztest_ds_t *zd, uint64_t id) { objset_t *os = zd->zd_os; ztest_od_t *od; dmu_tx_t *tx; ztest_cb_data_t *cb_data[3], *tmp_cb; uint64_t old_txg, txg; int i, error = 0; od = umem_alloc(sizeof (ztest_od_t), UMEM_NOFAIL); ztest_od_init(od, id, FTAG, 0, DMU_OT_UINT64_OTHER, 0, 0, 0); if (ztest_object_init(zd, od, sizeof (ztest_od_t), B_FALSE) != 0) { umem_free(od, sizeof (ztest_od_t)); return; } tx = dmu_tx_create(os); cb_data[0] = ztest_create_cb_data(os, 0); dmu_tx_callback_register(tx, ztest_commit_callback, cb_data[0]); dmu_tx_hold_write(tx, od->od_object, 0, sizeof (uint64_t)); /* Every once in a while, abort the transaction on purpose */ if (ztest_random(100) == 0) error = -1; if (!error) error = dmu_tx_assign(tx, TXG_NOWAIT); txg = error ? 0 : dmu_tx_get_txg(tx); cb_data[0]->zcd_txg = txg; cb_data[1] = ztest_create_cb_data(os, txg); dmu_tx_callback_register(tx, ztest_commit_callback, cb_data[1]); if (error) { /* * It's not a strict requirement to call the registered * callbacks from inside dmu_tx_abort(), but that's what * it's supposed to happen in the current implementation * so we will check for that. */ for (i = 0; i < 2; i++) { cb_data[i]->zcd_expected_err = ECANCELED; VERIFY(!cb_data[i]->zcd_called); } dmu_tx_abort(tx); for (i = 0; i < 2; i++) { VERIFY(cb_data[i]->zcd_called); umem_free(cb_data[i], sizeof (ztest_cb_data_t)); } umem_free(od, sizeof (ztest_od_t)); return; } cb_data[2] = ztest_create_cb_data(os, txg); dmu_tx_callback_register(tx, ztest_commit_callback, cb_data[2]); /* * Read existing data to make sure there isn't a future leak. 
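 * A "future leak" would mean the object already holds a txg value larger than
 * the txg we are writing in, i.e. a write from a later txg made it to stable
 * storage ahead of time.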
*/ VERIFY0(dmu_read(os, od->od_object, 0, sizeof (uint64_t), &old_txg, DMU_READ_PREFETCH)); if (old_txg > txg) fatal(B_FALSE, "future leak: got %"PRIu64", open txg is %"PRIu64"", old_txg, txg); dmu_write(os, od->od_object, 0, sizeof (uint64_t), &txg, tx); (void) mutex_enter(&zcl.zcl_callbacks_lock); /* * Since commit callbacks don't have any ordering requirement and since * it is theoretically possible for a commit callback to be called * after an arbitrary amount of time has elapsed since its txg has been * synced, it is difficult to reliably determine whether a commit * callback hasn't been called due to high load or due to a flawed * implementation. * * In practice, we will assume that if after a certain number of txgs a * commit callback hasn't been called, then most likely there's an * implementation bug. */ tmp_cb = list_head(&zcl.zcl_callbacks); if (tmp_cb != NULL && tmp_cb->zcd_txg + ZTEST_COMMIT_CB_THRESH < txg) { fatal(B_FALSE, "Commit callback threshold exceeded, " "oldest txg: %"PRIu64", open txg: %"PRIu64"\n", tmp_cb->zcd_txg, txg); } /* * Let's find the place to insert our callbacks. * * Even though the list is ordered by txg, it is possible for the * insertion point to not be the end because our txg may already be * quiescing at this point and other callbacks in the open txg * (from other objsets) may have sneaked in. */ tmp_cb = list_tail(&zcl.zcl_callbacks); while (tmp_cb != NULL && tmp_cb->zcd_txg > txg) tmp_cb = list_prev(&zcl.zcl_callbacks, tmp_cb); /* Add the 3 callbacks to the list */ for (i = 0; i < 3; i++) { if (tmp_cb == NULL) list_insert_head(&zcl.zcl_callbacks, cb_data[i]); else list_insert_after(&zcl.zcl_callbacks, tmp_cb, cb_data[i]); cb_data[i]->zcd_added = B_TRUE; VERIFY(!cb_data[i]->zcd_called); tmp_cb = cb_data[i]; } zc_cb_counter += 3; (void) mutex_exit(&zcl.zcl_callbacks_lock); dmu_tx_commit(tx); umem_free(od, sizeof (ztest_od_t)); } /* * Visit each object in the dataset. Verify that its properties * are consistent with what was stored in the block tag when it was created, * and that its unused bonus buffer space has not been overwritten.
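 * Objects whose bonus buffer is too small to hold a block tag, or whose tag
 * does not carry BT_MAGIC, are skipped rather than treated as errors.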
*/ void ztest_verify_dnode_bt(ztest_ds_t *zd, uint64_t id) { (void) id; objset_t *os = zd->zd_os; uint64_t obj; int err = 0; for (obj = 0; err == 0; err = dmu_object_next(os, &obj, FALSE, 0)) { ztest_block_tag_t *bt = NULL; dmu_object_info_t doi; dmu_buf_t *db; ztest_object_lock(zd, obj, RL_READER); if (dmu_bonus_hold(os, obj, FTAG, &db) != 0) { ztest_object_unlock(zd, obj); continue; } dmu_object_info_from_db(db, &doi); if (doi.doi_bonus_size >= sizeof (*bt)) bt = ztest_bt_bonus(db); if (bt && bt->bt_magic == BT_MAGIC) { ztest_bt_verify(bt, os, obj, doi.doi_dnodesize, bt->bt_offset, bt->bt_gen, bt->bt_txg, bt->bt_crtxg); ztest_verify_unused_bonus(db, bt, obj, os, bt->bt_gen); } dmu_buf_rele(db, FTAG); ztest_object_unlock(zd, obj); } } void ztest_dsl_prop_get_set(ztest_ds_t *zd, uint64_t id) { (void) id; zfs_prop_t proplist[] = { ZFS_PROP_CHECKSUM, ZFS_PROP_COMPRESSION, ZFS_PROP_COPIES, ZFS_PROP_DEDUP }; (void) pthread_rwlock_rdlock(&ztest_name_lock); for (int p = 0; p < sizeof (proplist) / sizeof (proplist[0]); p++) (void) ztest_dsl_prop_set_uint64(zd->zd_name, proplist[p], ztest_random_dsl_prop(proplist[p]), (int)ztest_random(2)); VERIFY0(ztest_dsl_prop_set_uint64(zd->zd_name, ZFS_PROP_RECORDSIZE, ztest_random_blocksize(), (int)ztest_random(2))); (void) pthread_rwlock_unlock(&ztest_name_lock); } void ztest_spa_prop_get_set(ztest_ds_t *zd, uint64_t id) { (void) zd, (void) id; nvlist_t *props = NULL; (void) pthread_rwlock_rdlock(&ztest_name_lock); (void) ztest_spa_prop_set_uint64(ZPOOL_PROP_AUTOTRIM, ztest_random(2)); VERIFY0(spa_prop_get(ztest_spa, &props)); if (ztest_opts.zo_verbose >= 6) dump_nvlist(props, 4); fnvlist_free(props); (void) pthread_rwlock_unlock(&ztest_name_lock); } static int user_release_one(const char *snapname, const char *holdname) { nvlist_t *snaps, *holds; int error; snaps = fnvlist_alloc(); holds = fnvlist_alloc(); fnvlist_add_boolean(holds, holdname); fnvlist_add_nvlist(snaps, snapname, holds); fnvlist_free(holds); error = dsl_dataset_user_release(snaps, NULL); fnvlist_free(snaps); return (error); } /* * Test snapshot hold/release and deferred destroy. */ void ztest_dmu_snapshot_hold(ztest_ds_t *zd, uint64_t id) { int error; objset_t *os = zd->zd_os; objset_t *origin; char snapname[100]; char fullname[100]; char clonename[100]; char tag[100]; char osname[ZFS_MAX_DATASET_NAME_LEN]; nvlist_t *holds; (void) pthread_rwlock_rdlock(&ztest_name_lock); dmu_objset_name(os, osname); (void) snprintf(snapname, sizeof (snapname), "sh1_%"PRIu64"", id); (void) snprintf(fullname, sizeof (fullname), "%s@%s", osname, snapname); (void) snprintf(clonename, sizeof (clonename), "%s/ch1_%"PRIu64"", osname, id); (void) snprintf(tag, sizeof (tag), "tag_%"PRIu64"", id); /* * Clean up from any previous run. */ error = dsl_destroy_head(clonename); if (error != ENOENT) ASSERT0(error); error = user_release_one(fullname, tag); if (error != ESRCH && error != ENOENT) ASSERT0(error); error = dsl_destroy_snapshot(fullname, B_FALSE); if (error != ENOENT) ASSERT0(error); /* * Create snapshot, clone it, mark snap for deferred destroy, * destroy clone, verify snap was also destroyed. 
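 * A snapshot marked for deferred destroy persists only while something still
 * references it, so destroying the clone is what should finally remove it.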
*/ error = dmu_objset_snapshot_one(osname, snapname); if (error) { if (error == ENOSPC) { ztest_record_enospc("dmu_objset_snapshot"); goto out; } fatal(B_FALSE, "dmu_objset_snapshot(%s) = %d", fullname, error); } error = dmu_objset_clone(clonename, fullname); if (error) { if (error == ENOSPC) { ztest_record_enospc("dmu_objset_clone"); goto out; } fatal(B_FALSE, "dmu_objset_clone(%s) = %d", clonename, error); } error = dsl_destroy_snapshot(fullname, B_TRUE); if (error) { fatal(B_FALSE, "dsl_destroy_snapshot(%s, B_TRUE) = %d", fullname, error); } error = dsl_destroy_head(clonename); if (error) fatal(B_FALSE, "dsl_destroy_head(%s) = %d", clonename, error); error = dmu_objset_hold(fullname, FTAG, &origin); if (error != ENOENT) fatal(B_FALSE, "dmu_objset_hold(%s) = %d", fullname, error); /* * Create snapshot, add temporary hold, verify that we can't * destroy a held snapshot, mark for deferred destroy, * release hold, verify snapshot was destroyed. */ error = dmu_objset_snapshot_one(osname, snapname); if (error) { if (error == ENOSPC) { ztest_record_enospc("dmu_objset_snapshot"); goto out; } fatal(B_FALSE, "dmu_objset_snapshot(%s) = %d", fullname, error); } holds = fnvlist_alloc(); fnvlist_add_string(holds, fullname, tag); error = dsl_dataset_user_hold(holds, 0, NULL); fnvlist_free(holds); if (error == ENOSPC) { ztest_record_enospc("dsl_dataset_user_hold"); goto out; } else if (error) { fatal(B_FALSE, "dsl_dataset_user_hold(%s, %s) = %u", fullname, tag, error); } error = dsl_destroy_snapshot(fullname, B_FALSE); if (error != EBUSY) { fatal(B_FALSE, "dsl_destroy_snapshot(%s, B_FALSE) = %d", fullname, error); } error = dsl_destroy_snapshot(fullname, B_TRUE); if (error) { fatal(B_FALSE, "dsl_destroy_snapshot(%s, B_TRUE) = %d", fullname, error); } error = user_release_one(fullname, tag); if (error) fatal(B_FALSE, "user_release_one(%s, %s) = %d", fullname, tag, error); VERIFY3U(dmu_objset_hold(fullname, FTAG, &origin), ==, ENOENT); out: (void) pthread_rwlock_unlock(&ztest_name_lock); } /* * Inject random faults into the on-disk data. */ void ztest_fault_inject(ztest_ds_t *zd, uint64_t id) { (void) zd, (void) id; ztest_shared_t *zs = ztest_shared; spa_t *spa = ztest_spa; int fd; uint64_t offset; uint64_t leaves; uint64_t bad = 0x1990c0ffeedecadeull; uint64_t top, leaf; char *path0; char *pathrand; size_t fsize; int bshift = SPA_MAXBLOCKSHIFT + 2; int iters = 1000; int maxfaults; int mirror_save; vdev_t *vd0 = NULL; uint64_t guid0 = 0; boolean_t islog = B_FALSE; path0 = umem_alloc(MAXPATHLEN, UMEM_NOFAIL); pathrand = umem_alloc(MAXPATHLEN, UMEM_NOFAIL); mutex_enter(&ztest_vdev_lock); /* * Device removal is in progress, fault injection must be disabled * until it completes and the pool is scrubbed. The fault injection * strategy for damaging blocks does not take in to account evacuated * blocks which may have already been damaged. */ if (ztest_device_removal_active) { mutex_exit(&ztest_vdev_lock); goto out; } maxfaults = MAXFAULTS(zs); leaves = MAX(zs->zs_mirrors, 1) * ztest_opts.zo_raid_children; mirror_save = zs->zs_mirrors; mutex_exit(&ztest_vdev_lock); ASSERT3U(leaves, >=, 1); /* * While ztest is running the number of leaves will not change. This * is critical for the fault injection logic as it determines where * errors can be safely injected such that they are always repairable. * * When restarting ztest a different number of leaves may be requested * which will shift the regions to be damaged. This is fine as long * as the pool has been scrubbed prior to using the new mapping. 
* Failure to do can result in non-repairable damage being injected. */ if (ztest_pool_scrubbed == B_FALSE) goto out; /* * Grab the name lock as reader. There are some operations * which don't like to have their vdevs changed while * they are in progress (i.e. spa_change_guid). Those * operations will have grabbed the name lock as writer. */ (void) pthread_rwlock_rdlock(&ztest_name_lock); /* * We need SCL_STATE here because we're going to look at vd0->vdev_tsd. */ spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); if (ztest_random(2) == 0) { /* * Inject errors on a normal data device or slog device. */ top = ztest_random_vdev_top(spa, B_TRUE); leaf = ztest_random(leaves) + zs->zs_splits; /* * Generate paths to the first leaf in this top-level vdev, * and to the random leaf we selected. We'll induce transient * write failures and random online/offline activity on leaf 0, * and we'll write random garbage to the randomly chosen leaf. */ (void) snprintf(path0, MAXPATHLEN, ztest_dev_template, ztest_opts.zo_dir, ztest_opts.zo_pool, top * leaves + zs->zs_splits); (void) snprintf(pathrand, MAXPATHLEN, ztest_dev_template, ztest_opts.zo_dir, ztest_opts.zo_pool, top * leaves + leaf); vd0 = vdev_lookup_by_path(spa->spa_root_vdev, path0); if (vd0 != NULL && vd0->vdev_top->vdev_islog) islog = B_TRUE; /* * If the top-level vdev needs to be resilvered * then we only allow faults on the device that is * resilvering. */ if (vd0 != NULL && maxfaults != 1 && (!vdev_resilver_needed(vd0->vdev_top, NULL, NULL) || vd0->vdev_resilver_txg != 0)) { /* * Make vd0 explicitly claim to be unreadable, * or unwritable, or reach behind its back * and close the underlying fd. We can do this if * maxfaults == 0 because we'll fail and reexecute, * and we can do it if maxfaults >= 2 because we'll * have enough redundancy. If maxfaults == 1, the * combination of this with injection of random data * corruption below exceeds the pool's fault tolerance. */ vdev_file_t *vf = vd0->vdev_tsd; zfs_dbgmsg("injecting fault to vdev %llu; maxfaults=%d", (long long)vd0->vdev_id, (int)maxfaults); if (vf != NULL && ztest_random(3) == 0) { (void) close(vf->vf_file->f_fd); vf->vf_file->f_fd = -1; } else if (ztest_random(2) == 0) { vd0->vdev_cant_read = B_TRUE; } else { vd0->vdev_cant_write = B_TRUE; } guid0 = vd0->vdev_guid; } } else { /* * Inject errors on an l2cache device. */ spa_aux_vdev_t *sav = &spa->spa_l2cache; if (sav->sav_count == 0) { spa_config_exit(spa, SCL_STATE, FTAG); (void) pthread_rwlock_unlock(&ztest_name_lock); goto out; } vd0 = sav->sav_vdevs[ztest_random(sav->sav_count)]; guid0 = vd0->vdev_guid; (void) strcpy(path0, vd0->vdev_path); (void) strcpy(pathrand, vd0->vdev_path); leaf = 0; leaves = 1; maxfaults = INT_MAX; /* no limit on cache devices */ } spa_config_exit(spa, SCL_STATE, FTAG); (void) pthread_rwlock_unlock(&ztest_name_lock); /* * If we can tolerate two or more faults, or we're dealing * with a slog, randomly online/offline vd0. */ if ((maxfaults >= 2 || islog) && guid0 != 0) { if (ztest_random(10) < 6) { int flags = (ztest_random(2) == 0 ? ZFS_OFFLINE_TEMPORARY : 0); /* * We have to grab the zs_name_lock as writer to * prevent a race between offlining a slog and * destroying a dataset. Offlining the slog will * grab a reference on the dataset which may cause * dsl_destroy_head() to fail with EBUSY thus * leaving the dataset in an inconsistent state. 
*/ if (islog) (void) pthread_rwlock_wrlock(&ztest_name_lock); VERIFY3U(vdev_offline(spa, guid0, flags), !=, EBUSY); if (islog) (void) pthread_rwlock_unlock(&ztest_name_lock); } else { /* * Ideally we would like to be able to randomly * call vdev_[on|off]line without holding locks * to force unpredictable failures but the side * effects of vdev_[on|off]line prevent us from * doing so. We grab the ztest_vdev_lock here to * prevent a race between injection testing and * aux_vdev removal. */ mutex_enter(&ztest_vdev_lock); (void) vdev_online(spa, guid0, 0, NULL); mutex_exit(&ztest_vdev_lock); } } if (maxfaults == 0) goto out; /* * We have at least single-fault tolerance, so inject data corruption. */ fd = open(pathrand, O_RDWR); if (fd == -1) /* we hit a gap in the device namespace */ goto out; fsize = lseek(fd, 0, SEEK_END); while (--iters != 0) { /* * The offset must be chosen carefully to ensure that * we do not inject a given logical block with errors * on two different leaf devices, because ZFS can not * tolerate that (if maxfaults==1). * * To achieve this we divide each leaf device into * chunks of size (# leaves * SPA_MAXBLOCKSIZE * 4). * Each chunk is further divided into error-injection * ranges (can accept errors) and clear ranges (we do * not inject errors in those). Each error-injection * range can accept errors only for a single leaf vdev. * Error-injection ranges are separated by clear ranges. * * For example, with 3 leaves, each chunk looks like: * 0 to 32M: injection range for leaf 0 * 32M to 64M: clear range - no injection allowed * 64M to 96M: injection range for leaf 1 * 96M to 128M: clear range - no injection allowed * 128M to 160M: injection range for leaf 2 * 160M to 192M: clear range - no injection allowed * * Each clear range must be large enough such that a * single block cannot straddle it. This way a block * can't be a target in two different injection ranges * (on different leaf vdevs). */ offset = ztest_random(fsize / (leaves << bshift)) * (leaves << bshift) + (leaf << bshift) + (ztest_random(1ULL << (bshift - 1)) & -8ULL); /* * Only allow damage to the labels at one end of the vdev. * * If all labels are damaged, the device will be totally * inaccessible, which will result in loss of data, * because we also damage (parts of) the other side of * the mirror/raidz. * * Additionally, we will always have both an even and an * odd label, so that we can handle crashes in the * middle of vdev_config_sync(). */ if ((leaf & 1) == 0 && offset < VDEV_LABEL_START_SIZE) continue; /* * The two end labels are stored at the "end" of the disk, but * the end of the disk (vdev_psize) is aligned to * sizeof (vdev_label_t). */ uint64_t psize = P2ALIGN(fsize, sizeof (vdev_label_t)); if ((leaf & 1) == 1 && offset + sizeof (bad) > psize - VDEV_LABEL_END_SIZE) continue; mutex_enter(&ztest_vdev_lock); if (mirror_save != zs->zs_mirrors) { mutex_exit(&ztest_vdev_lock); (void) close(fd); goto out; } if (pwrite(fd, &bad, sizeof (bad), offset) != sizeof (bad)) fatal(B_TRUE, "can't inject bad word at 0x%"PRIx64" in %s", offset, pathrand); mutex_exit(&ztest_vdev_lock); if (ztest_opts.zo_verbose >= 7) (void) printf("injected bad word into %s," " offset 0x%"PRIx64"\n", pathrand, offset); } (void) close(fd); out: umem_free(path0, MAXPATHLEN); umem_free(pathrand, MAXPATHLEN); } /* * By design ztest will never inject uncorrectable damage in to the pool. * Issue a scrub, wait for it to complete, and verify there is never any * persistent damage. 
* * Only after a full scrub has been completed is it safe to start injecting * data corruption. See the comment in zfs_fault_inject(). */ static int ztest_scrub_impl(spa_t *spa) { int error = spa_scan(spa, POOL_SCAN_SCRUB); if (error) return (error); while (dsl_scan_scrubbing(spa_get_dsl(spa))) txg_wait_synced(spa_get_dsl(spa), 0); if (spa_get_errlog_size(spa) > 0) return (ECKSUM); ztest_pool_scrubbed = B_TRUE; return (0); } /* * Scrub the pool. */ void ztest_scrub(ztest_ds_t *zd, uint64_t id) { (void) zd, (void) id; spa_t *spa = ztest_spa; int error; /* * Scrub in progress by device removal. */ if (ztest_device_removal_active) return; /* * Start a scrub, wait a moment, then force a restart. */ (void) spa_scan(spa, POOL_SCAN_SCRUB); (void) poll(NULL, 0, 100); error = ztest_scrub_impl(spa); if (error == EBUSY) error = 0; ASSERT0(error); } /* * Change the guid for the pool. */ void ztest_reguid(ztest_ds_t *zd, uint64_t id) { (void) zd, (void) id; spa_t *spa = ztest_spa; uint64_t orig, load; int error; if (ztest_opts.zo_mmp_test) return; orig = spa_guid(spa); load = spa_load_guid(spa); (void) pthread_rwlock_wrlock(&ztest_name_lock); error = spa_change_guid(spa); (void) pthread_rwlock_unlock(&ztest_name_lock); if (error != 0) return; if (ztest_opts.zo_verbose >= 4) { (void) printf("Changed guid old %"PRIu64" -> %"PRIu64"\n", orig, spa_guid(spa)); } VERIFY3U(orig, !=, spa_guid(spa)); VERIFY3U(load, ==, spa_load_guid(spa)); } void ztest_blake3(ztest_ds_t *zd, uint64_t id) { (void) zd, (void) id; hrtime_t end = gethrtime() + NANOSEC; zio_cksum_salt_t salt; void *salt_ptr = &salt.zcs_bytes; struct abd *abd_data, *abd_meta; void *buf, *templ; int i, *ptr; uint32_t size; BLAKE3_CTX ctx; size = ztest_random_blocksize(); buf = umem_alloc(size, UMEM_NOFAIL); abd_data = abd_alloc(size, B_FALSE); abd_meta = abd_alloc(size, B_TRUE); for (i = 0, ptr = buf; i < size / sizeof (*ptr); i++, ptr++) *ptr = ztest_random(UINT_MAX); memset(salt_ptr, 'A', 32); abd_copy_from_buf_off(abd_data, buf, 0, size); abd_copy_from_buf_off(abd_meta, buf, 0, size); while (gethrtime() <= end) { int run_count = 100; zio_cksum_t zc_ref1, zc_ref2; zio_cksum_t zc_res1, zc_res2; void *ref1 = &zc_ref1; void *ref2 = &zc_ref2; void *res1 = &zc_res1; void *res2 = &zc_res2; /* BLAKE3_KEY_LEN = 32 */ VERIFY0(blake3_impl_setname("generic")); templ = abd_checksum_blake3_tmpl_init(&salt); Blake3_InitKeyed(&ctx, salt_ptr); Blake3_Update(&ctx, buf, size); Blake3_Final(&ctx, ref1); zc_ref2 = zc_ref1; ZIO_CHECKSUM_BSWAP(&zc_ref2); abd_checksum_blake3_tmpl_free(templ); VERIFY0(blake3_impl_setname("cycle")); while (run_count-- > 0) { /* Test current implementation */ Blake3_InitKeyed(&ctx, salt_ptr); Blake3_Update(&ctx, buf, size); Blake3_Final(&ctx, res1); zc_res2 = zc_res1; ZIO_CHECKSUM_BSWAP(&zc_res2); VERIFY0(memcmp(ref1, res1, 32)); VERIFY0(memcmp(ref2, res2, 32)); /* Test ABD - data */ templ = abd_checksum_blake3_tmpl_init(&salt); abd_checksum_blake3_native(abd_data, size, templ, &zc_res1); abd_checksum_blake3_byteswap(abd_data, size, templ, &zc_res2); VERIFY0(memcmp(ref1, res1, 32)); VERIFY0(memcmp(ref2, res2, 32)); /* Test ABD - metadata */ abd_checksum_blake3_native(abd_meta, size, templ, &zc_res1); abd_checksum_blake3_byteswap(abd_meta, size, templ, &zc_res2); abd_checksum_blake3_tmpl_free(templ); VERIFY0(memcmp(ref1, res1, 32)); VERIFY0(memcmp(ref2, res2, 32)); } } abd_free(abd_data); abd_free(abd_meta); umem_free(buf, size); } void ztest_fletcher(ztest_ds_t *zd, uint64_t id) { (void) zd, (void) id; hrtime_t end = gethrtime() + NANOSEC; 
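	/*
	 * For roughly one second, cross-check the fletcher_4 implementations
	 * selected by the "cycle" setting against the scalar reference, using
	 * flat buffers as well as data and metadata ABDs, in both native and
	 * byteswapped form.
	 */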
while (gethrtime() <= end) { int run_count = 100; void *buf; struct abd *abd_data, *abd_meta; uint32_t size; int *ptr; int i; zio_cksum_t zc_ref; zio_cksum_t zc_ref_byteswap; size = ztest_random_blocksize(); buf = umem_alloc(size, UMEM_NOFAIL); abd_data = abd_alloc(size, B_FALSE); abd_meta = abd_alloc(size, B_TRUE); for (i = 0, ptr = buf; i < size / sizeof (*ptr); i++, ptr++) *ptr = ztest_random(UINT_MAX); abd_copy_from_buf_off(abd_data, buf, 0, size); abd_copy_from_buf_off(abd_meta, buf, 0, size); VERIFY0(fletcher_4_impl_set("scalar")); fletcher_4_native(buf, size, NULL, &zc_ref); fletcher_4_byteswap(buf, size, NULL, &zc_ref_byteswap); VERIFY0(fletcher_4_impl_set("cycle")); while (run_count-- > 0) { zio_cksum_t zc; zio_cksum_t zc_byteswap; fletcher_4_byteswap(buf, size, NULL, &zc_byteswap); fletcher_4_native(buf, size, NULL, &zc); VERIFY0(memcmp(&zc, &zc_ref, sizeof (zc))); VERIFY0(memcmp(&zc_byteswap, &zc_ref_byteswap, sizeof (zc_byteswap))); /* Test ABD - data */ abd_fletcher_4_byteswap(abd_data, size, NULL, &zc_byteswap); abd_fletcher_4_native(abd_data, size, NULL, &zc); VERIFY0(memcmp(&zc, &zc_ref, sizeof (zc))); VERIFY0(memcmp(&zc_byteswap, &zc_ref_byteswap, sizeof (zc_byteswap))); /* Test ABD - metadata */ abd_fletcher_4_byteswap(abd_meta, size, NULL, &zc_byteswap); abd_fletcher_4_native(abd_meta, size, NULL, &zc); VERIFY0(memcmp(&zc, &zc_ref, sizeof (zc))); VERIFY0(memcmp(&zc_byteswap, &zc_ref_byteswap, sizeof (zc_byteswap))); } umem_free(buf, size); abd_free(abd_data); abd_free(abd_meta); } } void ztest_fletcher_incr(ztest_ds_t *zd, uint64_t id) { (void) zd, (void) id; void *buf; size_t size; int *ptr; int i; zio_cksum_t zc_ref; zio_cksum_t zc_ref_bswap; hrtime_t end = gethrtime() + NANOSEC; while (gethrtime() <= end) { int run_count = 100; size = ztest_random_blocksize(); buf = umem_alloc(size, UMEM_NOFAIL); for (i = 0, ptr = buf; i < size / sizeof (*ptr); i++, ptr++) *ptr = ztest_random(UINT_MAX); VERIFY0(fletcher_4_impl_set("scalar")); fletcher_4_native(buf, size, NULL, &zc_ref); fletcher_4_byteswap(buf, size, NULL, &zc_ref_bswap); VERIFY0(fletcher_4_impl_set("cycle")); while (run_count-- > 0) { zio_cksum_t zc; zio_cksum_t zc_bswap; size_t pos = 0; ZIO_SET_CHECKSUM(&zc, 0, 0, 0, 0); ZIO_SET_CHECKSUM(&zc_bswap, 0, 0, 0, 0); while (pos < size) { size_t inc = 64 * ztest_random(size / 67); /* sometimes add few bytes to test non-simd */ if (ztest_random(100) < 10) inc += P2ALIGN(ztest_random(64), sizeof (uint32_t)); if (inc > (size - pos)) inc = size - pos; fletcher_4_incremental_native(buf + pos, inc, &zc); fletcher_4_incremental_byteswap(buf + pos, inc, &zc_bswap); pos += inc; } VERIFY3U(pos, ==, size); VERIFY(ZIO_CHECKSUM_EQUAL(zc, zc_ref)); VERIFY(ZIO_CHECKSUM_EQUAL(zc_bswap, zc_ref_bswap)); /* * verify if incremental on the whole buffer is * equivalent to non-incremental version */ ZIO_SET_CHECKSUM(&zc, 0, 0, 0, 0); ZIO_SET_CHECKSUM(&zc_bswap, 0, 0, 0, 0); fletcher_4_incremental_native(buf, size, &zc); fletcher_4_incremental_byteswap(buf, size, &zc_bswap); VERIFY(ZIO_CHECKSUM_EQUAL(zc, zc_ref)); VERIFY(ZIO_CHECKSUM_EQUAL(zc_bswap, zc_ref_bswap)); } umem_free(buf, size); } } static int ztest_set_global_vars(void) { for (size_t i = 0; i < ztest_opts.zo_gvars_count; i++) { char *kv = ztest_opts.zo_gvars[i]; VERIFY3U(strlen(kv), <=, ZO_GVARS_MAX_ARGLEN); VERIFY3U(strlen(kv), >, 0); int err = set_global_var(kv); if (ztest_opts.zo_verbose > 0) { (void) printf("setting global var %s ... %s\n", kv, err ? 
"failed" : "ok"); } if (err != 0) { (void) fprintf(stderr, "failed to set global var '%s'\n", kv); return (err); } } return (0); } static char ** ztest_global_vars_to_zdb_args(void) { char **args = calloc(2*ztest_opts.zo_gvars_count + 1, sizeof (char *)); char **cur = args; for (size_t i = 0; i < ztest_opts.zo_gvars_count; i++) { *cur++ = (char *)"-o"; *cur++ = ztest_opts.zo_gvars[i]; } ASSERT3P(cur, ==, &args[2*ztest_opts.zo_gvars_count]); *cur = NULL; return (args); } /* The end of strings is indicated by a NULL element */ static char * join_strings(char **strings, const char *sep) { size_t totallen = 0; for (char **sp = strings; *sp != NULL; sp++) { totallen += strlen(*sp); totallen += strlen(sep); } if (totallen > 0) { ASSERT(totallen >= strlen(sep)); totallen -= strlen(sep); } size_t buflen = totallen + 1; char *o = malloc(buflen); /* trailing 0 byte */ o[0] = '\0'; for (char **sp = strings; *sp != NULL; sp++) { size_t would; would = strlcat(o, *sp, buflen); VERIFY3U(would, <, buflen); if (*(sp+1) == NULL) { break; } would = strlcat(o, sep, buflen); VERIFY3U(would, <, buflen); } ASSERT3S(strlen(o), ==, totallen); return (o); } static int ztest_check_path(char *path) { struct stat s; /* return true on success */ return (!stat(path, &s)); } static void ztest_get_zdb_bin(char *bin, int len) { char *zdb_path; /* * Try to use $ZDB and in-tree zdb path. If not successful, just * let popen to search through PATH. */ if ((zdb_path = getenv("ZDB"))) { strlcpy(bin, zdb_path, len); /* In env */ if (!ztest_check_path(bin)) { ztest_dump_core = 0; fatal(B_TRUE, "invalid ZDB '%s'", bin); } return; } VERIFY3P(realpath(getexecname(), bin), !=, NULL); if (strstr(bin, ".libs/ztest")) { strstr(bin, ".libs/ztest")[0] = '\0'; /* In-tree */ strcat(bin, "zdb"); if (ztest_check_path(bin)) return; } strcpy(bin, "zdb"); } static vdev_t * ztest_random_concrete_vdev_leaf(vdev_t *vd) { if (vd == NULL) return (NULL); if (vd->vdev_children == 0) return (vd); vdev_t *eligible[vd->vdev_children]; int eligible_idx = 0, i; for (i = 0; i < vd->vdev_children; i++) { vdev_t *cvd = vd->vdev_child[i]; if (cvd->vdev_top->vdev_removing) continue; if (cvd->vdev_children > 0 || (vdev_is_concrete(cvd) && !cvd->vdev_detached)) { eligible[eligible_idx++] = cvd; } } VERIFY3S(eligible_idx, >, 0); uint64_t child_no = ztest_random(eligible_idx); return (ztest_random_concrete_vdev_leaf(eligible[child_no])); } void ztest_initialize(ztest_ds_t *zd, uint64_t id) { (void) zd, (void) id; spa_t *spa = ztest_spa; int error = 0; mutex_enter(&ztest_vdev_lock); spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER); /* Random leaf vdev */ vdev_t *rand_vd = ztest_random_concrete_vdev_leaf(spa->spa_root_vdev); if (rand_vd == NULL) { spa_config_exit(spa, SCL_VDEV, FTAG); mutex_exit(&ztest_vdev_lock); return; } /* * The random vdev we've selected may change as soon as we * drop the spa_config_lock. We create local copies of things * we're interested in. 
*/ uint64_t guid = rand_vd->vdev_guid; char *path = strdup(rand_vd->vdev_path); boolean_t active = rand_vd->vdev_initialize_thread != NULL; zfs_dbgmsg("vd %px, guid %llu", rand_vd, (u_longlong_t)guid); spa_config_exit(spa, SCL_VDEV, FTAG); uint64_t cmd = ztest_random(POOL_INITIALIZE_FUNCS); nvlist_t *vdev_guids = fnvlist_alloc(); nvlist_t *vdev_errlist = fnvlist_alloc(); fnvlist_add_uint64(vdev_guids, path, guid); error = spa_vdev_initialize(spa, vdev_guids, cmd, vdev_errlist); fnvlist_free(vdev_guids); fnvlist_free(vdev_errlist); switch (cmd) { case POOL_INITIALIZE_CANCEL: if (ztest_opts.zo_verbose >= 4) { (void) printf("Cancel initialize %s", path); if (!active) (void) printf(" failed (no initialize active)"); (void) printf("\n"); } break; case POOL_INITIALIZE_START: if (ztest_opts.zo_verbose >= 4) { (void) printf("Start initialize %s", path); if (active && error == 0) (void) printf(" failed (already active)"); else if (error != 0) (void) printf(" failed (error %d)", error); (void) printf("\n"); } break; case POOL_INITIALIZE_SUSPEND: if (ztest_opts.zo_verbose >= 4) { (void) printf("Suspend initialize %s", path); if (!active) (void) printf(" failed (no initialize active)"); (void) printf("\n"); } break; } free(path); mutex_exit(&ztest_vdev_lock); } void ztest_trim(ztest_ds_t *zd, uint64_t id) { (void) zd, (void) id; spa_t *spa = ztest_spa; int error = 0; mutex_enter(&ztest_vdev_lock); spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER); /* Random leaf vdev */ vdev_t *rand_vd = ztest_random_concrete_vdev_leaf(spa->spa_root_vdev); if (rand_vd == NULL) { spa_config_exit(spa, SCL_VDEV, FTAG); mutex_exit(&ztest_vdev_lock); return; } /* * The random vdev we've selected may change as soon as we * drop the spa_config_lock. We create local copies of things * we're interested in. */ uint64_t guid = rand_vd->vdev_guid; char *path = strdup(rand_vd->vdev_path); boolean_t active = rand_vd->vdev_trim_thread != NULL; zfs_dbgmsg("vd %p, guid %llu", rand_vd, (u_longlong_t)guid); spa_config_exit(spa, SCL_VDEV, FTAG); uint64_t cmd = ztest_random(POOL_TRIM_FUNCS); uint64_t rate = 1 << ztest_random(30); boolean_t partial = (ztest_random(5) > 0); boolean_t secure = (ztest_random(5) > 0); nvlist_t *vdev_guids = fnvlist_alloc(); nvlist_t *vdev_errlist = fnvlist_alloc(); fnvlist_add_uint64(vdev_guids, path, guid); error = spa_vdev_trim(spa, vdev_guids, cmd, rate, partial, secure, vdev_errlist); fnvlist_free(vdev_guids); fnvlist_free(vdev_errlist); switch (cmd) { case POOL_TRIM_CANCEL: if (ztest_opts.zo_verbose >= 4) { (void) printf("Cancel TRIM %s", path); if (!active) (void) printf(" failed (no TRIM active)"); (void) printf("\n"); } break; case POOL_TRIM_START: if (ztest_opts.zo_verbose >= 4) { (void) printf("Start TRIM %s", path); if (active && error == 0) (void) printf(" failed (already active)"); else if (error != 0) (void) printf(" failed (error %d)", error); (void) printf("\n"); } break; case POOL_TRIM_SUSPEND: if (ztest_opts.zo_verbose >= 4) { (void) printf("Suspend TRIM %s", path); if (!active) (void) printf(" failed (no TRIM active)"); (void) printf("\n"); } break; } free(path); mutex_exit(&ztest_vdev_lock); } /* * Verify pool integrity by running zdb. 
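 * The zdb child is launched via popen(); a non-zero exit status or death
 * by signal is treated as fatal.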
*/ static void ztest_run_zdb(const char *pool) { int status; char *bin; char *zdb; char *zbuf; const int len = MAXPATHLEN + MAXNAMELEN + 20; FILE *fp; bin = umem_alloc(len, UMEM_NOFAIL); zdb = umem_alloc(len, UMEM_NOFAIL); zbuf = umem_alloc(1024, UMEM_NOFAIL); ztest_get_zdb_bin(bin, len); char **set_gvars_args = ztest_global_vars_to_zdb_args(); char *set_gvars_args_joined = join_strings(set_gvars_args, " "); free(set_gvars_args); size_t would = snprintf(zdb, len, "%s -bcc%s%s -G -d -Y -e -y %s -p %s %s", bin, ztest_opts.zo_verbose >= 3 ? "s" : "", ztest_opts.zo_verbose >= 4 ? "v" : "", set_gvars_args_joined, ztest_opts.zo_dir, pool); ASSERT3U(would, <, len); free(set_gvars_args_joined); if (ztest_opts.zo_verbose >= 5) (void) printf("Executing %s\n", zdb); fp = popen(zdb, "r"); while (fgets(zbuf, 1024, fp) != NULL) if (ztest_opts.zo_verbose >= 3) (void) printf("%s", zbuf); status = pclose(fp); if (status == 0) goto out; ztest_dump_core = 0; if (WIFEXITED(status)) fatal(B_FALSE, "'%s' exit code %d", zdb, WEXITSTATUS(status)); else fatal(B_FALSE, "'%s' died with signal %d", zdb, WTERMSIG(status)); out: umem_free(bin, len); umem_free(zdb, len); umem_free(zbuf, 1024); } static void ztest_walk_pool_directory(const char *header) { spa_t *spa = NULL; if (ztest_opts.zo_verbose >= 6) (void) puts(header); mutex_enter(&spa_namespace_lock); while ((spa = spa_next(spa)) != NULL) if (ztest_opts.zo_verbose >= 6) (void) printf("\t%s\n", spa_name(spa)); mutex_exit(&spa_namespace_lock); } static void ztest_spa_import_export(char *oldname, char *newname) { nvlist_t *config, *newconfig; uint64_t pool_guid; spa_t *spa; int error; if (ztest_opts.zo_verbose >= 4) { (void) printf("import/export: old = %s, new = %s\n", oldname, newname); } /* * Clean up from previous runs. */ (void) spa_destroy(newname); /* * Get the pool's configuration and guid. */ VERIFY0(spa_open(oldname, &spa, FTAG)); /* * Kick off a scrub to tickle scrub/export races. */ if (ztest_random(2) == 0) (void) spa_scan(spa, POOL_SCAN_SCRUB); pool_guid = spa_guid(spa); spa_close(spa, FTAG); ztest_walk_pool_directory("pools before export"); /* * Export it. */ VERIFY0(spa_export(oldname, &config, B_FALSE, B_FALSE)); ztest_walk_pool_directory("pools after export"); /* * Try to import it. */ newconfig = spa_tryimport(config); ASSERT3P(newconfig, !=, NULL); fnvlist_free(newconfig); /* * Import it under the new name. */ error = spa_import(newname, config, NULL, 0); if (error != 0) { dump_nvlist(config, 0); fatal(B_FALSE, "couldn't import pool %s as %s: error %u", oldname, newname, error); } ztest_walk_pool_directory("pools after import"); /* * Try to import it again -- should fail with EEXIST. */ VERIFY3U(EEXIST, ==, spa_import(newname, config, NULL, 0)); /* * Try to import it under a different name -- should fail with EEXIST. */ VERIFY3U(EEXIST, ==, spa_import(oldname, config, NULL, 0)); /* * Verify that the pool is no longer visible under the old name. */ VERIFY3U(ENOENT, ==, spa_open(oldname, &spa, FTAG)); /* * Verify that we can open and close the pool using the new name. 
*/ VERIFY0(spa_open(newname, &spa, FTAG)); ASSERT3U(pool_guid, ==, spa_guid(spa)); spa_close(spa, FTAG); fnvlist_free(config); } static void ztest_resume(spa_t *spa) { if (spa_suspended(spa) && ztest_opts.zo_verbose >= 6) (void) printf("resuming from suspended state\n"); spa_vdev_state_enter(spa, SCL_NONE); vdev_clear(spa, NULL); (void) spa_vdev_state_exit(spa, NULL, 0); (void) zio_resume(spa); } static __attribute__((noreturn)) void ztest_resume_thread(void *arg) { spa_t *spa = arg; while (!ztest_exiting) { if (spa_suspended(spa)) ztest_resume(spa); (void) poll(NULL, 0, 100); /* * Periodically change the zfs_compressed_arc_enabled setting. */ if (ztest_random(10) == 0) zfs_compressed_arc_enabled = ztest_random(2); /* * Periodically change the zfs_abd_scatter_enabled setting. */ if (ztest_random(10) == 0) zfs_abd_scatter_enabled = ztest_random(2); } thread_exit(); } static __attribute__((noreturn)) void ztest_deadman_thread(void *arg) { ztest_shared_t *zs = arg; spa_t *spa = ztest_spa; hrtime_t delay, overdue, last_run = gethrtime(); delay = (zs->zs_thread_stop - zs->zs_thread_start) + MSEC2NSEC(zfs_deadman_synctime_ms); while (!ztest_exiting) { /* * Wait for the delay timer while checking occasionally * if we should stop. */ if (gethrtime() < last_run + delay) { (void) poll(NULL, 0, 1000); continue; } /* * If the pool is suspended then fail immediately. Otherwise, * check to see if the pool is making any progress. If * vdev_deadman() discovers that there hasn't been any recent * I/Os then it will end up aborting the tests. */ if (spa_suspended(spa) || spa->spa_root_vdev == NULL) { fatal(B_FALSE, "aborting test after %lu seconds because " "pool has transitioned to a suspended state.", zfs_deadman_synctime_ms / 1000); } vdev_deadman(spa->spa_root_vdev, FTAG); /* * If the process doesn't complete within a grace period of * zfs_deadman_synctime_ms over the expected finish time, * then it may be hung and is terminated. */ overdue = zs->zs_proc_stop + MSEC2NSEC(zfs_deadman_synctime_ms); if (gethrtime() > overdue) { fatal(B_FALSE, "aborting test after %llu seconds because " "the process is overdue for termination.", (gethrtime() - zs->zs_proc_start) / NANOSEC); } (void) printf("ztest has been running for %lld seconds\n", (gethrtime() - zs->zs_proc_start) / NANOSEC); last_run = gethrtime(); delay = MSEC2NSEC(zfs_deadman_checktime_ms); } thread_exit(); } static void ztest_execute(int test, ztest_info_t *zi, uint64_t id) { ztest_ds_t *zd = &ztest_ds[id % ztest_opts.zo_datasets]; ztest_shared_callstate_t *zc = ZTEST_GET_SHARED_CALLSTATE(test); hrtime_t functime = gethrtime(); int i; for (i = 0; i < zi->zi_iters; i++) zi->zi_func(zd, id); functime = gethrtime() - functime; atomic_add_64(&zc->zc_count, 1); atomic_add_64(&zc->zc_time, functime); if (ztest_opts.zo_verbose >= 4) (void) printf("%6.2f sec in %s\n", (double)functime / NANOSEC, zi->zi_funcname); } static __attribute__((noreturn)) void ztest_thread(void *arg) { int rand; uint64_t id = (uintptr_t)arg; ztest_shared_t *zs = ztest_shared; uint64_t call_next; hrtime_t now; ztest_info_t *zi; ztest_shared_callstate_t *zc; while ((now = gethrtime()) < zs->zs_thread_stop) { /* * See if it's time to force a crash. */ if (now > zs->zs_thread_kill) ztest_kill(zs); /* * If we're getting ENOSPC with some regularity, stop. */ if (zs->zs_enospc_count > 10) break; /* * Pick a random function to execute. 
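		 * Each function's next eligible call time lives in shared
		 * state (zc_next); the atomic_cas_64() below lets exactly one
		 * thread claim that slot and push the next call out by a
		 * random amount derived from the function's configured
		 * interval.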
*/ rand = ztest_random(ZTEST_FUNCS); zi = &ztest_info[rand]; zc = ZTEST_GET_SHARED_CALLSTATE(rand); call_next = zc->zc_next; if (now >= call_next && atomic_cas_64(&zc->zc_next, call_next, call_next + ztest_random(2 * zi->zi_interval[0] + 1)) == call_next) { ztest_execute(rand, zi, id); } } thread_exit(); } static void ztest_dataset_name(char *dsname, const char *pool, int d) { (void) snprintf(dsname, ZFS_MAX_DATASET_NAME_LEN, "%s/ds_%d", pool, d); } static void ztest_dataset_destroy(int d) { char name[ZFS_MAX_DATASET_NAME_LEN]; int t; ztest_dataset_name(name, ztest_opts.zo_pool, d); if (ztest_opts.zo_verbose >= 3) (void) printf("Destroying %s to free up space\n", name); /* * Cleanup any non-standard clones and snapshots. In general, * ztest thread t operates on dataset (t % zopt_datasets), * so there may be more than one thing to clean up. */ for (t = d; t < ztest_opts.zo_threads; t += ztest_opts.zo_datasets) ztest_dsl_dataset_cleanup(name, t); (void) dmu_objset_find(name, ztest_objset_destroy_cb, NULL, DS_FIND_SNAPSHOTS | DS_FIND_CHILDREN); } static void ztest_dataset_dirobj_verify(ztest_ds_t *zd) { uint64_t usedobjs, dirobjs, scratch; /* * ZTEST_DIROBJ is the object directory for the entire dataset. * Therefore, the number of objects in use should equal the * number of ZTEST_DIROBJ entries, +1 for ZTEST_DIROBJ itself. * If not, we have an object leak. * * Note that we can only check this in ztest_dataset_open(), * when the open-context and syncing-context values agree. * That's because zap_count() returns the open-context value, * while dmu_objset_space() returns the rootbp fill count. */ VERIFY0(zap_count(zd->zd_os, ZTEST_DIROBJ, &dirobjs)); dmu_objset_space(zd->zd_os, &scratch, &scratch, &usedobjs, &scratch); ASSERT3U(dirobjs + 1, ==, usedobjs); } static int ztest_dataset_open(int d) { ztest_ds_t *zd = &ztest_ds[d]; uint64_t committed_seq = ZTEST_GET_SHARED_DS(d)->zd_seq; objset_t *os; zilog_t *zilog; char name[ZFS_MAX_DATASET_NAME_LEN]; int error; ztest_dataset_name(name, ztest_opts.zo_pool, d); (void) pthread_rwlock_rdlock(&ztest_name_lock); error = ztest_dataset_create(name); if (error == ENOSPC) { (void) pthread_rwlock_unlock(&ztest_name_lock); ztest_record_enospc(FTAG); return (error); } ASSERT(error == 0 || error == EEXIST); VERIFY0(ztest_dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, B_TRUE, zd, &os)); (void) pthread_rwlock_unlock(&ztest_name_lock); ztest_zd_init(zd, ZTEST_GET_SHARED_DS(d), os); zilog = zd->zd_zilog; if (zilog->zl_header->zh_claim_lr_seq != 0 && zilog->zl_header->zh_claim_lr_seq < committed_seq) fatal(B_FALSE, "missing log records: " "claimed %"PRIu64" < committed %"PRIu64"", zilog->zl_header->zh_claim_lr_seq, committed_seq); ztest_dataset_dirobj_verify(zd); zil_replay(os, zd, ztest_replay_vector); ztest_dataset_dirobj_verify(zd); if (ztest_opts.zo_verbose >= 6) (void) printf("%s replay %"PRIu64" blocks, " "%"PRIu64" records, seq %"PRIu64"\n", zd->zd_name, zilog->zl_parse_blk_count, zilog->zl_parse_lr_count, zilog->zl_replaying_seq); zilog = zil_open(os, ztest_get_data, NULL); if (zilog->zl_replaying_seq != 0 && zilog->zl_replaying_seq < committed_seq) fatal(B_FALSE, "missing log records: " "replayed %"PRIu64" < committed %"PRIu64"", zilog->zl_replaying_seq, committed_seq); return (0); } static void ztest_dataset_close(int d) { ztest_ds_t *zd = &ztest_ds[d]; zil_close(zd->zd_zilog); dmu_objset_disown(zd->zd_os, B_TRUE, zd); ztest_zd_fini(zd); } static int ztest_replay_zil_cb(const char *name, void *arg) { (void) arg; objset_t *os; ztest_ds_t *zdtmp; 
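	/*
	 * Own the dataset, replay its ZIL into a scratch ztest_ds_t, report
	 * the replay statistics when running verbosely, then disown it.
	 */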
VERIFY0(ztest_dmu_objset_own(name, DMU_OST_ANY, B_TRUE, B_TRUE, FTAG, &os)); zdtmp = umem_alloc(sizeof (ztest_ds_t), UMEM_NOFAIL); ztest_zd_init(zdtmp, NULL, os); zil_replay(os, zdtmp, ztest_replay_vector); ztest_zd_fini(zdtmp); if (dmu_objset_zil(os)->zl_parse_lr_count != 0 && ztest_opts.zo_verbose >= 6) { zilog_t *zilog = dmu_objset_zil(os); (void) printf("%s replay %"PRIu64" blocks, " "%"PRIu64" records, seq %"PRIu64"\n", name, zilog->zl_parse_blk_count, zilog->zl_parse_lr_count, zilog->zl_replaying_seq); } umem_free(zdtmp, sizeof (ztest_ds_t)); dmu_objset_disown(os, B_TRUE, FTAG); return (0); } static void ztest_freeze(void) { ztest_ds_t *zd = &ztest_ds[0]; spa_t *spa; int numloops = 0; if (ztest_opts.zo_verbose >= 3) (void) printf("testing spa_freeze()...\n"); kernel_init(SPA_MODE_READ | SPA_MODE_WRITE); VERIFY0(spa_open(ztest_opts.zo_pool, &spa, FTAG)); VERIFY0(ztest_dataset_open(0)); ztest_spa = spa; /* * Force the first log block to be transactionally allocated. * We have to do this before we freeze the pool -- otherwise * the log chain won't be anchored. */ while (BP_IS_HOLE(&zd->zd_zilog->zl_header->zh_log)) { ztest_dmu_object_alloc_free(zd, 0); zil_commit(zd->zd_zilog, 0); } txg_wait_synced(spa_get_dsl(spa), 0); /* * Freeze the pool. This stops spa_sync() from doing anything, * so that the only way to record changes from now on is the ZIL. */ spa_freeze(spa); /* * Because it is hard to predict how much space a write will actually * require beforehand, we leave ourselves some fudge space to write over * capacity. */ uint64_t capacity = metaslab_class_get_space(spa_normal_class(spa)) / 2; /* * Run tests that generate log records but don't alter the pool config * or depend on DSL sync tasks (snapshots, objset create/destroy, etc). * We do a txg_wait_synced() after each iteration to force the txg * to increase well beyond the last synced value in the uberblock. * The ZIL should be OK with that. * * Run a random number of times less than zo_maxloops and ensure we do * not run out of space on the pool. */ while (ztest_random(10) != 0 && numloops++ < ztest_opts.zo_maxloops && metaslab_class_get_alloc(spa_normal_class(spa)) < capacity) { ztest_od_t od; ztest_od_init(&od, 0, FTAG, 0, DMU_OT_UINT64_OTHER, 0, 0, 0); VERIFY0(ztest_object_init(zd, &od, sizeof (od), B_FALSE)); ztest_io(zd, od.od_object, ztest_random(ZTEST_RANGE_LOCKS) << SPA_MAXBLOCKSHIFT); txg_wait_synced(spa_get_dsl(spa), 0); } /* * Commit all of the changes we just generated. */ zil_commit(zd->zd_zilog, 0); txg_wait_synced(spa_get_dsl(spa), 0); /* * Close our dataset and close the pool. */ ztest_dataset_close(0); spa_close(spa, FTAG); kernel_fini(); /* * Open and close the pool and dataset to induce log replay. */ kernel_init(SPA_MODE_READ | SPA_MODE_WRITE); VERIFY0(spa_open(ztest_opts.zo_pool, &spa, FTAG)); ASSERT3U(spa_freeze_txg(spa), ==, UINT64_MAX); VERIFY0(ztest_dataset_open(0)); ztest_spa = spa; txg_wait_synced(spa_get_dsl(spa), 0); ztest_dataset_close(0); ztest_reguid(NULL, 0); spa_close(spa, FTAG); kernel_fini(); } static void ztest_import_impl(void) { importargs_t args = { 0 }; nvlist_t *cfg = NULL; int nsearch = 1; char *searchdirs[nsearch]; int flags = ZFS_IMPORT_MISSING_LOG; searchdirs[0] = ztest_opts.zo_dir; args.paths = nsearch; args.path = searchdirs; args.can_be_active = B_FALSE; VERIFY0(zpool_find_config(NULL, ztest_opts.zo_pool, &cfg, &args, &libzpool_config_ops)); VERIFY0(spa_import(ztest_opts.zo_pool, cfg, NULL, flags)); fnvlist_free(cfg); } /* * Import a storage pool with the given name. 
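 * The on-disk configuration is located by scanning zo_dir, and the pool is
 * imported with ZFS_IMPORT_MISSING_LOG.  Unless this is an MMP test run,
 * the pool is then verified with zdb and exercised via spa_freeze().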
*/ static void ztest_import(ztest_shared_t *zs) { spa_t *spa; mutex_init(&ztest_vdev_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&ztest_checkpoint_lock, NULL, MUTEX_DEFAULT, NULL); VERIFY0(pthread_rwlock_init(&ztest_name_lock, NULL)); kernel_init(SPA_MODE_READ | SPA_MODE_WRITE); ztest_import_impl(); VERIFY0(spa_open(ztest_opts.zo_pool, &spa, FTAG)); zs->zs_metaslab_sz = 1ULL << spa->spa_root_vdev->vdev_child[0]->vdev_ms_shift; spa_close(spa, FTAG); kernel_fini(); if (!ztest_opts.zo_mmp_test) { ztest_run_zdb(ztest_opts.zo_pool); ztest_freeze(); ztest_run_zdb(ztest_opts.zo_pool); } (void) pthread_rwlock_destroy(&ztest_name_lock); mutex_destroy(&ztest_vdev_lock); mutex_destroy(&ztest_checkpoint_lock); } /* * Kick off threads to run tests on all datasets in parallel. */ static void ztest_run(ztest_shared_t *zs) { spa_t *spa; objset_t *os; kthread_t *resume_thread, *deadman_thread; kthread_t **run_threads; uint64_t object; int error; int t, d; ztest_exiting = B_FALSE; /* * Initialize parent/child shared state. */ mutex_init(&ztest_vdev_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&ztest_checkpoint_lock, NULL, MUTEX_DEFAULT, NULL); VERIFY0(pthread_rwlock_init(&ztest_name_lock, NULL)); zs->zs_thread_start = gethrtime(); zs->zs_thread_stop = zs->zs_thread_start + ztest_opts.zo_passtime * NANOSEC; zs->zs_thread_stop = MIN(zs->zs_thread_stop, zs->zs_proc_stop); zs->zs_thread_kill = zs->zs_thread_stop; if (ztest_random(100) < ztest_opts.zo_killrate) { zs->zs_thread_kill -= ztest_random(ztest_opts.zo_passtime * NANOSEC); } mutex_init(&zcl.zcl_callbacks_lock, NULL, MUTEX_DEFAULT, NULL); list_create(&zcl.zcl_callbacks, sizeof (ztest_cb_data_t), offsetof(ztest_cb_data_t, zcd_node)); /* * Open our pool. It may need to be imported first depending on * what tests were running when the previous pass was terminated. */ kernel_init(SPA_MODE_READ | SPA_MODE_WRITE); error = spa_open(ztest_opts.zo_pool, &spa, FTAG); if (error) { VERIFY3S(error, ==, ENOENT); ztest_import_impl(); VERIFY0(spa_open(ztest_opts.zo_pool, &spa, FTAG)); zs->zs_metaslab_sz = 1ULL << spa->spa_root_vdev->vdev_child[0]->vdev_ms_shift; } metaslab_preload_limit = ztest_random(20) + 1; ztest_spa = spa; VERIFY0(vdev_raidz_impl_set("cycle")); dmu_objset_stats_t dds; VERIFY0(ztest_dmu_objset_own(ztest_opts.zo_pool, DMU_OST_ANY, B_TRUE, B_TRUE, FTAG, &os)); dsl_pool_config_enter(dmu_objset_pool(os), FTAG); dmu_objset_fast_stat(os, &dds); dsl_pool_config_exit(dmu_objset_pool(os), FTAG); zs->zs_guid = dds.dds_guid; dmu_objset_disown(os, B_TRUE, FTAG); /* * Create a thread to periodically resume suspended I/O. */ resume_thread = thread_create(NULL, 0, ztest_resume_thread, spa, 0, NULL, TS_RUN | TS_JOINABLE, defclsyspri); /* * Create a deadman thread and set to panic if we hang. */ deadman_thread = thread_create(NULL, 0, ztest_deadman_thread, zs, 0, NULL, TS_RUN | TS_JOINABLE, defclsyspri); spa->spa_deadman_failmode = ZIO_FAILURE_MODE_PANIC; /* * Verify that we can safely inquire about any object, * whether it's allocated or not. To make it interesting, * we probe a 5-wide window around each power of two. * This hits all edge cases, including zero and the max. */ for (t = 0; t < 64; t++) { for (d = -5; d <= 5; d++) { error = dmu_object_info(spa->spa_meta_objset, (1ULL << t) + d, NULL); ASSERT(error == 0 || error == ENOENT || error == EINVAL); } } /* * If we got any ENOSPC errors on the previous run, destroy something. 
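 * A randomly chosen dataset is destroyed to free space, and the ENOSPC
 * counter is reset for this pass.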
*/ if (zs->zs_enospc_count != 0) { int d = ztest_random(ztest_opts.zo_datasets); ztest_dataset_destroy(d); } zs->zs_enospc_count = 0; /* * If we were in the middle of ztest_device_removal() and were killed * we need to ensure the removal and scrub complete before running * any tests that check ztest_device_removal_active. The removal will * be restarted automatically when the spa is opened, but we need to * initiate the scrub manually if it is not already in progress. Note * that we always run the scrub whenever an indirect vdev exists * because we have no way of knowing for sure if ztest_device_removal() * fully completed its scrub before the pool was reimported. */ if (spa->spa_removing_phys.sr_state == DSS_SCANNING || spa->spa_removing_phys.sr_prev_indirect_vdev != -1) { while (spa->spa_removing_phys.sr_state == DSS_SCANNING) txg_wait_synced(spa_get_dsl(spa), 0); error = ztest_scrub_impl(spa); if (error == EBUSY) error = 0; ASSERT0(error); } run_threads = umem_zalloc(ztest_opts.zo_threads * sizeof (kthread_t *), UMEM_NOFAIL); if (ztest_opts.zo_verbose >= 4) (void) printf("starting main threads...\n"); /* * Replay all logs of all datasets in the pool. This is primarily for * temporary datasets which wouldn't otherwise get replayed, which * can trigger failures when attempting to offline a SLOG in * ztest_fault_inject(). */ (void) dmu_objset_find(ztest_opts.zo_pool, ztest_replay_zil_cb, NULL, DS_FIND_CHILDREN); /* * Kick off all the tests that run in parallel. */ for (t = 0; t < ztest_opts.zo_threads; t++) { if (t < ztest_opts.zo_datasets && ztest_dataset_open(t) != 0) { umem_free(run_threads, ztest_opts.zo_threads * sizeof (kthread_t *)); return; } run_threads[t] = thread_create(NULL, 0, ztest_thread, (void *)(uintptr_t)t, 0, NULL, TS_RUN | TS_JOINABLE, defclsyspri); } /* * Wait for all of the tests to complete. */ for (t = 0; t < ztest_opts.zo_threads; t++) VERIFY0(thread_join(run_threads[t])); /* * Close all datasets. This must be done after all the threads * are joined so we can be sure none of the datasets are in-use * by any of the threads. */ for (t = 0; t < ztest_opts.zo_threads; t++) { if (t < ztest_opts.zo_datasets) ztest_dataset_close(t); } txg_wait_synced(spa_get_dsl(spa), 0); zs->zs_alloc = metaslab_class_get_alloc(spa_normal_class(spa)); zs->zs_space = metaslab_class_get_space(spa_normal_class(spa)); umem_free(run_threads, ztest_opts.zo_threads * sizeof (kthread_t *)); /* Kill the resume and deadman threads */ ztest_exiting = B_TRUE; VERIFY0(thread_join(resume_thread)); VERIFY0(thread_join(deadman_thread)); ztest_resume(spa); /* * Right before closing the pool, kick off a bunch of async I/O; * spa_close() should wait for it to complete. */ for (object = 1; object < 50; object++) { dmu_prefetch(spa->spa_meta_objset, object, 0, 0, 1ULL << 20, ZIO_PRIORITY_SYNC_READ); } /* Verify that at least one commit cb was called in a timely fashion */ if (zc_cb_counter >= ZTEST_COMMIT_CB_MIN_REG) VERIFY0(zc_min_txg_delay); spa_close(spa, FTAG); /* * Verify that we can loop over all pools. */ mutex_enter(&spa_namespace_lock); for (spa = spa_next(NULL); spa != NULL; spa = spa_next(spa)) if (ztest_opts.zo_verbose > 3) (void) printf("spa_next: found %s\n", spa_name(spa)); mutex_exit(&spa_namespace_lock); /* * Verify that we can export the pool and reimport it under a * different name. 
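 * Half the time (and never during MMP testing) the pool is exported,
 * imported as <pool>_import, and then moved back to its original name by a
 * second export/import cycle.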
*/ if ((ztest_random(2) == 0) && !ztest_opts.zo_mmp_test) { char name[ZFS_MAX_DATASET_NAME_LEN]; (void) snprintf(name, sizeof (name), "%s_import", ztest_opts.zo_pool); ztest_spa_import_export(ztest_opts.zo_pool, name); ztest_spa_import_export(name, ztest_opts.zo_pool); } kernel_fini(); list_destroy(&zcl.zcl_callbacks); mutex_destroy(&zcl.zcl_callbacks_lock); (void) pthread_rwlock_destroy(&ztest_name_lock); mutex_destroy(&ztest_vdev_lock); mutex_destroy(&ztest_checkpoint_lock); } static void print_time(hrtime_t t, char *timebuf) { hrtime_t s = t / NANOSEC; hrtime_t m = s / 60; hrtime_t h = m / 60; hrtime_t d = h / 24; s -= m * 60; m -= h * 60; h -= d * 24; timebuf[0] = '\0'; if (d) (void) sprintf(timebuf, "%llud%02lluh%02llum%02llus", d, h, m, s); else if (h) (void) sprintf(timebuf, "%lluh%02llum%02llus", h, m, s); else if (m) (void) sprintf(timebuf, "%llum%02llus", m, s); else (void) sprintf(timebuf, "%llus", s); } static nvlist_t * make_random_props(void) { nvlist_t *props; props = fnvlist_alloc(); if (ztest_random(2) == 0) return (props); fnvlist_add_uint64(props, zpool_prop_to_name(ZPOOL_PROP_AUTOREPLACE), 1); return (props); } /* * Create a storage pool with the given name and initial vdev size. * Then test spa_freeze() functionality. */ static void ztest_init(ztest_shared_t *zs) { spa_t *spa; nvlist_t *nvroot, *props; int i; mutex_init(&ztest_vdev_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&ztest_checkpoint_lock, NULL, MUTEX_DEFAULT, NULL); VERIFY0(pthread_rwlock_init(&ztest_name_lock, NULL)); kernel_init(SPA_MODE_READ | SPA_MODE_WRITE); /* * Create the storage pool. */ (void) spa_destroy(ztest_opts.zo_pool); ztest_shared->zs_vdev_next_leaf = 0; zs->zs_splits = 0; zs->zs_mirrors = ztest_opts.zo_mirrors; nvroot = make_vdev_root(NULL, NULL, NULL, ztest_opts.zo_vdev_size, 0, NULL, ztest_opts.zo_raid_children, zs->zs_mirrors, 1); props = make_random_props(); /* * We don't expect the pool to suspend unless maxfaults == 0, * in which case ztest_fault_inject() temporarily takes away * the only valid replica. */ fnvlist_add_uint64(props, zpool_prop_to_name(ZPOOL_PROP_FAILUREMODE), MAXFAULTS(zs) ? ZIO_FAILURE_MODE_PANIC : ZIO_FAILURE_MODE_WAIT); for (i = 0; i < SPA_FEATURES; i++) { char *buf; if (!spa_feature_table[i].fi_zfs_mod_supported) continue; /* * 75% chance of using the log space map feature. We want ztest * to exercise both the code paths that use the log space map * feature and the ones that don't. 
*/ if (i == SPA_FEATURE_LOG_SPACEMAP && ztest_random(4) == 0) continue; VERIFY3S(-1, !=, asprintf(&buf, "feature@%s", spa_feature_table[i].fi_uname)); fnvlist_add_uint64(props, buf, 0); free(buf); } VERIFY0(spa_create(ztest_opts.zo_pool, nvroot, props, NULL, NULL)); fnvlist_free(nvroot); fnvlist_free(props); VERIFY0(spa_open(ztest_opts.zo_pool, &spa, FTAG)); zs->zs_metaslab_sz = 1ULL << spa->spa_root_vdev->vdev_child[0]->vdev_ms_shift; spa_close(spa, FTAG); kernel_fini(); if (!ztest_opts.zo_mmp_test) { ztest_run_zdb(ztest_opts.zo_pool); ztest_freeze(); ztest_run_zdb(ztest_opts.zo_pool); } (void) pthread_rwlock_destroy(&ztest_name_lock); mutex_destroy(&ztest_vdev_lock); mutex_destroy(&ztest_checkpoint_lock); } static void setup_data_fd(void) { static char ztest_name_data[] = "/tmp/ztest.data.XXXXXX"; ztest_fd_data = mkstemp(ztest_name_data); ASSERT3S(ztest_fd_data, >=, 0); (void) unlink(ztest_name_data); } static int shared_data_size(ztest_shared_hdr_t *hdr) { int size; size = hdr->zh_hdr_size; size += hdr->zh_opts_size; size += hdr->zh_size; size += hdr->zh_stats_size * hdr->zh_stats_count; size += hdr->zh_ds_size * hdr->zh_ds_count; return (size); } static void setup_hdr(void) { int size; ztest_shared_hdr_t *hdr; hdr = (void *)mmap(0, P2ROUNDUP(sizeof (*hdr), getpagesize()), PROT_READ | PROT_WRITE, MAP_SHARED, ztest_fd_data, 0); ASSERT3P(hdr, !=, MAP_FAILED); VERIFY0(ftruncate(ztest_fd_data, sizeof (ztest_shared_hdr_t))); hdr->zh_hdr_size = sizeof (ztest_shared_hdr_t); hdr->zh_opts_size = sizeof (ztest_shared_opts_t); hdr->zh_size = sizeof (ztest_shared_t); hdr->zh_stats_size = sizeof (ztest_shared_callstate_t); hdr->zh_stats_count = ZTEST_FUNCS; hdr->zh_ds_size = sizeof (ztest_shared_ds_t); hdr->zh_ds_count = ztest_opts.zo_datasets; size = shared_data_size(hdr); VERIFY0(ftruncate(ztest_fd_data, size)); (void) munmap((caddr_t)hdr, P2ROUNDUP(sizeof (*hdr), getpagesize())); } static void setup_data(void) { int size, offset; ztest_shared_hdr_t *hdr; uint8_t *buf; hdr = (void *)mmap(0, P2ROUNDUP(sizeof (*hdr), getpagesize()), PROT_READ, MAP_SHARED, ztest_fd_data, 0); ASSERT3P(hdr, !=, MAP_FAILED); size = shared_data_size(hdr); (void) munmap((caddr_t)hdr, P2ROUNDUP(sizeof (*hdr), getpagesize())); hdr = ztest_shared_hdr = (void *)mmap(0, P2ROUNDUP(size, getpagesize()), PROT_READ | PROT_WRITE, MAP_SHARED, ztest_fd_data, 0); ASSERT3P(hdr, !=, MAP_FAILED); buf = (uint8_t *)hdr; offset = hdr->zh_hdr_size; ztest_shared_opts = (void *)&buf[offset]; offset += hdr->zh_opts_size; ztest_shared = (void *)&buf[offset]; offset += hdr->zh_size; ztest_shared_callstate = (void *)&buf[offset]; offset += hdr->zh_stats_size * hdr->zh_stats_count; ztest_shared_ds = (void *)&buf[offset]; } static boolean_t exec_child(char *cmd, char *libpath, boolean_t ignorekill, int *statusp) { pid_t pid; int status; char *cmdbuf = NULL; pid = fork(); if (cmd == NULL) { cmdbuf = umem_alloc(MAXPATHLEN, UMEM_NOFAIL); (void) strlcpy(cmdbuf, getexecname(), MAXPATHLEN); cmd = cmdbuf; } if (pid == -1) fatal(B_TRUE, "fork failed"); if (pid == 0) { /* child */ char fd_data_str[12]; VERIFY3S(11, >=, snprintf(fd_data_str, 12, "%d", ztest_fd_data)); VERIFY0(setenv("ZTEST_FD_DATA", fd_data_str, 1)); if (libpath != NULL) { const char *curlp = getenv("LD_LIBRARY_PATH"); if (curlp == NULL) VERIFY0(setenv("LD_LIBRARY_PATH", libpath, 1)); else { char *newlp = NULL; VERIFY3S(-1, !=, asprintf(&newlp, "%s:%s", libpath, curlp)); VERIFY0(setenv("LD_LIBRARY_PATH", newlp, 1)); free(newlp); } } (void) execl(cmd, cmd, (char *)NULL); ztest_dump_core = 
B_FALSE; fatal(B_TRUE, "exec failed: %s", cmd); } if (cmdbuf != NULL) { umem_free(cmdbuf, MAXPATHLEN); cmd = NULL; } while (waitpid(pid, &status, 0) != pid) continue; if (statusp != NULL) *statusp = status; if (WIFEXITED(status)) { if (WEXITSTATUS(status) != 0) { (void) fprintf(stderr, "child exited with code %d\n", WEXITSTATUS(status)); exit(2); } return (B_FALSE); } else if (WIFSIGNALED(status)) { if (!ignorekill || WTERMSIG(status) != SIGKILL) { (void) fprintf(stderr, "child died with signal %d\n", WTERMSIG(status)); exit(3); } return (B_TRUE); } else { (void) fprintf(stderr, "something strange happened to child\n"); exit(4); } } static void ztest_run_init(void) { int i; ztest_shared_t *zs = ztest_shared; /* * Blow away any existing copy of zpool.cache */ (void) remove(spa_config_path); if (ztest_opts.zo_init == 0) { if (ztest_opts.zo_verbose >= 1) (void) printf("Importing pool %s\n", ztest_opts.zo_pool); ztest_import(zs); return; } /* * Create and initialize our storage pool. */ for (i = 1; i <= ztest_opts.zo_init; i++) { memset(zs, 0, sizeof (*zs)); if (ztest_opts.zo_verbose >= 3 && ztest_opts.zo_init != 1) { (void) printf("ztest_init(), pass %d\n", i); } ztest_init(zs); } } int main(int argc, char **argv) { int kills = 0; int iters = 0; int older = 0; int newer = 0; ztest_shared_t *zs; ztest_info_t *zi; ztest_shared_callstate_t *zc; char timebuf[100]; char numbuf[NN_NUMBUF_SZ]; char *cmd; boolean_t hasalt; int f, err; char *fd_data_str = getenv("ZTEST_FD_DATA"); struct sigaction action; (void) setvbuf(stdout, NULL, _IOLBF, 0); dprintf_setup(&argc, argv); zfs_deadman_synctime_ms = 300000; zfs_deadman_checktime_ms = 30000; /* * As two-word space map entries may not come up often (especially * if pool and vdev sizes are small) we want to force at least some * of them so the feature get tested. */ zfs_force_some_double_word_sm_entries = B_TRUE; /* * Verify that even extensively damaged split blocks with many * segments can be reconstructed in a reasonable amount of time * when reconstruction is known to be possible. * * Note: the lower this value is, the more damage we inflict, and * the more time ztest spends in recovering that damage. We chose * to induce damage 1/100th of the time so recovery is tested but * not so frequently that ztest doesn't get to test other code paths. */ zfs_reconstruct_indirect_damage_fraction = 100; action.sa_handler = sig_handler; sigemptyset(&action.sa_mask); action.sa_flags = 0; if (sigaction(SIGSEGV, &action, NULL) < 0) { (void) fprintf(stderr, "ztest: cannot catch SIGSEGV: %s.\n", strerror(errno)); exit(EXIT_FAILURE); } if (sigaction(SIGABRT, &action, NULL) < 0) { (void) fprintf(stderr, "ztest: cannot catch SIGABRT: %s.\n", strerror(errno)); exit(EXIT_FAILURE); } /* * Force random_get_bytes() to use /dev/urandom in order to prevent * ztest from needlessly depleting the system entropy pool. 
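 * The descriptor opened below is kept for the remainder of the run and is
 * what subsequent ztest_random() calls draw from.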
*/ random_path = "/dev/urandom"; ztest_fd_rand = open(random_path, O_RDONLY | O_CLOEXEC); ASSERT3S(ztest_fd_rand, >=, 0); if (!fd_data_str) { process_options(argc, argv); setup_data_fd(); setup_hdr(); setup_data(); memcpy(ztest_shared_opts, &ztest_opts, sizeof (*ztest_shared_opts)); } else { ztest_fd_data = atoi(fd_data_str); setup_data(); memcpy(&ztest_opts, ztest_shared_opts, sizeof (ztest_opts)); } ASSERT3U(ztest_opts.zo_datasets, ==, ztest_shared_hdr->zh_ds_count); err = ztest_set_global_vars(); if (err != 0 && !fd_data_str) { /* error message done by ztest_set_global_vars */ exit(EXIT_FAILURE); } else { /* children should not be spawned if setting gvars fails */ VERIFY3S(err, ==, 0); } /* Override location of zpool.cache */ VERIFY3S(asprintf((char **)&spa_config_path, "%s/zpool.cache", ztest_opts.zo_dir), !=, -1); ztest_ds = umem_alloc(ztest_opts.zo_datasets * sizeof (ztest_ds_t), UMEM_NOFAIL); zs = ztest_shared; if (fd_data_str) { metaslab_force_ganging = ztest_opts.zo_metaslab_force_ganging; metaslab_df_alloc_threshold = zs->zs_metaslab_df_alloc_threshold; if (zs->zs_do_init) ztest_run_init(); else ztest_run(zs); exit(0); } hasalt = (strlen(ztest_opts.zo_alt_ztest) != 0); if (ztest_opts.zo_verbose >= 1) { (void) printf("%"PRIu64" vdevs, %d datasets, %d threads," "%d %s disks, %"PRIu64" seconds...\n\n", ztest_opts.zo_vdevs, ztest_opts.zo_datasets, ztest_opts.zo_threads, ztest_opts.zo_raid_children, ztest_opts.zo_raid_type, ztest_opts.zo_time); } cmd = umem_alloc(MAXNAMELEN, UMEM_NOFAIL); (void) strlcpy(cmd, getexecname(), MAXNAMELEN); zs->zs_do_init = B_TRUE; if (strlen(ztest_opts.zo_alt_ztest) != 0) { if (ztest_opts.zo_verbose >= 1) { (void) printf("Executing older ztest for " "initialization: %s\n", ztest_opts.zo_alt_ztest); } VERIFY(!exec_child(ztest_opts.zo_alt_ztest, ztest_opts.zo_alt_libpath, B_FALSE, NULL)); } else { VERIFY(!exec_child(NULL, NULL, B_FALSE, NULL)); } zs->zs_do_init = B_FALSE; zs->zs_proc_start = gethrtime(); zs->zs_proc_stop = zs->zs_proc_start + ztest_opts.zo_time * NANOSEC; for (f = 0; f < ZTEST_FUNCS; f++) { zi = &ztest_info[f]; zc = ZTEST_GET_SHARED_CALLSTATE(f); if (zs->zs_proc_start + zi->zi_interval[0] > zs->zs_proc_stop) zc->zc_next = UINT64_MAX; else zc->zc_next = zs->zs_proc_start + ztest_random(2 * zi->zi_interval[0] + 1); } /* * Run the tests in a loop. These tests include fault injection * to verify that self-healing data works, and forced crashes * to verify that we never lose on-disk consistency. */ while (gethrtime() < zs->zs_proc_stop) { int status; boolean_t killed; /* * Initialize the workload counters for each function. 
*/ for (f = 0; f < ZTEST_FUNCS; f++) { zc = ZTEST_GET_SHARED_CALLSTATE(f); zc->zc_count = 0; zc->zc_time = 0; } /* Set the allocation switch size */ zs->zs_metaslab_df_alloc_threshold = ztest_random(zs->zs_metaslab_sz / 4) + 1; if (!hasalt || ztest_random(2) == 0) { if (hasalt && ztest_opts.zo_verbose >= 1) { (void) printf("Executing newer ztest: %s\n", cmd); } newer++; killed = exec_child(cmd, NULL, B_TRUE, &status); } else { if (hasalt && ztest_opts.zo_verbose >= 1) { (void) printf("Executing older ztest: %s\n", ztest_opts.zo_alt_ztest); } older++; killed = exec_child(ztest_opts.zo_alt_ztest, ztest_opts.zo_alt_libpath, B_TRUE, &status); } if (killed) kills++; iters++; if (ztest_opts.zo_verbose >= 1) { hrtime_t now = gethrtime(); now = MIN(now, zs->zs_proc_stop); print_time(zs->zs_proc_stop - now, timebuf); nicenum(zs->zs_space, numbuf, sizeof (numbuf)); (void) printf("Pass %3d, %8s, %3"PRIu64" ENOSPC, " "%4.1f%% of %5s used, %3.0f%% done, %8s to go\n", iters, WIFEXITED(status) ? "Complete" : "SIGKILL", zs->zs_enospc_count, 100.0 * zs->zs_alloc / zs->zs_space, numbuf, 100.0 * (now - zs->zs_proc_start) / (ztest_opts.zo_time * NANOSEC), timebuf); } if (ztest_opts.zo_verbose >= 2) { (void) printf("\nWorkload summary:\n\n"); (void) printf("%7s %9s %s\n", "Calls", "Time", "Function"); (void) printf("%7s %9s %s\n", "-----", "----", "--------"); for (f = 0; f < ZTEST_FUNCS; f++) { zi = &ztest_info[f]; zc = ZTEST_GET_SHARED_CALLSTATE(f); print_time(zc->zc_time, timebuf); (void) printf("%7"PRIu64" %9s %s\n", zc->zc_count, timebuf, zi->zi_funcname); } (void) printf("\n"); } if (!ztest_opts.zo_mmp_test) ztest_run_zdb(ztest_opts.zo_pool); } if (ztest_opts.zo_verbose >= 1) { if (hasalt) { (void) printf("%d runs of older ztest: %s\n", older, ztest_opts.zo_alt_ztest); (void) printf("%d runs of newer ztest: %s\n", newer, cmd); } (void) printf("%d killed, %d completed, %.0f%% kill rate\n", kills, iters - kills, (100.0 * kills) / MAX(1, iters)); } umem_free(cmd, MAXNAMELEN); return (0); } diff --git a/config/Rules.am b/config/Rules.am index 7162b771869d..abb4ced33233 100644 --- a/config/Rules.am +++ b/config/Rules.am @@ -1,83 +1,85 @@ # # Default build rules for all user space components, every Makefile.am # should include these rules and override or extend them as needed. 
# PHONY = AM_CPPFLAGS = \ -include $(top_builddir)/zfs_config.h \ -I$(top_builddir)/include \ -I$(top_srcdir)/include \ -I$(top_srcdir)/module/icp/include \ -I$(top_srcdir)/lib/libspl/include \ -I$(top_srcdir)/lib/libspl/include/os/@ac_system_l@ AM_LIBTOOLFLAGS = --silent AM_CFLAGS = -std=gnu99 -Wall -Wextra -Wstrict-prototypes -Wmissing-prototypes -Wwrite-strings -Wno-sign-compare -Wno-missing-field-initializers AM_CFLAGS += -fno-strict-aliasing AM_CFLAGS += $(NO_OMIT_FRAME_POINTER) AM_CFLAGS += $(IMPLICIT_FALLTHROUGH) AM_CFLAGS += $(DEBUG_CFLAGS) AM_CFLAGS += $(ASAN_CFLAGS) AM_CFLAGS += $(UBSAN_CFLAGS) AM_CFLAGS += $(CODE_COVERAGE_CFLAGS) $(NO_FORMAT_ZERO_LENGTH) if BUILD_FREEBSD AM_CFLAGS += -fPIC -Werror -Wno-unknown-pragmas -Wno-enum-conversion AM_CFLAGS += -include $(top_srcdir)/include/os/freebsd/spl/sys/ccompile.h AM_CFLAGS += -I/usr/include -I/usr/local/include endif AM_CPPFLAGS += -D_GNU_SOURCE AM_CPPFLAGS += -D_REENTRANT AM_CPPFLAGS += -D_FILE_OFFSET_BITS=64 AM_CPPFLAGS += -D_LARGEFILE64_SOURCE AM_CPPFLAGS += -DLIBEXECDIR=\"$(libexecdir)\" AM_CPPFLAGS += -DRUNSTATEDIR=\"$(runstatedir)\" AM_CPPFLAGS += -DSBINDIR=\"$(sbindir)\" AM_CPPFLAGS += -DSYSCONFDIR=\"$(sysconfdir)\" AM_CPPFLAGS += -DPKGDATADIR=\"$(pkgdatadir)\" AM_CPPFLAGS += $(DEBUG_CPPFLAGS) AM_CPPFLAGS += $(CODE_COVERAGE_CPPFLAGS) AM_CPPFLAGS += -DTEXT_DOMAIN=\"zfs-@ac_system_l@-user\" AM_CPPFLAGS_NOCHECK = -D"strtok(...)=strtok(__VA_ARGS__) __attribute__((deprecated(\"Use strtok_r(3) instead!\")))" AM_CPPFLAGS_NOCHECK += -D"__xpg_basename(...)=__xpg_basename(__VA_ARGS__) __attribute__((deprecated(\"basename(3) is underspecified. Use zfs_basename() instead!\")))" AM_CPPFLAGS_NOCHECK += -D"basename(...)=basename(__VA_ARGS__) __attribute__((deprecated(\"basename(3) is underspecified. Use zfs_basename() instead!\")))" AM_CPPFLAGS_NOCHECK += -D"dirname(...)=dirname(__VA_ARGS__) __attribute__((deprecated(\"dirname(3) is underspecified. Use zfs_dirnamelen() instead!\")))" AM_CPPFLAGS_NOCHECK += -D"bcopy(...)=__attribute__((deprecated(\"bcopy(3) is deprecated. Use memcpy(3)/memmove(3) instead!\"))) bcopy(__VA_ARGS__)" AM_CPPFLAGS_NOCHECK += -D"bcmp(...)=__attribute__((deprecated(\"bcmp(3) is deprecated. Use memcmp(3) instead!\"))) bcmp(__VA_ARGS__)" AM_CPPFLAGS_NOCHECK += -D"bzero(...)=__attribute__((deprecated(\"bzero(3) is deprecated. Use memset(3) instead!\"))) bzero(__VA_ARGS__)" AM_CPPFLAGS_NOCHECK += -D"asctime(...)=__attribute__((deprecated(\"Use strftime(3) instead!\"))) asctime(__VA_ARGS__)" AM_CPPFLAGS_NOCHECK += -D"asctime_r(...)=__attribute__((deprecated(\"Use strftime(3) instead!\"))) asctime_r(__VA_ARGS__)" AM_CPPFLAGS_NOCHECK += -D"gmtime(...)=__attribute__((deprecated(\"gmtime(3) isn't thread-safe. Use gmtime_r(3) instead!\"))) gmtime(__VA_ARGS__)" AM_CPPFLAGS_NOCHECK += -D"localtime(...)=__attribute__((deprecated(\"localtime(3) isn't thread-safe. Use localtime_r(3) instead!\"))) localtime(__VA_ARGS__)" +AM_CPPFLAGS_NOCHECK += -D"strncpy(...)=__attribute__((deprecated(\"strncpy(3) is deprecated. 
Use strlcpy(3) instead!\"))) strncpy(__VA_ARGS__)" + AM_CPPFLAGS += $(AM_CPPFLAGS_NOCHECK) if ASAN_ENABLED AM_CPPFLAGS += -DZFS_ASAN_ENABLED endif if UBSAN_ENABLED AM_CPPFLAGS += -DZFS_UBSAN_ENABLED endif AM_LDFLAGS = $(DEBUG_LDFLAGS) AM_LDFLAGS += $(ASAN_LDFLAGS) AM_LDFLAGS += $(UBSAN_LDFLAGS) if BUILD_FREEBSD AM_LDFLAGS += -fstack-protector-strong -shared AM_LDFLAGS += -Wl,-x -Wl,--fatal-warnings -Wl,--warn-shared-textrel AM_LDFLAGS += -lm endif # If a target includes kernel code, generate warnings for large stack frames KERNEL_CFLAGS = $(FRAME_LARGER_THAN) # See https://debbugs.gnu.org/cgi/bugreport.cgi?bug=54020 LIBRARY_CFLAGS = -no-suppress # Forcibly enable asserts/debugging for libzpool &al. FORCEDEBUG_CPPFLAGS = -DDEBUG -UNDEBUG -DZFS_DEBUG diff --git a/include/os/freebsd/spl/sys/kstat.h b/include/os/freebsd/spl/sys/kstat.h index 947dfee62393..7dc2c4753b02 100644 --- a/include/os/freebsd/spl/sys/kstat.h +++ b/include/os/freebsd/spl/sys/kstat.h @@ -1,230 +1,230 @@ /* * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. * Copyright (C) 2007 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). * Written by Brian Behlendorf . * UCRL-CODE-235197 * * This file is part of the SPL, Solaris Porting Layer. * * The SPL is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the * Free Software Foundation; either version 2 of the License, or (at your * option) any later version. * * The SPL is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * for more details. * * You should have received a copy of the GNU General Public License along * with the SPL. If not, see . 
*/ #ifndef _SPL_KSTAT_H #define _SPL_KSTAT_H #include #ifndef _STANDALONE #include #endif struct list_head {}; #include #include #define KSTAT_STRLEN 255 #define KSTAT_RAW_MAX (128*1024) /* * For reference valid classes are: * disk, tape, net, controller, vm, kvm, hat, streams, kstat, misc */ #define KSTAT_TYPE_RAW 0 /* can be anything; ks_ndata >= 1 */ #define KSTAT_TYPE_NAMED 1 /* name/value pair; ks_ndata >= 1 */ #define KSTAT_TYPE_INTR 2 /* interrupt stats; ks_ndata == 1 */ #define KSTAT_TYPE_IO 3 /* I/O stats; ks_ndata == 1 */ #define KSTAT_TYPE_TIMER 4 /* event timer; ks_ndata >= 1 */ #define KSTAT_NUM_TYPES 5 #define KSTAT_DATA_CHAR 0 #define KSTAT_DATA_INT32 1 #define KSTAT_DATA_UINT32 2 #define KSTAT_DATA_INT64 3 #define KSTAT_DATA_UINT64 4 #define KSTAT_DATA_LONG 5 #define KSTAT_DATA_ULONG 6 #define KSTAT_DATA_STRING 7 #define KSTAT_NUM_DATAS 8 #define KSTAT_INTR_HARD 0 #define KSTAT_INTR_SOFT 1 #define KSTAT_INTR_WATCHDOG 2 #define KSTAT_INTR_SPURIOUS 3 #define KSTAT_INTR_MULTSVC 4 #define KSTAT_NUM_INTRS 5 #define KSTAT_FLAG_VIRTUAL 0x01 #define KSTAT_FLAG_VAR_SIZE 0x02 #define KSTAT_FLAG_WRITABLE 0x04 #define KSTAT_FLAG_PERSISTENT 0x08 #define KSTAT_FLAG_DORMANT 0x10 #define KSTAT_FLAG_INVALID 0x20 #define KSTAT_FLAG_LONGSTRINGS 0x40 #define KSTAT_FLAG_NO_HEADERS 0x80 #define KS_MAGIC 0x9d9d9d9d /* Dynamic updates */ #define KSTAT_READ 0 #define KSTAT_WRITE 1 struct kstat_s; typedef struct kstat_s kstat_t; typedef int kid_t; /* unique kstat id */ typedef int kstat_update_t(struct kstat_s *, int); /* dynamic update cb */ struct seq_file { char *sf_buf; size_t sf_size; }; void seq_printf(struct seq_file *m, const char *fmt, ...); typedef struct kstat_module { - char ksm_name[KSTAT_STRLEN+1]; /* module name */ + char ksm_name[KSTAT_STRLEN]; /* module name */ struct list_head ksm_module_list; /* module linkage */ struct list_head ksm_kstat_list; /* list of kstat entries */ struct proc_dir_entry *ksm_proc; /* proc entry */ } kstat_module_t; typedef struct kstat_raw_ops { int (*headers)(char *buf, size_t size); int (*seq_headers)(struct seq_file *); int (*data)(char *buf, size_t size, void *data); void *(*addr)(kstat_t *ksp, loff_t index); } kstat_raw_ops_t; struct kstat_s { int ks_magic; /* magic value */ kid_t ks_kid; /* unique kstat ID */ hrtime_t ks_crtime; /* creation time */ hrtime_t ks_snaptime; /* last access time */ - char ks_module[KSTAT_STRLEN+1]; /* provider module name */ + char ks_module[KSTAT_STRLEN]; /* provider module name */ int ks_instance; /* provider module instance */ - char ks_name[KSTAT_STRLEN+1]; /* kstat name */ - char ks_class[KSTAT_STRLEN+1]; /* kstat class */ + char ks_name[KSTAT_STRLEN]; /* kstat name */ + char ks_class[KSTAT_STRLEN]; /* kstat class */ uchar_t ks_type; /* kstat data type */ uchar_t ks_flags; /* kstat flags */ void *ks_data; /* kstat type-specific data */ uint_t ks_ndata; /* # of data records */ size_t ks_data_size; /* size of kstat data section */ kstat_update_t *ks_update; /* dynamic updates */ void *ks_private; /* private data */ void *ks_private1; /* private data */ kmutex_t ks_private_lock; /* kstat private data lock */ kmutex_t *ks_lock; /* kstat data lock */ struct list_head ks_list; /* kstat linkage */ kstat_module_t *ks_owner; /* kstat module linkage */ kstat_raw_ops_t ks_raw_ops; /* ops table for raw type */ char *ks_raw_buf; /* buf used for raw ops */ size_t ks_raw_bufsize; /* size of raw ops buffer */ #ifndef _STANDALONE struct sysctl_ctx_list ks_sysctl_ctx; struct sysctl_oid *ks_sysctl_root; #endif /* _STANDALONE */ }; 
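/*
 * The name buffers in the structures above and below are KSTAT_STRLEN bytes,
 * and that length now includes the terminating NUL; they are expected to be
 * filled with a bounded copy such as strlcpy(ksp->ks_name, name, KSTAT_STRLEN),
 * so over-long names are truncated rather than overflowing.
 */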
typedef struct kstat_named_s { char name[KSTAT_STRLEN]; /* name of counter */ uchar_t data_type; /* data type */ union { char c[16]; /* 128-bit int */ int32_t i32; /* 32-bit signed int */ uint32_t ui32; /* 32-bit unsigned int */ int64_t i64; /* 64-bit signed int */ uint64_t ui64; /* 64-bit unsigned int */ long l; /* native signed long */ ulong_t ul; /* native unsigned long */ struct { union { char *ptr; /* NULL-term string */ char __pad[8]; /* 64-bit padding */ } addr; uint32_t len; /* # bytes for strlen + '\0' */ } string; } value; } kstat_named_t; #define KSTAT_NAMED_STR_PTR(knptr) ((knptr)->value.string.addr.ptr) #define KSTAT_NAMED_STR_BUFLEN(knptr) ((knptr)->value.string.len) typedef struct kstat_intr { uint_t intrs[KSTAT_NUM_INTRS]; } kstat_intr_t; typedef struct kstat_io { u_longlong_t nread; /* number of bytes read */ u_longlong_t nwritten; /* number of bytes written */ uint_t reads; /* number of read operations */ uint_t writes; /* number of write operations */ hrtime_t wtime; /* cumulative wait (pre-service) time */ hrtime_t wlentime; /* cumulative wait len*time product */ hrtime_t wlastupdate; /* last time wait queue changed */ hrtime_t rtime; /* cumulative run (service) time */ hrtime_t rlentime; /* cumulative run length*time product */ hrtime_t rlastupdate; /* last time run queue changed */ uint_t wcnt; /* count of elements in wait state */ uint_t rcnt; /* count of elements in run state */ } kstat_io_t; typedef struct kstat_timer { - char name[KSTAT_STRLEN+1]; /* event name */ + char name[KSTAT_STRLEN]; /* event name */ u_longlong_t num_events; /* number of events */ hrtime_t elapsed_time; /* cumulative elapsed time */ hrtime_t min_time; /* shortest event duration */ hrtime_t max_time; /* longest event duration */ hrtime_t start_time; /* previous event start time */ hrtime_t stop_time; /* previous event stop time */ } kstat_timer_t; int spl_kstat_init(void); void spl_kstat_fini(void); extern void __kstat_set_raw_ops(kstat_t *ksp, int (*headers)(char *buf, size_t size), int (*data)(char *buf, size_t size, void *data), void* (*addr)(kstat_t *ksp, loff_t index)); extern void __kstat_set_seq_raw_ops(kstat_t *ksp, int (*headers)(struct seq_file *), int (*data)(char *buf, size_t size, void *data), void* (*addr)(kstat_t *ksp, loff_t index)); extern kstat_t *__kstat_create(const char *ks_module, int ks_instance, const char *ks_name, const char *ks_class, uchar_t ks_type, uint_t ks_ndata, uchar_t ks_flags); extern void __kstat_install(kstat_t *ksp); extern void __kstat_delete(kstat_t *ksp); #define kstat_set_seq_raw_ops(k, h, d, a) \ __kstat_set_seq_raw_ops(k, h, d, a) #define kstat_set_raw_ops(k, h, d, a) \ __kstat_set_raw_ops(k, h, d, a) #ifndef _STANDALONE #define kstat_create(m, i, n, c, t, s, f) \ __kstat_create(m, i, n, c, t, s, f) #define kstat_install(k) __kstat_install(k) #define kstat_delete(k) __kstat_delete(k) #else #define kstat_create(m, i, n, c, t, s, f) ((kstat_t *)0) #define kstat_install(k) #define kstat_delete(k) #endif #endif /* _SPL_KSTAT_H */ diff --git a/include/os/linux/spl/sys/kstat.h b/include/os/linux/spl/sys/kstat.h index 928f70757545..305c411ddfa0 100644 --- a/include/os/linux/spl/sys/kstat.h +++ b/include/os/linux/spl/sys/kstat.h @@ -1,218 +1,218 @@ /* * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. * Copyright (C) 2007 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). * Written by Brian Behlendorf . 
* UCRL-CODE-235197 * * This file is part of the SPL, Solaris Porting Layer. * * The SPL is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the * Free Software Foundation; either version 2 of the License, or (at your * option) any later version. * * The SPL is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * for more details. * * You should have received a copy of the GNU General Public License along * with the SPL. If not, see . */ #ifndef _SPL_KSTAT_H #define _SPL_KSTAT_H #include #include #include #include #include #include #define KSTAT_STRLEN 255 #define KSTAT_RAW_MAX (128*1024) /* * For reference valid classes are: * disk, tape, net, controller, vm, kvm, hat, streams, kstat, misc */ #define KSTAT_TYPE_RAW 0 /* can be anything; ks_ndata >= 1 */ #define KSTAT_TYPE_NAMED 1 /* name/value pair; ks_ndata >= 1 */ #define KSTAT_TYPE_INTR 2 /* interrupt stats; ks_ndata == 1 */ #define KSTAT_TYPE_IO 3 /* I/O stats; ks_ndata == 1 */ #define KSTAT_TYPE_TIMER 4 /* event timer; ks_ndata >= 1 */ #define KSTAT_NUM_TYPES 5 #define KSTAT_DATA_CHAR 0 #define KSTAT_DATA_INT32 1 #define KSTAT_DATA_UINT32 2 #define KSTAT_DATA_INT64 3 #define KSTAT_DATA_UINT64 4 #define KSTAT_DATA_LONG 5 #define KSTAT_DATA_ULONG 6 #define KSTAT_DATA_STRING 7 #define KSTAT_NUM_DATAS 8 #define KSTAT_INTR_HARD 0 #define KSTAT_INTR_SOFT 1 #define KSTAT_INTR_WATCHDOG 2 #define KSTAT_INTR_SPURIOUS 3 #define KSTAT_INTR_MULTSVC 4 #define KSTAT_NUM_INTRS 5 #define KSTAT_FLAG_VIRTUAL 0x01 #define KSTAT_FLAG_VAR_SIZE 0x02 #define KSTAT_FLAG_WRITABLE 0x04 #define KSTAT_FLAG_PERSISTENT 0x08 #define KSTAT_FLAG_DORMANT 0x10 #define KSTAT_FLAG_INVALID 0x20 #define KSTAT_FLAG_LONGSTRINGS 0x40 #define KSTAT_FLAG_NO_HEADERS 0x80 #define KS_MAGIC 0x9d9d9d9d /* Dynamic updates */ #define KSTAT_READ 0 #define KSTAT_WRITE 1 struct kstat_s; typedef struct kstat_s kstat_t; typedef int kid_t; /* unique kstat id */ typedef int kstat_update_t(struct kstat_s *, int); /* dynamic update cb */ typedef struct kstat_module { - char ksm_name[KSTAT_STRLEN+1]; /* module name */ + char ksm_name[KSTAT_STRLEN]; /* module name */ struct list_head ksm_module_list; /* module linkage */ struct list_head ksm_kstat_list; /* list of kstat entries */ struct proc_dir_entry *ksm_proc; /* proc entry */ } kstat_module_t; typedef struct kstat_raw_ops { int (*headers)(char *buf, size_t size); int (*data)(char *buf, size_t size, void *data); void *(*addr)(kstat_t *ksp, loff_t index); } kstat_raw_ops_t; typedef struct kstat_proc_entry { - char kpe_name[KSTAT_STRLEN+1]; /* kstat name */ - char kpe_module[KSTAT_STRLEN+1]; /* provider module name */ + char kpe_name[KSTAT_STRLEN]; /* kstat name */ + char kpe_module[KSTAT_STRLEN]; /* provider module name */ kstat_module_t *kpe_owner; /* kstat module linkage */ struct list_head kpe_list; /* kstat linkage */ struct proc_dir_entry *kpe_proc; /* procfs entry */ } kstat_proc_entry_t; struct kstat_s { int ks_magic; /* magic value */ kid_t ks_kid; /* unique kstat ID */ hrtime_t ks_crtime; /* creation time */ hrtime_t ks_snaptime; /* last access time */ int ks_instance; /* provider module instance */ - char ks_class[KSTAT_STRLEN+1]; /* kstat class */ + char ks_class[KSTAT_STRLEN]; /* kstat class */ uchar_t ks_type; /* kstat data type */ uchar_t ks_flags; /* kstat flags */ void *ks_data; /* kstat type-specific data 
*/ uint_t ks_ndata; /* # of data records */ size_t ks_data_size; /* size of kstat data section */ kstat_update_t *ks_update; /* dynamic updates */ void *ks_private; /* private data */ kmutex_t ks_private_lock; /* kstat private data lock */ kmutex_t *ks_lock; /* kstat data lock */ kstat_raw_ops_t ks_raw_ops; /* ops table for raw type */ char *ks_raw_buf; /* buf used for raw ops */ size_t ks_raw_bufsize; /* size of raw ops buffer */ kstat_proc_entry_t ks_proc; /* data for procfs entry */ }; typedef struct kstat_named_s { char name[KSTAT_STRLEN]; /* name of counter */ uchar_t data_type; /* data type */ union { char c[16]; /* 128-bit int */ int32_t i32; /* 32-bit signed int */ uint32_t ui32; /* 32-bit unsigned int */ int64_t i64; /* 64-bit signed int */ uint64_t ui64; /* 64-bit unsigned int */ long l; /* native signed long */ ulong_t ul; /* native unsigned long */ struct { union { char *ptr; /* NULL-term string */ char __pad[8]; /* 64-bit padding */ } addr; uint32_t len; /* # bytes for strlen + '\0' */ } string; } value; } kstat_named_t; #define KSTAT_NAMED_STR_PTR(knptr) ((knptr)->value.string.addr.ptr) #define KSTAT_NAMED_STR_BUFLEN(knptr) ((knptr)->value.string.len) #ifdef HAVE_PROC_OPS_STRUCT typedef struct proc_ops kstat_proc_op_t; #else typedef struct file_operations kstat_proc_op_t; #endif typedef struct kstat_intr { uint_t intrs[KSTAT_NUM_INTRS]; } kstat_intr_t; typedef struct kstat_io { u_longlong_t nread; /* number of bytes read */ u_longlong_t nwritten; /* number of bytes written */ uint_t reads; /* number of read operations */ uint_t writes; /* number of write operations */ hrtime_t wtime; /* cumulative wait (pre-service) time */ hrtime_t wlentime; /* cumulative wait len*time product */ hrtime_t wlastupdate; /* last time wait queue changed */ hrtime_t rtime; /* cumulative run (service) time */ hrtime_t rlentime; /* cumulative run length*time product */ hrtime_t rlastupdate; /* last time run queue changed */ uint_t wcnt; /* count of elements in wait state */ uint_t rcnt; /* count of elements in run state */ } kstat_io_t; typedef struct kstat_timer { - char name[KSTAT_STRLEN+1]; /* event name */ + char name[KSTAT_STRLEN]; /* event name */ u_longlong_t num_events; /* number of events */ hrtime_t elapsed_time; /* cumulative elapsed time */ hrtime_t min_time; /* shortest event duration */ hrtime_t max_time; /* longest event duration */ hrtime_t start_time; /* previous event start time */ hrtime_t stop_time; /* previous event stop time */ } kstat_timer_t; int spl_kstat_init(void); void spl_kstat_fini(void); extern void __kstat_set_raw_ops(kstat_t *ksp, int (*headers)(char *buf, size_t size), int (*data)(char *buf, size_t size, void *data), void* (*addr)(kstat_t *ksp, loff_t index)); extern kstat_t *__kstat_create(const char *ks_module, int ks_instance, const char *ks_name, const char *ks_class, uchar_t ks_type, uint_t ks_ndata, uchar_t ks_flags); extern void kstat_proc_entry_init(kstat_proc_entry_t *kpep, const char *module, const char *name); extern void kstat_proc_entry_delete(kstat_proc_entry_t *kpep); extern void kstat_proc_entry_install(kstat_proc_entry_t *kpep, mode_t mode, const kstat_proc_op_t *file_ops, void *data); extern void __kstat_install(kstat_t *ksp); extern void __kstat_delete(kstat_t *ksp); #define kstat_set_raw_ops(k, h, d, a) \ __kstat_set_raw_ops(k, h, d, a) #define kstat_create(m, i, n, c, t, s, f) \ __kstat_create(m, i, n, c, t, s, f) #define kstat_install(k) __kstat_install(k) #define kstat_delete(k) __kstat_delete(k) #endif /* _SPL_KSTAT_H */ diff --git 
a/lib/libzfs/libzfs_dataset.c b/lib/libzfs/libzfs_dataset.c index 29798af0371e..3288268fb258 100644 --- a/lib/libzfs/libzfs_dataset.c +++ b/lib/libzfs/libzfs_dataset.c @@ -1,5590 +1,5590 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or https://opensource.org/licenses/CDDL-1.0. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright 2019 Joyent, Inc. * Copyright (c) 2011, 2020 by Delphix. All rights reserved. * Copyright (c) 2012 DEY Storage Systems, Inc. All rights reserved. * Copyright (c) 2012 Pawel Jakub Dawidek . * Copyright (c) 2013 Martin Matuska. All rights reserved. * Copyright (c) 2013 Steven Hartland. All rights reserved. * Copyright 2017 Nexenta Systems, Inc. * Copyright 2016 Igor Kozhukhov * Copyright 2017-2018 RackTop Systems. * Copyright (c) 2019 Datto Inc. * Copyright (c) 2019, loli10K * Copyright (c) 2021 Matt Fiddaman */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef HAVE_IDMAP #include #include #include #endif /* HAVE_IDMAP */ #include #include #include #include #include #include #include "zfs_namecheck.h" #include "zfs_prop.h" #include "libzfs_impl.h" #include "zfs_deleg.h" static int userquota_propname_decode(const char *propname, boolean_t zoned, zfs_userquota_prop_t *typep, char *domain, int domainlen, uint64_t *ridp); /* * Given a single type (not a mask of types), return the type in a human * readable form. */ const char * zfs_type_to_name(zfs_type_t type) { switch (type) { case ZFS_TYPE_FILESYSTEM: return (dgettext(TEXT_DOMAIN, "filesystem")); case ZFS_TYPE_SNAPSHOT: return (dgettext(TEXT_DOMAIN, "snapshot")); case ZFS_TYPE_VOLUME: return (dgettext(TEXT_DOMAIN, "volume")); case ZFS_TYPE_POOL: return (dgettext(TEXT_DOMAIN, "pool")); case ZFS_TYPE_BOOKMARK: return (dgettext(TEXT_DOMAIN, "bookmark")); default: assert(!"unhandled zfs_type_t"); } return (NULL); } /* * Validate a ZFS path. This is used even before trying to open the dataset, to * provide a more meaningful error message. We call zfs_error_aux() to * explain exactly why the name was not valid. 
*/ int zfs_validate_name(libzfs_handle_t *hdl, const char *path, int type, boolean_t modifying) { namecheck_err_t why; char what; if (!(type & ZFS_TYPE_SNAPSHOT) && strchr(path, '@') != NULL) { if (hdl != NULL) zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "snapshot delimiter '@' is not expected here")); return (0); } if (type == ZFS_TYPE_SNAPSHOT && strchr(path, '@') == NULL) { if (hdl != NULL) zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "missing '@' delimiter in snapshot name")); return (0); } if (!(type & ZFS_TYPE_BOOKMARK) && strchr(path, '#') != NULL) { if (hdl != NULL) zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "bookmark delimiter '#' is not expected here")); return (0); } if (type == ZFS_TYPE_BOOKMARK && strchr(path, '#') == NULL) { if (hdl != NULL) zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "missing '#' delimiter in bookmark name")); return (0); } if (modifying && strchr(path, '%') != NULL) { if (hdl != NULL) zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid character %c in name"), '%'); return (0); } if (entity_namecheck(path, &why, &what) != 0) { if (hdl != NULL) { switch (why) { case NAME_ERR_TOOLONG: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "name is too long")); break; case NAME_ERR_LEADING_SLASH: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "leading slash in name")); break; case NAME_ERR_EMPTY_COMPONENT: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "empty component or misplaced '@'" " or '#' delimiter in name")); break; case NAME_ERR_TRAILING_SLASH: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "trailing slash in name")); break; case NAME_ERR_INVALCHAR: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid character " "'%c' in name"), what); break; case NAME_ERR_MULTIPLE_DELIMITERS: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "multiple '@' and/or '#' delimiters in " "name")); break; case NAME_ERR_NOLETTER: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool doesn't begin with a letter")); break; case NAME_ERR_RESERVED: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "name is reserved")); break; case NAME_ERR_DISKLIKE: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "reserved disk name")); break; case NAME_ERR_SELF_REF: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "self reference, '.' is found in name")); break; case NAME_ERR_PARENT_REF: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "parent reference, '..' is found in name")); break; default: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "(%d) not defined"), why); break; } } return (0); } return (-1); } int zfs_name_valid(const char *name, zfs_type_t type) { if (type == ZFS_TYPE_POOL) return (zpool_name_valid(NULL, B_FALSE, name)); return (zfs_validate_name(NULL, name, type, B_FALSE)); } /* * This function takes the raw DSL properties, and filters out the user-defined * properties into a separate nvlist. 
*/ static nvlist_t * process_user_props(zfs_handle_t *zhp, nvlist_t *props) { libzfs_handle_t *hdl = zhp->zfs_hdl; nvpair_t *elem; nvlist_t *nvl; if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0) { (void) no_memory(hdl); return (NULL); } elem = NULL; while ((elem = nvlist_next_nvpair(props, elem)) != NULL) { if (!zfs_prop_user(nvpair_name(elem))) continue; nvlist_t *propval = fnvpair_value_nvlist(elem); if (nvlist_add_nvlist(nvl, nvpair_name(elem), propval) != 0) { nvlist_free(nvl); (void) no_memory(hdl); return (NULL); } } return (nvl); } static zpool_handle_t * zpool_add_handle(zfs_handle_t *zhp, const char *pool_name) { libzfs_handle_t *hdl = zhp->zfs_hdl; zpool_handle_t *zph; if ((zph = zpool_open_canfail(hdl, pool_name)) != NULL) { if (hdl->libzfs_pool_handles != NULL) zph->zpool_next = hdl->libzfs_pool_handles; hdl->libzfs_pool_handles = zph; } return (zph); } static zpool_handle_t * zpool_find_handle(zfs_handle_t *zhp, const char *pool_name, int len) { libzfs_handle_t *hdl = zhp->zfs_hdl; zpool_handle_t *zph = hdl->libzfs_pool_handles; while ((zph != NULL) && (strncmp(pool_name, zpool_get_name(zph), len) != 0)) zph = zph->zpool_next; return (zph); } /* * Returns a handle to the pool that contains the provided dataset. * If a handle to that pool already exists then that handle is returned. * Otherwise, a new handle is created and added to the list of handles. */ static zpool_handle_t * zpool_handle(zfs_handle_t *zhp) { char *pool_name; int len; zpool_handle_t *zph; len = strcspn(zhp->zfs_name, "/@#") + 1; pool_name = zfs_alloc(zhp->zfs_hdl, len); (void) strlcpy(pool_name, zhp->zfs_name, len); zph = zpool_find_handle(zhp, pool_name, len); if (zph == NULL) zph = zpool_add_handle(zhp, pool_name); free(pool_name); return (zph); } void zpool_free_handles(libzfs_handle_t *hdl) { zpool_handle_t *next, *zph = hdl->libzfs_pool_handles; while (zph != NULL) { next = zph->zpool_next; zpool_close(zph); zph = next; } hdl->libzfs_pool_handles = NULL; } /* * Utility function to gather stats (objset and zpl) for the given object. */ static int get_stats_ioctl(zfs_handle_t *zhp, zfs_cmd_t *zc) { libzfs_handle_t *hdl = zhp->zfs_hdl; (void) strlcpy(zc->zc_name, zhp->zfs_name, sizeof (zc->zc_name)); while (zfs_ioctl(hdl, ZFS_IOC_OBJSET_STATS, zc) != 0) { if (errno == ENOMEM) zcmd_expand_dst_nvlist(hdl, zc); else return (-1); } return (0); } /* * Utility function to get the received properties of the given object. */ static int get_recvd_props_ioctl(zfs_handle_t *zhp) { libzfs_handle_t *hdl = zhp->zfs_hdl; nvlist_t *recvdprops; zfs_cmd_t zc = {"\0"}; int err; zcmd_alloc_dst_nvlist(hdl, &zc, 0); (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); while (zfs_ioctl(hdl, ZFS_IOC_OBJSET_RECVD_PROPS, &zc) != 0) { if (errno == ENOMEM) zcmd_expand_dst_nvlist(hdl, &zc); else { zcmd_free_nvlists(&zc); return (-1); } } err = zcmd_read_dst_nvlist(zhp->zfs_hdl, &zc, &recvdprops); zcmd_free_nvlists(&zc); if (err != 0) return (-1); nvlist_free(zhp->zfs_recvd_props); zhp->zfs_recvd_props = recvdprops; return (0); } static int put_stats_zhdl(zfs_handle_t *zhp, zfs_cmd_t *zc) { nvlist_t *allprops, *userprops; zhp->zfs_dmustats = zc->zc_objset_stats; /* structure assignment */ if (zcmd_read_dst_nvlist(zhp->zfs_hdl, zc, &allprops) != 0) { return (-1); } /* * XXX Why do we store the user props separately, in addition to * storing them in zfs_props? 
*/ if ((userprops = process_user_props(zhp, allprops)) == NULL) { nvlist_free(allprops); return (-1); } nvlist_free(zhp->zfs_props); nvlist_free(zhp->zfs_user_props); zhp->zfs_props = allprops; zhp->zfs_user_props = userprops; return (0); } static int get_stats(zfs_handle_t *zhp) { int rc = 0; zfs_cmd_t zc = {"\0"}; zcmd_alloc_dst_nvlist(zhp->zfs_hdl, &zc, 0); if (get_stats_ioctl(zhp, &zc) != 0) rc = -1; else if (put_stats_zhdl(zhp, &zc) != 0) rc = -1; zcmd_free_nvlists(&zc); return (rc); } /* * Refresh the properties currently stored in the handle. */ void zfs_refresh_properties(zfs_handle_t *zhp) { (void) get_stats(zhp); } /* * Makes a handle from the given dataset name. Used by zfs_open() and * zfs_iter_* to create child handles on the fly. */ static int make_dataset_handle_common(zfs_handle_t *zhp, zfs_cmd_t *zc) { if (put_stats_zhdl(zhp, zc) != 0) return (-1); /* * We've managed to open the dataset and gather statistics. Determine * the high-level type. */ if (zhp->zfs_dmustats.dds_type == DMU_OST_ZVOL) { zhp->zfs_head_type = ZFS_TYPE_VOLUME; } else if (zhp->zfs_dmustats.dds_type == DMU_OST_ZFS) { zhp->zfs_head_type = ZFS_TYPE_FILESYSTEM; } else if (zhp->zfs_dmustats.dds_type == DMU_OST_OTHER) { errno = EINVAL; return (-1); } else if (zhp->zfs_dmustats.dds_inconsistent) { errno = EBUSY; return (-1); } else { abort(); } if (zhp->zfs_dmustats.dds_is_snapshot) zhp->zfs_type = ZFS_TYPE_SNAPSHOT; else if (zhp->zfs_dmustats.dds_type == DMU_OST_ZVOL) zhp->zfs_type = ZFS_TYPE_VOLUME; else if (zhp->zfs_dmustats.dds_type == DMU_OST_ZFS) zhp->zfs_type = ZFS_TYPE_FILESYSTEM; else abort(); /* we should never see any other types */ if ((zhp->zpool_hdl = zpool_handle(zhp)) == NULL) return (-1); return (0); } zfs_handle_t * make_dataset_handle(libzfs_handle_t *hdl, const char *path) { zfs_cmd_t zc = {"\0"}; zfs_handle_t *zhp = calloc(1, sizeof (zfs_handle_t)); if (zhp == NULL) return (NULL); zhp->zfs_hdl = hdl; (void) strlcpy(zhp->zfs_name, path, sizeof (zhp->zfs_name)); zcmd_alloc_dst_nvlist(hdl, &zc, 0); if (get_stats_ioctl(zhp, &zc) == -1) { zcmd_free_nvlists(&zc); free(zhp); return (NULL); } if (make_dataset_handle_common(zhp, &zc) == -1) { free(zhp); zhp = NULL; } zcmd_free_nvlists(&zc); return (zhp); } zfs_handle_t * make_dataset_handle_zc(libzfs_handle_t *hdl, zfs_cmd_t *zc) { zfs_handle_t *zhp = calloc(1, sizeof (zfs_handle_t)); if (zhp == NULL) return (NULL); zhp->zfs_hdl = hdl; (void) strlcpy(zhp->zfs_name, zc->zc_name, sizeof (zhp->zfs_name)); if (make_dataset_handle_common(zhp, zc) == -1) { free(zhp); return (NULL); } return (zhp); } zfs_handle_t * make_dataset_simple_handle_zc(zfs_handle_t *pzhp, zfs_cmd_t *zc) { zfs_handle_t *zhp = calloc(1, sizeof (zfs_handle_t)); if (zhp == NULL) return (NULL); zhp->zfs_hdl = pzhp->zfs_hdl; (void) strlcpy(zhp->zfs_name, zc->zc_name, sizeof (zhp->zfs_name)); zhp->zfs_head_type = pzhp->zfs_type; zhp->zfs_type = ZFS_TYPE_SNAPSHOT; zhp->zpool_hdl = zpool_handle(zhp); zhp->zfs_dmustats = zc->zc_objset_stats; return (zhp); } zfs_handle_t * zfs_handle_dup(zfs_handle_t *zhp_orig) { zfs_handle_t *zhp = calloc(1, sizeof (zfs_handle_t)); if (zhp == NULL) return (NULL); zhp->zfs_hdl = zhp_orig->zfs_hdl; zhp->zpool_hdl = zhp_orig->zpool_hdl; (void) strlcpy(zhp->zfs_name, zhp_orig->zfs_name, sizeof (zhp->zfs_name)); zhp->zfs_type = zhp_orig->zfs_type; zhp->zfs_head_type = zhp_orig->zfs_head_type; zhp->zfs_dmustats = zhp_orig->zfs_dmustats; if (zhp_orig->zfs_props != NULL) { if (nvlist_dup(zhp_orig->zfs_props, &zhp->zfs_props, 0) != 0) { (void) 
no_memory(zhp->zfs_hdl); zfs_close(zhp); return (NULL); } } if (zhp_orig->zfs_user_props != NULL) { if (nvlist_dup(zhp_orig->zfs_user_props, &zhp->zfs_user_props, 0) != 0) { (void) no_memory(zhp->zfs_hdl); zfs_close(zhp); return (NULL); } } if (zhp_orig->zfs_recvd_props != NULL) { if (nvlist_dup(zhp_orig->zfs_recvd_props, &zhp->zfs_recvd_props, 0)) { (void) no_memory(zhp->zfs_hdl); zfs_close(zhp); return (NULL); } } zhp->zfs_mntcheck = zhp_orig->zfs_mntcheck; if (zhp_orig->zfs_mntopts != NULL) { zhp->zfs_mntopts = zfs_strdup(zhp_orig->zfs_hdl, zhp_orig->zfs_mntopts); } zhp->zfs_props_table = zhp_orig->zfs_props_table; return (zhp); } boolean_t zfs_bookmark_exists(const char *path) { nvlist_t *bmarks; nvlist_t *props; char fsname[ZFS_MAX_DATASET_NAME_LEN]; char *bmark_name; char *pound; int err; boolean_t rv; (void) strlcpy(fsname, path, sizeof (fsname)); pound = strchr(fsname, '#'); if (pound == NULL) return (B_FALSE); *pound = '\0'; bmark_name = pound + 1; props = fnvlist_alloc(); err = lzc_get_bookmarks(fsname, props, &bmarks); nvlist_free(props); if (err != 0) { nvlist_free(bmarks); return (B_FALSE); } rv = nvlist_exists(bmarks, bmark_name); nvlist_free(bmarks); return (rv); } zfs_handle_t * make_bookmark_handle(zfs_handle_t *parent, const char *path, nvlist_t *bmark_props) { zfs_handle_t *zhp = calloc(1, sizeof (zfs_handle_t)); if (zhp == NULL) return (NULL); /* Fill in the name. */ zhp->zfs_hdl = parent->zfs_hdl; (void) strlcpy(zhp->zfs_name, path, sizeof (zhp->zfs_name)); /* Set the property lists. */ if (nvlist_dup(bmark_props, &zhp->zfs_props, 0) != 0) { free(zhp); return (NULL); } /* Set the types. */ zhp->zfs_head_type = parent->zfs_head_type; zhp->zfs_type = ZFS_TYPE_BOOKMARK; if ((zhp->zpool_hdl = zpool_handle(zhp)) == NULL) { nvlist_free(zhp->zfs_props); free(zhp); return (NULL); } return (zhp); } struct zfs_open_bookmarks_cb_data { const char *path; zfs_handle_t *zhp; }; static int zfs_open_bookmarks_cb(zfs_handle_t *zhp, void *data) { struct zfs_open_bookmarks_cb_data *dp = data; /* * Is it the one we are looking for? */ if (strcmp(dp->path, zfs_get_name(zhp)) == 0) { /* * We found it. Save it and let the caller know we are done. */ dp->zhp = zhp; return (EEXIST); } /* * Not found. Close the handle and ask for another one. */ zfs_close(zhp); return (0); } /* * Opens the given snapshot, bookmark, filesystem, or volume. The 'types' * argument is a mask of acceptable types. The function will print an * appropriate error message and return NULL if it can't be opened. */ zfs_handle_t * zfs_open(libzfs_handle_t *hdl, const char *path, int types) { zfs_handle_t *zhp; char errbuf[ERRBUFLEN]; char *bookp; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot open '%s'"), path); /* * Validate the name before we even try to open it. */ if (!zfs_validate_name(hdl, path, types, B_FALSE)) { (void) zfs_error(hdl, EZFS_INVALIDNAME, errbuf); return (NULL); } /* * Bookmarks needs to be handled separately. */ bookp = strchr(path, '#'); if (bookp == NULL) { /* * Try to get stats for the dataset, which will tell us if it * exists. */ errno = 0; if ((zhp = make_dataset_handle(hdl, path)) == NULL) { (void) zfs_standard_error(hdl, errno, errbuf); return (NULL); } } else { char dsname[ZFS_MAX_DATASET_NAME_LEN]; zfs_handle_t *pzhp; struct zfs_open_bookmarks_cb_data cb_data = {path, NULL}; /* * We need to cut out '#' and everything after '#' * to get the parent dataset name only. 
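 * Copying with strlcpy() and a size of (bookp - path + 1) takes just the
 * dataset portion and always NUL-terminates, which the strncpy() call it
 * replaces did not guarantee on its own.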
*/ assert(bookp - path < sizeof (dsname)); - (void) strncpy(dsname, path, bookp - path); - dsname[bookp - path] = '\0'; + (void) strlcpy(dsname, path, + MIN(sizeof (dsname), bookp - path + 1)); /* * Create handle for the parent dataset. */ errno = 0; if ((pzhp = make_dataset_handle(hdl, dsname)) == NULL) { (void) zfs_standard_error(hdl, errno, errbuf); return (NULL); } /* * Iterate bookmarks to find the right one. */ errno = 0; if ((zfs_iter_bookmarks(pzhp, zfs_open_bookmarks_cb, &cb_data) == 0) && (cb_data.zhp == NULL)) { (void) zfs_error(hdl, EZFS_NOENT, errbuf); zfs_close(pzhp); return (NULL); } if (cb_data.zhp == NULL) { (void) zfs_standard_error(hdl, errno, errbuf); zfs_close(pzhp); return (NULL); } zhp = cb_data.zhp; /* * Cleanup. */ zfs_close(pzhp); } if (!(types & zhp->zfs_type)) { (void) zfs_error(hdl, EZFS_BADTYPE, errbuf); zfs_close(zhp); return (NULL); } return (zhp); } /* * Release a ZFS handle. Nothing to do but free the associated memory. */ void zfs_close(zfs_handle_t *zhp) { if (zhp->zfs_mntopts) free(zhp->zfs_mntopts); nvlist_free(zhp->zfs_props); nvlist_free(zhp->zfs_user_props); nvlist_free(zhp->zfs_recvd_props); free(zhp); } typedef struct mnttab_node { struct mnttab mtn_mt; avl_node_t mtn_node; } mnttab_node_t; static int libzfs_mnttab_cache_compare(const void *arg1, const void *arg2) { const mnttab_node_t *mtn1 = (const mnttab_node_t *)arg1; const mnttab_node_t *mtn2 = (const mnttab_node_t *)arg2; int rv; rv = strcmp(mtn1->mtn_mt.mnt_special, mtn2->mtn_mt.mnt_special); return (TREE_ISIGN(rv)); } void libzfs_mnttab_init(libzfs_handle_t *hdl) { pthread_mutex_init(&hdl->libzfs_mnttab_cache_lock, NULL); assert(avl_numnodes(&hdl->libzfs_mnttab_cache) == 0); avl_create(&hdl->libzfs_mnttab_cache, libzfs_mnttab_cache_compare, sizeof (mnttab_node_t), offsetof(mnttab_node_t, mtn_node)); } static int libzfs_mnttab_update(libzfs_handle_t *hdl) { FILE *mnttab; struct mnttab entry; if ((mnttab = fopen(MNTTAB, "re")) == NULL) return (ENOENT); while (getmntent(mnttab, &entry) == 0) { mnttab_node_t *mtn; avl_index_t where; if (strcmp(entry.mnt_fstype, MNTTYPE_ZFS) != 0) continue; mtn = zfs_alloc(hdl, sizeof (mnttab_node_t)); mtn->mtn_mt.mnt_special = zfs_strdup(hdl, entry.mnt_special); mtn->mtn_mt.mnt_mountp = zfs_strdup(hdl, entry.mnt_mountp); mtn->mtn_mt.mnt_fstype = zfs_strdup(hdl, entry.mnt_fstype); mtn->mtn_mt.mnt_mntopts = zfs_strdup(hdl, entry.mnt_mntopts); /* Exclude duplicate mounts */ if (avl_find(&hdl->libzfs_mnttab_cache, mtn, &where) != NULL) { free(mtn->mtn_mt.mnt_special); free(mtn->mtn_mt.mnt_mountp); free(mtn->mtn_mt.mnt_fstype); free(mtn->mtn_mt.mnt_mntopts); free(mtn); continue; } avl_add(&hdl->libzfs_mnttab_cache, mtn); } (void) fclose(mnttab); return (0); } void libzfs_mnttab_fini(libzfs_handle_t *hdl) { void *cookie = NULL; mnttab_node_t *mtn; while ((mtn = avl_destroy_nodes(&hdl->libzfs_mnttab_cache, &cookie)) != NULL) { free(mtn->mtn_mt.mnt_special); free(mtn->mtn_mt.mnt_mountp); free(mtn->mtn_mt.mnt_fstype); free(mtn->mtn_mt.mnt_mntopts); free(mtn); } avl_destroy(&hdl->libzfs_mnttab_cache); (void) pthread_mutex_destroy(&hdl->libzfs_mnttab_cache_lock); } void libzfs_mnttab_cache(libzfs_handle_t *hdl, boolean_t enable) { hdl->libzfs_mnttab_enable = enable; } int libzfs_mnttab_find(libzfs_handle_t *hdl, const char *fsname, struct mnttab *entry) { FILE *mnttab; mnttab_node_t find; mnttab_node_t *mtn; int ret = ENOENT; if (!hdl->libzfs_mnttab_enable) { struct mnttab srch = { 0 }; if (avl_numnodes(&hdl->libzfs_mnttab_cache)) libzfs_mnttab_fini(hdl); if ((mnttab = 
fopen(MNTTAB, "re")) == NULL) return (ENOENT); srch.mnt_special = (char *)fsname; srch.mnt_fstype = (char *)MNTTYPE_ZFS; ret = getmntany(mnttab, entry, &srch) ? ENOENT : 0; (void) fclose(mnttab); return (ret); } pthread_mutex_lock(&hdl->libzfs_mnttab_cache_lock); if (avl_numnodes(&hdl->libzfs_mnttab_cache) == 0) { int error; if ((error = libzfs_mnttab_update(hdl)) != 0) { pthread_mutex_unlock(&hdl->libzfs_mnttab_cache_lock); return (error); } } find.mtn_mt.mnt_special = (char *)fsname; mtn = avl_find(&hdl->libzfs_mnttab_cache, &find, NULL); if (mtn) { *entry = mtn->mtn_mt; ret = 0; } pthread_mutex_unlock(&hdl->libzfs_mnttab_cache_lock); return (ret); } void libzfs_mnttab_add(libzfs_handle_t *hdl, const char *special, const char *mountp, const char *mntopts) { mnttab_node_t *mtn; pthread_mutex_lock(&hdl->libzfs_mnttab_cache_lock); if (avl_numnodes(&hdl->libzfs_mnttab_cache) != 0) { mtn = zfs_alloc(hdl, sizeof (mnttab_node_t)); mtn->mtn_mt.mnt_special = zfs_strdup(hdl, special); mtn->mtn_mt.mnt_mountp = zfs_strdup(hdl, mountp); mtn->mtn_mt.mnt_fstype = zfs_strdup(hdl, MNTTYPE_ZFS); mtn->mtn_mt.mnt_mntopts = zfs_strdup(hdl, mntopts); /* * Another thread may have already added this entry * via libzfs_mnttab_update. If so we should skip it. */ if (avl_find(&hdl->libzfs_mnttab_cache, mtn, NULL) != NULL) { free(mtn->mtn_mt.mnt_special); free(mtn->mtn_mt.mnt_mountp); free(mtn->mtn_mt.mnt_fstype); free(mtn->mtn_mt.mnt_mntopts); free(mtn); } else { avl_add(&hdl->libzfs_mnttab_cache, mtn); } } pthread_mutex_unlock(&hdl->libzfs_mnttab_cache_lock); } void libzfs_mnttab_remove(libzfs_handle_t *hdl, const char *fsname) { mnttab_node_t find; mnttab_node_t *ret; pthread_mutex_lock(&hdl->libzfs_mnttab_cache_lock); find.mtn_mt.mnt_special = (char *)fsname; if ((ret = avl_find(&hdl->libzfs_mnttab_cache, (void *)&find, NULL)) != NULL) { avl_remove(&hdl->libzfs_mnttab_cache, ret); free(ret->mtn_mt.mnt_special); free(ret->mtn_mt.mnt_mountp); free(ret->mtn_mt.mnt_fstype); free(ret->mtn_mt.mnt_mntopts); free(ret); } pthread_mutex_unlock(&hdl->libzfs_mnttab_cache_lock); } int zfs_spa_version(zfs_handle_t *zhp, int *spa_version) { zpool_handle_t *zpool_handle = zhp->zpool_hdl; if (zpool_handle == NULL) return (-1); *spa_version = zpool_get_prop_int(zpool_handle, ZPOOL_PROP_VERSION, NULL); return (0); } /* * The choice of reservation property depends on the SPA version. */ static int zfs_which_resv_prop(zfs_handle_t *zhp, zfs_prop_t *resv_prop) { int spa_version; if (zfs_spa_version(zhp, &spa_version) < 0) return (-1); if (spa_version >= SPA_VERSION_REFRESERVATION) *resv_prop = ZFS_PROP_REFRESERVATION; else *resv_prop = ZFS_PROP_RESERVATION; return (0); } /* * Given an nvlist of properties to set, validates that they are correct, and * parses any numeric properties (index, boolean, etc) if they are specified as * strings. */ nvlist_t * zfs_valid_proplist(libzfs_handle_t *hdl, zfs_type_t type, nvlist_t *nvl, uint64_t zoned, zfs_handle_t *zhp, zpool_handle_t *zpool_hdl, boolean_t key_params_ok, const char *errbuf) { nvpair_t *elem; uint64_t intval; char *strval; zfs_prop_t prop; nvlist_t *ret; int chosen_normal = -1; int chosen_utf = -1; if (nvlist_alloc(&ret, NV_UNIQUE_NAME, 0) != 0) { (void) no_memory(hdl); return (NULL); } /* * Make sure this property is valid and applies to this type. 
*/ elem = NULL; while ((elem = nvlist_next_nvpair(nvl, elem)) != NULL) { const char *propname = nvpair_name(elem); prop = zfs_name_to_prop(propname); if (prop == ZPROP_USERPROP && zfs_prop_user(propname)) { /* * This is a user property: make sure it's a * string, and that it's less than ZAP_MAXNAMELEN. */ if (nvpair_type(elem) != DATA_TYPE_STRING) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' must be a string"), propname); (void) zfs_error(hdl, EZFS_BADPROP, errbuf); goto error; } if (strlen(nvpair_name(elem)) >= ZAP_MAXNAMELEN) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "property name '%s' is too long"), propname); (void) zfs_error(hdl, EZFS_BADPROP, errbuf); goto error; } (void) nvpair_value_string(elem, &strval); if (nvlist_add_string(ret, propname, strval) != 0) { (void) no_memory(hdl); goto error; } continue; } /* * Currently, only user properties can be modified on * snapshots. */ if (type == ZFS_TYPE_SNAPSHOT) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "this property can not be modified for snapshots")); (void) zfs_error(hdl, EZFS_PROPTYPE, errbuf); goto error; } if (prop == ZPROP_USERPROP && zfs_prop_userquota(propname)) { zfs_userquota_prop_t uqtype; char *newpropname = NULL; char domain[128]; uint64_t rid; uint64_t valary[3]; int rc; if (userquota_propname_decode(propname, zoned, &uqtype, domain, sizeof (domain), &rid) != 0) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' has an invalid user/group name"), propname); (void) zfs_error(hdl, EZFS_BADPROP, errbuf); goto error; } if (uqtype != ZFS_PROP_USERQUOTA && uqtype != ZFS_PROP_GROUPQUOTA && uqtype != ZFS_PROP_USEROBJQUOTA && uqtype != ZFS_PROP_GROUPOBJQUOTA && uqtype != ZFS_PROP_PROJECTQUOTA && uqtype != ZFS_PROP_PROJECTOBJQUOTA) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' is readonly"), propname); (void) zfs_error(hdl, EZFS_PROPREADONLY, errbuf); goto error; } if (nvpair_type(elem) == DATA_TYPE_STRING) { (void) nvpair_value_string(elem, &strval); if (strcmp(strval, "none") == 0) { intval = 0; } else if (zfs_nicestrtonum(hdl, strval, &intval) != 0) { (void) zfs_error(hdl, EZFS_BADPROP, errbuf); goto error; } } else if (nvpair_type(elem) == DATA_TYPE_UINT64) { (void) nvpair_value_uint64(elem, &intval); if (intval == 0) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "use 'none' to disable " "{user|group|project}quota")); goto error; } } else { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' must be a number"), propname); (void) zfs_error(hdl, EZFS_BADPROP, errbuf); goto error; } /* * Encode the prop name as * userquota@-domain, to make it easy * for the kernel to decode. 
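 * For example, a user quota for rid 1000 (0x3e8) in domain "example.com"
 * would be encoded as "userquota@3e8-example.com" (illustrative values).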
*/ rc = asprintf(&newpropname, "%s%llx-%s", zfs_userquota_prop_prefixes[uqtype], (longlong_t)rid, domain); if (rc == -1 || newpropname == NULL) { (void) no_memory(hdl); goto error; } valary[0] = uqtype; valary[1] = rid; valary[2] = intval; if (nvlist_add_uint64_array(ret, newpropname, valary, 3) != 0) { free(newpropname); (void) no_memory(hdl); goto error; } free(newpropname); continue; } else if (prop == ZPROP_USERPROP && zfs_prop_written(propname)) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' is readonly"), propname); (void) zfs_error(hdl, EZFS_PROPREADONLY, errbuf); goto error; } if (prop == ZPROP_INVAL) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid property '%s'"), propname); (void) zfs_error(hdl, EZFS_BADPROP, errbuf); goto error; } if (!zfs_prop_valid_for_type(prop, type, B_FALSE)) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' does not " "apply to datasets of this type"), propname); (void) zfs_error(hdl, EZFS_PROPTYPE, errbuf); goto error; } if (zfs_prop_readonly(prop) && !(zfs_prop_setonce(prop) && zhp == NULL) && !(zfs_prop_encryption_key_param(prop) && key_params_ok)) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' is readonly"), propname); (void) zfs_error(hdl, EZFS_PROPREADONLY, errbuf); goto error; } if (zprop_parse_value(hdl, elem, prop, type, ret, &strval, &intval, errbuf) != 0) goto error; /* * Perform some additional checks for specific properties. */ switch (prop) { case ZFS_PROP_VERSION: { int version; if (zhp == NULL) break; version = zfs_prop_get_int(zhp, ZFS_PROP_VERSION); if (intval < version) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "Can not downgrade; already at version %u"), version); (void) zfs_error(hdl, EZFS_BADPROP, errbuf); goto error; } break; } case ZFS_PROP_VOLBLOCKSIZE: case ZFS_PROP_RECORDSIZE: { int maxbs = SPA_MAXBLOCKSIZE; char buf[64]; if (zpool_hdl != NULL) { maxbs = zpool_get_prop_int(zpool_hdl, ZPOOL_PROP_MAXBLOCKSIZE, NULL); } /* * The value must be a power of two between * SPA_MINBLOCKSIZE and maxbs. */ if (intval < SPA_MINBLOCKSIZE || intval > maxbs || !ISP2(intval)) { zfs_nicebytes(maxbs, buf, sizeof (buf)); zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' must be power of 2 from 512B " "to %s"), propname, buf); (void) zfs_error(hdl, EZFS_BADPROP, errbuf); goto error; } break; } case ZFS_PROP_SPECIAL_SMALL_BLOCKS: { int maxbs = SPA_OLD_MAXBLOCKSIZE; char buf[64]; if (zpool_hdl != NULL) { char state[64] = ""; maxbs = zpool_get_prop_int(zpool_hdl, ZPOOL_PROP_MAXBLOCKSIZE, NULL); /* * Issue a warning but do not fail so that * tests for settable properties succeed. */ if (zpool_prop_get_feature(zpool_hdl, "feature@allocation_classes", state, sizeof (state)) != 0 || strcmp(state, ZFS_FEATURE_ACTIVE) != 0) { (void) fprintf(stderr, gettext( "%s: property requires a special " "device in the pool\n"), propname); } } if (intval != 0 && (intval < SPA_MINBLOCKSIZE || intval > maxbs || !ISP2(intval))) { zfs_nicebytes(maxbs, buf, sizeof (buf)); zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid '%s=%llu' property: must be zero " "or a power of 2 from 512B to %s"), propname, (unsigned long long)intval, buf); (void) zfs_error(hdl, EZFS_BADPROP, errbuf); goto error; } break; } case ZFS_PROP_MLSLABEL: { #ifdef HAVE_MLSLABEL /* * Verify the mlslabel string and convert to * internal hex label string. */ m_label_t *new_sl; char *hex = NULL; /* internal label string */ /* Default value is already OK. 
*/ if (strcasecmp(strval, ZFS_MLSLABEL_DEFAULT) == 0) break; /* Verify the label can be converted to binary form */ if (((new_sl = m_label_alloc(MAC_LABEL)) == NULL) || (str_to_label(strval, &new_sl, MAC_LABEL, L_NO_CORRECTION, NULL) == -1)) { goto badlabel; } /* Now translate to hex internal label string */ if (label_to_str(new_sl, &hex, M_INTERNAL, DEF_NAMES) != 0) { if (hex) free(hex); goto badlabel; } m_label_free(new_sl); /* If string is already in internal form, we're done. */ if (strcmp(strval, hex) == 0) { free(hex); break; } /* Replace the label string with the internal form. */ (void) nvlist_remove(ret, zfs_prop_to_name(prop), DATA_TYPE_STRING); fnvlist_add_string(ret, zfs_prop_to_name(prop), hex); free(hex); break; badlabel: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid mlslabel '%s'"), strval); (void) zfs_error(hdl, EZFS_BADPROP, errbuf); m_label_free(new_sl); /* OK if null */ goto error; #else zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "mlslabels are unsupported")); (void) zfs_error(hdl, EZFS_BADPROP, errbuf); goto error; #endif /* HAVE_MLSLABEL */ } case ZFS_PROP_MOUNTPOINT: { namecheck_err_t why; if (strcmp(strval, ZFS_MOUNTPOINT_NONE) == 0 || strcmp(strval, ZFS_MOUNTPOINT_LEGACY) == 0) break; if (mountpoint_namecheck(strval, &why)) { switch (why) { case NAME_ERR_LEADING_SLASH: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' must be an absolute path, " "'none', or 'legacy'"), propname); break; case NAME_ERR_TOOLONG: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "component of '%s' is too long"), propname); break; default: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "(%d) not defined"), why); break; } (void) zfs_error(hdl, EZFS_BADPROP, errbuf); goto error; } zfs_fallthrough; } case ZFS_PROP_SHARESMB: case ZFS_PROP_SHARENFS: /* * For the mountpoint and sharenfs or sharesmb * properties, check if it can be set in a * global/non-global zone based on * the zoned property value: * * global zone non-global zone * -------------------------------------------------- * zoned=on mountpoint (no) mountpoint (yes) * sharenfs (no) sharenfs (no) * sharesmb (no) sharesmb (no) * * zoned=off mountpoint (yes) N/A * sharenfs (yes) * sharesmb (yes) */ if (zoned) { if (getzoneid() == GLOBAL_ZONEID) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' cannot be set on " "dataset in a non-global zone"), propname); (void) zfs_error(hdl, EZFS_ZONED, errbuf); goto error; } else if (prop == ZFS_PROP_SHARENFS || prop == ZFS_PROP_SHARESMB) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' cannot be set in " "a non-global zone"), propname); (void) zfs_error(hdl, EZFS_ZONED, errbuf); goto error; } } else if (getzoneid() != GLOBAL_ZONEID) { /* * If zoned property is 'off', this must be in * a global zone. If not, something is wrong. */ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' cannot be set while dataset " "'zoned' property is set"), propname); (void) zfs_error(hdl, EZFS_ZONED, errbuf); goto error; } /* * At this point, it is legitimate to set the * property. Now we want to make sure that the * property value is valid if it is sharenfs. 
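 * Any value other than "on" or "off" is treated as a share option string
 * and is validated with sa_validate_shareopts() for the matching protocol.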
*/ if ((prop == ZFS_PROP_SHARENFS || prop == ZFS_PROP_SHARESMB) && strcmp(strval, "on") != 0 && strcmp(strval, "off") != 0) { enum sa_protocol proto; if (prop == ZFS_PROP_SHARESMB) proto = SA_PROTOCOL_SMB; else proto = SA_PROTOCOL_NFS; if (sa_validate_shareopts(strval, proto) != SA_OK) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' cannot be set to invalid " "options"), propname); (void) zfs_error(hdl, EZFS_BADPROP, errbuf); goto error; } } break; case ZFS_PROP_KEYLOCATION: if (!zfs_prop_valid_keylocation(strval, B_FALSE)) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid keylocation")); (void) zfs_error(hdl, EZFS_BADPROP, errbuf); goto error; } if (zhp != NULL) { uint64_t crypt = zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION); if (crypt == ZIO_CRYPT_OFF && strcmp(strval, "none") != 0) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "keylocation must be 'none' " "for unencrypted datasets")); (void) zfs_error(hdl, EZFS_BADPROP, errbuf); goto error; } else if (crypt != ZIO_CRYPT_OFF && strcmp(strval, "none") == 0) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "keylocation must not be 'none' " "for encrypted datasets")); (void) zfs_error(hdl, EZFS_BADPROP, errbuf); goto error; } } break; case ZFS_PROP_PBKDF2_ITERS: if (intval < MIN_PBKDF2_ITERATIONS) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "minimum pbkdf2 iterations is %u"), MIN_PBKDF2_ITERATIONS); (void) zfs_error(hdl, EZFS_BADPROP, errbuf); goto error; } break; case ZFS_PROP_UTF8ONLY: chosen_utf = (int)intval; break; case ZFS_PROP_NORMALIZE: chosen_normal = (int)intval; break; default: break; } /* * For changes to existing volumes, we have some additional * checks to enforce. */ if (type == ZFS_TYPE_VOLUME && zhp != NULL) { uint64_t blocksize = zfs_prop_get_int(zhp, ZFS_PROP_VOLBLOCKSIZE); char buf[64]; switch (prop) { case ZFS_PROP_VOLSIZE: if (intval % blocksize != 0) { zfs_nicebytes(blocksize, buf, sizeof (buf)); zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' must be a multiple of " "volume block size (%s)"), propname, buf); (void) zfs_error(hdl, EZFS_BADPROP, errbuf); goto error; } if (intval == 0) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' cannot be zero"), propname); (void) zfs_error(hdl, EZFS_BADPROP, errbuf); goto error; } break; default: break; } } /* check encryption properties */ if (zhp != NULL) { int64_t crypt = zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION); switch (prop) { case ZFS_PROP_COPIES: if (crypt != ZIO_CRYPT_OFF && intval > 2) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "encrypted datasets cannot have " "3 copies")); (void) zfs_error(hdl, EZFS_BADPROP, errbuf); goto error; } break; default: break; } } } /* * If normalization was chosen, but no UTF8 choice was made, * enforce rejection of non-UTF8 names. * * If normalization was chosen, but rejecting non-UTF8 names * was explicitly not chosen, it is an error. * * If utf8only was turned off, but the parent has normalization, * turn off normalization. 
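 * For example, setting normalization=formD alone implies utf8only=on, while
 * pairing it with an explicit utf8only=off is rejected as an error.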
*/ if (chosen_normal > 0 && chosen_utf < 0) { if (nvlist_add_uint64(ret, zfs_prop_to_name(ZFS_PROP_UTF8ONLY), 1) != 0) { (void) no_memory(hdl); goto error; } } else if (chosen_normal > 0 && chosen_utf == 0) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' must be set 'on' if normalization chosen"), zfs_prop_to_name(ZFS_PROP_UTF8ONLY)); (void) zfs_error(hdl, EZFS_BADPROP, errbuf); goto error; } else if (chosen_normal < 0 && chosen_utf == 0) { if (nvlist_add_uint64(ret, zfs_prop_to_name(ZFS_PROP_NORMALIZE), 0) != 0) { (void) no_memory(hdl); goto error; } } return (ret); error: nvlist_free(ret); return (NULL); } static int zfs_add_synthetic_resv(zfs_handle_t *zhp, nvlist_t *nvl) { uint64_t old_volsize; uint64_t new_volsize; uint64_t old_reservation; uint64_t new_reservation; zfs_prop_t resv_prop; nvlist_t *props; zpool_handle_t *zph = zpool_handle(zhp); /* * If this is an existing volume, and someone is setting the volsize, * make sure that it matches the reservation, or add it if necessary. */ old_volsize = zfs_prop_get_int(zhp, ZFS_PROP_VOLSIZE); if (zfs_which_resv_prop(zhp, &resv_prop) < 0) return (-1); old_reservation = zfs_prop_get_int(zhp, resv_prop); props = fnvlist_alloc(); fnvlist_add_uint64(props, zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), zfs_prop_get_int(zhp, ZFS_PROP_VOLBLOCKSIZE)); if ((zvol_volsize_to_reservation(zph, old_volsize, props) != old_reservation) || nvlist_exists(nvl, zfs_prop_to_name(resv_prop))) { fnvlist_free(props); return (0); } if (nvlist_lookup_uint64(nvl, zfs_prop_to_name(ZFS_PROP_VOLSIZE), &new_volsize) != 0) { fnvlist_free(props); return (-1); } new_reservation = zvol_volsize_to_reservation(zph, new_volsize, props); fnvlist_free(props); if (nvlist_add_uint64(nvl, zfs_prop_to_name(resv_prop), new_reservation) != 0) { (void) no_memory(zhp->zfs_hdl); return (-1); } return (1); } /* * Helper for 'zfs {set|clone} refreservation=auto'. Must be called after * zfs_valid_proplist(), as it is what sets the UINT64_MAX sentinel value. * Return codes must match zfs_add_synthetic_resv(). */ static int zfs_fix_auto_resv(zfs_handle_t *zhp, nvlist_t *nvl) { uint64_t volsize; uint64_t resvsize; zfs_prop_t prop; nvlist_t *props; if (!ZFS_IS_VOLUME(zhp)) { return (0); } if (zfs_which_resv_prop(zhp, &prop) != 0) { return (-1); } if (prop != ZFS_PROP_REFRESERVATION) { return (0); } if (nvlist_lookup_uint64(nvl, zfs_prop_to_name(prop), &resvsize) != 0) { /* No value being set, so it can't be "auto" */ return (0); } if (resvsize != UINT64_MAX) { /* Being set to a value other than "auto" */ return (0); } props = fnvlist_alloc(); fnvlist_add_uint64(props, zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), zfs_prop_get_int(zhp, ZFS_PROP_VOLBLOCKSIZE)); if (nvlist_lookup_uint64(nvl, zfs_prop_to_name(ZFS_PROP_VOLSIZE), &volsize) != 0) { volsize = zfs_prop_get_int(zhp, ZFS_PROP_VOLSIZE); } resvsize = zvol_volsize_to_reservation(zpool_handle(zhp), volsize, props); fnvlist_free(props); (void) nvlist_remove_all(nvl, zfs_prop_to_name(prop)); if (nvlist_add_uint64(nvl, zfs_prop_to_name(prop), resvsize) != 0) { (void) no_memory(zhp->zfs_hdl); return (-1); } return (1); } static boolean_t zfs_is_namespace_prop(zfs_prop_t prop) { switch (prop) { case ZFS_PROP_ATIME: case ZFS_PROP_RELATIME: case ZFS_PROP_DEVICES: case ZFS_PROP_EXEC: case ZFS_PROP_SETUID: case ZFS_PROP_READONLY: case ZFS_PROP_XATTR: case ZFS_PROP_NBMAND: return (B_TRUE); default: return (B_FALSE); } } /* * Given a property name and value, set the property for the given dataset. 
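 * This is a thin convenience wrapper: the name/value pair is packed into a
 * single-entry nvlist and handed to zfs_prop_set_list().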
*/ int zfs_prop_set(zfs_handle_t *zhp, const char *propname, const char *propval) { int ret = -1; char errbuf[ERRBUFLEN]; libzfs_handle_t *hdl = zhp->zfs_hdl; nvlist_t *nvl = NULL; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot set property for '%s'"), zhp->zfs_name); if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0 || nvlist_add_string(nvl, propname, propval) != 0) { (void) no_memory(hdl); goto error; } ret = zfs_prop_set_list(zhp, nvl); error: nvlist_free(nvl); return (ret); } /* * Given an nvlist of property names and values, set the properties for the * given dataset. */ int zfs_prop_set_list(zfs_handle_t *zhp, nvlist_t *props) { zfs_cmd_t zc = {"\0"}; int ret = -1; prop_changelist_t **cls = NULL; int cl_idx; char errbuf[ERRBUFLEN]; libzfs_handle_t *hdl = zhp->zfs_hdl; nvlist_t *nvl; int nvl_len = 0; int added_resv = 0; zfs_prop_t prop = 0; nvpair_t *elem; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot set property for '%s'"), zhp->zfs_name); if ((nvl = zfs_valid_proplist(hdl, zhp->zfs_type, props, zfs_prop_get_int(zhp, ZFS_PROP_ZONED), zhp, zhp->zpool_hdl, B_FALSE, errbuf)) == NULL) goto error; /* * We have to check for any extra properties which need to be added * before computing the length of the nvlist. */ for (elem = nvlist_next_nvpair(nvl, NULL); elem != NULL; elem = nvlist_next_nvpair(nvl, elem)) { if (zfs_name_to_prop(nvpair_name(elem)) == ZFS_PROP_VOLSIZE && (added_resv = zfs_add_synthetic_resv(zhp, nvl)) == -1) { goto error; } } if (added_resv != 1 && (added_resv = zfs_fix_auto_resv(zhp, nvl)) == -1) { goto error; } /* * Check how many properties we're setting and allocate an array to * store changelist pointers for postfix(). */ for (elem = nvlist_next_nvpair(nvl, NULL); elem != NULL; elem = nvlist_next_nvpair(nvl, elem)) nvl_len++; if ((cls = calloc(nvl_len, sizeof (prop_changelist_t *))) == NULL) goto error; cl_idx = 0; for (elem = nvlist_next_nvpair(nvl, NULL); elem != NULL; elem = nvlist_next_nvpair(nvl, elem)) { prop = zfs_name_to_prop(nvpair_name(elem)); assert(cl_idx < nvl_len); /* * We don't want to unmount & remount the dataset when changing * its canmount property to 'on' or 'noauto'. We only use * the changelist logic to unmount when setting canmount=off. */ if (prop != ZFS_PROP_CANMOUNT || (fnvpair_value_uint64(elem) == ZFS_CANMOUNT_OFF && zfs_is_mounted(zhp, NULL))) { cls[cl_idx] = changelist_gather(zhp, prop, 0, 0); if (cls[cl_idx] == NULL) goto error; } if (prop == ZFS_PROP_MOUNTPOINT && changelist_haszonedchild(cls[cl_idx])) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "child dataset with inherited mountpoint is used " "in a non-global zone")); ret = zfs_error(hdl, EZFS_ZONED, errbuf); goto error; } if (cls[cl_idx] != NULL && (ret = changelist_prefix(cls[cl_idx])) != 0) goto error; cl_idx++; } assert(cl_idx == nvl_len); /* * Execute the corresponding ioctl() to set this list of properties. */ (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); zcmd_write_src_nvlist(hdl, &zc, nvl); zcmd_alloc_dst_nvlist(hdl, &zc, 0); ret = zfs_ioctl(hdl, ZFS_IOC_SET_PROP, &zc); if (ret != 0) { if (zc.zc_nvlist_dst_filled == B_FALSE) { (void) zfs_standard_error(hdl, errno, errbuf); goto error; } /* Get the list of unset properties back and report them. 
*/ nvlist_t *errorprops = NULL; if (zcmd_read_dst_nvlist(hdl, &zc, &errorprops) != 0) goto error; for (nvpair_t *elem = nvlist_next_nvpair(errorprops, NULL); elem != NULL; elem = nvlist_next_nvpair(errorprops, elem)) { prop = zfs_name_to_prop(nvpair_name(elem)); zfs_setprop_error(hdl, prop, errno, errbuf); } nvlist_free(errorprops); if (added_resv && errno == ENOSPC) { /* clean up the volsize property we tried to set */ uint64_t old_volsize = zfs_prop_get_int(zhp, ZFS_PROP_VOLSIZE); nvlist_free(nvl); nvl = NULL; zcmd_free_nvlists(&zc); if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0) goto error; if (nvlist_add_uint64(nvl, zfs_prop_to_name(ZFS_PROP_VOLSIZE), old_volsize) != 0) goto error; zcmd_write_src_nvlist(hdl, &zc, nvl); (void) zfs_ioctl(hdl, ZFS_IOC_SET_PROP, &zc); } } else { for (cl_idx = 0; cl_idx < nvl_len; cl_idx++) { if (cls[cl_idx] != NULL) { int clp_err = changelist_postfix(cls[cl_idx]); if (clp_err != 0) ret = clp_err; } } if (ret == 0) { /* * Refresh the statistics so the new property * value is reflected. */ (void) get_stats(zhp); /* * Remount the filesystem to propagate the change * if one of the options handled by the generic * Linux namespace layer has been modified. */ if (zfs_is_namespace_prop(prop) && zfs_is_mounted(zhp, NULL)) ret = zfs_mount(zhp, MNTOPT_REMOUNT, 0); } } error: nvlist_free(nvl); zcmd_free_nvlists(&zc); if (cls != NULL) { for (cl_idx = 0; cl_idx < nvl_len; cl_idx++) { if (cls[cl_idx] != NULL) changelist_free(cls[cl_idx]); } free(cls); } return (ret); } /* * Given a property, inherit the value from the parent dataset, or if received * is TRUE, revert to the received value, if any. */ int zfs_prop_inherit(zfs_handle_t *zhp, const char *propname, boolean_t received) { zfs_cmd_t zc = {"\0"}; int ret; prop_changelist_t *cl; libzfs_handle_t *hdl = zhp->zfs_hdl; char errbuf[ERRBUFLEN]; zfs_prop_t prop; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot inherit %s for '%s'"), propname, zhp->zfs_name); zc.zc_cookie = received; if ((prop = zfs_name_to_prop(propname)) == ZPROP_USERPROP) { /* * For user properties, the amount of work we have to do is very * small, so just do it here. */ if (!zfs_prop_user(propname)) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid property")); return (zfs_error(hdl, EZFS_BADPROP, errbuf)); } (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); (void) strlcpy(zc.zc_value, propname, sizeof (zc.zc_value)); if (zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_INHERIT_PROP, &zc) != 0) return (zfs_standard_error(hdl, errno, errbuf)); (void) get_stats(zhp); return (0); } /* * Verify that this property is inheritable. */ if (zfs_prop_readonly(prop)) return (zfs_error(hdl, EZFS_PROPREADONLY, errbuf)); if (!zfs_prop_inheritable(prop) && !received) return (zfs_error(hdl, EZFS_PROPNONINHERIT, errbuf)); /* * Check to see if the value applies to this type */ if (!zfs_prop_valid_for_type(prop, zhp->zfs_type, B_FALSE)) return (zfs_error(hdl, EZFS_PROPTYPE, errbuf)); /* * Normalize the name, to get rid of shorthand abbreviations. */ propname = zfs_prop_to_name(prop); (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); (void) strlcpy(zc.zc_value, propname, sizeof (zc.zc_value)); if (prop == ZFS_PROP_MOUNTPOINT && getzoneid() == GLOBAL_ZONEID && zfs_prop_get_int(zhp, ZFS_PROP_ZONED)) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "dataset is used in a non-global zone")); return (zfs_error(hdl, EZFS_ZONED, errbuf)); } /* * Determine datasets which will be affected by this change, if any. 
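 * The changelist machinery is what keeps mounts and shares consistent:
 * changelist_prefix() unmounts/unshares anything whose view of the
 * property is about to change, the ioctl applies the inherit, and
 * changelist_postfix() remounts/reshares afterwards.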
*/ if ((cl = changelist_gather(zhp, prop, 0, 0)) == NULL) return (-1); if (prop == ZFS_PROP_MOUNTPOINT && changelist_haszonedchild(cl)) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "child dataset with inherited mountpoint is used " "in a non-global zone")); ret = zfs_error(hdl, EZFS_ZONED, errbuf); goto error; } if ((ret = changelist_prefix(cl)) != 0) goto error; if ((ret = zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_INHERIT_PROP, &zc)) != 0) { changelist_free(cl); return (zfs_standard_error(hdl, errno, errbuf)); } else { if ((ret = changelist_postfix(cl)) != 0) goto error; /* * Refresh the statistics so the new property is reflected. */ (void) get_stats(zhp); /* * Remount the filesystem to propagate the change * if one of the options handled by the generic * Linux namespace layer has been modified. */ if (zfs_is_namespace_prop(prop) && zfs_is_mounted(zhp, NULL)) ret = zfs_mount(zhp, MNTOPT_REMOUNT, 0); } error: changelist_free(cl); return (ret); } /* * True DSL properties are stored in an nvlist. The following two functions * extract them appropriately. */ uint64_t getprop_uint64(zfs_handle_t *zhp, zfs_prop_t prop, char **source) { nvlist_t *nv; uint64_t value; *source = NULL; if (nvlist_lookup_nvlist(zhp->zfs_props, zfs_prop_to_name(prop), &nv) == 0) { value = fnvlist_lookup_uint64(nv, ZPROP_VALUE); (void) nvlist_lookup_string(nv, ZPROP_SOURCE, source); } else { verify(!zhp->zfs_props_table || zhp->zfs_props_table[prop] == B_TRUE); value = zfs_prop_default_numeric(prop); *source = (char *)""; } return (value); } static const char * getprop_string(zfs_handle_t *zhp, zfs_prop_t prop, char **source) { nvlist_t *nv; const char *value; *source = NULL; if (nvlist_lookup_nvlist(zhp->zfs_props, zfs_prop_to_name(prop), &nv) == 0) { value = fnvlist_lookup_string(nv, ZPROP_VALUE); (void) nvlist_lookup_string(nv, ZPROP_SOURCE, source); } else { verify(!zhp->zfs_props_table || zhp->zfs_props_table[prop] == B_TRUE); value = zfs_prop_default_string(prop); *source = (char *)""; } return (value); } static boolean_t zfs_is_recvd_props_mode(zfs_handle_t *zhp) { return (zhp->zfs_props == zhp->zfs_recvd_props); } static void zfs_set_recvd_props_mode(zfs_handle_t *zhp, uint64_t *cookie) { *cookie = (uint64_t)(uintptr_t)zhp->zfs_props; zhp->zfs_props = zhp->zfs_recvd_props; } static void zfs_unset_recvd_props_mode(zfs_handle_t *zhp, uint64_t *cookie) { zhp->zfs_props = (nvlist_t *)(uintptr_t)*cookie; *cookie = 0; } /* * Internal function for getting a numeric property. Both zfs_prop_get() and * zfs_prop_get_int() are built using this interface. * * Certain properties can be overridden using 'mount -o'. In this case, scan * the contents of the /proc/self/mounts entry, searching for the * appropriate options. If they differ from the on-disk values, report the * current values and mark the source "temporary". */ static int get_numeric_property(zfs_handle_t *zhp, zfs_prop_t prop, zprop_source_t *src, char **source, uint64_t *val) { zfs_cmd_t zc = {"\0"}; nvlist_t *zplprops = NULL; struct mnttab mnt; const char *mntopt_on = NULL; const char *mntopt_off = NULL; boolean_t received = zfs_is_recvd_props_mode(zhp); *source = NULL; /* * If the property is being fetched for a snapshot, check whether * the property is valid for the snapshot's head dataset type. 
*/ if (zhp->zfs_type == ZFS_TYPE_SNAPSHOT && !zfs_prop_valid_for_type(prop, zhp->zfs_head_type, B_TRUE)) { *val = zfs_prop_default_numeric(prop); return (-1); } switch (prop) { case ZFS_PROP_ATIME: mntopt_on = MNTOPT_ATIME; mntopt_off = MNTOPT_NOATIME; break; case ZFS_PROP_RELATIME: mntopt_on = MNTOPT_RELATIME; mntopt_off = MNTOPT_NORELATIME; break; case ZFS_PROP_DEVICES: mntopt_on = MNTOPT_DEVICES; mntopt_off = MNTOPT_NODEVICES; break; case ZFS_PROP_EXEC: mntopt_on = MNTOPT_EXEC; mntopt_off = MNTOPT_NOEXEC; break; case ZFS_PROP_READONLY: mntopt_on = MNTOPT_RO; mntopt_off = MNTOPT_RW; break; case ZFS_PROP_SETUID: mntopt_on = MNTOPT_SETUID; mntopt_off = MNTOPT_NOSETUID; break; case ZFS_PROP_XATTR: mntopt_on = MNTOPT_XATTR; mntopt_off = MNTOPT_NOXATTR; break; case ZFS_PROP_NBMAND: mntopt_on = MNTOPT_NBMAND; mntopt_off = MNTOPT_NONBMAND; break; default: break; } /* * Because looking up the mount options is potentially expensive * (iterating over all of /proc/self/mounts), we defer its * calculation until we're looking up a property which requires * its presence. */ if (!zhp->zfs_mntcheck && (mntopt_on != NULL || prop == ZFS_PROP_MOUNTED)) { libzfs_handle_t *hdl = zhp->zfs_hdl; struct mnttab entry; if (libzfs_mnttab_find(hdl, zhp->zfs_name, &entry) == 0) zhp->zfs_mntopts = zfs_strdup(hdl, entry.mnt_mntopts); zhp->zfs_mntcheck = B_TRUE; } if (zhp->zfs_mntopts == NULL) mnt.mnt_mntopts = (char *)""; else mnt.mnt_mntopts = zhp->zfs_mntopts; switch (prop) { case ZFS_PROP_ATIME: case ZFS_PROP_RELATIME: case ZFS_PROP_DEVICES: case ZFS_PROP_EXEC: case ZFS_PROP_READONLY: case ZFS_PROP_SETUID: #ifndef __FreeBSD__ case ZFS_PROP_XATTR: #endif case ZFS_PROP_NBMAND: *val = getprop_uint64(zhp, prop, source); if (received) break; if (hasmntopt(&mnt, mntopt_on) && !*val) { *val = B_TRUE; if (src) *src = ZPROP_SRC_TEMPORARY; } else if (hasmntopt(&mnt, mntopt_off) && *val) { *val = B_FALSE; if (src) *src = ZPROP_SRC_TEMPORARY; } break; case ZFS_PROP_CANMOUNT: case ZFS_PROP_VOLSIZE: case ZFS_PROP_QUOTA: case ZFS_PROP_REFQUOTA: case ZFS_PROP_RESERVATION: case ZFS_PROP_REFRESERVATION: case ZFS_PROP_FILESYSTEM_LIMIT: case ZFS_PROP_SNAPSHOT_LIMIT: case ZFS_PROP_FILESYSTEM_COUNT: case ZFS_PROP_SNAPSHOT_COUNT: *val = getprop_uint64(zhp, prop, source); if (*source == NULL) { /* not default, must be local */ *source = zhp->zfs_name; } break; case ZFS_PROP_MOUNTED: *val = (zhp->zfs_mntopts != NULL); break; case ZFS_PROP_NUMCLONES: *val = zhp->zfs_dmustats.dds_num_clones; break; case ZFS_PROP_VERSION: case ZFS_PROP_NORMALIZE: case ZFS_PROP_UTF8ONLY: case ZFS_PROP_CASE: zcmd_alloc_dst_nvlist(zhp->zfs_hdl, &zc, 0); (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); if (zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_OBJSET_ZPLPROPS, &zc)) { zcmd_free_nvlists(&zc); if (prop == ZFS_PROP_VERSION && zhp->zfs_type == ZFS_TYPE_VOLUME) *val = zfs_prop_default_numeric(prop); return (-1); } if (zcmd_read_dst_nvlist(zhp->zfs_hdl, &zc, &zplprops) != 0 || nvlist_lookup_uint64(zplprops, zfs_prop_to_name(prop), val) != 0) { zcmd_free_nvlists(&zc); return (-1); } nvlist_free(zplprops); zcmd_free_nvlists(&zc); break; case ZFS_PROP_INCONSISTENT: *val = zhp->zfs_dmustats.dds_inconsistent; break; case ZFS_PROP_REDACTED: *val = zhp->zfs_dmustats.dds_redacted; break; case ZFS_PROP_CREATETXG: /* * We can directly read createtxg property from zfs * handle for Filesystem, Snapshot and ZVOL types. 
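 * Other types (e.g. bookmarks) fall through to the generic lookup below
 * and read the value from the cached property nvlist instead.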
*/ if ((zhp->zfs_type == ZFS_TYPE_FILESYSTEM) || (zhp->zfs_type == ZFS_TYPE_SNAPSHOT) || (zhp->zfs_type == ZFS_TYPE_VOLUME)) { *val = zhp->zfs_dmustats.dds_creation_txg; break; } zfs_fallthrough; default: switch (zfs_prop_get_type(prop)) { case PROP_TYPE_NUMBER: case PROP_TYPE_INDEX: *val = getprop_uint64(zhp, prop, source); /* * If we tried to use a default value for a * readonly property, it means that it was not * present. Note this only applies to "truly" * readonly properties, not set-once properties * like volblocksize. */ if (zfs_prop_readonly(prop) && !zfs_prop_setonce(prop) && *source != NULL && (*source)[0] == '\0') { *source = NULL; return (-1); } break; case PROP_TYPE_STRING: default: zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, "cannot get non-numeric property")); return (zfs_error(zhp->zfs_hdl, EZFS_BADPROP, dgettext(TEXT_DOMAIN, "internal error"))); } } return (0); } /* * Calculate the source type, given the raw source string. */ static void get_source(zfs_handle_t *zhp, zprop_source_t *srctype, char *source, char *statbuf, size_t statlen) { if (statbuf == NULL || srctype == NULL || *srctype == ZPROP_SRC_TEMPORARY) { return; } if (source == NULL) { *srctype = ZPROP_SRC_NONE; } else if (source[0] == '\0') { *srctype = ZPROP_SRC_DEFAULT; } else if (strstr(source, ZPROP_SOURCE_VAL_RECVD) != NULL) { *srctype = ZPROP_SRC_RECEIVED; } else { if (strcmp(source, zhp->zfs_name) == 0) { *srctype = ZPROP_SRC_LOCAL; } else { (void) strlcpy(statbuf, source, statlen); *srctype = ZPROP_SRC_INHERITED; } } } int zfs_prop_get_recvd(zfs_handle_t *zhp, const char *propname, char *propbuf, size_t proplen, boolean_t literal) { zfs_prop_t prop; int err = 0; if (zhp->zfs_recvd_props == NULL) if (get_recvd_props_ioctl(zhp) != 0) return (-1); prop = zfs_name_to_prop(propname); if (prop != ZPROP_USERPROP) { uint64_t cookie; if (!nvlist_exists(zhp->zfs_recvd_props, propname)) return (-1); zfs_set_recvd_props_mode(zhp, &cookie); err = zfs_prop_get(zhp, prop, propbuf, proplen, NULL, NULL, 0, literal); zfs_unset_recvd_props_mode(zhp, &cookie); } else { nvlist_t *propval; char *recvdval; if (nvlist_lookup_nvlist(zhp->zfs_recvd_props, propname, &propval) != 0) return (-1); recvdval = fnvlist_lookup_string(propval, ZPROP_VALUE); (void) strlcpy(propbuf, recvdval, proplen); } return (err == 0 ? 
0 : -1); } static int get_clones_string(zfs_handle_t *zhp, char *propbuf, size_t proplen) { nvlist_t *value; nvpair_t *pair; value = zfs_get_clones_nvl(zhp); if (value == NULL || nvlist_empty(value)) return (-1); propbuf[0] = '\0'; for (pair = nvlist_next_nvpair(value, NULL); pair != NULL; pair = nvlist_next_nvpair(value, pair)) { if (propbuf[0] != '\0') (void) strlcat(propbuf, ",", proplen); (void) strlcat(propbuf, nvpair_name(pair), proplen); } return (0); } struct get_clones_arg { uint64_t numclones; nvlist_t *value; const char *origin; char buf[ZFS_MAX_DATASET_NAME_LEN]; }; static int get_clones_cb(zfs_handle_t *zhp, void *arg) { struct get_clones_arg *gca = arg; if (gca->numclones == 0) { zfs_close(zhp); return (0); } if (zfs_prop_get(zhp, ZFS_PROP_ORIGIN, gca->buf, sizeof (gca->buf), NULL, NULL, 0, B_TRUE) != 0) goto out; if (strcmp(gca->buf, gca->origin) == 0) { fnvlist_add_boolean(gca->value, zfs_get_name(zhp)); gca->numclones--; } out: (void) zfs_iter_children(zhp, get_clones_cb, gca); zfs_close(zhp); return (0); } nvlist_t * zfs_get_clones_nvl(zfs_handle_t *zhp) { nvlist_t *nv, *value; if (nvlist_lookup_nvlist(zhp->zfs_props, zfs_prop_to_name(ZFS_PROP_CLONES), &nv) != 0) { struct get_clones_arg gca; /* * if this is a snapshot, then the kernel wasn't able * to get the clones. Do it by slowly iterating. */ if (zhp->zfs_type != ZFS_TYPE_SNAPSHOT) return (NULL); if (nvlist_alloc(&nv, NV_UNIQUE_NAME, 0) != 0) return (NULL); if (nvlist_alloc(&value, NV_UNIQUE_NAME, 0) != 0) { nvlist_free(nv); return (NULL); } gca.numclones = zfs_prop_get_int(zhp, ZFS_PROP_NUMCLONES); gca.value = value; gca.origin = zhp->zfs_name; if (gca.numclones != 0) { zfs_handle_t *root; char pool[ZFS_MAX_DATASET_NAME_LEN]; char *cp = pool; /* get the pool name */ (void) strlcpy(pool, zhp->zfs_name, sizeof (pool)); (void) strsep(&cp, "/@"); root = zfs_open(zhp->zfs_hdl, pool, ZFS_TYPE_FILESYSTEM); if (root == NULL) { nvlist_free(nv); nvlist_free(value); return (NULL); } (void) get_clones_cb(root, &gca); } if (gca.numclones != 0 || nvlist_add_nvlist(nv, ZPROP_VALUE, value) != 0 || nvlist_add_nvlist(zhp->zfs_props, zfs_prop_to_name(ZFS_PROP_CLONES), nv) != 0) { nvlist_free(nv); nvlist_free(value); return (NULL); } nvlist_free(nv); nvlist_free(value); nv = fnvlist_lookup_nvlist(zhp->zfs_props, zfs_prop_to_name(ZFS_PROP_CLONES)); } return (fnvlist_lookup_nvlist(nv, ZPROP_VALUE)); } static int get_rsnaps_string(zfs_handle_t *zhp, char *propbuf, size_t proplen) { nvlist_t *value; uint64_t *snaps; uint_t nsnaps; if (nvlist_lookup_nvlist(zhp->zfs_props, zfs_prop_to_name(ZFS_PROP_REDACT_SNAPS), &value) != 0) return (-1); if (nvlist_lookup_uint64_array(value, ZPROP_VALUE, &snaps, &nsnaps) != 0) return (-1); if (nsnaps == 0) { /* There's no redaction snapshots; pass a special value back */ (void) snprintf(propbuf, proplen, "none"); return (0); } propbuf[0] = '\0'; for (int i = 0; i < nsnaps; i++) { char buf[128]; if (propbuf[0] != '\0') (void) strlcat(propbuf, ",", proplen); (void) snprintf(buf, sizeof (buf), "%llu", (u_longlong_t)snaps[i]); (void) strlcat(propbuf, buf, proplen); } return (0); } /* * Accepts a property and value and checks that the value * matches the one found by the channel program. If they are * not equal, print both of them. 
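 * This is a debugging aid only: it is a no-op unless libzfs_prop_debug is
 * set on the handle. The cross-check runs a small Lua channel program
 * (via lzc_channel_program_nosync()) that calls zfs.get_prop() on the
 * same dataset and property and compares the answer with what libzfs
 * computed.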
*/ static void zcp_check(zfs_handle_t *zhp, zfs_prop_t prop, uint64_t intval, const char *strval) { if (!zhp->zfs_hdl->libzfs_prop_debug) return; int error; char *poolname = zhp->zpool_hdl->zpool_name; const char *prop_name = zfs_prop_to_name(prop); const char *program = "args = ...\n" "ds = args['dataset']\n" "prop = args['property']\n" "value, setpoint = zfs.get_prop(ds, prop)\n" "return {value=value, setpoint=setpoint}\n"; nvlist_t *outnvl; nvlist_t *retnvl; nvlist_t *argnvl = fnvlist_alloc(); fnvlist_add_string(argnvl, "dataset", zhp->zfs_name); fnvlist_add_string(argnvl, "property", zfs_prop_to_name(prop)); error = lzc_channel_program_nosync(poolname, program, 10 * 1000 * 1000, 10 * 1024 * 1024, argnvl, &outnvl); if (error == 0) { retnvl = fnvlist_lookup_nvlist(outnvl, "return"); if (zfs_prop_get_type(prop) == PROP_TYPE_NUMBER) { int64_t ans; error = nvlist_lookup_int64(retnvl, "value", &ans); if (error != 0) { (void) fprintf(stderr, "%s: zcp check error: " "%u\n", prop_name, error); return; } if (ans != intval) { (void) fprintf(stderr, "%s: zfs found %llu, " "but zcp found %llu\n", prop_name, (u_longlong_t)intval, (u_longlong_t)ans); } } else { char *str_ans; error = nvlist_lookup_string(retnvl, "value", &str_ans); if (error != 0) { (void) fprintf(stderr, "%s: zcp check error: " "%u\n", prop_name, error); return; } if (strcmp(strval, str_ans) != 0) { (void) fprintf(stderr, "%s: zfs found '%s', but zcp found '%s'\n", prop_name, strval, str_ans); } } } else { (void) fprintf(stderr, "%s: zcp check failed, channel program " "error: %u\n", prop_name, error); } nvlist_free(argnvl); nvlist_free(outnvl); } /* * Retrieve a property from the given object. If 'literal' is specified, then * numbers are left as exact values. Otherwise, numbers are converted to a * human-readable form. * * Returns 0 on success, or -1 on error. */ int zfs_prop_get(zfs_handle_t *zhp, zfs_prop_t prop, char *propbuf, size_t proplen, zprop_source_t *src, char *statbuf, size_t statlen, boolean_t literal) { char *source = NULL; uint64_t val; const char *str; const char *strval; boolean_t received = zfs_is_recvd_props_mode(zhp); /* * Check to see if this property applies to our object */ if (!zfs_prop_valid_for_type(prop, zhp->zfs_type, B_FALSE)) return (-1); if (received && zfs_prop_readonly(prop)) return (-1); if (src) *src = ZPROP_SRC_NONE; switch (prop) { case ZFS_PROP_CREATION: /* * 'creation' is a time_t stored in the statistics. We convert * this into a string unless 'literal' is specified. */ { val = getprop_uint64(zhp, prop, &source); time_t time = (time_t)val; struct tm t; if (literal || localtime_r(&time, &t) == NULL || strftime(propbuf, proplen, "%a %b %e %k:%M %Y", &t) == 0) (void) snprintf(propbuf, proplen, "%llu", (u_longlong_t)val); } zcp_check(zhp, prop, val, NULL); break; case ZFS_PROP_MOUNTPOINT: /* * Getting the precise mountpoint can be tricky. * * - for 'none' or 'legacy', return those values. * - for inherited mountpoints, we want to take everything * after our ancestor and append it to the inherited value. * * If the pool has an alternate root, we want to prepend that * root to any values we return. */ str = getprop_string(zhp, prop, &source); if (str[0] == '/') { char buf[MAXPATHLEN]; char *root = buf; const char *relpath; /* * If we inherit the mountpoint, even from a dataset * with a received value, the source will be the path of * the dataset we inherit from. If source is * ZPROP_SOURCE_VAL_RECVD, the received value is not * inherited. 
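 * Illustration (hypothetical names): if 'pool/home' sets
 * mountpoint=/export/home and 'pool/home/user' inherits it, the source is
 * "pool/home", relpath becomes "user", and the reported mountpoint is
 * /export/home/user, prefixed by the pool's altroot if one is set.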
*/ if (strcmp(source, ZPROP_SOURCE_VAL_RECVD) == 0) { relpath = ""; } else { relpath = zhp->zfs_name + strlen(source); if (relpath[0] == '/') relpath++; } if ((zpool_get_prop(zhp->zpool_hdl, ZPOOL_PROP_ALTROOT, buf, MAXPATHLEN, NULL, B_FALSE)) || (strcmp(root, "-") == 0)) root[0] = '\0'; /* * Special case an alternate root of '/'. This will * avoid having multiple leading slashes in the * mountpoint path. */ if (strcmp(root, "/") == 0) root++; /* * If the mountpoint is '/' then skip over this * if we are obtaining either an alternate root or * an inherited mountpoint. */ if (str[1] == '\0' && (root[0] != '\0' || relpath[0] != '\0')) str++; if (relpath[0] == '\0') (void) snprintf(propbuf, proplen, "%s%s", root, str); else (void) snprintf(propbuf, proplen, "%s%s%s%s", root, str, relpath[0] == '@' ? "" : "/", relpath); } else { /* 'legacy' or 'none' */ (void) strlcpy(propbuf, str, proplen); } zcp_check(zhp, prop, 0, propbuf); break; case ZFS_PROP_ORIGIN: str = getprop_string(zhp, prop, &source); if (str == NULL) return (-1); (void) strlcpy(propbuf, str, proplen); zcp_check(zhp, prop, 0, str); break; case ZFS_PROP_REDACT_SNAPS: if (get_rsnaps_string(zhp, propbuf, proplen) != 0) return (-1); break; case ZFS_PROP_CLONES: if (get_clones_string(zhp, propbuf, proplen) != 0) return (-1); break; case ZFS_PROP_QUOTA: case ZFS_PROP_REFQUOTA: case ZFS_PROP_RESERVATION: case ZFS_PROP_REFRESERVATION: if (get_numeric_property(zhp, prop, src, &source, &val) != 0) return (-1); /* * If quota or reservation is 0, we translate this into 'none' * (unless literal is set), and indicate that it's the default * value. Otherwise, we print the number nicely and indicate * that its set locally. */ if (val == 0) { if (literal) (void) strlcpy(propbuf, "0", proplen); else (void) strlcpy(propbuf, "none", proplen); } else { if (literal) (void) snprintf(propbuf, proplen, "%llu", (u_longlong_t)val); else zfs_nicebytes(val, propbuf, proplen); } zcp_check(zhp, prop, val, NULL); break; case ZFS_PROP_FILESYSTEM_LIMIT: case ZFS_PROP_SNAPSHOT_LIMIT: case ZFS_PROP_FILESYSTEM_COUNT: case ZFS_PROP_SNAPSHOT_COUNT: if (get_numeric_property(zhp, prop, src, &source, &val) != 0) return (-1); /* * If limit is UINT64_MAX, we translate this into 'none', and * indicate that it's the default value. Otherwise, we print * the number nicely and indicate that it's set locally. */ if (val == UINT64_MAX) { (void) strlcpy(propbuf, "none", proplen); } else if (literal) { (void) snprintf(propbuf, proplen, "%llu", (u_longlong_t)val); } else { zfs_nicenum(val, propbuf, proplen); } zcp_check(zhp, prop, val, NULL); break; case ZFS_PROP_REFRATIO: case ZFS_PROP_COMPRESSRATIO: if (get_numeric_property(zhp, prop, src, &source, &val) != 0) return (-1); if (literal) (void) snprintf(propbuf, proplen, "%llu.%02llu", (u_longlong_t)(val / 100), (u_longlong_t)(val % 100)); else (void) snprintf(propbuf, proplen, "%llu.%02llux", (u_longlong_t)(val / 100), (u_longlong_t)(val % 100)); zcp_check(zhp, prop, val, NULL); break; case ZFS_PROP_TYPE: switch (zhp->zfs_type) { case ZFS_TYPE_FILESYSTEM: str = "filesystem"; break; case ZFS_TYPE_VOLUME: str = "volume"; break; case ZFS_TYPE_SNAPSHOT: str = "snapshot"; break; case ZFS_TYPE_BOOKMARK: str = "bookmark"; break; default: abort(); } (void) snprintf(propbuf, proplen, "%s", str); zcp_check(zhp, prop, 0, propbuf); break; case ZFS_PROP_MOUNTED: /* * The 'mounted' property is a pseudo-property that described * whether the filesystem is currently mounted. 
Even though * it's a boolean value, the typical values of "on" and "off" * don't make sense, so we translate to "yes" and "no". */ if (get_numeric_property(zhp, ZFS_PROP_MOUNTED, src, &source, &val) != 0) return (-1); if (val) (void) strlcpy(propbuf, "yes", proplen); else (void) strlcpy(propbuf, "no", proplen); break; case ZFS_PROP_NAME: /* * The 'name' property is a pseudo-property derived from the * dataset name. It is presented as a real property to simplify * consumers. */ (void) strlcpy(propbuf, zhp->zfs_name, proplen); zcp_check(zhp, prop, 0, propbuf); break; case ZFS_PROP_MLSLABEL: { #ifdef HAVE_MLSLABEL m_label_t *new_sl = NULL; char *ascii = NULL; /* human readable label */ (void) strlcpy(propbuf, getprop_string(zhp, prop, &source), proplen); if (literal || (strcasecmp(propbuf, ZFS_MLSLABEL_DEFAULT) == 0)) break; /* * Try to translate the internal hex string to * human-readable output. If there are any * problems just use the hex string. */ if (str_to_label(propbuf, &new_sl, MAC_LABEL, L_NO_CORRECTION, NULL) == -1) { m_label_free(new_sl); break; } if (label_to_str(new_sl, &ascii, M_LABEL, DEF_NAMES) != 0) { if (ascii) free(ascii); m_label_free(new_sl); break; } m_label_free(new_sl); (void) strlcpy(propbuf, ascii, proplen); free(ascii); #else (void) strlcpy(propbuf, getprop_string(zhp, prop, &source), proplen); #endif /* HAVE_MLSLABEL */ } break; case ZFS_PROP_GUID: case ZFS_PROP_KEY_GUID: case ZFS_PROP_IVSET_GUID: case ZFS_PROP_CREATETXG: case ZFS_PROP_OBJSETID: case ZFS_PROP_PBKDF2_ITERS: /* * These properties are stored as numbers, but they are * identifiers or counters. * We don't want them to be pretty printed, because pretty * printing truncates their values making them useless. */ if (get_numeric_property(zhp, prop, src, &source, &val) != 0) return (-1); (void) snprintf(propbuf, proplen, "%llu", (u_longlong_t)val); zcp_check(zhp, prop, val, NULL); break; case ZFS_PROP_REFERENCED: case ZFS_PROP_AVAILABLE: case ZFS_PROP_USED: case ZFS_PROP_USEDSNAP: case ZFS_PROP_USEDDS: case ZFS_PROP_USEDREFRESERV: case ZFS_PROP_USEDCHILD: if (get_numeric_property(zhp, prop, src, &source, &val) != 0) return (-1); if (literal) { (void) snprintf(propbuf, proplen, "%llu", (u_longlong_t)val); } else { zfs_nicebytes(val, propbuf, proplen); } zcp_check(zhp, prop, val, NULL); break; case ZFS_PROP_SNAPSHOTS_CHANGED: { if ((get_numeric_property(zhp, prop, src, &source, &val) != 0) || val == 0) { return (-1); } time_t time = (time_t)val; struct tm t; if (literal || localtime_r(&time, &t) == NULL || strftime(propbuf, proplen, "%a %b %e %k:%M:%S %Y", &t) == 0) (void) snprintf(propbuf, proplen, "%llu", (u_longlong_t)val); } zcp_check(zhp, prop, val, NULL); break; default: switch (zfs_prop_get_type(prop)) { case PROP_TYPE_NUMBER: if (get_numeric_property(zhp, prop, src, &source, &val) != 0) { return (-1); } if (literal) { (void) snprintf(propbuf, proplen, "%llu", (u_longlong_t)val); } else { zfs_nicenum(val, propbuf, proplen); } zcp_check(zhp, prop, val, NULL); break; case PROP_TYPE_STRING: str = getprop_string(zhp, prop, &source); if (str == NULL) return (-1); (void) strlcpy(propbuf, str, proplen); zcp_check(zhp, prop, 0, str); break; case PROP_TYPE_INDEX: if (get_numeric_property(zhp, prop, src, &source, &val) != 0) return (-1); if (zfs_prop_index_to_string(prop, val, &strval) != 0) return (-1); (void) strlcpy(propbuf, strval, proplen); zcp_check(zhp, prop, 0, strval); break; default: abort(); } } get_source(zhp, src, source, statbuf, statlen); return (0); } /* * Utility function to get the given numeric 
property. Does no validation that * the given property is the appropriate type; should only be used with * hard-coded property types. */ uint64_t zfs_prop_get_int(zfs_handle_t *zhp, zfs_prop_t prop) { char *source; uint64_t val = 0; (void) get_numeric_property(zhp, prop, NULL, &source, &val); return (val); } static int zfs_prop_set_int(zfs_handle_t *zhp, zfs_prop_t prop, uint64_t val) { char buf[64]; (void) snprintf(buf, sizeof (buf), "%llu", (longlong_t)val); return (zfs_prop_set(zhp, zfs_prop_to_name(prop), buf)); } /* * Similar to zfs_prop_get(), but returns the value as an integer. */ int zfs_prop_get_numeric(zfs_handle_t *zhp, zfs_prop_t prop, uint64_t *value, zprop_source_t *src, char *statbuf, size_t statlen) { char *source; /* * Check to see if this property applies to our object */ if (!zfs_prop_valid_for_type(prop, zhp->zfs_type, B_FALSE)) { return (zfs_error_fmt(zhp->zfs_hdl, EZFS_PROPTYPE, dgettext(TEXT_DOMAIN, "cannot get property '%s'"), zfs_prop_to_name(prop))); } if (src) *src = ZPROP_SRC_NONE; if (get_numeric_property(zhp, prop, src, &source, value) != 0) return (-1); get_source(zhp, src, source, statbuf, statlen); return (0); } #ifdef HAVE_IDMAP static int idmap_id_to_numeric_domain_rid(uid_t id, boolean_t isuser, char **domainp, idmap_rid_t *ridp) { idmap_get_handle_t *get_hdl = NULL; idmap_stat status; int err = EINVAL; if (idmap_get_create(&get_hdl) != IDMAP_SUCCESS) goto out; if (isuser) { err = idmap_get_sidbyuid(get_hdl, id, IDMAP_REQ_FLG_USE_CACHE, domainp, ridp, &status); } else { err = idmap_get_sidbygid(get_hdl, id, IDMAP_REQ_FLG_USE_CACHE, domainp, ridp, &status); } if (err == IDMAP_SUCCESS && idmap_get_mappings(get_hdl) == IDMAP_SUCCESS && status == IDMAP_SUCCESS) err = 0; else err = EINVAL; out: if (get_hdl) idmap_get_destroy(get_hdl); return (err); } #endif /* HAVE_IDMAP */ /* * convert the propname into parameters needed by kernel * Eg: userquota@ahrens -> ZFS_PROP_USERQUOTA, "", 126829 * Eg: userused@matt@domain -> ZFS_PROP_USERUSED, "S-1-123-456", 789 * Eg: groupquota@staff -> ZFS_PROP_GROUPQUOTA, "", 1234 * Eg: groupused@staff -> ZFS_PROP_GROUPUSED, "", 1234 * Eg: projectquota@123 -> ZFS_PROP_PROJECTQUOTA, "", 123 * Eg: projectused@789 -> ZFS_PROP_PROJECTUSED, "", 789 */ static int userquota_propname_decode(const char *propname, boolean_t zoned, zfs_userquota_prop_t *typep, char *domain, int domainlen, uint64_t *ridp) { zfs_userquota_prop_t type; char *cp; boolean_t isuser; boolean_t isgroup; boolean_t isproject; struct passwd *pw; struct group *gr; domain[0] = '\0'; /* Figure out the property type ({user|group|project}{quota|space}) */ for (type = 0; type < ZFS_NUM_USERQUOTA_PROPS; type++) { if (strncmp(propname, zfs_userquota_prop_prefixes[type], strlen(zfs_userquota_prop_prefixes[type])) == 0) break; } if (type == ZFS_NUM_USERQUOTA_PROPS) return (EINVAL); *typep = type; isuser = (type == ZFS_PROP_USERQUOTA || type == ZFS_PROP_USERUSED || type == ZFS_PROP_USEROBJQUOTA || type == ZFS_PROP_USEROBJUSED); isgroup = (type == ZFS_PROP_GROUPQUOTA || type == ZFS_PROP_GROUPUSED || type == ZFS_PROP_GROUPOBJQUOTA || type == ZFS_PROP_GROUPOBJUSED); isproject = (type == ZFS_PROP_PROJECTQUOTA || type == ZFS_PROP_PROJECTUSED || type == ZFS_PROP_PROJECTOBJQUOTA || type == ZFS_PROP_PROJECTOBJUSED); cp = strchr(propname, '@') + 1; if (isuser && (pw = getpwnam(cp)) != NULL) { if (zoned && getzoneid() == GLOBAL_ZONEID) return (ENOENT); *ridp = pw->pw_uid; } else if (isgroup && (gr = getgrnam(cp)) != NULL) { if (zoned && getzoneid() == GLOBAL_ZONEID) return (ENOENT); *ridp 
= gr->gr_gid; } else if (!isproject && strchr(cp, '@')) { #ifdef HAVE_IDMAP /* * It's a SID name (eg "user@domain") that needs to be * turned into S-1-domainID-RID. */ directory_error_t e; char *numericsid = NULL; char *end; if (zoned && getzoneid() == GLOBAL_ZONEID) return (ENOENT); if (isuser) { e = directory_sid_from_user_name(NULL, cp, &numericsid); } else { e = directory_sid_from_group_name(NULL, cp, &numericsid); } if (e != NULL) { directory_error_free(e); return (ENOENT); } if (numericsid == NULL) return (ENOENT); cp = numericsid; (void) strlcpy(domain, cp, domainlen); cp = strrchr(domain, '-'); *cp = '\0'; cp++; errno = 0; *ridp = strtoull(cp, &end, 10); free(numericsid); if (errno != 0 || *end != '\0') return (EINVAL); #else (void) domainlen; return (ENOSYS); #endif /* HAVE_IDMAP */ } else { /* It's a user/group/project ID (eg "12345"). */ uid_t id; char *end; id = strtoul(cp, &end, 10); if (*end != '\0') return (EINVAL); if (id > MAXUID && !isproject) { #ifdef HAVE_IDMAP /* It's an ephemeral ID. */ idmap_rid_t rid; char *mapdomain; if (idmap_id_to_numeric_domain_rid(id, isuser, &mapdomain, &rid) != 0) return (ENOENT); (void) strlcpy(domain, mapdomain, domainlen); *ridp = rid; #else return (ENOSYS); #endif /* HAVE_IDMAP */ } else { *ridp = id; } } return (0); } static int zfs_prop_get_userquota_common(zfs_handle_t *zhp, const char *propname, uint64_t *propvalue, zfs_userquota_prop_t *typep) { int err; zfs_cmd_t zc = {"\0"}; (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); err = userquota_propname_decode(propname, zfs_prop_get_int(zhp, ZFS_PROP_ZONED), typep, zc.zc_value, sizeof (zc.zc_value), &zc.zc_guid); zc.zc_objset_type = *typep; if (err) return (err); err = zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_USERSPACE_ONE, &zc); if (err) return (err); *propvalue = zc.zc_cookie; return (0); } int zfs_prop_get_userquota_int(zfs_handle_t *zhp, const char *propname, uint64_t *propvalue) { zfs_userquota_prop_t type; return (zfs_prop_get_userquota_common(zhp, propname, propvalue, &type)); } int zfs_prop_get_userquota(zfs_handle_t *zhp, const char *propname, char *propbuf, int proplen, boolean_t literal) { int err; uint64_t propvalue; zfs_userquota_prop_t type; err = zfs_prop_get_userquota_common(zhp, propname, &propvalue, &type); if (err) return (err); if (literal) { (void) snprintf(propbuf, proplen, "%llu", (u_longlong_t)propvalue); } else if (propvalue == 0 && (type == ZFS_PROP_USERQUOTA || type == ZFS_PROP_GROUPQUOTA || type == ZFS_PROP_USEROBJQUOTA || type == ZFS_PROP_GROUPOBJQUOTA || type == ZFS_PROP_PROJECTQUOTA || type == ZFS_PROP_PROJECTOBJQUOTA)) { (void) strlcpy(propbuf, "none", proplen); } else if (type == ZFS_PROP_USERQUOTA || type == ZFS_PROP_GROUPQUOTA || type == ZFS_PROP_USERUSED || type == ZFS_PROP_GROUPUSED || type == ZFS_PROP_PROJECTUSED || type == ZFS_PROP_PROJECTQUOTA) { zfs_nicebytes(propvalue, propbuf, proplen); } else { zfs_nicenum(propvalue, propbuf, proplen); } return (0); } /* * propname must start with "written@" or "written#". 
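 * For example (hypothetical snapshot name), "written@snap1" asks how much
 * space was written to this dataset since snapshot 'snap1'. A bare
 * snapshot or bookmark name is expanded against the dataset's own name;
 * a fully qualified "pool/fs@snap1" or "pool/fs#mark" is used as given.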
*/ int zfs_prop_get_written_int(zfs_handle_t *zhp, const char *propname, uint64_t *propvalue) { int err; zfs_cmd_t zc = {"\0"}; const char *snapname; (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); assert(zfs_prop_written(propname)); snapname = propname + strlen("written@"); if (strchr(snapname, '@') != NULL || strchr(snapname, '#') != NULL) { /* full snapshot or bookmark name specified */ (void) strlcpy(zc.zc_value, snapname, sizeof (zc.zc_value)); } else { /* snapname is the short name, append it to zhp's fsname */ char *cp; (void) strlcpy(zc.zc_value, zhp->zfs_name, sizeof (zc.zc_value)); cp = strchr(zc.zc_value, '@'); if (cp != NULL) *cp = '\0'; (void) strlcat(zc.zc_value, snapname - 1, sizeof (zc.zc_value)); } err = zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_SPACE_WRITTEN, &zc); if (err) return (err); *propvalue = zc.zc_cookie; return (0); } int zfs_prop_get_written(zfs_handle_t *zhp, const char *propname, char *propbuf, int proplen, boolean_t literal) { int err; uint64_t propvalue; err = zfs_prop_get_written_int(zhp, propname, &propvalue); if (err) return (err); if (literal) { (void) snprintf(propbuf, proplen, "%llu", (u_longlong_t)propvalue); } else { zfs_nicebytes(propvalue, propbuf, proplen); } return (0); } /* * Returns the name of the given zfs handle. */ const char * zfs_get_name(const zfs_handle_t *zhp) { return (zhp->zfs_name); } /* * Returns the name of the parent pool for the given zfs handle. */ const char * zfs_get_pool_name(const zfs_handle_t *zhp) { return (zhp->zpool_hdl->zpool_name); } /* * Returns the type of the given zfs handle. */ zfs_type_t zfs_get_type(const zfs_handle_t *zhp) { return (zhp->zfs_type); } /* * Returns the type of the given zfs handle, * or, if a snapshot, the type of the snapshotted dataset. */ zfs_type_t zfs_get_underlying_type(const zfs_handle_t *zhp) { return (zhp->zfs_head_type); } /* * Is one dataset name a child dataset of another? * * Needs to handle these cases: * Dataset 1 "a/foo" "a/foo" "a/foo" "a/foo" * Dataset 2 "a/fo" "a/foobar" "a/bar/baz" "a/foo/bar" * Descendant? No. No. No. Yes. */ static boolean_t is_descendant(const char *ds1, const char *ds2) { size_t d1len = strlen(ds1); /* ds2 can't be a descendant if it's smaller */ if (strlen(ds2) < d1len) return (B_FALSE); /* otherwise, compare strings and verify that there's a '/' char */ return (ds2[d1len] == '/' && (strncmp(ds1, ds2, d1len) == 0)); } /* * Given a complete name, return just the portion that refers to the parent. * Will return -1 if there is no parent (path is just the name of the * pool). */ static int parent_name(const char *path, char *buf, size_t buflen) { char *slashp; (void) strlcpy(buf, path, buflen); if ((slashp = strrchr(buf, '/')) == NULL) return (-1); *slashp = '\0'; return (0); } int zfs_parent_name(zfs_handle_t *zhp, char *buf, size_t buflen) { return (parent_name(zfs_get_name(zhp), buf, buflen)); } /* * If accept_ancestor is false, then check to make sure that the given path has * a parent, and that it exists. If accept_ancestor is true, then find the * closest existing ancestor for the given path. In prefixlen return the * length of already existing prefix of the given path. We also fetch the * 'zoned' property, which is used to validate property settings when creating * new datasets. 
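 * For example (hypothetical names): given path "tank/a/b/c" with
 * accept_ancestor set and only "tank/a" existing, the walk stops at
 * "tank/a" and *prefixlen is set to strlen("tank/a").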
*/ static int check_parents(libzfs_handle_t *hdl, const char *path, uint64_t *zoned, boolean_t accept_ancestor, int *prefixlen) { zfs_cmd_t zc = {"\0"}; char parent[ZFS_MAX_DATASET_NAME_LEN]; char *slash; zfs_handle_t *zhp; char errbuf[ERRBUFLEN]; uint64_t is_zoned; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot create '%s'"), path); /* get parent, and check to see if this is just a pool */ if (parent_name(path, parent, sizeof (parent)) != 0) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "missing dataset name")); return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); } /* check to see if the pool exists */ if ((slash = strchr(parent, '/')) == NULL) slash = parent + strlen(parent); - (void) strncpy(zc.zc_name, parent, slash - parent); - zc.zc_name[slash - parent] = '\0'; + (void) strlcpy(zc.zc_name, parent, + MIN(sizeof (zc.zc_name), slash - parent + 1)); if (zfs_ioctl(hdl, ZFS_IOC_OBJSET_STATS, &zc) != 0 && errno == ENOENT) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "no such pool '%s'"), zc.zc_name); return (zfs_error(hdl, EZFS_NOENT, errbuf)); } /* check to see if the parent dataset exists */ while ((zhp = make_dataset_handle(hdl, parent)) == NULL) { if (errno == ENOENT && accept_ancestor) { /* * Go deeper to find an ancestor, give up on top level. */ if (parent_name(parent, parent, sizeof (parent)) != 0) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "no such pool '%s'"), zc.zc_name); return (zfs_error(hdl, EZFS_NOENT, errbuf)); } } else if (errno == ENOENT) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "parent does not exist")); return (zfs_error(hdl, EZFS_NOENT, errbuf)); } else return (zfs_standard_error(hdl, errno, errbuf)); } is_zoned = zfs_prop_get_int(zhp, ZFS_PROP_ZONED); if (zoned != NULL) *zoned = is_zoned; /* we are in a non-global zone, but parent is in the global zone */ if (getzoneid() != GLOBAL_ZONEID && !is_zoned) { (void) zfs_standard_error(hdl, EPERM, errbuf); zfs_close(zhp); return (-1); } /* make sure parent is a filesystem */ if (zfs_get_type(zhp) != ZFS_TYPE_FILESYSTEM) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "parent is not a filesystem")); (void) zfs_error(hdl, EZFS_BADTYPE, errbuf); zfs_close(zhp); return (-1); } zfs_close(zhp); if (prefixlen != NULL) *prefixlen = strlen(parent); return (0); } /* * Finds whether the dataset of the given type(s) exists. */ boolean_t zfs_dataset_exists(libzfs_handle_t *hdl, const char *path, zfs_type_t types) { zfs_handle_t *zhp; if (!zfs_validate_name(hdl, path, types, B_FALSE)) return (B_FALSE); /* * Try to get stats for the dataset, which will tell us if it exists. */ if ((zhp = make_dataset_handle(hdl, path)) != NULL) { int ds_type = zhp->zfs_type; zfs_close(zhp); if (types & ds_type) return (B_TRUE); } return (B_FALSE); } /* * Given a path to 'target', create all the ancestors between * the prefixlen portion of the path, and the target itself. * Fail if the initial prefixlen-ancestor does not already exist. */ int create_parents(libzfs_handle_t *hdl, char *target, int prefixlen) { zfs_handle_t *h; char *cp; const char *opname; /* make sure prefix exists */ cp = target + prefixlen; if (*cp != '/') { assert(strchr(cp, '/') == NULL); h = zfs_open(hdl, target, ZFS_TYPE_FILESYSTEM); } else { *cp = '\0'; h = zfs_open(hdl, target, ZFS_TYPE_FILESYSTEM); *cp = '/'; } if (h == NULL) return (-1); zfs_close(h); /* * Attempt to create, mount, and share any ancestor filesystems, * up to the prefixlen-long one. 
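 * Continuing the example from check_parents() (hypothetical names): with
 * target "tank/a/b/c" and an existing prefix "tank/a", this loop creates
 * and mounts "tank/a/b"; the final component "tank/a/b/c" is left for the
 * caller to create.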
*/ for (cp = target + prefixlen + 1; (cp = strchr(cp, '/')) != NULL; *cp = '/', cp++) { *cp = '\0'; h = make_dataset_handle(hdl, target); if (h) { /* it already exists, nothing to do here */ zfs_close(h); continue; } if (zfs_create(hdl, target, ZFS_TYPE_FILESYSTEM, NULL) != 0) { opname = dgettext(TEXT_DOMAIN, "create"); goto ancestorerr; } h = zfs_open(hdl, target, ZFS_TYPE_FILESYSTEM); if (h == NULL) { opname = dgettext(TEXT_DOMAIN, "open"); goto ancestorerr; } if (zfs_mount(h, NULL, 0) != 0) { opname = dgettext(TEXT_DOMAIN, "mount"); goto ancestorerr; } if (zfs_share(h, NULL) != 0) { opname = dgettext(TEXT_DOMAIN, "share"); goto ancestorerr; } zfs_close(h); } zfs_commit_shares(NULL); return (0); ancestorerr: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "failed to %s ancestor '%s'"), opname, target); return (-1); } /* * Creates non-existing ancestors of the given path. */ int zfs_create_ancestors(libzfs_handle_t *hdl, const char *path) { int prefix; char *path_copy; char errbuf[ERRBUFLEN]; int rc = 0; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot create '%s'"), path); /* * Check that we are not passing the nesting limit * before we start creating any ancestors. */ if (dataset_nestcheck(path) != 0) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "maximum name nesting depth exceeded")); return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); } if (check_parents(hdl, path, NULL, B_TRUE, &prefix) != 0) return (-1); if ((path_copy = strdup(path)) != NULL) { rc = create_parents(hdl, path_copy, prefix); free(path_copy); } if (path_copy == NULL || rc != 0) return (-1); return (0); } /* * Create a new filesystem or volume. */ int zfs_create(libzfs_handle_t *hdl, const char *path, zfs_type_t type, nvlist_t *props) { int ret; uint64_t size = 0; uint64_t blocksize = zfs_prop_default_numeric(ZFS_PROP_VOLBLOCKSIZE); uint64_t zoned; enum lzc_dataset_type ost; zpool_handle_t *zpool_handle; uint8_t *wkeydata = NULL; uint_t wkeylen = 0; char errbuf[ERRBUFLEN]; char parent[ZFS_MAX_DATASET_NAME_LEN]; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot create '%s'"), path); /* validate the path, taking care to note the extended error message */ if (!zfs_validate_name(hdl, path, type, B_TRUE)) return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); if (dataset_nestcheck(path) != 0) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "maximum name nesting depth exceeded")); return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); } /* validate parents exist */ if (check_parents(hdl, path, &zoned, B_FALSE, NULL) != 0) return (-1); /* * The failure modes when creating a dataset of a different type over * one that already exists is a little strange. In particular, if you * try to create a dataset on top of an existing dataset, the ioctl() * will return ENOENT, not EEXIST. To prevent this from happening, we * first try to see if the dataset exists. 
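 * Checking up front lets us report "dataset already exists" rather than
 * the misleading "no such dataset" error the ioctl would otherwise
 * produce.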
*/ if (zfs_dataset_exists(hdl, path, ZFS_TYPE_DATASET)) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "dataset already exists")); return (zfs_error(hdl, EZFS_EXISTS, errbuf)); } if (type == ZFS_TYPE_VOLUME) ost = LZC_DATSET_TYPE_ZVOL; else ost = LZC_DATSET_TYPE_ZFS; /* open zpool handle for prop validation */ char pool_path[ZFS_MAX_DATASET_NAME_LEN]; (void) strlcpy(pool_path, path, sizeof (pool_path)); /* truncate pool_path at first slash */ char *p = strchr(pool_path, '/'); if (p != NULL) *p = '\0'; if ((zpool_handle = zpool_open(hdl, pool_path)) == NULL) return (-1); if (props && (props = zfs_valid_proplist(hdl, type, props, zoned, NULL, zpool_handle, B_TRUE, errbuf)) == 0) { zpool_close(zpool_handle); return (-1); } zpool_close(zpool_handle); if (type == ZFS_TYPE_VOLUME) { /* * If we are creating a volume, the size and block size must * satisfy a few restraints. First, the blocksize must be a * valid block size between SPA_{MIN,MAX}BLOCKSIZE. Second, the * volsize must be a multiple of the block size, and cannot be * zero. */ if (props == NULL || nvlist_lookup_uint64(props, zfs_prop_to_name(ZFS_PROP_VOLSIZE), &size) != 0) { nvlist_free(props); zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "missing volume size")); return (zfs_error(hdl, EZFS_BADPROP, errbuf)); } if ((ret = nvlist_lookup_uint64(props, zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), &blocksize)) != 0) { if (ret == ENOENT) { blocksize = zfs_prop_default_numeric( ZFS_PROP_VOLBLOCKSIZE); } else { nvlist_free(props); zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "missing volume block size")); return (zfs_error(hdl, EZFS_BADPROP, errbuf)); } } if (size == 0) { nvlist_free(props); zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "volume size cannot be zero")); return (zfs_error(hdl, EZFS_BADPROP, errbuf)); } if (size % blocksize != 0) { nvlist_free(props); zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "volume size must be a multiple of volume block " "size")); return (zfs_error(hdl, EZFS_BADPROP, errbuf)); } } (void) parent_name(path, parent, sizeof (parent)); if (zfs_crypto_create(hdl, parent, props, NULL, B_TRUE, &wkeydata, &wkeylen) != 0) { nvlist_free(props); return (zfs_error(hdl, EZFS_CRYPTOFAILED, errbuf)); } /* create the dataset */ ret = lzc_create(path, ost, props, wkeydata, wkeylen); nvlist_free(props); if (wkeydata != NULL) free(wkeydata); /* check for failure */ if (ret != 0) { switch (errno) { case ENOENT: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "no such parent '%s'"), parent); return (zfs_error(hdl, EZFS_NOENT, errbuf)); case ENOTSUP: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be upgraded to set this " "property or value")); return (zfs_error(hdl, EZFS_BADVERSION, errbuf)); case EACCES: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "encryption root's key is not loaded " "or provided")); return (zfs_error(hdl, EZFS_CRYPTOFAILED, errbuf)); case ERANGE: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid property value(s) specified")); return (zfs_error(hdl, EZFS_BADPROP, errbuf)); #ifdef _ILP32 case EOVERFLOW: /* * This platform can't address a volume this big. */ if (type == ZFS_TYPE_VOLUME) return (zfs_error(hdl, EZFS_VOLTOOBIG, errbuf)); zfs_fallthrough; #endif default: return (zfs_standard_error(hdl, errno, errbuf)); } } return (0); } /* * Destroys the given dataset. The caller must make sure that the filesystem * isn't mounted, and that there are no active dependents. If the file system * does not exist this function does nothing. 
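 *
 * A usage sketch (hypothetical names): deferred destruction of a snapshot
 * might look like
 *
 *	zfs_handle_t *snap = zfs_open(hdl, "pool/fs@old", ZFS_TYPE_SNAPSHOT);
 *	if (snap != NULL) {
 *		err = zfs_destroy(snap, B_TRUE);
 *		zfs_close(snap);
 *	}
 *
 * The 'defer' flag is only meaningful for snapshots; for any other type a
 * non-zero defer argument is rejected with EINVAL.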
*/ int zfs_destroy(zfs_handle_t *zhp, boolean_t defer) { int error; if (zhp->zfs_type != ZFS_TYPE_SNAPSHOT && defer) return (EINVAL); if (zhp->zfs_type == ZFS_TYPE_BOOKMARK) { nvlist_t *nv = fnvlist_alloc(); fnvlist_add_boolean(nv, zhp->zfs_name); error = lzc_destroy_bookmarks(nv, NULL); fnvlist_free(nv); if (error != 0) { return (zfs_standard_error_fmt(zhp->zfs_hdl, error, dgettext(TEXT_DOMAIN, "cannot destroy '%s'"), zhp->zfs_name)); } return (0); } if (zhp->zfs_type == ZFS_TYPE_SNAPSHOT) { nvlist_t *nv = fnvlist_alloc(); fnvlist_add_boolean(nv, zhp->zfs_name); error = lzc_destroy_snaps(nv, defer, NULL); fnvlist_free(nv); } else { error = lzc_destroy(zhp->zfs_name); } if (error != 0 && error != ENOENT) { return (zfs_standard_error_fmt(zhp->zfs_hdl, errno, dgettext(TEXT_DOMAIN, "cannot destroy '%s'"), zhp->zfs_name)); } remove_mountpoint(zhp); return (0); } struct destroydata { nvlist_t *nvl; const char *snapname; }; static int zfs_check_snap_cb(zfs_handle_t *zhp, void *arg) { struct destroydata *dd = arg; char name[ZFS_MAX_DATASET_NAME_LEN]; int rv = 0; if (snprintf(name, sizeof (name), "%s@%s", zhp->zfs_name, dd->snapname) >= sizeof (name)) return (EINVAL); if (lzc_exists(name)) fnvlist_add_boolean(dd->nvl, name); rv = zfs_iter_filesystems(zhp, zfs_check_snap_cb, dd); zfs_close(zhp); return (rv); } /* * Destroys all snapshots with the given name in zhp & descendants. */ int zfs_destroy_snaps(zfs_handle_t *zhp, char *snapname, boolean_t defer) { int ret; struct destroydata dd = { 0 }; dd.snapname = snapname; dd.nvl = fnvlist_alloc(); (void) zfs_check_snap_cb(zfs_handle_dup(zhp), &dd); if (nvlist_empty(dd.nvl)) { ret = zfs_standard_error_fmt(zhp->zfs_hdl, ENOENT, dgettext(TEXT_DOMAIN, "cannot destroy '%s@%s'"), zhp->zfs_name, snapname); } else { ret = zfs_destroy_snaps_nvl(zhp->zfs_hdl, dd.nvl, defer); } fnvlist_free(dd.nvl); return (ret); } /* * Destroys all the snapshots named in the nvlist. */ int zfs_destroy_snaps_nvl(libzfs_handle_t *hdl, nvlist_t *snaps, boolean_t defer) { nvlist_t *errlist = NULL; nvpair_t *pair; int ret = zfs_destroy_snaps_nvl_os(hdl, snaps); if (ret != 0) return (ret); ret = lzc_destroy_snaps(snaps, defer, &errlist); if (ret == 0) { nvlist_free(errlist); return (0); } if (nvlist_empty(errlist)) { char errbuf[ERRBUFLEN]; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot destroy snapshots")); ret = zfs_standard_error(hdl, ret, errbuf); } for (pair = nvlist_next_nvpair(errlist, NULL); pair != NULL; pair = nvlist_next_nvpair(errlist, pair)) { char errbuf[ERRBUFLEN]; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot destroy snapshot %s"), nvpair_name(pair)); switch (fnvpair_value_int32(pair)) { case EEXIST: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "snapshot is cloned")); ret = zfs_error(hdl, EZFS_EXISTS, errbuf); break; default: ret = zfs_standard_error(hdl, errno, errbuf); break; } } nvlist_free(errlist); return (ret); } /* * Clones the given dataset. The target must be of the same type as the source. 
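 * The handle must refer to a snapshot (asserted below). A hypothetical
 * caller cloning "pool/fs@snap" into a new filesystem "pool/newfs":
 *
 *	zfs_handle_t *snap = zfs_open(hdl, "pool/fs@snap", ZFS_TYPE_SNAPSHOT);
 *	if (snap != NULL) {
 *		err = zfs_clone(snap, "pool/newfs", NULL);
 *		zfs_close(snap);
 *	}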
*/ int zfs_clone(zfs_handle_t *zhp, const char *target, nvlist_t *props) { char parent[ZFS_MAX_DATASET_NAME_LEN]; int ret; char errbuf[ERRBUFLEN]; libzfs_handle_t *hdl = zhp->zfs_hdl; uint64_t zoned; assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT); (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot create '%s'"), target); /* validate the target/clone name */ if (!zfs_validate_name(hdl, target, ZFS_TYPE_FILESYSTEM, B_TRUE)) return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); /* validate parents exist */ if (check_parents(hdl, target, &zoned, B_FALSE, NULL) != 0) return (-1); (void) parent_name(target, parent, sizeof (parent)); /* do the clone */ if (props) { zfs_type_t type = ZFS_TYPE_FILESYSTEM; if (ZFS_IS_VOLUME(zhp)) type = ZFS_TYPE_VOLUME; if ((props = zfs_valid_proplist(hdl, type, props, zoned, zhp, zhp->zpool_hdl, B_TRUE, errbuf)) == NULL) return (-1); if (zfs_fix_auto_resv(zhp, props) == -1) { nvlist_free(props); return (-1); } } if (zfs_crypto_clone_check(hdl, zhp, parent, props) != 0) { nvlist_free(props); return (zfs_error(hdl, EZFS_CRYPTOFAILED, errbuf)); } ret = lzc_clone(target, zhp->zfs_name, props); nvlist_free(props); if (ret != 0) { switch (errno) { case ENOENT: /* * The parent doesn't exist. We should have caught this * above, but there may a race condition that has since * destroyed the parent. * * At this point, we don't know whether it's the source * that doesn't exist anymore, or whether the target * dataset doesn't exist. */ zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, "no such parent '%s'"), parent); return (zfs_error(zhp->zfs_hdl, EZFS_NOENT, errbuf)); case EXDEV: zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, "source and target pools differ")); return (zfs_error(zhp->zfs_hdl, EZFS_CROSSTARGET, errbuf)); default: return (zfs_standard_error(zhp->zfs_hdl, errno, errbuf)); } } return (ret); } /* * Promotes the given clone fs to be the clone parent. */ int zfs_promote(zfs_handle_t *zhp) { libzfs_handle_t *hdl = zhp->zfs_hdl; char snapname[ZFS_MAX_DATASET_NAME_LEN]; int ret; char errbuf[ERRBUFLEN]; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot promote '%s'"), zhp->zfs_name); if (zhp->zfs_type == ZFS_TYPE_SNAPSHOT) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "snapshots can not be promoted")); return (zfs_error(hdl, EZFS_BADTYPE, errbuf)); } if (zhp->zfs_dmustats.dds_origin[0] == '\0') { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "not a cloned filesystem")); return (zfs_error(hdl, EZFS_BADTYPE, errbuf)); } if (!zfs_validate_name(hdl, zhp->zfs_name, zhp->zfs_type, B_TRUE)) return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); ret = lzc_promote(zhp->zfs_name, snapname, sizeof (snapname)); if (ret != 0) { switch (ret) { case EACCES: /* * Promoting encrypted dataset outside its * encryption root. */ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot promote dataset outside its " "encryption root")); return (zfs_error(hdl, EZFS_EXISTS, errbuf)); case EEXIST: /* There is a conflicting snapshot name. 
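 * Promotion transfers the origin's older snapshots to the clone, so a
 * snapshot name that exists on both datasets makes the operation fail;
 * lzc_promote() returns the offending name in 'snapname'.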
*/ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "conflicting snapshot '%s' from parent '%s'"), snapname, zhp->zfs_dmustats.dds_origin); return (zfs_error(hdl, EZFS_EXISTS, errbuf)); default: return (zfs_standard_error(hdl, ret, errbuf)); } } return (ret); } typedef struct snapdata { nvlist_t *sd_nvl; const char *sd_snapname; } snapdata_t; static int zfs_snapshot_cb(zfs_handle_t *zhp, void *arg) { snapdata_t *sd = arg; char name[ZFS_MAX_DATASET_NAME_LEN]; int rv = 0; if (zfs_prop_get_int(zhp, ZFS_PROP_INCONSISTENT) == 0) { if (snprintf(name, sizeof (name), "%s@%s", zfs_get_name(zhp), sd->sd_snapname) >= sizeof (name)) return (EINVAL); fnvlist_add_boolean(sd->sd_nvl, name); rv = zfs_iter_filesystems(zhp, zfs_snapshot_cb, sd); } zfs_close(zhp); return (rv); } /* * Creates snapshots. The keys in the snaps nvlist are the snapshots to be * created. */ int zfs_snapshot_nvl(libzfs_handle_t *hdl, nvlist_t *snaps, nvlist_t *props) { int ret; char errbuf[ERRBUFLEN]; nvpair_t *elem; nvlist_t *errors; zpool_handle_t *zpool_hdl; char pool[ZFS_MAX_DATASET_NAME_LEN]; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot create snapshots ")); elem = NULL; while ((elem = nvlist_next_nvpair(snaps, elem)) != NULL) { const char *snapname = nvpair_name(elem); /* validate the target name */ if (!zfs_validate_name(hdl, snapname, ZFS_TYPE_SNAPSHOT, B_TRUE)) { (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot create snapshot '%s'"), snapname); return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); } } /* * get pool handle for prop validation. assumes all snaps are in the * same pool, as does lzc_snapshot (below). */ elem = nvlist_next_nvpair(snaps, NULL); (void) strlcpy(pool, nvpair_name(elem), sizeof (pool)); pool[strcspn(pool, "/@")] = '\0'; zpool_hdl = zpool_open(hdl, pool); if (zpool_hdl == NULL) return (-1); if (props != NULL && (props = zfs_valid_proplist(hdl, ZFS_TYPE_SNAPSHOT, props, B_FALSE, NULL, zpool_hdl, B_FALSE, errbuf)) == NULL) { zpool_close(zpool_hdl); return (-1); } zpool_close(zpool_hdl); ret = lzc_snapshot(snaps, props, &errors); if (ret != 0) { boolean_t printed = B_FALSE; for (elem = nvlist_next_nvpair(errors, NULL); elem != NULL; elem = nvlist_next_nvpair(errors, elem)) { (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot create snapshot '%s'"), nvpair_name(elem)); (void) zfs_standard_error(hdl, fnvpair_value_int32(elem), errbuf); printed = B_TRUE; } if (!printed) { switch (ret) { case EXDEV: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "multiple snapshots of same " "fs not allowed")); (void) zfs_error(hdl, EZFS_EXISTS, errbuf); break; default: (void) zfs_standard_error(hdl, ret, errbuf); } } } nvlist_free(props); nvlist_free(errors); return (ret); } int zfs_snapshot(libzfs_handle_t *hdl, const char *path, boolean_t recursive, nvlist_t *props) { int ret; snapdata_t sd = { 0 }; char fsname[ZFS_MAX_DATASET_NAME_LEN]; char *cp; zfs_handle_t *zhp; char errbuf[ERRBUFLEN]; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot snapshot %s"), path); if (!zfs_validate_name(hdl, path, ZFS_TYPE_SNAPSHOT, B_TRUE)) return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); (void) strlcpy(fsname, path, sizeof (fsname)); cp = strchr(fsname, '@'); *cp = '\0'; sd.sd_snapname = cp + 1; if ((zhp = zfs_open(hdl, fsname, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME)) == NULL) { return (-1); } sd.sd_nvl = fnvlist_alloc(); if (recursive) { (void) zfs_snapshot_cb(zfs_handle_dup(zhp), &sd); } else { fnvlist_add_boolean(sd.sd_nvl, path); } ret = zfs_snapshot_nvl(hdl, 
sd.sd_nvl, props); fnvlist_free(sd.sd_nvl); zfs_close(zhp); return (ret); } /* * Destroy any more recent snapshots. We invoke this callback on any dependents * of the snapshot first. If the 'cb_dependent' member is non-zero, then this * is a dependent and we should just destroy it without checking the transaction * group. */ typedef struct rollback_data { const char *cb_target; /* the snapshot */ uint64_t cb_create; /* creation time reference */ boolean_t cb_error; boolean_t cb_force; } rollback_data_t; static int rollback_destroy_dependent(zfs_handle_t *zhp, void *data) { rollback_data_t *cbp = data; prop_changelist_t *clp; /* We must destroy this clone; first unmount it */ clp = changelist_gather(zhp, ZFS_PROP_NAME, 0, cbp->cb_force ? MS_FORCE: 0); if (clp == NULL || changelist_prefix(clp) != 0) { cbp->cb_error = B_TRUE; zfs_close(zhp); return (0); } if (zfs_destroy(zhp, B_FALSE) != 0) cbp->cb_error = B_TRUE; else changelist_remove(clp, zhp->zfs_name); (void) changelist_postfix(clp); changelist_free(clp); zfs_close(zhp); return (0); } static int rollback_destroy(zfs_handle_t *zhp, void *data) { rollback_data_t *cbp = data; if (zfs_prop_get_int(zhp, ZFS_PROP_CREATETXG) > cbp->cb_create) { cbp->cb_error |= zfs_iter_dependents(zhp, B_FALSE, rollback_destroy_dependent, cbp); cbp->cb_error |= zfs_destroy(zhp, B_FALSE); } zfs_close(zhp); return (0); } /* * Given a dataset, rollback to a specific snapshot, discarding any * data changes since then and making it the active dataset. * * Any snapshots and bookmarks more recent than the target are * destroyed, along with their dependents (i.e. clones). */ int zfs_rollback(zfs_handle_t *zhp, zfs_handle_t *snap, boolean_t force) { rollback_data_t cb = { 0 }; int err; boolean_t restore_resv = 0; uint64_t old_volsize = 0, new_volsize; zfs_prop_t resv_prop = { 0 }; uint64_t min_txg = 0; assert(zhp->zfs_type == ZFS_TYPE_FILESYSTEM || zhp->zfs_type == ZFS_TYPE_VOLUME); /* * Destroy all recent snapshots and their dependents. */ cb.cb_force = force; cb.cb_target = snap->zfs_name; cb.cb_create = zfs_prop_get_int(snap, ZFS_PROP_CREATETXG); if (cb.cb_create > 0) min_txg = cb.cb_create; (void) zfs_iter_snapshots(zhp, B_FALSE, rollback_destroy, &cb, min_txg, 0); (void) zfs_iter_bookmarks(zhp, rollback_destroy, &cb); if (cb.cb_error) return (-1); /* * Now that we have verified that the snapshot is the latest, * rollback to the given snapshot. */ if (zhp->zfs_type == ZFS_TYPE_VOLUME) { if (zfs_which_resv_prop(zhp, &resv_prop) < 0) return (-1); old_volsize = zfs_prop_get_int(zhp, ZFS_PROP_VOLSIZE); restore_resv = (old_volsize == zfs_prop_get_int(zhp, resv_prop)); } /* * Pass both the filesystem and the wanted snapshot names, * we would get an error back if the snapshot is destroyed or * a new snapshot is created before this request is processed. 
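 * In practice lzc_rollback_to() returns EEXIST if a more recent snapshot
 * or bookmark has appeared in the meantime and ESRCH if the named
 * snapshot no longer exists; both are mapped to user-facing errors below.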
*/ err = lzc_rollback_to(zhp->zfs_name, snap->zfs_name); if (err != 0) { char errbuf[ERRBUFLEN]; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot rollback '%s'"), zhp->zfs_name); switch (err) { case EEXIST: zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, "there is a snapshot or bookmark more recent " "than '%s'"), snap->zfs_name); (void) zfs_error(zhp->zfs_hdl, EZFS_EXISTS, errbuf); break; case ESRCH: zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, "'%s' is not found among snapshots of '%s'"), snap->zfs_name, zhp->zfs_name); (void) zfs_error(zhp->zfs_hdl, EZFS_NOENT, errbuf); break; case EINVAL: (void) zfs_error(zhp->zfs_hdl, EZFS_BADTYPE, errbuf); break; default: (void) zfs_standard_error(zhp->zfs_hdl, err, errbuf); } return (err); } /* * For volumes, if the pre-rollback volsize matched the pre- * rollback reservation and the volsize has changed then set * the reservation property to the post-rollback volsize. * Make a new handle since the rollback closed the dataset. */ if ((zhp->zfs_type == ZFS_TYPE_VOLUME) && (zhp = make_dataset_handle(zhp->zfs_hdl, zhp->zfs_name))) { if (restore_resv) { new_volsize = zfs_prop_get_int(zhp, ZFS_PROP_VOLSIZE); if (old_volsize != new_volsize) err = zfs_prop_set_int(zhp, resv_prop, new_volsize); } zfs_close(zhp); } return (err); } /* * Renames the given dataset. */ int zfs_rename(zfs_handle_t *zhp, const char *target, renameflags_t flags) { int ret = 0; zfs_cmd_t zc = {"\0"}; char *delim; prop_changelist_t *cl = NULL; char parent[ZFS_MAX_DATASET_NAME_LEN]; char property[ZFS_MAXPROPLEN]; libzfs_handle_t *hdl = zhp->zfs_hdl; char errbuf[ERRBUFLEN]; /* if we have the same exact name, just return success */ if (strcmp(zhp->zfs_name, target) == 0) return (0); (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot rename to '%s'"), target); /* make sure source name is valid */ if (!zfs_validate_name(hdl, zhp->zfs_name, zhp->zfs_type, B_TRUE)) return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); /* * Make sure the target name is valid */ if (zhp->zfs_type == ZFS_TYPE_SNAPSHOT) { if ((strchr(target, '@') == NULL) || *target == '@') { /* * Snapshot target name is abbreviated, * reconstruct full dataset name */ (void) strlcpy(parent, zhp->zfs_name, sizeof (parent)); delim = strchr(parent, '@'); if (strchr(target, '@') == NULL) *(++delim) = '\0'; else *delim = '\0'; (void) strlcat(parent, target, sizeof (parent)); target = parent; } else { /* * Make sure we're renaming within the same dataset. 
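 * (i.e. everything before the '@' in the target must match the source dataset name, as checked below.)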
*/ delim = strchr(target, '@'); if (strncmp(zhp->zfs_name, target, delim - target) != 0 || zhp->zfs_name[delim - target] != '@') { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "snapshots must be part of same " "dataset")); return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf)); } } if (!zfs_validate_name(hdl, target, zhp->zfs_type, B_TRUE)) return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); } else { if (flags.recursive) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "recursive rename must be a snapshot")); return (zfs_error(hdl, EZFS_BADTYPE, errbuf)); } if (!zfs_validate_name(hdl, target, zhp->zfs_type, B_TRUE)) return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); /* validate parents */ if (check_parents(hdl, target, NULL, B_FALSE, NULL) != 0) return (-1); /* make sure we're in the same pool */ verify((delim = strchr(target, '/')) != NULL); if (strncmp(zhp->zfs_name, target, delim - target) != 0 || zhp->zfs_name[delim - target] != '/') { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "datasets must be within same pool")); return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf)); } /* new name cannot be a child of the current dataset name */ if (is_descendant(zhp->zfs_name, target)) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "New dataset name cannot be a descendant of " "current dataset name")); return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); } } (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot rename '%s'"), zhp->zfs_name); if (getzoneid() == GLOBAL_ZONEID && zfs_prop_get_int(zhp, ZFS_PROP_ZONED)) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "dataset is used in a non-global zone")); return (zfs_error(hdl, EZFS_ZONED, errbuf)); } /* * Avoid unmounting file systems with mountpoint property set to * 'legacy' or 'none' even if -u option is not given. */ if (zhp->zfs_type == ZFS_TYPE_FILESYSTEM && !flags.recursive && !flags.nounmount && zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, property, sizeof (property), NULL, NULL, 0, B_FALSE) == 0 && (strcmp(property, "legacy") == 0 || strcmp(property, "none") == 0)) { flags.nounmount = B_TRUE; } if (flags.recursive) { char *parentname = zfs_strdup(zhp->zfs_hdl, zhp->zfs_name); delim = strchr(parentname, '@'); *delim = '\0'; zfs_handle_t *zhrp = zfs_open(zhp->zfs_hdl, parentname, ZFS_TYPE_DATASET); free(parentname); if (zhrp == NULL) { ret = -1; goto error; } zfs_close(zhrp); } else if (zhp->zfs_type != ZFS_TYPE_SNAPSHOT) { if ((cl = changelist_gather(zhp, ZFS_PROP_NAME, flags.nounmount ? CL_GATHER_DONT_UNMOUNT : CL_GATHER_ITER_MOUNTED, flags.forceunmount ? 
MS_FORCE : 0)) == NULL) return (-1); if (changelist_haszonedchild(cl)) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "child dataset with inherited mountpoint is used " "in a non-global zone")); (void) zfs_error(hdl, EZFS_ZONED, errbuf); ret = -1; goto error; } if ((ret = changelist_prefix(cl)) != 0) goto error; } if (ZFS_IS_VOLUME(zhp)) zc.zc_objset_type = DMU_OST_ZVOL; else zc.zc_objset_type = DMU_OST_ZFS; (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); (void) strlcpy(zc.zc_value, target, sizeof (zc.zc_value)); zc.zc_cookie = !!flags.recursive; zc.zc_cookie |= (!!flags.nounmount) << 1; if ((ret = zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_RENAME, &zc)) != 0) { /* * if it was recursive, the one that actually failed will * be in zc.zc_name */ (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot rename '%s'"), zc.zc_name); if (flags.recursive && errno == EEXIST) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "a child dataset already has a snapshot " "with the new name")); (void) zfs_error(hdl, EZFS_EXISTS, errbuf); } else if (errno == EACCES) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot move encrypted child outside of " "its encryption root")); (void) zfs_error(hdl, EZFS_CRYPTOFAILED, errbuf); } else { (void) zfs_standard_error(zhp->zfs_hdl, errno, errbuf); } /* * On failure, we still want to remount any filesystems that * were previously mounted, so we don't alter the system state. */ if (cl != NULL) (void) changelist_postfix(cl); } else { if (cl != NULL) { changelist_rename(cl, zfs_get_name(zhp), target); ret = changelist_postfix(cl); } } error: if (cl != NULL) { changelist_free(cl); } return (ret); } nvlist_t * zfs_get_all_props(zfs_handle_t *zhp) { return (zhp->zfs_props); } nvlist_t * zfs_get_recvd_props(zfs_handle_t *zhp) { if (zhp->zfs_recvd_props == NULL) if (get_recvd_props_ioctl(zhp) != 0) return (NULL); return (zhp->zfs_recvd_props); } nvlist_t * zfs_get_user_props(zfs_handle_t *zhp) { return (zhp->zfs_user_props); } /* * This function is used by 'zfs list' to determine the exact set of columns to * display, and their maximum widths. This does two main things: * * - If this is a list of all properties, then expand the list to include * all native properties, and set a flag so that for each dataset we look * for new unique user properties and add them to the list. * * - For non fixed-width properties, keep track of the maximum width seen * so that we can size the column appropriately. If the user has * requested received property values, we also need to compute the width * of the RECEIVED column. */ int zfs_expand_proplist(zfs_handle_t *zhp, zprop_list_t **plp, boolean_t received, boolean_t literal) { libzfs_handle_t *hdl = zhp->zfs_hdl; zprop_list_t *entry; zprop_list_t **last, **start; nvlist_t *userprops, *propval; nvpair_t *elem; char *strval; char buf[ZFS_MAXPROPLEN]; if (zprop_expand_list(hdl, plp, ZFS_TYPE_DATASET) != 0) return (-1); userprops = zfs_get_user_props(zhp); entry = *plp; if (entry->pl_all && nvlist_next_nvpair(userprops, NULL) != NULL) { /* * Go through and add any user properties as necessary. We * start by incrementing our list pointer to the first * non-native property. */ start = plp; while (*start != NULL) { if ((*start)->pl_prop == ZPROP_USERPROP) break; start = &(*start)->pl_next; } elem = NULL; while ((elem = nvlist_next_nvpair(userprops, elem)) != NULL) { /* * See if we've already found this property in our list. 
*/ for (last = start; *last != NULL; last = &(*last)->pl_next) { if (strcmp((*last)->pl_user_prop, nvpair_name(elem)) == 0) break; } if (*last == NULL) { entry = zfs_alloc(hdl, sizeof (zprop_list_t)); entry->pl_user_prop = zfs_strdup(hdl, nvpair_name(elem)); entry->pl_prop = ZPROP_USERPROP; entry->pl_width = strlen(nvpair_name(elem)); entry->pl_all = B_TRUE; *last = entry; } } } /* * Now go through and check the width of any non-fixed columns */ for (entry = *plp; entry != NULL; entry = entry->pl_next) { if (entry->pl_fixed && !literal) continue; if (entry->pl_prop != ZPROP_USERPROP) { if (zfs_prop_get(zhp, entry->pl_prop, buf, sizeof (buf), NULL, NULL, 0, literal) == 0) { if (strlen(buf) > entry->pl_width) entry->pl_width = strlen(buf); } if (received && zfs_prop_get_recvd(zhp, zfs_prop_to_name(entry->pl_prop), buf, sizeof (buf), literal) == 0) if (strlen(buf) > entry->pl_recvd_width) entry->pl_recvd_width = strlen(buf); } else { if (nvlist_lookup_nvlist(userprops, entry->pl_user_prop, &propval) == 0) { strval = fnvlist_lookup_string(propval, ZPROP_VALUE); if (strlen(strval) > entry->pl_width) entry->pl_width = strlen(strval); } if (received && zfs_prop_get_recvd(zhp, entry->pl_user_prop, buf, sizeof (buf), literal) == 0) if (strlen(buf) > entry->pl_recvd_width) entry->pl_recvd_width = strlen(buf); } } return (0); } void zfs_prune_proplist(zfs_handle_t *zhp, uint8_t *props) { nvpair_t *curr; nvpair_t *next; /* * Keep a reference to the props-table against which we prune the * properties. */ zhp->zfs_props_table = props; curr = nvlist_next_nvpair(zhp->zfs_props, NULL); while (curr) { zfs_prop_t zfs_prop = zfs_name_to_prop(nvpair_name(curr)); next = nvlist_next_nvpair(zhp->zfs_props, curr); /* * User properties will result in ZPROP_USERPROP (an alias * for ZPROP_INVAL), and since we * only know how to prune standard ZFS properties, we always * leave these in the list. This can also happen if we * encounter an unknown DSL property (when running older * software, for example). 
*/ if (zfs_prop != ZPROP_USERPROP && props[zfs_prop] == B_FALSE) (void) nvlist_remove(zhp->zfs_props, nvpair_name(curr), nvpair_type(curr)); curr = next; } } static int zfs_smb_acl_mgmt(libzfs_handle_t *hdl, char *dataset, char *path, zfs_smb_acl_op_t cmd, char *resource1, char *resource2) { zfs_cmd_t zc = {"\0"}; nvlist_t *nvlist = NULL; int error; (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name)); (void) strlcpy(zc.zc_value, path, sizeof (zc.zc_value)); zc.zc_cookie = (uint64_t)cmd; if (cmd == ZFS_SMB_ACL_RENAME) { if (nvlist_alloc(&nvlist, NV_UNIQUE_NAME, 0) != 0) { (void) no_memory(hdl); return (0); } } switch (cmd) { case ZFS_SMB_ACL_ADD: case ZFS_SMB_ACL_REMOVE: (void) strlcpy(zc.zc_string, resource1, sizeof (zc.zc_string)); break; case ZFS_SMB_ACL_RENAME: if (nvlist_add_string(nvlist, ZFS_SMB_ACL_SRC, resource1) != 0) { (void) no_memory(hdl); return (-1); } if (nvlist_add_string(nvlist, ZFS_SMB_ACL_TARGET, resource2) != 0) { (void) no_memory(hdl); return (-1); } zcmd_write_src_nvlist(hdl, &zc, nvlist); break; case ZFS_SMB_ACL_PURGE: break; default: return (-1); } error = ioctl(hdl->libzfs_fd, ZFS_IOC_SMB_ACL, &zc); nvlist_free(nvlist); return (error); } int zfs_smb_acl_add(libzfs_handle_t *hdl, char *dataset, char *path, char *resource) { return (zfs_smb_acl_mgmt(hdl, dataset, path, ZFS_SMB_ACL_ADD, resource, NULL)); } int zfs_smb_acl_remove(libzfs_handle_t *hdl, char *dataset, char *path, char *resource) { return (zfs_smb_acl_mgmt(hdl, dataset, path, ZFS_SMB_ACL_REMOVE, resource, NULL)); } int zfs_smb_acl_purge(libzfs_handle_t *hdl, char *dataset, char *path) { return (zfs_smb_acl_mgmt(hdl, dataset, path, ZFS_SMB_ACL_PURGE, NULL, NULL)); } int zfs_smb_acl_rename(libzfs_handle_t *hdl, char *dataset, char *path, char *oldname, char *newname) { return (zfs_smb_acl_mgmt(hdl, dataset, path, ZFS_SMB_ACL_RENAME, oldname, newname)); } int zfs_userspace(zfs_handle_t *zhp, zfs_userquota_prop_t type, zfs_userspace_cb_t func, void *arg) { zfs_cmd_t zc = {"\0"}; zfs_useracct_t buf[100]; libzfs_handle_t *hdl = zhp->zfs_hdl; int ret; (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); zc.zc_objset_type = type; zc.zc_nvlist_dst = (uintptr_t)buf; for (;;) { zfs_useracct_t *zua = buf; zc.zc_nvlist_dst_size = sizeof (buf); if (zfs_ioctl(hdl, ZFS_IOC_USERSPACE_MANY, &zc) != 0) { if ((errno == ENOTSUP && (type == ZFS_PROP_USEROBJUSED || type == ZFS_PROP_GROUPOBJUSED || type == ZFS_PROP_USEROBJQUOTA || type == ZFS_PROP_GROUPOBJQUOTA || type == ZFS_PROP_PROJECTOBJUSED || type == ZFS_PROP_PROJECTOBJQUOTA || type == ZFS_PROP_PROJECTUSED || type == ZFS_PROP_PROJECTQUOTA))) break; return (zfs_standard_error_fmt(hdl, errno, dgettext(TEXT_DOMAIN, "cannot get used/quota for %s"), zc.zc_name)); } if (zc.zc_nvlist_dst_size == 0) break; while (zc.zc_nvlist_dst_size > 0) { if ((ret = func(arg, zua->zu_domain, zua->zu_rid, zua->zu_space)) != 0) return (ret); zua++; zc.zc_nvlist_dst_size -= sizeof (zfs_useracct_t); } } return (0); } struct holdarg { nvlist_t *nvl; const char *snapname; const char *tag; boolean_t recursive; int error; }; static int zfs_hold_one(zfs_handle_t *zhp, void *arg) { struct holdarg *ha = arg; char name[ZFS_MAX_DATASET_NAME_LEN]; int rv = 0; if (snprintf(name, sizeof (name), "%s@%s", zhp->zfs_name, ha->snapname) >= sizeof (name)) return (EINVAL); if (lzc_exists(name)) fnvlist_add_string(ha->nvl, name, ha->tag); if (ha->recursive) rv = zfs_iter_filesystems(zhp, zfs_hold_one, ha); zfs_close(zhp); return (rv); } int zfs_hold(zfs_handle_t *zhp, const char *snapname, const char 
*tag, boolean_t recursive, int cleanup_fd) { int ret; struct holdarg ha; ha.nvl = fnvlist_alloc(); ha.snapname = snapname; ha.tag = tag; ha.recursive = recursive; (void) zfs_hold_one(zfs_handle_dup(zhp), &ha); if (nvlist_empty(ha.nvl)) { char errbuf[ERRBUFLEN]; fnvlist_free(ha.nvl); ret = ENOENT; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot hold snapshot '%s@%s'"), zhp->zfs_name, snapname); (void) zfs_standard_error(zhp->zfs_hdl, ret, errbuf); return (ret); } ret = zfs_hold_nvl(zhp, cleanup_fd, ha.nvl); fnvlist_free(ha.nvl); return (ret); } int zfs_hold_nvl(zfs_handle_t *zhp, int cleanup_fd, nvlist_t *holds) { int ret; nvlist_t *errors; libzfs_handle_t *hdl = zhp->zfs_hdl; char errbuf[ERRBUFLEN]; nvpair_t *elem; errors = NULL; ret = lzc_hold(holds, cleanup_fd, &errors); if (ret == 0) { /* There may be errors even in the success case. */ fnvlist_free(errors); return (0); } if (nvlist_empty(errors)) { /* no hold-specific errors */ (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot hold")); switch (ret) { case ENOTSUP: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be upgraded")); (void) zfs_error(hdl, EZFS_BADVERSION, errbuf); break; case EINVAL: (void) zfs_error(hdl, EZFS_BADTYPE, errbuf); break; default: (void) zfs_standard_error(hdl, ret, errbuf); } } for (elem = nvlist_next_nvpair(errors, NULL); elem != NULL; elem = nvlist_next_nvpair(errors, elem)) { (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot hold snapshot '%s'"), nvpair_name(elem)); switch (fnvpair_value_int32(elem)) { case E2BIG: /* * Temporary tags wind up having the ds object id * prepended. So even if we passed the length check * above, it's still possible for the tag to wind * up being slightly too long. */ (void) zfs_error(hdl, EZFS_TAGTOOLONG, errbuf); break; case EINVAL: (void) zfs_error(hdl, EZFS_BADTYPE, errbuf); break; case EEXIST: (void) zfs_error(hdl, EZFS_REFTAG_HOLD, errbuf); break; default: (void) zfs_standard_error(hdl, fnvpair_value_int32(elem), errbuf); } } fnvlist_free(errors); return (ret); } static int zfs_release_one(zfs_handle_t *zhp, void *arg) { struct holdarg *ha = arg; char name[ZFS_MAX_DATASET_NAME_LEN]; int rv = 0; nvlist_t *existing_holds; if (snprintf(name, sizeof (name), "%s@%s", zhp->zfs_name, ha->snapname) >= sizeof (name)) { ha->error = EINVAL; rv = EINVAL; } if (lzc_get_holds(name, &existing_holds) != 0) { ha->error = ENOENT; } else if (!nvlist_exists(existing_holds, ha->tag)) { ha->error = ESRCH; } else { nvlist_t *torelease = fnvlist_alloc(); fnvlist_add_boolean(torelease, ha->tag); fnvlist_add_nvlist(ha->nvl, name, torelease); fnvlist_free(torelease); } if (ha->recursive) rv = zfs_iter_filesystems(zhp, zfs_release_one, ha); zfs_close(zhp); return (rv); } int zfs_release(zfs_handle_t *zhp, const char *snapname, const char *tag, boolean_t recursive) { int ret; struct holdarg ha; nvlist_t *errors = NULL; nvpair_t *elem; libzfs_handle_t *hdl = zhp->zfs_hdl; char errbuf[ERRBUFLEN]; ha.nvl = fnvlist_alloc(); ha.snapname = snapname; ha.tag = tag; ha.recursive = recursive; ha.error = 0; (void) zfs_release_one(zfs_handle_dup(zhp), &ha); if (nvlist_empty(ha.nvl)) { fnvlist_free(ha.nvl); ret = ha.error; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot release hold from snapshot '%s@%s'"), zhp->zfs_name, snapname); if (ret == ESRCH) { (void) zfs_error(hdl, EZFS_REFTAG_RELE, errbuf); } else { (void) zfs_standard_error(hdl, ret, errbuf); } return (ret); } ret = lzc_release(ha.nvl, &errors); 
fnvlist_free(ha.nvl); if (ret == 0) { /* There may be errors even in the success case. */ fnvlist_free(errors); return (0); } if (nvlist_empty(errors)) { /* no hold-specific errors */ (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot release")); switch (errno) { case ENOTSUP: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be upgraded")); (void) zfs_error(hdl, EZFS_BADVERSION, errbuf); break; default: (void) zfs_standard_error(hdl, errno, errbuf); } } for (elem = nvlist_next_nvpair(errors, NULL); elem != NULL; elem = nvlist_next_nvpair(errors, elem)) { (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot release hold from snapshot '%s'"), nvpair_name(elem)); switch (fnvpair_value_int32(elem)) { case ESRCH: (void) zfs_error(hdl, EZFS_REFTAG_RELE, errbuf); break; case EINVAL: (void) zfs_error(hdl, EZFS_BADTYPE, errbuf); break; default: (void) zfs_standard_error(hdl, fnvpair_value_int32(elem), errbuf); } } fnvlist_free(errors); return (ret); } int zfs_get_fsacl(zfs_handle_t *zhp, nvlist_t **nvl) { zfs_cmd_t zc = {"\0"}; libzfs_handle_t *hdl = zhp->zfs_hdl; int nvsz = 2048; void *nvbuf; int err = 0; char errbuf[ERRBUFLEN]; assert(zhp->zfs_type == ZFS_TYPE_VOLUME || zhp->zfs_type == ZFS_TYPE_FILESYSTEM); tryagain: nvbuf = malloc(nvsz); if (nvbuf == NULL) { err = (zfs_error(hdl, EZFS_NOMEM, strerror(errno))); goto out; } zc.zc_nvlist_dst_size = nvsz; zc.zc_nvlist_dst = (uintptr_t)nvbuf; (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); if (zfs_ioctl(hdl, ZFS_IOC_GET_FSACL, &zc) != 0) { (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot get permissions on '%s'"), zc.zc_name); switch (errno) { case ENOMEM: free(nvbuf); nvsz = zc.zc_nvlist_dst_size; goto tryagain; case ENOTSUP: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be upgraded")); err = zfs_error(hdl, EZFS_BADVERSION, errbuf); break; case EINVAL: err = zfs_error(hdl, EZFS_BADTYPE, errbuf); break; case ENOENT: err = zfs_error(hdl, EZFS_NOENT, errbuf); break; default: err = zfs_standard_error(hdl, errno, errbuf); break; } } else { /* success */ int rc = nvlist_unpack(nvbuf, zc.zc_nvlist_dst_size, nvl, 0); if (rc) { err = zfs_standard_error_fmt(hdl, rc, dgettext( TEXT_DOMAIN, "cannot get permissions on '%s'"), zc.zc_name); } } free(nvbuf); out: return (err); } int zfs_set_fsacl(zfs_handle_t *zhp, boolean_t un, nvlist_t *nvl) { zfs_cmd_t zc = {"\0"}; libzfs_handle_t *hdl = zhp->zfs_hdl; char *nvbuf; char errbuf[ERRBUFLEN]; size_t nvsz; int err; assert(zhp->zfs_type == ZFS_TYPE_VOLUME || zhp->zfs_type == ZFS_TYPE_FILESYSTEM); err = nvlist_size(nvl, &nvsz, NV_ENCODE_NATIVE); assert(err == 0); nvbuf = malloc(nvsz); err = nvlist_pack(nvl, &nvbuf, &nvsz, NV_ENCODE_NATIVE, 0); assert(err == 0); zc.zc_nvlist_src_size = nvsz; zc.zc_nvlist_src = (uintptr_t)nvbuf; zc.zc_perm_action = un; (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name)); if (zfs_ioctl(hdl, ZFS_IOC_SET_FSACL, &zc) != 0) { (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot set permissions on '%s'"), zc.zc_name); switch (errno) { case ENOTSUP: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be upgraded")); err = zfs_error(hdl, EZFS_BADVERSION, errbuf); break; case EINVAL: err = zfs_error(hdl, EZFS_BADTYPE, errbuf); break; case ENOENT: err = zfs_error(hdl, EZFS_NOENT, errbuf); break; default: err = zfs_standard_error(hdl, errno, errbuf); break; } } free(nvbuf); return (err); } int zfs_get_holds(zfs_handle_t *zhp, nvlist_t **nvl) { int err; char errbuf[ERRBUFLEN]; err = 
lzc_get_holds(zhp->zfs_name, nvl); if (err != 0) { libzfs_handle_t *hdl = zhp->zfs_hdl; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot get holds for '%s'"), zhp->zfs_name); switch (err) { case ENOTSUP: zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be upgraded")); err = zfs_error(hdl, EZFS_BADVERSION, errbuf); break; case EINVAL: err = zfs_error(hdl, EZFS_BADTYPE, errbuf); break; case ENOENT: err = zfs_error(hdl, EZFS_NOENT, errbuf); break; default: err = zfs_standard_error(hdl, errno, errbuf); break; } } return (err); } /* * The theory of raidz space accounting * * The "referenced" property of RAIDZ vdevs is scaled such that a 128KB block * will "reference" 128KB, even though it allocates more than that, to store the * parity information (and perhaps skip sectors). This concept of the * "referenced" (and other DMU space accounting) being lower than the allocated * space by a constant factor is called "raidz deflation." * * As mentioned above, the constant factor for raidz deflation assumes a 128KB * block size. However, zvols typically have a much smaller block size (default * 8KB). These smaller blocks may require proportionally much more parity * information (and perhaps skip sectors). In this case, the change to the * "referenced" property may be much more than the logical block size. * * Suppose a raidz vdev has 5 disks with ashift=12. A 128k block may be written * as follows. * * +-------+-------+-------+-------+-------+ * | disk1 | disk2 | disk3 | disk4 | disk5 | * +-------+-------+-------+-------+-------+ * | P0 | D0 | D8 | D16 | D24 | * | P1 | D1 | D9 | D17 | D25 | * | P2 | D2 | D10 | D18 | D26 | * | P3 | D3 | D11 | D19 | D27 | * | P4 | D4 | D12 | D20 | D28 | * | P5 | D5 | D13 | D21 | D29 | * | P6 | D6 | D14 | D22 | D30 | * | P7 | D7 | D15 | D23 | D31 | * +-------+-------+-------+-------+-------+ * * Above, notice that 160k was allocated: 8 x 4k parity sectors + 32 x 4k data * sectors. The dataset's referenced will increase by 128k and the pool's * allocated and free properties will be adjusted by 160k. * * A 4k block written to the same raidz vdev will require two 4k sectors. The * blank cells represent unallocated space. * * +-------+-------+-------+-------+-------+ * | disk1 | disk2 | disk3 | disk4 | disk5 | * +-------+-------+-------+-------+-------+ * | P0 | D0 | | | | * +-------+-------+-------+-------+-------+ * * Above, notice that the 4k block required one sector for parity and another * for data. vdev_raidz_asize() will return 8k and as such the pool's allocated * and free properties will be adjusted by 8k. The dataset will not be charged * 8k. Rather, it will be charged a value that is scaled according to the * overhead of the 128k block on the same vdev. This 8k allocation will be * charged 8k * 128k / 160k. 128k is from SPA_OLD_MAXBLOCKSIZE and 160k is as * calculated in the 128k block example above. * * Every raidz allocation is sized to be a multiple of nparity+1 sectors. That * is, every raidz1 allocation will be a multiple of 2 sectors, raidz2 * allocations are a multiple of 3 sectors, and raidz3 allocations are a * multiple of 4 sectors. When a block does not fill the required number of * sectors, skip blocks (sectors) are used. 
* * An 8k block being written to a raidz vdev may be written as follows: * * +-------+-------+-------+-------+-------+ * | disk1 | disk2 | disk3 | disk4 | disk5 | * +-------+-------+-------+-------+-------+ * | P0 | D0 | D1 | S0 | | * +-------+-------+-------+-------+-------+ * * In order to maintain the nparity+1 allocation size, a skip block (S0) was * added. For this 8k block, the pool's allocated and free properties are * adjusted by 16k and the dataset's referenced is increased by 16k * 128k / * 160k. Again, 128k is from SPA_OLD_MAXBLOCKSIZE and 160k is as calculated in * the 128k block example above. * * The situation is slightly different for dRAID since the minimum allocation * size is the full group width. The same 8K block above would be written as * follows in a dRAID group: * * +-------+-------+-------+-------+-------+ * | disk1 | disk2 | disk3 | disk4 | disk5 | * +-------+-------+-------+-------+-------+ * | P0 | D0 | D1 | S0 | S1 | * +-------+-------+-------+-------+-------+ * * Compression may lead to a variety of block sizes being written for the same * volume or file. There is no clear way to reserve just the amount of space * that will be required, so the worst case (no compression) is assumed. * Note that metadata blocks will typically be compressed, so the reservation * size returned by zvol_volsize_to_reservation() will generally be slightly * larger than the maximum that the volume can reference. */ /* * Derived from function of same name in module/zfs/vdev_raidz.c. Returns the * amount of space (in bytes) that will be allocated for the specified block * size. Note that the "referenced" space accounted will be less than this, but * not necessarily equal to "blksize", due to RAIDZ deflation. */ static uint64_t vdev_raidz_asize(uint64_t ndisks, uint64_t nparity, uint64_t ashift, uint64_t blksize) { uint64_t asize, ndata; ASSERT3U(ndisks, >, nparity); ndata = ndisks - nparity; asize = ((blksize - 1) >> ashift) + 1; asize += nparity * ((asize + ndata - 1) / ndata); asize = roundup(asize, nparity + 1) << ashift; return (asize); } /* * Derived from function of same name in module/zfs/vdev_draid.c. Returns the * amount of space (in bytes) that will be allocated for the specified block * size. */ static uint64_t vdev_draid_asize(uint64_t ndisks, uint64_t nparity, uint64_t ashift, uint64_t blksize) { ASSERT3U(ndisks, >, nparity); uint64_t ndata = ndisks - nparity; uint64_t rows = ((blksize - 1) / (ndata << ashift)) + 1; uint64_t asize = (rows * ndisks) << ashift; return (asize); } /* * Determine how much space will be allocated if it lands on the most space- * inefficient top-level vdev. Returns the size in bytes required to store one * copy of the volume data. See theory comment above. 
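 * As a rough check of vdev_raidz_asize() against the 5-wide raidz1 (ashift=12) example in the theory comment: an 8k block maps to 2 data sectors plus 1 parity sector, rounded up to a multiple of nparity+1 = 2, i.e. 4 sectors (16k); a 128k block maps to 32 data plus 8 parity sectors, i.e. 40 sectors (160k).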
*/ static uint64_t volsize_from_vdevs(zpool_handle_t *zhp, uint64_t nblocks, uint64_t blksize) { nvlist_t *config, *tree, **vdevs; uint_t nvdevs; uint64_t ret = 0; config = zpool_get_config(zhp, NULL); if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &tree) != 0 || nvlist_lookup_nvlist_array(tree, ZPOOL_CONFIG_CHILDREN, &vdevs, &nvdevs) != 0) { return (nblocks * blksize); } for (int v = 0; v < nvdevs; v++) { char *type; uint64_t nparity, ashift, asize, tsize; uint64_t volsize; if (nvlist_lookup_string(vdevs[v], ZPOOL_CONFIG_TYPE, &type) != 0) continue; if (strcmp(type, VDEV_TYPE_RAIDZ) != 0 && strcmp(type, VDEV_TYPE_DRAID) != 0) continue; if (nvlist_lookup_uint64(vdevs[v], ZPOOL_CONFIG_NPARITY, &nparity) != 0) continue; if (nvlist_lookup_uint64(vdevs[v], ZPOOL_CONFIG_ASHIFT, &ashift) != 0) continue; if (strcmp(type, VDEV_TYPE_RAIDZ) == 0) { nvlist_t **disks; uint_t ndisks; if (nvlist_lookup_nvlist_array(vdevs[v], ZPOOL_CONFIG_CHILDREN, &disks, &ndisks) != 0) continue; /* allocation size for the "typical" 128k block */ tsize = vdev_raidz_asize(ndisks, nparity, ashift, SPA_OLD_MAXBLOCKSIZE); /* allocation size for the blksize block */ asize = vdev_raidz_asize(ndisks, nparity, ashift, blksize); } else { uint64_t ndata; if (nvlist_lookup_uint64(vdevs[v], ZPOOL_CONFIG_DRAID_NDATA, &ndata) != 0) continue; /* allocation size for the "typical" 128k block */ tsize = vdev_draid_asize(ndata + nparity, nparity, ashift, SPA_OLD_MAXBLOCKSIZE); /* allocation size for the blksize block */ asize = vdev_draid_asize(ndata + nparity, nparity, ashift, blksize); } /* * Scale this size down as a ratio of 128k / tsize. * See theory statement above. */ volsize = nblocks * asize * SPA_OLD_MAXBLOCKSIZE / tsize; if (volsize > ret) { ret = volsize; } } if (ret == 0) { ret = nblocks * blksize; } return (ret); } /* * Convert the zvol's volume size to an appropriate reservation. See theory * comment above. * * Note: If this routine is updated, it is necessary to update the ZFS test * suite's shell version in reservation.shlib. */ uint64_t zvol_volsize_to_reservation(zpool_handle_t *zph, uint64_t volsize, nvlist_t *props) { uint64_t numdb; uint64_t nblocks, volblocksize; int ncopies; char *strval; if (nvlist_lookup_string(props, zfs_prop_to_name(ZFS_PROP_COPIES), &strval) == 0) ncopies = atoi(strval); else ncopies = 1; if (nvlist_lookup_uint64(props, zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), &volblocksize) != 0) volblocksize = ZVOL_DEFAULT_BLOCKSIZE; nblocks = volsize / volblocksize; /* * Metadata defaults to using 128k blocks, not volblocksize blocks. For * this reason, only the data blocks are scaled based on vdev config. */ volsize = volsize_from_vdevs(zph, nblocks, volblocksize); /* start with metadnode L0-L6 */ numdb = 7; /* calculate number of indirects */ while (nblocks > 1) { nblocks += DNODES_PER_LEVEL - 1; nblocks /= DNODES_PER_LEVEL; numdb += nblocks; } numdb *= MIN(SPA_DVAS_PER_BP, ncopies + 1); volsize *= ncopies; /* * this is exactly DN_MAX_INDBLKSHIFT when metadata isn't * compressed, but in practice they compress down to about * 1100 bytes */ numdb *= 1ULL << DN_MAX_INDBLKSHIFT; volsize += numdb; return (volsize); } /* * Wait for the given activity and return the status of the wait (whether or not * any waiting was done) in the 'waited' parameter. Non-existent fses are * reported via the 'missing' parameter, rather than by printing an error * message. This is convenient when this function is called in a loop over a * long period of time (as it is, for example, by zfs's wait cmd). 
In that * scenario, a fs being exported or destroyed should be considered a normal * event, so we don't want to print an error when we find that the fs doesn't * exist. */ int zfs_wait_status(zfs_handle_t *zhp, zfs_wait_activity_t activity, boolean_t *missing, boolean_t *waited) { int error = lzc_wait_fs(zhp->zfs_name, activity, waited); *missing = (error == ENOENT); if (*missing) return (0); if (error != 0) { (void) zfs_standard_error_fmt(zhp->zfs_hdl, error, dgettext(TEXT_DOMAIN, "error waiting in fs '%s'"), zhp->zfs_name); } return (error); } diff --git a/lib/libzfs/libzfs_diff.c b/lib/libzfs/libzfs_diff.c index 93f6e19e9127..80588a860c18 100644 --- a/lib/libzfs/libzfs_diff.c +++ b/lib/libzfs/libzfs_diff.c @@ -1,790 +1,788 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or https://opensource.org/licenses/CDDL-1.0. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. * Copyright 2015 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2015, 2018 by Delphix. All rights reserved. * Copyright 2016 Joyent, Inc. * Copyright 2016 Igor Kozhukhov */ /* * zfs diff support */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "libzfs_impl.h" #define ZDIFF_SNAPDIR "/.zfs/snapshot/" #define ZDIFF_PREFIX "zfs-diff-%d" #define ZDIFF_ADDED '+' #define ZDIFF_MODIFIED "M" #define ZDIFF_REMOVED '-' #define ZDIFF_RENAMED "R" /* * Given a {dsname, object id}, get the object path */ static int get_stats_for_obj(differ_info_t *di, const char *dsname, uint64_t obj, char *pn, int maxlen, zfs_stat_t *sb) { zfs_cmd_t zc = {"\0"}; int error; (void) strlcpy(zc.zc_name, dsname, sizeof (zc.zc_name)); zc.zc_obj = obj; errno = 0; error = zfs_ioctl(di->zhp->zfs_hdl, ZFS_IOC_OBJ_TO_STATS, &zc); di->zerr = errno; /* we can get stats even if we failed to get a path */ (void) memcpy(sb, &zc.zc_stat, sizeof (zfs_stat_t)); if (error == 0) { ASSERT(di->zerr == 0); (void) strlcpy(pn, zc.zc_value, maxlen); return (0); } if (di->zerr == ESTALE) { (void) snprintf(pn, maxlen, "(on_delete_queue)"); return (0); } else if (di->zerr == EPERM) { (void) snprintf(di->errbuf, sizeof (di->errbuf), dgettext(TEXT_DOMAIN, "The sys_config privilege or diff delegated permission " "is needed\nto discover path names")); return (-1); } else if (di->zerr == EACCES) { (void) snprintf(di->errbuf, sizeof (di->errbuf), dgettext(TEXT_DOMAIN, "Key must be loaded to discover path names")); return (-1); } else { (void) snprintf(di->errbuf, sizeof (di->errbuf), dgettext(TEXT_DOMAIN, "Unable to determine path or stats for " "object %lld in %s"), (longlong_t)obj, dsname); return (-1); } } /* * stream_bytes * * Prints a file name out a character at a time. 
If the character is * not in the range of what we consider "printable" ASCII, display it * as an escaped 4-digit octal value. ASCII values less than a space * are all control characters and we declare the upper end as the * DELete character. This also is the last 7-bit ASCII character. * We choose to treat all 8-bit ASCII as not printable for this * application. */ static void stream_bytes(FILE *fp, const char *string) { char c; while ((c = *string++) != '\0') { if (c > ' ' && c != '\\' && c < '\177') { (void) fputc(c, fp); } else { (void) fprintf(fp, "\\%04hho", (uint8_t)c); } } } static char get_what(mode_t what) { switch (what & S_IFMT) { case S_IFBLK: return ('B'); case S_IFCHR: return ('C'); case S_IFDIR: return ('/'); #ifdef S_IFDOOR case S_IFDOOR: return ('>'); #endif case S_IFIFO: return ('|'); case S_IFLNK: return ('@'); #ifdef S_IFPORT case S_IFPORT: return ('P'); #endif case S_IFSOCK: return ('='); case S_IFREG: return ('F'); default: return ('?'); } } static void print_cmn(FILE *fp, differ_info_t *di, const char *file) { if (!di->no_mangle) { stream_bytes(fp, di->dsmnt); stream_bytes(fp, file); } else { (void) fputs(di->dsmnt, fp); (void) fputs(file, fp); } } static void print_rename(FILE *fp, differ_info_t *di, const char *old, const char *new, zfs_stat_t *isb) { if (di->timestamped) (void) fprintf(fp, "%10lld.%09lld\t", (longlong_t)isb->zs_ctime[0], (longlong_t)isb->zs_ctime[1]); (void) fputs(ZDIFF_RENAMED "\t", fp); if (di->classify) (void) fprintf(fp, "%c\t", get_what(isb->zs_mode)); print_cmn(fp, di, old); (void) fputs(di->scripted ? "\t" : " -> ", fp); print_cmn(fp, di, new); (void) fputc('\n', fp); } static void print_link_change(FILE *fp, differ_info_t *di, int delta, const char *file, zfs_stat_t *isb) { if (di->timestamped) (void) fprintf(fp, "%10lld.%09lld\t", (longlong_t)isb->zs_ctime[0], (longlong_t)isb->zs_ctime[1]); (void) fputs(ZDIFF_MODIFIED "\t", fp); if (di->classify) (void) fprintf(fp, "%c\t", get_what(isb->zs_mode)); print_cmn(fp, di, file); (void) fprintf(fp, "\t(%+d)\n", delta); } static void print_file(FILE *fp, differ_info_t *di, char type, const char *file, zfs_stat_t *isb) { if (di->timestamped) (void) fprintf(fp, "%10lld.%09lld\t", (longlong_t)isb->zs_ctime[0], (longlong_t)isb->zs_ctime[1]); (void) fprintf(fp, "%c\t", type); if (di->classify) (void) fprintf(fp, "%c\t", get_what(isb->zs_mode)); print_cmn(fp, di, file); (void) fputc('\n', fp); } static int write_inuse_diffs_one(FILE *fp, differ_info_t *di, uint64_t dobj) { struct zfs_stat fsb, tsb; mode_t fmode, tmode; char fobjname[MAXPATHLEN], tobjname[MAXPATHLEN]; boolean_t already_logged = B_FALSE; int fobjerr, tobjerr; int change; if (dobj == di->shares) return (0); /* * Check the from and to snapshots for info on the object. If * we get ENOENT, then the object just didn't exist in that * snapshot. If we get ENOTSUP, then we tried to get * info on a non-ZPL object, which we don't care about anyway. * For any other error we print a warning which includes the * errno and continue. 
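 * (When both the from and to lookups fail for the same object, the warning is only printed once; see already_logged below.)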
*/ fobjerr = get_stats_for_obj(di, di->fromsnap, dobj, fobjname, MAXPATHLEN, &fsb); if (fobjerr && di->zerr != ENOTSUP && di->zerr != ENOENT) { zfs_error_aux(di->zhp->zfs_hdl, "%s", strerror(di->zerr)); zfs_error(di->zhp->zfs_hdl, di->zerr, di->errbuf); /* * Let's not print an error for the same object more than * once if it happens in both snapshots */ already_logged = B_TRUE; } tobjerr = get_stats_for_obj(di, di->tosnap, dobj, tobjname, MAXPATHLEN, &tsb); if (tobjerr && di->zerr != ENOTSUP && di->zerr != ENOENT) { if (!already_logged) { zfs_error_aux(di->zhp->zfs_hdl, "%s", strerror(di->zerr)); zfs_error(di->zhp->zfs_hdl, di->zerr, di->errbuf); } } /* * Unallocated object sharing the same meta dnode block */ if (fobjerr && tobjerr) { di->zerr = 0; return (0); } di->zerr = 0; /* negate get_stats_for_obj() from side that failed */ fmode = fsb.zs_mode & S_IFMT; tmode = tsb.zs_mode & S_IFMT; if (fmode == S_IFDIR || tmode == S_IFDIR || fsb.zs_links == 0 || tsb.zs_links == 0) change = 0; else change = tsb.zs_links - fsb.zs_links; if (fobjerr) { if (change) { print_link_change(fp, di, change, tobjname, &tsb); return (0); } print_file(fp, di, ZDIFF_ADDED, tobjname, &tsb); return (0); } else if (tobjerr) { if (change) { print_link_change(fp, di, change, fobjname, &fsb); return (0); } print_file(fp, di, ZDIFF_REMOVED, fobjname, &fsb); return (0); } if (fmode != tmode && fsb.zs_gen == tsb.zs_gen) tsb.zs_gen++; /* Force a generational difference */ /* Simple modification or no change */ if (fsb.zs_gen == tsb.zs_gen) { /* No apparent changes. Could we assert !this? */ if (fsb.zs_ctime[0] == tsb.zs_ctime[0] && fsb.zs_ctime[1] == tsb.zs_ctime[1]) return (0); if (change) { print_link_change(fp, di, change, change > 0 ? fobjname : tobjname, &tsb); } else if (strcmp(fobjname, tobjname) == 0) { print_file(fp, di, *ZDIFF_MODIFIED, fobjname, &tsb); } else { print_rename(fp, di, fobjname, tobjname, &tsb); } return (0); } else { /* file re-created or object re-used */ print_file(fp, di, ZDIFF_REMOVED, fobjname, &fsb); print_file(fp, di, ZDIFF_ADDED, tobjname, &tsb); return (0); } } static int write_inuse_diffs(FILE *fp, differ_info_t *di, dmu_diff_record_t *dr) { uint64_t o; int err; for (o = dr->ddr_first; o <= dr->ddr_last; o++) { if ((err = write_inuse_diffs_one(fp, di, o)) != 0) return (err); } return (0); } static int describe_free(FILE *fp, differ_info_t *di, uint64_t object, char *namebuf, int maxlen) { struct zfs_stat sb; (void) get_stats_for_obj(di, di->fromsnap, object, namebuf, maxlen, &sb); /* Don't print if in the delete queue on from side */ if (di->zerr == ESTALE || di->zerr == ENOENT) { di->zerr = 0; return (0); } print_file(fp, di, ZDIFF_REMOVED, namebuf, &sb); return (0); } static int write_free_diffs(FILE *fp, differ_info_t *di, dmu_diff_record_t *dr) { zfs_cmd_t zc = {"\0"}; libzfs_handle_t *lhdl = di->zhp->zfs_hdl; char fobjname[MAXPATHLEN]; (void) strlcpy(zc.zc_name, di->fromsnap, sizeof (zc.zc_name)); zc.zc_obj = dr->ddr_first - 1; ASSERT(di->zerr == 0); while (zc.zc_obj < dr->ddr_last) { int err; err = zfs_ioctl(lhdl, ZFS_IOC_NEXT_OBJ, &zc); if (err == 0) { if (zc.zc_obj == di->shares) { zc.zc_obj++; continue; } if (zc.zc_obj > dr->ddr_last) { break; } err = describe_free(fp, di, zc.zc_obj, fobjname, MAXPATHLEN); } else if (errno == ESRCH) { break; } else { (void) snprintf(di->errbuf, sizeof (di->errbuf), dgettext(TEXT_DOMAIN, "next allocated object (> %lld) find failure"), (longlong_t)zc.zc_obj); di->zerr = errno; break; } } if (di->zerr) return (-1); return (0); } static void * 
differ(void *arg) { differ_info_t *di = arg; dmu_diff_record_t dr; FILE *ofp; int err = 0; if ((ofp = fdopen(di->outputfd, "w")) == NULL) { di->zerr = errno; strlcpy(di->errbuf, strerror(errno), sizeof (di->errbuf)); (void) close(di->datafd); return ((void *)-1); } for (;;) { char *cp = (char *)&dr; int len = sizeof (dr); int rv; do { rv = read(di->datafd, cp, len); cp += rv; len -= rv; } while (len > 0 && rv > 0); if (rv < 0 || (rv == 0 && len != sizeof (dr))) { di->zerr = EPIPE; break; } else if (rv == 0) { /* end of file at a natural breaking point */ break; } switch (dr.ddr_type) { case DDR_FREE: err = write_free_diffs(ofp, di, &dr); break; case DDR_INUSE: err = write_inuse_diffs(ofp, di, &dr); break; default: di->zerr = EPIPE; break; } if (err || di->zerr) break; } (void) fclose(ofp); (void) close(di->datafd); if (err) return ((void *)-1); if (di->zerr) { ASSERT(di->zerr == EPIPE); (void) snprintf(di->errbuf, sizeof (di->errbuf), dgettext(TEXT_DOMAIN, "Internal error: bad data from diff IOCTL")); return ((void *)-1); } return ((void *)0); } static int make_temp_snapshot(differ_info_t *di) { libzfs_handle_t *hdl = di->zhp->zfs_hdl; zfs_cmd_t zc = {"\0"}; (void) snprintf(zc.zc_value, sizeof (zc.zc_value), ZDIFF_PREFIX, getpid()); (void) strlcpy(zc.zc_name, di->ds, sizeof (zc.zc_name)); zc.zc_cleanup_fd = di->cleanupfd; if (zfs_ioctl(hdl, ZFS_IOC_TMP_SNAPSHOT, &zc) != 0) { int err = errno; if (err == EPERM) { (void) snprintf(di->errbuf, sizeof (di->errbuf), dgettext(TEXT_DOMAIN, "The diff delegated " "permission is needed in order\nto create a " "just-in-time snapshot for diffing\n")); return (zfs_error(hdl, EZFS_DIFF, di->errbuf)); } else { (void) snprintf(di->errbuf, sizeof (di->errbuf), dgettext(TEXT_DOMAIN, "Cannot create just-in-time " "snapshot of '%s'"), zc.zc_name); return (zfs_standard_error(hdl, err, di->errbuf)); } } di->tmpsnap = zfs_strdup(hdl, zc.zc_value); di->tosnap = zfs_asprintf(hdl, "%s@%s", di->ds, di->tmpsnap); return (0); } static void teardown_differ_info(differ_info_t *di) { free(di->ds); free(di->dsmnt); free(di->fromsnap); free(di->frommnt); free(di->tosnap); free(di->tmpsnap); free(di->tomnt); (void) close(di->cleanupfd); } static int get_snapshot_names(differ_info_t *di, const char *fromsnap, const char *tosnap) { libzfs_handle_t *hdl = di->zhp->zfs_hdl; char *atptrf = NULL; char *atptrt = NULL; int fdslen, fsnlen; int tdslen, tsnlen; /* * Can accept * fdslen fsnlen tdslen tsnlen * dataset@snap1 * 0. dataset@snap1 dataset@snap2 >0 >1 >0 >1 * 1. dataset@snap1 @snap2 >0 >1 ==0 >1 * 2. dataset@snap1 dataset >0 >1 >0 ==0 * 3. @snap1 dataset@snap2 ==0 >1 >0 >1 * 4. @snap1 dataset ==0 >1 >0 ==0 */ if (tosnap == NULL) { /* only a from snapshot given, must be valid */ (void) snprintf(di->errbuf, sizeof (di->errbuf), dgettext(TEXT_DOMAIN, "Badly formed snapshot name %s"), fromsnap); if (!zfs_validate_name(hdl, fromsnap, ZFS_TYPE_SNAPSHOT, B_FALSE)) { return (zfs_error(hdl, EZFS_INVALIDNAME, di->errbuf)); } atptrf = strchr(fromsnap, '@'); ASSERT(atptrf != NULL); fdslen = atptrf - fromsnap; di->fromsnap = zfs_strdup(hdl, fromsnap); di->ds = zfs_strdup(hdl, fromsnap); di->ds[fdslen] = '\0'; /* the to snap will be a just-in-time snap of the head */ return (make_temp_snapshot(di)); } (void) snprintf(di->errbuf, sizeof (di->errbuf), dgettext(TEXT_DOMAIN, "Unable to determine which snapshots to compare")); atptrf = strchr(fromsnap, '@'); atptrt = strchr(tosnap, '@'); fdslen = atptrf ? atptrf - fromsnap : strlen(fromsnap); tdslen = atptrt ? 
atptrt - tosnap : strlen(tosnap); fsnlen = strlen(fromsnap) - fdslen; /* includes @ sign */ tsnlen = strlen(tosnap) - tdslen; /* includes @ sign */ if (fsnlen <= 1 || tsnlen == 1 || (fdslen == 0 && tdslen == 0)) { return (zfs_error(hdl, EZFS_INVALIDNAME, di->errbuf)); } else if ((fdslen > 0 && tdslen > 0) && ((tdslen != fdslen || strncmp(fromsnap, tosnap, fdslen) != 0))) { /* * not the same dataset name, might be okay if * tosnap is a clone of a fromsnap descendant. */ char origin[ZFS_MAX_DATASET_NAME_LEN]; zprop_source_t src; zfs_handle_t *zhp; di->ds = zfs_alloc(di->zhp->zfs_hdl, tdslen + 1); - (void) strncpy(di->ds, tosnap, tdslen); - di->ds[tdslen] = '\0'; + (void) strlcpy(di->ds, tosnap, tdslen + 1); zhp = zfs_open(hdl, di->ds, ZFS_TYPE_FILESYSTEM); while (zhp != NULL) { if (zfs_prop_get(zhp, ZFS_PROP_ORIGIN, origin, sizeof (origin), &src, NULL, 0, B_FALSE) != 0) { (void) zfs_close(zhp); zhp = NULL; break; } if (strncmp(origin, fromsnap, fsnlen) == 0) break; (void) zfs_close(zhp); zhp = zfs_open(hdl, origin, ZFS_TYPE_FILESYSTEM); } if (zhp == NULL) { (void) snprintf(di->errbuf, sizeof (di->errbuf), dgettext(TEXT_DOMAIN, "Not an earlier snapshot from the same fs")); return (zfs_error(hdl, EZFS_INVALIDNAME, di->errbuf)); } else { (void) zfs_close(zhp); } di->isclone = B_TRUE; di->fromsnap = zfs_strdup(hdl, fromsnap); if (tsnlen) di->tosnap = zfs_strdup(hdl, tosnap); else return (make_temp_snapshot(di)); } else { int dslen = fdslen ? fdslen : tdslen; di->ds = zfs_alloc(hdl, dslen + 1); - (void) strncpy(di->ds, fdslen ? fromsnap : tosnap, dslen); - di->ds[dslen] = '\0'; + (void) strlcpy(di->ds, fdslen ? fromsnap : tosnap, dslen + 1); di->fromsnap = zfs_asprintf(hdl, "%s%s", di->ds, atptrf); if (tsnlen) { di->tosnap = zfs_asprintf(hdl, "%s%s", di->ds, atptrt); } else { return (make_temp_snapshot(di)); } } return (0); } static int get_mountpoint(differ_info_t *di, char *dsnm, char **mntpt) { boolean_t mounted; mounted = is_mounted(di->zhp->zfs_hdl, dsnm, mntpt); if (mounted == B_FALSE) { (void) snprintf(di->errbuf, sizeof (di->errbuf), dgettext(TEXT_DOMAIN, "Cannot diff an unmounted snapshot")); return (zfs_error(di->zhp->zfs_hdl, EZFS_BADTYPE, di->errbuf)); } /* Avoid a double slash at the beginning of root-mounted datasets */ if (**mntpt == '/' && *(*mntpt + 1) == '\0') **mntpt = '\0'; return (0); } static int get_mountpoints(differ_info_t *di) { char *strptr; char *frommntpt; /* * first get the mountpoint for the parent dataset */ if (get_mountpoint(di, di->ds, &di->dsmnt) != 0) return (-1); strptr = strchr(di->tosnap, '@'); ASSERT3P(strptr, !=, NULL); di->tomnt = zfs_asprintf(di->zhp->zfs_hdl, "%s%s%s", di->dsmnt, ZDIFF_SNAPDIR, ++strptr); strptr = strchr(di->fromsnap, '@'); ASSERT3P(strptr, !=, NULL); frommntpt = di->dsmnt; if (di->isclone) { char *mntpt; int err; *strptr = '\0'; err = get_mountpoint(di, di->fromsnap, &mntpt); *strptr = '@'; if (err != 0) return (-1); frommntpt = mntpt; } di->frommnt = zfs_asprintf(di->zhp->zfs_hdl, "%s%s%s", frommntpt, ZDIFF_SNAPDIR, ++strptr); if (di->isclone) free(frommntpt); return (0); } static int setup_differ_info(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap, differ_info_t *di) { di->zhp = zhp; di->cleanupfd = open(ZFS_DEV, O_RDWR | O_CLOEXEC); VERIFY(di->cleanupfd >= 0); if (get_snapshot_names(di, fromsnap, tosnap) != 0) return (-1); if (get_mountpoints(di) != 0) return (-1); if (find_shares_object(di) != 0) return (-1); return (0); } int zfs_show_diffs(zfs_handle_t *zhp, int outfd, const char *fromsnap, const char *tosnap, int 
flags) { zfs_cmd_t zc = {"\0"}; char errbuf[ERRBUFLEN]; differ_info_t di = { 0 }; pthread_t tid; int pipefd[2]; int iocerr; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "zfs diff failed")); if (setup_differ_info(zhp, fromsnap, tosnap, &di)) { teardown_differ_info(&di); return (-1); } if (pipe2(pipefd, O_CLOEXEC)) { zfs_error_aux(zhp->zfs_hdl, "%s", strerror(errno)); teardown_differ_info(&di); return (zfs_error(zhp->zfs_hdl, EZFS_PIPEFAILED, errbuf)); } di.scripted = (flags & ZFS_DIFF_PARSEABLE); di.classify = (flags & ZFS_DIFF_CLASSIFY); di.timestamped = (flags & ZFS_DIFF_TIMESTAMP); di.no_mangle = (flags & ZFS_DIFF_NO_MANGLE); di.outputfd = outfd; di.datafd = pipefd[0]; if (pthread_create(&tid, NULL, differ, &di)) { zfs_error_aux(zhp->zfs_hdl, "%s", strerror(errno)); (void) close(pipefd[0]); (void) close(pipefd[1]); teardown_differ_info(&di); return (zfs_error(zhp->zfs_hdl, EZFS_THREADCREATEFAILED, errbuf)); } /* do the ioctl() */ (void) strlcpy(zc.zc_value, di.fromsnap, strlen(di.fromsnap) + 1); (void) strlcpy(zc.zc_name, di.tosnap, strlen(di.tosnap) + 1); zc.zc_cookie = pipefd[1]; iocerr = zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_DIFF, &zc); if (iocerr != 0) { (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "Unable to obtain diffs")); if (errno == EPERM) { zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, "\n The sys_mount privilege or diff delegated " "permission is needed\n to execute the " "diff ioctl")); } else if (errno == EXDEV) { zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN, "\n Not an earlier snapshot from the same fs")); } else if (errno != EPIPE || di.zerr == 0) { zfs_error_aux(zhp->zfs_hdl, "%s", strerror(errno)); } (void) close(pipefd[1]); (void) pthread_cancel(tid); (void) pthread_join(tid, NULL); teardown_differ_info(&di); if (di.zerr != 0 && di.zerr != EPIPE) { zfs_error_aux(zhp->zfs_hdl, "%s", strerror(di.zerr)); return (zfs_error(zhp->zfs_hdl, EZFS_DIFF, di.errbuf)); } else { return (zfs_error(zhp->zfs_hdl, EZFS_DIFFDATA, errbuf)); } } (void) close(pipefd[1]); (void) pthread_join(tid, NULL); if (di.zerr != 0) { zfs_error_aux(zhp->zfs_hdl, "%s", strerror(di.zerr)); return (zfs_error(zhp->zfs_hdl, EZFS_DIFF, di.errbuf)); } teardown_differ_info(&di); return (0); } diff --git a/lib/libzpool/taskq.c b/lib/libzpool/taskq.c index 8d6f1c93d8c9..b1e71e998078 100644 --- a/lib/libzpool/taskq.c +++ b/lib/libzpool/taskq.c @@ -1,384 +1,384 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or https://opensource.org/licenses/CDDL-1.0. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* * Copyright 2011 Nexenta Systems, Inc. All rights reserved. * Copyright 2012 Garrett D'Amore . All rights reserved. * Copyright (c) 2014 by Delphix. All rights reserved. 
*/ #include int taskq_now; taskq_t *system_taskq; taskq_t *system_delay_taskq; static pthread_key_t taskq_tsd; #define TASKQ_ACTIVE 0x00010000 static taskq_ent_t * task_alloc(taskq_t *tq, int tqflags) { taskq_ent_t *t; int rv; again: if ((t = tq->tq_freelist) != NULL && tq->tq_nalloc >= tq->tq_minalloc) { ASSERT(!(t->tqent_flags & TQENT_FLAG_PREALLOC)); tq->tq_freelist = t->tqent_next; } else { if (tq->tq_nalloc >= tq->tq_maxalloc) { if (!(tqflags & KM_SLEEP)) return (NULL); /* * We don't want to exceed tq_maxalloc, but we can't * wait for other tasks to complete (and thus free up * task structures) without risking deadlock with * the caller. So, we just delay for one second * to throttle the allocation rate. If we have tasks * complete before one second timeout expires then * taskq_ent_free will signal us and we will * immediately retry the allocation. */ tq->tq_maxalloc_wait++; rv = cv_timedwait(&tq->tq_maxalloc_cv, &tq->tq_lock, ddi_get_lbolt() + hz); tq->tq_maxalloc_wait--; if (rv > 0) goto again; /* signaled */ } mutex_exit(&tq->tq_lock); t = kmem_alloc(sizeof (taskq_ent_t), tqflags); mutex_enter(&tq->tq_lock); if (t != NULL) { /* Make sure we start without any flags */ t->tqent_flags = 0; tq->tq_nalloc++; } } return (t); } static void task_free(taskq_t *tq, taskq_ent_t *t) { if (tq->tq_nalloc <= tq->tq_minalloc) { t->tqent_next = tq->tq_freelist; tq->tq_freelist = t; } else { tq->tq_nalloc--; mutex_exit(&tq->tq_lock); kmem_free(t, sizeof (taskq_ent_t)); mutex_enter(&tq->tq_lock); } if (tq->tq_maxalloc_wait) cv_signal(&tq->tq_maxalloc_cv); } taskqid_t taskq_dispatch(taskq_t *tq, task_func_t func, void *arg, uint_t tqflags) { taskq_ent_t *t; if (taskq_now) { func(arg); return (1); } mutex_enter(&tq->tq_lock); ASSERT(tq->tq_flags & TASKQ_ACTIVE); if ((t = task_alloc(tq, tqflags)) == NULL) { mutex_exit(&tq->tq_lock); return (0); } if (tqflags & TQ_FRONT) { t->tqent_next = tq->tq_task.tqent_next; t->tqent_prev = &tq->tq_task; } else { t->tqent_next = &tq->tq_task; t->tqent_prev = tq->tq_task.tqent_prev; } t->tqent_next->tqent_prev = t; t->tqent_prev->tqent_next = t; t->tqent_func = func; t->tqent_arg = arg; t->tqent_flags = 0; cv_signal(&tq->tq_dispatch_cv); mutex_exit(&tq->tq_lock); return (1); } taskqid_t taskq_dispatch_delay(taskq_t *tq, task_func_t func, void *arg, uint_t tqflags, clock_t expire_time) { (void) tq, (void) func, (void) arg, (void) tqflags, (void) expire_time; return (0); } int taskq_empty_ent(taskq_ent_t *t) { return (t->tqent_next == NULL); } void taskq_init_ent(taskq_ent_t *t) { t->tqent_next = NULL; t->tqent_prev = NULL; t->tqent_func = NULL; t->tqent_arg = NULL; t->tqent_flags = 0; } void taskq_dispatch_ent(taskq_t *tq, task_func_t func, void *arg, uint_t flags, taskq_ent_t *t) { ASSERT(func != NULL); /* * Mark it as a prealloc'd task. This is important * to ensure that we don't free it later. */ t->tqent_flags |= TQENT_FLAG_PREALLOC; /* * Enqueue the task to the underlying queue. 
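 * (TQ_FRONT entries are linked in at the head of the task list; all other entries are appended at the tail.)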
*/ mutex_enter(&tq->tq_lock); if (flags & TQ_FRONT) { t->tqent_next = tq->tq_task.tqent_next; t->tqent_prev = &tq->tq_task; } else { t->tqent_next = &tq->tq_task; t->tqent_prev = tq->tq_task.tqent_prev; } t->tqent_next->tqent_prev = t; t->tqent_prev->tqent_next = t; t->tqent_func = func; t->tqent_arg = arg; cv_signal(&tq->tq_dispatch_cv); mutex_exit(&tq->tq_lock); } void taskq_wait(taskq_t *tq) { mutex_enter(&tq->tq_lock); while (tq->tq_task.tqent_next != &tq->tq_task || tq->tq_active != 0) cv_wait(&tq->tq_wait_cv, &tq->tq_lock); mutex_exit(&tq->tq_lock); } void taskq_wait_id(taskq_t *tq, taskqid_t id) { (void) id; taskq_wait(tq); } void taskq_wait_outstanding(taskq_t *tq, taskqid_t id) { (void) id; taskq_wait(tq); } static __attribute__((noreturn)) void taskq_thread(void *arg) { taskq_t *tq = arg; taskq_ent_t *t; boolean_t prealloc; VERIFY0(pthread_setspecific(taskq_tsd, tq)); mutex_enter(&tq->tq_lock); while (tq->tq_flags & TASKQ_ACTIVE) { if ((t = tq->tq_task.tqent_next) == &tq->tq_task) { if (--tq->tq_active == 0) cv_broadcast(&tq->tq_wait_cv); cv_wait(&tq->tq_dispatch_cv, &tq->tq_lock); tq->tq_active++; continue; } t->tqent_prev->tqent_next = t->tqent_next; t->tqent_next->tqent_prev = t->tqent_prev; t->tqent_next = NULL; t->tqent_prev = NULL; prealloc = t->tqent_flags & TQENT_FLAG_PREALLOC; mutex_exit(&tq->tq_lock); rw_enter(&tq->tq_threadlock, RW_READER); t->tqent_func(t->tqent_arg); rw_exit(&tq->tq_threadlock); mutex_enter(&tq->tq_lock); if (!prealloc) task_free(tq, t); } tq->tq_nthreads--; cv_broadcast(&tq->tq_wait_cv); mutex_exit(&tq->tq_lock); thread_exit(); } taskq_t * taskq_create(const char *name, int nthreads, pri_t pri, int minalloc, int maxalloc, uint_t flags) { (void) pri; taskq_t *tq = kmem_zalloc(sizeof (taskq_t), KM_SLEEP); int t; if (flags & TASKQ_THREADS_CPU_PCT) { int pct; ASSERT3S(nthreads, >=, 0); ASSERT3S(nthreads, <=, 100); pct = MIN(nthreads, 100); pct = MAX(pct, 0); nthreads = (sysconf(_SC_NPROCESSORS_ONLN) * pct) / 100; nthreads = MAX(nthreads, 1); /* need at least 1 thread */ } else { ASSERT3S(nthreads, >=, 1); } rw_init(&tq->tq_threadlock, NULL, RW_DEFAULT, NULL); mutex_init(&tq->tq_lock, NULL, MUTEX_DEFAULT, NULL); cv_init(&tq->tq_dispatch_cv, NULL, CV_DEFAULT, NULL); cv_init(&tq->tq_wait_cv, NULL, CV_DEFAULT, NULL); cv_init(&tq->tq_maxalloc_cv, NULL, CV_DEFAULT, NULL); - (void) strncpy(tq->tq_name, name, TASKQ_NAMELEN); + (void) strlcpy(tq->tq_name, name, sizeof (tq->tq_name)); tq->tq_flags = flags | TASKQ_ACTIVE; tq->tq_active = nthreads; tq->tq_nthreads = nthreads; tq->tq_minalloc = minalloc; tq->tq_maxalloc = maxalloc; tq->tq_task.tqent_next = &tq->tq_task; tq->tq_task.tqent_prev = &tq->tq_task; tq->tq_threadlist = kmem_alloc(nthreads * sizeof (kthread_t *), KM_SLEEP); if (flags & TASKQ_PREPOPULATE) { mutex_enter(&tq->tq_lock); while (minalloc-- > 0) task_free(tq, task_alloc(tq, KM_SLEEP)); mutex_exit(&tq->tq_lock); } for (t = 0; t < nthreads; t++) VERIFY((tq->tq_threadlist[t] = thread_create(NULL, 0, taskq_thread, tq, 0, &p0, TS_RUN, pri)) != NULL); return (tq); } void taskq_destroy(taskq_t *tq) { int nthreads = tq->tq_nthreads; taskq_wait(tq); mutex_enter(&tq->tq_lock); tq->tq_flags &= ~TASKQ_ACTIVE; cv_broadcast(&tq->tq_dispatch_cv); while (tq->tq_nthreads != 0) cv_wait(&tq->tq_wait_cv, &tq->tq_lock); tq->tq_minalloc = 0; while (tq->tq_nalloc != 0) { ASSERT(tq->tq_freelist != NULL); task_free(tq, task_alloc(tq, KM_SLEEP)); } mutex_exit(&tq->tq_lock); kmem_free(tq->tq_threadlist, nthreads * sizeof (kthread_t *)); rw_destroy(&tq->tq_threadlock); 
mutex_destroy(&tq->tq_lock); cv_destroy(&tq->tq_dispatch_cv); cv_destroy(&tq->tq_wait_cv); cv_destroy(&tq->tq_maxalloc_cv); kmem_free(tq, sizeof (taskq_t)); } int taskq_member(taskq_t *tq, kthread_t *t) { int i; if (taskq_now) return (1); for (i = 0; i < tq->tq_nthreads; i++) if (tq->tq_threadlist[i] == t) return (1); return (0); } taskq_t * taskq_of_curthread(void) { return (pthread_getspecific(taskq_tsd)); } int taskq_cancel_id(taskq_t *tq, taskqid_t id) { (void) tq, (void) id; return (ENOENT); } void system_taskq_init(void) { VERIFY0(pthread_key_create(&taskq_tsd, NULL)); system_taskq = taskq_create("system_taskq", 64, maxclsyspri, 4, 512, TASKQ_DYNAMIC | TASKQ_PREPOPULATE); system_delay_taskq = taskq_create("delay_taskq", 4, maxclsyspri, 4, 512, TASKQ_DYNAMIC | TASKQ_PREPOPULATE); } void system_taskq_fini(void) { taskq_destroy(system_taskq); system_taskq = NULL; /* defensive */ taskq_destroy(system_delay_taskq); system_delay_taskq = NULL; VERIFY0(pthread_key_delete(taskq_tsd)); } diff --git a/module/os/freebsd/spl/callb.c b/module/os/freebsd/spl/callb.c index ba13ea887938..47f3ccc0c7fa 100644 --- a/module/os/freebsd/spl/callb.c +++ b/module/os/freebsd/spl/callb.c @@ -1,373 +1,372 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or https://opensource.org/licenses/CDDL-1.0. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* for delay() */ #include /* For TASKQ_NAMELEN */ #include #define CB_MAXNAME TASKQ_NAMELEN /* * The callb mechanism provides generic event scheduling/echoing. * A callb function is registered and called on behalf of the event. */ typedef struct callb { struct callb *c_next; /* next in class or on freelist */ kthread_id_t c_thread; /* ptr to caller's thread struct */ char c_flag; /* info about the callb state */ uchar_t c_class; /* this callb's class */ kcondvar_t c_done_cv; /* signal callb completion */ boolean_t (*c_func)(void *, int); /* cb function: returns true if ok */ void *c_arg; /* arg to c_func */ char c_name[CB_MAXNAME+1]; /* debug:max func name length */ } callb_t; /* * callb c_flag bitmap definitions */ #define CALLB_FREE 0x0 #define CALLB_TAKEN 0x1 #define CALLB_EXECUTING 0x2 /* * Basic structure for a callb table. * All callbs are organized into different class groups described * by ct_class array. * The callbs within a class are single-linked and normally run by a * serial execution. 
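taskq_of_curthread() above relies on pthread thread-specific data: system_taskq_init() creates a shared key, each worker stores its owning queue under that key, and any code running on the thread can later ask which taskq it belongs to. A stand-alone sketch with hypothetical names and error handling reduced to asserts:

#include <assert.h>
#include <pthread.h>
#include <stdio.h>

static pthread_key_t cur_queue_key;

static void *
worker(void *arg)
{
    assert(pthread_setspecific(cur_queue_key, arg) == 0);
    printf("running on %s\n",
        (const char *)pthread_getspecific(cur_queue_key));
    return (NULL);
}

int
main(void)
{
    pthread_t t;

    assert(pthread_key_create(&cur_queue_key, NULL) == 0);
    assert(pthread_create(&t, NULL, worker, "system_taskq") == 0);
    assert(pthread_join(t, NULL) == 0);
    assert(pthread_key_delete(cur_queue_key) == 0);
    return (0);
}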
*/ typedef struct callb_table { kmutex_t ct_lock; /* protect all callb states */ callb_t *ct_freelist; /* free callb structures */ boolean_t ct_busy; /* B_TRUE prevents additions */ kcondvar_t ct_busy_cv; /* to wait for not busy */ int ct_ncallb; /* num of callbs allocated */ callb_t *ct_first_cb[NCBCLASS]; /* ptr to 1st callb in a class */ } callb_table_t; int callb_timeout_sec = CPR_KTHREAD_TIMEOUT_SEC; static callb_id_t callb_add_common(boolean_t (*)(void *, int), void *, int, char *, kthread_id_t); static callb_table_t callb_table; /* system level callback table */ static callb_table_t *ct = &callb_table; static kmutex_t callb_safe_mutex; callb_cpr_t callb_cprinfo_safe = { &callb_safe_mutex, CALLB_CPR_ALWAYS_SAFE, 0, {0, 0} }; /* * Init all callb tables in the system. */ static void callb_init(void *dummy __unused) { callb_table.ct_busy = B_FALSE; /* mark table open for additions */ mutex_init(&callb_safe_mutex, NULL, MUTEX_DEFAULT, NULL); mutex_init(&callb_table.ct_lock, NULL, MUTEX_DEFAULT, NULL); } static void callb_fini(void *dummy __unused) { callb_t *cp; int i; mutex_enter(&ct->ct_lock); for (i = 0; i < 16; i++) { while ((cp = ct->ct_freelist) != NULL) { ct->ct_freelist = cp->c_next; ct->ct_ncallb--; kmem_free(cp, sizeof (callb_t)); } if (ct->ct_ncallb == 0) break; /* Not all callbacks finished, waiting for the rest. */ mutex_exit(&ct->ct_lock); tsleep(ct, 0, "callb", hz / 4); mutex_enter(&ct->ct_lock); } if (ct->ct_ncallb > 0) printf("%s: Leaked %d callbacks!\n", __func__, ct->ct_ncallb); mutex_exit(&ct->ct_lock); mutex_destroy(&callb_safe_mutex); mutex_destroy(&callb_table.ct_lock); } /* * callout_add() is called to register func() be called later. */ static callb_id_t callb_add_common(boolean_t (*func)(void *arg, int code), void *arg, int class, char *name, kthread_id_t t) { callb_t *cp; ASSERT3S(class, <, NCBCLASS); mutex_enter(&ct->ct_lock); while (ct->ct_busy) cv_wait(&ct->ct_busy_cv, &ct->ct_lock); if ((cp = ct->ct_freelist) == NULL) { ct->ct_ncallb++; cp = (callb_t *)kmem_zalloc(sizeof (callb_t), KM_SLEEP); } ct->ct_freelist = cp->c_next; cp->c_thread = t; cp->c_func = func; cp->c_arg = arg; cp->c_class = (uchar_t)class; cp->c_flag |= CALLB_TAKEN; #ifdef ZFS_DEBUG if (strlen(name) > CB_MAXNAME) cmn_err(CE_WARN, "callb_add: name of callback function '%s' " "too long -- truncated to %d chars", name, CB_MAXNAME); #endif - (void) strncpy(cp->c_name, name, CB_MAXNAME); - cp->c_name[CB_MAXNAME] = '\0'; + (void) strlcpy(cp->c_name, name, sizeof (cp->c_name)); /* * Insert the new callb at the head of its class list. */ cp->c_next = ct->ct_first_cb[class]; ct->ct_first_cb[class] = cp; mutex_exit(&ct->ct_lock); return ((callb_id_t)cp); } /* * The default function to add an entry to the callback table. Since * it uses curthread as the thread identifier to store in the table, * it should be used for the normal case of a thread which is calling * to add ITSELF to the table. */ callb_id_t callb_add(boolean_t (*func)(void *arg, int code), void *arg, int class, char *name) { return (callb_add_common(func, arg, class, name, curthread)); } /* * A special version of callb_add() above for use by threads which * might be adding an entry to the table on behalf of some other * thread (for example, one which is constructed but not yet running). * In this version the thread id is an argument. 
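The strncpy()-to-strlcpy() conversion in callb_add_common() above (and the matching one in the taskq code) leans on the strlcpy() contract: the destination is always NUL-terminated when its size is non-zero, and the return value is the full source length, so truncation can be detected without a manual trailing '\0'. A quick user-space check of that behaviour; strlcpy() is declared in <string.h> on FreeBSD, while glibc-based systems may need libbsd or a local fallback:

#include <stdio.h>
#include <string.h>

int
main(void)
{
    char name[8];
    const char *src = "a_rather_long_callback_name";

    size_t n = strlcpy(name, src, sizeof (name));
    if (n >= sizeof (name))         /* source did not fit */
        printf("truncated: kept \"%s\" of \"%s\"\n", name, src);
    return (0);
}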
*/ callb_id_t callb_add_thread(boolean_t (*func)(void *arg, int code), void *arg, int class, char *name, kthread_id_t t) { return (callb_add_common(func, arg, class, name, t)); } /* * callout_delete() is called to remove an entry identified by id * that was originally placed there by a call to callout_add(). * return -1 if fail to delete a callb entry otherwise return 0. */ int callb_delete(callb_id_t id) { callb_t **pp; callb_t *me = (callb_t *)id; mutex_enter(&ct->ct_lock); for (;;) { pp = &ct->ct_first_cb[me->c_class]; while (*pp != NULL && *pp != me) pp = &(*pp)->c_next; #ifdef ZFS_DEBUG if (*pp != me) { cmn_err(CE_WARN, "callb delete bogus entry 0x%p", (void *)me); mutex_exit(&ct->ct_lock); return (-1); } #endif /* DEBUG */ /* * It is not allowed to delete a callb in the middle of * executing otherwise, the callb_execute() will be confused. */ if (!(me->c_flag & CALLB_EXECUTING)) break; cv_wait(&me->c_done_cv, &ct->ct_lock); } /* relink the class list */ *pp = me->c_next; /* clean up myself and return the free callb to the head of freelist */ me->c_flag = CALLB_FREE; me->c_next = ct->ct_freelist; ct->ct_freelist = me; mutex_exit(&ct->ct_lock); return (0); } /* * class: indicates to execute all callbs in the same class; * code: optional argument for the callb functions. * return: = 0: success * != 0: ptr to string supplied when callback was registered */ void * callb_execute_class(int class, int code) { callb_t *cp; void *ret = NULL; ASSERT3S(class, <, NCBCLASS); mutex_enter(&ct->ct_lock); for (cp = ct->ct_first_cb[class]; cp != NULL && ret == 0; cp = cp->c_next) { while (cp->c_flag & CALLB_EXECUTING) cv_wait(&cp->c_done_cv, &ct->ct_lock); /* * cont if the callb is deleted while we're sleeping */ if (cp->c_flag == CALLB_FREE) continue; cp->c_flag |= CALLB_EXECUTING; #ifdef CALLB_DEBUG printf("callb_execute: name=%s func=%p arg=%p\n", cp->c_name, (void *)cp->c_func, (void *)cp->c_arg); #endif /* CALLB_DEBUG */ mutex_exit(&ct->ct_lock); /* If callback function fails, pass back client's name */ if (!(*cp->c_func)(cp->c_arg, code)) ret = cp->c_name; mutex_enter(&ct->ct_lock); cp->c_flag &= ~CALLB_EXECUTING; cv_broadcast(&cp->c_done_cv); } mutex_exit(&ct->ct_lock); return (ret); } /* * callers make sure no recursive entries to this func. * dp->cc_lockp is registered by callb_add to protect callb_cpr_t structure. * * When calling to stop a kernel thread (code == CB_CODE_CPR_CHKPT) we * use a cv_timedwait() in case the kernel thread is blocked. * * Note that this is a generic callback handler for daemon CPR and * should NOT be changed to accommodate any specific requirement in a daemon. * Individual daemons that require changes to the handler shall write * callback routines in their own daemon modules. */ boolean_t callb_generic_cpr(void *arg, int code) { callb_cpr_t *cp = (callb_cpr_t *)arg; clock_t ret = 0; /* assume success */ mutex_enter(cp->cc_lockp); switch (code) { case CB_CODE_CPR_CHKPT: cp->cc_events |= CALLB_CPR_START; #ifdef CPR_NOT_THREAD_SAFE while (!(cp->cc_events & CALLB_CPR_SAFE)) /* cv_timedwait() returns -1 if it times out. */ if ((ret = cv_reltimedwait(&cp->cc_callb_cv, cp->cc_lockp, (callb_timeout_sec * hz), TR_CLOCK_TICK)) == -1) break; #endif break; case CB_CODE_CPR_RESUME: cp->cc_events &= ~CALLB_CPR_START; cv_signal(&cp->cc_stop_cv); break; } mutex_exit(cp->cc_lockp); return (ret != -1); } /* * The generic callback function associated with kernel threads which * are always considered safe. 
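callb_execute_class() above runs each callback with ct_lock dropped: it marks the entry CALLB_EXECUTING, releases the lock for the call, then re-takes the lock, clears the flag, and broadcasts c_done_cv so that callb_delete() can wait for an in-flight callback instead of freeing it out from under the caller. A reduced pthread sketch of that pattern, with hypothetical names:

#include <pthread.h>
#include <stdbool.h>

struct entry {
    bool executing;
    void (*func)(void *);
    void *arg;
};

static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t done_cv = PTHREAD_COND_INITIALIZER;

static void
run_entry(struct entry *e)
{
    pthread_mutex_lock(&table_lock);
    while (e->executing)                    /* someone else is in it */
        pthread_cond_wait(&done_cv, &table_lock);
    e->executing = true;
    pthread_mutex_unlock(&table_lock);      /* never hold the lock in the callback */

    e->func(e->arg);

    pthread_mutex_lock(&table_lock);
    e->executing = false;
    pthread_cond_broadcast(&done_cv);       /* wake waiters, e.g. a pending delete */
    pthread_mutex_unlock(&table_lock);
}

static void
noop(void *arg)
{
    (void) arg;
}

int
main(void)
{
    struct entry e = { .executing = false, .func = noop, .arg = NULL };

    run_entry(&e);
    return (0);
}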
*/ boolean_t callb_generic_cpr_safe(void *arg, int code) { (void) arg, (void) code; return (B_TRUE); } /* * Prevent additions to callback table. */ void callb_lock_table(void) { mutex_enter(&ct->ct_lock); ASSERT(!ct->ct_busy); ct->ct_busy = B_TRUE; mutex_exit(&ct->ct_lock); } /* * Allow additions to callback table. */ void callb_unlock_table(void) { mutex_enter(&ct->ct_lock); ASSERT(ct->ct_busy); ct->ct_busy = B_FALSE; cv_broadcast(&ct->ct_busy_cv); mutex_exit(&ct->ct_lock); } SYSINIT(sol_callb, SI_SUB_DRIVERS, SI_ORDER_FIRST, callb_init, NULL); SYSUNINIT(sol_callb, SI_SUB_DRIVERS, SI_ORDER_FIRST, callb_fini, NULL); diff --git a/module/os/freebsd/zfs/zfs_vfsops.c b/module/os/freebsd/zfs/zfs_vfsops.c index b290c36748ca..c65be4c134d5 100644 --- a/module/os/freebsd/zfs/zfs_vfsops.c +++ b/module/os/freebsd/zfs/zfs_vfsops.c @@ -1,2318 +1,2317 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or https://opensource.org/licenses/CDDL-1.0. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2011 Pawel Jakub Dawidek . * All rights reserved. * Copyright (c) 2012, 2015 by Delphix. All rights reserved. * Copyright (c) 2014 Integros [integros.com] * Copyright 2016 Nexenta Systems, Inc. All rights reserved. 
*/ /* Portions Copyright 2010 Robert Milkowski */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "zfs_comutil.h" #ifndef MNTK_VMSETSIZE_BUG #define MNTK_VMSETSIZE_BUG 0 #endif #ifndef MNTK_NOMSYNC #define MNTK_NOMSYNC 8 #endif struct mtx zfs_debug_mtx; MTX_SYSINIT(zfs_debug_mtx, &zfs_debug_mtx, "zfs_debug", MTX_DEF); SYSCTL_NODE(_vfs, OID_AUTO, zfs, CTLFLAG_RW, 0, "ZFS file system"); int zfs_super_owner; SYSCTL_INT(_vfs_zfs, OID_AUTO, super_owner, CTLFLAG_RW, &zfs_super_owner, 0, "File system owners can perform privileged operation on file systems"); int zfs_debug_level; SYSCTL_INT(_vfs_zfs, OID_AUTO, debug, CTLFLAG_RWTUN, &zfs_debug_level, 0, "Debug level"); SYSCTL_NODE(_vfs_zfs, OID_AUTO, version, CTLFLAG_RD, 0, "ZFS versions"); static int zfs_version_acl = ZFS_ACL_VERSION; SYSCTL_INT(_vfs_zfs_version, OID_AUTO, acl, CTLFLAG_RD, &zfs_version_acl, 0, "ZFS_ACL_VERSION"); static int zfs_version_spa = SPA_VERSION; SYSCTL_INT(_vfs_zfs_version, OID_AUTO, spa, CTLFLAG_RD, &zfs_version_spa, 0, "SPA_VERSION"); static int zfs_version_zpl = ZPL_VERSION; SYSCTL_INT(_vfs_zfs_version, OID_AUTO, zpl, CTLFLAG_RD, &zfs_version_zpl, 0, "ZPL_VERSION"); #if __FreeBSD_version >= 1400018 static int zfs_quotactl(vfs_t *vfsp, int cmds, uid_t id, void *arg, bool *mp_busy); #else static int zfs_quotactl(vfs_t *vfsp, int cmds, uid_t id, void *arg); #endif static int zfs_mount(vfs_t *vfsp); static int zfs_umount(vfs_t *vfsp, int fflag); static int zfs_root(vfs_t *vfsp, int flags, vnode_t **vpp); static int zfs_statfs(vfs_t *vfsp, struct statfs *statp); static int zfs_vget(vfs_t *vfsp, ino_t ino, int flags, vnode_t **vpp); static int zfs_sync(vfs_t *vfsp, int waitfor); #if __FreeBSD_version >= 1300098 static int zfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, uint64_t *extflagsp, struct ucred **credanonp, int *numsecflavors, int *secflavors); #else static int zfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, int *extflagsp, struct ucred **credanonp, int *numsecflavors, int **secflavors); #endif static int zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, int flags, vnode_t **vpp); static void zfs_freevfs(vfs_t *vfsp); struct vfsops zfs_vfsops = { .vfs_mount = zfs_mount, .vfs_unmount = zfs_umount, #if __FreeBSD_version >= 1300049 .vfs_root = vfs_cache_root, .vfs_cachedroot = zfs_root, #else .vfs_root = zfs_root, #endif .vfs_statfs = zfs_statfs, .vfs_vget = zfs_vget, .vfs_sync = zfs_sync, .vfs_checkexp = zfs_checkexp, .vfs_fhtovp = zfs_fhtovp, .vfs_quotactl = zfs_quotactl, }; VFS_SET(zfs_vfsops, zfs, VFCF_JAIL | VFCF_DELEGADMIN); /* * We need to keep a count of active fs's. 
* This is necessary to prevent our module * from being unloaded after a umount -f */ static uint32_t zfs_active_fs_count = 0; int zfs_get_temporary_prop(dsl_dataset_t *ds, zfs_prop_t zfs_prop, uint64_t *val, char *setpoint) { int error; zfsvfs_t *zfvp; vfs_t *vfsp; objset_t *os; uint64_t tmp = *val; error = dmu_objset_from_ds(ds, &os); if (error != 0) return (error); error = getzfsvfs_impl(os, &zfvp); if (error != 0) return (error); if (zfvp == NULL) return (ENOENT); vfsp = zfvp->z_vfs; switch (zfs_prop) { case ZFS_PROP_ATIME: if (vfs_optionisset(vfsp, MNTOPT_NOATIME, NULL)) tmp = 0; if (vfs_optionisset(vfsp, MNTOPT_ATIME, NULL)) tmp = 1; break; case ZFS_PROP_DEVICES: if (vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL)) tmp = 0; if (vfs_optionisset(vfsp, MNTOPT_DEVICES, NULL)) tmp = 1; break; case ZFS_PROP_EXEC: if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL)) tmp = 0; if (vfs_optionisset(vfsp, MNTOPT_EXEC, NULL)) tmp = 1; break; case ZFS_PROP_SETUID: if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) tmp = 0; if (vfs_optionisset(vfsp, MNTOPT_SETUID, NULL)) tmp = 1; break; case ZFS_PROP_READONLY: if (vfs_optionisset(vfsp, MNTOPT_RW, NULL)) tmp = 0; if (vfs_optionisset(vfsp, MNTOPT_RO, NULL)) tmp = 1; break; case ZFS_PROP_XATTR: if (zfvp->z_flags & ZSB_XATTR) tmp = zfvp->z_xattr; break; case ZFS_PROP_NBMAND: if (vfs_optionisset(vfsp, MNTOPT_NONBMAND, NULL)) tmp = 0; if (vfs_optionisset(vfsp, MNTOPT_NBMAND, NULL)) tmp = 1; break; default: vfs_unbusy(vfsp); return (ENOENT); } vfs_unbusy(vfsp); if (tmp != *val) { (void) strcpy(setpoint, "temporary"); *val = tmp; } return (0); } static int zfs_getquota(zfsvfs_t *zfsvfs, uid_t id, int isgroup, struct dqblk64 *dqp) { int error = 0; char buf[32]; uint64_t usedobj, quotaobj; uint64_t quota, used = 0; timespec_t now; usedobj = isgroup ? DMU_GROUPUSED_OBJECT : DMU_USERUSED_OBJECT; quotaobj = isgroup ? zfsvfs->z_groupquota_obj : zfsvfs->z_userquota_obj; if (quotaobj == 0 || zfsvfs->z_replay) { error = ENOENT; goto done; } (void) sprintf(buf, "%llx", (longlong_t)id); if ((error = zap_lookup(zfsvfs->z_os, quotaobj, buf, sizeof (quota), 1, "a)) != 0) { dprintf("%s(%d): quotaobj lookup failed\n", __FUNCTION__, __LINE__); goto done; } /* * quota(8) uses bsoftlimit as "quoota", and hardlimit as "limit". * So we set them to be the same. */ dqp->dqb_bsoftlimit = dqp->dqb_bhardlimit = btodb(quota); error = zap_lookup(zfsvfs->z_os, usedobj, buf, sizeof (used), 1, &used); if (error && error != ENOENT) { dprintf("%s(%d): usedobj failed; %d\n", __FUNCTION__, __LINE__, error); goto done; } dqp->dqb_curblocks = btodb(used); dqp->dqb_ihardlimit = dqp->dqb_isoftlimit = 0; vfs_timestamp(&now); /* * Setting this to 0 causes FreeBSD quota(8) to print * the number of days since the epoch, which isn't * particularly useful. 
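zfs_getquota() above addresses the quota ZAP by the id printed in hexadecimal and reports sizes to quota(8) in 512-byte blocks via btodb(). A stand-alone sketch of just that key and unit handling, with made-up values:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define DEV_BSHIFT  9               /* 512-byte "disk blocks", like btodb() */

int
main(void)
{
    uint64_t id = 1001;             /* uid or gid being queried */
    uint64_t quota_bytes = 10ULL << 30;     /* pretend a 10 GiB quota */
    char key[32];

    (void) snprintf(key, sizeof (key), "%" PRIx64, id);
    printf("ZAP key \"%s\" -> hard/soft limit %" PRIu64 " blocks\n",
        key, quota_bytes >> DEV_BSHIFT);
    return (0);
}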
*/ dqp->dqb_btime = dqp->dqb_itime = now.tv_sec; done: return (error); } static int #if __FreeBSD_version >= 1400018 zfs_quotactl(vfs_t *vfsp, int cmds, uid_t id, void *arg, bool *mp_busy) #else zfs_quotactl(vfs_t *vfsp, int cmds, uid_t id, void *arg) #endif { zfsvfs_t *zfsvfs = vfsp->vfs_data; struct thread *td; int cmd, type, error = 0; int bitsize; zfs_userquota_prop_t quota_type; struct dqblk64 dqblk = { 0 }; td = curthread; cmd = cmds >> SUBCMDSHIFT; type = cmds & SUBCMDMASK; if ((error = zfs_enter(zfsvfs, FTAG)) != 0) return (error); if (id == -1) { switch (type) { case USRQUOTA: id = td->td_ucred->cr_ruid; break; case GRPQUOTA: id = td->td_ucred->cr_rgid; break; default: error = EINVAL; #if __FreeBSD_version < 1400018 if (cmd == Q_QUOTAON || cmd == Q_QUOTAOFF) vfs_unbusy(vfsp); #endif goto done; } } /* * Map BSD type to: * ZFS_PROP_USERUSED, * ZFS_PROP_USERQUOTA, * ZFS_PROP_GROUPUSED, * ZFS_PROP_GROUPQUOTA */ switch (cmd) { case Q_SETQUOTA: case Q_SETQUOTA32: if (type == USRQUOTA) quota_type = ZFS_PROP_USERQUOTA; else if (type == GRPQUOTA) quota_type = ZFS_PROP_GROUPQUOTA; else error = EINVAL; break; case Q_GETQUOTA: case Q_GETQUOTA32: if (type == USRQUOTA) quota_type = ZFS_PROP_USERUSED; else if (type == GRPQUOTA) quota_type = ZFS_PROP_GROUPUSED; else error = EINVAL; break; } /* * Depending on the cmd, we may need to get * the ruid and domain (see fuidstr_to_sid?), * the fuid (how?), or other information. * Create fuid using zfs_fuid_create(zfsvfs, id, * ZFS_OWNER or ZFS_GROUP, cr, &fuidp)? * I think I can use just the id? * * Look at zfs_id_overquota() to look up a quota. * zap_lookup(something, quotaobj, fuidstring, * sizeof (long long), 1, "a) * * See zfs_set_userquota() to set a quota. */ if ((uint32_t)type >= MAXQUOTAS) { error = EINVAL; goto done; } switch (cmd) { case Q_GETQUOTASIZE: bitsize = 64; error = copyout(&bitsize, arg, sizeof (int)); break; case Q_QUOTAON: // As far as I can tell, you can't turn quotas on or off on zfs error = 0; #if __FreeBSD_version < 1400018 vfs_unbusy(vfsp); #endif break; case Q_QUOTAOFF: error = ENOTSUP; #if __FreeBSD_version < 1400018 vfs_unbusy(vfsp); #endif break; case Q_SETQUOTA: error = copyin(arg, &dqblk, sizeof (dqblk)); if (error == 0) error = zfs_set_userquota(zfsvfs, quota_type, "", id, dbtob(dqblk.dqb_bhardlimit)); break; case Q_GETQUOTA: error = zfs_getquota(zfsvfs, id, type == GRPQUOTA, &dqblk); if (error == 0) error = copyout(&dqblk, arg, sizeof (dqblk)); break; default: error = EINVAL; break; } done: zfs_exit(zfsvfs, FTAG); return (error); } boolean_t zfs_is_readonly(zfsvfs_t *zfsvfs) { return (!!(zfsvfs->z_vfs->vfs_flag & VFS_RDONLY)); } static int zfs_sync(vfs_t *vfsp, int waitfor) { /* * Data integrity is job one. We don't want a compromised kernel * writing to the storage pool, so we never sync during panic. */ if (panicstr) return (0); /* * Ignore the system syncher. ZFS already commits async data * at zfs_txg_timeout intervals. */ if (waitfor == MNT_LAZY) return (0); if (vfsp != NULL) { /* * Sync a specific filesystem. */ zfsvfs_t *zfsvfs = vfsp->vfs_data; dsl_pool_t *dp; int error; error = vfs_stdsync(vfsp, waitfor); if (error != 0) return (error); if ((error = zfs_enter(zfsvfs, FTAG)) != 0) return (error); dp = dmu_objset_pool(zfsvfs->z_os); /* * If the system is shutting down, then skip any * filesystems which may exist on a suspended pool. 
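zfs_quotactl() above splits the packed command word into an operation (high bits, via SUBCMDSHIFT) and a quota type (low bits, via SUBCMDMASK), defaults id == -1 to the caller's own uid or gid, and then maps USRQUOTA/GRPQUOTA plus get/set onto the ZFS user and group quota properties. A sketch of just the decode step; the shift, mask, and opcode values below are stand-ins, not the system's real definitions:

#include <stdio.h>

#define OP_SHIFT        8           /* stand-in for SUBCMDSHIFT */
#define TYPE_MASK       0x00ff      /* stand-in for SUBCMDMASK */
#define Q_GETQUOTA_OP   0x0300      /* hypothetical opcode */
#define USRQUOTA_TYPE   0           /* hypothetical type */

int
main(void)
{
    int cmds = Q_GETQUOTA_OP | USRQUOTA_TYPE;
    int op = cmds >> OP_SHIFT;
    int type = cmds & TYPE_MASK;

    printf("op=%#x type=%d -> %s\n", op, type,
        type == USRQUOTA_TYPE ? "ZFS_PROP_USERUSED" : "ZFS_PROP_GROUPUSED");
    return (0);
}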
*/ if (rebooting && spa_suspended(dp->dp_spa)) { zfs_exit(zfsvfs, FTAG); return (0); } if (zfsvfs->z_log != NULL) zil_commit(zfsvfs->z_log, 0); zfs_exit(zfsvfs, FTAG); } else { /* * Sync all ZFS filesystems. This is what happens when you * run sync(8). Unlike other filesystems, ZFS honors the * request by waiting for all pools to commit all dirty data. */ spa_sync_allpools(); } return (0); } static void atime_changed_cb(void *arg, uint64_t newval) { zfsvfs_t *zfsvfs = arg; if (newval == TRUE) { zfsvfs->z_atime = TRUE; zfsvfs->z_vfs->vfs_flag &= ~MNT_NOATIME; vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME); vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_ATIME, NULL, 0); } else { zfsvfs->z_atime = FALSE; zfsvfs->z_vfs->vfs_flag |= MNT_NOATIME; vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_ATIME); vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME, NULL, 0); } } static void xattr_changed_cb(void *arg, uint64_t newval) { zfsvfs_t *zfsvfs = arg; if (newval == ZFS_XATTR_OFF) { zfsvfs->z_flags &= ~ZSB_XATTR; } else { zfsvfs->z_flags |= ZSB_XATTR; if (newval == ZFS_XATTR_SA) zfsvfs->z_xattr_sa = B_TRUE; else zfsvfs->z_xattr_sa = B_FALSE; } } static void blksz_changed_cb(void *arg, uint64_t newval) { zfsvfs_t *zfsvfs = arg; ASSERT3U(newval, <=, spa_maxblocksize(dmu_objset_spa(zfsvfs->z_os))); ASSERT3U(newval, >=, SPA_MINBLOCKSIZE); ASSERT(ISP2(newval)); zfsvfs->z_max_blksz = newval; zfsvfs->z_vfs->mnt_stat.f_iosize = newval; } static void readonly_changed_cb(void *arg, uint64_t newval) { zfsvfs_t *zfsvfs = arg; if (newval) { /* XXX locking on vfs_flag? */ zfsvfs->z_vfs->vfs_flag |= VFS_RDONLY; vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RW); vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RO, NULL, 0); } else { /* XXX locking on vfs_flag? */ zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY; vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RO); vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RW, NULL, 0); } } static void setuid_changed_cb(void *arg, uint64_t newval) { zfsvfs_t *zfsvfs = arg; if (newval == FALSE) { zfsvfs->z_vfs->vfs_flag |= VFS_NOSETUID; vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_SETUID); vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID, NULL, 0); } else { zfsvfs->z_vfs->vfs_flag &= ~VFS_NOSETUID; vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID); vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_SETUID, NULL, 0); } } static void exec_changed_cb(void *arg, uint64_t newval) { zfsvfs_t *zfsvfs = arg; if (newval == FALSE) { zfsvfs->z_vfs->vfs_flag |= VFS_NOEXEC; vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_EXEC); vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC, NULL, 0); } else { zfsvfs->z_vfs->vfs_flag &= ~VFS_NOEXEC; vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC); vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_EXEC, NULL, 0); } } /* * The nbmand mount option can be changed at mount time. 
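Each *_changed_cb() above keeps three views of a property in step when it changes: the cached field used on hot paths, the VFS flag, and the advertised mount-option string. A reduced sketch of that shape; the real callbacks use vfs_setmntopt()/vfs_clearmntopt() rather than a plain string:

#include <stdbool.h>
#include <stdio.h>

struct mntstate {
    bool atime;                     /* cached value used on the hot path */
    char opt[16];                   /* what mount(8) would report */
};

static void
atime_changed(struct mntstate *m, bool newval)
{
    m->atime = newval;
    (void) snprintf(m->opt, sizeof (m->opt), "%s",
        newval ? "atime" : "noatime");
}

int
main(void)
{
    struct mntstate m = { true, "atime" };

    atime_changed(&m, false);
    printf("atime=%d opt=%s\n", m.atime, m.opt);
    return (0);
}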
* We can't allow it to be toggled on live file systems or incorrect * behavior may be seen from cifs clients * * This property isn't registered via dsl_prop_register(), but this callback * will be called when a file system is first mounted */ static void nbmand_changed_cb(void *arg, uint64_t newval) { zfsvfs_t *zfsvfs = arg; if (newval == FALSE) { vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NBMAND); vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NONBMAND, NULL, 0); } else { vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NONBMAND); vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NBMAND, NULL, 0); } } static void snapdir_changed_cb(void *arg, uint64_t newval) { zfsvfs_t *zfsvfs = arg; zfsvfs->z_show_ctldir = newval; } static void acl_mode_changed_cb(void *arg, uint64_t newval) { zfsvfs_t *zfsvfs = arg; zfsvfs->z_acl_mode = newval; } static void acl_inherit_changed_cb(void *arg, uint64_t newval) { zfsvfs_t *zfsvfs = arg; zfsvfs->z_acl_inherit = newval; } static void acl_type_changed_cb(void *arg, uint64_t newval) { zfsvfs_t *zfsvfs = arg; zfsvfs->z_acl_type = newval; } static int zfs_register_callbacks(vfs_t *vfsp) { struct dsl_dataset *ds = NULL; objset_t *os = NULL; zfsvfs_t *zfsvfs = NULL; uint64_t nbmand; boolean_t readonly = B_FALSE; boolean_t do_readonly = B_FALSE; boolean_t setuid = B_FALSE; boolean_t do_setuid = B_FALSE; boolean_t exec = B_FALSE; boolean_t do_exec = B_FALSE; boolean_t xattr = B_FALSE; boolean_t atime = B_FALSE; boolean_t do_atime = B_FALSE; boolean_t do_xattr = B_FALSE; int error = 0; ASSERT3P(vfsp, !=, NULL); zfsvfs = vfsp->vfs_data; ASSERT3P(zfsvfs, !=, NULL); os = zfsvfs->z_os; /* * This function can be called for a snapshot when we update snapshot's * mount point, which isn't really supported. */ if (dmu_objset_is_snapshot(os)) return (EOPNOTSUPP); /* * The act of registering our callbacks will destroy any mount * options we may have. In order to enable temporary overrides * of mount options, we stash away the current values and * restore them after we register the callbacks. */ if (vfs_optionisset(vfsp, MNTOPT_RO, NULL) || !spa_writeable(dmu_objset_spa(os))) { readonly = B_TRUE; do_readonly = B_TRUE; } else if (vfs_optionisset(vfsp, MNTOPT_RW, NULL)) { readonly = B_FALSE; do_readonly = B_TRUE; } if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) { setuid = B_FALSE; do_setuid = B_TRUE; } else if (vfs_optionisset(vfsp, MNTOPT_SETUID, NULL)) { setuid = B_TRUE; do_setuid = B_TRUE; } if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL)) { exec = B_FALSE; do_exec = B_TRUE; } else if (vfs_optionisset(vfsp, MNTOPT_EXEC, NULL)) { exec = B_TRUE; do_exec = B_TRUE; } if (vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL)) { zfsvfs->z_xattr = xattr = ZFS_XATTR_OFF; do_xattr = B_TRUE; } else if (vfs_optionisset(vfsp, MNTOPT_XATTR, NULL)) { zfsvfs->z_xattr = xattr = ZFS_XATTR_DIR; do_xattr = B_TRUE; } else if (vfs_optionisset(vfsp, MNTOPT_DIRXATTR, NULL)) { zfsvfs->z_xattr = xattr = ZFS_XATTR_DIR; do_xattr = B_TRUE; } else if (vfs_optionisset(vfsp, MNTOPT_SAXATTR, NULL)) { zfsvfs->z_xattr = xattr = ZFS_XATTR_SA; do_xattr = B_TRUE; } if (vfs_optionisset(vfsp, MNTOPT_NOATIME, NULL)) { atime = B_FALSE; do_atime = B_TRUE; } else if (vfs_optionisset(vfsp, MNTOPT_ATIME, NULL)) { atime = B_TRUE; do_atime = B_TRUE; } /* * We need to enter pool configuration here, so that we can use * dsl_prop_get_int_ds() to handle the special nbmand property below. * dsl_prop_get_integer() can not be used, because it has to acquire * spa_namespace_lock and we can not do that because we already hold * z_teardown_lock. 
The problem is that spa_write_cachefile() is called * with spa_namespace_lock held and the function calls ZFS vnode * operations to write the cache file and thus z_teardown_lock is * acquired after spa_namespace_lock. */ ds = dmu_objset_ds(os); dsl_pool_config_enter(dmu_objset_pool(os), FTAG); /* * nbmand is a special property. It can only be changed at * mount time. * * This is weird, but it is documented to only be changeable * at mount time. */ if (vfs_optionisset(vfsp, MNTOPT_NONBMAND, NULL)) { nbmand = B_FALSE; } else if (vfs_optionisset(vfsp, MNTOPT_NBMAND, NULL)) { nbmand = B_TRUE; } else if ((error = dsl_prop_get_int_ds(ds, "nbmand", &nbmand) != 0)) { dsl_pool_config_exit(dmu_objset_pool(os), FTAG); return (error); } /* * Register property callbacks. * * It would probably be fine to just check for i/o error from * the first prop_register(), but I guess I like to go * overboard... */ error = dsl_prop_register(ds, zfs_prop_to_name(ZFS_PROP_ATIME), atime_changed_cb, zfsvfs); error = error ? error : dsl_prop_register(ds, zfs_prop_to_name(ZFS_PROP_XATTR), xattr_changed_cb, zfsvfs); error = error ? error : dsl_prop_register(ds, zfs_prop_to_name(ZFS_PROP_RECORDSIZE), blksz_changed_cb, zfsvfs); error = error ? error : dsl_prop_register(ds, zfs_prop_to_name(ZFS_PROP_READONLY), readonly_changed_cb, zfsvfs); error = error ? error : dsl_prop_register(ds, zfs_prop_to_name(ZFS_PROP_SETUID), setuid_changed_cb, zfsvfs); error = error ? error : dsl_prop_register(ds, zfs_prop_to_name(ZFS_PROP_EXEC), exec_changed_cb, zfsvfs); error = error ? error : dsl_prop_register(ds, zfs_prop_to_name(ZFS_PROP_SNAPDIR), snapdir_changed_cb, zfsvfs); error = error ? error : dsl_prop_register(ds, zfs_prop_to_name(ZFS_PROP_ACLTYPE), acl_type_changed_cb, zfsvfs); error = error ? error : dsl_prop_register(ds, zfs_prop_to_name(ZFS_PROP_ACLMODE), acl_mode_changed_cb, zfsvfs); error = error ? error : dsl_prop_register(ds, zfs_prop_to_name(ZFS_PROP_ACLINHERIT), acl_inherit_changed_cb, zfsvfs); dsl_pool_config_exit(dmu_objset_pool(os), FTAG); if (error) goto unregister; /* * Invoke our callbacks to restore temporary mount options. */ if (do_readonly) readonly_changed_cb(zfsvfs, readonly); if (do_setuid) setuid_changed_cb(zfsvfs, setuid); if (do_exec) exec_changed_cb(zfsvfs, exec); if (do_xattr) xattr_changed_cb(zfsvfs, xattr); if (do_atime) atime_changed_cb(zfsvfs, atime); nbmand_changed_cb(zfsvfs, nbmand); return (0); unregister: dsl_prop_unregister_all(ds, zfsvfs); return (error); } /* * Associate this zfsvfs with the given objset, which must be owned. * This will cache a bunch of on-disk state from the objset in the * zfsvfs. */ static int zfsvfs_init(zfsvfs_t *zfsvfs, objset_t *os) { int error; uint64_t val; zfsvfs->z_max_blksz = SPA_OLD_MAXBLOCKSIZE; zfsvfs->z_show_ctldir = ZFS_SNAPDIR_VISIBLE; zfsvfs->z_os = os; error = zfs_get_zplprop(os, ZFS_PROP_VERSION, &zfsvfs->z_version); if (error != 0) return (error); if (zfsvfs->z_version > zfs_zpl_version_map(spa_version(dmu_objset_spa(os)))) { (void) printf("Can't mount a version %lld file system " "on a version %lld pool\n. 
Pool must be upgraded to mount " "this file system.", (u_longlong_t)zfsvfs->z_version, (u_longlong_t)spa_version(dmu_objset_spa(os))); return (SET_ERROR(ENOTSUP)); } error = zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &val); if (error != 0) return (error); zfsvfs->z_norm = (int)val; error = zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &val); if (error != 0) return (error); zfsvfs->z_utf8 = (val != 0); error = zfs_get_zplprop(os, ZFS_PROP_CASE, &val); if (error != 0) return (error); zfsvfs->z_case = (uint_t)val; error = zfs_get_zplprop(os, ZFS_PROP_ACLTYPE, &val); if (error != 0) return (error); zfsvfs->z_acl_type = (uint_t)val; /* * Fold case on file systems that are always or sometimes case * insensitive. */ if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE || zfsvfs->z_case == ZFS_CASE_MIXED) zfsvfs->z_norm |= U8_TEXTPREP_TOUPPER; zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os); zfsvfs->z_use_sa = USE_SA(zfsvfs->z_version, zfsvfs->z_os); uint64_t sa_obj = 0; if (zfsvfs->z_use_sa) { /* should either have both of these objects or none */ error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 8, 1, &sa_obj); if (error != 0) return (error); error = zfs_get_zplprop(os, ZFS_PROP_XATTR, &val); if (error == 0 && val == ZFS_XATTR_SA) zfsvfs->z_xattr_sa = B_TRUE; } error = sa_setup(os, sa_obj, zfs_attr_table, ZPL_END, &zfsvfs->z_attr_table); if (error != 0) return (error); if (zfsvfs->z_version >= ZPL_VERSION_SA) sa_register_update_callback(os, zfs_sa_upgrade); error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_ROOT_OBJ, 8, 1, &zfsvfs->z_root); if (error != 0) return (error); ASSERT3U(zfsvfs->z_root, !=, 0); error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_UNLINKED_SET, 8, 1, &zfsvfs->z_unlinkedobj); if (error != 0) return (error); error = zap_lookup(os, MASTER_NODE_OBJ, zfs_userquota_prop_prefixes[ZFS_PROP_USERQUOTA], 8, 1, &zfsvfs->z_userquota_obj); if (error == ENOENT) zfsvfs->z_userquota_obj = 0; else if (error != 0) return (error); error = zap_lookup(os, MASTER_NODE_OBJ, zfs_userquota_prop_prefixes[ZFS_PROP_GROUPQUOTA], 8, 1, &zfsvfs->z_groupquota_obj); if (error == ENOENT) zfsvfs->z_groupquota_obj = 0; else if (error != 0) return (error); error = zap_lookup(os, MASTER_NODE_OBJ, zfs_userquota_prop_prefixes[ZFS_PROP_PROJECTQUOTA], 8, 1, &zfsvfs->z_projectquota_obj); if (error == ENOENT) zfsvfs->z_projectquota_obj = 0; else if (error != 0) return (error); error = zap_lookup(os, MASTER_NODE_OBJ, zfs_userquota_prop_prefixes[ZFS_PROP_USEROBJQUOTA], 8, 1, &zfsvfs->z_userobjquota_obj); if (error == ENOENT) zfsvfs->z_userobjquota_obj = 0; else if (error != 0) return (error); error = zap_lookup(os, MASTER_NODE_OBJ, zfs_userquota_prop_prefixes[ZFS_PROP_GROUPOBJQUOTA], 8, 1, &zfsvfs->z_groupobjquota_obj); if (error == ENOENT) zfsvfs->z_groupobjquota_obj = 0; else if (error != 0) return (error); error = zap_lookup(os, MASTER_NODE_OBJ, zfs_userquota_prop_prefixes[ZFS_PROP_PROJECTOBJQUOTA], 8, 1, &zfsvfs->z_projectobjquota_obj); if (error == ENOENT) zfsvfs->z_projectobjquota_obj = 0; else if (error != 0) return (error); error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_FUID_TABLES, 8, 1, &zfsvfs->z_fuid_obj); if (error == ENOENT) zfsvfs->z_fuid_obj = 0; else if (error != 0) return (error); error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SHARES_DIR, 8, 1, &zfsvfs->z_shares_dir); if (error == ENOENT) zfsvfs->z_shares_dir = 0; else if (error != 0) return (error); /* * Only use the name cache if we are looking for a * name on a file system that does not require normalization * or case folding. 
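zfsvfs_init() above repeats one idiom for every optional master-node entry: ENOENT from zap_lookup() means the feature was never used, so the object id is recorded as 0 and initialization continues, while any other error aborts. A sketch of that control flow with a made-up lookup function:

#include <errno.h>
#include <stdint.h>
#include <stdio.h>

static int
lookup_optional(int (*lookup)(const char *, uint64_t *), const char *name,
    uint64_t *objp)
{
    int error = lookup(name, objp);

    if (error == ENOENT) {
        *objp = 0;                  /* absent is fine: nothing configured */
        return (0);
    }
    return (error);                 /* 0 on success, fatal otherwise */
}

static int
fake_lookup(const char *name, uint64_t *objp)
{
    (void) name, (void) objp;
    return (ENOENT);                /* pretend the entry does not exist */
}

int
main(void)
{
    uint64_t userquota_obj;

    if (lookup_optional(fake_lookup, "userquota", &userquota_obj) == 0)
        printf("userquota obj = %llu\n",
            (unsigned long long)userquota_obj);
    return (0);
}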
We can also look there if we happen to be * on a non-normalizing, mixed sensitivity file system IF we * are looking for the exact name (which is always the case on * FreeBSD). */ zfsvfs->z_use_namecache = !zfsvfs->z_norm || ((zfsvfs->z_case == ZFS_CASE_MIXED) && !(zfsvfs->z_norm & ~U8_TEXTPREP_TOUPPER)); return (0); } taskq_t *zfsvfs_taskq; static void zfsvfs_task_unlinked_drain(void *context, int pending __unused) { zfs_unlinked_drain((zfsvfs_t *)context); } int zfsvfs_create(const char *osname, boolean_t readonly, zfsvfs_t **zfvp) { objset_t *os; zfsvfs_t *zfsvfs; int error; boolean_t ro = (readonly || (strchr(osname, '@') != NULL)); /* * XXX: Fix struct statfs so this isn't necessary! * * The 'osname' is used as the filesystem's special node, which means * it must fit in statfs.f_mntfromname, or else it can't be * enumerated, so libzfs_mnttab_find() returns NULL, which causes * 'zfs unmount' to think it's not mounted when it is. */ if (strlen(osname) >= MNAMELEN) return (SET_ERROR(ENAMETOOLONG)); zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP); error = dmu_objset_own(osname, DMU_OST_ZFS, ro, B_TRUE, zfsvfs, &os); if (error != 0) { kmem_free(zfsvfs, sizeof (zfsvfs_t)); return (error); } error = zfsvfs_create_impl(zfvp, zfsvfs, os); return (error); } int zfsvfs_create_impl(zfsvfs_t **zfvp, zfsvfs_t *zfsvfs, objset_t *os) { int error; zfsvfs->z_vfs = NULL; zfsvfs->z_parent = zfsvfs; mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&zfsvfs->z_lock, NULL, MUTEX_DEFAULT, NULL); list_create(&zfsvfs->z_all_znodes, sizeof (znode_t), offsetof(znode_t, z_link_node)); TASK_INIT(&zfsvfs->z_unlinked_drain_task, 0, zfsvfs_task_unlinked_drain, zfsvfs); ZFS_TEARDOWN_INIT(zfsvfs); ZFS_TEARDOWN_INACTIVE_INIT(zfsvfs); rw_init(&zfsvfs->z_fuid_lock, NULL, RW_DEFAULT, NULL); for (int i = 0; i != ZFS_OBJ_MTX_SZ; i++) mutex_init(&zfsvfs->z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL); error = zfsvfs_init(zfsvfs, os); if (error != 0) { dmu_objset_disown(os, B_TRUE, zfsvfs); *zfvp = NULL; kmem_free(zfsvfs, sizeof (zfsvfs_t)); return (error); } *zfvp = zfsvfs; return (0); } static int zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting) { int error; /* * Check for a bad on-disk format version now since we * lied about owning the dataset readonly before. */ if (!(zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) && dmu_objset_incompatible_encryption_version(zfsvfs->z_os)) return (SET_ERROR(EROFS)); error = zfs_register_callbacks(zfsvfs->z_vfs); if (error) return (error); /* * If we are not mounting (ie: online recv), then we don't * have to worry about replaying the log as we blocked all * operations out since we closed the ZIL. */ if (mounting) { boolean_t readonly; ASSERT3P(zfsvfs->z_kstat.dk_kstats, ==, NULL); error = dataset_kstats_create(&zfsvfs->z_kstat, zfsvfs->z_os); if (error) return (error); zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data, &zfsvfs->z_kstat.dk_zil_sums); /* * During replay we remove the read only flag to * allow replays to succeed. 
*/ readonly = zfsvfs->z_vfs->vfs_flag & VFS_RDONLY; if (readonly != 0) { zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY; } else { dsl_dir_t *dd; zap_stats_t zs; if (zap_get_stats(zfsvfs->z_os, zfsvfs->z_unlinkedobj, &zs) == 0) { dataset_kstats_update_nunlinks_kstat( &zfsvfs->z_kstat, zs.zs_num_entries); dprintf_ds(zfsvfs->z_os->os_dsl_dataset, "num_entries in unlinked set: %llu", (u_longlong_t)zs.zs_num_entries); } zfs_unlinked_drain(zfsvfs); dd = zfsvfs->z_os->os_dsl_dataset->ds_dir; dd->dd_activity_cancelled = B_FALSE; } /* * Parse and replay the intent log. * * Because of ziltest, this must be done after * zfs_unlinked_drain(). (Further note: ziltest * doesn't use readonly mounts, where * zfs_unlinked_drain() isn't called.) This is because * ziltest causes spa_sync() to think it's committed, * but actually it is not, so the intent log contains * many txg's worth of changes. * * In particular, if object N is in the unlinked set in * the last txg to actually sync, then it could be * actually freed in a later txg and then reallocated * in a yet later txg. This would write a "create * object N" record to the intent log. Normally, this * would be fine because the spa_sync() would have * written out the fact that object N is free, before * we could write the "create object N" intent log * record. * * But when we are in ziltest mode, we advance the "open * txg" without actually spa_sync()-ing the changes to * disk. So we would see that object N is still * allocated and in the unlinked set, and there is an * intent log record saying to allocate it. */ if (spa_writeable(dmu_objset_spa(zfsvfs->z_os))) { if (zil_replay_disable) { zil_destroy(zfsvfs->z_log, B_FALSE); } else { boolean_t use_nc = zfsvfs->z_use_namecache; zfsvfs->z_use_namecache = B_FALSE; zfsvfs->z_replay = B_TRUE; zil_replay(zfsvfs->z_os, zfsvfs, zfs_replay_vector); zfsvfs->z_replay = B_FALSE; zfsvfs->z_use_namecache = use_nc; } } /* restore readonly bit */ if (readonly != 0) zfsvfs->z_vfs->vfs_flag |= VFS_RDONLY; } else { ASSERT3P(zfsvfs->z_kstat.dk_kstats, !=, NULL); zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data, &zfsvfs->z_kstat.dk_zil_sums); } /* * Set the objset user_ptr to track its zfsvfs. 
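During mount, zfsvfs_setup() above saves the read-only flag, clears it so intent-log replay can write to the dataset, and restores it afterwards. The shape of that save/clear/restore, with hypothetical names:

#include <stdio.h>

#define FLAG_RDONLY 0x1

static void
replay_log(void)
{
    /* pretend to apply logged operations here */
}

static void
setup(unsigned *vfs_flag)
{
    unsigned readonly = *vfs_flag & FLAG_RDONLY;    /* save */

    if (readonly)
        *vfs_flag &= ~FLAG_RDONLY;                  /* allow replay writes */

    replay_log();

    if (readonly)
        *vfs_flag |= FLAG_RDONLY;                   /* restore */
}

int
main(void)
{
    unsigned flag = FLAG_RDONLY;

    setup(&flag);
    printf("readonly restored: %d\n", (flag & FLAG_RDONLY) != 0);
    return (0);
}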
*/ mutex_enter(&zfsvfs->z_os->os_user_ptr_lock); dmu_objset_set_user(zfsvfs->z_os, zfsvfs); mutex_exit(&zfsvfs->z_os->os_user_ptr_lock); return (0); } void zfsvfs_free(zfsvfs_t *zfsvfs) { int i; zfs_fuid_destroy(zfsvfs); mutex_destroy(&zfsvfs->z_znodes_lock); mutex_destroy(&zfsvfs->z_lock); ASSERT3U(zfsvfs->z_nr_znodes, ==, 0); list_destroy(&zfsvfs->z_all_znodes); ZFS_TEARDOWN_DESTROY(zfsvfs); ZFS_TEARDOWN_INACTIVE_DESTROY(zfsvfs); rw_destroy(&zfsvfs->z_fuid_lock); for (i = 0; i != ZFS_OBJ_MTX_SZ; i++) mutex_destroy(&zfsvfs->z_hold_mtx[i]); dataset_kstats_destroy(&zfsvfs->z_kstat); kmem_free(zfsvfs, sizeof (zfsvfs_t)); } static void zfs_set_fuid_feature(zfsvfs_t *zfsvfs) { zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os); zfsvfs->z_use_sa = USE_SA(zfsvfs->z_version, zfsvfs->z_os); } static int zfs_domount(vfs_t *vfsp, char *osname) { uint64_t recordsize, fsid_guid; int error = 0; zfsvfs_t *zfsvfs; ASSERT3P(vfsp, !=, NULL); ASSERT3P(osname, !=, NULL); error = zfsvfs_create(osname, vfsp->mnt_flag & MNT_RDONLY, &zfsvfs); if (error) return (error); zfsvfs->z_vfs = vfsp; if ((error = dsl_prop_get_integer(osname, "recordsize", &recordsize, NULL))) goto out; zfsvfs->z_vfs->vfs_bsize = SPA_MINBLOCKSIZE; zfsvfs->z_vfs->mnt_stat.f_iosize = recordsize; vfsp->vfs_data = zfsvfs; vfsp->mnt_flag |= MNT_LOCAL; vfsp->mnt_kern_flag |= MNTK_LOOKUP_SHARED; vfsp->mnt_kern_flag |= MNTK_SHARED_WRITES; vfsp->mnt_kern_flag |= MNTK_EXTENDED_SHARED; /* * This can cause a loss of coherence between ARC and page cache * on ZoF - unclear if the problem is in FreeBSD or ZoF */ vfsp->mnt_kern_flag |= MNTK_NO_IOPF; /* vn_io_fault can be used */ vfsp->mnt_kern_flag |= MNTK_NOMSYNC; vfsp->mnt_kern_flag |= MNTK_VMSETSIZE_BUG; #if defined(_KERNEL) && !defined(KMEM_DEBUG) vfsp->mnt_kern_flag |= MNTK_FPLOOKUP; #endif /* * The fsid is 64 bits, composed of an 8-bit fs type, which * separates our fsid from any other filesystem types, and a * 56-bit objset unique ID. The objset unique ID is unique to * all objsets open on this system, provided by unique_create(). * The 8-bit fs type must be put in the low bits of fsid[1] * because that's where other Solaris filesystems put it. */ fsid_guid = dmu_objset_fsid_guid(zfsvfs->z_os); ASSERT3U((fsid_guid & ~((1ULL << 56) - 1)), ==, 0); vfsp->vfs_fsid.val[0] = fsid_guid; vfsp->vfs_fsid.val[1] = ((fsid_guid >> 32) << 8) | (vfsp->mnt_vfc->vfc_typenum & 0xFF); /* * Set features for file system. 
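zfs_domount() above packs the 56-bit objset id and the 8-bit filesystem type into the two 32-bit fsid words: the low 32 bits of the id in val[0], and bits 32..55 shifted up by 8 with the type in the low byte of val[1]. A stand-alone check of that arithmetic with made-up inputs:

#include <assert.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
    uint64_t fsid_guid = 0x00abcdef01234567ULL;     /* 56-bit unique id */
    uint32_t vfc_typenum = 0x2a;                    /* 8-bit fs type */
    uint32_t val[2];

    assert((fsid_guid & ~((1ULL << 56) - 1)) == 0); /* really fits in 56 bits */
    val[0] = (uint32_t)fsid_guid;
    val[1] = (uint32_t)(((fsid_guid >> 32) << 8) | (vfc_typenum & 0xFF));

    printf("fsid = %08" PRIx32 ":%08" PRIx32 "\n", val[1], val[0]);
    return (0);
}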
*/ zfs_set_fuid_feature(zfsvfs); if (dmu_objset_is_snapshot(zfsvfs->z_os)) { uint64_t pval; atime_changed_cb(zfsvfs, B_FALSE); readonly_changed_cb(zfsvfs, B_TRUE); if ((error = dsl_prop_get_integer(osname, "xattr", &pval, NULL))) goto out; xattr_changed_cb(zfsvfs, pval); if ((error = dsl_prop_get_integer(osname, "acltype", &pval, NULL))) goto out; acl_type_changed_cb(zfsvfs, pval); zfsvfs->z_issnap = B_TRUE; zfsvfs->z_os->os_sync = ZFS_SYNC_DISABLED; mutex_enter(&zfsvfs->z_os->os_user_ptr_lock); dmu_objset_set_user(zfsvfs->z_os, zfsvfs); mutex_exit(&zfsvfs->z_os->os_user_ptr_lock); } else { if ((error = zfsvfs_setup(zfsvfs, B_TRUE))) goto out; } vfs_mountedfrom(vfsp, osname); if (!zfsvfs->z_issnap) zfsctl_create(zfsvfs); out: if (error) { dmu_objset_disown(zfsvfs->z_os, B_TRUE, zfsvfs); zfsvfs_free(zfsvfs); } else { atomic_inc_32(&zfs_active_fs_count); } return (error); } static void zfs_unregister_callbacks(zfsvfs_t *zfsvfs) { objset_t *os = zfsvfs->z_os; if (!dmu_objset_is_snapshot(os)) dsl_prop_unregister_all(dmu_objset_ds(os), zfsvfs); } static int getpoolname(const char *osname, char *poolname) { char *p; p = strchr(osname, '/'); if (p == NULL) { if (strlen(osname) >= MAXNAMELEN) return (ENAMETOOLONG); (void) strcpy(poolname, osname); } else { if (p - osname >= MAXNAMELEN) return (ENAMETOOLONG); - (void) strncpy(poolname, osname, p - osname); - poolname[p - osname] = '\0'; + (void) strlcpy(poolname, osname, p - osname + 1); } return (0); } static void fetch_osname_options(char *name, bool *checkpointrewind) { if (name[0] == '!') { *checkpointrewind = true; memmove(name, name + 1, strlen(name)); } else { *checkpointrewind = false; } } static int zfs_mount(vfs_t *vfsp) { kthread_t *td = curthread; vnode_t *mvp = vfsp->mnt_vnodecovered; cred_t *cr = td->td_ucred; char *osname; int error = 0; int canwrite; bool checkpointrewind; if (vfs_getopt(vfsp->mnt_optnew, "from", (void **)&osname, NULL)) return (SET_ERROR(EINVAL)); /* * If full-owner-access is enabled and delegated administration is * turned on, we must set nosuid. */ if (zfs_super_owner && dsl_deleg_access(osname, ZFS_DELEG_PERM_MOUNT, cr) != ECANCELED) { secpolicy_fs_mount_clearopts(cr, vfsp); } fetch_osname_options(osname, &checkpointrewind); /* * Check for mount privilege? * * If we don't have privilege then see if * we have local permission to allow it */ error = secpolicy_fs_mount(cr, mvp, vfsp); if (error) { if (dsl_deleg_access(osname, ZFS_DELEG_PERM_MOUNT, cr) != 0) goto out; if (!(vfsp->vfs_flag & MS_REMOUNT)) { vattr_t vattr; /* * Make sure user is the owner of the mount point * or has sufficient privileges. */ vattr.va_mask = AT_UID; vn_lock(mvp, LK_SHARED | LK_RETRY); if (VOP_GETATTR(mvp, &vattr, cr)) { VOP_UNLOCK1(mvp); goto out; } if (secpolicy_vnode_owner(mvp, cr, vattr.va_uid) != 0 && VOP_ACCESS(mvp, VWRITE, cr, td) != 0) { VOP_UNLOCK1(mvp); goto out; } VOP_UNLOCK1(mvp); } secpolicy_fs_mount_clearopts(cr, vfsp); } /* * Refuse to mount a filesystem if we are in a local zone and the * dataset is not visible. */ if (!INGLOBALZONE(curproc) && (!zone_dataset_visible(osname, &canwrite) || !canwrite)) { error = SET_ERROR(EPERM); goto out; } vfsp->vfs_flag |= MNT_NFS4ACLS; /* * When doing a remount, we simply refresh our temporary properties * according to those options set in the current VFS options. */ if (vfsp->vfs_flag & MS_REMOUNT) { zfsvfs_t *zfsvfs = vfsp->vfs_data; /* * Refresh mount options with z_teardown_lock blocking I/O while * the filesystem is in an inconsistent state. 
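The getpoolname() change above depends on strlcpy() taking the destination size, not a character count: passing (prefix length + 1) copies exactly the prefix and writes the terminating NUL, which is what the removed strncpy() plus explicit '\0' did by hand. A quick user-space check (strlcpy() is native on FreeBSD; other libcs may need libbsd):

#include <stdio.h>
#include <string.h>

int
main(void)
{
    const char *osname = "tank/home/user";
    const char *p = strchr(osname, '/');
    char poolname[64];

    (void) strlcpy(poolname, osname, (size_t)(p - osname) + 1);
    printf("pool = \"%s\"\n", poolname);    /* prints: pool = "tank" */
    return (0);
}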
* The lock also serializes this code with filesystem * manipulations between entry to zfs_suspend_fs() and return * from zfs_resume_fs(). */ ZFS_TEARDOWN_ENTER_WRITE(zfsvfs, FTAG); zfs_unregister_callbacks(zfsvfs); error = zfs_register_callbacks(vfsp); ZFS_TEARDOWN_EXIT(zfsvfs, FTAG); goto out; } /* Initial root mount: try hard to import the requested root pool. */ if ((vfsp->vfs_flag & MNT_ROOTFS) != 0 && (vfsp->vfs_flag & MNT_UPDATE) == 0) { char pname[MAXNAMELEN]; error = getpoolname(osname, pname); if (error == 0) error = spa_import_rootpool(pname, checkpointrewind); if (error) goto out; } DROP_GIANT(); error = zfs_domount(vfsp, osname); PICKUP_GIANT(); out: return (error); } static int zfs_statfs(vfs_t *vfsp, struct statfs *statp) { zfsvfs_t *zfsvfs = vfsp->vfs_data; uint64_t refdbytes, availbytes, usedobjs, availobjs; int error; statp->f_version = STATFS_VERSION; if ((error = zfs_enter(zfsvfs, FTAG)) != 0) return (error); dmu_objset_space(zfsvfs->z_os, &refdbytes, &availbytes, &usedobjs, &availobjs); /* * The underlying storage pool actually uses multiple block sizes. * We report the fragsize as the smallest block size we support, * and we report our blocksize as the filesystem's maximum blocksize. */ statp->f_bsize = SPA_MINBLOCKSIZE; statp->f_iosize = zfsvfs->z_vfs->mnt_stat.f_iosize; /* * The following report "total" blocks of various kinds in the * file system, but reported in terms of f_frsize - the * "fragment" size. */ statp->f_blocks = (refdbytes + availbytes) >> SPA_MINBLOCKSHIFT; statp->f_bfree = availbytes / statp->f_bsize; statp->f_bavail = statp->f_bfree; /* no root reservation */ /* * statvfs() should really be called statufs(), because it assumes * static metadata. ZFS doesn't preallocate files, so the best * we can do is report the max that could possibly fit in f_files, * and that minus the number actually used in f_ffree. * For f_ffree, report the smaller of the number of object available * and the number of blocks (each object will take at least a block). */ statp->f_ffree = MIN(availobjs, statp->f_bfree); statp->f_files = statp->f_ffree + usedobjs; /* * We're a zfs filesystem. */ strlcpy(statp->f_fstypename, "zfs", sizeof (statp->f_fstypename)); strlcpy(statp->f_mntfromname, vfsp->mnt_stat.f_mntfromname, sizeof (statp->f_mntfromname)); strlcpy(statp->f_mntonname, vfsp->mnt_stat.f_mntonname, sizeof (statp->f_mntonname)); statp->f_namemax = MAXNAMELEN - 1; zfs_exit(zfsvfs, FTAG); return (0); } static int zfs_root(vfs_t *vfsp, int flags, vnode_t **vpp) { zfsvfs_t *zfsvfs = vfsp->vfs_data; znode_t *rootzp; int error; if ((error = zfs_enter(zfsvfs, FTAG)) != 0) return (error); error = zfs_zget(zfsvfs, zfsvfs->z_root, &rootzp); if (error == 0) *vpp = ZTOV(rootzp); zfs_exit(zfsvfs, FTAG); if (error == 0) { error = vn_lock(*vpp, flags); if (error != 0) { VN_RELE(*vpp); *vpp = NULL; } } return (error); } /* * Teardown the zfsvfs::z_os. * * Note, if 'unmounting' is FALSE, we return with the 'z_teardown_lock' * and 'z_teardown_inactive_lock' held. */ static int zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting) { znode_t *zp; dsl_dir_t *dd; /* * If someone has not already unmounted this file system, * drain the zrele_taskq to ensure all active references to the * zfsvfs_t have been handled only then can it be safely destroyed. */ if (zfsvfs->z_os) { /* * If we're unmounting we have to wait for the list to * drain completely. 
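The zfs_statfs() accounting above reports everything in SPA_MINBLOCKSIZE (512-byte) units: f_blocks from referenced plus available bytes, f_bfree from available bytes, and f_ffree capped at the number of free blocks since each object needs at least one block. The same arithmetic as a stand-alone calculation with made-up pool numbers:

#include <stdint.h>
#include <stdio.h>

#define MINBLOCKSHIFT   9           /* 512-byte blocks, like SPA_MINBLOCKSHIFT */

int
main(void)
{
    uint64_t refdbytes = 40ULL << 30;       /* 40 GiB referenced */
    uint64_t availbytes = 60ULL << 30;      /* 60 GiB available */
    uint64_t usedobjs = 1000000;
    uint64_t availobjs = 50000000;

    uint64_t bsize = 1ULL << MINBLOCKSHIFT;
    uint64_t blocks = (refdbytes + availbytes) >> MINBLOCKSHIFT;
    uint64_t bfree = availbytes / bsize;
    uint64_t ffree = availobjs < bfree ? availobjs : bfree;

    printf("blocks=%llu bfree=%llu files=%llu ffree=%llu\n",
        (unsigned long long)blocks, (unsigned long long)bfree,
        (unsigned long long)(ffree + usedobjs), (unsigned long long)ffree);
    return (0);
}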
* * If we're not unmounting there's no guarantee the list * will drain completely, but zreles run from the taskq * may add the parents of dir-based xattrs to the taskq * so we want to wait for these. * * We can safely read z_nr_znodes without locking because the * VFS has already blocked operations which add to the * z_all_znodes list and thus increment z_nr_znodes. */ int round = 0; while (zfsvfs->z_nr_znodes > 0) { taskq_wait_outstanding(dsl_pool_zrele_taskq( dmu_objset_pool(zfsvfs->z_os)), 0); if (++round > 1 && !unmounting) break; } } ZFS_TEARDOWN_ENTER_WRITE(zfsvfs, FTAG); if (!unmounting) { /* * We purge the parent filesystem's vfsp as the parent * filesystem and all of its snapshots have their vnode's * v_vfsp set to the parent's filesystem's vfsp. Note, * 'z_parent' is self referential for non-snapshots. */ #ifdef FREEBSD_NAMECACHE #if __FreeBSD_version >= 1300117 cache_purgevfs(zfsvfs->z_parent->z_vfs); #else cache_purgevfs(zfsvfs->z_parent->z_vfs, true); #endif #endif } /* * Close the zil. NB: Can't close the zil while zfs_inactive * threads are blocked as zil_close can call zfs_inactive. */ if (zfsvfs->z_log) { zil_close(zfsvfs->z_log); zfsvfs->z_log = NULL; } ZFS_TEARDOWN_INACTIVE_ENTER_WRITE(zfsvfs); /* * If we are not unmounting (ie: online recv) and someone already * unmounted this file system while we were doing the switcheroo, * or a reopen of z_os failed then just bail out now. */ if (!unmounting && (zfsvfs->z_unmounted || zfsvfs->z_os == NULL)) { ZFS_TEARDOWN_INACTIVE_EXIT_WRITE(zfsvfs); ZFS_TEARDOWN_EXIT(zfsvfs, FTAG); return (SET_ERROR(EIO)); } /* * At this point there are no vops active, and any new vops will * fail with EIO since we have z_teardown_lock for writer (only * relevant for forced unmount). * * Release all holds on dbufs. */ mutex_enter(&zfsvfs->z_znodes_lock); for (zp = list_head(&zfsvfs->z_all_znodes); zp != NULL; zp = list_next(&zfsvfs->z_all_znodes, zp)) { if (zp->z_sa_hdl != NULL) { zfs_znode_dmu_fini(zp); } } mutex_exit(&zfsvfs->z_znodes_lock); /* * If we are unmounting, set the unmounted flag and let new vops * unblock. zfs_inactive will have the unmounted behavior, and all * other vops will fail with EIO. */ if (unmounting) { zfsvfs->z_unmounted = B_TRUE; ZFS_TEARDOWN_INACTIVE_EXIT_WRITE(zfsvfs); ZFS_TEARDOWN_EXIT(zfsvfs, FTAG); } /* * z_os will be NULL if there was an error in attempting to reopen * zfsvfs, so just return as the properties had already been * unregistered and cached data had been evicted before. */ if (zfsvfs->z_os == NULL) return (0); /* * Unregister properties. */ zfs_unregister_callbacks(zfsvfs); /* * Evict cached data */ if (!zfs_is_readonly(zfsvfs)) txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0); dmu_objset_evict_dbufs(zfsvfs->z_os); dd = zfsvfs->z_os->os_dsl_dataset->ds_dir; dsl_dir_cancel_waiters(dd); return (0); } static int zfs_umount(vfs_t *vfsp, int fflag) { kthread_t *td = curthread; zfsvfs_t *zfsvfs = vfsp->vfs_data; objset_t *os; cred_t *cr = td->td_ucred; int ret; ret = secpolicy_fs_unmount(cr, vfsp); if (ret) { if (dsl_deleg_access((char *)vfsp->vfs_resource, ZFS_DELEG_PERM_MOUNT, cr)) return (ret); } /* * Unmount any snapshots mounted under .zfs before unmounting the * dataset itself. */ if (zfsvfs->z_ctldir != NULL) { if ((ret = zfsctl_umount_snapshots(vfsp, fflag, cr)) != 0) return (ret); } if (fflag & MS_FORCE) { /* * Mark file system as unmounted before calling * vflush(FORCECLOSE). This way we ensure no future vnops * will be called and risk operating on DOOMED vnodes. 
*/ ZFS_TEARDOWN_ENTER_WRITE(zfsvfs, FTAG); zfsvfs->z_unmounted = B_TRUE; ZFS_TEARDOWN_EXIT(zfsvfs, FTAG); } /* * Flush all the files. */ ret = vflush(vfsp, 0, (fflag & MS_FORCE) ? FORCECLOSE : 0, td); if (ret != 0) return (ret); while (taskqueue_cancel(zfsvfs_taskq->tq_queue, &zfsvfs->z_unlinked_drain_task, NULL) != 0) taskqueue_drain(zfsvfs_taskq->tq_queue, &zfsvfs->z_unlinked_drain_task); VERIFY0(zfsvfs_teardown(zfsvfs, B_TRUE)); os = zfsvfs->z_os; /* * z_os will be NULL if there was an error in * attempting to reopen zfsvfs. */ if (os != NULL) { /* * Unset the objset user_ptr. */ mutex_enter(&os->os_user_ptr_lock); dmu_objset_set_user(os, NULL); mutex_exit(&os->os_user_ptr_lock); /* * Finally release the objset */ dmu_objset_disown(os, B_TRUE, zfsvfs); } /* * We can now safely destroy the '.zfs' directory node. */ if (zfsvfs->z_ctldir != NULL) zfsctl_destroy(zfsvfs); zfs_freevfs(vfsp); return (0); } static int zfs_vget(vfs_t *vfsp, ino_t ino, int flags, vnode_t **vpp) { zfsvfs_t *zfsvfs = vfsp->vfs_data; znode_t *zp; int err; /* * zfs_zget() can't operate on virtual entries like .zfs/ or * .zfs/snapshot/ directories, that's why we return EOPNOTSUPP. * This will make NFS to switch to LOOKUP instead of using VGET. */ if (ino == ZFSCTL_INO_ROOT || ino == ZFSCTL_INO_SNAPDIR || (zfsvfs->z_shares_dir != 0 && ino == zfsvfs->z_shares_dir)) return (EOPNOTSUPP); if ((err = zfs_enter(zfsvfs, FTAG)) != 0) return (err); err = zfs_zget(zfsvfs, ino, &zp); if (err == 0 && zp->z_unlinked) { vrele(ZTOV(zp)); err = EINVAL; } if (err == 0) *vpp = ZTOV(zp); zfs_exit(zfsvfs, FTAG); if (err == 0) { err = vn_lock(*vpp, flags); if (err != 0) vrele(*vpp); } if (err != 0) *vpp = NULL; return (err); } static int #if __FreeBSD_version >= 1300098 zfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, uint64_t *extflagsp, struct ucred **credanonp, int *numsecflavors, int *secflavors) #else zfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, int *extflagsp, struct ucred **credanonp, int *numsecflavors, int **secflavors) #endif { zfsvfs_t *zfsvfs = vfsp->vfs_data; /* * If this is regular file system vfsp is the same as * zfsvfs->z_parent->z_vfs, but if it is snapshot, * zfsvfs->z_parent->z_vfs represents parent file system * which we have to use here, because only this file system * has mnt_export configured. */ return (vfs_stdcheckexp(zfsvfs->z_parent->z_vfs, nam, extflagsp, credanonp, numsecflavors, secflavors)); } _Static_assert(sizeof (struct fid) >= SHORT_FID_LEN, "struct fid bigger than SHORT_FID_LEN"); _Static_assert(sizeof (struct fid) >= LONG_FID_LEN, "struct fid bigger than LONG_FID_LEN"); static int zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, int flags, vnode_t **vpp) { struct componentname cn; zfsvfs_t *zfsvfs = vfsp->vfs_data; znode_t *zp; vnode_t *dvp; uint64_t object = 0; uint64_t fid_gen = 0; uint64_t setgen = 0; uint64_t gen_mask; uint64_t zp_gen; int i, err; *vpp = NULL; if ((err = zfs_enter(zfsvfs, FTAG)) != 0) return (err); /* * On FreeBSD we can get snapshot's mount point or its parent file * system mount point depending if snapshot is already mounted or not. 
*/ if (zfsvfs->z_parent == zfsvfs && fidp->fid_len == LONG_FID_LEN) { zfid_long_t *zlfid = (zfid_long_t *)fidp; uint64_t objsetid = 0; for (i = 0; i < sizeof (zlfid->zf_setid); i++) objsetid |= ((uint64_t)zlfid->zf_setid[i]) << (8 * i); for (i = 0; i < sizeof (zlfid->zf_setgen); i++) setgen |= ((uint64_t)zlfid->zf_setgen[i]) << (8 * i); zfs_exit(zfsvfs, FTAG); err = zfsctl_lookup_objset(vfsp, objsetid, &zfsvfs); if (err) return (SET_ERROR(EINVAL)); if ((err = zfs_enter(zfsvfs, FTAG)) != 0) return (err); } if (fidp->fid_len == SHORT_FID_LEN || fidp->fid_len == LONG_FID_LEN) { zfid_short_t *zfid = (zfid_short_t *)fidp; for (i = 0; i < sizeof (zfid->zf_object); i++) object |= ((uint64_t)zfid->zf_object[i]) << (8 * i); for (i = 0; i < sizeof (zfid->zf_gen); i++) fid_gen |= ((uint64_t)zfid->zf_gen[i]) << (8 * i); } else { zfs_exit(zfsvfs, FTAG); return (SET_ERROR(EINVAL)); } if (fidp->fid_len == LONG_FID_LEN && (fid_gen > 1 || setgen != 0)) { dprintf("snapdir fid: fid_gen (%llu) and setgen (%llu)\n", (u_longlong_t)fid_gen, (u_longlong_t)setgen); return (SET_ERROR(EINVAL)); } /* * A zero fid_gen means we are in .zfs or the .zfs/snapshot * directory tree. If the object == zfsvfs->z_shares_dir, then * we are in the .zfs/shares directory tree. */ if ((fid_gen == 0 && (object == ZFSCTL_INO_ROOT || object == ZFSCTL_INO_SNAPDIR)) || (zfsvfs->z_shares_dir != 0 && object == zfsvfs->z_shares_dir)) { zfs_exit(zfsvfs, FTAG); VERIFY0(zfsctl_root(zfsvfs, LK_SHARED, &dvp)); if (object == ZFSCTL_INO_SNAPDIR) { cn.cn_nameptr = "snapshot"; cn.cn_namelen = strlen(cn.cn_nameptr); cn.cn_nameiop = LOOKUP; cn.cn_flags = ISLASTCN | LOCKLEAF; cn.cn_lkflags = flags; VERIFY0(VOP_LOOKUP(dvp, vpp, &cn)); vput(dvp); } else if (object == zfsvfs->z_shares_dir) { /* * XXX This branch must not be taken, * if it is, then the lookup below will * explode. */ cn.cn_nameptr = "shares"; cn.cn_namelen = strlen(cn.cn_nameptr); cn.cn_nameiop = LOOKUP; cn.cn_flags = ISLASTCN; cn.cn_lkflags = flags; VERIFY0(VOP_LOOKUP(dvp, vpp, &cn)); vput(dvp); } else { *vpp = dvp; } return (err); } gen_mask = -1ULL >> (64 - 8 * i); dprintf("getting %llu [%llu mask %llx]\n", (u_longlong_t)object, (u_longlong_t)fid_gen, (u_longlong_t)gen_mask); if ((err = zfs_zget(zfsvfs, object, &zp))) { zfs_exit(zfsvfs, FTAG); return (err); } (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs), &zp_gen, sizeof (uint64_t)); zp_gen = zp_gen & gen_mask; if (zp_gen == 0) zp_gen = 1; if (zp->z_unlinked || zp_gen != fid_gen) { dprintf("znode gen (%llu) != fid gen (%llu)\n", (u_longlong_t)zp_gen, (u_longlong_t)fid_gen); vrele(ZTOV(zp)); zfs_exit(zfsvfs, FTAG); return (SET_ERROR(EINVAL)); } *vpp = ZTOV(zp); zfs_exit(zfsvfs, FTAG); err = vn_lock(*vpp, flags); if (err == 0) vnode_create_vobject(*vpp, zp->z_size, curthread); else *vpp = NULL; return (err); } /* * Block out VOPs and close zfsvfs_t::z_os * * Note, if successful, then we return with the 'z_teardown_lock' and * 'z_teardown_inactive_lock' write held. We leave ownership of the underlying * dataset and objset intact so that they can be atomically handed off during * a subsequent rollback or recv operation and the resume thereafter. */ int zfs_suspend_fs(zfsvfs_t *zfsvfs) { int error; if ((error = zfsvfs_teardown(zfsvfs, B_FALSE)) != 0) return (error); return (0); } /* * Rebuild SA and release VOPs. Note that ownership of the underlying dataset * is an invariant across any of the operations that can be performed while the * filesystem was suspended. 
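zfs_fhtovp() above rebuilds 64-bit object and generation numbers from the little-endian byte arrays stored in the file handle, then masks the on-disk generation to the same width before comparing it with the handle's. A stand-alone sketch of the decode; the field sizes here are illustrative:

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
    uint8_t zf_object[6] = { 0x39, 0x30, 0x00, 0x00, 0x00, 0x00 };
    uint8_t zf_gen[4] = { 0x07, 0x00, 0x00, 0x00 };
    uint64_t object = 0, fid_gen = 0;
    unsigned i;

    for (i = 0; i < sizeof (zf_object); i++)
        object |= (uint64_t)zf_object[i] << (8 * i);
    for (i = 0; i < sizeof (zf_gen); i++)
        fid_gen |= (uint64_t)zf_gen[i] << (8 * i);

    /* mask covering exactly the width of the gen field */
    uint64_t gen_mask = -1ULL >> (64 - 8 * sizeof (zf_gen));

    printf("object=%llu gen=%llu mask=%#llx\n",
        (unsigned long long)object, (unsigned long long)fid_gen,
        (unsigned long long)gen_mask);
    return (0);
}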
Whether it succeeded or failed, the preconditions * are the same: the relevant objset and associated dataset are owned by * zfsvfs, held, and long held on entry. */ int zfs_resume_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds) { int err; znode_t *zp; ASSERT(ZFS_TEARDOWN_WRITE_HELD(zfsvfs)); ASSERT(ZFS_TEARDOWN_INACTIVE_WRITE_HELD(zfsvfs)); /* * We already own this, so just update the objset_t, as the one we * had before may have been evicted. */ objset_t *os; VERIFY3P(ds->ds_owner, ==, zfsvfs); VERIFY(dsl_dataset_long_held(ds)); dsl_pool_t *dp = spa_get_dsl(dsl_dataset_get_spa(ds)); dsl_pool_config_enter(dp, FTAG); VERIFY0(dmu_objset_from_ds(ds, &os)); dsl_pool_config_exit(dp, FTAG); err = zfsvfs_init(zfsvfs, os); if (err != 0) goto bail; ds->ds_dir->dd_activity_cancelled = B_FALSE; VERIFY0(zfsvfs_setup(zfsvfs, B_FALSE)); zfs_set_fuid_feature(zfsvfs); /* * Attempt to re-establish all the active znodes with * their dbufs. If a zfs_rezget() fails, then we'll let * any potential callers discover that via zfs_enter_verify_zp * when they try to use their znode. */ mutex_enter(&zfsvfs->z_znodes_lock); for (zp = list_head(&zfsvfs->z_all_znodes); zp; zp = list_next(&zfsvfs->z_all_znodes, zp)) { (void) zfs_rezget(zp); } mutex_exit(&zfsvfs->z_znodes_lock); bail: /* release the VOPs */ ZFS_TEARDOWN_INACTIVE_EXIT_WRITE(zfsvfs); ZFS_TEARDOWN_EXIT(zfsvfs, FTAG); if (err) { /* * Since we couldn't setup the sa framework, try to force * unmount this file system. */ if (vn_vfswlock(zfsvfs->z_vfs->vfs_vnodecovered) == 0) { vfs_ref(zfsvfs->z_vfs); (void) dounmount(zfsvfs->z_vfs, MS_FORCE, curthread); } } return (err); } static void zfs_freevfs(vfs_t *vfsp) { zfsvfs_t *zfsvfs = vfsp->vfs_data; zfsvfs_free(zfsvfs); atomic_dec_32(&zfs_active_fs_count); } #ifdef __i386__ static int desiredvnodes_backup; #include #include #include #include #include #endif static void zfs_vnodes_adjust(void) { #ifdef __i386__ int newdesiredvnodes; desiredvnodes_backup = desiredvnodes; /* * We calculate newdesiredvnodes the same way it is done in * vntblinit(). If it is equal to desiredvnodes, it means that * it wasn't tuned by the administrator and we can tune it down. */ newdesiredvnodes = min(maxproc + vm_cnt.v_page_count / 4, 2 * vm_kmem_size / (5 * (sizeof (struct vm_object) + sizeof (struct vnode)))); if (newdesiredvnodes == desiredvnodes) desiredvnodes = (3 * newdesiredvnodes) / 4; #endif } static void zfs_vnodes_adjust_back(void) { #ifdef __i386__ desiredvnodes = desiredvnodes_backup; #endif } void zfs_init(void) { printf("ZFS filesystem version: " ZPL_VERSION_STRING "\n"); /* * Initialize .zfs directory structures */ zfsctl_init(); /* * Initialize znode cache, vnode ops, etc... */ zfs_znode_init(); /* * Reduce number of vnodes. Originally number of vnodes is calculated * with UFS inode in mind. We reduce it here, because it's too big for * ZFS/i386. */ zfs_vnodes_adjust(); dmu_objset_register_type(DMU_OST_ZFS, zpl_get_file_info); zfsvfs_taskq = taskq_create("zfsvfs", 1, minclsyspri, 0, 0, 0); } void zfs_fini(void) { taskq_destroy(zfsvfs_taskq); zfsctl_fini(); zfs_znode_fini(); zfs_vnodes_adjust_back(); } int zfs_busy(void) { return (zfs_active_fs_count != 0); } /* * Release VOPs and unmount a suspended filesystem. */ int zfs_end_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds) { ASSERT(ZFS_TEARDOWN_WRITE_HELD(zfsvfs)); ASSERT(ZFS_TEARDOWN_INACTIVE_WRITE_HELD(zfsvfs)); /* * We already own this, so just hold and rele it to update the * objset_t, as the one we had before may have been evicted. 
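(Editor's note: the zfs_vnodes_adjust() logic above only trims desiredvnodes when the administrator has not tuned it. A small stand-alone sketch of that arithmetic follows; the numeric stand-ins for maxproc, v_page_count, vm_kmem_size and the structure sizes are made up for illustration.)

    #include <stdio.h>

    /* Hypothetical stand-ins for the kernel globals used by vntblinit(). */
    #define MAXPROC         1000
    #define V_PAGE_COUNT    (512UL * 1024)          /* 2 GiB of 4 KiB pages */
    #define VM_KMEM_SIZE    (400UL * 1024 * 1024)
    #define OBJ_VNODE_SZ    (256 + 512)     /* sizeof(vm_object) + sizeof(vnode) */

    static unsigned long
    min_ul(unsigned long a, unsigned long b)
    {
            return (a < b ? a : b);
    }

    int
    main(void)
    {
            /* Same formula as vntblinit() uses to pick the default. */
            unsigned long newdesired = min_ul(MAXPROC + V_PAGE_COUNT / 4,
                2 * VM_KMEM_SIZE / (5 * OBJ_VNODE_SZ));
            unsigned long desiredvnodes = newdesired;   /* assume untuned */

            /* If untuned (values match), trim by 25% for ZFS on i386. */
            if (newdesired == desiredvnodes)
                    desiredvnodes = (3 * newdesired) / 4;

            printf("desiredvnodes reduced to %lu\n", desiredvnodes);
            return (0);
    }
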
*/ objset_t *os; VERIFY3P(ds->ds_owner, ==, zfsvfs); VERIFY(dsl_dataset_long_held(ds)); dsl_pool_t *dp = spa_get_dsl(dsl_dataset_get_spa(ds)); dsl_pool_config_enter(dp, FTAG); VERIFY0(dmu_objset_from_ds(ds, &os)); dsl_pool_config_exit(dp, FTAG); zfsvfs->z_os = os; /* release the VOPs */ ZFS_TEARDOWN_INACTIVE_EXIT_WRITE(zfsvfs); ZFS_TEARDOWN_EXIT(zfsvfs, FTAG); /* * Try to force unmount this file system. */ (void) zfs_umount(zfsvfs->z_vfs, 0); zfsvfs->z_unmounted = B_TRUE; return (0); } int zfs_set_version(zfsvfs_t *zfsvfs, uint64_t newvers) { int error; objset_t *os = zfsvfs->z_os; dmu_tx_t *tx; if (newvers < ZPL_VERSION_INITIAL || newvers > ZPL_VERSION) return (SET_ERROR(EINVAL)); if (newvers < zfsvfs->z_version) return (SET_ERROR(EINVAL)); if (zfs_spa_version_map(newvers) > spa_version(dmu_objset_spa(zfsvfs->z_os))) return (SET_ERROR(ENOTSUP)); tx = dmu_tx_create(os); dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_FALSE, ZPL_VERSION_STR); if (newvers >= ZPL_VERSION_SA && !zfsvfs->z_use_sa) { dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_TRUE, ZFS_SA_ATTRS); dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL); } error = dmu_tx_assign(tx, TXG_WAIT); if (error) { dmu_tx_abort(tx); return (error); } error = zap_update(os, MASTER_NODE_OBJ, ZPL_VERSION_STR, 8, 1, &newvers, tx); if (error) { dmu_tx_commit(tx); return (error); } if (newvers >= ZPL_VERSION_SA && !zfsvfs->z_use_sa) { uint64_t sa_obj; ASSERT3U(spa_version(dmu_objset_spa(zfsvfs->z_os)), >=, SPA_VERSION_SA); sa_obj = zap_create(os, DMU_OT_SA_MASTER_NODE, DMU_OT_NONE, 0, tx); error = zap_add(os, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 8, 1, &sa_obj, tx); ASSERT0(error); VERIFY0(sa_set_sa_object(os, sa_obj)); sa_register_update_callback(os, zfs_sa_upgrade); } spa_history_log_internal_ds(dmu_objset_ds(os), "upgrade", tx, "from %ju to %ju", (uintmax_t)zfsvfs->z_version, (uintmax_t)newvers); dmu_tx_commit(tx); zfsvfs->z_version = newvers; os->os_version = newvers; zfs_set_fuid_feature(zfsvfs); return (0); } /* * Read a property stored within the master node. */ int zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value) { uint64_t *cached_copy = NULL; /* * Figure out where in the objset_t the cached copy would live, if it * is available for the requested property. */ if (os != NULL) { switch (prop) { case ZFS_PROP_VERSION: cached_copy = &os->os_version; break; case ZFS_PROP_NORMALIZE: cached_copy = &os->os_normalization; break; case ZFS_PROP_UTF8ONLY: cached_copy = &os->os_utf8only; break; case ZFS_PROP_CASE: cached_copy = &os->os_casesensitivity; break; default: break; } } if (cached_copy != NULL && *cached_copy != OBJSET_PROP_UNINITIALIZED) { *value = *cached_copy; return (0); } /* * If the property wasn't cached, look up the file system's value for * the property. For the version property, we look up a slightly * different string. */ const char *pname; int error = ENOENT; if (prop == ZFS_PROP_VERSION) { pname = ZPL_VERSION_STR; } else { pname = zfs_prop_to_name(prop); } if (os != NULL) { ASSERT3U(os->os_phys->os_type, ==, DMU_OST_ZFS); error = zap_lookup(os, MASTER_NODE_OBJ, pname, 8, 1, value); } if (error == ENOENT) { /* No value set, use the default value */ switch (prop) { case ZFS_PROP_VERSION: *value = ZPL_VERSION; break; case ZFS_PROP_NORMALIZE: case ZFS_PROP_UTF8ONLY: *value = 0; break; case ZFS_PROP_CASE: *value = ZFS_CASE_SENSITIVE; break; case ZFS_PROP_ACLTYPE: *value = ZFS_ACLTYPE_NFSV4; break; default: return (error); } error = 0; } /* * If one of the methods for getting the property value above worked, * copy it into the objset_t's cache. 
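(Editor's note: zfs_get_zplprop() above is a read-through cache: a per-objset slot holds either an "uninitialized" sentinel or the last value read from the master node. A user-space sketch of the same pattern; the sentinel value and the backing lookup are illustrative, not the kernel ones.)

    #include <stdint.h>
    #include <stdio.h>

    #define PROP_UNINITIALIZED      ((uint64_t)-1)  /* stand-in sentinel */

    /* Pretend backing store; in the kernel this is the ZAP master node. */
    static int
    backing_lookup(const char *name, uint64_t *value)
    {
            (void) name;
            *value = 5;     /* e.g. a ZPL version number */
            return (0);
    }

    static int
    get_prop_cached(const char *name, uint64_t *cache_slot, uint64_t *value)
    {
            if (*cache_slot != PROP_UNINITIALIZED) {
                    *value = *cache_slot;   /* fast path: cached copy */
                    return (0);
            }
            int error = backing_lookup(name, value);
            if (error == 0)
                    *cache_slot = *value;   /* populate the cache on success */
            return (error);
    }

    int
    main(void)
    {
            uint64_t version_cache = PROP_UNINITIALIZED;
            uint64_t v;

            (void) get_prop_cached("version", &version_cache, &v); /* miss */
            (void) get_prop_cached("version", &version_cache, &v); /* hit */
            printf("version=%llu\n", (unsigned long long)v);
            return (0);
    }
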
*/ if (error == 0 && cached_copy != NULL) { *cached_copy = *value; } return (error); } /* * Return true if the corresponding vfs's unmounted flag is set. * Otherwise return false. * If this function returns true we know VFS unmount has been initiated. */ boolean_t zfs_get_vfs_flag_unmounted(objset_t *os) { zfsvfs_t *zfvp; boolean_t unmounted = B_FALSE; ASSERT3U(dmu_objset_type(os), ==, DMU_OST_ZFS); mutex_enter(&os->os_user_ptr_lock); zfvp = dmu_objset_get_user(os); if (zfvp != NULL && zfvp->z_vfs != NULL && (zfvp->z_vfs->mnt_kern_flag & MNTK_UNMOUNT)) unmounted = B_TRUE; mutex_exit(&os->os_user_ptr_lock); return (unmounted); } #ifdef _KERNEL void zfsvfs_update_fromname(const char *oldname, const char *newname) { char tmpbuf[MAXPATHLEN]; struct mount *mp; char *fromname; size_t oldlen; oldlen = strlen(oldname); mtx_lock(&mountlist_mtx); TAILQ_FOREACH(mp, &mountlist, mnt_list) { fromname = mp->mnt_stat.f_mntfromname; if (strcmp(fromname, oldname) == 0) { (void) strlcpy(fromname, newname, sizeof (mp->mnt_stat.f_mntfromname)); continue; } if (strncmp(fromname, oldname, oldlen) == 0 && (fromname[oldlen] == '/' || fromname[oldlen] == '@')) { (void) snprintf(tmpbuf, sizeof (tmpbuf), "%s%s", newname, fromname + oldlen); (void) strlcpy(fromname, tmpbuf, sizeof (mp->mnt_stat.f_mntfromname)); continue; } } mtx_unlock(&mountlist_mtx); } #endif diff --git a/module/os/linux/spl/spl-kmem-cache.c b/module/os/linux/spl/spl-kmem-cache.c index efb8d0c30330..e355e2bfc3a0 100644 --- a/module/os/linux/spl/spl-kmem-cache.c +++ b/module/os/linux/spl/spl-kmem-cache.c @@ -1,1465 +1,1465 @@ /* * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. * Copyright (C) 2007 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). * Written by Brian Behlendorf . * UCRL-CODE-235197 * * This file is part of the SPL, Solaris Porting Layer. * * The SPL is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the * Free Software Foundation; either version 2 of the License, or (at your * option) any later version. * * The SPL is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * for more details. * * You should have received a copy of the GNU General Public License along * with the SPL. If not, see . */ #include #include #include #include #include #include #include #include #include #include /* * Within the scope of spl-kmem.c file the kmem_cache_* definitions * are removed to allow access to the real Linux slab allocator. */ #undef kmem_cache_destroy #undef kmem_cache_create #undef kmem_cache_alloc #undef kmem_cache_free /* * Linux 3.16 replaced smp_mb__{before,after}_{atomic,clear}_{dec,inc,bit}() * with smp_mb__{before,after}_atomic() because they were redundant. This is * only used inside our SLAB allocator, so we implement an internal wrapper * here to give us smp_mb__{before,after}_atomic() on older kernels. */ #ifndef smp_mb__before_atomic #define smp_mb__before_atomic(x) smp_mb__before_clear_bit(x) #endif #ifndef smp_mb__after_atomic #define smp_mb__after_atomic(x) smp_mb__after_clear_bit(x) #endif /* BEGIN CSTYLED */ /* * Cache magazines are an optimization designed to minimize the cost of * allocating memory. 
They do this by keeping a per-cpu cache of recently * freed objects, which can then be reallocated without taking a lock. This * can improve performance on highly contended caches. However, because * objects in magazines will prevent otherwise empty slabs from being * immediately released this may not be ideal for low memory machines. * * For this reason spl_kmem_cache_magazine_size can be used to set a maximum * magazine size. When this value is set to 0 the magazine size will be * automatically determined based on the object size. Otherwise magazines * will be limited to 2-256 objects per magazine (i.e per cpu). Magazines * may never be entirely disabled in this implementation. */ static unsigned int spl_kmem_cache_magazine_size = 0; module_param(spl_kmem_cache_magazine_size, uint, 0444); MODULE_PARM_DESC(spl_kmem_cache_magazine_size, "Default magazine size (2-256), set automatically (0)"); /* * The default behavior is to report the number of objects remaining in the * cache. This allows the Linux VM to repeatedly reclaim objects from the * cache when memory is low satisfy other memory allocations. Alternately, * setting this value to KMC_RECLAIM_ONCE limits how aggressively the cache * is reclaimed. This may increase the likelihood of out of memory events. */ static unsigned int spl_kmem_cache_reclaim = 0 /* KMC_RECLAIM_ONCE */; module_param(spl_kmem_cache_reclaim, uint, 0644); MODULE_PARM_DESC(spl_kmem_cache_reclaim, "Single reclaim pass (0x1)"); static unsigned int spl_kmem_cache_obj_per_slab = SPL_KMEM_CACHE_OBJ_PER_SLAB; module_param(spl_kmem_cache_obj_per_slab, uint, 0644); MODULE_PARM_DESC(spl_kmem_cache_obj_per_slab, "Number of objects per slab"); static unsigned int spl_kmem_cache_max_size = SPL_KMEM_CACHE_MAX_SIZE; module_param(spl_kmem_cache_max_size, uint, 0644); MODULE_PARM_DESC(spl_kmem_cache_max_size, "Maximum size of slab in MB"); /* * For small objects the Linux slab allocator should be used to make the most * efficient use of the memory. However, large objects are not supported by * the Linux slab and therefore the SPL implementation is preferred. A cutoff * of 16K was determined to be optimal for architectures using 4K pages and * to also work well on architecutres using larger 64K page sizes. */ static unsigned int spl_kmem_cache_slab_limit = 16384; module_param(spl_kmem_cache_slab_limit, uint, 0644); MODULE_PARM_DESC(spl_kmem_cache_slab_limit, "Objects less than N bytes use the Linux slab"); /* * The number of threads available to allocate new slabs for caches. This * should not need to be tuned but it is available for performance analysis. */ static unsigned int spl_kmem_cache_kmem_threads = 4; module_param(spl_kmem_cache_kmem_threads, uint, 0444); MODULE_PARM_DESC(spl_kmem_cache_kmem_threads, "Number of spl_kmem_cache threads"); /* END CSTYLED */ /* * Slab allocation interfaces * * While the Linux slab implementation was inspired by the Solaris * implementation I cannot use it to emulate the Solaris APIs. I * require two features which are not provided by the Linux slab. * * 1) Constructors AND destructors. Recent versions of the Linux * kernel have removed support for destructors. This is a deal * breaker for the SPL which contains particularly expensive * initializers for mutex's, condition variables, etc. We also * require a minimal level of cleanup for these data types unlike * many Linux data types which do need to be explicitly destroyed. * * 2) Virtual address space backed slab. 
Callers of the Solaris slab * expect it to work well for both small are very large allocations. * Because of memory fragmentation the Linux slab which is backed * by kmalloc'ed memory performs very badly when confronted with * large numbers of large allocations. Basing the slab on the * virtual address space removes the need for contiguous pages * and greatly improve performance for large allocations. * * For these reasons, the SPL has its own slab implementation with * the needed features. It is not as highly optimized as either the * Solaris or Linux slabs, but it should get me most of what is * needed until it can be optimized or obsoleted by another approach. * * One serious concern I do have about this method is the relatively * small virtual address space on 32bit arches. This will seriously * constrain the size of the slab caches and their performance. */ struct list_head spl_kmem_cache_list; /* List of caches */ struct rw_semaphore spl_kmem_cache_sem; /* Cache list lock */ taskq_t *spl_kmem_cache_taskq; /* Task queue for aging / reclaim */ static void spl_cache_shrink(spl_kmem_cache_t *skc, void *obj); static void * kv_alloc(spl_kmem_cache_t *skc, int size, int flags) { gfp_t lflags = kmem_flags_convert(flags); void *ptr; ptr = spl_vmalloc(size, lflags | __GFP_HIGHMEM); /* Resulting allocated memory will be page aligned */ ASSERT(IS_P2ALIGNED(ptr, PAGE_SIZE)); return (ptr); } static void kv_free(spl_kmem_cache_t *skc, void *ptr, int size) { ASSERT(IS_P2ALIGNED(ptr, PAGE_SIZE)); /* * The Linux direct reclaim path uses this out of band value to * determine if forward progress is being made. Normally this is * incremented by kmem_freepages() which is part of the various * Linux slab implementations. However, since we are using none * of that infrastructure we are responsible for incrementing it. */ if (current->reclaim_state) current->reclaim_state->reclaimed_slab += size >> PAGE_SHIFT; vfree(ptr); } /* * Required space for each aligned sks. */ static inline uint32_t spl_sks_size(spl_kmem_cache_t *skc) { return (P2ROUNDUP_TYPED(sizeof (spl_kmem_slab_t), skc->skc_obj_align, uint32_t)); } /* * Required space for each aligned object. */ static inline uint32_t spl_obj_size(spl_kmem_cache_t *skc) { uint32_t align = skc->skc_obj_align; return (P2ROUNDUP_TYPED(skc->skc_obj_size, align, uint32_t) + P2ROUNDUP_TYPED(sizeof (spl_kmem_obj_t), align, uint32_t)); } uint64_t spl_kmem_cache_inuse(kmem_cache_t *cache) { return (cache->skc_obj_total); } EXPORT_SYMBOL(spl_kmem_cache_inuse); uint64_t spl_kmem_cache_entry_size(kmem_cache_t *cache) { return (cache->skc_obj_size); } EXPORT_SYMBOL(spl_kmem_cache_entry_size); /* * Lookup the spl_kmem_object_t for an object given that object. */ static inline spl_kmem_obj_t * spl_sko_from_obj(spl_kmem_cache_t *skc, void *obj) { return (obj + P2ROUNDUP_TYPED(skc->skc_obj_size, skc->skc_obj_align, uint32_t)); } /* * It's important that we pack the spl_kmem_obj_t structure and the * actual objects in to one large address space to minimize the number * of calls to the allocator. It is far better to do a few large * allocations and then subdivide it ourselves. Now which allocator * we use requires balancing a few trade offs. * * For small objects we use kmem_alloc() because as long as you are * only requesting a small number of pages (ideally just one) its cheap. * However, when you start requesting multiple pages with kmem_alloc() * it gets increasingly expensive since it requires contiguous pages. 
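(Editor's note: the spl_sks_size()/spl_obj_size()/spl_sko_from_obj() helpers above are plain power-of-two round-up arithmetic: each object is padded to the cache alignment and immediately followed by its spl_kmem_obj_t header. A stand-alone sketch of that layout; the structure sizes and the base address are made up, and P2ROUNDUP is written out in its conventional form.)

    #include <stdint.h>
    #include <stdio.h>

    /* Round x up to the next multiple of align (align must be a power of two). */
    #define P2ROUNDUP(x, align)     (-(-(x) & -(align)))

    int
    main(void)
    {
            uint32_t obj_size = 1000;   /* hypothetical skc_obj_size */
            uint32_t align = 32;        /* hypothetical skc_obj_align */
            uint32_t sko_size = 48;     /* hypothetical sizeof (spl_kmem_obj_t) */
            uint32_t sks_size = 128;    /* hypothetical aligned slab header size */

            /* Space per object: padded object plus padded object header. */
            uint32_t per_obj = P2ROUNDUP(obj_size, align) +
                P2ROUNDUP(sko_size, align);

            uintptr_t base = 0x10000;   /* pretend slab base address */
            for (int i = 0; i < 3; i++) {
                    /* Object i sits after the slab header, headers trail objects. */
                    uintptr_t obj = base + P2ROUNDUP(sks_size, align) +
                        (uintptr_t)i * per_obj;
                    uintptr_t sko = obj + P2ROUNDUP(obj_size, align);
                    printf("obj %d at 0x%lx, header at 0x%lx\n",
                        i, (unsigned long)obj, (unsigned long)sko);
            }
            return (0);
    }
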
* For this reason we shift to vmem_alloc() for slabs of large objects * which removes the need for contiguous pages. We do not use * vmem_alloc() in all cases because there is significant locking * overhead in __get_vm_area_node(). This function takes a single * global lock when acquiring an available virtual address range which * serializes all vmem_alloc()'s for all slab caches. Using slightly * different allocation functions for small and large objects should * give us the best of both worlds. * * +------------------------+ * | spl_kmem_slab_t --+-+ | * | skc_obj_size <-+ | | * | spl_kmem_obj_t | | * | skc_obj_size <---+ | * | spl_kmem_obj_t | | * | ... v | * +------------------------+ */ static spl_kmem_slab_t * spl_slab_alloc(spl_kmem_cache_t *skc, int flags) { spl_kmem_slab_t *sks; void *base; uint32_t obj_size; base = kv_alloc(skc, skc->skc_slab_size, flags); if (base == NULL) return (NULL); sks = (spl_kmem_slab_t *)base; sks->sks_magic = SKS_MAGIC; sks->sks_objs = skc->skc_slab_objs; sks->sks_age = jiffies; sks->sks_cache = skc; INIT_LIST_HEAD(&sks->sks_list); INIT_LIST_HEAD(&sks->sks_free_list); sks->sks_ref = 0; obj_size = spl_obj_size(skc); for (int i = 0; i < sks->sks_objs; i++) { void *obj = base + spl_sks_size(skc) + (i * obj_size); ASSERT(IS_P2ALIGNED(obj, skc->skc_obj_align)); spl_kmem_obj_t *sko = spl_sko_from_obj(skc, obj); sko->sko_addr = obj; sko->sko_magic = SKO_MAGIC; sko->sko_slab = sks; INIT_LIST_HEAD(&sko->sko_list); list_add_tail(&sko->sko_list, &sks->sks_free_list); } return (sks); } /* * Remove a slab from complete or partial list, it must be called with * the 'skc->skc_lock' held but the actual free must be performed * outside the lock to prevent deadlocking on vmem addresses. */ static void spl_slab_free(spl_kmem_slab_t *sks, struct list_head *sks_list, struct list_head *sko_list) { spl_kmem_cache_t *skc; ASSERT(sks->sks_magic == SKS_MAGIC); ASSERT(sks->sks_ref == 0); skc = sks->sks_cache; ASSERT(skc->skc_magic == SKC_MAGIC); /* * Update slab/objects counters in the cache, then remove the * slab from the skc->skc_partial_list. Finally add the slab * and all its objects in to the private work lists where the * destructors will be called and the memory freed to the system. */ skc->skc_obj_total -= sks->sks_objs; skc->skc_slab_total--; list_del(&sks->sks_list); list_add(&sks->sks_list, sks_list); list_splice_init(&sks->sks_free_list, sko_list); } /* * Reclaim empty slabs at the end of the partial list. */ static void spl_slab_reclaim(spl_kmem_cache_t *skc) { spl_kmem_slab_t *sks = NULL, *m = NULL; spl_kmem_obj_t *sko = NULL, *n = NULL; LIST_HEAD(sks_list); LIST_HEAD(sko_list); /* * Empty slabs and objects must be moved to a private list so they * can be safely freed outside the spin lock. All empty slabs are * at the end of skc->skc_partial_list, therefore once a non-empty * slab is found we can stop scanning. */ spin_lock(&skc->skc_lock); list_for_each_entry_safe_reverse(sks, m, &skc->skc_partial_list, sks_list) { if (sks->sks_ref > 0) break; spl_slab_free(sks, &sks_list, &sko_list); } spin_unlock(&skc->skc_lock); /* * The following two loops ensure all the object destructors are run, * and the slabs themselves are freed. This is all done outside the * skc->skc_lock since this allows the destructor to sleep, and * allows us to perform a conditional reschedule when a freeing a * large number of objects and slabs back to the system. 
*/ list_for_each_entry_safe(sko, n, &sko_list, sko_list) { ASSERT(sko->sko_magic == SKO_MAGIC); } list_for_each_entry_safe(sks, m, &sks_list, sks_list) { ASSERT(sks->sks_magic == SKS_MAGIC); kv_free(skc, sks, skc->skc_slab_size); } } static spl_kmem_emergency_t * spl_emergency_search(struct rb_root *root, void *obj) { struct rb_node *node = root->rb_node; spl_kmem_emergency_t *ske; unsigned long address = (unsigned long)obj; while (node) { ske = container_of(node, spl_kmem_emergency_t, ske_node); if (address < ske->ske_obj) node = node->rb_left; else if (address > ske->ske_obj) node = node->rb_right; else return (ske); } return (NULL); } static int spl_emergency_insert(struct rb_root *root, spl_kmem_emergency_t *ske) { struct rb_node **new = &(root->rb_node), *parent = NULL; spl_kmem_emergency_t *ske_tmp; unsigned long address = ske->ske_obj; while (*new) { ske_tmp = container_of(*new, spl_kmem_emergency_t, ske_node); parent = *new; if (address < ske_tmp->ske_obj) new = &((*new)->rb_left); else if (address > ske_tmp->ske_obj) new = &((*new)->rb_right); else return (0); } rb_link_node(&ske->ske_node, parent, new); rb_insert_color(&ske->ske_node, root); return (1); } /* * Allocate a single emergency object and track it in a red black tree. */ static int spl_emergency_alloc(spl_kmem_cache_t *skc, int flags, void **obj) { gfp_t lflags = kmem_flags_convert(flags); spl_kmem_emergency_t *ske; int order = get_order(skc->skc_obj_size); int empty; /* Last chance use a partial slab if one now exists */ spin_lock(&skc->skc_lock); empty = list_empty(&skc->skc_partial_list); spin_unlock(&skc->skc_lock); if (!empty) return (-EEXIST); ske = kmalloc(sizeof (*ske), lflags); if (ske == NULL) return (-ENOMEM); ske->ske_obj = __get_free_pages(lflags, order); if (ske->ske_obj == 0) { kfree(ske); return (-ENOMEM); } spin_lock(&skc->skc_lock); empty = spl_emergency_insert(&skc->skc_emergency_tree, ske); if (likely(empty)) { skc->skc_obj_total++; skc->skc_obj_emergency++; if (skc->skc_obj_emergency > skc->skc_obj_emergency_max) skc->skc_obj_emergency_max = skc->skc_obj_emergency; } spin_unlock(&skc->skc_lock); if (unlikely(!empty)) { free_pages(ske->ske_obj, order); kfree(ske); return (-EINVAL); } *obj = (void *)ske->ske_obj; return (0); } /* * Locate the passed object in the red black tree and free it. */ static int spl_emergency_free(spl_kmem_cache_t *skc, void *obj) { spl_kmem_emergency_t *ske; int order = get_order(skc->skc_obj_size); spin_lock(&skc->skc_lock); ske = spl_emergency_search(&skc->skc_emergency_tree, obj); if (ske) { rb_erase(&ske->ske_node, &skc->skc_emergency_tree); skc->skc_obj_emergency--; skc->skc_obj_total--; } spin_unlock(&skc->skc_lock); if (ske == NULL) return (-ENOENT); free_pages(ske->ske_obj, order); kfree(ske); return (0); } /* * Release objects from the per-cpu magazine back to their slab. The flush * argument contains the max number of entries to remove from the magazine. */ static void spl_cache_flush(spl_kmem_cache_t *skc, spl_kmem_magazine_t *skm, int flush) { spin_lock(&skc->skc_lock); ASSERT(skc->skc_magic == SKC_MAGIC); ASSERT(skm->skm_magic == SKM_MAGIC); int count = MIN(flush, skm->skm_avail); for (int i = 0; i < count; i++) spl_cache_shrink(skc, skm->skm_objs[i]); skm->skm_avail -= count; memmove(skm->skm_objs, &(skm->skm_objs[count]), sizeof (void *) * skm->skm_avail); spin_unlock(&skc->skc_lock); } /* * Size a slab based on the size of each aligned object plus spl_kmem_obj_t. * When on-slab we want to target spl_kmem_cache_obj_per_slab. 
However, * for very small objects we may end up with more than this so as not * to waste space in the minimal allocation of a single page. */ static int spl_slab_size(spl_kmem_cache_t *skc, uint32_t *objs, uint32_t *size) { uint32_t sks_size, obj_size, max_size, tgt_size, tgt_objs; sks_size = spl_sks_size(skc); obj_size = spl_obj_size(skc); max_size = (spl_kmem_cache_max_size * 1024 * 1024); tgt_size = (spl_kmem_cache_obj_per_slab * obj_size + sks_size); if (tgt_size <= max_size) { tgt_objs = (tgt_size - sks_size) / obj_size; } else { tgt_objs = (max_size - sks_size) / obj_size; tgt_size = (tgt_objs * obj_size) + sks_size; } if (tgt_objs == 0) return (-ENOSPC); *objs = tgt_objs; *size = tgt_size; return (0); } /* * Make a guess at reasonable per-cpu magazine size based on the size of * each object and the cost of caching N of them in each magazine. Long * term this should really adapt based on an observed usage heuristic. */ static int spl_magazine_size(spl_kmem_cache_t *skc) { uint32_t obj_size = spl_obj_size(skc); int size; if (spl_kmem_cache_magazine_size > 0) return (MAX(MIN(spl_kmem_cache_magazine_size, 256), 2)); /* Per-magazine sizes below assume a 4Kib page size */ if (obj_size > (PAGE_SIZE * 256)) size = 4; /* Minimum 4Mib per-magazine */ else if (obj_size > (PAGE_SIZE * 32)) size = 16; /* Minimum 2Mib per-magazine */ else if (obj_size > (PAGE_SIZE)) size = 64; /* Minimum 256Kib per-magazine */ else if (obj_size > (PAGE_SIZE / 4)) size = 128; /* Minimum 128Kib per-magazine */ else size = 256; return (size); } /* * Allocate a per-cpu magazine to associate with a specific core. */ static spl_kmem_magazine_t * spl_magazine_alloc(spl_kmem_cache_t *skc, int cpu) { spl_kmem_magazine_t *skm; int size = sizeof (spl_kmem_magazine_t) + sizeof (void *) * skc->skc_mag_size; skm = kmalloc_node(size, GFP_KERNEL, cpu_to_node(cpu)); if (skm) { skm->skm_magic = SKM_MAGIC; skm->skm_avail = 0; skm->skm_size = skc->skc_mag_size; skm->skm_refill = skc->skc_mag_refill; skm->skm_cache = skc; skm->skm_cpu = cpu; } return (skm); } /* * Free a per-cpu magazine associated with a specific core. */ static void spl_magazine_free(spl_kmem_magazine_t *skm) { ASSERT(skm->skm_magic == SKM_MAGIC); ASSERT(skm->skm_avail == 0); kfree(skm); } /* * Create all pre-cpu magazines of reasonable sizes. */ static int spl_magazine_create(spl_kmem_cache_t *skc) { int i = 0; ASSERT((skc->skc_flags & KMC_SLAB) == 0); skc->skc_mag = kzalloc(sizeof (spl_kmem_magazine_t *) * num_possible_cpus(), kmem_flags_convert(KM_SLEEP)); skc->skc_mag_size = spl_magazine_size(skc); skc->skc_mag_refill = (skc->skc_mag_size + 1) / 2; for_each_possible_cpu(i) { skc->skc_mag[i] = spl_magazine_alloc(skc, i); if (!skc->skc_mag[i]) { for (i--; i >= 0; i--) spl_magazine_free(skc->skc_mag[i]); kfree(skc->skc_mag); return (-ENOMEM); } } return (0); } /* * Destroy all pre-cpu magazines. 
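(Editor's note: a worked example of the spl_slab_size() calculation above, using made-up header/object sizes and the default-style tunables; the numbers are illustrative only.)

    #include <stdint.h>
    #include <stdio.h>

    int
    main(void)
    {
            uint32_t obj_per_slab = 8;              /* target objects per slab */
            uint32_t max_size = 32 * 1024 * 1024;   /* max slab size in bytes */
            uint32_t sks_size = 128;                /* made-up aligned header */
            uint32_t obj_size = 128 * 1024;         /* made-up aligned object */

            uint32_t tgt_size = obj_per_slab * obj_size + sks_size;
            uint32_t tgt_objs;

            if (tgt_size <= max_size) {
                    /* The whole target fits; back out the object count. */
                    tgt_objs = (tgt_size - sks_size) / obj_size;
            } else {
                    /* Clamp to max_size and recompute the smaller slab. */
                    tgt_objs = (max_size - sks_size) / obj_size;
                    tgt_size = tgt_objs * obj_size + sks_size;
            }

            printf("slab: %u objects, %u bytes\n", tgt_objs, tgt_size);
            return (0);
    }
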
*/ static void spl_magazine_destroy(spl_kmem_cache_t *skc) { spl_kmem_magazine_t *skm; int i = 0; ASSERT((skc->skc_flags & KMC_SLAB) == 0); for_each_possible_cpu(i) { skm = skc->skc_mag[i]; spl_cache_flush(skc, skm, skm->skm_avail); spl_magazine_free(skm); } kfree(skc->skc_mag); } /* * Create a object cache based on the following arguments: * name cache name * size cache object size * align cache object alignment * ctor cache object constructor * dtor cache object destructor * reclaim cache object reclaim * priv cache private data for ctor/dtor/reclaim * vmp unused must be NULL * flags * KMC_KVMEM Force kvmem backed SPL cache * KMC_SLAB Force Linux slab backed cache * KMC_NODEBUG Disable debugging (unsupported) */ spl_kmem_cache_t * spl_kmem_cache_create(const char *name, size_t size, size_t align, spl_kmem_ctor_t ctor, spl_kmem_dtor_t dtor, void *reclaim, void *priv, void *vmp, int flags) { gfp_t lflags = kmem_flags_convert(KM_SLEEP); spl_kmem_cache_t *skc; int rc; /* * Unsupported flags */ ASSERT(vmp == NULL); ASSERT(reclaim == NULL); might_sleep(); skc = kzalloc(sizeof (*skc), lflags); if (skc == NULL) return (NULL); skc->skc_magic = SKC_MAGIC; skc->skc_name_size = strlen(name) + 1; skc->skc_name = (char *)kmalloc(skc->skc_name_size, lflags); if (skc->skc_name == NULL) { kfree(skc); return (NULL); } - strncpy(skc->skc_name, name, skc->skc_name_size); + strlcpy(skc->skc_name, name, skc->skc_name_size); skc->skc_ctor = ctor; skc->skc_dtor = dtor; skc->skc_private = priv; skc->skc_vmp = vmp; skc->skc_linux_cache = NULL; skc->skc_flags = flags; skc->skc_obj_size = size; skc->skc_obj_align = SPL_KMEM_CACHE_ALIGN; atomic_set(&skc->skc_ref, 0); INIT_LIST_HEAD(&skc->skc_list); INIT_LIST_HEAD(&skc->skc_complete_list); INIT_LIST_HEAD(&skc->skc_partial_list); skc->skc_emergency_tree = RB_ROOT; spin_lock_init(&skc->skc_lock); init_waitqueue_head(&skc->skc_waitq); skc->skc_slab_fail = 0; skc->skc_slab_create = 0; skc->skc_slab_destroy = 0; skc->skc_slab_total = 0; skc->skc_slab_alloc = 0; skc->skc_slab_max = 0; skc->skc_obj_total = 0; skc->skc_obj_alloc = 0; skc->skc_obj_max = 0; skc->skc_obj_deadlock = 0; skc->skc_obj_emergency = 0; skc->skc_obj_emergency_max = 0; rc = percpu_counter_init_common(&skc->skc_linux_alloc, 0, GFP_KERNEL); if (rc != 0) { kfree(skc); return (NULL); } /* * Verify the requested alignment restriction is sane. */ if (align) { VERIFY(ISP2(align)); VERIFY3U(align, >=, SPL_KMEM_CACHE_ALIGN); VERIFY3U(align, <=, PAGE_SIZE); skc->skc_obj_align = align; } /* * When no specific type of slab is requested (kmem, vmem, or * linuxslab) then select a cache type based on the object size * and default tunables. */ if (!(skc->skc_flags & (KMC_SLAB | KMC_KVMEM))) { if (spl_kmem_cache_slab_limit && size <= (size_t)spl_kmem_cache_slab_limit) { /* * Objects smaller than spl_kmem_cache_slab_limit can * use the Linux slab for better space-efficiency. */ skc->skc_flags |= KMC_SLAB; } else { /* * All other objects are considered large and are * placed on kvmem backed slabs. */ skc->skc_flags |= KMC_KVMEM; } } /* * Given the type of slab allocate the required resources. */ if (skc->skc_flags & KMC_KVMEM) { rc = spl_slab_size(skc, &skc->skc_slab_objs, &skc->skc_slab_size); if (rc) goto out; rc = spl_magazine_create(skc); if (rc) goto out; } else { unsigned long slabflags = 0; if (size > (SPL_MAX_KMEM_ORDER_NR_PAGES * PAGE_SIZE)) { rc = EINVAL; goto out; } #if defined(SLAB_USERCOPY) /* * Required for PAX-enabled kernels if the slab is to be * used for copying between user and kernel space. 
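(Editor's note: a minimal sketch of how a caller might use the cache API built in this file, end to end. The object type, function names prefixed my_, and header paths are hypothetical; ctor/dtor/reclaim/priv/vmp are left NULL as permitted above, and flags == 0 lets spl_kmem_cache_create() pick KMC_SLAB or KMC_KVMEM from the object size.)

    #include <sys/types.h>
    #include <sys/kmem.h>           /* KM_SLEEP (assumed header location) */
    #include <sys/kmem_cache.h>

    /* Hypothetical object type cached by a consumer module. */
    typedef struct my_node {
            uint64_t mn_key;
            uint64_t mn_value;
    } my_node_t;

    static spl_kmem_cache_t *my_node_cache;

    void
    my_module_init(void)
    {
            /* No constructor, destructor, or private data for this sketch. */
            my_node_cache = spl_kmem_cache_create("my_node_cache",
                sizeof (my_node_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
    }

    void
    my_module_use(void)
    {
            /* KM_SLEEP callers are guaranteed not to fail. */
            my_node_t *n = spl_kmem_cache_alloc(my_node_cache, KM_SLEEP);

            n->mn_key = 1;
            n->mn_value = 2;
            spl_kmem_cache_free(my_node_cache, n);
    }

    void
    my_module_fini(void)
    {
            spl_kmem_cache_destroy(my_node_cache);
    }
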
*/ slabflags |= SLAB_USERCOPY; #endif #if defined(HAVE_KMEM_CACHE_CREATE_USERCOPY) /* * Newer grsec patchset uses kmem_cache_create_usercopy() * instead of SLAB_USERCOPY flag */ skc->skc_linux_cache = kmem_cache_create_usercopy( skc->skc_name, size, align, slabflags, 0, size, NULL); #else skc->skc_linux_cache = kmem_cache_create( skc->skc_name, size, align, slabflags, NULL); #endif if (skc->skc_linux_cache == NULL) { rc = ENOMEM; goto out; } } down_write(&spl_kmem_cache_sem); list_add_tail(&skc->skc_list, &spl_kmem_cache_list); up_write(&spl_kmem_cache_sem); return (skc); out: kfree(skc->skc_name); percpu_counter_destroy(&skc->skc_linux_alloc); kfree(skc); return (NULL); } EXPORT_SYMBOL(spl_kmem_cache_create); /* * Register a move callback for cache defragmentation. * XXX: Unimplemented but harmless to stub out for now. */ void spl_kmem_cache_set_move(spl_kmem_cache_t *skc, kmem_cbrc_t (move)(void *, void *, size_t, void *)) { ASSERT(move != NULL); } EXPORT_SYMBOL(spl_kmem_cache_set_move); /* * Destroy a cache and all objects associated with the cache. */ void spl_kmem_cache_destroy(spl_kmem_cache_t *skc) { DECLARE_WAIT_QUEUE_HEAD(wq); taskqid_t id; ASSERT(skc->skc_magic == SKC_MAGIC); ASSERT(skc->skc_flags & (KMC_KVMEM | KMC_SLAB)); down_write(&spl_kmem_cache_sem); list_del_init(&skc->skc_list); up_write(&spl_kmem_cache_sem); /* Cancel any and wait for any pending delayed tasks */ VERIFY(!test_and_set_bit(KMC_BIT_DESTROY, &skc->skc_flags)); spin_lock(&skc->skc_lock); id = skc->skc_taskqid; spin_unlock(&skc->skc_lock); taskq_cancel_id(spl_kmem_cache_taskq, id); /* * Wait until all current callers complete, this is mainly * to catch the case where a low memory situation triggers a * cache reaping action which races with this destroy. */ wait_event(wq, atomic_read(&skc->skc_ref) == 0); if (skc->skc_flags & KMC_KVMEM) { spl_magazine_destroy(skc); spl_slab_reclaim(skc); } else { ASSERT(skc->skc_flags & KMC_SLAB); kmem_cache_destroy(skc->skc_linux_cache); } spin_lock(&skc->skc_lock); /* * Validate there are no objects in use and free all the * spl_kmem_slab_t, spl_kmem_obj_t, and object buffers. */ ASSERT3U(skc->skc_slab_alloc, ==, 0); ASSERT3U(skc->skc_obj_alloc, ==, 0); ASSERT3U(skc->skc_slab_total, ==, 0); ASSERT3U(skc->skc_obj_total, ==, 0); ASSERT3U(skc->skc_obj_emergency, ==, 0); ASSERT(list_empty(&skc->skc_complete_list)); ASSERT3U(percpu_counter_sum(&skc->skc_linux_alloc), ==, 0); percpu_counter_destroy(&skc->skc_linux_alloc); spin_unlock(&skc->skc_lock); kfree(skc->skc_name); kfree(skc); } EXPORT_SYMBOL(spl_kmem_cache_destroy); /* * Allocate an object from a slab attached to the cache. This is used to * repopulate the per-cpu magazine caches in batches when they run low. */ static void * spl_cache_obj(spl_kmem_cache_t *skc, spl_kmem_slab_t *sks) { spl_kmem_obj_t *sko; ASSERT(skc->skc_magic == SKC_MAGIC); ASSERT(sks->sks_magic == SKS_MAGIC); sko = list_entry(sks->sks_free_list.next, spl_kmem_obj_t, sko_list); ASSERT(sko->sko_magic == SKO_MAGIC); ASSERT(sko->sko_addr != NULL); /* Remove from sks_free_list */ list_del_init(&sko->sko_list); sks->sks_age = jiffies; sks->sks_ref++; skc->skc_obj_alloc++; /* Track max obj usage statistics */ if (skc->skc_obj_alloc > skc->skc_obj_max) skc->skc_obj_max = skc->skc_obj_alloc; /* Track max slab usage statistics */ if (sks->sks_ref == 1) { skc->skc_slab_alloc++; if (skc->skc_slab_alloc > skc->skc_slab_max) skc->skc_slab_max = skc->skc_slab_alloc; } return (sko->sko_addr); } /* * Generic slab allocation function to run by the global work queues. 
* It is responsible for allocating a new slab, linking it in to the list * of partial slabs, and then waking any waiters. */ static int __spl_cache_grow(spl_kmem_cache_t *skc, int flags) { spl_kmem_slab_t *sks; fstrans_cookie_t cookie = spl_fstrans_mark(); sks = spl_slab_alloc(skc, flags); spl_fstrans_unmark(cookie); spin_lock(&skc->skc_lock); if (sks) { skc->skc_slab_total++; skc->skc_obj_total += sks->sks_objs; list_add_tail(&sks->sks_list, &skc->skc_partial_list); smp_mb__before_atomic(); clear_bit(KMC_BIT_DEADLOCKED, &skc->skc_flags); smp_mb__after_atomic(); } spin_unlock(&skc->skc_lock); return (sks == NULL ? -ENOMEM : 0); } static void spl_cache_grow_work(void *data) { spl_kmem_alloc_t *ska = (spl_kmem_alloc_t *)data; spl_kmem_cache_t *skc = ska->ska_cache; int error = __spl_cache_grow(skc, ska->ska_flags); atomic_dec(&skc->skc_ref); smp_mb__before_atomic(); clear_bit(KMC_BIT_GROWING, &skc->skc_flags); smp_mb__after_atomic(); if (error == 0) wake_up_all(&skc->skc_waitq); kfree(ska); } /* * Returns non-zero when a new slab should be available. */ static int spl_cache_grow_wait(spl_kmem_cache_t *skc) { return (!test_bit(KMC_BIT_GROWING, &skc->skc_flags)); } /* * No available objects on any slabs, create a new slab. Note that this * functionality is disabled for KMC_SLAB caches which are backed by the * Linux slab. */ static int spl_cache_grow(spl_kmem_cache_t *skc, int flags, void **obj) { int remaining, rc = 0; ASSERT0(flags & ~KM_PUBLIC_MASK); ASSERT(skc->skc_magic == SKC_MAGIC); ASSERT((skc->skc_flags & KMC_SLAB) == 0); might_sleep(); *obj = NULL; /* * Before allocating a new slab wait for any reaping to complete and * then return so the local magazine can be rechecked for new objects. */ if (test_bit(KMC_BIT_REAPING, &skc->skc_flags)) { rc = spl_wait_on_bit(&skc->skc_flags, KMC_BIT_REAPING, TASK_UNINTERRUPTIBLE); return (rc ? rc : -EAGAIN); } /* * Note: It would be nice to reduce the overhead of context switch * and improve NUMA locality, by trying to allocate a new slab in the * current process context with KM_NOSLEEP flag. * * However, this can't be applied to vmem/kvmem due to a bug that * spl_vmalloc() doesn't honor gfp flags in page table allocation. */ /* * This is handled by dispatching a work request to the global work * queue. This allows us to asynchronously allocate a new slab while * retaining the ability to safely fall back to a smaller synchronous * allocations to ensure forward progress is always maintained. */ if (test_and_set_bit(KMC_BIT_GROWING, &skc->skc_flags) == 0) { spl_kmem_alloc_t *ska; ska = kmalloc(sizeof (*ska), kmem_flags_convert(flags)); if (ska == NULL) { clear_bit_unlock(KMC_BIT_GROWING, &skc->skc_flags); smp_mb__after_atomic(); wake_up_all(&skc->skc_waitq); return (-ENOMEM); } atomic_inc(&skc->skc_ref); ska->ska_cache = skc; ska->ska_flags = flags; taskq_init_ent(&ska->ska_tqe); taskq_dispatch_ent(spl_kmem_cache_taskq, spl_cache_grow_work, ska, 0, &ska->ska_tqe); } /* * The goal here is to only detect the rare case where a virtual slab * allocation has deadlocked. We must be careful to minimize the use * of emergency objects which are more expensive to track. Therefore, * we set a very long timeout for the asynchronous allocation and if * the timeout is reached the cache is flagged as deadlocked. From * this point only new emergency objects will be allocated until the * asynchronous allocation completes and clears the deadlocked flag. 
*/ if (test_bit(KMC_BIT_DEADLOCKED, &skc->skc_flags)) { rc = spl_emergency_alloc(skc, flags, obj); } else { remaining = wait_event_timeout(skc->skc_waitq, spl_cache_grow_wait(skc), HZ / 10); if (!remaining) { spin_lock(&skc->skc_lock); if (test_bit(KMC_BIT_GROWING, &skc->skc_flags)) { set_bit(KMC_BIT_DEADLOCKED, &skc->skc_flags); skc->skc_obj_deadlock++; } spin_unlock(&skc->skc_lock); } rc = -ENOMEM; } return (rc); } /* * Refill a per-cpu magazine with objects from the slabs for this cache. * Ideally the magazine can be repopulated using existing objects which have * been released, however if we are unable to locate enough free objects new * slabs of objects will be created. On success NULL is returned, otherwise * the address of a single emergency object is returned for use by the caller. */ static void * spl_cache_refill(spl_kmem_cache_t *skc, spl_kmem_magazine_t *skm, int flags) { spl_kmem_slab_t *sks; int count = 0, rc, refill; void *obj = NULL; ASSERT(skc->skc_magic == SKC_MAGIC); ASSERT(skm->skm_magic == SKM_MAGIC); refill = MIN(skm->skm_refill, skm->skm_size - skm->skm_avail); spin_lock(&skc->skc_lock); while (refill > 0) { /* No slabs available we may need to grow the cache */ if (list_empty(&skc->skc_partial_list)) { spin_unlock(&skc->skc_lock); local_irq_enable(); rc = spl_cache_grow(skc, flags, &obj); local_irq_disable(); /* Emergency object for immediate use by caller */ if (rc == 0 && obj != NULL) return (obj); if (rc) goto out; /* Rescheduled to different CPU skm is not local */ if (skm != skc->skc_mag[smp_processor_id()]) goto out; /* * Potentially rescheduled to the same CPU but * allocations may have occurred from this CPU while * we were sleeping so recalculate max refill. */ refill = MIN(refill, skm->skm_size - skm->skm_avail); spin_lock(&skc->skc_lock); continue; } /* Grab the next available slab */ sks = list_entry((&skc->skc_partial_list)->next, spl_kmem_slab_t, sks_list); ASSERT(sks->sks_magic == SKS_MAGIC); ASSERT(sks->sks_ref < sks->sks_objs); ASSERT(!list_empty(&sks->sks_free_list)); /* * Consume as many objects as needed to refill the requested * cache. We must also be careful not to overfill it. */ while (sks->sks_ref < sks->sks_objs && refill-- > 0 && ++count) { ASSERT(skm->skm_avail < skm->skm_size); ASSERT(count < skm->skm_size); skm->skm_objs[skm->skm_avail++] = spl_cache_obj(skc, sks); } /* Move slab to skc_complete_list when full */ if (sks->sks_ref == sks->sks_objs) { list_del(&sks->sks_list); list_add(&sks->sks_list, &skc->skc_complete_list); } } spin_unlock(&skc->skc_lock); out: return (NULL); } /* * Release an object back to the slab from which it came. */ static void spl_cache_shrink(spl_kmem_cache_t *skc, void *obj) { spl_kmem_slab_t *sks = NULL; spl_kmem_obj_t *sko = NULL; ASSERT(skc->skc_magic == SKC_MAGIC); sko = spl_sko_from_obj(skc, obj); ASSERT(sko->sko_magic == SKO_MAGIC); sks = sko->sko_slab; ASSERT(sks->sks_magic == SKS_MAGIC); ASSERT(sks->sks_cache == skc); list_add(&sko->sko_list, &sks->sks_free_list); sks->sks_age = jiffies; sks->sks_ref--; skc->skc_obj_alloc--; /* * Move slab to skc_partial_list when no longer full. Slabs * are added to the head to keep the partial list is quasi-full * sorted order. Fuller at the head, emptier at the tail. */ if (sks->sks_ref == (sks->sks_objs - 1)) { list_del(&sks->sks_list); list_add(&sks->sks_list, &skc->skc_partial_list); } /* * Move empty slabs to the end of the partial list so * they can be easily found and freed during reclamation. 
*/ if (sks->sks_ref == 0) { list_del(&sks->sks_list); list_add_tail(&sks->sks_list, &skc->skc_partial_list); skc->skc_slab_alloc--; } } /* * Allocate an object from the per-cpu magazine, or if the magazine * is empty directly allocate from a slab and repopulate the magazine. */ void * spl_kmem_cache_alloc(spl_kmem_cache_t *skc, int flags) { spl_kmem_magazine_t *skm; void *obj = NULL; ASSERT0(flags & ~KM_PUBLIC_MASK); ASSERT(skc->skc_magic == SKC_MAGIC); ASSERT(!test_bit(KMC_BIT_DESTROY, &skc->skc_flags)); /* * Allocate directly from a Linux slab. All optimizations are left * to the underlying cache we only need to guarantee that KM_SLEEP * callers will never fail. */ if (skc->skc_flags & KMC_SLAB) { struct kmem_cache *slc = skc->skc_linux_cache; do { obj = kmem_cache_alloc(slc, kmem_flags_convert(flags)); } while ((obj == NULL) && !(flags & KM_NOSLEEP)); if (obj != NULL) { /* * Even though we leave everything up to the * underlying cache we still keep track of * how many objects we've allocated in it for * better debuggability. */ percpu_counter_inc(&skc->skc_linux_alloc); } goto ret; } local_irq_disable(); restart: /* * Safe to update per-cpu structure without lock, but * in the restart case we must be careful to reacquire * the local magazine since this may have changed * when we need to grow the cache. */ skm = skc->skc_mag[smp_processor_id()]; ASSERT(skm->skm_magic == SKM_MAGIC); if (likely(skm->skm_avail)) { /* Object available in CPU cache, use it */ obj = skm->skm_objs[--skm->skm_avail]; } else { obj = spl_cache_refill(skc, skm, flags); if ((obj == NULL) && !(flags & KM_NOSLEEP)) goto restart; local_irq_enable(); goto ret; } local_irq_enable(); ASSERT(obj); ASSERT(IS_P2ALIGNED(obj, skc->skc_obj_align)); ret: /* Pre-emptively migrate object to CPU L1 cache */ if (obj) { if (obj && skc->skc_ctor) skc->skc_ctor(obj, skc->skc_private, flags); else prefetchw(obj); } return (obj); } EXPORT_SYMBOL(spl_kmem_cache_alloc); /* * Free an object back to the local per-cpu magazine, there is no * guarantee that this is the same magazine the object was originally * allocated from. We may need to flush entire from the magazine * back to the slabs to make space. */ void spl_kmem_cache_free(spl_kmem_cache_t *skc, void *obj) { spl_kmem_magazine_t *skm; unsigned long flags; int do_reclaim = 0; int do_emergency = 0; ASSERT(skc->skc_magic == SKC_MAGIC); ASSERT(!test_bit(KMC_BIT_DESTROY, &skc->skc_flags)); /* * Run the destructor */ if (skc->skc_dtor) skc->skc_dtor(obj, skc->skc_private); /* * Free the object from the Linux underlying Linux slab. */ if (skc->skc_flags & KMC_SLAB) { kmem_cache_free(skc->skc_linux_cache, obj); percpu_counter_dec(&skc->skc_linux_alloc); return; } /* * While a cache has outstanding emergency objects all freed objects * must be checked. However, since emergency objects will never use * a virtual address these objects can be safely excluded as an * optimization. */ if (!is_vmalloc_addr(obj)) { spin_lock(&skc->skc_lock); do_emergency = (skc->skc_obj_emergency > 0); spin_unlock(&skc->skc_lock); if (do_emergency && (spl_emergency_free(skc, obj) == 0)) return; } local_irq_save(flags); /* * Safe to update per-cpu structure without lock, but * no remote memory allocation tracking is being performed * it is entirely possible to allocate an object from one * CPU cache and return it to another. 
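(Editor's note: a toy, single-threaded illustration of the magazine idea used by the allocation and free paths above: a small per-CPU stack absorbs most allocs and frees, and only overflow or underflow touches the shared slab lists. This deliberately omits IRQ disabling, per-CPU indexing, and the batch refill/flush the real code performs.)

    #include <stdio.h>

    #define MAG_SIZE        4       /* illustrative magazine size */

    struct magazine {
            void    *objs[MAG_SIZE];
            int     avail;
    };

    /* Stand-ins for the slow path that touches the shared slab lists. */
    static void *
    slab_alloc(void)
    {
            static int objs[64];
            static int n;
            return (&objs[n++]);
    }

    static void
    slab_free(void *obj)
    {
            (void) obj;
    }

    static void *
    mag_alloc(struct magazine *skm)
    {
            if (skm->avail > 0)
                    return (skm->objs[--skm->avail]);   /* fast path */
            return (slab_alloc());                      /* refill/slow path */
    }

    static void
    mag_free(struct magazine *skm, void *obj)
    {
            if (skm->avail >= MAG_SIZE)                 /* full: flush one */
                    slab_free(skm->objs[--skm->avail]);
            skm->objs[skm->avail++] = obj;
    }

    int
    main(void)
    {
            struct magazine m = { .avail = 0 };
            void *a = mag_alloc(&m), *b = mag_alloc(&m);

            mag_free(&m, a);
            mag_free(&m, b);
            printf("cached %d objects\n", m.avail);
            return (0);
    }
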
*/ skm = skc->skc_mag[smp_processor_id()]; ASSERT(skm->skm_magic == SKM_MAGIC); /* * Per-CPU cache full, flush it to make space for this object, * this may result in an empty slab which can be reclaimed once * interrupts are re-enabled. */ if (unlikely(skm->skm_avail >= skm->skm_size)) { spl_cache_flush(skc, skm, skm->skm_refill); do_reclaim = 1; } /* Available space in cache, use it */ skm->skm_objs[skm->skm_avail++] = obj; local_irq_restore(flags); if (do_reclaim) spl_slab_reclaim(skc); } EXPORT_SYMBOL(spl_kmem_cache_free); /* * Depending on how many and which objects are released it may simply * repopulate the local magazine which will then need to age-out. Objects * which cannot fit in the magazine will be released back to their slabs * which will also need to age out before being released. This is all just * best effort and we do not want to thrash creating and destroying slabs. */ void spl_kmem_cache_reap_now(spl_kmem_cache_t *skc) { ASSERT(skc->skc_magic == SKC_MAGIC); ASSERT(!test_bit(KMC_BIT_DESTROY, &skc->skc_flags)); if (skc->skc_flags & KMC_SLAB) return; atomic_inc(&skc->skc_ref); /* * Prevent concurrent cache reaping when contended. */ if (test_and_set_bit(KMC_BIT_REAPING, &skc->skc_flags)) goto out; /* Reclaim from the magazine and free all now empty slabs. */ unsigned long irq_flags; local_irq_save(irq_flags); spl_kmem_magazine_t *skm = skc->skc_mag[smp_processor_id()]; spl_cache_flush(skc, skm, skm->skm_avail); local_irq_restore(irq_flags); spl_slab_reclaim(skc); clear_bit_unlock(KMC_BIT_REAPING, &skc->skc_flags); smp_mb__after_atomic(); wake_up_bit(&skc->skc_flags, KMC_BIT_REAPING); out: atomic_dec(&skc->skc_ref); } EXPORT_SYMBOL(spl_kmem_cache_reap_now); /* * This is stubbed out for code consistency with other platforms. There * is existing logic to prevent concurrent reaping so while this is ugly * it should do no harm. */ int spl_kmem_cache_reap_active(void) { return (0); } EXPORT_SYMBOL(spl_kmem_cache_reap_active); /* * Reap all free slabs from all registered caches. */ void spl_kmem_reap(void) { spl_kmem_cache_t *skc = NULL; down_read(&spl_kmem_cache_sem); list_for_each_entry(skc, &spl_kmem_cache_list, skc_list) { spl_kmem_cache_reap_now(skc); } up_read(&spl_kmem_cache_sem); } EXPORT_SYMBOL(spl_kmem_reap); int spl_kmem_cache_init(void) { init_rwsem(&spl_kmem_cache_sem); INIT_LIST_HEAD(&spl_kmem_cache_list); spl_kmem_cache_taskq = taskq_create("spl_kmem_cache", spl_kmem_cache_kmem_threads, maxclsyspri, spl_kmem_cache_kmem_threads * 8, INT_MAX, TASKQ_PREPOPULATE | TASKQ_DYNAMIC); if (spl_kmem_cache_taskq == NULL) return (-ENOMEM); return (0); } void spl_kmem_cache_fini(void) { taskq_destroy(spl_kmem_cache_taskq); } diff --git a/module/os/linux/spl/spl-kstat.c b/module/os/linux/spl/spl-kstat.c index c6d3c8f4413f..4308581147a9 100644 --- a/module/os/linux/spl/spl-kstat.c +++ b/module/os/linux/spl/spl-kstat.c @@ -1,715 +1,715 @@ /* * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. * Copyright (C) 2007 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). * Written by Brian Behlendorf . * UCRL-CODE-235197 * * This file is part of the SPL, Solaris Porting Layer. * * The SPL is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the * Free Software Foundation; either version 2 of the License, or (at your * option) any later version. 
* * The SPL is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * for more details. * * You should have received a copy of the GNU General Public License along * with the SPL. If not, see . * * Solaris Porting Layer (SPL) Kstat Implementation. * * Links to Illumos.org for more information on kstat function: * [1] https://illumos.org/man/1M/kstat * [2] https://illumos.org/man/9f/kstat_create */ #include #include #include #include #include static kmutex_t kstat_module_lock; static struct list_head kstat_module_list; static kid_t kstat_id; static int kstat_resize_raw(kstat_t *ksp) { if (ksp->ks_raw_bufsize == KSTAT_RAW_MAX) return (ENOMEM); vmem_free(ksp->ks_raw_buf, ksp->ks_raw_bufsize); ksp->ks_raw_bufsize = MIN(ksp->ks_raw_bufsize * 2, KSTAT_RAW_MAX); ksp->ks_raw_buf = vmem_alloc(ksp->ks_raw_bufsize, KM_SLEEP); return (0); } static int kstat_seq_show_headers(struct seq_file *f) { kstat_t *ksp = (kstat_t *)f->private; int rc = 0; ASSERT(ksp->ks_magic == KS_MAGIC); seq_printf(f, "%d %d 0x%02x %d %d %lld %lld\n", ksp->ks_kid, ksp->ks_type, ksp->ks_flags, ksp->ks_ndata, (int)ksp->ks_data_size, ksp->ks_crtime, ksp->ks_snaptime); switch (ksp->ks_type) { case KSTAT_TYPE_RAW: restart: if (ksp->ks_raw_ops.headers) { rc = ksp->ks_raw_ops.headers( ksp->ks_raw_buf, ksp->ks_raw_bufsize); if (rc == ENOMEM && !kstat_resize_raw(ksp)) goto restart; if (!rc) seq_puts(f, ksp->ks_raw_buf); } else { seq_printf(f, "raw data\n"); } break; case KSTAT_TYPE_NAMED: seq_printf(f, "%-31s %-4s %s\n", "name", "type", "data"); break; case KSTAT_TYPE_INTR: seq_printf(f, "%-8s %-8s %-8s %-8s %-8s\n", "hard", "soft", "watchdog", "spurious", "multsvc"); break; case KSTAT_TYPE_IO: seq_printf(f, "%-8s %-8s %-8s %-8s %-8s %-8s " "%-8s %-8s %-8s %-8s %-8s %-8s\n", "nread", "nwritten", "reads", "writes", "wtime", "wlentime", "wupdate", "rtime", "rlentime", "rupdate", "wcnt", "rcnt"); break; case KSTAT_TYPE_TIMER: seq_printf(f, "%-31s %-8s " "%-8s %-8s %-8s %-8s %-8s\n", "name", "events", "elapsed", "min", "max", "start", "stop"); break; default: PANIC("Undefined kstat type %d\n", ksp->ks_type); } return (-rc); } static int kstat_seq_show_raw(struct seq_file *f, unsigned char *p, int l) { int i, j; for (i = 0; ; i++) { seq_printf(f, "%03x:", i); for (j = 0; j < 16; j++) { if (i * 16 + j >= l) { seq_printf(f, "\n"); goto out; } seq_printf(f, " %02x", (unsigned char)p[i * 16 + j]); } seq_printf(f, "\n"); } out: return (0); } static int kstat_seq_show_named(struct seq_file *f, kstat_named_t *knp) { seq_printf(f, "%-31s %-4d ", knp->name, knp->data_type); switch (knp->data_type) { case KSTAT_DATA_CHAR: knp->value.c[15] = '\0'; /* NULL terminate */ seq_printf(f, "%-16s", knp->value.c); break; /* * NOTE - We need to be more careful able what tokens are * used for each arch, for now this is correct for x86_64. 
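(Editor's note: the raw-kstat paths around kstat_resize_raw() above use a grow-and-retry pattern: if the formatter reports ENOMEM the buffer is doubled, capped at KSTAT_RAW_MAX, and the call is retried. A user-space sketch of the same pattern; the formatter and the cap value are illustrative.)

    #include <errno.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    #define RAW_MAX         (64 * 1024)     /* stand-in for KSTAT_RAW_MAX */

    /* Pretend formatter: fails with ENOMEM unless the buffer is big enough. */
    static int
    format_raw(char *buf, size_t size)
    {
            const char *msg = "a fairly long raw kstat payload";

            if (size < strlen(msg) + 1)
                    return (ENOMEM);
            strcpy(buf, msg);
            return (0);
    }

    int
    main(void)
    {
            size_t bufsize = 16;
            char *buf = malloc(bufsize);
            int rc;

            if (buf == NULL)
                    return (ENOMEM);
            while ((rc = format_raw(buf, bufsize)) == ENOMEM) {
                    if (bufsize == RAW_MAX)
                            break;          /* give up, keep ENOMEM */
                    bufsize = bufsize * 2 > RAW_MAX ? RAW_MAX : bufsize * 2;
                    free(buf);
                    buf = malloc(bufsize);
                    if (buf == NULL)
                            return (ENOMEM);
            }
            if (rc == 0)
                    printf("%s\n", buf);
            free(buf);
            return (rc);
    }
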
*/ case KSTAT_DATA_INT32: seq_printf(f, "%d", knp->value.i32); break; case KSTAT_DATA_UINT32: seq_printf(f, "%u", knp->value.ui32); break; case KSTAT_DATA_INT64: seq_printf(f, "%lld", (signed long long)knp->value.i64); break; case KSTAT_DATA_UINT64: seq_printf(f, "%llu", (unsigned long long)knp->value.ui64); break; case KSTAT_DATA_LONG: seq_printf(f, "%ld", knp->value.l); break; case KSTAT_DATA_ULONG: seq_printf(f, "%lu", knp->value.ul); break; case KSTAT_DATA_STRING: KSTAT_NAMED_STR_PTR(knp) [KSTAT_NAMED_STR_BUFLEN(knp)-1] = '\0'; seq_printf(f, "%s", KSTAT_NAMED_STR_PTR(knp)); break; default: PANIC("Undefined kstat data type %d\n", knp->data_type); } seq_printf(f, "\n"); return (0); } static int kstat_seq_show_intr(struct seq_file *f, kstat_intr_t *kip) { seq_printf(f, "%-8u %-8u %-8u %-8u %-8u\n", kip->intrs[KSTAT_INTR_HARD], kip->intrs[KSTAT_INTR_SOFT], kip->intrs[KSTAT_INTR_WATCHDOG], kip->intrs[KSTAT_INTR_SPURIOUS], kip->intrs[KSTAT_INTR_MULTSVC]); return (0); } static int kstat_seq_show_io(struct seq_file *f, kstat_io_t *kip) { /* though wlentime & friends are signed, they will never be negative */ seq_printf(f, "%-8llu %-8llu %-8u %-8u %-8llu %-8llu " "%-8llu %-8llu %-8llu %-8llu %-8u %-8u\n", kip->nread, kip->nwritten, kip->reads, kip->writes, kip->wtime, kip->wlentime, kip->wlastupdate, kip->rtime, kip->rlentime, kip->rlastupdate, kip->wcnt, kip->rcnt); return (0); } static int kstat_seq_show_timer(struct seq_file *f, kstat_timer_t *ktp) { seq_printf(f, "%-31s %-8llu %-8llu %-8llu %-8llu %-8llu %-8llu\n", ktp->name, ktp->num_events, ktp->elapsed_time, ktp->min_time, ktp->max_time, ktp->start_time, ktp->stop_time); return (0); } static int kstat_seq_show(struct seq_file *f, void *p) { kstat_t *ksp = (kstat_t *)f->private; int rc = 0; ASSERT(ksp->ks_magic == KS_MAGIC); switch (ksp->ks_type) { case KSTAT_TYPE_RAW: restart: if (ksp->ks_raw_ops.data) { rc = ksp->ks_raw_ops.data( ksp->ks_raw_buf, ksp->ks_raw_bufsize, p); if (rc == ENOMEM && !kstat_resize_raw(ksp)) goto restart; if (!rc) seq_puts(f, ksp->ks_raw_buf); } else { ASSERT(ksp->ks_ndata == 1); rc = kstat_seq_show_raw(f, ksp->ks_data, ksp->ks_data_size); } break; case KSTAT_TYPE_NAMED: rc = kstat_seq_show_named(f, (kstat_named_t *)p); break; case KSTAT_TYPE_INTR: rc = kstat_seq_show_intr(f, (kstat_intr_t *)p); break; case KSTAT_TYPE_IO: rc = kstat_seq_show_io(f, (kstat_io_t *)p); break; case KSTAT_TYPE_TIMER: rc = kstat_seq_show_timer(f, (kstat_timer_t *)p); break; default: PANIC("Undefined kstat type %d\n", ksp->ks_type); } return (-rc); } static int kstat_default_update(kstat_t *ksp, int rw) { ASSERT(ksp != NULL); if (rw == KSTAT_WRITE) return (EACCES); return (0); } static void * kstat_seq_data_addr(kstat_t *ksp, loff_t n) { void *rc = NULL; switch (ksp->ks_type) { case KSTAT_TYPE_RAW: if (ksp->ks_raw_ops.addr) rc = ksp->ks_raw_ops.addr(ksp, n); else rc = ksp->ks_data; break; case KSTAT_TYPE_NAMED: rc = ksp->ks_data + n * sizeof (kstat_named_t); break; case KSTAT_TYPE_INTR: rc = ksp->ks_data + n * sizeof (kstat_intr_t); break; case KSTAT_TYPE_IO: rc = ksp->ks_data + n * sizeof (kstat_io_t); break; case KSTAT_TYPE_TIMER: rc = ksp->ks_data + n * sizeof (kstat_timer_t); break; default: PANIC("Undefined kstat type %d\n", ksp->ks_type); } return (rc); } static void * kstat_seq_start(struct seq_file *f, loff_t *pos) { loff_t n = *pos; kstat_t *ksp = (kstat_t *)f->private; ASSERT(ksp->ks_magic == KS_MAGIC); mutex_enter(ksp->ks_lock); if (ksp->ks_type == KSTAT_TYPE_RAW) { ksp->ks_raw_bufsize = PAGE_SIZE; ksp->ks_raw_buf = 
vmem_alloc(ksp->ks_raw_bufsize, KM_SLEEP); } /* Dynamically update kstat, on error existing kstats are used */ (void) ksp->ks_update(ksp, KSTAT_READ); ksp->ks_snaptime = gethrtime(); if (!(ksp->ks_flags & KSTAT_FLAG_NO_HEADERS) && !n && kstat_seq_show_headers(f)) return (NULL); if (n >= ksp->ks_ndata) return (NULL); return (kstat_seq_data_addr(ksp, n)); } static void * kstat_seq_next(struct seq_file *f, void *p, loff_t *pos) { kstat_t *ksp = (kstat_t *)f->private; ASSERT(ksp->ks_magic == KS_MAGIC); ++*pos; if (*pos >= ksp->ks_ndata) return (NULL); return (kstat_seq_data_addr(ksp, *pos)); } static void kstat_seq_stop(struct seq_file *f, void *v) { kstat_t *ksp = (kstat_t *)f->private; ASSERT(ksp->ks_magic == KS_MAGIC); if (ksp->ks_type == KSTAT_TYPE_RAW) vmem_free(ksp->ks_raw_buf, ksp->ks_raw_bufsize); mutex_exit(ksp->ks_lock); } static const struct seq_operations kstat_seq_ops = { .show = kstat_seq_show, .start = kstat_seq_start, .next = kstat_seq_next, .stop = kstat_seq_stop, }; static kstat_module_t * kstat_find_module(char *name) { kstat_module_t *module = NULL; list_for_each_entry(module, &kstat_module_list, ksm_module_list) { if (strncmp(name, module->ksm_name, KSTAT_STRLEN) == 0) return (module); } return (NULL); } static kstat_module_t * kstat_create_module(char *name) { kstat_module_t *module; struct proc_dir_entry *pde; pde = proc_mkdir(name, proc_spl_kstat); if (pde == NULL) return (NULL); module = kmem_alloc(sizeof (kstat_module_t), KM_SLEEP); module->ksm_proc = pde; - strlcpy(module->ksm_name, name, KSTAT_STRLEN+1); + strlcpy(module->ksm_name, name, KSTAT_STRLEN); INIT_LIST_HEAD(&module->ksm_kstat_list); list_add_tail(&module->ksm_module_list, &kstat_module_list); return (module); } static void kstat_delete_module(kstat_module_t *module) { ASSERT(list_empty(&module->ksm_kstat_list)); remove_proc_entry(module->ksm_name, proc_spl_kstat); list_del(&module->ksm_module_list); kmem_free(module, sizeof (kstat_module_t)); } static int proc_kstat_open(struct inode *inode, struct file *filp) { struct seq_file *f; int rc; rc = seq_open(filp, &kstat_seq_ops); if (rc) return (rc); f = filp->private_data; f->private = SPL_PDE_DATA(inode); return (0); } static ssize_t proc_kstat_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos) { struct seq_file *f = filp->private_data; kstat_t *ksp = f->private; int rc; ASSERT(ksp->ks_magic == KS_MAGIC); mutex_enter(ksp->ks_lock); rc = ksp->ks_update(ksp, KSTAT_WRITE); mutex_exit(ksp->ks_lock); if (rc) return (-rc); *ppos += len; return (len); } static const kstat_proc_op_t proc_kstat_operations = { #ifdef HAVE_PROC_OPS_STRUCT .proc_open = proc_kstat_open, .proc_write = proc_kstat_write, .proc_read = seq_read, .proc_lseek = seq_lseek, .proc_release = seq_release, #else .open = proc_kstat_open, .write = proc_kstat_write, .read = seq_read, .llseek = seq_lseek, .release = seq_release, #endif }; void __kstat_set_raw_ops(kstat_t *ksp, int (*headers)(char *buf, size_t size), int (*data)(char *buf, size_t size, void *data), void *(*addr)(kstat_t *ksp, loff_t index)) { ksp->ks_raw_ops.headers = headers; ksp->ks_raw_ops.data = data; ksp->ks_raw_ops.addr = addr; } EXPORT_SYMBOL(__kstat_set_raw_ops); void kstat_proc_entry_init(kstat_proc_entry_t *kpep, const char *module, const char *name) { kpep->kpe_owner = NULL; kpep->kpe_proc = NULL; INIT_LIST_HEAD(&kpep->kpe_list); - strncpy(kpep->kpe_module, module, KSTAT_STRLEN); - strncpy(kpep->kpe_name, name, KSTAT_STRLEN); + strlcpy(kpep->kpe_module, module, sizeof (kpep->kpe_module)); + 
strlcpy(kpep->kpe_name, name, sizeof (kpep->kpe_name)); } EXPORT_SYMBOL(kstat_proc_entry_init); kstat_t * __kstat_create(const char *ks_module, int ks_instance, const char *ks_name, const char *ks_class, uchar_t ks_type, uint_t ks_ndata, uchar_t ks_flags) { kstat_t *ksp; ASSERT(ks_module); ASSERT(ks_instance == 0); ASSERT(ks_name); if ((ks_type == KSTAT_TYPE_INTR) || (ks_type == KSTAT_TYPE_IO)) ASSERT(ks_ndata == 1); ksp = kmem_zalloc(sizeof (*ksp), KM_SLEEP); if (ksp == NULL) return (ksp); mutex_enter(&kstat_module_lock); ksp->ks_kid = kstat_id; kstat_id++; mutex_exit(&kstat_module_lock); ksp->ks_magic = KS_MAGIC; mutex_init(&ksp->ks_private_lock, NULL, MUTEX_DEFAULT, NULL); ksp->ks_lock = &ksp->ks_private_lock; ksp->ks_crtime = gethrtime(); ksp->ks_snaptime = ksp->ks_crtime; ksp->ks_instance = ks_instance; - strncpy(ksp->ks_class, ks_class, KSTAT_STRLEN); + strlcpy(ksp->ks_class, ks_class, sizeof (ksp->ks_class)); ksp->ks_type = ks_type; ksp->ks_flags = ks_flags; ksp->ks_update = kstat_default_update; ksp->ks_private = NULL; ksp->ks_raw_ops.headers = NULL; ksp->ks_raw_ops.data = NULL; ksp->ks_raw_ops.addr = NULL; ksp->ks_raw_buf = NULL; ksp->ks_raw_bufsize = 0; kstat_proc_entry_init(&ksp->ks_proc, ks_module, ks_name); switch (ksp->ks_type) { case KSTAT_TYPE_RAW: ksp->ks_ndata = 1; ksp->ks_data_size = ks_ndata; break; case KSTAT_TYPE_NAMED: ksp->ks_ndata = ks_ndata; ksp->ks_data_size = ks_ndata * sizeof (kstat_named_t); break; case KSTAT_TYPE_INTR: ksp->ks_ndata = ks_ndata; ksp->ks_data_size = ks_ndata * sizeof (kstat_intr_t); break; case KSTAT_TYPE_IO: ksp->ks_ndata = ks_ndata; ksp->ks_data_size = ks_ndata * sizeof (kstat_io_t); break; case KSTAT_TYPE_TIMER: ksp->ks_ndata = ks_ndata; ksp->ks_data_size = ks_ndata * sizeof (kstat_timer_t); break; default: PANIC("Undefined kstat type %d\n", ksp->ks_type); } if (ksp->ks_flags & KSTAT_FLAG_VIRTUAL) { ksp->ks_data = NULL; } else { ksp->ks_data = kmem_zalloc(ksp->ks_data_size, KM_SLEEP); if (ksp->ks_data == NULL) { kmem_free(ksp, sizeof (*ksp)); ksp = NULL; } } return (ksp); } EXPORT_SYMBOL(__kstat_create); static int kstat_detect_collision(kstat_proc_entry_t *kpep) { kstat_module_t *module; kstat_proc_entry_t *tmp = NULL; char *parent; char *cp; parent = kmem_asprintf("%s", kpep->kpe_module); if ((cp = strrchr(parent, '/')) == NULL) { kmem_strfree(parent); return (0); } cp[0] = '\0'; if ((module = kstat_find_module(parent)) != NULL) { list_for_each_entry(tmp, &module->ksm_kstat_list, kpe_list) { if (strncmp(tmp->kpe_name, cp+1, KSTAT_STRLEN) == 0) { kmem_strfree(parent); return (EEXIST); } } } kmem_strfree(parent); return (0); } /* * Add a file to the proc filesystem under the kstat namespace (i.e. * /proc/spl/kstat/). The file need not necessarily be implemented as a * kstat. */ void kstat_proc_entry_install(kstat_proc_entry_t *kpep, mode_t mode, const kstat_proc_op_t *proc_ops, void *data) { kstat_module_t *module; kstat_proc_entry_t *tmp = NULL; ASSERT(kpep); mutex_enter(&kstat_module_lock); module = kstat_find_module(kpep->kpe_module); if (module == NULL) { if (kstat_detect_collision(kpep) != 0) { cmn_err(CE_WARN, "kstat_create('%s', '%s'): namespace" \ " collision", kpep->kpe_module, kpep->kpe_name); goto out; } module = kstat_create_module(kpep->kpe_module); if (module == NULL) goto out; } /* * Only one entry by this name per-module, on failure the module * shouldn't be deleted because we know it has at least one entry. 
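 *
 * Illustrative aside, not part of this change: why strlcpy() is preferred
 * over strncpy() throughout this patch.  With a four-byte destination and
 * the source "abcd":
 *
 *	char dst[4];
 *	strncpy(dst, "abcd", sizeof (dst));	-> dst = 'a','b','c','d'  (no NUL)
 *	strlcpy(dst, "abcd", sizeof (dst));	-> dst = 'a','b','c','\0' (always
 *						   terminated, source truncated)
 *
 * strlcpy() also returns strlen(src), so callers can detect truncation by
 * checking whether the return value is >= the destination size.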
*/ list_for_each_entry(tmp, &module->ksm_kstat_list, kpe_list) { if (strncmp(tmp->kpe_name, kpep->kpe_name, KSTAT_STRLEN) == 0) goto out; } list_add_tail(&kpep->kpe_list, &module->ksm_kstat_list); kpep->kpe_owner = module; kpep->kpe_proc = proc_create_data(kpep->kpe_name, mode, module->ksm_proc, proc_ops, data); if (kpep->kpe_proc == NULL) { list_del_init(&kpep->kpe_list); if (list_empty(&module->ksm_kstat_list)) kstat_delete_module(module); } out: mutex_exit(&kstat_module_lock); } EXPORT_SYMBOL(kstat_proc_entry_install); void __kstat_install(kstat_t *ksp) { ASSERT(ksp); mode_t mode; /* Specify permission modes for different kstats */ if (strncmp(ksp->ks_proc.kpe_name, "dbufs", KSTAT_STRLEN) == 0) { mode = 0600; } else { mode = 0644; } kstat_proc_entry_install( &ksp->ks_proc, mode, &proc_kstat_operations, ksp); } EXPORT_SYMBOL(__kstat_install); void kstat_proc_entry_delete(kstat_proc_entry_t *kpep) { kstat_module_t *module = kpep->kpe_owner; if (kpep->kpe_proc) remove_proc_entry(kpep->kpe_name, module->ksm_proc); mutex_enter(&kstat_module_lock); list_del_init(&kpep->kpe_list); /* * Remove top level module directory if it wasn't empty before, but now * is. */ if (kpep->kpe_proc && list_empty(&module->ksm_kstat_list)) kstat_delete_module(module); mutex_exit(&kstat_module_lock); } EXPORT_SYMBOL(kstat_proc_entry_delete); void __kstat_delete(kstat_t *ksp) { kstat_proc_entry_delete(&ksp->ks_proc); if (!(ksp->ks_flags & KSTAT_FLAG_VIRTUAL)) kmem_free(ksp->ks_data, ksp->ks_data_size); ksp->ks_lock = NULL; mutex_destroy(&ksp->ks_private_lock); kmem_free(ksp, sizeof (*ksp)); } EXPORT_SYMBOL(__kstat_delete); int spl_kstat_init(void) { mutex_init(&kstat_module_lock, NULL, MUTEX_DEFAULT, NULL); INIT_LIST_HEAD(&kstat_module_list); kstat_id = 0; return (0); } void spl_kstat_fini(void) { ASSERT(list_empty(&kstat_module_list)); mutex_destroy(&kstat_module_lock); } diff --git a/module/os/linux/spl/spl-thread.c b/module/os/linux/spl/spl-thread.c index 32a2d34b1d93..b863945a1c59 100644 --- a/module/os/linux/spl/spl-thread.c +++ b/module/os/linux/spl/spl-thread.c @@ -1,207 +1,207 @@ /* * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. * Copyright (C) 2007 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). * Written by Brian Behlendorf . * UCRL-CODE-235197 * * This file is part of the SPL, Solaris Porting Layer. * * The SPL is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the * Free Software Foundation; either version 2 of the License, or (at your * option) any later version. * * The SPL is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * for more details. * * You should have received a copy of the GNU General Public License along * with the SPL. If not, see . * * Solaris Porting Layer (SPL) Thread Implementation. 
*/ #include #include #include /* * Thread interfaces */ typedef struct thread_priv_s { unsigned long tp_magic; /* Magic */ int tp_name_size; /* Name size */ char *tp_name; /* Name (without _thread suffix) */ void (*tp_func)(void *); /* Registered function */ void *tp_args; /* Args to be passed to function */ size_t tp_len; /* Len to be passed to function */ int tp_state; /* State to start thread at */ pri_t tp_pri; /* Priority to start threat at */ } thread_priv_t; static int thread_generic_wrapper(void *arg) { thread_priv_t *tp = (thread_priv_t *)arg; void (*func)(void *); void *args; ASSERT(tp->tp_magic == TP_MAGIC); func = tp->tp_func; args = tp->tp_args; set_current_state(tp->tp_state); set_user_nice((kthread_t *)current, PRIO_TO_NICE(tp->tp_pri)); kmem_free(tp->tp_name, tp->tp_name_size); kmem_free(tp, sizeof (thread_priv_t)); if (func) func(args); return (0); } /* * thread_create() may block forever if it cannot create a thread or * allocate memory. This is preferable to returning a NULL which Solaris * style callers likely never check for... since it can't fail. */ kthread_t * __thread_create(caddr_t stk, size_t stksize, thread_func_t func, const char *name, void *args, size_t len, proc_t *pp, int state, pri_t pri) { thread_priv_t *tp; struct task_struct *tsk; char *p; /* Option pp is simply ignored */ /* Variable stack size unsupported */ ASSERT(stk == NULL); tp = kmem_alloc(sizeof (thread_priv_t), KM_PUSHPAGE); if (tp == NULL) return (NULL); tp->tp_magic = TP_MAGIC; tp->tp_name_size = strlen(name) + 1; tp->tp_name = kmem_alloc(tp->tp_name_size, KM_PUSHPAGE); if (tp->tp_name == NULL) { kmem_free(tp, sizeof (thread_priv_t)); return (NULL); } - strncpy(tp->tp_name, name, tp->tp_name_size); + strlcpy(tp->tp_name, name, tp->tp_name_size); /* * Strip trailing "_thread" from passed name which will be the func * name since the exposed API has no parameter for passing a name. */ p = strstr(tp->tp_name, "_thread"); if (p) p[0] = '\0'; tp->tp_func = func; tp->tp_args = args; tp->tp_len = len; tp->tp_state = state; tp->tp_pri = pri; tsk = spl_kthread_create(thread_generic_wrapper, (void *)tp, "%s", tp->tp_name); if (IS_ERR(tsk)) return (NULL); wake_up_process(tsk); return ((kthread_t *)tsk); } EXPORT_SYMBOL(__thread_create); /* * spl_kthread_create - Wrapper providing pre-3.13 semantics for * kthread_create() in which it is not killable and less likely * to return -ENOMEM. */ struct task_struct * spl_kthread_create(int (*func)(void *), void *data, const char namefmt[], ...) { struct task_struct *tsk; va_list args; char name[TASK_COMM_LEN]; va_start(args, namefmt); vsnprintf(name, sizeof (name), namefmt, args); va_end(args); do { tsk = kthread_create(func, data, "%s", name); if (IS_ERR(tsk)) { if (signal_pending(current)) { clear_thread_flag(TIF_SIGPENDING); continue; } if (PTR_ERR(tsk) == -ENOMEM) continue; return (NULL); } else { return (tsk); } } while (1); } EXPORT_SYMBOL(spl_kthread_create); /* * The "why" argument indicates the allowable side-effects of the call: * * FORREAL: Extract the next pending signal from p_sig into p_cursig; * stop the process if a stop has been requested or if a traced signal * is pending. * * JUSTLOOKING: Don't stop the process, just indicate whether or not * a signal might be pending (FORREAL is needed to tell for sure). 
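 *
 * Illustrative aside, not part of this change: the usual caller pattern for
 * the two modes described above.  A long-running loop first asks cheaply
 * whether a signal might be pending (JUSTLOOKING) and only then dequeues it
 * for real (FORREAL), bailing out with EINTR.  The helper names in this
 * sketch are hypothetical; only the issig() idiom is taken from this code:
 *
 *	while (more_work(arg)) {			(hypothetical condition)
 *		if (issig(JUSTLOOKING) && issig(FORREAL))
 *			return (SET_ERROR(EINTR));
 *		do_one_unit(arg);			(hypothetical work item)
 *	}
 *	return (0);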
*/ int issig(int why) { ASSERT(why == FORREAL || why == JUSTLOOKING); if (!signal_pending(current)) return (0); if (why != FORREAL) return (1); struct task_struct *task = current; spl_kernel_siginfo_t __info; sigset_t set; siginitsetinv(&set, 1ULL << (SIGSTOP - 1) | 1ULL << (SIGTSTP - 1)); sigorsets(&set, &task->blocked, &set); spin_lock_irq(&task->sighand->siglock); int ret; #ifdef HAVE_DEQUEUE_SIGNAL_4ARG enum pid_type __type; if ((ret = dequeue_signal(task, &set, &__info, &__type)) != 0) { #else if ((ret = dequeue_signal(task, &set, &__info)) != 0) { #endif #ifdef HAVE_SIGNAL_STOP spin_unlock_irq(&task->sighand->siglock); kernel_signal_stop(); #else if (current->jobctl & JOBCTL_STOP_DEQUEUED) spl_set_special_state(TASK_STOPPED); spin_unlock_irq(¤t->sighand->siglock); schedule(); #endif return (0); } spin_unlock_irq(&task->sighand->siglock); return (1); } EXPORT_SYMBOL(issig); diff --git a/module/os/linux/spl/spl-zone.c b/module/os/linux/spl/spl-zone.c index 234ae7f6cd0c..9421f81bf0c8 100644 --- a/module/os/linux/spl/spl-zone.c +++ b/module/os/linux/spl/spl-zone.c @@ -1,424 +1,423 @@ /* * Copyright (c) 2021 Klara Systems, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include #include #include #include #include #include #include #if defined(CONFIG_USER_NS) #include #include #endif static kmutex_t zone_datasets_lock; static struct list_head zone_datasets; typedef struct zone_datasets { struct list_head zds_list; /* zone_datasets linkage */ struct user_namespace *zds_userns; /* namespace reference */ struct list_head zds_datasets; /* datasets for the namespace */ } zone_datasets_t; typedef struct zone_dataset { struct list_head zd_list; /* zone_dataset linkage */ size_t zd_dsnamelen; /* length of name */ char zd_dsname[0]; /* name of the member dataset */ } zone_dataset_t; #if defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) /* * Returns: * - 0 on success * - EBADF if it cannot open the provided file descriptor * - ENOTTY if the file itself is a not a user namespace file. We want to * intercept this error in the ZFS layer. We cannot just return one of the * ZFS_ERR_* errors here as we want to preserve the seperation of the ZFS * and the SPL layers. 
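 *
 * Illustrative aside, not part of this change: the file descriptor passed in
 * here is expected to refer to a user-namespace file, which user space
 * typically obtains by opening /proc/<pid>/ns/user for the target container.
 * A minimal userspace sketch of such a caller (the pid argument and error
 * handling are hypothetical; the plumbing that eventually hands the fd to
 * zone_dataset_attach() is omitted):
 *
 *	#include <sys/types.h>
 *	#include <fcntl.h>
 *	#include <stdio.h>
 *
 *	static int
 *	open_user_ns_fd(pid_t pid)
 *	{
 *		char path[64];
 *
 *		(void) snprintf(path, sizeof (path), "/proc/%d/ns/user",
 *		    (int)pid);
 *		return (open(path, O_RDONLY));
 *	}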
*/ static int user_ns_get(int fd, struct user_namespace **userns) { struct kstatfs st; struct file *nsfile; struct ns_common *ns; int error; if ((nsfile = fget(fd)) == NULL) return (EBADF); if (vfs_statfs(&nsfile->f_path, &st) != 0) { error = ENOTTY; goto done; } if (st.f_type != NSFS_MAGIC) { error = ENOTTY; goto done; } ns = get_proc_ns(file_inode(nsfile)); if (ns->ops->type != CLONE_NEWUSER) { error = ENOTTY; goto done; } *userns = container_of(ns, struct user_namespace, ns); error = 0; done: fput(nsfile); return (error); } #endif /* defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) */ static unsigned int user_ns_zoneid(struct user_namespace *user_ns) { unsigned int r; #if defined(HAVE_USER_NS_COMMON_INUM) r = user_ns->ns.inum; #else r = user_ns->proc_inum; #endif return (r); } static struct zone_datasets * zone_datasets_lookup(unsigned int nsinum) { zone_datasets_t *zds; list_for_each_entry(zds, &zone_datasets, zds_list) { if (user_ns_zoneid(zds->zds_userns) == nsinum) return (zds); } return (NULL); } #if defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) static struct zone_dataset * zone_dataset_lookup(zone_datasets_t *zds, const char *dataset, size_t dsnamelen) { zone_dataset_t *zd; list_for_each_entry(zd, &zds->zds_datasets, zd_list) { if (zd->zd_dsnamelen != dsnamelen) continue; if (strncmp(zd->zd_dsname, dataset, dsnamelen) == 0) return (zd); } return (NULL); } static int zone_dataset_cred_check(cred_t *cred) { if (!uid_eq(cred->uid, GLOBAL_ROOT_UID)) return (EPERM); return (0); } #endif /* defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) */ static int zone_dataset_name_check(const char *dataset, size_t *dsnamelen) { if (dataset[0] == '\0' || dataset[0] == '/') return (ENOENT); *dsnamelen = strlen(dataset); /* Ignore trailing slash, if supplied. */ if (dataset[*dsnamelen - 1] == '/') (*dsnamelen)--; return (0); } int zone_dataset_attach(cred_t *cred, const char *dataset, int userns_fd) { #if defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) struct user_namespace *userns; zone_datasets_t *zds; zone_dataset_t *zd; int error; size_t dsnamelen; if ((error = zone_dataset_cred_check(cred)) != 0) return (error); if ((error = zone_dataset_name_check(dataset, &dsnamelen)) != 0) return (error); if ((error = user_ns_get(userns_fd, &userns)) != 0) return (error); mutex_enter(&zone_datasets_lock); zds = zone_datasets_lookup(user_ns_zoneid(userns)); if (zds == NULL) { zds = kmem_alloc(sizeof (zone_datasets_t), KM_SLEEP); INIT_LIST_HEAD(&zds->zds_list); INIT_LIST_HEAD(&zds->zds_datasets); zds->zds_userns = userns; /* * Lock the namespace by incresing its refcount to prevent * the namespace ID from being reused. 
*/ get_user_ns(userns); list_add_tail(&zds->zds_list, &zone_datasets); } else { zd = zone_dataset_lookup(zds, dataset, dsnamelen); if (zd != NULL) { mutex_exit(&zone_datasets_lock); return (EEXIST); } } zd = kmem_alloc(sizeof (zone_dataset_t) + dsnamelen + 1, KM_SLEEP); zd->zd_dsnamelen = dsnamelen; - strncpy(zd->zd_dsname, dataset, dsnamelen); - zd->zd_dsname[dsnamelen] = '\0'; + strlcpy(zd->zd_dsname, dataset, dsnamelen + 1); INIT_LIST_HEAD(&zd->zd_list); list_add_tail(&zd->zd_list, &zds->zds_datasets); mutex_exit(&zone_datasets_lock); return (0); #else return (ENXIO); #endif /* defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) */ } EXPORT_SYMBOL(zone_dataset_attach); int zone_dataset_detach(cred_t *cred, const char *dataset, int userns_fd) { #if defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) struct user_namespace *userns; zone_datasets_t *zds; zone_dataset_t *zd; int error; size_t dsnamelen; if ((error = zone_dataset_cred_check(cred)) != 0) return (error); if ((error = zone_dataset_name_check(dataset, &dsnamelen)) != 0) return (error); if ((error = user_ns_get(userns_fd, &userns)) != 0) return (error); mutex_enter(&zone_datasets_lock); zds = zone_datasets_lookup(user_ns_zoneid(userns)); if (zds != NULL) zd = zone_dataset_lookup(zds, dataset, dsnamelen); if (zds == NULL || zd == NULL) { mutex_exit(&zone_datasets_lock); return (ENOENT); } list_del(&zd->zd_list); kmem_free(zd, sizeof (*zd) + zd->zd_dsnamelen + 1); /* Prune the namespace entry if it has no more delegations. */ if (list_empty(&zds->zds_datasets)) { /* * Decrease the refcount now that the namespace is no longer * used. It is no longer necessary to prevent the namespace ID * from being reused. */ put_user_ns(userns); list_del(&zds->zds_list); kmem_free(zds, sizeof (*zds)); } mutex_exit(&zone_datasets_lock); return (0); #else return (ENXIO); #endif /* defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) */ } EXPORT_SYMBOL(zone_dataset_detach); /* * A dataset is visible if: * - It is a parent of a namespace entry. * - It is one of the namespace entries. * - It is a child of a namespace entry. * * A dataset is writable if: * - It is one of the namespace entries. * - It is a child of a namespace entry. * * The parent datasets of namespace entries are visible and * read-only to provide a path back to the root of the pool. */ int zone_dataset_visible(const char *dataset, int *write) { zone_datasets_t *zds; zone_dataset_t *zd; size_t dsnamelen, zd_len; int visible; /* Default to read-only, in case visible is returned. */ if (write != NULL) *write = 0; if (zone_dataset_name_check(dataset, &dsnamelen) != 0) return (0); if (INGLOBALZONE(curproc)) { if (write != NULL) *write = 1; return (1); } mutex_enter(&zone_datasets_lock); zds = zone_datasets_lookup(crgetzoneid(curproc->cred)); if (zds == NULL) { mutex_exit(&zone_datasets_lock); return (0); } visible = 0; list_for_each_entry(zd, &zds->zds_datasets, zd_list) { zd_len = strlen(zd->zd_dsname); if (zd_len > dsnamelen) { /* * The name of the namespace entry is longer than that * of the dataset, so it could be that the dataset is a * parent of the namespace entry. */ visible = memcmp(zd->zd_dsname, dataset, dsnamelen) == 0 && zd->zd_dsname[dsnamelen] == '/'; if (visible) break; } else if (zd_len == dsnamelen) { /* * The name of the namespace entry is as long as that * of the dataset, so perhaps the dataset itself is the * namespace entry. 
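 *
 * Illustrative aside, not part of this change: a worked example of the
 * visibility rules documented above.  Suppose "tank/data/zone1" is the only
 * dataset delegated to this namespace (i.e. the only namespace entry).
 * Then, for processes in that namespace:
 *
 *	tank			visible, read-only	(parent of the entry)
 *	tank/data		visible, read-only	(parent of the entry)
 *	tank/data/zone1		visible, writable	(the entry itself)
 *	tank/data/zone1/home	visible, writable	(child of the entry)
 *	tank/other		not visible		(unrelated dataset)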
*/ visible = memcmp(zd->zd_dsname, dataset, zd_len) == 0; if (visible) { if (write != NULL) *write = 1; break; } } else { /* * The name of the namespace entry is shorter than that * of the dataset, so perhaps the dataset is a child of * the namespace entry. */ visible = memcmp(zd->zd_dsname, dataset, zd_len) == 0 && dataset[zd_len] == '/'; if (visible) { if (write != NULL) *write = 1; break; } } } mutex_exit(&zone_datasets_lock); return (visible); } EXPORT_SYMBOL(zone_dataset_visible); unsigned int global_zoneid(void) { unsigned int z = 0; #if defined(CONFIG_USER_NS) z = user_ns_zoneid(&init_user_ns); #endif return (z); } EXPORT_SYMBOL(global_zoneid); unsigned int crgetzoneid(const cred_t *cr) { unsigned int r = 0; #if defined(CONFIG_USER_NS) r = user_ns_zoneid(cr->user_ns); #endif return (r); } EXPORT_SYMBOL(crgetzoneid); boolean_t inglobalzone(proc_t *proc) { #if defined(CONFIG_USER_NS) return (proc->cred->user_ns == &init_user_ns); #else return (B_TRUE); #endif } EXPORT_SYMBOL(inglobalzone); int spl_zone_init(void) { mutex_init(&zone_datasets_lock, NULL, MUTEX_DEFAULT, NULL); INIT_LIST_HEAD(&zone_datasets); return (0); } void spl_zone_fini(void) { zone_datasets_t *zds; zone_dataset_t *zd; /* * It would be better to assert an empty zone_datasets, but since * there's no automatic mechanism for cleaning them up if the user * namespace is destroyed, just do it here, since spl is about to go * out of context. */ while (!list_empty(&zone_datasets)) { zds = list_entry(zone_datasets.next, zone_datasets_t, zds_list); while (!list_empty(&zds->zds_datasets)) { zd = list_entry(zds->zds_datasets.next, zone_dataset_t, zd_list); list_del(&zd->zd_list); kmem_free(zd, sizeof (*zd) + zd->zd_dsnamelen + 1); } put_user_ns(zds->zds_userns); list_del(&zds->zds_list); kmem_free(zds, sizeof (*zds)); } mutex_destroy(&zone_datasets_lock); } diff --git a/module/zfs/dsl_dir.c b/module/zfs/dsl_dir.c index b328959464b2..d93c7f08c1c2 100644 --- a/module/zfs/dsl_dir.c +++ b/module/zfs/dsl_dir.c @@ -1,2478 +1,2476 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or https://opensource.org/licenses/CDDL-1.0. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012, 2018 by Delphix. All rights reserved. * Copyright (c) 2013 Martin Matuska. All rights reserved. * Copyright (c) 2014 Joyent, Inc. All rights reserved. * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved. * Copyright (c) 2016 Actifio, Inc. All rights reserved. * Copyright (c) 2018, loli10K . All rights reserved. 
*/ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "zfs_namecheck.h" #include "zfs_prop.h" /* * Filesystem and Snapshot Limits * ------------------------------ * * These limits are used to restrict the number of filesystems and/or snapshots * that can be created at a given level in the tree or below. A typical * use-case is with a delegated dataset where the administrator wants to ensure * that a user within the zone is not creating too many additional filesystems * or snapshots, even though they're not exceeding their space quota. * * The filesystem and snapshot counts are stored as extensible properties. This * capability is controlled by a feature flag and must be enabled to be used. * Once enabled, the feature is not active until the first limit is set. At * that point, future operations to create/destroy filesystems or snapshots * will validate and update the counts. * * Because the count properties will not exist before the feature is active, * the counts are updated when a limit is first set on an uninitialized * dsl_dir node in the tree (The filesystem/snapshot count on a node includes * all of the nested filesystems/snapshots. Thus, a new leaf node has a * filesystem count of 0 and a snapshot count of 0. Non-existent filesystem and * snapshot count properties on a node indicate uninitialized counts on that * node.) When first setting a limit on an uninitialized node, the code starts * at the filesystem with the new limit and descends into all sub-filesystems * to add the count properties. * * In practice this is lightweight since a limit is typically set when the * filesystem is created and thus has no children. Once valid, changing the * limit value won't require a re-traversal since the counts are already valid. * When recursively fixing the counts, if a node with a limit is encountered * during the descent, the counts are known to be valid and there is no need to * descend into that filesystem's children. The counts on filesystems above the * one with the new limit will still be uninitialized, unless a limit is * eventually set on one of those filesystems. The counts are always recursively * updated when a limit is set on a dataset, unless there is already a limit. * When a new limit value is set on a filesystem with an existing limit, it is * possible for the new limit to be less than the current count at that level * since a user who can change the limit is also allowed to exceed the limit. * * Once the feature is active, then whenever a filesystem or snapshot is * created, the code recurses up the tree, validating the new count against the * limit at each initialized level. In practice, most levels will not have a * limit set. If there is a limit at any initialized level up the tree, the * check must pass or the creation will fail. Likewise, when a filesystem or * snapshot is destroyed, the counts are recursively adjusted all the way up * the initialized nodes in the tree. Renaming a filesystem into different point * in the tree will first validate, then update the counts on each branch up to * the common ancestor. A receive will also validate the counts and then update * them. * * An exception to the above behavior is that the limit is not enforced if the * user has permission to modify the limit. This is primarily so that * recursive snapshots in the global zone always work. 
We want to prevent a * denial-of-service in which a lower level delegated dataset could max out its * limit and thus block recursive snapshots from being taken in the global zone. * Because of this, it is possible for the snapshot count to be over the limit * and snapshots taken in the global zone could cause a lower level dataset to * hit or exceed its limit. The administrator taking the global zone recursive * snapshot should be aware of this side-effect and behave accordingly. * For consistency, the filesystem limit is also not enforced if the user can * modify the limit. * * The filesystem and snapshot limits are validated by dsl_fs_ss_limit_check() * and updated by dsl_fs_ss_count_adjust(). A new limit value is setup in * dsl_dir_activate_fs_ss_limit() and the counts are adjusted, if necessary, by * dsl_dir_init_fs_ss_count(). */ static uint64_t dsl_dir_space_towrite(dsl_dir_t *dd); typedef struct ddulrt_arg { dsl_dir_t *ddulrta_dd; uint64_t ddlrta_txg; } ddulrt_arg_t; static void dsl_dir_evict_async(void *dbu) { dsl_dir_t *dd = dbu; int t; dsl_pool_t *dp __maybe_unused = dd->dd_pool; dd->dd_dbuf = NULL; for (t = 0; t < TXG_SIZE; t++) { ASSERT(!txg_list_member(&dp->dp_dirty_dirs, dd, t)); ASSERT(dd->dd_tempreserved[t] == 0); ASSERT(dd->dd_space_towrite[t] == 0); } if (dd->dd_parent) dsl_dir_async_rele(dd->dd_parent, dd); spa_async_close(dd->dd_pool->dp_spa, dd); if (dsl_deadlist_is_open(&dd->dd_livelist)) dsl_dir_livelist_close(dd); dsl_prop_fini(dd); cv_destroy(&dd->dd_activity_cv); mutex_destroy(&dd->dd_activity_lock); mutex_destroy(&dd->dd_lock); kmem_free(dd, sizeof (dsl_dir_t)); } int dsl_dir_hold_obj(dsl_pool_t *dp, uint64_t ddobj, const char *tail, const void *tag, dsl_dir_t **ddp) { dmu_buf_t *dbuf; dsl_dir_t *dd; dmu_object_info_t doi; int err; ASSERT(dsl_pool_config_held(dp)); err = dmu_bonus_hold(dp->dp_meta_objset, ddobj, tag, &dbuf); if (err != 0) return (err); dd = dmu_buf_get_user(dbuf); dmu_object_info_from_db(dbuf, &doi); ASSERT3U(doi.doi_bonus_type, ==, DMU_OT_DSL_DIR); ASSERT3U(doi.doi_bonus_size, >=, sizeof (dsl_dir_phys_t)); if (dd == NULL) { dsl_dir_t *winner; dd = kmem_zalloc(sizeof (dsl_dir_t), KM_SLEEP); dd->dd_object = ddobj; dd->dd_dbuf = dbuf; dd->dd_pool = dp; mutex_init(&dd->dd_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&dd->dd_activity_lock, NULL, MUTEX_DEFAULT, NULL); cv_init(&dd->dd_activity_cv, NULL, CV_DEFAULT, NULL); dsl_prop_init(dd); if (dsl_dir_is_zapified(dd)) { err = zap_lookup(dp->dp_meta_objset, ddobj, DD_FIELD_CRYPTO_KEY_OBJ, sizeof (uint64_t), 1, &dd->dd_crypto_obj); if (err == 0) { /* check for on-disk format errata */ if (dsl_dir_incompatible_encryption_version( dd)) { dp->dp_spa->spa_errata = ZPOOL_ERRATA_ZOL_6845_ENCRYPTION; } } else if (err != ENOENT) { goto errout; } } if (dsl_dir_phys(dd)->dd_parent_obj) { err = dsl_dir_hold_obj(dp, dsl_dir_phys(dd)->dd_parent_obj, NULL, dd, &dd->dd_parent); if (err != 0) goto errout; if (tail) { #ifdef ZFS_DEBUG uint64_t foundobj; err = zap_lookup(dp->dp_meta_objset, dsl_dir_phys(dd->dd_parent)-> dd_child_dir_zapobj, tail, sizeof (foundobj), 1, &foundobj); ASSERT(err || foundobj == ddobj); #endif (void) strlcpy(dd->dd_myname, tail, sizeof (dd->dd_myname)); } else { err = zap_value_search(dp->dp_meta_objset, dsl_dir_phys(dd->dd_parent)-> dd_child_dir_zapobj, ddobj, 0, dd->dd_myname); } if (err != 0) goto errout; } else { (void) strlcpy(dd->dd_myname, spa_name(dp->dp_spa), sizeof (dd->dd_myname)); } if (dsl_dir_is_clone(dd)) { dmu_buf_t *origin_bonus; dsl_dataset_phys_t *origin_phys; /* * We can't 
open the origin dataset, because * that would require opening this dsl_dir. * Just look at its phys directly instead. */ err = dmu_bonus_hold(dp->dp_meta_objset, dsl_dir_phys(dd)->dd_origin_obj, FTAG, &origin_bonus); if (err != 0) goto errout; origin_phys = origin_bonus->db_data; dd->dd_origin_txg = origin_phys->ds_creation_txg; dmu_buf_rele(origin_bonus, FTAG); if (dsl_dir_is_zapified(dd)) { uint64_t obj; err = zap_lookup(dp->dp_meta_objset, dd->dd_object, DD_FIELD_LIVELIST, sizeof (uint64_t), 1, &obj); if (err == 0) dsl_dir_livelist_open(dd, obj); else if (err != ENOENT) goto errout; } } if (dsl_dir_is_zapified(dd)) { inode_timespec_t t = {0}; (void) zap_lookup(dp->dp_meta_objset, ddobj, DD_FIELD_SNAPSHOTS_CHANGED, sizeof (uint64_t), sizeof (inode_timespec_t) / sizeof (uint64_t), &t); dd->dd_snap_cmtime = t; } dmu_buf_init_user(&dd->dd_dbu, NULL, dsl_dir_evict_async, &dd->dd_dbuf); winner = dmu_buf_set_user_ie(dbuf, &dd->dd_dbu); if (winner != NULL) { if (dd->dd_parent) dsl_dir_rele(dd->dd_parent, dd); if (dsl_deadlist_is_open(&dd->dd_livelist)) dsl_dir_livelist_close(dd); dsl_prop_fini(dd); cv_destroy(&dd->dd_activity_cv); mutex_destroy(&dd->dd_activity_lock); mutex_destroy(&dd->dd_lock); kmem_free(dd, sizeof (dsl_dir_t)); dd = winner; } else { spa_open_ref(dp->dp_spa, dd); } } /* * The dsl_dir_t has both open-to-close and instantiate-to-evict * holds on the spa. We need the open-to-close holds because * otherwise the spa_refcnt wouldn't change when we open a * dir which the spa also has open, so we could incorrectly * think it was OK to unload/export/destroy the pool. We need * the instantiate-to-evict hold because the dsl_dir_t has a * pointer to the dd_pool, which has a pointer to the spa_t. */ spa_open_ref(dp->dp_spa, tag); ASSERT3P(dd->dd_pool, ==, dp); ASSERT3U(dd->dd_object, ==, ddobj); ASSERT3P(dd->dd_dbuf, ==, dbuf); *ddp = dd; return (0); errout: if (dd->dd_parent) dsl_dir_rele(dd->dd_parent, dd); if (dsl_deadlist_is_open(&dd->dd_livelist)) dsl_dir_livelist_close(dd); dsl_prop_fini(dd); cv_destroy(&dd->dd_activity_cv); mutex_destroy(&dd->dd_activity_lock); mutex_destroy(&dd->dd_lock); kmem_free(dd, sizeof (dsl_dir_t)); dmu_buf_rele(dbuf, tag); return (err); } void dsl_dir_rele(dsl_dir_t *dd, const void *tag) { dprintf_dd(dd, "%s\n", ""); spa_close(dd->dd_pool->dp_spa, tag); dmu_buf_rele(dd->dd_dbuf, tag); } /* * Remove a reference to the given dsl dir that is being asynchronously * released. Async releases occur from a taskq performing eviction of * dsl datasets and dirs. This process is identical to a normal release * with the exception of using the async API for releasing the reference on * the spa. 
*/ void dsl_dir_async_rele(dsl_dir_t *dd, const void *tag) { dprintf_dd(dd, "%s\n", ""); spa_async_close(dd->dd_pool->dp_spa, tag); dmu_buf_rele(dd->dd_dbuf, tag); } /* buf must be at least ZFS_MAX_DATASET_NAME_LEN bytes */ void dsl_dir_name(dsl_dir_t *dd, char *buf) { if (dd->dd_parent) { dsl_dir_name(dd->dd_parent, buf); VERIFY3U(strlcat(buf, "/", ZFS_MAX_DATASET_NAME_LEN), <, ZFS_MAX_DATASET_NAME_LEN); } else { buf[0] = '\0'; } if (!MUTEX_HELD(&dd->dd_lock)) { /* * recursive mutex so that we can use * dprintf_dd() with dd_lock held */ mutex_enter(&dd->dd_lock); VERIFY3U(strlcat(buf, dd->dd_myname, ZFS_MAX_DATASET_NAME_LEN), <, ZFS_MAX_DATASET_NAME_LEN); mutex_exit(&dd->dd_lock); } else { VERIFY3U(strlcat(buf, dd->dd_myname, ZFS_MAX_DATASET_NAME_LEN), <, ZFS_MAX_DATASET_NAME_LEN); } } /* Calculate name length, avoiding all the strcat calls of dsl_dir_name */ int dsl_dir_namelen(dsl_dir_t *dd) { int result = 0; if (dd->dd_parent) { /* parent's name + 1 for the "/" */ result = dsl_dir_namelen(dd->dd_parent) + 1; } if (!MUTEX_HELD(&dd->dd_lock)) { /* see dsl_dir_name */ mutex_enter(&dd->dd_lock); result += strlen(dd->dd_myname); mutex_exit(&dd->dd_lock); } else { result += strlen(dd->dd_myname); } return (result); } static int getcomponent(const char *path, char *component, const char **nextp) { char *p; if ((path == NULL) || (path[0] == '\0')) return (SET_ERROR(ENOENT)); /* This would be a good place to reserve some namespace... */ p = strpbrk(path, "/@"); if (p && (p[1] == '/' || p[1] == '@')) { /* two separators in a row */ return (SET_ERROR(EINVAL)); } if (p == NULL || p == path) { /* * if the first thing is an @ or /, it had better be an * @ and it had better not have any more ats or slashes, * and it had better have something after the @. */ if (p != NULL && (p[0] != '@' || strpbrk(path+1, "/@") || p[1] == '\0')) return (SET_ERROR(EINVAL)); if (strlen(path) >= ZFS_MAX_DATASET_NAME_LEN) return (SET_ERROR(ENAMETOOLONG)); (void) strlcpy(component, path, ZFS_MAX_DATASET_NAME_LEN); p = NULL; } else if (p[0] == '/') { if (p - path >= ZFS_MAX_DATASET_NAME_LEN) return (SET_ERROR(ENAMETOOLONG)); - (void) strncpy(component, path, p - path); - component[p - path] = '\0'; + (void) strlcpy(component, path, p - path + 1); p++; } else if (p[0] == '@') { /* * if the next separator is an @, there better not be * any more slashes. */ if (strchr(path, '/')) return (SET_ERROR(EINVAL)); if (p - path >= ZFS_MAX_DATASET_NAME_LEN) return (SET_ERROR(ENAMETOOLONG)); - (void) strncpy(component, path, p - path); - component[p - path] = '\0'; + (void) strlcpy(component, path, p - path + 1); } else { panic("invalid p=%p", (void *)p); } *nextp = p; return (0); } /* * Return the dsl_dir_t, and possibly the last component which couldn't * be found in *tail. The name must be in the specified dsl_pool_t. This * thread must hold the dp_config_rwlock for the pool. Returns NULL if the * path is bogus, or if tail==NULL and we couldn't parse the whole name. * (*tail)[0] == '@' means that the last component is a snapshot. */ int dsl_dir_hold(dsl_pool_t *dp, const char *name, const void *tag, dsl_dir_t **ddp, const char **tailp) { char *buf; const char *spaname, *next, *nextnext = NULL; int err; dsl_dir_t *dd; uint64_t ddobj; buf = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); err = getcomponent(name, buf, &next); if (err != 0) goto error; /* Make sure the name is in the specified pool. 
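 *
 * Illustrative aside, not part of this change: how getcomponent() carves up
 * a name as dsl_dir_hold() walks it.  For name = "tank/home/user@snap" the
 * successive calls yield:
 *
 *	component = "tank"	next = "home/user@snap"
 *	component = "home"	next = "user@snap"
 *	component = "user"	next = "@snap"
 *	component = "@snap"	next = NULL
 *
 * so the walk below stops at the dsl_dir for "tank/home/user" and, when the
 * caller passed a non-NULL tailp, "@snap" is handed back as the tail.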
*/ spaname = spa_name(dp->dp_spa); if (strcmp(buf, spaname) != 0) { err = SET_ERROR(EXDEV); goto error; } ASSERT(dsl_pool_config_held(dp)); err = dsl_dir_hold_obj(dp, dp->dp_root_dir_obj, NULL, tag, &dd); if (err != 0) { goto error; } while (next != NULL) { dsl_dir_t *child_dd; err = getcomponent(next, buf, &nextnext); if (err != 0) break; ASSERT(next[0] != '\0'); if (next[0] == '@') break; dprintf("looking up %s in obj%lld\n", buf, (longlong_t)dsl_dir_phys(dd)->dd_child_dir_zapobj); err = zap_lookup(dp->dp_meta_objset, dsl_dir_phys(dd)->dd_child_dir_zapobj, buf, sizeof (ddobj), 1, &ddobj); if (err != 0) { if (err == ENOENT) err = 0; break; } err = dsl_dir_hold_obj(dp, ddobj, buf, tag, &child_dd); if (err != 0) break; dsl_dir_rele(dd, tag); dd = child_dd; next = nextnext; } if (err != 0) { dsl_dir_rele(dd, tag); goto error; } /* * It's an error if there's more than one component left, or * tailp==NULL and there's any component left. */ if (next != NULL && (tailp == NULL || (nextnext && nextnext[0] != '\0'))) { /* bad path name */ dsl_dir_rele(dd, tag); dprintf("next=%p (%s) tail=%p\n", next, next?next:"", tailp); err = SET_ERROR(ENOENT); } if (tailp != NULL) *tailp = next; if (err == 0) *ddp = dd; error: kmem_free(buf, ZFS_MAX_DATASET_NAME_LEN); return (err); } /* * If the counts are already initialized for this filesystem and its * descendants then do nothing, otherwise initialize the counts. * * The counts on this filesystem, and those below, may be uninitialized due to * either the use of a pre-existing pool which did not support the * filesystem/snapshot limit feature, or one in which the feature had not yet * been enabled. * * Recursively descend the filesystem tree and update the filesystem/snapshot * counts on each filesystem below, then update the cumulative count on the * current filesystem. If the filesystem already has a count set on it, * then we know that its counts, and the counts on the filesystems below it, * are already correct, so we don't have to update this filesystem. */ static void dsl_dir_init_fs_ss_count(dsl_dir_t *dd, dmu_tx_t *tx) { uint64_t my_fs_cnt = 0; uint64_t my_ss_cnt = 0; dsl_pool_t *dp = dd->dd_pool; objset_t *os = dp->dp_meta_objset; zap_cursor_t *zc; zap_attribute_t *za; dsl_dataset_t *ds; ASSERT(spa_feature_is_active(dp->dp_spa, SPA_FEATURE_FS_SS_LIMIT)); ASSERT(dsl_pool_config_held(dp)); ASSERT(dmu_tx_is_syncing(tx)); dsl_dir_zapify(dd, tx); /* * If the filesystem count has already been initialized then we * don't need to recurse down any further. */ if (zap_contains(os, dd->dd_object, DD_FIELD_FILESYSTEM_COUNT) == 0) return; zc = kmem_alloc(sizeof (zap_cursor_t), KM_SLEEP); za = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP); /* Iterate my child dirs */ for (zap_cursor_init(zc, os, dsl_dir_phys(dd)->dd_child_dir_zapobj); zap_cursor_retrieve(zc, za) == 0; zap_cursor_advance(zc)) { dsl_dir_t *chld_dd; uint64_t count; VERIFY0(dsl_dir_hold_obj(dp, za->za_first_integer, NULL, FTAG, &chld_dd)); /* * Ignore hidden ($FREE, $MOS & $ORIGIN) objsets. 
*/ if (chld_dd->dd_myname[0] == '$') { dsl_dir_rele(chld_dd, FTAG); continue; } my_fs_cnt++; /* count this child */ dsl_dir_init_fs_ss_count(chld_dd, tx); VERIFY0(zap_lookup(os, chld_dd->dd_object, DD_FIELD_FILESYSTEM_COUNT, sizeof (count), 1, &count)); my_fs_cnt += count; VERIFY0(zap_lookup(os, chld_dd->dd_object, DD_FIELD_SNAPSHOT_COUNT, sizeof (count), 1, &count)); my_ss_cnt += count; dsl_dir_rele(chld_dd, FTAG); } zap_cursor_fini(zc); /* Count my snapshots (we counted children's snapshots above) */ VERIFY0(dsl_dataset_hold_obj(dd->dd_pool, dsl_dir_phys(dd)->dd_head_dataset_obj, FTAG, &ds)); for (zap_cursor_init(zc, os, dsl_dataset_phys(ds)->ds_snapnames_zapobj); zap_cursor_retrieve(zc, za) == 0; zap_cursor_advance(zc)) { /* Don't count temporary snapshots */ if (za->za_name[0] != '%') my_ss_cnt++; } zap_cursor_fini(zc); dsl_dataset_rele(ds, FTAG); kmem_free(zc, sizeof (zap_cursor_t)); kmem_free(za, sizeof (zap_attribute_t)); /* we're in a sync task, update counts */ dmu_buf_will_dirty(dd->dd_dbuf, tx); VERIFY0(zap_add(os, dd->dd_object, DD_FIELD_FILESYSTEM_COUNT, sizeof (my_fs_cnt), 1, &my_fs_cnt, tx)); VERIFY0(zap_add(os, dd->dd_object, DD_FIELD_SNAPSHOT_COUNT, sizeof (my_ss_cnt), 1, &my_ss_cnt, tx)); } static int dsl_dir_actv_fs_ss_limit_check(void *arg, dmu_tx_t *tx) { char *ddname = (char *)arg; dsl_pool_t *dp = dmu_tx_pool(tx); dsl_dataset_t *ds; dsl_dir_t *dd; int error; error = dsl_dataset_hold(dp, ddname, FTAG, &ds); if (error != 0) return (error); if (!spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_FS_SS_LIMIT)) { dsl_dataset_rele(ds, FTAG); return (SET_ERROR(ENOTSUP)); } dd = ds->ds_dir; if (spa_feature_is_active(dp->dp_spa, SPA_FEATURE_FS_SS_LIMIT) && dsl_dir_is_zapified(dd) && zap_contains(dp->dp_meta_objset, dd->dd_object, DD_FIELD_FILESYSTEM_COUNT) == 0) { dsl_dataset_rele(ds, FTAG); return (SET_ERROR(EALREADY)); } dsl_dataset_rele(ds, FTAG); return (0); } static void dsl_dir_actv_fs_ss_limit_sync(void *arg, dmu_tx_t *tx) { char *ddname = (char *)arg; dsl_pool_t *dp = dmu_tx_pool(tx); dsl_dataset_t *ds; spa_t *spa; VERIFY0(dsl_dataset_hold(dp, ddname, FTAG, &ds)); spa = dsl_dataset_get_spa(ds); if (!spa_feature_is_active(spa, SPA_FEATURE_FS_SS_LIMIT)) { /* * Since the feature was not active and we're now setting a * limit, increment the feature-active counter so that the * feature becomes active for the first time. * * We are already in a sync task so we can update the MOS. */ spa_feature_incr(spa, SPA_FEATURE_FS_SS_LIMIT, tx); } /* * Since we are now setting a non-UINT64_MAX limit on the filesystem, * we need to ensure the counts are correct. Descend down the tree from * this point and update all of the counts to be accurate. */ dsl_dir_init_fs_ss_count(ds->ds_dir, tx); dsl_dataset_rele(ds, FTAG); } /* * Make sure the feature is enabled and activate it if necessary. * Since we're setting a limit, ensure the on-disk counts are valid. * This is only called by the ioctl path when setting a limit value. * * We do not need to validate the new limit, since users who can change the * limit are also allowed to exceed the limit. */ int dsl_dir_activate_fs_ss_limit(const char *ddname) { int error; error = dsl_sync_task(ddname, dsl_dir_actv_fs_ss_limit_check, dsl_dir_actv_fs_ss_limit_sync, (void *)ddname, 0, ZFS_SPACE_CHECK_RESERVED); if (error == EALREADY) error = 0; return (error); } /* * Used to determine if the filesystem_limit or snapshot_limit should be * enforced. We allow the limit to be exceeded if the user has permission to * write the property value. 
We pass in the creds that we got in the open * context since we will always be the GZ root in syncing context. We also have * to handle the case where we are allowed to change the limit on the current * dataset, but there may be another limit in the tree above. * * We can never modify these two properties within a non-global zone. In * addition, the other checks are modeled on zfs_secpolicy_write_perms. We * can't use that function since we are already holding the dp_config_rwlock. * In addition, we already have the dd and dealing with snapshots is simplified * in this code. */ typedef enum { ENFORCE_ALWAYS, ENFORCE_NEVER, ENFORCE_ABOVE } enforce_res_t; static enforce_res_t dsl_enforce_ds_ss_limits(dsl_dir_t *dd, zfs_prop_t prop, cred_t *cr, proc_t *proc) { enforce_res_t enforce = ENFORCE_ALWAYS; uint64_t obj; dsl_dataset_t *ds; uint64_t zoned; const char *zonedstr; ASSERT(prop == ZFS_PROP_FILESYSTEM_LIMIT || prop == ZFS_PROP_SNAPSHOT_LIMIT); #ifdef _KERNEL if (crgetzoneid(cr) != GLOBAL_ZONEID) return (ENFORCE_ALWAYS); /* * We are checking the saved credentials of the user process, which is * not the current process. Note that we can't use secpolicy_zfs(), * because it only works if the cred is that of the current process (on * Linux). */ if (secpolicy_zfs_proc(cr, proc) == 0) return (ENFORCE_NEVER); #else (void) proc; #endif if ((obj = dsl_dir_phys(dd)->dd_head_dataset_obj) == 0) return (ENFORCE_ALWAYS); ASSERT(dsl_pool_config_held(dd->dd_pool)); if (dsl_dataset_hold_obj(dd->dd_pool, obj, FTAG, &ds) != 0) return (ENFORCE_ALWAYS); zonedstr = zfs_prop_to_name(ZFS_PROP_ZONED); if (dsl_prop_get_ds(ds, zonedstr, 8, 1, &zoned, NULL) || zoned) { /* Only root can access zoned fs's from the GZ */ enforce = ENFORCE_ALWAYS; } else { if (dsl_deleg_access_impl(ds, zfs_prop_to_name(prop), cr) == 0) enforce = ENFORCE_ABOVE; } dsl_dataset_rele(ds, FTAG); return (enforce); } /* * Check if adding additional child filesystem(s) would exceed any filesystem * limits or adding additional snapshot(s) would exceed any snapshot limits. * The prop argument indicates which limit to check. * * Note that all filesystem limits up to the root (or the highest * initialized) filesystem or the given ancestor must be satisfied. */ int dsl_fs_ss_limit_check(dsl_dir_t *dd, uint64_t delta, zfs_prop_t prop, dsl_dir_t *ancestor, cred_t *cr, proc_t *proc) { objset_t *os = dd->dd_pool->dp_meta_objset; uint64_t limit, count; const char *count_prop; enforce_res_t enforce; int err = 0; ASSERT(dsl_pool_config_held(dd->dd_pool)); ASSERT(prop == ZFS_PROP_FILESYSTEM_LIMIT || prop == ZFS_PROP_SNAPSHOT_LIMIT); /* * If we're allowed to change the limit, don't enforce the limit * e.g. this can happen if a snapshot is taken by an administrative * user in the global zone (i.e. a recursive snapshot by root). * However, we must handle the case of delegated permissions where we * are allowed to change the limit on the current dataset, but there * is another limit in the tree above. */ enforce = dsl_enforce_ds_ss_limits(dd, prop, cr, proc); if (enforce == ENFORCE_NEVER) return (0); /* * e.g. if renaming a dataset with no snapshots, count adjustment * is 0. */ if (delta == 0) return (0); if (prop == ZFS_PROP_SNAPSHOT_LIMIT) { /* * We don't enforce the limit for temporary snapshots. This is * indicated by a NULL cred_t argument. */ if (cr == NULL) return (0); count_prop = DD_FIELD_SNAPSHOT_COUNT; } else { count_prop = DD_FIELD_FILESYSTEM_COUNT; } /* * If an ancestor has been provided, stop checking the limit once we * hit that dir. 
We need this during rename so that we don't overcount * the check once we recurse up to the common ancestor. */ if (ancestor == dd) return (0); /* * If we hit an uninitialized node while recursing up the tree, we can * stop since we know there is no limit here (or above). The counts are * not valid on this node and we know we won't touch this node's counts. */ if (!dsl_dir_is_zapified(dd)) return (0); err = zap_lookup(os, dd->dd_object, count_prop, sizeof (count), 1, &count); if (err == ENOENT) return (0); if (err != 0) return (err); err = dsl_prop_get_dd(dd, zfs_prop_to_name(prop), 8, 1, &limit, NULL, B_FALSE); if (err != 0) return (err); /* Is there a limit which we've hit? */ if (enforce == ENFORCE_ALWAYS && (count + delta) > limit) return (SET_ERROR(EDQUOT)); if (dd->dd_parent != NULL) err = dsl_fs_ss_limit_check(dd->dd_parent, delta, prop, ancestor, cr, proc); return (err); } /* * Adjust the filesystem or snapshot count for the specified dsl_dir_t and all * parents. When a new filesystem/snapshot is created, increment the count on * all parents, and when a filesystem/snapshot is destroyed, decrement the * count. */ void dsl_fs_ss_count_adjust(dsl_dir_t *dd, int64_t delta, const char *prop, dmu_tx_t *tx) { int err; objset_t *os = dd->dd_pool->dp_meta_objset; uint64_t count; ASSERT(dsl_pool_config_held(dd->dd_pool)); ASSERT(dmu_tx_is_syncing(tx)); ASSERT(strcmp(prop, DD_FIELD_FILESYSTEM_COUNT) == 0 || strcmp(prop, DD_FIELD_SNAPSHOT_COUNT) == 0); /* * We don't do accounting for hidden ($FREE, $MOS & $ORIGIN) objsets. */ if (dd->dd_myname[0] == '$' && strcmp(prop, DD_FIELD_FILESYSTEM_COUNT) == 0) { return; } /* * e.g. if renaming a dataset with no snapshots, count adjustment is 0 */ if (delta == 0) return; /* * If we hit an uninitialized node while recursing up the tree, we can * stop since we know the counts are not valid on this node and we * know we shouldn't touch this node's counts. An uninitialized count * on the node indicates that either the feature has not yet been * activated or there are no limits on this part of the tree. */ if (!dsl_dir_is_zapified(dd) || (err = zap_lookup(os, dd->dd_object, prop, sizeof (count), 1, &count)) == ENOENT) return; VERIFY0(err); count += delta; /* Use a signed verify to make sure we're not neg. 
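 *
 * Illustrative aside, not part of this change: a worked example of the count
 * adjustment described above.  Suppose a snapshot_limit was set on "tank/a",
 * so "tank/a" and everything below it carry initialized counts while "tank"
 * above it does not.  Taking one snapshot of "tank/a/b" reaches this
 * function with delta = 1 and prop = DD_FIELD_SNAPSHOT_COUNT:
 *
 *	tank/a/b	snapshot_count += 1	(initialized, updated)
 *	tank/a		snapshot_count += 1	(initialized, updated)
 *	tank		untouched		(no count property, walk stops)
 *
 * Destroying that snapshot later repeats the same walk with delta = -1.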
*/ VERIFY3S(count, >=, 0); VERIFY0(zap_update(os, dd->dd_object, prop, sizeof (count), 1, &count, tx)); /* Roll up this additional count into our ancestors */ if (dd->dd_parent != NULL) dsl_fs_ss_count_adjust(dd->dd_parent, delta, prop, tx); } uint64_t dsl_dir_create_sync(dsl_pool_t *dp, dsl_dir_t *pds, const char *name, dmu_tx_t *tx) { objset_t *mos = dp->dp_meta_objset; uint64_t ddobj; dsl_dir_phys_t *ddphys; dmu_buf_t *dbuf; ddobj = dmu_object_alloc(mos, DMU_OT_DSL_DIR, 0, DMU_OT_DSL_DIR, sizeof (dsl_dir_phys_t), tx); if (pds) { VERIFY0(zap_add(mos, dsl_dir_phys(pds)->dd_child_dir_zapobj, name, sizeof (uint64_t), 1, &ddobj, tx)); } else { /* it's the root dir */ VERIFY0(zap_add(mos, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_ROOT_DATASET, sizeof (uint64_t), 1, &ddobj, tx)); } VERIFY0(dmu_bonus_hold(mos, ddobj, FTAG, &dbuf)); dmu_buf_will_dirty(dbuf, tx); ddphys = dbuf->db_data; ddphys->dd_creation_time = gethrestime_sec(); if (pds) { ddphys->dd_parent_obj = pds->dd_object; /* update the filesystem counts */ dsl_fs_ss_count_adjust(pds, 1, DD_FIELD_FILESYSTEM_COUNT, tx); } ddphys->dd_props_zapobj = zap_create(mos, DMU_OT_DSL_PROPS, DMU_OT_NONE, 0, tx); ddphys->dd_child_dir_zapobj = zap_create(mos, DMU_OT_DSL_DIR_CHILD_MAP, DMU_OT_NONE, 0, tx); if (spa_version(dp->dp_spa) >= SPA_VERSION_USED_BREAKDOWN) ddphys->dd_flags |= DD_FLAG_USED_BREAKDOWN; dmu_buf_rele(dbuf, FTAG); return (ddobj); } boolean_t dsl_dir_is_clone(dsl_dir_t *dd) { return (dsl_dir_phys(dd)->dd_origin_obj && (dd->dd_pool->dp_origin_snap == NULL || dsl_dir_phys(dd)->dd_origin_obj != dd->dd_pool->dp_origin_snap->ds_object)); } uint64_t dsl_dir_get_used(dsl_dir_t *dd) { return (dsl_dir_phys(dd)->dd_used_bytes); } uint64_t dsl_dir_get_compressed(dsl_dir_t *dd) { return (dsl_dir_phys(dd)->dd_compressed_bytes); } uint64_t dsl_dir_get_quota(dsl_dir_t *dd) { return (dsl_dir_phys(dd)->dd_quota); } uint64_t dsl_dir_get_reservation(dsl_dir_t *dd) { return (dsl_dir_phys(dd)->dd_reserved); } uint64_t dsl_dir_get_compressratio(dsl_dir_t *dd) { /* a fixed point number, 100x the ratio */ return (dsl_dir_phys(dd)->dd_compressed_bytes == 0 ? 
100 : (dsl_dir_phys(dd)->dd_uncompressed_bytes * 100 / dsl_dir_phys(dd)->dd_compressed_bytes)); } uint64_t dsl_dir_get_logicalused(dsl_dir_t *dd) { return (dsl_dir_phys(dd)->dd_uncompressed_bytes); } uint64_t dsl_dir_get_usedsnap(dsl_dir_t *dd) { return (dsl_dir_phys(dd)->dd_used_breakdown[DD_USED_SNAP]); } uint64_t dsl_dir_get_usedds(dsl_dir_t *dd) { return (dsl_dir_phys(dd)->dd_used_breakdown[DD_USED_HEAD]); } uint64_t dsl_dir_get_usedrefreserv(dsl_dir_t *dd) { return (dsl_dir_phys(dd)->dd_used_breakdown[DD_USED_REFRSRV]); } uint64_t dsl_dir_get_usedchild(dsl_dir_t *dd) { return (dsl_dir_phys(dd)->dd_used_breakdown[DD_USED_CHILD] + dsl_dir_phys(dd)->dd_used_breakdown[DD_USED_CHILD_RSRV]); } void dsl_dir_get_origin(dsl_dir_t *dd, char *buf) { dsl_dataset_t *ds; VERIFY0(dsl_dataset_hold_obj(dd->dd_pool, dsl_dir_phys(dd)->dd_origin_obj, FTAG, &ds)); dsl_dataset_name(ds, buf); dsl_dataset_rele(ds, FTAG); } int dsl_dir_get_filesystem_count(dsl_dir_t *dd, uint64_t *count) { if (dsl_dir_is_zapified(dd)) { objset_t *os = dd->dd_pool->dp_meta_objset; return (zap_lookup(os, dd->dd_object, DD_FIELD_FILESYSTEM_COUNT, sizeof (*count), 1, count)); } else { return (SET_ERROR(ENOENT)); } } int dsl_dir_get_snapshot_count(dsl_dir_t *dd, uint64_t *count) { if (dsl_dir_is_zapified(dd)) { objset_t *os = dd->dd_pool->dp_meta_objset; return (zap_lookup(os, dd->dd_object, DD_FIELD_SNAPSHOT_COUNT, sizeof (*count), 1, count)); } else { return (SET_ERROR(ENOENT)); } } void dsl_dir_stats(dsl_dir_t *dd, nvlist_t *nv) { mutex_enter(&dd->dd_lock); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_QUOTA, dsl_dir_get_quota(dd)); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_RESERVATION, dsl_dir_get_reservation(dd)); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_LOGICALUSED, dsl_dir_get_logicalused(dd)); if (dsl_dir_phys(dd)->dd_flags & DD_FLAG_USED_BREAKDOWN) { dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USEDSNAP, dsl_dir_get_usedsnap(dd)); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USEDDS, dsl_dir_get_usedds(dd)); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USEDREFRESERV, dsl_dir_get_usedrefreserv(dd)); dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USEDCHILD, dsl_dir_get_usedchild(dd)); } mutex_exit(&dd->dd_lock); uint64_t count; if (dsl_dir_get_filesystem_count(dd, &count) == 0) { dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_FILESYSTEM_COUNT, count); } if (dsl_dir_get_snapshot_count(dd, &count) == 0) { dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_SNAPSHOT_COUNT, count); } if (dsl_dir_is_clone(dd)) { char buf[ZFS_MAX_DATASET_NAME_LEN]; dsl_dir_get_origin(dd, buf); dsl_prop_nvlist_add_string(nv, ZFS_PROP_ORIGIN, buf); } } void dsl_dir_dirty(dsl_dir_t *dd, dmu_tx_t *tx) { dsl_pool_t *dp = dd->dd_pool; ASSERT(dsl_dir_phys(dd)); if (txg_list_add(&dp->dp_dirty_dirs, dd, tx->tx_txg)) { /* up the hold count until we can be written out */ dmu_buf_add_ref(dd->dd_dbuf, dd); } } static int64_t parent_delta(dsl_dir_t *dd, uint64_t used, int64_t delta) { uint64_t old_accounted = MAX(used, dsl_dir_phys(dd)->dd_reserved); uint64_t new_accounted = MAX(used + delta, dsl_dir_phys(dd)->dd_reserved); return (new_accounted - old_accounted); } void dsl_dir_sync(dsl_dir_t *dd, dmu_tx_t *tx) { ASSERT(dmu_tx_is_syncing(tx)); mutex_enter(&dd->dd_lock); ASSERT0(dd->dd_tempreserved[tx->tx_txg & TXG_MASK]); dprintf_dd(dd, "txg=%llu towrite=%lluK\n", (u_longlong_t)tx->tx_txg, (u_longlong_t)dd->dd_space_towrite[tx->tx_txg & TXG_MASK] / 1024); dd->dd_space_towrite[tx->tx_txg & TXG_MASK] = 0; mutex_exit(&dd->dd_lock); /* release the hold from dsl_dir_dirty */ dmu_buf_rele(dd->dd_dbuf, dd); } static 
uint64_t dsl_dir_space_towrite(dsl_dir_t *dd) { uint64_t space = 0; ASSERT(MUTEX_HELD(&dd->dd_lock)); for (int i = 0; i < TXG_SIZE; i++) { space += dd->dd_space_towrite[i & TXG_MASK]; ASSERT3U(dd->dd_space_towrite[i & TXG_MASK], >=, 0); } return (space); } /* * How much space would dd have available if ancestor had delta applied * to it? If ondiskonly is set, we're only interested in what's * on-disk, not estimated pending changes. */ uint64_t dsl_dir_space_available(dsl_dir_t *dd, dsl_dir_t *ancestor, int64_t delta, int ondiskonly) { uint64_t parentspace, myspace, quota, used; /* * If there are no restrictions otherwise, assume we have * unlimited space available. */ quota = UINT64_MAX; parentspace = UINT64_MAX; if (dd->dd_parent != NULL) { parentspace = dsl_dir_space_available(dd->dd_parent, ancestor, delta, ondiskonly); } mutex_enter(&dd->dd_lock); if (dsl_dir_phys(dd)->dd_quota != 0) quota = dsl_dir_phys(dd)->dd_quota; used = dsl_dir_phys(dd)->dd_used_bytes; if (!ondiskonly) used += dsl_dir_space_towrite(dd); if (dd->dd_parent == NULL) { uint64_t poolsize = dsl_pool_adjustedsize(dd->dd_pool, ZFS_SPACE_CHECK_NORMAL); quota = MIN(quota, poolsize); } if (dsl_dir_phys(dd)->dd_reserved > used && parentspace != UINT64_MAX) { /* * We have some space reserved, in addition to what our * parent gave us. */ parentspace += dsl_dir_phys(dd)->dd_reserved - used; } if (dd == ancestor) { ASSERT(delta <= 0); ASSERT(used >= -delta); used += delta; if (parentspace != UINT64_MAX) parentspace -= delta; } if (used > quota) { /* over quota */ myspace = 0; } else { /* * the lesser of the space provided by our parent and * the space left in our quota */ myspace = MIN(parentspace, quota - used); } mutex_exit(&dd->dd_lock); return (myspace); } struct tempreserve { list_node_t tr_node; dsl_dir_t *tr_ds; uint64_t tr_size; }; static int dsl_dir_tempreserve_impl(dsl_dir_t *dd, uint64_t asize, boolean_t netfree, boolean_t ignorequota, list_t *tr_list, dmu_tx_t *tx, boolean_t first) { uint64_t txg; uint64_t quota; struct tempreserve *tr; int retval; uint64_t ref_rsrv; top_of_function: txg = tx->tx_txg; retval = EDQUOT; ref_rsrv = 0; ASSERT3U(txg, !=, 0); ASSERT3S(asize, >, 0); mutex_enter(&dd->dd_lock); /* * Check against the dsl_dir's quota. We don't add in the delta * when checking for over-quota because they get one free hit. */ uint64_t est_inflight = dsl_dir_space_towrite(dd); for (int i = 0; i < TXG_SIZE; i++) est_inflight += dd->dd_tempreserved[i]; uint64_t used_on_disk = dsl_dir_phys(dd)->dd_used_bytes; /* * On the first iteration, fetch the dataset's used-on-disk and * refreservation values. Also, if checkrefquota is set, test if * allocating this space would exceed the dataset's refquota. */ if (first && tx->tx_objset) { int error; dsl_dataset_t *ds = tx->tx_objset->os_dsl_dataset; error = dsl_dataset_check_quota(ds, !netfree, asize, est_inflight, &used_on_disk, &ref_rsrv); if (error != 0) { mutex_exit(&dd->dd_lock); DMU_TX_STAT_BUMP(dmu_tx_quota); return (error); } } /* * If this transaction will result in a net free of space, * we want to let it through. */ if (ignorequota || netfree || dsl_dir_phys(dd)->dd_quota == 0) quota = UINT64_MAX; else quota = dsl_dir_phys(dd)->dd_quota; /* * Adjust the quota against the actual pool size at the root * minus any outstanding deferred frees. * To ensure that it's possible to remove files from a full * pool without inducing transient overcommits, we throttle * netfree transactions against a quota that is slightly larger, * but still within the pool's allocation slop. 
In cases where * we're very close to full, this will allow a steady trickle of * removes to get through. */ if (dd->dd_parent == NULL) { uint64_t avail = dsl_pool_unreserved_space(dd->dd_pool, (netfree) ? ZFS_SPACE_CHECK_RESERVED : ZFS_SPACE_CHECK_NORMAL); if (avail < quota) { quota = avail; retval = SET_ERROR(ENOSPC); } } /* * If they are requesting more space, and our current estimate * is over quota, they get to try again unless the actual * on-disk is over quota and there are no pending changes * or deferred frees (which may free up space for us). */ if (used_on_disk + est_inflight >= quota) { if (est_inflight > 0 || used_on_disk < quota) { retval = SET_ERROR(ERESTART); } else { ASSERT3U(used_on_disk, >=, quota); if (retval == ENOSPC && (used_on_disk - quota) < dsl_pool_deferred_space(dd->dd_pool)) { retval = SET_ERROR(ERESTART); } } dprintf_dd(dd, "failing: used=%lluK inflight = %lluK " "quota=%lluK tr=%lluK err=%d\n", (u_longlong_t)used_on_disk>>10, (u_longlong_t)est_inflight>>10, (u_longlong_t)quota>>10, (u_longlong_t)asize>>10, retval); mutex_exit(&dd->dd_lock); DMU_TX_STAT_BUMP(dmu_tx_quota); return (retval); } /* We need to up our estimated delta before dropping dd_lock */ dd->dd_tempreserved[txg & TXG_MASK] += asize; uint64_t parent_rsrv = parent_delta(dd, used_on_disk + est_inflight, asize - ref_rsrv); mutex_exit(&dd->dd_lock); tr = kmem_zalloc(sizeof (struct tempreserve), KM_SLEEP); tr->tr_ds = dd; tr->tr_size = asize; list_insert_tail(tr_list, tr); /* see if it's OK with our parent */ if (dd->dd_parent != NULL && parent_rsrv != 0) { /* * Recurse on our parent without recursion. This has been * observed to be potentially large stack usage even within * the test suite. Largest seen stack was 7632 bytes on linux. */ dd = dd->dd_parent; asize = parent_rsrv; ignorequota = (dsl_dir_phys(dd)->dd_head_dataset_obj == 0); first = B_FALSE; goto top_of_function; } else { return (0); } } /* * Reserve space in this dsl_dir, to be used in this tx's txg. * After the space has been dirtied (and dsl_dir_willuse_space() * has been called), the reservation should be canceled, using * dsl_dir_tempreserve_clear(). */ int dsl_dir_tempreserve_space(dsl_dir_t *dd, uint64_t lsize, uint64_t asize, boolean_t netfree, void **tr_cookiep, dmu_tx_t *tx) { int err; list_t *tr_list; if (asize == 0) { *tr_cookiep = NULL; return (0); } tr_list = kmem_alloc(sizeof (list_t), KM_SLEEP); list_create(tr_list, sizeof (struct tempreserve), offsetof(struct tempreserve, tr_node)); ASSERT3S(asize, >, 0); err = arc_tempreserve_space(dd->dd_pool->dp_spa, lsize, tx->tx_txg); if (err == 0) { struct tempreserve *tr; tr = kmem_zalloc(sizeof (struct tempreserve), KM_SLEEP); tr->tr_size = lsize; list_insert_tail(tr_list, tr); } else { if (err == EAGAIN) { /* * If arc_memory_throttle() detected that pageout * is running and we are low on memory, we delay new * non-pageout transactions to give pageout an * advantage. * * It is unfortunate to be delaying while the caller's * locks are held. */ txg_delay(dd->dd_pool, tx->tx_txg, MSEC2NSEC(10), MSEC2NSEC(10)); err = SET_ERROR(ERESTART); } } if (err == 0) { err = dsl_dir_tempreserve_impl(dd, asize, netfree, B_FALSE, tr_list, tx, B_TRUE); } if (err != 0) dsl_dir_tempreserve_clear(tr_list, tx); else *tr_cookiep = tr_list; return (err); } /* * Clear a temporary reservation that we previously made with * dsl_dir_tempreserve_space(). 
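 *
 * A caller in open context is expected to pair the two calls, roughly
 * as follows (cookie is just a local here):
 *
 *	void *cookie;
 *	if (dsl_dir_tempreserve_space(dd, lsize, asize, netfree,
 *	    &cookie, tx) == 0) {
 *		... dirty the data for this txg ...
 *		dsl_dir_tempreserve_clear(cookie, tx);
 *	}
 *
 * The lsize passed to dsl_dir_tempreserve_space() is the logical size,
 * which is throttled against the ARC, while asize is the worst-case
 * allocated size charged against the dsl_dir quotas.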
*/ void dsl_dir_tempreserve_clear(void *tr_cookie, dmu_tx_t *tx) { int txgidx = tx->tx_txg & TXG_MASK; list_t *tr_list = tr_cookie; struct tempreserve *tr; ASSERT3U(tx->tx_txg, !=, 0); if (tr_cookie == NULL) return; while ((tr = list_head(tr_list)) != NULL) { if (tr->tr_ds) { mutex_enter(&tr->tr_ds->dd_lock); ASSERT3U(tr->tr_ds->dd_tempreserved[txgidx], >=, tr->tr_size); tr->tr_ds->dd_tempreserved[txgidx] -= tr->tr_size; mutex_exit(&tr->tr_ds->dd_lock); } else { arc_tempreserve_clear(tr->tr_size); } list_remove(tr_list, tr); kmem_free(tr, sizeof (struct tempreserve)); } kmem_free(tr_list, sizeof (list_t)); } /* * This should be called from open context when we think we're going to write * or free space, for example when dirtying data. Be conservative; it's okay * to write less space or free more, but we don't want to write more or free * less than the amount specified. * * NOTE: The behavior of this function is identical to the Illumos / FreeBSD * version however it has been adjusted to use an iterative rather than * recursive algorithm to minimize stack usage. */ void dsl_dir_willuse_space(dsl_dir_t *dd, int64_t space, dmu_tx_t *tx) { int64_t parent_space; uint64_t est_used; do { mutex_enter(&dd->dd_lock); if (space > 0) dd->dd_space_towrite[tx->tx_txg & TXG_MASK] += space; est_used = dsl_dir_space_towrite(dd) + dsl_dir_phys(dd)->dd_used_bytes; parent_space = parent_delta(dd, est_used, space); mutex_exit(&dd->dd_lock); /* Make sure that we clean up dd_space_to* */ dsl_dir_dirty(dd, tx); dd = dd->dd_parent; space = parent_space; } while (space && dd); } /* call from syncing context when we actually write/free space for this dd */ void dsl_dir_diduse_space(dsl_dir_t *dd, dd_used_t type, int64_t used, int64_t compressed, int64_t uncompressed, dmu_tx_t *tx) { int64_t accounted_delta; ASSERT(dmu_tx_is_syncing(tx)); ASSERT(type < DD_USED_NUM); dmu_buf_will_dirty(dd->dd_dbuf, tx); /* * dsl_dataset_set_refreservation_sync_impl() calls this with * dd_lock held, so that it can atomically update * ds->ds_reserved and the dsl_dir accounting, so that * dsl_dataset_check_quota() can see dataset and dir accounting * consistently. 
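 * Hence dd_lock is only acquired below when the caller does not
 * already hold it.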
*/ boolean_t needlock = !MUTEX_HELD(&dd->dd_lock); if (needlock) mutex_enter(&dd->dd_lock); dsl_dir_phys_t *ddp = dsl_dir_phys(dd); accounted_delta = parent_delta(dd, ddp->dd_used_bytes, used); ASSERT(used >= 0 || ddp->dd_used_bytes >= -used); ASSERT(compressed >= 0 || ddp->dd_compressed_bytes >= -compressed); ASSERT(uncompressed >= 0 || ddp->dd_uncompressed_bytes >= -uncompressed); ddp->dd_used_bytes += used; ddp->dd_uncompressed_bytes += uncompressed; ddp->dd_compressed_bytes += compressed; if (ddp->dd_flags & DD_FLAG_USED_BREAKDOWN) { ASSERT(used >= 0 || ddp->dd_used_breakdown[type] >= -used); ddp->dd_used_breakdown[type] += used; #ifdef ZFS_DEBUG { dd_used_t t; uint64_t u = 0; for (t = 0; t < DD_USED_NUM; t++) u += ddp->dd_used_breakdown[t]; ASSERT3U(u, ==, ddp->dd_used_bytes); } #endif } if (needlock) mutex_exit(&dd->dd_lock); if (dd->dd_parent != NULL) { dsl_dir_diduse_transfer_space(dd->dd_parent, accounted_delta, compressed, uncompressed, used, DD_USED_CHILD_RSRV, DD_USED_CHILD, tx); } } void dsl_dir_transfer_space(dsl_dir_t *dd, int64_t delta, dd_used_t oldtype, dd_used_t newtype, dmu_tx_t *tx) { ASSERT(dmu_tx_is_syncing(tx)); ASSERT(oldtype < DD_USED_NUM); ASSERT(newtype < DD_USED_NUM); dsl_dir_phys_t *ddp = dsl_dir_phys(dd); if (delta == 0 || !(ddp->dd_flags & DD_FLAG_USED_BREAKDOWN)) return; dmu_buf_will_dirty(dd->dd_dbuf, tx); mutex_enter(&dd->dd_lock); ASSERT(delta > 0 ? ddp->dd_used_breakdown[oldtype] >= delta : ddp->dd_used_breakdown[newtype] >= -delta); ASSERT(ddp->dd_used_bytes >= ABS(delta)); ddp->dd_used_breakdown[oldtype] -= delta; ddp->dd_used_breakdown[newtype] += delta; mutex_exit(&dd->dd_lock); } void dsl_dir_diduse_transfer_space(dsl_dir_t *dd, int64_t used, int64_t compressed, int64_t uncompressed, int64_t tonew, dd_used_t oldtype, dd_used_t newtype, dmu_tx_t *tx) { int64_t accounted_delta; ASSERT(dmu_tx_is_syncing(tx)); ASSERT(oldtype < DD_USED_NUM); ASSERT(newtype < DD_USED_NUM); dmu_buf_will_dirty(dd->dd_dbuf, tx); mutex_enter(&dd->dd_lock); dsl_dir_phys_t *ddp = dsl_dir_phys(dd); accounted_delta = parent_delta(dd, ddp->dd_used_bytes, used); ASSERT(used >= 0 || ddp->dd_used_bytes >= -used); ASSERT(compressed >= 0 || ddp->dd_compressed_bytes >= -compressed); ASSERT(uncompressed >= 0 || ddp->dd_uncompressed_bytes >= -uncompressed); ddp->dd_used_bytes += used; ddp->dd_uncompressed_bytes += uncompressed; ddp->dd_compressed_bytes += compressed; if (ddp->dd_flags & DD_FLAG_USED_BREAKDOWN) { ASSERT(tonew - used <= 0 || ddp->dd_used_breakdown[oldtype] >= tonew - used); ASSERT(tonew >= 0 || ddp->dd_used_breakdown[newtype] >= -tonew); ddp->dd_used_breakdown[oldtype] -= tonew - used; ddp->dd_used_breakdown[newtype] += tonew; #ifdef ZFS_DEBUG { dd_used_t t; uint64_t u = 0; for (t = 0; t < DD_USED_NUM; t++) u += ddp->dd_used_breakdown[t]; ASSERT3U(u, ==, ddp->dd_used_bytes); } #endif } mutex_exit(&dd->dd_lock); if (dd->dd_parent != NULL) { dsl_dir_diduse_transfer_space(dd->dd_parent, accounted_delta, compressed, uncompressed, used, DD_USED_CHILD_RSRV, DD_USED_CHILD, tx); } } typedef struct dsl_dir_set_qr_arg { const char *ddsqra_name; zprop_source_t ddsqra_source; uint64_t ddsqra_value; } dsl_dir_set_qr_arg_t; static int dsl_dir_set_quota_check(void *arg, dmu_tx_t *tx) { dsl_dir_set_qr_arg_t *ddsqra = arg; dsl_pool_t *dp = dmu_tx_pool(tx); dsl_dataset_t *ds; int error; uint64_t towrite, newval; error = dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds); if (error != 0) return (error); error = dsl_prop_predict(ds->ds_dir, "quota", ddsqra->ddsqra_source, 
ddsqra->ddsqra_value, &newval); if (error != 0) { dsl_dataset_rele(ds, FTAG); return (error); } if (newval == 0) { dsl_dataset_rele(ds, FTAG); return (0); } mutex_enter(&ds->ds_dir->dd_lock); /* * If we are doing the preliminary check in open context, and * there are pending changes, then don't fail it, since the * pending changes could under-estimate the amount of space to be * freed up. */ towrite = dsl_dir_space_towrite(ds->ds_dir); if ((dmu_tx_is_syncing(tx) || towrite == 0) && (newval < dsl_dir_phys(ds->ds_dir)->dd_reserved || newval < dsl_dir_phys(ds->ds_dir)->dd_used_bytes + towrite)) { error = SET_ERROR(ENOSPC); } mutex_exit(&ds->ds_dir->dd_lock); dsl_dataset_rele(ds, FTAG); return (error); } static void dsl_dir_set_quota_sync(void *arg, dmu_tx_t *tx) { dsl_dir_set_qr_arg_t *ddsqra = arg; dsl_pool_t *dp = dmu_tx_pool(tx); dsl_dataset_t *ds; uint64_t newval; VERIFY0(dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds)); if (spa_version(dp->dp_spa) >= SPA_VERSION_RECVD_PROPS) { dsl_prop_set_sync_impl(ds, zfs_prop_to_name(ZFS_PROP_QUOTA), ddsqra->ddsqra_source, sizeof (ddsqra->ddsqra_value), 1, &ddsqra->ddsqra_value, tx); VERIFY0(dsl_prop_get_int_ds(ds, zfs_prop_to_name(ZFS_PROP_QUOTA), &newval)); } else { newval = ddsqra->ddsqra_value; spa_history_log_internal_ds(ds, "set", tx, "%s=%lld", zfs_prop_to_name(ZFS_PROP_QUOTA), (longlong_t)newval); } dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx); mutex_enter(&ds->ds_dir->dd_lock); dsl_dir_phys(ds->ds_dir)->dd_quota = newval; mutex_exit(&ds->ds_dir->dd_lock); dsl_dataset_rele(ds, FTAG); } int dsl_dir_set_quota(const char *ddname, zprop_source_t source, uint64_t quota) { dsl_dir_set_qr_arg_t ddsqra; ddsqra.ddsqra_name = ddname; ddsqra.ddsqra_source = source; ddsqra.ddsqra_value = quota; return (dsl_sync_task(ddname, dsl_dir_set_quota_check, dsl_dir_set_quota_sync, &ddsqra, 0, ZFS_SPACE_CHECK_EXTRA_RESERVED)); } static int dsl_dir_set_reservation_check(void *arg, dmu_tx_t *tx) { dsl_dir_set_qr_arg_t *ddsqra = arg; dsl_pool_t *dp = dmu_tx_pool(tx); dsl_dataset_t *ds; dsl_dir_t *dd; uint64_t newval, used, avail; int error; error = dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds); if (error != 0) return (error); dd = ds->ds_dir; /* * If we are doing the preliminary check in open context, the * space estimates may be inaccurate. 
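 * In that case we skip the check entirely; the syncing-context
 * execution of this function provides the definitive answer.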
*/ if (!dmu_tx_is_syncing(tx)) { dsl_dataset_rele(ds, FTAG); return (0); } error = dsl_prop_predict(ds->ds_dir, zfs_prop_to_name(ZFS_PROP_RESERVATION), ddsqra->ddsqra_source, ddsqra->ddsqra_value, &newval); if (error != 0) { dsl_dataset_rele(ds, FTAG); return (error); } mutex_enter(&dd->dd_lock); used = dsl_dir_phys(dd)->dd_used_bytes; mutex_exit(&dd->dd_lock); if (dd->dd_parent) { avail = dsl_dir_space_available(dd->dd_parent, NULL, 0, FALSE); } else { avail = dsl_pool_adjustedsize(dd->dd_pool, ZFS_SPACE_CHECK_NORMAL) - used; } if (MAX(used, newval) > MAX(used, dsl_dir_phys(dd)->dd_reserved)) { uint64_t delta = MAX(used, newval) - MAX(used, dsl_dir_phys(dd)->dd_reserved); if (delta > avail || (dsl_dir_phys(dd)->dd_quota > 0 && newval > dsl_dir_phys(dd)->dd_quota)) error = SET_ERROR(ENOSPC); } dsl_dataset_rele(ds, FTAG); return (error); } void dsl_dir_set_reservation_sync_impl(dsl_dir_t *dd, uint64_t value, dmu_tx_t *tx) { uint64_t used; int64_t delta; dmu_buf_will_dirty(dd->dd_dbuf, tx); mutex_enter(&dd->dd_lock); used = dsl_dir_phys(dd)->dd_used_bytes; delta = MAX(used, value) - MAX(used, dsl_dir_phys(dd)->dd_reserved); dsl_dir_phys(dd)->dd_reserved = value; if (dd->dd_parent != NULL) { /* Roll up this additional usage into our ancestors */ dsl_dir_diduse_space(dd->dd_parent, DD_USED_CHILD_RSRV, delta, 0, 0, tx); } mutex_exit(&dd->dd_lock); } static void dsl_dir_set_reservation_sync(void *arg, dmu_tx_t *tx) { dsl_dir_set_qr_arg_t *ddsqra = arg; dsl_pool_t *dp = dmu_tx_pool(tx); dsl_dataset_t *ds; uint64_t newval; VERIFY0(dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds)); if (spa_version(dp->dp_spa) >= SPA_VERSION_RECVD_PROPS) { dsl_prop_set_sync_impl(ds, zfs_prop_to_name(ZFS_PROP_RESERVATION), ddsqra->ddsqra_source, sizeof (ddsqra->ddsqra_value), 1, &ddsqra->ddsqra_value, tx); VERIFY0(dsl_prop_get_int_ds(ds, zfs_prop_to_name(ZFS_PROP_RESERVATION), &newval)); } else { newval = ddsqra->ddsqra_value; spa_history_log_internal_ds(ds, "set", tx, "%s=%lld", zfs_prop_to_name(ZFS_PROP_RESERVATION), (longlong_t)newval); } dsl_dir_set_reservation_sync_impl(ds->ds_dir, newval, tx); dsl_dataset_rele(ds, FTAG); } int dsl_dir_set_reservation(const char *ddname, zprop_source_t source, uint64_t reservation) { dsl_dir_set_qr_arg_t ddsqra; ddsqra.ddsqra_name = ddname; ddsqra.ddsqra_source = source; ddsqra.ddsqra_value = reservation; return (dsl_sync_task(ddname, dsl_dir_set_reservation_check, dsl_dir_set_reservation_sync, &ddsqra, 0, ZFS_SPACE_CHECK_EXTRA_RESERVED)); } static dsl_dir_t * closest_common_ancestor(dsl_dir_t *ds1, dsl_dir_t *ds2) { for (; ds1; ds1 = ds1->dd_parent) { dsl_dir_t *dd; for (dd = ds2; dd; dd = dd->dd_parent) { if (ds1 == dd) return (dd); } } return (NULL); } /* * If delta is applied to dd, how much of that delta would be applied to * ancestor? Syncing context only. 
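 * A reservation along the path can absorb part of the delta: for
 * example, a dir with 4G used and a 3G reservation that frees 2G only
 * propagates a 1G decrease to its parent, because the reservation
 * keeps at least 3G charged there either way (see parent_delta()).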
*/ static int64_t would_change(dsl_dir_t *dd, int64_t delta, dsl_dir_t *ancestor) { if (dd == ancestor) return (delta); mutex_enter(&dd->dd_lock); delta = parent_delta(dd, dsl_dir_phys(dd)->dd_used_bytes, delta); mutex_exit(&dd->dd_lock); return (would_change(dd->dd_parent, delta, ancestor)); } typedef struct dsl_dir_rename_arg { const char *ddra_oldname; const char *ddra_newname; cred_t *ddra_cred; proc_t *ddra_proc; } dsl_dir_rename_arg_t; typedef struct dsl_valid_rename_arg { int char_delta; int nest_delta; } dsl_valid_rename_arg_t; static int dsl_valid_rename(dsl_pool_t *dp, dsl_dataset_t *ds, void *arg) { (void) dp; dsl_valid_rename_arg_t *dvra = arg; char namebuf[ZFS_MAX_DATASET_NAME_LEN]; dsl_dataset_name(ds, namebuf); ASSERT3U(strnlen(namebuf, ZFS_MAX_DATASET_NAME_LEN), <, ZFS_MAX_DATASET_NAME_LEN); int namelen = strlen(namebuf) + dvra->char_delta; int depth = get_dataset_depth(namebuf) + dvra->nest_delta; if (namelen >= ZFS_MAX_DATASET_NAME_LEN) return (SET_ERROR(ENAMETOOLONG)); if (dvra->nest_delta > 0 && depth >= zfs_max_dataset_nesting) return (SET_ERROR(ENAMETOOLONG)); return (0); } static int dsl_dir_rename_check(void *arg, dmu_tx_t *tx) { dsl_dir_rename_arg_t *ddra = arg; dsl_pool_t *dp = dmu_tx_pool(tx); dsl_dir_t *dd, *newparent; dsl_valid_rename_arg_t dvra; dsl_dataset_t *parentds; objset_t *parentos; const char *mynewname; int error; /* target dir should exist */ error = dsl_dir_hold(dp, ddra->ddra_oldname, FTAG, &dd, NULL); if (error != 0) return (error); /* new parent should exist */ error = dsl_dir_hold(dp, ddra->ddra_newname, FTAG, &newparent, &mynewname); if (error != 0) { dsl_dir_rele(dd, FTAG); return (error); } /* can't rename to different pool */ if (dd->dd_pool != newparent->dd_pool) { dsl_dir_rele(newparent, FTAG); dsl_dir_rele(dd, FTAG); return (SET_ERROR(EXDEV)); } /* new name should not already exist */ if (mynewname == NULL) { dsl_dir_rele(newparent, FTAG); dsl_dir_rele(dd, FTAG); return (SET_ERROR(EEXIST)); } /* can't rename below anything but filesystems (eg. no ZVOLs) */ error = dsl_dataset_hold_obj(newparent->dd_pool, dsl_dir_phys(newparent)->dd_head_dataset_obj, FTAG, &parentds); if (error != 0) { dsl_dir_rele(newparent, FTAG); dsl_dir_rele(dd, FTAG); return (error); } error = dmu_objset_from_ds(parentds, &parentos); if (error != 0) { dsl_dataset_rele(parentds, FTAG); dsl_dir_rele(newparent, FTAG); dsl_dir_rele(dd, FTAG); return (error); } if (dmu_objset_type(parentos) != DMU_OST_ZFS) { dsl_dataset_rele(parentds, FTAG); dsl_dir_rele(newparent, FTAG); dsl_dir_rele(dd, FTAG); return (SET_ERROR(ZFS_ERR_WRONG_PARENT)); } dsl_dataset_rele(parentds, FTAG); ASSERT3U(strnlen(ddra->ddra_newname, ZFS_MAX_DATASET_NAME_LEN), <, ZFS_MAX_DATASET_NAME_LEN); ASSERT3U(strnlen(ddra->ddra_oldname, ZFS_MAX_DATASET_NAME_LEN), <, ZFS_MAX_DATASET_NAME_LEN); dvra.char_delta = strlen(ddra->ddra_newname) - strlen(ddra->ddra_oldname); dvra.nest_delta = get_dataset_depth(ddra->ddra_newname) - get_dataset_depth(ddra->ddra_oldname); /* if the name length is growing, validate child name lengths */ if (dvra.char_delta > 0 || dvra.nest_delta > 0) { error = dmu_objset_find_dp(dp, dd->dd_object, dsl_valid_rename, &dvra, DS_FIND_CHILDREN | DS_FIND_SNAPSHOTS); if (error != 0) { dsl_dir_rele(newparent, FTAG); dsl_dir_rele(dd, FTAG); return (error); } } if (dmu_tx_is_syncing(tx)) { if (spa_feature_is_active(dp->dp_spa, SPA_FEATURE_FS_SS_LIMIT)) { /* * Although this is the check function and we don't * normally make on-disk changes in check functions, * we need to do that here. 
* * Ensure this portion of the tree's counts have been * initialized in case the new parent has limits set. */ dsl_dir_init_fs_ss_count(dd, tx); } } if (newparent != dd->dd_parent) { /* is there enough space? */ uint64_t myspace = MAX(dsl_dir_phys(dd)->dd_used_bytes, dsl_dir_phys(dd)->dd_reserved); objset_t *os = dd->dd_pool->dp_meta_objset; uint64_t fs_cnt = 0; uint64_t ss_cnt = 0; if (dsl_dir_is_zapified(dd)) { int err; err = zap_lookup(os, dd->dd_object, DD_FIELD_FILESYSTEM_COUNT, sizeof (fs_cnt), 1, &fs_cnt); if (err != ENOENT && err != 0) { dsl_dir_rele(newparent, FTAG); dsl_dir_rele(dd, FTAG); return (err); } /* * have to add 1 for the filesystem itself that we're * moving */ fs_cnt++; err = zap_lookup(os, dd->dd_object, DD_FIELD_SNAPSHOT_COUNT, sizeof (ss_cnt), 1, &ss_cnt); if (err != ENOENT && err != 0) { dsl_dir_rele(newparent, FTAG); dsl_dir_rele(dd, FTAG); return (err); } } /* check for encryption errors */ error = dsl_dir_rename_crypt_check(dd, newparent); if (error != 0) { dsl_dir_rele(newparent, FTAG); dsl_dir_rele(dd, FTAG); return (SET_ERROR(EACCES)); } /* no rename into our descendant */ if (closest_common_ancestor(dd, newparent) == dd) { dsl_dir_rele(newparent, FTAG); dsl_dir_rele(dd, FTAG); return (SET_ERROR(EINVAL)); } error = dsl_dir_transfer_possible(dd->dd_parent, newparent, fs_cnt, ss_cnt, myspace, ddra->ddra_cred, ddra->ddra_proc); if (error != 0) { dsl_dir_rele(newparent, FTAG); dsl_dir_rele(dd, FTAG); return (error); } } dsl_dir_rele(newparent, FTAG); dsl_dir_rele(dd, FTAG); return (0); } static void dsl_dir_rename_sync(void *arg, dmu_tx_t *tx) { dsl_dir_rename_arg_t *ddra = arg; dsl_pool_t *dp = dmu_tx_pool(tx); dsl_dir_t *dd, *newparent; const char *mynewname; objset_t *mos = dp->dp_meta_objset; VERIFY0(dsl_dir_hold(dp, ddra->ddra_oldname, FTAG, &dd, NULL)); VERIFY0(dsl_dir_hold(dp, ddra->ddra_newname, FTAG, &newparent, &mynewname)); /* Log this before we change the name. */ spa_history_log_internal_dd(dd, "rename", tx, "-> %s", ddra->ddra_newname); if (newparent != dd->dd_parent) { objset_t *os = dd->dd_pool->dp_meta_objset; uint64_t fs_cnt = 0; uint64_t ss_cnt = 0; /* * We already made sure the dd counts were initialized in the * check function. 
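 * That is why the lookups below can simply VERIFY success.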
*/ if (spa_feature_is_active(dp->dp_spa, SPA_FEATURE_FS_SS_LIMIT)) { VERIFY0(zap_lookup(os, dd->dd_object, DD_FIELD_FILESYSTEM_COUNT, sizeof (fs_cnt), 1, &fs_cnt)); /* add 1 for the filesystem itself that we're moving */ fs_cnt++; VERIFY0(zap_lookup(os, dd->dd_object, DD_FIELD_SNAPSHOT_COUNT, sizeof (ss_cnt), 1, &ss_cnt)); } dsl_fs_ss_count_adjust(dd->dd_parent, -fs_cnt, DD_FIELD_FILESYSTEM_COUNT, tx); dsl_fs_ss_count_adjust(newparent, fs_cnt, DD_FIELD_FILESYSTEM_COUNT, tx); dsl_fs_ss_count_adjust(dd->dd_parent, -ss_cnt, DD_FIELD_SNAPSHOT_COUNT, tx); dsl_fs_ss_count_adjust(newparent, ss_cnt, DD_FIELD_SNAPSHOT_COUNT, tx); dsl_dir_diduse_space(dd->dd_parent, DD_USED_CHILD, -dsl_dir_phys(dd)->dd_used_bytes, -dsl_dir_phys(dd)->dd_compressed_bytes, -dsl_dir_phys(dd)->dd_uncompressed_bytes, tx); dsl_dir_diduse_space(newparent, DD_USED_CHILD, dsl_dir_phys(dd)->dd_used_bytes, dsl_dir_phys(dd)->dd_compressed_bytes, dsl_dir_phys(dd)->dd_uncompressed_bytes, tx); if (dsl_dir_phys(dd)->dd_reserved > dsl_dir_phys(dd)->dd_used_bytes) { uint64_t unused_rsrv = dsl_dir_phys(dd)->dd_reserved - dsl_dir_phys(dd)->dd_used_bytes; dsl_dir_diduse_space(dd->dd_parent, DD_USED_CHILD_RSRV, -unused_rsrv, 0, 0, tx); dsl_dir_diduse_space(newparent, DD_USED_CHILD_RSRV, unused_rsrv, 0, 0, tx); } } dmu_buf_will_dirty(dd->dd_dbuf, tx); /* remove from old parent zapobj */ VERIFY0(zap_remove(mos, dsl_dir_phys(dd->dd_parent)->dd_child_dir_zapobj, dd->dd_myname, tx)); (void) strlcpy(dd->dd_myname, mynewname, sizeof (dd->dd_myname)); dsl_dir_rele(dd->dd_parent, dd); dsl_dir_phys(dd)->dd_parent_obj = newparent->dd_object; VERIFY0(dsl_dir_hold_obj(dp, newparent->dd_object, NULL, dd, &dd->dd_parent)); /* add to new parent zapobj */ VERIFY0(zap_add(mos, dsl_dir_phys(newparent)->dd_child_dir_zapobj, dd->dd_myname, 8, 1, &dd->dd_object, tx)); /* TODO: A rename callback to avoid these layering violations. 
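 * (zfsvfs_update_fromname() and zvol_rename_minors() below reach from
 * the DSL layer up into the mounted-filesystem and zvol device naming,
 * which is the violation being referred to.)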
*/ zfsvfs_update_fromname(ddra->ddra_oldname, ddra->ddra_newname); zvol_rename_minors(dp->dp_spa, ddra->ddra_oldname, ddra->ddra_newname, B_TRUE); dsl_prop_notify_all(dd); dsl_dir_rele(newparent, FTAG); dsl_dir_rele(dd, FTAG); } int dsl_dir_rename(const char *oldname, const char *newname) { dsl_dir_rename_arg_t ddra; ddra.ddra_oldname = oldname; ddra.ddra_newname = newname; ddra.ddra_cred = CRED(); ddra.ddra_proc = curproc; return (dsl_sync_task(oldname, dsl_dir_rename_check, dsl_dir_rename_sync, &ddra, 3, ZFS_SPACE_CHECK_RESERVED)); } int dsl_dir_transfer_possible(dsl_dir_t *sdd, dsl_dir_t *tdd, uint64_t fs_cnt, uint64_t ss_cnt, uint64_t space, cred_t *cr, proc_t *proc) { dsl_dir_t *ancestor; int64_t adelta; uint64_t avail; int err; ancestor = closest_common_ancestor(sdd, tdd); adelta = would_change(sdd, -space, ancestor); avail = dsl_dir_space_available(tdd, ancestor, adelta, FALSE); if (avail < space) return (SET_ERROR(ENOSPC)); err = dsl_fs_ss_limit_check(tdd, fs_cnt, ZFS_PROP_FILESYSTEM_LIMIT, ancestor, cr, proc); if (err != 0) return (err); err = dsl_fs_ss_limit_check(tdd, ss_cnt, ZFS_PROP_SNAPSHOT_LIMIT, ancestor, cr, proc); if (err != 0) return (err); return (0); } inode_timespec_t dsl_dir_snap_cmtime(dsl_dir_t *dd) { inode_timespec_t t; mutex_enter(&dd->dd_lock); t = dd->dd_snap_cmtime; mutex_exit(&dd->dd_lock); return (t); } void dsl_dir_snap_cmtime_update(dsl_dir_t *dd, dmu_tx_t *tx) { dsl_pool_t *dp = dmu_tx_pool(tx); inode_timespec_t t; gethrestime(&t); mutex_enter(&dd->dd_lock); dd->dd_snap_cmtime = t; if (spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_EXTENSIBLE_DATASET)) { objset_t *mos = dd->dd_pool->dp_meta_objset; uint64_t ddobj = dd->dd_object; dsl_dir_zapify(dd, tx); VERIFY0(zap_update(mos, ddobj, DD_FIELD_SNAPSHOTS_CHANGED, sizeof (uint64_t), sizeof (inode_timespec_t) / sizeof (uint64_t), &t, tx)); } mutex_exit(&dd->dd_lock); } void dsl_dir_zapify(dsl_dir_t *dd, dmu_tx_t *tx) { objset_t *mos = dd->dd_pool->dp_meta_objset; dmu_object_zapify(mos, dd->dd_object, DMU_OT_DSL_DIR, tx); } boolean_t dsl_dir_is_zapified(dsl_dir_t *dd) { dmu_object_info_t doi; dmu_object_info_from_db(dd->dd_dbuf, &doi); return (doi.doi_type == DMU_OTN_ZAP_METADATA); } void dsl_dir_livelist_open(dsl_dir_t *dd, uint64_t obj) { objset_t *mos = dd->dd_pool->dp_meta_objset; ASSERT(spa_feature_is_active(dd->dd_pool->dp_spa, SPA_FEATURE_LIVELIST)); dsl_deadlist_open(&dd->dd_livelist, mos, obj); bplist_create(&dd->dd_pending_allocs); bplist_create(&dd->dd_pending_frees); } void dsl_dir_livelist_close(dsl_dir_t *dd) { dsl_deadlist_close(&dd->dd_livelist); bplist_destroy(&dd->dd_pending_allocs); bplist_destroy(&dd->dd_pending_frees); } void dsl_dir_remove_livelist(dsl_dir_t *dd, dmu_tx_t *tx, boolean_t total) { uint64_t obj; dsl_pool_t *dp = dmu_tx_pool(tx); spa_t *spa = dp->dp_spa; livelist_condense_entry_t to_condense = spa->spa_to_condense; if (!dsl_deadlist_is_open(&dd->dd_livelist)) return; /* * If the livelist being removed is set to be condensed, stop the * condense zthr and indicate the cancellation in the spa_to_condense * struct in case the condense no-wait synctask has already started */ zthr_t *ll_condense_thread = spa->spa_livelist_condense_zthr; if (ll_condense_thread != NULL && (to_condense.ds != NULL) && (to_condense.ds->ds_dir == dd)) { /* * We use zthr_wait_cycle_done instead of zthr_cancel * because we don't want to destroy the zthr, just have * it skip its current task. 
*/ spa->spa_to_condense.cancelled = B_TRUE; zthr_wait_cycle_done(ll_condense_thread); /* * If we've returned from zthr_wait_cycle_done without * clearing the to_condense data structure it's either * because the no-wait synctask has started (which is * indicated by 'syncing' field of to_condense) and we * can expect it to clear to_condense on its own. * Otherwise, we returned before the zthr ran. The * checkfunc will now fail as cancelled == B_TRUE so we * can safely NULL out ds, allowing a different dir's * livelist to be condensed. * * We can be sure that the to_condense struct will not * be repopulated at this stage because both this * function and dsl_livelist_try_condense execute in * syncing context. */ if ((spa->spa_to_condense.ds != NULL) && !spa->spa_to_condense.syncing) { dmu_buf_rele(spa->spa_to_condense.ds->ds_dbuf, spa); spa->spa_to_condense.ds = NULL; } } dsl_dir_livelist_close(dd); VERIFY0(zap_lookup(dp->dp_meta_objset, dd->dd_object, DD_FIELD_LIVELIST, sizeof (uint64_t), 1, &obj)); VERIFY0(zap_remove(dp->dp_meta_objset, dd->dd_object, DD_FIELD_LIVELIST, tx)); if (total) { dsl_deadlist_free(dp->dp_meta_objset, obj, tx); spa_feature_decr(spa, SPA_FEATURE_LIVELIST, tx); } } static int dsl_dir_activity_in_progress(dsl_dir_t *dd, dsl_dataset_t *ds, zfs_wait_activity_t activity, boolean_t *in_progress) { int error = 0; ASSERT(MUTEX_HELD(&dd->dd_activity_lock)); switch (activity) { case ZFS_WAIT_DELETEQ: { #ifdef _KERNEL objset_t *os; error = dmu_objset_from_ds(ds, &os); if (error != 0) break; mutex_enter(&os->os_user_ptr_lock); void *user = dmu_objset_get_user(os); mutex_exit(&os->os_user_ptr_lock); if (dmu_objset_type(os) != DMU_OST_ZFS || user == NULL || zfs_get_vfs_flag_unmounted(os)) { *in_progress = B_FALSE; return (0); } uint64_t readonly = B_FALSE; error = zfs_get_temporary_prop(ds, ZFS_PROP_READONLY, &readonly, NULL); if (error != 0) break; if (readonly || !spa_writeable(dd->dd_pool->dp_spa)) { *in_progress = B_FALSE; return (0); } uint64_t count, unlinked_obj; error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_UNLINKED_SET, 8, 1, &unlinked_obj); if (error != 0) { dsl_dataset_rele(ds, FTAG); break; } error = zap_count(os, unlinked_obj, &count); if (error == 0) *in_progress = (count != 0); break; #else /* * The delete queue is ZPL specific, and libzpool doesn't have * it. It doesn't make sense to wait for it. 
*/ (void) ds; *in_progress = B_FALSE; break; #endif } default: panic("unrecognized value for activity %d", activity); } return (error); } int dsl_dir_wait(dsl_dir_t *dd, dsl_dataset_t *ds, zfs_wait_activity_t activity, boolean_t *waited) { int error = 0; boolean_t in_progress; dsl_pool_t *dp = dd->dd_pool; for (;;) { dsl_pool_config_enter(dp, FTAG); error = dsl_dir_activity_in_progress(dd, ds, activity, &in_progress); dsl_pool_config_exit(dp, FTAG); if (error != 0 || !in_progress) break; *waited = B_TRUE; if (cv_wait_sig(&dd->dd_activity_cv, &dd->dd_activity_lock) == 0 || dd->dd_activity_cancelled) { error = SET_ERROR(EINTR); break; } } return (error); } void dsl_dir_cancel_waiters(dsl_dir_t *dd) { mutex_enter(&dd->dd_activity_lock); dd->dd_activity_cancelled = B_TRUE; cv_broadcast(&dd->dd_activity_cv); while (dd->dd_activity_waiters > 0) cv_wait(&dd->dd_activity_cv, &dd->dd_activity_lock); mutex_exit(&dd->dd_activity_lock); } #if defined(_KERNEL) EXPORT_SYMBOL(dsl_dir_set_quota); EXPORT_SYMBOL(dsl_dir_set_reservation); #endif diff --git a/module/zfs/dsl_prop.c b/module/zfs/dsl_prop.c index 1d3d26124949..610e887b3fba 100644 --- a/module/zfs/dsl_prop.c +++ b/module/zfs/dsl_prop.c @@ -1,1287 +1,1287 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or https://opensource.org/licenses/CDDL-1.0. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012, 2015 by Delphix. All rights reserved. * Copyright (c) 2013 Martin Matuska. All rights reserved. * Copyright 2019 Joyent, Inc. */ #include #include #include #include #include #include #include #include #include #include #include #include "zfs_prop.h" #define ZPROP_INHERIT_SUFFIX "$inherit" #define ZPROP_RECVD_SUFFIX "$recvd" static int dodefault(zfs_prop_t prop, int intsz, int numints, void *buf) { /* * The setonce properties are read-only, BUT they still * have a default value that can be used as the initial * value. 
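 * (Examples are normalization and casesensitivity: they can only be
 * set at dataset creation time, yet still fall back to a default
 * here.)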
*/ if (prop == ZPROP_INVAL || (zfs_prop_readonly(prop) && !zfs_prop_setonce(prop))) return (SET_ERROR(ENOENT)); if (zfs_prop_get_type(prop) == PROP_TYPE_STRING) { if (intsz != 1) return (SET_ERROR(EOVERFLOW)); - (void) strncpy(buf, zfs_prop_default_string(prop), + (void) strlcpy(buf, zfs_prop_default_string(prop), numints); } else { if (intsz != 8 || numints < 1) return (SET_ERROR(EOVERFLOW)); *(uint64_t *)buf = zfs_prop_default_numeric(prop); } return (0); } int dsl_prop_get_dd(dsl_dir_t *dd, const char *propname, int intsz, int numints, void *buf, char *setpoint, boolean_t snapshot) { int err; dsl_dir_t *target = dd; objset_t *mos = dd->dd_pool->dp_meta_objset; zfs_prop_t prop; boolean_t inheritable; boolean_t inheriting = B_FALSE; char *inheritstr; char *recvdstr; ASSERT(dsl_pool_config_held(dd->dd_pool)); if (setpoint) setpoint[0] = '\0'; prop = zfs_name_to_prop(propname); inheritable = (prop == ZPROP_USERPROP || zfs_prop_inheritable(prop)); inheritstr = kmem_asprintf("%s%s", propname, ZPROP_INHERIT_SUFFIX); recvdstr = kmem_asprintf("%s%s", propname, ZPROP_RECVD_SUFFIX); /* * Note: dd may become NULL, therefore we shouldn't dereference it * after this loop. */ for (; dd != NULL; dd = dd->dd_parent) { if (dd != target || snapshot) { if (!inheritable) { err = SET_ERROR(ENOENT); break; } inheriting = B_TRUE; } /* Check for a local value. */ err = zap_lookup(mos, dsl_dir_phys(dd)->dd_props_zapobj, propname, intsz, numints, buf); if (err != ENOENT) { if (setpoint != NULL && err == 0) dsl_dir_name(dd, setpoint); break; } /* * Skip the check for a received value if there is an explicit * inheritance entry. */ err = zap_contains(mos, dsl_dir_phys(dd)->dd_props_zapobj, inheritstr); if (err != 0 && err != ENOENT) break; if (err == ENOENT) { /* Check for a received value. */ err = zap_lookup(mos, dsl_dir_phys(dd)->dd_props_zapobj, recvdstr, intsz, numints, buf); if (err != ENOENT) { if (setpoint != NULL && err == 0) { if (inheriting) { dsl_dir_name(dd, setpoint); } else { (void) strlcpy(setpoint, ZPROP_SOURCE_VAL_RECVD, MAXNAMELEN); } } break; } } /* * If we found an explicit inheritance entry, err is zero even * though we haven't yet found the value, so reinitializing err * at the end of the loop (instead of at the beginning) ensures * that err has a valid post-loop value. */ err = SET_ERROR(ENOENT); } if (err == ENOENT) err = dodefault(prop, intsz, numints, buf); kmem_strfree(inheritstr); kmem_strfree(recvdstr); return (err); } int dsl_prop_get_ds(dsl_dataset_t *ds, const char *propname, int intsz, int numints, void *buf, char *setpoint) { zfs_prop_t prop = zfs_name_to_prop(propname); boolean_t inheritable; uint64_t zapobj; ASSERT(dsl_pool_config_held(ds->ds_dir->dd_pool)); inheritable = (prop == ZPROP_USERPROP || zfs_prop_inheritable(prop)); zapobj = dsl_dataset_phys(ds)->ds_props_obj; if (zapobj != 0) { objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; int err; ASSERT(ds->ds_is_snapshot); /* Check for a local value. */ err = zap_lookup(mos, zapobj, propname, intsz, numints, buf); if (err != ENOENT) { if (setpoint != NULL && err == 0) dsl_dataset_name(ds, setpoint); return (err); } /* * Skip the check for a received value if there is an explicit * inheritance entry. */ if (inheritable) { char *inheritstr = kmem_asprintf("%s%s", propname, ZPROP_INHERIT_SUFFIX); err = zap_contains(mos, zapobj, inheritstr); kmem_strfree(inheritstr); if (err != 0 && err != ENOENT) return (err); } if (err == ENOENT) { /* Check for a received value. 
*/ char *recvdstr = kmem_asprintf("%s%s", propname, ZPROP_RECVD_SUFFIX); err = zap_lookup(mos, zapobj, recvdstr, intsz, numints, buf); kmem_strfree(recvdstr); if (err != ENOENT) { if (setpoint != NULL && err == 0) (void) strlcpy(setpoint, ZPROP_SOURCE_VAL_RECVD, MAXNAMELEN); return (err); } } } return (dsl_prop_get_dd(ds->ds_dir, propname, intsz, numints, buf, setpoint, ds->ds_is_snapshot)); } static dsl_prop_record_t * dsl_prop_record_find(dsl_dir_t *dd, const char *propname) { dsl_prop_record_t *pr = NULL; ASSERT(MUTEX_HELD(&dd->dd_lock)); for (pr = list_head(&dd->dd_props); pr != NULL; pr = list_next(&dd->dd_props, pr)) { if (strcmp(pr->pr_propname, propname) == 0) break; } return (pr); } static dsl_prop_record_t * dsl_prop_record_create(dsl_dir_t *dd, const char *propname) { dsl_prop_record_t *pr; ASSERT(MUTEX_HELD(&dd->dd_lock)); pr = kmem_alloc(sizeof (dsl_prop_record_t), KM_SLEEP); pr->pr_propname = spa_strdup(propname); list_create(&pr->pr_cbs, sizeof (dsl_prop_cb_record_t), offsetof(dsl_prop_cb_record_t, cbr_pr_node)); list_insert_head(&dd->dd_props, pr); return (pr); } void dsl_prop_init(dsl_dir_t *dd) { list_create(&dd->dd_props, sizeof (dsl_prop_record_t), offsetof(dsl_prop_record_t, pr_node)); } void dsl_prop_fini(dsl_dir_t *dd) { dsl_prop_record_t *pr; while ((pr = list_remove_head(&dd->dd_props)) != NULL) { list_destroy(&pr->pr_cbs); spa_strfree((char *)pr->pr_propname); kmem_free(pr, sizeof (dsl_prop_record_t)); } list_destroy(&dd->dd_props); } /* * Register interest in the named property. We'll call the callback * once to notify it of the current property value, and again each time * the property changes, until this callback is unregistered. * * Return 0 on success, errno if the prop is not an integer value. */ int dsl_prop_register(dsl_dataset_t *ds, const char *propname, dsl_prop_changed_cb_t *callback, void *cbarg) { dsl_dir_t *dd = ds->ds_dir; uint64_t value; dsl_prop_record_t *pr; dsl_prop_cb_record_t *cbr; int err; dsl_pool_t *dp __maybe_unused = dd->dd_pool; ASSERT(dsl_pool_config_held(dp)); err = dsl_prop_get_int_ds(ds, propname, &value); if (err != 0) return (err); cbr = kmem_alloc(sizeof (dsl_prop_cb_record_t), KM_SLEEP); cbr->cbr_ds = ds; cbr->cbr_func = callback; cbr->cbr_arg = cbarg; mutex_enter(&dd->dd_lock); pr = dsl_prop_record_find(dd, propname); if (pr == NULL) pr = dsl_prop_record_create(dd, propname); cbr->cbr_pr = pr; list_insert_head(&pr->pr_cbs, cbr); list_insert_head(&ds->ds_prop_cbs, cbr); mutex_exit(&dd->dd_lock); cbr->cbr_func(cbr->cbr_arg, value); return (0); } int dsl_prop_get(const char *dsname, const char *propname, int intsz, int numints, void *buf, char *setpoint) { objset_t *os; int error; error = dmu_objset_hold(dsname, FTAG, &os); if (error != 0) return (error); error = dsl_prop_get_ds(dmu_objset_ds(os), propname, intsz, numints, buf, setpoint); dmu_objset_rele(os, FTAG); return (error); } /* * Get the current property value. It may have changed by the time this * function returns, so it is NOT safe to follow up with * dsl_prop_register() and assume that the value has not changed in * between. * * Return 0 on success, ENOENT if ddname is invalid. 
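 * The value is resolved like any other lookup: a local setting wins,
 * then a received ($recvd) value unless masked by an explicit $inherit
 * entry; failing that the search repeats at each ancestor, and finally
 * the property's default is used (see dsl_prop_get_dd()).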
*/ int dsl_prop_get_integer(const char *ddname, const char *propname, uint64_t *valuep, char *setpoint) { return (dsl_prop_get(ddname, propname, 8, 1, valuep, setpoint)); } int dsl_prop_get_int_ds(dsl_dataset_t *ds, const char *propname, uint64_t *valuep) { return (dsl_prop_get_ds(ds, propname, 8, 1, valuep, NULL)); } /* * Predict the effective value of the given special property if it were set with * the given value and source. This is not a general purpose function. It exists * only to handle the special requirements of the quota and reservation * properties. The fact that these properties are non-inheritable greatly * simplifies the prediction logic. * * Returns 0 on success, a positive error code on failure, or -1 if called with * a property not handled by this function. */ int dsl_prop_predict(dsl_dir_t *dd, const char *propname, zprop_source_t source, uint64_t value, uint64_t *newvalp) { zfs_prop_t prop = zfs_name_to_prop(propname); objset_t *mos; uint64_t zapobj; uint64_t version; char *recvdstr; int err = 0; switch (prop) { case ZFS_PROP_QUOTA: case ZFS_PROP_RESERVATION: case ZFS_PROP_REFQUOTA: case ZFS_PROP_REFRESERVATION: break; default: return (-1); } mos = dd->dd_pool->dp_meta_objset; zapobj = dsl_dir_phys(dd)->dd_props_zapobj; recvdstr = kmem_asprintf("%s%s", propname, ZPROP_RECVD_SUFFIX); version = spa_version(dd->dd_pool->dp_spa); if (version < SPA_VERSION_RECVD_PROPS) { if (source & ZPROP_SRC_NONE) source = ZPROP_SRC_NONE; else if (source & ZPROP_SRC_RECEIVED) source = ZPROP_SRC_LOCAL; } switch ((int)source) { case ZPROP_SRC_NONE: /* Revert to the received value, if any. */ err = zap_lookup(mos, zapobj, recvdstr, 8, 1, newvalp); if (err == ENOENT) *newvalp = 0; break; case ZPROP_SRC_LOCAL: *newvalp = value; break; case ZPROP_SRC_RECEIVED: /* * If there's no local setting, then the new received value will * be the effective value. */ err = zap_lookup(mos, zapobj, propname, 8, 1, newvalp); if (err == ENOENT) *newvalp = value; break; case (ZPROP_SRC_NONE | ZPROP_SRC_RECEIVED): /* * We're clearing the received value, so the local setting (if * it exists) remains the effective value. */ err = zap_lookup(mos, zapobj, propname, 8, 1, newvalp); if (err == ENOENT) *newvalp = 0; break; default: panic("unexpected property source: %d", source); } kmem_strfree(recvdstr); if (err == ENOENT) return (0); return (err); } /* * Unregister this callback. Return 0 on success, ENOENT if ddname is * invalid, or ENOMSG if no matching callback registered. * * NOTE: This function is no longer used internally but has been preserved * to prevent breaking external consumers (Lustre, etc). */ int dsl_prop_unregister(dsl_dataset_t *ds, const char *propname, dsl_prop_changed_cb_t *callback, void *cbarg) { dsl_dir_t *dd = ds->ds_dir; dsl_prop_cb_record_t *cbr; mutex_enter(&dd->dd_lock); for (cbr = list_head(&ds->ds_prop_cbs); cbr; cbr = list_next(&ds->ds_prop_cbs, cbr)) { if (cbr->cbr_ds == ds && cbr->cbr_func == callback && cbr->cbr_arg == cbarg && strcmp(cbr->cbr_pr->pr_propname, propname) == 0) break; } if (cbr == NULL) { mutex_exit(&dd->dd_lock); return (SET_ERROR(ENOMSG)); } list_remove(&ds->ds_prop_cbs, cbr); list_remove(&cbr->cbr_pr->pr_cbs, cbr); mutex_exit(&dd->dd_lock); kmem_free(cbr, sizeof (dsl_prop_cb_record_t)); return (0); } /* * Unregister all callbacks that are registered with the * given callback argument. 
*/ void dsl_prop_unregister_all(dsl_dataset_t *ds, void *cbarg) { dsl_prop_cb_record_t *cbr, *next_cbr; dsl_dir_t *dd = ds->ds_dir; mutex_enter(&dd->dd_lock); next_cbr = list_head(&ds->ds_prop_cbs); while (next_cbr != NULL) { cbr = next_cbr; next_cbr = list_next(&ds->ds_prop_cbs, cbr); if (cbr->cbr_arg == cbarg) { list_remove(&ds->ds_prop_cbs, cbr); list_remove(&cbr->cbr_pr->pr_cbs, cbr); kmem_free(cbr, sizeof (dsl_prop_cb_record_t)); } } mutex_exit(&dd->dd_lock); } boolean_t dsl_prop_hascb(dsl_dataset_t *ds) { return (!list_is_empty(&ds->ds_prop_cbs)); } static int dsl_prop_notify_all_cb(dsl_pool_t *dp, dsl_dataset_t *ds, void *arg) { (void) arg; dsl_dir_t *dd = ds->ds_dir; dsl_prop_record_t *pr; dsl_prop_cb_record_t *cbr; mutex_enter(&dd->dd_lock); for (pr = list_head(&dd->dd_props); pr; pr = list_next(&dd->dd_props, pr)) { for (cbr = list_head(&pr->pr_cbs); cbr; cbr = list_next(&pr->pr_cbs, cbr)) { uint64_t value; /* * Callback entries do not have holds on their * datasets so that datasets with registered * callbacks are still eligible for eviction. * Unlike operations to update properties on a * single dataset, we are performing a recursive * descent of related head datasets. The caller * of this function only has a dataset hold on * the passed in head dataset, not the snapshots * associated with this dataset. Without a hold, * the dataset pointer within callback records * for snapshots can be invalidated by eviction * at any time. * * Use dsl_dataset_try_add_ref() to verify * that the dataset for a snapshot has not * begun eviction processing and to prevent * eviction from occurring for the duration of * the callback. If the hold attempt fails, * this object is already being evicted and the * callback can be safely ignored. */ if (ds != cbr->cbr_ds && !dsl_dataset_try_add_ref(dp, cbr->cbr_ds, FTAG)) continue; if (dsl_prop_get_ds(cbr->cbr_ds, cbr->cbr_pr->pr_propname, sizeof (value), 1, &value, NULL) == 0) cbr->cbr_func(cbr->cbr_arg, value); if (ds != cbr->cbr_ds) dsl_dataset_rele(cbr->cbr_ds, FTAG); } } mutex_exit(&dd->dd_lock); return (0); } /* * Update all property values for ddobj & its descendants. This is used * when renaming the dir. */ void dsl_prop_notify_all(dsl_dir_t *dd) { dsl_pool_t *dp = dd->dd_pool; ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock)); (void) dmu_objset_find_dp(dp, dd->dd_object, dsl_prop_notify_all_cb, NULL, DS_FIND_CHILDREN); } static void dsl_prop_changed_notify(dsl_pool_t *dp, uint64_t ddobj, const char *propname, uint64_t value, int first) { dsl_dir_t *dd; dsl_prop_record_t *pr; dsl_prop_cb_record_t *cbr; objset_t *mos = dp->dp_meta_objset; zap_cursor_t zc; zap_attribute_t *za; int err; ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock)); err = dsl_dir_hold_obj(dp, ddobj, NULL, FTAG, &dd); if (err) return; if (!first) { /* * If the prop is set here, then this change is not * being inherited here or below; stop the recursion. */ err = zap_contains(mos, dsl_dir_phys(dd)->dd_props_zapobj, propname); if (err == 0) { dsl_dir_rele(dd, FTAG); return; } ASSERT3U(err, ==, ENOENT); } mutex_enter(&dd->dd_lock); pr = dsl_prop_record_find(dd, propname); if (pr != NULL) { for (cbr = list_head(&pr->pr_cbs); cbr; cbr = list_next(&pr->pr_cbs, cbr)) { uint64_t propobj; /* * cbr->cbr_ds may be invalidated due to eviction, * requiring the use of dsl_dataset_try_add_ref(). * See comment block in dsl_prop_notify_all_cb() * for details. 
*/ if (!dsl_dataset_try_add_ref(dp, cbr->cbr_ds, FTAG)) continue; propobj = dsl_dataset_phys(cbr->cbr_ds)->ds_props_obj; /* * If the property is not set on this ds, then it is * inherited here; call the callback. */ if (propobj == 0 || zap_contains(mos, propobj, propname) != 0) cbr->cbr_func(cbr->cbr_arg, value); dsl_dataset_rele(cbr->cbr_ds, FTAG); } } mutex_exit(&dd->dd_lock); za = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP); for (zap_cursor_init(&zc, mos, dsl_dir_phys(dd)->dd_child_dir_zapobj); zap_cursor_retrieve(&zc, za) == 0; zap_cursor_advance(&zc)) { dsl_prop_changed_notify(dp, za->za_first_integer, propname, value, FALSE); } kmem_free(za, sizeof (zap_attribute_t)); zap_cursor_fini(&zc); dsl_dir_rele(dd, FTAG); } void dsl_prop_set_sync_impl(dsl_dataset_t *ds, const char *propname, zprop_source_t source, int intsz, int numints, const void *value, dmu_tx_t *tx) { objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; uint64_t zapobj, intval, dummy, count; int isint; char valbuf[32]; const char *valstr = NULL; char *inheritstr; char *recvdstr; char *tbuf = NULL; int err; uint64_t version = spa_version(ds->ds_dir->dd_pool->dp_spa); isint = (dodefault(zfs_name_to_prop(propname), 8, 1, &intval) == 0); if (ds->ds_is_snapshot) { ASSERT(version >= SPA_VERSION_SNAP_PROPS); if (dsl_dataset_phys(ds)->ds_props_obj == 0 && (source & ZPROP_SRC_NONE) == 0) { dmu_buf_will_dirty(ds->ds_dbuf, tx); dsl_dataset_phys(ds)->ds_props_obj = zap_create(mos, DMU_OT_DSL_PROPS, DMU_OT_NONE, 0, tx); } zapobj = dsl_dataset_phys(ds)->ds_props_obj; } else { zapobj = dsl_dir_phys(ds->ds_dir)->dd_props_zapobj; } /* If we are removing objects from a non-existent ZAP just return */ if (zapobj == 0) return; if (version < SPA_VERSION_RECVD_PROPS) { if (source & ZPROP_SRC_NONE) source = ZPROP_SRC_NONE; else if (source & ZPROP_SRC_RECEIVED) source = ZPROP_SRC_LOCAL; } inheritstr = kmem_asprintf("%s%s", propname, ZPROP_INHERIT_SUFFIX); recvdstr = kmem_asprintf("%s%s", propname, ZPROP_RECVD_SUFFIX); switch ((int)source) { case ZPROP_SRC_NONE: /* * revert to received value, if any (inherit -S) * - remove propname * - remove propname$inherit */ err = zap_remove(mos, zapobj, propname, tx); ASSERT(err == 0 || err == ENOENT); err = zap_remove(mos, zapobj, inheritstr, tx); ASSERT(err == 0 || err == ENOENT); break; case ZPROP_SRC_LOCAL: /* * remove propname$inherit * set propname -> value */ err = zap_remove(mos, zapobj, inheritstr, tx); ASSERT(err == 0 || err == ENOENT); VERIFY0(zap_update(mos, zapobj, propname, intsz, numints, value, tx)); break; case ZPROP_SRC_INHERITED: /* * explicitly inherit * - remove propname * - set propname$inherit */ err = zap_remove(mos, zapobj, propname, tx); ASSERT(err == 0 || err == ENOENT); if (version >= SPA_VERSION_RECVD_PROPS && dsl_prop_get_int_ds(ds, ZPROP_HAS_RECVD, &dummy) == 0) { dummy = 0; VERIFY0(zap_update(mos, zapobj, inheritstr, 8, 1, &dummy, tx)); } break; case ZPROP_SRC_RECEIVED: /* * set propname$recvd -> value */ err = zap_update(mos, zapobj, recvdstr, intsz, numints, value, tx); ASSERT(err == 0); break; case (ZPROP_SRC_NONE | ZPROP_SRC_LOCAL | ZPROP_SRC_RECEIVED): /* * clear local and received settings * - remove propname * - remove propname$inherit * - remove propname$recvd */ err = zap_remove(mos, zapobj, propname, tx); ASSERT(err == 0 || err == ENOENT); err = zap_remove(mos, zapobj, inheritstr, tx); ASSERT(err == 0 || err == ENOENT); zfs_fallthrough; case (ZPROP_SRC_NONE | ZPROP_SRC_RECEIVED): /* * remove propname$recvd */ err = zap_remove(mos, zapobj, recvdstr, tx); ASSERT(err 
== 0 || err == ENOENT); break; default: cmn_err(CE_PANIC, "unexpected property source: %d", source); } kmem_strfree(inheritstr); kmem_strfree(recvdstr); /* * If we are left with an empty snap zap we can destroy it. * This will prevent unnecessary calls to zap_lookup() in * the "zfs list" and "zfs get" code paths. */ if (ds->ds_is_snapshot && zap_count(mos, zapobj, &count) == 0 && count == 0) { dmu_buf_will_dirty(ds->ds_dbuf, tx); dsl_dataset_phys(ds)->ds_props_obj = 0; zap_destroy(mos, zapobj, tx); } if (isint) { VERIFY0(dsl_prop_get_int_ds(ds, propname, &intval)); if (ds->ds_is_snapshot) { dsl_prop_cb_record_t *cbr; /* * It's a snapshot; nothing can inherit this * property, so just look for callbacks on this * ds here. */ mutex_enter(&ds->ds_dir->dd_lock); for (cbr = list_head(&ds->ds_prop_cbs); cbr; cbr = list_next(&ds->ds_prop_cbs, cbr)) { if (strcmp(cbr->cbr_pr->pr_propname, propname) == 0) cbr->cbr_func(cbr->cbr_arg, intval); } mutex_exit(&ds->ds_dir->dd_lock); } else { dsl_prop_changed_notify(ds->ds_dir->dd_pool, ds->ds_dir->dd_object, propname, intval, TRUE); } (void) snprintf(valbuf, sizeof (valbuf), "%lld", (longlong_t)intval); valstr = valbuf; } else { if (source == ZPROP_SRC_LOCAL) { valstr = value; } else { tbuf = kmem_alloc(ZAP_MAXVALUELEN, KM_SLEEP); if (dsl_prop_get_ds(ds, propname, 1, ZAP_MAXVALUELEN, tbuf, NULL) == 0) valstr = tbuf; } } spa_history_log_internal_ds(ds, (source == ZPROP_SRC_NONE || source == ZPROP_SRC_INHERITED) ? "inherit" : "set", tx, "%s=%s", propname, (valstr == NULL ? "" : valstr)); if (tbuf != NULL) kmem_free(tbuf, ZAP_MAXVALUELEN); } int dsl_prop_set_int(const char *dsname, const char *propname, zprop_source_t source, uint64_t value) { nvlist_t *nvl = fnvlist_alloc(); int error; fnvlist_add_uint64(nvl, propname, value); error = dsl_props_set(dsname, source, nvl); fnvlist_free(nvl); return (error); } int dsl_prop_set_string(const char *dsname, const char *propname, zprop_source_t source, const char *value) { nvlist_t *nvl = fnvlist_alloc(); int error; fnvlist_add_string(nvl, propname, value); error = dsl_props_set(dsname, source, nvl); fnvlist_free(nvl); return (error); } int dsl_prop_inherit(const char *dsname, const char *propname, zprop_source_t source) { nvlist_t *nvl = fnvlist_alloc(); int error; fnvlist_add_boolean(nvl, propname); error = dsl_props_set(dsname, source, nvl); fnvlist_free(nvl); return (error); } int dsl_props_set_check(void *arg, dmu_tx_t *tx) { dsl_props_set_arg_t *dpsa = arg; dsl_pool_t *dp = dmu_tx_pool(tx); dsl_dataset_t *ds; uint64_t version; nvpair_t *elem = NULL; int err; err = dsl_dataset_hold(dp, dpsa->dpsa_dsname, FTAG, &ds); if (err != 0) return (err); version = spa_version(ds->ds_dir->dd_pool->dp_spa); while ((elem = nvlist_next_nvpair(dpsa->dpsa_props, elem)) != NULL) { if (strlen(nvpair_name(elem)) >= ZAP_MAXNAMELEN) { dsl_dataset_rele(ds, FTAG); return (SET_ERROR(ENAMETOOLONG)); } if (nvpair_type(elem) == DATA_TYPE_STRING) { char *valstr = fnvpair_value_string(elem); if (strlen(valstr) >= (version < SPA_VERSION_STMF_PROP ? 
ZAP_OLDMAXVALUELEN : ZAP_MAXVALUELEN)) { dsl_dataset_rele(ds, FTAG); return (SET_ERROR(E2BIG)); } } } if (ds->ds_is_snapshot && version < SPA_VERSION_SNAP_PROPS) { dsl_dataset_rele(ds, FTAG); return (SET_ERROR(ENOTSUP)); } dsl_dataset_rele(ds, FTAG); return (0); } void dsl_props_set_sync_impl(dsl_dataset_t *ds, zprop_source_t source, nvlist_t *props, dmu_tx_t *tx) { nvpair_t *elem = NULL; while ((elem = nvlist_next_nvpair(props, elem)) != NULL) { nvpair_t *pair = elem; const char *name = nvpair_name(pair); if (nvpair_type(pair) == DATA_TYPE_NVLIST) { /* * This usually happens when we reuse the nvlist_t data * returned by the counterpart dsl_prop_get_all_impl(). * For instance we do this to restore the original * received properties when an error occurs in the * zfs_ioc_recv() codepath. */ nvlist_t *attrs = fnvpair_value_nvlist(pair); pair = fnvlist_lookup_nvpair(attrs, ZPROP_VALUE); } if (nvpair_type(pair) == DATA_TYPE_STRING) { const char *value = fnvpair_value_string(pair); dsl_prop_set_sync_impl(ds, name, source, 1, strlen(value) + 1, value, tx); } else if (nvpair_type(pair) == DATA_TYPE_UINT64) { uint64_t intval = fnvpair_value_uint64(pair); dsl_prop_set_sync_impl(ds, name, source, sizeof (intval), 1, &intval, tx); } else if (nvpair_type(pair) == DATA_TYPE_BOOLEAN) { dsl_prop_set_sync_impl(ds, name, source, 0, 0, NULL, tx); } else { panic("invalid nvpair type"); } } } void dsl_props_set_sync(void *arg, dmu_tx_t *tx) { dsl_props_set_arg_t *dpsa = arg; dsl_pool_t *dp = dmu_tx_pool(tx); dsl_dataset_t *ds; VERIFY0(dsl_dataset_hold(dp, dpsa->dpsa_dsname, FTAG, &ds)); dsl_props_set_sync_impl(ds, dpsa->dpsa_source, dpsa->dpsa_props, tx); dsl_dataset_rele(ds, FTAG); } /* * All-or-nothing; if any prop can't be set, nothing will be modified. */ int dsl_props_set(const char *dsname, zprop_source_t source, nvlist_t *props) { dsl_props_set_arg_t dpsa; int nblks = 0; dpsa.dpsa_dsname = dsname; dpsa.dpsa_source = source; dpsa.dpsa_props = props; /* * If the source includes NONE, then we will only be removing entries * from the ZAP object. In that case don't check for ENOSPC. */ if ((source & ZPROP_SRC_NONE) == 0) nblks = 2 * fnvlist_num_pairs(props); return (dsl_sync_task(dsname, dsl_props_set_check, dsl_props_set_sync, &dpsa, nblks, ZFS_SPACE_CHECK_RESERVED)); } typedef enum dsl_prop_getflags { DSL_PROP_GET_INHERITING = 0x1, /* searching parent of target ds */ DSL_PROP_GET_SNAPSHOT = 0x2, /* snapshot dataset */ DSL_PROP_GET_LOCAL = 0x4, /* local properties */ DSL_PROP_GET_RECEIVED = 0x8, /* received properties */ } dsl_prop_getflags_t; static int dsl_prop_get_all_impl(objset_t *mos, uint64_t propobj, const char *setpoint, dsl_prop_getflags_t flags, nvlist_t *nv) { zap_cursor_t zc; zap_attribute_t za; int err = 0; for (zap_cursor_init(&zc, mos, propobj); (err = zap_cursor_retrieve(&zc, &za)) == 0; zap_cursor_advance(&zc)) { nvlist_t *propval; zfs_prop_t prop; char buf[ZAP_MAXNAMELEN]; char *valstr; const char *suffix; const char *propname; const char *source; suffix = strchr(za.za_name, '$'); if (suffix == NULL) { /* * Skip local properties if we only want received * properties. */ if (flags & DSL_PROP_GET_RECEIVED) continue; propname = za.za_name; source = setpoint; } else if (strcmp(suffix, ZPROP_INHERIT_SUFFIX) == 0) { /* Skip explicitly inherited entries. 
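 * They only record that a received value has been explicitly
 * inherited over, and carry no meaningful value of their own.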
*/ continue; } else if (strcmp(suffix, ZPROP_RECVD_SUFFIX) == 0) { if (flags & DSL_PROP_GET_LOCAL) continue; - (void) strncpy(buf, za.za_name, (suffix - za.za_name)); - buf[suffix - za.za_name] = '\0'; + (void) strlcpy(buf, za.za_name, + MIN(sizeof (buf), suffix - za.za_name + 1)); propname = buf; if (!(flags & DSL_PROP_GET_RECEIVED)) { /* Skip if locally overridden. */ err = zap_contains(mos, propobj, propname); if (err == 0) continue; if (err != ENOENT) break; /* Skip if explicitly inherited. */ valstr = kmem_asprintf("%s%s", propname, ZPROP_INHERIT_SUFFIX); err = zap_contains(mos, propobj, valstr); kmem_strfree(valstr); if (err == 0) continue; if (err != ENOENT) break; } source = ((flags & DSL_PROP_GET_INHERITING) ? setpoint : ZPROP_SOURCE_VAL_RECVD); } else { /* * For backward compatibility, skip suffixes we don't * recognize. */ continue; } prop = zfs_name_to_prop(propname); /* Skip non-inheritable properties. */ if ((flags & DSL_PROP_GET_INHERITING) && prop != ZPROP_USERPROP && !zfs_prop_inheritable(prop)) continue; /* Skip properties not valid for this type. */ if ((flags & DSL_PROP_GET_SNAPSHOT) && prop != ZPROP_USERPROP && !zfs_prop_valid_for_type(prop, ZFS_TYPE_SNAPSHOT, B_FALSE)) continue; /* Skip properties already defined. */ if (nvlist_exists(nv, propname)) continue; VERIFY(nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_SLEEP) == 0); if (za.za_integer_length == 1) { /* * String property */ char *tmp = kmem_alloc(za.za_num_integers, KM_SLEEP); err = zap_lookup(mos, propobj, za.za_name, 1, za.za_num_integers, tmp); if (err != 0) { kmem_free(tmp, za.za_num_integers); break; } VERIFY(nvlist_add_string(propval, ZPROP_VALUE, tmp) == 0); kmem_free(tmp, za.za_num_integers); } else { /* * Integer property */ ASSERT(za.za_integer_length == 8); (void) nvlist_add_uint64(propval, ZPROP_VALUE, za.za_first_integer); } VERIFY(nvlist_add_string(propval, ZPROP_SOURCE, source) == 0); VERIFY(nvlist_add_nvlist(nv, propname, propval) == 0); nvlist_free(propval); } zap_cursor_fini(&zc); if (err == ENOENT) err = 0; return (err); } /* * Iterate over all properties for this dataset and return them in an nvlist. 
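 * The snapshot's own property ZAP (if any) is scanned first, followed
 * by each ancestor dsl_dir starting with the dataset's own; the
 * inherited-only filtering kicks in once we walk past the dataset's
 * own dir (or immediately, for a snapshot).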
*/ static int dsl_prop_get_all_ds(dsl_dataset_t *ds, nvlist_t **nvp, dsl_prop_getflags_t flags) { dsl_dir_t *dd = ds->ds_dir; dsl_pool_t *dp = dd->dd_pool; objset_t *mos = dp->dp_meta_objset; int err = 0; char setpoint[ZFS_MAX_DATASET_NAME_LEN]; VERIFY(nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0); if (ds->ds_is_snapshot) flags |= DSL_PROP_GET_SNAPSHOT; ASSERT(dsl_pool_config_held(dp)); if (dsl_dataset_phys(ds)->ds_props_obj != 0) { ASSERT(flags & DSL_PROP_GET_SNAPSHOT); dsl_dataset_name(ds, setpoint); err = dsl_prop_get_all_impl(mos, dsl_dataset_phys(ds)->ds_props_obj, setpoint, flags, *nvp); if (err) goto out; } for (; dd != NULL; dd = dd->dd_parent) { if (dd != ds->ds_dir || (flags & DSL_PROP_GET_SNAPSHOT)) { if (flags & (DSL_PROP_GET_LOCAL | DSL_PROP_GET_RECEIVED)) break; flags |= DSL_PROP_GET_INHERITING; } dsl_dir_name(dd, setpoint); err = dsl_prop_get_all_impl(mos, dsl_dir_phys(dd)->dd_props_zapobj, setpoint, flags, *nvp); if (err) break; } out: if (err) { nvlist_free(*nvp); *nvp = NULL; } return (err); } boolean_t dsl_prop_get_hasrecvd(const char *dsname) { uint64_t dummy; return (0 == dsl_prop_get_integer(dsname, ZPROP_HAS_RECVD, &dummy, NULL)); } static int dsl_prop_set_hasrecvd_impl(const char *dsname, zprop_source_t source) { uint64_t version; spa_t *spa; int error = 0; VERIFY0(spa_open(dsname, &spa, FTAG)); version = spa_version(spa); spa_close(spa, FTAG); if (version >= SPA_VERSION_RECVD_PROPS) error = dsl_prop_set_int(dsname, ZPROP_HAS_RECVD, source, 0); return (error); } /* * Call after successfully receiving properties to ensure that only the first * receive on or after SPA_VERSION_RECVD_PROPS blows away local properties. */ int dsl_prop_set_hasrecvd(const char *dsname) { int error = 0; if (!dsl_prop_get_hasrecvd(dsname)) error = dsl_prop_set_hasrecvd_impl(dsname, ZPROP_SRC_LOCAL); return (error); } void dsl_prop_unset_hasrecvd(const char *dsname) { VERIFY0(dsl_prop_set_hasrecvd_impl(dsname, ZPROP_SRC_NONE)); } int dsl_prop_get_all(objset_t *os, nvlist_t **nvp) { return (dsl_prop_get_all_ds(os->os_dsl_dataset, nvp, 0)); } int dsl_prop_get_received(const char *dsname, nvlist_t **nvp) { objset_t *os; int error; /* * Received properties are not distinguishable from local properties * until the dataset has received properties on or after * SPA_VERSION_RECVD_PROPS. */ dsl_prop_getflags_t flags = (dsl_prop_get_hasrecvd(dsname) ? DSL_PROP_GET_RECEIVED : DSL_PROP_GET_LOCAL); error = dmu_objset_hold(dsname, FTAG, &os); if (error != 0) return (error); error = dsl_prop_get_all_ds(os->os_dsl_dataset, nvp, flags); dmu_objset_rele(os, FTAG); return (error); } void dsl_prop_nvlist_add_uint64(nvlist_t *nv, zfs_prop_t prop, uint64_t value) { nvlist_t *propval; const char *propname = zfs_prop_to_name(prop); uint64_t default_value; if (nvlist_lookup_nvlist(nv, propname, &propval) == 0) { VERIFY(nvlist_add_uint64(propval, ZPROP_VALUE, value) == 0); return; } VERIFY(nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_SLEEP) == 0); VERIFY(nvlist_add_uint64(propval, ZPROP_VALUE, value) == 0); /* Indicate the default source if we can. 
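 * An empty ZPROP_SOURCE string is how callers are told the value is
 * simply the property's default.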
*/ if (dodefault(prop, 8, 1, &default_value) == 0 && value == default_value) { VERIFY(nvlist_add_string(propval, ZPROP_SOURCE, "") == 0); } VERIFY(nvlist_add_nvlist(nv, propname, propval) == 0); nvlist_free(propval); } void dsl_prop_nvlist_add_string(nvlist_t *nv, zfs_prop_t prop, const char *value) { nvlist_t *propval; const char *propname = zfs_prop_to_name(prop); if (nvlist_lookup_nvlist(nv, propname, &propval) == 0) { VERIFY(nvlist_add_string(propval, ZPROP_VALUE, value) == 0); return; } VERIFY(nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_SLEEP) == 0); VERIFY(nvlist_add_string(propval, ZPROP_VALUE, value) == 0); VERIFY(nvlist_add_nvlist(nv, propname, propval) == 0); nvlist_free(propval); } #if defined(_KERNEL) EXPORT_SYMBOL(dsl_prop_register); EXPORT_SYMBOL(dsl_prop_unregister); EXPORT_SYMBOL(dsl_prop_unregister_all); EXPORT_SYMBOL(dsl_prop_get); EXPORT_SYMBOL(dsl_prop_get_integer); EXPORT_SYMBOL(dsl_prop_get_all); EXPORT_SYMBOL(dsl_prop_get_received); EXPORT_SYMBOL(dsl_prop_get_ds); EXPORT_SYMBOL(dsl_prop_get_int_ds); EXPORT_SYMBOL(dsl_prop_get_dd); EXPORT_SYMBOL(dsl_props_set); EXPORT_SYMBOL(dsl_prop_set_int); EXPORT_SYMBOL(dsl_prop_set_string); EXPORT_SYMBOL(dsl_prop_inherit); EXPORT_SYMBOL(dsl_prop_predict); EXPORT_SYMBOL(dsl_prop_nvlist_add_uint64); EXPORT_SYMBOL(dsl_prop_nvlist_add_string); #endif diff --git a/module/zfs/spa_misc.c b/module/zfs/spa_misc.c index decf4ddae6af..daab1d6fce71 100644 --- a/module/zfs/spa_misc.c +++ b/module/zfs/spa_misc.c @@ -1,2959 +1,2959 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or https://opensource.org/licenses/CDDL-1.0. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2011, 2019 by Delphix. All rights reserved. * Copyright 2015 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved. * Copyright 2013 Saso Kiselkov. All rights reserved. * Copyright (c) 2017 Datto Inc. * Copyright (c) 2017, Intel Corporation. * Copyright (c) 2019, loli10K . All rights reserved. 
*/ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "zfs_prop.h" #include #include #include #include /* * SPA locking * * There are three basic locks for managing spa_t structures: * * spa_namespace_lock (global mutex) * * This lock must be acquired to do any of the following: * * - Lookup a spa_t by name * - Add or remove a spa_t from the namespace * - Increase spa_refcount from non-zero * - Check if spa_refcount is zero * - Rename a spa_t * - add/remove/attach/detach devices * - Held for the duration of create/destroy/import/export * * It does not need to handle recursion. A create or destroy may * reference objects (files or zvols) in other pools, but by * definition they must have an existing reference, and will never need * to lookup a spa_t by name. * * spa_refcount (per-spa zfs_refcount_t protected by mutex) * * This reference count keep track of any active users of the spa_t. The * spa_t cannot be destroyed or freed while this is non-zero. Internally, * the refcount is never really 'zero' - opening a pool implicitly keeps * some references in the DMU. Internally we check against spa_minref, but * present the image of a zero/non-zero value to consumers. * * spa_config_lock[] (per-spa array of rwlocks) * * This protects the spa_t from config changes, and must be held in * the following circumstances: * * - RW_READER to perform I/O to the spa * - RW_WRITER to change the vdev config * * The locking order is fairly straightforward: * * spa_namespace_lock -> spa_refcount * * The namespace lock must be acquired to increase the refcount from 0 * or to check if it is zero. * * spa_refcount -> spa_config_lock[] * * There must be at least one valid reference on the spa_t to acquire * the config lock. * * spa_namespace_lock -> spa_config_lock[] * * The namespace lock must always be taken before the config lock. * * * The spa_namespace_lock can be acquired directly and is globally visible. * * The namespace is manipulated using the following functions, all of which * require the spa_namespace_lock to be held. * * spa_lookup() Lookup a spa_t by name. * * spa_add() Create a new spa_t in the namespace. * * spa_remove() Remove a spa_t from the namespace. This also * frees up any memory associated with the spa_t. * * spa_next() Returns the next spa_t in the system, or the * first if NULL is passed. * * spa_evict_all() Shutdown and remove all spa_t structures in * the system. * * spa_guid_exists() Determine whether a pool/device guid exists. * * The spa_refcount is manipulated using the following functions: * * spa_open_ref() Adds a reference to the given spa_t. Must be * called with spa_namespace_lock held if the * refcount is currently zero. * * spa_close() Remove a reference from the spa_t. This will * not free the spa_t or remove it from the * namespace. No locking is required. * * spa_refcount_zero() Returns true if the refcount is currently * zero. Must be called with spa_namespace_lock * held. * * The spa_config_lock[] is an array of rwlocks, ordered as follows: * SCL_CONFIG > SCL_STATE > SCL_ALLOC > SCL_ZIO > SCL_FREE > SCL_VDEV. * spa_config_lock[] is manipulated with spa_config_{enter,exit,held}(). * * To read the configuration, it suffices to hold one of these locks as reader. * To modify the configuration, you must hold all locks as writer. 
To modify * vdev state without altering the vdev tree's topology (e.g. online/offline), * you must hold SCL_STATE and SCL_ZIO as writer. * * We use these distinct config locks to avoid recursive lock entry. * For example, spa_sync() (which holds SCL_CONFIG as reader) induces * block allocations (SCL_ALLOC), which may require reading space maps * from disk (dmu_read() -> zio_read() -> SCL_ZIO). * * The spa config locks cannot be normal rwlocks because we need the * ability to hand off ownership. For example, SCL_ZIO is acquired * by the issuing thread and later released by an interrupt thread. * They do, however, obey the usual write-wanted semantics to prevent * writer (i.e. system administrator) starvation. * * The lock acquisition rules are as follows: * * SCL_CONFIG * Protects changes to the vdev tree topology, such as vdev * add/remove/attach/detach. Protects the dirty config list * (spa_config_dirty_list) and the set of spares and l2arc devices. * * SCL_STATE * Protects changes to pool state and vdev state, such as vdev * online/offline/fault/degrade/clear. Protects the dirty state list * (spa_state_dirty_list) and global pool state (spa_state). * * SCL_ALLOC * Protects changes to metaslab groups and classes. * Held as reader by metaslab_alloc() and metaslab_claim(). * * SCL_ZIO * Held by bp-level zios (those which have no io_vd upon entry) * to prevent changes to the vdev tree. The bp-level zio implicitly * protects all of its vdev child zios, which do not hold SCL_ZIO. * * SCL_FREE * Protects changes to metaslab groups and classes. * Held as reader by metaslab_free(). SCL_FREE is distinct from * SCL_ALLOC, and lower than SCL_ZIO, so that we can safely free * blocks in zio_done() while another i/o that holds either * SCL_ALLOC or SCL_ZIO is waiting for this i/o to complete. * * SCL_VDEV * Held as reader to prevent changes to the vdev tree during trivial * inquiries such as bp_get_dsize(). SCL_VDEV is distinct from the * other locks, and lower than all of them, to ensure that it's safe * to acquire regardless of caller context. * * In addition, the following rules apply: * * (a) spa_props_lock protects pool properties, spa_config and spa_config_list. * The lock ordering is SCL_CONFIG > spa_props_lock. * * (b) I/O operations on leaf vdevs. For any zio operation that takes * an explicit vdev_t argument -- such as zio_ioctl(), zio_read_phys(), * or zio_write_phys() -- the caller must ensure that the config cannot * cannot change in the interim, and that the vdev cannot be reopened. * SCL_STATE as reader suffices for both. * * The vdev configuration is protected by spa_vdev_enter() / spa_vdev_exit(). * * spa_vdev_enter() Acquire the namespace lock and the config lock * for writing. * * spa_vdev_exit() Release the config lock, wait for all I/O * to complete, sync the updated configs to the * cache, and release the namespace lock. * * vdev state is protected by spa_vdev_state_enter() / spa_vdev_state_exit(). * Like spa_vdev_enter/exit, these are convenience wrappers -- the actual * locking is, always, based on spa_namespace_lock and spa_config_lock[]. 
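As a usage note for the rules above: a read-only inquiry takes a single config lock as reader, and SCL_VDEV is the lightest lock that pins the vdev tree (the same pattern bp_get_dsize() uses later in this file). The sketch below is in the kernel idiom, calls spa_config_enter()/spa_config_exit() as defined below, and relies on the in-tree headers; count_leaves() and spa_count_leaves_example() are hypothetical names, not functions in this file.

#include <sys/spa.h>
#include <sys/spa_impl.h>
#include <sys/vdev_impl.h>

/* Hypothetical read-only inquiry: count the leaf vdevs under the root. */
static uint64_t
count_leaves(vdev_t *vd)
{
	uint64_t n = 0;

	if (vd->vdev_children == 0)
		return (1);
	for (uint64_t c = 0; c < vd->vdev_children; c++)
		n += count_leaves(vd->vdev_child[c]);
	return (n);
}

static uint64_t
spa_count_leaves_example(spa_t *spa)
{
	uint64_t n;

	/* SCL_VDEV as reader is sufficient to keep the tree from changing. */
	spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
	n = count_leaves(spa->spa_root_vdev);
	spa_config_exit(spa, SCL_VDEV, FTAG);
	return (n);
}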
*/ static avl_tree_t spa_namespace_avl; kmutex_t spa_namespace_lock; static kcondvar_t spa_namespace_cv; static const int spa_max_replication_override = SPA_DVAS_PER_BP; static kmutex_t spa_spare_lock; static avl_tree_t spa_spare_avl; static kmutex_t spa_l2cache_lock; static avl_tree_t spa_l2cache_avl; spa_mode_t spa_mode_global = SPA_MODE_UNINIT; #ifdef ZFS_DEBUG /* * Everything except dprintf, set_error, spa, and indirect_remap is on * by default in debug builds. */ int zfs_flags = ~(ZFS_DEBUG_DPRINTF | ZFS_DEBUG_SET_ERROR | ZFS_DEBUG_INDIRECT_REMAP); #else int zfs_flags = 0; #endif /* * zfs_recover can be set to nonzero to attempt to recover from * otherwise-fatal errors, typically caused by on-disk corruption. When * set, calls to zfs_panic_recover() will turn into warning messages. * This should only be used as a last resort, as it typically results * in leaked space, or worse. */ int zfs_recover = B_FALSE; /* * If destroy encounters an EIO while reading metadata (e.g. indirect * blocks), space referenced by the missing metadata can not be freed. * Normally this causes the background destroy to become "stalled", as * it is unable to make forward progress. While in this stalled state, * all remaining space to free from the error-encountering filesystem is * "temporarily leaked". Set this flag to cause it to ignore the EIO, * permanently leak the space from indirect blocks that can not be read, * and continue to free everything else that it can. * * The default, "stalling" behavior is useful if the storage partially * fails (i.e. some but not all i/os fail), and then later recovers. In * this case, we will be able to continue pool operations while it is * partially failed, and when it recovers, we can continue to free the * space, with no leaks. However, note that this case is actually * fairly rare. * * Typically pools either (a) fail completely (but perhaps temporarily, * e.g. a top-level vdev going offline), or (b) have localized, * permanent errors (e.g. disk returns the wrong data due to bit flip or * firmware bug). In case (a), this setting does not matter because the * pool will be suspended and the sync thread will not be able to make * forward progress regardless. In case (b), because the error is * permanent, the best we can do is leak the minimum amount of space, * which is what setting this flag will do. Therefore, it is reasonable * for this flag to normally be set, but we chose the more conservative * approach of not setting it, so that there is no possibility of * leaking space in the "partial temporary" failure case. */ int zfs_free_leak_on_eio = B_FALSE; /* * Expiration time in milliseconds. This value has two meanings. First it is * used to determine when the spa_deadman() logic should fire. By default the * spa_deadman() will fire if spa_sync() has not completed in 600 seconds. * Secondly, the value determines if an I/O is considered "hung". Any I/O that * has not completed in zfs_deadman_synctime_ms is considered "hung" resulting * in one of three behaviors controlled by zfs_deadman_failmode. */ unsigned long zfs_deadman_synctime_ms = 600000UL; /* 10 min. */ /* * This value controls the maximum amount of time zio_wait() will block for an * outstanding IO. By default this is 300 seconds at which point the "hung" * behavior will be applied as described for zfs_deadman_synctime_ms. */ unsigned long zfs_deadman_ziotime_ms = 300000UL; /* 5 min. */ /* * Check time in milliseconds. This defines the frequency at which we check * for hung I/O. 
*/ unsigned long zfs_deadman_checktime_ms = 60000UL; /* 1 min. */ /* * By default the deadman is enabled. */ int zfs_deadman_enabled = B_TRUE; /* * Controls the behavior of the deadman when it detects a "hung" I/O. * Valid values are zfs_deadman_failmode=. * * wait - Wait for the "hung" I/O (default) * continue - Attempt to recover from a "hung" I/O * panic - Panic the system */ const char *zfs_deadman_failmode = "wait"; /* * The worst case is single-sector max-parity RAID-Z blocks, in which * case the space requirement is exactly (VDEV_RAIDZ_MAXPARITY + 1) * times the size; so just assume that. Add to this the fact that * we can have up to 3 DVAs per bp, and one more factor of 2 because * the block may be dittoed with up to 3 DVAs by ddt_sync(). All together, * the worst case is: * (VDEV_RAIDZ_MAXPARITY + 1) * SPA_DVAS_PER_BP * 2 == 24 */ int spa_asize_inflation = 24; /* * Normally, we don't allow the last 3.2% (1/(2^spa_slop_shift)) of space in * the pool to be consumed (bounded by spa_max_slop). This ensures that we * don't run the pool completely out of space, due to unaccounted changes (e.g. * to the MOS). It also limits the worst-case time to allocate space. If we * have less than this amount of free space, most ZPL operations (e.g. write, * create) will return ENOSPC. The ZIL metaslabs (spa_embedded_log_class) are * also part of this 3.2% of space which can't be consumed by normal writes; * the slop space "proper" (spa_get_slop_space()) is decreased by the embedded * log space. * * Certain operations (e.g. file removal, most administrative actions) can * use half the slop space. They will only return ENOSPC if less than half * the slop space is free. Typically, once the pool has less than the slop * space free, the user will use these operations to free up space in the pool. * These are the operations that call dsl_pool_adjustedsize() with the netfree * argument set to TRUE. * * Operations that are almost guaranteed to free up space in the absence of * a pool checkpoint can use up to three quarters of the slop space * (e.g zfs destroy). * * A very restricted set of operations are always permitted, regardless of * the amount of free space. These are the operations that call * dsl_sync_task(ZFS_SPACE_CHECK_NONE). If these operations result in a net * increase in the amount of space used, it is possible to run the pool * completely out of space, causing it to be permanently read-only. * * Note that on very small pools, the slop space will be larger than * 3.2%, in an effort to have it be at least spa_min_slop (128MB), * but we never allow it to be more than half the pool size. * * Further, on very large pools, the slop space will be smaller than * 3.2%, to avoid reserving much more space than we actually need; bounded * by spa_max_slop (128GB). * * See also the comments in zfs_space_check_t. */ int spa_slop_shift = 5; static const uint64_t spa_min_slop = 128ULL * 1024 * 1024; static const uint64_t spa_max_slop = 128ULL * 1024 * 1024 * 1024; static const int spa_allocators = 4; void spa_load_failed(spa_t *spa, const char *fmt, ...) { va_list adx; char buf[256]; va_start(adx, fmt); (void) vsnprintf(buf, sizeof (buf), fmt, adx); va_end(adx); zfs_dbgmsg("spa_load(%s, config %s): FAILED: %s", spa->spa_name, spa->spa_trust_config ? "trusted" : "untrusted", buf); } void spa_load_note(spa_t *spa, const char *fmt, ...) 
{ va_list adx; char buf[256]; va_start(adx, fmt); (void) vsnprintf(buf, sizeof (buf), fmt, adx); va_end(adx); zfs_dbgmsg("spa_load(%s, config %s): %s", spa->spa_name, spa->spa_trust_config ? "trusted" : "untrusted", buf); } /* * By default dedup and user data indirects land in the special class */ static int zfs_ddt_data_is_special = B_TRUE; static int zfs_user_indirect_is_special = B_TRUE; /* * The percentage of special class final space reserved for metadata only. * Once we allocate 100 - zfs_special_class_metadata_reserve_pct we only * let metadata into the class. */ static int zfs_special_class_metadata_reserve_pct = 25; /* * ========================================================================== * SPA config locking * ========================================================================== */ static void spa_config_lock_init(spa_t *spa) { for (int i = 0; i < SCL_LOCKS; i++) { spa_config_lock_t *scl = &spa->spa_config_lock[i]; mutex_init(&scl->scl_lock, NULL, MUTEX_DEFAULT, NULL); cv_init(&scl->scl_cv, NULL, CV_DEFAULT, NULL); scl->scl_writer = NULL; scl->scl_write_wanted = 0; scl->scl_count = 0; } } static void spa_config_lock_destroy(spa_t *spa) { for (int i = 0; i < SCL_LOCKS; i++) { spa_config_lock_t *scl = &spa->spa_config_lock[i]; mutex_destroy(&scl->scl_lock); cv_destroy(&scl->scl_cv); ASSERT(scl->scl_writer == NULL); ASSERT(scl->scl_write_wanted == 0); ASSERT(scl->scl_count == 0); } } int spa_config_tryenter(spa_t *spa, int locks, const void *tag, krw_t rw) { for (int i = 0; i < SCL_LOCKS; i++) { spa_config_lock_t *scl = &spa->spa_config_lock[i]; if (!(locks & (1 << i))) continue; mutex_enter(&scl->scl_lock); if (rw == RW_READER) { if (scl->scl_writer || scl->scl_write_wanted) { mutex_exit(&scl->scl_lock); spa_config_exit(spa, locks & ((1 << i) - 1), tag); return (0); } } else { ASSERT(scl->scl_writer != curthread); if (scl->scl_count != 0) { mutex_exit(&scl->scl_lock); spa_config_exit(spa, locks & ((1 << i) - 1), tag); return (0); } scl->scl_writer = curthread; } scl->scl_count++; mutex_exit(&scl->scl_lock); } return (1); } void spa_config_enter(spa_t *spa, int locks, const void *tag, krw_t rw) { (void) tag; int wlocks_held = 0; ASSERT3U(SCL_LOCKS, <, sizeof (wlocks_held) * NBBY); for (int i = 0; i < SCL_LOCKS; i++) { spa_config_lock_t *scl = &spa->spa_config_lock[i]; if (scl->scl_writer == curthread) wlocks_held |= (1 << i); if (!(locks & (1 << i))) continue; mutex_enter(&scl->scl_lock); if (rw == RW_READER) { while (scl->scl_writer || scl->scl_write_wanted) { cv_wait(&scl->scl_cv, &scl->scl_lock); } } else { ASSERT(scl->scl_writer != curthread); while (scl->scl_count != 0) { scl->scl_write_wanted++; cv_wait(&scl->scl_cv, &scl->scl_lock); scl->scl_write_wanted--; } scl->scl_writer = curthread; } scl->scl_count++; mutex_exit(&scl->scl_lock); } ASSERT3U(wlocks_held, <=, locks); } void spa_config_exit(spa_t *spa, int locks, const void *tag) { (void) tag; for (int i = SCL_LOCKS - 1; i >= 0; i--) { spa_config_lock_t *scl = &spa->spa_config_lock[i]; if (!(locks & (1 << i))) continue; mutex_enter(&scl->scl_lock); ASSERT(scl->scl_count > 0); if (--scl->scl_count == 0) { ASSERT(scl->scl_writer == NULL || scl->scl_writer == curthread); scl->scl_writer = NULL; /* OK in either case */ cv_broadcast(&scl->scl_cv); } mutex_exit(&scl->scl_lock); } } int spa_config_held(spa_t *spa, int locks, krw_t rw) { int locks_held = 0; for (int i = 0; i < SCL_LOCKS; i++) { spa_config_lock_t *scl = &spa->spa_config_lock[i]; if (!(locks & (1 << i))) continue; if ((rw == RW_READER && scl->scl_count != 
0) || (rw == RW_WRITER && scl->scl_writer == curthread)) locks_held |= 1 << i; } return (locks_held); } /* * ========================================================================== * SPA namespace functions * ========================================================================== */ /* * Lookup the named spa_t in the AVL tree. The spa_namespace_lock must be held. * Returns NULL if no matching spa_t is found. */ spa_t * spa_lookup(const char *name) { static spa_t search; /* spa_t is large; don't allocate on stack */ spa_t *spa; avl_index_t where; char *cp; ASSERT(MUTEX_HELD(&spa_namespace_lock)); (void) strlcpy(search.spa_name, name, sizeof (search.spa_name)); /* * If it's a full dataset name, figure out the pool name and * just use that. */ cp = strpbrk(search.spa_name, "/@#"); if (cp != NULL) *cp = '\0'; spa = avl_find(&spa_namespace_avl, &search, &where); return (spa); } /* * Fires when spa_sync has not completed within zfs_deadman_synctime_ms. * If the zfs_deadman_enabled flag is set then it inspects all vdev queues * looking for potentially hung I/Os. */ void spa_deadman(void *arg) { spa_t *spa = arg; /* Disable the deadman if the pool is suspended. */ if (spa_suspended(spa)) return; zfs_dbgmsg("slow spa_sync: started %llu seconds ago, calls %llu", (gethrtime() - spa->spa_sync_starttime) / NANOSEC, (u_longlong_t)++spa->spa_deadman_calls); if (zfs_deadman_enabled) vdev_deadman(spa->spa_root_vdev, FTAG); spa->spa_deadman_tqid = taskq_dispatch_delay(system_delay_taskq, spa_deadman, spa, TQ_SLEEP, ddi_get_lbolt() + MSEC_TO_TICK(zfs_deadman_checktime_ms)); } static int spa_log_sm_sort_by_txg(const void *va, const void *vb) { const spa_log_sm_t *a = va; const spa_log_sm_t *b = vb; return (TREE_CMP(a->sls_txg, b->sls_txg)); } /* * Create an uninitialized spa_t with the given name. Requires * spa_namespace_lock. The caller must ensure that the spa_t doesn't already * exist by calling spa_lookup() first. 
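As a usage note for spa_lookup() above: it accepts a full dataset, snapshot, or bookmark name and truncates it at the first '/', '@', or '#', so callers may pass either a pool name or any name rooted in the pool. A short sketch, with a hypothetical pool name, under the required lock:

	spa_t *spa;

	mutex_enter(&spa_namespace_lock);
	spa = spa_lookup("tank/home@yesterday");	/* returns the spa_t for "tank" */
	mutex_exit(&spa_namespace_lock);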
*/ spa_t * spa_add(const char *name, nvlist_t *config, const char *altroot) { spa_t *spa; spa_config_dirent_t *dp; ASSERT(MUTEX_HELD(&spa_namespace_lock)); spa = kmem_zalloc(sizeof (spa_t), KM_SLEEP); mutex_init(&spa->spa_async_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&spa->spa_errlist_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&spa->spa_errlog_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&spa->spa_evicting_os_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&spa->spa_history_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&spa->spa_proc_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&spa->spa_props_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&spa->spa_cksum_tmpls_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&spa->spa_scrub_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&spa->spa_suspend_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&spa->spa_vdev_top_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&spa->spa_feat_stats_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&spa->spa_flushed_ms_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&spa->spa_activities_lock, NULL, MUTEX_DEFAULT, NULL); cv_init(&spa->spa_async_cv, NULL, CV_DEFAULT, NULL); cv_init(&spa->spa_evicting_os_cv, NULL, CV_DEFAULT, NULL); cv_init(&spa->spa_proc_cv, NULL, CV_DEFAULT, NULL); cv_init(&spa->spa_scrub_io_cv, NULL, CV_DEFAULT, NULL); cv_init(&spa->spa_suspend_cv, NULL, CV_DEFAULT, NULL); cv_init(&spa->spa_activities_cv, NULL, CV_DEFAULT, NULL); cv_init(&spa->spa_waiters_cv, NULL, CV_DEFAULT, NULL); for (int t = 0; t < TXG_SIZE; t++) bplist_create(&spa->spa_free_bplist[t]); (void) strlcpy(spa->spa_name, name, sizeof (spa->spa_name)); spa->spa_state = POOL_STATE_UNINITIALIZED; spa->spa_freeze_txg = UINT64_MAX; spa->spa_final_txg = UINT64_MAX; spa->spa_load_max_txg = UINT64_MAX; spa->spa_proc = &p0; spa->spa_proc_state = SPA_PROC_NONE; spa->spa_trust_config = B_TRUE; spa->spa_hostid = zone_get_hostid(NULL); spa->spa_deadman_synctime = MSEC2NSEC(zfs_deadman_synctime_ms); spa->spa_deadman_ziotime = MSEC2NSEC(zfs_deadman_ziotime_ms); spa_set_deadman_failmode(spa, zfs_deadman_failmode); zfs_refcount_create(&spa->spa_refcount); spa_config_lock_init(spa); spa_stats_init(spa); avl_add(&spa_namespace_avl, spa); /* * Set the alternate root, if there is one. */ if (altroot) spa->spa_root = spa_strdup(altroot); spa->spa_alloc_count = spa_allocators; spa->spa_allocs = kmem_zalloc(spa->spa_alloc_count * sizeof (spa_alloc_t), KM_SLEEP); for (int i = 0; i < spa->spa_alloc_count; i++) { mutex_init(&spa->spa_allocs[i].spaa_lock, NULL, MUTEX_DEFAULT, NULL); avl_create(&spa->spa_allocs[i].spaa_tree, zio_bookmark_compare, sizeof (zio_t), offsetof(zio_t, io_alloc_node)); } avl_create(&spa->spa_metaslabs_by_flushed, metaslab_sort_by_flushed, sizeof (metaslab_t), offsetof(metaslab_t, ms_spa_txg_node)); avl_create(&spa->spa_sm_logs_by_txg, spa_log_sm_sort_by_txg, sizeof (spa_log_sm_t), offsetof(spa_log_sm_t, sls_node)); list_create(&spa->spa_log_summary, sizeof (log_summary_entry_t), offsetof(log_summary_entry_t, lse_node)); /* * Every pool starts with the default cachefile */ list_create(&spa->spa_config_list, sizeof (spa_config_dirent_t), offsetof(spa_config_dirent_t, scd_link)); dp = kmem_zalloc(sizeof (spa_config_dirent_t), KM_SLEEP); dp->scd_path = altroot ? 
NULL : spa_strdup(spa_config_path); list_insert_head(&spa->spa_config_list, dp); VERIFY(nvlist_alloc(&spa->spa_load_info, NV_UNIQUE_NAME, KM_SLEEP) == 0); if (config != NULL) { nvlist_t *features; if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_FEATURES_FOR_READ, &features) == 0) { VERIFY(nvlist_dup(features, &spa->spa_label_features, 0) == 0); } VERIFY(nvlist_dup(config, &spa->spa_config, 0) == 0); } if (spa->spa_label_features == NULL) { VERIFY(nvlist_alloc(&spa->spa_label_features, NV_UNIQUE_NAME, KM_SLEEP) == 0); } spa->spa_min_ashift = INT_MAX; spa->spa_max_ashift = 0; spa->spa_min_alloc = INT_MAX; /* Reset cached value */ spa->spa_dedup_dspace = ~0ULL; /* * As a pool is being created, treat all features as disabled by * setting SPA_FEATURE_DISABLED for all entries in the feature * refcount cache. */ for (int i = 0; i < SPA_FEATURES; i++) { spa->spa_feat_refcount_cache[i] = SPA_FEATURE_DISABLED; } list_create(&spa->spa_leaf_list, sizeof (vdev_t), offsetof(vdev_t, vdev_leaf_node)); return (spa); } /* * Removes a spa_t from the namespace, freeing up any memory used. Requires * spa_namespace_lock. This is called only after the spa_t has been closed and * deactivated. */ void spa_remove(spa_t *spa) { spa_config_dirent_t *dp; ASSERT(MUTEX_HELD(&spa_namespace_lock)); ASSERT(spa_state(spa) == POOL_STATE_UNINITIALIZED); ASSERT3U(zfs_refcount_count(&spa->spa_refcount), ==, 0); ASSERT0(spa->spa_waiters); nvlist_free(spa->spa_config_splitting); avl_remove(&spa_namespace_avl, spa); cv_broadcast(&spa_namespace_cv); if (spa->spa_root) spa_strfree(spa->spa_root); while ((dp = list_head(&spa->spa_config_list)) != NULL) { list_remove(&spa->spa_config_list, dp); if (dp->scd_path != NULL) spa_strfree(dp->scd_path); kmem_free(dp, sizeof (spa_config_dirent_t)); } for (int i = 0; i < spa->spa_alloc_count; i++) { avl_destroy(&spa->spa_allocs[i].spaa_tree); mutex_destroy(&spa->spa_allocs[i].spaa_lock); } kmem_free(spa->spa_allocs, spa->spa_alloc_count * sizeof (spa_alloc_t)); avl_destroy(&spa->spa_metaslabs_by_flushed); avl_destroy(&spa->spa_sm_logs_by_txg); list_destroy(&spa->spa_log_summary); list_destroy(&spa->spa_config_list); list_destroy(&spa->spa_leaf_list); nvlist_free(spa->spa_label_features); nvlist_free(spa->spa_load_info); nvlist_free(spa->spa_feat_stats); spa_config_set(spa, NULL); zfs_refcount_destroy(&spa->spa_refcount); spa_stats_destroy(spa); spa_config_lock_destroy(spa); for (int t = 0; t < TXG_SIZE; t++) bplist_destroy(&spa->spa_free_bplist[t]); zio_checksum_templates_free(spa); cv_destroy(&spa->spa_async_cv); cv_destroy(&spa->spa_evicting_os_cv); cv_destroy(&spa->spa_proc_cv); cv_destroy(&spa->spa_scrub_io_cv); cv_destroy(&spa->spa_suspend_cv); cv_destroy(&spa->spa_activities_cv); cv_destroy(&spa->spa_waiters_cv); mutex_destroy(&spa->spa_flushed_ms_lock); mutex_destroy(&spa->spa_async_lock); mutex_destroy(&spa->spa_errlist_lock); mutex_destroy(&spa->spa_errlog_lock); mutex_destroy(&spa->spa_evicting_os_lock); mutex_destroy(&spa->spa_history_lock); mutex_destroy(&spa->spa_proc_lock); mutex_destroy(&spa->spa_props_lock); mutex_destroy(&spa->spa_cksum_tmpls_lock); mutex_destroy(&spa->spa_scrub_lock); mutex_destroy(&spa->spa_suspend_lock); mutex_destroy(&spa->spa_vdev_top_lock); mutex_destroy(&spa->spa_feat_stats_lock); mutex_destroy(&spa->spa_activities_lock); kmem_free(spa, sizeof (spa_t)); } /* * Given a pool, return the next pool in the namespace, or NULL if there is * none. If 'prev' is NULL, return the first pool. 
*/ spa_t * spa_next(spa_t *prev) { ASSERT(MUTEX_HELD(&spa_namespace_lock)); if (prev) return (AVL_NEXT(&spa_namespace_avl, prev)); else return (avl_first(&spa_namespace_avl)); } /* * ========================================================================== * SPA refcount functions * ========================================================================== */ /* * Add a reference to the given spa_t. Must have at least one reference, or * have the namespace lock held. */ void spa_open_ref(spa_t *spa, const void *tag) { ASSERT(zfs_refcount_count(&spa->spa_refcount) >= spa->spa_minref || MUTEX_HELD(&spa_namespace_lock)); (void) zfs_refcount_add(&spa->spa_refcount, tag); } /* * Remove a reference to the given spa_t. Must have at least one reference, or * have the namespace lock held. */ void spa_close(spa_t *spa, const void *tag) { ASSERT(zfs_refcount_count(&spa->spa_refcount) > spa->spa_minref || MUTEX_HELD(&spa_namespace_lock)); (void) zfs_refcount_remove(&spa->spa_refcount, tag); } /* * Remove a reference to the given spa_t held by a dsl dir that is * being asynchronously released. Async releases occur from a taskq * performing eviction of dsl datasets and dirs. The namespace lock * isn't held and the hold by the object being evicted may contribute to * spa_minref (e.g. dataset or directory released during pool export), * so the asserts in spa_close() do not apply. */ void spa_async_close(spa_t *spa, const void *tag) { (void) zfs_refcount_remove(&spa->spa_refcount, tag); } /* * Check to see if the spa refcount is zero. Must be called with * spa_namespace_lock held. We really compare against spa_minref, which is the * number of references acquired when opening a pool */ boolean_t spa_refcount_zero(spa_t *spa) { ASSERT(MUTEX_HELD(&spa_namespace_lock)); return (zfs_refcount_count(&spa->spa_refcount) == spa->spa_minref); } /* * ========================================================================== * SPA spare and l2cache tracking * ========================================================================== */ /* * Hot spares and cache devices are tracked using the same code below, * for 'auxiliary' devices. 
*/ typedef struct spa_aux { uint64_t aux_guid; uint64_t aux_pool; avl_node_t aux_avl; int aux_count; } spa_aux_t; static inline int spa_aux_compare(const void *a, const void *b) { const spa_aux_t *sa = (const spa_aux_t *)a; const spa_aux_t *sb = (const spa_aux_t *)b; return (TREE_CMP(sa->aux_guid, sb->aux_guid)); } static void spa_aux_add(vdev_t *vd, avl_tree_t *avl) { avl_index_t where; spa_aux_t search; spa_aux_t *aux; search.aux_guid = vd->vdev_guid; if ((aux = avl_find(avl, &search, &where)) != NULL) { aux->aux_count++; } else { aux = kmem_zalloc(sizeof (spa_aux_t), KM_SLEEP); aux->aux_guid = vd->vdev_guid; aux->aux_count = 1; avl_insert(avl, aux, where); } } static void spa_aux_remove(vdev_t *vd, avl_tree_t *avl) { spa_aux_t search; spa_aux_t *aux; avl_index_t where; search.aux_guid = vd->vdev_guid; aux = avl_find(avl, &search, &where); ASSERT(aux != NULL); if (--aux->aux_count == 0) { avl_remove(avl, aux); kmem_free(aux, sizeof (spa_aux_t)); } else if (aux->aux_pool == spa_guid(vd->vdev_spa)) { aux->aux_pool = 0ULL; } } static boolean_t spa_aux_exists(uint64_t guid, uint64_t *pool, int *refcnt, avl_tree_t *avl) { spa_aux_t search, *found; search.aux_guid = guid; found = avl_find(avl, &search, NULL); if (pool) { if (found) *pool = found->aux_pool; else *pool = 0ULL; } if (refcnt) { if (found) *refcnt = found->aux_count; else *refcnt = 0; } return (found != NULL); } static void spa_aux_activate(vdev_t *vd, avl_tree_t *avl) { spa_aux_t search, *found; avl_index_t where; search.aux_guid = vd->vdev_guid; found = avl_find(avl, &search, &where); ASSERT(found != NULL); ASSERT(found->aux_pool == 0ULL); found->aux_pool = spa_guid(vd->vdev_spa); } /* * Spares are tracked globally due to the following constraints: * * - A spare may be part of multiple pools. * - A spare may be added to a pool even if it's actively in use within * another pool. * - A spare in use in any pool can only be the source of a replacement if * the target is a spare in the same pool. * * We keep track of all spares on the system through the use of a reference * counted AVL tree. When a vdev is added as a spare, or used as a replacement * spare, then we bump the reference count in the AVL tree. In addition, we set * the 'vdev_isspare' member to indicate that the device is a spare (active or * inactive). When a spare is made active (used to replace a device in the * pool), we also keep track of which pool its been made a part of. * * The 'spa_spare_lock' protects the AVL tree. These functions are normally * called under the spa_namespace lock as part of vdev reconfiguration. The * separate spare lock exists for the status query path, which does not need to * be completely consistent with respect to other vdev configuration changes. 
*/ static int spa_spare_compare(const void *a, const void *b) { return (spa_aux_compare(a, b)); } void spa_spare_add(vdev_t *vd) { mutex_enter(&spa_spare_lock); ASSERT(!vd->vdev_isspare); spa_aux_add(vd, &spa_spare_avl); vd->vdev_isspare = B_TRUE; mutex_exit(&spa_spare_lock); } void spa_spare_remove(vdev_t *vd) { mutex_enter(&spa_spare_lock); ASSERT(vd->vdev_isspare); spa_aux_remove(vd, &spa_spare_avl); vd->vdev_isspare = B_FALSE; mutex_exit(&spa_spare_lock); } boolean_t spa_spare_exists(uint64_t guid, uint64_t *pool, int *refcnt) { boolean_t found; mutex_enter(&spa_spare_lock); found = spa_aux_exists(guid, pool, refcnt, &spa_spare_avl); mutex_exit(&spa_spare_lock); return (found); } void spa_spare_activate(vdev_t *vd) { mutex_enter(&spa_spare_lock); ASSERT(vd->vdev_isspare); spa_aux_activate(vd, &spa_spare_avl); mutex_exit(&spa_spare_lock); } /* * Level 2 ARC devices are tracked globally for the same reasons as spares. * Cache devices currently only support one pool per cache device, and so * for these devices the aux reference count is currently unused beyond 1. */ static int spa_l2cache_compare(const void *a, const void *b) { return (spa_aux_compare(a, b)); } void spa_l2cache_add(vdev_t *vd) { mutex_enter(&spa_l2cache_lock); ASSERT(!vd->vdev_isl2cache); spa_aux_add(vd, &spa_l2cache_avl); vd->vdev_isl2cache = B_TRUE; mutex_exit(&spa_l2cache_lock); } void spa_l2cache_remove(vdev_t *vd) { mutex_enter(&spa_l2cache_lock); ASSERT(vd->vdev_isl2cache); spa_aux_remove(vd, &spa_l2cache_avl); vd->vdev_isl2cache = B_FALSE; mutex_exit(&spa_l2cache_lock); } boolean_t spa_l2cache_exists(uint64_t guid, uint64_t *pool) { boolean_t found; mutex_enter(&spa_l2cache_lock); found = spa_aux_exists(guid, pool, NULL, &spa_l2cache_avl); mutex_exit(&spa_l2cache_lock); return (found); } void spa_l2cache_activate(vdev_t *vd) { mutex_enter(&spa_l2cache_lock); ASSERT(vd->vdev_isl2cache); spa_aux_activate(vd, &spa_l2cache_avl); mutex_exit(&spa_l2cache_lock); } /* * ========================================================================== * SPA vdev locking * ========================================================================== */ /* * Lock the given spa_t for the purpose of adding or removing a vdev. * Grabs the global spa_namespace_lock plus the spa config lock for writing. * It returns the next transaction group for the spa_t. */ uint64_t spa_vdev_enter(spa_t *spa) { mutex_enter(&spa->spa_vdev_top_lock); mutex_enter(&spa_namespace_lock); vdev_autotrim_stop_all(spa); return (spa_vdev_config_enter(spa)); } /* * The same as spa_vdev_enter() above but additionally takes the guid of * the vdev being detached. When there is a rebuild in process it will be * suspended while the vdev tree is modified then resumed by spa_vdev_exit(). * The rebuild is canceled if only a single child remains after the detach. */ uint64_t spa_vdev_detach_enter(spa_t *spa, uint64_t guid) { mutex_enter(&spa->spa_vdev_top_lock); mutex_enter(&spa_namespace_lock); vdev_autotrim_stop_all(spa); if (guid != 0) { vdev_t *vd = spa_lookup_by_guid(spa, guid, B_FALSE); if (vd) { vdev_rebuild_stop_wait(vd->vdev_top); } } return (spa_vdev_config_enter(spa)); } /* * Internal implementation for spa_vdev_enter(). Used when a vdev * operation requires multiple syncs (i.e. removing a device) while * keeping the spa_namespace_lock held. 
*/ uint64_t spa_vdev_config_enter(spa_t *spa) { ASSERT(MUTEX_HELD(&spa_namespace_lock)); spa_config_enter(spa, SCL_ALL, spa, RW_WRITER); return (spa_last_synced_txg(spa) + 1); } /* * Used in combination with spa_vdev_config_enter() to allow the syncing * of multiple transactions without releasing the spa_namespace_lock. */ void spa_vdev_config_exit(spa_t *spa, vdev_t *vd, uint64_t txg, int error, const char *tag) { ASSERT(MUTEX_HELD(&spa_namespace_lock)); int config_changed = B_FALSE; ASSERT(txg > spa_last_synced_txg(spa)); spa->spa_pending_vdev = NULL; /* * Reassess the DTLs. */ vdev_dtl_reassess(spa->spa_root_vdev, 0, 0, B_FALSE, B_FALSE); if (error == 0 && !list_is_empty(&spa->spa_config_dirty_list)) { config_changed = B_TRUE; spa->spa_config_generation++; } /* * Verify the metaslab classes. */ ASSERT(metaslab_class_validate(spa_normal_class(spa)) == 0); ASSERT(metaslab_class_validate(spa_log_class(spa)) == 0); ASSERT(metaslab_class_validate(spa_embedded_log_class(spa)) == 0); ASSERT(metaslab_class_validate(spa_special_class(spa)) == 0); ASSERT(metaslab_class_validate(spa_dedup_class(spa)) == 0); spa_config_exit(spa, SCL_ALL, spa); /* * Panic the system if the specified tag requires it. This * is useful for ensuring that configurations are updated * transactionally. */ if (zio_injection_enabled) zio_handle_panic_injection(spa, tag, 0); /* * Note: this txg_wait_synced() is important because it ensures * that there won't be more than one config change per txg. * This allows us to use the txg as the generation number. */ if (error == 0) txg_wait_synced(spa->spa_dsl_pool, txg); if (vd != NULL) { ASSERT(!vd->vdev_detached || vd->vdev_dtl_sm == NULL); if (vd->vdev_ops->vdev_op_leaf) { mutex_enter(&vd->vdev_initialize_lock); vdev_initialize_stop(vd, VDEV_INITIALIZE_CANCELED, NULL); mutex_exit(&vd->vdev_initialize_lock); mutex_enter(&vd->vdev_trim_lock); vdev_trim_stop(vd, VDEV_TRIM_CANCELED, NULL); mutex_exit(&vd->vdev_trim_lock); } /* * The vdev may be both a leaf and top-level device. */ vdev_autotrim_stop_wait(vd); spa_config_enter(spa, SCL_STATE_ALL, spa, RW_WRITER); vdev_free(vd); spa_config_exit(spa, SCL_STATE_ALL, spa); } /* * If the config changed, update the config cache. */ if (config_changed) spa_write_cachefile(spa, B_FALSE, B_TRUE); } /* * Unlock the spa_t after adding or removing a vdev. Besides undoing the * locking of spa_vdev_enter(), we also want make sure the transactions have * synced to disk, and then update the global configuration cache with the new * information. */ int spa_vdev_exit(spa_t *spa, vdev_t *vd, uint64_t txg, int error) { vdev_autotrim_restart(spa); vdev_rebuild_restart(spa); spa_vdev_config_exit(spa, vd, txg, error, FTAG); mutex_exit(&spa_namespace_lock); mutex_exit(&spa->spa_vdev_top_lock); return (error); } /* * Lock the given spa_t for the purpose of changing vdev state. */ void spa_vdev_state_enter(spa_t *spa, int oplocks) { int locks = SCL_STATE_ALL | oplocks; /* * Root pools may need to read of the underlying devfs filesystem * when opening up a vdev. Unfortunately if we're holding the * SCL_ZIO lock it will result in a deadlock when we try to issue * the read from the root filesystem. Instead we "prefetch" * the associated vnodes that we need prior to opening the * underlying devices and cache them so that we can prevent * any I/O when we are doing the actual open. 
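A sketch of the documented spa_vdev_enter()/spa_vdev_exit() pairing for a configuration change, as defined above; do_vdev_change() and spa_vdev_change_example() are hypothetical stand-ins, not functions in this file.

/*
 * Hypothetical reconfiguration step; a real caller would add, remove,
 * attach, or detach vdevs here.
 */
static int
do_vdev_change(spa_t *spa)
{
	(void) spa;
	return (0);
}

static int
spa_vdev_change_example(spa_t *spa)
{
	uint64_t txg;
	int error;

	txg = spa_vdev_enter(spa);	/* namespace lock + SCL_ALL as writer */
	error = do_vdev_change(spa);

	/*
	 * Releases the locks, waits for txg to sync on success, and
	 * updates the config cache if the configuration changed.
	 */
	return (spa_vdev_exit(spa, NULL, txg, error));
}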
*/ if (spa_is_root(spa)) { int low = locks & ~(SCL_ZIO - 1); int high = locks & ~low; spa_config_enter(spa, high, spa, RW_WRITER); vdev_hold(spa->spa_root_vdev); spa_config_enter(spa, low, spa, RW_WRITER); } else { spa_config_enter(spa, locks, spa, RW_WRITER); } spa->spa_vdev_locks = locks; } int spa_vdev_state_exit(spa_t *spa, vdev_t *vd, int error) { boolean_t config_changed = B_FALSE; vdev_t *vdev_top; if (vd == NULL || vd == spa->spa_root_vdev) { vdev_top = spa->spa_root_vdev; } else { vdev_top = vd->vdev_top; } if (vd != NULL || error == 0) vdev_dtl_reassess(vdev_top, 0, 0, B_FALSE, B_FALSE); if (vd != NULL) { if (vd != spa->spa_root_vdev) vdev_state_dirty(vdev_top); config_changed = B_TRUE; spa->spa_config_generation++; } if (spa_is_root(spa)) vdev_rele(spa->spa_root_vdev); ASSERT3U(spa->spa_vdev_locks, >=, SCL_STATE_ALL); spa_config_exit(spa, spa->spa_vdev_locks, spa); /* * If anything changed, wait for it to sync. This ensures that, * from the system administrator's perspective, zpool(8) commands * are synchronous. This is important for things like zpool offline: * when the command completes, you expect no further I/O from ZFS. */ if (vd != NULL) txg_wait_synced(spa->spa_dsl_pool, 0); /* * If the config changed, update the config cache. */ if (config_changed) { mutex_enter(&spa_namespace_lock); spa_write_cachefile(spa, B_FALSE, B_TRUE); mutex_exit(&spa_namespace_lock); } return (error); } /* * ========================================================================== * Miscellaneous functions * ========================================================================== */ void spa_activate_mos_feature(spa_t *spa, const char *feature, dmu_tx_t *tx) { if (!nvlist_exists(spa->spa_label_features, feature)) { fnvlist_add_boolean(spa->spa_label_features, feature); /* * When we are creating the pool (tx_txg==TXG_INITIAL), we can't * dirty the vdev config because lock SCL_CONFIG is not held. * Thankfully, in this case we don't need to dirty the config * because it will be written out anyway when we finish * creating the pool. */ if (tx->tx_txg != TXG_INITIAL) vdev_config_dirty(spa->spa_root_vdev); } } void spa_deactivate_mos_feature(spa_t *spa, const char *feature) { if (nvlist_remove_all(spa->spa_label_features, feature) == 0) vdev_config_dirty(spa->spa_root_vdev); } /* * Return the spa_t associated with given pool_guid, if it exists. If * device_guid is non-zero, determine whether the pool exists *and* contains * a device with the specified device_guid. */ spa_t * spa_by_guid(uint64_t pool_guid, uint64_t device_guid) { spa_t *spa; avl_tree_t *t = &spa_namespace_avl; ASSERT(MUTEX_HELD(&spa_namespace_lock)); for (spa = avl_first(t); spa != NULL; spa = AVL_NEXT(t, spa)) { if (spa->spa_state == POOL_STATE_UNINITIALIZED) continue; if (spa->spa_root_vdev == NULL) continue; if (spa_guid(spa) == pool_guid) { if (device_guid == 0) break; if (vdev_lookup_by_guid(spa->spa_root_vdev, device_guid) != NULL) break; /* * Check any devices we may be in the process of adding. */ if (spa->spa_pending_vdev) { if (vdev_lookup_by_guid(spa->spa_pending_vdev, device_guid) != NULL) break; } } } return (spa); } /* * Determine whether a pool with the given pool_guid exists. 
*/ boolean_t spa_guid_exists(uint64_t pool_guid, uint64_t device_guid) { return (spa_by_guid(pool_guid, device_guid) != NULL); } char * spa_strdup(const char *s) { size_t len; char *new; len = strlen(s); new = kmem_alloc(len + 1, KM_SLEEP); memcpy(new, s, len + 1); return (new); } void spa_strfree(char *s) { kmem_free(s, strlen(s) + 1); } uint64_t spa_generate_guid(spa_t *spa) { uint64_t guid; if (spa != NULL) { do { (void) random_get_pseudo_bytes((void *)&guid, sizeof (guid)); } while (guid == 0 || spa_guid_exists(spa_guid(spa), guid)); } else { do { (void) random_get_pseudo_bytes((void *)&guid, sizeof (guid)); } while (guid == 0 || spa_guid_exists(guid, 0)); } return (guid); } void snprintf_blkptr(char *buf, size_t buflen, const blkptr_t *bp) { char type[256]; const char *checksum = NULL; const char *compress = NULL; if (bp != NULL) { if (BP_GET_TYPE(bp) & DMU_OT_NEWTYPE) { dmu_object_byteswap_t bswap = DMU_OT_BYTESWAP(BP_GET_TYPE(bp)); (void) snprintf(type, sizeof (type), "bswap %s %s", DMU_OT_IS_METADATA(BP_GET_TYPE(bp)) ? "metadata" : "data", dmu_ot_byteswap[bswap].ob_name); } else { (void) strlcpy(type, dmu_ot[BP_GET_TYPE(bp)].ot_name, sizeof (type)); } if (!BP_IS_EMBEDDED(bp)) { checksum = zio_checksum_table[BP_GET_CHECKSUM(bp)].ci_name; } compress = zio_compress_table[BP_GET_COMPRESS(bp)].ci_name; } SNPRINTF_BLKPTR(snprintf, ' ', buf, buflen, bp, type, checksum, compress); } void spa_freeze(spa_t *spa) { uint64_t freeze_txg = 0; spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER); if (spa->spa_freeze_txg == UINT64_MAX) { freeze_txg = spa_last_synced_txg(spa) + TXG_SIZE; spa->spa_freeze_txg = freeze_txg; } spa_config_exit(spa, SCL_ALL, FTAG); if (freeze_txg != 0) txg_wait_synced(spa_get_dsl(spa), freeze_txg); } void zfs_panic_recover(const char *fmt, ...) { va_list adx; va_start(adx, fmt); vcmn_err(zfs_recover ? CE_WARN : CE_PANIC, fmt, adx); va_end(adx); } /* * This is a stripped-down version of strtoull, suitable only for converting * lowercase hexadecimal numbers that don't overflow. 
*/ uint64_t zfs_strtonum(const char *str, char **nptr) { uint64_t val = 0; char c; int digit; while ((c = *str) != '\0') { if (c >= '0' && c <= '9') digit = c - '0'; else if (c >= 'a' && c <= 'f') digit = 10 + c - 'a'; else break; val *= 16; val += digit; str++; } if (nptr) *nptr = (char *)str; return (val); } void spa_activate_allocation_classes(spa_t *spa, dmu_tx_t *tx) { /* * We bump the feature refcount for each special vdev added to the pool */ ASSERT(spa_feature_is_enabled(spa, SPA_FEATURE_ALLOCATION_CLASSES)); spa_feature_incr(spa, SPA_FEATURE_ALLOCATION_CLASSES, tx); } /* * ========================================================================== * Accessor functions * ========================================================================== */ boolean_t spa_shutting_down(spa_t *spa) { return (spa->spa_async_suspended); } dsl_pool_t * spa_get_dsl(spa_t *spa) { return (spa->spa_dsl_pool); } boolean_t spa_is_initializing(spa_t *spa) { return (spa->spa_is_initializing); } boolean_t spa_indirect_vdevs_loaded(spa_t *spa) { return (spa->spa_indirect_vdevs_loaded); } blkptr_t * spa_get_rootblkptr(spa_t *spa) { return (&spa->spa_ubsync.ub_rootbp); } void spa_set_rootblkptr(spa_t *spa, const blkptr_t *bp) { spa->spa_uberblock.ub_rootbp = *bp; } void spa_altroot(spa_t *spa, char *buf, size_t buflen) { if (spa->spa_root == NULL) buf[0] = '\0'; else - (void) strncpy(buf, spa->spa_root, buflen); + (void) strlcpy(buf, spa->spa_root, buflen); } int spa_sync_pass(spa_t *spa) { return (spa->spa_sync_pass); } char * spa_name(spa_t *spa) { return (spa->spa_name); } uint64_t spa_guid(spa_t *spa) { dsl_pool_t *dp = spa_get_dsl(spa); uint64_t guid; /* * If we fail to parse the config during spa_load(), we can go through * the error path (which posts an ereport) and end up here with no root * vdev. We stash the original pool guid in 'spa_config_guid' to handle * this case. */ if (spa->spa_root_vdev == NULL) return (spa->spa_config_guid); guid = spa->spa_last_synced_guid != 0 ? spa->spa_last_synced_guid : spa->spa_root_vdev->vdev_guid; /* * Return the most recently synced out guid unless we're * in syncing context. */ if (dp && dsl_pool_sync_context(dp)) return (spa->spa_root_vdev->vdev_guid); else return (guid); } uint64_t spa_load_guid(spa_t *spa) { /* * This is a GUID that exists solely as a reference for the * purposes of the arc. It is generated at load time, and * is never written to persistent storage. */ return (spa->spa_load_guid); } uint64_t spa_last_synced_txg(spa_t *spa) { return (spa->spa_ubsync.ub_txg); } uint64_t spa_first_txg(spa_t *spa) { return (spa->spa_first_txg); } uint64_t spa_syncing_txg(spa_t *spa) { return (spa->spa_syncing_txg); } /* * Return the last txg where data can be dirtied. The final txgs * will be used to just clear out any deferred frees that remain. */ uint64_t spa_final_dirty_txg(spa_t *spa) { return (spa->spa_final_txg - TXG_DEFER_SIZE); } pool_state_t spa_state(spa_t *spa) { return (spa->spa_state); } spa_load_state_t spa_load_state(spa_t *spa) { return (spa->spa_load_state); } uint64_t spa_freeze_txg(spa_t *spa) { return (spa->spa_freeze_txg); } /* * Return the inflated asize for a logical write in bytes. This is used by the * DMU to calculate the space a logical write will require on disk. * If lsize is smaller than the largest physical block size allocatable on this * pool we use its value instead, since the write will end up using the whole * block anyway. 
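A short usage sketch for zfs_strtonum() (defined above): it parses bare lowercase hexadecimal with no "0x" prefix and stops at the first non-hex character, optionally reporting that position through nptr.

	char *end;
	uint64_t v = zfs_strtonum("beef,tail", &end);
	/* v == 0xbeef; end now points at ",tail" */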
*/ uint64_t spa_get_worst_case_asize(spa_t *spa, uint64_t lsize) { if (lsize == 0) return (0); /* No inflation needed */ return (MAX(lsize, 1 << spa->spa_max_ashift) * spa_asize_inflation); } /* * Return the amount of slop space in bytes. It is typically 1/32 of the pool * (3.2%), minus the embedded log space. On very small pools, it may be * slightly larger than this. On very large pools, it will be capped to * the value of spa_max_slop. The embedded log space is not included in * spa_dspace. By subtracting it, the usable space (per "zfs list") is a * constant 97% of the total space, regardless of metaslab size (assuming the * default spa_slop_shift=5 and a non-tiny pool). * * See the comment above spa_slop_shift for more details. */ uint64_t spa_get_slop_space(spa_t *spa) { uint64_t space = 0; uint64_t slop = 0; /* * Make sure spa_dedup_dspace has been set. */ if (spa->spa_dedup_dspace == ~0ULL) spa_update_dspace(spa); /* * spa_get_dspace() includes the space only logically "used" by * deduplicated data, so since it's not useful to reserve more * space with more deduplicated data, we subtract that out here. */ space = spa_get_dspace(spa) - spa->spa_dedup_dspace; slop = MIN(space >> spa_slop_shift, spa_max_slop); /* * Subtract the embedded log space, but no more than half the (3.2%) * unusable space. Note, the "no more than half" is only relevant if * zfs_embedded_slog_min_ms >> spa_slop_shift < 2, which is not true by * default. */ uint64_t embedded_log = metaslab_class_get_dspace(spa_embedded_log_class(spa)); slop -= MIN(embedded_log, slop >> 1); /* * Slop space should be at least spa_min_slop, but no more than half * the entire pool. */ slop = MAX(slop, MIN(space >> 1, spa_min_slop)); return (slop); } uint64_t spa_get_dspace(spa_t *spa) { return (spa->spa_dspace); } uint64_t spa_get_checkpoint_space(spa_t *spa) { return (spa->spa_checkpoint_info.sci_dspace); } void spa_update_dspace(spa_t *spa) { spa->spa_dspace = metaslab_class_get_dspace(spa_normal_class(spa)) + ddt_get_dedup_dspace(spa); if (spa->spa_nonallocating_dspace > 0) { /* * Subtract the space provided by all non-allocating vdevs that * contribute to dspace. If a file is overwritten, its old * blocks are freed and new blocks are allocated. If there are * no snapshots of the file, the available space should remain * the same. The old blocks could be freed from the * non-allocating vdev, but the new blocks must be allocated on * other (allocating) vdevs. By reserving the entire size of * the non-allocating vdevs (including allocated space), we * ensure that there will be enough space on the allocating * vdevs for this file overwrite to succeed. * * Note that the DMU/DSL doesn't actually know or care * how much space is allocated (it does its own tracking * of how much space has been logically used). So it * doesn't matter that the data we are moving may be * allocated twice (on the old device and the new device). */ ASSERT3U(spa->spa_dspace, >=, spa->spa_nonallocating_dspace); spa->spa_dspace -= spa->spa_nonallocating_dspace; } } /* * Return the failure mode that has been set to this pool. The default * behavior will be to block all I/Os when a complete failure occurs. 
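A worked example of the slop calculation in spa_get_slop_space() above, using hypothetical numbers and ignoring the embedded-log and dedup adjustments: a 10 TiB pool with the default spa_slop_shift = 5 would reserve 10 TiB / 32 = 320 GiB, which the spa_max_slop bound then clamps to 128 GiB.

#include <stdio.h>
#include <stdint.h>

int
main(void)
{
	uint64_t space = 10ULL << 40;		/* pool size: 10 TiB */
	uint64_t max_slop = 128ULL << 30;	/* spa_max_slop: 128 GiB */
	uint64_t min_slop = 128ULL << 20;	/* spa_min_slop: 128 MiB */
	int slop_shift = 5;			/* default spa_slop_shift */

	uint64_t slop = space >> slop_shift;	/* 320 GiB */
	if (slop > max_slop)			/* cap for very large pools */
		slop = max_slop;
	/*
	 * Floor for very small pools: at least spa_min_slop, but never
	 * more than half the pool.
	 */
	uint64_t floor = (space >> 1) < min_slop ? (space >> 1) : min_slop;
	if (slop < floor)
		slop = floor;

	(void) printf("%llu GiB\n", (unsigned long long)(slop >> 30));
	/* prints 128 */
	return (0);
}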
*/ uint64_t spa_get_failmode(spa_t *spa) { return (spa->spa_failmode); } boolean_t spa_suspended(spa_t *spa) { return (spa->spa_suspended != ZIO_SUSPEND_NONE); } uint64_t spa_version(spa_t *spa) { return (spa->spa_ubsync.ub_version); } boolean_t spa_deflate(spa_t *spa) { return (spa->spa_deflate); } metaslab_class_t * spa_normal_class(spa_t *spa) { return (spa->spa_normal_class); } metaslab_class_t * spa_log_class(spa_t *spa) { return (spa->spa_log_class); } metaslab_class_t * spa_embedded_log_class(spa_t *spa) { return (spa->spa_embedded_log_class); } metaslab_class_t * spa_special_class(spa_t *spa) { return (spa->spa_special_class); } metaslab_class_t * spa_dedup_class(spa_t *spa) { return (spa->spa_dedup_class); } /* * Locate an appropriate allocation class */ metaslab_class_t * spa_preferred_class(spa_t *spa, uint64_t size, dmu_object_type_t objtype, uint_t level, uint_t special_smallblk) { /* * ZIL allocations determine their class in zio_alloc_zil(). */ ASSERT(objtype != DMU_OT_INTENT_LOG); boolean_t has_special_class = spa->spa_special_class->mc_groups != 0; if (DMU_OT_IS_DDT(objtype)) { if (spa->spa_dedup_class->mc_groups != 0) return (spa_dedup_class(spa)); else if (has_special_class && zfs_ddt_data_is_special) return (spa_special_class(spa)); else return (spa_normal_class(spa)); } /* Indirect blocks for user data can land in special if allowed */ if (level > 0 && (DMU_OT_IS_FILE(objtype) || objtype == DMU_OT_ZVOL)) { if (has_special_class && zfs_user_indirect_is_special) return (spa_special_class(spa)); else return (spa_normal_class(spa)); } if (DMU_OT_IS_METADATA(objtype) || level > 0) { if (has_special_class) return (spa_special_class(spa)); else return (spa_normal_class(spa)); } /* * Allow small file blocks in special class in some cases (like * for the dRAID vdev feature). But always leave a reserve of * zfs_special_class_metadata_reserve_pct exclusively for metadata. */ if (DMU_OT_IS_FILE(objtype) && has_special_class && size <= special_smallblk) { metaslab_class_t *special = spa_special_class(spa); uint64_t alloc = metaslab_class_get_alloc(special); uint64_t space = metaslab_class_get_space(special); uint64_t limit = (space * (100 - zfs_special_class_metadata_reserve_pct)) / 100; if (alloc < limit) return (special); } return (spa_normal_class(spa)); } void spa_evicting_os_register(spa_t *spa, objset_t *os) { mutex_enter(&spa->spa_evicting_os_lock); list_insert_head(&spa->spa_evicting_os_list, os); mutex_exit(&spa->spa_evicting_os_lock); } void spa_evicting_os_deregister(spa_t *spa, objset_t *os) { mutex_enter(&spa->spa_evicting_os_lock); list_remove(&spa->spa_evicting_os_list, os); cv_broadcast(&spa->spa_evicting_os_cv); mutex_exit(&spa->spa_evicting_os_lock); } void spa_evicting_os_wait(spa_t *spa) { mutex_enter(&spa->spa_evicting_os_lock); while (!list_is_empty(&spa->spa_evicting_os_list)) cv_wait(&spa->spa_evicting_os_cv, &spa->spa_evicting_os_lock); mutex_exit(&spa->spa_evicting_os_lock); dmu_buf_user_evict_wait(); } int spa_max_replication(spa_t *spa) { /* * As of SPA_VERSION == SPA_VERSION_DITTO_BLOCKS, we are able to * handle BPs with more than one DVA allocated. Set our max * replication level accordingly. 
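A worked example of the small-block reserve in spa_preferred_class() above, with hypothetical sizes: given the default 25% metadata reserve, a 1 TiB special class keeps accepting small file blocks only while less than 768 GiB of it is allocated.

#include <stdio.h>
#include <stdint.h>

int
main(void)
{
	uint64_t space = 1ULL << 40;	/* special class capacity: 1 TiB */
	int reserve_pct = 25;		/* zfs_special_class_metadata_reserve_pct */
	uint64_t limit = (space * (100 - reserve_pct)) / 100;

	(void) printf("%llu GiB\n", (unsigned long long)(limit >> 30));
	/* prints 768; beyond this point the class is left to metadata */
	return (0);
}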
*/ if (spa_version(spa) < SPA_VERSION_DITTO_BLOCKS) return (1); return (MIN(SPA_DVAS_PER_BP, spa_max_replication_override)); } int spa_prev_software_version(spa_t *spa) { return (spa->spa_prev_software_version); } uint64_t spa_deadman_synctime(spa_t *spa) { return (spa->spa_deadman_synctime); } spa_autotrim_t spa_get_autotrim(spa_t *spa) { return (spa->spa_autotrim); } uint64_t spa_deadman_ziotime(spa_t *spa) { return (spa->spa_deadman_ziotime); } uint64_t spa_get_deadman_failmode(spa_t *spa) { return (spa->spa_deadman_failmode); } void spa_set_deadman_failmode(spa_t *spa, const char *failmode) { if (strcmp(failmode, "wait") == 0) spa->spa_deadman_failmode = ZIO_FAILURE_MODE_WAIT; else if (strcmp(failmode, "continue") == 0) spa->spa_deadman_failmode = ZIO_FAILURE_MODE_CONTINUE; else if (strcmp(failmode, "panic") == 0) spa->spa_deadman_failmode = ZIO_FAILURE_MODE_PANIC; else spa->spa_deadman_failmode = ZIO_FAILURE_MODE_WAIT; } void spa_set_deadman_ziotime(hrtime_t ns) { spa_t *spa = NULL; if (spa_mode_global != SPA_MODE_UNINIT) { mutex_enter(&spa_namespace_lock); while ((spa = spa_next(spa)) != NULL) spa->spa_deadman_ziotime = ns; mutex_exit(&spa_namespace_lock); } } void spa_set_deadman_synctime(hrtime_t ns) { spa_t *spa = NULL; if (spa_mode_global != SPA_MODE_UNINIT) { mutex_enter(&spa_namespace_lock); while ((spa = spa_next(spa)) != NULL) spa->spa_deadman_synctime = ns; mutex_exit(&spa_namespace_lock); } } uint64_t dva_get_dsize_sync(spa_t *spa, const dva_t *dva) { uint64_t asize = DVA_GET_ASIZE(dva); uint64_t dsize = asize; ASSERT(spa_config_held(spa, SCL_ALL, RW_READER) != 0); if (asize != 0 && spa->spa_deflate) { vdev_t *vd = vdev_lookup_top(spa, DVA_GET_VDEV(dva)); if (vd != NULL) dsize = (asize >> SPA_MINBLOCKSHIFT) * vd->vdev_deflate_ratio; } return (dsize); } uint64_t bp_get_dsize_sync(spa_t *spa, const blkptr_t *bp) { uint64_t dsize = 0; for (int d = 0; d < BP_GET_NDVAS(bp); d++) dsize += dva_get_dsize_sync(spa, &bp->blk_dva[d]); return (dsize); } uint64_t bp_get_dsize(spa_t *spa, const blkptr_t *bp) { uint64_t dsize = 0; spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER); for (int d = 0; d < BP_GET_NDVAS(bp); d++) dsize += dva_get_dsize_sync(spa, &bp->blk_dva[d]); spa_config_exit(spa, SCL_VDEV, FTAG); return (dsize); } uint64_t spa_dirty_data(spa_t *spa) { return (spa->spa_dsl_pool->dp_dirty_total); } /* * ========================================================================== * SPA Import Progress Routines * ========================================================================== */ typedef struct spa_import_progress { uint64_t pool_guid; /* unique id for updates */ char *pool_name; spa_load_state_t spa_load_state; uint64_t mmp_sec_remaining; /* MMP activity check */ uint64_t spa_load_max_txg; /* rewind txg */ procfs_list_node_t smh_node; } spa_import_progress_t; spa_history_list_t *spa_import_progress_list = NULL; static int spa_import_progress_show_header(struct seq_file *f) { seq_printf(f, "%-20s %-14s %-14s %-12s %s\n", "pool_guid", "load_state", "multihost_secs", "max_txg", "pool_name"); return (0); } static int spa_import_progress_show(struct seq_file *f, void *data) { spa_import_progress_t *sip = (spa_import_progress_t *)data; seq_printf(f, "%-20llu %-14llu %-14llu %-12llu %s\n", (u_longlong_t)sip->pool_guid, (u_longlong_t)sip->spa_load_state, (u_longlong_t)sip->mmp_sec_remaining, (u_longlong_t)sip->spa_load_max_txg, (sip->pool_name ? 
sip->pool_name : "-")); return (0); } /* Remove oldest elements from list until there are no more than 'size' left */ static void spa_import_progress_truncate(spa_history_list_t *shl, unsigned int size) { spa_import_progress_t *sip; while (shl->size > size) { sip = list_remove_head(&shl->procfs_list.pl_list); if (sip->pool_name) spa_strfree(sip->pool_name); kmem_free(sip, sizeof (spa_import_progress_t)); shl->size--; } IMPLY(size == 0, list_is_empty(&shl->procfs_list.pl_list)); } static void spa_import_progress_init(void) { spa_import_progress_list = kmem_zalloc(sizeof (spa_history_list_t), KM_SLEEP); spa_import_progress_list->size = 0; spa_import_progress_list->procfs_list.pl_private = spa_import_progress_list; procfs_list_install("zfs", NULL, "import_progress", 0644, &spa_import_progress_list->procfs_list, spa_import_progress_show, spa_import_progress_show_header, NULL, offsetof(spa_import_progress_t, smh_node)); } static void spa_import_progress_destroy(void) { spa_history_list_t *shl = spa_import_progress_list; procfs_list_uninstall(&shl->procfs_list); spa_import_progress_truncate(shl, 0); procfs_list_destroy(&shl->procfs_list); kmem_free(shl, sizeof (spa_history_list_t)); } int spa_import_progress_set_state(uint64_t pool_guid, spa_load_state_t load_state) { spa_history_list_t *shl = spa_import_progress_list; spa_import_progress_t *sip; int error = ENOENT; if (shl->size == 0) return (0); mutex_enter(&shl->procfs_list.pl_lock); for (sip = list_tail(&shl->procfs_list.pl_list); sip != NULL; sip = list_prev(&shl->procfs_list.pl_list, sip)) { if (sip->pool_guid == pool_guid) { sip->spa_load_state = load_state; error = 0; break; } } mutex_exit(&shl->procfs_list.pl_lock); return (error); } int spa_import_progress_set_max_txg(uint64_t pool_guid, uint64_t load_max_txg) { spa_history_list_t *shl = spa_import_progress_list; spa_import_progress_t *sip; int error = ENOENT; if (shl->size == 0) return (0); mutex_enter(&shl->procfs_list.pl_lock); for (sip = list_tail(&shl->procfs_list.pl_list); sip != NULL; sip = list_prev(&shl->procfs_list.pl_list, sip)) { if (sip->pool_guid == pool_guid) { sip->spa_load_max_txg = load_max_txg; error = 0; break; } } mutex_exit(&shl->procfs_list.pl_lock); return (error); } int spa_import_progress_set_mmp_check(uint64_t pool_guid, uint64_t mmp_sec_remaining) { spa_history_list_t *shl = spa_import_progress_list; spa_import_progress_t *sip; int error = ENOENT; if (shl->size == 0) return (0); mutex_enter(&shl->procfs_list.pl_lock); for (sip = list_tail(&shl->procfs_list.pl_list); sip != NULL; sip = list_prev(&shl->procfs_list.pl_list, sip)) { if (sip->pool_guid == pool_guid) { sip->mmp_sec_remaining = mmp_sec_remaining; error = 0; break; } } mutex_exit(&shl->procfs_list.pl_lock); return (error); } /* * A new import is in progress, add an entry. 
*/ void spa_import_progress_add(spa_t *spa) { spa_history_list_t *shl = spa_import_progress_list; spa_import_progress_t *sip; char *poolname = NULL; sip = kmem_zalloc(sizeof (spa_import_progress_t), KM_SLEEP); sip->pool_guid = spa_guid(spa); (void) nvlist_lookup_string(spa->spa_config, ZPOOL_CONFIG_POOL_NAME, &poolname); if (poolname == NULL) poolname = spa_name(spa); sip->pool_name = spa_strdup(poolname); sip->spa_load_state = spa_load_state(spa); mutex_enter(&shl->procfs_list.pl_lock); procfs_list_add(&shl->procfs_list, sip); shl->size++; mutex_exit(&shl->procfs_list.pl_lock); } void spa_import_progress_remove(uint64_t pool_guid) { spa_history_list_t *shl = spa_import_progress_list; spa_import_progress_t *sip; mutex_enter(&shl->procfs_list.pl_lock); for (sip = list_tail(&shl->procfs_list.pl_list); sip != NULL; sip = list_prev(&shl->procfs_list.pl_list, sip)) { if (sip->pool_guid == pool_guid) { if (sip->pool_name) spa_strfree(sip->pool_name); list_remove(&shl->procfs_list.pl_list, sip); shl->size--; kmem_free(sip, sizeof (spa_import_progress_t)); break; } } mutex_exit(&shl->procfs_list.pl_lock); } /* * ========================================================================== * Initialization and Termination * ========================================================================== */ static int spa_name_compare(const void *a1, const void *a2) { const spa_t *s1 = a1; const spa_t *s2 = a2; int s; s = strcmp(s1->spa_name, s2->spa_name); return (TREE_ISIGN(s)); } void spa_boot_init(void) { spa_config_load(); } void spa_init(spa_mode_t mode) { mutex_init(&spa_namespace_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&spa_spare_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&spa_l2cache_lock, NULL, MUTEX_DEFAULT, NULL); cv_init(&spa_namespace_cv, NULL, CV_DEFAULT, NULL); avl_create(&spa_namespace_avl, spa_name_compare, sizeof (spa_t), offsetof(spa_t, spa_avl)); avl_create(&spa_spare_avl, spa_spare_compare, sizeof (spa_aux_t), offsetof(spa_aux_t, aux_avl)); avl_create(&spa_l2cache_avl, spa_l2cache_compare, sizeof (spa_aux_t), offsetof(spa_aux_t, aux_avl)); spa_mode_global = mode; #ifndef _KERNEL if (spa_mode_global != SPA_MODE_READ && dprintf_find_string("watch")) { struct sigaction sa; sa.sa_flags = SA_SIGINFO; sigemptyset(&sa.sa_mask); sa.sa_sigaction = arc_buf_sigsegv; if (sigaction(SIGSEGV, &sa, NULL) == -1) { perror("could not enable watchpoints: " "sigaction(SIGSEGV, ...) = "); } else { arc_watch = B_TRUE; } } #endif fm_init(); zfs_refcount_init(); unique_init(); zfs_btree_init(); metaslab_stat_init(); ddt_init(); zio_init(); dmu_init(); zil_init(); vdev_cache_stat_init(); vdev_mirror_stat_init(); vdev_raidz_math_init(); vdev_file_init(); zfs_prop_init(); chksum_init(); zpool_prop_init(); zpool_feature_init(); spa_config_load(); vdev_prop_init(); l2arc_start(); scan_init(); qat_init(); spa_import_progress_init(); } void spa_fini(void) { l2arc_stop(); spa_evict_all(); vdev_file_fini(); vdev_cache_stat_fini(); vdev_mirror_stat_fini(); vdev_raidz_math_fini(); chksum_fini(); zil_fini(); dmu_fini(); zio_fini(); ddt_fini(); metaslab_stat_fini(); zfs_btree_fini(); unique_fini(); zfs_refcount_fini(); fm_fini(); scan_fini(); qat_fini(); spa_import_progress_destroy(); avl_destroy(&spa_namespace_avl); avl_destroy(&spa_spare_avl); avl_destroy(&spa_l2cache_avl); cv_destroy(&spa_namespace_cv); mutex_destroy(&spa_namespace_lock); mutex_destroy(&spa_spare_lock); mutex_destroy(&spa_l2cache_lock); } /* * Return whether this pool has a dedicated slog device. No locking needed. 
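spa_name_compare() above wraps strcmp() in TREE_ISIGN() because the AVL/btree code expects comparators to return exactly -1, 0, or +1 rather than an arbitrary signed value. A standalone equivalent is below; the ISIGN macro mirrors how TREE_ISIGN is typically defined, but it is written out here rather than taken verbatim from the headers.

#include <string.h>
#include <stdio.h>

/* Standalone stand-in for TREE_ISIGN(): collapse a comparison to -1/0/+1. */
#define	ISIGN(a)	(((a) > 0) - ((a) < 0))

static int
name_compare(const void *a1, const void *a2)
{
	return (ISIGN(strcmp((const char *)a1, (const char *)a2)));
}

int
main(void)
{
	printf("%d %d %d\n",
	    name_compare("pool-a", "pool-b"),	/* -1 */
	    name_compare("tank", "tank"),	/*  0 */
	    name_compare("zroot", "tank"));	/* +1 */
	return (0);
}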
* It's not a problem if the wrong answer is returned as it's only for * performance and not correctness. */ boolean_t spa_has_slogs(spa_t *spa) { return (spa->spa_log_class->mc_groups != 0); } spa_log_state_t spa_get_log_state(spa_t *spa) { return (spa->spa_log_state); } void spa_set_log_state(spa_t *spa, spa_log_state_t state) { spa->spa_log_state = state; } boolean_t spa_is_root(spa_t *spa) { return (spa->spa_is_root); } boolean_t spa_writeable(spa_t *spa) { return (!!(spa->spa_mode & SPA_MODE_WRITE) && spa->spa_trust_config); } /* * Returns true if there is a pending sync task in any of the current * syncing txg, the current quiescing txg, or the current open txg. */ boolean_t spa_has_pending_synctask(spa_t *spa) { return (!txg_all_lists_empty(&spa->spa_dsl_pool->dp_sync_tasks) || !txg_all_lists_empty(&spa->spa_dsl_pool->dp_early_sync_tasks)); } spa_mode_t spa_mode(spa_t *spa) { return (spa->spa_mode); } uint64_t spa_bootfs(spa_t *spa) { return (spa->spa_bootfs); } uint64_t spa_delegation(spa_t *spa) { return (spa->spa_delegation); } objset_t * spa_meta_objset(spa_t *spa) { return (spa->spa_meta_objset); } enum zio_checksum spa_dedup_checksum(spa_t *spa) { return (spa->spa_dedup_checksum); } /* * Reset pool scan stat per scan pass (or reboot). */ void spa_scan_stat_init(spa_t *spa) { /* data not stored on disk */ spa->spa_scan_pass_start = gethrestime_sec(); if (dsl_scan_is_paused_scrub(spa->spa_dsl_pool->dp_scan)) spa->spa_scan_pass_scrub_pause = spa->spa_scan_pass_start; else spa->spa_scan_pass_scrub_pause = 0; spa->spa_scan_pass_scrub_spent_paused = 0; spa->spa_scan_pass_exam = 0; spa->spa_scan_pass_issued = 0; vdev_scan_stat_init(spa->spa_root_vdev); } /* * Get scan stats for zpool status reports */ int spa_scan_get_stats(spa_t *spa, pool_scan_stat_t *ps) { dsl_scan_t *scn = spa->spa_dsl_pool ? spa->spa_dsl_pool->dp_scan : NULL; if (scn == NULL || scn->scn_phys.scn_func == POOL_SCAN_NONE) return (SET_ERROR(ENOENT)); memset(ps, 0, sizeof (pool_scan_stat_t)); /* data stored on disk */ ps->pss_func = scn->scn_phys.scn_func; ps->pss_state = scn->scn_phys.scn_state; ps->pss_start_time = scn->scn_phys.scn_start_time; ps->pss_end_time = scn->scn_phys.scn_end_time; ps->pss_to_examine = scn->scn_phys.scn_to_examine; ps->pss_examined = scn->scn_phys.scn_examined; ps->pss_to_process = scn->scn_phys.scn_to_process; ps->pss_processed = scn->scn_phys.scn_processed; ps->pss_errors = scn->scn_phys.scn_errors; /* data not stored on disk */ ps->pss_pass_exam = spa->spa_scan_pass_exam; ps->pss_pass_start = spa->spa_scan_pass_start; ps->pss_pass_scrub_pause = spa->spa_scan_pass_scrub_pause; ps->pss_pass_scrub_spent_paused = spa->spa_scan_pass_scrub_spent_paused; ps->pss_pass_issued = spa->spa_scan_pass_issued; ps->pss_issued = scn->scn_issued_before_pass + spa->spa_scan_pass_issued; return (0); } int spa_maxblocksize(spa_t *spa) { if (spa_feature_is_enabled(spa, SPA_FEATURE_LARGE_BLOCKS)) return (SPA_MAXBLOCKSIZE); else return (SPA_OLD_MAXBLOCKSIZE); } /* * Returns the txg that the last device removal completed. No indirect mappings * have been added since this txg. */ uint64_t spa_get_last_removal_txg(spa_t *spa) { uint64_t vdevid; uint64_t ret = -1ULL; spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER); /* * sr_prev_indirect_vdev is only modified while holding all the * config locks, so it is sufficient to hold SCL_VDEV as reader when * examining it. 
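The only derived field in spa_scan_get_stats() above is the cumulative issued count: the in-memory counter for the current pass is added to the on-disk total carried over from earlier passes. A trivial standalone restatement, with made-up values, follows.

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint64_t issued_before_pass = 900ULL << 30;	/* prior passes, on disk */
	uint64_t pass_issued = 64ULL << 30;		/* current pass, in memory */

	/* pss_issued as reported to "zpool status"-style consumers. */
	uint64_t pss_issued = issued_before_pass + pass_issued;

	printf("%llu GiB issued so far\n",
	    (unsigned long long)(pss_issued >> 30));
	return (0);
}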
*/ vdevid = spa->spa_removing_phys.sr_prev_indirect_vdev; while (vdevid != -1ULL) { vdev_t *vd = vdev_lookup_top(spa, vdevid); vdev_indirect_births_t *vib = vd->vdev_indirect_births; ASSERT3P(vd->vdev_ops, ==, &vdev_indirect_ops); /* * If the removal did not remap any data, we don't care. */ if (vdev_indirect_births_count(vib) != 0) { ret = vdev_indirect_births_last_entry_txg(vib); break; } vdevid = vd->vdev_indirect_config.vic_prev_indirect_vdev; } spa_config_exit(spa, SCL_VDEV, FTAG); IMPLY(ret != -1ULL, spa_feature_is_active(spa, SPA_FEATURE_DEVICE_REMOVAL)); return (ret); } int spa_maxdnodesize(spa_t *spa) { if (spa_feature_is_enabled(spa, SPA_FEATURE_LARGE_DNODE)) return (DNODE_MAX_SIZE); else return (DNODE_MIN_SIZE); } boolean_t spa_multihost(spa_t *spa) { return (spa->spa_multihost ? B_TRUE : B_FALSE); } uint32_t spa_get_hostid(spa_t *spa) { return (spa->spa_hostid); } boolean_t spa_trust_config(spa_t *spa) { return (spa->spa_trust_config); } uint64_t spa_missing_tvds_allowed(spa_t *spa) { return (spa->spa_missing_tvds_allowed); } space_map_t * spa_syncing_log_sm(spa_t *spa) { return (spa->spa_syncing_log_sm); } void spa_set_missing_tvds(spa_t *spa, uint64_t missing) { spa->spa_missing_tvds = missing; } /* * Return the pool state string ("ONLINE", "DEGRADED", "SUSPENDED", etc). */ const char * spa_state_to_name(spa_t *spa) { ASSERT3P(spa, !=, NULL); /* * it is possible for the spa to exist, without root vdev * as the spa transitions during import/export */ vdev_t *rvd = spa->spa_root_vdev; if (rvd == NULL) { return ("TRANSITIONING"); } vdev_state_t state = rvd->vdev_state; vdev_aux_t aux = rvd->vdev_stat.vs_aux; if (spa_suspended(spa) && (spa_get_failmode(spa) != ZIO_FAILURE_MODE_CONTINUE)) return ("SUSPENDED"); switch (state) { case VDEV_STATE_CLOSED: case VDEV_STATE_OFFLINE: return ("OFFLINE"); case VDEV_STATE_REMOVED: return ("REMOVED"); case VDEV_STATE_CANT_OPEN: if (aux == VDEV_AUX_CORRUPT_DATA || aux == VDEV_AUX_BAD_LOG) return ("FAULTED"); else if (aux == VDEV_AUX_SPLIT_POOL) return ("SPLIT"); else return ("UNAVAIL"); case VDEV_STATE_FAULTED: return ("FAULTED"); case VDEV_STATE_DEGRADED: return ("DEGRADED"); case VDEV_STATE_HEALTHY: return ("ONLINE"); default: break; } return ("UNKNOWN"); } boolean_t spa_top_vdevs_spacemap_addressable(spa_t *spa) { vdev_t *rvd = spa->spa_root_vdev; for (uint64_t c = 0; c < rvd->vdev_children; c++) { if (!vdev_is_spacemap_addressable(rvd->vdev_child[c])) return (B_FALSE); } return (B_TRUE); } boolean_t spa_has_checkpoint(spa_t *spa) { return (spa->spa_checkpoint_txg != 0); } boolean_t spa_importing_readonly_checkpoint(spa_t *spa) { return ((spa->spa_import_flags & ZFS_IMPORT_CHECKPOINT) && spa->spa_mode == SPA_MODE_READ); } uint64_t spa_min_claim_txg(spa_t *spa) { uint64_t checkpoint_txg = spa->spa_uberblock.ub_checkpoint_txg; if (checkpoint_txg != 0) return (checkpoint_txg + 1); return (spa->spa_first_txg); } /* * If there is a checkpoint, async destroys may consume more space from * the pool instead of freeing it. In an attempt to save the pool from * getting suspended when it is about to run out of space, we stop * processing async destroys. */ boolean_t spa_suspend_async_destroy(spa_t *spa) { dsl_pool_t *dp = spa_get_dsl(spa); uint64_t unreserved = dsl_pool_unreserved_space(dp, ZFS_SPACE_CHECK_EXTRA_RESERVED); uint64_t used = dsl_dir_phys(dp->dp_root_dir)->dd_used_bytes; uint64_t avail = (unreserved > used) ? 
(unreserved - used) : 0; if (spa_has_checkpoint(spa) && avail == 0) return (B_TRUE); return (B_FALSE); } #if defined(_KERNEL) int param_set_deadman_failmode_common(const char *val) { spa_t *spa = NULL; char *p; if (val == NULL) return (SET_ERROR(EINVAL)); if ((p = strchr(val, '\n')) != NULL) *p = '\0'; if (strcmp(val, "wait") != 0 && strcmp(val, "continue") != 0 && strcmp(val, "panic")) return (SET_ERROR(EINVAL)); if (spa_mode_global != SPA_MODE_UNINIT) { mutex_enter(&spa_namespace_lock); while ((spa = spa_next(spa)) != NULL) spa_set_deadman_failmode(spa, val); mutex_exit(&spa_namespace_lock); } return (0); } #endif /* Namespace manipulation */ EXPORT_SYMBOL(spa_lookup); EXPORT_SYMBOL(spa_add); EXPORT_SYMBOL(spa_remove); EXPORT_SYMBOL(spa_next); /* Refcount functions */ EXPORT_SYMBOL(spa_open_ref); EXPORT_SYMBOL(spa_close); EXPORT_SYMBOL(spa_refcount_zero); /* Pool configuration lock */ EXPORT_SYMBOL(spa_config_tryenter); EXPORT_SYMBOL(spa_config_enter); EXPORT_SYMBOL(spa_config_exit); EXPORT_SYMBOL(spa_config_held); /* Pool vdev add/remove lock */ EXPORT_SYMBOL(spa_vdev_enter); EXPORT_SYMBOL(spa_vdev_exit); /* Pool vdev state change lock */ EXPORT_SYMBOL(spa_vdev_state_enter); EXPORT_SYMBOL(spa_vdev_state_exit); /* Accessor functions */ EXPORT_SYMBOL(spa_shutting_down); EXPORT_SYMBOL(spa_get_dsl); EXPORT_SYMBOL(spa_get_rootblkptr); EXPORT_SYMBOL(spa_set_rootblkptr); EXPORT_SYMBOL(spa_altroot); EXPORT_SYMBOL(spa_sync_pass); EXPORT_SYMBOL(spa_name); EXPORT_SYMBOL(spa_guid); EXPORT_SYMBOL(spa_last_synced_txg); EXPORT_SYMBOL(spa_first_txg); EXPORT_SYMBOL(spa_syncing_txg); EXPORT_SYMBOL(spa_version); EXPORT_SYMBOL(spa_state); EXPORT_SYMBOL(spa_load_state); EXPORT_SYMBOL(spa_freeze_txg); EXPORT_SYMBOL(spa_get_dspace); EXPORT_SYMBOL(spa_update_dspace); EXPORT_SYMBOL(spa_deflate); EXPORT_SYMBOL(spa_normal_class); EXPORT_SYMBOL(spa_log_class); EXPORT_SYMBOL(spa_special_class); EXPORT_SYMBOL(spa_preferred_class); EXPORT_SYMBOL(spa_max_replication); EXPORT_SYMBOL(spa_prev_software_version); EXPORT_SYMBOL(spa_get_failmode); EXPORT_SYMBOL(spa_suspended); EXPORT_SYMBOL(spa_bootfs); EXPORT_SYMBOL(spa_delegation); EXPORT_SYMBOL(spa_meta_objset); EXPORT_SYMBOL(spa_maxblocksize); EXPORT_SYMBOL(spa_maxdnodesize); /* Miscellaneous support routines */ EXPORT_SYMBOL(spa_guid_exists); EXPORT_SYMBOL(spa_strdup); EXPORT_SYMBOL(spa_strfree); EXPORT_SYMBOL(spa_generate_guid); EXPORT_SYMBOL(snprintf_blkptr); EXPORT_SYMBOL(spa_freeze); EXPORT_SYMBOL(spa_upgrade); EXPORT_SYMBOL(spa_evict_all); EXPORT_SYMBOL(spa_lookup_by_guid); EXPORT_SYMBOL(spa_has_spare); EXPORT_SYMBOL(dva_get_dsize_sync); EXPORT_SYMBOL(bp_get_dsize_sync); EXPORT_SYMBOL(bp_get_dsize); EXPORT_SYMBOL(spa_has_slogs); EXPORT_SYMBOL(spa_is_root); EXPORT_SYMBOL(spa_writeable); EXPORT_SYMBOL(spa_mode); EXPORT_SYMBOL(spa_namespace_lock); EXPORT_SYMBOL(spa_trust_config); EXPORT_SYMBOL(spa_missing_tvds_allowed); EXPORT_SYMBOL(spa_set_missing_tvds); EXPORT_SYMBOL(spa_state_to_name); EXPORT_SYMBOL(spa_importing_readonly_checkpoint); EXPORT_SYMBOL(spa_min_claim_txg); EXPORT_SYMBOL(spa_suspend_async_destroy); EXPORT_SYMBOL(spa_has_checkpoint); EXPORT_SYMBOL(spa_top_vdevs_spacemap_addressable); ZFS_MODULE_PARAM(zfs, zfs_, flags, UINT, ZMOD_RW, "Set additional debugging flags"); ZFS_MODULE_PARAM(zfs, zfs_, recover, INT, ZMOD_RW, "Set to attempt to recover from fatal errors"); ZFS_MODULE_PARAM(zfs, zfs_, free_leak_on_eio, INT, ZMOD_RW, "Set to ignore IO errors during free and permanently leak the space"); ZFS_MODULE_PARAM(zfs_deadman, zfs_deadman_, checktime_ms, 
ULONG, ZMOD_RW, "Dead I/O check interval in milliseconds"); ZFS_MODULE_PARAM(zfs_deadman, zfs_deadman_, enabled, INT, ZMOD_RW, "Enable deadman timer"); ZFS_MODULE_PARAM(zfs_spa, spa_, asize_inflation, INT, ZMOD_RW, "SPA size estimate multiplication factor"); ZFS_MODULE_PARAM(zfs, zfs_, ddt_data_is_special, INT, ZMOD_RW, "Place DDT data into the special class"); ZFS_MODULE_PARAM(zfs, zfs_, user_indirect_is_special, INT, ZMOD_RW, "Place user data indirect blocks into the special class"); /* BEGIN CSTYLED */ ZFS_MODULE_PARAM_CALL(zfs_deadman, zfs_deadman_, failmode, param_set_deadman_failmode, param_get_charp, ZMOD_RW, "Failmode for deadman timer"); ZFS_MODULE_PARAM_CALL(zfs_deadman, zfs_deadman_, synctime_ms, param_set_deadman_synctime, param_get_ulong, ZMOD_RW, "Pool sync expiration time in milliseconds"); ZFS_MODULE_PARAM_CALL(zfs_deadman, zfs_deadman_, ziotime_ms, param_set_deadman_ziotime, param_get_ulong, ZMOD_RW, "IO expiration time in milliseconds"); ZFS_MODULE_PARAM(zfs, zfs_, special_class_metadata_reserve_pct, INT, ZMOD_RW, "Small file blocks in special vdevs depends on this much " "free space available"); /* END CSTYLED */ ZFS_MODULE_PARAM_CALL(zfs_spa, spa_, slop_shift, param_set_slop_shift, param_get_int, ZMOD_RW, "Reserved free space in pool"); diff --git a/module/zfs/zcp_get.c b/module/zfs/zcp_get.c index 8230a4193662..cd17374eb422 100644 --- a/module/zfs/zcp_get.c +++ b/module/zfs/zcp_get.c @@ -1,810 +1,809 @@ /* * CDDL HEADER START * * This file and its contents are supplied under the terms of the * Common Development and Distribution License ("CDDL"), version 1.0. * You may only use this file in accordance with the terms of version * 1.0 of the CDDL. * * A full copy of the text of the CDDL should have accompanied this * source. A copy of the CDDL is also available via the Internet at * http://www.illumos.org/license/CDDL. * * CDDL HEADER END */ /* * Copyright (c) 2016 by Delphix. All rights reserved. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef _KERNEL #include #include #endif static int get_objset_type(dsl_dataset_t *ds, zfs_type_t *type) { int error; objset_t *os; error = dmu_objset_from_ds(ds, &os); if (error != 0) return (error); if (ds->ds_is_snapshot) { *type = ZFS_TYPE_SNAPSHOT; } else { switch (os->os_phys->os_type) { case DMU_OST_ZFS: *type = ZFS_TYPE_FILESYSTEM; break; case DMU_OST_ZVOL: *type = ZFS_TYPE_VOLUME; break; default: return (EINVAL); } } return (0); } /* * Returns the string name of ds's type in str (a buffer which should be * at least 12 bytes long). */ static int get_objset_type_name(dsl_dataset_t *ds, char *str) { zfs_type_t type = ZFS_TYPE_INVALID; int error = get_objset_type(ds, &type); if (error != 0) return (error); switch (type) { case ZFS_TYPE_SNAPSHOT: (void) strlcpy(str, "snapshot", ZAP_MAXVALUELEN); break; case ZFS_TYPE_FILESYSTEM: (void) strlcpy(str, "filesystem", ZAP_MAXVALUELEN); break; case ZFS_TYPE_VOLUME: (void) strlcpy(str, "volume", ZAP_MAXVALUELEN); break; default: return (EINVAL); } return (0); } /* * Determines the source of a property given its setpoint and * property type. It pushes the source to the lua stack. 
*/ static void get_prop_src(lua_State *state, const char *setpoint, zfs_prop_t prop) { if (zfs_prop_readonly(prop) || (prop == ZFS_PROP_VERSION)) { lua_pushnil(state); } else { const char *src; if (strcmp("", setpoint) == 0) { src = "default"; } else { src = setpoint; } (void) lua_pushstring(state, src); } } /* * Given an error encountered while getting properties, either longjmp's for * a fatal error or pushes nothing to the stack for a non fatal one. */ static int zcp_handle_error(lua_State *state, const char *dataset_name, const char *property_name, int error) { ASSERT3S(error, !=, 0); if (error == ENOENT) { return (0); } else if (error == EINVAL) { return (luaL_error(state, "property '%s' is not a valid property on dataset '%s'", property_name, dataset_name)); } else if (error == EIO) { return (luaL_error(state, "I/O error while retrieving property '%s' on dataset '%s'", property_name, dataset_name)); } else { return (luaL_error(state, "unexpected error %d while " "retrieving property '%s' on dataset '%s'", error, property_name, dataset_name)); } } /* * Look up a user defined property in the zap object. If it exists, push it * and the setpoint onto the stack, otherwise don't push anything. */ static int zcp_get_user_prop(lua_State *state, dsl_pool_t *dp, const char *dataset_name, const char *property_name) { int error; char *buf; char setpoint[ZFS_MAX_DATASET_NAME_LEN]; /* * zcp_dataset_hold will either successfully return the requested * dataset or throw a lua error and longjmp out of the zfs.get_prop call * without returning. */ dsl_dataset_t *ds = zcp_dataset_hold(state, dp, dataset_name, FTAG); if (ds == NULL) return (1); /* not reached; zcp_dataset_hold() longjmp'd */ buf = kmem_alloc(ZAP_MAXVALUELEN, KM_SLEEP); error = dsl_prop_get_ds(ds, property_name, 1, ZAP_MAXVALUELEN, buf, setpoint); dsl_dataset_rele(ds, FTAG); if (error != 0) { kmem_free(buf, ZAP_MAXVALUELEN); return (zcp_handle_error(state, dataset_name, property_name, error)); } (void) lua_pushstring(state, buf); (void) lua_pushstring(state, setpoint); kmem_free(buf, ZAP_MAXVALUELEN); return (2); } /* * Check if the property we're looking for is stored in the ds_dir. If so, * return it in the 'val' argument. Return 0 on success and ENOENT and if * the property is not present. */ static int get_dsl_dir_prop(dsl_dataset_t *ds, zfs_prop_t zfs_prop, uint64_t *val) { dsl_dir_t *dd = ds->ds_dir; mutex_enter(&dd->dd_lock); switch (zfs_prop) { case ZFS_PROP_USEDSNAP: *val = dsl_dir_get_usedsnap(dd); break; case ZFS_PROP_USEDCHILD: *val = dsl_dir_get_usedchild(dd); break; case ZFS_PROP_USEDDS: *val = dsl_dir_get_usedds(dd); break; case ZFS_PROP_USEDREFRESERV: *val = dsl_dir_get_usedrefreserv(dd); break; case ZFS_PROP_LOGICALUSED: *val = dsl_dir_get_logicalused(dd); break; default: mutex_exit(&dd->dd_lock); return (SET_ERROR(ENOENT)); } mutex_exit(&dd->dd_lock); return (0); } /* * Check if the property we're looking for is stored at the dsl_dataset or * dsl_dir level. If so, push the property value and source onto the lua stack * and return 0. If it is not present or a failure occurs in lookup, return a * non-zero error value. 
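get_prop_src() above reduces to a three-way choice: read-only properties (and ZFS_PROP_VERSION) get no source at all, an empty setpoint means the default value was used, and anything else names the dataset the value was set on or inherited from. A standalone restatement of that decision follows; the function name is illustrative and NULL stands in for the Lua nil that the real code pushes.

#include <stdio.h>
#include <string.h>

/* NULL plays the role of lua_pushnil(); a string is what would be pushed. */
static const char *
prop_source(int prop_is_readonly, const char *setpoint)
{
	if (prop_is_readonly)
		return (NULL);
	if (strcmp(setpoint, "") == 0)
		return ("default");
	return (setpoint);
}

int
main(void)
{
	const char *s;

	s = prop_source(1, "tank/home");
	printf("%s\n", s == NULL ? "(nil)" : s);	/* read-only: no source */
	s = prop_source(0, "");
	printf("%s\n", s);				/* default */
	s = prop_source(0, "tank");
	printf("%s\n", s);				/* set/inherited at "tank" */
	return (0);
}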
*/ static int get_special_prop(lua_State *state, dsl_dataset_t *ds, const char *dsname, zfs_prop_t zfs_prop) { int error = 0; objset_t *os; uint64_t numval = 0; char *strval = kmem_alloc(ZAP_MAXVALUELEN, KM_SLEEP); char setpoint[ZFS_MAX_DATASET_NAME_LEN] = "Internal error - setpoint not determined"; zfs_type_t ds_type = ZFS_TYPE_INVALID; zprop_type_t prop_type = zfs_prop_get_type(zfs_prop); (void) get_objset_type(ds, &ds_type); switch (zfs_prop) { case ZFS_PROP_REFRATIO: numval = dsl_get_refratio(ds); break; case ZFS_PROP_USED: numval = dsl_get_used(ds); break; case ZFS_PROP_CLONES: { nvlist_t *clones = fnvlist_alloc(); error = get_clones_stat_impl(ds, clones); if (error == 0) { /* push list to lua stack */ VERIFY0(zcp_nvlist_to_lua(state, clones, NULL, 0ULL)); /* source */ (void) lua_pushnil(state); } nvlist_free(clones); kmem_free(strval, ZAP_MAXVALUELEN); return (error); } case ZFS_PROP_COMPRESSRATIO: numval = dsl_get_compressratio(ds); break; case ZFS_PROP_CREATION: numval = dsl_get_creation(ds); break; case ZFS_PROP_REFERENCED: numval = dsl_get_referenced(ds); break; case ZFS_PROP_AVAILABLE: numval = dsl_get_available(ds); break; case ZFS_PROP_LOGICALREFERENCED: numval = dsl_get_logicalreferenced(ds); break; case ZFS_PROP_CREATETXG: numval = dsl_get_creationtxg(ds); break; case ZFS_PROP_GUID: numval = dsl_get_guid(ds); break; case ZFS_PROP_UNIQUE: numval = dsl_get_unique(ds); break; case ZFS_PROP_OBJSETID: numval = dsl_get_objsetid(ds); break; case ZFS_PROP_ORIGIN: dsl_dir_get_origin(ds->ds_dir, strval); break; case ZFS_PROP_USERACCOUNTING: error = dmu_objset_from_ds(ds, &os); if (error == 0) numval = dmu_objset_userspace_present(os); break; case ZFS_PROP_WRITTEN: error = dsl_get_written(ds, &numval); break; case ZFS_PROP_TYPE: error = get_objset_type_name(ds, strval); break; case ZFS_PROP_PREV_SNAP: error = dsl_get_prev_snap(ds, strval); break; case ZFS_PROP_NAME: dsl_dataset_name(ds, strval); break; case ZFS_PROP_MOUNTPOINT: error = dsl_get_mountpoint(ds, dsname, strval, setpoint); break; case ZFS_PROP_VERSION: /* should be a snapshot or filesystem */ ASSERT(ds_type != ZFS_TYPE_VOLUME); error = dmu_objset_from_ds(ds, &os); /* look in the master node for the version */ if (error == 0) { error = zap_lookup(os, MASTER_NODE_OBJ, ZPL_VERSION_STR, sizeof (numval), 1, &numval); } break; case ZFS_PROP_DEFER_DESTROY: numval = dsl_get_defer_destroy(ds); break; case ZFS_PROP_USERREFS: numval = dsl_get_userrefs(ds); break; case ZFS_PROP_FILESYSTEM_COUNT: error = dsl_dir_get_filesystem_count(ds->ds_dir, &numval); (void) strlcpy(setpoint, "", ZFS_MAX_DATASET_NAME_LEN); break; case ZFS_PROP_SNAPSHOT_COUNT: error = dsl_dir_get_snapshot_count(ds->ds_dir, &numval); (void) strlcpy(setpoint, "", ZFS_MAX_DATASET_NAME_LEN); break; case ZFS_PROP_NUMCLONES: numval = dsl_get_numclones(ds); break; case ZFS_PROP_INCONSISTENT: numval = dsl_get_inconsistent(ds); break; case ZFS_PROP_IVSET_GUID: if (dsl_dataset_is_zapified(ds)) { error = zap_lookup(ds->ds_dir->dd_pool->dp_meta_objset, ds->ds_object, DS_FIELD_IVSET_GUID, sizeof (numval), 1, &numval); } else { error = ENOENT; } break; case ZFS_PROP_RECEIVE_RESUME_TOKEN: { char *token = get_receive_resume_token(ds); if (token != NULL) { (void) strlcpy(strval, token, ZAP_MAXVALUELEN); kmem_strfree(token); } else { error = ENOENT; } break; } case ZFS_PROP_VOLSIZE: ASSERT(ds_type == ZFS_TYPE_VOLUME || ds_type == ZFS_TYPE_SNAPSHOT); error = dmu_objset_from_ds(ds, &os); if (error == 0) { error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", sizeof (numval), 1, &numval); } if 
(error == 0) (void) strlcpy(setpoint, dsname, ZFS_MAX_DATASET_NAME_LEN); break; case ZFS_PROP_VOLBLOCKSIZE: { ASSERT(ds_type == ZFS_TYPE_VOLUME); dmu_object_info_t doi; error = dmu_objset_from_ds(ds, &os); if (error == 0) { error = dmu_object_info(os, ZVOL_OBJ, &doi); if (error == 0) numval = doi.doi_data_block_size; } break; } case ZFS_PROP_KEYSTATUS: case ZFS_PROP_KEYFORMAT: { /* provide defaults in case no crypto obj exists */ setpoint[0] = '\0'; if (zfs_prop == ZFS_PROP_KEYSTATUS) numval = ZFS_KEYSTATUS_NONE; else numval = ZFS_KEYFORMAT_NONE; nvlist_t *nvl, *propval; nvl = fnvlist_alloc(); dsl_dataset_crypt_stats(ds, nvl); if (nvlist_lookup_nvlist(nvl, zfs_prop_to_name(zfs_prop), &propval) == 0) { char *source; (void) nvlist_lookup_uint64(propval, ZPROP_VALUE, &numval); if (nvlist_lookup_string(propval, ZPROP_SOURCE, &source) == 0) strlcpy(setpoint, source, sizeof (setpoint)); } nvlist_free(nvl); break; } case ZFS_PROP_SNAPSHOTS_CHANGED: numval = dsl_dir_snap_cmtime(ds->ds_dir).tv_sec; break; default: /* Did not match these props, check in the dsl_dir */ error = get_dsl_dir_prop(ds, zfs_prop, &numval); } if (error != 0) { kmem_free(strval, ZAP_MAXVALUELEN); return (error); } switch (prop_type) { case PROP_TYPE_NUMBER: { (void) lua_pushnumber(state, numval); break; } case PROP_TYPE_STRING: { (void) lua_pushstring(state, strval); break; } case PROP_TYPE_INDEX: { const char *propval; error = zfs_prop_index_to_string(zfs_prop, numval, &propval); if (error != 0) { kmem_free(strval, ZAP_MAXVALUELEN); return (error); } (void) lua_pushstring(state, propval); break; } } kmem_free(strval, ZAP_MAXVALUELEN); /* Push the source to the stack */ get_prop_src(state, setpoint, zfs_prop); return (0); } /* * Look up a property and its source in the zap object. If the value is * present and successfully retrieved, push the value and source on the * lua stack and return 0. On failure, return a non-zero error value. 
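Both zcp_get_user_prop() above and get_zap_prop() that follows fetch values through dsl_prop_get_ds(), whose element-size and count arguments encode the shape of the request: strings pass (1, ZAP_MAXVALUELEN) with a character buffer, while 64-bit integers pass (sizeof (numval), 1). The stand-in below only mirrors that calling convention; demo_prop_get() is not the real interface and the returned values are fabricated.

#include <stdint.h>
#include <stdio.h>

/*
 * Stand-in for the dsl_prop_get_ds() calling convention: element size of 1
 * with a count equal to the buffer length means "string", element size of
 * sizeof (uint64_t) with a count of 1 means "integer".
 */
static int
demo_prop_get(const char *name, int intsz, int numints, void *buf)
{
	(void) name;
	if (intsz == 1) {
		(void) snprintf(buf, numints, "on");		/* string-shaped */
	} else if (intsz == (int)sizeof (uint64_t) && numints == 1) {
		*(uint64_t *)buf = 131072;			/* integer-shaped */
	} else {
		return (-1);
	}
	return (0);
}

int
main(void)
{
	char strval[64];
	uint64_t numval;

	(void) demo_prop_get("compression", 1, sizeof (strval), strval);
	(void) demo_prop_get("recordsize", sizeof (numval), 1, &numval);
	printf("%s %llu\n", strval, (unsigned long long)numval);
	return (0);
}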
*/ static int get_zap_prop(lua_State *state, dsl_dataset_t *ds, zfs_prop_t zfs_prop) { int error = 0; char setpoint[ZFS_MAX_DATASET_NAME_LEN]; char *strval = kmem_alloc(ZAP_MAXVALUELEN, KM_SLEEP); uint64_t numval; const char *prop_name = zfs_prop_to_name(zfs_prop); zprop_type_t prop_type = zfs_prop_get_type(zfs_prop); if (prop_type == PROP_TYPE_STRING) { /* Push value to lua stack */ error = dsl_prop_get_ds(ds, prop_name, 1, ZAP_MAXVALUELEN, strval, setpoint); if (error == 0) (void) lua_pushstring(state, strval); } else { error = dsl_prop_get_ds(ds, prop_name, sizeof (numval), 1, &numval, setpoint); #ifdef _KERNEL /* Fill in temporary value for prop, if applicable */ (void) zfs_get_temporary_prop(ds, zfs_prop, &numval, setpoint); #else kmem_free(strval, ZAP_MAXVALUELEN); return (luaL_error(state, "temporary properties only supported in kernel mode", prop_name)); #endif /* Push value to lua stack */ if (prop_type == PROP_TYPE_INDEX) { const char *propval; error = zfs_prop_index_to_string(zfs_prop, numval, &propval); if (error == 0) (void) lua_pushstring(state, propval); } else { if (error == 0) (void) lua_pushnumber(state, numval); } } kmem_free(strval, ZAP_MAXVALUELEN); if (error == 0) get_prop_src(state, setpoint, zfs_prop); return (error); } /* * Determine whether property is valid for a given dataset */ boolean_t prop_valid_for_ds(dsl_dataset_t *ds, zfs_prop_t zfs_prop) { zfs_type_t zfs_type = ZFS_TYPE_INVALID; /* properties not supported */ if ((zfs_prop == ZFS_PROP_ISCSIOPTIONS) || (zfs_prop == ZFS_PROP_MOUNTED)) return (B_FALSE); /* if we want the origin prop, ds must be a clone */ if ((zfs_prop == ZFS_PROP_ORIGIN) && (!dsl_dir_is_clone(ds->ds_dir))) return (B_FALSE); int error = get_objset_type(ds, &zfs_type); if (error != 0) return (B_FALSE); return (zfs_prop_valid_for_type(zfs_prop, zfs_type, B_FALSE)); } /* * Look up a given dataset property. On success return 2, the number of * values pushed to the lua stack (property value and source). On a fatal * error, longjmp. On a non fatal error push nothing. */ static int zcp_get_system_prop(lua_State *state, dsl_pool_t *dp, const char *dataset_name, zfs_prop_t zfs_prop) { int error; /* * zcp_dataset_hold will either successfully return the requested * dataset or throw a lua error and longjmp out of the zfs.get_prop call * without returning. 
*/ dsl_dataset_t *ds = zcp_dataset_hold(state, dp, dataset_name, FTAG); if (ds == NULL) return (1); /* not reached; zcp_dataset_hold() longjmp'd */ /* Check that the property is valid for the given dataset */ const char *prop_name = zfs_prop_to_name(zfs_prop); if (!prop_valid_for_ds(ds, zfs_prop)) { dsl_dataset_rele(ds, FTAG); return (0); } /* Check if the property can be accessed directly */ error = get_special_prop(state, ds, dataset_name, zfs_prop); if (error == 0) { dsl_dataset_rele(ds, FTAG); /* The value and source have been pushed by get_special_prop */ return (2); } if (error != ENOENT) { dsl_dataset_rele(ds, FTAG); return (zcp_handle_error(state, dataset_name, prop_name, error)); } /* If we were unable to find it, look in the zap object */ error = get_zap_prop(state, ds, zfs_prop); dsl_dataset_rele(ds, FTAG); if (error != 0) { return (zcp_handle_error(state, dataset_name, prop_name, error)); } /* The value and source have been pushed by get_zap_prop */ return (2); } #ifdef _KERNEL static zfs_userquota_prop_t get_userquota_prop(const char *prop_name) { zfs_userquota_prop_t type; /* Figure out the property type ({user|group}{quota|used}) */ for (type = 0; type < ZFS_NUM_USERQUOTA_PROPS; type++) { if (strncmp(prop_name, zfs_userquota_prop_prefixes[type], strlen(zfs_userquota_prop_prefixes[type])) == 0) break; } return (type); } /* * Given the name of a zfs_userquota_prop, this function determines the * prop type as well as the numeric group/user ids based on the string * following the '@' in the property name. On success, returns 0. On failure, * returns a non-zero error. * 'domain' must be free'd by caller using kmem_strfree() */ static int parse_userquota_prop(const char *prop_name, zfs_userquota_prop_t *type, char **domain, uint64_t *rid) { char *cp, *end, *domain_val; *type = get_userquota_prop(prop_name); if (*type >= ZFS_NUM_USERQUOTA_PROPS) return (EINVAL); *rid = 0; cp = strchr(prop_name, '@') + 1; if (strncmp(cp, "S-1-", 4) == 0) { /* * It's a numeric SID (eg "S-1-234-567-89") and we want to * separate the domain id and the rid */ int domain_len = strrchr(cp, '-') - cp; domain_val = kmem_alloc(domain_len + 1, KM_SLEEP); - (void) strncpy(domain_val, cp, domain_len); - domain_val[domain_len] = '\0'; + (void) strlcpy(domain_val, cp, domain_len + 1); cp += domain_len + 1; (void) ddi_strtoll(cp, &end, 10, (longlong_t *)rid); if (*end != '\0') { kmem_strfree(domain_val); return (EINVAL); } } else { /* It's only a user/group ID (eg "12345"), just get the rid */ domain_val = NULL; (void) ddi_strtoll(cp, &end, 10, (longlong_t *)rid); if (*end != '\0') return (EINVAL); } *domain = domain_val; return (0); } /* * Look up {user|group}{quota|used} property for given dataset. On success * push the value (quota or used amount) and the setpoint. On failure, push * a lua error. 
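The strlcpy() substitution in parse_userquota_prop() above is behavior-preserving: with a destination size of domain_len + 1, strlcpy() copies exactly domain_len bytes of the SID prefix and NUL-terminates, which is what the old strncpy()-plus-manual-terminator pair did. A standalone check follows; the local xstrlcpy() is only a stand-in for platforms whose libc lacks strlcpy().

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Minimal strlcpy() stand-in, for illustration only. */
static size_t
xstrlcpy(char *dst, const char *src, size_t dstsize)
{
	size_t len = strlen(src);

	if (dstsize != 0) {
		size_t n = (len >= dstsize) ? dstsize - 1 : len;
		memcpy(dst, src, n);
		dst[n] = '\0';
	}
	return (len);
}

int
main(void)
{
	/* A numeric SID as described in the comment, e.g. "S-1-234-567-89". */
	const char *cp = "S-1-234-567-89";
	int domain_len = (int)(strrchr(cp, '-') - cp);
	char *domain_val = malloc(domain_len + 1);

	/* Equivalent to strncpy(domain_val, cp, domain_len) plus a manual '\0'. */
	(void) xstrlcpy(domain_val, cp, domain_len + 1);
	printf("domain=%s rid=%s\n", domain_val, cp + domain_len + 1);
	free(domain_val);
	return (0);
}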
*/ static int zcp_get_userquota_prop(lua_State *state, dsl_pool_t *dp, const char *dataset_name, const char *prop_name) { zfsvfs_t *zfvp; zfsvfs_t *zfsvfs; int error; zfs_userquota_prop_t type; char *domain; uint64_t rid, value = 0; objset_t *os; dsl_dataset_t *ds = zcp_dataset_hold(state, dp, dataset_name, FTAG); if (ds == NULL) return (1); /* not reached; zcp_dataset_hold() longjmp'd */ error = parse_userquota_prop(prop_name, &type, &domain, &rid); if (error == 0) { error = dmu_objset_from_ds(ds, &os); if (error == 0) { zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP); error = zfsvfs_create_impl(&zfvp, zfsvfs, os); if (error == 0) { error = zfs_userspace_one(zfvp, type, domain, rid, &value); zfsvfs_free(zfvp); } } if (domain != NULL) kmem_strfree(domain); } dsl_dataset_rele(ds, FTAG); if ((value == 0) && ((type == ZFS_PROP_USERQUOTA) || (type == ZFS_PROP_GROUPQUOTA))) error = SET_ERROR(ENOENT); if (error != 0) { return (zcp_handle_error(state, dataset_name, prop_name, error)); } (void) lua_pushnumber(state, value); (void) lua_pushstring(state, dataset_name); return (2); } #endif /* * Determines the name of the snapshot referenced in the written property * name. Returns snapshot name in snap_name, a buffer that must be at least * as large as ZFS_MAX_DATASET_NAME_LEN */ static void parse_written_prop(const char *dataset_name, const char *prop_name, char *snap_name) { ASSERT(zfs_prop_written(prop_name)); const char *name = prop_name + ZFS_WRITTEN_PROP_PREFIX_LEN; if (strchr(name, '@') == NULL) { (void) snprintf(snap_name, ZFS_MAX_DATASET_NAME_LEN, "%s@%s", dataset_name, name); } else { (void) strlcpy(snap_name, name, ZFS_MAX_DATASET_NAME_LEN); } } /* * Look up written@ property for given dataset. On success * push the value and the setpoint. If error is fatal, we will * longjmp, otherwise push nothing. 
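parse_written_prop() above accepts both spellings of the property: "written@snap" is expanded relative to the dataset being queried, while a fully qualified "written@pool/fs@snap" is used verbatim. A standalone restatement follows; the prefix length is the length of "written@" and the buffer size is a stand-in for ZFS_MAX_DATASET_NAME_LEN.

#include <stdio.h>
#include <string.h>

#define	MAX_NAME_LEN		256	/* stand-in for ZFS_MAX_DATASET_NAME_LEN */
#define	WRITTEN_PREFIX_LEN	8	/* strlen("written@") */

static void
demo_parse_written_prop(const char *dataset_name, const char *prop_name,
    char *snap_name)
{
	const char *name = prop_name + WRITTEN_PREFIX_LEN;

	if (strchr(name, '@') == NULL) {
		/* Short form: a snapshot of the dataset being queried. */
		(void) snprintf(snap_name, MAX_NAME_LEN, "%s@%s",
		    dataset_name, name);
	} else {
		/* Fully qualified snapshot name: use it as-is. */
		(void) snprintf(snap_name, MAX_NAME_LEN, "%s", name);
	}
}

int
main(void)
{
	char snap[MAX_NAME_LEN];

	demo_parse_written_prop("tank/home", "written@monday", snap);
	printf("%s\n", snap);			/* tank/home@monday */
	demo_parse_written_prop("tank/home", "written@tank/old@monday", snap);
	printf("%s\n", snap);			/* tank/old@monday */
	return (0);
}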
*/ static int zcp_get_written_prop(lua_State *state, dsl_pool_t *dp, const char *dataset_name, const char *prop_name) { char snap_name[ZFS_MAX_DATASET_NAME_LEN]; uint64_t used, comp, uncomp; dsl_dataset_t *old; int error = 0; parse_written_prop(dataset_name, prop_name, snap_name); dsl_dataset_t *new = zcp_dataset_hold(state, dp, dataset_name, FTAG); if (new == NULL) return (1); /* not reached; zcp_dataset_hold() longjmp'd */ error = dsl_dataset_hold(dp, snap_name, FTAG, &old); if (error != 0) { dsl_dataset_rele(new, FTAG); return (zcp_dataset_hold_error(state, dp, snap_name, error)); } error = dsl_dataset_space_written(old, new, &used, &comp, &uncomp); dsl_dataset_rele(old, FTAG); dsl_dataset_rele(new, FTAG); if (error != 0) { return (zcp_handle_error(state, dataset_name, snap_name, error)); } (void) lua_pushnumber(state, used); (void) lua_pushstring(state, dataset_name); return (2); } static int zcp_get_prop(lua_State *state); static const zcp_lib_info_t zcp_get_prop_info = { .name = "get_prop", .func = zcp_get_prop, .pargs = { { .za_name = "dataset", .za_lua_type = LUA_TSTRING }, { .za_name = "property", .za_lua_type = LUA_TSTRING }, {NULL, 0} }, .kwargs = { {NULL, 0} } }; static int zcp_get_prop(lua_State *state) { const char *dataset_name; const char *property_name; dsl_pool_t *dp = zcp_run_info(state)->zri_pool; const zcp_lib_info_t *libinfo = &zcp_get_prop_info; zcp_parse_args(state, libinfo->name, libinfo->pargs, libinfo->kwargs); dataset_name = lua_tostring(state, 1); property_name = lua_tostring(state, 2); /* User defined property */ if (zfs_prop_user(property_name)) { return (zcp_get_user_prop(state, dp, dataset_name, property_name)); } /* userspace property */ if (zfs_prop_userquota(property_name)) { #ifdef _KERNEL return (zcp_get_userquota_prop(state, dp, dataset_name, property_name)); #else return (luaL_error(state, "user quota properties only supported in kernel mode", property_name)); #endif } /* written@ property */ if (zfs_prop_written(property_name)) { return (zcp_get_written_prop(state, dp, dataset_name, property_name)); } zfs_prop_t zfs_prop = zfs_name_to_prop(property_name); /* Valid system property */ if (zfs_prop != ZPROP_INVAL) { return (zcp_get_system_prop(state, dp, dataset_name, zfs_prop)); } /* Invalid property name */ return (luaL_error(state, "'%s' is not a valid property", property_name)); } int zcp_load_get_lib(lua_State *state) { lua_pushcclosure(state, zcp_get_prop_info.func, 0); lua_setfield(state, -2, zcp_get_prop_info.name); return (1); } diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c index 259d68c477de..bafe6fe7dfa4 100644 --- a/module/zfs/zfs_ioctl.c +++ b/module/zfs/zfs_ioctl.c @@ -1,7887 +1,7887 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or https://opensource.org/licenses/CDDL-1.0. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
* If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Portions Copyright 2011 Martin Matuska * Copyright 2015, OmniTI Computer Consulting, Inc. All rights reserved. * Portions Copyright 2012 Pawel Jakub Dawidek * Copyright (c) 2014, 2016 Joyent, Inc. All rights reserved. * Copyright 2016 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2014, Joyent, Inc. All rights reserved. * Copyright (c) 2011, 2020 by Delphix. All rights reserved. * Copyright (c) 2013 by Saso Kiselkov. All rights reserved. * Copyright (c) 2013 Steven Hartland. All rights reserved. * Copyright (c) 2014 Integros [integros.com] * Copyright 2016 Toomas Soome * Copyright (c) 2016 Actifio, Inc. All rights reserved. * Copyright (c) 2018, loli10K . All rights reserved. * Copyright 2017 RackTop Systems. * Copyright (c) 2017 Open-E, Inc. All Rights Reserved. * Copyright (c) 2019 Datto Inc. * Copyright (c) 2019, 2020 by Christian Schwarz. All rights reserved. * Copyright (c) 2019, 2021, Klara Inc. * Copyright (c) 2019, Allan Jude */ /* * ZFS ioctls. * * This file handles the ioctls to /dev/zfs, used for configuring ZFS storage * pools and filesystems, e.g. with /sbin/zfs and /sbin/zpool. * * There are two ways that we handle ioctls: the legacy way where almost * all of the logic is in the ioctl callback, and the new way where most * of the marshalling is handled in the common entry point, zfsdev_ioctl(). * * Non-legacy ioctls should be registered by calling * zfs_ioctl_register() from zfs_ioctl_init(). The ioctl is invoked * from userland by lzc_ioctl(). * * The registration arguments are as follows: * * const char *name * The name of the ioctl. This is used for history logging. If the * ioctl returns successfully (the callback returns 0), and allow_log * is true, then a history log entry will be recorded with the input & * output nvlists. The log entry can be printed with "zpool history -i". * * zfs_ioc_t ioc * The ioctl request number, which userland will pass to ioctl(2). * We want newer versions of libzfs and libzfs_core to run against * existing zfs kernel modules (i.e. a deferred reboot after an update). * Therefore the ioctl numbers cannot change from release to release. * * zfs_secpolicy_func_t *secpolicy * This function will be called before the zfs_ioc_func_t, to * determine if this operation is permitted. It should return EPERM * on failure, and 0 on success. Checks include determining if the * dataset is visible in this zone, and if the user has either all * zfs privileges in the zone (SYS_MOUNT), or has been granted permission * to do this operation on this dataset with "zfs allow". * * zfs_ioc_namecheck_t namecheck * This specifies what to expect in the zfs_cmd_t:zc_name -- a pool * name, a dataset name, or nothing. If the name is not well-formed, * the ioctl will fail and the callback will not be called. * Therefore, the callback can assume that the name is well-formed * (e.g. is null-terminated, doesn't have more than one '@' character, * doesn't have invalid characters). * * zfs_ioc_poolcheck_t pool_check * This specifies requirements on the pool state. If the pool does * not meet them (is suspended or is readonly), the ioctl will fail * and the callback will not be called. If any checks are specified * (i.e. 
it is not POOL_CHECK_NONE), namecheck must not be NO_NAME. * Multiple checks can be or-ed together (e.g. POOL_CHECK_SUSPENDED | * POOL_CHECK_READONLY). * * zfs_ioc_key_t *nvl_keys * The list of expected/allowable innvl input keys. This list is used * to validate the nvlist input to the ioctl. * * boolean_t smush_outnvlist * If smush_outnvlist is true, then the output is presumed to be a * list of errors, and it will be "smushed" down to fit into the * caller's buffer, by removing some entries and replacing them with a * single "N_MORE_ERRORS" entry indicating how many were removed. See * nvlist_smush() for details. If smush_outnvlist is false, and the * outnvlist does not fit into the userland-provided buffer, then the * ioctl will fail with ENOMEM. * * zfs_ioc_func_t *func * The callback function that will perform the operation. * * The callback should return 0 on success, or an error number on * failure. If the function fails, the userland ioctl will return -1, * and errno will be set to the callback's return value. The callback * will be called with the following arguments: * * const char *name * The name of the pool or dataset to operate on, from * zfs_cmd_t:zc_name. The 'namecheck' argument specifies the * expected type (pool, dataset, or none). * * nvlist_t *innvl * The input nvlist, deserialized from zfs_cmd_t:zc_nvlist_src. Or * NULL if no input nvlist was provided. Changes to this nvlist are * ignored. If the input nvlist could not be deserialized, the * ioctl will fail and the callback will not be called. * * nvlist_t *outnvl * The output nvlist, initially empty. The callback can fill it in, * and it will be returned to userland by serializing it into * zfs_cmd_t:zc_nvlist_dst. If it is non-empty, and serialization * fails (e.g. because the caller didn't supply a large enough * buffer), then the overall ioctl will fail. See the * 'smush_nvlist' argument above for additional behaviors. * * There are two typical uses of the output nvlist: * - To return state, e.g. property values. In this case, * smush_outnvlist should be false. If the buffer was not large * enough, the caller will reallocate a larger buffer and try * the ioctl again. * * - To return multiple errors from an ioctl which makes on-disk * changes. In this case, smush_outnvlist should be true. * Ioctls which make on-disk modifications should generally not * use the outnvl if they succeed, because the caller can not * distinguish between the operation failing, and * deserialization failing. 
* * IOCTL Interface Errors * * The following ioctl input errors can be returned: * ZFS_ERR_IOC_CMD_UNAVAIL the ioctl number is not supported by kernel * ZFS_ERR_IOC_ARG_UNAVAIL an input argument is not supported by kernel * ZFS_ERR_IOC_ARG_REQUIRED a required input argument is missing * ZFS_ERR_IOC_ARG_BADTYPE an input argument has an invalid type */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "zfs_namecheck.h" #include "zfs_prop.h" #include "zfs_deleg.h" #include "zfs_comutil.h" #include #include #include kmutex_t zfsdev_state_lock; static zfsdev_state_t *zfsdev_state_list; /* * Limit maximum nvlist size. We don't want users passing in insane values * for zc->zc_nvlist_src_size, since we will need to allocate that much memory. * Defaults to 0=auto which is handled by platform code. */ unsigned long zfs_max_nvlist_src_size = 0; /* * When logging the output nvlist of an ioctl in the on-disk history, limit * the logged size to this many bytes. This must be less than DMU_MAX_ACCESS. * This applies primarily to zfs_ioc_channel_program(). */ static unsigned long zfs_history_output_max = 1024 * 1024; uint_t zfs_fsyncer_key; uint_t zfs_allow_log_key; /* DATA_TYPE_ANY is used when zkey_type can vary. */ #define DATA_TYPE_ANY DATA_TYPE_UNKNOWN typedef struct zfs_ioc_vec { zfs_ioc_legacy_func_t *zvec_legacy_func; zfs_ioc_func_t *zvec_func; zfs_secpolicy_func_t *zvec_secpolicy; zfs_ioc_namecheck_t zvec_namecheck; boolean_t zvec_allow_log; zfs_ioc_poolcheck_t zvec_pool_check; boolean_t zvec_smush_outnvlist; const char *zvec_name; const zfs_ioc_key_t *zvec_nvl_keys; size_t zvec_nvl_key_count; } zfs_ioc_vec_t; /* This array is indexed by zfs_userquota_prop_t */ static const char *userquota_perms[] = { ZFS_DELEG_PERM_USERUSED, ZFS_DELEG_PERM_USERQUOTA, ZFS_DELEG_PERM_GROUPUSED, ZFS_DELEG_PERM_GROUPQUOTA, ZFS_DELEG_PERM_USEROBJUSED, ZFS_DELEG_PERM_USEROBJQUOTA, ZFS_DELEG_PERM_GROUPOBJUSED, ZFS_DELEG_PERM_GROUPOBJQUOTA, ZFS_DELEG_PERM_PROJECTUSED, ZFS_DELEG_PERM_PROJECTQUOTA, ZFS_DELEG_PERM_PROJECTOBJUSED, ZFS_DELEG_PERM_PROJECTOBJQUOTA, }; static int zfs_ioc_userspace_upgrade(zfs_cmd_t *zc); static int zfs_ioc_id_quota_upgrade(zfs_cmd_t *zc); static int zfs_check_settable(const char *name, nvpair_t *property, cred_t *cr); static int zfs_check_clearable(const char *dataset, nvlist_t *props, nvlist_t **errors); static int zfs_fill_zplprops_root(uint64_t, nvlist_t *, nvlist_t *, boolean_t *); int zfs_set_prop_nvlist(const char *, zprop_source_t, nvlist_t *, nvlist_t *); static int get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp); static void history_str_free(char *buf) { kmem_free(buf, HIS_MAX_RECORD_LEN); } static char * history_str_get(zfs_cmd_t *zc) { char *buf; if (zc->zc_history == 0) return (NULL); buf = kmem_alloc(HIS_MAX_RECORD_LEN, KM_SLEEP); if (copyinstr((void *)(uintptr_t)zc->zc_history, buf, HIS_MAX_RECORD_LEN, NULL) != 0) { history_str_free(buf); return (NULL); } buf[HIS_MAX_RECORD_LEN -1] = '\0'; return (buf); } /* * Return non-zero if the spa version is less than requested version. 
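The registration interface described above boils down to a vector table: each entry ties a name and request number to a security-policy callback that runs first and an ioctl callback that runs only if the policy check succeeds. The shape-only sketch below illustrates that contract; the struct, callbacks, and the request number are simplified stand-ins, and the real zfs_ioc_vec_t carries the additional fields (namecheck, pool_check, nvl_keys, smush_outnvlist, allow_log) listed in the comment.

#include <stdio.h>
#include <stddef.h>

typedef int (*demo_secpolicy_t)(const char *name);
typedef int (*demo_ioc_func_t)(const char *name);

typedef struct demo_ioc_vec {
	const char		*zvec_name;
	unsigned		zvec_ioc;
	demo_secpolicy_t	zvec_secpolicy;
	demo_ioc_func_t		zvec_func;
} demo_ioc_vec_t;

static int secpolicy_allow(const char *name) { (void) name; return (0); }

static int
ioc_demo_snapshot(const char *name)
{
	printf("snapshot callback invoked for %s\n", name);
	return (0);
}

static const demo_ioc_vec_t demo_vec[] = {
	{ "snapshot", 0x23, secpolicy_allow, ioc_demo_snapshot },
};

/* Dispatch mirrors the contract: secpolicy first, callback only on success. */
static int
demo_dispatch(unsigned ioc, const char *name)
{
	for (size_t i = 0; i < sizeof (demo_vec) / sizeof (demo_vec[0]); i++) {
		if (demo_vec[i].zvec_ioc != ioc)
			continue;
		int error = demo_vec[i].zvec_secpolicy(name);
		if (error != 0)
			return (error);
		return (demo_vec[i].zvec_func(name));
	}
	return (-1);	/* cf. ZFS_ERR_IOC_CMD_UNAVAIL for unknown requests */
}

int
main(void)
{
	return (demo_dispatch(0x23, "tank/home"));
}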
*/ static int zfs_earlier_version(const char *name, int version) { spa_t *spa; if (spa_open(name, &spa, FTAG) == 0) { if (spa_version(spa) < version) { spa_close(spa, FTAG); return (1); } spa_close(spa, FTAG); } return (0); } /* * Return TRUE if the ZPL version is less than requested version. */ static boolean_t zpl_earlier_version(const char *name, int version) { objset_t *os; boolean_t rc = B_TRUE; if (dmu_objset_hold(name, FTAG, &os) == 0) { uint64_t zplversion; if (dmu_objset_type(os) != DMU_OST_ZFS) { dmu_objset_rele(os, FTAG); return (B_TRUE); } /* XXX reading from non-owned objset */ if (zfs_get_zplprop(os, ZFS_PROP_VERSION, &zplversion) == 0) rc = zplversion < version; dmu_objset_rele(os, FTAG); } return (rc); } static void zfs_log_history(zfs_cmd_t *zc) { spa_t *spa; char *buf; if ((buf = history_str_get(zc)) == NULL) return; if (spa_open(zc->zc_name, &spa, FTAG) == 0) { if (spa_version(spa) >= SPA_VERSION_ZPOOL_HISTORY) (void) spa_history_log(spa, buf); spa_close(spa, FTAG); } history_str_free(buf); } /* * Policy for top-level read operations (list pools). Requires no privileges, * and can be used in the local zone, as there is no associated dataset. */ static int zfs_secpolicy_none(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { (void) zc, (void) innvl, (void) cr; return (0); } /* * Policy for dataset read operations (list children, get statistics). Requires * no privileges, but must be visible in the local zone. */ static int zfs_secpolicy_read(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { (void) innvl, (void) cr; if (INGLOBALZONE(curproc) || zone_dataset_visible(zc->zc_name, NULL)) return (0); return (SET_ERROR(ENOENT)); } static int zfs_dozonecheck_impl(const char *dataset, uint64_t zoned, cred_t *cr) { int writable = 1; /* * The dataset must be visible by this zone -- check this first * so they don't see EPERM on something they shouldn't know about. */ if (!INGLOBALZONE(curproc) && !zone_dataset_visible(dataset, &writable)) return (SET_ERROR(ENOENT)); if (INGLOBALZONE(curproc)) { /* * If the fs is zoned, only root can access it from the * global zone. */ if (secpolicy_zfs(cr) && zoned) return (SET_ERROR(EPERM)); } else { /* * If we are in a local zone, the 'zoned' property must be set. */ if (!zoned) return (SET_ERROR(EPERM)); /* must be writable by this zone */ if (!writable) return (SET_ERROR(EPERM)); } return (0); } static int zfs_dozonecheck(const char *dataset, cred_t *cr) { uint64_t zoned; if (dsl_prop_get_integer(dataset, zfs_prop_to_name(ZFS_PROP_ZONED), &zoned, NULL)) return (SET_ERROR(ENOENT)); return (zfs_dozonecheck_impl(dataset, zoned, cr)); } static int zfs_dozonecheck_ds(const char *dataset, dsl_dataset_t *ds, cred_t *cr) { uint64_t zoned; if (dsl_prop_get_int_ds(ds, zfs_prop_to_name(ZFS_PROP_ZONED), &zoned)) return (SET_ERROR(ENOENT)); return (zfs_dozonecheck_impl(dataset, zoned, cr)); } static int zfs_secpolicy_write_perms_ds(const char *name, dsl_dataset_t *ds, const char *perm, cred_t *cr) { int error; error = zfs_dozonecheck_ds(name, ds, cr); if (error == 0) { error = secpolicy_zfs(cr); if (error != 0) error = dsl_deleg_access_impl(ds, perm, cr); } return (error); } static int zfs_secpolicy_write_perms(const char *name, const char *perm, cred_t *cr) { int error; dsl_dataset_t *ds; dsl_pool_t *dp; /* * First do a quick check for root in the global zone, which * is allowed to do all write_perms. This ensures that zfs_ioc_* * will get to handle nonexistent datasets. 
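The zone check in zfs_dozonecheck_impl() above is a small decision table: the dataset must be visible in the caller's zone at all; from the global zone a 'zoned' dataset is reserved to privileged users; from a non-global zone the dataset must be both zoned and writable there. A standalone restatement follows, using the same errno values as the original; the flag-style arguments are illustrative.

#include <errno.h>
#include <stdio.h>

/*
 * 0 on success, ENOENT if the dataset is not visible in this zone, and
 * EPERM for the privilege/zoning failures, mirroring zfs_dozonecheck_impl().
 */
static int
demo_zonecheck(int in_global_zone, int visible, int writable,
    int zoned, int unprivileged)
{
	if (!in_global_zone && !visible)
		return (ENOENT);
	if (in_global_zone) {
		if (unprivileged && zoned)
			return (EPERM);
	} else {
		if (!zoned || !writable)
			return (EPERM);
	}
	return (0);
}

int
main(void)
{
	/* Global zone, unprivileged caller, zoned dataset: refused. */
	printf("%d\n", demo_zonecheck(1, 1, 1, 1, 1));	/* EPERM */
	/* Non-global zone, visible + writable + zoned: allowed. */
	printf("%d\n", demo_zonecheck(0, 1, 1, 1, 1));	/* 0 */
	/* Non-global zone, dataset not zoned: refused. */
	printf("%d\n", demo_zonecheck(0, 1, 1, 0, 1));	/* EPERM */
	return (0);
}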
*/ if (INGLOBALZONE(curproc) && secpolicy_zfs(cr) == 0) return (0); error = dsl_pool_hold(name, FTAG, &dp); if (error != 0) return (error); error = dsl_dataset_hold(dp, name, FTAG, &ds); if (error != 0) { dsl_pool_rele(dp, FTAG); return (error); } error = zfs_secpolicy_write_perms_ds(name, ds, perm, cr); dsl_dataset_rele(ds, FTAG); dsl_pool_rele(dp, FTAG); return (error); } /* * Policy for setting the security label property. * * Returns 0 for success, non-zero for access and other errors. */ static int zfs_set_slabel_policy(const char *name, const char *strval, cred_t *cr) { #ifdef HAVE_MLSLABEL char ds_hexsl[MAXNAMELEN]; bslabel_t ds_sl, new_sl; boolean_t new_default = FALSE; uint64_t zoned; int needed_priv = -1; int error; /* First get the existing dataset label. */ error = dsl_prop_get(name, zfs_prop_to_name(ZFS_PROP_MLSLABEL), 1, sizeof (ds_hexsl), &ds_hexsl, NULL); if (error != 0) return (SET_ERROR(EPERM)); if (strcasecmp(strval, ZFS_MLSLABEL_DEFAULT) == 0) new_default = TRUE; /* The label must be translatable */ if (!new_default && (hexstr_to_label(strval, &new_sl) != 0)) return (SET_ERROR(EINVAL)); /* * In a non-global zone, disallow attempts to set a label that * doesn't match that of the zone; otherwise no other checks * are needed. */ if (!INGLOBALZONE(curproc)) { if (new_default || !blequal(&new_sl, CR_SL(CRED()))) return (SET_ERROR(EPERM)); return (0); } /* * For global-zone datasets (i.e., those whose zoned property is * "off", verify that the specified new label is valid for the * global zone. */ if (dsl_prop_get_integer(name, zfs_prop_to_name(ZFS_PROP_ZONED), &zoned, NULL)) return (SET_ERROR(EPERM)); if (!zoned) { if (zfs_check_global_label(name, strval) != 0) return (SET_ERROR(EPERM)); } /* * If the existing dataset label is nondefault, check if the * dataset is mounted (label cannot be changed while mounted). * Get the zfsvfs_t; if there isn't one, then the dataset isn't * mounted (or isn't a dataset, doesn't exist, ...). */ if (strcasecmp(ds_hexsl, ZFS_MLSLABEL_DEFAULT) != 0) { objset_t *os; static const char *setsl_tag = "setsl_tag"; /* * Try to own the dataset; abort if there is any error, * (e.g., already mounted, in use, or other error). */ error = dmu_objset_own(name, DMU_OST_ZFS, B_TRUE, B_TRUE, setsl_tag, &os); if (error != 0) return (SET_ERROR(EPERM)); dmu_objset_disown(os, B_TRUE, setsl_tag); if (new_default) { needed_priv = PRIV_FILE_DOWNGRADE_SL; goto out_check; } if (hexstr_to_label(strval, &new_sl) != 0) return (SET_ERROR(EPERM)); if (blstrictdom(&ds_sl, &new_sl)) needed_priv = PRIV_FILE_DOWNGRADE_SL; else if (blstrictdom(&new_sl, &ds_sl)) needed_priv = PRIV_FILE_UPGRADE_SL; } else { /* dataset currently has a default label */ if (!new_default) needed_priv = PRIV_FILE_UPGRADE_SL; } out_check: if (needed_priv != -1) return (PRIV_POLICY(cr, needed_priv, B_FALSE, EPERM, NULL)); return (0); #else return (SET_ERROR(ENOTSUP)); #endif /* HAVE_MLSLABEL */ } static int zfs_secpolicy_setprop(const char *dsname, zfs_prop_t prop, nvpair_t *propval, cred_t *cr) { char *strval; /* * Check permissions for special properties. */ switch (prop) { default: break; case ZFS_PROP_ZONED: /* * Disallow setting of 'zoned' from within a local zone. */ if (!INGLOBALZONE(curproc)) return (SET_ERROR(EPERM)); break; case ZFS_PROP_QUOTA: case ZFS_PROP_FILESYSTEM_LIMIT: case ZFS_PROP_SNAPSHOT_LIMIT: if (!INGLOBALZONE(curproc)) { uint64_t zoned; char setpoint[ZFS_MAX_DATASET_NAME_LEN]; /* * Unprivileged users are allowed to modify the * limit on things *under* (ie. 
contained by) * the thing they own. */ if (dsl_prop_get_integer(dsname, zfs_prop_to_name(ZFS_PROP_ZONED), &zoned, setpoint)) return (SET_ERROR(EPERM)); if (!zoned || strlen(dsname) <= strlen(setpoint)) return (SET_ERROR(EPERM)); } break; case ZFS_PROP_MLSLABEL: if (!is_system_labeled()) return (SET_ERROR(EPERM)); if (nvpair_value_string(propval, &strval) == 0) { int err; err = zfs_set_slabel_policy(dsname, strval, CRED()); if (err != 0) return (err); } break; } return (zfs_secpolicy_write_perms(dsname, zfs_prop_to_name(prop), cr)); } static int zfs_secpolicy_set_fsacl(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { /* * permission to set permissions will be evaluated later in * dsl_deleg_can_allow() */ (void) innvl; return (zfs_dozonecheck(zc->zc_name, cr)); } static int zfs_secpolicy_rollback(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { (void) innvl; return (zfs_secpolicy_write_perms(zc->zc_name, ZFS_DELEG_PERM_ROLLBACK, cr)); } static int zfs_secpolicy_send(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { (void) innvl; dsl_pool_t *dp; dsl_dataset_t *ds; const char *cp; int error; /* * Generate the current snapshot name from the given objsetid, then * use that name for the secpolicy/zone checks. */ cp = strchr(zc->zc_name, '@'); if (cp == NULL) return (SET_ERROR(EINVAL)); error = dsl_pool_hold(zc->zc_name, FTAG, &dp); if (error != 0) return (error); error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &ds); if (error != 0) { dsl_pool_rele(dp, FTAG); return (error); } dsl_dataset_name(ds, zc->zc_name); error = zfs_secpolicy_write_perms_ds(zc->zc_name, ds, ZFS_DELEG_PERM_SEND, cr); dsl_dataset_rele(ds, FTAG); dsl_pool_rele(dp, FTAG); return (error); } static int zfs_secpolicy_send_new(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { (void) innvl; return (zfs_secpolicy_write_perms(zc->zc_name, ZFS_DELEG_PERM_SEND, cr)); } static int zfs_secpolicy_share(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { (void) zc, (void) innvl, (void) cr; return (SET_ERROR(ENOTSUP)); } static int zfs_secpolicy_smb_acl(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { (void) zc, (void) innvl, (void) cr; return (SET_ERROR(ENOTSUP)); } static int zfs_get_parent(const char *datasetname, char *parent, int parentsize) { char *cp; /* * Remove the @bla or /bla from the end of the name to get the parent. */ - (void) strncpy(parent, datasetname, parentsize); + (void) strlcpy(parent, datasetname, parentsize); cp = strrchr(parent, '@'); if (cp != NULL) { cp[0] = '\0'; } else { cp = strrchr(parent, '/'); if (cp == NULL) return (SET_ERROR(ENOENT)); cp[0] = '\0'; } return (0); } int zfs_secpolicy_destroy_perms(const char *name, cred_t *cr) { int error; if ((error = zfs_secpolicy_write_perms(name, ZFS_DELEG_PERM_MOUNT, cr)) != 0) return (error); return (zfs_secpolicy_write_perms(name, ZFS_DELEG_PERM_DESTROY, cr)); } static int zfs_secpolicy_destroy(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { (void) innvl; return (zfs_secpolicy_destroy_perms(zc->zc_name, cr)); } /* * Destroying snapshots with delegated permissions requires * descendant mount and destroy permissions. 
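zfs_get_parent() above derives the parent by copying the name (now via strlcpy(), which both bounds the copy and guarantees NUL termination) and then truncating at the last '@', or failing that, the last '/'. A standalone restatement follows; snprintf() stands in for strlcpy() so the sketch builds everywhere, and truncation behavior for undersized buffers is not shown.

#include <errno.h>
#include <stdio.h>
#include <string.h>

static int
demo_get_parent(const char *datasetname, char *parent, size_t parentsize)
{
	char *cp;

	(void) snprintf(parent, parentsize, "%s", datasetname);
	cp = strrchr(parent, '@');		/* snapshot -> its dataset */
	if (cp == NULL) {
		cp = strrchr(parent, '/');	/* dataset -> its parent fs */
		if (cp == NULL)
			return (ENOENT);	/* a pool root has no parent */
	}
	cp[0] = '\0';
	return (0);
}

int
main(void)
{
	char parent[256];

	if (demo_get_parent("tank/home@snap", parent, sizeof (parent)) == 0)
		printf("%s\n", parent);		/* tank/home */
	if (demo_get_parent("tank/home/user", parent, sizeof (parent)) == 0)
		printf("%s\n", parent);		/* tank/home */
	printf("%d\n", demo_get_parent("tank", parent, sizeof (parent)));
	return (0);
}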
*/ static int zfs_secpolicy_destroy_snaps(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { (void) zc; nvlist_t *snaps; nvpair_t *pair, *nextpair; int error = 0; snaps = fnvlist_lookup_nvlist(innvl, "snaps"); for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL; pair = nextpair) { nextpair = nvlist_next_nvpair(snaps, pair); error = zfs_secpolicy_destroy_perms(nvpair_name(pair), cr); if (error == ENOENT) { /* * Ignore any snapshots that don't exist (we consider * them "already destroyed"). Remove the name from the * nvl here in case the snapshot is created between * now and when we try to destroy it (in which case * we don't want to destroy it since we haven't * checked for permission). */ fnvlist_remove_nvpair(snaps, pair); error = 0; } if (error != 0) break; } return (error); } int zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr) { char parentname[ZFS_MAX_DATASET_NAME_LEN]; int error; if ((error = zfs_secpolicy_write_perms(from, ZFS_DELEG_PERM_RENAME, cr)) != 0) return (error); if ((error = zfs_secpolicy_write_perms(from, ZFS_DELEG_PERM_MOUNT, cr)) != 0) return (error); if ((error = zfs_get_parent(to, parentname, sizeof (parentname))) != 0) return (error); if ((error = zfs_secpolicy_write_perms(parentname, ZFS_DELEG_PERM_CREATE, cr)) != 0) return (error); if ((error = zfs_secpolicy_write_perms(parentname, ZFS_DELEG_PERM_MOUNT, cr)) != 0) return (error); return (error); } static int zfs_secpolicy_rename(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { (void) innvl; return (zfs_secpolicy_rename_perms(zc->zc_name, zc->zc_value, cr)); } static int zfs_secpolicy_promote(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { (void) innvl; dsl_pool_t *dp; dsl_dataset_t *clone; int error; error = zfs_secpolicy_write_perms(zc->zc_name, ZFS_DELEG_PERM_PROMOTE, cr); if (error != 0) return (error); error = dsl_pool_hold(zc->zc_name, FTAG, &dp); if (error != 0) return (error); error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &clone); if (error == 0) { char parentname[ZFS_MAX_DATASET_NAME_LEN]; dsl_dataset_t *origin = NULL; dsl_dir_t *dd; dd = clone->ds_dir; error = dsl_dataset_hold_obj(dd->dd_pool, dsl_dir_phys(dd)->dd_origin_obj, FTAG, &origin); if (error != 0) { dsl_dataset_rele(clone, FTAG); dsl_pool_rele(dp, FTAG); return (error); } error = zfs_secpolicy_write_perms_ds(zc->zc_name, clone, ZFS_DELEG_PERM_MOUNT, cr); dsl_dataset_name(origin, parentname); if (error == 0) { error = zfs_secpolicy_write_perms_ds(parentname, origin, ZFS_DELEG_PERM_PROMOTE, cr); } dsl_dataset_rele(clone, FTAG); dsl_dataset_rele(origin, FTAG); } dsl_pool_rele(dp, FTAG); return (error); } static int zfs_secpolicy_recv(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { (void) innvl; int error; if ((error = zfs_secpolicy_write_perms(zc->zc_name, ZFS_DELEG_PERM_RECEIVE, cr)) != 0) return (error); if ((error = zfs_secpolicy_write_perms(zc->zc_name, ZFS_DELEG_PERM_MOUNT, cr)) != 0) return (error); return (zfs_secpolicy_write_perms(zc->zc_name, ZFS_DELEG_PERM_CREATE, cr)); } int zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr) { return (zfs_secpolicy_write_perms(name, ZFS_DELEG_PERM_SNAPSHOT, cr)); } /* * Check for permission to create each snapshot in the nvlist. 
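 * Each name must contain an '@'; the check is performed against the
 * filesystem portion before the '@' using the "snapshot" delegated
 * permission.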
*/ static int zfs_secpolicy_snapshot(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { (void) zc; nvlist_t *snaps; int error = 0; nvpair_t *pair; snaps = fnvlist_lookup_nvlist(innvl, "snaps"); for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL; pair = nvlist_next_nvpair(snaps, pair)) { char *name = nvpair_name(pair); char *atp = strchr(name, '@'); if (atp == NULL) { error = SET_ERROR(EINVAL); break; } *atp = '\0'; error = zfs_secpolicy_snapshot_perms(name, cr); *atp = '@'; if (error != 0) break; } return (error); } /* * Check for permission to create each bookmark in the nvlist. */ static int zfs_secpolicy_bookmark(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { (void) zc; int error = 0; for (nvpair_t *pair = nvlist_next_nvpair(innvl, NULL); pair != NULL; pair = nvlist_next_nvpair(innvl, pair)) { char *name = nvpair_name(pair); char *hashp = strchr(name, '#'); if (hashp == NULL) { error = SET_ERROR(EINVAL); break; } *hashp = '\0'; error = zfs_secpolicy_write_perms(name, ZFS_DELEG_PERM_BOOKMARK, cr); *hashp = '#'; if (error != 0) break; } return (error); } static int zfs_secpolicy_destroy_bookmarks(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { (void) zc; nvpair_t *pair, *nextpair; int error = 0; for (pair = nvlist_next_nvpair(innvl, NULL); pair != NULL; pair = nextpair) { char *name = nvpair_name(pair); char *hashp = strchr(name, '#'); nextpair = nvlist_next_nvpair(innvl, pair); if (hashp == NULL) { error = SET_ERROR(EINVAL); break; } *hashp = '\0'; error = zfs_secpolicy_write_perms(name, ZFS_DELEG_PERM_DESTROY, cr); *hashp = '#'; if (error == ENOENT) { /* * Ignore any filesystems that don't exist (we consider * their bookmarks "already destroyed"). Remove * the name from the nvl here in case the filesystem * is created between now and when we try to destroy * the bookmark (in which case we don't want to * destroy it since we haven't checked for permission). */ fnvlist_remove_nvpair(innvl, pair); error = 0; } if (error != 0) break; } return (error); } static int zfs_secpolicy_log_history(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { (void) zc, (void) innvl, (void) cr; /* * Even root must have a proper TSD so that we know what pool * to log to. */ if (tsd_get(zfs_allow_log_key) == NULL) return (SET_ERROR(EPERM)); return (0); } static int zfs_secpolicy_create_clone(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { char parentname[ZFS_MAX_DATASET_NAME_LEN]; int error; char *origin; if ((error = zfs_get_parent(zc->zc_name, parentname, sizeof (parentname))) != 0) return (error); if (nvlist_lookup_string(innvl, "origin", &origin) == 0 && (error = zfs_secpolicy_write_perms(origin, ZFS_DELEG_PERM_CLONE, cr)) != 0) return (error); if ((error = zfs_secpolicy_write_perms(parentname, ZFS_DELEG_PERM_CREATE, cr)) != 0) return (error); return (zfs_secpolicy_write_perms(parentname, ZFS_DELEG_PERM_MOUNT, cr)); } /* * Policy for pool operations - create/destroy pools, add vdevs, etc. Requires * SYS_CONFIG privilege, which is not available in a local zone. */ int zfs_secpolicy_config(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { (void) zc, (void) innvl; if (secpolicy_sys_config(cr, B_FALSE) != 0) return (SET_ERROR(EPERM)); return (0); } /* * Policy for object to name lookups. */ static int zfs_secpolicy_diff(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { (void) innvl; int error; if ((error = secpolicy_sys_config(cr, B_FALSE)) == 0) return (0); error = zfs_secpolicy_write_perms(zc->zc_name, ZFS_DELEG_PERM_DIFF, cr); return (error); } /* * Policy for fault injection. Requires all privileges. 
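 * (These back the error-injection ioctls, e.g. as driven by zinject(8);
 * there is no delegated-permission fallback.)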
*/ static int zfs_secpolicy_inject(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { (void) zc, (void) innvl; return (secpolicy_zinject(cr)); } static int zfs_secpolicy_inherit_prop(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { (void) innvl; zfs_prop_t prop = zfs_name_to_prop(zc->zc_value); if (prop == ZPROP_USERPROP) { if (!zfs_prop_user(zc->zc_value)) return (SET_ERROR(EINVAL)); return (zfs_secpolicy_write_perms(zc->zc_name, ZFS_DELEG_PERM_USERPROP, cr)); } else { return (zfs_secpolicy_setprop(zc->zc_name, prop, NULL, cr)); } } static int zfs_secpolicy_userspace_one(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { int err = zfs_secpolicy_read(zc, innvl, cr); if (err) return (err); if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS) return (SET_ERROR(EINVAL)); if (zc->zc_value[0] == 0) { /* * They are asking about a posix uid/gid. If it's * themself, allow it. */ if (zc->zc_objset_type == ZFS_PROP_USERUSED || zc->zc_objset_type == ZFS_PROP_USERQUOTA || zc->zc_objset_type == ZFS_PROP_USEROBJUSED || zc->zc_objset_type == ZFS_PROP_USEROBJQUOTA) { if (zc->zc_guid == crgetuid(cr)) return (0); } else if (zc->zc_objset_type == ZFS_PROP_GROUPUSED || zc->zc_objset_type == ZFS_PROP_GROUPQUOTA || zc->zc_objset_type == ZFS_PROP_GROUPOBJUSED || zc->zc_objset_type == ZFS_PROP_GROUPOBJQUOTA) { if (groupmember(zc->zc_guid, cr)) return (0); } /* else is for project quota/used */ } return (zfs_secpolicy_write_perms(zc->zc_name, userquota_perms[zc->zc_objset_type], cr)); } static int zfs_secpolicy_userspace_many(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { int err = zfs_secpolicy_read(zc, innvl, cr); if (err) return (err); if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS) return (SET_ERROR(EINVAL)); return (zfs_secpolicy_write_perms(zc->zc_name, userquota_perms[zc->zc_objset_type], cr)); } static int zfs_secpolicy_userspace_upgrade(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { (void) innvl; return (zfs_secpolicy_setprop(zc->zc_name, ZFS_PROP_VERSION, NULL, cr)); } static int zfs_secpolicy_hold(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { (void) zc; nvpair_t *pair; nvlist_t *holds; int error; holds = fnvlist_lookup_nvlist(innvl, "holds"); for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL; pair = nvlist_next_nvpair(holds, pair)) { char fsname[ZFS_MAX_DATASET_NAME_LEN]; error = dmu_fsname(nvpair_name(pair), fsname); if (error != 0) return (error); error = zfs_secpolicy_write_perms(fsname, ZFS_DELEG_PERM_HOLD, cr); if (error != 0) return (error); } return (0); } static int zfs_secpolicy_release(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { (void) zc; nvpair_t *pair; int error; for (pair = nvlist_next_nvpair(innvl, NULL); pair != NULL; pair = nvlist_next_nvpair(innvl, pair)) { char fsname[ZFS_MAX_DATASET_NAME_LEN]; error = dmu_fsname(nvpair_name(pair), fsname); if (error != 0) return (error); error = zfs_secpolicy_write_perms(fsname, ZFS_DELEG_PERM_RELEASE, cr); if (error != 0) return (error); } return (0); } /* * Policy for allowing temporary snapshots to be taken or released */ static int zfs_secpolicy_tmp_snapshot(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { /* * A temporary snapshot is the same as a snapshot, * hold, destroy and release all rolled into one. * Delegated diff alone is sufficient that we allow this. 
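 * Without the "diff" delegation the caller instead needs the individual
 * permissions: snapshot, plus hold, release and destroy when holds are
 * requested (innvl != NULL).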
*/ int error; if ((error = zfs_secpolicy_write_perms(zc->zc_name, ZFS_DELEG_PERM_DIFF, cr)) == 0) return (0); error = zfs_secpolicy_snapshot_perms(zc->zc_name, cr); if (innvl != NULL) { if (error == 0) error = zfs_secpolicy_hold(zc, innvl, cr); if (error == 0) error = zfs_secpolicy_release(zc, innvl, cr); if (error == 0) error = zfs_secpolicy_destroy(zc, innvl, cr); } return (error); } static int zfs_secpolicy_load_key(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { return (zfs_secpolicy_write_perms(zc->zc_name, ZFS_DELEG_PERM_LOAD_KEY, cr)); } static int zfs_secpolicy_change_key(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr) { return (zfs_secpolicy_write_perms(zc->zc_name, ZFS_DELEG_PERM_CHANGE_KEY, cr)); } /* * Returns the nvlist as specified by the user in the zfs_cmd_t. */ static int get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp) { char *packed; int error; nvlist_t *list = NULL; /* * Read in and unpack the user-supplied nvlist. */ if (size == 0) return (SET_ERROR(EINVAL)); packed = vmem_alloc(size, KM_SLEEP); if ((error = ddi_copyin((void *)(uintptr_t)nvl, packed, size, iflag)) != 0) { vmem_free(packed, size); return (SET_ERROR(EFAULT)); } if ((error = nvlist_unpack(packed, size, &list, 0)) != 0) { vmem_free(packed, size); return (error); } vmem_free(packed, size); *nvp = list; return (0); } /* * Reduce the size of this nvlist until it can be serialized in 'max' bytes. * Entries will be removed from the end of the nvlist, and one int32 entry * named "N_MORE_ERRORS" will be added indicating how many entries were * removed. */ static int nvlist_smush(nvlist_t *errors, size_t max) { size_t size; size = fnvlist_size(errors); if (size > max) { nvpair_t *more_errors; int n = 0; if (max < 1024) return (SET_ERROR(ENOMEM)); fnvlist_add_int32(errors, ZPROP_N_MORE_ERRORS, 0); more_errors = nvlist_prev_nvpair(errors, NULL); do { nvpair_t *pair = nvlist_prev_nvpair(errors, more_errors); fnvlist_remove_nvpair(errors, pair); n++; size = fnvlist_size(errors); } while (size > max); fnvlist_remove_nvpair(errors, more_errors); fnvlist_add_int32(errors, ZPROP_N_MORE_ERRORS, n); ASSERT3U(fnvlist_size(errors), <=, max); } return (0); } static int put_nvlist(zfs_cmd_t *zc, nvlist_t *nvl) { char *packed = NULL; int error = 0; size_t size; size = fnvlist_size(nvl); if (size > zc->zc_nvlist_dst_size) { error = SET_ERROR(ENOMEM); } else { packed = fnvlist_pack(nvl, &size); if (ddi_copyout(packed, (void *)(uintptr_t)zc->zc_nvlist_dst, size, zc->zc_iflags) != 0) error = SET_ERROR(EFAULT); fnvlist_pack_free(packed, size); } zc->zc_nvlist_dst_size = size; zc->zc_nvlist_dst_filled = B_TRUE; return (error); } int getzfsvfs_impl(objset_t *os, zfsvfs_t **zfvp) { int error = 0; if (dmu_objset_type(os) != DMU_OST_ZFS) { return (SET_ERROR(EINVAL)); } mutex_enter(&os->os_user_ptr_lock); *zfvp = dmu_objset_get_user(os); /* bump s_active only when non-zero to prevent umount race */ error = zfs_vfs_ref(zfvp); mutex_exit(&os->os_user_ptr_lock); return (error); } int getzfsvfs(const char *dsname, zfsvfs_t **zfvp) { objset_t *os; int error; error = dmu_objset_hold(dsname, FTAG, &os); if (error != 0) return (error); error = getzfsvfs_impl(os, zfvp); dmu_objset_rele(os, FTAG); return (error); } /* * Find a zfsvfs_t for a mounted filesystem, or create our own, in which * case its z_sb will be NULL, and it will be opened as the owner. * If 'writer' is set, the z_teardown_lock will be held for RW_WRITER, * which prevents all inode ops from running. 
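 *
 * Typical usage (compare zfs_prop_set_userquota() below):
 *
 *	if ((err = zfsvfs_hold(dsname, FTAG, &zfsvfs, B_FALSE)) == 0) {
 *		... operate on the held zfsvfs ...
 *		zfsvfs_rele(zfsvfs, FTAG);
 *	}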
*/ static int zfsvfs_hold(const char *name, const void *tag, zfsvfs_t **zfvp, boolean_t writer) { int error = 0; if (getzfsvfs(name, zfvp) != 0) error = zfsvfs_create(name, B_FALSE, zfvp); if (error == 0) { if (writer) ZFS_TEARDOWN_ENTER_WRITE(*zfvp, tag); else ZFS_TEARDOWN_ENTER_READ(*zfvp, tag); if ((*zfvp)->z_unmounted) { /* * XXX we could probably try again, since the unmounting * thread should be just about to disassociate the * objset from the zfsvfs. */ ZFS_TEARDOWN_EXIT(*zfvp, tag); return (SET_ERROR(EBUSY)); } } return (error); } static void zfsvfs_rele(zfsvfs_t *zfsvfs, const void *tag) { ZFS_TEARDOWN_EXIT(zfsvfs, tag); if (zfs_vfs_held(zfsvfs)) { zfs_vfs_rele(zfsvfs); } else { dmu_objset_disown(zfsvfs->z_os, B_TRUE, zfsvfs); zfsvfs_free(zfsvfs); } } static int zfs_ioc_pool_create(zfs_cmd_t *zc) { int error; nvlist_t *config, *props = NULL; nvlist_t *rootprops = NULL; nvlist_t *zplprops = NULL; dsl_crypto_params_t *dcp = NULL; const char *spa_name = zc->zc_name; boolean_t unload_wkey = B_TRUE; if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size, zc->zc_iflags, &config))) return (error); if (zc->zc_nvlist_src_size != 0 && (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, zc->zc_iflags, &props))) { nvlist_free(config); return (error); } if (props) { nvlist_t *nvl = NULL; nvlist_t *hidden_args = NULL; uint64_t version = SPA_VERSION; char *tname; (void) nvlist_lookup_uint64(props, zpool_prop_to_name(ZPOOL_PROP_VERSION), &version); if (!SPA_VERSION_IS_SUPPORTED(version)) { error = SET_ERROR(EINVAL); goto pool_props_bad; } (void) nvlist_lookup_nvlist(props, ZPOOL_ROOTFS_PROPS, &nvl); if (nvl) { error = nvlist_dup(nvl, &rootprops, KM_SLEEP); if (error != 0) goto pool_props_bad; (void) nvlist_remove_all(props, ZPOOL_ROOTFS_PROPS); } (void) nvlist_lookup_nvlist(props, ZPOOL_HIDDEN_ARGS, &hidden_args); error = dsl_crypto_params_create_nvlist(DCP_CMD_NONE, rootprops, hidden_args, &dcp); if (error != 0) goto pool_props_bad; (void) nvlist_remove_all(props, ZPOOL_HIDDEN_ARGS); VERIFY(nvlist_alloc(&zplprops, NV_UNIQUE_NAME, KM_SLEEP) == 0); error = zfs_fill_zplprops_root(version, rootprops, zplprops, NULL); if (error != 0) goto pool_props_bad; if (nvlist_lookup_string(props, zpool_prop_to_name(ZPOOL_PROP_TNAME), &tname) == 0) spa_name = tname; } error = spa_create(zc->zc_name, config, props, zplprops, dcp); /* * Set the remaining root properties */ if (!error && (error = zfs_set_prop_nvlist(spa_name, ZPROP_SRC_LOCAL, rootprops, NULL)) != 0) { (void) spa_destroy(spa_name); unload_wkey = B_FALSE; /* spa_destroy() unloads wrapping keys */ } pool_props_bad: nvlist_free(rootprops); nvlist_free(zplprops); nvlist_free(config); nvlist_free(props); dsl_crypto_params_free(dcp, unload_wkey && !!error); return (error); } static int zfs_ioc_pool_destroy(zfs_cmd_t *zc) { int error; zfs_log_history(zc); error = spa_destroy(zc->zc_name); return (error); } static int zfs_ioc_pool_import(zfs_cmd_t *zc) { nvlist_t *config, *props = NULL; uint64_t guid; int error; if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size, zc->zc_iflags, &config)) != 0) return (error); if (zc->zc_nvlist_src_size != 0 && (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, zc->zc_iflags, &props))) { nvlist_free(config); return (error); } if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &guid) != 0 || guid != zc->zc_guid) error = SET_ERROR(EINVAL); else error = spa_import(zc->zc_name, config, props, zc->zc_cookie); if (zc->zc_nvlist_dst != 0) { int err; if ((err = 
put_nvlist(zc, config)) != 0) error = err; } nvlist_free(config); nvlist_free(props); return (error); } static int zfs_ioc_pool_export(zfs_cmd_t *zc) { int error; boolean_t force = (boolean_t)zc->zc_cookie; boolean_t hardforce = (boolean_t)zc->zc_guid; zfs_log_history(zc); error = spa_export(zc->zc_name, NULL, force, hardforce); return (error); } static int zfs_ioc_pool_configs(zfs_cmd_t *zc) { nvlist_t *configs; int error; if ((configs = spa_all_configs(&zc->zc_cookie)) == NULL) return (SET_ERROR(EEXIST)); error = put_nvlist(zc, configs); nvlist_free(configs); return (error); } /* * inputs: * zc_name name of the pool * * outputs: * zc_cookie real errno * zc_nvlist_dst config nvlist * zc_nvlist_dst_size size of config nvlist */ static int zfs_ioc_pool_stats(zfs_cmd_t *zc) { nvlist_t *config; int error; int ret = 0; error = spa_get_stats(zc->zc_name, &config, zc->zc_value, sizeof (zc->zc_value)); if (config != NULL) { ret = put_nvlist(zc, config); nvlist_free(config); /* * The config may be present even if 'error' is non-zero. * In this case we return success, and preserve the real errno * in 'zc_cookie'. */ zc->zc_cookie = error; } else { ret = error; } return (ret); } /* * Try to import the given pool, returning pool stats as appropriate so that * user land knows which devices are available and overall pool health. */ static int zfs_ioc_pool_tryimport(zfs_cmd_t *zc) { nvlist_t *tryconfig, *config = NULL; int error; if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size, zc->zc_iflags, &tryconfig)) != 0) return (error); config = spa_tryimport(tryconfig); nvlist_free(tryconfig); if (config == NULL) return (SET_ERROR(EINVAL)); error = put_nvlist(zc, config); nvlist_free(config); return (error); } /* * inputs: * zc_name name of the pool * zc_cookie scan func (pool_scan_func_t) * zc_flags scrub pause/resume flag (pool_scrub_cmd_t) */ static int zfs_ioc_pool_scan(zfs_cmd_t *zc) { spa_t *spa; int error; if (zc->zc_flags >= POOL_SCRUB_FLAGS_END) return (SET_ERROR(EINVAL)); if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) return (error); if (zc->zc_flags == POOL_SCRUB_PAUSE) error = spa_scrub_pause_resume(spa, POOL_SCRUB_PAUSE); else if (zc->zc_cookie == POOL_SCAN_NONE) error = spa_scan_stop(spa); else error = spa_scan(spa, zc->zc_cookie); spa_close(spa, FTAG); return (error); } static int zfs_ioc_pool_freeze(zfs_cmd_t *zc) { spa_t *spa; int error; error = spa_open(zc->zc_name, &spa, FTAG); if (error == 0) { spa_freeze(spa); spa_close(spa, FTAG); } return (error); } static int zfs_ioc_pool_upgrade(zfs_cmd_t *zc) { spa_t *spa; int error; if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) return (error); if (zc->zc_cookie < spa_version(spa) || !SPA_VERSION_IS_SUPPORTED(zc->zc_cookie)) { spa_close(spa, FTAG); return (SET_ERROR(EINVAL)); } spa_upgrade(spa, zc->zc_cookie); spa_close(spa, FTAG); return (error); } static int zfs_ioc_pool_get_history(zfs_cmd_t *zc) { spa_t *spa; char *hist_buf; uint64_t size; int error; if ((size = zc->zc_history_len) == 0) return (SET_ERROR(EINVAL)); if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) return (error); if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY) { spa_close(spa, FTAG); return (SET_ERROR(ENOTSUP)); } hist_buf = vmem_alloc(size, KM_SLEEP); if ((error = spa_history_get(spa, &zc->zc_history_offset, &zc->zc_history_len, hist_buf)) == 0) { error = ddi_copyout(hist_buf, (void *)(uintptr_t)zc->zc_history, zc->zc_history_len, zc->zc_iflags); } spa_close(spa, FTAG); vmem_free(hist_buf, size); return (error); } static int 
zfs_ioc_pool_reguid(zfs_cmd_t *zc) { spa_t *spa; int error; error = spa_open(zc->zc_name, &spa, FTAG); if (error == 0) { error = spa_change_guid(spa); spa_close(spa, FTAG); } return (error); } static int zfs_ioc_dsobj_to_dsname(zfs_cmd_t *zc) { return (dsl_dsobj_to_dsname(zc->zc_name, zc->zc_obj, zc->zc_value)); } /* * inputs: * zc_name name of filesystem * zc_obj object to find * * outputs: * zc_value name of object */ static int zfs_ioc_obj_to_path(zfs_cmd_t *zc) { objset_t *os; int error; /* XXX reading from objset not owned */ if ((error = dmu_objset_hold_flags(zc->zc_name, B_TRUE, FTAG, &os)) != 0) return (error); if (dmu_objset_type(os) != DMU_OST_ZFS) { dmu_objset_rele_flags(os, B_TRUE, FTAG); return (SET_ERROR(EINVAL)); } error = zfs_obj_to_path(os, zc->zc_obj, zc->zc_value, sizeof (zc->zc_value)); dmu_objset_rele_flags(os, B_TRUE, FTAG); return (error); } /* * inputs: * zc_name name of filesystem * zc_obj object to find * * outputs: * zc_stat stats on object * zc_value path to object */ static int zfs_ioc_obj_to_stats(zfs_cmd_t *zc) { objset_t *os; int error; /* XXX reading from objset not owned */ if ((error = dmu_objset_hold_flags(zc->zc_name, B_TRUE, FTAG, &os)) != 0) return (error); if (dmu_objset_type(os) != DMU_OST_ZFS) { dmu_objset_rele_flags(os, B_TRUE, FTAG); return (SET_ERROR(EINVAL)); } error = zfs_obj_to_stats(os, zc->zc_obj, &zc->zc_stat, zc->zc_value, sizeof (zc->zc_value)); dmu_objset_rele_flags(os, B_TRUE, FTAG); return (error); } static int zfs_ioc_vdev_add(zfs_cmd_t *zc) { spa_t *spa; int error; nvlist_t *config; error = spa_open(zc->zc_name, &spa, FTAG); if (error != 0) return (error); error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size, zc->zc_iflags, &config); if (error == 0) { error = spa_vdev_add(spa, config); nvlist_free(config); } spa_close(spa, FTAG); return (error); } /* * inputs: * zc_name name of the pool * zc_guid guid of vdev to remove * zc_cookie cancel removal */ static int zfs_ioc_vdev_remove(zfs_cmd_t *zc) { spa_t *spa; int error; error = spa_open(zc->zc_name, &spa, FTAG); if (error != 0) return (error); if (zc->zc_cookie != 0) { error = spa_vdev_remove_cancel(spa); } else { error = spa_vdev_remove(spa, zc->zc_guid, B_FALSE); } spa_close(spa, FTAG); return (error); } static int zfs_ioc_vdev_set_state(zfs_cmd_t *zc) { spa_t *spa; int error; vdev_state_t newstate = VDEV_STATE_UNKNOWN; if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) return (error); switch (zc->zc_cookie) { case VDEV_STATE_ONLINE: error = vdev_online(spa, zc->zc_guid, zc->zc_obj, &newstate); break; case VDEV_STATE_OFFLINE: error = vdev_offline(spa, zc->zc_guid, zc->zc_obj); break; case VDEV_STATE_FAULTED: if (zc->zc_obj != VDEV_AUX_ERR_EXCEEDED && zc->zc_obj != VDEV_AUX_EXTERNAL && zc->zc_obj != VDEV_AUX_EXTERNAL_PERSIST) zc->zc_obj = VDEV_AUX_ERR_EXCEEDED; error = vdev_fault(spa, zc->zc_guid, zc->zc_obj); break; case VDEV_STATE_DEGRADED: if (zc->zc_obj != VDEV_AUX_ERR_EXCEEDED && zc->zc_obj != VDEV_AUX_EXTERNAL) zc->zc_obj = VDEV_AUX_ERR_EXCEEDED; error = vdev_degrade(spa, zc->zc_guid, zc->zc_obj); break; default: error = SET_ERROR(EINVAL); } zc->zc_cookie = newstate; spa_close(spa, FTAG); return (error); } static int zfs_ioc_vdev_attach(zfs_cmd_t *zc) { spa_t *spa; nvlist_t *config; int replacing = zc->zc_cookie; int rebuild = zc->zc_simple; int error; if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) return (error); if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size, zc->zc_iflags, &config)) == 0) { error = spa_vdev_attach(spa, zc->zc_guid, 
config, replacing, rebuild); nvlist_free(config); } spa_close(spa, FTAG); return (error); } static int zfs_ioc_vdev_detach(zfs_cmd_t *zc) { spa_t *spa; int error; if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) return (error); error = spa_vdev_detach(spa, zc->zc_guid, 0, B_FALSE); spa_close(spa, FTAG); return (error); } static int zfs_ioc_vdev_split(zfs_cmd_t *zc) { spa_t *spa; nvlist_t *config, *props = NULL; int error; boolean_t exp = !!(zc->zc_cookie & ZPOOL_EXPORT_AFTER_SPLIT); if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) return (error); if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size, zc->zc_iflags, &config))) { spa_close(spa, FTAG); return (error); } if (zc->zc_nvlist_src_size != 0 && (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, zc->zc_iflags, &props))) { spa_close(spa, FTAG); nvlist_free(config); return (error); } error = spa_vdev_split_mirror(spa, zc->zc_string, config, props, exp); spa_close(spa, FTAG); nvlist_free(config); nvlist_free(props); return (error); } static int zfs_ioc_vdev_setpath(zfs_cmd_t *zc) { spa_t *spa; const char *path = zc->zc_value; uint64_t guid = zc->zc_guid; int error; error = spa_open(zc->zc_name, &spa, FTAG); if (error != 0) return (error); error = spa_vdev_setpath(spa, guid, path); spa_close(spa, FTAG); return (error); } static int zfs_ioc_vdev_setfru(zfs_cmd_t *zc) { spa_t *spa; const char *fru = zc->zc_value; uint64_t guid = zc->zc_guid; int error; error = spa_open(zc->zc_name, &spa, FTAG); if (error != 0) return (error); error = spa_vdev_setfru(spa, guid, fru); spa_close(spa, FTAG); return (error); } static int zfs_ioc_objset_stats_impl(zfs_cmd_t *zc, objset_t *os) { int error = 0; nvlist_t *nv; dmu_objset_fast_stat(os, &zc->zc_objset_stats); if (zc->zc_nvlist_dst != 0 && (error = dsl_prop_get_all(os, &nv)) == 0) { dmu_objset_stats(os, nv); /* * NB: zvol_get_stats() will read the objset contents, * which we aren't supposed to do with a * DS_MODE_USER hold, because it could be * inconsistent. So this is a bit of a workaround... * XXX reading without owning */ if (!zc->zc_objset_stats.dds_inconsistent && dmu_objset_type(os) == DMU_OST_ZVOL) { error = zvol_get_stats(os, nv); if (error == EIO) { nvlist_free(nv); return (error); } VERIFY0(error); } if (error == 0) error = put_nvlist(zc, nv); nvlist_free(nv); } return (error); } /* * inputs: * zc_name name of filesystem * zc_nvlist_dst_size size of buffer for property nvlist * * outputs: * zc_objset_stats stats * zc_nvlist_dst property nvlist * zc_nvlist_dst_size size of property nvlist */ static int zfs_ioc_objset_stats(zfs_cmd_t *zc) { objset_t *os; int error; error = dmu_objset_hold(zc->zc_name, FTAG, &os); if (error == 0) { error = zfs_ioc_objset_stats_impl(zc, os); dmu_objset_rele(os, FTAG); } return (error); } /* * inputs: * zc_name name of filesystem * zc_nvlist_dst_size size of buffer for property nvlist * * outputs: * zc_nvlist_dst received property nvlist * zc_nvlist_dst_size size of received property nvlist * * Gets received properties (distinct from local properties on or after * SPA_VERSION_RECVD_PROPS) for callers who want to differentiate received from * local property values. */ static int zfs_ioc_objset_recvd_props(zfs_cmd_t *zc) { int error = 0; nvlist_t *nv; /* * Without this check, we would return local property values if the * caller has not already received properties on or after * SPA_VERSION_RECVD_PROPS. 
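 * Returning ENOTSUP lets the caller distinguish "received properties are
 * not tracked for this dataset yet" from an empty received-property list.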
*/ if (!dsl_prop_get_hasrecvd(zc->zc_name)) return (SET_ERROR(ENOTSUP)); if (zc->zc_nvlist_dst != 0 && (error = dsl_prop_get_received(zc->zc_name, &nv)) == 0) { error = put_nvlist(zc, nv); nvlist_free(nv); } return (error); } static int nvl_add_zplprop(objset_t *os, nvlist_t *props, zfs_prop_t prop) { uint64_t value; int error; /* * zfs_get_zplprop() will either find a value or give us * the default value (if there is one). */ if ((error = zfs_get_zplprop(os, prop, &value)) != 0) return (error); VERIFY(nvlist_add_uint64(props, zfs_prop_to_name(prop), value) == 0); return (0); } /* * inputs: * zc_name name of filesystem * zc_nvlist_dst_size size of buffer for zpl property nvlist * * outputs: * zc_nvlist_dst zpl property nvlist * zc_nvlist_dst_size size of zpl property nvlist */ static int zfs_ioc_objset_zplprops(zfs_cmd_t *zc) { objset_t *os; int err; /* XXX reading without owning */ if ((err = dmu_objset_hold(zc->zc_name, FTAG, &os))) return (err); dmu_objset_fast_stat(os, &zc->zc_objset_stats); /* * NB: nvl_add_zplprop() will read the objset contents, * which we aren't supposed to do with a DS_MODE_USER * hold, because it could be inconsistent. */ if (zc->zc_nvlist_dst != 0 && !zc->zc_objset_stats.dds_inconsistent && dmu_objset_type(os) == DMU_OST_ZFS) { nvlist_t *nv; VERIFY(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) == 0); if ((err = nvl_add_zplprop(os, nv, ZFS_PROP_VERSION)) == 0 && (err = nvl_add_zplprop(os, nv, ZFS_PROP_NORMALIZE)) == 0 && (err = nvl_add_zplprop(os, nv, ZFS_PROP_UTF8ONLY)) == 0 && (err = nvl_add_zplprop(os, nv, ZFS_PROP_CASE)) == 0) err = put_nvlist(zc, nv); nvlist_free(nv); } else { err = SET_ERROR(ENOENT); } dmu_objset_rele(os, FTAG); return (err); } /* * inputs: * zc_name name of filesystem * zc_cookie zap cursor * zc_nvlist_dst_size size of buffer for property nvlist * * outputs: * zc_name name of next filesystem * zc_cookie zap cursor * zc_objset_stats stats * zc_nvlist_dst property nvlist * zc_nvlist_dst_size size of property nvlist */ static int zfs_ioc_dataset_list_next(zfs_cmd_t *zc) { objset_t *os; int error; char *p; size_t orig_len = strlen(zc->zc_name); top: if ((error = dmu_objset_hold(zc->zc_name, FTAG, &os))) { if (error == ENOENT) error = SET_ERROR(ESRCH); return (error); } p = strrchr(zc->zc_name, '/'); if (p == NULL || p[1] != '\0') (void) strlcat(zc->zc_name, "/", sizeof (zc->zc_name)); p = zc->zc_name + strlen(zc->zc_name); do { error = dmu_dir_list_next(os, sizeof (zc->zc_name) - (p - zc->zc_name), p, NULL, &zc->zc_cookie); if (error == ENOENT) error = SET_ERROR(ESRCH); } while (error == 0 && zfs_dataset_name_hidden(zc->zc_name)); dmu_objset_rele(os, FTAG); /* * If it's an internal dataset (ie. with a '$' in its name), * don't try to get stats for it, otherwise we'll return ENOENT. */ if (error == 0 && strchr(zc->zc_name, '$') == NULL) { error = zfs_ioc_objset_stats(zc); /* fill in the stats */ if (error == ENOENT) { /* We lost a race with destroy, get the next one. 
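 * The ZAP cursor in zc_cookie is left untouched, so the retry resumes
 * the directory iteration instead of starting over.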
*/ zc->zc_name[orig_len] = '\0'; goto top; } } return (error); } /* * inputs: * zc_name name of filesystem * zc_cookie zap cursor * zc_nvlist_src iteration range nvlist * zc_nvlist_src_size size of iteration range nvlist * * outputs: * zc_name name of next snapshot * zc_objset_stats stats * zc_nvlist_dst property nvlist * zc_nvlist_dst_size size of property nvlist */ static int zfs_ioc_snapshot_list_next(zfs_cmd_t *zc) { int error; objset_t *os, *ossnap; dsl_dataset_t *ds; uint64_t min_txg = 0, max_txg = 0; if (zc->zc_nvlist_src_size != 0) { nvlist_t *props = NULL; error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, zc->zc_iflags, &props); if (error != 0) return (error); (void) nvlist_lookup_uint64(props, SNAP_ITER_MIN_TXG, &min_txg); (void) nvlist_lookup_uint64(props, SNAP_ITER_MAX_TXG, &max_txg); nvlist_free(props); } error = dmu_objset_hold(zc->zc_name, FTAG, &os); if (error != 0) { return (error == ENOENT ? SET_ERROR(ESRCH) : error); } /* * A dataset name of maximum length cannot have any snapshots, * so exit immediately. */ if (strlcat(zc->zc_name, "@", sizeof (zc->zc_name)) >= ZFS_MAX_DATASET_NAME_LEN) { dmu_objset_rele(os, FTAG); return (SET_ERROR(ESRCH)); } while (error == 0) { if (issig(JUSTLOOKING) && issig(FORREAL)) { error = SET_ERROR(EINTR); break; } error = dmu_snapshot_list_next(os, sizeof (zc->zc_name) - strlen(zc->zc_name), zc->zc_name + strlen(zc->zc_name), &zc->zc_obj, &zc->zc_cookie, NULL); if (error == ENOENT) { error = SET_ERROR(ESRCH); break; } else if (error != 0) { break; } error = dsl_dataset_hold_obj(dmu_objset_pool(os), zc->zc_obj, FTAG, &ds); if (error != 0) break; if ((min_txg != 0 && dsl_get_creationtxg(ds) < min_txg) || (max_txg != 0 && dsl_get_creationtxg(ds) > max_txg)) { dsl_dataset_rele(ds, FTAG); /* undo snapshot name append */ *(strchr(zc->zc_name, '@') + 1) = '\0'; /* skip snapshot */ continue; } if (zc->zc_simple) { zc->zc_objset_stats.dds_creation_txg = dsl_get_creationtxg(ds); dsl_dataset_rele(ds, FTAG); break; } if ((error = dmu_objset_from_ds(ds, &ossnap)) != 0) { dsl_dataset_rele(ds, FTAG); break; } if ((error = zfs_ioc_objset_stats_impl(zc, ossnap)) != 0) { dsl_dataset_rele(ds, FTAG); break; } dsl_dataset_rele(ds, FTAG); break; } dmu_objset_rele(os, FTAG); /* if we failed, undo the @ that we tacked on to zc_name */ if (error != 0) *strchr(zc->zc_name, '@') = '\0'; return (error); } static int zfs_prop_set_userquota(const char *dsname, nvpair_t *pair) { const char *propname = nvpair_name(pair); uint64_t *valary; unsigned int vallen; const char *dash, *domain; zfs_userquota_prop_t type; uint64_t rid; uint64_t quota; zfsvfs_t *zfsvfs; int err; if (nvpair_type(pair) == DATA_TYPE_NVLIST) { nvlist_t *attrs; VERIFY(nvpair_value_nvlist(pair, &attrs) == 0); if (nvlist_lookup_nvpair(attrs, ZPROP_VALUE, &pair) != 0) return (SET_ERROR(EINVAL)); } /* * A correctly constructed propname is encoded as * userquota@-. */ if ((dash = strchr(propname, '-')) == NULL || nvpair_value_uint64_array(pair, &valary, &vallen) != 0 || vallen != 3) return (SET_ERROR(EINVAL)); domain = dash + 1; type = valary[0]; rid = valary[1]; quota = valary[2]; err = zfsvfs_hold(dsname, FTAG, &zfsvfs, B_FALSE); if (err == 0) { err = zfs_set_userquota(zfsvfs, type, domain, rid, quota); zfsvfs_rele(zfsvfs, FTAG); } return (err); } /* * If the named property is one that has a special function to set its value, * return 0 on success and a positive error code on failure; otherwise if it is * not one of the special properties handled by this function, return -1. 
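 * (A -1 return sends the caller down the generic dsl_props_set() /
 * dsl_prop_set_*() path instead.)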
* * XXX: It would be better for callers of the property interface if we handled * these special cases in dsl_prop.c (in the dsl layer). */ static int zfs_prop_set_special(const char *dsname, zprop_source_t source, nvpair_t *pair) { const char *propname = nvpair_name(pair); zfs_prop_t prop = zfs_name_to_prop(propname); uint64_t intval = 0; const char *strval = NULL; int err = -1; if (prop == ZPROP_USERPROP) { if (zfs_prop_userquota(propname)) return (zfs_prop_set_userquota(dsname, pair)); return (-1); } if (nvpair_type(pair) == DATA_TYPE_NVLIST) { nvlist_t *attrs; VERIFY(nvpair_value_nvlist(pair, &attrs) == 0); VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE, &pair) == 0); } /* all special properties are numeric except for keylocation */ if (zfs_prop_get_type(prop) == PROP_TYPE_STRING) { strval = fnvpair_value_string(pair); } else { intval = fnvpair_value_uint64(pair); } switch (prop) { case ZFS_PROP_QUOTA: err = dsl_dir_set_quota(dsname, source, intval); break; case ZFS_PROP_REFQUOTA: err = dsl_dataset_set_refquota(dsname, source, intval); break; case ZFS_PROP_FILESYSTEM_LIMIT: case ZFS_PROP_SNAPSHOT_LIMIT: if (intval == UINT64_MAX) { /* clearing the limit, just do it */ err = 0; } else { err = dsl_dir_activate_fs_ss_limit(dsname); } /* * Set err to -1 to force the zfs_set_prop_nvlist code down the * default path to set the value in the nvlist. */ if (err == 0) err = -1; break; case ZFS_PROP_KEYLOCATION: err = dsl_crypto_can_set_keylocation(dsname, strval); /* * Set err to -1 to force the zfs_set_prop_nvlist code down the * default path to set the value in the nvlist. */ if (err == 0) err = -1; break; case ZFS_PROP_RESERVATION: err = dsl_dir_set_reservation(dsname, source, intval); break; case ZFS_PROP_REFRESERVATION: err = dsl_dataset_set_refreservation(dsname, source, intval); break; case ZFS_PROP_COMPRESSION: err = dsl_dataset_set_compression(dsname, source, intval); /* * Set err to -1 to force the zfs_set_prop_nvlist code down the * default path to set the value in the nvlist. */ if (err == 0) err = -1; break; case ZFS_PROP_VOLSIZE: err = zvol_set_volsize(dsname, intval); break; case ZFS_PROP_SNAPDEV: err = zvol_set_snapdev(dsname, source, intval); break; case ZFS_PROP_VOLMODE: err = zvol_set_volmode(dsname, source, intval); break; case ZFS_PROP_VERSION: { zfsvfs_t *zfsvfs; if ((err = zfsvfs_hold(dsname, FTAG, &zfsvfs, B_TRUE)) != 0) break; err = zfs_set_version(zfsvfs, intval); zfsvfs_rele(zfsvfs, FTAG); if (err == 0 && intval >= ZPL_VERSION_USERSPACE) { zfs_cmd_t *zc; zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP); (void) strlcpy(zc->zc_name, dsname, sizeof (zc->zc_name)); (void) zfs_ioc_userspace_upgrade(zc); (void) zfs_ioc_id_quota_upgrade(zc); kmem_free(zc, sizeof (zfs_cmd_t)); } break; } default: err = -1; } return (err); } static boolean_t zfs_is_namespace_prop(zfs_prop_t prop) { switch (prop) { case ZFS_PROP_ATIME: case ZFS_PROP_RELATIME: case ZFS_PROP_DEVICES: case ZFS_PROP_EXEC: case ZFS_PROP_SETUID: case ZFS_PROP_READONLY: case ZFS_PROP_XATTR: case ZFS_PROP_NBMAND: return (B_TRUE); default: return (B_FALSE); } } /* * This function is best effort. If it fails to set any of the given properties, * it continues to set as many as it can and returns the last error * encountered. If the caller provides a non-NULL errlist, it will be filled in * with the list of names of all the properties that failed along with the * corresponding error numbers. * * If every property is set successfully, zero is returned and errlist is not * modified. 
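 *
 * Properties that fail on the first pass are retried once, since some
 * failures (e.g. a quota arriving before the reservation it must cover)
 * are only an artifact of ordering.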
*/ int zfs_set_prop_nvlist(const char *dsname, zprop_source_t source, nvlist_t *nvl, nvlist_t *errlist) { nvpair_t *pair; nvpair_t *propval; int rv = 0; int err; uint64_t intval; const char *strval; boolean_t should_update_mount_cache = B_FALSE; nvlist_t *genericnvl = fnvlist_alloc(); nvlist_t *retrynvl = fnvlist_alloc(); retry: pair = NULL; while ((pair = nvlist_next_nvpair(nvl, pair)) != NULL) { const char *propname = nvpair_name(pair); zfs_prop_t prop = zfs_name_to_prop(propname); err = 0; /* decode the property value */ propval = pair; if (nvpair_type(pair) == DATA_TYPE_NVLIST) { nvlist_t *attrs; attrs = fnvpair_value_nvlist(pair); if (nvlist_lookup_nvpair(attrs, ZPROP_VALUE, &propval) != 0) err = SET_ERROR(EINVAL); } /* Validate value type */ if (err == 0 && source == ZPROP_SRC_INHERITED) { /* inherited properties are expected to be booleans */ if (nvpair_type(propval) != DATA_TYPE_BOOLEAN) err = SET_ERROR(EINVAL); } else if (err == 0 && prop == ZPROP_USERPROP) { if (zfs_prop_user(propname)) { if (nvpair_type(propval) != DATA_TYPE_STRING) err = SET_ERROR(EINVAL); } else if (zfs_prop_userquota(propname)) { if (nvpair_type(propval) != DATA_TYPE_UINT64_ARRAY) err = SET_ERROR(EINVAL); } else { err = SET_ERROR(EINVAL); } } else if (err == 0) { if (nvpair_type(propval) == DATA_TYPE_STRING) { if (zfs_prop_get_type(prop) != PROP_TYPE_STRING) err = SET_ERROR(EINVAL); } else if (nvpair_type(propval) == DATA_TYPE_UINT64) { const char *unused; intval = fnvpair_value_uint64(propval); switch (zfs_prop_get_type(prop)) { case PROP_TYPE_NUMBER: break; case PROP_TYPE_STRING: err = SET_ERROR(EINVAL); break; case PROP_TYPE_INDEX: if (zfs_prop_index_to_string(prop, intval, &unused) != 0) err = SET_ERROR(ZFS_ERR_BADPROP); break; default: cmn_err(CE_PANIC, "unknown property type"); } } else { err = SET_ERROR(EINVAL); } } /* Validate permissions */ if (err == 0) err = zfs_check_settable(dsname, pair, CRED()); if (err == 0) { if (source == ZPROP_SRC_INHERITED) err = -1; /* does not need special handling */ else err = zfs_prop_set_special(dsname, source, pair); if (err == -1) { /* * For better performance we build up a list of * properties to set in a single transaction. */ err = nvlist_add_nvpair(genericnvl, pair); } else if (err != 0 && nvl != retrynvl) { /* * This may be a spurious error caused by * receiving quota and reservation out of order. * Try again in a second pass. */ err = nvlist_add_nvpair(retrynvl, pair); } } if (err != 0) { if (errlist != NULL) fnvlist_add_int32(errlist, propname, err); rv = err; } if (zfs_is_namespace_prop(prop)) should_update_mount_cache = B_TRUE; } if (nvl != retrynvl && !nvlist_empty(retrynvl)) { nvl = retrynvl; goto retry; } if (nvlist_empty(genericnvl)) goto out; /* * Try to set them all in one batch. */ err = dsl_props_set(dsname, source, genericnvl); if (err == 0) goto out; /* * If batching fails, we still want to set as many properties as we * can, so try setting them individually. 
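 * Failures from this pass are still recorded in errlist, and the last
 * error seen becomes the function's return value.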
*/ pair = NULL; while ((pair = nvlist_next_nvpair(genericnvl, pair)) != NULL) { const char *propname = nvpair_name(pair); err = 0; propval = pair; if (nvpair_type(pair) == DATA_TYPE_NVLIST) { nvlist_t *attrs; attrs = fnvpair_value_nvlist(pair); propval = fnvlist_lookup_nvpair(attrs, ZPROP_VALUE); } if (nvpair_type(propval) == DATA_TYPE_STRING) { strval = fnvpair_value_string(propval); err = dsl_prop_set_string(dsname, propname, source, strval); } else if (nvpair_type(propval) == DATA_TYPE_BOOLEAN) { err = dsl_prop_inherit(dsname, propname, source); } else { intval = fnvpair_value_uint64(propval); err = dsl_prop_set_int(dsname, propname, source, intval); } if (err != 0) { if (errlist != NULL) { fnvlist_add_int32(errlist, propname, err); } rv = err; } } out: if (should_update_mount_cache) zfs_ioctl_update_mount_cache(dsname); nvlist_free(genericnvl); nvlist_free(retrynvl); return (rv); } /* * Check that all the properties are valid user properties. */ static int zfs_check_userprops(nvlist_t *nvl) { nvpair_t *pair = NULL; while ((pair = nvlist_next_nvpair(nvl, pair)) != NULL) { const char *propname = nvpair_name(pair); if (!zfs_prop_user(propname) || nvpair_type(pair) != DATA_TYPE_STRING) return (SET_ERROR(EINVAL)); if (strlen(propname) >= ZAP_MAXNAMELEN) return (SET_ERROR(ENAMETOOLONG)); if (strlen(fnvpair_value_string(pair)) >= ZAP_MAXVALUELEN) return (SET_ERROR(E2BIG)); } return (0); } static void props_skip(nvlist_t *props, nvlist_t *skipped, nvlist_t **newprops) { nvpair_t *pair; VERIFY(nvlist_alloc(newprops, NV_UNIQUE_NAME, KM_SLEEP) == 0); pair = NULL; while ((pair = nvlist_next_nvpair(props, pair)) != NULL) { if (nvlist_exists(skipped, nvpair_name(pair))) continue; VERIFY(nvlist_add_nvpair(*newprops, pair) == 0); } } static int clear_received_props(const char *dsname, nvlist_t *props, nvlist_t *skipped) { int err = 0; nvlist_t *cleared_props = NULL; props_skip(props, skipped, &cleared_props); if (!nvlist_empty(cleared_props)) { /* * Acts on local properties until the dataset has received * properties at least once on or after SPA_VERSION_RECVD_PROPS. */ zprop_source_t flags = (ZPROP_SRC_NONE | (dsl_prop_get_hasrecvd(dsname) ? ZPROP_SRC_RECEIVED : 0)); err = zfs_set_prop_nvlist(dsname, flags, cleared_props, NULL); } nvlist_free(cleared_props); return (err); } /* * inputs: * zc_name name of filesystem * zc_value name of property to set * zc_nvlist_src{_size} nvlist of properties to apply * zc_cookie received properties flag * * outputs: * zc_nvlist_dst{_size} error for each unapplied received property */ static int zfs_ioc_set_prop(zfs_cmd_t *zc) { nvlist_t *nvl; boolean_t received = zc->zc_cookie; zprop_source_t source = (received ? 
ZPROP_SRC_RECEIVED : ZPROP_SRC_LOCAL); nvlist_t *errors; int error; if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, zc->zc_iflags, &nvl)) != 0) return (error); if (received) { nvlist_t *origprops; if (dsl_prop_get_received(zc->zc_name, &origprops) == 0) { (void) clear_received_props(zc->zc_name, origprops, nvl); nvlist_free(origprops); } error = dsl_prop_set_hasrecvd(zc->zc_name); } errors = fnvlist_alloc(); if (error == 0) error = zfs_set_prop_nvlist(zc->zc_name, source, nvl, errors); if (zc->zc_nvlist_dst != 0 && errors != NULL) { (void) put_nvlist(zc, errors); } nvlist_free(errors); nvlist_free(nvl); return (error); } /* * inputs: * zc_name name of filesystem * zc_value name of property to inherit * zc_cookie revert to received value if TRUE * * outputs: none */ static int zfs_ioc_inherit_prop(zfs_cmd_t *zc) { const char *propname = zc->zc_value; zfs_prop_t prop = zfs_name_to_prop(propname); boolean_t received = zc->zc_cookie; zprop_source_t source = (received ? ZPROP_SRC_NONE /* revert to received value, if any */ : ZPROP_SRC_INHERITED); /* explicitly inherit */ nvlist_t *dummy; nvpair_t *pair; zprop_type_t type; int err; if (!received) { /* * Only check this in the non-received case. We want to allow * 'inherit -S' to revert non-inheritable properties like quota * and reservation to the received or default values even though * they are not considered inheritable. */ if (prop != ZPROP_USERPROP && !zfs_prop_inheritable(prop)) return (SET_ERROR(EINVAL)); } if (prop == ZPROP_USERPROP) { if (!zfs_prop_user(propname)) return (SET_ERROR(EINVAL)); type = PROP_TYPE_STRING; } else if (prop == ZFS_PROP_VOLSIZE || prop == ZFS_PROP_VERSION) { return (SET_ERROR(EINVAL)); } else { type = zfs_prop_get_type(prop); } /* * zfs_prop_set_special() expects properties in the form of an * nvpair with type info. */ dummy = fnvlist_alloc(); switch (type) { case PROP_TYPE_STRING: VERIFY(0 == nvlist_add_string(dummy, propname, "")); break; case PROP_TYPE_NUMBER: case PROP_TYPE_INDEX: VERIFY(0 == nvlist_add_uint64(dummy, propname, 0)); break; default: err = SET_ERROR(EINVAL); goto errout; } pair = nvlist_next_nvpair(dummy, NULL); if (pair == NULL) { err = SET_ERROR(EINVAL); } else { err = zfs_prop_set_special(zc->zc_name, source, pair); if (err == -1) /* property is not "special", needs handling */ err = dsl_prop_inherit(zc->zc_name, zc->zc_value, source); } errout: nvlist_free(dummy); return (err); } static int zfs_ioc_pool_set_props(zfs_cmd_t *zc) { nvlist_t *props; spa_t *spa; int error; nvpair_t *pair; if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, zc->zc_iflags, &props))) return (error); /* * If the only property is the configfile, then just do a spa_lookup() * to handle the faulted case. 
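 * spa_open() fails on a faulted pool, but the cachefile property must
 * remain settable so such a pool can still be administered.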
*/ pair = nvlist_next_nvpair(props, NULL); if (pair != NULL && strcmp(nvpair_name(pair), zpool_prop_to_name(ZPOOL_PROP_CACHEFILE)) == 0 && nvlist_next_nvpair(props, pair) == NULL) { mutex_enter(&spa_namespace_lock); if ((spa = spa_lookup(zc->zc_name)) != NULL) { spa_configfile_set(spa, props, B_FALSE); spa_write_cachefile(spa, B_FALSE, B_TRUE); } mutex_exit(&spa_namespace_lock); if (spa != NULL) { nvlist_free(props); return (0); } } if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) { nvlist_free(props); return (error); } error = spa_prop_set(spa, props); nvlist_free(props); spa_close(spa, FTAG); return (error); } static int zfs_ioc_pool_get_props(zfs_cmd_t *zc) { spa_t *spa; int error; nvlist_t *nvp = NULL; if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) { /* * If the pool is faulted, there may be properties we can still * get (such as altroot and cachefile), so attempt to get them * anyway. */ mutex_enter(&spa_namespace_lock); if ((spa = spa_lookup(zc->zc_name)) != NULL) error = spa_prop_get(spa, &nvp); mutex_exit(&spa_namespace_lock); } else { error = spa_prop_get(spa, &nvp); spa_close(spa, FTAG); } if (error == 0 && zc->zc_nvlist_dst != 0) error = put_nvlist(zc, nvp); else error = SET_ERROR(EFAULT); nvlist_free(nvp); return (error); } /* * innvl: { * "vdevprops_set_vdev" -> guid * "vdevprops_set_props" -> { prop -> value } * } * * outnvl: propname -> error code (int32) */ static const zfs_ioc_key_t zfs_keys_vdev_set_props[] = { {ZPOOL_VDEV_PROPS_SET_VDEV, DATA_TYPE_UINT64, 0}, {ZPOOL_VDEV_PROPS_SET_PROPS, DATA_TYPE_NVLIST, 0} }; static int zfs_ioc_vdev_set_props(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl) { spa_t *spa; int error; vdev_t *vd; uint64_t vdev_guid; /* Early validation */ if (nvlist_lookup_uint64(innvl, ZPOOL_VDEV_PROPS_SET_VDEV, &vdev_guid) != 0) return (SET_ERROR(EINVAL)); if (outnvl == NULL) return (SET_ERROR(EINVAL)); if ((error = spa_open(poolname, &spa, FTAG)) != 0) return (error); ASSERT(spa_writeable(spa)); if ((vd = spa_lookup_by_guid(spa, vdev_guid, B_TRUE)) == NULL) { spa_close(spa, FTAG); return (SET_ERROR(ENOENT)); } error = vdev_prop_set(vd, innvl, outnvl); spa_close(spa, FTAG); return (error); } /* * innvl: { * "vdevprops_get_vdev" -> guid * (optional) "vdevprops_get_props" -> { propname -> propid } * } * * outnvl: propname -> value */ static const zfs_ioc_key_t zfs_keys_vdev_get_props[] = { {ZPOOL_VDEV_PROPS_GET_VDEV, DATA_TYPE_UINT64, 0}, {ZPOOL_VDEV_PROPS_GET_PROPS, DATA_TYPE_NVLIST, ZK_OPTIONAL} }; static int zfs_ioc_vdev_get_props(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl) { spa_t *spa; int error; vdev_t *vd; uint64_t vdev_guid; /* Early validation */ if (nvlist_lookup_uint64(innvl, ZPOOL_VDEV_PROPS_GET_VDEV, &vdev_guid) != 0) return (SET_ERROR(EINVAL)); if (outnvl == NULL) return (SET_ERROR(EINVAL)); if ((error = spa_open(poolname, &spa, FTAG)) != 0) return (error); if ((vd = spa_lookup_by_guid(spa, vdev_guid, B_TRUE)) == NULL) { spa_close(spa, FTAG); return (SET_ERROR(ENOENT)); } error = vdev_prop_get(vd, innvl, outnvl); spa_close(spa, FTAG); return (error); } /* * inputs: * zc_name name of filesystem * zc_nvlist_src{_size} nvlist of delegated permissions * zc_perm_action allow/unallow flag * * outputs: none */ static int zfs_ioc_set_fsacl(zfs_cmd_t *zc) { int error; nvlist_t *fsaclnv = NULL; if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, zc->zc_iflags, &fsaclnv)) != 0) return (error); /* * Verify nvlist is constructed correctly */ if ((error = zfs_deleg_verify_nvlist(fsaclnv)) != 0) { 
nvlist_free(fsaclnv); return (SET_ERROR(EINVAL)); } /* * If we don't have PRIV_SYS_MOUNT, then validate * that user is allowed to hand out each permission in * the nvlist(s) */ error = secpolicy_zfs(CRED()); if (error != 0) { if (zc->zc_perm_action == B_FALSE) { error = dsl_deleg_can_allow(zc->zc_name, fsaclnv, CRED()); } else { error = dsl_deleg_can_unallow(zc->zc_name, fsaclnv, CRED()); } } if (error == 0) error = dsl_deleg_set(zc->zc_name, fsaclnv, zc->zc_perm_action); nvlist_free(fsaclnv); return (error); } /* * inputs: * zc_name name of filesystem * * outputs: * zc_nvlist_src{_size} nvlist of delegated permissions */ static int zfs_ioc_get_fsacl(zfs_cmd_t *zc) { nvlist_t *nvp; int error; if ((error = dsl_deleg_get(zc->zc_name, &nvp)) == 0) { error = put_nvlist(zc, nvp); nvlist_free(nvp); } return (error); } static void zfs_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx) { zfs_creat_t *zct = arg; zfs_create_fs(os, cr, zct->zct_zplprops, tx); } #define ZFS_PROP_UNDEFINED ((uint64_t)-1) /* * inputs: * os parent objset pointer (NULL if root fs) * fuids_ok fuids allowed in this version of the spa? * sa_ok SAs allowed in this version of the spa? * createprops list of properties requested by creator * * outputs: * zplprops values for the zplprops we attach to the master node object * is_ci true if requested file system will be purely case-insensitive * * Determine the settings for utf8only, normalization and * casesensitivity. Specific values may have been requested by the * creator and/or we can inherit values from the parent dataset. If * the file system is of too early a vintage, a creator can not * request settings for these properties, even if the requested * setting is the default value. We don't actually want to create dsl * properties for these, so remove them from the source nvlist after * processing. */ static int zfs_fill_zplprops_impl(objset_t *os, uint64_t zplver, boolean_t fuids_ok, boolean_t sa_ok, nvlist_t *createprops, nvlist_t *zplprops, boolean_t *is_ci) { uint64_t sense = ZFS_PROP_UNDEFINED; uint64_t norm = ZFS_PROP_UNDEFINED; uint64_t u8 = ZFS_PROP_UNDEFINED; int error; ASSERT(zplprops != NULL); /* parent dataset must be a filesystem */ if (os != NULL && os->os_phys->os_type != DMU_OST_ZFS) return (SET_ERROR(ZFS_ERR_WRONG_PARENT)); /* * Pull out creator prop choices, if any. */ if (createprops) { (void) nvlist_lookup_uint64(createprops, zfs_prop_to_name(ZFS_PROP_VERSION), &zplver); (void) nvlist_lookup_uint64(createprops, zfs_prop_to_name(ZFS_PROP_NORMALIZE), &norm); (void) nvlist_remove_all(createprops, zfs_prop_to_name(ZFS_PROP_NORMALIZE)); (void) nvlist_lookup_uint64(createprops, zfs_prop_to_name(ZFS_PROP_UTF8ONLY), &u8); (void) nvlist_remove_all(createprops, zfs_prop_to_name(ZFS_PROP_UTF8ONLY)); (void) nvlist_lookup_uint64(createprops, zfs_prop_to_name(ZFS_PROP_CASE), &sense); (void) nvlist_remove_all(createprops, zfs_prop_to_name(ZFS_PROP_CASE)); } /* * If the zpl version requested is whacky or the file system * or pool is version is too "young" to support normalization * and the creator tried to set a value for one of the props, * error out. 
*/ if ((zplver < ZPL_VERSION_INITIAL || zplver > ZPL_VERSION) || (zplver >= ZPL_VERSION_FUID && !fuids_ok) || (zplver >= ZPL_VERSION_SA && !sa_ok) || (zplver < ZPL_VERSION_NORMALIZATION && (norm != ZFS_PROP_UNDEFINED || u8 != ZFS_PROP_UNDEFINED || sense != ZFS_PROP_UNDEFINED))) return (SET_ERROR(ENOTSUP)); /* * Put the version in the zplprops */ VERIFY(nvlist_add_uint64(zplprops, zfs_prop_to_name(ZFS_PROP_VERSION), zplver) == 0); if (norm == ZFS_PROP_UNDEFINED && (error = zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &norm)) != 0) return (error); VERIFY(nvlist_add_uint64(zplprops, zfs_prop_to_name(ZFS_PROP_NORMALIZE), norm) == 0); /* * If we're normalizing, names must always be valid UTF-8 strings. */ if (norm) u8 = 1; if (u8 == ZFS_PROP_UNDEFINED && (error = zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &u8)) != 0) return (error); VERIFY(nvlist_add_uint64(zplprops, zfs_prop_to_name(ZFS_PROP_UTF8ONLY), u8) == 0); if (sense == ZFS_PROP_UNDEFINED && (error = zfs_get_zplprop(os, ZFS_PROP_CASE, &sense)) != 0) return (error); VERIFY(nvlist_add_uint64(zplprops, zfs_prop_to_name(ZFS_PROP_CASE), sense) == 0); if (is_ci) *is_ci = (sense == ZFS_CASE_INSENSITIVE); return (0); } static int zfs_fill_zplprops(const char *dataset, nvlist_t *createprops, nvlist_t *zplprops, boolean_t *is_ci) { boolean_t fuids_ok, sa_ok; uint64_t zplver = ZPL_VERSION; objset_t *os = NULL; char parentname[ZFS_MAX_DATASET_NAME_LEN]; spa_t *spa; uint64_t spa_vers; int error; zfs_get_parent(dataset, parentname, sizeof (parentname)); if ((error = spa_open(dataset, &spa, FTAG)) != 0) return (error); spa_vers = spa_version(spa); spa_close(spa, FTAG); zplver = zfs_zpl_version_map(spa_vers); fuids_ok = (zplver >= ZPL_VERSION_FUID); sa_ok = (zplver >= ZPL_VERSION_SA); /* * Open parent object set so we can inherit zplprop values. 
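 * Whichever of normalization, utf8only and casesensitivity the creator
 * did not specify is inherited from this parent via zfs_get_zplprop().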
*/ if ((error = dmu_objset_hold(parentname, FTAG, &os)) != 0) return (error); error = zfs_fill_zplprops_impl(os, zplver, fuids_ok, sa_ok, createprops, zplprops, is_ci); dmu_objset_rele(os, FTAG); return (error); } static int zfs_fill_zplprops_root(uint64_t spa_vers, nvlist_t *createprops, nvlist_t *zplprops, boolean_t *is_ci) { boolean_t fuids_ok; boolean_t sa_ok; uint64_t zplver = ZPL_VERSION; int error; zplver = zfs_zpl_version_map(spa_vers); fuids_ok = (zplver >= ZPL_VERSION_FUID); sa_ok = (zplver >= ZPL_VERSION_SA); error = zfs_fill_zplprops_impl(NULL, zplver, fuids_ok, sa_ok, createprops, zplprops, is_ci); return (error); } /* * innvl: { * "type" -> dmu_objset_type_t (int32) * (optional) "props" -> { prop -> value } * (optional) "hidden_args" -> { "wkeydata" -> value } * raw uint8_t array of encryption wrapping key data (32 bytes) * } * * outnvl: propname -> error code (int32) */ static const zfs_ioc_key_t zfs_keys_create[] = { {"type", DATA_TYPE_INT32, 0}, {"props", DATA_TYPE_NVLIST, ZK_OPTIONAL}, {"hidden_args", DATA_TYPE_NVLIST, ZK_OPTIONAL}, }; static int zfs_ioc_create(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl) { int error = 0; zfs_creat_t zct = { 0 }; nvlist_t *nvprops = NULL; nvlist_t *hidden_args = NULL; void (*cbfunc)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx); dmu_objset_type_t type; boolean_t is_insensitive = B_FALSE; dsl_crypto_params_t *dcp = NULL; type = (dmu_objset_type_t)fnvlist_lookup_int32(innvl, "type"); (void) nvlist_lookup_nvlist(innvl, "props", &nvprops); (void) nvlist_lookup_nvlist(innvl, ZPOOL_HIDDEN_ARGS, &hidden_args); switch (type) { case DMU_OST_ZFS: cbfunc = zfs_create_cb; break; case DMU_OST_ZVOL: cbfunc = zvol_create_cb; break; default: cbfunc = NULL; break; } if (strchr(fsname, '@') || strchr(fsname, '%')) return (SET_ERROR(EINVAL)); zct.zct_props = nvprops; if (cbfunc == NULL) return (SET_ERROR(EINVAL)); if (type == DMU_OST_ZVOL) { uint64_t volsize, volblocksize; if (nvprops == NULL) return (SET_ERROR(EINVAL)); if (nvlist_lookup_uint64(nvprops, zfs_prop_to_name(ZFS_PROP_VOLSIZE), &volsize) != 0) return (SET_ERROR(EINVAL)); if ((error = nvlist_lookup_uint64(nvprops, zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), &volblocksize)) != 0 && error != ENOENT) return (SET_ERROR(EINVAL)); if (error != 0) volblocksize = zfs_prop_default_numeric( ZFS_PROP_VOLBLOCKSIZE); if ((error = zvol_check_volblocksize(fsname, volblocksize)) != 0 || (error = zvol_check_volsize(volsize, volblocksize)) != 0) return (error); } else if (type == DMU_OST_ZFS) { int error; /* * We have to have normalization and * case-folding flags correct when we do the * file system creation, so go figure them out * now. */ VERIFY(nvlist_alloc(&zct.zct_zplprops, NV_UNIQUE_NAME, KM_SLEEP) == 0); error = zfs_fill_zplprops(fsname, nvprops, zct.zct_zplprops, &is_insensitive); if (error != 0) { nvlist_free(zct.zct_zplprops); return (error); } } error = dsl_crypto_params_create_nvlist(DCP_CMD_NONE, nvprops, hidden_args, &dcp); if (error != 0) { nvlist_free(zct.zct_zplprops); return (error); } error = dmu_objset_create(fsname, type, is_insensitive ? DS_FLAG_CI_DATASET : 0, dcp, cbfunc, &zct); nvlist_free(zct.zct_zplprops); dsl_crypto_params_free(dcp, !!error); /* * It would be nice to do this atomically. */ if (error == 0) { error = zfs_set_prop_nvlist(fsname, ZPROP_SRC_LOCAL, nvprops, outnvl); if (error != 0) { spa_t *spa; int error2; /* * Volumes will return EBUSY and cannot be destroyed * until all asynchronous minor handling (e.g. from * setting the volmode property) has completed. 
Wait for * the spa_zvol_taskq to drain then retry. */ error2 = dsl_destroy_head(fsname); while ((error2 == EBUSY) && (type == DMU_OST_ZVOL)) { error2 = spa_open(fsname, &spa, FTAG); if (error2 == 0) { taskq_wait(spa->spa_zvol_taskq); spa_close(spa, FTAG); } error2 = dsl_destroy_head(fsname); } } } return (error); } /* * innvl: { * "origin" -> name of origin snapshot * (optional) "props" -> { prop -> value } * (optional) "hidden_args" -> { "wkeydata" -> value } * raw uint8_t array of encryption wrapping key data (32 bytes) * } * * outputs: * outnvl: propname -> error code (int32) */ static const zfs_ioc_key_t zfs_keys_clone[] = { {"origin", DATA_TYPE_STRING, 0}, {"props", DATA_TYPE_NVLIST, ZK_OPTIONAL}, {"hidden_args", DATA_TYPE_NVLIST, ZK_OPTIONAL}, }; static int zfs_ioc_clone(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl) { int error = 0; nvlist_t *nvprops = NULL; const char *origin_name; origin_name = fnvlist_lookup_string(innvl, "origin"); (void) nvlist_lookup_nvlist(innvl, "props", &nvprops); if (strchr(fsname, '@') || strchr(fsname, '%')) return (SET_ERROR(EINVAL)); if (dataset_namecheck(origin_name, NULL, NULL) != 0) return (SET_ERROR(EINVAL)); error = dmu_objset_clone(fsname, origin_name); /* * It would be nice to do this atomically. */ if (error == 0) { error = zfs_set_prop_nvlist(fsname, ZPROP_SRC_LOCAL, nvprops, outnvl); if (error != 0) (void) dsl_destroy_head(fsname); } return (error); } static const zfs_ioc_key_t zfs_keys_remap[] = { /* no nvl keys */ }; static int zfs_ioc_remap(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl) { /* This IOCTL is no longer supported. */ (void) fsname, (void) innvl, (void) outnvl; return (0); } /* * innvl: { * "snaps" -> { snapshot1, snapshot2 } * (optional) "props" -> { prop -> value (string) } * } * * outnvl: snapshot -> error code (int32) */ static const zfs_ioc_key_t zfs_keys_snapshot[] = { {"snaps", DATA_TYPE_NVLIST, 0}, {"props", DATA_TYPE_NVLIST, ZK_OPTIONAL}, }; static int zfs_ioc_snapshot(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl) { nvlist_t *snaps; nvlist_t *props = NULL; int error, poollen; nvpair_t *pair; (void) nvlist_lookup_nvlist(innvl, "props", &props); if (!nvlist_empty(props) && zfs_earlier_version(poolname, SPA_VERSION_SNAP_PROPS)) return (SET_ERROR(ENOTSUP)); if ((error = zfs_check_userprops(props)) != 0) return (error); snaps = fnvlist_lookup_nvlist(innvl, "snaps"); poollen = strlen(poolname); for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL; pair = nvlist_next_nvpair(snaps, pair)) { const char *name = nvpair_name(pair); char *cp = strchr(name, '@'); /* * The snap name must contain an @, and the part after it must * contain only valid characters. */ if (cp == NULL || zfs_component_namecheck(cp + 1, NULL, NULL) != 0) return (SET_ERROR(EINVAL)); /* * The snap must be in the specified pool. */ if (strncmp(name, poolname, poollen) != 0 || (name[poollen] != '/' && name[poollen] != '@')) return (SET_ERROR(EXDEV)); /* * Check for permission to set the properties on the fs. */ if (!nvlist_empty(props)) { *cp = '\0'; error = zfs_secpolicy_write_perms(name, ZFS_DELEG_PERM_USERPROP, CRED()); *cp = '@'; if (error != 0) return (error); } /* This must be the only snap of this fs. 
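 * A single request cannot create two snapshots of the same filesystem,
 * so any other entry that shares the name up to and including the '@'
 * is rejected with EXDEV.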
*/ for (nvpair_t *pair2 = nvlist_next_nvpair(snaps, pair); pair2 != NULL; pair2 = nvlist_next_nvpair(snaps, pair2)) { if (strncmp(name, nvpair_name(pair2), cp - name + 1) == 0) { return (SET_ERROR(EXDEV)); } } } error = dsl_dataset_snapshot(snaps, props, outnvl); return (error); } /* * innvl: "message" -> string */ static const zfs_ioc_key_t zfs_keys_log_history[] = { {"message", DATA_TYPE_STRING, 0}, }; static int zfs_ioc_log_history(const char *unused, nvlist_t *innvl, nvlist_t *outnvl) { (void) unused, (void) outnvl; const char *message; char *poolname; spa_t *spa; int error; /* * The poolname in the ioctl is not set, we get it from the TSD, * which was set at the end of the last successful ioctl that allows * logging. The secpolicy func already checked that it is set. * Only one log ioctl is allowed after each successful ioctl, so * we clear the TSD here. */ poolname = tsd_get(zfs_allow_log_key); if (poolname == NULL) return (SET_ERROR(EINVAL)); (void) tsd_set(zfs_allow_log_key, NULL); error = spa_open(poolname, &spa, FTAG); kmem_strfree(poolname); if (error != 0) return (error); message = fnvlist_lookup_string(innvl, "message"); if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY) { spa_close(spa, FTAG); return (SET_ERROR(ENOTSUP)); } error = spa_history_log(spa, message); spa_close(spa, FTAG); return (error); } /* * This ioctl is used to set the bootenv configuration on the current * pool. This configuration is stored in the second padding area of the label, * and it is used by the bootloader(s) to store the bootloader and/or system * specific data. * The data is stored as nvlist data stream, and is protected by * an embedded checksum. * The version can have two possible values: * VB_RAW: nvlist should have key GRUB_ENVMAP, value DATA_TYPE_STRING. * VB_NVLIST: nvlist with arbitrary pairs. */ static const zfs_ioc_key_t zfs_keys_set_bootenv[] = { {"version", DATA_TYPE_UINT64, 0}, {"", DATA_TYPE_ANY, ZK_OPTIONAL | ZK_WILDCARDLIST}, }; static int zfs_ioc_set_bootenv(const char *name, nvlist_t *innvl, nvlist_t *outnvl) { int error; spa_t *spa; if ((error = spa_open(name, &spa, FTAG)) != 0) return (error); spa_vdev_state_enter(spa, SCL_ALL); error = vdev_label_write_bootenv(spa->spa_root_vdev, innvl); (void) spa_vdev_state_exit(spa, NULL, 0); spa_close(spa, FTAG); return (error); } static const zfs_ioc_key_t zfs_keys_get_bootenv[] = { /* no nvl keys */ }; static int zfs_ioc_get_bootenv(const char *name, nvlist_t *innvl, nvlist_t *outnvl) { spa_t *spa; int error; if ((error = spa_open(name, &spa, FTAG)) != 0) return (error); spa_vdev_state_enter(spa, SCL_ALL); error = vdev_label_read_bootenv(spa->spa_root_vdev, outnvl); (void) spa_vdev_state_exit(spa, NULL, 0); spa_close(spa, FTAG); return (error); } /* * The dp_config_rwlock must not be held when calling this, because the * unmount may need to write out data. * * This function is best-effort. Callers must deal gracefully if it * remains mounted (or is remounted after this call). * * Returns 0 if the argument is not a snapshot, or it is not currently a * filesystem, or we were able to unmount it. Returns error code otherwise. */ void zfs_unmount_snap(const char *snapname) { if (strchr(snapname, '@') == NULL) return; (void) zfsctl_snapshot_unmount(snapname, MNT_FORCE); } static int zfs_unmount_snap_cb(const char *snapname, void *arg) { (void) arg; zfs_unmount_snap(snapname); return (0); } /* * When a clone is destroyed, its origin may also need to be destroyed, * in which case it must be unmounted. 
This routine will do that unmount * if necessary. */ void zfs_destroy_unmount_origin(const char *fsname) { int error; objset_t *os; dsl_dataset_t *ds; error = dmu_objset_hold(fsname, FTAG, &os); if (error != 0) return; ds = dmu_objset_ds(os); if (dsl_dir_is_clone(ds->ds_dir) && DS_IS_DEFER_DESTROY(ds->ds_prev)) { char originname[ZFS_MAX_DATASET_NAME_LEN]; dsl_dataset_name(ds->ds_prev, originname); dmu_objset_rele(os, FTAG); zfs_unmount_snap(originname); } else { dmu_objset_rele(os, FTAG); } } /* * innvl: { * "snaps" -> { snapshot1, snapshot2 } * (optional boolean) "defer" * } * * outnvl: snapshot -> error code (int32) */ static const zfs_ioc_key_t zfs_keys_destroy_snaps[] = { {"snaps", DATA_TYPE_NVLIST, 0}, {"defer", DATA_TYPE_BOOLEAN, ZK_OPTIONAL}, }; static int zfs_ioc_destroy_snaps(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl) { int poollen; nvlist_t *snaps; nvpair_t *pair; boolean_t defer; spa_t *spa; snaps = fnvlist_lookup_nvlist(innvl, "snaps"); defer = nvlist_exists(innvl, "defer"); poollen = strlen(poolname); for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL; pair = nvlist_next_nvpair(snaps, pair)) { const char *name = nvpair_name(pair); /* * The snap must be in the specified pool to prevent the * invalid removal of zvol minors below. */ if (strncmp(name, poolname, poollen) != 0 || (name[poollen] != '/' && name[poollen] != '@')) return (SET_ERROR(EXDEV)); zfs_unmount_snap(nvpair_name(pair)); if (spa_open(name, &spa, FTAG) == 0) { zvol_remove_minors(spa, name, B_TRUE); spa_close(spa, FTAG); } } return (dsl_destroy_snapshots_nvl(snaps, defer, outnvl)); } /* * Create bookmarks. The bookmark names are of the form #. * All bookmarks and snapshots must be in the same pool. * dsl_bookmark_create_nvl_validate describes the nvlist schema in more detail. * * innvl: { * new_bookmark1 -> existing_snapshot, * new_bookmark2 -> existing_bookmark, * } * * outnvl: bookmark -> error code (int32) * */ static const zfs_ioc_key_t zfs_keys_bookmark[] = { {"...", DATA_TYPE_STRING, ZK_WILDCARDLIST}, }; static int zfs_ioc_bookmark(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl) { (void) poolname; return (dsl_bookmark_create(innvl, outnvl)); } /* * innvl: { * property 1, property 2, ... * } * * outnvl: { * bookmark name 1 -> { property 1, property 2, ... }, * bookmark name 2 -> { property 1, property 2, ... } * } * */ static const zfs_ioc_key_t zfs_keys_get_bookmarks[] = { {"...", DATA_TYPE_BOOLEAN, ZK_WILDCARDLIST | ZK_OPTIONAL}, }; static int zfs_ioc_get_bookmarks(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl) { return (dsl_get_bookmarks(fsname, innvl, outnvl)); } /* * innvl is not used. * * outnvl: { * property 1, property 2, ... 
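 *     (for example "guid", "createtxg" and "creation"; the exact set
 *     returned by dsl_get_bookmark_props() depends on the bookmark)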
* } * */ static const zfs_ioc_key_t zfs_keys_get_bookmark_props[] = { /* no nvl keys */ }; static int zfs_ioc_get_bookmark_props(const char *bookmark, nvlist_t *innvl, nvlist_t *outnvl) { (void) innvl; char fsname[ZFS_MAX_DATASET_NAME_LEN]; char *bmname; bmname = strchr(bookmark, '#'); if (bmname == NULL) return (SET_ERROR(EINVAL)); bmname++; (void) strlcpy(fsname, bookmark, sizeof (fsname)); *(strchr(fsname, '#')) = '\0'; return (dsl_get_bookmark_props(fsname, bmname, outnvl)); } /* * innvl: { * bookmark name 1, bookmark name 2 * } * * outnvl: bookmark -> error code (int32) * */ static const zfs_ioc_key_t zfs_keys_destroy_bookmarks[] = { {"...", DATA_TYPE_BOOLEAN, ZK_WILDCARDLIST}, }; static int zfs_ioc_destroy_bookmarks(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl) { int error, poollen; poollen = strlen(poolname); for (nvpair_t *pair = nvlist_next_nvpair(innvl, NULL); pair != NULL; pair = nvlist_next_nvpair(innvl, pair)) { const char *name = nvpair_name(pair); const char *cp = strchr(name, '#'); /* * The bookmark name must contain an #, and the part after it * must contain only valid characters. */ if (cp == NULL || zfs_component_namecheck(cp + 1, NULL, NULL) != 0) return (SET_ERROR(EINVAL)); /* * The bookmark must be in the specified pool. */ if (strncmp(name, poolname, poollen) != 0 || (name[poollen] != '/' && name[poollen] != '#')) return (SET_ERROR(EXDEV)); } error = dsl_bookmark_destroy(innvl, outnvl); return (error); } static const zfs_ioc_key_t zfs_keys_channel_program[] = { {"program", DATA_TYPE_STRING, 0}, {"arg", DATA_TYPE_ANY, 0}, {"sync", DATA_TYPE_BOOLEAN_VALUE, ZK_OPTIONAL}, {"instrlimit", DATA_TYPE_UINT64, ZK_OPTIONAL}, {"memlimit", DATA_TYPE_UINT64, ZK_OPTIONAL}, }; static int zfs_ioc_channel_program(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl) { char *program; uint64_t instrlimit, memlimit; boolean_t sync_flag; nvpair_t *nvarg = NULL; program = fnvlist_lookup_string(innvl, ZCP_ARG_PROGRAM); if (0 != nvlist_lookup_boolean_value(innvl, ZCP_ARG_SYNC, &sync_flag)) { sync_flag = B_TRUE; } if (0 != nvlist_lookup_uint64(innvl, ZCP_ARG_INSTRLIMIT, &instrlimit)) { instrlimit = ZCP_DEFAULT_INSTRLIMIT; } if (0 != nvlist_lookup_uint64(innvl, ZCP_ARG_MEMLIMIT, &memlimit)) { memlimit = ZCP_DEFAULT_MEMLIMIT; } nvarg = fnvlist_lookup_nvpair(innvl, ZCP_ARG_ARGLIST); if (instrlimit == 0 || instrlimit > zfs_lua_max_instrlimit) return (SET_ERROR(EINVAL)); if (memlimit == 0 || memlimit > zfs_lua_max_memlimit) return (SET_ERROR(EINVAL)); return (zcp_eval(poolname, program, sync_flag, instrlimit, memlimit, nvarg, outnvl)); } /* * innvl: unused * outnvl: empty */ static const zfs_ioc_key_t zfs_keys_pool_checkpoint[] = { /* no nvl keys */ }; static int zfs_ioc_pool_checkpoint(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl) { (void) innvl, (void) outnvl; return (spa_checkpoint(poolname)); } /* * innvl: unused * outnvl: empty */ static const zfs_ioc_key_t zfs_keys_pool_discard_checkpoint[] = { /* no nvl keys */ }; static int zfs_ioc_pool_discard_checkpoint(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl) { (void) innvl, (void) outnvl; return (spa_checkpoint_discard(poolname)); } /* * inputs: * zc_name name of dataset to destroy * zc_defer_destroy mark for deferred destroy * * outputs: none */ static int zfs_ioc_destroy(zfs_cmd_t *zc) { objset_t *os; dmu_objset_type_t ost; int err; err = dmu_objset_hold(zc->zc_name, FTAG, &os); if (err != 0) return (err); ost = dmu_objset_type(os); dmu_objset_rele(os, FTAG); if (ost == DMU_OST_ZFS) 
zfs_unmount_snap(zc->zc_name); if (strchr(zc->zc_name, '@')) { err = dsl_destroy_snapshot(zc->zc_name, zc->zc_defer_destroy); } else { err = dsl_destroy_head(zc->zc_name); if (err == EEXIST) { /* * It is possible that the given DS may have * hidden child (%recv) datasets - "leftovers" * resulting from the previously interrupted * 'zfs receive'. * * 6 extra bytes for /%recv */ char namebuf[ZFS_MAX_DATASET_NAME_LEN + 6]; if (snprintf(namebuf, sizeof (namebuf), "%s/%s", zc->zc_name, recv_clone_name) >= sizeof (namebuf)) return (SET_ERROR(EINVAL)); /* * Try to remove the hidden child (%recv) and after * that try to remove the target dataset. * If the hidden child (%recv) does not exist * the original error (EEXIST) will be returned */ err = dsl_destroy_head(namebuf); if (err == 0) err = dsl_destroy_head(zc->zc_name); else if (err == ENOENT) err = SET_ERROR(EEXIST); } } return (err); } /* * innvl: { * "initialize_command" -> POOL_INITIALIZE_{CANCEL|START|SUSPEND} (uint64) * "initialize_vdevs": { -> guids to initialize (nvlist) * "vdev_path_1": vdev_guid_1, (uint64), * "vdev_path_2": vdev_guid_2, (uint64), * ... * }, * } * * outnvl: { * "initialize_vdevs": { -> initialization errors (nvlist) * "vdev_path_1": errno, see function body for possible errnos (uint64) * "vdev_path_2": errno, ... (uint64) * ... * } * } * * EINVAL is returned for an unknown commands or if any of the provided vdev * guids have be specified with a type other than uint64. */ static const zfs_ioc_key_t zfs_keys_pool_initialize[] = { {ZPOOL_INITIALIZE_COMMAND, DATA_TYPE_UINT64, 0}, {ZPOOL_INITIALIZE_VDEVS, DATA_TYPE_NVLIST, 0} }; static int zfs_ioc_pool_initialize(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl) { uint64_t cmd_type; if (nvlist_lookup_uint64(innvl, ZPOOL_INITIALIZE_COMMAND, &cmd_type) != 0) { return (SET_ERROR(EINVAL)); } if (!(cmd_type == POOL_INITIALIZE_CANCEL || cmd_type == POOL_INITIALIZE_START || cmd_type == POOL_INITIALIZE_SUSPEND)) { return (SET_ERROR(EINVAL)); } nvlist_t *vdev_guids; if (nvlist_lookup_nvlist(innvl, ZPOOL_INITIALIZE_VDEVS, &vdev_guids) != 0) { return (SET_ERROR(EINVAL)); } for (nvpair_t *pair = nvlist_next_nvpair(vdev_guids, NULL); pair != NULL; pair = nvlist_next_nvpair(vdev_guids, pair)) { uint64_t vdev_guid; if (nvpair_value_uint64(pair, &vdev_guid) != 0) { return (SET_ERROR(EINVAL)); } } spa_t *spa; int error = spa_open(poolname, &spa, FTAG); if (error != 0) return (error); nvlist_t *vdev_errlist = fnvlist_alloc(); int total_errors = spa_vdev_initialize(spa, vdev_guids, cmd_type, vdev_errlist); if (fnvlist_size(vdev_errlist) > 0) { fnvlist_add_nvlist(outnvl, ZPOOL_INITIALIZE_VDEVS, vdev_errlist); } fnvlist_free(vdev_errlist); spa_close(spa, FTAG); return (total_errors > 0 ? SET_ERROR(EINVAL) : 0); } /* * innvl: { * "trim_command" -> POOL_TRIM_{CANCEL|START|SUSPEND} (uint64) * "trim_vdevs": { -> guids to TRIM (nvlist) * "vdev_path_1": vdev_guid_1, (uint64), * "vdev_path_2": vdev_guid_2, (uint64), * ... * }, * "trim_rate" -> Target TRIM rate in bytes/sec. * "trim_secure" -> Set to request a secure TRIM. * } * * outnvl: { * "trim_vdevs": { -> TRIM errors (nvlist) * "vdev_path_1": errno, see function body for possible errnos (uint64) * "vdev_path_2": errno, ... (uint64) * ... * } * } * * EINVAL is returned for an unknown commands or if any of the provided vdev * guids have be specified with a type other than uint64. 
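 *
 * Illustrative caller sketch (hypothetical values, kernel nvpair API)
 * showing an innvl that starts a secure TRIM on a single vdev:
 *
 *     nvlist_t *vdevs = fnvlist_alloc();
 *     fnvlist_add_uint64(vdevs, "/dev/disk/by-id/example", vdev_guid);
 *
 *     nvlist_t *innvl = fnvlist_alloc();
 *     fnvlist_add_uint64(innvl, ZPOOL_TRIM_COMMAND, POOL_TRIM_START);
 *     fnvlist_add_nvlist(innvl, ZPOOL_TRIM_VDEVS, vdevs);
 *     fnvlist_add_uint64(innvl, ZPOOL_TRIM_RATE, 64ULL << 20);
 *     fnvlist_add_boolean_value(innvl, ZPOOL_TRIM_SECURE, B_TRUE);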
*/ static const zfs_ioc_key_t zfs_keys_pool_trim[] = { {ZPOOL_TRIM_COMMAND, DATA_TYPE_UINT64, 0}, {ZPOOL_TRIM_VDEVS, DATA_TYPE_NVLIST, 0}, {ZPOOL_TRIM_RATE, DATA_TYPE_UINT64, ZK_OPTIONAL}, {ZPOOL_TRIM_SECURE, DATA_TYPE_BOOLEAN_VALUE, ZK_OPTIONAL}, }; static int zfs_ioc_pool_trim(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl) { uint64_t cmd_type; if (nvlist_lookup_uint64(innvl, ZPOOL_TRIM_COMMAND, &cmd_type) != 0) return (SET_ERROR(EINVAL)); if (!(cmd_type == POOL_TRIM_CANCEL || cmd_type == POOL_TRIM_START || cmd_type == POOL_TRIM_SUSPEND)) { return (SET_ERROR(EINVAL)); } nvlist_t *vdev_guids; if (nvlist_lookup_nvlist(innvl, ZPOOL_TRIM_VDEVS, &vdev_guids) != 0) return (SET_ERROR(EINVAL)); for (nvpair_t *pair = nvlist_next_nvpair(vdev_guids, NULL); pair != NULL; pair = nvlist_next_nvpair(vdev_guids, pair)) { uint64_t vdev_guid; if (nvpair_value_uint64(pair, &vdev_guid) != 0) { return (SET_ERROR(EINVAL)); } } /* Optional, defaults to maximum rate when not provided */ uint64_t rate; if (nvlist_lookup_uint64(innvl, ZPOOL_TRIM_RATE, &rate) != 0) rate = 0; /* Optional, defaults to standard TRIM when not provided */ boolean_t secure; if (nvlist_lookup_boolean_value(innvl, ZPOOL_TRIM_SECURE, &secure) != 0) { secure = B_FALSE; } spa_t *spa; int error = spa_open(poolname, &spa, FTAG); if (error != 0) return (error); nvlist_t *vdev_errlist = fnvlist_alloc(); int total_errors = spa_vdev_trim(spa, vdev_guids, cmd_type, rate, !!zfs_trim_metaslab_skip, secure, vdev_errlist); if (fnvlist_size(vdev_errlist) > 0) fnvlist_add_nvlist(outnvl, ZPOOL_TRIM_VDEVS, vdev_errlist); fnvlist_free(vdev_errlist); spa_close(spa, FTAG); return (total_errors > 0 ? SET_ERROR(EINVAL) : 0); } /* * This ioctl waits for activity of a particular type to complete. If there is * no activity of that type in progress, it returns immediately, and the * returned value "waited" is false. If there is activity in progress, and no * tag is passed in, the ioctl blocks until all activity of that type is * complete, and then returns with "waited" set to true. * * If a tag is provided, it identifies a particular instance of an activity to * wait for. Currently, this is only valid for use with 'initialize', because * that is the only activity for which there can be multiple instances running * concurrently. In the case of 'initialize', the tag corresponds to the guid of * the vdev on which to wait. * * If a thread waiting in the ioctl receives a signal, the call will return * immediately, and the return value will be EINTR. * * innvl: { * "wait_activity" -> int32_t * (optional) "wait_tag" -> uint64_t * } * * outnvl: "waited" -> boolean_t */ static const zfs_ioc_key_t zfs_keys_pool_wait[] = { {ZPOOL_WAIT_ACTIVITY, DATA_TYPE_INT32, 0}, {ZPOOL_WAIT_TAG, DATA_TYPE_UINT64, ZK_OPTIONAL}, }; static int zfs_ioc_wait(const char *name, nvlist_t *innvl, nvlist_t *outnvl) { int32_t activity; uint64_t tag; boolean_t waited; int error; if (nvlist_lookup_int32(innvl, ZPOOL_WAIT_ACTIVITY, &activity) != 0) return (EINVAL); if (nvlist_lookup_uint64(innvl, ZPOOL_WAIT_TAG, &tag) == 0) error = spa_wait_tag(name, activity, tag, &waited); else error = spa_wait(name, activity, &waited); if (error == 0) fnvlist_add_boolean_value(outnvl, ZPOOL_WAIT_WAITED, waited); return (error); } /* * This ioctl waits for activity of a particular type to complete. If there is * no activity of that type in progress, it returns immediately, and the * returned value "waited" is false. 
If there is activity in progress, and no * tag is passed in, the ioctl blocks until all activity of that type is * complete, and then returns with "waited" set to true. * * If a thread waiting in the ioctl receives a signal, the call will return * immediately, and the return value will be EINTR. * * innvl: { * "wait_activity" -> int32_t * } * * outnvl: "waited" -> boolean_t */ static const zfs_ioc_key_t zfs_keys_fs_wait[] = { {ZFS_WAIT_ACTIVITY, DATA_TYPE_INT32, 0}, }; static int zfs_ioc_wait_fs(const char *name, nvlist_t *innvl, nvlist_t *outnvl) { int32_t activity; boolean_t waited = B_FALSE; int error; dsl_pool_t *dp; dsl_dir_t *dd; dsl_dataset_t *ds; if (nvlist_lookup_int32(innvl, ZFS_WAIT_ACTIVITY, &activity) != 0) return (SET_ERROR(EINVAL)); if (activity >= ZFS_WAIT_NUM_ACTIVITIES || activity < 0) return (SET_ERROR(EINVAL)); if ((error = dsl_pool_hold(name, FTAG, &dp)) != 0) return (error); if ((error = dsl_dataset_hold(dp, name, FTAG, &ds)) != 0) { dsl_pool_rele(dp, FTAG); return (error); } dd = ds->ds_dir; mutex_enter(&dd->dd_activity_lock); dd->dd_activity_waiters++; /* * We get a long-hold here so that the dsl_dataset_t and dsl_dir_t * aren't evicted while we're waiting. Normally this is prevented by * holding the pool, but we can't do that while we're waiting since * that would prevent TXGs from syncing out. Some of the functionality * of long-holds (e.g. preventing deletion) is unnecessary for this * case, since we would cancel the waiters before proceeding with a * deletion. An alternative mechanism for keeping the dataset around * could be developed but this is simpler. */ dsl_dataset_long_hold(ds, FTAG); dsl_pool_rele(dp, FTAG); error = dsl_dir_wait(dd, ds, activity, &waited); dsl_dataset_long_rele(ds, FTAG); dd->dd_activity_waiters--; if (dd->dd_activity_waiters == 0) cv_signal(&dd->dd_activity_cv); mutex_exit(&dd->dd_activity_lock); dsl_dataset_rele(ds, FTAG); if (error == 0) fnvlist_add_boolean_value(outnvl, ZFS_WAIT_WAITED, waited); return (error); } /* * fsname is name of dataset to rollback (to most recent snapshot) * * innvl may contain name of expected target snapshot * * outnvl: "target" -> name of most recent snapshot * } */ static const zfs_ioc_key_t zfs_keys_rollback[] = { {"target", DATA_TYPE_STRING, ZK_OPTIONAL}, }; static int zfs_ioc_rollback(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl) { zfsvfs_t *zfsvfs; zvol_state_handle_t *zv; char *target = NULL; int error; (void) nvlist_lookup_string(innvl, "target", &target); if (target != NULL) { const char *cp = strchr(target, '@'); /* * The snap name must contain an @, and the part after it must * contain only valid characters. */ if (cp == NULL || zfs_component_namecheck(cp + 1, NULL, NULL) != 0) return (SET_ERROR(EINVAL)); } if (getzfsvfs(fsname, &zfsvfs) == 0) { dsl_dataset_t *ds; ds = dmu_objset_ds(zfsvfs->z_os); error = zfs_suspend_fs(zfsvfs); if (error == 0) { int resume_err; error = dsl_dataset_rollback(fsname, target, zfsvfs, outnvl); resume_err = zfs_resume_fs(zfsvfs, ds); error = error ? 
error : resume_err; } zfs_vfs_rele(zfsvfs); } else if ((zv = zvol_suspend(fsname)) != NULL) { error = dsl_dataset_rollback(fsname, target, zvol_tag(zv), outnvl); zvol_resume(zv); } else { error = dsl_dataset_rollback(fsname, target, NULL, outnvl); } return (error); } static int recursive_unmount(const char *fsname, void *arg) { const char *snapname = arg; char *fullname; fullname = kmem_asprintf("%s@%s", fsname, snapname); zfs_unmount_snap(fullname); kmem_strfree(fullname); return (0); } /* * * snapname is the snapshot to redact. * innvl: { * "bookname" -> (string) * shortname of the redaction bookmark to generate * "snapnv" -> (nvlist, values ignored) * snapshots to redact snapname with respect to * } * * outnvl is unused */ static const zfs_ioc_key_t zfs_keys_redact[] = { {"bookname", DATA_TYPE_STRING, 0}, {"snapnv", DATA_TYPE_NVLIST, 0}, }; static int zfs_ioc_redact(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl) { (void) outnvl; nvlist_t *redactnvl = NULL; char *redactbook = NULL; if (nvlist_lookup_nvlist(innvl, "snapnv", &redactnvl) != 0) return (SET_ERROR(EINVAL)); if (fnvlist_num_pairs(redactnvl) == 0) return (SET_ERROR(ENXIO)); if (nvlist_lookup_string(innvl, "bookname", &redactbook) != 0) return (SET_ERROR(EINVAL)); return (dmu_redact_snap(snapname, redactnvl, redactbook)); } /* * inputs: * zc_name old name of dataset * zc_value new name of dataset * zc_cookie recursive flag (only valid for snapshots) * * outputs: none */ static int zfs_ioc_rename(zfs_cmd_t *zc) { objset_t *os; dmu_objset_type_t ost; boolean_t recursive = zc->zc_cookie & 1; boolean_t nounmount = !!(zc->zc_cookie & 2); char *at; int err; /* "zfs rename" from and to ...%recv datasets should both fail */ zc->zc_name[sizeof (zc->zc_name) - 1] = '\0'; zc->zc_value[sizeof (zc->zc_value) - 1] = '\0'; if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0 || dataset_namecheck(zc->zc_value, NULL, NULL) != 0 || strchr(zc->zc_name, '%') || strchr(zc->zc_value, '%')) return (SET_ERROR(EINVAL)); err = dmu_objset_hold(zc->zc_name, FTAG, &os); if (err != 0) return (err); ost = dmu_objset_type(os); dmu_objset_rele(os, FTAG); at = strchr(zc->zc_name, '@'); if (at != NULL) { /* snaps must be in same fs */ int error; if (strncmp(zc->zc_name, zc->zc_value, at - zc->zc_name + 1)) return (SET_ERROR(EXDEV)); *at = '\0'; if (ost == DMU_OST_ZFS && !nounmount) { error = dmu_objset_find(zc->zc_name, recursive_unmount, at + 1, recursive ? 
DS_FIND_CHILDREN : 0); if (error != 0) { *at = '@'; return (error); } } error = dsl_dataset_rename_snapshot(zc->zc_name, at + 1, strchr(zc->zc_value, '@') + 1, recursive); *at = '@'; return (error); } else { return (dsl_dir_rename(zc->zc_name, zc->zc_value)); } } static int zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr) { const char *propname = nvpair_name(pair); boolean_t issnap = (strchr(dsname, '@') != NULL); zfs_prop_t prop = zfs_name_to_prop(propname); uint64_t intval, compval; int err; if (prop == ZPROP_USERPROP) { if (zfs_prop_user(propname)) { if ((err = zfs_secpolicy_write_perms(dsname, ZFS_DELEG_PERM_USERPROP, cr))) return (err); return (0); } if (!issnap && zfs_prop_userquota(propname)) { const char *perm = NULL; const char *uq_prefix = zfs_userquota_prop_prefixes[ZFS_PROP_USERQUOTA]; const char *gq_prefix = zfs_userquota_prop_prefixes[ZFS_PROP_GROUPQUOTA]; const char *uiq_prefix = zfs_userquota_prop_prefixes[ZFS_PROP_USEROBJQUOTA]; const char *giq_prefix = zfs_userquota_prop_prefixes[ZFS_PROP_GROUPOBJQUOTA]; const char *pq_prefix = zfs_userquota_prop_prefixes[ZFS_PROP_PROJECTQUOTA]; const char *piq_prefix = zfs_userquota_prop_prefixes[\ ZFS_PROP_PROJECTOBJQUOTA]; if (strncmp(propname, uq_prefix, strlen(uq_prefix)) == 0) { perm = ZFS_DELEG_PERM_USERQUOTA; } else if (strncmp(propname, uiq_prefix, strlen(uiq_prefix)) == 0) { perm = ZFS_DELEG_PERM_USEROBJQUOTA; } else if (strncmp(propname, gq_prefix, strlen(gq_prefix)) == 0) { perm = ZFS_DELEG_PERM_GROUPQUOTA; } else if (strncmp(propname, giq_prefix, strlen(giq_prefix)) == 0) { perm = ZFS_DELEG_PERM_GROUPOBJQUOTA; } else if (strncmp(propname, pq_prefix, strlen(pq_prefix)) == 0) { perm = ZFS_DELEG_PERM_PROJECTQUOTA; } else if (strncmp(propname, piq_prefix, strlen(piq_prefix)) == 0) { perm = ZFS_DELEG_PERM_PROJECTOBJQUOTA; } else { /* {USER|GROUP|PROJECT}USED are read-only */ return (SET_ERROR(EINVAL)); } if ((err = zfs_secpolicy_write_perms(dsname, perm, cr))) return (err); return (0); } return (SET_ERROR(EINVAL)); } if (issnap) return (SET_ERROR(EINVAL)); if (nvpair_type(pair) == DATA_TYPE_NVLIST) { /* * dsl_prop_get_all_impl() returns properties in this * format. */ nvlist_t *attrs; VERIFY(nvpair_value_nvlist(pair, &attrs) == 0); VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE, &pair) == 0); } /* * Check that this value is valid for this pool version */ switch (prop) { case ZFS_PROP_COMPRESSION: /* * If the user specified gzip compression, make sure * the SPA supports it. We ignore any errors here since * we'll catch them later. 
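 * (The same gating applies to zle, lz4 and zstd just below.)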
*/ if (nvpair_value_uint64(pair, &intval) == 0) { compval = ZIO_COMPRESS_ALGO(intval); if (compval >= ZIO_COMPRESS_GZIP_1 && compval <= ZIO_COMPRESS_GZIP_9 && zfs_earlier_version(dsname, SPA_VERSION_GZIP_COMPRESSION)) { return (SET_ERROR(ENOTSUP)); } if (compval == ZIO_COMPRESS_ZLE && zfs_earlier_version(dsname, SPA_VERSION_ZLE_COMPRESSION)) return (SET_ERROR(ENOTSUP)); if (compval == ZIO_COMPRESS_LZ4) { spa_t *spa; if ((err = spa_open(dsname, &spa, FTAG)) != 0) return (err); if (!spa_feature_is_enabled(spa, SPA_FEATURE_LZ4_COMPRESS)) { spa_close(spa, FTAG); return (SET_ERROR(ENOTSUP)); } spa_close(spa, FTAG); } if (compval == ZIO_COMPRESS_ZSTD) { spa_t *spa; if ((err = spa_open(dsname, &spa, FTAG)) != 0) return (err); if (!spa_feature_is_enabled(spa, SPA_FEATURE_ZSTD_COMPRESS)) { spa_close(spa, FTAG); return (SET_ERROR(ENOTSUP)); } spa_close(spa, FTAG); } } break; case ZFS_PROP_COPIES: if (zfs_earlier_version(dsname, SPA_VERSION_DITTO_BLOCKS)) return (SET_ERROR(ENOTSUP)); break; case ZFS_PROP_VOLBLOCKSIZE: case ZFS_PROP_RECORDSIZE: /* Record sizes above 128k need the feature to be enabled */ if (nvpair_value_uint64(pair, &intval) == 0 && intval > SPA_OLD_MAXBLOCKSIZE) { spa_t *spa; /* * We don't allow setting the property above 1MB, * unless the tunable has been changed. */ if (intval > zfs_max_recordsize || intval > SPA_MAXBLOCKSIZE) return (SET_ERROR(ERANGE)); if ((err = spa_open(dsname, &spa, FTAG)) != 0) return (err); if (!spa_feature_is_enabled(spa, SPA_FEATURE_LARGE_BLOCKS)) { spa_close(spa, FTAG); return (SET_ERROR(ENOTSUP)); } spa_close(spa, FTAG); } break; case ZFS_PROP_DNODESIZE: /* Dnode sizes above 512 need the feature to be enabled */ if (nvpair_value_uint64(pair, &intval) == 0 && intval != ZFS_DNSIZE_LEGACY) { spa_t *spa; if ((err = spa_open(dsname, &spa, FTAG)) != 0) return (err); if (!spa_feature_is_enabled(spa, SPA_FEATURE_LARGE_DNODE)) { spa_close(spa, FTAG); return (SET_ERROR(ENOTSUP)); } spa_close(spa, FTAG); } break; case ZFS_PROP_SPECIAL_SMALL_BLOCKS: /* * This property could require the allocation classes * feature to be active for setting, however we allow * it so that tests of settable properties succeed. * The CLI will issue a warning in this case. */ break; case ZFS_PROP_SHARESMB: if (zpl_earlier_version(dsname, ZPL_VERSION_FUID)) return (SET_ERROR(ENOTSUP)); break; case ZFS_PROP_ACLINHERIT: if (nvpair_type(pair) == DATA_TYPE_UINT64 && nvpair_value_uint64(pair, &intval) == 0) { if (intval == ZFS_ACL_PASSTHROUGH_X && zfs_earlier_version(dsname, SPA_VERSION_PASSTHROUGH_X)) return (SET_ERROR(ENOTSUP)); } break; case ZFS_PROP_CHECKSUM: case ZFS_PROP_DEDUP: { spa_feature_t feature; spa_t *spa; int err; /* dedup feature version checks */ if (prop == ZFS_PROP_DEDUP && zfs_earlier_version(dsname, SPA_VERSION_DEDUP)) return (SET_ERROR(ENOTSUP)); if (nvpair_type(pair) == DATA_TYPE_UINT64 && nvpair_value_uint64(pair, &intval) == 0) { /* check prop value is enabled in features */ feature = zio_checksum_to_feature( intval & ZIO_CHECKSUM_MASK); if (feature == SPA_FEATURE_NONE) break; if ((err = spa_open(dsname, &spa, FTAG)) != 0) return (err); if (!spa_feature_is_enabled(spa, feature)) { spa_close(spa, FTAG); return (SET_ERROR(ENOTSUP)); } spa_close(spa, FTAG); } break; } default: break; } return (zfs_secpolicy_setprop(dsname, prop, pair, CRED())); } /* * Removes properties from the given props list that fail permission checks * needed to clear them and to restore them in case of a receive error. For each * property, make sure we have both set and inherit permissions. 
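 * (Both checks are needed because a failed receive may have to restore
 * a property either by setting it again or by re-inheriting it; see the
 * error paths at the end of zfs_ioc_recv_impl().)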
* * Returns the first error encountered if any permission checks fail. If the * caller provides a non-NULL errlist, it also gives the complete list of names * of all the properties that failed a permission check along with the * corresponding error numbers. The caller is responsible for freeing the * returned errlist. * * If every property checks out successfully, zero is returned and the list * pointed at by errlist is NULL. */ static int zfs_check_clearable(const char *dataset, nvlist_t *props, nvlist_t **errlist) { zfs_cmd_t *zc; nvpair_t *pair, *next_pair; nvlist_t *errors; int err, rv = 0; if (props == NULL) return (0); VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0); zc = kmem_alloc(sizeof (zfs_cmd_t), KM_SLEEP); (void) strlcpy(zc->zc_name, dataset, sizeof (zc->zc_name)); pair = nvlist_next_nvpair(props, NULL); while (pair != NULL) { next_pair = nvlist_next_nvpair(props, pair); (void) strlcpy(zc->zc_value, nvpair_name(pair), sizeof (zc->zc_value)); if ((err = zfs_check_settable(dataset, pair, CRED())) != 0 || (err = zfs_secpolicy_inherit_prop(zc, NULL, CRED())) != 0) { VERIFY(nvlist_remove_nvpair(props, pair) == 0); VERIFY(nvlist_add_int32(errors, zc->zc_value, err) == 0); } pair = next_pair; } kmem_free(zc, sizeof (zfs_cmd_t)); if ((pair = nvlist_next_nvpair(errors, NULL)) == NULL) { nvlist_free(errors); errors = NULL; } else { VERIFY(nvpair_value_int32(pair, &rv) == 0); } if (errlist == NULL) nvlist_free(errors); else *errlist = errors; return (rv); } static boolean_t propval_equals(nvpair_t *p1, nvpair_t *p2) { if (nvpair_type(p1) == DATA_TYPE_NVLIST) { /* dsl_prop_get_all_impl() format */ nvlist_t *attrs; VERIFY(nvpair_value_nvlist(p1, &attrs) == 0); VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE, &p1) == 0); } if (nvpair_type(p2) == DATA_TYPE_NVLIST) { nvlist_t *attrs; VERIFY(nvpair_value_nvlist(p2, &attrs) == 0); VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE, &p2) == 0); } if (nvpair_type(p1) != nvpair_type(p2)) return (B_FALSE); if (nvpair_type(p1) == DATA_TYPE_STRING) { char *valstr1, *valstr2; VERIFY(nvpair_value_string(p1, (char **)&valstr1) == 0); VERIFY(nvpair_value_string(p2, (char **)&valstr2) == 0); return (strcmp(valstr1, valstr2) == 0); } else { uint64_t intval1, intval2; VERIFY(nvpair_value_uint64(p1, &intval1) == 0); VERIFY(nvpair_value_uint64(p2, &intval2) == 0); return (intval1 == intval2); } } /* * Remove properties from props if they are not going to change (as determined * by comparison with origprops). Remove them from origprops as well, since we * do not need to clear or restore properties that won't change. */ static void props_reduce(nvlist_t *props, nvlist_t *origprops) { nvpair_t *pair, *next_pair; if (origprops == NULL) return; /* all props need to be received */ pair = nvlist_next_nvpair(props, NULL); while (pair != NULL) { const char *propname = nvpair_name(pair); nvpair_t *match; next_pair = nvlist_next_nvpair(props, pair); if ((nvlist_lookup_nvpair(origprops, propname, &match) != 0) || !propval_equals(pair, match)) goto next; /* need to set received value */ /* don't clear the existing received value */ (void) nvlist_remove_nvpair(origprops, match); /* don't bother receiving the property */ (void) nvlist_remove_nvpair(props, pair); next: pair = next_pair; } } /* * Extract properties that cannot be set PRIOR to the receipt of a dataset. * For example, refquota cannot be set until after the receipt of a dataset, * because in replication streams, an older/earlier snapshot may exceed the * refquota. 
We want to receive the older/earlier snapshot, but setting * refquota pre-receipt will set the dsl's ACTUAL quota, which will prevent * the older/earlier snapshot from being received (with EDQUOT). * * The ZFS test "zfs_receive_011_pos" demonstrates such a scenario. * * libzfs will need to be judicious handling errors encountered by props * extracted by this function. */ static nvlist_t * extract_delay_props(nvlist_t *props) { nvlist_t *delayprops; nvpair_t *nvp, *tmp; static const zfs_prop_t delayable[] = { ZFS_PROP_REFQUOTA, ZFS_PROP_KEYLOCATION, /* * Setting ZFS_PROP_SHARESMB requires the objset type to be * known, which is not possible prior to receipt of raw sends. */ ZFS_PROP_SHARESMB, 0 }; int i; VERIFY(nvlist_alloc(&delayprops, NV_UNIQUE_NAME, KM_SLEEP) == 0); for (nvp = nvlist_next_nvpair(props, NULL); nvp != NULL; nvp = nvlist_next_nvpair(props, nvp)) { /* * strcmp() is safe because zfs_prop_to_name() always returns * a bounded string. */ for (i = 0; delayable[i] != 0; i++) { if (strcmp(zfs_prop_to_name(delayable[i]), nvpair_name(nvp)) == 0) { break; } } if (delayable[i] != 0) { tmp = nvlist_prev_nvpair(props, nvp); VERIFY(nvlist_add_nvpair(delayprops, nvp) == 0); VERIFY(nvlist_remove_nvpair(props, nvp) == 0); nvp = tmp; } } if (nvlist_empty(delayprops)) { nvlist_free(delayprops); delayprops = NULL; } return (delayprops); } static void zfs_allow_log_destroy(void *arg) { char *poolname = arg; if (poolname != NULL) kmem_strfree(poolname); } #ifdef ZFS_DEBUG static boolean_t zfs_ioc_recv_inject_err; #endif /* * nvlist 'errors' is always allocated. It will contain descriptions of * encountered errors, if any. It's the callers responsibility to free. */ static int zfs_ioc_recv_impl(char *tofs, char *tosnap, char *origin, nvlist_t *recvprops, nvlist_t *localprops, nvlist_t *hidden_args, boolean_t force, boolean_t heal, boolean_t resumable, int input_fd, dmu_replay_record_t *begin_record, uint64_t *read_bytes, uint64_t *errflags, nvlist_t **errors) { dmu_recv_cookie_t drc; int error = 0; int props_error = 0; offset_t off, noff; nvlist_t *local_delayprops = NULL; nvlist_t *recv_delayprops = NULL; nvlist_t *inherited_delayprops = NULL; nvlist_t *origprops = NULL; /* existing properties */ nvlist_t *origrecvd = NULL; /* existing received properties */ boolean_t first_recvd_props = B_FALSE; boolean_t tofs_was_redacted; zfs_file_t *input_fp; *read_bytes = 0; *errflags = 0; *errors = fnvlist_alloc(); off = 0; if ((input_fp = zfs_file_get(input_fd)) == NULL) return (SET_ERROR(EBADF)); noff = off = zfs_file_off(input_fp); error = dmu_recv_begin(tofs, tosnap, begin_record, force, heal, resumable, localprops, hidden_args, origin, &drc, input_fp, &off); if (error != 0) goto out; tofs_was_redacted = dsl_get_redacted(drc.drc_ds); /* * Set properties before we receive the stream so that they are applied * to the new data. Note that we must call dmu_recv_stream() if * dmu_recv_begin() succeeds. */ if (recvprops != NULL && !drc.drc_newfs) { if (spa_version(dsl_dataset_get_spa(drc.drc_ds)) >= SPA_VERSION_RECVD_PROPS && !dsl_prop_get_hasrecvd(tofs)) first_recvd_props = B_TRUE; /* * If new received properties are supplied, they are to * completely replace the existing received properties, * so stash away the existing ones. */ if (dsl_prop_get_received(tofs, &origrecvd) == 0) { nvlist_t *errlist = NULL; /* * Don't bother writing a property if its value won't * change (and avoid the unnecessary security checks). 
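 * (props_reduce() below drops pairs whose received value would not
 * change, from both recvprops and origrecvd.)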
* * The first receive after SPA_VERSION_RECVD_PROPS is a * special case where we blow away all local properties * regardless. */ if (!first_recvd_props) props_reduce(recvprops, origrecvd); if (zfs_check_clearable(tofs, origrecvd, &errlist) != 0) (void) nvlist_merge(*errors, errlist, 0); nvlist_free(errlist); if (clear_received_props(tofs, origrecvd, first_recvd_props ? NULL : recvprops) != 0) *errflags |= ZPROP_ERR_NOCLEAR; } else { *errflags |= ZPROP_ERR_NOCLEAR; } } /* * Stash away existing properties so we can restore them on error unless * we're doing the first receive after SPA_VERSION_RECVD_PROPS, in which * case "origrecvd" will take care of that. */ if (localprops != NULL && !drc.drc_newfs && !first_recvd_props) { objset_t *os; if (dmu_objset_hold(tofs, FTAG, &os) == 0) { if (dsl_prop_get_all(os, &origprops) != 0) { *errflags |= ZPROP_ERR_NOCLEAR; } dmu_objset_rele(os, FTAG); } else { *errflags |= ZPROP_ERR_NOCLEAR; } } if (recvprops != NULL) { props_error = dsl_prop_set_hasrecvd(tofs); if (props_error == 0) { recv_delayprops = extract_delay_props(recvprops); (void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_RECEIVED, recvprops, *errors); } } if (localprops != NULL) { nvlist_t *oprops = fnvlist_alloc(); nvlist_t *xprops = fnvlist_alloc(); nvpair_t *nvp = NULL; while ((nvp = nvlist_next_nvpair(localprops, nvp)) != NULL) { if (nvpair_type(nvp) == DATA_TYPE_BOOLEAN) { /* -x property */ const char *name = nvpair_name(nvp); zfs_prop_t prop = zfs_name_to_prop(name); if (prop != ZPROP_USERPROP) { if (!zfs_prop_inheritable(prop)) continue; } else if (!zfs_prop_user(name)) continue; fnvlist_add_boolean(xprops, name); } else { /* -o property=value */ fnvlist_add_nvpair(oprops, nvp); } } local_delayprops = extract_delay_props(oprops); (void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_LOCAL, oprops, *errors); inherited_delayprops = extract_delay_props(xprops); (void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_INHERITED, xprops, *errors); nvlist_free(oprops); nvlist_free(xprops); } error = dmu_recv_stream(&drc, &off); if (error == 0) { zfsvfs_t *zfsvfs = NULL; zvol_state_handle_t *zv = NULL; if (getzfsvfs(tofs, &zfsvfs) == 0) { /* online recv */ dsl_dataset_t *ds; int end_err; boolean_t stream_is_redacted = DMU_GET_FEATUREFLAGS( begin_record->drr_u.drr_begin. drr_versioninfo) & DMU_BACKUP_FEATURE_REDACTED; ds = dmu_objset_ds(zfsvfs->z_os); error = zfs_suspend_fs(zfsvfs); /* * If the suspend fails, then the recv_end will * likely also fail, and clean up after itself. */ end_err = dmu_recv_end(&drc, zfsvfs); /* * If the dataset was not redacted, but we received a * redacted stream onto it, we need to unmount the * dataset. Otherwise, resume the filesystem. */ if (error == 0 && !drc.drc_newfs && stream_is_redacted && !tofs_was_redacted) { error = zfs_end_fs(zfsvfs, ds); } else if (error == 0) { error = zfs_resume_fs(zfsvfs, ds); } error = error ? error : end_err; zfs_vfs_rele(zfsvfs); } else if ((zv = zvol_suspend(tofs)) != NULL) { error = dmu_recv_end(&drc, zvol_tag(zv)); zvol_resume(zv); } else { error = dmu_recv_end(&drc, NULL); } /* Set delayed properties now, after we're done receiving. 
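 * extract_delay_props() pulled refquota, keylocation and sharesmb out
 * earlier because they can only be applied after the stream has been
 * received.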
*/ if (recv_delayprops != NULL && error == 0) { (void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_RECEIVED, recv_delayprops, *errors); } if (local_delayprops != NULL && error == 0) { (void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_LOCAL, local_delayprops, *errors); } if (inherited_delayprops != NULL && error == 0) { (void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_INHERITED, inherited_delayprops, *errors); } } /* * Merge delayed props back in with initial props, in case * we're DEBUG and zfs_ioc_recv_inject_err is set (which means * we have to make sure clear_received_props() includes * the delayed properties). * * Since zfs_ioc_recv_inject_err is only in DEBUG kernels, * using ASSERT() will be just like a VERIFY. */ if (recv_delayprops != NULL) { ASSERT(nvlist_merge(recvprops, recv_delayprops, 0) == 0); nvlist_free(recv_delayprops); } if (local_delayprops != NULL) { ASSERT(nvlist_merge(localprops, local_delayprops, 0) == 0); nvlist_free(local_delayprops); } if (inherited_delayprops != NULL) { ASSERT(nvlist_merge(localprops, inherited_delayprops, 0) == 0); nvlist_free(inherited_delayprops); } *read_bytes = off - noff; #ifdef ZFS_DEBUG if (zfs_ioc_recv_inject_err) { zfs_ioc_recv_inject_err = B_FALSE; error = 1; } #endif /* * On error, restore the original props. */ if (error != 0 && recvprops != NULL && !drc.drc_newfs) { if (clear_received_props(tofs, recvprops, NULL) != 0) { /* * We failed to clear the received properties. * Since we may have left a $recvd value on the * system, we can't clear the $hasrecvd flag. */ *errflags |= ZPROP_ERR_NORESTORE; } else if (first_recvd_props) { dsl_prop_unset_hasrecvd(tofs); } if (origrecvd == NULL && !drc.drc_newfs) { /* We failed to stash the original properties. */ *errflags |= ZPROP_ERR_NORESTORE; } /* * dsl_props_set() will not convert RECEIVED to LOCAL on or * after SPA_VERSION_RECVD_PROPS, so we need to specify LOCAL * explicitly if we're restoring local properties cleared in the * first new-style receive. */ if (origrecvd != NULL && zfs_set_prop_nvlist(tofs, (first_recvd_props ? ZPROP_SRC_LOCAL : ZPROP_SRC_RECEIVED), origrecvd, NULL) != 0) { /* * We stashed the original properties but failed to * restore them. */ *errflags |= ZPROP_ERR_NORESTORE; } } if (error != 0 && localprops != NULL && !drc.drc_newfs && !first_recvd_props) { nvlist_t *setprops; nvlist_t *inheritprops; nvpair_t *nvp; if (origprops == NULL) { /* We failed to stash the original properties. */ *errflags |= ZPROP_ERR_NORESTORE; goto out; } /* Restore original props */ setprops = fnvlist_alloc(); inheritprops = fnvlist_alloc(); nvp = NULL; while ((nvp = nvlist_next_nvpair(localprops, nvp)) != NULL) { const char *name = nvpair_name(nvp); const char *source; nvlist_t *attrs; if (!nvlist_exists(origprops, name)) { /* * Property was not present or was explicitly * inherited before the receive, restore this. 
*/ fnvlist_add_boolean(inheritprops, name); continue; } attrs = fnvlist_lookup_nvlist(origprops, name); source = fnvlist_lookup_string(attrs, ZPROP_SOURCE); /* Skip received properties */ if (strcmp(source, ZPROP_SOURCE_VAL_RECVD) == 0) continue; if (strcmp(source, tofs) == 0) { /* Property was locally set */ fnvlist_add_nvlist(setprops, name, attrs); } else { /* Property was implicitly inherited */ fnvlist_add_boolean(inheritprops, name); } } if (zfs_set_prop_nvlist(tofs, ZPROP_SRC_LOCAL, setprops, NULL) != 0) *errflags |= ZPROP_ERR_NORESTORE; if (zfs_set_prop_nvlist(tofs, ZPROP_SRC_INHERITED, inheritprops, NULL) != 0) *errflags |= ZPROP_ERR_NORESTORE; nvlist_free(setprops); nvlist_free(inheritprops); } out: zfs_file_put(input_fp); nvlist_free(origrecvd); nvlist_free(origprops); if (error == 0) error = props_error; return (error); } /* * inputs: * zc_name name of containing filesystem (unused) * zc_nvlist_src{_size} nvlist of properties to apply * zc_nvlist_conf{_size} nvlist of properties to exclude * (DATA_TYPE_BOOLEAN) and override (everything else) * zc_value name of snapshot to create * zc_string name of clone origin (if DRR_FLAG_CLONE) * zc_cookie file descriptor to recv from * zc_begin_record the BEGIN record of the stream (not byteswapped) * zc_guid force flag * * outputs: * zc_cookie number of bytes read * zc_obj zprop_errflags_t * zc_nvlist_dst{_size} error for each unapplied received property */ static int zfs_ioc_recv(zfs_cmd_t *zc) { dmu_replay_record_t begin_record; nvlist_t *errors = NULL; nvlist_t *recvdprops = NULL; nvlist_t *localprops = NULL; char *origin = NULL; char *tosnap; char tofs[ZFS_MAX_DATASET_NAME_LEN]; int error = 0; if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 || strchr(zc->zc_value, '@') == NULL || strchr(zc->zc_value, '%')) return (SET_ERROR(EINVAL)); (void) strlcpy(tofs, zc->zc_value, sizeof (tofs)); tosnap = strchr(tofs, '@'); *tosnap++ = '\0'; if (zc->zc_nvlist_src != 0 && (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, zc->zc_iflags, &recvdprops)) != 0) return (error); if (zc->zc_nvlist_conf != 0 && (error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size, zc->zc_iflags, &localprops)) != 0) return (error); if (zc->zc_string[0]) origin = zc->zc_string; begin_record.drr_type = DRR_BEGIN; begin_record.drr_payloadlen = 0; begin_record.drr_u.drr_begin = zc->zc_begin_record; error = zfs_ioc_recv_impl(tofs, tosnap, origin, recvdprops, localprops, NULL, zc->zc_guid, B_FALSE, B_FALSE, zc->zc_cookie, &begin_record, &zc->zc_cookie, &zc->zc_obj, &errors); nvlist_free(recvdprops); nvlist_free(localprops); /* * Now that all props, initial and delayed, are set, report the prop * errors to the caller. */ if (zc->zc_nvlist_dst_size != 0 && errors != NULL && (nvlist_smush(errors, zc->zc_nvlist_dst_size) != 0 || put_nvlist(zc, errors) != 0)) { /* * Caller made zc->zc_nvlist_dst less than the minimum expected * size or supplied an invalid address. 
*/ error = SET_ERROR(EINVAL); } nvlist_free(errors); return (error); } /* * innvl: { * "snapname" -> full name of the snapshot to create * (optional) "props" -> received properties to set (nvlist) * (optional) "localprops" -> override and exclude properties (nvlist) * (optional) "origin" -> name of clone origin (DRR_FLAG_CLONE) * "begin_record" -> non-byteswapped dmu_replay_record_t * "input_fd" -> file descriptor to read stream from (int32) * (optional) "force" -> force flag (value ignored) * (optional) "heal" -> use send stream to heal data corruption * (optional) "resumable" -> resumable flag (value ignored) * (optional) "cleanup_fd" -> unused * (optional) "action_handle" -> unused * (optional) "hidden_args" -> { "wkeydata" -> value } * } * * outnvl: { * "read_bytes" -> number of bytes read * "error_flags" -> zprop_errflags_t * "errors" -> error for each unapplied received property (nvlist) * } */ static const zfs_ioc_key_t zfs_keys_recv_new[] = { {"snapname", DATA_TYPE_STRING, 0}, {"props", DATA_TYPE_NVLIST, ZK_OPTIONAL}, {"localprops", DATA_TYPE_NVLIST, ZK_OPTIONAL}, {"origin", DATA_TYPE_STRING, ZK_OPTIONAL}, {"begin_record", DATA_TYPE_BYTE_ARRAY, 0}, {"input_fd", DATA_TYPE_INT32, 0}, {"force", DATA_TYPE_BOOLEAN, ZK_OPTIONAL}, {"heal", DATA_TYPE_BOOLEAN, ZK_OPTIONAL}, {"resumable", DATA_TYPE_BOOLEAN, ZK_OPTIONAL}, {"cleanup_fd", DATA_TYPE_INT32, ZK_OPTIONAL}, {"action_handle", DATA_TYPE_UINT64, ZK_OPTIONAL}, {"hidden_args", DATA_TYPE_NVLIST, ZK_OPTIONAL}, }; static int zfs_ioc_recv_new(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl) { dmu_replay_record_t *begin_record; uint_t begin_record_size; nvlist_t *errors = NULL; nvlist_t *recvprops = NULL; nvlist_t *localprops = NULL; nvlist_t *hidden_args = NULL; char *snapname; char *origin = NULL; char *tosnap; char tofs[ZFS_MAX_DATASET_NAME_LEN]; boolean_t force; boolean_t heal; boolean_t resumable; uint64_t read_bytes = 0; uint64_t errflags = 0; int input_fd = -1; int error; snapname = fnvlist_lookup_string(innvl, "snapname"); if (dataset_namecheck(snapname, NULL, NULL) != 0 || strchr(snapname, '@') == NULL || strchr(snapname, '%')) return (SET_ERROR(EINVAL)); (void) strlcpy(tofs, snapname, sizeof (tofs)); tosnap = strchr(tofs, '@'); *tosnap++ = '\0'; error = nvlist_lookup_string(innvl, "origin", &origin); if (error && error != ENOENT) return (error); error = nvlist_lookup_byte_array(innvl, "begin_record", (uchar_t **)&begin_record, &begin_record_size); if (error != 0 || begin_record_size != sizeof (*begin_record)) return (SET_ERROR(EINVAL)); input_fd = fnvlist_lookup_int32(innvl, "input_fd"); force = nvlist_exists(innvl, "force"); heal = nvlist_exists(innvl, "heal"); resumable = nvlist_exists(innvl, "resumable"); /* we still use "props" here for backwards compatibility */ error = nvlist_lookup_nvlist(innvl, "props", &recvprops); if (error && error != ENOENT) return (error); error = nvlist_lookup_nvlist(innvl, "localprops", &localprops); if (error && error != ENOENT) return (error); error = nvlist_lookup_nvlist(innvl, ZPOOL_HIDDEN_ARGS, &hidden_args); if (error && error != ENOENT) return (error); error = zfs_ioc_recv_impl(tofs, tosnap, origin, recvprops, localprops, hidden_args, force, heal, resumable, input_fd, begin_record, &read_bytes, &errflags, &errors); fnvlist_add_uint64(outnvl, "read_bytes", read_bytes); fnvlist_add_uint64(outnvl, "error_flags", errflags); fnvlist_add_nvlist(outnvl, "errors", errors); nvlist_free(errors); nvlist_free(recvprops); nvlist_free(localprops); return (error); } typedef struct dump_bytes_io { 
zfs_file_t *dbi_fp; caddr_t dbi_buf; int dbi_len; int dbi_err; } dump_bytes_io_t; static void dump_bytes_cb(void *arg) { dump_bytes_io_t *dbi = (dump_bytes_io_t *)arg; zfs_file_t *fp; caddr_t buf; fp = dbi->dbi_fp; buf = dbi->dbi_buf; dbi->dbi_err = zfs_file_write(fp, buf, dbi->dbi_len, NULL); } static int dump_bytes(objset_t *os, void *buf, int len, void *arg) { dump_bytes_io_t dbi; dbi.dbi_fp = arg; dbi.dbi_buf = buf; dbi.dbi_len = len; #if defined(HAVE_LARGE_STACKS) dump_bytes_cb(&dbi); #else /* * The vn_rdwr() call is performed in a taskq to ensure that there is * always enough stack space to write safely to the target filesystem. * The ZIO_TYPE_FREE threads are used because there can be a lot of * them and they are used in vdev_file.c for a similar purpose. */ spa_taskq_dispatch_sync(dmu_objset_spa(os), ZIO_TYPE_FREE, ZIO_TASKQ_ISSUE, dump_bytes_cb, &dbi, TQ_SLEEP); #endif /* HAVE_LARGE_STACKS */ return (dbi.dbi_err); } /* * inputs: * zc_name name of snapshot to send * zc_cookie file descriptor to send stream to * zc_obj fromorigin flag (mutually exclusive with zc_fromobj) * zc_sendobj objsetid of snapshot to send * zc_fromobj objsetid of incremental fromsnap (may be zero) * zc_guid if set, estimate size of stream only. zc_cookie is ignored. * output size in zc_objset_type. * zc_flags lzc_send_flags * * outputs: * zc_objset_type estimated size, if zc_guid is set * * NOTE: This is no longer the preferred interface, any new functionality * should be added to zfs_ioc_send_new() instead. */ static int zfs_ioc_send(zfs_cmd_t *zc) { int error; offset_t off; boolean_t estimate = (zc->zc_guid != 0); boolean_t embedok = (zc->zc_flags & 0x1); boolean_t large_block_ok = (zc->zc_flags & 0x2); boolean_t compressok = (zc->zc_flags & 0x4); boolean_t rawok = (zc->zc_flags & 0x8); boolean_t savedok = (zc->zc_flags & 0x10); if (zc->zc_obj != 0) { dsl_pool_t *dp; dsl_dataset_t *tosnap; error = dsl_pool_hold(zc->zc_name, FTAG, &dp); if (error != 0) return (error); error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &tosnap); if (error != 0) { dsl_pool_rele(dp, FTAG); return (error); } if (dsl_dir_is_clone(tosnap->ds_dir)) zc->zc_fromobj = dsl_dir_phys(tosnap->ds_dir)->dd_origin_obj; dsl_dataset_rele(tosnap, FTAG); dsl_pool_rele(dp, FTAG); } if (estimate) { dsl_pool_t *dp; dsl_dataset_t *tosnap; dsl_dataset_t *fromsnap = NULL; error = dsl_pool_hold(zc->zc_name, FTAG, &dp); if (error != 0) return (error); error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &tosnap); if (error != 0) { dsl_pool_rele(dp, FTAG); return (error); } if (zc->zc_fromobj != 0) { error = dsl_dataset_hold_obj(dp, zc->zc_fromobj, FTAG, &fromsnap); if (error != 0) { dsl_dataset_rele(tosnap, FTAG); dsl_pool_rele(dp, FTAG); return (error); } } error = dmu_send_estimate_fast(tosnap, fromsnap, NULL, compressok || rawok, savedok, &zc->zc_objset_type); if (fromsnap != NULL) dsl_dataset_rele(fromsnap, FTAG); dsl_dataset_rele(tosnap, FTAG); dsl_pool_rele(dp, FTAG); } else { zfs_file_t *fp; dmu_send_outparams_t out = {0}; if ((fp = zfs_file_get(zc->zc_cookie)) == NULL) return (SET_ERROR(EBADF)); off = zfs_file_off(fp); out.dso_outfunc = dump_bytes; out.dso_arg = fp; out.dso_dryrun = B_FALSE; error = dmu_send_obj(zc->zc_name, zc->zc_sendobj, zc->zc_fromobj, embedok, large_block_ok, compressok, rawok, savedok, zc->zc_cookie, &off, &out); zfs_file_put(fp); } return (error); } /* * inputs: * zc_name name of snapshot on which to report progress * zc_cookie file descriptor of send stream * * outputs: * zc_cookie number of bytes written in send 
stream thus far * zc_objset_type logical size of data traversed by send thus far */ static int zfs_ioc_send_progress(zfs_cmd_t *zc) { dsl_pool_t *dp; dsl_dataset_t *ds; dmu_sendstatus_t *dsp = NULL; int error; error = dsl_pool_hold(zc->zc_name, FTAG, &dp); if (error != 0) return (error); error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &ds); if (error != 0) { dsl_pool_rele(dp, FTAG); return (error); } mutex_enter(&ds->ds_sendstream_lock); /* * Iterate over all the send streams currently active on this dataset. * If there's one which matches the specified file descriptor _and_ the * stream was started by the current process, return the progress of * that stream. */ for (dsp = list_head(&ds->ds_sendstreams); dsp != NULL; dsp = list_next(&ds->ds_sendstreams, dsp)) { if (dsp->dss_outfd == zc->zc_cookie && zfs_proc_is_caller(dsp->dss_proc)) break; } if (dsp != NULL) { zc->zc_cookie = atomic_cas_64((volatile uint64_t *)dsp->dss_off, 0, 0); /* This is the closest thing we have to atomic_read_64. */ zc->zc_objset_type = atomic_cas_64(&dsp->dss_blocks, 0, 0); } else { error = SET_ERROR(ENOENT); } mutex_exit(&ds->ds_sendstream_lock); dsl_dataset_rele(ds, FTAG); dsl_pool_rele(dp, FTAG); return (error); } static int zfs_ioc_inject_fault(zfs_cmd_t *zc) { int id, error; error = zio_inject_fault(zc->zc_name, (int)zc->zc_guid, &id, &zc->zc_inject_record); if (error == 0) zc->zc_guid = (uint64_t)id; return (error); } static int zfs_ioc_clear_fault(zfs_cmd_t *zc) { return (zio_clear_fault((int)zc->zc_guid)); } static int zfs_ioc_inject_list_next(zfs_cmd_t *zc) { int id = (int)zc->zc_guid; int error; error = zio_inject_list_next(&id, zc->zc_name, sizeof (zc->zc_name), &zc->zc_inject_record); zc->zc_guid = id; return (error); } static int zfs_ioc_error_log(zfs_cmd_t *zc) { spa_t *spa; int error; uint64_t count = zc->zc_nvlist_dst_size; if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) return (error); error = spa_get_errlog(spa, (void *)(uintptr_t)zc->zc_nvlist_dst, &count); if (error == 0) zc->zc_nvlist_dst_size = count; else zc->zc_nvlist_dst_size = spa_get_errlog_size(spa); spa_close(spa, FTAG); return (error); } static int zfs_ioc_clear(zfs_cmd_t *zc) { spa_t *spa; vdev_t *vd; int error; /* * On zpool clear we also fix up missing slogs */ mutex_enter(&spa_namespace_lock); spa = spa_lookup(zc->zc_name); if (spa == NULL) { mutex_exit(&spa_namespace_lock); return (SET_ERROR(EIO)); } if (spa_get_log_state(spa) == SPA_LOG_MISSING) { /* we need to let spa_open/spa_load clear the chains */ spa_set_log_state(spa, SPA_LOG_CLEAR); } spa->spa_last_open_failed = 0; mutex_exit(&spa_namespace_lock); if (zc->zc_cookie & ZPOOL_NO_REWIND) { error = spa_open(zc->zc_name, &spa, FTAG); } else { nvlist_t *policy; nvlist_t *config = NULL; if (zc->zc_nvlist_src == 0) return (SET_ERROR(EINVAL)); if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, zc->zc_iflags, &policy)) == 0) { error = spa_open_rewind(zc->zc_name, &spa, FTAG, policy, &config); if (config != NULL) { int err; if ((err = put_nvlist(zc, config)) != 0) error = err; nvlist_free(config); } nvlist_free(policy); } } if (error != 0) return (error); /* * If multihost is enabled, resuming I/O is unsafe as another * host may have imported the pool. 
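 * Such a pool typically must be exported and re-imported before I/O
 * can safely resume, so fail the clear here.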
*/ if (spa_multihost(spa) && spa_suspended(spa)) return (SET_ERROR(EINVAL)); spa_vdev_state_enter(spa, SCL_NONE); if (zc->zc_guid == 0) { vd = NULL; } else { vd = spa_lookup_by_guid(spa, zc->zc_guid, B_TRUE); if (vd == NULL) { error = SET_ERROR(ENODEV); (void) spa_vdev_state_exit(spa, NULL, error); spa_close(spa, FTAG); return (error); } } vdev_clear(spa, vd); (void) spa_vdev_state_exit(spa, spa_suspended(spa) ? NULL : spa->spa_root_vdev, 0); /* * Resume any suspended I/Os. */ if (zio_resume(spa) != 0) error = SET_ERROR(EIO); spa_close(spa, FTAG); return (error); } /* * Reopen all the vdevs associated with the pool. * * innvl: { * "scrub_restart" -> when true and scrub is running, allow to restart * scrub as the side effect of the reopen (boolean). * } * * outnvl is unused */ static const zfs_ioc_key_t zfs_keys_pool_reopen[] = { {"scrub_restart", DATA_TYPE_BOOLEAN_VALUE, ZK_OPTIONAL}, }; static int zfs_ioc_pool_reopen(const char *pool, nvlist_t *innvl, nvlist_t *outnvl) { (void) outnvl; spa_t *spa; int error; boolean_t rc, scrub_restart = B_TRUE; if (innvl) { error = nvlist_lookup_boolean_value(innvl, "scrub_restart", &rc); if (error == 0) scrub_restart = rc; } error = spa_open(pool, &spa, FTAG); if (error != 0) return (error); spa_vdev_state_enter(spa, SCL_NONE); /* * If the scrub_restart flag is B_FALSE and a scrub is already * in progress then set spa_scrub_reopen flag to B_TRUE so that * we don't restart the scrub as a side effect of the reopen. * Otherwise, let vdev_open() decide if a resilver is required. */ spa->spa_scrub_reopen = (!scrub_restart && dsl_scan_scrubbing(spa->spa_dsl_pool)); vdev_reopen(spa->spa_root_vdev); spa->spa_scrub_reopen = B_FALSE; (void) spa_vdev_state_exit(spa, NULL, 0); spa_close(spa, FTAG); return (0); } /* * inputs: * zc_name name of filesystem * * outputs: * zc_string name of conflicting snapshot, if there is one */ static int zfs_ioc_promote(zfs_cmd_t *zc) { dsl_pool_t *dp; dsl_dataset_t *ds, *ods; char origin[ZFS_MAX_DATASET_NAME_LEN]; char *cp; int error; zc->zc_name[sizeof (zc->zc_name) - 1] = '\0'; if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0 || strchr(zc->zc_name, '%')) return (SET_ERROR(EINVAL)); error = dsl_pool_hold(zc->zc_name, FTAG, &dp); if (error != 0) return (error); error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &ds); if (error != 0) { dsl_pool_rele(dp, FTAG); return (error); } if (!dsl_dir_is_clone(ds->ds_dir)) { dsl_dataset_rele(ds, FTAG); dsl_pool_rele(dp, FTAG); return (SET_ERROR(EINVAL)); } error = dsl_dataset_hold_obj(dp, dsl_dir_phys(ds->ds_dir)->dd_origin_obj, FTAG, &ods); if (error != 0) { dsl_dataset_rele(ds, FTAG); dsl_pool_rele(dp, FTAG); return (error); } dsl_dataset_name(ods, origin); dsl_dataset_rele(ods, FTAG); dsl_dataset_rele(ds, FTAG); dsl_pool_rele(dp, FTAG); /* * We don't need to unmount *all* the origin fs's snapshots, but * it's easier. */ cp = strchr(origin, '@'); if (cp) *cp = '\0'; (void) dmu_objset_find(origin, zfs_unmount_snap_cb, NULL, DS_FIND_SNAPSHOTS); return (dsl_dataset_promote(zc->zc_name, zc->zc_string)); } /* * Retrieve a single {user|group|project}{used|quota}@... property. * * inputs: * zc_name name of filesystem * zc_objset_type zfs_userquota_prop_t * zc_value domain name (eg. 
"S-1-234-567-89") * zc_guid RID/UID/GID * * outputs: * zc_cookie property value */ static int zfs_ioc_userspace_one(zfs_cmd_t *zc) { zfsvfs_t *zfsvfs; int error; if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS) return (SET_ERROR(EINVAL)); error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs, B_FALSE); if (error != 0) return (error); error = zfs_userspace_one(zfsvfs, zc->zc_objset_type, zc->zc_value, zc->zc_guid, &zc->zc_cookie); zfsvfs_rele(zfsvfs, FTAG); return (error); } /* * inputs: * zc_name name of filesystem * zc_cookie zap cursor * zc_objset_type zfs_userquota_prop_t * zc_nvlist_dst[_size] buffer to fill (not really an nvlist) * * outputs: * zc_nvlist_dst[_size] data buffer (array of zfs_useracct_t) * zc_cookie zap cursor */ static int zfs_ioc_userspace_many(zfs_cmd_t *zc) { zfsvfs_t *zfsvfs; int bufsize = zc->zc_nvlist_dst_size; if (bufsize <= 0) return (SET_ERROR(ENOMEM)); int error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs, B_FALSE); if (error != 0) return (error); void *buf = vmem_alloc(bufsize, KM_SLEEP); error = zfs_userspace_many(zfsvfs, zc->zc_objset_type, &zc->zc_cookie, buf, &zc->zc_nvlist_dst_size); if (error == 0) { error = xcopyout(buf, (void *)(uintptr_t)zc->zc_nvlist_dst, zc->zc_nvlist_dst_size); } vmem_free(buf, bufsize); zfsvfs_rele(zfsvfs, FTAG); return (error); } /* * inputs: * zc_name name of filesystem * * outputs: * none */ static int zfs_ioc_userspace_upgrade(zfs_cmd_t *zc) { int error = 0; zfsvfs_t *zfsvfs; if (getzfsvfs(zc->zc_name, &zfsvfs) == 0) { if (!dmu_objset_userused_enabled(zfsvfs->z_os)) { /* * If userused is not enabled, it may be because the * objset needs to be closed & reopened (to grow the * objset_phys_t). Suspend/resume the fs will do that. */ dsl_dataset_t *ds, *newds; ds = dmu_objset_ds(zfsvfs->z_os); error = zfs_suspend_fs(zfsvfs); if (error == 0) { dmu_objset_refresh_ownership(ds, &newds, B_TRUE, zfsvfs); error = zfs_resume_fs(zfsvfs, newds); } } if (error == 0) { mutex_enter(&zfsvfs->z_os->os_upgrade_lock); if (zfsvfs->z_os->os_upgrade_id == 0) { /* clear potential error code and retry */ zfsvfs->z_os->os_upgrade_status = 0; mutex_exit(&zfsvfs->z_os->os_upgrade_lock); dsl_pool_config_enter( dmu_objset_pool(zfsvfs->z_os), FTAG); dmu_objset_userspace_upgrade(zfsvfs->z_os); dsl_pool_config_exit( dmu_objset_pool(zfsvfs->z_os), FTAG); } else { mutex_exit(&zfsvfs->z_os->os_upgrade_lock); } taskq_wait_id(zfsvfs->z_os->os_spa->spa_upgrade_taskq, zfsvfs->z_os->os_upgrade_id); error = zfsvfs->z_os->os_upgrade_status; } zfs_vfs_rele(zfsvfs); } else { objset_t *os; /* XXX kind of reading contents without owning */ error = dmu_objset_hold_flags(zc->zc_name, B_TRUE, FTAG, &os); if (error != 0) return (error); mutex_enter(&os->os_upgrade_lock); if (os->os_upgrade_id == 0) { /* clear potential error code and retry */ os->os_upgrade_status = 0; mutex_exit(&os->os_upgrade_lock); dmu_objset_userspace_upgrade(os); } else { mutex_exit(&os->os_upgrade_lock); } dsl_pool_rele(dmu_objset_pool(os), FTAG); taskq_wait_id(os->os_spa->spa_upgrade_taskq, os->os_upgrade_id); error = os->os_upgrade_status; dsl_dataset_rele_flags(dmu_objset_ds(os), DS_HOLD_FLAG_DECRYPT, FTAG); } return (error); } /* * inputs: * zc_name name of filesystem * * outputs: * none */ static int zfs_ioc_id_quota_upgrade(zfs_cmd_t *zc) { objset_t *os; int error; error = dmu_objset_hold_flags(zc->zc_name, B_TRUE, FTAG, &os); if (error != 0) return (error); if (dmu_objset_userobjspace_upgradable(os) || dmu_objset_projectquota_upgradable(os)) { mutex_enter(&os->os_upgrade_lock); if (os->os_upgrade_id == 
0) { /* clear potential error code and retry */ os->os_upgrade_status = 0; mutex_exit(&os->os_upgrade_lock); dmu_objset_id_quota_upgrade(os); } else { mutex_exit(&os->os_upgrade_lock); } dsl_pool_rele(dmu_objset_pool(os), FTAG); taskq_wait_id(os->os_spa->spa_upgrade_taskq, os->os_upgrade_id); error = os->os_upgrade_status; } else { dsl_pool_rele(dmu_objset_pool(os), FTAG); } dsl_dataset_rele_flags(dmu_objset_ds(os), DS_HOLD_FLAG_DECRYPT, FTAG); return (error); } static int zfs_ioc_share(zfs_cmd_t *zc) { return (SET_ERROR(ENOSYS)); } /* * inputs: * zc_name name of containing filesystem * zc_obj object # beyond which we want next in-use object # * * outputs: * zc_obj next in-use object # */ static int zfs_ioc_next_obj(zfs_cmd_t *zc) { objset_t *os = NULL; int error; error = dmu_objset_hold(zc->zc_name, FTAG, &os); if (error != 0) return (error); error = dmu_object_next(os, &zc->zc_obj, B_FALSE, 0); dmu_objset_rele(os, FTAG); return (error); } /* * inputs: * zc_name name of filesystem * zc_value prefix name for snapshot * zc_cleanup_fd cleanup-on-exit file descriptor for calling process * * outputs: * zc_value short name of new snapshot */ static int zfs_ioc_tmp_snapshot(zfs_cmd_t *zc) { char *snap_name; char *hold_name; minor_t minor; zfs_file_t *fp = zfs_onexit_fd_hold(zc->zc_cleanup_fd, &minor); if (fp == NULL) return (SET_ERROR(EBADF)); snap_name = kmem_asprintf("%s-%016llx", zc->zc_value, (u_longlong_t)ddi_get_lbolt64()); hold_name = kmem_asprintf("%%%s", zc->zc_value); int error = dsl_dataset_snapshot_tmp(zc->zc_name, snap_name, minor, hold_name); if (error == 0) (void) strlcpy(zc->zc_value, snap_name, sizeof (zc->zc_value)); kmem_strfree(snap_name); kmem_strfree(hold_name); zfs_onexit_fd_rele(fp); return (error); } /* * inputs: * zc_name name of "to" snapshot * zc_value name of "from" snapshot * zc_cookie file descriptor to write diff data on * * outputs: * dmu_diff_record_t's to the file descriptor */ static int zfs_ioc_diff(zfs_cmd_t *zc) { zfs_file_t *fp; offset_t off; int error; if ((fp = zfs_file_get(zc->zc_cookie)) == NULL) return (SET_ERROR(EBADF)); off = zfs_file_off(fp); error = dmu_diff(zc->zc_name, zc->zc_value, fp, &off); zfs_file_put(fp); return (error); } static int zfs_ioc_smb_acl(zfs_cmd_t *zc) { return (SET_ERROR(ENOTSUP)); } /* * innvl: { * "holds" -> { snapname -> holdname (string), ... } * (optional) "cleanup_fd" -> fd (int32) * } * * outnvl: { * snapname -> error value (int32) * ... * } */ static const zfs_ioc_key_t zfs_keys_hold[] = { {"holds", DATA_TYPE_NVLIST, 0}, {"cleanup_fd", DATA_TYPE_INT32, ZK_OPTIONAL}, }; static int zfs_ioc_hold(const char *pool, nvlist_t *args, nvlist_t *errlist) { (void) pool; nvpair_t *pair; nvlist_t *holds; int cleanup_fd = -1; int error; minor_t minor = 0; zfs_file_t *fp = NULL; holds = fnvlist_lookup_nvlist(args, "holds"); /* make sure the user didn't pass us any invalid (empty) tags */ for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL; pair = nvlist_next_nvpair(holds, pair)) { char *htag; error = nvpair_value_string(pair, &htag); if (error != 0) return (SET_ERROR(error)); if (strlen(htag) == 0) return (SET_ERROR(EINVAL)); } if (nvlist_lookup_int32(args, "cleanup_fd", &cleanup_fd) == 0) { fp = zfs_onexit_fd_hold(cleanup_fd, &minor); if (fp == NULL) return (SET_ERROR(EBADF)); } error = dsl_dataset_user_hold(holds, minor, errlist); if (fp != NULL) { ASSERT3U(minor, !=, 0); zfs_onexit_fd_rele(fp); } return (SET_ERROR(error)); } /* * innvl is not used. * * outnvl: { * holdname -> time added (uint64 seconds since epoch) * ... 
* } */ static const zfs_ioc_key_t zfs_keys_get_holds[] = { /* no nvl keys */ }; static int zfs_ioc_get_holds(const char *snapname, nvlist_t *args, nvlist_t *outnvl) { (void) args; return (dsl_dataset_get_holds(snapname, outnvl)); } /* * innvl: { * snapname -> { holdname, ... } * ... * } * * outnvl: { * snapname -> error value (int32) * ... * } */ static const zfs_ioc_key_t zfs_keys_release[] = { {"...", DATA_TYPE_NVLIST, ZK_WILDCARDLIST}, }; static int zfs_ioc_release(const char *pool, nvlist_t *holds, nvlist_t *errlist) { (void) pool; return (dsl_dataset_user_release(holds, errlist)); } /* * inputs: * zc_guid flags (ZEVENT_NONBLOCK) * zc_cleanup_fd zevent file descriptor * * outputs: * zc_nvlist_dst next nvlist event * zc_cookie dropped events since last get */ static int zfs_ioc_events_next(zfs_cmd_t *zc) { zfs_zevent_t *ze; nvlist_t *event = NULL; minor_t minor; uint64_t dropped = 0; int error; zfs_file_t *fp = zfs_zevent_fd_hold(zc->zc_cleanup_fd, &minor, &ze); if (fp == NULL) return (SET_ERROR(EBADF)); do { error = zfs_zevent_next(ze, &event, &zc->zc_nvlist_dst_size, &dropped); if (event != NULL) { zc->zc_cookie = dropped; error = put_nvlist(zc, event); nvlist_free(event); } if (zc->zc_guid & ZEVENT_NONBLOCK) break; if ((error == 0) || (error != ENOENT)) break; error = zfs_zevent_wait(ze); if (error != 0) break; } while (1); zfs_zevent_fd_rele(fp); return (error); } /* * outputs: * zc_cookie cleared events count */ static int zfs_ioc_events_clear(zfs_cmd_t *zc) { int count; zfs_zevent_drain_all(&count); zc->zc_cookie = count; return (0); } /* * inputs: * zc_guid eid | ZEVENT_SEEK_START | ZEVENT_SEEK_END * zc_cleanup zevent file descriptor */ static int zfs_ioc_events_seek(zfs_cmd_t *zc) { zfs_zevent_t *ze; minor_t minor; int error; zfs_file_t *fp = zfs_zevent_fd_hold(zc->zc_cleanup_fd, &minor, &ze); if (fp == NULL) return (SET_ERROR(EBADF)); error = zfs_zevent_seek(ze, zc->zc_guid); zfs_zevent_fd_rele(fp); return (error); } /* * inputs: * zc_name name of later filesystem or snapshot * zc_value full name of old snapshot or bookmark * * outputs: * zc_cookie space in bytes * zc_objset_type compressed space in bytes * zc_perm_action uncompressed space in bytes */ static int zfs_ioc_space_written(zfs_cmd_t *zc) { int error; dsl_pool_t *dp; dsl_dataset_t *new; error = dsl_pool_hold(zc->zc_name, FTAG, &dp); if (error != 0) return (error); error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &new); if (error != 0) { dsl_pool_rele(dp, FTAG); return (error); } if (strchr(zc->zc_value, '#') != NULL) { zfs_bookmark_phys_t bmp; error = dsl_bookmark_lookup(dp, zc->zc_value, new, &bmp); if (error == 0) { error = dsl_dataset_space_written_bookmark(&bmp, new, &zc->zc_cookie, &zc->zc_objset_type, &zc->zc_perm_action); } } else { dsl_dataset_t *old; error = dsl_dataset_hold(dp, zc->zc_value, FTAG, &old); if (error == 0) { error = dsl_dataset_space_written(old, new, &zc->zc_cookie, &zc->zc_objset_type, &zc->zc_perm_action); dsl_dataset_rele(old, FTAG); } } dsl_dataset_rele(new, FTAG); dsl_pool_rele(dp, FTAG); return (error); } /* * innvl: { * "firstsnap" -> snapshot name * } * * outnvl: { * "used" -> space in bytes * "compressed" -> compressed space in bytes * "uncompressed" -> uncompressed space in bytes * } */ static const zfs_ioc_key_t zfs_keys_space_snaps[] = { {"firstsnap", DATA_TYPE_STRING, 0}, }; static int zfs_ioc_space_snaps(const char *lastsnap, nvlist_t *innvl, nvlist_t *outnvl) { int error; dsl_pool_t *dp; dsl_dataset_t *new, *old; char *firstsnap; uint64_t used, comp, uncomp; firstsnap = 
fnvlist_lookup_string(innvl, "firstsnap"); error = dsl_pool_hold(lastsnap, FTAG, &dp); if (error != 0) return (error); error = dsl_dataset_hold(dp, lastsnap, FTAG, &new); if (error == 0 && !new->ds_is_snapshot) { dsl_dataset_rele(new, FTAG); error = SET_ERROR(EINVAL); } if (error != 0) { dsl_pool_rele(dp, FTAG); return (error); } error = dsl_dataset_hold(dp, firstsnap, FTAG, &old); if (error == 0 && !old->ds_is_snapshot) { dsl_dataset_rele(old, FTAG); error = SET_ERROR(EINVAL); } if (error != 0) { dsl_dataset_rele(new, FTAG); dsl_pool_rele(dp, FTAG); return (error); } error = dsl_dataset_space_wouldfree(old, new, &used, &comp, &uncomp); dsl_dataset_rele(old, FTAG); dsl_dataset_rele(new, FTAG); dsl_pool_rele(dp, FTAG); fnvlist_add_uint64(outnvl, "used", used); fnvlist_add_uint64(outnvl, "compressed", comp); fnvlist_add_uint64(outnvl, "uncompressed", uncomp); return (error); } /* * innvl: { * "fd" -> file descriptor to write stream to (int32) * (optional) "fromsnap" -> full snap name to send an incremental from * (optional) "largeblockok" -> (value ignored) * indicates that blocks > 128KB are permitted * (optional) "embedok" -> (value ignored) * presence indicates DRR_WRITE_EMBEDDED records are permitted * (optional) "compressok" -> (value ignored) * presence indicates compressed DRR_WRITE records are permitted * (optional) "rawok" -> (value ignored) * presence indicates raw encrypted records should be used. * (optional) "savedok" -> (value ignored) * presence indicates we should send a partially received snapshot * (optional) "resume_object" and "resume_offset" -> (uint64) * if present, resume send stream from specified object and offset. * (optional) "redactbook" -> (string) * if present, use this bookmark's redaction list to generate a redacted * send stream * } * * outnvl is unused */ static const zfs_ioc_key_t zfs_keys_send_new[] = { {"fd", DATA_TYPE_INT32, 0}, {"fromsnap", DATA_TYPE_STRING, ZK_OPTIONAL}, {"largeblockok", DATA_TYPE_BOOLEAN, ZK_OPTIONAL}, {"embedok", DATA_TYPE_BOOLEAN, ZK_OPTIONAL}, {"compressok", DATA_TYPE_BOOLEAN, ZK_OPTIONAL}, {"rawok", DATA_TYPE_BOOLEAN, ZK_OPTIONAL}, {"savedok", DATA_TYPE_BOOLEAN, ZK_OPTIONAL}, {"resume_object", DATA_TYPE_UINT64, ZK_OPTIONAL}, {"resume_offset", DATA_TYPE_UINT64, ZK_OPTIONAL}, {"redactbook", DATA_TYPE_STRING, ZK_OPTIONAL}, }; static int zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl) { (void) outnvl; int error; offset_t off; char *fromname = NULL; int fd; zfs_file_t *fp; boolean_t largeblockok; boolean_t embedok; boolean_t compressok; boolean_t rawok; boolean_t savedok; uint64_t resumeobj = 0; uint64_t resumeoff = 0; char *redactbook = NULL; fd = fnvlist_lookup_int32(innvl, "fd"); (void) nvlist_lookup_string(innvl, "fromsnap", &fromname); largeblockok = nvlist_exists(innvl, "largeblockok"); embedok = nvlist_exists(innvl, "embedok"); compressok = nvlist_exists(innvl, "compressok"); rawok = nvlist_exists(innvl, "rawok"); savedok = nvlist_exists(innvl, "savedok"); (void) nvlist_lookup_uint64(innvl, "resume_object", &resumeobj); (void) nvlist_lookup_uint64(innvl, "resume_offset", &resumeoff); (void) nvlist_lookup_string(innvl, "redactbook", &redactbook); if ((fp = zfs_file_get(fd)) == NULL) return (SET_ERROR(EBADF)); off = zfs_file_off(fp); dmu_send_outparams_t out = {0}; out.dso_outfunc = dump_bytes; out.dso_arg = fp; out.dso_dryrun = B_FALSE; error = dmu_send(snapname, fromname, embedok, largeblockok, compressok, rawok, savedok, resumeobj, resumeoff, redactbook, fd, &off, &out); zfs_file_put(fp); return 
(error); } static int send_space_sum(objset_t *os, void *buf, int len, void *arg) { (void) os, (void) buf; uint64_t *size = arg; *size += len; return (0); } /* * Determine approximately how large a zfs send stream will be -- the number * of bytes that will be written to the fd supplied to zfs_ioc_send_new(). * * innvl: { * (optional) "from" -> full snap or bookmark name to send an incremental * from * (optional) "largeblockok" -> (value ignored) * indicates that blocks > 128KB are permitted * (optional) "embedok" -> (value ignored) * presence indicates DRR_WRITE_EMBEDDED records are permitted * (optional) "compressok" -> (value ignored) * presence indicates compressed DRR_WRITE records are permitted * (optional) "rawok" -> (value ignored) * presence indicates raw encrypted records should be used. * (optional) "resume_object" and "resume_offset" -> (uint64) * if present, resume send stream from specified object and offset. * (optional) "fd" -> file descriptor to use as a cookie for progress * tracking (int32) * } * * outnvl: { * "space" -> bytes of space (uint64) * } */ static const zfs_ioc_key_t zfs_keys_send_space[] = { {"from", DATA_TYPE_STRING, ZK_OPTIONAL}, {"fromsnap", DATA_TYPE_STRING, ZK_OPTIONAL}, {"largeblockok", DATA_TYPE_BOOLEAN, ZK_OPTIONAL}, {"embedok", DATA_TYPE_BOOLEAN, ZK_OPTIONAL}, {"compressok", DATA_TYPE_BOOLEAN, ZK_OPTIONAL}, {"rawok", DATA_TYPE_BOOLEAN, ZK_OPTIONAL}, {"fd", DATA_TYPE_INT32, ZK_OPTIONAL}, {"redactbook", DATA_TYPE_STRING, ZK_OPTIONAL}, {"resume_object", DATA_TYPE_UINT64, ZK_OPTIONAL}, {"resume_offset", DATA_TYPE_UINT64, ZK_OPTIONAL}, {"bytes", DATA_TYPE_UINT64, ZK_OPTIONAL}, }; static int zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl) { dsl_pool_t *dp; dsl_dataset_t *tosnap; dsl_dataset_t *fromsnap = NULL; int error; char *fromname = NULL; char *redactlist_book = NULL; boolean_t largeblockok; boolean_t embedok; boolean_t compressok; boolean_t rawok; boolean_t savedok; uint64_t space = 0; boolean_t full_estimate = B_FALSE; uint64_t resumeobj = 0; uint64_t resumeoff = 0; uint64_t resume_bytes = 0; int32_t fd = -1; zfs_bookmark_phys_t zbm = {0}; error = dsl_pool_hold(snapname, FTAG, &dp); if (error != 0) return (error); error = dsl_dataset_hold(dp, snapname, FTAG, &tosnap); if (error != 0) { dsl_pool_rele(dp, FTAG); return (error); } (void) nvlist_lookup_int32(innvl, "fd", &fd); largeblockok = nvlist_exists(innvl, "largeblockok"); embedok = nvlist_exists(innvl, "embedok"); compressok = nvlist_exists(innvl, "compressok"); rawok = nvlist_exists(innvl, "rawok"); savedok = nvlist_exists(innvl, "savedok"); boolean_t from = (nvlist_lookup_string(innvl, "from", &fromname) == 0); boolean_t altbook = (nvlist_lookup_string(innvl, "redactbook", &redactlist_book) == 0); (void) nvlist_lookup_uint64(innvl, "resume_object", &resumeobj); (void) nvlist_lookup_uint64(innvl, "resume_offset", &resumeoff); (void) nvlist_lookup_uint64(innvl, "bytes", &resume_bytes); if (altbook) { full_estimate = B_TRUE; } else if (from) { if (strchr(fromname, '#')) { error = dsl_bookmark_lookup(dp, fromname, tosnap, &zbm); /* * dsl_bookmark_lookup() will fail with EXDEV if * the from-bookmark and tosnap are at the same txg. * However, it's valid to do a send (and therefore, * a send estimate) from and to the same time point, * if the bookmark is redacted (the incremental send * can change what's redacted on the target). In * this case, dsl_bookmark_lookup() fills in zbm * but returns EXDEV. Ignore this error. 
*/ if (error == EXDEV && zbm.zbm_redaction_obj != 0 && zbm.zbm_guid == dsl_dataset_phys(tosnap)->ds_guid) error = 0; if (error != 0) { dsl_dataset_rele(tosnap, FTAG); dsl_pool_rele(dp, FTAG); return (error); } if (zbm.zbm_redaction_obj != 0 || !(zbm.zbm_flags & ZBM_FLAG_HAS_FBN)) { full_estimate = B_TRUE; } } else if (strchr(fromname, '@')) { error = dsl_dataset_hold(dp, fromname, FTAG, &fromsnap); if (error != 0) { dsl_dataset_rele(tosnap, FTAG); dsl_pool_rele(dp, FTAG); return (error); } if (!dsl_dataset_is_before(tosnap, fromsnap, 0)) { full_estimate = B_TRUE; dsl_dataset_rele(fromsnap, FTAG); } } else { /* * from is not properly formatted as a snapshot or * bookmark */ dsl_dataset_rele(tosnap, FTAG); dsl_pool_rele(dp, FTAG); return (SET_ERROR(EINVAL)); } } if (full_estimate) { dmu_send_outparams_t out = {0}; offset_t off = 0; out.dso_outfunc = send_space_sum; out.dso_arg = &space; out.dso_dryrun = B_TRUE; /* * We have to release these holds so dmu_send can take them. It * will do all the error checking we need. */ dsl_dataset_rele(tosnap, FTAG); dsl_pool_rele(dp, FTAG); error = dmu_send(snapname, fromname, embedok, largeblockok, compressok, rawok, savedok, resumeobj, resumeoff, redactlist_book, fd, &off, &out); } else { error = dmu_send_estimate_fast(tosnap, fromsnap, (from && strchr(fromname, '#') != NULL ? &zbm : NULL), compressok || rawok, savedok, &space); space -= resume_bytes; if (fromsnap != NULL) dsl_dataset_rele(fromsnap, FTAG); dsl_dataset_rele(tosnap, FTAG); dsl_pool_rele(dp, FTAG); } fnvlist_add_uint64(outnvl, "space", space); return (error); } /* * Sync the currently open TXG to disk for the specified pool. * This is somewhat similar to 'zfs_sync()'. * For cases that do not result in error this ioctl will wait for * the currently open TXG to commit before returning back to the caller. * * innvl: { * "force" -> when true, force uberblock update even if there is no dirty data. * In addition this will cause the vdev configuration to be written * out including updating the zpool cache file. (boolean_t) * } * * onvl is unused */ static const zfs_ioc_key_t zfs_keys_pool_sync[] = { {"force", DATA_TYPE_BOOLEAN_VALUE, 0}, }; static int zfs_ioc_pool_sync(const char *pool, nvlist_t *innvl, nvlist_t *onvl) { (void) onvl; int err; boolean_t rc, force = B_FALSE; spa_t *spa; if ((err = spa_open(pool, &spa, FTAG)) != 0) return (err); if (innvl) { err = nvlist_lookup_boolean_value(innvl, "force", &rc); if (err == 0) force = rc; } if (force) { spa_config_enter(spa, SCL_CONFIG, FTAG, RW_WRITER); vdev_config_dirty(spa->spa_root_vdev); spa_config_exit(spa, SCL_CONFIG, FTAG); } txg_wait_synced(spa_get_dsl(spa), 0); spa_close(spa, FTAG); return (0); } /* * Load a user's wrapping key into the kernel. 
* innvl: { * "hidden_args" -> { "wkeydata" -> value } * raw uint8_t array of encryption wrapping key data (32 bytes) * (optional) "noop" -> (value ignored) * presence indicated key should only be verified, not loaded * } */ static const zfs_ioc_key_t zfs_keys_load_key[] = { {"hidden_args", DATA_TYPE_NVLIST, 0}, {"noop", DATA_TYPE_BOOLEAN, ZK_OPTIONAL}, }; static int zfs_ioc_load_key(const char *dsname, nvlist_t *innvl, nvlist_t *outnvl) { (void) outnvl; int ret; dsl_crypto_params_t *dcp = NULL; nvlist_t *hidden_args; boolean_t noop = nvlist_exists(innvl, "noop"); if (strchr(dsname, '@') != NULL || strchr(dsname, '%') != NULL) { ret = SET_ERROR(EINVAL); goto error; } hidden_args = fnvlist_lookup_nvlist(innvl, ZPOOL_HIDDEN_ARGS); ret = dsl_crypto_params_create_nvlist(DCP_CMD_NONE, NULL, hidden_args, &dcp); if (ret != 0) goto error; ret = spa_keystore_load_wkey(dsname, dcp, noop); if (ret != 0) goto error; dsl_crypto_params_free(dcp, noop); return (0); error: dsl_crypto_params_free(dcp, B_TRUE); return (ret); } /* * Unload a user's wrapping key from the kernel. * Both innvl and outnvl are unused. */ static const zfs_ioc_key_t zfs_keys_unload_key[] = { /* no nvl keys */ }; static int zfs_ioc_unload_key(const char *dsname, nvlist_t *innvl, nvlist_t *outnvl) { (void) innvl, (void) outnvl; int ret = 0; if (strchr(dsname, '@') != NULL || strchr(dsname, '%') != NULL) { ret = (SET_ERROR(EINVAL)); goto out; } ret = spa_keystore_unload_wkey(dsname); if (ret != 0) goto out; out: return (ret); } /* * Changes a user's wrapping key used to decrypt a dataset. The keyformat, * keylocation, pbkdf2salt, and pbkdf2iters properties can also be specified * here to change how the key is derived in userspace. * * innvl: { * "hidden_args" (optional) -> { "wkeydata" -> value } * raw uint8_t array of new encryption wrapping key data (32 bytes) * "props" (optional) -> { prop -> value } * } * * outnvl is unused */ static const zfs_ioc_key_t zfs_keys_change_key[] = { {"crypt_cmd", DATA_TYPE_UINT64, ZK_OPTIONAL}, {"hidden_args", DATA_TYPE_NVLIST, ZK_OPTIONAL}, {"props", DATA_TYPE_NVLIST, ZK_OPTIONAL}, }; static int zfs_ioc_change_key(const char *dsname, nvlist_t *innvl, nvlist_t *outnvl) { (void) outnvl; int ret; uint64_t cmd = DCP_CMD_NONE; dsl_crypto_params_t *dcp = NULL; nvlist_t *args = NULL, *hidden_args = NULL; if (strchr(dsname, '@') != NULL || strchr(dsname, '%') != NULL) { ret = (SET_ERROR(EINVAL)); goto error; } (void) nvlist_lookup_uint64(innvl, "crypt_cmd", &cmd); (void) nvlist_lookup_nvlist(innvl, "props", &args); (void) nvlist_lookup_nvlist(innvl, ZPOOL_HIDDEN_ARGS, &hidden_args); ret = dsl_crypto_params_create_nvlist(cmd, args, hidden_args, &dcp); if (ret != 0) goto error; ret = spa_keystore_change_key(dsname, dcp); if (ret != 0) goto error; dsl_crypto_params_free(dcp, B_FALSE); return (0); error: dsl_crypto_params_free(dcp, B_TRUE); return (ret); } static zfs_ioc_vec_t zfs_ioc_vec[ZFS_IOC_LAST - ZFS_IOC_FIRST]; static void zfs_ioctl_register_legacy(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func, zfs_secpolicy_func_t *secpolicy, zfs_ioc_namecheck_t namecheck, boolean_t log_history, zfs_ioc_poolcheck_t pool_check) { zfs_ioc_vec_t *vec = &zfs_ioc_vec[ioc - ZFS_IOC_FIRST]; ASSERT3U(ioc, >=, ZFS_IOC_FIRST); ASSERT3U(ioc, <, ZFS_IOC_LAST); ASSERT3P(vec->zvec_legacy_func, ==, NULL); ASSERT3P(vec->zvec_func, ==, NULL); vec->zvec_legacy_func = func; vec->zvec_secpolicy = secpolicy; vec->zvec_namecheck = namecheck; vec->zvec_allow_log = log_history; vec->zvec_pool_check = pool_check; } /* * See the block comment at the 
beginning of this file for details on * each argument to this function. */ void zfs_ioctl_register(const char *name, zfs_ioc_t ioc, zfs_ioc_func_t *func, zfs_secpolicy_func_t *secpolicy, zfs_ioc_namecheck_t namecheck, zfs_ioc_poolcheck_t pool_check, boolean_t smush_outnvlist, boolean_t allow_log, const zfs_ioc_key_t *nvl_keys, size_t num_keys) { zfs_ioc_vec_t *vec = &zfs_ioc_vec[ioc - ZFS_IOC_FIRST]; ASSERT3U(ioc, >=, ZFS_IOC_FIRST); ASSERT3U(ioc, <, ZFS_IOC_LAST); ASSERT3P(vec->zvec_legacy_func, ==, NULL); ASSERT3P(vec->zvec_func, ==, NULL); /* if we are logging, the name must be valid */ ASSERT(!allow_log || namecheck != NO_NAME); vec->zvec_name = name; vec->zvec_func = func; vec->zvec_secpolicy = secpolicy; vec->zvec_namecheck = namecheck; vec->zvec_pool_check = pool_check; vec->zvec_smush_outnvlist = smush_outnvlist; vec->zvec_allow_log = allow_log; vec->zvec_nvl_keys = nvl_keys; vec->zvec_nvl_key_count = num_keys; } static void zfs_ioctl_register_pool(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func, zfs_secpolicy_func_t *secpolicy, boolean_t log_history, zfs_ioc_poolcheck_t pool_check) { zfs_ioctl_register_legacy(ioc, func, secpolicy, POOL_NAME, log_history, pool_check); } void zfs_ioctl_register_dataset_nolog(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func, zfs_secpolicy_func_t *secpolicy, zfs_ioc_poolcheck_t pool_check) { zfs_ioctl_register_legacy(ioc, func, secpolicy, DATASET_NAME, B_FALSE, pool_check); } static void zfs_ioctl_register_pool_modify(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func) { zfs_ioctl_register_legacy(ioc, func, zfs_secpolicy_config, POOL_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY); } static void zfs_ioctl_register_pool_meta(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func, zfs_secpolicy_func_t *secpolicy) { zfs_ioctl_register_legacy(ioc, func, secpolicy, NO_NAME, B_FALSE, POOL_CHECK_NONE); } static void zfs_ioctl_register_dataset_read_secpolicy(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func, zfs_secpolicy_func_t *secpolicy) { zfs_ioctl_register_legacy(ioc, func, secpolicy, DATASET_NAME, B_FALSE, POOL_CHECK_SUSPENDED); } static void zfs_ioctl_register_dataset_read(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func) { zfs_ioctl_register_dataset_read_secpolicy(ioc, func, zfs_secpolicy_read); } static void zfs_ioctl_register_dataset_modify(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func, zfs_secpolicy_func_t *secpolicy) { zfs_ioctl_register_legacy(ioc, func, secpolicy, DATASET_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY); } static void zfs_ioctl_init(void) { zfs_ioctl_register("snapshot", ZFS_IOC_SNAPSHOT, zfs_ioc_snapshot, zfs_secpolicy_snapshot, POOL_NAME, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE, zfs_keys_snapshot, ARRAY_SIZE(zfs_keys_snapshot)); zfs_ioctl_register("log_history", ZFS_IOC_LOG_HISTORY, zfs_ioc_log_history, zfs_secpolicy_log_history, NO_NAME, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE, zfs_keys_log_history, ARRAY_SIZE(zfs_keys_log_history)); zfs_ioctl_register("space_snaps", ZFS_IOC_SPACE_SNAPS, zfs_ioc_space_snaps, zfs_secpolicy_read, DATASET_NAME, POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE, zfs_keys_space_snaps, ARRAY_SIZE(zfs_keys_space_snaps)); zfs_ioctl_register("send", ZFS_IOC_SEND_NEW, zfs_ioc_send_new, zfs_secpolicy_send_new, DATASET_NAME, POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE, zfs_keys_send_new, ARRAY_SIZE(zfs_keys_send_new)); zfs_ioctl_register("send_space", ZFS_IOC_SEND_SPACE, zfs_ioc_send_space, zfs_secpolicy_read, DATASET_NAME, POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE, zfs_keys_send_space, 
ARRAY_SIZE(zfs_keys_send_space)); zfs_ioctl_register("create", ZFS_IOC_CREATE, zfs_ioc_create, zfs_secpolicy_create_clone, DATASET_NAME, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE, zfs_keys_create, ARRAY_SIZE(zfs_keys_create)); zfs_ioctl_register("clone", ZFS_IOC_CLONE, zfs_ioc_clone, zfs_secpolicy_create_clone, DATASET_NAME, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE, zfs_keys_clone, ARRAY_SIZE(zfs_keys_clone)); zfs_ioctl_register("remap", ZFS_IOC_REMAP, zfs_ioc_remap, zfs_secpolicy_none, DATASET_NAME, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_TRUE, zfs_keys_remap, ARRAY_SIZE(zfs_keys_remap)); zfs_ioctl_register("destroy_snaps", ZFS_IOC_DESTROY_SNAPS, zfs_ioc_destroy_snaps, zfs_secpolicy_destroy_snaps, POOL_NAME, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE, zfs_keys_destroy_snaps, ARRAY_SIZE(zfs_keys_destroy_snaps)); zfs_ioctl_register("hold", ZFS_IOC_HOLD, zfs_ioc_hold, zfs_secpolicy_hold, POOL_NAME, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE, zfs_keys_hold, ARRAY_SIZE(zfs_keys_hold)); zfs_ioctl_register("release", ZFS_IOC_RELEASE, zfs_ioc_release, zfs_secpolicy_release, POOL_NAME, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE, zfs_keys_release, ARRAY_SIZE(zfs_keys_release)); zfs_ioctl_register("get_holds", ZFS_IOC_GET_HOLDS, zfs_ioc_get_holds, zfs_secpolicy_read, DATASET_NAME, POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE, zfs_keys_get_holds, ARRAY_SIZE(zfs_keys_get_holds)); zfs_ioctl_register("rollback", ZFS_IOC_ROLLBACK, zfs_ioc_rollback, zfs_secpolicy_rollback, DATASET_NAME, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_TRUE, zfs_keys_rollback, ARRAY_SIZE(zfs_keys_rollback)); zfs_ioctl_register("bookmark", ZFS_IOC_BOOKMARK, zfs_ioc_bookmark, zfs_secpolicy_bookmark, POOL_NAME, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE, zfs_keys_bookmark, ARRAY_SIZE(zfs_keys_bookmark)); zfs_ioctl_register("get_bookmarks", ZFS_IOC_GET_BOOKMARKS, zfs_ioc_get_bookmarks, zfs_secpolicy_read, DATASET_NAME, POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE, zfs_keys_get_bookmarks, ARRAY_SIZE(zfs_keys_get_bookmarks)); zfs_ioctl_register("get_bookmark_props", ZFS_IOC_GET_BOOKMARK_PROPS, zfs_ioc_get_bookmark_props, zfs_secpolicy_read, ENTITY_NAME, POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE, zfs_keys_get_bookmark_props, ARRAY_SIZE(zfs_keys_get_bookmark_props)); zfs_ioctl_register("destroy_bookmarks", ZFS_IOC_DESTROY_BOOKMARKS, zfs_ioc_destroy_bookmarks, zfs_secpolicy_destroy_bookmarks, POOL_NAME, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE, zfs_keys_destroy_bookmarks, ARRAY_SIZE(zfs_keys_destroy_bookmarks)); zfs_ioctl_register("receive", ZFS_IOC_RECV_NEW, zfs_ioc_recv_new, zfs_secpolicy_recv, DATASET_NAME, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE, zfs_keys_recv_new, ARRAY_SIZE(zfs_keys_recv_new)); zfs_ioctl_register("load-key", ZFS_IOC_LOAD_KEY, zfs_ioc_load_key, zfs_secpolicy_load_key, DATASET_NAME, POOL_CHECK_SUSPENDED, B_TRUE, B_TRUE, zfs_keys_load_key, ARRAY_SIZE(zfs_keys_load_key)); zfs_ioctl_register("unload-key", ZFS_IOC_UNLOAD_KEY, zfs_ioc_unload_key, zfs_secpolicy_load_key, DATASET_NAME, POOL_CHECK_SUSPENDED, B_TRUE, B_TRUE, zfs_keys_unload_key, ARRAY_SIZE(zfs_keys_unload_key)); zfs_ioctl_register("change-key", ZFS_IOC_CHANGE_KEY, zfs_ioc_change_key, zfs_secpolicy_change_key, DATASET_NAME, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE, zfs_keys_change_key, ARRAY_SIZE(zfs_keys_change_key)); zfs_ioctl_register("sync", ZFS_IOC_POOL_SYNC, zfs_ioc_pool_sync, 
zfs_secpolicy_none, POOL_NAME, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE, zfs_keys_pool_sync, ARRAY_SIZE(zfs_keys_pool_sync)); zfs_ioctl_register("reopen", ZFS_IOC_POOL_REOPEN, zfs_ioc_pool_reopen, zfs_secpolicy_config, POOL_NAME, POOL_CHECK_SUSPENDED, B_TRUE, B_TRUE, zfs_keys_pool_reopen, ARRAY_SIZE(zfs_keys_pool_reopen)); zfs_ioctl_register("channel_program", ZFS_IOC_CHANNEL_PROGRAM, zfs_ioc_channel_program, zfs_secpolicy_config, POOL_NAME, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE, zfs_keys_channel_program, ARRAY_SIZE(zfs_keys_channel_program)); zfs_ioctl_register("redact", ZFS_IOC_REDACT, zfs_ioc_redact, zfs_secpolicy_config, DATASET_NAME, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE, zfs_keys_redact, ARRAY_SIZE(zfs_keys_redact)); zfs_ioctl_register("zpool_checkpoint", ZFS_IOC_POOL_CHECKPOINT, zfs_ioc_pool_checkpoint, zfs_secpolicy_config, POOL_NAME, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE, zfs_keys_pool_checkpoint, ARRAY_SIZE(zfs_keys_pool_checkpoint)); zfs_ioctl_register("zpool_discard_checkpoint", ZFS_IOC_POOL_DISCARD_CHECKPOINT, zfs_ioc_pool_discard_checkpoint, zfs_secpolicy_config, POOL_NAME, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE, zfs_keys_pool_discard_checkpoint, ARRAY_SIZE(zfs_keys_pool_discard_checkpoint)); zfs_ioctl_register("initialize", ZFS_IOC_POOL_INITIALIZE, zfs_ioc_pool_initialize, zfs_secpolicy_config, POOL_NAME, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE, zfs_keys_pool_initialize, ARRAY_SIZE(zfs_keys_pool_initialize)); zfs_ioctl_register("trim", ZFS_IOC_POOL_TRIM, zfs_ioc_pool_trim, zfs_secpolicy_config, POOL_NAME, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE, zfs_keys_pool_trim, ARRAY_SIZE(zfs_keys_pool_trim)); zfs_ioctl_register("wait", ZFS_IOC_WAIT, zfs_ioc_wait, zfs_secpolicy_none, POOL_NAME, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE, zfs_keys_pool_wait, ARRAY_SIZE(zfs_keys_pool_wait)); zfs_ioctl_register("wait_fs", ZFS_IOC_WAIT_FS, zfs_ioc_wait_fs, zfs_secpolicy_none, DATASET_NAME, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE, zfs_keys_fs_wait, ARRAY_SIZE(zfs_keys_fs_wait)); zfs_ioctl_register("set_bootenv", ZFS_IOC_SET_BOOTENV, zfs_ioc_set_bootenv, zfs_secpolicy_config, POOL_NAME, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_TRUE, zfs_keys_set_bootenv, ARRAY_SIZE(zfs_keys_set_bootenv)); zfs_ioctl_register("get_bootenv", ZFS_IOC_GET_BOOTENV, zfs_ioc_get_bootenv, zfs_secpolicy_none, POOL_NAME, POOL_CHECK_SUSPENDED, B_FALSE, B_TRUE, zfs_keys_get_bootenv, ARRAY_SIZE(zfs_keys_get_bootenv)); zfs_ioctl_register("zpool_vdev_get_props", ZFS_IOC_VDEV_GET_PROPS, zfs_ioc_vdev_get_props, zfs_secpolicy_read, POOL_NAME, POOL_CHECK_NONE, B_FALSE, B_FALSE, zfs_keys_vdev_get_props, ARRAY_SIZE(zfs_keys_vdev_get_props)); zfs_ioctl_register("zpool_vdev_set_props", ZFS_IOC_VDEV_SET_PROPS, zfs_ioc_vdev_set_props, zfs_secpolicy_config, POOL_NAME, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE, zfs_keys_vdev_set_props, ARRAY_SIZE(zfs_keys_vdev_set_props)); /* IOCTLS that use the legacy function signature */ zfs_ioctl_register_legacy(ZFS_IOC_POOL_FREEZE, zfs_ioc_pool_freeze, zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_READONLY); zfs_ioctl_register_pool(ZFS_IOC_POOL_CREATE, zfs_ioc_pool_create, zfs_secpolicy_config, B_TRUE, POOL_CHECK_NONE); zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_SCAN, zfs_ioc_pool_scan); zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_UPGRADE, zfs_ioc_pool_upgrade); 
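	/*
	 * As an illustration of the legacy registration helpers (see
	 * zfs_ioctl_register_pool_modify() above), each of these calls
	 * expands to roughly:
	 *
	 *	zfs_ioctl_register_legacy(ZFS_IOC_POOL_UPGRADE,
	 *	    zfs_ioc_pool_upgrade, zfs_secpolicy_config, POOL_NAME,
	 *	    B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
	 *
	 * i.e. a pool-modifying ioctl requires config privilege, operates
	 * on a pool name, is logged to pool history, and is rejected while
	 * the pool is suspended or read-only.
	 */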
zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_ADD, zfs_ioc_vdev_add); zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_REMOVE, zfs_ioc_vdev_remove); zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SET_STATE, zfs_ioc_vdev_set_state); zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_ATTACH, zfs_ioc_vdev_attach); zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_DETACH, zfs_ioc_vdev_detach); zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SETPATH, zfs_ioc_vdev_setpath); zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SETFRU, zfs_ioc_vdev_setfru); zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_SET_PROPS, zfs_ioc_pool_set_props); zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SPLIT, zfs_ioc_vdev_split); zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_REGUID, zfs_ioc_pool_reguid); zfs_ioctl_register_pool_meta(ZFS_IOC_POOL_CONFIGS, zfs_ioc_pool_configs, zfs_secpolicy_none); zfs_ioctl_register_pool_meta(ZFS_IOC_POOL_TRYIMPORT, zfs_ioc_pool_tryimport, zfs_secpolicy_config); zfs_ioctl_register_pool_meta(ZFS_IOC_INJECT_FAULT, zfs_ioc_inject_fault, zfs_secpolicy_inject); zfs_ioctl_register_pool_meta(ZFS_IOC_CLEAR_FAULT, zfs_ioc_clear_fault, zfs_secpolicy_inject); zfs_ioctl_register_pool_meta(ZFS_IOC_INJECT_LIST_NEXT, zfs_ioc_inject_list_next, zfs_secpolicy_inject); /* * pool destroy, and export don't log the history as part of * zfsdev_ioctl, but rather zfs_ioc_pool_export * does the logging of those commands. */ zfs_ioctl_register_pool(ZFS_IOC_POOL_DESTROY, zfs_ioc_pool_destroy, zfs_secpolicy_config, B_FALSE, POOL_CHECK_SUSPENDED); zfs_ioctl_register_pool(ZFS_IOC_POOL_EXPORT, zfs_ioc_pool_export, zfs_secpolicy_config, B_FALSE, POOL_CHECK_SUSPENDED); zfs_ioctl_register_pool(ZFS_IOC_POOL_STATS, zfs_ioc_pool_stats, zfs_secpolicy_read, B_FALSE, POOL_CHECK_NONE); zfs_ioctl_register_pool(ZFS_IOC_POOL_GET_PROPS, zfs_ioc_pool_get_props, zfs_secpolicy_read, B_FALSE, POOL_CHECK_NONE); zfs_ioctl_register_pool(ZFS_IOC_ERROR_LOG, zfs_ioc_error_log, zfs_secpolicy_inject, B_FALSE, POOL_CHECK_SUSPENDED); zfs_ioctl_register_pool(ZFS_IOC_DSOBJ_TO_DSNAME, zfs_ioc_dsobj_to_dsname, zfs_secpolicy_diff, B_FALSE, POOL_CHECK_SUSPENDED); zfs_ioctl_register_pool(ZFS_IOC_POOL_GET_HISTORY, zfs_ioc_pool_get_history, zfs_secpolicy_config, B_FALSE, POOL_CHECK_SUSPENDED); zfs_ioctl_register_pool(ZFS_IOC_POOL_IMPORT, zfs_ioc_pool_import, zfs_secpolicy_config, B_TRUE, POOL_CHECK_NONE); zfs_ioctl_register_pool(ZFS_IOC_CLEAR, zfs_ioc_clear, zfs_secpolicy_config, B_TRUE, POOL_CHECK_READONLY); zfs_ioctl_register_dataset_read(ZFS_IOC_SPACE_WRITTEN, zfs_ioc_space_written); zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_RECVD_PROPS, zfs_ioc_objset_recvd_props); zfs_ioctl_register_dataset_read(ZFS_IOC_NEXT_OBJ, zfs_ioc_next_obj); zfs_ioctl_register_dataset_read(ZFS_IOC_GET_FSACL, zfs_ioc_get_fsacl); zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_STATS, zfs_ioc_objset_stats); zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_ZPLPROPS, zfs_ioc_objset_zplprops); zfs_ioctl_register_dataset_read(ZFS_IOC_DATASET_LIST_NEXT, zfs_ioc_dataset_list_next); zfs_ioctl_register_dataset_read(ZFS_IOC_SNAPSHOT_LIST_NEXT, zfs_ioc_snapshot_list_next); zfs_ioctl_register_dataset_read(ZFS_IOC_SEND_PROGRESS, zfs_ioc_send_progress); zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_DIFF, zfs_ioc_diff, zfs_secpolicy_diff); zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_OBJ_TO_STATS, zfs_ioc_obj_to_stats, zfs_secpolicy_diff); zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_OBJ_TO_PATH, zfs_ioc_obj_to_path, zfs_secpolicy_diff); zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_USERSPACE_ONE, 
zfs_ioc_userspace_one, zfs_secpolicy_userspace_one); zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_USERSPACE_MANY, zfs_ioc_userspace_many, zfs_secpolicy_userspace_many); zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_SEND, zfs_ioc_send, zfs_secpolicy_send); zfs_ioctl_register_dataset_modify(ZFS_IOC_SET_PROP, zfs_ioc_set_prop, zfs_secpolicy_none); zfs_ioctl_register_dataset_modify(ZFS_IOC_DESTROY, zfs_ioc_destroy, zfs_secpolicy_destroy); zfs_ioctl_register_dataset_modify(ZFS_IOC_RENAME, zfs_ioc_rename, zfs_secpolicy_rename); zfs_ioctl_register_dataset_modify(ZFS_IOC_RECV, zfs_ioc_recv, zfs_secpolicy_recv); zfs_ioctl_register_dataset_modify(ZFS_IOC_PROMOTE, zfs_ioc_promote, zfs_secpolicy_promote); zfs_ioctl_register_dataset_modify(ZFS_IOC_INHERIT_PROP, zfs_ioc_inherit_prop, zfs_secpolicy_inherit_prop); zfs_ioctl_register_dataset_modify(ZFS_IOC_SET_FSACL, zfs_ioc_set_fsacl, zfs_secpolicy_set_fsacl); zfs_ioctl_register_dataset_nolog(ZFS_IOC_SHARE, zfs_ioc_share, zfs_secpolicy_share, POOL_CHECK_NONE); zfs_ioctl_register_dataset_nolog(ZFS_IOC_SMB_ACL, zfs_ioc_smb_acl, zfs_secpolicy_smb_acl, POOL_CHECK_NONE); zfs_ioctl_register_dataset_nolog(ZFS_IOC_USERSPACE_UPGRADE, zfs_ioc_userspace_upgrade, zfs_secpolicy_userspace_upgrade, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY); zfs_ioctl_register_dataset_nolog(ZFS_IOC_TMP_SNAPSHOT, zfs_ioc_tmp_snapshot, zfs_secpolicy_tmp_snapshot, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY); zfs_ioctl_register_legacy(ZFS_IOC_EVENTS_NEXT, zfs_ioc_events_next, zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_NONE); zfs_ioctl_register_legacy(ZFS_IOC_EVENTS_CLEAR, zfs_ioc_events_clear, zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_NONE); zfs_ioctl_register_legacy(ZFS_IOC_EVENTS_SEEK, zfs_ioc_events_seek, zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_NONE); zfs_ioctl_init_os(); } /* * Verify that for non-legacy ioctls the input nvlist * pairs match against the expected input. 
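 *
 * For example (illustrative): ZFS_IOC_HOLD's key table, zfs_keys_hold[],
 * requires a "holds" nvlist and optionally accepts an int32 "cleanup_fd",
 * so an innvl of
 *
 *	{ "holds" -> { "pool/fs@snap" -> "mytag" }, "cleanup_fd" -> 7 }
 *
 * passes this check, while any unrecognized pair (other than the special
 * "optional" nvlist) or a wrongly-typed value does not.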
* * Possible errors are: * ZFS_ERR_IOC_ARG_UNAVAIL An unrecognized nvpair was encountered * ZFS_ERR_IOC_ARG_REQUIRED A required nvpair is missing * ZFS_ERR_IOC_ARG_BADTYPE Invalid type for nvpair */ static int zfs_check_input_nvpairs(nvlist_t *innvl, const zfs_ioc_vec_t *vec) { const zfs_ioc_key_t *nvl_keys = vec->zvec_nvl_keys; boolean_t required_keys_found = B_FALSE; /* * examine each input pair */ for (nvpair_t *pair = nvlist_next_nvpair(innvl, NULL); pair != NULL; pair = nvlist_next_nvpair(innvl, pair)) { char *name = nvpair_name(pair); data_type_t type = nvpair_type(pair); boolean_t identified = B_FALSE; /* * check pair against the documented names and type */ for (int k = 0; k < vec->zvec_nvl_key_count; k++) { /* if not a wild card name, check for an exact match */ if ((nvl_keys[k].zkey_flags & ZK_WILDCARDLIST) == 0 && strcmp(nvl_keys[k].zkey_name, name) != 0) continue; identified = B_TRUE; if (nvl_keys[k].zkey_type != DATA_TYPE_ANY && nvl_keys[k].zkey_type != type) { return (SET_ERROR(ZFS_ERR_IOC_ARG_BADTYPE)); } if (nvl_keys[k].zkey_flags & ZK_OPTIONAL) continue; required_keys_found = B_TRUE; break; } /* allow an 'optional' key, everything else is invalid */ if (!identified && (strcmp(name, "optional") != 0 || type != DATA_TYPE_NVLIST)) { return (SET_ERROR(ZFS_ERR_IOC_ARG_UNAVAIL)); } } /* verify that all required keys were found */ for (int k = 0; k < vec->zvec_nvl_key_count; k++) { if (nvl_keys[k].zkey_flags & ZK_OPTIONAL) continue; if (nvl_keys[k].zkey_flags & ZK_WILDCARDLIST) { /* at least one non-optional key is expected here */ if (!required_keys_found) return (SET_ERROR(ZFS_ERR_IOC_ARG_REQUIRED)); continue; } if (!nvlist_exists(innvl, nvl_keys[k].zkey_name)) return (SET_ERROR(ZFS_ERR_IOC_ARG_REQUIRED)); } return (0); } static int pool_status_check(const char *name, zfs_ioc_namecheck_t type, zfs_ioc_poolcheck_t check) { spa_t *spa; int error; ASSERT(type == POOL_NAME || type == DATASET_NAME || type == ENTITY_NAME); if (check & POOL_CHECK_NONE) return (0); error = spa_open(name, &spa, FTAG); if (error == 0) { if ((check & POOL_CHECK_SUSPENDED) && spa_suspended(spa)) error = SET_ERROR(EAGAIN); else if ((check & POOL_CHECK_READONLY) && !spa_writeable(spa)) error = SET_ERROR(EROFS); spa_close(spa, FTAG); } return (error); } int zfsdev_getminor(zfs_file_t *fp, minor_t *minorp) { zfsdev_state_t *zs, *fpd; ASSERT(!MUTEX_HELD(&zfsdev_state_lock)); fpd = zfs_file_private(fp); if (fpd == NULL) return (SET_ERROR(EBADF)); mutex_enter(&zfsdev_state_lock); for (zs = zfsdev_state_list; zs != NULL; zs = zs->zs_next) { if (zs->zs_minor == -1) continue; if (fpd == zs) { *minorp = fpd->zs_minor; mutex_exit(&zfsdev_state_lock); return (0); } } mutex_exit(&zfsdev_state_lock); return (SET_ERROR(EBADF)); } void * zfsdev_get_state(minor_t minor, enum zfsdev_state_type which) { zfsdev_state_t *zs; for (zs = zfsdev_state_list; zs != NULL; zs = zs->zs_next) { if (zs->zs_minor == minor) { membar_consumer(); switch (which) { case ZST_ONEXIT: return (zs->zs_onexit); case ZST_ZEVENT: return (zs->zs_zevent); case ZST_ALL: return (zs); } } } return (NULL); } /* * Find a free minor number. The zfsdev_state_list is expected to * be short since it is only a list of currently open file handles. 
*/ static minor_t zfsdev_minor_alloc(void) { static minor_t last_minor = 0; minor_t m; ASSERT(MUTEX_HELD(&zfsdev_state_lock)); for (m = last_minor + 1; m != last_minor; m++) { if (m > ZFSDEV_MAX_MINOR) m = 1; if (zfsdev_get_state(m, ZST_ALL) == NULL) { last_minor = m; return (m); } } return (0); } int zfsdev_state_init(void *priv) { zfsdev_state_t *zs, *zsprev = NULL; minor_t minor; boolean_t newzs = B_FALSE; ASSERT(MUTEX_HELD(&zfsdev_state_lock)); minor = zfsdev_minor_alloc(); if (minor == 0) return (SET_ERROR(ENXIO)); for (zs = zfsdev_state_list; zs != NULL; zs = zs->zs_next) { if (zs->zs_minor == -1) break; zsprev = zs; } if (!zs) { zs = kmem_zalloc(sizeof (zfsdev_state_t), KM_SLEEP); newzs = B_TRUE; } zfsdev_private_set_state(priv, zs); zfs_onexit_init((zfs_onexit_t **)&zs->zs_onexit); zfs_zevent_init((zfs_zevent_t **)&zs->zs_zevent); /* * In order to provide for lock-free concurrent read access * to the minor list in zfsdev_get_state(), new entries * must be completely written before linking them into the * list whereas existing entries are already linked; the last * operation must be updating zs_minor (from -1 to the new * value). */ if (newzs) { zs->zs_minor = minor; membar_producer(); zsprev->zs_next = zs; } else { membar_producer(); zs->zs_minor = minor; } return (0); } void zfsdev_state_destroy(void *priv) { zfsdev_state_t *zs = zfsdev_private_get_state(priv); ASSERT(zs != NULL); ASSERT3S(zs->zs_minor, >, 0); /* * The last reference to this zfsdev file descriptor is being dropped. * We don't have to worry about lookup grabbing this state object, and * zfsdev_state_init() will not try to reuse this object until it is * invalidated by setting zs_minor to -1. Invalidation must be done * last, with a memory barrier to ensure ordering. This lets us avoid * taking the global zfsdev state lock around destruction. */ zfs_onexit_destroy(zs->zs_onexit); zfs_zevent_destroy(zs->zs_zevent); zs->zs_onexit = NULL; zs->zs_zevent = NULL; membar_producer(); zs->zs_minor = -1; } long zfsdev_ioctl_common(uint_t vecnum, zfs_cmd_t *zc, int flag) { int error, cmd; const zfs_ioc_vec_t *vec; char *saved_poolname = NULL; uint64_t max_nvlist_src_size; size_t saved_poolname_len = 0; nvlist_t *innvl = NULL; fstrans_cookie_t cookie; hrtime_t start_time = gethrtime(); cmd = vecnum; error = 0; if (vecnum >= sizeof (zfs_ioc_vec) / sizeof (zfs_ioc_vec[0])) return (SET_ERROR(ZFS_ERR_IOC_CMD_UNAVAIL)); vec = &zfs_ioc_vec[vecnum]; /* * The registered ioctl list may be sparse, verify that either * a normal or legacy handler are registered. */ if (vec->zvec_func == NULL && vec->zvec_legacy_func == NULL) return (SET_ERROR(ZFS_ERR_IOC_CMD_UNAVAIL)); zc->zc_iflags = flag & FKIOCTL; max_nvlist_src_size = zfs_max_nvlist_src_size_os(); if (zc->zc_nvlist_src_size > max_nvlist_src_size) { /* * Make sure the user doesn't pass in an insane value for * zc_nvlist_src_size. We have to check, since we will end * up allocating that much memory inside of get_nvlist(). This * prevents a nefarious user from allocating tons of kernel * memory. * * Also, we return EINVAL instead of ENOMEM here. The reason * being that returning ENOMEM from an ioctl() has a special * connotation; that the user's size value is too small and * needs to be expanded to hold the nvlist. See * zcmd_expand_dst_nvlist() for details. 
*/ error = SET_ERROR(EINVAL); /* User's size too big */ } else if (zc->zc_nvlist_src_size != 0) { error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, zc->zc_iflags, &innvl); if (error != 0) goto out; } /* * Ensure that all pool/dataset names are valid before we pass down to * the lower layers. */ zc->zc_name[sizeof (zc->zc_name) - 1] = '\0'; switch (vec->zvec_namecheck) { case POOL_NAME: if (pool_namecheck(zc->zc_name, NULL, NULL) != 0) error = SET_ERROR(EINVAL); else error = pool_status_check(zc->zc_name, vec->zvec_namecheck, vec->zvec_pool_check); break; case DATASET_NAME: if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0) error = SET_ERROR(EINVAL); else error = pool_status_check(zc->zc_name, vec->zvec_namecheck, vec->zvec_pool_check); break; case ENTITY_NAME: if (entity_namecheck(zc->zc_name, NULL, NULL) != 0) { error = SET_ERROR(EINVAL); } else { error = pool_status_check(zc->zc_name, vec->zvec_namecheck, vec->zvec_pool_check); } break; case NO_NAME: break; } /* * Ensure that all input pairs are valid before we pass them down * to the lower layers. * * The vectored functions can use fnvlist_lookup_{type} for any * required pairs since zfs_check_input_nvpairs() confirmed that * they exist and are of the correct type. */ if (error == 0 && vec->zvec_func != NULL) { error = zfs_check_input_nvpairs(innvl, vec); if (error != 0) goto out; } if (error == 0) { cookie = spl_fstrans_mark(); error = vec->zvec_secpolicy(zc, innvl, CRED()); spl_fstrans_unmark(cookie); } if (error != 0) goto out; /* legacy ioctls can modify zc_name */ /* * Can't use kmem_strdup() as we might truncate the string and * kmem_strfree() would then free with incorrect size. */ saved_poolname_len = strlen(zc->zc_name) + 1; saved_poolname = kmem_alloc(saved_poolname_len, KM_SLEEP); strlcpy(saved_poolname, zc->zc_name, saved_poolname_len); saved_poolname[strcspn(saved_poolname, "/@#")] = '\0'; if (vec->zvec_func != NULL) { nvlist_t *outnvl; int puterror = 0; spa_t *spa; nvlist_t *lognv = NULL; ASSERT(vec->zvec_legacy_func == NULL); /* * Add the innvl to the lognv before calling the func, * in case the func changes the innvl. */ if (vec->zvec_allow_log) { lognv = fnvlist_alloc(); fnvlist_add_string(lognv, ZPOOL_HIST_IOCTL, vec->zvec_name); if (!nvlist_empty(innvl)) { fnvlist_add_nvlist(lognv, ZPOOL_HIST_INPUT_NVL, innvl); } } outnvl = fnvlist_alloc(); cookie = spl_fstrans_mark(); error = vec->zvec_func(zc->zc_name, innvl, outnvl); spl_fstrans_unmark(cookie); /* * Some commands can partially execute, modify state, and still * return an error. In these cases, attempt to record what * was modified. 
*/ if ((error == 0 || (cmd == ZFS_IOC_CHANNEL_PROGRAM && error != EINVAL)) && vec->zvec_allow_log && spa_open(zc->zc_name, &spa, FTAG) == 0) { if (!nvlist_empty(outnvl)) { size_t out_size = fnvlist_size(outnvl); if (out_size > zfs_history_output_max) { fnvlist_add_int64(lognv, ZPOOL_HIST_OUTPUT_SIZE, out_size); } else { fnvlist_add_nvlist(lognv, ZPOOL_HIST_OUTPUT_NVL, outnvl); } } if (error != 0) { fnvlist_add_int64(lognv, ZPOOL_HIST_ERRNO, error); } fnvlist_add_int64(lognv, ZPOOL_HIST_ELAPSED_NS, gethrtime() - start_time); (void) spa_history_log_nvl(spa, lognv); spa_close(spa, FTAG); } fnvlist_free(lognv); if (!nvlist_empty(outnvl) || zc->zc_nvlist_dst_size != 0) { int smusherror = 0; if (vec->zvec_smush_outnvlist) { smusherror = nvlist_smush(outnvl, zc->zc_nvlist_dst_size); } if (smusherror == 0) puterror = put_nvlist(zc, outnvl); } if (puterror != 0) error = puterror; nvlist_free(outnvl); } else { cookie = spl_fstrans_mark(); error = vec->zvec_legacy_func(zc); spl_fstrans_unmark(cookie); } out: nvlist_free(innvl); if (error == 0 && vec->zvec_allow_log) { char *s = tsd_get(zfs_allow_log_key); if (s != NULL) kmem_strfree(s); (void) tsd_set(zfs_allow_log_key, kmem_strdup(saved_poolname)); } if (saved_poolname != NULL) kmem_free(saved_poolname, saved_poolname_len); return (error); } int zfs_kmod_init(void) { int error; if ((error = zvol_init()) != 0) return (error); spa_init(SPA_MODE_READ | SPA_MODE_WRITE); zfs_init(); zfs_ioctl_init(); mutex_init(&zfsdev_state_lock, NULL, MUTEX_DEFAULT, NULL); zfsdev_state_list = kmem_zalloc(sizeof (zfsdev_state_t), KM_SLEEP); zfsdev_state_list->zs_minor = -1; if ((error = zfsdev_attach()) != 0) goto out; tsd_create(&zfs_fsyncer_key, NULL); tsd_create(&rrw_tsd_key, rrw_tsd_destroy); tsd_create(&zfs_allow_log_key, zfs_allow_log_destroy); return (0); out: zfs_fini(); spa_fini(); zvol_fini(); return (error); } void zfs_kmod_fini(void) { zfsdev_state_t *zs, *zsnext = NULL; zfsdev_detach(); mutex_destroy(&zfsdev_state_lock); for (zs = zfsdev_state_list; zs != NULL; zs = zsnext) { zsnext = zs->zs_next; if (zs->zs_onexit) zfs_onexit_destroy(zs->zs_onexit); if (zs->zs_zevent) zfs_zevent_destroy(zs->zs_zevent); kmem_free(zs, sizeof (zfsdev_state_t)); } zfs_ereport_taskq_fini(); /* run before zfs_fini() on Linux */ zfs_fini(); spa_fini(); zvol_fini(); tsd_destroy(&zfs_fsyncer_key); tsd_destroy(&rrw_tsd_key); tsd_destroy(&zfs_allow_log_key); } ZFS_MODULE_PARAM(zfs, zfs_, max_nvlist_src_size, ULONG, ZMOD_RW, "Maximum size in bytes allowed for src nvlist passed with ZFS ioctls"); ZFS_MODULE_PARAM(zfs, zfs_, history_output_max, ULONG, ZMOD_RW, "Maximum size in bytes of ZFS ioctl output that will be logged"); diff --git a/module/zfs/zio_inject.c b/module/zfs/zio_inject.c index 4f7cb8430d3e..3598351c499d 100644 --- a/module/zfs/zio_inject.c +++ b/module/zfs/zio_inject.c @@ -1,972 +1,972 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or https://opensource.org/licenses/CDDL-1.0. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
* If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012, 2015 by Delphix. All rights reserved. * Copyright (c) 2017, Intel Corporation. */ /* * ZFS fault injection * * To handle fault injection, we keep track of a series of zinject_record_t * structures which describe which logical block(s) should be injected with a * fault. These are kept in a global list. Each record corresponds to a given * spa_t and maintains a special hold on the spa_t so that it cannot be deleted * or exported while the injection record exists. * * Device level injection is done using the 'zi_guid' field. If this is set, it * means that the error is destined for a particular device, not a piece of * data. * * This is a rather poor data structure and algorithm, but we don't expect more * than a few faults at any one time, so it should be sufficient for our needs. */ #include #include #include #include #include #include #include uint32_t zio_injection_enabled = 0; /* * Data describing each zinject handler registered on the system, and * contains the list node linking the handler in the global zinject * handler list. */ typedef struct inject_handler { int zi_id; spa_t *zi_spa; zinject_record_t zi_record; uint64_t *zi_lanes; int zi_next_lane; list_node_t zi_link; } inject_handler_t; /* * List of all zinject handlers registered on the system, protected by * the inject_lock defined below. */ static list_t inject_handlers; /* * This protects insertion into, and traversal of, the inject handler * list defined above; as well as the inject_delay_count. Any time a * handler is inserted or removed from the list, this lock should be * taken as a RW_WRITER; and any time traversal is done over the list * (without modification to it) this lock should be taken as a RW_READER. */ static krwlock_t inject_lock; /* * This holds the number of zinject delay handlers that have been * registered on the system. It is protected by the inject_lock defined * above. Thus modifications to this count must be a RW_WRITER of the * inject_lock, and reads of this count must be (at least) a RW_READER * of the lock. */ static int inject_delay_count = 0; /* * This lock is used only in zio_handle_io_delay(), refer to the comment * in that function for more details. */ static kmutex_t inject_delay_mtx; /* * Used to assign unique identifying numbers to each new zinject handler. */ static int inject_next_id = 1; /* * Test if the requested frequency was triggered */ static boolean_t freq_triggered(uint32_t frequency) { /* * zero implies always (100%) */ if (frequency == 0) return (B_TRUE); /* * Note: we still handle legacy (unscaled) frequency values */ uint32_t maximum = (frequency <= 100) ? 100 : ZI_PERCENTAGE_MAX; return (random_in_range(maximum) < frequency); } /* * Returns true if the given record matches the I/O in progress. 
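 *
 * For example (illustrative): a record whose zi_objset, zi_object and
 * zi_level match the bookmark, whose zi_start..zi_end range covers the
 * blkid, and whose zi_error equals the injected errno (e.g. EIO) matches
 * the read; if zi_dvas is nonzero the DVA being read must also be set in
 * that bitmask, and zi_freq then limits how often the match fires (see
 * freq_triggered() above).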
*/ static boolean_t zio_match_handler(const zbookmark_phys_t *zb, uint64_t type, int dva, zinject_record_t *record, int error) { /* * Check for a match against the MOS, which is based on type */ if (zb->zb_objset == DMU_META_OBJSET && record->zi_objset == DMU_META_OBJSET && record->zi_object == DMU_META_DNODE_OBJECT) { if (record->zi_type == DMU_OT_NONE || type == record->zi_type) return (freq_triggered(record->zi_freq)); else return (B_FALSE); } /* * Check for an exact match. */ if (zb->zb_objset == record->zi_objset && zb->zb_object == record->zi_object && zb->zb_level == record->zi_level && zb->zb_blkid >= record->zi_start && zb->zb_blkid <= record->zi_end && (record->zi_dvas == 0 || (dva != ZI_NO_DVA && (record->zi_dvas & (1ULL << dva)))) && error == record->zi_error) { return (freq_triggered(record->zi_freq)); } return (B_FALSE); } /* * Panic the system when a config change happens in the function * specified by tag. */ void zio_handle_panic_injection(spa_t *spa, const char *tag, uint64_t type) { inject_handler_t *handler; rw_enter(&inject_lock, RW_READER); for (handler = list_head(&inject_handlers); handler != NULL; handler = list_next(&inject_handlers, handler)) { if (spa != handler->zi_spa) continue; if (handler->zi_record.zi_type == type && strcmp(tag, handler->zi_record.zi_func) == 0) panic("Panic requested in function %s\n", tag); } rw_exit(&inject_lock); } /* * Inject a decryption failure. Decryption failures can occur in * both the ARC and the ZIO layers. */ int zio_handle_decrypt_injection(spa_t *spa, const zbookmark_phys_t *zb, uint64_t type, int error) { int ret = 0; inject_handler_t *handler; rw_enter(&inject_lock, RW_READER); for (handler = list_head(&inject_handlers); handler != NULL; handler = list_next(&inject_handlers, handler)) { if (spa != handler->zi_spa || handler->zi_record.zi_cmd != ZINJECT_DECRYPT_FAULT) continue; if (zio_match_handler(zb, type, ZI_NO_DVA, &handler->zi_record, error)) { ret = error; break; } } rw_exit(&inject_lock); return (ret); } /* * If this is a physical I/O for a vdev child determine which DVA it is * for. We iterate backwards through the DVAs matching on the offset so * that we end up with ZI_NO_DVA (-1) if we don't find a match. */ static int zio_match_dva(zio_t *zio) { int i = ZI_NO_DVA; if (zio->io_bp != NULL && zio->io_vd != NULL && zio->io_child_type == ZIO_CHILD_VDEV) { for (i = BP_GET_NDVAS(zio->io_bp) - 1; i >= 0; i--) { dva_t *dva = &zio->io_bp->blk_dva[i]; uint64_t off = DVA_GET_OFFSET(dva); vdev_t *vd = vdev_lookup_top(zio->io_spa, DVA_GET_VDEV(dva)); /* Compensate for vdev label added to leaves */ if (zio->io_vd->vdev_ops->vdev_op_leaf) off += VDEV_LABEL_START_SIZE; if (zio->io_vd == vd && zio->io_offset == off) break; } } return (i); } /* * Determine if the I/O in question should return failure. Returns the errno * to be returned to the caller. */ int zio_handle_fault_injection(zio_t *zio, int error) { int ret = 0; inject_handler_t *handler; /* * Ignore I/O not associated with any logical data. */ if (zio->io_logical == NULL) return (0); /* * Currently, we only support fault injection on reads. */ if (zio->io_type != ZIO_TYPE_READ) return (0); /* * A rebuild I/O has no checksum to verify. 
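zio_match_handler() narrows an injection to particular copies of a block through the zi_dvas bitmask: zero means any DVA may match, otherwise bit i selects DVA index i, and ZI_NO_DVA (no DVA identified) can never satisfy a non-zero mask. A small standalone sketch of just that test, with hypothetical names:

#include <stdint.h>
#include <stdio.h>

/* Return nonzero if an I/O touching DVA index 'dva' matches the mask. */
static int
dva_matches(uint64_t zi_dvas, int dva)
{
	if (zi_dvas == 0)	/* no mask: any DVA matches */
		return (1);
	if (dva < 0)		/* ZI_NO_DVA: DVA unknown, cannot match a mask */
		return (0);
	return ((zi_dvas & (1ULL << dva)) != 0);
}

int
main(void)
{
	uint64_t mask = (1ULL << 0) | (1ULL << 2);	/* inject on copies 0 and 2 only */

	for (int dva = -1; dva < 3; dva++)
		printf("dva %2d -> %s\n", dva,
		    dva_matches(mask, dva) ? "inject" : "skip");
	return (0);
}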
*/ if (zio->io_priority == ZIO_PRIORITY_REBUILD && error == ECKSUM) return (0); rw_enter(&inject_lock, RW_READER); for (handler = list_head(&inject_handlers); handler != NULL; handler = list_next(&inject_handlers, handler)) { if (zio->io_spa != handler->zi_spa || handler->zi_record.zi_cmd != ZINJECT_DATA_FAULT) continue; /* If this handler matches, return the specified error */ if (zio_match_handler(&zio->io_logical->io_bookmark, zio->io_bp ? BP_GET_TYPE(zio->io_bp) : DMU_OT_NONE, zio_match_dva(zio), &handler->zi_record, error)) { ret = error; break; } } rw_exit(&inject_lock); return (ret); } /* * Determine if the zio is part of a label update and has an injection * handler associated with that portion of the label. Currently, we * allow error injection in either the nvlist or the uberblock region of * of the vdev label. */ int zio_handle_label_injection(zio_t *zio, int error) { inject_handler_t *handler; vdev_t *vd = zio->io_vd; uint64_t offset = zio->io_offset; int label; int ret = 0; if (offset >= VDEV_LABEL_START_SIZE && offset < vd->vdev_psize - VDEV_LABEL_END_SIZE) return (0); rw_enter(&inject_lock, RW_READER); for (handler = list_head(&inject_handlers); handler != NULL; handler = list_next(&inject_handlers, handler)) { uint64_t start = handler->zi_record.zi_start; uint64_t end = handler->zi_record.zi_end; if (handler->zi_record.zi_cmd != ZINJECT_LABEL_FAULT) continue; /* * The injection region is the relative offsets within a * vdev label. We must determine the label which is being * updated and adjust our region accordingly. */ label = vdev_label_number(vd->vdev_psize, offset); start = vdev_label_offset(vd->vdev_psize, label, start); end = vdev_label_offset(vd->vdev_psize, label, end); if (zio->io_vd->vdev_guid == handler->zi_record.zi_guid && (offset >= start && offset <= end)) { ret = error; break; } } rw_exit(&inject_lock); return (ret); } static int zio_inject_bitflip_cb(void *data, size_t len, void *private) { zio_t *zio = private; uint8_t *buffer = data; uint_t byte = random_in_range(len); ASSERT3U(zio->io_type, ==, ZIO_TYPE_READ); /* flip a single random bit in an abd data buffer */ buffer[byte] ^= 1 << random_in_range(8); return (1); /* stop after first flip */ } static int zio_handle_device_injection_impl(vdev_t *vd, zio_t *zio, int err1, int err2) { inject_handler_t *handler; int ret = 0; /* * We skip over faults in the labels unless it's during * device open (i.e. zio == NULL). */ if (zio != NULL) { uint64_t offset = zio->io_offset; if (offset < VDEV_LABEL_START_SIZE || offset >= vd->vdev_psize - VDEV_LABEL_END_SIZE) return (0); } rw_enter(&inject_lock, RW_READER); for (handler = list_head(&inject_handlers); handler != NULL; handler = list_next(&inject_handlers, handler)) { if (handler->zi_record.zi_cmd != ZINJECT_DEVICE_FAULT) continue; if (vd->vdev_guid == handler->zi_record.zi_guid) { if (handler->zi_record.zi_failfast && (zio == NULL || (zio->io_flags & (ZIO_FLAG_IO_RETRY | ZIO_FLAG_TRYHARD)))) { continue; } /* Handle type specific I/O failures */ if (zio != NULL && handler->zi_record.zi_iotype != ZIO_TYPES && handler->zi_record.zi_iotype != zio->io_type) continue; if (handler->zi_record.zi_error == err1 || handler->zi_record.zi_error == err2) { /* * limit error injection if requested */ if (!freq_triggered(handler->zi_record.zi_freq)) continue; /* * For a failed open, pretend like the device * has gone away. 
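For label injection, the handler's zi_start/zi_end are offsets relative to a single label and are translated to absolute device offsets for whichever label the write targets. Below is a simplified, assumed model of that translation, taking the conventional layout of four 256 KiB labels with two at the front and two at the back of the device; the authoritative calculation is vdev_label_offset(), and all names here are hypothetical.

#include <stdint.h>
#include <stdio.h>

#define	LABEL_SIZE_SKETCH	(256ULL * 1024)	/* assumed vdev label size */
#define	NLABELS_SKETCH		4		/* two at the front, two at the back */

/*
 * Simplified model: absolute offset of a label-relative offset for
 * label 'l' on a device of 'psize' bytes.
 */
static uint64_t
label_offset_sketch(uint64_t psize, int l, uint64_t offset)
{
	uint64_t base = (l < NLABELS_SKETCH / 2) ? 0 :
	    psize - NLABELS_SKETCH * LABEL_SIZE_SKETCH;
	return (base + (uint64_t)l * LABEL_SIZE_SKETCH + offset);
}

int
main(void)
{
	uint64_t psize = 1ULL << 30;	/* 1 GiB device, for illustration */

	for (int l = 0; l < NLABELS_SKETCH; l++)
		printf("label %d, relative 0x1000 -> absolute 0x%llx\n", l,
		    (unsigned long long)label_offset_sketch(psize, l, 0x1000));
	return (0);
}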
*/ if (err1 == ENXIO) vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED; /* * Treat these errors as if they had been * retried so that all the appropriate stats * and FMA events are generated. */ if (!handler->zi_record.zi_failfast && zio != NULL) zio->io_flags |= ZIO_FLAG_IO_RETRY; /* * EILSEQ means flip a bit after a read */ if (handler->zi_record.zi_error == EILSEQ) { if (zio == NULL) break; /* locate buffer data and flip a bit */ (void) abd_iterate_func(zio->io_abd, 0, zio->io_size, zio_inject_bitflip_cb, zio); break; } ret = handler->zi_record.zi_error; break; } if (handler->zi_record.zi_error == ENXIO) { ret = SET_ERROR(EIO); break; } } } rw_exit(&inject_lock); return (ret); } int zio_handle_device_injection(vdev_t *vd, zio_t *zio, int error) { return (zio_handle_device_injection_impl(vd, zio, error, INT_MAX)); } int zio_handle_device_injections(vdev_t *vd, zio_t *zio, int err1, int err2) { return (zio_handle_device_injection_impl(vd, zio, err1, err2)); } /* * Simulate hardware that ignores cache flushes. For requested number * of seconds nix the actual writing to disk. */ void zio_handle_ignored_writes(zio_t *zio) { inject_handler_t *handler; rw_enter(&inject_lock, RW_READER); for (handler = list_head(&inject_handlers); handler != NULL; handler = list_next(&inject_handlers, handler)) { /* Ignore errors not destined for this pool */ if (zio->io_spa != handler->zi_spa || handler->zi_record.zi_cmd != ZINJECT_IGNORED_WRITES) continue; /* * Positive duration implies # of seconds, negative * a number of txgs */ if (handler->zi_record.zi_timer == 0) { if (handler->zi_record.zi_duration > 0) handler->zi_record.zi_timer = ddi_get_lbolt64(); else handler->zi_record.zi_timer = zio->io_txg; } /* Have a "problem" writing 60% of the time */ if (random_in_range(100) < 60) zio->io_pipeline &= ~ZIO_VDEV_IO_STAGES; break; } rw_exit(&inject_lock); } void spa_handle_ignored_writes(spa_t *spa) { inject_handler_t *handler; if (zio_injection_enabled == 0) return; rw_enter(&inject_lock, RW_READER); for (handler = list_head(&inject_handlers); handler != NULL; handler = list_next(&inject_handlers, handler)) { if (spa != handler->zi_spa || handler->zi_record.zi_cmd != ZINJECT_IGNORED_WRITES) continue; if (handler->zi_record.zi_duration > 0) { VERIFY(handler->zi_record.zi_timer == 0 || ddi_time_after64( (int64_t)handler->zi_record.zi_timer + handler->zi_record.zi_duration * hz, ddi_get_lbolt64())); } else { /* duration is negative so the subtraction here adds */ VERIFY(handler->zi_record.zi_timer == 0 || handler->zi_record.zi_timer - handler->zi_record.zi_duration >= spa_syncing_txg(spa)); } } rw_exit(&inject_lock); } hrtime_t zio_handle_io_delay(zio_t *zio) { vdev_t *vd = zio->io_vd; inject_handler_t *min_handler = NULL; hrtime_t min_target = 0; rw_enter(&inject_lock, RW_READER); /* * inject_delay_count is a subset of zio_injection_enabled that * is only incremented for delay handlers. These checks are * mainly added to remind the reader why we're not explicitly * checking zio_injection_enabled like the other functions. */ IMPLY(inject_delay_count > 0, zio_injection_enabled > 0); IMPLY(zio_injection_enabled == 0, inject_delay_count == 0); /* * If there aren't any inject delay handlers registered, then we * can short circuit and simply return 0 here. A value of zero * informs zio_delay_interrupt() that this request should not be * delayed. This short circuit keeps us from acquiring the * inject_delay_mutex unnecessarily. 
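For ignored-write handlers, zi_timer is armed on first use: a positive zi_duration keeps the handler active for that many seconds from the recorded lbolt, a negative one for that many txgs from the recorded txg. A sketch of the corresponding expiry test under those assumptions (all names hypothetical; hz is the tick rate):

#include <stdint.h>
#include <stdio.h>

/*
 * Sketch: has an "ignored writes" handler run its course?
 *   duration > 0: timer holds the lbolt when armed, duration is seconds.
 *   duration < 0: timer holds the txg when armed, -duration is txgs.
 */
static int
ignored_writes_expired(int64_t timer, int64_t duration, int64_t now_lbolt,
    uint64_t syncing_txg, int hz)
{
	if (timer == 0)			/* never armed yet */
		return (0);
	if (duration > 0)
		return (now_lbolt > timer + duration * hz);
	return (syncing_txg > (uint64_t)(timer - duration));
}

int
main(void)
{
	/* Armed at lbolt 1000 for 5 seconds with hz=100: expires after 1500. */
	printf("%d\n", ignored_writes_expired(1000, 5, 1400, 0, 100));	/* 0 */
	printf("%d\n", ignored_writes_expired(1000, 5, 1600, 0, 100));	/* 1 */
	/* Armed at txg 50 for 3 txgs: expires after txg 53. */
	printf("%d\n", ignored_writes_expired(50, -3, 0, 52, 100));	/* 0 */
	printf("%d\n", ignored_writes_expired(50, -3, 0, 54, 100));	/* 1 */
	return (0);
}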
*/ if (inject_delay_count == 0) { rw_exit(&inject_lock); return (0); } /* * Each inject handler has a number of "lanes" associated with * it. Each lane is able to handle requests independently of one * another, and at a latency defined by the inject handler * record's zi_timer field. Thus if a handler in configured with * a single lane with a 10ms latency, it will delay requests * such that only a single request is completed every 10ms. So, * if more than one request is attempted per each 10ms interval, * the average latency of the requests will be greater than * 10ms; but if only a single request is submitted each 10ms * interval the average latency will be 10ms. * * We need to acquire this mutex to prevent multiple concurrent * threads being assigned to the same lane of a given inject * handler. The mutex allows us to perform the following two * operations atomically: * * 1. determine the minimum handler and minimum target * value of all the possible handlers * 2. update that minimum handler's lane array * * Without atomicity, two (or more) threads could pick the same * lane in step (1), and then conflict with each other in step * (2). This could allow a single lane handler to process * multiple requests simultaneously, which shouldn't be possible. */ mutex_enter(&inject_delay_mtx); for (inject_handler_t *handler = list_head(&inject_handlers); handler != NULL; handler = list_next(&inject_handlers, handler)) { if (handler->zi_record.zi_cmd != ZINJECT_DELAY_IO) continue; if (!freq_triggered(handler->zi_record.zi_freq)) continue; if (vd->vdev_guid != handler->zi_record.zi_guid) continue; /* * Defensive; should never happen as the array allocation * occurs prior to inserting this handler on the list. */ ASSERT3P(handler->zi_lanes, !=, NULL); /* * This should never happen, the zinject command should * prevent a user from setting an IO delay with zero lanes. */ ASSERT3U(handler->zi_record.zi_nlanes, !=, 0); ASSERT3U(handler->zi_record.zi_nlanes, >, handler->zi_next_lane); /* * We want to issue this IO to the lane that will become * idle the soonest, so we compare the soonest this * specific handler can complete the IO with all other * handlers, to find the lowest value of all possible * lanes. We then use this lane to submit the request. * * Since each handler has a constant value for its * delay, we can just use the "next" lane for that * handler; as it will always be the lane with the * lowest value for that particular handler (i.e. the * lane that will become idle the soonest). This saves a * scan of each handler's lanes array. * * There's two cases to consider when determining when * this specific IO request should complete. If this * lane is idle, we want to "submit" the request now so * it will complete after zi_timer milliseconds. Thus, * we set the target to now + zi_timer. * * If the lane is busy, we want this request to complete * zi_timer milliseconds after the lane becomes idle. * Since the 'zi_lanes' array holds the time at which * each lane will become idle, we use that value to * determine when this request should complete. 
*/ hrtime_t idle = handler->zi_record.zi_timer + gethrtime(); hrtime_t busy = handler->zi_record.zi_timer + handler->zi_lanes[handler->zi_next_lane]; hrtime_t target = MAX(idle, busy); if (min_handler == NULL) { min_handler = handler; min_target = target; continue; } ASSERT3P(min_handler, !=, NULL); ASSERT3U(min_target, !=, 0); /* * We don't yet increment the "next lane" variable since * we still might find a lower value lane in another * handler during any remaining iterations. Once we're * sure we've selected the absolute minimum, we'll claim * the lane and increment the handler's "next lane" * field below. */ if (target < min_target) { min_handler = handler; min_target = target; } } /* * 'min_handler' will be NULL if no IO delays are registered for * this vdev, otherwise it will point to the handler containing * the lane that will become idle the soonest. */ if (min_handler != NULL) { ASSERT3U(min_target, !=, 0); min_handler->zi_lanes[min_handler->zi_next_lane] = min_target; /* * If we've used all possible lanes for this handler, * loop back and start using the first lane again; * otherwise, just increment the lane index. */ min_handler->zi_next_lane = (min_handler->zi_next_lane + 1) % min_handler->zi_record.zi_nlanes; } mutex_exit(&inject_delay_mtx); rw_exit(&inject_lock); return (min_target); } static int zio_calculate_range(const char *pool, zinject_record_t *record) { dsl_pool_t *dp; dsl_dataset_t *ds; objset_t *os = NULL; dnode_t *dn = NULL; int error; /* * Obtain the dnode for object using pool, objset, and object */ error = dsl_pool_hold(pool, FTAG, &dp); if (error) return (error); error = dsl_dataset_hold_obj(dp, record->zi_objset, FTAG, &ds); dsl_pool_rele(dp, FTAG); if (error) return (error); error = dmu_objset_from_ds(ds, &os); dsl_dataset_rele(ds, FTAG); if (error) return (error); error = dnode_hold(os, record->zi_object, FTAG, &dn); if (error) return (error); /* * Translate the range into block IDs */ if (record->zi_start != 0 || record->zi_end != -1ULL) { record->zi_start >>= dn->dn_datablkshift; record->zi_end >>= dn->dn_datablkshift; } if (record->zi_level > 0) { if (record->zi_level >= dn->dn_nlevels) { dnode_rele(dn, FTAG); return (SET_ERROR(EDOM)); } if (record->zi_start != 0 || record->zi_end != 0) { int shift = dn->dn_indblkshift - SPA_BLKPTRSHIFT; for (int level = record->zi_level; level > 0; level--) { record->zi_start >>= shift; record->zi_end >>= shift; } } } dnode_rele(dn, FTAG); return (0); } /* * Create a new handler for the given record. We add it to the list, adding * a reference to the spa_t in the process. We increment zio_injection_enabled, * which is the switch to trigger all fault injection. */ int zio_inject_fault(char *name, int flags, int *id, zinject_record_t *record) { inject_handler_t *handler; int error; spa_t *spa; /* * If this is pool-wide metadata, make sure we unload the corresponding * spa_t, so that the next attempt to load it will trigger the fault. * We call spa_reset() to unload the pool appropriately. */ if (flags & ZINJECT_UNLOAD_SPA) if ((error = spa_reset(name)) != 0) return (error); if (record->zi_cmd == ZINJECT_DELAY_IO) { /* * A value of zero for the number of lanes or for the * delay time doesn't make sense. */ if (record->zi_timer == 0 || record->zi_nlanes == 0) return (SET_ERROR(EINVAL)); /* * The number of lanes is directly mapped to the size of * an array used by the handler. Thus, to ensure the * user doesn't trigger an allocation that's "too large" * we cap the number of lanes here. 
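The lane arithmetic above reduces to: a request completes zi_timer after "now" if its lane is idle, or zi_timer after the lane frees up if it is busy, and the chosen lane then remembers the new completion time. The following standalone sketch simulates a single two-lane, 10 ms handler under that model; the real code defers the lane advance until the minimum across all handlers is known, which is equivalent when only one handler exists, and all names here are hypothetical.

#include <stdint.h>
#include <stdio.h>

typedef int64_t hrtime_sketch_t;	/* nanoseconds, like hrtime_t */

#define	MS	1000000LL

/*
 * Each lane remembers when it becomes idle; a request completes either
 * zi_timer after now (idle lane) or zi_timer after the lane frees up
 * (busy lane), whichever is later.
 */
static hrtime_sketch_t
next_target(hrtime_sketch_t *lanes, int nlanes, int *next_lane,
    hrtime_sketch_t zi_timer, hrtime_sketch_t now)
{
	hrtime_sketch_t idle = now + zi_timer;
	hrtime_sketch_t busy = lanes[*next_lane] + zi_timer;
	hrtime_sketch_t target = (idle > busy) ? idle : busy;

	lanes[*next_lane] = target;			/* claim the lane */
	*next_lane = (*next_lane + 1) % nlanes;		/* round-robin to the next lane */
	return (target);
}

int
main(void)
{
	hrtime_sketch_t lanes[2] = { 0, 0 };	/* 2 lanes, both idle */
	int next_lane = 0;
	hrtime_sketch_t now = 0;

	/* Five back-to-back requests: completions come in pairs, 10 ms apart. */
	for (int i = 0; i < 5; i++)
		printf("request %d completes at %lld ms\n", i, (long long)
		    (next_target(lanes, 2, &next_lane, 10 * MS, now) / MS));
	return (0);
}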
*/ if (record->zi_nlanes >= UINT16_MAX) return (SET_ERROR(EINVAL)); } /* * If the supplied range was in bytes -- calculate the actual blkid */ if (flags & ZINJECT_CALC_RANGE) { error = zio_calculate_range(name, record); if (error != 0) return (error); } if (!(flags & ZINJECT_NULL)) { /* * spa_inject_ref() will add an injection reference, which will * prevent the pool from being removed from the namespace while * still allowing it to be unloaded. */ if ((spa = spa_inject_addref(name)) == NULL) return (SET_ERROR(ENOENT)); handler = kmem_alloc(sizeof (inject_handler_t), KM_SLEEP); handler->zi_spa = spa; handler->zi_record = *record; if (handler->zi_record.zi_cmd == ZINJECT_DELAY_IO) { handler->zi_lanes = kmem_zalloc( sizeof (*handler->zi_lanes) * handler->zi_record.zi_nlanes, KM_SLEEP); handler->zi_next_lane = 0; } else { handler->zi_lanes = NULL; handler->zi_next_lane = 0; } rw_enter(&inject_lock, RW_WRITER); /* * We can't move this increment into the conditional * above because we need to hold the RW_WRITER lock of * inject_lock, and we don't want to hold that while * allocating the handler's zi_lanes array. */ if (handler->zi_record.zi_cmd == ZINJECT_DELAY_IO) { ASSERT3S(inject_delay_count, >=, 0); inject_delay_count++; ASSERT3S(inject_delay_count, >, 0); } *id = handler->zi_id = inject_next_id++; list_insert_tail(&inject_handlers, handler); atomic_inc_32(&zio_injection_enabled); rw_exit(&inject_lock); } /* * Flush the ARC, so that any attempts to read this data will end up * going to the ZIO layer. Note that this is a little overkill, but * we don't have the necessary ARC interfaces to do anything else, and * fault injection isn't a performance critical path. */ if (flags & ZINJECT_FLUSH_ARC) /* * We must use FALSE to ensure arc_flush returns, since * we're not preventing concurrent ARC insertions. */ arc_flush(NULL, FALSE); return (0); } /* * Returns the next record with an ID greater than that supplied to the * function. Used to iterate over all handlers in the system. */ int zio_inject_list_next(int *id, char *name, size_t buflen, zinject_record_t *record) { inject_handler_t *handler; int ret; mutex_enter(&spa_namespace_lock); rw_enter(&inject_lock, RW_READER); for (handler = list_head(&inject_handlers); handler != NULL; handler = list_next(&inject_handlers, handler)) if (handler->zi_id > *id) break; if (handler) { *record = handler->zi_record; *id = handler->zi_id; - (void) strncpy(name, spa_name(handler->zi_spa), buflen); + (void) strlcpy(name, spa_name(handler->zi_spa), buflen); ret = 0; } else { ret = SET_ERROR(ENOENT); } rw_exit(&inject_lock); mutex_exit(&spa_namespace_lock); return (ret); } /* * Clear the fault handler with the given identifier, or return ENOENT if none * exists. 
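zio_inject_list_next() is a cursor-style iterator: the caller passes in the last id it saw and gets back the first handler with a larger id, until ENOENT signals the end. A user-space sketch of the same pattern over a hypothetical, already-ordered handler array:

#include <stdio.h>

/* Hypothetical stand-ins for registered handlers and their ids. */
typedef struct {
	int zi_id;
	const char *pool;
} handler_sketch_t;

static handler_sketch_t handlers_sketch[] = {
	{ 1, "tank" }, { 3, "dozer" }, { 7, "tank" },
};

/*
 * Model of the iterator: return the first handler whose id is greater
 * than *id and advance the cursor, or report end-of-list (nonzero).
 */
static int
list_next_sketch(int *id, const handler_sketch_t **out)
{
	for (unsigned i = 0;
	    i < sizeof (handlers_sketch) / sizeof (handlers_sketch[0]); i++) {
		if (handlers_sketch[i].zi_id > *id) {
			*out = &handlers_sketch[i];
			*id = handlers_sketch[i].zi_id;
			return (0);
		}
	}
	return (-1);	/* ENOENT equivalent: iteration complete */
}

int
main(void)
{
	const handler_sketch_t *h;
	int id = 0;	/* cursor: start below the smallest valid id (ids start at 1) */

	while (list_next_sketch(&id, &h) == 0)
		printf("handler %d on pool %s\n", h->zi_id, h->pool);
	return (0);
}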
*/ int zio_clear_fault(int id) { inject_handler_t *handler; rw_enter(&inject_lock, RW_WRITER); for (handler = list_head(&inject_handlers); handler != NULL; handler = list_next(&inject_handlers, handler)) if (handler->zi_id == id) break; if (handler == NULL) { rw_exit(&inject_lock); return (SET_ERROR(ENOENT)); } if (handler->zi_record.zi_cmd == ZINJECT_DELAY_IO) { ASSERT3S(inject_delay_count, >, 0); inject_delay_count--; ASSERT3S(inject_delay_count, >=, 0); } list_remove(&inject_handlers, handler); rw_exit(&inject_lock); if (handler->zi_record.zi_cmd == ZINJECT_DELAY_IO) { ASSERT3P(handler->zi_lanes, !=, NULL); kmem_free(handler->zi_lanes, sizeof (*handler->zi_lanes) * handler->zi_record.zi_nlanes); } else { ASSERT3P(handler->zi_lanes, ==, NULL); } spa_inject_delref(handler->zi_spa); kmem_free(handler, sizeof (inject_handler_t)); atomic_dec_32(&zio_injection_enabled); return (0); } void zio_inject_init(void) { rw_init(&inject_lock, NULL, RW_DEFAULT, NULL); mutex_init(&inject_delay_mtx, NULL, MUTEX_DEFAULT, NULL); list_create(&inject_handlers, sizeof (inject_handler_t), offsetof(inject_handler_t, zi_link)); } void zio_inject_fini(void) { list_destroy(&inject_handlers); mutex_destroy(&inject_delay_mtx); rw_destroy(&inject_lock); } #if defined(_KERNEL) EXPORT_SYMBOL(zio_injection_enabled); EXPORT_SYMBOL(zio_inject_fault); EXPORT_SYMBOL(zio_inject_list_next); EXPORT_SYMBOL(zio_clear_fault); EXPORT_SYMBOL(zio_handle_fault_injection); EXPORT_SYMBOL(zio_handle_device_injection); EXPORT_SYMBOL(zio_handle_label_injection); #endif diff --git a/tests/zfs-tests/cmd/draid.c b/tests/zfs-tests/cmd/draid.c index 39b58a709cec..76fdb4e8417f 100644 --- a/tests/zfs-tests/cmd/draid.c +++ b/tests/zfs-tests/cmd/draid.c @@ -1,1407 +1,1407 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or https://opensource.org/licenses/CDDL-1.0. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2018 Intel Corporation. * Copyright (c) 2020 by Lawrence Livermore National Security, LLC. */ #include #include #include #include #include #include /* * The number of rows to generate for new permutation maps. */ #define MAP_ROWS_DEFAULT 256 /* * Key values for dRAID maps when stored as nvlists. 
*/ #define MAP_SEED "seed" #define MAP_CHECKSUM "checksum" #define MAP_WORST_RATIO "worst_ratio" #define MAP_AVG_RATIO "avg_ratio" #define MAP_CHILDREN "children" #define MAP_NPERMS "nperms" #define MAP_PERMS "perms" static void draid_usage(void) { (void) fprintf(stderr, "usage: draid command args ...\n" "Available commands are:\n" "\n" "\tdraid generate [-cv] [-m min] [-n max] [-p passes] FILE\n" "\tdraid verify [-rv] FILE\n" "\tdraid dump [-v] [-m min] [-n max] FILE\n" "\tdraid table FILE\n" "\tdraid merge FILE SRC SRC...\n"); exit(1); } static int read_map(const char *filename, nvlist_t **allcfgs) { int block_size = 131072; int buf_size = 131072; int tmp_size, error; char *tmp_buf; struct stat64 stat; if (lstat64(filename, &stat) != 0) return (errno); if (stat.st_size == 0 || !(S_ISREG(stat.st_mode) || S_ISLNK(stat.st_mode))) { return (EINVAL); } gzFile fp = gzopen(filename, "rb"); if (fp == Z_NULL) return (errno); char *buf = malloc(buf_size); if (buf == NULL) { (void) gzclose(fp); return (ENOMEM); } ssize_t rc, bytes = 0; while (!gzeof(fp)) { rc = gzread(fp, buf + bytes, block_size); if ((rc < 0) || (rc == 0 && !gzeof(fp))) { free(buf); (void) gzclose(fp); (void) gzerror(fp, &error); return (error); } else { bytes += rc; if (bytes + block_size >= buf_size) { tmp_size = 2 * buf_size; tmp_buf = malloc(tmp_size); if (tmp_buf == NULL) { free(buf); (void) gzclose(fp); return (ENOMEM); } memcpy(tmp_buf, buf, bytes); free(buf); buf = tmp_buf; buf_size = tmp_size; } } } (void) gzclose(fp); error = nvlist_unpack(buf, bytes, allcfgs, 0); free(buf); return (error); } /* * Read a map from the specified filename. A file contains multiple maps * which are indexed by the number of children. The caller is responsible * for freeing the configuration returned. */ static int read_map_key(const char *filename, const char *key, nvlist_t **cfg) { nvlist_t *allcfgs, *foundcfg = NULL; int error; error = read_map(filename, &allcfgs); if (error != 0) return (error); (void) nvlist_lookup_nvlist(allcfgs, key, &foundcfg); if (foundcfg != NULL) { nvlist_dup(foundcfg, cfg, KM_SLEEP); error = 0; } else { error = ENOENT; } nvlist_free(allcfgs); return (error); } /* * Write all mappings to the map file. */ static int write_map(const char *filename, nvlist_t *allcfgs) { size_t buflen = 0; int error; error = nvlist_size(allcfgs, &buflen, NV_ENCODE_XDR); if (error) return (error); char *buf = malloc(buflen); if (buf == NULL) return (ENOMEM); error = nvlist_pack(allcfgs, &buf, &buflen, NV_ENCODE_XDR, KM_SLEEP); if (error) { free(buf); return (error); } /* * Atomically update the file using a temporary file and the * traditional unlink then rename steps. This code provides * no locking, it only guarantees the packed nvlist on disk * is updated atomically and is internally consistent. 
*/ char *tmpname = calloc(1, MAXPATHLEN); if (tmpname == NULL) { free(buf); return (ENOMEM); } snprintf(tmpname, MAXPATHLEN - 1, "%s.XXXXXX", filename); int fd = mkstemp(tmpname); if (fd < 0) { error = errno; free(buf); free(tmpname); return (error); } (void) close(fd); gzFile fp = gzopen(tmpname, "w9b"); if (fp == Z_NULL) { error = errno; free(buf); free(tmpname); return (errno); } ssize_t rc, bytes = 0; while (bytes < buflen) { size_t size = MIN(buflen - bytes, 131072); rc = gzwrite(fp, buf + bytes, size); if (rc < 0) { free(buf); (void) gzerror(fp, &error); (void) gzclose(fp); (void) unlink(tmpname); free(tmpname); return (error); } else if (rc == 0) { break; } else { bytes += rc; } } free(buf); (void) gzclose(fp); if (bytes != buflen) { (void) unlink(tmpname); free(tmpname); return (EIO); } /* * Unlink the previous config file and replace it with the updated * version. If we're able to unlink the file then directory is * writable by us and the subsequent rename should never fail. */ error = unlink(filename); if (error != 0 && errno != ENOENT) { error = errno; (void) unlink(tmpname); free(tmpname); return (error); } error = rename(tmpname, filename); if (error != 0) { error = errno; (void) unlink(tmpname); free(tmpname); return (error); } free(tmpname); return (0); } /* * Add the dRAID map to the file and write it out. */ static int write_map_key(const char *filename, char *key, draid_map_t *map, double worst_ratio, double avg_ratio) { nvlist_t *nv_cfg, *allcfgs; int error; /* * Add the configuration to an existing or new file. The new * configuration will replace an existing configuration with the * same key if it has a lower ratio and is therefore better. */ error = read_map(filename, &allcfgs); if (error == ENOENT) { allcfgs = fnvlist_alloc(); } else if (error != 0) { return (error); } error = nvlist_lookup_nvlist(allcfgs, key, &nv_cfg); if (error == 0) { uint64_t nv_cfg_worst_ratio = fnvlist_lookup_uint64(nv_cfg, MAP_WORST_RATIO); double nv_worst_ratio = (double)nv_cfg_worst_ratio / 1000.0; if (worst_ratio < nv_worst_ratio) { /* Replace old map with the more balanced new map. */ fnvlist_remove(allcfgs, key); } else { /* The old map is preferable, keep it. 
*/ nvlist_free(allcfgs); return (EEXIST); } } nvlist_t *cfg = fnvlist_alloc(); fnvlist_add_uint64(cfg, MAP_SEED, map->dm_seed); fnvlist_add_uint64(cfg, MAP_CHECKSUM, map->dm_checksum); fnvlist_add_uint64(cfg, MAP_CHILDREN, map->dm_children); fnvlist_add_uint64(cfg, MAP_NPERMS, map->dm_nperms); fnvlist_add_uint8_array(cfg, MAP_PERMS, map->dm_perms, map->dm_children * map->dm_nperms * sizeof (uint8_t)); fnvlist_add_uint64(cfg, MAP_WORST_RATIO, (uint64_t)(worst_ratio * 1000.0)); fnvlist_add_uint64(cfg, MAP_AVG_RATIO, (uint64_t)(avg_ratio * 1000.0)); error = nvlist_add_nvlist(allcfgs, key, cfg); if (error == 0) error = write_map(filename, allcfgs); nvlist_free(cfg); nvlist_free(allcfgs); return (error); } static void dump_map(draid_map_t *map, const char *key, double worst_ratio, double avg_ratio, int verbose) { if (verbose == 0) { return; } else if (verbose == 1) { printf(" \"%s\": seed: 0x%016llx worst_ratio: %2.03f " "avg_ratio: %2.03f\n", key, (u_longlong_t)map->dm_seed, worst_ratio, avg_ratio); return; } else { printf(" \"%s\":\n" " seed: 0x%016llx\n" " checksum: 0x%016llx\n" " worst_ratio: %2.03f\n" " avg_ratio: %2.03f\n" " children: %llu\n" " nperms: %llu\n", key, (u_longlong_t)map->dm_seed, (u_longlong_t)map->dm_checksum, worst_ratio, avg_ratio, (u_longlong_t)map->dm_children, (u_longlong_t)map->dm_nperms); if (verbose > 2) { printf(" perms = {\n"); for (int i = 0; i < map->dm_nperms; i++) { printf(" { "); for (int j = 0; j < map->dm_children; j++) { printf("%3d%s ", map->dm_perms[ i * map->dm_children + j], j < map->dm_children - 1 ? "," : ""); } printf(" },\n"); } printf(" }\n"); } else if (verbose == 2) { printf(" draid_perms = \n"); } } } static void dump_map_nv(const char *key, nvlist_t *cfg, int verbose) { draid_map_t map; uint_t c; uint64_t worst_ratio = fnvlist_lookup_uint64(cfg, MAP_WORST_RATIO); uint64_t avg_ratio = fnvlist_lookup_uint64(cfg, MAP_AVG_RATIO); map.dm_seed = fnvlist_lookup_uint64(cfg, MAP_SEED); map.dm_checksum = fnvlist_lookup_uint64(cfg, MAP_CHECKSUM); map.dm_children = fnvlist_lookup_uint64(cfg, MAP_CHILDREN); map.dm_nperms = fnvlist_lookup_uint64(cfg, MAP_NPERMS); map.dm_perms = fnvlist_lookup_uint8_array(cfg, MAP_PERMS, &c); dump_map(&map, key, (double)worst_ratio / 1000.0, avg_ratio / 1000.0, verbose); } /* * Print a summary of the mapping. */ static int dump_map_key(const char *filename, const char *key, int verbose) { nvlist_t *cfg; int error; error = read_map_key(filename, key, &cfg); if (error != 0) return (error); dump_map_nv(key, cfg, verbose); return (0); } /* * Allocate a new permutation map for evaluation. */ static int alloc_new_map(uint64_t children, uint64_t nperms, uint64_t seed, draid_map_t **mapp) { draid_map_t *map; int error; map = malloc(sizeof (draid_map_t)); if (map == NULL) return (ENOMEM); map->dm_children = children; map->dm_nperms = nperms; map->dm_seed = seed; map->dm_checksum = 0; error = vdev_draid_generate_perms(map, &map->dm_perms); if (error) { free(map); return (error); } *mapp = map; return (0); } /* * Allocate the fixed permutation map for N children. 
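The worst and average ratios are persisted as unsigned fixed-point thousandths, written with (uint64_t)(ratio * 1000.0) and read back by dividing by 1000.0, so comparisons are only meaningful to three decimal places. A minimal round-trip sketch, with hypothetical helper names:

#include <stdint.h>
#include <stdio.h>

/* Encode a ratio as thousandths for storage in a uint64 nvlist field. */
static uint64_t
ratio_encode(double ratio)
{
	return ((uint64_t)(ratio * 1000.0));
}

/* Decode a stored uint64 back into a double for display/comparison. */
static double
ratio_decode(uint64_t stored)
{
	return ((double)stored / 1000.0);
}

int
main(void)
{
	double worst_ratio = 1.25;
	uint64_t stored = ratio_encode(worst_ratio);

	/* Truncation to thousandths: values agree to three decimal places. */
	printf("stored=%llu decoded=%.3f\n",
	    (unsigned long long)stored, ratio_decode(stored));
	return (0);
}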
*/ static int alloc_fixed_map(uint64_t children, draid_map_t **mapp) { const draid_map_t *fixed_map; draid_map_t *map; int error; error = vdev_draid_lookup_map(children, &fixed_map); if (error) return (error); map = malloc(sizeof (draid_map_t)); if (map == NULL) return (ENOMEM); memcpy(map, fixed_map, sizeof (draid_map_t)); VERIFY3U(map->dm_checksum, !=, 0); error = vdev_draid_generate_perms(map, &map->dm_perms); if (error) { free(map); return (error); } *mapp = map; return (0); } /* * Free a permutation map. */ static void free_map(draid_map_t *map) { free(map->dm_perms); free(map); } /* * Check if dev is in the provided list of faulted devices. */ static inline boolean_t is_faulted(int *faulted_devs, int nfaulted, int dev) { for (int i = 0; i < nfaulted; i++) if (faulted_devs[i] == dev) return (B_TRUE); return (B_FALSE); } /* * Evaluate how resilvering I/O will be distributed given a list of faulted * vdevs. As a simplification we assume one IO is sufficient to repair each * damaged device in a group. */ static double eval_resilver(draid_map_t *map, uint64_t groupwidth, uint64_t nspares, int *faulted_devs, int nfaulted, int *min_child_ios, int *max_child_ios) { uint64_t children = map->dm_children; uint64_t ngroups = 1; uint64_t ndisks = children - nspares; /* * Calculate the minimum number of groups required to fill a slice. */ while (ngroups * (groupwidth) % (children - nspares) != 0) ngroups++; int *ios = calloc(map->dm_children, sizeof (uint64_t)); /* Resilver all rows */ for (int i = 0; i < map->dm_nperms; i++) { uint8_t *row = &map->dm_perms[i * map->dm_children]; /* Resilver all groups with faulted drives */ for (int j = 0; j < ngroups; j++) { uint64_t spareidx = map->dm_children - nspares; boolean_t repair_needed = B_FALSE; /* See if any devices in this group are faulted */ uint64_t groupstart = (j * groupwidth) % ndisks; for (int k = 0; k < groupwidth; k++) { uint64_t groupidx = (groupstart + k) % ndisks; repair_needed = is_faulted(faulted_devs, nfaulted, row[groupidx]); if (repair_needed) break; } if (repair_needed == B_FALSE) continue; /* * This group is degraded. Calculate the number of * reads the non-faulted drives require and the number * of writes to the distributed hot spare for this row. */ for (int k = 0; k < groupwidth; k++) { uint64_t groupidx = (groupstart + k) % ndisks; if (!is_faulted(faulted_devs, nfaulted, row[groupidx])) { ios[row[groupidx]]++; } else if (nspares > 0) { while (is_faulted(faulted_devs, nfaulted, row[spareidx])) { spareidx++; } ASSERT3U(spareidx, <, map->dm_children); ios[row[spareidx]]++; spareidx++; } } } } *min_child_ios = INT_MAX; *max_child_ios = 0; /* * Find the drives with fewest and most required I/O. These values * are used to calculate the imbalance ratio. To avoid returning an * infinite value for permutations which have children that perform * no IO a floor of 1 IO per child is set. This ensures a meaningful * ratio is returned for comparison and it is not an uncommon when * there are a large number of children. */ for (int i = 0; i < map->dm_children; i++) { if (is_faulted(faulted_devs, nfaulted, i)) { ASSERT0(ios[i]); continue; } if (ios[i] == 0) ios[i] = 1; if (ios[i] < *min_child_ios) *min_child_ios = ios[i]; if (ios[i] > *max_child_ios) *max_child_ios = ios[i]; } ASSERT3S(*min_child_ios, !=, INT_MAX); ASSERT3S(*max_child_ios, !=, 0); double ratio = (double)(*max_child_ios) / (double)(*min_child_ios); free(ios); return (ratio); } /* * Evaluate the quality of the permutation mapping by considering possible * device failures. 
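eval_resilver() first determines how many groups of groupwidth must be laid out before the rotation realigns with the ndisks = children - nspares data slots, i.e. the smallest ngroups for which ngroups * groupwidth is divisible by ndisks. A standalone sketch of that step with a worked example (names hypothetical):

#include <stdint.h>
#include <stdio.h>

/*
 * Smallest number of groups such that ngroups * groupwidth is a multiple
 * of the number of data slots; this is how many groups it takes for the
 * group rotation to return to its starting alignment within a slice.
 */
static uint64_t
groups_per_slice(uint64_t groupwidth, uint64_t ndisks)
{
	uint64_t ngroups = 1;

	while ((ngroups * groupwidth) % ndisks != 0)
		ngroups++;
	return (ngroups);
}

int
main(void)
{
	/* e.g. 11 children, 1 distributed spare, 4-wide groups (2d+2p). */
	uint64_t children = 11, nspares = 1, groupwidth = 4;
	uint64_t ndisks = children - nspares;

	printf("%llu groups fill a slice across %llu data slots\n",
	    (unsigned long long)groups_per_slice(groupwidth, ndisks),
	    (unsigned long long)ndisks);	/* prints 5: 5 * 4 = 20 is a multiple of 10 */
	return (0);
}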
Returns the imbalance ratio for the worst mapping which * is defined to be the largest number of child IOs over the fewest number * child IOs. A value of 1.0 indicates the mapping is perfectly balance and * all children perform an equal amount of work during reconstruction. */ static void eval_decluster(draid_map_t *map, double *worst_ratiop, double *avg_ratiop) { uint64_t children = map->dm_children; double worst_ratio = 1.0; double sum = 0; int worst_min_ios = 0, worst_max_ios = 0; int n = 0; /* * When there are only 2 children there can be no distributed * spare and no resilver to evaluate. Default to a ratio of 1.0 * for this degenerate case. */ if (children == VDEV_DRAID_MIN_CHILDREN) { *worst_ratiop = 1.0; *avg_ratiop = 1.0; return; } /* * Score the mapping as if it had either 1 or 2 distributed spares. */ for (int nspares = 1; nspares <= 2; nspares++) { uint64_t faults = nspares; /* * Score groupwidths up to 19. This value was chosen as the * largest reasonable width (16d+3p). dRAID pools may be still * be created with wider stripes but they are not considered in * this analysis in order to optimize for the most common cases. */ for (uint64_t groupwidth = 2; groupwidth <= MIN(children - nspares, 19); groupwidth++) { int faulted_devs[2]; int min_ios, max_ios; /* * Score possible devices faults. This is limited * to exactly one fault per distributed spare for * the purposes of this similation. */ for (int f1 = 0; f1 < children; f1++) { faulted_devs[0] = f1; double ratio; if (faults == 1) { ratio = eval_resilver(map, groupwidth, nspares, faulted_devs, faults, &min_ios, &max_ios); if (ratio > worst_ratio) { worst_ratio = ratio; worst_min_ios = min_ios; worst_max_ios = max_ios; } sum += ratio; n++; } else if (faults == 2) { for (int f2 = f1 + 1; f2 < children; f2++) { faulted_devs[1] = f2; ratio = eval_resilver(map, groupwidth, nspares, faulted_devs, faults, &min_ios, &max_ios); if (ratio > worst_ratio) { worst_ratio = ratio; worst_min_ios = min_ios; worst_max_ios = max_ios; } sum += ratio; n++; } } } } } *worst_ratiop = worst_ratio; *avg_ratiop = sum / n; /* * Log the min/max io values for particularly unbalanced maps. * Since the maps are generated entirely randomly these are possible * be exceedingly unlikely. We log it for possible investigation. */ if (worst_ratio > 100.0) { dump_map(map, "DEBUG", worst_ratio, *avg_ratiop, 2); printf("worst_min_ios=%d worst_max_ios=%d\n", worst_min_ios, worst_max_ios); } } static int eval_maps(uint64_t children, int passes, uint64_t *map_seed, draid_map_t **best_mapp, double *best_ratiop, double *avg_ratiop) { draid_map_t *best_map = NULL; double best_worst_ratio = 1000.0; double best_avg_ratio = 1000.0; /* * Perform the requested number of passes evaluating randomly * generated permutation maps. Only the best version is kept. */ for (int i = 0; i < passes; i++) { double worst_ratio, avg_ratio; draid_map_t *map; int error; /* * Calculate the next seed and generate a new candidate map. */ error = alloc_new_map(children, MAP_ROWS_DEFAULT, vdev_draid_rand(map_seed), &map); if (error) { if (best_map != NULL) free_map(best_map); return (error); } /* * Consider maps with a lower worst_ratio to be of higher * quality. Some maps may have a lower avg_ratio but they * are discarded since they might include some particularly * imbalanced permutations. The average is tracked to in * order to get a sense of the average permutation quality. 
*/ eval_decluster(map, &worst_ratio, &avg_ratio); if (best_map == NULL || worst_ratio < best_worst_ratio) { if (best_map != NULL) free_map(best_map); best_map = map; best_worst_ratio = worst_ratio; best_avg_ratio = avg_ratio; } else { free_map(map); } } /* * After determining the best map generate a checksum over the full * permutation array. This checksum is verified when opening a dRAID * pool to ensure the generated in memory permutations are correct. */ zio_cksum_t cksum; fletcher_4_native_varsize(best_map->dm_perms, sizeof (uint8_t) * best_map->dm_children * best_map->dm_nperms, &cksum); best_map->dm_checksum = cksum.zc_word[0]; *best_mapp = best_map; *best_ratiop = best_worst_ratio; *avg_ratiop = best_avg_ratio; return (0); } static int draid_generate(int argc, char *argv[]) { char filename[MAXPATHLEN] = {0}; uint64_t map_seed; int c, fd, error, verbose = 0, passes = 1, continuous = 0; int min_children = VDEV_DRAID_MIN_CHILDREN; int max_children = VDEV_DRAID_MAX_CHILDREN; int restarts = 0; while ((c = getopt(argc, argv, ":cm:n:p:v")) != -1) { switch (c) { case 'c': continuous++; break; case 'm': min_children = (int)strtol(optarg, NULL, 0); if (min_children < VDEV_DRAID_MIN_CHILDREN) { (void) fprintf(stderr, "A minimum of 2 " "children are required.\n"); return (1); } break; case 'n': max_children = (int)strtol(optarg, NULL, 0); if (max_children > VDEV_DRAID_MAX_CHILDREN) { (void) fprintf(stderr, "A maximum of %d " "children are allowed.\n", VDEV_DRAID_MAX_CHILDREN); return (1); } break; case 'p': passes = (int)strtol(optarg, NULL, 0); break; case 'v': /* * 0 - Only log when a better map is added to the file. * 1 - Log the current best map for each child count. * Minimal output on a single summary line. * 2 - Log the current best map for each child count. * More verbose includes most map fields. * 3 - Log the current best map for each child count. * Very verbose all fields including the full map. */ verbose++; break; case ':': (void) fprintf(stderr, "missing argument for '%c' option\n", optopt); draid_usage(); break; case '?': (void) fprintf(stderr, "invalid option '%c'\n", optopt); draid_usage(); break; } } if (argc > optind) - strncpy(filename, argv[optind], MAXPATHLEN - 1); + strlcpy(filename, argv[optind], sizeof (filename)); else { (void) fprintf(stderr, "A FILE must be specified.\n"); return (1); } restart: /* * Start with a fresh seed from /dev/urandom. */ fd = open("/dev/urandom", O_RDONLY); if (fd < 0) { printf("Unable to open /dev/urandom: %s\n:", strerror(errno)); return (1); } else { ssize_t bytes = sizeof (map_seed); ssize_t bytes_read = 0; while (bytes_read < bytes) { ssize_t rc = read(fd, ((char *)&map_seed) + bytes_read, bytes - bytes_read); if (rc < 0) { printf("Unable to read /dev/urandom: %s\n:", strerror(errno)); close(fd); return (1); } bytes_read += rc; } (void) close(fd); } if (restarts == 0) printf("Writing generated mappings to '%s':\n", filename); /* * Generate maps for all requested child counts. The best map for * each child count is written out to the specified file. If the file * already contains a better mapping this map will not be added. 
*/ for (uint64_t children = min_children; children <= max_children; children++) { char key[8] = { 0 }; draid_map_t *map; double worst_ratio = 1000.0; double avg_ratio = 1000.0; error = eval_maps(children, passes, &map_seed, &map, &worst_ratio, &avg_ratio); if (error) { printf("Error eval_maps(): %s\n", strerror(error)); return (1); } if (worst_ratio < 1.0 || avg_ratio < 1.0) { printf("Error ratio < 1.0: worst_ratio = %2.03f " "avg_ratio = %2.03f\n", worst_ratio, avg_ratio); return (1); } snprintf(key, 7, "%llu", (u_longlong_t)children); error = write_map_key(filename, key, map, worst_ratio, avg_ratio); if (error == 0) { /* The new map was added to the file. */ dump_map(map, key, worst_ratio, avg_ratio, MAX(verbose, 1)); } else if (error == EEXIST) { /* The existing map was preferable and kept. */ if (verbose > 0) dump_map_key(filename, key, verbose); } else { printf("Error write_map_key(): %s\n", strerror(error)); return (1); } free_map(map); } /* * When the continuous option is set restart at the minimum number of * children instead of exiting. This option is useful as a mechanism * to continuous try and refine the discovered permutations. */ if (continuous) { restarts++; printf("Restarting by request (-c): %d\n", restarts); goto restart; } return (0); } /* * Verify each map in the file by generating its in-memory permutation array * and comfirming its checksum is correct. */ static int draid_verify(int argc, char *argv[]) { char filename[MAXPATHLEN] = {0}; int n = 0, c, error, verbose = 1; int check_ratios = 0; while ((c = getopt(argc, argv, ":rv")) != -1) { switch (c) { case 'r': check_ratios++; break; case 'v': verbose++; break; case ':': (void) fprintf(stderr, "missing argument for '%c' option\n", optopt); draid_usage(); break; case '?': (void) fprintf(stderr, "invalid option '%c'\n", optopt); draid_usage(); break; } } if (argc > optind) { char *abspath = malloc(MAXPATHLEN); if (abspath == NULL) return (ENOMEM); if (realpath(argv[optind], abspath) != NULL) - strncpy(filename, abspath, MAXPATHLEN - 1); + strlcpy(filename, abspath, sizeof (filename)); else - strncpy(filename, argv[optind], MAXPATHLEN - 1); + strlcpy(filename, argv[optind], sizeof (filename)); free(abspath); } else { (void) fprintf(stderr, "A FILE must be specified.\n"); return (1); } printf("Verifying permutation maps: '%s'\n", filename); /* * Lookup hardcoded permutation map for each valid number of children * and verify a generated map has the correct checksum. Then compare * the generated map values with the nvlist map values read from the * reference file to cross-check the permutation. */ for (uint64_t children = VDEV_DRAID_MIN_CHILDREN; children <= VDEV_DRAID_MAX_CHILDREN; children++) { draid_map_t *map; char key[8] = {0}; snprintf(key, 8, "%llu", (u_longlong_t)children); error = alloc_fixed_map(children, &map); if (error) { printf("Error alloc_fixed_map() failed: %s\n", error == ECKSUM ? "Invalid checksum" : strerror(error)); return (1); } uint64_t nv_seed, nv_checksum, nv_children, nv_nperms; uint8_t *nv_perms; nvlist_t *cfg; uint_t c; error = read_map_key(filename, key, &cfg); if (error != 0) { printf("Error read_map_key() failed: %s\n", strerror(error)); free_map(map); return (1); } nv_seed = fnvlist_lookup_uint64(cfg, MAP_SEED); nv_checksum = fnvlist_lookup_uint64(cfg, MAP_CHECKSUM); nv_children = fnvlist_lookup_uint64(cfg, MAP_CHILDREN); nv_nperms = fnvlist_lookup_uint64(cfg, MAP_NPERMS); nvlist_lookup_uint8_array(cfg, MAP_PERMS, &nv_perms, &c); /* * Compare draid_map_t and nvlist reference values. 
*/ if (map->dm_seed != nv_seed) { printf("Error different seeds: 0x%016llx != " "0x%016llx\n", (u_longlong_t)map->dm_seed, (u_longlong_t)nv_seed); error = EINVAL; } if (map->dm_checksum != nv_checksum) { printf("Error different checksums: 0x%016llx " "!= 0x%016llx\n", (u_longlong_t)map->dm_checksum, (u_longlong_t)nv_checksum); error = EINVAL; } if (map->dm_children != nv_children) { printf("Error different children: %llu " "!= %llu\n", (u_longlong_t)map->dm_children, (u_longlong_t)nv_children); error = EINVAL; } if (map->dm_nperms != nv_nperms) { printf("Error different nperms: %llu " "!= %llu\n", (u_longlong_t)map->dm_nperms, (u_longlong_t)nv_nperms); error = EINVAL; } for (uint64_t i = 0; i < nv_children * nv_nperms; i++) { if (map->dm_perms[i] != nv_perms[i]) { printf("Error different perms[%llu]: " "%d != %d\n", (u_longlong_t)i, (int)map->dm_perms[i], (int)nv_perms[i]); error = EINVAL; break; } } /* * For good measure recalculate the worst and average * ratios and confirm they match the nvlist values. */ if (check_ratios) { uint64_t nv_worst_ratio, nv_avg_ratio; double worst_ratio, avg_ratio; eval_decluster(map, &worst_ratio, &avg_ratio); nv_worst_ratio = fnvlist_lookup_uint64(cfg, MAP_WORST_RATIO); nv_avg_ratio = fnvlist_lookup_uint64(cfg, MAP_AVG_RATIO); if (worst_ratio < 1.0 || avg_ratio < 1.0) { printf("Error ratio out of range %2.03f, " "%2.03f\n", worst_ratio, avg_ratio); error = EINVAL; } if ((uint64_t)(worst_ratio * 1000.0) != nv_worst_ratio) { printf("Error different worst_ratio %2.03f " "!= %2.03f\n", (double)nv_worst_ratio / 1000.0, worst_ratio); error = EINVAL; } if ((uint64_t)(avg_ratio * 1000.0) != nv_avg_ratio) { printf("Error different average_ratio %2.03f " "!= %2.03f\n", (double)nv_avg_ratio / 1000.0, avg_ratio); error = EINVAL; } } if (error) { free_map(map); nvlist_free(cfg); return (1); } if (verbose > 0) { printf("- %llu children: good\n", (u_longlong_t)children); } n++; free_map(map); nvlist_free(cfg); } if (n != (VDEV_DRAID_MAX_CHILDREN - 1)) { printf("Error permutation maps missing: %d / %d checked\n", n, VDEV_DRAID_MAX_CHILDREN - 1); return (1); } printf("Successfully verified %d / %d permutation maps\n", n, VDEV_DRAID_MAX_CHILDREN - 1); return (0); } /* * Dump the contents of the specified mapping(s) for inspection. */ static int draid_dump(int argc, char *argv[]) { char filename[MAXPATHLEN] = {0}; int c, error, verbose = 1; int min_children = VDEV_DRAID_MIN_CHILDREN; int max_children = VDEV_DRAID_MAX_CHILDREN; while ((c = getopt(argc, argv, ":vm:n:")) != -1) { switch (c) { case 'm': min_children = (int)strtol(optarg, NULL, 0); if (min_children < 2) { (void) fprintf(stderr, "A minimum of 2 " "children are required.\n"); return (1); } break; case 'n': max_children = (int)strtol(optarg, NULL, 0); if (max_children > VDEV_DRAID_MAX_CHILDREN) { (void) fprintf(stderr, "A maximum of %d " "children are allowed.\n", VDEV_DRAID_MAX_CHILDREN); return (1); } break; case 'v': verbose++; break; case ':': (void) fprintf(stderr, "missing argument for '%c' option\n", optopt); draid_usage(); break; case '?': (void) fprintf(stderr, "invalid option '%c'\n", optopt); draid_usage(); break; } } if (argc > optind) - strncpy(filename, argv[optind], MAXPATHLEN - 1); + strlcpy(filename, argv[optind], sizeof (filename)); else { (void) fprintf(stderr, "A FILE must be specified.\n"); return (1); } /* * Dump maps for the requested child counts. 
*/ for (uint64_t children = min_children; children <= max_children; children++) { char key[8] = { 0 }; snprintf(key, 7, "%llu", (u_longlong_t)children); error = dump_map_key(filename, key, verbose); if (error) { printf("Error dump_map_key(): %s\n", strerror(error)); return (1); } } return (0); } /* * Print all of the mappings as a C formatted draid_map_t array. This table * is found in the module/zcommon/zfs_draid.c file and is the definitive * source for all mapping used by dRAID. It cannot be updated without * changing the dRAID on disk format. */ static int draid_table(int argc, char *argv[]) { char filename[MAXPATHLEN] = {0}; int error; if (argc > optind) - strncpy(filename, argv[optind], MAXPATHLEN - 1); + strlcpy(filename, argv[optind], sizeof (filename)); else { (void) fprintf(stderr, "A FILE must be specified.\n"); return (1); } printf("static const draid_map_t " "draid_maps[VDEV_DRAID_MAX_MAPS] = {\n"); for (uint64_t children = VDEV_DRAID_MIN_CHILDREN; children <= VDEV_DRAID_MAX_CHILDREN; children++) { uint64_t seed, checksum, nperms, avg_ratio; nvlist_t *cfg; char key[8] = {0}; snprintf(key, 8, "%llu", (u_longlong_t)children); error = read_map_key(filename, key, &cfg); if (error != 0) { printf("Error read_map_key() failed: %s\n", strerror(error)); return (1); } seed = fnvlist_lookup_uint64(cfg, MAP_SEED); checksum = fnvlist_lookup_uint64(cfg, MAP_CHECKSUM); children = fnvlist_lookup_uint64(cfg, MAP_CHILDREN); nperms = fnvlist_lookup_uint64(cfg, MAP_NPERMS); avg_ratio = fnvlist_lookup_uint64(cfg, MAP_AVG_RATIO); printf("\t{ %3llu, %3llu, 0x%016llx, 0x%016llx },\t" "/* %2.03f */\n", (u_longlong_t)children, (u_longlong_t)nperms, (u_longlong_t)seed, (u_longlong_t)checksum, (double)avg_ratio / 1000.0); nvlist_free(cfg); } printf("};\n"); return (0); } static int draid_merge_impl(nvlist_t *allcfgs, const char *srcfilename, int *mergedp) { nvlist_t *srccfgs; nvpair_t *elem = NULL; int error, merged = 0; error = read_map(srcfilename, &srccfgs); if (error != 0) return (error); while ((elem = nvlist_next_nvpair(srccfgs, elem)) != NULL) { uint64_t nv_worst_ratio; uint64_t allcfg_worst_ratio; nvlist_t *cfg, *allcfg; char *key; switch (nvpair_type(elem)) { case DATA_TYPE_NVLIST: (void) nvpair_value_nvlist(elem, &cfg); key = nvpair_name(elem); nv_worst_ratio = fnvlist_lookup_uint64(cfg, MAP_WORST_RATIO); error = nvlist_lookup_nvlist(allcfgs, key, &allcfg); if (error == 0) { allcfg_worst_ratio = fnvlist_lookup_uint64( allcfg, MAP_WORST_RATIO); if (nv_worst_ratio < allcfg_worst_ratio) { fnvlist_remove(allcfgs, key); error = nvlist_add_nvlist(allcfgs, key, cfg); merged++; } } else if (error == ENOENT) { error = nvlist_add_nvlist(allcfgs, key, cfg); merged++; } else { return (error); } break; default: continue; } } nvlist_free(srccfgs); *mergedp = merged; return (0); } /* * Merge the best map for each child count found in the listed files into * a new file. This allows 'draid generate' to be run in parallel and for * the results maps to be combined. 
*/ static int draid_merge(int argc, char *argv[]) { char filename[MAXPATHLEN] = {0}; int c, error, total_merged = 0; nvlist_t *allcfgs; while ((c = getopt(argc, argv, ":")) != -1) { switch (c) { case ':': (void) fprintf(stderr, "missing argument for '%c' option\n", optopt); draid_usage(); break; case '?': (void) fprintf(stderr, "invalid option '%c'\n", optopt); draid_usage(); break; } } if (argc < 4) { (void) fprintf(stderr, "A FILE and multiple SRCs must be specified.\n"); return (1); } - strncpy(filename, argv[optind], MAXPATHLEN - 1); + strlcpy(filename, argv[optind], sizeof (filename)); optind++; error = read_map(filename, &allcfgs); if (error == ENOENT) { allcfgs = fnvlist_alloc(); } else if (error != 0) { printf("Error read_map(): %s\n", strerror(error)); return (error); } while (optind < argc) { char srcfilename[MAXPATHLEN] = {0}; int merged = 0; - strncpy(srcfilename, argv[optind], MAXPATHLEN - 1); + strlcpy(srcfilename, argv[optind], sizeof (srcfilename)); error = draid_merge_impl(allcfgs, srcfilename, &merged); if (error) { printf("Error draid_merge_impl(): %s\n", strerror(error)); nvlist_free(allcfgs); return (1); } total_merged += merged; printf("Merged %d key(s) from '%s' into '%s'\n", merged, srcfilename, filename); optind++; } if (total_merged > 0) write_map(filename, allcfgs); printf("Merged a total of %d key(s) into '%s'\n", total_merged, filename); nvlist_free(allcfgs); return (0); } int main(int argc, char *argv[]) { if (argc < 2) draid_usage(); char *subcommand = argv[1]; if (strcmp(subcommand, "generate") == 0) { return (draid_generate(argc - 1, argv + 1)); } else if (strcmp(subcommand, "verify") == 0) { return (draid_verify(argc - 1, argv + 1)); } else if (strcmp(subcommand, "dump") == 0) { return (draid_dump(argc - 1, argv + 1)); } else if (strcmp(subcommand, "table") == 0) { return (draid_table(argc - 1, argv + 1)); } else if (strcmp(subcommand, "merge") == 0) { return (draid_merge(argc - 1, argv + 1)); } else { draid_usage(); } } diff --git a/tests/zfs-tests/cmd/zfs_diff-socket.c b/tests/zfs-tests/cmd/zfs_diff-socket.c index be4bf31dde9f..3ebc95799fe3 100644 --- a/tests/zfs-tests/cmd/zfs_diff-socket.c +++ b/tests/zfs-tests/cmd/zfs_diff-socket.c @@ -1,57 +1,56 @@ /* * This file and its contents are supplied under the terms of the * Common Development and Distribution License ("CDDL"), version 1.0. * You may only use this file in accordance with the terms of version * 1.0 of the CDDL. * * A full copy of the text of the CDDL should have accompanied this * source. A copy of the CDDL is also available via the Internet at * http://www.illumos.org/license/CDDL. */ /* * Copyright 2017, loli10K . All rights reserved. */ #include #include #include #include #include #include #include #include #include #include int main(int argc, char *argv[]) { struct sockaddr_un sock; int fd; char *path; size_t size; if (argc != 2) { fprintf(stderr, "usage: %s /path/to/socket\n", argv[0]); exit(1); } path = argv[1]; size = sizeof (sock.sun_path); - strncpy(sock.sun_path, (char *)path, size - 1); - sock.sun_path[size - 1] = '\0'; + (void) snprintf(sock.sun_path, size, "%s", path); sock.sun_family = AF_UNIX; if ((fd = socket(AF_UNIX, SOCK_DGRAM, 0)) == -1) { perror("socket"); return (1); } if (bind(fd, (struct sockaddr *)&sock, sizeof (struct sockaddr_un))) { perror("bind"); return (1); } if (close(fd)) { perror("close"); return (1); } return (0); }