diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vfsops.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vfsops.c
index 42c8287a7c03..33581d018256 100644
--- a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vfsops.c
+++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vfsops.c
@@ -1,2660 +1,2660 @@
 /*
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
  * Common Development and Distribution License (the "License").
  * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
  * See the License for the specific language governing permissions
  * and limitations under the License.
  *
  * When distributing Covered Code, include this CDDL HEADER in each
  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  * If applicable, add the following below this CDDL HEADER, with the
  * fields enclosed by brackets "[]" replaced with your own identifying
  * information: Portions Copyright [yyyy] [name of copyright owner]
  *
  * CDDL HEADER END
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2011 Pawel Jakub Dawidek <pawel@dawidek.net>.
  * All rights reserved.
  * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
  * Copyright (c) 2014 Integros [integros.com]
  * Copyright 2016 Nexenta Systems, Inc. All rights reserved.
  */
 
 /* Portions Copyright 2010 Robert Milkowski */
 
 #include <sys/types.h>
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/sysmacros.h>
 #include <sys/kmem.h>
 #include <sys/acl.h>
 #include <sys/vnode.h>
 #include <sys/vfs.h>
 #include <sys/mntent.h>
 #include <sys/mount.h>
 #include <sys/cmn_err.h>
 #include <sys/zfs_znode.h>
 #include <sys/zfs_vnops.h>
 #include <sys/zfs_dir.h>
 #include <sys/zil.h>
 #include <sys/fs/zfs.h>
 #include <sys/dmu.h>
 #include <sys/dsl_prop.h>
 #include <sys/dsl_dataset.h>
 #include <sys/dsl_deleg.h>
 #include <sys/spa.h>
 #include <sys/zap.h>
 #include <sys/sa.h>
 #include <sys/sa_impl.h>
 #include <sys/policy.h>
 #include <sys/atomic.h>
 #include <sys/zfs_ioctl.h>
 #include <sys/zfs_ctldir.h>
 #include <sys/zfs_fuid.h>
 #include <sys/sunddi.h>
 #include <sys/dmu_objset.h>
 #include <sys/dsl_dir.h>
 #include <sys/spa_boot.h>
 #include <sys/jail.h>
 #include <sys/osd.h>
 #include <ufs/ufs/quota.h>
 #include <sys/zfs_quota.h>
 
 #include "zfs_comutil.h"
 
 #ifndef	MNTK_VMSETSIZE_BUG
 #define	MNTK_VMSETSIZE_BUG	0
 #endif
 #ifndef	MNTK_NOMSYNC
 #define	MNTK_NOMSYNC	8
 #endif
 
 /* BEGIN CSTYLED */
 /*
  * Mutex named "zfs_debug", initialized through MTX_SYSINIT.
  * NOTE(review): what it protects is not visible in this part of the file.
  */
 struct mtx zfs_debug_mtx;
 MTX_SYSINIT(zfs_debug_mtx, &zfs_debug_mtx, "zfs_debug", MTX_DEF);
 
 /* Parent node of the vfs.zfs sysctl tree used by the knobs below. */
 SYSCTL_NODE(_vfs, OID_AUTO, zfs, CTLFLAG_RW, 0, "ZFS file system");
 
 /* vfs.zfs.super_owner: read-write integer, initial value 0. */
 int zfs_super_owner;
 SYSCTL_INT(_vfs_zfs, OID_AUTO, super_owner, CTLFLAG_RW, &zfs_super_owner, 0,
     "File system owner can perform privileged operation on his file systems");
 
 /* vfs.zfs.debug: read-write integer declared with CTLFLAG_RWTUN. */
 int zfs_debug_level;
 SYSCTL_INT(_vfs_zfs, OID_AUTO, debug, CTLFLAG_RWTUN, &zfs_debug_level, 0,
 	"Debug level");
 
 /*
  * ZFS settings kept per jail.  The only member, mount_snapshot, shares its
  * name with the zfs.mount_snapshot jail parameter declared further down.
  */
 struct zfs_jailparam {
 	int mount_snapshot;
 };
 
 /*
  * Instance with mount_snapshot cleared.
  * NOTE(review): presumably the defaults used when a jail has no settings of
  * its own -- confirm against the users of this variable.
  */
 static struct zfs_jailparam zfs_jailparam0 = {
 	.mount_snapshot = 0,
 };
 
 /*
  * NOTE(review): looks like the osd(9) slot under which the per-jail
  * structure is stored; it is assigned outside the visible code -- confirm.
  */
 static int zfs_jailparam_slot;
 
 SYSCTL_JAIL_PARAM_SYS_NODE(zfs, CTLFLAG_RW, "Jail ZFS parameters");
-SYSCTL_JAIL_PARAM(_zfs, mount_snapshot, CTLTYPE_INT | CTLFLAG_RW, "B",
+SYSCTL_JAIL_PARAM(_zfs, mount_snapshot, CTLTYPE_INT | CTLFLAG_RW, "I",
 	"Allow mounting snapshots in the .zfs directory for unjailed datasets");
 
 /*
  * vfs.zfs.version.*: read-only integers exporting the ACL, SPA and ZPL
  * version constants this module was compiled with.
  */
 SYSCTL_NODE(_vfs_zfs, OID_AUTO, version, CTLFLAG_RD, 0, "ZFS versions");
 static int zfs_version_acl = ZFS_ACL_VERSION;
 SYSCTL_INT(_vfs_zfs_version, OID_AUTO, acl, CTLFLAG_RD, &zfs_version_acl, 0,
     "ZFS_ACL_VERSION");
 static int zfs_version_spa = SPA_VERSION;
 SYSCTL_INT(_vfs_zfs_version, OID_AUTO, spa, CTLFLAG_RD, &zfs_version_spa, 0,
     "SPA_VERSION");
 static int zfs_version_zpl = ZPL_VERSION;
 SYSCTL_INT(_vfs_zfs_version, OID_AUTO, zpl, CTLFLAG_RD, &zfs_version_zpl, 0,
     "ZPL_VERSION");
 /* END CSTYLED */
 
 /*
  * Forward declarations of the VFS operations installed in zfs_vfsops below.
  * The signatures of zfs_quotactl and zfs_checkexp depend on
  * __FreeBSD_version, so each has two alternative prototypes.
  */
 
 /* From 1400018 on, the quotactl operation takes an extra mp_busy argument. */
 #if __FreeBSD_version >= 1400018
 static int zfs_quotactl(vfs_t *vfsp, int cmds, uid_t id, void *arg,
     bool *mp_busy);
 #else
 static int zfs_quotactl(vfs_t *vfsp, int cmds, uid_t id, void *arg);
 #endif
 static int zfs_mount(vfs_t *vfsp);
 static int zfs_umount(vfs_t *vfsp, int fflag);
 static int zfs_root(vfs_t *vfsp, int flags, vnode_t **vpp);
 static int zfs_statfs(vfs_t *vfsp, struct statfs *statp);
 static int zfs_vget(vfs_t *vfsp, ino_t ino, int flags, vnode_t **vpp);
 static int zfs_sync(vfs_t *vfsp, int waitfor);
 /*
  * From 1300098 on, extflagsp is a uint64_t pointer and secflavors is a
  * plain int pointer; older kernels use int * and int ** respectively.
  */
 #if __FreeBSD_version >= 1300098
 static int zfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, uint64_t *extflagsp,
     struct ucred **credanonp, int *numsecflavors, int *secflavors);
 #else
 static int zfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, int *extflagsp,
     struct ucred **credanonp, int *numsecflavors, int **secflavors);
 #endif
 static int zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, int flags, vnode_t **vpp);
 static void zfs_freevfs(vfs_t *vfsp);
 
 /*
  * VFS operations vector for ZFS.  From __FreeBSD_version 1300049 on, the
  * root operation is vfs_cache_root() with zfs_root() installed as the
  * vfs_cachedroot hook; older kernels use zfs_root() directly as vfs_root.
  */
 struct vfsops zfs_vfsops = {
 	.vfs_mount =		zfs_mount,
 	.vfs_unmount =		zfs_umount,
 #if __FreeBSD_version >= 1300049
 	.vfs_root =		vfs_cache_root,
 	.vfs_cachedroot = zfs_root,
 #else
 	.vfs_root =		zfs_root,
 #endif
 	.vfs_statfs =		zfs_statfs,
 	.vfs_vget =		zfs_vget,
 	.vfs_sync =		zfs_sync,
 	.vfs_checkexp =		zfs_checkexp,
 	.vfs_fhtovp =		zfs_fhtovp,
 	.vfs_quotactl =		zfs_quotactl,
 };
 
 /* Declare the "zfs" file system type with VFCF_JAIL | VFCF_DELEGADMIN. */
 VFS_SET(zfs_vfsops, zfs, VFCF_JAIL | VFCF_DELEGADMIN);
 
 /*
  * We need to keep a count of active fs's.
  * This is necessary to prevent our module
  * from being unloaded after a umount -f
  */
 static uint32_t	zfs_active_fs_count = 0;
 
 /*
  * Report the value of a property as overridden by the mount options of the
  * file system currently mounted from a dataset.
  *
  *	ds		dataset whose mounted file system is consulted
  *	zfs_prop	property to check; atime, devices, exec, setuid,
  *			readonly, xattr and nbmand are handled
  *	val		in: current value; out: overriding value, if different
  *	setpoint	receives the string "temporary" when *val is changed
  *
  * Returns 0 on success, ENOENT when no zfsvfs is attached to the objset or
  * the property is not one of those listed, or the error reported by
  * dmu_objset_from_ds() / getzfsvfs_impl().
  */
 int
 zfs_get_temporary_prop(dsl_dataset_t *ds, zfs_prop_t zfs_prop, uint64_t *val,
     char *setpoint)
 {
 	const char *opt_off = NULL;
 	const char *opt_on = NULL;
 	uint64_t effective = *val;
 	objset_t *os;
 	zfsvfs_t *zfvp;
 	vfs_t *vfsp;
 	int error;
 
 	if ((error = dmu_objset_from_ds(ds, &os)) != 0)
 		return (error);
 	if ((error = getzfsvfs_impl(os, &zfvp)) != 0)
 		return (error);
 	if (zfvp == NULL)
 		return (ENOENT);
 
 	vfsp = zfvp->z_vfs;
 
 	/* Pick the "off"/"on" mount option pair that backs the property. */
 	switch (zfs_prop) {
 	case ZFS_PROP_ATIME:
 		opt_off = MNTOPT_NOATIME;
 		opt_on = MNTOPT_ATIME;
 		break;
 	case ZFS_PROP_DEVICES:
 		opt_off = MNTOPT_NODEVICES;
 		opt_on = MNTOPT_DEVICES;
 		break;
 	case ZFS_PROP_EXEC:
 		opt_off = MNTOPT_NOEXEC;
 		opt_on = MNTOPT_EXEC;
 		break;
 	case ZFS_PROP_SETUID:
 		opt_off = MNTOPT_NOSETUID;
 		opt_on = MNTOPT_SETUID;
 		break;
 	case ZFS_PROP_READONLY:
 		/* For readonly, "rw" yields 0 and "ro" yields 1. */
 		opt_off = MNTOPT_RW;
 		opt_on = MNTOPT_RO;
 		break;
 	case ZFS_PROP_NBMAND:
 		opt_off = MNTOPT_NONBMAND;
 		opt_on = MNTOPT_NBMAND;
 		break;
 	case ZFS_PROP_XATTR:
 		/* Not option-backed here: use the cached z_xattr value. */
 		if (zfvp->z_flags & ZSB_XATTR)
 			effective = zfvp->z_xattr;
 		break;
 	default:
 		error = ENOENT;
 		break;
 	}
 
 	/* The "on" option is tested last, so it wins if both are set. */
 	if (opt_off != NULL) {
 		if (vfs_optionisset(vfsp, opt_off, NULL))
 			effective = 0;
 		if (vfs_optionisset(vfsp, opt_on, NULL))
 			effective = 1;
 	}
 
 	/*
 	 * NOTE(review): presumably pairs with a busy reference taken by
 	 * getzfsvfs_impl() -- confirm.
 	 */
 	vfs_unbusy(vfsp);
 	if (error != 0)
 		return (error);
 
 	if (effective != *val) {
 		(void) strcpy(setpoint, "temporary");
 		*val = effective;
 	}
 	return (0);
 }
 
 /*
  * Fill *dqp with the block quota and the current block usage recorded for
  * one user or group id.
  *
  *	zfsvfs	mounted file system to query
  *	id	user or group id
  *	isgroup	non-zero to look at the group quota/usage objects
  *	dqp	result; only the block limits, current blocks, inode limits
  *		(always 0) and the two time fields are written
  *
  * Returns 0 on success, ENOENT when there is no quota object, the file
  * system is replaying its log, or no quota is stored for the id, or the
  * error from zap_lookup().  An id with a quota but no usage entry is
  * reported with a usage of 0 rather than failing.
  */
 static int
 zfs_getquota(zfsvfs_t *zfsvfs, uid_t id, int isgroup, struct dqblk64 *dqp)
 {
 	int error = 0;
 	char buf[32];
 	uint64_t usedobj, quotaobj;
 	uint64_t quota, used = 0;
 	timespec_t now;
 
 	usedobj = isgroup ? DMU_GROUPUSED_OBJECT : DMU_USERUSED_OBJECT;
 	quotaobj = isgroup ? zfsvfs->z_groupquota_obj : zfsvfs->z_userquota_obj;
 
 	if (quotaobj == 0 || zfsvfs->z_replay) {
 		error = ENOENT;
 		goto done;
 	}
 	/* The ZAP entries are keyed by the id printed in hexadecimal. */
 	(void) snprintf(buf, sizeof (buf), "%llx", (u_longlong_t)id);
 	if ((error = zap_lookup(zfsvfs->z_os, quotaobj,
 	    buf, sizeof (quota), 1, &quota)) != 0) {
 		dprintf("%s(%d): quotaobj lookup failed\n",
 		    __FUNCTION__, __LINE__);
 		goto done;
 	}
 	/*
 	 * quota(8) uses bsoftlimit as "quota", and hardlimit as "limit".
 	 * So we set them to be the same.
 	 */
 	dqp->dqb_bsoftlimit = dqp->dqb_bhardlimit = btodb(quota);
 	error = zap_lookup(zfsvfs->z_os, usedobj, buf, sizeof (used), 1, &used);
 	if (error && error != ENOENT) {
 		dprintf("%s(%d):  usedobj failed; %d\n",
 		    __FUNCTION__, __LINE__, error);
 		goto done;
 	}
 	/*
 	 * A missing usage entry means nothing is charged to this id: keep
 	 * used == 0 and do not let the ENOENT leak out, otherwise the caller
 	 * would discard the quota that was just filled in.
 	 */
 	error = 0;
 	dqp->dqb_curblocks = btodb(used);
 	dqp->dqb_ihardlimit = dqp->dqb_isoftlimit = 0;
 	vfs_timestamp(&now);
 	/*
 	 * Setting this to 0 causes FreeBSD quota(8) to print
 	 * the number of days since the epoch, which isn't
 	 * particularly useful.
 	 */
 	dqp->dqb_btime = dqp->dqb_itime = now.tv_sec;
 done:
 	return (error);
 }
 
 /*
  * VFS quotactl operation: serve BSD quotactl(2) requests from the ZFS
  * user and group quotas.
  *
  *	vfsp	mount point
  *	cmds	command (cmds >> SUBCMDSHIFT) and quota type
  *		(cmds & SUBCMDMASK) packed together
  *	id	user or group id; -1 selects the calling thread's real uid
  *		or gid, depending on the quota type
  *	arg	user-space buffer, meaning depends on the command
  *
  * Commands: Q_GETQUOTASIZE copies out 64, Q_QUOTAON succeeds without doing
  * anything, Q_QUOTAOFF fails with ENOTSUP, Q_SETQUOTA stores the hard block
  * limit, Q_GETQUOTA copies out a struct dqblk64.  Anything else, or a
  * quota type >= MAXQUOTAS, yields EINVAL.
  *
  * On kernels older than 1400018 this function drops the mount's busy
  * reference itself for Q_QUOTAON and Q_QUOTAOFF, on the error paths as
  * well as on success.
  * NOTE(review): if ZFS_ENTER() can return early, that path does not
  * unbusy the mount -- confirm against the macro definition.
  */
 static int
 #if __FreeBSD_version >= 1400018
 zfs_quotactl(vfs_t *vfsp, int cmds, uid_t id, void *arg, bool *mp_busy)
 #else
 zfs_quotactl(vfs_t *vfsp, int cmds, uid_t id, void *arg)
 #endif
 {
 	zfsvfs_t *zfsvfs = vfsp->vfs_data;
 	struct thread *td;
 	int cmd, type, error = 0;
 	int bitsize;
 	zfs_userquota_prop_t quota_type;
 	struct dqblk64 dqblk = { 0 };
 
 	td = curthread;
 	cmd = cmds >> SUBCMDSHIFT;
 	type = cmds & SUBCMDMASK;
 
 	ZFS_ENTER(zfsvfs);
 	if (id == -1) {
 		switch (type) {
 		case USRQUOTA:
 			id = td->td_ucred->cr_ruid;
 			break;
 		case GRPQUOTA:
 			id = td->td_ucred->cr_rgid;
 			break;
 		default:
 			error = EINVAL;
 #if __FreeBSD_version < 1400018
 			if (cmd == Q_QUOTAON || cmd == Q_QUOTAOFF)
 				vfs_unbusy(vfsp);
 #endif
 			goto done;
 		}
 	}
 	/*
 	 * Map BSD type to:
 	 * ZFS_PROP_USERUSED,
 	 * ZFS_PROP_USERQUOTA,
 	 * ZFS_PROP_GROUPUSED,
 	 * ZFS_PROP_GROUPQUOTA
 	 */
 	switch (cmd) {
 	case Q_SETQUOTA:
 	case Q_SETQUOTA32:
 		if (type == USRQUOTA)
 			quota_type = ZFS_PROP_USERQUOTA;
 		else if (type == GRPQUOTA)
 			quota_type = ZFS_PROP_GROUPQUOTA;
 		else
 			error = EINVAL;
 		break;
 	case Q_GETQUOTA:
 	case Q_GETQUOTA32:
 		if (type == USRQUOTA)
 			quota_type = ZFS_PROP_USERUSED;
 		else if (type == GRPQUOTA)
 			quota_type = ZFS_PROP_GROUPUSED;
 		else
 			error = EINVAL;
 		break;
 	}
 
 	/*
 	 * Depending on the cmd, we may need to get
 	 * the ruid and domain (see fuidstr_to_sid?),
 	 * the fuid (how?), or other information.
 	 * Create fuid using zfs_fuid_create(zfsvfs, id,
 	 * ZFS_OWNER or ZFS_GROUP, cr, &fuidp)?
 	 * I think I can use just the id?
 	 *
 	 * Look at zfs_id_overquota() to look up a quota.
 	 * zap_lookup(something, quotaobj, fuidstring,
 	 *     sizeof (long long), 1, &quota)
 	 *
 	 * See zfs_set_userquota() to set a quota.
 	 */
 	if ((uint32_t)type >= MAXQUOTAS) {
 		error = EINVAL;
 #if __FreeBSD_version < 1400018
 		/*
 		 * Same rule as the id == -1 error path above: for these
 		 * two commands the busy reference is ours to drop.
 		 */
 		if (cmd == Q_QUOTAON || cmd == Q_QUOTAOFF)
 			vfs_unbusy(vfsp);
 #endif
 		goto done;
 	}
 
 	switch (cmd) {
 	case Q_GETQUOTASIZE:
 		bitsize = 64;
 		error = copyout(&bitsize, arg, sizeof (int));
 		break;
 	case Q_QUOTAON:
 		// As far as I can tell, you can't turn quotas on or off on zfs
 		error = 0;
 #if __FreeBSD_version < 1400018
 		vfs_unbusy(vfsp);
 #endif
 		break;
 	case Q_QUOTAOFF:
 		error = ENOTSUP;
 #if __FreeBSD_version < 1400018
 		vfs_unbusy(vfsp);
 #endif
 		break;
 	case Q_SETQUOTA:
 		/* Only the hard block limit of the request is applied. */
 		error = copyin(arg, &dqblk, sizeof (dqblk));
 		if (error == 0)
 			error = zfs_set_userquota(zfsvfs, quota_type,
 			    "", id, dbtob(dqblk.dqb_bhardlimit));
 		break;
 	case Q_GETQUOTA:
 		error = zfs_getquota(zfsvfs, id, type == GRPQUOTA, &dqblk);
 		if (error == 0)
 			error = copyout(&dqblk, arg, sizeof (dqblk));
 		break;
 	default:
 		error = EINVAL;
 		break;
 	}
 done:
 	ZFS_EXIT(zfsvfs);
 	return (error);
 }
 
 
 /*
  * Return non-zero when VFS_RDONLY is set in the vfs_flag of the mount
  * behind zfsvfs, zero otherwise.
  */
 boolean_t
 zfs_is_readonly(zfsvfs_t *zfsvfs)
 {
 	return ((zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) != 0);
 }
 
 /*
  * VFS sync operation.
  *
  *	vfsp	file system to sync, or NULL to sync every pool
  *	waitfor	MNT_* wait mode; MNT_LAZY requests are ignored
  *
  * Returns 0, or the error from vfs_stdsync().
  */
 /*ARGSUSED*/
 static int
 zfs_sync(vfs_t *vfsp, int waitfor)
 {
 	zfsvfs_t *zfsvfs;
 	dsl_pool_t *pool;
 	int error;
 
 	/*
 	 * Never sync during a panic: a compromised kernel must not be
 	 * allowed to write to the storage pool.
 	 */
 	if (panicstr)
 		return (0);
 
 	/*
 	 * The system syncher is ignored as well; ZFS already commits async
 	 * data at zfs_txg_timeout intervals.
 	 */
 	if (waitfor == MNT_LAZY)
 		return (0);
 
 	if (vfsp == NULL) {
 		/*
 		 * sync(8) case: unlike other file systems, ZFS honors the
 		 * request by waiting for all pools to commit all dirty data.
 		 */
 		spa_sync_allpools();
 		return (0);
 	}
 
 	/* A single file system was named. */
 	zfsvfs = vfsp->vfs_data;
 
 	error = vfs_stdsync(vfsp, waitfor);
 	if (error != 0)
 		return (error);
 
 	ZFS_ENTER(zfsvfs);
 	pool = dmu_objset_pool(zfsvfs->z_os);
 
 	/*
 	 * While the system is shutting down, file systems on a suspended
 	 * pool are skipped; otherwise commit the intent log if there is one.
 	 */
 	if (!(rebooting && spa_suspended(pool->dp_spa)) &&
 	    zfsvfs->z_log != NULL)
 		zil_commit(zfsvfs->z_log, 0);
 
 	ZFS_EXIT(zfsvfs);
 	return (0);
 }
 
 /*
  * Property callback for ZFS_PROP_ATIME.  Updates z_atime, the MNT_NOATIME
  * bit and the atime/noatime mount options; only newval == TRUE enables
  * atime, every other value disables it.
  */
 static void
 atime_changed_cb(void *arg, uint64_t newval)
 {
 	zfsvfs_t *zfsvfs = arg;
 	vfs_t *vfsp = zfsvfs->z_vfs;
 
 	if (newval != TRUE) {
 		zfsvfs->z_atime = FALSE;
 		vfsp->vfs_flag |= MNT_NOATIME;
 		vfs_clearmntopt(vfsp, MNTOPT_ATIME);
 		vfs_setmntopt(vfsp, MNTOPT_NOATIME, NULL, 0);
 		return;
 	}
 
 	zfsvfs->z_atime = TRUE;
 	vfsp->vfs_flag &= ~MNT_NOATIME;
 	vfs_clearmntopt(vfsp, MNTOPT_NOATIME);
 	vfs_setmntopt(vfsp, MNTOPT_ATIME, NULL, 0);
 }
 
 /*
  * Property callback for ZFS_PROP_XATTR.  ZFS_XATTR_OFF clears ZSB_XATTR
  * and leaves z_xattr_sa alone; any other value sets ZSB_XATTR and records
  * whether the value is ZFS_XATTR_SA.
  */
 static void
 xattr_changed_cb(void *arg, uint64_t newval)
 {
 	zfsvfs_t *zfsvfs = arg;
 
 	if (newval == ZFS_XATTR_OFF) {
 		zfsvfs->z_flags &= ~ZSB_XATTR;
 		return;
 	}
 
 	zfsvfs->z_flags |= ZSB_XATTR;
 	zfsvfs->z_xattr_sa = (newval == ZFS_XATTR_SA) ? B_TRUE : B_FALSE;
 }
 
 /*
  * Property callback for ZFS_PROP_RECORDSIZE.  Stores the new block size
  * as z_max_blksz and as the f_iosize of the mount's statistics.  The value
  * is asserted to be a power of two between SPA_MINBLOCKSIZE and the
  * pool's maximum block size.
  */
 static void
 blksz_changed_cb(void *arg, uint64_t newval)
 {
 	zfsvfs_t *zfsvfs = arg;
 
 	ASSERT3U(newval, <=, spa_maxblocksize(dmu_objset_spa(zfsvfs->z_os)));
 	ASSERT3U(newval, >=, SPA_MINBLOCKSIZE);
 	ASSERT(ISP2(newval));
 
 	zfsvfs->z_vfs->mnt_stat.f_iosize = newval;
 	zfsvfs->z_max_blksz = newval;
 }
 
 /*
  * Property callback for ZFS_PROP_READONLY.  A non-zero value sets
  * VFS_RDONLY and the "ro" mount option, zero clears the flag and sets "rw".
  */
 static void
 readonly_changed_cb(void *arg, uint64_t newval)
 {
 	zfsvfs_t *zfsvfs = arg;
 	vfs_t *vfsp = zfsvfs->z_vfs;
 
 	/* XXX locking on vfs_flag? */
 	if (newval == 0) {
 		vfsp->vfs_flag &= ~VFS_RDONLY;
 		vfs_clearmntopt(vfsp, MNTOPT_RO);
 		vfs_setmntopt(vfsp, MNTOPT_RW, NULL, 0);
 		return;
 	}
 
 	vfsp->vfs_flag |= VFS_RDONLY;
 	vfs_clearmntopt(vfsp, MNTOPT_RW);
 	vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0);
 }
 
 /*
  * Property callback for ZFS_PROP_SETUID.  FALSE sets VFS_NOSETUID and the
  * nosetuid mount option; any other value clears the flag and sets setuid.
  */
 static void
 setuid_changed_cb(void *arg, uint64_t newval)
 {
 	zfsvfs_t *zfsvfs = arg;
 	vfs_t *vfsp = zfsvfs->z_vfs;
 
 	if (newval != FALSE) {
 		vfsp->vfs_flag &= ~VFS_NOSETUID;
 		vfs_clearmntopt(vfsp, MNTOPT_NOSETUID);
 		vfs_setmntopt(vfsp, MNTOPT_SETUID, NULL, 0);
 		return;
 	}
 
 	vfsp->vfs_flag |= VFS_NOSETUID;
 	vfs_clearmntopt(vfsp, MNTOPT_SETUID);
 	vfs_setmntopt(vfsp, MNTOPT_NOSETUID, NULL, 0);
 }
 
 /*
  * Property callback for ZFS_PROP_EXEC.  FALSE sets VFS_NOEXEC and the
  * noexec mount option; any other value clears the flag and sets exec.
  */
 static void
 exec_changed_cb(void *arg, uint64_t newval)
 {
 	zfsvfs_t *zfsvfs = arg;
 	vfs_t *vfsp = zfsvfs->z_vfs;
 
 	if (newval != FALSE) {
 		vfsp->vfs_flag &= ~VFS_NOEXEC;
 		vfs_clearmntopt(vfsp, MNTOPT_NOEXEC);
 		vfs_setmntopt(vfsp, MNTOPT_EXEC, NULL, 0);
 		return;
 	}
 
 	vfsp->vfs_flag |= VFS_NOEXEC;
 	vfs_clearmntopt(vfsp, MNTOPT_EXEC);
 	vfs_setmntopt(vfsp, MNTOPT_NOEXEC, NULL, 0);
 }
 
 /*
  * The nbmand mount option may only change at mount time.  Toggling it on
  * a live file system is not allowed, since cifs clients could then see
  * incorrect behavior.
  *
  * This callback is therefore not registered via dsl_prop_register(); it
  * is called directly when a file system is first mounted and only swaps
  * the nbmand/nonbmand mount options.
  */
 static void
 nbmand_changed_cb(void *arg, uint64_t newval)
 {
 	zfsvfs_t *zfsvfs = arg;
 	vfs_t *vfsp = zfsvfs->z_vfs;
 
 	if (newval != FALSE) {
 		vfs_clearmntopt(vfsp, MNTOPT_NONBMAND);
 		vfs_setmntopt(vfsp, MNTOPT_NBMAND, NULL, 0);
 		return;
 	}
 
 	vfs_clearmntopt(vfsp, MNTOPT_NBMAND);
 	vfs_setmntopt(vfsp, MNTOPT_NONBMAND, NULL, 0);
 }
 
 /* Property callback for ZFS_PROP_SNAPDIR: cache the value in z_show_ctldir. */
 static void
 snapdir_changed_cb(void *arg, uint64_t newval)
 {
 	((zfsvfs_t *)arg)->z_show_ctldir = newval;
 }
 
 /* Property callback for ZFS_PROP_VSCAN: cache the value in z_vscan. */
 static void
 vscan_changed_cb(void *arg, uint64_t newval)
 {
 	((zfsvfs_t *)arg)->z_vscan = newval;
 }
 
 /* Property callback for ZFS_PROP_ACLMODE: cache the value in z_acl_mode. */
 static void
 acl_mode_changed_cb(void *arg, uint64_t newval)
 {
 	((zfsvfs_t *)arg)->z_acl_mode = newval;
 }
 
 /* Property callback for ZFS_PROP_ACLINHERIT: cache it in z_acl_inherit. */
 static void
 acl_inherit_changed_cb(void *arg, uint64_t newval)
 {
 	((zfsvfs_t *)arg)->z_acl_inherit = newval;
 }
 
 /* Property callback for ZFS_PROP_ACLTYPE: cache the value in z_acl_type. */
 static void
 acl_type_changed_cb(void *arg, uint64_t newval)
 {
 	((zfsvfs_t *)arg)->z_acl_type = newval;
 }
 
 /*
  * Register the dsl property-change callbacks for the file system mounted
  * at vfsp and re-apply the mount options that were given explicitly.
  *
  * Explicit ro/rw, setuid, exec, xattr and atime mount options are captured
  * first; the callbacks are then registered while the pool configuration is
  * held, and finally the captured values are pushed back through the same
  * callbacks.  nbmand_changed_cb() is always invoked directly.
  *
  * Returns 0 on success, EOPNOTSUPP when the objset is a snapshot, or the
  * error from dsl_prop_get_int_ds() / dsl_prop_register().  After a
  * registration failure every callback registered for this zfsvfs is
  * removed with dsl_prop_unregister_all().
  */
 static int
 zfs_register_callbacks(vfs_t *vfsp)
 {
 	struct dsl_dataset *ds = NULL;
 	objset_t *os = NULL;
 	zfsvfs_t *zfsvfs = NULL;
 	uint64_t nbmand;
 	/*
 	 * For each option, "do_<opt>" records that the option was given
 	 * explicitly and "<opt>" holds the value to restore.
 	 */
 	boolean_t readonly = B_FALSE;
 	boolean_t do_readonly = B_FALSE;
 	boolean_t setuid = B_FALSE;
 	boolean_t do_setuid = B_FALSE;
 	boolean_t exec = B_FALSE;
 	boolean_t do_exec = B_FALSE;
 	boolean_t xattr = B_FALSE;
 	boolean_t atime = B_FALSE;
 	boolean_t do_atime = B_FALSE;
 	boolean_t do_xattr = B_FALSE;
 	int error = 0;
 
 	ASSERT3P(vfsp, !=, NULL);
 	zfsvfs = vfsp->vfs_data;
 	ASSERT3P(zfsvfs, !=, NULL);
 	os = zfsvfs->z_os;
 
 	/*
 	 * This function can be called for a snapshot when we update snapshot's
 	 * mount point, which isn't really supported.
 	 */
 	if (dmu_objset_is_snapshot(os))
 		return (EOPNOTSUPP);
 
 	/*
 	 * The act of registering our callbacks will destroy any mount
 	 * options we may have.  In order to enable temporary overrides
 	 * of mount options, we stash away the current values and
 	 * restore them after we register the callbacks.
 	 */
 	/* A pool that is not writeable forces read-only regardless of options. */
 	if (vfs_optionisset(vfsp, MNTOPT_RO, NULL) ||
 	    !spa_writeable(dmu_objset_spa(os))) {
 		readonly = B_TRUE;
 		do_readonly = B_TRUE;
 	} else if (vfs_optionisset(vfsp, MNTOPT_RW, NULL)) {
 		readonly = B_FALSE;
 		do_readonly = B_TRUE;
 	}
 	if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) {
 		setuid = B_FALSE;
 		do_setuid = B_TRUE;
 	} else if (vfs_optionisset(vfsp, MNTOPT_SETUID, NULL)) {
 		setuid = B_TRUE;
 		do_setuid = B_TRUE;
 	}
 	if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL)) {
 		exec = B_FALSE;
 		do_exec = B_TRUE;
 	} else if (vfs_optionisset(vfsp, MNTOPT_EXEC, NULL)) {
 		exec = B_TRUE;
 		do_exec = B_TRUE;
 	}
 	/* The xattr options also update z_xattr; "xattr" maps to ZFS_XATTR_DIR. */
 	if (vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL)) {
 		zfsvfs->z_xattr = xattr = ZFS_XATTR_OFF;
 		do_xattr = B_TRUE;
 	} else if (vfs_optionisset(vfsp, MNTOPT_XATTR, NULL)) {
 		zfsvfs->z_xattr = xattr = ZFS_XATTR_DIR;
 		do_xattr = B_TRUE;
 	} else if (vfs_optionisset(vfsp, MNTOPT_DIRXATTR, NULL)) {
 		zfsvfs->z_xattr = xattr = ZFS_XATTR_DIR;
 		do_xattr = B_TRUE;
 	} else if (vfs_optionisset(vfsp, MNTOPT_SAXATTR, NULL)) {
 		zfsvfs->z_xattr = xattr = ZFS_XATTR_SA;
 		do_xattr = B_TRUE;
 	}
 	if (vfs_optionisset(vfsp, MNTOPT_NOATIME, NULL)) {
 		atime = B_FALSE;
 		do_atime = B_TRUE;
 	} else if (vfs_optionisset(vfsp, MNTOPT_ATIME, NULL)) {
 		atime = B_TRUE;
 		do_atime = B_TRUE;
 	}
 
 	/*
 	 * We need to enter pool configuration here, so that we can use
 	 * dsl_prop_get_int_ds() to handle the special nbmand property below.
 	 * dsl_prop_get_integer() can not be used, because it has to acquire
 	 * spa_namespace_lock and we can not do that because we already hold
 	 * z_teardown_lock.  The problem is that spa_write_cachefile() is called
 	 * with spa_namespace_lock held and the function calls ZFS vnode
 	 * operations to write the cache file and thus z_teardown_lock is
 	 * acquired after spa_namespace_lock.
 	 */
 	ds = dmu_objset_ds(os);
 	dsl_pool_config_enter(dmu_objset_pool(os), FTAG);
 
 	/*
 	 * nbmand is a special property.  It can only be changed at
 	 * mount time.
 	 *
 	 * This is weird, but it is documented to only be changeable
 	 * at mount time.
 	 */
 	if (vfs_optionisset(vfsp, MNTOPT_NONBMAND, NULL)) {
 		nbmand = B_FALSE;
 	} else if (vfs_optionisset(vfsp, MNTOPT_NBMAND, NULL)) {
 		nbmand = B_TRUE;
 	} else if ((error = dsl_prop_get_int_ds(ds, "nbmand", &nbmand)) != 0) {
 		/* Nothing has been registered yet, so just leave the config. */
 		dsl_pool_config_exit(dmu_objset_pool(os), FTAG);
 		return (error);
 	}
 
 	/*
 	 * Register property callbacks.
 	 *
 	 * It would probably be fine to just check for i/o error from
 	 * the first prop_register(), but I guess I like to go
 	 * overboard...
 	 */
 	/* Once error is non-zero, the remaining registrations are skipped. */
 	error = dsl_prop_register(ds,
 	    zfs_prop_to_name(ZFS_PROP_ATIME), atime_changed_cb, zfsvfs);
 	error = error ? error : dsl_prop_register(ds,
 	    zfs_prop_to_name(ZFS_PROP_XATTR), xattr_changed_cb, zfsvfs);
 	error = error ? error : dsl_prop_register(ds,
 	    zfs_prop_to_name(ZFS_PROP_RECORDSIZE), blksz_changed_cb, zfsvfs);
 	error = error ? error : dsl_prop_register(ds,
 	    zfs_prop_to_name(ZFS_PROP_READONLY), readonly_changed_cb, zfsvfs);
 	error = error ? error : dsl_prop_register(ds,
 	    zfs_prop_to_name(ZFS_PROP_SETUID), setuid_changed_cb, zfsvfs);
 	error = error ? error : dsl_prop_register(ds,
 	    zfs_prop_to_name(ZFS_PROP_EXEC), exec_changed_cb, zfsvfs);
 	error = error ? error : dsl_prop_register(ds,
 	    zfs_prop_to_name(ZFS_PROP_SNAPDIR), snapdir_changed_cb, zfsvfs);
 	error = error ? error : dsl_prop_register(ds,
 	    zfs_prop_to_name(ZFS_PROP_ACLTYPE), acl_type_changed_cb, zfsvfs);
 	error = error ? error : dsl_prop_register(ds,
 	    zfs_prop_to_name(ZFS_PROP_ACLMODE), acl_mode_changed_cb, zfsvfs);
 	error = error ? error : dsl_prop_register(ds,
 	    zfs_prop_to_name(ZFS_PROP_ACLINHERIT), acl_inherit_changed_cb,
 	    zfsvfs);
 	error = error ? error : dsl_prop_register(ds,
 	    zfs_prop_to_name(ZFS_PROP_VSCAN), vscan_changed_cb, zfsvfs);
 	dsl_pool_config_exit(dmu_objset_pool(os), FTAG);
 	if (error)
 		goto unregister;
 
 	/*
 	 * Invoke our callbacks to restore temporary mount options.
 	 */
 	if (do_readonly)
 		readonly_changed_cb(zfsvfs, readonly);
 	if (do_setuid)
 		setuid_changed_cb(zfsvfs, setuid);
 	if (do_exec)
 		exec_changed_cb(zfsvfs, exec);
 	if (do_xattr)
 		xattr_changed_cb(zfsvfs, xattr);
 	if (do_atime)
 		atime_changed_cb(zfsvfs, atime);
 
 	/* nbmand has no registered callback, so it is applied by hand. */
 	nbmand_changed_cb(zfsvfs, nbmand);
 
 	return (0);
 
 unregister:
 	dsl_prop_unregister_all(ds, zfsvfs);
 	return (error);
 }
 
 /*
  * Associate this zfsvfs with the given objset, which must be owned.
  * This will cache a bunch of on-disk state from the objset in the
  * zfsvfs.
  */
 static int
 zfsvfs_init(zfsvfs_t *zfsvfs, objset_t *os)
 {
 	int error;
 	uint64_t val;
 
 	zfsvfs->z_max_blksz = SPA_OLD_MAXBLOCKSIZE;
 	zfsvfs->z_show_ctldir = ZFS_SNAPDIR_VISIBLE;
 	zfsvfs->z_os = os;
 
 	error = zfs_get_zplprop(os, ZFS_PROP_VERSION, &zfsvfs->z_version);
 	if (error != 0)
 		return (error);
 	if (zfsvfs->z_version >
 	    zfs_zpl_version_map(spa_version(dmu_objset_spa(os)))) {
 		(void) printf("Can't mount a version %lld file system "
 		    "on a version %lld pool\n. Pool must be upgraded to mount "
 		    "this file system.", (u_longlong_t)zfsvfs->z_version,
 		    (u_longlong_t)spa_version(dmu_objset_spa(os)));
 		return (SET_ERROR(ENOTSUP));
 	}
 	error = zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &val);
 	if (error != 0)
 		return (error);
 	zfsvfs->z_norm = (int)val;
 
 	error = zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &val);
 	if (error != 0)
 		return (error);
 	zfsvfs->z_utf8 = (val != 0);
 
 	error = zfs_get_zplprop(os, ZFS_PROP_CASE, &val);
 	if (error != 0)
 		return (error);
 	zfsvfs->z_case = (uint_t)val;
 
 	error = zfs_get_zplprop(os, ZFS_PROP_ACLTYPE, &val);
 	if (error != 0)
 		return (error);
 	zfsvfs->z_acl_type = (uint_t)val;
 
 	/*
 	 * Fold case on file systems that are always or sometimes case
 	 * insensitive.
 	 */
 	if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE ||
 	    zfsvfs->z_case == ZFS_CASE_MIXED)
 		zfsvfs->z_norm |= U8_TEXTPREP_TOUPPER;
 
 	zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os);
 	zfsvfs->z_use_sa = USE_SA(zfsvfs->z_version, zfsvfs->z_os);
 
 	uint64_t sa_obj = 0;
 	if (zfsvfs->z_use_sa) {
 		/* should either have both of these objects or none */
 		error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 8, 1,
 		    &sa_obj);
 		if (error != 0)
 			return (error);
 
 		error = zfs_get_zplprop(os, ZFS_PROP_XATTR, &val);
 		if (error == 0 && val == ZFS_XATTR_SA)
 			zfsvfs->z_xattr_sa = B_TRUE;
 	}
 
 	error = sa_setup(os, sa_obj, zfs_attr_table, ZPL_END,
 	    &zfsvfs->z_attr_table);
 	if (error != 0)
 		return (error);
 
 	if (zfsvfs->z_version >= ZPL_VERSION_SA)
 		sa_register_update_callback(os, zfs_sa_upgrade);
 
 	error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_ROOT_OBJ, 8, 1,
 	    &zfsvfs->z_root);
 	if (error != 0)
 		return (error);
 	ASSERT3U(zfsvfs->z_root, !=, 0);
 
 	error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_UNLINKED_SET, 8, 1,
 	    &zfsvfs->z_unlinkedobj);
 	if (error != 0)
 		return (error);
 
 	error = zap_lookup(os, MASTER_NODE_OBJ,
 	    zfs_userquota_prop_prefixes[ZFS_PROP_USERQUOTA],
 	    8, 1, &zfsvfs->z_userquota_obj);
 	if (error == ENOENT)
 		zfsvfs->z_userquota_obj = 0;
 	else if (error != 0)
 		return (error);
 
 	error = zap_lookup(os, MASTER_NODE_OBJ,
 	    zfs_userquota_prop_prefixes[ZFS_PROP_GROUPQUOTA],
 	    8, 1, &zfsvfs->z_groupquota_obj);
 	if (error == ENOENT)
 		zfsvfs->z_groupquota_obj = 0;
 	else if (error != 0)
 		return (error);
 
 	error = zap_lookup(os, MASTER_NODE_OBJ,
 	    zfs_userquota_prop_prefixes[ZFS_PROP_PROJECTQUOTA],
 	    8, 1, &zfsvfs->z_projectquota_obj);
 	if (error == ENOENT)
 		zfsvfs->z_projectquota_obj = 0;
 	else if (error != 0)
 		return (error);
 
 	error = zap_lookup(os, MASTER_NODE_OBJ,
 	    zfs_userquota_prop_prefixes[ZFS_PROP_USEROBJQUOTA],
 	    8, 1, &zfsvfs->z_userobjquota_obj);
 	if (error == ENOENT)
 		zfsvfs->z_userobjquota_obj = 0;
 	else if (error != 0)
 		return (error);
 
 	error = zap_lookup(os, MASTER_NODE_OBJ,
 	    zfs_userquota_prop_prefixes[ZFS_PROP_GROUPOBJQUOTA],
 	    8, 1, &zfsvfs->z_groupobjquota_obj);
 	if (error == ENOENT)
 		zfsvfs->z_groupobjquota_obj = 0;
 	else if (error != 0)
 		return (error);
 
 	error = zap_lookup(os, MASTER_NODE_OBJ,
 	    zfs_userquota_prop_prefixes[ZFS_PROP_PROJECTOBJQUOTA],
 	    8, 1, &zfsvfs->z_projectobjquota_obj);
 	if (error == ENOENT)
 		zfsvfs->z_projectobjquota_obj = 0;
 	else if (error != 0)
 		return (error);
 
 	error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_FUID_TABLES, 8, 1,
 	    &zfsvfs->z_fuid_obj);
 	if (error == ENOENT)
 		zfsvfs->z_fuid_obj = 0;
 	else if (error != 0)
 		return (error);
 
 	error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SHARES_DIR, 8, 1,
 	    &zfsvfs->z_shares_dir);
 	if (error == ENOENT)
 		zfsvfs->z_shares_dir = 0;
 	else if (error != 0)
 		return (error);
 
 	/*
 	 * Only use the name cache if we are looking for a
 	 * name on a file system that does not require normalization
 	 * or case folding.  We can also look there if we happen to be
 	 * on a non-normalizing, mixed sensitivity file system IF we
 	 * are looking for the exact name (which is always the case on
 	 * FreeBSD).
 	 */
 	zfsvfs->z_use_namecache = !zfsvfs->z_norm ||
 	    ((zfsvfs->z_case == ZFS_CASE_MIXED) &&
 	    !(zfsvfs->z_norm & ~U8_TEXTPREP_TOUPPER));
 
 	return (0);
 }
 
 /*
  * Task queue pointer.
  * NOTE(review): presumably where z_unlinked_drain_task gets queued; it is
  * created and used outside the visible code -- confirm.
  */
 taskq_t *zfsvfs_taskq;
 
 /*
  * Task handler installed in z_unlinked_drain_task: drains the unlinked set
  * of the zfsvfs passed as context.  The pending count is not used.
  */
 static void
 zfsvfs_task_unlinked_drain(void *context, int pending __unused)
 {
 	zfsvfs_t *zfsvfs = context;
 
 	zfs_unlinked_drain(zfsvfs);
 }
 
 /*
  * Allocate a zfsvfs for the named objset, take ownership of the objset and
  * initialize the zfsvfs from it.
  *
  *	osname		dataset name; a name containing '@' is always owned
  *			read-only
  *	readonly	own the objset read-only
  *	zfvp		receives the new zfsvfs (set by zfsvfs_create_impl())
  *
  * Returns 0 on success, ENAMETOOLONG when osname does not fit in MNAMELEN,
  * or the error from dmu_objset_own() / zfsvfs_create_impl().
  */
 int
 zfsvfs_create(const char *osname, boolean_t readonly, zfsvfs_t **zfvp)
 {
 	zfsvfs_t *zfsvfs;
 	objset_t *os;
 	boolean_t own_ro;
 	int error;
 
 	own_ro = (readonly || (strchr(osname, '@') != NULL));
 
 	/*
 	 * XXX: Fix struct statfs so this isn't necessary!
 	 *
 	 * 'osname' becomes the file system's special node, so it has to fit
 	 * in statfs.f_mntfromname.  Otherwise the mount can't be enumerated,
 	 * libzfs_mnttab_find() returns NULL, and 'zfs unmount' believes the
 	 * file system is not mounted when it is.
 	 */
 	if (strlen(osname) >= MNAMELEN)
 		return (SET_ERROR(ENAMETOOLONG));
 
 	zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP);
 
 	error = dmu_objset_own(osname, DMU_OST_ZFS, own_ro, B_TRUE, zfsvfs,
 	    &os);
 	if (error != 0) {
 		kmem_free(zfsvfs, sizeof (zfsvfs_t));
 		return (error);
 	}
 
 	/* On failure zfsvfs_create_impl() disowns os and frees zfsvfs. */
 	return (zfsvfs_create_impl(zfvp, zfsvfs, os));
 }
 
 
 /*
  * Set up the locks, znode list and drain task of a freshly allocated
  * zfsvfs and load its state from the owned objset os.
  *
  * On success *zfvp is set to zfsvfs and 0 is returned.  On failure the
  * objset is disowned, zfsvfs is freed, *zfvp is set to NULL and the error
  * from zfsvfs_init() is returned.
  * NOTE(review): the failure path frees the structure without destroying
  * the locks initialized here -- confirm whether that is intended.
  */
 int
 zfsvfs_create_impl(zfsvfs_t **zfvp, zfsvfs_t *zfsvfs, objset_t *os)
 {
 	int error;
 
 	zfsvfs->z_vfs = NULL;
 	zfsvfs->z_parent = zfsvfs;
 
 	mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL);
 	mutex_init(&zfsvfs->z_lock, NULL, MUTEX_DEFAULT, NULL);
 	list_create(&zfsvfs->z_all_znodes, sizeof (znode_t),
 	    offsetof(znode_t, z_link_node));
 	TASK_INIT(&zfsvfs->z_unlinked_drain_task, 0,
 	    zfsvfs_task_unlinked_drain, zfsvfs);
 	ZFS_TEARDOWN_INIT(zfsvfs);
 	ZFS_TEARDOWN_INACTIVE_INIT(zfsvfs);
 	rw_init(&zfsvfs->z_fuid_lock, NULL, RW_DEFAULT, NULL);
 	for (int m = 0; m != ZFS_OBJ_MTX_SZ; m++)
 		mutex_init(&zfsvfs->z_hold_mtx[m], NULL, MUTEX_DEFAULT, NULL);
 
 	error = zfsvfs_init(zfsvfs, os);
 	if (error == 0) {
 		*zfvp = zfsvfs;
 		return (0);
 	}
 
 	dmu_objset_disown(os, B_TRUE, zfsvfs);
 	*zfvp = NULL;
 	kmem_free(zfsvfs, sizeof (zfsvfs_t));
 	return (error);
 }
 
 /*
  * Finish bringing a zfsvfs into service: register the property callbacks,
  * open the ZIL and record the zfsvfs as the objset's user pointer.
  *
  * When `mounting' is true this additionally creates the dataset kstats,
  * drains the unlinked set (writable mounts only) and, on a writeable pool,
  * replays the intent log, or destroys it when zil_replay_disable is set.
  * VFS_RDONLY is cleared for the duration of the replay and restored after.
  *
  * Returns 0 on success, EROFS when a writable mount is requested for an
  * objset with an incompatible encryption version, or the error from
  * zfs_register_callbacks().
  */
 static int
 zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting)
 {
 	int error;
 
 	/*
 	 * Check for a bad on-disk format version now since we
 	 * lied about owning the dataset readonly before.
 	 */
 	if (!(zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) &&
 	    dmu_objset_incompatible_encryption_version(zfsvfs->z_os))
 		return (SET_ERROR(EROFS));
 
 	error = zfs_register_callbacks(zfsvfs->z_vfs);
 	if (error)
 		return (error);
 
 	zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data);
 
 	/*
 	 * If we are not mounting (ie: online recv), then we don't
 	 * have to worry about replaying the log as we blocked all
 	 * operations out since we closed the ZIL.
 	 */
 	if (mounting) {
 		boolean_t readonly;
 
 		ASSERT3P(zfsvfs->z_kstat.dk_kstats, ==, NULL);
 		dataset_kstats_create(&zfsvfs->z_kstat, zfsvfs->z_os);
 
 		/*
 		 * During replay we remove the read only flag to
 		 * allow replays to succeed.
 		 */
 		readonly = zfsvfs->z_vfs->vfs_flag & VFS_RDONLY;
 		if (readonly != 0) {
 			zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY;
 		} else {
 			dsl_dir_t *dd;
 			zap_stats_t zs;
 
 			/* Publish the unlinked-set size before draining it. */
 			if (zap_get_stats(zfsvfs->z_os, zfsvfs->z_unlinkedobj,
 			    &zs) == 0) {
 				dataset_kstats_update_nunlinks_kstat(
 				    &zfsvfs->z_kstat, zs.zs_num_entries);
 				dprintf_ds(zfsvfs->z_os->os_dsl_dataset,
 				    "num_entries in unlinked set: %llu",
 				    (u_longlong_t)zs.zs_num_entries);
 			}
 
 			zfs_unlinked_drain(zfsvfs);
 			dd = zfsvfs->z_os->os_dsl_dataset->ds_dir;
 			dd->dd_activity_cancelled = B_FALSE;
 		}
 
 		/*
 		 * Parse and replay the intent log.
 		 *
 		 * Because of ziltest, this must be done after
 		 * zfs_unlinked_drain().  (Further note: ziltest
 		 * doesn't use readonly mounts, where
 		 * zfs_unlinked_drain() isn't called.)  This is because
 		 * ziltest causes spa_sync() to think it's committed,
 		 * but actually it is not, so the intent log contains
 		 * many txg's worth of changes.
 		 *
 		 * In particular, if object N is in the unlinked set in
 		 * the last txg to actually sync, then it could be
 		 * actually freed in a later txg and then reallocated
 		 * in a yet later txg.  This would write a "create
 		 * object N" record to the intent log.  Normally, this
 		 * would be fine because the spa_sync() would have
 		 * written out the fact that object N is free, before
 		 * we could write the "create object N" intent log
 		 * record.
 		 *
 		 * But when we are in ziltest mode, we advance the "open
 		 * txg" without actually spa_sync()-ing the changes to
 		 * disk.  So we would see that object N is still
 		 * allocated and in the unlinked set, and there is an
 		 * intent log record saying to allocate it.
 		 */
 		if (spa_writeable(dmu_objset_spa(zfsvfs->z_os))) {
 			if (zil_replay_disable) {
 				zil_destroy(zfsvfs->z_log, B_FALSE);
 			} else {
 				/*
 				 * The name cache is switched off and
 				 * z_replay is set only while the replay
 				 * runs; both are put back afterwards.
 				 */
 				boolean_t use_nc = zfsvfs->z_use_namecache;
 				zfsvfs->z_use_namecache = B_FALSE;
 				zfsvfs->z_replay = B_TRUE;
 				zil_replay(zfsvfs->z_os, zfsvfs,
 				    zfs_replay_vector);
 				zfsvfs->z_replay = B_FALSE;
 				zfsvfs->z_use_namecache = use_nc;
 			}
 		}
 
 		/* restore readonly bit */
 		if (readonly != 0)
 			zfsvfs->z_vfs->vfs_flag |= VFS_RDONLY;
 	}
 
 	/*
 	 * Set the objset user_ptr to track its zfsvfs.
 	 */
 	mutex_enter(&zfsvfs->z_os->os_user_ptr_lock);
 	dmu_objset_set_user(zfsvfs->z_os, zfsvfs);
 	mutex_exit(&zfsvfs->z_os->os_user_ptr_lock);
 
 	return (0);
 }
 
 /*
  * Destroy the locks, lists and kstats embedded in a zfsvfs_t and then
  * release the memory of the structure itself.
  */
 void
 zfsvfs_free(zfsvfs_t *zfsvfs)
 {
 	int idx;
 
 	zfs_fuid_destroy(zfsvfs);
 
 	mutex_destroy(&zfsvfs->z_znodes_lock);
 	mutex_destroy(&zfsvfs->z_lock);
 	/* The znode list is expected to be empty by the time we get here. */
 	ASSERT3U(zfsvfs->z_nr_znodes, ==, 0);
 	list_destroy(&zfsvfs->z_all_znodes);
 	ZFS_TEARDOWN_DESTROY(zfsvfs);
 	ZFS_TEARDOWN_INACTIVE_DESTROY(zfsvfs);
 	rw_destroy(&zfsvfs->z_fuid_lock);
 
 	/* Walk the whole array of per-object hold mutexes. */
 	idx = 0;
 	while (idx < ZFS_OBJ_MTX_SZ) {
 		mutex_destroy(&zfsvfs->z_hold_mtx[idx]);
 		idx++;
 	}
 
 	dataset_kstats_destroy(&zfsvfs->z_kstat);
 	kmem_free(zfsvfs, sizeof (*zfsvfs));
 }
 
 /*
  * Recompute z_use_fuids and z_use_sa from the filesystem version and objset
  * and, when a vfs is attached, set or clear the FUID-dependent vfs features
  * to match.
  */
 static void
 zfs_set_fuid_feature(zfsvfs_t *zfsvfs)
 {
 	zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os);
 	zfsvfs->z_use_sa = USE_SA(zfsvfs->z_version, zfsvfs->z_os);
 
 	/* Without a vfs there are no feature bits to adjust. */
 	if (!zfsvfs->z_vfs)
 		return;
 
 	if (!zfsvfs->z_use_fuids) {
 		vfs_clear_feature(zfsvfs->z_vfs, VFSFT_XVATTR);
 		vfs_clear_feature(zfsvfs->z_vfs, VFSFT_SYSATTR_VIEWS);
 		vfs_clear_feature(zfsvfs->z_vfs, VFSFT_ACEMASKONACCESS);
 		vfs_clear_feature(zfsvfs->z_vfs, VFSFT_ACLONCREATE);
 		vfs_clear_feature(zfsvfs->z_vfs, VFSFT_ACCESS_FILTER);
 		vfs_clear_feature(zfsvfs->z_vfs, VFSFT_REPARSE);
 		return;
 	}
 
 	vfs_set_feature(zfsvfs->z_vfs, VFSFT_XVATTR);
 	vfs_set_feature(zfsvfs->z_vfs, VFSFT_SYSATTR_VIEWS);
 	vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACEMASKONACCESS);
 	vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACLONCREATE);
 	vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACCESS_FILTER);
 	vfs_set_feature(zfsvfs->z_vfs, VFSFT_REPARSE);
 }
 
 /*
  * Mount the dataset named 'osname' on 'vfsp'.
  *
  * Creates the zfsvfs_t for the dataset, fills in the mount's block sizes,
  * kernel flags, fsid and feature bits, and then either applies the snapshot
  * settings directly or runs zfsvfs_setup() for a non-snapshot dataset.
  *
  * Returns 0 on success.  If zfsvfs_create() fails its error is returned
  * immediately; any later failure goes through the 'out' label, which disowns
  * the objset and frees the zfsvfs_t before returning the error.
  */
 static int
 zfs_domount(vfs_t *vfsp, char *osname)
 {
 	uint64_t recordsize, fsid_guid;
 	int error = 0;
 	zfsvfs_t *zfsvfs;
 
 	ASSERT3P(vfsp, !=, NULL);
 	ASSERT3P(osname, !=, NULL);
 
 	error = zfsvfs_create(osname, vfsp->mnt_flag & MNT_RDONLY, &zfsvfs);
 	if (error)
 		return (error);
 	zfsvfs->z_vfs = vfsp;
 
 	/* The dataset's recordsize becomes the advertised I/O size. */
 	if ((error = dsl_prop_get_integer(osname,
 	    "recordsize", &recordsize, NULL)))
 		goto out;
 	zfsvfs->z_vfs->vfs_bsize = SPA_MINBLOCKSIZE;
 	zfsvfs->z_vfs->mnt_stat.f_iosize = recordsize;
 
 	vfsp->vfs_data = zfsvfs;
 	vfsp->mnt_flag |= MNT_LOCAL;
 	vfsp->mnt_kern_flag |= MNTK_LOOKUP_SHARED;
 	vfsp->mnt_kern_flag |= MNTK_SHARED_WRITES;
 	vfsp->mnt_kern_flag |= MNTK_EXTENDED_SHARED;
 	/*
 	 * This can cause a loss of coherence between ARC and page cache
 	 * on ZoF - unclear if the problem is in FreeBSD or ZoF
 	 */
 	vfsp->mnt_kern_flag |= MNTK_NO_IOPF;	/* vn_io_fault can be used */
 	vfsp->mnt_kern_flag |= MNTK_NOMSYNC;
 	vfsp->mnt_kern_flag |= MNTK_VMSETSIZE_BUG;
 
 #if defined(_KERNEL) && !defined(KMEM_DEBUG)
 	vfsp->mnt_kern_flag |= MNTK_FPLOOKUP;
 #endif
 	/*
 	 * The fsid is 64 bits, composed of an 8-bit fs type, which
 	 * separates our fsid from any other filesystem types, and a
 	 * 56-bit objset unique ID.  The objset unique ID is unique to
 	 * all objsets open on this system, provided by unique_create().
 	 * The 8-bit fs type must be put in the low bits of fsid[1]
 	 * because that's where other Solaris filesystems put it.
 	 */
 	fsid_guid = dmu_objset_fsid_guid(zfsvfs->z_os);
 	/* The top 8 bits of the guid must be clear to leave room for the type. */
 	ASSERT3U((fsid_guid & ~((1ULL << 56) - 1)), ==, 0);
 	vfsp->vfs_fsid.val[0] = fsid_guid;
 	vfsp->vfs_fsid.val[1] = ((fsid_guid >> 32) << 8) |
 	    (vfsp->mnt_vfc->vfc_typenum & 0xFF);
 
 	/*
 	 * Set features for file system.
 	 */
 	zfs_set_fuid_feature(zfsvfs);
 	if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE) {
 		vfs_set_feature(vfsp, VFSFT_DIRENTFLAGS);
 		vfs_set_feature(vfsp, VFSFT_CASEINSENSITIVE);
 		vfs_set_feature(vfsp, VFSFT_NOCASESENSITIVE);
 	} else if (zfsvfs->z_case == ZFS_CASE_MIXED) {
 		vfs_set_feature(vfsp, VFSFT_DIRENTFLAGS);
 		vfs_set_feature(vfsp, VFSFT_CASEINSENSITIVE);
 	}
 	vfs_set_feature(vfsp, VFSFT_ZEROCOPY_SUPPORTED);
 
 	if (dmu_objset_is_snapshot(zfsvfs->z_os)) {
 		uint64_t pval;
 
 		/*
 		 * Snapshots skip zfsvfs_setup(): atime is forced off, readonly
 		 * is forced on, and xattr/acltype are read and applied by hand.
 		 */
 		atime_changed_cb(zfsvfs, B_FALSE);
 		readonly_changed_cb(zfsvfs, B_TRUE);
 		if ((error = dsl_prop_get_integer(osname,
 		    "xattr", &pval, NULL)))
 			goto out;
 		xattr_changed_cb(zfsvfs, pval);
 		if ((error = dsl_prop_get_integer(osname,
 		    "acltype", &pval, NULL)))
 			goto out;
 		acl_type_changed_cb(zfsvfs, pval);
 		zfsvfs->z_issnap = B_TRUE;
 		zfsvfs->z_os->os_sync = ZFS_SYNC_DISABLED;
 
 		/* Point the objset's user pointer back at this zfsvfs. */
 		mutex_enter(&zfsvfs->z_os->os_user_ptr_lock);
 		dmu_objset_set_user(zfsvfs->z_os, zfsvfs);
 		mutex_exit(&zfsvfs->z_os->os_user_ptr_lock);
 	} else {
 		if ((error = zfsvfs_setup(zfsvfs, B_TRUE)))
 			goto out;
 	}
 
 	vfs_mountedfrom(vfsp, osname);
 
 	/* Only non-snapshot mounts get a control directory created. */
 	if (!zfsvfs->z_issnap)
 		zfsctl_create(zfsvfs);
 out:
 	/*
 	 * Common exit: on failure give the objset back and free the zfsvfs,
 	 * on success account for one more active filesystem.
 	 */
 	if (error) {
 		dmu_objset_disown(zfsvfs->z_os, B_TRUE, zfsvfs);
 		zfsvfs_free(zfsvfs);
 	} else {
 		atomic_inc_32(&zfs_active_fs_count);
 	}
 
 	return (error);
 }
 
 /*
  * Drop every dsl property callback registered for this zfsvfs.  Nothing is
  * unregistered when the objset is a snapshot.
  */
 static void
 zfs_unregister_callbacks(zfsvfs_t *zfsvfs)
 {
 	objset_t *objset = zfsvfs->z_os;
 
 	if (dmu_objset_is_snapshot(objset))
 		return;
 
 	dsl_prop_unregister_all(dmu_objset_ds(objset), zfsvfs);
 }
 
 /*
  * Copy the pool component of a dataset name (everything before the first
  * '/', or the whole name when there is no '/') into 'poolname'.
  *
  * Returns 0 on success, or ENAMETOOLONG when the pool component is
  * MAXNAMELEN characters or longer, in which case 'poolname' is left
  * untouched.
  */
 static int
 getpoolname(const char *osname, char *poolname)
 {
 	const char *slash = strchr(osname, '/');
 	size_t poollen;
 
 	poollen = (slash != NULL) ? (size_t)(slash - osname) : strlen(osname);
 	if (poollen >= MAXNAMELEN)
 		return (ENAMETOOLONG);
 
 	memcpy(poolname, osname, poollen);
 	poolname[poollen] = '\0';
 	return (0);
 }
 
 /*
  * Detect and strip the option prefix of a dataset name.  A leading '!'
  * sets *checkpointrewind to true and is removed from 'name' in place;
  * otherwise *checkpointrewind is set to false and 'name' is unchanged.
  */
 static void
 fetch_osname_options(char *name, bool *checkpointrewind)
 {
 	bool has_bang = (name[0] == '!');
 
 	*checkpointrewind = has_bang;
 	if (has_bang) {
 		/* strlen(name) bytes from name + 1 include the terminator. */
 		memmove(name, name + 1, strlen(name));
 	}
 }
 
 /*
  * VFS mount entry point.
  *
  * Reads the dataset name from the "from" mount option, runs the privilege,
  * delegation and zone-visibility checks, and then either refreshes the
  * property callbacks (remount) or mounts the dataset through zfs_domount().
  * For an initial root mount the root pool is imported first.
  *
  * Returns 0 on success or an errno value on failure.
  */
 /*ARGSUSED*/
 static int
 zfs_mount(vfs_t *vfsp)
 {
 	kthread_t	*td = curthread;
 	vnode_t		*mvp = vfsp->mnt_vnodecovered;
 	cred_t		*cr = td->td_ucred;
 	char		*osname;
 	int		error = 0;
 	int		canwrite;
 	bool		checkpointrewind, isctlsnap = false;
 
 	if (vfs_getopt(vfsp->mnt_optnew, "from", (void **)&osname, NULL))
 		return (SET_ERROR(EINVAL));
 
 	/*
 	 * If full-owner-access is enabled and delegated administration is
 	 * turned on, we must set nosuid.
 	 */
 	if (zfs_super_owner &&
 	    dsl_deleg_access(osname, ZFS_DELEG_PERM_MOUNT, cr) != ECANCELED) {
 		secpolicy_fs_mount_clearopts(cr, vfsp);
 	}
 
 	/* Strip a leading '!' from the name and note it as a rewind request. */
 	fetch_osname_options(osname, &checkpointrewind);
 	/* A snapshot name being mounted on top of a control-directory node. */
 	isctlsnap = (mvp != NULL && zfsctl_is_node(mvp) &&
 	    strchr(osname, '@') != NULL);
 
 	/*
 	 * Check for mount privilege?
 	 *
 	 * If we don't have privilege then see if
 	 * we have local permission to allow it
 	 */
 	error = secpolicy_fs_mount(cr, mvp, vfsp);
 	if (error && isctlsnap) {
 		secpolicy_fs_mount_clearopts(cr, vfsp);
 	} else if (error) {
 		/*
 		 * Every 'goto out' in this branch returns the error from
 		 * secpolicy_fs_mount() above.
 		 */
 		if (dsl_deleg_access(osname, ZFS_DELEG_PERM_MOUNT, cr) != 0)
 			goto out;
 
 		if (!(vfsp->vfs_flag & MS_REMOUNT)) {
 			vattr_t		vattr;
 
 			/*
 			 * Make sure user is the owner of the mount point
 			 * or has sufficient privileges.
 			 */
 
 			vattr.va_mask = AT_UID;
 
 			vn_lock(mvp, LK_SHARED | LK_RETRY);
 			if (VOP_GETATTR(mvp, &vattr, cr)) {
 				VOP_UNLOCK1(mvp);
 				goto out;
 			}
 
 			if (secpolicy_vnode_owner(mvp, cr, vattr.va_uid) != 0 &&
 			    VOP_ACCESS(mvp, VWRITE, cr, td) != 0) {
 				VOP_UNLOCK1(mvp);
 				goto out;
 			}
 			VOP_UNLOCK1(mvp);
 		}
 
 		secpolicy_fs_mount_clearopts(cr, vfsp);
 	}
 
 	/*
 	 * Refuse to mount a filesystem if we are in a local zone and the
 	 * dataset is not visible.
 	 */
 	if (!INGLOBALZONE(curproc) &&
 	    (!zone_dataset_visible(osname, &canwrite) || !canwrite)) {
 		boolean_t mount_snapshot = B_FALSE;
 
 		/*
 		 * Snapshots may be mounted in .zfs for unjailed datasets
 		 * if allowed by the jail param zfs.mount_snapshot.
 		 */
 		if (isctlsnap) {
 			struct prison *pr;
 			struct zfs_jailparam *zjp;
 
 			pr = curthread->td_ucred->cr_prison;
 			mtx_lock(&pr->pr_mtx);
 			zjp = osd_jail_get(pr, zfs_jailparam_slot);
 			mtx_unlock(&pr->pr_mtx);
 			if (zjp && zjp->mount_snapshot)
 				mount_snapshot = B_TRUE;
 		}
 		if (!mount_snapshot) {
 			error = SET_ERROR(EPERM);
 			goto out;
 		}
 	}
 
 	vfsp->vfs_flag |= MNT_NFS4ACLS;
 
 	/*
 	 * When doing a remount, we simply refresh our temporary properties
 	 * according to those options set in the current VFS options.
 	 */
 	if (vfsp->vfs_flag & MS_REMOUNT) {
 		zfsvfs_t *zfsvfs = vfsp->vfs_data;
 
 		/*
 		 * Refresh mount options with z_teardown_lock blocking I/O while
 		 * the filesystem is in an inconsistent state.
 		 * The lock also serializes this code with filesystem
 		 * manipulations between entry to zfs_suspend_fs() and return
 		 * from zfs_resume_fs().
 		 */
 		ZFS_TEARDOWN_ENTER_WRITE(zfsvfs, FTAG);
 		zfs_unregister_callbacks(zfsvfs);
 		error = zfs_register_callbacks(vfsp);
 		ZFS_TEARDOWN_EXIT(zfsvfs, FTAG);
 		goto out;
 	}
 
 	/* Initial root mount: try hard to import the requested root pool. */
 	if ((vfsp->vfs_flag & MNT_ROOTFS) != 0 &&
 	    (vfsp->vfs_flag & MNT_UPDATE) == 0) {
 		char pname[MAXNAMELEN];
 
 		error = getpoolname(osname, pname);
 		if (error == 0)
 			error = spa_import_rootpool(pname, checkpointrewind);
 		if (error)
 			goto out;
 	}
 	/* The actual mount runs between DROP_GIANT() and PICKUP_GIANT(). */
 	DROP_GIANT();
 	error = zfs_domount(vfsp, osname);
 	PICKUP_GIANT();
 
 out:
 	return (error);
 }
 
 /*
  * VFS statfs entry point: fill 'statp' with the space and object usage of
  * the dataset mounted on 'vfsp'.  Returns 0 once the fields are filled in.
  */
 static int
 zfs_statfs(vfs_t *vfsp, struct statfs *statp)
 {
 	zfsvfs_t *zfsvfs = vfsp->vfs_data;
 	uint64_t refdbytes, availbytes, usedobjs, availobjs;
 
 	statp->f_version = STATFS_VERSION;
 
 	ZFS_ENTER(zfsvfs);
 
 	/*
 	 * Identification first: we're a zfs filesystem, and the from/on
 	 * names are taken from the mount's own statfs record.
 	 */
 	strlcpy(statp->f_fstypename, "zfs",
 	    sizeof (statp->f_fstypename));
 	strlcpy(statp->f_mntfromname, vfsp->mnt_stat.f_mntfromname,
 	    sizeof (statp->f_mntfromname));
 	strlcpy(statp->f_mntonname, vfsp->mnt_stat.f_mntonname,
 	    sizeof (statp->f_mntonname));
 	statp->f_namemax = MAXNAMELEN - 1;
 
 	/*
 	 * The underlying storage pool actually uses multiple block sizes.
 	 * The block size reported here is the smallest one we support, and
 	 * the I/O size is whatever was recorded for the mount.
 	 */
 	statp->f_bsize = SPA_MINBLOCKSIZE;
 	statp->f_iosize = zfsvfs->z_vfs->mnt_stat.f_iosize;
 
 	dmu_objset_space(zfsvfs->z_os,
 	    &refdbytes, &availbytes, &usedobjs, &availobjs);
 
 	/*
 	 * Block counts are expressed in units of the minimum block size.
 	 * There is no root reservation, so bavail equals bfree.
 	 */
 	statp->f_blocks = (refdbytes + availbytes) >> SPA_MINBLOCKSHIFT;
 	statp->f_bfree = availbytes / statp->f_bsize;
 	statp->f_bavail = statp->f_bfree;
 
 	/*
 	 * statvfs() should really be called statufs(), because it assumes
 	 * static metadata.  ZFS doesn't preallocate files, so the best
 	 * we can do is report the max that could possibly fit in f_files,
 	 * and that minus the number actually used in f_ffree.
 	 * For f_ffree, report the smaller of the number of objects available
 	 * and the number of blocks (each object will take at least a block).
 	 */
 	statp->f_ffree = MIN(availobjs, statp->f_bfree);
 	statp->f_files = statp->f_ffree + usedobjs;
 
 	ZFS_EXIT(zfsvfs);
 	return (0);
 }
 
 /*
  * VFS root entry point: look up the root znode of the filesystem and return
  * its vnode in *vpp, locked according to 'flags'.  If the lock cannot be
  * taken the vnode is released and *vpp is set to NULL.
  */
 static int
 zfs_root(vfs_t *vfsp, int flags, vnode_t **vpp)
 {
 	zfsvfs_t *zfsvfs = vfsp->vfs_data;
 	znode_t *rootzp;
 	int error;
 
 	ZFS_ENTER(zfsvfs);
 	error = zfs_zget(zfsvfs, zfsvfs->z_root, &rootzp);
 	if (error == 0)
 		*vpp = ZTOV(rootzp);
 	ZFS_EXIT(zfsvfs);
 
 	if (error != 0)
 		return (error);
 
 	error = vn_lock(*vpp, flags);
 	if (error != 0) {
 		VN_RELE(*vpp);
 		*vpp = NULL;
 	}
 	return (error);
 }
 
 /*
  * Teardown the zfsvfs::z_os.
  *
  * Blocks new VOPs by taking the teardown locks for writing, closes the ZIL,
  * releases the dbuf holds of every znode, unregisters the property
  * callbacks and evicts the objset's cached dbufs.
  *
  * Note, if 'unmounting' is FALSE, we return with the 'z_teardown_lock'
  * and 'z_teardown_inactive_lock' held.  If 'unmounting' is TRUE both locks
  * are dropped again and z_unmounted is set.
  *
  * Returns 0 on success, or EIO when not unmounting and the filesystem has
  * already been unmounted or has no objset (both locks are released in that
  * case).
  */
 static int
 zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting)
 {
 	znode_t	*zp;
 	dsl_dir_t *dd;
 
 	/*
 	 * If someone has not already unmounted this file system,
 	 * drain the zrele_taskq to ensure all active references to the
 	 * zfsvfs_t have been handled; only then can it be safely destroyed.
 	 */
 	if (zfsvfs->z_os) {
 		/*
 		 * If we're unmounting we have to wait for the list to
 		 * drain completely.
 		 *
 		 * If we're not unmounting there's no guarantee the list
 		 * will drain completely, but zreles run from the taskq
 		 * may add the parents of dir-based xattrs to the taskq
 		 * so we want to wait for these.
 		 *
 		 * We can safely read z_nr_znodes without locking because the
 		 * VFS has already blocked operations which add to the
 		 * z_all_znodes list and thus increment z_nr_znodes.
 		 */
 		int round = 0;
 		while (zfsvfs->z_nr_znodes > 0) {
 			taskq_wait_outstanding(dsl_pool_zrele_taskq(
 			    dmu_objset_pool(zfsvfs->z_os)), 0);
 			/* When not unmounting, give up after two rounds. */
 			if (++round > 1 && !unmounting)
 				break;
 		}
 	}
 	ZFS_TEARDOWN_ENTER_WRITE(zfsvfs, FTAG);
 
 	if (!unmounting) {
 		/*
 		 * We purge the parent filesystem's vfsp as the parent
 		 * filesystem and all of its snapshots have their vnode's
 		 * v_vfsp set to the parent's filesystem's vfsp.  Note,
 		 * 'z_parent' is self referential for non-snapshots.
 		 */
 #ifdef FREEBSD_NAMECACHE
 #if __FreeBSD_version >= 1300117
 		cache_purgevfs(zfsvfs->z_parent->z_vfs);
 #else
 		cache_purgevfs(zfsvfs->z_parent->z_vfs, true);
 #endif
 #endif
 	}
 
 	/*
 	 * Close the zil. NB: Can't close the zil while zfs_inactive
 	 * threads are blocked as zil_close can call zfs_inactive.
 	 */
 	if (zfsvfs->z_log) {
 		zil_close(zfsvfs->z_log);
 		zfsvfs->z_log = NULL;
 	}
 
 	ZFS_TEARDOWN_INACTIVE_ENTER_WRITE(zfsvfs);
 
 	/*
 	 * If we are not unmounting (ie: online recv) and someone already
 	 * unmounted this file system while we were doing the switcheroo,
 	 * or a reopen of z_os failed then just bail out now.
 	 */
 	if (!unmounting && (zfsvfs->z_unmounted || zfsvfs->z_os == NULL)) {
 		ZFS_TEARDOWN_INACTIVE_EXIT_WRITE(zfsvfs);
 		ZFS_TEARDOWN_EXIT(zfsvfs, FTAG);
 		return (SET_ERROR(EIO));
 	}
 
 	/*
 	 * At this point there are no vops active, and any new vops will
 	 * fail with EIO since we have z_teardown_lock for writer (only
 	 * relevant for forced unmount).
 	 *
 	 * Release all holds on dbufs.
 	 */
 	mutex_enter(&zfsvfs->z_znodes_lock);
 	for (zp = list_head(&zfsvfs->z_all_znodes); zp != NULL;
 	    zp = list_next(&zfsvfs->z_all_znodes, zp)) {
 		/* Only znodes that still have an SA handle need finishing. */
 		if (zp->z_sa_hdl != NULL) {
 			zfs_znode_dmu_fini(zp);
 		}
 	}
 	mutex_exit(&zfsvfs->z_znodes_lock);
 
 	/*
 	 * If we are unmounting, set the unmounted flag and let new vops
 	 * unblock.  zfs_inactive will have the unmounted behavior, and all
 	 * other vops will fail with EIO.
 	 */
 	if (unmounting) {
 		zfsvfs->z_unmounted = B_TRUE;
 		ZFS_TEARDOWN_INACTIVE_EXIT_WRITE(zfsvfs);
 		ZFS_TEARDOWN_EXIT(zfsvfs, FTAG);
 	}
 
 	/*
 	 * z_os will be NULL if there was an error in attempting to reopen
 	 * zfsvfs, so just return as the properties had already been
 	 * unregistered and cached data had been evicted before.
 	 */
 	if (zfsvfs->z_os == NULL)
 		return (0);
 
 	/*
 	 * Unregister properties.
 	 */
 	zfs_unregister_callbacks(zfsvfs);
 
 	/*
 	 * Evict cached data.  A writable filesystem first waits for
 	 * txg_wait_synced() before its dbufs are evicted.
 	 */
 	if (!zfs_is_readonly(zfsvfs))
 		txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0);
 	dmu_objset_evict_dbufs(zfsvfs->z_os);
 	dd = zfsvfs->z_os->os_dsl_dataset->ds_dir;
 	dsl_dir_cancel_waiters(dd);
 
 	return (0);
 }
 
 /*
  * VFS unmount entry point.
  *
  * Checks the caller's permission, unmounts any snapshots mounted under the
  * control directory, flushes the vnodes, tears the zfsvfs down, disowns the
  * objset and finally frees the zfsvfs_t through zfs_freevfs().
  *
  * Returns 0 on success.  Every non-zero return happens before the teardown,
  * so on failure the zfsvfs_t has not been freed.
  */
 /*ARGSUSED*/
 static int
 zfs_umount(vfs_t *vfsp, int fflag)
 {
 	kthread_t *td = curthread;
 	zfsvfs_t *zfsvfs = vfsp->vfs_data;
 	objset_t *os;
 	cred_t *cr = td->td_ucred;
 	int ret;
 
 	/* Without the privilege, fall back to the delegated mount permission. */
 	ret = secpolicy_fs_unmount(cr, vfsp);
 	if (ret) {
 		if (dsl_deleg_access((char *)vfsp->vfs_resource,
 		    ZFS_DELEG_PERM_MOUNT, cr))
 			return (ret);
 	}
 
 	/*
 	 * Unmount any snapshots mounted under .zfs before unmounting the
 	 * dataset itself.
 	 */
 	if (zfsvfs->z_ctldir != NULL) {
 		if ((ret = zfsctl_umount_snapshots(vfsp, fflag, cr)) != 0)
 			return (ret);
 	}
 
 	if (fflag & MS_FORCE) {
 		/*
 		 * Mark file system as unmounted before calling
 		 * vflush(FORCECLOSE). This way we ensure no future vnops
 		 * will be called and risk operating on DOOMED vnodes.
 		 */
 		ZFS_TEARDOWN_ENTER_WRITE(zfsvfs, FTAG);
 		zfsvfs->z_unmounted = B_TRUE;
 		ZFS_TEARDOWN_EXIT(zfsvfs, FTAG);
 	}
 
 	/*
 	 * Flush all the files.
 	 */
 	ret = vflush(vfsp, 0, (fflag & MS_FORCE) ? FORCECLOSE : 0, td);
 	if (ret != 0)
 		return (ret);
 	/*
 	 * Get rid of the unlinked-drain task: keep draining the queue for as
 	 * long as taskqueue_cancel() reports a non-zero result.
 	 */
 	while (taskqueue_cancel(zfsvfs_taskq->tq_queue,
 	    &zfsvfs->z_unlinked_drain_task, NULL) != 0)
 		taskqueue_drain(zfsvfs_taskq->tq_queue,
 		    &zfsvfs->z_unlinked_drain_task);
 
 	VERIFY0(zfsvfs_teardown(zfsvfs, B_TRUE));
 	os = zfsvfs->z_os;
 
 	/*
 	 * z_os will be NULL if there was an error in
 	 * attempting to reopen zfsvfs.
 	 */
 	if (os != NULL) {
 		/*
 		 * Unset the objset user_ptr.
 		 */
 		mutex_enter(&os->os_user_ptr_lock);
 		dmu_objset_set_user(os, NULL);
 		mutex_exit(&os->os_user_ptr_lock);
 
 		/*
 		 * Finally release the objset
 		 */
 		dmu_objset_disown(os, B_TRUE, zfsvfs);
 	}
 
 	/*
 	 * We can now safely destroy the '.zfs' directory node.
 	 */
 	if (zfsvfs->z_ctldir != NULL)
 		zfsctl_destroy(zfsvfs);
 	/* Frees the zfsvfs_t; it must not be used past this point. */
 	zfs_freevfs(vfsp);
 
 	return (0);
 }
 
 /*
  * VFS vget entry point: translate an inode number into a vnode locked
  * according to 'flags'.
  *
  * Returns EOPNOTSUPP for the control-directory and shares inode numbers,
  * EINVAL for an unlinked znode, or the error from zfs_zget()/vn_lock().
  * Except for the EOPNOTSUPP case, *vpp is NULL whenever a non-zero value
  * is returned from the body below.
  */
 static int
 zfs_vget(vfs_t *vfsp, ino_t ino, int flags, vnode_t **vpp)
 {
 	zfsvfs_t	*zfsvfs = vfsp->vfs_data;
 	znode_t		*zp;
 	int		err;
 
 	/*
 	 * zfs_zget() can't operate on virtual entries like .zfs/ or
 	 * .zfs/snapshot/ directories, that's why we return EOPNOTSUPP.
 	 * This will make NFS switch to LOOKUP instead of using VGET.
 	 */
 	if (ino == ZFSCTL_INO_ROOT || ino == ZFSCTL_INO_SNAPDIR ||
 	    (zfsvfs->z_shares_dir != 0 && ino == zfsvfs->z_shares_dir))
 		return (EOPNOTSUPP);
 
 	ZFS_ENTER(zfsvfs);
 	err = zfs_zget(zfsvfs, ino, &zp);
 	if (err == 0) {
 		if (zp->z_unlinked) {
 			/* An unlinked znode is not handed out. */
 			vrele(ZTOV(zp));
 			err = EINVAL;
 		} else {
 			*vpp = ZTOV(zp);
 		}
 	}
 	ZFS_EXIT(zfsvfs);
 
 	if (err != 0) {
 		*vpp = NULL;
 		return (err);
 	}
 
 	err = vn_lock(*vpp, flags);
 	if (err != 0) {
 		vrele(*vpp);
 		*vpp = NULL;
 	}
 	return (err);
 }
 
 /*
  * VFS checkexp entry point: forward the export check to vfs_stdcheckexp()
  * using the vfs of the parent filesystem.  The prototype differs depending
  * on __FreeBSD_version; the arguments are passed through unchanged.
  */
 static int
 #if __FreeBSD_version >= 1300098
 zfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, uint64_t *extflagsp,
     struct ucred **credanonp, int *numsecflavors, int *secflavors)
 #else
 zfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, int *extflagsp,
     struct ucred **credanonp, int *numsecflavors, int **secflavors)
 #endif
 {
 	zfsvfs_t *zfsvfs = vfsp->vfs_data;
 
 	/*
 	 * If this is a regular file system, vfsp is the same as
 	 * zfsvfs->z_parent->z_vfs, but if it is a snapshot,
 	 * zfsvfs->z_parent->z_vfs represents the parent file system,
 	 * which we have to use here, because only that file system
 	 * has mnt_export configured.
 	 */
 	return (vfs_stdcheckexp(zfsvfs->z_parent->z_vfs, nam, extflagsp,
 	    credanonp, numsecflavors, secflavors));
 }
 
 CTASSERT(SHORT_FID_LEN <= sizeof (struct fid));
 CTASSERT(LONG_FID_LEN <= sizeof (struct fid));
 
 /*
  * VFS fhtovp entry point: translate a file handle into a locked vnode.
  *
  * Both short and long fids are accepted.  A long fid presented to a
  * non-snapshot filesystem is first redirected to the objset it names.
  * Handles that refer to the control directory tree are resolved through
  * zfsctl_root(); everything else goes through zfs_zget() and has its
  * generation number checked against the fid.
  *
  * Returns 0 with *vpp set on success.  On failure *vpp is NULL and EINVAL
  * (malformed fid, unknown objset, unlinked or stale object) or the error
  * from zfs_zget()/vn_lock() is returned.
  */
 static int
 zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, int flags, vnode_t **vpp)
 {
 	struct componentname cn;
 	zfsvfs_t	*zfsvfs = vfsp->vfs_data;
 	znode_t		*zp;
 	vnode_t		*dvp;
 	uint64_t	object = 0;
 	uint64_t	fid_gen = 0;
 	uint64_t	setgen = 0;
 	uint64_t	gen_mask;
 	uint64_t	zp_gen;
 	int 		i, err;
 
 	*vpp = NULL;
 
 	ZFS_ENTER(zfsvfs);
 
 	/*
 	 * On FreeBSD we can get snapshot's mount point or its parent file
 	 * system mount point depending if snapshot is already mounted or not.
 	 */
 	if (zfsvfs->z_parent == zfsvfs && fidp->fid_len == LONG_FID_LEN) {
 		zfid_long_t	*zlfid = (zfid_long_t *)fidp;
 		uint64_t	objsetid = 0;
 
 		/* The fid fields are byte arrays, least significant first. */
 		for (i = 0; i < sizeof (zlfid->zf_setid); i++)
 			objsetid |= ((uint64_t)zlfid->zf_setid[i]) << (8 * i);
 
 		for (i = 0; i < sizeof (zlfid->zf_setgen); i++)
 			setgen |= ((uint64_t)zlfid->zf_setgen[i]) << (8 * i);
 
 		ZFS_EXIT(zfsvfs);
 
 		/* From here on 'zfsvfs' refers to the objset named by the fid. */
 		err = zfsctl_lookup_objset(vfsp, objsetid, &zfsvfs);
 		if (err)
 			return (SET_ERROR(EINVAL));
 		ZFS_ENTER(zfsvfs);
 	}
 
 	if (fidp->fid_len == SHORT_FID_LEN || fidp->fid_len == LONG_FID_LEN) {
 		zfid_short_t	*zfid = (zfid_short_t *)fidp;
 
 		for (i = 0; i < sizeof (zfid->zf_object); i++)
 			object |= ((uint64_t)zfid->zf_object[i]) << (8 * i);
 
 		for (i = 0; i < sizeof (zfid->zf_gen); i++)
 			fid_gen |= ((uint64_t)zfid->zf_gen[i]) << (8 * i);
 	} else {
 		ZFS_EXIT(zfsvfs);
 		return (SET_ERROR(EINVAL));
 	}
 
 	if (fidp->fid_len == LONG_FID_LEN && setgen != 0) {
 		ZFS_EXIT(zfsvfs);
 		dprintf("snapdir fid: fid_gen (%llu) and setgen (%llu)\n",
 		    (u_longlong_t)fid_gen, (u_longlong_t)setgen);
 		return (SET_ERROR(EINVAL));
 	}
 
 	/*
 	 * A zero fid_gen means we are in .zfs or the .zfs/snapshot
 	 * directory tree. If the object == zfsvfs->z_shares_dir, then
 	 * we are in the .zfs/shares directory tree.
 	 */
 	if ((fid_gen == 0 &&
 	    (object == ZFSCTL_INO_ROOT || object == ZFSCTL_INO_SNAPDIR)) ||
 	    (zfsvfs->z_shares_dir != 0 && object == zfsvfs->z_shares_dir)) {
 		ZFS_EXIT(zfsvfs);
 		VERIFY0(zfsctl_root(zfsvfs, LK_SHARED, &dvp));
 		if (object == ZFSCTL_INO_SNAPDIR) {
 			cn.cn_nameptr = "snapshot";
 			cn.cn_namelen = strlen(cn.cn_nameptr);
 			cn.cn_nameiop = LOOKUP;
 			cn.cn_flags = ISLASTCN | LOCKLEAF;
 			cn.cn_lkflags = flags;
 			VERIFY0(VOP_LOOKUP(dvp, vpp, &cn));
 			vput(dvp);
 		} else if (object == zfsvfs->z_shares_dir) {
 			/*
 			 * XXX This branch must not be taken,
 			 * if it is, then the lookup below will
 			 * explode.
 			 */
 			cn.cn_nameptr = "shares";
 			cn.cn_namelen = strlen(cn.cn_nameptr);
 			cn.cn_nameiop = LOOKUP;
 			cn.cn_flags = ISLASTCN;
 			cn.cn_lkflags = flags;
 			VERIFY0(VOP_LOOKUP(dvp, vpp, &cn));
 			vput(dvp);
 		} else {
 			*vpp = dvp;
 		}
 		/*
 		 * Every step above is wrapped in VERIFY0(), so reaching this
 		 * point means success.  'err' must not be returned here: it
 		 * is only assigned when the long-fid branch at the top ran.
 		 */
 		return (0);
 	}
 
 	/*
 	 * 'i' still holds sizeof (zf_gen) from the last loop above, so the
 	 * mask covers exactly the generation bytes stored in the fid.
 	 */
 	gen_mask = -1ULL >> (64 - 8 * i);
 
 	dprintf("getting %llu [%llu mask %llx]\n", (u_longlong_t)object,
 	    (u_longlong_t)fid_gen,
 	    (u_longlong_t)gen_mask);
 	if ((err = zfs_zget(zfsvfs, object, &zp))) {
 		ZFS_EXIT(zfsvfs);
 		return (err);
 	}
 	(void) sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs), &zp_gen,
 	    sizeof (uint64_t));
 	zp_gen = zp_gen & gen_mask;
 	/* A masked generation of zero is compared as 1. */
 	if (zp_gen == 0)
 		zp_gen = 1;
 	if (zp->z_unlinked || zp_gen != fid_gen) {
 		dprintf("znode gen (%llu) != fid gen (%llu)\n",
 		    (u_longlong_t)zp_gen, (u_longlong_t)fid_gen);
 		vrele(ZTOV(zp));
 		ZFS_EXIT(zfsvfs);
 		return (SET_ERROR(EINVAL));
 	}
 
 	*vpp = ZTOV(zp);
 	ZFS_EXIT(zfsvfs);
 	err = vn_lock(*vpp, flags);
 	if (err == 0) {
 		vnode_create_vobject(*vpp, zp->z_size, curthread);
 	} else {
 		/*
 		 * The caller only sees NULL on failure, so the reference
 		 * taken by zfs_zget() has to be dropped here, as zfs_vget()
 		 * and zfs_root() do on the same path.
 		 */
 		vrele(*vpp);
 		*vpp = NULL;
 	}
 	return (err);
 }
 
 /*
  * Block out VOPs and close zfsvfs_t::z_os.
  *
  * This is a non-unmounting teardown.  On success (return value 0) the
  * 'z_teardown_lock' and 'z_teardown_inactive_lock' are left write held, and
  * ownership of the underlying dataset and objset stays intact so that they
  * can be handed off atomically during a subsequent rollback or recv
  * operation and the resume thereafter.  On failure the teardown error is
  * returned unchanged.
  */
 int
 zfs_suspend_fs(zfsvfs_t *zfsvfs)
 {
 	return (zfsvfs_teardown(zfsvfs, B_FALSE));
 }
 
 /*
  * Rebuild SA and release VOPs.  Note that ownership of the underlying dataset
  * is an invariant across any of the operations that can be performed while the
  * filesystem was suspended.  Whether it succeeded or failed, the preconditions
  * are the same: the relevant objset and associated dataset are owned by
  * zfsvfs, held, and long held on entry.
  *
  * Both teardown locks must be write held on entry; they are released before
  * returning.  Returns 0 on success, or the error from zfsvfs_init(), in which
  * case a forced unmount of the filesystem is attempted.
  */
 int
 zfs_resume_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds)
 {
 	int err;
 	znode_t *zp;
 
 	ASSERT(ZFS_TEARDOWN_WRITE_HELD(zfsvfs));
 	ASSERT(ZFS_TEARDOWN_INACTIVE_WRITE_HELD(zfsvfs));
 
 	/*
 	 * We already own this, so just update the objset_t, as the one we
 	 * had before may have been evicted.
 	 */
 	objset_t *os;
 	VERIFY3P(ds->ds_owner, ==, zfsvfs);
 	VERIFY(dsl_dataset_long_held(ds));
 	dsl_pool_t *dp = spa_get_dsl(dsl_dataset_get_spa(ds));
 	dsl_pool_config_enter(dp, FTAG);
 	VERIFY0(dmu_objset_from_ds(ds, &os));
 	dsl_pool_config_exit(dp, FTAG);
 
 	err = zfsvfs_init(zfsvfs, os);
 	if (err != 0)
 		goto bail;
 
 	/* Clear the cancelled-activity flag before setting the zfsvfs up. */
 	ds->ds_dir->dd_activity_cancelled = B_FALSE;
 	VERIFY0(zfsvfs_setup(zfsvfs, B_FALSE));
 
 	zfs_set_fuid_feature(zfsvfs);
 
 	/*
 	 * Attempt to re-establish all the active znodes with
 	 * their dbufs.  If a zfs_rezget() fails, then we'll let
 	 * any potential callers discover that via ZFS_ENTER_VERIFY_VP
 	 * when they try to use their znode.
 	 */
 	mutex_enter(&zfsvfs->z_znodes_lock);
 	for (zp = list_head(&zfsvfs->z_all_znodes); zp;
 	    zp = list_next(&zfsvfs->z_all_znodes, zp)) {
 		(void) zfs_rezget(zp);
 	}
 	mutex_exit(&zfsvfs->z_znodes_lock);
 
 bail:
 	/* release the VOPs */
 	ZFS_TEARDOWN_INACTIVE_EXIT_WRITE(zfsvfs);
 	ZFS_TEARDOWN_EXIT(zfsvfs, FTAG);
 
 	if (err) {
 		/*
 		 * Since we couldn't setup the sa framework, try to force
 		 * unmount this file system.
 		 */
 		if (vn_vfswlock(zfsvfs->z_vfs->vfs_vnodecovered) == 0) {
 			/* Take a reference on the vfs before unmounting it. */
 			vfs_ref(zfsvfs->z_vfs);
 			(void) dounmount(zfsvfs->z_vfs, MS_FORCE, curthread);
 		}
 	}
 	return (err);
 }
 
 /*
  * Free the zfsvfs_t hanging off 'vfsp' and drop the count of active ZFS
  * filesystems by one.
  */
 static void
 zfs_freevfs(vfs_t *vfsp)
 {
 	zfsvfs_free((zfsvfs_t *)vfsp->vfs_data);
 	atomic_dec_32(&zfs_active_fs_count);
 }
 
 #ifdef __i386__
 static int desiredvnodes_backup;
 #include <sys/vmmeter.h>
 
 
 #include <vm/vm_page.h>
 #include <vm/vm_object.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_map.h>
 #endif
 
 /*
  * On i386 only: remember the current value of desiredvnodes and, if it
  * still matches the value recomputed below, lower it to three quarters of
  * that value.  On every other architecture this function does nothing.
  */
 static void
 zfs_vnodes_adjust(void)
 {
 #ifdef __i386__
 	int newdesiredvnodes;
 
 	/* Saved so that zfs_vnodes_adjust_back() can restore it. */
 	desiredvnodes_backup = desiredvnodes;
 
 	/*
 	 * We calculate newdesiredvnodes the same way it is done in
 	 * vntblinit(). If it is equal to desiredvnodes, it means that
 	 * it wasn't tuned by the administrator and we can tune it down.
 	 */
 	newdesiredvnodes = min(maxproc + vm_cnt.v_page_count / 4, 2 *
 	    vm_kmem_size / (5 * (sizeof (struct vm_object) +
 	    sizeof (struct vnode))));
 	if (newdesiredvnodes == desiredvnodes)
 		desiredvnodes = (3 * newdesiredvnodes) / 4;
 #endif
 }
 
 /*
  * On i386 only: restore desiredvnodes from desiredvnodes_backup.  On every
  * other architecture this function does nothing.
  */
 static void
 zfs_vnodes_adjust_back(void)
 {
 
 #ifdef __i386__
 	desiredvnodes = desiredvnodes_backup;
 #endif
 }
 
 #if __FreeBSD_version >= 1300139
 static struct sx zfs_vnlru_lock;
 static struct vnode *zfs_vnlru_marker;
 #endif
 static arc_prune_t *zfs_prune;
 
 /*
  * Prune callback: ask the vnode LRU code to free up to 'nr_to_scan' vnodes
  * belonging to zfs_vfsops.  The request is capped at INT_MAX.  On newer
  * FreeBSD versions the call is made with zfs_vnlru_lock held exclusively
  * and uses the ZFS marker vnode.
  */
 static void
 zfs_prune_task(uint64_t nr_to_scan, void *arg __unused)
 {
 	const uint64_t count =
 	    (nr_to_scan > INT_MAX) ? INT_MAX : nr_to_scan;
 
 #if __FreeBSD_version >= 1300139
 	sx_xlock(&zfs_vnlru_lock);
 	vnlru_free_vfsops(count, &zfs_vfsops, zfs_vnlru_marker);
 	sx_xunlock(&zfs_vnlru_lock);
 #else
 	vnlru_free(count, &zfs_vfsops);
 #endif
 }
 
 /*
  * One-time initialization of the ZFS VFS layer: prints the ZPL version,
  * initializes the control-directory and znode subsystems, adjusts the vnode
  * limit, registers the ZFS objset type, creates the zfsvfs taskq and hooks
  * zfs_prune_task() up as a prune callback.
  */
 void
 zfs_init(void)
 {
 
 	printf("ZFS filesystem version: " ZPL_VERSION_STRING "\n");
 
 	/*
 	 * Initialize .zfs directory structures
 	 */
 	zfsctl_init();
 
 	/*
 	 * Initialize znode cache, vnode ops, etc...
 	 */
 	zfs_znode_init();
 
 	/*
 	 * Reduce number of vnodes. Originally number of vnodes is calculated
 	 * with UFS inode in mind. We reduce it here, because it's too big for
 	 * ZFS/i386.
 	 */
 	zfs_vnodes_adjust();
 
 	dmu_objset_register_type(DMU_OST_ZFS, zpl_get_file_info);
 
 	zfsvfs_taskq = taskq_create("zfsvfs", 1, minclsyspri, 0, 0, 0);
 
 #if __FreeBSD_version >= 1300139
 	/* Marker vnode and lock used by zfs_prune_task(). */
 	zfs_vnlru_marker = vnlru_alloc_marker();
 	sx_init(&zfs_vnlru_lock, "zfs vnlru lock");
 #endif
 	zfs_prune = arc_add_prune_callback(zfs_prune_task, NULL);
 }
 
 /*
  * Undo zfs_init(): remove the prune callback first, then release the vnlru
  * marker and lock, destroy the taskq, shut down the control-directory and
  * znode subsystems and restore the vnode limit.
  */
 void
 zfs_fini(void)
 {
 	arc_remove_prune_callback(zfs_prune);
 #if __FreeBSD_version >= 1300139
 	vnlru_free_marker(zfs_vnlru_marker);
 	sx_destroy(&zfs_vnlru_lock);
 #endif
 
 	taskq_destroy(zfsvfs_taskq);
 	zfsctl_fini();
 	zfs_znode_fini();
 	zfs_vnodes_adjust_back();
 }
 
 /*
  * Report whether any ZFS filesystem is currently counted as active.
  * Returns 1 when zfs_active_fs_count is non-zero and 0 otherwise.
  */
 int
 zfs_busy(void)
 {
 	if (zfs_active_fs_count == 0)
 		return (0);
 	return (1);
 }
 
 /*
  * Release VOPs and unmount a suspended filesystem.
  *
  * Both teardown locks must be write held on entry; they are released here.
  * The objset pointer is refreshed from 'ds' before the unmount is attempted.
  * If the unmount succeeds the zfsvfs_t has been freed by the time this
  * function returns; if it fails the zfsvfs_t is flagged as unmounted.
  * Always returns 0.
  */
 int
 zfs_end_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds)
 {
 	ASSERT(ZFS_TEARDOWN_WRITE_HELD(zfsvfs));
 	ASSERT(ZFS_TEARDOWN_INACTIVE_WRITE_HELD(zfsvfs));
 
 	/*
 	 * We already own this, so just hold and rele it to update the
 	 * objset_t, as the one we had before may have been evicted.
 	 */
 	objset_t *os;
 	VERIFY3P(ds->ds_owner, ==, zfsvfs);
 	VERIFY(dsl_dataset_long_held(ds));
 	dsl_pool_t *dp = spa_get_dsl(dsl_dataset_get_spa(ds));
 	dsl_pool_config_enter(dp, FTAG);
 	VERIFY0(dmu_objset_from_ds(ds, &os));
 	dsl_pool_config_exit(dp, FTAG);
 	zfsvfs->z_os = os;
 
 	/* release the VOPs */
 	ZFS_TEARDOWN_INACTIVE_EXIT_WRITE(zfsvfs);
 	ZFS_TEARDOWN_EXIT(zfsvfs, FTAG);
 
 	/*
 	 * Try to unmount this file system.  A successful zfs_umount() ends
 	 * by freeing the zfsvfs_t, so the structure must not be touched
 	 * afterwards.  Only when the unmount fails is it still alive, and
 	 * then it is flagged as unmounted.
 	 */
 	if (zfs_umount(zfsvfs->z_vfs, 0) != 0)
 		zfsvfs->z_unmounted = B_TRUE;
 	return (0);
 }
 
 /*
  * Upgrade the ZPL version of a mounted filesystem to 'newvers'.
  *
  * Returns EINVAL when 'newvers' lies outside
  * [ZPL_VERSION_INITIAL, ZPL_VERSION] or is lower than the current version,
  * ENOTSUP when the pool's SPA version is below what
  * zfs_spa_version_map(newvers) asks for, the error from dmu_tx_assign() or
  * zap_update() when those fail, and 0 on success.
  */
 int
 zfs_set_version(zfsvfs_t *zfsvfs, uint64_t newvers)
 {
 	int error;
 	objset_t *os = zfsvfs->z_os;
 	dmu_tx_t *tx;
 
 	if (newvers < ZPL_VERSION_INITIAL || newvers > ZPL_VERSION)
 		return (SET_ERROR(EINVAL));
 
 	/* Downgrades are rejected. */
 	if (newvers < zfsvfs->z_version)
 		return (SET_ERROR(EINVAL));
 
 	if (zfs_spa_version_map(newvers) >
 	    spa_version(dmu_objset_spa(zfsvfs->z_os)))
 		return (SET_ERROR(ENOTSUP));
 
 	tx = dmu_tx_create(os);
 	dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_FALSE, ZPL_VERSION_STR);
 	/* Extra holds for the SA master node created further down. */
 	if (newvers >= ZPL_VERSION_SA && !zfsvfs->z_use_sa) {
 		dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_TRUE,
 		    ZFS_SA_ATTRS);
 		dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
 	}
 	error = dmu_tx_assign(tx, TXG_WAIT);
 	if (error) {
 		dmu_tx_abort(tx);
 		return (error);
 	}
 
 	error = zap_update(os, MASTER_NODE_OBJ, ZPL_VERSION_STR,
 	    8, 1, &newvers, tx);
 
 	/* The tx was assigned, so it is committed even on failure. */
 	if (error) {
 		dmu_tx_commit(tx);
 		return (error);
 	}
 
 	if (newvers >= ZPL_VERSION_SA && !zfsvfs->z_use_sa) {
 		uint64_t sa_obj;
 
 		ASSERT3U(spa_version(dmu_objset_spa(zfsvfs->z_os)), >=,
 		    SPA_VERSION_SA);
 		/* Create the SA master node and record it in the master node. */
 		sa_obj = zap_create(os, DMU_OT_SA_MASTER_NODE,
 		    DMU_OT_NONE, 0, tx);
 
 		error = zap_add(os, MASTER_NODE_OBJ,
 		    ZFS_SA_ATTRS, 8, 1, &sa_obj, tx);
 		ASSERT0(error);
 
 		VERIFY0(sa_set_sa_object(os, sa_obj));
 		sa_register_update_callback(os, zfs_sa_upgrade);
 	}
 
 	spa_history_log_internal_ds(dmu_objset_ds(os), "upgrade", tx,
 	    "from %ju to %ju", (uintmax_t)zfsvfs->z_version,
 	    (uintmax_t)newvers);
 	dmu_tx_commit(tx);
 
 	/* Update the in-core copies and the flags derived from the version. */
 	zfsvfs->z_version = newvers;
 	os->os_version = newvers;
 
 	zfs_set_fuid_feature(zfsvfs);
 
 	return (0);
 }
 
 /*
  * Read a property stored within the master node.
  *
  * The lookup order is: the copy cached in the objset_t (for the properties
  * that have one), then the master node ZAP, then a built-in default when the
  * ZAP has no entry (or when 'os' is NULL).  A value obtained from the ZAP or
  * from the defaults is written back into the objset_t cache when a cache
  * slot exists.
  *
  * Returns 0 with *value filled in, or an error; ENOENT is returned for a
  * property that has neither a stored value nor a default.
  */
 int
 zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value)
 {
 	uint64_t *cache_slot = NULL;
 	const char *zap_name;
 	int rc;
 
 	/* Locate the cache slot in the objset_t, if this property has one. */
 	if (os != NULL) {
 		if (prop == ZFS_PROP_VERSION)
 			cache_slot = &os->os_version;
 		else if (prop == ZFS_PROP_NORMALIZE)
 			cache_slot = &os->os_normalization;
 		else if (prop == ZFS_PROP_UTF8ONLY)
 			cache_slot = &os->os_utf8only;
 		else if (prop == ZFS_PROP_CASE)
 			cache_slot = &os->os_casesensitivity;
 	}
 
 	/* Fast path: the cached copy has already been initialized. */
 	if (cache_slot != NULL && *cache_slot != OBJSET_PROP_UNINITIALIZED) {
 		*value = *cache_slot;
 		return (0);
 	}
 
 	/*
 	 * Not cached, so consult the master node.  The version property is
 	 * stored under a slightly different name than its property name.
 	 */
 	zap_name = (prop == ZFS_PROP_VERSION) ?
 	    ZPL_VERSION_STR : zfs_prop_to_name(prop);
 
 	if (os == NULL) {
 		rc = ENOENT;
 	} else {
 		ASSERT3U(os->os_phys->os_type, ==, DMU_OST_ZFS);
 		rc = zap_lookup(os, MASTER_NODE_OBJ, zap_name, 8, 1, value);
 	}
 
 	if (rc == ENOENT) {
 		/* No value set, use the default value */
 		switch (prop) {
 		case ZFS_PROP_VERSION:
 			*value = ZPL_VERSION;
 			break;
 		case ZFS_PROP_NORMALIZE:
 		case ZFS_PROP_UTF8ONLY:
 			*value = 0;
 			break;
 		case ZFS_PROP_CASE:
 			*value = ZFS_CASE_SENSITIVE;
 			break;
 		case ZFS_PROP_ACLTYPE:
 			*value = ZFS_ACLTYPE_NFSV4;
 			break;
 		default:
 			return (rc);
 		}
 		rc = 0;
 	}
 
 	/* Remember a successfully obtained value in the objset_t cache. */
 	if (rc == 0 && cache_slot != NULL)
 		*cache_slot = *value;
 
 	return (rc);
 }
 
 /*
  * Report whether the vfs attached to this objset has MNTK_UNMOUNT set in
  * its mnt_kern_flag.  B_TRUE means a VFS unmount has been initiated;
  * B_FALSE is returned otherwise, including when the objset has no user
  * pointer or that zfsvfs has no vfs attached.
  */
 boolean_t
 zfs_get_vfs_flag_unmounted(objset_t *os)
 {
 	boolean_t unmounting;
 	zfsvfs_t *zfsvfs;
 
 	ASSERT3U(dmu_objset_type(os), ==, DMU_OST_ZFS);
 
 	/* The user pointer is looked up and inspected under its lock. */
 	mutex_enter(&os->os_user_ptr_lock);
 	zfsvfs = dmu_objset_get_user(os);
 	if (zfsvfs == NULL || zfsvfs->z_vfs == NULL)
 		unmounting = B_FALSE;
 	else if (zfsvfs->z_vfs->mnt_kern_flag & MNTK_UNMOUNT)
 		unmounting = B_TRUE;
 	else
 		unmounting = B_FALSE;
 	mutex_exit(&os->os_user_ptr_lock);
 
 	return (unmounting);
 }
 
 #ifdef _KERNEL
 /*
  * Rewrite f_mntfromname for every entry on the mount list affected by a
  * rename of oldname to newname: an exact match is replaced outright, and
  * a name that continues past oldname with '/' or '@' has only its leading
  * oldname portion swapped for newname.  The list is walked with
  * mountlist_mtx held.
  */
 void
 zfsvfs_update_fromname(const char *oldname, const char *newname)
 {
 	const size_t prefixlen = strlen(oldname);
 	char renamed[MAXPATHLEN];
 	struct mount *mp;
 
 	mtx_lock(&mountlist_mtx);
 	TAILQ_FOREACH(mp, &mountlist, mnt_list) {
 		char *from = mp->mnt_stat.f_mntfromname;
 
 		if (strcmp(from, oldname) == 0) {
 			/* The renamed dataset itself. */
 			(void) strlcpy(from, newname,
 			    sizeof (mp->mnt_stat.f_mntfromname));
 		} else if (strncmp(from, oldname, prefixlen) == 0 &&
 		    (from[prefixlen] == '/' || from[prefixlen] == '@')) {
 			/*
 			 * A name below oldname: build the new name in a
 			 * scratch buffer first, because the suffix being
 			 * appended lives in the destination itself.
 			 */
 			(void) snprintf(renamed, sizeof (renamed), "%s%s",
 			    newname, from + prefixlen);
 			(void) strlcpy(from, renamed,
 			    sizeof (mp->mnt_stat.f_mntfromname));
 		}
 	}
 	mtx_unlock(&mountlist_mtx);
 }
 #endif
 
 /*
  * Starting at spr and walking up through pr_parent, locate the first
  * prison that carries ZFS info in its OSD slot; prison0 always qualifies
  * and supplies zfs_jailparam0.
  *
  * Returns the ZFS info and stores the owning prison in *prp.  That
  * prison's pr_mtx is held on return and must be released by the caller.
  */
 static struct zfs_jailparam *
 zfs_jailparam_find(struct prison *spr, struct prison **prp)
 {
 	struct prison *cur = spr;
 	struct zfs_jailparam *info;
 
 	for (;;) {
 		mtx_lock(&cur->pr_mtx);
 		if (cur == &prison0) {
 			/* Top of the hierarchy: use the global defaults. */
 			info = &zfs_jailparam0;
 			break;
 		}
 		info = osd_jail_get(cur, zfs_jailparam_slot);
 		if (info != NULL)
 			break;
 		/* Nothing here; drop this lock and try the parent. */
 		mtx_unlock(&cur->pr_mtx);
 		cur = cur->pr_parent;
 	}
 
 	*prp = cur;
 	return (info);
 }
 
 /*
  * Ensure a prison has its own ZFS info.  If zjpp is non-null, point it to the
  * ZFS info and lock the prison.
  *
  * When pr has no record of its own, a new one is allocated, attached to
  * pr's OSD slot and initialised as a copy of the nearest ancestor's record
  * (as located by zfs_jailparam_find()).
  *
  * Locking: if zjpp is non-NULL the function returns with pr->pr_mtx held
  * and the caller must release it; if zjpp is NULL the mutex is released
  * before returning.
  */
 static void
 zfs_jailparam_alloc(struct prison *pr, struct zfs_jailparam **zjpp)
 {
 	struct prison *ppr;
 	struct zfs_jailparam *zjp, *nzjp;
 	void **rsv;
 
 	/* If this prison already has ZFS info, return that. */
 	zjp = zfs_jailparam_find(pr, &ppr);
 	if (ppr == pr)
 		goto done;
 
 	/*
 	 * Allocate a new info record.  Then check again, in case something
 	 * changed during the allocation.
 	 */
 	/* The ancestor's mutex is not held across the M_WAITOK allocation. */
 	mtx_unlock(&ppr->pr_mtx);
 	nzjp = malloc(sizeof (struct zfs_jailparam), M_PRISON, M_WAITOK);
 	rsv = osd_reserve(zfs_jailparam_slot);
 	zjp = zfs_jailparam_find(pr, &ppr);
 	if (ppr == pr) {
 		/* pr gained its own record meanwhile; discard ours. */
 		free(nzjp, M_PRISON);
 		osd_free_reserved(rsv);
 		goto done;
 	}
 	/* Inherit the initial values from the ancestor. */
 	/*
 	 * ppr (the ancestor that was just found) is still locked here; pr is
 	 * locked as well so the new record can be attached to it.
 	 */
 	mtx_lock(&pr->pr_mtx);
 	(void) osd_jail_set_reserved(pr, zfs_jailparam_slot, rsv, nzjp);
 	(void) memcpy(nzjp, zjp, sizeof (*zjp));
 	zjp = nzjp;
 	/* Finished with the ancestor; pr stays locked for the exit path. */
 	mtx_unlock(&ppr->pr_mtx);
 done:
 	/*
 	 * pr->pr_mtx is held on every path that reaches this point.  Either
 	 * hand it to the caller together with the record, or drop it.
 	 */
 	if (zjpp != NULL)
 		*zjpp = zjp;
 	else
 		mtx_unlock(&pr->pr_mtx);
 }
 
 /*
  * Jail OSD methods for ZFS VFS info.
  */
 /*
  * PR_METHOD_CREATE handler.  Unless the new prison was explicitly created
  * with "zfs" set to JAIL_SYS_INHERIT, give it its own ZFS info.  Always
  * returns 0.
  */
 static int
 zfs_jailparam_create(void *obj, void *data)
 {
 	struct prison *newpr = obj;
 	struct vfsoptlist *optlist = data;
 	int jsys;
 
 	/*
 	 * A prison that gets its own record starts with a copy of its
 	 * parent's values.  This is different from JAIL_SYS_INHERIT, which
 	 * also inherits later changes.
 	 */
 	if (vfs_copyopt(optlist, "zfs", &jsys, sizeof (jsys)) != 0 ||
 	    jsys != JAIL_SYS_INHERIT)
 		zfs_jailparam_alloc(newpr, NULL);
 
 	return (0);
 }
 
 /*
  * PR_METHOD_GET handler.  Fills in the "zfs" and "zfs.mount_snapshot"
  * options for a prison.  An option the caller did not ask for (ENOENT
  * from vfs_setopt()) is not treated as an error.
  *
  * Returns 0 on success or the first other error from vfs_setopt().
  */
 static int
 zfs_jailparam_get(void *obj, void *data)
 {
 	struct prison *pr = obj;
 	struct prison *owner;
 	struct vfsoptlist *optlist = data;
 	struct zfs_jailparam *zjp;
 	int jsys, rc;
 
 	/* owner comes back locked and is released just before returning. */
 	zjp = zfs_jailparam_find(pr, &owner);
 	jsys = (owner == pr) ? JAIL_SYS_NEW : JAIL_SYS_INHERIT;
 
 	rc = vfs_setopt(optlist, "zfs", &jsys, sizeof (jsys));
 	if (rc == 0 || rc == ENOENT) {
 		if (jsys == JAIL_SYS_NEW) {
 			rc = vfs_setopt(optlist, "zfs.mount_snapshot",
 			    &zjp->mount_snapshot,
 			    sizeof (zjp->mount_snapshot));
 		} else {
 			/*
 			 * A prison that inherits its ZFS info reports
 			 * empty/zero parameters.
 			 */
 			static int mount_snapshot = 0;
 
 			rc = vfs_setopt(optlist, "zfs.mount_snapshot",
 			    &mount_snapshot, sizeof (mount_snapshot));
 		}
 		if (rc == ENOENT)
 			rc = 0;
 	}
 
 	mtx_unlock(&owner->pr_mtx);
 	return (rc);
 }
 
 /*
  * PR_METHOD_SET handler.  Applies the "zfs" and "zfs.mount_snapshot"
  * options to a prison.
  *
  * - "zfs=new", or any "zfs.mount_snapshot" value: the prison gets its own
  *   ZFS info, and mount_snapshot is stored when it was supplied.  A prison
  *   whose parent is not prison0 may not ask for a larger mount_snapshot
  *   than the ZFS info in effect for its parent; that yields EPERM.
  * - "zfs=inherit": the prison's own ZFS info is deleted.
  * - neither option present: nothing is done.
  *
  * Returns 0, or EPERM as described above.
  */
 static int
 zfs_jailparam_set(void *obj, void *data)
 {
 	struct prison *pr = obj;
 	struct prison *ppr;
 	struct vfsoptlist *opts = data;
 	int error, jsys, mount_snapshot;
 
 	/* Set the parameters, which should be correct. */
 	error = vfs_copyopt(opts, "zfs", &jsys, sizeof (jsys));
 	if (error == ENOENT)
 		jsys = -1;
 	error = vfs_copyopt(opts, "zfs.mount_snapshot", &mount_snapshot,
 	    sizeof (mount_snapshot));
 	if (error == ENOENT)
 		mount_snapshot = -1;
 	else
 		jsys = JAIL_SYS_NEW;	/* any "zfs.*" implies "zfs=new" */
 	switch (jsys) {
 	case JAIL_SYS_NEW:
 	{
 		/* "zfs=new" or "zfs.*": the prison gets its own ZFS info. */
 		struct zfs_jailparam *zjp;
 		int exceeds_parent;
 
 		/*
 		 * A child jail cannot have more permissions than its parent
 		 */
 		if (pr->pr_parent != &prison0) {
 			zjp = zfs_jailparam_find(pr->pr_parent, &ppr);
 			/*
 			 * Look at the parent's record while the mutex that
 			 * zfs_jailparam_find() returned is still held, so
 			 * the record is not dereferenced after the lock has
 			 * been dropped.
 			 */
 			exceeds_parent =
 			    (zjp->mount_snapshot < mount_snapshot);
 			mtx_unlock(&ppr->pr_mtx);
 			if (exceeds_parent) {
 				return (EPERM);
 			}
 		}
 		/* Returns with pr->pr_mtx held because zjpp is non-NULL. */
 		zfs_jailparam_alloc(pr, &zjp);
 		if (mount_snapshot != -1)
 			zjp->mount_snapshot = mount_snapshot;
 		mtx_unlock(&pr->pr_mtx);
 		break;
 	}
 	case JAIL_SYS_INHERIT:
 		/* "zfs=inherit": inherit the parent's ZFS info. */
 		mtx_lock(&pr->pr_mtx);
 		osd_jail_del(pr, zfs_jailparam_slot);
 		mtx_unlock(&pr->pr_mtx);
 		break;
 	case -1:
 		/*
 		 * If the setting being changed is not ZFS related
 		 * then do nothing.
 		 */
 		break;
 	}
 
 	return (0);
 }
 
 /*
  * PR_METHOD_CHECK handler.  Validates the ZFS jail options that are
  * present in the option list; an absent option (ENOENT) is acceptable.
  *
  * "zfs" must be JAIL_SYS_NEW or JAIL_SYS_INHERIT and
  * "zfs.mount_snapshot" must be 0 or 1, otherwise EINVAL is returned.
  * Any other error from vfs_copyopt() is passed through.
  */
 static int
 zfs_jailparam_check(void *obj __unused, void *data)
 {
 	struct vfsoptlist *optlist = data;
 	int jsys, mount_snapshot, rc;
 
 	rc = vfs_copyopt(optlist, "zfs", &jsys, sizeof (jsys));
 	if (rc == 0) {
 		if (jsys != JAIL_SYS_NEW && jsys != JAIL_SYS_INHERIT)
 			return (EINVAL);
 	} else if (rc != ENOENT) {
 		return (rc);
 	}
 
 	rc = vfs_copyopt(optlist, "zfs.mount_snapshot", &mount_snapshot,
 	    sizeof (mount_snapshot));
 	if (rc == ENOENT)
 		return (0);
 	if (rc != 0)
 		return (rc);
 	if (mount_snapshot != 0 && mount_snapshot != 1)
 		return (EINVAL);
 
 	return (0);
 }
 
 /*
  * Destructor passed to osd_jail_register(): releases a ZFS info record.
  * Records are allocated from M_PRISON, so they are freed to it as well.
  */
 static void
 zfs_jailparam_destroy(void *data)
 {
 	free(data, M_PRISON);
 }
 
 /*
  * Register the ZFS jail OSD slot together with its method table, then
  * make sure every prison that already exists has its own ZFS info.
  */
 static void
 zfs_jailparam_sysinit(void *arg __unused)
 {
 	osd_method_t handlers[PR_MAXMETHOD] = {
 		[PR_METHOD_CREATE] = zfs_jailparam_create,
 		[PR_METHOD_GET] = zfs_jailparam_get,
 		[PR_METHOD_SET] = zfs_jailparam_set,
 		[PR_METHOD_CHECK] = zfs_jailparam_check,
 	};
 	struct prison *cur;
 
 	zfs_jailparam_slot =
 	    osd_jail_register(zfs_jailparam_destroy, handlers);
 
 	/* Copy the defaults to any existing prisons. */
 	sx_slock(&allprison_lock);
 	TAILQ_FOREACH(cur, &allprison, pr_list) {
 		zfs_jailparam_alloc(cur, NULL);
 	}
 	sx_sunlock(&allprison_lock);
 }
 
 /*
  * Undo zfs_jailparam_sysinit(): give the jail OSD slot back.
  */
 static void
 zfs_jailparam_sysuninit(void *arg __unused)
 {
 	osd_jail_deregister(zfs_jailparam_slot);
 }
 
 /*
  * Hook the jail parameter setup and teardown routines into SYSINIT and
  * SYSUNINIT, both at the SI_SUB_DRIVERS subsystem with SI_ORDER_ANY.
  * Neither routine takes an argument, hence the NULL.
  */
 SYSINIT(zfs_jailparam_sysinit, SI_SUB_DRIVERS, SI_ORDER_ANY,
 	zfs_jailparam_sysinit, NULL);
 SYSUNINIT(zfs_jailparam_sysuninit, SI_SUB_DRIVERS, SI_ORDER_ANY,
 	zfs_jailparam_sysuninit, NULL);
diff --git a/usr.sbin/jail/jail.8 b/usr.sbin/jail/jail.8
index b66e3464dc5e..18dd54aa9b68 100644
--- a/usr.sbin/jail/jail.8
+++ b/usr.sbin/jail/jail.8
@@ -1,1477 +1,1477 @@
 .\" Copyright (c) 2000, 2003 Robert N. M. Watson
 .\" Copyright (c) 2008-2012 James Gritton
 .\" All rights reserved.
 .\"
 .\" Redistribution and use in source and binary forms, with or without
 .\" modification, are permitted provided that the following conditions
 .\" are met:
 .\" 1. Redistributions of source code must retain the above copyright
 .\"    notice, this list of conditions and the following disclaimer.
 .\" 2. Redistributions in binary form must reproduce the above copyright
 .\"    notice, this list of conditions and the following disclaimer in the
 .\"    documentation and/or other materials provided with the distribution.
 .\"
 .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 .\" ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 .\" SUCH DAMAGE.
 .\"
-.Dd June 24, 2024
+.Dd September 19, 2024
 .Dt JAIL 8
 .Os
 .Sh NAME
 .Nm jail
 .Nd "manage system jails"
 .Sh SYNOPSIS
 .Nm
 .Op Fl dhilqv
 .Op Fl J Ar jid_file
 .Op Fl u Ar username
 .Op Fl U Ar username
 .Op Fl cmr
 .Ar param Ns = Ns Ar value ...
 .Op Cm command Ns = Ns Ar command ...
 .Nm
 .Op Fl dqv
 .Op Fl f Ar conf_file
 .Op Fl p Ar limit
 .Op Fl cmr
 .Op Ar jail
 .Nm
 .Op Fl qv
 .Op Fl f Ar conf_file
 .Op Fl rR
 .Op Cm * | Ar jail ...
 .Nm
 .Op Fl dhilqv
 .Op Fl J Ar jid_file
 .Op Fl u Ar username
 .Op Fl U Ar username
 .Op Fl n Ar jailname
 .Op Fl s Ar securelevel
 .Ar path hostname ip Ns Op Cm \&, Ns Ar ...
 .Ar command ...
 .Nm
 .Op Fl f Ar conf_file
 .Fl e
 .Ar separator
 .Sh DESCRIPTION
 The
 .Nm
 utility creates new jails, or modifies or removes existing jails.
 It can also print a list of configured jails and their parameters.
 A jail
 .Pq or Dq prison
 is specified via parameters on the command line, or in the
 .Xr jail.conf 5
 file.
 .Pp
 At least one of the options
 .Fl c ,
 .Fl e ,
 .Fl m
 or
 .Fl r
 must be specified.
 These options are used alone or in combination to describe the operation to
 perform:
 .Bl -tag -width indent
 .It Fl c
 Create a new jail.
 The jail
 .Va jid
 and
 .Va name
 parameters (if specified on the command line)
 must not refer to an existing jail.
 .It Fl e Ar separator
 Exhibit a list of all configured non-wildcard jails and their parameters.
 No jail creation, modification or removal is performed if this option is used.
 The
 .Ar separator
 string is used to separate parameters.
 Use the
 .Xr jls 8
 utility to list running jails.
 .It Fl m
 Modify an existing jail.
 One of the
 .Va jid
 or
 .Va name
 parameters must exist and refer to an existing jail.
 Some parameters may not be changed on a running jail.
 .It Fl r
 Remove the
 .Ar jail
 specified by jid or name.
 All jailed processes are killed, and all jails that are
 children of this jail are also
 removed.
 .It Fl rc
 Restart an existing jail.
 The jail is first removed and then re-created, as if
 .Dq Nm Fl r
 and
 .Dq Nm Fl c
 were run in succession.
 .It Fl cm
 Create a jail if it does not exist, or modify the jail if it does exist.
 .It Fl mr
 Modify an existing jail.
 The jail may be restarted if necessary to modify parameters that could
 not otherwise be changed.
 .It Fl cmr
 Create a jail if it doesn't exist, or modify (and possibly restart) the
 jail if it does exist.
 .El
 .Pp
 Other available options are:
 .Bl -tag -width indent
 .It Fl d
 Allow making changes to a dying jail, equivalent to the
 .Va allow.dying
 parameter.
 .It Fl f Ar conf_file
 Use configuration file
 .Ar conf_file
 instead of the default
 .Pa /etc/jail.conf .
 .It Fl h
 Resolve the
 .Va host.hostname
 parameter (or
 .Va hostname )
 and add all IP addresses returned by the resolver
 to the list of addresses for this jail.
 This is equivalent to the
 .Va ip_hostname
 parameter.
 .It Fl i
 Output (only) the jail identifier of the newly created jail(s).
 This implies the
 .Fl q
 option.
 .It Fl J Ar jid_file
 Write a
 .Ar jid_file
 file, containing the parameters used to start the jail.
 .It Fl l
 Run commands in a clean environment.
 This is deprecated and is equivalent to the
 .Va exec.clean
 parameter.
 .It Fl n Ar jailname
 Set the jail's name.
 This is deprecated and is equivalent to the
 .Va name
 parameter.
 .It Fl p Ar limit
 Limit the number of commands from
 .Va  exec.*
 that can run simultaneously.
 .It Fl q
 Suppress the message printed whenever a jail is created, modified or removed.
 Only error messages will be printed.
 .It Fl R
 A variation of the
 .Fl r
 option that removes an existing jail without using the configuration file.
 No removal-related parameters for this jail will be used \(em the jail will
 simply be removed.
 .It Fl s Ar securelevel
 Set the
 .Va kern.securelevel
 MIB entry to the specified value inside the newly created jail.
 This is deprecated and is equivalent to the
 .Va securelevel
 parameter.
 .It Fl u Ar username
 The user name from the host environment as whom jailed commands should run.
 This is deprecated and is equivalent to the
 .Va exec.jail_user
 and
 .Va exec.system_jail_user
 parameters.
 .It Fl U Ar username
 The user name from the jailed environment as whom jailed commands should run.
 This is deprecated and is equivalent to the
 .Va exec.jail_user
 parameter.
 .It Fl v
 Print a message on every operation, such as running commands and
 mounting filesystems.
 .El
 .Pp
 If no arguments are given after the options, the operation (except
 remove) will be performed on all jails specified in the
 .Xr jail.conf 5
 file.
 A single argument of a jail name will operate only on the specified jail.
 The
 .Fl r
 and
 .Fl R
 options can also remove running jails that aren't in the
 .Xr jail.conf 5
 file, specified by name or jid.
 .Pp
 An argument of
 .Dq *
 is a wildcard that will operate on all jails, regardless of whether
 they appear in
 .Xr jail.conf 5 ;
 this is the surest way for
 .Fl r
 to remove all jails.
 If hierarchical jails exist, a partial-matching wildcard definition may
 be specified.
 For example, an argument of
 .Dq foo.*
 would apply to jails with names like
 .Dq foo.bar
 and
 .Dq foo.bar.baz .
 .Pp
 A jail may be specified with parameters directly on the command line.
 In this case, the
 .Xr jail.conf 5
 file will not be used.
 For backward compatibility, the command line may also have four fixed
 parameters, without names:
 .Ar path ,
 .Ar hostname ,
 .Ar ip ,
 and
 .Ar command .
 This mode will always create a new jail, and the
 .Fl c
 and
 .Fl m
 options do not apply (and must not be present).
 .Ss Jail Parameters
 Parameters in the
 .Xr jail.conf 5
 file, or on the command line, are generally of the form
 .Dq name=value .
 Some parameters are boolean, and do not have a value but are set by the
 name alone with or without a
 .Dq no
 prefix, e.g.
 .Va persist
 or
 .Va nopersist .
 They can also be given the values
 .Dq true
 and
 .Dq false .
 Other parameters may have more than one value, specified as a
 comma-separated list or with
 .Dq +=
 in the configuration file (see
 .Xr jail.conf 5
 for details).
 .Pp
 The
 .Nm
 utility recognizes two classes of parameters.
 There are the true jail
 parameters that are passed to the kernel when the jail is created,
 which can be seen with
 .Xr jls 8 ,
 and can (usually) be changed with
 .Dq Nm Fl m .
 Then there are pseudo-parameters that are only used by
 .Nm
 itself.
 .Pp
 Jails have a set of core parameters, and kernel modules can add their own
 jail parameters.
 The current set of available parameters can be retrieved via
 .Dq Nm sysctl Fl d Va security.jail.param .
 Any parameters not set will be given default values, often based on the
 current environment.
 The core parameters are:
 .Bl -tag -width indent
 .It Va jid
 The jail identifier.
 This will be assigned automatically to a new jail (or can be explicitly
 set), and can be used to identify the jail for later modification, or
 for such commands as
 .Xr jls 8
 or
 .Xr jexec 8 .
 .It Va name
 The jail name.
 This is an arbitrary string that identifies a jail (except it may not
 contain a
 .Sq \&. ) .
 Like the
 .Va jid ,
 it can be passed to later
 .Nm
 commands, or to
 .Xr jls 8
 or
 .Xr jexec 8 .
 If no
 .Va name
 is supplied, a default is assumed that is the same as the
 .Va jid .
 The
 .Va name
 parameter is implied by the
 .Xr jail.conf 5
 file format, and need not be explicitly set when using the configuration
 file.
 .It Va path
 The directory which is to be the root of the jail.
 Any commands run inside the jail, either by
 .Nm
 or from
 .Xr jexec 8 ,
 are run from this directory.
 .It Va ip4.addr
 A list of IPv4 addresses assigned to the jail.
 If this is set, the jail is restricted to using only these addresses.
 Any attempts to use other addresses fail, and attempts to use wildcard
 addresses silently use the jailed address instead.
 For IPv4 the first address given will be used as the source address
 when source address selection on unbound sockets cannot find a better
 match.
 It is only possible to start multiple jails with the same IP address
 if none of the jails has more than this single overlapping IP address
 assigned to itself.
 .It Va ip4.saddrsel
 A boolean option to change the formerly mentioned behaviour and disable
 IPv4 source address selection for the jail in favour of the primary
 IPv4 address of the jail.
 Source address selection is enabled by default for all jails and the
 .Va ip4.nosaddrsel
 setting of a parent jail is not inherited for any child jails.
 .It Va ip4
 Control the availability of IPv4 addresses.
 Possible values are
 .Dq inherit
 to allow unrestricted access to all system addresses,
 .Dq new
 to restrict addresses via
 .Va ip4.addr ,
 and
 .Dq disable
 to stop the jail from using IPv4 entirely.
 Setting the
 .Va ip4.addr
 parameter implies a value of
 .Dq new .
 .It Va ip6.addr , Va ip6.saddrsel , Va ip6
 A set of IPv6 options for the jail, the counterparts to
 .Va ip4.addr ,
 .Va ip4.saddrsel
 and
 .Va ip4
 above.
 .It Va vnet
 Create the jail with its own virtual network stack,
 with its own network interfaces, addresses, routing table, etc.
 The kernel must have been compiled with the
 .Sy VIMAGE
 option
 for this to be available.
 Possible values are
 .Dq inherit
 to use the system network stack, possibly with restricted IP addresses,
 and
 .Dq new
 to create a new network stack.
 .It Va host.hostname
 The hostname of the jail.
 Other similar parameters are
 .Va host.domainname ,
 .Va host.hostuuid
 and
 .Va host.hostid .
 .It Va host
 Set the origin of hostname and related information.
 Possible values are
 .Dq inherit
 to use the system information and
 .Dq new
 for the jail to use the information from the above fields.
 Setting any of the above fields implies a value of
 .Dq new .
 .It Va securelevel
 The value of the jail's
 .Va kern.securelevel
 sysctl.
 A jail never has a lower securelevel than its parent system, but by
 setting this parameter it may have a higher one.
 If the system securelevel is changed, any jail securelevels will be at
 least as secure.
 .It Va devfs_ruleset
 The number of the devfs ruleset that is enforced for mounting devfs in
 this jail.
 A value of zero (default) means no ruleset is enforced.
 Descendant jails inherit the parent jail's devfs ruleset enforcement.
 Mounting devfs inside a jail is possible only if the
 .Va allow.mount
 and
 .Va allow.mount.devfs
 permissions are effective and
 .Va enforce_statfs
 is set to a value lower than 2.
 Devfs rules and rulesets cannot be viewed or modified from inside a jail.
 .Pp
 NOTE: It is important that only appropriate device nodes in devfs be
 exposed to a jail; access to disk devices in the jail may permit processes
 in the jail to bypass the jail sandboxing by modifying files outside of
 the jail.
 See
 .Xr devfs 8
 for information on how to use devfs rules to limit access to entries
 in the per-jail devfs.
 A simple devfs ruleset for jails is available as ruleset #4 in
 .Pa /etc/defaults/devfs.rules .
 .It Va children.max
 The number of child jails allowed to be created by this jail (or by
 other jails under this jail).
 This limit is zero by default, indicating the jail is not allowed to
 create child jails.
 See the
 .Sx "Hierarchical Jails"
 section for more information.
 .It Va children.cur
 The number of descendants of this jail, including its own child jails
 and any jails created under them.
 .It Va enforce_statfs
 This determines what information processes in a jail are able to get
 about mount points.
 It affects the behaviour of the following syscalls:
 .Xr statfs 2 ,
 .Xr fstatfs 2 ,
 .Xr getfsstat 2 ,
 and
 .Xr fhstatfs 2
 (as well as similar compatibility syscalls).
 When set to 0, all mount points are available without any restrictions.
 When set to 1, only mount points below the jail's chroot directory are
 visible.
 In addition to that, the path to the jail's chroot directory is removed
 from the front of their pathnames.
 When set to 2 (default), the above syscalls can operate only on a mount-point
 where the jail's chroot directory is located.
 .It Va persist
 Setting this boolean parameter allows a jail to exist without any
 processes.
 Normally, a command is run as part of jail creation, and then the jail
 is destroyed as its last process exits.
 A new jail must have either the
 .Va persist
 parameter or
 .Va exec.start
 or
 .Va command
 pseudo-parameter set.
 .It Va cpuset.id
 The ID of the cpuset associated with this jail (read-only).
 .It Va dying
 This is true if the jail is in the process of shutting down (read-only).
 .It Va parent
 The
 .Va jid
 of the parent of this jail, or zero if this is a top-level jail
 (read-only).
 .It Va osrelease
 The string for the jail's
 .Va kern.osrelease
 sysctl and uname -r.
 .It Va osreldate
 The number for the jail's
 .Va kern.osreldate
 and uname -K.
 .It Va allow.*
 Some restrictions of the jail environment may be set on a per-jail
 basis.
 With the exception of
 .Va allow.set_hostname
 and
 .Va allow.reserved_ports ,
 these boolean parameters are off by default.
 .Bl -tag -width indent
 .It Va allow.set_hostname
 The jail's hostname may be changed via
 .Xr hostname 1
 or
 .Xr sethostname 3 .
 .It Va allow.sysvipc
 A process within the jail has access to System V IPC primitives.
 This is deprecated in favor of the per-module parameters (see below).
 When this parameter is set, it is equivalent to setting
 .Va sysvmsg ,
 .Va sysvsem ,
 and
 .Va sysvshm
 all to
 .Dq inherit .
 .It Va allow.raw_sockets
 The jail root is allowed to create raw sockets.
 Setting this parameter allows utilities like
 .Xr ping 8
 and
 .Xr traceroute 8
 to operate inside the jail.
 If this is set, the source IP addresses are enforced to comply
 with the IP address bound to the jail, regardless of whether or not
 the
 .Dv IP_HDRINCL
 flag has been set on the socket.
 Since raw sockets can be used to configure and interact with various
 network subsystems, extra caution should be used where privileged access
 to jails is given out to untrusted parties.
 .It Va allow.chflags
 Normally, privileged users inside a jail are treated as unprivileged by
 .Xr chflags 2 .
 When this parameter is set, such users are treated as privileged, and
 may manipulate system file flags subject to the usual constraints on
 .Va kern.securelevel .
 .It Va allow.mount
 Privileged users inside the jail will be able to mount and unmount file
 system types marked as jail-friendly.
 The
 .Xr lsvfs 1
 command can be used to find file system types available for mount from
 within a jail.
 This permission is effective only if
 .Va enforce_statfs
 is set to a value lower than 2.
 .It Va allow.mount.devfs
 Privileged users inside the jail will be able to mount and unmount the
 devfs file system.
 This permission is effective only together with
 .Va allow.mount
 and only when
 .Va enforce_statfs
 is set to a value lower than 2.
 The devfs ruleset should be restricted from the default by using the
 .Va devfs_ruleset
 option.
 .It Va allow.quotas
 The jail root may administer quotas on the jail's filesystem(s).
 This includes filesystems that the jail may share with other jails or
 with non-jailed parts of the system.
 .It Va allow.read_msgbuf
 Jailed users may read the kernel message buffer.
 If the
 .Va security.bsd.unprivileged_read_msgbuf
 MIB entry is zero, this will be restricted to the root user.
 .It Va allow.socket_af
 Sockets within a jail are normally restricted to IPv4, IPv6, local
 (UNIX), and route.
 This allows access to other protocol stacks that
 have not had jail functionality added to them.
 .It Va allow.mlock
 Locking or unlocking physical pages in memory is normally not available
 within a jail.
 When this parameter is set, users may
 .Xr mlock 2
 or
 .Xr munlock 2
 memory subject to
 .Va security.bsd.unprivileged_mlock
 and resource limits.
 .It Va allow.nfsd
 The
 .Xr mountd 8 ,
 .Xr nfsd 8 ,
 .Xr nfsuserd 8 ,
 .Xr gssd 8
 and
 .Xr rpc.tlsservd 8
 daemons are permitted to run inside a properly configured vnet-enabled jail.
 The jail's root must be a file system mount point and
 .Va enforce_statfs
 must not be set to 0, so that
 .Xr mountd 8
 can export file systems visible within the jail.
 .Va enforce_statfs
 must be set to 1 if file systems mounted under the
 jail's file system need to be exported by
 .Xr mountd 8 .
 For exporting only the jail's file system, a setting of 2
 is sufficient.
 If the kernel configuration does not include the
 .Sy NFSD
 option,
 .Pa nfsd.ko
 must be loaded outside of the jails.
 This is normally done by adding
 .Dq nfsd
 to
 .Va kld_list
 in the
 .Xr rc.conf 5
 file outside of the jails.
 Similarly, if the
 .Xr gssd 8
 is to be run in a jail, either the kernel
 .Sy KGSSAPI
 option needs to be specified or
 .Dq kgssapi
 and
 .Dq kgssapi_krb5
 need to be in
 .Va kld_list
 in the
 .Xr rc.conf 5
 file outside of the jails.
 .It Va allow.reserved_ports
 The jail root may bind to ports lower than 1024.
 .It Va allow.unprivileged_proc_debug
 Unprivileged processes in the jail may use debugging facilities.
 .It Va allow.suser
 The value of the jail's
 .Va security.bsd.suser_enabled
 sysctl.
 The super-user will be disabled automatically if its parent system has it
 disabled.
 The super-user is enabled by default.
 .El
 .El
 .Pp
 Kernel modules may add their own parameters, which only exist when the
 module is loaded.
 These are typically headed under a parameter named after the module,
 with values of
 .Dq inherit
 to give the jail full use of the module,
 .Dq new
 to encapsulate the jail in some module-specific way,
 and
 .Dq disable
 to make the module unavailable to the jail.
 There also may be other parameters to define jail behavior within the module.
 Module-specific parameters include:
 .Bl -tag -width indent
 .It Va allow.mount.fdescfs
 Privileged users inside the jail will be able to mount and unmount the
 fdescfs file system.
 This permission is effective only together with
 .Va allow.mount
 and only when
 .Va enforce_statfs
 is set to a value lower than 2.
 .It Va allow.mount.fusefs
 Privileged users inside the jail will be able to mount and unmount
 fuse-based file systems.
 This permission is effective only together with
 .Va allow.mount
 and only when
 .Va enforce_statfs
 is set to a value lower than 2.
 .It Va allow.mount.nullfs
 Privileged users inside the jail will be able to mount and unmount the
 nullfs file system.
 This permission is effective only together with
 .Va allow.mount
 and only when
 .Va enforce_statfs
 is set to a value lower than 2.
 .It Va allow.mount.procfs
 Privileged users inside the jail will be able to mount and unmount the
 procfs file system.
 This permission is effective only together with
 .Va allow.mount
 and only when
 .Va enforce_statfs
 is set to a value lower than 2.
 .It Va allow.mount.linprocfs
 Privileged users inside the jail will be able to mount and unmount the
 linprocfs file system.
 This permission is effective only together with
 .Va allow.mount
 and only when
 .Va enforce_statfs
 is set to a value lower than 2.
 .It Va allow.mount.linsysfs
 Privileged users inside the jail will be able to mount and unmount the
 linsysfs file system.
 This permission is effective only together with
 .Va allow.mount
 and only when
 .Va enforce_statfs
 is set to a value lower than 2.
 .It Va allow.mount.tmpfs
 Privileged users inside the jail will be able to mount and unmount the
 tmpfs file system.
 This permission is effective only together with
 .Va allow.mount
 and only when
 .Va enforce_statfs
 is set to a value lower than 2.
 .It Va allow.mount.zfs
 Privileged users inside the jail will be able to mount and unmount the
 ZFS file system.
 This permission is effective only together with
 .Va allow.mount
 and only when
 .Va enforce_statfs
 is set to a value lower than 2.
 See
 .Xr zfs 8
 for information on how to configure the ZFS filesystem to operate from
 within a jail.
 .It Va allow.vmm
 The jail may access
 .Xr vmm 4 .
 This flag is only available when the
 .Xr vmm 4
 kernel module is loaded.
 .It Va linux
 Determine how a jail's Linux emulation environment appears.
 A value of
 .Dq inherit
 will keep the same environment, and
 .Dq new
 will give the jail its own environment (still originally inherited when
 the jail is created).
 .It Va linux.osname , linux.osrelease , linux.oss_version
 The Linux OS name, OS release, and OSS version associated with this jail.
 .It Va sysvmsg
 Allow access to SYSV IPC message primitives.
 If set to
 .Dq inherit ,
 all IPC objects on the system are visible to this jail, whether they
 were created by the jail itself, the base system, or other jails.
 If set to
 .Dq new ,
 the jail will have its own key namespace, and can only see the objects
 that it has created;
 the system (or parent jail) has access to the jail's objects, but not to
 its keys.
 If set to
 .Dq disable ,
 the jail cannot perform any sysvmsg-related system calls.
 .It Va sysvsem , sysvshm
 Allow access to SYSV IPC semaphore and shared memory primitives, in the
 same manner as
 .Va sysvmsg .
 .It Va zfs.mount_snapshot
-Allow jailed users to access the contents of ZFS snapshots under the
-filesystem's
+When set to 1, jailed users may access the contents of ZFS snapshots
+under the filesystem's
 .Pa .zfs
 directory.
 If
 .Va allow.mount.zfs
 is set, the snapshots may also be mounted.
 .El
 .Pp
 There are pseudo-parameters that are not passed to the kernel, but are
 used by
 .Nm
 to set up the jail environment, often by running specified commands
 when jails are created or removed.
 The
 .Va exec.*
 command parameters are
 .Xr sh 1
 command lines that are run in either the system or jail environment.
 They may be given multiple values, which would run the specified
 commands in sequence.
 All commands must succeed (return a zero exit status), or the jail will
 not be created or removed, as appropriate.
 .Pp
 The pseudo-parameters are:
 .Bl -tag -width indent
 .It Va exec.prepare
 Command(s) to run in the system environment to prepare a jail for creation.
 These commands are executed before assigning IP addresses and mounting
 filesystems, so they may be used to create a new jail filesystem if it does
 not already exist.
 .It Va exec.prestart
 Command(s) to run in the system environment before a jail is created.
 .It Va exec.created
 Command(s) to run in the system environment right after a jail has been
 created, but before commands (or services) get executed in the jail.
 .It Va exec.start
 Command(s) to run in the jail environment when a jail is created.
 A typical command to run is
 .Dq sh /etc/rc .
 .It Va command
 A synonym for
 .Va exec.start
 for use when specifying a jail directly on the command line.
 Unlike other parameters whose value is a single string,
 .Va command
 uses the remainder of the
 .Nm
 command line as its own arguments.
 .It Va exec.poststart
 Command(s) to run in the system environment after a jail is created,
 and after any
 .Va exec.start
 commands have completed.
 .It Va exec.prestop
 Command(s) to run in the system environment before a jail is removed.
 .It Va exec.stop
 Command(s) to run in the jail environment before a jail is removed,
 and after any
 .Va exec.prestop
 commands have completed.
 A typical command to run is
 .Dq sh /etc/rc.shutdown jail .
 .It Va exec.poststop
 Command(s) to run in the system environment after a jail is removed.
 .It Va exec.release
 Command(s) to run in the system environment after all other actions are done.
 These commands are executed after unmounting filesystems and removing IP
 addresses, so they may be used to remove a jail filesystem if it is no longer
 needed.
 .It Va exec.clean
 Run commands in a clean environment.
 The environment is discarded except for
 .Ev HOME , SHELL , TERM
 and
 .Ev USER .
 .Ev HOME
 and
 .Ev SHELL
 are set to the target login's default values.
 .Ev USER
 is set to the target login.
 .Ev TERM
 is imported from the current environment.
 The environment variables from the login class capability database for the
 target login are also set.
 .It Va exec.jail_user
 The user to run commands as, when running in the jail environment.
 The default is to run the commands as the current user.
 .It Va exec.system_jail_user
 This boolean option looks for the
 .Va exec.jail_user
 in the system
 .Xr passwd 5
 file, instead of in the jail's file.
 .It Va exec.system_user
 The user to run commands as, when running in the system environment.
 The default is to run the commands as the current user.
 .It Va exec.timeout
 The maximum amount of time to wait for a command to complete, in
 seconds.
 If a command is still running after this timeout has passed,
 the jail will not be created or removed, as appropriate.
 .It Va exec.consolelog
 A file to direct command output (stdout and stderr) to.
 .It Va exec.fib
 The FIB (routing table) to set when running commands inside the jail.
 .It Va stop.timeout
 The maximum amount of time to wait for a jail's processes to exit
 after sending them a
 .Dv SIGTERM
 signal (which happens after the
 .Va exec.stop
 commands have completed).
 After this many seconds have passed, the jail will be removed, which
 will kill any remaining processes.
 If this is set to zero, no
 .Dv SIGTERM
 is sent and the jail is immediately removed.
 The default is 10 seconds.
 .It Va interface
 A network interface to add the jail's IP addresses
 .Va ( ip4.addr
 and
 .Va ip6.addr )
 to.
 An alias for each address will be added to the interface before the
 jail is created, and will be removed from the interface after the
 jail is removed.
 .It Va ip4.addr
 In addition to the IP addresses that are passed to the kernel, an
 interface, netmask and additional parameters (as supported by
 .Xr ifconfig 8 Ns )
 may also be specified, in the form
 .Dq Ar interface Ns | Ns Ar ip-address Ns / Ns Ar netmask param ... .
 If an interface is given before the IP address, an alias for the address
 will be added to that interface, as it is with the
 .Va interface
 parameter.
 If a netmask in either dotted-quad or CIDR form is given
 after an IP address, it will be used when adding the IP alias.
 If additional parameters are specified then they will also be used when
 adding the IP alias.
 .It Va ip6.addr
 In addition to the IP addresses that are passed to the kernel,
 an interface, prefix and additional parameters (as supported by
 .Xr ifconfig 8 Ns )
 may also be specified, in the form
 .Dq Ar interface Ns | Ns Ar ip-address Ns / Ns Ar prefix param ... .
 .It Va vnet.interface
 A network interface to give to a vnet-enabled jail after it is created.
 The interface will automatically be released when the jail is removed.
 .It Va ip_hostname
 Resolve the
 .Va host.hostname
 parameter and add all IP addresses returned by the resolver
 to the list of addresses
 .Po Va ip4.addr
 or
 .Va ip6.addr Pc
 for this jail.
 This may affect default address selection for outgoing IPv4 connections
 from jails.
 The address first returned by the resolver for each address family
 will be used as the primary address.
 .It Va mount
 A filesystem to mount before creating the jail (and to unmount after
 removing it), given as a single
 .Xr fstab 5
 line.
 .It Va mount.fstab
 An
 .Xr fstab 5
 format file containing filesystems to mount before creating a jail.
 .It Va mount.devfs
 Mount a
 .Xr devfs 5
 filesystem on the chrooted
 .Pa /dev
 directory, and apply the ruleset in the
 .Va devfs_ruleset
 parameter (or a default of ruleset 4: devfsrules_jail)
 to restrict the devices visible inside the jail.
 .It Va mount.fdescfs
 Mount a
 .Xr fdescfs 5
 filesystem on the chrooted
 .Pa /dev/fd
 directory.
 .It Va mount.procfs
 Mount a
 .Xr procfs 5
 filesystem on the chrooted
 .Pa /proc
 directory.
 .It Va allow.dying
 Allow making changes to a
 .Va dying
 jail.
 .It Va depend
 Specify a jail (or jails) that this jail depends on.
 When this jail is to be created, any jail(s) it depends on must already exist.
 If not, they will be created automatically, up to the completion of the last
 .Va exec.poststart
 command, before any action will be taken to create this jail.
 When jails are removed the opposite is true:
 this jail will be removed, up to the last
 .Va exec.poststop
 command, before any jail(s) it depends on are stopped.
 .El
 .Sh EXAMPLES
 Jails are typically set up using one of two philosophies: either to
 constrain a specific application (possibly running with privilege), or
 to create a
 .Dq "virtual system image"
 running a variety of daemons and services.
 In both cases, a fairly complete file system install of
 .Fx
 is
 required, so as to provide the necessary command line tools, daemons,
 libraries, application configuration files, etc.
 However, for a virtual server configuration, a fair amount of
 additional work is required so as to replace the
 .Dq boot
 process.
 This manual page documents the configuration steps necessary to support
 either of these approaches, although the configuration steps may need to be
 refined based on local requirements.
 .Ss "Setting up a Jail Directory Tree"
 To set up a jail directory tree containing an entire
 .Fx
 distribution, the following
 .Xr sh 1
 command script can be used:
 .Bd -literal -offset indent
 D=/here/is/the/jail
 cd /usr/src
 mkdir -p $D
 make world DESTDIR=$D
 make distribution DESTDIR=$D
 .Ed
 .Pp
 In many cases this example would put far more in the jail than needed.
 In the other extreme case a jail might contain only one file:
 the executable to be run in the jail.
 .Pp
 We recommend experimentation, and caution that it is a lot easier to
 start with a
 .Dq fat
 jail and remove things until it stops working,
 than it is to start with a
 .Dq thin
 jail and add things until it works.
 .Ss "Setting Up a Jail"
 Do what was described in
 .Sx "Setting Up a Jail Directory Tree"
 to build the jail directory tree.
 For the sake of this example, we will
 assume you built it in
 .Pa /data/jail/testjail ,
 for a jail named
 .Dq testjail .
 Substitute below as needed with your
 own directory, IP address, and hostname.
 .Ss "Setting up the Host Environment"
 First, set up the real system's environment to be
 .Dq jail-friendly .
 For consistency, we will refer to the parent box as the
 .Dq "host environment" ,
 and to the jailed virtual machine as the
 .Dq "jail environment" .
 Since jails are implemented using IP aliases, one of the first things to do
 is to disable IP services on the host system that listen on all local
 IP addresses for a service.
 If a network service is present in the host environment that binds all
 available IP addresses rather than specific IP addresses, it may service
 requests sent to jail IP addresses if the jail did not bind the port.
 This means changing
 .Xr inetd 8
 to only listen on the
 appropriate IP address, and so forth.
 Add the following to
 .Pa /etc/rc.conf
 in the host environment:
 .Bd -literal -offset indent
 sendmail_enable="NO"
 inetd_flags="-wW -a 192.0.2.23"
 rpcbind_enable="NO"
 .Ed
 .Pp
 .Li 192.0.2.23
 is the native IP address for the host system, in this example.
 Daemons that run out of
 .Xr inetd 8
 can be easily configured to use only the specified host IP address.
 Other daemons
 will need to be manually configured \(em for some this is possible through
 .Xr rc.conf 5
 flags entries; for others it is necessary to modify per-application
 configuration files, or to recompile the application.
 The following frequently deployed services must have their individual
 configuration files modified to limit the application to listening
 to a specific IP address:
 .Pp
 To configure
 .Xr sshd 8 ,
 it is necessary to modify
 .Pa /etc/ssh/sshd_config .
 .Pp
 To configure
 .Xr sendmail 8 ,
 it is necessary to modify
 .Pa /etc/mail/sendmail.cf .
 .Pp
 In addition, a number of services must be recompiled in order to run
 them in the host environment.
 This includes most applications providing services using
 .Xr rpc 3 ,
 such as
 .Xr rpcbind 8 ,
 .Xr nfsd 8 ,
 and
 .Xr mountd 8 .
 In general, applications for which it is not possible to specify which
 IP address to bind should not be run in the host environment unless they
 should also service requests sent to jail IP addresses.
 Attempting to serve
 NFS from the host environment may also cause confusion, and cannot be
 easily reconfigured to use only specific IPs, as some NFS services are
 hosted directly from the kernel.
 Any third-party network software running
 in the host environment should also be checked and configured so that it
 does not bind all IP addresses, which would result in those services also
 appearing to be offered by the jail environments.
 .Pp
 Once
 these daemons have been disabled or fixed in the host environment, it is
 best to reboot so that all daemons are in a known state, to reduce the
 potential for confusion later (such as finding that when you send mail
 to a jail, and its sendmail is down, the mail is delivered to the host,
 etc.).
 .Ss "Configuring the Jail"
 Start any jail for the first time without configuring the network
 interface so that you can clean it up a little and set up accounts.
 As
 with any machine (virtual or not), you will need to set a root password, time
 zone, etc.
 Some of these steps apply only if you intend to run a full virtual server
 inside the jail; others apply both for constraining a particular application
 or for running a virtual server.
 .Pp
 Start a shell in the jail:
 .Bd -literal -offset indent
 jail -c path=/data/jail/testjail mount.devfs \\
 	host.hostname=testhostname ip4.addr=192.0.2.100 \\
 	command=/bin/sh
 .Ed
 .Pp
 Assuming no errors, you will end up with a shell prompt within the jail.
 You can now run
 .Xr bsdconfig 8
 and do the post-install configuration to set various configuration options,
 or perform these actions manually by editing
 .Pa /etc/rc.conf ,
 etc.
 .Pp
 .Bl -bullet -offset indent -compact
 .It
 Configure
 .Pa /etc/resolv.conf
 so that name resolution within the jail will work correctly.
 .It
 Run
 .Xr newaliases 1
 to quell
 .Xr sendmail 8
 warnings.
 .It
 Set a root password, probably different from the real host system.
 .It
 Set the timezone.
 .It
 Add accounts for users in the jail environment.
 .It
 Install any packages the environment requires.
 .El
 .Pp
 You may also want to perform any package-specific configuration (web servers,
 SSH servers, etc), patch up
 .Pa /etc/syslog.conf
 so it logs as you would like, etc.
 If you are not using a virtual server, you may wish to modify
 .Xr syslogd 8
 in the host environment to listen on the syslog socket in the jail
 environment; in this example, the syslog socket would be stored in
 .Pa /data/jail/testjail/var/run/log .
 .Pp
 Exit from the shell, and the jail will be shut down.
 .Ss "Starting the Jail"
 You are now ready to restart the jail and bring up the environment with
 all of its daemons and other programs.
 Create an entry for the jail in
 .Pa /etc/jail.conf :
 .Bd -literal -offset indent
 testjail {
 	path = /data/jail/testjail;
 	mount.devfs;
 	host.hostname = testhostname;
 	ip4.addr = 192.0.2.100;
 	interface = em0;
 	exec.start = "/bin/sh /etc/rc";
 	exec.stop = "/bin/sh /etc/rc.shutdown jail";
 }
 .Ed
 .Pp
 To start a virtual server environment,
 .Pa /etc/rc
 is run to launch various daemons and services, and
 .Pa /etc/rc.shutdown
 is run to shut them down when the jail is removed.
 If you are running a single application in the jail,
 substitute the command used to start the application for
 .Dq /bin/sh /etc/rc ;
 there may be some script available to cleanly shut down the application,
 or it may be sufficient to go without a stop command, and have
 .Nm
 send
 .Dv SIGTERM
 to the application.
 .Pp
 Start the jail by running:
 .Bd -literal -offset indent
 jail -c testjail
 .Ed
 .Pp
 A few warnings may be produced; however, it should all work properly.
 You should be able to see
 .Xr inetd 8 ,
 .Xr syslogd 8 ,
 and other processes running within the jail using
 .Xr ps 1 ,
 with the
 .Ql J
 flag appearing beside jailed processes.
 To see an active list of jails, use
 .Xr jls 8 .
 If
 .Xr sshd 8
 is enabled in the jail environment, you should be able to
 .Xr ssh 1
 to the hostname or IP address of the jailed environment, and log
 in using the accounts you created previously.
 .Pp
 It is possible to have jails started at boot time.
 Please refer to the
 .Dq jail_*
 variables in
 .Xr rc.conf 5
 for more information.
 .Ss "Managing the Jail"
 Normal machine shutdown commands, such as
 .Xr halt 8 ,
 .Xr reboot 8 ,
 and
 .Xr shutdown 8 ,
 cannot be used successfully within the jail.
 To kill all processes from within a jail, you may use one of the
 following commands, depending on what you want to accomplish:
 .Bd -literal -offset indent
 kill -TERM -1
 kill -KILL -1
 .Ed
 .Pp
 This will send the
 .Dv SIGTERM
 or
 .Dv SIGKILL
 signals to all processes in the jail \(em be careful not to run this from
 the host environment!
 Once all of the jail's processes have died, unless the jail was created
 with the
 .Va persist
 parameter, the jail will be removed.
 Depending on
 the intended use of the jail, you may also want to run
 .Pa /etc/rc.shutdown
 from within the jail.
 .Pp
 To shut down the jail from the outside, simply remove it with:
 .Bd -literal -offset indent
 jail -r testjail
 .Ed
 .Pp
 which will run any commands specified by
 .Va exec.stop ,
 and then send
 .Dv SIGTERM
 and eventually
 .Dv SIGKILL
 to any remaining jailed processes.
 .Pp
 The
 .Pa /proc/ Ns Ar pid Ns Pa /status
 file contains, as its last field, the name of the jail in which the
 process runs, or
 .Dq Li -
 to indicate that the process is not running within a jail.
 The
 .Xr ps 1
 command also shows a
 .Ql J
 flag for processes in a jail.
 .Pp
 You can also list/kill processes based on their jail ID.
 To show processes and their jail ID, use the following command:
 .Pp
 .Dl "ps ax -o pid,jid,args"
 .Pp
 To show and then kill processes in jail number 3 use the following commands:
 .Bd -literal -offset indent
 pgrep -lfj 3
 pkill -j 3
 .Ed
 or:
 .Pp
 .Dl "killall -j 3"
 .Ss "Jails and File Systems"
 It is not possible to
 .Xr mount 8
 or
 .Xr umount 8
 any file system inside a jail unless the file system is marked
 jail-friendly, the jail's
 .Va allow.mount
 parameter is set, and the jail's
 .Va enforce_statfs
 parameter is lower than 2.
 .Pp
 Multiple jails sharing the same file system can influence each other.
 For example, a user in one jail can fill the file system,
 leaving no space for processes in the other jail.
 Trying to use
 .Xr quota 1
 to prevent this will not work either, as the file system quotas
 are not aware of jails but only look at the user and group IDs.
 This means the same user ID in two jails share a single file
 system quota.
 One would need to use one file system per jail to make this work.
 .Ss "Sysctl MIB Entries"
 The read-only entry
 .Va security.jail.jailed
 can be used to determine if a process is running inside a jail (value
 is one) or not (value is zero).
 .Pp
 The variable
 .Va security.jail.max_af_ips
 determines how many addresses per address family a jail may have.
 The default is 255.
 .Pp
 Some MIB variables have per-jail settings.
 Changes to these variables by a jailed process do not affect the host
 environment, only the jail environment.
 These variables are
 .Va kern.securelevel ,
 .Va security.bsd.suser_enabled ,
 .Va kern.hostname ,
 .Va kern.domainname ,
 .Va kern.hostid ,
 and
 .Va kern.hostuuid .
 .Ss "Hierarchical Jails"
 By setting a jail's
 .Va children.max
 parameter, processes within a jail may be able to create jails of their own.
 These child jails are kept in a hierarchy, with jails only able to see and/or
 modify the jails they created (or those jails' children).
 Each jail has a read-only
 .Va parent
 parameter, containing the
 .Va jid
 of the jail that created it; a
 .Va jid
 of 0 indicates the jail is a child of the current jail (or is a top-level
 jail if the current process isn't jailed).
 .Pp
 Jailed processes are not allowed to confer greater permissions than they
 themselves are given, e.g., if a jail is created with
 .Va allow.nomount ,
 it is not able to create a jail with
 .Va allow.mount
 set.
 Similarly, such restrictions as
 .Va ip4.addr
 and
 .Va securelevel
 may not be bypassed in child jails.
 .Pp
 A child jail may in turn create its own child jails if its own
 .Va children.max
 parameter is set (remember it is zero by default).
 These jails are visible to and can be modified by their parent and all
 ancestors.
 .Pp
 Jail names reflect this hierarchy, with a full name being an MIB-type string
 separated by dots.
 For example, if a base system process creates a jail
 .Dq foo ,
 and a process under that jail creates another jail
 .Dq bar ,
 then the second jail will be seen as
 .Dq foo.bar
 in the base system (though it is only seen as
 .Dq bar
 to any processes inside jail
 .Dq foo ) .
 Jids on the other hand exist in a single space, and each jail must have a
 unique jid.
 .Pp
 Like the names, a child jail's
 .Va path
 appears relative to its creator's own
 .Va path .
 This is by virtue of the child jail being created in the chrooted
 environment of the first jail.
 .Sh SEE ALSO
 .Xr killall 1 ,
 .Xr lsvfs 1 ,
 .Xr newaliases 1 ,
 .Xr pgrep 1 ,
 .Xr pkill 1 ,
 .Xr ps 1 ,
 .Xr quota 1 ,
 .Xr jail_set 2 ,
 .Xr vmm 4 ,
 .Xr devfs 5 ,
 .Xr fdescfs 5 ,
 .Xr jail.conf 5 ,
 .Xr linprocfs 5 ,
 .Xr linsysfs 5 ,
 .Xr procfs 5 ,
 .Xr rc.conf 5 ,
 .Xr sysctl.conf 5 ,
 .Xr bsdconfig 8 ,
 .Xr chroot 8 ,
 .Xr devfs 8 ,
 .Xr halt 8 ,
 .Xr ifconfig 8 ,
 .Xr inetd 8 ,
 .Xr jexec 8 ,
 .Xr jls 8 ,
 .Xr mount 8 ,
 .Xr mountd 8 ,
 .Xr nfsd 8 ,
 .Xr reboot 8 ,
 .Xr rpcbind 8 ,
 .Xr sendmail 8 ,
 .Xr shutdown 8 ,
 .Xr sysctl 8 ,
 .Xr syslogd 8 ,
 .Xr umount 8
 .Sh HISTORY
 The
 .Nm
 utility appeared in
 .Fx 4.0 .
 Hierarchical/extensible jails were introduced in
 .Fx 8.0 .
 The configuration file was introduced in
 .Fx 9.1 .
 .Sh AUTHORS
 .An -nosplit
 The jail feature was written by
 .An Poul-Henning Kamp
 for R&D Associates
 who contributed it to
 .Fx .
 .Pp
 .An Robert Watson
 wrote the extended documentation, found a few bugs, added
 a few new features, and cleaned up the userland jail environment.
 .Pp
 .An Bjoern A. Zeeb
 added multi-IP jail support for IPv4 and IPv6 based on a patch
 originally done by
 .An Pawel Jakub Dawidek
 for IPv4.
 .Pp
 .An James Gritton
 added the extensible jail parameters, hierarchical jails,
 and the configuration file.
 .Sh BUGS
 It might be a good idea to add an
 address alias flag such that daemons listening on all IPs
 .Pq Dv INADDR_ANY
 will not bind on that address, which would facilitate building a safe
 host environment such that host daemons do not impose on services offered
 from within jails.
 Currently, the simplest answer is to minimize services
 offered on the host, possibly limiting it to services offered from
 .Xr inetd 8
 which is easily configurable.
 .Sh NOTES
 Great care should be taken when managing directories visible within the jail.
 For example, if a jailed process has its current working directory set to a
 directory that is moved out of the jail's chroot, then the process may gain
 access to the file space outside of the jail.
 It is recommended that directories always be copied, rather than moved, out
 of a jail.
 .Pp
 In addition, there are several ways in which an unprivileged user
 outside the jail can cooperate with a privileged user inside the jail
 and thereby obtain elevated privileges in the host environment.
 Most of these attacks can be mitigated by ensuring that the jail root
 is not accessible to unprivileged users in the host environment.
 Regardless, as a general rule, untrusted users with privileged access
 to a jail should not be given access to the host environment.