Index: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c
===================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c	(revision 362157)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c	(revision 362158)
@@ -1,2796 +1,2796 @@
 /*
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
  * Common Development and Distribution License (the "License").
  * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
  * See the License for the specific language governing permissions
  * and limitations under the License.
  *
  * When distributing Covered Code, include this CDDL HEADER in each
  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  * If applicable, add the following below this CDDL HEADER, with the
  * fields enclosed by brackets "[]" replaced with your own identifying
  * information: Portions Copyright [yyyy] [name of copyright owner]
  *
  * CDDL HEADER END
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2011 Pawel Jakub Dawidek <pawel@dawidek.net>.
  * All rights reserved.
  * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
  * Copyright (c) 2014 Integros [integros.com]
  * Copyright 2016 Nexenta Systems, Inc. All rights reserved.
  */
 
 /* Portions Copyright 2010 Robert Milkowski */
 
 #include <sys/types.h>
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/sysmacros.h>
 #include <sys/kmem.h>
 #include <sys/acl.h>
 #include <sys/vnode.h>
 #include <sys/vfs.h>
 #include <sys/mntent.h>
 #include <sys/mount.h>
 #include <sys/cmn_err.h>
 #include <sys/zfs_znode.h>
 #include <sys/zfs_dir.h>
 #include <sys/zil.h>
 #include <sys/fs/zfs.h>
 #include <sys/dmu.h>
 #include <sys/dsl_prop.h>
 #include <sys/dsl_dataset.h>
 #include <sys/dsl_deleg.h>
 #include <sys/spa.h>
 #include <sys/zap.h>
 #include <sys/sa.h>
 #include <sys/sa_impl.h>
 #include <sys/varargs.h>
 #include <sys/policy.h>
 #include <sys/atomic.h>
 #include <sys/zfs_ioctl.h>
 #include <sys/zfs_ctldir.h>
 #include <sys/zfs_fuid.h>
 #include <sys/sunddi.h>
 #include <sys/dnlc.h>
 #include <sys/dmu_objset.h>
 #include <sys/spa_boot.h>
 #include <sys/jail.h>
 #include <ufs/ufs/quota.h>
 
 #include "zfs_comutil.h"
 
 /* Mutex named "zfs_debug", initialized through MTX_SYSINIT. */
 struct mtx zfs_debug_mtx;
 MTX_SYSINIT(zfs_debug_mtx, &zfs_debug_mtx, "zfs_debug", MTX_DEF);
 
 /* Root of the vfs.zfs sysctl tree; the knobs below hang off it. */
 SYSCTL_NODE(_vfs, OID_AUTO, zfs, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
     "ZFS file system");
 
 /* vfs.zfs.super_owner: read/write integer knob. */
 int zfs_super_owner;
 SYSCTL_INT(_vfs_zfs, OID_AUTO, super_owner, CTLFLAG_RW, &zfs_super_owner, 0,
     "File system owner can perform privileged operation on his file systems");
 
 /* vfs.zfs.debug: read/write debug level, also declared as a tunable. */
 int zfs_debug_level;
 SYSCTL_INT(_vfs_zfs, OID_AUTO, debug, CTLFLAG_RWTUN, &zfs_debug_level, 0,
     "Debug level");
 
 /*
  * vfs.zfs.version.*: read-only values exporting the ACL, SPA and ZPL
  * version constants this file was compiled against.
  */
 SYSCTL_NODE(_vfs_zfs, OID_AUTO, version, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
     "ZFS versions");
 static int zfs_version_acl = ZFS_ACL_VERSION;
 SYSCTL_INT(_vfs_zfs_version, OID_AUTO, acl, CTLFLAG_RD, &zfs_version_acl, 0,
     "ZFS_ACL_VERSION");
 static int zfs_version_spa = SPA_VERSION;
 SYSCTL_INT(_vfs_zfs_version, OID_AUTO, spa, CTLFLAG_RD, &zfs_version_spa, 0,
     "SPA_VERSION");
 static int zfs_version_zpl = ZPL_VERSION;
 SYSCTL_INT(_vfs_zfs_version, OID_AUTO, zpl, CTLFLAG_RD, &zfs_version_zpl, 0,
     "ZPL_VERSION");
 
 static int zfs_quotactl(vfs_t *vfsp, int cmds, uid_t id, void *arg);
 static int zfs_mount(vfs_t *vfsp);
 static int zfs_umount(vfs_t *vfsp, int fflag);
 static int zfs_root(vfs_t *vfsp, int flags, vnode_t **vpp);
 static int zfs_statfs(vfs_t *vfsp, struct statfs *statp);
 static int zfs_vget(vfs_t *vfsp, ino_t ino, int flags, vnode_t **vpp);
 static int zfs_sync(vfs_t *vfsp, int waitfor);
-static int zfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, int *extflagsp,
-    struct ucred **credanonp, int *numsecflavors, int **secflavors);
+static int zfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, uint64_t *extflagsp,
+    struct ucred **credanonp, int *numsecflavors, int *secflavors);
 static int zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, int flags, vnode_t **vpp);
 static void zfs_objset_close(zfsvfs_t *zfsvfs);
 static void zfs_freevfs(vfs_t *vfsp);
 
 /*
  * VFS operations vector for the "zfs" file system type.  Root lookups go
  * through vfs_cache_root(), with zfs_root() supplied as the cachedroot
  * handler.
  */
 struct vfsops zfs_vfsops = {
 	.vfs_mount =		zfs_mount,
 	.vfs_unmount =		zfs_umount,
 	.vfs_root =		vfs_cache_root,
 	.vfs_cachedroot =	zfs_root,
 	.vfs_statfs =		zfs_statfs,
 	.vfs_vget =		zfs_vget,
 	.vfs_sync =		zfs_sync,
 	.vfs_checkexp =		zfs_checkexp,
 	.vfs_fhtovp =		zfs_fhtovp,
 	.vfs_quotactl =		zfs_quotactl,
 };
 
 /* Register the vector under the name "zfs" with the listed VFCF_* flags. */
 VFS_SET(zfs_vfsops, zfs, VFCF_JAIL | VFCF_DELEGADMIN);
 
 /*
  * We need to keep a count of active fs's.
  * This is necessary to prevent our module
  * from being unloaded after a umount -f
  */
 static uint32_t	zfs_active_fs_count = 0;
 
 /*
  * Fill in a BSD dqblk64 for the given user or group id.
  *
  *	zfsvfs	- file system to query
  *	id	- numeric uid or gid
  *	isgroup	- non-zero to look up a group quota, zero for a user quota
  *	dqp	- structure that receives the limits and current usage
  *
  * Returns 0 on success, EINVAL if no quota object exists (or the file
  * system is replaying its log), or the error from the quota/usage ZAP
  * lookups.  An id that has a quota but no recorded usage is reported as
  * using zero blocks rather than as an error.
  */
 static int
 zfs_getquota(zfsvfs_t *zfsvfs, uid_t id, int isgroup, struct dqblk64 *dqp)
 {
 	int error = 0;
 	char buf[32];
 	uint64_t usedobj, quotaobj;
 	uint64_t quota, used = 0;
 	timespec_t now;
 
 	usedobj = isgroup ? DMU_GROUPUSED_OBJECT : DMU_USERUSED_OBJECT;
 	quotaobj = isgroup ? zfsvfs->z_groupquota_obj : zfsvfs->z_userquota_obj;
 
 	if (quotaobj == 0 || zfsvfs->z_replay) {
 		error = EINVAL;
 		goto done;
 	}
 	/* ZAP entries are keyed by the id rendered as lowercase hex. */
 	(void)sprintf(buf, "%llx", (longlong_t)id);
 	if ((error = zap_lookup(zfsvfs->z_os, quotaobj,
 				buf, sizeof(quota), 1, &quota)) != 0) {
 		dprintf("%s(%d): quotaobj lookup failed\n", __FUNCTION__, __LINE__);
 		goto done;
 	}
 	/*
 	 * quota(8) uses bsoftlimit as "quota", and hardlimit as "limit".
 	 * So we set them to be the same.
 	 */
 	dqp->dqb_bsoftlimit = dqp->dqb_bhardlimit = btodb(quota);
 	error = zap_lookup(zfsvfs->z_os, usedobj, buf, sizeof(used), 1, &used);
 	if (error && error != ENOENT) {
 		dprintf("%s(%d):  usedobj failed; %d\n", __FUNCTION__, __LINE__, error);
 		goto done;
 	}
 	/*
 	 * A missing usage entry is not a failure: "used" keeps its initial
 	 * value of 0.  Clear the ENOENT so it is not handed back to the
 	 * caller, which would otherwise discard the filled-in dqblk.
 	 */
 	error = 0;
 	dqp->dqb_curblocks = btodb(used);
 	dqp->dqb_ihardlimit = dqp->dqb_isoftlimit = 0;
 	vfs_timestamp(&now);
 	/*
 	 * Setting this to 0 causes FreeBSD quota(8) to print
 	 * the number of days since the epoch, which isn't
 	 * particularly useful.
 	 */
 	dqp->dqb_btime = dqp->dqb_itime = now.tv_sec;
 done:
 	return (error);
 }
 
 /*
  * quotactl handler for ZFS.
  *
  *	vfsp	- mount point being operated on
  *	cmds	- packed command word; the sub-command is cmds >> SUBCMDSHIFT
  *		  and the quota type is cmds & SUBCMDMASK
  *	id	- uid/gid to operate on; -1 selects the caller's real uid
  *		  (USRQUOTA) or real gid (GRPQUOTA)
  *	arg	- user-space buffer whose meaning depends on the command
  *
  * Q_GETQUOTASIZE copies out 64.  Q_QUOTAON returns 0 and Q_QUOTAOFF
  * returns ENOTSUP.  Q_SETQUOTA copies in a dqblk64 and installs its
  * dqb_bhardlimit as the quota; Q_GETQUOTA copies out the result of
  * zfs_getquota().  Every other command, including the *32 variants,
  * returns EINVAL.
  *
  * Every exit taken for Q_QUOTAON/Q_QUOTAOFF after ZFS_ENTER calls
  * vfs_unbusy(vfsp), including the invalid-type exits.
  */
 static int
 zfs_quotactl(vfs_t *vfsp, int cmds, uid_t id, void *arg)
 {
 	zfsvfs_t *zfsvfs = vfsp->vfs_data;
 	struct thread *td;
 	int cmd, type, error = 0;
 	int bitsize;
 	/* Only consumed by Q_SETQUOTA, where the switch below always sets it. */
 	zfs_userquota_prop_t quota_type = ZFS_PROP_USERQUOTA;
 	struct dqblk64 dqblk = { 0 };
 
 	td = curthread;
 	cmd = cmds >> SUBCMDSHIFT;
 	type = cmds & SUBCMDMASK;
 
 	ZFS_ENTER(zfsvfs);
 	if (id == -1) {
 		switch (type) {
 		case USRQUOTA:
 			id = td->td_ucred->cr_ruid;
 			break;
 		case GRPQUOTA:
 			id = td->td_ucred->cr_rgid;
 			break;
 		default:
 			error = EINVAL;
 			if (cmd == Q_QUOTAON || cmd == Q_QUOTAOFF)
 				vfs_unbusy(vfsp);
 			goto done;
 		}
 	}
 	/*
 	 * Map BSD type to:
 	 * ZFS_PROP_USERUSED,
 	 * ZFS_PROP_USERQUOTA,
 	 * ZFS_PROP_GROUPUSED,
 	 * ZFS_PROP_GROUPQUOTA
 	 */
 	switch (cmd) {
 	case Q_SETQUOTA:
 	case Q_SETQUOTA32:
 		if (type == USRQUOTA)
 			quota_type = ZFS_PROP_USERQUOTA;
 		else if (type == GRPQUOTA)
 			quota_type = ZFS_PROP_GROUPQUOTA;
 		else
 			error = EINVAL;
 		break;
 	case Q_GETQUOTA:
 	case Q_GETQUOTA32:
 		if (type == USRQUOTA)
 			quota_type = ZFS_PROP_USERUSED;
 		else if (type == GRPQUOTA)
 			quota_type = ZFS_PROP_GROUPUSED;
 		else
 			error = EINVAL;
 		break;
 	default:
 		break;
 	}
 
 	/*
 	 * Depending on the cmd, we may need to get
 	 * the ruid and domain (see fuidstr_to_sid?),
 	 * the fuid (how?), or other information.
 	 * Create fuid using zfs_fuid_create(zfsvfs, id,
 	 * ZFS_OWNER or ZFS_GROUP, cr, &fuidp)?
 	 * I think I can use just the id?
 	 *
 	 * Look at zfs_fuid_overquota() to look up a quota.
 	 * zap_lookup(something, quotaobj, fuidstring, sizeof(long long), 1, &quota)
 	 *
 	 * See zfs_set_userquota() to set a quota.
 	 */
 	if ((u_int)type >= MAXQUOTAS) {
 		error = EINVAL;
 		/*
 		 * Mirror the id == -1 path above: on/off requests must
 		 * unbusy the mount on every exit, not only the valid ones.
 		 */
 		if (cmd == Q_QUOTAON || cmd == Q_QUOTAOFF)
 			vfs_unbusy(vfsp);
 		goto done;
 	}
 
 	switch (cmd) {
 	case Q_GETQUOTASIZE:
 		bitsize = 64;
 		error = copyout(&bitsize, arg, sizeof(int));
 		break;
 	case Q_QUOTAON:
 		// As far as I can tell, you can't turn quotas on or off on zfs
 		error = 0;
 		vfs_unbusy(vfsp);
 		break;
 	case Q_QUOTAOFF:
 		error = ENOTSUP;
 		vfs_unbusy(vfsp);
 		break;
 	case Q_SETQUOTA:
 		error = copyin(arg, &dqblk, sizeof(dqblk));
 		if (error == 0)
 			error = zfs_set_userquota(zfsvfs, quota_type,
 						  "", id, dbtob(dqblk.dqb_bhardlimit));
 		break;
 	case Q_GETQUOTA:
 		error = zfs_getquota(zfsvfs, id, type == GRPQUOTA, &dqblk);
 		if (error == 0)
 			error = copyout(&dqblk, arg, sizeof(dqblk));
 		break;
 	default:
 		error = EINVAL;
 		break;
 	}
 done:
 	ZFS_EXIT(zfsvfs);
 	return (error);
 }
 
 /*ARGSUSED*/
 /*
  * Sync one ZFS file system (vfsp != NULL) or every pool (vfsp == NULL).
  * Always returns 0 except when vfs_stdsync() reports an error.
  */
 static int
 zfs_sync(vfs_t *vfsp, int waitfor)
 {
 	zfsvfs_t *zfsvfs;
 	dsl_pool_t *pool;
 	int rc;
 
 	/*
 	 * Two cases where we deliberately do nothing:
 	 *  - the kernel has panicked: a compromised kernel must not write
 	 *    to the storage pool, so never sync during panic;
 	 *  - the request comes from the system syncher (MNT_LAZY): ZFS
 	 *    already commits async data at zfs_txg_timeout intervals.
 	 */
 	if (KERNEL_PANICKED() || waitfor == MNT_LAZY)
 		return (0);
 
 	if (vfsp == NULL) {
 		/*
 		 * No specific file system: sync them all.  This is the
 		 * sync(1M) path; ZFS honors it by waiting for all pools
 		 * to commit all dirty data.
 		 */
 		spa_sync_allpools();
 		return (0);
 	}
 
 	/* A specific file system was named. */
 	zfsvfs = vfsp->vfs_data;
 
 	rc = vfs_stdsync(vfsp, waitfor);
 	if (rc != 0)
 		return (rc);
 
 	ZFS_ENTER(zfsvfs);
 	pool = dmu_objset_pool(zfsvfs->z_os);
 
 	/*
 	 * While shutting down, skip file systems that live on a
 	 * suspended pool.
 	 */
 	if (sys_shutdown && spa_suspended(pool->dp_spa)) {
 		ZFS_EXIT(zfsvfs);
 		return (0);
 	}
 
 	if (zfsvfs->z_log != NULL)
 		zil_commit(zfsvfs->z_log, 0);
 
 	ZFS_EXIT(zfsvfs);
 	return (0);
 }
 
 #ifndef __FreeBSD_kernel__
 /*
  * Choose a device number that no mounted file system is using and store
  * it in *dev.
  *
  * The inner loop steps zfs_minor forward (wrapping once it reaches
  * MAXMIN32) until vfs_devismounted() reports the candidate as free or the
  * search comes back to where it started.  If a whole cycle finds nothing,
  * a fresh major number is requested from getudev() and the search restarts
  * from minor 0.
  *
  * Returns 0 on success, or -1 if getudev() cannot supply a new major.
  */
 static int
 zfs_create_unique_device(dev_t *dev)
 {
 	major_t new_major;
 
 	do {
 		ASSERT3U(zfs_minor, <=, MAXMIN32);
 		/* Remember where this pass began so a full wrap can be detected. */
 		minor_t start = zfs_minor;
 		do {
 			mutex_enter(&zfs_dev_mtx);
 			if (zfs_minor >= MAXMIN32) {
 				/*
 				 * If we're still using the real major
 				 * keep out of /dev/zfs and /dev/zvol minor
 				 * number space.  If we're using a getudev()'ed
 				 * major number, we can use all of its minors.
 				 */
 				if (zfs_major == ddi_name_to_major(ZFS_DRIVER))
 					zfs_minor = ZFS_MIN_MINOR;
 				else
 					zfs_minor = 0;
 			} else {
 				zfs_minor++;
 			}
 			*dev = makedevice(zfs_major, zfs_minor);
 			mutex_exit(&zfs_dev_mtx);
 			/* NOTE(review): zfs_minor is re-read here without zfs_dev_mtx held. */
 		} while (vfs_devismounted(*dev) && zfs_minor != start);
 		if (zfs_minor == start) {
 			/*
 			 * We are using all ~262,000 minor numbers for the
 			 * current major number.  Create a new major number.
 			 */
 			if ((new_major = getudev()) == (major_t)-1) {
 				cmn_err(CE_WARN,
 				    "zfs_mount: Can't get unique major "
 				    "device number.");
 				return (-1);
 			}
 			mutex_enter(&zfs_dev_mtx);
 			zfs_major = new_major;
 			zfs_minor = 0;
 
 			mutex_exit(&zfs_dev_mtx);
 		} else {
 			/* A free minor was found; *dev already holds it. */
 			break;
 		}
 		/* CONSTANTCONDITION */
 	} while (1);
 
 	return (0);
 }
 #endif	/* !__FreeBSD_kernel__ */
 
 /*
  * Property callback for "atime": mirror the new value into the cached
  * z_atime flag, the MNT_NOATIME mount flag and the atime/noatime mount
  * options.  Only the exact value TRUE enables atime.
  */
 static void
 atime_changed_cb(void *arg, uint64_t newval)
 {
 	zfsvfs_t *zfsvfs = arg;
 	vfs_t *vfsp = zfsvfs->z_vfs;
 
 	if (newval != TRUE) {
 		zfsvfs->z_atime = FALSE;
 		vfsp->vfs_flag |= MNT_NOATIME;
 		vfs_clearmntopt(vfsp, MNTOPT_ATIME);
 		vfs_setmntopt(vfsp, MNTOPT_NOATIME, NULL, 0);
 		return;
 	}
 
 	zfsvfs->z_atime = TRUE;
 	vfsp->vfs_flag &= ~MNT_NOATIME;
 	vfs_clearmntopt(vfsp, MNTOPT_NOATIME);
 	vfs_setmntopt(vfsp, MNTOPT_ATIME, NULL, 0);
 }
 
 /*
  * Property callback for "xattr": swap the xattr/noxattr mount options.
  * Updating VFS_XATTR in vfs_flag is only compiled in when TODO is defined.
  */
 static void
 xattr_changed_cb(void *arg, uint64_t newval)
 {
 	zfsvfs_t *zfsvfs = arg;
 	vfs_t *vfsp = zfsvfs->z_vfs;
 
 	if (newval != TRUE) {
 		/* XXX locking on vfs_flag? */
 #ifdef TODO
 		vfsp->vfs_flag &= ~VFS_XATTR;
 #endif
 		vfs_clearmntopt(vfsp, MNTOPT_XATTR);
 		vfs_setmntopt(vfsp, MNTOPT_NOXATTR, NULL, 0);
 		return;
 	}
 
 	/* XXX locking on vfs_flag? */
 #ifdef TODO
 	vfsp->vfs_flag |= VFS_XATTR;
 #endif
 	vfs_clearmntopt(vfsp, MNTOPT_NOXATTR);
 	vfs_setmntopt(vfsp, MNTOPT_XATTR, NULL, 0);
 }
 
 /*
  * Property callback for "recordsize": record the new block size as both
  * the mount's reported I/O size and the cached z_max_blksz.  The value is
  * asserted to be a power of two between SPA_MINBLOCKSIZE and the pool's
  * maximum block size.
  */
 static void
 blksz_changed_cb(void *arg, uint64_t newval)
 {
 	zfsvfs_t *zfsvfs = arg;
 
 	ASSERT3U(newval, <=, spa_maxblocksize(dmu_objset_spa(zfsvfs->z_os)));
 	ASSERT3U(newval, >=, SPA_MINBLOCKSIZE);
 	ASSERT(ISP2(newval));
 
 	zfsvfs->z_vfs->mnt_stat.f_iosize = newval;
 	zfsvfs->z_max_blksz = newval;
 }
 
 /*
  * Property callback for "readonly": any non-zero value marks the mount
  * VFS_RDONLY and selects the "ro" option; zero clears the flag and
  * selects "rw".
  */
 static void
 readonly_changed_cb(void *arg, uint64_t newval)
 {
 	zfsvfs_t *zfsvfs = arg;
 	vfs_t *vfsp = zfsvfs->z_vfs;
 
 	if (!newval) {
 		/* XXX locking on vfs_flag? */
 		vfsp->vfs_flag &= ~VFS_RDONLY;
 		vfs_clearmntopt(vfsp, MNTOPT_RO);
 		vfs_setmntopt(vfsp, MNTOPT_RW, NULL, 0);
 		return;
 	}
 
 	/* XXX locking on vfs_flag? */
 	vfsp->vfs_flag |= VFS_RDONLY;
 	vfs_clearmntopt(vfsp, MNTOPT_RW);
 	vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0);
 }
 
 /*
  * Property callback for "setuid": FALSE sets VFS_NOSETUID and the
  * "nosetuid" option; any other value clears the flag and selects "setuid".
  */
 static void
 setuid_changed_cb(void *arg, uint64_t newval)
 {
 	zfsvfs_t *zfsvfs = arg;
 	vfs_t *vfsp = zfsvfs->z_vfs;
 
 	if (newval != FALSE) {
 		vfsp->vfs_flag &= ~VFS_NOSETUID;
 		vfs_clearmntopt(vfsp, MNTOPT_NOSETUID);
 		vfs_setmntopt(vfsp, MNTOPT_SETUID, NULL, 0);
 		return;
 	}
 
 	vfsp->vfs_flag |= VFS_NOSETUID;
 	vfs_clearmntopt(vfsp, MNTOPT_SETUID);
 	vfs_setmntopt(vfsp, MNTOPT_NOSETUID, NULL, 0);
 }
 
 /*
  * Property callback for "exec": FALSE sets VFS_NOEXEC and the "noexec"
  * option; any other value clears the flag and selects "exec".
  */
 static void
 exec_changed_cb(void *arg, uint64_t newval)
 {
 	zfsvfs_t *zfsvfs = arg;
 	vfs_t *vfsp = zfsvfs->z_vfs;
 
 	if (newval != FALSE) {
 		vfsp->vfs_flag &= ~VFS_NOEXEC;
 		vfs_clearmntopt(vfsp, MNTOPT_NOEXEC);
 		vfs_setmntopt(vfsp, MNTOPT_EXEC, NULL, 0);
 		return;
 	}
 
 	vfsp->vfs_flag |= VFS_NOEXEC;
 	vfs_clearmntopt(vfsp, MNTOPT_EXEC);
 	vfs_setmntopt(vfsp, MNTOPT_NOEXEC, NULL, 0);
 }
 
 /*
  * The nbmand mount option can be changed at mount time.
  * We can't allow it to be toggled on live file systems or incorrect
  * behavior may be seen from cifs clients
  *
  * This property isn't registered via dsl_prop_register(), but this callback
  * will be called when a file system is first mounted
  */
 static void
 nbmand_changed_cb(void *arg, uint64_t newval)
 {
 	zfsvfs_t *zfsvfs = arg;
 	vfs_t *vfsp = zfsvfs->z_vfs;
 
 	/* Only the mount options are touched; no vfs_flag bit is involved. */
 	if (newval != FALSE) {
 		vfs_clearmntopt(vfsp, MNTOPT_NONBMAND);
 		vfs_setmntopt(vfsp, MNTOPT_NBMAND, NULL, 0);
 		return;
 	}
 
 	vfs_clearmntopt(vfsp, MNTOPT_NBMAND);
 	vfs_setmntopt(vfsp, MNTOPT_NONBMAND, NULL, 0);
 }
 
 /* Property callback for "snapdir": cache the new value in z_show_ctldir. */
 static void
 snapdir_changed_cb(void *arg, uint64_t newval)
 {
 	((zfsvfs_t *)arg)->z_show_ctldir = newval;
 }
 
 /* Property callback for "vscan": cache the new value in z_vscan. */
 static void
 vscan_changed_cb(void *arg, uint64_t newval)
 {
 	((zfsvfs_t *)arg)->z_vscan = newval;
 }
 
 /* Property callback for "aclmode": cache the new value in z_acl_mode. */
 static void
 acl_mode_changed_cb(void *arg, uint64_t newval)
 {
 	((zfsvfs_t *)arg)->z_acl_mode = newval;
 }
 
 /* Property callback for "aclinherit": cache the new value in z_acl_inherit. */
 static void
 acl_inherit_changed_cb(void *arg, uint64_t newval)
 {
 	((zfsvfs_t *)arg)->z_acl_inherit = newval;
 }
 
 /*
  * Register the dataset property callbacks for a mounted file system and
  * re-apply any mount options given explicitly at mount time.
  *
  *	vfsp	- mount whose vfs_data points at the zfsvfs_t to wire up
  *
  * Returns 0 on success, EOPNOTSUPP for a snapshot, the error from reading
  * the "nbmand" property, or the first error from dsl_prop_register() (in
  * which case all callbacks registered so far are unregistered again).
  */
 static int
 zfs_register_callbacks(vfs_t *vfsp)
 {
 	struct dsl_dataset *ds = NULL;
 	objset_t *os = NULL;
 	zfsvfs_t *zfsvfs = NULL;
 	uint64_t nbmand;
 	boolean_t readonly = B_FALSE;
 	boolean_t do_readonly = B_FALSE;
 	boolean_t setuid = B_FALSE;
 	boolean_t do_setuid = B_FALSE;
 	boolean_t exec = B_FALSE;
 	boolean_t do_exec = B_FALSE;
 #ifdef illumos
 	boolean_t devices = B_FALSE;
 	boolean_t do_devices = B_FALSE;
 #endif
 	boolean_t xattr = B_FALSE;
 	boolean_t do_xattr = B_FALSE;
 	boolean_t atime = B_FALSE;
 	boolean_t do_atime = B_FALSE;
 	int error = 0;
 
 	ASSERT(vfsp);
 	zfsvfs = vfsp->vfs_data;
 	ASSERT(zfsvfs);
 	os = zfsvfs->z_os;
 
 	/*
 	 * This function can be called for a snapshot when we update snapshot's
 	 * mount point, which isn't really supported.
 	 */
 	if (dmu_objset_is_snapshot(os))
 		return (EOPNOTSUPP);
 
 	/*
 	 * The act of registering our callbacks will destroy any mount
 	 * options we may have.  In order to enable temporary overrides
 	 * of mount options, we stash away the current values and
 	 * restore them after we register the callbacks.
 	 */
 	if (vfs_optionisset(vfsp, MNTOPT_RO, NULL) ||
 	    !spa_writeable(dmu_objset_spa(os))) {
 		readonly = B_TRUE;
 		do_readonly = B_TRUE;
 	} else if (vfs_optionisset(vfsp, MNTOPT_RW, NULL)) {
 		readonly = B_FALSE;
 		do_readonly = B_TRUE;
 	}
 	if (vfs_optionisset(vfsp, MNTOPT_NOSUID, NULL)) {
 		setuid = B_FALSE;
 		do_setuid = B_TRUE;
 	} else {
 		if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) {
 			setuid = B_FALSE;
 			do_setuid = B_TRUE;
 		} else if (vfs_optionisset(vfsp, MNTOPT_SETUID, NULL)) {
 			setuid = B_TRUE;
 			do_setuid = B_TRUE;
 		}
 	}
 	if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL)) {
 		exec = B_FALSE;
 		do_exec = B_TRUE;
 	} else if (vfs_optionisset(vfsp, MNTOPT_EXEC, NULL)) {
 		exec = B_TRUE;
 		do_exec = B_TRUE;
 	}
 	if (vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL)) {
 		xattr = B_FALSE;
 		do_xattr = B_TRUE;
 	} else if (vfs_optionisset(vfsp, MNTOPT_XATTR, NULL)) {
 		xattr = B_TRUE;
 		do_xattr = B_TRUE;
 	}
 	if (vfs_optionisset(vfsp, MNTOPT_NOATIME, NULL)) {
 		atime = B_FALSE;
 		do_atime = B_TRUE;
 	} else if (vfs_optionisset(vfsp, MNTOPT_ATIME, NULL)) {
 		atime = B_TRUE;
 		do_atime = B_TRUE;
 	}
 
 	/*
 	 * We need to enter pool configuration here, so that we can use
 	 * dsl_prop_get_int_ds() to handle the special nbmand property below.
 	 * dsl_prop_get_integer() can not be used, because it has to acquire
 	 * spa_namespace_lock and we can not do that because we already hold
 	 * z_teardown_lock.  The problem is that spa_write_cachefile() is called
 	 * with spa_namespace_lock held and the function calls ZFS vnode
 	 * operations to write the cache file and thus z_teardown_lock is
 	 * acquired after spa_namespace_lock.
 	 */
 	ds = dmu_objset_ds(os);
 	dsl_pool_config_enter(dmu_objset_pool(os), FTAG);
 
 	/*
 	 * nbmand is a special property.  It can only be changed at
 	 * mount time.
 	 *
 	 * This is weird, but it is documented to only be changeable
 	 * at mount time.
 	 */
 	if (vfs_optionisset(vfsp, MNTOPT_NONBMAND, NULL)) {
 		nbmand = B_FALSE;
 	} else if (vfs_optionisset(vfsp, MNTOPT_NBMAND, NULL)) {
 		nbmand = B_TRUE;
 	} else if ((error = dsl_prop_get_int_ds(ds, "nbmand", &nbmand)) != 0) {
 		/*
 		 * The assignment is parenthesized so that "error" holds the
 		 * real errno rather than the 0/1 result of the comparison.
 		 */
 		dsl_pool_config_exit(dmu_objset_pool(os), FTAG);
 		return (error);
 	}
 
 	/*
 	 * Register property callbacks.
 	 *
 	 * It would probably be fine to just check for i/o error from
 	 * the first prop_register(), but I guess I like to go
 	 * overboard...
 	 */
 	error = dsl_prop_register(ds,
 	    zfs_prop_to_name(ZFS_PROP_ATIME), atime_changed_cb, zfsvfs);
 	error = error ? error : dsl_prop_register(ds,
 	    zfs_prop_to_name(ZFS_PROP_XATTR), xattr_changed_cb, zfsvfs);
 	error = error ? error : dsl_prop_register(ds,
 	    zfs_prop_to_name(ZFS_PROP_RECORDSIZE), blksz_changed_cb, zfsvfs);
 	error = error ? error : dsl_prop_register(ds,
 	    zfs_prop_to_name(ZFS_PROP_READONLY), readonly_changed_cb, zfsvfs);
 #ifdef illumos
 	error = error ? error : dsl_prop_register(ds,
 	    zfs_prop_to_name(ZFS_PROP_DEVICES), devices_changed_cb, zfsvfs);
 #endif
 	error = error ? error : dsl_prop_register(ds,
 	    zfs_prop_to_name(ZFS_PROP_SETUID), setuid_changed_cb, zfsvfs);
 	error = error ? error : dsl_prop_register(ds,
 	    zfs_prop_to_name(ZFS_PROP_EXEC), exec_changed_cb, zfsvfs);
 	error = error ? error : dsl_prop_register(ds,
 	    zfs_prop_to_name(ZFS_PROP_SNAPDIR), snapdir_changed_cb, zfsvfs);
 	error = error ? error : dsl_prop_register(ds,
 	    zfs_prop_to_name(ZFS_PROP_ACLMODE), acl_mode_changed_cb, zfsvfs);
 	error = error ? error : dsl_prop_register(ds,
 	    zfs_prop_to_name(ZFS_PROP_ACLINHERIT), acl_inherit_changed_cb,
 	    zfsvfs);
 	error = error ? error : dsl_prop_register(ds,
 	    zfs_prop_to_name(ZFS_PROP_VSCAN), vscan_changed_cb, zfsvfs);
 	dsl_pool_config_exit(dmu_objset_pool(os), FTAG);
 	if (error)
 		goto unregister;
 
 	/*
 	 * Invoke our callbacks to restore temporary mount options.
 	 */
 	if (do_readonly)
 		readonly_changed_cb(zfsvfs, readonly);
 	if (do_setuid)
 		setuid_changed_cb(zfsvfs, setuid);
 	if (do_exec)
 		exec_changed_cb(zfsvfs, exec);
 	if (do_xattr)
 		xattr_changed_cb(zfsvfs, xattr);
 	if (do_atime)
 		atime_changed_cb(zfsvfs, atime);
 
 	nbmand_changed_cb(zfsvfs, nbmand);
 
 	return (0);
 
 unregister:
 	dsl_prop_unregister_all(ds, zfsvfs);
 	return (error);
 }
 
 /*
  * Extract the owning user and group ids from an object's bonus data.
  *
  *	bonustype - bonus buffer type; only DMU_OT_ZNODE and DMU_OT_SA are
  *		    handled
  *	data	  - the bonus buffer contents, or NULL
  *	userp	  - receives the user id
  *	groupp	  - receives the group id
  *
  * Returns 0 with *userp and *groupp filled in, ENOENT for any other bonus
  * type, or EEXIST when data is NULL.
  */
 static int
 zfs_space_delta_cb(dmu_object_type_t bonustype, void *data,
     uint64_t *userp, uint64_t *groupp)
 {
 	/*
 	 * Is it a valid type of object to track?
 	 */
 	if (bonustype != DMU_OT_ZNODE && bonustype != DMU_OT_SA)
 		return (SET_ERROR(ENOENT));
 
 	/*
 	 * If we have a NULL data pointer
 	 * then assume the id's aren't changing and
 	 * return EEXIST to the dmu to let it know to
 	 * use the same ids
 	 */
 	if (data == NULL)
 		return (SET_ERROR(EEXIST));
 
 	if (bonustype == DMU_OT_ZNODE) {
 		/* Legacy znode layout: ids are plain struct members. */
 		znode_phys_t *znp = data;
 		*userp = znp->zp_uid;
 		*groupp = znp->zp_gid;
 	} else {
 		int hdrsize;
 		sa_hdr_phys_t *sap = data;
 		/* Work on a local copy so byte-swapping never modifies "data". */
 		sa_hdr_phys_t sa = *sap;
 		boolean_t swap = B_FALSE;
 
 		ASSERT(bonustype == DMU_OT_SA);
 
 		if (sa.sa_magic == 0) {
 			/*
 			 * This should only happen for newly created
 			 * files that haven't had the znode data filled
 			 * in yet.
 			 */
 			*userp = 0;
 			*groupp = 0;
 			return (0);
 		}
 		/* A byte-reversed magic means the header is in the other endianness. */
 		if (sa.sa_magic == BSWAP_32(SA_MAGIC)) {
 			sa.sa_magic = SA_MAGIC;
 			sa.sa_layout_info = BSWAP_16(sa.sa_layout_info);
 			swap = B_TRUE;
 		} else {
 			VERIFY3U(sa.sa_magic, ==, SA_MAGIC);
 		}
 
 		hdrsize = sa_hdrsize(&sa);
 		VERIFY3U(hdrsize, >=, sizeof (sa_hdr_phys_t));
 		/* The ids are read at fixed offsets past the header. */
 		*userp = *((uint64_t *)((uintptr_t)data + hdrsize +
 		    SA_UID_OFFSET));
 		*groupp = *((uint64_t *)((uintptr_t)data + hdrsize +
 		    SA_GID_OFFSET));
 		if (swap) {
 			*userp = BSWAP_64(*userp);
 			*groupp = BSWAP_64(*groupp);
 		}
 	}
 	return (0);
 }
 
 /*
  * Split a FUID given as a numeric string into its domain name and RID.
  * The domain is copied into domainbuf (at most buflen bytes); when the
  * FUID's index has no domain, domainbuf becomes the empty string.
  */
 static void
 fuidstr_to_sid(zfsvfs_t *zfsvfs, const char *fuidstr,
     char *domainbuf, int buflen, uid_t *ridp)
 {
 	uint64_t fuid = zfs_strtonum(fuidstr, NULL);
 	const char *dom = zfs_fuid_find_by_idx(zfsvfs, FUID_INDEX(fuid));
 
 	if (!dom)
 		domainbuf[0] = '\0';
 	else
 		(void) strlcpy(domainbuf, dom, buflen);
 
 	*ridp = FUID_RID(fuid);
 }
 
 /*
  * Map a user/group quota property to the object number holding its
  * entries.  Returns 0 for any property not listed below.
  */
 static uint64_t
 zfs_userquota_prop_to_obj(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type)
 {
 	uint64_t obj = 0;
 
 	switch (type) {
 	case ZFS_PROP_USERUSED:
 		obj = DMU_USERUSED_OBJECT;
 		break;
 	case ZFS_PROP_GROUPUSED:
 		obj = DMU_GROUPUSED_OBJECT;
 		break;
 	case ZFS_PROP_USERQUOTA:
 		obj = zfsvfs->z_userquota_obj;
 		break;
 	case ZFS_PROP_GROUPQUOTA:
 		obj = zfsvfs->z_groupquota_obj;
 		break;
 	default:
 		break;
 	}
 	return (obj);
 }
 
 /*
  * Copy as many user/group accounting records as fit into vbuf.
  *
  *	type	 - which property's object to walk
  *	cookiep	 - in: serialized cursor position to resume from;
  *		   out: position to resume from next time
  *	vbuf	 - output array of zfs_useracct_t
  *	bufsizep - in: size of vbuf in bytes; out: bytes actually filled
  *
  * Returns ENOTSUP when the objset has no userspace accounting, 0 when the
  * walk ends normally (including running off the end of the object), or
  * the error from zap_cursor_retrieve().
  */
 int
 zfs_userspace_many(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type,
     uint64_t *cookiep, void *vbuf, uint64_t *bufsizep)
 {
 	zfs_useracct_t *cur = vbuf;
 	zap_attribute_t za;
 	zap_cursor_t zc;
 	uint64_t obj;
 	int error;
 
 	if (!dmu_objset_userspace_present(zfsvfs->z_os))
 		return (SET_ERROR(ENOTSUP));
 
 	obj = zfs_userquota_prop_to_obj(zfsvfs, type);
 	if (obj == 0) {
 		*bufsizep = 0;
 		return (0);
 	}
 
 	zap_cursor_init_serialized(&zc, zfsvfs->z_os, obj, *cookiep);
 	while ((error = zap_cursor_retrieve(&zc, &za)) == 0) {
 		/* Stop before writing a record that would overrun vbuf. */
 		if ((uintptr_t)cur - (uintptr_t)vbuf + sizeof (zfs_useracct_t) >
 		    *bufsizep)
 			break;
 
 		fuidstr_to_sid(zfsvfs, za.za_name,
 		    cur->zu_domain, sizeof (cur->zu_domain), &cur->zu_rid);
 		cur->zu_space = za.za_first_integer;
 		cur++;
 
 		zap_cursor_advance(&zc);
 	}
 	/* Reaching the end of the object is the normal way out. */
 	if (error == ENOENT)
 		error = 0;
 
 	ASSERT3U((uintptr_t)cur - (uintptr_t)vbuf, <=, *bufsizep);
 	*bufsizep = (uintptr_t)cur - (uintptr_t)vbuf;
 	*cookiep = zap_cursor_serialize(&zc);
 	zap_cursor_fini(&zc);
 	return (error);
 }
 
 /*
  * buf must be big enough (eg, 32 bytes)
  */
 static int
 id_to_fuidstr(zfsvfs_t *zfsvfs, const char *domain, uid_t rid,
     char *buf, boolean_t addok)
 {
 	int idx = 0;
 	uint64_t fuid;
 
 	/* A NULL or empty domain keeps index 0. */
 	if (domain != NULL && domain[0] != '\0') {
 		idx = zfs_fuid_find_by_domain(zfsvfs, domain, NULL, addok);
 		if (idx == -1)
 			return (SET_ERROR(ENOENT));
 	}
 
 	fuid = FUID_ENCODE(idx, rid);
 	/* The FUID is rendered as lowercase hex. */
 	(void) sprintf(buf, "%llx", (longlong_t)fuid);
 	return (0);
 }
 
 /*
  * Look up a single user/group accounting or quota value.
  *
  * *valp is zeroed first and stays 0 when the property has no backing
  * object or the id has no entry.  Returns ENOTSUP when the objset has no
  * userspace accounting, the error from id_to_fuidstr(), or the error from
  * zap_lookup() with ENOENT turned into success.
  */
 int
 zfs_userspace_one(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type,
     const char *domain, uint64_t rid, uint64_t *valp)
 {
 	char key[32];
 	uint64_t obj;
 	int rc;
 
 	*valp = 0;
 
 	if (!dmu_objset_userspace_present(zfsvfs->z_os))
 		return (SET_ERROR(ENOTSUP));
 
 	obj = zfs_userquota_prop_to_obj(zfsvfs, type);
 	if (obj == 0)
 		return (0);
 
 	rc = id_to_fuidstr(zfsvfs, domain, rid, key, B_FALSE);
 	if (rc != 0)
 		return (rc);
 
 	rc = zap_lookup(zfsvfs->z_os, obj, key, 8, 1, valp);
 	return (rc == ENOENT ? 0 : rc);
 }
 
 /*
  * Set or clear the quota for one user or group.
  *
  *	type	- ZFS_PROP_USERQUOTA or ZFS_PROP_GROUPQUOTA; anything else is
  *		  rejected with EINVAL
  *	domain	- domain string for the id (may be empty)
  *	rid	- numeric id within the domain
  *	quota	- new limit; 0 removes the entry
  *
  * Returns ENOTSUP when the file system version predates
  * ZPL_VERSION_USERSPACE, the error from id_to_fuidstr() or
  * dmu_tx_assign(), or the result of the ZAP update/remove.
  */
 int
 zfs_set_userquota(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type,
     const char *domain, uint64_t rid, uint64_t quota)
 {
 	char buf[32];
 	int err;
 	dmu_tx_t *tx;
 	uint64_t *objp;
 	boolean_t fuid_dirtied;
 
 	if (type != ZFS_PROP_USERQUOTA && type != ZFS_PROP_GROUPQUOTA)
 		return (SET_ERROR(EINVAL));
 
 	if (zfsvfs->z_version < ZPL_VERSION_USERSPACE)
 		return (SET_ERROR(ENOTSUP));
 
 	/* objp points at the cached object number for this quota type. */
 	objp = (type == ZFS_PROP_USERQUOTA) ? &zfsvfs->z_userquota_obj :
 	    &zfsvfs->z_groupquota_obj;
 
 	/* addok is B_TRUE here, unlike the lookup in zfs_userspace_one(). */
 	err = id_to_fuidstr(zfsvfs, domain, rid, buf, B_TRUE);
 	if (err)
 		return (err);
 	fuid_dirtied = zfsvfs->z_fuid_dirty;
 
 	tx = dmu_tx_create(zfsvfs->z_os);
 	dmu_tx_hold_zap(tx, *objp ? *objp : DMU_NEW_OBJECT, B_TRUE, NULL);
 	if (*objp == 0) {
 		/* The quota object will be created, so the master node changes too. */
 		dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_TRUE,
 		    zfs_userquota_prop_prefixes[type]);
 	}
 	if (fuid_dirtied)
 		zfs_fuid_txhold(zfsvfs, tx);
 	err = dmu_tx_assign(tx, TXG_WAIT);
 	if (err) {
 		dmu_tx_abort(tx);
 		return (err);
 	}
 
 	/* Re-check under z_lock and create the quota object if still missing. */
 	mutex_enter(&zfsvfs->z_lock);
 	if (*objp == 0) {
 		*objp = zap_create(zfsvfs->z_os, DMU_OT_USERGROUP_QUOTA,
 		    DMU_OT_NONE, 0, tx);
 		VERIFY(0 == zap_add(zfsvfs->z_os, MASTER_NODE_OBJ,
 		    zfs_userquota_prop_prefixes[type], 8, 1, objp, tx));
 	}
 	mutex_exit(&zfsvfs->z_lock);
 
 	if (quota == 0) {
 		/* Removing an entry that does not exist counts as success. */
 		err = zap_remove(zfsvfs->z_os, *objp, buf, tx);
 		if (err == ENOENT)
 			err = 0;
 	} else {
 		err = zap_update(zfsvfs->z_os, *objp, buf, 8, 1, &quota, tx);
 	}
 	ASSERT(err == 0);
 	if (fuid_dirtied)
 		zfs_fuid_sync(zfsvfs, tx);
 	dmu_tx_commit(tx);
 	return (err);
 }
 
 /*
  * Report whether the given FUID has used at least as much space as its
  * quota allows.  Returns B_FALSE when no quota object exists, during log
  * replay, or when either the quota or the usage entry cannot be looked up.
  */
 boolean_t
 zfs_fuid_overquota(zfsvfs_t *zfsvfs, boolean_t isgroup, uint64_t fuid)
 {
 	char key[32];
 	uint64_t used, quota, usedobj, quotaobj;
 
 	if (isgroup) {
 		usedobj = DMU_GROUPUSED_OBJECT;
 		quotaobj = zfsvfs->z_groupquota_obj;
 	} else {
 		usedobj = DMU_USERUSED_OBJECT;
 		quotaobj = zfsvfs->z_userquota_obj;
 	}
 
 	if (quotaobj == 0 || zfsvfs->z_replay)
 		return (B_FALSE);
 
 	(void) sprintf(key, "%llx", (longlong_t)fuid);
 
 	if (zap_lookup(zfsvfs->z_os, quotaobj, key, 8, 1, &quota) != 0)
 		return (B_FALSE);
 	if (zap_lookup(zfsvfs->z_os, usedobj, key, 8, 1, &used) != 0)
 		return (B_FALSE);
 
 	return (used >= quota);
 }
 
 /*
  * Report whether the owner (or owning group, when isgroup is set) of the
  * given znode is over quota.  Returns B_FALSE without any lookup when
  * no quota object exists or the log is being replayed.
  */
 boolean_t
 zfs_owner_overquota(zfsvfs_t *zfsvfs, znode_t *zp, boolean_t isgroup)
 {
 	uint64_t quotaobj, fuid;
 
 	if (isgroup) {
 		quotaobj = zfsvfs->z_groupquota_obj;
 		fuid = zp->z_gid;
 	} else {
 		quotaobj = zfsvfs->z_userquota_obj;
 		fuid = zp->z_uid;
 	}
 
 	if (quotaobj != 0 && !zfsvfs->z_replay)
 		return (zfs_fuid_overquota(zfsvfs, isgroup, fuid));
 
 	return (B_FALSE);
 }
 
 /*
  * Associate this zfsvfs with the given objset, which must be owned.
  * This will cache a bunch of on-disk state from the objset in the
  * zfsvfs.
  */
 /*
  * Populate a zfsvfs_t from the on-disk state of an objset.
  *
  *	zfsvfs	- structure to fill in
  *	os	- objset to read from; stored in zfsvfs->z_os
  *
  * Reads the ZPL version and the normalization, utf8only and case
  * properties, sets up the SA attribute table, and caches the object
  * numbers of the root directory, unlinked set, user/group quota objects,
  * FUID table and shares directory.  The last four are optional: a
  * missing entry is recorded as object 0.
  *
  * Returns 0 on success, ENOTSUP if the file system version is newer than
  * the pool supports, or the error from the failing lookup.
  */
 static int
 zfsvfs_init(zfsvfs_t *zfsvfs, objset_t *os)
 {
 	int error;
 	uint64_t val;
 
 	zfsvfs->z_max_blksz = SPA_OLD_MAXBLOCKSIZE;
 	zfsvfs->z_show_ctldir = ZFS_SNAPDIR_VISIBLE;
 	zfsvfs->z_os = os;
 
 	error = zfs_get_zplprop(os, ZFS_PROP_VERSION, &zfsvfs->z_version);
 	if (error != 0)
 		return (error);
 	if (zfsvfs->z_version >
 	    zfs_zpl_version_map(spa_version(dmu_objset_spa(os)))) {
 		/* Both arguments are cast to u_longlong_t, hence %llu. */
 		(void) printf("Can't mount a version %llu file system "
 		    "on a version %llu pool.\nPool must be upgraded to mount "
 		    "this file system.\n", (u_longlong_t)zfsvfs->z_version,
 		    (u_longlong_t)spa_version(dmu_objset_spa(os)));
 		return (SET_ERROR(ENOTSUP));
 	}
 	error = zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &val);
 	if (error != 0)
 		return (error);
 	zfsvfs->z_norm = (int)val;
 
 	error = zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &val);
 	if (error != 0)
 		return (error);
 	zfsvfs->z_utf8 = (val != 0);
 
 	error = zfs_get_zplprop(os, ZFS_PROP_CASE, &val);
 	if (error != 0)
 		return (error);
 	zfsvfs->z_case = (uint_t)val;
 
 	/*
 	 * Fold case on file systems that are always or sometimes case
 	 * insensitive.
 	 */
 	if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE ||
 	    zfsvfs->z_case == ZFS_CASE_MIXED)
 		zfsvfs->z_norm |= U8_TEXTPREP_TOUPPER;
 
 	zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os);
 	zfsvfs->z_use_sa = USE_SA(zfsvfs->z_version, zfsvfs->z_os);
 
 	uint64_t sa_obj = 0;
 	if (zfsvfs->z_use_sa) {
 		/* should either have both of these objects or none */
 		error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 8, 1,
 		    &sa_obj);
 		if (error != 0)
 			return (error);
 	}
 
 	error = sa_setup(os, sa_obj, zfs_attr_table, ZPL_END,
 	    &zfsvfs->z_attr_table);
 	if (error != 0)
 		return (error);
 
 	if (zfsvfs->z_version >= ZPL_VERSION_SA)
 		sa_register_update_callback(os, zfs_sa_upgrade);
 
 	/* The root directory and unlinked set are mandatory. */
 	error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_ROOT_OBJ, 8, 1,
 	    &zfsvfs->z_root);
 	if (error != 0)
 		return (error);
 	ASSERT(zfsvfs->z_root != 0);
 
 	error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_UNLINKED_SET, 8, 1,
 	    &zfsvfs->z_unlinkedobj);
 	if (error != 0)
 		return (error);
 
 	/* The remaining objects are optional: ENOENT just means "object 0". */
 	error = zap_lookup(os, MASTER_NODE_OBJ,
 	    zfs_userquota_prop_prefixes[ZFS_PROP_USERQUOTA],
 	    8, 1, &zfsvfs->z_userquota_obj);
 	if (error == ENOENT)
 		zfsvfs->z_userquota_obj = 0;
 	else if (error != 0)
 		return (error);
 
 	error = zap_lookup(os, MASTER_NODE_OBJ,
 	    zfs_userquota_prop_prefixes[ZFS_PROP_GROUPQUOTA],
 	    8, 1, &zfsvfs->z_groupquota_obj);
 	if (error == ENOENT)
 		zfsvfs->z_groupquota_obj = 0;
 	else if (error != 0)
 		return (error);
 
 	error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_FUID_TABLES, 8, 1,
 	    &zfsvfs->z_fuid_obj);
 	if (error == ENOENT)
 		zfsvfs->z_fuid_obj = 0;
 	else if (error != 0)
 		return (error);
 
 	error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SHARES_DIR, 8, 1,
 	    &zfsvfs->z_shares_dir);
 	if (error == ENOENT)
 		zfsvfs->z_shares_dir = 0;
 	else if (error != 0)
 		return (error);
 
 	/*
 	 * Only use the name cache if we are looking for a
 	 * name on a file system that does not require normalization
 	 * or case folding.  We can also look there if we happen to be
 	 * on a non-normalizing, mixed sensitivity file system IF we
 	 * are looking for the exact name (which is always the case on
 	 * FreeBSD).
 	 */
 	zfsvfs->z_use_namecache = !zfsvfs->z_norm ||
 	    ((zfsvfs->z_case == ZFS_CASE_MIXED) &&
 	    !(zfsvfs->z_norm & ~U8_TEXTPREP_TOUPPER));
 
 	return (0);
 }
 
 #if defined(__FreeBSD__)
 taskq_t *zfsvfs_taskq;
 
 /*
  * Task handler: hand the zfsvfs_t passed as context to
  * zfs_unlinked_drain().  The pending count is ignored.
  */
 static void
 zfsvfs_task_unlinked_drain(void *context, int pending __unused)
 {
 	zfsvfs_t *zfsvfs = context;
 
 	zfs_unlinked_drain(zfsvfs);
 }
 #endif
 
 /*
  * Allocate a zfsvfs_t for the named objset, take ownership of the objset
  * and initialize the structure through zfsvfs_create_impl().
  *
  *	osname	- objset name; must be shorter than MNAMELEN
  *	zfvp	- receives the new zfsvfs_t (set by zfsvfs_create_impl())
  *
  * Returns 0 on success, ENAMETOOLONG for an over-long name, or the error
  * from dmu_objset_own() / zfsvfs_create_impl().
  */
 int
 zfsvfs_create(const char *osname, zfsvfs_t **zfvp)
 {
 	zfsvfs_t *zfsvfs;
 	objset_t *os;
 	int error;
 
 	/*
 	 * XXX: Fix struct statfs so this isn't necessary!
 	 *
 	 * The 'osname' is used as the filesystem's special node, which means
 	 * it must fit in statfs.f_mntfromname, or else it can't be
 	 * enumerated, so libzfs_mnttab_find() returns NULL, which causes
 	 * 'zfs unmount' to think it's not mounted when it is.
 	 */
 	if (strlen(osname) >= MNAMELEN)
 		return (SET_ERROR(ENAMETOOLONG));
 
 	zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP);
 
 	/*
 	 * We claim to always be readonly so we can open snapshots;
 	 * other ZPL code will prevent us from writing to snapshots.
 	 */
 	error = dmu_objset_own(osname, DMU_OST_ZFS, B_TRUE, zfsvfs, &os);
 	if (error != 0) {
 		kmem_free(zfsvfs, sizeof (zfsvfs_t));
 		return (error);
 	}
 
 	error = zfsvfs_create_impl(zfvp, zfsvfs, os);
 	if (error == 0)
 		return (0);
 
 	/* Initialization failed: give the objset back. */
 	dmu_objset_disown(os, zfsvfs);
 	return (error);
 }
 
 
 /*
  * Initialize a caller-allocated zfsvfs_t for the objset 'os'.
  *
  * Sets up the locks, the znode list and (on FreeBSD) the unlinked-drain
  * task, then calls zfsvfs_init().
  *
  * On success *zfvp is set to 'zfsvfs' and 0 is returned.  On failure *zfvp
  * is set to NULL, 'zfsvfs' is torn down and freed, and the error from
  * zfsvfs_init() is returned; 'os' is left for the caller to release.
  */
 int
 zfsvfs_create_impl(zfsvfs_t **zfvp, zfsvfs_t *zfsvfs, objset_t *os)
 {
 	int error;
 
 	zfsvfs->z_vfs = NULL;
 	zfsvfs->z_parent = zfsvfs;
 
 	mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL);
 	mutex_init(&zfsvfs->z_lock, NULL, MUTEX_DEFAULT, NULL);
 	list_create(&zfsvfs->z_all_znodes, sizeof (znode_t),
 	    offsetof(znode_t, z_link_node));
 #if defined(__FreeBSD__)
 	TASK_INIT(&zfsvfs->z_unlinked_drain_task, 0,
 	    zfsvfs_task_unlinked_drain, zfsvfs);
 #endif
 #ifdef DIAGNOSTIC
 	rrm_init(&zfsvfs->z_teardown_lock, B_TRUE);
 #else
 	rrm_init(&zfsvfs->z_teardown_lock, B_FALSE);
 #endif
 	rms_init(&zfsvfs->z_teardown_inactive_lock, "zfs teardown inactive");
 	rw_init(&zfsvfs->z_fuid_lock, NULL, RW_DEFAULT, NULL);
 	for (int i = 0; i != ZFS_OBJ_MTX_SZ; i++)
 		mutex_init(&zfsvfs->z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL);
 
 	error = zfsvfs_init(zfsvfs, os);
 	if (error != 0) {
 		*zfvp = NULL;
 		/*
 		 * Release through zfsvfs_free() so that every lock and the
 		 * znode list initialized above is destroyed before the
 		 * structure is freed; a bare kmem_free() would leave them
 		 * initialized.
 		 */
 		zfsvfs_free(zfsvfs);
 		return (error);
 	}
 
 	*zfvp = zfsvfs;
 	return (0);
 }
 
 /*
  * Bring a zfsvfs into service.
  *
  * Registers the property callbacks for zfsvfs->z_vfs and opens the ZIL.
  * When 'mounting' is true it also drains the unlinked set (only if the
  * mount is not read-only) and, if the pool is writeable, replays the
  * intent log or destroys it when zil_replay_disable is set.  Finally the
  * objset's user pointer is set to this zfsvfs.
  *
  * Returns 0 on success, or the error from zfs_register_callbacks().
  */
 static int
 zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting)
 {
 	int error;
 
 	error = zfs_register_callbacks(zfsvfs->z_vfs);
 	if (error)
 		return (error);
 
 	zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data);
 
 	/*
 	 * If we are not mounting (ie: online recv), then we don't
 	 * have to worry about replaying the log as we blocked all
 	 * operations out since we closed the ZIL.
 	 */
 	if (mounting) {
 		boolean_t readonly;
 
 		/*
 		 * During replay we remove the read only flag to
 		 * allow replays to succeed.
 		 */
 		/* 'readonly' keeps the raw VFS_RDONLY bit so it can be OR-ed back. */
 		readonly = zfsvfs->z_vfs->vfs_flag & VFS_RDONLY;
 		if (readonly != 0)
 			zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY;
 		else
 			zfs_unlinked_drain(zfsvfs);
 
 		/*
 		 * Parse and replay the intent log.
 		 *
 		 * Because of ziltest, this must be done after
 		 * zfs_unlinked_drain().  (Further note: ziltest
 		 * doesn't use readonly mounts, where
 		 * zfs_unlinked_drain() isn't called.)  This is because
 		 * ziltest causes spa_sync() to think it's committed,
 		 * but actually it is not, so the intent log contains
 		 * many txg's worth of changes.
 		 *
 		 * In particular, if object N is in the unlinked set in
 		 * the last txg to actually sync, then it could be
 		 * actually freed in a later txg and then reallocated
 		 * in a yet later txg.  This would write a "create
 		 * object N" record to the intent log.  Normally, this
 		 * would be fine because the spa_sync() would have
 		 * written out the fact that object N is free, before
 		 * we could write the "create object N" intent log
 		 * record.
 		 *
 		 * But when we are in ziltest mode, we advance the "open
 		 * txg" without actually spa_sync()-ing the changes to
 		 * disk.  So we would see that object N is still
 		 * allocated and in the unlinked set, and there is an
 		 * intent log record saying to allocate it.
 		 */
 		if (spa_writeable(dmu_objset_spa(zfsvfs->z_os))) {
 			if (zil_replay_disable) {
 				zil_destroy(zfsvfs->z_log, B_FALSE);
 			} else {
 				/* z_replay is set only for the duration of the replay. */
 				zfsvfs->z_replay = B_TRUE;
 				zil_replay(zfsvfs->z_os, zfsvfs,
 				    zfs_replay_vector);
 				zfsvfs->z_replay = B_FALSE;
 			}
 		}
 		zfsvfs->z_vfs->vfs_flag |= readonly; /* restore readonly bit */
 	}
 
 	/*
 	 * Set the objset user_ptr to track its zfsvfs.
 	 */
 	mutex_enter(&zfsvfs->z_os->os_user_ptr_lock);
 	dmu_objset_set_user(zfsvfs->z_os, zfsvfs);
 	mutex_exit(&zfsvfs->z_os->os_user_ptr_lock);
 
 	return (0);
 }
 
 extern krwlock_t zfsvfs_lock; /* in zfs_znode.c */
 
 /*
  * Destroy the FUID state, locks and znode list of 'zfsvfs' and free the
  * structure itself.  The objset is not touched here.
  */
 void
 zfsvfs_free(zfsvfs_t *zfsvfs)
 {
 	/*
 	 * This is a barrier to prevent the filesystem from going away in
 	 * zfs_znode_move() until we can safely ensure that the filesystem is
 	 * not unmounted. We consider the filesystem valid before the barrier
 	 * and invalid after the barrier.
 	 */
 	rw_enter(&zfsvfs_lock, RW_READER);
 	rw_exit(&zfsvfs_lock);
 
 	zfs_fuid_destroy(zfsvfs);
 
 	/* Tear down the synchronization primitives and the znode list. */
 	mutex_destroy(&zfsvfs->z_znodes_lock);
 	mutex_destroy(&zfsvfs->z_lock);
 	list_destroy(&zfsvfs->z_all_znodes);
 	rrm_destroy(&zfsvfs->z_teardown_lock);
 	rms_destroy(&zfsvfs->z_teardown_inactive_lock);
 	rw_destroy(&zfsvfs->z_fuid_lock);
 	for (int idx = 0; idx < ZFS_OBJ_MTX_SZ; idx++)
 		mutex_destroy(&zfsvfs->z_hold_mtx[idx]);
 
 	kmem_free(zfsvfs, sizeof (*zfsvfs));
 }
 
 /*
  * Recompute z_use_fuids and z_use_sa from the ZPL version and objset, and
  * if the zfsvfs is attached to a vfs, set or clear the FUID-dependent VFS
  * features to match z_use_fuids.
  */
 static void
 zfs_set_fuid_feature(zfsvfs_t *zfsvfs)
 {
 	vfs_t *vfsp = zfsvfs->z_vfs;
 
 	zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os);
 	zfsvfs->z_use_sa = USE_SA(zfsvfs->z_version, zfsvfs->z_os);
 
 	/* Nothing to advertise until a vfs has been attached. */
 	if (vfsp == NULL)
 		return;
 
 	if (zfsvfs->z_use_fuids) {
 		vfs_set_feature(vfsp, VFSFT_XVATTR);
 		vfs_set_feature(vfsp, VFSFT_SYSATTR_VIEWS);
 		vfs_set_feature(vfsp, VFSFT_ACEMASKONACCESS);
 		vfs_set_feature(vfsp, VFSFT_ACLONCREATE);
 		vfs_set_feature(vfsp, VFSFT_ACCESS_FILTER);
 		vfs_set_feature(vfsp, VFSFT_REPARSE);
 	} else {
 		vfs_clear_feature(vfsp, VFSFT_XVATTR);
 		vfs_clear_feature(vfsp, VFSFT_SYSATTR_VIEWS);
 		vfs_clear_feature(vfsp, VFSFT_ACEMASKONACCESS);
 		vfs_clear_feature(vfsp, VFSFT_ACLONCREATE);
 		vfs_clear_feature(vfsp, VFSFT_ACCESS_FILTER);
 		vfs_clear_feature(vfsp, VFSFT_REPARSE);
 	}
 }
 
 /*
  * Mount the dataset named 'osname' on 'vfsp'.
  *
  * Creates the zfsvfs_t for the dataset, fills in the generic mount fields
  * (block sizes, kernel mount flags, fsid), sets the VFS feature flags and
  * then either applies the fixed snapshot settings or runs zfsvfs_setup()
  * for a regular filesystem.  For non-snapshots the '.zfs' control
  * directory is created last.
  *
  * On success the active filesystem count is incremented and 0 is
  * returned.  On failure the objset is disowned, the zfsvfs_t is freed and
  * the error is returned.
  */
 static int
 zfs_domount(vfs_t *vfsp, char *osname)
 {
 	uint64_t recordsize, fsid_guid;
 	int error = 0;
 	zfsvfs_t *zfsvfs;
 
 	ASSERT(vfsp);
 	ASSERT(osname);
 
 	error = zfsvfs_create(osname, &zfsvfs);
 	if (error)
 		return (error);
 	zfsvfs->z_vfs = vfsp;
 
 #ifdef illumos
 	/* Initialize the generic filesystem structure. */
 	vfsp->vfs_bcount = 0;
 	vfsp->vfs_data = NULL;
 
 	if (zfs_create_unique_device(&mount_dev) == -1) {
 		error = SET_ERROR(ENODEV);
 		goto out;
 	}
 	ASSERT(vfs_devismounted(mount_dev) == 0);
 #endif
 
 	error = dsl_prop_get_integer(osname, "recordsize", &recordsize,
 	    NULL);
 	if (error != 0)
 		goto out;
 	zfsvfs->z_vfs->vfs_bsize = SPA_MINBLOCKSIZE;
 	zfsvfs->z_vfs->mnt_stat.f_iosize = recordsize;
 
 	vfsp->vfs_data = zfsvfs;
 	vfsp->mnt_flag |= MNT_LOCAL;
 	vfsp->mnt_kern_flag |= MNTK_LOOKUP_SHARED;
 	vfsp->mnt_kern_flag |= MNTK_SHARED_WRITES;
 	vfsp->mnt_kern_flag |= MNTK_EXTENDED_SHARED;
 	vfsp->mnt_kern_flag |= MNTK_NO_IOPF;	/* vn_io_fault can be used */
 	vfsp->mnt_kern_flag |= MNTK_NOMSYNC;
 	vfsp->mnt_kern_flag |= MNTK_VMSETSIZE_BUG;
 
 	/*
 	 * The fsid is 64 bits, composed of an 8-bit fs type, which
 	 * separates our fsid from any other filesystem types, and a
 	 * 56-bit objset unique ID.  The objset unique ID is unique to
 	 * all objsets open on this system, provided by unique_create().
 	 * The 8-bit fs type must be put in the low bits of fsid[1]
 	 * because that's where other Solaris filesystems put it.
 	 */
 	fsid_guid = dmu_objset_fsid_guid(zfsvfs->z_os);
 	ASSERT((fsid_guid & ~((1ULL<<56)-1)) == 0);
 	vfsp->vfs_fsid.val[0] = fsid_guid;
 	/* Explicit parentheses: '&' already binds tighter than '|'. */
 	vfsp->vfs_fsid.val[1] = ((fsid_guid >> 32) << 8) |
 	    (vfsp->mnt_vfc->vfc_typenum & 0xFF);
 
 	/*
 	 * Set features for file system.
 	 */
 	zfs_set_fuid_feature(zfsvfs);
 	if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE) {
 		vfs_set_feature(vfsp, VFSFT_DIRENTFLAGS);
 		vfs_set_feature(vfsp, VFSFT_CASEINSENSITIVE);
 		vfs_set_feature(vfsp, VFSFT_NOCASESENSITIVE);
 	} else if (zfsvfs->z_case == ZFS_CASE_MIXED) {
 		vfs_set_feature(vfsp, VFSFT_DIRENTFLAGS);
 		vfs_set_feature(vfsp, VFSFT_CASEINSENSITIVE);
 	}
 	vfs_set_feature(vfsp, VFSFT_ZEROCOPY_SUPPORTED);
 
 	if (dmu_objset_is_snapshot(zfsvfs->z_os)) {
 		uint64_t pval;
 
 		/* Snapshots: atime off, read-only, sync disabled. */
 		atime_changed_cb(zfsvfs, B_FALSE);
 		readonly_changed_cb(zfsvfs, B_TRUE);
 		error = dsl_prop_get_integer(osname, "xattr", &pval, NULL);
 		if (error != 0)
 			goto out;
 		xattr_changed_cb(zfsvfs, pval);
 		zfsvfs->z_issnap = B_TRUE;
 		zfsvfs->z_os->os_sync = ZFS_SYNC_DISABLED;
 
 		mutex_enter(&zfsvfs->z_os->os_user_ptr_lock);
 		dmu_objset_set_user(zfsvfs->z_os, zfsvfs);
 		mutex_exit(&zfsvfs->z_os->os_user_ptr_lock);
 	} else {
 		/*
 		 * Stop here on failure.  Falling through would create the
 		 * '.zfs' control directory for a zfsvfs that is freed
 		 * below, and zfsvfs_free() does not destroy it.
 		 */
 		error = zfsvfs_setup(zfsvfs, B_TRUE);
 		if (error != 0)
 			goto out;
 	}
 
 	vfs_mountedfrom(vfsp, osname);
 
 	if (!zfsvfs->z_issnap)
 		zfsctl_create(zfsvfs);
 out:
 	if (error) {
 		dmu_objset_disown(zfsvfs->z_os, zfsvfs);
 		zfsvfs_free(zfsvfs);
 	} else {
 		atomic_inc_32(&zfs_active_fs_count);
 	}
 
 	return (error);
 }
 
 /*
  * Unregister every property callback registered for 'zfsvfs' on its
  * dataset.  Does nothing when the objset is a snapshot.
  */
 void
 zfs_unregister_callbacks(zfsvfs_t *zfsvfs)
 {
 	objset_t *os = zfsvfs->z_os;
 
 	if (dmu_objset_is_snapshot(os))
 		return;
 
 	dsl_prop_unregister_all(dmu_objset_ds(os), zfsvfs);
 }
 
 #ifdef SECLABEL
 /*
  * Convert a decimal digit string to a uint64_t integer.
  *
  * Returns 0 and stores the value in *objnum on success.  Returns EINVAL
  * if the string contains a character that is not a decimal digit, and
  * ERANGE if the value does not fit in 64 bits.  *objnum is left untouched
  * on failure.  An empty string converts to 0.
  */
 static int
 str_to_uint64(char *str, uint64_t *objnum)
 {
 	const uint64_t max = ~(uint64_t)0;
 	uint64_t num = 0;
 
 	for (; *str != '\0'; str++) {
 		uint64_t digit;
 
 		if (*str < '0' || *str > '9')
 			return (SET_ERROR(EINVAL));
 		digit = (uint64_t)(*str - '0');
 
 		/* Reject input that would silently wrap around 2^64. */
 		if (num > (max - digit) / 10)
 			return (SET_ERROR(ERANGE));
 		num = num * 10 + digit;
 	}
 
 	*objnum = num;
 	return (0);
 }
 
 /*
  * The boot path passed from the boot loader is in the form of
  * "rootpool-name/root-filesystem-object-number". Convert this
  * string to a dataset name: "rootpool-name/root-filesystem-name".
  *
  * 'bpath' is copied to 'outpath' first, so when 'bpath' has no '/' or the
  * part after the first '/' is not a decimal number, 'outpath' is simply a
  * copy of 'bpath' and 0 is returned.  Returns EINVAL when 'bpath' is empty
  * or starts with '/'; otherwise returns the result of
  * dsl_dsobj_to_dsname().
  */
 static int
 zfs_parse_bootfs(char *bpath, char *outpath)
 {
 	uint64_t objnum;
 	char *sep;
 	int rc;
 
 	if (bpath[0] == '\0' || bpath[0] == '/')
 		return (SET_ERROR(EINVAL));
 
 	(void) strcpy(outpath, bpath);
 
 	/* No '/' (pool name only) or a non-numeric tail: keep the copy. */
 	sep = strchr(bpath, '/');
 	if (sep == NULL || str_to_uint64(sep + 1, &objnum) != 0)
 		return (0);
 
 	/* Temporarily cut 'bpath' down to the pool name for the lookup. */
 	*sep = '\0';
 	rc = dsl_dsobj_to_dsname(bpath, objnum, outpath);
 	*sep = '/';
 
 	return (rc);
 }
 
 /*
  * Check that the hex label string is appropriate for the dataset being
  * mounted into the global_zone proper.
  *
  * Returns 0 when the label is the default label or admin_high, or when it
  * is admin_low and the dataset's readonly property is set.  Every other
  * case, including a failure to read the readonly property, returns
  * EACCES.
  */
 int
 zfs_check_global_label(const char *dsname, const char *hexsl)
 {
 	uint64_t rdonly;
 
 	if (strcasecmp(hexsl, ZFS_MLSLABEL_DEFAULT) == 0 ||
 	    strcasecmp(hexsl, ADMIN_HIGH) == 0)
 		return (0);
 
 	if (strcasecmp(hexsl, ADMIN_LOW) != 0)
 		return (SET_ERROR(EACCES));
 
 	/* admin_low: the dataset must be readonly */
 	if (dsl_prop_get_integer(dsname,
 	    zfs_prop_to_name(ZFS_PROP_READONLY), &rdonly, NULL) != 0)
 		return (SET_ERROR(EACCES));
 
 	if (rdonly)
 		return (0);
 	return (EACCES);
 }
 
 /*
  * Determine whether the mount is allowed according to MAC check.
  * by comparing (where appropriate) label of the dataset against
  * the label of the zone being mounted into.  If the dataset has
  * no label, create one.
  *
  * 'vfsp' supplies the mount point path (vfs_mntpt) and may have the
  * MNTOPT_RO option set on it when only readonly access is allowed;
  * 'osname' is the dataset whose mlslabel property is checked.
  *
  * Returns 0 if access allowed, error otherwise (e.g. EACCES)
  */
 static int
 zfs_mount_label_policy(vfs_t *vfsp, char *osname)
 {
 	int		error, retv;
 	zone_t		*mntzone = NULL;
 	ts_label_t	*mnt_tsl;
 	bslabel_t	*mnt_sl;
 	bslabel_t	ds_sl;
 	char		ds_hexsl[MAXNAMELEN];
 
 	retv = EACCES;				/* assume the worst */
 
 	/*
 	 * Start by getting the dataset label if it exists.
 	 */
 	error = dsl_prop_get(osname, zfs_prop_to_name(ZFS_PROP_MLSLABEL),
 	    1, sizeof (ds_hexsl), &ds_hexsl, NULL);
 	if (error)
 		return (SET_ERROR(EACCES));
 
 	/*
 	 * If labeling is NOT enabled, then disallow the mount of datasets
 	 * which have a non-default label already.  No other label checks
 	 * are needed.
 	 */
 	if (!is_system_labeled()) {
 		if (strcasecmp(ds_hexsl, ZFS_MLSLABEL_DEFAULT) == 0)
 			return (0);
 		return (SET_ERROR(EACCES));
 	}
 
 	/*
 	 * Get the label of the mountpoint.  If mounting into the global
 	 * zone (i.e. mountpoint is not within an active zone and the
 	 * zoned property is off), the label must be default or
 	 * admin_low/admin_high only; no other checks are needed.
 	 */
 	mntzone = zone_find_by_any_path(refstr_value(vfsp->vfs_mntpt), B_FALSE);
 	if (mntzone->zone_id == GLOBAL_ZONEID) {
 		uint64_t zoned;
 
 		/* The zone is not used again on this path; release it now. */
 		zone_rele(mntzone);
 
 		if (dsl_prop_get_integer(osname,
 		    zfs_prop_to_name(ZFS_PROP_ZONED), &zoned, NULL))
 			return (SET_ERROR(EACCES));
 		if (!zoned)
 			return (zfs_check_global_label(osname, ds_hexsl));
 		else
 			/*
 			 * This is the case of a zone dataset being mounted
 			 * initially, before the zone has been fully created;
 			 * allow this mount into global zone.
 			 */
 			return (0);
 	}
 
 	/*
 	 * Non-global zone: hold the zone's label for the comparison below.
 	 * Both the label and the zone are released at the end.
 	 */
 	mnt_tsl = mntzone->zone_slabel;
 	ASSERT(mnt_tsl != NULL);
 	label_hold(mnt_tsl);
 	mnt_sl = label2bslabel(mnt_tsl);
 
 	if (strcasecmp(ds_hexsl, ZFS_MLSLABEL_DEFAULT) == 0) {
 		/*
 		 * The dataset doesn't have a real label, so fabricate one.
 		 */
 		char *str = NULL;
 
 		/* Access is granted only if the new label could be stored. */
 		if (l_to_str_internal(mnt_sl, &str) == 0 &&
 		    dsl_prop_set_string(osname,
 		    zfs_prop_to_name(ZFS_PROP_MLSLABEL),
 		    ZPROP_SRC_LOCAL, str) == 0)
 			retv = 0;
 		if (str != NULL)
 			kmem_free(str, strlen(str) + 1);
 	} else if (hexstr_to_label(ds_hexsl, &ds_sl) == 0) {
 		/*
 		 * Now compare labels to complete the MAC check.  If the
 		 * labels are equal then allow access.  If the mountpoint
 		 * label dominates the dataset label, allow readonly access.
 		 * Otherwise, access is denied.
 		 */
 		if (blequal(mnt_sl, &ds_sl))
 			retv = 0;
 		else if (bldominates(mnt_sl, &ds_sl)) {
 			vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0);
 			retv = 0;
 		}
 	}
 
 	label_rele(mnt_tsl);
 	zone_rele(mntzone);
 	return (retv);
 }
 #endif	/* SECLABEL */
 
 #ifdef OPENSOLARIS_MOUNTROOT
 /*
  * Mount, remount or unmount the root filesystem, depending on 'why':
  *
  *   ROOT_INIT    - import the root pool, translate the "zfs-bootfs" boot
  *                  property into a dataset name, mount it on 'vfsp' and
  *                  publish its root vnode in 'rootvp'.  Only the first
  *                  call is honoured; later calls return EBUSY.
  *   ROOT_REMOUNT - call readonly_changed_cb() with B_FALSE, set
  *                  VFS_REMOUNT and re-register the property callbacks.
  *   ROOT_UNMOUNT - unregister the property callbacks and sync.
  *
  * Any other value of 'why' returns ENOTSUP.
  */
 static int
 zfs_mountroot(vfs_t *vfsp, enum whymountroot why)
 {
 	int error = 0;
 	static int zfsrootdone = 0;
 	zfsvfs_t *zfsvfs = NULL;
 	znode_t *zp = NULL;
 	vnode_t *vp = NULL;
 	char *zfs_bootfs;
 	char *zfs_devid;
 
 	ASSERT(vfsp);
 
 	/*
 	 * The filesystem that we mount as root is defined in the
 	 * boot property "zfs-bootfs" with a format of
 	 * "poolname/root-dataset-objnum".
 	 */
 	if (why == ROOT_INIT) {
 		if (zfsrootdone++)
 			return (SET_ERROR(EBUSY));
 		/*
 		 * the process of doing a spa_load will require the
 		 * clock to be set before we could (for example) do
 		 * something better by looking at the timestamp on
 		 * an uberblock, so just set it to -1.
 		 */
 		clkset(-1);
 
 		if ((zfs_bootfs = spa_get_bootprop("zfs-bootfs")) == NULL) {
 			cmn_err(CE_NOTE, "spa_get_bootfs: can not get "
 			    "bootfs name");
 			return (SET_ERROR(EINVAL));
 		}
 		zfs_devid = spa_get_bootprop("diskdevid");
 		error = spa_import_rootpool(rootfs.bo_name, zfs_devid);
 		if (zfs_devid)
 			spa_free_bootprop(zfs_devid);
 		if (error) {
 			spa_free_bootprop(zfs_bootfs);
 			cmn_err(CE_NOTE, "spa_import_rootpool: error %d",
 			    error);
 			return (error);
 		}
 		/* Resolve the boot property into rootfs.bo_name. */
 		if (error = zfs_parse_bootfs(zfs_bootfs, rootfs.bo_name)) {
 			spa_free_bootprop(zfs_bootfs);
 			cmn_err(CE_NOTE, "zfs_parse_bootfs: error %d",
 			    error);
 			return (error);
 		}
 
 		spa_free_bootprop(zfs_bootfs);
 
 		/* From here on every exit goes through 'out' to unlock vfsp. */
 		if (error = vfs_lock(vfsp))
 			return (error);
 
 		if (error = zfs_domount(vfsp, rootfs.bo_name)) {
 			cmn_err(CE_NOTE, "zfs_domount: error %d", error);
 			goto out;
 		}
 
 		zfsvfs = (zfsvfs_t *)vfsp->vfs_data;
 		ASSERT(zfsvfs);
 		if (error = zfs_zget(zfsvfs, zfsvfs->z_root, &zp)) {
 			cmn_err(CE_NOTE, "zfs_zget: error %d", error);
 			goto out;
 		}
 
 		vp = ZTOV(zp);
 		mutex_enter(&vp->v_lock);
 		vp->v_flag |= VROOT;
 		mutex_exit(&vp->v_lock);
 		rootvp = vp;
 
 		/*
 		 * Leave rootvp held.  The root file system is never unmounted.
 		 */
 
 		vfs_add((struct vnode *)0, vfsp,
 		    (vfsp->vfs_flag & VFS_RDONLY) ? MS_RDONLY : 0);
 out:
 		vfs_unlock(vfsp);
 		return (error);
 	} else if (why == ROOT_REMOUNT) {
 		readonly_changed_cb(vfsp->vfs_data, B_FALSE);
 		vfsp->vfs_flag |= VFS_REMOUNT;
 
 		/* refresh mount options */
 		zfs_unregister_callbacks(vfsp->vfs_data);
 		return (zfs_register_callbacks(vfsp));
 
 	} else if (why == ROOT_UNMOUNT) {
 		zfs_unregister_callbacks((zfsvfs_t *)vfsp->vfs_data);
 		(void) zfs_sync(vfsp, 0, 0);
 		return (0);
 	}
 
 	/*
 	 * if "why" is equal to anything else other than ROOT_INIT,
 	 * ROOT_REMOUNT, or ROOT_UNMOUNT, we do not support it.
 	 */
 	return (SET_ERROR(ENOTSUP));
 }
 #endif	/* OPENSOLARIS_MOUNTROOT */
 
 /*
  * Copy the pool component of the dataset name 'osname' (everything before
  * the first '/', or the whole name if there is none) into 'poolname' as a
  * NUL-terminated string.
  *
  * Returns 0 on success, or ENAMETOOLONG when the pool component is
  * MAXNAMELEN characters or longer; 'poolname' is not written in that case.
  */
 static int
 getpoolname(const char *osname, char *poolname)
 {
 	const char *slash = strchr(osname, '/');
 	size_t len;
 
 	if (slash != NULL)
 		len = (size_t)(slash - osname);
 	else
 		len = strlen(osname);
 
 	if (len >= MAXNAMELEN)
 		return (ENAMETOOLONG);
 
 	memcpy(poolname, osname, len);
 	poolname[len] = '\0';
 	return (0);
 }
 
 /*
  * Mount (or remount) the dataset named by the "from" mount option on
  * 'vfsp'.
  *
  * The caller must pass secpolicy_fs_mount(); failing that, it must hold
  * the delegated ZFS_DELEG_PERM_MOUNT permission and, for a new mount, own
  * or be able to write to the covered vnode.  In a non-global zone the
  * dataset must also be visible and writable.
  *
  * A remount only re-registers the property callbacks.  Otherwise the
  * mount is performed by zfs_domount(), preceded by an import of the pool
  * when this is the initial root mount.
  *
  * Returns 0 on success or an errno value.
  */
 /*ARGSUSED*/
 static int
 zfs_mount(vfs_t *vfsp)
 {
 	kthread_t	*td = curthread;
 	vnode_t		*mvp = vfsp->mnt_vnodecovered;
 	cred_t		*cr = td->td_ucred;
 	char		*osname;
 	int		error = 0;
 	int		canwrite;
 
 #ifdef illumos
 	if (mvp->v_type != VDIR)
 		return (SET_ERROR(ENOTDIR));
 
 	mutex_enter(&mvp->v_lock);
 	if ((uap->flags & MS_REMOUNT) == 0 &&
 	    (uap->flags & MS_OVERLAY) == 0 &&
 	    (mvp->v_count != 1 || (mvp->v_flag & VROOT))) {
 		mutex_exit(&mvp->v_lock);
 		return (SET_ERROR(EBUSY));
 	}
 	mutex_exit(&mvp->v_lock);
 
 	/*
 	 * ZFS does not support passing unparsed data in via MS_DATA.
 	 * Users should use the MS_OPTIONSTR interface; this means
 	 * that all option parsing is already done and the options struct
 	 * can be interrogated.
 	 */
 	if ((uap->flags & MS_DATA) && uap->datalen > 0)
 		return (SET_ERROR(EINVAL));
 
 	/*
 	 * Get the objset name (the "special" mount argument).
 	 */
 	if (error = pn_get(uap->spec, fromspace, &spn))
 		return (error);
 
 	osname = spn.pn_path;
 #else	/* !illumos */
 	if (vfs_getopt(vfsp->mnt_optnew, "from", (void **)&osname, NULL))
 		return (SET_ERROR(EINVAL));
 
 	/*
 	 * If full-owner-access is enabled and delegated administration is
 	 * turned on, we must set nosuid.
 	 */
 	if (zfs_super_owner &&
 	    dsl_deleg_access(osname, ZFS_DELEG_PERM_MOUNT, cr) != ECANCELED) {
 		secpolicy_fs_mount_clearopts(cr, vfsp);
 	}
 #endif	/* illumos */
 
 	/*
 	 * Check for mount privilege?
 	 *
 	 * If we don't have privilege then see if
 	 * we have local permission to allow it
 	 */
 	error = secpolicy_fs_mount(cr, mvp, vfsp);
 	if (error) {
 		/*
 		 * Every 'goto out' inside this block returns the error from
 		 * secpolicy_fs_mount(); 'error' is not reassigned here.
 		 */
 		if (dsl_deleg_access(osname, ZFS_DELEG_PERM_MOUNT, cr) != 0)
 			goto out;
 
 		if (!(vfsp->vfs_flag & MS_REMOUNT)) {
 			vattr_t		vattr;
 
 			/*
 			 * Make sure user is the owner of the mount point
 			 * or has sufficient privileges.
 			 */
 
 			vattr.va_mask = AT_UID;
 
 			vn_lock(mvp, LK_SHARED | LK_RETRY);
 			if (VOP_GETATTR(mvp, &vattr, cr)) {
 				VOP_UNLOCK(mvp);
 				goto out;
 			}
 
 			if (secpolicy_vnode_owner(mvp, cr, vattr.va_uid) != 0 &&
 			    VOP_ACCESS(mvp, VWRITE, cr, td) != 0) {
 				VOP_UNLOCK(mvp);
 				goto out;
 			}
 			VOP_UNLOCK(mvp);
 		}
 
 		secpolicy_fs_mount_clearopts(cr, vfsp);
 	}
 
 	/*
 	 * Refuse to mount a filesystem if we are in a local zone and the
 	 * dataset is not visible.
 	 */
 	if (!INGLOBALZONE(curthread) &&
 	    (!zone_dataset_visible(osname, &canwrite) || !canwrite)) {
 		error = SET_ERROR(EPERM);
 		goto out;
 	}
 
 #ifdef SECLABEL
 	error = zfs_mount_label_policy(vfsp, osname);
 	if (error)
 		goto out;
 #endif
 
 	vfsp->vfs_flag |= MNT_NFS4ACLS;
 
 	/*
 	 * When doing a remount, we simply refresh our temporary properties
 	 * according to those options set in the current VFS options.
 	 */
 	if (vfsp->vfs_flag & MS_REMOUNT) {
 		zfsvfs_t *zfsvfs = vfsp->vfs_data;
 
 		/*
 		 * Refresh mount options with z_teardown_lock blocking I/O while
 		 * the filesystem is in an inconsistent state.
 		 * The lock also serializes this code with filesystem
 		 * manipulations between entry to zfs_suspend_fs() and return
 		 * from zfs_resume_fs().
 		 */
 		rrm_enter(&zfsvfs->z_teardown_lock, RW_WRITER, FTAG);
 		zfs_unregister_callbacks(zfsvfs);
 		error = zfs_register_callbacks(vfsp);
 		rrm_exit(&zfsvfs->z_teardown_lock, FTAG);
 		goto out;
 	}
 
 	/* Initial root mount: try hard to import the requested root pool. */
 	if ((vfsp->vfs_flag & MNT_ROOTFS) != 0 &&
 	    (vfsp->vfs_flag & MNT_UPDATE) == 0) {
 		char pname[MAXNAMELEN];
 
 		error = getpoolname(osname, pname);
 		if (error == 0)
 			error = spa_import_rootpool(pname);
 		if (error)
 			goto out;
 	}
 	/* zfs_domount() is called with Giant dropped. */
 	DROP_GIANT();
 	error = zfs_domount(vfsp, osname);
 	PICKUP_GIANT();
 
 #ifdef illumos
 	/*
 	 * Add an extra VFS_HOLD on our parent vfs so that it can't
 	 * disappear due to a forced unmount.
 	 */
 	if (error == 0 && ((zfsvfs_t *)vfsp->vfs_data)->z_issnap)
 		VFS_HOLD(mvp->v_vfsp);
 #endif
 
 out:
 	return (error);
 }
 
 /*
  * Fill in 'statp' with the space and object usage of the filesystem
  * mounted on 'vfsp', plus its type and mount names.  Returns 0 once the
  * statistics have been filled in.
  */
 static int
 zfs_statfs(vfs_t *vfsp, struct statfs *statp)
 {
 	zfsvfs_t *zfsvfs = vfsp->vfs_data;
 	uint64_t refd, avail, objs_used, objs_avail;
 	uint64_t blocks_free;
 
 	statp->f_version = STATFS_VERSION;
 
 	ZFS_ENTER(zfsvfs);
 
 	dmu_objset_space(zfsvfs->z_os, &refd, &avail, &objs_used,
 	    &objs_avail);
 
 	/*
 	 * The pool uses multiple block sizes.  The smallest supported block
 	 * size is reported as f_bsize, and the I/O size recorded in the
 	 * mount's statistics as f_iosize.
 	 */
 	statp->f_bsize = SPA_MINBLOCKSIZE;
 	statp->f_iosize = zfsvfs->z_vfs->mnt_stat.f_iosize;
 
 	/*
 	 * Block counts are expressed in units of f_bsize.  There is no root
 	 * reservation, so the available count equals the free count.
 	 */
 	blocks_free = avail / statp->f_bsize;
 	statp->f_blocks = (refd + avail) >> SPA_MINBLOCKSHIFT;
 	statp->f_bfree = blocks_free;
 	statp->f_bavail = blocks_free;
 
 	/*
 	 * ZFS doesn't preallocate files.  Free files are the smaller of the
 	 * available objects and the free blocks (each object takes at least
 	 * one block); total files are that plus the objects in use.
 	 */
 	statp->f_ffree = MIN(objs_avail, blocks_free);
 	statp->f_files = statp->f_ffree + objs_used;
 
 	/* Filesystem type and mount names. */
 	(void) strlcpy(statp->f_fstypename, "zfs", sizeof(statp->f_fstypename));
 	strlcpy(statp->f_mntfromname, vfsp->mnt_stat.f_mntfromname,
 	    sizeof(statp->f_mntfromname));
 	strlcpy(statp->f_mntonname, vfsp->mnt_stat.f_mntonname,
 	    sizeof(statp->f_mntonname));
 
 	statp->f_namemax = MAXNAMELEN - 1;
 
 	ZFS_EXIT(zfsvfs);
 	return (0);
 }
 
 /*
  * Return the root vnode of the filesystem in *vpp, locked according to
  * 'flags'.
  *
  * Returns 0 on success.  If zfs_zget() fails its error is returned; if
  * vn_lock() fails the vnode is released, *vpp is set to NULL and the lock
  * error is returned.
  */
 static int
 zfs_root(vfs_t *vfsp, int flags, vnode_t **vpp)
 {
 	zfsvfs_t *zfsvfs = vfsp->vfs_data;
 	znode_t *rootzp;
 	int error;
 
 	ZFS_ENTER(zfsvfs);
 
 	error = zfs_zget(zfsvfs, zfsvfs->z_root, &rootzp);
 	if (error != 0) {
 		ZFS_EXIT(zfsvfs);
 		return (error);
 	}
 	*vpp = ZTOV(rootzp);
 
 	ZFS_EXIT(zfsvfs);
 
 	/* The vnode lock is taken only after leaving the filesystem. */
 	error = vn_lock(*vpp, flags);
 	if (error != 0) {
 		VN_RELE(*vpp);
 		*vpp = NULL;
 	}
 	return (error);
 }
 
 /*
  * Teardown the zfsvfs::z_os.
  *
  * Note, if 'unmounting' is FALSE, we return with the 'z_teardown_lock'
  * and 'z_teardown_inactive_lock' held.
  *
  * The exception is the EIO return: when 'unmounting' is FALSE and the
  * filesystem is already unmounted or has no z_os, both locks are dropped
  * before returning EIO.  All other paths return 0.
  */
 static int
 zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting)
 {
 	znode_t	*zp;
 
 	rrm_enter(&zfsvfs->z_teardown_lock, RW_WRITER, FTAG);
 
 	if (!unmounting) {
 		/*
 		 * We purge the parent filesystem's vfsp as the parent
 		 * filesystem and all of its snapshots have their vnode's
 		 * v_vfsp set to the parent's filesystem's vfsp.  Note,
 		 * 'z_parent' is self referential for non-snapshots.
 		 */
 		(void) dnlc_purge_vfsp(zfsvfs->z_parent->z_vfs, 0);
 #ifdef FREEBSD_NAMECACHE
 		cache_purgevfs(zfsvfs->z_parent->z_vfs, true);
 #endif
 	}
 
 	/*
 	 * Close the zil. NB: Can't close the zil while zfs_inactive
 	 * threads are blocked as zil_close can call zfs_inactive.
 	 */
 	if (zfsvfs->z_log) {
 		zil_close(zfsvfs->z_log);
 		zfsvfs->z_log = NULL;
 	}
 
 	ZFS_WLOCK_TEARDOWN_INACTIVE(zfsvfs);
 
 	/*
 	 * If we are not unmounting (ie: online recv) and someone already
 	 * unmounted this file system while we were doing the switcheroo,
 	 * or a reopen of z_os failed then just bail out now.
 	 */
 	if (!unmounting && (zfsvfs->z_unmounted || zfsvfs->z_os == NULL)) {
 		ZFS_WUNLOCK_TEARDOWN_INACTIVE(zfsvfs);
 		rrm_exit(&zfsvfs->z_teardown_lock, FTAG);
 		return (SET_ERROR(EIO));
 	}
 
 	/*
 	 * At this point there are no vops active, and any new vops will
 	 * fail with EIO since we have z_teardown_lock for writer (only
 	 * relevant for forced unmount).
 	 *
 	 * Release all holds on dbufs.
 	 */
 	mutex_enter(&zfsvfs->z_znodes_lock);
 	for (zp = list_head(&zfsvfs->z_all_znodes); zp != NULL;
 	    zp = list_next(&zfsvfs->z_all_znodes, zp))
 		if (zp->z_sa_hdl) {
 			ASSERT(ZTOV(zp)->v_count >= 0);
 			zfs_znode_dmu_fini(zp);
 		}
 	mutex_exit(&zfsvfs->z_znodes_lock);
 
 	/*
 	 * If we are unmounting, set the unmounted flag and let new vops
 	 * unblock.  zfs_inactive will have the unmounted behavior, and all
 	 * other vops will fail with EIO.
 	 */
 	if (unmounting) {
 		zfsvfs->z_unmounted = B_TRUE;
 		ZFS_WUNLOCK_TEARDOWN_INACTIVE(zfsvfs);
 		rrm_exit(&zfsvfs->z_teardown_lock, FTAG);
 	}
 
 	/*
 	 * z_os will be NULL if there was an error in attempting to reopen
 	 * zfsvfs, so just return as the properties had already been
 	 * unregistered and cached data had been evicted before.
 	 */
 	if (zfsvfs->z_os == NULL)
 		return (0);
 
 	/*
 	 * Unregister properties.
 	 */
 	zfs_unregister_callbacks(zfsvfs);
 
 	/*
 	 * Evict cached data
 	 */
 	/* Wait for a sync first only if the dataset is dirty and not read-only. */
 	if (dsl_dataset_is_dirty(dmu_objset_ds(zfsvfs->z_os)) &&
 	    !(zfsvfs->z_vfs->vfs_flag & VFS_RDONLY))
 		txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0);
 	dmu_objset_evict_dbufs(zfsvfs->z_os);
 
 	return (0);
 }
 
 /*
  * Unmount the filesystem mounted on 'vfsp'.
  *
  * 'fflag' carries the unmount flags; with MS_FORCE the filesystem is
  * marked unmounted up front and vnodes are flushed with FORCECLOSE.
  *
  * The sequence is: permission check, unmount of snapshots under '.zfs',
  * vnode flush, cancel/drain of the unlinked-drain task, teardown of the
  * zfsvfs, release of the objset, destruction of the '.zfs' node and
  * finally zfs_freevfs().
  *
  * Returns 0 on success, or the error from the permission check, the
  * snapshot unmount or vflush().
  */
 /*ARGSUSED*/
 static int
 zfs_umount(vfs_t *vfsp, int fflag)
 {
 	kthread_t *td = curthread;
 	zfsvfs_t *zfsvfs = vfsp->vfs_data;
 	objset_t *os;
 	cred_t *cr = td->td_ucred;
 	int ret;
 
 	/*
 	 * Without the unmount privilege, the delegated mount permission on
 	 * the dataset is accepted instead.
 	 */
 	ret = secpolicy_fs_unmount(cr, vfsp);
 	if (ret) {
 		if (dsl_deleg_access((char *)refstr_value(vfsp->vfs_resource),
 		    ZFS_DELEG_PERM_MOUNT, cr))
 			return (ret);
 	}
 
 	/*
 	 * We purge the parent filesystem's vfsp as the parent filesystem
 	 * and all of its snapshots have their vnode's v_vfsp set to the
 	 * parent's filesystem's vfsp.  Note, 'z_parent' is self
 	 * referential for non-snapshots.
 	 */
 	(void) dnlc_purge_vfsp(zfsvfs->z_parent->z_vfs, 0);
 
 	/*
 	 * Unmount any snapshots mounted under .zfs before unmounting the
 	 * dataset itself.
 	 */
 	if (zfsvfs->z_ctldir != NULL) {
 		if ((ret = zfsctl_umount_snapshots(vfsp, fflag, cr)) != 0)
 			return (ret);
 	}
 
 	if (fflag & MS_FORCE) {
 		/*
 		 * Mark file system as unmounted before calling
 		 * vflush(FORCECLOSE). This way we ensure no future vnops
 		 * will be called and risk operating on DOOMED vnodes.
 		 */
 		rrm_enter(&zfsvfs->z_teardown_lock, RW_WRITER, FTAG);
 		zfsvfs->z_unmounted = B_TRUE;
 		rrm_exit(&zfsvfs->z_teardown_lock, FTAG);
 	}
 
 	/*
 	 * Flush all the files.
 	 */
 	ret = vflush(vfsp, 0, (fflag & MS_FORCE) ? FORCECLOSE : 0, td);
 	if (ret != 0)
 		return (ret);
 
 #ifdef illumos
 	if (!(fflag & MS_FORCE)) {
 		/*
 		 * Check the number of active vnodes in the file system.
 		 * Our count is maintained in the vfs structure, but the
 		 * number is off by 1 to indicate a hold on the vfs
 		 * structure itself.
 		 *
 		 * The '.zfs' directory maintains a reference of its
 		 * own, and any active references underneath are
 		 * reflected in the vnode count.
 		 */
 		if (zfsvfs->z_ctldir == NULL) {
 			if (vfsp->vfs_count > 1)
 				return (SET_ERROR(EBUSY));
 		} else {
 			if (vfsp->vfs_count > 2 ||
 			    zfsvfs->z_ctldir->v_count > 1)
 				return (SET_ERROR(EBUSY));
 		}
 	}
 #endif
 
 	/*
 	 * Keep cancelling the unlinked-drain task, draining it whenever
 	 * the cancel returns non-zero, until a cancel returns 0.
 	 */
 	while (taskqueue_cancel(zfsvfs_taskq->tq_queue,
 	    &zfsvfs->z_unlinked_drain_task, NULL) != 0)
 		taskqueue_drain(zfsvfs_taskq->tq_queue,
 		    &zfsvfs->z_unlinked_drain_task);
 
 	VERIFY(zfsvfs_teardown(zfsvfs, B_TRUE) == 0);
 	os = zfsvfs->z_os;
 
 	/*
 	 * z_os will be NULL if there was an error in
 	 * attempting to reopen zfsvfs.
 	 */
 	if (os != NULL) {
 		/*
 		 * Unset the objset user_ptr.
 		 */
 		mutex_enter(&os->os_user_ptr_lock);
 		dmu_objset_set_user(os, NULL);
 		mutex_exit(&os->os_user_ptr_lock);
 
 		/*
 		 * Finally release the objset
 		 */
 		dmu_objset_disown(os, zfsvfs);
 	}
 
 	/*
 	 * We can now safely destroy the '.zfs' directory node.
 	 */
 	if (zfsvfs->z_ctldir != NULL)
 		zfsctl_destroy(zfsvfs);
 	zfs_freevfs(vfsp);
 
 	return (0);
 }
 
 /*
  * Look up the vnode for object number 'ino' and return it in *vpp, locked
  * according to 'flags'.
  *
  * Returns EOPNOTSUPP for the '.zfs' control inodes and the shares
  * directory, EINVAL if the znode has been unlinked, or the error from
  * zfs_zget() or vn_lock().  *vpp is set to NULL on those lookup and lock
  * failures.
  */
 static int
 zfs_vget(vfs_t *vfsp, ino_t ino, int flags, vnode_t **vpp)
 {
 	zfsvfs_t *zfsvfs = vfsp->vfs_data;
 	znode_t *zp;
 	int err;
 
 	/*
 	 * zfs_zget() can't operate on virtual entries like .zfs/ or
 	 * .zfs/snapshot/ directories, that's why we return EOPNOTSUPP.
 	 * This will make NFS to switch to LOOKUP instead of using VGET.
 	 */
 	if (ino == ZFSCTL_INO_ROOT || ino == ZFSCTL_INO_SNAPDIR ||
 	    (zfsvfs->z_shares_dir != 0 && ino == zfsvfs->z_shares_dir))
 		return (EOPNOTSUPP);
 
 	ZFS_ENTER(zfsvfs);
 	err = zfs_zget(zfsvfs, ino, &zp);
 	if (err == 0 && zp->z_unlinked) {
 		/* Unlinked znodes are not handed out. */
 		vrele(ZTOV(zp));
 		err = EINVAL;
 	}
 	if (err != 0) {
 		ZFS_EXIT(zfsvfs);
 		*vpp = NULL;
 		return (err);
 	}
 	*vpp = ZTOV(zp);
 	ZFS_EXIT(zfsvfs);
 
 	err = vn_lock(*vpp, flags);
 	if (err != 0) {
 		vrele(*vpp);
 		*vpp = NULL;
 	}
 	return (err);
 }
 
 /*
  * Check export settings for 'vfsp' by forwarding all arguments to
  * vfs_stdcheckexp(), substituting the parent filesystem's vfs for 'vfsp'.
  * Returns whatever vfs_stdcheckexp() returns.
  */
 static int
-zfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, int *extflagsp,
-    struct ucred **credanonp, int *numsecflavors, int **secflavors)
+zfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, uint64_t *extflagsp,
+    struct ucred **credanonp, int *numsecflavors, int *secflavors)
 {
 	zfsvfs_t *zfsvfs = vfsp->vfs_data;
 
 	/*
 	 * If this is regular file system vfsp is the same as
 	 * zfsvfs->z_parent->z_vfs, but if it is snapshot,
 	 * zfsvfs->z_parent->z_vfs represents parent file system
 	 * which we have to use here, because only this file system
 	 * has mnt_export configured.
 	 */
 	return (vfs_stdcheckexp(zfsvfs->z_parent->z_vfs, nam, extflagsp,
 	    credanonp, numsecflavors, secflavors));
 }
 
 CTASSERT(SHORT_FID_LEN <= sizeof(struct fid));
 CTASSERT(LONG_FID_LEN <= sizeof(struct fid));
 
 /*
  * Translate the file identifier 'fidp' into a vnode, returned in *vpp
  * locked according to 'flags'.
  *
  * A long FID presented on a non-snapshot filesystem also names an objset,
  * which is looked up with zfsctl_lookup_objset() and used in place of
  * this filesystem.  The object number and generation are then decoded
  * from the FID.  FIDs naming the '.zfs' root, '.zfs/snapshot' or the
  * shares directory are resolved through the control directory; all others
  * through zfs_zget(), followed by a generation check.
  *
  * Returns 0 on success.  Returns EINVAL for an unknown FID length, a
  * failed objset lookup, an unlinked znode or a generation mismatch;
  * otherwise the error from zfs_zget() or vn_lock().  *vpp is NULL on
  * failure.
  */
 static int
 zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, int flags, vnode_t **vpp)
 {
 	struct componentname cn;
 	zfsvfs_t	*zfsvfs = vfsp->vfs_data;
 	znode_t		*zp;
 	vnode_t		*dvp;
 	uint64_t	object = 0;
 	uint64_t	fid_gen = 0;
 	uint64_t	gen_mask;
 	uint64_t	zp_gen;
 	int 		i, err;
 
 	*vpp = NULL;
 
 	ZFS_ENTER(zfsvfs);
 
 	/*
 	 * On FreeBSD we can get snapshot's mount point or its parent file
 	 * system mount point depending if snapshot is already mounted or not.
 	 */
 	if (zfsvfs->z_parent == zfsvfs && fidp->fid_len == LONG_FID_LEN) {
 		zfid_long_t	*zlfid = (zfid_long_t *)fidp;
 		uint64_t	objsetid = 0;
 		uint64_t	setgen = 0;
 
 		for (i = 0; i < sizeof (zlfid->zf_setid); i++)
 			objsetid |= ((uint64_t)zlfid->zf_setid[i]) << (8 * i);
 
 		for (i = 0; i < sizeof (zlfid->zf_setgen); i++)
 			setgen |= ((uint64_t)zlfid->zf_setgen[i]) << (8 * i);
 
 		ZFS_EXIT(zfsvfs);
 
 		/* From here on 'zfsvfs' refers to the objset named by the FID. */
 		err = zfsctl_lookup_objset(vfsp, objsetid, &zfsvfs);
 		if (err)
 			return (SET_ERROR(EINVAL));
 		ZFS_ENTER(zfsvfs);
 	}
 
 	if (fidp->fid_len == SHORT_FID_LEN || fidp->fid_len == LONG_FID_LEN) {
 		zfid_short_t	*zfid = (zfid_short_t *)fidp;
 
 		for (i = 0; i < sizeof (zfid->zf_object); i++)
 			object |= ((uint64_t)zfid->zf_object[i]) << (8 * i);
 
 		for (i = 0; i < sizeof (zfid->zf_gen); i++)
 			fid_gen |= ((uint64_t)zfid->zf_gen[i]) << (8 * i);
 	} else {
 		ZFS_EXIT(zfsvfs);
 		return (SET_ERROR(EINVAL));
 	}
 
 	/*
 	 * A zero fid_gen means we are in .zfs or the .zfs/snapshot
 	 * directory tree. If the object == zfsvfs->z_shares_dir, then
 	 * we are in the .zfs/shares directory tree.
 	 */
 	if ((fid_gen == 0 &&
 	     (object == ZFSCTL_INO_ROOT || object == ZFSCTL_INO_SNAPDIR)) ||
 	    (zfsvfs->z_shares_dir != 0 && object == zfsvfs->z_shares_dir)) {
 		ZFS_EXIT(zfsvfs);
 		VERIFY0(zfsctl_root(zfsvfs, LK_SHARED, &dvp));
 		if (object == ZFSCTL_INO_SNAPDIR) {
 			cn.cn_nameptr = "snapshot";
 			cn.cn_namelen = strlen(cn.cn_nameptr);
 			cn.cn_nameiop = LOOKUP;
 			cn.cn_flags = ISLASTCN | LOCKLEAF;
 			cn.cn_lkflags = flags;
 			VERIFY0(VOP_LOOKUP(dvp, vpp, &cn));
 			vput(dvp);
 		} else if (object == zfsvfs->z_shares_dir) {
 			/*
 			 * XXX This branch must not be taken,
 			 * if it is, then the lookup below will
 			 * explode.
 			 */
 			cn.cn_nameptr = "shares";
 			cn.cn_namelen = strlen(cn.cn_nameptr);
 			cn.cn_nameiop = LOOKUP;
 			cn.cn_flags = ISLASTCN;
 			cn.cn_lkflags = flags;
 			VERIFY0(VOP_LOOKUP(dvp, vpp, &cn));
 			vput(dvp);
 		} else {
 			*vpp = dvp;
 		}
 		/*
 		 * Every step above is VERIFY'd, so this path succeeded.
 		 * 'err' must not be returned here: it is only assigned when
 		 * the long-FID objset lookup ran, and is otherwise
 		 * uninitialized.
 		 */
 		return (0);
 	}
 
 	/*
 	 * 'i' still holds sizeof (zf_gen) from the decoding loop above, so
 	 * the mask covers exactly the generation bits stored in the FID.
 	 */
 	gen_mask = -1ULL >> (64 - 8 * i);
 
 	dprintf("getting %llu [%u mask %llx]\n", object, fid_gen, gen_mask);
 	err = zfs_zget(zfsvfs, object, &zp);
 	if (err != 0) {
 		ZFS_EXIT(zfsvfs);
 		return (err);
 	}
 	(void) sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs), &zp_gen,
 	    sizeof (uint64_t));
 	zp_gen = zp_gen & gen_mask;
 	/* A masked generation of 0 is compared as 1. */
 	if (zp_gen == 0)
 		zp_gen = 1;
 	if (zp->z_unlinked || zp_gen != fid_gen) {
 		dprintf("znode gen (%u) != fid gen (%u)\n", zp_gen, fid_gen);
 		vrele(ZTOV(zp));
 		ZFS_EXIT(zfsvfs);
 		return (SET_ERROR(EINVAL));
 	}
 
 	*vpp = ZTOV(zp);
 	ZFS_EXIT(zfsvfs);
 	err = vn_lock(*vpp, flags);
 	if (err != 0) {
 		/*
 		 * Drop the reference obtained from zfs_zget(), as zfs_vget()
 		 * and zfs_root() do; clearing *vpp alone would leak it.
 		 */
 		vrele(*vpp);
 		*vpp = NULL;
 		return (err);
 	}
 	vnode_create_vobject(*vpp, zp->z_size, curthread);
 	return (0);
 }
 
 /*
  * Block out VOPs and close zfsvfs_t::z_os
  *
  * Note, if successful, then we return with the 'z_teardown_lock' and
  * 'z_teardown_inactive_lock' write held.  We leave ownership of the underlying
  * dataset and objset intact so that they can be atomically handed off during
  * a subsequent rollback or recv operation and the resume thereafter.
  */
 int
 zfs_suspend_fs(zfsvfs_t *zfsvfs)
 {
 
 	/*
 	 * Nothing to do beyond the teardown itself (called with B_FALSE);
 	 * its result, zero or an error, is the caller's result.
 	 */
 	return (zfsvfs_teardown(zfsvfs, B_FALSE));
 }
 
 /*
  * Rebuild SA and release VOPs.  Note that ownership of the underlying dataset
  * is an invariant across any of the operations that can be performed while the
  * filesystem was suspended.  Whether it succeeded or failed, the preconditions
  * are the same: the relevant objset and associated dataset are owned by
  * zfsvfs, held, and long held on entry.
  */
 int
 zfs_resume_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds)
 {
 	int err;
 	znode_t *zp;
 
 	/* Both teardown locks must be write-held on entry. */
 	ASSERT(RRM_WRITE_HELD(&zfsvfs->z_teardown_lock));
 	ASSERT(ZFS_TEARDOWN_INACTIVE_WLOCKED(zfsvfs));
 
 	/*
 	 * We already own this, so just update the objset_t, as the one we
 	 * had before may have been evicted.
 	 */
 	objset_t *os;
 	VERIFY3P(ds->ds_owner, ==, zfsvfs);
 	VERIFY(dsl_dataset_long_held(ds));
 	VERIFY0(dmu_objset_from_ds(ds, &os));
 
 	/*
 	 * Re-initialize zfsvfs against the objset.  On failure skip the
 	 * setup and znode refresh and go straight to releasing the locks.
 	 */
 	err = zfsvfs_init(zfsvfs, os);
 	if (err != 0)
 		goto bail;
 
 	VERIFY(zfsvfs_setup(zfsvfs, B_FALSE) == 0);
 
 	zfs_set_fuid_feature(zfsvfs);
 
 	/*
 	 * Attempt to re-establish all the active znodes with
 	 * their dbufs.  If a zfs_rezget() fails, then we'll let
 	 * any potential callers discover that via ZFS_ENTER_VERIFY_VP
 	 * when they try to use their znode.
 	 */
 	mutex_enter(&zfsvfs->z_znodes_lock);
 	for (zp = list_head(&zfsvfs->z_all_znodes); zp;
 	    zp = list_next(&zfsvfs->z_all_znodes, zp)) {
 		(void) zfs_rezget(zp);
 	}
 	mutex_exit(&zfsvfs->z_znodes_lock);
 
 bail:
 	/* release the VOPs (reached on both the success and failure paths) */
 	ZFS_WUNLOCK_TEARDOWN_INACTIVE(zfsvfs);
 	rrm_exit(&zfsvfs->z_teardown_lock, FTAG);
 
 	if (err) {
 		/*
 		 * Since we couldn't setup the sa framework, try to force
 		 * unmount this file system.
 		 */
 		if (vn_vfswlock(zfsvfs->z_vfs->vfs_vnodecovered) == 0) {
 			/* The result of the forced unmount is ignored. */
 			vfs_ref(zfsvfs->z_vfs);
 			(void) dounmount(zfsvfs->z_vfs, MS_FORCE, curthread);
 		}
 	}
 	/* The zfsvfs_init() error (or 0) is what the caller sees. */
 	return (err);
 }
 
 /*
  * Release the zfsvfs_t hanging off 'vfsp' and decrement the count of
  * active ZFS file systems.
  */
 static void
 zfs_freevfs(vfs_t *vfsp)
 {
 	zfsvfs_t *fs;
 
 	fs = vfsp->vfs_data;
 
 #ifdef illumos
 	/*
 	 * A snapshot mounted through zfs_mount() carries an extra VFS_HOLD
 	 * on its parent which must be dropped now.  The zfs_mountroot()
 	 * path never took that hold, hence rootvfs is excluded.
 	 */
 	if (vfsp != rootvfs && fs->z_issnap)
 		VFS_RELE(fs->z_parent->z_vfs);
 #endif
 
 	zfsvfs_free(fs);
 	atomic_dec_32(&zfs_active_fs_count);
 }
 
 #ifdef __i386__
 /*
  * Value of desiredvnodes saved by zfs_vnodes_adjust() and written back by
  * zfs_vnodes_adjust_back().
  */
 static int desiredvnodes_backup;
 #endif
 
 /*
  * On i386, lower desiredvnodes to three quarters of its computed default,
  * but only when the current value still equals that default.  A no-op on
  * every other architecture.
  */
 static void
 zfs_vnodes_adjust(void)
 {
 #ifdef __i386__
 	int computed;
 
 	/* Keep the current limit so it can be restored later. */
 	desiredvnodes_backup = desiredvnodes;
 
 	/*
 	 * Same formula vntblinit() uses.  A match with desiredvnodes means
 	 * the administrator has not tuned the limit, so it is safe to
 	 * shrink it.
 	 */
 	computed = min(maxproc + vm_cnt.v_page_count / 4, 2 *
 	    vm_kmem_size / (5 * (sizeof(struct vm_object) +
 	    sizeof(struct vnode))));
 	if (computed != desiredvnodes)
 		return;
 	desiredvnodes = (3 * computed) / 4;
 #endif
 }
 
 /*
  * Undo zfs_vnodes_adjust(): on i386 put the saved desiredvnodes value
  * back; elsewhere there is nothing to restore.
  */
 static void
 zfs_vnodes_adjust_back(void)
 {
 #ifdef __i386__
 
 	desiredvnodes = desiredvnodes_backup;
 #endif
 }
 
 /*
  * One-time initialization of the ZFS POSIX layer: announce the ZPL
  * version, then bring up each subsystem in turn.
  */
 void
 zfs_init(void)
 {
 
 	printf("ZFS filesystem version: " ZPL_VERSION_STRING "\n");
 
 	/* Set up the structures behind the .zfs directory. */
 	zfsctl_init();
 
 	/* Set up the znode cache, the vnode operations and so on. */
 	zfs_znode_init();
 
 	/*
 	 * The default vnode count was sized with the UFS inode in mind
 	 * and is too large for ZFS on i386, so scale it down.
 	 */
 	zfs_vnodes_adjust();
 
 	dmu_objset_register_type(DMU_OST_ZFS, zfs_space_delta_cb);
 #ifdef __FreeBSD__
 	zfsvfs_taskq = taskq_create("zfsvfs", 1, minclsyspri, 0, 0, 0);
 #endif
 }
 
 /*
  * Tear down what zfs_init() set up: the FreeBSD task queue, the .zfs
  * directory structures, the znode layer, and finally the vnode limit
  * adjustment.
  */
 void
 zfs_fini(void)
 {
 
 #ifdef __FreeBSD__
 	taskq_destroy(zfsvfs_taskq);
 #endif
 	zfsctl_fini();
 	zfs_znode_fini();
 	zfs_vnodes_adjust_back();
 }
 
 /*
  * Report whether any ZFS file system is still counted as active.
  * Returns 1 when zfs_active_fs_count is non-zero, 0 otherwise.
  */
 int
 zfs_busy(void)
 {
 
 	return (zfs_active_fs_count == 0 ? 0 : 1);
 }
 
 /*
  * Raise the ZPL version recorded for a file system to 'newvers'.
  *
  * Returns EINVAL when newvers lies outside
  * [ZPL_VERSION_INITIAL, ZPL_VERSION] or is lower than the current
  * z_version, ENOTSUP when zfs_spa_version_map(newvers) exceeds the pool's
  * SPA version, the error from dmu_tx_assign() or zap_update() if either
  * fails, and 0 on success.
  */
 int
 zfs_set_version(zfsvfs_t *zfsvfs, uint64_t newvers)
 {
 	int error;
 	objset_t *os = zfsvfs->z_os;
 	dmu_tx_t *tx;
 
 	if (newvers < ZPL_VERSION_INITIAL || newvers > ZPL_VERSION)
 		return (SET_ERROR(EINVAL));
 
 	/* Downgrades are refused. */
 	if (newvers < zfsvfs->z_version)
 		return (SET_ERROR(EINVAL));
 
 	if (zfs_spa_version_map(newvers) >
 	    spa_version(dmu_objset_spa(zfsvfs->z_os)))
 		return (SET_ERROR(ENOTSUP));
 
 	/*
 	 * Declare everything the transaction will touch: the version entry
 	 * in the master node and, when switching to SA, the ZFS_SA_ATTRS
 	 * entry plus a newly created ZAP object.
 	 */
 	tx = dmu_tx_create(os);
 	dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_FALSE, ZPL_VERSION_STR);
 	if (newvers >= ZPL_VERSION_SA && !zfsvfs->z_use_sa) {
 		dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_TRUE,
 		    ZFS_SA_ATTRS);
 		dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
 	}
 	error = dmu_tx_assign(tx, TXG_WAIT);
 	if (error) {
 		/* Never assigned, so the transaction is aborted. */
 		dmu_tx_abort(tx);
 		return (error);
 	}
 
 	error = zap_update(os, MASTER_NODE_OBJ, ZPL_VERSION_STR,
 	    8, 1, &newvers, tx);
 
 	if (error) {
 		/* Already assigned: the transaction is committed, not aborted. */
 		dmu_tx_commit(tx);
 		return (error);
 	}
 
 	if (newvers >= ZPL_VERSION_SA && !zfsvfs->z_use_sa) {
 		uint64_t sa_obj;
 
 		ASSERT3U(spa_version(dmu_objset_spa(zfsvfs->z_os)), >=,
 		    SPA_VERSION_SA);
 		/* Create the SA master node and record it under ZFS_SA_ATTRS. */
 		sa_obj = zap_create(os, DMU_OT_SA_MASTER_NODE,
 		    DMU_OT_NONE, 0, tx);
 
 		error = zap_add(os, MASTER_NODE_OBJ,
 		    ZFS_SA_ATTRS, 8, 1, &sa_obj, tx);
 		ASSERT0(error);
 
 		VERIFY(0 == sa_set_sa_object(os, sa_obj));
 		sa_register_update_callback(os, zfs_sa_upgrade);
 	}
 
 	spa_history_log_internal_ds(dmu_objset_ds(os), "upgrade", tx,
 	    "from %llu to %llu", zfsvfs->z_version, newvers);
 
 	dmu_tx_commit(tx);
 
 	/* Update the in-core copies only after the commit. */
 	zfsvfs->z_version = newvers;
 	os->os_version = newvers;
 
 	zfs_set_fuid_feature(zfsvfs);
 
 	return (0);
 }
 
 /*
  * Read a property stored within the master node.
  */
 int
 zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value)
 {
 	uint64_t *slot = NULL;
 	const char *zapname;
 	int rc;
 
 	/*
 	 * Locate the objset_t field that caches this property, if the
 	 * property is one of the cached ones and an objset was supplied.
 	 */
 	if (os != NULL) {
 		if (prop == ZFS_PROP_VERSION)
 			slot = &os->os_version;
 		else if (prop == ZFS_PROP_NORMALIZE)
 			slot = &os->os_normalization;
 		else if (prop == ZFS_PROP_UTF8ONLY)
 			slot = &os->os_utf8only;
 		else if (prop == ZFS_PROP_CASE)
 			slot = &os->os_casesensitivity;
 	}
 
 	/* Fast path: an already-populated cache entry answers the query. */
 	if (slot != NULL && *slot != OBJSET_PROP_UNINITIALIZED) {
 		*value = *slot;
 		return (0);
 	}
 
 	/*
 	 * Cache miss.  Fetch the value from the master node; the version
 	 * property is stored under its own name rather than the one
 	 * zfs_prop_to_name() yields.
 	 */
 	zapname = (prop == ZFS_PROP_VERSION) ?
 	    ZPL_VERSION_STR : zfs_prop_to_name(prop);
 
 	rc = ENOENT;
 	if (os != NULL) {
 		ASSERT3U(os->os_phys->os_type, ==, DMU_OST_ZFS);
 		rc = zap_lookup(os, MASTER_NODE_OBJ, zapname, 8, 1, value);
 	}
 
 	if (rc == ENOENT) {
 		/* Nothing stored (or no objset): fall back to the default. */
 		switch (prop) {
 		case ZFS_PROP_VERSION:
 			*value = ZPL_VERSION;
 			break;
 		case ZFS_PROP_NORMALIZE:
 		case ZFS_PROP_UTF8ONLY:
 			*value = 0;
 			break;
 		case ZFS_PROP_CASE:
 			*value = ZFS_CASE_SENSITIVE;
 			break;
 		default:
 			return (rc);
 		}
 		rc = 0;
 	}
 
 	/* Remember a successfully obtained value for next time. */
 	if (rc == 0 && slot != NULL)
 		*slot = *value;
 
 	return (rc);
 }
 
 /*
  * Return true if the corresponding vfs's unmounted flag is set.
  * Otherwise return false.
  * If this function returns true we know VFS unmount has been initiated.
  */
 boolean_t
 zfs_get_vfs_flag_unmounted(objset_t *os)
 {
 	zfsvfs_t *user;
 	boolean_t result;
 
 	ASSERT(dmu_objset_type(os) == DMU_OST_ZFS);
 
 	/* The user pointer is read under os_user_ptr_lock. */
 	mutex_enter(&os->os_user_ptr_lock);
 	user = dmu_objset_get_user(os);
 	if (user == NULL || user->z_vfs == NULL)
 		result = B_FALSE;
 	else if ((user->z_vfs->mnt_kern_flag & MNTK_UNMOUNT) != 0)
 		result = B_TRUE;
 	else
 		result = B_FALSE;
 	mutex_exit(&os->os_user_ptr_lock);
 
 	return (result);
 }
 
 #ifdef _KERNEL
 /*
  * Walk the mount list and rewrite every f_mntfromname that is either
  * exactly 'oldname' or starts with 'oldname' followed by '/' or '@',
  * substituting 'newname' for the 'oldname' part.
  */
 void
 zfsvfs_update_fromname(const char *oldname, const char *newname)
 {
 	char renamed[MAXPATHLEN];
 	struct mount *mp;
 	char *from;
 	size_t prefixlen;
 
 	prefixlen = strlen(oldname);
 
 	mtx_lock(&mountlist_mtx);
 	TAILQ_FOREACH(mp, &mountlist, mnt_list) {
 		from = mp->mnt_stat.f_mntfromname;
 		if (strcmp(from, oldname) == 0) {
 			/* The renamed dataset itself. */
 			(void)strlcpy(from, newname,
 			    sizeof(mp->mnt_stat.f_mntfromname));
 		} else if (strncmp(from, oldname, prefixlen) == 0 &&
 		    (from[prefixlen] == '/' || from[prefixlen] == '@')) {
 			/* Name continues with '/' or '@': keep that suffix. */
 			(void)snprintf(renamed, sizeof(renamed), "%s%s",
 			    newname, from + prefixlen);
 			(void)strlcpy(from, renamed,
 			    sizeof(mp->mnt_stat.f_mntfromname));
 		}
 	}
 	mtx_unlock(&mountlist_mtx);
 }
 #endif
Index: head/sys/fs/cd9660/cd9660_vfsops.c
===================================================================
--- head/sys/fs/cd9660/cd9660_vfsops.c	(revision 362157)
+++ head/sys/fs/cd9660/cd9660_vfsops.c	(revision 362158)
@@ -1,854 +1,852 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 1994
  *	The Regents of the University of California.  All rights reserved.
  *
  * This code is derived from software contributed to Berkeley
  * by Pace Willisson (pace@blitz.com).  The Rock Ridge Extension
  * Support code is derived from software contributed to Berkeley
  * by Atsushi Murai (amurai@spec.co.jp).
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)cd9660_vfsops.c	8.18 (Berkeley) 5/22/95
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/namei.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/kernel.h>
 #include <sys/vnode.h>
 #include <sys/mount.h>
 #include <sys/bio.h>
 #include <sys/buf.h>
 #include <sys/cdio.h>
 #include <sys/conf.h>
 #include <sys/fcntl.h>
 #include <sys/malloc.h>
 #include <sys/stat.h>
 #include <sys/syslog.h>
 #include <sys/iconv.h>
 
 #include <fs/cd9660/iso.h>
 #include <fs/cd9660/iso_rrip.h>
 #include <fs/cd9660/cd9660_node.h>
 #include <fs/cd9660/cd9660_mount.h>
 
 #include <geom/geom.h>
 #include <geom/geom_vfs.h>
 
 MALLOC_DEFINE(M_ISOFSMNT, "isofs_mount", "ISOFS mount structure");
 MALLOC_DEFINE(M_ISOFSNODE, "isofs_node", "ISOFS vnode private part");
 
 struct iconv_functions *cd9660_iconv = NULL;
 
 /* Forward declarations of the VFS entry points defined in this file. */
 static vfs_mount_t	cd9660_mount;
 static vfs_cmount_t	cd9660_cmount;
 static vfs_unmount_t	cd9660_unmount;
 static vfs_root_t	cd9660_root;
 static vfs_statfs_t	cd9660_statfs;
 static vfs_vget_t	cd9660_vget;
 static vfs_fhtovp_t	cd9660_fhtovp;
 
 /* VFS operations vector registered for the "cd9660" file system type. */
 static struct vfsops cd9660_vfsops = {
 	.vfs_fhtovp =		cd9660_fhtovp,
 	.vfs_mount =		cd9660_mount,
 	.vfs_cmount =		cd9660_cmount,
 	.vfs_root =		cd9660_root,
 	.vfs_statfs =		cd9660_statfs,
 	.vfs_unmount =		cd9660_unmount,
 	.vfs_vget =		cd9660_vget,
 };
 /* Registered with VFCF_READONLY. */
 VFS_SET(cd9660_vfsops, cd9660, VFCF_READONLY);
 MODULE_VERSION(cd9660, 1);
 
 /* File-local helpers defined further down. */
 static int cd9660_vfs_hash_cmp(struct vnode *vp, void *pino);
 static int iso_mountfs(struct vnode *devvp, struct mount *mp);
 
 /*
  * VFS Operations.
  */
 
 /*
  * Copy a struct iso_args in from user space ('data'), re-express each of
  * its fields as a named mount argument appended to 'ma', and hand the
  * result to kernel_mount().  Returns the copyin() error if the copy fails,
  * otherwise whatever kernel_mount() returns.
  */
 static int
 cd9660_cmount(struct mntarg *ma, void *data, uint64_t flags)
 {
 	struct iso_args args;
-	struct export_args exp;
 	int error;
 
 	error = copyin(data, &args, sizeof args);
 	if (error)
 		return (error);
-	vfs_oexport_conv(&args.export, &exp);
 
 	ma = mount_argsu(ma, "from", args.fspec, MAXPATHLEN);
-	ma = mount_arg(ma, "export", &exp, sizeof(exp));
+	ma = mount_arg(ma, "export", &args.export, sizeof(args.export));
 	ma = mount_argsu(ma, "cs_disk", args.cs_disk, 64);
 	ma = mount_argsu(ma, "cs_local", args.cs_local, 64);
 	ma = mount_argf(ma, "ssector", "%u", args.ssector);
 	/*
 	 * One mount_argb() call per ISOFSMNT_* flag bit.  Note that the
 	 * NORRIP and NOJOLIET tests are negated while the others are not.
 	 * NOTE(review): presumably mount_argb() strips the "no" prefix when
 	 * the flag is true -- confirm against its definition.
 	 */
 	ma = mount_argb(ma, !(args.flags & ISOFSMNT_NORRIP), "norrip");
 	ma = mount_argb(ma, args.flags & ISOFSMNT_GENS, "nogens");
 	ma = mount_argb(ma, args.flags & ISOFSMNT_EXTATT, "noextatt");
 	ma = mount_argb(ma, !(args.flags & ISOFSMNT_NOJOLIET), "nojoliet");
 	ma = mount_argb(ma,
 	    args.flags & ISOFSMNT_BROKENJOLIET, "nobrokenjoliet");
 	ma = mount_argb(ma, args.flags & ISOFSMNT_KICONV, "nokiconv");
 
 	error = kernel_mount(ma, flags);
 
 	return (error);
 }
 
 /*
  * Mount (or update the mount of) a cd9660 file system.
  *
  * The device is named by the "from" mount option.  The mount is always
  * forced read-only.  For an update that carries the "export" option the
  * function returns 0 without touching the device.  Otherwise the device
  * path is looked up, checked to be a disk and checked for read access,
  * and then either mounted through iso_mountfs() (new mount) or compared
  * against the already-mounted device (update).
  *
  * Returns 0 on success or an errno value.
  */
 static int
 cd9660_mount(struct mount *mp)
 {
 	struct vnode *devvp;
 	struct thread *td;
 	char *fspec;
 	int error;
 	accmode_t accmode;
 	struct nameidata ndp;
 	struct iso_mnt *imp = NULL;
 
 	td = curthread;
 
 	/*
 	 * Unconditionally mount as read-only.
 	 */
 	MNT_ILOCK(mp);
 	mp->mnt_flag |= MNT_RDONLY;
 	MNT_IUNLOCK(mp);
 
 	fspec = vfs_getopts(mp->mnt_optnew, "from", &error);
 	if (error)
 		return (error);
 
 	/* Only dereferenced below on the MNT_UPDATE path. */
 	imp = VFSTOISOFS(mp);
 
 	if (mp->mnt_flag & MNT_UPDATE) {
 		if (vfs_flagopt(mp->mnt_optnew, "export", NULL, 0))
 			return (0);
 	}
 	/*
 	 * Not an update, or updating the name: look up the name
 	 * and verify that it refers to a sensible block device.
 	 */
 	NDINIT(&ndp, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspec, td);
 	if ((error = namei(&ndp)))
 		return (error);
 	NDFREE(&ndp, NDF_ONLY_PNBUF);
 	devvp = ndp.ni_vp;
 
 	if (!vn_isdisk(devvp, &error)) {
 		vput(devvp);
 		return (error);
 	}
 
 	/*
 	 * Verify that user has necessary permissions on the device,
 	 * or has superuser abilities
 	 */
 	accmode = VREAD;
 	error = VOP_ACCESS(devvp, accmode, td->td_ucred, td);
 	if (error)
 		error = priv_check(td, PRIV_VFS_MOUNT_PERM);
 	if (error) {
 		vput(devvp);
 		return (error);
 	}
 
 	if ((mp->mnt_flag & MNT_UPDATE) == 0) {
 		error = iso_mountfs(devvp, mp);
 		/*
 		 * iso_mountfs() calls VOP_UNLOCK() on devvp, so only the
 		 * reference is dropped here (vrele, not vput).
 		 */
 		if (error)
 			vrele(devvp);
 	} else {
 		/* An update may not switch to a different device. */
 		if (devvp != imp->im_devvp)
 			error = EINVAL;	/* needs translation */
 		vput(devvp);
 	}
 	if (error)
 		return (error);
 	vfs_mountedfrom(mp, fspec);
 	return (0);
 }
 
 /*
  * Common code for mount and mountroot
  */
 /*
  * Open the device behind 'devvp', locate and validate the ISO volume
  * descriptors, and populate a freshly allocated struct iso_mnt that is
  * attached to 'mp'.
  *
  * devvp is unlocked (VOP_UNLOCK) by this function on both the success and
  * the failure path.  Returns 0 on success; on failure every buffer, the
  * GEOM consumer, the iso_mnt and the device reference taken here are
  * released and an errno value is returned.
  */
 static int
 iso_mountfs(devvp, mp)
 	struct vnode *devvp;
 	struct mount *mp;
 {
 	struct iso_mnt *isomp = NULL;
 	struct buf *bp = NULL;
 	struct buf *pribp = NULL, *supbp = NULL;
 	struct cdev *dev;
 	int error = EINVAL;
 	int high_sierra = 0;
 	int iso_bsize;
 	int iso_blknum;
 	int joliet_level;
 	int isverified = 0;
 	struct iso_volume_descriptor *vdp = NULL;
 	struct iso_primary_descriptor *pri = NULL;
 	struct iso_sierra_primary_descriptor *pri_sierra = NULL;
 	struct iso_supplementary_descriptor *sup = NULL;
 	struct iso_directory_record *rootp;
 	int logical_block_size, ssector;
 	struct g_consumer *cp;
 	struct bufobj *bo;
 	char *cs_local, *cs_disk;
 
 	/* Hold the device; dropped again at 'out' on failure. */
 	dev = devvp->v_rdev;
 	dev_ref(dev);
 	g_topology_lock();
 	error = g_vfs_open(devvp, &cp, "cd9660", 0);
 	if (error == 0)
 		g_getattr("MNT::verified", cp, &isverified);
 	g_topology_unlock();
 	/* The vnode lock is dropped whether or not the open succeeded. */
 	VOP_UNLOCK(devvp);
 	if (error)
 		goto out;
 	/* Take the device's I/O size limit if it has one, capped at MAXPHYS. */
 	if (devvp->v_rdev->si_iosize_max != 0)
 		mp->mnt_iosize_max = devvp->v_rdev->si_iosize_max;
 	if (mp->mnt_iosize_max > MAXPHYS)
 		mp->mnt_iosize_max = MAXPHYS;
 
 	bo = &devvp->v_bufobj;
 
 	/* This is the "logical sector size".  The standard says this
 	 * should be 2048 or the physical sector size on the device,
 	 * whichever is greater.
 	 */
 	if ((ISO_DEFAULT_BLOCK_SIZE % cp->provider->sectorsize) != 0) {
 		error = EINVAL;
 		goto out;
 	}
 
 	iso_bsize = cp->provider->sectorsize;
 
 	joliet_level = 0;
 	/* "ssector" option: start-sector offset; defaults to 0 if absent. */
 	if (1 != vfs_scanopt(mp->mnt_optnew, "ssector", "%d", &ssector))
 		ssector = 0;
 	/*
 	 * Scan volume descriptors in blocks [16 + ssector, 100 + ssector),
 	 * stopping early at a terminator (ISO_VD_END).
 	 */
 	for (iso_blknum = 16 + ssector;
 	     iso_blknum < 100 + ssector;
 	     iso_blknum++) {
 		if ((error = bread(devvp, iso_blknum * btodb(ISO_DEFAULT_BLOCK_SIZE),
 				  iso_bsize, NOCRED, &bp)) != 0)
 			goto out;
 
 		/* Each block must carry the ISO or the High Sierra identifier. */
 		vdp = (struct iso_volume_descriptor *)bp->b_data;
 		if (bcmp (vdp->id, ISO_STANDARD_ID, sizeof vdp->id) != 0) {
 			if (bcmp (vdp->id_sierra, ISO_SIERRA_ID,
 				  sizeof vdp->id_sierra) != 0) {
 				error = EINVAL;
 				goto out;
 			} else
 				high_sierra = 1;
 		}
 		switch (isonum_711 (high_sierra? vdp->type_sierra: vdp->type)){
 		case ISO_VD_PRIMARY:
 			/*
 			 * Keep the first primary descriptor.  Ownership of
 			 * the buffer moves to pribp; bp is cleared so the
 			 * common brelse() below leaves it alone.
 			 */
 			if (pribp == NULL) {
 				pribp = bp;
 				bp = NULL;
 				pri = (struct iso_primary_descriptor *)vdp;
 				pri_sierra =
 				  (struct iso_sierra_primary_descriptor *)vdp;
 			}
 			break;
 
 		case ISO_VD_SUPPLEMENTARY:
 			/* Likewise keep the first supplementary descriptor. */
 			if (supbp == NULL) {
 				supbp = bp;
 				bp = NULL;
 				sup = (struct iso_supplementary_descriptor *)vdp;
 
 				/*
 				 * Unless "nojoliet" is set, the escape
 				 * sequence selects Joliet level 1, 2 or 3.
 				 */
 				if (!vfs_flagopt(mp->mnt_optnew, "nojoliet", NULL, 0)) {
 					if (bcmp(sup->escape, "%/@", 3) == 0)
 						joliet_level = 1;
 					if (bcmp(sup->escape, "%/C", 3) == 0)
 						joliet_level = 2;
 					if (bcmp(sup->escape, "%/E", 3) == 0)
 						joliet_level = 3;
 
 					/*
 					 * Bit 0 of the flags field disables
 					 * Joliet unless "brokenjoliet" is set.
 					 */
 					if ((isonum_711 (sup->flags) & 1) &&
 					    !vfs_flagopt(mp->mnt_optnew, "brokenjoliet", NULL, 0))
 						joliet_level = 0;
 				}
 			}
 			break;
 
 		case ISO_VD_END:
 			goto vd_end;
 
 		default:
 			break;
 		}
 		/* Release the block unless one of the cases above kept it. */
 		if (bp != NULL) {
 			brelse(bp);
 			bp = NULL;
 		}
 	}
  vd_end:
 	if (bp != NULL) {
 		brelse(bp);
 		bp = NULL;
 	}
 
 	/* A primary descriptor is mandatory. */
 	if (pri == NULL) {
 		error = EINVAL;
 		goto out;
 	}
 
 	logical_block_size =
 		isonum_723 (high_sierra?
 			    pri_sierra->logical_block_size:
 			    pri->logical_block_size);
 
 	/* Must be a power of two within [DEV_BSIZE, MAXBSIZE]. */
 	if (logical_block_size < DEV_BSIZE || logical_block_size > MAXBSIZE
 	    || (logical_block_size & (logical_block_size - 1)) != 0) {
 		error = EINVAL;
 		goto out;
 	}
 
 	rootp = (struct iso_directory_record *)
 		(high_sierra?
 		 pri_sierra->root_directory_record:
 		 pri->root_directory_record);
 
 	isomp = malloc(sizeof *isomp, M_ISOFSMNT, M_WAITOK | M_ZERO);
 	isomp->im_cp = cp;
 	isomp->im_bo = bo;
 	isomp->logical_block_size = logical_block_size;
 	isomp->volume_space_size =
 		isonum_733 (high_sierra?
 			    pri_sierra->volume_space_size:
 			    pri->volume_space_size);
 	isomp->joliet_level = 0;
 	/*
 	 * Since an ISO9660 multi-session CD can also access previous
 	 * sessions, we have to include them into the space consider-
 	 * ations.  This doesn't yield a very accurate number since
 	 * parts of the old sessions might be inaccessible now, but we
 	 * can't do much better.  This is also important for the NFS
 	 * filehandle validation.
 	 */
 	isomp->volume_space_size += ssector;
 	memcpy(isomp->root, rootp, sizeof isomp->root);
 	isomp->root_extent = isonum_733 (rootp->extent);
 	isomp->root_size = isonum_733 (rootp->size);
 
 	/* Mask and shift derived from the power-of-two block size. */
 	isomp->im_bmask = logical_block_size - 1;
 	isomp->im_bshift = ffs(logical_block_size) - 1;
 
 	/*
 	 * Everything needed from the primary descriptor has been copied, so
 	 * its buffer is released and the pointers into it are cleared.
 	 */
 	pribp->b_flags |= B_AGE;
 	brelse(pribp);
 	pribp = NULL;
 	rootp = NULL;
 	pri = NULL;
 	pri_sierra = NULL;
 
 	mp->mnt_data = isomp;
 	mp->mnt_stat.f_fsid.val[0] = dev2udev(dev);
 	mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum;
 	mp->mnt_maxsymlinklen = 0;
 	MNT_ILOCK(mp);
 	if (isverified)
 		mp->mnt_flag |= MNT_VERIFIED;
 	mp->mnt_flag |= MNT_LOCAL;
 	mp->mnt_kern_flag |= MNTK_LOOKUP_SHARED | MNTK_EXTENDED_SHARED;
 	MNT_IUNLOCK(mp);
 	isomp->im_mountp = mp;
 	isomp->im_dev = dev;
 	isomp->im_devvp = devvp;
 
 	/* Fold the boolean mount options into im_flags. */
 	vfs_flagopt(mp->mnt_optnew, "norrip", &isomp->im_flags, ISOFSMNT_NORRIP);
 	vfs_flagopt(mp->mnt_optnew, "gens", &isomp->im_flags, ISOFSMNT_GENS);
 	vfs_flagopt(mp->mnt_optnew, "extatt", &isomp->im_flags, ISOFSMNT_EXTATT);
 	vfs_flagopt(mp->mnt_optnew, "nojoliet", &isomp->im_flags, ISOFSMNT_NOJOLIET);
 	vfs_flagopt(mp->mnt_optnew, "kiconv", &isomp->im_flags, ISOFSMNT_KICONV);
 
 	/* Check the Rock Ridge Extension support */
 	if (!(isomp->im_flags & ISOFSMNT_NORRIP)) {
 		/* Read the root directory block, past any extended attributes. */
 		if ((error = bread(isomp->im_devvp, (isomp->root_extent +
 		    isonum_711(((struct iso_directory_record *)isomp->root)->
 		    ext_attr_length)) << (isomp->im_bshift - DEV_BSHIFT),
 		    isomp->logical_block_size, NOCRED, &bp)) != 0)
 			goto out;
 
 		rootp = (struct iso_directory_record *)bp->b_data;
 
 		/*
 		 * A negative offset turns Rock Ridge off (NORRIP is set);
 		 * otherwise the GENS flag is cleared.
 		 */
 		if ((isomp->rr_skip = cd9660_rrip_offset(rootp,isomp)) < 0) {
 		    isomp->im_flags |= ISOFSMNT_NORRIP;
 		} else {
 		    isomp->im_flags &= ~ISOFSMNT_GENS;
 		}
 
 		/*
 		 * The contents are valid,
 		 * but they will get reread as part of another vnode, so...
 		 */
 		bp->b_flags |= B_AGE;
 		brelse(bp);
 		bp = NULL;
 		rootp = NULL;
 	}
 
 	/* Open both conversion directions only if kiconv is requested and loaded. */
 	if (isomp->im_flags & ISOFSMNT_KICONV && cd9660_iconv) {
 		cs_local = vfs_getopts(mp->mnt_optnew, "cs_local", &error);
 		if (error)
 			goto out;
 		cs_disk = vfs_getopts(mp->mnt_optnew, "cs_disk", &error);
 		if (error)
 			goto out;
 		cd9660_iconv->open(cs_local, cs_disk, &isomp->im_d2l);
 		cd9660_iconv->open(cs_disk, cs_local, &isomp->im_l2d);
 	} else {
 		isomp->im_d2l = NULL;
 		isomp->im_l2d = NULL;
 	}
 
 	/* Choose the file system flavour from the format and the flags. */
 	if (high_sierra) {
 		/* this effectively ignores all the mount flags */
 		if (bootverbose)
 			log(LOG_INFO, "cd9660: High Sierra Format\n");
 		isomp->iso_ftype = ISO_FTYPE_HIGH_SIERRA;
 	} else
 		switch (isomp->im_flags&(ISOFSMNT_NORRIP|ISOFSMNT_GENS)) {
 		  default:
 			  isomp->iso_ftype = ISO_FTYPE_DEFAULT;
 			  break;
 		  case ISOFSMNT_GENS|ISOFSMNT_NORRIP:
 			  isomp->iso_ftype = ISO_FTYPE_9660;
 			  break;
 		  case 0:
 			  if (bootverbose)
 			  	  log(LOG_INFO, "cd9660: RockRidge Extension\n");
 			  isomp->iso_ftype = ISO_FTYPE_RRIP;
 			  break;
 		}
 
 	/* Decide whether to use the Joliet descriptor */
 
 	if (isomp->iso_ftype != ISO_FTYPE_RRIP && joliet_level) {
 		if (bootverbose)
 			log(LOG_INFO, "cd9660: Joliet Extension (Level %d)\n",
 			    joliet_level);
 		/* The supplementary descriptor's root replaces the primary one. */
 		rootp = (struct iso_directory_record *)
 			sup->root_directory_record;
 		memcpy(isomp->root, rootp, sizeof isomp->root);
 		isomp->root_extent = isonum_733 (rootp->extent);
 		isomp->root_size = isonum_733 (rootp->size);
 		isomp->joliet_level = joliet_level;
 		supbp->b_flags |= B_AGE;
 	}
 
 	if (supbp) {
 		brelse(supbp);
 		supbp = NULL;
 		sup = NULL;
 	}
 
 	return 0;
 out:
 	/* Failure: undo everything acquired so far. */
 	if (bp != NULL)
 		brelse(bp);
 	if (pribp != NULL)
 		brelse(pribp);
 	if (supbp != NULL)
 		brelse(supbp);
 	if (cp != NULL) {
 		g_topology_lock();
 		g_vfs_close(cp);
 		g_topology_unlock();
 	}
 	if (isomp) {
 		free(isomp, M_ISOFSMNT);
 		mp->mnt_data = NULL;
 	}
 	dev_rel(dev);
 	return error;
 }
 
 /*
  * unmount system call
  */
 static int
 cd9660_unmount(struct mount *mp, int mntflags)
 {
 	struct iso_mnt *imp;
 	int rc;
 	int vflags;
 
 	/* A forced unmount closes active vnodes too. */
 	vflags = (mntflags & MNT_FORCE) ? FORCECLOSE : 0;
 	rc = vflush(mp, 0, vflags, curthread);
 	if (rc != 0)
 		return (rc);
 
 	imp = VFSTOISOFS(mp);
 
 	/* Close whichever iconv handles were opened at mount time. */
 	if ((imp->im_flags & ISOFSMNT_KICONV) && cd9660_iconv) {
 		if (imp->im_d2l)
 			cd9660_iconv->close(imp->im_d2l);
 		if (imp->im_l2d)
 			cd9660_iconv->close(imp->im_l2d);
 	}
 
 	/* Give back the GEOM consumer, the device vnode and the cdev. */
 	g_topology_lock();
 	g_vfs_close(imp->im_cp);
 	g_topology_unlock();
 	vrele(imp->im_devvp);
 	dev_rel(imp->im_dev);
 
 	/* Detach and free the per-mount state. */
 	free(imp, M_ISOFSMNT);
 	mp->mnt_data = NULL;
 
 	MNT_ILOCK(mp);
 	mp->mnt_flag &= ~MNT_LOCAL;
 	MNT_IUNLOCK(mp);
 
 	return (rc);
 }
 
 /*
  * Return root of a filesystem
  */
 static int
 cd9660_root(struct mount *mp, int flags, struct vnode **vpp)
 {
 	struct iso_mnt *imp;
 	struct iso_directory_record *rootrec;
 	cd_ino_t rootino;
 	int relocated;
 
 	imp = VFSTOISOFS(mp);
 	rootrec = (struct iso_directory_record *)imp->root;
 	rootino = isodirino(rootrec, imp);
 
 	/*
 	 * Under RRIP the `.' entry of the root directory has to be used;
 	 * this is requested by telling vget the directory is relocated.
 	 */
 	relocated = (imp->iso_ftype == ISO_FTYPE_RRIP);
 
 	return (cd9660_vget_internal(mp, rootino, flags, vpp, relocated,
 	    rootrec));
 }
 
 /*
  * Get filesystem statistics.
  */
 static int
 cd9660_statfs(struct mount *mp, struct statfs *sbp)
 {
 	struct iso_mnt *imp = VFSTOISOFS(mp);
 
 	/* Sizes come straight from the mounted volume. */
 	sbp->f_bsize = imp->logical_block_size;
 	sbp->f_iosize = sbp->f_bsize;	/* XXX */
 	sbp->f_blocks = imp->volume_space_size;
 
 	/* No free space and no file counts are reported: all zero. */
 	sbp->f_bfree = 0;
 	sbp->f_bavail = 0;
 	sbp->f_files = 0;
 	sbp->f_ffree = 0;
 
 	return (0);
 }
 
 /*
  * File handle to vnode
  *
  * Have to be really careful about stale file handles:
  * - check that the inode number is in range
  * - call iget() to get the locked inode
  * - check for an unallocated inode (i_mode == 0)
  * - check that the generation number matches
  */
 
 /* ARGSUSED */
 static int
 cd9660_fhtovp(mp, fhp, flags, vpp)
 	struct mount *mp;
 	struct fid *fhp;
 	int flags;
 	struct vnode **vpp;
 {
 	struct ifid ifh;
 	struct iso_node *ip;
 	struct vnode *nvp;
 	int error;
 
 	memcpy(&ifh, fhp, sizeof(ifh));
 
 #ifdef	ISOFS_DBG
 	printf("fhtovp: ino %d, start %ld\n",
 	    ifh.ifid_ino, ifh.ifid_start);
 #endif
 
 	if ((error = VFS_VGET(mp, ifh.ifid_ino, LK_EXCLUSIVE, &nvp)) != 0) {
 		*vpp = NULLVP;
 		return (error);
 	}
 	ip = VTOI(nvp);
 	if (ip->inode.iso_mode == 0) {
 		vput(nvp);
 		*vpp = NULLVP;
 		return (ESTALE);
 	}
 	*vpp = nvp;
 	vnode_create_vobject(*vpp, ip->i_size, curthread);
 	return (0);
 }
 
 /*
  * Conform to standard VFS interface; can't vget arbitrary inodes beyond 4GB
  * into media with current inode scheme and 32-bit ino_t.  This shouldn't be
  * needed for anything other than nfsd, and who exports a mounted DVD over NFS?
  */
 static int
 cd9660_vget(struct mount *mp, ino_t ino, int flags, struct vnode **vpp)
 {
 	int relocated;
 
 	/*
 	 * XXXX
 	 * Ideally the `relocated' flag would not always be set, since it
 	 * forces an extra read, but that has not been worked out yet.
 	 */
 #if 0
 	relocated = VFSTOISOFS(mp)->iso_ftype == ISO_FTYPE_RRIP;
 #else
 	relocated = 0;
 #endif
 
 	return (cd9660_vget_internal(mp, ino, flags, vpp, relocated,
 	    (struct iso_directory_record *)0));
 }
 
 /* Use special comparator for full 64-bit ino comparison. */
 static int
 cd9660_vfs_hash_cmp(vp, pino)
 	struct vnode *vp;
 	void *pino;
 {
 	struct iso_node *ip;
 	cd_ino_t ino;
 
 	ip = VTOI(vp);
 	ino = *(cd_ino_t *)pino;
 	return (ip->i_number != ino);
 }
 
 /*
  * Find or create the vnode for inode number 'ino' on mount 'mp'.
  *
  * mp        - the cd9660 mount
  * ino       - inode number; used as the vfs_hash key and, when 'isodir' is
  *             NULL, split with lblkno()/blkoff() to locate the directory
  *             record on the device
  * flags     - lock flags for the vfs_hash calls (a shared request is
  *             promoted to exclusive for creation)
  * vpp       - receives the vnode on success
  * relocated - non-zero to read the `.' entry of the directory instead of
  *             using the record directly
  * isodir    - directory record to use, or NULL to read it from the device
  *
  * Returns 0 on success, ESTALE when the record located from 'ino' fails
  * the range checks, or the error from the failing lower-level call.
  */
 int
 cd9660_vget_internal(mp, ino, flags, vpp, relocated, isodir)
 	struct mount *mp;
 	cd_ino_t ino;
 	int flags;
 	struct vnode **vpp;
 	int relocated;
 	struct iso_directory_record *isodir;
 {
 	struct iso_mnt *imp;
 	struct iso_node *ip;
 	struct buf *bp;
 	struct vnode *vp;
 	int error;
 	struct thread *td;
 
 	td = curthread;
 	/* Done if the lookup failed or produced an existing vnode. */
 	error = vfs_hash_get(mp, ino, flags, td, vpp, cd9660_vfs_hash_cmp,
 	    &ino);
 	if (error || *vpp != NULL)
 		return (error);
 
 	/*
 	 * We must promote to an exclusive lock for vnode creation.  This
 	 * can happen if lookup is passed LOCKSHARED.
  	 */
 	if ((flags & LK_TYPE_MASK) == LK_SHARED) {
 		flags &= ~LK_TYPE_MASK;
 		flags |= LK_EXCLUSIVE;
 	}
 
 	/*
 	 * We do not lock vnode creation as it is believed to be too
 	 * expensive for such rare case as simultaneous creation of vnode
 	 * for same ino by different processes. We just allow them to race
 	 * and check later to decide who wins. Let the race begin!
 	 */
 
 	imp = VFSTOISOFS(mp);
 
 	/* Allocate a new vnode/iso_node. */
 	if ((error = getnewvnode("isofs", mp, &cd9660_vnodeops, &vp)) != 0) {
 		*vpp = NULLVP;
 		return (error);
 	}
 	ip = malloc(sizeof(struct iso_node), M_ISOFSNODE,
 	    M_WAITOK | M_ZERO);
 	vp->v_data = ip;
 	ip->i_vnode = vp;
 	ip->i_number = ino;
 
 	lockmgr(vp->v_vnlock, LK_EXCLUSIVE, NULL);
 	error = insmntque(vp, mp);
 	if (error != 0) {
 		/*
 		 * NOTE(review): only the iso_node is freed here; presumably
 		 * insmntque() disposes of vp itself on failure -- confirm.
 		 */
 		free(ip, M_ISOFSNODE);
 		*vpp = NULLVP;
 		return (error);
 	}
 	/* Done if the insert failed or returned another vnode for this ino. */
 	error = vfs_hash_insert(vp, ino, flags, td, vpp, cd9660_vfs_hash_cmp,
 	    &ino);
 	if (error || *vpp != NULL)
 		return (error);
 
 	if (isodir == NULL) {
 		int lbn, off;
 
 		/* No record supplied: derive its location from the inode number. */
 		lbn = lblkno(imp, ino);
 		if (lbn >= imp->volume_space_size) {
 			vput(vp);
 			printf("fhtovp: lbn exceed volume space %d\n", lbn);
 			return (ESTALE);
 		}
 
 		/* A minimal record must fit inside the logical block. */
 		off = blkoff(imp, ino);
 		if (off + ISO_DIRECTORY_RECORD_SIZE > imp->logical_block_size) {
 			vput(vp);
 			printf("fhtovp: crosses block boundary %d\n",
 			       off + ISO_DIRECTORY_RECORD_SIZE);
 			return (ESTALE);
 		}
 
 		error = bread(imp->im_devvp,
 			      lbn << (imp->im_bshift - DEV_BSHIFT),
 			      imp->logical_block_size, NOCRED, &bp);
 		if (error) {
 			vput(vp);
 			printf("fhtovp: bread error %d\n",error);
 			return (error);
 		}
 		isodir = (struct iso_directory_record *)(bp->b_data + off);
 
 		/* The record's own length must not run past the block either. */
 		if (off + isonum_711(isodir->length) >
 		    imp->logical_block_size) {
 			vput(vp);
 			brelse(bp);
 			printf("fhtovp: directory crosses block boundary %d[off=%d/len=%d]\n",
 			       off +isonum_711(isodir->length), off,
 			       isonum_711(isodir->length));
 			return (ESTALE);
 		}
 
 #if 0
 		if (isonum_733(isodir->extent) +
 		    isonum_711(isodir->ext_attr_length) != ifhp->ifid_start) {
 			brelse(bp);
 			printf("fhtovp: file start miss %d vs %d\n",
 			       isonum_733(isodir->extent) + isonum_711(isodir->ext_attr_length),
 			       ifhp->ifid_start);
 			return (ESTALE);
 		}
 #endif
 	} else
 		bp = NULL;	/* caller's record: no buffer is held yet */
 
 	ip->i_mnt = imp;
 
 	if (relocated) {
 		/*
 		 * On relocated directories we must
 		 * read the `.' entry out of a dir.
 		 */
 		ip->iso_start = ino >> imp->im_bshift;
 		if (bp != NULL)
 			brelse(bp);
 		if ((error = cd9660_blkatoff(vp, (off_t)0, NULL, &bp)) != 0) {
 			vput(vp);
 			return (error);
 		}
 		isodir = (struct iso_directory_record *)bp->b_data;
 	}
 
 	/* Extent, size and data start are taken from the directory record. */
 	ip->iso_extent = isonum_733(isodir->extent);
 	ip->i_size = isonum_733(isodir->size);
 	ip->iso_start = isonum_711(isodir->ext_attr_length) + ip->iso_extent;
 
 	/*
 	 * Setup time stamp, attribute
 	 */
 	vp->v_type = VNON;
 	switch (imp->iso_ftype) {
 	default:	/* ISO_FTYPE_9660 */
 	    {
 		struct buf *bp2;
 		int off;
 		/*
 		 * With the extatt option and a non-zero extended attribute
 		 * length, read the block(s) preceding the data (negative
 		 * offset).  The return value is not checked.
 		 */
 		if ((imp->im_flags & ISOFSMNT_EXTATT)
 		    && (off = isonum_711(isodir->ext_attr_length)))
 			cd9660_blkatoff(vp, (off_t)-(off << imp->im_bshift), NULL,
 				     &bp2);
 		else
 			bp2 = NULL;
 		cd9660_defattr(isodir, ip, bp2, ISO_FTYPE_9660);
 		cd9660_deftstamp(isodir, ip, bp2, ISO_FTYPE_9660);
 		if (bp2)
 			brelse(bp2);
 		break;
 	    }
 	case ISO_FTYPE_RRIP:
 		cd9660_rrip_analyze(isodir, ip, imp);
 		break;
 	}
 
 	/*
 	 * NOTE(review): bp is NULL here when the caller supplied isodir and
 	 * 'relocated' is zero; this relies on brelse() accepting NULL --
 	 * confirm.
 	 */
 	brelse(bp);
 
 	/*
 	 * Initialize the associated vnode
 	 */
 	switch (vp->v_type = IFTOVT(ip->inode.iso_mode)) {
 	case VFIFO:
 		vp->v_op = &cd9660_fifoops;
 		break;
 	default:
 		VN_LOCK_ASHARE(vp);
 		break;
 	}
 
 	/* A node whose extent equals the root extent is marked as the root. */
 	if (ip->iso_extent == imp->root_extent)
 		vp->v_vflag |= VV_ROOT;
 
 	/*
 	 * XXX need generation number?
 	 */
 
 	*vpp = vp;
 	return (0);
 }
Index: head/sys/fs/msdosfs/msdosfs_vfsops.c
===================================================================
--- head/sys/fs/msdosfs/msdosfs_vfsops.c	(revision 362157)
+++ head/sys/fs/msdosfs/msdosfs_vfsops.c	(revision 362158)
@@ -1,987 +1,985 @@
 /* $FreeBSD$ */
 /*	$NetBSD: msdosfs_vfsops.c,v 1.51 1997/11/17 15:36:58 ws Exp $	*/
 
 /*-
  * SPDX-License-Identifier: BSD-4-Clause
  *
  * Copyright (C) 1994, 1995, 1997 Wolfgang Solfrank.
  * Copyright (C) 1994, 1995, 1997 TooLs GmbH.
  * All rights reserved.
  * Original code by Paul Popelka (paulp@uts.amdahl.com) (see below).
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by TooLs GmbH.
  * 4. The name of TooLs GmbH may not be used to endorse or promote products
  *    derived from this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 /*-
  * Written by Paul Popelka (paulp@uts.amdahl.com)
  *
  * You can do anything you want with this software, just don't say you wrote
  * it, and don't remove this notice.
  *
  * This software is provided "as is".
  *
  * The author supplies this software to be publicly redistributed on the
  * understanding that the author is not responsible for the correct
  * functioning of this software in any circumstances and is not liable for
  * any damages caused by this software.
  *
  * October 1992
  */
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/buf.h>
 #include <sys/conf.h>
 #include <sys/fcntl.h>
 #include <sys/iconv.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mount.h>
 #include <sys/mutex.h>
 #include <sys/namei.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/stat.h>
 #include <sys/vnode.h>
 
 #include <geom/geom.h>
 #include <geom/geom_vfs.h>
 
 #include <fs/msdosfs/bootsect.h>
 #include <fs/msdosfs/bpb.h>
 #include <fs/msdosfs/direntry.h>
 #include <fs/msdosfs/denode.h>
 #include <fs/msdosfs/fat.h>
 #include <fs/msdosfs/msdosfsmount.h>
 
 #ifdef MSDOSFS_DEBUG
 #include <sys/rwlock.h>
 #endif
 
 static const char msdosfs_lock_msg[] = "fatlk";
 
 /*
  * Mount options that we support.  msdosfs_mount() passes this
  * NULL-terminated list to vfs_filteropt() and fails the mount with
  * EINVAL when that call returns nonzero.
  */
 static const char *msdosfs_opts[] = {
 	"async", "noatime", "noclusterr", "noclusterw",
 	"export", "force", "from", "sync",
 	"cs_dos", "cs_local", "cs_win", "dirmask",
 	"gid", "kiconv", "longname",
 	"longnames", "mask", "shortname", "shortnames",
 	"uid", "win95", "nowin95",
 	NULL
 };
 
 #if 1 /*def PC98*/
 /*
  * XXX - The boot signature written by NEC PC-98 DOS looks like garbage
  *       or a random value :-{
  *       If you want to use media with such a broken signature, define
  *       the following symbol, even on PC/AT.
  *       (e.g. to mount a PC-98 DOS formatted FD on PC/AT)
  */
 #define	MSDOSFS_NOCHECKSIG
 #endif
 
 MALLOC_DEFINE(M_MSDOSFSMNT, "msdosfs_mount", "MSDOSFS mount structure");
 static MALLOC_DEFINE(M_MSDOSFSFAT, "msdosfs_fat", "MSDOSFS file allocation table");
 
 struct iconv_functions *msdosfs_iconv;
 
 static int	update_mp(struct mount *mp, struct thread *td);
 static int	mountmsdosfs(struct vnode *devvp, struct mount *mp);
 static vfs_fhtovp_t	msdosfs_fhtovp;
 static vfs_mount_t	msdosfs_mount;
 static vfs_root_t	msdosfs_root;
 static vfs_statfs_t	msdosfs_statfs;
 static vfs_sync_t	msdosfs_sync;
 static vfs_unmount_t	msdosfs_unmount;
 
 /* Maximum length of a character set name (arbitrary). */
 #define	MAXCSLEN	64
 
 static int
 update_mp(struct mount *mp, struct thread *td)
 {
 	struct msdosfsmount *pmp = VFSTOMSDOSFS(mp);
 	void *dos, *win, *local;
 	int error, v;
 
 	if (!vfs_getopt(mp->mnt_optnew, "kiconv", NULL, NULL)) {
 		if (msdosfs_iconv != NULL) {
 			error = vfs_getopt(mp->mnt_optnew,
 			    "cs_win", &win, NULL);
 			if (!error)
 				error = vfs_getopt(mp->mnt_optnew,
 				    "cs_local", &local, NULL);
 			if (!error)
 				error = vfs_getopt(mp->mnt_optnew,
 				    "cs_dos", &dos, NULL);
 			if (!error) {
 				msdosfs_iconv->open(win, local, &pmp->pm_u2w);
 				msdosfs_iconv->open(local, win, &pmp->pm_w2u);
 				msdosfs_iconv->open(dos, local, &pmp->pm_u2d);
 				msdosfs_iconv->open(local, dos, &pmp->pm_d2u);
 			}
 			if (error != 0)
 				return (error);
 		} else {
 			pmp->pm_w2u = NULL;
 			pmp->pm_u2w = NULL;
 			pmp->pm_d2u = NULL;
 			pmp->pm_u2d = NULL;
 		}
 	}
 
 	if (vfs_scanopt(mp->mnt_optnew, "gid", "%d", &v) == 1)
 		pmp->pm_gid = v;
 	if (vfs_scanopt(mp->mnt_optnew, "uid", "%d", &v) == 1)
 		pmp->pm_uid = v;
 	if (vfs_scanopt(mp->mnt_optnew, "mask", "%d", &v) == 1)
 		pmp->pm_mask = v & ALLPERMS;
 	if (vfs_scanopt(mp->mnt_optnew, "dirmask", "%d", &v) == 1)
 		pmp->pm_dirmask = v & ALLPERMS;
 	vfs_flagopt(mp->mnt_optnew, "shortname",
 	    &pmp->pm_flags, MSDOSFSMNT_SHORTNAME);
 	vfs_flagopt(mp->mnt_optnew, "shortnames",
 	    &pmp->pm_flags, MSDOSFSMNT_SHORTNAME);
 	vfs_flagopt(mp->mnt_optnew, "longname",
 	    &pmp->pm_flags, MSDOSFSMNT_LONGNAME);
 	vfs_flagopt(mp->mnt_optnew, "longnames",
 	    &pmp->pm_flags, MSDOSFSMNT_LONGNAME);
 	vfs_flagopt(mp->mnt_optnew, "kiconv",
 	    &pmp->pm_flags, MSDOSFSMNT_KICONV);
 
 	if (vfs_getopt(mp->mnt_optnew, "nowin95", NULL, NULL) == 0)
 		pmp->pm_flags |= MSDOSFSMNT_NOWIN95;
 	else
 		pmp->pm_flags &= ~MSDOSFSMNT_NOWIN95;
 
 	if (pmp->pm_flags & MSDOSFSMNT_NOWIN95)
 		pmp->pm_flags |= MSDOSFSMNT_SHORTNAME;
 	else
 		pmp->pm_flags |= MSDOSFSMNT_LONGNAME;
 	return 0;
 }
 
 /*
  * vfs_cmount handler: copy a struct msdosfs_args in from "data" and
  * re-express each of its fields as a named mount option on "ma" before
  * handing the request to kernel_mount().
  *
  * Returns EINVAL when data is NULL, the copyin() error if the copy
  * fails, and otherwise whatever kernel_mount() returns.
  */
 static int
 msdosfs_cmount(struct mntarg *ma, void *data, uint64_t flags)
 {
 	struct msdosfs_args args;
-	struct export_args exp;
 	int error;
 
 	if (data == NULL)
 		return (EINVAL);
 	error = copyin(data, &args, sizeof args);
 	if (error)
 		return (error);
-	vfs_oexport_conv(&args.export, &exp);
 
 	ma = mount_argsu(ma, "from", args.fspec, MAXPATHLEN);
-	ma = mount_arg(ma, "export", &exp, sizeof(exp));
+	ma = mount_arg(ma, "export", &args.export, sizeof(args.export));
 	ma = mount_argf(ma, "uid", "%d", args.uid);
 	ma = mount_argf(ma, "gid", "%d", args.gid);
 	ma = mount_argf(ma, "mask", "%d", args.mask);
 	ma = mount_argf(ma, "dirmask", "%d", args.dirmask);
 
 	/*
 	 * Boolean options are passed as a flag test plus a "no"-prefixed
 	 * name.  Note that the test is inverted for "nowin95".
 	 */
 	ma = mount_argb(ma, args.flags & MSDOSFSMNT_SHORTNAME, "noshortname");
 	ma = mount_argb(ma, args.flags & MSDOSFSMNT_LONGNAME, "nolongname");
 	ma = mount_argb(ma, !(args.flags & MSDOSFSMNT_NOWIN95), "nowin95");
 	ma = mount_argb(ma, args.flags & MSDOSFSMNT_KICONV, "nokiconv");
 
 	/* Character set names, each bounded by MAXCSLEN. */
 	ma = mount_argsu(ma, "cs_win", args.cs_win, MAXCSLEN);
 	ma = mount_argsu(ma, "cs_dos", args.cs_dos, MAXCSLEN);
 	ma = mount_argsu(ma, "cs_local", args.cs_local, MAXCSLEN);
 
 	error = kernel_mount(ma, flags);
 
 	return (error);
 }
 
 /*
  * Mount or update an msdosfs filesystem as described by mp->mnt_optnew.
  *
  * With MNT_UPDATE set, the read-write <-> read-only transitions are
  * handled first and the "from" device is then required to be the one
  * that is already mounted.  Otherwise the "from" device is looked up,
  * checked for access permission and handed to mountmsdosfs().  In both
  * cases update_mp() applies the remaining options.
  *
  * Returns 0 on success or an errno value.
  */
 static int
 msdosfs_mount(struct mount *mp)
 {
 	struct vnode *devvp;	  /* vnode for blk device to mount */
 	struct thread *td;
 	/* msdosfs specific mount control block */
 	struct msdosfsmount *pmp = NULL;
 	struct nameidata ndp;
 	int error, flags;
 	accmode_t accmode;
 	char *from;
 
 	td = curthread;
 	/* Reject any option name that is not listed in msdosfs_opts. */
 	if (vfs_filteropt(mp->mnt_optnew, msdosfs_opts))
 		return (EINVAL);
 
 	/*
 	 * If updating, check whether changing from read-only to
 	 * read/write; if there is no device name, that's all we do.
 	 */
 	if (mp->mnt_flag & MNT_UPDATE) {
 		pmp = VFSTOMSDOSFS(mp);
 		if (!(pmp->pm_flags & MSDOSFSMNT_RONLY) &&
 		    vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0)) {
 			/* rw -> ro: flush everything out first. */
 			error = VFS_SYNC(mp, MNT_WAIT);
 			if (error)
 				return (error);
 			flags = WRITECLOSE;
 			if (mp->mnt_flag & MNT_FORCE)
 				flags |= FORCECLOSE;
 			error = vflush(mp, 0, flags, td);
 			if (error)
 				return (error);
 
 			/*
 			 * Now the volume is clean.  Mark it so while the
 			 * device is still rw.
 			 */
 			error = markvoldirty(pmp, 0);
 			if (error) {
 				(void)markvoldirty(pmp, 1);
 				return (error);
 			}
 
 			/* Downgrade the device from rw to ro. */
 			g_topology_lock();
 			error = g_access(pmp->pm_cp, 0, -1, 0);
 			g_topology_unlock();
 			if (error) {
 				(void)markvoldirty(pmp, 1);
 				return (error);
 			}
 
 			/*
 			 * Backing out after an error was painful in the
 			 * above.  Now we are committed to succeeding.
 			 */
 			pmp->pm_fmod = 0;
 			pmp->pm_flags |= MSDOSFSMNT_RONLY;
 			MNT_ILOCK(mp);
 			mp->mnt_flag |= MNT_RDONLY;
 			MNT_IUNLOCK(mp);
 		} else if ((pmp->pm_flags & MSDOSFSMNT_RONLY) &&
 		    !vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0)) {
 			/*
 			 * If upgrade to read-write by non-root, then verify
 			 * that user has necessary permissions on the device.
 			 */
 			devvp = pmp->pm_devvp;
 			vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
 			error = VOP_ACCESS(devvp, VREAD | VWRITE,
 			    td->td_ucred, td);
 			if (error)
 				error = priv_check(td, PRIV_VFS_MOUNT_PERM);
 			if (error) {
 				VOP_UNLOCK(devvp);
 				return (error);
 			}
 			VOP_UNLOCK(devvp);
 			/* Acquire a write reference on the GEOM consumer. */
 			g_topology_lock();
 			error = g_access(pmp->pm_cp, 0, 1, 0);
 			g_topology_unlock();
 			if (error)
 				return (error);
 
 			/* Now that the volume is modifiable, mark it dirty. */
 			error = markvoldirty_upgrade(pmp, true, true);
 			if (error) {
 				/*
 				 * If dirtying the superblock failed, drop GEOM
 				 * 'w' refs (we're still RO).
 				 */
 				g_topology_lock();
 				(void)g_access(pmp->pm_cp, 0, -1, 0);
 				g_topology_unlock();
 
 				return (error);
 			}
 
 			pmp->pm_fmod = 1;
 			pmp->pm_flags &= ~MSDOSFSMNT_RONLY;
 			MNT_ILOCK(mp);
 			mp->mnt_flag &= ~MNT_RDONLY;
 			MNT_IUNLOCK(mp);
 		}
 	}
 	/*
 	 * Not an update, or updating the name: look up the name
 	 * and verify that it refers to a sensible disk device.
 	 */
 	if (vfs_getopt(mp->mnt_optnew, "from", (void **)&from, NULL))
 		return (EINVAL);
 	NDINIT(&ndp, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, from, td);
 	error = namei(&ndp);
 	if (error)
 		return (error);
 	devvp = ndp.ni_vp;
 	NDFREE(&ndp, NDF_ONLY_PNBUF);
 
 	if (!vn_isdisk(devvp, &error)) {
 		vput(devvp);
 		return (error);
 	}
 	/*
 	 * If mount by non-root, then verify that user has necessary
 	 * permissions on the device.
 	 */
 	accmode = VREAD;
 	if ((mp->mnt_flag & MNT_RDONLY) == 0)
 		accmode |= VWRITE;
 	error = VOP_ACCESS(devvp, accmode, td->td_ucred, td);
 	if (error)
 		error = priv_check(td, PRIV_VFS_MOUNT_PERM);
 	if (error) {
 		vput(devvp);
 		return (error);
 	}
 	if ((mp->mnt_flag & MNT_UPDATE) == 0) {
 		error = mountmsdosfs(devvp, mp);
 #ifdef MSDOSFS_DEBUG		/* only needed for the printf below */
 		pmp = VFSTOMSDOSFS(mp);
 #endif
 	} else {
 		/*
 		 * Update: the vnode from the lookup is released right away
 		 * and only its address is compared with the mounted device.
 		 */
 		vput(devvp);
 		if (devvp != pmp->pm_devvp)
 			return (EINVAL);	/* XXX needs translation */
 	}
 	/*
 	 * error was zero before the branch above, so it can only be
 	 * nonzero here if mountmsdosfs() failed.  mountmsdosfs() unlocks
 	 * devvp on all of its paths, hence vrele() rather than vput().
 	 */
 	if (error) {
 		vrele(devvp);
 		return (error);
 	}
 
 	error = update_mp(mp, td);
 	if (error) {
 		/* Tear down a fresh mount again; an update is left as is. */
 		if ((mp->mnt_flag & MNT_UPDATE) == 0)
 			msdosfs_unmount(mp, MNT_FORCE);
 		return error;
 	}
 
 	vfs_mountedfrom(mp, from);
 #ifdef MSDOSFS_DEBUG
 	printf("msdosfs_mount(): mp %p, pmp %p, inusemap %p\n", mp, pmp, pmp->pm_inusemap);
 #endif
 	return (0);
 }
 
 /*
  * Attach the FAT filesystem found on devvp to the mount point mp.
  *
  * The function claims the device (si_mountpt), opens a GEOM consumer,
  * reads the boot sector and validates its BPB, derives the FAT layout,
  * reads the optional FSInfo sector, builds the in-use cluster bitmap
  * and finally publishes the msdosfsmount through mp->mnt_data.
  *
  * devvp is unlocked on every path through this function.  Returns 0 on
  * success or an errno value; on failure the resources acquired here
  * (buffer, GEOM consumer, mount structure, device claim and reference)
  * are released again.
  */
 static int
 mountmsdosfs(struct vnode *devvp, struct mount *mp)
 {
 	struct msdosfsmount *pmp;
 	struct buf *bp;
 	struct cdev *dev;
 	union bootsector *bsp;
 	struct byte_bpb33 *b33;
 	struct byte_bpb50 *b50;
 	struct byte_bpb710 *b710;
 	uint8_t SecPerClust;
 	u_long clusters;
 	int ronly, error;
 	struct g_consumer *cp;
 	struct bufobj *bo;
 
 	bp = NULL;		/* This and pmp both used in error_exit. */
 	pmp = NULL;
 	ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
 
 	/* Claim the device; fail with EBUSY if si_mountpt is already set. */
 	dev = devvp->v_rdev;
 	if (atomic_cmpset_acq_ptr((uintptr_t *)&dev->si_mountpt, 0,
 	    (uintptr_t)mp) == 0) {
 		VOP_UNLOCK(devvp);
 		return (EBUSY);
 	}
 	g_topology_lock();
 	error = g_vfs_open(devvp, &cp, "msdosfs", ronly ? 0 : 1);
 	g_topology_unlock();
 	if (error != 0) {
 		atomic_store_rel_ptr((uintptr_t *)&dev->si_mountpt, 0);
 		VOP_UNLOCK(devvp);
 		return (error);
 	}
 	dev_ref(dev);
 	bo = &devvp->v_bufobj;
 	VOP_UNLOCK(devvp);
 	if (dev->si_iosize_max != 0)
 		mp->mnt_iosize_max = dev->si_iosize_max;
 	if (mp->mnt_iosize_max > MAXPHYS)
 		mp->mnt_iosize_max = MAXPHYS;
 
 	/*
 	 * Read the boot sector of the filesystem, and then check the
 	 * boot signature.  If not a dos boot sector then error out.
 	 *
 	 * NOTE: 8192 is a magic size that works for ffs.
 	 */
 	error = bread(devvp, 0, 8192, NOCRED, &bp);
 	if (error)
 		goto error_exit;
 	bp->b_flags |= B_AGE;
 	bsp = (union bootsector *)bp->b_data;
 	b33 = (struct byte_bpb33 *)bsp->bs33.bsBPB;
 	b50 = (struct byte_bpb50 *)bsp->bs50.bsBPB;
 	b710 = (struct byte_bpb710 *)bsp->bs710.bsBPB;
 
 #ifndef MSDOSFS_NOCHECKSIG
 	if (bsp->bs50.bsBootSectSig0 != BOOTSIG0
 	    || bsp->bs50.bsBootSectSig1 != BOOTSIG1) {
 		error = EINVAL;
 		goto error_exit;
 	}
 #endif
 
 	pmp = malloc(sizeof *pmp, M_MSDOSFSMNT, M_WAITOK | M_ZERO);
 	pmp->pm_mountp = mp;
 	pmp->pm_cp = cp;
 	pmp->pm_bo = bo;
 
 	lockinit(&pmp->pm_fatlock, 0, msdosfs_lock_msg, 0, 0);
 
 	/*
 	 * Initialize ownerships and permissions, since nothing else will
 	 * initialize them iff we are mounting root.
 	 */
 	pmp->pm_uid = UID_ROOT;
 	pmp->pm_gid = GID_WHEEL;
 	pmp->pm_mask = pmp->pm_dirmask = S_IXUSR | S_IXGRP | S_IXOTH |
 	    S_IRUSR | S_IRGRP | S_IROTH | S_IWUSR;
 
 	/*
 	 * Compute several useful quantities from the bpb in the
 	 * bootsector.  Copy in the dos 5 variant of the bpb then fix up
 	 * the fields that are different between dos 5 and dos 3.3.
 	 */
 	SecPerClust = b50->bpbSecPerClust;
 	pmp->pm_BytesPerSec = getushort(b50->bpbBytesPerSec);
 	if (pmp->pm_BytesPerSec < DEV_BSIZE) {
 		error = EINVAL;
 		goto error_exit;
 	}
 	pmp->pm_ResSectors = getushort(b50->bpbResSectors);
 	pmp->pm_FATs = b50->bpbFATs;
 	pmp->pm_RootDirEnts = getushort(b50->bpbRootDirEnts);
 	pmp->pm_Sectors = getushort(b50->bpbSectors);
 	pmp->pm_FATsecs = getushort(b50->bpbFATsecs);
 	pmp->pm_SecPerTrack = getushort(b50->bpbSecPerTrack);
 	pmp->pm_Heads = getushort(b50->bpbHeads);
 	pmp->pm_Media = b50->bpbMedia;
 
 	/* calculate the ratio of sector size to DEV_BSIZE */
 	pmp->pm_BlkPerSec = pmp->pm_BytesPerSec / DEV_BSIZE;
 
 	/*
 	 * We don't check pm_Heads nor pm_SecPerTrack, because
 	 * these may not be set for EFI file systems. We don't
 	 * use these anyway, so we're unaffected if they are
 	 * invalid.
 	 */
 	if (!pmp->pm_BytesPerSec || !SecPerClust) {
 		error = EINVAL;
 		goto error_exit;
 	}
 
 	/* A zero 16-bit sector count selects the 32-bit "huge" fields. */
 	if (pmp->pm_Sectors == 0) {
 		pmp->pm_HiddenSects = getulong(b50->bpbHiddenSecs);
 		pmp->pm_HugeSectors = getulong(b50->bpbHugeSectors);
 	} else {
 		pmp->pm_HiddenSects = getushort(b33->bpbHiddenSecs);
 		pmp->pm_HugeSectors = pmp->pm_Sectors;
 	}
 
 	/* No root directory entries in the BPB: treat the volume as FAT32. */
 	if (pmp->pm_RootDirEnts == 0) {
 		if (pmp->pm_FATsecs
 		    || getushort(b710->bpbFSVers)) {
 			error = EINVAL;
 #ifdef MSDOSFS_DEBUG
 			printf("mountmsdosfs(): bad FAT32 filesystem\n");
 #endif
 			goto error_exit;
 		}
 		pmp->pm_fatmask = FAT32_MASK;
 		pmp->pm_fatmult = 4;
 		pmp->pm_fatdiv = 1;
 		pmp->pm_FATsecs = getulong(b710->bpbBigFATsecs);
 		if (getushort(b710->bpbExtFlags) & FATMIRROR)
 			pmp->pm_curfat = getushort(b710->bpbExtFlags) & FATNUM;
 		else
 			pmp->pm_flags |= MSDOSFS_FATMIRROR;
 	} else
 		pmp->pm_flags |= MSDOSFS_FATMIRROR;
 
 	/*
 	 * Check a few values (could do some more):
 	 * - logical sector size: power of 2, >= block size
 	 * - sectors per cluster: power of 2, >= 1
 	 * - number of sectors:   >= 1, <= size of partition
 	 * - number of FAT sectors: >= 1
 	 */
 	if ( (SecPerClust == 0)
 	  || (SecPerClust & (SecPerClust - 1))
 	  || (pmp->pm_BytesPerSec < DEV_BSIZE)
 	  || (pmp->pm_BytesPerSec & (pmp->pm_BytesPerSec - 1))
 	  || (pmp->pm_HugeSectors == 0)
 	  || (pmp->pm_FATsecs == 0)
 	  || (SecPerClust * pmp->pm_BlkPerSec > MAXBSIZE / DEV_BSIZE)
 	) {
 		error = EINVAL;
 		goto error_exit;
 	}
 
 	/* From here on, sizes are expressed in DEV_BSIZE blocks. */
 	pmp->pm_HugeSectors *= pmp->pm_BlkPerSec;
 	pmp->pm_HiddenSects *= pmp->pm_BlkPerSec;	/* XXX not used? */
 	pmp->pm_FATsecs     *= pmp->pm_BlkPerSec;
 	SecPerClust         *= pmp->pm_BlkPerSec;
 
 	pmp->pm_fatblk = pmp->pm_ResSectors * pmp->pm_BlkPerSec;
 
 	if (FAT32(pmp)) {
 		pmp->pm_rootdirblk = getulong(b710->bpbRootClust);
 		pmp->pm_firstcluster = pmp->pm_fatblk
 			+ (pmp->pm_FATs * pmp->pm_FATsecs);
 		pmp->pm_fsinfo = getushort(b710->bpbFSInfo) * pmp->pm_BlkPerSec;
 	} else {
 		pmp->pm_rootdirblk = pmp->pm_fatblk +
 			(pmp->pm_FATs * pmp->pm_FATsecs);
 		pmp->pm_rootdirsize = howmany(pmp->pm_RootDirEnts *
 			sizeof(struct direntry), DEV_BSIZE); /* in blocks */
 		pmp->pm_firstcluster = pmp->pm_rootdirblk + pmp->pm_rootdirsize;
 	}
 
 	/*
 	 * The data area has to start inside the volume.  Without this
 	 * check a malformed BPB makes the subtraction below produce a
 	 * meaningless cluster count (it wraps if the fields are unsigned).
 	 */
 	if (pmp->pm_HugeSectors <= pmp->pm_firstcluster) {
 		error = EINVAL;
 		goto error_exit;
 	}
 
 	pmp->pm_maxcluster = (pmp->pm_HugeSectors - pmp->pm_firstcluster) /
 	    SecPerClust + 1;
 	pmp->pm_fatsize = pmp->pm_FATsecs * DEV_BSIZE;	/* XXX not used? */
 
 	/* pm_fatmask is still zero unless FAT32 was selected above. */
 	if (pmp->pm_fatmask == 0) {
 		if (pmp->pm_maxcluster
 		    <= ((CLUST_RSRVD - CLUST_FIRST) & FAT12_MASK)) {
 			/*
 			 * This will usually be a floppy disk. This size makes
 			 * sure that one FAT entry will not be split across
 			 * multiple blocks.
 			 */
 			pmp->pm_fatmask = FAT12_MASK;
 			pmp->pm_fatmult = 3;
 			pmp->pm_fatdiv = 2;
 		} else {
 			pmp->pm_fatmask = FAT16_MASK;
 			pmp->pm_fatmult = 2;
 			pmp->pm_fatdiv = 1;
 		}
 	}
 
 	/* Clamp the cluster count to what the FAT itself can describe. */
 	clusters = (pmp->pm_fatsize / pmp->pm_fatmult) * pmp->pm_fatdiv;
 	if (pmp->pm_maxcluster >= clusters) {
 #ifdef MSDOSFS_DEBUG
 		printf("Warning: number of clusters (%ld) exceeds FAT "
 		    "capacity (%ld)\n", pmp->pm_maxcluster + 1, clusters);
 #endif
 		pmp->pm_maxcluster = clusters - 1;
 	}
 
 	if (FAT12(pmp))
 		pmp->pm_fatblocksize = 3 * 512;
 	else
 		pmp->pm_fatblocksize = PAGE_SIZE;
 	pmp->pm_fatblocksize = roundup(pmp->pm_fatblocksize,
 	    pmp->pm_BytesPerSec);
 	pmp->pm_fatblocksec = pmp->pm_fatblocksize / DEV_BSIZE;
 	pmp->pm_bnshift = ffs(DEV_BSIZE) - 1;
 
 	/*
 	 * Compute mask and shift value for isolating cluster relative byte
 	 * offsets and cluster numbers from a file offset.
 	 */
 	pmp->pm_bpcluster = SecPerClust * DEV_BSIZE;
 	pmp->pm_crbomask = pmp->pm_bpcluster - 1;
 	pmp->pm_cnshift = ffs(pmp->pm_bpcluster) - 1;
 
 	/*
 	 * Check for valid cluster size
 	 * must be a power of 2
 	 */
 	if (pmp->pm_bpcluster ^ (1 << pmp->pm_cnshift)) {
 		error = EINVAL;
 		goto error_exit;
 	}
 
 	/*
 	 * Release the bootsector buffer.
 	 */
 	brelse(bp);
 	bp = NULL;
 
 	/*
 	 * Check the fsinfo sector if we have one.  Silently fix up our
 	 * in-core copy of fp->fsinxtfree if it is unknown (0xffffffff)
 	 * or too large.  Ignore fp->fsinfree for now, since we need to
 	 * read the entire FAT anyway to fill the inuse map.
 	 */
 	if (pmp->pm_fsinfo) {
 		struct fsinfo *fp;
 
 		if ((error = bread(devvp, pmp->pm_fsinfo, pmp->pm_BytesPerSec,
 		    NOCRED, &bp)) != 0)
 			goto error_exit;
 		fp = (struct fsinfo *)bp->b_data;
 		if (!bcmp(fp->fsisig1, "RRaA", 4)
 		    && !bcmp(fp->fsisig2, "rrAa", 4)
 		    && !bcmp(fp->fsisig3, "\0\0\125\252", 4)) {
 			pmp->pm_nxtfree = getulong(fp->fsinxtfree);
 			if (pmp->pm_nxtfree > pmp->pm_maxcluster)
 				pmp->pm_nxtfree = CLUST_FIRST;
 		} else
 			pmp->pm_fsinfo = 0;
 		brelse(bp);
 		bp = NULL;
 	}
 
 	/*
 	 * Finish initializing pmp->pm_nxtfree (just in case the first few
 	 * sectors aren't properly reserved in the FAT).  This completes
 	 * the fixup for fp->fsinxtfree, and fixes up the zero-initialized
 	 * value if there is no fsinfo.  We will use pmp->pm_nxtfree
 	 * internally even if there is no fsinfo.
 	 */
 	if (pmp->pm_nxtfree < CLUST_FIRST)
 		pmp->pm_nxtfree = CLUST_FIRST;
 
 	/*
 	 * Allocate memory for the bitmap of allocated clusters, and then
 	 * fill it in.
 	 */
 	pmp->pm_inusemap = malloc(howmany(pmp->pm_maxcluster + 1, N_INUSEBITS)
 				  * sizeof(*pmp->pm_inusemap),
 				  M_MSDOSFSFAT, M_WAITOK);
 
 	/*
 	 * fillinusemap() needs pm_devvp.
 	 */
 	pmp->pm_devvp = devvp;
 	pmp->pm_dev = dev;
 
 	/*
 	 * Have the inuse map filled in.
 	 */
 	MSDOSFS_LOCK_MP(pmp);
 	error = fillinusemap(pmp);
 	MSDOSFS_UNLOCK_MP(pmp);
 	if (error != 0)
 		goto error_exit;
 
 	/*
 	 * If they want FAT updates to be synchronous then let them suffer
 	 * the performance degradation in exchange for the on disk copy of
 	 * the FAT being correct just about all the time.  I suppose this
 	 * would be a good thing to turn on if the kernel is still flakey.
 	 */
 	if (mp->mnt_flag & MNT_SYNCHRONOUS)
 		pmp->pm_flags |= MSDOSFSMNT_WAITONFAT;
 
 	/*
 	 * Finish up.
 	 */
 	if (ronly)
 		pmp->pm_flags |= MSDOSFSMNT_RONLY;
 	else {
 		if ((error = markvoldirty(pmp, 1)) != 0)
 			goto error_exit;
 		pmp->pm_fmod = 1;
 	}
 	mp->mnt_data = pmp;
 	mp->mnt_stat.f_fsid.val[0] = dev2udev(dev);
 	mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum;
 	MNT_ILOCK(mp);
 	mp->mnt_flag |= MNT_LOCAL;
 	mp->mnt_kern_flag |= MNTK_USES_BCACHE | MNTK_NO_IOPF;
 	MNT_IUNLOCK(mp);
 
 	return (0);
 
 error_exit:
 	/* Undo, in order, whatever had been set up when the error hit. */
 	if (bp)
 		brelse(bp);
 	if (cp != NULL) {
 		g_topology_lock();
 		g_vfs_close(cp);
 		g_topology_unlock();
 	}
 	if (pmp) {
 		lockdestroy(&pmp->pm_fatlock);
 		/* pmp was zero-filled, so pm_inusemap may still be NULL. */
 		free(pmp->pm_inusemap, M_MSDOSFSFAT);
 		free(pmp, M_MSDOSFSMNT);
 		mp->mnt_data = NULL;
 	}
 	atomic_store_rel_ptr((uintptr_t *)&dev->si_mountpt, 0);
 	dev_rel(dev);
 	return (error);
 }
 
 /*
  * Unmount the filesystem described by mp.
  *
  * A read-write mount is synced first and, after vflush(), marked
  * clean on disk.  The iconv handles, the GEOM consumer, the device
  * references, the in-use map and the mount structure are then released.
  *
  * With MNT_FORCE in mntflags a failing sync does not abort the
  * unmount.  ENXIO from vflush() or markvoldirty() does not abort it
  * either, but may still be the value returned at the end.
  */
 static int
 msdosfs_unmount(struct mount *mp, int mntflags)
 {
 	struct msdosfsmount *pmp;
 	int error, flags;
 
 	error = flags = 0;
 	pmp = VFSTOMSDOSFS(mp);
 	if ((pmp->pm_flags & MSDOSFSMNT_RONLY) == 0)
 		error = msdosfs_sync(mp, MNT_WAIT);
 	/* A sync error is fatal only for a non-forced unmount. */
 	if ((mntflags & MNT_FORCE) != 0)
 		flags |= FORCECLOSE;
 	else if (error != 0)
 		return (error);
 	error = vflush(mp, 0, flags, curthread);
 	if (error != 0 && error != ENXIO)
 		return (error);
 	if ((pmp->pm_flags & MSDOSFSMNT_RONLY) == 0) {
 		/* Mark the volume clean; re-mark it dirty if that fails. */
 		error = markvoldirty(pmp, 0);
 		if (error && error != ENXIO) {
 			(void)markvoldirty(pmp, 1);
 			return (error);
 		}
 	}
 	/* Close the character set conversion handles, if any were opened. */
 	if (pmp->pm_flags & MSDOSFSMNT_KICONV && msdosfs_iconv) {
 		if (pmp->pm_w2u)
 			msdosfs_iconv->close(pmp->pm_w2u);
 		if (pmp->pm_u2w)
 			msdosfs_iconv->close(pmp->pm_u2w);
 		if (pmp->pm_d2u)
 			msdosfs_iconv->close(pmp->pm_d2u);
 		if (pmp->pm_u2d)
 			msdosfs_iconv->close(pmp->pm_u2d);
 	}
 
 #ifdef MSDOSFS_DEBUG
 	{
 		struct vnode *vp = pmp->pm_devvp;
 		struct bufobj *bo;
 
 		bo = &vp->v_bufobj;
 		BO_LOCK(bo);
 		VI_LOCK(vp);
 		vn_printf(vp,
 		    "msdosfs_umount(): just before calling VOP_CLOSE()\n");
 		printf("freef %p, freeb %p, mount %p\n",
 		    TAILQ_NEXT(vp, v_vnodelist), vp->v_vnodelist.tqe_prev,
 		    vp->v_mount);
 		printf("cleanblkhd %p, dirtyblkhd %p, numoutput %ld, type %d\n",
 		    TAILQ_FIRST(&vp->v_bufobj.bo_clean.bv_hd),
 		    TAILQ_FIRST(&vp->v_bufobj.bo_dirty.bv_hd),
 		    vp->v_bufobj.bo_numoutput, vp->v_type);
 		VI_UNLOCK(vp);
 		BO_UNLOCK(bo);
 	}
 #endif
 	/* Release what mountmsdosfs() acquired. */
 	g_topology_lock();
 	g_vfs_close(pmp->pm_cp);
 	g_topology_unlock();
 	atomic_store_rel_ptr((uintptr_t *)&pmp->pm_dev->si_mountpt, 0);
 	vrele(pmp->pm_devvp);
 	dev_rel(pmp->pm_dev);
 	free(pmp->pm_inusemap, M_MSDOSFSFAT);
 	lockdestroy(&pmp->pm_fatlock);
 	free(pmp, M_MSDOSFSMNT);
 	mp->mnt_data = NULL;
 	MNT_ILOCK(mp);
 	mp->mnt_flag &= ~MNT_LOCAL;
 	MNT_IUNLOCK(mp);
 	return (error);
 }
 
 static int
 msdosfs_root(struct mount *mp, int flags, struct vnode **vpp)
 {
 	struct msdosfsmount *pmp = VFSTOMSDOSFS(mp);
 	struct denode *ndep;
 	int error;
 
 #ifdef MSDOSFS_DEBUG
 	printf("msdosfs_root(); mp %p, pmp %p\n", mp, pmp);
 #endif
 	error = deget(pmp, MSDOSFSROOT, MSDOSFSROOT_OFS, &ndep);
 	if (error)
 		return (error);
 	*vpp = DETOV(ndep);
 	return (0);
 }
 
 /*
  * Fill in the size and usage fields of *sbp from the mount structure.
  * One cluster is reported as one block.  Always returns 0.
  */
 static int
 msdosfs_statfs(struct mount *mp, struct statfs *sbp)
 {
 	struct msdosfsmount *pmp = VFSTOMSDOSFS(mp);
 
 	/* Block and preferred I/O size are both the cluster size. */
 	sbp->f_iosize = sbp->f_bsize = pmp->pm_bpcluster;
 
 	sbp->f_blocks = pmp->pm_maxcluster + 1;
 	sbp->f_bavail = sbp->f_bfree = pmp->pm_freeclustercount;
 
 	sbp->f_files = pmp->pm_RootDirEnts;	/* XXX */
 	sbp->f_ffree = 0;	/* what to put in here? */
 	return (0);
 }
 
 /*
  * If we have an FSInfo block, update it.
  */
 static int
 msdosfs_fsiflush(struct msdosfsmount *pmp, int waitfor)
 {
 	struct fsinfo *fp;
 	struct buf *bp;
 	int error;
 
 	MSDOSFS_LOCK_MP(pmp);
 	if (pmp->pm_fsinfo == 0 || (pmp->pm_flags & MSDOSFS_FSIMOD) == 0) {
 		error = 0;
 		goto unlock;
 	}
 	error = bread(pmp->pm_devvp, pmp->pm_fsinfo, pmp->pm_BytesPerSec,
 	    NOCRED, &bp);
 	if (error != 0) {
 		goto unlock;
 	}
 	fp = (struct fsinfo *)bp->b_data;
 	putulong(fp->fsinfree, pmp->pm_freeclustercount);
 	putulong(fp->fsinxtfree, pmp->pm_nxtfree);
 	pmp->pm_flags &= ~MSDOSFS_FSIMOD;
 	if (waitfor == MNT_WAIT)
 		error = bwrite(bp);
 	else
 		bawrite(bp);
 unlock:
 	MSDOSFS_UNLOCK_MP(pmp);
 	return (error);
 }
 
 /*
  * Flush the filesystem.  Every vnode whose denode has pending
  * access/create/update/modified flags, or which has dirty buffers, is
  * fsynced.  Unless waitfor is MNT_LAZY the device vnode is fsynced as
  * well.  Finally the FSInfo block is updated via msdosfs_fsiflush().
  *
  * Returns 0, or the most recent error encountered along the way.
  */
 static int
 msdosfs_sync(struct mount *mp, int waitfor)
 {
 	struct vnode *vp, *nvp;
 	struct thread *td;
 	struct denode *dep;
 	struct msdosfsmount *pmp = VFSTOMSDOSFS(mp);
 	int error, allerror = 0;
 
 	td = curthread;
 
 	/*
 	 * If we ever switch to not updating all of the FATs all the time,
 	 * this would be the place to update them from the first one.
 	 */
 	if (pmp->pm_fmod != 0) {
 		if (pmp->pm_flags & MSDOSFSMNT_RONLY)
 			panic("msdosfs_sync: rofs mod");
 		else {
 			/* update FATs here */
 		}
 	}
 	/*
 	 * Write back each (modified) denode.
 	 */
 loop:
 	MNT_VNODE_FOREACH_ALL(vp, mp, nvp) {
 		if (vp->v_type == VNON) {
 			VI_UNLOCK(vp);
 			continue;
 		}
 		dep = VTODE(vp);
 		/*
 		 * Skip the vnode if no denode flag is pending and either
 		 * there are no dirty buffers or this is a lazy sync.
 		 */
 		if ((dep->de_flag &
 		    (DE_ACCESS | DE_CREATE | DE_UPDATE | DE_MODIFIED)) == 0 &&
 		    (vp->v_bufobj.bo_dirty.bv_cnt == 0 ||
 		    waitfor == MNT_LAZY)) {
 			VI_UNLOCK(vp);
 			continue;
 		}
 		error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, td);
 		if (error) {
 			/* On ENOENT, abort the iteration and rescan. */
 			if (error == ENOENT) {
 				MNT_VNODE_FOREACH_ALL_ABORT(mp, nvp);
 				goto loop;
 			}
 			continue;
 		}
 		error = VOP_FSYNC(vp, waitfor, td);
 		if (error)
 			allerror = error;
 		VOP_UNLOCK(vp);
 		vrele(vp);
 	}
 
 	/*
 	 * Flush filesystem control info.
 	 */
 	if (waitfor != MNT_LAZY) {
 		vn_lock(pmp->pm_devvp, LK_EXCLUSIVE | LK_RETRY);
 		error = VOP_FSYNC(pmp->pm_devvp, waitfor, td);
 		if (error)
 			allerror = error;
 		VOP_UNLOCK(pmp->pm_devvp);
 	}
 
 	error = msdosfs_fsiflush(pmp, waitfor);
 	if (error != 0)
 		allerror = error;
 	return (allerror);
 }
 
 static int
 msdosfs_fhtovp(struct mount *mp, struct fid *fhp, int flags, struct vnode **vpp)
 {
 	struct msdosfsmount *pmp = VFSTOMSDOSFS(mp);
 	struct defid *defhp = (struct defid *) fhp;
 	struct denode *dep;
 	int error;
 
 	error = deget(pmp, defhp->defid_dirclust, defhp->defid_dirofs, &dep);
 	if (error) {
 		*vpp = NULLVP;
 		return (error);
 	}
 	*vpp = DETOV(dep);
 	vnode_create_vobject(*vpp, dep->de_FileSize, curthread);
 	return (0);
 }
 
 /*
  * VFS operations vector for msdosfs, registered under the name
  * "msdosfs" by VFS_SET() below.  Operations not listed here are left
  * unset by the designated initializer.
  */
 static struct vfsops msdosfs_vfsops = {
 	.vfs_fhtovp =		msdosfs_fhtovp,
 	.vfs_mount =		msdosfs_mount,
 	.vfs_cmount =		msdosfs_cmount,
 	.vfs_root =		msdosfs_root,
 	.vfs_statfs =		msdosfs_statfs,
 	.vfs_sync =		msdosfs_sync,
 	.vfs_unmount =		msdosfs_unmount,
 };
 
 VFS_SET(msdosfs_vfsops, msdosfs, 0);
 MODULE_VERSION(msdosfs, 1);
Index: head/sys/fs/nfs/nfsdport.h
===================================================================
--- head/sys/fs/nfs/nfsdport.h	(revision 362157)
+++ head/sys/fs/nfs/nfsdport.h	(revision 362158)
@@ -1,124 +1,124 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2009 Rick Macklem, University of Guelph
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 /*
  * These macros handle nfsvattr fields. They look a bit silly here, but
  * are quite different for the Darwin port.
  */
 #define	NFSVNO_ATTRINIT(n)		(VATTR_NULL(&((n)->na_vattr)))
 #define	NFSVNO_SETATTRVAL(n, f, v)	((n)->na_##f = (v))
 #define	NFSVNO_SETACTIVE(n, f)
 #define	NFSVNO_UNSET(n, f)		((n)->na_##f = VNOVAL)
 #define	NFSVNO_NOTSETMODE(n)		((n)->na_mode == ((mode_t)VNOVAL))
 #define	NFSVNO_ISSETMODE(n)		((n)->na_mode != ((mode_t)VNOVAL))
 #define	NFSVNO_NOTSETUID(n)		((n)->na_uid == ((uid_t)VNOVAL))
 #define	NFSVNO_ISSETUID(n)		((n)->na_uid != ((uid_t)VNOVAL))
 #define	NFSVNO_NOTSETGID(n)		((n)->na_gid == ((gid_t)VNOVAL))
 #define	NFSVNO_ISSETGID(n)		((n)->na_gid != ((gid_t)VNOVAL))
 #define	NFSVNO_NOTSETSIZE(n)		((n)->na_size == VNOVAL)
 #define	NFSVNO_ISSETSIZE(n)		((n)->na_size != VNOVAL)
 #define	NFSVNO_NOTSETATIME(n)		((n)->na_atime.tv_sec == VNOVAL)
 #define	NFSVNO_ISSETATIME(n)		((n)->na_atime.tv_sec != VNOVAL)
 #define	NFSVNO_NOTSETMTIME(n)		((n)->na_mtime.tv_sec == VNOVAL)
 #define	NFSVNO_ISSETMTIME(n)		((n)->na_mtime.tv_sec != VNOVAL)
 
 /*
  * This structure acts as a "catch-all" for information that
  * needs to be returned by nfsd_fhtovp().
  *
  * nes_exflag holds the MNT_EX* export bits that the NFSVNO_EX*()
  * macros in this header test and set.
  */
 struct nfsexstuff {
-	int	nes_exflag;			/* export flags */
+	uint64_t nes_exflag;			/* export flags */
 	int	nes_numsecflavor;		/* # of security flavors */
 	int	nes_secflavors[MAXSECFLAVORS];	/* and the flavors */
 };
 
 /*
  * These are NO-OPS for BSD until Isilon upstreams EXITCODE support.
  * EXITCODE is an in-memory ring buffer that holds a routine's failing status.
  * This is a valuable tool to use when debugging and analyzing issues.
  * In addition to recording a routine's failing status, it offers
  * logging of routines for call stack tracing.
  * EXITCODE should be used only in routines that return a true errno value, as
  * that value will be formatted to a displayable errno string.  Routines that
  * return a regular int status that is not a true errno should not set
  * EXITCODE.
  * If you want to log routine tracing, you can add EXITCODE(0) to any routine.
  * NFS extended the EXITCODE with EXITCODE2 to record either the routine's
  * exit errno status or the nd_repstat.
  */
 #define	NFSEXITCODE(error)
 #define	NFSEXITCODE2(error, nd)
 
 /*
  * Accessors for the export flags in a struct nfsexstuff (e).
  * NFSVNO_EXINIT() clears the flags.  The test macros evaluate to the
  * masked bit itself (nonzero when set), not to a normalized 0/1.
  */
 #define	NFSVNO_EXINIT(e)		((e)->nes_exflag = 0)
 #define	NFSVNO_EXPORTED(e)		((e)->nes_exflag & MNT_EXPORTED)
 #define	NFSVNO_EXRDONLY(e)		((e)->nes_exflag & MNT_EXRDONLY)
 #define	NFSVNO_EXPORTANON(e)		((e)->nes_exflag & MNT_EXPORTANON)
 #define	NFSVNO_EXSTRICTACCESS(e)	((e)->nes_exflag & MNT_EXSTRICTACCESS)
 #define	NFSVNO_EXV4ONLY(e)		((e)->nes_exflag & MNT_EXV4ONLY)
 
 /* Overwrites all flags: only MNT_EXPORTED and MNT_EXRDONLY remain set. */
 #define	NFSVNO_SETEXRDONLY(e)	((e)->nes_exflag = (MNT_EXPORTED|MNT_EXRDONLY))
 
 #define	NFSVNO_CMPFH(f1, f2)						\
     (fsidcmp(&(f1)->fh_fsid, &(f2)->fh_fsid) == 0 &&			\
      bcmp(&(f1)->fh_fid, &(f2)->fh_fid, sizeof(struct fid)) == 0)
 
 #define	NFSLOCKHASH(f) 							\
 	(&nfslockhash[nfsrv_hashfh(f) % nfsrv_lockhashsize])
 
 #define	NFSFPVNODE(f)	((struct vnode *)((f)->f_data))
 #define	NFSFPCRED(f)	((f)->f_cred)
 #define	NFSFPFLAG(f)	((f)->f_flag)
 
 #define	NFSNAMEICNDSET(n, c, o, f)	do {				\
 	(n)->cn_cred = (c);						\
 	(n)->cn_nameiop = (o);						\
 	(n)->cn_flags = (f);						\
     } while (0)
 
 /*
  * A little bit of Darwin vfs kpi.
  */
 #define	vnode_mount(v)	((v)->v_mount)
 #define	vfs_statfs(m)	(&((m)->mnt_stat))
 
 #define	NFSPATHLEN_T	size_t
 
 /*
  * These are set to the minimum and maximum size of a server file
  * handle.
  */
 #define	NFSRV_MINFH	(sizeof (fhandle_t))
 #define	NFSRV_MAXFH	(sizeof (fhandle_t))
 
 /* Use this macro for debug printfs. */
 #define	NFSD_DEBUG(level, ...)	do {					\
 		if (nfsd_debuglevel >= (level))				\
 			printf(__VA_ARGS__);				\
 	} while (0)
 
Index: head/sys/fs/nfs/nfsport.h
===================================================================
--- head/sys/fs/nfs/nfsport.h	(revision 362157)
+++ head/sys/fs/nfs/nfsport.h	(revision 362158)
@@ -1,1195 +1,1200 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * Rick Macklem at The University of Guelph.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #ifndef _NFS_NFSPORT_H_
 #define	_NFS_NFSPORT_H_
 
 /*
  * In general, I'm not fond of #includes in .h files, but this seems
  * to be the cleanest way to handle #include files for the ports.
  */
 #ifdef _KERNEL
 #include <sys/unistd.h>
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/conf.h>
 #include <sys/dirent.h>
 #include <sys/domain.h>
 #include <sys/fcntl.h>
 #include <sys/file.h>
 #include <sys/filedesc.h>
 #include <sys/jail.h>
 #include <sys/kernel.h>
 #include <sys/lockf.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/mount.h>
 #include <sys/mutex.h>
 #include <sys/namei.h>
 #include <sys/proc.h>
 #include <sys/protosw.h>
 #include <sys/reboot.h>
 #include <sys/resourcevar.h>
 #include <sys/signalvar.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/stat.h>
 #include <sys/syslog.h>
 #include <sys/sysproto.h>
 #include <sys/time.h>
 #include <sys/uio.h>
 #include <sys/vnode.h>
 #include <sys/bio.h>
 #include <sys/buf.h>
 #include <sys/acl.h>
 #include <sys/module.h>
 #include <sys/sysent.h>
 #include <sys/syscall.h>
 #include <sys/priv.h>
 #include <sys/kthread.h>
 #include <sys/syscallsubr.h>
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/radix.h>
 #include <net/route.h>
 #include <net/if_dl.h>
 #include <netinet/in.h>
 #include <netinet/in_pcb.h>
 #include <netinet/in_systm.h>
 #include <netinet/in_var.h>
 #include <netinet/ip.h>
 #include <netinet/ip_var.h>
 #include <netinet/tcp.h>
 #include <netinet/tcp_fsm.h>
 #include <netinet/tcp_seq.h>
 #include <netinet/tcp_timer.h>
 #include <netinet/tcp_var.h>
 #include <machine/in_cksum.h>
 #include <sys/md5.h>
 #include <rpc/rpc.h>
 #include <rpc/rpcsec_gss.h>
 
 #include <ufs/ufs/dir.h>
 #include <ufs/ufs/quota.h>
 #include <ufs/ufs/inode.h>
 #include <ufs/ufs/extattr.h>
 #include <ufs/ufs/ufsmount.h>
 #include <vm/uma.h>
 #include <vm/vm.h>
 #include <vm/vm_object.h>
 #include <vm/vm_extern.h>
 #include <nfs/nfssvc.h>
 #include "opt_nfs.h"
 #include "opt_ufs.h"
 
 /*
  * These types must be defined before the nfs includes.
  */
 #define	NFSSOCKADDR_T	struct sockaddr *
 #define	NFSPROC_T	struct thread
 #define	NFSDEV_T	dev_t
 #define	NFSSVCARGS	nfssvc_args
 #define	NFSACL_T	struct acl
 
 /*
  * These should be defined as the types used for the corresponding VOP's
  * argument type.
  */
 #define	NFS_ACCESS_ARGS		struct vop_access_args
 #define	NFS_OPEN_ARGS		struct vop_open_args
 #define	NFS_GETATTR_ARGS	struct vop_getattr_args
 #define	NFS_LOOKUP_ARGS		struct vop_lookup_args
 #define	NFS_READDIR_ARGS	struct vop_readdir_args
 
 /*
  * Allocate mbufs. Must succeed and never set the mbuf ptr to NULL.
  *
  * Each macro issues MGET()/MGETHDR() with M_WAITOK and MT_DATA and, while
  * (m) is still NULL, calls nfs_catnap() and retries the allocation.
  * NFSMCLGET() additionally calls MCLGET((m), (w)) once (m) is non-NULL.
  */
 #define	NFSMGET(m)	do { 					\
 		MGET((m), M_WAITOK, MT_DATA); 			\
 		while ((m) == NULL ) { 				\
 			(void) nfs_catnap(PZERO, 0, "nfsmget");	\
 			MGET((m), M_WAITOK, MT_DATA); 		\
 		} 						\
 	} while (0)
 #define	NFSMGETHDR(m)	do { 					\
 		MGETHDR((m), M_WAITOK, MT_DATA);		\
 		while ((m) == NULL ) { 				\
 			(void) nfs_catnap(PZERO, 0, "nfsmget");	\
 			MGETHDR((m), M_WAITOK, MT_DATA); 	\
 		} 						\
 	} while (0)
 #define	NFSMCLGET(m, w)	do { 					\
 		MGET((m), M_WAITOK, MT_DATA); 			\
 		while ((m) == NULL ) { 				\
 			(void) nfs_catnap(PZERO, 0, "nfsmget");	\
 			MGET((m), M_WAITOK, MT_DATA); 		\
 		} 						\
 		MCLGET((m), (w));				\
 	} while (0)
 /*
  * NOTE(review): NFSMCLGETHDR() accepts (w) but never uses it and, unlike
  * NFSMCLGET(), does not call MCLGET().  Confirm whether callers rely on
  * this before changing it.
  */
 #define	NFSMCLGETHDR(m, w) do { 				\
 		MGETHDR((m), M_WAITOK, MT_DATA);		\
 		while ((m) == NULL ) { 				\
 			(void) nfs_catnap(PZERO, 0, "nfsmget");	\
 			MGETHDR((m), M_WAITOK, MT_DATA); 	\
 		} 						\
 	} while (0)
 
 /*
  * Client side constant for size of a lockowner name.
  */
 #define	NFSV4CL_LOCKNAMELEN	12
 
 /*
  * Type for a mutex lock.
  */
 #define	NFSMUTEX_T		struct mtx
 
 #endif	/* _KERNEL */
 
 /*
  * NFSv4 Operation numbers.
  */
 #define	NFSV4OP_ACCESS		3
 #define	NFSV4OP_CLOSE		4
 #define	NFSV4OP_COMMIT		5
 #define	NFSV4OP_CREATE		6
 #define	NFSV4OP_DELEGPURGE	7
 #define	NFSV4OP_DELEGRETURN	8
 #define	NFSV4OP_GETATTR		9
 #define	NFSV4OP_GETFH		10
 #define	NFSV4OP_LINK		11
 #define	NFSV4OP_LOCK		12
 #define	NFSV4OP_LOCKT		13
 #define	NFSV4OP_LOCKU		14
 #define	NFSV4OP_LOOKUP		15
 #define	NFSV4OP_LOOKUPP		16
 #define	NFSV4OP_NVERIFY		17
 #define	NFSV4OP_OPEN		18
 #define	NFSV4OP_OPENATTR	19
 #define	NFSV4OP_OPENCONFIRM	20
 #define	NFSV4OP_OPENDOWNGRADE	21
 #define	NFSV4OP_PUTFH		22
 #define	NFSV4OP_PUTPUBFH	23
 #define	NFSV4OP_PUTROOTFH	24
 #define	NFSV4OP_READ		25
 #define	NFSV4OP_READDIR		26
 #define	NFSV4OP_READLINK	27
 #define	NFSV4OP_REMOVE		28
 #define	NFSV4OP_RENAME		29
 #define	NFSV4OP_RENEW		30
 #define	NFSV4OP_RESTOREFH	31
 #define	NFSV4OP_SAVEFH		32
 #define	NFSV4OP_SECINFO		33
 #define	NFSV4OP_SETATTR		34
 #define	NFSV4OP_SETCLIENTID	35
 #define	NFSV4OP_SETCLIENTIDCFRM	36
 #define	NFSV4OP_VERIFY		37
 #define	NFSV4OP_WRITE		38
 #define	NFSV4OP_RELEASELCKOWN	39
 
 /*
  * Must be one greater than the last Operation#.
  */
 #define	NFSV4OP_NOPS		40
 
 /*
  * Additional Ops for NFSv4.1.
  */
 #define	NFSV4OP_BACKCHANNELCTL	40
 #define	NFSV4OP_BINDCONNTOSESS	41
 #define	NFSV4OP_EXCHANGEID	42
 #define	NFSV4OP_CREATESESSION	43
 #define	NFSV4OP_DESTROYSESSION	44
 #define	NFSV4OP_FREESTATEID	45
 #define	NFSV4OP_GETDIRDELEG	46
 #define	NFSV4OP_GETDEVINFO	47
 #define	NFSV4OP_GETDEVLIST	48
 #define	NFSV4OP_LAYOUTCOMMIT	49
 #define	NFSV4OP_LAYOUTGET	50
 #define	NFSV4OP_LAYOUTRETURN	51
 #define	NFSV4OP_SECINFONONAME	52
 #define	NFSV4OP_SEQUENCE	53
 #define	NFSV4OP_SETSSV		54
 #define	NFSV4OP_TESTSTATEID	55
 #define	NFSV4OP_WANTDELEG	56
 #define	NFSV4OP_DESTROYCLIENTID	57
 #define	NFSV4OP_RECLAIMCOMPL	58
 
 /*
  * Must be one more than last op#.
  */
 #define	NFSV41_NOPS		59
 
 /*
  * Additional operations for NFSv4.2.
  */
 #define	NFSV4OP_ALLOCATE	59
 #define	NFSV4OP_COPY		60
 #define	NFSV4OP_COPYNOTIFY	61
 #define	NFSV4OP_DEALLOCATE	62
 #define	NFSV4OP_IOADVISE	63
 #define	NFSV4OP_LAYOUTERROR	64
 #define	NFSV4OP_LAYOUTSTATS	65
 #define	NFSV4OP_OFFLOADCANCEL	66
 #define	NFSV4OP_OFFLOADSTATUS	67
 #define	NFSV4OP_READPLUS	68
 #define	NFSV4OP_SEEK		69
 #define	NFSV4OP_WRITESAME	70
 #define	NFSV4OP_CLONE		71
 
 /* One greater than the last Operation # defined in RFC-7862. */
 #define	NFSV42_PURENOPS		72
 
 /* and the optional Extended attribute operations (RFC-8276). */
 #define	NFSV4OP_GETXATTR	72
 #define	NFSV4OP_SETXATTR	73
 #define	NFSV4OP_LISTXATTRS	74
 #define	NFSV4OP_REMOVEXATTR	75
 
 /*
  * Must be one more than the last NFSv4.2 op#.
  */
 #define	NFSV42_NOPS		76
 
 /* Quirky case of the illegal op code */
 #define	NFSV4OP_OPILLEGAL	10044
 
 /*
  * Fake NFSV4OP_xxx used for nfsstat. Start at NFSV42_NOPS.
  * The entries below use offsets 0 through 8 from NFSV42_NOPS.
  */
 #define	NFSV4OP_SYMLINK		(NFSV42_NOPS)
 #define	NFSV4OP_MKDIR		(NFSV42_NOPS + 1)
 #define	NFSV4OP_RMDIR		(NFSV42_NOPS + 2)
 #define	NFSV4OP_READDIRPLUS	(NFSV42_NOPS + 3)
 #define	NFSV4OP_MKNOD		(NFSV42_NOPS + 4)
 #define	NFSV4OP_FSSTAT		(NFSV42_NOPS + 5)
 #define	NFSV4OP_FSINFO		(NFSV42_NOPS + 6)
 #define	NFSV4OP_PATHCONF	(NFSV42_NOPS + 7)
 #define	NFSV4OP_V3CREATE	(NFSV42_NOPS + 8)
 
 /*
  * This is the count of the fake operations listed above.
  * Nine entries are defined above (offsets 0 through 8), matching this value.
  */
 #define	NFSV4OP_FAKENOPS	9
 
 /*
  * and the Callback OPs
  */
 #define	NFSV4OP_CBGETATTR	3
 #define	NFSV4OP_CBRECALL	4
 
 /*
  * Must be one greater than the last Callback Operation# for NFSv4.0.
  */
 #define	NFSV4OP_CBNOPS		5
 
 /*
  * Additional Callback Ops for NFSv4.1 only.
  */
 #define	NFSV4OP_CBLAYOUTRECALL	5
 #define	NFSV4OP_CBNOTIFY	6
 #define	NFSV4OP_CBPUSHDELEG	7
 #define	NFSV4OP_CBRECALLANY	8
 #define	NFSV4OP_CBRECALLOBJAVAIL 9
 #define	NFSV4OP_CBRECALLSLOT	10
 #define	NFSV4OP_CBSEQUENCE	11
 #define	NFSV4OP_CBWANTCANCELLED	12
 #define	NFSV4OP_CBNOTIFYLOCK	13
 #define	NFSV4OP_CBNOTIFYDEVID	14
 
 #define	NFSV41_CBNOPS		15
 
 /*
  * Additional callback operations for NFSv4.2.
  */
 #define	NFSV4OP_CBOFFLOAD	15
 
 #define	NFSV42_CBNOPS		16
 
 /*
  * The lower numbers -> 21 are used by NFSv2 and v3. These define higher
  * numbers used by NFSv4.
  * NFS_V3NPROCS is one greater than the last V3 op and NFS_NPROCS is
  * one greater than the last number.
  */
 #ifndef	NFS_V3NPROCS
 #define	NFS_V3NPROCS		22
 
 #define	NFSPROC_LOOKUPP		22
 #define	NFSPROC_SETCLIENTID	23
 #define	NFSPROC_SETCLIENTIDCFRM	24
 #define	NFSPROC_LOCK		25
 #define	NFSPROC_LOCKU		26
 #define	NFSPROC_OPEN		27
 #define	NFSPROC_CLOSE		28
 #define	NFSPROC_OPENCONFIRM	29
 #define	NFSPROC_LOCKT		30
 #define	NFSPROC_OPENDOWNGRADE	31
 #define	NFSPROC_RENEW		32
 #define	NFSPROC_PUTROOTFH	33
 #define	NFSPROC_RELEASELCKOWN	34
 #define	NFSPROC_DELEGRETURN	35
 #define	NFSPROC_RETDELEGREMOVE	36
 #define	NFSPROC_RETDELEGRENAME1	37
 #define	NFSPROC_RETDELEGRENAME2	38
 #define	NFSPROC_GETACL		39
 #define	NFSPROC_SETACL		40
 
 /*
  * Must be defined as one higher than the last Proc# above.
  */
 #define	NFSV4_NPROCS		41
 
 /* Additional procedures for NFSv4.1. */
 #define	NFSPROC_EXCHANGEID	41
 #define	NFSPROC_CREATESESSION	42
 #define	NFSPROC_DESTROYSESSION	43
 #define	NFSPROC_DESTROYCLIENT	44
 #define	NFSPROC_FREESTATEID	45
 #define	NFSPROC_LAYOUTGET	46
 #define	NFSPROC_GETDEVICEINFO	47
 #define	NFSPROC_LAYOUTCOMMIT	48
 #define	NFSPROC_LAYOUTRETURN	49
 #define	NFSPROC_RECLAIMCOMPL	50
 #define	NFSPROC_WRITEDS		51
 #define	NFSPROC_READDS		52
 #define	NFSPROC_COMMITDS	53
 #define	NFSPROC_OPENLAYGET	54
 #define	NFSPROC_CREATELAYGET	55
 
 /*
  * Must be defined as one higher than the last NFSv4.1 Proc# above.
  */
 #define	NFSV41_NPROCS		56
 
 /* Additional procedures for NFSv4.2. */
 #define	NFSPROC_IOADVISE	56
 #define	NFSPROC_ALLOCATE	57
 #define	NFSPROC_COPY		58
 #define	NFSPROC_SEEK		59
 #define	NFSPROC_SEEKDS		60
 
 /* and the ones for the optional Extended attribute support (RFC-8276). */
 #define	NFSPROC_GETEXTATTR	61
 #define	NFSPROC_SETEXTATTR	62
 #define	NFSPROC_RMEXTATTR	63
 #define	NFSPROC_LISTEXTATTR	64
 
 /*
  * Must be defined as one higher than the last NFSv4.2 Proc# above.
  */
 #define	NFSV42_NPROCS		65
 
 #endif	/* NFS_V3NPROCS */
 
 /*
  * Newest stats structure.
  * The vers field will be set to NFSSTATS_V1 by the caller.
  *
  * Apart from the int members vers and srvcache_size and the struct bintime
  * members at the end, every member is uint64_t.  The per-procedure and
  * per-operation arrays are dimensioned larger than the current counts
  * (NFSV42_NPROCS + 15, NFSV42_NOPS + NFSV4OP_FAKENOPS + 15 and
  * NFSV42_CBNOPS + 10); presumably the slack leaves room for new entries
  * without changing the layout -- confirm.
  */
 #define	NFSSTATS_V1	2
 struct nfsstatsv1 {
 	int		vers;	/* Set to version requested by caller. */
 	uint64_t	attrcache_hits;
 	uint64_t	attrcache_misses;
 	uint64_t	lookupcache_hits;
 	uint64_t	lookupcache_misses;
 	uint64_t	direofcache_hits;
 	uint64_t	direofcache_misses;
 	uint64_t	accesscache_hits;
 	uint64_t	accesscache_misses;
 	uint64_t	biocache_reads;
 	uint64_t	read_bios;
 	uint64_t	read_physios;
 	uint64_t	biocache_writes;
 	uint64_t	write_bios;
 	uint64_t	write_physios;
 	uint64_t	biocache_readlinks;
 	uint64_t	readlink_bios;
 	uint64_t	biocache_readdirs;
 	uint64_t	readdir_bios;
 	uint64_t	rpccnt[NFSV42_NPROCS + 15];
 	uint64_t	rpcretries;
 	uint64_t	srvrpccnt[NFSV42_NOPS + NFSV4OP_FAKENOPS + 15];
 	uint64_t	srvrpc_errs;
 	uint64_t	srv_errs;
 	uint64_t	rpcrequests;
 	uint64_t	rpctimeouts;
 	uint64_t	rpcunexpected;
 	uint64_t	rpcinvalid;
 	uint64_t	srvcache_inproghits;
 	uint64_t	srvcache_idemdonehits;
 	uint64_t	srvcache_nonidemdonehits;
 	uint64_t	srvcache_misses;
 	uint64_t	srvcache_tcppeak;
 	int		srvcache_size;	/* Updated by atomic_xx_int(). */
 	uint64_t	srvclients;
 	uint64_t	srvopenowners;
 	uint64_t	srvopens;
 	uint64_t	srvlockowners;
 	uint64_t	srvlocks;
 	uint64_t	srvdelegates;
 	uint64_t	cbrpccnt[NFSV42_CBNOPS + 10];
 	uint64_t	clopenowners;
 	uint64_t	clopens;
 	uint64_t	cllockowners;
 	uint64_t	cllocks;
 	uint64_t	cldelegates;
 	uint64_t	cllocalopenowners;
 	uint64_t	cllocalopens;
 	uint64_t	cllocallockowners;
 	uint64_t	cllocallocks;
 	uint64_t	srvstartcnt;
 	uint64_t	srvdonecnt;
 	uint64_t	srvbytes[NFSV42_NOPS + NFSV4OP_FAKENOPS + 15];
 	uint64_t	srvops[NFSV42_NOPS + NFSV4OP_FAKENOPS + 15];
 	struct bintime	srvduration[NFSV42_NOPS + NFSV4OP_FAKENOPS + 15];
 	struct bintime	busyfrom;
 	struct bintime	busytime;
 };
 
 /*
  * Newer stats structure.
  * The vers field will be set to NFSSTATS_OV1 by the caller.
  *
  * Member names and order match struct nfsstatsv1; only the array bounds
  * differ.  Here rpccnt[] holds NFSV42_NPROCS + 4 entries, the server
  * per-operation arrays hold NFSV42_PURENOPS + NFSV4OP_FAKENOPS entries and
  * cbrpccnt[] holds NFSV42_CBNOPS entries.
  */
 #define	NFSSTATS_OV1	1
 struct nfsstatsov1 {
 	int		vers;	/* Set to version requested by caller. */
 	uint64_t	attrcache_hits;
 	uint64_t	attrcache_misses;
 	uint64_t	lookupcache_hits;
 	uint64_t	lookupcache_misses;
 	uint64_t	direofcache_hits;
 	uint64_t	direofcache_misses;
 	uint64_t	accesscache_hits;
 	uint64_t	accesscache_misses;
 	uint64_t	biocache_reads;
 	uint64_t	read_bios;
 	uint64_t	read_physios;
 	uint64_t	biocache_writes;
 	uint64_t	write_bios;
 	uint64_t	write_physios;
 	uint64_t	biocache_readlinks;
 	uint64_t	readlink_bios;
 	uint64_t	biocache_readdirs;
 	uint64_t	readdir_bios;
 	uint64_t	rpccnt[NFSV42_NPROCS + 4];
 	uint64_t	rpcretries;
 	uint64_t	srvrpccnt[NFSV42_PURENOPS + NFSV4OP_FAKENOPS];
 	uint64_t	srvrpc_errs;
 	uint64_t	srv_errs;
 	uint64_t	rpcrequests;
 	uint64_t	rpctimeouts;
 	uint64_t	rpcunexpected;
 	uint64_t	rpcinvalid;
 	uint64_t	srvcache_inproghits;
 	uint64_t	srvcache_idemdonehits;
 	uint64_t	srvcache_nonidemdonehits;
 	uint64_t	srvcache_misses;
 	uint64_t	srvcache_tcppeak;
 	int		srvcache_size;	/* Updated by atomic_xx_int(). */
 	uint64_t	srvclients;
 	uint64_t	srvopenowners;
 	uint64_t	srvopens;
 	uint64_t	srvlockowners;
 	uint64_t	srvlocks;
 	uint64_t	srvdelegates;
 	uint64_t	cbrpccnt[NFSV42_CBNOPS];
 	uint64_t	clopenowners;
 	uint64_t	clopens;
 	uint64_t	cllockowners;
 	uint64_t	cllocks;
 	uint64_t	cldelegates;
 	uint64_t	cllocalopenowners;
 	uint64_t	cllocalopens;
 	uint64_t	cllocallockowners;
 	uint64_t	cllocallocks;
 	uint64_t	srvstartcnt;
 	uint64_t	srvdonecnt;
 	uint64_t	srvbytes[NFSV42_PURENOPS + NFSV4OP_FAKENOPS];
 	uint64_t	srvops[NFSV42_PURENOPS + NFSV4OP_FAKENOPS];
 	struct bintime	srvduration[NFSV42_PURENOPS + NFSV4OP_FAKENOPS];
 	struct bintime	busyfrom;
 	struct bintime	busytime;
 };
 
 /*
  * Old stats structure.
  *
  * Every member is a plain int, and the structure has no vers field.  The
  * arrays are sized by the NFSv4.0 limits: NFSV4_NPROCS,
  * NFSV4OP_NOPS + NFSV4OP_FAKENOPS and NFSV4OP_CBNOPS.
  */
 struct ext_nfsstats {
 	int	attrcache_hits;
 	int	attrcache_misses;
 	int	lookupcache_hits;
 	int	lookupcache_misses;
 	int	direofcache_hits;
 	int	direofcache_misses;
 	int	accesscache_hits;
 	int	accesscache_misses;
 	int	biocache_reads;
 	int	read_bios;
 	int	read_physios;
 	int	biocache_writes;
 	int	write_bios;
 	int	write_physios;
 	int	biocache_readlinks;
 	int	readlink_bios;
 	int	biocache_readdirs;
 	int	readdir_bios;
 	int	rpccnt[NFSV4_NPROCS];
 	int	rpcretries;
 	int	srvrpccnt[NFSV4OP_NOPS + NFSV4OP_FAKENOPS];
 	int	srvrpc_errs;
 	int	srv_errs;
 	int	rpcrequests;
 	int	rpctimeouts;
 	int	rpcunexpected;
 	int	rpcinvalid;
 	int	srvcache_inproghits;
 	int	srvcache_idemdonehits;
 	int	srvcache_nonidemdonehits;
 	int	srvcache_misses;
 	int	srvcache_tcppeak;
 	int	srvcache_size;
 	int	srvclients;
 	int	srvopenowners;
 	int	srvopens;
 	int	srvlockowners;
 	int	srvlocks;
 	int	srvdelegates;
 	int	cbrpccnt[NFSV4OP_CBNOPS];
 	int	clopenowners;
 	int	clopens;
 	int	cllockowners;
 	int	cllocks;
 	int	cldelegates;
 	int	cllocalopenowners;
 	int	cllocalopens;
 	int	cllocallockowners;
 	int	cllocallocks;
 };
 
 #ifdef _KERNEL
 /*
  * Define NFS_NPROCS as NFSV4_NPROCS for the experimental kernel code.
  */
 #ifndef	NFS_NPROCS
 #define	NFS_NPROCS		NFSV4_NPROCS
 #endif
 
 #include <fs/nfs/nfskpiport.h>
 #include <fs/nfs/nfsdport.h>
 #include <fs/nfs/rpcv2.h>
 #include <fs/nfs/nfsproto.h>
 #include <fs/nfs/nfs.h>
 #include <fs/nfs/nfsclstate.h>
 #include <fs/nfs/nfs_var.h>
 #include <fs/nfs/nfsm_subs.h>
 #include <fs/nfs/nfsrvcache.h>
 #include <fs/nfs/nfsrvstate.h>
 #include <fs/nfs/xdr_subs.h>
 #include <fs/nfs/nfscl.h>
 #include <nfsclient/nfsargs.h>
 #include <fs/nfsclient/nfsmount.h>
 
 /*
  * Just to keep nfs_var.h happy.
  */
 struct nfs_vattr {
 	int	junk;
 };
 
 /*
  * Attribute record that embeds a struct vattr (na_vattr) followed by three
  * NFS-specific members.  The na_xxx macros below are shorthand for the
  * corresponding va_xxx members of the embedded na_vattr.
  */
 struct nfsvattr {
 	struct vattr	na_vattr;
 	nfsattrbit_t	na_suppattr;
 	u_int64_t	na_mntonfileno;
 	u_int64_t	na_filesid[2];
 };
 
 #define	na_type		na_vattr.va_type
 #define	na_mode		na_vattr.va_mode
 #define	na_nlink	na_vattr.va_nlink
 #define	na_uid		na_vattr.va_uid
 #define	na_gid		na_vattr.va_gid
 #define	na_fsid		na_vattr.va_fsid
 #define	na_fileid	na_vattr.va_fileid
 #define	na_size		na_vattr.va_size
 #define	na_blocksize	na_vattr.va_blocksize
 #define	na_atime	na_vattr.va_atime
 #define	na_mtime	na_vattr.va_mtime
 #define	na_ctime	na_vattr.va_ctime
 #define	na_gen		na_vattr.va_gen
 #define	na_flags	na_vattr.va_flags
 #define	na_rdev		na_vattr.va_rdev
 #define	na_bytes	na_vattr.va_bytes
 #define	na_filerev	na_vattr.va_filerev
 #define	na_vaflags	na_vattr.va_vaflags
 
 #include <fs/nfsclient/nfsnode.h>
 
 /*
  * This is the header structure used for the lists, etc. (It has the
  * above record in it.)
  * nsf_lease and nsf_numboots are shorthand for the lease and numboots
  * members of the embedded nsf_rec.
  */
 struct nfsrv_stablefirst {
 	LIST_HEAD(, nfsrv_stable) nsf_head;	/* Head of nfsrv_stable list */
 	time_t		nsf_eograce;	/* Time grace period ends */
 	time_t		*nsf_bootvals;	/* Previous boottime values */
 	struct file	*nsf_fp;	/* File table pointer */
 	u_char		nsf_flags;	/* NFSNSF_ flags */
 	struct nfsf_rec	nsf_rec;	/* and above first record */
 };
 #define	nsf_lease	nsf_rec.lease
 #define	nsf_numboots	nsf_rec.numboots
 
 /* NFSNSF_xxx flags */
 #define	NFSNSF_UPDATEDONE	0x01
 #define	NFSNSF_GRACEOVER	0x02
 #define	NFSNSF_NEEDLOCK		0x04
 #define	NFSNSF_EXPIREDCLIENT	0x08
 #define	NFSNSF_NOOPENS		0x10
 #define	NFSNSF_OK		0x20
 
 /*
  * Maximum number of boot times allowed in record. Although there is
  * really no need for a fixed upper bound, this serves as a sanity check
  * for a corrupted file.
  */
 #define	NFSNSF_MAXNUMBOOTS	10000
 
 /*
  * This structure defines the other records in the file. The
  * nst_client array is actually the size of the client string name.
  * The client member is declared with a single element; given the sentence
  * above, the record is presumably allocated with extra trailing space for
  * the full name -- confirm against the code that allocates it.
  */
 struct nfst_rec {
 	u_int16_t	len;
 	u_char		flag;
 	u_char		client[1];
 };
 /* and the values for flag */
 #define	NFSNST_NEWSTATE	0x1
 #define	NFSNST_REVOKE		0x2
 #define	NFSNST_GOTSTATE		0x4
 #define	NFSNST_RECLAIMED	0x8
 
 /*
  * This structure is linked onto nfsrv_stablefirst for the duration of
  * reclaim.
  * The nst_xxx macros are shorthand for members of the embedded nst_rec.
  *
  * NOTE(review): nst_timestamp expands to nst_rec.timestamp, but struct
  * nfst_rec as declared in this header has only len, flag and client, so
  * any use of nst_timestamp would not compile.  Confirm it is unused.
  */
 struct nfsrv_stable {
 	LIST_ENTRY(nfsrv_stable) nst_list;
 	struct nfsclient	*nst_clp;
 	struct nfst_rec		nst_rec;
 };
 #define	nst_timestamp	nst_rec.timestamp
 #define	nst_len		nst_rec.len
 #define	nst_flag	nst_rec.flag
 #define	nst_client	nst_rec.client
 
 /*
  * At some point the server will run out of kernel storage for
  * state structures. For FreeBSD5.2, this results in a panic
  * kmem_map is full. It happens at well over 1000000 opens plus
  * locks on a PIII-800 with 256Mbytes, so that is where I've set
  * the limit. If your server panics due to too many opens/locks,
  * decrease the size of NFSRV_V4STATELIMIT. If you find the server
  * returning NFS4ERR_RESOURCE a lot and have lots of memory, try
  * increasing it.
  */
 #define	NFSRV_V4STATELIMIT	500000	/* Max # of Opens + Locks */
 
 /*
  * The type required differs with BSDen (just the second arg).
  */
 void nfsrvd_rcv(struct socket *, void *, int);
 
 /*
  * Macros for handling socket addresses. (Hopefully this makes the code
  * more portable, since I've noticed some 'BSD don't have sockaddrs in
  * mbufs any more.)
  * NFSSOCKADDR() casts (a) to the type (t); NFSSOCKADDRSIZE() stores (s)
  * in the sa_len member of (a).
  */
 #define	NFSSOCKADDR(a, t)	((t)(a))
 #define	NFSSOCKADDRSIZE(a, s)		((a)->sa_len = (s))
 
 /*
  * These should be defined as a process or thread structure, as required
  * for signal handling, etc.
  * NFSNEWCRED()/NFSFREECRED() map to crdup()/crfree(); NFSPROCCRED() and
  * NFSPROCP() read the td_ucred and td_proc members of (p).
  * NFSUIOPROC() ignores its (p) argument and stores NULL in (u)->uio_td.
  */
 #define	NFSNEWCRED(c)		(crdup(c))
 #define	NFSPROCCRED(p)		((p)->td_ucred)
 #define	NFSFREECRED(c)		(crfree(c))
 #define	NFSUIOPROC(u, p)	((u)->uio_td = NULL)
 #define	NFSPROCP(p)		((p)->td_proc)
 
 /*
  * Define these so that cn_hash and its length is ignored.
  */
 #define	NFSCNHASHZERO(c)
 #define	NFSCNHASH(c, v)
 #define	NCHNAMLEN	9999999
 
 /*
  * These macros are defined to initialize and set the timer routine.
  */
 #define	NFS_TIMERINIT \
 	newnfs_timer(NULL)
 
 /*
  * Handle SMP stuff:
  */
 #define	NFSSTATESPINLOCK	extern struct mtx nfs_state_mutex
 #define	NFSLOCKSTATE()		mtx_lock(&nfs_state_mutex)
 #define	NFSUNLOCKSTATE()	mtx_unlock(&nfs_state_mutex)
 #define	NFSSTATEMUTEXPTR	(&nfs_state_mutex)
 #define	NFSREQSPINLOCK		extern struct mtx nfs_req_mutex
 #define	NFSLOCKREQ()		mtx_lock(&nfs_req_mutex)
 #define	NFSUNLOCKREQ()		mtx_unlock(&nfs_req_mutex)
 #define	NFSSOCKMUTEX		extern struct mtx nfs_slock_mutex
 #define	NFSSOCKMUTEXPTR		(&nfs_slock_mutex)
 #define	NFSLOCKSOCK()		mtx_lock(&nfs_slock_mutex)
 #define	NFSUNLOCKSOCK()		mtx_unlock(&nfs_slock_mutex)
 #define	NFSNAMEIDMUTEX		extern struct mtx nfs_nameid_mutex
 #define	NFSNAMEIDMUTEXPTR	(&nfs_nameid_mutex)
 #define	NFSLOCKNAMEID()		mtx_lock(&nfs_nameid_mutex)
 #define	NFSUNLOCKNAMEID()	mtx_unlock(&nfs_nameid_mutex)
 #define	NFSNAMEIDREQUIRED()	mtx_assert(&nfs_nameid_mutex, MA_OWNED)
 #define	NFSCLSTATEMUTEX		extern struct mtx nfs_clstate_mutex
 #define	NFSCLSTATEMUTEXPTR	(&nfs_clstate_mutex)
 #define	NFSLOCKCLSTATE()	mtx_lock(&nfs_clstate_mutex)
 #define	NFSUNLOCKCLSTATE()	mtx_unlock(&nfs_clstate_mutex)
 #define	NFSDLOCKMUTEX		extern struct mtx newnfsd_mtx
 #define	NFSDLOCKMUTEXPTR	(&newnfsd_mtx)
 #define	NFSD_LOCK()		mtx_lock(&newnfsd_mtx)
 #define	NFSD_UNLOCK()		mtx_unlock(&newnfsd_mtx)
 #define	NFSD_LOCK_ASSERT()	mtx_assert(&newnfsd_mtx, MA_OWNED)
 #define	NFSD_UNLOCK_ASSERT()	mtx_assert(&newnfsd_mtx, MA_NOTOWNED)
 #define	NFSV4ROOTLOCKMUTEX	extern struct mtx nfs_v4root_mutex
 #define	NFSV4ROOTLOCKMUTEXPTR	(&nfs_v4root_mutex)
 #define	NFSLOCKV4ROOTMUTEX()	mtx_lock(&nfs_v4root_mutex)
 #define	NFSUNLOCKV4ROOTMUTEX()	mtx_unlock(&nfs_v4root_mutex)
 #define	NFSLOCKNODE(n)		mtx_lock(&((n)->n_mtx))
 #define	NFSUNLOCKNODE(n)	mtx_unlock(&((n)->n_mtx))
 #define	NFSASSERTNODE(n)	mtx_assert(&((n)->n_mtx), MA_OWNED)
 #define	NFSLOCKMNT(m)		mtx_lock(&((m)->nm_mtx))
 #define	NFSUNLOCKMNT(m)		mtx_unlock(&((m)->nm_mtx))
 #define	NFSLOCKIOD()		mtx_lock(&ncl_iod_mutex)
 #define	NFSUNLOCKIOD()		mtx_unlock(&ncl_iod_mutex)
 #define	NFSASSERTIOD()		mtx_assert(&ncl_iod_mutex, MA_OWNED)
 #define	NFSLOCKREQUEST(r)	mtx_lock(&((r)->r_mtx))
 #define	NFSUNLOCKREQUEST(r)	mtx_unlock(&((r)->r_mtx))
 #define	NFSLOCKSOCKREQ(r)	mtx_lock(&((r)->nr_mtx))
 #define	NFSUNLOCKSOCKREQ(r)	mtx_unlock(&((r)->nr_mtx))
 #define	NFSLOCKDS(d)		mtx_lock(&((d)->nfsclds_mtx))
 #define	NFSUNLOCKDS(d)		mtx_unlock(&((d)->nfsclds_mtx))
 #define	NFSSESSIONMUTEXPTR(s)	(&((s)->mtx))
 #define	NFSLOCKSESSION(s)	mtx_lock(&((s)->mtx))
 #define	NFSUNLOCKSESSION(s)	mtx_unlock(&((s)->mtx))
 #define	NFSLAYOUTMUTEXPTR(l)	(&((l)->mtx))
 #define	NFSLOCKLAYOUT(l)	mtx_lock(&((l)->mtx))
 #define	NFSUNLOCKLAYOUT(l)	mtx_unlock(&((l)->mtx))
 #define	NFSDDSMUTEXPTR		(&nfsrv_dslock_mtx)
 #define	NFSDDSLOCK()		mtx_lock(&nfsrv_dslock_mtx)
 #define	NFSDDSUNLOCK()		mtx_unlock(&nfsrv_dslock_mtx)
 #define	NFSDDONTLISTMUTEXPTR	(&nfsrv_dontlistlock_mtx)
 #define	NFSDDONTLISTLOCK()	mtx_lock(&nfsrv_dontlistlock_mtx)
 #define	NFSDDONTLISTUNLOCK()	mtx_unlock(&nfsrv_dontlistlock_mtx)
 #define	NFSDRECALLMUTEXPTR	(&nfsrv_recalllock_mtx)
 #define	NFSDRECALLLOCK()	mtx_lock(&nfsrv_recalllock_mtx)
 #define	NFSDRECALLUNLOCK()	mtx_unlock(&nfsrv_recalllock_mtx)
 
 /*
  * Use these macros to initialize/free a mutex.
  */
 #define	NFSINITSOCKMUTEX(m)	mtx_init((m), "nfssock", NULL, MTX_DEF)
 #define	NFSFREEMUTEX(m)		mtx_destroy((m))
 
 int nfsmsleep(void *, void *, int, const char *, struct timespec *);
 
 /*
  * And weird vm stuff in the nfs server.
  */
 #define	PDIRUNLOCK	0x0
 #define	MAX_COMMIT_COUNT	(1024 * 1024)
 
 /*
  * Define these to handle the type of va_rdev.
  */
 #define	NFSMAKEDEV(m, n)	makedev((m), (n))
 #define	NFSMAJOR(d)		major(d)
 #define	NFSMINOR(d)		minor(d)
 
 /*
  * The vnode tag for nfsv4root.
  */
 #define	VT_NFSV4ROOT		"nfsv4root"
 
 /*
  * Define whatever it takes to do a vn_rdwr().
  * The ten macro arguments are forwarded in order, with a constant NULL
  * inserted between (c) and (a), so vn_rdwr() receives eleven arguments.
  */
 #define	NFSD_RDWR(r, v, b, l, o, s, i, c, a, p) \
 	vn_rdwr((r), (v), (b), (l), (o), (s), (i), (c), NULL, (a), (p))
 
 /*
  * Macros for handling memory for different BSDen.
  * NFSBCOPY(src, dst, len) - copies len bytes, non-overlapping
  * NFSOVBCOPY(src, dst, len) - ditto, but data areas might overlap
  * NFSBCMP(cp1, cp2, len) - compare len bytes, return 0 if same
  * NFSBZERO(cp, len) - set len bytes to 0x0
  */
 #define	NFSBCOPY(s, d, l)	bcopy((s), (d), (l))
 #define	NFSOVBCOPY(s, d, l)	ovbcopy((s), (d), (l))
 #define	NFSBCMP(s, d, l)	bcmp((s), (d), (l))
 #define	NFSBZERO(s, l)		bzero((s), (l))
 
 /*
  * Some queue.h files don't have these defined in them.
  */
 #define	LIST_END(head)		NULL
 #define	SLIST_END(head)		NULL
 #define	TAILQ_END(head)		NULL
 
 /*
  * This must be defined to be a global variable that increments once
  * per second, but never stops or goes backwards, even when a "date"
  * command changes the TOD clock. It is used for delta times for
  * leases, etc.
  */
 #define	NFSD_MONOSEC		time_uptime
 
 /*
  * Declare the malloc types.
  * Each type is declared under its M_NEWNFSxxx name; the #defines that
  * follow let code refer to the same type by the shorter M_NFSxxx name.
  * M_NEWNFSMNT is declared but has no M_NFSMNT alias in the list below.
  */
 MALLOC_DECLARE(M_NEWNFSRVCACHE);
 MALLOC_DECLARE(M_NEWNFSDCLIENT);
 MALLOC_DECLARE(M_NEWNFSDSTATE);
 MALLOC_DECLARE(M_NEWNFSDLOCK);
 MALLOC_DECLARE(M_NEWNFSDLOCKFILE);
 MALLOC_DECLARE(M_NEWNFSSTRING);
 MALLOC_DECLARE(M_NEWNFSUSERGROUP);
 MALLOC_DECLARE(M_NEWNFSDREQ);
 MALLOC_DECLARE(M_NEWNFSFH);
 MALLOC_DECLARE(M_NEWNFSCLOWNER);
 MALLOC_DECLARE(M_NEWNFSCLOPEN);
 MALLOC_DECLARE(M_NEWNFSCLDELEG);
 MALLOC_DECLARE(M_NEWNFSCLCLIENT);
 MALLOC_DECLARE(M_NEWNFSCLLOCKOWNER);
 MALLOC_DECLARE(M_NEWNFSCLLOCK);
 MALLOC_DECLARE(M_NEWNFSDIROFF);
 MALLOC_DECLARE(M_NEWNFSV4NODE);
 MALLOC_DECLARE(M_NEWNFSDIRECTIO);
 MALLOC_DECLARE(M_NEWNFSMNT);
 MALLOC_DECLARE(M_NEWNFSDROLLBACK);
 MALLOC_DECLARE(M_NEWNFSLAYOUT);
 MALLOC_DECLARE(M_NEWNFSFLAYOUT);
 MALLOC_DECLARE(M_NEWNFSDEVINFO);
 MALLOC_DECLARE(M_NEWNFSSOCKREQ);
 MALLOC_DECLARE(M_NEWNFSCLDS);
 MALLOC_DECLARE(M_NEWNFSLAYRECALL);
 MALLOC_DECLARE(M_NEWNFSDSESSION);
 #define	M_NFSRVCACHE	M_NEWNFSRVCACHE
 #define	M_NFSDCLIENT	M_NEWNFSDCLIENT
 #define	M_NFSDSTATE	M_NEWNFSDSTATE
 #define	M_NFSDLOCK	M_NEWNFSDLOCK
 #define	M_NFSDLOCKFILE	M_NEWNFSDLOCKFILE
 #define	M_NFSSTRING	M_NEWNFSSTRING
 #define	M_NFSUSERGROUP	M_NEWNFSUSERGROUP
 #define	M_NFSDREQ	M_NEWNFSDREQ
 #define	M_NFSFH		M_NEWNFSFH
 #define	M_NFSCLOWNER	M_NEWNFSCLOWNER
 #define	M_NFSCLOPEN	M_NEWNFSCLOPEN
 #define	M_NFSCLDELEG	M_NEWNFSCLDELEG
 #define	M_NFSCLCLIENT	M_NEWNFSCLCLIENT
 #define	M_NFSCLLOCKOWNER M_NEWNFSCLLOCKOWNER
 #define	M_NFSCLLOCK	M_NEWNFSCLLOCK
 #define	M_NFSDIROFF	M_NEWNFSDIROFF
 #define	M_NFSV4NODE	M_NEWNFSV4NODE
 #define	M_NFSDIRECTIO	M_NEWNFSDIRECTIO
 #define	M_NFSDROLLBACK	M_NEWNFSDROLLBACK
 #define	M_NFSLAYOUT	M_NEWNFSLAYOUT
 #define	M_NFSFLAYOUT	M_NEWNFSFLAYOUT
 #define	M_NFSDEVINFO	M_NEWNFSDEVINFO
 #define	M_NFSSOCKREQ	M_NEWNFSSOCKREQ
 #define	M_NFSCLDS	M_NEWNFSCLDS
 #define	M_NFSLAYRECALL	M_NEWNFSLAYRECALL
 #define	M_NFSDSESSION	M_NEWNFSDSESSION
 
 /*
  * Non-zero when (set) contains at least one of SIGINT, SIGTERM, SIGHUP,
  * SIGKILL or SIGQUIT, as tested with SIGISMEMBER().
  */
 #define	NFSINT_SIGMASK(set) 						\
 	(SIGISMEMBER(set, SIGINT) || SIGISMEMBER(set, SIGTERM) ||	\
 	 SIGISMEMBER(set, SIGHUP) || SIGISMEMBER(set, SIGKILL) ||	\
 	 SIGISMEMBER(set, SIGQUIT))
 
 /*
  * Convert a quota block count to byte count.
  * Multiplies (q) in place by (b); (q) must be a modifiable lvalue.
  * The whole expansion is parenthesized so the macro also behaves as
  * expected inside a larger expression: the result is the updated (q),
  * and trailing operators no longer bind to (b).
  */
 #define	NFSQUOTABLKTOBYTE(q, b)	((q) *= (b))
 
 /*
  * Define this as the largest file size supported. (It should probably
  * be available via a VFS_xxx Op, but it isn't.)
  */
 #define	NFSRV_MAXFILESIZE	((u_int64_t)0x800000000000)
 
 /*
  * Set this macro to index() or strchr(), whichever is supported.
  */
 #define	STRCHR(s, c)		strchr((s), (c))
 
 /*
  * Set the n_time in the client write rpc, as required.
  * When (w) is non-zero, takes the node lock on (n), copies na_mtime from
  * (a) into n_mtime and, if (v4) is also non-zero, copies na_filerev into
  * n_change, then drops the lock.  Does nothing when (w) is zero.
  */
 #define	NFSWRITERPC_SETTIME(w, n, a, v4)				\
 	do {								\
 		if (w) {						\
 			NFSLOCKNODE(n);					\
 			(n)->n_mtime = (a)->na_mtime;			\
 			if (v4)						\
 				(n)->n_change = (a)->na_filerev;	\
 			NFSUNLOCKNODE(n);				\
 		}							\
 	} while (0)
 
 /*
  * Fake value, just to make the client work.
  */
 #define	NFS_LATTR_NOSHRINK	1
 
 /*
  * Prototypes for functions where the arguments vary for different ports.
  */
 int nfscl_loadattrcache(struct vnode **, struct nfsvattr *, void *, void *,
     int, int);
 int newnfs_realign(struct mbuf **, int);
 bool ncl_pager_setsize(struct vnode *vp, u_quad_t *nsizep);
 
 /*
  * If the port runs on an SMP box that can enforce Atomic ops with low
  * overheads, define these as atomic increments/decrements. If not,
  * don't worry about it, since these are used for stats that can be
  * "out by one" without disastrous consequences.
  */
 #define	NFSINCRGLOBAL(a)	((a)++)
 
 /*
  * Assorted funky stuff to make things work under Darwin8.
  */
 /*
  * These macros check for a field in vattr being set.
  * NFSATTRISSET() compares member (a) of (v) with VNOVAL cast to type (t);
  * NFSATTRISSETTIME() compares the tv_sec of member (a) with VNOVAL.
  * Either is true when the member differs from VNOVAL.
  */
 #define	NFSATTRISSET(t, v, a)	((v)->a != (t)VNOVAL)
 #define	NFSATTRISSETTIME(v, a)	((v)->a.tv_sec != VNOVAL)
 
 /*
  * Manipulate mount flags.
  */
 #define	NFSSTA_HASWRITEVERF	0x00040000  /* Has write verifier */
 #define	NFSSTA_GOTFSINFO	0x00100000  /* Got the fsinfo */
 #define	NFSSTA_OPENMODE		0x00200000  /* Must use correct open mode */
 #define	NFSSTA_FLEXFILE		0x00800000  /* Use Flex File Layout */
 #define	NFSSTA_NOLAYOUTCOMMIT	0x04000000  /* Don't do LayoutCommit */
 #define	NFSSTA_SESSPERSIST	0x08000000  /* Has a persistent session */
 #define	NFSSTA_TIMEO		0x10000000  /* Experiencing a timeout */
 #define	NFSSTA_LOCKTIMEO	0x20000000  /* Experiencing a lockd timeout */
 #define	NFSSTA_HASSETFSID	0x40000000  /* Has set the fsid */
 #define	NFSSTA_PNFS		0x80000000  /* pNFS is enabled */
 
 /*
  * Tests and setters on the mount structure (n).
  * NFSMNT_xxx bits are tested in nm_flag and NFSSTA_xxx bits in nm_state;
  * those tests yield the masked bits (non-zero when set), not 0/1.
  * NFSHASNFSV4N() and NFSHASONEOPENOWN() instead evaluate to 0 or 1, and
  * both depend on nm_minorvers being greater than zero.
  * NFSSETWRITEVERF() and NFSSETHASSETFSID() OR a state bit into nm_state.
  * Note that NFSHASNOCONN() tests the NFSMNT_MNTD bit.
  */
 #define	NFSHASNFSV3(n)		((n)->nm_flag & NFSMNT_NFSV3)
 #define	NFSHASNFSV4(n)		((n)->nm_flag & NFSMNT_NFSV4)
 #define	NFSHASNFSV4N(n)		((n)->nm_minorvers > 0)
 #define	NFSHASNFSV3OR4(n)	((n)->nm_flag & (NFSMNT_NFSV3 | NFSMNT_NFSV4))
 #define	NFSHASGOTFSINFO(n)	((n)->nm_state & NFSSTA_GOTFSINFO)
 #define	NFSHASHASSETFSID(n)	((n)->nm_state & NFSSTA_HASSETFSID)
 #define	NFSHASSTRICT3530(n)	((n)->nm_flag & NFSMNT_STRICT3530)
 #define	NFSHASWRITEVERF(n)	((n)->nm_state & NFSSTA_HASWRITEVERF)
 #define	NFSHASINT(n)		((n)->nm_flag & NFSMNT_INT)
 #define	NFSHASSOFT(n)		((n)->nm_flag & NFSMNT_SOFT)
 #define	NFSHASINTORSOFT(n)	((n)->nm_flag & (NFSMNT_INT | NFSMNT_SOFT))
 #define	NFSHASDUMBTIMR(n)	((n)->nm_flag & NFSMNT_DUMBTIMR)
 #define	NFSHASNOCONN(n)		((n)->nm_flag & NFSMNT_MNTD)
 #define	NFSHASKERB(n)		((n)->nm_flag & NFSMNT_KERB)
 #define	NFSHASALLGSSNAME(n)	((n)->nm_flag & NFSMNT_ALLGSSNAME)
 #define	NFSHASINTEGRITY(n)	((n)->nm_flag & NFSMNT_INTEGRITY)
 #define	NFSHASPRIVACY(n)	((n)->nm_flag & NFSMNT_PRIVACY)
 #define	NFSSETWRITEVERF(n)	((n)->nm_state |= NFSSTA_HASWRITEVERF)
 #define	NFSSETHASSETFSID(n)	((n)->nm_state |= NFSSTA_HASSETFSID)
 #define	NFSHASPNFSOPT(n)	((n)->nm_flag & NFSMNT_PNFS)
 #define	NFSHASNOLAYOUTCOMMIT(n)	((n)->nm_state & NFSSTA_NOLAYOUTCOMMIT)
 #define	NFSHASSESSPERSIST(n)	((n)->nm_state & NFSSTA_SESSPERSIST)
 #define	NFSHASPNFS(n)		((n)->nm_state & NFSSTA_PNFS)
 #define	NFSHASFLEXFILE(n)	((n)->nm_state & NFSSTA_FLEXFILE)
 #define	NFSHASOPENMODE(n)	((n)->nm_state & NFSSTA_OPENMODE)
 #define	NFSHASONEOPENOWN(n)	(((n)->nm_flag & NFSMNT_ONEOPENOWN) != 0 &&	\
 				    (n)->nm_minorvers > 0)
 
 /*
  * Gets the stats field out of the mount structure.
  */
 #define	vfs_statfs(m)	(&((m)->mnt_stat))
 
 /*
  * Set boottime.
  */
 #define	NFSSETBOOTTIME(b)	(getboottime(&b))
 
 /*
  * The size of directory blocks in the buffer cache.
  * MUST BE in the range of PAGE_SIZE <= NFS_DIRBLKSIZ <= MAXBSIZE!!
  */
 #define	NFS_DIRBLKSIZ	(16 * DIRBLKSIZ) /* Must be a multiple of DIRBLKSIZ */
 
 /*
  * Define these macros to access mnt_flag fields.
  */
 #define	NFSMNT_RDONLY(m)	((m)->mnt_flag & MNT_RDONLY)
 #endif	/* _KERNEL */
 
 /*
  * Define a structure similar to ufs_args for use in exporting the V4 root.
  */
 struct nfsex_args {
 	char	*fspec;
 	struct export_args	export;
 };
 
+struct nfsex_oldargs {	/* Laid out like nfsex_args, but with o2export_args. */
+	char	*fspec;	/* Same leading member as in struct nfsex_args. */
+	struct o2export_args	export;	/* Presumably an older export_args layout - confirm. */
+};
+
 /*
  * These export flags should be defined, but there are no bits left.
  * Maybe a separate mnt_exflag field could be added or the mnt_flag
  * field increased to 64 bits?
  */
 #ifndef	MNT_EXSTRICTACCESS
 #define	MNT_EXSTRICTACCESS	0x0
 #endif
 #ifndef MNT_EXV4ONLY
 #define	MNT_EXV4ONLY		0x0
 #endif
 
 #ifdef _KERNEL
 /*
  * Define this to invalidate the attribute cache for the nfs node.
  */
 #define	NFSINVALATTRCACHE(n)	((n)->n_attrstamp = 0)
 
 /* Used for FreeBSD only */
 void nfsd_mntinit(void);
 
 /*
  * Define these for vnode lock/unlock ops.
  *
  * These are good abstractions to macro out, so that they can be added to
  * later, for debugging or stats, etc.
  */
 #define	NFSVOPLOCK(v, f)	vn_lock((v), (f))
 #define	NFSVOPUNLOCK(v)		VOP_UNLOCK((v))
 #define	NFSVOPISLOCKED(v)	VOP_ISLOCKED((v))
 
 /*
  * Define ncl_hash().
  */
 #define	ncl_hash(f, l)	(fnv_32_buf((f), (l), FNV1_32_INIT))
 
 int newnfs_iosize(struct nfsmount *);
 
 int newnfs_vncmpf(struct vnode *, void *);
 
 #ifndef NFS_MINDIRATTRTIMO
 #define	NFS_MINDIRATTRTIMO 3		/* VDIR attrib cache timeout in sec */
 #endif
 #ifndef NFS_MAXDIRATTRTIMO
 #define	NFS_MAXDIRATTRTIMO 60
 #endif
 
 /*
  * Nfs outstanding request list element
  */
 struct nfsreq {
 	TAILQ_ENTRY(nfsreq) r_chain;
 	u_int32_t	r_flags;	/* flags on request, see below */
 	struct nfsmount *r_nmp;		/* Client mnt ptr */
 	struct mtx	r_mtx;		/* Mutex lock for this structure */
 };
 
 #ifndef NFS_MAXBSIZE
 #define	NFS_MAXBSIZE	(maxbcachebuf)
 #endif
 
 /*
  * This macro checks to see if issuing of delegations is allowed for this
  * vnode.
  */
 #ifdef VV_DISABLEDELEG
 #define	NFSVNO_DELEGOK(v)						\
 	((v) == NULL || ((v)->v_vflag & VV_DISABLEDELEG) == 0)
 #else
 #define	NFSVNO_DELEGOK(v)	(1)
 #endif
 
 /*
  * Name used by getnewvnode() to describe filesystem, "nfs".
  * For performance reasons it is useful to have the same string
  * used in both places that call getnewvnode().
  */
 extern const char nfs_vnode_tag[];
 
 /*
  * Check for the errors that indicate a DS should be disabled.
  * ENXIO indicates that the krpc cannot do an RPC on the DS.
  * EIO is returned by the RPC as an indication of I/O problems on the
  * server.
  * Are there other fatal errors?
  */
 #define	nfsds_failerr(e)	((e) == ENXIO || (e) == EIO)
 
 /*
  * Get a pointer to the MDS session, which is always the first element
  * in the list.
  * This macro can only be safely used when the NFSLOCKMNT() lock is held.
  * The inline function can be used when the lock isn't held.
  */
 #define	NFSMNT_MDSSESSION(m)	(&(TAILQ_FIRST(&((m)->nm_sess))->nfsclds_sess))
 
 /*
  * Locked flavour of NFSMNT_MDSSESSION(): nm_mtx is taken around the
  * lookup and NULL is returned when the mount's session list is empty.
  */
 static __inline struct nfsclsession *
 nfsmnt_mdssession(struct nfsmount *nmp)
 {
 	struct nfsclsession *mdssep;
 
 	mtx_lock(&nmp->nm_mtx);
 	mdssep = TAILQ_EMPTY(&nmp->nm_sess) ? NULL : NFSMNT_MDSSESSION(nmp);
 	mtx_unlock(&nmp->nm_mtx);
 	return (mdssep);
 }
 
 #endif	/* _KERNEL */
 
 #endif	/* _NFS_NFSPORT_H */
Index: head/sys/fs/nfsserver/nfs_nfsdport.c
===================================================================
--- head/sys/fs/nfsserver/nfs_nfsdport.c	(revision 362157)
+++ head/sys/fs/nfsserver/nfs_nfsdport.c	(revision 362158)
@@ -1,6464 +1,6519 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * Rick Macklem at The University of Guelph.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/capsicum.h>
 #include <sys/extattr.h>
 
 /*
  * Functions that perform the vfs operations required by the routines in
  * nfsd_serv.c. It is hoped that this change will make the server more
  * portable.
  */
 
 #include <fs/nfs/nfsport.h>
 #include <security/mac/mac_framework.h>
 #include <sys/filio.h>
 #include <sys/hash.h>
 #include <sys/sysctl.h>
 #include <nlm/nlm_prot.h>
 #include <nlm/nlm.h>
 
 FEATURE(nfsd, "NFSv4 server");
 
 extern u_int32_t newnfs_true, newnfs_false, newnfs_xdrneg1;
 extern int nfsrv_useacl;
 extern int newnfs_numnfsd;
 extern struct mount nfsv4root_mnt;
 extern struct nfsrv_stablefirst nfsrv_stablefirst;
 extern void (*nfsd_call_servertimer)(void);
 extern SVCPOOL	*nfsrvd_pool;
 extern struct nfsv4lock nfsd_suspend_lock;
 extern struct nfsclienthashhead *nfsclienthash;
 extern struct nfslockhashhead *nfslockhash;
 extern struct nfssessionhash *nfssessionhash;
 extern int nfsrv_sessionhashsize;
 extern struct nfsstatsv1 nfsstatsv1;
 extern struct nfslayouthash *nfslayouthash;
 extern int nfsrv_layouthashsize;
 extern struct mtx nfsrv_dslock_mtx;
 extern int nfs_pnfsiothreads;
 extern struct nfsdontlisthead nfsrv_dontlisthead;
 extern volatile int nfsrv_dontlistlen;
 extern volatile int nfsrv_devidcnt;
 extern int nfsrv_maxpnfsmirror;
 struct vfsoptlist nfsv4root_opt, nfsv4root_newopt;
 NFSDLOCKMUTEX;
 NFSSTATESPINLOCK;
 struct nfsrchash_bucket nfsrchash_table[NFSRVCACHE_HASHSIZE];
 struct nfsrchash_bucket nfsrcahash_table[NFSRVCACHE_HASHSIZE];
 struct mtx nfsrc_udpmtx;
 struct mtx nfs_v4root_mutex;
 struct mtx nfsrv_dontlistlock_mtx;
 struct mtx nfsrv_recalllock_mtx;
 struct nfsrvfh nfs_rootfh, nfs_pubfh;
 int nfs_pubfhset = 0, nfs_rootfhset = 0;
 struct proc *nfsd_master_proc = NULL;
 int nfsd_debuglevel = 0;
 static pid_t nfsd_master_pid = (pid_t)-1;
 static char nfsd_master_comm[MAXCOMLEN + 1];
 static struct timeval nfsd_master_start;
 static uint32_t nfsv4_sysid = 0;
 static fhandle_t zerofh;
 
 static int nfssvc_srvcall(struct thread *, struct nfssvc_args *,
     struct ucred *);
 
 int nfsrv_enable_crossmntpt = 1;
 static int nfs_commit_blks;
 static int nfs_commit_miss;
 extern int nfsrv_issuedelegs;
 extern int nfsrv_dolocallocks;
 extern int nfsd_enable_stringtouid;
 extern struct nfsdevicehead nfsrv_devidhead;
 
 static int nfsrv_createiovec(int, struct mbuf **, struct mbuf **,
     struct iovec **);
 static int nfsrv_createiovecw(int, struct mbuf *, char *, struct iovec **,
     int *);
 static void nfsrv_pnfscreate(struct vnode *, struct vattr *, struct ucred *,
     NFSPROC_T *);
 static void nfsrv_pnfsremovesetup(struct vnode *, NFSPROC_T *, struct vnode **,
     int *, char *, fhandle_t *);
 static void nfsrv_pnfsremove(struct vnode **, int, char *, fhandle_t *,
     NFSPROC_T *);
 static int nfsrv_proxyds(struct vnode *, off_t, int, struct ucred *,
     struct thread *, int, struct mbuf **, char *, struct mbuf **,
     struct nfsvattr *, struct acl *, off_t *, int, bool *);
 static int nfsrv_setextattr(struct vnode *, struct nfsvattr *, NFSPROC_T *);
 static int nfsrv_readdsrpc(fhandle_t *, off_t, int, struct ucred *,
     NFSPROC_T *, struct nfsmount *, struct mbuf **, struct mbuf **);
 static int nfsrv_writedsrpc(fhandle_t *, off_t, int, struct ucred *,
     NFSPROC_T *, struct vnode *, struct nfsmount **, int, struct mbuf **,
     char *, int *);
 static int nfsrv_allocatedsrpc(fhandle_t *, off_t, off_t, struct ucred *,
     NFSPROC_T *, struct vnode *, struct nfsmount **, int, int *);
 static int nfsrv_setacldsrpc(fhandle_t *, struct ucred *, NFSPROC_T *,
     struct vnode *, struct nfsmount **, int, struct acl *, int *);
 static int nfsrv_setattrdsrpc(fhandle_t *, struct ucred *, NFSPROC_T *,
     struct vnode *, struct nfsmount **, int, struct nfsvattr *, int *);
 static int nfsrv_getattrdsrpc(fhandle_t *, struct ucred *, NFSPROC_T *,
     struct vnode *, struct nfsmount *, struct nfsvattr *);
 static int nfsrv_seekdsrpc(fhandle_t *, off_t *, int, bool *, struct ucred *,
     NFSPROC_T *, struct nfsmount *);
 static int nfsrv_putfhname(fhandle_t *, char *);
 static int nfsrv_pnfslookupds(struct vnode *, struct vnode *,
     struct pnfsdsfile *, struct vnode **, NFSPROC_T *);
 static void nfsrv_pnfssetfh(struct vnode *, struct pnfsdsfile *, char *, char *,
     struct vnode *, NFSPROC_T *);
 static int nfsrv_dsremove(struct vnode *, char *, struct ucred *, NFSPROC_T *);
 static int nfsrv_dssetacl(struct vnode *, struct acl *, struct ucred *,
     NFSPROC_T *);
 static int nfsrv_pnfsstatfs(struct statfs *, struct mount *);
 
 int nfs_pnfsio(task_fn_t *, void *);
 
 SYSCTL_NODE(_vfs, OID_AUTO, nfsd, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
     "NFS server");
 SYSCTL_INT(_vfs_nfsd, OID_AUTO, mirrormnt, CTLFLAG_RW,
     &nfsrv_enable_crossmntpt, 0, "Enable nfsd to cross mount points");
 SYSCTL_INT(_vfs_nfsd, OID_AUTO, commit_blks, CTLFLAG_RW, &nfs_commit_blks,
     0, "");
 SYSCTL_INT(_vfs_nfsd, OID_AUTO, commit_miss, CTLFLAG_RW, &nfs_commit_miss,
     0, "");
 SYSCTL_INT(_vfs_nfsd, OID_AUTO, issue_delegations, CTLFLAG_RW,
     &nfsrv_issuedelegs, 0, "Enable nfsd to issue delegations");
 SYSCTL_INT(_vfs_nfsd, OID_AUTO, enable_locallocks, CTLFLAG_RW,
     &nfsrv_dolocallocks, 0, "Enable nfsd to acquire local locks on files");
 SYSCTL_INT(_vfs_nfsd, OID_AUTO, debuglevel, CTLFLAG_RW, &nfsd_debuglevel,
     0, "Debug level for NFS server");
 SYSCTL_INT(_vfs_nfsd, OID_AUTO, enable_stringtouid, CTLFLAG_RW,
     &nfsd_enable_stringtouid, 0, "Enable nfsd to accept numeric owner_names");
 static int nfsrv_pnfsgetdsattr = 1;
 SYSCTL_INT(_vfs_nfsd, OID_AUTO, pnfsgetdsattr, CTLFLAG_RW,
     &nfsrv_pnfsgetdsattr, 0, "When set getattr gets DS attributes via RPC");
 
 /*
  * nfsrv_dsdirsize can only be increased and only when the nfsd threads are
  * not running.
  * The dsN subdirectories for the increased values must have been created
  * on all DS servers before this increase is done.
  */
 u_int	nfsrv_dsdirsize = 20;
 static int
 sysctl_dsdirsize(SYSCTL_HANDLER_ARGS)
 {
 	int error, newdsdirsize;
 
 	newdsdirsize = nfsrv_dsdirsize;
 	error = sysctl_handle_int(oidp, &newdsdirsize, 0, req);
 	if (error != 0 || req->newptr == NULL)
 		return (error);
 	if (newdsdirsize <= nfsrv_dsdirsize || newdsdirsize > 10000 ||
 	    newnfs_numnfsd != 0)
 		return (EINVAL);
 	nfsrv_dsdirsize = newdsdirsize;
 	return (0);
 }
 SYSCTL_PROC(_vfs_nfsd, OID_AUTO, dsdirsize,
     CTLTYPE_UINT | CTLFLAG_MPSAFE | CTLFLAG_RW, 0, sizeof(nfsrv_dsdirsize),
     sysctl_dsdirsize, "IU", "Number of dsN subdirs on the DS servers");
 
 #define	MAX_REORDERED_RPC	16
 #define	NUM_HEURISTIC		1031
 #define	NHUSE_INIT		64
 #define	NHUSE_INC		16
 #define	NHUSE_MAX		2048
 
 static struct nfsheur {
 	struct vnode *nh_vp;	/* vp to match (unreferenced pointer) */
 	off_t nh_nextoff;	/* next offset for sequential detection */
 	int nh_use;		/* use count for selection */
 	int nh_seqcount;	/* heuristic */
 } nfsheur[NUM_HEURISTIC];
 
 
 /*
  * Sequential access heuristic.
  * Find (or recycle) the nfsheur[] slot that tracks vp and adjust its
  * nh_seqcount according to how the offset of this I/O relates to the
  * offset recorded in the slot.  The slot used is returned.
  */
 static struct nfsheur *
 nfsrv_sequential_heuristic(struct uio *uio, struct vnode *vp)
 {
 	struct nfsheur *slot;
 	int idx, probes;
 
 	/*
 	 * Probe 32 consecutive slots starting at the hashed index.
 	 * An exact match wins.  Otherwise decay the use count of each slot
 	 * passed over and remember the least used slot seen.
 	 */
 	idx = ((int)(vm_offset_t)vp / sizeof(struct vnode)) % NUM_HEURISTIC;
 	slot = &nfsheur[idx];
 	for (probes = 32; probes > 0; probes--) {
 		if (nfsheur[idx].nh_vp == vp) {
 			slot = &nfsheur[idx];
 			break;
 		}
 		if (nfsheur[idx].nh_use > 0)
 			nfsheur[idx].nh_use--;
 		idx = (idx + 1) % NUM_HEURISTIC;
 		if (nfsheur[idx].nh_use < slot->nh_use)
 			slot = &nfsheur[idx];
 	}
 
 	/* The chosen slot tracks something else: take it over for vp. */
 	if (slot->nh_vp != vp) {
 		slot->nh_vp = vp;
 		slot->nh_nextoff = uio->uio_offset;
 		slot->nh_use = NHUSE_INIT;
 		slot->nh_seqcount = (uio->uio_offset == 0) ? 4 : 1;
 	}
 
 	if ((uio->uio_offset == 0 && slot->nh_seqcount > 0) ||
 	    uio->uio_offset == slot->nh_nextoff) {
 		/* See comments in vfs_vnops.c:sequential_heuristic(). */
 		slot->nh_seqcount += howmany(uio->uio_resid, 16384);
 		if (slot->nh_seqcount > IO_SEQMAX)
 			slot->nh_seqcount = IO_SEQMAX;
 	} else if (qabs(uio->uio_offset - slot->nh_nextoff) >
 	    MAX_REORDERED_RPC *
 	    imax(vp->v_mount->mnt_stat.f_iosize, uio->uio_resid)) {
 		/*
 		 * Too far from the expected offset to be a reordered RPC,
 		 * so back the sequential count off.  (An offset within the
 		 * window leaves the count alone.)
 		 */
 		if (slot->nh_seqcount > 1)
 			slot->nh_seqcount /= 2;
 		else
 			slot->nh_seqcount = 0;
 	}
 
 	/* Credit the slot for this use, up to the cap. */
 	slot->nh_use += NHUSE_INC;
 	if (slot->nh_use > NHUSE_MAX)
 		slot->nh_use = NHUSE_MAX;
 	return (slot);
 }
 
 /*
  * Get attributes into nfsvattr structure.
  * vp - the vnode whose attributes are wanted
  * nvap - filled in via VOP_GETATTR(), then patched with DS attributes
  *	when those were acquired
  * nd - request descriptor; supplies the credentials and the ND_NFSV4 flag
  * p - thread handed to nfsrv_proxyds()
  * vpislocked - when 0, the vnode is shared locked here (unless it is
  *	already exclusively locked) and unlocked again before returning
  * attrbitp - requested attribute bits, or NULL
  * Returns the result of VOP_GETATTR().
  */
 int
 nfsvno_getattr(struct vnode *vp, struct nfsvattr *nvap,
     struct nfsrv_descript *nd, struct thread *p, int vpislocked,
     nfsattrbit_t *attrbitp)
 {
 	int error, gotattr, lockedit = 0;
 	struct nfsvattr na;
 
 	/*
 	 * "na" is only filled in when the DS attributes are acquired below,
 	 * but its na_filerev is reported by the debug statement at the end
 	 * in every case.  Give it a defined value so that an uninitialized
 	 * field is never read.
 	 */
 	na.na_filerev = 0;
 
 	if (vpislocked == 0) {
 		/*
 		 * When vpislocked == 0, the vnode is either exclusively
 		 * locked by this thread or not locked by this thread.
 		 * As such, shared lock it, if not exclusively locked.
 		 */
 		if (NFSVOPISLOCKED(vp) != LK_EXCLUSIVE) {
 			lockedit = 1;
 			NFSVOPLOCK(vp, LK_SHARED | LK_RETRY);
 		}
 	}
 
 	/*
 	 * Acquire the Change, Size, TimeAccess, TimeModify and SpaceUsed
 	 * attributes, as required.
 	 * This needs to be done for regular files if:
 	 * - non-NFSv4 RPCs or
 	 * - when attrbitp == NULL or
 	 * - an NFSv4 RPC with any of the above attributes in attrbitp.
 	 * A return of 0 for nfsrv_proxyds() indicates that it has acquired
 	 * these attributes.  nfsrv_proxyds() will return an error if the
 	 * server is not a pNFS one.
 	 */
 	gotattr = 0;
 	if (vp->v_type == VREG && nfsrv_devidcnt > 0 && (attrbitp == NULL ||
 	    (nd->nd_flag & ND_NFSV4) == 0 ||
 	    NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_CHANGE) ||
 	    NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SIZE) ||
 	    NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_TIMEACCESS) ||
 	    NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_TIMEMODIFY) ||
 	    NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SPACEUSED))) {
 		error = nfsrv_proxyds(vp, 0, 0, nd->nd_cred, p,
 		    NFSPROC_GETATTR, NULL, NULL, NULL, &na, NULL, NULL, 0,
 		    NULL);
 		if (error == 0)
 			gotattr = 1;
 	}
 
 	error = VOP_GETATTR(vp, &nvap->na_vattr, nd->nd_cred);
 	if (lockedit != 0)
 		NFSVOPUNLOCK(vp);
 
 	/*
 	 * If we got the Change, Size, Access Time, Modify Time and
 	 * Space Used from the DS, replace them.
 	 */
 	if (gotattr != 0) {
 		nvap->na_atime = na.na_atime;
 		nvap->na_mtime = na.na_mtime;
 		nvap->na_filerev = na.na_filerev;
 		nvap->na_size = na.na_size;
 		nvap->na_bytes = na.na_bytes;
 	}
 	NFSD_DEBUG(4, "nfsvno_getattr: gotattr=%d err=%d chg=%ju\n", gotattr,
 	    error, (uintmax_t)na.na_filerev);
 
 	NFSEXITCODE(error);
 	return (error);
 }
 
 /*
  * Build the file handle for a vnode.
  * *fhp is zeroed, the fsid of the vnode's mount is copied in and
  * VOP_VPTOFH() is asked to fill in the fid.  The VOP_VPTOFH() result is
  * returned.  The thread argument is not used.
  */
 int
 nfsvno_getfh(struct vnode *vp, fhandle_t *fhp, struct thread *p)
 {
 	struct mount *mp;
 	int error;
 
 	NFSBZERO((caddr_t)fhp, sizeof(*fhp));
 	mp = vp->v_mount;
 	fhp->fh_fsid = mp->mnt_stat.f_fsid;
 	error = VOP_VPTOFH(vp, &fhp->fh_fid);
 
 	NFSEXITCODE(error);
 	return (error);
 }
 
 /*
  * Perform access checking for vnodes obtained from file handles that would
  * refer to files already opened by a Unix client. You cannot just use
  * vn_writechk() and VOP_ACCESSX() for two reasons.
  * 1 - You must check for exported rdonly as well as MNT_RDONLY for the write
  *     case.
  * 2 - The owner is to be given access irrespective of mode bits for some
  *     operations, so that processes that chmod after opening a file don't
  *     break.
  *
  * vp - the vnode to check
  * accmode - the access being requested; VWRITE enables the read-only and
  *	text checks, VDELETE/VDELETE_CHILD enable the VEXPLICIT_DENY retry
  * cred - credentials the check is done for
  * exp - export information, tested with NFSVNO_EXRDONLY()
  * p - thread handed to VOP_ACCESSX()
  * override - NFSACCCHK_NOOVERRIDE, or a mask of NFSACCCHK_ALLOWROOT and
  *	NFSACCCHK_ALLOWOWNER
  * vpislocked - when 0, the vnode is shared locked here and unlocked again
  *	before returning
  * supportedtypep - when not NULL, NFSACCESS_DELETE may be cleared in it
  *	(see the VEXPLICIT_DENY handling below)
  * Returns 0 when access is allowed, otherwise an errno value.
  */
 int
 nfsvno_accchk(struct vnode *vp, accmode_t accmode, struct ucred *cred,
     struct nfsexstuff *exp, struct thread *p, int override, int vpislocked,
     u_int32_t *supportedtypep)
 {
 	struct vattr vattr;
 	int error = 0, getret = 0;
 
 	if (vpislocked == 0) {
 		/* A failure to get the lock is reported as EPERM. */
 		if (NFSVOPLOCK(vp, LK_SHARED) != 0) {
 			error = EPERM;
 			goto out;
 		}
 	}
 	if (accmode & VWRITE) {
 		/* Just vn_writechk() changed to check rdonly */
 		/*
 		 * Disallow write attempts on read-only file systems;
 		 * unless the file is a socket or a block or character
 		 * device resident on the file system.
 		 */
 		if (NFSVNO_EXRDONLY(exp) ||
 		    (vp->v_mount->mnt_flag & MNT_RDONLY)) {
 			switch (vp->v_type) {
 			case VREG:
 			case VDIR:
 			case VLNK:
 				error = EROFS;
 				/* FALLTHROUGH */
 			default:
 				break;
 			}
 		}
 		/*
 		 * Writing is refused with ETXTBSY when VOP_IS_TEXT()
 		 * reports the vnode as text, unless an error has already
 		 * been found above.
 		 */
 		if (VOP_IS_TEXT(vp) && error == 0)
 			error = ETXTBSY;
 	}
 	if (error != 0) {
 		/* Drop the lock taken above before bailing out. */
 		if (vpislocked == 0)
 			NFSVOPUNLOCK(vp);
 		goto out;
 	}
 
 	/*
 	 * Should the override still be applied when ACLs are enabled?
 	 */
 	error = VOP_ACCESSX(vp, accmode, cred, p);
 	if (error != 0 && (accmode & (VDELETE | VDELETE_CHILD))) {
 		/*
 		 * Try again with VEXPLICIT_DENY, to see if the test for
 		 * deletion is supported.
 		 */
 		error = VOP_ACCESSX(vp, accmode | VEXPLICIT_DENY, cred, p);
 		if (error == 0) {
 			if (vp->v_type == VDIR) {
 				/*
 				 * For a directory, redo the check with the
 				 * delete bits replaced by VWRITE.
 				 */
 				accmode &= ~(VDELETE | VDELETE_CHILD);
 				accmode |= VWRITE;
 				error = VOP_ACCESSX(vp, accmode, cred, p);
 			} else if (supportedtypep != NULL) {
 				/*
 				 * For a non-directory, access is allowed and
 				 * the delete bit is removed from the caller's
 				 * supported types.
 				 */
 				*supportedtypep &= ~NFSACCESS_DELETE;
 			}
 		}
 	}
 
 	/*
 	 * Allow certain operations for the owner (reads and writes
 	 * on files that are already open).
 	 * Only EPERM and EACCES are overridden: for uid 0 when
 	 * NFSACCCHK_ALLOWROOT is set, otherwise for the file's owner when
 	 * NFSACCCHK_ALLOWOWNER is set and the attributes can be fetched.
 	 */
 	if (override != NFSACCCHK_NOOVERRIDE &&
 	    (error == EPERM || error == EACCES)) {
 		if (cred->cr_uid == 0 && (override & NFSACCCHK_ALLOWROOT))
 			error = 0;
 		else if (override & NFSACCCHK_ALLOWOWNER) {
 			getret = VOP_GETATTR(vp, &vattr, cred);
 			if (getret == 0 && cred->cr_uid == vattr.va_uid)
 				error = 0;
 		}
 	}
 	if (vpislocked == 0)
 		NFSVOPUNLOCK(vp);
 
 out:
 	NFSEXITCODE(error);
 	return (error);
 }
 
 /*
  * Set attribute(s) vnop.
  */
 int
 nfsvno_setattr(struct vnode *vp, struct nfsvattr *nvap, struct ucred *cred,
     struct thread *p, struct nfsexstuff *exp)
 {
 	u_quad_t savsize = 0;
 	int error, savedit;
 
 	/*
 	 * If this is an exported file system and a pNFS service is running,
 	 * don't VOP_SETATTR() of size for the MDS file system.
 	 */
 	savedit = 0;
 	error = 0;
 	if (vp->v_type == VREG && (vp->v_mount->mnt_flag & MNT_EXPORTED) != 0 &&
 	    nfsrv_devidcnt != 0 && nvap->na_vattr.va_size != VNOVAL &&
 	    nvap->na_vattr.va_size > 0) {
 		savsize = nvap->na_vattr.va_size;
 		nvap->na_vattr.va_size = VNOVAL;
 		if (nvap->na_vattr.va_uid != (uid_t)VNOVAL ||
 		    nvap->na_vattr.va_gid != (gid_t)VNOVAL ||
 		    nvap->na_vattr.va_mode != (mode_t)VNOVAL ||
 		    nvap->na_vattr.va_atime.tv_sec != VNOVAL ||
 		    nvap->na_vattr.va_mtime.tv_sec != VNOVAL)
 			savedit = 1;
 		else
 			savedit = 2;
 	}
 	if (savedit != 2)
 		error = VOP_SETATTR(vp, &nvap->na_vattr, cred);
 	if (savedit != 0)
 		nvap->na_vattr.va_size = savsize;
 	if (error == 0 && (nvap->na_vattr.va_uid != (uid_t)VNOVAL ||
 	    nvap->na_vattr.va_gid != (gid_t)VNOVAL ||
 	    nvap->na_vattr.va_size != VNOVAL ||
 	    nvap->na_vattr.va_mode != (mode_t)VNOVAL ||
 	    nvap->na_vattr.va_atime.tv_sec != VNOVAL ||
 	    nvap->na_vattr.va_mtime.tv_sec != VNOVAL)) {
 		/* For a pNFS server, set the attributes on the DS file. */
 		error = nfsrv_proxyds(vp, 0, 0, cred, p, NFSPROC_SETATTR,
 		    NULL, NULL, NULL, nvap, NULL, NULL, 0, NULL);
 		if (error == ENOENT)
 			error = 0;
 	}
 	NFSEXITCODE(error);
 	return (error);
 }
 
 /*
  * Set up nameidata for a lookup() call and do it.
  *
  * nd - request descriptor; ND_PUBLOOKUP and ND_NFSV4 in nd_flag are used
  * ndp - nameidata whose path buffer (cn_pnbuf) holds the name to look up
  * dp - directory vnode the lookup starts from
  * islocked - non-zero when dp is locked on entry
  * exp - export information for dp
  * p - thread recorded in the componentname
  * retdirp - set to NULL on entry and to dp once dp is known to be a
  *	directory
  *
  * When dp is not a directory, dp and the path buffer are released and
  * ENOTDIR is returned.  On any later error the path buffer is released
  * and ni_vp, ni_dvp and ni_startdir are set to NULL.
  */
 int
 nfsvno_namei(struct nfsrv_descript *nd, struct nameidata *ndp,
     struct vnode *dp, int islocked, struct nfsexstuff *exp, struct thread *p,
     struct vnode **retdirp)
 {
 	struct componentname *cnp = &ndp->ni_cnd;
 	int i;
 	struct iovec aiov;
 	struct uio auio;
 	/* Remember whether the caller asked for the leaf to stay locked. */
 	int lockleaf = (cnp->cn_flags & LOCKLEAF) != 0, linklen;
 	int error = 0;
 	char *cp;
 
 	*retdirp = NULL;
 	cnp->cn_nameptr = cnp->cn_pnbuf;
 	ndp->ni_lcf = 0;
 	/*
 	 * Extract and set starting directory.
 	 */
 	if (dp->v_type != VDIR) {
 		if (islocked)
 			vput(dp);
 		else
 			vrele(dp);
 		nfsvno_relpathbuf(ndp);
 		error = ENOTDIR;
 		goto out1;
 	}
 	if (islocked)
 		NFSVOPUNLOCK(dp);
 	VREF(dp);
 	*retdirp = dp;
 	if (NFSVNO_EXRDONLY(exp))
 		cnp->cn_flags |= RDONLY;
 	ndp->ni_segflg = UIO_SYSSPACE;
 
 	if (nd->nd_flag & ND_PUBLOOKUP) {
 		ndp->ni_loopcnt = 0;
 		/* An absolute path restarts the lookup at rootvnode. */
 		if (cnp->cn_pnbuf[0] == '/') {
 			vrele(dp);
 			/*
 			 * Check for degenerate pathnames here, since lookup()
 			 * panics on them.
 			 */
 			for (i = 1; i < ndp->ni_pathlen; i++)
 				if (cnp->cn_pnbuf[i] != '/')
 					break;
 			if (i == ndp->ni_pathlen) {
 				error = NFSERR_ACCES;
 				goto out;
 			}
 			dp = rootvnode;
 			VREF(dp);
 		}
 	} else if ((nfsrv_enable_crossmntpt == 0 && NFSVNO_EXPORTED(exp)) ||
 	    (nd->nd_flag & ND_NFSV4) == 0) {
 		/*
 		 * Only cross mount points for NFSv4 when doing a
 		 * mount while traversing the file system above
 		 * the mount point, unless nfsrv_enable_crossmntpt is set.
 		 */
 		cnp->cn_flags |= NOCROSSMOUNT;
 	}
 
 	/*
 	 * Initialize for scan, set ni_startdir and bump ref on dp again
 	 * because lookup() will dereference ni_startdir.
 	 */
 
 	cnp->cn_thread = p;
 	ndp->ni_startdir = dp;
 	ndp->ni_rootdir = rootvnode;
 	ndp->ni_topdir = NULL;
 
 	/*
 	 * LOCKLEAF is forced on for the lookup itself.  When the caller did
 	 * not ask for it, the leaf is unlocked and the flag cleared again
 	 * below.
 	 */
 	if (!lockleaf)
 		cnp->cn_flags |= LOCKLEAF;
 	for (;;) {
 		cnp->cn_nameptr = cnp->cn_pnbuf;
 		/*
 		 * Call lookup() to do the real work.  If an error occurs,
 		 * ndp->ni_vp and ni_dvp are left uninitialized or NULL and
 		 * we do not have to dereference anything before returning.
 		 * In either case ni_startdir will be dereferenced and NULLed
 		 * out.
 		 */
 		error = lookup(ndp);
 		if (error)
 			break;
 
 		/*
 		 * Check for encountering a symbolic link.  Trivial
 		 * termination occurs if no symlink encountered.
 		 */
 		if ((cnp->cn_flags & ISSYMLINK) == 0) {
 			if ((cnp->cn_flags & (SAVENAME | SAVESTART)) == 0)
 				nfsvno_relpathbuf(ndp);
 			if (ndp->ni_vp && !lockleaf)
 				NFSVOPUNLOCK(ndp->ni_vp);
 			break;
 		}
 
 		/*
 		 * Validate symlink
 		 * Symbolic links are only followed for ND_PUBLOOKUP; any
 		 * other request fails with EINVAL.
 		 */
 		if ((cnp->cn_flags & LOCKPARENT) && ndp->ni_pathlen == 1)
 			NFSVOPUNLOCK(ndp->ni_dvp);
 		if (!(nd->nd_flag & ND_PUBLOOKUP)) {
 			error = EINVAL;
 			goto badlink2;
 		}
 
 		if (ndp->ni_loopcnt++ >= MAXSYMLINKS) {
 			error = ELOOP;
 			goto badlink2;
 		}
 		/*
 		 * With path components still to come, the link is read into
 		 * a fresh buffer; otherwise it is read over the current one.
 		 */
 		if (ndp->ni_pathlen > 1)
 			cp = uma_zalloc(namei_zone, M_WAITOK);
 		else
 			cp = cnp->cn_pnbuf;
 		aiov.iov_base = cp;
 		aiov.iov_len = MAXPATHLEN;
 		auio.uio_iov = &aiov;
 		auio.uio_iovcnt = 1;
 		auio.uio_offset = 0;
 		auio.uio_rw = UIO_READ;
 		auio.uio_segflg = UIO_SYSSPACE;
 		auio.uio_td = NULL;
 		auio.uio_resid = MAXPATHLEN;
 		error = VOP_READLINK(ndp->ni_vp, &auio, cnp->cn_cred);
 		if (error) {
 			/*
 			 * badlink1 also frees the separately allocated link
 			 * buffer; badlink2 only drops the vnodes.  Both are
 			 * reached by goto from outside this block as well.
 			 */
 		badlink1:
 			if (ndp->ni_pathlen > 1)
 				uma_zfree(namei_zone, cp);
 		badlink2:
 			vrele(ndp->ni_dvp);
 			vput(ndp->ni_vp);
 			break;
 		}
 		linklen = MAXPATHLEN - auio.uio_resid;
 		if (linklen == 0) {
 			error = ENOENT;
 			goto badlink1;
 		}
 		if (linklen + ndp->ni_pathlen >= MAXPATHLEN) {
 			error = ENAMETOOLONG;
 			goto badlink1;
 		}
 
 		/*
 		 * Adjust or replace path
 		 */
 		if (ndp->ni_pathlen > 1) {
 			NFSBCOPY(ndp->ni_next, cp + linklen, ndp->ni_pathlen);
 			uma_zfree(namei_zone, cnp->cn_pnbuf);
 			cnp->cn_pnbuf = cp;
 		} else
 			cnp->cn_pnbuf[linklen] = '\0';
 		ndp->ni_pathlen += linklen;
 
 		/*
 		 * Cleanup refs for next loop and check if root directory
 		 * should replace current directory.  Normally ni_dvp
 		 * becomes the new base directory and is cleaned up when
 		 * we loop.  Explicitly null pointers after invalidation
 		 * to clarify operation.
 		 */
 		vput(ndp->ni_vp);
 		ndp->ni_vp = NULL;
 
 		if (cnp->cn_pnbuf[0] == '/') {
 			vrele(ndp->ni_dvp);
 			ndp->ni_dvp = ndp->ni_rootdir;
 			VREF(ndp->ni_dvp);
 		}
 		ndp->ni_startdir = ndp->ni_dvp;
 		ndp->ni_dvp = NULL;
 	}
 	if (!lockleaf)
 		cnp->cn_flags &= ~LOCKLEAF;
 
 out:
 	if (error) {
 		nfsvno_relpathbuf(ndp);
 		ndp->ni_vp = NULL;
 		ndp->ni_dvp = NULL;
 		ndp->ni_startdir = NULL;
 	} else if ((ndp->ni_cnd.cn_flags & (WANTPARENT|LOCKPARENT)) == 0) {
 		/* The caller did not ask for the parent directory. */
 		ndp->ni_dvp = NULL;
 	}
 
 out1:
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * Allocate a pathname buffer from namei_zone for the nameidata, flag
  * the componentname with NOMACCHECK and HASBUF and hand the buffer back
  * through bufpp.  When hashpp is not NULL, *hashpp is set to NULL.
  */
 void
 nfsvno_setpathbuf(struct nameidata *ndp, char **bufpp, u_long **hashpp)
 {
 
 	ndp->ni_cnd.cn_flags |= (NOMACCHECK | HASBUF);
 	ndp->ni_cnd.cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK);
 	if (hashpp != NULL)
 		*hashpp = NULL;
 	*bufpp = ndp->ni_cnd.cn_pnbuf;
 }
 
 /*
  * Release the above path buffer, if not released by nfsvno_namei().
  */
 void
 nfsvno_relpathbuf(struct nameidata *ndp)
 {
 
 	if ((ndp->ni_cnd.cn_flags & HASBUF) == 0)
 		panic("nfsrelpath");
 	uma_zfree(namei_zone, ndp->ni_cnd.cn_pnbuf);
 	ndp->ni_cnd.cn_flags &= ~HASBUF;
 }
 
 /*
  * Readlink vnode op into an mbuf list.
  */
 int
 nfsvno_readlink(struct vnode *vp, struct ucred *cred, struct thread *p,
     struct mbuf **mpp, struct mbuf **mpendp, int *lenp)
 {
 	struct iovec *iv;
 	struct uio io, *uiop = &io;
 	struct mbuf *mp, *mp3;
 	int len, tlen, error = 0;
 
 	len = NFS_MAXPATHLEN;
 	uiop->uio_iovcnt = nfsrv_createiovec(len, &mp3, &mp, &iv);
 	uiop->uio_iov = iv;
 	uiop->uio_offset = 0;
 	uiop->uio_resid = len;
 	uiop->uio_rw = UIO_READ;
 	uiop->uio_segflg = UIO_SYSSPACE;
 	uiop->uio_td = NULL;
 	error = VOP_READLINK(vp, uiop, cred);
 	free(iv, M_TEMP);
 	if (error) {
 		m_freem(mp3);
 		*lenp = 0;
 		goto out;
 	}
 	if (uiop->uio_resid > 0) {
 		len -= uiop->uio_resid;
 		tlen = NFSM_RNDUP(len);
 		nfsrv_adj(mp3, NFS_MAXPATHLEN - tlen, tlen - len);
 	}
 	*lenp = len;
 	*mpp = mp3;
 	*mpendp = mp;
 
 out:
 	NFSEXITCODE(error);
 	return (error);
 }
 
 /*
  * Build an mbuf cluster chain with room for len bytes, together with an
  * iovec array describing that room, for use by Read or Getextattr.
  * *mpp and *mpendp get the first and last mbuf of the chain and *ivp the
  * malloc'd (M_TEMP) iovec array, which the caller must free.
  * The number of iovec entries filled in is returned.
  */
 static int
 nfsrv_createiovec(int len, struct mbuf **mpp, struct mbuf **mpendp,
     struct iovec **ivp)
 {
 	struct mbuf *m, *head = NULL, *tail = NULL;
 	struct iovec *iv;
 	int nmbufs, niov, left, siz;
 
 	/*
 	 * First pass: allocate as many clusters as are needed to hold
 	 * len bytes and link them together.
 	 */
 	nmbufs = 0;
 	for (left = len; left > 0; left -= siz) {
 		NFSMGET(m);
 		MCLGET(m, M_WAITOK);
 		m->m_len = 0;
 		siz = min(M_TRAILINGSPACE(m), left);
 		nmbufs++;
 		if (head == NULL)
 			head = m;
 		else
 			tail->m_next = m;
 		tail = m;
 	}
 
 	/*
 	 * Second pass: point one iovec at the free space of each mbuf and
 	 * account for that space in m_len.
 	 */
 	iv = malloc(nmbufs * sizeof(*iv), M_TEMP, M_WAITOK);
 	*ivp = iv;
 	niov = 0;
 	for (m = head, left = len; left > 0; m = m->m_next) {
 		if (m == NULL)
 			panic("nfsvno_read iov");
 		siz = min(M_TRAILINGSPACE(m), left);
 		if (siz <= 0)
 			continue;
 		iv->iov_base = mtod(m, caddr_t) + m->m_len;
 		iv->iov_len = siz;
 		m->m_len += siz;
 		left -= siz;
 		iv++;
 		niov++;
 	}
 	*mpp = head;
 	*mpendp = tail;
 	return (niov);
 }
 
 /*
  * Read vnode op call into mbuf list.
  * vp - the vnode to read from
  * off, cnt - byte offset and number of bytes wanted
  * cred - credentials for the read
  * p - thread handed to nfsrv_proxyds()
  * mpp, mpendp - set to the first and last mbuf of the data read
  *
  * The read is first offered to nfsrv_proxyds(); anything but ENOENT from
  * it is returned as is.  Otherwise VOP_READ() is done locally.  On a
  * VOP_READ() error *mpp is set to NULL.  When no data was read, both
  * *mpp and *mpendp are set to NULL.
  */
 int
 nfsvno_read(struct vnode *vp, off_t off, int cnt, struct ucred *cred,
     struct thread *p, struct mbuf **mpp, struct mbuf **mpendp)
 {
 	struct mbuf *m;
 	struct iovec *iv;
 	int error = 0, len, tlen, ioflag = 0;
 	struct mbuf *m3;
 	struct uio io, *uiop = &io;
 	struct nfsheur *nh;
 
 	/*
 	 * Attempt to read from a DS file. A return of ENOENT implies
 	 * there is no DS file to read.
 	 */
 	error = nfsrv_proxyds(vp, off, cnt, cred, p, NFSPROC_READDS, mpp,
 	    NULL, mpendp, NULL, NULL, NULL, 0, NULL);
 	if (error != ENOENT)
 		return (error);
 
 	len = NFSM_RNDUP(cnt);
 	uiop->uio_iovcnt = nfsrv_createiovec(len, &m3, &m, &iv);
 	uiop->uio_iov = iv;
 	uiop->uio_offset = off;
 	uiop->uio_resid = len;
 	uiop->uio_rw = UIO_READ;
 	uiop->uio_segflg = UIO_SYSSPACE;
 	uiop->uio_td = NULL;
 	nh = nfsrv_sequential_heuristic(uiop, vp);
 	ioflag |= nh->nh_seqcount << IO_SEQSHIFT;
 	/* XXX KDM make this more systematic? */
 	nfsstatsv1.srvbytes[NFSV4OP_READ] += uiop->uio_resid;
 	error = VOP_READ(vp, uiop, IO_NODELOCKED | ioflag, cred);
 	free(iv, M_TEMP);
 	if (error) {
 		m_freem(m3);
 		*mpp = NULL;
 		goto out;
 	}
 	nh->nh_nextoff = uiop->uio_offset;
 	/* Never return more than was asked for or more than was read. */
 	tlen = len - uiop->uio_resid;
 	cnt = cnt < tlen ? cnt : tlen;
 	tlen = NFSM_RNDUP(cnt);
 	if (tlen == 0) {
 		/*
 		 * Nothing was read, so the whole chain goes away.  The tail
 		 * pointer refers to that chain and must be cleared along
 		 * with the head, so that a pointer to freed memory is not
 		 * handed back through *mpendp.
 		 */
 		m_freem(m3);
 		m3 = NULL;
 		m = NULL;
 	} else if (len != tlen || tlen != cnt)
 		nfsrv_adj(m3, len - tlen, tlen - cnt);
 	*mpp = m3;
 	*mpendp = m;
 
 out:
 	NFSEXITCODE(error);
 	return (error);
 }
 
 /*
  * Build an iovec array describing retlen bytes of write data held in
  * the mbuf chain m, where cp is the position of the first data byte
  * within the first mbuf.
  * On success 0 is returned, *ivpp is set to the malloc'd (M_TEMP) array
  * and *iovcntp to its number of entries.  EBADRPC is returned, with
  * nothing allocated, when the chain holds less than retlen bytes.
  */
 static int
 nfsrv_createiovecw(int retlen, struct mbuf *m, char *cp, struct iovec **ivpp,
     int *iovcntp)
 {
 	struct mbuf *cur;
 	struct iovec *iov;
 	int avail, niov, remain;
 
 	/*
 	 * Pass one: count the mbufs that carry part of the data, which
 	 * gives the size of the array to allocate.
 	 */
 	niov = 0;
 	remain = retlen;
 	cur = m;
 	avail = mtod(cur, caddr_t) + cur->m_len - cp;
 	while (remain > 0) {
 		if (avail > 0) {
 			remain -= avail;
 			niov++;
 		}
 		cur = cur->m_next;
 		if (cur != NULL)
 			avail = cur->m_len;
 		else if (remain > 0)
 			return (EBADRPC);
 	}
 
 	/* Pass two: allocate the array and fill it in. */
 	iov = malloc(niov * sizeof(*iov), M_TEMP, M_WAITOK);
 	*ivpp = iov;
 	*iovcntp = niov;
 	cur = m;
 	avail = mtod(cur, caddr_t) + cur->m_len - cp;
 	remain = retlen;
 	while (remain > 0) {
 		if (cur == NULL)
 			panic("nfsvno_write");
 		if (avail > 0) {
 			/* The last piece may end before the mbuf does. */
 			avail = min(avail, remain);
 			iov->iov_base = cp;
 			iov->iov_len = avail;
 			iov++;
 			remain -= avail;
 		}
 		cur = cur->m_next;
 		if (cur != NULL) {
 			avail = cur->m_len;
 			cp = mtod(cur, caddr_t);
 		}
 	}
 	return (0);
 }
 
 /*
  * Write retlen bytes at offset off from an mbuf list to a vnode.
  * The data starts at cp within mbuf mp.  The write is first offered to
  * nfsrv_proxyds(); anything but ENOENT from it is returned as is, with
  * *stable set to NFSWRITE_FILESYNC.  Otherwise VOP_WRITE() is done
  * locally, synchronously unless *stable is NFSWRITE_UNSTABLE.
  */
 int
 nfsvno_write(struct vnode *vp, off_t off, int retlen, int *stable,
     struct mbuf *mp, char *cp, struct ucred *cred, struct thread *p)
 {
 	struct uio io;
 	struct iovec *iv;
 	struct nfsheur *nh;
 	int error, ioflags, iovcnt;
 
 	/*
 	 * Attempt to write to a DS file. A return of ENOENT implies
 	 * there is no DS file to write.
 	 */
 	error = nfsrv_proxyds(vp, off, retlen, cred, p, NFSPROC_WRITEDS,
 	    &mp, cp, NULL, NULL, NULL, NULL, 0, NULL);
 	if (error != ENOENT) {
 		*stable = NFSWRITE_FILESYNC;
 		return (error);
 	}
 
 	ioflags = IO_NODELOCKED;
 	if (*stable != NFSWRITE_UNSTABLE)
 		ioflags |= IO_SYNC;
 
 	error = nfsrv_createiovecw(retlen, mp, cp, &iv, &iovcnt);
 	if (error != 0)
 		return (error);
 
 	io.uio_iov = iv;
 	io.uio_iovcnt = iovcnt;
 	io.uio_resid = retlen;
 	io.uio_rw = UIO_WRITE;
 	io.uio_segflg = UIO_SYSSPACE;
 	NFSUIOPROC(&io, p);
 	io.uio_offset = off;
 	nh = nfsrv_sequential_heuristic(&io, vp);
 	ioflags |= nh->nh_seqcount << IO_SEQSHIFT;
 	/* XXX KDM make this more systematic? */
 	nfsstatsv1.srvbytes[NFSV4OP_WRITE] += io.uio_resid;
 	error = VOP_WRITE(vp, &io, ioflags, cred);
 	/* Only a successful write moves the expected next offset. */
 	if (error == 0)
 		nh->nh_nextoff = io.uio_offset;
 	free(iv, M_TEMP);
 
 	NFSEXITCODE(error);
 	return (error);
 }
 
 /*
  * Common code for creating a regular file (plus special files for V2).
  * Processing starts from the status already in nd->nd_repstat. When that
  * is zero and the lookup found no existing object (ndp->ni_vp == NULL),
  * the object is created according to nvap->na_type. Otherwise the lookup
  * is cleaned up and, if there is no error and na_size is set, the
  * existing file is resized via VOP_SETATTR().
  * On the paths that do not jump to "out", *vpp is set to ndp->ni_vp.
  * If *exclusive_flagp is non-zero after a successful VOP_CREATE(), it is
  * cleared and the two words of "cverf" are stored in the new file's atime.
  * Returns 0 or an error number.
  */
 int
 nfsvno_createsub(struct nfsrv_descript *nd, struct nameidata *ndp,
     struct vnode **vpp, struct nfsvattr *nvap, int *exclusive_flagp,
     int32_t *cverf, NFSDEV_T rdev, struct nfsexstuff *exp)
 {
 	u_quad_t tempsize;
 	int error;
 	struct thread *p = curthread;
 
 	/* Carry forward any error recorded earlier in the request. */
 	error = nd->nd_repstat;
 	if (!error && ndp->ni_vp == NULL) {
 		if (nvap->na_type == VREG || nvap->na_type == VSOCK) {
 			vrele(ndp->ni_startdir);
 			error = VOP_CREATE(ndp->ni_dvp,
 			    &ndp->ni_vp, &ndp->ni_cnd, &nvap->na_vattr);
 			/* For a pNFS server, create the data file on a DS. */
 			if (error == 0 && nvap->na_type == VREG) {
 				/*
 				 * Create a data file on a DS for a pNFS server.
 				 * This function just returns if not
 				 * running a pNFS DS or the creation fails.
 				 */
 				nfsrv_pnfscreate(ndp->ni_vp, &nvap->na_vattr,
 				    nd->nd_cred, p);
 			}
 			vput(ndp->ni_dvp);
 			nfsvno_relpathbuf(ndp);
 			if (!error) {
 				if (*exclusive_flagp) {
 					/*
 					 * Exclusive create: save the create
 					 * verifier in the atime of the new
 					 * file. If that cannot be stored, the
 					 * new vnode is released and
 					 * NFSERR_NOTSUPP is returned.
 					 */
 					*exclusive_flagp = 0;
 					NFSVNO_ATTRINIT(nvap);
 					nvap->na_atime.tv_sec = cverf[0];
 					nvap->na_atime.tv_nsec = cverf[1];
 					error = VOP_SETATTR(ndp->ni_vp,
 					    &nvap->na_vattr, nd->nd_cred);
 					if (error != 0) {
 						vput(ndp->ni_vp);
 						ndp->ni_vp = NULL;
 						error = NFSERR_NOTSUPP;
 					}
 				}
 			}
 		/*
 		 * NFS V2 Only. nfsrvd_mknod() does this for V3.
 		 * (This implies, just get out on an error.)
 		 */
 		} else if (nvap->na_type == VCHR || nvap->na_type == VBLK ||
 			nvap->na_type == VFIFO) {
 			/* A VCHR with an rdev of all ones is treated as a FIFO. */
 			if (nvap->na_type == VCHR && rdev == 0xffffffff)
 				nvap->na_type = VFIFO;
 			/* Anything but a FIFO needs the mknod privilege. */
                         if (nvap->na_type != VFIFO &&
 			    (error = priv_check_cred(nd->nd_cred, PRIV_VFS_MKNOD_DEV))) {
 				vrele(ndp->ni_startdir);
 				nfsvno_relpathbuf(ndp);
 				vput(ndp->ni_dvp);
 				goto out;
 			}
 			nvap->na_rdev = rdev;
 			error = VOP_MKNOD(ndp->ni_dvp, &ndp->ni_vp,
 			    &ndp->ni_cnd, &nvap->na_vattr);
 			vput(ndp->ni_dvp);
 			nfsvno_relpathbuf(ndp);
 			vrele(ndp->ni_startdir);
 			if (error)
 				goto out;
 		} else {
 			/* Any other type cannot be created here. */
 			vrele(ndp->ni_startdir);
 			nfsvno_relpathbuf(ndp);
 			vput(ndp->ni_dvp);
 			error = ENXIO;
 			goto out;
 		}
 		*vpp = ndp->ni_vp;
 	} else {
 		/*
 		 * Handle cases where error is already set and/or
 		 * the file exists.
 		 * 1 - clean up the lookup
 		 * 2 - iff !error and na_size set, truncate it
 		 */
 		vrele(ndp->ni_startdir);
 		nfsvno_relpathbuf(ndp);
 		*vpp = ndp->ni_vp;
 		if (ndp->ni_dvp == *vpp)
 			vrele(ndp->ni_dvp);
 		else
 			vput(ndp->ni_dvp);
 		if (!error && nvap->na_size != VNOVAL) {
 			/* Changing the size requires write access. */
 			error = nfsvno_accchk(*vpp, VWRITE,
 			    nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE,
 			    NFSACCCHK_VPISLOCKED, NULL);
 			if (!error) {
 				/* Reset the attributes so only the size is set. */
 				tempsize = nvap->na_size;
 				NFSVNO_ATTRINIT(nvap);
 				nvap->na_size = tempsize;
 				error = VOP_SETATTR(*vpp,
 				    &nvap->na_vattr, nd->nd_cred);
 			}
 		}
 		/*
 		 * NOTE(review): *vpp is ndp->ni_vp, which can be NULL here if
 		 * this branch was entered with an error and no existing
 		 * object; presumably callers never do that -- confirm.
 		 */
 		if (error)
 			vput(*vpp);
 	}
 
 out:
 	NFSEXITCODE(error);
 	return (error);
 }
 
 /*
  * Do a mknod vnode op.
  */
 int
 nfsvno_mknod(struct nameidata *ndp, struct nfsvattr *nvap, struct ucred *cred,
     struct thread *p)
 {
 	int error = 0;
 	enum vtype vtyp;
 
 	vtyp = nvap->na_type;
 	/*
 	 * Iff doesn't exist, create it.
 	 */
 	if (ndp->ni_vp) {
 		vrele(ndp->ni_startdir);
 		nfsvno_relpathbuf(ndp);
 		vput(ndp->ni_dvp);
 		vrele(ndp->ni_vp);
 		error = EEXIST;
 		goto out;
 	}
 	if (vtyp != VCHR && vtyp != VBLK && vtyp != VSOCK && vtyp != VFIFO) {
 		vrele(ndp->ni_startdir);
 		nfsvno_relpathbuf(ndp);
 		vput(ndp->ni_dvp);
 		error = NFSERR_BADTYPE;
 		goto out;
 	}
 	if (vtyp == VSOCK) {
 		vrele(ndp->ni_startdir);
 		error = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp,
 		    &ndp->ni_cnd, &nvap->na_vattr);
 		vput(ndp->ni_dvp);
 		nfsvno_relpathbuf(ndp);
 	} else {
 		if (nvap->na_type != VFIFO &&
 		    (error = priv_check_cred(cred, PRIV_VFS_MKNOD_DEV))) {
 			vrele(ndp->ni_startdir);
 			nfsvno_relpathbuf(ndp);
 			vput(ndp->ni_dvp);
 			goto out;
 		}
 		error = VOP_MKNOD(ndp->ni_dvp, &ndp->ni_vp,
 		    &ndp->ni_cnd, &nvap->na_vattr);
 		vput(ndp->ni_dvp);
 		nfsvno_relpathbuf(ndp);
 		vrele(ndp->ni_startdir);
 		/*
 		 * Since VOP_MKNOD returns the ni_vp, I can't
 		 * see any reason to do the lookup.
 		 */
 	}
 
 out:
 	NFSEXITCODE(error);
 	return (error);
 }
 
 /*
  * Mkdir vnode op.
  * Creates the directory named by the lookup in "ndp" using the attributes
  * in "nvap". The parent directory vnode and the path buffer are released
  * on every path. Returns EEXIST if the name already exists, otherwise the
  * result of VOP_MKDIR(). "saved_uid", "cred", "p" and "exp" are not used
  * by this function.
  */
 int
 nfsvno_mkdir(struct nameidata *ndp, struct nfsvattr *nvap, uid_t saved_uid,
     struct ucred *cred, struct thread *p, struct nfsexstuff *exp)
 {
 	int error;
 
 	if (ndp->ni_vp == NULL) {
 		error = VOP_MKDIR(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd,
 		    &nvap->na_vattr);
 		vput(ndp->ni_dvp);
 		nfsvno_relpathbuf(ndp);
 	} else {
 		/* The name is taken: drop both vnodes from the lookup. */
 		if (ndp->ni_dvp != ndp->ni_vp)
 			vput(ndp->ni_dvp);
 		else
 			vrele(ndp->ni_dvp);
 		vrele(ndp->ni_vp);
 		nfsvno_relpathbuf(ndp);
 		error = EEXIST;
 	}
 
 	NFSEXITCODE(error);
 	return (error);
 }
 
 /*
  * symlink vnode op.
  * Creates a symbolic link with contents "pathcp" for the name looked up
  * in "ndp". Returns EEXIST if the name already exists, otherwise the
  * result of VOP_SYMLINK(). When "not_v2" is zero and the create worked,
  * the new vnode is vput() here. "pathlen", "saved_uid", "cred", "p" and
  * "exp" are not used by this function.
  */
 int
 nfsvno_symlink(struct nameidata *ndp, struct nfsvattr *nvap, char *pathcp,
     int pathlen, int not_v2, uid_t saved_uid, struct ucred *cred, struct thread *p,
     struct nfsexstuff *exp)
 {
 	int error;
 
 	if (ndp->ni_vp == NULL) {
 		error = VOP_SYMLINK(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd,
 		    &nvap->na_vattr, pathcp);
 		vput(ndp->ni_dvp);
 		vrele(ndp->ni_startdir);
 		nfsvno_relpathbuf(ndp);
 		/*
 		 * VOP_SYMLINK() returns the ni_vp, so there is no point
 		 * in looking it up again. Just vput it for v2.
 		 */
 		if (error == 0 && not_v2 == 0)
 			vput(ndp->ni_vp);
 	} else {
 		/* The name is taken: release everything from the lookup. */
 		vrele(ndp->ni_startdir);
 		nfsvno_relpathbuf(ndp);
 		if (ndp->ni_dvp != ndp->ni_vp)
 			vput(ndp->ni_dvp);
 		else
 			vrele(ndp->ni_dvp);
 		vrele(ndp->ni_vp);
 		error = EEXIST;
 	}
 
 	NFSEXITCODE(error);
 	return (error);
 }
 
 /*
  * Parse symbolic link arguments.
  * This function has an ugly side effect. It will malloc() an area for
  * the symlink and set iov_base to point to it, only if it succeeds.
  * So, if it returns with uiop->uio_iov->iov_base != NULL, that must
  * be FREE'd later.
  */
 int
 nfsvno_getsymlink(struct nfsrv_descript *nd, struct nfsvattr *nvap,
     struct thread *p, char **pathcpp, int *lenp)
 {
 	u_int32_t *tl;
 	char *pathcp = NULL;
 	int error = 0, len;
 	struct nfsv2_sattr *sp;
 
 	*pathcpp = NULL;
 	*lenp = 0;
 	if ((nd->nd_flag & ND_NFSV3) &&
 	    (error = nfsrv_sattr(nd, NULL, nvap, NULL, NULL, p)))
 		goto nfsmout;
 	NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 	len = fxdr_unsigned(int, *tl);
 	if (len > NFS_MAXPATHLEN || len <= 0) {
 		error = EBADRPC;
 		goto nfsmout;
 	}
 	pathcp = malloc(len + 1, M_TEMP, M_WAITOK);
 	error = nfsrv_mtostr(nd, pathcp, len);
 	if (error)
 		goto nfsmout;
 	if (nd->nd_flag & ND_NFSV2) {
 		NFSM_DISSECT(sp, struct nfsv2_sattr *, NFSX_V2SATTR);
 		nvap->na_mode = fxdr_unsigned(u_int16_t, sp->sa_mode);
 	}
 	*pathcpp = pathcp;
 	*lenp = len;
 	NFSEXITCODE2(0, nd);
 	return (0);
 nfsmout:
 	if (pathcp)
 		free(pathcp, M_TEMP);
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * Remove a non-directory object.
  */
 int
 nfsvno_removesub(struct nameidata *ndp, int is_v4, struct ucred *cred,
     struct thread *p, struct nfsexstuff *exp)
 {
 	struct vnode *vp, *dsdvp[NFSDEV_MAXMIRRORS];
 	int error = 0, mirrorcnt;
 	char fname[PNFS_FILENAME_LEN + 1];
 	fhandle_t fh;
 
 	vp = ndp->ni_vp;
 	dsdvp[0] = NULL;
 	if (vp->v_type == VDIR)
 		error = NFSERR_ISDIR;
 	else if (is_v4)
 		error = nfsrv_checkremove(vp, 1, NULL, (nfsquad_t)((u_quad_t)0),
 		    p);
 	if (error == 0)
 		nfsrv_pnfsremovesetup(vp, p, dsdvp, &mirrorcnt, fname, &fh);
 	if (!error)
 		error = VOP_REMOVE(ndp->ni_dvp, vp, &ndp->ni_cnd);
 	if (error == 0 && dsdvp[0] != NULL)
 		nfsrv_pnfsremove(dsdvp, mirrorcnt, fname, &fh, p);
 	if (ndp->ni_dvp == vp)
 		vrele(ndp->ni_dvp);
 	else
 		vput(ndp->ni_dvp);
 	vput(vp);
 	if ((ndp->ni_cnd.cn_flags & SAVENAME) != 0)
 		nfsvno_relpathbuf(ndp);
 	NFSEXITCODE(error);
 	return (error);
 }
 
 /*
  * Remove a directory.
  */
 int
 nfsvno_rmdirsub(struct nameidata *ndp, int is_v4, struct ucred *cred,
     struct thread *p, struct nfsexstuff *exp)
 {
 	struct vnode *vp;
 	int error = 0;
 
 	vp = ndp->ni_vp;
 	if (vp->v_type != VDIR) {
 		error = ENOTDIR;
 		goto out;
 	}
 	/*
 	 * No rmdir "." please.
 	 */
 	if (ndp->ni_dvp == vp) {
 		error = EINVAL;
 		goto out;
 	}
 	/*
 	 * The root of a mounted filesystem cannot be deleted.
 	 */
 	if (vp->v_vflag & VV_ROOT)
 		error = EBUSY;
 out:
 	if (!error)
 		error = VOP_RMDIR(ndp->ni_dvp, vp, &ndp->ni_cnd);
 	if (ndp->ni_dvp == vp)
 		vrele(ndp->ni_dvp);
 	else
 		vput(ndp->ni_dvp);
 	vput(vp);
 	if ((ndp->ni_cnd.cn_flags & SAVENAME) != 0)
 		nfsvno_relpathbuf(ndp);
 	NFSEXITCODE(error);
 	return (error);
 }
 
 /*
  * Rename vnode op.
  * "fromndp" and "tondp" are the lookups of the source and destination
  * names. "ndstat" is an error found before this call: when it is
  * non-zero only the "from" lookup is released and that error is returned.
  * Otherwise the sanity checks below are done and, if they all pass,
  * VOP_RENAME() is called. "ndflag" supplies the ND_NFSV2/ND_NFSV4 bits
  * that select which error numbers and state checks are used.
  * Internally an error value of -1 means that source and destination are
  * the same vnode; in that case nothing is renamed and 0 is returned.
  */
 int
 nfsvno_rename(struct nameidata *fromndp, struct nameidata *tondp,
     u_int32_t ndstat, u_int32_t ndflag, struct ucred *cred, struct thread *p)
 {
 	struct vnode *fvp, *tvp, *tdvp, *dsdvp[NFSDEV_MAXMIRRORS];
 	int error = 0, mirrorcnt;
 	char fname[PNFS_FILENAME_LEN + 1];
 	fhandle_t fh;
 
 	/* dsdvp[0] stays NULL unless nfsrv_pnfsremovesetup() fills it in. */
 	dsdvp[0] = NULL;
 	fvp = fromndp->ni_vp;
 	if (ndstat) {
 		/* Only the "from" side is released on this path. */
 		vrele(fromndp->ni_dvp);
 		vrele(fvp);
 		error = ndstat;
 		goto out1;
 	}
 	tdvp = tondp->ni_dvp;
 	tvp = tondp->ni_vp;
 	if (tvp != NULL) {
 		/* A directory and a non-directory cannot replace each other. */
 		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
 			error = (ndflag & ND_NFSV2) ? EISDIR : EEXIST;
 			goto out;
 		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
 			error = (ndflag & ND_NFSV2) ? ENOTDIR : EEXIST;
 			goto out;
 		}
 		/* The destination directory has something mounted on it. */
 		if (tvp->v_type == VDIR && tvp->v_mountedhere) {
 			error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EXDEV;
 			goto out;
 		}
 
 		/*
 		 * A rename to '.' or '..' results in a prematurely
 		 * unlocked vnode on FreeBSD5, so I'm just going to fail that
 		 * here.
 		 */
 		if ((tondp->ni_cnd.cn_namelen == 1 &&
 		     tondp->ni_cnd.cn_nameptr[0] == '.') ||
 		    (tondp->ni_cnd.cn_namelen == 2 &&
 		     tondp->ni_cnd.cn_nameptr[0] == '.' &&
 		     tondp->ni_cnd.cn_nameptr[1] == '.')) {
 			error = EINVAL;
 			goto out;
 		}
 	}
 	/* The source directory has something mounted on it. */
 	if (fvp->v_type == VDIR && fvp->v_mountedhere) {
 		error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EXDEV;
 		goto out;
 	}
 	/* Source and destination directory are on different mounts. */
 	if (fvp->v_mount != tdvp->v_mount) {
 		error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EXDEV;
 		goto out;
 	}
 	/* The source is the destination directory itself. */
 	if (fvp == tdvp) {
 		error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EINVAL;
 		goto out;
 	}
 	if (fvp == tvp) {
 		/*
 		 * If source and destination are the same, there is nothing to
 		 * do. Set error to -1 to indicate this.
 		 */
 		error = -1;
 		goto out;
 	}
 	if (ndflag & ND_NFSV4) {
 		/*
 		 * fvp is locked only for the duration of the check; failing
 		 * to get the lock is reported as EPERM.
 		 */
 		if (NFSVOPLOCK(fvp, LK_EXCLUSIVE) == 0) {
 			error = nfsrv_checkremove(fvp, 0, NULL,
 			    (nfsquad_t)((u_quad_t)0), p);
 			NFSVOPUNLOCK(fvp);
 		} else
 			error = EPERM;
 		if (tvp && !error)
 			error = nfsrv_checkremove(tvp, 1, NULL,
 			    (nfsquad_t)((u_quad_t)0), p);
 	} else {
 		/*
 		 * For NFSv2 and NFSv3, try to get rid of the delegation, so
 		 * that the NFSv4 client won't be confused by the rename.
 		 * Since nfsd_recalldelegation() can only be called on an
 		 * unlocked vnode at this point and fvp is the file that will
 		 * still exist after the rename, just do fvp.
 		 */
 		nfsd_recalldelegation(fvp, p);
 	}
 	if (error == 0 && tvp != NULL) {
 		/* An existing destination will be replaced by the rename. */
 		nfsrv_pnfsremovesetup(tvp, p, dsdvp, &mirrorcnt, fname, &fh);
 		NFSD_DEBUG(4, "nfsvno_rename: pnfsremovesetup"
 		    " dsdvp=%p\n", dsdvp[0]);
 	}
 out:
 	if (!error) {
 		/*
 		 * NOTE(review): no vnode is released here after the call, so
 		 * VOP_RENAME() presumably releases all four vnodes itself --
 		 * confirm against VOP_RENAME(9).
 		 */
 		error = VOP_RENAME(fromndp->ni_dvp, fromndp->ni_vp,
 		    &fromndp->ni_cnd, tondp->ni_dvp, tondp->ni_vp,
 		    &tondp->ni_cnd);
 	} else {
 		/* The rename is not attempted: release all four vnodes. */
 		if (tdvp == tvp)
 			vrele(tdvp);
 		else
 			vput(tdvp);
 		if (tvp)
 			vput(tvp);
 		vrele(fromndp->ni_dvp);
 		vrele(fvp);
 		/* -1 was only the "same file" marker; report success. */
 		if (error == -1)
 			error = 0;
 	}
 
 	/*
 	 * If dsdvp[0] != NULL, it was set up by nfsrv_pnfsremovesetup() and
 	 * if the rename succeeded, the DS file for the tvp needs to be
 	 * removed.
 	 */
 	if (error == 0 && dsdvp[0] != NULL) {
 		nfsrv_pnfsremove(dsdvp, mirrorcnt, fname, &fh, p);
 		NFSD_DEBUG(4, "nfsvno_rename: pnfsremove\n");
 	}
 
 	vrele(tondp->ni_startdir);
 	nfsvno_relpathbuf(tondp);
 out1:
 	/* Reached directly when "ndstat" was set; the "to" side is skipped. */
 	vrele(fromndp->ni_startdir);
 	nfsvno_relpathbuf(fromndp);
 	NFSEXITCODE(error);
 	return (error);
 }
 
 /*
  * Link vnode op.
  * Creates a hard link to "vp" under the name looked up in "ndp".
  * Returns EEXIST if the name already exists, EXDEV if "vp" and the
  * target directory are on different mounts, EPERM if "vp" is doomed once
  * locked, otherwise the result of VOP_LINK(). The directory vnode and the
  * path buffer are released on every path. "cred", "p" and "exp" are not
  * used by this function.
  */
 int
 nfsvno_link(struct nameidata *ndp, struct vnode *vp, struct ucred *cred,
     struct thread *p, struct nfsexstuff *exp)
 {
 	int error;
 
 	if (ndp->ni_vp != NULL)
 		error = EEXIST;
 	else if (vp->v_mount != ndp->ni_dvp->v_mount)
 		error = EXDEV;
 	else
 		error = 0;
 
 	if (error != 0) {
 		/* Nothing will be linked: just drop the lookup results. */
 		if (ndp->ni_dvp != ndp->ni_vp)
 			vput(ndp->ni_dvp);
 		else
 			vrele(ndp->ni_dvp);
 		if (ndp->ni_vp != NULL)
 			vrele(ndp->ni_vp);
 	} else {
 		/* "vp" is locked here and unlocked again before returning. */
 		NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
 		if (VN_IS_DOOMED(vp))
 			error = EPERM;
 		else
 			error = VOP_LINK(ndp->ni_dvp, vp, &ndp->ni_cnd);
 		if (ndp->ni_dvp != vp)
 			vput(ndp->ni_dvp);
 		else
 			vrele(ndp->ni_dvp);
 		NFSVOPUNLOCK(vp);
 	}
 
 	nfsvno_relpathbuf(ndp);
 	NFSEXITCODE(error);
 	return (error);
 }
 
 /*
  * Do the fsync() appropriate for the commit.
  * "off" and "cnt" give the byte range to be committed for "vp".
  * The whole file is synced with VOP_FSYNC() when cnt is 0, when cnt is
  * larger than MAX_COMMIT_COUNT, or when the mount does not have
  * MNTK_USES_BCACHE set. Otherwise only the buffers covering the range are
  * written. "cred" is not used by this function.
  * Returns the result of VOP_FSYNC() on the whole-file path; the ranged
  * path always returns 0.
  */
 int
 nfsvno_fsync(struct vnode *vp, u_int64_t off, int cnt, struct ucred *cred,
     struct thread *td)
 {
 	int error = 0;
 
 	/*
 	 * RFC 1813 3.3.21: if count is 0, a flush from offset to the end of
 	 * file is done.  At this time VOP_FSYNC does not accept offset and
 	 * byte count parameters so call VOP_FSYNC the whole file for now.
 	 * The same is true for NFSv4: RFC 3530 Sec. 14.2.3.
 	 * File systems that do not use the buffer cache (as indicated
 	 * by MNTK_USES_BCACHE not being set) must use VOP_FSYNC().
 	 */
 	if (cnt == 0 || cnt > MAX_COMMIT_COUNT ||
 	    (vp->v_mount->mnt_kern_flag & MNTK_USES_BCACHE) == 0) {
 		/*
 		 * Give up and do the whole thing
 		 */
 		/* Clean the pages of the VM object first, if it may be dirty. */
 		if (vp->v_object && vm_object_mightbedirty(vp->v_object)) {
 			VM_OBJECT_WLOCK(vp->v_object);
 			vm_object_page_clean(vp->v_object, 0, 0, OBJPC_SYNC);
 			VM_OBJECT_WUNLOCK(vp->v_object);
 		}
 		error = VOP_FSYNC(vp, MNT_WAIT, td);
 	} else {
 		/*
 		 * Locate and synchronously write any buffers that fall
 		 * into the requested range.  Note:  we are assuming that
 		 * f_iosize is a power of 2.
 		 */
 		int iosize = vp->v_mount->mnt_stat.f_iosize;
 		int iomask = iosize - 1;
 		struct bufobj *bo;
 		daddr_t lblkno;
 
 		/*
 		 * Align to iosize boundary, super-align to page boundary.
 		 */
 		/* Each time "off" is rounded down, "cnt" grows by the same amount. */
 		if (off & iomask) {
 			cnt += off & iomask;
 			off &= ~(u_quad_t)iomask;
 		}
 		if (off & PAGE_MASK) {
 			cnt += off & PAGE_MASK;
 			off &= ~(u_quad_t)PAGE_MASK;
 		}
 		/* First logical block of the range, in units of iosize. */
 		lblkno = off / iosize;
 
 		if (vp->v_object && vm_object_mightbedirty(vp->v_object)) {
 			VM_OBJECT_WLOCK(vp->v_object);
 			vm_object_page_clean(vp->v_object, off, off + cnt,
 			    OBJPC_SYNC);
 			VM_OBJECT_WUNLOCK(vp->v_object);
 		}
 
 		bo = &vp->v_bufobj;
 		BO_LOCK(bo);
 		/* Walk the range one iosize block at a time. */
 		while (cnt > 0) {
 			struct buf *bp;
 
 			/*
 			 * If we have a buffer and it is marked B_DELWRI we
 			 * have to lock and write it.  Otherwise the prior
 			 * write is assumed to have already been committed.
 			 *
 			 * gbincore() can return invalid buffers now so we
 			 * have to check that bit as well (though B_DELWRI
 			 * should not be set if B_INVAL is set there could be
 			 * a race here since we haven't locked the buffer).
 			 */
 			if ((bp = gbincore(&vp->v_bufobj, lblkno)) != NULL) {
 				/*
 				 * The bufobj lock is handed to BUF_LOCK() as
 				 * the interlock and is re-taken on both paths
 				 * below. ENOLCK means the lookup of this same
 				 * block has to be redone.
 				 */
 				if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL |
 				    LK_INTERLOCK, BO_LOCKPTR(bo)) == ENOLCK) {
 					BO_LOCK(bo);
 					continue; /* retry */
 				}
 			    	if ((bp->b_flags & (B_DELWRI|B_INVAL)) ==
 				    B_DELWRI) {
 					/* Delayed write: write it out now. */
 					bremfree(bp);
 					bp->b_flags &= ~B_ASYNC;
 					bwrite(bp);
 					++nfs_commit_miss;
 				} else
 					BUF_UNLOCK(bp);
 				BO_LOCK(bo);
 			}
 			++nfs_commit_blks;
 			/* Less than one block left: this was the last one. */
 			if (cnt < iosize)
 				break;
 			cnt -= iosize;
 			++lblkno;
 		}
 		BO_UNLOCK(bo);
 	}
 	NFSEXITCODE(error);
 	return (error);
 }
 
 /*
  * Statfs vnode op.
  * Fills in *sf for the mount that "vp" lives on via VFS_STATFS(). When
  * nfsrv_devidcnt is greater than 0 (pNFS service), the block counts and
  * block size are taken from nfsrv_pnfsstatfs() instead, provided that
  * call succeeded. Negative f_bavail and f_ffree are clamped to 0.
  * Returns the result of VFS_STATFS().
  */
 int
 nfsvno_statfs(struct vnode *vp, struct statfs *sf)
 {
 	struct statfs *ds_sf = NULL;
 	int error;
 
 	if (nfsrv_devidcnt > 0) {
 		/* For a pNFS service, get the DS numbers. */
 		ds_sf = malloc(sizeof(*ds_sf), M_TEMP, M_WAITOK | M_ZERO);
 		if (nfsrv_pnfsstatfs(ds_sf, vp->v_mount) != 0) {
 			/* No DS numbers; fall back on the local values. */
 			free(ds_sf, M_TEMP);
 			ds_sf = NULL;
 		}
 	}
 
 	error = VFS_STATFS(vp->v_mount, sf);
 	if (error != 0)
 		goto done;
 
 	if (ds_sf != NULL) {
 		sf->f_blocks = ds_sf->f_blocks;
 		sf->f_bavail = ds_sf->f_bavail;
 		sf->f_bfree = ds_sf->f_bfree;
 		sf->f_bsize = ds_sf->f_bsize;
 	}
 
 	/*
 	 * NFS handles these values as unsigned on the wire, so a
 	 * negative value cannot be represented and would show up as
 	 * a very large positive value for clients like Solaris10.
 	 * Report 0 instead.
 	 */
 	if (sf->f_bavail < 0)
 		sf->f_bavail = 0;
 	if (sf->f_ffree < 0)
 		sf->f_ffree = 0;
 
 done:
 	free(ds_sf, M_TEMP);
 	NFSEXITCODE(error);
 	return (error);
 }
 
 /*
  * Do the vnode op stuff for Open. Similar to nfsvno_createsub(), but
  * must handle nfsrv_opencheck() calls after any other access checks.
  * The outcome is reported in nd->nd_repstat; nothing is returned.
  * *vpp is set to the vnode being opened (the newly created one or the
  * existing ndp->ni_vp), or to NULL when nd->nd_repstat was already set
  * before any vnode operation was attempted.
  * If *exclusive_flagp is non-zero after a successful VOP_CREATE(), it is
  * cleared and the two words of "cverf" are stored in the new file's atime.
  */
 void
 nfsvno_open(struct nfsrv_descript *nd, struct nameidata *ndp,
     nfsquad_t clientid, nfsv4stateid_t *stateidp, struct nfsstate *stp,
     int *exclusive_flagp, struct nfsvattr *nvap, int32_t *cverf, int create,
     NFSACL_T *aclp, nfsattrbit_t *attrbitp, struct ucred *cred,
     struct nfsexstuff *exp, struct vnode **vpp)
 {
 	struct vnode *vp = NULL;
 	u_quad_t tempsize;
 	struct nfsexstuff nes;
 	struct thread *p = curthread;
 
 	/* No existing file: the open check is done without a vnode. */
 	if (ndp->ni_vp == NULL)
 		nd->nd_repstat = nfsrv_opencheck(clientid,
 		    stateidp, stp, NULL, nd, p, nd->nd_repstat);
 	if (!nd->nd_repstat) {
 		if (ndp->ni_vp == NULL) {
 			/* The file does not exist yet, so create it. */
 			vrele(ndp->ni_startdir);
 			nd->nd_repstat = VOP_CREATE(ndp->ni_dvp,
 			    &ndp->ni_vp, &ndp->ni_cnd, &nvap->na_vattr);
 			/* For a pNFS server, create the data file on a DS. */
 			if (nd->nd_repstat == 0) {
 				/*
 				 * Create a data file on a DS for a pNFS server.
 				 * This function just returns if not
 				 * running a pNFS DS or the creation fails.
 				 */
 				nfsrv_pnfscreate(ndp->ni_vp, &nvap->na_vattr,
 				    cred, p);
 			}
 			vput(ndp->ni_dvp);
 			nfsvno_relpathbuf(ndp);
 			if (!nd->nd_repstat) {
 				if (*exclusive_flagp) {
 					/*
 					 * Exclusive create: save the create
 					 * verifier in the atime of the new
 					 * file. If that cannot be stored, the
 					 * new vnode is released and
 					 * NFSERR_NOTSUPP is reported.
 					 */
 					*exclusive_flagp = 0;
 					NFSVNO_ATTRINIT(nvap);
 					nvap->na_atime.tv_sec = cverf[0];
 					nvap->na_atime.tv_nsec = cverf[1];
 					nd->nd_repstat = VOP_SETATTR(ndp->ni_vp,
 					    &nvap->na_vattr, cred);
 					if (nd->nd_repstat != 0) {
 						vput(ndp->ni_vp);
 						ndp->ni_vp = NULL;
 						nd->nd_repstat = NFSERR_NOTSUPP;
 					} else
 						NFSSETBIT_ATTRBIT(attrbitp,
 						    NFSATTRBIT_TIMEACCESS);
 				} else {
 					nfsrv_fixattr(nd, ndp->ni_vp, nvap,
 					    aclp, p, attrbitp, exp);
 				}
 			}
 			vp = ndp->ni_vp;
 		} else {
 			/* The file already exists. */
 			if (ndp->ni_startdir)
 				vrele(ndp->ni_startdir);
 			nfsvno_relpathbuf(ndp);
 			vp = ndp->ni_vp;
 			/* The directory vnode is released only for a create. */
 			if (create == NFSV4OPEN_CREATE) {
 				if (ndp->ni_dvp == vp)
 					vrele(ndp->ni_dvp);
 				else
 					vput(ndp->ni_dvp);
 			}
 			if (NFSVNO_ISSETSIZE(nvap) && vp->v_type == VREG) {
 				/*
 				 * A size was supplied for a regular file:
 				 * check write access, then do the open check,
 				 * then set the size.
 				 */
 				if (ndp->ni_cnd.cn_flags & RDONLY)
 					NFSVNO_SETEXRDONLY(&nes);
 				else
 					NFSVNO_EXINIT(&nes);
 				nd->nd_repstat = nfsvno_accchk(vp, 
 				    VWRITE, cred, &nes, p,
 				    NFSACCCHK_NOOVERRIDE,
 				    NFSACCCHK_VPISLOCKED, NULL);
 				/* The access result is passed in as the last argument. */
 				nd->nd_repstat = nfsrv_opencheck(clientid,
 				    stateidp, stp, vp, nd, p, nd->nd_repstat);
 				if (!nd->nd_repstat) {
 					/* Reset the attributes so only the size is set. */
 					tempsize = nvap->na_size;
 					NFSVNO_ATTRINIT(nvap);
 					nvap->na_size = tempsize;
 					nd->nd_repstat = VOP_SETATTR(vp,
 					    &nvap->na_vattr, cred);
 				}
 			} else if (vp->v_type == VREG) {
 				nd->nd_repstat = nfsrv_opencheck(clientid,
 				    stateidp, stp, vp, nd, p, nd->nd_repstat);
 			}
 		}
 	} else {
 		/* An error is already set: just clean up the lookup. */
 		if (ndp->ni_cnd.cn_flags & HASBUF)
 			nfsvno_relpathbuf(ndp);
 		if (ndp->ni_startdir && create == NFSV4OPEN_CREATE) {
 			vrele(ndp->ni_startdir);
 			if (ndp->ni_dvp == ndp->ni_vp)
 				vrele(ndp->ni_dvp);
 			else
 				vput(ndp->ni_dvp);
 			if (ndp->ni_vp)
 				vput(ndp->ni_vp);
 		}
 	}
 	*vpp = vp;
 
 	NFSEXITCODE2(0, nd);
 }
 
 /*
  * Updates the file rev and sets the mtime and ctime
  * to the current clock time, returning the va_filerev and va_Xtime
  * values.
  * Return ESTALE to indicate the vnode is VIRF_DOOMED.
  */
 int
 nfsvno_updfilerev(struct vnode *vp, struct nfsvattr *nvap,
     struct nfsrv_descript *nd, struct thread *p)
 {
 	struct vattr va;
 
 	VATTR_NULL(&va);
 	vfs_timestamp(&va.va_mtime);
 	if (NFSVOPISLOCKED(vp) != LK_EXCLUSIVE) {
 		NFSVOPLOCK(vp, LK_UPGRADE | LK_RETRY);
 		if (VN_IS_DOOMED(vp))
 			return (ESTALE);
 	}
 	(void) VOP_SETATTR(vp, &va, nd->nd_cred);
 	(void) nfsvno_getattr(vp, nvap, nd, p, 1, NULL);
 	return (0);
 }
 
 /*
  * Glue routine to nfsv4_fillattr().
  * When nfsrv_devidcnt is greater than 0 and any of the SPACEAVAIL,
  * SPACEFREE or SPACETOTAL attributes is requested in "attrbitp", the
  * statfs numbers from nfsrv_pnfsstatfs() are passed along as well (NULL
  * is passed if that call fails or is not needed).
  * Returns whatever nfsv4_fillattr() returns.
  */
 int
 nfsvno_fillattr(struct nfsrv_descript *nd, struct mount *mp, struct vnode *vp,
     struct nfsvattr *nvap, fhandle_t *fhp, int rderror, nfsattrbit_t *attrbitp,
     struct ucred *cred, struct thread *p, int isdgram, int reterr,
     int supports_nfsv4acls, int at_root, uint64_t mounted_on_fileno)
 {
 	struct statfs *ds_sf = NULL;
 	int ret, want_space;
 
 	/* The DS numbers matter only for the space attributes. */
 	want_space = nfsrv_devidcnt > 0 &&
 	    (NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SPACEAVAIL) ||
 	     NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SPACEFREE) ||
 	     NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SPACETOTAL));
 	if (want_space) {
 		ds_sf = malloc(sizeof(*ds_sf), M_TEMP, M_WAITOK | M_ZERO);
 		if (nfsrv_pnfsstatfs(ds_sf, mp) != 0) {
 			free(ds_sf, M_TEMP);
 			ds_sf = NULL;
 		}
 	}
 
 	ret = nfsv4_fillattr(nd, mp, vp, NULL, &nvap->na_vattr, fhp, rderror,
 	    attrbitp, cred, p, isdgram, reterr, supports_nfsv4acls, at_root,
 	    mounted_on_fileno, ds_sf);
 	free(ds_sf, M_TEMP);
 	NFSEXITCODE2(0, nd);
 	return (ret);
 }
 
 /* Since the Readdir vnode ops vary, put the entire functions in here. */
 /*
  * nfs readdir service
  * - mallocs what it thinks is enough to read
  *	count rounded up to a multiple of DIRBLKSIZ <= NFS_MAXREADDIR
  * - calls VOP_READDIR()
  * - loops around building the reply
  *	if the output generated exceeds count break out of loop
  *	The NFSM_CLGET macro is used here so that the reply will be packed
  *	tightly in mbuf clusters.
  * - it trims out records with d_fileno == 0
  *	this doesn't matter for Unix clients, but they might confuse clients
  *	for other operating systems.
  * - it trims out records with d_type == DT_WHT
  *	these cannot be seen through NFS (unless we extend the protocol)
  *     The alternate call nfsrvd_readdirplus() does lookups as well.
  * PS: The NFS protocol spec. does not clarify what the "count" byte
  *	argument is a count of.. just name strings and file id's or the
  *	entire reply rpc or ...
  *	I tried just file name and id sizes and it confused the Sun client,
  *	so I am using the full rpc size now. The "paranoia.." comment refers
  *	to including the status longwords that are not a part of the dir.
  *	"entry" structures, but are in the rpc.
  *
  * "vp" is vput() on every path except when nd->nd_repstat is already set
  * on entry. Protocol level failures are reported in nd->nd_repstat with a
  * return value of 0; a non-zero return only comes from the "nfsmout"
  * path. "isdgram" is not used by this function.
  */
 int
 nfsrvd_readdir(struct nfsrv_descript *nd, int isdgram,
     struct vnode *vp, struct nfsexstuff *exp)
 {
 	struct dirent *dp;
 	u_int32_t *tl;
 	int dirlen;
 	char *cpos, *cend, *rbuf;
 	struct nfsvattr at;
 	int nlen, error = 0, getret = 1;
 	int siz, cnt, fullsiz, eofflag, ncookies;
 	u_int64_t off, toff, verf __unused;
 	u_long *cookies = NULL, *cookiep;
 	struct uio io;
 	struct iovec iv;
 	int is_ufs;
 	struct thread *p = curthread;
 
 	if (nd->nd_repstat) {
 		nfsrv_postopattr(nd, getret, &at);
 		goto out;
 	}
 	/* Get the arguments: a 32 bit cookie for V2, else cookie + verifier. */
 	if (nd->nd_flag & ND_NFSV2) {
 		NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 		off = fxdr_unsigned(u_quad_t, *tl++);
 	} else {
 		NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
 		off = fxdr_hyper(tl);
 		tl += 2;
 		verf = fxdr_hyper(tl);
 		tl += 2;
 	}
 	toff = off;
 	cnt = fxdr_unsigned(int, *tl);
 	/* Clamp the requested count to the server's maximum. */
 	if (cnt > NFS_SRVMAXDATA(nd) || cnt < 0)
 		cnt = NFS_SRVMAXDATA(nd);
 	/* Round the read size up to a multiple of DIRBLKSIZ. */
 	siz = ((cnt + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1));
 	fullsiz = siz;
 	if (nd->nd_flag & ND_NFSV3) {
 		nd->nd_repstat = getret = nfsvno_getattr(vp, &at, nd, p, 1,
 		    NULL);
 #if 0
 		/*
 		 * va_filerev is not sufficient as a cookie verifier,
 		 * since it is not supposed to change when entries are
 		 * removed/added unless that offset cookies returned to
 		 * the client are no longer valid.
 		 */
 		if (!nd->nd_repstat && toff && verf != at.na_filerev)
 			nd->nd_repstat = NFSERR_BAD_COOKIE;
 #endif
 	}
 	if (!nd->nd_repstat && vp->v_type != VDIR)
 		nd->nd_repstat = NFSERR_NOTDIR;
 	if (nd->nd_repstat == 0 && cnt == 0) {
 		if (nd->nd_flag & ND_NFSV2)
 			/* NFSv2 does not have NFSERR_TOOSMALL */
 			nd->nd_repstat = EPERM;
 		else
 			nd->nd_repstat = NFSERR_TOOSMALL;
 	}
 	if (!nd->nd_repstat)
 		nd->nd_repstat = nfsvno_accchk(vp, VEXEC,
 		    nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE,
 		    NFSACCCHK_VPISLOCKED, NULL);
 	if (nd->nd_repstat) {
 		vput(vp);
 		if (nd->nd_flag & ND_NFSV3)
 			nfsrv_postopattr(nd, getret, &at);
 		goto out;
 	}
 	/* UFS needs the extra cookie check in the skip loop below. */
 	is_ufs = strcmp(vp->v_mount->mnt_vfc->vfc_name, "ufs") == 0;
 	rbuf = malloc(siz, M_TEMP, M_WAITOK);
 again:
 	/* (Re)start a read; drop the cookies from any previous pass. */
 	eofflag = 0;
 	if (cookies) {
 		free(cookies, M_TEMP);
 		cookies = NULL;
 	}
 
 	iv.iov_base = rbuf;
 	iv.iov_len = siz;
 	io.uio_iov = &iv;
 	io.uio_iovcnt = 1;
 	io.uio_offset = (off_t)off;
 	io.uio_resid = siz;
 	io.uio_segflg = UIO_SYSSPACE;
 	io.uio_rw = UIO_READ;
 	io.uio_td = NULL;
 	nd->nd_repstat = VOP_READDIR(vp, &io, nd->nd_cred, &eofflag, &ncookies,
 	    &cookies);
 	off = (u_int64_t)io.uio_offset;
 	/* siz becomes the number of bytes actually read. */
 	if (io.uio_resid)
 		siz -= io.uio_resid;
 
 	/* A successful read that returned no cookies is treated as an error. */
 	if (!cookies && !nd->nd_repstat)
 		nd->nd_repstat = NFSERR_PERM;
 	if (nd->nd_flag & ND_NFSV3) {
 		getret = nfsvno_getattr(vp, &at, nd, p, 1, NULL);
 		if (!nd->nd_repstat)
 			nd->nd_repstat = getret;
 	}
 
 	/*
 	 * Handles the failed cases. nd->nd_repstat == 0 past here.
 	 */
 	if (nd->nd_repstat) {
 		vput(vp);
 		free(rbuf, M_TEMP);
 		if (cookies)
 			free(cookies, M_TEMP);
 		if (nd->nd_flag & ND_NFSV3)
 			nfsrv_postopattr(nd, getret, &at);
 		goto out;
 	}
 	/*
 	 * If nothing read, return eof
 	 * rpc reply
 	 */
 	if (siz == 0) {
 		vput(vp);
 		if (nd->nd_flag & ND_NFSV2) {
 			NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 		} else {
 			nfsrv_postopattr(nd, getret, &at);
 			NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
 			txdr_hyper(at.na_filerev, tl);
 			tl += 2;
 		}
 		/* No entry follows, and this is the end of the directory. */
 		*tl++ = newnfs_false;
 		*tl = newnfs_true;
 		free(rbuf, M_TEMP);
 		free(cookies, M_TEMP);
 		goto out;
 	}
 
 	/*
 	 * Check for degenerate cases of nothing useful read.
 	 * If so go try again
 	 */
 	cpos = rbuf;
 	cend = rbuf + siz;
 	dp = (struct dirent *)cpos;
 	cookiep = cookies;
 
 	/*
 	 * For some reason FreeBSD's ufs_readdir() chooses to back the
 	 * directory offset up to a block boundary, so it is necessary to
 	 * skip over the records that precede the requested offset. This
 	 * requires the assumption that file offset cookies monotonically
 	 * increase.
 	 */
 	while (cpos < cend && ncookies > 0 &&
 	    (dp->d_fileno == 0 || dp->d_type == DT_WHT ||
 	     (is_ufs == 1 && ((u_quad_t)(*cookiep)) <= toff))) {
 		cpos += dp->d_reclen;
 		dp = (struct dirent *)cpos;
 		cookiep++;
 		ncookies--;
 	}
 	/* Everything was skipped: read the next chunk with the full size. */
 	if (cpos >= cend || ncookies == 0) {
 		siz = fullsiz;
 		toff = off;
 		goto again;
 	}
 	vput(vp);
 
 	/*
 	 * dirlen is the size of the reply, including all XDR and must
 	 * not exceed cnt. For NFSv2, RFC1094 didn't clearly indicate
 	 * if the XDR should be included in "count", but to be safe, we do.
 	 * (Include the two booleans at the end of the reply in dirlen now.)
 	 */
 	if (nd->nd_flag & ND_NFSV3) {
 		nfsrv_postopattr(nd, getret, &at);
 		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 		txdr_hyper(at.na_filerev, tl);
 		dirlen = NFSX_V3POSTOPATTR + NFSX_VERF + 2 * NFSX_UNSIGNED;
 	} else {
 		dirlen = 2 * NFSX_UNSIGNED;
 	}
 
 	/* Loop through the records and build reply */
 	while (cpos < cend && ncookies > 0) {
 		nlen = dp->d_namlen;
 		/* Unused, whiteout and over-long entries are left out. */
 		if (dp->d_fileno != 0 && dp->d_type != DT_WHT &&
 			nlen <= NFS_MAXNAMLEN) {
 			if (nd->nd_flag & ND_NFSV3)
 				dirlen += (6*NFSX_UNSIGNED + NFSM_RNDUP(nlen));
 			else
 				dirlen += (4*NFSX_UNSIGNED + NFSM_RNDUP(nlen));
 			/* This entry does not fit; more remains to be read. */
 			if (dirlen > cnt) {
 				eofflag = 0;
 				break;
 			}
 
 			/*
 			 * Build the directory record xdr from
 			 * the dirent entry.
 			 */
 			if (nd->nd_flag & ND_NFSV3) {
 				/* V3 fileid is two words; the high word is sent as 0. */
 				NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
 				*tl++ = newnfs_true;
 				*tl++ = 0;
 			} else {
 				NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 				*tl++ = newnfs_true;
 			}
 			*tl = txdr_unsigned(dp->d_fileno);
 			(void) nfsm_strtom(nd, dp->d_name, nlen);
 			if (nd->nd_flag & ND_NFSV3) {
 				/* V3 cookie is two words; the high word is sent as 0. */
 				NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 				*tl++ = 0;
 			} else
 				NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
 			*tl = txdr_unsigned(*cookiep);
 		}
 		cpos += dp->d_reclen;
 		dp = (struct dirent *)cpos;
 		cookiep++;
 		ncookies--;
 	}
 	/* Unprocessed records remain in the buffer, so this is not eof. */
 	if (cpos < cend)
 		eofflag = 0;
 	/* Terminate the entry list and append the eof flag. */
 	NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 	*tl++ = newnfs_false;
 	if (eofflag)
 		*tl = newnfs_true;
 	else
 		*tl = newnfs_false;
 	free(rbuf, M_TEMP);
 	free(cookies, M_TEMP);
 
 out:
 	NFSEXITCODE2(0, nd);
 	return (0);
 nfsmout:
 	/* Reached from the NFSM_DISSECT() calls above on a parse failure. */
 	vput(vp);
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * Readdirplus for V3 and Readdir for V4.
  *
  * Dissects the cookie offset, the cookie verifier, a directory size hint
  * (siz) and the maximum reply size (cnt) from the request, reads the
  * directory with VOP_READDIR() and then builds one reply record per usable
  * entry.  Each entry's vnode is acquired via VFS_VGET() or, when that is
  * not supported, VOP_LOOKUP(), so that its attributes (and, for NFSv3, its
  * file handle) can be put in the reply.
  *
  * On every path other than the initial nd_repstat check, the reference on
  * vp is released (vput()/vrele()) before returning.
  *
  * Returns 0 with the operation status in nd->nd_repstat, or the error set
  * while dissecting the request arguments (nfsmout path).
  */
 int
 nfsrvd_readdirplus(struct nfsrv_descript *nd, int isdgram,
     struct vnode *vp, struct nfsexstuff *exp)
 {
 	struct dirent *dp;
 	u_int32_t *tl;
 	int dirlen;
 	char *cpos, *cend, *rbuf;
 	struct vnode *nvp;
 	fhandle_t nfh;
 	struct nfsvattr nva, at, *nvap = &nva;
 	struct mbuf *mb0, *mb1;
 	struct nfsreferral *refp;
 	int nlen, r, error = 0, getret = 1, usevget = 1;
 	int siz, cnt, fullsiz, eofflag, ncookies, entrycnt;
 	caddr_t bpos0, bpos1;
 	u_int64_t off, toff, verf;
 	u_long *cookies = NULL, *cookiep;
 	nfsattrbit_t attrbits, rderrbits, savbits;
 	struct uio io;
 	struct iovec iv;
 	struct componentname cn;
 	int at_root, is_ufs, is_zfs, needs_unbusy, supports_nfsv4acls;
 	struct mount *mp, *new_mp;
 	uint64_t mounted_on_fileno;
 	struct thread *p = curthread;
 
 	if (nd->nd_repstat) {
 		nfsrv_postopattr(nd, getret, &at);
 		goto out;
 	}
 	/*
 	 * Six words of arguments: 64bit cookie (offset), 64bit cookie
 	 * verifier, the directory size hint and the maximum reply size.
 	 */
 	NFSM_DISSECT(tl, u_int32_t *, 6 * NFSX_UNSIGNED);
 	off = fxdr_hyper(tl);
 	toff = off;
 	tl += 2;
 	verf = fxdr_hyper(tl);
 	tl += 2;
 	siz = fxdr_unsigned(int, *tl++);
 	cnt = fxdr_unsigned(int, *tl);
 
 	/*
 	 * Use the server's maximum data transfer size as the upper bound
 	 * on reply datalen.
 	 */
 	if (cnt > NFS_SRVMAXDATA(nd) || cnt < 0)
 		cnt = NFS_SRVMAXDATA(nd);
 
 	/*
 	 * siz is a "hint" of how much directory information (name, fileid,
 	 * cookie) should be in the reply. At least one client "hints" 0,
 	 * so I set it to cnt for that case. I also round it up to the
 	 * next multiple of DIRBLKSIZ.
 	 * Since the size of a Readdirplus directory entry reply will always
 	 * be greater than a directory entry returned by VOP_READDIR(), it
 	 * does not make sense to read more than NFS_SRVMAXDATA() via
 	 * VOP_READDIR().
 	 */
 	if (siz <= 0)
 		siz = cnt;
 	else if (siz > NFS_SRVMAXDATA(nd))
 		siz = NFS_SRVMAXDATA(nd);
 	siz = ((siz + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1));
 
 	/*
 	 * For NFSv4, savbits keeps the attribute bits as requested, while
 	 * attrbits is reduced by NFSCLRNOTFILLABLE_ATTRBIT().  rderrbits
 	 * holds only RDATTRERROR and is used when an entry's attributes
 	 * could not be acquired.
 	 */
 	if (nd->nd_flag & ND_NFSV4) {
 		error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
 		if (error)
 			goto nfsmout;
 		NFSSET_ATTRBIT(&savbits, &attrbits);
 		NFSCLRNOTFILLABLE_ATTRBIT(&attrbits, nd);
 		NFSZERO_ATTRBIT(&rderrbits);
 		NFSSETBIT_ATTRBIT(&rderrbits, NFSATTRBIT_RDATTRERROR);
 	} else {
 		NFSZERO_ATTRBIT(&attrbits);
 	}
 	/* Remember the full read size, since siz is reduced after a read. */
 	fullsiz = siz;
 	nd->nd_repstat = getret = nfsvno_getattr(vp, &at, nd, p, 1, NULL);
 #if 0
 	if (!nd->nd_repstat) {
 	    if (off && verf != at.na_filerev) {
 		/*
 		 * va_filerev is not sufficient as a cookie verifier,
 		 * since it is not supposed to change when entries are
 		 * removed/added unless that offset cookies returned to
 		 * the client are no longer valid.
 		 */
 		if (nd->nd_flag & ND_NFSV4) {
 			nd->nd_repstat = NFSERR_NOTSAME;
 		} else {
 			nd->nd_repstat = NFSERR_BAD_COOKIE;
 		}
 	    }
 	}
 #endif
 	if (!nd->nd_repstat && vp->v_type != VDIR)
 		nd->nd_repstat = NFSERR_NOTDIR;
 	if (!nd->nd_repstat && cnt == 0)
 		nd->nd_repstat = NFSERR_TOOSMALL;
 	if (!nd->nd_repstat)
 		nd->nd_repstat = nfsvno_accchk(vp, VEXEC,
 		    nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE,
 		    NFSACCCHK_VPISLOCKED, NULL);
 	if (nd->nd_repstat) {
 		vput(vp);
 		if (nd->nd_flag & ND_NFSV3)
 			nfsrv_postopattr(nd, getret, &at);
 		goto out;
 	}
 	is_ufs = strcmp(vp->v_mount->mnt_vfc->vfc_name, "ufs") == 0;
 	is_zfs = strcmp(vp->v_mount->mnt_vfc->vfc_name, "zfs") == 0;
 
 	rbuf = malloc(siz, M_TEMP, M_WAITOK);
 again:
 	/* Drop any cookie array left over from a previous pass. */
 	eofflag = 0;
 	if (cookies) {
 		free(cookies, M_TEMP);
 		cookies = NULL;
 	}
 
 	iv.iov_base = rbuf;
 	iv.iov_len = siz;
 	io.uio_iov = &iv;
 	io.uio_iovcnt = 1;
 	io.uio_offset = (off_t)off;
 	io.uio_resid = siz;
 	io.uio_segflg = UIO_SYSSPACE;
 	io.uio_rw = UIO_READ;
 	io.uio_td = NULL;
 	nd->nd_repstat = VOP_READDIR(vp, &io, nd->nd_cred, &eofflag, &ncookies,
 	    &cookies);
 	off = (u_int64_t)io.uio_offset;
 	/* Reduce siz by the part of the buffer that was not filled in. */
 	if (io.uio_resid)
 		siz -= io.uio_resid;
 
 	getret = nfsvno_getattr(vp, &at, nd, p, 1, NULL);
 
 	/* A successful read that returned no cookie array is an error. */
 	if (!cookies && !nd->nd_repstat)
 		nd->nd_repstat = NFSERR_PERM;
 	if (!nd->nd_repstat)
 		nd->nd_repstat = getret;
 	if (nd->nd_repstat) {
 		vput(vp);
 		if (cookies)
 			free(cookies, M_TEMP);
 		free(rbuf, M_TEMP);
 		if (nd->nd_flag & ND_NFSV3)
 			nfsrv_postopattr(nd, getret, &at);
 		goto out;
 	}
 	/*
 	 * If nothing read, return eof
 	 * rpc reply
 	 */
 	if (siz == 0) {
 		vput(vp);
 		if (nd->nd_flag & ND_NFSV3)
 			nfsrv_postopattr(nd, getret, &at);
 		/* Verifier, then "no entry follows" and "eof" booleans. */
 		NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
 		txdr_hyper(at.na_filerev, tl);
 		tl += 2;
 		*tl++ = newnfs_false;
 		*tl = newnfs_true;
 		free(cookies, M_TEMP);
 		free(rbuf, M_TEMP);
 		goto out;
 	}
 
 	/*
 	 * Check for degenerate cases of nothing useful read.
 	 * If so go try again
 	 */
 	cpos = rbuf;
 	cend = rbuf + siz;
 	dp = (struct dirent *)cpos;
 	cookiep = cookies;
 
 	/*
 	 * For some reason FreeBSD's ufs_readdir() chooses to back the
 	 * directory offset up to a block boundary, so it is necessary to
 	 * skip over the records that precede the requested offset. This
 	 * requires the assumption that file offset cookies monotonically
 	 * increase.
 	 * Empty slots, whiteouts and, for NFSv4, "." and ".." are skipped
 	 * here as well.
 	 */
 	while (cpos < cend && ncookies > 0 &&
 	  (dp->d_fileno == 0 || dp->d_type == DT_WHT ||
 	   (is_ufs == 1 && ((u_quad_t)(*cookiep)) <= toff) ||
 	   ((nd->nd_flag & ND_NFSV4) &&
 	    ((dp->d_namlen == 1 && dp->d_name[0] == '.') ||
 	     (dp->d_namlen==2 && dp->d_name[0]=='.' && dp->d_name[1]=='.'))))) {
 		cpos += dp->d_reclen;
 		dp = (struct dirent *)cpos;
 		cookiep++;
 		ncookies--;
 	}
 	/* Nothing usable in this chunk, so read the next one. */
 	if (cpos >= cend || ncookies == 0) {
 		siz = fullsiz;
 		toff = off;
 		goto again;
 	}
 
 	/*
 	 * Busy the file system so that the mount point won't go away
 	 * and, as such, VFS_VGET() can be used safely.
 	 * The mount is referenced across the vnode unlock and the
 	 * reference is dropped once vfs_busy() has been attempted.
 	 * vp remains referenced but unlocked from here on, which is
 	 * why the failure path below uses vrele() instead of vput().
 	 */
 	mp = vp->v_mount;
 	vfs_ref(mp);
 	NFSVOPUNLOCK(vp);
 	nd->nd_repstat = vfs_busy(mp, 0);
 	vfs_rel(mp);
 	if (nd->nd_repstat != 0) {
 		vrele(vp);
 		free(cookies, M_TEMP);
 		free(rbuf, M_TEMP);
 		if (nd->nd_flag & ND_NFSV3)
 			nfsrv_postopattr(nd, getret, &at);
 		goto out;
 	}
 
 	/*
 	 * Check to see if entries in this directory can be safely acquired
 	 * via VFS_VGET() or if a switch to VOP_LOOKUP() is required.
 	 * ZFS snapshot directories need VOP_LOOKUP(), so that any
 	 * automount of the snapshot directory that is required will
 	 * be done.
 	 * This needs to be done here for NFSv4, since NFSv4 never does
 	 * a VFS_VGET() for "." or "..".
 	 */
 	if (is_zfs == 1) {
 		r = VFS_VGET(mp, at.na_fileid, LK_SHARED, &nvp);
 		if (r == EOPNOTSUPP) {
 			usevget = 0;
 			cn.cn_nameiop = LOOKUP;
 			cn.cn_lkflags = LK_SHARED | LK_RETRY;
 			cn.cn_cred = nd->nd_cred;
 			cn.cn_thread = p;
 		} else if (r == 0)
 			vput(nvp);
 	}
 
 	/*
 	 * Save this position, in case there is an error before one entry
 	 * is created.
 	 */
 	mb0 = nd->nd_mb;
 	bpos0 = nd->nd_bpos;
 
 	/*
 	 * Fill in the first part of the reply.
 	 * dirlen is the reply length in bytes and cannot exceed cnt.
 	 * (Include the two booleans at the end of the reply in dirlen now,
 	 *  so we recognize when we have exceeded cnt.)
 	 */
 	if (nd->nd_flag & ND_NFSV3) {
 		dirlen = NFSX_V3POSTOPATTR + NFSX_VERF + 2 * NFSX_UNSIGNED;
 		nfsrv_postopattr(nd, getret, &at);
 	} else {
 		dirlen = NFSX_VERF + 2 * NFSX_UNSIGNED;
 	}
 	NFSM_BUILD(tl, u_int32_t *, NFSX_VERF);
 	txdr_hyper(at.na_filerev, tl);
 
 	/*
 	 * Save this position, in case there is an empty reply needed.
 	 */
 	mb1 = nd->nd_mb;
 	bpos1 = nd->nd_bpos;
 
 	/* Loop through the records and build reply */
 	entrycnt = 0;
 	while (cpos < cend && ncookies > 0 && dirlen < cnt) {
 		nlen = dp->d_namlen;
 		/*
 		 * Only reply with entries that are in use, are not whiteouts
 		 * and have a name that fits.  For NFSv4, "." and ".." are
 		 * never returned.
 		 */
 		if (dp->d_fileno != 0 && dp->d_type != DT_WHT &&
 		    nlen <= NFS_MAXNAMLEN &&
 		    ((nd->nd_flag & ND_NFSV3) || nlen > 2 ||
 		     (nlen==2 && (dp->d_name[0]!='.' || dp->d_name[1]!='.'))
 		      || (nlen == 1 && dp->d_name[0] != '.'))) {
 			/*
 			 * Save the current position in the reply, in case
 			 * this entry exceeds cnt.
 			 */
 			mb1 = nd->nd_mb;
 			bpos1 = nd->nd_bpos;
 	
 			/*
 			 * For readdir_and_lookup get the vnode using
 			 * the file number.
 			 */
 			nvp = NULL;
 			refp = NULL;
 			r = 0;
 			at_root = 0;
 			needs_unbusy = 0;
 			new_mp = mp;
 			mounted_on_fileno = (uint64_t)dp->d_fileno;
 			if ((nd->nd_flag & ND_NFSV3) ||
 			    NFSNONZERO_ATTRBIT(&savbits)) {
 				if (nd->nd_flag & ND_NFSV4)
 					refp = nfsv4root_getreferral(NULL,
 					    vp, dp->d_fileno);
 				if (refp == NULL) {
 					if (usevget)
 						r = VFS_VGET(mp, dp->d_fileno,
 						    LK_SHARED, &nvp);
 					else
 						r = EOPNOTSUPP;
 					if (r == EOPNOTSUPP) {
 						/*
 						 * Switch to VOP_LOOKUP() for
 						 * this and all later entries,
 						 * setting up the invariant
 						 * part of cn only once.
 						 */
 						if (usevget) {
 							usevget = 0;
 							cn.cn_nameiop = LOOKUP;
 							cn.cn_lkflags =
 							    LK_SHARED |
 							    LK_RETRY;
 							cn.cn_cred =
 							    nd->nd_cred;
 							cn.cn_thread = p;
 						}
 						cn.cn_nameptr = dp->d_name;
 						cn.cn_namelen = nlen;
 						cn.cn_flags = ISLASTCN |
 						    NOFOLLOW | LOCKLEAF;
 						if (nlen == 2 &&
 						    dp->d_name[0] == '.' &&
 						    dp->d_name[1] == '.')
 							cn.cn_flags |=
 							    ISDOTDOT;
 						/*
 						 * vp was unlocked above, so
 						 * lock it again for the
 						 * lookup.
 						 */
 						if (NFSVOPLOCK(vp, LK_SHARED)
 						    != 0) {
 							nd->nd_repstat = EPERM;
 							break;
 						}
 						/*
 						 * ".." of a file system root
 						 * is answered with vp itself
 						 * instead of a lookup.
 						 */
 						if ((vp->v_vflag & VV_ROOT) != 0
 						    && (cn.cn_flags & ISDOTDOT)
 						    != 0) {
 							vref(vp);
 							nvp = vp;
 							r = 0;
 						} else {
 							r = VOP_LOOKUP(vp, &nvp,
 							    &cn);
 							if (vp != nvp)
 								NFSVOPUNLOCK(vp);
 						}
 					}
 
 					/*
 					 * For NFSv4, check to see if nvp is
 					 * a mount point and get the mount
 					 * point vnode, as required.
 					 */
 					if (r == 0 &&
 					    nfsrv_enable_crossmntpt != 0 &&
 					    (nd->nd_flag & ND_NFSV4) != 0 &&
 					    nvp->v_type == VDIR &&
 					    nvp->v_mountedhere != NULL) {
 						new_mp = nvp->v_mountedhere;
 						r = vfs_busy(new_mp, 0);
 						vput(nvp);
 						nvp = NULL;
 						if (r == 0) {
 							r = VFS_ROOT(new_mp,
 							    LK_SHARED, &nvp);
 							needs_unbusy = 1;
 							if (r == 0)
 								at_root = 1;
 						}
 					}
 				}
 
 				/*
 				 * If we failed to look up the entry, then it
 				 * has become invalid, most likely removed.
 				 */
 				if (r != 0) {
 					if (needs_unbusy)
 						vfs_unbusy(new_mp);
 					goto invalid;
 				}
 				KASSERT(refp != NULL || nvp != NULL,
 				    ("%s: undetected lookup error", __func__));
 
 				if (refp == NULL &&
 				    ((nd->nd_flag & ND_NFSV3) ||
 				     NFSNONZERO_ATTRBIT(&attrbits))) {
 					r = nfsvno_getfh(nvp, &nfh, p);
 					if (!r)
 					    r = nfsvno_getattr(nvp, nvap, nd, p,
 						1, &attrbits);
 					if (r == 0 && is_zfs == 1 &&
 					    nfsrv_enable_crossmntpt != 0 &&
 					    (nd->nd_flag & ND_NFSV4) != 0 &&
 					    nvp->v_type == VDIR &&
 					    vp->v_mount != nvp->v_mount) {
 					    /*
 					     * For a ZFS snapshot, there is a
 					     * pseudo mount that does not set
 					     * v_mountedhere, so it needs to
 					     * be detected via a different
 					     * mount structure.
 					     */
 					    at_root = 1;
 					    if (new_mp == mp)
 						new_mp = nvp->v_mount;
 					}
 				}
 
 				/*
 				 * If we failed to get attributes of the entry,
 				 * then just skip it for NFSv3 (the traditional
 				 * behavior in the old NFS server).
 				 * For NFSv4 the behavior is controlled by
 				 * RDATTRERROR: we either ignore the error or
 				 * fail the request.
 				 * Note that RDATTRERROR is never set for NFSv3.
 				 */
 				if (r != 0) {
 					if (!NFSISSET_ATTRBIT(&attrbits,
 					    NFSATTRBIT_RDATTRERROR)) {
 						vput(nvp);
 						if (needs_unbusy != 0)
 							vfs_unbusy(new_mp);
 						if ((nd->nd_flag & ND_NFSV3))
 							goto invalid;
 						nd->nd_repstat = r;
 						break;
 					}
 				}
 			}
 
 			/*
 			 * Build the directory record xdr
 			 */
 			if (nd->nd_flag & ND_NFSV3) {
 				NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
 				*tl++ = newnfs_true;
 				*tl++ = 0;
 				*tl = txdr_unsigned(dp->d_fileno);
 				dirlen += nfsm_strtom(nd, dp->d_name, nlen);
 				NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 				*tl++ = 0;
 				*tl = txdr_unsigned(*cookiep);
 				nfsrv_postopattr(nd, 0, nvap);
 				dirlen += nfsm_fhtom(nd,(u_int8_t *)&nfh,0,1);
 				dirlen += (5*NFSX_UNSIGNED+NFSX_V3POSTOPATTR);
 				if (nvp != NULL)
 					vput(nvp);
 			} else {
 				NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
 				*tl++ = newnfs_true;
 				*tl++ = 0;
 				*tl = txdr_unsigned(*cookiep);
 				dirlen += nfsm_strtom(nd, dp->d_name, nlen);
 				/*
 				 * nvp is unlocked here and only vrele()'d
 				 * once the attributes have been filled in.
 				 */
 				if (nvp != NULL) {
 					supports_nfsv4acls =
 					    nfs_supportsnfsv4acls(nvp);
 					NFSVOPUNLOCK(nvp);
 				} else
 					supports_nfsv4acls = 0;
 				if (refp != NULL) {
 					dirlen += nfsrv_putreferralattr(nd,
 					    &savbits, refp, 0,
 					    &nd->nd_repstat);
 					if (nd->nd_repstat) {
 						if (nvp != NULL)
 							vrele(nvp);
 						if (needs_unbusy != 0)
 							vfs_unbusy(new_mp);
 						break;
 					}
 				} else if (r) {
 					/*
 					 * The attributes could not be
 					 * acquired, so reply with the
 					 * RDATTRERROR attribute only.
 					 */
 					dirlen += nfsvno_fillattr(nd, new_mp,
 					    nvp, nvap, &nfh, r, &rderrbits,
 					    nd->nd_cred, p, isdgram, 0,
 					    supports_nfsv4acls, at_root,
 					    mounted_on_fileno);
 				} else {
 					dirlen += nfsvno_fillattr(nd, new_mp,
 					    nvp, nvap, &nfh, r, &attrbits,
 					    nd->nd_cred, p, isdgram, 0,
 					    supports_nfsv4acls, at_root,
 					    mounted_on_fileno);
 				}
 				if (nvp != NULL)
 					vrele(nvp);
 				dirlen += (3 * NFSX_UNSIGNED);
 			}
 			if (needs_unbusy != 0)
 				vfs_unbusy(new_mp);
 			/* Only count the entry if it fits within cnt. */
 			if (dirlen <= cnt)
 				entrycnt++;
 		}
 invalid:
 		cpos += dp->d_reclen;
 		dp = (struct dirent *)cpos;
 		cookiep++;
 		ncookies--;
 	}
 	vrele(vp);
 	vfs_unbusy(mp);
 
 	/*
 	 * If dirlen > cnt, we must strip off the last entry. If that
 	 * results in an empty reply, report NFSERR_TOOSMALL.
 	 * For an error, the reply is trimmed back to the position saved
 	 * before the first part of the reply was built.
 	 */
 	if (dirlen > cnt || nd->nd_repstat) {
 		if (!nd->nd_repstat && entrycnt == 0)
 			nd->nd_repstat = NFSERR_TOOSMALL;
 		if (nd->nd_repstat) {
 			newnfs_trimtrailing(nd, mb0, bpos0);
 			if (nd->nd_flag & ND_NFSV3)
 				nfsrv_postopattr(nd, getret, &at);
 		} else
 			newnfs_trimtrailing(nd, mb1, bpos1);
 		eofflag = 0;
 	} else if (cpos < cend)
 		eofflag = 0;
 	/* Terminate the entry list and append the eof boolean. */
 	if (!nd->nd_repstat) {
 		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 		*tl++ = newnfs_false;
 		if (eofflag)
 			*tl = newnfs_true;
 		else
 			*tl = newnfs_false;
 	}
 	free(cookies, M_TEMP);
 	free(rbuf, M_TEMP);
 
 out:
 	NFSEXITCODE2(0, nd);
 	return (0);
 nfsmout:
 	vput(vp);
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * Decode the settable attributes found in the request mbuf list and
  * store them in *nvap.  The NFSv2 and NFSv3 encodings are handled here;
  * for NFSv4 the work is passed on to nfsv4_sattr().
  * (Return 0 or EBADRPC)
  */
 int
 nfsrv_sattr(struct nfsrv_descript *nd, vnode_t vp, struct nfsvattr *nvap,
     nfsattrbit_t *attrbitp, NFSACL_T *aclp, struct thread *p)
 {
 	struct nfsv2_sattr *sp;
 	u_int32_t *tl;
 	int error = 0;
 	int how;			/* NFSv3 set-time discriminant. */
 	int atime_from_client = 0;	/* atime was supplied by client. */
 
 	switch (nd->nd_flag & (ND_NFSV2 | ND_NFSV3 | ND_NFSV4)) {
 	case ND_NFSV4:
 		error = nfsv4_sattr(nd, vp, nvap, attrbitp, aclp, p);
 		break;
 
 	case ND_NFSV2:
 		/* NFSv2 uses a fixed size structure; -1 means "not set". */
 		NFSM_DISSECT(sp, struct nfsv2_sattr *, NFSX_V2SATTR);
 		/*
 		 * Some old clients didn't fill in the high order 16bits.
 		 * --> check the low order 2 bytes for 0xffff
 		 */
 		if ((fxdr_unsigned(int, sp->sa_mode) & 0xffff) != 0xffff)
 			nvap->na_mode = nfstov_mode(sp->sa_mode);
 		if (sp->sa_uid != newnfs_xdrneg1)
 			nvap->na_uid = fxdr_unsigned(uid_t, sp->sa_uid);
 		if (sp->sa_gid != newnfs_xdrneg1)
 			nvap->na_gid = fxdr_unsigned(gid_t, sp->sa_gid);
 		if (sp->sa_size != newnfs_xdrneg1)
 			nvap->na_size = fxdr_unsigned(u_quad_t, sp->sa_size);
 		if (sp->sa_atime.nfsv2_sec != newnfs_xdrneg1) {
 #ifdef notyet
 			fxdr_nfsv2time(&sp->sa_atime, &nvap->na_atime);
 #else
 			nvap->na_atime.tv_sec =
 			    fxdr_unsigned(u_int32_t, sp->sa_atime.nfsv2_sec);
 			nvap->na_atime.tv_nsec = 0;
 #endif
 		}
 		if (sp->sa_mtime.nfsv2_sec != newnfs_xdrneg1)
 			fxdr_nfsv2time(&sp->sa_mtime, &nvap->na_mtime);
 		break;
 
 	case ND_NFSV3:
 		/*
 		 * Each NFSv3 attribute is preceded by a discriminant word
 		 * that says whether (or how) the value is to be set.
 		 */
 
 		/* mode */
 		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 		if (*tl == newnfs_true) {
 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 			nvap->na_mode = nfstov_mode(*tl);
 		}
 
 		/* uid */
 		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 		if (*tl == newnfs_true) {
 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 			nvap->na_uid = fxdr_unsigned(uid_t, *tl);
 		}
 
 		/* gid */
 		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 		if (*tl == newnfs_true) {
 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 			nvap->na_gid = fxdr_unsigned(gid_t, *tl);
 		}
 
 		/* size */
 		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 		if (*tl == newnfs_true) {
 			NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 			nvap->na_size = fxdr_hyper(tl);
 		}
 
 		/* atime */
 		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 		how = fxdr_unsigned(int, *tl);
 		if (how == NFSV3SATTRTIME_TOCLIENT) {
 			NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 			fxdr_nfsv3time(tl, &nvap->na_atime);
 			atime_from_client = 1;
 		} else if (how == NFSV3SATTRTIME_TOSERVER) {
 			vfs_timestamp(&nvap->na_atime);
 			nvap->na_vaflags |= VA_UTIMES_NULL;
 		}
 
 		/* mtime */
 		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 		how = fxdr_unsigned(int, *tl);
 		if (how == NFSV3SATTRTIME_TOCLIENT) {
 			NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 			fxdr_nfsv3time(tl, &nvap->na_mtime);
 			nvap->na_vaflags &= ~VA_UTIMES_NULL;
 		} else if (how == NFSV3SATTRTIME_TOSERVER) {
 			vfs_timestamp(&nvap->na_mtime);
 			/* Not "utimes NULL" if atime came from the client. */
 			if (atime_from_client == 0)
 				nvap->na_vaflags |= VA_UTIMES_NULL;
 		}
 		break;
 
 	default:
 		/* No recognized version bit: nothing is dissected. */
 		break;
 	}
 nfsmout:
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * Handle the settable attributes for V4.
  *
  * Dissects the attribute bitmap into *attrbitp and then walks the bits in
  * ascending order, dissecting each attribute value from the request and
  * storing the supported ones in *nvap (or *aclp for an ACL).  Attributes
  * that cannot be set are still dissected, so that parsing stays in sync
  * with the request, and are reported via nd->nd_repstat.  attrsum counts
  * the bytes consumed so that they can be checked against the attribute
  * length sent by the client.
  *
  * vp may be NULL, which implies an Open/Create operation.
  *
  * Returns NFSERR_BADXDR (or another dissect error) if the arguments can't
  * be parsed, 0 otherwise; attribute level failures are left in
  * nd->nd_repstat.
  */
 int
 nfsv4_sattr(struct nfsrv_descript *nd, vnode_t vp, struct nfsvattr *nvap,
     nfsattrbit_t *attrbitp, NFSACL_T *aclp, struct thread *p)
 {
 	u_int32_t *tl;
 	int attrsum = 0;
 	int i, j;
 	int error, attrsize, bitpos, aclsize, aceerr, retnotsup = 0;
 	int moderet, toclient = 0;
 	int gotva;			/* va was filled in by VOP_GETATTR(). */
 	u_char *cp, namestr[NFSV4_SMALLSTR + 1];
 	uid_t uid;
 	gid_t gid;
 	u_short mode, mask;		/* Same type as va_mode. */
 	struct vattr va;
 
 	error = nfsrv_getattrbits(nd, attrbitp, NULL, &retnotsup);
 	if (error)
 		goto nfsmout;
 	NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 	attrsize = fxdr_unsigned(int, *tl);
 
 	/*
 	 * Loop around getting the settable attributes. If an unsupported
 	 * one is found, set nd_repstat == NFSERR_ATTRNOTSUPP and return.
 	 */
 	if (retnotsup) {
 		nd->nd_repstat = NFSERR_ATTRNOTSUPP;
 		bitpos = NFSATTRBIT_MAX;
 	} else {
 		bitpos = 0;
 	}
 	moderet = 0;
 	for (; bitpos < NFSATTRBIT_MAX; bitpos++) {
 	    /* More has been consumed than the client said it sent. */
 	    if (attrsum > attrsize) {
 		error = NFSERR_BADXDR;
 		goto nfsmout;
 	    }
 	    if (NFSISSET_ATTRBIT(attrbitp, bitpos))
 		switch (bitpos) {
 		case NFSATTRBIT_SIZE:
 			NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER);
 			/* A size can only be set on a regular file. */
 			if (vp != NULL && vp->v_type != VREG) {
 				error = (vp->v_type == VDIR) ? NFSERR_ISDIR :
 				    NFSERR_INVAL;
 				goto nfsmout;
 			}
 			nvap->na_size = fxdr_hyper(tl);
 			attrsum += NFSX_HYPER;
 			break;
 		case NFSATTRBIT_ACL:
 			error = nfsrv_dissectacl(nd, aclp, &aceerr, &aclsize,
 			    p);
 			if (error)
 				goto nfsmout;
 			if (aceerr && !nd->nd_repstat)
 				nd->nd_repstat = aceerr;
 			attrsum += aclsize;
 			break;
 		case NFSATTRBIT_ARCHIVE:
 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 			if (!nd->nd_repstat)
 				nd->nd_repstat = NFSERR_ATTRNOTSUPP;
 			attrsum += NFSX_UNSIGNED;
 			break;
 		case NFSATTRBIT_HIDDEN:
 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 			if (!nd->nd_repstat)
 				nd->nd_repstat = NFSERR_ATTRNOTSUPP;
 			attrsum += NFSX_UNSIGNED;
 			break;
 		case NFSATTRBIT_MIMETYPE:
 			/* Not supported, so just skip over the string. */
 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 			i = fxdr_unsigned(int, *tl);
 			error = nfsm_advance(nd, NFSM_RNDUP(i), -1);
 			if (error)
 				goto nfsmout;
 			if (!nd->nd_repstat)
 				nd->nd_repstat = NFSERR_ATTRNOTSUPP;
 			attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(i));
 			break;
 		case NFSATTRBIT_MODE:
 			moderet = NFSERR_INVAL;	/* Can't do MODESETMASKED. */
 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 			nvap->na_mode = nfstov_mode(*tl);
 			attrsum += NFSX_UNSIGNED;
 			break;
 		case NFSATTRBIT_OWNER:
 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 			j = fxdr_unsigned(int, *tl);
 			if (j < 0) {
 				error = NFSERR_BADXDR;
 				goto nfsmout;
 			}
 			/* Use the on-stack buffer unless the name is long. */
 			if (j > NFSV4_SMALLSTR)
 				cp = malloc(j + 1, M_NFSSTRING, M_WAITOK);
 			else
 				cp = namestr;
 			error = nfsrv_mtostr(nd, cp, j);
 			if (error) {
 				if (j > NFSV4_SMALLSTR)
 					free(cp, M_NFSSTRING);
 				goto nfsmout;
 			}
 			if (!nd->nd_repstat) {
 				nd->nd_repstat = nfsv4_strtouid(nd, cp, j,
 				    &uid);
 				if (!nd->nd_repstat)
 					nvap->na_uid = uid;
 			}
 			if (j > NFSV4_SMALLSTR)
 				free(cp, M_NFSSTRING);
 			attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(j));
 			break;
 		case NFSATTRBIT_OWNERGROUP:
 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 			j = fxdr_unsigned(int, *tl);
 			if (j < 0) {
 				error = NFSERR_BADXDR;
 				goto nfsmout;
 			}
 			/* Use the on-stack buffer unless the name is long. */
 			if (j > NFSV4_SMALLSTR)
 				cp = malloc(j + 1, M_NFSSTRING, M_WAITOK);
 			else
 				cp = namestr;
 			error = nfsrv_mtostr(nd, cp, j);
 			if (error) {
 				if (j > NFSV4_SMALLSTR)
 					free(cp, M_NFSSTRING);
 				goto nfsmout;
 			}
 			if (!nd->nd_repstat) {
 				nd->nd_repstat = nfsv4_strtogid(nd, cp, j,
 				    &gid);
 				if (!nd->nd_repstat)
 					nvap->na_gid = gid;
 			}
 			if (j > NFSV4_SMALLSTR)
 				free(cp, M_NFSSTRING);
 			attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(j));
 			break;
 		case NFSATTRBIT_SYSTEM:
 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 			if (!nd->nd_repstat)
 				nd->nd_repstat = NFSERR_ATTRNOTSUPP;
 			attrsum += NFSX_UNSIGNED;
 			break;
 		case NFSATTRBIT_TIMEACCESSSET:
 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 			attrsum += NFSX_UNSIGNED;
 			if (fxdr_unsigned(int, *tl)==NFSV4SATTRTIME_TOCLIENT) {
 			    NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME);
 			    fxdr_nfsv4time(tl, &nvap->na_atime);
 			    toclient = 1;
 			    attrsum += NFSX_V4TIME;
 			} else {
 			    vfs_timestamp(&nvap->na_atime);
 			    nvap->na_vaflags |= VA_UTIMES_NULL;
 			}
 			break;
 		case NFSATTRBIT_TIMEBACKUP:
 			NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME);
 			if (!nd->nd_repstat)
 				nd->nd_repstat = NFSERR_ATTRNOTSUPP;
 			attrsum += NFSX_V4TIME;
 			break;
 		case NFSATTRBIT_TIMECREATE:
 			NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME);
 			if (!nd->nd_repstat)
 				nd->nd_repstat = NFSERR_ATTRNOTSUPP;
 			attrsum += NFSX_V4TIME;
 			break;
 		case NFSATTRBIT_TIMEMODIFYSET:
 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 			attrsum += NFSX_UNSIGNED;
 			if (fxdr_unsigned(int, *tl)==NFSV4SATTRTIME_TOCLIENT) {
 			    NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME);
 			    fxdr_nfsv4time(tl, &nvap->na_mtime);
 			    nvap->na_vaflags &= ~VA_UTIMES_NULL;
 			    attrsum += NFSX_V4TIME;
 			} else {
 			    vfs_timestamp(&nvap->na_mtime);
 			    /* Not "utimes NULL" if atime came from client. */
 			    if (!toclient)
 				nvap->na_vaflags |= VA_UTIMES_NULL;
 			}
 			break;
 		case NFSATTRBIT_MODESETMASKED:
 			NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
 			mode = fxdr_unsigned(u_short, *tl++);
 			mask = fxdr_unsigned(u_short, *tl);
 			/*
 			 * vp == NULL implies an Open/Create operation.
 			 * This attribute can only be used for Setattr and
 			 * only for NFSv4.1 or higher.
 			 * If moderet != 0, a mode attribute has also been
 			 * specified and this attribute cannot be done in the
 			 * same Setattr operation.
 			 */
 			gotva = 0;
 			if ((nd->nd_flag & ND_NFSV41) == 0)
 				nd->nd_repstat = NFSERR_ATTRNOTSUPP;
 			else if ((mode & ~07777) != 0 || (mask & ~07777) != 0 ||
 			    vp == NULL)
 				nd->nd_repstat = NFSERR_INVAL;
 			else if (moderet == 0) {
 				moderet = VOP_GETATTR(vp, &va, nd->nd_cred);
 				gotva = 1;
 			}
 			/*
 			 * Only merge in the current mode bits when va was
 			 * actually filled in.  When the request was rejected
 			 * above, VOP_GETATTR() was never called and va.va_mode
 			 * must not be read.
 			 */
 			if (moderet != 0)
 				nd->nd_repstat = moderet;
 			else if (gotva != 0)
 				nvap->na_mode = (mode & mask) |
 				    (va.va_mode & ~mask);
 			attrsum += 2 * NFSX_UNSIGNED;
 			break;
 		default:
 			nd->nd_repstat = NFSERR_ATTRNOTSUPP;
 			/*
 			 * set bitpos so we drop out of the loop.
 			 */
 			bitpos = NFSATTRBIT_MAX;
 			break;
 		}
 	}
 
 	/*
 	 * some clients pad the attrlist, so we need to skip over the
 	 * padding.
 	 */
 	if (attrsum > attrsize) {
 		error = NFSERR_BADXDR;
 	} else {
 		attrsize = NFSM_RNDUP(attrsize);
 		if (attrsum < attrsize)
 			error = nfsm_advance(nd, attrsize - attrsum, -1);
 	}
 nfsmout:
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * Check/setup export credentials.
  */
 int
 nfsd_excred(struct nfsrv_descript *nd, struct nfsexstuff *exp,
     struct ucred *credanon)
 {
 	int error = 0;
 
 	/*
 	 * Check/setup credentials.
 	 */
 	if (nd->nd_flag & ND_GSS)
 		exp->nes_exflag &= ~MNT_EXPORTANON;
 
 	/*
 	 * Check to see if the operation is allowed for this security flavor.
 	 * RFC2623 suggests that the NFSv3 Fsinfo RPC be allowed to
 	 * AUTH_NONE or AUTH_SYS for file systems requiring RPCSEC_GSS.
 	 * Also, allow Secinfo, so that it can acquire the correct flavor(s).
 	 */
 	if (nfsvno_testexp(nd, exp) &&
 	    nd->nd_procnum != NFSV4OP_SECINFO &&
 	    nd->nd_procnum != NFSPROC_FSINFO) {
 		if (nd->nd_flag & ND_NFSV4)
 			error = NFSERR_WRONGSEC;
 		else
 			error = (NFSERR_AUTHERR | AUTH_TOOWEAK);
 		goto out;
 	}
 
 	/*
 	 * Check to see if the file system is exported V4 only.
 	 */
 	if (NFSVNO_EXV4ONLY(exp) && !(nd->nd_flag & ND_NFSV4)) {
 		error = NFSERR_PROGNOTV4;
 		goto out;
 	}
 
 	/*
 	 * Now, map the user credentials.
 	 * (Note that ND_AUTHNONE will only be set for an NFSv3
 	 *  Fsinfo RPC. If set for anything else, this code might need
 	 *  to change.)
 	 */
 	if (NFSVNO_EXPORTED(exp)) {
 		if (((nd->nd_flag & ND_GSS) == 0 && nd->nd_cred->cr_uid == 0) ||
 		     NFSVNO_EXPORTANON(exp) ||
 		     (nd->nd_flag & ND_AUTHNONE) != 0) {
 			nd->nd_cred->cr_uid = credanon->cr_uid;
 			nd->nd_cred->cr_gid = credanon->cr_gid;
 			crsetgroups(nd->nd_cred, credanon->cr_ngroups,
 			    credanon->cr_groups);
 		} else if ((nd->nd_flag & ND_GSS) == 0) {
 			/*
 			 * If using AUTH_SYS, call nfsrv_getgrpscred() to see
 			 * if there is a replacement credential with a group
 			 * list set up by "nfsuserd -manage-gids".
 			 * If there is no replacement, nfsrv_getgrpscred()
 			 * simply returns its argument.
 			 */
 			nd->nd_cred = nfsrv_getgrpscred(nd->nd_cred);
 		}
 	}
 
 out:
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * Check exports.
  *
  * Calls VFS_CHECKEXP() for the client address nam to fill in the export
  * flags and security flavors in *exp and the credential in *credp.
  * If that fails while nfs_rootfhset is set, the failure is not treated as
  * an error; the export flags and flavor count are cleared instead.
  * Returns EACCES if the flavor count is outside 1..MAXSECFLAVORS.
  */
 int
 nfsvno_checkexp(struct mount *mp, struct sockaddr *nam, struct nfsexstuff *exp,
     struct ucred **credp)
 {
-	int i, error, *secflavors;
+	int error;
 
 	error = VFS_CHECKEXP(mp, nam, &exp->nes_exflag, credp,
-	    &exp->nes_numsecflavor, &secflavors);
+	    &exp->nes_numsecflavor, exp->nes_secflavors);
 	if (error) {
 		/* Report "not exported" rather than failing. */
 		if (nfs_rootfhset) {
 			exp->nes_exflag = 0;
 			exp->nes_numsecflavor = 0;
 			error = 0;
 		}
 	} else if (exp->nes_numsecflavor < 1 || exp->nes_numsecflavor >
 	    MAXSECFLAVORS) {
 		printf("nfsvno_checkexp: numsecflavors out of range\n");
 		exp->nes_numsecflavor = 0;
 		error = EACCES;
-	} else {
-		/* Copy the security flavors. */
-		for (i = 0; i < exp->nes_numsecflavor; i++)
-			exp->nes_secflavors[i] = secflavors[i];
 	}
 	NFSEXITCODE(error);
 	return (error);
 }
 
 /*
  * Get a vnode for a file handle and export stuff.
  *
  * *credp is set to NULL and the flavor count in *exp is cleared before
  * anything else is done.  Any VFS_FHTOVP() failure is returned as ESTALE.
  * The export check is only done when nam is not NULL.  Whenever an error
  * is returned after the vnode was acquired, the vnode has been vput().
  */
 int
 nfsvno_fhtovp(struct mount *mp, fhandle_t *fhp, struct sockaddr *nam,
     int lktype, struct vnode **vpp, struct nfsexstuff *exp,
     struct ucred **credp)
 {
-	int i, error, *secflavors;
+	int error;
 
 	*credp = NULL;
 	exp->nes_numsecflavor = 0;
 	error = VFS_FHTOVP(mp, &fhp->fh_fid, lktype, vpp);
 	if (error != 0)
 		/* Make sure the server replies ESTALE to the client. */
 		error = ESTALE;
 	if (nam && !error) {
 		error = VFS_CHECKEXP(mp, nam, &exp->nes_exflag, credp,
-		    &exp->nes_numsecflavor, &secflavors);
+		    &exp->nes_numsecflavor, exp->nes_secflavors);
 		if (error) {
 			/*
 			 * With nfs_rootfhset set, the vnode is returned
 			 * with the export flags and flavor count cleared
 			 * instead of failing.
 			 */
 			if (nfs_rootfhset) {
 				exp->nes_exflag = 0;
 				exp->nes_numsecflavor = 0;
 				error = 0;
 			} else {
 				vput(*vpp);
 			}
 		} else if (exp->nes_numsecflavor < 1 || exp->nes_numsecflavor >
 		    MAXSECFLAVORS) {
 			printf("nfsvno_fhtovp: numsecflavors out of range\n");
 			exp->nes_numsecflavor = 0;
 			error = EACCES;
 			vput(*vpp);
-		} else {
-			/* Copy the security flavors. */
-			for (i = 0; i < exp->nes_numsecflavor; i++)
-				exp->nes_secflavors[i] = secflavors[i];
 		}
 	}
 	NFSEXITCODE(error);
 	return (error);
 }
 
 /*
  * nfsd_fhtovp() - convert a fh to a vnode ptr
  * 	- look up fsid in mount list (if not found ret error)
  *	- get vp and export rights by calling nfsvno_fhtovp()
  *	- if cred->cr_uid == 0 or MNT_EXPORTANON set it to credanon
  *	  for AUTH_SYS
  *	- if mpp != NULL, return the mount point so that it can
  *	  be used for vn_finished_write() by the caller
  *
  * The result is reported via nd->nd_repstat.  When nd_repstat is set
  * non-zero after the mount point was found, the vnode has been vput(),
  * *vpp is set to NULL, *mpp (if mpp != NULL) is set to NULL and, for
  * startwrite, vn_finished_write() has been called.
  */
 void
 nfsd_fhtovp(struct nfsrv_descript *nd, struct nfsrvfh *nfp, int lktype,
     struct vnode **vpp, struct nfsexstuff *exp,
     struct mount **mpp, int startwrite)
 {
 	struct mount *mp;
 	struct ucred *credanon;
 	fhandle_t *fhp;
 
 	fhp = (fhandle_t *)nfp->nfsrvfh_data;
 	/*
 	 * Check for the special case of the nfsv4root_fh.
 	 */
 	mp = vfs_busyfs(&fhp->fh_fsid);
 	if (mpp != NULL)
 		*mpp = mp;
 	if (mp == NULL) {
 		*vpp = NULL;
 		nd->nd_repstat = ESTALE;
 		goto out;
 	}
 
 	/*
 	 * When a write is being started, a shared lock is upgraded to an
 	 * exclusive one unless the mount allows shared writes.
 	 * NOTE(review): mpp is passed to vn_start_write() without a NULL
 	 * check; presumably callers that set startwrite always pass a
 	 * non-NULL mpp -- confirm.
 	 */
 	if (startwrite) {
 		vn_start_write(NULL, mpp, V_WAIT);
 		if (lktype == LK_SHARED && !(MNT_SHARED_WRITES(mp)))
 			lktype = LK_EXCLUSIVE;
 	}
 	nd->nd_repstat = nfsvno_fhtovp(mp, fhp, nd->nd_nam, lktype, vpp, exp,
 	    &credanon);
 	vfs_unbusy(mp);
 
 	/*
 	 * For NFSv4 without a pseudo root fs, unexported file handles
 	 * can be returned, so that Lookup works everywhere.
 	 */
 	if (!nd->nd_repstat && exp->nes_exflag == 0 &&
 	    !(nd->nd_flag & ND_NFSV4)) {
 		vput(*vpp);
 		nd->nd_repstat = EACCES;
 	}
 
 	/*
 	 * Personally, I've never seen any point in requiring a
 	 * reserved port#, since only in the rare case where the
 	 * clients are all boxes with secure system privileges,
 	 * does it provide any enhanced security, but... some people
 	 * believe it to be useful and keep putting this code back in.
 	 * (There is also some "security checker" out there that
 	 *  complains if the nfs server doesn't enforce this.)
 	 * However, note the following:
 	 * RFC3530 (NFSv4) specifies that a reserved port# not be
 	 *	required.
 	 * RFC2623 recommends that, if a reserved port# is checked for,
 	 *	that there be a way to turn that off--> ifdef'd.
 	 */
 #ifdef NFS_REQRSVPORT
 	if (!nd->nd_repstat) {
 		struct sockaddr_in *saddr;
 		struct sockaddr_in6 *saddr6;
 
 		/* The same address is viewed as both IPv4 and IPv6. */
 		saddr = NFSSOCKADDR(nd->nd_nam, struct sockaddr_in *);
 		saddr6 = NFSSOCKADDR(nd->nd_nam, struct sockaddr_in6 *);
 		if (!(nd->nd_flag & ND_NFSV4) &&
 		    ((saddr->sin_family == AF_INET &&
 		      ntohs(saddr->sin_port) >= IPPORT_RESERVED) ||
 		     (saddr6->sin6_family == AF_INET6 &&
 		      ntohs(saddr6->sin6_port) >= IPPORT_RESERVED))) {
 			vput(*vpp);
 			nd->nd_repstat = (NFSERR_AUTHERR | AUTH_TOOWEAK);
 		}
 	}
 #endif	/* NFS_REQRSVPORT */
 
 	/*
 	 * Check/setup credentials.
 	 * The uid of the request is saved in nd_saveduid first, since
 	 * nfsd_excred() may replace it.
 	 */
 	if (!nd->nd_repstat) {
 		nd->nd_saveduid = nd->nd_cred->cr_uid;
 		nd->nd_repstat = nfsd_excred(nd, exp, credanon);
 		if (nd->nd_repstat)
 			vput(*vpp);
 	}
 	/* credanon is no longer needed once nfsd_excred() is done. */
 	if (credanon != NULL)
 		crfree(credanon);
 	/* On failure, undo the write start and return no vnode or mount. */
 	if (nd->nd_repstat) {
 		if (startwrite)
 			vn_finished_write(mp);
 		*vpp = NULL;
 		if (mpp != NULL)
 			*mpp = NULL;
 	}
 
 out:
 	NFSEXITCODE2(0, nd);
 }
 
 /*
  * glue for fp.
  */
 static int
 fp_getfvp(struct thread *p, int fd, struct file **fpp, struct vnode **vpp)
 {
 	struct filedesc *fdp;
 	struct file *fp;
 	int error = 0;
 
 	fdp = p->td_proc->p_fd;
 	if (fd < 0 || fd >= fdp->fd_nfiles ||
 	    (fp = fdp->fd_ofiles[fd].fde_file) == NULL) {
 		error = EBADF;
 		goto out;
 	}
 	*fpp = fp;
 
 out:
 	NFSEXITCODE(error);
 	return (error);
 }
 
 /*
  * Called from nfssvc() to update the exports list. Just call
  * vfs_export(). This has to be done, since the v4 root fake fs isn't
  * in the mount list.
  */
 int
 nfsrv_v4rootexport(void *argp, struct ucred *cred, struct thread *p)
 {
 	struct nfsex_args *nfsexargp = (struct nfsex_args *)argp;
 	int error = 0;
 	struct nameidata nd;
 	fhandle_t fh;
 
 	error = vfs_export(&nfsv4root_mnt, &nfsexargp->export);
 	if ((nfsexargp->export.ex_flags & MNT_DELEXPORT) != 0)
 		nfs_rootfhset = 0;
 	else if (error == 0) {
 		if (nfsexargp->fspec == NULL) {
 			error = EPERM;
 			goto out;
 		}
 		/*
 		 * If fspec != NULL, this is the v4root path.
 		 */
 		NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE,
 		    nfsexargp->fspec, p);
 		if ((error = namei(&nd)) != 0)
 			goto out;
 		error = nfsvno_getfh(nd.ni_vp, &fh, p);
 		vrele(nd.ni_vp);
 		if (!error) {
 			nfs_rootfh.nfsrvfh_len = NFSX_MYFH;
 			NFSBCOPY((caddr_t)&fh,
 			    nfs_rootfh.nfsrvfh_data,
 			    sizeof (fhandle_t));
 			nfs_rootfhset = 1;
 		}
 	}
 
 out:
 	NFSEXITCODE(error);
 	return (error);
 }
 
 /*
  * This function needs to test to see if the system is near its limit
  * for memory allocation via malloc() or mget() and return True iff
  * either of these resources are near their limit.
  * XXX (For now, this is just a stub.)
  *
  * The stub only ever returns 1 while nfsrv_testmalloclimit is non-zero,
  * and then only on every 1000th call made with it set; a message is
  * printed for the first of every 100 such hits.
  */
 int nfsrv_testmalloclimit = 0;
 int
 nfsrv_mallocmget_limit(void)
 {
 	static int hitcnt = 0;
 	static int callcnt = 1;
 	int cur;
 
 	/* Calls are not counted unless the test knob is set. */
 	if (nfsrv_testmalloclimit == 0)
 		return (0);
 
 	cur = callcnt;
 	callcnt++;
 	if ((cur % 1000) != 0)
 		return (0);
 
 	cur = hitcnt;
 	hitcnt++;
 	if ((cur % 100) == 0)
 		printf("nfsd: malloc/mget near limit\n");
 	return (1);
 }
 
 /*
  * BSD specific initialization of the NFSv4 root pseudo mount point.
  * Only the first call does any work; later calls return immediately.
  */
 void
 nfsd_mntinit(void)
 {
 	static int mnt_ready = 0;
 
 	if (mnt_ready != 0)
 		return;
 	mnt_ready = 1;
 
 	/* The pseudo mount is read-only and exported, with no export list. */
 	nfsv4root_mnt.mnt_flag = (MNT_RDONLY | MNT_EXPORTED);
 	nfsv4root_mnt.mnt_export = NULL;
 
 	/* Empty vnode lists, each with a matching zero count. */
 	TAILQ_INIT(&nfsv4root_mnt.mnt_nvnodelist);
 	nfsv4root_mnt.mnt_nvnodelistsize = 0;
 	TAILQ_INIT(&nfsv4root_mnt.mnt_lazyvnodelist);
 	nfsv4root_mnt.mnt_lazyvnodelistsize = 0;
 
 	/* Hook up the (empty) current and new mount option lists. */
 	TAILQ_INIT(&nfsv4root_opt);
 	nfsv4root_mnt.mnt_opt = &nfsv4root_opt;
 	TAILQ_INIT(&nfsv4root_newopt);
 	nfsv4root_mnt.mnt_optnew = &nfsv4root_newopt;
 }
 
 /*
  * Get a vnode for a file handle, without checking exports, etc.
  * The vnode is requested with LK_EXCLUSIVE.  NULL is returned when the
  * file system cannot be found/busied or the handle cannot be converted.
  */
 struct vnode *
 nfsvno_getvp(fhandle_t *fhp)
 {
 	struct mount *fsmp;
 	struct vnode *vp;
 	int error;
 
 	fsmp = vfs_busyfs(&fhp->fh_fsid);
 	if (fsmp == NULL)
 		return (NULL);
 
 	/* The mount only has to stay busy for the duration of the lookup. */
 	error = VFS_FHTOVP(fsmp, &fhp->fh_fid, LK_EXCLUSIVE, &vp);
 	vfs_unbusy(fsmp);
 
 	return (error != 0 ? NULL : vp);
 }
 
 /*
  * Do a local VOP_ADVLOCK() for the NFS server.
  * The lock covers the bytes from "first" up to (not including) "end";
  * an "end" of NFS64BITSSET is passed down as a length of 0.
  * Nothing is done, and 0 is returned, unless nfsrv_dolocallocks is set.
  */
 int
 nfsvno_advlock(struct vnode *vp, int ftype, u_int64_t first,
     u_int64_t end, struct thread *td)
 {
 	struct flock lck;
 	int error = 0, op;
 
 	if (nfsrv_dolocallocks != 0) {
 		ASSERT_VOP_UNLOCKED(vp, "nfsvno_advlock: vp locked");
 
 		lck.l_whence = SEEK_SET;
 		lck.l_type = ftype;
 		lck.l_start = (off_t)first;
 		lck.l_len = (end == NFS64BITSSET) ? 0 : (off_t)(end - first);
 
 		/*
 		 * For FreeBSD8, the l_pid and l_sysid must be set to the
 		 * same values for all calls, so that all locks will be held
 		 * by the nfsd server. (The nfsd server handles conflicts
 		 * between the various clients.)
 		 * An NFSv4 lockowner is a ClientID plus an array of up to
 		 * 1024 bytes, so it can't be put in l_sysid.
 		 */
 		if (nfsv4_sysid == 0)
 			nfsv4_sysid = nlm_acquire_next_sysid();
 		lck.l_pid = (pid_t)0;
 		lck.l_sysid = (int)nfsv4_sysid;
 
 		/* Anything other than an unlock is issued as F_SETLK. */
 		op = (ftype == F_UNLCK) ? F_UNLCK : F_SETLK;
 		error = VOP_ADVLOCK(vp, (caddr_t)td->td_proc, op, &lck,
 		    (F_POSIX | F_REMOTE));
 	}
 
 	NFSEXITCODE(error);
 	return (error);
 }
 
 /*
  * Check the nfsv4 root exports.
  * The client address in nd->nd_nam is checked against the exports of the
  * NFSv4 root pseudo mount (nfsv4root_mnt).  On success, one ND_EX* bit
  * is set in nd->nd_flag for every recognized security flavor returned
  * for the export.
  * Returns 0 on success, or NFSERR_PROGUNAVAIL if the export check fails
  * for any reason.
  */
 int
 nfsvno_v4rootexport(struct nfsrv_descript *nd)
 {
 	struct ucred *credanon;
-	int exflags, error = 0, numsecflavor, *secflavors, i;
+	int error = 0, numsecflavor, secflavors[MAXSECFLAVORS], i;
+	uint64_t exflags;
 
 	error = vfs_stdcheckexp(&nfsv4root_mnt, nd->nd_nam, &exflags,
-	    &credanon, &numsecflavor, &secflavors);
+	    &credanon, &numsecflavor, secflavors);
 	if (error) {
 		/* The specific errno is not passed on to the caller. */
 		error = NFSERR_PROGUNAVAIL;
 		goto out;
 	}
 	/* The anonymous credential is not used here, only released. */
 	if (credanon != NULL)
 		crfree(credanon);
 	/* Translate each export security flavor into its ND_EX* flag. */
 	for (i = 0; i < numsecflavor; i++) {
 		if (secflavors[i] == AUTH_SYS)
 			nd->nd_flag |= ND_EXAUTHSYS;
 		else if (secflavors[i] == RPCSEC_GSS_KRB5)
 			nd->nd_flag |= ND_EXGSS;
 		else if (secflavors[i] == RPCSEC_GSS_KRB5I)
 			nd->nd_flag |= ND_EXGSSINTEGRITY;
 		else if (secflavors[i] == RPCSEC_GSS_KRB5P)
 			nd->nd_flag |= ND_EXGSSPRIVACY;
 	}
 
 out:
 	NFSEXITCODE(error);
 	return (error);
 }
 
 /*
  * Nfs server pseudo system call for the nfsd's.
  * Dispatches on the bits in uap->flag:
  * - NFSSVC_NFSDADDSOCK: copy in a struct nfsd_addsock_args and hand the
  *   socket it names to nfsrvd_addsock().
  * - NFSSVC_NFSDNFSD: copy in the nfsd arguments (old or new layout,
  *   selected by NFSSVC_NEWSTRUCT), copy in the optional pNFS strings and
  *   call nfsrvd_nfsd().
  * - NFSSVC_PNFSDS: pNFS data server operations (delete a DS, or copy a
  *   mirror via nfsrv_mdscopymr()/nfsrv_copymr()).
  * - anything else is passed on to nfssvc_srvcall().
  * Returns 0 or an errno value.
  */
 /*
  * MPSAFE
  */
 static int
 nfssvc_nfsd(struct thread *td, struct nfssvc_args *uap)
 {
 	struct file *fp;
 	struct nfsd_addsock_args sockarg;
 	struct nfsd_nfsd_args nfsdarg;
 	struct nfsd_nfsd_oargs onfsdarg;
 	struct nfsd_pnfsd_args pnfsdarg;
 	struct vnode *vp, *nvp, *curdvp;
 	struct pnfsdsfile *pf;
 	struct nfsdevice *ds, *fds;
 	cap_rights_t rights;
 	int buflen, error, ret;
 	char *buf, *cp, *cp2, *cp3;
 	char fname[PNFS_FILENAME_LEN + 1];
 
 	if (uap->flag & NFSSVC_NFSDADDSOCK) {
 		error = copyin(uap->argp, (caddr_t)&sockarg, sizeof (sockarg));
 		if (error)
 			goto out;
 		/*
 		 * Since we don't know what rights might be required,
 		 * pretend that we need them all. It is better to be too
 		 * careful than too reckless.
 		 * NOTE(review): the call below asks for CAP_SOCK_SERVER
 		 * only; confirm whether this comment is still accurate.
 		 */
 		error = fget(td, sockarg.sock,
 		    cap_rights_init(&rights, CAP_SOCK_SERVER), &fp);
 		if (error != 0)
 			goto out;
 		/* Only sockets can be handed to the server. */
 		if (fp->f_type != DTYPE_SOCKET) {
 			fdrop(fp, td);
 			error = EPERM;
 			goto out;
 		}
 		error = nfsrvd_addsock(fp);
 		fdrop(fp, td);
 	} else if (uap->flag & NFSSVC_NFSDNFSD) {
 		if (uap->argp == NULL) {
 			error = EINVAL;
 			goto out;
 		}
 		if ((uap->flag & NFSSVC_NEWSTRUCT) == 0) {
 			/*
 			 * Old argument layout: copy the three fields it has
 			 * and fill in defaults for everything else.
 			 */
 			error = copyin(uap->argp, &onfsdarg, sizeof(onfsdarg));
 			if (error == 0) {
 				nfsdarg.principal = onfsdarg.principal;
 				nfsdarg.minthreads = onfsdarg.minthreads;
 				nfsdarg.maxthreads = onfsdarg.maxthreads;
 				nfsdarg.version = 1;
 				nfsdarg.addr = NULL;
 				nfsdarg.addrlen = 0;
 				nfsdarg.dnshost = NULL;
 				nfsdarg.dnshostlen = 0;
 				nfsdarg.dspath = NULL;
 				nfsdarg.dspathlen = 0;
 				nfsdarg.mdspath = NULL;
 				nfsdarg.mdspathlen = 0;
 				nfsdarg.mirrorcnt = 1;
 			}
 		} else
 			error = copyin(uap->argp, &nfsdarg, sizeof(nfsdarg));
 		if (error)
 			goto out;
 		/*
 		 * The four pNFS strings are only used when all of them are
 		 * present with a length in the range 1..9999 and mirrorcnt
 		 * is within 1..NFSDEV_MAXMIRRORS; otherwise they are all
 		 * reset in the else clause below.
 		 */
 		if (nfsdarg.addrlen > 0 && nfsdarg.addrlen < 10000 &&
 		    nfsdarg.dnshostlen > 0 && nfsdarg.dnshostlen < 10000 &&
 		    nfsdarg.dspathlen > 0 && nfsdarg.dspathlen < 10000 &&
 		    nfsdarg.mdspathlen > 0 && nfsdarg.mdspathlen < 10000 &&
 		    nfsdarg.mirrorcnt >= 1 &&
 		    nfsdarg.mirrorcnt <= NFSDEV_MAXMIRRORS &&
 		    nfsdarg.addr != NULL && nfsdarg.dnshost != NULL &&
 		    nfsdarg.dspath != NULL && nfsdarg.mdspath != NULL) {
 			NFSD_DEBUG(1, "addrlen=%d dspathlen=%d dnslen=%d"
 			    " mdspathlen=%d mirrorcnt=%d\n", nfsdarg.addrlen,
 			    nfsdarg.dspathlen, nfsdarg.dnshostlen,
 			    nfsdarg.mdspathlen, nfsdarg.mirrorcnt);
 			/*
 			 * Replace each user pointer by a nul terminated
 			 * kernel copy.  On a copyin failure, the copies made
 			 * so far are freed before bailing out.
 			 */
 			cp = malloc(nfsdarg.addrlen + 1, M_TEMP, M_WAITOK);
 			error = copyin(nfsdarg.addr, cp, nfsdarg.addrlen);
 			if (error != 0) {
 				free(cp, M_TEMP);
 				goto out;
 			}
 			cp[nfsdarg.addrlen] = '\0';	/* Ensure nul term. */
 			nfsdarg.addr = cp;
 			cp = malloc(nfsdarg.dnshostlen + 1, M_TEMP, M_WAITOK);
 			error = copyin(nfsdarg.dnshost, cp, nfsdarg.dnshostlen);
 			if (error != 0) {
 				free(nfsdarg.addr, M_TEMP);
 				free(cp, M_TEMP);
 				goto out;
 			}
 			cp[nfsdarg.dnshostlen] = '\0';	/* Ensure nul term. */
 			nfsdarg.dnshost = cp;
 			cp = malloc(nfsdarg.dspathlen + 1, M_TEMP, M_WAITOK);
 			error = copyin(nfsdarg.dspath, cp, nfsdarg.dspathlen);
 			if (error != 0) {
 				free(nfsdarg.addr, M_TEMP);
 				free(nfsdarg.dnshost, M_TEMP);
 				free(cp, M_TEMP);
 				goto out;
 			}
 			cp[nfsdarg.dspathlen] = '\0';	/* Ensure nul term. */
 			nfsdarg.dspath = cp;
 			cp = malloc(nfsdarg.mdspathlen + 1, M_TEMP, M_WAITOK);
 			error = copyin(nfsdarg.mdspath, cp, nfsdarg.mdspathlen);
 			if (error != 0) {
 				free(nfsdarg.addr, M_TEMP);
 				free(nfsdarg.dnshost, M_TEMP);
 				free(nfsdarg.dspath, M_TEMP);
 				free(cp, M_TEMP);
 				goto out;
 			}
 			cp[nfsdarg.mdspathlen] = '\0';	/* Ensure nul term. */
 			nfsdarg.mdspath = cp;
 		} else {
 			nfsdarg.addr = NULL;
 			nfsdarg.addrlen = 0;
 			nfsdarg.dnshost = NULL;
 			nfsdarg.dnshostlen = 0;
 			nfsdarg.dspath = NULL;
 			nfsdarg.dspathlen = 0;
 			nfsdarg.mdspath = NULL;
 			nfsdarg.mdspathlen = 0;
 			nfsdarg.mirrorcnt = 1;
 		}
 		error = nfsrvd_nfsd(td, &nfsdarg);
 		/* These are either the kernel copies made above or NULL. */
 		free(nfsdarg.addr, M_TEMP);
 		free(nfsdarg.dnshost, M_TEMP);
 		free(nfsdarg.dspath, M_TEMP);
 		free(nfsdarg.mdspath, M_TEMP);
 	} else if (uap->flag & NFSSVC_PNFSDS) {
 		error = copyin(uap->argp, &pnfsdarg, sizeof(pnfsdarg));
 		if (error == 0 && (pnfsdarg.op == PNFSDOP_DELDSSERVER ||
 		    pnfsdarg.op == PNFSDOP_FORCEDELDS)) {
 			/* Delete the DS identified by the given path. */
 			cp = malloc(PATH_MAX + 1, M_TEMP, M_WAITOK);
 			error = copyinstr(pnfsdarg.dspath, cp, PATH_MAX + 1,
 			    NULL);
 			if (error == 0)
 				error = nfsrv_deldsserver(pnfsdarg.op, cp, td);
 			free(cp, M_TEMP);
 		} else if (error == 0 && pnfsdarg.op == PNFSDOP_COPYMR) {
 			/*
 			 * Copy a mirror.  mdspath is always copied in, while
 			 * dspath and curdspath are optional; cp2/cp3 stay
 			 * NULL when they are absent or an earlier copyin
 			 * failed, so the free() calls below are always safe.
 			 */
 			cp = malloc(PATH_MAX + 1, M_TEMP, M_WAITOK);
 			buflen = sizeof(*pf) * NFSDEV_MAXMIRRORS;
 			buf = malloc(buflen, M_TEMP, M_WAITOK);
 			error = copyinstr(pnfsdarg.mdspath, cp, PATH_MAX + 1,
 			    NULL);
 			NFSD_DEBUG(4, "pnfsdcopymr cp mdspath=%d\n", error);
 			if (error == 0 && pnfsdarg.dspath != NULL) {
 				cp2 = malloc(PATH_MAX + 1, M_TEMP, M_WAITOK);
 				error = copyinstr(pnfsdarg.dspath, cp2,
 				    PATH_MAX + 1, NULL);
 				NFSD_DEBUG(4, "pnfsdcopymr cp dspath=%d\n",
 				    error);
 			} else
 				cp2 = NULL;
 			if (error == 0 && pnfsdarg.curdspath != NULL) {
 				cp3 = malloc(PATH_MAX + 1, M_TEMP, M_WAITOK);
 				error = copyinstr(pnfsdarg.curdspath, cp3,
 				    PATH_MAX + 1, NULL);
 				NFSD_DEBUG(4, "pnfsdcopymr cp curdspath=%d\n",
 				    error);
 			} else
 				cp3 = NULL;
 			curdvp = NULL;
 			fds = NULL;
 			if (error == 0)
 				error = nfsrv_mdscopymr(cp, cp2, cp3, buf,
 				    &buflen, fname, td, &vp, &nvp, &pf, &ds,
 				    &fds);
 			NFSD_DEBUG(4, "nfsrv_mdscopymr=%d\n", error);
 			if (error == 0) {
 				/* Fall back to directory 0 on a bad index. */
 				if (pf->dsf_dir >= nfsrv_dsdirsize) {
 					printf("copymr: dsdir out of range\n");
 					pf->dsf_dir = 0;
 				}
 				NFSD_DEBUG(4, "copymr: buflen=%d\n", buflen);
 				error = nfsrv_copymr(vp, nvp,
 				    ds->nfsdev_dsdir[pf->dsf_dir], ds, pf,
 				    (struct pnfsdsfile *)buf,
 				    buflen / sizeof(*pf), td->td_ucred, td);
 				vput(vp);
 				vput(nvp);
 				/*
 				 * After a successful copy, remove the file
 				 * named fname from the fds directory, if
 				 * nfsrv_mdscopymr() returned one.  A failure
 				 * of this removal is not reported.
 				 */
 				if (fds != NULL && error == 0) {
 					curdvp = fds->nfsdev_dsdir[pf->dsf_dir];
 					ret = vn_lock(curdvp, LK_EXCLUSIVE);
 					if (ret == 0) {
 						nfsrv_dsremove(curdvp, fname,
 						    td->td_ucred, td);
 						NFSVOPUNLOCK(curdvp);
 					}
 				}
 				NFSD_DEBUG(4, "nfsrv_copymr=%d\n", error);
 			}
 			free(cp, M_TEMP);
 			free(cp2, M_TEMP);
 			free(cp3, M_TEMP);
 			free(buf, M_TEMP);
 		}
 	} else {
 		error = nfssvc_srvcall(td, uap, td->td_ucred);
 	}
 
 out:
 	NFSEXITCODE(error);
 	return (error);
 }
 
 /*
  * Handle the nfssvc() requests that nfssvc_nfsd() does not deal with
  * itself.  The first matching bit in uap->flag selects the operation:
  * setting or clearing the public file handle, exporting the NFSv4 root
  * (new or old argument layout), setting up the stable restart file,
  * revoking a client, dumping clients or locks, registering the master
  * nfsd for backups of the stable restart file and suspending or resuming
  * the nfsd threads.
  * Returns 0 or an errno value; EINVAL is returned when no flag matches.
  */
 static int
 nfssvc_srvcall(struct thread *p, struct nfssvc_args *uap, struct ucred *cred)
 {
 	struct nfsex_args export;
+	struct nfsex_oldargs oexp;
 	struct file *fp = NULL;
-	int stablefd, len;
+	int stablefd, i, len;
 	struct nfsd_clid adminrevoke;
 	struct nfsd_dumplist dumplist;
 	struct nfsd_dumpclients *dumpclients;
 	struct nfsd_dumplocklist dumplocklist;
 	struct nfsd_dumplocks *dumplocks;
 	struct nameidata nd;
 	vnode_t vp;
 	int error = EINVAL, igotlock;
 	struct proc *procp;
+	gid_t *grps;
 	static int suspend_nfsd = 0;
 
 	if (uap->flag & NFSSVC_PUBLICFH) {
 		/* Install the public file handle supplied by the caller. */
 		NFSBZERO((caddr_t)&nfs_pubfh.nfsrvfh_data,
 		    sizeof (fhandle_t));
 		error = copyin(uap->argp,
 		    &nfs_pubfh.nfsrvfh_data, sizeof (fhandle_t));
 		if (!error)
 			nfs_pubfhset = 1;
-	} else if (uap->flag & NFSSVC_V4ROOTEXPORT) {
+	} else if ((uap->flag & (NFSSVC_V4ROOTEXPORT | NFSSVC_NEWSTRUCT)) ==
+	    (NFSSVC_V4ROOTEXPORT | NFSSVC_NEWSTRUCT)) {
 		/*
 		 * New argument layout: the group list is a separate user
 		 * space array that is copied into a temporary kernel buffer
 		 * after its length has been validated.
 		 */
 		error = copyin(uap->argp,(caddr_t)&export,
 		    sizeof (struct nfsex_args));
-		if (!error)
-			error = nfsrv_v4rootexport(&export, cred, p);
+		if (!error) {
+			grps = NULL;
+			if (export.export.ex_ngroups > NGROUPS_MAX ||
+			    export.export.ex_ngroups < 0)
+				error = EINVAL;
+			else if (export.export.ex_ngroups > 0) {
+				grps = malloc(export.export.ex_ngroups *
+				    sizeof(gid_t), M_TEMP, M_WAITOK);
+				error = copyin(export.export.ex_groups, grps,
+				    export.export.ex_ngroups * sizeof(gid_t));
+				export.export.ex_groups = grps;
+			} else
+				export.export.ex_groups = NULL;
+			if (!error)
+				error = nfsrv_v4rootexport(&export, cred, p);
+			free(grps, M_TEMP);
+		}
+	} else if ((uap->flag & (NFSSVC_V4ROOTEXPORT | NFSSVC_NEWSTRUCT)) ==
+	    NFSSVC_V4ROOTEXPORT) {
 		/*
 		 * Old argument layout: convert the nfsex_oldargs structure
 		 * field by field into the new one, validating the group and
 		 * security flavor counts before they are used as loop bounds.
 		 */
+		error = copyin(uap->argp,(caddr_t)&oexp,
+		    sizeof (struct nfsex_oldargs));
+		if (!error) {
+			memset(&export.export, 0, sizeof(export.export));
+			export.export.ex_flags = (uint64_t)oexp.export.ex_flags;
+			export.export.ex_root = oexp.export.ex_root;
+			export.export.ex_uid = oexp.export.ex_anon.cr_uid;
+			export.export.ex_ngroups =
+			    oexp.export.ex_anon.cr_ngroups;
+			export.export.ex_groups = NULL;
+			if (export.export.ex_ngroups > XU_NGROUPS ||
+			    export.export.ex_ngroups < 0)
+				error = EINVAL;
+			else if (export.export.ex_ngroups > 0) {
+				export.export.ex_groups = malloc(
+				    export.export.ex_ngroups * sizeof(gid_t),
+				    M_TEMP, M_WAITOK);
+				for (i = 0; i < export.export.ex_ngroups; i++)
+					export.export.ex_groups[i] =
+					    oexp.export.ex_anon.cr_groups[i];
+			}
+			export.export.ex_addr = oexp.export.ex_addr;
+			export.export.ex_addrlen = oexp.export.ex_addrlen;
+			export.export.ex_mask = oexp.export.ex_mask;
+			export.export.ex_masklen = oexp.export.ex_masklen;
+			export.export.ex_indexfile = oexp.export.ex_indexfile;
+			export.export.ex_numsecflavors =
+			    oexp.export.ex_numsecflavors;
+			if (export.export.ex_numsecflavors >= MAXSECFLAVORS ||
+			    export.export.ex_numsecflavors < 0)
+				error = EINVAL;
+			else {
+				for (i = 0; i < export.export.ex_numsecflavors;
+				    i++)
+					export.export.ex_secflavors[i] =
+					    oexp.export.ex_secflavors[i];
+			}
+			export.fspec = oexp.fspec;
+			if (error == 0)
+				error = nfsrv_v4rootexport(&export, cred, p);
+			free(export.export.ex_groups, M_TEMP);
+		}
 	} else if (uap->flag & NFSSVC_NOPUBLICFH) {
 		nfs_pubfhset = 0;
 		error = 0;
 	} else if (uap->flag & NFSSVC_STABLERESTART) {
 		/*
 		 * The argument is a file descriptor for the stable restart
 		 * file.  It must be open for both reading and writing and
 		 * can only be set while no nfsd is running.
 		 * NOTE(review): vp is handed to fp_getfvp() but is not used
 		 * anywhere else in this function.
 		 */
 		error = copyin(uap->argp, (caddr_t)&stablefd,
 		    sizeof (int));
 		if (!error)
 			error = fp_getfvp(p, stablefd, &fp, &vp);
 		if (!error && (NFSFPFLAG(fp) & (FREAD | FWRITE)) != (FREAD | FWRITE))
 			error = EBADF;
 		if (!error && newnfs_numnfsd != 0)
 			error = EPERM;
 		if (!error) {
 			nfsrv_stablefirst.nsf_fp = fp;
 			nfsrv_setupstable(p);
 		}
 	} else if (uap->flag & NFSSVC_ADMINREVOKE) {
 		error = copyin(uap->argp, (caddr_t)&adminrevoke,
 		    sizeof (struct nfsd_clid));
 		if (!error)
 			error = nfsrv_adminrevoke(&adminrevoke, p);
 	} else if (uap->flag & NFSSVC_DUMPCLIENTS) {
 		/* Copy out up to ndl_size client entries (zero filled). */
 		error = copyin(uap->argp, (caddr_t)&dumplist,
 		    sizeof (struct nfsd_dumplist));
 		if (!error && (dumplist.ndl_size < 1 ||
 			dumplist.ndl_size > NFSRV_MAXDUMPLIST))
 			error = EPERM;
 		if (!error) {
 		    len = sizeof (struct nfsd_dumpclients) * dumplist.ndl_size;
 		    dumpclients = malloc(len, M_TEMP, M_WAITOK | M_ZERO);
 		    nfsrv_dumpclients(dumpclients, dumplist.ndl_size);
 		    error = copyout(dumpclients, dumplist.ndl_list, len);
 		    free(dumpclients, M_TEMP);
 		}
 	} else if (uap->flag & NFSSVC_DUMPLOCKS) {
 		/* Copy out up to ndllck_size lock entries for one file. */
 		error = copyin(uap->argp, (caddr_t)&dumplocklist,
 		    sizeof (struct nfsd_dumplocklist));
 		if (!error && (dumplocklist.ndllck_size < 1 ||
 			dumplocklist.ndllck_size > NFSRV_MAXDUMPLIST))
 			error = EPERM;
 		if (!error)
 			error = nfsrv_lookupfilename(&nd,
 				dumplocklist.ndllck_fname, p);
 		if (!error) {
 			len = sizeof (struct nfsd_dumplocks) *
 				dumplocklist.ndllck_size;
 			dumplocks = malloc(len, M_TEMP, M_WAITOK | M_ZERO);
 			nfsrv_dumplocks(nd.ni_vp, dumplocks,
 			    dumplocklist.ndllck_size, p);
 			vput(nd.ni_vp);
 			error = copyout(dumplocks, dumplocklist.ndllck_list,
 			    len);
 			free(dumplocks, M_TEMP);
 		}
 	} else if (uap->flag & NFSSVC_BACKUPSTABLE) {
 		/*
 		 * Record the identity of the calling process as the master
 		 * nfsd, so that nfsrv_backupstable() can signal it later.
 		 * NOTE(review): error is not assigned in this branch, so the
 		 * initial EINVAL is what gets returned; confirm that this
 		 * is what the caller expects.
 		 */
 		procp = p->td_proc;
 		PROC_LOCK(procp);
 		nfsd_master_pid = procp->p_pid;
 		bcopy(procp->p_comm, nfsd_master_comm, MAXCOMLEN + 1);
 		nfsd_master_start = procp->p_stats->p_start;
 		nfsd_master_proc = procp;
 		PROC_UNLOCK(procp);
 	} else if ((uap->flag & NFSSVC_SUSPENDNFSD) != 0) {
 		NFSLOCKV4ROOTMUTEX();
 		if (suspend_nfsd == 0) {
 			/* Lock out all nfsd threads */
 			do {
 				igotlock = nfsv4_lock(&nfsd_suspend_lock, 1,
 				    NULL, NFSV4ROOTLOCKMUTEXPTR, NULL);
 			} while (igotlock == 0 && suspend_nfsd == 0);
 			suspend_nfsd = 1;
 		}
 		NFSUNLOCKV4ROOTMUTEX();
 		error = 0;
 	} else if ((uap->flag & NFSSVC_RESUMENFSD) != 0) {
 		/* Drop the suspend lock, but only if it is currently held. */
 		NFSLOCKV4ROOTMUTEX();
 		if (suspend_nfsd != 0) {
 			nfsv4_unlock(&nfsd_suspend_lock, 0);
 			suspend_nfsd = 0;
 		}
 		NFSUNLOCKV4ROOTMUTEX();
 		error = 0;
 	}
 
 	NFSEXITCODE(error);
 	return (error);
 }
 
 /*
  * Check exports: does the security flavor used by the request match one
  * of the flavors allowed by the export?
  * Returns 0 if ok, 1 otherwise.
  */
 int
 nfsvno_testexp(struct nfsrv_descript *nd, struct nfsexstuff *exp)
 {
 	int idx, allowed;
 
 	/*
 	 * It seems odd, but an empty security flavor list is accepted.
 	 * This happens when NFSv4 is traversing non-exported file systems.
 	 * Exported file systems should always have a non-empty security
 	 * flavor list.
 	 */
 	if (exp->nes_numsecflavor == 0)
 		return (0);
 
 	for (idx = 0; idx < exp->nes_numsecflavor; idx++) {
 		/*
 		 * Privacy and integrity are looked at first, since ND_GSS
 		 * is set for everything but AUTH_SYS.
 		 */
 		if (exp->nes_secflavors[idx] == RPCSEC_GSS_KRB5P)
 			allowed = (nd->nd_flag & ND_GSSPRIVACY) != 0;
 		else if (exp->nes_secflavors[idx] == RPCSEC_GSS_KRB5I)
 			allowed = (nd->nd_flag & ND_GSSINTEGRITY) != 0;
 		else if (exp->nes_secflavors[idx] == RPCSEC_GSS_KRB5)
 			allowed = (nd->nd_flag & ND_GSS) != 0;
 		else if (exp->nes_secflavors[idx] == AUTH_SYS)
 			allowed = (nd->nd_flag & ND_GSS) == 0;
 		else
 			allowed = 0;
 		if (allowed != 0)
 			return (0);
 	}
 	return (1);
 }
 
 /*
  * Calculate a hash value for the fid in a file handle.
  * Only the fh_fid part of the handle is hashed.
  */
 uint32_t
 nfsrv_hashfh(fhandle_t *fhp)
 {
 
 	return (hash32_buf(&fhp->fh_fid, sizeof(struct fid), 0));
 }
 
 /*
  * Calculate a hash value for the sessionid.
  * NFSX_V4SESSIONID bytes are hashed, starting at sessionid.
  */
 uint32_t
 nfsrv_hashsessionid(uint8_t *sessionid)
 {
 
 	return (hash32_buf(sessionid, NFSX_V4SESSIONID, 0));
 }
 
 /*
  * Signal the userland master nfsd to backup the stable restart file.
  * Nothing is done unless a master nfsd has been registered.  If the
  * registered process can no longer be identified, the registration is
  * dropped instead of sending SIGUSR2.
  */
 void
 nfsrv_backupstable(void)
 {
 	struct proc *masterp;
 
 	if (nfsd_master_proc == NULL)
 		return;
 
 	masterp = pfind(nfsd_master_pid);
 	/*
 	 * Try to make sure it is the correct process: same proc pointer,
 	 * same start time and same command name as were recorded.
 	 */
 	if (masterp != nfsd_master_proc ||
 	    masterp->p_stats->p_start.tv_sec != nfsd_master_start.tv_sec ||
 	    masterp->p_stats->p_start.tv_usec != nfsd_master_start.tv_usec ||
 	    strcmp(masterp->p_comm, nfsd_master_comm) != 0)
 		nfsd_master_proc = NULL;
 	else
 		kern_psignal(masterp, SIGUSR2);
 
 	/* Whatever pfind() found must be unlocked again. */
 	if (masterp != NULL)
 		PROC_UNLOCK(masterp);
 }
 
 /*
  * Create a DS data file for nfsrv_pnfscreate(). Called for each mirror.
  * The arguments are in a structure, so that they can be passed through
  * taskqueue for a kernel process to execute this function.
  */
 struct nfsrvdscreate {
 	int			done;		/* set to 1 by start_dscreate() */
 	int			inprog;		/* polled with done by the waiter;
 						   presumably set by nfs_pnfsio(),
 						   TODO confirm */
 	struct task		tsk;		/* also the tsleep() wait channel */
 	struct ucred		*tcred;		/* credentials for the DS ops */
 	struct vnode		*dvp;		/* DS directory to create in */
 	NFSPROC_T		*p;		/* passed on to nfsrv_dscreate() */
 	struct pnfsdsfile	*pf;		/* entry filled in for this DS */
 	int			err;		/* result of nfsrv_dscreate() */
 	fhandle_t		fh;		/* fh the file name is made from */
 	struct vattr		va;		/* attributes for VOP_SETATTR() */
 	struct vattr		createva;	/* attributes for VOP_CREATE() */
 };
 
 /*
  * Create one data file in the DS directory dvp and record where it is.
  * dvp:    DS directory vnode; it is locked here for the create only.
  * vap:    attributes handed to VOP_CREATE().
  * nvap:   attributes applied with VOP_SETATTR() after the create.
  * fhp:    file handle the file name is generated from (fnamep == NULL).
  * pf:     filled in with the DS file handle, DS address and file name.
  * dsa:    if not NULL, filled in from the attributes of the new file.
  * fnamep: if not NULL, the file name to use instead of one made from fhp.
  * tcred:  credentials used for the operations.
  * p:      thread stored in the componentname.
  * nvpp:   if not NULL, set to the new vnode upon success (it is not
  *         vput() here in that case); otherwise the vnode is vput().
  * Returns 0 or an errno value.
  */
 int
 nfsrv_dscreate(struct vnode *dvp, struct vattr *vap, struct vattr *nvap,
     fhandle_t *fhp, struct pnfsdsfile *pf, struct pnfsdsattr *dsa,
     char *fnamep, struct ucred *tcred, NFSPROC_T *p, struct vnode **nvpp)
 {
 	struct vnode *nvp;
 	struct nameidata named;
 	struct vattr va;
 	char *bufp;
 	u_long *hashp;
 	struct nfsnode *np;
 	struct nfsmount *nmp;
 	int error;
 
 	/* Build the componentname for the file to be created. */
 	NFSNAMEICNDSET(&named.ni_cnd, tcred, CREATE,
 	    LOCKPARENT | LOCKLEAF | SAVESTART | NOCACHE);
 	nfsvno_setpathbuf(&named, &bufp, &hashp);
 	named.ni_cnd.cn_lkflags = LK_EXCLUSIVE;
 	named.ni_cnd.cn_thread = p;
 	named.ni_cnd.cn_nameptr = bufp;
 	if (fnamep != NULL) {
 		strlcpy(bufp, fnamep, PNFS_FILENAME_LEN + 1);
 		named.ni_cnd.cn_namelen = strlen(bufp);
 	} else
 		named.ni_cnd.cn_namelen = nfsrv_putfhname(fhp, bufp);
 	NFSD_DEBUG(4, "nfsrv_dscreate: dvp=%p fname=%s\n", dvp, bufp);
 
 	/* Create the data file in the DS mount. */
 	error = NFSVOPLOCK(dvp, LK_EXCLUSIVE);
 	if (error == 0) {
 		error = VOP_CREATE(dvp, &nvp, &named.ni_cnd, vap);
 		NFSVOPUNLOCK(dvp);
 		if (error == 0) {
 			/* Set the ownership of the file. */
 			error = VOP_SETATTR(nvp, nvap, tcred);
 			NFSD_DEBUG(4, "nfsrv_dscreate:"
 			    " setattr-uid=%d\n", error);
 			if (error != 0)
 				vput(nvp);
 		}
 		if (error != 0)
 			printf("pNFS: pnfscreate failed=%d\n", error);
 	} else
 		printf("pNFS: pnfscreate vnlock=%d\n", error);
 	if (error == 0) {
 		/*
 		 * Sanity check the new file: it must live on an "nfs" mount
 		 * whose address fits in a sockaddr_in6 and its file handle
 		 * must be NFSX_MYFH bytes long.
 		 */
 		np = VTONFS(nvp);
 		nmp = VFSTONFS(nvp->v_mount);
 		if (strcmp(nvp->v_mount->mnt_vfc->vfc_name, "nfs")
 		    != 0 || nmp->nm_nam->sa_len > sizeof(
 		    struct sockaddr_in6) ||
 		    np->n_fhp->nfh_len != NFSX_MYFH) {
 			printf("Bad DS file: fstype=%s salen=%d"
 			    " fhlen=%d\n",
 			    nvp->v_mount->mnt_vfc->vfc_name,
 			    nmp->nm_nam->sa_len, np->n_fhp->nfh_len);
 			error = ENOENT;
 		}
 
 		/* Set extattrs for the DS on the MDS file. */
 		if (error == 0) {
 			if (dsa != NULL) {
 				error = VOP_GETATTR(nvp, &va, tcred);
 				if (error == 0) {
 					dsa->dsa_filerev = va.va_filerev;
 					dsa->dsa_size = va.va_size;
 					dsa->dsa_atime = va.va_atime;
 					dsa->dsa_mtime = va.va_mtime;
 					dsa->dsa_bytes = va.va_bytes;
 				}
 			}
 			if (error == 0) {
 				NFSBCOPY(np->n_fhp->nfh_fh, &pf->dsf_fh,
 				    NFSX_MYFH);
 				NFSBCOPY(nmp->nm_nam, &pf->dsf_sin,
 				    nmp->nm_nam->sa_len);
 				NFSBCOPY(named.ni_cnd.cn_nameptr,
 				    pf->dsf_filename,
 				    sizeof(pf->dsf_filename));
 			}
 		} else
 			/*
 			 * NOTE(review): this branch is reached for the
 			 * "Bad DS file" case above (error == ENOENT), not
 			 * for a VOP_GETATTR() failure, so the wording of
 			 * the message looks misleading.
 			 */
 			printf("pNFS: pnfscreate can't get DS"
 			    " attr=%d\n", error);
 		/* Hand the vnode to the caller or release it. */
 		if (nvpp != NULL && error == 0)
 			*nvpp = nvp;
 		else
 			vput(nvp);
 	}
 	nfsvno_relpathbuf(&named);
 	return (error);
 }
 
 /*
  * Task entry point that executes nfsrv_dscreate() with the arguments
  * packed in a struct nfsrvdscreate and records the outcome in it.
  */
 static void
 start_dscreate(void *arg, int pending)
 {
 	struct nfsrvdscreate *req = arg;
 	int rc;
 
 	rc = nfsrv_dscreate(req->dvp, &req->createva, &req->va, &req->fh,
 	    req->pf, NULL, NULL, req->tcred, req->p, NULL);
 	req->err = rc;
 	/* Completion is flagged only after the result has been stored. */
 	req->done = 1;
 	NFSD_DEBUG(4, "start_dscreate: err=%d\n", req->err);
 }
 
 /*
  * Create a pNFS data file on the Data Server(s).
  * vp:   vnode of the MDS file; asserted to be exclusively locked before
  *       the extended attributes are set on it.
  * vap:  attributes used for VOP_CREATE() of the DS file(s).
  * cred: credentials used to get the attributes of vp.
  * p:    calling thread.
  * One DS is picked in round-robin order, plus up to
  * nfsrv_maxpnfsmirror - 1 mirrors.  The file is created on each of them
  * and the result is recorded in the "pnfsd.dsfile" and "pnfsd.dsattr"
  * extended attributes of vp.  Nothing is returned; failures are only
  * reported via printf().
  */
 static void
 nfsrv_pnfscreate(struct vnode *vp, struct vattr *vap, struct ucred *cred,
     NFSPROC_T *p)
 {
 	struct nfsrvdscreate *dsc, *tdsc = NULL;
 	struct nfsdevice *ds, *tds, *fds;
 	struct mount *mp;
 	struct pnfsdsfile *pf, *tpf;
 	struct pnfsdsattr dsattr;
 	struct vattr va;
 	struct vnode *dvp[NFSDEV_MAXMIRRORS];
 	struct nfsmount *nmp;
 	fhandle_t fh;
 	uid_t vauid;
 	gid_t vagid;
 	u_short vamode;
 	struct ucred *tcred;
 	int dsdir[NFSDEV_MAXMIRRORS], error, i, mirrorcnt, ret;
 	int failpos, timo;
 
 	/* Get a DS server directory in a round-robin order. */
 	mirrorcnt = 1;
 	mp = vp->v_mount;
 	ds = fds = NULL;
 	NFSDDSLOCK();
 	/*
 	 * Search for the first entry that handles this MDS fs, but use the
 	 * first entry for all MDS fs's otherwise.
 	 */
 	TAILQ_FOREACH(tds, &nfsrv_devidhead, nfsdev_list) {
 		if (tds->nfsdev_nmp != NULL) {
 			if (tds->nfsdev_mdsisset == 0 && ds == NULL)
 				ds = tds;
 			else if (tds->nfsdev_mdsisset != 0 && fsidcmp(
 			    &mp->mnt_stat.f_fsid, &tds->nfsdev_mdsfsid) == 0) {
 				ds = fds = tds;
 				break;
 			}
 		}
 	}
 	if (ds == NULL) {
 		NFSDDSUNLOCK();
 		NFSD_DEBUG(4, "nfsrv_pnfscreate: no srv\n");
 		return;
 	}
 	/* Use this DS's next directory and advance its round-robin index. */
 	i = dsdir[0] = ds->nfsdev_nextdir;
 	ds->nfsdev_nextdir = (ds->nfsdev_nextdir + 1) % nfsrv_dsdirsize;
 	dvp[0] = ds->nfsdev_dsdir[i];
 	tds = TAILQ_NEXT(ds, nfsdev_list);
 	/*
 	 * Collect mirrors from the entries that follow ds: entries of the
 	 * same kind as the one chosen above (fs specific when fds is set,
 	 * generic otherwise).  The same directory index i is used on every
 	 * mirror.
 	 */
 	if (nfsrv_maxpnfsmirror > 1 && tds != NULL) {
 		TAILQ_FOREACH_FROM(tds, &nfsrv_devidhead, nfsdev_list) {
 			if (tds->nfsdev_nmp != NULL &&
 			    ((tds->nfsdev_mdsisset == 0 && fds == NULL) ||
 			     (tds->nfsdev_mdsisset != 0 && fds != NULL &&
 			      fsidcmp(&mp->mnt_stat.f_fsid,
 			      &tds->nfsdev_mdsfsid) == 0))) {
 				dsdir[mirrorcnt] = i;
 				dvp[mirrorcnt] = tds->nfsdev_dsdir[i];
 				mirrorcnt++;
 				if (mirrorcnt >= nfsrv_maxpnfsmirror)
 					break;
 			}
 		}
 	}
 	/* Put at end of list to implement round-robin usage. */
 	TAILQ_REMOVE(&nfsrv_devidhead, ds, nfsdev_list);
 	TAILQ_INSERT_TAIL(&nfsrv_devidhead, ds, nfsdev_list);
 	NFSDDSUNLOCK();
 	/*
 	 * One task argument structure is needed for every mirror except the
 	 * last one, which is always created by this thread.  Both arrays
 	 * are zero filled.
 	 */
 	dsc = NULL;
 	if (mirrorcnt > 1)
 		tdsc = dsc = malloc(sizeof(*dsc) * (mirrorcnt - 1), M_TEMP,
 		    M_WAITOK | M_ZERO);
 	tpf = pf = malloc(sizeof(*pf) * nfsrv_maxpnfsmirror, M_TEMP, M_WAITOK |
 	    M_ZERO);
 
 	error = nfsvno_getfh(vp, &fh, p);
 	if (error == 0)
 		error = VOP_GETATTR(vp, &va, cred);
 	if (error == 0) {
 		/* Set the attributes for "vp" to Setattr the DS vp. */
 		vauid = va.va_uid;
 		vagid = va.va_gid;
 		vamode = va.va_mode;
 		VATTR_NULL(&va);
 		va.va_uid = vauid;
 		va.va_gid = vagid;
 		va.va_mode = vamode;
 		va.va_size = 0;
 	} else
 		printf("pNFS: pnfscreate getfh+attr=%d\n", error);
 
 	NFSD_DEBUG(4, "nfsrv_pnfscreate: cruid=%d crgid=%d\n", cred->cr_uid,
 	    cred->cr_gid);
 	/* Make data file name based on FH. */
 	tcred = newnfs_getcred();
 
 	/*
 	 * Create the file on each DS mirror, using kernel process(es) for the
 	 * additional mirrors.
 	 */
 	failpos = -1;
 	for (i = 0; i < mirrorcnt - 1 && error == 0; i++, tpf++, tdsc++) {
 		tpf->dsf_dir = dsdir[i];
 		tdsc->tcred = tcred;
 		tdsc->p = p;
 		tdsc->pf = tpf;
 		tdsc->createva = *vap;
 		NFSBCOPY(&fh, &tdsc->fh, sizeof(fh));
 		tdsc->va = va;
 		tdsc->dvp = dvp[i];
 		tdsc->done = 0;
 		tdsc->inprog = 0;
 		tdsc->err = 0;
 		/* Do the create in this thread if it cannot be queued. */
 		ret = EIO;
 		if (nfs_pnfsiothreads != 0) {
 			ret = nfs_pnfsio(start_dscreate, tdsc);
 			NFSD_DEBUG(4, "nfsrv_pnfscreate: nfs_pnfsio=%d\n", ret);
 		}
 		if (ret != 0) {
 			ret = nfsrv_dscreate(dvp[i], vap, &va, &fh, tpf, NULL,
 			    NULL, tcred, p, NULL);
 			if (ret != 0) {
 				KASSERT(error == 0, ("nfsrv_dscreate err=%d",
 				    error));
 				/*
 				 * The first failure that nfsds_failerr()
 				 * accepts only marks the mirror as failed;
 				 * anything else aborts the create.
 				 */
 				if (failpos == -1 && nfsds_failerr(ret))
 					failpos = i;
 				else
 					error = ret;
 			}
 		}
 	}
 	/* The last mirror is done here and also provides the DS attributes. */
 	if (error == 0) {
 		tpf->dsf_dir = dsdir[mirrorcnt - 1];
 		error = nfsrv_dscreate(dvp[mirrorcnt - 1], vap, &va, &fh, tpf,
 		    &dsattr, NULL, tcred, p, NULL);
 		if (failpos == -1 && mirrorcnt > 1 && nfsds_failerr(error)) {
 			failpos = mirrorcnt - 1;
 			error = 0;
 		}
 	}
 	timo = hz / 50;		/* Wait for 20msec. */
 	if (timo < 1)
 		timo = 1;
 	/* Wait for kernel task(s) to complete. */
 	for (tdsc = dsc, i = 0; i < mirrorcnt - 1; i++, tdsc++) {
 		while (tdsc->inprog != 0 && tdsc->done == 0)
 			tsleep(&tdsc->tsk, PVFS, "srvdcr", timo);
 		if (tdsc->err != 0) {
 			if (failpos == -1 && nfsds_failerr(tdsc->err))
 				failpos = i;
 			else if (error == 0)
 				error = tdsc->err;
 		}
 	}
 
 	/*
 	 * If failpos has been set, that mirror has failed, so it needs
 	 * to be disabled.
 	 */
 	if (failpos >= 0) {
 		nmp = VFSTONFS(dvp[failpos]->v_mount);
 		NFSLOCKMNT(nmp);
 		/* Skip this if a dismount or cancel is already under way. */
 		if ((nmp->nm_privflag & (NFSMNTP_FORCEDISM |
 		     NFSMNTP_CANCELRPCS)) == 0) {
 			nmp->nm_privflag |= NFSMNTP_CANCELRPCS;
 			NFSUNLOCKMNT(nmp);
 			ds = nfsrv_deldsnmp(PNFSDOP_DELDSSERVER, nmp, p);
 			NFSD_DEBUG(4, "dscreatfail fail=%d ds=%p\n", failpos,
 			    ds);
 			if (ds != NULL)
 				nfsrv_killrpcs(nmp);
 			NFSLOCKMNT(nmp);
 			nmp->nm_privflag &= ~NFSMNTP_CANCELRPCS;
 			wakeup(nmp);
 		}
 		NFSUNLOCKMNT(nmp);
 	}
 
 	NFSFREECRED(tcred);
 	if (error == 0) {
 		ASSERT_VOP_ELOCKED(vp, "nfsrv_pnfscreate vp");
 
 		NFSD_DEBUG(4, "nfsrv_pnfscreate: mirrorcnt=%d maxmirror=%d\n",
 		    mirrorcnt, nfsrv_maxpnfsmirror);
 		/*
 		 * For all mirrors that couldn't be created, fill in the
 		 * *pf structure, but with an IP address == 0.0.0.0.
 		 */
 		tpf = pf + mirrorcnt;
 		for (i = mirrorcnt; i < nfsrv_maxpnfsmirror; i++, tpf++) {
 			*tpf = *pf;
 			tpf->dsf_sin.sin_family = AF_INET;
 			tpf->dsf_sin.sin_len = sizeof(struct sockaddr_in);
 			tpf->dsf_sin.sin_addr.s_addr = 0;
 			tpf->dsf_sin.sin_port = 0;
 		}
 
 		/* Record the DS file locations and attributes on the MDS file. */
 		error = vn_extattr_set(vp, IO_NODELOCKED,
 		    EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsfile",
 		    sizeof(*pf) * nfsrv_maxpnfsmirror, (char *)pf, p);
 		if (error == 0)
 			error = vn_extattr_set(vp, IO_NODELOCKED,
 			    EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsattr",
 			    sizeof(dsattr), (char *)&dsattr, p);
 		if (error != 0)
 			printf("pNFS: pnfscreate setextattr=%d\n",
 			    error);
 	} else
 		printf("pNFS: pnfscreate=%d\n", error);
 	free(pf, M_TEMP);
 	free(dsc, M_TEMP);
 }
 
 /*
  * Get the information needed to remove the pNFS Data Server file from the
  * Metadata file.  dvpp[0] is set to NULL first and stays NULL when there
  * is nothing to remove: vp is not an exported regular file, this is not
  * a pNFS server, or other hard links to the file remain.  Otherwise the
  * DS directory vnode(s), *mirrorcntp and fname are filled in by
  * nfsrv_dsgetsockmnt(), and *fhp is set to the file handle of vp.
  * NOTE(review): earlier wording said the DS directory vnode is returned
  * locked and must be unlocked by the caller; confirm against
  * nfsrv_dsgetsockmnt().
  */
 static void
 nfsrv_pnfsremovesetup(struct vnode *vp, NFSPROC_T *p, struct vnode **dvpp,
     int *mirrorcntp, char *fname, fhandle_t *fhp)
 {
 	struct vattr attrs;
 	struct ucred *syscred;
 	char *scratch;
 	int error, scratchlen;
 
 	dvpp[0] = NULL;
 
 	/* If not an exported regular file or not a pNFS server, just return. */
 	if (vp->v_type != VREG)
 		return;
 	if ((vp->v_mount->mnt_flag & MNT_EXPORTED) == 0)
 		return;
 	if (nfsrv_devidcnt == 0)
 		return;
 
 	/* The DS file only goes away along with the last hard link. */
 	syscred = newnfs_getcred();
 	error = VOP_GETATTR(vp, &attrs, syscred);
 	NFSFREECRED(syscred);
 	if (error != 0) {
 		printf("pNFS: nfsrv_pnfsremovesetup getattr=%d\n", error);
 		return;
 	}
 	if (attrs.va_nlink > 1)
 		return;
 
 	error = nfsvno_getfh(vp, fhp, p);
 	if (error != 0) {
 		printf("pNFS: nfsrv_pnfsremovesetup getfh=%d\n", error);
 		return;
 	}
 
 	/* Get the directory vnode for the DS mount and the file name. */
 	scratchlen = 1024;
 	scratch = malloc(scratchlen, M_TEMP, M_WAITOK);
 	error = nfsrv_dsgetsockmnt(vp, 0, scratch, &scratchlen, mirrorcntp, p,
 	    dvpp, NULL, NULL, fname, NULL, NULL, NULL, NULL, NULL);
 	free(scratch, M_TEMP);
 	if (error != 0)
 		printf("pNFS: nfsrv_pnfsremovesetup getsockmnt=%d\n", error);
 }
 
 /*
  * Remove a DS data file for nfsrv_pnfsremove(). Called for each mirror.
  * The arguments are in a structure, so that they can be passed through
  * taskqueue for a kernel process to execute this function.
  */
 struct nfsrvdsremove {
 	int			done;		/* set to 1 by start_dsremove() */
 	int			inprog;		/* polled with done by the waiter;
 						   presumably set by nfs_pnfsio(),
 						   TODO confirm */
 	struct task		tsk;		/* also the tsleep() wait channel */
 	struct ucred		*tcred;		/* credentials for the DS ops */
 	struct vnode		*dvp;		/* DS directory holding the file */
 	NFSPROC_T		*p;		/* passed on to nfsrv_dsremove() */
 	int			err;		/* result of nfsrv_dsremove() */
 	char			fname[PNFS_FILENAME_LEN + 1];	/* file to remove */
 };
 
 /*
  * Remove the file called fname from the DS directory dvp.
  * dvp is locked exclusively for the duration of the lookup and remove
  * and is unlocked again before returning.
  * Returns 0 or an errno value; a failure after the lock was acquired is
  * also reported via printf().
  */
 static int
 nfsrv_dsremove(struct vnode *dvp, char *fname, struct ucred *tcred,
     NFSPROC_T *p)
 {
 	struct nameidata named;
 	struct vnode *nvp;
 	char *bufp;
 	u_long *hashp;
 	int error;
 
 	error = NFSVOPLOCK(dvp, LK_EXCLUSIVE);
 	if (error != 0)
 		return (error);
 	/*
 	 * Build a DELETE componentname for fname by hand.
 	 * NOTE(review): cn_flags is assigned before nfsvno_setpathbuf() is
 	 * called; presumably that helper may update the flags as well, so
 	 * keep this ordering unless that has been checked.
 	 */
 	named.ni_cnd.cn_nameiop = DELETE;
 	named.ni_cnd.cn_lkflags = LK_EXCLUSIVE | LK_RETRY;
 	named.ni_cnd.cn_cred = tcred;
 	named.ni_cnd.cn_thread = p;
 	named.ni_cnd.cn_flags = ISLASTCN | LOCKPARENT | LOCKLEAF | SAVENAME;
 	nfsvno_setpathbuf(&named, &bufp, &hashp);
 	named.ni_cnd.cn_nameptr = bufp;
 	named.ni_cnd.cn_namelen = strlen(fname);
 	strlcpy(bufp, fname, NAME_MAX);
 	NFSD_DEBUG(4, "nfsrv_pnfsremove: filename=%s\n", bufp);
 	error = VOP_LOOKUP(dvp, &nvp, &named.ni_cnd);
 	NFSD_DEBUG(4, "nfsrv_pnfsremove: aft LOOKUP=%d\n", error);
 	if (error == 0) {
 		/* The vnode found by the lookup is released after the remove. */
 		error = VOP_REMOVE(dvp, nvp, &named.ni_cnd);
 		vput(nvp);
 	}
 	NFSVOPUNLOCK(dvp);
 	nfsvno_relpathbuf(&named);
 	if (error != 0)
 		printf("pNFS: nfsrv_pnfsremove failed=%d\n", error);
 	return (error);
 }
 
 /*
  * Task entry point that executes nfsrv_dsremove() with the arguments
  * packed in a struct nfsrvdsremove and records the outcome in it.
  */
 static void
 start_dsremove(void *arg, int pending)
 {
 	struct nfsrvdsremove *req = arg;
 	int rc;
 
 	rc = nfsrv_dsremove(req->dvp, req->fname, req->tcred, req->p);
 	req->err = rc;
 	/* Completion is flagged only after the result has been stored. */
 	req->done = 1;
 	NFSD_DEBUG(4, "start_dsremove: err=%d\n", req->err);
 }
 
 /*
  * Remove a pNFS data file from a Data Server.
  * nfsrv_pnfsremovesetup() must have been called before the MDS file was
  * removed to set up the dvp and fill in the FH.
  * dvp:       array of mirrorcnt DS directory vnodes.
  * mirrorcnt: number of mirrors the file lives on (at least 1).
  * fname:     name of the data file within each DS directory.
  * fhp:       file handle of the MDS file, used to free its layouts.
  * p:         calling thread.
  * Nothing is returned.  The first mirror whose removal fails with an
  * error that nfsds_failerr() accepts is disabled.
  */
 static void
 nfsrv_pnfsremove(struct vnode **dvp, int mirrorcnt, char *fname, fhandle_t *fhp,
     NFSPROC_T *p)
 {
 	struct ucred *tcred;
 	struct nfsrvdsremove *dsrm, *tdsrm;
 	struct nfsdevice *ds;
 	struct nfsmount *nmp;
 	int failpos, i, ret, timo;
 
 	tcred = newnfs_getcred();
 	/*
 	 * One task argument structure is needed for every mirror except the
 	 * last one, which is always handled by this thread.  The element
 	 * count must be parenthesized: "sizeof(*dsrm) * mirrorcnt - 1" would
 	 * size the buffer as mirrorcnt elements minus one byte.
 	 */
 	dsrm = NULL;
 	if (mirrorcnt > 1)
 		dsrm = malloc(sizeof(*dsrm) * (mirrorcnt - 1), M_TEMP,
 		    M_WAITOK);
 	/*
 	 * Remove the file on each DS mirror, using kernel process(es) for the
 	 * additional mirrors.
 	 */
 	failpos = -1;
 	for (tdsrm = dsrm, i = 0; i < mirrorcnt - 1; i++, tdsrm++) {
 		tdsrm->tcred = tcred;
 		tdsrm->p = p;
 		tdsrm->dvp = dvp[i];
 		strlcpy(tdsrm->fname, fname, PNFS_FILENAME_LEN + 1);
 		tdsrm->inprog = 0;
 		tdsrm->done = 0;
 		tdsrm->err = 0;
 		/* Do the remove in this thread if it cannot be queued. */
 		ret = EIO;
 		if (nfs_pnfsiothreads != 0) {
 			ret = nfs_pnfsio(start_dsremove, tdsrm);
 			NFSD_DEBUG(4, "nfsrv_pnfsremove: nfs_pnfsio=%d\n", ret);
 		}
 		if (ret != 0) {
 			ret = nfsrv_dsremove(dvp[i], fname, tcred, p);
 			if (failpos == -1 && nfsds_failerr(ret))
 				failpos = i;
 		}
 	}
 	/* The last mirror is always done by this thread. */
 	ret = nfsrv_dsremove(dvp[mirrorcnt - 1], fname, tcred, p);
 	if (failpos == -1 && mirrorcnt > 1 && nfsds_failerr(ret))
 		failpos = mirrorcnt - 1;
 	timo = hz / 50;		/* Wait for 20msec. */
 	if (timo < 1)
 		timo = 1;
 	/* Wait for kernel task(s) to complete. */
 	for (tdsrm = dsrm, i = 0; i < mirrorcnt - 1; i++, tdsrm++) {
 		while (tdsrm->inprog != 0 && tdsrm->done == 0)
 			tsleep(&tdsrm->tsk, PVFS, "srvdsrm", timo);
 		if (failpos == -1 && nfsds_failerr(tdsrm->err))
 			failpos = i;
 	}
 
 	/*
 	 * If failpos has been set, that mirror has failed, so it needs
 	 * to be disabled.
 	 */
 	if (failpos >= 0) {
 		nmp = VFSTONFS(dvp[failpos]->v_mount);
 		NFSLOCKMNT(nmp);
 		/* Skip this if a dismount or cancel is already under way. */
 		if ((nmp->nm_privflag & (NFSMNTP_FORCEDISM |
 		     NFSMNTP_CANCELRPCS)) == 0) {
 			nmp->nm_privflag |= NFSMNTP_CANCELRPCS;
 			NFSUNLOCKMNT(nmp);
 			ds = nfsrv_deldsnmp(PNFSDOP_DELDSSERVER, nmp, p);
 			NFSD_DEBUG(4, "dsremovefail fail=%d ds=%p\n", failpos,
 			    ds);
 			if (ds != NULL)
 				nfsrv_killrpcs(nmp);
 			NFSLOCKMNT(nmp);
 			nmp->nm_privflag &= ~NFSMNTP_CANCELRPCS;
 			wakeup(nmp);
 		}
 		NFSUNLOCKMNT(nmp);
 	}
 
 	/* Get rid of all layouts for the file. */
 	nfsrv_freefilelayouts(fhp);
 
 	NFSFREECRED(tcred);
 	free(dsrm, M_TEMP);
 }
 
 /*
  * Encode the file handle as lower case hexadecimal, two digits per byte,
  * and store the NUL terminated result in *bufp.
  * Returns the length of the generated name, not counting the terminator.
  */
 static int
 nfsrv_putfhname(fhandle_t *fhp, char *bufp)
 {
 	static const char hexdigits[] = "0123456789abcdef";
 	const uint8_t *fhbyte, *fhend;
 	char *outp;
 
 	fhbyte = (const uint8_t *)fhp;
 	fhend = fhbyte + sizeof(*fhp);
 	outp = bufp;
 	while (fhbyte < fhend) {
 		/* High nibble first, then the low nibble. */
 		*outp++ = hexdigits[(*fhbyte >> 4) & 0xf];
 		*outp++ = hexdigits[*fhbyte & 0xf];
 		fhbyte++;
 	}
 	*outp = '\0';
 	return ((int)(outp - bufp));
 }
 
 /*
  * Refresh the Metadata file's attributes from the DS file when a Read/Write
  * layout is returned.
  * This is a thin wrapper around nfsrv_proxyds() with the procedure set to
  * NFSPROC_LAYOUTRETURN, so that it does a nfsrv_getattrdsrpc() and
  * nfsrv_setextattr() on the DS file.  The attributes end up in *nap.
  */
 int
 nfsrv_updatemdsattr(struct vnode *vp, struct nfsvattr *nap, NFSPROC_T *p)
 {
 	struct ucred *rootcred;
 	int ret;
 
 	/* Use root's credentials, so that this cannot fail with EACCES. */
 	rootcred = newnfs_getcred();
 	ret = nfsrv_proxyds(vp, 0, 0, rootcred, p, NFSPROC_LAYOUTRETURN, NULL,
 	    NULL, NULL, nap, NULL, NULL, 0, NULL);
 	NFSFREECRED(rootcred);
 	return (ret);
 }
 
 /*
  * Make the NFSv4 ACL of the DS file the same as the ACL of the MDS file,
  * by handing aclp to nfsrv_proxyds() with the procedure set to
  * NFSPROC_SETACL.  Returns whatever nfsrv_proxyds() returns.
  */
 static int
 nfsrv_dssetacl(struct vnode *vp, struct acl *aclp, struct ucred *cred,
     NFSPROC_T *p)
 {
 
 	return (nfsrv_proxyds(vp, 0, 0, cred, p, NFSPROC_SETACL, NULL, NULL,
 	    NULL, NULL, aclp, NULL, 0, NULL));
 }
 
 /*
  * Proxy an operation on a file on the MDS through to its data file(s) on
  * the DS(s).
  * vp       - vnode of the file on the MDS
  * off, cnt - offset and byte count for Read and Write; off is also the
  *            offset for Allocate
  * cred, p  - credentials and thread used for the RPC(s)
  * ioproc   - selects the operation: NFSPROC_READDS, NFSPROC_WRITEDS,
  *            NFSPROC_SETATTR, NFSPROC_SETACL, NFSPROC_SEEKDS or
  *            NFSPROC_ALLOCATE; any other value results in a Getattr RPC
  *            against the DS
  * mpp, mpp2 - for Read, handed to nfsrv_readdsrpc() to return the data
  *            mbuf chain and its last mbuf; for Write, *mpp is the mbuf
  *            chain holding the data
  * cp       - for Write, pointer to the start of the data within *mpp
  * nap      - attributes to set for Setattr, attributes returned for Getattr
  * aclp     - ACL for Setacl
  * offp     - offset argument for Seek, length for Allocate
  * content, eofp - additional arguments for Seek
  *
  * Returns ENOENT if vp is not a regular file on an exported file system of
  * a pNFS server or if the DS information cannot be acquired, otherwise 0 or
  * the error from the operation.
  */
 static int
 nfsrv_proxyds(struct vnode *vp, off_t off, int cnt, struct ucred *cred,
     struct thread *p, int ioproc, struct mbuf **mpp, char *cp,
     struct mbuf **mpp2, struct nfsvattr *nap, struct acl *aclp,
     off_t *offp, int content, bool *eofp)
 {
 	struct nfsmount *nmp[NFSDEV_MAXMIRRORS], *failnmp;
 	fhandle_t fh[NFSDEV_MAXMIRRORS];
 	struct vnode *dvp[NFSDEV_MAXMIRRORS];
 	struct nfsdevice *ds;
 	struct pnfsdsattr dsattr;
 	struct opnfsdsattr odsattr;
 	char *buf;
 	int buflen, error, failpos, i, mirrorcnt, origmircnt, trycnt;
 
 	NFSD_DEBUG(4, "in nfsrv_proxyds\n");
 	/*
 	 * If not a regular file, not exported or not a pNFS server,
 	 * just return ENOENT.
 	 */
 	if (vp->v_type != VREG || (vp->v_mount->mnt_flag & MNT_EXPORTED) == 0 ||
 	    nfsrv_devidcnt == 0)
 		return (ENOENT);
 
 	/* Scratch buffer for the extended attributes read below. */
 	buflen = 1024;
 	buf = malloc(buflen, M_TEMP, M_WAITOK);
 	error = 0;
 
 	/*
 	 * For Getattr, get the Change attribute (va_filerev) and size (va_size)
 	 * from the MetaData file's extended attribute.
 	 */
 	if (ioproc == NFSPROC_GETATTR) {
 		error = vn_extattr_get(vp, IO_NODELOCKED,
 		    EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsattr", &buflen, buf,
 		    p);
 		if (error == 0) {
 			/*
 			 * The attribute is accepted in two layouts, told
 			 * apart by length: struct opnfsdsattr (presumably
 			 * the older one), which has no dsa_bytes, and
 			 * struct pnfsdsattr.
 			 */
 			if (buflen == sizeof(odsattr)) {
 				NFSBCOPY(buf, &odsattr, buflen);
 				nap->na_filerev = odsattr.dsa_filerev;
 				nap->na_size = odsattr.dsa_size;
 				nap->na_atime = odsattr.dsa_atime;
 				nap->na_mtime = odsattr.dsa_mtime;
 				/*
 				 * Fake na_bytes by rounding up na_size.
 				 * Since we don't know the block size, just
 				 * use BLKDEV_IOSIZE.
 				 */
 				nap->na_bytes = (odsattr.dsa_size +
 				    BLKDEV_IOSIZE - 1) & ~(BLKDEV_IOSIZE - 1);
 			} else if (buflen == sizeof(dsattr)) {
 				NFSBCOPY(buf, &dsattr, buflen);
 				nap->na_filerev = dsattr.dsa_filerev;
 				nap->na_size = dsattr.dsa_size;
 				nap->na_atime = dsattr.dsa_atime;
 				nap->na_mtime = dsattr.dsa_mtime;
 				nap->na_bytes = dsattr.dsa_bytes;
 			} else
 				error = ENXIO;
 		}
 		if (error == 0) {
 			/*
 			 * If nfsrv_pnfsgetdsattr is 0 or nfsrv_checkdsattr()
 			 * returns 0, just return now.  nfsrv_checkdsattr()
 			 * returns 0 if there is no Read/Write layout
 			 * plus either an Open/Write_access or Write
 			 * delegation issued to a client for the file.
 			 */
 			if (nfsrv_pnfsgetdsattr == 0 ||
 			    nfsrv_checkdsattr(vp, p) == 0) {
 				free(buf, M_TEMP);
 				return (error);
 			}
 		}
 
 		/*
 		 * Clear ENOATTR so the code below will attempt to do a
 		 * nfsrv_getattrdsrpc() to get the attributes and (re)create
 		 * the extended attribute.
 		 */
 		if (error == ENOATTR)
 			error = 0;
 	}
 
 	/*
 	 * origmircnt records the number of mirrors found by the first pass
 	 * and bounds the number of retries.  trycnt counts the failed
 	 * attempts of the operations that only use a single mirror.
 	 */
 	origmircnt = -1;
 	trycnt = 0;
 tryagain:
 	if (error == 0) {
 		buflen = 1024;
 		if (ioproc == NFSPROC_READDS && NFSVOPISLOCKED(vp) ==
 		    LK_EXCLUSIVE)
 			printf("nfsrv_proxyds: Readds vp exclusively locked\n");
 		/*
 		 * Get the DS directory vnodes (locked LK_SHARED) and the
 		 * file handles for all of the mirrors.
 		 */
 		error = nfsrv_dsgetsockmnt(vp, LK_SHARED, buf, &buflen,
 		    &mirrorcnt, p, dvp, fh, NULL, NULL, NULL, NULL, NULL,
 		    NULL, NULL);
 		if (error == 0) {
 			for (i = 0; i < mirrorcnt; i++)
 				nmp[i] = VFSTONFS(dvp[i]->v_mount);
 		} else
 			printf("pNFS: proxy getextattr sockaddr=%d\n", error);
 	} else
 		printf("pNFS: nfsrv_dsgetsockmnt=%d\n", error);
 	if (error == 0) {
 		failpos = -1;
 		if (origmircnt == -1)
 			origmircnt = mirrorcnt;
 		/*
 		 * If failpos is set to a mirror#, then that mirror has
 		 * failed and will be disabled. For Read, Getattr and Seek, the
 		 * function only tries one mirror, so if that mirror has
 		 * failed, it will need to be retried. As such, increment
 		 * trycnt for these cases.
 		 * For Write, Setattr and Setacl, the function tries all
 		 * mirrors and will not return an error for the case where
 		 * one mirror has failed. For these cases, the functioning
 		 * mirror(s) will have been modified, so a retry isn't
 		 * necessary. These functions will set failpos for the
 		 * failed mirror#.
 		 */
 		if (ioproc == NFSPROC_READDS) {
 			error = nfsrv_readdsrpc(fh, off, cnt, cred, p, nmp[0],
 			    mpp, mpp2);
 			if (nfsds_failerr(error) && mirrorcnt > 1) {
 				/*
 				 * Setting failpos will cause the mirror
 				 * to be disabled and then a retry of this
 				 * read is required.
 				 */
 				failpos = 0;
 				error = 0;
 				trycnt++;
 			}
 		} else if (ioproc == NFSPROC_WRITEDS)
 			error = nfsrv_writedsrpc(fh, off, cnt, cred, p, vp,
 			    &nmp[0], mirrorcnt, mpp, cp, &failpos);
 		else if (ioproc == NFSPROC_SETATTR)
 			error = nfsrv_setattrdsrpc(fh, cred, p, vp, &nmp[0],
 			    mirrorcnt, nap, &failpos);
 		else if (ioproc == NFSPROC_SETACL)
 			error = nfsrv_setacldsrpc(fh, cred, p, vp, &nmp[0],
 			    mirrorcnt, aclp, &failpos);
 		else if (ioproc == NFSPROC_SEEKDS) {
 			error = nfsrv_seekdsrpc(fh, offp, content, eofp, cred,
 			    p, nmp[0]);
 			if (nfsds_failerr(error) && mirrorcnt > 1) {
 				/*
 				 * Setting failpos will cause the mirror
 				 * to be disabled and then a retry of this
 				 * seek is required.
 				 */
 				failpos = 0;
 				error = 0;
 				trycnt++;
 			}
 		} else if (ioproc == NFSPROC_ALLOCATE)
 			error = nfsrv_allocatedsrpc(fh, off, *offp, cred, p, vp,
 			    &nmp[0], mirrorcnt, &failpos);
 		else {
 			/* Everything else does a Getattr on the last mirror. */
 			error = nfsrv_getattrdsrpc(&fh[mirrorcnt - 1], cred, p,
 			    vp, nmp[mirrorcnt - 1], nap);
 			if (nfsds_failerr(error) && mirrorcnt > 1) {
 				/*
 				 * Setting failpos will cause the mirror
 				 * to be disabled and then a retry of this
 				 * getattr is required.
 				 */
 				failpos = mirrorcnt - 1;
 				error = 0;
 				trycnt++;
 			}
 		}
 		ds = NULL;
 		if (failpos >= 0) {
 			failnmp = nmp[failpos];
 			NFSLOCKMNT(failnmp);
 			/*
 			 * Only disable the mirror if neither flag is set
 			 * already.  NFSMNTP_CANCELRPCS stays set while this
 			 * thread deletes the DS and kills its RPCs.
 			 */
 			if ((failnmp->nm_privflag & (NFSMNTP_FORCEDISM |
 			     NFSMNTP_CANCELRPCS)) == 0) {
 				failnmp->nm_privflag |= NFSMNTP_CANCELRPCS;
 				NFSUNLOCKMNT(failnmp);
 				ds = nfsrv_deldsnmp(PNFSDOP_DELDSSERVER,
 				    failnmp, p);
 				NFSD_DEBUG(4, "dsldsnmp fail=%d ds=%p\n",
 				    failpos, ds);
 				if (ds != NULL)
 					nfsrv_killrpcs(failnmp);
 				NFSLOCKMNT(failnmp);
 				failnmp->nm_privflag &= ~NFSMNTP_CANCELRPCS;
 				wakeup(failnmp);
 			}
 			NFSUNLOCKMNT(failnmp);
 		}
 		/* Release the locks acquired by nfsrv_dsgetsockmnt(). */
 		for (i = 0; i < mirrorcnt; i++)
 			NFSVOPUNLOCK(dvp[i]);
 		NFSD_DEBUG(4, "nfsrv_proxyds: aft RPC=%d trya=%d\n", error,
 		    trycnt);
 		/* Try the Read/Getattr again if a mirror was deleted. */
 		if (ds != NULL && trycnt > 0 && trycnt < origmircnt)
 			goto tryagain;
 	} else {
 		/* Return ENOENT for any Extended Attribute error. */
 		error = ENOENT;
 	}
 	free(buf, M_TEMP);
 	NFSD_DEBUG(4, "nfsrv_proxyds: error=%d\n", error);
 	return (error);
 }
 
 /*
  * Get the DS mount point, fh and directory from the "pnfsd.dsfile" extended
  * attribute.
  * vp - The vnode of the file on the MDS, which must be locked.
  * lktype - If non-zero, the lock type used to lock each DS directory vnode,
  *           which is left locked upon a successful return.
  * buf, buflenp - Buffer for the extended attribute and its size, with
  *           *buflenp updated by vn_extattr_get().
  * mirrorcntp - Set to the number of mirrors for which a DS was found.
  * dvpp - If non-NULL, an array filled in with the DS directory vnodes.
  * fhp - If non-NULL, an array filled in with the file handles.
  * devid - If non-NULL, filled in with NFSX_V4DEVICEID bytes for each mirror.
  * fnamep - If non-NULL, filled in with the file name from the first mirror
  *           for which a DS was found.
  * nvpp - If non-NULL, *nvpp is set to the vnode returned by the first
  *           successful nfsrv_pnfslookupds() and *dsdirp is set to its
  *           dsf_dir index.
  * newnmpp - If it points to a non-NULL nmp, that is the destination and needs
  *           to be checked.  If it points to a NULL nmp, then it returns
  *           a suitable destination.
  * curnmp - If non-NULL, it is the source mount for the copy.
  * ippos - If non-NULL, set to the position of the mirror that matches
  *           curnmp or, when curnmp is NULL, to the position of an entry
  *           with an AF_INET address of 0.  It is -1 if there is none.
  * Returns 0 upon success, otherwise an error, with ENOATTR for a bad
  * extended attribute, ENXIO if curnmp is not in the mirror list, EEXIST if
  * the destination is already a mirror and ENOENT if no DS was found.
  */
 int
 nfsrv_dsgetsockmnt(struct vnode *vp, int lktype, char *buf, int *buflenp,
     int *mirrorcntp, NFSPROC_T *p, struct vnode **dvpp, fhandle_t *fhp,
     char *devid, char *fnamep, struct vnode **nvpp, struct nfsmount **newnmpp,
     struct nfsmount *curnmp, int *ippos, int *dsdirp)
 {
 	struct vnode *dvp, *nvp = NULL, **tdvpp;
 	struct mount *mp;
 	struct nfsmount *nmp, *newnmp;
 	struct sockaddr *sad;
 	struct sockaddr_in *sin;
 	struct nfsdevice *ds, *tds, *fndds;
 	struct pnfsdsfile *pf;
 	uint32_t dsdir;
 	int error, fhiszero, fnd, gotone, i, mirrorcnt;
 
 	ASSERT_VOP_LOCKED(vp, "nfsrv_dsgetsockmnt vp");
 	/* Set the output arguments to their defaults. */
 	*mirrorcntp = 1;
 	tdvpp = dvpp;
 	if (nvpp != NULL)
 		*nvpp = NULL;
 	if (dvpp != NULL)
 		*dvpp = NULL;
 	if (ippos != NULL)
 		*ippos = -1;
 	if (newnmpp != NULL)
 		newnmp = *newnmpp;
 	else
 		newnmp = NULL;
 	mp = vp->v_mount;
 	error = vn_extattr_get(vp, IO_NODELOCKED, EXTATTR_NAMESPACE_SYSTEM,
 	    "pnfsd.dsfile", buflenp, buf, p);
 	/*
 	 * The attribute is an array of struct pnfsdsfile, one per mirror, so
 	 * its length must be a multiple of the structure's size and must be
 	 * for between 1 and NFSDEV_MAXMIRRORS entries.
 	 */
 	mirrorcnt = *buflenp / sizeof(*pf);
 	if (error == 0 && (mirrorcnt < 1 || mirrorcnt > NFSDEV_MAXMIRRORS ||
 	    *buflenp != sizeof(*pf) * mirrorcnt))
 		error = ENOATTR;
 
 	pf = (struct pnfsdsfile *)buf;
 	/* If curnmp != NULL, check for a match in the mirror list. */
 	if (curnmp != NULL && error == 0) {
 		fnd = 0;
 		for (i = 0; i < mirrorcnt; i++, pf++) {
 			sad = (struct sockaddr *)&pf->dsf_sin;
 			if (nfsaddr2_match(sad, curnmp->nm_nam)) {
 				if (ippos != NULL)
 					*ippos = i;
 				fnd = 1;
 				break;
 			}
 		}
 		if (fnd == 0)
 			error = ENXIO;
 	}
 
 	/* gotone counts the mirrors for which a DS has been found. */
 	gotone = 0;
 	pf = (struct pnfsdsfile *)buf;
 	NFSD_DEBUG(4, "nfsrv_dsgetsockmnt: mirrorcnt=%d err=%d\n", mirrorcnt,
 	    error);
 	for (i = 0; i < mirrorcnt && error == 0; i++, pf++) {
 		fhiszero = 0;
 		sad = (struct sockaddr *)&pf->dsf_sin;
 		sin = &pf->dsf_sin;
 		dsdir = pf->dsf_dir;
 		/* dsdir is used to index nfsdev_dsdir[] below. */
 		if (dsdir >= nfsrv_dsdirsize) {
 			printf("nfsrv_dsgetsockmnt: dsdir=%d\n", dsdir);
 			error = ENOATTR;
 		} else if (nvpp != NULL && newnmp != NULL &&
 		    nfsaddr2_match(sad, newnmp->nm_nam))
 			error = EEXIST;
 		if (error == 0) {
 			if (ippos != NULL && curnmp == NULL &&
 			    sad->sa_family == AF_INET &&
 			    sin->sin_addr.s_addr == 0)
 				*ippos = i;
 			if (NFSBCMP(&zerofh, &pf->dsf_fh, sizeof(zerofh)) == 0)
 				fhiszero = 1;
 			/* Use the socket address to find the mount point. */
 			fndds = NULL;
 			NFSDDSLOCK();
 			/* Find a match for the IP address. */
 			TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) {
 				if (ds->nfsdev_nmp != NULL) {
 					dvp = ds->nfsdev_dvp;
 					nmp = VFSTONFS(dvp->v_mount);
 					if (nmp != ds->nfsdev_nmp)
 						printf("different2 nmp %p %p\n",
 						    nmp, ds->nfsdev_nmp);
 					if (nfsaddr2_match(sad, nmp->nm_nam)) {
 						fndds = ds;
 						break;
 					}
 				}
 			}
 			if (fndds != NULL && newnmpp != NULL &&
 			    newnmp == NULL) {
 				/* Search for a place to make a mirror copy. */
 				/*
 				 * A candidate must have a mount and differ
 				 * from fndds.  Either neither of the two has
 				 * nfsdev_mdsisset set, or both do and the
 				 * candidate's nfsdev_mdsfsid is the fsid of
 				 * the file system vp is on.
 				 */
 				TAILQ_FOREACH(tds, &nfsrv_devidhead,
 				    nfsdev_list) {
 					if (tds->nfsdev_nmp != NULL &&
 					    fndds != tds &&
 					    ((tds->nfsdev_mdsisset == 0 &&
 					      fndds->nfsdev_mdsisset == 0) ||
 					     (tds->nfsdev_mdsisset != 0 &&
 					      fndds->nfsdev_mdsisset != 0 &&
 					      fsidcmp(&tds->nfsdev_mdsfsid,
 					      &mp->mnt_stat.f_fsid) == 0))) {
 						*newnmpp = tds->nfsdev_nmp;
 						break;
 					}
 				}
 				if (tds != NULL) {
 					/*
 					 * Move this entry to the end of the
 					 * list, so it won't be selected as
 					 * easily the next time.
 					 */
 					TAILQ_REMOVE(&nfsrv_devidhead, tds,
 					    nfsdev_list);
 					TAILQ_INSERT_TAIL(&nfsrv_devidhead, tds,
 					    nfsdev_list);
 				}
 			}
 			NFSDDSUNLOCK();
 			if (fndds != NULL) {
 				dvp = fndds->nfsdev_dsdir[dsdir];
 				/*
 				 * The directory is locked if the caller asked
 				 * for that (lktype != 0) or if a Lookup is
 				 * needed, which is the case when the file
 				 * handle is all 0's or *nvpp has not been set
 				 * yet.  An exclusive lock is used when the
 				 * file handle is all 0's.
 				 */
 				if (lktype != 0 || fhiszero != 0 ||
 				    (nvpp != NULL && *nvpp == NULL)) {
 					if (fhiszero != 0)
 						error = vn_lock(dvp,
 						    LK_EXCLUSIVE);
 					else if (lktype != 0)
 						error = vn_lock(dvp, lktype);
 					else
 						error = vn_lock(dvp, LK_SHARED);
 					/*
 					 * If the file handle is all 0's, try to
 					 * do a Lookup against the DS to acquire
 					 * it.
 					 * If dvpp == NULL or the Lookup fails,
 					 * unlock dvp after the call.
 					 */
 					if (error == 0 && (fhiszero != 0 ||
 					    (nvpp != NULL && *nvpp == NULL))) {
 						error = nfsrv_pnfslookupds(vp,
 						    dvp, pf, &nvp, p);
 						if (error == 0) {
 							if (fhiszero != 0)
 								nfsrv_pnfssetfh(
 								    vp, pf,
 								    devid,
 								    fnamep,
 								    nvp, p);
 							/*
 							 * Either hand nvp to
 							 * the caller or
 							 * release it.
 							 */
 							if (nvpp != NULL &&
 							    *nvpp == NULL) {
 								*nvpp = nvp;
 								*dsdirp = dsdir;
 							} else
 								vput(nvp);
 						}
 						if (error != 0 || lktype == 0)
 							NFSVOPUNLOCK(dvp);
 					}
 				}
 				if (error == 0) {
 					/*
 					 * Fill in the next entry of each of
 					 * the output arrays provided.
 					 */
 					gotone++;
 					NFSD_DEBUG(4, "gotone=%d\n", gotone);
 					if (devid != NULL) {
 						NFSBCOPY(fndds->nfsdev_deviceid,
 						    devid, NFSX_V4DEVICEID);
 						devid += NFSX_V4DEVICEID;
 					}
 					if (dvpp != NULL)
 						*tdvpp++ = dvp;
 					if (fhp != NULL)
 						NFSBCOPY(&pf->dsf_fh, fhp++,
 						    NFSX_MYFH);
 					if (fnamep != NULL && gotone == 1)
 						strlcpy(fnamep,
 						    pf->dsf_filename,
 						    sizeof(pf->dsf_filename));
 				} else
 					NFSD_DEBUG(4, "nfsrv_dsgetsockmnt "
 					    "err=%d\n", error);
 			}
 		}
 	}
 	if (error == 0 && gotone == 0)
 		error = ENOENT;
 
 	NFSD_DEBUG(4, "eo nfsrv_dsgetsockmnt: gotone=%d err=%d\n", gotone,
 	    error);
 	if (error == 0)
 		*mirrorcntp = gotone;
 	else {
 		if (gotone > 0 && dvpp != NULL) {
 			/*
 			 * If the error didn't occur on the first one and
 			 * dvpp != NULL, the one(s) prior to the failure will
 			 * have locked dvp's that need to be unlocked.
 			 */
 			for (i = 0; i < gotone; i++) {
 				NFSVOPUNLOCK(*dvpp);
 				*dvpp++ = NULL;
 			}
 		}
 		/*
 		 * If it found the vnode to be copied from before a failure,
 		 * it needs to be vput()'d.
 		 */
 		if (nvpp != NULL && *nvpp != NULL) {
 			vput(*nvpp);
 			*nvpp = NULL;
 		}
 	}
 	return (error);
 }
 
 /*
  * Store the Change, size, access/modify time and space used attributes
  * from *nap in the "pnfsd.dsattr" extended attribute of vp.
  * vp must be exclusively locked.  Returns the error from vn_extattr_set(),
  * after logging it.
  */
 static int
 nfsrv_setextattr(struct vnode *vp, struct nfsvattr *nap, NFSPROC_T *p)
 {
 	struct pnfsdsattr attrs;
 	int ret;
 
 	ASSERT_VOP_ELOCKED(vp, "nfsrv_setextattr vp");
 
 	/* Build the on-disk form of the attributes. */
 	attrs.dsa_filerev = nap->na_filerev;
 	attrs.dsa_size = nap->na_size;
 	attrs.dsa_atime = nap->na_atime;
 	attrs.dsa_mtime = nap->na_mtime;
 	attrs.dsa_bytes = nap->na_bytes;
 
 	ret = vn_extattr_set(vp, IO_NODELOCKED, EXTATTR_NAMESPACE_SYSTEM,
 	    "pnfsd.dsattr", sizeof(attrs), (char *)&attrs, p);
 	if (ret == 0)
 		return (0);
 	printf("pNFS: setextattr=%d\n", ret);
 	return (ret);
 }
 
 /*
  * Do a Read RPC of len bytes at offset off against the DS data file with
  * file handle *fhp on the mount nmp.
  * If data is returned, *mpp is set to the reply's mbuf chain, trimmed so
  * that it only holds the data plus XDR padding, and *mpendp to the last mbuf
  * of that chain.  Otherwise *mpp is NULL and *mpendp is not set.
  * Returns 0, the error from newnfs_request(), the status in the reply or
  * ENOENT if the reply's mbuf chain is not as expected.
  */
 static int
 nfsrv_readdsrpc(fhandle_t *fhp, off_t off, int len, struct ucred *cred,
     NFSPROC_T *p, struct nfsmount *nmp, struct mbuf **mpp, struct mbuf **mpendp)
 {
 	uint32_t *tl;
 	struct nfsrv_descript *nd;
 	nfsv4stateid_t st;
 	struct mbuf *m, *m2;
 	int error = 0, retlen, tlen, trimlen;
 
 	NFSD_DEBUG(4, "in nfsrv_readdsrpc\n");
 	nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO);
 	*mpp = NULL;
 	/*
 	 * Use a stateid where other is an alternating 01010 pattern and
 	 * seqid is 0xffffffff.  This value is not defined as special by
 	 * the RFC and is used by the FreeBSD NFS server to indicate an
 	 * MDS->DS proxy operation.
 	 */
 	st.other[0] = 0x55555555;
 	st.other[1] = 0x55555555;
 	st.other[2] = 0x55555555;
 	st.seqid = 0xffffffff;
 	nfscl_reqstart(nd, NFSPROC_READDS, nmp, (u_int8_t *)fhp, sizeof(*fhp),
 	    NULL, NULL, 0, 0);
 	nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID);
 	/* The arguments are the 64bit offset followed by the length. */
 	NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED * 3);
 	txdr_hyper(off, tl);
 	*(tl + 2) = txdr_unsigned(len);
 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
 	if (error != 0) {
 		free(nd, M_TEMP);
 		return (error);
 	}
 	if (nd->nd_repstat == 0) {
 		/*
 		 * NOTE(review): NFSM_DISSECT() and NFSM_STRSIZ() presumably
 		 * set error and jump to nfsmout upon a parse failure --
 		 * confirm against the macro definitions.
 		 */
 		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 		NFSM_STRSIZ(retlen, len);
 		if (retlen > 0) {
 			/* Trim off the pre-data XDR from the mbuf chain. */
 			m = nd->nd_mrep;
 			while (m != NULL && m != nd->nd_md) {
 				if (m->m_next == nd->nd_md) {
 					m->m_next = NULL;
 					m_freem(nd->nd_mrep);
 					nd->nd_mrep = m = nd->nd_md;
 				} else
 					m = m->m_next;
 			}
 			if (m == NULL) {
 				printf("nfsrv_readdsrpc: busted mbuf list\n");
 				error = ENOENT;
 				goto nfsmout;
 			}
 	
 			/*
 			 * Now, adjust first mbuf so that any XDR before the
 			 * read data is skipped over.
 			 */
 			trimlen = nd->nd_dpos - mtod(m, char *);
 			if (trimlen > 0) {
 				m->m_len -= trimlen;
 				NFSM_DATAP(m, trimlen);
 			}
 	
 			/*
 			 * Truncate the mbuf chain at retlen bytes of data,
 			 * plus XDR padding that brings the length up to a
 			 * multiple of 4.
 			 */
 			tlen = NFSM_RNDUP(retlen);
 			do {
 				if (m->m_len >= tlen) {
 					m->m_len = tlen;
 					tlen = 0;
 					m2 = m->m_next;
 					m->m_next = NULL;
 					m_freem(m2);
 					break;
 				}
 				tlen -= m->m_len;
 				m = m->m_next;
 			} while (m != NULL);
 			/* The chain was shorter than the length returned. */
 			if (tlen > 0) {
 				printf("nfsrv_readdsrpc: busted mbuf list\n");
 				error = ENOENT;
 				goto nfsmout;
 			}
 			/*
 			 * Hand the chain to the caller and clear nd_mrep, so
 			 * that it is not freed below.
 			 */
 			*mpp = nd->nd_mrep;
 			*mpendp = m;
 			nd->nd_mrep = NULL;
 		}
 	} else
 		error = nd->nd_repstat;
 nfsmout:
 	/* If nd->nd_mrep is already NULL, this is a no-op. */
 	m_freem(nd->nd_mrep);
 	free(nd, M_TEMP);
 	NFSD_DEBUG(4, "nfsrv_readdsrpc error=%d\n", error);
 	return (error);
 }
 
 /*
  * Do a write RPC on a DS data file, using this structure for the arguments,
  * so that this function can be executed by a separate kernel process.
  */
 struct nfsrvwritedsdorpc {
 	/* Set to 1 by start_writedsdorpc() once the RPC has been done. */
 	int			done;
 	/*
 	 * Cleared by nfsrv_writedsrpc(), which only waits for done while
 	 * this is non-zero.  Presumably set by nfs_pnfsio() when the RPC is
 	 * handed off -- TODO confirm.
 	 */
 	int			inprog;
 	/* The address of this field is the channel for the tsleep(). */
 	struct task		tsk;
 	/* The arguments for nfsrv_writedsdorpc(). */
 	fhandle_t		fh;
 	off_t			off;
 	int			len;
 	struct nfsmount		*nmp;
 	struct ucred		*cred;
 	NFSPROC_T		*p;
 	struct mbuf		*m;
 	/* The return value of nfsrv_writedsdorpc(). */
 	int			err;
 };
 
 /*
  * Do a Write RPC, followed by a Getattr, against the DS data file with file
  * handle *fhp on the mount nmp.
  * off, len - offset and byte count of the write
  * nap - handed to nfsv4_loadattr() to receive the attributes from the
  *       reply; the callers that do not need them pass NULL
  * m - mbuf chain holding the data, which is appended to the request
  * Returns 0, the error from newnfs_request(), the status in the reply,
  * NFSERR_IO if the DS did not commit the data FileSync or the error from
  * parsing the reply.
  */
 static int
 nfsrv_writedsdorpc(struct nfsmount *nmp, fhandle_t *fhp, off_t off, int len,
     struct nfsvattr *nap, struct mbuf *m, struct ucred *cred, NFSPROC_T *p)
 {
 	uint32_t *tl;
 	struct nfsrv_descript *nd;
 	nfsattrbit_t attrbits;
 	nfsv4stateid_t st;
 	int commit, error, retlen;
 
 	nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO);
 	nfscl_reqstart(nd, NFSPROC_WRITE, nmp, (u_int8_t *)fhp,
 	    sizeof(fhandle_t), NULL, NULL, 0, 0);
 
 	/*
 	 * Use a stateid where other is an alternating 01010 pattern and
 	 * seqid is 0xffffffff.  This value is not defined as special by
 	 * the RFC and is used by the FreeBSD NFS server to indicate an
 	 * MDS->DS proxy operation.
 	 */
 	st.other[0] = 0x55555555;
 	st.other[1] = 0x55555555;
 	st.other[2] = 0x55555555;
 	st.seqid = 0xffffffff;
 	nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID);
 	/* The 64bit offset, the stable_how and the length of the data. */
 	NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER + 2 * NFSX_UNSIGNED);
 	txdr_hyper(off, tl);
 	tl += 2;
 	/*
 	 * Do all writes FileSync, since the server doesn't hold onto dirty
 	 * buffers.  Since clients should be accessing the DS servers directly
 	 * using the pNFS layouts, this just needs to work correctly as a
 	 * fallback.
 	 */
 	*tl++ = txdr_unsigned(NFSWRITE_FILESYNC);
 	*tl = txdr_unsigned(len);
 	NFSD_DEBUG(4, "nfsrv_writedsdorpc: len=%d\n", len);
 
 	/* Put data in mbuf chain. */
 	nd->nd_mb->m_next = m;
 
 	/* Set nd_mb and nd_bpos to end of data. */
 	while (m->m_next != NULL)
 		m = m->m_next;
 	nd->nd_mb = m;
 	nfsm_set(nd, m->m_len);
 	NFSD_DEBUG(4, "nfsrv_writedsdorpc: lastmb len=%d\n", m->m_len);
 
 	/* Do a Getattr for the attributes that change upon writing. */
 	NFSZERO_ATTRBIT(&attrbits);
 	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SIZE);
 	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE);
 	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEACCESS);
 	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY);
 	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SPACEUSED);
 	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
 	*tl = txdr_unsigned(NFSV4OP_GETATTR);
 	(void) nfsrv_putattrbit(nd, &attrbits);
 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p,
 	    cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
 	if (error != 0) {
 		free(nd, M_TEMP);
 		return (error);
 	}
 	NFSD_DEBUG(4, "nfsrv_writedsdorpc: aft writerpc=%d\n", nd->nd_repstat);
 	/* Get rid of weak cache consistency data for now. */
 	if ((nd->nd_flag & (ND_NOMOREDATA | ND_NFSV4 | ND_V4WCCATTR)) ==
 	    (ND_NFSV4 | ND_V4WCCATTR)) {
 		error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL,
 		    NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL);
 		NFSD_DEBUG(4, "nfsrv_writedsdorpc: wcc attr=%d\n", error);
 		if (error != 0)
 			goto nfsmout;
 		/*
 		 * Get rid of Op# and status for next op.
 		 */
 		/*
 		 * NOTE(review): NFSM_DISSECT() presumably sets error and
 		 * jumps to nfsmout upon a parse failure -- confirm against
 		 * the macro definition.
 		 */
 		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
 		/* A non-zero status means that no results follow. */
 		if (*++tl != 0)
 			nd->nd_flag |= ND_NOMOREDATA;
 	}
 	if (nd->nd_repstat == 0) {
 		/* The byte count, how it was committed and the verifier. */
 		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED + NFSX_VERF);
 		retlen = fxdr_unsigned(int, *tl++);
 		commit = fxdr_unsigned(int, *tl);
 		if (commit != NFSWRITE_FILESYNC)
 			error = NFSERR_IO;
 		NFSD_DEBUG(4, "nfsrv_writedsdorpc:retlen=%d commit=%d err=%d\n",
 		    retlen, commit, error);
 	} else
 		error = nd->nd_repstat;
 	/* We have no use for the Write Verifier since we use FileSync. */
 
 	/*
 	 * Get the Change, Size, Access Time and Modify Time attributes and set
 	 * on the Metadata file, so its attributes will be what the file's
 	 * would be if it had been written.
 	 */
 	if (error == 0) {
 		/* Skip over the two words in front of the attributes. */
 		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
 		error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL,
 		    NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL);
 	}
 	NFSD_DEBUG(4, "nfsrv_writedsdorpc: aft loadattr=%d\n", error);
 nfsmout:
 	m_freem(nd->nd_mrep);
 	free(nd, M_TEMP);
 	NFSD_DEBUG(4, "nfsrv_writedsdorpc error=%d\n", error);
 	return (error);
 }
 
 /*
  * Start up the thread that will execute nfsrv_writedsdorpc().
  */
 static void
 start_writedsdorpc(void *arg, int pending)
 {
 	struct nfsrvwritedsdorpc *drpc;
 
 	drpc = (struct nfsrvwritedsdorpc *)arg;
 	drpc->err = nfsrv_writedsdorpc(drpc->nmp, &drpc->fh, drpc->off,
 	    drpc->len, NULL, drpc->m, drpc->cred, drpc->p);
 	drpc->done = 1;
 	NFSD_DEBUG(4, "start_writedsdorpc: err=%d\n", drpc->err);
 }
 
 /*
  * Write the data to the DS data file on every mirror.
  * fhp and nmpp are arrays of mirrorcnt file handles and mounts.  *mpp is the
  * mbuf chain holding the data and cp points to the start of the data within
  * its first mbuf.  The attributes returned by the write to the last mirror
  * are stored on vp via nfsrv_setextattr().
  * *failposp is set to the position of the first mirror for which
  * nfsds_failerr() reports a failure, if it was -1.  Any other error is
  * returned, with the first one seen taking precedence.
  */
 static int
 nfsrv_writedsrpc(fhandle_t *fhp, off_t off, int len, struct ucred *cred,
     NFSPROC_T *p, struct vnode *vp, struct nfsmount **nmpp, int mirrorcnt,
     struct mbuf **mpp, char *cp, int *failposp)
 {
 	struct nfsrvwritedsdorpc *rpcs, *cur;
 	struct nfsvattr dsattr;
 	struct mbuf *lastm;
 	int dataoffs, error, idx, nsub, rv, waittime;
 
 	NFSD_DEBUG(4, "in nfsrv_writedsrpc\n");
 	KASSERT(*mpp != NULL, ("nfsrv_writedsrpc: NULL mbuf chain"));
 
 	/* Every mirror except the last one gets an argument structure. */
 	nsub = mirrorcnt - 1;
 	rpcs = NULL;
 	if (nsub > 0)
 		rpcs = malloc(sizeof(*rpcs) * nsub, M_TEMP, M_WAITOK);
 
 	/* Calculate offset in mbuf chain that data starts. */
 	dataoffs = cp - mtod(*mpp, char *);
 	NFSD_DEBUG(4, "nfsrv_writedsrpc: mcopy offs=%d len=%d\n", dataoffs,
 	    len);
 
 	/*
 	 * Hand the write for each of the first nsub mirrors off to
 	 * nfs_pnfsio(), doing it here if it cannot be handed off.
 	 */
 	error = 0;
 	for (idx = 0; idx < nsub; idx++) {
 		cur = &rpcs[idx];
 		cur->done = 0;
 		NFSBCOPY(&fhp[idx], &cur->fh, sizeof(*fhp));
 		cur->off = off;
 		cur->len = len;
 		cur->nmp = nmpp[idx];
 		cur->cred = cred;
 		cur->p = p;
 		cur->inprog = 0;
 		cur->err = 0;
 		cur->m = m_copym(*mpp, dataoffs, NFSM_RNDUP(len), M_WAITOK);
 		rv = EIO;
 		if (nfs_pnfsiothreads != 0) {
 			rv = nfs_pnfsio(start_writedsdorpc, cur);
 			NFSD_DEBUG(4, "nfsrv_writedsrpc: nfs_pnfsio=%d\n",
 			    rv);
 		}
 		if (rv == 0)
 			continue;
 		rv = nfsrv_writedsdorpc(nmpp[idx], &fhp[idx], off, len, NULL,
 		    cur->m, cred, p);
 		if (nfsds_failerr(rv) && *failposp == -1)
 			*failposp = idx;
 		else if (error == 0 && rv != 0)
 			error = rv;
 	}
 
 	/* idx is now the position of the last mirror, which is done here. */
 	lastm = m_copym(*mpp, dataoffs, NFSM_RNDUP(len), M_WAITOK);
 	rv = nfsrv_writedsdorpc(nmpp[idx], &fhp[idx], off, len, &dsattr,
 	    lastm, cred, p);
 	if (nfsds_failerr(rv) && *failposp == -1 && mirrorcnt > 1)
 		*failposp = mirrorcnt - 1;
 	else if (error == 0 && rv != 0)
 		error = rv;
 	if (error == 0)
 		error = nfsrv_setextattr(vp, &dsattr, p);
 	NFSD_DEBUG(4, "nfsrv_writedsrpc: aft setextat=%d\n", error);
 
 	/* Poll every 20msec, but never sleep for less than one tick. */
 	waittime = hz / 50;
 	if (waittime < 1)
 		waittime = 1;
 	for (idx = 0; idx < nsub; idx++) {
 		cur = &rpcs[idx];
 		/* Wait for RPCs on separate threads to complete. */
 		while (cur->inprog != 0 && cur->done == 0)
 			tsleep(&cur->tsk, PVFS, "srvwrds", waittime);
 		if (nfsds_failerr(cur->err) && *failposp == -1)
 			*failposp = idx;
 		else if (error == 0 && cur->err != 0)
 			error = cur->err;
 	}
 	free(rpcs, M_TEMP);
 	return (error);
 }
 
 /*
  * Do an allocate RPC on a DS data file, using this structure for the
  * arguments, so that this function can be executed by a separate kernel
  * process.
  */
 struct nfsrvallocatedsdorpc {
 	/* Set to 1 by start_allocatedsdorpc() once the RPC has been done. */
 	int			done;
 	/*
 	 * Cleared by nfsrv_allocatedsrpc(), which only waits for done while
 	 * this is non-zero.  Presumably set by nfs_pnfsio() when the RPC is
 	 * handed off -- TODO confirm.
 	 */
 	int			inprog;
 	/* The address of this field is the channel for the tsleep(). */
 	struct task		tsk;
 	/* The arguments for nfsrv_allocatedsdorpc(). */
 	fhandle_t		fh;
 	off_t			off;
 	off_t			len;
 	struct nfsmount		*nmp;
 	struct ucred		*cred;
 	NFSPROC_T		*p;
 	/* The return value of nfsrv_allocatedsdorpc(). */
 	int			err;
 };
 
 /*
  * Do an Allocate RPC, followed by a Getattr, against the DS data file with
  * file handle *fhp on the mount nmp.
  * off, len - offset and length of the allocation
  * nap - handed to nfsv4_loadattr() to receive the attributes from the
  *       reply; the callers that do not need them pass NULL
  * Returns 0, the error from newnfs_request(), the status in the reply or
  * the error from parsing the reply.
  */
 static int
 nfsrv_allocatedsdorpc(struct nfsmount *nmp, fhandle_t *fhp, off_t off,
     off_t len, struct nfsvattr *nap, struct ucred *cred, NFSPROC_T *p)
 {
 	uint32_t *tl;
 	struct nfsrv_descript *nd;
 	nfsattrbit_t attrbits;
 	nfsv4stateid_t st;
 	int error;
 
 	nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO);
 	nfscl_reqstart(nd, NFSPROC_ALLOCATE, nmp, (u_int8_t *)fhp,
 	    sizeof(fhandle_t), NULL, NULL, 0, 0);
 
 	/*
 	 * Use a stateid where other is an alternating 01010 pattern and
 	 * seqid is 0xffffffff.  This value is not defined as special by
 	 * the RFC and is used by the FreeBSD NFS server to indicate an
 	 * MDS->DS proxy operation.
 	 */
 	st.other[0] = 0x55555555;
 	st.other[1] = 0x55555555;
 	st.other[2] = 0x55555555;
 	st.seqid = 0xffffffff;
 	nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID);
 	/*
 	 * The 64bit offset and length, plus one more word for the operation
 	 * number of the Getattr that follows the Allocate.
 	 */
 	NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER + NFSX_UNSIGNED);
 	txdr_hyper(off, tl); tl += 2;
 	txdr_hyper(len, tl); tl += 2;
 	NFSD_DEBUG(4, "nfsrv_allocatedsdorpc: len=%jd\n", (intmax_t)len);
 
 	*tl = txdr_unsigned(NFSV4OP_GETATTR);
 	/* Ask for the attributes selected by NFSGETATTR_ATTRBIT(). */
 	NFSGETATTR_ATTRBIT(&attrbits);
 	nfsrv_putattrbit(nd, &attrbits);
 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p,
 	    cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
 	if (error != 0) {
 		free(nd, M_TEMP);
 		return (error);
 	}
 	NFSD_DEBUG(4, "nfsrv_allocatedsdorpc: aft allocaterpc=%d\n",
 	    nd->nd_repstat);
 	if (nd->nd_repstat == 0) {
 		/*
 		 * Skip over the two words in front of the attributes.
 		 * NOTE(review): NFSM_DISSECT() presumably sets error and
 		 * jumps to nfsmout upon a parse failure -- confirm against
 		 * the macro definition.
 		 */
 		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
 		error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL,
 		    NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL);
 	} else
 		error = nd->nd_repstat;
 	NFSD_DEBUG(4, "nfsrv_allocatedsdorpc: aft loadattr=%d\n", error);
 nfsmout:
 	m_freem(nd->nd_mrep);
 	free(nd, M_TEMP);
 	NFSD_DEBUG(4, "nfsrv_allocatedsdorpc error=%d\n", error);
 	return (error);
 }
 
 /*
  * Start up the thread that will execute nfsrv_allocatedsdorpc().
  */
 static void
 start_allocatedsdorpc(void *arg, int pending)
 {
 	struct nfsrvallocatedsdorpc *drpc;
 
 	drpc = (struct nfsrvallocatedsdorpc *)arg;
 	drpc->err = nfsrv_allocatedsdorpc(drpc->nmp, &drpc->fh, drpc->off,
 	    drpc->len, NULL, drpc->cred, drpc->p);
 	drpc->done = 1;
 	NFSD_DEBUG(4, "start_allocatedsdorpc: err=%d\n", drpc->err);
 }
 
 /*
  * Do the Allocate against the DS data file on every mirror.
  * fhp and nmpp are arrays of mirrorcnt file handles and mounts.  The
  * attributes returned by the Allocate on the last mirror are stored on vp
  * via nfsrv_setextattr().
  * *failposp is set to the position of the first mirror for which
  * nfsds_failerr() reports a failure, if it was -1.  Any other error is
  * returned, with the first one seen taking precedence.
  */
 static int
 nfsrv_allocatedsrpc(fhandle_t *fhp, off_t off, off_t len, struct ucred *cred,
     NFSPROC_T *p, struct vnode *vp, struct nfsmount **nmpp, int mirrorcnt,
     int *failposp)
 {
 	struct nfsrvallocatedsdorpc *rpcs, *cur;
 	struct nfsvattr dsattr;
 	int error, idx, nsub, rv, waittime;
 
 	NFSD_DEBUG(4, "in nfsrv_allocatedsrpc\n");
 
 	/* Every mirror except the last one gets an argument structure. */
 	nsub = mirrorcnt - 1;
 	rpcs = NULL;
 	if (nsub > 0)
 		rpcs = malloc(sizeof(*rpcs) * nsub, M_TEMP, M_WAITOK);
 
 	/*
 	 * Hand the Allocate for each of the first nsub mirrors off to
 	 * nfs_pnfsio(), doing it here if it cannot be handed off.
 	 */
 	error = 0;
 	for (idx = 0; idx < nsub; idx++) {
 		cur = &rpcs[idx];
 		cur->done = 0;
 		NFSBCOPY(&fhp[idx], &cur->fh, sizeof(*fhp));
 		cur->off = off;
 		cur->len = len;
 		cur->nmp = nmpp[idx];
 		cur->cred = cred;
 		cur->p = p;
 		cur->inprog = 0;
 		cur->err = 0;
 		rv = EIO;
 		if (nfs_pnfsiothreads != 0) {
 			rv = nfs_pnfsio(start_allocatedsdorpc, cur);
 			NFSD_DEBUG(4, "nfsrv_allocatedsrpc: nfs_pnfsio=%d\n",
 			    rv);
 		}
 		if (rv == 0)
 			continue;
 		rv = nfsrv_allocatedsdorpc(nmpp[idx], &fhp[idx], off, len,
 		    NULL, cred, p);
 		if (nfsds_failerr(rv) && *failposp == -1)
 			*failposp = idx;
 		else if (error == 0 && rv != 0)
 			error = rv;
 	}
 
 	/* idx is now the position of the last mirror, which is done here. */
 	rv = nfsrv_allocatedsdorpc(nmpp[idx], &fhp[idx], off, len, &dsattr,
 	    cred, p);
 	if (nfsds_failerr(rv) && *failposp == -1 && mirrorcnt > 1)
 		*failposp = mirrorcnt - 1;
 	else if (error == 0 && rv != 0)
 		error = rv;
 	if (error == 0)
 		error = nfsrv_setextattr(vp, &dsattr, p);
 	NFSD_DEBUG(4, "nfsrv_allocatedsrpc: aft setextat=%d\n", error);
 
 	/* Poll every 20msec, but never sleep for less than one tick. */
 	waittime = hz / 50;
 	if (waittime < 1)
 		waittime = 1;
 	for (idx = 0; idx < nsub; idx++) {
 		cur = &rpcs[idx];
 		/* Wait for RPCs on separate threads to complete. */
 		while (cur->inprog != 0 && cur->done == 0)
 			tsleep(&cur->tsk, PVFS, "srvalds", waittime);
 		if (nfsds_failerr(cur->err) && *failposp == -1)
 			*failposp = idx;
 		else if (error == 0 && cur->err != 0)
 			error = cur->err;
 	}
 	free(rpcs, M_TEMP);
 	return (error);
 }
 
 /*
  * Do a Setattr RPC, with a Getattr appended to the same compound, against
  * a single DS.
  *   fhp   - file handle of the DS file
  *   cred  - credentials passed to newnfs_request()
  *   p     - thread passed to newnfs_request()
  *   vp    - vnode handed to nfscl_fillsattr()
  *   nmp   - mount point of the DS the RPC is sent to
  *   nap   - attributes to be set (only na_vattr is used)
  *   dsnap - filled in by nfsv4_loadattr() from the reply
  * Returns 0 upon success, otherwise the error from newnfs_request(), from
  * parsing the reply or the status (nd_repstat) in the reply.
  */
 static int
 nfsrv_setattrdsdorpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p,
     struct vnode *vp, struct nfsmount *nmp, struct nfsvattr *nap,
     struct nfsvattr *dsnap)
 {
 	uint32_t *tl;
 	struct nfsrv_descript *nd;
 	nfsv4stateid_t st;
 	nfsattrbit_t attrbits;
 	int error;
 
 	NFSD_DEBUG(4, "in nfsrv_setattrdsdorpc\n");
 	nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO);
 	/*
 	 * Use a stateid where other is an alternating 01010 pattern and
 	 * seqid is 0xffffffff.  This value is not defined as special by
 	 * the RFC and is used by the FreeBSD NFS server to indicate an
 	 * MDS->DS proxy operation.
 	 */
 	st.other[0] = 0x55555555;
 	st.other[1] = 0x55555555;
 	st.other[2] = 0x55555555;
 	st.seqid = 0xffffffff;
 	/* Build the request: Setattr header, stateid, then the attributes. */
 	nfscl_reqstart(nd, NFSPROC_SETATTR, nmp, (u_int8_t *)fhp, sizeof(*fhp),
 	    NULL, NULL, 0, 0);
 	nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID);
 	nfscl_fillsattr(nd, &nap->na_vattr, vp, NFSSATTR_FULL, 0);
 
 	/* Do a Getattr for the attributes that change due to writing. */
 	NFSZERO_ATTRBIT(&attrbits);
 	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SIZE);
 	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE);
 	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEACCESS);
 	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY);
 	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SPACEUSED);
 	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
 	*tl = txdr_unsigned(NFSV4OP_GETATTR);
 	(void) nfsrv_putattrbit(nd, &attrbits);
 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
 	if (error != 0) {
 		/* Only the descriptor is released on this path. */
 		free(nd, M_TEMP);
 		return (error);
 	}
 	NFSD_DEBUG(4, "nfsrv_setattrdsdorpc: aft setattrrpc=%d\n",
 	    nd->nd_repstat);
 	/* Get rid of weak cache consistency data for now. */
 	if ((nd->nd_flag & (ND_NOMOREDATA | ND_NFSV4 | ND_V4WCCATTR)) ==
 	    (ND_NFSV4 | ND_V4WCCATTR)) {
 		error = nfsv4_loadattr(nd, NULL, dsnap, NULL, NULL, 0, NULL,
 		    NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL);
 		NFSD_DEBUG(4, "nfsrv_setattrdsdorpc: wcc attr=%d\n", error);
 		if (error != 0)
 			goto nfsmout;
 		/*
 		 * Get rid of Op# and status for next op.
 		 * The second word is the status and, when it is non-zero,
 		 * the rest of the reply is flagged as not available.
 		 * NOTE(review): NFSM_DISSECT() is a macro defined elsewhere;
 		 * presumably it sets "error" and jumps to nfsmout when the
 		 * reply is too short - confirm.
 		 */
 		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
 		if (*++tl != 0)
 			nd->nd_flag |= ND_NOMOREDATA;
 	}
 	/* Parse the attribute bitmap in the Setattr reply. */
 	error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
 	if (error != 0)
 		goto nfsmout;
 	if (nd->nd_repstat != 0)
 		error = nd->nd_repstat;
 	/*
 	 * Get the Change, Size, Access Time and Modify Time attributes and set
 	 * on the Metadata file, so its attributes will be what the file's
 	 * would be if it had been written.
 	 */
 	if (error == 0) {
 		/* Skip two words ahead of the Getattr attributes. */
 		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
 		error = nfsv4_loadattr(nd, NULL, dsnap, NULL, NULL, 0, NULL,
 		    NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL);
 	}
 	NFSD_DEBUG(4, "nfsrv_setattrdsdorpc: aft setattr loadattr=%d\n", error);
 nfsmout:
 	/* Common exit once a reply exists: free the reply and descriptor. */
 	m_freem(nd->nd_mrep);
 	free(nd, M_TEMP);
 	NFSD_DEBUG(4, "nfsrv_setattrdsdorpc error=%d\n", error);
 	return (error);
 }
 
 /*
  * Arguments and results for one Setattr RPC against a DS, when the RPC is
  * handed to start_setattrdsdorpc() via nfs_pnfsio().
  */
 struct nfsrvsetattrdsdorpc {
 	int			done;	/* set to 1 when the RPC has finished */
 	int			inprog;	/* polled with "done" by the waiter */
 	struct task		tsk;	/* its address is the tsleep() channel */
 	fhandle_t		fh;	/* copy of the DS file handle */
 	struct nfsmount		*nmp;	/* DS mount the RPC is sent to */
 	struct vnode		*vp;	/* vnode passed through to the RPC */
 	struct ucred		*cred;	/* credentials for the RPC */
 	NFSPROC_T		*p;	/* thread passed through to the RPC */
 	struct nfsvattr		na;	/* copy of the attributes to set */
 	struct nfsvattr		dsna;	/* attributes loaded from the reply */
 	int			err;	/* return value of the RPC function */
 };
 
 /*
  * Task entry point that runs nfsrv_setattrdsdorpc() for the request
  * described by "arg" (a struct nfsrvsetattrdsdorpc).  The result is stored
  * in the request and then the request is marked as done.
  */
 static void
 start_setattrdsdorpc(void *arg, int pending)
 {
 	struct nfsrvsetattrdsdorpc *req = (struct nfsrvsetattrdsdorpc *)arg;
 	int rpcerr;
 
 	rpcerr = nfsrv_setattrdsdorpc(&req->fh, req->cred, req->p, req->vp,
 	    req->nmp, &req->na, &req->dsna);
 	req->err = rpcerr;
 	/* The result is recorded before completion is signalled. */
 	req->done = 1;
 }
 
 /*
  * Do a Setattr RPC against every mirrored DS for a file.
  *   fhp       - array of "mirrorcnt" DS file handles
  *   nmpp      - array of "mirrorcnt" DS mount points
  *   mirrorcnt - number of mirrors
  *   nap       - attributes to be set
  *   failposp  - if it is -1 upon entry, it is set to the index of the
  *               first mirror for which nfsds_failerr() is true
  * The first mirrorcnt - 1 RPCs are handed to nfs_pnfsio() so that they can
  * run concurrently; the last one is always done by the calling thread.
  * Returns 0 or the first error that is not recorded via *failposp.
  */
 static int
 nfsrv_setattrdsrpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p,
     struct vnode *vp, struct nfsmount **nmpp, int mirrorcnt,
     struct nfsvattr *nap, int *failposp)
 {
 	struct nfsrvsetattrdsdorpc *drpc, *tdrpc = NULL;
 	struct nfsvattr na;
 	int error, i, ret, timo;
 
 	NFSD_DEBUG(4, "in nfsrv_setattrdsrpc\n");
 	drpc = NULL;
 	/* One request record is needed for every mirror except the last. */
 	if (mirrorcnt > 1)
 		tdrpc = drpc = malloc(sizeof(*drpc) * (mirrorcnt - 1), M_TEMP,
 		    M_WAITOK);
 
 	/*
 	 * Do the setattr RPC for every DS, using a separate kernel process
 	 * for every DS except the last one.
 	 */
 	error = 0;
 	for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) {
 		tdrpc->done = 0;
 		tdrpc->inprog = 0;
 		NFSBCOPY(fhp, &tdrpc->fh, sizeof(*fhp));
 		tdrpc->nmp = *nmpp;
 		tdrpc->vp = vp;
 		tdrpc->cred = cred;
 		tdrpc->p = p;
 		tdrpc->na = *nap;
 		tdrpc->err = 0;
 		/* EIO forces the synchronous fallback if no task is queued. */
 		ret = EIO;
 		if (nfs_pnfsiothreads != 0) {
 			ret = nfs_pnfsio(start_setattrdsdorpc, tdrpc);
 			NFSD_DEBUG(4, "nfsrv_setattrdsrpc: nfs_pnfsio=%d\n",
 			    ret);
 		}
 		if (ret != 0) {
 			/* Could not hand it off, so do the RPC right here. */
 			ret = nfsrv_setattrdsdorpc(fhp, cred, p, vp, *nmpp, nap,
 			    &na);
 			if (nfsds_failerr(ret) && *failposp == -1)
 				*failposp = i;
 			else if (error == 0 && ret != 0)
 				error = ret;
 		}
 		nmpp++;
 		fhp++;
 	}
 	/* The last mirror is always done by this thread. */
 	ret = nfsrv_setattrdsdorpc(fhp, cred, p, vp, *nmpp, nap, &na);
 	if (nfsds_failerr(ret) && *failposp == -1 && mirrorcnt > 1)
 		*failposp = mirrorcnt - 1;
 	else if (error == 0 && ret != 0)
 		error = ret;
 	/*
 	 * NOTE(review): "na" is only written via the dsnap argument of
 	 * nfsrv_setattrdsdorpc().  If the call above failed with an error
 	 * that was recorded in *failposp, "error" is still 0 here and "na"
 	 * may not have been filled in - confirm that this is harmless.
 	 */
 	if (error == 0)
 		error = nfsrv_setextattr(vp, &na, p);
 	NFSD_DEBUG(4, "nfsrv_setattrdsrpc: aft setextat=%d\n", error);
 	tdrpc = drpc;
 	timo = hz / 50;		/* Wait for 20msec. */
 	if (timo < 1)
 		timo = 1;
 	for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) {
 		/* Wait for RPCs on separate threads to complete. */
 		while (tdrpc->inprog != 0 && tdrpc->done == 0)
 			tsleep(&tdrpc->tsk, PVFS, "srvsads", timo);
 		/* Requests done synchronously above left err set to 0. */
 		if (nfsds_failerr(tdrpc->err) && *failposp == -1)
 			*failposp = i;
 		else if (error == 0 && tdrpc->err != 0)
 			error = tdrpc->err;
 	}
 	/* The records are only freed after all of the waiting is done. */
 	free(drpc, M_TEMP);
 	return (error);
 }
 
 /*
  * Set an NFSv4 ACL on a DS file by doing a Setattr RPC that carries only
  * the ACL attribute.
  * Returns the error from newnfs_request() or, when the RPC was exchanged,
  * the status (nd_repstat) in the reply.
  */
 static int
 nfsrv_setacldsdorpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p,
     struct vnode *vp, struct nfsmount *nmp, struct acl *aclp)
 {
 	nfsattrbit_t aclbits;
 	nfsv4stateid_t proxyst;
 	struct nfsrv_descript *nd;
 	int error;
 
 	NFSD_DEBUG(4, "in nfsrv_setacldsdorpc\n");
 
 	/*
 	 * Use a stateid where other is an alternating 01010 pattern and
 	 * seqid is 0xffffffff.  This value is not defined as special by
 	 * the RFC and is used by the FreeBSD NFS server to indicate an
 	 * MDS->DS proxy operation.
 	 */
 	proxyst.seqid = 0xffffffff;
 	proxyst.other[0] = 0x55555555;
 	proxyst.other[1] = 0x55555555;
 	proxyst.other[2] = 0x55555555;
 
 	/* The ACL is the only attribute being set. */
 	NFSZERO_ATTRBIT(&aclbits);
 	NFSSETBIT_ATTRBIT(&aclbits, NFSATTRBIT_ACL);
 
 	nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO);
 	nfscl_reqstart(nd, NFSPROC_SETACL, nmp, (u_int8_t *)fhp, sizeof(*fhp),
 	    NULL, NULL, 0, 0);
 	nfsm_stateidtom(nd, &proxyst, NFSSTATEID_PUTSTATEID);
 	/*
 	 * The "vp" argument to nfsv4_fillattr() is only used for vnode_type(),
 	 * so passing in the metadata "vp" will be ok, since it is of
 	 * the same type (VREG).
 	 */
 	nfsv4_fillattr(nd, NULL, vp, aclp, NULL, NULL, 0, &aclbits, NULL,
 	    NULL, 0, 0, 0, 0, 0, NULL);
 
 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
 	if (error == 0) {
 		NFSD_DEBUG(4, "nfsrv_setacldsdorpc: aft setaclrpc=%d\n",
 		    nd->nd_repstat);
 		/* The reply status becomes the result of the function. */
 		error = nd->nd_repstat;
 		m_freem(nd->nd_mrep);
 	}
 	free(nd, M_TEMP);
 	return (error);
 }
 
 /*
  * Arguments and result for one Setacl RPC against a DS, when the RPC is
  * handed to start_setacldsdorpc() via nfs_pnfsio().
  */
 struct nfsrvsetacldsdorpc {
 	int			done;	/* set to 1 when the RPC has finished */
 	int			inprog;	/* polled with "done" by the waiter */
 	struct task		tsk;	/* its address is the tsleep() channel */
 	fhandle_t		fh;	/* copy of the DS file handle */
 	struct nfsmount		*nmp;	/* DS mount the RPC is sent to */
 	struct vnode		*vp;	/* vnode passed through to the RPC */
 	struct ucred		*cred;	/* credentials for the RPC */
 	NFSPROC_T		*p;	/* thread passed through to the RPC */
 	struct acl		*aclp;	/* ACL to be set (not copied) */
 	int			err;	/* return value of the RPC function */
 };
 
 /*
  * Task entry point that runs nfsrv_setacldsdorpc() for the request
  * described by "arg" (a struct nfsrvsetacldsdorpc).  The result is stored
  * in the request and then the request is marked as done.
  */
 static void
 start_setacldsdorpc(void *arg, int pending)
 {
 	struct nfsrvsetacldsdorpc *req = (struct nfsrvsetacldsdorpc *)arg;
 	int rpcerr;
 
 	rpcerr = nfsrv_setacldsdorpc(&req->fh, req->cred, req->p, req->vp,
 	    req->nmp, req->aclp);
 	req->err = rpcerr;
 	/* The result is recorded before completion is signalled. */
 	req->done = 1;
 }
 
 /*
  * Set an NFSv4 ACL on every mirrored DS for a file.
  *   fhp       - array of "mirrorcnt" DS file handles
  *   nmpp      - array of "mirrorcnt" DS mount points
  *   mirrorcnt - number of mirrors
  *   aclp      - the ACL to set, shared by all of the RPCs
  *   failposp  - if it is -1 upon entry, it is set to the index of the
  *               first mirror for which nfsds_failerr() is true
  * The first mirrorcnt - 1 RPCs are handed to nfs_pnfsio() so that they can
  * run concurrently; the last one is always done by the calling thread.
  * Returns 0 or the first error that is not recorded via *failposp.
  */
 static int
 nfsrv_setacldsrpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p,
     struct vnode *vp, struct nfsmount **nmpp, int mirrorcnt, struct acl *aclp,
     int *failposp)
 {
 	struct nfsrvsetacldsdorpc *drpc, *tdrpc = NULL;
 	int error, i, ret, timo;
 
 	NFSD_DEBUG(4, "in nfsrv_setacldsrpc\n");
 	drpc = NULL;
 	/* One request record is needed for every mirror except the last. */
 	if (mirrorcnt > 1)
 		tdrpc = drpc = malloc(sizeof(*drpc) * (mirrorcnt - 1), M_TEMP,
 		    M_WAITOK);
 
 	/*
 	 * Do the setattr RPC (which sets the ACL) for every DS, using a
 	 * separate kernel process for every DS except the last one.
 	 */
 	error = 0;
 	for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) {
 		tdrpc->done = 0;
 		tdrpc->inprog = 0;
 		NFSBCOPY(fhp, &tdrpc->fh, sizeof(*fhp));
 		tdrpc->nmp = *nmpp;
 		tdrpc->vp = vp;
 		tdrpc->cred = cred;
 		tdrpc->p = p;
 		tdrpc->aclp = aclp;
 		tdrpc->err = 0;
 		/* EIO forces the synchronous fallback if no task is queued. */
 		ret = EIO;
 		if (nfs_pnfsiothreads != 0) {
 			ret = nfs_pnfsio(start_setacldsdorpc, tdrpc);
 			NFSD_DEBUG(4, "nfsrv_setacldsrpc: nfs_pnfsio=%d\n",
 			    ret);
 		}
 		if (ret != 0) {
 			/* Could not hand it off, so do the RPC right here. */
 			ret = nfsrv_setacldsdorpc(fhp, cred, p, vp, *nmpp,
 			    aclp);
 			if (nfsds_failerr(ret) && *failposp == -1)
 				*failposp = i;
 			else if (error == 0 && ret != 0)
 				error = ret;
 		}
 		nmpp++;
 		fhp++;
 	}
 	/* The last mirror is always done by this thread. */
 	ret = nfsrv_setacldsdorpc(fhp, cred, p, vp, *nmpp, aclp);
 	if (nfsds_failerr(ret) && *failposp == -1 && mirrorcnt > 1)
 		*failposp = mirrorcnt - 1;
 	else if (error == 0 && ret != 0)
 		error = ret;
 	/*
 	 * NOTE(review): the message below says "setextat", but no extended
 	 * attribute is set by this function; the value printed is the error
 	 * accumulated so far.
 	 */
 	NFSD_DEBUG(4, "nfsrv_setacldsrpc: aft setextat=%d\n", error);
 	tdrpc = drpc;
 	timo = hz / 50;		/* Wait for 20msec. */
 	if (timo < 1)
 		timo = 1;
 	for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) {
 		/* Wait for RPCs on separate threads to complete. */
 		while (tdrpc->inprog != 0 && tdrpc->done == 0)
 			tsleep(&tdrpc->tsk, PVFS, "srvacds", timo);
 		/* Requests done synchronously above left err set to 0. */
 		if (nfsds_failerr(tdrpc->err) && *failposp == -1)
 			*failposp = i;
 		else if (error == 0 && tdrpc->err != 0)
 			error = tdrpc->err;
 	}
 	/* The records are only freed after all of the waiting is done. */
 	free(drpc, M_TEMP);
 	return (error);
 }
 
 /*
  * Do a Getattr RPC against a DS for the attributes that change when the
  * DS file is written (Size, Change, TimeAccess, TimeModify, SpaceUsed).
  * The attributes from the reply are loaded into "nap" and, when "vp" is
  * exclusively locked, are also saved via nfsrv_setextattr().
  * Returns 0 upon success, otherwise the error from the RPC, from parsing
  * the reply, from nfsrv_setextattr() or the status in the reply.
  */
 static int
 nfsrv_getattrdsrpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p,
     struct vnode *vp, struct nfsmount *nmp, struct nfsvattr *nap)
 {
 	nfsattrbit_t wrbits;
 	struct nfsrv_descript *nd;
 	int error;
 
 	NFSD_DEBUG(4, "in nfsrv_getattrdsrpc\n");
 
 	/* The set of attributes being asked for. */
 	NFSZERO_ATTRBIT(&wrbits);
 	NFSSETBIT_ATTRBIT(&wrbits, NFSATTRBIT_SIZE);
 	NFSSETBIT_ATTRBIT(&wrbits, NFSATTRBIT_CHANGE);
 	NFSSETBIT_ATTRBIT(&wrbits, NFSATTRBIT_TIMEACCESS);
 	NFSSETBIT_ATTRBIT(&wrbits, NFSATTRBIT_TIMEMODIFY);
 	NFSSETBIT_ATTRBIT(&wrbits, NFSATTRBIT_SPACEUSED);
 
 	nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO);
 	nfscl_reqstart(nd, NFSPROC_GETATTR, nmp, (u_int8_t *)fhp,
 	    sizeof(fhandle_t), NULL, NULL, 0, 0);
 	(void) nfsrv_putattrbit(nd, &wrbits);
 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
 	if (error != 0) {
 		free(nd, M_TEMP);
 		return (error);
 	}
 	NFSD_DEBUG(4, "nfsrv_getattrdsrpc: aft getattrrpc=%d\n",
 	    nd->nd_repstat);
 
 	if (nd->nd_repstat != 0) {
 		/* The DS replied with a failure status. */
 		error = nd->nd_repstat;
 	} else {
 		error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0,
 		    NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL,
 		    NULL, NULL);
 		/*
 		 * We can only save the updated values in the extended
 		 * attribute if the vp is exclusively locked.
 		 * This should happen when any of the following operations
 		 * occur on the vnode:
 		 *    Close, Delegreturn, LayoutCommit, LayoutReturn
 		 * As such, the updated extended attribute should get saved
 		 * before nfsrv_checkdsattr() returns 0 and allows the cached
 		 * attributes to be returned without calling this function.
 		 */
 		if (error == 0 && VOP_ISLOCKED(vp) == LK_EXCLUSIVE) {
 			error = nfsrv_setextattr(vp, nap, p);
 			NFSD_DEBUG(4, "nfsrv_getattrdsrpc: aft setextat=%d\n",
 			    error);
 		}
 	}
 
 	m_freem(nd->nd_mrep);
 	free(nd, M_TEMP);
 	NFSD_DEBUG(4, "nfsrv_getattrdsrpc error=%d\n", error);
 	return (error);
 }
 
 /*
  * Seek call to a DS.
  *   fhp     - file handle of the DS file
  *   offp    - the starting offset upon entry; replaced by the offset in
  *             the reply when the DS reports success
  *   content - placed in the request after the offset
  *   eofp    - set from the boolean at the start of a successful reply
  *   nmp     - mount point of the DS the RPC is sent to
  * Returns 0 upon success, otherwise the error from newnfs_request(), from
  * parsing the reply or the status (nd_repstat) in the reply.
  */
 static int
 nfsrv_seekdsrpc(fhandle_t *fhp, off_t *offp, int content, bool *eofp,
     struct ucred *cred, NFSPROC_T *p, struct nfsmount *nmp)
 {
 	uint32_t *tl;
 	struct nfsrv_descript *nd;
 	nfsv4stateid_t st;
 	int error;
 	
 	NFSD_DEBUG(4, "in nfsrv_seekdsrpc\n");
 	/*
 	 * Use a stateid where other is an alternating 01010 pattern and
 	 * seqid is 0xffffffff.  This value is not defined as special by
 	 * the RFC and is used by the FreeBSD NFS server to indicate an
 	 * MDS->DS proxy operation.
 	 */
 	st.other[0] = 0x55555555;
 	st.other[1] = 0x55555555;
 	st.other[2] = 0x55555555;
 	st.seqid = 0xffffffff;
 	nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO);
 	nfscl_reqstart(nd, NFSPROC_SEEKDS, nmp, (u_int8_t *)fhp,
 	    sizeof(fhandle_t), NULL, NULL, 0, 0);
 	nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID);
 	/* The arguments after the stateid: 64bit offset, then "content". */
 	NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + NFSX_UNSIGNED);
 	txdr_hyper(*offp, tl); tl += 2;
 	*tl = txdr_unsigned(content);
 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
 	if (error != 0) {
 		/* Only the descriptor is released on this path. */
 		free(nd, M_TEMP);
 		return (error);
 	}
 	NFSD_DEBUG(4, "nfsrv_seekdsrpc: aft seekrpc=%d\n", nd->nd_repstat);
 	if (nd->nd_repstat == 0) {
 		/*
 		 * The reply is the eof boolean followed by a 64bit offset.
 		 * NOTE(review): NFSM_DISSECT() is a macro defined elsewhere;
 		 * presumably it sets "error" and jumps to nfsmout when the
 		 * reply is too short - confirm.
 		 */
 		NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED + NFSX_HYPER);
 		if (*tl++ == newnfs_true)
 			*eofp = true;
 		else
 			*eofp = false;
 		*offp = fxdr_hyper(tl);
 	} else
 		error = nd->nd_repstat;
 nfsmout:
 	/* Common exit once a reply exists: free the reply and descriptor. */
 	m_freem(nd->nd_mrep);
 	free(nd, M_TEMP);
 	NFSD_DEBUG(4, "nfsrv_seekdsrpc error=%d\n", error);
 	return (error);
 }
 
 /*
  * Get the device id and file handle for a DS file.
  * This is a wrapper around nfsrv_dsgetsockmnt() that supplies a temporary
  * 1024 byte buffer and asks only for the mirror count, file handle(s) and
  * device id(s).  Returns whatever nfsrv_dsgetsockmnt() returns.
  */
 int
 nfsrv_dsgetdevandfh(struct vnode *vp, NFSPROC_T *p, int *mirrorcntp,
     fhandle_t *fhp, char *devid)
 {
 	char *scratch;
 	int error, scratchlen = 1024;
 
 	/* The buffer is needed by the callee, but is not used afterwards. */
 	scratch = malloc(scratchlen, M_TEMP, M_WAITOK);
 	error = nfsrv_dsgetsockmnt(vp, 0, scratch, &scratchlen, mirrorcntp,
 	    p, NULL, fhp, devid, NULL, NULL, NULL, NULL, NULL, NULL);
 	free(scratch, M_TEMP);
 
 	return (error);
 }
 
 /*
  * Do a Lookup against the DS for the filename.
  *   vp   - not used by this function
  *   dvp  - directory vnode the name (pf->dsf_filename) is looked up in
  *   pf   - supplies the file name
  *   nvpp - set to the vnode found, only when 0 is returned
  * The lookup is done with LOCKPARENT | LOCKLEAF and a LK_SHARED lock type,
  * using the credentials returned by newnfs_getcred().
  * Returns the value returned by VOP_LOOKUP().
  */
 static int
 nfsrv_pnfslookupds(struct vnode *vp, struct vnode *dvp, struct pnfsdsfile *pf,
     struct vnode **nvpp, NFSPROC_T *p)
 {
 	struct nameidata named;
 	struct ucred *tcred;
 	char *bufp;
 	u_long *hashp;
 	struct vnode *nvp;
 	int error;
 
 	tcred = newnfs_getcred();
 	/* Only the fields of "named" assigned below are initialized here. */
 	named.ni_cnd.cn_nameiop = LOOKUP;
 	named.ni_cnd.cn_lkflags = LK_SHARED | LK_RETRY;
 	named.ni_cnd.cn_cred = tcred;
 	named.ni_cnd.cn_thread = p;
 	named.ni_cnd.cn_flags = ISLASTCN | LOCKPARENT | LOCKLEAF | SAVENAME;
 	nfsvno_setpathbuf(&named, &bufp, &hashp);
 	named.ni_cnd.cn_nameptr = bufp;
 	/*
 	 * NOTE(review): cn_namelen is the full strlen() of the name while
 	 * the copy is bounded by NAME_MAX; these agree only if dsf_filename
 	 * is always shorter than NAME_MAX - confirm.
 	 */
 	named.ni_cnd.cn_namelen = strlen(pf->dsf_filename);
 	strlcpy(bufp, pf->dsf_filename, NAME_MAX);
 	NFSD_DEBUG(4, "nfsrv_pnfslookupds: filename=%s\n", bufp);
 	error = VOP_LOOKUP(dvp, &nvp, &named.ni_cnd);
 	NFSD_DEBUG(4, "nfsrv_pnfslookupds: aft LOOKUP=%d\n", error);
 	/* The credentials and path buffer are released in all cases. */
 	NFSFREECRED(tcred);
 	nfsvno_relpathbuf(&named);
 	if (error == 0)
 		*nvpp = nvp;
 	NFSD_DEBUG(4, "eo nfsrv_pnfslookupds=%d\n", error);
 	return (error);
 }
 
 /*
  * Set the file handle to the correct one.
  * The file handle of the DS file's vnode "nvp" is copied into pf->dsf_fh
  * and, when it is safe to do so, "pf" is written back to the
  * "pnfsd.dsfile" extended attribute of "vp".  A failure to write the
  * extended attribute is only reported via the debug message.
  */
 static void
 nfsrv_pnfssetfh(struct vnode *vp, struct pnfsdsfile *pf, char *devid,
     char *fnamep, struct vnode *nvp, NFSPROC_T *p)
 {
 	struct nfsnode *dsnp = VTONFS(nvp);
 	int ret;
 
 	NFSBCOPY(dsnp->n_fhp->nfh_fh, &pf->dsf_fh, NFSX_MYFH);
 
 	/*
 	 * We can only do a vn_set_extattr() if the vnode is exclusively
 	 * locked and vn_start_write() has been done.  If devid != NULL or
 	 * fnamep != NULL or the vnode is shared locked, vn_start_write()
 	 * may not have been done.
 	 * If not done now, it will be done on a future call.
 	 */
 	ret = 0;
 	if (devid == NULL && fnamep == NULL &&
 	    NFSVOPISLOCKED(vp) == LK_EXCLUSIVE) {
 		ret = vn_extattr_set(vp, IO_NODELOCKED,
 		    EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsfile", sizeof(*pf),
 		    (char *)pf, p);
 	}
 	NFSD_DEBUG(4, "eo nfsrv_pnfssetfh=%d\n", ret);
 }
 
 /*
  * Cause RPCs waiting on "nmp" to fail.  This is called for a DS mount point
  * when the DS has failed.
  * The only work done here is the call to newnfs_nmcancelreqs(nmp).
  */
 void
 nfsrv_killrpcs(struct nfsmount *nmp)
 {
 
 	/*
 	 * Call newnfs_nmcancelreqs() to cause
 	 * any RPCs in progress on the mount point to
 	 * fail.
 	 * This will cause any process waiting for an
 	 * RPC to complete while holding a vnode lock
 	 * on the mounted-on vnode (such as "df" or
 	 * a non-forced "umount") to fail.
 	 * This will unlock the mounted-on vnode so
 	 * a forced dismount can succeed.
 	 * The NFSMNTP_CANCELRPCS flag should be set when this function is
 	 * called.  (It is the caller's job to set it; this function does
 	 * not check the flag.)
 	 */
 	newnfs_nmcancelreqs(nmp);
 }
 
 /*
  * Sum up the statfs info for each of the DSs, so that the client will
  * receive the total for all DSs.
  *   sf - the totals are added to f_blocks, f_bfree and f_bavail, in units
  *        of sf->f_bsize.  If sf->f_bsize is 0 upon entry, it is set from
  *        the first DS (or to 8192 if that DS reports 0).
  *   mp - the MDS file system; DSs assigned to its fsid are summed.  If
  *        there are none, the DSs not assigned to any file system are
  *        summed instead.
  * Returns ENXIO if no DSs are configured, otherwise 0 or the first error
  * returned by VFS_STATFS().
  */
 static int
 nfsrv_pnfsstatfs(struct statfs *sf, struct mount *mp)
 {
 	struct statfs *tsf;
 	struct nfsdevice *ds;
 	struct vnode **dvpp, **tdvpp, *dvp;
 	uint64_t tot;
 	int cnt, error = 0, i, maxcnt;
 
 	/*
 	 * Sample the DS count once, so that the size of the array and the
 	 * bound used while filling it are guaranteed to be the same value,
 	 * even if the global changes before the list is locked.
 	 */
 	maxcnt = nfsrv_devidcnt;
 	if (maxcnt <= 0)
 		return (ENXIO);
 	dvpp = mallocarray(maxcnt, sizeof(*dvpp), M_TEMP, M_WAITOK);
 	tsf = malloc(sizeof(*tsf), M_TEMP, M_WAITOK);
 
 	/* Get an array of the dvps for the DSs. */
 	tdvpp = dvpp;
 	i = 0;
 	NFSDDSLOCK();
 	/* First, search for matches for same file system. */
 	TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) {
 		if (ds->nfsdev_nmp != NULL && ds->nfsdev_mdsisset != 0 &&
 		    fsidcmp(&ds->nfsdev_mdsfsid, &mp->mnt_stat.f_fsid) == 0) {
 			/*
 			 * Check for a full array before counting the entry,
 			 * so that "i" never exceeds the number of slots
 			 * that were actually filled in.
 			 */
 			if (i >= maxcnt)
 				break;
 			*tdvpp++ = ds->nfsdev_dvp;
 			i++;
 		}
 	}
 	/*
 	 * If no matches for same file system, total all servers not assigned
 	 * to a file system.
 	 */
 	if (i == 0) {
 		TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) {
 			if (ds->nfsdev_nmp != NULL &&
 			    ds->nfsdev_mdsisset == 0) {
 				if (i >= maxcnt)
 					break;
 				*tdvpp++ = ds->nfsdev_dvp;
 				i++;
 			}
 		}
 	}
 	NFSDDSUNLOCK();
 	cnt = i;
 
 	/* Do a VFS_STATFS() for each of the DSs and sum them up. */
 	tdvpp = dvpp;
 	for (i = 0; i < cnt && error == 0; i++) {
 		dvp = *tdvpp++;
 		error = VFS_STATFS(dvp->v_mount, tsf);
 		if (error == 0) {
 			/* The first DS decides the block size, if not set. */
 			if (sf->f_bsize == 0) {
 				if (tsf->f_bsize > 0)
 					sf->f_bsize = tsf->f_bsize;
 				else
 					sf->f_bsize = 8192;
 			}
 			/*
 			 * When the block sizes differ, convert the count to
 			 * bytes and then to blocks of sf->f_bsize.
 			 */
 			if (tsf->f_blocks > 0) {
 				if (sf->f_bsize != tsf->f_bsize) {
 					tot = tsf->f_blocks * tsf->f_bsize;
 					sf->f_blocks += (tot / sf->f_bsize);
 				} else
 					sf->f_blocks += tsf->f_blocks;
 			}
 			if (tsf->f_bfree > 0) {
 				if (sf->f_bsize != tsf->f_bsize) {
 					tot = tsf->f_bfree * tsf->f_bsize;
 					sf->f_bfree += (tot / sf->f_bsize);
 				} else
 					sf->f_bfree += tsf->f_bfree;
 			}
 			if (tsf->f_bavail > 0) {
 				if (sf->f_bsize != tsf->f_bsize) {
 					tot = tsf->f_bavail * tsf->f_bsize;
 					sf->f_bavail += (tot / sf->f_bsize);
 				} else
 					sf->f_bavail += tsf->f_bavail;
 			}
 		}
 	}
 	free(tsf, M_TEMP);
 	free(dvpp, M_TEMP);
 	return (error);
 }
 
 /*
  * Set an NFSv4 acl.
  * The ACL is set on "vp" via VOP_SETACL() and, if that works, on the DS
  * file(s) via nfsrv_dssetacl(), where ENOENT is not treated as an error.
  * NFSERR_ATTRNOTSUPP is returned when ACLs are not enabled/supported or
  * the ACL has too many entries.
  */
 int
 nfsrv_setacl(struct vnode *vp, NFSACL_T *aclp, struct ucred *cred, NFSPROC_T *p)
 {
 	int error;
 
 	if (nfsrv_useacl == 0 || nfs_supportsnfsv4acls(vp) == 0) {
 		error = NFSERR_ATTRNOTSUPP;
 	} else if (aclp->acl_cnt > (ACL_MAX_ENTRIES - 6) / 2) {
 		/*
 		 * With NFSv4 ACLs, chmod(2) may need to add additional
 		 * entries.  Make sure it has enough room for that -
 		 * splitting every entry into two and appending "canonical
 		 * six" entries at the end.
 		 * Cribbed out of kern/vfs_acl.c - Rick M.
 		 */
 		error = NFSERR_ATTRNOTSUPP;
 	} else {
 		error = VOP_SETACL(vp, ACL_TYPE_NFS4, aclp, cred, p);
 		if (error == 0) {
 			error = nfsrv_dssetacl(vp, aclp, cred, p);
 			/* ENOENT from the DS step is not a failure. */
 			if (error == ENOENT)
 				error = 0;
 		}
 	}
 
 	NFSEXITCODE(error);
 	return (error);
 }
 
 /*
  * Seek vnode op call (actually it is a VOP_IOCTL()).
  * This function is called with the vnode locked, but unlocks and vrele()s
  * the vp before returning.
  *   cmd     - the ioctl command passed to VOP_IOCTL(); FIOSEEKHOLE gets
  *             an extra end of file check
  *   offp    - the offset to seek from, updated by the seek
  *   content - passed to the DS when the seek is proxied
  *   eofp    - set to true when the resulting offset is the file's size
  * Returns 0 or an error from the proxied seek, VOP_IOCTL() or
  * nfsvno_getattr().
  */
 int
 nfsvno_seek(struct nfsrv_descript *nd, struct vnode *vp, u_long cmd,
     off_t *offp, int content, bool *eofp, struct ucred *cred, NFSPROC_T *p)
 {
 	struct nfsvattr at;
 	int error, ret;
 
 	ASSERT_VOP_LOCKED(vp, "nfsvno_seek vp");
 	/*
 	 * Attempt to seek on a DS file. A return of ENOENT implies
 	 * there is no DS file to seek on.
 	 */
 	error = nfsrv_proxyds(vp, 0, 0, cred, p, NFSPROC_SEEKDS, NULL,
 	    NULL, NULL, NULL, NULL, offp, content, eofp);
 	if (error != ENOENT) {
 		/* The DS handled it (or failed); unlock and release vp. */
 		vput(vp);
 		return (error);
 	}
 
 	/*
 	 * Do the VOP_IOCTL() call.  For the case where *offp == file_size,
 	 * VOP_IOCTL() will return ENXIO.  However, the correct reply for
 	 * NFSv4.2 is *eofp == true and error == 0 for this case.
 	 */
 	NFSVOPUNLOCK(vp);
 	error = VOP_IOCTL(vp, cmd, offp, 0, cred, p);
 	*eofp = false;
 	if (error == ENXIO || (error == 0 && cmd == FIOSEEKHOLE)) {
 		/* Handle the cases where we might be at EOF. */
 		ret = nfsvno_getattr(vp, &at, nd, p, 0, NULL);
 		if (ret == 0 && *offp == at.na_size) {
 			*eofp = true;
 			error = 0;
 		}
 		/* A Getattr failure is only reported if the seek worked. */
 		if (ret != 0 && error == 0)
 			error = ret;
 	}
 	/* vp was unlocked above, so only the reference is dropped here. */
 	vrele(vp);
 	NFSEXITCODE(error);
 	return (error);
 }
 
 /*
  * Allocate vnode op call.
  * The allocation is first tried on the DS file(s).  If there is no DS
  * file, VOP_ALLOCATE() is done on "vp", which must be exclusively locked.
  * NFSERR_IO is returned when the allocation is still incomplete after the
  * retry limit.
  */
 int
 nfsvno_allocate(struct vnode *vp, off_t off, off_t len, struct ucred *cred,
     NFSPROC_T *p)
 {
 	int error, tries;
 
 	ASSERT_VOP_ELOCKED(vp, "nfsvno_allocate vp");
 	/*
 	 * Attempt to allocate on a DS file. A return of ENOENT implies
 	 * there is no DS file to allocate on.
 	 */
 	error = nfsrv_proxyds(vp, off, 0, cred, p, NFSPROC_ALLOCATE, NULL,
 	    NULL, NULL, NULL, NULL, &len, 0, NULL);
 	if (error != ENOENT)
 		return (error);
 
 	/*
 	 * Do the actual VOP_ALLOCATE(), looping a reasonable number of
 	 * times to achieve completion.
 	 */
 	error = 0;
 	for (tries = 0; error == 0 && len > 0 && tries < 20; tries++)
 		error = VOP_ALLOCATE(vp, &off, &len);
 
 	/* Some of the range is still unallocated, so give up. */
 	if (error == 0 && len > 0)
 		error = NFSERR_IO;
 	NFSEXITCODE(error);
 	return (error);
 }
 
 /*
  * Get Extended Attribute vnode op into an mbuf list.
  *   name    - name of the attribute in the user namespace
  *   maxresp - used to limit the size of the attribute returned
  *   mpp     - set to the head of the mbuf list holding the value
  *   mpendp  - set to the second mbuf pointer from nfsrv_createiovec()
  *   lenp    - set to the length of the value (0 upon error)
  * *mpp and *mpendp are only set upon success and are NULL for a zero
  * length attribute.
  * Returns 0, NFSERR_NOXATTR if the size query fails, NFSERR_XATTR2BIG if
  * the value is too large, or the error from the MAC check or the read.
  */
 int
 nfsvno_getxattr(struct vnode *vp, char *name, uint32_t maxresp,
     struct ucred *cred, struct thread *p, struct mbuf **mpp,
     struct mbuf **mpendp, int *lenp)
 {
 	struct iovec *iv;
 	struct uio io, *uiop = &io;
 	struct mbuf *m, *m2;
 	int alen, error, len, tlen;
 	size_t siz;
 
 	/* First, find out the size of the extended attribute. */
 	error = VOP_GETEXTATTR(vp, EXTATTR_NAMESPACE_USER, name, NULL,
 	    &siz, cred, p);
 	if (error != 0)
 		return (NFSERR_NOXATTR);
 	/*
 	 * NOTE(review): maxresp is unsigned, so this subtraction presumably
 	 * wraps if maxresp is less than NFS_MAXXDR - confirm that callers
 	 * never pass a value that small.
 	 */
 	if (siz > maxresp - NFS_MAXXDR)
 		return (NFSERR_XATTR2BIG);
 	len = siz;
 	/* The mbuf space is sized for the rounded up length. */
 	tlen = NFSM_RNDUP(len);
 	if (tlen > 0) {
 		uiop->uio_iovcnt = nfsrv_createiovec(tlen, &m, &m2, &iv);
 		uiop->uio_iov = iv;
 	} else {
 		/* Zero length attribute: no mbufs and nothing to read. */
 		uiop->uio_iovcnt = 0;
 		uiop->uio_iov = iv = NULL;
 		m = m2 = NULL;
 	}
 	uiop->uio_offset = 0;
 	uiop->uio_resid = tlen;
 	uiop->uio_rw = UIO_READ;
 	uiop->uio_segflg = UIO_SYSSPACE;
 	uiop->uio_td = p;
 #ifdef MAC
 	/* Note that the size query above is done before this check. */
 	error = mac_vnode_check_getextattr(cred, vp, EXTATTR_NAMESPACE_USER,
 	    name);
 	if (error != 0)
 		goto out;
 #endif
 
 	if (tlen > 0)
 		error = VOP_GETEXTATTR(vp, EXTATTR_NAMESPACE_USER, name, uiop,
 		    NULL, cred, p);
 	if (error != 0)
 		goto out;
 	if (uiop->uio_resid > 0) {
 		/*
 		 * Fewer than tlen bytes were read, so recompute the real
 		 * and rounded up lengths and adjust the mbuf list to fit.
 		 */
 		alen = tlen;
 		len = tlen - uiop->uio_resid;
 		tlen = NFSM_RNDUP(len);
 		if (alen != tlen)
 			printf("nfsvno_getxattr: weird size read\n");
 		nfsrv_adj(m, alen - tlen, tlen - len);
 	}
 	*lenp = len;
 	*mpp = m;
 	*mpendp = m2;
 
 out:
 	if (error != 0) {
 		if (m != NULL)
 			m_freem(m);
 		*lenp = 0;
 	}
 	/* iv is NULL for the zero length case. */
 	free(iv, M_TEMP);
 	NFSEXITCODE(error);
 	return (error);
 }
 
 /*
  * Set Extended attribute vnode op from an mbuf list.
  *   name - name of the attribute in the user namespace
  *   len  - length of the value in bytes
  *   m/cp - mbuf and position within it, handed to nfsrv_createiovecw()
  *          to build the iovec that describes the value
  * Returns 0 or the error from the MAC check, nfsrv_createiovecw() or
  * VOP_SETEXTATTR().
  */
 int
 nfsvno_setxattr(struct vnode *vp, char *name, int len, struct mbuf *m,
     char *cp, struct ucred *cred, struct thread *p)
 {
 	struct iovec *iv;
 	struct uio uio, *uiop = &uio;
 	int cnt, error;
 
 	error = 0;
 #ifdef MAC
 	error = mac_vnode_check_setextattr(cred, vp, EXTATTR_NAMESPACE_USER,
 	    name);
 #endif
 	if (error != 0)
 		goto out;
 
 	uiop->uio_rw = UIO_WRITE;
 	uiop->uio_segflg = UIO_SYSSPACE;
 	uiop->uio_td = p;
 	uiop->uio_offset = 0;
 	uiop->uio_resid = len;
 	if (len > 0) {
 		error = nfsrv_createiovecw(len, m, cp, &iv, &cnt);
 		uiop->uio_iov = iv;
 		uiop->uio_iovcnt = cnt;
 	} else {
 		/* Zero length value: an empty uio is passed down. */
 		uiop->uio_iov = iv = NULL;
 		uiop->uio_iovcnt = 0;
 	}
 	/*
 	 * iv is only freed when nfsrv_createiovecw() succeeded (or was not
 	 * called).
 	 * NOTE(review): this presumably relies on nfsrv_createiovecw() not
 	 * leaving an allocation in iv when it fails - confirm.
 	 */
 	if (error == 0) {
 		error = VOP_SETEXTATTR(vp, EXTATTR_NAMESPACE_USER, name, uiop,
 		    cred, p);
 		free(iv, M_TEMP);
 	}
 
 out:
 	NFSEXITCODE(error);
 	return (error);
 }
 
 /*
  * Remove Extended attribute vnode op.
  */
 int
 nfsvno_rmxattr(struct nfsrv_descript *nd, struct vnode *vp, char *name,
     struct ucred *cred, struct thread *p)
 {
 	int error;
 
 	/*
 	 * Get rid of any delegations.  I am not sure why this is required,
 	 * but RFC-8276 says so.
 	 */
 	error = nfsrv_checkremove(vp, 0, nd, nd->nd_clientid, p);
 	if (error != 0)
 		goto out;
 #ifdef MAC
 	error = mac_vnode_check_deleteextattr(cred, vp, EXTATTR_NAMESPACE_USER,
 	    name);
 	if (error != 0)
 		goto out;
 #endif
 
 	error = VOP_DELETEEXTATTR(vp, EXTATTR_NAMESPACE_USER, name, cred, p);
 	if (error == EOPNOTSUPP)
 		error = VOP_SETEXTATTR(vp, EXTATTR_NAMESPACE_USER, name, NULL,
 		    cred, p);
 out:
 	NFSEXITCODE(error);
 	return (error);
 }
 
 /*
  * List Extended Attribute vnode op into a malloc'd buffer.
  *   cookie - number of bytes of the list the caller already has
  *   bufp   - set to a buffer allocated with malloc(M_TEMP) that holds the
  *            list read from offset 0, or to NULL when there is nothing
  *            new or upon error.  The buffer is not freed here when 0 is
  *            returned.
  *   lenp   - upon entry, the number of bytes wanted beyond "cookie"; upon
  *            return, the number of bytes placed in *bufp (which includes
  *            the first "cookie" bytes), or 0 when there is nothing new
  *   eofp   - set to true when the list was not truncated
  * Returns 0, NFSERR_NOXATTR if the size query fails, NFSERR_XATTR2BIG if
  * more than 10Mbytes would be needed, or the error from the MAC check or
  * the read.
  */
 int
 nfsvno_listxattr(struct vnode *vp, uint64_t cookie, struct ucred *cred,
     struct thread *p, u_char **bufp, uint32_t *lenp, bool *eofp)
 {
 	struct iovec iv;
 	struct uio io;
 	int error;
 	size_t siz;
 
 	*bufp = NULL;
 	/* First, find out the size of the extended attribute. */
 	error = VOP_LISTEXTATTR(vp, EXTATTR_NAMESPACE_USER, NULL, &siz, cred,
 	    p);
 	if (error != 0)
 		return (NFSERR_NOXATTR);
 	if (siz <= cookie) {
 		/* Nothing beyond the cookie; error is still 0 here. */
 		*lenp = 0;
 		*eofp = true;
 		goto out;
 	}
 	/* Clip the read to the cookie plus what the caller asked for. */
 	if (siz > cookie + *lenp) {
 		siz = cookie + *lenp;
 		*eofp = false;
 	} else
 		*eofp = true;
 	/* Just choose a sanity limit of 10Mbytes for malloc(M_TEMP). */
 	if (siz > 10 * 1024 * 1024) {
 		error = NFSERR_XATTR2BIG;
 		goto out;
 	}
 	*bufp = malloc(siz, M_TEMP, M_WAITOK);
 	iv.iov_base = *bufp;
 	iv.iov_len = siz;
 	io.uio_iovcnt = 1;
 	io.uio_iov = &iv;
 	io.uio_offset = 0;
 	io.uio_resid = siz;
 	io.uio_rw = UIO_READ;
 	io.uio_segflg = UIO_SYSSPACE;
 	io.uio_td = p;
 #ifdef MAC
 	/* Note that the size query above is done before this check. */
 	error = mac_vnode_check_listextattr(cred, vp, EXTATTR_NAMESPACE_USER);
 	if (error != 0)
 		goto out;
 #endif
 
 	error = VOP_LISTEXTATTR(vp, EXTATTR_NAMESPACE_USER, &io, NULL, cred,
 	    p);
 	if (error != 0)
 		goto out;
 	/* Report only what was actually read. */
 	if (io.uio_resid > 0)
 		siz -= io.uio_resid;
 	*lenp = siz;
 
 out:
 	if (error != 0) {
 		/* *bufp may still be NULL here, which free() is passed. */
 		free(*bufp, M_TEMP);
 		*bufp = NULL;
 	}
 	NFSEXITCODE(error);
 	return (error);
 }
 
 /* Hook that is set to nfssvc_nfsd while this module is loaded. */
 extern int (*nfsd_call_nfsd)(struct thread *, struct nfssvc_args *);
 
 /*
  * Called once to initialize data structures...
  * This is the module event handler for the nfsd module:
  *   MOD_LOAD   - initializes the locks, caches and hooks; does nothing
  *                and returns 0 if that has already been done
  *   MOD_UNLOAD - returns EBUSY while newnfs_numnfsd is non-zero,
  *                otherwise clears the hooks and tears everything down
  * Any other event type gets EOPNOTSUPP.
  */
 static int
 nfsd_modevent(module_t mod, int type, void *data)
 {
 	int error = 0, i;
 	static int loaded = 0;
 
 	switch (type) {
 	case MOD_LOAD:
 		if (loaded)
 			goto out;
 		newnfs_portinit();
 		/* Locks for the reply cache hash tables. */
 		for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) {
 			mtx_init(&nfsrchash_table[i].mtx, "nfsrtc", NULL,
 			    MTX_DEF);
 			mtx_init(&nfsrcahash_table[i].mtx, "nfsrtca", NULL,
 			    MTX_DEF);
 		}
 		mtx_init(&nfsrc_udpmtx, "nfsuc", NULL, MTX_DEF);
 		mtx_init(&nfs_v4root_mutex, "nfs4rt", NULL, MTX_DEF);
 		mtx_init(&nfsv4root_mnt.mnt_mtx, "nfs4mnt", NULL, MTX_DEF);
 		mtx_init(&nfsrv_dontlistlock_mtx, "nfs4dnl", NULL, MTX_DEF);
 		mtx_init(&nfsrv_recalllock_mtx, "nfs4rec", NULL, MTX_DEF);
 		lockinit(&nfsv4root_mnt.mnt_explock, PVFS, "explock", 0, 0);
 		nfsrvd_initcache();
 		nfsd_init();
 		NFSD_LOCK();
 		nfsrvd_init(0);
 		NFSD_UNLOCK();
 		nfsd_mntinit();
 #ifdef VV_DISABLEDELEG
 		vn_deleg_ops.vndeleg_recall = nfsd_recalldelegation;
 		vn_deleg_ops.vndeleg_disable = nfsd_disabledelegation;
 #endif
 		/* Set the hooks only after the initialization above. */
 		nfsd_call_servertimer = nfsrv_servertimer;
 		nfsd_call_nfsd = nfssvc_nfsd;
 		loaded = 1;
 		break;
 
 	case MOD_UNLOAD:
 		/* Refuse to unload while newnfs_numnfsd is non-zero. */
 		if (newnfs_numnfsd != 0) {
 			error = EBUSY;
 			break;
 		}
 
 		/* Clear the hooks before the state they use is destroyed. */
 #ifdef VV_DISABLEDELEG
 		vn_deleg_ops.vndeleg_recall = NULL;
 		vn_deleg_ops.vndeleg_disable = NULL;
 #endif
 		nfsd_call_servertimer = NULL;
 		nfsd_call_nfsd = NULL;
 
 		/* Clean out all NFSv4 state. */
 		nfsrv_throwawayallstate(curthread);
 
 		/* Clean the NFS server reply cache */
 		nfsrvd_cleancache();
 
 		/* Free up the krpc server pool. */
 		if (nfsrvd_pool != NULL)
 			svcpool_destroy(nfsrvd_pool);
 
 		/* and get rid of the locks */
 		for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) {
 			mtx_destroy(&nfsrchash_table[i].mtx);
 			mtx_destroy(&nfsrcahash_table[i].mtx);
 		}
 		mtx_destroy(&nfsrc_udpmtx);
 		mtx_destroy(&nfs_v4root_mutex);
 		mtx_destroy(&nfsv4root_mnt.mnt_mtx);
 		mtx_destroy(&nfsrv_dontlistlock_mtx);
 		mtx_destroy(&nfsrv_recalllock_mtx);
 		/*
 		 * The session and layout hash locks are not initialized in
 		 * the MOD_LOAD case above; presumably that is done by
 		 * nfsd_init() or similar - confirm.
 		 */
 		for (i = 0; i < nfsrv_sessionhashsize; i++)
 			mtx_destroy(&nfssessionhash[i].mtx);
 		if (nfslayouthash != NULL) {
 			for (i = 0; i < nfsrv_layouthashsize; i++)
 				mtx_destroy(&nfslayouthash[i].mtx);
 			free(nfslayouthash, M_NFSDSESSION);
 		}
 		lockdestroy(&nfsv4root_mnt.mnt_explock);
 		free(nfsclienthash, M_NFSDCLIENT);
 		free(nfslockhash, M_NFSDLOCKFILE);
 		free(nfssessionhash, M_NFSDSESSION);
 		loaded = 0;
 		break;
 	default:
 		error = EOPNOTSUPP;
 		break;
 	}
 
 out:
 	NFSEXITCODE(error);
 	return (error);
 }
 /* Module description: name, event handler and (unused) private data. */
 static moduledata_t nfsd_mod = {
 	"nfsd",
 	nfsd_modevent,
 	NULL,
 };
 DECLARE_MODULE(nfsd, nfsd_mod, SI_SUB_VFS, SI_ORDER_ANY);
 
 /* So that loader and kldload(2) can find us, wherever we are.. */
 MODULE_VERSION(nfsd, 1);
 /* The modules that must be loaded along with this one. */
 MODULE_DEPEND(nfsd, nfscommon, 1, 1, 1);
 MODULE_DEPEND(nfsd, nfslockd, 1, 1, 1);
 MODULE_DEPEND(nfsd, krpc, 1, 1, 1);
 MODULE_DEPEND(nfsd, nfssvc, 1, 1, 1);
 
Index: head/sys/fs/unionfs/union_vfsops.c
===================================================================
--- head/sys/fs/unionfs/union_vfsops.c	(revision 362157)
+++ head/sys/fs/unionfs/union_vfsops.c	(revision 362158)
@@ -1,504 +1,504 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 1994, 1995 The Regents of the University of California.
  * Copyright (c) 1994, 1995 Jan-Simon Pendry.
  * Copyright (c) 2005, 2006, 2012 Masanori Ozawa <ozawa@ongs.co.jp>, ONGS Inc.
  * Copyright (c) 2006, 2012 Daichi Goto <daichi@freebsd.org>
  * All rights reserved.
  *
  * This code is derived from software donated to Berkeley by
  * Jan-Simon Pendry.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)union_vfsops.c	8.20 (Berkeley) 5/20/95
  * $FreeBSD$
  */
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kdb.h>
 #include <sys/fcntl.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mount.h>
 #include <sys/namei.h>
 #include <sys/proc.h>
 #include <sys/vnode.h>
 #include <sys/stat.h>
 
 #include <fs/unionfs/union.h>
 
 /* Allocation type used for the per-mount struct unionfs_mount. */
 static MALLOC_DEFINE(M_UNIONFSMNT, "UNIONFS mount", "UNIONFS mount structure");
 
 /* Forward declarations of the VFS operations implemented in this file. */
 static vfs_fhtovp_t	unionfs_fhtovp;
 static vfs_checkexp_t	unionfs_checkexp;
 static vfs_mount_t	unionfs_domount;
 static vfs_quotactl_t	unionfs_quotactl;
 static vfs_root_t	unionfs_root;
 static vfs_sync_t	unionfs_sync;
 static vfs_statfs_t	unionfs_statfs;
 static vfs_unmount_t	unionfs_unmount;
 static vfs_vget_t	unionfs_vget;
 static vfs_extattrctl_t	unionfs_extattrctl;
 
 /* Operations vector; initialised after the functions it refers to. */
 static struct vfsops unionfs_vfsops;
 
 /*
  * Mount a unionfs layer over mp->mnt_vnodecovered.
  *
  * Options are read from mp->mnt_optnew:
  *   target (or from)	path of the other layer; required and must be a
  *			NUL-terminated string
  *   below		make the target the lower layer instead of the upper
  *   udir, ufile		octal permission bits, masked to the rwx bits
  *   uid, gid, copymode, whiteout
  *			only examined when the mounting credential's real
  *			uid is 0
  *
  * Returns 0 on success.  Returns EOPNOTSUPP for a root-filesystem or an
  * update mount, EINVAL for a missing or malformed option, or the error
  * reported by VOP_GETATTR(), namei() or unionfs_nodeget().
  */
 static int
 unionfs_domount(struct mount *mp)
 {
 	int		error;
 	struct vnode   *lowerrootvp;
 	struct vnode   *upperrootvp;
 	struct unionfs_mount *ump;
 	struct thread *td;
 	char           *target;
 	char           *tmp;
 	char           *ep;
 	int		len;
 	int		below;
 	uid_t		uid;
 	gid_t		gid;
 	u_short		udir;
 	u_short		ufile;
 	unionfs_copymode copymode;
 	unionfs_whitemode whitemode;
 	struct nameidata nd, *ndp;
 	struct vattr	va;
 
 	UNIONFSDEBUG("unionfs_mount(mp = %p)\n", (void *)mp);
 
 	error = 0;
 	below = 0;
 	uid = 0;
 	gid = 0;
 	udir = 0;
 	ufile = 0;
 	copymode = UNIONFS_TRANSPARENT;	/* default */
 	whitemode = UNIONFS_WHITE_ALWAYS;
 	ndp = &nd;
 	td = curthread;
 	target = NULL;
 	len = 0;
 
 	if (mp->mnt_flag & MNT_ROOTFS) {
 		vfs_mount_error(mp, "Cannot union mount root filesystem");
 		return (EOPNOTSUPP);
 	}
 
 	/*
 	 * Mount updates are not supported.
 	 */
 	if (mp->mnt_flag & MNT_UPDATE) {
 		vfs_mount_error(mp, "unionfs does not support mount update");
 		return (EOPNOTSUPP);
 	}
 
 	/*
 	 * Get argument
 	 */
 	error = vfs_getopt(mp->mnt_optnew, "target", (void **)&target, &len);
 	if (error)
 		error = vfs_getopt(mp->mnt_optnew, "from", (void **)&target,
 		    &len);
 	/*
 	 * An option may be present without a value (the other options below
 	 * are checked for a NULL value for the same reason), so make sure
 	 * there is at least one byte before looking at the terminator.
 	 */
 	if (error || target == NULL || len <= 0 || target[len - 1] != '\0') {
 		vfs_mount_error(mp, "Invalid target");
 		return (EINVAL);
 	}
 	if (vfs_getopt(mp->mnt_optnew, "below", NULL, NULL) == 0)
 		below = 1;
 	if (vfs_getopt(mp->mnt_optnew, "udir", (void **)&tmp, NULL) == 0) {
 		if (tmp != NULL)
 			udir = (mode_t)strtol(tmp, &ep, 8);
 		/* ep is only valid when tmp was parsed; test tmp first. */
 		if (tmp == NULL || *ep) {
 			vfs_mount_error(mp, "Invalid udir");
 			return (EINVAL);
 		}
 		udir &= S_IRWXU | S_IRWXG | S_IRWXO;
 	}
 	if (vfs_getopt(mp->mnt_optnew, "ufile", (void **)&tmp, NULL) == 0) {
 		if (tmp != NULL)
 			ufile = (mode_t)strtol(tmp, &ep, 8);
 		if (tmp == NULL || *ep) {
 			vfs_mount_error(mp, "Invalid ufile");
 			return (EINVAL);
 		}
 		ufile &= S_IRWXU | S_IRWXG | S_IRWXO;
 	}
 	/* If only one of udir/ufile was given, use it for both. */
 	if (udir == 0 && ufile != 0)
 		udir = ufile;
 	if (ufile == 0 && udir != 0)
 		ufile = udir;
 
 	/*
 	 * Take the remaining defaults (mode, uid, gid) from the attributes
 	 * of the covered vnode.
 	 */
 	vn_lock(mp->mnt_vnodecovered, LK_SHARED | LK_RETRY);
 	error = VOP_GETATTR(mp->mnt_vnodecovered, &va, mp->mnt_cred);
 	if (!error) {
 		if (udir == 0)
 			udir = va.va_mode;
 		if (ufile == 0)
 			ufile = va.va_mode;
 		uid = va.va_uid;
 		gid = va.va_gid;
 	}
 	VOP_UNLOCK(mp->mnt_vnodecovered);
 	if (error)
 		return (error);
 
 	if (mp->mnt_cred->cr_ruid == 0) {	/* root only */
 		if (vfs_getopt(mp->mnt_optnew, "uid", (void **)&tmp,
 		    NULL) == 0) {
 			if (tmp != NULL)
 				uid = (uid_t)strtol(tmp, &ep, 10);
 			if (tmp == NULL || *ep) {
 				vfs_mount_error(mp, "Invalid uid");
 				return (EINVAL);
 			}
 		}
 		if (vfs_getopt(mp->mnt_optnew, "gid", (void **)&tmp,
 		    NULL) == 0) {
 			if (tmp != NULL)
 				gid = (gid_t)strtol(tmp, &ep, 10);
 			if (tmp == NULL || *ep) {
 				vfs_mount_error(mp, "Invalid gid");
 				return (EINVAL);
 			}
 		}
 		if (vfs_getopt(mp->mnt_optnew, "copymode", (void **)&tmp,
 		    NULL) == 0) {
 			if (tmp == NULL) {
 				vfs_mount_error(mp, "Invalid copymode");
 				return (EINVAL);
 			} else if (strcasecmp(tmp, "traditional") == 0)
 				copymode = UNIONFS_TRADITIONAL;
 			else if (strcasecmp(tmp, "transparent") == 0)
 				copymode = UNIONFS_TRANSPARENT;
 			else if (strcasecmp(tmp, "masquerade") == 0)
 				copymode = UNIONFS_MASQUERADE;
 			else {
 				vfs_mount_error(mp, "Invalid copymode");
 				return (EINVAL);
 			}
 		}
 		if (vfs_getopt(mp->mnt_optnew, "whiteout", (void **)&tmp,
 		    NULL) == 0) {
 			if (tmp == NULL) {
 				vfs_mount_error(mp, "Invalid whiteout mode");
 				return (EINVAL);
 			} else if (strcasecmp(tmp, "always") == 0)
 				whitemode = UNIONFS_WHITE_ALWAYS;
 			else if (strcasecmp(tmp, "whenneeded") == 0)
 				whitemode = UNIONFS_WHITE_WHENNEEDED;
 			else {
 				vfs_mount_error(mp, "Invalid whiteout mode");
 				return (EINVAL);
 			}
 		}
 	}
 	/* If copymode is UNIONFS_TRADITIONAL, uid/gid is mounted user. */
 	if (copymode == UNIONFS_TRADITIONAL) {
 		uid = mp->mnt_cred->cr_ruid;
 		gid = mp->mnt_cred->cr_rgid;
 	}
 
 	UNIONFSDEBUG("unionfs_mount: uid=%d, gid=%d\n", uid, gid);
 	UNIONFSDEBUG("unionfs_mount: udir=0%03o, ufile=0%03o\n", udir, ufile);
 	UNIONFSDEBUG("unionfs_mount: copymode=%d\n", copymode);
 
 	/*
 	 * Look up the target path; LOCKLEAF asks for the resulting vnode
 	 * to be returned locked.
 	 */
 	NDINIT(ndp, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, target, td);
 	if ((error = namei(ndp)))
 		return (error);
 
 	NDFREE(ndp, NDF_ONLY_PNBUF);
 
 	/* get root vnodes */
 	lowerrootvp = mp->mnt_vnodecovered;
 	upperrootvp = ndp->ni_vp;
 
 	/* create unionfs_mount */
 	ump = (struct unionfs_mount *)malloc(sizeof(struct unionfs_mount),
 	    M_UNIONFSMNT, M_WAITOK | M_ZERO);
 
 	/*
 	 * Save reference.  With "below" the roles are swapped, so the lock
 	 * is moved from the looked-up vnode to the covered vnode; either
 	 * way um_uppervp is the vnode left locked here.
 	 */
 	if (below) {
 		VOP_UNLOCK(upperrootvp);
 		vn_lock(lowerrootvp, LK_EXCLUSIVE | LK_RETRY);
 		ump->um_lowervp = upperrootvp;
 		ump->um_uppervp = lowerrootvp;
 	} else {
 		ump->um_lowervp = lowerrootvp;
 		ump->um_uppervp = upperrootvp;
 	}
 	ump->um_rootvp = NULLVP;
 	ump->um_uid = uid;
 	ump->um_gid = gid;
 	ump->um_udir = udir;
 	ump->um_ufile = ufile;
 	ump->um_copymode = copymode;
 	ump->um_whitemode = whitemode;
 
 	mp->mnt_data = ump;
 
 	/*
 	 * Copy upper layer's RDONLY flag.
 	 */
 	mp->mnt_flag |= ump->um_uppervp->v_mount->mnt_flag & MNT_RDONLY;
 
 	/*
 	 * Unlock the node
 	 */
 	VOP_UNLOCK(ump->um_uppervp);
 
 	/*
 	 * Get the unionfs root vnode.  The namei() reference on the target
 	 * is dropped afterwards whether or not this succeeded.
 	 */
 	error = unionfs_nodeget(mp, ump->um_uppervp, ump->um_lowervp,
 	    NULLVP, &(ump->um_rootvp), NULL, td);
 	vrele(upperrootvp);
 	if (error) {
 		free(ump, M_UNIONFSMNT);
 		mp->mnt_data = NULL;
 		return (error);
 	}
 
 	/* The union is local only if both of its layers are. */
 	MNT_ILOCK(mp);
 	if ((ump->um_lowervp->v_mount->mnt_flag & MNT_LOCAL) &&
 	    (ump->um_uppervp->v_mount->mnt_flag & MNT_LOCAL))
 		mp->mnt_flag |= MNT_LOCAL;
 	mp->mnt_kern_flag |= MNTK_NOMSYNC | MNTK_UNIONFS;
 	MNT_IUNLOCK(mp);
 
 	/*
 	 * Get new fsid
 	 */
 	vfs_getnewfsid(mp);
 
 	snprintf(mp->mnt_stat.f_mntfromname, MNAMELEN, "<%s>:%s",
 	    below ? "below" : "above", target);
 
 	UNIONFSDEBUG("unionfs_mount: from %s, on %s\n",
 	    mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname);
 
 	return (0);
 }
 
 /*
  * Free reference to unionfs layer
  */
 static int
 unionfs_unmount(struct mount *mp, int mntflags)
 {
 	struct unionfs_mount *ump;
 	int		error;
 	int		num;
 	int		freeing;
 	int		flags;
 
 	UNIONFSDEBUG("unionfs_unmount: mp = %p\n", (void *)mp);
 
 	ump = MOUNTTOUNIONFSMOUNT(mp);
 	flags = 0;
 
 	if (mntflags & MNT_FORCE)
 		flags |= FORCECLOSE;
 
 	/* vflush (no need to call vrele) */
 	for (freeing = 0; (error = vflush(mp, 1, flags, curthread)) != 0;) {
 		num = mp->mnt_nvnodelistsize;
 		if (num == freeing)
 			break;
 		freeing = num;
 	}
 
 	if (error)
 		return (error);
 
 	free(ump, M_UNIONFSMNT);
 	mp->mnt_data = NULL;
 
 	return (0);
 }
 
 /*
  * Return the root vnode of the union mount in *vpp with a new
  * reference, locked as requested when flags carries a lock type.
  */
 static int
 unionfs_root(struct mount *mp, int flags, struct vnode **vpp)
 {
 	struct unionfs_mount *ump;
 	struct vnode   *rootvp;
 
 	ump = MOUNTTOUNIONFSMOUNT(mp);
 	rootvp = ump->um_rootvp;
 
 	UNIONFSDEBUG("unionfs_root: rootvp=%p locked=%x\n",
 	    rootvp, VOP_ISLOCKED(rootvp));
 
 	vref(rootvp);
 	if ((flags & LK_TYPE_MASK) != 0)
 		vn_lock(rootvp, flags);
 	*vpp = rootvp;
 
 	return (0);
 }
 
 /*
  * Quota control is handed to the filesystem of the upper layer,
  * since that is where writes go.
  */
 static int
 unionfs_quotactl(struct mount *mp, int cmd, uid_t uid, void *arg)
 {
 	struct unionfs_mount *ump;
 	struct mount   *uppermp;
 
 	ump = MOUNTTOUNIONFSMOUNT(mp);
 	uppermp = ump->um_uppervp->v_mount;
 
 	return (VFS_QUOTACTL(uppermp, cmd, uid, arg));
 }
 
 static int
 unionfs_statfs(struct mount *mp, struct statfs *sbp)
 {
 	struct unionfs_mount *ump;
 	int		error;
 	struct statfs	*mstat;
 	uint64_t	lbsize;
 
 	ump = MOUNTTOUNIONFSMOUNT(mp);
 
 	UNIONFSDEBUG("unionfs_statfs(mp = %p, lvp = %p, uvp = %p)\n",
 	    (void *)mp, (void *)ump->um_lowervp, (void *)ump->um_uppervp);
 
 	mstat = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK | M_ZERO);
 
 	error = VFS_STATFS(ump->um_lowervp->v_mount, mstat);
 	if (error) {
 		free(mstat, M_STATFS);
 		return (error);
 	}
 
 	/* now copy across the "interesting" information and fake the rest */
 	sbp->f_blocks = mstat->f_blocks;
 	sbp->f_files = mstat->f_files;
 
 	lbsize = mstat->f_bsize;
 
 	error = VFS_STATFS(ump->um_uppervp->v_mount, mstat);
 	if (error) {
 		free(mstat, M_STATFS);
 		return (error);
 	}
 
 
 	/*
 	 * The FS type etc is copy from upper vfs.
 	 * (write able vfs have priority)
 	 */
 	sbp->f_type = mstat->f_type;
 	sbp->f_flags = mstat->f_flags;
 	sbp->f_bsize = mstat->f_bsize;
 	sbp->f_iosize = mstat->f_iosize;
 
 	if (mstat->f_bsize != lbsize)
 		sbp->f_blocks = ((off_t)sbp->f_blocks * lbsize) /
 		    mstat->f_bsize;
 
 	sbp->f_blocks += mstat->f_blocks;
 	sbp->f_bfree = mstat->f_bfree;
 	sbp->f_bavail = mstat->f_bavail;
 	sbp->f_files += mstat->f_files;
 	sbp->f_ffree = mstat->f_ffree;
 
 	free(mstat, M_STATFS);
 	return (0);
 }
 
 /*
  * VFS sync operation.  Does no work and always returns 0; neither
  * argument is examined.
  */
 static int
 unionfs_sync(struct mount *mp, int waitfor)
 {
 	/* nothing to do */
 	return (0);
 }
 
 /*
  * VFS vget operation.  Lookup by inode number is not implemented:
  * always returns EOPNOTSUPP and leaves *vpp untouched.
  */
 static int
 unionfs_vget(struct mount *mp, ino_t ino, int flags, struct vnode **vpp)
 {
 	return (EOPNOTSUPP);
 }
 
 /*
  * VFS fhtovp operation.  File handle translation is not implemented:
  * always returns EOPNOTSUPP and leaves *vpp untouched.
  */
 static int
 unionfs_fhtovp(struct mount *mp, struct fid *fidp, int flags,
     struct vnode **vpp)
 {
 	return (EOPNOTSUPP);
 }
 
 /*
  * VFS checkexp operation.  Always returns EOPNOTSUPP; none of the
  * output arguments are written.
  */
 static int
-unionfs_checkexp(struct mount *mp, struct sockaddr *nam, int *extflagsp,
-    struct ucred **credanonp, int *numsecflavors, int **secflavors)
+unionfs_checkexp(struct mount *mp, struct sockaddr *nam, uint64_t *extflagsp,
+    struct ucred **credanonp, int *numsecflavors, int *secflavors)
 {
 	return (EOPNOTSUPP);
 }
 
 /*
  * Extended attribute control is forwarded to one of the two layers:
  * the upper layer when the unionfs node has an upper vnode, otherwise
  * the lower layer.
  */
 static int
 unionfs_extattrctl(struct mount *mp, int cmd, struct vnode *filename_vp,
     int namespace, const char *attrname)
 {
 	struct unionfs_mount *ump;
 	struct unionfs_node *unp;
 	struct mount   *layermp;
 	struct vnode   *layervp;
 
 	ump = MOUNTTOUNIONFSMOUNT(mp);
 	unp = VTOUNIONFS(filename_vp);
 
 	if (unp->un_uppervp != NULLVP) {
 		layermp = ump->um_uppervp->v_mount;
 		layervp = unp->un_uppervp;
 	} else {
 		layermp = ump->um_lowervp->v_mount;
 		layervp = unp->un_lowervp;
 	}
 
 	return (VFS_EXTATTRCTL(layermp, cmd, layervp, namespace, attrname));
 }
 
 /*
  * VFS operations vector for unionfs.  unionfs_init and unionfs_uninit
  * are not defined in this file (presumably declared by
  * <fs/unionfs/union.h> -- confirm); the rest are the static functions
  * above.
  */
 static struct vfsops unionfs_vfsops = {
 	.vfs_checkexp =		unionfs_checkexp,
 	.vfs_extattrctl =	unionfs_extattrctl,
 	.vfs_fhtovp =		unionfs_fhtovp,
 	.vfs_init =		unionfs_init,
 	.vfs_mount =		unionfs_domount,
 	.vfs_quotactl =		unionfs_quotactl,
 	.vfs_root =		unionfs_root,
 	.vfs_statfs =		unionfs_statfs,
 	.vfs_sync =		unionfs_sync,
 	.vfs_uninit =		unionfs_uninit,
 	.vfs_unmount =		unionfs_unmount,
 	.vfs_vget =		unionfs_vget,
 };
 
 /* Register the operations vector under the filesystem name "unionfs". */
 VFS_SET(unionfs_vfsops, unionfs, VFCF_LOOPBACK);
Index: head/sys/kern/vfs_export.c
===================================================================
--- head/sys/kern/vfs_export.c	(revision 362157)
+++ head/sys/kern/vfs_export.c	(revision 362158)
@@ -1,542 +1,537 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
  * (c) UNIX System Laboratories, Inc.
  * All or some portions of this file are derived from material licensed
  * to the University of California by American Telephone and Telegraph
  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
  * the permission of UNIX System Laboratories, Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)vfs_subr.c	8.31 (Berkeley) 5/26/95
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/dirent.h>
 #include <sys/jail.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/mount.h>
 #include <sys/mutex.h>
 #include <sys/rmlock.h>
 #include <sys/refcount.h>
 #include <sys/signalvar.h>
 #include <sys/socket.h>
 #include <sys/vnode.h>
 
 #include <netinet/in.h>
 #include <net/radix.h>
 
 static MALLOC_DEFINE(M_NETADDR, "export_host", "Export host address structure");
 
 #if defined(INET) || defined(INET6)
 static struct radix_node_head *vfs_create_addrlist_af(
 		    struct radix_node_head **prnh, int off);
 #endif
 static void	vfs_free_addrlist(struct netexport *nep);
 static int	vfs_free_netcred(struct radix_node *rn, void *w);
 static void	vfs_free_addrlist_af(struct radix_node_head **prnh);
 static int	vfs_hang_addrlist(struct mount *mp, struct netexport *nep,
 		    struct export_args *argp);
 static struct netcred *vfs_export_lookup(struct mount *, struct sockaddr *);
 
 /*
  * Network address lookup element: one export entry, holding the export
  * flags, the anonymous credential and the security flavors for either a
  * single address/mask pair or the default export.
  *
  * netc_rnodes must remain the first member: the radix_node pointers
  * handed back by the radix code are cast directly to struct netcred *.
  */
 struct netcred {
 	struct	radix_node netc_rnodes[2];
-	int	netc_exflags;
+	uint64_t netc_exflags;
 	struct	ucred *netc_anon;
 	int	netc_numsecflavors;
 	int	netc_secflavors[MAXSECFLAVORS];
 };
 
 /*
  * Network export information
  *
  * One of these hangs off mp->mnt_export.  The per-family radix trees
  * are created lazily by vfs_hang_addrlist() and stay NULL until the
  * first export for that address family is added.
  */
 struct netexport {
 	struct	netcred ne_defexported;		      /* Default export */
 	struct 	radix_node_head	*ne4;	/* AF_INET exports, or NULL */
 	struct 	radix_node_head	*ne6;	/* AF_INET6 exports, or NULL */
 };
 
 /*
  * Build hash lists of net addresses and hang them off the mount point.
  * Called by vfs_export() to set up the lists of export addresses.
  *
  * With argp->ex_addrlen == 0 the default export entry embedded in *nep
  * is filled in.  Otherwise a netcred is allocated with the address (and
  * optional mask) copied in from user space directly behind it, and is
  * inserted into the radix tree for the address family.
  *
  * Returns 0 on success, EPERM if the entry already exists, EINVAL for
  * a bad address length or family, ENOBUFS if no radix tree is available
  * for the family, or the error from copyin().
  */
 static int
 vfs_hang_addrlist(struct mount *mp, struct netexport *nep,
     struct export_args *argp)
 {
 	struct netcred *np;
 	struct radix_node_head *rnh;
 	int i;
 	struct radix_node *rn;
 	struct sockaddr *saddr, *smask = NULL;
 #if defined(INET6) || defined(INET)
 	int off;
 #endif
 	int error;
 
 	KASSERT(argp->ex_numsecflavors > 0,
 	    ("%s: numsecflavors <= 0", __func__));
 	KASSERT(argp->ex_numsecflavors < MAXSECFLAVORS,
 	    ("%s: numsecflavors >= MAXSECFLAVORS", __func__));
 
 	/*
-	 * XXX: This routine converts from a `struct xucred'
-	 * (argp->ex_anon) to a `struct ucred' (np->netc_anon).  This
+	 * XXX: This routine converts from a uid plus gid list
+	 * to a `struct ucred' (np->netc_anon).  This
 	 * operation is questionable; for example, what should be done
 	 * with fields like cr_uidinfo and cr_prison?  Currently, this
 	 * routine does not touch them (leaves them as NULL).
 	 */
-	if (argp->ex_anon.cr_version != XUCRED_VERSION) {
-		vfs_mount_error(mp, "ex_anon.cr_version: %d != %d",
-		    argp->ex_anon.cr_version, XUCRED_VERSION);
-		return (EINVAL);
-	}
-
 	/* No address given: this request describes the default export. */
 	if (argp->ex_addrlen == 0) {
 		if (mp->mnt_flag & MNT_DEFEXPORTED) {
 			vfs_mount_error(mp,
 			    "MNT_DEFEXPORTED already set for mount %p", mp);
 			return (EPERM);
 		}
 		np = &nep->ne_defexported;
 		np->netc_exflags = argp->ex_flags;
 		np->netc_anon = crget();
-		np->netc_anon->cr_uid = argp->ex_anon.cr_uid;
-		crsetgroups(np->netc_anon, argp->ex_anon.cr_ngroups,
-		    argp->ex_anon.cr_groups);
+		np->netc_anon->cr_uid = argp->ex_uid;
+		crsetgroups(np->netc_anon, argp->ex_ngroups,
+		    argp->ex_groups);
 		np->netc_anon->cr_prison = &prison0;
 		prison_hold(np->netc_anon->cr_prison);
 		np->netc_numsecflavors = argp->ex_numsecflavors;
 		bcopy(argp->ex_secflavors, np->netc_secflavors,
 		    sizeof(np->netc_secflavors));
 		MNT_ILOCK(mp);
 		mp->mnt_flag |= MNT_DEFEXPORTED;
 		MNT_IUNLOCK(mp);
 		return (0);
 	}
 
 #if MSIZE <= 256
 	if (argp->ex_addrlen > MLEN) {
 		vfs_mount_error(mp, "ex_addrlen %d is greater than %d",
 		    argp->ex_addrlen, MLEN);
 		return (EINVAL);
 	}
 #endif
 
 	/* One allocation: the netcred, then the address, then the mask. */
 	i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
 	np = (struct netcred *) malloc(i, M_NETADDR, M_WAITOK | M_ZERO);
 	saddr = (struct sockaddr *) (np + 1);
 	if ((error = copyin(argp->ex_addr, saddr, argp->ex_addrlen)))
 		goto out;
 	if (saddr->sa_family == AF_UNSPEC || saddr->sa_family > AF_MAX) {
 		error = EINVAL;
 		/* The %d needs its argument; it used to be missing. */
 		vfs_mount_error(mp, "Invalid saddr->sa_family: %d",
 		    saddr->sa_family);
 		goto out;
 	}
 	/* Never trust a user-supplied sa_len beyond what was copied in. */
 	if (saddr->sa_len > argp->ex_addrlen)
 		saddr->sa_len = argp->ex_addrlen;
 	if (argp->ex_masklen) {
 		smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen);
 		error = copyin(argp->ex_mask, smask, argp->ex_masklen);
 		if (error)
 			goto out;
 		if (smask->sa_len > argp->ex_masklen)
 			smask->sa_len = argp->ex_masklen;
 	}
 	/* Find the radix tree for this family, creating it on first use. */
 	rnh = NULL;
 	switch (saddr->sa_family) {
 #ifdef INET
 	case AF_INET:
 		if ((rnh = nep->ne4) == NULL) {
 			off = offsetof(struct sockaddr_in, sin_addr) << 3;
 			rnh = vfs_create_addrlist_af(&nep->ne4, off);
 		}
 		break;
 #endif
 #ifdef INET6
 	case AF_INET6:
 		if ((rnh = nep->ne6) == NULL) {
 			off = offsetof(struct sockaddr_in6, sin6_addr) << 3;
 			rnh = vfs_create_addrlist_af(&nep->ne6, off);
 		}
 		break;
 #endif
 	}
 	if (rnh == NULL) {
 		error = ENOBUFS;
 		vfs_mount_error(mp, "%s %s %d",
 		    "Unable to initialize radix node head ",
 		    "for address family", saddr->sa_family);
 		goto out;
 	}
 	RADIX_NODE_HEAD_LOCK(rnh);
 	rn = (*rnh->rnh_addaddr)(saddr, smask, &rnh->rh, np->netc_rnodes);
 	RADIX_NODE_HEAD_UNLOCK(rnh);
 	if (rn == NULL || np != (struct netcred *)rn) {	/* already exists */
 		error = EPERM;
 		vfs_mount_error(mp,
 		    "netcred already exists for given addr/mask");
 		goto out;
 	}
 	np->netc_exflags = argp->ex_flags;
 	np->netc_anon = crget();
-	np->netc_anon->cr_uid = argp->ex_anon.cr_uid;
-	crsetgroups(np->netc_anon, argp->ex_anon.cr_ngroups,
-	    argp->ex_anon.cr_groups);
+	np->netc_anon->cr_uid = argp->ex_uid;
+	crsetgroups(np->netc_anon, argp->ex_ngroups,
+	    argp->ex_groups);
 	np->netc_anon->cr_prison = &prison0;
 	prison_hold(np->netc_anon->cr_prison);
 	np->netc_numsecflavors = argp->ex_numsecflavors;
 	bcopy(argp->ex_secflavors, np->netc_secflavors,
 	    sizeof(np->netc_secflavors));
 	return (0);
 out:
 	/* Only reached for a malloc'ed entry that was not inserted. */
 	free(np, M_NETADDR);
 	return (error);
 }
 
 /*
  * Tree-walk callback for vfs_free_addrlist_af(): unlink one export entry
  * from the radix tree passed in w, drop its anonymous credential and
  * free the entry.  Always returns 0.
  */
 /* ARGSUSED */
 static int
 vfs_free_netcred(struct radix_node *rn, void *w)
 {
 	struct radix_node_head *head;
 	struct netcred *entry;
 
 	head = (struct radix_node_head *)w;
 	/* The radix nodes are the first member of the netcred. */
 	entry = (struct netcred *)rn;
 
 	(*head->rnh_deladdr)(rn->rn_key, rn->rn_mask, &head->rh);
 	if (entry->netc_anon != NULL)
 		crfree(entry->netc_anon);
 	free(entry, M_NETADDR);
 	return (0);
 }
 
 #if defined(INET) || defined(INET6)
 /*
  * Create the radix tree for one address family in *prnh and set up
  * its lock.  Returns the new head, or NULL if rn_inithead() failed.
  */
 static struct radix_node_head *
 vfs_create_addrlist_af(struct radix_node_head **prnh, int off)
 {
 	struct radix_node_head *head;
 
 	if (!rn_inithead((void **)prnh, off))
 		return (NULL);
 	head = *prnh;
 	RADIX_NODE_HEAD_LOCK_INIT(head);
 	return (head);
 }
 #endif
 
 /*
  * Free every export entry in the radix tree *prnh, then destroy the
  * tree itself and clear the caller's pointer to it.
  */
 static void
 vfs_free_addrlist_af(struct radix_node_head **prnh)
 {
 	struct radix_node_head *rnh;
 
 	rnh = *prnh;
 	RADIX_NODE_HEAD_LOCK(rnh);
 	(*rnh->rnh_walktree)(&rnh->rh, vfs_free_netcred, rnh);
 	RADIX_NODE_HEAD_UNLOCK(rnh);
 	RADIX_NODE_HEAD_DESTROY(rnh);
 	rn_detachhead((void **)prnh);
 	/*
 	 * Clear the caller's pointer.  The previous "prnh = NULL" only
 	 * overwrote the local parameter and had no effect.
 	 */
 	*prnh = NULL;
 }
 
 /*
  * Release everything hanging off a netexport: both per-family radix
  * trees, if present, and the default export's anonymous credential.
  */
 static void
 vfs_free_addrlist(struct netexport *nep)
 {
 
 	if (nep->ne4 != NULL)
 		vfs_free_addrlist_af(&nep->ne4);
 	if (nep->ne6 != NULL)
 		vfs_free_addrlist_af(&nep->ne6);
 
 	if (nep->ne_defexported.netc_anon != NULL)
 		crfree(nep->ne_defexported.netc_anon);
 }
 
 /*
  * High level function to manipulate export options on a mount point
  * and the passed in netexport.
  * Struct export_args *argp is the variable used to twiddle options,
  * the structure is described in sys/mount.h
  *
  * MNT_DELEXPORT and MNT_EXPORTED may both be set in argp->ex_flags; the
  * existing exports are then removed before the new one is added.
  *
  * Returns EINVAL if neither flag is set or if an export is requested
  * with ex_numsecflavors outside 1..MAXSECFLAVORS-1, ENOENT if a delete
  * is requested but nothing is exported, or the error returned by
  * vfs_setpublicfs() or vfs_hang_addrlist().
  */
 int
 vfs_export(struct mount *mp, struct export_args *argp)
 {
 	struct netexport *nep;
 	int error;
 
 	if ((argp->ex_flags & (MNT_DELEXPORT | MNT_EXPORTED)) == 0)
 		return (EINVAL);
 
 	if ((argp->ex_flags & MNT_EXPORTED) != 0 &&
 	    (argp->ex_numsecflavors <= 0
 	    || argp->ex_numsecflavors >= MAXSECFLAVORS))
 		return (EINVAL);
 
 	error = 0;
 	/* Exclusive: vfs_stdcheckexp() holds this lock shared for lookups. */
 	lockmgr(&mp->mnt_explock, LK_EXCLUSIVE, NULL);
 	nep = mp->mnt_export;
 	if (argp->ex_flags & MNT_DELEXPORT) {
 		if (nep == NULL) {
 			error = ENOENT;
 			goto out;
 		}
 		if (mp->mnt_flag & MNT_EXPUBLIC) {
 			vfs_setpublicfs(NULL, NULL, NULL);
 			MNT_ILOCK(mp);
 			mp->mnt_flag &= ~MNT_EXPUBLIC;
 			MNT_IUNLOCK(mp);
 		}
 		vfs_free_addrlist(nep);
 		mp->mnt_export = NULL;
 		free(nep, M_MOUNT);
 		/* Forces a fresh netexport below if MNT_EXPORTED is also set. */
 		nep = NULL;
 		MNT_ILOCK(mp);
 		mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
 		MNT_IUNLOCK(mp);
 	}
 	if (argp->ex_flags & MNT_EXPORTED) {
 		if (nep == NULL) {
 			nep = malloc(sizeof(struct netexport), M_MOUNT, M_WAITOK | M_ZERO);
 			mp->mnt_export = nep;
 		}
 		if (argp->ex_flags & MNT_EXPUBLIC) {
 			if ((error = vfs_setpublicfs(mp, nep, argp)) != 0)
 				goto out;
 			MNT_ILOCK(mp);
 			mp->mnt_flag |= MNT_EXPUBLIC;
 			MNT_IUNLOCK(mp);
 		}
 		if ((error = vfs_hang_addrlist(mp, nep, argp)))
 			goto out;
 		MNT_ILOCK(mp);
 		mp->mnt_flag |= MNT_EXPORTED;
 		MNT_IUNLOCK(mp);
 	}
 
 out:
 	lockmgr(&mp->mnt_explock, LK_RELEASE, NULL);
 	/*
 	 * Once we have executed the vfs_export() command, we do
 	 * not want to keep the "export" option around in the
 	 * options list, since that will cause subsequent MNT_UPDATE
 	 * calls to fail.  The export information is saved in
 	 * mp->mnt_export, so we can safely delete the "export" mount option
 	 * here.
 	 */
 	vfs_deleteopt(mp->mnt_optnew, "export");
 	vfs_deleteopt(mp->mnt_opt, "export");
 	return (error);
 }
 
 /*
  * Set the publicly exported filesystem (WebNFS). Currently, only
  * one public filesystem is possible in the spec (RFC 2054 and 2055)
  */
 int
 vfs_setpublicfs(struct mount *mp, struct netexport *nep,
     struct export_args *argp)
 {
 	int error;
 	struct vnode *rvp;
 	char *cp;
 
 	/*
 	 * mp == NULL -> invalidate the current info, the FS is
 	 * no longer exported. May be called from either vfs_export
 	 * or unmount, so check if it hasn't already been done.
 	 */
 	if (mp == NULL) {
 		if (nfs_pub.np_valid) {
 			nfs_pub.np_valid = 0;
 			if (nfs_pub.np_index != NULL) {
 				free(nfs_pub.np_index, M_TEMP);
 				nfs_pub.np_index = NULL;
 			}
 		}
 		return (0);
 	}
 
 	/*
 	 * Only one allowed at a time.
 	 */
 	if (nfs_pub.np_valid != 0 && mp != nfs_pub.np_mount)
 		return (EBUSY);
 
 	/*
 	 * Get real filehandle for root of exported FS.
 	 */
 	bzero(&nfs_pub.np_handle, sizeof(nfs_pub.np_handle));
 	nfs_pub.np_handle.fh_fsid = mp->mnt_stat.f_fsid;
 
 	if ((error = VFS_ROOT(mp, LK_EXCLUSIVE, &rvp)))
 		return (error);
 
 	if ((error = VOP_VPTOFH(rvp, &nfs_pub.np_handle.fh_fid)))
 		return (error);
 
 	vput(rvp);
 
 	/*
 	 * If an indexfile was specified, pull it in.
 	 */
 	if (argp->ex_indexfile != NULL) {
 		if (nfs_pub.np_index == NULL)
 			nfs_pub.np_index = malloc(MAXNAMLEN + 1, M_TEMP,
 			    M_WAITOK);
 		error = copyinstr(argp->ex_indexfile, nfs_pub.np_index,
 		    MAXNAMLEN, (size_t *)0);
 		if (!error) {
 			/*
 			 * Check for illegal filenames.
 			 */
 			for (cp = nfs_pub.np_index; *cp; cp++) {
 				if (*cp == '/') {
 					error = EINVAL;
 					break;
 				}
 			}
 		}
 		if (error) {
 			free(nfs_pub.np_index, M_TEMP);
 			nfs_pub.np_index = NULL;
 			return (error);
 		}
 	}
 
 	nfs_pub.np_mount = mp;
 	nfs_pub.np_valid = 1;
 	return (0);
 }
 
 /*
  * Find the export entry that applies to the client address 'nam' on
  * mount 'mp'.  An address match in the per-family radix tree wins;
  * failing that, the default export is used when the mount has one.
  * Returns NULL when the mount is not exported or nothing applies.
  */
 static struct netcred *
 vfs_export_lookup(struct mount *mp, struct sockaddr *nam)
 {
 	RADIX_NODE_HEAD_RLOCK_TRACKER;
 	struct netexport *nep;
 	struct netcred *match;
 	struct radix_node_head *tree;
 
 	nep = mp->mnt_export;
 	if (nep == NULL || (mp->mnt_flag & MNT_EXPORTED) == 0)
 		return (NULL);
 
 	/* Pick the radix tree for the client's address family, if any. */
 	tree = NULL;
 	if (nam != NULL) {
 		if (nam->sa_family == AF_INET)
 			tree = nep->ne4;
 		else if (nam->sa_family == AF_INET6)
 			tree = nep->ne6;
 	}
 
 	match = NULL;
 	if (tree != NULL) {
 		RADIX_NODE_HEAD_RLOCK(tree);
 		match = (struct netcred *)
 		    (*tree->rnh_matchaddr)(nam, &tree->rh);
 		RADIX_NODE_HEAD_RUNLOCK(tree);
 		/* A hit on a root node of the tree is treated as a refusal. */
 		if (match != NULL &&
 		    (match->netc_rnodes[0].rn_flags & RNF_ROOT) != 0)
 			return (NULL);
 	}
 	if (match != NULL)
 		return (match);
 
 	/* No address match: use the default export if there is one. */
 	if ((mp->mnt_flag & MNT_DEFEXPORTED) != 0)
 		return (&nep->ne_defexported);
 
 	return (NULL);
 }
 
 /*
  * XXX: This comment comes from the deprecated ufs_check_export()
  * XXX: and may not entirely apply, but lacking something better:
  * This is the generic part of fhtovp called after the underlying
  * filesystem has validated the file handle.
  *
  * Verify that a host should have access to a filesystem.
  *
  * Looks up 'nam' in the exports of 'mp' under the shared export lock.
  * With no matching export, *credanonp is set to NULL and EACCES is
  * returned.  Otherwise 0 is returned with *extflagsp set to the export
  * flags and *credanonp set to the anonymous credential, on which a
  * reference is taken if it is not NULL.  numsecflavors and secflavors
  * may be NULL; when given they receive the entry's security flavor
  * count and flavors.
  */
 
 int 
-vfs_stdcheckexp(struct mount *mp, struct sockaddr *nam, int *extflagsp,
-    struct ucred **credanonp, int *numsecflavors, int **secflavors)
+vfs_stdcheckexp(struct mount *mp, struct sockaddr *nam, uint64_t *extflagsp,
+    struct ucred **credanonp, int *numsecflavors, int *secflavors)
 {
 	struct netcred *np;
 
 	/* Shared: vfs_export() takes this lock exclusively to modify. */
 	lockmgr(&mp->mnt_explock, LK_SHARED, NULL);
 	np = vfs_export_lookup(mp, nam);
 	if (np == NULL) {
 		lockmgr(&mp->mnt_explock, LK_RELEASE, NULL);
 		*credanonp = NULL;
 		return (EACCES);
 	}
 	*extflagsp = np->netc_exflags;
 	/* Take the reference while the export lock is still held. */
 	if ((*credanonp = np->netc_anon) != NULL)
 		crhold(*credanonp);
 	if (numsecflavors) {
 		*numsecflavors = np->netc_numsecflavors;
 		KASSERT(*numsecflavors > 0,
 		    ("%s: numsecflavors <= 0", __func__));
 		KASSERT(*numsecflavors < MAXSECFLAVORS,
 		    ("%s: numsecflavors >= MAXSECFLAVORS", __func__));
 	}
-	if (secflavors)
-		*secflavors = np->netc_secflavors;
+	if (secflavors && np->netc_numsecflavors > 0)
+		memcpy(secflavors, np->netc_secflavors, np->netc_numsecflavors *
+		    sizeof(int));
 	lockmgr(&mp->mnt_explock, LK_RELEASE, NULL);
 	return (0);
 }
 
Index: head/sys/kern/vfs_init.c
===================================================================
--- head/sys/kern/vfs_init.c	(revision 362157)
+++ head/sys/kern/vfs_init.c	(revision 362158)
@@ -1,593 +1,593 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * This code is derived from software contributed
  * to Berkeley by John Heidemann of the UCLA Ficus project.
  *
  * Source: * @(#)i405_init.c 2.10 92/04/27 UCLA Ficus project
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)vfs_init.c	8.3 (Berkeley) 1/4/94
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/fnv_hash.h>
 #include <sys/jail.h>
 #include <sys/kernel.h>
 #include <sys/linker.h>
 #include <sys/mount.h>
 #include <sys/proc.h>
 #include <sys/sx.h>
 #include <sys/syscallsubr.h>
 #include <sys/sysctl.h>
 #include <sys/vnode.h>
 #include <sys/malloc.h>
 
 static int	vfs_register(struct vfsconf *);
 static int	vfs_unregister(struct vfsconf *);
 
 MALLOC_DEFINE(M_VNODE, "vnodes", "Dynamically allocated vnodes");
 
 /*
  * One greater than the highest VFS type number in use.  It starts at
  * VFS_GENERIC + 1 and is kept up to date by vfs_register() and
  * vfs_unregister().
  */
 int maxvfsconf = VFS_GENERIC + 1;
 
 /*
  * Tail queue (TAILQ) of configured VFSes.
  * New entries are added/deleted by vfs_register()/vfs_unregister().
  * vfs_byname_locked() asserts that vfsconf_sx is held while it walks
  * the list.
  */
 struct vfsconfhead vfsconf = TAILQ_HEAD_INITIALIZER(vfsconf);
 struct sx vfsconf_sx;
 SX_SYSINIT(vfsconf, &vfsconf_sx, "vfsconf");
 
 /*
  * Loader.conf variable vfs.typenumhash enables setting vfc_typenum using a hash
  * calculation on vfc_name, so that it doesn't change when file systems are
  * loaded in a different order. This keeps NFS server file handles from
  * changing for file systems that use vfc_typenum in their fsid.
  *
  * Note that the description below is built from two adjacent string
  * literals; the second one starts with a space so the words "not" and
  * "change" do not run together.
  */
 static int	vfs_typenumhash = 1;
 SYSCTL_INT(_vfs, OID_AUTO, typenumhash, CTLFLAG_RDTUN, &vfs_typenumhash, 0,
     "Set vfc_typenum using a hash calculation on vfc_name, so that it does not"
     " change when file systems are loaded in a different order.");
 
 /*
  * A Zen vnode attribute structure.
  *
  * Initialized with vattr_null() the first time vfs_register() runs,
  * i.e. when the first filesystem registers.
  */
 struct vattr va_null;
 
 /*
  * vfs_init.c
  *
  * Allocate and fill in operations vectors.
  *
  * An undocumented feature of this approach to defining operations is that
  * there can be multiple entries in vfs_opv_descs for the same operations
  * vector. This allows third parties to extend the set of operations
  * supported by another layer in a binary compatible way. For example,
  * assume that NFS needed to be modified to support Ficus. NFS has an entry
  * (probably nfs_vnopdeop_decls) declaring all the operations NFS supports by
  * default. Ficus could add another entry (ficus_nfs_vnodeop_decl_entensions)
  * listing those new operations Ficus adds to NFS, all without modifying the
  * NFS code. (Of course, the OTW NFS protocol still needs to be munged, but
  * that is a(whole)nother story.) This is a feature.
  */
 
 /*
  * Routines having to do with the management of the vnode table.
  */
 
 /*
  * Find the registered vfsconf whose vfc_name equals "name"; the name "ffs"
  * is looked up as "ufs".  Returns NULL when nothing matches.  The caller
  * must hold vfsconf_sx (asserted below).
  */
 static struct vfsconf *
 vfs_byname_locked(const char *name)
 {
 	struct vfsconf *cur;
 
 	sx_assert(&vfsconf_sx, SA_LOCKED);
 
 	if (strcmp(name, "ffs") == 0)
 		name = "ufs";
 
 	TAILQ_FOREACH(cur, &vfsconf, vfc_list) {
 		if (strcmp(name, cur->vfc_name) != 0)
 			continue;
 		return (cur);
 	}
 	return (NULL);
 }
 
 /*
  * Look up a registered filesystem type by name.  This is
  * vfs_byname_locked() bracketed by vfsconf_slock()/vfsconf_sunlock().
  */
 struct vfsconf *
 vfs_byname(const char *name)
 {
 	struct vfsconf *found;
 
 	vfsconf_slock();
 	found = vfs_byname_locked(name);
 	vfsconf_sunlock();
 
 	return (found);
 }
 
 struct vfsconf *
 vfs_byname_kld(const char *fstype, struct thread *td, int *error)
 {
 	struct vfsconf *vfsp;
 	int fileid, loaded;
 
 	vfsp = vfs_byname(fstype);
 	if (vfsp != NULL)
 		return (vfsp);
 
 	/* Try to load the respective module. */
 	*error = kern_kldload(td, fstype, &fileid);
 	loaded = (*error == 0);
 	if (*error == EEXIST)
 		*error = 0;
 	if (*error)
 		return (NULL);
 
 	/* Look up again to see if the VFS was loaded. */
 	vfsp = vfs_byname(fstype);
 	if (vfsp == NULL) {
 		if (loaded)
 			(void)kern_kldunload(td, fileid, LINKER_UNLOAD_FORCE);
 		*error = ENODEV;
 		return (NULL);
 	}
 	return (vfsp);
 }
 
 static int
 vfs_mount_sigdefer(struct mount *mp)
 {
 	int prev_stops, rc;
 
 	TSRAW(curthread, TS_ENTER, "VFS_MOUNT", mp->mnt_vfc->vfc_name);
 	prev_stops = sigdeferstop(SIGDEFERSTOP_SILENT);
 	rc = (*mp->mnt_vfc->vfc_vfsops_sd->vfs_mount)(mp);
 	sigallowstop(prev_stops);
 	TSRAW(curthread, TS_EXIT, "VFS_MOUNT", mp->mnt_vfc->vfc_name);
 	return (rc);
 }
 
 static int
 vfs_unmount_sigdefer(struct mount *mp, int mntflags)
 {
 	int prev_stops, rc;
 
 	prev_stops = sigdeferstop(SIGDEFERSTOP_SILENT);
 	rc = (*mp->mnt_vfc->vfc_vfsops_sd->vfs_unmount)(mp, mntflags);
 	sigallowstop(prev_stops);
 	return (rc);
 }
 
 static int
 vfs_root_sigdefer(struct mount *mp, int flags, struct vnode **vpp)
 {
 	int prev_stops, rc;
 
 	prev_stops = sigdeferstop(SIGDEFERSTOP_SILENT);
 	rc = (*mp->mnt_vfc->vfc_vfsops_sd->vfs_root)(mp, flags, vpp);
 	sigallowstop(prev_stops);
 	return (rc);
 }
 
 static int
 vfs_cachedroot_sigdefer(struct mount *mp, int flags, struct vnode **vpp)
 {
 	int prev_stops, rc;
 
 	prev_stops = sigdeferstop(SIGDEFERSTOP_SILENT);
 	rc = (*mp->mnt_vfc->vfc_vfsops_sd->vfs_cachedroot)(mp, flags, vpp);
 	sigallowstop(prev_stops);
 	return (rc);
 }
 
 /*
  * vfs_quotactl entry of vfsops_sigdefer: forwards to the filesystem's own
  * vfs_quotactl (kept in vfc_vfsops_sd) between sigdeferstop() and
  * sigallowstop(), and returns its result.
  */
 static int
 vfs_quotactl_sigdefer(struct mount *mp, int cmd, uid_t uid, void *arg)
 {
 	struct vfsops *ops;
 	int error, stopstate;
 
 	stopstate = sigdeferstop(SIGDEFERSTOP_SILENT);
 	ops = mp->mnt_vfc->vfc_vfsops_sd;
 	error = ops->vfs_quotactl(mp, cmd, uid, arg);
 	sigallowstop(stopstate);
 	return (error);
 }
 
 static int
 vfs_statfs_sigdefer(struct mount *mp, struct statfs *sbp)
 {
 	int prev_stops, rc;
 
 	prev_stops = sigdeferstop(SIGDEFERSTOP_SILENT);
 	rc = (*mp->mnt_vfc->vfc_vfsops_sd->vfs_statfs)(mp, sbp);
 	sigallowstop(prev_stops);
 	return (rc);
 }
 
 static int
 vfs_sync_sigdefer(struct mount *mp, int waitfor)
 {
 	int prev_stops, rc;
 
 	prev_stops = sigdeferstop(SIGDEFERSTOP_SILENT);
 	rc = (*mp->mnt_vfc->vfc_vfsops_sd->vfs_sync)(mp, waitfor);
 	sigallowstop(prev_stops);
 	return (rc);
 }
 
 /*
  * vfs_vget entry of vfsops_sigdefer: forwards to the filesystem's own
  * vfs_vget (kept in vfc_vfsops_sd) between sigdeferstop() and
  * sigallowstop(), and returns its result.
  */
 static int
 vfs_vget_sigdefer(struct mount *mp, ino_t ino, int flags, struct vnode **vpp)
 {
 	struct vfsops *ops;
 	int error, stopstate;
 
 	stopstate = sigdeferstop(SIGDEFERSTOP_SILENT);
 	ops = mp->mnt_vfc->vfc_vfsops_sd;
 	error = ops->vfs_vget(mp, ino, flags, vpp);
 	sigallowstop(stopstate);
 	return (error);
 }
 
 static int
 vfs_fhtovp_sigdefer(struct mount *mp, struct fid *fidp, int flags,
     struct vnode **vpp)
 {
 	int prev_stops, rc;
 
 	prev_stops = sigdeferstop(SIGDEFERSTOP_SILENT);
 	rc = (*mp->mnt_vfc->vfc_vfsops_sd->vfs_fhtovp)(mp, fidp, flags, vpp);
 	sigallowstop(prev_stops);
 	return (rc);
 }
 
 /*
  * vfs_checkexp entry of vfsops_sigdefer: forwards all arguments to the
  * filesystem's own vfs_checkexp (kept in vfc_vfsops_sd) between
  * sigdeferstop() and sigallowstop(), and returns its result.
  */
 static int
-vfs_checkexp_sigdefer(struct mount *mp, struct sockaddr *nam, int *exflg,
-    struct ucred **credp, int *numsecflavors, int **secflavors)
+vfs_checkexp_sigdefer(struct mount *mp, struct sockaddr *nam, uint64_t *exflg,
+    struct ucred **credp, int *numsecflavors, int *secflavors)
 {
 	int prev_stops, rc;
 
 	prev_stops = sigdeferstop(SIGDEFERSTOP_SILENT);
 	rc = (*mp->mnt_vfc->vfc_vfsops_sd->vfs_checkexp)(mp, nam, exflg, credp,
 	    numsecflavors, secflavors);
 	sigallowstop(prev_stops);
 	return (rc);
 }
 
 static int
 vfs_extattrctl_sigdefer(struct mount *mp, int cmd, struct vnode *filename_vp,
     int attrnamespace, const char *attrname)
 {
 	int prev_stops, rc;
 
 	prev_stops = sigdeferstop(SIGDEFERSTOP_SILENT);
 	rc = (*mp->mnt_vfc->vfc_vfsops_sd->vfs_extattrctl)(mp, cmd,
 	    filename_vp, attrnamespace, attrname);
 	sigallowstop(prev_stops);
 	return (rc);
 }
 
 /*
  * vfs_sysctl entry of vfsops_sigdefer: forwards to the filesystem's own
  * vfs_sysctl (kept in vfc_vfsops_sd) between sigdeferstop() and
  * sigallowstop(), and returns its result.
  */
 static int
 vfs_sysctl_sigdefer(struct mount *mp, fsctlop_t op, struct sysctl_req *req)
 {
 	struct vfsops *ops;
 	int error, stopstate;
 
 	stopstate = sigdeferstop(SIGDEFERSTOP_SILENT);
 	ops = mp->mnt_vfc->vfc_vfsops_sd;
 	error = ops->vfs_sysctl(mp, op, req);
 	sigallowstop(stopstate);
 	return (error);
 }
 
 static void
 vfs_susp_clean_sigdefer(struct mount *mp)
 {
 	int prev_stops;
 
 	if (*mp->mnt_vfc->vfc_vfsops_sd->vfs_susp_clean == NULL)
 		return;
 	prev_stops = sigdeferstop(SIGDEFERSTOP_SILENT);
 	(*mp->mnt_vfc->vfc_vfsops_sd->vfs_susp_clean)(mp);
 	sigallowstop(prev_stops);
 }
 
 static void
 vfs_reclaim_lowervp_sigdefer(struct mount *mp, struct vnode *vp)
 {
 	int prev_stops;
 
 	if (*mp->mnt_vfc->vfc_vfsops_sd->vfs_reclaim_lowervp == NULL)
 		return;
 	prev_stops = sigdeferstop(SIGDEFERSTOP_SILENT);
 	(*mp->mnt_vfc->vfc_vfsops_sd->vfs_reclaim_lowervp)(mp, vp);
 	sigallowstop(prev_stops);
 }
 
 static void
 vfs_unlink_lowervp_sigdefer(struct mount *mp, struct vnode *vp)
 {
 	int prev_stops;
 
 	if (*mp->mnt_vfc->vfc_vfsops_sd->vfs_unlink_lowervp == NULL)
 		return;
 	prev_stops = sigdeferstop(SIGDEFERSTOP_SILENT);
 	(*(mp)->mnt_vfc->vfc_vfsops_sd->vfs_unlink_lowervp)(mp, vp);
 	sigallowstop(prev_stops);
 }
 
 static void
 vfs_purge_sigdefer(struct mount *mp)
 {
 	int prev_stops;
 
 	prev_stops = sigdeferstop(SIGDEFERSTOP_SILENT);
 	(*mp->mnt_vfc->vfc_vfsops_sd->vfs_purge)(mp);
 	sigallowstop(prev_stops);
 }
 
 /*
  * Method table installed by vfs_register() as vfc_vfsops for filesystems
  * flagged VFCF_SBDRY; the filesystem's own table is kept in vfc_vfsops_sd
  * and every entry below forwards to it.  There are no vfs_init/vfs_uninit
  * entries here: vfs_register() and vfs_unregister() call those through
  * vfc_vfsops_sd directly.
  */
 static struct vfsops vfsops_sigdefer = {
 	.vfs_mount =		vfs_mount_sigdefer,
 	.vfs_unmount =		vfs_unmount_sigdefer,
 	.vfs_root =		vfs_root_sigdefer,
 	.vfs_cachedroot =	vfs_cachedroot_sigdefer,
 	.vfs_quotactl =		vfs_quotactl_sigdefer,
 	.vfs_statfs =		vfs_statfs_sigdefer,
 	.vfs_sync =		vfs_sync_sigdefer,
 	.vfs_vget =		vfs_vget_sigdefer,
 	.vfs_fhtovp =		vfs_fhtovp_sigdefer,
 	.vfs_checkexp =		vfs_checkexp_sigdefer,
 	.vfs_extattrctl =	vfs_extattrctl_sigdefer,
 	.vfs_sysctl =		vfs_sysctl_sigdefer,
 	.vfs_susp_clean =	vfs_susp_clean_sigdefer,
 	.vfs_reclaim_lowervp =	vfs_reclaim_lowervp_sigdefer,
 	.vfs_unlink_lowervp =	vfs_unlink_lowervp_sigdefer,
 	.vfs_purge =		vfs_purge_sigdefer,
 
 };
 
 /*
  * Register a new filesystem type in the global table.
  *
  * Returns EINVAL when vfc->vfc_version is not VFS_VERSION, EEXIST when a
  * filesystem of the same name is already registered, and 0 otherwise.
  * On success the vfsconf has been assigned a vfc_typenum, appended to the
  * vfsconf list, had its missing vfsops filled in with vfs_std*()
  * defaults, and had its vfs_init method called.
  */
 static int
 vfs_register(struct vfsconf *vfc)
 {
 	struct sysctl_oid *oidp;
 	struct vfsops *vfsops;
 	static int once;
 	struct vfsconf *tvfc;
 	uint32_t hashval;
 	int secondpass;
 
 	/* One-time setup of the shared va_null attribute structure. */
 	if (!once) {
 		vattr_null(&va_null);
 		once = 1;
 	}
 	
 	if (vfc->vfc_version != VFS_VERSION) {
 		printf("ERROR: filesystem %s, unsupported ABI version %x\n",
 		    vfc->vfc_name, vfc->vfc_version);
 		return (EINVAL);
 	}
 	vfsconf_lock();
 	if (vfs_byname_locked(vfc->vfc_name) != NULL) {
 		vfsconf_unlock();
 		return (EEXIST);
 	}
 
 	if (vfs_typenumhash != 0) {
 		/*
 		 * Calculate a hash on vfc_name to use for vfc_typenum. Unless
 		 * all of 1<->255 are assigned, it is limited to 8bits since
 		 * that is what ZFS uses from vfc_typenum and is also the
 		 * preferred range for vfs_getnewfsid().
 		 */
 		hashval = fnv_32_str(vfc->vfc_name, FNV1_32_INIT);
 		hashval &= 0xff;
 		secondpass = 0;
 		do {
 			/* Look for and fix any collision. */
 			TAILQ_FOREACH(tvfc, &vfsconf, vfc_list) {
 				if (hashval == tvfc->vfc_typenum) {
 					/*
 					 * Wrap from 255 back to 1 once; on
 					 * the second pass keep counting up
 					 * past 255.
 					 */
 					if (hashval == 255 && secondpass == 0) {
 						hashval = 1;
 						secondpass = 1;
 					} else
 						hashval++;
 					break;
 				}
 			}
 			/* tvfc is NULL only if the scan found no collision. */
 		} while (tvfc != NULL);
 		vfc->vfc_typenum = hashval;
 		if (vfc->vfc_typenum >= maxvfsconf)
 			maxvfsconf = vfc->vfc_typenum + 1;
 	} else
 		vfc->vfc_typenum = maxvfsconf++;
 	TAILQ_INSERT_TAIL(&vfsconf, vfc, vfc_list);
 
 	/*
 	 * Initialise unused ``struct vfsops'' fields, to use
 	 * the vfs_std*() functions.  Note, we need the mount
 	 * and unmount operations, at the least.  The check
 	 * for vfsops available is just a debugging aid.
 	 */
 	KASSERT(vfc->vfc_vfsops != NULL,
 	    ("Filesystem %s has no vfsops", vfc->vfc_name));
 	/*
 	 * Check the mount and unmount operations.
 	 */
 	vfsops = vfc->vfc_vfsops;
 	KASSERT(vfsops->vfs_mount != NULL,
 	    ("Filesystem %s has no mount op", vfc->vfc_name));
 	KASSERT(vfsops->vfs_unmount != NULL,
 	    ("Filesystem %s has no unmount op", vfc->vfc_name));
 
 	if (vfsops->vfs_root == NULL)
 		/* return file system's root vnode */
 		vfsops->vfs_root =	vfs_stdroot;
 	if (vfsops->vfs_quotactl == NULL)
 		/* quota control */
 		vfsops->vfs_quotactl =	vfs_stdquotactl;
 	if (vfsops->vfs_statfs == NULL)
 		/* return file system's status */
 		vfsops->vfs_statfs =	vfs_stdstatfs;
 	if (vfsops->vfs_sync == NULL)
 		/*
 		 * flush unwritten data (nosync)
 		 * file systems can use vfs_stdsync
 		 * explicitly by setting it in the
 		 * vfsop vector.
 		 */
 		vfsops->vfs_sync =	vfs_stdnosync;
 	if (vfsops->vfs_vget == NULL)
 		/* convert an inode number to a vnode */
 		vfsops->vfs_vget =	vfs_stdvget;
 	if (vfsops->vfs_fhtovp == NULL)
 		/* turn an NFS file handle into a vnode */
 		vfsops->vfs_fhtovp =	vfs_stdfhtovp;
 	if (vfsops->vfs_checkexp == NULL)
 		/* check if file system is exported */
 		vfsops->vfs_checkexp =	vfs_stdcheckexp;
 	if (vfsops->vfs_init == NULL)
 		/* file system specific initialisation */
 		vfsops->vfs_init =	vfs_stdinit;
 	if (vfsops->vfs_uninit == NULL)
 		/* file system specific uninitialisation */
 		vfsops->vfs_uninit =	vfs_stduninit;
 	if (vfsops->vfs_extattrctl == NULL)
 		/* extended attribute control */
 		vfsops->vfs_extattrctl = vfs_stdextattrctl;
 	if (vfsops->vfs_sysctl == NULL)
 		vfsops->vfs_sysctl = vfs_stdsysctl;
 
 	/*
 	 * VFCF_SBDRY filesystems get the sigdefer wrapper table as their
 	 * public vfsops; their own table is kept in vfc_vfsops_sd.
 	 */
 	if ((vfc->vfc_flags & VFCF_SBDRY) != 0) {
 		vfc->vfc_vfsops_sd = vfc->vfc_vfsops;
 		vfc->vfc_vfsops = &vfsops_sigdefer;
 	}
 
 	if (vfc->vfc_flags & VFCF_JAIL)
 		prison_add_vfs(vfc);
 
 	/*
 	 * Call init function for this VFS...
 	 */
 	if ((vfc->vfc_flags & VFCF_SBDRY) != 0)
 		vfc->vfc_vfsops_sd->vfs_init(vfc);
 	else
 		vfc->vfc_vfsops->vfs_init(vfc);
 	vfsconf_unlock();
 
 	/*
 	 * If this filesystem has a sysctl node under vfs
 	 * (i.e. vfs.xxfs), then change the oid number of that node to
 	 * match the filesystem's type number.  This allows user code
 	 * which uses the type number to read sysctl variables defined
 	 * by the filesystem to continue working. Since the oids are
 	 * in a sorted list, we need to make sure the order is
 	 * preserved by re-registering the oid after modifying its
 	 * number.
 	 */
 	sysctl_wlock();
 	SLIST_FOREACH(oidp, SYSCTL_CHILDREN(&sysctl___vfs), oid_link) {
 		if (strcmp(oidp->oid_name, vfc->vfc_name) == 0) {
 			sysctl_unregister_oid(oidp);
 			oidp->oid_number = vfc->vfc_typenum;
 			sysctl_register_oid(oidp);
 			break;
 		}
 	}
 	sysctl_wunlock();
 
 	return (0);
 }
 
 /*
  * Remove registration of a filesystem type.
  *
  * Returns EINVAL if no filesystem named vfc->vfc_name is registered, EBUSY
  * if the registered entry still has references (vfc_refcount != 0), the
  * error from the filesystem's vfs_uninit method if that fails, and 0 on
  * success.  On any error the filesystem stays registered.
  */
 static int
 vfs_unregister(struct vfsconf *vfc)
 {
 	struct vfsconf *vfsp;
 	int error, maxtypenum;
 
 	vfsconf_lock();
 	vfsp = vfs_byname_locked(vfc->vfc_name);
 	if (vfsp == NULL) {
 		vfsconf_unlock();
 		return (EINVAL);
 	}
 	if (vfsp->vfc_refcount != 0) {
 		vfsconf_unlock();
 		return (EBUSY);
 	}
 
 	/*
 	 * For VFCF_SBDRY filesystems vfc_vfsops points at the sigdefer
 	 * wrapper table, so the filesystem's own vfs_uninit is reached
 	 * through vfc_vfsops_sd.
 	 */
 	error = 0;
 	if ((vfc->vfc_flags & VFCF_SBDRY) != 0) {
 		if (vfc->vfc_vfsops_sd->vfs_uninit != NULL)
 			error = vfc->vfc_vfsops_sd->vfs_uninit(vfsp);
 	} else {
 		if (vfc->vfc_vfsops->vfs_uninit != NULL)
 			error = vfc->vfc_vfsops->vfs_uninit(vfsp);
 	}
 	/*
 	 * A failed uninit vetoes the unregistration for both kinds of
 	 * filesystem.  This check used to sit inside the non-SBDRY branch,
 	 * so uninit errors from VFCF_SBDRY filesystems were ignored.
 	 */
 	if (error != 0) {
 		vfsconf_unlock();
 		return (error);
 	}
 
 	TAILQ_REMOVE(&vfsconf, vfsp, vfc_list);
 
 	/* Recompute maxvfsconf from the type numbers still registered. */
 	maxtypenum = VFS_GENERIC;
 	TAILQ_FOREACH(vfsp, &vfsconf, vfc_list)
 		if (maxtypenum < vfsp->vfc_typenum)
 			maxtypenum = vfsp->vfc_typenum;
 	maxvfsconf = maxtypenum + 1;
 	vfsconf_unlock();
 	return (0);
 }
 
 /*
  * Standard kernel module handling code for filesystem modules.
  * Referenced from VFS_SET().
  *
  * "data" is the module's struct vfsconf.  MOD_LOAD registers it and
  * MOD_UNLOAD unregisters it (both are no-ops returning 0 when data is
  * NULL); every other event type yields EOPNOTSUPP.
  */
 int
 vfs_modevent(module_t mod, int type, void *data)
 {
 	struct vfsconf *vfc = data;
 
 	if (type != MOD_LOAD && type != MOD_UNLOAD)
 		return (EOPNOTSUPP);
 	if (vfc == NULL)
 		return (0);
 	if (type == MOD_LOAD)
 		return (vfs_register(vfc));
 	return (vfs_unregister(vfc));
 }
Index: head/sys/kern/vfs_mount.c
===================================================================
--- head/sys/kern/vfs_mount.c	(revision 362157)
+++ head/sys/kern/vfs_mount.c	(revision 362158)
@@ -1,2367 +1,2404 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 1999-2004 Poul-Henning Kamp
  * Copyright (c) 1999 Michael Smith
  * Copyright (c) 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
  * (c) UNIX System Laboratories, Inc.
  * All or some portions of this file are derived from material licensed
  * to the University of California by American Telephone and Telegraph
  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
  * the permission of UNIX System Laboratories, Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/conf.h>
 #include <sys/smp.h>
 #include <sys/eventhandler.h>
 #include <sys/fcntl.h>
 #include <sys/jail.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/libkern.h>
 #include <sys/malloc.h>
 #include <sys/mount.h>
 #include <sys/mutex.h>
 #include <sys/namei.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/filedesc.h>
 #include <sys/reboot.h>
 #include <sys/sbuf.h>
 #include <sys/syscallsubr.h>
 #include <sys/sysproto.h>
 #include <sys/sx.h>
 #include <sys/sysctl.h>
 #include <sys/sysent.h>
 #include <sys/systm.h>
 #include <sys/vnode.h>
 #include <vm/uma.h>
 
 #include <geom/geom.h>
 
 #include <machine/stdarg.h>
 
 #include <rpc/types.h>
 #include <rpc/auth.h>
 
 #include <security/audit/audit.h>
 #include <security/mac/mac_framework.h>
 
 /*
  * Limit applied by vfs_buildopts() to each option name length, each option
  * value length, and the running total accounted for one option list.
  */
 #define	VFS_MOUNTARG_SIZE_MAX	(1024 * 64)
 
 static int	vfs_domount(struct thread *td, const char *fstype, char *fspath,
 		    uint64_t fsflags, struct vfsoptlist **optlist);
 static void	free_mntarg(struct mntarg *ma);
 
 static int	usermount = 0;
 SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0,
     "Unprivileged users may mount and unmount file systems");
 
 static bool	default_autoro = false;
 SYSCTL_BOOL(_vfs, OID_AUTO, default_autoro, CTLFLAG_RW, &default_autoro, 0,
     "Retry failed r/w mount as r/o if no explicit ro/rw option is specified");
 
 MALLOC_DEFINE(M_MOUNT, "mount", "vfs mount structure");
 MALLOC_DEFINE(M_STATFS, "statfs", "statfs structure");
 /* UMA zone for struct mount; created in vfs_mount_init(). */
 static uma_zone_t mount_zone;
 
 /* List of mounted filesystems. */
 struct mntlist mountlist = TAILQ_HEAD_INITIALIZER(mountlist);
 
 /* For any iteration/modification of mountlist */
 struct mtx mountlist_mtx;
 MTX_SYSINIT(mountlist, &mountlist_mtx, "mountlist", MTX_DEF);
 
 EVENTHANDLER_LIST_DEFINE(vfs_mounted);
 EVENTHANDLER_LIST_DEFINE(vfs_unmounted);
 
 /*
  * Global opts, taken by all filesystems
  * (NULL-terminated list of option names).
  */
 static const char *global_opts[] = {
 	"errmsg",
 	"fstype",
 	"fspath",
 	"ro",
 	"rw",
 	"nosuid",
 	"noexec",
 	NULL
 };
 
 /*
  * Item-initialization callback handed to uma_zcreate() for mount_zone.
  * Sets up the locks and per-CPU counters embedded in a struct mount and
  * the few fields that vfs_mount_alloc() expects to find preset.  Always
  * returns 0; "size" and "flags" are unused.
  */
 static int
 mount_init(void *mem, int size, int flags)
 {
 	struct mount *mp = mem;
 
 	/* Locks. */
 	mtx_init(&mp->mnt_mtx, "struct mount mtx", NULL, MTX_DEF);
 	mtx_init(&mp->mnt_listmtx, "struct mount vlist mtx", NULL, MTX_DEF);
 	lockinit(&mp->mnt_explock, PVFS, "explock", 0, 0);
 
 	/* Zeroed per-CPU counters. */
 	mp->mnt_thread_in_ops_pcpu =
 	    uma_zalloc_pcpu(pcpu_zone_int, M_WAITOK | M_ZERO);
 	mp->mnt_ref_pcpu =
 	    uma_zalloc_pcpu(pcpu_zone_int, M_WAITOK | M_ZERO);
 	mp->mnt_lockref_pcpu =
 	    uma_zalloc_pcpu(pcpu_zone_int, M_WAITOK | M_ZERO);
 	mp->mnt_writeopcount_pcpu =
 	    uma_zalloc_pcpu(pcpu_zone_int, M_WAITOK | M_ZERO);
 
 	/* vfs_mount_alloc() panics unless mnt_vfs_ops is 1 on entry. */
 	mp->mnt_ref = 0;
 	mp->mnt_vfs_ops = 1;
 	mp->mnt_rootvnode = NULL;
 
 	return (0);
 }
 
 /*
  * Item-teardown callback handed to uma_zcreate() for mount_zone.  Releases
  * what mount_init() set up, in the opposite order: the per-CPU counters
  * first, then the export lock, then the two mutexes.  "size" is unused.
  */
 static void
 mount_fini(void *mem, int size)
 {
 	struct mount *mp = mem;
 
 	/* Per-CPU counters. */
 	uma_zfree_pcpu(pcpu_zone_int, mp->mnt_writeopcount_pcpu);
 	uma_zfree_pcpu(pcpu_zone_int, mp->mnt_lockref_pcpu);
 	uma_zfree_pcpu(pcpu_zone_int, mp->mnt_ref_pcpu);
 	uma_zfree_pcpu(pcpu_zone_int, mp->mnt_thread_in_ops_pcpu);
 
 	/* Locks. */
 	lockdestroy(&mp->mnt_explock);
 	mtx_destroy(&mp->mnt_listmtx);
 	mtx_destroy(&mp->mnt_mtx);
 }
 
 /*
  * SYSINIT hook: create the UMA zone from which struct mount is allocated,
  * with mount_init()/mount_fini() as its item init/fini callbacks.
  */
 static void
 vfs_mount_init(void *dummy __unused)
 {
 
 	mount_zone = uma_zcreate("Mountpoints",
 	    sizeof(struct mount),
 	    NULL, NULL,
 	    mount_init, mount_fini,
 	    UMA_ALIGN_CACHE, UMA_ZONE_NOFREE);
 }
 SYSINIT(vfs_mount, SI_SUB_VFS, SI_ORDER_ANY, vfs_mount_init, NULL);
 
 /*
  * ---------------------------------------------------------------------
  * Functions for building and sanitizing the mount options
  */
 
 /*
  * Remove one mount option: unlink "opt" from "opts" and free its name, its
  * value (when present) and the option structure itself.
  */
 static void
 vfs_freeopt(struct vfsoptlist *opts, struct vfsopt *opt)
 {
 
 	TAILQ_REMOVE(opts, opt, link);
 
 	if (opt->value != NULL)
 		free(opt->value, M_MOUNT);
 	free(opt->name, M_MOUNT);
 	free(opt, M_MOUNT);
 }
 
 /* Release all resources related to the mount options. */
 void
 vfs_freeopts(struct vfsoptlist *opts)
 {
 	struct vfsopt *opt;
 
 	while (!TAILQ_EMPTY(opts)) {
 		opt = TAILQ_FIRST(opts);
 		vfs_freeopt(opts, opt);
 	}
 	free(opts, M_MOUNT);
 }
 
 /*
  * Delete every option called "name" from "opts".  A NULL list is accepted
  * and treated as empty.
  */
 void
 vfs_deleteopt(struct vfsoptlist *opts, const char *name)
 {
 	struct vfsopt *cur, *next;
 
 	if (opts == NULL)
 		return;
 
 	/* The _SAFE variant is needed because entries are freed en route. */
 	TAILQ_FOREACH_SAFE(cur, opts, link, next) {
 		if (strcmp(cur->name, name) != 0)
 			continue;
 		vfs_freeopt(opts, cur);
 	}
 }
 
 /*
  * Return 1 if "opt" is one of the spellings of the read-only option
  * ("ro", "rdonly" or "norw"), 0 otherwise.
  */
 static int
 vfs_isopt_ro(const char *opt)
 {
 	static const char *const ro_names[] = { "ro", "rdonly", "norw" };
 	size_t i;
 
 	for (i = 0; i < sizeof(ro_names) / sizeof(ro_names[0]); i++) {
 		if (strcmp(opt, ro_names[i]) == 0)
 			return (1);
 	}
 	return (0);
 }
 
 /*
  * Return 1 if "opt" is one of the spellings of the read-write option
  * ("rw" or "noro"), 0 otherwise.
  */
 static int
 vfs_isopt_rw(const char *opt)
 {
 
 	return (strcmp(opt, "rw") == 0 || strcmp(opt, "noro") == 0);
 }
 
 /*
  * Check if options are equal (with or without the "no" prefix).
  *
  * Returns 1 when opt1 and opt2 name the same option and 0 otherwise.
  * Two names match if they are identical, if one is the other with a
  * leading "no", if they share a dotted prefix and their remainders differ
  * only by a leading "no", or if both are any of the ro/rw spellings.
  */
 static int
 vfs_equalopts(const char *opt1, const char *opt2)
 {
 	char *p;
 
 	/* "opt" vs. "opt" or "noopt" vs. "noopt" */
 	if (strcmp(opt1, opt2) == 0)
 		return (1);
 	/* "noopt" vs. "opt" */
 	if (strncmp(opt1, "no", 2) == 0 && strcmp(opt1 + 2, opt2) == 0)
 		return (1);
 	/* "opt" vs. "noopt" */
 	if (strncmp(opt2, "no", 2) == 0 && strcmp(opt1, opt2 + 2) == 0)
 		return (1);
 	/*
 	 * While opt1 contains a '.' and both names agree up to and including
 	 * it (p is advanced past the dot inside the condition), step both
 	 * names past that prefix and retry the "no" comparisons on the rest.
 	 */
 	while ((p = strchr(opt1, '.')) != NULL &&
 	    !strncmp(opt1, opt2, ++p - opt1)) {
 		opt2 += p - opt1;
 		opt1 = p;
 		/* "foo.noopt" vs. "foo.opt" */
 		if (strncmp(opt1, "no", 2) == 0 && strcmp(opt1 + 2, opt2) == 0)
 			return (1);
 		/* "foo.opt" vs. "foo.noopt" */
 		if (strncmp(opt2, "no", 2) == 0 && strcmp(opt1, opt2 + 2) == 0)
 			return (1);
 	}
 	/* "ro" / "rdonly" / "norw" / "rw" / "noro" */
 	if ((vfs_isopt_ro(opt1) || vfs_isopt_rw(opt1)) &&
 	    (vfs_isopt_ro(opt2) || vfs_isopt_rw(opt2)))
 		return (1);
 	return (0);
 }
 
 /*
  * If a mount option is specified several times (with or without the "no"
  * prefix), only keep the last occurrence of it.
  *
  * The list is walked from the tail; for each option, every earlier entry
  * that vfs_equalopts() considers the same option is freed.
  */
 static void
 vfs_sanitizeopts(struct vfsoptlist *opts)
 {
 	struct vfsopt *keep, *older, *next_older;
 
 	TAILQ_FOREACH_REVERSE(keep, opts, vfsoptlist, link) {
 		for (older = TAILQ_PREV(keep, vfsoptlist, link);
 		    older != NULL; older = next_older) {
 			/* Fetch the predecessor before "older" may be freed. */
 			next_older = TAILQ_PREV(older, vfsoptlist, link);
 			if (vfs_equalopts(keep->name, older->name))
 				vfs_freeopt(opts, older);
 		}
 	}
 }
 
 /*
  * Build a linked list of mount options from a struct uio.
  *
  * The iovecs are consumed in pairs: entry i is an option name and entry
  * i + 1 its value (a zero-length value leaves opt->value NULL).  Data is
  * copied with bcopy() for UIO_SYSSPACE and with copyin() otherwise.
  *
  * On success the sanitized list is stored in *options and 0 is returned.
  * On failure everything allocated so far is freed, *options is left
  * untouched, and the error is returned: EINVAL for an over-limit size or
  * a name that is empty or not NUL-terminated, or the copyin() error.
  */
 int
 vfs_buildopts(struct uio *auio, struct vfsoptlist **options)
 {
 	struct vfsoptlist *opts;
 	struct vfsopt *opt;
 	size_t memused, namelen, optlen;
 	unsigned int i, iovcnt;
 	int error;
 
 	opts = malloc(sizeof(struct vfsoptlist), M_MOUNT, M_WAITOK);
 	TAILQ_INIT(opts);
 	memused = 0;
 	iovcnt = auio->uio_iovcnt;
 	/* NOTE(review): reads iov[i + 1], so iovcnt is presumed even. */
 	for (i = 0; i < iovcnt; i += 2) {
 		namelen = auio->uio_iov[i].iov_len;
 		optlen = auio->uio_iov[i + 1].iov_len;
 		memused += sizeof(struct vfsopt) + optlen + namelen;
 		/*
 		 * Avoid consuming too much memory, and attempts to overflow
 		 * memused.
 		 */
 		if (memused > VFS_MOUNTARG_SIZE_MAX ||
 		    optlen > VFS_MOUNTARG_SIZE_MAX ||
 		    namelen > VFS_MOUNTARG_SIZE_MAX) {
 			error = EINVAL;
 			goto bad;
 		}
 
 		opt = malloc(sizeof(struct vfsopt), M_MOUNT, M_WAITOK);
 		opt->name = malloc(namelen, M_MOUNT, M_WAITOK);
 		opt->value = NULL;
 		opt->len = 0;
 		/* Index of this name/value pair within the iovec array. */
 		opt->pos = i / 2;
 		opt->seen = 0;
 
 		/*
 		 * Do this early, so jumps to "bad" will free the current
 		 * option.
 		 */
 		TAILQ_INSERT_TAIL(opts, opt, link);
 
 		if (auio->uio_segflg == UIO_SYSSPACE) {
 			bcopy(auio->uio_iov[i].iov_base, opt->name, namelen);
 		} else {
 			error = copyin(auio->uio_iov[i].iov_base, opt->name,
 			    namelen);
 			if (error)
 				goto bad;
 		}
 		/* Ensure names are null-terminated strings. */
 		if (namelen == 0 || opt->name[namelen - 1] != '\0') {
 			error = EINVAL;
 			goto bad;
 		}
 		if (optlen != 0) {
 			opt->len = optlen;
 			opt->value = malloc(optlen, M_MOUNT, M_WAITOK);
 			if (auio->uio_segflg == UIO_SYSSPACE) {
 				bcopy(auio->uio_iov[i + 1].iov_base, opt->value,
 				    optlen);
 			} else {
 				error = copyin(auio->uio_iov[i + 1].iov_base,
 				    opt->value, optlen);
 				if (error)
 					goto bad;
 			}
 		}
 	}
 	/* Drop earlier duplicates so only the last occurrence remains. */
 	vfs_sanitizeopts(opts);
 	*options = opts;
 	return (0);
 bad:
 	vfs_freeopts(opts);
 	return (error);
 }
 
 /*
  * Merge the old mount options with the new ones passed
  * in the MNT_UPDATE case.
  *
  * XXX: This function will keep a "nofoo" option in the new
  * options.  E.g, if the option's canonical name is "foo",
  * "nofoo" ends up in the mount point's active options.
  */
 static void
 vfs_mergeopts(struct vfsoptlist *toopts, struct vfsoptlist *oldopts)
 {
 	struct vfsopt *src, *copy;
 
 	TAILQ_FOREACH(src, oldopts, link) {
 		/* Deep-copy the old option: name, value (if any), len, seen. */
 		copy = malloc(sizeof(*copy), M_MOUNT, M_WAITOK);
 		copy->name = strdup(src->name, M_MOUNT);
 		copy->value = NULL;
 		if (src->len != 0) {
 			copy->value = malloc(src->len, M_MOUNT, M_WAITOK);
 			bcopy(src->value, copy->value, src->len);
 		}
 		copy->len = src->len;
 		copy->seen = src->seen;
 		/* NOTE(review): copy->pos is not set here; confirm intent. */
 
 		/*
 		 * Old options go in front of the new ones, so that the
 		 * sanitize pass below keeps the new value on a conflict.
 		 */
 		TAILQ_INSERT_HEAD(toopts, copy, link);
 	}
 	vfs_sanitizeopts(toopts);
 }
 
 /*
  * Mount a filesystem.
  */
 #ifndef _SYS_SYSPROTO_H_
 /* Argument structure of sys_nmount(). */
 struct nmount_args {
 	struct iovec *iovp;	/* iovec array, copied in via copyinuio() */
 	unsigned int iovcnt;	/* entry count; must be even and >= 4 */
 	int flags;		/* mount flags, widened to 64 bits on entry */
 };
 #endif
 int
 sys_nmount(struct thread *td, struct nmount_args *uap)
 {
 	struct uio *auio;
 	int error;
 	u_int iovcnt;
 	uint64_t flags;
 
 	/*
 	 * Mount flags are now 64-bits. On 32-bit archtectures only
 	 * 32-bits are passed in, but from here on everything handles
 	 * 64-bit flags correctly.
 	 */
 	flags = uap->flags;
 
 	AUDIT_ARG_FFLAGS(flags);
 	CTR4(KTR_VFS, "%s: iovp %p with iovcnt %d and flags %d", __func__,
 	    uap->iovp, uap->iovcnt, flags);
 
 	/*
 	 * Filter out MNT_ROOTFS.  We do not want clients of nmount() in
 	 * userspace to set this flag, but we must filter it out if we want
 	 * MNT_UPDATE on the root file system to work.
 	 * MNT_ROOTFS should only be set by the kernel when mounting its
 	 * root file system.
 	 */
 	flags &= ~MNT_ROOTFS;
 
 	iovcnt = uap->iovcnt;
 	/*
 	 * Check that we have an even number of iovec's
 	 * and that we have at least two options.
 	 */
 	if ((iovcnt & 1) || (iovcnt < 4)) {
 		CTR2(KTR_VFS, "%s: failed for invalid iovcnt %d", __func__,
 		    uap->iovcnt);
 		return (EINVAL);
 	}
 
 	error = copyinuio(uap->iovp, iovcnt, &auio);
 	if (error) {
 		CTR2(KTR_VFS, "%s: failed for invalid uio op with %d errno",
 		    __func__, error);
 		return (error);
 	}
 	error = vfs_donmount(td, flags, auio);
 
 	free(auio, M_IOV);
 	return (error);
 }
 
 /*
  * ---------------------------------------------------------------------
  * Various utility functions
  */
 
 /*
  * Add one reference to the mount point.  When vfs_op_thread_enter()
  * succeeds the per-CPU "ref" counter is bumped; otherwise the reference is
  * taken with MNT_REF() under the mount interlock.
  */
 void
 vfs_ref(struct mount *mp)
 {
 
 	CTR2(KTR_VFS, "%s: mp %p", __func__, mp);
 
 	if (!vfs_op_thread_enter(mp)) {
 		MNT_ILOCK(mp);
 		MNT_REF(mp);
 		MNT_IUNLOCK(mp);
 		return;
 	}
 
 	vfs_mp_count_add_pcpu(mp, ref, 1);
 	vfs_op_thread_exit(mp);
 }
 
 /*
  * Drop one reference from the mount point.  When vfs_op_thread_enter()
  * succeeds the per-CPU "ref" counter is decremented; otherwise the
  * reference is dropped with MNT_REL() under the mount interlock.
  */
 void
 vfs_rel(struct mount *mp)
 {
 
 	CTR2(KTR_VFS, "%s: mp %p", __func__, mp);
 
 	if (!vfs_op_thread_enter(mp)) {
 		MNT_ILOCK(mp);
 		MNT_REL(mp);
 		MNT_IUNLOCK(mp);
 		return;
 	}
 
 	vfs_mp_count_sub_pcpu(mp, ref, 1);
 	vfs_op_thread_exit(mp);
 }
 
 /*
  * Allocate and initialize the mount point struct.
  *
  * vp is recorded as the covered vnode, vfsp supplies the operations
  * vector, type number and type name (its vfc_refcount is incremented),
  * fspath is copied into f_mntonname, and cred is duplicated into
  * mnt_cred and supplies f_owner.  The new mount is returned after a
  * vfs_busy(mp, MBF_NOWAIT) call whose result is ignored.
  */
 struct mount *
 vfs_mount_alloc(struct vnode *vp, struct vfsconf *vfsp, const char *fspath,
     struct ucred *cred)
 {
 	struct mount *mp;
 
 	mp = uma_zalloc(mount_zone, M_WAITOK);
 	/* Only the mnt_startzero..mnt_endzero range is cleared. */
 	bzero(&mp->mnt_startzero,
 	    __rangeof(struct mount, mnt_startzero, mnt_endzero));
 	TAILQ_INIT(&mp->mnt_nvnodelist);
 	mp->mnt_nvnodelistsize = 0;
 	TAILQ_INIT(&mp->mnt_lazyvnodelist);
 	mp->mnt_lazyvnodelistsize = 0;
 	/* Sanity checks on the state a fresh zone item must have. */
 	if (mp->mnt_ref != 0 || mp->mnt_lockref != 0 ||
 	    mp->mnt_writeopcount != 0)
 		panic("%s: non-zero counters on new mp %p\n", __func__, mp);
 	if (mp->mnt_vfs_ops != 1)
 		panic("%s: vfs_ops should be 1 but %d found\n", __func__,
 		    mp->mnt_vfs_ops);
 	(void) vfs_busy(mp, MBF_NOWAIT);
 	atomic_add_acq_int(&vfsp->vfc_refcount, 1);
 	mp->mnt_op = vfsp->vfc_vfsops;
 	mp->mnt_vfc = vfsp;
 	mp->mnt_stat.f_type = vfsp->vfc_typenum;
 	mp->mnt_gen++;
 	strlcpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
 	mp->mnt_vnodecovered = vp;
 	mp->mnt_cred = crdup(cred);
 	mp->mnt_stat.f_owner = cred->cr_uid;
 	strlcpy(mp->mnt_stat.f_mntonname, fspath, MNAMELEN);
 	mp->mnt_iosize_max = DFLTPHYS;
 #ifdef MAC
 	mac_mount_init(mp);
 	mac_mount_create(cred, mp);
 #endif
 	/* Random per-mount hash seed. */
 	arc4rand(&mp->mnt_hashseed, sizeof mp->mnt_hashseed, 0);
 	TAILQ_INIT(&mp->mnt_uppers);
 	return (mp);
 }
 
 /*
  * Destroy the mount struct previously allocated by vfs_mount_alloc().
  *
  * Marks the mount MNTK_REFEXPIRE, wakes any MNTK_MWAIT sleeper, waits
  * for mnt_ref to reach zero, and then panics if any vnode, counter or
  * root vnode is still attached.  Finally it releases the covered vnode,
  * the mount options and the credential, and returns the structure to
  * mount_zone.
  */
 void
 vfs_mount_destroy(struct mount *mp)
 {
 
 	if (mp->mnt_vfs_ops == 0)
 		panic("%s: entered with zero vfs_ops\n", __func__);
 
 	vfs_assert_mount_counters(mp);
 
 	MNT_ILOCK(mp);
 	mp->mnt_kern_flag |= MNTK_REFEXPIRE;
 	if (mp->mnt_kern_flag & MNTK_MWAIT) {
 		mp->mnt_kern_flag &= ~MNTK_MWAIT;
 		wakeup(mp);
 	}
 	/* Sleep on the mount interlock until all references are gone. */
 	while (mp->mnt_ref)
 		msleep(mp, MNT_MTX(mp), PVFS, "mntref", 0);
 	KASSERT(mp->mnt_ref == 0,
 	    ("%s: invalid refcount in the drain path @ %s:%d", __func__,
 	    __FILE__, __LINE__));
 	if (mp->mnt_writeopcount != 0)
 		panic("vfs_mount_destroy: nonzero writeopcount");
 	if (mp->mnt_secondary_writes != 0)
 		panic("vfs_mount_destroy: nonzero secondary_writes");
 	/* Undo the vfc_refcount increment done by vfs_mount_alloc(). */
 	atomic_subtract_rel_int(&mp->mnt_vfc->vfc_refcount, 1);
 	if (!TAILQ_EMPTY(&mp->mnt_nvnodelist)) {
 		struct vnode *vp;
 
 		/* Print every leftover vnode before panicking. */
 		TAILQ_FOREACH(vp, &mp->mnt_nvnodelist, v_nmntvnodes)
 			vn_printf(vp, "dangling vnode ");
 		panic("unmount: dangling vnode");
 	}
 	KASSERT(TAILQ_EMPTY(&mp->mnt_uppers), ("mnt_uppers"));
 	if (mp->mnt_nvnodelistsize != 0)
 		panic("vfs_mount_destroy: nonzero nvnodelistsize");
 	if (mp->mnt_lazyvnodelistsize != 0)
 		panic("vfs_mount_destroy: nonzero lazyvnodelistsize");
 	if (mp->mnt_lockref != 0)
 		panic("vfs_mount_destroy: nonzero lock refcount");
 	MNT_IUNLOCK(mp);
 
 	/* Same value mount_init() sets and vfs_mount_alloc() checks for. */
 	if (mp->mnt_vfs_ops != 1)
 		panic("%s: vfs_ops should be 1 but %d found\n", __func__,
 		    mp->mnt_vfs_ops);
 
 	if (mp->mnt_rootvnode != NULL)
 		panic("%s: mount point still has a root vnode %p\n", __func__,
 		    mp->mnt_rootvnode);
 
 	if (mp->mnt_vnodecovered != NULL)
 		vrele(mp->mnt_vnodecovered);
 #ifdef MAC
 	mac_mount_destroy(mp);
 #endif
 	if (mp->mnt_opt != NULL)
 		vfs_freeopts(mp->mnt_opt);
 	crfree(mp->mnt_cred);
 	uma_zfree(mount_zone, mp);
 }
 
 /*
  * Decide whether a failed read/write mount attempt is worth retrying
  * read-only.
  *
  * Returns false for updates of an existing mount (MNT_UPDATE) and for
  * mounts that were already requested read-only (MNT_RDONLY).  Otherwise
  * returns true only for the errors the storage layer may use to signal
  * read-only media: ENODEV (generic, geom, ...), EACCES (cam/scsi, ...)
  * and EROFS (md, mmcsd, ...).  A read-only retry does no harm if the
  * error was returned for some other reason.
  */
 static bool
 vfs_should_downgrade_to_ro_mount(uint64_t fsflags, int error)
 {
 
 	if ((fsflags & (MNT_UPDATE | MNT_RDONLY)) != 0)
 		return (false);
 
 	return (error == ENODEV || error == EACCES || error == EROFS);
 }
 
 /*
  * Mount or update a filesystem described by an option list.
  *
  * The options in fsoptions are converted with vfs_buildopts().  "fstype"
  * and "fspath" are mandatory and must be NUL-terminated.  Well-known
  * option names are then folded into fsflags; some are renamed in place
  * and some are removed from the list before vfs_domount() is called.
  * If the mount fails with an error accepted by
  * vfs_should_downgrade_to_ro_mount() and automatic downgrade is in effect
  * (default_autoro, overridden by explicit ro/rw style options or by
  * "autoro"), the mount is retried with MNT_RDONLY set.
  *
  * When the caller supplied an "errmsg" option, its buffer is copied back
  * to the corresponding iovec before returning.
  *
  * Returns 0 on success or an errno value.
  */
 int
 vfs_donmount(struct thread *td, uint64_t fsflags, struct uio *fsoptions)
 {
 	struct vfsoptlist *optlist;
 	struct vfsopt *opt, *tmp_opt;
 	char *fstype, *fspath, *errmsg;
 	int error, fstypelen, fspathlen, errmsg_len, errmsg_pos;
 	bool autoro;
 
 	errmsg = fspath = NULL;
 	errmsg_len = fspathlen = 0;
 	errmsg_pos = -1;
 	autoro = default_autoro;
 
 	error = vfs_buildopts(fsoptions, &optlist);
 	if (error)
 		return (error);
 
 	if (vfs_getopt(optlist, "errmsg", (void **)&errmsg, &errmsg_len) == 0)
 		errmsg_pos = vfs_getopt_pos(optlist, "errmsg");
 
 	/*
 	 * We need these two options before the others,
 	 * and they are mandatory for any filesystem.
 	 * Ensure they are NUL terminated as well.
 	 */
 	fstypelen = 0;
 	error = vfs_getopt(optlist, "fstype", (void **)&fstype, &fstypelen);
 	if (error || fstypelen <= 0 || fstype[fstypelen - 1] != '\0') {
 		error = EINVAL;
 		/*
 		 * strlcpy() rather than strncpy(): the message is always
 		 * NUL-terminated, even when the caller's buffer is shorter
 		 * than the message.
 		 */
 		if (errmsg != NULL)
 			strlcpy(errmsg, "Invalid fstype", errmsg_len);
 		goto bail;
 	}
 	fspathlen = 0;
 	error = vfs_getopt(optlist, "fspath", (void **)&fspath, &fspathlen);
 	if (error || fspathlen <= 0 || fspath[fspathlen - 1] != '\0') {
 		error = EINVAL;
 		if (errmsg != NULL)
 			strlcpy(errmsg, "Invalid fspath", errmsg_len);
 		goto bail;
 	}
 
 	/*
 	 * We need to see if we have the "update" option
 	 * before we call vfs_domount(), since vfs_domount() has special
 	 * logic based on MNT_UPDATE.  This is very important
 	 * when we want to update the root filesystem.
 	 *
 	 * Options flagged with do_freeopt are consumed here and removed
 	 * from the list; the others stay in the list that is handed to
 	 * vfs_domount().
 	 */
 	TAILQ_FOREACH_SAFE(opt, optlist, link, tmp_opt) {
 		int do_freeopt = 0;
 
 		if (strcmp(opt->name, "update") == 0) {
 			fsflags |= MNT_UPDATE;
 			do_freeopt = 1;
 		}
 		else if (strcmp(opt->name, "async") == 0)
 			fsflags |= MNT_ASYNC;
 		else if (strcmp(opt->name, "force") == 0) {
 			fsflags |= MNT_FORCE;
 			do_freeopt = 1;
 		}
 		else if (strcmp(opt->name, "reload") == 0) {
 			fsflags |= MNT_RELOAD;
 			do_freeopt = 1;
 		}
 		else if (strcmp(opt->name, "multilabel") == 0)
 			fsflags |= MNT_MULTILABEL;
 		else if (strcmp(opt->name, "noasync") == 0)
 			fsflags &= ~MNT_ASYNC;
 		else if (strcmp(opt->name, "noatime") == 0)
 			fsflags |= MNT_NOATIME;
 		else if (strcmp(opt->name, "atime") == 0) {
 			free(opt->name, M_MOUNT);
 			opt->name = strdup("nonoatime", M_MOUNT);
 		}
 		else if (strcmp(opt->name, "noclusterr") == 0)
 			fsflags |= MNT_NOCLUSTERR;
 		else if (strcmp(opt->name, "clusterr") == 0) {
 			free(opt->name, M_MOUNT);
 			opt->name = strdup("nonoclusterr", M_MOUNT);
 		}
 		else if (strcmp(opt->name, "noclusterw") == 0)
 			fsflags |= MNT_NOCLUSTERW;
 		else if (strcmp(opt->name, "clusterw") == 0) {
 			free(opt->name, M_MOUNT);
 			opt->name = strdup("nonoclusterw", M_MOUNT);
 		}
 		else if (strcmp(opt->name, "noexec") == 0)
 			fsflags |= MNT_NOEXEC;
 		else if (strcmp(opt->name, "exec") == 0) {
 			free(opt->name, M_MOUNT);
 			opt->name = strdup("nonoexec", M_MOUNT);
 		}
 		else if (strcmp(opt->name, "nosuid") == 0)
 			fsflags |= MNT_NOSUID;
 		else if (strcmp(opt->name, "suid") == 0) {
 			free(opt->name, M_MOUNT);
 			opt->name = strdup("nonosuid", M_MOUNT);
 		}
 		else if (strcmp(opt->name, "nosymfollow") == 0)
 			fsflags |= MNT_NOSYMFOLLOW;
 		else if (strcmp(opt->name, "symfollow") == 0) {
 			free(opt->name, M_MOUNT);
 			opt->name = strdup("nonosymfollow", M_MOUNT);
 		}
 		else if (strcmp(opt->name, "noro") == 0) {
 			fsflags &= ~MNT_RDONLY;
 			autoro = false;
 		}
 		else if (strcmp(opt->name, "rw") == 0) {
 			fsflags &= ~MNT_RDONLY;
 			autoro = false;
 		}
 		else if (strcmp(opt->name, "ro") == 0) {
 			fsflags |= MNT_RDONLY;
 			autoro = false;
 		}
 		else if (strcmp(opt->name, "rdonly") == 0) {
 			free(opt->name, M_MOUNT);
 			opt->name = strdup("ro", M_MOUNT);
 			fsflags |= MNT_RDONLY;
 			autoro = false;
 		}
 		else if (strcmp(opt->name, "autoro") == 0) {
 			do_freeopt = 1;
 			autoro = true;
 		}
 		else if (strcmp(opt->name, "suiddir") == 0)
 			fsflags |= MNT_SUIDDIR;
 		else if (strcmp(opt->name, "sync") == 0)
 			fsflags |= MNT_SYNCHRONOUS;
 		else if (strcmp(opt->name, "union") == 0)
 			fsflags |= MNT_UNION;
 		else if (strcmp(opt->name, "automounted") == 0) {
 			fsflags |= MNT_AUTOMOUNTED;
 			do_freeopt = 1;
 		} else if (strcmp(opt->name, "nocover") == 0) {
 			fsflags |= MNT_NOCOVER;
 			do_freeopt = 1;
 		} else if (strcmp(opt->name, "cover") == 0) {
 			fsflags &= ~MNT_NOCOVER;
 			do_freeopt = 1;
 		} else if (strcmp(opt->name, "emptydir") == 0) {
 			fsflags |= MNT_EMPTYDIR;
 			do_freeopt = 1;
 		} else if (strcmp(opt->name, "noemptydir") == 0) {
 			fsflags &= ~MNT_EMPTYDIR;
 			do_freeopt = 1;
 		}
 		if (do_freeopt)
 			vfs_freeopt(optlist, opt);
 	}
 
 	/*
 	 * Be ultra-paranoid about making sure the type and fspath
 	 * variables will fit in our mp buffers, including the
 	 * terminating NUL.
 	 */
 	if (fstypelen > MFSNAMELEN || fspathlen > MNAMELEN) {
 		error = ENAMETOOLONG;
 		goto bail;
 	}
 
 	error = vfs_domount(td, fstype, fspath, fsflags, &optlist);
 
 	/*
 	 * See if we can mount in the read-only mode if the error code suggests
 	 * that it could be possible and the mount options allow for that.
 	 * Never try it if "[no]{ro|rw}" has been explicitly requested and not
 	 * overridden by "autoro".
 	 */
 	if (autoro && vfs_should_downgrade_to_ro_mount(fsflags, error)) {
 		printf("%s: R/W mount failed, possibly R/O media,"
 		    " trying R/O mount\n", __func__);
 		fsflags |= MNT_RDONLY;
 		error = vfs_domount(td, fstype, fspath, fsflags, &optlist);
 	}
 bail:
 	/*
 	 * Copy the errmsg buffer back out.  Each option occupies a name/value
 	 * pair of iovecs, so the value of option N lives at index 2 * N + 1.
 	 * The result of copyout() is deliberately not checked.
 	 */
 	if (errmsg_pos != -1 && ((2 * errmsg_pos + 1) < fsoptions->uio_iovcnt)
 	    && errmsg_len > 0 && errmsg != NULL) {
 		if (fsoptions->uio_segflg == UIO_SYSSPACE) {
 			bcopy(errmsg,
 			    fsoptions->uio_iov[2 * errmsg_pos + 1].iov_base,
 			    fsoptions->uio_iov[2 * errmsg_pos + 1].iov_len);
 		} else {
 			copyout(errmsg,
 			    fsoptions->uio_iov[2 * errmsg_pos + 1].iov_base,
 			    fsoptions->uio_iov[2 * errmsg_pos + 1].iov_len);
 		}
 	}
 
 	/* optlist is NULL here if vfs_domount() took ownership of it. */
 	if (optlist != NULL)
 		vfs_freeopts(optlist);
 	return (error);
 }
 
 /*
  * Old mount API.
  *
  * Argument structure consumed by sys_mount().  It is declared here only
  * when _SYS_SYSPROTO_H_ is not defined.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct mount_args {
 	char	*type;		/* filesystem type name, copied in by sys_mount() */
 	char	*path;		/* mount point path, passed on as "fspath" */
 	int	flags;		/* mount flags (MNT_*) */
 	caddr_t	data;		/* opaque argument handed to vfs_cmount() */
 };
 #endif
 /*
  * Old-style mount entry point.
  *
  * Looks up the filesystem type named by uap->type, builds an argument
  * list containing "fstype", "fspath" and the ro/suid/exec settings derived
  * from uap->flags, and hands it together with uap->data to the
  * filesystem's vfs_cmount() method.
  *
  * Returns the copyinstr() error, ENOENT if the type is unknown,
  * EOPNOTSUPP if the filesystem has no vfs_cmount(), or whatever
  * vfs_cmount() returns.
  */
 /* ARGSUSED */
 int
 sys_mount(struct thread *td, struct mount_args *uap)
 {
 	struct vfsconf *vfsp;
 	struct mntarg *ma;
 	char *fstype;
 	uint64_t flags;
 	int error;
 	bool sbdry;
 
 	/*
 	 * Mount flags are now 64-bits. On 32-bit architectures only
 	 * 32-bits are passed in, but from here on everything handles
 	 * 64-bit flags correctly.
 	 */
 	flags = uap->flags;
 
 	AUDIT_ARG_FFLAGS(flags);
 
 	/*
 	 * Userspace must not be able to set MNT_ROOTFS; only the kernel sets
 	 * it when mounting its root file system.  Strip it here so that
 	 * MNT_UPDATE on the root file system keeps working.
 	 */
 	flags &= ~MNT_ROOTFS;
 
 	/* Resolve the filesystem type; the name buffer is only temporary. */
 	fstype = malloc(MFSNAMELEN, M_TEMP, M_WAITOK);
 	error = copyinstr(uap->type, fstype, MFSNAMELEN, NULL);
 	if (error != 0) {
 		free(fstype, M_TEMP);
 		return (error);
 	}
 
 	AUDIT_ARG_TEXT(fstype);
 	vfsp = vfs_byname_kld(fstype, td, &error);
 	free(fstype, M_TEMP);
 	if (vfsp == NULL)
 		return (ENOENT);
 
 	/* VFCF_SBDRY filesystems use the vfc_vfsops_sd operations vector. */
 	sbdry = (vfsp->vfc_flags & VFCF_SBDRY) != 0;
 	if ((sbdry && vfsp->vfc_vfsops_sd->vfs_cmount == NULL) ||
 	    (!sbdry && vfsp->vfc_vfsops->vfs_cmount == NULL))
 		return (EOPNOTSUPP);
 
 	ma = NULL;
 	ma = mount_argsu(ma, "fstype", uap->type, MFSNAMELEN);
 	ma = mount_argsu(ma, "fspath", uap->path, MNAMELEN);
 	ma = mount_argb(ma, flags & MNT_RDONLY, "noro");
 	ma = mount_argb(ma, !(flags & MNT_NOSUID), "nosuid");
 	ma = mount_argb(ma, !(flags & MNT_NOEXEC), "noexec");
 
 	if (sbdry)
 		return (vfsp->vfc_vfsops_sd->vfs_cmount(ma, uap->data, flags));
 	return (vfsp->vfc_vfsops->vfs_cmount(ma, uap->data, flags));
 }
 
 /*
  * vfs_domount_first(): first file system mount (not update)
  *
  * vp is the vnode to be covered and must be exclusively locked on entry.
  * On every failure path the caller's reference on vp is dropped (vput()
  * while it is still locked, vrele() once it has been unlocked).  On
  * success vp is left unlocked and stays referenced through
  * mp->mnt_vnodecovered, and *optlist is set to NULL because the options
  * now belong to the new mount.
  *
  * Returns 0 on success or an errno value.
  */
 static int
 vfs_domount_first(
 	struct thread *td,		/* Calling thread. */
 	struct vfsconf *vfsp,		/* File system type. */
 	char *fspath,			/* Mount path. */
 	struct vnode *vp,		/* Vnode to be covered. */
 	uint64_t fsflags,		/* Flags common to all filesystems. */
 	struct vfsoptlist **optlist	/* Options local to the filesystem. */
 	)
 {
 	struct vattr va;
 	struct mount *mp;
 	struct vnode *newdp, *rootvp;
 	int error, error1;
 
 	ASSERT_VOP_ELOCKED(vp, __func__);
 	KASSERT((fsflags & MNT_UPDATE) == 0, ("MNT_UPDATE shouldn't be here"));
 
 	/* With MNT_EMPTYDIR the covered directory must pass vfs_emptydir(). */
 	if ((fsflags & MNT_EMPTYDIR) != 0) {
 		error = vfs_emptydir(vp);
 		if (error != 0) {
 			vput(vp);
 			return (error);
 		}
 	}
 
 	/*
 	 * If the jail of the calling thread lacks permission for this type of
 	 * file system, deny immediately.
 	 */
 	if (jailed(td->td_ucred) && !prison_allow(td->td_ucred,
 	    vfsp->vfc_prison_flag)) {
 		vput(vp);
 		return (EPERM);
 	}
 
 	/*
 	 * If the user is not root, ensure that they own the directory
 	 * onto which we are attempting to mount.
 	 */
 	error = VOP_GETATTR(vp, &va, td->td_ucred);
 	if (error == 0 && va.va_uid != td->td_ucred->cr_uid)
 		error = priv_check_cred(td->td_ucred, PRIV_VFS_ADMIN);
 	if (error == 0)
 		error = vinvalbuf(vp, V_SAVE, 0, 0);
 	if (error == 0 && vp->v_type != VDIR)
 		error = ENOTDIR;
 	if (error == 0) {
 		/*
 		 * Claim the vnode for this mount by setting VI_MOUNT; fail
 		 * with EBUSY if it is already claimed or already covered.
 		 */
 		VI_LOCK(vp);
 		if ((vp->v_iflag & VI_MOUNT) == 0 && vp->v_mountedhere == NULL)
 			vp->v_iflag |= VI_MOUNT;
 		else
 			error = EBUSY;
 		VI_UNLOCK(vp);
 	}
 	if (error != 0) {
 		vput(vp);
 		return (error);
 	}
 	VOP_UNLOCK(vp);
 
 	/* Allocate and initialize the filesystem. */
 	mp = vfs_mount_alloc(vp, vfsp, fspath, td->td_ucred);
 	/* XXXMAC: pass to vfs_mount_alloc? */
 	mp->mnt_optnew = *optlist;
 	/* Set the mount level flags. */
 	mp->mnt_flag = (fsflags & (MNT_UPDATEMASK | MNT_ROOTFS | MNT_RDONLY));
 
 	/*
 	 * Mount the filesystem.
 	 * XXX The final recipients of VFS_MOUNT just overwrite the ndp they
 	 * get.  No freeing of cn_pnbuf.
 	 */
 	error1 = 0;
 	if ((error = VFS_MOUNT(mp)) != 0 ||
 	    (error1 = VFS_STATFS(mp, &mp->mnt_stat)) != 0 ||
 	    (error1 = VFS_ROOT(mp, LK_EXCLUSIVE, &newdp)) != 0) {
 		/*
 		 * error1 is only set when VFS_MOUNT() succeeded and a later
 		 * step failed, in which case the filesystem has to be
 		 * unmounted again.
 		 */
 		if (error1 != 0) {
 			error = error1;
 			rootvp = vfs_cache_root_clear(mp);
 			if (rootvp != NULL)
 				vrele(rootvp);
 			if ((error1 = VFS_UNMOUNT(mp, 0)) != 0)
 				printf("VFS_UNMOUNT returned %d\n", error1);
 		}
 		vfs_unbusy(mp);
 		/*
 		 * Clear mnt_vnodecovered so that vfs_mount_destroy() does not
 		 * release vp; it is released explicitly below.
 		 */
 		mp->mnt_vnodecovered = NULL;
 		vfs_mount_destroy(mp);
 		VI_LOCK(vp);
 		vp->v_iflag &= ~VI_MOUNT;
 		VI_UNLOCK(vp);
 		vrele(vp);
 		return (error);
 	}
 	VOP_UNLOCK(newdp);
 
 	/* The new options become the mount's options; the caller loses them. */
 	if (mp->mnt_opt != NULL)
 		vfs_freeopts(mp->mnt_opt);
 	mp->mnt_opt = mp->mnt_optnew;
 	*optlist = NULL;
 
 	/*
 	 * Prevent external consumers of mount options from reading mnt_optnew.
 	 */
 	mp->mnt_optnew = NULL;
 
 	/* MNTK_ASYNC follows MNT_ASYNC unless the filesystem set MNTK_NOASYNC. */
 	MNT_ILOCK(mp);
 	if ((mp->mnt_flag & MNT_ASYNC) != 0 &&
 	    (mp->mnt_kern_flag & MNTK_NOASYNC) == 0)
 		mp->mnt_kern_flag |= MNTK_ASYNC;
 	else
 		mp->mnt_kern_flag &= ~MNTK_ASYNC;
 	MNT_IUNLOCK(mp);
 
 	/* Publish the mount: attach it to the covered vnode and the mount list. */
 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 	cache_purge(vp);
 	VI_LOCK(vp);
 	vp->v_iflag &= ~VI_MOUNT;
 	VI_UNLOCK(vp);
 	vp->v_mountedhere = mp;
 	/* Place the new filesystem at the end of the mount list. */
 	mtx_lock(&mountlist_mtx);
 	TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
 	mtx_unlock(&mountlist_mtx);
 	vfs_event_signal(NULL, VQ_MOUNT, 0);
 	vn_lock(newdp, LK_EXCLUSIVE | LK_RETRY);
 	VOP_UNLOCK(vp);
 	EVENTHANDLER_DIRECT_INVOKE(vfs_mounted, mp, newdp, td);
 	VOP_UNLOCK(newdp);
 	mountcheckdirs(vp, newdp);
 	vrele(newdp);
 	/* Only writable mounts get a syncer vnode. */
 	if ((mp->mnt_flag & MNT_RDONLY) == 0)
 		vfs_allocate_syncvnode(mp);
 	vfs_op_exit(mp);
 	vfs_unbusy(mp);
 	return (0);
 }
 
 /*
  * vfs_domount_update(): update of mounted file system
  *
  * vp is the vnode named by the mount path and must be exclusively locked
  * on entry; the caller's reference on it is dropped on every return path.
  * After VFS_MOUNT() succeeds, an "export" option (if present) is converted
  * to the current struct export_args layout and passed to vfs_export().
  * On success *optlist is set to NULL because the merged options now
  * belong to the mount.
  *
  * Returns the VFS_MOUNT() error if there was one, otherwise the result of
  * export processing (0 when no export was requested).
  */
 static int
 vfs_domount_update(
 	struct thread *td,		/* Calling thread. */
 	struct vnode *vp,		/* Mount point vnode. */
 	uint64_t fsflags,		/* Flags common to all filesystems. */
 	struct vfsoptlist **optlist	/* Options local to the filesystem. */
 	)
 {
 	struct export_args export;
+	struct o2export_args o2export;
 	struct vnode *rootvp;
 	void *bufp;
 	struct mount *mp;
-	int error, export_error, len;
+	int error, export_error, i, len;
 	uint64_t flag;
+	gid_t *grps;
 
 	ASSERT_VOP_ELOCKED(vp, __func__);
 	KASSERT((fsflags & MNT_UPDATE) != 0, ("MNT_UPDATE should be here"));
 	mp = vp->v_mount;
 
 	/*
 	 * Updates are only accepted on a vnode flagged VV_ROOT.  For any
 	 * other vnode, fail with EXDEV if an "export" option of the expected
 	 * size was supplied and with EINVAL otherwise.
 	 */
 	if ((vp->v_vflag & VV_ROOT) == 0) {
 		if (vfs_copyopt(*optlist, "export", &export, sizeof(export))
 		    == 0)
 			error = EXDEV;
 		else
 			error = EINVAL;
 		vput(vp);
 		return (error);
 	}
 
 	/*
 	 * We only allow the filesystem to be reloaded if it
 	 * is currently mounted read-only.
 	 */
 	flag = mp->mnt_flag;
 	if ((fsflags & MNT_RELOAD) != 0 && (flag & MNT_RDONLY) == 0) {
 		vput(vp);
 		return (EOPNOTSUPP);	/* Needs translation */
 	}
 	/*
 	 * Only privileged root, or (if MNT_USER is set) the user that
 	 * did the original mount is permitted to update it.
 	 */
 	error = vfs_suser(mp, td);
 	if (error != 0) {
 		vput(vp);
 		return (error);
 	}
 	if (vfs_busy(mp, MBF_NOWAIT)) {
 		vput(vp);
 		return (EBUSY);
 	}
 	/* Claim the vnode with VI_MOUNT for the duration of the update. */
 	VI_LOCK(vp);
 	if ((vp->v_iflag & VI_MOUNT) != 0 || vp->v_mountedhere != NULL) {
 		VI_UNLOCK(vp);
 		vfs_unbusy(mp);
 		vput(vp);
 		return (EBUSY);
 	}
 	vp->v_iflag |= VI_MOUNT;
 	VI_UNLOCK(vp);
 	VOP_UNLOCK(vp);
 
 	vfs_op_enter(mp);
 
 	MNT_ILOCK(mp);
 	if ((mp->mnt_kern_flag & MNTK_UNMOUNT) != 0) {
 		MNT_IUNLOCK(mp);
 		error = EBUSY;
 		goto end;
 	}
 	/* Install the requested flags; the old ones are saved in "flag". */
 	mp->mnt_flag &= ~MNT_UPDATEMASK;
 	mp->mnt_flag |= fsflags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE |
 	    MNT_SNAPSHOT | MNT_ROOTFS | MNT_UPDATEMASK | MNT_RDONLY);
 	if ((mp->mnt_flag & MNT_ASYNC) == 0)
 		mp->mnt_kern_flag &= ~MNTK_ASYNC;
 	rootvp = vfs_cache_root_clear(mp);
 	MNT_IUNLOCK(mp);
 	if (rootvp != NULL)
 		vrele(rootvp);
 	mp->mnt_optnew = *optlist;
 	vfs_mergeopts(mp->mnt_optnew, mp->mnt_opt);
 
 	/*
 	 * Mount the filesystem.
 	 * XXX The final recipients of VFS_MOUNT just overwrite the ndp they
 	 * get.  No freeing of cn_pnbuf.
 	 */
 	error = VFS_MOUNT(mp);
 
 	export_error = 0;
 	/* Process the export option. */
 	if (error == 0 && vfs_getopt(mp->mnt_optnew, "export", &bufp,
 	    &len) == 0) {
 		/* Assume that there is only 1 ABI for each length. */
 		switch (len) {
 		case (sizeof(struct oexport_args)):
 			/*
 			 * Oldest layout: preset a single AUTH_SYS security
 			 * flavor, then share the o2export conversion below.
 			 * NOTE(review): this relies on the bytes copied by
 			 * the bcopy() below not covering the preset flavor
 			 * fields - confirm against the structure definitions.
 			 */
-			bzero(&export, sizeof(export));
+			bzero(&o2export, sizeof(o2export));
+			o2export.ex_numsecflavors = 1;
+			o2export.ex_secflavors[0] = AUTH_SYS;
 			/* FALLTHROUGH */
+		case (sizeof(o2export)):
 			/*
 			 * Previous layout: convert field by field into the
 			 * current struct export_args.  The groups embedded in
 			 * ex_anon are copied into a temporary array.
 			 */
+			bcopy(bufp, &o2export, len);
+			export.ex_flags = (uint64_t)o2export.ex_flags;
+			export.ex_root = o2export.ex_root;
+			export.ex_uid = o2export.ex_anon.cr_uid;
+			export.ex_groups = NULL;
+			export.ex_ngroups = o2export.ex_anon.cr_ngroups;
+			if (export.ex_ngroups > 0) {
+				if (export.ex_ngroups <= XU_NGROUPS) {
+					export.ex_groups = malloc(
+					    export.ex_ngroups * sizeof(gid_t),
+					    M_TEMP, M_WAITOK);
+					for (i = 0; i < export.ex_ngroups; i++)
+						export.ex_groups[i] =
+						  o2export.ex_anon.cr_groups[i];
+				} else
+					export_error = EINVAL;
+			} else if (export.ex_ngroups < 0)
+				export_error = EINVAL;
+			export.ex_addr = o2export.ex_addr;
+			export.ex_addrlen = o2export.ex_addrlen;
+			export.ex_mask = o2export.ex_mask;
+			export.ex_masklen = o2export.ex_masklen;
+			export.ex_indexfile = o2export.ex_indexfile;
+			export.ex_numsecflavors = o2export.ex_numsecflavors;
+			if (export.ex_numsecflavors < MAXSECFLAVORS) {
+				for (i = 0; i < export.ex_numsecflavors; i++)
+					export.ex_secflavors[i] =
+					    o2export.ex_secflavors[i];
+			} else
+				export_error = EINVAL;
+			if (export_error == 0)
+				export_error = vfs_export(mp, &export);
 			/* ex_groups is NULL when nothing was allocated. */
+			free(export.ex_groups, M_TEMP);
+			break;
 		case (sizeof(export)):
 			/*
 			 * Current layout: ex_groups is fetched with copyin()
 			 * into a temporary array (at most NGROUPS_MAX
 			 * entries) before vfs_export() is called.
 			 */
 			bcopy(bufp, &export, len);
-			export_error = vfs_export(mp, &export);
+			grps = NULL;
+			if (export.ex_ngroups > 0) {
+				if (export.ex_ngroups <= NGROUPS_MAX) {
+					grps = malloc(export.ex_ngroups *
+					    sizeof(gid_t), M_TEMP, M_WAITOK);
+					export_error = copyin(export.ex_groups,
+					    grps, export.ex_ngroups *
+					    sizeof(gid_t));
+					if (export_error == 0)
+						export.ex_groups = grps;
+				} else
+					export_error = EINVAL;
+			} else if (export.ex_ngroups == 0)
+				export.ex_groups = NULL;
+			else
+				export_error = EINVAL;
+			if (export_error == 0)
+				export_error = vfs_export(mp, &export);
+			free(grps, M_TEMP);
 			break;
 		default:
 			/* Unknown structure size: no matching ABI. */
 			export_error = EINVAL;
 			break;
 		}
 	}
 
 	MNT_ILOCK(mp);
 	if (error == 0) {
 		/* The one-shot request flags must not stay set on the mount. */
 		mp->mnt_flag &=	~(MNT_UPDATE | MNT_RELOAD | MNT_FORCE |
 		    MNT_SNAPSHOT);
 	} else {
 		/*
 		 * If we fail, restore old mount flags. MNT_QUOTA is special,
 		 * because it is not part of MNT_UPDATEMASK, but it could have
 		 * changed in the meantime if quotactl(2) was called.
 		 * All in all we want current value of MNT_QUOTA, not the old
 		 * one.
 		 */
 		mp->mnt_flag = (mp->mnt_flag & MNT_QUOTA) | (flag & ~MNT_QUOTA);
 	}
 	if ((mp->mnt_flag & MNT_ASYNC) != 0 &&
 	    (mp->mnt_kern_flag & MNTK_NOASYNC) == 0)
 		mp->mnt_kern_flag |= MNTK_ASYNC;
 	else
 		mp->mnt_kern_flag &= ~MNTK_ASYNC;
 	MNT_IUNLOCK(mp);
 
 	if (error != 0)
 		goto end;
 
 	/* Commit the merged options; the caller no longer owns them. */
 	if (mp->mnt_opt != NULL)
 		vfs_freeopts(mp->mnt_opt);
 	mp->mnt_opt = mp->mnt_optnew;
 	*optlist = NULL;
 	(void)VFS_STATFS(mp, &mp->mnt_stat);
 	/*
 	 * Prevent external consumers of mount options from reading
 	 * mnt_optnew.
 	 */
 	mp->mnt_optnew = NULL;
 
 	/* The syncer vnode exists only while the mount is writable. */
 	if ((mp->mnt_flag & MNT_RDONLY) == 0)
 		vfs_allocate_syncvnode(mp);
 	else
 		vfs_deallocate_syncvnode(mp);
 end:
 	/* Common exit: undo vfs_op_enter(), vfs_busy() and the VI_MOUNT claim. */
 	vfs_op_exit(mp);
 	vfs_unbusy(mp);
 	VI_LOCK(vp);
 	vp->v_iflag &= ~VI_MOUNT;
 	VI_UNLOCK(vp);
 	vrele(vp);
 	return (error != 0 ? error : export_error);
 }
 
 /*
  * vfs_domount(): actually attempt a filesystem mount.
  */
 static int
 vfs_domount(
 	struct thread *td,		/* Calling thread. */
 	const char *fstype,		/* Filesystem type. */
 	char *fspath,			/* Mount path. */
 	uint64_t fsflags,		/* Flags common to all filesystems. */
 	struct vfsoptlist **optlist	/* Options local to the filesystem. */
 	)
 {
 	struct vfsconf *vfsp;
 	struct nameidata nd;
 	struct vnode *vp;
 	char *pathbuf;
 	int error;
 
 	/*
 	 * Be ultra-paranoid about making sure the type and fspath
 	 * variables will fit in our mp buffers, including the
 	 * terminating NUL.
 	 */
 	if (strlen(fstype) >= MFSNAMELEN || strlen(fspath) >= MNAMELEN)
 		return (ENAMETOOLONG);
 
 	if (jailed(td->td_ucred) || usermount == 0) {
 		if ((error = priv_check(td, PRIV_VFS_MOUNT)) != 0)
 			return (error);
 	}
 
 	/*
 	 * Do not allow NFS export or MNT_SUIDDIR by unprivileged users.
 	 */
 	if (fsflags & MNT_EXPORTED) {
 		error = priv_check(td, PRIV_VFS_MOUNT_EXPORTED);
 		if (error)
 			return (error);
 	}
 	if (fsflags & MNT_SUIDDIR) {
 		error = priv_check(td, PRIV_VFS_MOUNT_SUIDDIR);
 		if (error)
 			return (error);
 	}
 	/*
 	 * Silently enforce MNT_NOSUID and MNT_USER for unprivileged users.
 	 */
 	if ((fsflags & (MNT_NOSUID | MNT_USER)) != (MNT_NOSUID | MNT_USER)) {
 		if (priv_check(td, PRIV_VFS_MOUNT_NONUSER) != 0)
 			fsflags |= MNT_NOSUID | MNT_USER;
 	}
 
 	/* Load KLDs before we lock the covered vnode to avoid reversals. */
 	vfsp = NULL;
 	if ((fsflags & MNT_UPDATE) == 0) {
 		/* Don't try to load KLDs if we're mounting the root. */
 		if (fsflags & MNT_ROOTFS)
 			vfsp = vfs_byname(fstype);
 		else
 			vfsp = vfs_byname_kld(fstype, td, &error);
 		if (vfsp == NULL)
 			return (ENODEV);
 	}
 
 	/*
 	 * Get vnode to be covered or mount point's vnode in case of MNT_UPDATE.
 	 */
 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1,
 	    UIO_SYSSPACE, fspath, td);
 	error = namei(&nd);
 	if (error != 0)
 		return (error);
 	NDFREE(&nd, NDF_ONLY_PNBUF);
 	vp = nd.ni_vp;
 	if ((fsflags & MNT_UPDATE) == 0) {
 		if ((vp->v_vflag & VV_ROOT) != 0 &&
 		    (fsflags & MNT_NOCOVER) != 0) {
 			vput(vp);
 			return (EBUSY);
 		}
 		pathbuf = malloc(MNAMELEN, M_TEMP, M_WAITOK);
 		strcpy(pathbuf, fspath);
 		error = vn_path_to_global_path(td, vp, pathbuf, MNAMELEN);
 		if (error == 0) {
 			error = vfs_domount_first(td, vfsp, pathbuf, vp,
 			    fsflags, optlist);
 		}
 		free(pathbuf, M_TEMP);
 	} else
 		error = vfs_domount_update(td, vp, fsflags, optlist);
 
 	return (error);
 }
 
 /*
  * Unmount a filesystem.
  *
  * Note: unmount takes a path to the vnode mounted on as argument, not
  * special file (as before).
  */
 #ifndef _SYS_SYSPROTO_H_
 struct unmount_args {
 	char	*path;
 	int	flags;
 };
 #endif
 /*
  * Unmount entry point: forwards the path and flags from uap to
  * kern_unmount() and returns its result.
  */
 /* ARGSUSED */
 int
 sys_unmount(struct thread *td, struct unmount_args *uap)
 {
 
 	return (kern_unmount(td, uap->path, uap->flags));
 }
 
 int
 kern_unmount(struct thread *td, const char *path, int flags)
 {
 	struct nameidata nd;
 	struct mount *mp;
 	char *pathbuf;
 	int error, id0, id1;
 
 	AUDIT_ARG_VALUE(flags);
 	if (jailed(td->td_ucred) || usermount == 0) {
 		error = priv_check(td, PRIV_VFS_UNMOUNT);
 		if (error)
 			return (error);
 	}
 
 	pathbuf = malloc(MNAMELEN, M_TEMP, M_WAITOK);
 	error = copyinstr(path, pathbuf, MNAMELEN, NULL);
 	if (error) {
 		free(pathbuf, M_TEMP);
 		return (error);
 	}
 	if (flags & MNT_BYFSID) {
 		AUDIT_ARG_TEXT(pathbuf);
 		/* Decode the filesystem ID. */
 		if (sscanf(pathbuf, "FSID:%d:%d", &id0, &id1) != 2) {
 			free(pathbuf, M_TEMP);
 			return (EINVAL);
 		}
 
 		mtx_lock(&mountlist_mtx);
 		TAILQ_FOREACH_REVERSE(mp, &mountlist, mntlist, mnt_list) {
 			if (mp->mnt_stat.f_fsid.val[0] == id0 &&
 			    mp->mnt_stat.f_fsid.val[1] == id1) {
 				vfs_ref(mp);
 				break;
 			}
 		}
 		mtx_unlock(&mountlist_mtx);
 	} else {
 		/*
 		 * Try to find global path for path argument.
 		 */
 		NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1,
 		    UIO_SYSSPACE, pathbuf, td);
 		if (namei(&nd) == 0) {
 			NDFREE(&nd, NDF_ONLY_PNBUF);
 			error = vn_path_to_global_path(td, nd.ni_vp, pathbuf,
 			    MNAMELEN);
 			if (error == 0)
 				vput(nd.ni_vp);
 		}
 		mtx_lock(&mountlist_mtx);
 		TAILQ_FOREACH_REVERSE(mp, &mountlist, mntlist, mnt_list) {
 			if (strcmp(mp->mnt_stat.f_mntonname, pathbuf) == 0) {
 				vfs_ref(mp);
 				break;
 			}
 		}
 		mtx_unlock(&mountlist_mtx);
 	}
 	free(pathbuf, M_TEMP);
 	if (mp == NULL) {
 		/*
 		 * Previously we returned ENOENT for a nonexistent path and
 		 * EINVAL for a non-mountpoint.  We cannot tell these apart
 		 * now, so in the !MNT_BYFSID case return the more likely
 		 * EINVAL for compatibility.
 		 */
 		return ((flags & MNT_BYFSID) ? ENOENT : EINVAL);
 	}
 
 	/*
 	 * Don't allow unmounting the root filesystem.
 	 */
 	if (mp->mnt_flag & MNT_ROOTFS) {
 		vfs_rel(mp);
 		return (EINVAL);
 	}
 	error = dounmount(mp, flags, td);
 	return (error);
 }
 
 /*
  * Report EBUSY if any vnode of the mount, other than VV_ROOT vnodes and
  * vnodes of type VNON, has a non-zero use count; return 0 otherwise.
  *
  * The result is advisory only: it can give both false positives and
  * false negatives.
  */
 static int
 vfs_check_usecounts(struct mount *mp)
 {
 	struct vnode *vp, *mvp;
 	bool in_use;
 
 	MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
 		/* Sample the vnode before dropping its interlock. */
 		in_use = (vp->v_vflag & VV_ROOT) == 0 && vp->v_type != VNON &&
 		    vp->v_usecount != 0;
 		VI_UNLOCK(vp);
 		if (in_use) {
 			MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
 			return (EBUSY);
 		}
 	}
 
 	return (0);
 }
 
 /*
  * Undo the preparatory steps of an unmount attempt that is being
  * abandoned.
  *
  * Must be called with the mount interlock held; the interlock is released
  * here.  mntkflags lists the mnt_kern_flag bits to clear.  MNTK_MWAIT
  * sleepers are woken, the vfs_ops count is dropped, coveredvp (if any) is
  * unlocked and its hold reference released, and the write started by the
  * caller is finished.
  */
 static void
 dounmount_cleanup(struct mount *mp, struct vnode *coveredvp, int mntkflags)
 {
 
 	mtx_assert(MNT_MTX(mp), MA_OWNED);
 	mp->mnt_kern_flag &= ~mntkflags;
 	if ((mp->mnt_kern_flag & MNTK_MWAIT) != 0) {
 		mp->mnt_kern_flag &= ~MNTK_MWAIT;
 		wakeup(mp);
 	}
 	vfs_op_exit_locked(mp);
 	MNT_IUNLOCK(mp);
 	if (coveredvp != NULL) {
 		VOP_UNLOCK(coveredvp);
 		vdrop(coveredvp);
 	}
 	vn_finished_write(mp);
 }
 
 /*
  * A mount point carries several reference counters.  They may normally
  * be modified without the mount interlock, but that can be switched off
  * temporarily when a stable value is required or when callers are
  * supposed to block (e.g. to keep new users out during a forced unmount).
  *
  * vfs_op_enter() bumps mnt_vfs_ops.  If the count does not exceed 1
  * afterwards, it waits for threads inside the lockless section and then
  * folds every per-CPU counter into the corresponding field of the mount
  * structure, resetting the per-CPU value to zero.
  */
 void
 vfs_op_enter(struct mount *mp)
 {
 	int cpu;
 
 	MNT_ILOCK(mp);
 	if (++mp->mnt_vfs_ops <= 1) {
 		vfs_op_barrier_wait(mp);
 		CPU_FOREACH(cpu) {
 			mp->mnt_ref +=
 			    zpcpu_replace_cpu(mp->mnt_ref_pcpu, 0, cpu);
 			mp->mnt_lockref +=
 			    zpcpu_replace_cpu(mp->mnt_lockref_pcpu, 0, cpu);
 			mp->mnt_writeopcount +=
 			    zpcpu_replace_cpu(mp->mnt_writeopcount_pcpu, 0,
 			    cpu);
 		}
 		MNT_IUNLOCK(mp);
 		vfs_assert_mount_counters(mp);
 		return;
 	}
 	/* The counters were already collected by an earlier caller. */
 	MNT_IUNLOCK(mp);
 }
 
 /*
  * Drop one mnt_vfs_ops count taken by vfs_op_enter().
  *
  * The mount interlock must be held by the caller.  Panics if the count
  * is not positive.
  */
 void
 vfs_op_exit_locked(struct mount *mp)
 {
 
 	mtx_assert(MNT_MTX(mp), MA_OWNED);
 
 	if (mp->mnt_vfs_ops <= 0)
 		panic("%s: invalid vfs_ops count %d for mp %p\n",
 		    __func__, mp->mnt_vfs_ops, mp);
 	mp->mnt_vfs_ops--;
 }
 
 /*
  * Convenience wrapper around vfs_op_exit_locked() that takes and releases
  * the mount interlock itself.
  */
 void
 vfs_op_exit(struct mount *mp)
 {
 
 	MNT_ILOCK(mp);
 	vfs_op_exit_locked(mp);
 	MNT_IUNLOCK(mp);
 }
 
 /*
  * Argument bundle for the vfs_op_barrier_wait() rendezvous.  The callbacks
  * receive a pointer to srcra and recover the enclosing structure with
  * __containerof() to reach mp.
  */
 struct vfs_op_barrier_ipi {
 	struct mount *mp;	/* mount point the barrier is for */
 	struct smp_rendezvous_cpus_retry_arg srcra;	/* rendezvous state */
 };
 
 /*
  * Rendezvous action callback used by vfs_op_barrier_wait().
  *
  * arg points at the srcra member of a struct vfs_op_barrier_ipi.  The
  * rendezvous is reported as done through smp_rendezvous_cpus_done()
  * unless vfs_op_thread_entered() is true for the mount.
  */
 static void
 vfs_op_action_func(void *arg)
 {
 	struct vfs_op_barrier_ipi *ipi;
 	struct mount *mp;
 
 	ipi = __containerof(arg, struct vfs_op_barrier_ipi, srcra);
 	mp = ipi->mp;
 
 	if (vfs_op_thread_entered(mp))
 		return;
 	smp_rendezvous_cpus_done(arg);
 }
 
 /*
  * Rendezvous wait callback used by vfs_op_barrier_wait().
  *
  * Spins until the mnt_thread_in_ops_pcpu value of the given CPU reads as
  * zero.  arg points at the srcra member of a struct vfs_op_barrier_ipi.
  */
 static void
 vfs_op_wait_func(void *arg, int cpu)
 {
 	struct vfs_op_barrier_ipi *ipi;
 	int *in_op;
 
 	ipi = __containerof(arg, struct vfs_op_barrier_ipi, srcra);
 	in_op = zpcpu_get_cpu(ipi->mp->mnt_thread_in_ops_pcpu, cpu);
 
 	for (;;) {
 		if (atomic_load_int(in_op) == 0)
 			break;
 		cpu_spinwait();
 	}
 }
 
 void
 vfs_op_barrier_wait(struct mount *mp)
 {
 	struct vfs_op_barrier_ipi vfsopipi;
 
 	vfsopipi.mp = mp;
 
 	smp_rendezvous_cpus_retry(all_cpus,
 	    smp_no_rendezvous_barrier,
 	    vfs_op_action_func,
 	    smp_no_rendezvous_barrier,
 	    vfs_op_wait_func,
 	    &vfsopipi.srcra);
 }
 
 #ifdef DIAGNOSTIC
 /*
  * Diagnostic check: while mnt_vfs_ops is non-zero every per-CPU counter
  * is expected to be zero.  If one is not, vfs_dump_mount_counters() is
  * called, which prints all counters and panics.
  */
 void
 vfs_assert_mount_counters(struct mount *mp)
 {
 	int cpu;
 
 	if (mp->mnt_vfs_ops == 0)
 		return;
 
 	CPU_FOREACH(cpu) {
 		if (*zpcpu_get_cpu(mp->mnt_ref_pcpu, cpu) == 0 &&
 		    *zpcpu_get_cpu(mp->mnt_lockref_pcpu, cpu) == 0 &&
 		    *zpcpu_get_cpu(mp->mnt_writeopcount_pcpu, cpu) == 0)
 			continue;
 		vfs_dump_mount_counters(mp);
 	}
 }
 
 /*
  * Print the ref, lockref and writeopcount counters of a mount - the
  * per-CPU values, the value stored in the structure and their sum - and
  * then panic.
  */
 void
 vfs_dump_mount_counters(struct mount *mp)
 {
 	int *percpu;
 	int cpu, ref_total, lockref_total, writeop_total;
 
 	printf("%s: mp %p vfs_ops %d\n", __func__, mp, mp->mnt_vfs_ops);
 
 	/* One line of per-CPU values for each counter. */
 	ref_total = mp->mnt_ref;
 	printf("        ref : ");
 	CPU_FOREACH(cpu) {
 		percpu = zpcpu_get_cpu(mp->mnt_ref_pcpu, cpu);
 		printf("%d ", *percpu);
 		ref_total += *percpu;
 	}
 	printf("\n");
 
 	lockref_total = mp->mnt_lockref;
 	printf("    lockref : ");
 	CPU_FOREACH(cpu) {
 		percpu = zpcpu_get_cpu(mp->mnt_lockref_pcpu, cpu);
 		printf("%d ", *percpu);
 		lockref_total += *percpu;
 	}
 	printf("\n");
 
 	writeop_total = mp->mnt_writeopcount;
 	printf("writeopcount: ");
 	CPU_FOREACH(cpu) {
 		percpu = zpcpu_get_cpu(mp->mnt_writeopcount_pcpu, cpu);
 		printf("%d ", *percpu);
 		writeop_total += *percpu;
 	}
 	printf("\n");
 
 	/* Summary: value in the structure versus the overall total. */
 	printf("counter       struct total\n");
 	printf("ref             %-5d  %-5d\n", mp->mnt_ref, ref_total);
 	printf("lockref         %-5d  %-5d\n", mp->mnt_lockref, lockref_total);
 	printf("writeopcount    %-5d  %-5d\n", mp->mnt_writeopcount,
 	    writeop_total);
 
 	panic("invalid counts on struct mount");
 }
 #endif
 
 /*
  * Return the current value of one mount point counter: the value stored
  * in the mount structure plus the contribution of every CPU's per-CPU
  * counter.
  *
  * No lock is taken here, so the individual reads are not performed
  * atomically with respect to each other.  An unrecognized selector
  * panics rather than dereferencing uninitialized pointers.
  */
 int
 vfs_mount_fetch_counter(struct mount *mp, enum mount_counter which)
 {
 	int *base, *pcpu;
 	int cpu, sum;
 
 	switch (which) {
 	case MNT_COUNT_REF:
 		base = &mp->mnt_ref;
 		pcpu = mp->mnt_ref_pcpu;
 		break;
 	case MNT_COUNT_LOCKREF:
 		base = &mp->mnt_lockref;
 		pcpu = mp->mnt_lockref_pcpu;
 		break;
 	case MNT_COUNT_WRITEOPCOUNT:
 		base = &mp->mnt_writeopcount;
 		pcpu = mp->mnt_writeopcount_pcpu;
 		break;
 	default:
 		/* Without this, base and pcpu would be used uninitialized. */
 		panic("%s: invalid counter %d for mp %p\n", __func__,
 		    (int)which, mp);
 	}
 
 	sum = *base;
 	CPU_FOREACH(cpu) {
 		sum += *zpcpu_get_cpu(pcpu, cpu);
 	}
 	return (sum);
 }
 
 /*
  * Do the actual filesystem unmount.
  *
  * mp is the mount to tear down, flags carries the MNT_* unmount flags
  * (MNT_NONBUSY and MNT_FORCE are examined here; the full set is handed to
  * VFS_UNMOUNT()), and td is the requesting thread, used for the
  * vfs_suser() permission check and passed to the vfs_unmounted event.
  *
  * Returns 0 on success, in which case mp has been removed from the
  * mountlist and passed to vfs_mount_destroy().  Returns EBUSY when the
  * covered vnode no longer refers to this mount, when the mount is already
  * being unmounted or updated, or when its mnt_uppers list is non-empty;
  * otherwise returns the error from vfs_suser(), vfs_check_usecounts() or
  * VFS_UNMOUNT().
  *
  * NOTE(review): the early failure paths call vfs_rel(mp), which suggests
  * the caller hands in a reference on mp -- confirm against the callers.
  */
 int
 dounmount(struct mount *mp, int flags, struct thread *td)
 {
 	struct vnode *coveredvp, *rootvp;
 	int error;
 	uint64_t async_flag;
 	int mnt_gen_r;
 
 	if ((coveredvp = mp->mnt_vnodecovered) != NULL) {
 		/*
 		 * Remember the generation before sleeping on the vnode lock
 		 * so that it can be compared once the lock is held.
 		 */
 		mnt_gen_r = mp->mnt_gen;
 		VI_LOCK(coveredvp);
 		vholdl(coveredvp);
 		vn_lock(coveredvp, LK_EXCLUSIVE | LK_INTERLOCK | LK_RETRY);
 		/*
 		 * Check for mp being unmounted while waiting for the
 		 * covered vnode lock.
 		 */
 		if (coveredvp->v_mountedhere != mp ||
 		    coveredvp->v_mountedhere->mnt_gen != mnt_gen_r) {
 			VOP_UNLOCK(coveredvp);
 			vdrop(coveredvp);
 			vfs_rel(mp);
 			return (EBUSY);
 		}
 	}
 
 	/*
 	 * Only privileged root, or (if MNT_USER is set) the user that did the
 	 * original mount is permitted to unmount this filesystem.
 	 */
 	error = vfs_suser(mp, td);
 	if (error != 0) {
 		if (coveredvp != NULL) {
 			VOP_UNLOCK(coveredvp);
 			vdrop(coveredvp);
 		}
 		vfs_rel(mp);
 		return (error);
 	}
 
 	vfs_op_enter(mp);
 
 	vn_start_write(NULL, &mp, V_WAIT | V_MNTREF);
 	MNT_ILOCK(mp);
 	/*
 	 * Refuse if another unmount or an update is already in progress, or
 	 * if mnt_uppers is not empty.
 	 */
 	if ((mp->mnt_kern_flag & MNTK_UNMOUNT) != 0 ||
 	    (mp->mnt_flag & MNT_UPDATE) != 0 ||
 	    !TAILQ_EMPTY(&mp->mnt_uppers)) {
 		dounmount_cleanup(mp, coveredvp, 0);
 		return (EBUSY);
 	}
 	mp->mnt_kern_flag |= MNTK_UNMOUNT;
 	rootvp = vfs_cache_root_clear(mp);
 	if (flags & MNT_NONBUSY) {
 		/* The use-count check runs without the mount interlock. */
 		MNT_IUNLOCK(mp);
 		error = vfs_check_usecounts(mp);
 		MNT_ILOCK(mp);
 		if (error != 0) {
 			dounmount_cleanup(mp, coveredvp, MNTK_UNMOUNT);
 			if (rootvp != NULL)
 				vrele(rootvp);
 			return (error);
 		}
 	}
 	/* Allow filesystems to detect that a forced unmount is in progress. */
 	if (flags & MNT_FORCE) {
 		mp->mnt_kern_flag |= MNTK_UNMOUNTF;
 		MNT_IUNLOCK(mp);
 		/*
 		 * Must be done after setting MNTK_UNMOUNTF and before
 		 * waiting for mnt_lockref to become 0.
 		 */
 		VFS_PURGE(mp);
 		MNT_ILOCK(mp);
 	}
 	error = 0;
 	/* Sleep until the outstanding mnt_lockref references are gone. */
 	if (mp->mnt_lockref) {
 		mp->mnt_kern_flag |= MNTK_DRAINING;
 		error = msleep(&mp->mnt_lockref, MNT_MTX(mp), PVFS,
 		    "mount drain", 0);
 	}
 	MNT_IUNLOCK(mp);
 	KASSERT(mp->mnt_lockref == 0,
 	    ("%s: invalid lock refcount in the drain path @ %s:%d",
 	    __func__, __FILE__, __LINE__));
 	KASSERT(error == 0,
 	    ("%s: invalid return value for msleep in the drain path @ %s:%d",
 	    __func__, __FILE__, __LINE__));
 
 	if (rootvp != NULL)
 		vrele(rootvp);
 
 	if (mp->mnt_flag & MNT_EXPUBLIC)
 		vfs_setpublicfs(NULL, NULL, NULL);
 
 	/*
 	 * From now, we can claim that the use reference on the
 	 * coveredvp is ours, and the ref can be released only by
 	 * successful unmount by us, or left for later unmount
 	 * attempt.  The previously acquired hold reference is no
 	 * longer needed to protect the vnode from reuse.
 	 */
 	if (coveredvp != NULL)
 		vdrop(coveredvp);
 
 	vfs_periodic(mp, MNT_WAIT);
 	MNT_ILOCK(mp);
 	/* Save MNT_ASYNC so it can be restored if VFS_UNMOUNT() fails. */
 	async_flag = mp->mnt_flag & MNT_ASYNC;
 	mp->mnt_flag &= ~MNT_ASYNC;
 	mp->mnt_kern_flag &= ~MNTK_ASYNC;
 	MNT_IUNLOCK(mp);
 	cache_purgevfs(mp, false); /* remove cache entries for this file sys */
 	vfs_deallocate_syncvnode(mp);
 	error = VFS_UNMOUNT(mp, flags);
 	vn_finished_write(mp);
 	/*
 	 * If we failed to flush the dirty blocks for this mount point,
 	 * undo all the cdir/rdir and rootvnode changes we made above.
 	 * Unless we failed to do so because the device is reporting that
 	 * it doesn't exist anymore.
 	 */
 	if (error && error != ENXIO) {
 		MNT_ILOCK(mp);
 		if ((mp->mnt_flag & MNT_RDONLY) == 0) {
 			MNT_IUNLOCK(mp);
 			vfs_allocate_syncvnode(mp);
 			MNT_ILOCK(mp);
 		}
 		mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
 		mp->mnt_flag |= async_flag;
 		if ((mp->mnt_flag & MNT_ASYNC) != 0 &&
 		    (mp->mnt_kern_flag & MNTK_NOASYNC) == 0)
 			mp->mnt_kern_flag |= MNTK_ASYNC;
 		/* Wake anyone sleeping on mp who set MNTK_MWAIT. */
 		if (mp->mnt_kern_flag & MNTK_MWAIT) {
 			mp->mnt_kern_flag &= ~MNTK_MWAIT;
 			wakeup(mp);
 		}
 		vfs_op_exit_locked(mp);
 		MNT_IUNLOCK(mp);
 		if (coveredvp)
 			VOP_UNLOCK(coveredvp);
 		return (error);
 	}
 	/* Success (or ENXIO): unlink the mount and tear it down. */
 	mtx_lock(&mountlist_mtx);
 	TAILQ_REMOVE(&mountlist, mp, mnt_list);
 	mtx_unlock(&mountlist_mtx);
 	EVENTHANDLER_DIRECT_INVOKE(vfs_unmounted, mp, td);
 	if (coveredvp != NULL) {
 		coveredvp->v_mountedhere = NULL;
 		VOP_UNLOCK(coveredvp);
 	}
 	vfs_event_signal(NULL, VQ_UNMOUNT, 0);
 	/* Drop the global root vnode if it belongs to this mount. */
 	if (rootvnode != NULL && mp == rootvnode->v_mount) {
 		vrele(rootvnode);
 		rootvnode = NULL;
 	}
 	if (mp == rootdevmp)
 		rootdevmp = NULL;
 	vfs_mount_destroy(mp);
 	return (0);
 }
 
 /*
  * Report errors during filesystem mounting.
  *
  * Formats the message into the buffer of the "errmsg" option found in
  * mp->mnt_optnew.  Does nothing when that option is absent or has no
  * usable buffer.
  */
 void
 vfs_mount_error(struct mount *mp, const char *fmt, ...)
 {
 	va_list args;
 	char *msgbuf;
 	int buflen;
 
 	if (vfs_getopt(mp->mnt_optnew, "errmsg", (void **)&msgbuf,
 	    &buflen) != 0)
 		return;
 	if (msgbuf == NULL || buflen <= 0)
 		return;
 
 	va_start(args, fmt);
 	vsnprintf(msgbuf, (size_t)buflen, fmt, args);
 	va_end(args);
 }
 
 /*
  * Format an error message into the buffer of the "errmsg" option in opts.
  * Does nothing when that option is absent or has no usable buffer.
  */
 void
 vfs_opterror(struct vfsoptlist *opts, const char *fmt, ...)
 {
 	va_list args;
 	char *msgbuf;
 	int buflen;
 
 	if (vfs_getopt(opts, "errmsg", (void **)&msgbuf, &buflen) != 0)
 		return;
 	if (msgbuf == NULL || buflen <= 0)
 		return;
 
 	va_start(args, fmt);
 	vsnprintf(msgbuf, (size_t)buflen, fmt, args);
 	va_end(args);
 }
 
 /*
  * ---------------------------------------------------------------------
  * Functions for querying mount options/arguments from filesystems.
  */
 
 /*
  * Check that no unknown options are given.
  *
  * Every option name in opts must match an entry of global_opts or of the
  * NULL-terminated array legal, either directly or after stripping a
  * leading "no".  Returns 0 when all options are known and EINVAL
  * otherwise.  On failure the message for the last unknown option is copied
  * into the "errmsg" option's buffer when one is present, or printed to the
  * console when the list has no "errmsg" option.
  */
 int
 vfs_filteropt(struct vfsoptlist *opts, const char **legal)
 {
 	struct vfsopt *opt;
 	char errmsg[255];
 	const char **t, *p, *q;
 	int ret = 0;
 
 	TAILQ_FOREACH(opt, opts, link) {
 		p = opt->name;
 		/* q is the name with a leading "no" stripped, if it has one. */
 		q = NULL;
 		if (p[0] == 'n' && p[1] == 'o')
 			q = p + 2;
 		for(t = global_opts; *t != NULL; t++) {
 			if (strcmp(*t, p) == 0)
 				break;
 			if (q != NULL) {
 				if (strcmp(*t, q) == 0)
 					break;
 			}
 		}
 		if (*t != NULL)
 			continue;
 		for(t = legal; *t != NULL; t++) {
 			if (strcmp(*t, p) == 0)
 				break;
 			if (q != NULL) {
 				if (strcmp(*t, q) == 0)
 					break;
 			}
 		}
 		if (*t != NULL)
 			continue;
 		snprintf(errmsg, sizeof(errmsg),
 		    "mount option <%s> is unknown", p);
 		ret = EINVAL;
 	}
 	if (ret != 0) {
 		TAILQ_FOREACH(opt, opts, link) {
 			if (strcmp(opt->name, "errmsg") == 0) {
 				/*
 				 * strncpy() does not terminate the
 				 * destination when the message is at least
 				 * opt->len bytes long, so terminate it
 				 * explicitly, and skip the copy when there
 				 * is no buffer to write into.
 				 */
 				if (opt->value != NULL && opt->len > 0) {
 					strncpy((char *)opt->value, errmsg,
 					    opt->len);
 					((char *)opt->value)[opt->len - 1] =
 					    '\0';
 				}
 				break;
 			}
 		}
 		/* TAILQ_FOREACH leaves opt NULL when no "errmsg" was found. */
 		if (opt == NULL)
 			printf("%s\n", errmsg);
 	}
 	return (ret);
 }
 
 /*
  * Look up a mount option by name.
  *
  * Returns 0 when the option exists and ENOENT when it does not.  On
  * success the option is marked as seen; if len is non-NULL it receives the
  * option's length, and if buf is non-NULL it receives the address of the
  * option's value.
  */
 int
 vfs_getopt(struct vfsoptlist *opts, const char *name, void **buf, int *len)
 {
 	struct vfsopt *cur;
 
 	KASSERT(opts != NULL, ("vfs_getopt: caller passed 'opts' as NULL"));
 
 	TAILQ_FOREACH(cur, opts, link) {
 		if (strcmp(name, cur->name) != 0)
 			continue;
 		cur->seen = 1;
 		if (len != NULL)
 			*len = cur->len;
 		if (buf != NULL)
 			*buf = cur->value;
 		return (0);
 	}
 	return (ENOENT);
 }
 
 /*
  * Return the pos field of the named option and mark it as seen.
  * Returns -1 when opts is NULL or the option is not present.
  */
 int
 vfs_getopt_pos(struct vfsoptlist *opts, const char *name)
 {
 	struct vfsopt *cur;
 
 	if (opts == NULL)
 		return (-1);
 
 	TAILQ_FOREACH(cur, opts, link) {
 		if (strcmp(name, cur->name) != 0)
 			continue;
 		cur->seen = 1;
 		return (cur->pos);
 	}
 	return (-1);
 }
 
 int
 vfs_getopt_size(struct vfsoptlist *opts, const char *name, off_t *value)
 {
 	char *opt_value, *vtp;
 	quad_t iv;
 	int error, opt_len;
 
 	error = vfs_getopt(opts, name, (void **)&opt_value, &opt_len);
 	if (error != 0)
 		return (error);
 	if (opt_len == 0 || opt_value == NULL)
 		return (EINVAL);
 	if (opt_value[0] == '\0' || opt_value[opt_len - 1] != '\0')
 		return (EINVAL);
 	iv = strtoq(opt_value, &vtp, 0);
 	if (vtp == opt_value || (vtp[0] != '\0' && vtp[1] != '\0'))
 		return (EINVAL);
 	if (iv < 0)
 		return (EINVAL);
 	switch (vtp[0]) {
 	case 't': case 'T':
 		iv *= 1024;
 		/* FALLTHROUGH */
 	case 'g': case 'G':
 		iv *= 1024;
 		/* FALLTHROUGH */
 	case 'm': case 'M':
 		iv *= 1024;
 		/* FALLTHROUGH */
 	case 'k': case 'K':
 		iv *= 1024;
 	case '\0':
 		break;
 	default:
 		return (EINVAL);
 	}
 	*value = iv;
 
 	return (0);
 }
 
 char *
 vfs_getopts(struct vfsoptlist *opts, const char *name, int *error)
 {
 	struct vfsopt *opt;
 
 	*error = 0;
 	TAILQ_FOREACH(opt, opts, link) {
 		if (strcmp(name, opt->name) != 0)
 			continue;
 		opt->seen = 1;
 		if (opt->len == 0 ||
 		    ((char *)opt->value)[opt->len - 1] != '\0') {
 			*error = EINVAL;
 			return (NULL);
 		}
 		return (opt->value);
 	}
 	*error = ENOENT;
 	return (NULL);
 }
 
 /*
  * Test for the presence of the named option and mirror it into a flag word.
  *
  * Returns 1 and marks the option as seen when it is present, 0 otherwise.
  * When w is non-NULL, the bits in val are set in *w if the option is
  * present and cleared if it is not.
  */
 int
 vfs_flagopt(struct vfsoptlist *opts, const char *name, uint64_t *w,
 	uint64_t val)
 {
 	struct vfsopt *cur;
 
 	TAILQ_FOREACH(cur, opts, link) {
 		if (strcmp(name, cur->name) == 0)
 			break;
 	}
 	if (cur != NULL) {
 		cur->seen = 1;
 		if (w != NULL)
 			*w |= val;
 		return (1);
 	}
 	if (w != NULL)
 		*w &= ~val;
 	return (0);
 }
 
 /*
  * Parse the named option's value with a scanf-style format.
  *
  * Returns the vsscanf() result for the option's value.  Returns 0 when the
  * option is absent, has no value, or is not NUL-terminated.  A matching
  * option is marked as seen even when it is rejected.
  */
 int
 vfs_scanopt(struct vfsoptlist *opts, const char *name, const char *fmt, ...)
 {
 	va_list ap;
 	struct vfsopt *opt;
 	int ret;
 
 	/* The assertion message names this function, not vfs_getopt(). */
 	KASSERT(opts != NULL, ("vfs_scanopt: caller passed 'opts' as NULL"));
 
 	TAILQ_FOREACH(opt, opts, link) {
 		if (strcmp(name, opt->name) != 0)
 			continue;
 		opt->seen = 1;
 		if (opt->len == 0 || opt->value == NULL)
 			return (0);
 		/* vsscanf() needs a terminated string. */
 		if (((char *)opt->value)[opt->len - 1] != '\0')
 			return (0);
 		va_start(ap, fmt);
 		ret = vsscanf(opt->value, fmt, ap);
 		va_end(ap);
 		return (ret);
 	}
 	return (0);
 }
 
 int
 vfs_setopt(struct vfsoptlist *opts, const char *name, void *value, int len)
 {
 	struct vfsopt *opt;
 
 	TAILQ_FOREACH(opt, opts, link) {
 		if (strcmp(name, opt->name) != 0)
 			continue;
 		opt->seen = 1;
 		if (opt->value == NULL)
 			opt->len = len;
 		else {
 			if (opt->len != len)
 				return (EINVAL);
 			bcopy(value, opt->value, len);
 		}
 		return (0);
 	}
 	return (ENOENT);
 }
 
 int
 vfs_setopt_part(struct vfsoptlist *opts, const char *name, void *value, int len)
 {
 	struct vfsopt *opt;
 
 	TAILQ_FOREACH(opt, opts, link) {
 		if (strcmp(name, opt->name) != 0)
 			continue;
 		opt->seen = 1;
 		if (opt->value == NULL)
 			opt->len = len;
 		else {
 			if (opt->len < len)
 				return (EINVAL);
 			opt->len = len;
 			bcopy(value, opt->value, len);
 		}
 		return (0);
 	}
 	return (ENOENT);
 }
 
 int
 vfs_setopts(struct vfsoptlist *opts, const char *name, const char *value)
 {
 	struct vfsopt *opt;
 
 	TAILQ_FOREACH(opt, opts, link) {
 		if (strcmp(name, opt->name) != 0)
 			continue;
 		opt->seen = 1;
 		if (opt->value == NULL)
 			opt->len = strlen(value) + 1;
 		else if (strlcpy(opt->value, value, opt->len) >= opt->len)
 			return (EINVAL);
 		return (0);
 	}
 	return (ENOENT);
 }
 
 /*
  * Find a mount option and copy its value out.
  *
  * len is the size of the dest buffer and must equal the option's length
  * exactly; EINVAL is returned when it does not.  Returns ENOENT when the
  * option is not found and 0 after a successful copy.  A matching option is
  * marked as seen even when the length check fails.
  */
 int
 vfs_copyopt(struct vfsoptlist *opts, const char *name, void *dest, int len)
 {
 	struct vfsopt *cur;
 
 	KASSERT(opts != NULL, ("vfs_copyopt: caller passed 'opts' as NULL"));
 
 	TAILQ_FOREACH(cur, opts, link) {
 		if (strcmp(name, cur->name) != 0)
 			continue;
 		cur->seen = 1;
 		if (len != cur->len)
 			return (EINVAL);
 		bcopy(cur->value, dest, cur->len);
 		return (0);
 	}
 	return (ENOENT);
 }
 
 /*
  * Fill *sbp with statistics for mp.
  *
  * The buffer is seeded from the cached mp->mnt_stat and a few fields are
  * given defaults before the filesystem's vfs_statfs method is called.
  * Returns whatever that method returns.
  */
 int
 __vfs_statfs(struct mount *mp, struct statfs *sbp)
 {
 
 	/*
 	 * Filesystems only fill in part of the structure for updates, we
 	 * have to read the entirety first to get all content.
 	 */
 	memcpy(sbp, &mp->mnt_stat, sizeof(*sbp));
 
 	/*
 	 * Set these in case the underlying filesystem fails to do so.
 	 */
 	sbp->f_version = STATFS_VERSION;
 	sbp->f_namemax = NAME_MAX;
 	sbp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
 
 	return (mp->mnt_op->vfs_statfs(mp, sbp));
 }
 
 /*
  * Record "from" as the f_mntfromname of mp.  The field is zeroed first
  * and the string is truncated to fit.
  */
 void
 vfs_mountedfrom(struct mount *mp, const char *from)
 {
 	const size_t namesz = sizeof(mp->mnt_stat.f_mntfromname);
 
 	bzero(mp->mnt_stat.f_mntfromname, namesz);
 	strlcpy(mp->mnt_stat.f_mntfromname, from, namesz);
 }
 
 /*
  * ---------------------------------------------------------------------
  * This is the api for building mount args and mounting filesystems from
  * inside the kernel.
  *
  * The API works by accumulation of individual args.  First error is
  * latched.
  *
  * XXX: should be documented in new manpage kernel_mount(9)
  */
 
 /*
  * A memory allocation which must be freed when we are done.
  *
  * This is only the list header; mount_argf() and mount_argsu() allocate
  * it with extra room and keep their data immediately after it (maa + 1).
  */
 struct mntaarg {
 	SLIST_ENTRY(mntaarg)	next;
 };
 
 /* The header for the mount arguments */
 struct mntarg {
 	struct iovec *v;		/* name/value iovecs, two per argument */
 	int len;			/* number of iovec entries used in v */
 	int error;			/* first error seen; latched */
 	SLIST_HEAD(, mntaarg)	list;	/* allocations freed by free_mntarg() */
 };
 
 /*
  * Add a boolean argument.
  *
  * name must start with "no".  When flag is true the leading "no" is
  * dropped, so the positive form of the option is added; otherwise the
  * name is added as given.  The argument carries no value.
  */
 struct mntarg *
 mount_argb(struct mntarg *ma, int flag, const char *name)
 {
 	const char *optname;
 
 	KASSERT(name[0] == 'n' && name[1] == 'o',
 	    ("mount_argb(...,%s): name must start with 'no'", name));
 
 	optname = flag ? name + 2 : name;
 	return (mount_arg(ma, optname, NULL, 0));
 }
 
 /*
  * Add an argument printf style
  *
  * The value is formatted from fmt and the following arguments into a
  * private copy that is owned by ma and released by free_mntarg().  A new
  * mntarg is allocated when ma is NULL.  If ma already carries an error the
  * argument is not added.  Returns ma (or the newly allocated header).
  */
 struct mntarg *
 mount_argf(struct mntarg *ma, const char *name, const char *fmt, ...)
 {
 	va_list ap;
 	struct mntaarg *maa;
 	struct sbuf *sb;
 	int len;
 
 	if (ma == NULL) {
 		ma = malloc(sizeof *ma, M_MOUNT, M_WAITOK | M_ZERO);
 		SLIST_INIT(&ma->list);
 	}
 	if (ma->error)
 		return (ma);
 
 	/* Grow the iovec array by two entries: one for name, one for value. */
 	ma->v = realloc(ma->v, sizeof *ma->v * (ma->len + 2),
 	    M_MOUNT, M_WAITOK);
 	/* The name is referenced, not copied. */
 	ma->v[ma->len].iov_base = (void *)(uintptr_t)name;
 	ma->v[ma->len].iov_len = strlen(name) + 1;
 	ma->len++;
 
 	sb = sbuf_new_auto();
 	va_start(ap, fmt);
 	sbuf_vprintf(sb, fmt, ap);
 	va_end(ap);
 	sbuf_finish(sb);
 	/* Include the terminating NUL in the copied length. */
 	len = sbuf_len(sb) + 1;
 	maa = malloc(sizeof *maa + len, M_MOUNT, M_WAITOK | M_ZERO);
 	SLIST_INSERT_HEAD(&ma->list, maa, next);
 	/* The formatted string lives right after the list header. */
 	bcopy(sbuf_data(sb), maa + 1, len);
 	sbuf_delete(sb);
 
 	ma->v[ma->len].iov_base = maa + 1;
 	ma->v[ma->len].iov_len = len;
 	ma->len++;
 
 	return (ma);
 }
 
 /*
  * Add an argument which is a userland string.
  *
  * val is the userland address of the string and len is the size of the
  * kernel buffer allocated for the copy.  A NULL val adds nothing and
  * returns ma unchanged.  A copyinstr() failure is latched in ma->error,
  * in which case mount_arg() returns without adding the argument.
  */
 struct mntarg *
 mount_argsu(struct mntarg *ma, const char *name, const void *val, int len)
 {
 	struct mntaarg *maa;
 	char *tbuf;
 
 	if (val == NULL)
 		return (ma);
 	if (ma == NULL) {
 		ma = malloc(sizeof *ma, M_MOUNT, M_WAITOK | M_ZERO);
 		SLIST_INIT(&ma->list);
 	}
 	if (ma->error)
 		return (ma);
 	/* The copy buffer follows the list header and is freed with it. */
 	maa = malloc(sizeof *maa + len, M_MOUNT, M_WAITOK | M_ZERO);
 	SLIST_INSERT_HEAD(&ma->list, maa, next);
 	tbuf = (void *)(maa + 1);
 	ma->error = copyinstr(val, tbuf, len, NULL);
 	/* -1 asks mount_arg() to measure tbuf as a C string. */
 	return (mount_arg(ma, name, tbuf, -1));
 }
 
 /*
  * Plain argument.
  *
  * If length is -1, treat value as a C string.
  *
  * Neither name nor val is copied; the iovecs point at the caller's
  * storage.  A new mntarg is allocated when ma is NULL.  If ma already
  * carries an error the argument is not added.  Returns ma (or the newly
  * allocated header).
  */
 struct mntarg *
 mount_arg(struct mntarg *ma, const char *name, const void *val, int len)
 {
 
 	if (ma == NULL) {
 		ma = malloc(sizeof *ma, M_MOUNT, M_WAITOK | M_ZERO);
 		SLIST_INIT(&ma->list);
 	}
 	if (ma->error)
 		return (ma);
 
 	/* Grow the iovec array by two entries: one for name, one for value. */
 	ma->v = realloc(ma->v, sizeof *ma->v * (ma->len + 2),
 	    M_MOUNT, M_WAITOK);
 	ma->v[ma->len].iov_base = (void *)(uintptr_t)name;
 	ma->v[ma->len].iov_len = strlen(name) + 1;
 	ma->len++;
 
 	ma->v[ma->len].iov_base = (void *)(uintptr_t)val;
 	/* Any negative len means "measure val with strlen()". */
 	if (len < 0)
 		ma->v[ma->len].iov_len = strlen(val) + 1;
 	else
 		ma->v[ma->len].iov_len = len;
 	ma->len++;
 	return (ma);
 }
 
 /*
  * Free a mntarg structure
  */
 static void
 free_mntarg(struct mntarg *ma)
 {
 	struct mntaarg *maa;
 
 	while (!SLIST_EMPTY(&ma->list)) {
 		maa = SLIST_FIRST(&ma->list);
 		SLIST_REMOVE_HEAD(&ma->list, next);
 		free(maa, M_MOUNT);
 	}
 	free(ma->v, M_MOUNT);
 	free(ma, M_MOUNT);
 }
 
 /*
  * Mount a filesystem
  */
 int
 kernel_mount(struct mntarg *ma, uint64_t flags)
 {
 	struct uio auio;
 	int error;
 
 	KASSERT(ma != NULL, ("kernel_mount NULL ma"));
 	KASSERT(ma->v != NULL, ("kernel_mount NULL ma->v"));
 	KASSERT(!(ma->len & 1), ("kernel_mount odd ma->len (%d)", ma->len));
 
 	auio.uio_iov = ma->v;
 	auio.uio_iovcnt = ma->len;
 	auio.uio_segflg = UIO_SYSSPACE;
 
 	error = ma->error;
 	if (!error)
 		error = vfs_donmount(curthread, flags, &auio);
 	free_mntarg(ma);
 	return (error);
 }
 
 /*
  * A printflike function to mount a filesystem.
  *
  * The arguments after flags are (name, value) pairs of pointers, ended by
  * a NULL name.  A non-NULL value is added as a C string; a NULL value adds
  * the name with a zero-length value.  Returns the kernel_mount() result.
  */
 int
 kernel_vmount(int flags, ...)
 {
 	struct mntarg *ma = NULL;
 	va_list ap;
 	const char *cp;
 	const void *vp;
 	int error;
 
 	va_start(ap, flags);
 	for (;;) {
 		cp = va_arg(ap, const char *);
 		if (cp == NULL)
 			break;
 		vp = va_arg(ap, const void *);
 		/* -1 makes mount_arg() measure the value with strlen(). */
 		ma = mount_arg(ma, cp, vp, (vp != NULL ? -1 : 0));
 	}
 	va_end(ap);
 
 	error = kernel_mount(ma, flags);
 	return (error);
-}
-
-/*
- * Convert the old export args format into new export args.
- *
- * The old export args struct does not have security flavors.  Otherwise, the
- * structs are identical.  The default security flavor 'sys' is applied when
- * the given args export the filesystem.
- */
-void
-vfs_oexport_conv(const struct oexport_args *oexp, struct export_args *exp)
-{
-
-	bcopy(oexp, exp, sizeof(*oexp));
-	if (exp->ex_flags & MNT_EXPORTED) {
-		exp->ex_numsecflavors = 1;
-		exp->ex_secflavors[0] = AUTH_SYS;
-	} else {
-		exp->ex_numsecflavors = 0;
-	}
 }
Index: head/sys/nlm/nlm_prot_impl.c
===================================================================
--- head/sys/nlm/nlm_prot_impl.c	(revision 362157)
+++ head/sys/nlm/nlm_prot_impl.c	(revision 362158)
@@ -1,2416 +1,2417 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2008 Isilon Inc http://www.isilon.com/
  * Authors: Doug Rabson <dfr@rabson.org>
  * Developed with Red Inc: Alfred Perlstein <alfred@freebsd.org>
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include "opt_inet6.h"
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/fail.h>
 #include <sys/fcntl.h>
 #include <sys/kernel.h>
 #include <sys/kthread.h>
 #include <sys/lockf.h>
 #include <sys/malloc.h>
 #include <sys/mount.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/syscall.h>
 #include <sys/sysctl.h>
 #include <sys/sysent.h>
 #include <sys/syslog.h>
 #include <sys/sysproto.h>
 #include <sys/systm.h>
 #include <sys/taskqueue.h>
 #include <sys/unistd.h>
 #include <sys/vnode.h>
 
 #include <nfs/nfsproto.h>
 #include <nfs/nfs_lock.h>
 
 #include <nlm/nlm_prot.h>
 #include <nlm/sm_inter.h>
 #include <nlm/nlm.h>
 #include <rpc/rpc_com.h>
 #include <rpc/rpcb_prot.h>
 
 MALLOC_DEFINE(M_NLM, "NLM", "Network Lock Manager");
 
 /*
  * If a host is inactive (and holds no locks) for this amount of
  * seconds, we consider it idle and stop tracking it.
  */
 #define NLM_IDLE_TIMEOUT	30
 
 /*
  * We check the host list for idle every few seconds.
  */
 #define NLM_IDLE_PERIOD		5
 
 /*
  * We only look for GRANTED_RES messages for a little while.
  */
 #define NLM_EXPIRE_TIMEOUT	10
 
 /*
  * Support for sysctl vfs.nlm.sysid
  */
 static SYSCTL_NODE(_vfs, OID_AUTO, nlm, CTLFLAG_RW | CTLFLAG_MPSAFE, NULL,
     "Network Lock Manager");
 static SYSCTL_NODE(_vfs_nlm, OID_AUTO, sysid,
     CTLFLAG_RW | CTLFLAG_MPSAFE, NULL,
     "");
 
 /*
  * Syscall hooks
  */
 static struct syscall_helper_data nlm_syscalls[] = {
 	SYSCALL_INIT_HELPER(nlm_syscall),
 	SYSCALL_INIT_LAST
 };
 
 /*
  * Debug level passed in from userland. We also support a sysctl hook
  * so that it can be changed on a live system.
  */
 static int nlm_debug_level;
 SYSCTL_INT(_debug, OID_AUTO, nlm_debug, CTLFLAG_RW, &nlm_debug_level, 0, "");
 
 /* Log at LOG_DEBUG, but only when nlm_debug_level is at least _level. */
 #define NLM_DEBUG(_level, args...)			\
 	do {						\
 		if (nlm_debug_level >= (_level))	\
 			log(LOG_DEBUG, args);		\
 	} while(0)
 /* Log unconditionally at LOG_ERR. */
 #define NLM_ERR(args...)			\
 	do {					\
 		log(LOG_ERR, args);		\
 	} while(0)
 
 /*
  * Grace period handling. The value of nlm_grace_threshold is the
  * value of time_uptime after which we are serving requests normally.
  */
 static time_t nlm_grace_threshold;
 
 /*
  * We check for idle hosts if time_uptime is greater than
  * nlm_next_idle_check.
  */
 static time_t nlm_next_idle_check;
 
 /*
  * A flag to indicate the server is already running.
  */
 static int nlm_is_running;
 
 /*
  * A socket to use for RPC - shared by all IPv4 RPC clients.
  */
 static struct socket *nlm_socket;
 
 #ifdef INET6
 
 /*
  * A socket to use for RPC - shared by all IPv6 RPC clients.
  */
 static struct socket *nlm_socket6;
 
 #endif
 
 /*
  * An RPC client handle that can be used to communicate with the local
  * NSM.
  */
 static CLIENT *nlm_nsm;
 
 /*
  * An AUTH handle for the server's creds.
  */
 static AUTH *nlm_auth;
 
 /*
  * A zero timeval for sending async RPC messages.
  */
 struct timeval nlm_zero_tv = { 0, 0 };
 
 /*
  * The local NSM state number
  */
 int nlm_nsm_state;
 
 
 /*
  * A lock to protect the host list and waiting lock list.
  */
 static struct mtx nlm_global_lock;
 
 /*
  * Locks:
  * (l)		locked by nh_lock
  * (s)		only accessed via server RPC which is single threaded
  * (g)		locked by nlm_global_lock
  * (c)		const until freeing
  * (a)		modified using atomic ops
  */
 
 /*
  * A pending client-side lock request, stored on the nlm_waiting_locks
  * list.
  */
 struct nlm_waiting_lock {
 	TAILQ_ENTRY(nlm_waiting_lock) nw_link; /* (g) */
 	bool_t		nw_waiting;	       /* (g) */
 	nlm4_lock	nw_lock;	       /* (c) */
 	union nfsfh	nw_fh;		       /* (c) */
 	struct vnode	*nw_vp;		       /* (c) */
 };
 TAILQ_HEAD(nlm_waiting_lock_list, nlm_waiting_lock);
 
 struct nlm_waiting_lock_list nlm_waiting_locks; /* (g) */
 
 /*
  * A pending server-side asynchronous lock request, stored on the
  * nh_pending list of the NLM host.
  */
 struct nlm_async_lock {
 	TAILQ_ENTRY(nlm_async_lock) af_link; /* (l) host's list of locks */
 	struct task	af_task;	/* (c) async callback details */
 	void		*af_cookie;	/* (l) lock manager cancel token */
 	struct vnode	*af_vp;		/* (l) vnode to lock */
 	struct flock	af_fl;		/* (c) lock details */
 	struct nlm_host *af_host;	/* (c) host which is locking */
 	CLIENT		*af_rpc;	/* (c) rpc client to send message */
 	nlm4_testargs	af_granted;	/* (c) notification details */
 	time_t		af_expiretime;	/* (c) notification time */
 };
 TAILQ_HEAD(nlm_async_lock_list, nlm_async_lock);
 
 /*
  * NLM host.
  */
 /* Values stored in the nh_monstate field of struct nlm_host. */
 enum nlm_host_state {
 	NLM_UNMONITORED,
 	NLM_MONITORED,
 	NLM_MONITOR_FAILED,
 	NLM_RECOVERING
 };
 
 /* An RPC client handle together with the time it was created. */
 struct nlm_rpc {
 	CLIENT		*nr_client;    /* (l) RPC client handle */
 	time_t		nr_create_time; /* (l) when client was created */
 };
 
 struct nlm_host {
 	struct mtx	nh_lock;
 	volatile u_int	nh_refs;       /* (a) reference count */
 	TAILQ_ENTRY(nlm_host) nh_link; /* (g) global list of hosts */
 	char		nh_caller_name[MAXNAMELEN]; /* (c) printable name of host */
 	uint32_t	nh_sysid;	 /* (c) our allocated system ID */
 	char		nh_sysid_string[10]; /* (c) string rep. of sysid */
 	struct sockaddr_storage	nh_addr; /* (s) remote address of host */
 	struct nlm_rpc	nh_srvrpc;	 /* (l) RPC for server replies */
 	struct nlm_rpc	nh_clntrpc;	 /* (l) RPC for client requests */
 	rpcvers_t	nh_vers;	 /* (s) NLM version of host */
 	int		nh_state;	 /* (s) last seen NSM state of host */
 	enum nlm_host_state nh_monstate; /* (l) local NSM monitoring state */
 	time_t		nh_idle_timeout; /* (s) Time at which host is idle */
 	struct sysctl_ctx_list nh_sysctl; /* (c) vfs.nlm.sysid nodes */
 	uint32_t	nh_grantcookie;  /* (l) grant cookie counter */
 	struct nlm_async_lock_list nh_pending; /* (l) pending async locks */
 	struct nlm_async_lock_list nh_granted; /* (l) granted locks */
 	struct nlm_async_lock_list nh_finished; /* (l) finished async locks */
 };
 TAILQ_HEAD(nlm_host_list, nlm_host);
 
 static struct nlm_host_list nlm_hosts; /* (g) */
 static uint32_t nlm_next_sysid = 1;    /* (g) */
 
 static void	nlm_host_unmonitor(struct nlm_host *);
 
 /*
  * Layout that ng_sysid() and ng_cookie() impose on the bytes of a cookie
  * netobj.
  */
 struct nlm_grantcookie {
 	uint32_t	ng_sysid;
 	uint32_t	ng_cookie;
 };
 
 static inline uint32_t
 ng_sysid(struct netobj *src)
 {
 
 	return ((struct nlm_grantcookie *)src->n_bytes)->ng_sysid;
 }
 
 static inline uint32_t
 ng_cookie(struct netobj *src)
 {
 
 	return ((struct nlm_grantcookie *)src->n_bytes)->ng_cookie;
 }
 
 /**********************************************************************/
 
 /*
  * Initialise NLM globals: the global lock, the waiting-lock and host
  * lists, and the NLM syscall registration.  Returns the result of
  * syscall_helper_register(), logging an error when it is non-zero.
  */
 static int
 nlm_init(void)
 {
 	int rc;
 
 	mtx_init(&nlm_global_lock, "nlm_global_lock", NULL, MTX_DEF);
 	TAILQ_INIT(&nlm_waiting_locks);
 	TAILQ_INIT(&nlm_hosts);
 
 	rc = syscall_helper_register(nlm_syscalls, SY_THR_STATIC_KLD);
 	if (rc == 0)
 		return (0);
 
 	NLM_ERR("Can't register NLM syscall\n");
 	return (rc);
 }
 
 /*
  * Undo nlm_init()'s syscall registration.
  */
 static void
 nlm_uninit(void)
 {
 
 	syscall_helper_unregister(nlm_syscalls);
 }
 
 /*
  * Create a netobj from an arbitrary source.
  *
  * Allocates srcsize bytes of the given malloc type, copies src into them,
  * and stores the buffer and its length in dst.
  */
 void
 nlm_make_netobj(struct netobj *dst, caddr_t src, size_t srcsize,
     struct malloc_type *type)
 {
 	void *copy;
 
 	copy = malloc(srcsize, type, M_WAITOK);
 	memcpy(copy, src, srcsize);
 	dst->n_bytes = copy;
 	dst->n_len = srcsize;
 }
 
 /*
  * Copy a struct netobj.
  *
  * dst receives a freshly allocated copy of src's bytes, made with
  * nlm_make_netobj() using the given malloc type.
  */
 void
 nlm_copy_netobj(struct netobj *dst, struct netobj *src,
     struct malloc_type *type)
 {
 
 	nlm_make_netobj(dst, src->n_bytes, src->n_len, type);
 }
 
 
 /*
  * Create an RPC client handle for the given (address,prog,vers)
  * triple.
  *
  * The remote port is looked up over UDP, trying RPCBIND version 4, then
  * version 3, then the legacy portmapper.  The UDP registration is
  * preferred; if the lookup succeeds but yields no port, the TCP
  * registration is queried and a reconnecting TCP client is created
  * instead.
  *
  * Returns the client handle, or NULL when the address family is not
  * supported, the lookup client cannot be created, or no usable port was
  * obtained.
  */
 static CLIENT *
 nlm_get_rpc(struct sockaddr *sa, rpcprog_t prog, rpcvers_t vers)
 {
 	char *wchan = "nlmrcv";
 	struct sockaddr_storage ss;
 	struct socket *so;
 	CLIENT *rpcb;
 	struct timeval timo;
 	RPCB parms;
 	char *uaddr;
 	enum clnt_stat stat = RPC_SUCCESS;
 	int rpcvers = RPCBVERS4;
 	bool_t do_tcp = FALSE;
 	bool_t tryagain = FALSE;
 	struct portmap mapping;
 	u_short port = 0;
 
 	/*
 	 * First we need to contact the remote RPCBIND service to find
 	 * the right port.
 	 */
 	memcpy(&ss, sa, sa->sa_len);
 	switch (ss.ss_family) {
 	case AF_INET:
 		((struct sockaddr_in *)&ss)->sin_port = htons(111);
 		so = nlm_socket;
 		break;
 #ifdef INET6
 	case AF_INET6:
 		((struct sockaddr_in6 *)&ss)->sin6_port = htons(111);
 		so = nlm_socket6;
 		break;
 #endif
 
 	default:
 		/*
 		 * Unsupported address family - fail.
 		 */
 		return (NULL);
 	}
 
 	rpcb = clnt_dg_create(so, (struct sockaddr *)&ss,
 	    RPCBPROG, rpcvers, 0, 0);
 	if (!rpcb)
 		return (NULL);
 
 try_tcp:
 	parms.r_prog = prog;
 	parms.r_vers = vers;
 	if (do_tcp)
 		parms.r_netid = "tcp";
 	else
 		parms.r_netid = "udp";
 	parms.r_addr = "";
 	parms.r_owner = "";
 
 	/*
 	 * Use the default timeout.
 	 */
 	timo.tv_sec = 25;
 	timo.tv_usec = 0;
 again:
 	switch (rpcvers) {
 	case RPCBVERS4:
 	case RPCBVERS:
 		/*
 		 * Try RPCBIND 4 then 3.
 		 */
 		uaddr = NULL;
 		stat = CLNT_CALL(rpcb, (rpcprog_t) RPCBPROC_GETADDR,
 		    (xdrproc_t) xdr_rpcb, &parms,
 		    (xdrproc_t) xdr_wrapstring, &uaddr, timo);
 		if (stat == RPC_SUCCESS) {
 			/*
 			 * We have a reply from the remote RPCBIND - turn it
 			 * into an appropriate address and make a new client
 			 * that can talk to the remote NLM.
 			 *
 			 * XXX fixup IPv6 scope ID.
 			 */
 			struct netbuf *a;
 			a = __rpc_uaddr2taddr_af(ss.ss_family, uaddr);
 			if (!a) {
 				tryagain = TRUE;
 			} else {
 				tryagain = FALSE;
 				memcpy(&ss, a->buf, a->len);
 				free(a->buf, M_RPC);
 				free(a, M_RPC);
 			}
 			/*
 			 * The decoded string belongs to us whether or not
 			 * it could be converted; release it on both paths
 			 * so a failed conversion does not leak it.
 			 */
 			xdr_free((xdrproc_t) xdr_wrapstring, &uaddr);
 		}
 		if (tryagain || stat == RPC_PROGVERSMISMATCH) {
 			/* Step down one protocol version and retry. */
 			if (rpcvers == RPCBVERS4)
 				rpcvers = RPCBVERS;
 			else if (rpcvers == RPCBVERS)
 				rpcvers = PMAPVERS;
 			CLNT_CONTROL(rpcb, CLSET_VERS, &rpcvers);
 			goto again;
 		}
 		break;
 	case PMAPVERS:
 		/*
 		 * Try portmap.
 		 */
 		mapping.pm_prog = parms.r_prog;
 		mapping.pm_vers = parms.r_vers;
 		mapping.pm_prot = do_tcp ? IPPROTO_TCP : IPPROTO_UDP;
 		mapping.pm_port = 0;
 
 		stat = CLNT_CALL(rpcb, (rpcprog_t) PMAPPROC_GETPORT,
 		    (xdrproc_t) xdr_portmap, &mapping,
 		    (xdrproc_t) xdr_u_short, &port, timo);
 
 		if (stat == RPC_SUCCESS) {
 			switch (ss.ss_family) {
 			case AF_INET:
 				((struct sockaddr_in *)&ss)->sin_port =
 					htons(port);
 				break;
 
 #ifdef INET6
 			case AF_INET6:
 				((struct sockaddr_in6 *)&ss)->sin6_port =
 					htons(port);
 				break;
 #endif
 			}
 		}
 		break;
 	default:
 		panic("invalid rpcvers %d", rpcvers);
 	}
 	/*
 	 * We may have a positive response from the portmapper, but the NLM
 	 * service was not found. Make sure we received a valid port.
 	 */
 	switch (ss.ss_family) {
 	case AF_INET:
 		port = ((struct sockaddr_in *)&ss)->sin_port;
 		break;
 #ifdef INET6
 	case AF_INET6:
 		port = ((struct sockaddr_in6 *)&ss)->sin6_port;
 		break;
 #endif
 	}
 	if (stat != RPC_SUCCESS || !port) {
 		/*
 		 * If we were able to talk to rpcbind or portmap, but the udp
 		 * variant wasn't available, ask about tcp.
 		 *
 		 * XXX - We could also check for a TCP portmapper, but
 		 * if the host is running a portmapper at all, we should be able
 		 * to hail it over UDP.
 		 */
 		if (stat == RPC_SUCCESS && !do_tcp) {
 			do_tcp = TRUE;
 			goto try_tcp;
 		}
 
 		/* Otherwise, bad news. */
 		NLM_ERR("NLM: failed to contact remote rpcbind, "
 		    "stat = %d, port = %d\n", (int) stat, port);
 		CLNT_DESTROY(rpcb);
 		return (NULL);
 	}
 
 	if (do_tcp) {
 		/*
 		 * Destroy the UDP client we used to speak to rpcbind and
 		 * recreate as a TCP client.
 		 */
 		struct netconfig *nconf = NULL;
 
 		CLNT_DESTROY(rpcb);
 
 		switch (ss.ss_family) {
 		case AF_INET:
 			nconf = getnetconfigent("tcp");
 			break;
 #ifdef INET6
 		case AF_INET6:
 			nconf = getnetconfigent("tcp6");
 			break;
 #endif
 		}
 
 		rpcb = clnt_reconnect_create(nconf, (struct sockaddr *)&ss,
 		    prog, vers, 0, 0);
 		CLNT_CONTROL(rpcb, CLSET_WAITCHAN, wchan);
 		rpcb->cl_auth = nlm_auth;
 
 	} else {
 		/*
 		 * Re-use the client we used to speak to rpcbind.
 		 */
 		CLNT_CONTROL(rpcb, CLSET_SVC_ADDR, &ss);
 		CLNT_CONTROL(rpcb, CLSET_PROG, &prog);
 		CLNT_CONTROL(rpcb, CLSET_VERS, &vers);
 		CLNT_CONTROL(rpcb, CLSET_WAITCHAN, wchan);
 		rpcb->cl_auth = nlm_auth;
 	}
 
 	return (rpcb);
 }
 
 /*
  * This is the async callback run when an async lock request has been
  * granted. We notify the host which initiated the request.
  *
  * arg is the struct nlm_async_lock for the request; pending is unused.
  */
 static void
 nlm_lock_callback(void *arg, int pending)
 {
 	struct nlm_async_lock *af = (struct nlm_async_lock *) arg;
 	struct rpc_callextra ext;
 
 	NLM_DEBUG(2, "NLM: async lock %p for %s (sysid %d) granted,"
 	    " cookie %d:%d\n", af, af->af_host->nh_caller_name,
 	    af->af_host->nh_sysid, ng_sysid(&af->af_granted.cookie),
 	    ng_cookie(&af->af_granted.cookie));
 
 	/*
 	 * Send the results back to the host.
 	 *
 	 * Note: there is a possible race here with nlm_host_notify
 	 * destroying the RPC client. To avoid problems, the first
 	 * thing nlm_host_notify does is to cancel pending async lock
 	 * requests.
 	 */
 	memset(&ext, 0, sizeof(ext));
 	ext.rc_auth = nlm_auth;
 	if (af->af_host->nh_vers == NLM_VERS4) {
 		nlm4_granted_msg_4(&af->af_granted,
 		    NULL, af->af_rpc, &ext, nlm_zero_tv);
 	} else {
 		/*
 		 * Back-convert to legacy protocol
 		 */
 		nlm_testargs granted;
 		granted.cookie = af->af_granted.cookie;
 		granted.exclusive = af->af_granted.exclusive;
 		granted.alock.caller_name =
 			af->af_granted.alock.caller_name;
 		granted.alock.fh = af->af_granted.alock.fh;
 		granted.alock.oh = af->af_granted.alock.oh;
 		granted.alock.svid = af->af_granted.alock.svid;
 		granted.alock.l_offset =
 			af->af_granted.alock.l_offset;
 		granted.alock.l_len =
 			af->af_granted.alock.l_len;
 
 		nlm_granted_msg_1(&granted,
 		    NULL, af->af_rpc, &ext, nlm_zero_tv);
 	}
 
 	/*
 	 * Move this entry to the nh_granted list.
 	 */
 	/* nlm_check_expired_locks() frees it once this time has passed. */
 	af->af_expiretime = time_uptime + NLM_EXPIRE_TIMEOUT;
 	mtx_lock(&af->af_host->nh_lock);
 	TAILQ_REMOVE(&af->af_host->nh_pending, af, af_link);
 	TAILQ_INSERT_TAIL(&af->af_host->nh_granted, af, af_link);
 	mtx_unlock(&af->af_host->nh_lock);
 }
 
 /*
  * Free an async lock request. The request must have been removed from
  * any list.
  */
 static void
 nlm_free_async_lock(struct nlm_async_lock *af)
 {
 	/*
 	 * Free an async lock.
 	 */
 	if (af->af_rpc)
 		CLNT_RELEASE(af->af_rpc);
 	xdr_free((xdrproc_t) xdr_nlm4_testargs, &af->af_granted);
 	if (af->af_vp)
 		vrele(af->af_vp);
 	free(af, M_NLM);
 }
 
 /*
  * Cancel our async request - this must be called with
  * af->af_host->nh_lock held. This is slightly complicated by a
  * potential race with our own callback. If we fail to cancel the
  * lock, it must already have been granted - we make sure our async
  * task has completed by calling taskqueue_drain in this case.
  *
  * The host lock is dropped and re-acquired inside this function and is
  * held again on return.  Returns 0 when the request was cancelled, in
  * which case af has been freed, or the VOP_ADVLOCKASYNC() error, in which
  * case af is left alone.
  */
 static int
 nlm_cancel_async_lock(struct nlm_async_lock *af)
 {
 	struct nlm_host *host = af->af_host;
 	int error;
 
 	mtx_assert(&host->nh_lock, MA_OWNED);
 
 	/* The cancel and the drain below run without the host lock. */
 	mtx_unlock(&host->nh_lock);
 
 	error = VOP_ADVLOCKASYNC(af->af_vp, NULL, F_CANCEL, &af->af_fl,
 	    F_REMOTE, NULL, &af->af_cookie);
 
 	if (error) {
 		/*
 		 * We failed to cancel - make sure our callback has
 		 * completed before we continue.
 		 */
 		taskqueue_drain(taskqueue_thread, &af->af_task);
 	}
 
 	mtx_lock(&host->nh_lock);
 	
 	if (!error) {
 		NLM_DEBUG(2, "NLM: async lock %p for %s (sysid %d) "
 		    "cancelled\n", af, host->nh_caller_name, host->nh_sysid);
 
 		/*
 		 * Remove from the nh_pending list and free now that
 		 * we are safe from the callback.
 		 */
 		TAILQ_REMOVE(&host->nh_pending, af, af_link);
 		/* The free is done with the host lock dropped. */
 		mtx_unlock(&host->nh_lock);
 		nlm_free_async_lock(af);
 		mtx_lock(&host->nh_lock);
 	}
 
 	return (error);
 }
 
 /*
  * Reap async lock requests that are no longer needed for 'host':
  * entries at the head of nh_granted whose expiry time has been
  * reached, and every entry on nh_finished. The host lock is dropped
  * around each free.
  */
 static void
 nlm_check_expired_locks(struct nlm_host *host)
 {
 	struct nlm_async_lock *af;
 	time_t now;

 	now = time_uptime;

 	mtx_lock(&host->nh_lock);

 	/* Expired grants: stop at the first entry that is still live. */
 	for (;;) {
 		af = TAILQ_FIRST(&host->nh_granted);
 		if (af == NULL || now < af->af_expiretime)
 			break;
 		NLM_DEBUG(2, "NLM: async lock %p for %s (sysid %d) expired,"
 		    " cookie %d:%d\n", af, af->af_host->nh_caller_name,
 		    af->af_host->nh_sysid, ng_sysid(&af->af_granted.cookie),
 		    ng_cookie(&af->af_granted.cookie));
 		TAILQ_REMOVE(&host->nh_granted, af, af_link);
 		mtx_unlock(&host->nh_lock);
 		nlm_free_async_lock(af);
 		mtx_lock(&host->nh_lock);
 	}

 	/* Finished requests are freed unconditionally. */
 	for (;;) {
 		af = TAILQ_FIRST(&host->nh_finished);
 		if (af == NULL)
 			break;
 		TAILQ_REMOVE(&host->nh_finished, af, af_link);
 		mtx_unlock(&host->nh_lock);
 		nlm_free_async_lock(af);
 		mtx_lock(&host->nh_lock);
 	}

 	mtx_unlock(&host->nh_lock);
 }
 
 /*
  * Free resources used by a host. This is called after the reference
  * count has reached zero so it doesn't need to worry about locks.
  */
 static void
 nlm_host_destroy(struct nlm_host *host)
 {
 
 	mtx_lock(&nlm_global_lock);
 	TAILQ_REMOVE(&nlm_hosts, host, nh_link);
 	mtx_unlock(&nlm_global_lock);
 
 	if (host->nh_srvrpc.nr_client)
 		CLNT_RELEASE(host->nh_srvrpc.nr_client);
 	if (host->nh_clntrpc.nr_client)
 		CLNT_RELEASE(host->nh_clntrpc.nr_client);
 	mtx_destroy(&host->nh_lock);
 	sysctl_ctx_free(&host->nh_sysctl);
 	free(host, M_NLM);
 }
 
 /*
  * Entry point of the client lock recovery kernel thread. 'arg' is the
  * host to recover; the host reference handed to this thread is
  * released before the thread exits.
  */
 static void
 nlm_client_recovery_start(void *arg)
 {
 	struct nlm_host *host;

 	host = arg;

 	NLM_DEBUG(1, "NLM: client lock recovery for %s started\n",
 	    host->nh_caller_name);

 	nlm_client_recovery(host);

 	NLM_DEBUG(1, "NLM: client lock recovery for %s completed\n",
 	    host->nh_caller_name);

 	/* Recovery is over: go back to the plain monitored state. */
 	host->nh_monstate = NLM_MONITORED;
 	nlm_host_release(host);

 	kthread_exit();
 }
 
 /*
  * This is called when we receive a host state change notification. We
  * unlock any active locks owned by the host. When rpc.lockd is
  * shutting down, this function is called with newstate set to zero
  * which allows us to cancel any pending async locks and clear the
  * locking state.
  *
  * A non-zero newstate is recorded in host->nh_state and, if the local
  * lock manager reports locks for NLM_SYSID_CLIENT | nh_sysid, a
  * recovery thread is started unless one is already marked as running.
  */
 static void
 nlm_host_notify(struct nlm_host *host, int newstate)
 {
 	struct nlm_async_lock *af;
 
 	if (newstate) {
 		NLM_DEBUG(1, "NLM: host %s (sysid %d) rebooted, new "
 		    "state is %d\n", host->nh_caller_name,
 		    host->nh_sysid, newstate);
 	}
 
 	/*
 	 * Cancel any pending async locks for this host.
 	 */
 	mtx_lock(&host->nh_lock);
 	while ((af = TAILQ_FIRST(&host->nh_pending)) != NULL) {
 		/*
 		 * nlm_cancel_async_lock will remove the entry from
 		 * nh_pending and free it.
 		 *
 		 * NOTE(review): the return value is ignored. When the
 		 * cancel fails the entry is not removed by
 		 * nlm_cancel_async_lock itself; the loop presumably
 		 * relies on the grant callback having moved it off
 		 * nh_pending - confirm.
 		 */
 		nlm_cancel_async_lock(af);
 	}
 	mtx_unlock(&host->nh_lock);
 	nlm_check_expired_locks(host);
 
 	/*
 	 * The host just rebooted - trash its locks.
 	 */
 	lf_clearremotesys(host->nh_sysid);
 	host->nh_state = newstate;
 
 	/*
 	 * If we have any remote locks for this host (i.e. it
 	 * represents a remote NFS server that our local NFS client
 	 * has locks for), start a recovery thread.
 	 */
 	if (newstate != 0
 	    && host->nh_monstate != NLM_RECOVERING
 	    && lf_countlocks(NLM_SYSID_CLIENT | host->nh_sysid) > 0) {
 		struct thread *td;
 		host->nh_monstate = NLM_RECOVERING;
 		/* Extra reference for the recovery thread to release. */
 		refcount_acquire(&host->nh_refs);
 		/*
 		 * NOTE(review): the result of kthread_add is not
 		 * checked; if it can fail, the reference taken above
 		 * and the NLM_RECOVERING state are not undone here.
 		 */
 		kthread_add(nlm_client_recovery_start, host, curproc, &td, 0, 0,
 		    "NFS lock recovery for %s", host->nh_caller_name);
 	}
 }
 
 /*
  * Sysctl handler reporting how many locks the local lock manager
  * holds for this host's sysid. The host is passed as the OID's arg1.
  */
 static int
 nlm_host_lock_count_sysctl(SYSCTL_HANDLER_ARGS)
 {
 	struct nlm_host *host = oidp->oid_arg1;
 	int nlocks;

 	nlocks = lf_countlocks(host->nh_sysid);

 	return (sysctl_handle_int(oidp, &nlocks, 0, req));
 }
 
 /*
  * Sysctl handler reporting how many client-side locks are recorded
  * for this host, i.e. locks filed under NLM_SYSID_CLIENT | nh_sysid.
  * The host is passed as the OID's arg1.
  */
 static int
 nlm_host_client_lock_count_sysctl(SYSCTL_HANDLER_ARGS)
 {
 	struct nlm_host *host = oidp->oid_arg1;
 	int nlocks;

 	nlocks = lf_countlocks(NLM_SYSID_CLIENT | host->nh_sysid);

 	return (sysctl_handle_int(oidp, &nlocks, 0, req));
 }
 
 /*
  * Create a new NLM host named 'caller_name', assign it the next sysid
  * and append it to nlm_hosts.
  *
  * Must be called with nlm_global_lock held. The lock is dropped while
  * the per-host sysctl nodes are created and is held again on return.
  * The new host starts with a reference count of one. Returns NULL if
  * the (non-sleeping) allocation fails.
  */
 static struct nlm_host *
 nlm_create_host(const char* caller_name)
 {
 	struct nlm_host *host;
 	struct sysctl_oid *oid;
 
 	mtx_assert(&nlm_global_lock, MA_OWNED);
 
 	NLM_DEBUG(1, "NLM: new host %s (sysid %d)\n",
 	    caller_name, nlm_next_sysid);
 	/* M_NOWAIT: we are holding nlm_global_lock here. */
 	host = malloc(sizeof(struct nlm_host), M_NLM, M_NOWAIT|M_ZERO);
 	if (!host)
 		return (NULL);
 	mtx_init(&host->nh_lock, "nh_lock", NULL, MTX_DEF);
 	host->nh_refs = 1;
 	strlcpy(host->nh_caller_name, caller_name, MAXNAMELEN);
 	host->nh_sysid = nlm_next_sysid++;
 	/* Decimal sysid string, used below as the sysctl node name. */
 	snprintf(host->nh_sysid_string, sizeof(host->nh_sysid_string),
 		"%d", host->nh_sysid);
 	host->nh_vers = 0;
 	host->nh_state = 0;
 	host->nh_monstate = NLM_UNMONITORED;
 	host->nh_grantcookie = 1;
 	TAILQ_INIT(&host->nh_pending);
 	TAILQ_INIT(&host->nh_granted);
 	TAILQ_INIT(&host->nh_finished);
 	TAILQ_INSERT_TAIL(&nlm_hosts, host, nh_link);
 
 	/*
 	 * The host is already visible on nlm_hosts at this point; the
 	 * global lock is released only for the sysctl registration.
 	 */
 	mtx_unlock(&nlm_global_lock);
 
 	sysctl_ctx_init(&host->nh_sysctl);
 	oid = SYSCTL_ADD_NODE(&host->nh_sysctl,
 	    SYSCTL_STATIC_CHILDREN(_vfs_nlm_sysid),
 	    OID_AUTO, host->nh_sysid_string, CTLFLAG_RD | CTLFLAG_MPSAFE,
 	    NULL, "");
 	SYSCTL_ADD_STRING(&host->nh_sysctl, SYSCTL_CHILDREN(oid), OID_AUTO,
 	    "hostname", CTLFLAG_RD, host->nh_caller_name, 0, "");
 	SYSCTL_ADD_UINT(&host->nh_sysctl, SYSCTL_CHILDREN(oid), OID_AUTO,
 	    "version", CTLFLAG_RD, &host->nh_vers, 0, "");
 	SYSCTL_ADD_UINT(&host->nh_sysctl, SYSCTL_CHILDREN(oid), OID_AUTO,
 	    "monitored", CTLFLAG_RD, &host->nh_monstate, 0, "");
 	SYSCTL_ADD_PROC(&host->nh_sysctl, SYSCTL_CHILDREN(oid), OID_AUTO,
 	    "lock_count", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, host,
 	    0, nlm_host_lock_count_sysctl, "I", "");
 	SYSCTL_ADD_PROC(&host->nh_sysctl, SYSCTL_CHILDREN(oid), OID_AUTO,
 	    "client_lock_count", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
 	    host, 0, nlm_host_client_lock_count_sysctl, "I", "");
 
 	/* Re-take the lock so the caller's locking state is unchanged. */
 	mtx_lock(&nlm_global_lock);
 
 	return (host);
 }
 
 /*
  * Acquire the next sysid for remote locks not handled by the NLM.
  */
 uint32_t
 nlm_acquire_next_sysid(void)
 {
 	uint32_t next_sysid;
 
 	mtx_lock(&nlm_global_lock);
 	next_sysid = nlm_next_sysid++;
 	mtx_unlock(&nlm_global_lock);
 	return (next_sysid);
 }
 
 /*
  * Compare only the address portion of two socket addresses; the other
  * fields (such as the port) are not examined. Returns non-zero when
  * both addresses are of the same family and carry the same address,
  * and zero otherwise, including for any family other than AF_INET
  * and (when INET6 is configured) AF_INET6.
  */
 static int
 nlm_compare_addr(const struct sockaddr *a, const struct sockaddr *b)
 {

 	if (a->sa_family != b->sa_family)
 		return (FALSE);

 	if (a->sa_family == AF_INET) {
 		const struct sockaddr_in *lhs, *rhs;

 		lhs = (const struct sockaddr_in *)a;
 		rhs = (const struct sockaddr_in *)b;
 		return (memcmp(&lhs->sin_addr, &rhs->sin_addr,
 		    sizeof(lhs->sin_addr)) == 0);
 	}
 #ifdef INET6
 	if (a->sa_family == AF_INET6) {
 		const struct sockaddr_in6 *lhs6, *rhs6;

 		lhs6 = (const struct sockaddr_in6 *)a;
 		rhs6 = (const struct sockaddr_in6 *)b;
 		return (memcmp(&lhs6->sin6_addr, &rhs6->sin6_addr,
 		    sizeof(lhs6->sin6_addr)) == 0);
 	}
 #endif

 	/* Unsupported family: never reported as equal. */
 	return (0);
 }
 
 /*
  * Check for idle hosts and stop monitoring them. We could also free
  * the host structure here, possibly after a larger timeout but that
  * would require some care to avoid races with
  * e.g. nlm_host_lock_count_sysctl.
  *
  * Must be called with nlm_global_lock held; the lock is held again on
  * return but is dropped temporarily for each host that is examined.
  * The scan is rate limited: it does nothing until time_uptime has
  * passed nlm_next_idle_check, which is then pushed forward by
  * NLM_IDLE_PERIOD.
  */
 static void
 nlm_check_idle(void)
 {
 	struct nlm_host *host;
 
 	mtx_assert(&nlm_global_lock, MA_OWNED);
 
 	if (time_uptime <= nlm_next_idle_check)
 		return;
 
 	nlm_next_idle_check = time_uptime + NLM_IDLE_PERIOD;
 
 	TAILQ_FOREACH(host, &nlm_hosts, nh_link) {
 		if (host->nh_monstate == NLM_MONITORED
 		    && time_uptime > host->nh_idle_timeout) {
 			/*
 			 * The global lock is dropped for the lock counts
 			 * and the unmonitor call below.
 			 * NOTE(review): the list iteration continues
 			 * from 'host' after the lock is re-taken, so this
 			 * presumably relies on hosts not being unlinked
 			 * meanwhile - confirm.
 			 */
 			mtx_unlock(&nlm_global_lock);
 			/*
 			 * Still has server-side or client-side locks:
 			 * not idle, so just extend its timeout.
 			 * NOTE(review): this uses '+' where other call
 			 * sites combine NLM_SYSID_CLIENT with '|'.
 			 */
 			if (lf_countlocks(host->nh_sysid) > 0
 			    || lf_countlocks(NLM_SYSID_CLIENT
 				+ host->nh_sysid)) {
 				host->nh_idle_timeout =
 					time_uptime + NLM_IDLE_TIMEOUT;
 				mtx_lock(&nlm_global_lock);
 				continue;
 			}
 			nlm_host_unmonitor(host);
 			mtx_lock(&nlm_global_lock);
 		} 
 	}
 }
 
 /*
  * Search for an existing NLM host that matches the given name
  * (typically the caller_name element of an nlm4_lock).  If none is
  * found, create a new host. If 'addr' is non-NULL, record the remote
  * address of the host so that we can call it back for async
  * responses, and record 'vers' as the NLM program version to use to
  * communicate with this client.
  *
  * Returns the host with an additional reference acquired for the
  * caller, or NULL if a new host could not be created. The host's
  * idle timeout is refreshed and the idle-host scan is given a chance
  * to run.
  */
 struct nlm_host *
 nlm_find_host_by_name(const char *name, const struct sockaddr *addr,
     rpcvers_t vers)
 {
 	struct nlm_host *host;
 
 	mtx_lock(&nlm_global_lock);
 
 	/*
 	 * The remote host is determined by caller_name.
 	 */
 	TAILQ_FOREACH(host, &nlm_hosts, nh_link) {
 		if (!strcmp(host->nh_caller_name, name))
 			break;
 	}
 
 	/* TAILQ_FOREACH leaves host NULL when nothing matched. */
 	if (!host) {
 		host = nlm_create_host(name);
 		if (!host) {
 			mtx_unlock(&nlm_global_lock);
 			return (NULL);
 		}
 	}
 	refcount_acquire(&host->nh_refs);
 
 	host->nh_idle_timeout = time_uptime + NLM_IDLE_TIMEOUT;
 
 	/*
 	 * If we have an address for the host, record it so that we
 	 * can send async replies etc.
 	 */
 	if (addr) {
 		
 		KASSERT(addr->sa_len < sizeof(struct sockaddr_storage),
 		    ("Strange remote transport address length"));
 
 		/*
 		 * If we have seen an address before and we currently
 		 * have an RPC client handle, make sure the address and
 		 * protocol version are the same, otherwise discard the
 		 * client handle.
 		 */
 		if (host->nh_addr.ss_len && host->nh_srvrpc.nr_client) {
 			if (!nlm_compare_addr(
 				    (struct sockaddr *) &host->nh_addr,
 				    addr)
 			    || host->nh_vers != vers) {
 				CLIENT *client;
 				/*
 				 * Detach the handle under the host lock
 				 * and release it after unlocking.
 				 */
 				mtx_lock(&host->nh_lock);
 				client = host->nh_srvrpc.nr_client;
 				host->nh_srvrpc.nr_client = NULL;
 				mtx_unlock(&host->nh_lock);
 				if (client) {
 					CLNT_RELEASE(client);
 				}
 			}
 		}
 		memcpy(&host->nh_addr, addr, addr->sa_len);
 		host->nh_vers = vers;
 	}
 
 	nlm_check_idle();
 
 	mtx_unlock(&nlm_global_lock);
 
 	return (host);
 }
 
 /*
  * Search for an existing NLM host that matches the given remote
  * address. If none is found, create a new host with the requested
  * address and remember 'vers' as the NLM protocol version to use for
  * that host. The new host is named after the printable form of the
  * address.
  *
  * Returns the host with an additional reference acquired for the
  * caller, or NULL if a new host could not be created.
  */
 struct nlm_host *
 nlm_find_host_by_addr(const struct sockaddr *addr, int vers)
 {
 	/*
 	 * Fake up a name using inet_ntop. This buffer is
 	 * large enough for an IPv6 address.
 	 */
 	char tmp[sizeof "ffff:ffff:ffff:ffff:ffff:ffff:255.255.255.255"];
 	struct nlm_host *host;
 
 	switch (addr->sa_family) {
 	case AF_INET:
 		inet_ntop(AF_INET,
 		    &((const struct sockaddr_in *) addr)->sin_addr,
 		    tmp, sizeof tmp);
 		break;
 #ifdef INET6
 	case AF_INET6:
 		inet_ntop(AF_INET6,
 		    &((const struct sockaddr_in6 *) addr)->sin6_addr,
 		    tmp, sizeof tmp);
 		break;
 #endif
 	default:
 		/* Any other address family gets a placeholder name. */
 		strlcpy(tmp, "<unknown>", sizeof(tmp));
 	}
 
 
 	mtx_lock(&nlm_global_lock);
 
 	/*
 	 * The remote host is determined by its address; the faked-up
 	 * name is only used when a new host has to be created.
 	 */
 	TAILQ_FOREACH(host, &nlm_hosts, nh_link) {
 		if (nlm_compare_addr(addr,
 			(const struct sockaddr *) &host->nh_addr))
 			break;
 	}
 
 	/* TAILQ_FOREACH leaves host NULL when nothing matched. */
 	if (!host) {
 		host = nlm_create_host(tmp);
 		if (!host) {
 			mtx_unlock(&nlm_global_lock);
 			return (NULL);
 		}
 		memcpy(&host->nh_addr, addr, addr->sa_len);
 		host->nh_vers = vers;
 	}
 	refcount_acquire(&host->nh_refs);
 
 	host->nh_idle_timeout = time_uptime + NLM_IDLE_TIMEOUT;
 
 	nlm_check_idle();
 
 	mtx_unlock(&nlm_global_lock);
 
 	return (host);
 }
 
 /*
  * Look up the NLM host whose sysid equals 'sysid'. On success the host
  * is returned with an extra reference that the caller must release;
  * if no host matches, NULL is returned.
  */
 static struct nlm_host *
 nlm_find_host_by_sysid(int sysid)
 {
 	struct nlm_host *host;

 	/* host is left NULL if the list is exhausted without a match. */
 	TAILQ_FOREACH(host, &nlm_hosts, nh_link) {
 		if (host->nh_sysid == sysid)
 			break;
 	}

 	if (host != NULL)
 		refcount_acquire(&host->nh_refs);

 	return (host);
 }
 
 /*
  * Drop one reference on 'host'; the host is destroyed when the last
  * reference goes away.
  */
 void
 nlm_host_release(struct nlm_host *host)
 {

 	if (!refcount_release(&host->nh_refs))
 		return;

 	/* That was the last reference: free the host. */
 	nlm_host_destroy(host);
 }
 
 /*
  * Unregister this NLM host with the local NSM due to idleness.
  */
 static void
 nlm_host_unmonitor(struct nlm_host *host)
 {
 	mon_id smmonid;
 	sm_stat_res smstat;
 	struct timeval timo;
 	enum clnt_stat stat;
 
 	NLM_DEBUG(1, "NLM: unmonitoring %s (sysid %d)\n",
 	    host->nh_caller_name, host->nh_sysid);
 
 	/*
 	 * We put our assigned system ID value in the priv field to
 	 * make it simpler to find the host if we are notified of a
 	 * host restart.
 	 */
 	smmonid.mon_name = host->nh_caller_name;
 	smmonid.my_id.my_name = "localhost";
 	smmonid.my_id.my_prog = NLM_PROG;
 	smmonid.my_id.my_vers = NLM_SM;
 	smmonid.my_id.my_proc = NLM_SM_NOTIFY;
 
 	timo.tv_sec = 25;
 	timo.tv_usec = 0;
 	stat = CLNT_CALL(nlm_nsm, SM_UNMON,
 	    (xdrproc_t) xdr_mon, &smmonid,
 	    (xdrproc_t) xdr_sm_stat, &smstat, timo);
 
 	if (stat != RPC_SUCCESS) {
 		NLM_ERR("Failed to contact local NSM - rpc error %d\n", stat);
 		return;
 	}
 	if (smstat.res_stat == stat_fail) {
 		NLM_ERR("Local NSM refuses to unmonitor %s\n",
 		    host->nh_caller_name);
 		return;
 	}
 
 	host->nh_monstate = NLM_UNMONITORED;
 }
 
 /*
  * Register this NLM host with the local NSM so that we can be
  * notified if it reboots.
  *
  * 'state', when non-zero, is the host's NSM state number; it is
  * recorded the first time one is seen. Nothing is sent to the NSM
  * unless the host is currently NLM_UNMONITORED. If the NSM refuses
  * the request the host is marked NLM_MONITOR_FAILED.
  */
 void
 nlm_host_monitor(struct nlm_host *host, int state)
 {
 	mon smmon;
 	sm_stat_res smstat;
 	struct timeval timo;
 	enum clnt_stat stat;

 	if (state && !host->nh_state) {
 		/*
 		 * This is the first time we have seen an NSM state
 		 * value for this host. We record it here to help
 		 * detect host reboots.
 		 */
 		host->nh_state = state;
 		NLM_DEBUG(1, "NLM: host %s (sysid %d) has NSM state %d\n",
 		    host->nh_caller_name, host->nh_sysid, state);
 	}

 	/*
 	 * Claim the transition out of NLM_UNMONITORED under the host
 	 * lock; any other state means there is nothing to do here.
 	 */
 	mtx_lock(&host->nh_lock);
 	if (host->nh_monstate != NLM_UNMONITORED) {
 		mtx_unlock(&host->nh_lock);
 		return;
 	}
 	host->nh_monstate = NLM_MONITORED;
 	mtx_unlock(&host->nh_lock);

 	NLM_DEBUG(1, "NLM: monitoring %s (sysid %d)\n",
 	    host->nh_caller_name, host->nh_sysid);

 	/*
 	 * We put our assigned system ID value in the priv field to
 	 * make it simpler to find the host if we are notified of a
 	 * host restart. The sysid only fills part of priv, so the
 	 * whole request is zeroed first to keep uninitialised stack
 	 * bytes out of the message sent to the NSM.
 	 */
 	memset(&smmon, 0, sizeof(smmon));
 	smmon.mon_id.mon_name = host->nh_caller_name;
 	smmon.mon_id.my_id.my_name = "localhost";
 	smmon.mon_id.my_id.my_prog = NLM_PROG;
 	smmon.mon_id.my_id.my_vers = NLM_SM;
 	smmon.mon_id.my_id.my_proc = NLM_SM_NOTIFY;
 	memcpy(smmon.priv, &host->nh_sysid, sizeof(host->nh_sysid));

 	timo.tv_sec = 25;
 	timo.tv_usec = 0;
 	stat = CLNT_CALL(nlm_nsm, SM_MON,
 	    (xdrproc_t) xdr_mon, &smmon,
 	    (xdrproc_t) xdr_sm_stat, &smstat, timo);

 	if (stat != RPC_SUCCESS) {
 		/*
 		 * NOTE(review): the host stays NLM_MONITORED on this
 		 * path even though the NSM was not reached.
 		 */
 		NLM_ERR("Failed to contact local NSM - rpc error %d\n", stat);
 		return;
 	}
 	if (smstat.res_stat == stat_fail) {
 		NLM_ERR("Local NSM refuses to monitor %s\n",
 		    host->nh_caller_name);
 		mtx_lock(&host->nh_lock);
 		host->nh_monstate = NLM_MONITOR_FAILED;
 		mtx_unlock(&host->nh_lock);
 		return;
 	}

 	host->nh_monstate = NLM_MONITORED;
 }
 
 /*
  * Return an RPC client handle that can be used to talk to the NLM
  * running on the given host.
  *
  * 'isserver' selects which cached handle is used: nh_srvrpc when
  * TRUE, nh_clntrpc otherwise. A reference is acquired on the returned
  * handle for the caller. Returns NULL if no handle is cached and a
  * new one could not be created.
  */
 CLIENT *
 nlm_host_get_rpc(struct nlm_host *host, bool_t isserver)
 {
 	struct nlm_rpc *rpc;
 	CLIENT *client;
 
 	mtx_lock(&host->nh_lock);
 
 	if (isserver)
 		rpc = &host->nh_srvrpc;
 	else
 		rpc = &host->nh_clntrpc;
 
 	/*
 	 * We can't hold onto RPC handles for too long - the async
 	 * call/reply protocol used by some NLM clients makes it hard
 	 * to tell when they change port numbers (e.g. after a
 	 * reboot). Note that if a client reboots while it isn't
 	 * holding any locks, it won't bother to notify us. We
 	 * expire the RPC handles after two minutes.
 	 */
 	if (rpc->nr_client && time_uptime > rpc->nr_create_time + 2*60) {
 		/* Detach under the lock, release with it dropped. */
 		client = rpc->nr_client;
 		rpc->nr_client = NULL;
 		mtx_unlock(&host->nh_lock);
 		CLNT_RELEASE(client);
 		mtx_lock(&host->nh_lock);
 	}
 
 	if (!rpc->nr_client) {
 		/* Create the new handle without holding the host lock. */
 		mtx_unlock(&host->nh_lock);
 		client = nlm_get_rpc((struct sockaddr *)&host->nh_addr,
 		    NLM_PROG, host->nh_vers);
 		mtx_lock(&host->nh_lock);
 
 		if (client) {
 			if (rpc->nr_client) {
 				/*
 				 * A handle was installed while the lock
 				 * was dropped; keep that one and discard
 				 * ours.
 				 */
 				mtx_unlock(&host->nh_lock);
 				CLNT_DESTROY(client);
 				mtx_lock(&host->nh_lock);
 			} else {
 				rpc->nr_client = client;
 				rpc->nr_create_time = time_uptime;
 			}
 		}
 	}
 
 	/* Take the caller's reference while still holding the lock. */
 	client = rpc->nr_client;
 	if (client)
 		CLNT_ACQUIRE(client);
 	mtx_unlock(&host->nh_lock);
 
 	return (client);
 
 }
 
 /*
  * Accessor: return the sysid assigned to 'host'.
  */
 int
 nlm_host_get_sysid(struct nlm_host *host)
 {
 	int sysid;

 	sysid = host->nh_sysid;
 	return (sysid);
 }
 
 /*
  * Accessor: return the state value currently recorded for 'host'.
  */
 int
 nlm_host_get_state(struct nlm_host *host)
 {
 	int state;

 	state = host->nh_state;
 	return (state);
 }
 
 /*
  * Allocate a waiting-lock record for 'lock' on vnode 'vp' and append
  * it to nlm_waiting_locks. The file handle bytes are copied into the
  * record so that the stored lock points at the record's own storage
  * rather than at the caller's buffer. The record is returned as an
  * opaque handle.
  */
 void *
 nlm_register_wait_lock(struct nlm4_lock *lock, struct vnode *vp)
 {
 	struct nlm_waiting_lock *waiter;

 	waiter = malloc(sizeof(*waiter), M_NLM, M_WAITOK);
 	waiter->nw_vp = vp;
 	waiter->nw_waiting = TRUE;

 	/* Copy the lock, then re-point its file handle at our copy. */
 	waiter->nw_lock = *lock;
 	memcpy(&waiter->nw_fh.fh_bytes, waiter->nw_lock.fh.n_bytes,
 	    waiter->nw_lock.fh.n_len);
 	waiter->nw_lock.fh.n_bytes = waiter->nw_fh.fh_bytes;

 	mtx_lock(&nlm_global_lock);
 	TAILQ_INSERT_TAIL(&nlm_waiting_locks, waiter, nw_link);
 	mtx_unlock(&nlm_global_lock);

 	return (waiter);
 }
 
 /*
  * Unlink the waiting-lock record identified by 'handle' from
  * nlm_waiting_locks and free it.
  */
 void
 nlm_deregister_wait_lock(void *handle)
 {
 	struct nlm_waiting_lock *waiter;

 	waiter = handle;

 	mtx_lock(&nlm_global_lock);
 	TAILQ_REMOVE(&nlm_waiting_locks, waiter, nw_link);
 	mtx_unlock(&nlm_global_lock);

 	free(waiter, M_NLM);
 }
 
 /*
  * Sleep until the waiting lock identified by 'handle' is granted, the
  * wait is cancelled, the sleep is interrupted, or 'timo' expires.
  * The record is removed from nlm_waiting_locks and freed in every
  * case, so 'handle' must not be used afterwards.
  *
  * Returns 0 if nw_waiting was found cleared, EINTR if the sleep ended
  * without error while nw_waiting was still set, and otherwise the
  * error returned by msleep.
  */
 int
 nlm_wait_lock(void *handle, int timo)
 {
 	struct nlm_waiting_lock *nw = handle;
 	int error, stops_deferred;
 
 	/*
 	 * If the granted message arrived before we got here,
 	 * nw->nw_waiting will be FALSE - in that case, don't sleep.
 	 */
 	mtx_lock(&nlm_global_lock);
 	error = 0;
 	if (nw->nw_waiting) {
 		stops_deferred = sigdeferstop(SIGDEFERSTOP_ERESTART);
 		error = msleep(nw, &nlm_global_lock, PCATCH, "nlmlock", timo);
 		sigallowstop(stops_deferred);
 	}
 	TAILQ_REMOVE(&nlm_waiting_locks, nw, nw_link);
 	if (error) {
 		/*
 		 * The granted message may arrive after the
 		 * interrupt/timeout but before we manage to lock the
 		 * mutex. Detect this by examining nw_waiting.
 		 */
 		if (!nw->nw_waiting)
 			error = 0;
 	} else {
 		/*
 		 * If nlm_cancel_wait is called, then error will be
 		 * zero but nw_waiting will still be TRUE. We
 		 * translate this into EINTR.
 		 */
 		if (nw->nw_waiting)
 			error = EINTR;
 	}
 	mtx_unlock(&nlm_global_lock);
 
 	free(nw, M_NLM);
 
 	return (error);
 }
 
 /*
  * Wake up every thread sleeping on a waiting-lock record that refers
  * to vnode 'vp'. The records themselves are left on the list.
  */
 void
 nlm_cancel_wait(struct vnode *vp)
 {
 	struct nlm_waiting_lock *waiter;

 	mtx_lock(&nlm_global_lock);
 	TAILQ_FOREACH(waiter, &nlm_waiting_locks, nw_link) {
 		if (waiter->nw_vp != vp)
 			continue;
 		wakeup(waiter);
 	}
 	mtx_unlock(&nlm_global_lock);
 }
 
 
 /**********************************************************************/
 
 /*
  * Syscall interface with userland.
  */
 
 extern void nlm_prog_0(struct svc_req *rqstp, SVCXPRT *transp);
 extern void nlm_prog_1(struct svc_req *rqstp, SVCXPRT *transp);
 extern void nlm_prog_3(struct svc_req *rqstp, SVCXPRT *transp);
 extern void nlm_prog_4(struct svc_req *rqstp, SVCXPRT *transp);
 
 /*
  * Create the service transports for the NLM program and register
  * every supported protocol version on each of them.
  *
  * 'addrs' is a user-space array of 2 * addr_count string pointers,
  * read as (netid, universal address) pairs. Transports are created
  * while registering the first version; the remaining versions are
  * registered on those same transports. The local references to the
  * transports are dropped before returning, on success and on failure.
  *
  * Returns 0 on success, EINVAL for a bad address count or a failed
  * lookup/creation/registration, or the error from copyin/copyinstr.
  */
 static int
 nlm_register_services(SVCPOOL *pool, int addr_count, char **addrs)
 {
 	static rpcvers_t versions[] = {
 		NLM_SM, NLM_VERS, NLM_VERSX, NLM_VERS4
 	};
 	static void (*dispatchers[])(struct svc_req *, SVCXPRT *) = {
 		nlm_prog_0, nlm_prog_1, nlm_prog_3, nlm_prog_4
 	};

 	SVCXPRT **xprts;
 	char netid[16];
 	char uaddr[128];
 	struct netconfig *nconf;
 	bool_t registered;
 	int i, j, error;

 	if (!addr_count) {
 		NLM_ERR("NLM: no service addresses given - "
 		    "can't start server\n");
 		return (EINVAL);
 	}

 	if (addr_count < 0 || addr_count > 256 ) {
 		NLM_ERR("NLM:  too many service addresses (%d) given, "
 		    "max 256 - can't start server\n", addr_count);
 		return (EINVAL);
 	}

 	/* Zeroed so that the cleanup loop can skip unused slots. */
 	xprts = malloc(addr_count * sizeof(SVCXPRT *), M_NLM, M_WAITOK|M_ZERO);
 	for (i = 0; i < nitems(versions); i++) {
 		for (j = 0; j < addr_count; j++) {
 			/*
 			 * Create transports for the first version and
 			 * then just register everything else to the
 			 * same transports.
 			 */
 			if (i == 0) {
 				char *up;

 				error = copyin(&addrs[2*j], &up,
 				    sizeof(char*));
 				if (error)
 					goto out;
 				error = copyinstr(up, netid, sizeof(netid),
 				    NULL);
 				if (error)
 					goto out;
 				error = copyin(&addrs[2*j+1], &up,
 				    sizeof(char*));
 				if (error)
 					goto out;
 				error = copyinstr(up, uaddr, sizeof(uaddr),
 				    NULL);
 				if (error)
 					goto out;
 				nconf = getnetconfigent(netid);
 				if (!nconf) {
 					NLM_ERR("Can't lookup netid %s\n",
 					    netid);
 					error = EINVAL;
 					goto out;
 				}
 				xprts[j] = svc_tp_create(pool, dispatchers[i],
 				    NLM_PROG, versions[i], uaddr, nconf);
 				/*
 				 * The netconfig entry is only needed for
 				 * the call above; free it before checking
 				 * the result so that the failure path
 				 * does not leak it.
 				 */
 				freenetconfigent(nconf);
 				if (!xprts[j]) {
 					NLM_ERR("NLM: unable to create "
 					    "(NLM_PROG, %d).\n", versions[i]);
 					error = EINVAL;
 					goto out;
 				}
 			} else {
 				nconf = getnetconfigent(xprts[j]->xp_netid);
 				rpcb_unset(NLM_PROG, versions[i], nconf);
 				registered = svc_reg(xprts[j], NLM_PROG,
 				    versions[i], dispatchers[i], nconf);
 				/*
 				 * Release the entry looked up for this
 				 * registration on both the success and
 				 * the failure path.
 				 */
 				freenetconfigent(nconf);
 				if (!registered) {
 					NLM_ERR("NLM: can't register "
 					    "(NLM_PROG, %d)\n", versions[i]);
 					error = EINVAL;
 					goto out;
 				}
 			}
 		}
 	}
 	error = 0;
 out:
 	for (j = 0; j < addr_count; j++) {
 		if (xprts[j])
 			SVC_RELEASE(xprts[j]);
 	}
 	free(xprts, M_NLM);
 	return (error);
 }
 
 /*
  * Main server entry point. Contacts the local NSM to get its current
  * state and send SM_UNMON_ALL. Registers the NLM services and then
  * services requests. Does not return until the server is interrupted
  * by a signal.
  *
  * 'addr_count' and 'addrs' are passed through to
  * nlm_register_services. Returns EPERM if the server already appears
  * to be running, an error from socket creation or service
  * registration, EINVAL if the NSM cannot be contacted, and 0 once
  * svc_run has returned. All host state is torn down before returning
  * on every path that gets past the socket setup.
  */
 static int
 nlm_server_main(int addr_count, char **addrs)
 {
 	struct thread *td = curthread;
 	int error;
 	SVCPOOL *pool = NULL;
 	struct sockopt opt;
 	int portlow;
 #ifdef INET6
 	struct sockaddr_in6 sin6;
 #endif
 	struct sockaddr_in sin;
 	my_id id;
 	sm_stat smstat;
 	struct timeval timo;
 	enum clnt_stat stat;
 	struct nlm_host *host, *nhost;
 	struct nlm_waiting_lock *nw;
 	vop_advlock_t *old_nfs_advlock;
 	vop_reclaim_t *old_nfs_reclaim;
 
 	if (nlm_is_running != 0) {
 		NLM_ERR("NLM: can't start server - "
 		    "it appears to be running already\n");
 		return (EPERM);
 	}
 
 	/*
 	 * The sockets are created once and kept in nlm_socket and
 	 * nlm_socket6; they are not closed by this function on the
 	 * normal exit path.
 	 */
 	if (nlm_socket == NULL) {
 		memset(&opt, 0, sizeof(opt));
 
 		error = socreate(AF_INET, &nlm_socket, SOCK_DGRAM, 0,
 		    td->td_ucred, td);
 		if (error) {
 			NLM_ERR("NLM: can't create IPv4 socket - error %d\n",
 			    error);
 			return (error);
 		}
 		/* Request the low port range; the result is not checked. */
 		opt.sopt_dir = SOPT_SET;
 		opt.sopt_level = IPPROTO_IP;
 		opt.sopt_name = IP_PORTRANGE;
 		portlow = IP_PORTRANGE_LOW;
 		opt.sopt_val = &portlow;
 		opt.sopt_valsize = sizeof(portlow);
 		sosetopt(nlm_socket, &opt);
 
 #ifdef INET6
 		nlm_socket6 = NULL;
 		error = socreate(AF_INET6, &nlm_socket6, SOCK_DGRAM, 0,
 		    td->td_ucred, td);
 		if (error) {
 			NLM_ERR("NLM: can't create IPv6 socket - error %d\n",
 			    error);
 			soclose(nlm_socket);
 			nlm_socket = NULL;
 			return (error);
 		}
 		opt.sopt_dir = SOPT_SET;
 		opt.sopt_level = IPPROTO_IPV6;
 		opt.sopt_name = IPV6_PORTRANGE;
 		portlow = IPV6_PORTRANGE_LOW;
 		opt.sopt_val = &portlow;
 		opt.sopt_valsize = sizeof(portlow);
 		sosetopt(nlm_socket6, &opt);
 #endif
 	}
 
 	nlm_auth = authunix_create(curthread->td_ucred);
 
 	/*
 	 * Get a client handle for the local NSM: try the IPv6
 	 * loopback address first when INET6 is configured and fall
 	 * back to the IPv4 loopback address.
 	 */
 #ifdef INET6
 	memset(&sin6, 0, sizeof(sin6));
 	sin6.sin6_len = sizeof(sin6);
 	sin6.sin6_family = AF_INET6;
 	sin6.sin6_addr = in6addr_loopback;
 	nlm_nsm = nlm_get_rpc((struct sockaddr *) &sin6, SM_PROG, SM_VERS);
 	if (!nlm_nsm) {
 #endif
 		memset(&sin, 0, sizeof(sin));
 		sin.sin_len = sizeof(sin);
 		sin.sin_family = AF_INET;
 		sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
 		nlm_nsm = nlm_get_rpc((struct sockaddr *) &sin, SM_PROG,
 		    SM_VERS);
 #ifdef INET6
 	}
 #endif
 
 	if (!nlm_nsm) {
 		NLM_ERR("Can't start NLM - unable to contact NSM\n");
 		error = EINVAL;
 		goto out;
 	}
 
 	pool = svcpool_create("NLM", NULL);
 
 	error = nlm_register_services(pool, addr_count, addrs);
 	if (error)
 		goto out;
 
 	memset(&id, 0, sizeof(id));
 	id.my_name = "NFS NLM";
 
 	timo.tv_sec = 25;
 	timo.tv_usec = 0;
 	stat = CLNT_CALL(nlm_nsm, SM_UNMON_ALL,
 	    (xdrproc_t) xdr_my_id, &id,
 	    (xdrproc_t) xdr_sm_stat, &smstat, timo);
 
 	if (stat != RPC_SUCCESS) {
 		struct rpc_err err;
 
 		CLNT_GETERR(nlm_nsm, &err);
 		NLM_ERR("NLM: unexpected error contacting NSM, "
 		    "stat=%d, errno=%d\n", stat, err.re_errno);
 		error = EINVAL;
 		goto out;
 	}
 	nlm_is_running = 1;
 
 	NLM_DEBUG(1, "NLM: local NSM state is %d\n", smstat.state);
 	nlm_nsm_state = smstat.state;
 
 	/*
 	 * Install the NLM hooks for the duration of svc_run and put
 	 * the previous values back afterwards.
 	 */
 	old_nfs_advlock = nfs_advlock_p;
 	nfs_advlock_p = nlm_advlock;
 	old_nfs_reclaim = nfs_reclaim_p;
 	nfs_reclaim_p = nlm_reclaim;
 
 	svc_run(pool);
 	error = 0;
 
 	nfs_advlock_p = old_nfs_advlock;
 	nfs_reclaim_p = old_nfs_reclaim;
 
 out:
 	/* Shared cleanup for the normal exit and the error paths. */
 	nlm_is_running = 0;
 	if (pool)
 		svcpool_destroy(pool);
 
 	/*
 	 * We are finished communicating with the NSM.
 	 */
 	if (nlm_nsm) {
 		CLNT_RELEASE(nlm_nsm);
 		nlm_nsm = NULL;
 	}
 
 	/*
 	 * Trash all the existing state so that if the server
 	 * restarts, it gets a clean slate. This is complicated by the
 	 * possibility that there may be other threads trying to make
 	 * client locking requests.
 	 *
 	 * First we fake a client reboot notification which will
 	 * cancel any pending async locks and purge remote lock state
 	 * from the local lock manager. We release the reference from
 	 * nlm_hosts to the host (which may remove it from the list
 	 * and free it). After this phase, the only entries in the
 	 * nlm_host list should be from other threads performing
 	 * client lock requests.
 	 */
 	mtx_lock(&nlm_global_lock);
 	TAILQ_FOREACH(nw, &nlm_waiting_locks, nw_link) {
 		wakeup(nw);
 	}
 	/*
 	 * The _SAFE variant is used because releasing the host may
 	 * free it. The global lock is dropped around each host's
 	 * notify/release and re-taken before moving on.
 	 */
 	TAILQ_FOREACH_SAFE(host, &nlm_hosts, nh_link, nhost) {
 		mtx_unlock(&nlm_global_lock);
 		nlm_host_notify(host, 0);
 		nlm_host_release(host);
 		mtx_lock(&nlm_global_lock);
 	}
 	mtx_unlock(&nlm_global_lock);
 
 	AUTH_DESTROY(nlm_auth);
 
 	return (error);
 }
 
 int
 sys_nlm_syscall(struct thread *td, struct nlm_syscall_args *uap)
 {
 	int error;
 
 	error = priv_check(td, PRIV_NFS_LOCKD);
 	if (error)
 		return (error);
 
 	nlm_debug_level = uap->debug_level;
 	nlm_grace_threshold = time_uptime + uap->grace_period;
 	nlm_next_idle_check = time_uptime + NLM_IDLE_PERIOD;
 
 	return nlm_server_main(uap->addr_count, uap->addrs);
 }
 
 /**********************************************************************/
 
 /*
  * NLM implementation details, called from the RPC stubs.
  */
 
 
 void
 nlm_sm_notify(struct nlm_sm_status *argp)
 {
 	uint32_t sysid;
 	struct nlm_host *host;
 
 	NLM_DEBUG(3, "nlm_sm_notify(): mon_name = %s\n", argp->mon_name);
 	memcpy(&sysid, &argp->priv, sizeof(sysid));
 	host = nlm_find_host_by_sysid(sysid);
 	if (host) {
 		nlm_host_notify(host, argp->state);
 		nlm_host_release(host);
 	}
 }
 
 /*
  * Convert the opaque file handle carried in netobj 'p' into an
  * fhandle_t.
  *
  * At most sizeof(fhandle_t) bytes are copied and never more than the
  * netobj actually holds (p->n_len), so a short handle cannot cause a
  * read past the end of its buffer. Any bytes of '*fhp' not covered by
  * the netobj are left zeroed.
  */
 static void
 nlm_convert_to_fhandle_t(fhandle_t *fhp, struct netobj *p)
 {
 	size_t len;

 	memset(fhp, 0, sizeof(fhandle_t));

 	len = p->n_len;
 	if (len > sizeof(fhandle_t))
 		len = sizeof(fhandle_t);
 	if (len != 0)
 		memcpy(fhp, p->n_bytes, len);
 }
 
 /*
  * Filesystem objects looked up on behalf of one NLM request. Filled
  * in by nlm_get_vfs_state and torn down by nlm_release_vfs_state.
  */
 struct vfs_state {
 	struct mount	*vs_mp;		/* mount from vfs_getvfs, or NULL */
 	struct vnode	*vs_vp;		/* vnode from VFS_FHTOVP, or NULL */
 	int		vs_vnlocked;	/* TRUE while vs_vp is still locked */
 };
 
 /*
  * Resolve file handle 'fhp' to a mount point and vnode for a request
  * from 'host', recording them in 'vs'.
  *
  * When 'accmode' is non-zero the export of the mount to the host's
  * address is checked (read-only exports and read-only mounts yield
  * EROFS) and the caller's credentials are checked against the vnode.
  * When 'accmode' is zero these checks are skipped.
  *
  * Returns 0 on success with the vnode unlocked, ESTALE if the mount
  * cannot be found, or another error. On failure 'vs' may still hold a
  * mount and a (possibly locked) vnode; nothing in 'vs' is released
  * here, so the caller is expected to pass it to
  * nlm_release_vfs_state in either case.
  */
 static int
 nlm_get_vfs_state(struct nlm_host *host, struct svc_req *rqstp,
     fhandle_t *fhp, struct vfs_state *vs, accmode_t accmode)
 {
-	int error, exflags;
+	int error;
+	uint64_t exflags;
 	struct ucred *cred = NULL, *credanon = NULL;
 	
 	memset(vs, 0, sizeof(*vs));
 
 	vs->vs_mp = vfs_getvfs(&fhp->fh_fsid);
 	if (!vs->vs_mp) {
 		return (ESTALE);
 	}
 
 	/* accmode == 0 means don't check, since it is an unlock. */
 	if (accmode != 0) {
 		error = VFS_CHECKEXP(vs->vs_mp,
 		    (struct sockaddr *)&host->nh_addr, &exflags, &credanon,
 		    NULL, NULL);
 		if (error)
 			goto out;
 
 		if (exflags & MNT_EXRDONLY ||
 		    (vs->vs_mp->mnt_flag & MNT_RDONLY)) {
 			error = EROFS;
 			goto out;
 		}
 	}
 
 	error = VFS_FHTOVP(vs->vs_mp, &fhp->fh_fid, LK_EXCLUSIVE, &vs->vs_vp);
 	if (error)
 		goto out;
 	/* Recorded so that the release path knows to unlock the vnode. */
 	vs->vs_vnlocked = TRUE;
 
 	if (accmode != 0) {
 		if (!svc_getcred(rqstp, &cred, NULL)) {
 			error = EINVAL;
 			goto out;
 		}
 		/*
 		 * Requests from uid 0, and all requests on exports
 		 * flagged MNT_EXPORTANON, are checked with the
 		 * export's anonymous credential instead.
 		 */
 		if (cred->cr_uid == 0 || (exflags & MNT_EXPORTANON)) {
 			crfree(cred);
 			cred = credanon;
 			credanon = NULL;
 		}
 
 		/*
 		 * Check cred.
 		 */
 		error = VOP_ACCESS(vs->vs_vp, accmode, cred, curthread);
 		/*
 		 * If this failed and accmode != VWRITE, try again with
 		 * VWRITE to maintain backwards compatibility with the
 		 * old code that always used VWRITE.
 		 */
 		if (error != 0 && accmode != VWRITE)
 			error = VOP_ACCESS(vs->vs_vp, VWRITE, cred, curthread);
 		if (error)
 			goto out;
 	}
 
 	VOP_UNLOCK(vs->vs_vp);
 	vs->vs_vnlocked = FALSE;
 
 out:
 	/* Only the credentials are released here; 'vs' is left as is. */
 	if (cred)
 		crfree(cred);
 	if (credanon)
 		crfree(credanon);
 
 	return (error);
 }
 
 static void
 nlm_release_vfs_state(struct vfs_state *vs)
 {
 
 	if (vs->vs_vp) {
 		if (vs->vs_vnlocked)
 			vput(vs->vs_vp);
 		else
 			vrele(vs->vs_vp);
 	}
 	if (vs->vs_mp)
 		vfs_rel(vs->vs_mp);
 }
 
 /*
  * Map a local errno value onto an NLM status code: ESTALE and EROFS
  * have dedicated codes, everything else is reported as nlm4_failed.
  */
 static nlm4_stats
 nlm_convert_error(int error)
 {

 	switch (error) {
 	case ESTALE:
 		return (nlm4_stale_fh);
 	case EROFS:
 		return (nlm4_rofs);
 	default:
 		return (nlm4_failed);
 	}
 }
 
 /*
  * Service a lock test request: report in 'result' whether the lock
  * described by 'argp' could be granted and, if not, describe the
  * conflicting lock.
  *
  * The request cookie is copied into the result. If 'rpcp' is
  * non-NULL it receives an RPC client handle for the calling host
  * (which may be NULL if none could be obtained).
  *
  * Returns ENOMEM if no host structure could be found or created;
  * otherwise returns 0, with the outcome reported through
  * result->stat.stat.
  */
 int
 nlm_do_test(nlm4_testargs *argp, nlm4_testres *result, struct svc_req *rqstp,
 	CLIENT **rpcp)
 {
 	fhandle_t fh;
 	struct vfs_state vs;
 	struct nlm_host *host, *bhost;
 	int error, sysid;
 	struct flock fl;
 	accmode_t accmode;
 	
 	memset(result, 0, sizeof(*result));
 	/* Zeroed so the cleanup at 'out' is safe before the lookup. */
 	memset(&vs, 0, sizeof(vs));
 
 	host = nlm_find_host_by_name(argp->alock.caller_name,
 	    svc_getrpccaller(rqstp), rqstp->rq_vers);
 	if (!host) {
 		result->stat.stat = nlm4_denied_nolocks;
 		return (ENOMEM);
 	}
 
 	NLM_DEBUG(3, "nlm_do_test(): caller_name = %s (sysid = %d)\n",
 	    host->nh_caller_name, host->nh_sysid);
 
 	nlm_check_expired_locks(host);
 	sysid = host->nh_sysid;
 
 	nlm_convert_to_fhandle_t(&fh, &argp->alock.fh);
 	nlm_copy_netobj(&result->cookie, &argp->cookie, M_RPC);
 
 	/* Test requests are refused until the grace period is over. */
 	if (time_uptime < nlm_grace_threshold) {
 		result->stat.stat = nlm4_denied_grace_period;
 		goto out;
 	}
 
 	accmode = argp->exclusive ? VWRITE : VREAD;
 	error = nlm_get_vfs_state(host, rqstp, &fh, &vs, accmode);
 	if (error) {
 		result->stat.stat = nlm_convert_error(error);
 		goto out;
 	}
 
 	/* Describe the requested lock for the F_GETLK query. */
 	fl.l_start = argp->alock.l_offset;
 	fl.l_len = argp->alock.l_len;
 	fl.l_pid = argp->alock.svid;
 	fl.l_sysid = sysid;
 	fl.l_whence = SEEK_SET;
 	if (argp->exclusive)
 		fl.l_type = F_WRLCK;
 	else
 		fl.l_type = F_RDLCK;
 	error = VOP_ADVLOCK(vs.vs_vp, NULL, F_GETLK, &fl, F_REMOTE);
 	if (error) {
 		result->stat.stat = nlm4_failed;
 		goto out;
 	}
 
 	/* F_UNLCK coming back means nothing conflicts. */
 	if (fl.l_type == F_UNLCK) {
 		result->stat.stat = nlm4_granted;
 	} else {
 		result->stat.stat = nlm4_denied;
 		result->stat.nlm4_testrply_u.holder.exclusive =
 			(fl.l_type == F_WRLCK);
 		result->stat.nlm4_testrply_u.holder.svid = fl.l_pid;
 		bhost = nlm_find_host_by_sysid(fl.l_sysid);
 		if (bhost) {
 			/*
 			 * We don't have any useful way of recording
 			 * the value of oh used in the original lock
 			 * request. Ideally, the test reply would have
 			 * a space for the owning host's name allowing
 			 * our caller's NLM to keep track.
 			 *
 			 * As far as I can see, Solaris uses an eight
 			 * byte structure for oh which contains a four
 			 * byte pid encoded in local byte order and
 			 * the first four bytes of the host
 			 * name. Linux uses a variable length string
 			 * 'pid@hostname' in ascii but doesn't even
 			 * return that in test replies.
 			 *
 			 * For the moment, return nothing in oh
 			 * (already zero'ed above).
 			 */
 			nlm_host_release(bhost);
 		}
 		result->stat.nlm4_testrply_u.holder.l_offset = fl.l_start;
 		result->stat.nlm4_testrply_u.holder.l_len = fl.l_len;
 	}
 
 out:
 	/* Common exit: release VFS state and the host reference. */
 	nlm_release_vfs_state(&vs);
 	if (rpcp)
 		*rpcp = nlm_host_get_rpc(host, TRUE);
 	nlm_host_release(host);
 	return (0);
 }
 
 /*
  * Service an NLM lock request.
  *
  * argp    - decoded lock arguments (lock description, cookie, block,
  *           exclusive, reclaim and state fields are read here).
  * result  - reply structure; it is zeroed first and then filled with a
  *           copy of the request cookie and a status code.
  * rqstp   - the RPC request, used to identify the calling host.
  * monitor - when true, a change in the caller's state number triggers
  *           nlm_host_notify() and a granted lock triggers
  *           nlm_host_monitor().
  * rpcp    - if non-NULL, receives the result of nlm_host_get_rpc() for
  *           the calling host.
  *
  * Returns ENOMEM (with status nlm4_denied_nolocks) when no host record
  * can be found for the caller; otherwise returns 0 and reports the
  * outcome through result->stat.stat.
  */
 int
 nlm_do_lock(nlm4_lockargs *argp, nlm4_res *result, struct svc_req *rqstp,
     bool_t monitor, CLIENT **rpcp)
 {
 	fhandle_t fh;
 	struct vfs_state vs;
 	struct nlm_host *host;
 	int error, sysid;
 	struct flock fl;
 	accmode_t accmode;
 	
 	memset(result, 0, sizeof(*result));
 	memset(&vs, 0, sizeof(vs));
 
 	host = nlm_find_host_by_name(argp->alock.caller_name,
 	    svc_getrpccaller(rqstp), rqstp->rq_vers);
 	if (!host) {
 		result->stat.stat = nlm4_denied_nolocks;
 		return (ENOMEM);
 	}
 
 	NLM_DEBUG(3, "nlm_do_lock(): caller_name = %s (sysid = %d)\n",
 	    host->nh_caller_name, host->nh_sysid);
 
 	if (monitor && host->nh_state && argp->state
 	    && host->nh_state != argp->state) {
 		/*
 		 * The host rebooted without telling us. Trash its
 		 * locks.
 		 */
 		nlm_host_notify(host, argp->state);
 	}
 
 	nlm_check_expired_locks(host);
 	sysid = host->nh_sysid;
 
 	nlm_convert_to_fhandle_t(&fh, &argp->alock.fh);
 	nlm_copy_netobj(&result->cookie, &argp->cookie, M_RPC);
 
 	/*
 	 * While time_uptime is below nlm_grace_threshold only requests
 	 * flagged as reclaims are processed.
 	 */
 	if (time_uptime < nlm_grace_threshold && !argp->reclaim) {
 		result->stat.stat = nlm4_denied_grace_period;
 		goto out;
 	}
 
 	/* An exclusive lock is checked as a write, a shared one as a read. */
 	accmode = argp->exclusive ? VWRITE : VREAD;
 	error = nlm_get_vfs_state(host, rqstp, &fh, &vs, accmode);
 	if (error) {
 		result->stat.stat = nlm_convert_error(error);
 		goto out;
 	}
 
 	fl.l_start = argp->alock.l_offset;
 	fl.l_len = argp->alock.l_len;
 	fl.l_pid = argp->alock.svid;
 	fl.l_sysid = sysid;
 	fl.l_whence = SEEK_SET;
 	if (argp->exclusive)
 		fl.l_type = F_WRLCK;
 	else
 		fl.l_type = F_RDLCK;
 	if (argp->block) {
 		struct nlm_async_lock *af;
 		CLIENT *client;
 		struct nlm_grantcookie cookie;
 
 		/*
 		 * First, make sure we can contact the host's NLM.
 		 */
 		client = nlm_host_get_rpc(host, TRUE);
 		if (!client) {
 			result->stat.stat = nlm4_failed;
 			goto out;
 		}
 
 		/*
 		 * First we need to check and see if there is an
 		 * existing blocked lock that matches. This could be a
 		 * badly behaved client or an RPC re-send. If we find
 		 * one, just return nlm4_blocked.
 		 */
 		mtx_lock(&host->nh_lock);
 		TAILQ_FOREACH(af, &host->nh_pending, af_link) {
 			if (af->af_fl.l_start == fl.l_start
 			    && af->af_fl.l_len == fl.l_len
 			    && af->af_fl.l_pid == fl.l_pid
 			    && af->af_fl.l_type == fl.l_type) {
 				break;
 			}
 		}
 		/*
 		 * No duplicate: reserve a fresh grant cookie while
 		 * nh_lock is still held.
 		 */
 		if (!af) {
 			cookie.ng_sysid = host->nh_sysid;
 			cookie.ng_cookie = host->nh_grantcookie++;
 		}
 		mtx_unlock(&host->nh_lock);
 		if (af) {
 			CLNT_RELEASE(client);
 			result->stat.stat = nlm4_blocked;
 			goto out;
 		}
 
 		/*
 		 * Build the tracking structure, including the
 		 * arguments (af_granted) kept for the later grant.
 		 */
 		af = malloc(sizeof(struct nlm_async_lock), M_NLM,
 		    M_WAITOK|M_ZERO);
 		TASK_INIT(&af->af_task, 0, nlm_lock_callback, af);
 		af->af_vp = vs.vs_vp;
 		af->af_fl = fl;
 		af->af_host = host;
 		af->af_rpc = client;
 		/*
 		 * We use M_RPC here so that we can xdr_free the thing
 		 * later.
 		 */
 		nlm_make_netobj(&af->af_granted.cookie,
 		    (caddr_t)&cookie, sizeof(cookie), M_RPC);
 		af->af_granted.exclusive = argp->exclusive;
 		af->af_granted.alock.caller_name =
 			strdup(argp->alock.caller_name, M_RPC);
 		nlm_copy_netobj(&af->af_granted.alock.fh,
 		    &argp->alock.fh, M_RPC);
 		nlm_copy_netobj(&af->af_granted.alock.oh,
 		    &argp->alock.oh, M_RPC);
 		af->af_granted.alock.svid = argp->alock.svid;
 		af->af_granted.alock.l_offset = argp->alock.l_offset;
 		af->af_granted.alock.l_len = argp->alock.l_len;
 
 		/*
 		 * Put the entry on the pending list before calling
 		 * VOP_ADVLOCKASYNC. We do this in case the lock
 		 * request was blocked (returning EINPROGRESS) but
 		 * then granted before we manage to run again. The
 		 * client may receive the granted message before we
 		 * send our blocked reply but that's their problem.
 		 */
 		mtx_lock(&host->nh_lock);
 		TAILQ_INSERT_TAIL(&host->nh_pending, af, af_link);
 		mtx_unlock(&host->nh_lock);
 
 		error = VOP_ADVLOCKASYNC(vs.vs_vp, NULL, F_SETLK, &fl, F_REMOTE,
 		    &af->af_task, &af->af_cookie);
 
 		/*
 		 * If the lock completed synchronously, just free the
 		 * tracking structure now.
 		 */
 		if (error != EINPROGRESS) {
 			CLNT_RELEASE(af->af_rpc);
 			mtx_lock(&host->nh_lock);
 			TAILQ_REMOVE(&host->nh_pending, af, af_link);
 			mtx_unlock(&host->nh_lock);
 			xdr_free((xdrproc_t) xdr_nlm4_testargs,
 			    &af->af_granted);
 			free(af, M_NLM);
 		} else {
 			NLM_DEBUG(2, "NLM: pending async lock %p for %s "
 			    "(sysid %d)\n", af, host->nh_caller_name, sysid);
 			/*
 			 * Don't vrele the vnode just yet - this must
 			 * wait until either the async callback
 			 * happens or the lock is cancelled.
 			 */
 			vs.vs_vp = NULL;
 		}
 	} else {
 		error = VOP_ADVLOCK(vs.vs_vp, NULL, F_SETLK, &fl, F_REMOTE);
 	}
 
 	/* Translate the locking result into an NLM status code. */
 	if (error) {
 		if (error == EINPROGRESS) {
 			result->stat.stat = nlm4_blocked;
 		} else if (error == EDEADLK) {
 			result->stat.stat = nlm4_deadlck;
 		} else if (error == EAGAIN) {
 			result->stat.stat = nlm4_denied;
 		} else {
 			result->stat.stat = nlm4_failed;
 		}
 	} else {
 		if (monitor)
 			nlm_host_monitor(host, argp->state);
 		result->stat.stat = nlm4_granted;
 	}       
 
 out:
 	/* Common exit: drop the vfs state and the host reference. */
 	nlm_release_vfs_state(&vs);
 	if (rpcp)
 		*rpcp = nlm_host_get_rpc(host, TRUE);
 	nlm_host_release(host);
 	return (0);
 }
 
 /*
  * Service an NLM cancel request: locate the pending asynchronous lock
  * that matches the caller's description and cancel it.
  *
  * The reply is zeroed, given a copy of the request cookie and a status:
  * nlm4_granted when nlm_cancel_async_lock() succeeds, nlm4_denied when
  * it fails or no matching pending request exists.  Returns ENOMEM when
  * no host record is available for the caller, otherwise 0.  If rpcp is
  * non-NULL it receives the result of nlm_host_get_rpc() for the host.
  */
 int
 nlm_do_cancel(nlm4_cancargs *argp, nlm4_res *result, struct svc_req *rqstp,
     CLIENT **rpcp)
 {
 	struct nlm_async_lock *pending;
 	struct nlm_host *host;
 	struct vfs_state vs;
 	struct flock fl;
 	fhandle_t fh;
 	int error, sysid;
 
 	memset(result, 0, sizeof(*result));
 	memset(&vs, 0, sizeof(vs));
 
 	host = nlm_find_host_by_name(argp->alock.caller_name,
 	    svc_getrpccaller(rqstp), rqstp->rq_vers);
 	if (host == NULL) {
 		result->stat.stat = nlm4_denied_nolocks;
 		return (ENOMEM);
 	}
 
 	NLM_DEBUG(3, "nlm_do_cancel(): caller_name = %s (sysid = %d)\n",
 	    host->nh_caller_name, host->nh_sysid);
 
 	nlm_check_expired_locks(host);
 	sysid = host->nh_sysid;
 
 	nlm_convert_to_fhandle_t(&fh, &argp->alock.fh);
 	nlm_copy_netobj(&result->cookie, &argp->cookie, M_RPC);
 
 	if (time_uptime < nlm_grace_threshold) {
 		result->stat.stat = nlm4_denied_grace_period;
 		goto out;
 	}
 
 	error = nlm_get_vfs_state(host, rqstp, &fh, &vs, (accmode_t)0);
 	if (error != 0) {
 		result->stat.stat = nlm_convert_error(error);
 		goto out;
 	}
 
 	/* Describe the lock the caller wants to cancel. */
 	fl.l_type = argp->exclusive ? F_WRLCK : F_RDLCK;
 	fl.l_whence = SEEK_SET;
 	fl.l_start = argp->alock.l_offset;
 	fl.l_len = argp->alock.l_len;
 	fl.l_pid = argp->alock.svid;
 	fl.l_sysid = sysid;
 
 	/*
 	 * Search the host's pending list for a request with the same
 	 * range, owner and type.  Without one there is nothing to
 	 * cancel and the caller is told nlm4_denied.
 	 */
 	mtx_lock(&host->nh_lock);
 	TAILQ_FOREACH(pending, &host->nh_pending, af_link) {
 		if (pending->af_fl.l_start != fl.l_start ||
 		    pending->af_fl.l_len != fl.l_len ||
 		    pending->af_fl.l_pid != fl.l_pid ||
 		    pending->af_fl.l_type != fl.l_type)
 			continue;
 		break;
 	}
 
 	if (pending == NULL) {
 		mtx_unlock(&host->nh_lock);
 		result->stat.stat = nlm4_denied;
 	} else {
 		error = nlm_cancel_async_lock(pending);
 		result->stat.stat = (error != 0) ? nlm4_denied : nlm4_granted;
 		mtx_unlock(&host->nh_lock);
 	}
 
 out:
 	nlm_release_vfs_state(&vs);
 	if (rpcp != NULL)
 		*rpcp = nlm_host_get_rpc(host, TRUE);
 	nlm_host_release(host);
 	return (0);
 }
 
 /*
  * Service an NLM unlock request.
  *
  * The reply is zeroed and given a copy of the request cookie.  Outside
  * the grace period, and once the file handle has been resolved, the
  * described range is unlocked with VOP_ADVLOCK() and nlm4_granted is
  * reported regardless of that call's result.  Returns ENOMEM when no
  * host record is available for the caller, otherwise 0.  If rpcp is
  * non-NULL it receives the result of nlm_host_get_rpc() for the host.
  */
 int
 nlm_do_unlock(nlm4_unlockargs *argp, nlm4_res *result, struct svc_req *rqstp,
     CLIENT **rpcp)
 {
 	struct nlm_host *host;
 	struct vfs_state vs;
 	struct flock fl;
 	fhandle_t fh;
 	int error, sysid;
 
 	memset(result, 0, sizeof(*result));
 	memset(&vs, 0, sizeof(vs));
 
 	host = nlm_find_host_by_name(argp->alock.caller_name,
 	    svc_getrpccaller(rqstp), rqstp->rq_vers);
 	if (host == NULL) {
 		result->stat.stat = nlm4_denied_nolocks;
 		return (ENOMEM);
 	}
 
 	NLM_DEBUG(3, "nlm_do_unlock(): caller_name = %s (sysid = %d)\n",
 	    host->nh_caller_name, host->nh_sysid);
 
 	nlm_check_expired_locks(host);
 	sysid = host->nh_sysid;
 
 	nlm_convert_to_fhandle_t(&fh, &argp->alock.fh);
 	nlm_copy_netobj(&result->cookie, &argp->cookie, M_RPC);
 
 	if (time_uptime < nlm_grace_threshold) {
 		result->stat.stat = nlm4_denied_grace_period;
 		goto out;
 	}
 
 	error = nlm_get_vfs_state(host, rqstp, &fh, &vs, (accmode_t)0);
 	if (error != 0) {
 		result->stat.stat = nlm_convert_error(error);
 		goto out;
 	}
 
 	fl.l_type = F_UNLCK;
 	fl.l_whence = SEEK_SET;
 	fl.l_start = argp->alock.l_offset;
 	fl.l_len = argp->alock.l_len;
 	fl.l_pid = argp->alock.svid;
 	fl.l_sysid = sysid;
 
 	/*
 	 * The protocol has no status for a failed unlock (only for the
 	 * grace period), so the outcome of the call is deliberately
 	 * discarded and success is always reported.
 	 */
 	(void)VOP_ADVLOCK(vs.vs_vp, NULL, F_UNLCK, &fl, F_REMOTE);
 	result->stat.stat = nlm4_granted;
 
 out:
 	nlm_release_vfs_state(&vs);
 	if (rpcp != NULL)
 		*rpcp = nlm_host_get_rpc(host, TRUE);
 	nlm_host_release(host);
 	return (0);
 }
 
 /*
  * Handle an incoming "granted" message.
  *
  * The reply is zeroed, given a copy of the request cookie and defaults
  * to nlm4_denied.  The global list of waiting locks is searched for an
  * entry that is still waiting and whose svid, offset, length and file
  * handle match the message; the first match is marked as no longer
  * waiting, its sleeper is woken and the reply becomes nlm4_granted.
  * Returns ENOMEM when the sender has no host record, otherwise 0.  If
  * rpcp is non-NULL it receives the result of nlm_host_get_rpc().
  */
 int
 nlm_do_granted(nlm4_testargs *argp, nlm4_res *result, struct svc_req *rqstp,
     CLIENT **rpcp)
 {
 	struct nlm_waiting_lock *waiter;
 	struct nlm_host *host;
 
 	memset(result, 0, sizeof(*result));
 
 	host = nlm_find_host_by_addr(svc_getrpccaller(rqstp), rqstp->rq_vers);
 	if (host == NULL) {
 		result->stat.stat = nlm4_denied_nolocks;
 		return (ENOMEM);
 	}
 
 	nlm_copy_netobj(&result->cookie, &argp->cookie, M_RPC);
 	result->stat.stat = nlm4_denied;
 	KFAIL_POINT_CODE(DEBUG_FP, nlm_deny_grant, goto out);
 
 	mtx_lock(&nlm_global_lock);
 	TAILQ_FOREACH(waiter, &nlm_waiting_locks, nw_link) {
 		/* Skip entries that are not waiting or do not match. */
 		if (!waiter->nw_waiting)
 			continue;
 		if (argp->alock.svid != waiter->nw_lock.svid ||
 		    argp->alock.l_offset != waiter->nw_lock.l_offset ||
 		    argp->alock.l_len != waiter->nw_lock.l_len ||
 		    argp->alock.fh.n_len != waiter->nw_lock.fh.n_len ||
 		    memcmp(argp->alock.fh.n_bytes, waiter->nw_lock.fh.n_bytes,
 			waiter->nw_lock.fh.n_len) != 0)
 			continue;
 
 		waiter->nw_waiting = FALSE;
 		wakeup(waiter);
 		result->stat.stat = nlm4_granted;
 		break;
 	}
 	mtx_unlock(&nlm_global_lock);
 
 out:
 	if (rpcp != NULL)
 		*rpcp = nlm_host_get_rpc(host, TRUE);
 	nlm_host_release(host);
 	return (0);
 }
 
 /*
  * Handle a client's reply to a grant we sent earlier.
  *
  * The reply cookie must have the size of struct nlm_grantcookie; it
  * identifies the host (by sysid) and the entry on that host's granted
  * list.  A matching entry is taken off the list.  If the client did
  * not accept the grant, the lock is released again with VOP_ADVLOCK().
  * In every case where an entry was found it is freed, and the host
  * reference obtained here is released.
  */
 void
 nlm_do_granted_res(nlm4_res *argp, struct svc_req *rqstp)
 {
 	struct nlm_async_lock *af;
 	struct nlm_host *host;
 	int error;
 
 	if (argp->cookie.n_len != sizeof(struct nlm_grantcookie)) {
 		NLM_DEBUG(1, "NLM: bogus grant cookie");
 		return;
 	}
 
 	host = nlm_find_host_by_sysid(ng_sysid(&argp->cookie));
 	if (host == NULL) {
 		NLM_DEBUG(1, "NLM: Unknown host rejected our grant");
 		return;
 	}
 
 	/* Find and unlink the granted entry carrying this cookie. */
 	mtx_lock(&host->nh_lock);
 	TAILQ_FOREACH(af, &host->nh_granted, af_link) {
 		if (ng_cookie(&argp->cookie) ==
 		    ng_cookie(&af->af_granted.cookie))
 			break;
 	}
 	if (af != NULL)
 		TAILQ_REMOVE(&host->nh_granted, af, af_link);
 	mtx_unlock(&host->nh_lock);
 
 	if (af == NULL) {
 		NLM_DEBUG(1, "NLM: host %s (sysid %d) replied to our grant "
 		    "with unrecognized cookie %d:%d", host->nh_caller_name,
 		    host->nh_sysid, ng_sysid(&argp->cookie),
 		    ng_cookie(&argp->cookie));
 		nlm_host_release(host);
 		return;
 	}
 
 	if (argp->stat.stat == nlm4_granted) {
 		NLM_DEBUG(5, "NLM: async lock %p accepted by host %s (sysid %d)",
 		    af, host->nh_caller_name, host->nh_sysid);
 	} else {
 		/* The client turned the grant down: undo the lock. */
 		af->af_fl.l_type = F_UNLCK;
 		error = VOP_ADVLOCK(af->af_vp, NULL, F_UNLCK, &af->af_fl, F_REMOTE);
 		if (error != 0) {
 			NLM_DEBUG(1, "NLM: host %s (sysid %d) rejected our grant "
 			    "and we failed to unlock (%d)", host->nh_caller_name,
 			    host->nh_sysid, error);
 		} else {
 			NLM_DEBUG(5, "NLM: async lock %p rejected by host %s (sysid %d)",
 			    af, host->nh_caller_name, host->nh_sysid);
 		}
 	}
 
 	nlm_free_async_lock(af);
 	nlm_host_release(host);
 }
 
 /*
  * Handle a "free all" notification: every known host whose caller name
  * equals argp->name is passed to nlm_host_notify() with the new state.
  */
 void
 nlm_do_free_all(nlm4_notify *argp)
 {
 	struct nlm_host *cur, *next;
 
 	TAILQ_FOREACH_SAFE(cur, &nlm_hosts, nh_link, next) {
 		if (strcmp(cur->nh_caller_name, argp->name) != 0)
 			continue;
 		nlm_host_notify(cur, argp->state);
 	}
 }
 
 /*
  * Kernel module glue
  */
 /*
  * Module event handler.  MOD_LOAD returns the result of nlm_init().
  * MOD_UNLOAD calls nlm_uninit() but, like every other event, still
  * reports EOPNOTSUPP because the NLM module cannot be safely unloaded.
  */
 static int
 nfslockd_modevent(module_t mod, int type, void *data)
 {
 
 	if (type == MOD_LOAD)
 		return (nlm_init());
 
 	if (type == MOD_UNLOAD)
 		nlm_uninit();
 
 	/* The NLM module cannot be safely unloaded. */
 	return (EOPNOTSUPP);
 }
 /*
  * Module description: the name "nfslockd", the event handler above and
  * a NULL third member (presumably private data - confirm against
  * moduledata_t).
  */
 static moduledata_t nfslockd_mod = {
 	"nfslockd",
 	nfslockd_modevent,
 	NULL,
 };
 DECLARE_MODULE(nfslockd, nfslockd_mod, SI_SUB_VFS, SI_ORDER_ANY);
 
 /* So that loader and kldload(2) can find us, wherever we are.. */
 /* Declared dependencies: xdr, krpc and nfscommon, each at version 1. */
 MODULE_DEPEND(nfslockd, xdr, 1, 1, 1);
 MODULE_DEPEND(nfslockd, krpc, 1, 1, 1);
 MODULE_DEPEND(nfslockd, nfscommon, 1, 1, 1);
 MODULE_VERSION(nfslockd, 1);
Index: head/sys/sys/mount.h
===================================================================
--- head/sys/sys/mount.h	(revision 362157)
+++ head/sys/sys/mount.h	(revision 362158)
@@ -1,1069 +1,1086 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 1989, 1991, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)mount.h	8.21 (Berkeley) 5/20/95
  * $FreeBSD$
  */
 
 #ifndef _SYS_MOUNT_H_
 #define _SYS_MOUNT_H_
 
 #include <sys/ucred.h>
 #include <sys/queue.h>
 #ifdef _KERNEL
 #include <sys/lock.h>
 #include <sys/lockmgr.h>
 #include <sys/tslog.h>
 #include <sys/_mutex.h>
 #include <sys/_sx.h>
 #endif
 
 /*
  * NOTE: When changing statfs structure, mount structure, MNT_* flags or
  * MNTK_* flags also update DDB show mount command in vfs_subr.c.
  */
 
 typedef struct fsid { int32_t val[2]; } fsid_t;	/* filesystem id type */
 
 /*
  * Compare two filesystem ids byte-wise; a and b are pointers to fsid_t.
  * Evaluates to 0 when they are equal (memcmp() semantics).
  */
 #define fsidcmp(a, b) memcmp((a), (b), sizeof(fsid_t))
 
 /*
  * File identifier.
  * These are unique per filesystem on a single machine.
  *
  * Note that the offset of fid_data is 4 bytes, so care must be taken to avoid
  * undefined behavior accessing unaligned fields within an embedded struct.
  *
  * MAXFIDSZ is the capacity of fid_data in bytes.
  */
 #define	MAXFIDSZ	16
 
 struct fid {
 	u_short		fid_len;		/* length of data in bytes */
 	u_short		fid_data0;		/* force longword alignment */
 	char		fid_data[MAXFIDSZ];	/* data (variable length) */
 };
 
 /*
  * filesystem statistics
  *
  * MFSNAMELEN sizes f_fstypename and MNAMELEN sizes both f_mntfromname
  * and f_mntonname.  STATFS_VERSION is the version number that goes
  * with this layout (presumably the value stored in f_version - confirm
  * against the code that fills the structure).
  */
 #define	MFSNAMELEN	16		/* length of type name including null */
 #define	MNAMELEN	1024		/* size of on/from name bufs */
 #define	STATFS_VERSION	0x20140518	/* current version number */
 struct statfs {
 	uint32_t f_version;		/* structure version number */
 	uint32_t f_type;		/* type of filesystem */
 	uint64_t f_flags;		/* copy of mount exported flags */
 	uint64_t f_bsize;		/* filesystem fragment size */
 	uint64_t f_iosize;		/* optimal transfer block size */
 	uint64_t f_blocks;		/* total data blocks in filesystem */
 	uint64_t f_bfree;		/* free blocks in filesystem */
 	int64_t	 f_bavail;		/* free blocks avail to non-superuser */
 	uint64_t f_files;		/* total file nodes in filesystem */
 	int64_t	 f_ffree;		/* free nodes avail to non-superuser */
 	uint64_t f_syncwrites;		/* count of sync writes since mount */
 	uint64_t f_asyncwrites;		/* count of async writes since mount */
 	uint64_t f_syncreads;		/* count of sync reads since mount */
 	uint64_t f_asyncreads;		/* count of async reads since mount */
 	uint64_t f_spare[10];		/* unused spare */
 	uint32_t f_namemax;		/* maximum filename length */
 	uid_t	  f_owner;		/* user that mounted the filesystem */
 	fsid_t	  f_fsid;		/* filesystem id */
 	char	  f_charspare[80];	    /* spare string space */
 	char	  f_fstypename[MFSNAMELEN]; /* filesystem type name */
 	char	  f_mntfromname[MNAMELEN];  /* mounted filesystem */
 	char	  f_mntonname[MNAMELEN];    /* directory on which mounted */
 };
 
 #if defined(_WANT_FREEBSD11_STATFS) || defined(_KERNEL)
 /*
  * Older layout of the statistics structure, visible to the kernel and
  * to code that defines _WANT_FREEBSD11_STATFS.  It has the same fields
  * in the same order as struct statfs; the array sizes are written as
  * literals here and the two mount name buffers are 88 bytes each
  * rather than MNAMELEN.
  */
 #define	FREEBSD11_STATFS_VERSION	0x20030518 /* current version number */
 struct freebsd11_statfs {
 	uint32_t f_version;		/* structure version number */
 	uint32_t f_type;		/* type of filesystem */
 	uint64_t f_flags;		/* copy of mount exported flags */
 	uint64_t f_bsize;		/* filesystem fragment size */
 	uint64_t f_iosize;		/* optimal transfer block size */
 	uint64_t f_blocks;		/* total data blocks in filesystem */
 	uint64_t f_bfree;		/* free blocks in filesystem */
 	int64_t	 f_bavail;		/* free blocks avail to non-superuser */
 	uint64_t f_files;		/* total file nodes in filesystem */
 	int64_t	 f_ffree;		/* free nodes avail to non-superuser */
 	uint64_t f_syncwrites;		/* count of sync writes since mount */
 	uint64_t f_asyncwrites;		/* count of async writes since mount */
 	uint64_t f_syncreads;		/* count of sync reads since mount */
 	uint64_t f_asyncreads;		/* count of async reads since mount */
 	uint64_t f_spare[10];		/* unused spare */
 	uint32_t f_namemax;		/* maximum filename length */
 	uid_t	  f_owner;		/* user that mounted the filesystem */
 	fsid_t	  f_fsid;		/* filesystem id */
 	char	  f_charspare[80];	/* spare string space */
 	char	  f_fstypename[16];	/* filesystem type name */
 	char	  f_mntfromname[88];	/* mounted filesystem */
 	char	  f_mntonname[88];	/* directory on which mounted */
 };
 #endif /* _WANT_FREEBSD11_STATFS || _KERNEL */
 
 #ifdef _KERNEL
 /*
  * Kernel-only "old" statistics layout built from long-sized fields.
  * OMFSNAMELEN sizes f_fstypename; OMNAMELEN sizes both mount name
  * buffers and depends on sizeof(long), so it differs between ABIs.
  */
 #define	OMFSNAMELEN	16	/* length of fs type name, including null */
 #define	OMNAMELEN	(88 - 2 * sizeof(long))	/* size of on/from name bufs */
 
 /* XXX getfsstat.2 is out of date with write and read counter changes here. */
 /* XXX statfs.2 is out of date with read counter changes here. */
 struct ostatfs {
 	long	f_spare2;		/* placeholder */
 	long	f_bsize;		/* fundamental filesystem block size */
 	long	f_iosize;		/* optimal transfer block size */
 	long	f_blocks;		/* total data blocks in filesystem */
 	long	f_bfree;		/* free blocks in fs */
 	long	f_bavail;		/* free blocks avail to non-superuser */
 	long	f_files;		/* total file nodes in filesystem */
 	long	f_ffree;		/* free file nodes in fs */
 	fsid_t	f_fsid;			/* filesystem id */
 	uid_t	f_owner;		/* user that mounted the filesystem */
 	int	f_type;			/* type of filesystem */
 	int	f_flags;		/* copy of mount exported flags */
 	long	f_syncwrites;		/* count of sync writes since mount */
 	long	f_asyncwrites;		/* count of async writes since mount */
 	char	f_fstypename[OMFSNAMELEN]; /* fs type name */
 	char	f_mntonname[OMNAMELEN];	/* directory on which mounted */
 	long	f_syncreads;		/* count of sync reads since mount */
 	long	f_asyncreads;		/* count of async reads since mount */
 	short	f_spares1;		/* unused spare */
 	char	f_mntfromname[OMNAMELEN];/* mounted filesystem */
 	short	f_spares2;		/* unused spare */
 	/*
 	 * XXX on machines where longs are aligned to 8-byte boundaries, there
 	 * is an unnamed int32_t here.  This spare was after the apparent end
 	 * of the struct until we bit off the read counters from f_mntonname.
 	 */
 	long	f_spare[2];		/* unused spare */
 };
 
 /* List head type for lists of vnodes. */
 TAILQ_HEAD(vnodelst, vnode);
 
 /* Mount options list */
 TAILQ_HEAD(vfsoptlist, vfsopt);
 /*
  * One entry on a mount options list.
  * NOTE(review): the field descriptions below are inferred from the
  * field names only; confirm against the option handling code.
  */
 struct vfsopt {
 	TAILQ_ENTRY(vfsopt) link;	/* linkage on the vfsoptlist */
 	char	*name;			/* presumably the option name */
 	void	*value;			/* presumably the option value */
 	int	len;			/* presumably the length of value */
 	int	pos;
 	int	seen;
 };
 
 /*
  * Structure per mounted filesystem.  Each mounted filesystem has an
  * array of operations and an instance record.  The filesystems are
  * put on a doubly linked list.
  *
  * Lock reference:
  * 	l - mnt_listmtx
  *	m - mountlist_mtx
  *	i - interlock
  *	v - vnode freelist mutex
  *
  * Unmarked fields are considered stable as long as a ref is held.
  *
  * NOTE(review): no field in the definition below is annotated (v).
  *
  * mnt_startzero and mnt_endzero alias the fields mnt_list and
  * mnt_gjprovider; presumably they delimit a range that is zeroed when
  * a mount structure is (re)initialized - confirm against the code
  * that uses them.
  */
 struct mount {
 	struct mtx	mnt_mtx;		/* mount structure interlock */
 	int		mnt_gen;		/* struct mount generation */
 #define	mnt_startzero	mnt_list
 	TAILQ_ENTRY(mount) mnt_list;		/* (m) mount list */
 	struct vfsops	*mnt_op;		/* operations on fs */
 	struct vfsconf	*mnt_vfc;		/* configuration info */
 	struct vnode	*mnt_vnodecovered;	/* vnode we mounted on */
 	struct vnode	*mnt_syncer;		/* syncer vnode */
 	int		mnt_ref;		/* (i) Reference count */
 	struct vnodelst	mnt_nvnodelist;		/* (i) list of vnodes */
 	int		mnt_nvnodelistsize;	/* (i) # of vnodes */
 	int		mnt_writeopcount;	/* (i) write syscalls pending */
 	int		mnt_kern_flag;		/* (i) kernel only flags */
 	uint64_t	mnt_flag;		/* (i) flags shared with user */
 	struct vfsoptlist *mnt_opt;		/* current mount options */
 	struct vfsoptlist *mnt_optnew;		/* new options passed to fs */
 	int		mnt_maxsymlinklen;	/* max size of short symlink */
 	struct statfs	mnt_stat;		/* cache of filesystem stats */
 	struct ucred	*mnt_cred;		/* credentials of mounter */
 	void *		mnt_data;		/* private data */
 	time_t		mnt_time;		/* last time written*/
 	int		mnt_iosize_max;		/* max size for clusters, etc */
 	struct netexport *mnt_export;		/* export list */
 	struct label	*mnt_label;		/* MAC label for the fs */
 	u_int		mnt_hashseed;		/* Random seed for vfs_hash */
 	int		mnt_lockref;		/* (i) Lock reference count */
 	int		mnt_secondary_writes;   /* (i) # of secondary writes */
 	int		mnt_secondary_accwrites;/* (i) secondary wr. starts */
 	struct thread	*mnt_susp_owner;	/* (i) thread owning suspension */
 #define	mnt_endzero	mnt_gjprovider
 	char		*mnt_gjprovider;	/* gjournal provider name */
 	struct mtx	mnt_listmtx;		/* the (l) lock named above */
 	struct vnodelst	mnt_lazyvnodelist;	/* (l) list of lazy vnodes */
 	int		mnt_lazyvnodelistsize;	/* (l) # of lazy vnodes */
 	struct lock	mnt_explock;		/* vfs_export walkers lock */
 	TAILQ_ENTRY(mount) mnt_upper_link;	/* (m) we in the all uppers */
 	TAILQ_HEAD(, mount) mnt_uppers;		/* (m) upper mounts over us*/
 	int __aligned(CACHE_LINE_SIZE)	mnt_vfs_ops;/* (i) pending vfs ops */
 	/*
 	 * NOTE(review): the *_pcpu pointers below look like per-CPU
 	 * counterparts of mnt_ref, mnt_lockref and mnt_writeopcount;
 	 * confirm against their users.
 	 */
 	int		*mnt_thread_in_ops_pcpu;
 	int		*mnt_ref_pcpu;
 	int		*mnt_lockref_pcpu;
 	int		*mnt_writeopcount_pcpu;
 	struct vnode	*mnt_rootvnode;
 };
 
 /*
  * Definitions for MNT_VNODE_FOREACH_ALL.
  *
  * MNT_VNODE_FOREACH_ALL(vp, mp, mvp) expands to a for-loop header: vp
  * is set from __mnt_vnode_first_all() and then from
  * __mnt_vnode_next_all() until one of them returns NULL.  mvp is
  * passed by address to the helpers as the iteration marker.
  *
  * MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp) is for leaving the loop early:
  * it takes the mount interlock and calls the marker-free helper, which
  * drops the interlock again (see the inline comment and the assertion
  * that follows it).
  */
 struct vnode *__mnt_vnode_next_all(struct vnode **mvp, struct mount *mp);
 struct vnode *__mnt_vnode_first_all(struct vnode **mvp, struct mount *mp);
 void          __mnt_vnode_markerfree_all(struct vnode **mvp, struct mount *mp);
 
 #define MNT_VNODE_FOREACH_ALL(vp, mp, mvp)				\
 	for (vp = __mnt_vnode_first_all(&(mvp), (mp));			\
 		(vp) != NULL; vp = __mnt_vnode_next_all(&(mvp), (mp)))
 
 #define MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp)				\
 	do {								\
 		MNT_ILOCK(mp);						\
 		__mnt_vnode_markerfree_all(&(mvp), (mp));		\
 		/* MNT_IUNLOCK(mp); -- done in above function */	\
 		mtx_assert(MNT_MTX(mp), MA_NOTOWNED);			\
 	} while (0)
 
 /*
  * Definitions for MNT_VNODE_FOREACH_LAZY.
  *
  * Same loop shape as MNT_VNODE_FOREACH_ALL, but the first/next helpers
  * additionally receive a callback of type mnt_lazy_cb_t and an opaque
  * argument for it (presumably used to filter the vnodes returned -
  * confirm in the helper implementation).
  *
  * MNT_VNODE_FOREACH_LAZY_ABORT(mp, mvp) only calls the marker-free
  * helper; unlike the _ALL variant it does not take the interlock here.
  */
 typedef int mnt_lazy_cb_t(struct vnode *, void *);
 struct vnode *__mnt_vnode_next_lazy(struct vnode **mvp, struct mount *mp,
     mnt_lazy_cb_t *cb, void *cbarg);
 struct vnode *__mnt_vnode_first_lazy(struct vnode **mvp, struct mount *mp,
     mnt_lazy_cb_t *cb, void *cbarg);
 void          __mnt_vnode_markerfree_lazy(struct vnode **mvp, struct mount *mp);
 
 #define MNT_VNODE_FOREACH_LAZY(vp, mp, mvp, cb, cbarg)			\
 	for (vp = __mnt_vnode_first_lazy(&(mvp), (mp), (cb), (cbarg));	\
 		(vp) != NULL; 						\
 		vp = __mnt_vnode_next_lazy(&(mvp), (mp), (cb), (cbarg)))
 
 #define MNT_VNODE_FOREACH_LAZY_ABORT(mp, mvp)				\
 	__mnt_vnode_markerfree_lazy(&(mvp), (mp))
 
 /*
  * Mount interlock helpers: lock, try-lock and unlock the mnt_mtx
  * member of a struct mount, and MNT_MTX() to obtain its address.
  */
 #define	MNT_ILOCK(mp)	mtx_lock(&(mp)->mnt_mtx)
 #define	MNT_ITRYLOCK(mp) mtx_trylock(&(mp)->mnt_mtx)
 #define	MNT_IUNLOCK(mp)	mtx_unlock(&(mp)->mnt_mtx)
 #define	MNT_MTX(mp)	(&(mp)->mnt_mtx)
 
 /*
  * Reference counting on struct mount.  Both macros assert that the
  * mount interlock is owned by the caller.
  *
  * MNT_REF(mp) increments mnt_ref.  The argument is parenthesized so
  * that any pointer expression (for example "base + 1") can be passed.
  *
  * MNT_REL(mp) decrements mnt_ref.  While mnt_vfs_ops is non-zero it
  * also calls vfs_dump_mount_counters() if the count went negative and
  * wakeup() on the mount if the count reached zero.
  *
  * Both macros evaluate their argument more than once; do not pass an
  * expression with side effects.
  */
 #define	MNT_REF(mp)	do {						\
 	mtx_assert(MNT_MTX(mp), MA_OWNED);				\
 	(mp)->mnt_ref++;						\
 } while (0)
 #define	MNT_REL(mp)	do {						\
 	mtx_assert(MNT_MTX(mp), MA_OWNED);				\
 	(mp)->mnt_ref--;						\
 	if ((mp)->mnt_vfs_ops && (mp)->mnt_ref < 0)		\
 		vfs_dump_mount_counters(mp);				\
 	if ((mp)->mnt_ref == 0 && (mp)->mnt_vfs_ops)		\
 		wakeup((mp));						\
 } while (0)
 
 #endif /* _KERNEL */
 
 /*
  * User specifiable flags, stored in mnt_flag.
  * Each flag is a distinct bit of a 64-bit value (mnt_flag is uint64_t).
  */
 #define	MNT_RDONLY	0x0000000000000001ULL /* read only filesystem */
 #define	MNT_SYNCHRONOUS	0x0000000000000002ULL /* fs written synchronously */
 #define	MNT_NOEXEC	0x0000000000000004ULL /* can't exec from filesystem */
 #define	MNT_NOSUID	0x0000000000000008ULL /* don't honor setuid fs bits */
 #define	MNT_NFS4ACLS	0x0000000000000010ULL /* enable NFS version 4 ACLs */
 #define	MNT_UNION	0x0000000000000020ULL /* union with underlying fs */
 #define	MNT_ASYNC	0x0000000000000040ULL /* fs written asynchronously */
 #define	MNT_SUIDDIR	0x0000000000100000ULL /* special SUID dir handling */
 #define	MNT_SOFTDEP	0x0000000000200000ULL /* using soft updates */
 #define	MNT_NOSYMFOLLOW	0x0000000000400000ULL /* do not follow symlinks */
 #define	MNT_GJOURNAL	0x0000000002000000ULL /* GEOM journal support enabled */
 #define	MNT_MULTILABEL	0x0000000004000000ULL /* MAC support for objects */
 #define	MNT_ACLS	0x0000000008000000ULL /* ACL support enabled */
 #define	MNT_NOATIME	0x0000000010000000ULL /* don't update file access time */
 #define	MNT_NOCLUSTERR	0x0000000040000000ULL /* disable cluster read */
 #define	MNT_NOCLUSTERW	0x0000000080000000ULL /* disable cluster write */
 #define	MNT_SUJ		0x0000000100000000ULL /* using journaled soft updates */
 #define	MNT_AUTOMOUNTED	0x0000000200000000ULL /* mounted by automountd(8) */
 #define	MNT_UNTRUSTED	0x0000000800000000ULL /* filesys metadata untrusted */
 
 /*
  * NFS export related mount flags.
  * All of them are part of MNT_VISFLAGMASK.
  */
 #define	MNT_EXRDONLY	0x0000000000000080ULL	/* exported read only */
 #define	MNT_EXPORTED	0x0000000000000100ULL	/* filesystem is exported */
 #define	MNT_DEFEXPORTED	0x0000000000000200ULL	/* exported to the world */
 #define	MNT_EXPORTANON	0x0000000000000400ULL	/* anon uid mapping for all */
 #define	MNT_EXKERB	0x0000000000000800ULL	/* exported with Kerberos */
 #define	MNT_EXPUBLIC	0x0000000020000000ULL	/* public export (WebNFS) */
 
 /*
  * Flags set by internal operations,
  * but visible to the user.
  * XXX some of these are not quite right.. (I've never seen the root flag set)
  *
  * All of them are part of MNT_VISFLAGMASK; of these only MNT_USER and
  * MNT_IGNORE also appear in MNT_UPDATEMASK.
  */
 #define	MNT_LOCAL	0x0000000000001000ULL /* filesystem is stored locally */
 #define	MNT_QUOTA	0x0000000000002000ULL /* quotas are enabled on fs */
 #define	MNT_ROOTFS	0x0000000000004000ULL /* identifies the root fs */
 #define	MNT_USER	0x0000000000008000ULL /* mounted by a user */
 #define	MNT_IGNORE	0x0000000000800000ULL /* do not show entry in df */
 #define	MNT_VERIFIED	0x0000000400000000ULL /* filesystem is verified */
 
 /*
  * Mask of flags that are visible to statfs().
  * XXX I think that this could now become (~(MNT_CMDFLAGS))
  * but the 'mount' program may need changing to handle this.
  *
  * The mask is the union of the user specifiable, NFS export and
  * internally set flags defined above; none of the MNT_CMDFLAGS
  * command modifiers are named here.
  */
 #define	MNT_VISFLAGMASK	(MNT_RDONLY	| MNT_SYNCHRONOUS | MNT_NOEXEC	| \
 			MNT_NOSUID	| MNT_UNION	| MNT_SUJ	| \
 			MNT_ASYNC	| MNT_EXRDONLY	| MNT_EXPORTED	| \
 			MNT_DEFEXPORTED	| MNT_EXPORTANON| MNT_EXKERB	| \
 			MNT_LOCAL	| MNT_USER	| MNT_QUOTA	| \
 			MNT_ROOTFS	| MNT_NOATIME	| MNT_NOCLUSTERR| \
 			MNT_NOCLUSTERW	| MNT_SUIDDIR	| MNT_SOFTDEP	| \
 			MNT_IGNORE	| MNT_EXPUBLIC	| MNT_NOSYMFOLLOW | \
 			MNT_GJOURNAL	| MNT_MULTILABEL | MNT_ACLS	| \
 			MNT_NFS4ACLS	| MNT_AUTOMOUNTED | MNT_VERIFIED | \
 			MNT_UNTRUSTED)
 
 /*
  * Mask of flags that can be updated.
  * Note that MNT_RDONLY is not part of this mask.
  */
 #define	MNT_UPDATEMASK (MNT_NOSUID	| MNT_NOEXEC	| \
 			MNT_SYNCHRONOUS	| MNT_UNION	| MNT_ASYNC	| \
 			MNT_NOATIME | \
 			MNT_NOSYMFOLLOW	| MNT_IGNORE	| \
 			MNT_NOCLUSTERR	| MNT_NOCLUSTERW | MNT_SUIDDIR	| \
 			MNT_ACLS	| MNT_USER	| MNT_NFS4ACLS	| \
 			MNT_AUTOMOUNTED | MNT_UNTRUSTED)
 
 /*
  * External filesystem command modifier flags.
  * Unmount can use the MNT_FORCE flag.
  * XXX: These are not STATES and really should be somewhere else.
  * XXX: MNT_BYFSID and MNT_NONBUSY collide with MNT_ACLS and MNT_MULTILABEL,
  *      but because MNT_ACLS and MNT_MULTILABEL are only used for mount(2),
  *      and MNT_BYFSID and MNT_NONBUSY are only used for unmount(2),
  *      it's harmless.
  *
  * (The collisions are pairwise: MNT_BYFSID has the value of MNT_ACLS
  * and MNT_NONBUSY has the value of MNT_MULTILABEL.)
  *
  * MNT_CMDFLAGS is the union of all the modifiers defined here.
  */
 #define	MNT_UPDATE	0x0000000000010000ULL /* not real mount, just update */
 #define	MNT_DELEXPORT	0x0000000000020000ULL /* delete export host lists */
 #define	MNT_RELOAD	0x0000000000040000ULL /* reload filesystem data */
 #define	MNT_FORCE	0x0000000000080000ULL /* force unmount or readonly */
 #define	MNT_SNAPSHOT	0x0000000001000000ULL /* snapshot the filesystem */
 #define	MNT_NONBUSY	0x0000000004000000ULL /* check vnode use counts. */
 #define	MNT_BYFSID	0x0000000008000000ULL /* specify filesystem by ID. */
 #define	MNT_NOCOVER	0x0000001000000000ULL /* Do not cover a mount point */
 #define	MNT_EMPTYDIR	0x0000002000000000ULL /* Only mount on empty dir */
 #define MNT_CMDFLAGS   (MNT_UPDATE	| MNT_DELEXPORT	| MNT_RELOAD	| \
 			MNT_FORCE	| MNT_SNAPSHOT	| MNT_NONBUSY	| \
 			MNT_BYFSID	| MNT_NOCOVER	| MNT_EMPTYDIR)
 /*
  * Internal filesystem control flags stored in mnt_kern_flag.
  *
  * MNTK_UNMOUNT locks the mount entry so that name lookup cannot
  * proceed past the mount point.  This keeps the subtree stable during
  * mounts and unmounts.  When non-forced unmount flushes all vnodes
  * from the mp queue, the MNTK_UNMOUNT flag prevents insmntque() from
  * queueing new vnodes.
  *
  * MNTK_UNMOUNTF permits filesystems to detect a forced unmount while
  * dounmount() is still waiting to lock the mountpoint. This allows
  * the filesystem to cancel operations that might otherwise deadlock
  * with the unmount attempt (used by NFS).
  *
  * The list below is not in strictly ascending numeric order:
  * MNTK_SUSPEND (0x08000000) is defined before MNTK_SUSPEND2
  * (0x04000000).
  * NOTE(review): MNTK_VGONE_UPPER, MNTK_VGONE_WAITER,
  * MNTK_LOOKUP_EXCL_DOTDOT, MNTK_MARKER, MNTK_UNMAPPED_BUFS and
  * MNTK_VMSETSIZE_BUG carry no description here.
  */
 #define MNTK_UNMOUNTF	0x00000001	/* forced unmount in progress */
 #define MNTK_ASYNC	0x00000002	/* filtered async flag */
 #define MNTK_SOFTDEP	0x00000004	/* async disabled by softdep */
 #define MNTK_NOMSYNC	0x00000008	/* don't do msync */
 #define	MNTK_DRAINING	0x00000010	/* lock draining is happening */
 #define	MNTK_REFEXPIRE	0x00000020	/* refcount expiring is happening */
 #define MNTK_EXTENDED_SHARED	0x00000040 /* Allow shared locking for more ops */
 #define	MNTK_SHARED_WRITES	0x00000080 /* Allow shared locking for writes */
 #define	MNTK_NO_IOPF	0x00000100	/* Disallow page faults during reads
 					   and writes. Filesystem shall properly
 					   handle i/o state on EFAULT. */
 #define	MNTK_VGONE_UPPER	0x00000200
 #define	MNTK_VGONE_WAITER	0x00000400
 #define	MNTK_LOOKUP_EXCL_DOTDOT	0x00000800
 #define	MNTK_MARKER		0x00001000
 #define	MNTK_UNMAPPED_BUFS	0x00002000
 #define	MNTK_USES_BCACHE	0x00004000 /* FS uses the buffer cache. */
 #define	MNTK_TEXT_REFS		0x00008000 /* Keep use ref for text */
 #define	MNTK_VMSETSIZE_BUG	0x00010000
 #define	MNTK_UNIONFS	0x00020000	/* A hack for F_ISUNIONSTACK */
 #define MNTK_NOASYNC	0x00800000	/* disable async */
 #define MNTK_UNMOUNT	0x01000000	/* unmount in progress */
 #define	MNTK_MWAIT	0x02000000	/* waiting for unmount to finish */
 #define	MNTK_SUSPEND	0x08000000	/* request write suspension */
 #define	MNTK_SUSPEND2	0x04000000	/* block secondary writes */
 #define	MNTK_SUSPENDED	0x10000000	/* write operations are suspended */
 #define	MNTK_NULL_NOCACHE	0x20000000 /* auto disable cache for nullfs
 					      mounts over this fs */
 #define MNTK_LOOKUP_SHARED	0x40000000 /* FS supports shared lock lookups */
 #define	MNTK_NOKNOTE	0x80000000	/* Don't send KNOTEs from VOP hooks */
 
 #ifdef _KERNEL
 static inline int
 MNT_SHARED_WRITES(struct mount *mp)
 {
 
 	return (mp != NULL && (mp->mnt_kern_flag & MNTK_SHARED_WRITES) != 0);
 }
 
 static inline int
 MNT_EXTENDED_SHARED(struct mount *mp)
 {
 
 	return (mp != NULL && (mp->mnt_kern_flag & MNTK_EXTENDED_SHARED) != 0);
 }
 #endif
 
 /*
  * Sysctl CTL_VFS definitions.
  *
  * Second level identifier specifies which filesystem. Second level
  * identifier VFS_VFSCONF returns information about all filesystems.
  * Second level identifier VFS_GENERIC is non-terminal.
  */
 #define	VFS_VFSCONF		0	/* get configured filesystems */
 #define	VFS_GENERIC		0	/* generic filesystem information */
 /*
  * Third level identifiers for VFS_GENERIC are given below; third
  * level identifiers for specific filesystems are given in their
  * mount specific header files.
  */
 #define VFS_MAXTYPENUM	1	/* int: highest defined filesystem type */
 #define VFS_CONF	2	/* struct: vfsconf for filesystem given
 				   as next argument */
 
 /*
  * Flags for various system call interfaces.
  *
  * waitfor flags to vfs_sync() and getfsstat()
  */
 #define MNT_WAIT	1	/* synchronously wait for I/O to complete */
 #define MNT_NOWAIT	2	/* start all I/O, but do not wait for it */
 #define MNT_LAZY	3	/* push data not written by filesystem syncer */
 #define MNT_SUSPEND	4	/* Suspend file system after sync */
 
 /*
  * Generic file handle
  */
 struct fhandle {
 	fsid_t	fh_fsid;	/* Filesystem id of mount point */
 	struct	fid fh_fid;	/* Filesys specific id */
 };
 typedef struct fhandle	fhandle_t;
 
 /*
  * Old export arguments without security flavor list
  */
 struct oexport_args {
 	int	ex_flags;		/* export related flags */
 	uid_t	ex_root;		/* mapping for root uid */
 	struct	xucred ex_anon;		/* mapping for anonymous user */
 	struct	sockaddr *ex_addr;	/* net address to which exported */
 	u_char	ex_addrlen;		/* and the net address length */
 	struct	sockaddr *ex_mask;	/* mask of valid bits in saddr */
 	u_char	ex_masklen;		/* and the smask length */
 	char	*ex_indexfile;		/* index file for WebNFS URLs */
 };
 
 /*
- * Export arguments for local filesystem mount calls.
+ * Not-quite-so-old export arguments: 32-bit ex_flags, struct xucred ex_anon.
  */
 #define	MAXSECFLAVORS	5
-struct export_args {
+struct o2export_args {
 	int	ex_flags;		/* export related flags */
 	uid_t	ex_root;		/* mapping for root uid */
 	struct	xucred ex_anon;		/* mapping for anonymous user */
 	struct	sockaddr *ex_addr;	/* net address to which exported */
 	u_char	ex_addrlen;		/* and the net address length */
 	struct	sockaddr *ex_mask;	/* mask of valid bits in saddr */
 	u_char	ex_masklen;		/* and the smask length */
 	char	*ex_indexfile;		/* index file for WebNFS URLs */
 	int	ex_numsecflavors;	/* security flavor count */
 	int	ex_secflavors[MAXSECFLAVORS]; /* list of security flavors */
 };
 
 /*
+ * Export arguments for local filesystem mount calls.
+ */
+#define	MAXSECFLAVORS	5
+struct export_args {
+	uint64_t ex_flags;		/* export related flags */
+	uid_t	ex_root;		/* mapping for root uid */
+	uid_t	ex_uid;			/* mapping for anonymous user */
+	int	ex_ngroups;		/* presumably # of ex_groups entries; confirm */
+	gid_t	*ex_groups;		/* presumably anon user's group list; confirm */
+	struct	sockaddr *ex_addr;	/* net address to which exported */
+	u_char	ex_addrlen;		/* and the net address length */
+	struct	sockaddr *ex_mask;	/* mask of valid bits in saddr */
+	u_char	ex_masklen;		/* and the smask length */
+	char	*ex_indexfile;		/* index file for WebNFS URLs */
+	int	ex_numsecflavors;	/* security flavor count */
+	int	ex_secflavors[MAXSECFLAVORS]; /* list of security flavors */
+};
+
+/*
  * Structure holding information for a publicly exported filesystem
  * (WebNFS). Currently the specs allow just for one such filesystem.
  */
 struct nfs_public {
 	int		np_valid;	/* Do we hold valid information */
 	fhandle_t	np_handle;	/* Filehandle for pub fs (internal) */
 	struct mount	*np_mount;	/* Mountpoint of exported fs */
 	char		*np_index;	/* Index file */
 };
 
 /*
  * Filesystem configuration information. One of these exists for each
  * type of filesystem supported by the kernel. These are searched at
  * mount time to identify the requested filesystem.
  *
  * XXX: Never change the first two arguments!
  */
 struct vfsconf {
 	u_int	vfc_version;		/* ABI version number */
 	char	vfc_name[MFSNAMELEN];	/* filesystem type name */
 	struct	vfsops *vfc_vfsops;	/* filesystem operations vector */
 	struct	vfsops *vfc_vfsops_sd;	/* ... signal-deferred */
 	int	vfc_typenum;		/* historic filesystem type number */
 	int	vfc_refcount;		/* number mounted of this type */
 	int	vfc_flags;		/* permanent flags */
 	int	vfc_prison_flag;	/* prison allow.mount.* flag */
 	struct	vfsoptdecl *vfc_opts;	/* mount options */
 	TAILQ_ENTRY(vfsconf) vfc_list;	/* list of vfscons */
 };
 
 /* Userland version of the struct vfsconf. */
 struct xvfsconf {
 	struct	vfsops *vfc_vfsops;	/* filesystem operations vector */
 	char	vfc_name[MFSNAMELEN];	/* filesystem type name */
 	int	vfc_typenum;		/* historic filesystem type number */
 	int	vfc_refcount;		/* number mounted of this type */
 	int	vfc_flags;		/* permanent flags */
 	struct	vfsconf *vfc_next;	/* next in list */
 };
 
 #ifndef BURN_BRIDGES
 struct ovfsconf {
 	void	*vfc_vfsops;
 	char	vfc_name[32];
 	int	vfc_index;
 	int	vfc_refcount;
 	int	vfc_flags;
 };
 #endif
 
 /*
  * NB: these flags refer to IMPLEMENTATION properties, not properties of
  * any actual mounts; i.e., it does not make sense to change the flags.
  */
 #define	VFCF_STATIC	0x00010000	/* statically compiled into kernel */
 #define	VFCF_NETWORK	0x00020000	/* may get data over the network */
 #define	VFCF_READONLY	0x00040000	/* writes are not implemented */
 #define	VFCF_SYNTHETIC	0x00080000	/* data does not represent real files */
 #define	VFCF_LOOPBACK	0x00100000	/* aliases some other mounted FS */
 #define	VFCF_UNICODE	0x00200000	/* stores file names as Unicode */
 #define	VFCF_JAIL	0x00400000	/* can be mounted from within a jail */
 #define	VFCF_DELEGADMIN	0x00800000	/* supports delegated administration */
 #define	VFCF_SBDRY	0x01000000	/* Stop at Boundary: defer stop requests
 					   to kernel->user (AST) transition */
 
 typedef uint32_t fsctlop_t;
 
 struct vfsidctl {
 	int		vc_vers;	/* should be VFSIDCTL_VERS1 (below) */
 	fsid_t		vc_fsid;	/* fsid to operate on */
 	char		vc_fstypename[MFSNAMELEN];
 					/* type of fs 'nfs' or '*' */
 	fsctlop_t	vc_op;		/* operation VFS_CTL_* (below) */
 	void		*vc_ptr;	/* pointer to data structure */
 	size_t		vc_len;		/* sizeof said structure */
 	u_int32_t	vc_spare[12];	/* spare (must be zero) */
 };
 
 /* vfsidctl API version. */
 #define VFS_CTL_VERS1	0x01
 
 /*
  * New style VFS sysctls, do not reuse/conflict with the namespace for
  * private sysctls.
  * All "global" sysctl ops have the 33rd bit set:
  * 0x...1....
  * Private sysctl ops should have the 33rd bit unset.
  */
 #define VFS_CTL_QUERY	0x00010001	/* anything wrong? (vfsquery) */
 #define VFS_CTL_TIMEO	0x00010002	/* set timeout for vfs notification */
 #define VFS_CTL_NOLOCKS	0x00010003	/* disable file locking */
 
 struct vfsquery {
 	u_int32_t	vq_flags;
 	u_int32_t	vq_spare[31];
 };
 
 /* vfsquery flags */
 #define VQ_NOTRESP	0x0001	/* server down */
 #define VQ_NEEDAUTH	0x0002	/* server bad auth */
 #define VQ_LOWDISK	0x0004	/* we're low on space */
 #define VQ_MOUNT	0x0008	/* new filesystem arrived */
 #define VQ_UNMOUNT	0x0010	/* filesystem has left */
 #define VQ_DEAD		0x0020	/* filesystem is dead, needs force unmount */
 #define VQ_ASSIST	0x0040	/* filesystem needs assistance from external
 				   program */
 #define VQ_NOTRESPLOCK	0x0080	/* server lockd down */
 #define VQ_FLAG0100	0x0100	/* placeholder */
 #define VQ_FLAG0200	0x0200	/* placeholder */
 #define VQ_FLAG0400	0x0400	/* placeholder */
 #define VQ_FLAG0800	0x0800	/* placeholder */
 #define VQ_FLAG1000	0x1000	/* placeholder */
 #define VQ_FLAG2000	0x2000	/* placeholder */
 #define VQ_FLAG4000	0x4000	/* placeholder */
 #define VQ_FLAG8000	0x8000	/* placeholder */
 
 #ifdef _KERNEL
 /* Point a sysctl request at a vfsidctl's data. */
 #define VCTLTOREQ(vc, req)						\
 	do {								\
 		(req)->newptr = (vc)->vc_ptr;				\
 		(req)->newlen = (vc)->vc_len;				\
 		(req)->newidx = 0;					\
 	} while (0)
 #endif
 
 struct iovec;
 struct uio;
 
 #ifdef _KERNEL
 
 /*
  * vfs_busy specific flags and mask.
  */
 #define	MBF_NOWAIT	0x01
 #define	MBF_MNTLSTLOCK	0x02
 #define	MBF_MASK	(MBF_NOWAIT | MBF_MNTLSTLOCK)
 
 #ifdef MALLOC_DECLARE
 MALLOC_DECLARE(M_MOUNT);
 MALLOC_DECLARE(M_STATFS);
 #endif
 extern int maxvfsconf;		/* highest defined filesystem type */
 
 TAILQ_HEAD(vfsconfhead, vfsconf);
 extern struct vfsconfhead vfsconf;
 
 /*
  * Operations supported on mounted filesystem.
  */
 struct mount_args;
 struct nameidata;
 struct sysctl_req;
 struct mntarg;
 
 /*
  * N.B., vfs_cmount is the ancient vfsop invoked by the old mount(2) syscall.
  * The new way is vfs_mount.
  *
  * vfs_cmount implementations typically translate arguments from their
  * respective old per-FS structures into the key-value list supported by
  * nmount(2), then use kernel_mount(9) to mimic nmount(2) from kernelspace.
  *
  * Filesystems with mounters that use nmount(2) do not need to and should not
  * implement vfs_cmount.  Hopefully a future cleanup can remove vfs_cmount and
  * mount(2) entirely.
  */
 typedef int vfs_cmount_t(struct mntarg *ma, void *data, uint64_t flags);
 typedef int vfs_unmount_t(struct mount *mp, int mntflags);
 typedef int vfs_root_t(struct mount *mp, int flags, struct vnode **vpp);
 typedef	int vfs_quotactl_t(struct mount *mp, int cmds, uid_t uid, void *arg);
 typedef	int vfs_statfs_t(struct mount *mp, struct statfs *sbp);
 typedef	int vfs_sync_t(struct mount *mp, int waitfor);
 typedef	int vfs_vget_t(struct mount *mp, ino_t ino, int flags,
 		    struct vnode **vpp);
 typedef	int vfs_fhtovp_t(struct mount *mp, struct fid *fhp,
 		    int flags, struct vnode **vpp);
 typedef	int vfs_checkexp_t(struct mount *mp, struct sockaddr *nam,
-		    int *extflagsp, struct ucred **credanonp,
-		    int *numsecflavors, int **secflavors);
+		    uint64_t *extflagsp, struct ucred **credanonp,
+		    int *numsecflavors, int *secflavors);
 typedef	int vfs_init_t(struct vfsconf *);
 typedef	int vfs_uninit_t(struct vfsconf *);
 typedef	int vfs_extattrctl_t(struct mount *mp, int cmd,
 		    struct vnode *filename_vp, int attrnamespace,
 		    const char *attrname);
 typedef	int vfs_mount_t(struct mount *mp);
 typedef int vfs_sysctl_t(struct mount *mp, fsctlop_t op,
 		    struct sysctl_req *req);
 typedef void vfs_susp_clean_t(struct mount *mp);
 typedef void vfs_notify_lowervp_t(struct mount *mp, struct vnode *lowervp);
 typedef void vfs_purge_t(struct mount *mp);
 
 struct vfsops {
 	vfs_mount_t		*vfs_mount;
 	vfs_cmount_t		*vfs_cmount;
 	vfs_unmount_t		*vfs_unmount;
 	vfs_root_t		*vfs_root;
 	vfs_root_t		*vfs_cachedroot;
 	vfs_quotactl_t		*vfs_quotactl;
 	vfs_statfs_t		*vfs_statfs;
 	vfs_sync_t		*vfs_sync;
 	vfs_vget_t		*vfs_vget;
 	vfs_fhtovp_t		*vfs_fhtovp;
 	vfs_checkexp_t		*vfs_checkexp;
 	vfs_init_t		*vfs_init;
 	vfs_uninit_t		*vfs_uninit;
 	vfs_extattrctl_t	*vfs_extattrctl;
 	vfs_sysctl_t		*vfs_sysctl;
 	vfs_susp_clean_t	*vfs_susp_clean;
 	vfs_notify_lowervp_t	*vfs_reclaim_lowervp;
 	vfs_notify_lowervp_t	*vfs_unlink_lowervp;
 	vfs_purge_t		*vfs_purge;
 	vfs_mount_t		*vfs_spare[6];	/* spares for ABI compat */
 };
 
 vfs_statfs_t	__vfs_statfs;
 
 #define	VFS_MOUNT(MP) ({						\
 	int _rc;							\
 									\
 	TSRAW(curthread, TS_ENTER, "VFS_MOUNT", (MP)->mnt_vfc->vfc_name);\
 	_rc = (*(MP)->mnt_op->vfs_mount)(MP);				\
 	TSRAW(curthread, TS_EXIT, "VFS_MOUNT", (MP)->mnt_vfc->vfc_name);\
 	_rc; })
 
 #define	VFS_UNMOUNT(MP, FORCE) ({					\
 	int _rc;							\
 									\
 	_rc = (*(MP)->mnt_op->vfs_unmount)(MP, FORCE);			\
 	_rc; })
 
 #define	VFS_ROOT(MP, FLAGS, VPP) ({					\
 	int _rc;							\
 									\
 	_rc = (*(MP)->mnt_op->vfs_root)(MP, FLAGS, VPP);		\
 	_rc; })
 
 #define	VFS_CACHEDROOT(MP, FLAGS, VPP) ({				\
 	int _rc;							\
 									\
 	_rc = (*(MP)->mnt_op->vfs_cachedroot)(MP, FLAGS, VPP);		\
 	_rc; })
 
 #define	VFS_QUOTACTL(MP, C, U, A) ({					\
 	int _rc;							\
 									\
 	_rc = (*(MP)->mnt_op->vfs_quotactl)(MP, C, U, A);		\
 	_rc; })
 
 #define	VFS_STATFS(MP, SBP) ({						\
 	int _rc;							\
 									\
 	_rc = __vfs_statfs((MP), (SBP));				\
 	_rc; })
 
 #define	VFS_SYNC(MP, WAIT) ({						\
 	int _rc;							\
 									\
 	_rc = (*(MP)->mnt_op->vfs_sync)(MP, WAIT);			\
 	_rc; })
 
 #define	VFS_VGET(MP, INO, FLAGS, VPP) ({				\
 	int _rc;							\
 									\
 	_rc = (*(MP)->mnt_op->vfs_vget)(MP, INO, FLAGS, VPP);		\
 	_rc; })
 
 #define	VFS_FHTOVP(MP, FIDP, FLAGS, VPP) ({				\
 	int _rc;							\
 									\
 	_rc = (*(MP)->mnt_op->vfs_fhtovp)(MP, FIDP, FLAGS, VPP);	\
 	_rc; })
 
 #define	VFS_CHECKEXP(MP, NAM, EXFLG, CRED, NUMSEC, SEC) ({		\
 	int _rc;							\
 									\
 	_rc = (*(MP)->mnt_op->vfs_checkexp)(MP, NAM, EXFLG, CRED, NUMSEC,\
 	    SEC);							\
 	_rc; })
 
 #define	VFS_EXTATTRCTL(MP, C, FN, NS, N) ({				\
 	int _rc;							\
 									\
 	_rc = (*(MP)->mnt_op->vfs_extattrctl)(MP, C, FN, NS, N);	\
 	_rc; })
 
 #define	VFS_SYSCTL(MP, OP, REQ) ({					\
 	int _rc;							\
 									\
 	_rc = (*(MP)->mnt_op->vfs_sysctl)(MP, OP, REQ);			\
 	_rc; })
 
 #define	VFS_SUSP_CLEAN(MP) do {						\
 	if (*(MP)->mnt_op->vfs_susp_clean != NULL) {			\
 		(*(MP)->mnt_op->vfs_susp_clean)(MP);			\
 	}								\
 } while (0)
 
 #define	VFS_RECLAIM_LOWERVP(MP, VP) do {				\
 	if (*(MP)->mnt_op->vfs_reclaim_lowervp != NULL) {		\
 		(*(MP)->mnt_op->vfs_reclaim_lowervp)((MP), (VP));	\
 	}								\
 } while (0)
 
 #define	VFS_UNLINK_LOWERVP(MP, VP) do {					\
 	if (*(MP)->mnt_op->vfs_unlink_lowervp != NULL) {		\
 		(*(MP)->mnt_op->vfs_unlink_lowervp)((MP), (VP));	\
 	}								\
 } while (0)
 
 #define	VFS_PURGE(MP) do {						\
 	if (*(MP)->mnt_op->vfs_purge != NULL) {				\
 		(*(MP)->mnt_op->vfs_purge)(MP);				\
 	}								\
 } while (0)
 
 #define VFS_KNOTE_LOCKED(vp, hint) do					\
 {									\
 	if (((vp)->v_vflag & VV_NOKNOTE) == 0)				\
 		VN_KNOTE((vp), (hint), KNF_LISTLOCKED);			\
 } while (0)
 
 #define VFS_KNOTE_UNLOCKED(vp, hint) do					\
 {									\
 	if (((vp)->v_vflag & VV_NOKNOTE) == 0)				\
 		VN_KNOTE((vp), (hint), 0);				\
 } while (0)
 
 #define	VFS_NOTIFY_UPPER_RECLAIM	1
 #define	VFS_NOTIFY_UPPER_UNLINK		2
 
 #include <sys/module.h>
 
 /*
  * Version numbers.
  */
 #define VFS_VERSION_00	0x19660120
 #define VFS_VERSION_01	0x20121030
 #define VFS_VERSION_02	0x20180504
 #define VFS_VERSION	VFS_VERSION_02
 
 #define VFS_SET(vfsops, fsname, flags) \
 	static struct vfsconf fsname ## _vfsconf = {		\
 		.vfc_version = VFS_VERSION,			\
 		.vfc_name = #fsname,				\
 		.vfc_vfsops = &vfsops,				\
 		.vfc_typenum = -1,				\
 		.vfc_flags = flags,				\
 	};							\
 	static moduledata_t fsname ## _mod = {			\
 		#fsname,					\
 		vfs_modevent,					\
 		& fsname ## _vfsconf				\
 	};							\
 	DECLARE_MODULE(fsname, fsname ## _mod, SI_SUB_VFS, SI_ORDER_MIDDLE)
 
 /*
  * exported vnode operations
  */
 
 int	dounmount(struct mount *, int, struct thread *);
 
 int	kernel_mount(struct mntarg *ma, uint64_t flags);
 int	kernel_vmount(int flags, ...);
 struct mntarg *mount_arg(struct mntarg *ma, const char *name, const void *val, int len);
 struct mntarg *mount_argb(struct mntarg *ma, int flag, const char *name);
 struct mntarg *mount_argf(struct mntarg *ma, const char *name, const char *fmt, ...);
 struct mntarg *mount_argsu(struct mntarg *ma, const char *name, const void *val, int len);
 void	statfs_scale_blocks(struct statfs *sf, long max_size);
 struct vfsconf *vfs_byname(const char *);
 struct vfsconf *vfs_byname_kld(const char *, struct thread *td, int *);
 void	vfs_mount_destroy(struct mount *);
 void	vfs_event_signal(fsid_t *, u_int32_t, intptr_t);
 void	vfs_freeopts(struct vfsoptlist *opts);
 void	vfs_deleteopt(struct vfsoptlist *opts, const char *name);
 int	vfs_buildopts(struct uio *auio, struct vfsoptlist **options);
 int	vfs_flagopt(struct vfsoptlist *opts, const char *name, uint64_t *w,
 	    uint64_t val);
 int	vfs_getopt(struct vfsoptlist *, const char *, void **, int *);
 int	vfs_getopt_pos(struct vfsoptlist *opts, const char *name);
 int	vfs_getopt_size(struct vfsoptlist *opts, const char *name,
 	    off_t *value);
 char	*vfs_getopts(struct vfsoptlist *, const char *, int *error);
 int	vfs_copyopt(struct vfsoptlist *, const char *, void *, int);
 int	vfs_filteropt(struct vfsoptlist *, const char **legal);
 void	vfs_opterror(struct vfsoptlist *opts, const char *fmt, ...);
 int	vfs_scanopt(struct vfsoptlist *opts, const char *name, const char *fmt, ...);
 int	vfs_setopt(struct vfsoptlist *opts, const char *name, void *value,
 	    int len);
 int	vfs_setopt_part(struct vfsoptlist *opts, const char *name, void *value,
 	    int len);
 int	vfs_setopts(struct vfsoptlist *opts, const char *name,
 	    const char *value);
 int	vfs_setpublicfs			    /* set publicly exported fs */
 	    (struct mount *, struct netexport *, struct export_args *);
 void	vfs_periodic(struct mount *, int);
 int	vfs_busy(struct mount *, int);
 int	vfs_export			 /* process mount export info */
 	    (struct mount *, struct export_args *);
 void	vfs_allocate_syncvnode(struct mount *);
 void	vfs_deallocate_syncvnode(struct mount *);
 int	vfs_donmount(struct thread *td, uint64_t fsflags,
 	    struct uio *fsoptions);
 void	vfs_getnewfsid(struct mount *);
 struct cdev *vfs_getrootfsid(struct mount *);
 struct	mount *vfs_getvfs(fsid_t *);      /* return vfs given fsid */
 struct	mount *vfs_busyfs(fsid_t *);
 int	vfs_modevent(module_t, int, void *);
 void	vfs_mount_error(struct mount *, const char *, ...);
 void	vfs_mountroot(void);			/* mount our root filesystem */
 void	vfs_mountedfrom(struct mount *, const char *from);
 void	vfs_notify_upper(struct vnode *, int);
-void	vfs_oexport_conv(const struct oexport_args *oexp,
-	    struct export_args *exp);
 void	vfs_ref(struct mount *);
 void	vfs_rel(struct mount *);
 struct mount *vfs_mount_alloc(struct vnode *, struct vfsconf *, const char *,
 	    struct ucred *);
 int	vfs_suser(struct mount *, struct thread *);
 void	vfs_unbusy(struct mount *);
 void	vfs_unmountall(void);
 extern	TAILQ_HEAD(mntlist, mount) mountlist;	/* mounted filesystem list */
 extern	struct mtx mountlist_mtx;
 extern	struct nfs_public nfs_pub;
 extern	struct sx vfsconf_sx;
 #define	vfsconf_lock()		sx_xlock(&vfsconf_sx)
 #define	vfsconf_unlock()	sx_xunlock(&vfsconf_sx)
 #define	vfsconf_slock()		sx_slock(&vfsconf_sx)
 #define	vfsconf_sunlock()	sx_sunlock(&vfsconf_sx)
 struct vnode *mntfs_allocvp(struct mount *, struct vnode *);
 void   mntfs_freevp(struct vnode *);
 
 /*
  * Declarations for these vfs default operations are located in
  * kern/vfs_default.c.  They will be automatically used to replace
  * null entries in VFS ops tables when registering a new filesystem
  * type in the global table.
  */
 vfs_root_t		vfs_stdroot;
 vfs_quotactl_t		vfs_stdquotactl;
 vfs_statfs_t		vfs_stdstatfs;
 vfs_sync_t		vfs_stdsync;
 vfs_sync_t		vfs_stdnosync;
 vfs_vget_t		vfs_stdvget;
 vfs_fhtovp_t		vfs_stdfhtovp;
 vfs_checkexp_t		vfs_stdcheckexp;
 vfs_init_t		vfs_stdinit;
 vfs_uninit_t		vfs_stduninit;
 vfs_extattrctl_t	vfs_stdextattrctl;
 vfs_sysctl_t		vfs_stdsysctl;
 
 void	syncer_suspend(void);
 void	syncer_resume(void);
 
 struct vnode *vfs_cache_root_clear(struct mount *);
 void	vfs_cache_root_set(struct mount *, struct vnode *);
 
 void	vfs_op_barrier_wait(struct mount *);
 void	vfs_op_enter(struct mount *);
 void	vfs_op_exit_locked(struct mount *);
 void	vfs_op_exit(struct mount *);
 
 #ifdef DIAGNOSTIC
 void	vfs_assert_mount_counters(struct mount *);
 void	vfs_dump_mount_counters(struct mount *);
 #else
 #define vfs_assert_mount_counters(mp) do { } while (0)
 #define vfs_dump_mount_counters(mp) do { } while (0)
 #endif
 
 enum mount_counter { MNT_COUNT_REF, MNT_COUNT_LOCKREF, MNT_COUNT_WRITEOPCOUNT };
 int	vfs_mount_fetch_counter(struct mount *, enum mount_counter);
 
 /*
  * Code transitioning mnt_vfs_ops to > 0 issues IPIs until it observes
  * all CPUs not executing code enclosed by mnt_thread_in_ops_pcpu.
  *
  * This provides an invariant that by the time the last CPU is observed not
  * executing, everyone else entering will see the counter > 0 and exit.
  *
  * Note there is no barrier between vfs_ops and the rest of the code in the
  * section. It is not necessary as the writer has to wait for everyone to drain
  * before making any changes or only make changes safe while the section is
  * executed.
  */
 #define vfs_op_thread_entered(mp) ({				\
 	MPASS(curthread->td_critnest > 0);			\
 	*zpcpu_get(mp->mnt_thread_in_ops_pcpu) == 1;		\
 })
 
 #define vfs_op_thread_enter(mp) ({				\
 	bool _retval = true;					\
 	critical_enter();					\
 	MPASS(!vfs_op_thread_entered(mp));			\
 	zpcpu_set_protected(mp->mnt_thread_in_ops_pcpu, 1);	\
 	__compiler_membar();					\
 	if (__predict_false(mp->mnt_vfs_ops > 0)) {		\
 		vfs_op_thread_exit(mp);				\
 		_retval = false;				\
 	}							\
 	_retval;						\
 })
 
 #define vfs_op_thread_exit(mp) do {				\
 	MPASS(vfs_op_thread_entered(mp));			\
 	__compiler_membar();					\
 	zpcpu_set_protected(mp->mnt_thread_in_ops_pcpu, 0);	\
 	critical_exit();					\
 } while (0)
 
 #define vfs_mp_count_add_pcpu(mp, count, val) do {		\
 	MPASS(vfs_op_thread_entered(mp));			\
 	zpcpu_add_protected(mp->mnt_##count##_pcpu, val);	\
 } while (0)
 
 #define vfs_mp_count_sub_pcpu(mp, count, val) do {		\
 	MPASS(vfs_op_thread_entered(mp));			\
 	zpcpu_sub_protected(mp->mnt_##count##_pcpu, val);	\
 } while (0)
 
 #else /* !_KERNEL */
 
 #include <sys/cdefs.h>
 
 struct stat;
 
 __BEGIN_DECLS
 int	fhlink(struct fhandle *, const char *);
 int	fhlinkat(struct fhandle *, int, const char *);
 int	fhopen(const struct fhandle *, int);
 int	fhreadlink(struct fhandle *, char *, size_t);
 int	fhstat(const struct fhandle *, struct stat *);
 int	fhstatfs(const struct fhandle *, struct statfs *);
 int	fstatfs(int, struct statfs *);
 int	getfh(const char *, fhandle_t *);
 int	getfhat(int, char *, struct fhandle *, int);
 int	getfsstat(struct statfs *, long, int);
 int	getmntinfo(struct statfs **, int);
 int	lgetfh(const char *, fhandle_t *);
 int	mount(const char *, const char *, int, void *);
 int	nmount(struct iovec *, unsigned int, int);
 int	statfs(const char *, struct statfs *);
 int	unmount(const char *, int);
 
 /* C library stuff */
 int	getvfsbyname(const char *, struct xvfsconf *);
 __END_DECLS
 
 #endif /* _KERNEL */
 
 #endif /* !_SYS_MOUNT_H_ */
Index: head/sys/ufs/ffs/ffs_vfsops.c
===================================================================
--- head/sys/ufs/ffs/ffs_vfsops.c	(revision 362157)
+++ head/sys/ufs/ffs/ffs_vfsops.c	(revision 362158)
@@ -1,2464 +1,2462 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 1989, 1991, 1993, 1994
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)ffs_vfsops.c	8.31 (Berkeley) 5/20/95
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_quota.h"
 #include "opt_ufs.h"
 #include "opt_ffs.h"
 #include "opt_ddb.h"
 
 #include <sys/param.h>
 #include <sys/gsb_crc32.h>
 #include <sys/systm.h>
 #include <sys/namei.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/taskqueue.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/vnode.h>
 #include <sys/mount.h>
 #include <sys/bio.h>
 #include <sys/buf.h>
 #include <sys/conf.h>
 #include <sys/fcntl.h>
 #include <sys/ioccom.h>
 #include <sys/malloc.h>
 #include <sys/mutex.h>
 #include <sys/rwlock.h>
 #include <sys/sysctl.h>
 #include <sys/vmmeter.h>
 
 #include <security/mac/mac_framework.h>
 
 #include <ufs/ufs/dir.h>
 #include <ufs/ufs/extattr.h>
 #include <ufs/ufs/gjournal.h>
 #include <ufs/ufs/quota.h>
 #include <ufs/ufs/ufsmount.h>
 #include <ufs/ufs/inode.h>
 #include <ufs/ufs/ufs_extern.h>
 
 #include <ufs/ffs/fs.h>
 #include <ufs/ffs/ffs_extern.h>
 
 #include <vm/vm.h>
 #include <vm/uma.h>
 #include <vm/vm_page.h>
 
 #include <geom/geom.h>
 #include <geom/geom_vfs.h>
 
 #include <ddb/ddb.h>
 
 static uma_zone_t uma_inode, uma_ufs1, uma_ufs2;
 
 static int	ffs_mountfs(struct vnode *, struct mount *, struct thread *);
 static void	ffs_oldfscompat_read(struct fs *, struct ufsmount *,
 		    ufs2_daddr_t);
 static void	ffs_ifree(struct ufsmount *ump, struct inode *ip);
 static int	ffs_sync_lazy(struct mount *mp);
 static int	ffs_use_bread(void *devfd, off_t loc, void **bufp, int size);
 static int	ffs_use_bwrite(void *devfd, off_t loc, void *buf, int size);
 
 static vfs_init_t ffs_init;
 static vfs_uninit_t ffs_uninit;
 static vfs_extattrctl_t ffs_extattrctl;
 static vfs_cmount_t ffs_cmount;
 static vfs_unmount_t ffs_unmount;
 static vfs_mount_t ffs_mount;
 static vfs_statfs_t ffs_statfs;
 static vfs_fhtovp_t ffs_fhtovp;
 static vfs_sync_t ffs_sync;
 
 static struct vfsops ufs_vfsops = {
 	.vfs_extattrctl =	ffs_extattrctl,
 	.vfs_fhtovp =		ffs_fhtovp,
 	.vfs_init =		ffs_init,
 	.vfs_mount =		ffs_mount,
 	.vfs_cmount =		ffs_cmount,
 	.vfs_quotactl =		ufs_quotactl,
 	.vfs_root =		vfs_cache_root,
 	.vfs_cachedroot =	ufs_root,
 	.vfs_statfs =		ffs_statfs,
 	.vfs_sync =		ffs_sync,
 	.vfs_uninit =		ffs_uninit,
 	.vfs_unmount =		ffs_unmount,
 	.vfs_vget =		ffs_vget,
 	.vfs_susp_clean =	process_deferred_inactive,
 };
 
 VFS_SET(ufs_vfsops, ufs, 0);
 MODULE_VERSION(ufs, 1);
 
 static b_strategy_t ffs_geom_strategy;
 static b_write_t ffs_bufwrite;
 
 static struct buf_ops ffs_ops = {
 	.bop_name =	"FFS",
 	.bop_write =	ffs_bufwrite,
 	.bop_strategy =	ffs_geom_strategy,
 	.bop_sync =	bufsync,
 #ifdef NO_FFS_SNAPSHOT
 	.bop_bdflush =	bufbdflush,
 #else
 	.bop_bdflush =	ffs_bdflush,
 #endif
 };
 
 /*
  * Note that userquota and groupquota options are not currently used
  * by UFS/FFS code and generally mount(8) does not pass those options
  * from userland, but they can be passed by loader(8) via
  * vfs.root.mountfrom.options.
  */
 static const char *ffs_opts[] = { "acls", "async", "noatime", "noclusterr",
     "noclusterw", "noexec", "export", "force", "from", "groupquota",
     "multilabel", "nfsv4acls", "fsckpid", "snapshot", "nosuid", "suiddir",
     "nosymfollow", "sync", "union", "userquota", "untrusted", NULL };
 
 static int ffs_enxio_enable = 1;
 SYSCTL_DECL(_vfs_ffs);
 SYSCTL_INT(_vfs_ffs, OID_AUTO, enxio_enable, CTLFLAG_RWTUN,
     &ffs_enxio_enable, 0,
     "enable mapping of other disk I/O errors to ENXIO");
 
 static int
 ffs_mount(struct mount *mp)
 {
 	struct vnode *devvp, *odevvp;
 	struct thread *td;
 	struct ufsmount *ump = NULL;
 	struct fs *fs;
 	pid_t fsckpid = 0;
 	int error, error1, flags;
 	uint64_t mntorflags, saved_mnt_flag;
 	accmode_t accmode;
 	struct nameidata ndp;
 	char *fspec;
 
 	td = curthread;
 	if (vfs_filteropt(mp->mnt_optnew, ffs_opts))
 		return (EINVAL);
 	if (uma_inode == NULL) {
 		uma_inode = uma_zcreate("FFS inode",
 		    sizeof(struct inode), NULL, NULL, NULL, NULL,
 		    UMA_ALIGN_PTR, 0);
 		uma_ufs1 = uma_zcreate("FFS1 dinode",
 		    sizeof(struct ufs1_dinode), NULL, NULL, NULL, NULL,
 		    UMA_ALIGN_PTR, 0);
 		uma_ufs2 = uma_zcreate("FFS2 dinode",
 		    sizeof(struct ufs2_dinode), NULL, NULL, NULL, NULL,
 		    UMA_ALIGN_PTR, 0);
 	}
 
 	vfs_deleteopt(mp->mnt_optnew, "groupquota");
 	vfs_deleteopt(mp->mnt_optnew, "userquota");
 
 	fspec = vfs_getopts(mp->mnt_optnew, "from", &error);
 	if (error)
 		return (error);
 
 	mntorflags = 0;
 	if (vfs_getopt(mp->mnt_optnew, "untrusted", NULL, NULL) == 0)
 		mntorflags |= MNT_UNTRUSTED;
 
 	if (vfs_getopt(mp->mnt_optnew, "acls", NULL, NULL) == 0)
 		mntorflags |= MNT_ACLS;
 
 	if (vfs_getopt(mp->mnt_optnew, "snapshot", NULL, NULL) == 0) {
 		mntorflags |= MNT_SNAPSHOT;
 		/*
 		 * Once we have set the MNT_SNAPSHOT flag, do not
 		 * persist "snapshot" in the options list.
 		 */
 		vfs_deleteopt(mp->mnt_optnew, "snapshot");
 		vfs_deleteopt(mp->mnt_opt, "snapshot");
 	}
 
 	if (vfs_getopt(mp->mnt_optnew, "fsckpid", NULL, NULL) == 0 &&
 	    vfs_scanopt(mp->mnt_optnew, "fsckpid", "%d", &fsckpid) == 1) {
 		/*
 		 * Once we have set the restricted PID, do not
 		 * persist "fsckpid" in the options list.
 		 */
 		vfs_deleteopt(mp->mnt_optnew, "fsckpid");
 		vfs_deleteopt(mp->mnt_opt, "fsckpid");
 		if (mp->mnt_flag & MNT_UPDATE) {
 			if (VFSTOUFS(mp)->um_fs->fs_ronly == 0 &&
 			     vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0) == 0) {
 				vfs_mount_error(mp,
 				    "Checker enable: Must be read-only");
 				return (EINVAL);
 			}
 		} else if (vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0) == 0) {
 			vfs_mount_error(mp,
 			    "Checker enable: Must be read-only");
 			return (EINVAL);
 		}
 		/* Set to -1 if we are done */
 		if (fsckpid == 0)
 			fsckpid = -1;
 	}
 
 	if (vfs_getopt(mp->mnt_optnew, "nfsv4acls", NULL, NULL) == 0) {
 		if (mntorflags & MNT_ACLS) {
 			vfs_mount_error(mp,
 			    "\"acls\" and \"nfsv4acls\" options "
 			    "are mutually exclusive");
 			return (EINVAL);
 		}
 		mntorflags |= MNT_NFS4ACLS;
 	}
 
 	MNT_ILOCK(mp);
 	mp->mnt_flag |= mntorflags;
 	MNT_IUNLOCK(mp);
 	/*
 	 * If updating, check whether changing from read-only to
 	 * read/write; if there is no device name, that's all we do.
 	 */
 	if (mp->mnt_flag & MNT_UPDATE) {
 		ump = VFSTOUFS(mp);
 		fs = ump->um_fs;
 		odevvp = ump->um_odevvp;
 		devvp = ump->um_devvp;
 		if (fsckpid == -1 && ump->um_fsckpid > 0) {
 			if ((error = ffs_flushfiles(mp, WRITECLOSE, td)) != 0 ||
 			    (error = ffs_sbupdate(ump, MNT_WAIT, 0)) != 0)
 				return (error);
 			g_topology_lock();
 			/*
 			 * Return to normal read-only mode.
 			 */
 			error = g_access(ump->um_cp, 0, -1, 0);
 			g_topology_unlock();
 			ump->um_fsckpid = 0;
 		}
 		if (fs->fs_ronly == 0 &&
 		    vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0)) {
 			/*
 			 * Flush any dirty data and suspend filesystem.
 			 */
 			if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0)
 				return (error);
 			error = vfs_write_suspend_umnt(mp);
 			if (error != 0)
 				return (error);
 			/*
 			 * Check for and optionally get rid of files open
 			 * for writing.
 			 */
 			flags = WRITECLOSE;
 			if (mp->mnt_flag & MNT_FORCE)
 				flags |= FORCECLOSE;
 			if (MOUNTEDSOFTDEP(mp)) {
 				error = softdep_flushfiles(mp, flags, td);
 			} else {
 				error = ffs_flushfiles(mp, flags, td);
 			}
 			if (error) {
 				vfs_write_resume(mp, 0);
 				return (error);
 			}
 			if (fs->fs_pendingblocks != 0 ||
 			    fs->fs_pendinginodes != 0) {
 				printf("WARNING: %s Update error: blocks %jd "
 				    "files %d\n", fs->fs_fsmnt, 
 				    (intmax_t)fs->fs_pendingblocks,
 				    fs->fs_pendinginodes);
 				fs->fs_pendingblocks = 0;
 				fs->fs_pendinginodes = 0;
 			}
 			if ((fs->fs_flags & (FS_UNCLEAN | FS_NEEDSFSCK)) == 0)
 				fs->fs_clean = 1;
 			if ((error = ffs_sbupdate(ump, MNT_WAIT, 0)) != 0) {
 				fs->fs_ronly = 0;
 				fs->fs_clean = 0;
 				vfs_write_resume(mp, 0);
 				return (error);
 			}
 			if (MOUNTEDSOFTDEP(mp))
 				softdep_unmount(mp);
 			g_topology_lock();
 			/*
 			 * Drop our write and exclusive access.
 			 */
 			g_access(ump->um_cp, 0, -1, -1);
 			g_topology_unlock();
 			fs->fs_ronly = 1;
 			MNT_ILOCK(mp);
 			mp->mnt_flag |= MNT_RDONLY;
 			MNT_IUNLOCK(mp);
 			/*
 			 * Allow the writers to note that filesystem
 			 * is ro now.
 			 */
 			vfs_write_resume(mp, 0);
 		}
 		if ((mp->mnt_flag & MNT_RELOAD) &&
 		    (error = ffs_reload(mp, td, 0)) != 0)
 			return (error);
 		if (fs->fs_ronly &&
 		    !vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0)) {
 			/*
 			 * If we are running a checker, do not allow upgrade.
 			 */
 			if (ump->um_fsckpid > 0) {
 				vfs_mount_error(mp,
 				    "Active checker, cannot upgrade to write");
 				return (EINVAL);
 			}
 			/*
 			 * If upgrade to read-write by non-root, then verify
 			 * that user has necessary permissions on the device.
 			 */
 			vn_lock(odevvp, LK_EXCLUSIVE | LK_RETRY);
 			error = VOP_ACCESS(odevvp, VREAD | VWRITE,
 			    td->td_ucred, td);
 			if (error)
 				error = priv_check(td, PRIV_VFS_MOUNT_PERM);
 			VOP_UNLOCK(odevvp);
 			if (error) {
 				return (error);
 			}
 			fs->fs_flags &= ~FS_UNCLEAN;
 			if (fs->fs_clean == 0) {
 				fs->fs_flags |= FS_UNCLEAN;
 				if ((mp->mnt_flag & MNT_FORCE) ||
 				    ((fs->fs_flags &
 				     (FS_SUJ | FS_NEEDSFSCK)) == 0 &&
 				     (fs->fs_flags & FS_DOSOFTDEP))) {
 					printf("WARNING: %s was not properly "
 					   "dismounted\n", fs->fs_fsmnt);
 				} else {
 					vfs_mount_error(mp,
 					   "R/W mount of %s denied. %s.%s",
 					   fs->fs_fsmnt,
 					   "Filesystem is not clean - run fsck",
 					   (fs->fs_flags & FS_SUJ) == 0 ? "" :
 					   " Forced mount will invalidate"
 					   " journal contents");
 					return (EPERM);
 				}
 			}
 			g_topology_lock();
 			/*
 			 * Request exclusive write access.
 			 */
 			error = g_access(ump->um_cp, 0, 1, 1);
 			g_topology_unlock();
 			if (error)
 				return (error);
 			if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0)
 				return (error);
 			error = vfs_write_suspend_umnt(mp);
 			if (error != 0)
 				return (error);
 			fs->fs_ronly = 0;
 			MNT_ILOCK(mp);
 			saved_mnt_flag = MNT_RDONLY;
 			if (MOUNTEDSOFTDEP(mp) && (mp->mnt_flag &
 			    MNT_ASYNC) != 0)
 				saved_mnt_flag |= MNT_ASYNC;
 			mp->mnt_flag &= ~saved_mnt_flag;
 			MNT_IUNLOCK(mp);
 			fs->fs_mtime = time_second;
 			/* check to see if we need to start softdep */
 			if ((fs->fs_flags & FS_DOSOFTDEP) &&
 			    (error = softdep_mount(devvp, mp, fs, td->td_ucred))){
 				fs->fs_ronly = 1;
 				MNT_ILOCK(mp);
 				mp->mnt_flag |= saved_mnt_flag;
 				MNT_IUNLOCK(mp);
 				vfs_write_resume(mp, 0);
 				return (error);
 			}
 			fs->fs_clean = 0;
 			if ((error = ffs_sbupdate(ump, MNT_WAIT, 0)) != 0) {
 				fs->fs_ronly = 1;
 				MNT_ILOCK(mp);
 				mp->mnt_flag |= saved_mnt_flag;
 				MNT_IUNLOCK(mp);
 				vfs_write_resume(mp, 0);
 				return (error);
 			}
 			if (fs->fs_snapinum[0] != 0)
 				ffs_snapshot_mount(mp);
 			vfs_write_resume(mp, 0);
 		}
 		/*
 		 * Soft updates is incompatible with "async",
 		 * so if we are doing softupdates stop the user
 		 * from setting the async flag in an update.
 		 * Softdep_mount() clears it in an initial mount
 		 * or ro->rw remount.
 		 */
 		if (MOUNTEDSOFTDEP(mp)) {
 			/* XXX: Reset too late ? */
 			MNT_ILOCK(mp);
 			mp->mnt_flag &= ~MNT_ASYNC;
 			MNT_IUNLOCK(mp);
 		}
 		/*
 		 * Keep MNT_ACLS flag if it is stored in superblock.
 		 */
 		if ((fs->fs_flags & FS_ACLS) != 0) {
 			/* XXX: Set too late ? */
 			MNT_ILOCK(mp);
 			mp->mnt_flag |= MNT_ACLS;
 			MNT_IUNLOCK(mp);
 		}
 
 		if ((fs->fs_flags & FS_NFS4ACLS) != 0) {
 			/* XXX: Set too late ? */
 			MNT_ILOCK(mp);
 			mp->mnt_flag |= MNT_NFS4ACLS;
 			MNT_IUNLOCK(mp);
 		}
 		/*
 		 * If this is a request from fsck to clean up the filesystem,
 		 * then allow the specified pid to proceed.
 		 */
 		if (fsckpid > 0) {
 			if (ump->um_fsckpid != 0) {
 				vfs_mount_error(mp,
 				    "Active checker already running on %s",
 				    fs->fs_fsmnt);
 				return (EINVAL);
 			}
 			KASSERT(MOUNTEDSOFTDEP(mp) == 0,
 			    ("soft updates enabled on read-only file system"));
 			g_topology_lock();
 			/*
 			 * Request write access.
 			 */
 			error = g_access(ump->um_cp, 0, 1, 0);
 			g_topology_unlock();
 			if (error) {
 				vfs_mount_error(mp,
 				    "Checker activation failed on %s",
 				    fs->fs_fsmnt);
 				return (error);
 			}
 			ump->um_fsckpid = fsckpid;
 			if (fs->fs_snapinum[0] != 0)
 				ffs_snapshot_mount(mp);
 			fs->fs_mtime = time_second;
 			fs->fs_fmod = 1;
 			fs->fs_clean = 0;
 			(void) ffs_sbupdate(ump, MNT_WAIT, 0);
 		}
 
 		/*
 		 * If this is a snapshot request, take the snapshot.
 		 */
 		if (mp->mnt_flag & MNT_SNAPSHOT)
 			return (ffs_snapshot(mp, fspec));
 
 		/*
 		 * Must not call namei() while owning busy ref.
 		 */
 		vfs_unbusy(mp);
 	}
 
 	/*
 	 * Not an update, or updating the name: look up the name
 	 * and verify that it refers to a sensible disk device.
 	 */
 	NDINIT(&ndp, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspec, td);
 	error = namei(&ndp);
 	if ((mp->mnt_flag & MNT_UPDATE) != 0) {
 		/*
 		 * Unmount does not start if MNT_UPDATE is set.  Mount
 		 * update busies mp before setting MNT_UPDATE.  We
 		 * must be able to retain our busy ref successfully,
 		 * without sleep.
 		 */
 		error1 = vfs_busy(mp, MBF_NOWAIT);
 		MPASS(error1 == 0);
 	}
 	if (error != 0)
 		return (error);
 	NDFREE(&ndp, NDF_ONLY_PNBUF);
 	devvp = ndp.ni_vp;
 	if (!vn_isdisk(devvp, &error)) {
 		vput(devvp);
 		return (error);
 	}
 
 	/*
 	 * If mount by non-root, then verify that user has necessary
 	 * permissions on the device.
 	 */
 	accmode = VREAD;
 	if ((mp->mnt_flag & MNT_RDONLY) == 0)
 		accmode |= VWRITE;
 	error = VOP_ACCESS(devvp, accmode, td->td_ucred, td);
 	if (error)
 		error = priv_check(td, PRIV_VFS_MOUNT_PERM);
 	if (error) {
 		vput(devvp);
 		return (error);
 	}
 
 	if (mp->mnt_flag & MNT_UPDATE) {
 		/*
 		 * Update only
 		 *
 		 * If it's not the same vnode, or at least the same device
 		 * then it's not correct.
 		 */
 
 		if (devvp->v_rdev != ump->um_devvp->v_rdev)
 			error = EINVAL;	/* needs translation */
 		vput(devvp);
 		if (error)
 			return (error);
 	} else {
 		/*
 		 * New mount
 		 *
 		 * We need the name for the mount point (also used for
 		 * "last mounted on") copied in. If an error occurs,
 		 * the mount point is discarded by the upper level code.
 		 * Note that vfs_mount_alloc() populates f_mntonname for us.
 		 */
 		if ((error = ffs_mountfs(devvp, mp, td)) != 0) {
 			vrele(devvp);
 			return (error);
 		}
 		if (fsckpid > 0) {
 			KASSERT(MOUNTEDSOFTDEP(mp) == 0,
 			    ("soft updates enabled on read-only file system"));
 			ump = VFSTOUFS(mp);
 			fs = ump->um_fs;
 			g_topology_lock();
 			/*
 			 * Request write access.
 			 */
 			error = g_access(ump->um_cp, 0, 1, 0);
 			g_topology_unlock();
 			if (error) {
 				printf("WARNING: %s: Checker activation "
 				    "failed\n", fs->fs_fsmnt);
 			} else { 
 				ump->um_fsckpid = fsckpid;
 				if (fs->fs_snapinum[0] != 0)
 					ffs_snapshot_mount(mp);
 				fs->fs_mtime = time_second;
 				fs->fs_clean = 0;
 				(void) ffs_sbupdate(ump, MNT_WAIT, 0);
 			}
 		}
 	}
 	vfs_mountedfrom(mp, fspec);
 	return (0);
 }
 
 /*
  * Compatibility with old mount system call.
  *
  * Copies a struct ufs_args in from 'data' with copyin(), then appends
  * its fspec as the "from" mount argument and its export member as the
  * "export" mount argument before handing the list to kernel_mount().
  *
  * Returns EINVAL when 'data' is NULL, the copyin() error if the copy
  * fails, and otherwise whatever kernel_mount() returns.
  */
 
 static int
 ffs_cmount(struct mntarg *ma, void *data, uint64_t flags)
 {
 	struct ufs_args args;
-	struct export_args exp;
 	int error;
 
 	if (data == NULL)
 		return (EINVAL);
 	error = copyin(data, &args, sizeof args);
 	if (error)
 		return (error);
-	vfs_oexport_conv(&args.export, &exp);
 
 	ma = mount_argsu(ma, "from", args.fspec, MAXPATHLEN);
-	ma = mount_arg(ma, "export", &exp, sizeof(exp));
+	ma = mount_arg(ma, "export", &args.export, sizeof(args.export));
 	error = kernel_mount(ma, flags);
 
 	return (error);
 }
 
 /*
  * Reload all incore data for a filesystem (used after running fsck on
  * the root filesystem and finding things to fix). Unless FFSR_FORCE is
  * set in 'flags', the filesystem must be mounted read-only; otherwise
  * EINVAL is returned.
  *
  * Things to do to update the mount:
  *	1) invalidate all cached meta-data.
  *	2) re-read superblock from disk.
  *	3) re-read summary information from disk.
  *	4) invalidate all inactive vnodes.
  *	5) clear MNTK_SUSPEND2 and MNTK_SUSPENDED flags, allowing secondary
  *	   writers, if requested.
  *	6) invalidate all cached file data.
  *	7) re-read inode data for all active vnodes.
  *
  * NOTE(review): no statement below is labelled as step 4; presumably the
  * vget()/vput() cycle in the vnode loop covers it -- confirm.
  *
  * Returns 0 on success, EINVAL as described above, EIO when the re-read
  * superblock fails the magic or block size checks, or the error reported
  * by bread() or ffs_load_inode().
  */
 int
 ffs_reload(struct mount *mp, struct thread *td, int flags)
 {
 	struct vnode *vp, *mvp, *devvp;
 	struct inode *ip;
 	void *space;
 	struct buf *bp;
 	struct fs *fs, *newfs;
 	struct ufsmount *ump;
 	ufs2_daddr_t sblockloc;
 	int i, blks, error;
 	u_long size;
 	int32_t *lp;
 
 	ump = VFSTOUFS(mp);
 
 	/* Refuse to reload a read/write mount unless the caller forces it. */
 	MNT_ILOCK(mp);
 	if ((mp->mnt_flag & MNT_RDONLY) == 0 && (flags & FFSR_FORCE) == 0) {
 		MNT_IUNLOCK(mp);
 		return (EINVAL);
 	}
 	MNT_IUNLOCK(mp);
 	
 	/*
 	 * Step 1: invalidate all cached meta-data.
 	 */
 	devvp = VFSTOUFS(mp)->um_devvp;
 	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
 	if (vinvalbuf(devvp, 0, 0, 0) != 0)
 		panic("ffs_reload: dirty1");
 	VOP_UNLOCK(devvp);
 
 	/*
 	 * Step 2: re-read superblock from disk.
 	 */
 	fs = VFSTOUFS(mp)->um_fs;
 	if ((error = bread(devvp, btodb(fs->fs_sblockloc), fs->fs_sbsize,
 	    NOCRED, &bp)) != 0)
 		return (error);
 	newfs = (struct fs *)bp->b_data;
 	/* Sanity check the on-disk copy before it replaces the incore one. */
 	if ((newfs->fs_magic != FS_UFS1_MAGIC &&
 	     newfs->fs_magic != FS_UFS2_MAGIC) ||
 	    newfs->fs_bsize > MAXBSIZE ||
 	    newfs->fs_bsize < sizeof(struct fs)) {
 			brelse(bp);
 			return (EIO);		/* XXX needs translation */
 	}
 	/*
 	 * Copy pointer fields back into superblock before copying in	XXX
 	 * new superblock. These should really be in the ufsmount.	XXX
 	 * Note that important parameters (eg fs_ncg) are unchanged.
 	 */
 	newfs->fs_csp = fs->fs_csp;
 	newfs->fs_maxcluster = fs->fs_maxcluster;
 	newfs->fs_contigdirs = fs->fs_contigdirs;
 	newfs->fs_active = fs->fs_active;
 	newfs->fs_ronly = fs->fs_ronly;
 	/* Remember the location; the bcopy() below overwrites fs_sblockloc. */
 	sblockloc = fs->fs_sblockloc;
 	bcopy(newfs, fs, (u_int)fs->fs_sbsize);
 	brelse(bp);
 	mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
 	ffs_oldfscompat_read(fs, VFSTOUFS(mp), sblockloc);
 	UFS_LOCK(ump);
 	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
 		printf("WARNING: %s: reload pending error: blocks %jd "
 		    "files %d\n", fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
 		    fs->fs_pendinginodes);
 		fs->fs_pendingblocks = 0;
 		fs->fs_pendinginodes = 0;
 	}
 	UFS_UNLOCK(ump);
 
 	/*
 	 * Step 3: re-read summary information from disk.
 	 *
 	 * A single allocation holds the summary area followed by the
 	 * optional per-cylinder-group cluster table (one int32_t each) and
 	 * the contigdirs table (one u_int8_t each).
 	 */
 	size = fs->fs_cssize;
 	blks = howmany(size, fs->fs_fsize);
 	if (fs->fs_contigsumsize > 0)
 		size += fs->fs_ncg * sizeof(int32_t);
 	size += fs->fs_ncg * sizeof(u_int8_t);
 	free(fs->fs_csp, M_UFSMNT);
 	space = malloc(size, M_UFSMNT, M_WAITOK);
 	fs->fs_csp = space;
 	for (i = 0; i < blks; i += fs->fs_frag) {
 		size = fs->fs_bsize;
 		/* The last chunk may be shorter than a full block. */
 		if (i + fs->fs_frag > blks)
 			size = (blks - i) * fs->fs_fsize;
 		error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
 		    NOCRED, &bp);
 		/*
 		 * NOTE(review): on this early return fs_maxcluster and
 		 * fs_contigdirs have not yet been re-pointed into the new
 		 * allocation; if they referenced the buffer freed above
 		 * they are stale -- confirm.
 		 */
 		if (error)
 			return (error);
 		bcopy(bp->b_data, space, (u_int)size);
 		space = (char *)space + size;
 		brelse(bp);
 	}
 	/*
 	 * We no longer know anything about clusters per cylinder group.
 	 */
 	if (fs->fs_contigsumsize > 0) {
 		fs->fs_maxcluster = lp = space;
 		for (i = 0; i < fs->fs_ncg; i++)
 			*lp++ = fs->fs_contigsumsize;
 		space = lp;
 	}
 	size = fs->fs_ncg * sizeof(u_int8_t);
 	fs->fs_contigdirs = (u_int8_t *)space;
 	bzero(fs->fs_contigdirs, size);
 	/*
 	 * Step 5: clear the suspension flags and wake up waiters, if the
 	 * caller asked for it with FFSR_UNSUSPEND.
 	 */
 	if ((flags & FFSR_UNSUSPEND) != 0) {
 		MNT_ILOCK(mp);
 		mp->mnt_kern_flag &= ~(MNTK_SUSPENDED | MNTK_SUSPEND2);
 		wakeup(&mp->mnt_flag);
 		MNT_IUNLOCK(mp);
 	}
 
 loop:
 	MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
 		/*
 		 * Skip syncer vnode.
 		 */
 		if (vp->v_type == VNON) {
 			VI_UNLOCK(vp);
 			continue;
 		}
 		/*
 		 * Step 6: invalidate all cached file data.
 		 *
 		 * If the vnode cannot be acquired, abort the iteration and
 		 * rescan the mount's vnode list from the start.
 		 */
 		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
 			MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
 			goto loop;
 		}
 		if (vinvalbuf(vp, 0, 0, 0))
 			panic("ffs_reload: dirty2");
 		/*
 		 * Step 7: re-read inode data for all active vnodes.
 		 */
 		ip = VTOI(vp);
 		error =
 		    bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
 		    (int)fs->fs_bsize, NOCRED, &bp);
 		if (error) {
 			vput(vp);
 			MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
 			return (error);
 		}
 		if ((error = ffs_load_inode(bp, ip, fs, ip->i_number)) != 0) {
 			brelse(bp);
 			vput(vp);
 			MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
 			return (error);
 		}
 		ip->i_effnlink = ip->i_nlink;
 		brelse(bp);
 		vput(vp);
 	}
 	return (0);
 }
 
 /*
  * Common code for mount and mountroot
  */
 static int
 ffs_mountfs(odevvp, mp, td)
 	struct vnode *odevvp;
 	struct mount *mp;
 	struct thread *td;
 {
 	struct ufsmount *ump;
 	struct fs *fs;
 	struct cdev *dev;
 	int error, i, len, ronly;
 	struct ucred *cred;
 	struct g_consumer *cp;
 	struct mount *nmp;
 	struct vnode *devvp;
 	struct fsfail_task *etp;
 	int candelete, canspeedup;
 	off_t loc;
 
 	fs = NULL;
 	ump = NULL;
 	cred = td ? td->td_ucred : NOCRED;
 	ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
 
 	devvp = mntfs_allocvp(mp, odevvp);
 	VOP_UNLOCK(odevvp);
 	KASSERT(devvp->v_type == VCHR, ("reclaimed devvp"));
 	dev = devvp->v_rdev;
 	if (atomic_cmpset_acq_ptr((uintptr_t *)&dev->si_mountpt, 0,
 	    (uintptr_t)mp) == 0) {
 		mntfs_freevp(devvp);
 		return (EBUSY);
 	}
 	g_topology_lock();
 	error = g_vfs_open(devvp, &cp, "ffs", ronly ? 0 : 1);
 	g_topology_unlock();
 	if (error != 0) {
 		atomic_store_rel_ptr((uintptr_t *)&dev->si_mountpt, 0);
 		mntfs_freevp(devvp);
 		return (error);
 	}
 	dev_ref(dev);
 	devvp->v_bufobj.bo_ops = &ffs_ops;
 	BO_LOCK(&odevvp->v_bufobj);
 	odevvp->v_bufobj.bo_flag |= BO_NOBUFS;
 	BO_UNLOCK(&odevvp->v_bufobj);
 	if (dev->si_iosize_max != 0)
 		mp->mnt_iosize_max = dev->si_iosize_max;
 	if (mp->mnt_iosize_max > MAXPHYS)
 		mp->mnt_iosize_max = MAXPHYS;
 	if ((SBLOCKSIZE % cp->provider->sectorsize) != 0) {
 		error = EINVAL;
 		vfs_mount_error(mp,
 		    "Invalid sectorsize %d for superblock size %d",
 		    cp->provider->sectorsize, SBLOCKSIZE);
 		goto out;
 	}
 	/* fetch the superblock and summary information */
 	loc = STDSB;
 	if ((mp->mnt_flag & MNT_ROOTFS) != 0)
 		loc = STDSB_NOHASHFAIL;
 	if ((error = ffs_sbget(devvp, &fs, loc, M_UFSMNT, ffs_use_bread)) != 0)
 		goto out;
 	/* none of these types of check-hashes are maintained by this kernel */
 	fs->fs_metackhash &= ~(CK_INDIR | CK_DIR);
 	/* no support for any undefined flags */
 	fs->fs_flags &= FS_SUPPORTED;
 	fs->fs_flags &= ~FS_UNCLEAN;
 	if (fs->fs_clean == 0) {
 		fs->fs_flags |= FS_UNCLEAN;
 		if (ronly || (mp->mnt_flag & MNT_FORCE) ||
 		    ((fs->fs_flags & (FS_SUJ | FS_NEEDSFSCK)) == 0 &&
 		     (fs->fs_flags & FS_DOSOFTDEP))) {
 			printf("WARNING: %s was not properly dismounted\n",
 			    fs->fs_fsmnt);
 		} else {
 			vfs_mount_error(mp, "R/W mount of %s denied. %s%s",
 			    fs->fs_fsmnt, "Filesystem is not clean - run fsck.",
 			    (fs->fs_flags & FS_SUJ) == 0 ? "" :
 			    " Forced mount will invalidate journal contents");
 			error = EPERM;
 			goto out;
 		}
 		if ((fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) &&
 		    (mp->mnt_flag & MNT_FORCE)) {
 			printf("WARNING: %s: lost blocks %jd files %d\n",
 			    fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
 			    fs->fs_pendinginodes);
 			fs->fs_pendingblocks = 0;
 			fs->fs_pendinginodes = 0;
 		}
 	}
 	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
 		printf("WARNING: %s: mount pending error: blocks %jd "
 		    "files %d\n", fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
 		    fs->fs_pendinginodes);
 		fs->fs_pendingblocks = 0;
 		fs->fs_pendinginodes = 0;
 	}
 	if ((fs->fs_flags & FS_GJOURNAL) != 0) {
 #ifdef UFS_GJOURNAL
 		/*
 		 * Get journal provider name.
 		 */
 		len = 1024;
 		mp->mnt_gjprovider = malloc((u_long)len, M_UFSMNT, M_WAITOK);
 		if (g_io_getattr("GJOURNAL::provider", cp, &len,
 		    mp->mnt_gjprovider) == 0) {
 			mp->mnt_gjprovider = realloc(mp->mnt_gjprovider, len,
 			    M_UFSMNT, M_WAITOK);
 			MNT_ILOCK(mp);
 			mp->mnt_flag |= MNT_GJOURNAL;
 			MNT_IUNLOCK(mp);
 		} else {
 			printf("WARNING: %s: GJOURNAL flag on fs "
 			    "but no gjournal provider below\n",
 			    mp->mnt_stat.f_mntonname);
 			free(mp->mnt_gjprovider, M_UFSMNT);
 			mp->mnt_gjprovider = NULL;
 		}
 #else
 		printf("WARNING: %s: GJOURNAL flag on fs but no "
 		    "UFS_GJOURNAL support\n", mp->mnt_stat.f_mntonname);
 #endif
 	} else {
 		mp->mnt_gjprovider = NULL;
 	}
 	ump = malloc(sizeof *ump, M_UFSMNT, M_WAITOK | M_ZERO);
 	ump->um_cp = cp;
 	ump->um_bo = &devvp->v_bufobj;
 	ump->um_fs = fs;
 	if (fs->fs_magic == FS_UFS1_MAGIC) {
 		ump->um_fstype = UFS1;
 		ump->um_balloc = ffs_balloc_ufs1;
 	} else {
 		ump->um_fstype = UFS2;
 		ump->um_balloc = ffs_balloc_ufs2;
 	}
 	ump->um_blkatoff = ffs_blkatoff;
 	ump->um_truncate = ffs_truncate;
 	ump->um_update = ffs_update;
 	ump->um_valloc = ffs_valloc;
 	ump->um_vfree = ffs_vfree;
 	ump->um_ifree = ffs_ifree;
 	ump->um_rdonly = ffs_rdonly;
 	ump->um_snapgone = ffs_snapgone;
 	if ((mp->mnt_flag & MNT_UNTRUSTED) != 0)
 		ump->um_check_blkno = ffs_check_blkno;
 	else
 		ump->um_check_blkno = NULL;
 	mtx_init(UFS_MTX(ump), "FFS", "FFS Lock", MTX_DEF);
 	ffs_oldfscompat_read(fs, ump, fs->fs_sblockloc);
 	fs->fs_ronly = ronly;
 	fs->fs_active = NULL;
 	mp->mnt_data = ump;
 	mp->mnt_stat.f_fsid.val[0] = fs->fs_id[0];
 	mp->mnt_stat.f_fsid.val[1] = fs->fs_id[1];
 	nmp = NULL;
 	if (fs->fs_id[0] == 0 || fs->fs_id[1] == 0 ||
 	    (nmp = vfs_getvfs(&mp->mnt_stat.f_fsid))) {
 		if (nmp)
 			vfs_rel(nmp);
 		vfs_getnewfsid(mp);
 	}
 	mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
 	MNT_ILOCK(mp);
 	mp->mnt_flag |= MNT_LOCAL;
 	MNT_IUNLOCK(mp);
 	if ((fs->fs_flags & FS_MULTILABEL) != 0) {
 #ifdef MAC
 		MNT_ILOCK(mp);
 		mp->mnt_flag |= MNT_MULTILABEL;
 		MNT_IUNLOCK(mp);
 #else
 		printf("WARNING: %s: multilabel flag on fs but "
 		    "no MAC support\n", mp->mnt_stat.f_mntonname);
 #endif
 	}
 	if ((fs->fs_flags & FS_ACLS) != 0) {
 #ifdef UFS_ACL
 		MNT_ILOCK(mp);
 
 		if (mp->mnt_flag & MNT_NFS4ACLS)
 			printf("WARNING: %s: ACLs flag on fs conflicts with "
 			    "\"nfsv4acls\" mount option; option ignored\n",
 			    mp->mnt_stat.f_mntonname);
 		mp->mnt_flag &= ~MNT_NFS4ACLS;
 		mp->mnt_flag |= MNT_ACLS;
 
 		MNT_IUNLOCK(mp);
 #else
 		printf("WARNING: %s: ACLs flag on fs but no ACLs support\n",
 		    mp->mnt_stat.f_mntonname);
 #endif
 	}
 	if ((fs->fs_flags & FS_NFS4ACLS) != 0) {
 #ifdef UFS_ACL
 		MNT_ILOCK(mp);
 
 		if (mp->mnt_flag & MNT_ACLS)
 			printf("WARNING: %s: NFSv4 ACLs flag on fs conflicts "
 			    "with \"acls\" mount option; option ignored\n",
 			    mp->mnt_stat.f_mntonname);
 		mp->mnt_flag &= ~MNT_ACLS;
 		mp->mnt_flag |= MNT_NFS4ACLS;
 
 		MNT_IUNLOCK(mp);
 #else
 		printf("WARNING: %s: NFSv4 ACLs flag on fs but no "
 		    "ACLs support\n", mp->mnt_stat.f_mntonname);
 #endif
 	}
 	if ((fs->fs_flags & FS_TRIM) != 0) {
 		len = sizeof(int);
 		if (g_io_getattr("GEOM::candelete", cp, &len,
 		    &candelete) == 0) {
 			if (candelete)
 				ump->um_flags |= UM_CANDELETE;
 			else
 				printf("WARNING: %s: TRIM flag on fs but disk "
 				    "does not support TRIM\n",
 				    mp->mnt_stat.f_mntonname);
 		} else {
 			printf("WARNING: %s: TRIM flag on fs but disk does "
 			    "not confirm that it supports TRIM\n",
 			    mp->mnt_stat.f_mntonname);
 		}
 		if (((ump->um_flags) & UM_CANDELETE) != 0) {
 			ump->um_trim_tq = taskqueue_create("trim", M_WAITOK,
 			    taskqueue_thread_enqueue, &ump->um_trim_tq);
 			taskqueue_start_threads(&ump->um_trim_tq, 1, PVFS,
 			    "%s trim", mp->mnt_stat.f_mntonname);
 			ump->um_trimhash = hashinit(MAXTRIMIO, M_TRIM,
 			    &ump->um_trimlisthashsize);
 		}
 	}
 
 	len = sizeof(int);
 	if (g_io_getattr("GEOM::canspeedup", cp, &len, &canspeedup) == 0) {
 		if (canspeedup)
 			ump->um_flags |= UM_CANSPEEDUP;
 	}
 
 	ump->um_mountp = mp;
 	ump->um_dev = dev;
 	ump->um_devvp = devvp;
 	ump->um_odevvp = odevvp;
 	ump->um_nindir = fs->fs_nindir;
 	ump->um_bptrtodb = fs->fs_fsbtodb;
 	ump->um_seqinc = fs->fs_frag;
 	for (i = 0; i < MAXQUOTAS; i++)
 		ump->um_quotas[i] = NULLVP;
 #ifdef UFS_EXTATTR
 	ufs_extattr_uepm_init(&ump->um_extattr);
 #endif
 	/*
 	 * Set FS local "last mounted on" information (NULL pad)
 	 */
 	bzero(fs->fs_fsmnt, MAXMNTLEN);
 	strlcpy(fs->fs_fsmnt, mp->mnt_stat.f_mntonname, MAXMNTLEN);
 	mp->mnt_stat.f_iosize = fs->fs_bsize;
 
 	if (mp->mnt_flag & MNT_ROOTFS) {
 		/*
 		 * Root mount; update timestamp in mount structure.
 		 * this will be used by the common root mount code
 		 * to update the system clock.
 		 */
 		mp->mnt_time = fs->fs_time;
 	}
 
 	if (ronly == 0) {
 		fs->fs_mtime = time_second;
 		if ((fs->fs_flags & FS_DOSOFTDEP) &&
 		    (error = softdep_mount(devvp, mp, fs, cred)) != 0) {
 			ffs_flushfiles(mp, FORCECLOSE, td);
 			goto out;
 		}
 		if (fs->fs_snapinum[0] != 0)
 			ffs_snapshot_mount(mp);
 		fs->fs_fmod = 1;
 		fs->fs_clean = 0;
 		(void) ffs_sbupdate(ump, MNT_WAIT, 0);
 	}
 	/*
 	 * Initialize filesystem state information in mount struct.
 	 */
 	MNT_ILOCK(mp);
 	mp->mnt_kern_flag |= MNTK_LOOKUP_SHARED | MNTK_EXTENDED_SHARED |
 	    MNTK_NO_IOPF | MNTK_UNMAPPED_BUFS | MNTK_USES_BCACHE;
 	MNT_IUNLOCK(mp);
 #ifdef UFS_EXTATTR
 #ifdef UFS_EXTATTR_AUTOSTART
 	/*
 	 *
 	 * Auto-starting does the following:
 	 *	- check for /.attribute in the fs, and extattr_start if so
 	 *	- for each file in .attribute, enable that file with
 	 * 	  an attribute of the same name.
 	 * Not clear how to report errors -- probably eat them.
 	 * This would all happen while the filesystem was busy/not
 	 * available, so would effectively be "atomic".
 	 */
 	(void) ufs_extattr_autostart(mp, td);
 #endif /* !UFS_EXTATTR_AUTOSTART */
 #endif /* !UFS_EXTATTR */
 	etp = malloc(sizeof *ump->um_fsfail_task, M_UFSMNT, M_WAITOK | M_ZERO);
 	etp->fsid = mp->mnt_stat.f_fsid;
 	ump->um_fsfail_task = etp;
 	return (0);
 out:
 	if (fs != NULL) {
 		free(fs->fs_csp, M_UFSMNT);
 		free(fs, M_UFSMNT);
 	}
 	if (cp != NULL) {
 		g_topology_lock();
 		g_vfs_close(cp);
 		g_topology_unlock();
 	}
 	if (ump) {
 		mtx_destroy(UFS_MTX(ump));
 		if (mp->mnt_gjprovider != NULL) {
 			free(mp->mnt_gjprovider, M_UFSMNT);
 			mp->mnt_gjprovider = NULL;
 		}
 		free(ump, M_UFSMNT);
 		mp->mnt_data = NULL;
 	}
 	BO_LOCK(&odevvp->v_bufobj);
 	odevvp->v_bufobj.bo_flag &= ~BO_NOBUFS;
 	BO_UNLOCK(&odevvp->v_bufobj);
 	atomic_store_rel_ptr((uintptr_t *)&dev->si_mountpt, 0);
 	mntfs_freevp(devvp);
 	dev_rel(dev);
 	return (error);
 }
 
 /*
  * Read callback handed to filesystem-layer routines (see the
  * ffs_sbget() call in ffs_mountfs()).
  *
  * Allocates a 'size'-byte buffer into *bufp, which must be NULL on
  * entry, and fills it from the buffer cache block at byte offset 'loc'
  * of the vnode passed as 'devfd'.  The cache buffer is invalidated
  * before release.  Returns 0 on success or the bread() error; *bufp
  * stays allocated in either case.
  */
 static int
 ffs_use_bread(void *devfd, off_t loc, void **bufp, int size)
 {
 	struct buf *blk;
 	int rv;
 
 	KASSERT(*bufp == NULL, ("ffs_use_bread: non-NULL *bufp %p\n", *bufp));
 	*bufp = malloc(size, M_UFSMNT, M_WAITOK);
 	rv = bread((struct vnode *)devfd, btodb(loc), size, NOCRED, &blk);
 	if (rv == 0) {
 		bcopy(blk->b_data, *bufp, size);
 		/* Do not leave this copy behind in the buffer cache. */
 		blk->b_flags |= B_INVAL | B_NOCACHE;
 		brelse(blk);
 	}
 	return (rv);
 }
 
 /*
  * Debug knob, exported read/write as a sysctl under the _debug node.
  * When nonzero, ffs_oldfscompat_read() saves fs_cgsize in fs_save_cgsize
  * and replaces it with fs_bsize; ffs_oldfscompat_write() restores it.
  */
 static int bigcgs = 0;
 SYSCTL_INT(_debug, OID_AUTO, bigcgs, CTLFLAG_RW, &bigcgs, 0, "");
 
 /*
  * Sanity checks for loading old filesystem superblocks.
  * See ffs_oldfscompat_write below for unwound actions.
  *
  * Adjusts the incore superblock 'fs' in place.  'ump' receives the
  * original fs_maxfilesize of a UFS1 filesystem in um_savedmaxfilesize,
  * and 'sblockloc' is stored in fs_sblockloc when the superblock has not
  * yet been marked with FS_FLAGS_UPDATED.
  *
  * XXX - Parts get retired eventually.
  * Unfortunately new bits get added.
  */
 static void
 ffs_oldfscompat_read(struct fs *fs, struct ufsmount *ump,
     ufs2_daddr_t sblockloc)
 {
 	off_t maxfilesize;
 
 	/*
 	 * If not yet done, update fs_flags location and value of fs_sblockloc.
 	 */
 	if ((fs->fs_old_flags & FS_FLAGS_UPDATED) == 0) {
 		fs->fs_flags = fs->fs_old_flags;
 		fs->fs_old_flags |= FS_FLAGS_UPDATED;
 		fs->fs_sblockloc = sblockloc;
 	}
 	/*
 	 * If not yet done, update UFS1 superblock with new wider fields.
 	 */
 	if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_maxbsize != fs->fs_bsize) {
 		fs->fs_maxbsize = fs->fs_bsize;
 		fs->fs_time = fs->fs_old_time;
 		fs->fs_size = fs->fs_old_size;
 		fs->fs_dsize = fs->fs_old_dsize;
 		fs->fs_csaddr = fs->fs_old_csaddr;
 		fs->fs_cstotal.cs_ndir = fs->fs_old_cstotal.cs_ndir;
 		fs->fs_cstotal.cs_nbfree = fs->fs_old_cstotal.cs_nbfree;
 		fs->fs_cstotal.cs_nifree = fs->fs_old_cstotal.cs_nifree;
 		fs->fs_cstotal.cs_nffree = fs->fs_old_cstotal.cs_nffree;
 	}
 	/*
 	 * UFS1 inode formats older than FS_44INODEFMT get a maximum file
 	 * size of 2^31 - 1 and masks derived from fs_bmask / fs_fmask.
 	 */
 	if (fs->fs_magic == FS_UFS1_MAGIC &&
 	    fs->fs_old_inodefmt < FS_44INODEFMT) {
 		fs->fs_maxfilesize = ((uint64_t)1 << 31) - 1;
 		fs->fs_qbmask = ~fs->fs_bmask;
 		fs->fs_qfmask = ~fs->fs_fmask;
 	}
 	/*
 	 * For UFS1, remember the on-disk maximum file size so that
 	 * ffs_oldfscompat_write() can put it back, then clamp the incore
 	 * value to 0x80000000 blocks minus one byte.
 	 */
 	if (fs->fs_magic == FS_UFS1_MAGIC) {
 		ump->um_savedmaxfilesize = fs->fs_maxfilesize;
 		maxfilesize = (uint64_t)0x80000000 * fs->fs_bsize - 1;
 		if (fs->fs_maxfilesize > maxfilesize)
 			fs->fs_maxfilesize = maxfilesize;
 	}
 	/* Compatibility for old filesystems */
 	if (fs->fs_avgfilesize <= 0)
 		fs->fs_avgfilesize = AVFILESIZ;
 	if (fs->fs_avgfpdir <= 0)
 		fs->fs_avgfpdir = AFPDIR;
 	/* debug.bigcgs: run with fs_cgsize widened to a full block. */
 	if (bigcgs) {
 		fs->fs_save_cgsize = fs->fs_cgsize;
 		fs->fs_cgsize = fs->fs_bsize;
 	}
 }
 
 /*
  * Unwinding superblock updates for old filesystems.
  * See ffs_oldfscompat_read above for details.
  *
  * Modifies 'fs' in place: for UFS1 the old-format time and summary
  * totals are refreshed from the wide fields and fs_maxfilesize is
  * restored from ump->um_savedmaxfilesize; with the bigcgs debug knob
  * set, fs_cgsize is restored from fs_save_cgsize.
  *
  * XXX - Parts get retired eventually.
  * Unfortunately new bits get added.
  */
 void
 ffs_oldfscompat_write(struct fs *fs, struct ufsmount *ump)
 {
 
 	/*
 	 * Copy back UFS2 updated fields that UFS1 inspects.
 	 */
 	if (fs->fs_magic == FS_UFS1_MAGIC) {
 		fs->fs_old_time = fs->fs_time;
 		fs->fs_old_cstotal.cs_ndir = fs->fs_cstotal.cs_ndir;
 		fs->fs_old_cstotal.cs_nbfree = fs->fs_cstotal.cs_nbfree;
 		fs->fs_old_cstotal.cs_nifree = fs->fs_cstotal.cs_nifree;
 		fs->fs_old_cstotal.cs_nffree = fs->fs_cstotal.cs_nffree;
 		fs->fs_maxfilesize = ump->um_savedmaxfilesize;
 	}
 	if (bigcgs) {
 		fs->fs_cgsize = fs->fs_save_cgsize;
 		fs->fs_save_cgsize = 0;
 	}
 }
 
 /*
  * unmount system call
  *
  * Flushes all files, writes the final superblock when the filesystem was
  * writable (or an fsck process holds write access), releases the GEOM
  * consumer and device references, and frees the per-mount state.
  *
  *	mp		mount point to tear down.
  *	mntflags	MNT_FORCE makes the flush use FORCECLOSE.
  *
  * On the failure paths ("fail" / "fail1") the mount is left in place:
  * writes are resumed if they were suspended here, and extended
  * attributes are re-initialized if they had been stopped.  The value of
  * 'error' is returned in all cases.
  */
 static int
 ffs_unmount(struct mount *mp, int mntflags)
 {
 	struct thread *td;
 	struct ufsmount *ump = VFSTOUFS(mp);
 	struct fs *fs;
 	int error, flags, susp;
 #ifdef UFS_EXTATTR
 	int e_restart;
 #endif
 
 	flags = 0;
 	td = curthread;
 	fs = ump->um_fs;
 	if (mntflags & MNT_FORCE)
 		flags |= FORCECLOSE;
 	/* Writers only need to be suspended on a read/write mount. */
 	susp = fs->fs_ronly == 0;
 #ifdef UFS_EXTATTR
 	/*
 	 * Remember whether extended attributes were stopped so that the
 	 * failure path can start them again.
 	 */
 	if ((error = ufs_extattr_stop(mp, td))) {
 		if (error != EOPNOTSUPP)
 			printf("WARNING: unmount %s: ufs_extattr_stop "
 			    "returned errno %d\n", mp->mnt_stat.f_mntonname,
 			    error);
 		e_restart = 0;
 	} else {
 		ufs_extattr_uepm_destroy(&ump->um_extattr);
 		e_restart = 1;
 	}
 #endif
 	if (susp) {
 		error = vfs_write_suspend_umnt(mp);
 		if (error != 0)
 			goto fail1;
 	}
 	if (MOUNTEDSOFTDEP(mp))
 		error = softdep_flushfiles(mp, flags, td);
 	else
 		error = ffs_flushfiles(mp, flags, td);
 	/* A flush error aborts unless ffs_fsfail_cleanup() accepts it. */
 	if (error != 0 && !ffs_fsfail_cleanup(ump, error))
 		goto fail;
 
 	UFS_LOCK(ump);
 	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
 		printf("WARNING: unmount %s: pending error: blocks %jd "
 		    "files %d\n", fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
 		    fs->fs_pendinginodes);
 		fs->fs_pendingblocks = 0;
 		fs->fs_pendinginodes = 0;
 	}
 	UFS_UNLOCK(ump);
 	if (MOUNTEDSOFTDEP(mp))
 		softdep_unmount(mp);
 	if (fs->fs_ronly == 0 || ump->um_fsckpid > 0) {
 		/* Mark clean unless FS_UNCLEAN or FS_NEEDSFSCK is set. */
 		fs->fs_clean = fs->fs_flags & (FS_UNCLEAN|FS_NEEDSFSCK) ? 0 : 1;
 		error = ffs_sbupdate(ump, MNT_WAIT, 0);
 		if (ffs_fsfail_cleanup(ump, error))
 			error = 0;
 		if (error != 0 && !ffs_fsfail_cleanup(ump, error)) {
 			fs->fs_clean = 0;
 			goto fail;
 		}
 	}
 	if (susp)
 		vfs_write_resume(mp, VR_START_WRITE);
 	if (ump->um_trim_tq != NULL) {
 		/* Wait for outstanding TRIM requests before teardown. */
 		while (ump->um_trim_inflight != 0)
 			pause("ufsutr", hz);
 		taskqueue_drain_all(ump->um_trim_tq);
 		taskqueue_free(ump->um_trim_tq);
 		free(ump->um_trimhash, M_TRIM);
 	}
 	g_topology_lock();
 	if (ump->um_fsckpid > 0) {
 		/*
 		 * Return to normal read-only mode.
 		 */
 		error = g_access(ump->um_cp, 0, -1, 0);
 		ump->um_fsckpid = 0;
 	}
 	g_vfs_close(ump->um_cp);
 	g_topology_unlock();
 	BO_LOCK(&ump->um_odevvp->v_bufobj);
 	ump->um_odevvp->v_bufobj.bo_flag &= ~BO_NOBUFS;
 	BO_UNLOCK(&ump->um_odevvp->v_bufobj);
 	/* Release the claim on the device made at mount time. */
 	atomic_store_rel_ptr((uintptr_t *)&ump->um_dev->si_mountpt, 0);
 	mntfs_freevp(ump->um_devvp);
 	vrele(ump->um_odevvp);
 	dev_rel(ump->um_dev);
 	mtx_destroy(UFS_MTX(ump));
 	if (mp->mnt_gjprovider != NULL) {
 		free(mp->mnt_gjprovider, M_UFSMNT);
 		mp->mnt_gjprovider = NULL;
 	}
 	free(fs->fs_csp, M_UFSMNT);
 	free(fs, M_UFSMNT);
 	if (ump->um_fsfail_task != NULL)
 		free(ump->um_fsfail_task, M_UFSMNT);
 	free(ump, M_UFSMNT);
 	mp->mnt_data = NULL;
 	MNT_ILOCK(mp);
 	mp->mnt_flag &= ~MNT_LOCAL;
 	MNT_IUNLOCK(mp);
 	/* Drop the reference held through td_su if it names this mount. */
 	if (td->td_su == mp) {
 		td->td_su = NULL;
 		vfs_rel(mp);
 	}
 	return (error);
 
 fail:
 	if (susp)
 		vfs_write_resume(mp, VR_START_WRITE);
 fail1:
 #ifdef UFS_EXTATTR
 	if (e_restart) {
 		ufs_extattr_uepm_init(&ump->um_extattr);
 #ifdef UFS_EXTATTR_AUTOSTART
 		(void) ufs_extattr_autostart(mp, td);
 #endif
 	}
 #endif
 
 	return (error);
 }
 
 /*
  * Flush out all the files in a filesystem.
  *
  *	mp	mount point whose vnodes are flushed.
  *	flags	passed on to vflush(); EARLYFLUSH additionally makes a
  *		quotaoff() failure non-fatal here.
  *	td	calling thread.
  *
  * Returns the first error from vflush() or quotaoff() (unless tolerated
  * as above), otherwise the result of the final VOP_FSYNC() on the
  * device vnode.
  */
 int
 ffs_flushfiles(struct mount *mp, int flags, struct thread *td)
 {
 	struct ufsmount *ump;
 	int qerror, error;
 
 	ump = VFSTOUFS(mp);
 	qerror = 0;
 #ifdef QUOTA
 	if (mp->mnt_flag & MNT_QUOTA) {
 		int i;
 		error = vflush(mp, 0, SKIPSYSTEM|flags, td);
 		if (error)
 			return (error);
 		for (i = 0; i < MAXQUOTAS; i++) {
 			error = quotaoff(td, mp, i);
 			if (error != 0) {
 				/*
 				 * With EARLYFLUSH only remember the
 				 * failure; it suppresses the full vflush()
 				 * further down.
 				 */
 				if ((flags & EARLYFLUSH) == 0)
 					return (error);
 				else
 					qerror = error;
 			}
 		}
 
 		/*
 		 * Here we fall through to vflush again to ensure that
 		 * we have gotten rid of all the system vnodes, unless
 		 * quotas must not be closed.
 		 */
 	}
 #endif
 	ASSERT_VOP_LOCKED(ump->um_devvp, "ffs_flushfiles");
 	if (ump->um_devvp->v_vflag & VV_COPYONWRITE) {
 		if ((error = vflush(mp, 0, SKIPSYSTEM | flags, td)) != 0)
 			return (error);
 		ffs_snapshot_unmount(mp);
 		flags |= FORCECLOSE;
 		/*
 		 * Here we fall through to vflush again to ensure
 		 * that we have gotten rid of all the system vnodes.
 		 */
 	}
 
 	/*
 	 * Do not close system files if quotas were not closed, to be
 	 * able to sync the remaining dquots.  The freeblks softupdate
 	 * workitems might hold a reference on a dquot, preventing
 	 * quotaoff() from completing.  Next round of
 	 * softdep_flushworklist() iteration should process the
 	 * blockers, allowing the next run of quotaoff() to finally
 	 * flush held dquots.
 	 *
 	 * Otherwise, flush all the files.
 	 */
 	if (qerror == 0 && (error = vflush(mp, 0, flags, td)) != 0)
 		return (error);
 
 	/*
 	 * Flush filesystem metadata.
 	 */
 	vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY);
 	error = VOP_FSYNC(ump->um_devvp, MNT_WAIT, td);
 	VOP_UNLOCK(ump->um_devvp);
 	return (error);
 }
 
 /*
  * Get filesystem statistics.
  *
  * Fills *sbp from the in-core superblock of the mount.  Panics if the
  * superblock carries neither the UFS1 nor the UFS2 magic number.  The
  * free block and free inode figures are sampled while the UFS lock is
  * held and include the pending block/inode counts.  Always returns 0.
  */
 static int
 ffs_statfs(struct mount *mp, struct statfs *sbp)
 {
 	struct ufsmount *ump = VFSTOUFS(mp);
 	struct fs *fs = ump->um_fs;
 
 	if (!(fs->fs_magic == FS_UFS1_MAGIC ||
 	    fs->fs_magic == FS_UFS2_MAGIC))
 		panic("ffs_statfs");
 
 	/* Fields filled in without taking the UFS lock. */
 	sbp->f_version = STATFS_VERSION;
 	sbp->f_bsize = fs->fs_fsize;
 	sbp->f_iosize = fs->fs_bsize;
 	sbp->f_blocks = fs->fs_dsize;
 	sbp->f_namemax = UFS_MAXNAMLEN;
 
 	/* Free space and inode counters are read under the UFS lock. */
 	UFS_LOCK(ump);
 	sbp->f_bfree = fs->fs_cstotal.cs_nbfree * fs->fs_frag +
 	    fs->fs_cstotal.cs_nffree + dbtofsb(fs, fs->fs_pendingblocks);
 	sbp->f_bavail = freespace(fs, fs->fs_minfree) +
 	    dbtofsb(fs, fs->fs_pendingblocks);
 	sbp->f_files = fs->fs_ncg * fs->fs_ipg - UFS_ROOTINO;
 	sbp->f_ffree = fs->fs_cstotal.cs_nifree + fs->fs_pendinginodes;
 	UFS_UNLOCK(ump);
 
 	return (0);
 }
 
 static bool
 sync_doupdate(struct inode *ip)
 {
 
 	return ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_MODIFIED |
 	    IN_UPDATE)) != 0);
 }
 
 /*
  * Filter passed to MNT_VNODE_FOREACH_LAZY() by ffs_sync_lazy().
  *
  * Returns non-zero for a vnode that is not VNON and either has pending
  * inode flags (see sync_doupdate()) or has VI_OWEINACT set; returns zero
  * otherwise.  The arg parameter is unused.
  */
 static int
 ffs_sync_lazy_filter(struct vnode *vp, void *arg __unused)
 {
 
 	/*
 	 * Flags are safe to access because ->v_data invalidation
 	 * is held off by listmtx.
 	 */
 	if (vp->v_type == VNON)
 		return (false);
 	return (sync_doupdate(VTOI(vp)) ||
 	    (vp->v_iflag & VI_OWEINACT) != 0);
 }
 
 /*
  * For a lazy sync, we only care about access times, quotas and the
  * superblock.  Other filesystem changes are already converted to
  * cylinder group blocks or inode blocks updates and are written to
  * disk by syncer.
  *
  * Returns 0, or the most recent error reported by ffs_update() or
  * ffs_sbupdate().  Vnodes for which the non-sleeping vget() fails are
  * skipped without recording an error.
  */
 static int
 ffs_sync_lazy(mp)
      struct mount *mp;
 {
 	struct vnode *mvp, *vp;
 	struct inode *ip;
 	struct thread *td;
 	int allerror, error;
 
 	allerror = 0;
 	td = curthread;
 	/*
 	 * With MNT_NOATIME the vnode scan is skipped entirely; only the
 	 * quotas (when compiled in) and the superblock are handled.
 	 */
 	if ((mp->mnt_flag & MNT_NOATIME) != 0) {
 #ifdef QUOTA
 		qsync(mp);
 #endif
 		goto sbupdate;
 	}
 	MNT_VNODE_FOREACH_LAZY(vp, mp, mvp, ffs_sync_lazy_filter, NULL) {
 		/* Every path that skips the vnode drops its interlock. */
 		if (vp->v_type == VNON) {
 			VI_UNLOCK(vp);
 			continue;
 		}
 		ip = VTOI(vp);
 
 		/*
 		 * The IN_ACCESS flag is converted to IN_MODIFIED by
 		 * ufs_close() and ufs_getattr() by the calls to
 		 * ufs_itimes_locked(), without subsequent UFS_UPDATE().
 		 * Test also all the other timestamp flags too, to pick up
 		 * any other cases that could be missed.
 		 */
 		if (!sync_doupdate(ip) && (vp->v_iflag & VI_OWEINACT) == 0) {
 			VI_UNLOCK(vp);
 			continue;
 		}
 		/* LK_NOWAIT: do not sleep for the vnode lock, just skip. */
 		if ((error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK,
 		    td)) != 0)
 			continue;
 #ifdef QUOTA
 		qsyncvp(vp);
 #endif
 		/* Re-test now that the vnode is locked; error is 0 here. */
 		if (sync_doupdate(ip))
 			error = ffs_update(vp, 0);
 		if (error != 0)
 			allerror = error;
 		vput(vp);
 	}
 sbupdate:
 	/* Write the superblock only if it is marked modified. */
 	if (VFSTOUFS(mp)->um_fs->fs_fmod != 0 &&
 	    (error = ffs_sbupdate(VFSTOUFS(mp), MNT_LAZY, 0)) != 0)
 		allerror = error;
 	return (allerror);
 }
 
 /*
  * Go through the disk queues to initiate sandbagged IO;
  * go through the inodes to write those that have been modified;
  * initiate the writing of the super block if it has been modified.
  *
  * Note: we are always called with the filesystem marked busy using
  * vfs_busy().
  *
  * The waitfor values that are handled specially here are:
  *  - MNT_LAZY: handed to ffs_sync_lazy() unless the system is
  *    rebooting, in which case it is treated as MNT_NOWAIT;
  *  - MNT_SUSPEND: treated as MNT_WAIT, and additionally the mount is
  *    marked MNTK_SUSPEND2 | MNTK_SUSPENDED once the device vnode has no
  *    pending output and softdep_check_suspend() reports no more work;
  *  - MNT_WAIT: vnode locks are acquired without LK_NOWAIT and the scan
  *    is repeated until a pass finds nothing left to write.
  *
  * Returns 0 or the most recently recorded error.  Errors for which
  * ffs_fsfail_cleanup() returns non-zero are discarded.
  */
 static int
 ffs_sync(mp, waitfor)
 	struct mount *mp;
 	int waitfor;
 {
 	struct vnode *mvp, *vp, *devvp;
 	struct thread *td;
 	struct inode *ip;
 	struct ufsmount *ump = VFSTOUFS(mp);
 	struct fs *fs;
 	int error, count, lockreq, allerror = 0;
 	int suspend;
 	int suspended;
 	int secondary_writes;
 	int secondary_accwrites;
 	int softdep_deps;
 	int softdep_accdeps;
 	struct bufobj *bo;
 
 	suspend = 0;
 	suspended = 0;
 	td = curthread;
 	fs = ump->um_fs;
 	if (fs->fs_fmod != 0 && fs->fs_ronly != 0 && ump->um_fsckpid == 0)
 		panic("%s: ffs_sync: modification on read-only filesystem",
 		    fs->fs_fsmnt);
 	if (waitfor == MNT_LAZY) {
 		if (!rebooting)
 			return (ffs_sync_lazy(mp));
 		waitfor = MNT_NOWAIT;
 	}
 
 	/*
 	 * Write back each (modified) inode.
 	 */
 	lockreq = LK_EXCLUSIVE | LK_NOWAIT;
 	if (waitfor == MNT_SUSPEND) {
 		suspend = 1;
 		waitfor = MNT_WAIT;
 	}
 	if (waitfor == MNT_WAIT)
 		lockreq = LK_EXCLUSIVE;
 	lockreq |= LK_INTERLOCK | LK_SLEEPFAIL;
 loop:
 	/* Grab snapshot of secondary write counts */
 	MNT_ILOCK(mp);
 	secondary_writes = mp->mnt_secondary_writes;
 	secondary_accwrites = mp->mnt_secondary_accwrites;
 	MNT_IUNLOCK(mp);
 
 	/* Grab snapshot of softdep dependency counts */
 	softdep_get_depcounts(mp, &softdep_deps, &softdep_accdeps);
 
 	MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
 		/*
 		 * Depend on the vnode interlock to keep things stable enough
 		 * for a quick test.  Since there might be hundreds of
 		 * thousands of vnodes, we cannot afford even a subroutine
 		 * call unless there's a good chance that we have work to do.
 		 */
 		if (vp->v_type == VNON) {
 			VI_UNLOCK(vp);
 			continue;
 		}
 		ip = VTOI(vp);
 		/* Skip vnodes with no pending inode flags and no dirty bufs. */
 		if ((ip->i_flag &
 		    (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 &&
 		    vp->v_bufobj.bo_dirty.bv_cnt == 0) {
 			VI_UNLOCK(vp);
 			continue;
 		}
 		if ((error = vget(vp, lockreq, td)) != 0) {
 			/*
 			 * ENOENT and ENOLCK abort the iteration and restart
 			 * the whole scan; any other failure just skips
 			 * this vnode.
 			 */
 			if (error == ENOENT || error == ENOLCK) {
 				MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
 				goto loop;
 			}
 			continue;
 		}
 #ifdef QUOTA
 		qsyncvp(vp);
 #endif
 		if ((error = ffs_syncvnode(vp, waitfor, 0)) != 0)
 			allerror = error;
 		vput(vp);
 	}
 	/*
 	 * Force stale filesystem control information to be flushed.
 	 */
 	if (waitfor == MNT_WAIT || rebooting) {
 		if ((error = softdep_flushworklist(ump->um_mountp, &count, td)))
 			allerror = error;
 		if (ffs_fsfail_cleanup(ump, allerror))
 			allerror = 0;
 		/* Flushed work items may create new vnodes to clean */
 		if (allerror == 0 && count)
 			goto loop;
 	}
 
 	devvp = ump->um_devvp;
 	bo = &devvp->v_bufobj;
 	BO_LOCK(bo);
 	if (bo->bo_numoutput > 0 || bo->bo_dirty.bv_cnt > 0) {
 		/* The device vnode still has output in flight or dirty bufs. */
 		BO_UNLOCK(bo);
 		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
 		error = VOP_FSYNC(devvp, waitfor, td);
 		VOP_UNLOCK(devvp);
 		if (MOUNTEDSOFTDEP(mp) && (error == 0 || error == EAGAIN))
 			error = ffs_sbupdate(ump, waitfor, 0);
 		if (error != 0)
 			allerror = error;
 		if (ffs_fsfail_cleanup(ump, allerror))
 			allerror = 0;
 		if (allerror == 0 && waitfor == MNT_WAIT)
 			goto loop;
 	} else if (suspend != 0) {
 		/*
 		 * NOTE(review): the bufobj lock taken above is not released
 		 * explicitly in this branch.  Presumably
 		 * softdep_check_suspend() drops it and returns with the
 		 * mount interlock held, as the MNT_IUNLOCK() calls and the
 		 * mtx_assert() below suggest - confirm against its
 		 * definition.
 		 */
 		if (softdep_check_suspend(mp,
 					  devvp,
 					  softdep_deps,
 					  softdep_accdeps,
 					  secondary_writes,
 					  secondary_accwrites) != 0) {
 			MNT_IUNLOCK(mp);
 			goto loop;	/* More work needed */
 		}
 		mtx_assert(MNT_MTX(mp), MA_OWNED);
 		mp->mnt_kern_flag |= MNTK_SUSPEND2 | MNTK_SUSPENDED;
 		MNT_IUNLOCK(mp);
 		suspended = 1;
 	} else
 		BO_UNLOCK(bo);
 	/*
 	 * Write back modified superblock.
 	 */
 	if (fs->fs_fmod != 0 &&
 	    (error = ffs_sbupdate(ump, waitfor, suspended)) != 0)
 		allerror = error;
 	if (ffs_fsfail_cleanup(ump, allerror))
 		allerror = 0;
 	return (allerror);
 }
 
 /*
  * Get the vnode for inode number ino on mount mp.  This is a thin wrapper
  * that calls ffs_vgetf() with no FFSV_* flags and returns its result.
  */
 int
 ffs_vget(struct mount *mp, ino_t ino, int flags, struct vnode **vpp)
 {
 	int error;
 
 	error = ffs_vgetf(mp, ino, flags, vpp, 0);
 	return (error);
 }
 
 /*
  * Return in *vpp the vnode for inode number ino on mount mp, creating it
  * and loading the inode from disk when it is not found in the vfs hash.
  *
  * flags are the lock flags handed to vfs_hash_get()/vfs_hash_insert(); a
  * shared lock request is promoted to exclusive before a new vnode is
  * created.  ffs_flags may contain:
  *  - FFSV_FORCEINSMQ: VV_FORCEINSMQ is set on the new vnode around the
  *    insmntque() call;
  *  - FFSV_REPLACE: a vnode found in the hash is vgone()d and a fresh one
  *    is created in its place (asserted to come with LK_EXCLUSIVE).
  *
  * Returns 0 with *vpp set on success, otherwise an error value.  Most
  * failure paths store NULL in *vpp; when vfs_hash_get() or
  * vfs_hash_insert() fails, *vpp is left as that routine set it.
  */
 int
 ffs_vgetf(mp, ino, flags, vpp, ffs_flags)
 	struct mount *mp;
 	ino_t ino;
 	int flags;
 	struct vnode **vpp;
 	int ffs_flags;
 {
 	struct fs *fs;
 	struct inode *ip;
 	struct ufsmount *ump;
 	struct buf *bp;
 	struct vnode *vp;
 	daddr_t dbn;
 	int error;
 
 	MPASS((ffs_flags & FFSV_REPLACE) == 0 || (flags & LK_EXCLUSIVE) != 0);
 
 	/* Fast path: the vnode may already be in the hash. */
 	error = vfs_hash_get(mp, ino, flags, curthread, vpp, NULL, NULL);
 	if (error != 0)
 		return (error);
 	if (*vpp != NULL) {
 		if ((ffs_flags & FFSV_REPLACE) == 0)
 			return (0);
 		/* FFSV_REPLACE: discard the existing vnode and build anew. */
 		vgone(*vpp);
 		vput(*vpp);
 	}
 
 	/*
 	 * We must promote to an exclusive lock for vnode creation.  This
 	 * can happen if lookup is passed LOCKSHARED.
 	 */
 	if ((flags & LK_TYPE_MASK) == LK_SHARED) {
 		flags &= ~LK_TYPE_MASK;
 		flags |= LK_EXCLUSIVE;
 	}
 
 	/*
 	 * We do not lock vnode creation as it is believed to be too
 	 * expensive for such rare case as simultaneous creation of vnode
 	 * for same ino by different processes. We just allow them to race
 	 * and check later to decide who wins. Let the race begin!
 	 */
 
 	ump = VFSTOUFS(mp);
 	fs = ump->um_fs;
 	ip = uma_zalloc(uma_inode, M_WAITOK | M_ZERO);
 
 	/* Allocate a new vnode/inode. */
 	error = getnewvnode("ufs", mp, fs->fs_magic == FS_UFS1_MAGIC ?
 	    &ffs_vnodeops1 : &ffs_vnodeops2, &vp);
 	if (error) {
 		*vpp = NULL;
 		uma_zfree(uma_inode, ip);
 		return (error);
 	}
 	/*
 	 * FFS supports recursive locking.
 	 */
 	lockmgr(vp->v_vnlock, LK_EXCLUSIVE, NULL);
 	VN_LOCK_AREC(vp);
 	/* Tie the vnode and the in-core inode together. */
 	vp->v_data = ip;
 	vp->v_bufobj.bo_bsize = fs->fs_bsize;
 	ip->i_vnode = vp;
 	ip->i_ump = ump;
 	ip->i_number = ino;
 	ip->i_ea_refs = 0;
 	ip->i_nextclustercg = -1;
 	ip->i_flag = fs->fs_magic == FS_UFS1_MAGIC ? 0 : IN_UFS2;
 	ip->i_mode = 0; /* ensure error cases below throw away vnode */
 #ifdef QUOTA
 	{
 		int i;
 		for (i = 0; i < MAXQUOTAS; i++)
 			ip->i_dquot[i] = NODQUOT;
 	}
 #endif
 
 	if (ffs_flags & FFSV_FORCEINSMQ)
 		vp->v_vflag |= VV_FORCEINSMQ;
 	error = insmntque(vp, mp);
 	if (error != 0) {
 		/*
 		 * NOTE(review): only the inode is freed here; presumably
 		 * insmntque() disposes of the vnode itself on failure -
 		 * confirm against its definition.
 		 */
 		uma_zfree(uma_inode, ip);
 		*vpp = NULL;
 		return (error);
 	}
 	vp->v_vflag &= ~VV_FORCEINSMQ;
 	error = vfs_hash_insert(vp, ino, flags, curthread, vpp, NULL, NULL);
 	if (error != 0)
 		return (error);
 	if (*vpp != NULL) {
 		/*
 		 * Calls from ffs_valloc() (i.e. FFSV_REPLACE set)
 		 * operate on empty inode, which must not be found by
 		 * other threads until fully filled.  Vnode for empty
 		 * inode must be not re-inserted on the hash by other
 		 * thread, after removal by us at the beginning.
 		 */
 		MPASS((ffs_flags & FFSV_REPLACE) == 0);
 		return (0);
 	}
 
 	/* Read in the disk contents for the inode, copy into the inode. */
 	dbn = fsbtodb(fs, ino_to_fsba(fs, ino));
 	error = ffs_breadz(ump, ump->um_devvp, dbn, dbn, (int)fs->fs_bsize,
 	    NULL, NULL, 0, NOCRED, 0, NULL, &bp);
 	if (error != 0) {
 		/*
 		 * The inode does not contain anything useful, so it would
 		 * be misleading to leave it on its hash chain. With mode
 		 * still zero, it will be unlinked and returned to the free
 		 * list by vput().
 		 */
 		vgone(vp);
 		vput(vp);
 		*vpp = NULL;
 		return (error);
 	}
 	/* Allocate the dinode copy that matches the filesystem format. */
 	if (I_IS_UFS1(ip))
 		ip->i_din1 = uma_zalloc(uma_ufs1, M_WAITOK);
 	else
 		ip->i_din2 = uma_zalloc(uma_ufs2, M_WAITOK);
 	if ((error = ffs_load_inode(bp, ip, fs, ino)) != 0) {
 		bqrelse(bp);
 		vgone(vp);
 		vput(vp);
 		*vpp = NULL;
 		return (error);
 	}
 	if (DOINGSOFTDEP(vp))
 		softdep_load_inodeblock(ip);
 	else
 		ip->i_effnlink = ip->i_nlink;
 	bqrelse(bp);
 
 	/*
 	 * Initialize the vnode from the inode, check for aliases.
 	 * Note that the underlying vnode may have changed.
 	 */
 	error = ufs_vinit(mp, I_IS_UFS1(ip) ? &ffs_fifoops1 : &ffs_fifoops2,
 	    &vp);
 	if (error) {
 		vgone(vp);
 		vput(vp);
 		*vpp = NULL;
 		return (error);
 	}
 
 	/*
 	 * Finish inode initialization.
 	 */
 	if (vp->v_type != VFIFO) {
 		/* FFS supports shared locking for all files except fifos. */
 		VN_LOCK_ASHARE(vp);
 	}
 
 	/*
 	 * Set up a generation number for this inode if it does not
 	 * already have one. This should only happen on old filesystems.
 	 */
 	if (ip->i_gen == 0) {
 		while (ip->i_gen == 0)
 			ip->i_gen = arc4random();
 		/* Only mark the inode modified on a writable mount. */
 		if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
 			UFS_INODE_SET_FLAG(ip, IN_MODIFIED);
 			DIP_SET(ip, i_gen, ip->i_gen);
 		}
 	}
 #ifdef MAC
 	if ((mp->mnt_flag & MNT_MULTILABEL) && ip->i_mode) {
 		/*
 		 * If this vnode is already allocated, and we're running
 		 * multi-label, attempt to perform a label association
 		 * from the extended attributes on the inode.
 		 */
 		error = mac_vnode_associate_extattr(mp, vp);
 		if (error) {
 			/* ufs_inactive will release ip->i_devvp ref. */
 			vgone(vp);
 			vput(vp);
 			*vpp = NULL;
 			return (error);
 		}
 	}
 #endif
 
 	*vpp = vp;
 	return (0);
 }
 
 /*
  * File handle to vnode
  *
  * Have to be really careful about stale file handles:
  * - check that the inode number is valid
  * - for UFS2 check that the inode number is initialized
  * - call ffs_vget() to get the locked inode
  * - check for an unallocated inode (i_mode == 0)
  * - check that the given client host has export rights and return
  *   those rights via. exflagsp and credanonp
  */
 static int
 ffs_fhtovp(mp, fhp, flags, vpp)
 	struct mount *mp;
 	struct fid *fhp;
 	int flags;
 	struct vnode **vpp;
 {
 	struct ufid *ufhp;
 	struct ufsmount *ump;
 	struct fs *fs;
 	struct cg *cgp;
 	struct buf *bp;
 	ino_t ino;
 	u_int cg;
 	int error;
 
 	ufhp = (struct ufid *)fhp;
 	ino = ufhp->ufid_ino;
 	ump = VFSTOUFS(mp);
 	fs = ump->um_fs;
 	if (ino < UFS_ROOTINO || ino >= fs->fs_ncg * fs->fs_ipg)
 		return (ESTALE);
 	/*
 	 * Need to check if inode is initialized because UFS2 does lazy
 	 * initialization and nfs_fhtovp can offer arbitrary inode numbers.
 	 */
 	if (fs->fs_magic != FS_UFS2_MAGIC)
 		return (ufs_fhtovp(mp, ufhp, flags, vpp));
 	cg = ino_to_cg(fs, ino);
 	if ((error = ffs_getcg(fs, ump->um_devvp, cg, 0, &bp, &cgp)) != 0)
 		return (error);
 	if (ino >= cg * fs->fs_ipg + cgp->cg_initediblk) {
 		brelse(bp);
 		return (ESTALE);
 	}
 	brelse(bp);
 	return (ufs_fhtovp(mp, ufhp, flags, vpp));
 }
 
 /*
  * Initialize the filesystem: set up the suspension and soft updates
  * subsystems, then return the result of the generic ufs_init().
  */
 static int
 ffs_init(struct vfsconf *vfsp)
 {
 	int ret;
 
 	ffs_susp_initialize();
 	softdep_initialize();
 	ret = ufs_init(vfsp);
 	return (ret);
 }
 
 /*
  * Undo the work of ffs_init().
  *
  * ufs_uninit() runs first and its result is what gets returned; the
  * soft updates and suspension subsystems are torn down afterwards and
  * taskqueue_thread is drained last.
  */
 static int
 ffs_uninit(struct vfsconf *vfsp)
 {
 	const int ret = ufs_uninit(vfsp);
 
 	softdep_uninitialize();
 	ffs_susp_uninitialize();
 	taskqueue_drain_all(taskqueue_thread);
 	return (ret);
 }
 
 /*
  * Structure used to pass information from ffs_sbupdate to its
  * helper routine ffs_use_bwrite.
  */
 struct devfd {
 	/* Mount whose superblock is being written. */
 	struct ufsmount	*ump;
 	/* Superblock buffer obtained by ffs_sbupdate() via getblk(). */
 	struct buf	*sbbp;
 	/* MNT_WAIT selects bwrite(); anything else selects bawrite(). */
 	int		 waitfor;
 	/* Non-zero: mark the written buffers B_VALIDSUSPWRT. */
 	int		 suspended;
 	/* Error recorded by a synchronous write; 0 if none. */
 	int		 error;
 };
 
 /*
  * Write a superblock and associated information back to disk.
  */
 int
 ffs_sbupdate(ump, waitfor, suspended)
 	struct ufsmount *ump;
 	int waitfor;
 	int suspended;
 {
 	struct fs *fs;
 	struct buf *sbbp;
 	struct devfd devfd;
 
 	fs = ump->um_fs;
 	if (fs->fs_ronly == 1 &&
 	    (ump->um_mountp->mnt_flag & (MNT_RDONLY | MNT_UPDATE)) !=
 	    (MNT_RDONLY | MNT_UPDATE) && ump->um_fsckpid == 0)
 		panic("ffs_sbupdate: write read-only filesystem");
 	/*
 	 * We use the superblock's buf to serialize calls to ffs_sbupdate().
 	 */
 	sbbp = getblk(ump->um_devvp, btodb(fs->fs_sblockloc),
 	    (int)fs->fs_sbsize, 0, 0, 0);
 	/*
 	 * Initialize info needed for write function.
 	 */
 	devfd.ump = ump;
 	devfd.sbbp = sbbp;
 	devfd.waitfor = waitfor;
 	devfd.suspended = suspended;
 	devfd.error = 0;
 	return (ffs_sbput(&devfd, fs, fs->fs_sblockloc, ffs_use_bwrite));
 }
 
 /*
  * Write function for use by filesystem-layer routines.
  *
  * ffs_sbupdate() passes this routine to ffs_sbput() together with a
  * struct devfd, which arrives here as the devfd argument.
  *
  * A write at any location other than fs_sblockloc is treated as
  * superblock summary information: the data is copied into a buffer and
  * written, a synchronous write error is only recorded in devfd->error,
  * and 0 is returned.
  *
  * The write at fs_sblockloc uses the superblock buffer obtained by
  * ffs_sbupdate().  If an error is pending (and ffs_fsfail_cleanup()
  * does not discard it) the buffer is released unwritten and the error is
  * returned; otherwise the in-core superblock is copied into the buffer,
  * fixed up, check-hashed and written, and devfd->error is returned.
  */
 static int
 ffs_use_bwrite(void *devfd, off_t loc, void *buf, int size)
 {
 	struct devfd *devfdp;
 	struct ufsmount *ump;
 	struct buf *bp;
 	struct fs *fs;
 	int error;
 
 	devfdp = devfd;
 	ump = devfdp->ump;
 	fs = ump->um_fs;
 	/*
 	 * Writing the superblock summary information.
 	 */
 	if (loc != fs->fs_sblockloc) {
 		bp = getblk(ump->um_devvp, btodb(loc), size, 0, 0, 0);
 		bcopy(buf, bp->b_data, (u_int)size);
 		if (devfdp->suspended)
 			bp->b_flags |= B_VALIDSUSPWRT;
 		/* Only a MNT_WAIT write is synchronous and can report. */
 		if (devfdp->waitfor != MNT_WAIT)
 			bawrite(bp);
 		else if ((error = bwrite(bp)) != 0)
 			devfdp->error = error;
 		return (0);
 	}
 	/*
 	 * Writing the superblock itself. We need to do special checks for it.
 	 */
 	bp = devfdp->sbbp;
 	if (ffs_fsfail_cleanup(ump, devfdp->error))
 		devfdp->error = 0;
 	if (devfdp->error != 0) {
 		brelse(bp);
 		return (devfdp->error);
 	}
 	/*
 	 * If FS_FLAGS_UPDATED is not set in fs_old_flags and fs_sblockloc
 	 * differs from the standard location for the format, reset it to
 	 * that location and log a warning.
 	 */
 	if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_sblockloc != SBLOCK_UFS1 &&
 	    (fs->fs_old_flags & FS_FLAGS_UPDATED) == 0) {
 		printf("WARNING: %s: correcting fs_sblockloc from %jd to %d\n",
 		    fs->fs_fsmnt, fs->fs_sblockloc, SBLOCK_UFS1);
 		fs->fs_sblockloc = SBLOCK_UFS1;
 	}
 	if (fs->fs_magic == FS_UFS2_MAGIC && fs->fs_sblockloc != SBLOCK_UFS2 &&
 	    (fs->fs_old_flags & FS_FLAGS_UPDATED) == 0) {
 		printf("WARNING: %s: correcting fs_sblockloc from %jd to %d\n",
 		    fs->fs_fsmnt, fs->fs_sblockloc, SBLOCK_UFS2);
 		fs->fs_sblockloc = SBLOCK_UFS2;
 	}
 	if (MOUNTEDSOFTDEP(ump->um_mountp))
 		softdep_setup_sbupdate(ump, (struct fs *)bp->b_data, bp);
 	bcopy((caddr_t)fs, bp->b_data, (u_int)fs->fs_sbsize);
 	/* From here on fs refers to the copy inside the buffer. */
 	fs = (struct fs *)bp->b_data;
 	ffs_oldfscompat_write(fs, ump);
 	/*
 	 * Because we may have made changes to the superblock, we need to
 	 * recompute its check-hash.
 	 */
 	fs->fs_ckhash = ffs_calc_sbhash(fs);
 	if (devfdp->suspended)
 		bp->b_flags |= B_VALIDSUSPWRT;
 	if (devfdp->waitfor != MNT_WAIT)
 		bawrite(bp);
 	else if ((error = bwrite(bp)) != 0)
 		devfdp->error = error;
 	return (devfdp->error);
 }
 
 /*
  * Extended attribute control entry point.  With UFS_EXTATTR compiled in
  * the request goes to ufs_extattrctl(); otherwise it goes to
  * vfs_stdextattrctl().  The callee's result is returned unchanged.
  */
 static int
 ffs_extattrctl(struct mount *mp, int cmd, struct vnode *filename_vp,
 	int attrnamespace, const char *attrname)
 {
 	int error;
 
 #ifdef UFS_EXTATTR
 	error = ufs_extattrctl(mp, cmd, filename_vp, attrnamespace,
 	    attrname);
 #else
 	error = vfs_stdextattrctl(mp, cmd, filename_vp, attrnamespace,
 	    attrname);
 #endif
 	return (error);
 }
 
 /*
  * Release an in-core inode: free its dinode copy to the zone that
  * matches the filesystem type (when one is attached), then free the
  * inode itself.
  */
 static void
 ffs_ifree(struct ufsmount *ump, struct inode *ip)
 {
 
 	if (ump->um_fstype == UFS1 && ip->i_din1 != NULL) {
 		uma_zfree(uma_ufs1, ip->i_din1);
 	} else if (ip->i_din2 != NULL) {
 		uma_zfree(uma_ufs2, ip->i_din2);
 	}
 	uma_zfree(uma_inode, ip);
 }
 
 /*
  * Run-time switch (sysctl debug.dobkgrdwrite, read/write, default 1)
  * consulted by ffs_bufwrite(): when zero, no background write copies
  * are made.
  */
 static int dobkgrdwrite = 1;
 SYSCTL_INT(_debug, OID_AUTO, dobkgrdwrite, CTLFLAG_RW, &dobkgrdwrite, 0,
     "Do background writes (honoring the BV_BKGRDWRITE flag)?");
 
 /*
  * Complete a background write started from bwrite.
  *
  * Installed by ffs_bufwrite() as the b_iodone handler of the copy
  * buffer.  bp is that copy; the original buffer is looked up in the
  * same bufobj by logical block number.  On a write error the original
  * is flagged BV_BKGRDERR.  Remaining dependencies are moved back to the
  * original (SOFTUPDATES only), the copy is disposed of, and finally
  * BV_BKGRDINPROG is cleared on the original and any waiter is woken.
  */
 static void
 ffs_backgroundwritedone(struct buf *bp)
 {
 	struct bufobj *bufobj;
 	struct buf *origbp;
 
 #ifdef SOFTUPDATES
 	if (!LIST_EMPTY(&bp->b_dep) && (bp->b_ioflags & BIO_ERROR) != 0)
 		softdep_handle_error(bp);
 #endif
 
 	/*
 	 * Find the original buffer that we are writing.
 	 */
 	bufobj = bp->b_bufobj;
 	BO_LOCK(bufobj);
 	if ((origbp = gbincore(bp->b_bufobj, bp->b_lblkno)) == NULL)
 		panic("backgroundwritedone: lost buffer");
 
 	/*
 	 * We should mark the cylinder group buffer origbp as
 	 * dirty, to not lose the failed write.
 	 */
 	if ((bp->b_ioflags & BIO_ERROR) != 0)
 		origbp->b_vflags |= BV_BKGRDERR;
 	BO_UNLOCK(bufobj);
 	/*
 	 * Process dependencies then return any unfinished ones.
 	 */
 	if (!LIST_EMPTY(&bp->b_dep) && (bp->b_ioflags & BIO_ERROR) == 0)
 		buf_complete(bp);
 #ifdef SOFTUPDATES
 	if (!LIST_EMPTY(&bp->b_dep))
 		softdep_move_dependencies(bp, origbp);
 #endif
 	/*
 	 * This buffer is marked B_NOCACHE so when it is released
 	 * by biodone it will be tossed.
 	 */
 	bp->b_flags |= B_NOCACHE;
 	bp->b_flags &= ~B_CACHE;
 	pbrelvp(bp);
 
 	/*
 	 * Prevent brelse() from trying to keep and re-dirtying bp on
 	 * errors. It causes b_bufobj dereference in
 	 * bdirty()/reassignbuf(), and b_bufobj was cleared in
 	 * pbrelvp() above.
 	 */
 	if ((bp->b_ioflags & BIO_ERROR) != 0)
 		bp->b_flags |= B_INVAL;
 	bufdone(bp);
 	/* bp must not be used past this point; use the saved bufobj. */
 	BO_LOCK(bufobj);
 	/*
 	 * Clear the BV_BKGRDINPROG flag in the original buffer
 	 * and awaken it if it is waiting for the write to complete.
 	 * If BV_BKGRDINPROG is not set in the original buffer it must
 	 * have been released and re-instantiated - which is not legal.
 	 */
 	KASSERT((origbp->b_vflags & BV_BKGRDINPROG),
 	    ("backgroundwritedone: lost buffer2"));
 	origbp->b_vflags &= ~BV_BKGRDINPROG;
 	if (origbp->b_vflags & BV_BKGRDWAIT) {
 		origbp->b_vflags &= ~BV_BKGRDWAIT;
 		/* Matches the msleep() on &bp->b_xflags in ffs_bufwrite(). */
 		wakeup(&origbp->b_xflags);
 	}
 	BO_UNLOCK(bufobj);
 }
 
 
 /*
  * Write, release buffer on completion.  (Done by iodone
  * if async).  Do not bother writing anything if the buffer
  * is invalid.
  *
  * Note that we set B_CACHE here, indicating that buffer is
  * fully valid and thus cacheable.  This is true even of NFS
  * now so we set it generally.  This could be set either here
  * or in biodone() since the I/O is synchronous.  We put it
  * here.
  *
  * Returns 0 when the buffer is invalid (it is just released) or when an
  * asynchronous write is deferred with bdwrite() because a background
  * write is already in progress; otherwise returns the result of
  * bufwrite().  Panics if the buffer is not locked.
  */
 static int
 ffs_bufwrite(struct buf *bp)
 {
 	struct buf *newbp;
 	struct cg *cgp;
 
 	CTR3(KTR_BUF, "bufwrite(%p) vp %p flags %X", bp, bp->b_vp, bp->b_flags);
 	if (bp->b_flags & B_INVAL) {
 		brelse(bp);
 		return (0);
 	}
 
 	if (!BUF_ISLOCKED(bp))
 		panic("bufwrite: buffer is not busy???");
 	/*
 	 * If a background write is already in progress, delay
 	 * writing this block if it is asynchronous. Otherwise
 	 * wait for the background write to complete.
 	 */
 	BO_LOCK(bp->b_bufobj);
 	if (bp->b_vflags & BV_BKGRDINPROG) {
 		if (bp->b_flags & B_ASYNC) {
 			BO_UNLOCK(bp->b_bufobj);
 			bdwrite(bp);
 			return (0);
 		}
 		/* Woken by ffs_backgroundwritedone() via &bp->b_xflags. */
 		bp->b_vflags |= BV_BKGRDWAIT;
 		msleep(&bp->b_xflags, BO_LOCKPTR(bp->b_bufobj), PRIBIO,
 		    "bwrbg", 0);
 		if (bp->b_vflags & BV_BKGRDINPROG)
 			panic("bufwrite: still writing");
 	}
 	/* This write supersedes any earlier failed background write. */
 	bp->b_vflags &= ~BV_BKGRDERR;
 	BO_UNLOCK(bp->b_bufobj);
 
 	/*
 	 * If this buffer is marked for background writing and we
 	 * do not have to wait for it, make a copy and write the
 	 * copy so as to leave this buffer ready for further use.
 	 *
 	 * This optimization eats a lot of memory.  If we have a page
 	 * or buffer shortfall we can't do it.
 	 */
 	if (dobkgrdwrite && (bp->b_xflags & BX_BKGRDWRITE) &&
 	    (bp->b_flags & B_ASYNC) &&
 	    !vm_page_count_severe() &&
 	    !buf_dirty_count_severe()) {
 		KASSERT(bp->b_iodone == NULL,
 		    ("bufwrite: needs chained iodone (%p)", bp->b_iodone));
 
 		/* get a new block */
 		newbp = geteblk(bp->b_bufsize, GB_NOWAIT_BD);
 		/* No spare buffer: fall back to writing bp itself. */
 		if (newbp == NULL)
 			goto normal_write;
 
 		KASSERT(buf_mapped(bp), ("Unmapped cg"));
 		memcpy(newbp->b_data, bp->b_data, bp->b_bufsize);
 		BO_LOCK(bp->b_bufobj);
 		bp->b_vflags |= BV_BKGRDINPROG;
 		BO_UNLOCK(bp->b_bufobj);
 		/* Make the copy address the same block as the original. */
 		newbp->b_xflags |=
 		    (bp->b_xflags & BX_FSPRIV) | BX_BKGRDMARKER;
 		newbp->b_lblkno = bp->b_lblkno;
 		newbp->b_blkno = bp->b_blkno;
 		newbp->b_offset = bp->b_offset;
 		newbp->b_iodone = ffs_backgroundwritedone;
 		newbp->b_flags |= B_ASYNC;
 		newbp->b_flags &= ~B_INVAL;
 		pbgetvp(bp->b_vp, newbp);
 
 #ifdef SOFTUPDATES
 		/*
 		 * Move over the dependencies.  If there are rollbacks,
 		 * leave the parent buffer dirtied as it will need to
 		 * be written again.
 		 */
 		if (LIST_EMPTY(&bp->b_dep) ||
 		    softdep_move_dependencies(bp, newbp) == 0)
 			bundirty(bp);
 #else
 		bundirty(bp);
 #endif
 
 		/*
 		 * Initiate write on the copy, release the original.  The
 		 * BKGRDINPROG flag prevents it from going away until 
 		 * the background write completes. We have to recalculate
 		 * its check hash in case the buffer gets freed and then
 		 * reconstituted from the buffer cache during a later read.
 		 */
 		if ((bp->b_xflags & BX_CYLGRP) != 0) {
 			cgp = (struct cg *)bp->b_data;
 			/* The hash is computed with the field zeroed. */
 			cgp->cg_ckhash = 0;
 			cgp->cg_ckhash =
 			    calculate_crc32c(~0L, bp->b_data, bp->b_bcount);
 		}
 		bqrelse(bp);
 		/* From here on the copy is the buffer being written. */
 		bp = newbp;
 	} else
 		/* Mark the buffer clean */
 		bundirty(bp);
 
 
 	/* Let the normal bufwrite do the rest for us */
 normal_write:
 	/*
 	 * If we are writing a cylinder group, update its time.
 	 */
 	if ((bp->b_xflags & BX_CYLGRP) != 0) {
 		cgp = (struct cg *)bp->b_data;
 		cgp->cg_old_time = cgp->cg_time = time_second;
 	}
 	return (bufwrite(bp));
 }
 
 
 /*
  * Bufobj strategy routine for the device vnode underneath FFS.
  *
  * For BIO_WRITE requests it performs, in order:
  *  - a panic if the buffer's mount is MNTK_SUSPENDED and the buffer is
  *    not marked B_VALIDSUSPWRT;
  *  - ffs_copyonwrite() on the buffer (or on every member of a cluster)
  *    when the device vnode has VV_COPYONWRITE and snapshot data and the
  *    buffer was not marked B_NOCOPY; an error other than EOPNOTSUPP
  *    fails the request through bufdone() and returns;
  *  - buf_start() on buffers that carry dependencies (SOFTUPDATES only);
  *  - recomputation of the cylinder group check-hash.
  * B_VALIDSUSPWRT and B_NOCOPY are cleared from the buffer on the way.
  *
  * Requests that were not failed are passed to g_vfs_strategy();
  * non-read requests are marked BX_CVTENXIO first when ffs_enxio_enable
  * is set.
  */
 static void
 ffs_geom_strategy(struct bufobj *bo, struct buf *bp)
 {
 	struct vnode *vp;
 	struct buf *tbp;
 	int error, nocopy;
 
 	/*
 	 * This is the bufobj strategy for the private VCHR vnodes
 	 * used by FFS to access the underlying storage device.
 	 * We override the default bufobj strategy and thus bypass
 	 * VOP_STRATEGY() for these vnodes.
 	 */
 	vp = bo2vnode(bo);
 	KASSERT(bp->b_vp == NULL || bp->b_vp->v_type != VCHR ||
 	    bp->b_vp->v_rdev == NULL ||
 	    bp->b_vp->v_rdev->si_mountpt == NULL ||
 	    VFSTOUFS(bp->b_vp->v_rdev->si_mountpt) == NULL ||
 	    vp == VFSTOUFS(bp->b_vp->v_rdev->si_mountpt)->um_devvp,
 	    ("ffs_geom_strategy() with wrong vp"));
 	if (bp->b_iocmd == BIO_WRITE) {
 		if ((bp->b_flags & B_VALIDSUSPWRT) == 0 &&
 		    bp->b_vp != NULL && bp->b_vp->v_mount != NULL &&
 		    (bp->b_vp->v_mount->mnt_kern_flag & MNTK_SUSPENDED) != 0)
 			panic("ffs_geom_strategy: bad I/O");
 		/* Remember B_NOCOPY before both one-shot flags are cleared. */
 		nocopy = bp->b_flags & B_NOCOPY;
 		bp->b_flags &= ~(B_VALIDSUSPWRT | B_NOCOPY);
 		if ((vp->v_vflag & VV_COPYONWRITE) && nocopy == 0 &&
 		    vp->v_rdev->si_snapdata != NULL) {
 			if ((bp->b_flags & B_CLUSTER) != 0) {
 				/*
 				 * The running-buffer accounting for bp is
 				 * dropped while the cluster members are
 				 * copied and re-added once the loop is done.
 				 */
 				runningbufwakeup(bp);
 				TAILQ_FOREACH(tbp, &bp->b_cluster.cluster_head,
 					      b_cluster.cluster_entry) {
 					error = ffs_copyonwrite(vp, tbp);
 					if (error != 0 &&
 					    error != EOPNOTSUPP) {
 						bp->b_error = error;
 						bp->b_ioflags |= BIO_ERROR;
 						bufdone(bp);
 						return;
 					}
 				}
 				bp->b_runningbufspace = bp->b_bufsize;
 				atomic_add_long(&runningbufspace,
 					       bp->b_runningbufspace);
 			} else {
 				error = ffs_copyonwrite(vp, bp);
 				if (error != 0 && error != EOPNOTSUPP) {
 					bp->b_error = error;
 					bp->b_ioflags |= BIO_ERROR;
 					bufdone(bp);
 					return;
 				}
 			}
 		}
 #ifdef SOFTUPDATES
 		/* Notify the dependency code that the write is starting. */
 		if ((bp->b_flags & B_CLUSTER) != 0) {
 			TAILQ_FOREACH(tbp, &bp->b_cluster.cluster_head,
 				      b_cluster.cluster_entry) {
 				if (!LIST_EMPTY(&tbp->b_dep))
 					buf_start(tbp);
 			}
 		} else {
 			if (!LIST_EMPTY(&bp->b_dep))
 				buf_start(bp);
 		}
 
 #endif
 		/*
 		 * Check for metadata that needs check-hashes and update them.
 		 */
 		switch (bp->b_xflags & BX_FSPRIV) {
 		case BX_CYLGRP:
 			/* The hash is computed with the field zeroed. */
 			((struct cg *)bp->b_data)->cg_ckhash = 0;
 			((struct cg *)bp->b_data)->cg_ckhash =
 			    calculate_crc32c(~0L, bp->b_data, bp->b_bcount);
 			break;
 
 		case BX_SUPERBLOCK:
 		case BX_INODE:
 		case BX_INDIR:
 		case BX_DIR:
 			printf("Check-hash write is unimplemented!!!\n");
 			break;
 
 		case 0:
 			break;
 
 		default:
 			/* More than one BX_FSPRIV type bit is set. */
 			printf("multiple buffer types 0x%b\n",
 			    (u_int)(bp->b_xflags & BX_FSPRIV),
 			    PRINT_UFS_BUF_XFLAGS);
 			break;
 		}
 	}
 	if (bp->b_iocmd != BIO_READ && ffs_enxio_enable)
 		bp->b_xflags |= BX_CVTENXIO;
 	g_vfs_strategy(bo, bp);
 }
 
 /*
  * Return 1 if the mount's operations vector is ufs_vfsops, 0 otherwise.
  */
 int
 ffs_own_mount(const struct mount *mp)
 {
 
 	return (mp->mnt_op == &ufs_vfsops ? 1 : 0);
 }
 
 #ifdef	DDB
 #ifdef SOFTUPDATES
 
 /* defined in ffs_softdep.c */
 extern void db_print_ffs(struct ufsmount *ump);
 
 /*
  * DDB "show ffs" command.  With an address argument, the address is
  * taken to be a struct mount and db_print_ffs() is called for it.
  * Without one, db_print_ffs() is called for every entry on mountlist
  * whose filesystem type name equals ufs_vfsconf.vfc_name.
  */
 DB_SHOW_COMMAND(ffs, db_show_ffs)
 {
 	struct mount *mp;
 
 	if (!have_addr) {
 		TAILQ_FOREACH(mp, &mountlist, mnt_list) {
 			if (strcmp(mp->mnt_stat.f_fstypename,
 			    ufs_vfsconf.vfc_name) == 0)
 				db_print_ffs(VFSTOUFS(mp));
 		}
 		return;
 	}
 
 	db_print_ffs(VFSTOUFS((struct mount *)addr));
 }
 
 #endif	/* SOFTUPDATES */
 #endif	/* DDB */