diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c
index b46cc550c781..a102ce2e99a9 100644
--- a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c
+++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c
@@ -1,6335 +1,6356 @@
 /*
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
  * Common Development and Distribution License (the "License").
  * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or https://opensource.org/licenses/CDDL-1.0.
  * See the License for the specific language governing permissions
  * and limitations under the License.
  *
  * When distributing Covered Code, include this CDDL HEADER in each
  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  * If applicable, add the following below this CDDL HEADER, with the
  * fields enclosed by brackets "[]" replaced with your own identifying
  * information: Portions Copyright [yyyy] [name of copyright owner]
  *
  * CDDL HEADER END
  */
 
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
  * Copyright (c) 2014 Integros [integros.com]
  * Copyright 2017 Nexenta Systems, Inc.
  */
 
 /* Portions Copyright 2007 Jeremy Teo */
 /* Portions Copyright 2010 Robert Milkowski */
 
 
 #include <sys/types.h>
 #include <sys/param.h>
 #include <sys/time.h>
 #include <sys/systm.h>
 #include <sys/sysmacros.h>
 #include <sys/resource.h>
 #include <sys/vfs.h>
 #include <sys/endian.h>
 #include <sys/vm.h>
 #include <sys/vnode.h>
 #if __FreeBSD_version >= 1300102
 #include <sys/smr.h>
 #endif
 #include <sys/dirent.h>
 #include <sys/file.h>
 #include <sys/stat.h>
 #include <sys/kmem.h>
 #include <sys/taskq.h>
 #include <sys/uio.h>
 #include <sys/atomic.h>
 #include <sys/namei.h>
 #include <sys/mman.h>
 #include <sys/cmn_err.h>
 #include <sys/kdb.h>
 #include <sys/sysproto.h>
 #include <sys/errno.h>
 #include <sys/unistd.h>
 #include <sys/zfs_dir.h>
 #include <sys/zfs_ioctl.h>
 #include <sys/fs/zfs.h>
 #include <sys/dmu.h>
 #include <sys/dmu_objset.h>
 #include <sys/spa.h>
 #include <sys/txg.h>
 #include <sys/dbuf.h>
 #include <sys/zap.h>
 #include <sys/sa.h>
 #include <sys/policy.h>
 #include <sys/sunddi.h>
 #include <sys/filio.h>
 #include <sys/sid.h>
 #include <sys/zfs_ctldir.h>
 #include <sys/zfs_fuid.h>
 #include <sys/zfs_quota.h>
 #include <sys/zfs_sa.h>
 #include <sys/zfs_rlock.h>
 #include <sys/extdirent.h>
 #include <sys/bio.h>
 #include <sys/buf.h>
 #include <sys/sched.h>
 #include <sys/acl.h>
 #include <sys/vmmeter.h>
 #include <vm/vm_param.h>
 #include <sys/zil.h>
 #include <sys/zfs_vnops.h>
 
 #include <vm/vm_object.h>
 
 #include <sys/extattr.h>
 #include <sys/priv.h>
 
 #ifndef VN_OPEN_INVFS
 #define	VN_OPEN_INVFS	0x0
 #endif
 
 VFS_SMR_DECLARE;
 
 #if __FreeBSD_version < 1300103
 #define	NDFREE_PNBUF(ndp)	NDFREE((ndp), NDF_ONLY_PNBUF)
 #endif
 
 #if __FreeBSD_version >= 1300047
 #define	vm_page_wire_lock(pp)
 #define	vm_page_wire_unlock(pp)
 #else
 #define	vm_page_wire_lock(pp) vm_page_lock(pp)
 #define	vm_page_wire_unlock(pp) vm_page_unlock(pp)
 #endif
 
 #ifdef DEBUG_VFS_LOCKS
 #define	VNCHECKREF(vp)				  \
 	VNASSERT((vp)->v_holdcnt > 0 && (vp)->v_usecount > 0, vp,	\
 	    ("%s: wrong ref counts", __func__));
 #else
 #define	VNCHECKREF(vp)
 #endif
 
 #if __FreeBSD_version >= 1400045
 typedef uint64_t cookie_t;
 #else
 typedef ulong_t cookie_t;
 #endif
 
 /*
  * Programming rules.
  *
  * Each vnode op performs some logical unit of work.  To do this, the ZPL must
  * properly lock its in-core state, create a DMU transaction, do the work,
  * record this work in the intent log (ZIL), commit the DMU transaction,
  * and wait for the intent log to commit if it is a synchronous operation.
  * Moreover, the vnode ops must work in both normal and log replay context.
  * The ordering of events is important to avoid deadlocks and references
  * to freed memory.  The example below illustrates the following Big Rules:
  *
  *  (1)	A check must be made in each zfs thread for a mounted file system.
  *	This is done avoiding races using ZFS_ENTER(zfsvfs).
  *	A ZFS_EXIT(zfsvfs) is needed before all returns.  Any znodes
  *	must be checked with ZFS_VERIFY_ZP(zp).  Both of these macros
  *	can return EIO from the calling function.
  *
  *  (2)	VN_RELE() should always be the last thing except for zil_commit()
  *	(if necessary) and ZFS_EXIT(). This is for 3 reasons:
  *	First, if it's the last reference, the vnode/znode
  *	can be freed, so the zp may point to freed memory.  Second, the last
  *	reference will call zfs_zinactive(), which may induce a lot of work --
  *	pushing cached pages (which acquires range locks) and syncing out
  *	cached atime changes.  Third, zfs_zinactive() may require a new tx,
  *	which could deadlock the system if you were already holding one.
  *	If you must call VN_RELE() within a tx then use VN_RELE_ASYNC().
  *
  *  (3)	All range locks must be grabbed before calling dmu_tx_assign(),
  *	as they can span dmu_tx_assign() calls.
  *
  *  (4) If ZPL locks are held, pass TXG_NOWAIT as the second argument to
  *      dmu_tx_assign().  This is critical because we don't want to block
  *      while holding locks.
  *
  *	If no ZPL locks are held (aside from ZFS_ENTER()), use TXG_WAIT.  This
  *	reduces lock contention and CPU usage when we must wait (note that if
  *	throughput is constrained by the storage, nearly every transaction
  *	must wait).
  *
  *      Note, in particular, that if a lock is sometimes acquired before
  *      the tx assigns, and sometimes after (e.g. z_lock), then failing
  *      to use a non-blocking assign can deadlock the system.  The scenario:
  *
  *	Thread A has grabbed a lock before calling dmu_tx_assign().
  *	Thread B is in an already-assigned tx, and blocks for this lock.
  *	Thread A calls dmu_tx_assign(TXG_WAIT) and blocks in txg_wait_open()
  *	forever, because the previous txg can't quiesce until B's tx commits.
  *
  *	If dmu_tx_assign() returns ERESTART and zfsvfs->z_assign is TXG_NOWAIT,
  *	then drop all locks, call dmu_tx_wait(), and try again.  On subsequent
  *	calls to dmu_tx_assign(), pass TXG_NOTHROTTLE in addition to TXG_NOWAIT,
  *	to indicate that this operation has already called dmu_tx_wait().
  *	This will ensure that we don't retry forever, waiting a short bit
  *	each time.
  *
  *  (5)	If the operation succeeded, generate the intent log entry for it
  *	before dropping locks.  This ensures that the ordering of events
  *	in the intent log matches the order in which they actually occurred.
  *	During ZIL replay the zfs_log_* functions will update the sequence
  *	number to indicate the zil transaction has replayed.
  *
  *  (6)	At the end of each vnode op, the DMU tx must always commit,
  *	regardless of whether there were any errors.
  *
  *  (7)	After dropping all locks, invoke zil_commit(zilog, foid)
  *	to ensure that synchronous semantics are provided when necessary.
  *
  * In general, this is how things should be ordered in each vnode op:
  *
  *	ZFS_ENTER(zfsvfs);		// exit if unmounted
  * top:
  *	zfs_dirent_lookup(&dl, ...)	// lock directory entry (may VN_HOLD())
  *	rw_enter(...);			// grab any other locks you need
  *	tx = dmu_tx_create(...);	// get DMU tx
  *	dmu_tx_hold_*();		// hold each object you might modify
  *	error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT);
  *	if (error) {
  *		rw_exit(...);		// drop locks
  *		zfs_dirent_unlock(dl);	// unlock directory entry
  *		VN_RELE(...);		// release held vnodes
  *		if (error == ERESTART) {
  *			waited = B_TRUE;
  *			dmu_tx_wait(tx);
  *			dmu_tx_abort(tx);
  *			goto top;
  *		}
  *		dmu_tx_abort(tx);	// abort DMU tx
  *		ZFS_EXIT(zfsvfs);	// finished in zfs
  *		return (error);		// really out of space
  *	}
  *	error = do_real_work();		// do whatever this VOP does
  *	if (error == 0)
  *		zfs_log_*(...);		// on success, make ZIL entry
  *	dmu_tx_commit(tx);		// commit DMU tx -- error or not
  *	rw_exit(...);			// drop locks
  *	zfs_dirent_unlock(dl);		// unlock directory entry
  *	VN_RELE(...);			// release held vnodes
  *	zil_commit(zilog, foid);	// synchronous when necessary
  *	ZFS_EXIT(zfsvfs);		// finished in zfs
  *	return (error);			// done, report error
  */
 static int
 zfs_open(vnode_t **vpp, int flag, cred_t *cr)
 {
 	(void) cr;
 	znode_t	*zp = VTOZ(*vpp);
 	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
 
 	ZFS_ENTER(zfsvfs);
 	ZFS_VERIFY_ZP(zp);
 
 	if ((flag & FWRITE) && (zp->z_pflags & ZFS_APPENDONLY) &&
 	    ((flag & FAPPEND) == 0)) {
 		ZFS_EXIT(zfsvfs);
 		return (SET_ERROR(EPERM));
 	}
 
 	/* Keep a count of the synchronous opens in the znode */
 	if (flag & O_SYNC)
 		atomic_inc_32(&zp->z_sync_cnt);
 
 	ZFS_EXIT(zfsvfs);
 	return (0);
 }
 
 static int
 zfs_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr)
 {
 	(void) offset, (void) cr;
 	znode_t	*zp = VTOZ(vp);
 	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
 
 	ZFS_ENTER(zfsvfs);
 	ZFS_VERIFY_ZP(zp);
 
 	/* Decrement the synchronous opens in the znode */
 	if ((flag & O_SYNC) && (count == 1))
 		atomic_dec_32(&zp->z_sync_cnt);
 
 	ZFS_EXIT(zfsvfs);
 	return (0);
 }
 
 static int
 zfs_ioctl(vnode_t *vp, ulong_t com, intptr_t data, int flag, cred_t *cred,
     int *rvalp)
 {
 	(void) flag, (void) cred, (void) rvalp;
 	loff_t off;
 	int error;
 
 	switch (com) {
 	case _FIOFFS:
 	{
 		return (0);
 
 		/*
 		 * The following two ioctls are used by bfu.  Faking out,
 		 * necessary to avoid bfu errors.
 		 */
 	}
 	case _FIOGDIO:
 	case _FIOSDIO:
 	{
 		return (0);
 	}
 
 	case F_SEEK_DATA:
 	case F_SEEK_HOLE:
 	{
 		off = *(offset_t *)data;
 		/* offset parameter is in/out */
 		error = zfs_holey(VTOZ(vp), com, &off);
 		if (error)
 			return (error);
 		*(offset_t *)data = off;
 		return (0);
 	}
 	}
 	return (SET_ERROR(ENOTTY));
 }
 
 static vm_page_t
 page_busy(vnode_t *vp, int64_t start, int64_t off, int64_t nbytes)
 {
 	vm_object_t obj;
 	vm_page_t pp;
 	int64_t end;
 
 	/*
 	 * At present vm_page_clear_dirty extends the cleared range to DEV_BSIZE
 	 * aligned boundaries, if the range is not aligned.  As a result a
 	 * DEV_BSIZE subrange with partially dirty data may get marked as clean.
 	 * It may happen that all DEV_BSIZE subranges are marked clean and thus
 	 * the whole page would be considered clean despite have some
 	 * dirty data.
 	 * For this reason we should shrink the range to DEV_BSIZE aligned
 	 * boundaries before calling vm_page_clear_dirty.
 	 */
 	end = rounddown2(off + nbytes, DEV_BSIZE);
 	off = roundup2(off, DEV_BSIZE);
 	nbytes = end - off;
 
 	obj = vp->v_object;
 	zfs_vmobject_assert_wlocked_12(obj);
 #if __FreeBSD_version < 1300050
 	for (;;) {
 		if ((pp = vm_page_lookup(obj, OFF_TO_IDX(start))) != NULL &&
 		    pp->valid) {
 			if (vm_page_xbusied(pp)) {
 				/*
 				 * Reference the page before unlocking and
 				 * sleeping so that the page daemon is less
 				 * likely to reclaim it.
 				 */
 				vm_page_reference(pp);
 				vm_page_lock(pp);
 				zfs_vmobject_wunlock(obj);
 				vm_page_busy_sleep(pp, "zfsmwb", true);
 				zfs_vmobject_wlock(obj);
 				continue;
 			}
 			vm_page_sbusy(pp);
 		} else if (pp != NULL) {
 			ASSERT(!pp->valid);
 			pp = NULL;
 		}
 		if (pp != NULL) {
 			ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL);
 			vm_object_pip_add(obj, 1);
 			pmap_remove_write(pp);
 			if (nbytes != 0)
 				vm_page_clear_dirty(pp, off, nbytes);
 		}
 		break;
 	}
 #else
 	vm_page_grab_valid_unlocked(&pp, obj, OFF_TO_IDX(start),
 	    VM_ALLOC_NOCREAT | VM_ALLOC_SBUSY | VM_ALLOC_NORMAL |
 	    VM_ALLOC_IGN_SBUSY);
 	if (pp != NULL) {
 		ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL);
 		vm_object_pip_add(obj, 1);
 		pmap_remove_write(pp);
 		if (nbytes != 0)
 			vm_page_clear_dirty(pp, off, nbytes);
 	}
 #endif
 	return (pp);
 }
 
 static void
 page_unbusy(vm_page_t pp)
 {
 
 	vm_page_sunbusy(pp);
 #if __FreeBSD_version >= 1300041
 	vm_object_pip_wakeup(pp->object);
 #else
 	vm_object_pip_subtract(pp->object, 1);
 #endif
 }
 
 #if __FreeBSD_version > 1300051
 static vm_page_t
 page_hold(vnode_t *vp, int64_t start)
 {
 	vm_object_t obj;
 	vm_page_t m;
 
 	obj = vp->v_object;
 	vm_page_grab_valid_unlocked(&m, obj, OFF_TO_IDX(start),
 	    VM_ALLOC_NOCREAT | VM_ALLOC_WIRED | VM_ALLOC_IGN_SBUSY |
 	    VM_ALLOC_NOBUSY);
 	return (m);
 }
 #else
 static vm_page_t
 page_hold(vnode_t *vp, int64_t start)
 {
 	vm_object_t obj;
 	vm_page_t pp;
 
 	obj = vp->v_object;
 	zfs_vmobject_assert_wlocked(obj);
 
 	for (;;) {
 		if ((pp = vm_page_lookup(obj, OFF_TO_IDX(start))) != NULL &&
 		    pp->valid) {
 			if (vm_page_xbusied(pp)) {
 				/*
 				 * Reference the page before unlocking and
 				 * sleeping so that the page daemon is less
 				 * likely to reclaim it.
 				 */
 				vm_page_reference(pp);
 				vm_page_lock(pp);
 				zfs_vmobject_wunlock(obj);
 				vm_page_busy_sleep(pp, "zfsmwb", true);
 				zfs_vmobject_wlock(obj);
 				continue;
 			}
 
 			ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL);
 			vm_page_wire_lock(pp);
 			vm_page_hold(pp);
 			vm_page_wire_unlock(pp);
 
 		} else
 			pp = NULL;
 		break;
 	}
 	return (pp);
 }
 #endif
 
 static void
 page_unhold(vm_page_t pp)
 {
 
 	vm_page_wire_lock(pp);
 #if __FreeBSD_version >= 1300035
 	vm_page_unwire(pp, PQ_ACTIVE);
 #else
 	vm_page_unhold(pp);
 #endif
 	vm_page_wire_unlock(pp);
 }
 
 /*
  * When a file is memory mapped, we must keep the IO data synchronized
  * between the DMU cache and the memory mapped pages.  What this means:
  *
  * On Write:	If we find a memory mapped page, we write to *both*
  *		the page and the dmu buffer.
  */
 void
 update_pages(znode_t *zp, int64_t start, int len, objset_t *os)
 {
 	vm_object_t obj;
 	struct sf_buf *sf;
 	vnode_t *vp = ZTOV(zp);
 	caddr_t va;
 	int off;
 
 	ASSERT3P(vp->v_mount, !=, NULL);
 	obj = vp->v_object;
 	ASSERT3P(obj, !=, NULL);
 
 	off = start & PAGEOFFSET;
 	zfs_vmobject_wlock_12(obj);
 #if __FreeBSD_version >= 1300041
 	vm_object_pip_add(obj, 1);
 #endif
 	for (start &= PAGEMASK; len > 0; start += PAGESIZE) {
 		vm_page_t pp;
 		int nbytes = imin(PAGESIZE - off, len);
 
 		if ((pp = page_busy(vp, start, off, nbytes)) != NULL) {
 			zfs_vmobject_wunlock_12(obj);
 
 			va = zfs_map_page(pp, &sf);
 			(void) dmu_read(os, zp->z_id, start + off, nbytes,
 			    va + off, DMU_READ_PREFETCH);
 			zfs_unmap_page(sf);
 
 			zfs_vmobject_wlock_12(obj);
 			page_unbusy(pp);
 		}
 		len -= nbytes;
 		off = 0;
 	}
 #if __FreeBSD_version >= 1300041
 	vm_object_pip_wakeup(obj);
 #else
 	vm_object_pip_wakeupn(obj, 0);
 #endif
 	zfs_vmobject_wunlock_12(obj);
 }
 
 /*
  * Read with UIO_NOCOPY flag means that sendfile(2) requests
  * ZFS to populate a range of page cache pages with data.
  *
  * NOTE: this function could be optimized to pre-allocate
  * all pages in advance, drain exclusive busy on all of them,
  * map them into contiguous KVA region and populate them
  * in one single dmu_read() call.
  */
 int
 mappedread_sf(znode_t *zp, int nbytes, zfs_uio_t *uio)
 {
 	vnode_t *vp = ZTOV(zp);
 	objset_t *os = zp->z_zfsvfs->z_os;
 	struct sf_buf *sf;
 	vm_object_t obj;
 	vm_page_t pp;
 	int64_t start;
 	caddr_t va;
 	int len = nbytes;
 	int error = 0;
 
 	ASSERT3U(zfs_uio_segflg(uio), ==, UIO_NOCOPY);
 	ASSERT3P(vp->v_mount, !=, NULL);
 	obj = vp->v_object;
 	ASSERT3P(obj, !=, NULL);
 	ASSERT0(zfs_uio_offset(uio) & PAGEOFFSET);
 
 	zfs_vmobject_wlock_12(obj);
 	for (start = zfs_uio_offset(uio); len > 0; start += PAGESIZE) {
 		int bytes = MIN(PAGESIZE, len);
 
 		pp = vm_page_grab_unlocked(obj, OFF_TO_IDX(start),
 		    VM_ALLOC_SBUSY | VM_ALLOC_NORMAL | VM_ALLOC_IGN_SBUSY);
 		if (vm_page_none_valid(pp)) {
 			zfs_vmobject_wunlock_12(obj);
 			va = zfs_map_page(pp, &sf);
 			error = dmu_read(os, zp->z_id, start, bytes, va,
 			    DMU_READ_PREFETCH);
 			if (bytes != PAGESIZE && error == 0)
 				memset(va + bytes, 0, PAGESIZE - bytes);
 			zfs_unmap_page(sf);
 			zfs_vmobject_wlock_12(obj);
 #if  __FreeBSD_version >= 1300081
 			if (error == 0) {
 				vm_page_valid(pp);
 				vm_page_activate(pp);
 				vm_page_do_sunbusy(pp);
 			} else {
 				zfs_vmobject_wlock(obj);
 				if (!vm_page_wired(pp) && pp->valid == 0 &&
 				    vm_page_busy_tryupgrade(pp))
 					vm_page_free(pp);
 				else
 					vm_page_sunbusy(pp);
 				zfs_vmobject_wunlock(obj);
 			}
 #else
 			vm_page_do_sunbusy(pp);
 			vm_page_lock(pp);
 			if (error) {
 				if (pp->wire_count == 0 && pp->valid == 0 &&
 				    !vm_page_busied(pp))
 					vm_page_free(pp);
 			} else {
 				pp->valid = VM_PAGE_BITS_ALL;
 				vm_page_activate(pp);
 			}
 			vm_page_unlock(pp);
 #endif
 		} else {
 			ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL);
 			vm_page_do_sunbusy(pp);
 		}
 		if (error)
 			break;
 		zfs_uio_advance(uio, bytes);
 		len -= bytes;
 	}
 	zfs_vmobject_wunlock_12(obj);
 	return (error);
 }
 
 /*
  * When a file is memory mapped, we must keep the IO data synchronized
  * between the DMU cache and the memory mapped pages.  What this means:
  *
  * On Read:	We "read" preferentially from memory mapped pages,
  *		else we default from the dmu buffer.
  *
  * NOTE: We will always "break up" the IO into PAGESIZE uiomoves when
  *	 the file is memory mapped.
  */
 int
 mappedread(znode_t *zp, int nbytes, zfs_uio_t *uio)
 {
 	vnode_t *vp = ZTOV(zp);
 	vm_object_t obj;
 	int64_t start;
 	int len = nbytes;
 	int off;
 	int error = 0;
 
 	ASSERT3P(vp->v_mount, !=, NULL);
 	obj = vp->v_object;
 	ASSERT3P(obj, !=, NULL);
 
 	start = zfs_uio_offset(uio);
 	off = start & PAGEOFFSET;
 	zfs_vmobject_wlock_12(obj);
 	for (start &= PAGEMASK; len > 0; start += PAGESIZE) {
 		vm_page_t pp;
 		uint64_t bytes = MIN(PAGESIZE - off, len);
 
 		if ((pp = page_hold(vp, start))) {
 			struct sf_buf *sf;
 			caddr_t va;
 
 			zfs_vmobject_wunlock_12(obj);
 			va = zfs_map_page(pp, &sf);
 			error = vn_io_fault_uiomove(va + off, bytes,
 			    GET_UIO_STRUCT(uio));
 			zfs_unmap_page(sf);
 			zfs_vmobject_wlock_12(obj);
 			page_unhold(pp);
 		} else {
 			zfs_vmobject_wunlock_12(obj);
 			error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl),
 			    uio, bytes);
 			zfs_vmobject_wlock_12(obj);
 		}
 		len -= bytes;
 		off = 0;
 		if (error)
 			break;
 	}
 	zfs_vmobject_wunlock_12(obj);
 	return (error);
 }
 
 int
 zfs_write_simple(znode_t *zp, const void *data, size_t len,
     loff_t pos, size_t *presid)
 {
 	int error = 0;
 	ssize_t resid;
 
 	error = vn_rdwr(UIO_WRITE, ZTOV(zp), __DECONST(void *, data), len, pos,
 	    UIO_SYSSPACE, IO_SYNC, kcred, NOCRED, &resid, curthread);
 
 	if (error) {
 		return (SET_ERROR(error));
 	} else if (presid == NULL) {
 		if (resid != 0) {
 			error = SET_ERROR(EIO);
 		}
 	} else {
 		*presid = resid;
 	}
 	return (error);
 }
 
 void
 zfs_zrele_async(znode_t *zp)
 {
 	vnode_t *vp = ZTOV(zp);
 	objset_t *os = ITOZSB(vp)->z_os;
 
 	VN_RELE_ASYNC(vp, dsl_pool_zrele_taskq(dmu_objset_pool(os)));
 }
 
 static int
 zfs_dd_callback(struct mount *mp, void *arg, int lkflags, struct vnode **vpp)
 {
 	int error;
 
 	*vpp = arg;
 	error = vn_lock(*vpp, lkflags);
 	if (error != 0)
 		vrele(*vpp);
 	return (error);
 }
 
 static int
 zfs_lookup_lock(vnode_t *dvp, vnode_t *vp, const char *name, int lkflags)
 {
 	znode_t *zdp = VTOZ(dvp);
 	zfsvfs_t *zfsvfs __unused = zdp->z_zfsvfs;
 	int error;
 	int ltype;
 
 	if (zfsvfs->z_replay == B_FALSE)
 		ASSERT_VOP_LOCKED(dvp, __func__);
 
 	if (name[0] == 0 || (name[0] == '.' && name[1] == 0)) {
 		ASSERT3P(dvp, ==, vp);
 		vref(dvp);
 		ltype = lkflags & LK_TYPE_MASK;
 		if (ltype != VOP_ISLOCKED(dvp)) {
 			if (ltype == LK_EXCLUSIVE)
 				vn_lock(dvp, LK_UPGRADE | LK_RETRY);
 			else /* if (ltype == LK_SHARED) */
 				vn_lock(dvp, LK_DOWNGRADE | LK_RETRY);
 
 			/*
 			 * Relock for the "." case could leave us with
 			 * reclaimed vnode.
 			 */
 			if (VN_IS_DOOMED(dvp)) {
 				vrele(dvp);
 				return (SET_ERROR(ENOENT));
 			}
 		}
 		return (0);
 	} else if (name[0] == '.' && name[1] == '.' && name[2] == 0) {
 		/*
 		 * Note that in this case, dvp is the child vnode, and we
 		 * are looking up the parent vnode - exactly reverse from
 		 * normal operation.  Unlocking dvp requires some rather
 		 * tricky unlock/relock dance to prevent mp from being freed;
 		 * use vn_vget_ino_gen() which takes care of all that.
 		 *
 		 * XXX Note that there is a time window when both vnodes are
 		 * unlocked.  It is possible, although highly unlikely, that
 		 * during that window the parent-child relationship between
 		 * the vnodes may change, for example, get reversed.
 		 * In that case we would have a wrong lock order for the vnodes.
 		 * All other filesystems seem to ignore this problem, so we
 		 * do the same here.
 		 * A potential solution could be implemented as follows:
 		 * - using LK_NOWAIT when locking the second vnode and retrying
 		 *   if necessary
 		 * - checking that the parent-child relationship still holds
 		 *   after locking both vnodes and retrying if it doesn't
 		 */
 		error = vn_vget_ino_gen(dvp, zfs_dd_callback, vp, lkflags, &vp);
 		return (error);
 	} else {
 		error = vn_lock(vp, lkflags);
 		if (error != 0)
 			vrele(vp);
 		return (error);
 	}
 }
 
 /*
  * Lookup an entry in a directory, or an extended attribute directory.
  * If it exists, return a held vnode reference for it.
  *
  *	IN:	dvp	- vnode of directory to search.
  *		nm	- name of entry to lookup.
  *		pnp	- full pathname to lookup [UNUSED].
  *		flags	- LOOKUP_XATTR set if looking for an attribute.
  *		rdir	- root directory vnode [UNUSED].
  *		cr	- credentials of caller.
  *		ct	- caller context
  *
  *	OUT:	vpp	- vnode of located entry, NULL if not found.
  *
  *	RETURN:	0 on success, error code on failure.
  *
  * Timestamps:
  *	NA
  */
 static int
 zfs_lookup(vnode_t *dvp, const char *nm, vnode_t **vpp,
     struct componentname *cnp, int nameiop, cred_t *cr, int flags,
     boolean_t cached)
 {
 	znode_t *zdp = VTOZ(dvp);
 	znode_t *zp;
 	zfsvfs_t *zfsvfs = zdp->z_zfsvfs;
 #if	__FreeBSD_version > 1300124
 	seqc_t dvp_seqc;
 #endif
 	int	error = 0;
 
 	/*
 	 * Fast path lookup, however we must skip DNLC lookup
 	 * for case folding or normalizing lookups because the
 	 * DNLC code only stores the passed in name.  This means
 	 * creating 'a' and removing 'A' on a case insensitive
 	 * file system would work, but DNLC still thinks 'a'
 	 * exists and won't let you create it again on the next
 	 * pass through fast path.
 	 */
 	if (!(flags & LOOKUP_XATTR)) {
 		if (dvp->v_type != VDIR) {
 			return (SET_ERROR(ENOTDIR));
 		} else if (zdp->z_sa_hdl == NULL) {
 			return (SET_ERROR(EIO));
 		}
 	}
 
 	DTRACE_PROBE2(zfs__fastpath__lookup__miss, vnode_t *, dvp,
 	    const char *, nm);
 
 	ZFS_ENTER(zfsvfs);
 	ZFS_VERIFY_ZP(zdp);
 
 #if	__FreeBSD_version > 1300124
 	dvp_seqc = vn_seqc_read_notmodify(dvp);
 #endif
 
 	*vpp = NULL;
 
 	if (flags & LOOKUP_XATTR) {
 		/*
 		 * If the xattr property is off, refuse the lookup request.
 		 */
 		if (!(zfsvfs->z_flags & ZSB_XATTR)) {
 			ZFS_EXIT(zfsvfs);
 			return (SET_ERROR(EOPNOTSUPP));
 		}
 
 		/*
 		 * We don't allow recursive attributes..
 		 * Maybe someday we will.
 		 */
 		if (zdp->z_pflags & ZFS_XATTR) {
 			ZFS_EXIT(zfsvfs);
 			return (SET_ERROR(EINVAL));
 		}
 
 		if ((error = zfs_get_xattrdir(VTOZ(dvp), &zp, cr, flags))) {
 			ZFS_EXIT(zfsvfs);
 			return (error);
 		}
 		*vpp = ZTOV(zp);
 
 		/*
 		 * Do we have permission to get into attribute directory?
 		 */
 		error = zfs_zaccess(zp, ACE_EXECUTE, 0, B_FALSE, cr);
 		if (error) {
 			vrele(ZTOV(zp));
 		}
 
 		ZFS_EXIT(zfsvfs);
 		return (error);
 	}
 
 	/*
 	 * Check accessibility of directory if we're not coming in via
 	 * VOP_CACHEDLOOKUP.
 	 */
 	if (!cached) {
 #ifdef NOEXECCHECK
 		if ((cnp->cn_flags & NOEXECCHECK) != 0) {
 			cnp->cn_flags &= ~NOEXECCHECK;
 		} else
 #endif
 		if ((error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr))) {
 			ZFS_EXIT(zfsvfs);
 			return (error);
 		}
 	}
 
 	if (zfsvfs->z_utf8 && u8_validate(nm, strlen(nm),
 	    NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
 		ZFS_EXIT(zfsvfs);
 		return (SET_ERROR(EILSEQ));
 	}
 
 
 	/*
 	 * First handle the special cases.
 	 */
 	if ((cnp->cn_flags & ISDOTDOT) != 0) {
 		/*
 		 * If we are a snapshot mounted under .zfs, return
 		 * the vp for the snapshot directory.
 		 */
 		if (zdp->z_id == zfsvfs->z_root && zfsvfs->z_parent != zfsvfs) {
 			struct componentname cn;
 			vnode_t *zfsctl_vp;
 			int ltype;
 
 			ZFS_EXIT(zfsvfs);
 			ltype = VOP_ISLOCKED(dvp);
 			VOP_UNLOCK1(dvp);
 			error = zfsctl_root(zfsvfs->z_parent, LK_SHARED,
 			    &zfsctl_vp);
 			if (error == 0) {
 				cn.cn_nameptr = "snapshot";
 				cn.cn_namelen = strlen(cn.cn_nameptr);
 				cn.cn_nameiop = cnp->cn_nameiop;
 				cn.cn_flags = cnp->cn_flags & ~ISDOTDOT;
 				cn.cn_lkflags = cnp->cn_lkflags;
 				error = VOP_LOOKUP(zfsctl_vp, vpp, &cn);
 				vput(zfsctl_vp);
 			}
 			vn_lock(dvp, ltype | LK_RETRY);
 			return (error);
 		}
 	}
 	if (zfs_has_ctldir(zdp) && strcmp(nm, ZFS_CTLDIR_NAME) == 0) {
 		ZFS_EXIT(zfsvfs);
 		if ((cnp->cn_flags & ISLASTCN) != 0 && nameiop != LOOKUP)
 			return (SET_ERROR(ENOTSUP));
 		error = zfsctl_root(zfsvfs, cnp->cn_lkflags, vpp);
 		return (error);
 	}
 
 	/*
 	 * The loop is retry the lookup if the parent-child relationship
 	 * changes during the dot-dot locking complexities.
 	 */
 	for (;;) {
 		uint64_t parent;
 
 		error = zfs_dirlook(zdp, nm, &zp);
 		if (error == 0)
 			*vpp = ZTOV(zp);
 
 		ZFS_EXIT(zfsvfs);
 		if (error != 0)
 			break;
 
 		error = zfs_lookup_lock(dvp, *vpp, nm, cnp->cn_lkflags);
 		if (error != 0) {
 			/*
 			 * If we've got a locking error, then the vnode
 			 * got reclaimed because of a force unmount.
 			 * We never enter doomed vnodes into the name cache.
 			 */
 			*vpp = NULL;
 			return (error);
 		}
 
 		if ((cnp->cn_flags & ISDOTDOT) == 0)
 			break;
 
 		ZFS_ENTER(zfsvfs);
 		if (zdp->z_sa_hdl == NULL) {
 			error = SET_ERROR(EIO);
 		} else {
 			error = sa_lookup(zdp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs),
 			    &parent, sizeof (parent));
 		}
 		if (error != 0) {
 			ZFS_EXIT(zfsvfs);
 			vput(ZTOV(zp));
 			break;
 		}
 		if (zp->z_id == parent) {
 			ZFS_EXIT(zfsvfs);
 			break;
 		}
 		vput(ZTOV(zp));
 	}
 
 	if (error != 0)
 		*vpp = NULL;
 
 	/* Translate errors and add SAVENAME when needed. */
 	if (cnp->cn_flags & ISLASTCN) {
 		switch (nameiop) {
 		case CREATE:
 		case RENAME:
 			if (error == ENOENT) {
 				error = EJUSTRETURN;
+#if __FreeBSD_version < 1400068
 				cnp->cn_flags |= SAVENAME;
+#endif
 				break;
 			}
 			zfs_fallthrough;
 		case DELETE:
+#if __FreeBSD_version < 1400068
 			if (error == 0)
 				cnp->cn_flags |= SAVENAME;
+#endif
 			break;
 		}
 	}
 
 #if	__FreeBSD_version > 1300124
 	if ((cnp->cn_flags & ISDOTDOT) != 0) {
 		/*
 		 * FIXME: zfs_lookup_lock relocks vnodes and does nothing to
 		 * handle races. In particular different callers may end up
 		 * with different vnodes and will try to add conflicting
 		 * entries to the namecache.
 		 *
 		 * While finding different result may be acceptable in face
 		 * of concurrent modification, adding conflicting entries
 		 * trips over an assert in the namecache.
 		 *
 		 * Ultimately let an entry through once everything settles.
 		 */
 		if (!vn_seqc_consistent(dvp, dvp_seqc)) {
 			cnp->cn_flags &= ~MAKEENTRY;
 		}
 	}
 #endif
 
 	/* Insert name into cache (as non-existent) if appropriate. */
 	if (zfsvfs->z_use_namecache && !zfsvfs->z_replay &&
 	    error == ENOENT && (cnp->cn_flags & MAKEENTRY) != 0)
 		cache_enter(dvp, NULL, cnp);
 
 	/* Insert name into cache if appropriate. */
 	if (zfsvfs->z_use_namecache && !zfsvfs->z_replay &&
 	    error == 0 && (cnp->cn_flags & MAKEENTRY)) {
 		if (!(cnp->cn_flags & ISLASTCN) ||
 		    (nameiop != DELETE && nameiop != RENAME)) {
 			cache_enter(dvp, *vpp, cnp);
 		}
 	}
 
 	return (error);
 }
 
 /*
  * Attempt to create a new entry in a directory.  If the entry
  * already exists, truncate the file if permissible, else return
  * an error.  Return the vp of the created or trunc'd file.
  *
  *	IN:	dvp	- vnode of directory to put new file entry in.
  *		name	- name of new file entry.
  *		vap	- attributes of new file.
  *		excl	- flag indicating exclusive or non-exclusive mode.
  *		mode	- mode to open file with.
  *		cr	- credentials of caller.
  *		flag	- large file flag [UNUSED].
  *		ct	- caller context
  *		vsecp	- ACL to be set
  *
  *	OUT:	vpp	- vnode of created or trunc'd entry.
  *
  *	RETURN:	0 on success, error code on failure.
  *
  * Timestamps:
  *	dvp - ctime|mtime updated if new entry created
  *	 vp - ctime|mtime always, atime if new
  */
 int
 zfs_create(znode_t *dzp, const char *name, vattr_t *vap, int excl, int mode,
     znode_t **zpp, cred_t *cr, int flag, vsecattr_t *vsecp)
 {
 	(void) excl, (void) mode, (void) flag;
 	znode_t		*zp;
 	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
 	zilog_t		*zilog;
 	objset_t	*os;
 	dmu_tx_t	*tx;
 	int		error;
 	uid_t		uid = crgetuid(cr);
 	gid_t		gid = crgetgid(cr);
 	uint64_t	projid = ZFS_DEFAULT_PROJID;
 	zfs_acl_ids_t   acl_ids;
 	boolean_t	fuid_dirtied;
 	uint64_t	txtype;
 #ifdef DEBUG_VFS_LOCKS
 	vnode_t	*dvp = ZTOV(dzp);
 #endif
 
 	/*
 	 * If we have an ephemeral id, ACL, or XVATTR then
 	 * make sure file system is at proper version
 	 */
 	if (zfsvfs->z_use_fuids == B_FALSE &&
 	    (vsecp || (vap->va_mask & AT_XVATTR) ||
 	    IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid)))
 		return (SET_ERROR(EINVAL));
 
 	ZFS_ENTER(zfsvfs);
 	ZFS_VERIFY_ZP(dzp);
 	os = zfsvfs->z_os;
 	zilog = zfsvfs->z_log;
 
 	if (zfsvfs->z_utf8 && u8_validate(name, strlen(name),
 	    NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
 		ZFS_EXIT(zfsvfs);
 		return (SET_ERROR(EILSEQ));
 	}
 
 	if (vap->va_mask & AT_XVATTR) {
 		if ((error = secpolicy_xvattr(ZTOV(dzp), (xvattr_t *)vap,
 		    crgetuid(cr), cr, vap->va_type)) != 0) {
 			ZFS_EXIT(zfsvfs);
 			return (error);
 		}
 	}
 
 	*zpp = NULL;
 
 	if ((vap->va_mode & S_ISVTX) && secpolicy_vnode_stky_modify(cr))
 		vap->va_mode &= ~S_ISVTX;
 
 	error = zfs_dirent_lookup(dzp, name, &zp, ZNEW);
 	if (error) {
 		ZFS_EXIT(zfsvfs);
 		return (error);
 	}
 	ASSERT3P(zp, ==, NULL);
 
 	/*
 	 * Create a new file object and update the directory
 	 * to reference it.
 	 */
 	if ((error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr))) {
 		goto out;
 	}
 
 	/*
 	 * We only support the creation of regular files in
 	 * extended attribute directories.
 	 */
 
 	if ((dzp->z_pflags & ZFS_XATTR) &&
 	    (vap->va_type != VREG)) {
 		error = SET_ERROR(EINVAL);
 		goto out;
 	}
 
 	if ((error = zfs_acl_ids_create(dzp, 0, vap,
 	    cr, vsecp, &acl_ids)) != 0)
 		goto out;
 
 	if (S_ISREG(vap->va_mode) || S_ISDIR(vap->va_mode))
 		projid = zfs_inherit_projid(dzp);
 	if (zfs_acl_ids_overquota(zfsvfs, &acl_ids, projid)) {
 		zfs_acl_ids_free(&acl_ids);
 		error = SET_ERROR(EDQUOT);
 		goto out;
 	}
 
 	getnewvnode_reserve_();
 
 	tx = dmu_tx_create(os);
 
 	dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
 	    ZFS_SA_BASE_ATTR_SIZE);
 
 	fuid_dirtied = zfsvfs->z_fuid_dirty;
 	if (fuid_dirtied)
 		zfs_fuid_txhold(zfsvfs, tx);
 	dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name);
 	dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE);
 	if (!zfsvfs->z_use_sa &&
 	    acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
 		dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
 		    0, acl_ids.z_aclp->z_acl_bytes);
 	}
 	error = dmu_tx_assign(tx, TXG_WAIT);
 	if (error) {
 		zfs_acl_ids_free(&acl_ids);
 		dmu_tx_abort(tx);
 		getnewvnode_drop_reserve();
 		ZFS_EXIT(zfsvfs);
 		return (error);
 	}
 	zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);
 	if (fuid_dirtied)
 		zfs_fuid_sync(zfsvfs, tx);
 
 	(void) zfs_link_create(dzp, name, zp, tx, ZNEW);
 	txtype = zfs_log_create_txtype(Z_FILE, vsecp, vap);
 	zfs_log_create(zilog, tx, txtype, dzp, zp, name,
 	    vsecp, acl_ids.z_fuidp, vap);
 	zfs_acl_ids_free(&acl_ids);
 	dmu_tx_commit(tx);
 
 	getnewvnode_drop_reserve();
 
 out:
 	VNCHECKREF(dvp);
 	if (error == 0) {
 		*zpp = zp;
 	}
 
 	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
 		zil_commit(zilog, 0);
 
 	ZFS_EXIT(zfsvfs);
 	return (error);
 }
 
 /*
  * Remove an entry from a directory.
  *
  *	IN:	dvp	- vnode of directory to remove entry from.
  *		name	- name of entry to remove.
  *		cr	- credentials of caller.
  *		ct	- caller context
  *		flags	- case flags
  *
  *	RETURN:	0 on success, error code on failure.
  *
  * Timestamps:
  *	dvp - ctime|mtime
  *	 vp - ctime (if nlink > 0)
  */
 static int
 zfs_remove_(vnode_t *dvp, vnode_t *vp, const char *name, cred_t *cr)
 {
 	znode_t		*dzp = VTOZ(dvp);
 	znode_t		*zp;
 	znode_t		*xzp;
 	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
 	zilog_t		*zilog;
 	uint64_t	xattr_obj;
 	uint64_t	obj = 0;
 	dmu_tx_t	*tx;
 	boolean_t	unlinked;
 	uint64_t	txtype;
 	int		error;
 
 
 	ZFS_ENTER(zfsvfs);
 	ZFS_VERIFY_ZP(dzp);
 	zp = VTOZ(vp);
 	ZFS_VERIFY_ZP(zp);
 	zilog = zfsvfs->z_log;
 
 	xattr_obj = 0;
 	xzp = NULL;
 
 	if ((error = zfs_zaccess_delete(dzp, zp, cr))) {
 		goto out;
 	}
 
 	/*
 	 * Need to use rmdir for removing directories.
 	 */
 	if (vp->v_type == VDIR) {
 		error = SET_ERROR(EPERM);
 		goto out;
 	}
 
 	vnevent_remove(vp, dvp, name, ct);
 
 	obj = zp->z_id;
 
 	/* are there any extended attributes? */
 	error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs),
 	    &xattr_obj, sizeof (xattr_obj));
 	if (error == 0 && xattr_obj) {
 		error = zfs_zget(zfsvfs, xattr_obj, &xzp);
 		ASSERT0(error);
 	}
 
 	/*
 	 * We may delete the znode now, or we may put it in the unlinked set;
 	 * it depends on whether we're the last link, and on whether there are
 	 * other holds on the vnode.  So we dmu_tx_hold() the right things to
 	 * allow for either case.
 	 */
 	tx = dmu_tx_create(zfsvfs->z_os);
 	dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name);
 	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
 	zfs_sa_upgrade_txholds(tx, zp);
 	zfs_sa_upgrade_txholds(tx, dzp);
 
 	if (xzp) {
 		dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
 		dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE);
 	}
 
 	/* charge as an update -- would be nice not to charge at all */
 	dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
 
 	/*
 	 * Mark this transaction as typically resulting in a net free of space
 	 */
 	dmu_tx_mark_netfree(tx);
 
 	error = dmu_tx_assign(tx, TXG_WAIT);
 	if (error) {
 		dmu_tx_abort(tx);
 		ZFS_EXIT(zfsvfs);
 		return (error);
 	}
 
 	/*
 	 * Remove the directory entry.
 	 */
 	error = zfs_link_destroy(dzp, name, zp, tx, ZEXISTS, &unlinked);
 
 	if (error) {
 		dmu_tx_commit(tx);
 		goto out;
 	}
 
 	if (unlinked) {
 		zfs_unlinked_add(zp, tx);
 		vp->v_vflag |= VV_NOSYNC;
 	}
 	/* XXX check changes to linux vnops */
 	txtype = TX_REMOVE;
 	zfs_log_remove(zilog, tx, txtype, dzp, name, obj, unlinked);
 
 	dmu_tx_commit(tx);
 out:
 
 	if (xzp)
 		vrele(ZTOV(xzp));
 
 	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
 		zil_commit(zilog, 0);
 
 
 	ZFS_EXIT(zfsvfs);
 	return (error);
 }
 
 
 static int
 zfs_lookup_internal(znode_t *dzp, const char *name, vnode_t **vpp,
     struct componentname *cnp, int nameiop)
 {
 	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
 	int error;
 
 	cnp->cn_nameptr = __DECONST(char *, name);
 	cnp->cn_namelen = strlen(name);
 	cnp->cn_nameiop = nameiop;
-	cnp->cn_flags = ISLASTCN | SAVENAME;
+	cnp->cn_flags = ISLASTCN;
+#if __FreeBSD_version < 1400068
+	cnp->cn_flags |= SAVENAME;
+#endif
 	cnp->cn_lkflags = LK_EXCLUSIVE | LK_RETRY;
 	cnp->cn_cred = kcred;
 #if __FreeBSD_version < 1400037
 	cnp->cn_thread = curthread;
 #endif
 
 	if (zfsvfs->z_use_namecache && !zfsvfs->z_replay) {
 		struct vop_lookup_args a;
 
 		a.a_gen.a_desc = &vop_lookup_desc;
 		a.a_dvp = ZTOV(dzp);
 		a.a_vpp = vpp;
 		a.a_cnp = cnp;
 		error = vfs_cache_lookup(&a);
 	} else {
 		error = zfs_lookup(ZTOV(dzp), name, vpp, cnp, nameiop, kcred, 0,
 		    B_FALSE);
 	}
 #ifdef ZFS_DEBUG
 	if (error) {
 		printf("got error %d on name %s on op %d\n", error, name,
 		    nameiop);
 		kdb_backtrace();
 	}
 #endif
 	return (error);
 }
 
 int
 zfs_remove(znode_t *dzp, const char *name, cred_t *cr, int flags)
 {
 	vnode_t *vp;
 	int error;
 	struct componentname cn;
 
 	if ((error = zfs_lookup_internal(dzp, name, &vp, &cn, DELETE)))
 		return (error);
 
 	error = zfs_remove_(ZTOV(dzp), vp, name, cr);
 	vput(vp);
 	return (error);
 }
 /*
  * Create a new directory and insert it into dvp using the name
  * provided.  Return a pointer to the inserted directory.
  *
  *	IN:	dvp	- vnode of directory to add subdir to.
  *		dirname	- name of new directory.
  *		vap	- attributes of new directory.
  *		cr	- credentials of caller.
  *		ct	- caller context
  *		flags	- case flags
  *		vsecp	- ACL to be set
  *
  *	OUT:	vpp	- vnode of created directory.
  *
  *	RETURN:	0 on success, error code on failure.
  *
  * Timestamps:
  *	dvp - ctime|mtime updated
  *	 vp - ctime|mtime|atime updated
  */
 int
 zfs_mkdir(znode_t *dzp, const char *dirname, vattr_t *vap, znode_t **zpp,
     cred_t *cr, int flags, vsecattr_t *vsecp)
 {
 	(void) flags, (void) vsecp;
 	znode_t		*zp;
 	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
 	zilog_t		*zilog;
 	uint64_t	txtype;
 	dmu_tx_t	*tx;
 	int		error;
 	uid_t		uid = crgetuid(cr);
 	gid_t		gid = crgetgid(cr);
 	zfs_acl_ids_t   acl_ids;
 	boolean_t	fuid_dirtied;
 
 	ASSERT3U(vap->va_type, ==, VDIR);
 
 	/*
 	 * If we have an ephemeral id, ACL, or XVATTR then
 	 * make sure file system is at proper version
 	 */
 	if (zfsvfs->z_use_fuids == B_FALSE &&
 	    ((vap->va_mask & AT_XVATTR) ||
 	    IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid)))
 		return (SET_ERROR(EINVAL));
 
 	ZFS_ENTER(zfsvfs);
 	ZFS_VERIFY_ZP(dzp);
 	zilog = zfsvfs->z_log;
 
 	if (dzp->z_pflags & ZFS_XATTR) {
 		ZFS_EXIT(zfsvfs);
 		return (SET_ERROR(EINVAL));
 	}
 
 	if (zfsvfs->z_utf8 && u8_validate(dirname,
 	    strlen(dirname), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
 		ZFS_EXIT(zfsvfs);
 		return (SET_ERROR(EILSEQ));
 	}
 
 	if (vap->va_mask & AT_XVATTR) {
 		if ((error = secpolicy_xvattr(ZTOV(dzp), (xvattr_t *)vap,
 		    crgetuid(cr), cr, vap->va_type)) != 0) {
 			ZFS_EXIT(zfsvfs);
 			return (error);
 		}
 	}
 
 	if ((error = zfs_acl_ids_create(dzp, 0, vap, cr,
 	    NULL, &acl_ids)) != 0) {
 		ZFS_EXIT(zfsvfs);
 		return (error);
 	}
 
 	/*
 	 * First make sure the new directory doesn't exist.
 	 *
 	 * Existence is checked first to make sure we don't return
 	 * EACCES instead of EEXIST which can cause some applications
 	 * to fail.
 	 */
 	*zpp = NULL;
 
 	if ((error = zfs_dirent_lookup(dzp, dirname, &zp, ZNEW))) {
 		zfs_acl_ids_free(&acl_ids);
 		ZFS_EXIT(zfsvfs);
 		return (error);
 	}
 	ASSERT3P(zp, ==, NULL);
 
 	if ((error = zfs_zaccess(dzp, ACE_ADD_SUBDIRECTORY, 0, B_FALSE, cr))) {
 		zfs_acl_ids_free(&acl_ids);
 		ZFS_EXIT(zfsvfs);
 		return (error);
 	}
 
 	if (zfs_acl_ids_overquota(zfsvfs, &acl_ids, zfs_inherit_projid(dzp))) {
 		zfs_acl_ids_free(&acl_ids);
 		ZFS_EXIT(zfsvfs);
 		return (SET_ERROR(EDQUOT));
 	}
 
 	/*
 	 * Add a new entry to the directory.
 	 */
 	getnewvnode_reserve_();
 	tx = dmu_tx_create(zfsvfs->z_os);
 	dmu_tx_hold_zap(tx, dzp->z_id, TRUE, dirname);
 	dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
 	fuid_dirtied = zfsvfs->z_fuid_dirty;
 	if (fuid_dirtied)
 		zfs_fuid_txhold(zfsvfs, tx);
 	if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
 		dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,
 		    acl_ids.z_aclp->z_acl_bytes);
 	}
 
 	dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
 	    ZFS_SA_BASE_ATTR_SIZE);
 
 	error = dmu_tx_assign(tx, TXG_WAIT);
 	if (error) {
 		zfs_acl_ids_free(&acl_ids);
 		dmu_tx_abort(tx);
 		getnewvnode_drop_reserve();
 		ZFS_EXIT(zfsvfs);
 		return (error);
 	}
 
 	/*
 	 * Create new node.
 	 */
 	zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);
 
 	if (fuid_dirtied)
 		zfs_fuid_sync(zfsvfs, tx);
 
 	/*
 	 * Now put new name in parent dir.
 	 */
 	(void) zfs_link_create(dzp, dirname, zp, tx, ZNEW);
 
 	*zpp = zp;
 
 	txtype = zfs_log_create_txtype(Z_DIR, NULL, vap);
 	zfs_log_create(zilog, tx, txtype, dzp, zp, dirname, NULL,
 	    acl_ids.z_fuidp, vap);
 
 	zfs_acl_ids_free(&acl_ids);
 
 	dmu_tx_commit(tx);
 
 	getnewvnode_drop_reserve();
 
 	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
 		zil_commit(zilog, 0);
 
 	ZFS_EXIT(zfsvfs);
 	return (0);
 }
 
 #if	__FreeBSD_version < 1300124
 static void
 cache_vop_rmdir(struct vnode *dvp, struct vnode *vp)
 {
 
 	cache_purge(dvp);
 	cache_purge(vp);
 }
 #endif
 
 /*
  * Remove a directory subdir entry.  If the current working
  * directory is the same as the subdir to be removed, the
  * remove will fail.
  *
  *	IN:	dvp	- vnode of directory to remove from.
  *		name	- name of directory to be removed.
  *		cwd	- vnode of current working directory.
  *		cr	- credentials of caller.
  *		ct	- caller context
  *		flags	- case flags
  *
  *	RETURN:	0 on success, error code on failure.
  *
  * Timestamps:
  *	dvp - ctime|mtime updated
  */
 static int
 zfs_rmdir_(vnode_t *dvp, vnode_t *vp, const char *name, cred_t *cr)
 {
 	znode_t		*dzp = VTOZ(dvp);
 	znode_t		*zp = VTOZ(vp);
 	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
 	zilog_t		*zilog;
 	dmu_tx_t	*tx;
 	int		error;
 
 	ZFS_ENTER(zfsvfs);
 	ZFS_VERIFY_ZP(dzp);
 	ZFS_VERIFY_ZP(zp);
 	zilog = zfsvfs->z_log;
 
 
 	if ((error = zfs_zaccess_delete(dzp, zp, cr))) {
 		goto out;
 	}
 
 	if (vp->v_type != VDIR) {
 		error = SET_ERROR(ENOTDIR);
 		goto out;
 	}
 
 	vnevent_rmdir(vp, dvp, name, ct);
 
 	tx = dmu_tx_create(zfsvfs->z_os);
 	dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name);
 	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
 	dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
 	zfs_sa_upgrade_txholds(tx, zp);
 	zfs_sa_upgrade_txholds(tx, dzp);
 	dmu_tx_mark_netfree(tx);
 	error = dmu_tx_assign(tx, TXG_WAIT);
 	if (error) {
 		dmu_tx_abort(tx);
 		ZFS_EXIT(zfsvfs);
 		return (error);
 	}
 
 	error = zfs_link_destroy(dzp, name, zp, tx, ZEXISTS, NULL);
 
 	if (error == 0) {
 		uint64_t txtype = TX_RMDIR;
 		zfs_log_remove(zilog, tx, txtype, dzp, name,
 		    ZFS_NO_OBJECT, B_FALSE);
 	}
 
 	dmu_tx_commit(tx);
 
 	cache_vop_rmdir(dvp, vp);
 out:
 	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
 		zil_commit(zilog, 0);
 
 	ZFS_EXIT(zfsvfs);
 	return (error);
 }
 
 int
 zfs_rmdir(znode_t *dzp, const char *name, znode_t *cwd, cred_t *cr, int flags)
 {
 	struct componentname cn;
 	vnode_t *vp;
 	int error;
 
 	if ((error = zfs_lookup_internal(dzp, name, &vp, &cn, DELETE)))
 		return (error);
 
 	error = zfs_rmdir_(ZTOV(dzp), vp, name, cr);
 	vput(vp);
 	return (error);
 }
 
 /*
  * Read as many directory entries as will fit into the provided
  * buffer from the given directory cursor position (specified in
  * the uio structure).
  *
  *	IN:	vp	- vnode of directory to read.
  *		uio	- structure supplying read location, range info,
  *			  and return buffer.
  *		cr	- credentials of caller.
  *		ct	- caller context
  *		flags	- case flags
  *
  *	OUT:	uio	- updated offset and range, buffer filled.
  *		eofp	- set to true if end-of-file detected.
  *
  *	RETURN:	0 on success, error code on failure.
  *
  * Timestamps:
  *	vp - atime updated
  *
  * Note that the low 4 bits of the cookie returned by zap is always zero.
  * This allows us to use the low range for "special" directory entries:
  * We use 0 for '.', and 1 for '..'.  If this is the root of the filesystem,
  * we use the offset 2 for the '.zfs' directory.
  */
 static int
 zfs_readdir(vnode_t *vp, zfs_uio_t *uio, cred_t *cr, int *eofp,
     int *ncookies, cookie_t **cookies)
 {
 	znode_t		*zp = VTOZ(vp);
 	iovec_t		*iovp;
 	edirent_t	*eodp;
 	dirent64_t	*odp;
 	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
 	objset_t	*os;
 	caddr_t		outbuf;
 	size_t		bufsize;
 	zap_cursor_t	zc;
 	zap_attribute_t	zap;
 	uint_t		bytes_wanted;
 	uint64_t	offset; /* must be unsigned; checks for < 1 */
 	uint64_t	parent;
 	int		local_eof;
 	int		outcount;
 	int		error;
 	uint8_t		prefetch;
 	boolean_t	check_sysattrs;
 	uint8_t		type;
 	int		ncooks;
 	cookie_t	*cooks = NULL;
 	int		flags = 0;
 
 	ZFS_ENTER(zfsvfs);
 	ZFS_VERIFY_ZP(zp);
 
 	if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs),
 	    &parent, sizeof (parent))) != 0) {
 		ZFS_EXIT(zfsvfs);
 		return (error);
 	}
 
 	/*
 	 * If we are not given an eof variable,
 	 * use a local one.
 	 */
 	if (eofp == NULL)
 		eofp = &local_eof;
 
 	/*
 	 * Check for valid iov_len.
 	 */
 	if (GET_UIO_STRUCT(uio)->uio_iov->iov_len <= 0) {
 		ZFS_EXIT(zfsvfs);
 		return (SET_ERROR(EINVAL));
 	}
 
 	/*
 	 * Quit if directory has been removed (posix)
 	 */
 	if ((*eofp = zp->z_unlinked) != 0) {
 		ZFS_EXIT(zfsvfs);
 		return (0);
 	}
 
 	error = 0;
 	os = zfsvfs->z_os;
 	offset = zfs_uio_offset(uio);
 	prefetch = zp->z_zn_prefetch;
 
 	/*
 	 * Initialize the iterator cursor.
 	 */
 	if (offset <= 3) {
 		/*
 		 * Start iteration from the beginning of the directory.
 		 */
 		zap_cursor_init(&zc, os, zp->z_id);
 	} else {
 		/*
 		 * The offset is a serialized cursor.
 		 */
 		zap_cursor_init_serialized(&zc, os, zp->z_id, offset);
 	}
 
 	/*
 	 * Get space to change directory entries into fs independent format.
 	 */
 	iovp = GET_UIO_STRUCT(uio)->uio_iov;
 	bytes_wanted = iovp->iov_len;
 	if (zfs_uio_segflg(uio) != UIO_SYSSPACE || zfs_uio_iovcnt(uio) != 1) {
 		bufsize = bytes_wanted;
 		outbuf = kmem_alloc(bufsize, KM_SLEEP);
 		odp = (struct dirent64 *)outbuf;
 	} else {
 		bufsize = bytes_wanted;
 		outbuf = NULL;
 		odp = (struct dirent64 *)iovp->iov_base;
 	}
 	eodp = (struct edirent *)odp;
 
 	if (ncookies != NULL) {
 		/*
 		 * Minimum entry size is dirent size and 1 byte for a file name.
 		 */
 		ncooks = zfs_uio_resid(uio) / (sizeof (struct dirent) -
 		    sizeof (((struct dirent *)NULL)->d_name) + 1);
 		cooks = malloc(ncooks * sizeof (*cooks), M_TEMP, M_WAITOK);
 		*cookies = cooks;
 		*ncookies = ncooks;
 	}
 	/*
 	 * If this VFS supports the system attribute view interface; and
 	 * we're looking at an extended attribute directory; and we care
 	 * about normalization conflicts on this vfs; then we must check
 	 * for normalization conflicts with the sysattr name space.
 	 */
 #ifdef TODO
 	check_sysattrs = vfs_has_feature(vp->v_vfsp, VFSFT_SYSATTR_VIEWS) &&
 	    (vp->v_flag & V_XATTRDIR) && zfsvfs->z_norm &&
 	    (flags & V_RDDIR_ENTFLAGS);
 #else
 	check_sysattrs = 0;
 #endif
 
 	/*
 	 * Transform to file-system independent format
 	 */
 	outcount = 0;
 	while (outcount < bytes_wanted) {
 		ino64_t objnum;
 		ushort_t reclen;
 		off64_t *next = NULL;
 
 		/*
 		 * Special case `.', `..', and `.zfs'.
 		 */
 		if (offset == 0) {
 			(void) strcpy(zap.za_name, ".");
 			zap.za_normalization_conflict = 0;
 			objnum = zp->z_id;
 			type = DT_DIR;
 		} else if (offset == 1) {
 			(void) strcpy(zap.za_name, "..");
 			zap.za_normalization_conflict = 0;
 			objnum = parent;
 			type = DT_DIR;
 		} else if (offset == 2 && zfs_show_ctldir(zp)) {
 			(void) strcpy(zap.za_name, ZFS_CTLDIR_NAME);
 			zap.za_normalization_conflict = 0;
 			objnum = ZFSCTL_INO_ROOT;
 			type = DT_DIR;
 		} else {
 			/*
 			 * Grab next entry.
 			 */
 			if ((error = zap_cursor_retrieve(&zc, &zap))) {
 				if ((*eofp = (error == ENOENT)) != 0)
 					break;
 				else
 					goto update;
 			}
 
 			if (zap.za_integer_length != 8 ||
 			    zap.za_num_integers != 1) {
 				cmn_err(CE_WARN, "zap_readdir: bad directory "
 				    "entry, obj = %lld, offset = %lld\n",
 				    (u_longlong_t)zp->z_id,
 				    (u_longlong_t)offset);
 				error = SET_ERROR(ENXIO);
 				goto update;
 			}
 
 			objnum = ZFS_DIRENT_OBJ(zap.za_first_integer);
 			/*
 			 * MacOS X can extract the object type here such as:
 			 * uint8_t type = ZFS_DIRENT_TYPE(zap.za_first_integer);
 			 */
 			type = ZFS_DIRENT_TYPE(zap.za_first_integer);
 
 			if (check_sysattrs && !zap.za_normalization_conflict) {
 #ifdef TODO
 				zap.za_normalization_conflict =
 				    xattr_sysattr_casechk(zap.za_name);
 #else
 				panic("%s:%u: TODO", __func__, __LINE__);
 #endif
 			}
 		}
 
 		if (flags & V_RDDIR_ACCFILTER) {
 			/*
 			 * If we have no access at all, don't include
 			 * this entry in the returned information
 			 */
 			znode_t	*ezp;
 			if (zfs_zget(zp->z_zfsvfs, objnum, &ezp) != 0)
 				goto skip_entry;
 			if (!zfs_has_access(ezp, cr)) {
 				vrele(ZTOV(ezp));
 				goto skip_entry;
 			}
 			vrele(ZTOV(ezp));
 		}
 
 		if (flags & V_RDDIR_ENTFLAGS)
 			reclen = EDIRENT_RECLEN(strlen(zap.za_name));
 		else
 			reclen = DIRENT64_RECLEN(strlen(zap.za_name));
 
 		/*
 		 * Will this entry fit in the buffer?
 		 */
 		if (outcount + reclen > bufsize) {
 			/*
 			 * Did we manage to fit anything in the buffer?
 			 */
 			if (!outcount) {
 				error = SET_ERROR(EINVAL);
 				goto update;
 			}
 			break;
 		}
 		if (flags & V_RDDIR_ENTFLAGS) {
 			/*
 			 * Add extended flag entry:
 			 */
 			eodp->ed_ino = objnum;
 			eodp->ed_reclen = reclen;
 			/* NOTE: ed_off is the offset for the *next* entry */
 			next = &(eodp->ed_off);
 			eodp->ed_eflags = zap.za_normalization_conflict ?
 			    ED_CASE_CONFLICT : 0;
 			(void) strncpy(eodp->ed_name, zap.za_name,
 			    EDIRENT_NAMELEN(reclen));
 			eodp = (edirent_t *)((intptr_t)eodp + reclen);
 		} else {
 			/*
 			 * Add normal entry:
 			 */
 			odp->d_ino = objnum;
 			odp->d_reclen = reclen;
 			odp->d_namlen = strlen(zap.za_name);
 			/* NOTE: d_off is the offset for the *next* entry. */
 			next = &odp->d_off;
 			strlcpy(odp->d_name, zap.za_name, odp->d_namlen + 1);
 			odp->d_type = type;
 			dirent_terminate(odp);
 			odp = (dirent64_t *)((intptr_t)odp + reclen);
 		}
 		outcount += reclen;
 
 		ASSERT3S(outcount, <=, bufsize);
 
 		/* Prefetch znode */
 		if (prefetch)
 			dmu_prefetch(os, objnum, 0, 0, 0,
 			    ZIO_PRIORITY_SYNC_READ);
 
 	skip_entry:
 		/*
 		 * Move to the next entry, fill in the previous offset.
 		 */
 		if (offset > 2 || (offset == 2 && !zfs_show_ctldir(zp))) {
 			zap_cursor_advance(&zc);
 			offset = zap_cursor_serialize(&zc);
 		} else {
 			offset += 1;
 		}
 
 		/* Fill the offset right after advancing the cursor. */
 		if (next != NULL)
 			*next = offset;
 		if (cooks != NULL) {
 			*cooks++ = offset;
 			ncooks--;
 			KASSERT(ncooks >= 0, ("ncookies=%d", ncooks));
 		}
 	}
 	zp->z_zn_prefetch = B_FALSE; /* a lookup will re-enable pre-fetching */
 
 	/* Subtract unused cookies */
 	if (ncookies != NULL)
 		*ncookies -= ncooks;
 
 	if (zfs_uio_segflg(uio) == UIO_SYSSPACE && zfs_uio_iovcnt(uio) == 1) {
 		iovp->iov_base += outcount;
 		iovp->iov_len -= outcount;
 		zfs_uio_resid(uio) -= outcount;
 	} else if ((error =
 	    zfs_uiomove(outbuf, (long)outcount, UIO_READ, uio))) {
 		/*
 		 * Reset the pointer.
 		 */
 		offset = zfs_uio_offset(uio);
 	}
 
 update:
 	zap_cursor_fini(&zc);
 	if (zfs_uio_segflg(uio) != UIO_SYSSPACE || zfs_uio_iovcnt(uio) != 1)
 		kmem_free(outbuf, bufsize);
 
 	if (error == ENOENT)
 		error = 0;
 
 	ZFS_ACCESSTIME_STAMP(zfsvfs, zp);
 
 	zfs_uio_setoffset(uio, offset);
 	ZFS_EXIT(zfsvfs);
 	if (error != 0 && cookies != NULL) {
 		free(*cookies, M_TEMP);
 		*cookies = NULL;
 		*ncookies = 0;
 	}
 	return (error);
 }
 
 /*
  * Get the requested file attributes and place them in the provided
  * vattr structure.
  *
  *	IN:	vp	- vnode of file.
  *		vap	- va_mask identifies requested attributes.
  *			  If AT_XVATTR set, then optional attrs are requested
  *		flags	- ATTR_NOACLCHECK (CIFS server context)
  *		cr	- credentials of caller.
  *
  *	OUT:	vap	- attribute values.
  *
  *	RETURN:	0 (always succeeds).
  */
 static int
 zfs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr)
 {
 	znode_t *zp = VTOZ(vp);
 	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
 	int	error = 0;
 	uint32_t blksize;
 	u_longlong_t nblocks;
 	uint64_t mtime[2], ctime[2], crtime[2], rdev;
 	xvattr_t *xvap = (xvattr_t *)vap;	/* vap may be an xvattr_t * */
 	xoptattr_t *xoap = NULL;
 	boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE;
 	sa_bulk_attr_t bulk[4];
 	int count = 0;
 
 	ZFS_ENTER(zfsvfs);
 	ZFS_VERIFY_ZP(zp);
 
 	zfs_fuid_map_ids(zp, cr, &vap->va_uid, &vap->va_gid);
 
 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16);
 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16);
 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CRTIME(zfsvfs), NULL, &crtime, 16);
 	if (vp->v_type == VBLK || vp->v_type == VCHR)
 		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_RDEV(zfsvfs), NULL,
 		    &rdev, 8);
 
 	if ((error = sa_bulk_lookup(zp->z_sa_hdl, bulk, count)) != 0) {
 		ZFS_EXIT(zfsvfs);
 		return (error);
 	}
 
 	/*
 	 * If ACL is trivial don't bother looking for ACE_READ_ATTRIBUTES.
 	 * Also, if we are the owner don't bother, since owner should
 	 * always be allowed to read basic attributes of file.
 	 */
 	if (!(zp->z_pflags & ZFS_ACL_TRIVIAL) &&
 	    (vap->va_uid != crgetuid(cr))) {
 		if ((error = zfs_zaccess(zp, ACE_READ_ATTRIBUTES, 0,
 		    skipaclchk, cr))) {
 			ZFS_EXIT(zfsvfs);
 			return (error);
 		}
 	}
 
 	/*
 	 * Return all attributes.  It's cheaper to provide the answer
 	 * than to determine whether we were asked the question.
 	 */
 
 	vap->va_type = IFTOVT(zp->z_mode);
 	vap->va_mode = zp->z_mode & ~S_IFMT;
 	vn_fsid(vp, vap);
 	vap->va_nodeid = zp->z_id;
 	vap->va_nlink = zp->z_links;
 	if ((vp->v_flag & VROOT) && zfs_show_ctldir(zp) &&
 	    zp->z_links < ZFS_LINK_MAX)
 		vap->va_nlink++;
 	vap->va_size = zp->z_size;
 	if (vp->v_type == VBLK || vp->v_type == VCHR)
 		vap->va_rdev = zfs_cmpldev(rdev);
 	vap->va_gen = zp->z_gen;
 	vap->va_flags = 0;	/* FreeBSD: Reset chflags(2) flags. */
 	vap->va_filerev = zp->z_seq;
 
 	/*
 	 * Add in any requested optional attributes and the create time.
 	 * Also set the corresponding bits in the returned attribute bitmap.
 	 */
 	if ((xoap = xva_getxoptattr(xvap)) != NULL && zfsvfs->z_use_fuids) {
 		if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) {
 			xoap->xoa_archive =
 			    ((zp->z_pflags & ZFS_ARCHIVE) != 0);
 			XVA_SET_RTN(xvap, XAT_ARCHIVE);
 		}
 
 		if (XVA_ISSET_REQ(xvap, XAT_READONLY)) {
 			xoap->xoa_readonly =
 			    ((zp->z_pflags & ZFS_READONLY) != 0);
 			XVA_SET_RTN(xvap, XAT_READONLY);
 		}
 
 		if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) {
 			xoap->xoa_system =
 			    ((zp->z_pflags & ZFS_SYSTEM) != 0);
 			XVA_SET_RTN(xvap, XAT_SYSTEM);
 		}
 
 		if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) {
 			xoap->xoa_hidden =
 			    ((zp->z_pflags & ZFS_HIDDEN) != 0);
 			XVA_SET_RTN(xvap, XAT_HIDDEN);
 		}
 
 		if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) {
 			xoap->xoa_nounlink =
 			    ((zp->z_pflags & ZFS_NOUNLINK) != 0);
 			XVA_SET_RTN(xvap, XAT_NOUNLINK);
 		}
 
 		if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) {
 			xoap->xoa_immutable =
 			    ((zp->z_pflags & ZFS_IMMUTABLE) != 0);
 			XVA_SET_RTN(xvap, XAT_IMMUTABLE);
 		}
 
 		if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) {
 			xoap->xoa_appendonly =
 			    ((zp->z_pflags & ZFS_APPENDONLY) != 0);
 			XVA_SET_RTN(xvap, XAT_APPENDONLY);
 		}
 
 		if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) {
 			xoap->xoa_nodump =
 			    ((zp->z_pflags & ZFS_NODUMP) != 0);
 			XVA_SET_RTN(xvap, XAT_NODUMP);
 		}
 
 		if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) {
 			xoap->xoa_opaque =
 			    ((zp->z_pflags & ZFS_OPAQUE) != 0);
 			XVA_SET_RTN(xvap, XAT_OPAQUE);
 		}
 
 		if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) {
 			xoap->xoa_av_quarantined =
 			    ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0);
 			XVA_SET_RTN(xvap, XAT_AV_QUARANTINED);
 		}
 
 		if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) {
 			xoap->xoa_av_modified =
 			    ((zp->z_pflags & ZFS_AV_MODIFIED) != 0);
 			XVA_SET_RTN(xvap, XAT_AV_MODIFIED);
 		}
 
 		if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) &&
 		    vp->v_type == VREG) {
 			zfs_sa_get_scanstamp(zp, xvap);
 		}
 
 		if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) {
 			xoap->xoa_reparse = ((zp->z_pflags & ZFS_REPARSE) != 0);
 			XVA_SET_RTN(xvap, XAT_REPARSE);
 		}
 		if (XVA_ISSET_REQ(xvap, XAT_GEN)) {
 			xoap->xoa_generation = zp->z_gen;
 			XVA_SET_RTN(xvap, XAT_GEN);
 		}
 
 		if (XVA_ISSET_REQ(xvap, XAT_OFFLINE)) {
 			xoap->xoa_offline =
 			    ((zp->z_pflags & ZFS_OFFLINE) != 0);
 			XVA_SET_RTN(xvap, XAT_OFFLINE);
 		}
 
 		if (XVA_ISSET_REQ(xvap, XAT_SPARSE)) {
 			xoap->xoa_sparse =
 			    ((zp->z_pflags & ZFS_SPARSE) != 0);
 			XVA_SET_RTN(xvap, XAT_SPARSE);
 		}
 
 		if (XVA_ISSET_REQ(xvap, XAT_PROJINHERIT)) {
 			xoap->xoa_projinherit =
 			    ((zp->z_pflags & ZFS_PROJINHERIT) != 0);
 			XVA_SET_RTN(xvap, XAT_PROJINHERIT);
 		}
 
 		if (XVA_ISSET_REQ(xvap, XAT_PROJID)) {
 			xoap->xoa_projid = zp->z_projid;
 			XVA_SET_RTN(xvap, XAT_PROJID);
 		}
 	}
 
 	ZFS_TIME_DECODE(&vap->va_atime, zp->z_atime);
 	ZFS_TIME_DECODE(&vap->va_mtime, mtime);
 	ZFS_TIME_DECODE(&vap->va_ctime, ctime);
 	ZFS_TIME_DECODE(&vap->va_birthtime, crtime);
 
 
 	sa_object_size(zp->z_sa_hdl, &blksize, &nblocks);
 	vap->va_blksize = blksize;
 	vap->va_bytes = nblocks << 9;	/* nblocks * 512 */
 
 	if (zp->z_blksz == 0) {
 		/*
 		 * Block size hasn't been set; suggest maximal I/O transfers.
 		 */
 		vap->va_blksize = zfsvfs->z_max_blksz;
 	}
 
 	ZFS_EXIT(zfsvfs);
 	return (0);
 }
 
 /*
  * Set the file attributes to the values contained in the
  * vattr structure.
  *
  *	IN:	zp	- znode of file to be modified.
  *		vap	- new attribute values.
  *			  If AT_XVATTR set, then optional attrs are being set
  *		flags	- ATTR_UTIME set if non-default time values provided.
  *			- ATTR_NOACLCHECK (CIFS context only).
  *		cr	- credentials of caller.
  *		ct	- caller context
  *
  *	RETURN:	0 on success, error code on failure.
  *
  * Timestamps:
  *	vp - ctime updated, mtime updated if size changed.
  */
 int
 zfs_setattr(znode_t *zp, vattr_t *vap, int flags, cred_t *cr)
 {
 	vnode_t		*vp = ZTOV(zp);
 	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
 	objset_t	*os;
 	zilog_t		*zilog;
 	dmu_tx_t	*tx;
 	vattr_t		oldva;
 	xvattr_t	tmpxvattr;
 	uint_t		mask = vap->va_mask;
 	uint_t		saved_mask = 0;
 	uint64_t	saved_mode;
 	int		trim_mask = 0;
 	uint64_t	new_mode;
 	uint64_t	new_uid, new_gid;
 	uint64_t	xattr_obj;
 	uint64_t	mtime[2], ctime[2];
 	uint64_t	projid = ZFS_INVALID_PROJID;
 	znode_t		*attrzp;
 	int		need_policy = FALSE;
 	int		err, err2;
 	zfs_fuid_info_t *fuidp = NULL;
 	xvattr_t *xvap = (xvattr_t *)vap;	/* vap may be an xvattr_t * */
 	xoptattr_t	*xoap;
 	zfs_acl_t	*aclp;
 	boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE;
 	boolean_t	fuid_dirtied = B_FALSE;
 	sa_bulk_attr_t	bulk[7], xattr_bulk[7];
 	int		count = 0, xattr_count = 0;
 
 	if (mask == 0)
 		return (0);
 
 	if (mask & AT_NOSET)
 		return (SET_ERROR(EINVAL));
 
 	ZFS_ENTER(zfsvfs);
 	ZFS_VERIFY_ZP(zp);
 
 	os = zfsvfs->z_os;
 	zilog = zfsvfs->z_log;
 
 	/*
 	 * Make sure that if we have ephemeral uid/gid or xvattr specified
 	 * that file system is at proper version level
 	 */
 
 	if (zfsvfs->z_use_fuids == B_FALSE &&
 	    (((mask & AT_UID) && IS_EPHEMERAL(vap->va_uid)) ||
 	    ((mask & AT_GID) && IS_EPHEMERAL(vap->va_gid)) ||
 	    (mask & AT_XVATTR))) {
 		ZFS_EXIT(zfsvfs);
 		return (SET_ERROR(EINVAL));
 	}
 
 	if (mask & AT_SIZE && vp->v_type == VDIR) {
 		ZFS_EXIT(zfsvfs);
 		return (SET_ERROR(EISDIR));
 	}
 
 	if (mask & AT_SIZE && vp->v_type != VREG && vp->v_type != VFIFO) {
 		ZFS_EXIT(zfsvfs);
 		return (SET_ERROR(EINVAL));
 	}
 
 	/*
 	 * If this is an xvattr_t, then get a pointer to the structure of
 	 * optional attributes.  If this is NULL, then we have a vattr_t.
 	 */
 	xoap = xva_getxoptattr(xvap);
 
 	xva_init(&tmpxvattr);
 
 	/*
 	 * Immutable files can only alter immutable bit and atime
 	 */
 	if ((zp->z_pflags & ZFS_IMMUTABLE) &&
 	    ((mask & (AT_SIZE|AT_UID|AT_GID|AT_MTIME|AT_MODE)) ||
 	    ((mask & AT_XVATTR) && XVA_ISSET_REQ(xvap, XAT_CREATETIME)))) {
 		ZFS_EXIT(zfsvfs);
 		return (SET_ERROR(EPERM));
 	}
 
 	/*
 	 * Note: ZFS_READONLY is handled in zfs_zaccess_common.
 	 */
 
 	/*
 	 * Verify timestamps doesn't overflow 32 bits.
 	 * ZFS can handle large timestamps, but 32bit syscalls can't
 	 * handle times greater than 2039.  This check should be removed
 	 * once large timestamps are fully supported.
 	 */
 	if (mask & (AT_ATIME | AT_MTIME)) {
 		if (((mask & AT_ATIME) && TIMESPEC_OVERFLOW(&vap->va_atime)) ||
 		    ((mask & AT_MTIME) && TIMESPEC_OVERFLOW(&vap->va_mtime))) {
 			ZFS_EXIT(zfsvfs);
 			return (SET_ERROR(EOVERFLOW));
 		}
 	}
 	if (xoap != NULL && (mask & AT_XVATTR)) {
 		if (XVA_ISSET_REQ(xvap, XAT_CREATETIME) &&
 		    TIMESPEC_OVERFLOW(&vap->va_birthtime)) {
 			ZFS_EXIT(zfsvfs);
 			return (SET_ERROR(EOVERFLOW));
 		}
 
 		if (XVA_ISSET_REQ(xvap, XAT_PROJID)) {
 			if (!dmu_objset_projectquota_enabled(os) ||
 			    (!S_ISREG(zp->z_mode) && !S_ISDIR(zp->z_mode))) {
 				ZFS_EXIT(zfsvfs);
 				return (SET_ERROR(EOPNOTSUPP));
 			}
 
 			projid = xoap->xoa_projid;
 			if (unlikely(projid == ZFS_INVALID_PROJID)) {
 				ZFS_EXIT(zfsvfs);
 				return (SET_ERROR(EINVAL));
 			}
 
 			if (projid == zp->z_projid && zp->z_pflags & ZFS_PROJID)
 				projid = ZFS_INVALID_PROJID;
 			else
 				need_policy = TRUE;
 		}
 
 		if (XVA_ISSET_REQ(xvap, XAT_PROJINHERIT) &&
 		    (xoap->xoa_projinherit !=
 		    ((zp->z_pflags & ZFS_PROJINHERIT) != 0)) &&
 		    (!dmu_objset_projectquota_enabled(os) ||
 		    (!S_ISREG(zp->z_mode) && !S_ISDIR(zp->z_mode)))) {
 			ZFS_EXIT(zfsvfs);
 			return (SET_ERROR(EOPNOTSUPP));
 		}
 	}
 
 	attrzp = NULL;
 	aclp = NULL;
 
 	if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) {
 		ZFS_EXIT(zfsvfs);
 		return (SET_ERROR(EROFS));
 	}
 
 	/*
 	 * First validate permissions
 	 */
 
 	if (mask & AT_SIZE) {
 		/*
 		 * XXX - Note, we are not providing any open
 		 * mode flags here (like FNDELAY), so we may
 		 * block if there are locks present... this
 		 * should be addressed in openat().
 		 */
 		/* XXX - would it be OK to generate a log record here? */
 		err = zfs_freesp(zp, vap->va_size, 0, 0, FALSE);
 		if (err) {
 			ZFS_EXIT(zfsvfs);
 			return (err);
 		}
 	}
 
 	if (mask & (AT_ATIME|AT_MTIME) ||
 	    ((mask & AT_XVATTR) && (XVA_ISSET_REQ(xvap, XAT_HIDDEN) ||
 	    XVA_ISSET_REQ(xvap, XAT_READONLY) ||
 	    XVA_ISSET_REQ(xvap, XAT_ARCHIVE) ||
 	    XVA_ISSET_REQ(xvap, XAT_OFFLINE) ||
 	    XVA_ISSET_REQ(xvap, XAT_SPARSE) ||
 	    XVA_ISSET_REQ(xvap, XAT_CREATETIME) ||
 	    XVA_ISSET_REQ(xvap, XAT_SYSTEM)))) {
 		need_policy = zfs_zaccess(zp, ACE_WRITE_ATTRIBUTES, 0,
 		    skipaclchk, cr);
 	}
 
 	if (mask & (AT_UID|AT_GID)) {
 		int	idmask = (mask & (AT_UID|AT_GID));
 		int	take_owner;
 		int	take_group;
 
 		/*
 		 * NOTE: even if a new mode is being set,
 		 * we may clear S_ISUID/S_ISGID bits.
 		 */
 
 		if (!(mask & AT_MODE))
 			vap->va_mode = zp->z_mode;
 
 		/*
 		 * Take ownership or chgrp to group we are a member of
 		 */
 
 		take_owner = (mask & AT_UID) && (vap->va_uid == crgetuid(cr));
 		take_group = (mask & AT_GID) &&
 		    zfs_groupmember(zfsvfs, vap->va_gid, cr);
 
 		/*
 		 * If both AT_UID and AT_GID are set then take_owner and
 		 * take_group must both be set in order to allow taking
 		 * ownership.
 		 *
 		 * Otherwise, send the check through secpolicy_vnode_setattr()
 		 *
 		 */
 
 		if (((idmask == (AT_UID|AT_GID)) && take_owner && take_group) ||
 		    ((idmask == AT_UID) && take_owner) ||
 		    ((idmask == AT_GID) && take_group)) {
 			if (zfs_zaccess(zp, ACE_WRITE_OWNER, 0,
 			    skipaclchk, cr) == 0) {
 				/*
 				 * Remove setuid/setgid for non-privileged users
 				 */
 				secpolicy_setid_clear(vap, vp, cr);
 				trim_mask = (mask & (AT_UID|AT_GID));
 			} else {
 				need_policy =  TRUE;
 			}
 		} else {
 			need_policy =  TRUE;
 		}
 	}
 
 	oldva.va_mode = zp->z_mode;
 	zfs_fuid_map_ids(zp, cr, &oldva.va_uid, &oldva.va_gid);
 	if (mask & AT_XVATTR) {
 		/*
 		 * Update xvattr mask to include only those attributes
 		 * that are actually changing.
 		 *
 		 * the bits will be restored prior to actually setting
 		 * the attributes so the caller thinks they were set.
 		 */
 		if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) {
 			if (xoap->xoa_appendonly !=
 			    ((zp->z_pflags & ZFS_APPENDONLY) != 0)) {
 				need_policy = TRUE;
 			} else {
 				XVA_CLR_REQ(xvap, XAT_APPENDONLY);
 				XVA_SET_REQ(&tmpxvattr, XAT_APPENDONLY);
 			}
 		}
 
 		if (XVA_ISSET_REQ(xvap, XAT_PROJINHERIT)) {
 			if (xoap->xoa_projinherit !=
 			    ((zp->z_pflags & ZFS_PROJINHERIT) != 0)) {
 				need_policy = TRUE;
 			} else {
 				XVA_CLR_REQ(xvap, XAT_PROJINHERIT);
 				XVA_SET_REQ(&tmpxvattr, XAT_PROJINHERIT);
 			}
 		}
 
 		if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) {
 			if (xoap->xoa_nounlink !=
 			    ((zp->z_pflags & ZFS_NOUNLINK) != 0)) {
 				need_policy = TRUE;
 			} else {
 				XVA_CLR_REQ(xvap, XAT_NOUNLINK);
 				XVA_SET_REQ(&tmpxvattr, XAT_NOUNLINK);
 			}
 		}
 
 		if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) {
 			if (xoap->xoa_immutable !=
 			    ((zp->z_pflags & ZFS_IMMUTABLE) != 0)) {
 				need_policy = TRUE;
 			} else {
 				XVA_CLR_REQ(xvap, XAT_IMMUTABLE);
 				XVA_SET_REQ(&tmpxvattr, XAT_IMMUTABLE);
 			}
 		}
 
 		if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) {
 			if (xoap->xoa_nodump !=
 			    ((zp->z_pflags & ZFS_NODUMP) != 0)) {
 				need_policy = TRUE;
 			} else {
 				XVA_CLR_REQ(xvap, XAT_NODUMP);
 				XVA_SET_REQ(&tmpxvattr, XAT_NODUMP);
 			}
 		}
 
 		if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) {
 			if (xoap->xoa_av_modified !=
 			    ((zp->z_pflags & ZFS_AV_MODIFIED) != 0)) {
 				need_policy = TRUE;
 			} else {
 				XVA_CLR_REQ(xvap, XAT_AV_MODIFIED);
 				XVA_SET_REQ(&tmpxvattr, XAT_AV_MODIFIED);
 			}
 		}
 
 		if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) {
 			if ((vp->v_type != VREG &&
 			    xoap->xoa_av_quarantined) ||
 			    xoap->xoa_av_quarantined !=
 			    ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0)) {
 				need_policy = TRUE;
 			} else {
 				XVA_CLR_REQ(xvap, XAT_AV_QUARANTINED);
 				XVA_SET_REQ(&tmpxvattr, XAT_AV_QUARANTINED);
 			}
 		}
 
 		if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) {
 			ZFS_EXIT(zfsvfs);
 			return (SET_ERROR(EPERM));
 		}
 
 		if (need_policy == FALSE &&
 		    (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) ||
 		    XVA_ISSET_REQ(xvap, XAT_OPAQUE))) {
 			need_policy = TRUE;
 		}
 	}
 
 	if (mask & AT_MODE) {
 		if (zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr) == 0) {
 			err = secpolicy_setid_setsticky_clear(vp, vap,
 			    &oldva, cr);
 			if (err) {
 				ZFS_EXIT(zfsvfs);
 				return (err);
 			}
 			trim_mask |= AT_MODE;
 		} else {
 			need_policy = TRUE;
 		}
 	}
 
 	if (need_policy) {
 		/*
 		 * If trim_mask is set then take ownership
 		 * has been granted or write_acl is present and user
 		 * has the ability to modify mode.  In that case remove
 		 * UID|GID and or MODE from mask so that
 		 * secpolicy_vnode_setattr() doesn't revoke it.
 		 */
 
 		if (trim_mask) {
 			saved_mask = vap->va_mask;
 			vap->va_mask &= ~trim_mask;
 			if (trim_mask & AT_MODE) {
 				/*
 				 * Save the mode, as secpolicy_vnode_setattr()
 				 * will overwrite it with ova.va_mode.
 				 */
 				saved_mode = vap->va_mode;
 			}
 		}
 		err = secpolicy_vnode_setattr(cr, vp, vap, &oldva, flags,
 		    (int (*)(void *, int, cred_t *))zfs_zaccess_unix, zp);
 		if (err) {
 			ZFS_EXIT(zfsvfs);
 			return (err);
 		}
 
 		if (trim_mask) {
 			vap->va_mask |= saved_mask;
 			if (trim_mask & AT_MODE) {
 				/*
 				 * Recover the mode after
 				 * secpolicy_vnode_setattr().
 				 */
 				vap->va_mode = saved_mode;
 			}
 		}
 	}
 
 	/*
 	 * secpolicy_vnode_setattr, or take ownership may have
 	 * changed va_mask
 	 */
 	mask = vap->va_mask;
 
 	if ((mask & (AT_UID | AT_GID)) || projid != ZFS_INVALID_PROJID) {
 		err = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs),
 		    &xattr_obj, sizeof (xattr_obj));
 
 		if (err == 0 && xattr_obj) {
 			err = zfs_zget(zp->z_zfsvfs, xattr_obj, &attrzp);
 			if (err == 0) {
 				err = vn_lock(ZTOV(attrzp), LK_EXCLUSIVE);
 				if (err != 0)
 					vrele(ZTOV(attrzp));
 			}
 			if (err)
 				goto out2;
 		}
 		if (mask & AT_UID) {
 			new_uid = zfs_fuid_create(zfsvfs,
 			    (uint64_t)vap->va_uid, cr, ZFS_OWNER, &fuidp);
 			if (new_uid != zp->z_uid &&
 			    zfs_id_overquota(zfsvfs, DMU_USERUSED_OBJECT,
 			    new_uid)) {
 				if (attrzp)
 					vput(ZTOV(attrzp));
 				err = SET_ERROR(EDQUOT);
 				goto out2;
 			}
 		}
 
 		if (mask & AT_GID) {
 			new_gid = zfs_fuid_create(zfsvfs, (uint64_t)vap->va_gid,
 			    cr, ZFS_GROUP, &fuidp);
 			if (new_gid != zp->z_gid &&
 			    zfs_id_overquota(zfsvfs, DMU_GROUPUSED_OBJECT,
 			    new_gid)) {
 				if (attrzp)
 					vput(ZTOV(attrzp));
 				err = SET_ERROR(EDQUOT);
 				goto out2;
 			}
 		}
 
 		if (projid != ZFS_INVALID_PROJID &&
 		    zfs_id_overquota(zfsvfs, DMU_PROJECTUSED_OBJECT, projid)) {
 			if (attrzp)
 				vput(ZTOV(attrzp));
 			err = SET_ERROR(EDQUOT);
 			goto out2;
 		}
 	}
 	tx = dmu_tx_create(os);
 
 	if (mask & AT_MODE) {
 		uint64_t pmode = zp->z_mode;
 		uint64_t acl_obj;
 		new_mode = (pmode & S_IFMT) | (vap->va_mode & ~S_IFMT);
 
 		if (zp->z_zfsvfs->z_acl_mode == ZFS_ACL_RESTRICTED &&
 		    !(zp->z_pflags & ZFS_ACL_TRIVIAL)) {
 			err = SET_ERROR(EPERM);
 			goto out;
 		}
 
 		if ((err = zfs_acl_chmod_setattr(zp, &aclp, new_mode)))
 			goto out;
 
 		if (!zp->z_is_sa && ((acl_obj = zfs_external_acl(zp)) != 0)) {
 			/*
 			 * Are we upgrading ACL from old V0 format
 			 * to V1 format?
 			 */
 			if (zfsvfs->z_version >= ZPL_VERSION_FUID &&
 			    zfs_znode_acl_version(zp) ==
 			    ZFS_ACL_VERSION_INITIAL) {
 				dmu_tx_hold_free(tx, acl_obj, 0,
 				    DMU_OBJECT_END);
 				dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
 				    0, aclp->z_acl_bytes);
 			} else {
 				dmu_tx_hold_write(tx, acl_obj, 0,
 				    aclp->z_acl_bytes);
 			}
 		} else if (!zp->z_is_sa && aclp->z_acl_bytes > ZFS_ACE_SPACE) {
 			dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
 			    0, aclp->z_acl_bytes);
 		}
 		dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
 	} else {
 		if (((mask & AT_XVATTR) &&
 		    XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) ||
 		    (projid != ZFS_INVALID_PROJID &&
 		    !(zp->z_pflags & ZFS_PROJID)))
 			dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
 		else
 			dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
 	}
 
 	if (attrzp) {
 		dmu_tx_hold_sa(tx, attrzp->z_sa_hdl, B_FALSE);
 	}
 
 	fuid_dirtied = zfsvfs->z_fuid_dirty;
 	if (fuid_dirtied)
 		zfs_fuid_txhold(zfsvfs, tx);
 
 	zfs_sa_upgrade_txholds(tx, zp);
 
 	err = dmu_tx_assign(tx, TXG_WAIT);
 	if (err)
 		goto out;
 
 	count = 0;
 	/*
 	 * Set each attribute requested.
 	 * We group settings according to the locks they need to acquire.
 	 *
 	 * Note: you cannot set ctime directly, although it will be
 	 * updated as a side-effect of calling this function.
 	 */
 
 	if (projid != ZFS_INVALID_PROJID && !(zp->z_pflags & ZFS_PROJID)) {
 		/*
 		 * For the existed object that is upgraded from old system,
 		 * its on-disk layout has no slot for the project ID attribute.
 		 * But quota accounting logic needs to access related slots by
 		 * offset directly. So we need to adjust old objects' layout
 		 * to make the project ID to some unified and fixed offset.
 		 */
 		if (attrzp)
 			err = sa_add_projid(attrzp->z_sa_hdl, tx, projid);
 		if (err == 0)
 			err = sa_add_projid(zp->z_sa_hdl, tx, projid);
 
 		if (unlikely(err == EEXIST))
 			err = 0;
 		else if (err != 0)
 			goto out;
 		else
 			projid = ZFS_INVALID_PROJID;
 	}
 
 	if (mask & (AT_UID|AT_GID|AT_MODE))
 		mutex_enter(&zp->z_acl_lock);
 
 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
 	    &zp->z_pflags, sizeof (zp->z_pflags));
 
 	if (attrzp) {
 		if (mask & (AT_UID|AT_GID|AT_MODE))
 			mutex_enter(&attrzp->z_acl_lock);
 		SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
 		    SA_ZPL_FLAGS(zfsvfs), NULL, &attrzp->z_pflags,
 		    sizeof (attrzp->z_pflags));
 		if (projid != ZFS_INVALID_PROJID) {
 			attrzp->z_projid = projid;
 			SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
 			    SA_ZPL_PROJID(zfsvfs), NULL, &attrzp->z_projid,
 			    sizeof (attrzp->z_projid));
 		}
 	}
 
 	if (mask & (AT_UID|AT_GID)) {
 
 		if (mask & AT_UID) {
 			SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL,
 			    &new_uid, sizeof (new_uid));
 			zp->z_uid = new_uid;
 			if (attrzp) {
 				SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
 				    SA_ZPL_UID(zfsvfs), NULL, &new_uid,
 				    sizeof (new_uid));
 				attrzp->z_uid = new_uid;
 			}
 		}
 
 		if (mask & AT_GID) {
 			SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs),
 			    NULL, &new_gid, sizeof (new_gid));
 			zp->z_gid = new_gid;
 			if (attrzp) {
 				SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
 				    SA_ZPL_GID(zfsvfs), NULL, &new_gid,
 				    sizeof (new_gid));
 				attrzp->z_gid = new_gid;
 			}
 		}
 		if (!(mask & AT_MODE)) {
 			SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs),
 			    NULL, &new_mode, sizeof (new_mode));
 			new_mode = zp->z_mode;
 		}
 		err = zfs_acl_chown_setattr(zp);
 		ASSERT0(err);
 		if (attrzp) {
 			vn_seqc_write_begin(ZTOV(attrzp));
 			err = zfs_acl_chown_setattr(attrzp);
 			vn_seqc_write_end(ZTOV(attrzp));
 			ASSERT0(err);
 		}
 	}
 
 	if (mask & AT_MODE) {
 		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL,
 		    &new_mode, sizeof (new_mode));
 		zp->z_mode = new_mode;
 		ASSERT3P(aclp, !=, NULL);
 		err = zfs_aclset_common(zp, aclp, cr, tx);
 		ASSERT0(err);
 		if (zp->z_acl_cached)
 			zfs_acl_free(zp->z_acl_cached);
 		zp->z_acl_cached = aclp;
 		aclp = NULL;
 	}
 
 
 	if (mask & AT_ATIME) {
 		ZFS_TIME_ENCODE(&vap->va_atime, zp->z_atime);
 		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL,
 		    &zp->z_atime, sizeof (zp->z_atime));
 	}
 
 	if (mask & AT_MTIME) {
 		ZFS_TIME_ENCODE(&vap->va_mtime, mtime);
 		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL,
 		    mtime, sizeof (mtime));
 	}
 
 	if (projid != ZFS_INVALID_PROJID) {
 		zp->z_projid = projid;
 		SA_ADD_BULK_ATTR(bulk, count,
 		    SA_ZPL_PROJID(zfsvfs), NULL, &zp->z_projid,
 		    sizeof (zp->z_projid));
 	}
 
 	/* XXX - shouldn't this be done *before* the ATIME/MTIME checks? */
 	if (mask & AT_SIZE && !(mask & AT_MTIME)) {
 		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs),
 		    NULL, mtime, sizeof (mtime));
 		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
 		    &ctime, sizeof (ctime));
 		zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime);
 	} else if (mask != 0) {
 		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
 		    &ctime, sizeof (ctime));
 		zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime, ctime);
 		if (attrzp) {
 			SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
 			    SA_ZPL_CTIME(zfsvfs), NULL,
 			    &ctime, sizeof (ctime));
 			zfs_tstamp_update_setup(attrzp, STATE_CHANGED,
 			    mtime, ctime);
 		}
 	}
 
 	/*
 	 * Do this after setting timestamps to prevent timestamp
 	 * update from toggling bit
 	 */
 
 	if (xoap && (mask & AT_XVATTR)) {
 
 		if (XVA_ISSET_REQ(xvap, XAT_CREATETIME))
 			xoap->xoa_createtime = vap->va_birthtime;
 		/*
 		 * restore trimmed off masks
 		 * so that return masks can be set for caller.
 		 */
 
 		if (XVA_ISSET_REQ(&tmpxvattr, XAT_APPENDONLY)) {
 			XVA_SET_REQ(xvap, XAT_APPENDONLY);
 		}
 		if (XVA_ISSET_REQ(&tmpxvattr, XAT_NOUNLINK)) {
 			XVA_SET_REQ(xvap, XAT_NOUNLINK);
 		}
 		if (XVA_ISSET_REQ(&tmpxvattr, XAT_IMMUTABLE)) {
 			XVA_SET_REQ(xvap, XAT_IMMUTABLE);
 		}
 		if (XVA_ISSET_REQ(&tmpxvattr, XAT_NODUMP)) {
 			XVA_SET_REQ(xvap, XAT_NODUMP);
 		}
 		if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_MODIFIED)) {
 			XVA_SET_REQ(xvap, XAT_AV_MODIFIED);
 		}
 		if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_QUARANTINED)) {
 			XVA_SET_REQ(xvap, XAT_AV_QUARANTINED);
 		}
 		if (XVA_ISSET_REQ(&tmpxvattr, XAT_PROJINHERIT)) {
 			XVA_SET_REQ(xvap, XAT_PROJINHERIT);
 		}
 
 		if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP))
 			ASSERT3S(vp->v_type, ==, VREG);
 
 		zfs_xvattr_set(zp, xvap, tx);
 	}
 
 	if (fuid_dirtied)
 		zfs_fuid_sync(zfsvfs, tx);
 
 	if (mask != 0)
 		zfs_log_setattr(zilog, tx, TX_SETATTR, zp, vap, mask, fuidp);
 
 	if (mask & (AT_UID|AT_GID|AT_MODE))
 		mutex_exit(&zp->z_acl_lock);
 
 	if (attrzp) {
 		if (mask & (AT_UID|AT_GID|AT_MODE))
 			mutex_exit(&attrzp->z_acl_lock);
 	}
 out:
 	if (err == 0 && attrzp) {
 		err2 = sa_bulk_update(attrzp->z_sa_hdl, xattr_bulk,
 		    xattr_count, tx);
 		ASSERT0(err2);
 	}
 
 	if (attrzp)
 		vput(ZTOV(attrzp));
 
 	if (aclp)
 		zfs_acl_free(aclp);
 
 	if (fuidp) {
 		zfs_fuid_info_free(fuidp);
 		fuidp = NULL;
 	}
 
 	if (err) {
 		dmu_tx_abort(tx);
 	} else {
 		err2 = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
 		dmu_tx_commit(tx);
 	}
 
 out2:
 	if (os->os_sync == ZFS_SYNC_ALWAYS)
 		zil_commit(zilog, 0);
 
 	ZFS_EXIT(zfsvfs);
 	return (err);
 }
 
 /*
  * Look up the directory entries corresponding to the source and target
  * directory/name pairs.
  */
 static int
 zfs_rename_relock_lookup(znode_t *sdzp, const struct componentname *scnp,
     znode_t **szpp, znode_t *tdzp, const struct componentname *tcnp,
     znode_t **tzpp)
 {
 	zfsvfs_t *zfsvfs;
 	znode_t *szp, *tzp;
 	int error;
 
 	/*
 	 * Before using sdzp and tdzp we must ensure that they are live.
 	 * As a porting legacy from illumos we have two things to worry
 	 * about.  One is typical for FreeBSD and it is that the vnode is
 	 * not reclaimed (doomed).  The other is that the znode is live.
 	 * The current code can invalidate the znode without acquiring the
 	 * corresponding vnode lock if the object represented by the znode
 	 * and vnode is no longer valid after a rollback or receive operation.
 	 * z_teardown_lock hidden behind ZFS_ENTER and ZFS_EXIT is the lock
 	 * that protects the znodes from the invalidation.
 	 */
 	zfsvfs = sdzp->z_zfsvfs;
 	ASSERT3P(zfsvfs, ==, tdzp->z_zfsvfs);
 	ZFS_ENTER(zfsvfs);
 	ZFS_VERIFY_ZP(sdzp);
 	ZFS_VERIFY_ZP(tdzp);
 
 	/*
 	 * Re-resolve svp to be certain it still exists and fetch the
 	 * correct vnode.
 	 */
 	error = zfs_dirent_lookup(sdzp, scnp->cn_nameptr, &szp, ZEXISTS);
 	if (error != 0) {
 		/* Source entry invalid or not there. */
 		if ((scnp->cn_flags & ISDOTDOT) != 0 ||
 		    (scnp->cn_namelen == 1 && scnp->cn_nameptr[0] == '.'))
 			error = SET_ERROR(EINVAL);
 		goto out;
 	}
 	*szpp = szp;
 
 	/*
 	 * Re-resolve tvp, if it disappeared we just carry on.
 	 */
 	error = zfs_dirent_lookup(tdzp, tcnp->cn_nameptr, &tzp, 0);
 	if (error != 0) {
 		vrele(ZTOV(szp));
 		if ((tcnp->cn_flags & ISDOTDOT) != 0)
 			error = SET_ERROR(EINVAL);
 		goto out;
 	}
 	*tzpp = tzp;
 out:
 	ZFS_EXIT(zfsvfs);
 	return (error);
 }
 
 /*
  * We acquire all but fdvp locks using non-blocking acquisitions.  If we
  * fail to acquire any lock in the path we will drop all held locks,
  * acquire the new lock in a blocking fashion, and then release it and
  * restart the rename.  This acquire/release step ensures that we do not
  * spin on a lock waiting for release.  On error release all vnode locks
  * and decrement references the way tmpfs_rename() would do.
  */
 static int
 zfs_rename_relock(struct vnode *sdvp, struct vnode **svpp,
     struct vnode *tdvp, struct vnode **tvpp,
     const struct componentname *scnp, const struct componentname *tcnp)
 {
 	struct vnode	*nvp, *svp, *tvp;
 	znode_t		*sdzp, *tdzp, *szp, *tzp;
 	int		error;
 
 	VOP_UNLOCK1(tdvp);
 	if (*tvpp != NULL && *tvpp != tdvp)
 		VOP_UNLOCK1(*tvpp);
 
 relock:
 	error = vn_lock(sdvp, LK_EXCLUSIVE);
 	if (error)
 		goto out;
 	error = vn_lock(tdvp, LK_EXCLUSIVE | LK_NOWAIT);
 	if (error != 0) {
 		VOP_UNLOCK1(sdvp);
 		if (error != EBUSY)
 			goto out;
 		error = vn_lock(tdvp, LK_EXCLUSIVE);
 		if (error)
 			goto out;
 		VOP_UNLOCK1(tdvp);
 		goto relock;
 	}
 	tdzp = VTOZ(tdvp);
 	sdzp = VTOZ(sdvp);
 
 	error = zfs_rename_relock_lookup(sdzp, scnp, &szp, tdzp, tcnp, &tzp);
 	if (error != 0) {
 		VOP_UNLOCK1(sdvp);
 		VOP_UNLOCK1(tdvp);
 		goto out;
 	}
 	svp = ZTOV(szp);
 	tvp = tzp != NULL ? ZTOV(tzp) : NULL;
 
 	/*
 	 * Now try acquire locks on svp and tvp.
 	 */
 	nvp = svp;
 	error = vn_lock(nvp, LK_EXCLUSIVE | LK_NOWAIT);
 	if (error != 0) {
 		VOP_UNLOCK1(sdvp);
 		VOP_UNLOCK1(tdvp);
 		if (tvp != NULL)
 			vrele(tvp);
 		if (error != EBUSY) {
 			vrele(nvp);
 			goto out;
 		}
 		error = vn_lock(nvp, LK_EXCLUSIVE);
 		if (error != 0) {
 			vrele(nvp);
 			goto out;
 		}
 		VOP_UNLOCK1(nvp);
 		/*
 		 * Concurrent rename race.
 		 * XXX ?
 		 */
 		if (nvp == tdvp) {
 			vrele(nvp);
 			error = SET_ERROR(EINVAL);
 			goto out;
 		}
 		vrele(*svpp);
 		*svpp = nvp;
 		goto relock;
 	}
 	vrele(*svpp);
 	*svpp = nvp;
 
 	if (*tvpp != NULL)
 		vrele(*tvpp);
 	*tvpp = NULL;
 	if (tvp != NULL) {
 		nvp = tvp;
 		error = vn_lock(nvp, LK_EXCLUSIVE | LK_NOWAIT);
 		if (error != 0) {
 			VOP_UNLOCK1(sdvp);
 			VOP_UNLOCK1(tdvp);
 			VOP_UNLOCK1(*svpp);
 			if (error != EBUSY) {
 				vrele(nvp);
 				goto out;
 			}
 			error = vn_lock(nvp, LK_EXCLUSIVE);
 			if (error != 0) {
 				vrele(nvp);
 				goto out;
 			}
 			vput(nvp);
 			goto relock;
 		}
 		*tvpp = nvp;
 	}
 
 	return (0);
 
 out:
 	return (error);
 }
 
 /*
  * Note that we must use VRELE_ASYNC in this function as it walks
  * up the directory tree and vrele may need to acquire an exclusive
  * lock if a last reference to a vnode is dropped.
  */
 static int
 zfs_rename_check(znode_t *szp, znode_t *sdzp, znode_t *tdzp)
 {
 	zfsvfs_t	*zfsvfs;
 	znode_t		*zp, *zp1;
 	uint64_t	parent;
 	int		error;
 
 	zfsvfs = tdzp->z_zfsvfs;
 	if (tdzp == szp)
 		return (SET_ERROR(EINVAL));
 	if (tdzp == sdzp)
 		return (0);
 	if (tdzp->z_id == zfsvfs->z_root)
 		return (0);
 	zp = tdzp;
 	for (;;) {
 		ASSERT(!zp->z_unlinked);
 		if ((error = sa_lookup(zp->z_sa_hdl,
 		    SA_ZPL_PARENT(zfsvfs), &parent, sizeof (parent))) != 0)
 			break;
 
 		if (parent == szp->z_id) {
 			error = SET_ERROR(EINVAL);
 			break;
 		}
 		if (parent == zfsvfs->z_root)
 			break;
 		if (parent == sdzp->z_id)
 			break;
 
 		error = zfs_zget(zfsvfs, parent, &zp1);
 		if (error != 0)
 			break;
 
 		if (zp != tdzp)
 			VN_RELE_ASYNC(ZTOV(zp),
 			    dsl_pool_zrele_taskq(
 			    dmu_objset_pool(zfsvfs->z_os)));
 		zp = zp1;
 	}
 
 	if (error == ENOTDIR)
 		panic("checkpath: .. not a directory\n");
 	if (zp != tdzp)
 		VN_RELE_ASYNC(ZTOV(zp),
 		    dsl_pool_zrele_taskq(dmu_objset_pool(zfsvfs->z_os)));
 	return (error);
 }
 
 #if	__FreeBSD_version < 1300124
 static void
 cache_vop_rename(struct vnode *fdvp, struct vnode *fvp, struct vnode *tdvp,
     struct vnode *tvp, struct componentname *fcnp, struct componentname *tcnp)
 {
 
 	cache_purge(fvp);
 	if (tvp != NULL)
 		cache_purge(tvp);
 	cache_purge_negative(tdvp);
 }
 #endif
 
 static int
 zfs_do_rename_impl(vnode_t *sdvp, vnode_t **svpp, struct componentname *scnp,
     vnode_t *tdvp, vnode_t **tvpp, struct componentname *tcnp,
     cred_t *cr);
 
 /*
  * Move an entry from the provided source directory to the target
  * directory.  Change the entry name as indicated.
  *
  *	IN:	sdvp	- Source directory containing the "old entry".
  *		scnp	- Old entry name.
  *		tdvp	- Target directory to contain the "new entry".
  *		tcnp	- New entry name.
  *		cr	- credentials of caller.
  *	INOUT:	svpp	- Source file
  *		tvpp	- Target file, may point to NULL initially
  *
  *	RETURN:	0 on success, error code on failure.
  *
  * Timestamps:
  *	sdvp,tdvp - ctime|mtime updated
  */
 static int
 zfs_do_rename(vnode_t *sdvp, vnode_t **svpp, struct componentname *scnp,
     vnode_t *tdvp, vnode_t **tvpp, struct componentname *tcnp,
     cred_t *cr)
 {
 	int	error;
 
 	ASSERT_VOP_ELOCKED(tdvp, __func__);
 	if (*tvpp != NULL)
 		ASSERT_VOP_ELOCKED(*tvpp, __func__);
 
 	/* Reject renames across filesystems. */
 	if ((*svpp)->v_mount != tdvp->v_mount ||
 	    ((*tvpp) != NULL && (*svpp)->v_mount != (*tvpp)->v_mount)) {
 		error = SET_ERROR(EXDEV);
 		goto out;
 	}
 
 	if (zfsctl_is_node(tdvp)) {
 		error = SET_ERROR(EXDEV);
 		goto out;
 	}
 
 	/*
 	 * Lock all four vnodes to ensure safety and semantics of renaming.
 	 */
 	error = zfs_rename_relock(sdvp, svpp, tdvp, tvpp, scnp, tcnp);
 	if (error != 0) {
 		/* no vnodes are locked in the case of error here */
 		return (error);
 	}
 
 	error = zfs_do_rename_impl(sdvp, svpp, scnp, tdvp, tvpp, tcnp, cr);
 	VOP_UNLOCK1(sdvp);
 	VOP_UNLOCK1(*svpp);
 out:
 	if (*tvpp != NULL)
 		VOP_UNLOCK1(*tvpp);
 	if (tdvp != *tvpp)
 		VOP_UNLOCK1(tdvp);
 
 	return (error);
 }
 
 static int
 zfs_do_rename_impl(vnode_t *sdvp, vnode_t **svpp, struct componentname *scnp,
     vnode_t *tdvp, vnode_t **tvpp, struct componentname *tcnp,
     cred_t *cr)
 {
 	dmu_tx_t	*tx;
 	zfsvfs_t	*zfsvfs;
 	zilog_t		*zilog;
 	znode_t		*tdzp, *sdzp, *tzp, *szp;
 	const char	*snm = scnp->cn_nameptr;
 	const char	*tnm = tcnp->cn_nameptr;
 	int		error;
 
 	tdzp = VTOZ(tdvp);
 	sdzp = VTOZ(sdvp);
 	zfsvfs = tdzp->z_zfsvfs;
 
 	ZFS_ENTER(zfsvfs);
 	ZFS_VERIFY_ZP(tdzp);
 	ZFS_VERIFY_ZP(sdzp);
 	zilog = zfsvfs->z_log;
 
 	if (zfsvfs->z_utf8 && u8_validate(tnm,
 	    strlen(tnm), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
 		error = SET_ERROR(EILSEQ);
 		goto out;
 	}
 
 	/* If source and target are the same file, there is nothing to do. */
 	if ((*svpp) == (*tvpp)) {
 		error = 0;
 		goto out;
 	}
 
 	if (((*svpp)->v_type == VDIR && (*svpp)->v_mountedhere != NULL) ||
 	    ((*tvpp) != NULL && (*tvpp)->v_type == VDIR &&
 	    (*tvpp)->v_mountedhere != NULL)) {
 		error = SET_ERROR(EXDEV);
 		goto out;
 	}
 
 	szp = VTOZ(*svpp);
 	ZFS_VERIFY_ZP(szp);
 	tzp = *tvpp == NULL ? NULL : VTOZ(*tvpp);
 	if (tzp != NULL)
 		ZFS_VERIFY_ZP(tzp);
 
 	/*
 	 * This is to prevent the creation of links into attribute space
 	 * by renaming a linked file into/outof an attribute directory.
 	 * See the comment in zfs_link() for why this is considered bad.
 	 */
 	if ((tdzp->z_pflags & ZFS_XATTR) != (sdzp->z_pflags & ZFS_XATTR)) {
 		error = SET_ERROR(EINVAL);
 		goto out;
 	}
 
 	/*
 	 * If we are using project inheritance, means if the directory has
 	 * ZFS_PROJINHERIT set, then its descendant directories will inherit
 	 * not only the project ID, but also the ZFS_PROJINHERIT flag. Under
 	 * such case, we only allow renames into our tree when the project
 	 * IDs are the same.
 	 */
 	if (tdzp->z_pflags & ZFS_PROJINHERIT &&
 	    tdzp->z_projid != szp->z_projid) {
 		error = SET_ERROR(EXDEV);
 		goto out;
 	}
 
 	/*
 	 * Must have write access at the source to remove the old entry
 	 * and write access at the target to create the new entry.
 	 * Note that if target and source are the same, this can be
 	 * done in a single check.
 	 */
 	if ((error = zfs_zaccess_rename(sdzp, szp, tdzp, tzp, cr)))
 		goto out;
 
 	if ((*svpp)->v_type == VDIR) {
 		/*
 		 * Avoid ".", "..", and aliases of "." for obvious reasons.
 		 */
 		if ((scnp->cn_namelen == 1 && scnp->cn_nameptr[0] == '.') ||
 		    sdzp == szp ||
 		    (scnp->cn_flags | tcnp->cn_flags) & ISDOTDOT) {
 			error = EINVAL;
 			goto out;
 		}
 
 		/*
 		 * Check to make sure rename is valid.
 		 * Can't do a move like this: /usr/a/b to /usr/a/b/c/d
 		 */
 		if ((error = zfs_rename_check(szp, sdzp, tdzp)))
 			goto out;
 	}
 
 	/*
 	 * Does target exist?
 	 */
 	if (tzp) {
 		/*
 		 * Source and target must be the same type.
 		 */
 		if ((*svpp)->v_type == VDIR) {
 			if ((*tvpp)->v_type != VDIR) {
 				error = SET_ERROR(ENOTDIR);
 				goto out;
 			} else {
 				cache_purge(tdvp);
 				if (sdvp != tdvp)
 					cache_purge(sdvp);
 			}
 		} else {
 			if ((*tvpp)->v_type == VDIR) {
 				error = SET_ERROR(EISDIR);
 				goto out;
 			}
 		}
 	}
 
 	vn_seqc_write_begin(*svpp);
 	vn_seqc_write_begin(sdvp);
 	if (*tvpp != NULL)
 		vn_seqc_write_begin(*tvpp);
 	if (tdvp != *tvpp)
 		vn_seqc_write_begin(tdvp);
 
 	vnevent_rename_src(*svpp, sdvp, scnp->cn_nameptr, ct);
 	if (tzp)
 		vnevent_rename_dest(*tvpp, tdvp, tnm, ct);
 
 	/*
 	 * notify the target directory if it is not the same
 	 * as source directory.
 	 */
 	if (tdvp != sdvp) {
 		vnevent_rename_dest_dir(tdvp, ct);
 	}
 
 	tx = dmu_tx_create(zfsvfs->z_os);
 	dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE);
 	dmu_tx_hold_sa(tx, sdzp->z_sa_hdl, B_FALSE);
 	dmu_tx_hold_zap(tx, sdzp->z_id, FALSE, snm);
 	dmu_tx_hold_zap(tx, tdzp->z_id, TRUE, tnm);
 	if (sdzp != tdzp) {
 		dmu_tx_hold_sa(tx, tdzp->z_sa_hdl, B_FALSE);
 		zfs_sa_upgrade_txholds(tx, tdzp);
 	}
 	if (tzp) {
 		dmu_tx_hold_sa(tx, tzp->z_sa_hdl, B_FALSE);
 		zfs_sa_upgrade_txholds(tx, tzp);
 	}
 
 	zfs_sa_upgrade_txholds(tx, szp);
 	dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
 	error = dmu_tx_assign(tx, TXG_WAIT);
 	if (error) {
 		dmu_tx_abort(tx);
 		goto out_seq;
 	}
 
 	if (tzp)	/* Attempt to remove the existing target */
 		error = zfs_link_destroy(tdzp, tnm, tzp, tx, 0, NULL);
 
 	if (error == 0) {
 		error = zfs_link_create(tdzp, tnm, szp, tx, ZRENAMING);
 		if (error == 0) {
 			szp->z_pflags |= ZFS_AV_MODIFIED;
 
 			error = sa_update(szp->z_sa_hdl, SA_ZPL_FLAGS(zfsvfs),
 			    (void *)&szp->z_pflags, sizeof (uint64_t), tx);
 			ASSERT0(error);
 
 			error = zfs_link_destroy(sdzp, snm, szp, tx, ZRENAMING,
 			    NULL);
 			if (error == 0) {
 				zfs_log_rename(zilog, tx, TX_RENAME, sdzp,
 				    snm, tdzp, tnm, szp);
 
 				/*
 				 * Update path information for the target vnode
 				 */
 				vn_renamepath(tdvp, *svpp, tnm, strlen(tnm));
 			} else {
 				/*
 				 * At this point, we have successfully created
 				 * the target name, but have failed to remove
 				 * the source name.  Since the create was done
 				 * with the ZRENAMING flag, there are
 				 * complications; for one, the link count is
 				 * wrong.  The easiest way to deal with this
 				 * is to remove the newly created target, and
 				 * return the original error.  This must
 				 * succeed; fortunately, it is very unlikely to
 				 * fail, since we just created it.
 				 */
 				VERIFY0(zfs_link_destroy(tdzp, tnm, szp, tx,
 				    ZRENAMING, NULL));
 			}
 		}
 		if (error == 0) {
 			cache_vop_rename(sdvp, *svpp, tdvp, *tvpp, scnp, tcnp);
 		}
 	}
 
 	dmu_tx_commit(tx);
 
 out_seq:
 	vn_seqc_write_end(*svpp);
 	vn_seqc_write_end(sdvp);
 	if (*tvpp != NULL)
 		vn_seqc_write_end(*tvpp);
 	if (tdvp != *tvpp)
 		vn_seqc_write_end(tdvp);
 
 out:
 	if (error == 0 && zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
 		zil_commit(zilog, 0);
 	ZFS_EXIT(zfsvfs);
 
 	return (error);
 }
 
 int
 zfs_rename(znode_t *sdzp, const char *sname, znode_t *tdzp, const char *tname,
     cred_t *cr, int flags)
 {
 	struct componentname scn, tcn;
 	vnode_t *sdvp, *tdvp;
 	vnode_t *svp, *tvp;
 	int error;
 	svp = tvp = NULL;
 
 	sdvp = ZTOV(sdzp);
 	tdvp = ZTOV(tdzp);
 	error = zfs_lookup_internal(sdzp, sname, &svp, &scn, DELETE);
 	if (sdzp->z_zfsvfs->z_replay == B_FALSE)
 		VOP_UNLOCK1(sdvp);
 	if (error != 0)
 		goto fail;
 	VOP_UNLOCK1(svp);
 
 	vn_lock(tdvp, LK_EXCLUSIVE | LK_RETRY);
 	error = zfs_lookup_internal(tdzp, tname, &tvp, &tcn, RENAME);
 	if (error == EJUSTRETURN)
 		tvp = NULL;
 	else if (error != 0) {
 		VOP_UNLOCK1(tdvp);
 		goto fail;
 	}
 
 	error = zfs_do_rename(sdvp, &svp, &scn, tdvp, &tvp, &tcn, cr);
 fail:
 	if (svp != NULL)
 		vrele(svp);
 	if (tvp != NULL)
 		vrele(tvp);
 
 	return (error);
 }
 
 /*
  * Insert the indicated symbolic reference entry into the directory.
  *
  *	IN:	dvp	- Directory to contain new symbolic link.
  *		link	- Name for new symlink entry.
  *		vap	- Attributes of new entry.
  *		cr	- credentials of caller.
  *		ct	- caller context
  *		flags	- case flags
  *
  *	RETURN:	0 on success, error code on failure.
  *
  * Timestamps:
  *	dvp - ctime|mtime updated
  */
 int
 zfs_symlink(znode_t *dzp, const char *name, vattr_t *vap,
     const char *link, znode_t **zpp, cred_t *cr, int flags)
 {
 	(void) flags;
 	znode_t		*zp;
 	dmu_tx_t	*tx;
 	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
 	zilog_t		*zilog;
 	uint64_t	len = strlen(link);
 	int		error;
 	zfs_acl_ids_t	acl_ids;
 	boolean_t	fuid_dirtied;
 	uint64_t	txtype = TX_SYMLINK;
 
 	ASSERT3S(vap->va_type, ==, VLNK);
 
 	ZFS_ENTER(zfsvfs);
 	ZFS_VERIFY_ZP(dzp);
 	zilog = zfsvfs->z_log;
 
 	if (zfsvfs->z_utf8 && u8_validate(name, strlen(name),
 	    NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
 		ZFS_EXIT(zfsvfs);
 		return (SET_ERROR(EILSEQ));
 	}
 
 	if (len > MAXPATHLEN) {
 		ZFS_EXIT(zfsvfs);
 		return (SET_ERROR(ENAMETOOLONG));
 	}
 
 	if ((error = zfs_acl_ids_create(dzp, 0,
 	    vap, cr, NULL, &acl_ids)) != 0) {
 		ZFS_EXIT(zfsvfs);
 		return (error);
 	}
 
 	/*
 	 * Attempt to lock directory; fail if entry already exists.
 	 */
 	error = zfs_dirent_lookup(dzp, name, &zp, ZNEW);
 	if (error) {
 		zfs_acl_ids_free(&acl_ids);
 		ZFS_EXIT(zfsvfs);
 		return (error);
 	}
 
 	if ((error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr))) {
 		zfs_acl_ids_free(&acl_ids);
 		ZFS_EXIT(zfsvfs);
 		return (error);
 	}
 
 	if (zfs_acl_ids_overquota(zfsvfs, &acl_ids,
 	    0 /* projid */)) {
 		zfs_acl_ids_free(&acl_ids);
 		ZFS_EXIT(zfsvfs);
 		return (SET_ERROR(EDQUOT));
 	}
 
 	getnewvnode_reserve_();
 	tx = dmu_tx_create(zfsvfs->z_os);
 	fuid_dirtied = zfsvfs->z_fuid_dirty;
 	dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, MAX(1, len));
 	dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name);
 	dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
 	    ZFS_SA_BASE_ATTR_SIZE + len);
 	dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE);
 	if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
 		dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,
 		    acl_ids.z_aclp->z_acl_bytes);
 	}
 	if (fuid_dirtied)
 		zfs_fuid_txhold(zfsvfs, tx);
 	error = dmu_tx_assign(tx, TXG_WAIT);
 	if (error) {
 		zfs_acl_ids_free(&acl_ids);
 		dmu_tx_abort(tx);
 		getnewvnode_drop_reserve();
 		ZFS_EXIT(zfsvfs);
 		return (error);
 	}
 
 	/*
 	 * Create a new object for the symlink.
 	 * for version 4 ZPL datasets the symlink will be an SA attribute
 	 */
 	zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);
 
 	if (fuid_dirtied)
 		zfs_fuid_sync(zfsvfs, tx);
 
 	if (zp->z_is_sa)
 		error = sa_update(zp->z_sa_hdl, SA_ZPL_SYMLINK(zfsvfs),
 		    __DECONST(void *, link), len, tx);
 	else
 		zfs_sa_symlink(zp, __DECONST(char *, link), len, tx);
 
 	zp->z_size = len;
 	(void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs),
 	    &zp->z_size, sizeof (zp->z_size), tx);
 	/*
 	 * Insert the new object into the directory.
 	 */
 	(void) zfs_link_create(dzp, name, zp, tx, ZNEW);
 
 	zfs_log_symlink(zilog, tx, txtype, dzp, zp, name, link);
 	*zpp = zp;
 
 	zfs_acl_ids_free(&acl_ids);
 
 	dmu_tx_commit(tx);
 
 	getnewvnode_drop_reserve();
 
 	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
 		zil_commit(zilog, 0);
 
 	ZFS_EXIT(zfsvfs);
 	return (error);
 }
 
 /*
  * Return, in the buffer contained in the provided uio structure,
  * the symbolic path referred to by vp.
  *
  *	IN:	vp	- vnode of symbolic link.
  *		uio	- structure to contain the link path.
  *		cr	- credentials of caller.
  *		ct	- caller context
  *
  *	OUT:	uio	- structure containing the link path.
  *
  *	RETURN:	0 on success, error code on failure.
  *
  * Timestamps:
  *	vp - atime updated
  */
 static int
 zfs_readlink(vnode_t *vp, zfs_uio_t *uio, cred_t *cr, caller_context_t *ct)
 {
 	(void) cr, (void) ct;
 	znode_t		*zp = VTOZ(vp);
 	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
 	int		error;
 
 	ZFS_ENTER(zfsvfs);
 	ZFS_VERIFY_ZP(zp);
 
 	if (zp->z_is_sa)
 		error = sa_lookup_uio(zp->z_sa_hdl,
 		    SA_ZPL_SYMLINK(zfsvfs), uio);
 	else
 		error = zfs_sa_readlink(zp, uio);
 
 	ZFS_ACCESSTIME_STAMP(zfsvfs, zp);
 
 	ZFS_EXIT(zfsvfs);
 	return (error);
 }
 
 /*
  * Insert a new entry into directory tdvp referencing svp.
  *
  *	IN:	tdvp	- Directory to contain new entry.
  *		svp	- vnode of new entry.
  *		name	- name of new entry.
  *		cr	- credentials of caller.
  *
  *	RETURN:	0 on success, error code on failure.
  *
  * Timestamps:
  *	tdvp - ctime|mtime updated
  *	 svp - ctime updated
  */
 int
 zfs_link(znode_t *tdzp, znode_t *szp, const char *name, cred_t *cr,
     int flags)
 {
 	(void) flags;
 	znode_t		*tzp;
 	zfsvfs_t	*zfsvfs = tdzp->z_zfsvfs;
 	zilog_t		*zilog;
 	dmu_tx_t	*tx;
 	int		error;
 	uint64_t	parent;
 	uid_t		owner;
 
 	ASSERT3S(ZTOV(tdzp)->v_type, ==, VDIR);
 
 	ZFS_ENTER(zfsvfs);
 	ZFS_VERIFY_ZP(tdzp);
 	zilog = zfsvfs->z_log;
 
 	/*
 	 * POSIX dictates that we return EPERM here.
 	 * Better choices include ENOTSUP or EISDIR.
 	 */
 	if (ZTOV(szp)->v_type == VDIR) {
 		ZFS_EXIT(zfsvfs);
 		return (SET_ERROR(EPERM));
 	}
 
 	ZFS_VERIFY_ZP(szp);
 
 	/*
 	 * If we are using project inheritance, means if the directory has
 	 * ZFS_PROJINHERIT set, then its descendant directories will inherit
 	 * not only the project ID, but also the ZFS_PROJINHERIT flag. Under
 	 * such case, we only allow hard link creation in our tree when the
 	 * project IDs are the same.
 	 */
 	if (tdzp->z_pflags & ZFS_PROJINHERIT &&
 	    tdzp->z_projid != szp->z_projid) {
 		ZFS_EXIT(zfsvfs);
 		return (SET_ERROR(EXDEV));
 	}
 
 	if (szp->z_pflags & (ZFS_APPENDONLY |
 	    ZFS_IMMUTABLE | ZFS_READONLY)) {
 		ZFS_EXIT(zfsvfs);
 		return (SET_ERROR(EPERM));
 	}
 
 	/* Prevent links to .zfs/shares files */
 
 	if ((error = sa_lookup(szp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs),
 	    &parent, sizeof (uint64_t))) != 0) {
 		ZFS_EXIT(zfsvfs);
 		return (error);
 	}
 	if (parent == zfsvfs->z_shares_dir) {
 		ZFS_EXIT(zfsvfs);
 		return (SET_ERROR(EPERM));
 	}
 
 	if (zfsvfs->z_utf8 && u8_validate(name,
 	    strlen(name), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
 		ZFS_EXIT(zfsvfs);
 		return (SET_ERROR(EILSEQ));
 	}
 
 	/*
 	 * We do not support links between attributes and non-attributes
 	 * because of the potential security risk of creating links
 	 * into "normal" file space in order to circumvent restrictions
 	 * imposed in attribute space.
 	 */
 	if ((szp->z_pflags & ZFS_XATTR) != (tdzp->z_pflags & ZFS_XATTR)) {
 		ZFS_EXIT(zfsvfs);
 		return (SET_ERROR(EINVAL));
 	}
 
 
 	owner = zfs_fuid_map_id(zfsvfs, szp->z_uid, cr, ZFS_OWNER);
 	if (owner != crgetuid(cr) && secpolicy_basic_link(ZTOV(szp), cr) != 0) {
 		ZFS_EXIT(zfsvfs);
 		return (SET_ERROR(EPERM));
 	}
 
 	if ((error = zfs_zaccess(tdzp, ACE_ADD_FILE, 0, B_FALSE, cr))) {
 		ZFS_EXIT(zfsvfs);
 		return (error);
 	}
 
 	/*
 	 * Attempt to lock directory; fail if entry already exists.
 	 */
 	error = zfs_dirent_lookup(tdzp, name, &tzp, ZNEW);
 	if (error) {
 		ZFS_EXIT(zfsvfs);
 		return (error);
 	}
 
 	tx = dmu_tx_create(zfsvfs->z_os);
 	dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE);
 	dmu_tx_hold_zap(tx, tdzp->z_id, TRUE, name);
 	zfs_sa_upgrade_txholds(tx, szp);
 	zfs_sa_upgrade_txholds(tx, tdzp);
 	error = dmu_tx_assign(tx, TXG_WAIT);
 	if (error) {
 		dmu_tx_abort(tx);
 		ZFS_EXIT(zfsvfs);
 		return (error);
 	}
 
 	error = zfs_link_create(tdzp, name, szp, tx, 0);
 
 	if (error == 0) {
 		uint64_t txtype = TX_LINK;
 		zfs_log_link(zilog, tx, txtype, tdzp, szp, name);
 	}
 
 	dmu_tx_commit(tx);
 
 	if (error == 0) {
 		vnevent_link(ZTOV(szp), ct);
 	}
 
 	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
 		zil_commit(zilog, 0);
 
 	ZFS_EXIT(zfsvfs);
 	return (error);
 }
 
 /*
  * Free or allocate space in a file.  Currently, this function only
  * supports the `F_FREESP' command.  However, this command is somewhat
  * misnamed, as its functionality includes the ability to allocate as
  * well as free space.
  *
  *	IN:	ip	- inode of file to free data in.
  *		cmd	- action to take (only F_FREESP supported).
  *		bfp	- section of file to free/alloc.
  *		flag	- current file open mode flags.
  *		offset	- current file offset.
  *		cr	- credentials of caller.
  *
  *	RETURN:	0 on success, error code on failure.
  *
  * Timestamps:
  *	ip - ctime|mtime updated
  */
 int
 zfs_space(znode_t *zp, int cmd, flock64_t *bfp, int flag,
     offset_t offset, cred_t *cr)
 {
 	(void) offset;
 	zfsvfs_t	*zfsvfs = ZTOZSB(zp);
 	uint64_t	off, len;
 	int		error;
 
 	ZFS_ENTER(zfsvfs);
 	ZFS_VERIFY_ZP(zp);
 
 	if (cmd != F_FREESP) {
 		ZFS_EXIT(zfsvfs);
 		return (SET_ERROR(EINVAL));
 	}
 
 	/*
 	 * Callers might not be able to detect properly that we are read-only,
 	 * so check it explicitly here.
 	 */
 	if (zfs_is_readonly(zfsvfs)) {
 		ZFS_EXIT(zfsvfs);
 		return (SET_ERROR(EROFS));
 	}
 
 	if (bfp->l_len < 0) {
 		ZFS_EXIT(zfsvfs);
 		return (SET_ERROR(EINVAL));
 	}
 
 	/*
 	 * Permissions aren't checked on Solaris because on this OS
 	 * zfs_space() can only be called with an opened file handle.
 	 * On Linux we can get here through truncate_range() which
 	 * operates directly on inodes, so we need to check access rights.
 	 */
 	if ((error = zfs_zaccess(zp, ACE_WRITE_DATA, 0, B_FALSE, cr))) {
 		ZFS_EXIT(zfsvfs);
 		return (error);
 	}
 
 	off = bfp->l_start;
 	len = bfp->l_len; /* 0 means from off to end of file */
 
 	error = zfs_freesp(zp, off, len, flag, TRUE);
 
 	ZFS_EXIT(zfsvfs);
 	return (error);
 }
 
 static void
 zfs_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct)
 {
 	(void) cr, (void) ct;
 	znode_t	*zp = VTOZ(vp);
 	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
 	int error;
 
 	ZFS_TEARDOWN_INACTIVE_ENTER_READ(zfsvfs);
 	if (zp->z_sa_hdl == NULL) {
 		/*
 		 * The fs has been unmounted, or we did a
 		 * suspend/resume and this file no longer exists.
 		 */
 		ZFS_TEARDOWN_INACTIVE_EXIT_READ(zfsvfs);
 		vrecycle(vp);
 		return;
 	}
 
 	if (zp->z_unlinked) {
 		/*
 		 * Fast path to recycle a vnode of a removed file.
 		 */
 		ZFS_TEARDOWN_INACTIVE_EXIT_READ(zfsvfs);
 		vrecycle(vp);
 		return;
 	}
 
 	if (zp->z_atime_dirty && zp->z_unlinked == 0) {
 		dmu_tx_t *tx = dmu_tx_create(zfsvfs->z_os);
 
 		dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
 		zfs_sa_upgrade_txholds(tx, zp);
 		error = dmu_tx_assign(tx, TXG_WAIT);
 		if (error) {
 			dmu_tx_abort(tx);
 		} else {
 			(void) sa_update(zp->z_sa_hdl, SA_ZPL_ATIME(zfsvfs),
 			    (void *)&zp->z_atime, sizeof (zp->z_atime), tx);
 			zp->z_atime_dirty = 0;
 			dmu_tx_commit(tx);
 		}
 	}
 	ZFS_TEARDOWN_INACTIVE_EXIT_READ(zfsvfs);
 }
 
 
 _Static_assert(sizeof (struct zfid_short) <= sizeof (struct fid),
 	"struct zfid_short bigger than struct fid");
 _Static_assert(sizeof (struct zfid_long) <= sizeof (struct fid),
 	"struct zfid_long bigger than struct fid");
 
 static int
 zfs_fid(vnode_t *vp, fid_t *fidp, caller_context_t *ct)
 {
 	(void) ct;
 	znode_t		*zp = VTOZ(vp);
 	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
 	uint32_t	gen;
 	uint64_t	gen64;
 	uint64_t	object = zp->z_id;
 	zfid_short_t	*zfid;
 	int		size, i, error;
 
 	ZFS_ENTER(zfsvfs);
 	ZFS_VERIFY_ZP(zp);
 
 	if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs),
 	    &gen64, sizeof (uint64_t))) != 0) {
 		ZFS_EXIT(zfsvfs);
 		return (error);
 	}
 
 	gen = (uint32_t)gen64;
 
 	size = (zfsvfs->z_parent != zfsvfs) ? LONG_FID_LEN : SHORT_FID_LEN;
 	fidp->fid_len = size;
 
 	zfid = (zfid_short_t *)fidp;
 
 	zfid->zf_len = size;
 
 	for (i = 0; i < sizeof (zfid->zf_object); i++)
 		zfid->zf_object[i] = (uint8_t)(object >> (8 * i));
 
 	/* Must have a non-zero generation number to distinguish from .zfs */
 	if (gen == 0)
 		gen = 1;
 	for (i = 0; i < sizeof (zfid->zf_gen); i++)
 		zfid->zf_gen[i] = (uint8_t)(gen >> (8 * i));
 
 	if (size == LONG_FID_LEN) {
 		uint64_t	objsetid = dmu_objset_id(zfsvfs->z_os);
 		zfid_long_t	*zlfid;
 
 		zlfid = (zfid_long_t *)fidp;
 
 		for (i = 0; i < sizeof (zlfid->zf_setid); i++)
 			zlfid->zf_setid[i] = (uint8_t)(objsetid >> (8 * i));
 
 		/* XXX - this should be the generation number for the objset */
 		for (i = 0; i < sizeof (zlfid->zf_setgen); i++)
 			zlfid->zf_setgen[i] = 0;
 	}
 
 	ZFS_EXIT(zfsvfs);
 	return (0);
 }
 
 static int
 zfs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr,
     caller_context_t *ct)
 {
 	znode_t *zp;
 	zfsvfs_t *zfsvfs;
 
 	switch (cmd) {
 	case _PC_LINK_MAX:
 		*valp = MIN(LONG_MAX, ZFS_LINK_MAX);
 		return (0);
 
 	case _PC_FILESIZEBITS:
 		*valp = 64;
 		return (0);
 	case _PC_MIN_HOLE_SIZE:
 		*valp = (int)SPA_MINBLOCKSIZE;
 		return (0);
 	case _PC_ACL_EXTENDED:
 #if 0		/* POSIX ACLs are not implemented for ZFS on FreeBSD yet. */
 		zp = VTOZ(vp);
 		zfsvfs = zp->z_zfsvfs;
 		ZFS_ENTER(zfsvfs);
 		ZFS_VERIFY_ZP(zp);
 		*valp = zfsvfs->z_acl_type == ZFSACLTYPE_POSIX ? 1 : 0;
 		ZFS_EXIT(zfsvfs);
 #else
 		*valp = 0;
 #endif
 		return (0);
 
 	case _PC_ACL_NFS4:
 		zp = VTOZ(vp);
 		zfsvfs = zp->z_zfsvfs;
 		ZFS_ENTER(zfsvfs);
 		ZFS_VERIFY_ZP(zp);
 		*valp = zfsvfs->z_acl_type == ZFS_ACLTYPE_NFSV4 ? 1 : 0;
 		ZFS_EXIT(zfsvfs);
 		return (0);
 
 	case _PC_ACL_PATH_MAX:
 		*valp = ACL_MAX_ENTRIES;
 		return (0);
 
 	default:
 		return (EOPNOTSUPP);
 	}
 }
 
 static int
 zfs_getpages(struct vnode *vp, vm_page_t *ma, int count, int *rbehind,
     int *rahead)
 {
 	znode_t *zp = VTOZ(vp);
 	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
 	zfs_locked_range_t *lr;
 	vm_object_t object;
 	off_t start, end, obj_size;
 	uint_t blksz;
 	int pgsin_b, pgsin_a;
 	int error;
 
 	ZFS_ENTER_ERROR(zfsvfs, zfs_vm_pagerret_error);
 	ZFS_VERIFY_ZP_ERROR(zp, zfs_vm_pagerret_error);
 
 	start = IDX_TO_OFF(ma[0]->pindex);
 	end = IDX_TO_OFF(ma[count - 1]->pindex + 1);
 
 	/*
 	 * Lock a range covering all required and optional pages.
 	 * Note that we need to handle the case of the block size growing.
 	 */
 	for (;;) {
 		blksz = zp->z_blksz;
 		lr = zfs_rangelock_tryenter(&zp->z_rangelock,
 		    rounddown(start, blksz),
 		    roundup(end, blksz) - rounddown(start, blksz), RL_READER);
 		if (lr == NULL) {
 			if (rahead != NULL) {
 				*rahead = 0;
 				rahead = NULL;
 			}
 			if (rbehind != NULL) {
 				*rbehind = 0;
 				rbehind = NULL;
 			}
 			break;
 		}
 		if (blksz == zp->z_blksz)
 			break;
 		zfs_rangelock_exit(lr);
 	}
 
 	object = ma[0]->object;
 	zfs_vmobject_wlock(object);
 	obj_size = object->un_pager.vnp.vnp_size;
 	zfs_vmobject_wunlock(object);
 	if (IDX_TO_OFF(ma[count - 1]->pindex) >= obj_size) {
 		if (lr != NULL)
 			zfs_rangelock_exit(lr);
 		ZFS_EXIT(zfsvfs);
 		return (zfs_vm_pagerret_bad);
 	}
 
 	pgsin_b = 0;
 	if (rbehind != NULL) {
 		pgsin_b = OFF_TO_IDX(start - rounddown(start, blksz));
 		pgsin_b = MIN(*rbehind, pgsin_b);
 	}
 
 	pgsin_a = 0;
 	if (rahead != NULL) {
 		pgsin_a = OFF_TO_IDX(roundup(end, blksz) - end);
 		if (end + IDX_TO_OFF(pgsin_a) >= obj_size)
 			pgsin_a = OFF_TO_IDX(round_page(obj_size) - end);
 		pgsin_a = MIN(*rahead, pgsin_a);
 	}
 
 	/*
 	 * NB: we need to pass the exact byte size of the data that we expect
 	 * to read after accounting for the file size.  This is required because
 	 * ZFS will panic if we request DMU to read beyond the end of the last
 	 * allocated block.
 	 */
 	error = dmu_read_pages(zfsvfs->z_os, zp->z_id, ma, count, &pgsin_b,
 	    &pgsin_a, MIN(end, obj_size) - (end - PAGE_SIZE));
 
 	if (lr != NULL)
 		zfs_rangelock_exit(lr);
 	ZFS_ACCESSTIME_STAMP(zfsvfs, zp);
 
 	dataset_kstats_update_read_kstats(&zfsvfs->z_kstat, count*PAGE_SIZE);
 
 	ZFS_EXIT(zfsvfs);
 
 	if (error != 0)
 		return (zfs_vm_pagerret_error);
 
 	VM_CNT_INC(v_vnodein);
 	VM_CNT_ADD(v_vnodepgsin, count + pgsin_b + pgsin_a);
 	if (rbehind != NULL)
 		*rbehind = pgsin_b;
 	if (rahead != NULL)
 		*rahead = pgsin_a;
 	return (zfs_vm_pagerret_ok);
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct vop_getpages_args {
 	struct vnode *a_vp;
 	vm_page_t *a_m;
 	int a_count;
 	int *a_rbehind;
 	int *a_rahead;
 };
 #endif
 
 static int
 zfs_freebsd_getpages(struct vop_getpages_args *ap)
 {
 
 	return (zfs_getpages(ap->a_vp, ap->a_m, ap->a_count, ap->a_rbehind,
 	    ap->a_rahead));
 }
 
 static int
 zfs_putpages(struct vnode *vp, vm_page_t *ma, size_t len, int flags,
     int *rtvals)
 {
 	znode_t		*zp = VTOZ(vp);
 	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
 	zfs_locked_range_t		*lr;
 	dmu_tx_t	*tx;
 	struct sf_buf	*sf;
 	vm_object_t	object;
 	vm_page_t	m;
 	caddr_t		va;
 	size_t		tocopy;
 	size_t		lo_len;
 	vm_ooffset_t	lo_off;
 	vm_ooffset_t	off;
 	uint_t		blksz;
 	int		ncount;
 	int		pcount;
 	int		err;
 	int		i;
 
 	object = vp->v_object;
 	KASSERT(ma[0]->object == object, ("mismatching object"));
 	KASSERT(len > 0 && (len & PAGE_MASK) == 0, ("unexpected length"));
 
 	pcount = btoc(len);
 	ncount = pcount;
 	for (i = 0; i < pcount; i++)
 		rtvals[i] = zfs_vm_pagerret_error;
 
 	ZFS_ENTER_ERROR(zfsvfs, zfs_vm_pagerret_error);
 	ZFS_VERIFY_ZP_ERROR(zp, zfs_vm_pagerret_error);
 
 	off = IDX_TO_OFF(ma[0]->pindex);
 	blksz = zp->z_blksz;
 	lo_off = rounddown(off, blksz);
 	lo_len = roundup(len + (off - lo_off), blksz);
 	lr = zfs_rangelock_enter(&zp->z_rangelock, lo_off, lo_len, RL_WRITER);
 
 	zfs_vmobject_wlock(object);
 	if (len + off > object->un_pager.vnp.vnp_size) {
 		if (object->un_pager.vnp.vnp_size > off) {
 			int pgoff;
 
 			len = object->un_pager.vnp.vnp_size - off;
 			ncount = btoc(len);
 			if ((pgoff = (int)len & PAGE_MASK) != 0) {
 				/*
 				 * If the object is locked and the following
 				 * conditions hold, then the page's dirty
 				 * field cannot be concurrently changed by a
 				 * pmap operation.
 				 */
 				m = ma[ncount - 1];
 				vm_page_assert_sbusied(m);
 				KASSERT(!pmap_page_is_write_mapped(m),
 				    ("zfs_putpages: page %p is not read-only",
 				    m));
 				vm_page_clear_dirty(m, pgoff, PAGE_SIZE -
 				    pgoff);
 			}
 		} else {
 			len = 0;
 			ncount = 0;
 		}
 		if (ncount < pcount) {
 			for (i = ncount; i < pcount; i++) {
 				rtvals[i] = zfs_vm_pagerret_bad;
 			}
 		}
 	}
 	zfs_vmobject_wunlock(object);
 
 	if (ncount == 0)
 		goto out;
 
 	if (zfs_id_overblockquota(zfsvfs, DMU_USERUSED_OBJECT, zp->z_uid) ||
 	    zfs_id_overblockquota(zfsvfs, DMU_GROUPUSED_OBJECT, zp->z_gid) ||
 	    (zp->z_projid != ZFS_DEFAULT_PROJID &&
 	    zfs_id_overblockquota(zfsvfs, DMU_PROJECTUSED_OBJECT,
 	    zp->z_projid))) {
 		goto out;
 	}
 
 	tx = dmu_tx_create(zfsvfs->z_os);
 	dmu_tx_hold_write(tx, zp->z_id, off, len);
 
 	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
 	zfs_sa_upgrade_txholds(tx, zp);
 	err = dmu_tx_assign(tx, TXG_WAIT);
 	if (err != 0) {
 		dmu_tx_abort(tx);
 		goto out;
 	}
 
 	if (zp->z_blksz < PAGE_SIZE) {
 		for (i = 0; len > 0; off += tocopy, len -= tocopy, i++) {
 			tocopy = len > PAGE_SIZE ? PAGE_SIZE : len;
 			va = zfs_map_page(ma[i], &sf);
 			dmu_write(zfsvfs->z_os, zp->z_id, off, tocopy, va, tx);
 			zfs_unmap_page(sf);
 		}
 	} else {
 		err = dmu_write_pages(zfsvfs->z_os, zp->z_id, off, len, ma, tx);
 	}
 
 	if (err == 0) {
 		uint64_t mtime[2], ctime[2];
 		sa_bulk_attr_t bulk[3];
 		int count = 0;
 
 		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL,
 		    &mtime, 16);
 		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
 		    &ctime, 16);
 		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
 		    &zp->z_pflags, 8);
 		zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime);
 		err = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
 		ASSERT0(err);
 		/*
 		 * XXX we should be passing a callback to undirty
 		 * but that would make the locking messier
 		 */
 		zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, off,
 		    len, 0, NULL, NULL);
 
 		zfs_vmobject_wlock(object);
 		for (i = 0; i < ncount; i++) {
 			rtvals[i] = zfs_vm_pagerret_ok;
 			vm_page_undirty(ma[i]);
 		}
 		zfs_vmobject_wunlock(object);
 		VM_CNT_INC(v_vnodeout);
 		VM_CNT_ADD(v_vnodepgsout, ncount);
 	}
 	dmu_tx_commit(tx);
 
 out:
 	zfs_rangelock_exit(lr);
 	if ((flags & (zfs_vm_pagerput_sync | zfs_vm_pagerput_inval)) != 0 ||
 	    zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
 		zil_commit(zfsvfs->z_log, zp->z_id);
 
 	dataset_kstats_update_write_kstats(&zfsvfs->z_kstat, len);
 
 	ZFS_EXIT(zfsvfs);
 	return (rtvals[0]);
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct vop_putpages_args {
 	struct vnode *a_vp;
 	vm_page_t *a_m;
 	int a_count;
 	int a_sync;
 	int *a_rtvals;
 };
 #endif
 
 static int
 zfs_freebsd_putpages(struct vop_putpages_args *ap)
 {
 
 	return (zfs_putpages(ap->a_vp, ap->a_m, ap->a_count, ap->a_sync,
 	    ap->a_rtvals));
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct vop_bmap_args {
 	struct vnode *a_vp;
 	daddr_t  a_bn;
 	struct bufobj **a_bop;
 	daddr_t *a_bnp;
 	int *a_runp;
 	int *a_runb;
 };
 #endif
 
 static int
 zfs_freebsd_bmap(struct vop_bmap_args *ap)
 {
 
 	if (ap->a_bop != NULL)
 		*ap->a_bop = &ap->a_vp->v_bufobj;
 	if (ap->a_bnp != NULL)
 		*ap->a_bnp = ap->a_bn;
 	if (ap->a_runp != NULL)
 		*ap->a_runp = 0;
 	if (ap->a_runb != NULL)
 		*ap->a_runb = 0;
 
 	return (0);
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct vop_open_args {
 	struct vnode *a_vp;
 	int a_mode;
 	struct ucred *a_cred;
 	struct thread *a_td;
 };
 #endif
 
 static int
 zfs_freebsd_open(struct vop_open_args *ap)
 {
 	vnode_t	*vp = ap->a_vp;
 	znode_t *zp = VTOZ(vp);
 	int error;
 
 	error = zfs_open(&vp, ap->a_mode, ap->a_cred);
 	if (error == 0)
 		vnode_create_vobject(vp, zp->z_size, ap->a_td);
 	return (error);
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct vop_close_args {
 	struct vnode *a_vp;
 	int  a_fflag;
 	struct ucred *a_cred;
 	struct thread *a_td;
 };
 #endif
 
 static int
 zfs_freebsd_close(struct vop_close_args *ap)
 {
 
 	return (zfs_close(ap->a_vp, ap->a_fflag, 1, 0, ap->a_cred));
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct vop_ioctl_args {
 	struct vnode *a_vp;
 	ulong_t a_command;
 	caddr_t a_data;
 	int a_fflag;
 	struct ucred *cred;
 	struct thread *td;
 };
 #endif
 
 static int
 zfs_freebsd_ioctl(struct vop_ioctl_args *ap)
 {
 
 	return (zfs_ioctl(ap->a_vp, ap->a_command, (intptr_t)ap->a_data,
 	    ap->a_fflag, ap->a_cred, NULL));
 }
 
 static int
 ioflags(int ioflags)
 {
 	int flags = 0;
 
 	if (ioflags & IO_APPEND)
 		flags |= O_APPEND;
 	if (ioflags & IO_NDELAY)
 		flags |= O_NONBLOCK;
 	if (ioflags & IO_SYNC)
 		flags |= O_SYNC;
 
 	return (flags);
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct vop_read_args {
 	struct vnode *a_vp;
 	struct uio *a_uio;
 	int a_ioflag;
 	struct ucred *a_cred;
 };
 #endif
 
 static int
 zfs_freebsd_read(struct vop_read_args *ap)
 {
 	zfs_uio_t uio;
 	zfs_uio_init(&uio, ap->a_uio);
 	return (zfs_read(VTOZ(ap->a_vp), &uio, ioflags(ap->a_ioflag),
 	    ap->a_cred));
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct vop_write_args {
 	struct vnode *a_vp;
 	struct uio *a_uio;
 	int a_ioflag;
 	struct ucred *a_cred;
 };
 #endif
 
 static int
 zfs_freebsd_write(struct vop_write_args *ap)
 {
 	zfs_uio_t uio;
 	zfs_uio_init(&uio, ap->a_uio);
 	return (zfs_write(VTOZ(ap->a_vp), &uio, ioflags(ap->a_ioflag),
 	    ap->a_cred));
 }
 
 #if __FreeBSD_version >= 1300102
 /*
  * VOP_FPLOOKUP_VEXEC routines are subject to special circumstances, see
  * the comment above cache_fplookup for details.
  */
 static int
 zfs_freebsd_fplookup_vexec(struct vop_fplookup_vexec_args *v)
 {
 	vnode_t *vp;
 	znode_t *zp;
 	uint64_t pflags;
 
 	vp = v->a_vp;
 	zp = VTOZ_SMR(vp);
 	if (__predict_false(zp == NULL))
 		return (EAGAIN);
 	pflags = atomic_load_64(&zp->z_pflags);
 	if (pflags & ZFS_AV_QUARANTINED)
 		return (EAGAIN);
 	if (pflags & ZFS_XATTR)
 		return (EAGAIN);
 	if ((pflags & ZFS_NO_EXECS_DENIED) == 0)
 		return (EAGAIN);
 	return (0);
 }
 #endif
 
 #if __FreeBSD_version >= 1300139
 static int
 zfs_freebsd_fplookup_symlink(struct vop_fplookup_symlink_args *v)
 {
 	vnode_t *vp;
 	znode_t *zp;
 	char *target;
 
 	vp = v->a_vp;
 	zp = VTOZ_SMR(vp);
 	if (__predict_false(zp == NULL)) {
 		return (EAGAIN);
 	}
 
 	target = atomic_load_consume_ptr(&zp->z_cached_symlink);
 	if (target == NULL) {
 		return (EAGAIN);
 	}
 	return (cache_symlink_resolve(v->a_fpl, target, strlen(target)));
 }
 #endif
 
 #ifndef _SYS_SYSPROTO_H_
 struct vop_access_args {
 	struct vnode *a_vp;
 	accmode_t a_accmode;
 	struct ucred *a_cred;
 	struct thread *a_td;
 };
 #endif
 
 static int
 zfs_freebsd_access(struct vop_access_args *ap)
 {
 	vnode_t *vp = ap->a_vp;
 	znode_t *zp = VTOZ(vp);
 	accmode_t accmode;
 	int error = 0;
 
 
 	if (ap->a_accmode == VEXEC) {
 		if (zfs_fastaccesschk_execute(zp, ap->a_cred) == 0)
 			return (0);
 	}
 
 	/*
 	 * ZFS itself only knowns about VREAD, VWRITE, VEXEC and VAPPEND,
 	 */
 	accmode = ap->a_accmode & (VREAD|VWRITE|VEXEC|VAPPEND);
 	if (accmode != 0)
 		error = zfs_access(zp, accmode, 0, ap->a_cred);
 
 	/*
 	 * VADMIN has to be handled by vaccess().
 	 */
 	if (error == 0) {
 		accmode = ap->a_accmode & ~(VREAD|VWRITE|VEXEC|VAPPEND);
 		if (accmode != 0) {
 #if __FreeBSD_version >= 1300105
 			error = vaccess(vp->v_type, zp->z_mode, zp->z_uid,
 			    zp->z_gid, accmode, ap->a_cred);
 #else
 			error = vaccess(vp->v_type, zp->z_mode, zp->z_uid,
 			    zp->z_gid, accmode, ap->a_cred, NULL);
 #endif
 		}
 	}
 
 	/*
 	 * For VEXEC, ensure that at least one execute bit is set for
 	 * non-directories.
 	 */
 	if (error == 0 && (ap->a_accmode & VEXEC) != 0 && vp->v_type != VDIR &&
 	    (zp->z_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) == 0) {
 		error = EACCES;
 	}
 
 	return (error);
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct vop_lookup_args {
 	struct vnode *a_dvp;
 	struct vnode **a_vpp;
 	struct componentname *a_cnp;
 };
 #endif
 
 static int
 zfs_freebsd_lookup(struct vop_lookup_args *ap, boolean_t cached)
 {
 	struct componentname *cnp = ap->a_cnp;
 	char nm[NAME_MAX + 1];
 
 	ASSERT3U(cnp->cn_namelen, <, sizeof (nm));
 	strlcpy(nm, cnp->cn_nameptr, MIN(cnp->cn_namelen + 1, sizeof (nm)));
 
 	return (zfs_lookup(ap->a_dvp, nm, ap->a_vpp, cnp, cnp->cn_nameiop,
 	    cnp->cn_cred, 0, cached));
 }
 
 static int
 zfs_freebsd_cachedlookup(struct vop_cachedlookup_args *ap)
 {
 
 	return (zfs_freebsd_lookup((struct vop_lookup_args *)ap, B_TRUE));
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct vop_lookup_args {
 	struct vnode *a_dvp;
 	struct vnode **a_vpp;
 	struct componentname *a_cnp;
 };
 #endif
 
 static int
 zfs_cache_lookup(struct vop_lookup_args *ap)
 {
 	zfsvfs_t *zfsvfs;
 
 	zfsvfs = ap->a_dvp->v_mount->mnt_data;
 	if (zfsvfs->z_use_namecache)
 		return (vfs_cache_lookup(ap));
 	else
 		return (zfs_freebsd_lookup(ap, B_FALSE));
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct vop_create_args {
 	struct vnode *a_dvp;
 	struct vnode **a_vpp;
 	struct componentname *a_cnp;
 	struct vattr *a_vap;
 };
 #endif
 
 static int
 zfs_freebsd_create(struct vop_create_args *ap)
 {
 	zfsvfs_t *zfsvfs;
 	struct componentname *cnp = ap->a_cnp;
 	vattr_t *vap = ap->a_vap;
 	znode_t *zp = NULL;
 	int rc, mode;
 
+#if __FreeBSD_version < 1400068
 	ASSERT(cnp->cn_flags & SAVENAME);
+#endif
 
 	vattr_init_mask(vap);
 	mode = vap->va_mode & ALLPERMS;
 	zfsvfs = ap->a_dvp->v_mount->mnt_data;
 	*ap->a_vpp = NULL;
 
 	rc = zfs_create(VTOZ(ap->a_dvp), cnp->cn_nameptr, vap, 0, mode,
 	    &zp, cnp->cn_cred, 0 /* flag */, NULL /* vsecattr */);
 	if (rc == 0)
 		*ap->a_vpp = ZTOV(zp);
 	if (zfsvfs->z_use_namecache &&
 	    rc == 0 && (cnp->cn_flags & MAKEENTRY) != 0)
 		cache_enter(ap->a_dvp, *ap->a_vpp, cnp);
 
 	return (rc);
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct vop_remove_args {
 	struct vnode *a_dvp;
 	struct vnode *a_vp;
 	struct componentname *a_cnp;
 };
 #endif
 
 static int
 zfs_freebsd_remove(struct vop_remove_args *ap)
 {
 
+#if __FreeBSD_version < 1400068
 	ASSERT(ap->a_cnp->cn_flags & SAVENAME);
+#endif
 
 	return (zfs_remove_(ap->a_dvp, ap->a_vp, ap->a_cnp->cn_nameptr,
 	    ap->a_cnp->cn_cred));
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct vop_mkdir_args {
 	struct vnode *a_dvp;
 	struct vnode **a_vpp;
 	struct componentname *a_cnp;
 	struct vattr *a_vap;
 };
 #endif
 
 static int
 zfs_freebsd_mkdir(struct vop_mkdir_args *ap)
 {
 	vattr_t *vap = ap->a_vap;
 	znode_t *zp = NULL;
 	int rc;
 
+#if __FreeBSD_version < 1400068
 	ASSERT(ap->a_cnp->cn_flags & SAVENAME);
+#endif
 
 	vattr_init_mask(vap);
 	*ap->a_vpp = NULL;
 
 	rc = zfs_mkdir(VTOZ(ap->a_dvp), ap->a_cnp->cn_nameptr, vap, &zp,
 	    ap->a_cnp->cn_cred, 0, NULL);
 
 	if (rc == 0)
 		*ap->a_vpp = ZTOV(zp);
 	return (rc);
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct vop_rmdir_args {
 	struct vnode *a_dvp;
 	struct vnode *a_vp;
 	struct componentname *a_cnp;
 };
 #endif
 
 static int
 zfs_freebsd_rmdir(struct vop_rmdir_args *ap)
 {
 	struct componentname *cnp = ap->a_cnp;
 
+#if __FreeBSD_version < 1400068
 	ASSERT(cnp->cn_flags & SAVENAME);
+#endif
 
 	return (zfs_rmdir_(ap->a_dvp, ap->a_vp, cnp->cn_nameptr, cnp->cn_cred));
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct vop_readdir_args {
 	struct vnode *a_vp;
 	struct uio *a_uio;
 	struct ucred *a_cred;
 	int *a_eofflag;
 	int *a_ncookies;
 	cookie_t **a_cookies;
 };
 #endif
 
 static int
 zfs_freebsd_readdir(struct vop_readdir_args *ap)
 {
 	zfs_uio_t uio;
 	zfs_uio_init(&uio, ap->a_uio);
 	return (zfs_readdir(ap->a_vp, &uio, ap->a_cred, ap->a_eofflag,
 	    ap->a_ncookies, ap->a_cookies));
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct vop_fsync_args {
 	struct vnode *a_vp;
 	int a_waitfor;
 	struct thread *a_td;
 };
 #endif
 
 static int
 zfs_freebsd_fsync(struct vop_fsync_args *ap)
 {
 
 	vop_stdfsync(ap);
 	return (zfs_fsync(VTOZ(ap->a_vp), 0, ap->a_td->td_ucred));
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct vop_getattr_args {
 	struct vnode *a_vp;
 	struct vattr *a_vap;
 	struct ucred *a_cred;
 };
 #endif
 
 static int
 zfs_freebsd_getattr(struct vop_getattr_args *ap)
 {
 	vattr_t *vap = ap->a_vap;
 	xvattr_t xvap;
 	ulong_t fflags = 0;
 	int error;
 
 	xva_init(&xvap);
 	xvap.xva_vattr = *vap;
 	xvap.xva_vattr.va_mask |= AT_XVATTR;
 
 	/* Convert chflags into ZFS-type flags. */
 	/* XXX: what about SF_SETTABLE?. */
 	XVA_SET_REQ(&xvap, XAT_IMMUTABLE);
 	XVA_SET_REQ(&xvap, XAT_APPENDONLY);
 	XVA_SET_REQ(&xvap, XAT_NOUNLINK);
 	XVA_SET_REQ(&xvap, XAT_NODUMP);
 	XVA_SET_REQ(&xvap, XAT_READONLY);
 	XVA_SET_REQ(&xvap, XAT_ARCHIVE);
 	XVA_SET_REQ(&xvap, XAT_SYSTEM);
 	XVA_SET_REQ(&xvap, XAT_HIDDEN);
 	XVA_SET_REQ(&xvap, XAT_REPARSE);
 	XVA_SET_REQ(&xvap, XAT_OFFLINE);
 	XVA_SET_REQ(&xvap, XAT_SPARSE);
 
 	error = zfs_getattr(ap->a_vp, (vattr_t *)&xvap, 0, ap->a_cred);
 	if (error != 0)
 		return (error);
 
 	/* Convert ZFS xattr into chflags. */
 #define	FLAG_CHECK(fflag, xflag, xfield)	do {			\
 	if (XVA_ISSET_RTN(&xvap, (xflag)) && (xfield) != 0)		\
 		fflags |= (fflag);					\
 } while (0)
 	FLAG_CHECK(SF_IMMUTABLE, XAT_IMMUTABLE,
 	    xvap.xva_xoptattrs.xoa_immutable);
 	FLAG_CHECK(SF_APPEND, XAT_APPENDONLY,
 	    xvap.xva_xoptattrs.xoa_appendonly);
 	FLAG_CHECK(SF_NOUNLINK, XAT_NOUNLINK,
 	    xvap.xva_xoptattrs.xoa_nounlink);
 	FLAG_CHECK(UF_ARCHIVE, XAT_ARCHIVE,
 	    xvap.xva_xoptattrs.xoa_archive);
 	FLAG_CHECK(UF_NODUMP, XAT_NODUMP,
 	    xvap.xva_xoptattrs.xoa_nodump);
 	FLAG_CHECK(UF_READONLY, XAT_READONLY,
 	    xvap.xva_xoptattrs.xoa_readonly);
 	FLAG_CHECK(UF_SYSTEM, XAT_SYSTEM,
 	    xvap.xva_xoptattrs.xoa_system);
 	FLAG_CHECK(UF_HIDDEN, XAT_HIDDEN,
 	    xvap.xva_xoptattrs.xoa_hidden);
 	FLAG_CHECK(UF_REPARSE, XAT_REPARSE,
 	    xvap.xva_xoptattrs.xoa_reparse);
 	FLAG_CHECK(UF_OFFLINE, XAT_OFFLINE,
 	    xvap.xva_xoptattrs.xoa_offline);
 	FLAG_CHECK(UF_SPARSE, XAT_SPARSE,
 	    xvap.xva_xoptattrs.xoa_sparse);
 
 #undef	FLAG_CHECK
 	*vap = xvap.xva_vattr;
 	vap->va_flags = fflags;
 	return (0);
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct vop_setattr_args {
 	struct vnode *a_vp;
 	struct vattr *a_vap;
 	struct ucred *a_cred;
 };
 #endif
 
 static int
 zfs_freebsd_setattr(struct vop_setattr_args *ap)
 {
 	vnode_t *vp = ap->a_vp;
 	vattr_t *vap = ap->a_vap;
 	cred_t *cred = ap->a_cred;
 	xvattr_t xvap;
 	ulong_t fflags;
 	uint64_t zflags;
 
 	vattr_init_mask(vap);
 	vap->va_mask &= ~AT_NOSET;
 
 	xva_init(&xvap);
 	xvap.xva_vattr = *vap;
 
 	zflags = VTOZ(vp)->z_pflags;
 
 	if (vap->va_flags != VNOVAL) {
 		zfsvfs_t *zfsvfs = VTOZ(vp)->z_zfsvfs;
 		int error;
 
 		if (zfsvfs->z_use_fuids == B_FALSE)
 			return (EOPNOTSUPP);
 
 		fflags = vap->va_flags;
 		/*
 		 * XXX KDM
 		 * We need to figure out whether it makes sense to allow
 		 * UF_REPARSE through, since we don't really have other
 		 * facilities to handle reparse points and zfs_setattr()
 		 * doesn't currently allow setting that attribute anyway.
 		 */
 		if ((fflags & ~(SF_IMMUTABLE|SF_APPEND|SF_NOUNLINK|UF_ARCHIVE|
 		    UF_NODUMP|UF_SYSTEM|UF_HIDDEN|UF_READONLY|UF_REPARSE|
 		    UF_OFFLINE|UF_SPARSE)) != 0)
 			return (EOPNOTSUPP);
 		/*
 		 * Unprivileged processes are not permitted to unset system
 		 * flags, or modify flags if any system flags are set.
 		 * Privileged non-jail processes may not modify system flags
 		 * if securelevel > 0 and any existing system flags are set.
 		 * Privileged jail processes behave like privileged non-jail
 		 * processes if the PR_ALLOW_CHFLAGS permission bit is set;
 		 * otherwise, they behave like unprivileged processes.
 		 */
 		if (secpolicy_fs_owner(vp->v_mount, cred) == 0 ||
 		    spl_priv_check_cred(cred, PRIV_VFS_SYSFLAGS) == 0) {
 			if (zflags &
 			    (ZFS_IMMUTABLE | ZFS_APPENDONLY | ZFS_NOUNLINK)) {
 				error = securelevel_gt(cred, 0);
 				if (error != 0)
 					return (error);
 			}
 		} else {
 			/*
 			 * Callers may only modify the file flags on
 			 * objects they have VADMIN rights for.
 			 */
 			if ((error = VOP_ACCESS(vp, VADMIN, cred,
 			    curthread)) != 0)
 				return (error);
 			if (zflags &
 			    (ZFS_IMMUTABLE | ZFS_APPENDONLY |
 			    ZFS_NOUNLINK)) {
 				return (EPERM);
 			}
 			if (fflags &
 			    (SF_IMMUTABLE | SF_APPEND | SF_NOUNLINK)) {
 				return (EPERM);
 			}
 		}
 
 #define	FLAG_CHANGE(fflag, zflag, xflag, xfield)	do {		\
 	if (((fflags & (fflag)) && !(zflags & (zflag))) ||		\
 	    ((zflags & (zflag)) && !(fflags & (fflag)))) {		\
 		XVA_SET_REQ(&xvap, (xflag));				\
 		(xfield) = ((fflags & (fflag)) != 0);			\
 	}								\
 } while (0)
 		/* Convert chflags into ZFS-type flags. */
 		/* XXX: what about SF_SETTABLE?. */
 		FLAG_CHANGE(SF_IMMUTABLE, ZFS_IMMUTABLE, XAT_IMMUTABLE,
 		    xvap.xva_xoptattrs.xoa_immutable);
 		FLAG_CHANGE(SF_APPEND, ZFS_APPENDONLY, XAT_APPENDONLY,
 		    xvap.xva_xoptattrs.xoa_appendonly);
 		FLAG_CHANGE(SF_NOUNLINK, ZFS_NOUNLINK, XAT_NOUNLINK,
 		    xvap.xva_xoptattrs.xoa_nounlink);
 		FLAG_CHANGE(UF_ARCHIVE, ZFS_ARCHIVE, XAT_ARCHIVE,
 		    xvap.xva_xoptattrs.xoa_archive);
 		FLAG_CHANGE(UF_NODUMP, ZFS_NODUMP, XAT_NODUMP,
 		    xvap.xva_xoptattrs.xoa_nodump);
 		FLAG_CHANGE(UF_READONLY, ZFS_READONLY, XAT_READONLY,
 		    xvap.xva_xoptattrs.xoa_readonly);
 		FLAG_CHANGE(UF_SYSTEM, ZFS_SYSTEM, XAT_SYSTEM,
 		    xvap.xva_xoptattrs.xoa_system);
 		FLAG_CHANGE(UF_HIDDEN, ZFS_HIDDEN, XAT_HIDDEN,
 		    xvap.xva_xoptattrs.xoa_hidden);
 		FLAG_CHANGE(UF_REPARSE, ZFS_REPARSE, XAT_REPARSE,
 		    xvap.xva_xoptattrs.xoa_reparse);
 		FLAG_CHANGE(UF_OFFLINE, ZFS_OFFLINE, XAT_OFFLINE,
 		    xvap.xva_xoptattrs.xoa_offline);
 		FLAG_CHANGE(UF_SPARSE, ZFS_SPARSE, XAT_SPARSE,
 		    xvap.xva_xoptattrs.xoa_sparse);
 #undef	FLAG_CHANGE
 	}
 	if (vap->va_birthtime.tv_sec != VNOVAL) {
 		xvap.xva_vattr.va_mask |= AT_XVATTR;
 		XVA_SET_REQ(&xvap, XAT_CREATETIME);
 	}
 	return (zfs_setattr(VTOZ(vp), (vattr_t *)&xvap, 0, cred));
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct vop_rename_args {
 	struct vnode *a_fdvp;
 	struct vnode *a_fvp;
 	struct componentname *a_fcnp;
 	struct vnode *a_tdvp;
 	struct vnode *a_tvp;
 	struct componentname *a_tcnp;
 };
 #endif
 
 static int
 zfs_freebsd_rename(struct vop_rename_args *ap)
 {
 	vnode_t *fdvp = ap->a_fdvp;
 	vnode_t *fvp = ap->a_fvp;
 	vnode_t *tdvp = ap->a_tdvp;
 	vnode_t *tvp = ap->a_tvp;
 	int error;
 
+#if __FreeBSD_version < 1400068
 	ASSERT(ap->a_fcnp->cn_flags & (SAVENAME|SAVESTART));
 	ASSERT(ap->a_tcnp->cn_flags & (SAVENAME|SAVESTART));
+#endif
 
 	error = zfs_do_rename(fdvp, &fvp, ap->a_fcnp, tdvp, &tvp,
 	    ap->a_tcnp, ap->a_fcnp->cn_cred);
 
 	vrele(fdvp);
 	vrele(fvp);
 	vrele(tdvp);
 	if (tvp != NULL)
 		vrele(tvp);
 
 	return (error);
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct vop_symlink_args {
 	struct vnode *a_dvp;
 	struct vnode **a_vpp;
 	struct componentname *a_cnp;
 	struct vattr *a_vap;
 	char *a_target;
 };
 #endif
 
 static int
 zfs_freebsd_symlink(struct vop_symlink_args *ap)
 {
 	struct componentname *cnp = ap->a_cnp;
 	vattr_t *vap = ap->a_vap;
 	znode_t *zp = NULL;
 #if __FreeBSD_version >= 1300139
 	char *symlink;
 	size_t symlink_len;
 #endif
 	int rc;
 
+#if __FreeBSD_version < 1400068
 	ASSERT(cnp->cn_flags & SAVENAME);
+#endif
 
 	vap->va_type = VLNK;	/* FreeBSD: Syscall only sets va_mode. */
 	vattr_init_mask(vap);
 	*ap->a_vpp = NULL;
 
 	rc = zfs_symlink(VTOZ(ap->a_dvp), cnp->cn_nameptr, vap,
 	    ap->a_target, &zp, cnp->cn_cred, 0 /* flags */);
 	if (rc == 0) {
 		*ap->a_vpp = ZTOV(zp);
 		ASSERT_VOP_ELOCKED(ZTOV(zp), __func__);
 #if __FreeBSD_version >= 1300139
 		MPASS(zp->z_cached_symlink == NULL);
 		symlink_len = strlen(ap->a_target);
 		symlink = cache_symlink_alloc(symlink_len + 1, M_WAITOK);
 		if (symlink != NULL) {
 			memcpy(symlink, ap->a_target, symlink_len);
 			symlink[symlink_len] = '\0';
 			atomic_store_rel_ptr((uintptr_t *)&zp->z_cached_symlink,
 			    (uintptr_t)symlink);
 		}
 #endif
 	}
 	return (rc);
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct vop_readlink_args {
 	struct vnode *a_vp;
 	struct uio *a_uio;
 	struct ucred *a_cred;
 };
 #endif
 
 static int
 zfs_freebsd_readlink(struct vop_readlink_args *ap)
 {
 	zfs_uio_t uio;
 	int error;
 #if __FreeBSD_version >= 1300139
 	znode_t	*zp = VTOZ(ap->a_vp);
 	char *symlink, *base;
 	size_t symlink_len;
 	bool trycache;
 #endif
 
 	zfs_uio_init(&uio, ap->a_uio);
 #if __FreeBSD_version >= 1300139
 	trycache = false;
 	if (zfs_uio_segflg(&uio) == UIO_SYSSPACE &&
 	    zfs_uio_iovcnt(&uio) == 1) {
 		base = zfs_uio_iovbase(&uio, 0);
 		symlink_len = zfs_uio_iovlen(&uio, 0);
 		trycache = true;
 	}
 #endif
 	error = zfs_readlink(ap->a_vp, &uio, ap->a_cred, NULL);
 #if __FreeBSD_version >= 1300139
 	if (atomic_load_ptr(&zp->z_cached_symlink) != NULL ||
 	    error != 0 || !trycache) {
 		return (error);
 	}
 	symlink_len -= zfs_uio_resid(&uio);
 	symlink = cache_symlink_alloc(symlink_len + 1, M_WAITOK);
 	if (symlink != NULL) {
 		memcpy(symlink, base, symlink_len);
 		symlink[symlink_len] = '\0';
 		if (!atomic_cmpset_rel_ptr((uintptr_t *)&zp->z_cached_symlink,
 		    (uintptr_t)NULL, (uintptr_t)symlink)) {
 			cache_symlink_free(symlink, symlink_len + 1);
 		}
 	}
 #endif
 	return (error);
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct vop_link_args {
 	struct vnode *a_tdvp;
 	struct vnode *a_vp;
 	struct componentname *a_cnp;
 };
 #endif
 
 static int
 zfs_freebsd_link(struct vop_link_args *ap)
 {
 	struct componentname *cnp = ap->a_cnp;
 	vnode_t *vp = ap->a_vp;
 	vnode_t *tdvp = ap->a_tdvp;
 
 	if (tdvp->v_mount != vp->v_mount)
 		return (EXDEV);
 
+#if __FreeBSD_version < 1400068
 	ASSERT(cnp->cn_flags & SAVENAME);
+#endif
 
 	return (zfs_link(VTOZ(tdvp), VTOZ(vp),
 	    cnp->cn_nameptr, cnp->cn_cred, 0));
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct vop_inactive_args {
 	struct vnode *a_vp;
 	struct thread *a_td;
 };
 #endif
 
 static int
 zfs_freebsd_inactive(struct vop_inactive_args *ap)
 {
 	vnode_t *vp = ap->a_vp;
 
 #if __FreeBSD_version >= 1300123
 	zfs_inactive(vp, curthread->td_ucred, NULL);
 #else
 	zfs_inactive(vp, ap->a_td->td_ucred, NULL);
 #endif
 	return (0);
 }
 
 #if __FreeBSD_version >= 1300042
 #ifndef _SYS_SYSPROTO_H_
 struct vop_need_inactive_args {
 	struct vnode *a_vp;
 	struct thread *a_td;
 };
 #endif
 
 static int
 zfs_freebsd_need_inactive(struct vop_need_inactive_args *ap)
 {
 	vnode_t *vp = ap->a_vp;
 	znode_t	*zp = VTOZ(vp);
 	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
 	int need;
 
 	if (vn_need_pageq_flush(vp))
 		return (1);
 
 	if (!ZFS_TEARDOWN_INACTIVE_TRY_ENTER_READ(zfsvfs))
 		return (1);
 	need = (zp->z_sa_hdl == NULL || zp->z_unlinked || zp->z_atime_dirty);
 	ZFS_TEARDOWN_INACTIVE_EXIT_READ(zfsvfs);
 
 	return (need);
 }
 #endif
 
 #ifndef _SYS_SYSPROTO_H_
 struct vop_reclaim_args {
 	struct vnode *a_vp;
 	struct thread *a_td;
 };
 #endif
 
 static int
 zfs_freebsd_reclaim(struct vop_reclaim_args *ap)
 {
 	vnode_t	*vp = ap->a_vp;
 	znode_t	*zp = VTOZ(vp);
 	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
 
 	ASSERT3P(zp, !=, NULL);
 
 #if __FreeBSD_version < 1300042
 	/* Destroy the vm object and flush associated pages. */
 	vnode_destroy_vobject(vp);
 #endif
 	/*
 	 * z_teardown_inactive_lock protects from a race with
 	 * zfs_znode_dmu_fini in zfsvfs_teardown during
 	 * force unmount.
 	 */
 	ZFS_TEARDOWN_INACTIVE_ENTER_READ(zfsvfs);
 	if (zp->z_sa_hdl == NULL)
 		zfs_znode_free(zp);
 	else
 		zfs_zinactive(zp);
 	ZFS_TEARDOWN_INACTIVE_EXIT_READ(zfsvfs);
 
 	vp->v_data = NULL;
 	return (0);
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct vop_fid_args {
 	struct vnode *a_vp;
 	struct fid *a_fid;
 };
 #endif
 
 static int
 zfs_freebsd_fid(struct vop_fid_args *ap)
 {
 
 	return (zfs_fid(ap->a_vp, (void *)ap->a_fid, NULL));
 }
 
 
 #ifndef _SYS_SYSPROTO_H_
 struct vop_pathconf_args {
 	struct vnode *a_vp;
 	int a_name;
 	register_t *a_retval;
 } *ap;
 #endif
 
 static int
 zfs_freebsd_pathconf(struct vop_pathconf_args *ap)
 {
 	ulong_t val;
 	int error;
 
 	error = zfs_pathconf(ap->a_vp, ap->a_name, &val,
 	    curthread->td_ucred, NULL);
 	if (error == 0) {
 		*ap->a_retval = val;
 		return (error);
 	}
 	if (error != EOPNOTSUPP)
 		return (error);
 
 	switch (ap->a_name) {
 	case _PC_NAME_MAX:
 		*ap->a_retval = NAME_MAX;
 		return (0);
 #if __FreeBSD_version >= 1400032
 	case _PC_DEALLOC_PRESENT:
 		*ap->a_retval = 1;
 		return (0);
 #endif
 	case _PC_PIPE_BUF:
 		if (ap->a_vp->v_type == VDIR || ap->a_vp->v_type == VFIFO) {
 			*ap->a_retval = PIPE_BUF;
 			return (0);
 		}
 		return (EINVAL);
 	default:
 		return (vop_stdpathconf(ap));
 	}
 }
 
 static int zfs_xattr_compat = 1;
 
 static int
 zfs_check_attrname(const char *name)
 {
 	/* We don't allow '/' character in attribute name. */
 	if (strchr(name, '/') != NULL)
 		return (SET_ERROR(EINVAL));
 	/* We don't allow attribute names that start with a namespace prefix. */
 	if (ZFS_XA_NS_PREFIX_FORBIDDEN(name))
 		return (SET_ERROR(EINVAL));
 	return (0);
 }
 
 /*
  * FreeBSD's extended attributes namespace defines file name prefix for ZFS'
  * extended attribute name:
  *
  *	NAMESPACE	XATTR_COMPAT	PREFIX
  *	system		*		freebsd:system:
  *	user		1		(none, can be used to access ZFS
  *					fsattr(5) attributes created on Solaris)
  *	user		0		user.
  */
 static int
 zfs_create_attrname(int attrnamespace, const char *name, char *attrname,
     size_t size, boolean_t compat)
 {
 	const char *namespace, *prefix, *suffix;
 
 	memset(attrname, 0, size);
 
 	switch (attrnamespace) {
 	case EXTATTR_NAMESPACE_USER:
 		if (compat) {
 			/*
 			 * This is the default namespace by which we can access
 			 * all attributes created on Solaris.
 			 */
 			prefix = namespace = suffix = "";
 		} else {
 			/*
 			 * This is compatible with the user namespace encoding
 			 * on Linux prior to xattr_compat, but nothing
 			 * else.
 			 */
 			prefix = "";
 			namespace = "user";
 			suffix = ".";
 		}
 		break;
 	case EXTATTR_NAMESPACE_SYSTEM:
 		prefix = "freebsd:";
 		namespace = EXTATTR_NAMESPACE_SYSTEM_STRING;
 		suffix = ":";
 		break;
 	case EXTATTR_NAMESPACE_EMPTY:
 	default:
 		return (SET_ERROR(EINVAL));
 	}
 	if (snprintf(attrname, size, "%s%s%s%s", prefix, namespace, suffix,
 	    name) >= size) {
 		return (SET_ERROR(ENAMETOOLONG));
 	}
 	return (0);
 }
 
 static int
 zfs_ensure_xattr_cached(znode_t *zp)
 {
 	int error = 0;
 
 	ASSERT(RW_LOCK_HELD(&zp->z_xattr_lock));
 
 	if (zp->z_xattr_cached != NULL)
 		return (0);
 
 	if (rw_write_held(&zp->z_xattr_lock))
 		return (zfs_sa_get_xattr(zp));
 
 	if (!rw_tryupgrade(&zp->z_xattr_lock)) {
 		rw_exit(&zp->z_xattr_lock);
 		rw_enter(&zp->z_xattr_lock, RW_WRITER);
 	}
 	if (zp->z_xattr_cached == NULL)
 		error = zfs_sa_get_xattr(zp);
 	rw_downgrade(&zp->z_xattr_lock);
 	return (error);
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct vop_getextattr {
 	IN struct vnode *a_vp;
 	IN int a_attrnamespace;
 	IN const char *a_name;
 	INOUT struct uio *a_uio;
 	OUT size_t *a_size;
 	IN struct ucred *a_cred;
 	IN struct thread *a_td;
 };
 #endif
 
 static int
 zfs_getextattr_dir(struct vop_getextattr_args *ap, const char *attrname)
 {
 	struct thread *td = ap->a_td;
 	struct nameidata nd;
 	struct vattr va;
 	vnode_t *xvp = NULL, *vp;
 	int error, flags;
 
 	error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred,
 	    LOOKUP_XATTR, B_FALSE);
 	if (error != 0)
 		return (error);
 
 	flags = FREAD;
 #if __FreeBSD_version < 1400043
 	NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, attrname,
 	    xvp, td);
 #else
 	NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, attrname, xvp);
 #endif
 	error = vn_open_cred(&nd, &flags, 0, VN_OPEN_INVFS, ap->a_cred, NULL);
 	vp = nd.ni_vp;
 	NDFREE_PNBUF(&nd);
 	if (error != 0)
 		return (SET_ERROR(error));
 
 	if (ap->a_size != NULL) {
 		error = VOP_GETATTR(vp, &va, ap->a_cred);
 		if (error == 0)
 			*ap->a_size = (size_t)va.va_size;
 	} else if (ap->a_uio != NULL)
 		error = VOP_READ(vp, ap->a_uio, IO_UNIT, ap->a_cred);
 
 	VOP_UNLOCK1(vp);
 	vn_close(vp, flags, ap->a_cred, td);
 	return (error);
 }
 
 static int
 zfs_getextattr_sa(struct vop_getextattr_args *ap, const char *attrname)
 {
 	znode_t *zp = VTOZ(ap->a_vp);
 	uchar_t *nv_value;
 	uint_t nv_size;
 	int error;
 
 	error = zfs_ensure_xattr_cached(zp);
 	if (error != 0)
 		return (error);
 
 	ASSERT(RW_LOCK_HELD(&zp->z_xattr_lock));
 	ASSERT3P(zp->z_xattr_cached, !=, NULL);
 
 	error = nvlist_lookup_byte_array(zp->z_xattr_cached, attrname,
 	    &nv_value, &nv_size);
 	if (error != 0)
 		return (SET_ERROR(error));
 
 	if (ap->a_size != NULL)
 		*ap->a_size = nv_size;
 	else if (ap->a_uio != NULL)
 		error = uiomove(nv_value, nv_size, ap->a_uio);
 	if (error != 0)
 		return (SET_ERROR(error));
 
 	return (0);
 }
 
 static int
 zfs_getextattr_impl(struct vop_getextattr_args *ap, boolean_t compat)
 {
 	znode_t *zp = VTOZ(ap->a_vp);
 	zfsvfs_t *zfsvfs = ZTOZSB(zp);
 	char attrname[EXTATTR_MAXNAMELEN+1];
 	int error;
 
 	error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname,
 	    sizeof (attrname), compat);
 	if (error != 0)
 		return (error);
 
 	error = ENOENT;
 	if (zfsvfs->z_use_sa && zp->z_is_sa)
 		error = zfs_getextattr_sa(ap, attrname);
 	if (error == ENOENT)
 		error = zfs_getextattr_dir(ap, attrname);
 	return (error);
 }
 
 /*
  * Vnode operation to retrieve a named extended attribute.
  */
 static int
 zfs_getextattr(struct vop_getextattr_args *ap)
 {
 	znode_t *zp = VTOZ(ap->a_vp);
 	zfsvfs_t *zfsvfs = ZTOZSB(zp);
 	int error;
 
 	/*
 	 * If the xattr property is off, refuse the request.
 	 */
 	if (!(zfsvfs->z_flags & ZSB_XATTR))
 		return (SET_ERROR(EOPNOTSUPP));
 
 	error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
 	    ap->a_cred, ap->a_td, VREAD);
 	if (error != 0)
 		return (SET_ERROR(error));
 
 	error = zfs_check_attrname(ap->a_name);
 	if (error != 0)
 		return (error);
 
 	error = ENOENT;
 	ZFS_ENTER(zfsvfs);
 	ZFS_VERIFY_ZP(zp);
 	rw_enter(&zp->z_xattr_lock, RW_READER);
 
 	error = zfs_getextattr_impl(ap, zfs_xattr_compat);
 	if ((error == ENOENT || error == ENOATTR) &&
 	    ap->a_attrnamespace == EXTATTR_NAMESPACE_USER) {
 		/*
 		 * Fall back to the alternate namespace format if we failed to
 		 * find a user xattr.
 		 */
 		error = zfs_getextattr_impl(ap, !zfs_xattr_compat);
 	}
 
 	rw_exit(&zp->z_xattr_lock);
 	ZFS_EXIT(zfsvfs);
 	if (error == ENOENT)
 		error = SET_ERROR(ENOATTR);
 	return (error);
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct vop_deleteextattr {
 	IN struct vnode *a_vp;
 	IN int a_attrnamespace;
 	IN const char *a_name;
 	IN struct ucred *a_cred;
 	IN struct thread *a_td;
 };
 #endif
 
 static int
 zfs_deleteextattr_dir(struct vop_deleteextattr_args *ap, const char *attrname)
 {
 	struct nameidata nd;
 	vnode_t *xvp = NULL, *vp;
 	int error;
 
 	error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred,
 	    LOOKUP_XATTR, B_FALSE);
 	if (error != 0)
 		return (error);
 
 #if __FreeBSD_version < 1400043
 	NDINIT_ATVP(&nd, DELETE, NOFOLLOW | LOCKPARENT | LOCKLEAF,
 	    UIO_SYSSPACE, attrname, xvp, ap->a_td);
 #else
 	NDINIT_ATVP(&nd, DELETE, NOFOLLOW | LOCKPARENT | LOCKLEAF,
 	    UIO_SYSSPACE, attrname, xvp);
 #endif
 	error = namei(&nd);
 	vp = nd.ni_vp;
 	if (error != 0) {
 		NDFREE_PNBUF(&nd);
 		return (SET_ERROR(error));
 	}
 
 	error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
 	NDFREE_PNBUF(&nd);
 
 	vput(nd.ni_dvp);
 	if (vp == nd.ni_dvp)
 		vrele(vp);
 	else
 		vput(vp);
 
 	return (error);
 }
 
 static int
 zfs_deleteextattr_sa(struct vop_deleteextattr_args *ap, const char *attrname)
 {
 	znode_t *zp = VTOZ(ap->a_vp);
 	nvlist_t *nvl;
 	int error;
 
 	error = zfs_ensure_xattr_cached(zp);
 	if (error != 0)
 		return (error);
 
 	ASSERT(RW_WRITE_HELD(&zp->z_xattr_lock));
 	ASSERT3P(zp->z_xattr_cached, !=, NULL);
 
 	nvl = zp->z_xattr_cached;
 	error = nvlist_remove(nvl, attrname, DATA_TYPE_BYTE_ARRAY);
 	if (error != 0)
 		error = SET_ERROR(error);
 	else
 		error = zfs_sa_set_xattr(zp, attrname, NULL, 0);
 	if (error != 0) {
 		zp->z_xattr_cached = NULL;
 		nvlist_free(nvl);
 	}
 	return (error);
 }
 
 static int
 zfs_deleteextattr_impl(struct vop_deleteextattr_args *ap, boolean_t compat)
 {
 	znode_t *zp = VTOZ(ap->a_vp);
 	zfsvfs_t *zfsvfs = ZTOZSB(zp);
 	char attrname[EXTATTR_MAXNAMELEN+1];
 	int error;
 
 	error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname,
 	    sizeof (attrname), compat);
 	if (error != 0)
 		return (error);
 
 	error = ENOENT;
 	if (zfsvfs->z_use_sa && zp->z_is_sa)
 		error = zfs_deleteextattr_sa(ap, attrname);
 	if (error == ENOENT)
 		error = zfs_deleteextattr_dir(ap, attrname);
 	return (error);
 }
 
 /*
  * Vnode operation to remove a named attribute.
  */
 static int
 zfs_deleteextattr(struct vop_deleteextattr_args *ap)
 {
 	znode_t *zp = VTOZ(ap->a_vp);
 	zfsvfs_t *zfsvfs = ZTOZSB(zp);
 	int error;
 
 	/*
 	 * If the xattr property is off, refuse the request.
 	 */
 	if (!(zfsvfs->z_flags & ZSB_XATTR))
 		return (SET_ERROR(EOPNOTSUPP));
 
 	error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
 	    ap->a_cred, ap->a_td, VWRITE);
 	if (error != 0)
 		return (SET_ERROR(error));
 
 	error = zfs_check_attrname(ap->a_name);
 	if (error != 0)
 		return (error);
 
 	ZFS_ENTER(zfsvfs);
 	ZFS_VERIFY_ZP(zp);
 	rw_enter(&zp->z_xattr_lock, RW_WRITER);
 
 	error = zfs_deleteextattr_impl(ap, zfs_xattr_compat);
 	if ((error == ENOENT || error == ENOATTR) &&
 	    ap->a_attrnamespace == EXTATTR_NAMESPACE_USER) {
 		/*
 		 * Fall back to the alternate namespace format if we failed to
 		 * find a user xattr.
 		 */
 		error = zfs_deleteextattr_impl(ap, !zfs_xattr_compat);
 	}
 
 	rw_exit(&zp->z_xattr_lock);
 	ZFS_EXIT(zfsvfs);
 	if (error == ENOENT)
 		error = SET_ERROR(ENOATTR);
 	return (error);
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct vop_setextattr {
 	IN struct vnode *a_vp;
 	IN int a_attrnamespace;
 	IN const char *a_name;
 	INOUT struct uio *a_uio;
 	IN struct ucred *a_cred;
 	IN struct thread *a_td;
 };
 #endif
 
 static int
 zfs_setextattr_dir(struct vop_setextattr_args *ap, const char *attrname)
 {
 	struct thread *td = ap->a_td;
 	struct nameidata nd;
 	struct vattr va;
 	vnode_t *xvp = NULL, *vp;
 	int error, flags;
 
 	error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred,
 	    LOOKUP_XATTR | CREATE_XATTR_DIR, B_FALSE);
 	if (error != 0)
 		return (error);
 
 	flags = FFLAGS(O_WRONLY | O_CREAT);
 #if __FreeBSD_version < 1400043
 	NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, attrname, xvp, td);
 #else
 	NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, attrname, xvp);
 #endif
 	error = vn_open_cred(&nd, &flags, 0600, VN_OPEN_INVFS, ap->a_cred,
 	    NULL);
 	vp = nd.ni_vp;
 	NDFREE_PNBUF(&nd);
 	if (error != 0)
 		return (SET_ERROR(error));
 
 	VATTR_NULL(&va);
 	va.va_size = 0;
 	error = VOP_SETATTR(vp, &va, ap->a_cred);
 	if (error == 0)
 		VOP_WRITE(vp, ap->a_uio, IO_UNIT, ap->a_cred);
 
 	VOP_UNLOCK1(vp);
 	vn_close(vp, flags, ap->a_cred, td);
 	return (error);
 }
 
 static int
 zfs_setextattr_sa(struct vop_setextattr_args *ap, const char *attrname)
 {
 	znode_t *zp = VTOZ(ap->a_vp);
 	nvlist_t *nvl;
 	size_t sa_size;
 	int error;
 
 	error = zfs_ensure_xattr_cached(zp);
 	if (error != 0)
 		return (error);
 
 	ASSERT(RW_WRITE_HELD(&zp->z_xattr_lock));
 	ASSERT3P(zp->z_xattr_cached, !=, NULL);
 
 	nvl = zp->z_xattr_cached;
 	size_t entry_size = ap->a_uio->uio_resid;
 	if (entry_size > DXATTR_MAX_ENTRY_SIZE)
 		return (SET_ERROR(EFBIG));
 	error = nvlist_size(nvl, &sa_size, NV_ENCODE_XDR);
 	if (error != 0)
 		return (SET_ERROR(error));
 	if (sa_size > DXATTR_MAX_SA_SIZE)
 		return (SET_ERROR(EFBIG));
 	uchar_t *buf = kmem_alloc(entry_size, KM_SLEEP);
 	error = uiomove(buf, entry_size, ap->a_uio);
 	if (error != 0) {
 		error = SET_ERROR(error);
 	} else {
 		error = nvlist_add_byte_array(nvl, attrname, buf, entry_size);
 		if (error != 0)
 			error = SET_ERROR(error);
 	}
 	if (error == 0)
 		error = zfs_sa_set_xattr(zp, attrname, buf, entry_size);
 	kmem_free(buf, entry_size);
 	if (error != 0) {
 		zp->z_xattr_cached = NULL;
 		nvlist_free(nvl);
 	}
 	return (error);
 }
 
 static int
 zfs_setextattr_impl(struct vop_setextattr_args *ap, boolean_t compat)
 {
 	znode_t *zp = VTOZ(ap->a_vp);
 	zfsvfs_t *zfsvfs = ZTOZSB(zp);
 	char attrname[EXTATTR_MAXNAMELEN+1];
 	int error;
 
 	error = zfs_create_attrname(ap->a_attrnamespace, ap->a_name, attrname,
 	    sizeof (attrname), compat);
 	if (error != 0)
 		return (error);
 
 	struct vop_deleteextattr_args vda = {
 		.a_vp = ap->a_vp,
 		.a_attrnamespace = ap->a_attrnamespace,
 		.a_name = ap->a_name,
 		.a_cred = ap->a_cred,
 		.a_td = ap->a_td,
 	};
 	error = ENOENT;
 	if (zfsvfs->z_use_sa && zp->z_is_sa && zfsvfs->z_xattr_sa) {
 		error = zfs_setextattr_sa(ap, attrname);
 		if (error == 0) {
 			/*
 			 * Successfully put into SA, we need to clear the one
 			 * in dir if present.
 			 */
 			zfs_deleteextattr_dir(&vda, attrname);
 		}
 	}
 	if (error != 0) {
 		error = zfs_setextattr_dir(ap, attrname);
 		if (error == 0 && zp->z_is_sa) {
 			/*
 			 * Successfully put into dir, we need to clear the one
 			 * in SA if present.
 			 */
 			zfs_deleteextattr_sa(&vda, attrname);
 		}
 	}
 	if (error == 0 && ap->a_attrnamespace == EXTATTR_NAMESPACE_USER) {
 		/*
 		 * Also clear all versions of the alternate compat name.
 		 */
 		zfs_deleteextattr_impl(&vda, !compat);
 	}
 	return (error);
 }
 
 /*
  * Vnode operation to set a named attribute.
  */
 static int
 zfs_setextattr(struct vop_setextattr_args *ap)
 {
 	znode_t *zp = VTOZ(ap->a_vp);
 	zfsvfs_t *zfsvfs = ZTOZSB(zp);
 	int error;
 
 	/*
 	 * If the xattr property is off, refuse the request.
 	 */
 	if (!(zfsvfs->z_flags & ZSB_XATTR))
 		return (SET_ERROR(EOPNOTSUPP));
 
 	error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
 	    ap->a_cred, ap->a_td, VWRITE);
 	if (error != 0)
 		return (SET_ERROR(error));
 
 	error = zfs_check_attrname(ap->a_name);
 	if (error != 0)
 		return (error);
 
 	ZFS_ENTER(zfsvfs);
 	ZFS_VERIFY_ZP(zp);
 	rw_enter(&zp->z_xattr_lock, RW_WRITER);
 
 	error = zfs_setextattr_impl(ap, zfs_xattr_compat);
 
 	rw_exit(&zp->z_xattr_lock);
 	ZFS_EXIT(zfsvfs);
 	return (error);
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct vop_listextattr {
 	IN struct vnode *a_vp;
 	IN int a_attrnamespace;
 	INOUT struct uio *a_uio;
 	OUT size_t *a_size;
 	IN struct ucred *a_cred;
 	IN struct thread *a_td;
 };
 #endif
 
 static int
 zfs_listextattr_dir(struct vop_listextattr_args *ap, const char *attrprefix)
 {
 	struct thread *td = ap->a_td;
 	struct nameidata nd;
 	uint8_t dirbuf[sizeof (struct dirent)];
 	struct iovec aiov;
 	struct uio auio;
 	vnode_t *xvp = NULL, *vp;
 	int error, eof;
 
 	error = zfs_lookup(ap->a_vp, NULL, &xvp, NULL, 0, ap->a_cred,
 	    LOOKUP_XATTR, B_FALSE);
 	if (error != 0) {
 		/*
 		 * ENOATTR means that the EA directory does not yet exist,
 		 * i.e. there are no extended attributes there.
 		 */
 		if (error == ENOATTR)
 			error = 0;
 		return (error);
 	}
 
 #if __FreeBSD_version < 1400043
 	NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | LOCKSHARED,
 	    UIO_SYSSPACE, ".", xvp, td);
 #else
 	NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | LOCKSHARED,
 	    UIO_SYSSPACE, ".", xvp);
 #endif
 	error = namei(&nd);
 	vp = nd.ni_vp;
 	NDFREE_PNBUF(&nd);
 	if (error != 0)
 		return (SET_ERROR(error));
 
 	auio.uio_iov = &aiov;
 	auio.uio_iovcnt = 1;
 	auio.uio_segflg = UIO_SYSSPACE;
 	auio.uio_td = td;
 	auio.uio_rw = UIO_READ;
 	auio.uio_offset = 0;
 
 	size_t plen = strlen(attrprefix);
 
 	do {
 		aiov.iov_base = (void *)dirbuf;
 		aiov.iov_len = sizeof (dirbuf);
 		auio.uio_resid = sizeof (dirbuf);
 		error = VOP_READDIR(vp, &auio, ap->a_cred, &eof, NULL, NULL);
 		if (error != 0)
 			break;
 		int done = sizeof (dirbuf) - auio.uio_resid;
 		for (int pos = 0; pos < done; ) {
 			struct dirent *dp = (struct dirent *)(dirbuf + pos);
 			pos += dp->d_reclen;
 			/*
 			 * XXX: Temporarily we also accept DT_UNKNOWN, as this
 			 * is what we get when attribute was created on Solaris.
 			 */
 			if (dp->d_type != DT_REG && dp->d_type != DT_UNKNOWN)
 				continue;
 			else if (plen == 0 &&
 			    ZFS_XA_NS_PREFIX_FORBIDDEN(dp->d_name))
 				continue;
 			else if (strncmp(dp->d_name, attrprefix, plen) != 0)
 				continue;
 			uint8_t nlen = dp->d_namlen - plen;
 			if (ap->a_size != NULL) {
 				*ap->a_size += 1 + nlen;
 			} else if (ap->a_uio != NULL) {
 				/*
 				 * Format of extattr name entry is one byte for
 				 * length and the rest for name.
 				 */
 				error = uiomove(&nlen, 1, ap->a_uio);
 				if (error == 0) {
 					char *namep = dp->d_name + plen;
 					error = uiomove(namep, nlen, ap->a_uio);
 				}
 				if (error != 0) {
 					error = SET_ERROR(error);
 					break;
 				}
 			}
 		}
 	} while (!eof && error == 0);
 
 	vput(vp);
 	return (error);
 }
 
 static int
 zfs_listextattr_sa(struct vop_listextattr_args *ap, const char *attrprefix)
 {
 	znode_t *zp = VTOZ(ap->a_vp);
 	int error;
 
 	error = zfs_ensure_xattr_cached(zp);
 	if (error != 0)
 		return (error);
 
 	ASSERT(RW_LOCK_HELD(&zp->z_xattr_lock));
 	ASSERT3P(zp->z_xattr_cached, !=, NULL);
 
 	size_t plen = strlen(attrprefix);
 	nvpair_t *nvp = NULL;
 	while ((nvp = nvlist_next_nvpair(zp->z_xattr_cached, nvp)) != NULL) {
 		ASSERT3U(nvpair_type(nvp), ==, DATA_TYPE_BYTE_ARRAY);
 
 		const char *name = nvpair_name(nvp);
 		if (plen == 0 && ZFS_XA_NS_PREFIX_FORBIDDEN(name))
 			continue;
 		else if (strncmp(name, attrprefix, plen) != 0)
 			continue;
 		uint8_t nlen = strlen(name) - plen;
 		if (ap->a_size != NULL) {
 			*ap->a_size += 1 + nlen;
 		} else if (ap->a_uio != NULL) {
 			/*
 			 * Format of extattr name entry is one byte for
 			 * length and the rest for name.
 			 */
 			error = uiomove(&nlen, 1, ap->a_uio);
 			if (error == 0) {
 				char *namep = __DECONST(char *, name) + plen;
 				error = uiomove(namep, nlen, ap->a_uio);
 			}
 			if (error != 0) {
 				error = SET_ERROR(error);
 				break;
 			}
 		}
 	}
 
 	return (error);
 }
 
 static int
 zfs_listextattr_impl(struct vop_listextattr_args *ap, boolean_t compat)
 {
 	znode_t *zp = VTOZ(ap->a_vp);
 	zfsvfs_t *zfsvfs = ZTOZSB(zp);
 	char attrprefix[16];
 	int error;
 
 	error = zfs_create_attrname(ap->a_attrnamespace, "", attrprefix,
 	    sizeof (attrprefix), compat);
 	if (error != 0)
 		return (error);
 
 	if (zfsvfs->z_use_sa && zp->z_is_sa)
 		error = zfs_listextattr_sa(ap, attrprefix);
 	if (error == 0)
 		error = zfs_listextattr_dir(ap, attrprefix);
 	return (error);
 }
 
 /*
  * Vnode operation to retrieve extended attributes on a vnode.
  */
 static int
 zfs_listextattr(struct vop_listextattr_args *ap)
 {
 	znode_t *zp = VTOZ(ap->a_vp);
 	zfsvfs_t *zfsvfs = ZTOZSB(zp);
 	int error;
 
 	if (ap->a_size != NULL)
 		*ap->a_size = 0;
 
 	/*
 	 * If the xattr property is off, refuse the request.
 	 */
 	if (!(zfsvfs->z_flags & ZSB_XATTR))
 		return (SET_ERROR(EOPNOTSUPP));
 
 	error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
 	    ap->a_cred, ap->a_td, VREAD);
 	if (error != 0)
 		return (SET_ERROR(error));
 
 	ZFS_ENTER(zfsvfs);
 	ZFS_VERIFY_ZP(zp);
 	rw_enter(&zp->z_xattr_lock, RW_READER);
 
 	error = zfs_listextattr_impl(ap, zfs_xattr_compat);
 	if (error == 0 && ap->a_attrnamespace == EXTATTR_NAMESPACE_USER) {
 		/* Also list user xattrs with the alternate format. */
 		error = zfs_listextattr_impl(ap, !zfs_xattr_compat);
 	}
 
 	rw_exit(&zp->z_xattr_lock);
 	ZFS_EXIT(zfsvfs);
 	return (error);
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct vop_getacl_args {
 	struct vnode *vp;
 	acl_type_t type;
 	struct acl *aclp;
 	struct ucred *cred;
 	struct thread *td;
 };
 #endif
 
 static int
 zfs_freebsd_getacl(struct vop_getacl_args *ap)
 {
 	int		error;
 	vsecattr_t	vsecattr;
 
 	if (ap->a_type != ACL_TYPE_NFS4)
 		return (EINVAL);
 
 	vsecattr.vsa_mask = VSA_ACE | VSA_ACECNT;
 	if ((error = zfs_getsecattr(VTOZ(ap->a_vp),
 	    &vsecattr, 0, ap->a_cred)))
 		return (error);
 
 	error = acl_from_aces(ap->a_aclp, vsecattr.vsa_aclentp,
 	    vsecattr.vsa_aclcnt);
 	if (vsecattr.vsa_aclentp != NULL)
 		kmem_free(vsecattr.vsa_aclentp, vsecattr.vsa_aclentsz);
 
 	return (error);
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct vop_setacl_args {
 	struct vnode *vp;
 	acl_type_t type;
 	struct acl *aclp;
 	struct ucred *cred;
 	struct thread *td;
 };
 #endif
 
 static int
 zfs_freebsd_setacl(struct vop_setacl_args *ap)
 {
 	int		error;
 	vsecattr_t vsecattr;
 	int		aclbsize;	/* size of acl list in bytes */
 	aclent_t	*aaclp;
 
 	if (ap->a_type != ACL_TYPE_NFS4)
 		return (EINVAL);
 
 	if (ap->a_aclp == NULL)
 		return (EINVAL);
 
 	if (ap->a_aclp->acl_cnt < 1 || ap->a_aclp->acl_cnt > MAX_ACL_ENTRIES)
 		return (EINVAL);
 
 	/*
 	 * With NFSv4 ACLs, chmod(2) may need to add additional entries,
 	 * splitting every entry into two and appending "canonical six"
 	 * entries at the end.  Don't allow for setting an ACL that would
 	 * cause chmod(2) to run out of ACL entries.
 	 */
 	if (ap->a_aclp->acl_cnt * 2 + 6 > ACL_MAX_ENTRIES)
 		return (ENOSPC);
 
 	error = acl_nfs4_check(ap->a_aclp, ap->a_vp->v_type == VDIR);
 	if (error != 0)
 		return (error);
 
 	vsecattr.vsa_mask = VSA_ACE;
 	aclbsize = ap->a_aclp->acl_cnt * sizeof (ace_t);
 	vsecattr.vsa_aclentp = kmem_alloc(aclbsize, KM_SLEEP);
 	aaclp = vsecattr.vsa_aclentp;
 	vsecattr.vsa_aclentsz = aclbsize;
 
 	aces_from_acl(vsecattr.vsa_aclentp, &vsecattr.vsa_aclcnt, ap->a_aclp);
 	error = zfs_setsecattr(VTOZ(ap->a_vp), &vsecattr, 0, ap->a_cred);
 	kmem_free(aaclp, aclbsize);
 
 	return (error);
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct vop_aclcheck_args {
 	struct vnode *vp;
 	acl_type_t type;
 	struct acl *aclp;
 	struct ucred *cred;
 	struct thread *td;
 };
 #endif
 
 static int
 zfs_freebsd_aclcheck(struct vop_aclcheck_args *ap)
 {
 
 	return (EOPNOTSUPP);
 }
 
 static int
 zfs_vptocnp(struct vop_vptocnp_args *ap)
 {
 	vnode_t *covered_vp;
 	vnode_t *vp = ap->a_vp;
 	zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
 	znode_t *zp = VTOZ(vp);
 	int ltype;
 	int error;
 
 	ZFS_ENTER(zfsvfs);
 	ZFS_VERIFY_ZP(zp);
 
 	/*
 	 * If we are a snapshot mounted under .zfs, run the operation
 	 * on the covered vnode.
 	 */
 	if (zp->z_id != zfsvfs->z_root || zfsvfs->z_parent == zfsvfs) {
 		char name[MAXNAMLEN + 1];
 		znode_t *dzp;
 		size_t len;
 
 		error = zfs_znode_parent_and_name(zp, &dzp, name);
 		if (error == 0) {
 			len = strlen(name);
 			if (*ap->a_buflen < len)
 				error = SET_ERROR(ENOMEM);
 		}
 		if (error == 0) {
 			*ap->a_buflen -= len;
 			memcpy(ap->a_buf + *ap->a_buflen, name, len);
 			*ap->a_vpp = ZTOV(dzp);
 		}
 		ZFS_EXIT(zfsvfs);
 		return (error);
 	}
 	ZFS_EXIT(zfsvfs);
 
 	covered_vp = vp->v_mount->mnt_vnodecovered;
 #if __FreeBSD_version >= 1300045
 	enum vgetstate vs = vget_prep(covered_vp);
 #else
 	vhold(covered_vp);
 #endif
 	ltype = VOP_ISLOCKED(vp);
 	VOP_UNLOCK1(vp);
 #if __FreeBSD_version >= 1300045
 	error = vget_finish(covered_vp, LK_SHARED, vs);
 #else
 	error = vget(covered_vp, LK_SHARED | LK_VNHELD, curthread);
 #endif
 	if (error == 0) {
 #if __FreeBSD_version >= 1300123
 		error = VOP_VPTOCNP(covered_vp, ap->a_vpp, ap->a_buf,
 		    ap->a_buflen);
 #else
 		error = VOP_VPTOCNP(covered_vp, ap->a_vpp, ap->a_cred,
 		    ap->a_buf, ap->a_buflen);
 #endif
 		vput(covered_vp);
 	}
 	vn_lock(vp, ltype | LK_RETRY);
 	if (VN_IS_DOOMED(vp))
 		error = SET_ERROR(ENOENT);
 	return (error);
 }
 
 #if __FreeBSD_version >= 1400032
 static int
 zfs_deallocate(struct vop_deallocate_args *ap)
 {
 	znode_t *zp = VTOZ(ap->a_vp);
 	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
 	zilog_t *zilog;
 	off_t off, len, file_sz;
 	int error;
 
 	ZFS_ENTER(zfsvfs);
 	ZFS_VERIFY_ZP(zp);
 
 	/*
 	 * Callers might not be able to detect properly that we are read-only,
 	 * so check it explicitly here.
 	 */
 	if (zfs_is_readonly(zfsvfs)) {
 		ZFS_EXIT(zfsvfs);
 		return (SET_ERROR(EROFS));
 	}
 
 	zilog = zfsvfs->z_log;
 	off = *ap->a_offset;
 	len = *ap->a_len;
 	file_sz = zp->z_size;
 	if (off + len > file_sz)
 		len = file_sz - off;
 	/* Fast path for out-of-range request. */
 	if (len <= 0) {
 		*ap->a_len = 0;
 		ZFS_EXIT(zfsvfs);
 		return (0);
 	}
 
 	error = zfs_freesp(zp, off, len, O_RDWR, TRUE);
 	if (error == 0) {
 		if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS ||
 		    (ap->a_ioflag & IO_SYNC) != 0)
 			zil_commit(zilog, zp->z_id);
 		*ap->a_offset = off + len;
 		*ap->a_len = 0;
 	}
 
 	ZFS_EXIT(zfsvfs);
 	return (error);
 }
 #endif
 
 struct vop_vector zfs_vnodeops;
 struct vop_vector zfs_fifoops;
 struct vop_vector zfs_shareops;
 
 struct vop_vector zfs_vnodeops = {
 	.vop_default =		&default_vnodeops,
 	.vop_inactive =		zfs_freebsd_inactive,
 #if __FreeBSD_version >= 1300042
 	.vop_need_inactive =	zfs_freebsd_need_inactive,
 #endif
 	.vop_reclaim =		zfs_freebsd_reclaim,
 #if __FreeBSD_version >= 1300102
 	.vop_fplookup_vexec = zfs_freebsd_fplookup_vexec,
 #endif
 #if __FreeBSD_version >= 1300139
 	.vop_fplookup_symlink = zfs_freebsd_fplookup_symlink,
 #endif
 	.vop_access =		zfs_freebsd_access,
 	.vop_allocate =		VOP_EINVAL,
 #if __FreeBSD_version >= 1400032
 	.vop_deallocate =	zfs_deallocate,
 #endif
 	.vop_lookup =		zfs_cache_lookup,
 	.vop_cachedlookup =	zfs_freebsd_cachedlookup,
 	.vop_getattr =		zfs_freebsd_getattr,
 	.vop_setattr =		zfs_freebsd_setattr,
 	.vop_create =		zfs_freebsd_create,
 	.vop_mknod =		(vop_mknod_t *)zfs_freebsd_create,
 	.vop_mkdir =		zfs_freebsd_mkdir,
 	.vop_readdir =		zfs_freebsd_readdir,
 	.vop_fsync =		zfs_freebsd_fsync,
 	.vop_open =		zfs_freebsd_open,
 	.vop_close =		zfs_freebsd_close,
 	.vop_rmdir =		zfs_freebsd_rmdir,
 	.vop_ioctl =		zfs_freebsd_ioctl,
 	.vop_link =		zfs_freebsd_link,
 	.vop_symlink =		zfs_freebsd_symlink,
 	.vop_readlink =		zfs_freebsd_readlink,
 	.vop_read =		zfs_freebsd_read,
 	.vop_write =		zfs_freebsd_write,
 	.vop_remove =		zfs_freebsd_remove,
 	.vop_rename =		zfs_freebsd_rename,
 	.vop_pathconf =		zfs_freebsd_pathconf,
 	.vop_bmap =		zfs_freebsd_bmap,
 	.vop_fid =		zfs_freebsd_fid,
 	.vop_getextattr =	zfs_getextattr,
 	.vop_deleteextattr =	zfs_deleteextattr,
 	.vop_setextattr =	zfs_setextattr,
 	.vop_listextattr =	zfs_listextattr,
 	.vop_getacl =		zfs_freebsd_getacl,
 	.vop_setacl =		zfs_freebsd_setacl,
 	.vop_aclcheck =		zfs_freebsd_aclcheck,
 	.vop_getpages =		zfs_freebsd_getpages,
 	.vop_putpages =		zfs_freebsd_putpages,
 	.vop_vptocnp =		zfs_vptocnp,
 #if __FreeBSD_version >= 1300064
 	.vop_lock1 =		vop_lock,
 	.vop_unlock =		vop_unlock,
 	.vop_islocked =		vop_islocked,
 #endif
 #if __FreeBSD_version >= 1400043
 	.vop_add_writecount =	vop_stdadd_writecount_nomsync,
 #endif
 };
 VFS_VOP_VECTOR_REGISTER(zfs_vnodeops);
 
 struct vop_vector zfs_fifoops = {
 	.vop_default =		&fifo_specops,
 	.vop_fsync =		zfs_freebsd_fsync,
 #if __FreeBSD_version >= 1300102
 	.vop_fplookup_vexec = zfs_freebsd_fplookup_vexec,
 #endif
 #if __FreeBSD_version >= 1300139
 	.vop_fplookup_symlink = zfs_freebsd_fplookup_symlink,
 #endif
 	.vop_access =		zfs_freebsd_access,
 	.vop_getattr =		zfs_freebsd_getattr,
 	.vop_inactive =		zfs_freebsd_inactive,
 	.vop_read =		VOP_PANIC,
 	.vop_reclaim =		zfs_freebsd_reclaim,
 	.vop_setattr =		zfs_freebsd_setattr,
 	.vop_write =		VOP_PANIC,
 	.vop_pathconf = 	zfs_freebsd_pathconf,
 	.vop_fid =		zfs_freebsd_fid,
 	.vop_getacl =		zfs_freebsd_getacl,
 	.vop_setacl =		zfs_freebsd_setacl,
 	.vop_aclcheck =		zfs_freebsd_aclcheck,
 #if __FreeBSD_version >= 1400043
 	.vop_add_writecount =	vop_stdadd_writecount_nomsync,
 #endif
 };
 VFS_VOP_VECTOR_REGISTER(zfs_fifoops);
 
 /*
  * special share hidden files vnode operations template
  */
 struct vop_vector zfs_shareops = {
 	.vop_default =		&default_vnodeops,
 #if __FreeBSD_version >= 1300121
 	.vop_fplookup_vexec =	VOP_EAGAIN,
 #endif
 #if __FreeBSD_version >= 1300139
 	.vop_fplookup_symlink =	VOP_EAGAIN,
 #endif
 	.vop_access =		zfs_freebsd_access,
 	.vop_inactive =		zfs_freebsd_inactive,
 	.vop_reclaim =		zfs_freebsd_reclaim,
 	.vop_fid =		zfs_freebsd_fid,
 	.vop_pathconf =		zfs_freebsd_pathconf,
 #if __FreeBSD_version >= 1400043
 	.vop_add_writecount =	vop_stdadd_writecount_nomsync,
 #endif
 };
 VFS_VOP_VECTOR_REGISTER(zfs_shareops);
 
 ZFS_MODULE_PARAM(zfs, zfs_, xattr_compat, INT, ZMOD_RW,
 	"Use legacy ZFS xattr naming for writing new user namespace xattrs");
diff --git a/sys/fs/devfs/devfs_vnops.c b/sys/fs/devfs/devfs_vnops.c
index 13619d318cfc..511430ccdd97 100644
--- a/sys/fs/devfs/devfs_vnops.c
+++ b/sys/fs/devfs/devfs_vnops.c
@@ -1,2112 +1,2111 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2000-2004
  *	Poul-Henning Kamp.  All rights reserved.
  * Copyright (c) 1989, 1992-1993, 1995
  *	The Regents of the University of California.  All rights reserved.
  *
  * This code is derived from software donated to Berkeley by
  * Jan-Simon Pendry.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)kernfs_vnops.c	8.15 (Berkeley) 5/21/95
  * From: FreeBSD: src/sys/miscfs/kernfs/kernfs_vnops.c 1.43
  *
  * $FreeBSD$
  */
 
 /*
  * TODO:
  *	mkdir: want it ?
  */
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/conf.h>
 #include <sys/dirent.h>
 #include <sys/eventhandler.h>
 #include <sys/fcntl.h>
 #include <sys/file.h>
 #include <sys/filedesc.h>
 #include <sys/filio.h>
 #include <sys/jail.h>
 #include <sys/kernel.h>
 #include <sys/limits.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mman.h>
 #include <sys/mount.h>
 #include <sys/namei.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/stat.h>
 #include <sys/sx.h>
 #include <sys/sysctl.h>
 #include <sys/time.h>
 #include <sys/ttycom.h>
 #include <sys/unistd.h>
 #include <sys/vnode.h>
 
 static struct vop_vector devfs_vnodeops;
 static struct vop_vector devfs_specops;
 static struct fileops devfs_ops_f;
 
 #include <fs/devfs/devfs.h>
 #include <fs/devfs/devfs_int.h>
 
 #include <security/mac/mac_framework.h>
 
 #include <vm/vm.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_object.h>
 
 static MALLOC_DEFINE(M_CDEVPDATA, "DEVFSP", "Metainfo for cdev-fp data");
 
 struct mtx	devfs_de_interlock;
 MTX_SYSINIT(devfs_de_interlock, &devfs_de_interlock, "devfs interlock", MTX_DEF);
 struct mtx	cdevpriv_mtx;
 MTX_SYSINIT(cdevpriv_mtx, &cdevpriv_mtx, "cdevpriv lock", MTX_DEF);
 
 SYSCTL_DECL(_vfs_devfs);
 
 static int devfs_dotimes;
 SYSCTL_INT(_vfs_devfs, OID_AUTO, dotimes, CTLFLAG_RW,
     &devfs_dotimes, 0, "Update timestamps on DEVFS with default precision");
 
 /*
  * Update devfs node timestamp.  Note that updates are unlocked and
  * stat(2) could see partially updated times.
  */
 static void
 devfs_timestamp(struct timespec *tsp)
 {
 	time_t ts;
 
 	if (devfs_dotimes) {
 		vfs_timestamp(tsp);
 	} else {
 		ts = time_second;
 		if (tsp->tv_sec != ts) {
 			tsp->tv_sec = ts;
 			tsp->tv_nsec = 0;
 		}
 	}
 }
 
 static int
 devfs_fp_check(struct file *fp, struct cdev **devp, struct cdevsw **dswp,
     int *ref)
 {
 	*dswp = devvn_refthread(fp->f_vnode, devp, ref);
 	if (*dswp == NULL || *devp != fp->f_data) {
 		if (*dswp != NULL)
 			dev_relthread(*devp, *ref);
 		return (ENXIO);
 	}
 	KASSERT((*devp)->si_refcount > 0,
 	    ("devfs: un-referenced struct cdev *(%s)", devtoname(*devp)));
 	if (*dswp == NULL)
 		return (ENXIO);
 	curthread->td_fpop = fp;
 	return (0);
 }
 
 int
 devfs_get_cdevpriv(void **datap)
 {
 	struct file *fp;
 	struct cdev_privdata *p;
 	int error;
 
 	fp = curthread->td_fpop;
 	if (fp == NULL)
 		return (EBADF);
 	p = fp->f_cdevpriv;
 	if (p != NULL) {
 		error = 0;
 		*datap = p->cdpd_data;
 	} else
 		error = ENOENT;
 	return (error);
 }
 
 int
 devfs_set_cdevpriv(void *priv, d_priv_dtor_t *priv_dtr)
 {
 	struct file *fp;
 	struct cdev_priv *cdp;
 	struct cdev_privdata *p;
 	int error;
 
 	fp = curthread->td_fpop;
 	if (fp == NULL)
 		return (ENOENT);
 	cdp = cdev2priv((struct cdev *)fp->f_data);
 	p = malloc(sizeof(struct cdev_privdata), M_CDEVPDATA, M_WAITOK);
 	p->cdpd_data = priv;
 	p->cdpd_dtr = priv_dtr;
 	p->cdpd_fp = fp;
 	mtx_lock(&cdevpriv_mtx);
 	if (fp->f_cdevpriv == NULL) {
 		LIST_INSERT_HEAD(&cdp->cdp_fdpriv, p, cdpd_list);
 		fp->f_cdevpriv = p;
 		mtx_unlock(&cdevpriv_mtx);
 		error = 0;
 	} else {
 		mtx_unlock(&cdevpriv_mtx);
 		free(p, M_CDEVPDATA);
 		error = EBUSY;
 	}
 	return (error);
 }
 
 void
 devfs_destroy_cdevpriv(struct cdev_privdata *p)
 {
 
 	mtx_assert(&cdevpriv_mtx, MA_OWNED);
 	KASSERT(p->cdpd_fp->f_cdevpriv == p,
 	    ("devfs_destoy_cdevpriv %p != %p", p->cdpd_fp->f_cdevpriv, p));
 	p->cdpd_fp->f_cdevpriv = NULL;
 	LIST_REMOVE(p, cdpd_list);
 	mtx_unlock(&cdevpriv_mtx);
 	(p->cdpd_dtr)(p->cdpd_data);
 	free(p, M_CDEVPDATA);
 }
 
 static void
 devfs_fpdrop(struct file *fp)
 {
 	struct cdev_privdata *p;
 
 	mtx_lock(&cdevpriv_mtx);
 	if ((p = fp->f_cdevpriv) == NULL) {
 		mtx_unlock(&cdevpriv_mtx);
 		return;
 	}
 	devfs_destroy_cdevpriv(p);
 }
 
 void
 devfs_clear_cdevpriv(void)
 {
 	struct file *fp;
 
 	fp = curthread->td_fpop;
 	if (fp == NULL)
 		return;
 	devfs_fpdrop(fp);
 }
 
 static void
 devfs_usecount_add(struct vnode *vp)
 {
 	struct devfs_dirent *de;
 	struct cdev *dev;
 
 	mtx_lock(&devfs_de_interlock);
 	VI_LOCK(vp);
 	VNPASS(vp->v_type == VCHR || vp->v_type == VBAD, vp);
 	if (VN_IS_DOOMED(vp)) {
 		goto out_unlock;
 	}
 
 	de = vp->v_data;
 	dev = vp->v_rdev;
 	MPASS(de != NULL);
 	MPASS(dev != NULL);
 	dev->si_usecount++;
 	de->de_usecount++;
 out_unlock:
 	VI_UNLOCK(vp);
 	mtx_unlock(&devfs_de_interlock);
 }
 
 static void
 devfs_usecount_subl(struct vnode *vp)
 {
 	struct devfs_dirent *de;
 	struct cdev *dev;
 
 	mtx_assert(&devfs_de_interlock, MA_OWNED);
 	ASSERT_VI_LOCKED(vp, __func__);
 	VNPASS(vp->v_type == VCHR || vp->v_type == VBAD, vp);
 
 	de = vp->v_data;
 	dev = vp->v_rdev;
 	if (de == NULL)
 		return;
 	if (dev == NULL) {
 		MPASS(de->de_usecount == 0);
 		return;
 	}
 	if (dev->si_usecount < de->de_usecount)
 		panic("%s: si_usecount underflow for dev %p "
 		    "(has %ld, dirent has %d)\n",
 		    __func__, dev, dev->si_usecount, de->de_usecount);
 	if (VN_IS_DOOMED(vp)) {
 		dev->si_usecount -= de->de_usecount;
 		de->de_usecount = 0;
 	} else {
 		if (de->de_usecount == 0)
 			panic("%s: de_usecount underflow for dev %p\n",
 			    __func__, dev);
 		dev->si_usecount--;
 		de->de_usecount--;
 	}
 }
 
 static void
 devfs_usecount_sub(struct vnode *vp)
 {
 
 	mtx_lock(&devfs_de_interlock);
 	VI_LOCK(vp);
 	devfs_usecount_subl(vp);
 	VI_UNLOCK(vp);
 	mtx_unlock(&devfs_de_interlock);
 }
 
 static int
 devfs_usecountl(struct vnode *vp)
 {
 
 	VNPASS(vp->v_type == VCHR, vp);
 	mtx_assert(&devfs_de_interlock, MA_OWNED);
 	ASSERT_VI_LOCKED(vp, __func__);
 	return (vp->v_rdev->si_usecount);
 }
 
 int
 devfs_usecount(struct vnode *vp)
 {
 	int count;
 
 	VNPASS(vp->v_type == VCHR, vp);
 	mtx_lock(&devfs_de_interlock);
 	VI_LOCK(vp);
 	count = devfs_usecountl(vp);
 	VI_UNLOCK(vp);
 	mtx_unlock(&devfs_de_interlock);
 	return (count);
 }
 
 void
 devfs_ctty_ref(struct vnode *vp)
 {
 
 	vrefact(vp);
 	devfs_usecount_add(vp);
 }
 
 void
 devfs_ctty_unref(struct vnode *vp)
 {
 
 	devfs_usecount_sub(vp);
 	vrele(vp);
 }
 
 /*
  * On success devfs_populate_vp() returns with dmp->dm_lock held.
  */
 static int
 devfs_populate_vp(struct vnode *vp)
 {
 	struct devfs_dirent *de;
 	struct devfs_mount *dmp;
 	int locked;
 
 	ASSERT_VOP_LOCKED(vp, "devfs_populate_vp");
 
 	dmp = VFSTODEVFS(vp->v_mount);
 	if (!devfs_populate_needed(dmp)) {
 		sx_xlock(&dmp->dm_lock);
 		goto out_nopopulate;
 	}
 
 	locked = VOP_ISLOCKED(vp);
 
 	sx_xlock(&dmp->dm_lock);
 	DEVFS_DMP_HOLD(dmp);
 
 	/* Can't call devfs_populate() with the vnode lock held. */
 	VOP_UNLOCK(vp);
 	devfs_populate(dmp);
 
 	sx_xunlock(&dmp->dm_lock);
 	vn_lock(vp, locked | LK_RETRY);
 	sx_xlock(&dmp->dm_lock);
 	if (DEVFS_DMP_DROP(dmp)) {
 		sx_xunlock(&dmp->dm_lock);
 		devfs_unmount_final(dmp);
 		return (ERESTART);
 	}
 out_nopopulate:
 	if (VN_IS_DOOMED(vp)) {
 		sx_xunlock(&dmp->dm_lock);
 		return (ERESTART);
 	}
 	de = vp->v_data;
 	KASSERT(de != NULL,
 	    ("devfs_populate_vp: vp->v_data == NULL but vnode not doomed"));
 	if ((de->de_flags & DE_DOOMED) != 0) {
 		sx_xunlock(&dmp->dm_lock);
 		return (ERESTART);
 	}
 
 	return (0);
 }
 
 static int
 devfs_vptocnp(struct vop_vptocnp_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct vnode **dvp = ap->a_vpp;
 	struct devfs_mount *dmp;
 	char *buf = ap->a_buf;
 	size_t *buflen = ap->a_buflen;
 	struct devfs_dirent *dd, *de;
 	int i, error;
 
 	dmp = VFSTODEVFS(vp->v_mount);
 
 	error = devfs_populate_vp(vp);
 	if (error != 0)
 		return (error);
 
 	if (vp->v_type != VCHR && vp->v_type != VDIR) {
 		error = ENOENT;
 		goto finished;
 	}
 
 	dd = vp->v_data;
 	if (vp->v_type == VDIR && dd == dmp->dm_rootdir) {
 		*dvp = vp;
 		vref(*dvp);
 		goto finished;
 	}
 
 	i = *buflen;
 	i -= dd->de_dirent->d_namlen;
 	if (i < 0) {
 		error = ENOMEM;
 		goto finished;
 	}
 	bcopy(dd->de_dirent->d_name, buf + i, dd->de_dirent->d_namlen);
 	*buflen = i;
 	de = devfs_parent_dirent(dd);
 	if (de == NULL) {
 		error = ENOENT;
 		goto finished;
 	}
 	mtx_lock(&devfs_de_interlock);
 	*dvp = de->de_vnode;
 	if (*dvp != NULL) {
 		VI_LOCK(*dvp);
 		mtx_unlock(&devfs_de_interlock);
 		vholdl(*dvp);
 		VI_UNLOCK(*dvp);
 		vref(*dvp);
 		vdrop(*dvp);
 	} else {
 		mtx_unlock(&devfs_de_interlock);
 		error = ENOENT;
 	}
 finished:
 	sx_xunlock(&dmp->dm_lock);
 	return (error);
 }
 
 /*
  * Construct the fully qualified path name relative to the mountpoint.
  * If a NULL cnp is provided, no '/' is appended to the resulting path.
  */
 char *
 devfs_fqpn(char *buf, struct devfs_mount *dmp, struct devfs_dirent *dd,
     struct componentname *cnp)
 {
 	int i;
 	struct devfs_dirent *de;
 
 	sx_assert(&dmp->dm_lock, SA_LOCKED);
 
 	i = SPECNAMELEN;
 	buf[i] = '\0';
 	if (cnp != NULL)
 		i -= cnp->cn_namelen;
 	if (i < 0)
 		 return (NULL);
 	if (cnp != NULL)
 		bcopy(cnp->cn_nameptr, buf + i, cnp->cn_namelen);
 	de = dd;
 	while (de != dmp->dm_rootdir) {
 		if (cnp != NULL || i < SPECNAMELEN) {
 			i--;
 			if (i < 0)
 				 return (NULL);
 			buf[i] = '/';
 		}
 		i -= de->de_dirent->d_namlen;
 		if (i < 0)
 			 return (NULL);
 		bcopy(de->de_dirent->d_name, buf + i,
 		    de->de_dirent->d_namlen);
 		de = devfs_parent_dirent(de);
 		if (de == NULL)
 			return (NULL);
 	}
 	return (buf + i);
 }
 
 static int
 devfs_allocv_drop_refs(int drop_dm_lock, struct devfs_mount *dmp,
 	struct devfs_dirent *de)
 {
 	int not_found;
 
 	not_found = 0;
 	if (de->de_flags & DE_DOOMED)
 		not_found = 1;
 	if (DEVFS_DE_DROP(de)) {
 		KASSERT(not_found == 1, ("DEVFS de dropped but not doomed"));
 		devfs_dirent_free(de);
 	}
 	if (DEVFS_DMP_DROP(dmp)) {
 		KASSERT(not_found == 1,
 			("DEVFS mount struct freed before dirent"));
 		not_found = 2;
 		sx_xunlock(&dmp->dm_lock);
 		devfs_unmount_final(dmp);
 	}
 	if (not_found == 1 || (drop_dm_lock && not_found != 2))
 		sx_unlock(&dmp->dm_lock);
 	return (not_found);
 }
 
 /*
  * devfs_allocv shall be entered with dmp->dm_lock held, and it drops
  * it on return.
  */
 int
 devfs_allocv(struct devfs_dirent *de, struct mount *mp, int lockmode,
     struct vnode **vpp)
 {
 	int error;
 	struct vnode *vp;
 	struct cdev *dev;
 	struct devfs_mount *dmp;
 	struct cdevsw *dsw;
 	enum vgetstate vs;
 
 	dmp = VFSTODEVFS(mp);
 	if (de->de_flags & DE_DOOMED) {
 		sx_xunlock(&dmp->dm_lock);
 		return (ENOENT);
 	}
 loop:
 	DEVFS_DE_HOLD(de);
 	DEVFS_DMP_HOLD(dmp);
 	mtx_lock(&devfs_de_interlock);
 	vp = de->de_vnode;
 	if (vp != NULL) {
 		vs = vget_prep(vp);
 		mtx_unlock(&devfs_de_interlock);
 		sx_xunlock(&dmp->dm_lock);
 		vget_finish(vp, lockmode | LK_RETRY, vs);
 		sx_xlock(&dmp->dm_lock);
 		if (devfs_allocv_drop_refs(0, dmp, de)) {
 			vput(vp);
 			return (ENOENT);
 		}
 		else if (VN_IS_DOOMED(vp)) {
 			mtx_lock(&devfs_de_interlock);
 			if (de->de_vnode == vp) {
 				de->de_vnode = NULL;
 				vp->v_data = NULL;
 			}
 			mtx_unlock(&devfs_de_interlock);
 			vput(vp);
 			goto loop;
 		}
 		sx_xunlock(&dmp->dm_lock);
 		*vpp = vp;
 		return (0);
 	}
 	mtx_unlock(&devfs_de_interlock);
 	if (de->de_dirent->d_type == DT_CHR) {
 		if (!(de->de_cdp->cdp_flags & CDP_ACTIVE)) {
 			devfs_allocv_drop_refs(1, dmp, de);
 			return (ENOENT);
 		}
 		dev = &de->de_cdp->cdp_c;
 	} else {
 		dev = NULL;
 	}
 	error = getnewvnode("devfs", mp, &devfs_vnodeops, &vp);
 	if (error != 0) {
 		devfs_allocv_drop_refs(1, dmp, de);
 		printf("devfs_allocv: failed to allocate new vnode\n");
 		return (error);
 	}
 
 	if (de->de_dirent->d_type == DT_CHR) {
 		vp->v_type = VCHR;
 		VI_LOCK(vp);
 		dev_lock();
 		dev_refl(dev);
 		/* XXX: v_rdev should be protect by vnode lock */
 		vp->v_rdev = dev;
 		VNPASS(vp->v_usecount == 1, vp);
 		/* Special casing of ttys for deadfs.  Probably redundant. */
 		dsw = dev->si_devsw;
 		if (dsw != NULL && (dsw->d_flags & D_TTY) != 0)
 			vp->v_vflag |= VV_ISTTY;
 		dev_unlock();
 		VI_UNLOCK(vp);
 		if ((dev->si_flags & SI_ETERNAL) != 0)
 			vp->v_vflag |= VV_ETERNALDEV;
 		vp->v_op = &devfs_specops;
 	} else if (de->de_dirent->d_type == DT_DIR) {
 		vp->v_type = VDIR;
 	} else if (de->de_dirent->d_type == DT_LNK) {
 		vp->v_type = VLNK;
 	} else {
 		vp->v_type = VBAD;
 	}
 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY | LK_NOWITNESS);
 	VN_LOCK_ASHARE(vp);
 	mtx_lock(&devfs_de_interlock);
 	vp->v_data = de;
 	de->de_vnode = vp;
 	mtx_unlock(&devfs_de_interlock);
 	error = insmntque1(vp, mp);
 	if (error != 0) {
 		mtx_lock(&devfs_de_interlock);
 		vp->v_data = NULL;
 		de->de_vnode = NULL;
 		mtx_unlock(&devfs_de_interlock);
 		vgone(vp);
 		vput(vp);
 		(void) devfs_allocv_drop_refs(1, dmp, de);
 		return (error);
 	}
 	if (devfs_allocv_drop_refs(0, dmp, de)) {
 		vput(vp);
 		return (ENOENT);
 	}
 #ifdef MAC
 	mac_devfs_vnode_associate(mp, de, vp);
 #endif
 	sx_xunlock(&dmp->dm_lock);
 	*vpp = vp;
 	return (0);
 }
 
 static int
 devfs_access(struct vop_access_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct devfs_dirent *de;
 	struct proc *p;
 	int error;
 
 	de = vp->v_data;
 	if (vp->v_type == VDIR)
 		de = de->de_dir;
 
 	error = vaccess(vp->v_type, de->de_mode, de->de_uid, de->de_gid,
 	    ap->a_accmode, ap->a_cred);
 	if (error == 0)
 		return (0);
 	if (error != EACCES)
 		return (error);
 	p = ap->a_td->td_proc;
 	/* We do, however, allow access to the controlling terminal */
 	PROC_LOCK(p);
 	if (!(p->p_flag & P_CONTROLT)) {
 		PROC_UNLOCK(p);
 		return (error);
 	}
 	if (p->p_session->s_ttydp == de->de_cdp)
 		error = 0;
 	PROC_UNLOCK(p);
 	return (error);
 }
 
 _Static_assert(((FMASK | FCNTLFLAGS) & (FLASTCLOSE | FREVOKE)) == 0,
     "devfs-only flag reuse failed");
 
 static int
 devfs_close(struct vop_close_args *ap)
 {
 	struct vnode *vp = ap->a_vp, *oldvp;
 	struct thread *td = ap->a_td;
 	struct proc *p;
 	struct cdev *dev = vp->v_rdev;
 	struct cdevsw *dsw;
 	struct devfs_dirent *de = vp->v_data;
 	int dflags, error, ref, vp_locked;
 
 	/*
 	 * XXX: Don't call d_close() if we were called because of
 	 * XXX: insmntque() failure.
 	 */
 	if (vp->v_data == NULL)
 		return (0);
 
 	/*
 	 * Hack: a tty device that is a controlling terminal
 	 * has a reference from the session structure.
 	 * We cannot easily tell that a character device is
 	 * a controlling terminal, unless it is the closing
 	 * process' controlling terminal.  In that case,
 	 * if the reference count is 2 (this last descriptor
 	 * plus the session), release the reference from the session.
 	 */
 	if (de->de_usecount == 2 && td != NULL) {
 		p = td->td_proc;
 		PROC_LOCK(p);
 		if (vp == p->p_session->s_ttyvp) {
 			PROC_UNLOCK(p);
 			oldvp = NULL;
 			sx_xlock(&proctree_lock);
 			if (vp == p->p_session->s_ttyvp) {
 				SESS_LOCK(p->p_session);
 				mtx_lock(&devfs_de_interlock);
 				VI_LOCK(vp);
 				if (devfs_usecountl(vp) == 2 && !VN_IS_DOOMED(vp)) {
 					p->p_session->s_ttyvp = NULL;
 					p->p_session->s_ttydp = NULL;
 					oldvp = vp;
 				}
 				VI_UNLOCK(vp);
 				mtx_unlock(&devfs_de_interlock);
 				SESS_UNLOCK(p->p_session);
 			}
 			sx_xunlock(&proctree_lock);
 			if (oldvp != NULL)
 				devfs_ctty_unref(oldvp);
 		} else
 			PROC_UNLOCK(p);
 	}
 	/*
 	 * We do not want to really close the device if it
 	 * is still in use unless we are trying to close it
 	 * forcibly. Since every use (buffer, vnode, swap, cmap)
 	 * holds a reference to the vnode, and because we mark
 	 * any other vnodes that alias this device, when the
 	 * sum of the reference counts on all the aliased
 	 * vnodes descends to one, we are on last close.
 	 */
 	dsw = dev_refthread(dev, &ref);
 	if (dsw == NULL)
 		return (ENXIO);
 	dflags = 0;
 	mtx_lock(&devfs_de_interlock);
 	VI_LOCK(vp);
 	if (devfs_usecountl(vp) == 1)
 		dflags |= FLASTCLOSE;
 	devfs_usecount_subl(vp);
 	mtx_unlock(&devfs_de_interlock);
 	if (VN_IS_DOOMED(vp)) {
 		/* Forced close. */
 		dflags |= FREVOKE | FNONBLOCK;
 	} else if (dsw->d_flags & D_TRACKCLOSE) {
 		/* Keep device updated on status. */
 	} else if ((dflags & FLASTCLOSE) == 0) {
 		VI_UNLOCK(vp);
 		dev_relthread(dev, ref);
 		return (0);
 	}
 	vholdnz(vp);
 	VI_UNLOCK(vp);
 	vp_locked = VOP_ISLOCKED(vp);
 	VOP_UNLOCK(vp);
 	KASSERT(dev->si_refcount > 0,
 	    ("devfs_close() on un-referenced struct cdev *(%s)", devtoname(dev)));
 	error = dsw->d_close(dev, ap->a_fflag | dflags, S_IFCHR, td);
 	dev_relthread(dev, ref);
 	vn_lock(vp, vp_locked | LK_RETRY);
 	vdrop(vp);
 	return (error);
 }
 
 static int
 devfs_close_f(struct file *fp, struct thread *td)
 {
 	int error;
 	struct file *fpop;
 
 	/*
 	 * NB: td may be NULL if this descriptor is closed due to
 	 * garbage collection from a closed UNIX domain socket.
 	 */
 	fpop = curthread->td_fpop;
 	curthread->td_fpop = fp;
 	error = vnops.fo_close(fp, td);
 	curthread->td_fpop = fpop;
 
 	/*
 	 * The f_cdevpriv cannot be assigned non-NULL value while we
 	 * are destroying the file.
 	 */
 	if (fp->f_cdevpriv != NULL)
 		devfs_fpdrop(fp);
 	return (error);
 }
 
 static int
 devfs_getattr(struct vop_getattr_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct vattr *vap = ap->a_vap;
 	struct devfs_dirent *de;
 	struct devfs_mount *dmp;
 	struct cdev *dev;
 	struct timeval boottime;
 	int error;
 
 	error = devfs_populate_vp(vp);
 	if (error != 0)
 		return (error);
 
 	dmp = VFSTODEVFS(vp->v_mount);
 	sx_xunlock(&dmp->dm_lock);
 
 	de = vp->v_data;
 	KASSERT(de != NULL, ("Null dirent in devfs_getattr vp=%p", vp));
 	if (vp->v_type == VDIR) {
 		de = de->de_dir;
 		KASSERT(de != NULL,
 		    ("Null dir dirent in devfs_getattr vp=%p", vp));
 	}
 	vap->va_uid = de->de_uid;
 	vap->va_gid = de->de_gid;
 	vap->va_mode = de->de_mode;
 	if (vp->v_type == VLNK)
 		vap->va_size = strlen(de->de_symlink);
 	else if (vp->v_type == VDIR)
 		vap->va_size = vap->va_bytes = DEV_BSIZE;
 	else
 		vap->va_size = 0;
 	if (vp->v_type != VDIR)
 		vap->va_bytes = 0;
 	vap->va_blocksize = DEV_BSIZE;
 	vap->va_type = vp->v_type;
 
 	getboottime(&boottime);
 #define fix(aa)							\
 	do {							\
 		if ((aa).tv_sec <= 3600) {			\
 			(aa).tv_sec = boottime.tv_sec;		\
 			(aa).tv_nsec = boottime.tv_usec * 1000; \
 		}						\
 	} while (0)
 
 	if (vp->v_type != VCHR)  {
 		fix(de->de_atime);
 		vap->va_atime = de->de_atime;
 		fix(de->de_mtime);
 		vap->va_mtime = de->de_mtime;
 		fix(de->de_ctime);
 		vap->va_ctime = de->de_ctime;
 	} else {
 		dev = vp->v_rdev;
 		fix(dev->si_atime);
 		vap->va_atime = dev->si_atime;
 		fix(dev->si_mtime);
 		vap->va_mtime = dev->si_mtime;
 		fix(dev->si_ctime);
 		vap->va_ctime = dev->si_ctime;
 
 		vap->va_rdev = cdev2priv(dev)->cdp_inode;
 	}
 	vap->va_gen = 0;
 	vap->va_flags = 0;
 	vap->va_filerev = 0;
 	vap->va_nlink = de->de_links;
 	vap->va_fileid = de->de_inode;
 
 	return (error);
 }
 
 /* ARGSUSED */
 static int
 devfs_ioctl_f(struct file *fp, u_long com, void *data, struct ucred *cred, struct thread *td)
 {
 	struct file *fpop;
 	int error;
 
 	fpop = td->td_fpop;
 	td->td_fpop = fp;
 	error = vnops.fo_ioctl(fp, com, data, cred, td);
 	td->td_fpop = fpop;
 	return (error);
 }
 
 void *
 fiodgname_buf_get_ptr(void *fgnp, u_long com)
 {
 	union {
 		struct fiodgname_arg	fgn;
 #ifdef COMPAT_FREEBSD32
 		struct fiodgname_arg32	fgn32;
 #endif
 	} *fgnup;
 
 	fgnup = fgnp;
 	switch (com) {
 	case FIODGNAME:
 		return (fgnup->fgn.buf);
 #ifdef COMPAT_FREEBSD32
 	case FIODGNAME_32:
 		return ((void *)(uintptr_t)fgnup->fgn32.buf);
 #endif
 	default:
 		panic("Unhandled ioctl command %ld", com);
 	}
 }
 
 static int
 devfs_ioctl(struct vop_ioctl_args *ap)
 {
 	struct fiodgname_arg *fgn;
 	struct vnode *vpold, *vp;
 	struct cdevsw *dsw;
 	struct thread *td;
 	struct session *sess;
 	struct cdev *dev;
 	int error, ref, i;
 	const char *p;
 	u_long com;
 
 	vp = ap->a_vp;
 	com = ap->a_command;
 	td = ap->a_td;
 
 	dsw = devvn_refthread(vp, &dev, &ref);
 	if (dsw == NULL)
 		return (ENXIO);
 	KASSERT(dev->si_refcount > 0,
 	    ("devfs: un-referenced struct cdev *(%s)", devtoname(dev)));
 
 	switch (com) {
 	case FIODTYPE:
 		*(int *)ap->a_data = dsw->d_flags & D_TYPEMASK;
 		error = 0;
 		break;
 	case FIODGNAME:
 #ifdef	COMPAT_FREEBSD32
 	case FIODGNAME_32:
 #endif
 		fgn = ap->a_data;
 		p = devtoname(dev);
 		i = strlen(p) + 1;
 		if (i > fgn->len)
 			error = EINVAL;
 		else
 			error = copyout(p, fiodgname_buf_get_ptr(fgn, com), i);
 		break;
 	default:
 		error = dsw->d_ioctl(dev, com, ap->a_data, ap->a_fflag, td);
 	}
 
 	dev_relthread(dev, ref);
 	if (error == ENOIOCTL)
 		error = ENOTTY;
 
 	if (error == 0 && com == TIOCSCTTY) {
 		/*
 		 * Do nothing if reassigning same control tty, or if the
 		 * control tty has already disappeared.  If it disappeared,
 		 * it's because we were racing with TIOCNOTTY.  TIOCNOTTY
 		 * already took care of releasing the old vnode and we have
 		 * nothing left to do.
 		 */
 		sx_slock(&proctree_lock);
 		sess = td->td_proc->p_session;
 		if (sess->s_ttyvp == vp || sess->s_ttyp == NULL) {
 			sx_sunlock(&proctree_lock);
 			return (0);
 		}
 
 		devfs_ctty_ref(vp);
 		SESS_LOCK(sess);
 		vpold = sess->s_ttyvp;
 		sess->s_ttyvp = vp;
 		sess->s_ttydp = cdev2priv(dev);
 		SESS_UNLOCK(sess);
 
 		sx_sunlock(&proctree_lock);
 
 		/* Get rid of reference to old control tty */
 		if (vpold)
 			devfs_ctty_unref(vpold);
 	}
 	return (error);
 }
 
 /* ARGSUSED */
 static int
 devfs_kqfilter_f(struct file *fp, struct knote *kn)
 {
 	struct cdev *dev;
 	struct cdevsw *dsw;
 	int error, ref;
 	struct file *fpop;
 	struct thread *td;
 
 	td = curthread;
 	fpop = td->td_fpop;
 	error = devfs_fp_check(fp, &dev, &dsw, &ref);
 	if (error)
 		return (error);
 	error = dsw->d_kqfilter(dev, kn);
 	td->td_fpop = fpop;
 	dev_relthread(dev, ref);
 	return (error);
 }
 
 static inline int
 devfs_prison_check(struct devfs_dirent *de, struct thread *td)
 {
 	struct cdev_priv *cdp;
 	struct ucred *dcr;
 	struct proc *p;
 	int error;
 
 	cdp = de->de_cdp;
 	if (cdp == NULL)
 		return (0);
 	dcr = cdp->cdp_c.si_cred;
 	if (dcr == NULL)
 		return (0);
 
 	error = prison_check(td->td_ucred, dcr);
 	if (error == 0)
 		return (0);
 	/* We do, however, allow access to the controlling terminal */
 	p = td->td_proc;
 	PROC_LOCK(p);
 	if (!(p->p_flag & P_CONTROLT)) {
 		PROC_UNLOCK(p);
 		return (error);
 	}
 	if (p->p_session->s_ttydp == cdp)
 		error = 0;
 	PROC_UNLOCK(p);
 	return (error);
 }
 
 static int
 devfs_lookupx(struct vop_lookup_args *ap, int *dm_unlock)
 {
 	struct componentname *cnp;
 	struct vnode *dvp, **vpp;
 	struct thread *td;
 	struct devfs_dirent *de, *dd;
 	struct devfs_dirent **dde;
 	struct devfs_mount *dmp;
 	struct mount *mp;
 	struct cdev *cdev;
 	int error, flags, nameiop, dvplocked;
 	char specname[SPECNAMELEN + 1], *pname;
 
 	td = curthread;
 	cnp = ap->a_cnp;
 	vpp = ap->a_vpp;
 	dvp = ap->a_dvp;
 	pname = cnp->cn_nameptr;
 	flags = cnp->cn_flags;
 	nameiop = cnp->cn_nameiop;
 	mp = dvp->v_mount;
 	dmp = VFSTODEVFS(mp);
 	dd = dvp->v_data;
 	*vpp = NULLVP;
 
 	if ((flags & ISLASTCN) && nameiop == RENAME)
 		return (EOPNOTSUPP);
 
 	if (dvp->v_type != VDIR)
 		return (ENOTDIR);
 
 	if ((flags & ISDOTDOT) && (dvp->v_vflag & VV_ROOT))
 		return (EIO);
 
 	error = vn_dir_check_exec(dvp, cnp);
 	if (error != 0)
 		return (error);
 
 	if (cnp->cn_namelen == 1 && *pname == '.') {
 		if ((flags & ISLASTCN) && nameiop != LOOKUP)
 			return (EINVAL);
 		*vpp = dvp;
 		VREF(dvp);
 		return (0);
 	}
 
 	if (flags & ISDOTDOT) {
 		if ((flags & ISLASTCN) && nameiop != LOOKUP)
 			return (EINVAL);
 		de = devfs_parent_dirent(dd);
 		if (de == NULL)
 			return (ENOENT);
 		dvplocked = VOP_ISLOCKED(dvp);
 		VOP_UNLOCK(dvp);
 		error = devfs_allocv(de, mp, cnp->cn_lkflags & LK_TYPE_MASK,
 		    vpp);
 		*dm_unlock = 0;
 		vn_lock(dvp, dvplocked | LK_RETRY);
 		return (error);
 	}
 
 	dd = dvp->v_data;
 	de = devfs_find(dd, cnp->cn_nameptr, cnp->cn_namelen, 0);
 	while (de == NULL) {	/* While(...) so we can use break */
 
 		if (nameiop == DELETE)
 			return (ENOENT);
 
 		/*
 		 * OK, we didn't have an entry for the name we were asked for
 		 * so we try to see if anybody can create it on demand.
 		 */
 		pname = devfs_fqpn(specname, dmp, dd, cnp);
 		if (pname == NULL)
 			break;
 
 		cdev = NULL;
 		DEVFS_DMP_HOLD(dmp);
 		sx_xunlock(&dmp->dm_lock);
 		EVENTHANDLER_INVOKE(dev_clone,
 		    td->td_ucred, pname, strlen(pname), &cdev);
 
 		if (cdev == NULL)
 			sx_xlock(&dmp->dm_lock);
 		else if (devfs_populate_vp(dvp) != 0) {
 			*dm_unlock = 0;
 			sx_xlock(&dmp->dm_lock);
 			if (DEVFS_DMP_DROP(dmp)) {
 				sx_xunlock(&dmp->dm_lock);
 				devfs_unmount_final(dmp);
 			} else
 				sx_xunlock(&dmp->dm_lock);
 			dev_rel(cdev);
 			return (ENOENT);
 		}
 		if (DEVFS_DMP_DROP(dmp)) {
 			*dm_unlock = 0;
 			sx_xunlock(&dmp->dm_lock);
 			devfs_unmount_final(dmp);
 			if (cdev != NULL)
 				dev_rel(cdev);
 			return (ENOENT);
 		}
 
 		if (cdev == NULL)
 			break;
 
 		dev_lock();
 		dde = &cdev2priv(cdev)->cdp_dirents[dmp->dm_idx];
 		if (dde != NULL && *dde != NULL)
 			de = *dde;
 		dev_unlock();
 		dev_rel(cdev);
 		break;
 	}
 
 	if (de == NULL || de->de_flags & DE_WHITEOUT) {
 		if ((nameiop == CREATE || nameiop == RENAME) &&
 		    (flags & (LOCKPARENT | WANTPARENT)) && (flags & ISLASTCN)) {
-			cnp->cn_flags |= SAVENAME;
 			return (EJUSTRETURN);
 		}
 		return (ENOENT);
 	}
 
 	if (devfs_prison_check(de, td))
 		return (ENOENT);
 
 	if ((cnp->cn_nameiop == DELETE) && (flags & ISLASTCN)) {
 		error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred, td);
 		if (error)
 			return (error);
 		if (*vpp == dvp) {
 			VREF(dvp);
 			*vpp = dvp;
 			return (0);
 		}
 	}
 	error = devfs_allocv(de, mp, cnp->cn_lkflags & LK_TYPE_MASK, vpp);
 	*dm_unlock = 0;
 	return (error);
 }
 
 static int
 devfs_lookup(struct vop_lookup_args *ap)
 {
 	int j;
 	struct devfs_mount *dmp;
 	int dm_unlock;
 
 	if (devfs_populate_vp(ap->a_dvp) != 0)
 		return (ENOTDIR);
 
 	dmp = VFSTODEVFS(ap->a_dvp->v_mount);
 	dm_unlock = 1;
 	j = devfs_lookupx(ap, &dm_unlock);
 	if (dm_unlock == 1)
 		sx_xunlock(&dmp->dm_lock);
 	return (j);
 }
 
 static int
 devfs_mknod(struct vop_mknod_args *ap)
 {
 	struct componentname *cnp;
 	struct vnode *dvp, **vpp;
 	struct devfs_dirent *dd, *de;
 	struct devfs_mount *dmp;
 	int error;
 
 	/*
 	 * The only type of node we should be creating here is a
 	 * character device, for anything else return EOPNOTSUPP.
 	 */
 	if (ap->a_vap->va_type != VCHR)
 		return (EOPNOTSUPP);
 	dvp = ap->a_dvp;
 	dmp = VFSTODEVFS(dvp->v_mount);
 
 	cnp = ap->a_cnp;
 	vpp = ap->a_vpp;
 	dd = dvp->v_data;
 
 	error = ENOENT;
 	sx_xlock(&dmp->dm_lock);
 	TAILQ_FOREACH(de, &dd->de_dlist, de_list) {
 		if (cnp->cn_namelen != de->de_dirent->d_namlen)
 			continue;
 		if (de->de_dirent->d_type == DT_CHR &&
 		    (de->de_cdp->cdp_flags & CDP_ACTIVE) == 0)
 			continue;
 		if (bcmp(cnp->cn_nameptr, de->de_dirent->d_name,
 		    de->de_dirent->d_namlen) != 0)
 			continue;
 		if (de->de_flags & DE_WHITEOUT)
 			break;
 		goto notfound;
 	}
 	if (de == NULL)
 		goto notfound;
 	de->de_flags &= ~DE_WHITEOUT;
 	error = devfs_allocv(de, dvp->v_mount, LK_EXCLUSIVE, vpp);
 	return (error);
 notfound:
 	sx_xunlock(&dmp->dm_lock);
 	return (error);
 }
 
 /* ARGSUSED */
 static int
 devfs_open(struct vop_open_args *ap)
 {
 	struct thread *td = ap->a_td;
 	struct vnode *vp = ap->a_vp;
 	struct cdev *dev = vp->v_rdev;
 	struct file *fp = ap->a_fp;
 	int error, ref, vlocked;
 	struct cdevsw *dsw;
 	struct file *fpop;
 
 	if (vp->v_type == VBLK)
 		return (ENXIO);
 
 	if (dev == NULL)
 		return (ENXIO);
 
 	/* Make this field valid before any I/O in d_open. */
 	if (dev->si_iosize_max == 0)
 		dev->si_iosize_max = DFLTPHYS;
 
 	dsw = dev_refthread(dev, &ref);
 	if (dsw == NULL)
 		return (ENXIO);
 	if (fp == NULL && dsw->d_fdopen != NULL) {
 		dev_relthread(dev, ref);
 		return (ENXIO);
 	}
 
 	if (vp->v_type == VCHR)
 		devfs_usecount_add(vp);
 
 	vlocked = VOP_ISLOCKED(vp);
 	VOP_UNLOCK(vp);
 
 	fpop = td->td_fpop;
 	td->td_fpop = fp;
 	if (fp != NULL) {
 		fp->f_data = dev;
 		fp->f_vnode = vp;
 	}
 	if (dsw->d_fdopen != NULL)
 		error = dsw->d_fdopen(dev, ap->a_mode, td, fp);
 	else
 		error = dsw->d_open(dev, ap->a_mode, S_IFCHR, td);
 	/* Clean up any cdevpriv upon error. */
 	if (error != 0)
 		devfs_clear_cdevpriv();
 	td->td_fpop = fpop;
 
 	vn_lock(vp, vlocked | LK_RETRY);
 	if (error != 0 && vp->v_type == VCHR)
 		devfs_usecount_sub(vp);
 
 	dev_relthread(dev, ref);
 	if (error != 0) {
 		if (error == ERESTART)
 			error = EINTR;
 		return (error);
 	}
 
 #if 0	/* /dev/console */
 	KASSERT(fp != NULL, ("Could not vnode bypass device on NULL fp"));
 #else
 	if (fp == NULL)
 		return (error);
 #endif
 	if (fp->f_ops == &badfileops)
 		finit(fp, fp->f_flag, DTYPE_VNODE, dev, &devfs_ops_f);
 	return (error);
 }
 
 static int
 devfs_pathconf(struct vop_pathconf_args *ap)
 {
 
 	switch (ap->a_name) {
 	case _PC_FILESIZEBITS:
 		*ap->a_retval = 64;
 		return (0);
 	case _PC_NAME_MAX:
 		*ap->a_retval = NAME_MAX;
 		return (0);
 	case _PC_LINK_MAX:
 		*ap->a_retval = INT_MAX;
 		return (0);
 	case _PC_SYMLINK_MAX:
 		*ap->a_retval = MAXPATHLEN;
 		return (0);
 	case _PC_MAX_CANON:
 		if (ap->a_vp->v_vflag & VV_ISTTY) {
 			*ap->a_retval = MAX_CANON;
 			return (0);
 		}
 		return (EINVAL);
 	case _PC_MAX_INPUT:
 		if (ap->a_vp->v_vflag & VV_ISTTY) {
 			*ap->a_retval = MAX_INPUT;
 			return (0);
 		}
 		return (EINVAL);
 	case _PC_VDISABLE:
 		if (ap->a_vp->v_vflag & VV_ISTTY) {
 			*ap->a_retval = _POSIX_VDISABLE;
 			return (0);
 		}
 		return (EINVAL);
 	case _PC_MAC_PRESENT:
 #ifdef MAC
 		/*
 		 * If MAC is enabled, devfs automatically supports
 		 * trivial non-persistent label storage.
 		 */
 		*ap->a_retval = 1;
 #else
 		*ap->a_retval = 0;
 #endif
 		return (0);
 	case _PC_CHOWN_RESTRICTED:
 		*ap->a_retval = 1;
 		return (0);
 	default:
 		return (vop_stdpathconf(ap));
 	}
 	/* NOTREACHED */
 }
 
 /* ARGSUSED */
 static int
 devfs_poll_f(struct file *fp, int events, struct ucred *cred, struct thread *td)
 {
 	struct cdev *dev;
 	struct cdevsw *dsw;
 	int error, ref;
 	struct file *fpop;
 
 	fpop = td->td_fpop;
 	error = devfs_fp_check(fp, &dev, &dsw, &ref);
 	if (error != 0) {
 		error = vnops.fo_poll(fp, events, cred, td);
 		return (error);
 	}
 	error = dsw->d_poll(dev, events, td);
 	td->td_fpop = fpop;
 	dev_relthread(dev, ref);
 	return(error);
 }
 
 /*
  * Print out the contents of a special device vnode.
  */
 static int
 devfs_print(struct vop_print_args *ap)
 {
 
 	printf("\tdev %s\n", devtoname(ap->a_vp->v_rdev));
 	return (0);
 }
 
 static int
 devfs_read_f(struct file *fp, struct uio *uio, struct ucred *cred,
     int flags, struct thread *td)
 {
 	struct cdev *dev;
 	int ioflag, error, ref;
 	ssize_t resid;
 	struct cdevsw *dsw;
 	struct file *fpop;
 
 	if (uio->uio_resid > DEVFS_IOSIZE_MAX)
 		return (EINVAL);
 	fpop = td->td_fpop;
 	error = devfs_fp_check(fp, &dev, &dsw, &ref);
 	if (error != 0) {
 		error = vnops.fo_read(fp, uio, cred, flags, td);
 		return (error);
 	}
 	resid = uio->uio_resid;
 	ioflag = fp->f_flag & (O_NONBLOCK | O_DIRECT);
 	if (ioflag & O_DIRECT)
 		ioflag |= IO_DIRECT;
 
 	foffset_lock_uio(fp, uio, flags | FOF_NOLOCK);
 	error = dsw->d_read(dev, uio, ioflag);
 	if (uio->uio_resid != resid || (error == 0 && resid != 0))
 		devfs_timestamp(&dev->si_atime);
 	td->td_fpop = fpop;
 	dev_relthread(dev, ref);
 
 	foffset_unlock_uio(fp, uio, flags | FOF_NOLOCK | FOF_NEXTOFF_R);
 	return (error);
 }
 
 static int
 devfs_readdir(struct vop_readdir_args *ap)
 {
 	int error;
 	struct uio *uio;
 	struct dirent *dp;
 	struct devfs_dirent *dd;
 	struct devfs_dirent *de;
 	struct devfs_mount *dmp;
 	off_t off;
 	int *tmp_ncookies = NULL;
 
 	if (ap->a_vp->v_type != VDIR)
 		return (ENOTDIR);
 
 	uio = ap->a_uio;
 	if (uio->uio_offset < 0)
 		return (EINVAL);
 
 	/*
 	 * XXX: This is a temporary hack to get around this filesystem not
 	 * supporting cookies. We store the location of the ncookies pointer
 	 * in a temporary variable before calling vfs_subr.c:vfs_read_dirent()
 	 * and set the number of cookies to 0. We then set the pointer to
 	 * NULL so that vfs_read_dirent doesn't try to call realloc() on 
 	 * ap->a_cookies. Later in this function, we restore the ap->a_ncookies
 	 * pointer to its original location before returning to the caller.
 	 */
 	if (ap->a_ncookies != NULL) {
 		tmp_ncookies = ap->a_ncookies;
 		*ap->a_ncookies = 0;
 		ap->a_ncookies = NULL;
 	}
 
 	dmp = VFSTODEVFS(ap->a_vp->v_mount);
 	if (devfs_populate_vp(ap->a_vp) != 0) {
 		if (tmp_ncookies != NULL)
 			ap->a_ncookies = tmp_ncookies;
 		return (EIO);
 	}
 	error = 0;
 	de = ap->a_vp->v_data;
 	off = 0;
 	TAILQ_FOREACH(dd, &de->de_dlist, de_list) {
 		KASSERT(dd->de_cdp != (void *)0xdeadc0de, ("%s %d\n", __func__, __LINE__));
 		if (dd->de_flags & (DE_COVERED | DE_WHITEOUT))
 			continue;
 		if (devfs_prison_check(dd, uio->uio_td))
 			continue;
 		if (dd->de_dirent->d_type == DT_DIR)
 			de = dd->de_dir;
 		else
 			de = dd;
 		dp = dd->de_dirent;
 		MPASS(dp->d_reclen == GENERIC_DIRSIZ(dp));
 		if (dp->d_reclen > uio->uio_resid)
 			break;
 		dp->d_fileno = de->de_inode;
 		/* NOTE: d_off is the offset for the *next* entry. */
 		dp->d_off = off + dp->d_reclen;
 		if (off >= uio->uio_offset) {
 			error = vfs_read_dirent(ap, dp, off);
 			if (error)
 				break;
 		}
 		off += dp->d_reclen;
 	}
 	sx_xunlock(&dmp->dm_lock);
 	uio->uio_offset = off;
 
 	/*
 	 * Restore ap->a_ncookies if it wasn't originally NULL in the first
 	 * place.
 	 */
 	if (tmp_ncookies != NULL)
 		ap->a_ncookies = tmp_ncookies;
 
 	return (error);
 }
 
 static int
 devfs_readlink(struct vop_readlink_args *ap)
 {
 	struct devfs_dirent *de;
 
 	de = ap->a_vp->v_data;
 	return (uiomove(de->de_symlink, strlen(de->de_symlink), ap->a_uio));
 }
 
 static void
 devfs_reclaiml(struct vnode *vp)
 {
 	struct devfs_dirent *de;
 
 	mtx_assert(&devfs_de_interlock, MA_OWNED);
 	de = vp->v_data;
 	if (de != NULL) {
 		MPASS(de->de_usecount == 0);
 		de->de_vnode = NULL;
 		vp->v_data = NULL;
 	}
 }
 
 static int
 devfs_reclaim(struct vop_reclaim_args *ap)
 {
 	struct vnode *vp;
 
 	vp = ap->a_vp;
 	mtx_lock(&devfs_de_interlock);
 	devfs_reclaiml(vp);
 	mtx_unlock(&devfs_de_interlock);
 	return (0);
 }
 
 static int
 devfs_reclaim_vchr(struct vop_reclaim_args *ap)
 {
 	struct vnode *vp;
 	struct cdev *dev;
 
 	vp = ap->a_vp;
 	MPASS(vp->v_type == VCHR);
 
 	mtx_lock(&devfs_de_interlock);
 	VI_LOCK(vp);
 	devfs_usecount_subl(vp);
 	devfs_reclaiml(vp);
 	mtx_unlock(&devfs_de_interlock);
 	dev_lock();
 	dev = vp->v_rdev;
 	vp->v_rdev = NULL;
 	dev_unlock();
 	VI_UNLOCK(vp);
 	if (dev != NULL)
 		dev_rel(dev);
 	return (0);
 }
 
 static int
 devfs_remove(struct vop_remove_args *ap)
 {
 	struct vnode *dvp = ap->a_dvp;
 	struct vnode *vp = ap->a_vp;
 	struct devfs_dirent *dd;
 	struct devfs_dirent *de, *de_covered;
 	struct devfs_mount *dmp = VFSTODEVFS(vp->v_mount);
 
 	ASSERT_VOP_ELOCKED(dvp, "devfs_remove");
 	ASSERT_VOP_ELOCKED(vp, "devfs_remove");
 
 	sx_xlock(&dmp->dm_lock);
 	dd = ap->a_dvp->v_data;
 	de = vp->v_data;
 	if (de->de_cdp == NULL) {
 		TAILQ_REMOVE(&dd->de_dlist, de, de_list);
 		if (de->de_dirent->d_type == DT_LNK) {
 			de_covered = devfs_find(dd, de->de_dirent->d_name,
 			    de->de_dirent->d_namlen, 0);
 			if (de_covered != NULL)
 				de_covered->de_flags &= ~DE_COVERED;
 		}
 		/* We need to unlock dvp because devfs_delete() may lock it. */
 		VOP_UNLOCK(vp);
 		if (dvp != vp)
 			VOP_UNLOCK(dvp);
 		devfs_delete(dmp, de, 0);
 		sx_xunlock(&dmp->dm_lock);
 		if (dvp != vp)
 			vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 	} else {
 		de->de_flags |= DE_WHITEOUT;
 		sx_xunlock(&dmp->dm_lock);
 	}
 	return (0);
 }
 
 /*
  * Revoke is called on a tty when a terminal session ends.  The vnode
  * is orphaned by setting v_op to deadfs so we need to let go of it
  * as well so that we create a new one next time around.
  *
  */
 static int
 devfs_revoke(struct vop_revoke_args *ap)
 {
 	struct vnode *vp = ap->a_vp, *vp2;
 	struct cdev *dev;
 	struct cdev_priv *cdp;
 	struct devfs_dirent *de;
 	enum vgetstate vs;
 	u_int i;
 
 	KASSERT((ap->a_flags & REVOKEALL) != 0, ("devfs_revoke !REVOKEALL"));
 
 	dev = vp->v_rdev;
 	cdp = cdev2priv(dev);
 
 	dev_lock();
 	cdp->cdp_inuse++;
 	dev_unlock();
 
 	vhold(vp);
 	vgone(vp);
 	vdrop(vp);
 
 	VOP_UNLOCK(vp);
  loop:
 	for (;;) {
 		mtx_lock(&devfs_de_interlock);
 		dev_lock();
 		vp2 = NULL;
 		for (i = 0; i <= cdp->cdp_maxdirent; i++) {
 			de = cdp->cdp_dirents[i];
 			if (de == NULL)
 				continue;
 
 			vp2 = de->de_vnode;
 			if (vp2 != NULL) {
 				dev_unlock();
 				vs = vget_prep(vp2);
 				mtx_unlock(&devfs_de_interlock);
 				if (vget_finish(vp2, LK_EXCLUSIVE, vs) != 0)
 					goto loop;
 				vhold(vp2);
 				vgone(vp2);
 				vdrop(vp2);
 				vput(vp2);
 				break;
 			}
 		}
 		if (vp2 != NULL) {
 			continue;
 		}
 		dev_unlock();
 		mtx_unlock(&devfs_de_interlock);
 		break;
 	}
 	dev_lock();
 	cdp->cdp_inuse--;
 	if (!(cdp->cdp_flags & CDP_ACTIVE) && cdp->cdp_inuse == 0) {
 		TAILQ_REMOVE(&cdevp_list, cdp, cdp_list);
 		dev_unlock();
 		dev_rel(&cdp->cdp_c);
 	} else
 		dev_unlock();
 
 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 	return (0);
 }
 
 static int
 devfs_rioctl(struct vop_ioctl_args *ap)
 {
 	struct vnode *vp;
 	struct devfs_mount *dmp;
 	int error;
 
 	vp = ap->a_vp;
 	vn_lock(vp, LK_SHARED | LK_RETRY);
 	if (VN_IS_DOOMED(vp)) {
 		VOP_UNLOCK(vp);
 		return (EBADF);
 	}
 	dmp = VFSTODEVFS(vp->v_mount);
 	sx_xlock(&dmp->dm_lock);
 	VOP_UNLOCK(vp);
 	DEVFS_DMP_HOLD(dmp);
 	devfs_populate(dmp);
 	if (DEVFS_DMP_DROP(dmp)) {
 		sx_xunlock(&dmp->dm_lock);
 		devfs_unmount_final(dmp);
 		return (ENOENT);
 	}
 	error = devfs_rules_ioctl(dmp, ap->a_command, ap->a_data, ap->a_td);
 	sx_xunlock(&dmp->dm_lock);
 	return (error);
 }
 
 static int
 devfs_rread(struct vop_read_args *ap)
 {
 
 	if (ap->a_vp->v_type != VDIR)
 		return (EINVAL);
 	return (VOP_READDIR(ap->a_vp, ap->a_uio, ap->a_cred, NULL, NULL, NULL));
 }
 
 static int
 devfs_setattr(struct vop_setattr_args *ap)
 {
 	struct devfs_dirent *de;
 	struct vattr *vap;
 	struct vnode *vp;
 	struct thread *td;
 	int c, error;
 	uid_t uid;
 	gid_t gid;
 
 	vap = ap->a_vap;
 	vp = ap->a_vp;
 	td = curthread;
 	if ((vap->va_type != VNON) ||
 	    (vap->va_nlink != VNOVAL) ||
 	    (vap->va_fsid != VNOVAL) ||
 	    (vap->va_fileid != VNOVAL) ||
 	    (vap->va_blocksize != VNOVAL) ||
 	    (vap->va_flags != VNOVAL && vap->va_flags != 0) ||
 	    (vap->va_rdev != VNOVAL) ||
 	    ((int)vap->va_bytes != VNOVAL) ||
 	    (vap->va_gen != VNOVAL)) {
 		return (EINVAL);
 	}
 
 	error = devfs_populate_vp(vp);
 	if (error != 0)
 		return (error);
 
 	de = vp->v_data;
 	if (vp->v_type == VDIR)
 		de = de->de_dir;
 
 	c = 0;
 	if (vap->va_uid == (uid_t)VNOVAL)
 		uid = de->de_uid;
 	else
 		uid = vap->va_uid;
 	if (vap->va_gid == (gid_t)VNOVAL)
 		gid = de->de_gid;
 	else
 		gid = vap->va_gid;
 	if (uid != de->de_uid || gid != de->de_gid) {
 		if ((ap->a_cred->cr_uid != de->de_uid) || uid != de->de_uid ||
 		    (gid != de->de_gid && !groupmember(gid, ap->a_cred))) {
 			error = priv_check(td, PRIV_VFS_CHOWN);
 			if (error != 0)
 				goto ret;
 		}
 		de->de_uid = uid;
 		de->de_gid = gid;
 		c = 1;
 	}
 
 	if (vap->va_mode != (mode_t)VNOVAL) {
 		if (ap->a_cred->cr_uid != de->de_uid) {
 			error = priv_check(td, PRIV_VFS_ADMIN);
 			if (error != 0)
 				goto ret;
 		}
 		de->de_mode = vap->va_mode;
 		c = 1;
 	}
 
 	if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL) {
 		error = vn_utimes_perm(vp, vap, ap->a_cred, td);
 		if (error != 0)
 			goto ret;
 		if (vap->va_atime.tv_sec != VNOVAL) {
 			if (vp->v_type == VCHR)
 				vp->v_rdev->si_atime = vap->va_atime;
 			else
 				de->de_atime = vap->va_atime;
 		}
 		if (vap->va_mtime.tv_sec != VNOVAL) {
 			if (vp->v_type == VCHR)
 				vp->v_rdev->si_mtime = vap->va_mtime;
 			else
 				de->de_mtime = vap->va_mtime;
 		}
 		c = 1;
 	}
 
 	if (c) {
 		if (vp->v_type == VCHR)
 			vfs_timestamp(&vp->v_rdev->si_ctime);
 		else
 			vfs_timestamp(&de->de_mtime);
 	}
 
 ret:
 	sx_xunlock(&VFSTODEVFS(vp->v_mount)->dm_lock);
 	return (error);
 }
 
 #ifdef MAC
 static int
 devfs_setlabel(struct vop_setlabel_args *ap)
 {
 	struct vnode *vp;
 	struct devfs_dirent *de;
 
 	vp = ap->a_vp;
 	de = vp->v_data;
 
 	mac_vnode_relabel(ap->a_cred, vp, ap->a_label);
 	mac_devfs_update(vp->v_mount, de, vp);
 
 	return (0);
 }
 #endif
 
 static int
 devfs_stat_f(struct file *fp, struct stat *sb, struct ucred *cred)
 {
 
 	return (vnops.fo_stat(fp, sb, cred));
 }
 
 static int
 devfs_symlink(struct vop_symlink_args *ap)
 {
 	int i, error;
 	struct devfs_dirent *dd;
 	struct devfs_dirent *de, *de_covered, *de_dotdot;
 	struct devfs_mount *dmp;
 
 	error = priv_check(curthread, PRIV_DEVFS_SYMLINK);
 	if (error)
 		return(error);
 	dmp = VFSTODEVFS(ap->a_dvp->v_mount);
 	if (devfs_populate_vp(ap->a_dvp) != 0)
 		return (ENOENT);
 
 	dd = ap->a_dvp->v_data;
 	de = devfs_newdirent(ap->a_cnp->cn_nameptr, ap->a_cnp->cn_namelen);
 	de->de_flags = DE_USER;
 	de->de_uid = 0;
 	de->de_gid = 0;
 	de->de_mode = 0755;
 	de->de_inode = alloc_unr(devfs_inos);
 	de->de_dir = dd;
 	de->de_dirent->d_type = DT_LNK;
 	i = strlen(ap->a_target) + 1;
 	de->de_symlink = malloc(i, M_DEVFS, M_WAITOK);
 	bcopy(ap->a_target, de->de_symlink, i);
 #ifdef MAC
 	mac_devfs_create_symlink(ap->a_cnp->cn_cred, dmp->dm_mount, dd, de);
 #endif
 	de_covered = devfs_find(dd, de->de_dirent->d_name,
 	    de->de_dirent->d_namlen, 0);
 	if (de_covered != NULL) {
 		if ((de_covered->de_flags & DE_USER) != 0) {
 			devfs_delete(dmp, de, DEVFS_DEL_NORECURSE);
 			sx_xunlock(&dmp->dm_lock);
 			return (EEXIST);
 		}
 		KASSERT((de_covered->de_flags & DE_COVERED) == 0,
 		    ("devfs_symlink: entry %p already covered", de_covered));
 		de_covered->de_flags |= DE_COVERED;
 	}
 
 	de_dotdot = TAILQ_FIRST(&dd->de_dlist);		/* "." */
 	de_dotdot = TAILQ_NEXT(de_dotdot, de_list);	/* ".." */
 	TAILQ_INSERT_AFTER(&dd->de_dlist, de_dotdot, de, de_list);
 	devfs_dir_ref_de(dmp, dd);
 	devfs_rules_apply(dmp, de);
 
 	return (devfs_allocv(de, ap->a_dvp->v_mount, LK_EXCLUSIVE, ap->a_vpp));
 }
 
 static int
 devfs_truncate_f(struct file *fp, off_t length, struct ucred *cred, struct thread *td)
 {
 
 	return (vnops.fo_truncate(fp, length, cred, td));
 }
 
 static int
 devfs_write_f(struct file *fp, struct uio *uio, struct ucred *cred,
     int flags, struct thread *td)
 {
 	struct cdev *dev;
 	int error, ioflag, ref;
 	ssize_t resid;
 	struct cdevsw *dsw;
 	struct file *fpop;
 
 	if (uio->uio_resid > DEVFS_IOSIZE_MAX)
 		return (EINVAL);
 	fpop = td->td_fpop;
 	error = devfs_fp_check(fp, &dev, &dsw, &ref);
 	if (error != 0) {
 		error = vnops.fo_write(fp, uio, cred, flags, td);
 		return (error);
 	}
 	KASSERT(uio->uio_td == td, ("uio_td %p is not td %p", uio->uio_td, td));
 	ioflag = fp->f_flag & (O_NONBLOCK | O_DIRECT | O_FSYNC);
 	if (ioflag & O_DIRECT)
 		ioflag |= IO_DIRECT;
 	foffset_lock_uio(fp, uio, flags | FOF_NOLOCK);
 
 	resid = uio->uio_resid;
 
 	error = dsw->d_write(dev, uio, ioflag);
 	if (uio->uio_resid != resid || (error == 0 && resid != 0)) {
 		devfs_timestamp(&dev->si_ctime);
 		dev->si_mtime = dev->si_ctime;
 	}
 	td->td_fpop = fpop;
 	dev_relthread(dev, ref);
 
 	foffset_unlock_uio(fp, uio, flags | FOF_NOLOCK | FOF_NEXTOFF_W);
 	return (error);
 }
 
 static int
 devfs_mmap_f(struct file *fp, vm_map_t map, vm_offset_t *addr, vm_size_t size,
     vm_prot_t prot, vm_prot_t cap_maxprot, int flags, vm_ooffset_t foff,
     struct thread *td)
 {
 	struct cdev *dev;
 	struct cdevsw *dsw;
 	struct mount *mp;
 	struct vnode *vp;
 	struct file *fpop;
 	vm_object_t object;
 	vm_prot_t maxprot;
 	int error, ref;
 
 	vp = fp->f_vnode;
 
 	/*
 	 * Ensure that file and memory protections are
 	 * compatible.
 	 */
 	mp = vp->v_mount;
 	if (mp != NULL && (mp->mnt_flag & MNT_NOEXEC) != 0) {
 		maxprot = VM_PROT_NONE;
 		if ((prot & VM_PROT_EXECUTE) != 0)
 			return (EACCES);
 	} else
 		maxprot = VM_PROT_EXECUTE;
 	if ((fp->f_flag & FREAD) != 0)
 		maxprot |= VM_PROT_READ;
 	else if ((prot & VM_PROT_READ) != 0)
 		return (EACCES);
 
 	/*
 	 * If we are sharing potential changes via MAP_SHARED and we
 	 * are trying to get write permission although we opened it
 	 * without asking for it, bail out.
 	 *
 	 * Note that most character devices always share mappings.
 	 * The one exception is that D_MMAP_ANON devices
 	 * (i.e. /dev/zero) permit private writable mappings.
 	 *
 	 * Rely on vm_mmap_cdev() to fail invalid MAP_PRIVATE requests
 	 * as well as updating maxprot to permit writing for
 	 * D_MMAP_ANON devices rather than doing that here.
 	 */
 	if ((flags & MAP_SHARED) != 0) {
 		if ((fp->f_flag & FWRITE) != 0)
 			maxprot |= VM_PROT_WRITE;
 		else if ((prot & VM_PROT_WRITE) != 0)
 			return (EACCES);
 	}
 	maxprot &= cap_maxprot;
 
 	fpop = td->td_fpop;
 	error = devfs_fp_check(fp, &dev, &dsw, &ref);
 	if (error != 0)
 		return (error);
 
 	error = vm_mmap_cdev(td, size, prot, &maxprot, &flags, dev, dsw, &foff,
 	    &object);
 	td->td_fpop = fpop;
 	dev_relthread(dev, ref);
 	if (error != 0)
 		return (error);
 
 	error = vm_mmap_object(map, addr, size, prot, maxprot, flags, object,
 	    foff, FALSE, td);
 	if (error != 0)
 		vm_object_deallocate(object);
 	return (error);
 }
 
 dev_t
 dev2udev(struct cdev *x)
 {
 	if (x == NULL)
 		return (NODEV);
 	return (cdev2priv(x)->cdp_inode);
 }
 
 static struct fileops devfs_ops_f = {
 	.fo_read =	devfs_read_f,
 	.fo_write =	devfs_write_f,
 	.fo_truncate =	devfs_truncate_f,
 	.fo_ioctl =	devfs_ioctl_f,
 	.fo_poll =	devfs_poll_f,
 	.fo_kqfilter =	devfs_kqfilter_f,
 	.fo_stat =	devfs_stat_f,
 	.fo_close =	devfs_close_f,
 	.fo_chmod =	vn_chmod,
 	.fo_chown =	vn_chown,
 	.fo_sendfile =	vn_sendfile,
 	.fo_seek =	vn_seek,
 	.fo_fill_kinfo = vn_fill_kinfo,
 	.fo_mmap =	devfs_mmap_f,
 	.fo_flags =	DFLAG_PASSABLE | DFLAG_SEEKABLE
 };
 
 /* Vops for non-CHR vnodes in /dev. */
 static struct vop_vector devfs_vnodeops = {
 	.vop_default =		&default_vnodeops,
 
 	.vop_access =		devfs_access,
 	.vop_getattr =		devfs_getattr,
 	.vop_ioctl =		devfs_rioctl,
 	.vop_lookup =		devfs_lookup,
 	.vop_mknod =		devfs_mknod,
 	.vop_pathconf =		devfs_pathconf,
 	.vop_read =		devfs_rread,
 	.vop_readdir =		devfs_readdir,
 	.vop_readlink =		devfs_readlink,
 	.vop_reclaim =		devfs_reclaim,
 	.vop_remove =		devfs_remove,
 	.vop_revoke =		devfs_revoke,
 	.vop_setattr =		devfs_setattr,
 #ifdef MAC
 	.vop_setlabel =		devfs_setlabel,
 #endif
 	.vop_symlink =		devfs_symlink,
 	.vop_vptocnp =		devfs_vptocnp,
 	.vop_lock1 =		vop_lock,
 	.vop_unlock =		vop_unlock,
 	.vop_islocked =		vop_islocked,
 	.vop_add_writecount =	vop_stdadd_writecount_nomsync,
 };
 VFS_VOP_VECTOR_REGISTER(devfs_vnodeops);
 
 /* Vops for VCHR vnodes in /dev. */
 static struct vop_vector devfs_specops = {
 	.vop_default =		&default_vnodeops,
 
 	.vop_access =		devfs_access,
 	.vop_bmap =		VOP_PANIC,
 	.vop_close =		devfs_close,
 	.vop_create =		VOP_PANIC,
 	.vop_fsync =		vop_stdfsync,
 	.vop_getattr =		devfs_getattr,
 	.vop_ioctl =		devfs_ioctl,
 	.vop_link =		VOP_PANIC,
 	.vop_mkdir =		VOP_PANIC,
 	.vop_mknod =		VOP_PANIC,
 	.vop_open =		devfs_open,
 	.vop_pathconf =		devfs_pathconf,
 	.vop_poll =		dead_poll,
 	.vop_print =		devfs_print,
 	.vop_read =		dead_read,
 	.vop_readdir =		VOP_PANIC,
 	.vop_readlink =		VOP_PANIC,
 	.vop_reallocblks =	VOP_PANIC,
 	.vop_reclaim =		devfs_reclaim_vchr,
 	.vop_remove =		devfs_remove,
 	.vop_rename =		VOP_PANIC,
 	.vop_revoke =		devfs_revoke,
 	.vop_rmdir =		VOP_PANIC,
 	.vop_setattr =		devfs_setattr,
 #ifdef MAC
 	.vop_setlabel =		devfs_setlabel,
 #endif
 	.vop_strategy =		VOP_PANIC,
 	.vop_symlink =		VOP_PANIC,
 	.vop_vptocnp =		devfs_vptocnp,
 	.vop_write =		dead_write,
 	.vop_lock1 =		vop_lock,
 	.vop_unlock =		vop_unlock,
 	.vop_islocked =		vop_islocked,
 	.vop_add_writecount =	vop_stdadd_writecount_nomsync,
 };
 VFS_VOP_VECTOR_REGISTER(devfs_specops);
 
 /*
  * Our calling convention to the device drivers used to be that we passed
  * vnode.h IO_* flags to read()/write(), but we're moving to fcntl.h O_ 
  * flags instead since that's what open(), close() and ioctl() takes and
  * we don't really want vnode.h in device drivers.
  * We solved the source compatibility by redefining some vnode flags to
  * be the same as the fcntl ones and by sending down the bitwise OR of
  * the respective fcntl/vnode flags.  These CTASSERTS make sure nobody
  * pulls the rug out under this.
  */
 CTASSERT(O_NONBLOCK == IO_NDELAY);
 CTASSERT(O_FSYNC == IO_SYNC);
diff --git a/sys/fs/ext2fs/ext2_lookup.c b/sys/fs/ext2fs/ext2_lookup.c
index a8e28e24f9e2..16f2aa88b28c 100644
--- a/sys/fs/ext2fs/ext2_lookup.c
+++ b/sys/fs/ext2fs/ext2_lookup.c
@@ -1,1275 +1,1267 @@
 /*-
  *  modified for Lites 1.1
  *
  *  Aug 1995, Godmar Back (gback@cs.utah.edu)
  *  University of Utah, Department of Computer Science
  */
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
  * (c) UNIX System Laboratories, Inc.
  * All or some portions of this file are derived from material licensed
  * to the University of California by American Telephone and Telegraph
  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
  * the permission of UNIX System Laboratories, Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)ufs_lookup.c	8.6 (Berkeley) 4/1/94
  * $FreeBSD$
  */
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/namei.h>
 #include <sys/bio.h>
 #include <sys/buf.h>
 #include <sys/endian.h>
 #include <sys/mount.h>
 #include <sys/vnode.h>
 #include <sys/malloc.h>
 #include <sys/dirent.h>
 #include <sys/sdt.h>
 #include <sys/sysctl.h>
 
 #include <ufs/ufs/dir.h>
 
 #include <fs/ext2fs/fs.h>
 #include <fs/ext2fs/inode.h>
 #include <fs/ext2fs/ext2_mount.h>
 #include <fs/ext2fs/ext2fs.h>
 #include <fs/ext2fs/ext2_dinode.h>
 #include <fs/ext2fs/ext2_dir.h>
 #include <fs/ext2fs/ext2_extern.h>
 #include <fs/ext2fs/fs.h>
 
 SDT_PROVIDER_DECLARE(ext2fs);
 /*
  * ext2fs trace probe:
  * arg0: verbosity. Higher numbers give more verbose messages
  * arg1: Textual message
  */
 SDT_PROBE_DEFINE2(ext2fs, , lookup, trace, "int", "char*");
 SDT_PROBE_DEFINE4(ext2fs, , trace, ext2_dirbad_error,
     "char*", "ino_t", "doff_t", "char*");
 SDT_PROBE_DEFINE5(ext2fs, , trace, ext2_dirbadentry_error,
     "char*", "int", "uint32_t", "uint16_t", "uint8_t");
 
 static SYSCTL_NODE(_vfs, OID_AUTO, e2fs, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
     "EXT2FS filesystem");
 
 /*
    DIRBLKSIZE in ffs is DEV_BSIZE (in most cases 512)
    while it is the native blocksize in ext2fs - thus, a #define
    is no longer appropriate
 */
 #undef  DIRBLKSIZ
 
 static u_char ext2_ft_to_dt[] = {
 	DT_UNKNOWN,		/* EXT2_FT_UNKNOWN */
 	DT_REG,			/* EXT2_FT_REG_FILE */
 	DT_DIR,			/* EXT2_FT_DIR */
 	DT_CHR,			/* EXT2_FT_CHRDEV */
 	DT_BLK,			/* EXT2_FT_BLKDEV */
 	DT_FIFO,		/* EXT2_FT_FIFO */
 	DT_SOCK,		/* EXT2_FT_SOCK */
 	DT_LNK,			/* EXT2_FT_SYMLINK */
 };
 #define	FTTODT(ft) \
     ((ft) < nitems(ext2_ft_to_dt) ? ext2_ft_to_dt[(ft)] : DT_UNKNOWN)
 
 static u_char dt_to_ext2_ft[] = {
 	EXT2_FT_UNKNOWN,	/* DT_UNKNOWN */
 	EXT2_FT_FIFO,		/* DT_FIFO */
 	EXT2_FT_CHRDEV,		/* DT_CHR */
 	EXT2_FT_UNKNOWN,	/* unused */
 	EXT2_FT_DIR,		/* DT_DIR */
 	EXT2_FT_UNKNOWN,	/* unused */
 	EXT2_FT_BLKDEV,		/* DT_BLK */
 	EXT2_FT_UNKNOWN,	/* unused */
 	EXT2_FT_REG_FILE,	/* DT_REG */
 	EXT2_FT_UNKNOWN,	/* unused */
 	EXT2_FT_SYMLINK,	/* DT_LNK */
 	EXT2_FT_UNKNOWN,	/* unused */
 	EXT2_FT_SOCK,		/* DT_SOCK */
 	EXT2_FT_UNKNOWN,	/* unused */
 	EXT2_FT_UNKNOWN,	/* DT_WHT */
 };
 #define	DTTOFT(dt) \
     ((dt) < nitems(dt_to_ext2_ft) ? dt_to_ext2_ft[(dt)] : EXT2_FT_UNKNOWN)
 
 static int	ext2_check_direntry(struct vnode *dp,
 		    struct ext2fs_direct_2 *de, int entryoffsetinblock);
 static int	ext2_is_dot_entry(struct componentname *cnp);
 static int	ext2_lookup_ino(struct vnode *vdp, struct vnode **vpp,
 		    struct componentname *cnp, ino_t *dd_ino);
 
 static int
 ext2_is_dot_entry(struct componentname *cnp)
 {
 	if (cnp->cn_namelen <= 2 && cnp->cn_nameptr[0] == '.' &&
 	    (cnp->cn_nameptr[1] == '.' || cnp->cn_nameptr[1] == '\0'))
 		return (1);
 	return (0);
 }
 
 /*
  * Vnode op for reading directories.
  */
 int
 ext2_readdir(struct vop_readdir_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct uio *uio = ap->a_uio;
 	struct buf *bp;
 	struct inode *ip;
 	struct ext2fs_direct_2 *dp, *edp;
 	uint64_t *cookies;
 	struct dirent dstdp;
 	off_t offset, startoffset;
 	size_t readcnt, skipcnt;
 	ssize_t startresid;
 	u_int ncookies;
 	int DIRBLKSIZ = VTOI(ap->a_vp)->i_e2fs->e2fs_bsize;
 	int error;
 
 	if (uio->uio_offset < 0)
 		return (EINVAL);
 	ip = VTOI(vp);
 	if (ap->a_ncookies != NULL) {
 		if (uio->uio_resid < 0)
 			ncookies = 0;
 		else
 			ncookies = uio->uio_resid;
 		if (uio->uio_offset >= ip->i_size)
 			ncookies = 0;
 		else if (ip->i_size - uio->uio_offset < ncookies)
 			ncookies = ip->i_size - uio->uio_offset;
 		ncookies = ncookies / (offsetof(struct ext2fs_direct_2,
 		    e2d_namlen) + 4) + 1;
 		cookies = malloc(ncookies * sizeof(*cookies), M_TEMP, M_WAITOK);
 		*ap->a_ncookies = ncookies;
 		*ap->a_cookies = cookies;
 	} else {
 		ncookies = 0;
 		cookies = NULL;
 	}
 	offset = startoffset = uio->uio_offset;
 	startresid = uio->uio_resid;
 	error = 0;
 	while (error == 0 && uio->uio_resid > 0 &&
 	    uio->uio_offset < ip->i_size) {
 		error = ext2_blkatoff(vp, uio->uio_offset, NULL, &bp);
 		if (error)
 			break;
 		if (bp->b_offset + bp->b_bcount > ip->i_size)
 			readcnt = ip->i_size - bp->b_offset;
 		else
 			readcnt = bp->b_bcount;
 		skipcnt = (size_t)(uio->uio_offset - bp->b_offset) &
 		    ~(size_t)(DIRBLKSIZ - 1);
 		offset = bp->b_offset + skipcnt;
 		dp = (struct ext2fs_direct_2 *)&bp->b_data[skipcnt];
 		edp = (struct ext2fs_direct_2 *)&bp->b_data[readcnt];
 		while (error == 0 && uio->uio_resid > 0 && dp < edp) {
 			if (le16toh(dp->e2d_reclen) <= offsetof(struct ext2fs_direct_2,
 			    e2d_namlen) || (caddr_t)dp + le16toh(dp->e2d_reclen) >
 			    (caddr_t)edp) {
 				error = EIO;
 				break;
 			}
 			/*-
 			 * "New" ext2fs directory entries differ in 3 ways
 			 * from ufs on-disk ones:
 			 * - the name is not necessarily NUL-terminated.
 			 * - the file type field always exists and always
 			 *   follows the name length field.
 			 * - the file type is encoded in a different way.
 			 *
 			 * "Old" ext2fs directory entries need no special
 			 * conversions, since they are binary compatible
 			 * with "new" entries having a file type of 0 (i.e.,
 			 * EXT2_FT_UNKNOWN).  Splitting the old name length
 			 * field didn't make a mess like it did in ufs,
 			 * because ext2fs uses a machine-independent disk
 			 * layout.
 			 */
 			dstdp.d_namlen = dp->e2d_namlen;
 			dstdp.d_type = FTTODT(dp->e2d_type);
 			if (offsetof(struct ext2fs_direct_2, e2d_namlen) +
 			    dstdp.d_namlen > le16toh(dp->e2d_reclen)) {
 				error = EIO;
 				break;
 			}
 			if (offset < startoffset || le32toh(dp->e2d_ino) == 0)
 				goto nextentry;
 			dstdp.d_fileno = le32toh(dp->e2d_ino);
 			dstdp.d_reclen = GENERIC_DIRSIZ(&dstdp);
 			bcopy(dp->e2d_name, dstdp.d_name, dstdp.d_namlen);
 			/* NOTE: d_off is the offset of the *next* entry. */
 			dstdp.d_off = offset + le16toh(dp->e2d_reclen);
 			dirent_terminate(&dstdp);
 			if (dstdp.d_reclen > uio->uio_resid) {
 				if (uio->uio_resid == startresid)
 					error = EINVAL;
 				else
 					error = EJUSTRETURN;
 				break;
 			}
 			/* Advance dp. */
 			error = uiomove((caddr_t)&dstdp, dstdp.d_reclen, uio);
 			if (error)
 				break;
 			if (cookies != NULL) {
 				KASSERT(ncookies > 0,
 				    ("ext2_readdir: cookies buffer too small"));
 				*cookies = offset + le16toh(dp->e2d_reclen);
 				cookies++;
 				ncookies--;
 			}
 nextentry:
 			offset += le16toh(dp->e2d_reclen);
 			dp = (struct ext2fs_direct_2 *)((caddr_t)dp +
 			    le16toh(dp->e2d_reclen));
 		}
 		bqrelse(bp);
 		uio->uio_offset = offset;
 	}
 	/* We need to correct uio_offset. */
 	uio->uio_offset = offset;
 	if (error == EJUSTRETURN)
 		error = 0;
 	if (ap->a_ncookies != NULL) {
 		if (error == 0) {
 			ap->a_ncookies -= ncookies;
 		} else {
 			free(*ap->a_cookies, M_TEMP);
 			*ap->a_ncookies = 0;
 			*ap->a_cookies = NULL;
 		}
 	}
 	if (error == 0 && ap->a_eofflag)
 		*ap->a_eofflag = ip->i_size <= uio->uio_offset;
 	return (error);
 }
 
 /*
  * Convert a component of a pathname into a pointer to a locked inode.
  * This is a very central and rather complicated routine.
  * If the file system is not maintained in a strict tree hierarchy,
  * this can result in a deadlock situation (see comments in code below).
  *
  * The cnp->cn_nameiop argument is LOOKUP, CREATE, RENAME, or DELETE depending
  * on whether the name is to be looked up, created, renamed, or deleted.
  * When CREATE, RENAME, or DELETE is specified, information usable in
  * creating, renaming, or deleting a directory entry may be calculated.
  * If flag has LOCKPARENT or'ed into it and the target of the pathname
  * exists, lookup returns both the target and its parent directory locked.
  * When creating or renaming and LOCKPARENT is specified, the target may
  * not be ".".  When deleting and LOCKPARENT is specified, the target may
  * be "."., but the caller must check to ensure it does an vrele and vput
  * instead of two vputs.
  *
  * Overall outline of ext2_lookup:
  *
  *	search for name in directory, to found or notfound
  * notfound:
  *	if creating, return locked directory, leaving info on available slots
  *	else return error
  * found:
  *	if at end of path and deleting, return information to allow delete
  *	if at end of path and rewriting (RENAME and LOCKPARENT), lock target
  *	  inode and return info to allow rewrite
  *	if not at end, add name to cache; if at end and neither creating
  *	  nor deleting, add name to cache
  */
 int
 ext2_lookup(struct vop_cachedlookup_args *ap)
 {
 
 	return (ext2_lookup_ino(ap->a_dvp, ap->a_vpp, ap->a_cnp, NULL));
 }
 
 static int
 ext2_lookup_ino(struct vnode *vdp, struct vnode **vpp, struct componentname *cnp,
     ino_t *dd_ino)
 {
 	struct inode *dp;		/* inode for directory being searched */
 	struct buf *bp;			/* a buffer of directory entries */
 	struct ext2fs_direct_2 *ep;	/* the current directory entry */
 	int entryoffsetinblock;		/* offset of ep in bp's buffer */
 	struct ext2fs_searchslot ss;
 	doff_t i_diroff;		/* cached i_diroff value */
 	doff_t i_offset;		/* cached i_offset value */
 	int numdirpasses;		/* strategy for directory search */
 	doff_t endsearch;		/* offset to end directory search */
 	doff_t prevoff;			/* prev entry dp->i_offset */
 	struct vnode *pdp;		/* saved dp during symlink work */
 	struct vnode *tdp;		/* returned by VFS_VGET */
 	doff_t enduseful;		/* pointer past last used dir slot */
 	u_long bmask;			/* block offset mask */
 	int error;
 	struct ucred *cred = cnp->cn_cred;
 	int flags = cnp->cn_flags;
 	int nameiop = cnp->cn_nameiop;
 	ino_t ino, ino1;
 	int ltype;
 	int entry_found = 0;
 
 	int DIRBLKSIZ = VTOI(vdp)->i_e2fs->e2fs_bsize;
 
 	if (vpp != NULL)
 		*vpp = NULL;
 
 	dp = VTOI(vdp);
 	bmask = VFSTOEXT2(vdp->v_mount)->um_mountp->mnt_stat.f_iosize - 1;
 restart:
 	bp = NULL;
 	ss.slotoffset = -1;
 
 	/*
 	 * We now have a segment name to search for, and a directory to search.
 	 *
 	 * Suppress search for slots unless creating
 	 * file and at end of pathname, in which case
 	 * we watch for a place to put the new file in
 	 * case it doesn't already exist.
 	 */
 	i_diroff = dp->i_diroff;
 	ss.slotstatus = FOUND;
 	ss.slotfreespace = ss.slotsize = ss.slotneeded = 0;
 	if ((nameiop == CREATE || nameiop == RENAME) &&
 	    (flags & ISLASTCN)) {
 		ss.slotstatus = NONE;
 		ss.slotneeded = EXT2_DIR_REC_LEN(cnp->cn_namelen);
 		/*
 		 * was ss.slotneeded = (sizeof(struct direct) - MAXNAMLEN +
 		 * cnp->cn_namelen + 3) &~ 3;
 		 */
 	}
 	/*
 	 * Try to lookup dir entry using htree directory index.
 	 *
 	 * If we got an error or we want to find '.' or '..' entry,
 	 * we will fall back to linear search.
 	 */
 	if (!ext2_is_dot_entry(cnp) && ext2_htree_has_idx(dp)) {
 		numdirpasses = 1;
 		entryoffsetinblock = 0;
 		switch (ext2_htree_lookup(dp, cnp->cn_nameptr, cnp->cn_namelen,
 		    &bp, &entryoffsetinblock, &i_offset, &prevoff,
 		    &enduseful, &ss)) {
 		case 0:
 			ep = (struct ext2fs_direct_2 *)((char *)bp->b_data +
 			    (i_offset & bmask));
 			goto foundentry;
 		case ENOENT:
 			i_offset = roundup2(dp->i_size, DIRBLKSIZ);
 			goto notfound;
 		default:
 			/*
 			 * Something failed; just fallback to do a linear
 			 * search.
 			 */
 			break;
 		}
 	}
 
 	/*
 	 * If there is cached information on a previous search of
 	 * this directory, pick up where we last left off.
 	 * We cache only lookups as these are the most common
 	 * and have the greatest payoff. Caching CREATE has little
 	 * benefit as it usually must search the entire directory
 	 * to determine that the entry does not exist. Caching the
 	 * location of the last DELETE or RENAME has not reduced
 	 * profiling time and hence has been removed in the interest
 	 * of simplicity.
 	 */
 	if (nameiop != LOOKUP || i_diroff == 0 ||
 	    i_diroff > dp->i_size) {
 		entryoffsetinblock = 0;
 		i_offset = 0;
 		numdirpasses = 1;
 	} else {
 		i_offset = i_diroff;
 		if ((entryoffsetinblock = i_offset & bmask) &&
 		    (error = ext2_blkatoff(vdp, (off_t)i_offset, NULL,
 		    &bp)))
 			return (error);
 		numdirpasses = 2;
 		nchstats.ncs_2passes++;
 	}
 	prevoff = i_offset;
 	endsearch = roundup2(dp->i_size, DIRBLKSIZ);
 	enduseful = 0;
 
 searchloop:
 	while (i_offset < endsearch) {
 		/*
 		 * If necessary, get the next directory block.
 		 */
 		if (bp != NULL)
 			brelse(bp);
 		error = ext2_blkatoff(vdp, (off_t)i_offset, NULL, &bp);
 		if (error != 0)
 			return (error);
 
 		entryoffsetinblock = 0;
 		if (ss.slotstatus == NONE) {
 			ss.slotoffset = -1;
 			ss.slotfreespace = 0;
 		}
 
 		error = ext2_search_dirblock(dp, bp->b_data, &entry_found,
 		    cnp->cn_nameptr, cnp->cn_namelen,
 		    &entryoffsetinblock, &i_offset, &prevoff,
 		    &enduseful, &ss);
 		if (error != 0) {
 			brelse(bp);
 			return (error);
 		}
 		if (entry_found) {
 			ep = (struct ext2fs_direct_2 *)((char *)bp->b_data +
 			    (entryoffsetinblock & bmask));
 foundentry:
 			ino = le32toh(ep->e2d_ino);
 			goto found;
 		}
 	}
 notfound:
 	/*
 	 * If we started in the middle of the directory and failed
 	 * to find our target, we must check the beginning as well.
 	 */
 	if (numdirpasses == 2) {
 		numdirpasses--;
 		i_offset = 0;
 		endsearch = i_diroff;
 		goto searchloop;
 	}
 	if (bp != NULL)
 		brelse(bp);
 	/*
 	 * If creating, and at end of pathname and current
 	 * directory has not been removed, then can consider
 	 * allowing file to be created.
 	 */
 	if ((nameiop == CREATE || nameiop == RENAME) &&
 	    (flags & ISLASTCN) && dp->i_nlink != 0) {
 		/*
 		 * Access for write is interpreted as allowing
 		 * creation of files in the directory.
 		 */
 		if ((error = VOP_ACCESS(vdp, VWRITE, cred, curthread)) != 0)
 			return (error);
 		/*
 		 * Return an indication of where the new directory
 		 * entry should be put.  If we didn't find a slot,
 		 * then set dp->i_count to 0 indicating
 		 * that the new slot belongs at the end of the
 		 * directory. If we found a slot, then the new entry
 		 * can be put in the range from dp->i_offset to
 		 * dp->i_offset + dp->i_count.
 		 */
 		if (ss.slotstatus == NONE) {
 			dp->i_offset = roundup2(dp->i_size, DIRBLKSIZ);
 			dp->i_count = 0;
 			enduseful = dp->i_offset;
 		} else {
 			dp->i_offset = ss.slotoffset;
 			dp->i_count = ss.slotsize;
 			if (enduseful < ss.slotoffset + ss.slotsize)
 				enduseful = ss.slotoffset + ss.slotsize;
 		}
 		dp->i_endoff = roundup2(enduseful, DIRBLKSIZ);
 		/*
 		 * We return with the directory locked, so that
 		 * the parameters we set up above will still be
 		 * valid if we actually decide to do a direnter().
 		 * We return ni_vp == NULL to indicate that the entry
 		 * does not currently exist; we leave a pointer to
 		 * the (locked) directory inode in ndp->ni_dvp.
-		 * The pathname buffer is saved so that the name
-		 * can be obtained later.
 		 *
 		 * NB - if the directory is unlocked, then this
 		 * information cannot be used.
 		 */
-		cnp->cn_flags |= SAVENAME;
 		return (EJUSTRETURN);
 	}
 	/*
 	 * Insert name into cache (as non-existent) if appropriate.
 	 */
 	if ((cnp->cn_flags & MAKEENTRY) != 0)
 		cache_enter(vdp, NULL, cnp);
 	return (ENOENT);
 
 found:
 	if (dd_ino != NULL)
 		*dd_ino = ino;
 	if (numdirpasses == 2)
 		nchstats.ncs_pass2++;
 	/*
 	 * Check that directory length properly reflects presence
 	 * of this entry.
 	 */
 	if (entryoffsetinblock + EXT2_DIR_REC_LEN(ep->e2d_namlen) >
 	    dp->i_size) {
 		ext2_dirbad(dp, i_offset, "i_size too small");
 		dp->i_size = entryoffsetinblock + EXT2_DIR_REC_LEN(ep->e2d_namlen);
 		dp->i_flag |= IN_CHANGE | IN_UPDATE;
 	}
 	brelse(bp);
 
 	/*
 	 * Found component in pathname.
 	 * If the final component of path name, save information
 	 * in the cache as to where the entry was found.
 	 */
 	if ((flags & ISLASTCN) && nameiop == LOOKUP)
 		dp->i_diroff = rounddown2(i_offset, DIRBLKSIZ);
 	/*
 	 * If deleting, and at end of pathname, return
 	 * parameters which can be used to remove file.
 	 */
 	if (nameiop == DELETE && (flags & ISLASTCN)) {
 		if (flags & LOCKPARENT)
 			ASSERT_VOP_ELOCKED(vdp, __FUNCTION__);
 		/*
 		 * Write access to directory required to delete files.
 		 */
 		if ((error = VOP_ACCESS(vdp, VWRITE, cred, curthread)) != 0)
 			return (error);
 		/*
 		 * Return pointer to current entry in dp->i_offset,
 		 * and distance past previous entry (if there
 		 * is a previous entry in this block) in dp->i_count.
 		 * Save directory inode pointer in ndp->ni_dvp for dirremove().
 		 *
 		 * Technically we shouldn't be setting these in the
 		 * WANTPARENT case (first lookup in rename()), but any
 		 * lookups that will result in directory changes will
 		 * overwrite these.
 		 */
 		dp->i_offset = i_offset;
 		if ((dp->i_offset & (DIRBLKSIZ - 1)) == 0)
 			dp->i_count = 0;
 		else
 			dp->i_count = dp->i_offset - prevoff;
 		if (dd_ino != NULL)
 			return (0);
 		if (dp->i_number == ino) {
 			VREF(vdp);
 			*vpp = vdp;
 			return (0);
 		}
 		if ((error = VFS_VGET(vdp->v_mount, ino, LK_EXCLUSIVE,
 		    &tdp)) != 0)
 			return (error);
 		/*
 		 * If directory is "sticky", then user must own
 		 * the directory, or the file in it, else she
 		 * may not delete it (unless she's root). This
 		 * implements append-only directories.
 		 */
 		if ((dp->i_mode & ISVTX) &&
 		    cred->cr_uid != 0 &&
 		    cred->cr_uid != dp->i_uid &&
 		    VTOI(tdp)->i_uid != cred->cr_uid) {
 			vput(tdp);
 			return (EPERM);
 		}
 		*vpp = tdp;
 		return (0);
 	}
 
 	/*
 	 * If rewriting (RENAME), return the inode and the
 	 * information required to rewrite the present directory
 	 * Must get inode of directory entry to verify it's a
 	 * regular file, or empty directory.
 	 */
 	if (nameiop == RENAME && (flags & ISLASTCN)) {
 		if ((error = VOP_ACCESS(vdp, VWRITE, cred, curthread)) != 0)
 			return (error);
 		/*
 		 * Careful about locking second inode.
 		 * This can only occur if the target is ".".
 		 */
 		dp->i_offset = i_offset;
 		if (dp->i_number == ino)
 			return (EISDIR);
 		if (dd_ino != NULL)
 			return (0);
 		if ((error = VFS_VGET(vdp->v_mount, ino, LK_EXCLUSIVE,
 		    &tdp)) != 0)
 			return (error);
 		*vpp = tdp;
-		cnp->cn_flags |= SAVENAME;
 		return (0);
 	}
 	if (dd_ino != NULL)
 		return (0);
 
 	/*
 	 * Step through the translation in the name.  We do not `vput' the
 	 * directory because we may need it again if a symbolic link
 	 * is relative to the current directory.  Instead we save it
 	 * unlocked as "pdp".  We must get the target inode before unlocking
 	 * the directory to insure that the inode will not be removed
 	 * before we get it.  We prevent deadlock by always fetching
 	 * inodes from the root, moving down the directory tree. Thus
 	 * when following backward pointers ".." we must unlock the
 	 * parent directory before getting the requested directory.
 	 * There is a potential race condition here if both the current
 	 * and parent directories are removed before the VFS_VGET for the
 	 * inode associated with ".." returns.  We hope that this occurs
 	 * infrequently since we cannot avoid this race condition without
 	 * implementing a sophisticated deadlock detection algorithm.
 	 * Note also that this simple deadlock detection scheme will not
 	 * work if the file system has any hard links other than ".."
 	 * that point backwards in the directory structure.
 	 */
 	pdp = vdp;
 	if (flags & ISDOTDOT) {
 		error = vn_vget_ino(pdp, ino, cnp->cn_lkflags, &tdp);
 		if (VN_IS_DOOMED(pdp)) {
 			if (error == 0)
 				vput(tdp);
 			error = ENOENT;
 		}
 		if (error)
 			return (error);
 		/*
 		 * Recheck that ".." entry in the vdp directory points
 		 * to the inode we looked up before vdp lock was
 		 * dropped.
 		 */
 		error = ext2_lookup_ino(pdp, NULL, cnp, &ino1);
 		if (error) {
 			vput(tdp);
 			return (error);
 		}
 		if (ino1 != ino) {
 			vput(tdp);
 			goto restart;
 		}
 		*vpp = tdp;
 	} else if (dp->i_number == ino) {
 		VREF(vdp);	/* we want ourself, ie "." */
 		/*
 		 * When we lookup "." we still can be asked to lock it
 		 * differently.
 		 */
 		ltype = cnp->cn_lkflags & LK_TYPE_MASK;
 		if (ltype != VOP_ISLOCKED(vdp)) {
 			if (ltype == LK_EXCLUSIVE)
 				vn_lock(vdp, LK_UPGRADE | LK_RETRY);
 			else	/* if (ltype == LK_SHARED) */
 				vn_lock(vdp, LK_DOWNGRADE | LK_RETRY);
 		}
 		*vpp = vdp;
 	} else {
 		if ((error = VFS_VGET(vdp->v_mount, ino, cnp->cn_lkflags,
 		    &tdp)) != 0)
 			return (error);
 		*vpp = tdp;
 	}
 
 	/*
 	 * Insert name into cache if appropriate.
 	 */
 	if (cnp->cn_flags & MAKEENTRY)
 		cache_enter(vdp, *vpp, cnp);
 	return (0);
 }
 
 int
 ext2_search_dirblock(struct inode *ip, void *data, int *foundp,
     const char *name, int namelen, int *entryoffsetinblockp,
     doff_t *offp, doff_t *prevoffp, doff_t *endusefulp,
     struct ext2fs_searchslot *ssp)
 {
 	struct vnode *vdp;
 	struct ext2fs_direct_2 *ep, *top;
 	uint32_t bsize = ip->i_e2fs->e2fs_bsize;
 	int offset = *entryoffsetinblockp;
 	int namlen;
 
 	vdp = ITOV(ip);
 
 	ep = (struct ext2fs_direct_2 *)((char *)data + offset);
 	top = (struct ext2fs_direct_2 *)((char *)data + bsize);
 	while (ep < top) {
 		if (ext2_check_direntry(vdp, ep, offset)) {
 			int i;
 
 			ext2_dirbad(ip, *offp, "mangled entry");
 			i = bsize - (offset & (bsize - 1));
 			*offp += i;
 			offset += i;
 			ep = (struct ext2fs_direct_2 *)((char *)data + offset);
 			continue;
 		}
 
 		/*
 		 * If an appropriate sized slot has not yet been found,
 		 * check to see if one is available. Also accumulate space
 		 * in the current block so that we can determine if
 		 * compaction is viable.
 		 */
 		if (ssp->slotstatus != FOUND) {
 			int size = le16toh(ep->e2d_reclen);
 
 			if (ep->e2d_ino != 0)
 				size -= EXT2_DIR_REC_LEN(ep->e2d_namlen);
 			else if (ext2_is_dirent_tail(ip, ep))
 				size -= sizeof(struct ext2fs_direct_tail);
 			if (size > 0) {
 				if (size >= ssp->slotneeded) {
 					ssp->slotstatus = FOUND;
 					ssp->slotoffset = *offp;
 					ssp->slotsize = le16toh(ep->e2d_reclen);
 				} else if (ssp->slotstatus == NONE) {
 					ssp->slotfreespace += size;
 					if (ssp->slotoffset == -1)
 						ssp->slotoffset = *offp;
 					if (ssp->slotfreespace >= ssp->slotneeded) {
 						ssp->slotstatus = COMPACT;
 						ssp->slotsize = *offp +
 						    le16toh(ep->e2d_reclen) -
 						    ssp->slotoffset;
 					}
 				}
 			}
 		}
 		/*
 		 * Check for a name match.
 		 */
 		if (ep->e2d_ino != 0) {
 			namlen = ep->e2d_namlen;
 			if (namlen == namelen &&
 			    !bcmp(name, ep->e2d_name, (unsigned)namlen)) {
 				/*
 				 * Save directory entry's inode number and
 				 * reclen in ndp->ni_ufs area, and release
 				 * directory buffer.
 				 */
 				*foundp = 1;
 				return (0);
 			}
 		}
 		*prevoffp = *offp;
 		*offp += le16toh(ep->e2d_reclen);
 		offset += le16toh(ep->e2d_reclen);
 		*entryoffsetinblockp = offset;
 		if (ep->e2d_ino != 0)
 			*endusefulp = *offp;
 		/*
 		 * Get pointer to the next entry.
 		 */
 		ep = (struct ext2fs_direct_2 *)((char *)data + offset);
 	}
 
 	return (0);
 }
 
 void
 ext2_dirbad(struct inode *ip, doff_t offset, char *how)
 {
 	struct mount *mp;
 
 	mp = ITOV(ip)->v_mount;
 	if ((mp->mnt_flag & MNT_RDONLY) == 0)
 		panic("ext2_dirbad: %s: bad dir ino %ju at offset %ld: %s\n",
 		    mp->mnt_stat.f_mntonname, (uintmax_t)ip->i_number,
 		    (long)offset, how);
 	else
 		SDT_PROBE4(ext2fs, , trace, ext2_dirbad_error,
 		    mp->mnt_stat.f_mntonname, ip->i_number, offset, how);
 }
 
 /*
  * Do consistency checking on a directory entry:
  *	record length must be multiple of 4
  *	entry must fit in rest of its DIRBLKSIZ block
  *	record must be large enough to contain entry
  *	name is not longer than MAXNAMLEN
  *	name must be as long as advertised, and null terminated
  */
 static int
 ext2_check_direntry(struct vnode *dp, struct ext2fs_direct_2 *de,
     int entryoffsetinblock)
 {
 	struct m_ext2fs *fs = VTOI(dp)->i_e2fs;
 	char *error_msg = NULL;
 
 	if (le16toh(de->e2d_reclen) < EXT2_DIR_REC_LEN(1))
 		error_msg = "rec_len is smaller than minimal";
 	else if (le16toh(de->e2d_reclen) % 4 != 0)
 		error_msg = "rec_len % 4 != 0";
 	else if (le16toh(de->e2d_reclen) < EXT2_DIR_REC_LEN(de->e2d_namlen))
 		error_msg = "reclen is too small for name_len";
 	else if (entryoffsetinblock + le16toh(de->e2d_reclen)> fs->e2fs_bsize)
 		error_msg = "directory entry across blocks";
 	else if (le32toh(de->e2d_ino) > fs->e2fs->e2fs_icount)
 		error_msg = "directory entry inode out of bounds";
 
 	if (error_msg != NULL) {
 		SDT_PROBE5(ext2fs, , trace, ext2_dirbadentry_error,
 		    error_msg, entryoffsetinblock,
 		    le32toh(de->e2d_ino), le16toh(de->e2d_reclen),
 		    de->e2d_namlen);
 	}
 	return (error_msg == NULL ? 0 : EINVAL);
 }
 
 /*
  * Insert an entry into the fresh directory block.
  * Initialize entry tail if the metadata_csum feature is turned on.
  */
 static int
 ext2_add_first_entry(struct vnode *dvp, struct ext2fs_direct_2 *entry,
     struct componentname *cnp)
 {
 	struct inode *dp;
 	struct iovec aiov;
 	struct uio auio;
 	char* buf = NULL;
 	int dirblksize, error;
 
 	dp = VTOI(dvp);
 	dirblksize = dp->i_e2fs->e2fs_bsize;
 
 	if (dp->i_offset & (dirblksize - 1))
 		panic("ext2_add_first_entry: bad directory offset");
 
 	if (EXT2_HAS_RO_COMPAT_FEATURE(dp->i_e2fs,
 	    EXT2F_ROCOMPAT_METADATA_CKSUM)) {
 		entry->e2d_reclen = htole16(dirblksize -
 		    sizeof(struct ext2fs_direct_tail));
 		buf = malloc(dirblksize, M_TEMP, M_WAITOK);
 		memcpy(buf, entry, EXT2_DIR_REC_LEN(entry->e2d_namlen));
 		ext2_init_dirent_tail(EXT2_DIRENT_TAIL(buf, dirblksize));
 		ext2_dirent_csum_set(dp, (struct ext2fs_direct_2 *)buf);
 
 		auio.uio_offset = dp->i_offset;
 		auio.uio_resid = dirblksize;
 		aiov.iov_len = auio.uio_resid;
 		aiov.iov_base = (caddr_t)buf;
 	} else {
 		entry->e2d_reclen = htole16(dirblksize);
 		auio.uio_offset = dp->i_offset;
 		auio.uio_resid = EXT2_DIR_REC_LEN(entry->e2d_namlen);
 		aiov.iov_len = auio.uio_resid;
 		aiov.iov_base = (caddr_t)entry;
 	}
 
 	auio.uio_iov = &aiov;
 	auio.uio_iovcnt = 1;
 	auio.uio_rw = UIO_WRITE;
 	auio.uio_segflg = UIO_SYSSPACE;
 	auio.uio_td = (struct thread *)0;
 	error = VOP_WRITE(dvp, &auio, IO_SYNC, cnp->cn_cred);
 	if (error)
 		goto out;
 
 	dp->i_size = roundup2(dp->i_size, dirblksize);
 	dp->i_flag |= IN_CHANGE;
 
 out:
 	free(buf, M_TEMP);
 	return (error);
 
 }
 
 /*
  * Write a directory entry after a call to namei, using the parameters
  * that it left in nameidata.  The argument ip is the inode which the new
  * directory entry will refer to.  Dvp is a pointer to the directory to
  * be written, which was left locked by namei. Remaining parameters
  * (dp->i_offset, dp->i_count) indicate how the space for the new
  * entry is to be obtained.
  */
 int
 ext2_direnter(struct inode *ip, struct vnode *dvp, struct componentname *cnp)
 {
 	struct inode *dp;
 	struct ext2fs_direct_2 newdir;
 	int DIRBLKSIZ = ip->i_e2fs->e2fs_bsize;
 	int error;
 
-#ifdef INVARIANTS
-	if ((cnp->cn_flags & SAVENAME) == 0)
-		panic("ext2_direnter: missing name");
-#endif
 	dp = VTOI(dvp);
 	newdir.e2d_ino = htole32(ip->i_number);
 	if (EXT2_HAS_INCOMPAT_FEATURE(ip->i_e2fs,
 	    EXT2F_INCOMPAT_FTYPE)) {
 		newdir.e2d_namlen = cnp->cn_namelen;
 		newdir.e2d_type = DTTOFT(IFTODT(ip->i_mode));
 	} else
 		newdir.e2d_namlen = htole16(cnp->cn_namelen);
 
 	bcopy(cnp->cn_nameptr, newdir.e2d_name, (unsigned)cnp->cn_namelen + 1);
 
 	if (ext2_htree_has_idx(dp)) {
 		error = ext2_htree_add_entry(dvp, &newdir, cnp);
 		if (error) {
 			dp->i_flag &= ~IN_E3INDEX;
 			dp->i_flag |= IN_CHANGE | IN_UPDATE;
 		}
 		return (error);
 	}
 
 	if (EXT2_HAS_COMPAT_FEATURE(ip->i_e2fs, EXT2F_COMPAT_DIRHASHINDEX) &&
 	    !ext2_htree_has_idx(dp)) {
 		if ((dp->i_size / DIRBLKSIZ) == 1 &&
 		    dp->i_offset == DIRBLKSIZ) {
 			/*
 			 * Making indexed directory when one block is not
 			 * enough to save all entries.
 			 */
 			return ext2_htree_create_index(dvp, cnp, &newdir);
 		}
 	}
 
 	/*
 	 * If dp->i_count is 0, then namei could find no
 	 * space in the directory. Here, dp->i_offset will
 	 * be on a directory block boundary and we will write the
 	 * new entry into a fresh block.
 	 */
 	if (dp->i_count == 0)
 		return ext2_add_first_entry(dvp, &newdir, cnp);
 
 	error = ext2_add_entry(dvp, &newdir);
 	if (!error && dp->i_endoff && dp->i_endoff < dp->i_size)
 		error = ext2_truncate(dvp, (off_t)dp->i_endoff, IO_SYNC,
 		    cnp->cn_cred, curthread);
 	return (error);
 }
 
 /*
  * Insert an entry into the directory block.
  * Compact the contents.
  */
 int
 ext2_add_entry(struct vnode *dvp, struct ext2fs_direct_2 *entry)
 {
 	struct ext2fs_direct_2 *ep, *nep;
 	struct inode *dp;
 	struct buf *bp;
 	u_int dsize;
 	int error, loc, newentrysize, spacefree;
 	char *dirbuf;
 
 	dp = VTOI(dvp);
 
 	/*
 	 * If dp->i_count is non-zero, then namei found space
 	 * for the new entry in the range dp->i_offset to
 	 * dp->i_offset + dp->i_count in the directory.
 	 * To use this space, we may have to compact the entries located
 	 * there, by copying them together towards the beginning of the
 	 * block, leaving the free space in one usable chunk at the end.
 	 */
 
 	/*
 	 * Increase size of directory if entry eats into new space.
 	 * This should never push the size past a new multiple of
 	 * DIRBLKSIZE.
 	 *
 	 * N.B. - THIS IS AN ARTIFACT OF 4.2 AND SHOULD NEVER HAPPEN.
 	 */
 	if (dp->i_offset + dp->i_count > dp->i_size)
 		dp->i_size = dp->i_offset + dp->i_count;
 	/*
 	 * Get the block containing the space for the new directory entry.
 	 */
 	if ((error = ext2_blkatoff(dvp, (off_t)dp->i_offset, &dirbuf,
 	    &bp)) != 0)
 		return (error);
 	/*
 	 * Find space for the new entry. In the simple case, the entry at
 	 * offset base will have the space. If it does not, then namei
 	 * arranged that compacting the region dp->i_offset to
 	 * dp->i_offset + dp->i_count would yield the
 	 * space.
 	 */
 	newentrysize = EXT2_DIR_REC_LEN(entry->e2d_namlen);
 	ep = (struct ext2fs_direct_2 *)dirbuf;
 	dsize = EXT2_DIR_REC_LEN(ep->e2d_namlen);
 	spacefree = le16toh(ep->e2d_reclen) - dsize;
 	for (loc = le16toh(ep->e2d_reclen); loc < dp->i_count; ) {
 		nep = (struct ext2fs_direct_2 *)(dirbuf + loc);
 		if (le32toh(ep->e2d_ino)) {
 			/* trim the existing slot */
 			ep->e2d_reclen = htole16(dsize);
 			ep = (struct ext2fs_direct_2 *)((char *)ep + dsize);
 		} else {
 			/* overwrite; nothing there; header is ours */
 			spacefree += dsize;
 		}
 		dsize = EXT2_DIR_REC_LEN(nep->e2d_namlen);
 		spacefree += le16toh(nep->e2d_reclen) - dsize;
 		loc += le16toh(nep->e2d_reclen);
 		bcopy((caddr_t)nep, (caddr_t)ep, dsize);
 	}
 	/*
 	 * Update the pointer fields in the previous entry (if any),
 	 * copy in the new entry, and write out the block.
 	 */
 	if (ep->e2d_ino == 0) {
 		if (spacefree + dsize < newentrysize)
 			panic("ext2_direnter: compact1");
 		entry->e2d_reclen = htole16(spacefree + dsize);
 	} else {
 		if (spacefree < newentrysize)
 			panic("ext2_direnter: compact2");
 		entry->e2d_reclen = htole16(spacefree);
 		ep->e2d_reclen = htole16(dsize);
 		ep = (struct ext2fs_direct_2 *)((char *)ep + dsize);
 	}
 	bcopy((caddr_t)entry, (caddr_t)ep, (u_int)newentrysize);
 	ext2_dirent_csum_set(dp, (struct ext2fs_direct_2 *)bp->b_data);
 	if (DOINGASYNC(dvp)) {
 		bdwrite(bp);
 		error = 0;
 	} else {
 		error = bwrite(bp);
 	}
 	dp->i_flag |= IN_CHANGE | IN_UPDATE;
 	return (error);
 }
 
 /*
  * Remove a directory entry after a call to namei, using
  * the parameters which it left in nameidata. The entry
  * dp->i_offset contains the offset into the directory of the
  * entry to be eliminated.  The dp->i_count field contains the
  * size of the previous record in the directory.  If this
  * is 0, the first entry is being deleted, so we need only
  * zero the inode number to mark the entry as free.  If the
  * entry is not the first in the directory, we must reclaim
  * the space of the now empty record by adding the record size
  * to the size of the previous entry.
  */
 int
 ext2_dirremove(struct vnode *dvp, struct componentname *cnp)
 {
 	struct inode *dp;
 	struct ext2fs_direct_2 *ep, *rep;
 	struct buf *bp;
 	int error;
 
 	dp = VTOI(dvp);
 	if (dp->i_count == 0) {
 		/*
 		 * First entry in block: set d_ino to zero.
 		 */
 		if ((error =
 		    ext2_blkatoff(dvp, (off_t)dp->i_offset, (char **)&ep,
 		    &bp)) != 0)
 			return (error);
 		ep->e2d_ino = 0;
 		ext2_dirent_csum_set(dp, (struct ext2fs_direct_2 *)bp->b_data);
 		error = bwrite(bp);
 		dp->i_flag |= IN_CHANGE | IN_UPDATE;
 		return (error);
 	}
 	/*
 	 * Collapse new free space into previous entry.
 	 */
 	if ((error = ext2_blkatoff(dvp, (off_t)(dp->i_offset - dp->i_count),
 	    (char **)&ep, &bp)) != 0)
 		return (error);
 
 	/* Set 'rep' to the entry being removed. */
 	if (dp->i_count == 0)
 		rep = ep;
 	else
 		rep = (struct ext2fs_direct_2 *)((char *)ep +
 		    le16toh(ep->e2d_reclen));
 	ep->e2d_reclen += rep->e2d_reclen;
 	ext2_dirent_csum_set(dp, (struct ext2fs_direct_2 *)bp->b_data);
 	if (DOINGASYNC(dvp) && dp->i_count != 0)
 		bdwrite(bp);
 	else
 		error = bwrite(bp);
 	dp->i_flag |= IN_CHANGE | IN_UPDATE;
 	return (error);
 }
 
 /*
  * Rewrite an existing directory entry to point at the inode
  * supplied.  The parameters describing the directory entry are
  * set up by a call to namei.
  */
 int
 ext2_dirrewrite(struct inode *dp, struct inode *ip, struct componentname *cnp)
 {
 	struct buf *bp;
 	struct ext2fs_direct_2 *ep;
 	struct vnode *vdp = ITOV(dp);
 	int error;
 
 	if ((error = ext2_blkatoff(vdp, (off_t)dp->i_offset, (char **)&ep,
 	    &bp)) != 0)
 		return (error);
 	ep->e2d_ino = htole32(ip->i_number);
 	if (EXT2_HAS_INCOMPAT_FEATURE(ip->i_e2fs,
 	    EXT2F_INCOMPAT_FTYPE))
 		ep->e2d_type = DTTOFT(IFTODT(ip->i_mode));
 	else
 		ep->e2d_type = EXT2_FT_UNKNOWN;
 	ext2_dirent_csum_set(dp, (struct ext2fs_direct_2 *)bp->b_data);
 	error = bwrite(bp);
 	dp->i_flag |= IN_CHANGE | IN_UPDATE;
 	return (error);
 }
 
 /*
  * Check if a directory is empty or not.
  * Inode supplied must be locked.
  *
  * Using a struct dirtemplate here is not precisely
  * what we want, but better than using a struct direct.
  *
  * NB: does not handle corrupted directories.
  */
 int
 ext2_dirempty(struct inode *ip, ino_t parentino, struct ucred *cred)
 {
 	off_t off;
 	struct dirtemplate dbuf;
 	struct ext2fs_direct_2 *dp = (struct ext2fs_direct_2 *)&dbuf;
 	int error, namlen;
 	ssize_t count;
 #define	MINDIRSIZ (sizeof(struct dirtemplate) / 2)
 
 	for (off = 0; off < ip->i_size; off += le16toh(dp->e2d_reclen)) {
 		error = vn_rdwr(UIO_READ, ITOV(ip), (caddr_t)dp, MINDIRSIZ,
 		    off, UIO_SYSSPACE, IO_NODELOCKED | IO_NOMACCHECK, cred,
 		    NOCRED, &count, (struct thread *)0);
 		/*
 		 * Since we read MINDIRSIZ, residual must
 		 * be 0 unless we're at end of file.
 		 */
 		if (error || count != 0)
 			return (0);
 		/* avoid infinite loops */
 		if (dp->e2d_reclen == 0)
 			return (0);
 		/* skip empty entries */
 		if (dp->e2d_ino == 0)
 			continue;
 		/* accept only "." and ".." */
 		namlen = dp->e2d_namlen;
 		if (namlen > 2)
 			return (0);
 		if (dp->e2d_name[0] != '.')
 			return (0);
 		/*
 		 * At this point namlen must be 1 or 2.
 		 * 1 implies ".", 2 implies ".." if second
 		 * char is also "."
 		 */
 		if (namlen == 1)
 			continue;
 		if (dp->e2d_name[1] == '.' && le32toh(dp->e2d_ino) == parentino)
 			continue;
 		return (0);
 	}
 	return (1);
 }
 
 /*
  * Check if source directory is in the path of the target directory.
  * Target is supplied locked, source is unlocked.
  * The target is always vput before returning.
  */
 int
 ext2_checkpath(struct inode *source, struct inode *target, struct ucred *cred)
 {
 	struct vnode *vp;
 	int error, namlen;
 	struct dirtemplate dirbuf;
 
 	vp = ITOV(target);
 	if (target->i_number == source->i_number) {
 		error = EEXIST;
 		goto out;
 	}
 	if (target->i_number == EXT2_ROOTINO) {
 		error = 0;
 		goto out;
 	}
 
 	for (;;) {
 		if (vp->v_type != VDIR) {
 			error = ENOTDIR;
 			break;
 		}
 		error = vn_rdwr(UIO_READ, vp, (caddr_t)&dirbuf,
 		    sizeof(struct dirtemplate), (off_t)0, UIO_SYSSPACE,
 		    IO_NODELOCKED | IO_NOMACCHECK, cred, NOCRED, NULL,
 		    NULL);
 		if (error != 0)
 			break;
 		namlen = dirbuf.dotdot_type;	/* like ufs little-endian */
 		if (namlen != 2 ||
 		    dirbuf.dotdot_name[0] != '.' ||
 		    dirbuf.dotdot_name[1] != '.') {
 			error = ENOTDIR;
 			break;
 		}
 		if (le32toh(dirbuf.dotdot_ino) == source->i_number) {
 			error = EINVAL;
 			break;
 		}
 		if (le32toh(dirbuf.dotdot_ino) == EXT2_ROOTINO)
 			break;
 		vput(vp);
 		if ((error = VFS_VGET(vp->v_mount, le32toh(dirbuf.dotdot_ino),
 		    LK_EXCLUSIVE, &vp)) != 0) {
 			vp = NULL;
 			break;
 		}
 	}
 
 out:
 	if (error == ENOTDIR)
 		SDT_PROBE2(ext2fs, , lookup, trace, 1,
 		    "checkpath: .. not a directory");
 	if (vp != NULL)
 		vput(vp);
 	return (error);
 }
diff --git a/sys/fs/ext2fs/ext2_vnops.c b/sys/fs/ext2fs/ext2_vnops.c
index 9843fc16e6b2..b629ddefcd1a 100644
--- a/sys/fs/ext2fs/ext2_vnops.c
+++ b/sys/fs/ext2fs/ext2_vnops.c
@@ -1,2350 +1,2333 @@
 /*-
  *  modified for EXT2FS support in Lites 1.1
  *
  *  Aug 1995, Godmar Back (gback@cs.utah.edu)
  *  University of Utah, Department of Computer Science
  */
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 1982, 1986, 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
  * (c) UNIX System Laboratories, Inc.
  * All or some portions of this file are derived from material licensed
  * to the University of California by American Telephone and Telegraph
  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
  * the permission of UNIX System Laboratories, Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)ufs_vnops.c	8.7 (Berkeley) 2/3/94
  *	@(#)ufs_vnops.c 8.27 (Berkeley) 5/27/95
  * $FreeBSD$
  */
 
 #include "opt_suiddir.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/fcntl.h>
 #include <sys/filio.h>
 #include <sys/limits.h>
 #include <sys/sdt.h>
 #include <sys/stat.h>
 #include <sys/bio.h>
 #include <sys/buf.h>
 #include <sys/endian.h>
 #include <sys/priv.h>
 #include <sys/rwlock.h>
 #include <sys/mount.h>
 #include <sys/unistd.h>
 #include <sys/time.h>
 #include <sys/vnode.h>
 #include <sys/namei.h>
 #include <sys/lockf.h>
 #include <sys/event.h>
 #include <sys/conf.h>
 #include <sys/file.h>
 #include <sys/extattr.h>
 #include <sys/vmmeter.h>
 
 #include <vm/vm.h>
 #include <vm/vm_param.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_object.h>
 #include <vm/vm_page.h>
 #include <vm/vm_pager.h>
 #include <vm/vnode_pager.h>
 
 #include "opt_directio.h"
 
 #include <ufs/ufs/dir.h>
 
 #include <fs/ext2fs/fs.h>
 #include <fs/ext2fs/inode.h>
 #include <fs/ext2fs/ext2_acl.h>
 #include <fs/ext2fs/ext2fs.h>
 #include <fs/ext2fs/ext2_extern.h>
 #include <fs/ext2fs/ext2_dinode.h>
 #include <fs/ext2fs/ext2_dir.h>
 #include <fs/ext2fs/ext2_mount.h>
 #include <fs/ext2fs/ext2_extattr.h>
 #include <fs/ext2fs/ext2_extents.h>
 
 SDT_PROVIDER_DECLARE(ext2fs);
 /*
  * ext2fs trace probe:
  * arg0: verbosity. Higher numbers give more verbose messages
  * arg1: Textual message
  */
 SDT_PROBE_DEFINE2(ext2fs, , vnops, trace, "int", "char*");
 
 static int ext2_makeinode(int mode, struct vnode *, struct vnode **, struct componentname *);
 static void ext2_itimes_locked(struct vnode *);
 
 static vop_access_t	ext2_access;
 static int ext2_chmod(struct vnode *, int, struct ucred *, struct thread *);
 static int ext2_chown(struct vnode *, uid_t, gid_t, struct ucred *,
     struct thread *);
 static vop_close_t	ext2_close;
 static vop_create_t	ext2_create;
 static vop_fsync_t	ext2_fsync;
 static vop_getattr_t	ext2_getattr;
 static vop_ioctl_t	ext2_ioctl;
 static vop_link_t	ext2_link;
 static vop_mkdir_t	ext2_mkdir;
 static vop_mknod_t	ext2_mknod;
 static vop_open_t	ext2_open;
 static vop_pathconf_t	ext2_pathconf;
 static vop_print_t	ext2_print;
 static vop_read_t	ext2_read;
 static vop_readlink_t	ext2_readlink;
 static vop_remove_t	ext2_remove;
 static vop_rename_t	ext2_rename;
 static vop_rmdir_t	ext2_rmdir;
 static vop_setattr_t	ext2_setattr;
 static vop_strategy_t	ext2_strategy;
 static vop_symlink_t	ext2_symlink;
 static vop_write_t	ext2_write;
 static vop_deleteextattr_t	ext2_deleteextattr;
 static vop_getextattr_t	ext2_getextattr;
 static vop_listextattr_t	ext2_listextattr;
 static vop_setextattr_t	ext2_setextattr;
 static vop_vptofh_t	ext2_vptofh;
 static vop_close_t	ext2fifo_close;
 
 /* Global vfs data structures for ext2. */
 struct vop_vector ext2_vnodeops = {
 	.vop_default =		&default_vnodeops,
 	.vop_access =		ext2_access,
 	.vop_bmap =		ext2_bmap,
 	.vop_cachedlookup =	ext2_lookup,
 	.vop_close =		ext2_close,
 	.vop_create =		ext2_create,
 	.vop_fsync =		ext2_fsync,
 	.vop_getpages =		vnode_pager_local_getpages,
 	.vop_getpages_async =	vnode_pager_local_getpages_async,
 	.vop_getattr =		ext2_getattr,
 	.vop_inactive =		ext2_inactive,
 	.vop_ioctl =		ext2_ioctl,
 	.vop_link =		ext2_link,
 	.vop_lookup =		vfs_cache_lookup,
 	.vop_mkdir =		ext2_mkdir,
 	.vop_mknod =		ext2_mknod,
 	.vop_open =		ext2_open,
 	.vop_pathconf =		ext2_pathconf,
 	.vop_poll =		vop_stdpoll,
 	.vop_print =		ext2_print,
 	.vop_read =		ext2_read,
 	.vop_readdir =		ext2_readdir,
 	.vop_readlink =		ext2_readlink,
 	.vop_reallocblks =	ext2_reallocblks,
 	.vop_reclaim =		ext2_reclaim,
 	.vop_remove =		ext2_remove,
 	.vop_rename =		ext2_rename,
 	.vop_rmdir =		ext2_rmdir,
 	.vop_setattr =		ext2_setattr,
 	.vop_strategy =		ext2_strategy,
 	.vop_symlink =		ext2_symlink,
 	.vop_write =		ext2_write,
 	.vop_deleteextattr =	ext2_deleteextattr,
 	.vop_getextattr =	ext2_getextattr,
 	.vop_listextattr =	ext2_listextattr,
 	.vop_setextattr =	ext2_setextattr,
 #ifdef UFS_ACL
 	.vop_getacl =		ext2_getacl,
 	.vop_setacl =		ext2_setacl,
 	.vop_aclcheck =		ext2_aclcheck,
 #endif /* UFS_ACL */
 	.vop_vptofh =		ext2_vptofh,
 };
 VFS_VOP_VECTOR_REGISTER(ext2_vnodeops);
 
 struct vop_vector ext2_fifoops = {
 	.vop_default =		&fifo_specops,
 	.vop_access =		ext2_access,
 	.vop_close =		ext2fifo_close,
 	.vop_fsync =		ext2_fsync,
 	.vop_getattr =		ext2_getattr,
 	.vop_inactive =		ext2_inactive,
 	.vop_pathconf =		ext2_pathconf,
 	.vop_print =		ext2_print,
 	.vop_read =		VOP_PANIC,
 	.vop_reclaim =		ext2_reclaim,
 	.vop_setattr =		ext2_setattr,
 	.vop_write =		VOP_PANIC,
 	.vop_vptofh =		ext2_vptofh,
 };
 VFS_VOP_VECTOR_REGISTER(ext2_fifoops);
 
 /*
  * A virgin directory (no blushing please).
  * Note that the type and namlen fields are reversed relative to ext2.
  * Also, we don't use `struct odirtemplate', since it would just cause
  * endianness problems.
  */
 static struct dirtemplate mastertemplate = {
 	0, htole16(12), 1, EXT2_FT_DIR, ".",
 	0, htole16(DIRBLKSIZ - 12), 2, EXT2_FT_DIR, ".."
 };
 static struct dirtemplate omastertemplate = {
 	0, htole16(12), 1, EXT2_FT_UNKNOWN, ".",
 	0, htole16(DIRBLKSIZ - 12), 2, EXT2_FT_UNKNOWN, ".."
 };
 
 static void
 ext2_itimes_locked(struct vnode *vp)
 {
 	struct inode *ip;
 	struct timespec ts;
 
 	ASSERT_VI_LOCKED(vp, __func__);
 
 	ip = VTOI(vp);
 	if ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_UPDATE)) == 0)
 		return;
 	if ((vp->v_type == VBLK || vp->v_type == VCHR))
 		ip->i_flag |= IN_LAZYMOD;
 	else
 		ip->i_flag |= IN_MODIFIED;
 	if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
 		vfs_timestamp(&ts);
 		if (ip->i_flag & IN_ACCESS) {
 			ip->i_atime = ts.tv_sec;
 			ip->i_atimensec = ts.tv_nsec;
 		}
 		if (ip->i_flag & IN_UPDATE) {
 			ip->i_mtime = ts.tv_sec;
 			ip->i_mtimensec = ts.tv_nsec;
 			ip->i_modrev++;
 		}
 		if (ip->i_flag & IN_CHANGE) {
 			ip->i_ctime = ts.tv_sec;
 			ip->i_ctimensec = ts.tv_nsec;
 		}
 	}
 	ip->i_flag &= ~(IN_ACCESS | IN_CHANGE | IN_UPDATE);
 }
 
 void
 ext2_itimes(struct vnode *vp)
 {
 
 	VI_LOCK(vp);
 	ext2_itimes_locked(vp);
 	VI_UNLOCK(vp);
 }
 
 /*
  * Create a regular file
  */
 static int
 ext2_create(struct vop_create_args *ap)
 {
 	int error;
 
 	error =
 	    ext2_makeinode(MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode),
 	    ap->a_dvp, ap->a_vpp, ap->a_cnp);
 	if (error != 0)
 		return (error);
 	if ((ap->a_cnp->cn_flags & MAKEENTRY) != 0)
 		cache_enter(ap->a_dvp, *ap->a_vpp, ap->a_cnp);
 	return (0);
 }
 
 static int
 ext2_open(struct vop_open_args *ap)
 {
 
 	if (ap->a_vp->v_type == VBLK || ap->a_vp->v_type == VCHR)
 		return (EOPNOTSUPP);
 
 	/*
 	 * Files marked append-only must be opened for appending.
 	 */
 	if ((VTOI(ap->a_vp)->i_flags & APPEND) &&
 	    (ap->a_mode & (FWRITE | O_APPEND)) == FWRITE)
 		return (EPERM);
 
 	vnode_create_vobject(ap->a_vp, VTOI(ap->a_vp)->i_size, ap->a_td);
 
 	return (0);
 }
 
 /*
  * Close called.
  *
  * Update the times on the inode.
  */
 static int
 ext2_close(struct vop_close_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 
 	VI_LOCK(vp);
 	if (vp->v_usecount > 1)
 		ext2_itimes_locked(vp);
 	VI_UNLOCK(vp);
 	return (0);
 }
 
 static int
 ext2_access(struct vop_access_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct inode *ip = VTOI(vp);
 	accmode_t accmode = ap->a_accmode;
 	int error;
 
 	/*
 	 * Disallow write attempts on read-only file systems;
 	 * unless the file is a socket, fifo, or a block or
 	 * character device resident on the file system.
 	 */
 	if (accmode & VWRITE) {
 		switch (vp->v_type) {
 		case VDIR:
 		case VLNK:
 		case VREG:
 			if (vp->v_mount->mnt_flag & MNT_RDONLY)
 				return (EROFS);
 			break;
 		default:
 			break;
 		}
 	}
 
 	/* If immutable bit set, nobody gets to write it. */
 	if ((accmode & VWRITE) && (ip->i_flags & SF_IMMUTABLE))
 		return (EPERM);
 
 	error = vaccess(vp->v_type, ip->i_mode, ip->i_uid, ip->i_gid,
 	    ap->a_accmode, ap->a_cred);
 	return (error);
 }
 
 static int
 ext2_getattr(struct vop_getattr_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct inode *ip = VTOI(vp);
 	struct vattr *vap = ap->a_vap;
 
 	ext2_itimes(vp);
 	/*
 	 * Copy from inode table
 	 */
 	vap->va_fsid = dev2udev(ip->i_devvp->v_rdev);
 	vap->va_fileid = ip->i_number;
 	vap->va_mode = ip->i_mode & ~IFMT;
 	vap->va_nlink = ip->i_nlink;
 	vap->va_uid = ip->i_uid;
 	vap->va_gid = ip->i_gid;
 	vap->va_rdev = ip->i_rdev;
 	vap->va_size = ip->i_size;
 	vap->va_atime.tv_sec = ip->i_atime;
 	vap->va_atime.tv_nsec = E2DI_HAS_XTIME(ip) ? ip->i_atimensec : 0;
 	vap->va_mtime.tv_sec = ip->i_mtime;
 	vap->va_mtime.tv_nsec = E2DI_HAS_XTIME(ip) ? ip->i_mtimensec : 0;
 	vap->va_ctime.tv_sec = ip->i_ctime;
 	vap->va_ctime.tv_nsec = E2DI_HAS_XTIME(ip) ? ip->i_ctimensec : 0;
 	if (E2DI_HAS_XTIME(ip)) {
 		vap->va_birthtime.tv_sec = ip->i_birthtime;
 		vap->va_birthtime.tv_nsec = ip->i_birthnsec;
 	}
 	vap->va_flags = ip->i_flags;
 	vap->va_gen = ip->i_gen;
 	vap->va_blocksize = vp->v_mount->mnt_stat.f_iosize;
 	vap->va_bytes = dbtob((u_quad_t)ip->i_blocks);
 	vap->va_type = IFTOVT(ip->i_mode);
 	vap->va_filerev = ip->i_modrev;
 	return (0);
 }
 
 /*
  * Set attribute vnode op. called from several syscalls
  */
 static int
 ext2_setattr(struct vop_setattr_args *ap)
 {
 	struct vattr *vap = ap->a_vap;
 	struct vnode *vp = ap->a_vp;
 	struct inode *ip = VTOI(vp);
 	struct ucred *cred = ap->a_cred;
 	struct thread *td = curthread;
 	int error;
 
 	/*
 	 * Check for unsettable attributes.
 	 */
 	if ((vap->va_type != VNON) || (vap->va_nlink != VNOVAL) ||
 	    (vap->va_fsid != VNOVAL) || (vap->va_fileid != VNOVAL) ||
 	    (vap->va_blocksize != VNOVAL) || (vap->va_rdev != VNOVAL) ||
 	    ((int)vap->va_bytes != VNOVAL) || (vap->va_gen != VNOVAL)) {
 		return (EINVAL);
 	}
 	if (vap->va_flags != VNOVAL) {
 		/* Disallow flags not supported by ext2fs. */
 		if (vap->va_flags & ~(SF_APPEND | SF_IMMUTABLE | UF_NODUMP))
 			return (EOPNOTSUPP);
 
 		if (vp->v_mount->mnt_flag & MNT_RDONLY)
 			return (EROFS);
 		/*
 		 * Callers may only modify the file flags on objects they
 		 * have VADMIN rights for.
 		 */
 		if ((error = VOP_ACCESS(vp, VADMIN, cred, td)))
 			return (error);
 		/*
 		 * Unprivileged processes and privileged processes in
 		 * jail() are not permitted to unset system flags, or
 		 * modify flags if any system flags are set.
 		 * Privileged non-jail processes may not modify system flags
 		 * if securelevel > 0 and any existing system flags are set.
 		 */
 		if (!priv_check_cred(cred, PRIV_VFS_SYSFLAGS)) {
 			if (ip->i_flags & (SF_IMMUTABLE | SF_APPEND)) {
 				error = securelevel_gt(cred, 0);
 				if (error)
 					return (error);
 			}
 		} else {
 			if (ip->i_flags & (SF_IMMUTABLE | SF_APPEND) ||
 			    ((vap->va_flags ^ ip->i_flags) & SF_SETTABLE))
 				return (EPERM);
 		}
 		ip->i_flags = vap->va_flags;
 		ip->i_flag |= IN_CHANGE;
 		if (ip->i_flags & (IMMUTABLE | APPEND))
 			return (0);
 	}
 	if (ip->i_flags & (IMMUTABLE | APPEND))
 		return (EPERM);
 	/*
 	 * Go through the fields and update iff not VNOVAL.
 	 */
 	if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) {
 		if (vp->v_mount->mnt_flag & MNT_RDONLY)
 			return (EROFS);
 		if ((error = ext2_chown(vp, vap->va_uid, vap->va_gid, cred,
 		    td)) != 0)
 			return (error);
 	}
 	if (vap->va_size != VNOVAL) {
 		/*
 		 * Disallow write attempts on read-only file systems;
 		 * unless the file is a socket, fifo, or a block or
 		 * character device resident on the file system.
 		 */
 		switch (vp->v_type) {
 		case VDIR:
 			return (EISDIR);
 		case VLNK:
 		case VREG:
 			if (vp->v_mount->mnt_flag & MNT_RDONLY)
 				return (EROFS);
 			break;
 		default:
 			break;
 		}
 		if ((error = ext2_truncate(vp, vap->va_size, 0, cred, td)) != 0)
 			return (error);
 	}
 	if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL) {
 		if (vp->v_mount->mnt_flag & MNT_RDONLY)
 			return (EROFS);
 		/*
 		 * From utimes(2):
 		 * If times is NULL, ... The caller must be the owner of
 		 * the file, have permission to write the file, or be the
 		 * super-user.
 		 * If times is non-NULL, ... The caller must be the owner of
 		 * the file or be the super-user.
 		 */
 		if ((error = VOP_ACCESS(vp, VADMIN, cred, td)) &&
 		    ((vap->va_vaflags & VA_UTIMES_NULL) == 0 ||
 		    (error = VOP_ACCESS(vp, VWRITE, cred, td))))
 			return (error);
 		ip->i_flag |= IN_CHANGE | IN_MODIFIED;
 		if (vap->va_atime.tv_sec != VNOVAL) {
 			ip->i_flag &= ~IN_ACCESS;
 			ip->i_atime = vap->va_atime.tv_sec;
 			ip->i_atimensec = vap->va_atime.tv_nsec;
 		}
 		if (vap->va_mtime.tv_sec != VNOVAL) {
 			ip->i_flag &= ~IN_UPDATE;
 			ip->i_mtime = vap->va_mtime.tv_sec;
 			ip->i_mtimensec = vap->va_mtime.tv_nsec;
 		}
 		if (E2DI_HAS_XTIME(ip) && vap->va_birthtime.tv_sec != VNOVAL) {
 			ip->i_birthtime = vap->va_birthtime.tv_sec;
 			ip->i_birthnsec = vap->va_birthtime.tv_nsec;
 		}
 		error = ext2_update(vp, 0);
 		if (error)
 			return (error);
 	}
 	error = 0;
 	if (vap->va_mode != (mode_t)VNOVAL) {
 		if (vp->v_mount->mnt_flag & MNT_RDONLY)
 			return (EROFS);
 		error = ext2_chmod(vp, (int)vap->va_mode, cred, td);
 	}
 	return (error);
 }
 
 /*
  * Change the mode on a file.
  * Inode must be locked before calling.
  */
 static int
 ext2_chmod(struct vnode *vp, int mode, struct ucred *cred, struct thread *td)
 {
 	struct inode *ip = VTOI(vp);
 	int error;
 
 	/*
 	 * To modify the permissions on a file, must possess VADMIN
 	 * for that file.
 	 */
 	if ((error = VOP_ACCESS(vp, VADMIN, cred, td)))
 		return (error);
 	/*
 	 * Privileged processes may set the sticky bit on non-directories,
 	 * as well as set the setgid bit on a file with a group that the
 	 * process is not a member of.
 	 */
 	if (vp->v_type != VDIR && (mode & S_ISTXT)) {
 		error = priv_check_cred(cred, PRIV_VFS_STICKYFILE);
 		if (error)
 			return (EFTYPE);
 	}
 	if (!groupmember(ip->i_gid, cred) && (mode & ISGID)) {
 		error = priv_check_cred(cred, PRIV_VFS_SETGID);
 		if (error)
 			return (error);
 	}
 	ip->i_mode &= ~ALLPERMS;
 	ip->i_mode |= (mode & ALLPERMS);
 	ip->i_flag |= IN_CHANGE;
 	return (0);
 }
 
 /*
  * Perform chown operation on inode ip;
  * inode must be locked prior to call.
  */
 static int
 ext2_chown(struct vnode *vp, uid_t uid, gid_t gid, struct ucred *cred,
     struct thread *td)
 {
 	struct inode *ip = VTOI(vp);
 	uid_t ouid;
 	gid_t ogid;
 	int error = 0;
 
 	if (uid == (uid_t)VNOVAL)
 		uid = ip->i_uid;
 	if (gid == (gid_t)VNOVAL)
 		gid = ip->i_gid;
 	/*
 	 * To modify the ownership of a file, must possess VADMIN
 	 * for that file.
 	 */
 	if ((error = VOP_ACCESS(vp, VADMIN, cred, td)))
 		return (error);
 	/*
 	 * To change the owner of a file, or change the group of a file
 	 * to a group of which we are not a member, the caller must
 	 * have privilege.
 	 */
 	if (uid != ip->i_uid || (gid != ip->i_gid &&
 	    !groupmember(gid, cred))) {
 		error = priv_check_cred(cred, PRIV_VFS_CHOWN);
 		if (error)
 			return (error);
 	}
 	ogid = ip->i_gid;
 	ouid = ip->i_uid;
 	ip->i_gid = gid;
 	ip->i_uid = uid;
 	ip->i_flag |= IN_CHANGE;
 	if ((ip->i_mode & (ISUID | ISGID)) && (ouid != uid || ogid != gid)) {
 		if (priv_check_cred(cred, PRIV_VFS_RETAINSUGID) != 0)
 			ip->i_mode &= ~(ISUID | ISGID);
 	}
 	return (0);
 }
 
 /*
  * Synch an open file.
  */
 /* ARGSUSED */
 static int
 ext2_fsync(struct vop_fsync_args *ap)
 {
 	/*
 	 * Flush all dirty buffers associated with a vnode.
 	 */
 
 	vop_stdfsync(ap);
 
 	return (ext2_update(ap->a_vp, ap->a_waitfor == MNT_WAIT));
 }
 
 static int
 ext2_check_mknod_limits(dev_t dev)
 {
 	unsigned maj = major(dev);
 	unsigned min = minor(dev);
 
 	if (maj > EXT2_MAJOR_MAX || min > EXT2_MINOR_MAX)
 		return (EINVAL);
 
 	return (0);
 }
 
 /*
  * Mknod vnode call
  */
 /* ARGSUSED */
 static int
 ext2_mknod(struct vop_mknod_args *ap)
 {
 	struct vattr *vap = ap->a_vap;
 	struct vnode **vpp = ap->a_vpp;
 	struct inode *ip;
 	ino_t ino;
 	int error;
 
 	if (vap->va_rdev != VNOVAL) {
 		error = ext2_check_mknod_limits(vap->va_rdev);
 		if (error)
 			return (error);
 	}
 
 	error = ext2_makeinode(MAKEIMODE(vap->va_type, vap->va_mode),
 	    ap->a_dvp, vpp, ap->a_cnp);
 	if (error)
 		return (error);
 	ip = VTOI(*vpp);
 	ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
 	if (vap->va_rdev != VNOVAL)
 		ip->i_rdev = vap->va_rdev;
 
 	/*
 	 * Remove inode, then reload it through VFS_VGET so it is
 	 * checked to see if it is an alias of an existing entry in
 	 * the inode cache.	 XXX I don't believe this is necessary now.
 	 */
 	(*vpp)->v_type = VNON;
 	ino = ip->i_number;	/* Save this before vgone() invalidates ip. */
 	vgone(*vpp);
 	vput(*vpp);
 	error = VFS_VGET(ap->a_dvp->v_mount, ino, LK_EXCLUSIVE, vpp);
 	if (error) {
 		*vpp = NULL;
 		return (error);
 	}
 	return (0);
 }
 
 static int
 ext2_remove(struct vop_remove_args *ap)
 {
 	struct inode *ip;
 	struct vnode *vp = ap->a_vp;
 	struct vnode *dvp = ap->a_dvp;
 	int error;
 
 	ip = VTOI(vp);
 	if ((ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) ||
 	    (VTOI(dvp)->i_flags & APPEND)) {
 		error = EPERM;
 		goto out;
 	}
 	error = ext2_dirremove(dvp, ap->a_cnp);
 	if (error == 0) {
 		ip->i_nlink--;
 		ip->i_flag |= IN_CHANGE;
 	}
 out:
 	return (error);
 }
 
 /*
  * link vnode call
  */
 static int
 ext2_link(struct vop_link_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct vnode *tdvp = ap->a_tdvp;
 	struct componentname *cnp = ap->a_cnp;
 	struct inode *ip;
 	int error;
 
-#ifdef INVARIANTS
-	if ((cnp->cn_flags & HASBUF) == 0)
-		panic("ext2_link: no name");
-#endif
 	ip = VTOI(vp);
 	if ((nlink_t)ip->i_nlink >= EXT4_LINK_MAX) {
 		error = EMLINK;
 		goto out;
 	}
 	if (ip->i_flags & (IMMUTABLE | APPEND)) {
 		error = EPERM;
 		goto out;
 	}
 	ip->i_nlink++;
 	ip->i_flag |= IN_CHANGE;
 	error = ext2_update(vp, !DOINGASYNC(vp));
 	if (!error)
 		error = ext2_direnter(ip, tdvp, cnp);
 	if (error) {
 		ip->i_nlink--;
 		ip->i_flag |= IN_CHANGE;
 	}
 out:
 	return (error);
 }
 
 static int
 ext2_inc_nlink(struct inode *ip)
 {
 
 	ip->i_nlink++;
 
 	if (S_ISDIR(ip->i_mode) &&
 	    EXT2_HAS_RO_COMPAT_FEATURE(ip->i_e2fs, EXT2F_ROCOMPAT_DIR_NLINK) &&
 	    ip->i_nlink > 1) {
 		if (ip->i_nlink >= EXT4_LINK_MAX || ip->i_nlink == 2)
 			ip->i_nlink = 1;
 	} else if (ip->i_nlink > EXT4_LINK_MAX) {
 		ip->i_nlink--;
 		return (EMLINK);
 	}
 
 	return (0);
 }
 
 static void
 ext2_dec_nlink(struct inode *ip)
 {
 
 	if (!S_ISDIR(ip->i_mode) || ip->i_nlink > 2)
 		ip->i_nlink--;
 }
 
 /*
  * Rename system call.
  * 	rename("foo", "bar");
  * is essentially
  *	unlink("bar");
  *	link("foo", "bar");
  *	unlink("foo");
  * but ``atomically''.  Can't do full commit without saving state in the
  * inode on disk which isn't feasible at this time.  Best we can do is
  * always guarantee the target exists.
  *
  * Basic algorithm is:
  *
  * 1) Bump link count on source while we're linking it to the
  *    target.  This also ensure the inode won't be deleted out
  *    from underneath us while we work (it may be truncated by
  *    a concurrent `trunc' or `open' for creation).
  * 2) Link source to destination.  If destination already exists,
  *    delete it first.
  * 3) Unlink source reference to inode if still around. If a
  *    directory was moved and the parent of the destination
  *    is different from the source, patch the ".." entry in the
  *    directory.
  */
 static int
 ext2_rename(struct vop_rename_args *ap)
 {
 	struct vnode *tvp = ap->a_tvp;
 	struct vnode *tdvp = ap->a_tdvp;
 	struct vnode *fvp = ap->a_fvp;
 	struct vnode *fdvp = ap->a_fdvp;
 	struct componentname *tcnp = ap->a_tcnp;
 	struct componentname *fcnp = ap->a_fcnp;
 	struct inode *ip, *xp, *dp;
 	struct dirtemplate *dirbuf;
 	int doingdirectory = 0, oldparent = 0, newparent = 0;
 	int error = 0;
 	u_char namlen;
 
-#ifdef INVARIANTS
-	if ((tcnp->cn_flags & HASBUF) == 0 ||
-	    (fcnp->cn_flags & HASBUF) == 0)
-		panic("ext2_rename: no name");
-#endif
 	/*
 	 * Check for cross-device rename.
 	 */
 	if ((fvp->v_mount != tdvp->v_mount) ||
 	    (tvp && (fvp->v_mount != tvp->v_mount))) {
 		error = EXDEV;
 abortit:
 		if (tdvp == tvp)
 			vrele(tdvp);
 		else
 			vput(tdvp);
 		if (tvp)
 			vput(tvp);
 		vrele(fdvp);
 		vrele(fvp);
 		return (error);
 	}
 
 	if (tvp && ((VTOI(tvp)->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) ||
 	    (VTOI(tdvp)->i_flags & APPEND))) {
 		error = EPERM;
 		goto abortit;
 	}
 
 	/*
 	 * Renaming a file to itself has no effect.  The upper layers should
 	 * not call us in that case.  Temporarily just warn if they do.
 	 */
 	if (fvp == tvp) {
 		SDT_PROBE2(ext2fs, , vnops, trace, 1,
 		    "rename: fvp == tvp (can't happen)");
 		error = 0;
 		goto abortit;
 	}
 
 	if ((error = vn_lock(fvp, LK_EXCLUSIVE)) != 0)
 		goto abortit;
 	dp = VTOI(fdvp);
 	ip = VTOI(fvp);
 	if (ip->i_nlink >= EXT4_LINK_MAX &&
 	    !EXT2_HAS_RO_COMPAT_FEATURE(ip->i_e2fs, EXT2F_ROCOMPAT_DIR_NLINK)) {
 		VOP_UNLOCK(fvp);
 		error = EMLINK;
 		goto abortit;
 	}
 	if ((ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND))
 	    || (dp->i_flags & APPEND)) {
 		VOP_UNLOCK(fvp);
 		error = EPERM;
 		goto abortit;
 	}
 	if ((ip->i_mode & IFMT) == IFDIR) {
 		/*
 		 * Avoid ".", "..", and aliases of "." for obvious reasons.
 		 */
 		if ((fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') ||
 		    dp == ip || (fcnp->cn_flags | tcnp->cn_flags) & ISDOTDOT ||
 		    (ip->i_flag & IN_RENAME)) {
 			VOP_UNLOCK(fvp);
 			error = EINVAL;
 			goto abortit;
 		}
 		ip->i_flag |= IN_RENAME;
 		oldparent = dp->i_number;
 		doingdirectory++;
 	}
 	vrele(fdvp);
 
 	/*
 	 * When the target exists, both the directory
 	 * and target vnodes are returned locked.
 	 */
 	dp = VTOI(tdvp);
 	xp = NULL;
 	if (tvp)
 		xp = VTOI(tvp);
 
 	/*
 	 * 1) Bump link count while we're moving stuff
 	 *    around.  If we crash somewhere before
 	 *    completing our work, the link count
 	 *    may be wrong, but correctable.
 	 */
 	ext2_inc_nlink(ip);
 	ip->i_flag |= IN_CHANGE;
 	if ((error = ext2_update(fvp, !DOINGASYNC(fvp))) != 0) {
 		VOP_UNLOCK(fvp);
 		goto bad;
 	}
 
 	/*
 	 * If ".." must be changed (ie the directory gets a new
 	 * parent) then the source directory must not be in the
 	 * directory hierarchy above the target, as this would
 	 * orphan everything below the source directory. Also
 	 * the user must have write permission in the source so
 	 * as to be able to change "..". We must repeat the call
 	 * to namei, as the parent directory is unlocked by the
 	 * call to checkpath().
 	 */
 	error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred, curthread);
 	VOP_UNLOCK(fvp);
 	if (oldparent != dp->i_number)
 		newparent = dp->i_number;
 	if (doingdirectory && newparent) {
 		if (error)	/* write access check above */
 			goto bad;
 		if (xp != NULL)
 			vput(tvp);
 		error = ext2_checkpath(ip, dp, tcnp->cn_cred);
 		if (error)
 			goto out;
 		VREF(tdvp);
 		error = vfs_relookup(tdvp, &tvp, tcnp);
 		if (error)
 			goto out;
 		vrele(tdvp);
 		dp = VTOI(tdvp);
 		xp = NULL;
 		if (tvp)
 			xp = VTOI(tvp);
 	}
 	/*
 	 * 2) If target doesn't exist, link the target
 	 *    to the source and unlink the source.
 	 *    Otherwise, rewrite the target directory
 	 *    entry to reference the source inode and
 	 *    expunge the original entry's existence.
 	 */
 	if (xp == NULL) {
 		if (dp->i_devvp != ip->i_devvp)
 			panic("ext2_rename: EXDEV");
 		/*
 		 * Account for ".." in new directory.
 		 * When source and destination have the same
 		 * parent we don't fool with the link count.
 		 */
 		if (doingdirectory && newparent) {
 			error = ext2_inc_nlink(dp);
 			if (error)
 				goto bad;
 
 			dp->i_flag |= IN_CHANGE;
 			error = ext2_update(tdvp, !DOINGASYNC(tdvp));
 			if (error)
 				goto bad;
 		}
 		error = ext2_direnter(ip, tdvp, tcnp);
 		if (error) {
 			if (doingdirectory && newparent) {
 				ext2_dec_nlink(dp);
 				dp->i_flag |= IN_CHANGE;
 				(void)ext2_update(tdvp, 1);
 			}
 			goto bad;
 		}
 		vput(tdvp);
 	} else {
 		if (xp->i_devvp != dp->i_devvp || xp->i_devvp != ip->i_devvp)
 			panic("ext2_rename: EXDEV");
 		/*
 		 * Short circuit rename(foo, foo).
 		 */
 		if (xp->i_number == ip->i_number)
 			panic("ext2_rename: same file");
 		/*
 		 * If the parent directory is "sticky", then the user must
 		 * own the parent directory, or the destination of the rename,
 		 * otherwise the destination may not be changed (except by
 		 * root). This implements append-only directories.
 		 */
 		if ((dp->i_mode & S_ISTXT) && tcnp->cn_cred->cr_uid != 0 &&
 		    tcnp->cn_cred->cr_uid != dp->i_uid &&
 		    xp->i_uid != tcnp->cn_cred->cr_uid) {
 			error = EPERM;
 			goto bad;
 		}
 		/*
 		 * Target must be empty if a directory and have no links
 		 * to it. Also, ensure source and target are compatible
 		 * (both directories, or both not directories).
 		 */
 		if ((xp->i_mode & IFMT) == IFDIR) {
 			if (!ext2_dirempty(xp, dp->i_number, tcnp->cn_cred)) {
 				error = ENOTEMPTY;
 				goto bad;
 			}
 			if (!doingdirectory) {
 				error = ENOTDIR;
 				goto bad;
 			}
 			cache_purge(tdvp);
 		} else if (doingdirectory) {
 			error = EISDIR;
 			goto bad;
 		}
 		error = ext2_dirrewrite(dp, ip, tcnp);
 		if (error)
 			goto bad;
 		/*
 		 * If the target directory is in the same
 		 * directory as the source directory,
 		 * decrement the link count on the parent
 		 * of the target directory.
 		 */
 		if (doingdirectory && !newparent) {
 			ext2_dec_nlink(dp);
 			dp->i_flag |= IN_CHANGE;
 		}
 		vput(tdvp);
 		/*
 		 * Adjust the link count of the target to
 		 * reflect the dirrewrite above.  If this is
 		 * a directory it is empty and there are
 		 * no links to it, so we can squash the inode and
 		 * any space associated with it.  We disallowed
 		 * renaming over top of a directory with links to
 		 * it above, as the remaining link would point to
 		 * a directory without "." or ".." entries.
 		 */
 		ext2_dec_nlink(xp);
 		if (doingdirectory) {
 			if (xp->i_nlink > 2)
 				panic("ext2_rename: linked directory");
 			error = ext2_truncate(tvp, (off_t)0, IO_SYNC,
 			    tcnp->cn_cred, curthread);
 			xp->i_nlink = 0;
 		}
 		xp->i_flag |= IN_CHANGE;
 		vput(tvp);
 		xp = NULL;
 	}
 
 	/*
 	 * 3) Unlink the source.
 	 */
 	fcnp->cn_flags &= ~MODMASK;
 	fcnp->cn_flags |= LOCKPARENT | LOCKLEAF;
 	VREF(fdvp);
 	error = vfs_relookup(fdvp, &fvp, fcnp);
 	if (error == 0)
 		vrele(fdvp);
 	if (fvp != NULL) {
 		xp = VTOI(fvp);
 		dp = VTOI(fdvp);
 	} else {
 		/*
 		 * From name has disappeared.  IN_RENAME is not sufficient
 		 * to protect against directory races due to timing windows,
 		 * so we can't panic here.
 		 */
 		vrele(ap->a_fvp);
 		return (0);
 	}
 	/*
 	 * Ensure that the directory entry still exists and has not
 	 * changed while the new name has been entered. If the source is
 	 * a file then the entry may have been unlinked or renamed. In
 	 * either case there is no further work to be done. If the source
 	 * is a directory then it cannot have been rmdir'ed; its link
 	 * count of three would cause a rmdir to fail with ENOTEMPTY.
 	 * The IN_RENAME flag ensures that it cannot be moved by another
 	 * rename.
 	 */
 	if (xp != ip) {
 		/*
 		 * From name resolves to a different inode.  IN_RENAME is
 		 * not sufficient protection against timing window races
 		 * so we can't panic here.
 		 */
 	} else {
 		/*
 		 * If the source is a directory with a
 		 * new parent, the link count of the old
 		 * parent directory must be decremented
 		 * and ".." set to point to the new parent.
 		 */
 		if (doingdirectory && newparent) {
 			ext2_dec_nlink(dp);
 			dp->i_flag |= IN_CHANGE;
 			dirbuf = malloc(dp->i_e2fs->e2fs_bsize, M_TEMP, M_WAITOK | M_ZERO);
 			error = vn_rdwr(UIO_READ, fvp, (caddr_t)dirbuf,
 			    ip->i_e2fs->e2fs_bsize, (off_t)0,
 			    UIO_SYSSPACE, IO_NODELOCKED | IO_NOMACCHECK,
 			    tcnp->cn_cred, NOCRED, NULL, NULL);
 			if (error == 0) {
 				/* Like ufs little-endian: */
 				namlen = dirbuf->dotdot_type;
 				if (namlen != 2 ||
 				    dirbuf->dotdot_name[0] != '.' ||
 				    dirbuf->dotdot_name[1] != '.') {
 					ext2_dirbad(xp, (doff_t)12,
 					    "rename: mangled dir");
 				} else {
 					dirbuf->dotdot_ino = htole32(newparent);
 					/*
 					 * dirblock 0 could be htree root,
 					 * try both csum update functions.
 					 */
 					ext2_dirent_csum_set(ip,
 					    (struct ext2fs_direct_2 *)dirbuf);
 					ext2_dx_csum_set(ip,
 					    (struct ext2fs_direct_2 *)dirbuf);
 					(void)vn_rdwr(UIO_WRITE, fvp,
 					    (caddr_t)dirbuf,
 					    ip->i_e2fs->e2fs_bsize,
 					    (off_t)0, UIO_SYSSPACE,
 					    IO_NODELOCKED | IO_SYNC |
 					    IO_NOMACCHECK, tcnp->cn_cred,
 					    NOCRED, NULL, NULL);
 					cache_purge(fdvp);
 				}
 			}
 			free(dirbuf, M_TEMP);
 		}
 		error = ext2_dirremove(fdvp, fcnp);
 		if (!error) {
 			ext2_dec_nlink(xp);
 			xp->i_flag |= IN_CHANGE;
 		}
 		xp->i_flag &= ~IN_RENAME;
 	}
 	if (dp)
 		vput(fdvp);
 	if (xp)
 		vput(fvp);
 	vrele(ap->a_fvp);
 	return (error);
 
 bad:
 	if (xp)
 		vput(ITOV(xp));
 	vput(ITOV(dp));
 out:
 	if (doingdirectory)
 		ip->i_flag &= ~IN_RENAME;
 	if (vn_lock(fvp, LK_EXCLUSIVE) == 0) {
 		ext2_dec_nlink(ip);
 		ip->i_flag |= IN_CHANGE;
 		ip->i_flag &= ~IN_RENAME;
 		vput(fvp);
 	} else
 		vrele(fvp);
 	return (error);
 }
 
 #ifdef UFS_ACL
 static int
 ext2_do_posix1e_acl_inheritance_dir(struct vnode *dvp, struct vnode *tvp,
     mode_t dmode, struct ucred *cred, struct thread *td)
 {
 	int error;
 	struct inode *ip = VTOI(tvp);
 	struct acl *dacl, *acl;
 
 	acl = acl_alloc(M_WAITOK);
 	dacl = acl_alloc(M_WAITOK);
 
 	/*
 	 * Retrieve default ACL from parent, if any.
 	 */
 	error = VOP_GETACL(dvp, ACL_TYPE_DEFAULT, acl, cred, td);
 	switch (error) {
 	case 0:
 		/*
 		 * Retrieved a default ACL, so merge mode and ACL if
 		 * necessary.  If the ACL is empty, fall through to
 		 * the "not defined or available" case.
 		 */
 		if (acl->acl_cnt != 0) {
 			dmode = acl_posix1e_newfilemode(dmode, acl);
 			ip->i_mode = dmode;
 			*dacl = *acl;
 			ext2_sync_acl_from_inode(ip, acl);
 			break;
 		}
 		/* FALLTHROUGH */
 
 	case EOPNOTSUPP:
 		/*
 		 * Just use the mode as-is.
 		 */
 		ip->i_mode = dmode;
 		error = 0;
 		goto out;
 
 	default:
 		goto out;
 	}
 
 	error = VOP_SETACL(tvp, ACL_TYPE_ACCESS, acl, cred, td);
 	if (error == 0)
 		error = VOP_SETACL(tvp, ACL_TYPE_DEFAULT, dacl, cred, td);
 	switch (error) {
 	case 0:
 		break;
 
 	case EOPNOTSUPP:
 		/*
 		 * XXX: This should not happen, as EOPNOTSUPP above
 		 * was supposed to free acl.
 		 */
 #ifdef DEBUG
 		printf("ext2_mkdir: VOP_GETACL() but no VOP_SETACL()\n");
 #endif	/* DEBUG */
 		break;
 
 	default:
 		goto out;
 	}
 
 out:
 	acl_free(acl);
 	acl_free(dacl);
 
 	return (error);
 }
 
 static int
 ext2_do_posix1e_acl_inheritance_file(struct vnode *dvp, struct vnode *tvp,
     mode_t mode, struct ucred *cred, struct thread *td)
 {
 	int error;
 	struct inode *ip = VTOI(tvp);
 	struct acl *acl;
 
 	acl = acl_alloc(M_WAITOK);
 
 	/*
 	 * Retrieve default ACL for parent, if any.
 	 */
 	error = VOP_GETACL(dvp, ACL_TYPE_DEFAULT, acl, cred, td);
 	switch (error) {
 	case 0:
 		/*
 		 * Retrieved a default ACL, so merge mode and ACL if
 		 * necessary.
 		 */
 		if (acl->acl_cnt != 0) {
 			/*
 			 * Two possible ways for default ACL to not
 			 * be present.  First, the EA can be
 			 * undefined, or second, the default ACL can
 			 * be blank.  If it's blank, fall through to
 			 * the it's not defined case.
 			 */
 			mode = acl_posix1e_newfilemode(mode, acl);
 			ip->i_mode = mode;
 			ext2_sync_acl_from_inode(ip, acl);
 			break;
 		}
 		/* FALLTHROUGH */
 
 	case EOPNOTSUPP:
 		/*
 		 * Just use the mode as-is.
 		 */
 		ip->i_mode = mode;
 		error = 0;
 		goto out;
 
 	default:
 		goto out;
 	}
 
 	error = VOP_SETACL(tvp, ACL_TYPE_ACCESS, acl, cred, td);
 	switch (error) {
 	case 0:
 		break;
 
 	case EOPNOTSUPP:
 		/*
 		 * XXX: This should not happen, as EOPNOTSUPP above was
 		 * supposed to free acl.
 		 */
 		printf("ufs_do_posix1e_acl_inheritance_file: VOP_GETACL() "
 		    "but no VOP_SETACL()\n");
 		/* panic("ufs_do_posix1e_acl_inheritance_file: VOP_GETACL() "
 		    "but no VOP_SETACL()"); */
 		break;
 
 	default:
 		goto out;
 	}
 
 out:
 	acl_free(acl);
 
 	return (error);
 }
 
 #endif /* UFS_ACL */
 
 /*
  * Mkdir system call
  */
 static int
 ext2_mkdir(struct vop_mkdir_args *ap)
 {
 	struct m_ext2fs *fs;
 	struct vnode *dvp = ap->a_dvp;
 	struct vattr *vap = ap->a_vap;
 	struct componentname *cnp = ap->a_cnp;
 	struct inode *ip, *dp;
 	struct vnode *tvp;
 	struct dirtemplate dirtemplate, *dtp;
 	char *buf = NULL;
 	int error, dmode;
 
-#ifdef INVARIANTS
-	if ((cnp->cn_flags & HASBUF) == 0)
-		panic("ext2_mkdir: no name");
-#endif
 	dp = VTOI(dvp);
 	if ((nlink_t)dp->i_nlink >= EXT4_LINK_MAX &&
 	    !EXT2_HAS_RO_COMPAT_FEATURE(dp->i_e2fs, EXT2F_ROCOMPAT_DIR_NLINK)) {
 		error = EMLINK;
 		goto out;
 	}
 	dmode = vap->va_mode & 0777;
 	dmode |= IFDIR;
 	/*
 	 * Must simulate part of ext2_makeinode here to acquire the inode,
 	 * but not have it entered in the parent directory. The entry is
 	 * made later after writing "." and ".." entries.
 	 */
 	error = ext2_valloc(dvp, dmode, cnp->cn_cred, &tvp);
 	if (error)
 		goto out;
 	ip = VTOI(tvp);
 	fs = ip->i_e2fs;
 	ip->i_gid = dp->i_gid;
 #ifdef SUIDDIR
 	{
 		/*
 		 * if we are hacking owners here, (only do this where told to)
 		 * and we are not giving it TOO root, (would subvert quotas)
 		 * then go ahead and give it to the other user.
 		 * The new directory also inherits the SUID bit.
 		 * If user's UID and dir UID are the same,
 		 * 'give it away' so that the SUID is still forced on.
 		 */
 		if ((dvp->v_mount->mnt_flag & MNT_SUIDDIR) &&
 		    (dp->i_mode & ISUID) && dp->i_uid) {
 			dmode |= ISUID;
 			ip->i_uid = dp->i_uid;
 		} else {
 			ip->i_uid = cnp->cn_cred->cr_uid;
 		}
 	}
 #else
 	ip->i_uid = cnp->cn_cred->cr_uid;
 #endif
 	ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
 	ip->i_mode = dmode;
 	tvp->v_type = VDIR;	/* Rest init'd in getnewvnode(). */
 	ip->i_nlink = 2;
 	if (cnp->cn_flags & ISWHITEOUT)
 		ip->i_flags |= UF_OPAQUE;
 	error = ext2_update(tvp, 1);
 
 	/*
 	 * Bump link count in parent directory
 	 * to reflect work done below.  Should
 	 * be done before reference is created
 	 * so reparation is possible if we crash.
 	 */
 	ext2_inc_nlink(dp);
 	dp->i_flag |= IN_CHANGE;
 	error = ext2_update(dvp, !DOINGASYNC(dvp));
 	if (error)
 		goto bad;
 
 	/* Initialize directory with "." and ".." from static template. */
 	if (EXT2_HAS_INCOMPAT_FEATURE(ip->i_e2fs,
 	    EXT2F_INCOMPAT_FTYPE))
 		dtp = &mastertemplate;
 	else
 		dtp = &omastertemplate;
 	dirtemplate = *dtp;
 	dirtemplate.dot_ino = htole32(ip->i_number);
 	dirtemplate.dotdot_ino = htole32(dp->i_number);
 	/*
 	 * note that in ext2 DIRBLKSIZ == blocksize, not DEV_BSIZE so let's
 	 * just redefine it - for this function only
 	 */
 #undef  DIRBLKSIZ
 #define DIRBLKSIZ  VTOI(dvp)->i_e2fs->e2fs_bsize
 	dirtemplate.dotdot_reclen = htole16(DIRBLKSIZ - 12);
 	buf = malloc(DIRBLKSIZ, M_TEMP, M_WAITOK | M_ZERO);
 	if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) {
 		dirtemplate.dotdot_reclen =
 		    htole16(le16toh(dirtemplate.dotdot_reclen) -
 		    sizeof(struct ext2fs_direct_tail));
 		ext2_init_dirent_tail(EXT2_DIRENT_TAIL(buf, DIRBLKSIZ));
 	}
 	memcpy(buf, &dirtemplate, sizeof(dirtemplate));
 	ext2_dirent_csum_set(ip, (struct ext2fs_direct_2 *)buf);
 	error = vn_rdwr(UIO_WRITE, tvp, (caddr_t)buf,
 	    DIRBLKSIZ, (off_t)0, UIO_SYSSPACE,
 	    IO_NODELOCKED | IO_SYNC | IO_NOMACCHECK, cnp->cn_cred, NOCRED,
 	    NULL, NULL);
 	if (error) {
 		ext2_dec_nlink(dp);
 		dp->i_flag |= IN_CHANGE;
 		goto bad;
 	}
 	if (DIRBLKSIZ > VFSTOEXT2(dvp->v_mount)->um_mountp->mnt_stat.f_bsize)
 		/* XXX should grow with balloc() */
 		panic("ext2_mkdir: blksize");
 	else {
 		ip->i_size = DIRBLKSIZ;
 		ip->i_flag |= IN_CHANGE;
 	}
 
 #ifdef UFS_ACL
 	if (dvp->v_mount->mnt_flag & MNT_ACLS) {
 		error = ext2_do_posix1e_acl_inheritance_dir(dvp, tvp, dmode,
 		    cnp->cn_cred, curthread);
 		if (error)
 			goto bad;
 	}
 
 #endif /* UFS_ACL */
 
 	/* Directory set up, now install its entry in the parent directory. */
 	error = ext2_direnter(ip, dvp, cnp);
 	if (error) {
 		ext2_dec_nlink(dp);
 		dp->i_flag |= IN_CHANGE;
 	}
 bad:
 	/*
 	 * No need to do an explicit VOP_TRUNCATE here, vrele will do this
 	 * for us because we set the link count to 0.
 	 */
 	if (error) {
 		ip->i_nlink = 0;
 		ip->i_flag |= IN_CHANGE;
 		vput(tvp);
 	} else
 		*ap->a_vpp = tvp;
 out:
 	free(buf, M_TEMP);
 	return (error);
 #undef  DIRBLKSIZ
 #define DIRBLKSIZ  DEV_BSIZE
 }
 
 /*
  * Rmdir system call.
  */
 static int
 ext2_rmdir(struct vop_rmdir_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct vnode *dvp = ap->a_dvp;
 	struct componentname *cnp = ap->a_cnp;
 	struct inode *ip, *dp;
 	int error;
 
 	ip = VTOI(vp);
 	dp = VTOI(dvp);
 
 	/*
 	 * Verify the directory is empty (and valid).
 	 * (Rmdir ".." won't be valid since
 	 *  ".." will contain a reference to
 	 *  the current directory and thus be
 	 *  non-empty.)
 	 */
 	if (!ext2_dirempty(ip, dp->i_number, cnp->cn_cred)) {
 		error = ENOTEMPTY;
 		goto out;
 	}
 	if ((dp->i_flags & APPEND)
 	    || (ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND))) {
 		error = EPERM;
 		goto out;
 	}
 	/*
 	 * Delete reference to directory before purging
 	 * inode.  If we crash in between, the directory
 	 * will be reattached to lost+found,
 	 */
 	error = ext2_dirremove(dvp, cnp);
 	if (error)
 		goto out;
 	ext2_dec_nlink(dp);
 	dp->i_flag |= IN_CHANGE;
 	cache_purge(dvp);
 	VOP_UNLOCK(dvp);
 	/*
 	 * Truncate inode.  The only stuff left
 	 * in the directory is "." and "..".
 	 */
 	ip->i_nlink = 0;
 	error = ext2_truncate(vp, (off_t)0, IO_SYNC, cnp->cn_cred,
 	    curthread);
 	cache_purge(ITOV(ip));
 	if (vn_lock(dvp, LK_EXCLUSIVE | LK_NOWAIT) != 0) {
 		VOP_UNLOCK(vp);
 		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 	}
 out:
 	return (error);
 }
 
 /*
  * symlink -- make a symbolic link
  */
 static int
 ext2_symlink(struct vop_symlink_args *ap)
 {
 	struct vnode *vp, **vpp = ap->a_vpp;
 	struct inode *ip;
 	int len, error;
 
 	error = ext2_makeinode(IFLNK | ap->a_vap->va_mode, ap->a_dvp,
 	    vpp, ap->a_cnp);
 	if (error)
 		return (error);
 	vp = *vpp;
 	len = strlen(ap->a_target);
 	if (len < VFSTOEXT2(vp->v_mount)->um_e2fs->e2fs_maxsymlinklen) {
 		ip = VTOI(vp);
 		bcopy(ap->a_target, (char *)ip->i_shortlink, len);
 		ip->i_size = len;
 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
 	} else
 		error = vn_rdwr(UIO_WRITE, vp, __DECONST(void *, ap->a_target),
 		    len, (off_t)0, UIO_SYSSPACE, IO_NODELOCKED | IO_NOMACCHECK,
 		    ap->a_cnp->cn_cred, NOCRED, NULL, NULL);
 	if (error)
 		vput(vp);
 	return (error);
 }
 
 /*
  * Return target name of a symbolic link
  */
 static int
 ext2_readlink(struct vop_readlink_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct inode *ip = VTOI(vp);
 	int isize;
 
 	isize = ip->i_size;
 	if (isize < VFSTOEXT2(vp->v_mount)->um_e2fs->e2fs_maxsymlinklen) {
 		uiomove((char *)ip->i_shortlink, isize, ap->a_uio);
 		return (0);
 	}
 	return (VOP_READ(vp, ap->a_uio, 0, ap->a_cred));
 }
 
 /*
  * Calculate the logical to physical mapping if not done already,
  * then call the device strategy routine.
  *
  * In order to be able to swap to a file, the ext2_bmaparray() operation may not
  * deadlock on memory.  See ext2_bmap() for details.
  */
 static int
 ext2_strategy(struct vop_strategy_args *ap)
 {
 	struct buf *bp = ap->a_bp;
 	struct vnode *vp = ap->a_vp;
 	struct bufobj *bo;
 	daddr_t blkno;
 	int error;
 
 	if (vp->v_type == VBLK || vp->v_type == VCHR)
 		panic("ext2_strategy: spec");
 	if (bp->b_blkno == bp->b_lblkno) {
 		if (VTOI(ap->a_vp)->i_flag & IN_E4EXTENTS)
 			error = ext4_bmapext(vp, bp->b_lblkno, &blkno, NULL, NULL);
 		else
 			error = ext2_bmaparray(vp, bp->b_lblkno, &blkno, NULL, NULL);
 
 		bp->b_blkno = blkno;
 		if (error) {
 			bp->b_error = error;
 			bp->b_ioflags |= BIO_ERROR;
 			bufdone(bp);
 			return (0);
 		}
 		if ((long)bp->b_blkno == -1)
 			vfs_bio_clrbuf(bp);
 	}
 	if ((long)bp->b_blkno == -1) {
 		bufdone(bp);
 		return (0);
 	}
 	bp->b_iooffset = dbtob(bp->b_blkno);
 	bo = VFSTOEXT2(vp->v_mount)->um_bo;
 	BO_STRATEGY(bo, bp);
 	return (0);
 }
 
 /*
  * Print out the contents of an inode.
  */
 static int
 ext2_print(struct vop_print_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct inode *ip = VTOI(vp);
 
 	vn_printf(ip->i_devvp, "\tino %ju", (uintmax_t)ip->i_number);
 	if (vp->v_type == VFIFO)
 		fifo_printinfo(vp);
 	printf("\n");
 	return (0);
 }
 
 /*
  * Close wrapper for fifos.
  *
  * Update the times on the inode then do device close.
  */
 static int
 ext2fifo_close(struct vop_close_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 
 	VI_LOCK(vp);
 	if (vp->v_usecount > 1)
 		ext2_itimes_locked(vp);
 	VI_UNLOCK(vp);
 	return (fifo_specops.vop_close(ap));
 }
 
 /*
  * Return POSIX pathconf information applicable to ext2 filesystems.
  */
 static int
 ext2_pathconf(struct vop_pathconf_args *ap)
 {
 	int error = 0;
 
 	switch (ap->a_name) {
 	case _PC_LINK_MAX:
 		if (EXT2_HAS_RO_COMPAT_FEATURE(VTOI(ap->a_vp)->i_e2fs,
 		    EXT2F_ROCOMPAT_DIR_NLINK))
 			*ap->a_retval = INT_MAX;
 		else
 			*ap->a_retval = EXT4_LINK_MAX;
 		break;
 	case _PC_NAME_MAX:
 		*ap->a_retval = NAME_MAX;
 		break;
 	case _PC_PIPE_BUF:
 		if (ap->a_vp->v_type == VDIR || ap->a_vp->v_type == VFIFO)
 			*ap->a_retval = PIPE_BUF;
 		else
 			error = EINVAL;
 		break;
 	case _PC_CHOWN_RESTRICTED:
 		*ap->a_retval = 1;
 		break;
 	case _PC_NO_TRUNC:
 		*ap->a_retval = 1;
 		break;
 
 #ifdef UFS_ACL
 	case _PC_ACL_EXTENDED:
 		if (ap->a_vp->v_mount->mnt_flag & MNT_ACLS)
 			*ap->a_retval = 1;
 		else
 			*ap->a_retval = 0;
 		break;
 	case _PC_ACL_PATH_MAX:
 		if (ap->a_vp->v_mount->mnt_flag & MNT_ACLS)
 			*ap->a_retval = ACL_MAX_ENTRIES;
 		else
 			*ap->a_retval = 3;
 		break;
 #endif /* UFS_ACL */
 
 	case _PC_MIN_HOLE_SIZE:
 		*ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_iosize;
 		break;
 	case _PC_PRIO_IO:
 		*ap->a_retval = 0;
 		break;
 	case _PC_SYNC_IO:
 		*ap->a_retval = 0;
 		break;
 	case _PC_ALLOC_SIZE_MIN:
 		*ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_bsize;
 		break;
 	case _PC_FILESIZEBITS:
 		*ap->a_retval = 64;
 		break;
 	case _PC_REC_INCR_XFER_SIZE:
 		*ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_iosize;
 		break;
 	case _PC_REC_MAX_XFER_SIZE:
 		*ap->a_retval = -1;	/* means ``unlimited'' */
 		break;
 	case _PC_REC_MIN_XFER_SIZE:
 		*ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_iosize;
 		break;
 	case _PC_REC_XFER_ALIGN:
 		*ap->a_retval = PAGE_SIZE;
 		break;
 	case _PC_SYMLINK_MAX:
 		*ap->a_retval = MAXPATHLEN;
 		break;
 
 	default:
 		error = vop_stdpathconf(ap);
 		break;
 	}
 	return (error);
 }
 
 /*
  * Vnode operation to remove a named attribute.
  */
 static int
 ext2_deleteextattr(struct vop_deleteextattr_args *ap)
 {
 	struct inode *ip;
 	struct m_ext2fs *fs;
 	int error;
 
 	ip = VTOI(ap->a_vp);
 	fs = ip->i_e2fs;
 
 	if (!EXT2_HAS_COMPAT_FEATURE(ip->i_e2fs, EXT2F_COMPAT_EXT_ATTR))
 		return (EOPNOTSUPP);
 
 	if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK)
 		return (EOPNOTSUPP);
 
 	error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
 	    ap->a_cred, ap->a_td, VWRITE);
 	if (error)
 		return (error);
 
 	error = ENOATTR;
 
 	if (EXT2_INODE_SIZE(fs) != E2FS_REV0_INODE_SIZE) {
 		error = ext2_extattr_inode_delete(ip, ap->a_attrnamespace, ap->a_name);
 		if (error != ENOATTR)
 			return (error);
 	}
 
 	if (ip->i_facl)
 		error = ext2_extattr_block_delete(ip, ap->a_attrnamespace, ap->a_name);
 
 	return (error);
 }
 
 /*
  * Vnode operation to retrieve a named extended attribute.
  */
 static int
 ext2_getextattr(struct vop_getextattr_args *ap)
 {
 	struct inode *ip;
 	struct m_ext2fs *fs;
 	int error;
 
 	ip = VTOI(ap->a_vp);
 	fs = ip->i_e2fs;
 
 	if (!EXT2_HAS_COMPAT_FEATURE(ip->i_e2fs, EXT2F_COMPAT_EXT_ATTR))
 		return (EOPNOTSUPP);
 
 	if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK)
 		return (EOPNOTSUPP);
 
 	error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
 	    ap->a_cred, ap->a_td, VREAD);
 	if (error)
 		return (error);
 
 	if (ap->a_size != NULL)
 		*ap->a_size = 0;
 
 	error = ENOATTR;
 
 	if (EXT2_INODE_SIZE(fs) != E2FS_REV0_INODE_SIZE) {
 		error = ext2_extattr_inode_get(ip, ap->a_attrnamespace,
 		    ap->a_name, ap->a_uio, ap->a_size);
 		if (error != ENOATTR)
 			return (error);
 	}
 
 	if (ip->i_facl)
 		error = ext2_extattr_block_get(ip, ap->a_attrnamespace,
 		    ap->a_name, ap->a_uio, ap->a_size);
 
 	return (error);
 }
 
 /*
  * Vnode operation to retrieve extended attributes on a vnode.
  */
 static int
 ext2_listextattr(struct vop_listextattr_args *ap)
 {
 	struct inode *ip;
 	struct m_ext2fs *fs;
 	int error;
 
 	ip = VTOI(ap->a_vp);
 	fs = ip->i_e2fs;
 
 	if (!EXT2_HAS_COMPAT_FEATURE(ip->i_e2fs, EXT2F_COMPAT_EXT_ATTR))
 		return (EOPNOTSUPP);
 
 	if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK)
 		return (EOPNOTSUPP);
 
 	error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
 	    ap->a_cred, ap->a_td, VREAD);
 	if (error)
 		return (error);
 
 	if (ap->a_size != NULL)
 		*ap->a_size = 0;
 
 	if (EXT2_INODE_SIZE(fs) != E2FS_REV0_INODE_SIZE) {
 		error = ext2_extattr_inode_list(ip, ap->a_attrnamespace,
 		    ap->a_uio, ap->a_size);
 		if (error)
 			return (error);
 	}
 
 	if (ip->i_facl)
 		error = ext2_extattr_block_list(ip, ap->a_attrnamespace,
 		    ap->a_uio, ap->a_size);
 
 	return (error);
 }
 
 /*
  * Vnode operation to set a named attribute.
  */
 static int
 ext2_setextattr(struct vop_setextattr_args *ap)
 {
 	struct inode *ip;
 	struct m_ext2fs *fs;
 	int error;
 
 	ip = VTOI(ap->a_vp);
 	fs = ip->i_e2fs;
 
 	if (!EXT2_HAS_COMPAT_FEATURE(ip->i_e2fs, EXT2F_COMPAT_EXT_ATTR))
 		return (EOPNOTSUPP);
 
 	if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK)
 		return (EOPNOTSUPP);
 
 	error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
 	    ap->a_cred, ap->a_td, VWRITE);
 	if (error)
 		return (error);
 
 	error = ext2_extattr_valid_attrname(ap->a_attrnamespace, ap->a_name);
 	if (error)
 		return (error);
 
 	if (EXT2_INODE_SIZE(fs) != E2FS_REV0_INODE_SIZE) {
 		error = ext2_extattr_inode_set(ip, ap->a_attrnamespace,
 		    ap->a_name, ap->a_uio);
 		if (error != ENOSPC)
 			return (error);
 	}
 
 	error = ext2_extattr_block_set(ip, ap->a_attrnamespace,
 	    ap->a_name, ap->a_uio);
 
 	return (error);
 }
 
 /*
  * Vnode pointer to File handle
  */
 /* ARGSUSED */
 static int
 ext2_vptofh(struct vop_vptofh_args *ap)
 {
 	struct inode *ip;
 	struct ufid *ufhp;
 
 	ip = VTOI(ap->a_vp);
 	ufhp = (struct ufid *)ap->a_fhp;
 	ufhp->ufid_len = sizeof(struct ufid);
 	ufhp->ufid_ino = ip->i_number;
 	ufhp->ufid_gen = ip->i_gen;
 	return (0);
 }
 
 /*
  * Initialize the vnode associated with a new inode, handle aliased
  * vnodes.
  */
 int
 ext2_vinit(struct mount *mntp, struct vop_vector *fifoops, struct vnode **vpp)
 {
 	struct inode *ip;
 	struct vnode *vp;
 
 	vp = *vpp;
 	ip = VTOI(vp);
 	vp->v_type = IFTOVT(ip->i_mode);
 	/*
 	 * Only unallocated inodes should be of type VNON.
 	 */
 	if (ip->i_mode != 0 && vp->v_type == VNON)
 		return (EINVAL);
 	if (vp->v_type == VFIFO)
 		vp->v_op = fifoops;
 
 	if (ip->i_number == EXT2_ROOTINO)
 		vp->v_vflag |= VV_ROOT;
 	ip->i_modrev = init_va_filerev();
 	*vpp = vp;
 	return (0);
 }
 
 /*
  * Allocate a new inode.
  */
 static int
 ext2_makeinode(int mode, struct vnode *dvp, struct vnode **vpp,
     struct componentname *cnp)
 {
 	struct inode *ip, *pdir;
 	struct vnode *tvp;
 	int error;
 
 	pdir = VTOI(dvp);
-#ifdef INVARIANTS
-	if ((cnp->cn_flags & HASBUF) == 0)
-		panic("ext2_makeinode: no name");
-#endif
 	*vpp = NULL;
 	if ((mode & IFMT) == 0)
 		mode |= IFREG;
 
 	error = ext2_valloc(dvp, mode, cnp->cn_cred, &tvp);
 	if (error) {
 		return (error);
 	}
 	ip = VTOI(tvp);
 	ip->i_gid = pdir->i_gid;
 #ifdef SUIDDIR
 	{
 		/*
 		 * if we are
 		 * not the owner of the directory,
 		 * and we are hacking owners here, (only do this where told to)
 		 * and we are not giving it TOO root, (would subvert quotas)
 		 * then go ahead and give it to the other user.
 		 * Note that this drops off the execute bits for security.
 		 */
 		if ((dvp->v_mount->mnt_flag & MNT_SUIDDIR) &&
 		    (pdir->i_mode & ISUID) &&
 		    (pdir->i_uid != cnp->cn_cred->cr_uid) && pdir->i_uid) {
 			ip->i_uid = pdir->i_uid;
 			mode &= ~07111;
 		} else {
 			ip->i_uid = cnp->cn_cred->cr_uid;
 		}
 	}
 #else
 	ip->i_uid = cnp->cn_cred->cr_uid;
 #endif
 	ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
 	ip->i_mode = mode;
 	tvp->v_type = IFTOVT(mode);	/* Rest init'd in getnewvnode(). */
 	ip->i_nlink = 1;
 	if ((ip->i_mode & ISGID) && !groupmember(ip->i_gid, cnp->cn_cred)) {
 		if (priv_check_cred(cnp->cn_cred, PRIV_VFS_RETAINSUGID))
 			ip->i_mode &= ~ISGID;
 	}
 
 	if (cnp->cn_flags & ISWHITEOUT)
 		ip->i_flags |= UF_OPAQUE;
 
 	/*
 	 * Make sure inode goes to disk before directory entry.
 	 */
 	error = ext2_update(tvp, !DOINGASYNC(tvp));
 	if (error)
 		goto bad;
 
 #ifdef UFS_ACL
 	if (dvp->v_mount->mnt_flag & MNT_ACLS) {
 		error = ext2_do_posix1e_acl_inheritance_file(dvp, tvp, mode,
 		    cnp->cn_cred, curthread);
 		if (error)
 			goto bad;
 	}
 #endif /* UFS_ACL */
 
 	error = ext2_direnter(ip, dvp, cnp);
 	if (error)
 		goto bad;
 
 	*vpp = tvp;
 	return (0);
 
 bad:
 	/*
 	 * Write error occurred trying to update the inode
 	 * or the directory so must deallocate the inode.
 	 */
 	ip->i_nlink = 0;
 	ip->i_flag |= IN_CHANGE;
 	vput(tvp);
 	return (error);
 }
 
 /*
  * Vnode op for reading.
  */
 static int
 ext2_read(struct vop_read_args *ap)
 {
 	struct vnode *vp;
 	struct inode *ip;
 	struct uio *uio;
 	struct m_ext2fs *fs;
 	struct buf *bp;
 	daddr_t lbn, nextlbn;
 	off_t bytesinfile;
 	long size, xfersize, blkoffset;
 	int error, orig_resid, seqcount;
 	int ioflag;
 
 	vp = ap->a_vp;
 	uio = ap->a_uio;
 	ioflag = ap->a_ioflag;
 
 	seqcount = ap->a_ioflag >> IO_SEQSHIFT;
 	ip = VTOI(vp);
 
 #ifdef INVARIANTS
 	if (uio->uio_rw != UIO_READ)
 		panic("%s: mode", "ext2_read");
 
 	if (vp->v_type == VLNK) {
 		if ((int)ip->i_size <
 		    VFSTOEXT2(vp->v_mount)->um_e2fs->e2fs_maxsymlinklen)
 			panic("%s: short symlink", "ext2_read");
 	} else if (vp->v_type != VREG && vp->v_type != VDIR)
 		panic("%s: type %d", "ext2_read", vp->v_type);
 #endif
 	orig_resid = uio->uio_resid;
 	KASSERT(orig_resid >= 0, ("ext2_read: uio->uio_resid < 0"));
 	if (orig_resid == 0)
 		return (0);
 	KASSERT(uio->uio_offset >= 0, ("ext2_read: uio->uio_offset < 0"));
 	fs = ip->i_e2fs;
 	if (uio->uio_offset < ip->i_size &&
 	    uio->uio_offset >= fs->e2fs_maxfilesize)
 		return (EOVERFLOW);
 
 	for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
 		if ((bytesinfile = ip->i_size - uio->uio_offset) <= 0)
 			break;
 		lbn = lblkno(fs, uio->uio_offset);
 		nextlbn = lbn + 1;
 		size = blksize(fs, ip, lbn);
 		blkoffset = blkoff(fs, uio->uio_offset);
 
 		xfersize = fs->e2fs_fsize - blkoffset;
 		if (uio->uio_resid < xfersize)
 			xfersize = uio->uio_resid;
 		if (bytesinfile < xfersize)
 			xfersize = bytesinfile;
 
 		if (lblktosize(fs, nextlbn) >= ip->i_size)
 			error = bread(vp, lbn, size, NOCRED, &bp);
 		else if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
 			error = cluster_read(vp, ip->i_size, lbn, size,
 			    NOCRED, blkoffset + uio->uio_resid, seqcount,
 			    0, &bp);
 		} else if (seqcount > 1) {
 			u_int nextsize = blksize(fs, ip, nextlbn);
 
 			error = breadn(vp, lbn,
 			    size, &nextlbn, &nextsize, 1, NOCRED, &bp);
 		} else
 			error = bread(vp, lbn, size, NOCRED, &bp);
 		if (error) {
 			brelse(bp);
 			bp = NULL;
 			break;
 		}
 
 		/*
 		 * We should only get non-zero b_resid when an I/O error
 		 * has occurred, which should cause us to break above.
 		 * However, if the short read did not cause an error,
 		 * then we want to ensure that we do not uiomove bad
 		 * or uninitialized data.
 		 */
 		size -= bp->b_resid;
 		if (size < xfersize) {
 			if (size == 0)
 				break;
 			xfersize = size;
 		}
 		error = uiomove((char *)bp->b_data + blkoffset,
 		    (int)xfersize, uio);
 		if (error)
 			break;
 		vfs_bio_brelse(bp, ioflag);
 	}
 
 	/*
 	 * This can only happen in the case of an error because the loop
 	 * above resets bp to NULL on each iteration and on normal
 	 * completion has not set a new value into it. so it must have come
 	 * from a 'break' statement
 	 */
 	if (bp != NULL)
 		vfs_bio_brelse(bp, ioflag);
 
 	if ((error == 0 || uio->uio_resid != orig_resid) &&
 	    (vp->v_mount->mnt_flag & (MNT_NOATIME | MNT_RDONLY)) == 0)
 		ip->i_flag |= IN_ACCESS;
 	return (error);
 }
 
 static int
 ext2_ioctl(struct vop_ioctl_args *ap)
 {
 	struct vnode *vp;
 	int error;
 
 	vp = ap->a_vp;
 	switch (ap->a_command) {
 	case FIOSEEKDATA:
 		if (!(VTOI(vp)->i_flag & IN_E4EXTENTS)) {
 			error = vn_lock(vp, LK_SHARED);
 			if (error == 0) {
 				error = ext2_bmap_seekdata(vp,
 				    (off_t *)ap->a_data);
 				VOP_UNLOCK(vp);
 			} else
 				error = EBADF;
 			return (error);
 		}
 	case FIOSEEKHOLE:
 		return (vn_bmap_seekhole(vp, ap->a_command,
 		    (off_t *)ap->a_data, ap->a_cred));
 	default:
 		return (ENOTTY);
 	}
 }
 
 /*
  * Vnode op for writing.
  */
 static int
 ext2_write(struct vop_write_args *ap)
 {
 	struct vnode *vp;
 	struct uio *uio;
 	struct inode *ip;
 	struct m_ext2fs *fs;
 	struct buf *bp;
 	daddr_t lbn;
 	off_t osize;
 	int blkoffset, error, flags, ioflag, resid, size, seqcount, xfersize;
 
 	ioflag = ap->a_ioflag;
 	uio = ap->a_uio;
 	vp = ap->a_vp;
 
 	seqcount = ioflag >> IO_SEQSHIFT;
 	ip = VTOI(vp);
 
 #ifdef INVARIANTS
 	if (uio->uio_rw != UIO_WRITE)
 		panic("%s: mode", "ext2_write");
 #endif
 
 	switch (vp->v_type) {
 	case VREG:
 		if (ioflag & IO_APPEND)
 			uio->uio_offset = ip->i_size;
 		if ((ip->i_flags & APPEND) && uio->uio_offset != ip->i_size)
 			return (EPERM);
 		/* FALLTHROUGH */
 	case VLNK:
 		break;
 	case VDIR:
 		/* XXX differs from ffs -- this is called from ext2_mkdir(). */
 		if ((ioflag & IO_SYNC) == 0)
 			panic("ext2_write: nonsync dir write");
 		break;
 	default:
 		panic("ext2_write: type %p %d (%jd,%jd)", (void *)vp,
 		    vp->v_type, (intmax_t)uio->uio_offset,
 		    (intmax_t)uio->uio_resid);
 	}
 
 	KASSERT(uio->uio_resid >= 0, ("ext2_write: uio->uio_resid < 0"));
 	KASSERT(uio->uio_offset >= 0, ("ext2_write: uio->uio_offset < 0"));
 	fs = ip->i_e2fs;
 	if ((uoff_t)uio->uio_offset + uio->uio_resid > fs->e2fs_maxfilesize)
 		return (EFBIG);
 	/*
 	 * Maybe this should be above the vnode op call, but so long as
 	 * file servers have no limits, I don't think it matters.
 	 */
 	if (vn_rlimit_fsize(vp, uio, uio->uio_td))
 		return (EFBIG);
 
 	resid = uio->uio_resid;
 	osize = ip->i_size;
 	if (seqcount > BA_SEQMAX)
 		flags = BA_SEQMAX << BA_SEQSHIFT;
 	else
 		flags = seqcount << BA_SEQSHIFT;
 	if ((ioflag & IO_SYNC) && !DOINGASYNC(vp))
 		flags |= IO_SYNC;
 
 	for (error = 0; uio->uio_resid > 0;) {
 		lbn = lblkno(fs, uio->uio_offset);
 		blkoffset = blkoff(fs, uio->uio_offset);
 		xfersize = fs->e2fs_fsize - blkoffset;
 		if (uio->uio_resid < xfersize)
 			xfersize = uio->uio_resid;
 		if (uio->uio_offset + xfersize > ip->i_size)
 			vnode_pager_setsize(vp, uio->uio_offset + xfersize);
 
 		/*
 		 * We must perform a read-before-write if the transfer size
 		 * does not cover the entire buffer.
 		 */
 		if (fs->e2fs_bsize > xfersize)
 			flags |= BA_CLRBUF;
 		else
 			flags &= ~BA_CLRBUF;
 		error = ext2_balloc(ip, lbn, blkoffset + xfersize,
 		    ap->a_cred, &bp, flags);
 		if (error != 0)
 			break;
 
 		if ((ioflag & (IO_SYNC | IO_INVAL)) == (IO_SYNC | IO_INVAL))
 			bp->b_flags |= B_NOCACHE;
 		if (uio->uio_offset + xfersize > ip->i_size)
 			ip->i_size = uio->uio_offset + xfersize;
 		size = blksize(fs, ip, lbn) - bp->b_resid;
 		if (size < xfersize)
 			xfersize = size;
 
 		error =
 		    uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio);
 		/*
 		 * If the buffer is not already filled and we encounter an
 		 * error while trying to fill it, we have to clear out any
 		 * garbage data from the pages instantiated for the buffer.
 		 * If we do not, a failed uiomove() during a write can leave
 		 * the prior contents of the pages exposed to a userland mmap.
 		 *
 		 * Note that we need only clear buffers with a transfer size
 		 * equal to the block size because buffers with a shorter
 		 * transfer size were cleared above by the call to ext2_balloc()
 		 * with the BA_CLRBUF flag set.
 		 *
 		 * If the source region for uiomove identically mmaps the
 		 * buffer, uiomove() performed the NOP copy, and the buffer
 		 * content remains valid because the page fault handler
 		 * validated the pages.
 		 */
 		if (error != 0 && (bp->b_flags & B_CACHE) == 0 &&
 		    fs->e2fs_bsize == xfersize)
 			vfs_bio_clrbuf(bp);
 
 		vfs_bio_set_flags(bp, ioflag);
 
 		/*
 		 * If IO_SYNC each buffer is written synchronously.  Otherwise
 		 * if we have a severe page deficiency write the buffer
 		 * asynchronously.  Otherwise try to cluster, and if that
 		 * doesn't do it then either do an async write (if O_DIRECT),
 		 * or a delayed write (if not).
 		 */
 		if (ioflag & IO_SYNC) {
 			(void)bwrite(bp);
 		} else if (vm_page_count_severe() ||
 			    buf_dirty_count_severe() ||
 		    (ioflag & IO_ASYNC)) {
 			bp->b_flags |= B_CLUSTEROK;
 			bawrite(bp);
 		} else if (xfersize + blkoffset == fs->e2fs_fsize) {
 			if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERW) == 0) {
 				bp->b_flags |= B_CLUSTEROK;
 				cluster_write(vp, &ip->i_clusterw, bp,
 				    ip->i_size, seqcount, 0);
 			} else {
 				bawrite(bp);
 			}
 		} else if (ioflag & IO_DIRECT) {
 			bp->b_flags |= B_CLUSTEROK;
 			bawrite(bp);
 		} else {
 			bp->b_flags |= B_CLUSTEROK;
 			bdwrite(bp);
 		}
 		if (error || xfersize == 0)
 			break;
 	}
 	/*
 	 * If we successfully wrote any data, and we are not the superuser
 	 * we clear the setuid and setgid bits as a precaution against
 	 * tampering.
 	 */
 	if ((ip->i_mode & (ISUID | ISGID)) && resid > uio->uio_resid &&
 	    ap->a_cred) {
 		if (priv_check_cred(ap->a_cred, PRIV_VFS_RETAINSUGID))
 			ip->i_mode &= ~(ISUID | ISGID);
 	}
 	if (error) {
 		if (ioflag & IO_UNIT) {
 			(void)ext2_truncate(vp, osize,
 			    ioflag & IO_SYNC, ap->a_cred, uio->uio_td);
 			uio->uio_offset -= resid - uio->uio_resid;
 			uio->uio_resid = resid;
 		}
 	}
 	if (uio->uio_resid != resid) {
 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
 		if (ioflag & IO_SYNC)
 			error = ext2_update(vp, 1);
 	}
 	return (error);
 }
diff --git a/sys/fs/fuse/fuse_vnops.c b/sys/fs/fuse/fuse_vnops.c
index 845ea04eca93..e89b831a4859 100644
--- a/sys/fs/fuse/fuse_vnops.c
+++ b/sys/fs/fuse/fuse_vnops.c
@@ -1,3182 +1,3168 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 2007-2009 Google Inc. and Amit Singh
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are
  * met:
  *
  * * Redistributions of source code must retain the above copyright
  *   notice, this list of conditions and the following disclaimer.
  * * Redistributions in binary form must reproduce the above
  *   copyright notice, this list of conditions and the following disclaimer
  *   in the documentation and/or other materials provided with the
  *   distribution.
  * * Neither the name of Google Inc. nor the names of its
  *   contributors may be used to endorse or promote products derived from
  *   this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  * Copyright (C) 2005 Csaba Henk.
  * All rights reserved.
  *
  * Copyright (c) 2019 The FreeBSD Foundation
  *
  * Portions of this software were developed by BFF Storage Systems, LLC under
  * sponsorship from the FreeBSD Foundation.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/module.h>
 #include <sys/systm.h>
 #include <sys/errno.h>
 #include <sys/kernel.h>
 #include <sys/conf.h>
 #include <sys/filio.h>
 #include <sys/uio.h>
 #include <sys/malloc.h>
 #include <sys/queue.h>
 #include <sys/limits.h>
 #include <sys/lock.h>
 #include <sys/rwlock.h>
 #include <sys/sx.h>
 #include <sys/proc.h>
 #include <sys/mount.h>
 #include <sys/vnode.h>
 #include <sys/namei.h>
 #include <sys/extattr.h>
 #include <sys/stat.h>
 #include <sys/unistd.h>
 #include <sys/filedesc.h>
 #include <sys/file.h>
 #include <sys/fcntl.h>
 #include <sys/dirent.h>
 #include <sys/bio.h>
 #include <sys/buf.h>
 #include <sys/sysctl.h>
 #include <sys/vmmeter.h>
 
 #include <vm/vm.h>
 #include <vm/vm_extern.h>
 #include <vm/pmap.h>
 #include <vm/vm_map.h>
 #include <vm/vm_page.h>
 #include <vm/vm_param.h>
 #include <vm/vm_object.h>
 #include <vm/vm_pager.h>
 #include <vm/vnode_pager.h>
 #include <vm/vm_object.h>
 
 #include "fuse.h"
 #include "fuse_file.h"
 #include "fuse_internal.h"
 #include "fuse_ipc.h"
 #include "fuse_node.h"
 #include "fuse_io.h"
 
 #include <sys/priv.h>
 
 /* Maximum number of hardlinks to a single FUSE file */
 #define FUSE_LINK_MAX                      UINT32_MAX
 
 SDT_PROVIDER_DECLARE(fusefs);
 /* 
  * Fuse trace probe:
  * arg0: verbosity.  Higher numbers give more verbose messages
  * arg1: Textual message
  */
 SDT_PROBE_DEFINE2(fusefs, , vnops, trace, "int", "char*");
 
 /* vnode ops */
 static vop_access_t fuse_vnop_access;
 static vop_advlock_t fuse_vnop_advlock;
 static vop_allocate_t fuse_vnop_allocate;
 static vop_bmap_t fuse_vnop_bmap;
 static vop_close_t fuse_fifo_close;
 static vop_close_t fuse_vnop_close;
 static vop_copy_file_range_t fuse_vnop_copy_file_range;
 static vop_create_t fuse_vnop_create;
 static vop_deallocate_t fuse_vnop_deallocate;
 static vop_deleteextattr_t fuse_vnop_deleteextattr;
 static vop_fdatasync_t fuse_vnop_fdatasync;
 static vop_fsync_t fuse_vnop_fsync;
 static vop_getattr_t fuse_vnop_getattr;
 static vop_getextattr_t fuse_vnop_getextattr;
 static vop_inactive_t fuse_vnop_inactive;
 static vop_ioctl_t fuse_vnop_ioctl;
 static vop_link_t fuse_vnop_link;
 static vop_listextattr_t fuse_vnop_listextattr;
 static vop_lookup_t fuse_vnop_lookup;
 static vop_mkdir_t fuse_vnop_mkdir;
 static vop_mknod_t fuse_vnop_mknod;
 static vop_open_t fuse_vnop_open;
 static vop_pathconf_t fuse_vnop_pathconf;
 static vop_read_t fuse_vnop_read;
 static vop_readdir_t fuse_vnop_readdir;
 static vop_readlink_t fuse_vnop_readlink;
 static vop_reclaim_t fuse_vnop_reclaim;
 static vop_remove_t fuse_vnop_remove;
 static vop_rename_t fuse_vnop_rename;
 static vop_rmdir_t fuse_vnop_rmdir;
 static vop_setattr_t fuse_vnop_setattr;
 static vop_setextattr_t fuse_vnop_setextattr;
 static vop_strategy_t fuse_vnop_strategy;
 static vop_symlink_t fuse_vnop_symlink;
 static vop_write_t fuse_vnop_write;
 static vop_getpages_t fuse_vnop_getpages;
 static vop_print_t fuse_vnop_print;
 static vop_vptofh_t fuse_vnop_vptofh;
 
 struct vop_vector fuse_fifoops = {
 	.vop_default =		&fifo_specops,
 	.vop_access =		fuse_vnop_access,
 	.vop_close =		fuse_fifo_close,
 	.vop_fsync =		fuse_vnop_fsync,
 	.vop_getattr =		fuse_vnop_getattr,
 	.vop_inactive =		fuse_vnop_inactive,
 	.vop_pathconf =		fuse_vnop_pathconf,
 	.vop_print =		fuse_vnop_print,
 	.vop_read =		VOP_PANIC,
 	.vop_reclaim =		fuse_vnop_reclaim,
 	.vop_setattr =		fuse_vnop_setattr,
 	.vop_write =		VOP_PANIC,
 	.vop_vptofh =		fuse_vnop_vptofh,
 };
 VFS_VOP_VECTOR_REGISTER(fuse_fifoops);
 
 struct vop_vector fuse_vnops = {
 	.vop_allocate =	fuse_vnop_allocate,
 	.vop_default = &default_vnodeops,
 	.vop_access = fuse_vnop_access,
 	.vop_advlock = fuse_vnop_advlock,
 	.vop_bmap = fuse_vnop_bmap,
 	.vop_close = fuse_vnop_close,
 	.vop_copy_file_range = fuse_vnop_copy_file_range,
 	.vop_create = fuse_vnop_create,
 	.vop_deallocate = fuse_vnop_deallocate,
 	.vop_deleteextattr = fuse_vnop_deleteextattr,
 	.vop_fsync = fuse_vnop_fsync,
 	.vop_fdatasync = fuse_vnop_fdatasync,
 	.vop_getattr = fuse_vnop_getattr,
 	.vop_getextattr = fuse_vnop_getextattr,
 	.vop_inactive = fuse_vnop_inactive,
 	.vop_ioctl = fuse_vnop_ioctl,
 	.vop_link = fuse_vnop_link,
 	.vop_listextattr = fuse_vnop_listextattr,
 	.vop_lookup = fuse_vnop_lookup,
 	.vop_mkdir = fuse_vnop_mkdir,
 	.vop_mknod = fuse_vnop_mknod,
 	.vop_open = fuse_vnop_open,
 	.vop_pathconf = fuse_vnop_pathconf,
 	/*
 	 * TODO: implement vop_poll after upgrading to protocol 7.21.
 	 * FUSE_POLL was added in protocol 7.11, but it's kind of broken until
 	 * 7.21, which adds the ability for the client to choose which poll
 	 * events it wants, and for a client to deregister a file handle
 	 */
 	.vop_read = fuse_vnop_read,
 	.vop_readdir = fuse_vnop_readdir,
 	.vop_readlink = fuse_vnop_readlink,
 	.vop_reclaim = fuse_vnop_reclaim,
 	.vop_remove = fuse_vnop_remove,
 	.vop_rename = fuse_vnop_rename,
 	.vop_rmdir = fuse_vnop_rmdir,
 	.vop_setattr = fuse_vnop_setattr,
 	.vop_setextattr = fuse_vnop_setextattr,
 	.vop_strategy = fuse_vnop_strategy,
 	.vop_symlink = fuse_vnop_symlink,
 	.vop_write = fuse_vnop_write,
 	.vop_getpages = fuse_vnop_getpages,
 	.vop_print = fuse_vnop_print,
 	.vop_vptofh = fuse_vnop_vptofh,
 };
 VFS_VOP_VECTOR_REGISTER(fuse_vnops);
 
 uma_zone_t fuse_pbuf_zone;
 
 /* Check permission for extattr operations, much like extattr_check_cred */
 static int
 fuse_extattr_check_cred(struct vnode *vp, int ns, struct ucred *cred,
 	struct thread *td, accmode_t accmode)
 {
 	struct mount *mp = vnode_mount(vp);
 	struct fuse_data *data = fuse_get_mpdata(mp);
 	int default_permissions = data->dataflags & FSESS_DEFAULT_PERMISSIONS;
 
 	/*
 	 * Kernel-invoked always succeeds.
 	 */
 	if (cred == NOCRED)
 		return (0);
 
 	/*
 	 * Do not allow privileged processes in jail to directly manipulate
 	 * system attributes.
 	 */
 	switch (ns) {
 	case EXTATTR_NAMESPACE_SYSTEM:
 		if (default_permissions) {
 			return (priv_check_cred(cred, PRIV_VFS_EXTATTR_SYSTEM));
 		}
 		return (0);
 	case EXTATTR_NAMESPACE_USER:
 		if (default_permissions) {
 			return (fuse_internal_access(vp, accmode, td, cred));
 		}
 		return (0);
 	default:
 		return (EPERM);
 	}
 }
 
 /* Get a filehandle for a directory */
 static int
 fuse_filehandle_get_dir(struct vnode *vp, struct fuse_filehandle **fufhp,
 	struct ucred *cred, pid_t pid)
 {
 	if (fuse_filehandle_get(vp, FREAD, fufhp, cred, pid) == 0)
 		return 0;
 	return fuse_filehandle_get(vp, FEXEC, fufhp, cred, pid);
 }
 
 /* Send FUSE_FLUSH for this vnode */
 static int
 fuse_flush(struct vnode *vp, struct ucred *cred, pid_t pid, int fflag)
 {
 	struct fuse_flush_in *ffi;
 	struct fuse_filehandle *fufh;
 	struct fuse_dispatcher fdi;
 	struct thread *td = curthread;
 	struct mount *mp = vnode_mount(vp);
 	int err;
 
 	if (fsess_not_impl(vnode_mount(vp), FUSE_FLUSH))
 		return 0;
 
 	err = fuse_filehandle_getrw(vp, fflag, &fufh, cred, pid);
 	if (err)
 		return err;
 
 	fdisp_init(&fdi, sizeof(*ffi));
 	fdisp_make_vp(&fdi, FUSE_FLUSH, vp, td, cred);
 	ffi = fdi.indata;
 	ffi->fh = fufh->fh_id;
 	/* 
 	 * If the file has a POSIX lock then we're supposed to set lock_owner.
 	 * If not, then lock_owner is undefined.  So we may as well always set
 	 * it.
 	 */
 	ffi->lock_owner = td->td_proc->p_pid;
 
 	err = fdisp_wait_answ(&fdi);
 	if (err == ENOSYS) {
 		fsess_set_notimpl(mp, FUSE_FLUSH);
 		err = 0;
 	}
 	fdisp_destroy(&fdi);
 	return err;
 }
 
 /* Close wrapper for fifos.  */
 static int
 fuse_fifo_close(struct vop_close_args *ap)
 {
 	return (fifo_specops.vop_close(ap));
 }
 
 /* Invalidate a range of cached data, whether dirty of not */
 static int
 fuse_inval_buf_range(struct vnode *vp, off_t filesize, off_t start, off_t end)
 {
 	struct buf *bp;
 	daddr_t left_lbn, end_lbn, right_lbn;
 	off_t new_filesize;
 	int iosize, left_on, right_on, right_blksize;
 
 	iosize = fuse_iosize(vp);
 	left_lbn = start / iosize;
 	end_lbn = howmany(end, iosize);
 	left_on = start & (iosize - 1);
 	if (left_on != 0) {
 		bp = getblk(vp, left_lbn, iosize, PCATCH, 0, 0);
 		if ((bp->b_flags & B_CACHE) != 0 && bp->b_dirtyend >= left_on) {
 			/*
 			 * Flush the dirty buffer, because we don't have a
 			 * byte-granular way to record which parts of the
 			 * buffer are valid.
 			 */
 			bwrite(bp);
 			if (bp->b_error)
 				return (bp->b_error);
 		} else {
 			brelse(bp);
 		}
 	}
 	right_on = end & (iosize - 1);
 	if (right_on != 0) {
 		right_lbn = end / iosize;
 		new_filesize = MAX(filesize, end);
 		right_blksize = MIN(iosize, new_filesize - iosize * right_lbn);
 		bp = getblk(vp, right_lbn, right_blksize, PCATCH, 0, 0);
 		if ((bp->b_flags & B_CACHE) != 0 && bp->b_dirtyoff < right_on) {
 			/*
 			 * Flush the dirty buffer, because we don't have a
 			 * byte-granular way to record which parts of the
 			 * buffer are valid.
 			 */
 			bwrite(bp);
 			if (bp->b_error)
 				return (bp->b_error);
 		} else {
 			brelse(bp);
 		}
 	}
 
 	v_inval_buf_range(vp, left_lbn, end_lbn, iosize);
 	return (0);
 }
 
 
 /* Send FUSE_LSEEK for this node */
 static int
 fuse_vnop_do_lseek(struct vnode *vp, struct thread *td, struct ucred *cred,
 	pid_t pid, off_t *offp, int whence)
 {
 	struct fuse_dispatcher fdi;
 	struct fuse_filehandle *fufh;
 	struct fuse_lseek_in *flsi;
 	struct fuse_lseek_out *flso;
 	struct mount *mp = vnode_mount(vp);
 	int err;
 
 	ASSERT_VOP_LOCKED(vp, __func__);
 
 	err = fuse_filehandle_getrw(vp, FREAD, &fufh, cred, pid);
 	if (err)
 		return (err);
 	fdisp_init(&fdi, sizeof(*flsi));
 	fdisp_make_vp(&fdi, FUSE_LSEEK, vp, td, cred);
 	flsi = fdi.indata;
 	flsi->fh = fufh->fh_id;
 	flsi->offset = *offp;
 	flsi->whence = whence;
 	err = fdisp_wait_answ(&fdi);
 	if (err == ENOSYS) {
 		fsess_set_notimpl(mp, FUSE_LSEEK);
 	} else if (err == 0) {
 		fsess_set_impl(mp, FUSE_LSEEK);
 		flso = fdi.answ;
 		*offp = flso->offset;
 	}
 	fdisp_destroy(&fdi);
 
 	return (err);
 }
 
 /*
     struct vnop_access_args {
 	struct vnode *a_vp;
 #if VOP_ACCESS_TAKES_ACCMODE_T
 	accmode_t a_accmode;
 #else
 	int a_mode;
 #endif
 	struct ucred *a_cred;
 	struct thread *a_td;
     };
 */
 static int
 fuse_vnop_access(struct vop_access_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	int accmode = ap->a_accmode;
 	struct ucred *cred = ap->a_cred;
 
 	struct fuse_data *data = fuse_get_mpdata(vnode_mount(vp));
 
 	int err;
 
 	if (fuse_isdeadfs(vp)) {
 		if (vnode_isvroot(vp)) {
 			return 0;
 		}
 		return ENXIO;
 	}
 	if (!(data->dataflags & FSESS_INITED)) {
 		if (vnode_isvroot(vp)) {
 			if (priv_check_cred(cred, PRIV_VFS_ADMIN) ||
 			    (fuse_match_cred(data->daemoncred, cred) == 0)) {
 				return 0;
 			}
 		}
 		return EBADF;
 	}
 	if (vnode_islnk(vp)) {
 		return 0;
 	}
 
 	err = fuse_internal_access(vp, accmode, ap->a_td, ap->a_cred);
 	return err;
 }
 
 /*
  * struct vop_advlock_args {
  *	struct vop_generic_args a_gen;
  *	struct vnode *a_vp;
  *	void *a_id;
  *	int a_op;
  *	struct flock *a_fl;
  *	int a_flags;
  * }
  */
 static int
 fuse_vnop_advlock(struct vop_advlock_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct flock *fl = ap->a_fl;
 	struct thread *td = curthread;
 	struct ucred *cred = td->td_ucred;
 	pid_t pid = td->td_proc->p_pid;
 	struct fuse_filehandle *fufh;
 	struct fuse_dispatcher fdi;
 	struct fuse_lk_in *fli;
 	struct fuse_lk_out *flo;
 	enum fuse_opcode op;
 	int dataflags, err;
 	int flags = ap->a_flags;
 
 	dataflags = fuse_get_mpdata(vnode_mount(vp))->dataflags;
 
 	if (fuse_isdeadfs(vp)) {
 		return ENXIO;
 	}
 
 	switch(ap->a_op) {
 	case F_GETLK:
 		op = FUSE_GETLK;
 		break;
 	case F_SETLK:
 		if (flags & F_WAIT)
 			op = FUSE_SETLKW;
 		else
 			op = FUSE_SETLK;
 		break;
 	case F_UNLCK:
 		op = FUSE_SETLK;
 		break;
 	default:
 		return EINVAL;
 	}
 
 	if (!(dataflags & FSESS_POSIX_LOCKS))
 		return vop_stdadvlock(ap);
 	/* FUSE doesn't properly support flock until protocol 7.17 */
 	if (flags & F_FLOCK)
 		return vop_stdadvlock(ap);
 
 	vn_lock(vp, LK_SHARED | LK_RETRY);
 
 	err = fuse_filehandle_get_anyflags(vp, &fufh, cred, pid);
 	if (err)
 		goto out;
 
 	fdisp_init(&fdi, sizeof(*fli));
 
 	fdisp_make_vp(&fdi, op, vp, td, cred);
 	fli = fdi.indata;
 	fli->fh = fufh->fh_id;
 	fli->owner = td->td_proc->p_pid;
 	fli->lk.start = fl->l_start;
 	if (fl->l_len != 0)
 		fli->lk.end = fl->l_start + fl->l_len - 1;
 	else
 		fli->lk.end = INT64_MAX;
 	fli->lk.type = fl->l_type;
 	fli->lk.pid = td->td_proc->p_pid;
 
 	err = fdisp_wait_answ(&fdi);
 	fdisp_destroy(&fdi);
 
 	if (err == 0 && op == FUSE_GETLK) {
 		flo = fdi.answ;
 		fl->l_type = flo->lk.type;
 		fl->l_pid = flo->lk.pid;
 		if (flo->lk.type != F_UNLCK) {
 			fl->l_start = flo->lk.start;
 			if (flo->lk.end == INT64_MAX)
 				fl->l_len = 0;
 			else
 				fl->l_len = flo->lk.end - flo->lk.start + 1;
 			fl->l_start = flo->lk.start;
 		}
 	}
 
 out:
 	VOP_UNLOCK(vp);
 	return err;
 }
 
 static int
 fuse_vnop_allocate(struct vop_allocate_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	off_t *len = ap->a_len;
 	off_t *offset = ap->a_offset;
 	struct ucred *cred = ap->a_cred;
 	struct fuse_filehandle *fufh;
 	struct mount *mp = vnode_mount(vp);
 	struct fuse_dispatcher fdi;
 	struct fuse_fallocate_in *ffi;
 	struct uio io;
 	pid_t pid = curthread->td_proc->p_pid;
 	struct fuse_vnode_data *fvdat = VTOFUD(vp);
 	off_t filesize;
 	int err;
 
 	if (fuse_isdeadfs(vp))
 		return (ENXIO);
 
 	switch (vp->v_type) {
 	case VFIFO:
 		return (ESPIPE);
 	case VLNK:
 	case VREG:
 		if (vfs_isrdonly(mp))
 			return (EROFS);
 		break;
 	default:
 		return (ENODEV);
 	}
 
 	if (vfs_isrdonly(mp))
 		return (EROFS);
 
 	if (fsess_not_impl(mp, FUSE_FALLOCATE))
 		return (EINVAL);
 
 	io.uio_offset = *offset;
 	io.uio_resid = *len;
 	err = vn_rlimit_fsize(vp, &io, curthread);
 	if (err)
 		return (err);
 
 	err = fuse_filehandle_getrw(vp, FWRITE, &fufh, cred, pid);
 	if (err)
 		return (err);
 
 	fuse_vnode_update(vp, FN_MTIMECHANGE | FN_CTIMECHANGE);
 
 	err = fuse_vnode_size(vp, &filesize, cred, curthread);
 	if (err)
 		return (err);
 	fuse_inval_buf_range(vp, filesize, *offset, *offset + *len);
 
 	fdisp_init(&fdi, sizeof(*ffi));
 	fdisp_make_vp(&fdi, FUSE_FALLOCATE, vp, curthread, cred);
 	ffi = fdi.indata;
 	ffi->fh = fufh->fh_id;
 	ffi->offset = *offset;
 	ffi->length = *len;
 	ffi->mode = 0;
 	err = fdisp_wait_answ(&fdi);
 
 	if (err == ENOSYS) {
 		fsess_set_notimpl(mp, FUSE_FALLOCATE);
 		err = EINVAL;
 	} else if (err == EOPNOTSUPP) {
 		/*
 		 * The file system server does not support FUSE_FALLOCATE with
 		 * the supplied mode for this particular file.
 		 */
 		err = EINVAL;
 	} else if (!err) {
 		*offset += *len;
 		*len = 0;
 		fuse_vnode_undirty_cached_timestamps(vp, false);
 		fuse_internal_clear_suid_on_write(vp, cred, curthread);
 		if (*offset > fvdat->cached_attrs.va_size) {
 			fuse_vnode_setsize(vp, *offset, false);
 			getnanouptime(&fvdat->last_local_modify);
 		}
 	}
 
 	return (err);
 }
 
 /* {
 	struct vnode *a_vp;
 	daddr_t a_bn;
 	struct bufobj **a_bop;
 	daddr_t *a_bnp;
 	int *a_runp;
 	int *a_runb;
 } */
 static int
 fuse_vnop_bmap(struct vop_bmap_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct bufobj **bo = ap->a_bop;
 	struct thread *td = curthread;
 	struct mount *mp;
 	struct fuse_dispatcher fdi;
 	struct fuse_bmap_in *fbi;
 	struct fuse_bmap_out *fbo;
 	struct fuse_data *data;
 	struct fuse_vnode_data *fvdat = VTOFUD(vp);
 	uint64_t biosize;
 	off_t fsize;
 	daddr_t lbn = ap->a_bn;
 	daddr_t *pbn = ap->a_bnp;
 	int *runp = ap->a_runp;
 	int *runb = ap->a_runb;
 	int error = 0;
 	int maxrun;
 
 	if (fuse_isdeadfs(vp)) {
 		return ENXIO;
 	}
 
 	mp = vnode_mount(vp);
 	data = fuse_get_mpdata(mp);
 	biosize = fuse_iosize(vp);
 	maxrun = MIN(vp->v_mount->mnt_iosize_max / biosize - 1,
 		data->max_readahead_blocks);
 
 	if (bo != NULL)
 		*bo = &vp->v_bufobj;
 
 	/*
 	 * The FUSE_BMAP operation does not include the runp and runb
 	 * variables, so we must guess.  Report nonzero contiguous runs so
 	 * cluster_read will combine adjacent reads.  It's worthwhile to reduce
 	 * upcalls even if we don't know the true physical layout of the file.
 	 * 
 	 * FUSE file systems may opt out of read clustering in two ways:
 	 * * mounting with -onoclusterr
 	 * * Setting max_readahead <= maxbcachebuf during FUSE_INIT
 	 */
 	if (runb != NULL)
 		*runb = MIN(lbn, maxrun);
 	if (runp != NULL && maxrun == 0)
 		*runp = 0;
 	else if (runp != NULL) {
 		/*
 		 * If the file's size is cached, use that value to calculate
 		 * runp, even if the cache is expired.  runp is only advisory,
 		 * and the risk of getting it wrong is not worth the cost of
 		 * another upcall.
 		 */
 		if (fvdat->cached_attrs.va_size != VNOVAL)
 			fsize = fvdat->cached_attrs.va_size;
 		else
 			error = fuse_vnode_size(vp, &fsize, td->td_ucred, td);
 		if (error == 0)
 			*runp = MIN(MAX(0, fsize / (off_t)biosize - lbn - 1),
 				    maxrun);
 		else
 			*runp = 0;
 	}
 
 	if (fsess_maybe_impl(mp, FUSE_BMAP)) {
 		fdisp_init(&fdi, sizeof(*fbi));
 		fdisp_make_vp(&fdi, FUSE_BMAP, vp, td, td->td_ucred);
 		fbi = fdi.indata;
 		fbi->block = lbn;
 		fbi->blocksize = biosize;
 		error = fdisp_wait_answ(&fdi);
 		if (error == ENOSYS) {
 			fdisp_destroy(&fdi);
 			fsess_set_notimpl(mp, FUSE_BMAP);
 			error = 0;
 		} else {
 			fbo = fdi.answ;
 			if (error == 0 && pbn != NULL)
 				*pbn = fbo->block;
 			fdisp_destroy(&fdi);
 			return error;
 		}
 	}
 
 	/* If the daemon doesn't support BMAP, make up a sensible default */
 	if (pbn != NULL)
 		*pbn = lbn * btodb(biosize);
 	return (error);
 }
 
 /*
     struct vop_close_args {
 	struct vnode *a_vp;
 	int  a_fflag;
 	struct ucred *a_cred;
 	struct thread *a_td;
     };
 */
 static int
 fuse_vnop_close(struct vop_close_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct ucred *cred = ap->a_cred;
 	int fflag = ap->a_fflag;
 	struct thread *td = ap->a_td;
 	pid_t pid = td->td_proc->p_pid;
 	struct fuse_vnode_data *fvdat = VTOFUD(vp);
 	int err = 0;
 
 	if (fuse_isdeadfs(vp))
 		return 0;
 	if (vnode_isdir(vp))
 		return 0;
 	if (fflag & IO_NDELAY)
 		return 0;
 
 	err = fuse_flush(vp, cred, pid, fflag);
 	if (err == 0 && (fvdat->flag & FN_ATIMECHANGE)) {
 		struct vattr vap;
 
 		VATTR_NULL(&vap);
 		vap.va_atime = fvdat->cached_attrs.va_atime;
 		err = fuse_internal_setattr(vp, &vap, td, NULL);
 	}
 	/* TODO: close the file handle, if we're sure it's no longer used */
 	if ((fvdat->flag & FN_SIZECHANGE) != 0) {
 		fuse_vnode_savesize(vp, cred, td->td_proc->p_pid);
 	}
 	return err;
 }
 
 /*
    struct vop_copy_file_range_args {
 	struct vop_generic_args a_gen;
 	struct vnode *a_invp;
 	off_t *a_inoffp;
 	struct vnode *a_outvp;
 	off_t *a_outoffp;
 	size_t *a_lenp;
 	unsigned int a_flags;
 	struct ucred *a_incred;
 	struct ucred *a_outcred;
 	struct thread *a_fsizetd;
 }
  */
 static int
 fuse_vnop_copy_file_range(struct vop_copy_file_range_args *ap)
 {
 	struct vnode *invp = ap->a_invp;
 	struct vnode *outvp = ap->a_outvp;
 	struct mount *mp = vnode_mount(invp);
 	struct fuse_vnode_data *outfvdat = VTOFUD(outvp);
 	struct fuse_dispatcher fdi;
 	struct fuse_filehandle *infufh, *outfufh;
 	struct fuse_copy_file_range_in *fcfri;
 	struct ucred *incred = ap->a_incred;
 	struct ucred *outcred = ap->a_outcred;
 	struct fuse_write_out *fwo;
 	struct thread *td;
 	struct uio io;
 	off_t outfilesize;
 	pid_t pid;
 	int err;
 
 	if (mp != vnode_mount(outvp))
 		goto fallback;
 
 	if (incred->cr_uid != outcred->cr_uid)
 		goto fallback;
 
 	if (incred->cr_groups[0] != outcred->cr_groups[0])
 		goto fallback;
 
 	if (fsess_not_impl(mp, FUSE_COPY_FILE_RANGE))
 		goto fallback;
 
 	if (ap->a_fsizetd == NULL)
 		td = curthread;
 	else
 		td = ap->a_fsizetd;
 	pid = td->td_proc->p_pid;
 
 	/* Lock both vnodes, avoiding risk of deadlock. */
 	do {
 		err = vn_lock(outvp, LK_EXCLUSIVE);
 		if (invp == outvp)
 			break;
 		if (err == 0) {
 			err = vn_lock(invp, LK_SHARED | LK_NOWAIT);
 			if (err == 0)
 				break;
 			VOP_UNLOCK(outvp);
 			err = vn_lock(invp, LK_SHARED);
 			if (err == 0)
 				VOP_UNLOCK(invp);
 		}
 	} while (err == 0);
 	if (err != 0)
 		return (err);
 
 	err = fuse_filehandle_getrw(invp, FREAD, &infufh, incred, pid);
 	if (err)
 		goto unlock;
 
 	err = fuse_filehandle_getrw(outvp, FWRITE, &outfufh, outcred, pid);
 	if (err)
 		goto unlock;
 
 	if (ap->a_fsizetd) {
 		io.uio_offset = *ap->a_outoffp;
 		io.uio_resid = *ap->a_lenp;
 		err = vn_rlimit_fsize(outvp, &io, ap->a_fsizetd);
 		if (err)
 			goto unlock;
 	}
 
 	err = fuse_vnode_size(outvp, &outfilesize, outcred, curthread);
 	if (err)
 		goto unlock;
 
 	err = fuse_inval_buf_range(outvp, outfilesize, *ap->a_outoffp,
 		*ap->a_outoffp + *ap->a_lenp);
 	if (err)
 		goto unlock;
 
 	fdisp_init(&fdi, sizeof(*fcfri));
 	fdisp_make_vp(&fdi, FUSE_COPY_FILE_RANGE, invp, td, incred);
 	fcfri = fdi.indata;
 	fcfri->fh_in = infufh->fh_id;
 	fcfri->off_in = *ap->a_inoffp;
 	fcfri->nodeid_out = VTOI(outvp);
 	fcfri->fh_out = outfufh->fh_id;
 	fcfri->off_out = *ap->a_outoffp;
 	fcfri->len = *ap->a_lenp;
 	fcfri->flags = 0;
 
 	err = fdisp_wait_answ(&fdi);
 	if (err == 0) {
 		fwo = fdi.answ;
 		*ap->a_lenp = fwo->size;
 		*ap->a_inoffp += fwo->size;
 		*ap->a_outoffp += fwo->size;
 		fuse_internal_clear_suid_on_write(outvp, outcred, td);
 		if (*ap->a_outoffp > outfvdat->cached_attrs.va_size) {
                         fuse_vnode_setsize(outvp, *ap->a_outoffp, false);
 			getnanouptime(&outfvdat->last_local_modify);
 		}
 		fuse_vnode_update(invp, FN_ATIMECHANGE);
 		fuse_vnode_update(outvp, FN_MTIMECHANGE | FN_CTIMECHANGE);
 	}
 	fdisp_destroy(&fdi);
 
 unlock:
 	if (invp != outvp)
 		VOP_UNLOCK(invp);
 	VOP_UNLOCK(outvp);
 
 	if (err == ENOSYS) {
 		fsess_set_notimpl(mp, FUSE_COPY_FILE_RANGE);
 fallback:
 		err = vn_generic_copy_file_range(ap->a_invp, ap->a_inoffp,
 		    ap->a_outvp, ap->a_outoffp, ap->a_lenp, ap->a_flags,
 		    ap->a_incred, ap->a_outcred, ap->a_fsizetd);
 	}
 
 	return (err);
 }
 
 static void
 fdisp_make_mknod_for_fallback(
 	struct fuse_dispatcher *fdip,
 	struct componentname *cnp,
 	struct vnode *dvp,
 	uint64_t parentnid,
 	struct thread *td,
 	struct ucred *cred,
 	mode_t mode,
 	enum fuse_opcode *op)
 {
 	struct fuse_mknod_in *fmni;
 
 	fdisp_init(fdip, sizeof(*fmni) + cnp->cn_namelen + 1);
 	*op = FUSE_MKNOD;
 	fdisp_make(fdip, *op, vnode_mount(dvp), parentnid, td, cred);
 	fmni = fdip->indata;
 	fmni->mode = mode;
 	fmni->rdev = 0;
 	memcpy((char *)fdip->indata + sizeof(*fmni), cnp->cn_nameptr,
 	    cnp->cn_namelen);
 	((char *)fdip->indata)[sizeof(*fmni) + cnp->cn_namelen] = '\0';
 }
 /*
     struct vnop_create_args {
 	struct vnode *a_dvp;
 	struct vnode **a_vpp;
 	struct componentname *a_cnp;
 	struct vattr *a_vap;
     };
 */
 static int
 fuse_vnop_create(struct vop_create_args *ap)
 {
 	struct vnode *dvp = ap->a_dvp;
 	struct vnode **vpp = ap->a_vpp;
 	struct componentname *cnp = ap->a_cnp;
 	struct vattr *vap = ap->a_vap;
 	struct thread *td = curthread;
 	struct ucred *cred = cnp->cn_cred;
 
 	struct fuse_data *data;
 	struct fuse_create_in *fci;
 	struct fuse_entry_out *feo;
 	struct fuse_open_out *foo;
 	struct fuse_dispatcher fdi, fdi2;
 	struct fuse_dispatcher *fdip = &fdi;
 	struct fuse_dispatcher *fdip2 = NULL;
 
 	int err;
 
 	struct mount *mp = vnode_mount(dvp);
 	data = fuse_get_mpdata(mp);
 	uint64_t parentnid = VTOFUD(dvp)->nid;
 	mode_t mode = MAKEIMODE(vap->va_type, vap->va_mode);
 	enum fuse_opcode op;
 	int flags;
 
 	if (fuse_isdeadfs(dvp))
 		return ENXIO;
 
 	/* FUSE expects sockets to be created with FUSE_MKNOD */
 	if (vap->va_type == VSOCK)
 		return fuse_internal_mknod(dvp, vpp, cnp, vap);
 
 	/* 
 	 * VOP_CREATE doesn't tell us the open(2) flags, so we guess.  Only a
 	 * writable mode makes sense, and we might as well include readability
 	 * too.
 	 */
 	flags = O_RDWR;
 
 	bzero(&fdi, sizeof(fdi));
 
 	if (vap->va_type != VREG)
 		return (EINVAL);
 
 	if (fsess_not_impl(mp, FUSE_CREATE) || vap->va_type == VSOCK) {
 		/* Fallback to FUSE_MKNOD/FUSE_OPEN */
 		fdisp_make_mknod_for_fallback(fdip, cnp, dvp, parentnid, td,
 			cred, mode, &op);
 	} else {
 		/* Use FUSE_CREATE */
 		size_t insize;
 
 		op = FUSE_CREATE;
 		fdisp_init(fdip, sizeof(*fci) + cnp->cn_namelen + 1);
 		fdisp_make(fdip, op, vnode_mount(dvp), parentnid, td, cred);
 		fci = fdip->indata;
 		fci->mode = mode;
 		fci->flags = O_CREAT | flags;
 		if (fuse_libabi_geq(data, 7, 12)) {
 			insize = sizeof(*fci);
 			fci->umask = td->td_proc->p_pd->pd_cmask;
 		} else {
 			insize = sizeof(struct fuse_open_in);
 		}
 
 		memcpy((char *)fdip->indata + insize, cnp->cn_nameptr,
 		    cnp->cn_namelen);
 		((char *)fdip->indata)[insize + cnp->cn_namelen] = '\0';
 	}
 
 	err = fdisp_wait_answ(fdip);
 
 	if (err) {
 		if (err == ENOSYS && op == FUSE_CREATE) {
 			fsess_set_notimpl(mp, FUSE_CREATE);
 			fdisp_destroy(fdip);
 			fdisp_make_mknod_for_fallback(fdip, cnp, dvp,
 				parentnid, td, cred, mode, &op);
 			err = fdisp_wait_answ(fdip);
 		}
 		if (err)
 			goto out;
 	}
 
 	feo = fdip->answ;
 
 	if ((err = fuse_internal_checkentry(feo, vap->va_type))) {
 		goto out;
 	}
 
 	if (op == FUSE_CREATE) {
 		if (fuse_libabi_geq(data, 7, 9))
 			foo = (struct fuse_open_out*)(feo + 1);
 		else
 			foo = (struct fuse_open_out*)((char*)feo +
 				FUSE_COMPAT_ENTRY_OUT_SIZE);
 	} else {
 		/* Issue a separate FUSE_OPEN */
 		struct fuse_open_in *foi;
 
 		fdip2 = &fdi2;
 		fdisp_init(fdip2, sizeof(*foi));
 		fdisp_make(fdip2, FUSE_OPEN, vnode_mount(dvp), feo->nodeid, td,
 			cred);
 		foi = fdip2->indata;
 		foi->flags = flags;
 		err = fdisp_wait_answ(fdip2);
 		if (err)
 			goto out;
 		foo = fdip2->answ;
 	}
 	err = fuse_vnode_get(mp, feo, feo->nodeid, dvp, vpp, cnp, vap->va_type);
 	if (err) {
 		struct fuse_release_in *fri;
 		uint64_t nodeid = feo->nodeid;
 		uint64_t fh_id = foo->fh;
 
 		fdisp_init(fdip, sizeof(*fri));
 		fdisp_make(fdip, FUSE_RELEASE, mp, nodeid, td, cred);
 		fri = fdip->indata;
 		fri->fh = fh_id;
 		fri->flags = flags;
 		fuse_insert_callback(fdip->tick, fuse_internal_forget_callback);
 		fuse_insert_message(fdip->tick, false);
 		goto out;
 	}
 	ASSERT_VOP_ELOCKED(*vpp, "fuse_vnop_create");
 	fuse_internal_cache_attrs(*vpp, &feo->attr, feo->attr_valid,
 		feo->attr_valid_nsec, NULL, true);
 
 	fuse_filehandle_init(*vpp, FUFH_RDWR, NULL, td, cred, foo);
 	fuse_vnode_open(*vpp, foo->open_flags, td);
 	/* 
 	 * Purge the parent's attribute cache because the daemon should've
 	 * updated its mtime and ctime
 	 */
 	fuse_vnode_clear_attr_cache(dvp);
 	cache_purge_negative(dvp);
 
 out:
 	if (fdip2)
 		fdisp_destroy(fdip2);
 	fdisp_destroy(fdip);
 	return err;
 }
 
 /*
     struct vnop_fdatasync_args {
 	struct vop_generic_args a_gen;
 	struct vnode * a_vp;
 	struct thread * a_td;
     };
 */
 static int
 fuse_vnop_fdatasync(struct vop_fdatasync_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct thread *td = ap->a_td;
 	int waitfor = MNT_WAIT;
 
 	int err = 0;
 
 	if (fuse_isdeadfs(vp)) {
 		return 0;
 	}
 	if ((err = vop_stdfdatasync_buf(ap)))
 		return err;
 
 	return fuse_internal_fsync(vp, td, waitfor, true);
 }
 
 /*
     struct vnop_fsync_args {
 	struct vop_generic_args a_gen;
 	struct vnode * a_vp;
 	int  a_waitfor;
 	struct thread * a_td;
     };
 */
 static int
 fuse_vnop_fsync(struct vop_fsync_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct thread *td = ap->a_td;
 	int waitfor = ap->a_waitfor;
 	int err = 0;
 
 	if (fuse_isdeadfs(vp)) {
 		return 0;
 	}
 	if ((err = vop_stdfsync(ap)))
 		return err;
 
 	return fuse_internal_fsync(vp, td, waitfor, false);
 }
 
 /*
     struct vnop_getattr_args {
 	struct vnode *a_vp;
 	struct vattr *a_vap;
 	struct ucred *a_cred;
 	struct thread *a_td;
     };
 */
 static int
 fuse_vnop_getattr(struct vop_getattr_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct vattr *vap = ap->a_vap;
 	struct ucred *cred = ap->a_cred;
 	struct thread *td = curthread;
 
 	int err = 0;
 	int dataflags;
 
 	dataflags = fuse_get_mpdata(vnode_mount(vp))->dataflags;
 
 	/* Note that we are not bailing out on a dead file system just yet. */
 
 	if (!(dataflags & FSESS_INITED)) {
 		if (!vnode_isvroot(vp)) {
 			fdata_set_dead(fuse_get_mpdata(vnode_mount(vp)));
 			err = ENOTCONN;
 			return err;
 		} else {
 			goto fake;
 		}
 	}
 	err = fuse_internal_getattr(vp, vap, cred, td);
 	if (err == ENOTCONN && vnode_isvroot(vp)) {
 		/* see comment in fuse_vfsop_statfs() */
 		goto fake;
 	} else {
 		return err;
 	}
 
 fake:
 	bzero(vap, sizeof(*vap));
 	vap->va_type = vnode_vtype(vp);
 
 	return 0;
 }
 
 /*
     struct vnop_inactive_args {
 	struct vnode *a_vp;
     };
 */
 static int
 fuse_vnop_inactive(struct vop_inactive_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct thread *td = curthread;
 
 	struct fuse_vnode_data *fvdat = VTOFUD(vp);
 	struct fuse_filehandle *fufh, *fufh_tmp;
 
 	int need_flush = 1;
 
 	LIST_FOREACH_SAFE(fufh, &fvdat->handles, next, fufh_tmp) {
 		if (need_flush && vp->v_type == VREG) {
 			if ((VTOFUD(vp)->flag & FN_SIZECHANGE) != 0) {
 				fuse_vnode_savesize(vp, NULL, 0);
 			}
 			if ((fvdat->flag & FN_REVOKED) != 0)
 				fuse_io_invalbuf(vp, td);
 			else
 				fuse_io_flushbuf(vp, MNT_WAIT, td);
 			need_flush = 0;
 		}
 		fuse_filehandle_close(vp, fufh, td, NULL);
 	}
 
 	if ((fvdat->flag & FN_REVOKED) != 0)
 		vrecycle(vp);
 
 	return 0;
 }
 
 /*
     struct vnop_ioctl_args {
 	struct vnode *a_vp;
 	u_long a_command;
 	caddr_t a_data;
 	int a_fflag;
 	struct ucred *a_cred;
 	struct thread *a_td;
     };
 */
 static int
 fuse_vnop_ioctl(struct vop_ioctl_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct mount *mp = vnode_mount(vp);
 	struct ucred *cred = ap->a_cred;
 	off_t *offp;
 	pid_t pid = ap->a_td->td_proc->p_pid;
 	int err;
 
 	switch (ap->a_command) {
 	case FIOSEEKDATA:
 	case FIOSEEKHOLE:
 		/* Call FUSE_LSEEK, if we can, or fall back to vop_stdioctl */
 		if (fsess_maybe_impl(mp, FUSE_LSEEK)) {
 			int whence;
 
 			offp = ap->a_data;
 			if (ap->a_command == FIOSEEKDATA)
 				whence = SEEK_DATA;
 			else
 				whence = SEEK_HOLE;
 
 			vn_lock(vp, LK_SHARED | LK_RETRY);
 			err = fuse_vnop_do_lseek(vp, ap->a_td, cred, pid, offp,
 			    whence);
 			VOP_UNLOCK(vp);
 		}
 		if (fsess_not_impl(mp, FUSE_LSEEK))
 			err = vop_stdioctl(ap);
 		break;
 	default:
 		/* TODO: implement FUSE_IOCTL */
 		err = ENOTTY;
 		break;
 	}
 	return (err);
 }
 
 
 /*
     struct vnop_link_args {
 	struct vnode *a_tdvp;
 	struct vnode *a_vp;
 	struct componentname *a_cnp;
     };
 */
 static int
 fuse_vnop_link(struct vop_link_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct vnode *tdvp = ap->a_tdvp;
 	struct componentname *cnp = ap->a_cnp;
 
 	struct vattr *vap = VTOVA(vp);
 
 	struct fuse_dispatcher fdi;
 	struct fuse_entry_out *feo;
 	struct fuse_link_in fli;
 
 	int err;
 
 	if (fuse_isdeadfs(vp)) {
 		return ENXIO;
 	}
 	if (vnode_mount(tdvp) != vnode_mount(vp)) {
 		return EXDEV;
 	}
 
 	/*
 	 * This is a seatbelt check to protect naive userspace filesystems from
 	 * themselves and the limitations of the FUSE IPC protocol.  If a
 	 * filesystem does not allow attribute caching, assume it is capable of
 	 * validating that nlink does not overflow.
 	 */
 	if (vap != NULL && vap->va_nlink >= FUSE_LINK_MAX)
 		return EMLINK;
 	fli.oldnodeid = VTOI(vp);
 
 	fdisp_init(&fdi, 0);
 	fuse_internal_newentry_makerequest(vnode_mount(tdvp), VTOI(tdvp), cnp,
 	    FUSE_LINK, &fli, sizeof(fli), &fdi);
 	if ((err = fdisp_wait_answ(&fdi))) {
 		goto out;
 	}
 	feo = fdi.answ;
 
 	if (fli.oldnodeid != feo->nodeid) {
 		struct fuse_data *data = fuse_get_mpdata(vnode_mount(vp));
 		fuse_warn(data, FSESS_WARN_ILLEGAL_INODE,
 			"Assigned wrong inode for a hard link.");
 		fuse_vnode_clear_attr_cache(vp);
 		fuse_vnode_clear_attr_cache(tdvp);
 		err = EIO;
 		goto out;
 	}
 
 	err = fuse_internal_checkentry(feo, vnode_vtype(vp));
 	if (!err) {
 		/* 
 		 * Purge the parent's attribute cache because the daemon
 		 * should've updated its mtime and ctime
 		 */
 		fuse_vnode_clear_attr_cache(tdvp);
 		fuse_internal_cache_attrs(vp, &feo->attr, feo->attr_valid,
 			feo->attr_valid_nsec, NULL, true);
 	}
 out:
 	fdisp_destroy(&fdi);
 	return err;
 }
 
 struct fuse_lookup_alloc_arg {
 	struct fuse_entry_out *feo;
 	struct componentname *cnp;
 	uint64_t nid;
 	enum vtype vtyp;
 };
 
 /* Callback for vn_get_ino */
 static int
 fuse_lookup_alloc(struct mount *mp, void *arg, int lkflags, struct vnode **vpp)
 {
 	struct fuse_lookup_alloc_arg *flaa = arg;
 
 	return fuse_vnode_get(mp, flaa->feo, flaa->nid, NULL, vpp, flaa->cnp,
 		flaa->vtyp);
 }
 
 SDT_PROBE_DEFINE3(fusefs, , vnops, cache_lookup,
 	"int", "struct timespec*", "struct timespec*");
 /*
     struct vnop_lookup_args {
 	struct vnodeop_desc *a_desc;
 	struct vnode *a_dvp;
 	struct vnode **a_vpp;
 	struct componentname *a_cnp;
     };
 */
 int
 fuse_vnop_lookup(struct vop_lookup_args *ap)
 {
 	struct vnode *dvp = ap->a_dvp;
 	struct vnode **vpp = ap->a_vpp;
 	struct componentname *cnp = ap->a_cnp;
 	struct thread *td = curthread;
 	struct ucred *cred = cnp->cn_cred;
 	struct timespec now;
 
 	int nameiop = cnp->cn_nameiop;
 	int flags = cnp->cn_flags;
-	int wantparent = flags & (LOCKPARENT | WANTPARENT);
 	int islastcn = flags & ISLASTCN;
 	struct mount *mp = vnode_mount(dvp);
 	struct fuse_data *data = fuse_get_mpdata(mp);
 	int default_permissions = data->dataflags & FSESS_DEFAULT_PERMISSIONS;
 	bool is_dot;
 
 	int err = 0;
 	int lookup_err = 0;
 	struct vnode *vp = NULL;
 
 	struct fuse_dispatcher fdi;
 	bool did_lookup = false;
 	struct fuse_entry_out *feo = NULL;
 	enum vtype vtyp;	/* vnode type of target */
 
 	uint64_t nid;
 
 	if (fuse_isdeadfs(dvp)) {
 		*vpp = NULL;
 		return ENXIO;
 	}
 	if (!vnode_isdir(dvp))
 		return ENOTDIR;
 
 	if (islastcn && vfs_isrdonly(mp) && (nameiop != LOOKUP))
 		return EROFS;
 
 	if ((cnp->cn_flags & NOEXECCHECK) != 0)
 		cnp->cn_flags &= ~NOEXECCHECK;
 	else if ((err = fuse_internal_access(dvp, VEXEC, td, cred)))
 		return err;
 
 	is_dot = cnp->cn_namelen == 1 && *(cnp->cn_nameptr) == '.';
 	if ((flags & ISDOTDOT) && !(data->dataflags & FSESS_EXPORT_SUPPORT))
 	{
 		if (!(VTOFUD(dvp)->flag & FN_PARENT_NID)) {
 			/*
 			 * Since the file system doesn't support ".." lookups,
 			 * we have no way to find this entry.
 			 */
 			return ESTALE;
 		}
 		nid = VTOFUD(dvp)->parent_nid;
 		if (nid == 0)
 			return ENOENT;
 		/* .. is obviously a directory */
 		vtyp = VDIR;
 	} else if (is_dot) {
 		nid = VTOI(dvp);
 		/* . is obviously a directory */
 		vtyp = VDIR;
 	} else {
 		struct timespec timeout;
 		int ncpticks; /* here to accommodate for API contract */
 
 		err = cache_lookup(dvp, vpp, cnp, &timeout, &ncpticks);
 		getnanouptime(&now);
 		SDT_PROBE3(fusefs, , vnops, cache_lookup, err, &timeout, &now);
 		switch (err) {
 		case -1:		/* positive match */
 			if (timespeccmp(&timeout, &now, >)) {
 				counter_u64_add(fuse_lookup_cache_hits, 1);
 			} else {
 				/* Cache timeout */
 				counter_u64_add(fuse_lookup_cache_misses, 1);
 				bintime_clear(
 					&VTOFUD(*vpp)->entry_cache_timeout);
 				cache_purge(*vpp);
 				if (dvp != *vpp)
 					vput(*vpp);
 				else 
 					vrele(*vpp);
 				*vpp = NULL;
 				break;
 			}
 			return 0;
 
 		case 0:		/* no match in cache */
 			counter_u64_add(fuse_lookup_cache_misses, 1);
 			break;
 
 		case ENOENT:		/* negative match */
 			if (timespeccmp(&timeout, &now, <=)) {
 				/* Cache timeout */
 				cache_purge_negative(dvp);
 				break;
 			}
 			/* fall through */
 		default:
 			return err;
 		}
 
 		fdisp_init(&fdi, cnp->cn_namelen + 1);
 		fdisp_make(&fdi, FUSE_LOOKUP, mp, VTOI(dvp), td, cred);
 
 		memcpy(fdi.indata, cnp->cn_nameptr, cnp->cn_namelen);
 		((char *)fdi.indata)[cnp->cn_namelen] = '\0';
 		lookup_err = fdisp_wait_answ(&fdi);
 		did_lookup = true;
 
 		if (!lookup_err) {
 			/* lookup call succeeded */
 			feo = (struct fuse_entry_out *)fdi.answ;
 			nid = feo->nodeid;
 			if (nid == 0) {
 				/* zero nodeid means ENOENT and cache it */
 				struct timespec timeout;
 
 				fdi.answ_stat = ENOENT;
 				lookup_err = ENOENT;
 				if (cnp->cn_flags & MAKEENTRY) {
 					fuse_validity_2_timespec(feo, &timeout);
 					/* Use the same entry_time for .. as for
 					 * the file itself.  That doesn't honor
 					 * exactly what the fuse server tells
 					 * us, but to do otherwise would require
 					 * another cache lookup at this point.
 					 */
 					struct timespec *dtsp = NULL;
 					cache_enter_time(dvp, *vpp, cnp,
 						&timeout, dtsp);
 				}
 			}
 			vtyp = IFTOVT(feo->attr.mode);
 		}
 		if (lookup_err && (!fdi.answ_stat || lookup_err != ENOENT)) {
 			fdisp_destroy(&fdi);
 			return lookup_err;
 		}
 	}
 	/* lookup_err, if non-zero, must be ENOENT at this point */
 
 	if (lookup_err) {
 		/* Entry not found */
 		if ((nameiop == CREATE || nameiop == RENAME) && islastcn) {
 			if (default_permissions)
 				err = fuse_internal_access(dvp, VWRITE, td,
 				    cred);
 			else
 				err = 0;
 			if (!err) {
-				/*
-				 * Set the SAVENAME flag to hold onto the
-				 * pathname for use later in VOP_CREATE or
-				 * VOP_RENAME.
-				 */
-				cnp->cn_flags |= SAVENAME;
-
 				err = EJUSTRETURN;
 			}
 		} else {
 			err = ENOENT;
 		}
 	} else {
 		/* Entry was found */
 		if (flags & ISDOTDOT) {
 			struct fuse_lookup_alloc_arg flaa;
 
 			flaa.nid = nid;
 			flaa.feo = feo;
 			flaa.cnp = cnp;
 			flaa.vtyp = vtyp;
 			err = vn_vget_ino_gen(dvp, fuse_lookup_alloc, &flaa, 0,
 				&vp);
 			*vpp = vp;
 		} else if (nid == VTOI(dvp)) {
 			if (is_dot) {
 				vref(dvp);
 				*vpp = dvp;
 			} else {
 				fuse_warn(fuse_get_mpdata(mp),
 				    FSESS_WARN_ILLEGAL_INODE,
 				    "Assigned same inode to both parent and "
 				    "child.");
 				err = EIO;
 			}
 
 		} else {
 			struct fuse_vnode_data *fvdat;
 
 			err = fuse_vnode_get(vnode_mount(dvp), feo, nid, dvp,
 			    &vp, cnp, vtyp);
 			if (err)
 				goto out;
 			*vpp = vp;
 			fvdat = VTOFUD(vp);
 
 			MPASS(feo != NULL);
 			if (timespeccmp(&now, &fvdat->last_local_modify, >)) {
 				/*
 				 * Attributes from the server are definitely
 				 * newer than the last attributes we sent to
 				 * the server, so cache them.
 				 */
 				fuse_internal_cache_attrs(*vpp, &feo->attr,
 					feo->attr_valid, feo->attr_valid_nsec,
 					NULL, true);
 			}
 			fuse_validity_2_bintime(feo->entry_valid,
 				feo->entry_valid_nsec,
 				&fvdat->entry_cache_timeout);
 
 			if ((nameiop == DELETE || nameiop == RENAME) &&
 				islastcn && default_permissions)
 			{
 				struct vattr dvattr;
 
 				err = fuse_internal_access(dvp, VWRITE, td,
 					cred);
 				if (err != 0)
 					goto out;
 				/* 
 				 * if the parent's sticky bit is set, check
 				 * whether we're allowed to remove the file.
 				 * Need to figure out the vnode locking to make
 				 * this work.
 				 */
 				fuse_internal_getattr(dvp, &dvattr, cred, td);
 				if ((dvattr.va_mode & S_ISTXT) &&
 					fuse_internal_access(dvp, VADMIN, td,
 						cred) &&
 					fuse_internal_access(*vpp, VADMIN, td,
 						cred)) {
 					err = EPERM;
 					goto out;
 				}
 			}
-
-			if (islastcn && (
-				(nameiop == DELETE) ||
-				(nameiop == RENAME && wantparent))) {
-				cnp->cn_flags |= SAVENAME;
-			}
 		}
 	}
 out:
 	if (err) {
 		if (vp != NULL && dvp != vp)
 			vput(vp);
 		else if (vp != NULL)
 			vrele(vp);
 		*vpp = NULL;
 	}
 	if (did_lookup)
 		fdisp_destroy(&fdi);
 
 	return err;
 }
 
 /*
     struct vnop_mkdir_args {
 	struct vnode *a_dvp;
 	struct vnode **a_vpp;
 	struct componentname *a_cnp;
 	struct vattr *a_vap;
     };
 */
 static int
 fuse_vnop_mkdir(struct vop_mkdir_args *ap)
 {
 	struct vnode *dvp = ap->a_dvp;
 	struct vnode **vpp = ap->a_vpp;
 	struct componentname *cnp = ap->a_cnp;
 	struct vattr *vap = ap->a_vap;
 
 	struct fuse_mkdir_in fmdi;
 
 	if (fuse_isdeadfs(dvp)) {
 		return ENXIO;
 	}
 	fmdi.mode = MAKEIMODE(vap->va_type, vap->va_mode);
 	fmdi.umask = curthread->td_proc->p_pd->pd_cmask;
 
 	return (fuse_internal_newentry(dvp, vpp, cnp, FUSE_MKDIR, &fmdi,
 	    sizeof(fmdi), VDIR));
 }
 
 /*
     struct vnop_mknod_args {
 	struct vnode *a_dvp;
 	struct vnode **a_vpp;
 	struct componentname *a_cnp;
 	struct vattr *a_vap;
     };
 */
 static int
 fuse_vnop_mknod(struct vop_mknod_args *ap)
 {
 
 	struct vnode *dvp = ap->a_dvp;
 	struct vnode **vpp = ap->a_vpp;
 	struct componentname *cnp = ap->a_cnp;
 	struct vattr *vap = ap->a_vap;
 
 	if (fuse_isdeadfs(dvp))
 		return ENXIO;
 
 	return fuse_internal_mknod(dvp, vpp, cnp, vap);
 }
 
 /*
     struct vop_open_args {
 	struct vnode *a_vp;
 	int  a_mode;
 	struct ucred *a_cred;
 	struct thread *a_td;
 	int a_fdidx; / struct file *a_fp;
     };
 */
 static int
 fuse_vnop_open(struct vop_open_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	int a_mode = ap->a_mode;
 	struct thread *td = ap->a_td;
 	struct ucred *cred = ap->a_cred;
 	pid_t pid = td->td_proc->p_pid;
 
 	if (fuse_isdeadfs(vp))
 		return ENXIO;
 	if (vp->v_type == VCHR || vp->v_type == VBLK || vp->v_type == VFIFO)
 		return (EOPNOTSUPP);
 	if ((a_mode & (FREAD | FWRITE | FEXEC)) == 0)
 		return EINVAL;
 
 	if (fuse_filehandle_validrw(vp, a_mode, cred, pid)) {
 		fuse_vnode_open(vp, 0, td);
 		return 0;
 	}
 
 	return fuse_filehandle_open(vp, a_mode, NULL, td, cred);
 }
 
 static int
 fuse_vnop_pathconf(struct vop_pathconf_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct mount *mp;
 
 	switch (ap->a_name) {
 	case _PC_FILESIZEBITS:
 		*ap->a_retval = 64;
 		return (0);
 	case _PC_NAME_MAX:
 		*ap->a_retval = NAME_MAX;
 		return (0);
 	case _PC_LINK_MAX:
 		*ap->a_retval = MIN(LONG_MAX, FUSE_LINK_MAX);
 		return (0);
 	case _PC_SYMLINK_MAX:
 		*ap->a_retval = MAXPATHLEN;
 		return (0);
 	case _PC_NO_TRUNC:
 		*ap->a_retval = 1;
 		return (0);
 	case _PC_MIN_HOLE_SIZE:
 		/*
 		 * The FUSE protocol provides no mechanism for a server to
 		 * report _PC_MIN_HOLE_SIZE.  It's a protocol bug.  Instead,
 		 * return EINVAL if the server does not support FUSE_LSEEK, or
 		 * 1 if it does.
 		 */
 		mp = vnode_mount(vp);
 		if (!fsess_is_impl(mp, FUSE_LSEEK) &&
 		    !fsess_not_impl(mp, FUSE_LSEEK)) {
 			off_t offset = 0;
 
 			/* Issue a FUSE_LSEEK to find out if it's implemented */
 			fuse_vnop_do_lseek(vp, curthread, curthread->td_ucred,
 			    curthread->td_proc->p_pid, &offset, SEEK_DATA);
 		}
 
 		if (fsess_is_impl(mp, FUSE_LSEEK)) {
 			*ap->a_retval = 1;
 			return (0);
 		} else {
 			/*
 			 * Probably FUSE_LSEEK is not implemented.  It might
 			 * be, if the FUSE_LSEEK above returned an error like
 			 * EACCES, but in that case we can't tell, so it's
 			 * safest to report EINVAL anyway.
 			 */
 			return (EINVAL);
 		}
 	default:
 		return (vop_stdpathconf(ap));
 	}
 }
 
 SDT_PROBE_DEFINE3(fusefs, , vnops, filehandles_closed, "struct vnode*",
     "struct uio*", "struct ucred*");
 /*
     struct vnop_read_args {
 	struct vnode *a_vp;
 	struct uio *a_uio;
 	int  a_ioflag;
 	struct ucred *a_cred;
     };
 */
 static int
 fuse_vnop_read(struct vop_read_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct uio *uio = ap->a_uio;
 	int ioflag = ap->a_ioflag;
 	struct ucred *cred = ap->a_cred;
 	pid_t pid = curthread->td_proc->p_pid;
 	struct fuse_filehandle *fufh;
 	int err;
 	bool closefufh = false, directio;
 
 	MPASS(vp->v_type == VREG || vp->v_type == VDIR);
 
 	if (fuse_isdeadfs(vp)) {
 		return ENXIO;
 	}
 
 	if (VTOFUD(vp)->flag & FN_DIRECTIO) {
 		ioflag |= IO_DIRECT;
 	}
 
 	err = fuse_filehandle_getrw(vp, FREAD, &fufh, cred, pid);
 	if (err == EBADF && vnode_mount(vp)->mnt_flag & MNT_EXPORTED) {
 		/*
 		 * nfsd will do I/O without first doing VOP_OPEN.  We
 		 * must implicitly open the file here
 		 */
 		err = fuse_filehandle_open(vp, FREAD, &fufh, curthread, cred);
 		closefufh = true;
 	}
 	if (err) {
 		SDT_PROBE3(fusefs, , vnops, filehandles_closed, vp, uio, cred);
 		return err;
 	}
 
 	/*
          * Ideally, when the daemon asks for direct io at open time, the
          * standard file flag should be set according to this, so that would
          * just change the default mode, which later on could be changed via
          * fcntl(2).
          * But this doesn't work, the O_DIRECT flag gets cleared at some point
          * (don't know where). So to make any use of the Fuse direct_io option,
          * we hardwire it into the file's private data (similarly to Linux,
          * btw.).
          */
 	directio = (ioflag & IO_DIRECT) || !fsess_opt_datacache(vnode_mount(vp));
 
 	fuse_vnode_update(vp, FN_ATIMECHANGE);
 	if (directio) {
 		SDT_PROBE2(fusefs, , vnops, trace, 1, "direct read of vnode");
 		err = fuse_read_directbackend(vp, uio, cred, fufh);
 	} else {
 		SDT_PROBE2(fusefs, , vnops, trace, 1, "buffered read of vnode");
 		err = fuse_read_biobackend(vp, uio, ioflag, cred, fufh, pid);
 	}
 
 	if (closefufh)
 		fuse_filehandle_close(vp, fufh, curthread, cred);
 
 	return (err);
 }
 
 /*
     struct vnop_readdir_args {
 	struct vnode *a_vp;
 	struct uio *a_uio;
 	struct ucred *a_cred;
 	int *a_eofflag;
 	int *a_ncookies;
 	uint64_t **a_cookies;
     };
 */
 static int
 fuse_vnop_readdir(struct vop_readdir_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct uio *uio = ap->a_uio;
 	struct ucred *cred = ap->a_cred;
 	struct fuse_filehandle *fufh = NULL;
 	struct mount *mp = vnode_mount(vp);
 	struct fuse_iov cookediov;
 	int err = 0;
 	uint64_t *cookies;
 	ssize_t tresid;
 	int ncookies;
 	bool closefufh = false;
 	pid_t pid = curthread->td_proc->p_pid;
 
 	if (ap->a_eofflag)
 		*ap->a_eofflag = 0;
 	if (fuse_isdeadfs(vp)) {
 		return ENXIO;
 	}
 	if (				/* XXXIP ((uio_iovcnt(uio) > 1)) || */
 	    (uio_resid(uio) < sizeof(struct dirent))) {
 		return EINVAL;
 	}
 
 	tresid = uio->uio_resid;
 	err = fuse_filehandle_get_dir(vp, &fufh, cred, pid);
 	if (err == EBADF && mp->mnt_flag & MNT_EXPORTED) {
 		KASSERT(fuse_get_mpdata(mp)->dataflags
 				& FSESS_NO_OPENDIR_SUPPORT,
 			("FUSE file systems that don't set "
 			 "FUSE_NO_OPENDIR_SUPPORT should not be exported"));
 		/* 
 		 * nfsd will do VOP_READDIR without first doing VOP_OPEN.  We
 		 * must implicitly open the directory here.
 		 */
 		err = fuse_filehandle_open(vp, FREAD, &fufh, curthread, cred);
 		closefufh = true;
 	}
 	if (err)
 		return (err);
 	if (ap->a_ncookies != NULL) {
 		ncookies = uio->uio_resid /
 			(offsetof(struct dirent, d_name) + 4) + 1;
 		cookies = malloc(ncookies * sizeof(*cookies), M_TEMP, M_WAITOK);
 		*ap->a_ncookies = ncookies;
 		*ap->a_cookies = cookies;
 	} else {
 		ncookies = 0;
 		cookies = NULL;
 	}
 #define DIRCOOKEDSIZE FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET + MAXNAMLEN + 1)
 	fiov_init(&cookediov, DIRCOOKEDSIZE);
 
 	err = fuse_internal_readdir(vp, uio, fufh, &cookediov,
 		&ncookies, cookies);
 
 	fiov_teardown(&cookediov);
 	if (closefufh)
 		fuse_filehandle_close(vp, fufh, curthread, cred);
 
 	if (ap->a_ncookies != NULL) {
 		if (err == 0) {
 			*ap->a_ncookies -= ncookies;
 		} else {
 			free(*ap->a_cookies, M_TEMP);
 			*ap->a_ncookies = 0;
 			*ap->a_cookies = NULL;
 		}
 	}
 	if (err == 0 && tresid == uio->uio_resid)
 		*ap->a_eofflag = 1;
 
 	return err;
 }
 
 /*
     struct vnop_readlink_args {
 	struct vnode *a_vp;
 	struct uio *a_uio;
 	struct ucred *a_cred;
     };
 */
 static int
 fuse_vnop_readlink(struct vop_readlink_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct uio *uio = ap->a_uio;
 	struct ucred *cred = ap->a_cred;
 
 	struct fuse_dispatcher fdi;
 	int err;
 
 	if (fuse_isdeadfs(vp)) {
 		return ENXIO;
 	}
 	if (!vnode_islnk(vp)) {
 		return EINVAL;
 	}
 	fdisp_init(&fdi, 0);
 	err = fdisp_simple_putget_vp(&fdi, FUSE_READLINK, vp, curthread, cred);
 	if (err) {
 		goto out;
 	}
 	if (((char *)fdi.answ)[0] == '/' &&
 	    fuse_get_mpdata(vnode_mount(vp))->dataflags & FSESS_PUSH_SYMLINKS_IN) {
 		char *mpth = vnode_mount(vp)->mnt_stat.f_mntonname;
 
 		err = uiomove(mpth, strlen(mpth), uio);
 	}
 	if (!err) {
 		err = uiomove(fdi.answ, fdi.iosize, uio);
 	}
 out:
 	fdisp_destroy(&fdi);
 	return err;
 }
 
 /*
     struct vnop_reclaim_args {
 	struct vnode *a_vp;
     };
 */
 static int
 fuse_vnop_reclaim(struct vop_reclaim_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct thread *td = curthread;
 	struct fuse_vnode_data *fvdat = VTOFUD(vp);
 	struct fuse_filehandle *fufh, *fufh_tmp;
 
 	if (!fvdat) {
 		panic("FUSE: no vnode data during recycling");
 	}
 	LIST_FOREACH_SAFE(fufh, &fvdat->handles, next, fufh_tmp) {
 		printf("FUSE: vnode being reclaimed with open fufh "
 			"(type=%#x)", fufh->fufh_type);
 		fuse_filehandle_close(vp, fufh, td, NULL);
 	}
 
 	if (VTOI(vp) == 1) {
 		/*
 		 * Don't send FUSE_FORGET for the root inode, because
 		 * we never send FUSE_LOOKUP for it (see
 		 * fuse_vfsop_root) and we don't want the server to see
 		 * mismatched lookup counts.
 		 */
 		struct fuse_data *data;
 		struct vnode *vroot;
 
 		data = fuse_get_mpdata(vnode_mount(vp));
 		FUSE_LOCK();
 		vroot = data->vroot;
 		data->vroot = NULL;
 		FUSE_UNLOCK();
 		if (vroot)
 			vrele(vroot);
 	} else if (!fuse_isdeadfs(vp) && fvdat->nlookup > 0) {
 		fuse_internal_forget_send(vnode_mount(vp), td, NULL, VTOI(vp),
 		    fvdat->nlookup);
 	}
 	cache_purge(vp);
 	vfs_hash_remove(vp);
 	fuse_vnode_destroy(vp);
 
 	return 0;
 }
 
 /*
     struct vnop_remove_args {
 	struct vnode *a_dvp;
 	struct vnode *a_vp;
 	struct componentname *a_cnp;
     };
 */
 static int
 fuse_vnop_remove(struct vop_remove_args *ap)
 {
 	struct vnode *dvp = ap->a_dvp;
 	struct vnode *vp = ap->a_vp;
 	struct componentname *cnp = ap->a_cnp;
 
 	int err;
 
 	if (fuse_isdeadfs(vp)) {
 		return ENXIO;
 	}
 	if (vnode_isdir(vp)) {
 		return EPERM;
 	}
 
 	err = fuse_internal_remove(dvp, vp, cnp, FUSE_UNLINK);
 
 	return err;
 }
 
 /*
     struct vnop_rename_args {
 	struct vnode *a_fdvp;
 	struct vnode *a_fvp;
 	struct componentname *a_fcnp;
 	struct vnode *a_tdvp;
 	struct vnode *a_tvp;
 	struct componentname *a_tcnp;
     };
 */
 static int
 fuse_vnop_rename(struct vop_rename_args *ap)
 {
 	struct vnode *fdvp = ap->a_fdvp;
 	struct vnode *fvp = ap->a_fvp;
 	struct componentname *fcnp = ap->a_fcnp;
 	struct vnode *tdvp = ap->a_tdvp;
 	struct vnode *tvp = ap->a_tvp;
 	struct componentname *tcnp = ap->a_tcnp;
 	struct fuse_data *data;
 	bool newparent = fdvp != tdvp;
 	bool isdir = fvp->v_type == VDIR;
 	int err = 0;
 
 	if (fuse_isdeadfs(fdvp)) {
 		return ENXIO;
 	}
 	if (fvp->v_mount != tdvp->v_mount ||
 	    (tvp && fvp->v_mount != tvp->v_mount)) {
 		SDT_PROBE2(fusefs, , vnops, trace, 1, "cross-device rename");
 		err = EXDEV;
 		goto out;
 	}
 	cache_purge(fvp);
 
 	/*
 	 * FUSE library is expected to check if target directory is not
 	 * under the source directory in the file system tree.
 	 * Linux performs this check at VFS level.
 	 */
 	/* 
 	 * If source is a directory, and it will get a new parent, user must
 	 * have write permission to it, so ".." can be modified.
 	 */
 	data = fuse_get_mpdata(vnode_mount(tdvp));
 	if (data->dataflags & FSESS_DEFAULT_PERMISSIONS && isdir && newparent) {
 		err = fuse_internal_access(fvp, VWRITE,
 			curthread, tcnp->cn_cred);
 		if (err)
 			goto out;
 	}
 	sx_xlock(&data->rename_lock);
 	err = fuse_internal_rename(fdvp, fcnp, tdvp, tcnp);
 	if (err == 0) {
 		if (tdvp != fdvp)
 			fuse_vnode_setparent(fvp, tdvp);
 		if (tvp != NULL)
 			fuse_vnode_setparent(tvp, NULL);
 	}
 	sx_unlock(&data->rename_lock);
 
 	if (tvp != NULL && tvp != fvp) {
 		cache_purge(tvp);
 	}
 	if (vnode_isdir(fvp)) {
 		if (((tvp != NULL) && vnode_isdir(tvp)) || vnode_isdir(fvp)) {
 			cache_purge(tdvp);
 		}
 		cache_purge(fdvp);
 	}
 out:
 	if (tdvp == tvp) {
 		vrele(tdvp);
 	} else {
 		vput(tdvp);
 	}
 	if (tvp != NULL) {
 		vput(tvp);
 	}
 	vrele(fdvp);
 	vrele(fvp);
 
 	return err;
 }
 
 /*
     struct vnop_rmdir_args {
 	    struct vnode *a_dvp;
 	    struct vnode *a_vp;
 	    struct componentname *a_cnp;
     } *ap;
 */
 static int
 fuse_vnop_rmdir(struct vop_rmdir_args *ap)
 {
 	struct vnode *dvp = ap->a_dvp;
 	struct vnode *vp = ap->a_vp;
 
 	int err;
 
 	if (fuse_isdeadfs(vp)) {
 		return ENXIO;
 	}
 	if (VTOFUD(vp) == VTOFUD(dvp)) {
 		return EINVAL;
 	}
 	err = fuse_internal_remove(dvp, vp, ap->a_cnp, FUSE_RMDIR);
 
 	return err;
 }
 
 /*
     struct vnop_setattr_args {
 	struct vnode *a_vp;
 	struct vattr *a_vap;
 	struct ucred *a_cred;
 	struct thread *a_td;
     };
 */
 static int
 fuse_vnop_setattr(struct vop_setattr_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct vattr *vap = ap->a_vap;
 	struct ucred *cred = ap->a_cred;
 	struct thread *td = curthread;
 	struct mount *mp;
 	struct fuse_data *data;
 	struct vattr old_va;
 	int dataflags;
 	int err = 0, err2;
 	accmode_t accmode = 0;
 	bool checkperm;
 	bool drop_suid = false;
 	gid_t cr_gid;
 
 	mp = vnode_mount(vp);
 	data = fuse_get_mpdata(mp);
 	dataflags = data->dataflags;
 	checkperm = dataflags & FSESS_DEFAULT_PERMISSIONS;
 	if (cred->cr_ngroups > 0)
 		cr_gid = cred->cr_groups[0];
 	else
 		cr_gid = 0;
 
 	if (fuse_isdeadfs(vp)) {
 		return ENXIO;
 	}
 
 	if (vap->va_uid != (uid_t)VNOVAL) {
 		if (checkperm) {
 			/* Only root may change a file's owner */
 			err = priv_check_cred(cred, PRIV_VFS_CHOWN);
 			if (err) {
 				/* As a special case, allow the null chown */
 				err2 = fuse_internal_getattr(vp, &old_va, cred,
 					td);
 				if (err2)
 					return (err2);
 				if (vap->va_uid != old_va.va_uid)
 					return err;
 				else
 					accmode |= VADMIN;
 				drop_suid = true;
 			} else
 				accmode |= VADMIN;
 		} else
 			accmode |= VADMIN;
 	}
 	if (vap->va_gid != (gid_t)VNOVAL) {
 		if (checkperm && priv_check_cred(cred, PRIV_VFS_CHOWN))
 			drop_suid = true;
 		if (checkperm && !groupmember(vap->va_gid, cred))
 		{
 			/*
 			 * Non-root users may only chgrp to one of their own
 			 * groups 
 			 */
 			err = priv_check_cred(cred, PRIV_VFS_CHOWN);
 			if (err) {
 				/* As a special case, allow the null chgrp */
 				err2 = fuse_internal_getattr(vp, &old_va, cred,
 					td);
 				if (err2)
 					return (err2);
 				if (vap->va_gid != old_va.va_gid)
 					return err;
 				accmode |= VADMIN;
 			} else
 				accmode |= VADMIN;
 		} else
 			accmode |= VADMIN;
 	}
 	if (vap->va_size != VNOVAL) {
 		switch (vp->v_type) {
 		case VDIR:
 			return (EISDIR);
 		case VLNK:
 		case VREG:
 			if (vfs_isrdonly(mp))
 				return (EROFS);
 			break;
 		default:
 			/*
 			 * According to POSIX, the result is unspecified
 			 * for file types other than regular files,
 			 * directories and shared memory objects.  We
 			 * don't support shared memory objects in the file
 			 * system, and have dubious support for truncating
 			 * symlinks.  Just ignore the request in other cases.
 			 */
 			return (0);
 		}
 		/* Don't set accmode.  Permission to trunc is checked upstack */
 	}
 	if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL) {
 		if (vap->va_vaflags & VA_UTIMES_NULL)
 			accmode |= VWRITE;
 		else
 			accmode |= VADMIN;
 	}
 	if (drop_suid) {
 		if (vap->va_mode != (mode_t)VNOVAL)
 			vap->va_mode &= ~(S_ISUID | S_ISGID);
 		else {
 			err = fuse_internal_getattr(vp, &old_va, cred, td);
 			if (err)
 				return (err);
 			vap->va_mode = old_va.va_mode & ~(S_ISUID | S_ISGID);
 		}
 	}
 	if (vap->va_mode != (mode_t)VNOVAL) {
 		/* Only root may set the sticky bit on non-directories */
 		if (checkperm && vp->v_type != VDIR && (vap->va_mode & S_ISTXT)
 		    && priv_check_cred(cred, PRIV_VFS_STICKYFILE))
 			return EFTYPE;
 		if (checkperm && (vap->va_mode & S_ISGID)) {
 			err = fuse_internal_getattr(vp, &old_va, cred, td);
 			if (err)
 				return (err);
 			if (!groupmember(old_va.va_gid, cred)) {
 				err = priv_check_cred(cred, PRIV_VFS_SETGID);
 				if (err)
 					return (err);
 			}
 		}
 		accmode |= VADMIN;
 	}
 
 	if (vfs_isrdonly(mp))
 		return EROFS;
 
 	if (checkperm) {
 		err = fuse_internal_access(vp, accmode, td, cred);
 	} else {
 		err = 0;
 	}
 	if (err)
 		return err;
 	else
 		return fuse_internal_setattr(vp, vap, td, cred);
 }
 
 /*
     struct vnop_strategy_args {
 	struct vnode *a_vp;
 	struct buf *a_bp;
     };
 */
 static int
 fuse_vnop_strategy(struct vop_strategy_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct buf *bp = ap->a_bp;
 
 	if (!vp || fuse_isdeadfs(vp)) {
 		bp->b_ioflags |= BIO_ERROR;
 		bp->b_error = ENXIO;
 		bufdone(bp);
 		return 0;
 	}
 
 	/*
 	 * VOP_STRATEGY always returns zero and signals error via bp->b_ioflags.
 	 * fuse_io_strategy sets bp's error fields
 	 */
 	(void)fuse_io_strategy(vp, bp);
 
 	return 0;
 }
 
 /*
     struct vnop_symlink_args {
 	struct vnode *a_dvp;
 	struct vnode **a_vpp;
 	struct componentname *a_cnp;
 	struct vattr *a_vap;
 	char *a_target;
     };
 */
 static int
 fuse_vnop_symlink(struct vop_symlink_args *ap)
 {
 	struct vnode *dvp = ap->a_dvp;
 	struct vnode **vpp = ap->a_vpp;
 	struct componentname *cnp = ap->a_cnp;
 	const char *target = ap->a_target;
 
 	struct fuse_dispatcher fdi;
 
 	int err;
 	size_t len;
 
 	if (fuse_isdeadfs(dvp)) {
 		return ENXIO;
 	}
 	/*
 	 * Unlike the other creator type calls, here we have to create a message
 	 * where the name of the new entry comes first, and the data describing
 	 * the entry comes second.
 	 * Hence we can't rely on our handy fuse_internal_newentry() routine,
 	 * but put together the message manually and just call the core part.
 	 */
 
 	len = strlen(target) + 1;
 	fdisp_init(&fdi, len + cnp->cn_namelen + 1);
 	fdisp_make_vp(&fdi, FUSE_SYMLINK, dvp, curthread, NULL);
 
 	memcpy(fdi.indata, cnp->cn_nameptr, cnp->cn_namelen);
 	((char *)fdi.indata)[cnp->cn_namelen] = '\0';
 	memcpy((char *)fdi.indata + cnp->cn_namelen + 1, target, len);
 
 	err = fuse_internal_newentry_core(dvp, vpp, cnp, VLNK, &fdi);
 	fdisp_destroy(&fdi);
 	return err;
 }
 
 /*
     struct vnop_write_args {
 	struct vnode *a_vp;
 	struct uio *a_uio;
 	int  a_ioflag;
 	struct ucred *a_cred;
     };
 */
 static int
 fuse_vnop_write(struct vop_write_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct uio *uio = ap->a_uio;
 	int ioflag = ap->a_ioflag;
 	struct ucred *cred = ap->a_cred;
 	pid_t pid = curthread->td_proc->p_pid;
 	struct fuse_filehandle *fufh;
 	int err;
 	bool closefufh = false, directio;
 
 	MPASS(vp->v_type == VREG || vp->v_type == VDIR);
 
 	if (fuse_isdeadfs(vp)) {
 		return ENXIO;
 	}
 
 	if (VTOFUD(vp)->flag & FN_DIRECTIO) {
 		ioflag |= IO_DIRECT;
 	}
 
 	err = fuse_filehandle_getrw(vp, FWRITE, &fufh, cred, pid);
 	if (err == EBADF && vnode_mount(vp)->mnt_flag & MNT_EXPORTED) {
 		/*
 		 * nfsd will do I/O without first doing VOP_OPEN.  We
 		 * must implicitly open the file here
 		 */
 		err = fuse_filehandle_open(vp, FWRITE, &fufh, curthread, cred);
 		closefufh = true;
 	}
 	if (err) {
 		SDT_PROBE3(fusefs, , vnops, filehandles_closed, vp, uio, cred);
 		return err;
 	}
 
 	/*
          * Ideally, when the daemon asks for direct io at open time, the
          * standard file flag should be set according to this, so that would
          * just change the default mode, which later on could be changed via
          * fcntl(2).
          * But this doesn't work, the O_DIRECT flag gets cleared at some point
          * (don't know where). So to make any use of the Fuse direct_io option,
          * we hardwire it into the file's private data (similarly to Linux,
          * btw.).
          */
 	directio = (ioflag & IO_DIRECT) || !fsess_opt_datacache(vnode_mount(vp));
 
 	fuse_vnode_update(vp, FN_MTIMECHANGE | FN_CTIMECHANGE);
 	if (directio) {
 		off_t start, end, filesize;
 		bool pages = (ioflag & IO_VMIO) != 0;
 
 		SDT_PROBE2(fusefs, , vnops, trace, 1, "direct write of vnode");
 
 		err = fuse_vnode_size(vp, &filesize, cred, curthread);
 		if (err)
 			goto out;
 
 		start = uio->uio_offset;
 		end = start + uio->uio_resid;
 		if (!pages) {
 			err = fuse_inval_buf_range(vp, filesize, start,
 			    end);
 			if (err)
 				goto out;
 		}
 		err = fuse_write_directbackend(vp, uio, cred, fufh,
 			filesize, ioflag, pages);
 	} else {
 		SDT_PROBE2(fusefs, , vnops, trace, 1,
 			"buffered write of vnode");
 		if (!fsess_opt_writeback(vnode_mount(vp)))
 			ioflag |= IO_SYNC;
 		err = fuse_write_biobackend(vp, uio, cred, fufh, ioflag, pid);
 	}
 	fuse_internal_clear_suid_on_write(vp, cred, uio->uio_td);
 
 out:
 	if (closefufh)
 		fuse_filehandle_close(vp, fufh, curthread, cred);
 
 	return (err);
 }
 
 static daddr_t
 fuse_gbp_getblkno(struct vnode *vp, vm_ooffset_t off)
 {
 	const int biosize = fuse_iosize(vp);
 
 	return (off / biosize);
 }
 
 static int
 fuse_gbp_getblksz(struct vnode *vp, daddr_t lbn, long *blksz)
 {
 	off_t filesize;
 	int err;
 	const int biosize = fuse_iosize(vp);
 
 	err = fuse_vnode_size(vp, &filesize, NULL, NULL);
 	if (err) {
 		/* This will turn into a SIGBUS */
 		return (EIO);
 	} else if ((off_t)lbn * biosize >= filesize) {
 		*blksz = 0;
 	} else if ((off_t)(lbn + 1) * biosize > filesize) {
 		*blksz = filesize - (off_t)lbn *biosize;
 	} else {
 		*blksz = biosize;
 	}
 	return (0);
 }
 
 /*
     struct vnop_getpages_args {
 	struct vnode *a_vp;
 	vm_page_t *a_m;
 	int a_count;
 	int a_reqpage;
     };
 */
 static int
 fuse_vnop_getpages(struct vop_getpages_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 
 	if (!fsess_opt_mmap(vnode_mount(vp))) {
 		SDT_PROBE2(fusefs, , vnops, trace, 1,
 			"called on non-cacheable vnode??\n");
 		return (VM_PAGER_ERROR);
 	}
 
 	return (vfs_bio_getpages(vp, ap->a_m, ap->a_count, ap->a_rbehind,
 	    ap->a_rahead, fuse_gbp_getblkno, fuse_gbp_getblksz));
 }
 
 static const char extattr_namespace_separator = '.';
 
 /*
     struct vop_getextattr_args {
 	struct vop_generic_args a_gen;
 	struct vnode *a_vp;
 	int a_attrnamespace;
 	const char *a_name;
 	struct uio *a_uio;
 	size_t *a_size;
 	struct ucred *a_cred;
 	struct thread *a_td;
     };
 */
 static int
 fuse_vnop_getextattr(struct vop_getextattr_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct uio *uio = ap->a_uio;
 	struct fuse_dispatcher fdi;
 	struct fuse_getxattr_in *get_xattr_in;
 	struct fuse_getxattr_out *get_xattr_out;
 	struct mount *mp = vnode_mount(vp);
 	struct thread *td = ap->a_td;
 	struct ucred *cred = ap->a_cred;
 	char *prefix;
 	char *attr_str;
 	size_t len;
 	int err;
 
 	if (fuse_isdeadfs(vp))
 		return (ENXIO);
 
 	if (fsess_not_impl(mp, FUSE_GETXATTR))
 		return EOPNOTSUPP;
 
 	err = fuse_extattr_check_cred(vp, ap->a_attrnamespace, cred, td, VREAD);
 	if (err)
 		return err;
 
 	/* Default to looking for user attributes. */
 	if (ap->a_attrnamespace == EXTATTR_NAMESPACE_SYSTEM)
 		prefix = EXTATTR_NAMESPACE_SYSTEM_STRING;
 	else
 		prefix = EXTATTR_NAMESPACE_USER_STRING;
 
 	len = strlen(prefix) + sizeof(extattr_namespace_separator) +
 	    strlen(ap->a_name) + 1;
 
 	fdisp_init(&fdi, len + sizeof(*get_xattr_in));
 	fdisp_make_vp(&fdi, FUSE_GETXATTR, vp, td, cred);
 
 	get_xattr_in = fdi.indata;
 	/*
 	 * Check to see whether we're querying the available size or
 	 * issuing the actual request.  If we pass in 0, we get back struct
 	 * fuse_getxattr_out.  If we pass in a non-zero size, we get back
 	 * that much data, without the struct fuse_getxattr_out header.
 	 */
 	if (uio == NULL)
 		get_xattr_in->size = 0;
 	else
 		get_xattr_in->size = uio->uio_resid;
 
 	attr_str = (char *)fdi.indata + sizeof(*get_xattr_in);
 	snprintf(attr_str, len, "%s%c%s", prefix, extattr_namespace_separator,
 	    ap->a_name);
 
 	err = fdisp_wait_answ(&fdi);
 	if (err != 0) {
 		if (err == ENOSYS) {
 			fsess_set_notimpl(mp, FUSE_GETXATTR);
 			err = EOPNOTSUPP;
 		}
 		goto out;
 	}
 
 	get_xattr_out = fdi.answ;
 
 	if (ap->a_size != NULL)
 		*ap->a_size = get_xattr_out->size;
 
 	if (uio != NULL)
 		err = uiomove(fdi.answ, fdi.iosize, uio);
 
 out:
 	fdisp_destroy(&fdi);
 	return (err);
 }
 
 /*
     struct vop_setextattr_args {
 	struct vop_generic_args a_gen;
 	struct vnode *a_vp;
 	int a_attrnamespace;
 	const char *a_name;
 	struct uio *a_uio;
 	struct ucred *a_cred;
 	struct thread *a_td;
     };
 */
 static int
 fuse_vnop_setextattr(struct vop_setextattr_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct uio *uio = ap->a_uio;
 	struct fuse_dispatcher fdi;
 	struct fuse_setxattr_in *set_xattr_in;
 	struct mount *mp = vnode_mount(vp);
 	struct thread *td = ap->a_td;
 	struct ucred *cred = ap->a_cred;
 	char *prefix;
 	size_t len;
 	char *attr_str;
 	int err;
 
 	if (fuse_isdeadfs(vp))
 		return (ENXIO);
 
 	if (fsess_not_impl(mp, FUSE_SETXATTR))
 		return EOPNOTSUPP;
 
 	if (vfs_isrdonly(mp))
 		return EROFS;
 
 	/* Deleting xattrs must use VOP_DELETEEXTATTR instead */
 	if (ap->a_uio == NULL) {
 		/*
 		 * If we got here as fallback from VOP_DELETEEXTATTR, then
 		 * return EOPNOTSUPP.
 		 */
 		if (fsess_not_impl(mp, FUSE_REMOVEXATTR))
 			return (EOPNOTSUPP);
 		else
 			return (EINVAL);
 	}
 
 	err = fuse_extattr_check_cred(vp, ap->a_attrnamespace, cred, td,
 		VWRITE);
 	if (err)
 		return err;
 
 	/* Default to looking for user attributes. */
 	if (ap->a_attrnamespace == EXTATTR_NAMESPACE_SYSTEM)
 		prefix = EXTATTR_NAMESPACE_SYSTEM_STRING;
 	else
 		prefix = EXTATTR_NAMESPACE_USER_STRING;
 
 	len = strlen(prefix) + sizeof(extattr_namespace_separator) +
 	    strlen(ap->a_name) + 1;
 
 	fdisp_init(&fdi, len + sizeof(*set_xattr_in) + uio->uio_resid);
 	fdisp_make_vp(&fdi, FUSE_SETXATTR, vp, td, cred);
 
 	set_xattr_in = fdi.indata;
 	set_xattr_in->size = uio->uio_resid;
 
 	attr_str = (char *)fdi.indata + sizeof(*set_xattr_in);
 	snprintf(attr_str, len, "%s%c%s", prefix, extattr_namespace_separator,
 	    ap->a_name);
 
 	err = uiomove((char *)fdi.indata + sizeof(*set_xattr_in) + len,
 	    uio->uio_resid, uio);
 	if (err != 0) {
 		goto out;
 	}
 
 	err = fdisp_wait_answ(&fdi);
 
 	if (err == ENOSYS) {
 		fsess_set_notimpl(mp, FUSE_SETXATTR);
 		err = EOPNOTSUPP;
 	}
 	if (err == ERESTART) {
 		/* Can't restart after calling uiomove */
 		err = EINTR;
 	}
 
 out:
 	fdisp_destroy(&fdi);
 	return (err);
 }
 
 /*
  * The Linux / FUSE extended attribute list is simply a collection of
  * NUL-terminated strings.  The FreeBSD extended attribute list is a single
  * byte length followed by a non-NUL terminated string.  So, this allows
  * conversion of the Linux / FUSE format to the FreeBSD format in place.
  * Linux attribute names are reported with the namespace as a prefix (e.g.
  * "user.attribute_name"), but in FreeBSD they are reported without the
  * namespace prefix (e.g. "attribute_name").  So, we're going from:
  *
  * user.attr_name1\0user.attr_name2\0
  *
  * to:
  *
  * <num>attr_name1<num>attr_name2
  *
  * Where "<num>" is a single byte number of characters in the attribute name.
  * 
  * Args:
  * prefix - exattr namespace prefix string
  * list, list_len - input list with namespace prefixes
  * bsd_list, bsd_list_len - output list compatible with bsd vfs
  */
 static int
 fuse_xattrlist_convert(char *prefix, const char *list, int list_len,
     char *bsd_list, int *bsd_list_len)
 {
 	int len, pos, dist_to_next, prefix_len;
 
 	pos = 0;
 	*bsd_list_len = 0;
 	prefix_len = strlen(prefix);
 
 	while (pos < list_len && list[pos] != '\0') {
 		dist_to_next = strlen(&list[pos]) + 1;
 		if (bcmp(&list[pos], prefix, prefix_len) == 0 &&
 		    list[pos + prefix_len] == extattr_namespace_separator) {
 			len = dist_to_next -
 			    (prefix_len + sizeof(extattr_namespace_separator)) - 1;
 			if (len >= EXTATTR_MAXNAMELEN)
 				return (ENAMETOOLONG);
 
 			bsd_list[*bsd_list_len] = len;
 			memcpy(&bsd_list[*bsd_list_len + 1],
 			    &list[pos + prefix_len +
 			    sizeof(extattr_namespace_separator)], len);
 
 			*bsd_list_len += len + 1;
 		}
 
 		pos += dist_to_next;
 	}
 
 	return (0);
 }
 
 /*
  * List extended attributes
  *
  * The FUSE_LISTXATTR operation is based on Linux's listxattr(2) syscall, which
  * has a number of differences compared to its FreeBSD equivalent,
  * extattr_list_file:
  *
  * - FUSE_LISTXATTR returns all extended attributes across all namespaces,
  *   whereas listxattr(2) only returns attributes for a single namespace
  * - FUSE_LISTXATTR prepends each attribute name with "namespace."
  * - If the provided buffer is not large enough to hold the result,
  *   FUSE_LISTXATTR should return ERANGE, whereas listxattr is expected to
  *   return as many results as will fit.
  */
 /*
     struct vop_listextattr_args {
 	struct vop_generic_args a_gen;
 	struct vnode *a_vp;
 	int a_attrnamespace;
 	struct uio *a_uio;
 	size_t *a_size;
 	struct ucred *a_cred;
 	struct thread *a_td;
     };
 */
 static int
 fuse_vnop_listextattr(struct vop_listextattr_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct uio *uio = ap->a_uio;
 	struct fuse_dispatcher fdi;
 	struct fuse_listxattr_in *list_xattr_in;
 	struct fuse_listxattr_out *list_xattr_out;
 	struct mount *mp = vnode_mount(vp);
 	struct thread *td = ap->a_td;
 	struct ucred *cred = ap->a_cred;
 	char *prefix;
 	char *bsd_list = NULL;
 	char *linux_list;
 	int bsd_list_len;
 	int linux_list_len;
 	int err;
 
 	if (fuse_isdeadfs(vp))
 		return (ENXIO);
 
 	if (fsess_not_impl(mp, FUSE_LISTXATTR))
 		return EOPNOTSUPP;
 
 	err = fuse_extattr_check_cred(vp, ap->a_attrnamespace, cred, td, VREAD);
 	if (err)
 		return err;
 
 	/*
 	 * Add space for a NUL and the period separator if enabled.
 	 * Default to looking for user attributes.
 	 */
 	if (ap->a_attrnamespace == EXTATTR_NAMESPACE_SYSTEM)
 		prefix = EXTATTR_NAMESPACE_SYSTEM_STRING;
 	else
 		prefix = EXTATTR_NAMESPACE_USER_STRING;
 
 	fdisp_init(&fdi, sizeof(*list_xattr_in));
 	fdisp_make_vp(&fdi, FUSE_LISTXATTR, vp, td, cred);
 
 	/*
 	 * Retrieve Linux / FUSE compatible list size.
 	 */
 	list_xattr_in = fdi.indata;
 	list_xattr_in->size = 0;
 
 	err = fdisp_wait_answ(&fdi);
 	if (err != 0) {
 		if (err == ENOSYS) {
 			fsess_set_notimpl(mp, FUSE_LISTXATTR);
 			err = EOPNOTSUPP;
 		}
 		goto out;
 	}
 
 	list_xattr_out = fdi.answ;
 	linux_list_len = list_xattr_out->size;
 	if (linux_list_len == 0) {
 		if (ap->a_size != NULL)
 			*ap->a_size = linux_list_len;
 		goto out;
 	}
 
 	/*
 	 * Retrieve Linux / FUSE compatible list values.
 	 */
 	fdisp_refresh_vp(&fdi, FUSE_LISTXATTR, vp, td, cred);
 	list_xattr_in = fdi.indata;
 	list_xattr_in->size = linux_list_len;
 
 	err = fdisp_wait_answ(&fdi);
 	if (err == ERANGE) {
 		/* 
 		 * Race detected.  The attribute list must've grown since the
 		 * first FUSE_LISTXATTR call.  Start over.  Go all the way back
 		 * to userland so we can process signals, if necessary, before
 		 * restarting.
 		 */
 		err = ERESTART;
 		goto out;
 	} else if (err != 0)
 		goto out;
 
 	linux_list = fdi.answ;
 	/* FUSE doesn't allow the server to return more data than requested */
 	if (fdi.iosize > linux_list_len) {
 		struct fuse_data *data = fuse_get_mpdata(mp);
 
 		fuse_warn(data, FSESS_WARN_LSEXTATTR_LONG,
 			"server returned "
 			"more extended attribute data than requested; "
 			"should've returned ERANGE instead.");
 	} else {
 		/* But returning less data is fine */
 		linux_list_len = fdi.iosize;
 	}
 
 	/*
 	 * Retrieve the BSD compatible list values.
 	 * The Linux / FUSE attribute list format isn't the same
 	 * as FreeBSD's format. So we need to transform it into
 	 * FreeBSD's format before giving it to the user.
 	 */
 	bsd_list = malloc(linux_list_len, M_TEMP, M_WAITOK);
 	err = fuse_xattrlist_convert(prefix, linux_list, linux_list_len,
 	    bsd_list, &bsd_list_len);
 	if (err != 0)
 		goto out;
 
 	if (ap->a_size != NULL)
 		*ap->a_size = bsd_list_len;
 
 	if (uio != NULL)
 		err = uiomove(bsd_list, bsd_list_len, uio);
 
 out:
 	free(bsd_list, M_TEMP);
 	fdisp_destroy(&fdi);
 	return (err);
 }
 
 /*
     struct vop_deallocate_args {
 	struct vop_generic_args a_gen;
 	struct vnode *a_vp;
 	off_t *a_offset;
 	off_t *a_len;
 	int a_flags;
 	int a_ioflag;
         struct ucred *a_cred;
     };
 */
 static int
 fuse_vnop_deallocate(struct vop_deallocate_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct mount *mp = vnode_mount(vp);
 	struct fuse_filehandle *fufh;
 	struct fuse_dispatcher fdi;
 	struct fuse_fallocate_in *ffi;
 	struct ucred *cred = ap->a_cred;
 	pid_t pid = curthread->td_proc->p_pid;
 	off_t *len = ap->a_len;
 	off_t *offset = ap->a_offset;
 	int ioflag = ap->a_ioflag;
 	off_t filesize;
 	int err;
 	bool closefufh = false;
 
 	if (fuse_isdeadfs(vp))
 		return (ENXIO);
 
 	if (vfs_isrdonly(mp))
 		return (EROFS);
 
 	if (fsess_not_impl(mp, FUSE_FALLOCATE))
 		goto fallback;
 
 	err = fuse_filehandle_getrw(vp, FWRITE, &fufh, cred, pid);
 	if (err == EBADF && vnode_mount(vp)->mnt_flag & MNT_EXPORTED) {
 		/*
 		 * nfsd will do I/O without first doing VOP_OPEN.  We
 		 * must implicitly open the file here
 		 */
 		err = fuse_filehandle_open(vp, FWRITE, &fufh, curthread, cred);
 		closefufh = true;
 	}
 	if (err)
 		return (err);
 
 	fuse_vnode_update(vp, FN_MTIMECHANGE | FN_CTIMECHANGE);
 
 	err = fuse_vnode_size(vp, &filesize, cred, curthread);
 	if (err)
 		goto out;
 	fuse_inval_buf_range(vp, filesize, *offset, *offset + *len);
 
 	fdisp_init(&fdi, sizeof(*ffi));
 	fdisp_make_vp(&fdi, FUSE_FALLOCATE, vp, curthread, cred);
 	ffi = fdi.indata;
 	ffi->fh = fufh->fh_id;
 	ffi->offset = *offset;
 	ffi->length = *len;
 	/*
 	 * FreeBSD's fspacectl is equivalent to Linux's fallocate with
 	 * mode == FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE
 	 */
 	ffi->mode = FUSE_FALLOC_FL_PUNCH_HOLE | FUSE_FALLOC_FL_KEEP_SIZE;
 	err = fdisp_wait_answ(&fdi);
 
 	if (err == ENOSYS) {
 		fsess_set_notimpl(mp, FUSE_FALLOCATE);
 		goto fallback;
 	} else if (err == EOPNOTSUPP) {
 		/*
 		 * The file system server does not support FUSE_FALLOCATE with
 		 * the supplied mode for this particular file.
 		 */
 		goto fallback;
 	} else if (!err) {
 		/*
 		 * Clip the returned offset to EoF.  Do it here rather than
 		 * before FUSE_FALLOCATE just in case the kernel's cached file
 		 * size is out of date.  Unfortunately, FUSE does not return
 		 * any information about filesize from that operation.
 		 */
 		*offset = MIN(*offset + *len, filesize);
 		*len = 0;
 		fuse_vnode_undirty_cached_timestamps(vp, false);
 		fuse_internal_clear_suid_on_write(vp, cred, curthread);
 
 		if (ioflag & IO_SYNC)
 			err = fuse_internal_fsync(vp, curthread, MNT_WAIT,
 			    false);
 	}
 
 out:
 	if (closefufh)
 		fuse_filehandle_close(vp, fufh, curthread, cred);
 
 	return (err);
 
 fallback:
 	if (closefufh)
 		fuse_filehandle_close(vp, fufh, curthread, cred);
 
 	return (vop_stddeallocate(ap));
 }
 
 /*
     struct vop_deleteextattr_args {
 	struct vop_generic_args a_gen;
 	struct vnode *a_vp;
 	int a_attrnamespace;
 	const char *a_name;
 	struct ucred *a_cred;
 	struct thread *a_td;
     };
 */
 static int
 fuse_vnop_deleteextattr(struct vop_deleteextattr_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct fuse_dispatcher fdi;
 	struct mount *mp = vnode_mount(vp);
 	struct thread *td = ap->a_td;
 	struct ucred *cred = ap->a_cred;
 	char *prefix;
 	size_t len;
 	char *attr_str;
 	int err;
 
 	if (fuse_isdeadfs(vp))
 		return (ENXIO);
 
 	if (fsess_not_impl(mp, FUSE_REMOVEXATTR))
 		return EOPNOTSUPP;
 
 	if (vfs_isrdonly(mp))
 		return EROFS;
 
 	err = fuse_extattr_check_cred(vp, ap->a_attrnamespace, cred, td,
 		VWRITE);
 	if (err)
 		return err;
 
 	/* Default to looking for user attributes. */
 	if (ap->a_attrnamespace == EXTATTR_NAMESPACE_SYSTEM)
 		prefix = EXTATTR_NAMESPACE_SYSTEM_STRING;
 	else
 		prefix = EXTATTR_NAMESPACE_USER_STRING;
 
 	len = strlen(prefix) + sizeof(extattr_namespace_separator) +
 	    strlen(ap->a_name) + 1;
 
 	fdisp_init(&fdi, len);
 	fdisp_make_vp(&fdi, FUSE_REMOVEXATTR, vp, td, cred);
 
 	attr_str = fdi.indata;
 	snprintf(attr_str, len, "%s%c%s", prefix, extattr_namespace_separator,
 	    ap->a_name);
 
 	err = fdisp_wait_answ(&fdi);
 	if (err == ENOSYS) {
 		fsess_set_notimpl(mp, FUSE_REMOVEXATTR);
 		err = EOPNOTSUPP;
 	}
 
 	fdisp_destroy(&fdi);
 	return (err);
 }
 
 /*
     struct vnop_print_args {
 	struct vnode *a_vp;
     };
 */
 static int
 fuse_vnop_print(struct vop_print_args *ap)
 {
 	struct fuse_vnode_data *fvdat = VTOFUD(ap->a_vp);
 
 	printf("nodeid: %ju, parent nodeid: %ju, nlookup: %ju, flag: %#x\n",
 	    (uintmax_t)VTOILLU(ap->a_vp), (uintmax_t)fvdat->parent_nid,
 	    (uintmax_t)fvdat->nlookup,
 	    fvdat->flag);
 
 	return 0;
 }
 
 /*
  * Get an NFS filehandle for a FUSE file.
  *
  * This will only work for FUSE file systems that guarantee the uniqueness of
  * nodeid:generation, which most don't.
  */
 /*
 vop_vptofh {
 	IN struct vnode *a_vp;
 	IN struct fid *a_fhp;
 };
 */
 static int
 fuse_vnop_vptofh(struct vop_vptofh_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct fuse_vnode_data *fvdat = VTOFUD(vp);
 	struct fuse_fid *fhp = (struct fuse_fid *)(ap->a_fhp);
 	_Static_assert(sizeof(struct fuse_fid) <= sizeof(struct fid),
 		"FUSE fid type is too big");
 	struct mount *mp = vnode_mount(vp);
 	struct fuse_data *data = fuse_get_mpdata(mp);
 	struct vattr va;
 	int err;
 
 	if (!(data->dataflags & FSESS_EXPORT_SUPPORT)) {
 		/* NFS requires lookups for "." and ".." */
 		SDT_PROBE2(fusefs, , vnops, trace, 1,
 			"VOP_VPTOFH without FUSE_EXPORT_SUPPORT");
 		return EOPNOTSUPP;
 	}
 	if ((mp->mnt_flag & MNT_EXPORTED) &&
 		!(data->dataflags & FSESS_NO_OPENDIR_SUPPORT))
 	{
 		/*
 		 * NFS is stateless, so nfsd must reopen a directory on every
 		 * call to VOP_READDIR, passing in the d_off field from the
 		 * final dirent of the previous invocation.  But without
 		 * FUSE_NO_OPENDIR_SUPPORT, the FUSE protocol does not
 		 * guarantee that d_off will be valid after a directory is
 		 * closed and reopened.  So prohibit exporting FUSE file
 		 * systems that don't set that flag.
 		 *
 		 * But userspace NFS servers don't have this problem.
                  */
 		SDT_PROBE2(fusefs, , vnops, trace, 1,
 			"VOP_VPTOFH without FUSE_NO_OPENDIR_SUPPORT");
 		return EOPNOTSUPP;
 	}
 
 	err = fuse_internal_getattr(vp, &va, curthread->td_ucred, curthread);
 	if (err)
 		return err;
 
 	/*ip = VTOI(ap->a_vp);*/
 	/*ufhp = (struct ufid *)ap->a_fhp;*/
 	fhp->len = sizeof(struct fuse_fid);
 	fhp->nid = fvdat->nid;
 	if (fvdat->generation <= UINT32_MAX)
 		fhp->gen = fvdat->generation;
 	else
 		return EOVERFLOW;
 	return (0);
 }
diff --git a/sys/fs/msdosfs/msdosfs_lookup.c b/sys/fs/msdosfs/msdosfs_lookup.c
index 91b778b8173b..c061a9169f25 100644
--- a/sys/fs/msdosfs/msdosfs_lookup.c
+++ b/sys/fs/msdosfs/msdosfs_lookup.c
@@ -1,1100 +1,1096 @@
 /* $FreeBSD$ */
 /*	$NetBSD: msdosfs_lookup.c,v 1.37 1997/11/17 15:36:54 ws Exp $	*/
 
 /*-
  * SPDX-License-Identifier: BSD-4-Clause
  *
  * Copyright (C) 1994, 1995, 1997 Wolfgang Solfrank.
  * Copyright (C) 1994, 1995, 1997 TooLs GmbH.
  * All rights reserved.
  * Original code by Paul Popelka (paulp@uts.amdahl.com) (see below).
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by TooLs GmbH.
  * 4. The name of TooLs GmbH may not be used to endorse or promote products
  *    derived from this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 /*-
  * Written by Paul Popelka (paulp@uts.amdahl.com)
  *
  * You can do anything you want with this software, just don't say you wrote
  * it, and don't remove this notice.
  *
  * This software is provided "as is".
  *
  * The author supplies this software to be publicly redistributed on the
  * understanding that the author is not responsible for the correct
  * functioning of this software in any circumstances and is not liable for
  * any damages caused by this software.
  *
  * October 1992
  */
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/buf.h>
 #include <sys/mount.h>
 #include <sys/namei.h>
 #include <sys/vnode.h>
 
 #include <fs/msdosfs/bpb.h>
 #include <fs/msdosfs/direntry.h>
 #include <fs/msdosfs/denode.h>
 #include <fs/msdosfs/fat.h>
 #include <fs/msdosfs/msdosfsmount.h>
 
 static int
 msdosfs_lookup_checker(struct msdosfsmount *pmp, struct vnode *dvp,
     struct denode *tdp, struct vnode **vpp)
 {
 	struct vnode *vp;
 
 	vp = DETOV(tdp);
 
 	/*
 	 * Lookup assumes that directory cannot be hardlinked.
 	 * Corrupted msdosfs filesystem could break this assumption.
 	 */
 	if (vp == dvp) {
 		vput(vp);
 		msdosfs_integrity_error(pmp);
 		*vpp = NULL;
 		return (EBADF);
 	}
 
 	*vpp = vp;
 	return (0);
 }
 
 int
 msdosfs_lookup(struct vop_cachedlookup_args *ap)
 {
 
 	return (msdosfs_lookup_ino(ap->a_dvp, ap->a_vpp, ap->a_cnp, NULL,
 	    NULL));
 }
 
 struct deget_dotdot {
 	u_long cluster;
 	int blkoff;
 };
 
 static int
 msdosfs_deget_dotdot(struct mount *mp, void *arg, int lkflags,
     struct vnode **rvp)
 {
 	struct deget_dotdot *dd_arg;
 	struct denode *rdp;
 	struct msdosfsmount *pmp;
 	int error;
 
 	pmp = VFSTOMSDOSFS(mp);
 	dd_arg = arg;
 	error = deget(pmp, dd_arg->cluster, dd_arg->blkoff,
 	    LK_EXCLUSIVE, &rdp);
 	if (error == 0)
 		*rvp = DETOV(rdp);
 	return (error);
 }
 
 /*
  * When we search a directory the blocks containing directory entries are
  * read and examined.  The directory entries contain information that would
  * normally be in the inode of a unix filesystem.  This means that some of
  * a directory's contents may also be in memory resident denodes (sort of
  * an inode).  This can cause problems if we are searching while some other
  * process is modifying a directory.  To prevent one process from accessing
  * incompletely modified directory information we depend upon being the
  * sole owner of a directory block.  bread/brelse provide this service.
  * This being the case, when a process modifies a directory it must first
  * acquire the disk block that contains the directory entry to be modified.
  * Then update the disk block and the denode, and then write the disk block
  * out to disk.  This way disk blocks containing directory entries and in
  * memory denode's will be in synch.
  */
 int
 msdosfs_lookup_ino(struct vnode *vdp, struct vnode **vpp, struct componentname
     *cnp, daddr_t *scnp, u_long *blkoffp)
 {
 	struct mbnambuf nb;
 	daddr_t bn;
 	int error;
 	int slotcount;
 	int slotoffset = 0;
 	int frcn;
 	u_long cluster;
 	u_long blkoff;
 	int diroff;
 	int blsize;
 	int isadir;		/* ~0 if found direntry is a directory	 */
 	daddr_t scn;		/* starting cluster number		 */
 	struct vnode *pdp;
 	struct denode *dp;
 	struct denode *tdp;
 	struct msdosfsmount *pmp;
 	struct buf *bp = NULL;
 	struct direntry *dep = NULL;
 	struct deget_dotdot dd_arg;
 	u_char dosfilename[12];
 	int flags = cnp->cn_flags;
 	int nameiop = cnp->cn_nameiop;
 	int unlen;
 	uint64_t inode1;
 
 	int wincnt = 1;
 	int chksum = -1, chksum_ok;
 	int olddos = 1;
 
 #ifdef MSDOSFS_DEBUG
 	printf("msdosfs_lookup(): looking for %s\n", cnp->cn_nameptr);
 #endif
 	dp = VTODE(vdp);
 	pmp = dp->de_pmp;
 #ifdef MSDOSFS_DEBUG
 	printf("msdosfs_lookup(): vdp %p, dp %p, Attr %02x\n",
 	    vdp, dp, dp->de_Attributes);
 #endif
 
  restart:
 	if (vpp != NULL)
 		*vpp = NULL;
 	/*
 	 * If they are going after the . or .. entry in the root directory,
 	 * they won't find it.  DOS filesystems don't have them in the root
 	 * directory.  So, we fake it. deget() is in on this scam too.
 	 */
 	if ((vdp->v_vflag & VV_ROOT) && cnp->cn_nameptr[0] == '.' &&
 	    (cnp->cn_namelen == 1 ||
 		(cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.'))) {
 		isadir = ATTR_DIRECTORY;
 		scn = MSDOSFSROOT;
 #ifdef MSDOSFS_DEBUG
 		printf("msdosfs_lookup(): looking for . or .. in root directory\n");
 #endif
 		cluster = MSDOSFSROOT;
 		blkoff = MSDOSFSROOT_OFS;
 		goto foundroot;
 	}
 
 	switch (unix2dosfn((const u_char *)cnp->cn_nameptr, dosfilename,
 	    cnp->cn_namelen, 0, pmp)) {
 	case 0:
 		return (EINVAL);
 	case 1:
 		break;
 	case 2:
 		wincnt = winSlotCnt((const u_char *)cnp->cn_nameptr,
 		    cnp->cn_namelen, pmp) + 1;
 		break;
 	case 3:
 		olddos = 0;
 		wincnt = winSlotCnt((const u_char *)cnp->cn_nameptr,
 		    cnp->cn_namelen, pmp) + 1;
 		break;
 	}
 	if (pmp->pm_flags & MSDOSFSMNT_SHORTNAME) {
 		wincnt = 1;
 		olddos = 1;
 	}
 	unlen = winLenFixup(cnp->cn_nameptr, cnp->cn_namelen);
 
 	/*
 	 * Suppress search for slots unless creating
 	 * file and at end of pathname, in which case
 	 * we watch for a place to put the new file in
 	 * case it doesn't already exist.
 	 */
 	slotcount = wincnt;
 	if ((nameiop == CREATE || nameiop == RENAME) &&
 	    (flags & ISLASTCN))
 		slotcount = 0;
 
 #ifdef MSDOSFS_DEBUG
 	printf("msdosfs_lookup(): dos version of filename %s, length %ld\n",
 	    dosfilename, cnp->cn_namelen);
 #endif
 	/*
 	 * Search the directory pointed at by vdp for the name pointed at
 	 * by cnp->cn_nameptr.
 	 */
 	tdp = NULL;
 	mbnambuf_init(&nb);
 	/*
 	 * The outer loop ranges over the clusters that make up the
 	 * directory.  Note that the root directory is different from all
 	 * other directories.  It has a fixed number of blocks that are not
 	 * part of the pool of allocatable clusters.  So, we treat it a
 	 * little differently. The root directory starts at "cluster" 0.
 	 */
 	diroff = 0;
 	for (frcn = 0;; frcn++) {
 		error = pcbmap(dp, frcn, &bn, &cluster, &blsize);
 		if (error) {
 			if (error == E2BIG)
 				break;
 			return (error);
 		}
 		error = bread(pmp->pm_devvp, bn, blsize, NOCRED, &bp);
 		if (error) {
 			return (error);
 		}
 		for (blkoff = 0; blkoff < blsize;
 		     blkoff += sizeof(struct direntry),
 		     diroff += sizeof(struct direntry)) {
 			dep = (struct direntry *)(bp->b_data + blkoff);
 			/*
 			 * If the slot is empty and we are still looking
 			 * for an empty then remember this one.  If the
 			 * slot is not empty then check to see if it
 			 * matches what we are looking for.  If the slot
 			 * has never been filled with anything, then the
 			 * remainder of the directory has never been used,
 			 * so there is no point in searching it.
 			 */
 			if (dep->deName[0] == SLOT_EMPTY ||
 			    dep->deName[0] == SLOT_DELETED) {
 				/*
 				 * Drop memory of previous long matches
 				 */
 				chksum = -1;
 				mbnambuf_init(&nb);
 
 				if (slotcount < wincnt) {
 					slotcount++;
 					slotoffset = diroff;
 				}
 				if (dep->deName[0] == SLOT_EMPTY) {
 					brelse(bp);
 					goto notfound;
 				}
 			} else {
 				/*
 				 * If there wasn't enough space for our winentries,
 				 * forget about the empty space
 				 */
 				if (slotcount < wincnt)
 					slotcount = 0;
 
 				/*
 				 * Check for Win95 long filename entry
 				 */
 				if (dep->deAttributes == ATTR_WIN95) {
 					if (pmp->pm_flags & MSDOSFSMNT_SHORTNAME)
 						continue;
 
 					chksum = win2unixfn(&nb,
 					    (struct winentry *)dep, chksum,
 					    pmp);
 					continue;
 				}
 
 				chksum = winChkName(&nb,
 				    (const u_char *)cnp->cn_nameptr, unlen,
 				    chksum, pmp);
 				if (chksum == -2) {
 					chksum = -1;
 					continue;
 				}
 
 				/*
 				 * Ignore volume labels (anywhere, not just
 				 * the root directory).
 				 */
 				if (dep->deAttributes & ATTR_VOLUME) {
 					chksum = -1;
 					continue;
 				}
 
 				/*
 				 * Check for a checksum or name match
 				 */
 				chksum_ok = (chksum == winChksum(dep->deName));
 				if (!chksum_ok
 				    && (!olddos || bcmp(dosfilename, dep->deName, 11))) {
 					chksum = -1;
 					continue;
 				}
 #ifdef MSDOSFS_DEBUG
 				printf("msdosfs_lookup(): match blkoff %d, diroff %d\n",
 				    blkoff, diroff);
 #endif
 				/*
 				 * Remember where this directory
 				 * entry came from for whoever did
 				 * this lookup.
 				 */
 				dp->de_fndoffset = diroff;
 				if (chksum_ok && nameiop == RENAME) {
 					/*
 					 * Target had correct long name
 					 * directory entries, reuse them
 					 * as needed.
 					 */
 					dp->de_fndcnt = wincnt - 1;
 				} else {
 					/*
 					 * Long name directory entries
 					 * not present or corrupt, can only
 					 * reuse dos directory entry.
 					 */
 					dp->de_fndcnt = 0;
 				}
 
 				goto found;
 			}
 		}	/* for (blkoff = 0; .... */
 		/*
 		 * Release the buffer holding the directory cluster just
 		 * searched.
 		 */
 		brelse(bp);
 	}	/* for (frcn = 0; ; frcn++) */
 
 notfound:
 	/*
 	 * We hold no disk buffers at this point.
 	 */
 
 	/*
 	 * Fixup the slot description to point to the place where
 	 * we might put the new DOS direntry (putting the Win95
 	 * long name entries before that)
 	 */
 	if (!slotcount) {
 		slotcount = 1;
 		slotoffset = diroff;
 	}
 	if (wincnt > slotcount)
 		slotoffset += sizeof(struct direntry) * (wincnt - slotcount);
 
 	/*
 	 * If we get here we didn't find the entry we were looking for. But
 	 * that's ok if we are creating or renaming and are at the end of
 	 * the pathname and the directory hasn't been removed.
 	 */
 #ifdef MSDOSFS_DEBUG
 	printf("msdosfs_lookup(): op %d, refcnt %ld\n",
 	    nameiop, dp->de_refcnt);
 	printf("               slotcount %d, slotoffset %d\n",
 	       slotcount, slotoffset);
 #endif
 	if ((nameiop == CREATE || nameiop == RENAME) &&
 	    (flags & ISLASTCN) && dp->de_refcnt != 0) {
 		/*
 		 * Access for write is interpreted as allowing
 		 * creation of files in the directory.
 		 */
 		error = VOP_ACCESS(vdp, VWRITE, cnp->cn_cred, curthread);
 		if (error)
 			return (error);
 		/*
 		 * Return an indication of where the new directory
 		 * entry should be put.
 		 */
 		dp->de_fndoffset = slotoffset;
 		dp->de_fndcnt = wincnt - 1;
 
 		/*
 		 * We return with the directory locked, so that
 		 * the parameters we set up above will still be
 		 * valid if we actually decide to do a direnter().
 		 * We return ni_vp == NULL to indicate that the entry
 		 * does not currently exist; we leave a pointer to
 		 * the (locked) directory inode in ndp->ni_dvp.
-		 * The pathname buffer is saved so that the name
-		 * can be obtained later.
 		 *
 		 * NB - if the directory is unlocked, then this
 		 * information cannot be used.
 		 */
-		cnp->cn_flags |= SAVENAME;
 		return (EJUSTRETURN);
 	}
 #if 0
 	/*
 	 * Insert name into cache (as non-existent) if appropriate.
 	 *
 	 * XXX Negative caching is broken for msdosfs because the name
 	 * cache doesn't understand peculiarities such as case insensitivity
 	 * and 8.3 filenames.  Hence, it may not invalidate all negative
 	 * entries if a file with this name is later created.
 	 */
 	if ((cnp->cn_flags & MAKEENTRY) != 0)
 		cache_enter(vdp, *vpp, cnp);
 #endif
 	return (ENOENT);
 
 found:
 	/*
 	 * NOTE:  We still have the buffer with matched directory entry at
 	 * this point.
 	 */
 	isadir = dep->deAttributes & ATTR_DIRECTORY;
 	scn = getushort(dep->deStartCluster);
 	if (FAT32(pmp)) {
 		scn |= getushort(dep->deHighClust) << 16;
 		if (scn == pmp->pm_rootdirblk) {
 			/*
 			 * There should actually be 0 here.
 			 * Just ignore the error.
 			 */
 			scn = MSDOSFSROOT;
 		}
 	}
 
 	if (isadir) {
 		cluster = scn;
 		if (cluster == MSDOSFSROOT)
 			blkoff = MSDOSFSROOT_OFS;
 		else
 			blkoff = 0;
 	} else if (cluster == MSDOSFSROOT)
 		blkoff = diroff;
 
 	/*
 	 * Now release buf to allow deget to read the entry again.
 	 * Reserving it here and giving it to deget could result
 	 * in a deadlock.
 	 */
 	brelse(bp);
 	bp = NULL;
 
 foundroot:
 	/*
 	 * If we entered at foundroot, then we are looking for the . or ..
 	 * entry of the filesystems root directory.  isadir and scn were
 	 * setup before jumping here.  And, bp is already null.
 	 */
 	if (FAT32(pmp) && scn == MSDOSFSROOT)
 		scn = pmp->pm_rootdirblk;
 
 	if (scnp != NULL) {
 		*scnp = cluster;
 		*blkoffp = blkoff;
 		return (0);
 	}
 
 	/*
 	 * If deleting, and at end of pathname, return
 	 * parameters which can be used to remove file.
 	 */
 	if (nameiop == DELETE && (flags & ISLASTCN)) {
 		/*
 		 * Don't allow deleting the root.
 		 */
 		if (blkoff == MSDOSFSROOT_OFS)
 			return (EBUSY);
 
 		/*
 		 * Write access to directory required to delete files.
 		 */
 		error = VOP_ACCESS(vdp, VWRITE, cnp->cn_cred, curthread);
 		if (error)
 			return (error);
 
 		/*
 		 * Return pointer to current entry in dp->i_offset.
 		 * Save directory inode pointer in ndp->ni_dvp for dirremove().
 		 */
 		if (dp->de_StartCluster == scn && isadir) {	/* "." */
 			VREF(vdp);
 			*vpp = vdp;
 			return (0);
 		}
 		error = deget(pmp, cluster, blkoff, LK_EXCLUSIVE, &tdp);
 		if (error)
 			return (error);
 		return (msdosfs_lookup_checker(pmp, vdp, tdp, vpp));
 	}
 
 	/*
 	 * If rewriting (RENAME), return the inode and the
 	 * information required to rewrite the present directory
 	 * Must get inode of directory entry to verify it's a
 	 * regular file, or empty directory.
 	 */
 	if (nameiop == RENAME && (flags & ISLASTCN)) {
 		if (blkoff == MSDOSFSROOT_OFS)
 			return (EBUSY);
 
 		error = VOP_ACCESS(vdp, VWRITE, cnp->cn_cred, curthread);
 		if (error)
 			return (error);
 
 		/*
 		 * Careful about locking second inode.
 		 * This can only occur if the target is ".".
 		 */
 		if (dp->de_StartCluster == scn && isadir)
 			return (EISDIR);
 
 		if ((error = deget(pmp, cluster, blkoff, LK_EXCLUSIVE,
 		    &tdp)) != 0)
 			return (error);
 		if ((error = msdosfs_lookup_checker(pmp, vdp, tdp, vpp))
 		    != 0)
 			return (error);
-		cnp->cn_flags |= SAVENAME;
 		return (0);
 	}
 
 	/*
 	 * Step through the translation in the name.  We do not `vput' the
 	 * directory because we may need it again if a symbolic link
 	 * is relative to the current directory.  Instead we save it
 	 * unlocked as "pdp".  We must get the target inode before unlocking
 	 * the directory to insure that the inode will not be removed
 	 * before we get it.  We prevent deadlock by always fetching
 	 * inodes from the root, moving down the directory tree. Thus
 	 * when following backward pointers ".." we must unlock the
 	 * parent directory before getting the requested directory.
 	 */
 	pdp = vdp;
 	if (flags & ISDOTDOT) {
 		dd_arg.cluster = cluster;
 		dd_arg.blkoff = blkoff;
 		error = vn_vget_ino_gen(vdp, msdosfs_deget_dotdot,
 		    &dd_arg, cnp->cn_lkflags, vpp);
 		if (error != 0) {
 			*vpp = NULL;
 			return (error);
 		}
 		/*
 		 * Recheck that ".." still points to the inode we
 		 * looked up before pdp lock was dropped.
 		 */
 		error = msdosfs_lookup_ino(pdp, NULL, cnp, &scn, &blkoff);
 		if (error) {
 			vput(*vpp);
 			*vpp = NULL;
 			return (error);
 		}
 		if (FAT32(pmp) && scn == MSDOSFSROOT)
 			scn = pmp->pm_rootdirblk;
 		inode1 = scn * pmp->pm_bpcluster + blkoff;
 		if (VTODE(*vpp)->de_inode != inode1) {
 			vput(*vpp);
 			goto restart;
 		}
 		error = msdosfs_lookup_checker(pmp, vdp, VTODE(*vpp), vpp);
 		if (error != 0)
 			return (error);
 	} else if (dp->de_StartCluster == scn && isadir) {
 		if (cnp->cn_namelen != 1 || cnp->cn_nameptr[0] != '.') {
 			/* fs is corrupted, non-dot lookup returned dvp */
 			msdosfs_integrity_error(pmp);
 			return (EBADF);
 		}
 		VREF(vdp);	/* we want ourself, ie "." */
 		*vpp = vdp;
 	} else {
 		if ((error = deget(pmp, cluster, blkoff, LK_EXCLUSIVE,
 		    &tdp)) != 0)
 			return (error);
 		if ((error = msdosfs_lookup_checker(pmp, vdp, tdp, vpp)) != 0)
 			return (error);
 	}
 
 	/*
 	 * Insert name into cache if appropriate.
 	 */
 	if (cnp->cn_flags & MAKEENTRY)
 		cache_enter(vdp, *vpp, cnp);
 	return (0);
 }
 
 /*
  * dep  - directory entry to copy into the directory
  * ddep - directory to add to
  * depp - return the address of the denode for the created directory entry
  *	  if depp != 0
  * cnp  - componentname needed for Win95 long filenames
  */
 int
 createde(struct denode *dep, struct denode *ddep, struct denode **depp,
     struct componentname *cnp)
 {
 	int error;
 	u_long dirclust, diroffset;
 	struct direntry *ndep;
 	struct msdosfsmount *pmp = ddep->de_pmp;
 	struct buf *bp;
 	daddr_t bn;
 	int blsize;
 
 #ifdef MSDOSFS_DEBUG
 	printf("createde(dep %p, ddep %p, depp %p, cnp %p)\n",
 	    dep, ddep, depp, cnp);
 #endif
 
 	/*
 	 * If no space left in the directory then allocate another cluster
 	 * and chain it onto the end of the file.  There is one exception
 	 * to this.  That is, if the root directory has no more space it
 	 * can NOT be expanded.  extendfile() checks for and fails attempts
 	 * to extend the root directory.  We just return an error in that
 	 * case.
 	 */
 	if (ddep->de_fndoffset >= ddep->de_FileSize) {
 		diroffset = ddep->de_fndoffset + sizeof(struct direntry)
 		    - ddep->de_FileSize;
 		dirclust = de_clcount(pmp, diroffset);
 		error = extendfile(ddep, dirclust, 0, 0, DE_CLEAR);
 		if (error) {
 			(void)detrunc(ddep, ddep->de_FileSize, 0, NOCRED);
 			return error;
 		}
 
 		/*
 		 * Update the size of the directory
 		 */
 		ddep->de_FileSize += de_cn2off(pmp, dirclust);
 	}
 
 	/*
 	 * We just read in the cluster with space.  Copy the new directory
 	 * entry in.  Then write it to disk. NOTE:  DOS directories
 	 * do not get smaller as clusters are emptied.
 	 */
 	error = pcbmap(ddep, de_cluster(pmp, ddep->de_fndoffset),
 		       &bn, &dirclust, &blsize);
 	if (error)
 		return error;
 	diroffset = ddep->de_fndoffset;
 	if (dirclust != MSDOSFSROOT)
 		diroffset &= pmp->pm_crbomask;
 	if ((error = bread(pmp->pm_devvp, bn, blsize, NOCRED, &bp)) != 0) {
 		brelse(bp);
 		return error;
 	}
 	ndep = bptoep(pmp, bp, ddep->de_fndoffset);
 
 	DE_EXTERNALIZE(ndep, dep);
 
 	/*
 	 * Now write the Win95 long name
 	 */
 	if (ddep->de_fndcnt > 0) {
 		uint8_t chksum = winChksum(ndep->deName);
 		const u_char *un = (const u_char *)cnp->cn_nameptr;
 		int unlen = cnp->cn_namelen;
 		int cnt = 1;
 
 		while (--ddep->de_fndcnt >= 0) {
 			if (!(ddep->de_fndoffset & pmp->pm_crbomask)) {
 				if (DOINGASYNC(DETOV(ddep)))
 					bdwrite(bp);
 				else if ((error = bwrite(bp)) != 0)
 					return error;
 
 				ddep->de_fndoffset -= sizeof(struct direntry);
 				error = pcbmap(ddep,
 					       de_cluster(pmp,
 							  ddep->de_fndoffset),
 					       &bn, 0, &blsize);
 				if (error)
 					return error;
 
 				error = bread(pmp->pm_devvp, bn, blsize,
 					      NOCRED, &bp);
 				if (error) {
 					return error;
 				}
 				ndep = bptoep(pmp, bp, ddep->de_fndoffset);
 			} else {
 				ndep--;
 				ddep->de_fndoffset -= sizeof(struct direntry);
 			}
 			if (!unix2winfn(un, unlen, (struct winentry *)ndep,
 					cnt++, chksum, pmp))
 				break;
 		}
 	}
 
 	if (DOINGASYNC(DETOV(ddep)))
 		bdwrite(bp);
 	else if ((error = bwrite(bp)) != 0)
 		return error;
 
 	/*
 	 * If they want us to return with the denode gotten.
 	 */
 	if (depp) {
 		if (dep->de_Attributes & ATTR_DIRECTORY) {
 			dirclust = dep->de_StartCluster;
 			if (FAT32(pmp) && dirclust == pmp->pm_rootdirblk)
 				dirclust = MSDOSFSROOT;
 			if (dirclust == MSDOSFSROOT)
 				diroffset = MSDOSFSROOT_OFS;
 			else
 				diroffset = 0;
 		}
 		return (deget(pmp, dirclust, diroffset, LK_EXCLUSIVE, depp));
 	}
 
 	return 0;
 }
 
 /*
  * Be sure a directory is empty except for "." and "..". Return 1 if empty,
  * return 0 if not empty or error.
  */
 int
 dosdirempty(struct denode *dep)
 {
 	int blsize;
 	int error;
 	u_long cn;
 	daddr_t bn;
 	struct buf *bp;
 	struct msdosfsmount *pmp = dep->de_pmp;
 	struct direntry *dentp;
 
 	/*
 	 * Since the filesize field in directory entries for a directory is
 	 * zero, we just have to feel our way through the directory until
 	 * we hit end of file.
 	 */
 	for (cn = 0;; cn++) {
 		if ((error = pcbmap(dep, cn, &bn, 0, &blsize)) != 0) {
 			if (error == E2BIG)
 				return (1);	/* it's empty */
 			return (0);
 		}
 		error = bread(pmp->pm_devvp, bn, blsize, NOCRED, &bp);
 		if (error) {
 			return (0);
 		}
 		for (dentp = (struct direntry *)bp->b_data;
 		     (char *)dentp < bp->b_data + blsize;
 		     dentp++) {
 			if (dentp->deName[0] != SLOT_DELETED &&
 			    (dentp->deAttributes & ATTR_VOLUME) == 0) {
 				/*
 				 * In dos directories an entry whose name
 				 * starts with SLOT_EMPTY (0) starts the
 				 * beginning of the unused part of the
 				 * directory, so we can just return that it
 				 * is empty.
 				 */
 				if (dentp->deName[0] == SLOT_EMPTY) {
 					brelse(bp);
 					return (1);
 				}
 				/*
 				 * Any names other than "." and ".." in a
 				 * directory mean it is not empty.
 				 */
 				if (bcmp(dentp->deName, ".          ", 11) &&
 				    bcmp(dentp->deName, "..         ", 11)) {
 					brelse(bp);
 #ifdef MSDOSFS_DEBUG
 					printf("dosdirempty(): entry found %02x, %02x\n",
 					    dentp->deName[0], dentp->deName[1]);
 #endif
 					return (0);	/* not empty */
 				}
 			}
 		}
 		brelse(bp);
 	}
 	/* NOTREACHED */
 }
 
 /*
  * Check to see if the directory described by target is in some
  * subdirectory of source.  This prevents something like the following from
  * succeeding and leaving a bunch or files and directories orphaned. mv
  * /a/b/c /a/b/c/d/e/f Where c and f are directories.
  *
  * source - the inode for /a/b/c
  * target - the inode for /a/b/c/d/e/f
  *
  * Returns 0 if target is NOT a subdirectory of source.
  * Otherwise returns a non-zero error number.
  */
 int
 doscheckpath(struct denode *source, struct denode *target, daddr_t *wait_scn)
 {
 	daddr_t scn;
 	struct msdosfsmount *pmp;
 	struct direntry *ep;
 	struct denode *dep;
 	struct buf *bp = NULL;
 	int error = 0;
 
 	*wait_scn = 0;
 
 	pmp = target->de_pmp;
 	lockmgr_assert(&pmp->pm_checkpath_lock, KA_XLOCKED);
 	KASSERT(pmp == source->de_pmp,
 	    ("doscheckpath: source and target on different filesystems"));
 
 	if ((target->de_Attributes & ATTR_DIRECTORY) == 0 ||
 	    (source->de_Attributes & ATTR_DIRECTORY) == 0)
 		return (ENOTDIR);
 
 	if (target->de_StartCluster == source->de_StartCluster)
 		return (EEXIST);
 
 	if (target->de_StartCluster == MSDOSFSROOT ||
 	    (FAT32(pmp) && target->de_StartCluster == pmp->pm_rootdirblk))
 		return (0);
 
 	dep = target;
 	vget(DETOV(dep), LK_EXCLUSIVE);
 	for (;;) {
 		if ((dep->de_Attributes & ATTR_DIRECTORY) == 0) {
 			error = ENOTDIR;
 			break;
 		}
 		scn = dep->de_StartCluster;
 		error = bread(pmp->pm_devvp, cntobn(pmp, scn),
 		    pmp->pm_bpcluster, NOCRED, &bp);
 		if (error != 0)
 			break;
 
 		ep = (struct direntry *)bp->b_data + 1;
 		if ((ep->deAttributes & ATTR_DIRECTORY) == 0 ||
 		    bcmp(ep->deName, "..         ", 11) != 0) {
 			error = ENOTDIR;
 			brelse(bp);
 			break;
 		}
 
 		scn = getushort(ep->deStartCluster);
 		if (FAT32(pmp))
 			scn |= getushort(ep->deHighClust) << 16;
 		brelse(bp);
 
 		if (scn == source->de_StartCluster) {
 			error = EINVAL;
 			break;
 		}
 		if (scn == MSDOSFSROOT)
 			break;
 		if (FAT32(pmp) && scn == pmp->pm_rootdirblk) {
 			/*
 			 * scn should be 0 in this case,
 			 * but we silently ignore the error.
 			 */
 			break;
 		}
 
 		vput(DETOV(dep));
 		dep = NULL;
 		/* NOTE: deget() clears dep on error */
 		error = deget(pmp, scn, 0, LK_EXCLUSIVE | LK_NOWAIT, &dep);
 		if (error != 0) {
 			*wait_scn = scn;
 			break;
 		}
 	}
 #ifdef MSDOSFS_DEBUG
 	if (error == ENOTDIR)
 		printf("doscheckpath(): .. not a directory?\n");
 #endif
 	if (dep != NULL)
 		vput(DETOV(dep));
 	return (error);
 }
 
 /*
  * Read in the disk block containing the directory entry (dirclu, dirofs)
  * and return the address of the buf header, and the address of the
  * directory entry within the block.
  */
 int
 readep(struct msdosfsmount *pmp, u_long dirclust, u_long diroffset,
     struct buf **bpp, struct direntry **epp)
 {
 	int error;
 	daddr_t bn;
 	int blsize;
 
 	blsize = pmp->pm_bpcluster;
 	if (dirclust == MSDOSFSROOT
 	    && de_blk(pmp, diroffset + blsize) > pmp->pm_rootdirsize)
 		blsize = de_bn2off(pmp, pmp->pm_rootdirsize) & pmp->pm_crbomask;
 	bn = detobn(pmp, dirclust, diroffset);
 	if ((error = bread(pmp->pm_devvp, bn, blsize, NOCRED, bpp)) != 0) {
 		brelse(*bpp);
 		*bpp = NULL;
 		return (error);
 	}
 	if (epp)
 		*epp = bptoep(pmp, *bpp, diroffset);
 	return (0);
 }
 
 /*
  * Read in the disk block containing the directory entry dep came from and
  * return the address of the buf header, and the address of the directory
  * entry within the block.
  */
 int
 readde(struct denode *dep, struct buf **bpp, struct direntry **epp)
 {
 
 	return (readep(dep->de_pmp, dep->de_dirclust, dep->de_diroffset,
 	    bpp, epp));
 }
 
 /*
  * Remove a directory entry. At this point the file represented by the
  * directory entry to be removed is still full length until no one has it
  * open.  When the file no longer being used msdosfs_inactive() is called
  * and will truncate the file to 0 length.  When the vnode containing the
  * denode is needed for some other purpose by VFS it will call
  * msdosfs_reclaim() which will remove the denode from the denode cache.
  *
  * pdep	directory where the entry is removed
  * dep	file to be removed
  */
 int
 removede(struct denode *pdep, struct denode *dep)
 {
 	int error;
 	struct direntry *ep;
 	struct buf *bp;
 	daddr_t bn;
 	int blsize;
 	struct msdosfsmount *pmp = pdep->de_pmp;
 	u_long offset = pdep->de_fndoffset;
 
 #ifdef MSDOSFS_DEBUG
 	printf("removede(): filename %s, dep %p, offset %08lx\n",
 	    dep->de_Name, dep, offset);
 #endif
 
 	dep->de_refcnt--;
 	offset += sizeof(struct direntry);
 	do {
 		offset -= sizeof(struct direntry);
 		error = pcbmap(pdep, de_cluster(pmp, offset), &bn, 0, &blsize);
 		if (error)
 			return error;
 		error = bread(pmp->pm_devvp, bn, blsize, NOCRED, &bp);
 		if (error) {
 			return error;
 		}
 		ep = bptoep(pmp, bp, offset);
 		/*
 		 * Check whether, if we came here the second time, i.e.
 		 * when underflowing into the previous block, the last
 		 * entry in this block is a longfilename entry, too.
 		 */
 		if (ep->deAttributes != ATTR_WIN95
 		    && offset != pdep->de_fndoffset) {
 			brelse(bp);
 			break;
 		}
 		offset += sizeof(struct direntry);
 		while (1) {
 			/*
 			 * We are a bit aggressive here in that we delete any Win95
 			 * entries preceding this entry, not just the ones we "own".
 			 * Since these presumably aren't valid anyway,
 			 * there should be no harm.
 			 */
 			offset -= sizeof(struct direntry);
 			ep--->deName[0] = SLOT_DELETED;
 			if ((pmp->pm_flags & MSDOSFSMNT_NOWIN95)
 			    || !(offset & pmp->pm_crbomask)
 			    || ep->deAttributes != ATTR_WIN95)
 				break;
 		}
 		if (DOINGASYNC(DETOV(pdep)))
 			bdwrite(bp);
 		else if ((error = bwrite(bp)) != 0)
 			return error;
 	} while (!(pmp->pm_flags & MSDOSFSMNT_NOWIN95)
 	    && !(offset & pmp->pm_crbomask)
 	    && offset);
 	return 0;
 }
 
 /*
  * Create a unique DOS name in dvp
  */
 int
 uniqdosname(struct denode *dep, struct componentname *cnp, u_char *cp)
 {
 	struct msdosfsmount *pmp = dep->de_pmp;
 	struct direntry *dentp;
 	int gen;
 	int blsize;
 	u_long cn;
 	daddr_t bn;
 	struct buf *bp;
 	int error;
 
 	if (pmp->pm_flags & MSDOSFSMNT_SHORTNAME)
 		return (unix2dosfn((const u_char *)cnp->cn_nameptr, cp,
 		    cnp->cn_namelen, 0, pmp) ? 0 : EINVAL);
 
 	for (gen = 1;; gen++) {
 		/*
 		 * Generate DOS name with generation number
 		 */
 		if (!unix2dosfn((const u_char *)cnp->cn_nameptr, cp,
 		    cnp->cn_namelen, gen, pmp))
 			return gen == 1 ? EINVAL : EEXIST;
 
 		/*
 		 * Now look for a dir entry with this exact name
 		 */
 		for (cn = error = 0; !error; cn++) {
 			if ((error = pcbmap(dep, cn, &bn, 0, &blsize)) != 0) {
 				if (error == E2BIG)	/* EOF reached and not found */
 					return 0;
 				return error;
 			}
 			error = bread(pmp->pm_devvp, bn, blsize, NOCRED, &bp);
 			if (error) {
 				return error;
 			}
 			for (dentp = (struct direntry *)bp->b_data;
 			     (char *)dentp < bp->b_data + blsize;
 			     dentp++) {
 				if (dentp->deName[0] == SLOT_EMPTY) {
 					/*
 					 * Last used entry and not found
 					 */
 					brelse(bp);
 					return 0;
 				}
 				/*
 				 * Ignore volume labels and Win95 entries
 				 */
 				if (dentp->deAttributes & ATTR_VOLUME)
 					continue;
 				if (!bcmp(dentp->deName, cp, 11)) {
 					error = EEXIST;
 					break;
 				}
 			}
 			brelse(bp);
 		}
 	}
 }
diff --git a/sys/fs/msdosfs/msdosfs_vnops.c b/sys/fs/msdosfs/msdosfs_vnops.c
index 36da35257d2c..f5a07cd5f492 100644
--- a/sys/fs/msdosfs/msdosfs_vnops.c
+++ b/sys/fs/msdosfs/msdosfs_vnops.c
@@ -1,2005 +1,1992 @@
 /* $FreeBSD$ */
 /*	$NetBSD: msdosfs_vnops.c,v 1.68 1998/02/10 14:10:04 mrg Exp $	*/
 
 /*-
  * SPDX-License-Identifier: BSD-4-Clause
  *
  * Copyright (C) 1994, 1995, 1997 Wolfgang Solfrank.
  * Copyright (C) 1994, 1995, 1997 TooLs GmbH.
  * All rights reserved.
  * Original code by Paul Popelka (paulp@uts.amdahl.com) (see below).
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by TooLs GmbH.
  * 4. The name of TooLs GmbH may not be used to endorse or promote products
  *    derived from this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 /*-
  * Written by Paul Popelka (paulp@uts.amdahl.com)
  *
  * You can do anything you want with this software, just don't say you wrote
  * it, and don't remove this notice.
  *
  * This software is provided "as is".
  *
  * The author supplies this software to be publicly redistributed on the
  * understanding that the author is not responsible for the correct
  * functioning of this software in any circumstances and is not liable for
  * any damages caused by this software.
  *
  * October 1992
  */
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/bio.h>
 #include <sys/buf.h>
 #include <sys/clock.h>
 #include <sys/dirent.h>
 #include <sys/lock.h>
 #include <sys/lockf.h>
 #include <sys/malloc.h>
 #include <sys/mount.h>
 #include <sys/mutex.h>
 #include <sys/namei.h>
 #include <sys/priv.h>
 #include <sys/stat.h>
 #include <sys/sysctl.h>
 #include <sys/unistd.h>
 #include <sys/vmmeter.h>
 #include <sys/vnode.h>
 
 #include <vm/vm.h>
 #include <vm/vm_extern.h>
 #include <vm/vnode_pager.h>
 
 #include <fs/msdosfs/bpb.h>
 #include <fs/msdosfs/direntry.h>
 #include <fs/msdosfs/denode.h>
 #include <fs/msdosfs/fat.h>
 #include <fs/msdosfs/msdosfsmount.h>
 
 /*
  * Prototypes for MSDOSFS vnode operations
  */
 static vop_create_t	msdosfs_create;
 static vop_mknod_t	msdosfs_mknod;
 static vop_open_t	msdosfs_open;
 static vop_close_t	msdosfs_close;
 static vop_access_t	msdosfs_access;
 static vop_getattr_t	msdosfs_getattr;
 static vop_setattr_t	msdosfs_setattr;
 static vop_read_t	msdosfs_read;
 static vop_write_t	msdosfs_write;
 static vop_fsync_t	msdosfs_fsync;
 static vop_remove_t	msdosfs_remove;
 static vop_link_t	msdosfs_link;
 static vop_rename_t	msdosfs_rename;
 static vop_mkdir_t	msdosfs_mkdir;
 static vop_rmdir_t	msdosfs_rmdir;
 static vop_symlink_t	msdosfs_symlink;
 static vop_readdir_t	msdosfs_readdir;
 static vop_bmap_t	msdosfs_bmap;
 static vop_getpages_t	msdosfs_getpages;
 static vop_strategy_t	msdosfs_strategy;
 static vop_print_t	msdosfs_print;
 static vop_pathconf_t	msdosfs_pathconf;
 static vop_vptofh_t	msdosfs_vptofh;
 
 /*
  * Some general notes:
  *
  * In the ufs filesystem the inodes, superblocks, and indirect blocks are
  * read/written using the vnode for the filesystem. Blocks that represent
  * the contents of a file are read/written using the vnode for the file
  * (including directories when they are read/written as files). This
  * presents problems for the dos filesystem because data that should be in
  * an inode (if dos had them) resides in the directory itself.  Since we
  * must update directory entries without the benefit of having the vnode
  * for the directory we must use the vnode for the filesystem.  This means
  * that when a directory is actually read/written (via read, write, or
  * readdir, or seek) we must use the vnode for the filesystem instead of
  * the vnode for the directory as would happen in ufs. This is to insure we
  * retrieve the correct block from the buffer cache since the hash value is
  * based upon the vnode address and the desired block number.
  */
 
 /*
  * Create a regular file. On entry the directory to contain the file being
  * created is locked.  We must release before we return. We must also free
  * the pathname buffer pointed at by cnp->cn_pnbuf, always on error, or
  * only if the SAVESTART bit in cn_flags is clear on success.
  */
 static int
 msdosfs_create(struct vop_create_args *ap)
 {
 	struct componentname *cnp = ap->a_cnp;
 	struct denode ndirent;
 	struct denode *dep;
 	struct denode *pdep = VTODE(ap->a_dvp);
 	struct timespec ts;
 	int error;
 
 #ifdef MSDOSFS_DEBUG
 	printf("msdosfs_create(cnp %p, vap %p\n", cnp, ap->a_vap);
 #endif
 
 	/*
 	 * If this is the root directory and there is no space left we
 	 * can't do anything.  This is because the root directory can not
 	 * change size.
 	 */
 	if (pdep->de_StartCluster == MSDOSFSROOT
 	    && pdep->de_fndoffset >= pdep->de_FileSize) {
 		error = ENOSPC;
 		goto bad;
 	}
 
 	/*
 	 * Create a directory entry for the file, then call createde() to
 	 * have it installed. NOTE: DOS files are always executable.  We
 	 * use the absence of the owner write bit to make the file
 	 * readonly.
 	 */
-#ifdef DIAGNOSTIC
-	if ((cnp->cn_flags & HASBUF) == 0)
-		panic("msdosfs_create: no name");
-#endif
 	memset(&ndirent, 0, sizeof(ndirent));
 	error = uniqdosname(pdep, cnp, ndirent.de_Name);
 	if (error)
 		goto bad;
 
 	ndirent.de_Attributes = ATTR_ARCHIVE;
 	ndirent.de_LowerCase = 0;
 	ndirent.de_StartCluster = 0;
 	ndirent.de_FileSize = 0;
 	ndirent.de_pmp = pdep->de_pmp;
 	ndirent.de_flag = DE_ACCESS | DE_CREATE | DE_UPDATE;
 	vfs_timestamp(&ts);
 	DETIMES(&ndirent, &ts, &ts, &ts);
 	error = createde(&ndirent, pdep, &dep, cnp);
 	if (error)
 		goto bad;
 	*ap->a_vpp = DETOV(dep);
 	if ((cnp->cn_flags & MAKEENTRY) != 0)
 		cache_enter(ap->a_dvp, *ap->a_vpp, cnp);
 	return (0);
 
 bad:
 	return (error);
 }
 
 static int
 msdosfs_mknod(struct vop_mknod_args *ap)
 {
 
     return (EINVAL);
 }
 
 static int
 msdosfs_open(struct vop_open_args *ap)
 {
 	struct denode *dep = VTODE(ap->a_vp);
 	vnode_create_vobject(ap->a_vp, dep->de_FileSize, ap->a_td);
 	return 0;
 }
 
 static int
 msdosfs_close(struct vop_close_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct denode *dep = VTODE(vp);
 	struct timespec ts;
 
 	VI_LOCK(vp);
 	if (vp->v_usecount > 1) {
 		vfs_timestamp(&ts);
 		DETIMES(dep, &ts, &ts, &ts);
 	}
 	VI_UNLOCK(vp);
 	return 0;
 }
 
 static int
 msdosfs_access(struct vop_access_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct denode *dep = VTODE(ap->a_vp);
 	struct msdosfsmount *pmp = dep->de_pmp;
 	mode_t file_mode;
 	accmode_t accmode = ap->a_accmode;
 
 	file_mode = S_IRWXU|S_IRWXG|S_IRWXO;
 	file_mode &= (vp->v_type == VDIR ? pmp->pm_dirmask : pmp->pm_mask);
 
 	/*
 	 * Disallow writing to directories and regular files if the
 	 * filesystem is read-only.
 	 */
 	if (accmode & VWRITE) {
 		switch (vp->v_type) {
 		case VREG:
 		case VDIR:
 			if (vp->v_mount->mnt_flag & MNT_RDONLY)
 				return (EROFS);
 			break;
 		default:
 			break;
 		}
 	}
 
 	return (vaccess(vp->v_type, file_mode, pmp->pm_uid, pmp->pm_gid,
 	    ap->a_accmode, ap->a_cred));
 }
 
 static int
 msdosfs_getattr(struct vop_getattr_args *ap)
 {
 	struct denode *dep = VTODE(ap->a_vp);
 	struct msdosfsmount *pmp = dep->de_pmp;
 	struct vattr *vap = ap->a_vap;
 	mode_t mode;
 	struct timespec ts;
 	u_long dirsperblk = pmp->pm_BytesPerSec / sizeof(struct direntry);
 	uint64_t fileid;
 
 	vfs_timestamp(&ts);
 	DETIMES(dep, &ts, &ts, &ts);
 	vap->va_fsid = dev2udev(pmp->pm_dev);
 	/*
 	 * The following computation of the fileid must be the same as that
 	 * used in msdosfs_readdir() to compute d_fileno. If not, pwd
 	 * doesn't work.
 	 */
 	if (dep->de_Attributes & ATTR_DIRECTORY) {
 		fileid = (uint64_t)cntobn(pmp, dep->de_StartCluster) *
 		    dirsperblk;
 		if (dep->de_StartCluster == MSDOSFSROOT)
 			fileid = 1;
 	} else {
 		fileid = (uint64_t)cntobn(pmp, dep->de_dirclust) *
 		    dirsperblk;
 		if (dep->de_dirclust == MSDOSFSROOT)
 			fileid = (uint64_t)roottobn(pmp, 0) * dirsperblk;
 		fileid += (uoff_t)dep->de_diroffset / sizeof(struct direntry);
 	}
 	vap->va_fileid = fileid;
 
 	mode = S_IRWXU|S_IRWXG|S_IRWXO;
 	if (dep->de_Attributes & ATTR_READONLY)
 		mode &= ~(S_IWUSR|S_IWGRP|S_IWOTH);
 	vap->va_mode = mode &
 	    (ap->a_vp->v_type == VDIR ? pmp->pm_dirmask : pmp->pm_mask);
 	vap->va_uid = pmp->pm_uid;
 	vap->va_gid = pmp->pm_gid;
 	vap->va_nlink = 1;
 	vap->va_rdev = NODEV;
 	vap->va_size = dep->de_FileSize;
 	fattime2timespec(dep->de_MDate, dep->de_MTime, 0, 0, &vap->va_mtime);
 	vap->va_ctime = vap->va_mtime;
 	if (pmp->pm_flags & MSDOSFSMNT_LONGNAME) {
 		fattime2timespec(dep->de_ADate, 0, 0, 0, &vap->va_atime);
 		fattime2timespec(dep->de_CDate, dep->de_CTime, dep->de_CHun,
 		    0, &vap->va_birthtime);
 	} else {
 		vap->va_atime = vap->va_mtime;
 		vap->va_birthtime.tv_sec = -1;
 		vap->va_birthtime.tv_nsec = 0;
 	}
 	vap->va_flags = 0;
 	if (dep->de_Attributes & ATTR_ARCHIVE)
 		vap->va_flags |= UF_ARCHIVE;
 	if (dep->de_Attributes & ATTR_HIDDEN)
 		vap->va_flags |= UF_HIDDEN;
 	if (dep->de_Attributes & ATTR_READONLY)
 		vap->va_flags |= UF_READONLY;
 	if (dep->de_Attributes & ATTR_SYSTEM)
 		vap->va_flags |= UF_SYSTEM;
 	vap->va_gen = 0;
 	vap->va_blocksize = pmp->pm_bpcluster;
 	vap->va_bytes =
 	    (dep->de_FileSize + pmp->pm_crbomask) & ~pmp->pm_crbomask;
 	vap->va_type = ap->a_vp->v_type;
 	vap->va_filerev = dep->de_modrev;
 	return (0);
 }
 
 static int
 msdosfs_setattr(struct vop_setattr_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct denode *dep = VTODE(ap->a_vp);
 	struct msdosfsmount *pmp = dep->de_pmp;
 	struct vattr *vap = ap->a_vap;
 	struct ucred *cred = ap->a_cred;
 	struct thread *td = curthread;
 	int error = 0;
 
 #ifdef MSDOSFS_DEBUG
 	printf("msdosfs_setattr(): vp %p, vap %p, cred %p\n",
 	    ap->a_vp, vap, cred);
 #endif
 
 	/*
 	 * Check for unsettable attributes.
 	 */
 	if ((vap->va_type != VNON) || (vap->va_nlink != VNOVAL) ||
 	    (vap->va_fsid != VNOVAL) || (vap->va_fileid != VNOVAL) ||
 	    (vap->va_blocksize != VNOVAL) || (vap->va_rdev != VNOVAL) ||
 	    (vap->va_bytes != VNOVAL) || (vap->va_gen != VNOVAL)) {
 #ifdef MSDOSFS_DEBUG
 		printf("msdosfs_setattr(): returning EINVAL\n");
 		printf("    va_type %d, va_nlink %llx, va_fsid %llx, va_fileid %llx\n",
 		    vap->va_type, (unsigned long long)vap->va_nlink,
 		    (unsigned long long)vap->va_fsid,
 		    (unsigned long long)vap->va_fileid);
 		printf("    va_blocksize %lx, va_rdev %llx, va_bytes %llx, va_gen %lx\n",
 		    vap->va_blocksize, (unsigned long long)vap->va_rdev,
 		    (unsigned long long)vap->va_bytes, vap->va_gen);
 		printf("    va_uid %x, va_gid %x\n",
 		    vap->va_uid, vap->va_gid);
 #endif
 		return (EINVAL);
 	}
 
 	/*
 	 * We don't allow setting attributes on the root directory.
 	 * The special case for the root directory is because before
 	 * FAT32, the root directory didn't have an entry for itself
 	 * (and was otherwise special).  With FAT32, the root
 	 * directory is not so special, but still doesn't have an
 	 * entry for itself.
 	 */
 	if (vp->v_vflag & VV_ROOT)
 		return (EINVAL);
 
 	if (vap->va_flags != VNOVAL) {
 		if (vp->v_mount->mnt_flag & MNT_RDONLY)
 			return (EROFS);
 		if (cred->cr_uid != pmp->pm_uid) {
 			error = priv_check_cred(cred, PRIV_VFS_ADMIN);
 			if (error)
 				return (error);
 		}
 		/*
 		 * We are very inconsistent about handling unsupported
 		 * attributes.  We ignored the access time and the
 		 * read and execute bits.  We were strict for the other
 		 * attributes.
 		 */
 		if (vap->va_flags & ~(UF_ARCHIVE | UF_HIDDEN | UF_READONLY |
 		    UF_SYSTEM))
 			return EOPNOTSUPP;
 		if (vap->va_flags & UF_ARCHIVE)
 			dep->de_Attributes |= ATTR_ARCHIVE;
 		else
 			dep->de_Attributes &= ~ATTR_ARCHIVE;
 		if (vap->va_flags & UF_HIDDEN)
 			dep->de_Attributes |= ATTR_HIDDEN;
 		else
 			dep->de_Attributes &= ~ATTR_HIDDEN;
 		/* We don't allow changing the readonly bit on directories. */
 		if (vp->v_type != VDIR) {
 			if (vap->va_flags & UF_READONLY)
 				dep->de_Attributes |= ATTR_READONLY;
 			else
 				dep->de_Attributes &= ~ATTR_READONLY;
 		}
 		if (vap->va_flags & UF_SYSTEM)
 			dep->de_Attributes |= ATTR_SYSTEM;
 		else
 			dep->de_Attributes &= ~ATTR_SYSTEM;
 		dep->de_flag |= DE_MODIFIED;
 	}
 
 	if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) {
 		uid_t uid;
 		gid_t gid;
 
 		if (vp->v_mount->mnt_flag & MNT_RDONLY)
 			return (EROFS);
 		uid = vap->va_uid;
 		if (uid == (uid_t)VNOVAL)
 			uid = pmp->pm_uid;
 		gid = vap->va_gid;
 		if (gid == (gid_t)VNOVAL)
 			gid = pmp->pm_gid;
 		if (cred->cr_uid != pmp->pm_uid || uid != pmp->pm_uid ||
 		    (gid != pmp->pm_gid && !groupmember(gid, cred))) {
 			error = priv_check_cred(cred, PRIV_VFS_CHOWN);
 			if (error)
 				return (error);
 		}
 		if (uid != pmp->pm_uid || gid != pmp->pm_gid)
 			return EINVAL;
 	}
 
 	if (vap->va_size != VNOVAL) {
 		switch (vp->v_type) {
 		case VDIR:
 			return (EISDIR);
 		case VREG:
 			/*
 			 * Truncation is only supported for regular files,
 			 * Disallow it if the filesystem is read-only.
 			 */
 			if (vp->v_mount->mnt_flag & MNT_RDONLY)
 				return (EROFS);
 			break;
 		default:
 			/*
 			 * According to POSIX, the result is unspecified
 			 * for file types other than regular files,
 			 * directories and shared memory objects.  We
 			 * don't support any file types except regular
 			 * files and directories in this file system, so
 			 * this (default) case is unreachable and can do
 			 * anything.  Keep falling through to detrunc()
 			 * for now.
 			 */
 			break;
 		}
 		error = detrunc(dep, vap->va_size, 0, cred);
 		if (error)
 			return error;
 	}
 	if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL) {
 		if (vp->v_mount->mnt_flag & MNT_RDONLY)
 			return (EROFS);
 		error = vn_utimes_perm(vp, vap, cred, td);
 		if (error != 0)
 			return (error);
 		if ((pmp->pm_flags & MSDOSFSMNT_NOWIN95) == 0 &&
 		    vap->va_atime.tv_sec != VNOVAL) {
 			dep->de_flag &= ~DE_ACCESS;
 			timespec2fattime(&vap->va_atime, 0,
 			    &dep->de_ADate, NULL, NULL);
 		}
 		if (vap->va_mtime.tv_sec != VNOVAL) {
 			dep->de_flag &= ~DE_UPDATE;
 			timespec2fattime(&vap->va_mtime, 0,
 			    &dep->de_MDate, &dep->de_MTime, NULL);
 		}
 		/*
 		 * We don't set the archive bit when modifying the time of
 		 * a directory to emulate the Windows/DOS behavior.
 		 */
 		if (vp->v_type != VDIR)
 			dep->de_Attributes |= ATTR_ARCHIVE;
 		dep->de_flag |= DE_MODIFIED;
 	}
 	/*
 	 * DOS files only have the ability to have their writability
 	 * attribute set, so we use the owner write bit to set the readonly
 	 * attribute.
 	 */
 	if (vap->va_mode != (mode_t)VNOVAL) {
 		if (vp->v_mount->mnt_flag & MNT_RDONLY)
 			return (EROFS);
 		if (cred->cr_uid != pmp->pm_uid) {
 			error = priv_check_cred(cred, PRIV_VFS_ADMIN);
 			if (error)
 				return (error);
 		}
 		if (vp->v_type != VDIR) {
 			/* We ignore the read and execute bits. */
 			if (vap->va_mode & S_IWUSR)
 				dep->de_Attributes &= ~ATTR_READONLY;
 			else
 				dep->de_Attributes |= ATTR_READONLY;
 			dep->de_Attributes |= ATTR_ARCHIVE;
 			dep->de_flag |= DE_MODIFIED;
 		}
 	}
 	return (deupdat(dep, 0));
 }
 
 static int
 msdosfs_read(struct vop_read_args *ap)
 {
 	int error = 0;
 	int blsize;
 	int isadir;
 	ssize_t orig_resid;
 	u_int n;
 	u_long diff;
 	u_long on;
 	daddr_t lbn;
 	daddr_t rablock;
 	int rasize;
 	int seqcount;
 	struct buf *bp;
 	struct vnode *vp = ap->a_vp;
 	struct denode *dep = VTODE(vp);
 	struct msdosfsmount *pmp = dep->de_pmp;
 	struct uio *uio = ap->a_uio;
 
 	/*
 	 * If they didn't ask for any data, then we are done.
 	 */
 	orig_resid = uio->uio_resid;
 	if (orig_resid == 0)
 		return (0);
 
 	/*
 	 * The caller is supposed to ensure that
 	 * uio->uio_offset >= 0 and uio->uio_resid >= 0.
 	 * We don't need to check for large offsets as in ffs because
 	 * dep->de_FileSize <= MSDOSFS_FILESIZE_MAX < OFF_MAX, so large
 	 * offsets cannot cause overflow even in theory.
 	 */
 
 	seqcount = ap->a_ioflag >> IO_SEQSHIFT;
 
 	isadir = dep->de_Attributes & ATTR_DIRECTORY;
 	do {
 		if (uio->uio_offset >= dep->de_FileSize)
 			break;
 		lbn = de_cluster(pmp, uio->uio_offset);
 		rablock = lbn + 1;
 		blsize = pmp->pm_bpcluster;
 		on = uio->uio_offset & pmp->pm_crbomask;
 		/*
 		 * If we are operating on a directory file then be sure to
 		 * do i/o with the vnode for the filesystem instead of the
 		 * vnode for the directory.
 		 */
 		if (isadir) {
 			/* convert cluster # to block # */
 			error = pcbmap(dep, lbn, &lbn, 0, &blsize);
 			if (error == E2BIG) {
 				error = EINVAL;
 				break;
 			} else if (error)
 				break;
 			error = bread(pmp->pm_devvp, lbn, blsize, NOCRED, &bp);
 		} else if (de_cn2off(pmp, rablock) >= dep->de_FileSize) {
 			error = bread(vp, lbn, blsize, NOCRED, &bp);
 		} else if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
 			error = cluster_read(vp, dep->de_FileSize, lbn, blsize,
 			    NOCRED, on + uio->uio_resid, seqcount, 0, &bp);
 		} else if (seqcount > 1) {
 			rasize = blsize;
 			error = breadn(vp, lbn,
 			    blsize, &rablock, &rasize, 1, NOCRED, &bp);
 		} else {
 			error = bread(vp, lbn, blsize, NOCRED, &bp);
 		}
 		if (error) {
 			brelse(bp);
 			break;
 		}
 		diff = pmp->pm_bpcluster - on;
 		n = diff > uio->uio_resid ? uio->uio_resid : diff;
 		diff = dep->de_FileSize - uio->uio_offset;
 		if (diff < n)
 			n = diff;
 		diff = blsize - bp->b_resid;
 		if (diff < n)
 			n = diff;
 		error = vn_io_fault_uiomove(bp->b_data + on, (int) n, uio);
 		brelse(bp);
 	} while (error == 0 && uio->uio_resid > 0 && n != 0);
 	if (!isadir && (error == 0 || uio->uio_resid != orig_resid) &&
 	    (vp->v_mount->mnt_flag & (MNT_NOATIME | MNT_RDONLY)) == 0)
 		dep->de_flag |= DE_ACCESS;
 	return (error);
 }
 
 /*
  * Write data to a file or directory.
  */
 static int
 msdosfs_write(struct vop_write_args *ap)
 {
 	int n;
 	int croffset;
 	ssize_t resid;
 	u_long osize;
 	int error = 0;
 	u_long count;
 	int seqcount;
 	daddr_t bn, lastcn;
 	struct buf *bp;
 	int ioflag = ap->a_ioflag;
 	struct uio *uio = ap->a_uio;
 	struct vnode *vp = ap->a_vp;
 	struct vnode *thisvp;
 	struct denode *dep = VTODE(vp);
 	struct msdosfsmount *pmp = dep->de_pmp;
 	struct ucred *cred = ap->a_cred;
 
 #ifdef MSDOSFS_DEBUG
 	printf("msdosfs_write(vp %p, uio %p, ioflag %x, cred %p\n",
 	    vp, uio, ioflag, cred);
 	printf("msdosfs_write(): diroff %lu, dirclust %lu, startcluster %lu\n",
 	    dep->de_diroffset, dep->de_dirclust, dep->de_StartCluster);
 #endif
 
 	switch (vp->v_type) {
 	case VREG:
 		if (ioflag & IO_APPEND)
 			uio->uio_offset = dep->de_FileSize;
 		thisvp = vp;
 		break;
 	case VDIR:
 		return EISDIR;
 	default:
 		panic("msdosfs_write(): bad file type");
 	}
 
 	/*
 	 * This is needed (unlike in ffs_write()) because we extend the
 	 * file outside of the loop but we don't want to extend the file
 	 * for writes of 0 bytes.
 	 */
 	if (uio->uio_resid == 0)
 		return (0);
 
 	/*
 	 * The caller is supposed to ensure that
 	 * uio->uio_offset >= 0 and uio->uio_resid >= 0.
 	 */
 	if ((uoff_t)uio->uio_offset + uio->uio_resid > MSDOSFS_FILESIZE_MAX)
 		return (EFBIG);
 
 	/*
 	 * If they've exceeded their filesize limit, tell them about it.
 	 */
 	if (vn_rlimit_fsize(vp, uio, uio->uio_td))
 		return (EFBIG);
 
 	/*
 	 * If the offset we are starting the write at is beyond the end of
 	 * the file, then they've done a seek.  Unix filesystems allow
 	 * files with holes in them, DOS doesn't so we must fill the hole
 	 * with zeroed blocks.
 	 */
 	if (uio->uio_offset > dep->de_FileSize) {
 		error = deextend(dep, uio->uio_offset, cred);
 		if (error)
 			return (error);
 	}
 
 	/*
 	 * Remember some values in case the write fails.
 	 */
 	resid = uio->uio_resid;
 	osize = dep->de_FileSize;
 
 	/*
 	 * If we write beyond the end of the file, extend it to its ultimate
 	 * size ahead of the time to hopefully get a contiguous area.
 	 */
 	if (uio->uio_offset + resid > osize) {
 		count = de_clcount(pmp, uio->uio_offset + resid) -
 			de_clcount(pmp, osize);
 		error = extendfile(dep, count, NULL, NULL, 0);
 		if (error &&  (error != ENOSPC || (ioflag & IO_UNIT)))
 			goto errexit;
 		lastcn = dep->de_fc[FC_LASTFC].fc_frcn;
 	} else
 		lastcn = de_clcount(pmp, osize) - 1;
 
 	seqcount = ioflag >> IO_SEQSHIFT;
 	do {
 		if (de_cluster(pmp, uio->uio_offset) > lastcn) {
 			error = ENOSPC;
 			break;
 		}
 
 		croffset = uio->uio_offset & pmp->pm_crbomask;
 		n = min(uio->uio_resid, pmp->pm_bpcluster - croffset);
 		if (uio->uio_offset + n > dep->de_FileSize) {
 			dep->de_FileSize = uio->uio_offset + n;
 			/* The object size needs to be set before buffer is allocated */
 			vnode_pager_setsize(vp, dep->de_FileSize);
 		}
 
 		bn = de_cluster(pmp, uio->uio_offset);
 		if ((uio->uio_offset & pmp->pm_crbomask) == 0
 		    && (de_cluster(pmp, uio->uio_offset + uio->uio_resid)
 			> de_cluster(pmp, uio->uio_offset)
 			|| uio->uio_offset + uio->uio_resid >= dep->de_FileSize)) {
 			/*
 			 * If either the whole cluster gets written,
 			 * or we write the cluster from its start beyond EOF,
 			 * then no need to read data from disk.
 			 */
 			bp = getblk(thisvp, bn, pmp->pm_bpcluster, 0, 0, 0);
 			/*
 			 * This call to vfs_bio_clrbuf() ensures that
 			 * even if vn_io_fault_uiomove() below faults,
 			 * garbage from the newly instantiated buffer
 			 * is not exposed to the userspace via mmap().
 			 */
 			vfs_bio_clrbuf(bp);
 			/*
 			 * Do the bmap now, since pcbmap needs buffers
 			 * for the FAT table. (see msdosfs_strategy)
 			 */
 			if (bp->b_blkno == bp->b_lblkno) {
 				error = pcbmap(dep, bp->b_lblkno, &bn, 0, 0);
 				if (error)
 					bp->b_blkno = -1;
 				else
 					bp->b_blkno = bn;
 			}
 			if (bp->b_blkno == -1) {
 				brelse(bp);
 				if (!error)
 					error = EIO;		/* XXX */
 				break;
 			}
 		} else {
 			/*
 			 * The block we need to write into exists, so read it in.
 			 */
 			error = bread(thisvp, bn, pmp->pm_bpcluster, cred, &bp);
 			if (error) {
 				break;
 			}
 		}
 
 		/*
 		 * Should these vnode_pager_* functions be done on dir
 		 * files?
 		 */
 
 		/*
 		 * Copy the data from user space into the buf header.
 		 */
 		error = vn_io_fault_uiomove(bp->b_data + croffset, n, uio);
 		if (error) {
 			brelse(bp);
 			break;
 		}
 
 		/* Prepare for clustered writes in some else clauses. */
 		if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERW) == 0)
 			bp->b_flags |= B_CLUSTEROK;
 
 		/*
 		 * If IO_SYNC, then each buffer is written synchronously.
 		 * Otherwise, if we have a severe page deficiency then
 		 * write the buffer asynchronously.  Otherwise, if on a
 		 * cluster boundary then write the buffer asynchronously,
 		 * combining it with contiguous clusters if permitted and
 		 * possible, since we don't expect more writes into this
 		 * buffer soon.  Otherwise, do a delayed write because we
 		 * expect more writes into this buffer soon.
 		 */
 		if (ioflag & IO_SYNC)
 			(void)bwrite(bp);
 		else if (vm_page_count_severe() || buf_dirty_count_severe())
 			bawrite(bp);
 		else if (n + croffset == pmp->pm_bpcluster) {
 			if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERW) == 0)
 				cluster_write(vp, &dep->de_clusterw, bp,
 				    dep->de_FileSize, seqcount, 0);
 			else
 				bawrite(bp);
 		} else
 			bdwrite(bp);
 		dep->de_flag |= DE_UPDATE;
 	} while (error == 0 && uio->uio_resid > 0);
 
 	/*
 	 * If the write failed and they want us to, truncate the file back
 	 * to the size it was before the write was attempted.
 	 */
 errexit:
 	if (error) {
 		if (ioflag & IO_UNIT) {
 			detrunc(dep, osize, ioflag & IO_SYNC, NOCRED);
 			uio->uio_offset -= resid - uio->uio_resid;
 			uio->uio_resid = resid;
 		} else {
 			detrunc(dep, dep->de_FileSize, ioflag & IO_SYNC, NOCRED);
 			if (uio->uio_resid != resid)
 				error = 0;
 		}
 	} else if (ioflag & IO_SYNC)
 		error = deupdat(dep, 1);
 	return (error);
 }
 
 /*
  * Flush the blocks of a file to disk.
  */
 static int
 msdosfs_fsync(struct vop_fsync_args *ap)
 {
 	struct vnode *devvp;
 	int allerror, error;
 
 	vop_stdfsync(ap);
 
 	/*
 	* If the syncing request comes from fsync(2), sync the entire
 	* FAT and any other metadata that happens to be on devvp.  We
 	* need this mainly for the FAT.  We write the FAT sloppily, and
 	* syncing it all now is the best we can easily do to get all
 	* directory entries associated with the file (not just the file)
 	* fully synced.  The other metadata includes critical metadata
 	* for all directory entries, but only in the MNT_ASYNC case.  We
 	* will soon sync all metadata in the file's directory entry.
 	* Non-critical metadata for associated directory entries only
 	* gets synced accidentally, as in most file systems.
 	*/
 	if (ap->a_waitfor != MNT_NOWAIT) {
 		devvp = VTODE(ap->a_vp)->de_pmp->pm_devvp;
 		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
 		allerror = VOP_FSYNC(devvp, MNT_WAIT, ap->a_td);
 		VOP_UNLOCK(devvp);
 	} else
 		allerror = 0;
 
 	error = deupdat(VTODE(ap->a_vp), ap->a_waitfor != MNT_NOWAIT);
 	if (allerror == 0)
 		allerror = error;
 	return (allerror);
 }
 
 static int
 msdosfs_remove(struct vop_remove_args *ap)
 {
 	struct denode *dep = VTODE(ap->a_vp);
 	struct denode *ddep = VTODE(ap->a_dvp);
 	int error;
 
 	if (ap->a_vp->v_type == VDIR)
 		error = EPERM;
 	else
 		error = removede(ddep, dep);
 #ifdef MSDOSFS_DEBUG
 	printf("msdosfs_remove(), dep %p, v_usecount %d\n", dep, ap->a_vp->v_usecount);
 #endif
 	return (error);
 }
 
 /*
  * DOS filesystems don't know what links are.
  */
 static int
 msdosfs_link(struct vop_link_args *ap)
 {
 	return (EOPNOTSUPP);
 }
 
 /*
  * Renames on files require moving the denode to a new hash queue since the
  * denode's location is used to compute which hash queue to put the file
  * in. Unless it is a rename in place.  For example "mv a b".
  *
  * What follows is the basic algorithm:
  *
  * if (file move) {
  *	if (dest file exists) {
  *		remove dest file
  *	}
  *	if (dest and src in same directory) {
  *		rewrite name in existing directory slot
  *	} else {
  *		write new entry in dest directory
  *		update offset and dirclust in denode
  *		move denode to new hash chain
  *		clear old directory entry
  *	}
  * } else {
  *	directory move
  *	if (dest directory exists) {
  *		if (dest is not empty) {
  *			return ENOTEMPTY
  *		}
  *		remove dest directory
  *	}
  *	if (dest and src in same directory) {
  *		rewrite name in existing entry
  *	} else {
  *		be sure dest is not a child of src directory
  *		write entry in dest directory
  *		update "." and ".." in moved directory
  *		clear old directory entry for moved directory
  *	}
  * }
  *
  * On entry:
  *	source's parent directory is unlocked
  *	source file or directory is unlocked
  *	destination's parent directory is locked
  *	destination file or directory is locked if it exists
  *
  * On exit:
  *	all denodes should be released
  */
 static int
 msdosfs_rename(struct vop_rename_args *ap)
 {
 	struct vnode *fdvp, *fvp, *tdvp, *tvp, *vp;
 	struct componentname *fcnp, *tcnp;
 	struct denode *fdip, *fip, *tdip, *tip, *nip;
 	u_char toname[12], oldname[11];
 	u_long to_diroffset;
 	bool checkpath_locked, doingdirectory, newparent;
 	int error;
 	u_long cn, pcl, blkoff;
 	daddr_t bn, wait_scn, scn;
 	struct msdosfsmount *pmp;
 	struct direntry *dotdotp;
 	struct buf *bp;
 
 	tdvp = ap->a_tdvp;
 	fvp = ap->a_fvp;
 	fdvp = ap->a_fdvp;
 	tvp = ap->a_tvp;
 	tcnp = ap->a_tcnp;
 	fcnp = ap->a_fcnp;
 	pmp = VFSTOMSDOSFS(fdvp->v_mount);
 
-#ifdef DIAGNOSTIC
-	if ((tcnp->cn_flags & HASBUF) == 0 ||
-	    (fcnp->cn_flags & HASBUF) == 0)
-		panic("msdosfs_rename: no name");
-#endif
 	/*
 	 * Check for cross-device rename.
 	 */
 	if (fvp->v_mount != tdvp->v_mount ||
 	    (tvp != NULL && fvp->v_mount != tvp->v_mount)) {
 		error = EXDEV;
 		goto abortit;
 	}
 
 	/*
 	 * If source and dest are the same, do nothing.
 	 */
 	if (tvp == fvp) {
 		error = 0;
 		goto abortit;
 	}
 
 	/*
 	 * When the target exists, both the directory
 	 * and target vnodes are passed locked.
 	 */
 	VOP_UNLOCK(tdvp);
 	if (tvp != NULL && tvp != tdvp)
 		VOP_UNLOCK(tvp);
 
 	checkpath_locked = false;
 
 relock:
 	doingdirectory = newparent = false;
 
 	error = vn_lock(fdvp, LK_EXCLUSIVE);
 	if (error != 0)
 		goto releout;
 	if (vn_lock(tdvp, LK_EXCLUSIVE | LK_NOWAIT) != 0) {
 		VOP_UNLOCK(fdvp);
 		error = vn_lock(tdvp, LK_EXCLUSIVE);
 		if (error != 0)
 			goto releout;
 		VOP_UNLOCK(tdvp);
 		goto relock;
 	}
 
 	error = msdosfs_lookup_ino(fdvp, NULL, fcnp, &scn, &blkoff);
 	if (error != 0) {
 		VOP_UNLOCK(fdvp);
 		VOP_UNLOCK(tdvp);
 		goto releout;
 	}
 	error = deget(pmp, scn, blkoff, LK_EXCLUSIVE | LK_NOWAIT, &nip);
 	if (error != 0) {
 		VOP_UNLOCK(fdvp);
 		VOP_UNLOCK(tdvp);
 		if (error != EBUSY)
 			goto releout;
 		error = deget(pmp, scn, blkoff, LK_EXCLUSIVE, &nip);
 		if (error != 0)
 			goto releout;
 		vp = fvp;
 		fvp = DETOV(nip);
 		VOP_UNLOCK(fvp);
 		vrele(vp);
 		goto relock;
 	}
 	vrele(fvp);
 	fvp = DETOV(nip);
 
 	error = msdosfs_lookup_ino(tdvp, NULL, tcnp, &scn, &blkoff);
 	if (error != 0 && error != EJUSTRETURN) {
 		VOP_UNLOCK(fdvp);
 		VOP_UNLOCK(tdvp);
 		VOP_UNLOCK(fvp);
 		goto releout;
 	}
 	if (error == EJUSTRETURN && tvp != NULL) {
 		vrele(tvp);
 		tvp = NULL;
 	}
 	if (error == 0) {
 		nip = NULL;
 		error = deget(pmp, scn, blkoff, LK_EXCLUSIVE | LK_NOWAIT,
 		    &nip);
 		if (tvp != NULL) {
 			vrele(tvp);
 			tvp = NULL;
 		}
 		if (error != 0) {
 			VOP_UNLOCK(fdvp);
 			VOP_UNLOCK(tdvp);
 			VOP_UNLOCK(fvp);
 			if (error != EBUSY)
 				goto releout;
 			error = deget(pmp, scn, blkoff, LK_EXCLUSIVE,
 			    &nip);
 			if (error != 0)
 				goto releout;
 			vput(DETOV(nip));
 			goto relock;
 		}
 		tvp = DETOV(nip);
 	}
 
 	fdip = VTODE(fdvp);
 	fip = VTODE(fvp);
 	tdip = VTODE(tdvp);
 	tip = tvp != NULL ? VTODE(tvp) : NULL;
 
 	/*
 	 * Remember direntry place to use for destination
 	 */
 	to_diroffset = tdip->de_fndoffset;
 
 	/*
 	 * Be sure we are not renaming ".", "..", or an alias of ".". This
 	 * leads to a crippled directory tree.  It's pretty tough to do a
 	 * "ls" or "pwd" with the "." directory entry missing, and "cd .."
 	 * doesn't work if the ".." entry is missing.
 	 */
 	if ((fip->de_Attributes & ATTR_DIRECTORY) != 0) {
 		/*
 		 * Avoid ".", "..", and aliases of "." for obvious reasons.
 		 */
 		if ((fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') ||
 		    fdip == fip ||
 		    (fcnp->cn_flags & ISDOTDOT) != 0 ||
 		    (tcnp->cn_flags & ISDOTDOT) != 0) {
 			error = EINVAL;
 			goto unlock;
 		}
 		doingdirectory = true;
 	}
 
 	/*
 	 * If ".." must be changed (ie the directory gets a new
 	 * parent) then the source directory must not be in the
 	 * directory hierarchy above the target, as this would
 	 * orphan everything below the source directory. Also
 	 * the user must have write permission in the source so
 	 * as to be able to change "..". We must repeat the call
 	 * to namei, as the parent directory is unlocked by the
 	 * call to doscheckpath().
 	 */
 	error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred, curthread);
 	if (fdip->de_StartCluster != tdip->de_StartCluster)
 		newparent = true;
 	if (doingdirectory && newparent) {
 		if (error != 0)	/* write access check above */
 			goto unlock;
 		lockmgr(&pmp->pm_checkpath_lock, LK_EXCLUSIVE, NULL);
 		checkpath_locked = true;
 		error = doscheckpath(fip, tdip, &wait_scn);
 		if (wait_scn != 0) {
 			lockmgr(&pmp->pm_checkpath_lock, LK_RELEASE, NULL);
 			checkpath_locked = false;
 			VOP_UNLOCK(fdvp);
 			VOP_UNLOCK(tdvp);
 			VOP_UNLOCK(fvp);
 			if (tvp != NULL && tvp != tdvp)
 				VOP_UNLOCK(tvp);
 			error = deget(pmp, wait_scn, 0, LK_EXCLUSIVE,
 			    &nip);
 			if (error == 0) {
 				vput(DETOV(nip));
 				goto relock;
 			}
 		}
 		if (error != 0)
 			goto unlock;
 		if ((tcnp->cn_flags & SAVESTART) == 0)
 			panic("msdosfs_rename: lost to startdir");
 	}
 
 	if (tip != NULL) {
 		/*
 		 * Target must be empty if a directory and have no links
 		 * to it. Also, ensure source and target are compatible
 		 * (both directories, or both not directories).
 		 */
 		if ((tip->de_Attributes & ATTR_DIRECTORY) != 0) {
 			if (!dosdirempty(tip)) {
 				error = ENOTEMPTY;
 				goto unlock;
 			}
 			if (!doingdirectory) {
 				error = ENOTDIR;
 				goto unlock;
 			}
 			cache_purge(tdvp);
 		} else if (doingdirectory) {
 			error = EISDIR;
 			goto unlock;
 		}
 		error = msdosfs_lookup_ino(tdvp, NULL, tcnp, &scn, &blkoff);
 		MPASS(error == 0);
 		error = removede(tdip, tip);
 		if (error != 0)
 			goto unlock;
 		vput(tvp);
 		tvp = NULL;
 		tip = NULL;
 	}
 
 	/*
 	 * Convert the filename in tcnp into a dos filename. We copy this
 	 * into the denode and directory entry for the destination
 	 * file/directory.
 	 */
 	error = uniqdosname(tdip, tcnp, toname);
 	if (error != 0)
 		goto unlock;
 
 	/*
 	 * First write a new entry in the destination
 	 * directory and mark the entry in the source directory
 	 * as deleted.  Then move the denode to the correct hash
 	 * chain for its new location in the filesystem.  And, if
 	 * we moved a directory, then update its .. entry to point
 	 * to the new parent directory.
 	 */
 	memcpy(oldname, fip->de_Name, 11);
 	memcpy(fip->de_Name, toname, 11);	/* update denode */
 	error = msdosfs_lookup_ino(tdvp, NULL, tcnp, &scn, &blkoff);
 	MPASS(error == EJUSTRETURN);
 	error = createde(fip, tdip, NULL, tcnp);
 	if (error != 0) {
 		memcpy(fip->de_Name, oldname, 11);
 		goto unlock;
 	}
 
 	/*
 	 * If fip is for a directory, then its name should always
 	 * be "." since it is for the directory entry in the
 	 * directory itself (msdosfs_lookup() always translates
 	 * to the "." entry so as to get a unique denode, except
 	 * for the root directory there are different
 	 * complications).  However, we just corrupted its name
 	 * to pass the correct name to createde().  Undo this.
 	 */
 	if ((fip->de_Attributes & ATTR_DIRECTORY) != 0)
 		memcpy(fip->de_Name, oldname, 11);
 	fip->de_refcnt++;
 	error = msdosfs_lookup_ino(fdvp, NULL, fcnp, &scn, &blkoff);
 	MPASS(error == 0);
 	error = removede(fdip, fip);
 	if (error != 0) {
 		/* XXX should downgrade to ro here, fs is corrupt */
 		goto unlock;
 	}
 	if (!doingdirectory) {
 		error = pcbmap(tdip, de_cluster(pmp, to_diroffset), 0,
 		    &fip->de_dirclust, 0);
 		if (error != 0) {
 			/*
 			 * XXX should downgrade to ro here,
 			 * fs is corrupt
 			 */
 			goto unlock;
 		}
 		if (fip->de_dirclust == MSDOSFSROOT)
 			fip->de_diroffset = to_diroffset;
 		else
 			fip->de_diroffset = to_diroffset & pmp->pm_crbomask;
 	}
 	reinsert(fip);
 
 	/*
 	 * If we moved a directory to a new parent directory, then we must
 	 * fixup the ".." entry in the moved directory.
 	 */
 	if (doingdirectory && newparent) {
 		cn = fip->de_StartCluster;
 		if (cn == MSDOSFSROOT) {
 			/* this should never happen */
 			panic("msdosfs_rename(): updating .. in root directory?");
 		} else
 			bn = cntobn(pmp, cn);
 		error = bread(pmp->pm_devvp, bn, pmp->pm_bpcluster,
 		    NOCRED, &bp);
 		if (error != 0) {
 			/* XXX should downgrade to ro here, fs is corrupt */
 			goto unlock;
 		}
 		dotdotp = (struct direntry *)bp->b_data + 1;
 		pcl = tdip->de_StartCluster;
 		if (FAT32(pmp) && pcl == pmp->pm_rootdirblk)
 			pcl = MSDOSFSROOT;
 		putushort(dotdotp->deStartCluster, pcl);
 		if (FAT32(pmp))
 			putushort(dotdotp->deHighClust, pcl >> 16);
 		if (DOINGASYNC(fvp))
 			bdwrite(bp);
 		else if ((error = bwrite(bp)) != 0) {
 			/* XXX should downgrade to ro here, fs is corrupt */
 			goto unlock;
 		}
 	}
 
 	/*
 	 * The msdosfs lookup is case insensitive. Several aliases may
 	 * be inserted for a single directory entry. As a consequnce,
 	 * name cache purge done by lookup for fvp when DELETE op for
 	 * namei is specified, might be not enough to expunge all
 	 * namecache entries that were installed for this direntry.
 	 */
 	cache_purge(fvp);
 
 unlock:
 	if (checkpath_locked)
 		lockmgr(&pmp->pm_checkpath_lock, LK_RELEASE, NULL);
 	vput(fdvp);
 	vput(fvp);
 	if (tvp != NULL) {
 		if (tvp != tdvp)
 			vput(tvp);
 		else
 			vrele(tvp);
 	}
 	vput(tdvp);
 	return (error);
 releout:
 	MPASS(!checkpath_locked);
 	vrele(tdvp);
 	if (tvp != NULL)
 		vrele(tvp);
 	vrele(fdvp);
 	vrele(fvp);
 	return (error);
 abortit:
 	if (tdvp == tvp)
 		vrele(tdvp);
 	else
 		vput(tdvp);
 	if (tvp != NULL)
 		vput(tvp);
 	vrele(fdvp);
 	vrele(fvp);
 	return (error);
 }
 
 static struct {
 	struct direntry dot;
 	struct direntry dotdot;
 } dosdirtemplate = {
 	{	".          ",				/* the . entry */
 		ATTR_DIRECTORY,				/* file attribute */
 		0,					/* reserved */
 		0, { 0, 0 }, { 0, 0 },			/* create time & date */
 		{ 0, 0 },				/* access date */
 		{ 0, 0 },				/* high bits of start cluster */
 		{ 210, 4 }, { 210, 4 },			/* modify time & date */
 		{ 0, 0 },				/* startcluster */
 		{ 0, 0, 0, 0 }				/* filesize */
 	},
 	{	"..         ",				/* the .. entry */
 		ATTR_DIRECTORY,				/* file attribute */
 		0,					/* reserved */
 		0, { 0, 0 }, { 0, 0 },			/* create time & date */
 		{ 0, 0 },				/* access date */
 		{ 0, 0 },				/* high bits of start cluster */
 		{ 210, 4 }, { 210, 4 },			/* modify time & date */
 		{ 0, 0 },				/* startcluster */
 		{ 0, 0, 0, 0 }				/* filesize */
 	}
 };
 
 static int
 msdosfs_mkdir(struct vop_mkdir_args *ap)
 {
 	struct componentname *cnp = ap->a_cnp;
 	struct denode *dep;
 	struct denode *pdep = VTODE(ap->a_dvp);
 	struct direntry *denp;
 	struct msdosfsmount *pmp = pdep->de_pmp;
 	struct buf *bp;
 	u_long newcluster, pcl;
 	int bn;
 	int error;
 	struct denode ndirent;
 	struct timespec ts;
 
 	/*
 	 * If this is the root directory and there is no space left we
 	 * can't do anything.  This is because the root directory can not
 	 * change size.
 	 */
 	if (pdep->de_StartCluster == MSDOSFSROOT
 	    && pdep->de_fndoffset >= pdep->de_FileSize) {
 		error = ENOSPC;
 		goto bad2;
 	}
 
 	/*
 	 * Allocate a cluster to hold the about to be created directory.
 	 */
 	error = clusteralloc(pmp, 0, 1, CLUST_EOFE, &newcluster, NULL);
 	if (error)
 		goto bad2;
 
 	memset(&ndirent, 0, sizeof(ndirent));
 	ndirent.de_pmp = pmp;
 	ndirent.de_flag = DE_ACCESS | DE_CREATE | DE_UPDATE;
 	vfs_timestamp(&ts);
 	DETIMES(&ndirent, &ts, &ts, &ts);
 
 	/*
 	 * Now fill the cluster with the "." and ".." entries. And write
 	 * the cluster to disk.  This way it is there for the parent
 	 * directory to be pointing at if there were a crash.
 	 */
 	bn = cntobn(pmp, newcluster);
 	/* always succeeds */
 	bp = getblk(pmp->pm_devvp, bn, pmp->pm_bpcluster, 0, 0, 0);
 	memset(bp->b_data, 0, pmp->pm_bpcluster);
 	memcpy(bp->b_data, &dosdirtemplate, sizeof dosdirtemplate);
 	denp = (struct direntry *)bp->b_data;
 	putushort(denp[0].deStartCluster, newcluster);
 	putushort(denp[0].deCDate, ndirent.de_CDate);
 	putushort(denp[0].deCTime, ndirent.de_CTime);
 	denp[0].deCHundredth = ndirent.de_CHun;
 	putushort(denp[0].deADate, ndirent.de_ADate);
 	putushort(denp[0].deMDate, ndirent.de_MDate);
 	putushort(denp[0].deMTime, ndirent.de_MTime);
 	pcl = pdep->de_StartCluster;
 	/*
 	 * Although the root directory has a non-magic starting cluster
 	 * number for FAT32, chkdsk and fsck_msdosfs still require
 	 * references to it in dotdot entries to be magic.
 	 */
 	if (FAT32(pmp) && pcl == pmp->pm_rootdirblk)
 		pcl = MSDOSFSROOT;
 	putushort(denp[1].deStartCluster, pcl);
 	putushort(denp[1].deCDate, ndirent.de_CDate);
 	putushort(denp[1].deCTime, ndirent.de_CTime);
 	denp[1].deCHundredth = ndirent.de_CHun;
 	putushort(denp[1].deADate, ndirent.de_ADate);
 	putushort(denp[1].deMDate, ndirent.de_MDate);
 	putushort(denp[1].deMTime, ndirent.de_MTime);
 	if (FAT32(pmp)) {
 		putushort(denp[0].deHighClust, newcluster >> 16);
 		putushort(denp[1].deHighClust, pcl >> 16);
 	}
 
 	if (DOINGASYNC(ap->a_dvp))
 		bdwrite(bp);
 	else if ((error = bwrite(bp)) != 0)
 		goto bad;
 
 	/*
 	 * Now build up a directory entry pointing to the newly allocated
 	 * cluster.  This will be written to an empty slot in the parent
 	 * directory.
 	 */
-#ifdef DIAGNOSTIC
-	if ((cnp->cn_flags & HASBUF) == 0)
-		panic("msdosfs_mkdir: no name");
-#endif
 	error = uniqdosname(pdep, cnp, ndirent.de_Name);
 	if (error)
 		goto bad;
 
 	ndirent.de_Attributes = ATTR_DIRECTORY;
 	ndirent.de_LowerCase = 0;
 	ndirent.de_StartCluster = newcluster;
 	ndirent.de_FileSize = 0;
 	error = createde(&ndirent, pdep, &dep, cnp);
 	if (error)
 		goto bad;
 	*ap->a_vpp = DETOV(dep);
 	return (0);
 
 bad:
 	clusterfree(pmp, newcluster);
 bad2:
 	return (error);
 }
 
 static int
 msdosfs_rmdir(struct vop_rmdir_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct vnode *dvp = ap->a_dvp;
 	struct componentname *cnp = ap->a_cnp;
 	struct denode *ip, *dp;
 	int error;
 
 	ip = VTODE(vp);
 	dp = VTODE(dvp);
 
 	/*
 	 * Verify the directory is empty (and valid).
 	 * (Rmdir ".." won't be valid since
 	 *  ".." will contain a reference to
 	 *  the current directory and thus be
 	 *  non-empty.)
 	 */
 	error = 0;
 	if (!dosdirempty(ip)) {
 		error = ENOTEMPTY;
 		goto out;
 	}
 	/*
 	 * Delete the entry from the directory.  For dos filesystems this
 	 * gets rid of the directory entry on disk, the in memory copy
 	 * still exists but the de_refcnt is <= 0.  This prevents it from
 	 * being found by deget().  When the vput() on dep is done we give
 	 * up access and eventually msdosfs_reclaim() will be called which
 	 * will remove it from the denode cache.
 	 */
 	error = removede(dp, ip);
 	if (error)
 		goto out;
 	/*
 	 * This is where we decrement the link count in the parent
 	 * directory.  Since dos filesystems don't do this we just purge
 	 * the name cache.
 	 */
 	cache_purge(dvp);
 	/*
 	 * Truncate the directory that is being deleted.
 	 */
 	error = detrunc(ip, (u_long)0, IO_SYNC, cnp->cn_cred);
 	cache_purge(vp);
 
 out:
 	return (error);
 }
 
 /*
  * DOS filesystems don't know what symlinks are.
  */
 static int
 msdosfs_symlink(struct vop_symlink_args *ap)
 {
 	return (EOPNOTSUPP);
 }
 
 static int
 msdosfs_readdir(struct vop_readdir_args *ap)
 {
 	struct mbnambuf nb;
 	int error = 0;
 	int diff;
 	long n;
 	int blsize;
 	long on;
 	u_long cn;
 	u_long dirsperblk;
 	long bias = 0;
 	daddr_t bn, lbn;
 	struct buf *bp;
 	struct denode *dep = VTODE(ap->a_vp);
 	struct msdosfsmount *pmp = dep->de_pmp;
 	struct direntry *dentp;
 	struct dirent dirbuf;
 	struct uio *uio = ap->a_uio;
 	uint64_t *cookies = NULL;
 	int ncookies = 0;
 	off_t offset, off;
 	int chksum = -1;
 
 #ifdef MSDOSFS_DEBUG
 	printf("msdosfs_readdir(): vp %p, uio %p, cred %p, eofflagp %p\n",
 	    ap->a_vp, uio, ap->a_cred, ap->a_eofflag);
 #endif
 
 	/*
 	 * msdosfs_readdir() won't operate properly on regular files since
 	 * it does i/o only with the filesystem vnode, and hence can
 	 * retrieve the wrong block from the buffer cache for a plain file.
 	 * So, fail attempts to readdir() on a plain file.
 	 */
 	if ((dep->de_Attributes & ATTR_DIRECTORY) == 0)
 		return (ENOTDIR);
 
 	/*
 	 * To be safe, initialize dirbuf
 	 */
 	memset(dirbuf.d_name, 0, sizeof(dirbuf.d_name));
 
 	/*
 	 * If the user buffer is smaller than the size of one dos directory
 	 * entry or the file offset is not a multiple of the size of a
 	 * directory entry, then we fail the read.
 	 */
 	off = offset = uio->uio_offset;
 	if (uio->uio_resid < sizeof(struct direntry) ||
 	    (offset & (sizeof(struct direntry) - 1)))
 		return (EINVAL);
 
 	if (ap->a_ncookies) {
 		ncookies = uio->uio_resid / 16;
 		cookies = malloc(ncookies * sizeof(*cookies), M_TEMP,
 		       M_WAITOK);
 		*ap->a_cookies = cookies;
 		*ap->a_ncookies = ncookies;
 	}
 
 	dirsperblk = pmp->pm_BytesPerSec / sizeof(struct direntry);
 
 	/*
 	 * If they are reading from the root directory then, we simulate
 	 * the . and .. entries since these don't exist in the root
 	 * directory.  We also set the offset bias to make up for having to
 	 * simulate these entries. By this I mean that at file offset 64 we
 	 * read the first entry in the root directory that lives on disk.
 	 */
 	if (dep->de_StartCluster == MSDOSFSROOT
 	    || (FAT32(pmp) && dep->de_StartCluster == pmp->pm_rootdirblk)) {
 #if 0
 		printf("msdosfs_readdir(): going after . or .. in root dir, offset %d\n",
 		    offset);
 #endif
 		bias = 2 * sizeof(struct direntry);
 		if (offset < bias) {
 			for (n = (int)offset / sizeof(struct direntry);
 			     n < 2; n++) {
 				dirbuf.d_fileno = FAT32(pmp) ?
 				    (uint64_t)cntobn(pmp, pmp->pm_rootdirblk) *
 				    dirsperblk : 1;
 				dirbuf.d_type = DT_DIR;
 				switch (n) {
 				case 0:
 					dirbuf.d_namlen = 1;
 					dirbuf.d_name[0] = '.';
 					break;
 				case 1:
 					dirbuf.d_namlen = 2;
 					dirbuf.d_name[0] = '.';
 					dirbuf.d_name[1] = '.';
 					break;
 				}
 				dirbuf.d_reclen = GENERIC_DIRSIZ(&dirbuf);
 				/* NOTE: d_off is the offset of the *next* entry. */
 				dirbuf.d_off = offset + sizeof(struct direntry);
 				dirent_terminate(&dirbuf);
 				if (uio->uio_resid < dirbuf.d_reclen)
 					goto out;
 				error = uiomove(&dirbuf, dirbuf.d_reclen, uio);
 				if (error)
 					goto out;
 				offset += sizeof(struct direntry);
 				off = offset;
 				if (cookies) {
 					*cookies++ = offset;
 					if (--ncookies <= 0)
 						goto out;
 				}
 			}
 		}
 	}
 
 	mbnambuf_init(&nb);
 	off = offset;
 	while (uio->uio_resid > 0) {
 		lbn = de_cluster(pmp, offset - bias);
 		on = (offset - bias) & pmp->pm_crbomask;
 		n = min(pmp->pm_bpcluster - on, uio->uio_resid);
 		diff = dep->de_FileSize - (offset - bias);
 		if (diff <= 0)
 			break;
 		n = min(n, diff);
 		error = pcbmap(dep, lbn, &bn, &cn, &blsize);
 		if (error)
 			break;
 		error = bread(pmp->pm_devvp, bn, blsize, NOCRED, &bp);
 		if (error) {
 			return (error);
 		}
 		n = min(n, blsize - bp->b_resid);
 		if (n == 0) {
 			brelse(bp);
 			return (EIO);
 		}
 
 		/*
 		 * Convert from dos directory entries to fs-independent
 		 * directory entries.
 		 */
 		for (dentp = (struct direntry *)(bp->b_data + on);
 		     (char *)dentp < bp->b_data + on + n;
 		     dentp++, offset += sizeof(struct direntry)) {
 #if 0
 			printf("rd: dentp %08x prev %08x crnt %08x deName %02x attr %02x\n",
 			    dentp, prev, crnt, dentp->deName[0], dentp->deAttributes);
 #endif
 			/*
 			 * If this is an unused entry, we can stop.
 			 */
 			if (dentp->deName[0] == SLOT_EMPTY) {
 				brelse(bp);
 				goto out;
 			}
 			/*
 			 * Skip deleted entries.
 			 */
 			if (dentp->deName[0] == SLOT_DELETED) {
 				chksum = -1;
 				mbnambuf_init(&nb);
 				continue;
 			}
 
 			/*
 			 * Handle Win95 long directory entries
 			 */
 			if (dentp->deAttributes == ATTR_WIN95) {
 				if (pmp->pm_flags & MSDOSFSMNT_SHORTNAME)
 					continue;
 				chksum = win2unixfn(&nb,
 				    (struct winentry *)dentp, chksum, pmp);
 				continue;
 			}
 
 			/*
 			 * Skip volume labels
 			 */
 			if (dentp->deAttributes & ATTR_VOLUME) {
 				chksum = -1;
 				mbnambuf_init(&nb);
 				continue;
 			}
 			/*
 			 * This computation of d_fileno must match
 			 * the computation of va_fileid in
 			 * msdosfs_getattr.
 			 */
 			if (dentp->deAttributes & ATTR_DIRECTORY) {
 				cn = getushort(dentp->deStartCluster);
 				if (FAT32(pmp)) {
 					cn |= getushort(dentp->deHighClust) <<
 					    16;
 					if (cn == MSDOSFSROOT)
 						cn = pmp->pm_rootdirblk;
 				}
 				if (cn == MSDOSFSROOT && !FAT32(pmp))
 					dirbuf.d_fileno = 1;
 				else
 					dirbuf.d_fileno = cntobn(pmp, cn) *
 					    dirsperblk;
 				dirbuf.d_type = DT_DIR;
 			} else {
 				dirbuf.d_fileno = (uoff_t)offset /
 				    sizeof(struct direntry);
 				dirbuf.d_type = DT_REG;
 			}
 
 			if (chksum != winChksum(dentp->deName)) {
 				dirbuf.d_namlen = dos2unixfn(dentp->deName,
 				    (u_char *)dirbuf.d_name,
 				    dentp->deLowerCase |
 					((pmp->pm_flags & MSDOSFSMNT_SHORTNAME) ?
 					(LCASE_BASE | LCASE_EXT) : 0),
 				    pmp);
 				mbnambuf_init(&nb);
 			} else
 				mbnambuf_flush(&nb, &dirbuf);
 			chksum = -1;
 			dirbuf.d_reclen = GENERIC_DIRSIZ(&dirbuf);
 			/* NOTE: d_off is the offset of the *next* entry. */
 			dirbuf.d_off = offset + sizeof(struct direntry);
 			dirent_terminate(&dirbuf);
 			if (uio->uio_resid < dirbuf.d_reclen) {
 				brelse(bp);
 				goto out;
 			}
 			error = uiomove(&dirbuf, dirbuf.d_reclen, uio);
 			if (error) {
 				brelse(bp);
 				goto out;
 			}
 			if (cookies) {
 				*cookies++ = offset + sizeof(struct direntry);
 				if (--ncookies <= 0) {
 					brelse(bp);
 					goto out;
 				}
 			}
 			off = offset + sizeof(struct direntry);
 		}
 		brelse(bp);
 	}
 out:
 	/* Subtract unused cookies */
 	if (ap->a_ncookies)
 		*ap->a_ncookies -= ncookies;
 
 	uio->uio_offset = off;
 
 	/*
 	 * Set the eofflag (NFS uses it)
 	 */
 	if (ap->a_eofflag) {
 		if (dep->de_FileSize - (offset - bias) <= 0)
 			*ap->a_eofflag = 1;
 		else
 			*ap->a_eofflag = 0;
 	}
 	return (error);
 }
 
 /*-
  * a_vp   - pointer to the file's vnode
  * a_bn   - logical block number within the file (cluster number for us)
  * a_bop  - where to return the bufobj of the special file containing the fs
  * a_bnp  - where to return the "physical" block number corresponding to a_bn
  *          (relative to the special file; units are blocks of size DEV_BSIZE)
  * a_runp - where to return the "run past" a_bn.  This is the count of logical
  *          blocks whose physical blocks (together with a_bn's physical block)
  *          are contiguous.
  * a_runb - where to return the "run before" a_bn.
  */
 static int
 msdosfs_bmap(struct vop_bmap_args *ap)
 {
 	struct fatcache savefc;
 	struct denode *dep;
 	struct mount *mp;
 	struct msdosfsmount *pmp;
 	struct vnode *vp;
 	daddr_t runbn;
 	u_long cn;
 	int bnpercn, error, maxio, maxrun, run;
 
 	vp = ap->a_vp;
 	dep = VTODE(vp);
 	pmp = dep->de_pmp;
 	if (ap->a_bop != NULL)
 		*ap->a_bop = &pmp->pm_devvp->v_bufobj;
 	if (ap->a_bnp == NULL)
 		return (0);
 	if (ap->a_runp != NULL)
 		*ap->a_runp = 0;
 	if (ap->a_runb != NULL)
 		*ap->a_runb = 0;
 	cn = ap->a_bn;
 	if (cn != ap->a_bn)
 		return (EFBIG);
 	error = pcbmap(dep, cn, ap->a_bnp, NULL, NULL);
 	if (error != 0 || (ap->a_runp == NULL && ap->a_runb == NULL))
 		return (error);
 
 	/*
 	 * Prepare to back out updates of the fatchain cache after the one
 	 * for the first block done by pcbmap() above.  Without the backout,
 	 * then whenever the caller doesn't do i/o to all of the blocks that
 	 * we find, the single useful cache entry would be too far in advance
 	 * of the actual i/o to work for the next sequential i/o.  Then the
 	 * FAT would be searched from the beginning.  With the backout, the
 	 * FAT is searched starting at most a few blocks early.  This wastes
 	 * much less time.  Time is also wasted finding more blocks than the
 	 * caller will do i/o to.  This is necessary because the runlength
 	 * parameters are output-only.
 	 */
 	savefc = dep->de_fc[FC_LASTMAP];
 
 	mp = vp->v_mount;
 	maxio = mp->mnt_iosize_max / mp->mnt_stat.f_iosize;
 	bnpercn = de_cn2bn(pmp, 1);
 	if (ap->a_runp != NULL) {
 		maxrun = ulmin(maxio - 1, pmp->pm_maxcluster - cn);
 		for (run = 1; run <= maxrun; run++) {
 			if (pcbmap(dep, cn + run, &runbn, NULL, NULL) != 0 ||
 			    runbn != *ap->a_bnp + run * bnpercn)
 				break;
 		}
 		*ap->a_runp = run - 1;
 	}
 	if (ap->a_runb != NULL) {
 		maxrun = ulmin(maxio - 1, cn);
 		for (run = 1; run < maxrun; run++) {
 			if (pcbmap(dep, cn - run, &runbn, NULL, NULL) != 0 ||
 			    runbn != *ap->a_bnp - run * bnpercn)
 				break;
 		}
 		*ap->a_runb = run - 1;
 	}
 	dep->de_fc[FC_LASTMAP] = savefc;
 	return (0);
 }
 
 SYSCTL_NODE(_vfs, OID_AUTO, msdosfs, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
     "msdos filesystem");
 static int use_buf_pager = 1;
 SYSCTL_INT(_vfs_msdosfs, OID_AUTO, use_buf_pager, CTLFLAG_RWTUN,
     &use_buf_pager, 0,
     "Use buffer pager instead of bmap");
 
 static daddr_t
 msdosfs_gbp_getblkno(struct vnode *vp, vm_ooffset_t off)
 {
 
 	return (de_cluster(VTODE(vp)->de_pmp, off));
 }
 
 static int
 msdosfs_gbp_getblksz(struct vnode *vp, daddr_t lbn, long *sz)
 {
 
 	*sz = VTODE(vp)->de_pmp->pm_bpcluster;
 	return (0);
 }
 
 static int
 msdosfs_getpages(struct vop_getpages_args *ap)
 {
 
 	if (use_buf_pager)
 		return (vfs_bio_getpages(ap->a_vp, ap->a_m, ap->a_count,
 		    ap->a_rbehind, ap->a_rahead, msdosfs_gbp_getblkno,
 		    msdosfs_gbp_getblksz));
 	return (vnode_pager_generic_getpages(ap->a_vp, ap->a_m, ap->a_count,
 	    ap->a_rbehind, ap->a_rahead, NULL, NULL));
 }
 
 static int
 msdosfs_strategy(struct vop_strategy_args *ap)
 {
 	struct buf *bp = ap->a_bp;
 	struct denode *dep = VTODE(ap->a_vp);
 	struct bufobj *bo;
 	int error = 0;
 	daddr_t blkno;
 
 	/*
 	 * If we don't already know the filesystem relative block number
 	 * then get it using pcbmap().  If pcbmap() returns the block
 	 * number as -1 then we've got a hole in the file.  DOS filesystems
 	 * don't allow files with holes, so we shouldn't ever see this.
 	 */
 	if (bp->b_blkno == bp->b_lblkno) {
 		error = pcbmap(dep, bp->b_lblkno, &blkno, 0, 0);
 		bp->b_blkno = blkno;
 		if (error) {
 			bp->b_error = error;
 			bp->b_ioflags |= BIO_ERROR;
 			bufdone(bp);
 			return (0);
 		}
 		if ((long)bp->b_blkno == -1)
 			vfs_bio_clrbuf(bp);
 	}
 	if (bp->b_blkno == -1) {
 		bufdone(bp);
 		return (0);
 	}
 	/*
 	 * Read/write the block from/to the disk that contains the desired
 	 * file block.
 	 */
 	bp->b_iooffset = dbtob(bp->b_blkno);
 	bo = dep->de_pmp->pm_bo;
 	BO_STRATEGY(bo, bp);
 	return (0);
 }
 
 static int
 msdosfs_print(struct vop_print_args *ap)
 {
 	struct denode *dep = VTODE(ap->a_vp);
 
 	printf("\tstartcluster %lu, dircluster %lu, diroffset %lu, ",
 	       dep->de_StartCluster, dep->de_dirclust, dep->de_diroffset);
 	printf("on dev %s\n", devtoname(dep->de_pmp->pm_dev));
 	return (0);
 }
 
 static int
 msdosfs_pathconf(struct vop_pathconf_args *ap)
 {
 	struct msdosfsmount *pmp = VTODE(ap->a_vp)->de_pmp;
 
 	switch (ap->a_name) {
 	case _PC_FILESIZEBITS:
 		*ap->a_retval = 32;
 		return (0);
 	case _PC_LINK_MAX:
 		*ap->a_retval = 1;
 		return (0);
 	case _PC_NAME_MAX:
 		*ap->a_retval = pmp->pm_flags & MSDOSFSMNT_LONGNAME ? WIN_MAXLEN : 12;
 		return (0);
 	case _PC_CHOWN_RESTRICTED:
 		*ap->a_retval = 1;
 		return (0);
 	case _PC_NO_TRUNC:
 		*ap->a_retval = 0;
 		return (0);
 	default:
 		return (vop_stdpathconf(ap));
 	}
 	/* NOTREACHED */
 }
 
 static int
 msdosfs_vptofh(struct vop_vptofh_args *ap)
 {
 	struct denode *dep;
 	struct defid *defhp;
 
 	dep = VTODE(ap->a_vp);
 	defhp = (struct defid *)ap->a_fhp;
 	defhp->defid_len = sizeof(struct defid);
 	defhp->defid_dirclust = dep->de_dirclust;
 	defhp->defid_dirofs = dep->de_diroffset;
 	/* defhp->defid_gen = dep->de_gen; */
 	return (0);
 }
 
 /* Global vfs data structures for msdosfs */
 struct vop_vector msdosfs_vnodeops = {
 	.vop_default =		&default_vnodeops,
 
 	.vop_access =		msdosfs_access,
 	.vop_bmap =		msdosfs_bmap,
 	.vop_getpages =		msdosfs_getpages,
 	.vop_cachedlookup =	msdosfs_lookup,
 	.vop_open =		msdosfs_open,
 	.vop_close =		msdosfs_close,
 	.vop_create =		msdosfs_create,
 	.vop_fsync =		msdosfs_fsync,
 	.vop_fdatasync =	vop_stdfdatasync_buf,
 	.vop_getattr =		msdosfs_getattr,
 	.vop_inactive =		msdosfs_inactive,
 	.vop_link =		msdosfs_link,
 	.vop_lookup =		vfs_cache_lookup,
 	.vop_mkdir =		msdosfs_mkdir,
 	.vop_mknod =		msdosfs_mknod,
 	.vop_pathconf =		msdosfs_pathconf,
 	.vop_print =		msdosfs_print,
 	.vop_read =		msdosfs_read,
 	.vop_readdir =		msdosfs_readdir,
 	.vop_reclaim =		msdosfs_reclaim,
 	.vop_remove =		msdosfs_remove,
 	.vop_rename =		msdosfs_rename,
 	.vop_rmdir =		msdosfs_rmdir,
 	.vop_setattr =		msdosfs_setattr,
 	.vop_strategy =		msdosfs_strategy,
 	.vop_symlink =		msdosfs_symlink,
 	.vop_write =		msdosfs_write,
 	.vop_vptofh =		msdosfs_vptofh,
 };
 VFS_VOP_VECTOR_REGISTER(msdosfs_vnodeops);
diff --git a/sys/fs/nfsclient/nfs_clvnops.c b/sys/fs/nfsclient/nfs_clvnops.c
index b1a174f171fa..817c1093374c 100644
--- a/sys/fs/nfsclient/nfs_clvnops.c
+++ b/sys/fs/nfsclient/nfs_clvnops.c
@@ -1,4552 +1,4538 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * Rick Macklem at The University of Guelph.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	from nfs_vnops.c	8.16 (Berkeley) 5/27/95
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 /*
  * vnode op calls for Sun NFS version 2, 3 and 4
  */
 
 #include "opt_inet.h"
 
 #include <sys/param.h>
 #include <sys/kernel.h>
 #include <sys/systm.h>
 #include <sys/resourcevar.h>
 #include <sys/proc.h>
 #include <sys/mount.h>
 #include <sys/bio.h>
 #include <sys/buf.h>
 #include <sys/extattr.h>
 #include <sys/filio.h>
 #include <sys/jail.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/namei.h>
 #include <sys/socket.h>
 #include <sys/vnode.h>
 #include <sys/dirent.h>
 #include <sys/fcntl.h>
 #include <sys/lockf.h>
 #include <sys/stat.h>
 #include <sys/sysctl.h>
 #include <sys/signalvar.h>
 
 #include <vm/vm.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_object.h>
 
 #include <fs/nfs/nfsport.h>
 #include <fs/nfsclient/nfsnode.h>
 #include <fs/nfsclient/nfsmount.h>
 #include <fs/nfsclient/nfs.h>
 #include <fs/nfsclient/nfs_kdtrace.h>
 
 #include <net/if.h>
 #include <netinet/in.h>
 #include <netinet/in_var.h>
 
 #include <nfs/nfs_lock.h>
 
 #ifdef KDTRACE_HOOKS
 #include <sys/dtrace_bsd.h>
 
 dtrace_nfsclient_accesscache_flush_probe_func_t
 		dtrace_nfscl_accesscache_flush_done_probe;
 uint32_t	nfscl_accesscache_flush_done_id;
 
 dtrace_nfsclient_accesscache_get_probe_func_t
 		dtrace_nfscl_accesscache_get_hit_probe,
 		dtrace_nfscl_accesscache_get_miss_probe;
 uint32_t	nfscl_accesscache_get_hit_id;
 uint32_t	nfscl_accesscache_get_miss_id;
 
 dtrace_nfsclient_accesscache_load_probe_func_t
 		dtrace_nfscl_accesscache_load_done_probe;
 uint32_t	nfscl_accesscache_load_done_id;
 #endif /* !KDTRACE_HOOKS */
 
 /* Defs */
 #define	TRUE	1
 #define	FALSE	0
 
 extern struct nfsstatsv1 nfsstatsv1;
 extern int nfsrv_useacl;
 extern int nfscl_debuglevel;
 MALLOC_DECLARE(M_NEWNFSREQ);
 
 static vop_read_t	nfsfifo_read;
 static vop_write_t	nfsfifo_write;
 static vop_close_t	nfsfifo_close;
 static int	nfs_setattrrpc(struct vnode *, struct vattr *, struct ucred *,
 		    struct thread *);
 static vop_lookup_t	nfs_lookup;
 static vop_create_t	nfs_create;
 static vop_mknod_t	nfs_mknod;
 static vop_open_t	nfs_open;
 static vop_pathconf_t	nfs_pathconf;
 static vop_close_t	nfs_close;
 static vop_access_t	nfs_access;
 static vop_getattr_t	nfs_getattr;
 static vop_setattr_t	nfs_setattr;
 static vop_read_t	nfs_read;
 static vop_fsync_t	nfs_fsync;
 static vop_remove_t	nfs_remove;
 static vop_link_t	nfs_link;
 static vop_rename_t	nfs_rename;
 static vop_mkdir_t	nfs_mkdir;
 static vop_rmdir_t	nfs_rmdir;
 static vop_symlink_t	nfs_symlink;
 static vop_readdir_t	nfs_readdir;
 static vop_strategy_t	nfs_strategy;
 static	int	nfs_lookitup(struct vnode *, char *, int,
 		    struct ucred *, struct thread *, struct nfsnode **);
 static	int	nfs_sillyrename(struct vnode *, struct vnode *,
 		    struct componentname *);
 static vop_access_t	nfsspec_access;
 static vop_readlink_t	nfs_readlink;
 static vop_print_t	nfs_print;
 static vop_advlock_t	nfs_advlock;
 static vop_advlockasync_t nfs_advlockasync;
 static vop_getacl_t nfs_getacl;
 static vop_setacl_t nfs_setacl;
 static vop_advise_t nfs_advise;
 static vop_allocate_t nfs_allocate;
 static vop_deallocate_t nfs_deallocate;
 static vop_copy_file_range_t nfs_copy_file_range;
 static vop_ioctl_t nfs_ioctl;
 static vop_getextattr_t nfs_getextattr;
 static vop_setextattr_t nfs_setextattr;
 static vop_listextattr_t nfs_listextattr;
 static vop_deleteextattr_t nfs_deleteextattr;
 static vop_lock1_t	nfs_lock;
 
 /*
  * Global vfs data structures for nfs
  */
 
 static struct vop_vector newnfs_vnodeops_nosig = {
 	.vop_default =		&default_vnodeops,
 	.vop_access =		nfs_access,
 	.vop_advlock =		nfs_advlock,
 	.vop_advlockasync =	nfs_advlockasync,
 	.vop_close =		nfs_close,
 	.vop_create =		nfs_create,
 	.vop_fsync =		nfs_fsync,
 	.vop_getattr =		nfs_getattr,
 	.vop_getpages =		ncl_getpages,
 	.vop_putpages =		ncl_putpages,
 	.vop_inactive =		ncl_inactive,
 	.vop_link =		nfs_link,
 	.vop_lock1 =		nfs_lock,
 	.vop_lookup =		nfs_lookup,
 	.vop_mkdir =		nfs_mkdir,
 	.vop_mknod =		nfs_mknod,
 	.vop_open =		nfs_open,
 	.vop_pathconf =		nfs_pathconf,
 	.vop_print =		nfs_print,
 	.vop_read =		nfs_read,
 	.vop_readdir =		nfs_readdir,
 	.vop_readlink =		nfs_readlink,
 	.vop_reclaim =		ncl_reclaim,
 	.vop_remove =		nfs_remove,
 	.vop_rename =		nfs_rename,
 	.vop_rmdir =		nfs_rmdir,
 	.vop_setattr =		nfs_setattr,
 	.vop_strategy =		nfs_strategy,
 	.vop_symlink =		nfs_symlink,
 	.vop_write =		ncl_write,
 	.vop_getacl =		nfs_getacl,
 	.vop_setacl =		nfs_setacl,
 	.vop_advise =		nfs_advise,
 	.vop_allocate =		nfs_allocate,
 	.vop_deallocate =	nfs_deallocate,
 	.vop_copy_file_range =	nfs_copy_file_range,
 	.vop_ioctl =		nfs_ioctl,
 	.vop_getextattr =	nfs_getextattr,
 	.vop_setextattr =	nfs_setextattr,
 	.vop_listextattr =	nfs_listextattr,
 	.vop_deleteextattr =	nfs_deleteextattr,
 };
 VFS_VOP_VECTOR_REGISTER(newnfs_vnodeops_nosig);
 
 static int
 nfs_vnodeops_bypass(struct vop_generic_args *a)
 {
 
 	return (vop_sigdefer(&newnfs_vnodeops_nosig, a));
 }
 
 struct vop_vector newnfs_vnodeops = {
 	.vop_default =		&default_vnodeops,
 	.vop_bypass =		nfs_vnodeops_bypass,
 };
 VFS_VOP_VECTOR_REGISTER(newnfs_vnodeops);
 
 static struct vop_vector newnfs_fifoops_nosig = {
 	.vop_default =		&fifo_specops,
 	.vop_access =		nfsspec_access,
 	.vop_close =		nfsfifo_close,
 	.vop_fsync =		nfs_fsync,
 	.vop_getattr =		nfs_getattr,
 	.vop_inactive =		ncl_inactive,
 	.vop_pathconf =		nfs_pathconf,
 	.vop_print =		nfs_print,
 	.vop_read =		nfsfifo_read,
 	.vop_reclaim =		ncl_reclaim,
 	.vop_setattr =		nfs_setattr,
 	.vop_write =		nfsfifo_write,
 };
 VFS_VOP_VECTOR_REGISTER(newnfs_fifoops_nosig);
 
 static int
 nfs_fifoops_bypass(struct vop_generic_args *a)
 {
 
 	return (vop_sigdefer(&newnfs_fifoops_nosig, a));
 }
 
 struct vop_vector newnfs_fifoops = {
 	.vop_default =		&default_vnodeops,
 	.vop_bypass =		nfs_fifoops_bypass,
 };
 VFS_VOP_VECTOR_REGISTER(newnfs_fifoops);
 
 static int nfs_mknodrpc(struct vnode *dvp, struct vnode **vpp,
     struct componentname *cnp, struct vattr *vap);
 static int nfs_removerpc(struct vnode *dvp, struct vnode *vp, char *name,
     int namelen, struct ucred *cred, struct thread *td);
 static int nfs_renamerpc(struct vnode *fdvp, struct vnode *fvp,
     char *fnameptr, int fnamelen, struct vnode *tdvp, struct vnode *tvp,
     char *tnameptr, int tnamelen, struct ucred *cred, struct thread *td);
 static int nfs_renameit(struct vnode *sdvp, struct vnode *svp,
     struct componentname *scnp, struct sillyrename *sp);
 
 /*
  * Global variables
  */
 SYSCTL_DECL(_vfs_nfs);
 
 static int	nfsaccess_cache_timeout = NFS_MAXATTRTIMO;
 SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_timeout, CTLFLAG_RW,
 	   &nfsaccess_cache_timeout, 0, "NFS ACCESS cache timeout");
 
 static int	nfs_prime_access_cache = 0;
 SYSCTL_INT(_vfs_nfs, OID_AUTO, prime_access_cache, CTLFLAG_RW,
 	   &nfs_prime_access_cache, 0,
 	   "Prime NFS ACCESS cache when fetching attributes");
 
 static int	newnfs_commit_on_close = 0;
 SYSCTL_INT(_vfs_nfs, OID_AUTO, commit_on_close, CTLFLAG_RW,
     &newnfs_commit_on_close, 0, "write+commit on close, else only write");
 
 static int	nfs_clean_pages_on_close = 1;
 SYSCTL_INT(_vfs_nfs, OID_AUTO, clean_pages_on_close, CTLFLAG_RW,
 	   &nfs_clean_pages_on_close, 0, "NFS clean dirty pages on close");
 
 int newnfs_directio_enable = 0;
 SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_directio_enable, CTLFLAG_RW,
 	   &newnfs_directio_enable, 0, "Enable NFS directio");
 
 int nfs_keep_dirty_on_error;
 SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_keep_dirty_on_error, CTLFLAG_RW,
     &nfs_keep_dirty_on_error, 0, "Retry pageout if error returned");
 
 /*
  * This sysctl allows other processes to mmap a file that has been opened
  * O_DIRECT by a process.  In general, having processes mmap the file while
  * Direct IO is in progress can lead to Data Inconsistencies.  But, we allow
  * this by default to prevent DoS attacks - to prevent a malicious user from
  * opening up files O_DIRECT preventing other users from mmap'ing these
  * files.  "Protected" environments where stricter consistency guarantees are
  * required can disable this knob.  The process that opened the file O_DIRECT
  * cannot mmap() the file, because mmap'ed IO on an O_DIRECT open() is not
  * meaningful.
  */
 int newnfs_directio_allow_mmap = 1;
 SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_directio_allow_mmap, CTLFLAG_RW,
 	   &newnfs_directio_allow_mmap, 0, "Enable mmaped IO on file with O_DIRECT opens");
 
 static uint64_t	nfs_maxalloclen = 64 * 1024 * 1024;
 SYSCTL_U64(_vfs_nfs, OID_AUTO, maxalloclen, CTLFLAG_RW,
 	   &nfs_maxalloclen, 0, "NFS max allocate/deallocate length");
 
 #define	NFSACCESS_ALL (NFSACCESS_READ | NFSACCESS_MODIFY		\
 			 | NFSACCESS_EXTEND | NFSACCESS_EXECUTE	\
 			 | NFSACCESS_DELETE | NFSACCESS_LOOKUP)
 
 /*
  * SMP Locking Note :
  * The list of locks after the description of the lock is the ordering
  * of other locks acquired with the lock held.
  * np->n_mtx : Protects the fields in the nfsnode.
        VM Object Lock
        VI_MTX (acquired indirectly)
  * nmp->nm_mtx : Protects the fields in the nfsmount.
        rep->r_mtx
  * ncl_iod_mutex : Global lock, protects shared nfsiod state.
  * nfs_reqq_mtx : Global lock, protects the nfs_reqq list.
        nmp->nm_mtx
        rep->r_mtx
  * rep->r_mtx : Protects the fields in an nfsreq.
  */
 
 static int
 nfs_lock(struct vop_lock1_args *ap)
 {
 	struct vnode *vp;
 	struct nfsnode *np;
 	u_quad_t nsize;
 	int error, lktype;
 	bool onfault;
 
 	vp = ap->a_vp;
 	lktype = ap->a_flags & LK_TYPE_MASK;
 	error = VOP_LOCK1_APV(&default_vnodeops, ap);
 	if (error != 0 || vp->v_op != &newnfs_vnodeops)
 		return (error);
 	np = VTONFS(vp);
 	if (np == NULL)
 		return (0);
 	NFSLOCKNODE(np);
 	if ((np->n_flag & NVNSETSZSKIP) == 0 || (lktype != LK_SHARED &&
 	    lktype != LK_EXCLUSIVE && lktype != LK_UPGRADE &&
 	    lktype != LK_TRYUPGRADE)) {
 		NFSUNLOCKNODE(np);
 		return (0);
 	}
 	onfault = (ap->a_flags & LK_EATTR_MASK) == LK_NOWAIT &&
 	    (ap->a_flags & LK_INIT_MASK) == LK_CANRECURSE &&
 	    (lktype == LK_SHARED || lktype == LK_EXCLUSIVE);
 	if (onfault && vp->v_vnlock->lk_recurse == 0) {
 		/*
 		 * Force retry in vm_fault(), to make the lock request
 		 * sleepable, which allows us to piggy-back the
 		 * sleepable call to vnode_pager_setsize().
 		 */
 		NFSUNLOCKNODE(np);
 		VOP_UNLOCK(vp);
 		return (EBUSY);
 	}
 	if ((ap->a_flags & LK_NOWAIT) != 0 ||
 	    (lktype == LK_SHARED && vp->v_vnlock->lk_recurse > 0)) {
 		NFSUNLOCKNODE(np);
 		return (0);
 	}
 	if (lktype == LK_SHARED) {
 		NFSUNLOCKNODE(np);
 		VOP_UNLOCK(vp);
 		ap->a_flags &= ~(LK_TYPE_MASK | LK_INTERLOCK);
 		ap->a_flags |= LK_EXCLUSIVE;
 		error = VOP_LOCK1_APV(&default_vnodeops, ap);
 		if (error != 0 || vp->v_op != &newnfs_vnodeops)
 			return (error);
 		if (vp->v_data == NULL)
 			goto downgrade;
 		MPASS(vp->v_data == np);
 		NFSLOCKNODE(np);
 		if ((np->n_flag & NVNSETSZSKIP) == 0) {
 			NFSUNLOCKNODE(np);
 			goto downgrade;
 		}
 	}
 	np->n_flag &= ~NVNSETSZSKIP;
 	nsize = np->n_size;
 	NFSUNLOCKNODE(np);
 	vnode_pager_setsize(vp, nsize);
 downgrade:
 	if (lktype == LK_SHARED) {
 		ap->a_flags &= ~(LK_TYPE_MASK | LK_INTERLOCK);
 		ap->a_flags |= LK_DOWNGRADE;
 		(void)VOP_LOCK1_APV(&default_vnodeops, ap);
 	}
 	return (0);
 }
 
 static int
 nfs34_access_otw(struct vnode *vp, int wmode, struct thread *td,
     struct ucred *cred, u_int32_t *retmode)
 {
 	int error = 0, attrflag, i, lrupos;
 	u_int32_t rmode;
 	struct nfsnode *np = VTONFS(vp);
 	struct nfsvattr nfsva;
 
 	error = nfsrpc_accessrpc(vp, wmode, cred, td, &nfsva, &attrflag,
 	    &rmode);
 	if (attrflag)
 		(void) nfscl_loadattrcache(&vp, &nfsva, NULL, 0, 1);
 	if (!error) {
 		lrupos = 0;
 		NFSLOCKNODE(np);
 		for (i = 0; i < NFS_ACCESSCACHESIZE; i++) {
 			if (np->n_accesscache[i].uid == cred->cr_uid) {
 				np->n_accesscache[i].mode = rmode;
 				np->n_accesscache[i].stamp = time_second;
 				break;
 			}
 			if (i > 0 && np->n_accesscache[i].stamp <
 			    np->n_accesscache[lrupos].stamp)
 				lrupos = i;
 		}
 		if (i == NFS_ACCESSCACHESIZE) {
 			np->n_accesscache[lrupos].uid = cred->cr_uid;
 			np->n_accesscache[lrupos].mode = rmode;
 			np->n_accesscache[lrupos].stamp = time_second;
 		}
 		NFSUNLOCKNODE(np);
 		if (retmode != NULL)
 			*retmode = rmode;
 		KDTRACE_NFS_ACCESSCACHE_LOAD_DONE(vp, cred->cr_uid, rmode, 0);
 	} else if (NFS_ISV4(vp)) {
 		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
 	}
 #ifdef KDTRACE_HOOKS
 	if (error != 0)
 		KDTRACE_NFS_ACCESSCACHE_LOAD_DONE(vp, cred->cr_uid, 0,
 		    error);
 #endif
 	return (error);
 }
 
 /*
  * nfs access vnode op.
  * For nfs version 2, just return ok. File accesses may fail later.
  * For nfs version 3, use the access rpc to check accessibility. If file modes
  * are changed on the server, accesses might still fail later.
  */
 static int
 nfs_access(struct vop_access_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	int error = 0, i, gotahit;
 	u_int32_t mode, wmode, rmode;
 	int v34 = NFS_ISV34(vp);
 	struct nfsnode *np = VTONFS(vp);
 
 	/*
 	 * Disallow write attempts on filesystems mounted read-only;
 	 * unless the file is a socket, fifo, or a block or character
 	 * device resident on the filesystem.
 	 */
 	if ((ap->a_accmode & (VWRITE | VAPPEND | VWRITE_NAMED_ATTRS |
 	    VDELETE_CHILD | VWRITE_ATTRIBUTES | VDELETE | VWRITE_ACL |
 	    VWRITE_OWNER)) != 0 && (vp->v_mount->mnt_flag & MNT_RDONLY) != 0) {
 		switch (vp->v_type) {
 		case VREG:
 		case VDIR:
 		case VLNK:
 			return (EROFS);
 		default:
 			break;
 		}
 	}
 	/*
 	 * For nfs v3 or v4, check to see if we have done this recently, and if
 	 * so return our cached result instead of making an ACCESS call.
 	 * If not, do an access rpc, otherwise you are stuck emulating
 	 * ufs_access() locally using the vattr. This may not be correct,
 	 * since the server may apply other access criteria such as
 	 * client uid-->server uid mapping that we do not know about.
 	 */
 	if (v34) {
 		if (ap->a_accmode & VREAD)
 			mode = NFSACCESS_READ;
 		else
 			mode = 0;
 		if (vp->v_type != VDIR) {
 			if (ap->a_accmode & VWRITE)
 				mode |= (NFSACCESS_MODIFY | NFSACCESS_EXTEND);
 			if (ap->a_accmode & VAPPEND)
 				mode |= NFSACCESS_EXTEND;
 			if (ap->a_accmode & VEXEC)
 				mode |= NFSACCESS_EXECUTE;
 			if (ap->a_accmode & VDELETE)
 				mode |= NFSACCESS_DELETE;
 		} else {
 			if (ap->a_accmode & VWRITE)
 				mode |= (NFSACCESS_MODIFY | NFSACCESS_EXTEND);
 			if (ap->a_accmode & VAPPEND)
 				mode |= NFSACCESS_EXTEND;
 			if (ap->a_accmode & VEXEC)
 				mode |= NFSACCESS_LOOKUP;
 			if (ap->a_accmode & VDELETE)
 				mode |= NFSACCESS_DELETE;
 			if (ap->a_accmode & VDELETE_CHILD)
 				mode |= NFSACCESS_MODIFY;
 		}
 		/* XXX safety belt, only make blanket request if caching */
 		if (nfsaccess_cache_timeout > 0) {
 			wmode = NFSACCESS_READ | NFSACCESS_MODIFY |
 				NFSACCESS_EXTEND | NFSACCESS_EXECUTE |
 				NFSACCESS_DELETE | NFSACCESS_LOOKUP;
 		} else {
 			wmode = mode;
 		}
 
 		/*
 		 * Does our cached result allow us to give a definite yes to
 		 * this request?
 		 */
 		gotahit = 0;
 		NFSLOCKNODE(np);
 		for (i = 0; i < NFS_ACCESSCACHESIZE; i++) {
 			if (ap->a_cred->cr_uid == np->n_accesscache[i].uid) {
 			    if (time_second < (np->n_accesscache[i].stamp
 				+ nfsaccess_cache_timeout) &&
 				(np->n_accesscache[i].mode & mode) == mode) {
 				NFSINCRGLOBAL(nfsstatsv1.accesscache_hits);
 				gotahit = 1;
 			    }
 			    break;
 			}
 		}
 		NFSUNLOCKNODE(np);
 #ifdef KDTRACE_HOOKS
 		if (gotahit != 0)
 			KDTRACE_NFS_ACCESSCACHE_GET_HIT(vp,
 			    ap->a_cred->cr_uid, mode);
 		else
 			KDTRACE_NFS_ACCESSCACHE_GET_MISS(vp,
 			    ap->a_cred->cr_uid, mode);
 #endif
 		if (gotahit == 0) {
 			/*
 			 * Either a no, or a don't know.  Go to the wire.
 			 */
 			NFSINCRGLOBAL(nfsstatsv1.accesscache_misses);
 		        error = nfs34_access_otw(vp, wmode, ap->a_td,
 			    ap->a_cred, &rmode);
 			if (!error &&
 			    (rmode & mode) != mode)
 				error = EACCES;
 		}
 		return (error);
 	} else {
 		if ((error = nfsspec_access(ap)) != 0) {
 			return (error);
 		}
 		/*
 		 * Attempt to prevent a mapped root from accessing a file
 		 * which it shouldn't.  We try to read a byte from the file
 		 * if the user is root and the file is not zero length.
 		 * After calling nfsspec_access, we should have the correct
 		 * file size cached.
 		 */
 		NFSLOCKNODE(np);
 		if (ap->a_cred->cr_uid == 0 && (ap->a_accmode & VREAD)
 		    && VTONFS(vp)->n_size > 0) {
 			struct iovec aiov;
 			struct uio auio;
 			char buf[1];
 
 			NFSUNLOCKNODE(np);
 			aiov.iov_base = buf;
 			aiov.iov_len = 1;
 			auio.uio_iov = &aiov;
 			auio.uio_iovcnt = 1;
 			auio.uio_offset = 0;
 			auio.uio_resid = 1;
 			auio.uio_segflg = UIO_SYSSPACE;
 			auio.uio_rw = UIO_READ;
 			auio.uio_td = ap->a_td;
 
 			if (vp->v_type == VREG)
 				error = ncl_readrpc(vp, &auio, ap->a_cred);
 			else if (vp->v_type == VDIR) {
 				char* bp;
 				bp = malloc(NFS_DIRBLKSIZ, M_TEMP, M_WAITOK);
 				aiov.iov_base = bp;
 				aiov.iov_len = auio.uio_resid = NFS_DIRBLKSIZ;
 				error = ncl_readdirrpc(vp, &auio, ap->a_cred,
 				    ap->a_td);
 				free(bp, M_TEMP);
 			} else if (vp->v_type == VLNK)
 				error = ncl_readlinkrpc(vp, &auio, ap->a_cred);
 			else
 				error = EACCES;
 		} else
 			NFSUNLOCKNODE(np);
 		return (error);
 	}
 }
 
 /*
  * nfs open vnode op
  * Check to see if the type is ok
  * and that deletion is not in progress.
  * For paged in text files, you will need to flush the page cache
  * if consistency is lost.
  */
 /* ARGSUSED */
 static int
 nfs_open(struct vop_open_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct nfsnode *np = VTONFS(vp);
 	struct vattr vattr;
 	int error;
 	int fmode = ap->a_mode;
 	struct ucred *cred;
 	vm_object_t obj;
 
 	if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK)
 		return (EOPNOTSUPP);
 
 	/*
 	 * For NFSv4, we need to do the Open Op before cache validation,
 	 * so that we conform to RFC3530 Sec. 9.3.1.
 	 */
 	if (NFS_ISV4(vp)) {
 		error = nfsrpc_open(vp, fmode, ap->a_cred, ap->a_td);
 		if (error) {
 			error = nfscl_maperr(ap->a_td, error, (uid_t)0,
 			    (gid_t)0);
 			return (error);
 		}
 	}
 
 	/*
 	 * Now, if this Open will be doing reading, re-validate/flush the
 	 * cache, so that Close/Open coherency is maintained.
 	 */
 	NFSLOCKNODE(np);
 	if (np->n_flag & NMODIFIED) {
 		NFSUNLOCKNODE(np);
 		if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE) {
 			NFSVOPLOCK(vp, LK_UPGRADE | LK_RETRY);
 			if (VN_IS_DOOMED(vp))
 				return (EBADF);
 		}
 		error = ncl_vinvalbuf(vp, V_SAVE, ap->a_td, 1);
 		if (error == EINTR || error == EIO) {
 			if (NFS_ISV4(vp))
 				(void) nfsrpc_close(vp, 0, ap->a_td);
 			return (error);
 		}
 		NFSLOCKNODE(np);
 		np->n_attrstamp = 0;
 		KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
 		if (vp->v_type == VDIR)
 			np->n_direofoffset = 0;
 		NFSUNLOCKNODE(np);
 		error = VOP_GETATTR(vp, &vattr, ap->a_cred);
 		if (error) {
 			if (NFS_ISV4(vp))
 				(void) nfsrpc_close(vp, 0, ap->a_td);
 			return (error);
 		}
 		NFSLOCKNODE(np);
 		np->n_mtime = vattr.va_mtime;
 		if (NFS_ISV4(vp))
 			np->n_change = vattr.va_filerev;
 	} else {
 		NFSUNLOCKNODE(np);
 		error = VOP_GETATTR(vp, &vattr, ap->a_cred);
 		if (error) {
 			if (NFS_ISV4(vp))
 				(void) nfsrpc_close(vp, 0, ap->a_td);
 			return (error);
 		}
 		NFSLOCKNODE(np);
 		if ((NFS_ISV4(vp) && np->n_change != vattr.va_filerev) ||
 		    NFS_TIMESPEC_COMPARE(&np->n_mtime, &vattr.va_mtime)) {
 			if (vp->v_type == VDIR)
 				np->n_direofoffset = 0;
 			NFSUNLOCKNODE(np);
 			if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE) {
 				NFSVOPLOCK(vp, LK_UPGRADE | LK_RETRY);
 				if (VN_IS_DOOMED(vp))
 					return (EBADF);
 			}
 			error = ncl_vinvalbuf(vp, V_SAVE, ap->a_td, 1);
 			if (error == EINTR || error == EIO) {
 				if (NFS_ISV4(vp))
 					(void) nfsrpc_close(vp, 0, ap->a_td);
 				return (error);
 			}
 			NFSLOCKNODE(np);
 			np->n_mtime = vattr.va_mtime;
 			if (NFS_ISV4(vp))
 				np->n_change = vattr.va_filerev;
 		}
 	}
 
 	/*
 	 * If the object has >= 1 O_DIRECT active opens, we disable caching.
 	 */
 	if (newnfs_directio_enable && (fmode & O_DIRECT) &&
 	    (vp->v_type == VREG)) {
 		if (np->n_directio_opens == 0) {
 			NFSUNLOCKNODE(np);
 			if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE) {
 				NFSVOPLOCK(vp, LK_UPGRADE | LK_RETRY);
 				if (VN_IS_DOOMED(vp))
 					return (EBADF);
 			}
 			error = ncl_vinvalbuf(vp, V_SAVE, ap->a_td, 1);
 			if (error) {
 				if (NFS_ISV4(vp))
 					(void) nfsrpc_close(vp, 0, ap->a_td);
 				return (error);
 			}
 			NFSLOCKNODE(np);
 			np->n_flag |= NNONCACHE;
 		}
 		np->n_directio_opens++;
 	}
 
 	/* If opened for writing via NFSv4.1 or later, mark that for pNFS. */
 	if (NFSHASPNFS(VFSTONFS(vp->v_mount)) && (fmode & FWRITE) != 0)
 		np->n_flag |= NWRITEOPENED;
 
 	/*
 	 * If this is an open for writing, capture a reference to the
 	 * credentials, so they can be used by ncl_putpages(). Using
 	 * these write credentials is preferable to the credentials of
 	 * whatever thread happens to be doing the VOP_PUTPAGES() since
 	 * the write RPCs are less likely to fail with EACCES.
 	 */
 	if ((fmode & FWRITE) != 0) {
 		cred = np->n_writecred;
 		np->n_writecred = crhold(ap->a_cred);
 	} else
 		cred = NULL;
 	NFSUNLOCKNODE(np);
 
 	if (cred != NULL)
 		crfree(cred);
 	vnode_create_vobject(vp, vattr.va_size, ap->a_td);
 
 	/*
 	 * If the text file has been mmap'd, flush any dirty pages to the
 	 * buffer cache and then...
 	 * Make sure all writes are pushed to the NFS server.  If this is not
 	 * done, the modify time of the file can change while the text
 	 * file is being executed.  This will cause the process that is
 	 * executing the text file to be terminated.
 	 */
 	if (vp->v_writecount <= -1) {
 		if ((obj = vp->v_object) != NULL &&
 		    vm_object_mightbedirty(obj)) {
 			if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE) {
 				NFSVOPLOCK(vp, LK_UPGRADE | LK_RETRY);
 				if (VN_IS_DOOMED(vp))
 					return (EBADF);
 			}
 			VM_OBJECT_WLOCK(obj);
 			vm_object_page_clean(obj, 0, 0, OBJPC_SYNC);
 			VM_OBJECT_WUNLOCK(obj);
 		}
 
 		/* Now, flush the buffer cache. */
 		ncl_flush(vp, MNT_WAIT, curthread, 0, 0);
 
 		/* And, finally, make sure that n_mtime is up to date. */
 		np = VTONFS(vp);
 		NFSLOCKNODE(np);
 		np->n_mtime = np->n_vattr.na_mtime;
 		NFSUNLOCKNODE(np);
 	}
 	return (0);
 }
 
 /*
  * nfs close vnode op
  * What an NFS client should do upon close after writing is a debatable issue.
  * Most NFS clients push delayed writes to the server upon close, basically for
  * two reasons:
  * 1 - So that any write errors may be reported back to the client process
  *     doing the close system call. By far the two most likely errors are
  *     NFSERR_NOSPC and NFSERR_DQUOT to indicate space allocation failure.
  * 2 - To put a worst case upper bound on cache inconsistency between
  *     multiple clients for the file.
  * There is also a consistency problem for Version 2 of the protocol w.r.t.
  * not being able to tell if other clients are writing a file concurrently,
  * since there is no way of knowing if the changed modify time in the reply
  * is only due to the write for this client.
  * (NFS Version 3 provides weak cache consistency data in the reply that
  *  should be sufficient to detect and handle this case.)
  *
  * The current code does the following:
  * for NFS Version 2 - play it safe and flush/invalidate all dirty buffers
  * for NFS Version 3 - flush dirty buffers to the server but don't invalidate
  *                     or commit them (this satisfies 1 and 2 except for the
  *                     case where the server crashes after this close but
  *                     before the commit RPC, which is felt to be "good
  *                     enough". Changing the last argument to ncl_flush() to
  *                     a 1 would force a commit operation, if it is felt a
  *                     commit is necessary now.
  * for NFS Version 4 - flush the dirty buffers and commit them, if
  *		       nfscl_mustflush() says this is necessary.
  *                     It is necessary if there is no write delegation held,
  *                     in order to satisfy open/close coherency.
  *                     If the file isn't cached on local stable storage,
  *                     it may be necessary in order to detect "out of space"
  *                     errors from the server, if the write delegation
  *                     issued by the server doesn't allow the file to grow.
  */
 /* ARGSUSED */
 static int
 nfs_close(struct vop_close_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct nfsnode *np = VTONFS(vp);
 	struct nfsvattr nfsva;
 	struct ucred *cred;
 	int error = 0, ret, localcred = 0;
 	int fmode = ap->a_fflag;
 
 	if (NFSCL_FORCEDISM(vp->v_mount))
 		return (0);
 	/*
 	 * During shutdown, a_cred isn't valid, so just use root.
 	 */
 	if (ap->a_cred == NOCRED) {
 		cred = newnfs_getcred();
 		localcred = 1;
 	} else {
 		cred = ap->a_cred;
 	}
 	if (vp->v_type == VREG) {
 	    /*
 	     * Examine and clean dirty pages, regardless of NMODIFIED.
 	     * This closes a major hole in close-to-open consistency.
 	     * We want to push out all dirty pages (and buffers) on
 	     * close, regardless of whether they were dirtied by
 	     * mmap'ed writes or via write().
 	     */
 	    if (nfs_clean_pages_on_close && vp->v_object) {
 		if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE) {
 			NFSVOPLOCK(vp, LK_UPGRADE | LK_RETRY);
 			if (VN_IS_DOOMED(vp) && ap->a_fflag != FNONBLOCK)
 				return (EBADF);
 		}
 		VM_OBJECT_WLOCK(vp->v_object);
 		vm_object_page_clean(vp->v_object, 0, 0, 0);
 		VM_OBJECT_WUNLOCK(vp->v_object);
 	    }
 	    NFSLOCKNODE(np);
 	    if (np->n_flag & NMODIFIED) {
 		NFSUNLOCKNODE(np);
 		if (NFS_ISV3(vp)) {
 		    /*
 		     * Under NFSv3 we have dirty buffers to dispose of.  We
 		     * must flush them to the NFS server.  We have the option
 		     * of waiting all the way through the commit rpc or just
 		     * waiting for the initial write.  The default is to only
 		     * wait through the initial write so the data is in the
 		     * server's cache, which is roughly similar to the state
 		     * a standard disk subsystem leaves the file in on close().
 		     *
 		     * We cannot clear the NMODIFIED bit in np->n_flag due to
 		     * potential races with other processes, and certainly
 		     * cannot clear it if we don't commit.
 		     * These races occur when there is no longer the old
 		     * traditional vnode locking implemented for Vnode Ops.
 		     */
 		    int cm = newnfs_commit_on_close ? 1 : 0;
 		    if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE) {
 			    NFSVOPLOCK(vp, LK_UPGRADE | LK_RETRY);
 			    if (VN_IS_DOOMED(vp) && ap->a_fflag != FNONBLOCK)
 				    return (EBADF);
 		    }
 		    error = ncl_flush(vp, MNT_WAIT, ap->a_td, cm, 0);
 		    /* np->n_flag &= ~NMODIFIED; */
 		} else if (NFS_ISV4(vp)) { 
 			if (nfscl_mustflush(vp) != 0) {
 				int cm = newnfs_commit_on_close ? 1 : 0;
 				if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE) {
 					NFSVOPLOCK(vp, LK_UPGRADE | LK_RETRY);
 					if (VN_IS_DOOMED(vp) && ap->a_fflag !=
 					    FNONBLOCK)
 						return (EBADF);
 				}
 				error = ncl_flush(vp, MNT_WAIT, ap->a_td,
 				    cm, 0);
 				/*
 				 * as above w.r.t races when clearing
 				 * NMODIFIED.
 				 * np->n_flag &= ~NMODIFIED;
 				 */
 			}
 		} else {
 			if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE) {
 				NFSVOPLOCK(vp, LK_UPGRADE | LK_RETRY);
 				if (VN_IS_DOOMED(vp) && ap->a_fflag !=
 				    FNONBLOCK)
 					return (EBADF);
 			}
 			error = ncl_vinvalbuf(vp, V_SAVE, ap->a_td, 1);
 		}
 		NFSLOCKNODE(np);
 	    }
  	    /* 
  	     * Invalidate the attribute cache in all cases.
  	     * An open is going to fetch fresh attrs any way, other procs
  	     * on this node that have file open will be forced to do an 
  	     * otw attr fetch, but this is safe.
 	     * --> A user found that their RPC count dropped by 20% when
 	     *     this was commented out and I can't see any requirement
 	     *     for it, so I've disabled it when negative lookups are
 	     *     enabled. (What does this have to do with negative lookup
 	     *     caching? Well nothing, except it was reported by the
 	     *     same user that needed negative lookup caching and I wanted
 	     *     there to be a way to disable it to see if it
 	     *     is the cause of some caching/coherency issue that might
 	     *     crop up.)
  	     */
 	    if (VFSTONFS(vp->v_mount)->nm_negnametimeo == 0) {
 		    np->n_attrstamp = 0;
 		    KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
 	    }
 	    if (np->n_flag & NWRITEERR) {
 		np->n_flag &= ~NWRITEERR;
 		error = np->n_error;
 	    }
 	    NFSUNLOCKNODE(np);
 	}
 
 	if (NFS_ISV4(vp)) {
 		/*
 		 * Get attributes so "change" is up to date.
 		 */
 		if (error == 0 && nfscl_mustflush(vp) != 0 &&
 		    vp->v_type == VREG &&
 		    (VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NOCTO) == 0) {
 			ret = nfsrpc_getattr(vp, cred, ap->a_td, &nfsva);
 			if (!ret) {
 				np->n_change = nfsva.na_filerev;
 				(void) nfscl_loadattrcache(&vp, &nfsva, NULL,
 				    0, 0);
 			}
 		}
 
 		/*
 		 * and do the close.
 		 */
 		ret = nfsrpc_close(vp, 0, ap->a_td);
 		if (!error && ret)
 			error = ret;
 		if (error)
 			error = nfscl_maperr(ap->a_td, error, (uid_t)0,
 			    (gid_t)0);
 	}
 	if (newnfs_directio_enable)
 		KASSERT((np->n_directio_asyncwr == 0),
 			("nfs_close: dirty unflushed (%d) directio buffers\n",
 			 np->n_directio_asyncwr));
 	if (newnfs_directio_enable && (fmode & O_DIRECT) && (vp->v_type == VREG)) {
 		NFSLOCKNODE(np);
 		KASSERT((np->n_directio_opens > 0), 
 			("nfs_close: unexpectedly value (0) of n_directio_opens\n"));
 		np->n_directio_opens--;
 		if (np->n_directio_opens == 0)
 			np->n_flag &= ~NNONCACHE;
 		NFSUNLOCKNODE(np);
 	}
 	if (localcred)
 		NFSFREECRED(cred);
 	return (error);
 }
 
 /*
  * nfs getattr call from vfs.
  */
 static int
 nfs_getattr(struct vop_getattr_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct thread *td = curthread;	/* XXX */
 	struct nfsnode *np = VTONFS(vp);
 	int error = 0;
 	struct nfsvattr nfsva;
 	struct vattr *vap = ap->a_vap;
 	struct vattr vattr;
 
 	/*
 	 * Update local times for special files.
 	 */
 	NFSLOCKNODE(np);
 	if (np->n_flag & (NACC | NUPD))
 		np->n_flag |= NCHG;
 	NFSUNLOCKNODE(np);
 	/*
 	 * First look in the cache.
 	 */
 	if (ncl_getattrcache(vp, &vattr) == 0) {
 		ncl_copy_vattr(vap, &vattr);
 
 		/*
 		 * Get the local modify time for the case of a write
 		 * delegation.
 		 */
 		nfscl_deleggetmodtime(vp, &vap->va_mtime);
 		return (0);
 	}
 
 	if (NFS_ISV34(vp) && nfs_prime_access_cache &&
 	    nfsaccess_cache_timeout > 0) {
 		NFSINCRGLOBAL(nfsstatsv1.accesscache_misses);
 		nfs34_access_otw(vp, NFSACCESS_ALL, td, ap->a_cred, NULL);
 		if (ncl_getattrcache(vp, ap->a_vap) == 0) {
 			nfscl_deleggetmodtime(vp, &ap->a_vap->va_mtime);
 			return (0);
 		}
 	}
 	error = nfsrpc_getattr(vp, ap->a_cred, td, &nfsva);
 	if (!error)
 		error = nfscl_loadattrcache(&vp, &nfsva, vap, 0, 0);
 	if (!error) {
 		/*
 		 * Get the local modify time for the case of a write
 		 * delegation.
 		 */
 		nfscl_deleggetmodtime(vp, &vap->va_mtime);
 	} else if (NFS_ISV4(vp)) {
 		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
 	}
 	return (error);
 }
 
 /*
  * nfs setattr call.
  */
 static int
 nfs_setattr(struct vop_setattr_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct nfsnode *np = VTONFS(vp);
 	struct thread *td = curthread;	/* XXX */
 	struct vattr *vap = ap->a_vap;
 	int error = 0;
 	u_quad_t tsize;
 	struct timespec ts;
 
 #ifndef nolint
 	tsize = (u_quad_t)0;
 #endif
 
 	/*
 	 * Setting of flags and marking of atimes are not supported.
 	 */
 	if (vap->va_flags != VNOVAL)
 		return (EOPNOTSUPP);
 
 	/*
 	 * Disallow write attempts if the filesystem is mounted read-only.
 	 */
   	if ((vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL ||
 	    vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL ||
 	    vap->va_mtime.tv_sec != VNOVAL ||
 	    vap->va_birthtime.tv_sec != VNOVAL ||
 	    vap->va_mode != (mode_t)VNOVAL) &&
 	    (vp->v_mount->mnt_flag & MNT_RDONLY))
 		return (EROFS);
 	if (vap->va_size != VNOVAL) {
  		switch (vp->v_type) {
  		case VDIR:
  			return (EISDIR);
  		case VCHR:
  		case VBLK:
  		case VSOCK:
  		case VFIFO:
 			if (vap->va_mtime.tv_sec == VNOVAL &&
 			    vap->va_atime.tv_sec == VNOVAL &&
 			    vap->va_birthtime.tv_sec == VNOVAL &&
 			    vap->va_mode == (mode_t)VNOVAL &&
 			    vap->va_uid == (uid_t)VNOVAL &&
 			    vap->va_gid == (gid_t)VNOVAL)
 				return (0);		
  			vap->va_size = VNOVAL;
  			break;
  		default:
 			/*
 			 * Disallow write attempts if the filesystem is
 			 * mounted read-only.
 			 */
 			if (vp->v_mount->mnt_flag & MNT_RDONLY)
 				return (EROFS);
 			/*
 			 *  We run vnode_pager_setsize() early (why?),
 			 * we must set np->n_size now to avoid vinvalbuf
 			 * V_SAVE races that might setsize a lower
 			 * value.
 			 */
 			NFSLOCKNODE(np);
 			tsize = np->n_size;
 			NFSUNLOCKNODE(np);
 			error = ncl_meta_setsize(vp, td, vap->va_size);
 			NFSLOCKNODE(np);
  			if (np->n_flag & NMODIFIED) {
 			    tsize = np->n_size;
 			    NFSUNLOCKNODE(np);
 			    error = ncl_vinvalbuf(vp, vap->va_size == 0 ?
 			        0 : V_SAVE, td, 1);
 			    if (error != 0) {
 				    vnode_pager_setsize(vp, tsize);
 				    return (error);
 			    }
 			    /*
 			     * Call nfscl_delegmodtime() to set the modify time
 			     * locally, as required.
 			     */
 			    nfscl_delegmodtime(vp);
  			} else
 			    NFSUNLOCKNODE(np);
 			/*
 			 * np->n_size has already been set to vap->va_size
 			 * in ncl_meta_setsize(). We must set it again since
 			 * nfs_loadattrcache() could be called through
 			 * ncl_meta_setsize() and could modify np->n_size.
 			 */
 			NFSLOCKNODE(np);
  			np->n_vattr.na_size = np->n_size = vap->va_size;
 			NFSUNLOCKNODE(np);
   		}
   	} else {
 		NFSLOCKNODE(np);
 		if ((vap->va_mtime.tv_sec != VNOVAL || vap->va_atime.tv_sec != VNOVAL) && 
 		    (np->n_flag & NMODIFIED) && vp->v_type == VREG) {
 			NFSUNLOCKNODE(np);
 			error = ncl_vinvalbuf(vp, V_SAVE, td, 1);
 			if (error == EINTR || error == EIO)
 				return (error);
 		} else
 			NFSUNLOCKNODE(np);
 	}
 	error = nfs_setattrrpc(vp, vap, ap->a_cred, td);
 	if (vap->va_size != VNOVAL) {
 		if (error == 0) {
 			nanouptime(&ts);
 			NFSLOCKNODE(np);
 			np->n_localmodtime = ts;
 			NFSUNLOCKNODE(np);
 		} else {
 			NFSLOCKNODE(np);
 			np->n_size = np->n_vattr.na_size = tsize;
 			vnode_pager_setsize(vp, tsize);
 			NFSUNLOCKNODE(np);
 		}
 	}
 	return (error);
 }
 
 /*
  * Do an nfs setattr rpc.
  */
 static int
 nfs_setattrrpc(struct vnode *vp, struct vattr *vap, struct ucred *cred,
     struct thread *td)
 {
 	struct nfsnode *np = VTONFS(vp);
 	int error, ret, attrflag, i;
 	struct nfsvattr nfsva;
 
 	if (NFS_ISV34(vp)) {
 		NFSLOCKNODE(np);
 		for (i = 0; i < NFS_ACCESSCACHESIZE; i++)
 			np->n_accesscache[i].stamp = 0;
 		np->n_flag |= NDELEGMOD;
 		NFSUNLOCKNODE(np);
 		KDTRACE_NFS_ACCESSCACHE_FLUSH_DONE(vp);
 	}
 	error = nfsrpc_setattr(vp, vap, NULL, cred, td, &nfsva, &attrflag);
 	if (attrflag) {
 		ret = nfscl_loadattrcache(&vp, &nfsva, NULL, 0, 1);
 		if (ret && !error)
 			error = ret;
 	}
 	if (error && NFS_ISV4(vp))
 		error = nfscl_maperr(td, error, vap->va_uid, vap->va_gid);
 	return (error);
 }
 
 /*
  * nfs lookup call, one step at a time...
  * First look in cache
  * If not found, unlock the directory nfsnode and do the rpc
  */
 static int
 nfs_lookup(struct vop_lookup_args *ap)
 {
 	struct componentname *cnp = ap->a_cnp;
 	struct vnode *dvp = ap->a_dvp;
 	struct vnode **vpp = ap->a_vpp;
 	struct mount *mp = dvp->v_mount;
 	int flags = cnp->cn_flags;
 	struct vnode *newvp;
 	struct nfsmount *nmp;
 	struct nfsnode *np, *newnp;
 	int error = 0, attrflag, dattrflag, ltype, ncticks;
 	struct thread *td = curthread;
 	struct nfsfh *nfhp;
 	struct nfsvattr dnfsva, nfsva;
 	struct vattr vattr;
 	struct timespec nctime, ts;
 	uint32_t openmode;
 
 	*vpp = NULLVP;
 	if ((flags & ISLASTCN) && (mp->mnt_flag & MNT_RDONLY) &&
 	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
 		return (EROFS);
 	if (dvp->v_type != VDIR)
 		return (ENOTDIR);
 	nmp = VFSTONFS(mp);
 	np = VTONFS(dvp);
 
 	/* For NFSv4, wait until any remove is done. */
 	NFSLOCKNODE(np);
 	while (NFSHASNFSV4(nmp) && (np->n_flag & NREMOVEINPROG)) {
 		np->n_flag |= NREMOVEWANT;
 		(void) msleep((caddr_t)np, &np->n_mtx, PZERO, "nfslkup", 0);
 	}
 	NFSUNLOCKNODE(np);
 
 	error = vn_dir_check_exec(dvp, cnp);
 	if (error != 0)
 		return (error);
 	error = cache_lookup(dvp, vpp, cnp, &nctime, &ncticks);
 	if (error > 0 && error != ENOENT)
 		return (error);
 	if (error == -1) {
 		/*
 		 * Lookups of "." are special and always return the
 		 * current directory.  cache_lookup() already handles
 		 * associated locking bookkeeping, etc.
 		 */
 		if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') {
-			/* XXX: Is this really correct? */
-			if (cnp->cn_nameiop != LOOKUP &&
-			    (flags & ISLASTCN))
-				cnp->cn_flags |= SAVENAME;
 			return (0);
 		}
 
 		/*
 		 * We only accept a positive hit in the cache if the
 		 * change time of the file matches our cached copy.
 		 * Otherwise, we discard the cache entry and fallback
 		 * to doing a lookup RPC.  We also only trust cache
 		 * entries for less than nm_nametimeo seconds.
 		 *
 		 * To better handle stale file handles and attributes,
 		 * clear the attribute cache of this node if it is a
 		 * leaf component, part of an open() call, and not
 		 * locally modified before fetching the attributes.
 		 * This should allow stale file handles to be detected
 		 * here where we can fall back to a LOOKUP RPC to
 		 * recover rather than having nfs_open() detect the
 		 * stale file handle and failing open(2) with ESTALE.
 		 */
 		newvp = *vpp;
 		newnp = VTONFS(newvp);
 		if (!(nmp->nm_flag & NFSMNT_NOCTO) &&
 		    (flags & (ISLASTCN | ISOPEN)) == (ISLASTCN | ISOPEN) &&
 		    !(newnp->n_flag & NMODIFIED)) {
 			NFSLOCKNODE(newnp);
 			newnp->n_attrstamp = 0;
 			KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(newvp);
 			NFSUNLOCKNODE(newnp);
 		}
 		if (nfscl_nodeleg(newvp, 0) == 0 ||
 		    ((u_int)(ticks - ncticks) < (nmp->nm_nametimeo * hz) &&
 		    VOP_GETATTR(newvp, &vattr, cnp->cn_cred) == 0 &&
 		    timespeccmp(&vattr.va_ctime, &nctime, ==))) {
 			NFSINCRGLOBAL(nfsstatsv1.lookupcache_hits);
-			if (cnp->cn_nameiop != LOOKUP &&
-			    (flags & ISLASTCN))
-				cnp->cn_flags |= SAVENAME;
 			return (0);
 		}
 		cache_purge(newvp);
 		if (dvp != newvp)
 			vput(newvp);
 		else 
 			vrele(newvp);
 		*vpp = NULLVP;
 	} else if (error == ENOENT) {
 		if (VN_IS_DOOMED(dvp))
 			return (ENOENT);
 		/*
 		 * We only accept a negative hit in the cache if the
 		 * modification time of the parent directory matches
 		 * the cached copy in the name cache entry.
 		 * Otherwise, we discard all of the negative cache
 		 * entries for this directory.  We also only trust
 		 * negative cache entries for up to nm_negnametimeo
 		 * seconds.
 		 */
 		if ((u_int)(ticks - ncticks) < (nmp->nm_negnametimeo * hz) &&
 		    VOP_GETATTR(dvp, &vattr, cnp->cn_cred) == 0 &&
 		    timespeccmp(&vattr.va_mtime, &nctime, ==)) {
 			NFSINCRGLOBAL(nfsstatsv1.lookupcache_hits);
 			return (ENOENT);
 		}
 		cache_purge_negative(dvp);
 	}
 
 	openmode = 0;
 	/*
 	 * If this an NFSv4.1/4.2 mount using the "oneopenown" mount
 	 * option, it is possible to do the Open operation in the same
 	 * compound as Lookup, so long as delegations are not being
 	 * issued.  This saves doing a separate RPC for Open.
 	 * For pnfs, do not do this, since the Open+LayoutGet will
 	 * be needed as a separate RPC.
 	 */
 	NFSLOCKMNT(nmp);
 	if (NFSHASNFSV4N(nmp) && NFSHASONEOPENOWN(nmp) && !NFSHASPNFS(nmp) &&
 	    (nmp->nm_privflag & NFSMNTP_DELEGISSUED) == 0 &&
 	    (!NFSMNT_RDONLY(mp) || (flags & OPENWRITE) == 0) &&
 	    (flags & (ISLASTCN | ISOPEN)) == (ISLASTCN | ISOPEN)) {
 		if ((flags & OPENREAD) != 0)
 			openmode |= NFSV4OPEN_ACCESSREAD;
 		if ((flags & OPENWRITE) != 0)
 			openmode |= NFSV4OPEN_ACCESSWRITE;
 	}
 	NFSUNLOCKMNT(nmp);
 
 	newvp = NULLVP;
 	NFSINCRGLOBAL(nfsstatsv1.lookupcache_misses);
 	nanouptime(&ts);
 	error = nfsrpc_lookup(dvp, cnp->cn_nameptr, cnp->cn_namelen,
 	    cnp->cn_cred, td, &dnfsva, &nfsva, &nfhp, &attrflag, &dattrflag,
 	    openmode);
 	if (dattrflag)
 		(void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, 0, 1);
 	if (error) {
 		if (newvp != NULLVP) {
 			vput(newvp);
 			*vpp = NULLVP;
 		}
 
 		if (error != ENOENT) {
 			if (NFS_ISV4(dvp))
 				error = nfscl_maperr(td, error, (uid_t)0,
 				    (gid_t)0);
 			return (error);
 		}
 
 		/* The requested file was not found. */
 		if ((cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME) &&
 		    (flags & ISLASTCN)) {
 			/*
 			 * XXX: UFS does a full VOP_ACCESS(dvp,
 			 * VWRITE) here instead of just checking
 			 * MNT_RDONLY.
 			 */
 			if (mp->mnt_flag & MNT_RDONLY)
 				return (EROFS);
-			cnp->cn_flags |= SAVENAME;
 			return (EJUSTRETURN);
 		}
 
 		if ((cnp->cn_flags & MAKEENTRY) != 0 && dattrflag) {
 			/*
 			 * Cache the modification time of the parent
 			 * directory from the post-op attributes in
 			 * the name cache entry.  The negative cache
 			 * entry will be ignored once the directory
 			 * has changed.  Don't bother adding the entry
 			 * if the directory has already changed.
 			 */
 			NFSLOCKNODE(np);
 			if (timespeccmp(&np->n_vattr.na_mtime,
 			    &dnfsva.na_mtime, ==)) {
 				NFSUNLOCKNODE(np);
 				cache_enter_time(dvp, NULL, cnp,
 				    &dnfsva.na_mtime, NULL);
 			} else
 				NFSUNLOCKNODE(np);
 		}
 		return (ENOENT);
 	}
 
 	/*
 	 * Handle RENAME case...
 	 */
 	if (cnp->cn_nameiop == RENAME && (flags & ISLASTCN)) {
 		if (NFS_CMPFH(np, nfhp->nfh_fh, nfhp->nfh_len)) {
 			free(nfhp, M_NFSFH);
 			return (EISDIR);
 		}
 		error = nfscl_nget(mp, dvp, nfhp, cnp, td, &np, LK_EXCLUSIVE);
 		if (error)
 			return (error);
 		newvp = NFSTOV(np);
 		/*
 		 * If n_localmodtime >= time before RPC, then
 		 * a file modification operation, such as
 		 * VOP_SETATTR() of size, has occurred while
 		 * the Lookup RPC and acquisition of the vnode
 		 * happened.  As such, the attributes might
 		 * be stale, with possibly an incorrect size.
 		 */
 		NFSLOCKNODE(np);
 		if (timespecisset(&np->n_localmodtime) &&
 		    timespeccmp(&np->n_localmodtime, &ts, >=)) {
 			NFSCL_DEBUG(4, "nfs_lookup: rename localmod "
 			    "stale attributes\n");
 			attrflag = 0;
 		}
 		NFSUNLOCKNODE(np);
 		if (attrflag)
 			(void) nfscl_loadattrcache(&newvp, &nfsva, NULL, 0, 1);
 		*vpp = newvp;
-		cnp->cn_flags |= SAVENAME;
 		return (0);
 	}
 
 	if (flags & ISDOTDOT) {
 		ltype = NFSVOPISLOCKED(dvp);
 		error = vfs_busy(mp, MBF_NOWAIT);
 		if (error != 0) {
 			vfs_ref(mp);
 			NFSVOPUNLOCK(dvp);
 			error = vfs_busy(mp, 0);
 			NFSVOPLOCK(dvp, ltype | LK_RETRY);
 			vfs_rel(mp);
 			if (error == 0 && VN_IS_DOOMED(dvp)) {
 				vfs_unbusy(mp);
 				error = ENOENT;
 			}
 			if (error != 0)
 				return (error);
 		}
 		NFSVOPUNLOCK(dvp);
 		error = nfscl_nget(mp, dvp, nfhp, cnp, td, &np,
 		    cnp->cn_lkflags);
 		if (error == 0)
 			newvp = NFSTOV(np);
 		vfs_unbusy(mp);
 		if (newvp != dvp)
 			NFSVOPLOCK(dvp, ltype | LK_RETRY);
 		if (VN_IS_DOOMED(dvp)) {
 			if (error == 0) {
 				if (newvp == dvp)
 					vrele(newvp);
 				else
 					vput(newvp);
 			}
 			error = ENOENT;
 		}
 		if (error != 0)
 			return (error);
 		if (attrflag)
 			(void) nfscl_loadattrcache(&newvp, &nfsva, NULL, 0, 1);
 	} else if (NFS_CMPFH(np, nfhp->nfh_fh, nfhp->nfh_len)) {
 		free(nfhp, M_NFSFH);
 		VREF(dvp);
 		newvp = dvp;
 		if (attrflag)
 			(void) nfscl_loadattrcache(&newvp, &nfsva, NULL, 0, 1);
 	} else {
 		error = nfscl_nget(mp, dvp, nfhp, cnp, td, &np,
 		    cnp->cn_lkflags);
 		if (error)
 			return (error);
 		newvp = NFSTOV(np);
 		/*
 		 * If n_localmodtime >= time before RPC, then
 		 * a file modification operation, such as
 		 * VOP_SETATTR() of size, has occurred while
 		 * the Lookup RPC and acquisition of the vnode
 		 * happened.  As such, the attributes might
 		 * be stale, with possibly an incorrect size.
 		 */
 		NFSLOCKNODE(np);
 		if (timespecisset(&np->n_localmodtime) &&
 		    timespeccmp(&np->n_localmodtime, &ts, >=)) {
 			NFSCL_DEBUG(4, "nfs_lookup: localmod "
 			    "stale attributes\n");
 			attrflag = 0;
 		}
 		NFSUNLOCKNODE(np);
 		if (attrflag)
 			(void) nfscl_loadattrcache(&newvp, &nfsva, NULL, 0, 1);
 		else if ((flags & (ISLASTCN | ISOPEN)) == (ISLASTCN | ISOPEN) &&
 		    !(np->n_flag & NMODIFIED)) {			
 			/*
 			 * Flush the attribute cache when opening a
 			 * leaf node to ensure that fresh attributes
 			 * are fetched in nfs_open() since we did not
 			 * fetch attributes from the LOOKUP reply.
 			 */
 			NFSLOCKNODE(np);
 			np->n_attrstamp = 0;
 			KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(newvp);
 			NFSUNLOCKNODE(np);
 		}
 	}
-	if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN))
-		cnp->cn_flags |= SAVENAME;
 	if ((cnp->cn_flags & MAKEENTRY) && dvp != newvp &&
 	    (cnp->cn_nameiop != DELETE || !(flags & ISLASTCN)) &&
 	    attrflag != 0 && (newvp->v_type != VDIR || dattrflag != 0))
 		cache_enter_time(dvp, newvp, cnp, &nfsva.na_ctime,
 		    newvp->v_type != VDIR ? NULL : &dnfsva.na_ctime);
 	*vpp = newvp;
 	return (0);
 }
 
 /*
  * nfs read call.
  * Just call ncl_bioread() to do the work.
  */
 static int
 nfs_read(struct vop_read_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 
 	switch (vp->v_type) {
 	case VREG:
 		return (ncl_bioread(vp, ap->a_uio, ap->a_ioflag, ap->a_cred));
 	case VDIR:
 		return (EISDIR);
 	default:
 		return (EOPNOTSUPP);
 	}
 }
 
 /*
  * nfs readlink call
  */
 static int
 nfs_readlink(struct vop_readlink_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 
 	if (vp->v_type != VLNK)
 		return (EINVAL);
 	return (ncl_bioread(vp, ap->a_uio, 0, ap->a_cred));
 }
 
 /*
  * Do a readlink rpc.
  * Called by ncl_doio() from below the buffer cache.
  */
 int
 ncl_readlinkrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred)
 {
 	int error, ret, attrflag;
 	struct nfsvattr nfsva;
 
 	error = nfsrpc_readlink(vp, uiop, cred, uiop->uio_td, &nfsva,
 	    &attrflag);
 	if (attrflag) {
 		ret = nfscl_loadattrcache(&vp, &nfsva, NULL, 0, 1);
 		if (ret && !error)
 			error = ret;
 	}
 	if (error && NFS_ISV4(vp))
 		error = nfscl_maperr(uiop->uio_td, error, (uid_t)0, (gid_t)0);
 	return (error);
 }
 
 /*
  * nfs read rpc call
  * Ditto above
  */
 int
 ncl_readrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred)
 {
 	int error, ret, attrflag;
 	struct nfsvattr nfsva;
 	struct nfsmount *nmp;
 
 	nmp = VFSTONFS(vp->v_mount);
 	error = EIO;
 	attrflag = 0;
 	if (NFSHASPNFS(nmp))
 		error = nfscl_doiods(vp, uiop, NULL, NULL,
 		    NFSV4OPEN_ACCESSREAD, 0, cred, uiop->uio_td);
 	NFSCL_DEBUG(4, "readrpc: aft doiods=%d\n", error);
 	if (error != 0)
 		error = nfsrpc_read(vp, uiop, cred, uiop->uio_td, &nfsva,
 		    &attrflag);
 	if (attrflag) {
 		ret = nfscl_loadattrcache(&vp, &nfsva, NULL, 0, 1);
 		if (ret && !error)
 			error = ret;
 	}
 	if (error && NFS_ISV4(vp))
 		error = nfscl_maperr(uiop->uio_td, error, (uid_t)0, (gid_t)0);
 	return (error);
 }
 
 /*
  * nfs write call
  */
 int
 ncl_writerpc(struct vnode *vp, struct uio *uiop, struct ucred *cred,
     int *iomode, int *must_commit, int called_from_strategy, int ioflag)
 {
 	struct nfsvattr nfsva;
 	int error, attrflag, ret;
 	struct nfsmount *nmp;
 
 	nmp = VFSTONFS(vp->v_mount);
 	error = EIO;
 	attrflag = 0;
 	if (NFSHASPNFS(nmp))
 		error = nfscl_doiods(vp, uiop, iomode, must_commit,
 		    NFSV4OPEN_ACCESSWRITE, 0, cred, uiop->uio_td);
 	NFSCL_DEBUG(4, "writerpc: aft doiods=%d\n", error);
 	if (error != 0)
 		error = nfsrpc_write(vp, uiop, iomode, must_commit, cred,
 		    uiop->uio_td, &nfsva, &attrflag, called_from_strategy,
 		    ioflag);
 	if (attrflag) {
 		if (VTONFS(vp)->n_flag & ND_NFSV4)
 			ret = nfscl_loadattrcache(&vp, &nfsva, NULL, 1, 1);
 		else
 			ret = nfscl_loadattrcache(&vp, &nfsva, NULL, 0, 1);
 		if (ret && !error)
 			error = ret;
 	}
 	if (DOINGASYNC(vp))
 		*iomode = NFSWRITE_FILESYNC;
 	if (error && NFS_ISV4(vp))
 		error = nfscl_maperr(uiop->uio_td, error, (uid_t)0, (gid_t)0);
 	return (error);
 }
 
 /*
  * nfs mknod rpc
  * For NFS v2 this is a kludge. Use a create rpc but with the IFMT bits of the
  * mode set to specify the file type and the size field for rdev.
  */
 static int
 nfs_mknodrpc(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp,
     struct vattr *vap)
 {
 	struct nfsvattr nfsva, dnfsva;
 	struct vnode *newvp = NULL;
 	struct nfsnode *np = NULL, *dnp;
 	struct nfsfh *nfhp;
 	struct vattr vattr;
 	int error = 0, attrflag, dattrflag;
 	u_int32_t rdev;
 
 	if (vap->va_type == VCHR || vap->va_type == VBLK)
 		rdev = vap->va_rdev;
 	else if (vap->va_type == VFIFO || vap->va_type == VSOCK)
 		rdev = 0xffffffff;
 	else
 		return (EOPNOTSUPP);
 	if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred)))
 		return (error);
 	error = nfsrpc_mknod(dvp, cnp->cn_nameptr, cnp->cn_namelen, vap,
 	    rdev, vap->va_type, cnp->cn_cred, curthread, &dnfsva,
 	    &nfsva, &nfhp, &attrflag, &dattrflag);
 	if (!error) {
 		if (!nfhp)
 			(void) nfsrpc_lookup(dvp, cnp->cn_nameptr,
 			    cnp->cn_namelen, cnp->cn_cred, curthread,
 			    &dnfsva, &nfsva, &nfhp, &attrflag, &dattrflag, 0);
 		if (nfhp)
 			error = nfscl_nget(dvp->v_mount, dvp, nfhp, cnp,
 			    curthread, &np, LK_EXCLUSIVE);
 	}
 	if (dattrflag)
 		(void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, 0, 1);
 	if (!error) {
 		newvp = NFSTOV(np);
 		if (attrflag != 0) {
 			error = nfscl_loadattrcache(&newvp, &nfsva, NULL, 0, 1);
 			if (error != 0)
 				vput(newvp);
 		}
 	}
 	if (!error) {
 		*vpp = newvp;
 	} else if (NFS_ISV4(dvp)) {
 		error = nfscl_maperr(curthread, error, vap->va_uid,
 		    vap->va_gid);
 	}
 	dnp = VTONFS(dvp);
 	NFSLOCKNODE(dnp);
 	dnp->n_flag |= NMODIFIED;
 	if (!dattrflag) {
 		dnp->n_attrstamp = 0;
 		KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp);
 	}
 	NFSUNLOCKNODE(dnp);
 	return (error);
 }
 
 /*
  * nfs mknod vop
  * just call nfs_mknodrpc() to do the work.
  */
 /* ARGSUSED */
 static int
 nfs_mknod(struct vop_mknod_args *ap)
 {
 	return (nfs_mknodrpc(ap->a_dvp, ap->a_vpp, ap->a_cnp, ap->a_vap));
 }
 
 static struct mtx nfs_cverf_mtx;
 MTX_SYSINIT(nfs_cverf_mtx, &nfs_cverf_mtx, "NFS create verifier mutex",
     MTX_DEF);
 
 static nfsquad_t
 nfs_get_cverf(void)
 {
 	static nfsquad_t cverf;
 	nfsquad_t ret;
 	static int cverf_initialized = 0;
 
 	mtx_lock(&nfs_cverf_mtx);
 	if (cverf_initialized == 0) {
 		cverf.lval[0] = arc4random();
 		cverf.lval[1] = arc4random();
 		cverf_initialized = 1;
 	} else
 		cverf.qval++;
 	ret = cverf;
 	mtx_unlock(&nfs_cverf_mtx);
 
 	return (ret);
 }
 
 /*
  * nfs file create call
  */
 static int
 nfs_create(struct vop_create_args *ap)
 {
 	struct vnode *dvp = ap->a_dvp;
 	struct vattr *vap = ap->a_vap;
 	struct componentname *cnp = ap->a_cnp;
 	struct nfsnode *np = NULL, *dnp;
 	struct vnode *newvp = NULL;
 	struct nfsmount *nmp;
 	struct nfsvattr dnfsva, nfsva;
 	struct nfsfh *nfhp;
 	nfsquad_t cverf;
 	int error = 0, attrflag, dattrflag, fmode = 0;
 	struct vattr vattr;
 
 	/*
 	 * Oops, not for me..
 	 */
 	if (vap->va_type == VSOCK)
 		return (nfs_mknodrpc(dvp, ap->a_vpp, cnp, vap));
 
 	if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred)))
 		return (error);
 	if (vap->va_vaflags & VA_EXCLUSIVE)
 		fmode |= O_EXCL;
 	dnp = VTONFS(dvp);
 	nmp = VFSTONFS(dvp->v_mount);
 again:
 	/* For NFSv4, wait until any remove is done. */
 	NFSLOCKNODE(dnp);
 	while (NFSHASNFSV4(nmp) && (dnp->n_flag & NREMOVEINPROG)) {
 		dnp->n_flag |= NREMOVEWANT;
 		(void) msleep((caddr_t)dnp, &dnp->n_mtx, PZERO, "nfscrt", 0);
 	}
 	NFSUNLOCKNODE(dnp);
 
 	cverf = nfs_get_cverf();
 	error = nfsrpc_create(dvp, cnp->cn_nameptr, cnp->cn_namelen,
 	    vap, cverf, fmode, cnp->cn_cred, curthread, &dnfsva, &nfsva,
 	    &nfhp, &attrflag, &dattrflag);
 	if (!error) {
 		if (nfhp == NULL)
 			(void) nfsrpc_lookup(dvp, cnp->cn_nameptr,
 			    cnp->cn_namelen, cnp->cn_cred, curthread,
 			    &dnfsva, &nfsva, &nfhp, &attrflag, &dattrflag, 0);
 		if (nfhp != NULL)
 			error = nfscl_nget(dvp->v_mount, dvp, nfhp, cnp,
 			    curthread, &np, LK_EXCLUSIVE);
 	}
 	if (dattrflag)
 		(void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, 0, 1);
 	if (!error) {
 		newvp = NFSTOV(np);
 		if (attrflag == 0)
 			error = nfsrpc_getattr(newvp, cnp->cn_cred, curthread,
 			    &nfsva);
 		if (error == 0)
 			error = nfscl_loadattrcache(&newvp, &nfsva, NULL, 0, 1);
 	}
 	if (error) {
 		if (newvp != NULL) {
 			vput(newvp);
 			newvp = NULL;
 		}
 		if (NFS_ISV34(dvp) && (fmode & O_EXCL) &&
 		    error == NFSERR_NOTSUPP) {
 			fmode &= ~O_EXCL;
 			goto again;
 		}
 	} else if (NFS_ISV34(dvp) && (fmode & O_EXCL)) {
 		if (nfscl_checksattr(vap, &nfsva)) {
 			error = nfsrpc_setattr(newvp, vap, NULL, cnp->cn_cred,
 			    curthread, &nfsva, &attrflag);
 			if (error && (vap->va_uid != (uid_t)VNOVAL ||
 			    vap->va_gid != (gid_t)VNOVAL)) {
 				/* try again without setting uid/gid */
 				vap->va_uid = (uid_t)VNOVAL;
 				vap->va_gid = (uid_t)VNOVAL;
 				error = nfsrpc_setattr(newvp, vap, NULL, 
 				    cnp->cn_cred, curthread, &nfsva, &attrflag);
 			}
 			if (attrflag)
 				(void) nfscl_loadattrcache(&newvp, &nfsva, NULL,
 				    0, 1);
 			if (error != 0)
 				vput(newvp);
 		}
 	}
 	if (!error) {
 		if ((cnp->cn_flags & MAKEENTRY) && attrflag) {
 			if (dvp != newvp)
 				cache_enter_time(dvp, newvp, cnp,
 				    &nfsva.na_ctime, NULL);
 			else
 				printf("nfs_create: bogus NFS server returned "
 				    "the directory as the new file object\n");
 		}
 		*ap->a_vpp = newvp;
 	} else if (NFS_ISV4(dvp)) {
 		error = nfscl_maperr(curthread, error, vap->va_uid,
 		    vap->va_gid);
 	}
 	NFSLOCKNODE(dnp);
 	dnp->n_flag |= NMODIFIED;
 	if (!dattrflag) {
 		dnp->n_attrstamp = 0;
 		KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp);
 	}
 	NFSUNLOCKNODE(dnp);
 	return (error);
 }
 
 /*
  * nfs file remove call
  * To try and make nfs semantics closer to ufs semantics, a file that has
  * other processes using the vnode is renamed instead of removed and then
  * removed later on the last close.
  * - If v_usecount > 1
  *	  If a rename is not already in the works
  *	     call nfs_sillyrename() to set it up
  *     else
  *	  do the remove rpc
  */
 static int
 nfs_remove(struct vop_remove_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct vnode *dvp = ap->a_dvp;
 	struct componentname *cnp = ap->a_cnp;
 	struct nfsnode *np = VTONFS(vp);
 	int error = 0;
 	struct vattr vattr;
 
-	KASSERT((cnp->cn_flags & HASBUF) != 0, ("nfs_remove: no name"));
 	KASSERT(vrefcnt(vp) > 0, ("nfs_remove: bad v_usecount"));
 	if (vp->v_type == VDIR)
 		error = EPERM;
 	else if (vrefcnt(vp) == 1 || (np->n_sillyrename &&
 	    VOP_GETATTR(vp, &vattr, cnp->cn_cred) == 0 &&
 	    vattr.va_nlink > 1)) {
 		/*
 		 * Purge the name cache so that the chance of a lookup for
 		 * the name succeeding while the remove is in progress is
 		 * minimized. Without node locking it can still happen, such
 		 * that an I/O op returns ESTALE, but since you get this if
 		 * another host removes the file..
 		 */
 		cache_purge(vp);
 		/*
 		 * throw away biocache buffers, mainly to avoid
 		 * unnecessary delayed writes later.
 		 */
 		error = ncl_vinvalbuf(vp, 0, curthread, 1);
 		if (error != EINTR && error != EIO)
 			/* Do the rpc */
 			error = nfs_removerpc(dvp, vp, cnp->cn_nameptr,
 			    cnp->cn_namelen, cnp->cn_cred, curthread);
 		/*
 		 * Kludge City: If the first reply to the remove rpc is lost..
 		 *   the reply to the retransmitted request will be ENOENT
 		 *   since the file was in fact removed
 		 *   Therefore, we cheat and return success.
 		 */
 		if (error == ENOENT)
 			error = 0;
 	} else if (!np->n_sillyrename)
 		error = nfs_sillyrename(dvp, vp, cnp);
 	NFSLOCKNODE(np);
 	np->n_attrstamp = 0;
 	NFSUNLOCKNODE(np);
 	KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
 	return (error);
 }
 
 /*
  * nfs file remove rpc called from nfs_inactive
  */
 int
 ncl_removeit(struct sillyrename *sp, struct vnode *vp)
 {
 	/*
 	 * Make sure that the directory vnode is still valid.
 	 * XXX we should lock sp->s_dvp here.
 	 */
 	if (sp->s_dvp->v_type == VBAD)
 		return (0);
 	return (nfs_removerpc(sp->s_dvp, vp, sp->s_name, sp->s_namlen,
 	    sp->s_cred, NULL));
 }
 
 /*
  * Nfs remove rpc, called from nfs_remove() and ncl_removeit().
  */
 static int
 nfs_removerpc(struct vnode *dvp, struct vnode *vp, char *name,
     int namelen, struct ucred *cred, struct thread *td)
 {
 	struct nfsvattr dnfsva;
 	struct nfsnode *dnp = VTONFS(dvp);
 	int error = 0, dattrflag;
 
 	NFSLOCKNODE(dnp);
 	dnp->n_flag |= NREMOVEINPROG;
 	NFSUNLOCKNODE(dnp);
 	error = nfsrpc_remove(dvp, name, namelen, vp, cred, td, &dnfsva,
 	    &dattrflag);
 	NFSLOCKNODE(dnp);
 	if ((dnp->n_flag & NREMOVEWANT)) {
 		dnp->n_flag &= ~(NREMOVEWANT | NREMOVEINPROG);
 		NFSUNLOCKNODE(dnp);
 		wakeup((caddr_t)dnp);
 	} else {
 		dnp->n_flag &= ~NREMOVEINPROG;
 		NFSUNLOCKNODE(dnp);
 	}
 	if (dattrflag)
 		(void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, 0, 1);
 	NFSLOCKNODE(dnp);
 	dnp->n_flag |= NMODIFIED;
 	if (!dattrflag) {
 		dnp->n_attrstamp = 0;
 		KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp);
 	}
 	NFSUNLOCKNODE(dnp);
 	if (error && NFS_ISV4(dvp))
 		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
 	return (error);
 }
 
 /*
  * nfs file rename call
  */
 static int
 nfs_rename(struct vop_rename_args *ap)
 {
 	struct vnode *fvp = ap->a_fvp;
 	struct vnode *tvp = ap->a_tvp;
 	struct vnode *fdvp = ap->a_fdvp;
 	struct vnode *tdvp = ap->a_tdvp;
 	struct componentname *tcnp = ap->a_tcnp;
 	struct componentname *fcnp = ap->a_fcnp;
 	struct nfsnode *fnp = VTONFS(ap->a_fvp);
 	struct nfsnode *tdnp = VTONFS(ap->a_tdvp);
 	struct nfsv4node *newv4 = NULL;
 	int error;
 
-	KASSERT((tcnp->cn_flags & HASBUF) != 0 &&
-	    (fcnp->cn_flags & HASBUF) != 0, ("nfs_rename: no name"));
 	/* Check for cross-device rename */
 	if ((fvp->v_mount != tdvp->v_mount) ||
 	    (tvp && (fvp->v_mount != tvp->v_mount))) {
 		error = EXDEV;
 		goto out;
 	}
 
 	if (fvp == tvp) {
 		printf("nfs_rename: fvp == tvp (can't happen)\n");
 		error = 0;
 		goto out;
 	}
 	if ((error = NFSVOPLOCK(fvp, LK_EXCLUSIVE)) != 0)
 		goto out;
 
 	/*
 	 * We have to flush B_DELWRI data prior to renaming
 	 * the file.  If we don't, the delayed-write buffers
 	 * can be flushed out later after the file has gone stale
 	 * under NFSV3.  NFSV2 does not have this problem because
 	 * ( as far as I can tell ) it flushes dirty buffers more
 	 * often.
 	 * 
 	 * Skip the rename operation if the fsync fails, this can happen
 	 * due to the server's volume being full, when we pushed out data
 	 * that was written back to our cache earlier. Not checking for
 	 * this condition can result in potential (silent) data loss.
 	 */
 	error = VOP_FSYNC(fvp, MNT_WAIT, curthread);
 	NFSVOPUNLOCK(fvp);
 	if (!error && tvp)
 		error = VOP_FSYNC(tvp, MNT_WAIT, curthread);
 	if (error)
 		goto out;
 
 	/*
 	 * If the tvp exists and is in use, sillyrename it before doing the
 	 * rename of the new file over it.
 	 * XXX Can't sillyrename a directory.
 	 */
 	if (tvp && vrefcnt(tvp) > 1 && !VTONFS(tvp)->n_sillyrename &&
 		tvp->v_type != VDIR && !nfs_sillyrename(tdvp, tvp, tcnp)) {
 		vput(tvp);
 		tvp = NULL;
 	}
 
 	error = nfs_renamerpc(fdvp, fvp, fcnp->cn_nameptr, fcnp->cn_namelen,
 	    tdvp, tvp, tcnp->cn_nameptr, tcnp->cn_namelen, tcnp->cn_cred,
 	    curthread);
 
 	if (error == 0 && NFS_ISV4(tdvp)) {
 		/*
 		 * For NFSv4, check to see if it is the same name and
 		 * replace the name, if it is different.
 		 */
 		newv4 = malloc(
 		    sizeof (struct nfsv4node) +
 		    tdnp->n_fhp->nfh_len + tcnp->cn_namelen - 1,
 		    M_NFSV4NODE, M_WAITOK);
 		NFSLOCKNODE(tdnp);
 		NFSLOCKNODE(fnp);
 		if (fnp->n_v4 != NULL && fvp->v_type == VREG &&
 		    (fnp->n_v4->n4_namelen != tcnp->cn_namelen ||
 		      NFSBCMP(tcnp->cn_nameptr, NFS4NODENAME(fnp->n_v4),
 		      tcnp->cn_namelen) ||
 		      tdnp->n_fhp->nfh_len != fnp->n_v4->n4_fhlen ||
 		      NFSBCMP(tdnp->n_fhp->nfh_fh, fnp->n_v4->n4_data,
 			tdnp->n_fhp->nfh_len))) {
 #ifdef notdef
 { char nnn[100]; int nnnl;
 nnnl = (tcnp->cn_namelen < 100) ? tcnp->cn_namelen : 99;
 bcopy(tcnp->cn_nameptr, nnn, nnnl);
 nnn[nnnl] = '\0';
 printf("ren replace=%s\n",nnn);
 }
 #endif
 			free(fnp->n_v4, M_NFSV4NODE);
 			fnp->n_v4 = newv4;
 			newv4 = NULL;
 			fnp->n_v4->n4_fhlen = tdnp->n_fhp->nfh_len;
 			fnp->n_v4->n4_namelen = tcnp->cn_namelen;
 			NFSBCOPY(tdnp->n_fhp->nfh_fh, fnp->n_v4->n4_data,
 			    tdnp->n_fhp->nfh_len);
 			NFSBCOPY(tcnp->cn_nameptr,
 			    NFS4NODENAME(fnp->n_v4), tcnp->cn_namelen);
 		}
 		NFSUNLOCKNODE(tdnp);
 		NFSUNLOCKNODE(fnp);
 		if (newv4 != NULL)
 			free(newv4, M_NFSV4NODE);
 	}
 
 	if (fvp->v_type == VDIR) {
 		if (tvp != NULL && tvp->v_type == VDIR)
 			cache_purge(tdvp);
 		cache_purge(fdvp);
 	}
 
 out:
 	if (tdvp == tvp)
 		vrele(tdvp);
 	else
 		vput(tdvp);
 	if (tvp)
 		vput(tvp);
 	vrele(fdvp);
 	vrele(fvp);
 	/*
 	 * Kludge: Map ENOENT => 0 assuming that it is a reply to a retry.
 	 */
 	if (error == ENOENT)
 		error = 0;
 	return (error);
 }
 
 /*
  * nfs file rename rpc called from nfs_remove() above
  */
 static int
 nfs_renameit(struct vnode *sdvp, struct vnode *svp, struct componentname *scnp,
     struct sillyrename *sp)
 {
 
 	return (nfs_renamerpc(sdvp, svp, scnp->cn_nameptr, scnp->cn_namelen,
 	    sdvp, NULL, sp->s_name, sp->s_namlen, scnp->cn_cred,
 	    curthread));
 }
 
 /*
  * Do an nfs rename rpc. Called from nfs_rename() and nfs_renameit().
  */
 static int
 nfs_renamerpc(struct vnode *fdvp, struct vnode *fvp, char *fnameptr,
     int fnamelen, struct vnode *tdvp, struct vnode *tvp, char *tnameptr,
     int tnamelen, struct ucred *cred, struct thread *td)
 {
 	struct nfsvattr fnfsva, tnfsva;
 	struct nfsnode *fdnp = VTONFS(fdvp);
 	struct nfsnode *tdnp = VTONFS(tdvp);
 	int error = 0, fattrflag, tattrflag;
 
 	error = nfsrpc_rename(fdvp, fvp, fnameptr, fnamelen, tdvp, tvp,
 	    tnameptr, tnamelen, cred, td, &fnfsva, &tnfsva, &fattrflag,
 	    &tattrflag);
 	NFSLOCKNODE(fdnp);
 	fdnp->n_flag |= NMODIFIED;
 	if (fattrflag != 0) {
 		NFSUNLOCKNODE(fdnp);
 		(void) nfscl_loadattrcache(&fdvp, &fnfsva, NULL, 0, 1);
 	} else {
 		fdnp->n_attrstamp = 0;
 		NFSUNLOCKNODE(fdnp);
 		KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(fdvp);
 	}
 	NFSLOCKNODE(tdnp);
 	tdnp->n_flag |= NMODIFIED;
 	if (tattrflag != 0) {
 		NFSUNLOCKNODE(tdnp);
 		(void) nfscl_loadattrcache(&tdvp, &tnfsva, NULL, 0, 1);
 	} else {
 		tdnp->n_attrstamp = 0;
 		NFSUNLOCKNODE(tdnp);
 		KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(tdvp);
 	}
 	if (error && NFS_ISV4(fdvp))
 		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
 	return (error);
 }
 
 /*
  * nfs hard link create call
  */
 static int
 nfs_link(struct vop_link_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct vnode *tdvp = ap->a_tdvp;
 	struct componentname *cnp = ap->a_cnp;
 	struct nfsnode *np, *tdnp;
 	struct nfsvattr nfsva, dnfsva;
 	int error = 0, attrflag, dattrflag;
 
 	/*
 	 * Push all writes to the server, so that the attribute cache
 	 * doesn't get "out of sync" with the server.
 	 * XXX There should be a better way!
 	 */
 	VOP_FSYNC(vp, MNT_WAIT, curthread);
 
 	error = nfsrpc_link(tdvp, vp, cnp->cn_nameptr, cnp->cn_namelen,
 	    cnp->cn_cred, curthread, &dnfsva, &nfsva, &attrflag, &dattrflag);
 	tdnp = VTONFS(tdvp);
 	NFSLOCKNODE(tdnp);
 	tdnp->n_flag |= NMODIFIED;
 	if (dattrflag != 0) {
 		NFSUNLOCKNODE(tdnp);
 		(void) nfscl_loadattrcache(&tdvp, &dnfsva, NULL, 0, 1);
 	} else {
 		tdnp->n_attrstamp = 0;
 		NFSUNLOCKNODE(tdnp);
 		KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(tdvp);
 	}
 	if (attrflag)
 		(void) nfscl_loadattrcache(&vp, &nfsva, NULL, 0, 1);
 	else {
 		np = VTONFS(vp);
 		NFSLOCKNODE(np);
 		np->n_attrstamp = 0;
 		NFSUNLOCKNODE(np);
 		KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
 	}
 	/*
 	 * If negative lookup caching is enabled, I might as well
 	 * add an entry for this node. Not necessary for correctness,
 	 * but if negative caching is enabled, then the system
 	 * must care about lookup caching hit rate, so...
 	 */
 	if (VFSTONFS(vp->v_mount)->nm_negnametimeo != 0 &&
 	    (cnp->cn_flags & MAKEENTRY) && attrflag != 0 && error == 0) {
 		if (tdvp != vp)
 			cache_enter_time(tdvp, vp, cnp, &nfsva.na_ctime, NULL);
 		else
 			printf("nfs_link: bogus NFS server returned "
 			    "the directory as the new link\n");
 	}
 	if (error && NFS_ISV4(vp))
 		error = nfscl_maperr(curthread, error, (uid_t)0,
 		    (gid_t)0);
 	return (error);
 }
 
 /*
  * nfs symbolic link create call
  */
 static int
 nfs_symlink(struct vop_symlink_args *ap)
 {
 	struct vnode *dvp = ap->a_dvp;
 	struct vattr *vap = ap->a_vap;
 	struct componentname *cnp = ap->a_cnp;
 	struct nfsvattr nfsva, dnfsva;
 	struct nfsfh *nfhp;
 	struct nfsnode *np = NULL, *dnp;
 	struct vnode *newvp = NULL;
 	int error = 0, attrflag, dattrflag, ret;
 
 	vap->va_type = VLNK;
 	error = nfsrpc_symlink(dvp, cnp->cn_nameptr, cnp->cn_namelen,
 	    ap->a_target, vap, cnp->cn_cred, curthread, &dnfsva,
 	    &nfsva, &nfhp, &attrflag, &dattrflag);
 	if (nfhp) {
 		ret = nfscl_nget(dvp->v_mount, dvp, nfhp, cnp, curthread,
 		    &np, LK_EXCLUSIVE);
 		if (!ret)
 			newvp = NFSTOV(np);
 		else if (!error)
 			error = ret;
 	}
 	if (newvp != NULL) {
 		if (attrflag)
 			(void) nfscl_loadattrcache(&newvp, &nfsva, NULL, 0, 1);
 	} else if (!error) {
 		/*
 		 * If we do not have an error and we could not extract the
 		 * newvp from the response due to the request being NFSv2, we
 		 * have to do a lookup in order to obtain a newvp to return.
 		 */
 		error = nfs_lookitup(dvp, cnp->cn_nameptr, cnp->cn_namelen,
 		    cnp->cn_cred, curthread, &np);
 		if (!error)
 			newvp = NFSTOV(np);
 	}
 	if (error) {
 		if (newvp)
 			vput(newvp);
 		if (NFS_ISV4(dvp))
 			error = nfscl_maperr(curthread, error,
 			    vap->va_uid, vap->va_gid);
 	} else {
 		*ap->a_vpp = newvp;
 	}
 
 	dnp = VTONFS(dvp);
 	NFSLOCKNODE(dnp);
 	dnp->n_flag |= NMODIFIED;
 	if (dattrflag != 0) {
 		NFSUNLOCKNODE(dnp);
 		(void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, 0, 1);
 	} else {
 		dnp->n_attrstamp = 0;
 		NFSUNLOCKNODE(dnp);
 		KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp);
 	}
 	/*
 	 * If negative lookup caching is enabled, I might as well
 	 * add an entry for this node. Not necessary for correctness,
 	 * but if negative caching is enabled, then the system
 	 * must care about lookup caching hit rate, so...
 	 */
 	if (VFSTONFS(dvp->v_mount)->nm_negnametimeo != 0 &&
 	    (cnp->cn_flags & MAKEENTRY) && attrflag != 0 && error == 0) {
 		if (dvp != newvp)
 			cache_enter_time(dvp, newvp, cnp, &nfsva.na_ctime,
 			    NULL);
 		else
 			printf("nfs_symlink: bogus NFS server returned "
 			    "the directory as the new file object\n");
 	}
 	return (error);
 }
 
 /*
  * nfs make dir call
  */
 static int
 nfs_mkdir(struct vop_mkdir_args *ap)
 {
 	struct vnode *dvp = ap->a_dvp;
 	struct vattr *vap = ap->a_vap;
 	struct componentname *cnp = ap->a_cnp;
 	struct nfsnode *np = NULL, *dnp;
 	struct vnode *newvp = NULL;
 	struct vattr vattr;
 	struct nfsfh *nfhp;
 	struct nfsvattr nfsva, dnfsva;
 	int error = 0, attrflag, dattrflag, ret;
 
 	if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred)) != 0)
 		return (error);
 	vap->va_type = VDIR;
 	error = nfsrpc_mkdir(dvp, cnp->cn_nameptr, cnp->cn_namelen,
 	    vap, cnp->cn_cred, curthread, &dnfsva, &nfsva, &nfhp,
 	    &attrflag, &dattrflag);
 	dnp = VTONFS(dvp);
 	NFSLOCKNODE(dnp);
 	dnp->n_flag |= NMODIFIED;
 	if (dattrflag != 0) {
 		NFSUNLOCKNODE(dnp);
 		(void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, 0, 1);
 	} else {
 		dnp->n_attrstamp = 0;
 		NFSUNLOCKNODE(dnp);
 		KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp);
 	}
 	if (nfhp) {
 		ret = nfscl_nget(dvp->v_mount, dvp, nfhp, cnp, curthread,
 		    &np, LK_EXCLUSIVE);
 		if (!ret) {
 			newvp = NFSTOV(np);
 			if (attrflag)
 			   (void) nfscl_loadattrcache(&newvp, &nfsva, NULL,
 				0, 1);
 		} else if (!error)
 			error = ret;
 	}
 	if (!error && newvp == NULL) {
 		error = nfs_lookitup(dvp, cnp->cn_nameptr, cnp->cn_namelen,
 		    cnp->cn_cred, curthread, &np);
 		if (!error) {
 			newvp = NFSTOV(np);
 			if (newvp->v_type != VDIR)
 				error = EEXIST;
 		}
 	}
 	if (error) {
 		if (newvp)
 			vput(newvp);
 		if (NFS_ISV4(dvp))
 			error = nfscl_maperr(curthread, error,
 			    vap->va_uid, vap->va_gid);
 	} else {
 		/*
 		 * If negative lookup caching is enabled, I might as well
 		 * add an entry for this node. Not necessary for correctness,
 		 * but if negative caching is enabled, then the system
 		 * must care about lookup caching hit rate, so...
 		 */
 		if (VFSTONFS(dvp->v_mount)->nm_negnametimeo != 0 &&
 		    (cnp->cn_flags & MAKEENTRY) &&
 		    attrflag != 0 && dattrflag != 0) {
 			if (dvp != newvp)
 				cache_enter_time(dvp, newvp, cnp,
 				    &nfsva.na_ctime, &dnfsva.na_ctime);
 			else
 				printf("nfs_mkdir: bogus NFS server returned "
 				    "the directory that the directory was "
 				    "created in as the new file object\n");
 		}
 		*ap->a_vpp = newvp;
 	}
 	return (error);
 }
 
 /*
  * nfs remove directory call
  */
 static int
 nfs_rmdir(struct vop_rmdir_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct vnode *dvp = ap->a_dvp;
 	struct componentname *cnp = ap->a_cnp;
 	struct nfsnode *dnp;
 	struct nfsvattr dnfsva;
 	int error, dattrflag;
 
 	if (dvp == vp)
 		return (EINVAL);
 	error = nfsrpc_rmdir(dvp, cnp->cn_nameptr, cnp->cn_namelen,
 	    cnp->cn_cred, curthread, &dnfsva, &dattrflag);
 	dnp = VTONFS(dvp);
 	NFSLOCKNODE(dnp);
 	dnp->n_flag |= NMODIFIED;
 	if (dattrflag != 0) {
 		NFSUNLOCKNODE(dnp);
 		(void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, 0, 1);
 	} else {
 		dnp->n_attrstamp = 0;
 		NFSUNLOCKNODE(dnp);
 		KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp);
 	}
 
 	cache_purge(dvp);
 	cache_purge(vp);
 	if (error && NFS_ISV4(dvp))
 		error = nfscl_maperr(curthread, error, (uid_t)0,
 		    (gid_t)0);
 	/*
 	 * Kludge: Map ENOENT => 0 assuming that you have a reply to a retry.
 	 */
 	if (error == ENOENT)
 		error = 0;
 	return (error);
 }
 
 /*
  * nfs readdir call
  */
 static int
 nfs_readdir(struct vop_readdir_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct nfsnode *np = VTONFS(vp);
 	struct uio *uio = ap->a_uio;
 	ssize_t tresid, left;
 	int error = 0;
 	struct vattr vattr;
 
 	if (ap->a_eofflag != NULL)
 		*ap->a_eofflag = 0;
 	if (vp->v_type != VDIR) 
 		return(EPERM);
 
 	/*
 	 * First, check for hit on the EOF offset cache
 	 */
 	NFSLOCKNODE(np);
 	if (np->n_direofoffset > 0 && uio->uio_offset >= np->n_direofoffset &&
 	    (np->n_flag & NMODIFIED) == 0) {
 		NFSUNLOCKNODE(np);
 		if (VOP_GETATTR(vp, &vattr, ap->a_cred) == 0) {
 			NFSLOCKNODE(np);
 			if ((NFS_ISV4(vp) && np->n_change == vattr.va_filerev) ||
 			    !NFS_TIMESPEC_COMPARE(&np->n_mtime, &vattr.va_mtime)) {
 				NFSUNLOCKNODE(np);
 				NFSINCRGLOBAL(nfsstatsv1.direofcache_hits);
 				if (ap->a_eofflag != NULL)
 					*ap->a_eofflag = 1;
 				return (0);
 			} else
 				NFSUNLOCKNODE(np);
 		}
 	} else
 		NFSUNLOCKNODE(np);
 
 	/*
 	 * NFS always guarantees that directory entries don't straddle
 	 * DIRBLKSIZ boundaries.  As such, we need to limit the size
 	 * to an exact multiple of DIRBLKSIZ, to avoid copying a partial
 	 * directory entry.
 	 */
 	left = uio->uio_resid % DIRBLKSIZ;
 	if (left == uio->uio_resid)
 		return (EINVAL);
 	uio->uio_resid -= left;
 
 	/*
 	 * Call ncl_bioread() to do the real work.
 	 */
 	tresid = uio->uio_resid;
 	error = ncl_bioread(vp, uio, 0, ap->a_cred);
 
 	if (!error && uio->uio_resid == tresid) {
 		NFSINCRGLOBAL(nfsstatsv1.direofcache_misses);
 		if (ap->a_eofflag != NULL)
 			*ap->a_eofflag = 1;
 	}
 
 	/* Add the partial DIRBLKSIZ (left) back in. */
 	uio->uio_resid += left;
 	return (error);
 }
 
 /*
  * Readdir rpc call.
  * Called from below the buffer cache by ncl_doio().
  */
 int
 ncl_readdirrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred,
     struct thread *td)
 {
 	struct nfsvattr nfsva;
 	nfsuint64 *cookiep, cookie;
 	struct nfsnode *dnp = VTONFS(vp);
 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
 	int error = 0, eof, attrflag;
 
 	KASSERT(uiop->uio_iovcnt == 1 &&
 	    (uiop->uio_offset & (DIRBLKSIZ - 1)) == 0 &&
 	    (uiop->uio_resid & (DIRBLKSIZ - 1)) == 0,
 	    ("nfs readdirrpc bad uio"));
 
 	/*
 	 * If there is no cookie, assume directory was stale.
 	 */
 	ncl_dircookie_lock(dnp);
 	NFSUNLOCKNODE(dnp);
 	cookiep = ncl_getcookie(dnp, uiop->uio_offset, 0);
 	if (cookiep) {
 		cookie = *cookiep;
 		ncl_dircookie_unlock(dnp);
 	} else {
 		ncl_dircookie_unlock(dnp);		
 		return (NFSERR_BAD_COOKIE);
 	}
 
 	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp))
 		(void)ncl_fsinfo(nmp, vp, cred, td);
 
 	error = nfsrpc_readdir(vp, uiop, &cookie, cred, td, &nfsva,
 	    &attrflag, &eof);
 	if (attrflag)
 		(void) nfscl_loadattrcache(&vp, &nfsva, NULL, 0, 1);
 
 	if (!error) {
 		/*
 		 * We are now either at the end of the directory or have filled
 		 * the block.
 		 */
 		if (eof) {
 			NFSLOCKNODE(dnp);
 			dnp->n_direofoffset = uiop->uio_offset;
 			NFSUNLOCKNODE(dnp);
 		} else {
 			if (uiop->uio_resid > 0)
 				printf("EEK! readdirrpc resid > 0\n");
 			ncl_dircookie_lock(dnp);
 			NFSUNLOCKNODE(dnp);
 			cookiep = ncl_getcookie(dnp, uiop->uio_offset, 1);
 			*cookiep = cookie;
 			ncl_dircookie_unlock(dnp);
 		}
 	} else if (NFS_ISV4(vp)) {
 		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
 	}
 	return (error);
 }
 
 /*
  * NFS V3 readdir plus RPC. Used in place of ncl_readdirrpc().
  */
 int
 ncl_readdirplusrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred,
     struct thread *td)
 {
 	struct nfsvattr nfsva;
 	nfsuint64 *cookiep, cookie;
 	struct nfsnode *dnp = VTONFS(vp);
 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
 	int error = 0, attrflag, eof;
 
 	KASSERT(uiop->uio_iovcnt == 1 &&
 	    (uiop->uio_offset & (DIRBLKSIZ - 1)) == 0 &&
 	    (uiop->uio_resid & (DIRBLKSIZ - 1)) == 0,
 	    ("nfs readdirplusrpc bad uio"));
 
 	/*
 	 * If there is no cookie, assume directory was stale.
 	 */
 	ncl_dircookie_lock(dnp);
 	NFSUNLOCKNODE(dnp);
 	cookiep = ncl_getcookie(dnp, uiop->uio_offset, 0);
 	if (cookiep) {
 		cookie = *cookiep;
 		ncl_dircookie_unlock(dnp);
 	} else {
 		ncl_dircookie_unlock(dnp);
 		return (NFSERR_BAD_COOKIE);
 	}
 
 	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp))
 		(void)ncl_fsinfo(nmp, vp, cred, td);
 	error = nfsrpc_readdirplus(vp, uiop, &cookie, cred, td, &nfsva,
 	    &attrflag, &eof);
 	if (attrflag)
 		(void) nfscl_loadattrcache(&vp, &nfsva, NULL, 0, 1);
 
 	if (!error) {
 		/*
 		 * We are now either at end of the directory or have filled the
 		 * the block.
 		 */
 		if (eof) {
 			NFSLOCKNODE(dnp);
 			dnp->n_direofoffset = uiop->uio_offset;
 			NFSUNLOCKNODE(dnp);
 		} else {
 			if (uiop->uio_resid > 0)
 				printf("EEK! readdirplusrpc resid > 0\n");
 			ncl_dircookie_lock(dnp);
 			NFSUNLOCKNODE(dnp);
 			cookiep = ncl_getcookie(dnp, uiop->uio_offset, 1);
 			*cookiep = cookie;
 			ncl_dircookie_unlock(dnp);
 		}
 	} else if (NFS_ISV4(vp)) {
 		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
 	}
 	return (error);
 }
 
 /*
  * Silly rename. To make the NFS filesystem that is stateless look a little
  * more like the "ufs" a remove of an active vnode is translated to a rename
  * to a funny looking filename that is removed by nfs_inactive on the
  * nfsnode. There is the potential for another process on a different client
  * to create the same funny name between the nfs_lookitup() fails and the
  * nfs_rename() completes, but...
  */
 static int
 nfs_sillyrename(struct vnode *dvp, struct vnode *vp, struct componentname *cnp)
 {
 	struct sillyrename *sp;
 	struct nfsnode *np;
 	int error;
 	short pid;
 	unsigned int lticks;
 
 	cache_purge(dvp);
 	np = VTONFS(vp);
 	KASSERT(vp->v_type != VDIR, ("nfs: sillyrename dir"));
 	sp = malloc(sizeof (struct sillyrename),
 	    M_NEWNFSREQ, M_WAITOK);
 	sp->s_cred = crhold(cnp->cn_cred);
 	sp->s_dvp = dvp;
 	VREF(dvp);
 
 	/* 
 	 * Fudge together a funny name.
 	 * Changing the format of the funny name to accommodate more 
 	 * sillynames per directory.
 	 * The name is now changed to .nfs.<ticks>.<pid>.4, where ticks is 
 	 * CPU ticks since boot.
 	 */
 	pid = curthread->td_proc->p_pid;
 	lticks = (unsigned int)ticks;
 	for ( ; ; ) {
 		sp->s_namlen = sprintf(sp->s_name, 
 				       ".nfs.%08x.%04x4.4", lticks, 
 				       pid);
 		if (nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred,
 				 curthread, NULL))
 			break;
 		lticks++;
 	}
 	error = nfs_renameit(dvp, vp, cnp, sp);
 	if (error)
 		goto bad;
 	error = nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred,
 		curthread, &np);
 	np->n_sillyrename = sp;
 	return (0);
 bad:
 	vrele(sp->s_dvp);
 	crfree(sp->s_cred);
 	free(sp, M_NEWNFSREQ);
 	return (error);
 }
 
 /*
  * Look up a file name and optionally either update the file handle or
  * allocate an nfsnode, depending on the value of npp.
  * npp == NULL	--> just do the lookup
  * *npp == NULL --> allocate a new nfsnode and make sure attributes are
  *			handled too
  * *npp != NULL --> update the file handle in the vnode
  */
 static int
 nfs_lookitup(struct vnode *dvp, char *name, int len, struct ucred *cred,
     struct thread *td, struct nfsnode **npp)
 {
 	struct vnode *newvp = NULL, *vp;
 	struct nfsnode *np, *dnp = VTONFS(dvp);
 	struct nfsfh *nfhp, *onfhp;
 	struct nfsvattr nfsva, dnfsva;
 	struct componentname cn;
 	int error = 0, attrflag, dattrflag;
 	u_int hash;
 	struct timespec ts;
 
 	nanouptime(&ts);
 	error = nfsrpc_lookup(dvp, name, len, cred, td, &dnfsva, &nfsva,
 	    &nfhp, &attrflag, &dattrflag, 0);
 	if (dattrflag)
 		(void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, 0, 1);
 	if (npp && !error) {
 		if (*npp != NULL) {
 		    np = *npp;
 		    vp = NFSTOV(np);
 		    /*
 		     * For NFSv4, check to see if it is the same name and
 		     * replace the name, if it is different.
 		     */
 		    if (np->n_v4 != NULL && nfsva.na_type == VREG &&
 			(np->n_v4->n4_namelen != len ||
 			 NFSBCMP(name, NFS4NODENAME(np->n_v4), len) ||
 			 dnp->n_fhp->nfh_len != np->n_v4->n4_fhlen ||
 			 NFSBCMP(dnp->n_fhp->nfh_fh, np->n_v4->n4_data,
 			 dnp->n_fhp->nfh_len))) {
 #ifdef notdef
 { char nnn[100]; int nnnl;
 nnnl = (len < 100) ? len : 99;
 bcopy(name, nnn, nnnl);
 nnn[nnnl] = '\0';
 printf("replace=%s\n",nnn);
 }
 #endif
 			    free(np->n_v4, M_NFSV4NODE);
 			    np->n_v4 = malloc(
 				sizeof (struct nfsv4node) +
 				dnp->n_fhp->nfh_len + len - 1,
 				M_NFSV4NODE, M_WAITOK);
 			    np->n_v4->n4_fhlen = dnp->n_fhp->nfh_len;
 			    np->n_v4->n4_namelen = len;
 			    NFSBCOPY(dnp->n_fhp->nfh_fh, np->n_v4->n4_data,
 				dnp->n_fhp->nfh_len);
 			    NFSBCOPY(name, NFS4NODENAME(np->n_v4), len);
 		    }
 		    hash = fnv_32_buf(nfhp->nfh_fh, nfhp->nfh_len,
 			FNV1_32_INIT);
 		    onfhp = np->n_fhp;
 		    /*
 		     * Rehash node for new file handle.
 		     */
 		    vfs_hash_rehash(vp, hash);
 		    np->n_fhp = nfhp;
 		    if (onfhp != NULL)
 			free(onfhp, M_NFSFH);
 		    newvp = NFSTOV(np);
 		} else if (NFS_CMPFH(dnp, nfhp->nfh_fh, nfhp->nfh_len)) {
 		    free(nfhp, M_NFSFH);
 		    VREF(dvp);
 		    newvp = dvp;
 		} else {
 		    cn.cn_nameptr = name;
 		    cn.cn_namelen = len;
 		    error = nfscl_nget(dvp->v_mount, dvp, nfhp, &cn, td,
 			&np, LK_EXCLUSIVE);
 		    if (error)
 			return (error);
 		    newvp = NFSTOV(np);
 		    /*
 		     * If n_localmodtime >= time before RPC, then
 		     * a file modification operation, such as
 		     * VOP_SETATTR() of size, has occurred while
 		     * the Lookup RPC and acquisition of the vnode
 		     * happened.  As such, the attributes might
 		     * be stale, with possibly an incorrect size.
 		     */
 		    NFSLOCKNODE(np);
 		    if (timespecisset(&np->n_localmodtime) &&
 			timespeccmp(&np->n_localmodtime, &ts, >=)) {
 			NFSCL_DEBUG(4, "nfs_lookitup: localmod "
 			    "stale attributes\n");
 			attrflag = 0;
 		    }
 		    NFSUNLOCKNODE(np);
 		}
 		if (!attrflag && *npp == NULL) {
 			if (newvp == dvp)
 				vrele(newvp);
 			else
 				vput(newvp);
 			return (ENOENT);
 		}
 		if (attrflag)
 			(void) nfscl_loadattrcache(&newvp, &nfsva, NULL, 0, 1);
 	}
 	if (npp && *npp == NULL) {
 		if (error) {
 			if (newvp) {
 				if (newvp == dvp)
 					vrele(newvp);
 				else
 					vput(newvp);
 			}
 		} else
 			*npp = np;
 	}
 	if (error && NFS_ISV4(dvp))
 		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
 	return (error);
 }
 
 /*
  * Nfs Version 3 and 4 commit rpc
  */
 int
 ncl_commit(struct vnode *vp, u_quad_t offset, int cnt, struct ucred *cred,
    struct thread *td)
 {
 	struct nfsvattr nfsva;
 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
 	struct nfsnode *np;
 	struct uio uio;
 	int error, attrflag;
 
 	np = VTONFS(vp);
 	error = EIO;
 	attrflag = 0;
 	if (NFSHASPNFS(nmp) && (np->n_flag & NDSCOMMIT) != 0) {
 		uio.uio_offset = offset;
 		uio.uio_resid = cnt;
 		error = nfscl_doiods(vp, &uio, NULL, NULL,
 		    NFSV4OPEN_ACCESSWRITE, 1, cred, td);
 		if (error != 0) {
 			NFSLOCKNODE(np);
 			np->n_flag &= ~NDSCOMMIT;
 			NFSUNLOCKNODE(np);
 		}
 	}
 	if (error != 0) {
 		mtx_lock(&nmp->nm_mtx);
 		if ((nmp->nm_state & NFSSTA_HASWRITEVERF) == 0) {
 			mtx_unlock(&nmp->nm_mtx);
 			return (0);
 		}
 		mtx_unlock(&nmp->nm_mtx);
 		error = nfsrpc_commit(vp, offset, cnt, cred, td, &nfsva,
 		    &attrflag);
 	}
 	if (attrflag != 0)
 		(void) nfscl_loadattrcache(&vp, &nfsva, NULL, 0, 1);
 	if (error != 0 && NFS_ISV4(vp))
 		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
 	return (error);
 }
 
 /*
  * Strategy routine.
  * For async requests when nfsiod(s) are running, queue the request by
  * calling ncl_asyncio(), otherwise just all ncl_doio() to do the
  * request.
  */
 static int
 nfs_strategy(struct vop_strategy_args *ap)
 {
 	struct buf *bp;
 	struct vnode *vp;
 	struct ucred *cr;
 
 	bp = ap->a_bp;
 	vp = ap->a_vp;
 	KASSERT(bp->b_vp == vp, ("missing b_getvp"));
 	KASSERT(!(bp->b_flags & B_DONE),
 	    ("nfs_strategy: buffer %p unexpectedly marked B_DONE", bp));
 
 	if (vp->v_type == VREG && bp->b_blkno == bp->b_lblkno)
 		bp->b_blkno = bp->b_lblkno * (vp->v_bufobj.bo_bsize /
 		    DEV_BSIZE);
 	if (bp->b_iocmd == BIO_READ)
 		cr = bp->b_rcred;
 	else
 		cr = bp->b_wcred;
 
 	/*
 	 * If the op is asynchronous and an i/o daemon is waiting
 	 * queue the request, wake it up and wait for completion
 	 * otherwise just do it ourselves.
 	 */
 	if ((bp->b_flags & B_ASYNC) == 0 ||
 	    ncl_asyncio(VFSTONFS(vp->v_mount), bp, NOCRED, curthread))
 		(void) ncl_doio(vp, bp, cr, curthread, 1);
 	return (0);
 }
 
 /*
  * fsync vnode op. Just call ncl_flush() with commit == 1.
  */
 /* ARGSUSED */
 static int
 nfs_fsync(struct vop_fsync_args *ap)
 {
 
 	if (ap->a_vp->v_type != VREG) {
 		/*
 		 * For NFS, metadata is changed synchronously on the server,
 		 * so there is nothing to flush. Also, ncl_flush() clears
 		 * the NMODIFIED flag and that shouldn't be done here for
 		 * directories.
 		 */
 		return (0);
 	}
 	return (ncl_flush(ap->a_vp, ap->a_waitfor, ap->a_td, 1, 0));
 }
 
 /*
  * Flush all the blocks associated with a vnode.
  * 	Walk through the buffer pool and push any dirty pages
  *	associated with the vnode.
  * If the called_from_renewthread argument is TRUE, it has been called
  * from the NFSv4 renew thread and, as such, cannot block indefinitely
  * waiting for a buffer write to complete.
  */
 int
 ncl_flush(struct vnode *vp, int waitfor, struct thread *td,
     int commit, int called_from_renewthread)
 {
 	struct nfsnode *np = VTONFS(vp);
 	struct buf *bp;
 	int i;
 	struct buf *nbp;
 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
 	int error = 0, slptimeo = 0, slpflag = 0, retv, bvecpos;
 	int passone = 1, trycnt = 0;
 	u_quad_t off, endoff, toff;
 	struct ucred* wcred = NULL;
 	struct buf **bvec = NULL;
 	struct bufobj *bo;
 #ifndef NFS_COMMITBVECSIZ
 #define	NFS_COMMITBVECSIZ	20
 #endif
 	struct buf *bvec_on_stack[NFS_COMMITBVECSIZ];
 	u_int bvecsize = 0, bveccount;
 	struct timespec ts;
 
 	if (called_from_renewthread != 0)
 		slptimeo = hz;
 	if (nmp->nm_flag & NFSMNT_INT)
 		slpflag = PCATCH;
 	if (!commit)
 		passone = 0;
 	bo = &vp->v_bufobj;
 	/*
 	 * A b_flags == (B_DELWRI | B_NEEDCOMMIT) block has been written to the
 	 * server, but has not been committed to stable storage on the server
 	 * yet. On the first pass, the byte range is worked out and the commit
 	 * rpc is done. On the second pass, ncl_writebp() is called to do the
 	 * job.
 	 */
 again:
 	off = (u_quad_t)-1;
 	endoff = 0;
 	bvecpos = 0;
 	if (NFS_ISV34(vp) && commit) {
 		if (bvec != NULL && bvec != bvec_on_stack)
 			free(bvec, M_TEMP);
 		/*
 		 * Count up how many buffers waiting for a commit.
 		 */
 		bveccount = 0;
 		BO_LOCK(bo);
 		TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
 			if (!BUF_ISLOCKED(bp) &&
 			    (bp->b_flags & (B_DELWRI | B_NEEDCOMMIT))
 				== (B_DELWRI | B_NEEDCOMMIT))
 				bveccount++;
 		}
 		/*
 		 * Allocate space to remember the list of bufs to commit.  It is
 		 * important to use M_NOWAIT here to avoid a race with nfs_write.
 		 * If we can't get memory (for whatever reason), we will end up
 		 * committing the buffers one-by-one in the loop below.
 		 */
 		if (bveccount > NFS_COMMITBVECSIZ) {
 			/*
 			 * Release the vnode interlock to avoid a lock
 			 * order reversal.
 			 */
 			BO_UNLOCK(bo);
 			bvec = (struct buf **)
 				malloc(bveccount * sizeof(struct buf *),
 				       M_TEMP, M_NOWAIT);
 			BO_LOCK(bo);
 			if (bvec == NULL) {
 				bvec = bvec_on_stack;
 				bvecsize = NFS_COMMITBVECSIZ;
 			} else
 				bvecsize = bveccount;
 		} else {
 			bvec = bvec_on_stack;
 			bvecsize = NFS_COMMITBVECSIZ;
 		}
 		TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
 			if (bvecpos >= bvecsize)
 				break;
 			if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) {
 				nbp = TAILQ_NEXT(bp, b_bobufs);
 				continue;
 			}
 			if ((bp->b_flags & (B_DELWRI | B_NEEDCOMMIT)) !=
 			    (B_DELWRI | B_NEEDCOMMIT)) {
 				BUF_UNLOCK(bp);
 				nbp = TAILQ_NEXT(bp, b_bobufs);
 				continue;
 			}
 			BO_UNLOCK(bo);
 			bremfree(bp);
 			/*
 			 * Work out if all buffers are using the same cred
 			 * so we can deal with them all with one commit.
 			 *
 			 * NOTE: we are not clearing B_DONE here, so we have
 			 * to do it later on in this routine if we intend to
 			 * initiate I/O on the bp.
 			 *
 			 * Note: to avoid loopback deadlocks, we do not
 			 * assign b_runningbufspace.
 			 */
 			if (wcred == NULL)
 				wcred = bp->b_wcred;
 			else if (wcred != bp->b_wcred)
 				wcred = NOCRED;
 			vfs_busy_pages(bp, 1);
 
 			BO_LOCK(bo);
 			/*
 			 * bp is protected by being locked, but nbp is not
 			 * and vfs_busy_pages() may sleep.  We have to
 			 * recalculate nbp.
 			 */
 			nbp = TAILQ_NEXT(bp, b_bobufs);
 
 			/*
 			 * A list of these buffers is kept so that the
 			 * second loop knows which buffers have actually
 			 * been committed. This is necessary, since there
 			 * may be a race between the commit rpc and new
 			 * uncommitted writes on the file.
 			 */
 			bvec[bvecpos++] = bp;
 			toff = ((u_quad_t)bp->b_blkno) * DEV_BSIZE +
 				bp->b_dirtyoff;
 			if (toff < off)
 				off = toff;
 			toff += (u_quad_t)(bp->b_dirtyend - bp->b_dirtyoff);
 			if (toff > endoff)
 				endoff = toff;
 		}
 		BO_UNLOCK(bo);
 	}
 	if (bvecpos > 0) {
 		/*
 		 * Commit data on the server, as required.
 		 * If all bufs are using the same wcred, then use that with
 		 * one call for all of them, otherwise commit each one
 		 * separately.
 		 */
 		if (wcred != NOCRED)
 			retv = ncl_commit(vp, off, (int)(endoff - off),
 					  wcred, td);
 		else {
 			retv = 0;
 			for (i = 0; i < bvecpos; i++) {
 				off_t off, size;
 				bp = bvec[i];
 				off = ((u_quad_t)bp->b_blkno) * DEV_BSIZE +
 					bp->b_dirtyoff;
 				size = (u_quad_t)(bp->b_dirtyend
 						  - bp->b_dirtyoff);
 				retv = ncl_commit(vp, off, (int)size,
 						  bp->b_wcred, td);
 				if (retv) break;
 			}
 		}
 
 		if (retv == NFSERR_STALEWRITEVERF)
 			ncl_clearcommit(vp->v_mount);
 
 		/*
 		 * Now, either mark the blocks I/O done or mark the
 		 * blocks dirty, depending on whether the commit
 		 * succeeded.
 		 */
 		for (i = 0; i < bvecpos; i++) {
 			bp = bvec[i];
 			bp->b_flags &= ~(B_NEEDCOMMIT | B_CLUSTEROK);
 			if (!NFSCL_FORCEDISM(vp->v_mount) && retv) {
 				/*
 				 * Error, leave B_DELWRI intact
 				 */
 				vfs_unbusy_pages(bp);
 				brelse(bp);
 			} else {
 				/*
 				 * Success, remove B_DELWRI ( bundirty() ).
 				 *
 				 * b_dirtyoff/b_dirtyend seem to be NFS
 				 * specific.  We should probably move that
 				 * into bundirty(). XXX
 				 */
 				bufobj_wref(bo);
 				bp->b_flags |= B_ASYNC;
 				bundirty(bp);
 				bp->b_flags &= ~B_DONE;
 				bp->b_ioflags &= ~BIO_ERROR;
 				bp->b_dirtyoff = bp->b_dirtyend = 0;
 				bufdone(bp);
 			}
 		}
 	}
 
 	/*
 	 * Start/do any write(s) that are required.
 	 */
 loop:
 	BO_LOCK(bo);
 	TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
 		if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) {
 			if (waitfor != MNT_WAIT || passone)
 				continue;
 
 			error = BUF_TIMELOCK(bp,
 			    LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK,
 			    BO_LOCKPTR(bo), "nfsfsync", slpflag, slptimeo);
 			if (error == 0) {
 				BUF_UNLOCK(bp);
 				goto loop;
 			}
 			if (error == ENOLCK) {
 				error = 0;
 				goto loop;
 			}
 			if (called_from_renewthread != 0) {
 				/*
 				 * Return EIO so the flush will be retried
 				 * later.
 				 */
 				error = EIO;
 				goto done;
 			}
 			if (newnfs_sigintr(nmp, td)) {
 				error = EINTR;
 				goto done;
 			}
 			if (slpflag == PCATCH) {
 				slpflag = 0;
 				slptimeo = 2 * hz;
 			}
 			goto loop;
 		}
 		if ((bp->b_flags & B_DELWRI) == 0)
 			panic("nfs_fsync: not dirty");
 		if ((passone || !commit) && (bp->b_flags & B_NEEDCOMMIT)) {
 			BUF_UNLOCK(bp);
 			continue;
 		}
 		BO_UNLOCK(bo);
 		bremfree(bp);
 		bp->b_flags |= B_ASYNC;
 		bwrite(bp);
 		if (newnfs_sigintr(nmp, td)) {
 			error = EINTR;
 			goto done;
 		}
 		goto loop;
 	}
 	if (passone) {
 		passone = 0;
 		BO_UNLOCK(bo);
 		goto again;
 	}
 	if (waitfor == MNT_WAIT) {
 		while (bo->bo_numoutput) {
 			error = bufobj_wwait(bo, slpflag, slptimeo);
 			if (error) {
 			    BO_UNLOCK(bo);
 			    if (called_from_renewthread != 0) {
 				/*
 				 * Return EIO so that the flush will be
 				 * retried later.
 				 */
 				error = EIO;
 				goto done;
 			    }
 			    error = newnfs_sigintr(nmp, td);
 			    if (error)
 				goto done;
 			    if (slpflag == PCATCH) {
 				slpflag = 0;
 				slptimeo = 2 * hz;
 			    }
 			    BO_LOCK(bo);
 			}
 		}
 		if (bo->bo_dirty.bv_cnt != 0 && commit) {
 			BO_UNLOCK(bo);
 			goto loop;
 		}
 		/*
 		 * Wait for all the async IO requests to drain
 		 */
 		BO_UNLOCK(bo);
 		NFSLOCKNODE(np);
 		while (np->n_directio_asyncwr > 0) {
 			np->n_flag |= NFSYNCWAIT;
 			error = newnfs_msleep(td, &np->n_directio_asyncwr,
 			    &np->n_mtx, slpflag | (PRIBIO + 1), 
 			    "nfsfsync", 0);
 			if (error) {
 				if (newnfs_sigintr(nmp, td)) {
 					NFSUNLOCKNODE(np);
 					error = EINTR;	
 					goto done;
 				}
 			}
 		}
 		NFSUNLOCKNODE(np);
 	} else
 		BO_UNLOCK(bo);
 	if (NFSHASPNFS(nmp)) {
 		nfscl_layoutcommit(vp, td);
 		/*
 		 * Invalidate the attribute cache, since writes to a DS
 		 * won't update the size attribute.
 		 */
 		NFSLOCKNODE(np);
 		np->n_attrstamp = 0;
 	} else
 		NFSLOCKNODE(np);
 	if (np->n_flag & NWRITEERR) {
 		error = np->n_error;
 		np->n_flag &= ~NWRITEERR;
 	}
   	if (commit && bo->bo_dirty.bv_cnt == 0 &&
 	    bo->bo_numoutput == 0 && np->n_directio_asyncwr == 0)
   		np->n_flag &= ~NMODIFIED;
 	NFSUNLOCKNODE(np);
 done:
 	if (bvec != NULL && bvec != bvec_on_stack)
 		free(bvec, M_TEMP);
 	if (error == 0 && commit != 0 && waitfor == MNT_WAIT &&
 	    (bo->bo_dirty.bv_cnt != 0 || bo->bo_numoutput != 0 ||
 	    np->n_directio_asyncwr != 0)) {
 		if (trycnt++ < 5) {
 			/* try, try again... */
 			passone = 1;
 			wcred = NULL;
 			bvec = NULL;
 			bvecsize = 0;
 			goto again;
 		}
 		vn_printf(vp, "ncl_flush failed");
 		error = called_from_renewthread != 0 ? EIO : EBUSY;
 	}
 	if (error == 0) {
 		nanouptime(&ts);
 		NFSLOCKNODE(np);
 		np->n_localmodtime = ts;
 		NFSUNLOCKNODE(np);
 	}
 	return (error);
 }
 
 /*
  * NFS advisory byte-level locks.
  */
 static int
 nfs_advlock(struct vop_advlock_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct ucred *cred;
 	struct nfsnode *np = VTONFS(ap->a_vp);
 	struct proc *p = (struct proc *)ap->a_id;
 	struct thread *td = curthread;	/* XXX */
 	struct vattr va;
 	int ret, error;
 	u_quad_t size;
 	struct nfsmount *nmp;
 
 	error = NFSVOPLOCK(vp, LK_SHARED);
 	if (error != 0)
 		return (EBADF);
 	nmp = VFSTONFS(vp->v_mount);
 	if (!NFS_ISV4(vp) || (nmp->nm_flag & NFSMNT_NOLOCKD) != 0) {
 		if ((nmp->nm_flag & NFSMNT_NOLOCKD) != 0) {
 			size = np->n_size;
 			NFSVOPUNLOCK(vp);
 			error = lf_advlock(ap, &(vp->v_lockf), size);
 		} else {
 			if (nfs_advlock_p != NULL)
 				error = nfs_advlock_p(ap);
 			else {
 				NFSVOPUNLOCK(vp);
 				error = ENOLCK;
 			}
 		}
 		if (error == 0 && ap->a_op == F_SETLK) {
 			error = NFSVOPLOCK(vp, LK_SHARED);
 			if (error == 0) {
 				/* Mark that a file lock has been acquired. */
 				NFSLOCKNODE(np);
 				np->n_flag |= NHASBEENLOCKED;
 				NFSUNLOCKNODE(np);
 				NFSVOPUNLOCK(vp);
 			}
 		}
 		return (error);
 	} else if ((ap->a_flags & (F_POSIX | F_FLOCK)) != 0) {
 		if (vp->v_type != VREG) {
 			error = EINVAL;
 			goto out;
 		}
 		if ((ap->a_flags & F_POSIX) != 0)
 			cred = p->p_ucred;
 		else
 			cred = td->td_ucred;
 		NFSVOPLOCK(vp, LK_UPGRADE | LK_RETRY);
 		if (VN_IS_DOOMED(vp)) {
 			error = EBADF;
 			goto out;
 		}
 
 		/*
 		 * If this is unlocking a write locked region, flush and
 		 * commit them before unlocking. This is required by
 		 * RFC3530 Sec. 9.3.2.
 		 */
 		if (ap->a_op == F_UNLCK &&
 		    nfscl_checkwritelocked(vp, ap->a_fl, cred, td, ap->a_id,
 		    ap->a_flags))
 			(void) ncl_flush(vp, MNT_WAIT, td, 1, 0);
 
 		/*
 		 * Mark NFS node as might have acquired a lock.
 		 * This is separate from NHASBEENLOCKED, because it must
 		 * be done before the nfsrpc_advlock() call, which might
 		 * add a nfscllock structure to the client state.
 		 * It is used to check for the case where a nfscllock
 		 * state structure cannot exist for the file.
 		 * Only done for "oneopenown" NFSv4.1/4.2 mounts.
 		 */
 		if (NFSHASNFSV4N(nmp) && NFSHASONEOPENOWN(nmp)) {
 			NFSLOCKNODE(np);
 			np->n_flag |= NMIGHTBELOCKED;
 			NFSUNLOCKNODE(np);
 		}
 
 		/*
 		 * Loop around doing the lock op, while a blocking lock
 		 * must wait for the lock op to succeed.
 		 */
 		do {
 			ret = nfsrpc_advlock(vp, np->n_size, ap->a_op,
 			    ap->a_fl, 0, cred, td, ap->a_id, ap->a_flags);
 			if (ret == NFSERR_DENIED && (ap->a_flags & F_WAIT) &&
 			    ap->a_op == F_SETLK) {
 				NFSVOPUNLOCK(vp);
 				error = nfs_catnap(PZERO | PCATCH, ret,
 				    "ncladvl");
 				if (error)
 					return (EINTR);
 				NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
 				if (VN_IS_DOOMED(vp)) {
 					error = EBADF;
 					goto out;
 				}
 			}
 		} while (ret == NFSERR_DENIED && (ap->a_flags & F_WAIT) &&
 		     ap->a_op == F_SETLK);
 		if (ret == NFSERR_DENIED) {
 			error = EAGAIN;
 			goto out;
 		} else if (ret == EINVAL || ret == EBADF || ret == EINTR) {
 			error = ret;
 			goto out;
 		} else if (ret != 0) {
 			error = EACCES;
 			goto out;
 		}
 
 		/*
 		 * Now, if we just got a lock, invalidate data in the buffer
 		 * cache, as required, so that the coherency conforms with
 		 * RFC3530 Sec. 9.3.2.
 		 */
 		if (ap->a_op == F_SETLK) {
 			if ((np->n_flag & NMODIFIED) == 0) {
 				np->n_attrstamp = 0;
 				KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
 				ret = VOP_GETATTR(vp, &va, cred);
 			}
 			if ((np->n_flag & NMODIFIED) || ret ||
 			    np->n_change != va.va_filerev) {
 				(void) ncl_vinvalbuf(vp, V_SAVE, td, 1);
 				np->n_attrstamp = 0;
 				KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
 				ret = VOP_GETATTR(vp, &va, cred);
 				if (!ret) {
 					np->n_mtime = va.va_mtime;
 					np->n_change = va.va_filerev;
 				}
 			}
 			/* Mark that a file lock has been acquired. */
 			NFSLOCKNODE(np);
 			np->n_flag |= NHASBEENLOCKED;
 			NFSUNLOCKNODE(np);
 		}
 	} else
 		error = EOPNOTSUPP;
 out:
 	NFSVOPUNLOCK(vp);
 	return (error);
 }
 
 /*
  * NFS advisory byte-level locks.
  */
 static int
 nfs_advlockasync(struct vop_advlockasync_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	u_quad_t size;
 	int error;
 
 	if (NFS_ISV4(vp))
 		return (EOPNOTSUPP);
 	error = NFSVOPLOCK(vp, LK_SHARED);
 	if (error)
 		return (error);
 	if ((VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NOLOCKD) != 0) {
 		size = VTONFS(vp)->n_size;
 		NFSVOPUNLOCK(vp);
 		error = lf_advlockasync(ap, &(vp->v_lockf), size);
 	} else {
 		NFSVOPUNLOCK(vp);
 		error = EOPNOTSUPP;
 	}
 	return (error);
 }
 
 /*
  * Print out the contents of an nfsnode.
  */
 static int
 nfs_print(struct vop_print_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct nfsnode *np = VTONFS(vp);
 
 	printf("\tfileid %jd fsid 0x%jx", (uintmax_t)np->n_vattr.na_fileid,
 	    (uintmax_t)np->n_vattr.na_fsid);
 	if (vp->v_type == VFIFO)
 		fifo_printinfo(vp);
 	printf("\n");
 	return (0);
 }
 
 /*
  * This is the "real" nfs::bwrite(struct buf*).
  * We set B_CACHE if this is a VMIO buffer.
  */
 int
 ncl_writebp(struct buf *bp, int force __unused, struct thread *td)
 {
 	int oldflags, rtval;
 
 	if (bp->b_flags & B_INVAL) {
 		brelse(bp);
 		return (0);
 	}
 
 	oldflags = bp->b_flags;
 	bp->b_flags |= B_CACHE;
 
 	/*
 	 * Undirty the bp.  We will redirty it later if the I/O fails.
 	 */
 	bundirty(bp);
 	bp->b_flags &= ~B_DONE;
 	bp->b_ioflags &= ~BIO_ERROR;
 	bp->b_iocmd = BIO_WRITE;
 
 	bufobj_wref(bp->b_bufobj);
 	curthread->td_ru.ru_oublock++;
 
 	/*
 	 * Note: to avoid loopback deadlocks, we do not
 	 * assign b_runningbufspace.
 	 */
 	vfs_busy_pages(bp, 1);
 
 	BUF_KERNPROC(bp);
 	bp->b_iooffset = dbtob(bp->b_blkno);
 	bstrategy(bp);
 
 	if ((oldflags & B_ASYNC) != 0)
 		return (0);
 
 	rtval = bufwait(bp);
 	if (oldflags & B_DELWRI)
 		reassignbuf(bp);
 	brelse(bp);
 	return (rtval);
 }
 
 /*
  * nfs special file access vnode op.
  * Essentially just get vattr and then imitate iaccess() since the device is
  * local to the client.
  */
 static int
 nfsspec_access(struct vop_access_args *ap)
 {
 	struct vattr *vap;
 	struct ucred *cred = ap->a_cred;
 	struct vnode *vp = ap->a_vp;
 	accmode_t accmode = ap->a_accmode;
 	struct vattr vattr;
 	int error;
 
 	/*
 	 * Disallow write attempts on filesystems mounted read-only;
 	 * unless the file is a socket, fifo, or a block or character
 	 * device resident on the filesystem.
 	 */
 	if ((accmode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY)) {
 		switch (vp->v_type) {
 		case VREG:
 		case VDIR:
 		case VLNK:
 			return (EROFS);
 		default:
 			break;
 		}
 	}
 	vap = &vattr;
 	error = VOP_GETATTR(vp, vap, cred);
 	if (error)
 		goto out;
 	error = vaccess(vp->v_type, vap->va_mode, vap->va_uid, vap->va_gid,
 	    accmode, cred);
 out:
 	return error;
 }
 
 /*
  * Read wrapper for fifos.
  */
 static int
 nfsfifo_read(struct vop_read_args *ap)
 {
 	struct nfsnode *np = VTONFS(ap->a_vp);
 	int error;
 
 	/*
 	 * Set access flag.
 	 */
 	NFSLOCKNODE(np);
 	np->n_flag |= NACC;
 	vfs_timestamp(&np->n_atim);
 	NFSUNLOCKNODE(np);
 	error = fifo_specops.vop_read(ap);
 	return error;	
 }
 
 /*
  * Write wrapper for fifos.
  */
 static int
 nfsfifo_write(struct vop_write_args *ap)
 {
 	struct nfsnode *np = VTONFS(ap->a_vp);
 
 	/*
 	 * Set update flag.
 	 */
 	NFSLOCKNODE(np);
 	np->n_flag |= NUPD;
 	vfs_timestamp(&np->n_mtim);
 	NFSUNLOCKNODE(np);
 	return(fifo_specops.vop_write(ap));
 }
 
 /*
  * Close wrapper for fifos.
  *
  * Update the times on the nfsnode then do fifo close.
  */
 static int
 nfsfifo_close(struct vop_close_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct nfsnode *np = VTONFS(vp);
 	struct vattr vattr;
 	struct timespec ts;
 
 	NFSLOCKNODE(np);
 	if (np->n_flag & (NACC | NUPD)) {
 		vfs_timestamp(&ts);
 		if (np->n_flag & NACC)
 			np->n_atim = ts;
 		if (np->n_flag & NUPD)
 			np->n_mtim = ts;
 		np->n_flag |= NCHG;
 		if (vrefcnt(vp) == 1 &&
 		    (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
 			VATTR_NULL(&vattr);
 			if (np->n_flag & NACC)
 				vattr.va_atime = np->n_atim;
 			if (np->n_flag & NUPD)
 				vattr.va_mtime = np->n_mtim;
 			NFSUNLOCKNODE(np);
 			(void)VOP_SETATTR(vp, &vattr, ap->a_cred);
 			goto out;
 		}
 	}
 	NFSUNLOCKNODE(np);
 out:
 	return (fifo_specops.vop_close(ap));
 }
 
 /*
  * Just call ncl_writebp() with the force argument set to 1.
  *
  * NOTE: B_DONE may or may not be set in a_bp on call.
  */
 static int
 nfs_bwrite(struct buf *bp)
 {
 
 	return (ncl_writebp(bp, 1, curthread));
 }
 
 struct buf_ops buf_ops_newnfs = {
 	.bop_name	=	"buf_ops_nfs",
 	.bop_write	=	nfs_bwrite,
 	.bop_strategy	=	bufstrategy,
 	.bop_sync	=	bufsync,
 	.bop_bdflush	=	bufbdflush,
 };
 
 static int
 nfs_getacl(struct vop_getacl_args *ap)
 {
 	int error;
 
 	if (ap->a_type != ACL_TYPE_NFS4)
 		return (EOPNOTSUPP);
 	error = nfsrpc_getacl(ap->a_vp, ap->a_cred, ap->a_td, ap->a_aclp);
 	if (error > NFSERR_STALE) {
 		(void) nfscl_maperr(ap->a_td, error, (uid_t)0, (gid_t)0);
 		error = EPERM;
 	}
 	return (error);
 }
 
 static int
 nfs_setacl(struct vop_setacl_args *ap)
 {
 	int error;
 
 	if (ap->a_type != ACL_TYPE_NFS4)
 		return (EOPNOTSUPP);
 	error = nfsrpc_setacl(ap->a_vp, ap->a_cred, ap->a_td, ap->a_aclp);
 	if (error > NFSERR_STALE) {
 		(void) nfscl_maperr(ap->a_td, error, (uid_t)0, (gid_t)0);
 		error = EPERM;
 	}
 	return (error);
 }
 
 /*
  * VOP_ADVISE for NFS.
  * Just return 0 for any errors, since it is just a hint.
  */
 static int
 nfs_advise(struct vop_advise_args *ap)
 {
 	struct thread *td = curthread;
 	struct nfsmount *nmp;
 	uint64_t len;
 	int error;
 
 	/*
 	 * First do vop_stdadvise() to handle the buffer cache.
 	 */
 	error = vop_stdadvise(ap);
 	if (error != 0)
 		return (error);
 	if (ap->a_start < 0 || ap->a_end < 0)
 		return (0);
 	if (ap->a_end == OFF_MAX)
 		len = 0;
 	else if (ap->a_end < ap->a_start)
 		return (0);
 	else
 		len = ap->a_end - ap->a_start + 1;
 	nmp = VFSTONFS(ap->a_vp->v_mount);
 	mtx_lock(&nmp->nm_mtx);
 	if (!NFSHASNFSV4(nmp) || nmp->nm_minorvers < NFSV42_MINORVERSION ||
 	    (NFSHASPNFS(nmp) && (nmp->nm_privflag & NFSMNTP_IOADVISETHRUMDS) ==
 	    0) || (nmp->nm_privflag & NFSMNTP_NOADVISE) != 0) {
 		mtx_unlock(&nmp->nm_mtx);
 		return (0);
 	}
 	mtx_unlock(&nmp->nm_mtx);
 	error = nfsrpc_advise(ap->a_vp, ap->a_start, len, ap->a_advice,
 	    td->td_ucred, td);
 	if (error == NFSERR_NOTSUPP) {
 		mtx_lock(&nmp->nm_mtx);
 		nmp->nm_privflag |= NFSMNTP_NOADVISE;
 		mtx_unlock(&nmp->nm_mtx);
 	}
 	return (0);
 }
 
 /*
  * nfs allocate call
  */
 static int
 nfs_allocate(struct vop_allocate_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct thread *td = curthread;
 	struct nfsvattr nfsva;
 	struct nfsmount *nmp;
 	struct nfsnode *np;
 	off_t alen;
 	int attrflag, error, ret;
 	struct timespec ts;
 	struct uio io;
 
 	attrflag = 0;
 	nmp = VFSTONFS(vp->v_mount);
 	np = VTONFS(vp);
 	mtx_lock(&nmp->nm_mtx);
 	if (NFSHASNFSV4(nmp) && nmp->nm_minorvers >= NFSV42_MINORVERSION &&
 	    (nmp->nm_privflag & NFSMNTP_NOALLOCATE) == 0) {
 		mtx_unlock(&nmp->nm_mtx);
 		alen = *ap->a_len;
 		if ((uint64_t)alen > nfs_maxalloclen)
 			alen = nfs_maxalloclen;
 
 		/* Check the file size limit. */
 		io.uio_offset = *ap->a_offset;
 		io.uio_resid = alen;
 		error = vn_rlimit_fsize(vp, &io, td);
 
 		/*
 		 * Flush first to ensure that the allocate adds to the
 		 * file's allocation on the server.
 		 */
 		if (error == 0)
 			error = ncl_flush(vp, MNT_WAIT, td, 1, 0);
 		if (error == 0)
 			error = nfsrpc_allocate(vp, *ap->a_offset, alen,
 			    &nfsva, &attrflag, ap->a_cred, td);
 		if (error == 0) {
 			*ap->a_offset += alen;
 			*ap->a_len -= alen;
 			nanouptime(&ts);
 			NFSLOCKNODE(np);
 			np->n_localmodtime = ts;
 			NFSUNLOCKNODE(np);
 		} else if (error == NFSERR_NOTSUPP) {
 			mtx_lock(&nmp->nm_mtx);
 			nmp->nm_privflag |= NFSMNTP_NOALLOCATE;
 			mtx_unlock(&nmp->nm_mtx);
 			error = EINVAL;
 		}
 	} else {
 		mtx_unlock(&nmp->nm_mtx);
 		error = EINVAL;
 	}
 	if (attrflag != 0) {
 		ret = nfscl_loadattrcache(&vp, &nfsva, NULL, 0, 1);
 		if (error == 0 && ret != 0)
 			error = ret;
 	}
 	if (error != 0)
 		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
 	return (error);
 }
 
 /*
  * nfs deallocate call
  */
 static int
 nfs_deallocate(struct vop_deallocate_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct thread *td = curthread;
 	struct nfsvattr nfsva;
 	struct nfsmount *nmp;
 	struct nfsnode *np;
 	off_t tlen, mlen;
 	int attrflag, error, ret;
 	bool clipped;
 	struct timespec ts;
 
 	error = 0;
 	attrflag = 0;
 	nmp = VFSTONFS(vp->v_mount);
 	np = VTONFS(vp);
 	mtx_lock(&nmp->nm_mtx);
 	if (NFSHASNFSV4(nmp) && nmp->nm_minorvers >= NFSV42_MINORVERSION &&
 	    (nmp->nm_privflag & NFSMNTP_NODEALLOCATE) == 0) {
 		mtx_unlock(&nmp->nm_mtx);
 		tlen = omin(OFF_MAX - *ap->a_offset, *ap->a_len);
 		NFSCL_DEBUG(4, "dealloc: off=%jd len=%jd maxfilesize=%ju\n",
 		    (intmax_t)*ap->a_offset, (intmax_t)tlen,
 		    (uintmax_t)nmp->nm_maxfilesize);
 		if ((uint64_t)*ap->a_offset >= nmp->nm_maxfilesize) {
 			/* Avoid EFBIG error return from the NFSv4.2 server. */
 			*ap->a_len = 0;
 			return (0);
 		}
 		clipped = false;
 		if ((uint64_t)*ap->a_offset + tlen > nmp->nm_maxfilesize)
 			tlen = nmp->nm_maxfilesize - *ap->a_offset;
 		if ((uint64_t)*ap->a_offset < np->n_size) {
 			/* Limit the len to nfs_maxalloclen before EOF. */
 			mlen = omin((off_t)np->n_size - *ap->a_offset, tlen);
 			if ((uint64_t)mlen > nfs_maxalloclen) {
 				NFSCL_DEBUG(4, "dealloc: tlen maxalloclen\n");
 				tlen = nfs_maxalloclen;
 				clipped = true;
 			}
 		}
 		if (error == 0)
 			error = ncl_vinvalbuf(vp, V_SAVE, td, 1);
 		if (error == 0) {
 			vnode_pager_purge_range(vp, *ap->a_offset,
 			    *ap->a_offset + tlen);
 			error = nfsrpc_deallocate(vp, *ap->a_offset, tlen,
 			    &nfsva, &attrflag, ap->a_cred, td);
 			NFSCL_DEBUG(4, "dealloc: rpc=%d\n", error);
 		}
 		if (error == 0) {
 			NFSCL_DEBUG(4, "dealloc: attrflag=%d na_size=%ju\n",
 			    attrflag, (uintmax_t)nfsva.na_size);
 			nanouptime(&ts);
 			NFSLOCKNODE(np);
 			np->n_localmodtime = ts;
 			NFSUNLOCKNODE(np);
 			if (attrflag != 0) {
 				if ((uint64_t)*ap->a_offset < nfsva.na_size)
 					*ap->a_offset += omin((off_t)
 					    nfsva.na_size - *ap->a_offset,
 					    tlen);
 			}
 			if (clipped && tlen < *ap->a_len)
 				*ap->a_len -= tlen;
 			else
 				*ap->a_len = 0;
 		} else if (error == NFSERR_NOTSUPP) {
 			mtx_lock(&nmp->nm_mtx);
 			nmp->nm_privflag |= NFSMNTP_NODEALLOCATE;
 			mtx_unlock(&nmp->nm_mtx);
 		}
 	} else {
 		mtx_unlock(&nmp->nm_mtx);
 		error = EIO;
 	}
 	/*
 	 * If the NFS server cannot perform the Deallocate operation, just call
 	 * vop_stddeallocate() to perform it.
 	 */
 	if (error != 0 && error != NFSERR_FBIG && error != NFSERR_INVAL) {
 		error = vop_stddeallocate(ap);
 		NFSCL_DEBUG(4, "dealloc: stddeallocate=%d\n", error);
 	}
 	if (attrflag != 0) {
 		ret = nfscl_loadattrcache(&vp, &nfsva, NULL, 0, 1);
 		if (error == 0 && ret != 0)
 			error = ret;
 	}
 	if (error != 0)
 		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
 	return (error);
 }
 
 /*
  * nfs copy_file_range call
  */
 static int
 nfs_copy_file_range(struct vop_copy_file_range_args *ap)
 {
 	struct vnode *invp = ap->a_invp;
 	struct vnode *outvp = ap->a_outvp;
 	struct mount *mp;
 	struct nfsvattr innfsva, outnfsva;
 	struct vattr *vap;
 	struct uio io;
 	struct nfsmount *nmp;
 	size_t len, len2;
 	int error, inattrflag, outattrflag, ret, ret2;
 	off_t inoff, outoff;
 	bool consecutive, must_commit, tryoutcred;
 
 	ret = ret2 = 0;
 	nmp = VFSTONFS(invp->v_mount);
 	mtx_lock(&nmp->nm_mtx);
 	/* NFSv4.2 Copy is not permitted for infile == outfile. */
 	if (!NFSHASNFSV4(nmp) || nmp->nm_minorvers < NFSV42_MINORVERSION ||
 	    (nmp->nm_privflag & NFSMNTP_NOCOPY) != 0 || invp == outvp) {
 		mtx_unlock(&nmp->nm_mtx);
 		error = vn_generic_copy_file_range(ap->a_invp, ap->a_inoffp,
 		    ap->a_outvp, ap->a_outoffp, ap->a_lenp, ap->a_flags,
 		    ap->a_incred, ap->a_outcred, ap->a_fsizetd);
 		return (error);
 	}
 	mtx_unlock(&nmp->nm_mtx);
 
 	/* Lock both vnodes, avoiding risk of deadlock. */
 	do {
 		mp = NULL;
 		error = vn_start_write(outvp, &mp, V_WAIT);
 		if (error == 0) {
 			error = vn_lock(outvp, LK_EXCLUSIVE);
 			if (error == 0) {
 				error = vn_lock(invp, LK_SHARED | LK_NOWAIT);
 				if (error == 0)
 					break;
 				VOP_UNLOCK(outvp);
 				if (mp != NULL)
 					vn_finished_write(mp);
 				mp = NULL;
 				error = vn_lock(invp, LK_SHARED);
 				if (error == 0)
 					VOP_UNLOCK(invp);
 			}
 		}
 		if (mp != NULL)
 			vn_finished_write(mp);
 	} while (error == 0);
 	if (error != 0)
 		return (error);
 
 	/*
 	 * Do the vn_rlimit_fsize() check.  Should this be above the VOP layer?
 	 */
 	io.uio_offset = *ap->a_outoffp;
 	io.uio_resid = *ap->a_lenp;
 	error = vn_rlimit_fsize(outvp, &io, ap->a_fsizetd);
 
 	/*
 	 * Flush the input file so that the data is up to date before
 	 * the copy.  Flush writes for the output file so that they
 	 * do not overwrite the data copied to the output file by the Copy.
 	 * Set the commit argument for both flushes so that the data is on
 	 * stable storage before the Copy RPC.  This is done in case the
 	 * server reboots during the Copy and needs to be redone.
 	 */
 	if (error == 0)
 		error = ncl_flush(invp, MNT_WAIT, curthread, 1, 0);
 	if (error == 0)
 		error = ncl_flush(outvp, MNT_WAIT, curthread, 1, 0);
 
 	/* Do the actual NFSv4.2 RPC. */
 	len = *ap->a_lenp;
 	mtx_lock(&nmp->nm_mtx);
 	if ((nmp->nm_privflag & NFSMNTP_NOCONSECUTIVE) == 0)
 		consecutive = true;
 	else
 		consecutive = false;
 	mtx_unlock(&nmp->nm_mtx);
 	inoff = *ap->a_inoffp;
 	outoff = *ap->a_outoffp;
 	tryoutcred = true;
 	must_commit = false;
 	if (error == 0) {
 		vap = &VTONFS(invp)->n_vattr.na_vattr;
 		error = VOP_GETATTR(invp, vap, ap->a_incred);
 		if (error == 0) {
 			/*
 			 * Clip "len" at va_size so that RFC compliant servers
 			 * will not reply NFSERR_INVAL.
 			 * Setting "len == 0" for the RPC would be preferred,
 			 * but some Linux servers do not support that.
 			 */
 			if (inoff >= vap->va_size)
 				*ap->a_lenp = len = 0;
 			else if (inoff + len > vap->va_size)
 				*ap->a_lenp = len = vap->va_size - inoff;
 		} else
 			error = 0;
 	}
 
 	/*
 	 * len will be set to 0 upon a successful Copy RPC.
 	 * As such, this only loops when the Copy RPC needs to be retried.
 	 */
 	while (len > 0 && error == 0) {
 		inattrflag = outattrflag = 0;
 		len2 = len;
 		if (tryoutcred)
 			error = nfsrpc_copy_file_range(invp, ap->a_inoffp,
 			    outvp, ap->a_outoffp, &len2, ap->a_flags,
 			    &inattrflag, &innfsva, &outattrflag, &outnfsva,
 			    ap->a_outcred, consecutive, &must_commit);
 		else
 			error = nfsrpc_copy_file_range(invp, ap->a_inoffp,
 			    outvp, ap->a_outoffp, &len2, ap->a_flags,
 			    &inattrflag, &innfsva, &outattrflag, &outnfsva,
 			    ap->a_incred, consecutive, &must_commit);
 		if (inattrflag != 0)
 			ret = nfscl_loadattrcache(&invp, &innfsva, NULL, 0, 1);
 		if (outattrflag != 0)
 			ret2 = nfscl_loadattrcache(&outvp, &outnfsva, NULL,
 			    1, 1);
 		if (error == 0) {
 			if (consecutive == false) {
 				if (len2 == len) {
 					mtx_lock(&nmp->nm_mtx);
 					nmp->nm_privflag |=
 					    NFSMNTP_NOCONSECUTIVE;
 					mtx_unlock(&nmp->nm_mtx);
 				} else
 					error = NFSERR_OFFLOADNOREQS;
 			}
 			*ap->a_lenp = len2;
 			len = 0;
 			if (len2 > 0 && must_commit && error == 0)
 				error = ncl_commit(outvp, outoff, *ap->a_lenp,
 				    ap->a_outcred, curthread);
 			if (error == 0 && ret != 0)
 				error = ret;
 			if (error == 0 && ret2 != 0)
 				error = ret2;
 		} else if (error == NFSERR_OFFLOADNOREQS && consecutive) {
 			/*
 			 * Try consecutive == false, which is ok only if all
 			 * bytes are copied.
 			 * If only some bytes were copied when consecutive
 			 * is false, there is no way to know which bytes
 			 * still need to be written.
 			 */
 			consecutive = false;
 			error = 0;
 		} else if (error == NFSERR_ACCES && tryoutcred) {
 			/* Try again with incred. */
 			tryoutcred = false;
 			error = 0;
 		}
 		if (error == NFSERR_STALEWRITEVERF) {
 			/*
 			 * Server rebooted, so do it all again.
 			 */
 			*ap->a_inoffp = inoff;
 			*ap->a_outoffp = outoff;
 			len = *ap->a_lenp;
 			must_commit = false;
 			error = 0;
 		}
 	}
 	VOP_UNLOCK(invp);
 	VOP_UNLOCK(outvp);
 	if (mp != NULL)
 		vn_finished_write(mp);
 	if (error == NFSERR_NOTSUPP || error == NFSERR_OFFLOADNOREQS ||
 	    error == NFSERR_ACCES) {
 		/*
 		 * Unlike the NFSv4.2 Copy, vn_generic_copy_file_range() can
 		 * use a_incred for the read and a_outcred for the write, so
 		 * try this for NFSERR_ACCES failures for the Copy.
 		 * For NFSERR_NOTSUPP and NFSERR_OFFLOADNOREQS, the Copy can
 		 * never succeed, so disable it.
 		 */
 		if (error != NFSERR_ACCES) {
 			/* Can never do Copy on this mount. */
 			mtx_lock(&nmp->nm_mtx);
 			nmp->nm_privflag |= NFSMNTP_NOCOPY;
 			mtx_unlock(&nmp->nm_mtx);
 		}
 		*ap->a_inoffp = inoff;
 		*ap->a_outoffp = outoff;
 		error = vn_generic_copy_file_range(ap->a_invp, ap->a_inoffp,
 		    ap->a_outvp, ap->a_outoffp, ap->a_lenp, ap->a_flags,
 		    ap->a_incred, ap->a_outcred, ap->a_fsizetd);
 	} else if (error != 0)
 		*ap->a_lenp = 0;
 
 	if (error != 0)
 		error = nfscl_maperr(curthread, error, (uid_t)0, (gid_t)0);
 	return (error);
 }
 
 /*
  * nfs ioctl call
  */
 static int
 nfs_ioctl(struct vop_ioctl_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct nfsvattr nfsva;
 	struct nfsmount *nmp;
 	int attrflag, content, error, ret;
 	bool eof = false;			/* shut up compiler. */
 
 	if (vp->v_type != VREG)
 		return (ENOTTY);
 	nmp = VFSTONFS(vp->v_mount);
 	if (!NFSHASNFSV4(nmp) || nmp->nm_minorvers < NFSV42_MINORVERSION) {
 		error = vop_stdioctl(ap);
 		return (error);
 	}
 
 	/* Do the actual NFSv4.2 RPC. */
 	switch (ap->a_command) {
 	case FIOSEEKDATA:
 		content = NFSV4CONTENT_DATA;
 		break;
 	case FIOSEEKHOLE:
 		content = NFSV4CONTENT_HOLE;
 		break;
 	default:
 		return (ENOTTY);
 	}
 
 	error = vn_lock(vp, LK_SHARED);
 	if (error != 0)
 		return (EBADF);
 	attrflag = 0;
 	if (*((off_t *)ap->a_data) >= VTONFS(vp)->n_size)
 		error = ENXIO;
 	else {
 		/*
 		 * Flush all writes, so that the server is up to date.
 		 * Although a Commit is not required, the commit argument
 		 * is set so that, for a pNFS File/Flexible File Layout
 		 * server, the LayoutCommit will be done to ensure the file
 		 * size is up to date on the Metadata Server.
 		 */
 		error = ncl_flush(vp, MNT_WAIT, ap->a_td, 1, 0);
 		if (error == 0)
 			error = nfsrpc_seek(vp, (off_t *)ap->a_data, &eof,
 			    content, ap->a_cred, &nfsva, &attrflag);
 		/* If at eof for FIOSEEKDATA, return ENXIO. */
 		if (eof && error == 0 && content == NFSV4CONTENT_DATA)
 			error = ENXIO;
 	}
 	if (attrflag != 0) {
 		ret = nfscl_loadattrcache(&vp, &nfsva, NULL, 0, 1);
 		if (error == 0 && ret != 0)
 			error = ret;
 	}
 	NFSVOPUNLOCK(vp);
 
 	if (error != 0)
 		error = ENXIO;
 	return (error);
 }
 
 /*
  * nfs getextattr call
  */
 static int
 nfs_getextattr(struct vop_getextattr_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct nfsmount *nmp;
 	struct ucred *cred;
 	struct thread *td = ap->a_td;
 	struct nfsvattr nfsva;
 	ssize_t len;
 	int attrflag, error, ret;
 
 	nmp = VFSTONFS(vp->v_mount);
 	mtx_lock(&nmp->nm_mtx);
 	if (!NFSHASNFSV4(nmp) || nmp->nm_minorvers < NFSV42_MINORVERSION ||
 	    (nmp->nm_privflag & NFSMNTP_NOXATTR) != 0 ||
 	    ap->a_attrnamespace != EXTATTR_NAMESPACE_USER) {
 		mtx_unlock(&nmp->nm_mtx);
 		return (EOPNOTSUPP);
 	}
 	mtx_unlock(&nmp->nm_mtx);
 
 	cred = ap->a_cred;
 	if (cred == NULL)
 		cred = td->td_ucred;
 	/* Do the actual NFSv4.2 Optional Extended Attribute (RFC-8276) RPC. */
 	attrflag = 0;
 	error = nfsrpc_getextattr(vp, ap->a_name, ap->a_uio, &len, &nfsva,
 	    &attrflag, cred, td);
 	if (attrflag != 0) {
 		ret = nfscl_loadattrcache(&vp, &nfsva, NULL, 0, 1);
 		if (error == 0 && ret != 0)
 			error = ret;
 	}
 	if (error == 0 && ap->a_size != NULL)
 		*ap->a_size = len;
 
 	switch (error) {
 	case NFSERR_NOTSUPP:
 	case NFSERR_OPILLEGAL:
 		mtx_lock(&nmp->nm_mtx);
 		nmp->nm_privflag |= NFSMNTP_NOXATTR;
 		mtx_unlock(&nmp->nm_mtx);
 		error = EOPNOTSUPP;
 		break;
 	case NFSERR_NOXATTR:
 	case NFSERR_XATTR2BIG:
 		error = ENOATTR;
 		break;
 	default:
 		error = nfscl_maperr(td, error, 0, 0);
 		break;
 	}
 	return (error);
 }
 
 /*
  * nfs setextattr call
  */
 static int
 nfs_setextattr(struct vop_setextattr_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct nfsmount *nmp;
 	struct ucred *cred;
 	struct thread *td = ap->a_td;
 	struct nfsvattr nfsva;
 	int attrflag, error, ret;
 
 	nmp = VFSTONFS(vp->v_mount);
 	mtx_lock(&nmp->nm_mtx);
 	if (!NFSHASNFSV4(nmp) || nmp->nm_minorvers < NFSV42_MINORVERSION ||
 	    (nmp->nm_privflag & NFSMNTP_NOXATTR) != 0 ||
 	    ap->a_attrnamespace != EXTATTR_NAMESPACE_USER) {
 		mtx_unlock(&nmp->nm_mtx);
 		return (EOPNOTSUPP);
 	}
 	mtx_unlock(&nmp->nm_mtx);
 
 	if (ap->a_uio->uio_resid < 0)
 		return (EINVAL);
 	cred = ap->a_cred;
 	if (cred == NULL)
 		cred = td->td_ucred;
 	/* Do the actual NFSv4.2 Optional Extended Attribute (RFC-8276) RPC. */
 	attrflag = 0;
 	error = nfsrpc_setextattr(vp, ap->a_name, ap->a_uio, &nfsva,
 	    &attrflag, cred, td);
 	if (attrflag != 0) {
 		ret = nfscl_loadattrcache(&vp, &nfsva, NULL, 0, 1);
 		if (error == 0 && ret != 0)
 			error = ret;
 	}
 
 	switch (error) {
 	case NFSERR_NOTSUPP:
 	case NFSERR_OPILLEGAL:
 		mtx_lock(&nmp->nm_mtx);
 		nmp->nm_privflag |= NFSMNTP_NOXATTR;
 		mtx_unlock(&nmp->nm_mtx);
 		error = EOPNOTSUPP;
 		break;
 	case NFSERR_NOXATTR:
 	case NFSERR_XATTR2BIG:
 		error = ENOATTR;
 		break;
 	default:
 		error = nfscl_maperr(td, error, 0, 0);
 		break;
 	}
 	return (error);
 }
 
 /*
  * nfs listextattr call
  */
 static int
 nfs_listextattr(struct vop_listextattr_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct nfsmount *nmp;
 	struct ucred *cred;
 	struct thread *td = ap->a_td;
 	struct nfsvattr nfsva;
 	size_t len, len2;
 	uint64_t cookie;
 	int attrflag, error, ret;
 	bool eof;
 
 	nmp = VFSTONFS(vp->v_mount);
 	mtx_lock(&nmp->nm_mtx);
 	if (!NFSHASNFSV4(nmp) || nmp->nm_minorvers < NFSV42_MINORVERSION ||
 	    (nmp->nm_privflag & NFSMNTP_NOXATTR) != 0 ||
 	    ap->a_attrnamespace != EXTATTR_NAMESPACE_USER) {
 		mtx_unlock(&nmp->nm_mtx);
 		return (EOPNOTSUPP);
 	}
 	mtx_unlock(&nmp->nm_mtx);
 
 	cred = ap->a_cred;
 	if (cred == NULL)
 		cred = td->td_ucred;
 
 	/* Loop around doing List Extended Attribute RPCs. */
 	eof = false;
 	cookie = 0;
 	len2 = 0;
 	error = 0;
 	while (!eof && error == 0) {
 		len = nmp->nm_rsize;
 		attrflag = 0;
 		error = nfsrpc_listextattr(vp, &cookie, ap->a_uio, &len, &eof,
 		    &nfsva, &attrflag, cred, td);
 		if (attrflag != 0) {
 			ret = nfscl_loadattrcache(&vp, &nfsva, NULL, 0, 1);
 			if (error == 0 && ret != 0)
 				error = ret;
 		}
 		if (error == 0) {
 			len2 += len;
 			if (len2 > SSIZE_MAX)
 				error = ENOATTR;
 		}
 	}
 	if (error == 0 && ap->a_size != NULL)
 		*ap->a_size = len2;
 
 	switch (error) {
 	case NFSERR_NOTSUPP:
 	case NFSERR_OPILLEGAL:
 		mtx_lock(&nmp->nm_mtx);
 		nmp->nm_privflag |= NFSMNTP_NOXATTR;
 		mtx_unlock(&nmp->nm_mtx);
 		error = EOPNOTSUPP;
 		break;
 	case NFSERR_NOXATTR:
 	case NFSERR_XATTR2BIG:
 		error = ENOATTR;
 		break;
 	default:
 		error = nfscl_maperr(td, error, 0, 0);
 		break;
 	}
 	return (error);
 }
 
 /*
  * nfs setextattr call
  */
 static int
 nfs_deleteextattr(struct vop_deleteextattr_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct nfsmount *nmp;
 	struct nfsvattr nfsva;
 	int attrflag, error, ret;
 
 	nmp = VFSTONFS(vp->v_mount);
 	mtx_lock(&nmp->nm_mtx);
 	if (!NFSHASNFSV4(nmp) || nmp->nm_minorvers < NFSV42_MINORVERSION ||
 	    (nmp->nm_privflag & NFSMNTP_NOXATTR) != 0 ||
 	    ap->a_attrnamespace != EXTATTR_NAMESPACE_USER) {
 		mtx_unlock(&nmp->nm_mtx);
 		return (EOPNOTSUPP);
 	}
 	mtx_unlock(&nmp->nm_mtx);
 
 	/* Do the actual NFSv4.2 Optional Extended Attribute (RFC-8276) RPC. */
 	attrflag = 0;
 	error = nfsrpc_rmextattr(vp, ap->a_name, &nfsva, &attrflag, ap->a_cred,
 	    ap->a_td);
 	if (attrflag != 0) {
 		ret = nfscl_loadattrcache(&vp, &nfsva, NULL, 0, 1);
 		if (error == 0 && ret != 0)
 			error = ret;
 	}
 
 	switch (error) {
 	case NFSERR_NOTSUPP:
 	case NFSERR_OPILLEGAL:
 		mtx_lock(&nmp->nm_mtx);
 		nmp->nm_privflag |= NFSMNTP_NOXATTR;
 		mtx_unlock(&nmp->nm_mtx);
 		error = EOPNOTSUPP;
 		break;
 	case NFSERR_NOXATTR:
 	case NFSERR_XATTR2BIG:
 		error = ENOATTR;
 		break;
 	default:
 		error = nfscl_maperr(ap->a_td, error, 0, 0);
 		break;
 	}
 	return (error);
 }
 
 /*
  * Return POSIX pathconf information applicable to nfs filesystems.
  */
 static int
 nfs_pathconf(struct vop_pathconf_args *ap)
 {
 	struct nfsv3_pathconf pc;
 	struct nfsvattr nfsva;
 	struct vnode *vp = ap->a_vp;
 	struct nfsmount *nmp;
 	struct thread *td = curthread;
 	off_t off;
 	bool eof;
 	int attrflag, error;
 
 	if ((NFS_ISV34(vp) && (ap->a_name == _PC_LINK_MAX ||
 	    ap->a_name == _PC_NAME_MAX || ap->a_name == _PC_CHOWN_RESTRICTED ||
 	    ap->a_name == _PC_NO_TRUNC)) ||
 	    (NFS_ISV4(vp) && ap->a_name == _PC_ACL_NFS4)) {
 		/*
 		 * Since only the above 4 a_names are returned by the NFSv3
 		 * Pathconf RPC, there is no point in doing it for others.
 		 * For NFSv4, the Pathconf RPC (actually a Getattr Op.) can
 		 * be used for _PC_NFS4_ACL as well.
 		 */
 		error = nfsrpc_pathconf(vp, &pc, td->td_ucred, td, &nfsva,
 		    &attrflag);
 		if (attrflag != 0)
 			(void) nfscl_loadattrcache(&vp, &nfsva, NULL, 0, 1);
 		if (error != 0)
 			return (error);
 	} else {
 		/*
 		 * For NFSv2 (or NFSv3 when not one of the above 4 a_names),
 		 * just fake them.
 		 */
 		pc.pc_linkmax = NFS_LINK_MAX;
 		pc.pc_namemax = NFS_MAXNAMLEN;
 		pc.pc_notrunc = 1;
 		pc.pc_chownrestricted = 1;
 		pc.pc_caseinsensitive = 0;
 		pc.pc_casepreserving = 1;
 		error = 0;
 	}
 	switch (ap->a_name) {
 	case _PC_LINK_MAX:
 #ifdef _LP64
 		*ap->a_retval = pc.pc_linkmax;
 #else
 		*ap->a_retval = MIN(LONG_MAX, pc.pc_linkmax);
 #endif
 		break;
 	case _PC_NAME_MAX:
 		*ap->a_retval = pc.pc_namemax;
 		break;
 	case _PC_PIPE_BUF:
 		if (ap->a_vp->v_type == VDIR || ap->a_vp->v_type == VFIFO)
 			*ap->a_retval = PIPE_BUF;
 		else
 			error = EINVAL;
 		break;
 	case _PC_CHOWN_RESTRICTED:
 		*ap->a_retval = pc.pc_chownrestricted;
 		break;
 	case _PC_NO_TRUNC:
 		*ap->a_retval = pc.pc_notrunc;
 		break;
 	case _PC_ACL_NFS4:
 		if (NFS_ISV4(vp) && nfsrv_useacl != 0 && attrflag != 0 &&
 		    NFSISSET_ATTRBIT(&nfsva.na_suppattr, NFSATTRBIT_ACL))
 			*ap->a_retval = 1;
 		else
 			*ap->a_retval = 0;
 		break;
 	case _PC_ACL_PATH_MAX:
 		if (NFS_ISV4(vp))
 			*ap->a_retval = ACL_MAX_ENTRIES;
 		else
 			*ap->a_retval = 3;
 		break;
 	case _PC_PRIO_IO:
 		*ap->a_retval = 0;
 		break;
 	case _PC_SYNC_IO:
 		*ap->a_retval = 0;
 		break;
 	case _PC_ALLOC_SIZE_MIN:
 		*ap->a_retval = vp->v_mount->mnt_stat.f_bsize;
 		break;
 	case _PC_FILESIZEBITS:
 		if (NFS_ISV34(vp))
 			*ap->a_retval = 64;
 		else
 			*ap->a_retval = 32;
 		break;
 	case _PC_REC_INCR_XFER_SIZE:
 		*ap->a_retval = vp->v_mount->mnt_stat.f_iosize;
 		break;
 	case _PC_REC_MAX_XFER_SIZE:
 		*ap->a_retval = -1; /* means ``unlimited'' */
 		break;
 	case _PC_REC_MIN_XFER_SIZE:
 		*ap->a_retval = vp->v_mount->mnt_stat.f_iosize;
 		break;
 	case _PC_REC_XFER_ALIGN:
 		*ap->a_retval = PAGE_SIZE;
 		break;
 	case _PC_SYMLINK_MAX:
 		*ap->a_retval = NFS_MAXPATHLEN;
 		break;
 	case _PC_MIN_HOLE_SIZE:
 		/* Only some NFSv4.2 servers support Seek for Holes. */
 		*ap->a_retval = 0;
 		nmp = VFSTONFS(vp->v_mount);
 		if (NFS_ISV4(vp) && nmp->nm_minorvers == NFSV42_MINORVERSION) {
 			/*
 			 * NFSv4.2 doesn't have an attribute for hole size,
 			 * so all we can do is see if the Seek operation is
 			 * supported and then use f_iosize as a "best guess".
 			 */
 			mtx_lock(&nmp->nm_mtx);
 			if ((nmp->nm_privflag & NFSMNTP_SEEKTESTED) == 0) {
 				mtx_unlock(&nmp->nm_mtx);
 				off = 0;
 				attrflag = 0;
 				error = nfsrpc_seek(vp, &off, &eof,
 				    NFSV4CONTENT_HOLE, td->td_ucred, &nfsva,
 				    &attrflag);
 				if (attrflag != 0)
 					nfscl_loadattrcache(&vp, &nfsva,
 					    NULL, 0, 1);
 				mtx_lock(&nmp->nm_mtx);
 				if (error == NFSERR_NOTSUPP)
 					nmp->nm_privflag |= NFSMNTP_SEEKTESTED;
 				else
 					nmp->nm_privflag |= NFSMNTP_SEEKTESTED |
 					    NFSMNTP_SEEK;
 				error = 0;
 			}
 			if ((nmp->nm_privflag & NFSMNTP_SEEK) != 0)
 				*ap->a_retval = vp->v_mount->mnt_stat.f_iosize;
 			mtx_unlock(&nmp->nm_mtx);
 		}
 		break;
 
 	default:
 		error = vop_stdpathconf(ap);
 		break;
 	}
 	return (error);
 }
diff --git a/sys/fs/nfsserver/nfs_nfsdport.c b/sys/fs/nfsserver/nfs_nfsdport.c
index 9fc475ac7ecb..8e15237bc10c 100644
--- a/sys/fs/nfsserver/nfs_nfsdport.c
+++ b/sys/fs/nfsserver/nfs_nfsdport.c
@@ -1,7127 +1,7120 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * Rick Macklem at The University of Guelph.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/capsicum.h>
 #include <sys/extattr.h>
 
 /*
  * Functions that perform the vfs operations required by the routines in
  * nfsd_serv.c. It is hoped that this change will make the server more
  * portable.
  */
 
 #include <fs/nfs/nfsport.h>
 #include <security/mac/mac_framework.h>
 #include <sys/callout.h>
 #include <sys/filio.h>
 #include <sys/hash.h>
 #include <sys/sysctl.h>
 #include <nlm/nlm_prot.h>
 #include <nlm/nlm.h>
 
 FEATURE(nfsd, "NFSv4 server");
 
 extern u_int32_t newnfs_true, newnfs_false, newnfs_xdrneg1;
 extern int nfsrv_useacl;
 extern int newnfs_numnfsd;
 extern struct mount nfsv4root_mnt;
 extern struct nfsrv_stablefirst nfsrv_stablefirst;
 extern SVCPOOL	*nfsrvd_pool;
 extern struct nfsv4lock nfsd_suspend_lock;
 extern struct nfsclienthashhead *nfsclienthash;
 extern struct nfslockhashhead *nfslockhash;
 extern struct nfssessionhash *nfssessionhash;
 extern int nfsrv_sessionhashsize;
 extern struct nfsstatsv1 nfsstatsv1;
 extern struct nfslayouthash *nfslayouthash;
 extern int nfsrv_layouthashsize;
 extern struct mtx nfsrv_dslock_mtx;
 extern int nfs_pnfsiothreads;
 extern struct nfsdontlisthead nfsrv_dontlisthead;
 extern volatile int nfsrv_dontlistlen;
 extern volatile int nfsrv_devidcnt;
 extern int nfsrv_maxpnfsmirror;
 extern uint32_t nfs_srvmaxio;
 extern int nfs_bufpackets;
 extern u_long sb_max_adj;
 struct vfsoptlist nfsv4root_opt, nfsv4root_newopt;
 NFSDLOCKMUTEX;
 NFSSTATESPINLOCK;
 struct nfsrchash_bucket nfsrchash_table[NFSRVCACHE_HASHSIZE];
 struct nfsrchash_bucket nfsrcahash_table[NFSRVCACHE_HASHSIZE];
 struct mtx nfsrc_udpmtx;
 struct mtx nfs_v4root_mutex;
 struct mtx nfsrv_dontlistlock_mtx;
 struct mtx nfsrv_recalllock_mtx;
 struct nfsrvfh nfs_rootfh, nfs_pubfh;
 int nfs_pubfhset = 0, nfs_rootfhset = 0;
 struct proc *nfsd_master_proc = NULL;
 int nfsd_debuglevel = 0;
 static pid_t nfsd_master_pid = (pid_t)-1;
 static char nfsd_master_comm[MAXCOMLEN + 1];
 static struct timeval nfsd_master_start;
 static uint32_t nfsv4_sysid = 0;
 static fhandle_t zerofh;
 struct callout nfsd_callout;
 
 static int nfssvc_srvcall(struct thread *, struct nfssvc_args *,
     struct ucred *);
 
 int nfsrv_enable_crossmntpt = 1;
 static int nfs_commit_blks;
 static int nfs_commit_miss;
 extern int nfsrv_issuedelegs;
 extern int nfsrv_dolocallocks;
 extern int nfsd_enable_stringtouid;
 extern struct nfsdevicehead nfsrv_devidhead;
 
 static int nfsrv_createiovec(int, struct mbuf **, struct mbuf **,
     struct iovec **);
 static int nfsrv_createiovec_extpgs(int, int, struct mbuf **,
     struct mbuf **, struct iovec **);
 static int nfsrv_createiovecw(int, struct mbuf *, char *, struct iovec **,
     int *);
 static void nfsrv_pnfscreate(struct vnode *, struct vattr *, struct ucred *,
     NFSPROC_T *);
 static void nfsrv_pnfsremovesetup(struct vnode *, NFSPROC_T *, struct vnode **,
     int *, char *, fhandle_t *);
 static void nfsrv_pnfsremove(struct vnode **, int, char *, fhandle_t *,
     NFSPROC_T *);
 static int nfsrv_proxyds(struct vnode *, off_t, int, struct ucred *,
     struct thread *, int, struct mbuf **, char *, struct mbuf **,
     struct nfsvattr *, struct acl *, off_t *, int, bool *);
 static int nfsrv_setextattr(struct vnode *, struct nfsvattr *, NFSPROC_T *);
 static int nfsrv_readdsrpc(fhandle_t *, off_t, int, struct ucred *,
     NFSPROC_T *, struct nfsmount *, struct mbuf **, struct mbuf **);
 static int nfsrv_writedsrpc(fhandle_t *, off_t, int, struct ucred *,
     NFSPROC_T *, struct vnode *, struct nfsmount **, int, struct mbuf **,
     char *, int *);
 static int nfsrv_allocatedsrpc(fhandle_t *, off_t, off_t, struct ucred *,
     NFSPROC_T *, struct vnode *, struct nfsmount **, int, int *);
 static int nfsrv_deallocatedsrpc(fhandle_t *, off_t, off_t, struct ucred *,
     NFSPROC_T *, struct vnode *, struct nfsmount **, int, int *);
 static int nfsrv_setacldsrpc(fhandle_t *, struct ucred *, NFSPROC_T *,
     struct vnode *, struct nfsmount **, int, struct acl *, int *);
 static int nfsrv_setattrdsrpc(fhandle_t *, struct ucred *, NFSPROC_T *,
     struct vnode *, struct nfsmount **, int, struct nfsvattr *, int *);
 static int nfsrv_getattrdsrpc(fhandle_t *, struct ucred *, NFSPROC_T *,
     struct vnode *, struct nfsmount *, struct nfsvattr *);
 static int nfsrv_seekdsrpc(fhandle_t *, off_t *, int, bool *, struct ucred *,
     NFSPROC_T *, struct nfsmount *);
 static int nfsrv_putfhname(fhandle_t *, char *);
 static int nfsrv_pnfslookupds(struct vnode *, struct vnode *,
     struct pnfsdsfile *, struct vnode **, NFSPROC_T *);
 static void nfsrv_pnfssetfh(struct vnode *, struct pnfsdsfile *, char *, char *,
     struct vnode *, NFSPROC_T *);
 static int nfsrv_dsremove(struct vnode *, char *, struct ucred *, NFSPROC_T *);
 static int nfsrv_dssetacl(struct vnode *, struct acl *, struct ucred *,
     NFSPROC_T *);
 static int nfsrv_pnfsstatfs(struct statfs *, struct mount *);
 
 int nfs_pnfsio(task_fn_t *, void *);
 
 SYSCTL_NODE(_vfs, OID_AUTO, nfsd, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
     "NFS server");
 SYSCTL_INT(_vfs_nfsd, OID_AUTO, mirrormnt, CTLFLAG_RW,
     &nfsrv_enable_crossmntpt, 0, "Enable nfsd to cross mount points");
 SYSCTL_INT(_vfs_nfsd, OID_AUTO, commit_blks, CTLFLAG_RW, &nfs_commit_blks,
     0, "");
 SYSCTL_INT(_vfs_nfsd, OID_AUTO, commit_miss, CTLFLAG_RW, &nfs_commit_miss,
     0, "");
 SYSCTL_INT(_vfs_nfsd, OID_AUTO, issue_delegations, CTLFLAG_RW,
     &nfsrv_issuedelegs, 0, "Enable nfsd to issue delegations");
 SYSCTL_INT(_vfs_nfsd, OID_AUTO, enable_locallocks, CTLFLAG_RW,
     &nfsrv_dolocallocks, 0, "Enable nfsd to acquire local locks on files");
 SYSCTL_INT(_vfs_nfsd, OID_AUTO, debuglevel, CTLFLAG_RW, &nfsd_debuglevel,
     0, "Debug level for NFS server");
 SYSCTL_INT(_vfs_nfsd, OID_AUTO, enable_stringtouid, CTLFLAG_RW,
     &nfsd_enable_stringtouid, 0, "Enable nfsd to accept numeric owner_names");
 static int nfsrv_pnfsgetdsattr = 1;
 SYSCTL_INT(_vfs_nfsd, OID_AUTO, pnfsgetdsattr, CTLFLAG_RW,
     &nfsrv_pnfsgetdsattr, 0, "When set getattr gets DS attributes via RPC");
 
 /*
  * nfsrv_dsdirsize can only be increased and only when the nfsd threads are
  * not running.
  * The dsN subdirectories for the increased values must have been created
  * on all DS servers before this increase is done.
  */
 u_int	nfsrv_dsdirsize = 20;
 static int
 sysctl_dsdirsize(SYSCTL_HANDLER_ARGS)
 {
 	int error, newdsdirsize;
 
 	newdsdirsize = nfsrv_dsdirsize;
 	error = sysctl_handle_int(oidp, &newdsdirsize, 0, req);
 	if (error != 0 || req->newptr == NULL)
 		return (error);
 	if (newdsdirsize <= nfsrv_dsdirsize || newdsdirsize > 10000 ||
 	    newnfs_numnfsd != 0)
 		return (EINVAL);
 	nfsrv_dsdirsize = newdsdirsize;
 	return (0);
 }
 SYSCTL_PROC(_vfs_nfsd, OID_AUTO, dsdirsize,
     CTLTYPE_UINT | CTLFLAG_MPSAFE | CTLFLAG_RW, 0, sizeof(nfsrv_dsdirsize),
     sysctl_dsdirsize, "IU", "Number of dsN subdirs on the DS servers");
 
 /*
  * nfs_srvmaxio can only be increased and only when the nfsd threads are
  * not running.  The setting must be a power of 2, with the current limit of
  * 1Mbyte.
  */
 static int
 sysctl_srvmaxio(SYSCTL_HANDLER_ARGS)
 {
 	int error;
 	u_int newsrvmaxio;
 	uint64_t tval;
 
 	newsrvmaxio = nfs_srvmaxio;
 	error = sysctl_handle_int(oidp, &newsrvmaxio, 0, req);
 	if (error != 0 || req->newptr == NULL)
 		return (error);
 	if (newsrvmaxio == nfs_srvmaxio)
 		return (0);
 	if (newsrvmaxio < nfs_srvmaxio) {
 		printf("nfsd: vfs.nfsd.srvmaxio can only be increased\n");
 		return (EINVAL);
 	}
 	if (newsrvmaxio > 1048576) {
 		printf("nfsd: vfs.nfsd.srvmaxio cannot be > 1Mbyte\n");
 		return (EINVAL);
 	}
 	if ((newsrvmaxio & (newsrvmaxio - 1)) != 0) {
 		printf("nfsd: vfs.nfsd.srvmaxio must be a power of 2\n");
 		return (EINVAL);
 	}
 
 	/*
 	 * Check that kern.ipc.maxsockbuf is large enough for
 	 * newsrviomax, given the setting of vfs.nfs.bufpackets.
 	 */
 	if ((newsrvmaxio + NFS_MAXXDR) * nfs_bufpackets >
 	    sb_max_adj) {
 		/*
 		 * Suggest vfs.nfs.bufpackets * maximum RPC message for
 		 * sb_max_adj.
 		 */
 		tval = (newsrvmaxio + NFS_MAXXDR) * nfs_bufpackets;
 
 		/*
 		 * Convert suggested sb_max_adj value to a suggested
 		 * sb_max value, which is what is set via kern.ipc.maxsockbuf.
 		 * Perform the inverse calculation of (from uipc_sockbuf.c):
 		 * sb_max_adj = (u_quad_t)sb_max * MCLBYTES /
 		 *     (MSIZE + MCLBYTES);
 		 * XXX If the calculation of sb_max_adj from sb_max changes,
 		 *     this calculation must be changed as well.
 		 */
 		tval *= (MSIZE + MCLBYTES);  /* Brackets for readability. */
 		tval += MCLBYTES - 1;        /* Round up divide. */
 		tval /= MCLBYTES;
 		printf("nfsd: set kern.ipc.maxsockbuf to a minimum of "
 		    "%ju to support %ubyte NFS I/O\n", (uintmax_t)tval,
 		    newsrvmaxio);
 		return (EINVAL);
 	}
 
 	NFSD_LOCK();
 	if (newnfs_numnfsd != 0) {
 		NFSD_UNLOCK();
 		printf("nfsd: cannot set vfs.nfsd.srvmaxio when nfsd "
 		    "threads are running\n");
 		return (EINVAL);
 	}
 
 
 	nfs_srvmaxio = newsrvmaxio;
 	NFSD_UNLOCK();
 	return (0);
 }
 SYSCTL_PROC(_vfs_nfsd, OID_AUTO, srvmaxio,
     CTLTYPE_UINT | CTLFLAG_MPSAFE | CTLFLAG_RW, NULL, 0,
     sysctl_srvmaxio, "IU", "Maximum I/O size in bytes");
 
 #define	MAX_REORDERED_RPC	16
 #define	NUM_HEURISTIC		1031
 #define	NHUSE_INIT		64
 #define	NHUSE_INC		16
 #define	NHUSE_MAX		2048
 
 static struct nfsheur {
 	struct vnode *nh_vp;	/* vp to match (unreferenced pointer) */
 	off_t nh_nextoff;	/* next offset for sequential detection */
 	int nh_use;		/* use count for selection */
 	int nh_seqcount;	/* heuristic */
 } nfsheur[NUM_HEURISTIC];
 
 /*
  * Heuristic to detect sequential operation.
  */
 static struct nfsheur *
 nfsrv_sequential_heuristic(struct uio *uio, struct vnode *vp)
 {
 	struct nfsheur *nh;
 	int hi, try;
 
 	/* Locate best candidate. */
 	try = 32;
 	hi = ((int)(vm_offset_t)vp / sizeof(struct vnode)) % NUM_HEURISTIC;
 	nh = &nfsheur[hi];
 	while (try--) {
 		if (nfsheur[hi].nh_vp == vp) {
 			nh = &nfsheur[hi];
 			break;
 		}
 		if (nfsheur[hi].nh_use > 0)
 			--nfsheur[hi].nh_use;
 		hi = (hi + 1) % NUM_HEURISTIC;
 		if (nfsheur[hi].nh_use < nh->nh_use)
 			nh = &nfsheur[hi];
 	}
 
 	/* Initialize hint if this is a new file. */
 	if (nh->nh_vp != vp) {
 		nh->nh_vp = vp;
 		nh->nh_nextoff = uio->uio_offset;
 		nh->nh_use = NHUSE_INIT;
 		if (uio->uio_offset == 0)
 			nh->nh_seqcount = 4;
 		else
 			nh->nh_seqcount = 1;
 	}
 
 	/* Calculate heuristic. */
 	if ((uio->uio_offset == 0 && nh->nh_seqcount > 0) ||
 	    uio->uio_offset == nh->nh_nextoff) {
 		/* See comments in vfs_vnops.c:sequential_heuristic(). */
 		nh->nh_seqcount += howmany(uio->uio_resid, 16384);
 		if (nh->nh_seqcount > IO_SEQMAX)
 			nh->nh_seqcount = IO_SEQMAX;
 	} else if (qabs(uio->uio_offset - nh->nh_nextoff) <= MAX_REORDERED_RPC *
 	    imax(vp->v_mount->mnt_stat.f_iosize, uio->uio_resid)) {
 		/* Probably a reordered RPC, leave seqcount alone. */
 	} else if (nh->nh_seqcount > 1) {
 		nh->nh_seqcount /= 2;
 	} else {
 		nh->nh_seqcount = 0;
 	}
 	nh->nh_use += NHUSE_INC;
 	if (nh->nh_use > NHUSE_MAX)
 		nh->nh_use = NHUSE_MAX;
 	return (nh);
 }
 
 /*
  * Get attributes into nfsvattr structure.
  */
 int
 nfsvno_getattr(struct vnode *vp, struct nfsvattr *nvap,
     struct nfsrv_descript *nd, struct thread *p, int vpislocked,
     nfsattrbit_t *attrbitp)
 {
 	int error, gotattr, lockedit = 0;
 	struct nfsvattr na;
 
 	if (vpislocked == 0) {
 		/*
 		 * When vpislocked == 0, the vnode is either exclusively
 		 * locked by this thread or not locked by this thread.
 		 * As such, shared lock it, if not exclusively locked.
 		 */
 		if (NFSVOPISLOCKED(vp) != LK_EXCLUSIVE) {
 			lockedit = 1;
 			NFSVOPLOCK(vp, LK_SHARED | LK_RETRY);
 		}
 	}
 
 	/*
 	 * Acquire the Change, Size, TimeAccess, TimeModify and SpaceUsed
 	 * attributes, as required.
 	 * This needs to be done for regular files if:
 	 * - non-NFSv4 RPCs or
 	 * - when attrbitp == NULL or
 	 * - an NFSv4 RPC with any of the above attributes in attrbitp.
 	 * A return of 0 for nfsrv_proxyds() indicates that it has acquired
 	 * these attributes.  nfsrv_proxyds() will return an error if the
 	 * server is not a pNFS one.
 	 */
 	gotattr = 0;
 	if (vp->v_type == VREG && nfsrv_devidcnt > 0 && (attrbitp == NULL ||
 	    (nd->nd_flag & ND_NFSV4) == 0 ||
 	    NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_CHANGE) ||
 	    NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SIZE) ||
 	    NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_TIMEACCESS) ||
 	    NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_TIMEMODIFY) ||
 	    NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SPACEUSED))) {
 		error = nfsrv_proxyds(vp, 0, 0, nd->nd_cred, p,
 		    NFSPROC_GETATTR, NULL, NULL, NULL, &na, NULL, NULL, 0,
 		    NULL);
 		if (error == 0)
 			gotattr = 1;
 	}
 
 	error = VOP_GETATTR(vp, &nvap->na_vattr, nd->nd_cred);
 	if (lockedit != 0)
 		NFSVOPUNLOCK(vp);
 
 	/*
 	 * If we got the Change, Size and Modify Time from the DS,
 	 * replace them.
 	 */
 	if (gotattr != 0) {
 		nvap->na_atime = na.na_atime;
 		nvap->na_mtime = na.na_mtime;
 		nvap->na_filerev = na.na_filerev;
 		nvap->na_size = na.na_size;
 		nvap->na_bytes = na.na_bytes;
 	}
 	NFSD_DEBUG(4, "nfsvno_getattr: gotattr=%d err=%d chg=%ju\n", gotattr,
 	    error, (uintmax_t)na.na_filerev);
 
 	NFSEXITCODE(error);
 	return (error);
 }
 
 /*
  * Get a file handle for a vnode.
  */
 int
 nfsvno_getfh(struct vnode *vp, fhandle_t *fhp, struct thread *p)
 {
 	int error;
 
 	NFSBZERO((caddr_t)fhp, sizeof(fhandle_t));
 	fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid;
 	error = VOP_VPTOFH(vp, &fhp->fh_fid);
 
 	NFSEXITCODE(error);
 	return (error);
 }
 
 /*
  * Perform access checking for vnodes obtained from file handles that would
  * refer to files already opened by a Unix client. You cannot just use
  * vn_writechk() and VOP_ACCESSX() for two reasons.
  * 1 - You must check for exported rdonly as well as MNT_RDONLY for the write
  *     case.
  * 2 - The owner is to be given access irrespective of mode bits for some
  *     operations, so that processes that chmod after opening a file don't
  *     break.
  */
 int
 nfsvno_accchk(struct vnode *vp, accmode_t accmode, struct ucred *cred,
     struct nfsexstuff *exp, struct thread *p, int override, int vpislocked,
     u_int32_t *supportedtypep)
 {
 	struct vattr vattr;
 	int error = 0, getret = 0;
 
 	if (vpislocked == 0) {
 		if (NFSVOPLOCK(vp, LK_SHARED) != 0) {
 			error = EPERM;
 			goto out;
 		}
 	}
 	if (accmode & VWRITE) {
 		/* Just vn_writechk() changed to check rdonly */
 		/*
 		 * Disallow write attempts on read-only file systems;
 		 * unless the file is a socket or a block or character
 		 * device resident on the file system.
 		 */
 		if (NFSVNO_EXRDONLY(exp) ||
 		    (vp->v_mount->mnt_flag & MNT_RDONLY)) {
 			switch (vp->v_type) {
 			case VREG:
 			case VDIR:
 			case VLNK:
 				error = EROFS;
 			default:
 				break;
 			}
 		}
 		/*
 		 * If there's shared text associated with
 		 * the inode, try to free it up once.  If
 		 * we fail, we can't allow writing.
 		 */
 		if (VOP_IS_TEXT(vp) && error == 0)
 			error = ETXTBSY;
 	}
 	if (error != 0) {
 		if (vpislocked == 0)
 			NFSVOPUNLOCK(vp);
 		goto out;
 	}
 
 	/*
 	 * Should the override still be applied when ACLs are enabled?
 	 */
 	error = VOP_ACCESSX(vp, accmode, cred, p);
 	if (error != 0 && (accmode & (VDELETE | VDELETE_CHILD))) {
 		/*
 		 * Try again with VEXPLICIT_DENY, to see if the test for
 		 * deletion is supported.
 		 */
 		error = VOP_ACCESSX(vp, accmode | VEXPLICIT_DENY, cred, p);
 		if (error == 0) {
 			if (vp->v_type == VDIR) {
 				accmode &= ~(VDELETE | VDELETE_CHILD);
 				accmode |= VWRITE;
 				error = VOP_ACCESSX(vp, accmode, cred, p);
 			} else if (supportedtypep != NULL) {
 				*supportedtypep &= ~NFSACCESS_DELETE;
 			}
 		}
 	}
 
 	/*
 	 * Allow certain operations for the owner (reads and writes
 	 * on files that are already open).
 	 */
 	if (override != NFSACCCHK_NOOVERRIDE &&
 	    (error == EPERM || error == EACCES)) {
 		if (cred->cr_uid == 0 && (override & NFSACCCHK_ALLOWROOT))
 			error = 0;
 		else if (override & NFSACCCHK_ALLOWOWNER) {
 			getret = VOP_GETATTR(vp, &vattr, cred);
 			if (getret == 0 && cred->cr_uid == vattr.va_uid)
 				error = 0;
 		}
 	}
 	if (vpislocked == 0)
 		NFSVOPUNLOCK(vp);
 
 out:
 	NFSEXITCODE(error);
 	return (error);
 }
 
 /*
  * Set attribute(s) vnop.
  */
 int
 nfsvno_setattr(struct vnode *vp, struct nfsvattr *nvap, struct ucred *cred,
     struct thread *p, struct nfsexstuff *exp)
 {
 	u_quad_t savsize = 0;
 	int error, savedit;
 	time_t savbtime;
 
 	/*
 	 * If this is an exported file system and a pNFS service is running,
 	 * don't VOP_SETATTR() of size for the MDS file system.
 	 */
 	savedit = 0;
 	error = 0;
 	if (vp->v_type == VREG && (vp->v_mount->mnt_flag & MNT_EXPORTED) != 0 &&
 	    nfsrv_devidcnt != 0 && nvap->na_vattr.va_size != VNOVAL &&
 	    nvap->na_vattr.va_size > 0) {
 		savsize = nvap->na_vattr.va_size;
 		nvap->na_vattr.va_size = VNOVAL;
 		if (nvap->na_vattr.va_uid != (uid_t)VNOVAL ||
 		    nvap->na_vattr.va_gid != (gid_t)VNOVAL ||
 		    nvap->na_vattr.va_mode != (mode_t)VNOVAL ||
 		    nvap->na_vattr.va_atime.tv_sec != VNOVAL ||
 		    nvap->na_vattr.va_mtime.tv_sec != VNOVAL)
 			savedit = 1;
 		else
 			savedit = 2;
 	}
 	if (savedit != 2)
 		error = VOP_SETATTR(vp, &nvap->na_vattr, cred);
 	if (savedit != 0)
 		nvap->na_vattr.va_size = savsize;
 	if (error == 0 && (nvap->na_vattr.va_uid != (uid_t)VNOVAL ||
 	    nvap->na_vattr.va_gid != (gid_t)VNOVAL ||
 	    nvap->na_vattr.va_size != VNOVAL ||
 	    nvap->na_vattr.va_mode != (mode_t)VNOVAL ||
 	    nvap->na_vattr.va_atime.tv_sec != VNOVAL ||
 	    nvap->na_vattr.va_mtime.tv_sec != VNOVAL)) {
 		/* Never modify birthtime on a DS file. */
 		savbtime = nvap->na_vattr.va_birthtime.tv_sec;
 		nvap->na_vattr.va_birthtime.tv_sec = VNOVAL;
 		/* For a pNFS server, set the attributes on the DS file. */
 		error = nfsrv_proxyds(vp, 0, 0, cred, p, NFSPROC_SETATTR,
 		    NULL, NULL, NULL, nvap, NULL, NULL, 0, NULL);
 		nvap->na_vattr.va_birthtime.tv_sec = savbtime;
 		if (error == ENOENT)
 			error = 0;
 	}
 	NFSEXITCODE(error);
 	return (error);
 }
 
 /*
  * Set up nameidata for a lookup() call and do it.
  */
 int
 nfsvno_namei(struct nfsrv_descript *nd, struct nameidata *ndp,
     struct vnode *dp, int islocked, struct nfsexstuff *exp,
     struct vnode **retdirp)
 {
 	struct componentname *cnp = &ndp->ni_cnd;
 	int i;
 	struct iovec aiov;
 	struct uio auio;
 	int lockleaf = (cnp->cn_flags & LOCKLEAF) != 0, linklen;
 	int error = 0;
 	char *cp;
 
 	*retdirp = NULL;
 	cnp->cn_nameptr = cnp->cn_pnbuf;
 	ndp->ni_lcf = 0;
 	/*
 	 * Extract and set starting directory.
 	 */
 	if (dp->v_type != VDIR) {
 		if (islocked)
 			vput(dp);
 		else
 			vrele(dp);
 		nfsvno_relpathbuf(ndp);
 		error = ENOTDIR;
 		goto out1;
 	}
 	if (islocked)
 		NFSVOPUNLOCK(dp);
 	VREF(dp);
 	*retdirp = dp;
 	if (NFSVNO_EXRDONLY(exp))
 		cnp->cn_flags |= RDONLY;
 	ndp->ni_segflg = UIO_SYSSPACE;
 
 	if (nd->nd_flag & ND_PUBLOOKUP) {
 		ndp->ni_loopcnt = 0;
 		if (cnp->cn_pnbuf[0] == '/') {
 			vrele(dp);
 			/*
 			 * Check for degenerate pathnames here, since lookup()
 			 * panics on them.
 			 */
 			for (i = 1; i < ndp->ni_pathlen; i++)
 				if (cnp->cn_pnbuf[i] != '/')
 					break;
 			if (i == ndp->ni_pathlen) {
 				error = NFSERR_ACCES;
 				goto out;
 			}
 			dp = rootvnode;
 			VREF(dp);
 		}
 	} else if ((nfsrv_enable_crossmntpt == 0 && NFSVNO_EXPORTED(exp)) ||
 	    (nd->nd_flag & ND_NFSV4) == 0) {
 		/*
 		 * Only cross mount points for NFSv4 when doing a
 		 * mount while traversing the file system above
 		 * the mount point, unless nfsrv_enable_crossmntpt is set.
 		 */
 		cnp->cn_flags |= NOCROSSMOUNT;
 	}
 
 	/*
 	 * Initialize for scan, set ni_startdir and bump ref on dp again
 	 * because lookup() will dereference ni_startdir.
 	 */
 
 	ndp->ni_startdir = dp;
 	ndp->ni_rootdir = rootvnode;
 	ndp->ni_topdir = NULL;
 
 	if (!lockleaf)
 		cnp->cn_flags |= LOCKLEAF;
 	for (;;) {
 		cnp->cn_nameptr = cnp->cn_pnbuf;
 		/*
 		 * Call lookup() to do the real work.  If an error occurs,
 		 * ndp->ni_vp and ni_dvp are left uninitialized or NULL and
 		 * we do not have to dereference anything before returning.
 		 * In either case ni_startdir will be dereferenced and NULLed
 		 * out.
 		 */
 		error = vfs_lookup(ndp);
 		if (error)
 			break;
 
 		/*
 		 * Check for encountering a symbolic link.  Trivial
 		 * termination occurs if no symlink encountered.
 		 */
 		if ((cnp->cn_flags & ISSYMLINK) == 0) {
-			if ((cnp->cn_flags & (SAVENAME | SAVESTART)) == 0)
-				nfsvno_relpathbuf(ndp);
 			if (ndp->ni_vp && !lockleaf)
 				NFSVOPUNLOCK(ndp->ni_vp);
 			break;
 		}
 
 		/*
 		 * Validate symlink
 		 */
 		if ((cnp->cn_flags & LOCKPARENT) && ndp->ni_pathlen == 1)
 			NFSVOPUNLOCK(ndp->ni_dvp);
 		if (!(nd->nd_flag & ND_PUBLOOKUP)) {
 			error = EINVAL;
 			goto badlink2;
 		}
 
 		if (ndp->ni_loopcnt++ >= MAXSYMLINKS) {
 			error = ELOOP;
 			goto badlink2;
 		}
 		if (ndp->ni_pathlen > 1)
 			cp = uma_zalloc(namei_zone, M_WAITOK);
 		else
 			cp = cnp->cn_pnbuf;
 		aiov.iov_base = cp;
 		aiov.iov_len = MAXPATHLEN;
 		auio.uio_iov = &aiov;
 		auio.uio_iovcnt = 1;
 		auio.uio_offset = 0;
 		auio.uio_rw = UIO_READ;
 		auio.uio_segflg = UIO_SYSSPACE;
 		auio.uio_td = NULL;
 		auio.uio_resid = MAXPATHLEN;
 		error = VOP_READLINK(ndp->ni_vp, &auio, cnp->cn_cred);
 		if (error) {
 		badlink1:
 			if (ndp->ni_pathlen > 1)
 				uma_zfree(namei_zone, cp);
 		badlink2:
 			vrele(ndp->ni_dvp);
 			vput(ndp->ni_vp);
 			break;
 		}
 		linklen = MAXPATHLEN - auio.uio_resid;
 		if (linklen == 0) {
 			error = ENOENT;
 			goto badlink1;
 		}
 		if (linklen + ndp->ni_pathlen >= MAXPATHLEN) {
 			error = ENAMETOOLONG;
 			goto badlink1;
 		}
 
 		/*
 		 * Adjust or replace path
 		 */
 		if (ndp->ni_pathlen > 1) {
 			NFSBCOPY(ndp->ni_next, cp + linklen, ndp->ni_pathlen);
 			uma_zfree(namei_zone, cnp->cn_pnbuf);
 			cnp->cn_pnbuf = cp;
 		} else
 			cnp->cn_pnbuf[linklen] = '\0';
 		ndp->ni_pathlen += linklen;
 
 		/*
 		 * Cleanup refs for next loop and check if root directory
 		 * should replace current directory.  Normally ni_dvp
 		 * becomes the new base directory and is cleaned up when
 		 * we loop.  Explicitly null pointers after invalidation
 		 * to clarify operation.
 		 */
 		vput(ndp->ni_vp);
 		ndp->ni_vp = NULL;
 
 		if (cnp->cn_pnbuf[0] == '/') {
 			vrele(ndp->ni_dvp);
 			ndp->ni_dvp = ndp->ni_rootdir;
 			VREF(ndp->ni_dvp);
 		}
 		ndp->ni_startdir = ndp->ni_dvp;
 		ndp->ni_dvp = NULL;
 	}
 	if (!lockleaf)
 		cnp->cn_flags &= ~LOCKLEAF;
 
 out:
 	if (error) {
 		nfsvno_relpathbuf(ndp);
 		ndp->ni_vp = NULL;
 		ndp->ni_dvp = NULL;
 		ndp->ni_startdir = NULL;
 	} else if ((ndp->ni_cnd.cn_flags & (WANTPARENT|LOCKPARENT)) == 0) {
 		ndp->ni_dvp = NULL;
 	}
 
 out1:
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * Set up a pathname buffer and return a pointer to it and, optionally
  * set a hash pointer.
  */
 void
 nfsvno_setpathbuf(struct nameidata *ndp, char **bufpp, u_long **hashpp)
 {
 	struct componentname *cnp = &ndp->ni_cnd;
 
-	cnp->cn_flags |= (NOMACCHECK | HASBUF);
+	cnp->cn_flags |= (NOMACCHECK);
 	cnp->cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK);
 	if (hashpp != NULL)
 		*hashpp = NULL;
 	*bufpp = cnp->cn_pnbuf;
 }
 
 /*
  * Release the above path buffer, if not released by nfsvno_namei().
  */
 void
 nfsvno_relpathbuf(struct nameidata *ndp)
 {
 
-	if ((ndp->ni_cnd.cn_flags & HASBUF) == 0)
-		panic("nfsrelpath");
 	uma_zfree(namei_zone, ndp->ni_cnd.cn_pnbuf);
-	ndp->ni_cnd.cn_flags &= ~HASBUF;
+	ndp->ni_cnd.cn_pnbuf = NULL;
 }
 
 /*
  * Readlink vnode op into an mbuf list.
  */
 int
 nfsvno_readlink(struct vnode *vp, struct ucred *cred, int maxextsiz,
     struct thread *p, struct mbuf **mpp, struct mbuf **mpendp, int *lenp)
 {
 	struct iovec *iv;
 	struct uio io, *uiop = &io;
 	struct mbuf *mp, *mp3;
 	int len, tlen, error = 0;
 
 	len = NFS_MAXPATHLEN;
 	if (maxextsiz > 0)
 		uiop->uio_iovcnt = nfsrv_createiovec_extpgs(len, maxextsiz,
 		    &mp3, &mp, &iv);
 	else
 		uiop->uio_iovcnt = nfsrv_createiovec(len, &mp3, &mp, &iv);
 	uiop->uio_iov = iv;
 	uiop->uio_offset = 0;
 	uiop->uio_resid = len;
 	uiop->uio_rw = UIO_READ;
 	uiop->uio_segflg = UIO_SYSSPACE;
 	uiop->uio_td = NULL;
 	error = VOP_READLINK(vp, uiop, cred);
 	free(iv, M_TEMP);
 	if (error) {
 		m_freem(mp3);
 		*lenp = 0;
 		goto out;
 	}
 	if (uiop->uio_resid > 0) {
 		len -= uiop->uio_resid;
 		tlen = NFSM_RNDUP(len);
 		if (tlen == 0) {
 			m_freem(mp3);
 			mp3 = mp = NULL;
 		} else if (tlen != NFS_MAXPATHLEN || tlen != len)
 			mp = nfsrv_adj(mp3, NFS_MAXPATHLEN - tlen,
 			    tlen - len);
 	}
 	*lenp = len;
 	*mpp = mp3;
 	*mpendp = mp;
 
 out:
 	NFSEXITCODE(error);
 	return (error);
 }
 
 /*
  * Create an mbuf chain and an associated iovec that can be used to Read
  * or Getextattr of data.
  * Upon success, return pointers to the first and last mbufs in the chain
  * plus the malloc'd iovec and its iovlen.
  */
 static int
 nfsrv_createiovec(int len, struct mbuf **mpp, struct mbuf **mpendp,
     struct iovec **ivp)
 {
 	struct mbuf *m, *m2 = NULL, *m3;
 	struct iovec *iv;
 	int i, left, siz;
 
 	left = len;
 	m3 = NULL;
 	/*
 	 * Generate the mbuf list with the uio_iov ref. to it.
 	 */
 	i = 0;
 	while (left > 0) {
 		NFSMGET(m);
 		MCLGET(m, M_WAITOK);
 		m->m_len = 0;
 		siz = min(M_TRAILINGSPACE(m), left);
 		left -= siz;
 		i++;
 		if (m3)
 			m2->m_next = m;
 		else
 			m3 = m;
 		m2 = m;
 	}
 	*ivp = iv = malloc(i * sizeof (struct iovec), M_TEMP, M_WAITOK);
 	m = m3;
 	left = len;
 	i = 0;
 	while (left > 0) {
 		if (m == NULL)
 			panic("nfsrv_createiovec iov");
 		siz = min(M_TRAILINGSPACE(m), left);
 		if (siz > 0) {
 			iv->iov_base = mtod(m, caddr_t) + m->m_len;
 			iv->iov_len = siz;
 			m->m_len += siz;
 			left -= siz;
 			iv++;
 			i++;
 		}
 		m = m->m_next;
 	}
 	*mpp = m3;
 	*mpendp = m2;
 	return (i);
 }
 
 /*
  * Create an mbuf chain and an associated iovec that can be used to Read
  * or Getextattr of data.
  * Upon success, return pointers to the first and last mbufs in the chain
  * plus the malloc'd iovec and its iovlen.
  * Same as above, but creates ext_pgs mbuf(s).
  */
 static int
 nfsrv_createiovec_extpgs(int len, int maxextsiz, struct mbuf **mpp,
     struct mbuf **mpendp, struct iovec **ivp)
 {
 	struct mbuf *m, *m2 = NULL, *m3;
 	struct iovec *iv;
 	int i, left, pgno, siz;
 
 	left = len;
 	m3 = NULL;
 	/*
 	 * Generate the mbuf list with the uio_iov ref. to it.
 	 */
 	i = 0;
 	while (left > 0) {
 		siz = min(left, maxextsiz);
 		m = mb_alloc_ext_plus_pages(siz, M_WAITOK);
 		left -= siz;
 		i += m->m_epg_npgs;
 		if (m3 != NULL)
 			m2->m_next = m;
 		else
 			m3 = m;
 		m2 = m;
 	}
 	*ivp = iv = malloc(i * sizeof (struct iovec), M_TEMP, M_WAITOK);
 	m = m3;
 	left = len;
 	i = 0;
 	pgno = 0;
 	while (left > 0) {
 		if (m == NULL)
 			panic("nfsvno_createiovec_extpgs iov");
 		siz = min(PAGE_SIZE, left);
 		if (siz > 0) {
 			iv->iov_base = (void *)PHYS_TO_DMAP(m->m_epg_pa[pgno]);
 			iv->iov_len = siz;
 			m->m_len += siz;
 			if (pgno == m->m_epg_npgs - 1)
 				m->m_epg_last_len = siz;
 			left -= siz;
 			iv++;
 			i++;
 			pgno++;
 		}
 		if (pgno == m->m_epg_npgs && left > 0) {
 			m = m->m_next;
 			if (m == NULL)
 				panic("nfsvno_createiovec_extpgs iov");
 			pgno = 0;
 		}
 	}
 	*mpp = m3;
 	*mpendp = m2;
 	return (i);
 }
 
 /*
  * Read vnode op call into mbuf list.
  */
 int
 nfsvno_read(struct vnode *vp, off_t off, int cnt, struct ucred *cred,
     int maxextsiz, struct thread *p, struct mbuf **mpp,
     struct mbuf **mpendp)
 {
 	struct mbuf *m;
 	struct iovec *iv;
 	int error = 0, len, tlen, ioflag = 0;
 	struct mbuf *m3;
 	struct uio io, *uiop = &io;
 	struct nfsheur *nh;
 
 	/*
 	 * Attempt to read from a DS file. A return of ENOENT implies
 	 * there is no DS file to read.
 	 */
 	error = nfsrv_proxyds(vp, off, cnt, cred, p, NFSPROC_READDS, mpp,
 	    NULL, mpendp, NULL, NULL, NULL, 0, NULL);
 	if (error != ENOENT)
 		return (error);
 
 	len = NFSM_RNDUP(cnt);
 	if (maxextsiz > 0)
 		uiop->uio_iovcnt = nfsrv_createiovec_extpgs(len, maxextsiz,
 		    &m3, &m, &iv);
 	else
 		uiop->uio_iovcnt = nfsrv_createiovec(len, &m3, &m, &iv);
 	uiop->uio_iov = iv;
 	uiop->uio_offset = off;
 	uiop->uio_resid = len;
 	uiop->uio_rw = UIO_READ;
 	uiop->uio_segflg = UIO_SYSSPACE;
 	uiop->uio_td = NULL;
 	nh = nfsrv_sequential_heuristic(uiop, vp);
 	ioflag |= nh->nh_seqcount << IO_SEQSHIFT;
 	/* XXX KDM make this more systematic? */
 	nfsstatsv1.srvbytes[NFSV4OP_READ] += uiop->uio_resid;
 	error = VOP_READ(vp, uiop, IO_NODELOCKED | ioflag, cred);
 	free(iv, M_TEMP);
 	if (error) {
 		m_freem(m3);
 		*mpp = NULL;
 		goto out;
 	}
 	nh->nh_nextoff = uiop->uio_offset;
 	tlen = len - uiop->uio_resid;
 	cnt = cnt < tlen ? cnt : tlen;
 	tlen = NFSM_RNDUP(cnt);
 	if (tlen == 0) {
 		m_freem(m3);
 		m3 = m = NULL;
 	} else if (len != tlen || tlen != cnt)
 		m = nfsrv_adj(m3, len - tlen, tlen - cnt);
 	*mpp = m3;
 	*mpendp = m;
 
 out:
 	NFSEXITCODE(error);
 	return (error);
 }
 
 /*
  * Create the iovec for the mbuf chain passed in as an argument.
  * The "cp" argument is where the data starts within the first mbuf in
  * the chain. It returns the iovec and the iovcnt.
  */
 static int
 nfsrv_createiovecw(int retlen, struct mbuf *m, char *cp, struct iovec **ivpp,
     int *iovcntp)
 {
 	struct mbuf *mp;
 	struct iovec *ivp;
 	int cnt, i, len;
 
 	/*
 	 * Loop through the mbuf chain, counting how many mbufs are a
 	 * part of this write operation, so the iovec size is known.
 	 */
 	cnt = 0;
 	len = retlen;
 	mp = m;
 	i = mtod(mp, caddr_t) + mp->m_len - cp;
 	while (len > 0) {
 		if (i > 0) {
 			len -= i;
 			cnt++;
 		}
 		mp = mp->m_next;
 		if (!mp) {
 			if (len > 0)
 				return (EBADRPC);
 		} else
 			i = mp->m_len;
 	}
 
 	/* Now, create the iovec. */
 	mp = m;
 	*ivpp = ivp = malloc(cnt * sizeof (struct iovec), M_TEMP,
 	    M_WAITOK);
 	*iovcntp = cnt;
 	i = mtod(mp, caddr_t) + mp->m_len - cp;
 	len = retlen;
 	while (len > 0) {
 		if (mp == NULL)
 			panic("nfsrv_createiovecw");
 		if (i > 0) {
 			i = min(i, len);
 			ivp->iov_base = cp;
 			ivp->iov_len = i;
 			ivp++;
 			len -= i;
 		}
 		mp = mp->m_next;
 		if (mp) {
 			i = mp->m_len;
 			cp = mtod(mp, caddr_t);
 		}
 	}
 	return (0);
 }
 
 /*
  * Write vnode op from an mbuf list.
  */
 int
 nfsvno_write(struct vnode *vp, off_t off, int retlen, int *stable,
     struct mbuf *mp, char *cp, struct ucred *cred, struct thread *p)
 {
 	struct iovec *iv;
 	int cnt, ioflags, error;
 	struct uio io, *uiop = &io;
 	struct nfsheur *nh;
 
 	/*
 	 * Attempt to write to a DS file. A return of ENOENT implies
 	 * there is no DS file to write.
 	 */
 	error = nfsrv_proxyds(vp, off, retlen, cred, p, NFSPROC_WRITEDS,
 	    &mp, cp, NULL, NULL, NULL, NULL, 0, NULL);
 	if (error != ENOENT) {
 		*stable = NFSWRITE_FILESYNC;
 		return (error);
 	}
 
 	if (*stable == NFSWRITE_UNSTABLE)
 		ioflags = IO_NODELOCKED;
 	else
 		ioflags = (IO_SYNC | IO_NODELOCKED);
 	error = nfsrv_createiovecw(retlen, mp, cp, &iv, &cnt);
 	if (error != 0)
 		return (error);
 	uiop->uio_iov = iv;
 	uiop->uio_iovcnt = cnt;
 	uiop->uio_resid = retlen;
 	uiop->uio_rw = UIO_WRITE;
 	uiop->uio_segflg = UIO_SYSSPACE;
 	NFSUIOPROC(uiop, p);
 	uiop->uio_offset = off;
 	nh = nfsrv_sequential_heuristic(uiop, vp);
 	ioflags |= nh->nh_seqcount << IO_SEQSHIFT;
 	/* XXX KDM make this more systematic? */
 	nfsstatsv1.srvbytes[NFSV4OP_WRITE] += uiop->uio_resid;
 	error = VOP_WRITE(vp, uiop, ioflags, cred);
 	if (error == 0)
 		nh->nh_nextoff = uiop->uio_offset;
 	free(iv, M_TEMP);
 
 	NFSEXITCODE(error);
 	return (error);
 }
 
 /*
  * Common code for creating a regular file (plus special files for V2).
  */
 int
 nfsvno_createsub(struct nfsrv_descript *nd, struct nameidata *ndp,
     struct vnode **vpp, struct nfsvattr *nvap, int *exclusive_flagp,
     int32_t *cverf, NFSDEV_T rdev, struct nfsexstuff *exp)
 {
 	u_quad_t tempsize;
 	int error;
 	struct thread *p = curthread;
 
 	error = nd->nd_repstat;
 	if (!error && ndp->ni_vp == NULL) {
 		if (nvap->na_type == VREG || nvap->na_type == VSOCK) {
 			vrele(ndp->ni_startdir);
 			error = VOP_CREATE(ndp->ni_dvp,
 			    &ndp->ni_vp, &ndp->ni_cnd, &nvap->na_vattr);
 			/* For a pNFS server, create the data file on a DS. */
 			if (error == 0 && nvap->na_type == VREG) {
 				/*
 				 * Create a data file on a DS for a pNFS server.
 				 * This function just returns if not
 				 * running a pNFS DS or the creation fails.
 				 */
 				nfsrv_pnfscreate(ndp->ni_vp, &nvap->na_vattr,
 				    nd->nd_cred, p);
 			}
 			VOP_VPUT_PAIR(ndp->ni_dvp, error == 0 ? &ndp->ni_vp :
 			    NULL, false);
 			nfsvno_relpathbuf(ndp);
 			if (!error) {
 				if (*exclusive_flagp) {
 					*exclusive_flagp = 0;
 					NFSVNO_ATTRINIT(nvap);
 					nvap->na_atime.tv_sec = cverf[0];
 					nvap->na_atime.tv_nsec = cverf[1];
 					error = VOP_SETATTR(ndp->ni_vp,
 					    &nvap->na_vattr, nd->nd_cred);
 					if (error != 0) {
 						vput(ndp->ni_vp);
 						ndp->ni_vp = NULL;
 						error = NFSERR_NOTSUPP;
 					}
 				}
 			}
 		/*
 		 * NFS V2 Only. nfsrvd_mknod() does this for V3.
 		 * (This implies, just get out on an error.)
 		 */
 		} else if (nvap->na_type == VCHR || nvap->na_type == VBLK ||
 			nvap->na_type == VFIFO) {
 			if (nvap->na_type == VCHR && rdev == 0xffffffff)
 				nvap->na_type = VFIFO;
                         if (nvap->na_type != VFIFO &&
 			    (error = priv_check_cred(nd->nd_cred, PRIV_VFS_MKNOD_DEV))) {
 				vrele(ndp->ni_startdir);
 				nfsvno_relpathbuf(ndp);
 				vput(ndp->ni_dvp);
 				goto out;
 			}
 			nvap->na_rdev = rdev;
 			error = VOP_MKNOD(ndp->ni_dvp, &ndp->ni_vp,
 			    &ndp->ni_cnd, &nvap->na_vattr);
 			VOP_VPUT_PAIR(ndp->ni_dvp, error == 0 ? &ndp->ni_vp :
 			    NULL, false);
 			nfsvno_relpathbuf(ndp);
 			vrele(ndp->ni_startdir);
 			if (error)
 				goto out;
 		} else {
 			vrele(ndp->ni_startdir);
 			nfsvno_relpathbuf(ndp);
 			vput(ndp->ni_dvp);
 			error = ENXIO;
 			goto out;
 		}
 		*vpp = ndp->ni_vp;
 	} else {
 		/*
 		 * Handle cases where error is already set and/or
 		 * the file exists.
 		 * 1 - clean up the lookup
 		 * 2 - iff !error and na_size set, truncate it
 		 */
 		vrele(ndp->ni_startdir);
 		nfsvno_relpathbuf(ndp);
 		*vpp = ndp->ni_vp;
 		if (ndp->ni_dvp == *vpp)
 			vrele(ndp->ni_dvp);
 		else
 			vput(ndp->ni_dvp);
 		if (!error && nvap->na_size != VNOVAL) {
 			error = nfsvno_accchk(*vpp, VWRITE,
 			    nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE,
 			    NFSACCCHK_VPISLOCKED, NULL);
 			if (!error) {
 				tempsize = nvap->na_size;
 				NFSVNO_ATTRINIT(nvap);
 				nvap->na_size = tempsize;
 				error = nfsvno_setattr(*vpp, nvap,
 				    nd->nd_cred, p, exp);
 			}
 		}
 		if (error)
 			vput(*vpp);
 	}
 
 out:
 	NFSEXITCODE(error);
 	return (error);
 }
 
 /*
  * Do a mknod vnode op.
  */
 int
 nfsvno_mknod(struct nameidata *ndp, struct nfsvattr *nvap, struct ucred *cred,
     struct thread *p)
 {
 	int error = 0;
 	enum vtype vtyp;
 
 	vtyp = nvap->na_type;
 	/*
 	 * Iff doesn't exist, create it.
 	 */
 	if (ndp->ni_vp) {
 		vrele(ndp->ni_startdir);
 		nfsvno_relpathbuf(ndp);
 		vput(ndp->ni_dvp);
 		vrele(ndp->ni_vp);
 		error = EEXIST;
 		goto out;
 	}
 	if (vtyp != VCHR && vtyp != VBLK && vtyp != VSOCK && vtyp != VFIFO) {
 		vrele(ndp->ni_startdir);
 		nfsvno_relpathbuf(ndp);
 		vput(ndp->ni_dvp);
 		error = NFSERR_BADTYPE;
 		goto out;
 	}
 	if (vtyp == VSOCK) {
 		vrele(ndp->ni_startdir);
 		error = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp,
 		    &ndp->ni_cnd, &nvap->na_vattr);
 		VOP_VPUT_PAIR(ndp->ni_dvp, error == 0 ? &ndp->ni_vp : NULL,
 		    false);
 		nfsvno_relpathbuf(ndp);
 	} else {
 		if (nvap->na_type != VFIFO &&
 		    (error = priv_check_cred(cred, PRIV_VFS_MKNOD_DEV))) {
 			vrele(ndp->ni_startdir);
 			nfsvno_relpathbuf(ndp);
 			vput(ndp->ni_dvp);
 			goto out;
 		}
 		error = VOP_MKNOD(ndp->ni_dvp, &ndp->ni_vp,
 		    &ndp->ni_cnd, &nvap->na_vattr);
 		VOP_VPUT_PAIR(ndp->ni_dvp, error == 0 ? &ndp->ni_vp : NULL,
 		    false);
 		nfsvno_relpathbuf(ndp);
 		vrele(ndp->ni_startdir);
 		/*
 		 * Since VOP_MKNOD returns the ni_vp, I can't
 		 * see any reason to do the lookup.
 		 */
 	}
 
 out:
 	NFSEXITCODE(error);
 	return (error);
 }
 
 /*
  * Mkdir vnode op.
  */
 int
 nfsvno_mkdir(struct nameidata *ndp, struct nfsvattr *nvap, uid_t saved_uid,
     struct ucred *cred, struct thread *p, struct nfsexstuff *exp)
 {
 	int error = 0;
 
 	if (ndp->ni_vp != NULL) {
 		if (ndp->ni_dvp == ndp->ni_vp)
 			vrele(ndp->ni_dvp);
 		else
 			vput(ndp->ni_dvp);
 		vrele(ndp->ni_vp);
 		nfsvno_relpathbuf(ndp);
 		error = EEXIST;
 		goto out;
 	}
 	error = VOP_MKDIR(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd,
 	    &nvap->na_vattr);
 	VOP_VPUT_PAIR(ndp->ni_dvp, error == 0 ? &ndp->ni_vp : NULL, false);
 	nfsvno_relpathbuf(ndp);
 
 out:
 	NFSEXITCODE(error);
 	return (error);
 }
 
 /*
  * symlink vnode op.
  */
 int
 nfsvno_symlink(struct nameidata *ndp, struct nfsvattr *nvap, char *pathcp,
     int pathlen, int not_v2, uid_t saved_uid, struct ucred *cred, struct thread *p,
     struct nfsexstuff *exp)
 {
 	int error = 0;
 
 	if (ndp->ni_vp) {
 		vrele(ndp->ni_startdir);
 		nfsvno_relpathbuf(ndp);
 		if (ndp->ni_dvp == ndp->ni_vp)
 			vrele(ndp->ni_dvp);
 		else
 			vput(ndp->ni_dvp);
 		vrele(ndp->ni_vp);
 		error = EEXIST;
 		goto out;
 	}
 
 	error = VOP_SYMLINK(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd,
 	    &nvap->na_vattr, pathcp);
 	/*
 	 * Although FreeBSD still had the lookup code in
 	 * it for 7/current, there doesn't seem to be any
 	 * point, since VOP_SYMLINK() returns the ni_vp.
 	 * Just vput it for v2.
 	 */
 	VOP_VPUT_PAIR(ndp->ni_dvp, &ndp->ni_vp, !not_v2 && error == 0);
 	vrele(ndp->ni_startdir);
 	nfsvno_relpathbuf(ndp);
 
 out:
 	NFSEXITCODE(error);
 	return (error);
 }
 
 /*
  * Parse symbolic link arguments.
  * This function has an ugly side effect. It will malloc() an area for
  * the symlink and set iov_base to point to it, only if it succeeds.
  * So, if it returns with uiop->uio_iov->iov_base != NULL, that must
  * be FREE'd later.
  */
 int
 nfsvno_getsymlink(struct nfsrv_descript *nd, struct nfsvattr *nvap,
     struct thread *p, char **pathcpp, int *lenp)
 {
 	u_int32_t *tl;
 	char *pathcp = NULL;
 	int error = 0, len;
 	struct nfsv2_sattr *sp;
 
 	*pathcpp = NULL;
 	*lenp = 0;
 	if ((nd->nd_flag & ND_NFSV3) &&
 	    (error = nfsrv_sattr(nd, NULL, nvap, NULL, NULL, p)))
 		goto nfsmout;
 	NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 	len = fxdr_unsigned(int, *tl);
 	if (len > NFS_MAXPATHLEN || len <= 0) {
 		error = EBADRPC;
 		goto nfsmout;
 	}
 	pathcp = malloc(len + 1, M_TEMP, M_WAITOK);
 	error = nfsrv_mtostr(nd, pathcp, len);
 	if (error)
 		goto nfsmout;
 	if (nd->nd_flag & ND_NFSV2) {
 		NFSM_DISSECT(sp, struct nfsv2_sattr *, NFSX_V2SATTR);
 		nvap->na_mode = fxdr_unsigned(u_int16_t, sp->sa_mode);
 	}
 	*pathcpp = pathcp;
 	*lenp = len;
 	NFSEXITCODE2(0, nd);
 	return (0);
 nfsmout:
 	if (pathcp)
 		free(pathcp, M_TEMP);
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * Remove a non-directory object.
  */
 int
 nfsvno_removesub(struct nameidata *ndp, int is_v4, struct ucred *cred,
     struct thread *p, struct nfsexstuff *exp)
 {
 	struct vnode *vp, *dsdvp[NFSDEV_MAXMIRRORS];
 	int error = 0, mirrorcnt;
 	char fname[PNFS_FILENAME_LEN + 1];
 	fhandle_t fh;
 
 	vp = ndp->ni_vp;
 	dsdvp[0] = NULL;
 	if (vp->v_type == VDIR)
 		error = NFSERR_ISDIR;
 	else if (is_v4)
 		error = nfsrv_checkremove(vp, 1, NULL, (nfsquad_t)((u_quad_t)0),
 		    p);
 	if (error == 0)
 		nfsrv_pnfsremovesetup(vp, p, dsdvp, &mirrorcnt, fname, &fh);
 	if (!error)
 		error = VOP_REMOVE(ndp->ni_dvp, vp, &ndp->ni_cnd);
 	if (error == 0 && dsdvp[0] != NULL)
 		nfsrv_pnfsremove(dsdvp, mirrorcnt, fname, &fh, p);
 	if (ndp->ni_dvp == vp)
 		vrele(ndp->ni_dvp);
 	else
 		vput(ndp->ni_dvp);
 	vput(vp);
-	if ((ndp->ni_cnd.cn_flags & SAVENAME) != 0)
-		nfsvno_relpathbuf(ndp);
+	nfsvno_relpathbuf(ndp);
 	NFSEXITCODE(error);
 	return (error);
 }
 
 /*
  * Remove a directory.
  */
 int
 nfsvno_rmdirsub(struct nameidata *ndp, int is_v4, struct ucred *cred,
     struct thread *p, struct nfsexstuff *exp)
 {
 	struct vnode *vp;
 	int error = 0;
 
 	vp = ndp->ni_vp;
 	if (vp->v_type != VDIR) {
 		error = ENOTDIR;
 		goto out;
 	}
 	/*
 	 * No rmdir "." please.
 	 */
 	if (ndp->ni_dvp == vp) {
 		error = EINVAL;
 		goto out;
 	}
 	/*
 	 * The root of a mounted filesystem cannot be deleted.
 	 */
 	if (vp->v_vflag & VV_ROOT)
 		error = EBUSY;
 out:
 	if (!error)
 		error = VOP_RMDIR(ndp->ni_dvp, vp, &ndp->ni_cnd);
 	if (ndp->ni_dvp == vp)
 		vrele(ndp->ni_dvp);
 	else
 		vput(ndp->ni_dvp);
 	vput(vp);
-	if ((ndp->ni_cnd.cn_flags & SAVENAME) != 0)
-		nfsvno_relpathbuf(ndp);
+	nfsvno_relpathbuf(ndp);
 	NFSEXITCODE(error);
 	return (error);
 }
 
 /*
  * Rename vnode op.
  */
 int
 nfsvno_rename(struct nameidata *fromndp, struct nameidata *tondp,
     u_int32_t ndstat, u_int32_t ndflag, struct ucred *cred, struct thread *p)
 {
 	struct vnode *fvp, *tvp, *tdvp, *dsdvp[NFSDEV_MAXMIRRORS];
 	int error = 0, mirrorcnt;
 	char fname[PNFS_FILENAME_LEN + 1];
 	fhandle_t fh;
 
 	dsdvp[0] = NULL;
 	fvp = fromndp->ni_vp;
 	if (ndstat) {
 		vrele(fromndp->ni_dvp);
 		vrele(fvp);
 		error = ndstat;
 		goto out1;
 	}
 	tdvp = tondp->ni_dvp;
 	tvp = tondp->ni_vp;
 	if (tvp != NULL) {
 		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
 			error = (ndflag & ND_NFSV2) ? EISDIR : EEXIST;
 			goto out;
 		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
 			error = (ndflag & ND_NFSV2) ? ENOTDIR : EEXIST;
 			goto out;
 		}
 		if (tvp->v_type == VDIR && tvp->v_mountedhere) {
 			error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EXDEV;
 			goto out;
 		}
 
 		/*
 		 * A rename to '.' or '..' results in a prematurely
 		 * unlocked vnode on FreeBSD5, so I'm just going to fail that
 		 * here.
 		 */
 		if ((tondp->ni_cnd.cn_namelen == 1 &&
 		     tondp->ni_cnd.cn_nameptr[0] == '.') ||
 		    (tondp->ni_cnd.cn_namelen == 2 &&
 		     tondp->ni_cnd.cn_nameptr[0] == '.' &&
 		     tondp->ni_cnd.cn_nameptr[1] == '.')) {
 			error = EINVAL;
 			goto out;
 		}
 	}
 	if (fvp->v_type == VDIR && fvp->v_mountedhere) {
 		error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EXDEV;
 		goto out;
 	}
 	if (fvp->v_mount != tdvp->v_mount) {
 		error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EXDEV;
 		goto out;
 	}
 	if (fvp == tdvp) {
 		error = (ndflag & ND_NFSV2) ? ENOTEMPTY : EINVAL;
 		goto out;
 	}
 	if (fvp == tvp) {
 		/*
 		 * If source and destination are the same, there is nothing to
 		 * do. Set error to -1 to indicate this.
 		 */
 		error = -1;
 		goto out;
 	}
 	if (ndflag & ND_NFSV4) {
 		if (NFSVOPLOCK(fvp, LK_EXCLUSIVE) == 0) {
 			error = nfsrv_checkremove(fvp, 0, NULL,
 			    (nfsquad_t)((u_quad_t)0), p);
 			NFSVOPUNLOCK(fvp);
 		} else
 			error = EPERM;
 		if (tvp && !error)
 			error = nfsrv_checkremove(tvp, 1, NULL,
 			    (nfsquad_t)((u_quad_t)0), p);
 	} else {
 		/*
 		 * For NFSv2 and NFSv3, try to get rid of the delegation, so
 		 * that the NFSv4 client won't be confused by the rename.
 		 * Since nfsd_recalldelegation() can only be called on an
 		 * unlocked vnode at this point and fvp is the file that will
 		 * still exist after the rename, just do fvp.
 		 */
 		nfsd_recalldelegation(fvp, p);
 	}
 	if (error == 0 && tvp != NULL) {
 		nfsrv_pnfsremovesetup(tvp, p, dsdvp, &mirrorcnt, fname, &fh);
 		NFSD_DEBUG(4, "nfsvno_rename: pnfsremovesetup"
 		    " dsdvp=%p\n", dsdvp[0]);
 	}
 out:
 	if (!error) {
 		error = VOP_RENAME(fromndp->ni_dvp, fromndp->ni_vp,
 		    &fromndp->ni_cnd, tondp->ni_dvp, tondp->ni_vp,
 		    &tondp->ni_cnd);
 	} else {
 		if (tdvp == tvp)
 			vrele(tdvp);
 		else
 			vput(tdvp);
 		if (tvp)
 			vput(tvp);
 		vrele(fromndp->ni_dvp);
 		vrele(fvp);
 		if (error == -1)
 			error = 0;
 	}
 
 	/*
 	 * If dsdvp[0] != NULL, it was set up by nfsrv_pnfsremovesetup() and
 	 * if the rename succeeded, the DS file for the tvp needs to be
 	 * removed.
 	 */
 	if (error == 0 && dsdvp[0] != NULL) {
 		nfsrv_pnfsremove(dsdvp, mirrorcnt, fname, &fh, p);
 		NFSD_DEBUG(4, "nfsvno_rename: pnfsremove\n");
 	}
 
 	vrele(tondp->ni_startdir);
 	nfsvno_relpathbuf(tondp);
 out1:
 	vrele(fromndp->ni_startdir);
 	nfsvno_relpathbuf(fromndp);
 	NFSEXITCODE(error);
 	return (error);
 }
 
 /*
  * Link vnode op.
  */
 int
 nfsvno_link(struct nameidata *ndp, struct vnode *vp, struct ucred *cred,
     struct thread *p, struct nfsexstuff *exp)
 {
 	struct vnode *xp;
 	int error = 0;
 
 	xp = ndp->ni_vp;
 	if (xp != NULL) {
 		error = EEXIST;
 	} else {
 		xp = ndp->ni_dvp;
 		if (vp->v_mount != xp->v_mount)
 			error = EXDEV;
 	}
 	if (!error) {
 		NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
 		if (!VN_IS_DOOMED(vp))
 			error = VOP_LINK(ndp->ni_dvp, vp, &ndp->ni_cnd);
 		else
 			error = EPERM;
 		if (ndp->ni_dvp == vp) {
 			vrele(ndp->ni_dvp);
 			NFSVOPUNLOCK(vp);
 		} else {
 			vref(vp);
 			VOP_VPUT_PAIR(ndp->ni_dvp, &vp, true);
 		}
 	} else {
 		if (ndp->ni_dvp == ndp->ni_vp)
 			vrele(ndp->ni_dvp);
 		else
 			vput(ndp->ni_dvp);
 		if (ndp->ni_vp)
 			vrele(ndp->ni_vp);
 	}
 	nfsvno_relpathbuf(ndp);
 	NFSEXITCODE(error);
 	return (error);
 }
 
 /*
  * Do the fsync() appropriate for the commit.
  */
 int
 nfsvno_fsync(struct vnode *vp, u_int64_t off, int cnt, struct ucred *cred,
     struct thread *td)
 {
 	int error = 0;
 
 	/*
 	 * RFC 1813 3.3.21: if count is 0, a flush from offset to the end of
 	 * file is done.  At this time VOP_FSYNC does not accept offset and
 	 * byte count parameters so call VOP_FSYNC the whole file for now.
 	 * The same is true for NFSv4: RFC 3530 Sec. 14.2.3.
 	 * File systems that do not use the buffer cache (as indicated
 	 * by MNTK_USES_BCACHE not being set) must use VOP_FSYNC().
 	 */
 	if (cnt == 0 || cnt > MAX_COMMIT_COUNT ||
 	    (vp->v_mount->mnt_kern_flag & MNTK_USES_BCACHE) == 0) {
 		/*
 		 * Give up and do the whole thing
 		 */
 		if (vp->v_object && vm_object_mightbedirty(vp->v_object)) {
 			VM_OBJECT_WLOCK(vp->v_object);
 			vm_object_page_clean(vp->v_object, 0, 0, OBJPC_SYNC);
 			VM_OBJECT_WUNLOCK(vp->v_object);
 		}
 		error = VOP_FSYNC(vp, MNT_WAIT, td);
 	} else {
 		/*
 		 * Locate and synchronously write any buffers that fall
 		 * into the requested range.  Note:  we are assuming that
 		 * f_iosize is a power of 2.
 		 */
 		int iosize = vp->v_mount->mnt_stat.f_iosize;
 		int iomask = iosize - 1;
 		struct bufobj *bo;
 		daddr_t lblkno;
 
 		/*
 		 * Align to iosize boundary, super-align to page boundary.
 		 */
 		if (off & iomask) {
 			cnt += off & iomask;
 			off &= ~(u_quad_t)iomask;
 		}
 		if (off & PAGE_MASK) {
 			cnt += off & PAGE_MASK;
 			off &= ~(u_quad_t)PAGE_MASK;
 		}
 		lblkno = off / iosize;
 
 		if (vp->v_object && vm_object_mightbedirty(vp->v_object)) {
 			VM_OBJECT_WLOCK(vp->v_object);
 			vm_object_page_clean(vp->v_object, off, off + cnt,
 			    OBJPC_SYNC);
 			VM_OBJECT_WUNLOCK(vp->v_object);
 		}
 
 		bo = &vp->v_bufobj;
 		BO_LOCK(bo);
 		while (cnt > 0) {
 			struct buf *bp;
 
 			/*
 			 * If we have a buffer and it is marked B_DELWRI we
 			 * have to lock and write it.  Otherwise the prior
 			 * write is assumed to have already been committed.
 			 *
 			 * gbincore() can return invalid buffers now so we
 			 * have to check that bit as well (though B_DELWRI
 			 * should not be set if B_INVAL is set there could be
 			 * a race here since we haven't locked the buffer).
 			 */
 			if ((bp = gbincore(&vp->v_bufobj, lblkno)) != NULL) {
 				if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL |
 				    LK_INTERLOCK, BO_LOCKPTR(bo)) == ENOLCK) {
 					BO_LOCK(bo);
 					continue; /* retry */
 				}
 			    	if ((bp->b_flags & (B_DELWRI|B_INVAL)) ==
 				    B_DELWRI) {
 					bremfree(bp);
 					bp->b_flags &= ~B_ASYNC;
 					bwrite(bp);
 					++nfs_commit_miss;
 				} else
 					BUF_UNLOCK(bp);
 				BO_LOCK(bo);
 			}
 			++nfs_commit_blks;
 			if (cnt < iosize)
 				break;
 			cnt -= iosize;
 			++lblkno;
 		}
 		BO_UNLOCK(bo);
 	}
 	NFSEXITCODE(error);
 	return (error);
 }
 
 /*
  * Statfs vnode op.
  */
 int
 nfsvno_statfs(struct vnode *vp, struct statfs *sf)
 {
 	struct statfs *tsf;
 	int error;
 
 	tsf = NULL;
 	if (nfsrv_devidcnt > 0) {
 		/* For a pNFS service, get the DS numbers. */
 		tsf = malloc(sizeof(*tsf), M_TEMP, M_WAITOK | M_ZERO);
 		error = nfsrv_pnfsstatfs(tsf, vp->v_mount);
 		if (error != 0) {
 			free(tsf, M_TEMP);
 			tsf = NULL;
 		}
 	}
 	error = VFS_STATFS(vp->v_mount, sf);
 	if (error == 0) {
 		if (tsf != NULL) {
 			sf->f_blocks = tsf->f_blocks;
 			sf->f_bavail = tsf->f_bavail;
 			sf->f_bfree = tsf->f_bfree;
 			sf->f_bsize = tsf->f_bsize;
 		}
 		/*
 		 * Since NFS handles these values as unsigned on the
 		 * wire, there is no way to represent negative values,
 		 * so set them to 0. Without this, they will appear
 		 * to be very large positive values for clients like
 		 * Solaris10.
 		 */
 		if (sf->f_bavail < 0)
 			sf->f_bavail = 0;
 		if (sf->f_ffree < 0)
 			sf->f_ffree = 0;
 	}
 	free(tsf, M_TEMP);
 	NFSEXITCODE(error);
 	return (error);
 }
 
 /*
  * Do the vnode op stuff for Open. Similar to nfsvno_createsub(), but
  * must handle nfsrv_opencheck() calls after any other access checks.
  */
 void
 nfsvno_open(struct nfsrv_descript *nd, struct nameidata *ndp,
     nfsquad_t clientid, nfsv4stateid_t *stateidp, struct nfsstate *stp,
     int *exclusive_flagp, struct nfsvattr *nvap, int32_t *cverf, int create,
     NFSACL_T *aclp, nfsattrbit_t *attrbitp, struct ucred *cred,
     struct nfsexstuff *exp, struct vnode **vpp)
 {
 	struct vnode *vp = NULL;
 	u_quad_t tempsize;
 	struct nfsexstuff nes;
 	struct thread *p = curthread;
 
 	if (ndp->ni_vp == NULL)
 		nd->nd_repstat = nfsrv_opencheck(clientid,
 		    stateidp, stp, NULL, nd, p, nd->nd_repstat);
 	if (!nd->nd_repstat) {
 		if (ndp->ni_vp == NULL) {
 			vrele(ndp->ni_startdir);
 			nd->nd_repstat = VOP_CREATE(ndp->ni_dvp,
 			    &ndp->ni_vp, &ndp->ni_cnd, &nvap->na_vattr);
 			/* For a pNFS server, create the data file on a DS. */
 			if (nd->nd_repstat == 0) {
 				/*
 				 * Create a data file on a DS for a pNFS server.
 				 * This function just returns if not
 				 * running a pNFS DS or the creation fails.
 				 */
 				nfsrv_pnfscreate(ndp->ni_vp, &nvap->na_vattr,
 				    cred, p);
 			}
 			VOP_VPUT_PAIR(ndp->ni_dvp, nd->nd_repstat == 0 ?
 			    &ndp->ni_vp : NULL, false);
 			nfsvno_relpathbuf(ndp);
 			if (!nd->nd_repstat) {
 				if (*exclusive_flagp) {
 					*exclusive_flagp = 0;
 					NFSVNO_ATTRINIT(nvap);
 					nvap->na_atime.tv_sec = cverf[0];
 					nvap->na_atime.tv_nsec = cverf[1];
 					nd->nd_repstat = VOP_SETATTR(ndp->ni_vp,
 					    &nvap->na_vattr, cred);
 					if (nd->nd_repstat != 0) {
 						vput(ndp->ni_vp);
 						ndp->ni_vp = NULL;
 						nd->nd_repstat = NFSERR_NOTSUPP;
 					} else
 						NFSSETBIT_ATTRBIT(attrbitp,
 						    NFSATTRBIT_TIMEACCESS);
 				} else {
 					nfsrv_fixattr(nd, ndp->ni_vp, nvap,
 					    aclp, p, attrbitp, exp);
 				}
 			}
 			vp = ndp->ni_vp;
 		} else {
 			if (ndp->ni_startdir)
 				vrele(ndp->ni_startdir);
 			nfsvno_relpathbuf(ndp);
 			vp = ndp->ni_vp;
 			if (create == NFSV4OPEN_CREATE) {
 				if (ndp->ni_dvp == vp)
 					vrele(ndp->ni_dvp);
 				else
 					vput(ndp->ni_dvp);
 			}
 			if (NFSVNO_ISSETSIZE(nvap) && vp->v_type == VREG) {
 				if (ndp->ni_cnd.cn_flags & RDONLY)
 					NFSVNO_SETEXRDONLY(&nes);
 				else
 					NFSVNO_EXINIT(&nes);
 				nd->nd_repstat = nfsvno_accchk(vp, 
 				    VWRITE, cred, &nes, p,
 				    NFSACCCHK_NOOVERRIDE,
 				    NFSACCCHK_VPISLOCKED, NULL);
 				nd->nd_repstat = nfsrv_opencheck(clientid,
 				    stateidp, stp, vp, nd, p, nd->nd_repstat);
 				if (!nd->nd_repstat) {
 					tempsize = nvap->na_size;
 					NFSVNO_ATTRINIT(nvap);
 					nvap->na_size = tempsize;
 					nd->nd_repstat = nfsvno_setattr(vp,
 					    nvap, cred, p, exp);
 				}
 			} else if (vp->v_type == VREG) {
 				nd->nd_repstat = nfsrv_opencheck(clientid,
 				    stateidp, stp, vp, nd, p, nd->nd_repstat);
 			}
 		}
 	} else {
-		if (ndp->ni_cnd.cn_flags & HASBUF)
-			nfsvno_relpathbuf(ndp);
+		nfsvno_relpathbuf(ndp);
 		if (ndp->ni_startdir && create == NFSV4OPEN_CREATE) {
 			vrele(ndp->ni_startdir);
 			if (ndp->ni_dvp == ndp->ni_vp)
 				vrele(ndp->ni_dvp);
 			else
 				vput(ndp->ni_dvp);
 			if (ndp->ni_vp)
 				vput(ndp->ni_vp);
 		}
 	}
 	*vpp = vp;
 
 	NFSEXITCODE2(0, nd);
 }
 
 /*
  * Updates the file rev and sets the mtime and ctime
  * to the current clock time, returning the va_filerev and va_Xtime
  * values.
  * Return ESTALE to indicate the vnode is VIRF_DOOMED.
  */
 int
 nfsvno_updfilerev(struct vnode *vp, struct nfsvattr *nvap,
     struct nfsrv_descript *nd, struct thread *p)
 {
 	struct vattr va;
 
 	VATTR_NULL(&va);
 	vfs_timestamp(&va.va_mtime);
 	if (NFSVOPISLOCKED(vp) != LK_EXCLUSIVE) {
 		NFSVOPLOCK(vp, LK_UPGRADE | LK_RETRY);
 		if (VN_IS_DOOMED(vp))
 			return (ESTALE);
 	}
 	(void) VOP_SETATTR(vp, &va, nd->nd_cred);
 	(void) nfsvno_getattr(vp, nvap, nd, p, 1, NULL);
 	return (0);
 }
 
 /*
  * Glue routine to nfsv4_fillattr().
  */
 int
 nfsvno_fillattr(struct nfsrv_descript *nd, struct mount *mp, struct vnode *vp,
     struct nfsvattr *nvap, fhandle_t *fhp, int rderror, nfsattrbit_t *attrbitp,
     struct ucred *cred, struct thread *p, int isdgram, int reterr,
     int supports_nfsv4acls, int at_root, uint64_t mounted_on_fileno)
 {
 	struct statfs *sf;
 	int error;
 
 	sf = NULL;
 	if (nfsrv_devidcnt > 0 &&
 	    (NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SPACEAVAIL) ||
 	     NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SPACEFREE) ||
 	     NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SPACETOTAL))) {
 		sf = malloc(sizeof(*sf), M_TEMP, M_WAITOK | M_ZERO);
 		error = nfsrv_pnfsstatfs(sf, mp);
 		if (error != 0) {
 			free(sf, M_TEMP);
 			sf = NULL;
 		}
 	}
 	error = nfsv4_fillattr(nd, mp, vp, NULL, &nvap->na_vattr, fhp, rderror,
 	    attrbitp, cred, p, isdgram, reterr, supports_nfsv4acls, at_root,
 	    mounted_on_fileno, sf);
 	free(sf, M_TEMP);
 	NFSEXITCODE2(0, nd);
 	return (error);
 }
 
 /* Since the Readdir vnode ops vary, put the entire functions in here. */
 /*
  * nfs readdir service
  * - mallocs what it thinks is enough to read
  *	count rounded up to a multiple of DIRBLKSIZ <= NFS_MAXREADDIR
  * - calls VOP_READDIR()
  * - loops around building the reply
  *	if the output generated exceeds count break out of loop
  *	The NFSM_CLGET macro is used here so that the reply will be packed
  *	tightly in mbuf clusters.
  * - it trims out records with d_fileno == 0
  *	this doesn't matter for Unix clients, but they might confuse clients
  *	for other os'.
  * - it trims out records with d_type == DT_WHT
  *	these cannot be seen through NFS (unless we extend the protocol)
  *     The alternate call nfsrvd_readdirplus() does lookups as well.
  * PS: The NFS protocol spec. does not clarify what the "count" byte
  *	argument is a count of.. just name strings and file id's or the
  *	entire reply rpc or ...
  *	I tried just file name and id sizes and it confused the Sun client,
  *	so I am using the full rpc size now. The "paranoia.." comment refers
  *	to including the status longwords that are not a part of the dir.
  *	"entry" structures, but are in the rpc.
  */
 int
 nfsrvd_readdir(struct nfsrv_descript *nd, int isdgram,
     struct vnode *vp, struct nfsexstuff *exp)
 {
 	struct dirent *dp;
 	u_int32_t *tl;
 	int dirlen;
 	char *cpos, *cend, *rbuf;
 	struct nfsvattr at;
 	int nlen, error = 0, getret = 1;
 	int siz, cnt, fullsiz, eofflag, ncookies;
 	u_int64_t off, toff, verf __unused;
 	uint64_t *cookies = NULL, *cookiep;
 	struct uio io;
 	struct iovec iv;
 	int is_ufs;
 	struct thread *p = curthread;
 
 	if (nd->nd_repstat) {
 		nfsrv_postopattr(nd, getret, &at);
 		goto out;
 	}
 	if (nd->nd_flag & ND_NFSV2) {
 		NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 		off = fxdr_unsigned(u_quad_t, *tl++);
 	} else {
 		NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
 		off = fxdr_hyper(tl);
 		tl += 2;
 		verf = fxdr_hyper(tl);
 		tl += 2;
 	}
 	toff = off;
 	cnt = fxdr_unsigned(int, *tl);
 	if (cnt > NFS_SRVMAXDATA(nd) || cnt < 0)
 		cnt = NFS_SRVMAXDATA(nd);
 	siz = ((cnt + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1));
 	fullsiz = siz;
 	if (nd->nd_flag & ND_NFSV3) {
 		nd->nd_repstat = getret = nfsvno_getattr(vp, &at, nd, p, 1,
 		    NULL);
 #if 0
 		/*
 		 * va_filerev is not sufficient as a cookie verifier,
 		 * since it is not supposed to change when entries are
 		 * removed/added unless that offset cookies returned to
 		 * the client are no longer valid.
 		 */
 		if (!nd->nd_repstat && toff && verf != at.na_filerev)
 			nd->nd_repstat = NFSERR_BAD_COOKIE;
 #endif
 	}
 	if (!nd->nd_repstat && vp->v_type != VDIR)
 		nd->nd_repstat = NFSERR_NOTDIR;
 	if (nd->nd_repstat == 0 && cnt == 0) {
 		if (nd->nd_flag & ND_NFSV2)
 			/* NFSv2 does not have NFSERR_TOOSMALL */
 			nd->nd_repstat = EPERM;
 		else
 			nd->nd_repstat = NFSERR_TOOSMALL;
 	}
 	if (!nd->nd_repstat)
 		nd->nd_repstat = nfsvno_accchk(vp, VEXEC,
 		    nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE,
 		    NFSACCCHK_VPISLOCKED, NULL);
 	if (nd->nd_repstat) {
 		vput(vp);
 		if (nd->nd_flag & ND_NFSV3)
 			nfsrv_postopattr(nd, getret, &at);
 		goto out;
 	}
 	is_ufs = strcmp(vp->v_mount->mnt_vfc->vfc_name, "ufs") == 0;
 	rbuf = malloc(siz, M_TEMP, M_WAITOK);
 again:
 	eofflag = 0;
 	if (cookies) {
 		free(cookies, M_TEMP);
 		cookies = NULL;
 	}
 
 	iv.iov_base = rbuf;
 	iv.iov_len = siz;
 	io.uio_iov = &iv;
 	io.uio_iovcnt = 1;
 	io.uio_offset = (off_t)off;
 	io.uio_resid = siz;
 	io.uio_segflg = UIO_SYSSPACE;
 	io.uio_rw = UIO_READ;
 	io.uio_td = NULL;
 	nd->nd_repstat = VOP_READDIR(vp, &io, nd->nd_cred, &eofflag, &ncookies,
 	    &cookies);
 	off = (u_int64_t)io.uio_offset;
 	if (io.uio_resid)
 		siz -= io.uio_resid;
 
 	if (!cookies && !nd->nd_repstat)
 		nd->nd_repstat = NFSERR_PERM;
 	if (nd->nd_flag & ND_NFSV3) {
 		getret = nfsvno_getattr(vp, &at, nd, p, 1, NULL);
 		if (!nd->nd_repstat)
 			nd->nd_repstat = getret;
 	}
 
 	/*
 	 * Handles the failed cases. nd->nd_repstat == 0 past here.
 	 */
 	if (nd->nd_repstat) {
 		vput(vp);
 		free(rbuf, M_TEMP);
 		if (cookies)
 			free(cookies, M_TEMP);
 		if (nd->nd_flag & ND_NFSV3)
 			nfsrv_postopattr(nd, getret, &at);
 		goto out;
 	}
 	/*
 	 * If nothing read, return eof
 	 * rpc reply
 	 */
 	if (siz == 0) {
 		vput(vp);
 		if (nd->nd_flag & ND_NFSV2) {
 			NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 		} else {
 			nfsrv_postopattr(nd, getret, &at);
 			NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
 			txdr_hyper(at.na_filerev, tl);
 			tl += 2;
 		}
 		*tl++ = newnfs_false;
 		*tl = newnfs_true;
 		free(rbuf, M_TEMP);
 		free(cookies, M_TEMP);
 		goto out;
 	}
 
 	/*
 	 * Check for degenerate cases of nothing useful read.
 	 * If so go try again
 	 */
 	cpos = rbuf;
 	cend = rbuf + siz;
 	dp = (struct dirent *)cpos;
 	cookiep = cookies;
 
 	/*
 	 * For some reason FreeBSD's ufs_readdir() chooses to back the
 	 * directory offset up to a block boundary, so it is necessary to
 	 * skip over the records that precede the requested offset. This
 	 * requires the assumption that file offset cookies monotonically
 	 * increase.
 	 */
 	while (cpos < cend && ncookies > 0 &&
 	    (dp->d_fileno == 0 || dp->d_type == DT_WHT ||
 	     (is_ufs == 1 && ((u_quad_t)(*cookiep)) <= toff))) {
 		cpos += dp->d_reclen;
 		dp = (struct dirent *)cpos;
 		cookiep++;
 		ncookies--;
 	}
 	if (cpos >= cend || ncookies == 0) {
 		siz = fullsiz;
 		toff = off;
 		goto again;
 	}
 	vput(vp);
 
 	/*
 	 * If cnt > MCLBYTES and the reply will not be saved, use
 	 * ext_pgs mbufs for TLS.
 	 * For NFSv4.0, we do not know for sure if the reply will
 	 * be saved, so do not use ext_pgs mbufs for NFSv4.0.
 	 */
 	if (cnt > MCLBYTES && siz > MCLBYTES &&
 	    (nd->nd_flag & (ND_TLS | ND_EXTPG | ND_SAVEREPLY)) == ND_TLS &&
 	    (nd->nd_flag & (ND_NFSV4 | ND_NFSV41)) != ND_NFSV4)
 		nd->nd_flag |= ND_EXTPG;
 
 	/*
 	 * dirlen is the size of the reply, including all XDR and must
 	 * not exceed cnt. For NFSv2, RFC1094 didn't clearly indicate
 	 * if the XDR should be included in "count", but to be safe, we do.
 	 * (Include the two booleans at the end of the reply in dirlen now.)
 	 */
 	if (nd->nd_flag & ND_NFSV3) {
 		nfsrv_postopattr(nd, getret, &at);
 		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 		txdr_hyper(at.na_filerev, tl);
 		dirlen = NFSX_V3POSTOPATTR + NFSX_VERF + 2 * NFSX_UNSIGNED;
 	} else {
 		dirlen = 2 * NFSX_UNSIGNED;
 	}
 
 	/* Loop through the records and build reply */
 	while (cpos < cend && ncookies > 0) {
 		nlen = dp->d_namlen;
 		if (dp->d_fileno != 0 && dp->d_type != DT_WHT &&
 			nlen <= NFS_MAXNAMLEN) {
 			if (nd->nd_flag & ND_NFSV3)
 				dirlen += (6*NFSX_UNSIGNED + NFSM_RNDUP(nlen));
 			else
 				dirlen += (4*NFSX_UNSIGNED + NFSM_RNDUP(nlen));
 			if (dirlen > cnt) {
 				eofflag = 0;
 				break;
 			}
 
 			/*
 			 * Build the directory record xdr from
 			 * the dirent entry.
 			 */
 			if (nd->nd_flag & ND_NFSV3) {
 				NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
 				*tl++ = newnfs_true;
 				*tl++ = 0;
 			} else {
 				NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 				*tl++ = newnfs_true;
 			}
 			*tl = txdr_unsigned(dp->d_fileno);
 			(void) nfsm_strtom(nd, dp->d_name, nlen);
 			if (nd->nd_flag & ND_NFSV3) {
 				NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 				txdr_hyper(*cookiep, tl);
 			} else {
 				NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
 				*tl = txdr_unsigned(*cookiep);
 			}
 		}
 		cpos += dp->d_reclen;
 		dp = (struct dirent *)cpos;
 		cookiep++;
 		ncookies--;
 	}
 	if (cpos < cend)
 		eofflag = 0;
 	NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 	*tl++ = newnfs_false;
 	if (eofflag)
 		*tl = newnfs_true;
 	else
 		*tl = newnfs_false;
 	free(rbuf, M_TEMP);
 	free(cookies, M_TEMP);
 
 out:
 	NFSEXITCODE2(0, nd);
 	return (0);
 nfsmout:
 	vput(vp);
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * Readdirplus for V3 and Readdir for V4.
  */
 int
 nfsrvd_readdirplus(struct nfsrv_descript *nd, int isdgram,
     struct vnode *vp, struct nfsexstuff *exp)
 {
 	struct dirent *dp;
 	u_int32_t *tl;
 	int dirlen;
 	char *cpos, *cend, *rbuf;
 	struct vnode *nvp;
 	fhandle_t nfh;
 	struct nfsvattr nva, at, *nvap = &nva;
 	struct mbuf *mb0, *mb1;
 	struct nfsreferral *refp;
 	int nlen, r, error = 0, getret = 1, usevget = 1;
 	int siz, cnt, fullsiz, eofflag, ncookies, entrycnt;
 	caddr_t bpos0, bpos1;
 	u_int64_t off, toff, verf __unused;
 	uint64_t *cookies = NULL, *cookiep;
 	nfsattrbit_t attrbits, rderrbits, savbits;
 	struct uio io;
 	struct iovec iv;
 	struct componentname cn;
 	int at_root, is_ufs, is_zfs, needs_unbusy, supports_nfsv4acls;
 	struct mount *mp, *new_mp;
 	uint64_t mounted_on_fileno;
 	struct thread *p = curthread;
 	int bextpg0, bextpg1, bextpgsiz0, bextpgsiz1;
 
 	if (nd->nd_repstat) {
 		nfsrv_postopattr(nd, getret, &at);
 		goto out;
 	}
 	NFSM_DISSECT(tl, u_int32_t *, 6 * NFSX_UNSIGNED);
 	off = fxdr_hyper(tl);
 	toff = off;
 	tl += 2;
 	verf = fxdr_hyper(tl);
 	tl += 2;
 	siz = fxdr_unsigned(int, *tl++);
 	cnt = fxdr_unsigned(int, *tl);
 
 	/*
 	 * Use the server's maximum data transfer size as the upper bound
 	 * on reply datalen.
 	 */
 	if (cnt > NFS_SRVMAXDATA(nd) || cnt < 0)
 		cnt = NFS_SRVMAXDATA(nd);
 
 	/*
 	 * siz is a "hint" of how much directory information (name, fileid,
 	 * cookie) should be in the reply. At least one client "hints" 0,
 	 * so I set it to cnt for that case. I also round it up to the
 	 * next multiple of DIRBLKSIZ.
 	 * Since the size of a Readdirplus directory entry reply will always
 	 * be greater than a directory entry returned by VOP_READDIR(), it
 	 * does not make sense to read more than NFS_SRVMAXDATA() via
 	 * VOP_READDIR().
 	 */
 	if (siz <= 0)
 		siz = cnt;
 	else if (siz > NFS_SRVMAXDATA(nd))
 		siz = NFS_SRVMAXDATA(nd);
 	siz = ((siz + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1));
 
 	if (nd->nd_flag & ND_NFSV4) {
 		error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
 		if (error)
 			goto nfsmout;
 		NFSSET_ATTRBIT(&savbits, &attrbits);
 		NFSCLRNOTFILLABLE_ATTRBIT(&attrbits, nd);
 		NFSZERO_ATTRBIT(&rderrbits);
 		NFSSETBIT_ATTRBIT(&rderrbits, NFSATTRBIT_RDATTRERROR);
 	} else {
 		NFSZERO_ATTRBIT(&attrbits);
 	}
 	fullsiz = siz;
 	nd->nd_repstat = getret = nfsvno_getattr(vp, &at, nd, p, 1, NULL);
 #if 0
 	if (!nd->nd_repstat) {
 	    if (off && verf != at.na_filerev) {
 		/*
 		 * va_filerev is not sufficient as a cookie verifier,
 		 * since it is not supposed to change when entries are
 		 * removed/added unless that offset cookies returned to
 		 * the client are no longer valid.
 		 */
 		if (nd->nd_flag & ND_NFSV4) {
 			nd->nd_repstat = NFSERR_NOTSAME;
 		} else {
 			nd->nd_repstat = NFSERR_BAD_COOKIE;
 		}
 	    }
 	}
 #endif
 	if (!nd->nd_repstat && vp->v_type != VDIR)
 		nd->nd_repstat = NFSERR_NOTDIR;
 	if (!nd->nd_repstat && cnt == 0)
 		nd->nd_repstat = NFSERR_TOOSMALL;
 	if (!nd->nd_repstat)
 		nd->nd_repstat = nfsvno_accchk(vp, VEXEC,
 		    nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE,
 		    NFSACCCHK_VPISLOCKED, NULL);
 	if (nd->nd_repstat) {
 		vput(vp);
 		if (nd->nd_flag & ND_NFSV3)
 			nfsrv_postopattr(nd, getret, &at);
 		goto out;
 	}
 	is_ufs = strcmp(vp->v_mount->mnt_vfc->vfc_name, "ufs") == 0;
 	is_zfs = strcmp(vp->v_mount->mnt_vfc->vfc_name, "zfs") == 0;
 
 	rbuf = malloc(siz, M_TEMP, M_WAITOK);
 again:
 	eofflag = 0;
 	if (cookies) {
 		free(cookies, M_TEMP);
 		cookies = NULL;
 	}
 
 	iv.iov_base = rbuf;
 	iv.iov_len = siz;
 	io.uio_iov = &iv;
 	io.uio_iovcnt = 1;
 	io.uio_offset = (off_t)off;
 	io.uio_resid = siz;
 	io.uio_segflg = UIO_SYSSPACE;
 	io.uio_rw = UIO_READ;
 	io.uio_td = NULL;
 	nd->nd_repstat = VOP_READDIR(vp, &io, nd->nd_cred, &eofflag, &ncookies,
 	    &cookies);
 	off = (u_int64_t)io.uio_offset;
 	if (io.uio_resid)
 		siz -= io.uio_resid;
 
 	getret = nfsvno_getattr(vp, &at, nd, p, 1, NULL);
 
 	if (!cookies && !nd->nd_repstat)
 		nd->nd_repstat = NFSERR_PERM;
 	if (!nd->nd_repstat)
 		nd->nd_repstat = getret;
 	if (nd->nd_repstat) {
 		vput(vp);
 		if (cookies)
 			free(cookies, M_TEMP);
 		free(rbuf, M_TEMP);
 		if (nd->nd_flag & ND_NFSV3)
 			nfsrv_postopattr(nd, getret, &at);
 		goto out;
 	}
 	/*
 	 * If nothing read, return eof
 	 * rpc reply
 	 */
 	if (siz == 0) {
 		vput(vp);
 		if (nd->nd_flag & ND_NFSV3)
 			nfsrv_postopattr(nd, getret, &at);
 		NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
 		txdr_hyper(at.na_filerev, tl);
 		tl += 2;
 		*tl++ = newnfs_false;
 		*tl = newnfs_true;
 		free(cookies, M_TEMP);
 		free(rbuf, M_TEMP);
 		goto out;
 	}
 
 	/*
 	 * Check for degenerate cases of nothing useful read.
 	 * If so go try again
 	 */
 	cpos = rbuf;
 	cend = rbuf + siz;
 	dp = (struct dirent *)cpos;
 	cookiep = cookies;
 
 	/*
 	 * For some reason FreeBSD's ufs_readdir() chooses to back the
 	 * directory offset up to a block boundary, so it is necessary to
 	 * skip over the records that precede the requested offset. This
 	 * requires the assumption that file offset cookies monotonically
 	 * increase.
 	 */
 	while (cpos < cend && ncookies > 0 &&
 	  (dp->d_fileno == 0 || dp->d_type == DT_WHT ||
 	   (is_ufs == 1 && ((u_quad_t)(*cookiep)) <= toff) ||
 	   ((nd->nd_flag & ND_NFSV4) &&
 	    ((dp->d_namlen == 1 && dp->d_name[0] == '.') ||
 	     (dp->d_namlen==2 && dp->d_name[0]=='.' && dp->d_name[1]=='.'))))) {
 		cpos += dp->d_reclen;
 		dp = (struct dirent *)cpos;
 		cookiep++;
 		ncookies--;
 	}
 	if (cpos >= cend || ncookies == 0) {
 		siz = fullsiz;
 		toff = off;
 		goto again;
 	}
 
 	/*
 	 * Busy the file system so that the mount point won't go away
 	 * and, as such, VFS_VGET() can be used safely.
 	 */
 	mp = vp->v_mount;
 	vfs_ref(mp);
 	NFSVOPUNLOCK(vp);
 	nd->nd_repstat = vfs_busy(mp, 0);
 	vfs_rel(mp);
 	if (nd->nd_repstat != 0) {
 		vrele(vp);
 		free(cookies, M_TEMP);
 		free(rbuf, M_TEMP);
 		if (nd->nd_flag & ND_NFSV3)
 			nfsrv_postopattr(nd, getret, &at);
 		goto out;
 	}
 
 	/*
 	 * Check to see if entries in this directory can be safely acquired
 	 * via VFS_VGET() or if a switch to VOP_LOOKUP() is required.
 	 * ZFS snapshot directories need VOP_LOOKUP(), so that any
 	 * automount of the snapshot directory that is required will
 	 * be done.
 	 * This needs to be done here for NFSv4, since NFSv4 never does
 	 * a VFS_VGET() for "." or "..".
 	 */
 	if (is_zfs == 1) {
 		r = VFS_VGET(mp, at.na_fileid, LK_SHARED, &nvp);
 		if (r == EOPNOTSUPP) {
 			usevget = 0;
 			cn.cn_nameiop = LOOKUP;
 			cn.cn_lkflags = LK_SHARED | LK_RETRY;
 			cn.cn_cred = nd->nd_cred;
 		} else if (r == 0)
 			vput(nvp);
 	}
 
 	/*
 	 * If the reply is likely to exceed MCLBYTES and the reply will
 	 * not be saved, use ext_pgs mbufs for TLS.
 	 * It is difficult to predict how large each entry will be and
 	 * how many entries have been read, so just assume the directory
 	 * entries grow by a factor of 4 when attributes are included.
 	 * For NFSv4.0, we do not know for sure if the reply will
 	 * be saved, so do not use ext_pgs mbufs for NFSv4.0.
 	 */
 	if (cnt > MCLBYTES && siz > MCLBYTES / 4 &&
 	    (nd->nd_flag & (ND_TLS | ND_EXTPG | ND_SAVEREPLY)) == ND_TLS &&
 	    (nd->nd_flag & (ND_NFSV4 | ND_NFSV41)) != ND_NFSV4)
 		nd->nd_flag |= ND_EXTPG;
 
 	/*
 	 * Save this position, in case there is an error before one entry
 	 * is created.
 	 */
 	mb0 = nd->nd_mb;
 	bpos0 = nd->nd_bpos;
 	bextpg0 = nd->nd_bextpg;
 	bextpgsiz0 = nd->nd_bextpgsiz;
 
 	/*
 	 * Fill in the first part of the reply.
 	 * dirlen is the reply length in bytes and cannot exceed cnt.
 	 * (Include the two booleans at the end of the reply in dirlen now,
 	 *  so we recognize when we have exceeded cnt.)
 	 */
 	if (nd->nd_flag & ND_NFSV3) {
 		dirlen = NFSX_V3POSTOPATTR + NFSX_VERF + 2 * NFSX_UNSIGNED;
 		nfsrv_postopattr(nd, getret, &at);
 	} else {
 		dirlen = NFSX_VERF + 2 * NFSX_UNSIGNED;
 	}
 	NFSM_BUILD(tl, u_int32_t *, NFSX_VERF);
 	txdr_hyper(at.na_filerev, tl);
 
 	/*
 	 * Save this position, in case there is an empty reply needed.
 	 */
 	mb1 = nd->nd_mb;
 	bpos1 = nd->nd_bpos;
 	bextpg1 = nd->nd_bextpg;
 	bextpgsiz1 = nd->nd_bextpgsiz;
 
 	/* Loop through the records and build reply */
 	entrycnt = 0;
 	while (cpos < cend && ncookies > 0 && dirlen < cnt) {
 		nlen = dp->d_namlen;
 		if (dp->d_fileno != 0 && dp->d_type != DT_WHT &&
 		    nlen <= NFS_MAXNAMLEN &&
 		    ((nd->nd_flag & ND_NFSV3) || nlen > 2 ||
 		     (nlen==2 && (dp->d_name[0]!='.' || dp->d_name[1]!='.'))
 		      || (nlen == 1 && dp->d_name[0] != '.'))) {
 			/*
 			 * Save the current position in the reply, in case
 			 * this entry exceeds cnt.
 			 */
 			mb1 = nd->nd_mb;
 			bpos1 = nd->nd_bpos;
 			bextpg1 = nd->nd_bextpg;
 			bextpgsiz1 = nd->nd_bextpgsiz;
 
 			/*
 			 * For readdir_and_lookup get the vnode using
 			 * the file number.
 			 */
 			nvp = NULL;
 			refp = NULL;
 			r = 0;
 			at_root = 0;
 			needs_unbusy = 0;
 			new_mp = mp;
 			mounted_on_fileno = (uint64_t)dp->d_fileno;
 			if ((nd->nd_flag & ND_NFSV3) ||
 			    NFSNONZERO_ATTRBIT(&savbits)) {
 				if (nd->nd_flag & ND_NFSV4)
 					refp = nfsv4root_getreferral(NULL,
 					    vp, dp->d_fileno);
 				if (refp == NULL) {
 					if (usevget)
 						r = VFS_VGET(mp, dp->d_fileno,
 						    LK_SHARED, &nvp);
 					else
 						r = EOPNOTSUPP;
 					if (r == EOPNOTSUPP) {
 						if (usevget) {
 							usevget = 0;
 							cn.cn_nameiop = LOOKUP;
 							cn.cn_lkflags =
 							    LK_SHARED |
 							    LK_RETRY;
 							cn.cn_cred =
 							    nd->nd_cred;
 						}
 						cn.cn_nameptr = dp->d_name;
 						cn.cn_namelen = nlen;
 						cn.cn_flags = ISLASTCN |
 						    NOFOLLOW | LOCKLEAF;
 						if (nlen == 2 &&
 						    dp->d_name[0] == '.' &&
 						    dp->d_name[1] == '.')
 							cn.cn_flags |=
 							    ISDOTDOT;
 						if (NFSVOPLOCK(vp, LK_SHARED)
 						    != 0) {
 							nd->nd_repstat = EPERM;
 							break;
 						}
 						if ((vp->v_vflag & VV_ROOT) != 0
 						    && (cn.cn_flags & ISDOTDOT)
 						    != 0) {
 							vref(vp);
 							nvp = vp;
 							r = 0;
 						} else {
 							r = VOP_LOOKUP(vp, &nvp,
 							    &cn);
 							if (vp != nvp)
 								NFSVOPUNLOCK(vp);
 						}
 					}
 
 					/*
 					 * For NFSv4, check to see if nvp is
 					 * a mount point and get the mount
 					 * point vnode, as required.
 					 */
 					if (r == 0 &&
 					    nfsrv_enable_crossmntpt != 0 &&
 					    (nd->nd_flag & ND_NFSV4) != 0 &&
 					    nvp->v_type == VDIR &&
 					    nvp->v_mountedhere != NULL) {
 						new_mp = nvp->v_mountedhere;
 						r = vfs_busy(new_mp, 0);
 						vput(nvp);
 						nvp = NULL;
 						if (r == 0) {
 							r = VFS_ROOT(new_mp,
 							    LK_SHARED, &nvp);
 							needs_unbusy = 1;
 							if (r == 0)
 								at_root = 1;
 						}
 					}
 				}
 
 				/*
 				 * If we failed to look up the entry, then it
 				 * has become invalid, most likely removed.
 				 */
 				if (r != 0) {
 					if (needs_unbusy)
 						vfs_unbusy(new_mp);
 					goto invalid;
 				}
 				KASSERT(refp != NULL || nvp != NULL,
 				    ("%s: undetected lookup error", __func__));
 
 				if (refp == NULL &&
 				    ((nd->nd_flag & ND_NFSV3) ||
 				     NFSNONZERO_ATTRBIT(&attrbits))) {
 					r = nfsvno_getfh(nvp, &nfh, p);
 					if (!r)
 					    r = nfsvno_getattr(nvp, nvap, nd, p,
 						1, &attrbits);
 					if (r == 0 && is_zfs == 1 &&
 					    nfsrv_enable_crossmntpt != 0 &&
 					    (nd->nd_flag & ND_NFSV4) != 0 &&
 					    nvp->v_type == VDIR &&
 					    vp->v_mount != nvp->v_mount) {
 					    /*
 					     * For a ZFS snapshot, there is a
 					     * pseudo mount that does not set
 					     * v_mountedhere, so it needs to
 					     * be detected via a different
 					     * mount structure.
 					     */
 					    at_root = 1;
 					    if (new_mp == mp)
 						new_mp = nvp->v_mount;
 					}
 				}
 
 				/*
 				 * If we failed to get attributes of the entry,
 				 * then just skip it for NFSv3 (the traditional
 				 * behavior in the old NFS server).
 				 * For NFSv4 the behavior is controlled by
 				 * RDATTRERROR: we either ignore the error or
 				 * fail the request.
 				 * Note that RDATTRERROR is never set for NFSv3.
 				 */
 				if (r != 0) {
 					if (!NFSISSET_ATTRBIT(&attrbits,
 					    NFSATTRBIT_RDATTRERROR)) {
 						vput(nvp);
 						if (needs_unbusy != 0)
 							vfs_unbusy(new_mp);
 						if ((nd->nd_flag & ND_NFSV3))
 							goto invalid;
 						nd->nd_repstat = r;
 						break;
 					}
 				}
 			}
 
 			/*
 			 * Build the directory record xdr
 			 */
 			if (nd->nd_flag & ND_NFSV3) {
 				NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
 				*tl++ = newnfs_true;
 				*tl++ = 0;
 				*tl = txdr_unsigned(dp->d_fileno);
 				dirlen += nfsm_strtom(nd, dp->d_name, nlen);
 				NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 				txdr_hyper(*cookiep, tl);
 				nfsrv_postopattr(nd, 0, nvap);
 				dirlen += nfsm_fhtom(nd,(u_int8_t *)&nfh,0,1);
 				dirlen += (5*NFSX_UNSIGNED+NFSX_V3POSTOPATTR);
 				if (nvp != NULL)
 					vput(nvp);
 			} else {
 				NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
 				*tl++ = newnfs_true;
 				txdr_hyper(*cookiep, tl);
 				dirlen += nfsm_strtom(nd, dp->d_name, nlen);
 				if (nvp != NULL) {
 					supports_nfsv4acls =
 					    nfs_supportsnfsv4acls(nvp);
 					NFSVOPUNLOCK(nvp);
 				} else
 					supports_nfsv4acls = 0;
 				if (refp != NULL) {
 					dirlen += nfsrv_putreferralattr(nd,
 					    &savbits, refp, 0,
 					    &nd->nd_repstat);
 					if (nd->nd_repstat) {
 						if (nvp != NULL)
 							vrele(nvp);
 						if (needs_unbusy != 0)
 							vfs_unbusy(new_mp);
 						break;
 					}
 				} else if (r) {
 					dirlen += nfsvno_fillattr(nd, new_mp,
 					    nvp, nvap, &nfh, r, &rderrbits,
 					    nd->nd_cred, p, isdgram, 0,
 					    supports_nfsv4acls, at_root,
 					    mounted_on_fileno);
 				} else {
 					dirlen += nfsvno_fillattr(nd, new_mp,
 					    nvp, nvap, &nfh, r, &attrbits,
 					    nd->nd_cred, p, isdgram, 0,
 					    supports_nfsv4acls, at_root,
 					    mounted_on_fileno);
 				}
 				if (nvp != NULL)
 					vrele(nvp);
 				dirlen += (3 * NFSX_UNSIGNED);
 			}
 			if (needs_unbusy != 0)
 				vfs_unbusy(new_mp);
 			if (dirlen <= cnt)
 				entrycnt++;
 		}
 invalid:
 		cpos += dp->d_reclen;
 		dp = (struct dirent *)cpos;
 		cookiep++;
 		ncookies--;
 	}
 	vrele(vp);
 	vfs_unbusy(mp);
 
 	/*
 	 * If dirlen > cnt, we must strip off the last entry. If that
 	 * results in an empty reply, report NFSERR_TOOSMALL.
 	 */
 	if (dirlen > cnt || nd->nd_repstat) {
 		if (!nd->nd_repstat && entrycnt == 0)
 			nd->nd_repstat = NFSERR_TOOSMALL;
 		if (nd->nd_repstat) {
 			nfsm_trimtrailing(nd, mb0, bpos0, bextpg0, bextpgsiz0);
 			if (nd->nd_flag & ND_NFSV3)
 				nfsrv_postopattr(nd, getret, &at);
 		} else
 			nfsm_trimtrailing(nd, mb1, bpos1, bextpg1, bextpgsiz1);
 		eofflag = 0;
 	} else if (cpos < cend)
 		eofflag = 0;
 	if (!nd->nd_repstat) {
 		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 		*tl++ = newnfs_false;
 		if (eofflag)
 			*tl = newnfs_true;
 		else
 			*tl = newnfs_false;
 	}
 	free(cookies, M_TEMP);
 	free(rbuf, M_TEMP);
 
 out:
 	NFSEXITCODE2(0, nd);
 	return (0);
 nfsmout:
 	vput(vp);
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * Get the settable attributes out of the mbuf list.
  * (Return 0 or EBADRPC)
  */
 int
 nfsrv_sattr(struct nfsrv_descript *nd, vnode_t vp, struct nfsvattr *nvap,
     nfsattrbit_t *attrbitp, NFSACL_T *aclp, struct thread *p)
 {
 	u_int32_t *tl;
 	struct nfsv2_sattr *sp;
 	int error = 0, toclient = 0;
 
 	switch (nd->nd_flag & (ND_NFSV2 | ND_NFSV3 | ND_NFSV4)) {
 	case ND_NFSV2:
 		NFSM_DISSECT(sp, struct nfsv2_sattr *, NFSX_V2SATTR);
 		/*
 		 * Some old clients didn't fill in the high order 16bits.
 		 * --> check the low order 2 bytes for 0xffff
 		 */
 		if ((fxdr_unsigned(int, sp->sa_mode) & 0xffff) != 0xffff)
 			nvap->na_mode = nfstov_mode(sp->sa_mode);
 		if (sp->sa_uid != newnfs_xdrneg1)
 			nvap->na_uid = fxdr_unsigned(uid_t, sp->sa_uid);
 		if (sp->sa_gid != newnfs_xdrneg1)
 			nvap->na_gid = fxdr_unsigned(gid_t, sp->sa_gid);
 		if (sp->sa_size != newnfs_xdrneg1)
 			nvap->na_size = fxdr_unsigned(u_quad_t, sp->sa_size);
 		if (sp->sa_atime.nfsv2_sec != newnfs_xdrneg1) {
 #ifdef notyet
 			fxdr_nfsv2time(&sp->sa_atime, &nvap->na_atime);
 #else
 			nvap->na_atime.tv_sec =
 				fxdr_unsigned(u_int32_t,sp->sa_atime.nfsv2_sec);
 			nvap->na_atime.tv_nsec = 0;
 #endif
 		}
 		if (sp->sa_mtime.nfsv2_sec != newnfs_xdrneg1)
 			fxdr_nfsv2time(&sp->sa_mtime, &nvap->na_mtime);
 		break;
 	case ND_NFSV3:
 		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 		if (*tl == newnfs_true) {
 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 			nvap->na_mode = nfstov_mode(*tl);
 		}
 		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 		if (*tl == newnfs_true) {
 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 			nvap->na_uid = fxdr_unsigned(uid_t, *tl);
 		}
 		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 		if (*tl == newnfs_true) {
 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 			nvap->na_gid = fxdr_unsigned(gid_t, *tl);
 		}
 		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 		if (*tl == newnfs_true) {
 			NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 			nvap->na_size = fxdr_hyper(tl);
 		}
 		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 		switch (fxdr_unsigned(int, *tl)) {
 		case NFSV3SATTRTIME_TOCLIENT:
 			NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 			fxdr_nfsv3time(tl, &nvap->na_atime);
 			toclient = 1;
 			break;
 		case NFSV3SATTRTIME_TOSERVER:
 			vfs_timestamp(&nvap->na_atime);
 			nvap->na_vaflags |= VA_UTIMES_NULL;
 			break;
 		}
 		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 		switch (fxdr_unsigned(int, *tl)) {
 		case NFSV3SATTRTIME_TOCLIENT:
 			NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 			fxdr_nfsv3time(tl, &nvap->na_mtime);
 			nvap->na_vaflags &= ~VA_UTIMES_NULL;
 			break;
 		case NFSV3SATTRTIME_TOSERVER:
 			vfs_timestamp(&nvap->na_mtime);
 			if (!toclient)
 				nvap->na_vaflags |= VA_UTIMES_NULL;
 			break;
 		}
 		break;
 	case ND_NFSV4:
 		error = nfsv4_sattr(nd, vp, nvap, attrbitp, aclp, p);
 	}
 nfsmout:
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * Handle the setable attributes for V4.
  * Returns NFSERR_BADXDR if it can't be parsed, 0 otherwise.
  */
 int
 nfsv4_sattr(struct nfsrv_descript *nd, vnode_t vp, struct nfsvattr *nvap,
     nfsattrbit_t *attrbitp, NFSACL_T *aclp, struct thread *p)
 {
 	u_int32_t *tl;
 	int attrsum = 0;
 	int i, j;
 	int error, attrsize, bitpos, aclsize, aceerr, retnotsup = 0;
 	int moderet, toclient = 0;
 	u_char *cp, namestr[NFSV4_SMALLSTR + 1];
 	uid_t uid;
 	gid_t gid;
 	u_short mode, mask;		/* Same type as va_mode. */
 	struct vattr va;
 
 	error = nfsrv_getattrbits(nd, attrbitp, NULL, &retnotsup);
 	if (error)
 		goto nfsmout;
 	NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 	attrsize = fxdr_unsigned(int, *tl);
 
 	/*
 	 * Loop around getting the setable attributes. If an unsupported
 	 * one is found, set nd_repstat == NFSERR_ATTRNOTSUPP and return.
 	 */
 	if (retnotsup) {
 		nd->nd_repstat = NFSERR_ATTRNOTSUPP;
 		bitpos = NFSATTRBIT_MAX;
 	} else {
 		bitpos = 0;
 	}
 	moderet = 0;
 	for (; bitpos < NFSATTRBIT_MAX; bitpos++) {
 	    if (attrsum > attrsize) {
 		error = NFSERR_BADXDR;
 		goto nfsmout;
 	    }
 	    if (NFSISSET_ATTRBIT(attrbitp, bitpos))
 		switch (bitpos) {
 		case NFSATTRBIT_SIZE:
 			NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER);
                      if (vp != NULL && vp->v_type != VREG) {
                             error = (vp->v_type == VDIR) ? NFSERR_ISDIR :
                                 NFSERR_INVAL;
                             goto nfsmout;
 			}
 			nvap->na_size = fxdr_hyper(tl);
 			attrsum += NFSX_HYPER;
 			break;
 		case NFSATTRBIT_ACL:
 			error = nfsrv_dissectacl(nd, aclp, true, &aceerr,
 			    &aclsize, p);
 			if (error)
 				goto nfsmout;
 			if (aceerr && !nd->nd_repstat)
 				nd->nd_repstat = aceerr;
 			attrsum += aclsize;
 			break;
 		case NFSATTRBIT_ARCHIVE:
 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 			if (!nd->nd_repstat)
 				nd->nd_repstat = NFSERR_ATTRNOTSUPP;
 			attrsum += NFSX_UNSIGNED;
 			break;
 		case NFSATTRBIT_HIDDEN:
 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 			if (!nd->nd_repstat)
 				nd->nd_repstat = NFSERR_ATTRNOTSUPP;
 			attrsum += NFSX_UNSIGNED;
 			break;
 		case NFSATTRBIT_MIMETYPE:
 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 			i = fxdr_unsigned(int, *tl);
 			error = nfsm_advance(nd, NFSM_RNDUP(i), -1);
 			if (error)
 				goto nfsmout;
 			if (!nd->nd_repstat)
 				nd->nd_repstat = NFSERR_ATTRNOTSUPP;
 			attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(i));
 			break;
 		case NFSATTRBIT_MODE:
 			moderet = NFSERR_INVAL;	/* Can't do MODESETMASKED. */
 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 			nvap->na_mode = nfstov_mode(*tl);
 			attrsum += NFSX_UNSIGNED;
 			break;
 		case NFSATTRBIT_OWNER:
 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 			j = fxdr_unsigned(int, *tl);
 			if (j < 0) {
 				error = NFSERR_BADXDR;
 				goto nfsmout;
 			}
 			if (j > NFSV4_SMALLSTR)
 				cp = malloc(j + 1, M_NFSSTRING, M_WAITOK);
 			else
 				cp = namestr;
 			error = nfsrv_mtostr(nd, cp, j);
 			if (error) {
 				if (j > NFSV4_SMALLSTR)
 					free(cp, M_NFSSTRING);
 				goto nfsmout;
 			}
 			if (!nd->nd_repstat) {
 				nd->nd_repstat = nfsv4_strtouid(nd, cp, j,
 				    &uid);
 				if (!nd->nd_repstat)
 					nvap->na_uid = uid;
 			}
 			if (j > NFSV4_SMALLSTR)
 				free(cp, M_NFSSTRING);
 			attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(j));
 			break;
 		case NFSATTRBIT_OWNERGROUP:
 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 			j = fxdr_unsigned(int, *tl);
 			if (j < 0) {
 				error = NFSERR_BADXDR;
 				goto nfsmout;
 			}
 			if (j > NFSV4_SMALLSTR)
 				cp = malloc(j + 1, M_NFSSTRING, M_WAITOK);
 			else
 				cp = namestr;
 			error = nfsrv_mtostr(nd, cp, j);
 			if (error) {
 				if (j > NFSV4_SMALLSTR)
 					free(cp, M_NFSSTRING);
 				goto nfsmout;
 			}
 			if (!nd->nd_repstat) {
 				nd->nd_repstat = nfsv4_strtogid(nd, cp, j,
 				    &gid);
 				if (!nd->nd_repstat)
 					nvap->na_gid = gid;
 			}
 			if (j > NFSV4_SMALLSTR)
 				free(cp, M_NFSSTRING);
 			attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(j));
 			break;
 		case NFSATTRBIT_SYSTEM:
 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 			if (!nd->nd_repstat)
 				nd->nd_repstat = NFSERR_ATTRNOTSUPP;
 			attrsum += NFSX_UNSIGNED;
 			break;
 		case NFSATTRBIT_TIMEACCESSSET:
 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 			attrsum += NFSX_UNSIGNED;
 			if (fxdr_unsigned(int, *tl)==NFSV4SATTRTIME_TOCLIENT) {
 			    NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME);
 			    fxdr_nfsv4time(tl, &nvap->na_atime);
 			    toclient = 1;
 			    attrsum += NFSX_V4TIME;
 			} else {
 			    vfs_timestamp(&nvap->na_atime);
 			    nvap->na_vaflags |= VA_UTIMES_NULL;
 			}
 			break;
 		case NFSATTRBIT_TIMEBACKUP:
 			NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME);
 			if (!nd->nd_repstat)
 				nd->nd_repstat = NFSERR_ATTRNOTSUPP;
 			attrsum += NFSX_V4TIME;
 			break;
 		case NFSATTRBIT_TIMECREATE:
 			NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME);
 			fxdr_nfsv4time(tl, &nvap->na_btime);
 			attrsum += NFSX_V4TIME;
 			break;
 		case NFSATTRBIT_TIMEMODIFYSET:
 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 			attrsum += NFSX_UNSIGNED;
 			if (fxdr_unsigned(int, *tl)==NFSV4SATTRTIME_TOCLIENT) {
 			    NFSM_DISSECT(tl, u_int32_t *, NFSX_V4TIME);
 			    fxdr_nfsv4time(tl, &nvap->na_mtime);
 			    nvap->na_vaflags &= ~VA_UTIMES_NULL;
 			    attrsum += NFSX_V4TIME;
 			} else {
 			    vfs_timestamp(&nvap->na_mtime);
 			    if (!toclient)
 				nvap->na_vaflags |= VA_UTIMES_NULL;
 			}
 			break;
 		case NFSATTRBIT_MODESETMASKED:
 			NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
 			mode = fxdr_unsigned(u_short, *tl++);
 			mask = fxdr_unsigned(u_short, *tl);
 			/*
 			 * vp == NULL implies an Open/Create operation.
 			 * This attribute can only be used for Setattr and
 			 * only for NFSv4.1 or higher.
 			 * If moderet != 0, a mode attribute has also been
 			 * specified and this attribute cannot be done in the
 			 * same Setattr operation.
 			 */
 			if ((nd->nd_flag & ND_NFSV41) == 0)
 				nd->nd_repstat = NFSERR_ATTRNOTSUPP;
 			else if ((mode & ~07777) != 0 || (mask & ~07777) != 0 ||
 			    vp == NULL)
 				nd->nd_repstat = NFSERR_INVAL;
 			else if (moderet == 0)
 				moderet = VOP_GETATTR(vp, &va, nd->nd_cred);
 			if (moderet == 0)
 				nvap->na_mode = (mode & mask) |
 				    (va.va_mode & ~mask);
 			else
 				nd->nd_repstat = moderet;
 			attrsum += 2 * NFSX_UNSIGNED;
 			break;
 		default:
 			nd->nd_repstat = NFSERR_ATTRNOTSUPP;
 			/*
 			 * set bitpos so we drop out of the loop.
 			 */
 			bitpos = NFSATTRBIT_MAX;
 			break;
 		}
 	}
 
 	/*
 	 * some clients pad the attrlist, so we need to skip over the
 	 * padding.
 	 */
 	if (attrsum > attrsize) {
 		error = NFSERR_BADXDR;
 	} else {
 		attrsize = NFSM_RNDUP(attrsize);
 		if (attrsum < attrsize)
 			error = nfsm_advance(nd, attrsize - attrsum, -1);
 	}
 nfsmout:
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * Check/setup export credentials.
  */
 int
 nfsd_excred(struct nfsrv_descript *nd, struct nfsexstuff *exp,
     struct ucred *credanon, bool testsec)
 {
 	int error;
 
 	/*
 	 * Check/setup credentials.
 	 */
 	if (nd->nd_flag & ND_GSS)
 		exp->nes_exflag &= ~MNT_EXPORTANON;
 
 	/*
 	 * Check to see if the operation is allowed for this security flavor.
 	 */
 	error = 0;
 	if (testsec) {
 		error = nfsvno_testexp(nd, exp);
 		if (error != 0)
 			goto out;
 	}
 
 	/*
 	 * Check to see if the file system is exported V4 only.
 	 */
 	if (NFSVNO_EXV4ONLY(exp) && !(nd->nd_flag & ND_NFSV4)) {
 		error = NFSERR_PROGNOTV4;
 		goto out;
 	}
 
 	/*
 	 * Now, map the user credentials.
 	 * (Note that ND_AUTHNONE will only be set for an NFSv3
 	 *  Fsinfo RPC. If set for anything else, this code might need
 	 *  to change.)
 	 */
 	if (NFSVNO_EXPORTED(exp)) {
 		if (((nd->nd_flag & ND_GSS) == 0 && nd->nd_cred->cr_uid == 0) ||
 		     NFSVNO_EXPORTANON(exp) ||
 		     (nd->nd_flag & ND_AUTHNONE) != 0) {
 			nd->nd_cred->cr_uid = credanon->cr_uid;
 			nd->nd_cred->cr_gid = credanon->cr_gid;
 			crsetgroups(nd->nd_cred, credanon->cr_ngroups,
 			    credanon->cr_groups);
 		} else if ((nd->nd_flag & ND_GSS) == 0) {
 			/*
 			 * If using AUTH_SYS, call nfsrv_getgrpscred() to see
 			 * if there is a replacement credential with a group
 			 * list set up by "nfsuserd -manage-gids".
 			 * If there is no replacement, nfsrv_getgrpscred()
 			 * simply returns its argument.
 			 */
 			nd->nd_cred = nfsrv_getgrpscred(nd->nd_cred);
 		}
 	}
 
 out:
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * Check exports.
  */
 int
 nfsvno_checkexp(struct mount *mp, struct sockaddr *nam, struct nfsexstuff *exp,
     struct ucred **credp)
 {
 	int error;
 
 	error = VFS_CHECKEXP(mp, nam, &exp->nes_exflag, credp,
 	    &exp->nes_numsecflavor, exp->nes_secflavors);
 	if (error) {
 		if (nfs_rootfhset) {
 			exp->nes_exflag = 0;
 			exp->nes_numsecflavor = 0;
 			error = 0;
 		}
 	} else if (exp->nes_numsecflavor < 1 || exp->nes_numsecflavor >
 	    MAXSECFLAVORS) {
 		printf("nfsvno_checkexp: numsecflavors out of range\n");
 		exp->nes_numsecflavor = 0;
 		error = EACCES;
 	}
 	NFSEXITCODE(error);
 	return (error);
 }
 
 /*
  * Get a vnode for a file handle and export stuff.
  */
 int
 nfsvno_fhtovp(struct mount *mp, fhandle_t *fhp, struct sockaddr *nam,
     int lktype, struct vnode **vpp, struct nfsexstuff *exp,
     struct ucred **credp)
 {
 	int error;
 
 	*credp = NULL;
 	exp->nes_numsecflavor = 0;
 	error = VFS_FHTOVP(mp, &fhp->fh_fid, lktype, vpp);
 	if (error != 0)
 		/* Make sure the server replies ESTALE to the client. */
 		error = ESTALE;
 	if (nam && !error) {
 		error = VFS_CHECKEXP(mp, nam, &exp->nes_exflag, credp,
 		    &exp->nes_numsecflavor, exp->nes_secflavors);
 		if (error) {
 			if (nfs_rootfhset) {
 				exp->nes_exflag = 0;
 				exp->nes_numsecflavor = 0;
 				error = 0;
 			} else {
 				vput(*vpp);
 			}
 		} else if (exp->nes_numsecflavor < 1 || exp->nes_numsecflavor >
 		    MAXSECFLAVORS) {
 			printf("nfsvno_fhtovp: numsecflavors out of range\n");
 			exp->nes_numsecflavor = 0;
 			error = EACCES;
 			vput(*vpp);
 		}
 	}
 	NFSEXITCODE(error);
 	return (error);
 }
 
 /*
  * nfsd_fhtovp() - convert a fh to a vnode ptr
  * 	- look up fsid in mount list (if not found ret error)
  *	- get vp and export rights by calling nfsvno_fhtovp()
  *	- if cred->cr_uid == 0 or MNT_EXPORTANON set it to credanon
  *	  for AUTH_SYS
  *	- if mpp != NULL, return the mount point so that it can
  *	  be used for vn_finished_write() by the caller
  */
 void
 nfsd_fhtovp(struct nfsrv_descript *nd, struct nfsrvfh *nfp, int lktype,
     struct vnode **vpp, struct nfsexstuff *exp,
     struct mount **mpp, int startwrite, int nextop)
 {
 	struct mount *mp, *mpw;
 	struct ucred *credanon;
 	fhandle_t *fhp;
 	int error;
 
 	if (mpp != NULL)
 		*mpp = NULL;
 	*vpp = NULL;
 	fhp = (fhandle_t *)nfp->nfsrvfh_data;
 	mp = vfs_busyfs(&fhp->fh_fsid);
 	if (mp == NULL) {
 		nd->nd_repstat = ESTALE;
 		goto out;
 	}
 
 	if (startwrite) {
 		mpw = mp;
 		error = vn_start_write(NULL, &mpw, V_WAIT);
 		if (error != 0) {
 			mpw = NULL;
 			vfs_unbusy(mp);
 			nd->nd_repstat = ESTALE;
 			goto out;
 		}
 		if (lktype == LK_SHARED && !(MNT_SHARED_WRITES(mp)))
 			lktype = LK_EXCLUSIVE;
 	} else
 		mpw = NULL;
 
 	nd->nd_repstat = nfsvno_fhtovp(mp, fhp, nd->nd_nam, lktype, vpp, exp,
 	    &credanon);
 	vfs_unbusy(mp);
 
 	/*
 	 * For NFSv4 without a pseudo root fs, unexported file handles
 	 * can be returned, so that Lookup works everywhere.
 	 */
 	if (!nd->nd_repstat && exp->nes_exflag == 0 &&
 	    !(nd->nd_flag & ND_NFSV4)) {
 		vput(*vpp);
 		*vpp = NULL;
 		nd->nd_repstat = EACCES;
 	}
 
 	/*
 	 * Personally, I've never seen any point in requiring a
 	 * reserved port#, since only in the rare case where the
 	 * clients are all boxes with secure system privileges,
 	 * does it provide any enhanced security, but... some people
 	 * believe it to be useful and keep putting this code back in.
 	 * (There is also some "security checker" out there that
 	 *  complains if the nfs server doesn't enforce this.)
 	 * However, note the following:
 	 * RFC3530 (NFSv4) specifies that a reserved port# not be
 	 *	required.
 	 * RFC2623 recommends that, if a reserved port# is checked for,
 	 *	that there be a way to turn that off--> ifdef'd.
 	 */
 #ifdef NFS_REQRSVPORT
 	if (!nd->nd_repstat) {
 		struct sockaddr_in *saddr;
 		struct sockaddr_in6 *saddr6;
 
 		saddr = NFSSOCKADDR(nd->nd_nam, struct sockaddr_in *);
 		saddr6 = NFSSOCKADDR(nd->nd_nam, struct sockaddr_in6 *);
 		if (!(nd->nd_flag & ND_NFSV4) &&
 		    ((saddr->sin_family == AF_INET &&
 		      ntohs(saddr->sin_port) >= IPPORT_RESERVED) ||
 		     (saddr6->sin6_family == AF_INET6 &&
 		      ntohs(saddr6->sin6_port) >= IPPORT_RESERVED))) {
 			vput(*vpp);
 			nd->nd_repstat = (NFSERR_AUTHERR | AUTH_TOOWEAK);
 		}
 	}
 #endif	/* NFS_REQRSVPORT */
 
 	/*
 	 * Check/setup credentials.
 	 */
 	if (!nd->nd_repstat) {
 		nd->nd_saveduid = nd->nd_cred->cr_uid;
 		nd->nd_repstat = nfsd_excred(nd, exp, credanon,
 		    nfsrv_checkwrongsec(nd, nextop, (*vpp)->v_type));
 		if (nd->nd_repstat)
 			vput(*vpp);
 	}
 	if (credanon != NULL)
 		crfree(credanon);
 	if (nd->nd_repstat) {
 		vn_finished_write(mpw);
 		*vpp = NULL;
 	} else if (mpp != NULL) {
 		*mpp = mpw;
 	}
 
 out:
 	NFSEXITCODE2(0, nd);
 }
 
 /*
  * glue for fp.
  */
 static int
 fp_getfvp(struct thread *p, int fd, struct file **fpp, struct vnode **vpp)
 {
 	struct filedesc *fdp;
 	struct file *fp;
 	int error = 0;
 
 	fdp = p->td_proc->p_fd;
 	if (fd < 0 || fd >= fdp->fd_nfiles ||
 	    (fp = fdp->fd_ofiles[fd].fde_file) == NULL) {
 		error = EBADF;
 		goto out;
 	}
 	*fpp = fp;
 
 out:
 	NFSEXITCODE(error);
 	return (error);
 }
 
 /*
  * Called from nfssvc() to update the exports list. Just call
  * vfs_export(). This has to be done, since the v4 root fake fs isn't
  * in the mount list.
  */
 int
 nfsrv_v4rootexport(void *argp, struct ucred *cred, struct thread *p)
 {
 	struct nfsex_args *nfsexargp = (struct nfsex_args *)argp;
 	int error = 0;
 	struct nameidata nd;
 	fhandle_t fh;
 
 	error = vfs_export(&nfsv4root_mnt, &nfsexargp->export);
 	if ((nfsexargp->export.ex_flags & MNT_DELEXPORT) != 0)
 		nfs_rootfhset = 0;
 	else if (error == 0) {
 		if (nfsexargp->fspec == NULL) {
 			error = EPERM;
 			goto out;
 		}
 		/*
 		 * If fspec != NULL, this is the v4root path.
 		 */
 		NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, nfsexargp->fspec);
 		if ((error = namei(&nd)) != 0)
 			goto out;
 		error = nfsvno_getfh(nd.ni_vp, &fh, p);
 		vrele(nd.ni_vp);
 		if (!error) {
 			nfs_rootfh.nfsrvfh_len = NFSX_MYFH;
 			NFSBCOPY((caddr_t)&fh,
 			    nfs_rootfh.nfsrvfh_data,
 			    sizeof (fhandle_t));
 			nfs_rootfhset = 1;
 		}
 	}
 
 out:
 	NFSEXITCODE(error);
 	return (error);
 }
 
 /*
  * This function needs to test to see if the system is near its limit
  * for memory allocation via malloc() or mget() and return True iff
  * either of these resources are near their limit.
  * XXX (For now, this is just a stub.)
  */
 int nfsrv_testmalloclimit = 0;
 int
 nfsrv_mallocmget_limit(void)
 {
 	static int printmesg = 0;
 	static int testval = 1;
 
 	if (nfsrv_testmalloclimit && (testval++ % 1000) == 0) {
 		if ((printmesg++ % 100) == 0)
 			printf("nfsd: malloc/mget near limit\n");
 		return (1);
 	}
 	return (0);
 }
 
 /*
  * BSD specific initialization of a mount point.
  */
 void
 nfsd_mntinit(void)
 {
 	static int inited = 0;
 
 	if (inited)
 		return;
 	inited = 1;
 	nfsv4root_mnt.mnt_flag = (MNT_RDONLY | MNT_EXPORTED);
 	TAILQ_INIT(&nfsv4root_mnt.mnt_nvnodelist);
 	TAILQ_INIT(&nfsv4root_mnt.mnt_lazyvnodelist);
 	nfsv4root_mnt.mnt_export = NULL;
 	TAILQ_INIT(&nfsv4root_opt);
 	TAILQ_INIT(&nfsv4root_newopt);
 	nfsv4root_mnt.mnt_opt = &nfsv4root_opt;
 	nfsv4root_mnt.mnt_optnew = &nfsv4root_newopt;
 	nfsv4root_mnt.mnt_nvnodelistsize = 0;
 	nfsv4root_mnt.mnt_lazyvnodelistsize = 0;
 }
 
 static void
 nfsd_timer(void *arg)
 {
 
 	nfsrv_servertimer();
 	callout_reset_sbt(&nfsd_callout, SBT_1S, SBT_1S, nfsd_timer, NULL, 0);
 }
 
 /*
  * Get a vnode for a file handle, without checking exports, etc.
  */
 struct vnode *
 nfsvno_getvp(fhandle_t *fhp)
 {
 	struct mount *mp;
 	struct vnode *vp;
 	int error;
 
 	mp = vfs_busyfs(&fhp->fh_fsid);
 	if (mp == NULL)
 		return (NULL);
 	error = VFS_FHTOVP(mp, &fhp->fh_fid, LK_EXCLUSIVE, &vp);
 	vfs_unbusy(mp);
 	if (error)
 		return (NULL);
 	return (vp);
 }
 
 /*
  * Do a local VOP_ADVLOCK().
  */
 int
 nfsvno_advlock(struct vnode *vp, int ftype, u_int64_t first,
     u_int64_t end, struct thread *td)
 {
 	int error = 0;
 	struct flock fl;
 	u_int64_t tlen;
 
 	if (nfsrv_dolocallocks == 0)
 		goto out;
 	ASSERT_VOP_UNLOCKED(vp, "nfsvno_advlock: vp locked");
 
 	fl.l_whence = SEEK_SET;
 	fl.l_type = ftype;
 	fl.l_start = (off_t)first;
 	if (end == NFS64BITSSET) {
 		fl.l_len = 0;
 	} else {
 		tlen = end - first;
 		fl.l_len = (off_t)tlen;
 	}
 	/*
 	 * For FreeBSD8, the l_pid and l_sysid must be set to the same
 	 * values for all calls, so that all locks will be held by the
 	 * nfsd server. (The nfsd server handles conflicts between the
 	 * various clients.)
 	 * Since an NFSv4 lockowner is a ClientID plus an array of up to 1024
 	 * bytes, so it can't be put in l_sysid.
 	 */
 	if (nfsv4_sysid == 0)
 		nfsv4_sysid = nlm_acquire_next_sysid();
 	fl.l_pid = (pid_t)0;
 	fl.l_sysid = (int)nfsv4_sysid;
 
 	if (ftype == F_UNLCK)
 		error = VOP_ADVLOCK(vp, (caddr_t)td->td_proc, F_UNLCK, &fl,
 		    (F_POSIX | F_REMOTE));
 	else
 		error = VOP_ADVLOCK(vp, (caddr_t)td->td_proc, F_SETLK, &fl,
 		    (F_POSIX | F_REMOTE));
 
 out:
 	NFSEXITCODE(error);
 	return (error);
 }
 
 /*
  * Check the nfsv4 root exports.
  */
 int
 nfsvno_v4rootexport(struct nfsrv_descript *nd)
 {
 	struct ucred *credanon;
 	int error = 0, numsecflavor, secflavors[MAXSECFLAVORS], i;
 	uint64_t exflags;
 
 	error = vfs_stdcheckexp(&nfsv4root_mnt, nd->nd_nam, &exflags,
 	    &credanon, &numsecflavor, secflavors);
 	if (error) {
 		error = NFSERR_PROGUNAVAIL;
 		goto out;
 	}
 	if (credanon != NULL)
 		crfree(credanon);
 	for (i = 0; i < numsecflavor; i++) {
 		if (secflavors[i] == AUTH_SYS)
 			nd->nd_flag |= ND_EXAUTHSYS;
 		else if (secflavors[i] == RPCSEC_GSS_KRB5)
 			nd->nd_flag |= ND_EXGSS;
 		else if (secflavors[i] == RPCSEC_GSS_KRB5I)
 			nd->nd_flag |= ND_EXGSSINTEGRITY;
 		else if (secflavors[i] == RPCSEC_GSS_KRB5P)
 			nd->nd_flag |= ND_EXGSSPRIVACY;
 	}
 
 	/* And set ND_EXxx flags for TLS. */
 	if ((exflags & MNT_EXTLS) != 0) {
 		nd->nd_flag |= ND_EXTLS;
 		if ((exflags & MNT_EXTLSCERT) != 0)
 			nd->nd_flag |= ND_EXTLSCERT;
 		if ((exflags & MNT_EXTLSCERTUSER) != 0)
 			nd->nd_flag |= ND_EXTLSCERTUSER;
 	}
 
 out:
 	NFSEXITCODE(error);
 	return (error);
 }
 
 /*
  * Nfs server pseudo system call for the nfsd's
  */
 /*
  * MPSAFE
  */
 static int
 nfssvc_nfsd(struct thread *td, struct nfssvc_args *uap)
 {
 	struct file *fp;
 	struct nfsd_addsock_args sockarg;
 	struct nfsd_nfsd_args nfsdarg;
 	struct nfsd_nfsd_oargs onfsdarg;
 	struct nfsd_pnfsd_args pnfsdarg;
 	struct vnode *vp, *nvp, *curdvp;
 	struct pnfsdsfile *pf;
 	struct nfsdevice *ds, *fds;
 	cap_rights_t rights;
 	int buflen, error, ret;
 	char *buf, *cp, *cp2, *cp3;
 	char fname[PNFS_FILENAME_LEN + 1];
 
 	if (uap->flag & NFSSVC_NFSDADDSOCK) {
 		error = copyin(uap->argp, (caddr_t)&sockarg, sizeof (sockarg));
 		if (error)
 			goto out;
 		/*
 		 * Since we don't know what rights might be required,
 		 * pretend that we need them all. It is better to be too
 		 * careful than too reckless.
 		 */
 		error = fget(td, sockarg.sock,
 		    cap_rights_init_one(&rights, CAP_SOCK_SERVER), &fp);
 		if (error != 0)
 			goto out;
 		if (fp->f_type != DTYPE_SOCKET) {
 			fdrop(fp, td);
 			error = EPERM;
 			goto out;
 		}
 		error = nfsrvd_addsock(fp);
 		fdrop(fp, td);
 	} else if (uap->flag & NFSSVC_NFSDNFSD) {
 		if (uap->argp == NULL) {
 			error = EINVAL;
 			goto out;
 		}
 		if ((uap->flag & NFSSVC_NEWSTRUCT) == 0) {
 			error = copyin(uap->argp, &onfsdarg, sizeof(onfsdarg));
 			if (error == 0) {
 				nfsdarg.principal = onfsdarg.principal;
 				nfsdarg.minthreads = onfsdarg.minthreads;
 				nfsdarg.maxthreads = onfsdarg.maxthreads;
 				nfsdarg.version = 1;
 				nfsdarg.addr = NULL;
 				nfsdarg.addrlen = 0;
 				nfsdarg.dnshost = NULL;
 				nfsdarg.dnshostlen = 0;
 				nfsdarg.dspath = NULL;
 				nfsdarg.dspathlen = 0;
 				nfsdarg.mdspath = NULL;
 				nfsdarg.mdspathlen = 0;
 				nfsdarg.mirrorcnt = 1;
 			}
 		} else
 			error = copyin(uap->argp, &nfsdarg, sizeof(nfsdarg));
 		if (error)
 			goto out;
 		if (nfsdarg.addrlen > 0 && nfsdarg.addrlen < 10000 &&
 		    nfsdarg.dnshostlen > 0 && nfsdarg.dnshostlen < 10000 &&
 		    nfsdarg.dspathlen > 0 && nfsdarg.dspathlen < 10000 &&
 		    nfsdarg.mdspathlen > 0 && nfsdarg.mdspathlen < 10000 &&
 		    nfsdarg.mirrorcnt >= 1 &&
 		    nfsdarg.mirrorcnt <= NFSDEV_MAXMIRRORS &&
 		    nfsdarg.addr != NULL && nfsdarg.dnshost != NULL &&
 		    nfsdarg.dspath != NULL && nfsdarg.mdspath != NULL) {
 			NFSD_DEBUG(1, "addrlen=%d dspathlen=%d dnslen=%d"
 			    " mdspathlen=%d mirrorcnt=%d\n", nfsdarg.addrlen,
 			    nfsdarg.dspathlen, nfsdarg.dnshostlen,
 			    nfsdarg.mdspathlen, nfsdarg.mirrorcnt);
 			cp = malloc(nfsdarg.addrlen + 1, M_TEMP, M_WAITOK);
 			error = copyin(nfsdarg.addr, cp, nfsdarg.addrlen);
 			if (error != 0) {
 				free(cp, M_TEMP);
 				goto out;
 			}
 			cp[nfsdarg.addrlen] = '\0';	/* Ensure nul term. */
 			nfsdarg.addr = cp;
 			cp = malloc(nfsdarg.dnshostlen + 1, M_TEMP, M_WAITOK);
 			error = copyin(nfsdarg.dnshost, cp, nfsdarg.dnshostlen);
 			if (error != 0) {
 				free(nfsdarg.addr, M_TEMP);
 				free(cp, M_TEMP);
 				goto out;
 			}
 			cp[nfsdarg.dnshostlen] = '\0';	/* Ensure nul term. */
 			nfsdarg.dnshost = cp;
 			cp = malloc(nfsdarg.dspathlen + 1, M_TEMP, M_WAITOK);
 			error = copyin(nfsdarg.dspath, cp, nfsdarg.dspathlen);
 			if (error != 0) {
 				free(nfsdarg.addr, M_TEMP);
 				free(nfsdarg.dnshost, M_TEMP);
 				free(cp, M_TEMP);
 				goto out;
 			}
 			cp[nfsdarg.dspathlen] = '\0';	/* Ensure nul term. */
 			nfsdarg.dspath = cp;
 			cp = malloc(nfsdarg.mdspathlen + 1, M_TEMP, M_WAITOK);
 			error = copyin(nfsdarg.mdspath, cp, nfsdarg.mdspathlen);
 			if (error != 0) {
 				free(nfsdarg.addr, M_TEMP);
 				free(nfsdarg.dnshost, M_TEMP);
 				free(nfsdarg.dspath, M_TEMP);
 				free(cp, M_TEMP);
 				goto out;
 			}
 			cp[nfsdarg.mdspathlen] = '\0';	/* Ensure nul term. */
 			nfsdarg.mdspath = cp;
 		} else {
 			nfsdarg.addr = NULL;
 			nfsdarg.addrlen = 0;
 			nfsdarg.dnshost = NULL;
 			nfsdarg.dnshostlen = 0;
 			nfsdarg.dspath = NULL;
 			nfsdarg.dspathlen = 0;
 			nfsdarg.mdspath = NULL;
 			nfsdarg.mdspathlen = 0;
 			nfsdarg.mirrorcnt = 1;
 		}
 		nfsd_timer(NULL);
 		error = nfsrvd_nfsd(td, &nfsdarg);
 		free(nfsdarg.addr, M_TEMP);
 		free(nfsdarg.dnshost, M_TEMP);
 		free(nfsdarg.dspath, M_TEMP);
 		free(nfsdarg.mdspath, M_TEMP);
 	} else if (uap->flag & NFSSVC_PNFSDS) {
 		error = copyin(uap->argp, &pnfsdarg, sizeof(pnfsdarg));
 		if (error == 0 && (pnfsdarg.op == PNFSDOP_DELDSSERVER ||
 		    pnfsdarg.op == PNFSDOP_FORCEDELDS)) {
 			cp = malloc(PATH_MAX + 1, M_TEMP, M_WAITOK);
 			error = copyinstr(pnfsdarg.dspath, cp, PATH_MAX + 1,
 			    NULL);
 			if (error == 0)
 				error = nfsrv_deldsserver(pnfsdarg.op, cp, td);
 			free(cp, M_TEMP);
 		} else if (error == 0 && pnfsdarg.op == PNFSDOP_COPYMR) {
 			cp = malloc(PATH_MAX + 1, M_TEMP, M_WAITOK);
 			buflen = sizeof(*pf) * NFSDEV_MAXMIRRORS;
 			buf = malloc(buflen, M_TEMP, M_WAITOK);
 			error = copyinstr(pnfsdarg.mdspath, cp, PATH_MAX + 1,
 			    NULL);
 			NFSD_DEBUG(4, "pnfsdcopymr cp mdspath=%d\n", error);
 			if (error == 0 && pnfsdarg.dspath != NULL) {
 				cp2 = malloc(PATH_MAX + 1, M_TEMP, M_WAITOK);
 				error = copyinstr(pnfsdarg.dspath, cp2,
 				    PATH_MAX + 1, NULL);
 				NFSD_DEBUG(4, "pnfsdcopymr cp dspath=%d\n",
 				    error);
 			} else
 				cp2 = NULL;
 			if (error == 0 && pnfsdarg.curdspath != NULL) {
 				cp3 = malloc(PATH_MAX + 1, M_TEMP, M_WAITOK);
 				error = copyinstr(pnfsdarg.curdspath, cp3,
 				    PATH_MAX + 1, NULL);
 				NFSD_DEBUG(4, "pnfsdcopymr cp curdspath=%d\n",
 				    error);
 			} else
 				cp3 = NULL;
 			curdvp = NULL;
 			fds = NULL;
 			if (error == 0)
 				error = nfsrv_mdscopymr(cp, cp2, cp3, buf,
 				    &buflen, fname, td, &vp, &nvp, &pf, &ds,
 				    &fds);
 			NFSD_DEBUG(4, "nfsrv_mdscopymr=%d\n", error);
 			if (error == 0) {
 				if (pf->dsf_dir >= nfsrv_dsdirsize) {
 					printf("copymr: dsdir out of range\n");
 					pf->dsf_dir = 0;
 				}
 				NFSD_DEBUG(4, "copymr: buflen=%d\n", buflen);
 				error = nfsrv_copymr(vp, nvp,
 				    ds->nfsdev_dsdir[pf->dsf_dir], ds, pf,
 				    (struct pnfsdsfile *)buf,
 				    buflen / sizeof(*pf), td->td_ucred, td);
 				vput(vp);
 				vput(nvp);
 				if (fds != NULL && error == 0) {
 					curdvp = fds->nfsdev_dsdir[pf->dsf_dir];
 					ret = vn_lock(curdvp, LK_EXCLUSIVE);
 					if (ret == 0) {
 						nfsrv_dsremove(curdvp, fname,
 						    td->td_ucred, td);
 						NFSVOPUNLOCK(curdvp);
 					}
 				}
 				NFSD_DEBUG(4, "nfsrv_copymr=%d\n", error);
 			}
 			free(cp, M_TEMP);
 			free(cp2, M_TEMP);
 			free(cp3, M_TEMP);
 			free(buf, M_TEMP);
 		}
 	} else {
 		error = nfssvc_srvcall(td, uap, td->td_ucred);
 	}
 
 out:
 	NFSEXITCODE(error);
 	return (error);
 }
 
 static int
 nfssvc_srvcall(struct thread *p, struct nfssvc_args *uap, struct ucred *cred)
 {
 	struct nfsex_args export;
 	struct nfsex_oldargs oexp;
 	struct file *fp = NULL;
 	int stablefd, i, len;
 	struct nfsd_clid adminrevoke;
 	struct nfsd_dumplist dumplist;
 	struct nfsd_dumpclients *dumpclients;
 	struct nfsd_dumplocklist dumplocklist;
 	struct nfsd_dumplocks *dumplocks;
 	struct nameidata nd;
 	vnode_t vp;
 	int error = EINVAL, igotlock;
 	struct proc *procp;
 	gid_t *grps;
 	static int suspend_nfsd = 0;
 
 	if (uap->flag & NFSSVC_PUBLICFH) {
 		NFSBZERO((caddr_t)&nfs_pubfh.nfsrvfh_data,
 		    sizeof (fhandle_t));
 		error = copyin(uap->argp,
 		    &nfs_pubfh.nfsrvfh_data, sizeof (fhandle_t));
 		if (!error)
 			nfs_pubfhset = 1;
 	} else if ((uap->flag & (NFSSVC_V4ROOTEXPORT | NFSSVC_NEWSTRUCT)) ==
 	    (NFSSVC_V4ROOTEXPORT | NFSSVC_NEWSTRUCT)) {
 		error = copyin(uap->argp,(caddr_t)&export,
 		    sizeof (struct nfsex_args));
 		if (!error) {
 			grps = NULL;
 			if (export.export.ex_ngroups > NGROUPS_MAX ||
 			    export.export.ex_ngroups < 0)
 				error = EINVAL;
 			else if (export.export.ex_ngroups > 0) {
 				grps = malloc(export.export.ex_ngroups *
 				    sizeof(gid_t), M_TEMP, M_WAITOK);
 				error = copyin(export.export.ex_groups, grps,
 				    export.export.ex_ngroups * sizeof(gid_t));
 				export.export.ex_groups = grps;
 			} else
 				export.export.ex_groups = NULL;
 			if (!error)
 				error = nfsrv_v4rootexport(&export, cred, p);
 			free(grps, M_TEMP);
 		}
 	} else if ((uap->flag & (NFSSVC_V4ROOTEXPORT | NFSSVC_NEWSTRUCT)) ==
 	    NFSSVC_V4ROOTEXPORT) {
 		error = copyin(uap->argp,(caddr_t)&oexp,
 		    sizeof (struct nfsex_oldargs));
 		if (!error) {
 			memset(&export.export, 0, sizeof(export.export));
 			export.export.ex_flags = (uint64_t)oexp.export.ex_flags;
 			export.export.ex_root = oexp.export.ex_root;
 			export.export.ex_uid = oexp.export.ex_anon.cr_uid;
 			export.export.ex_ngroups =
 			    oexp.export.ex_anon.cr_ngroups;
 			export.export.ex_groups = NULL;
 			if (export.export.ex_ngroups > XU_NGROUPS ||
 			    export.export.ex_ngroups < 0)
 				error = EINVAL;
 			else if (export.export.ex_ngroups > 0) {
 				export.export.ex_groups = malloc(
 				    export.export.ex_ngroups * sizeof(gid_t),
 				    M_TEMP, M_WAITOK);
 				for (i = 0; i < export.export.ex_ngroups; i++)
 					export.export.ex_groups[i] =
 					    oexp.export.ex_anon.cr_groups[i];
 			}
 			export.export.ex_addr = oexp.export.ex_addr;
 			export.export.ex_addrlen = oexp.export.ex_addrlen;
 			export.export.ex_mask = oexp.export.ex_mask;
 			export.export.ex_masklen = oexp.export.ex_masklen;
 			export.export.ex_indexfile = oexp.export.ex_indexfile;
 			export.export.ex_numsecflavors =
 			    oexp.export.ex_numsecflavors;
 			if (export.export.ex_numsecflavors >= MAXSECFLAVORS ||
 			    export.export.ex_numsecflavors < 0)
 				error = EINVAL;
 			else {
 				for (i = 0; i < export.export.ex_numsecflavors;
 				    i++)
 					export.export.ex_secflavors[i] =
 					    oexp.export.ex_secflavors[i];
 			}
 			export.fspec = oexp.fspec;
 			if (error == 0)
 				error = nfsrv_v4rootexport(&export, cred, p);
 			free(export.export.ex_groups, M_TEMP);
 		}
 	} else if (uap->flag & NFSSVC_NOPUBLICFH) {
 		nfs_pubfhset = 0;
 		error = 0;
 	} else if (uap->flag & NFSSVC_STABLERESTART) {
 		error = copyin(uap->argp, (caddr_t)&stablefd,
 		    sizeof (int));
 		if (!error)
 			error = fp_getfvp(p, stablefd, &fp, &vp);
 		if (!error && (NFSFPFLAG(fp) & (FREAD | FWRITE)) != (FREAD | FWRITE))
 			error = EBADF;
 		if (!error && newnfs_numnfsd != 0)
 			error = EPERM;
 		if (!error) {
 			nfsrv_stablefirst.nsf_fp = fp;
 			nfsrv_setupstable(p);
 		}
 	} else if (uap->flag & NFSSVC_ADMINREVOKE) {
 		error = copyin(uap->argp, (caddr_t)&adminrevoke,
 		    sizeof (struct nfsd_clid));
 		if (!error)
 			error = nfsrv_adminrevoke(&adminrevoke, p);
 	} else if (uap->flag & NFSSVC_DUMPCLIENTS) {
 		error = copyin(uap->argp, (caddr_t)&dumplist,
 		    sizeof (struct nfsd_dumplist));
 		if (!error && (dumplist.ndl_size < 1 ||
 			dumplist.ndl_size > NFSRV_MAXDUMPLIST))
 			error = EPERM;
 		if (!error) {
 		    len = sizeof (struct nfsd_dumpclients) * dumplist.ndl_size;
 		    dumpclients = malloc(len, M_TEMP, M_WAITOK | M_ZERO);
 		    nfsrv_dumpclients(dumpclients, dumplist.ndl_size);
 		    error = copyout(dumpclients, dumplist.ndl_list, len);
 		    free(dumpclients, M_TEMP);
 		}
 	} else if (uap->flag & NFSSVC_DUMPLOCKS) {
 		error = copyin(uap->argp, (caddr_t)&dumplocklist,
 		    sizeof (struct nfsd_dumplocklist));
 		if (!error && (dumplocklist.ndllck_size < 1 ||
 			dumplocklist.ndllck_size > NFSRV_MAXDUMPLIST))
 			error = EPERM;
 		if (!error)
 			error = nfsrv_lookupfilename(&nd,
 				dumplocklist.ndllck_fname, p);
 		if (!error) {
 			len = sizeof (struct nfsd_dumplocks) *
 				dumplocklist.ndllck_size;
 			dumplocks = malloc(len, M_TEMP, M_WAITOK | M_ZERO);
 			nfsrv_dumplocks(nd.ni_vp, dumplocks,
 			    dumplocklist.ndllck_size, p);
 			vput(nd.ni_vp);
 			error = copyout(dumplocks, dumplocklist.ndllck_list,
 			    len);
 			free(dumplocks, M_TEMP);
 		}
 	} else if (uap->flag & NFSSVC_BACKUPSTABLE) {
 		procp = p->td_proc;
 		PROC_LOCK(procp);
 		nfsd_master_pid = procp->p_pid;
 		bcopy(procp->p_comm, nfsd_master_comm, MAXCOMLEN + 1);
 		nfsd_master_start = procp->p_stats->p_start;
 		nfsd_master_proc = procp;
 		PROC_UNLOCK(procp);
 	} else if ((uap->flag & NFSSVC_SUSPENDNFSD) != 0) {
 		NFSLOCKV4ROOTMUTEX();
 		if (suspend_nfsd == 0) {
 			/* Lock out all nfsd threads */
 			do {
 				igotlock = nfsv4_lock(&nfsd_suspend_lock, 1,
 				    NULL, NFSV4ROOTLOCKMUTEXPTR, NULL);
 			} while (igotlock == 0 && suspend_nfsd == 0);
 			suspend_nfsd = 1;
 		}
 		NFSUNLOCKV4ROOTMUTEX();
 		error = 0;
 	} else if ((uap->flag & NFSSVC_RESUMENFSD) != 0) {
 		NFSLOCKV4ROOTMUTEX();
 		if (suspend_nfsd != 0) {
 			nfsv4_unlock(&nfsd_suspend_lock, 0);
 			suspend_nfsd = 0;
 		}
 		NFSUNLOCKV4ROOTMUTEX();
 		error = 0;
 	}
 
 	NFSEXITCODE(error);
 	return (error);
 }
 
 /*
  * Check exports.
  * Returns 0 if ok, 1 otherwise.
  */
 int
 nfsvno_testexp(struct nfsrv_descript *nd, struct nfsexstuff *exp)
 {
 	int i;
 
 	if ((NFSVNO_EXTLS(exp) && (nd->nd_flag & ND_TLS) == 0) ||
 	    (NFSVNO_EXTLSCERT(exp) &&
 	     (nd->nd_flag & ND_TLSCERT) == 0) ||
 	    (NFSVNO_EXTLSCERTUSER(exp) &&
 	     (nd->nd_flag & ND_TLSCERTUSER) == 0)) {
 		if ((nd->nd_flag & ND_NFSV4) != 0)
 			return (NFSERR_WRONGSEC);
 #ifdef notnow
 		/* There is currently no auth_stat for this. */
 		else if ((nd->nd_flag & ND_TLS) == 0)
 			return (NFSERR_AUTHERR | AUTH_NEEDS_TLS);
 		else
 			return (NFSERR_AUTHERR | AUTH_NEEDS_TLS_MUTUAL_HOST);
 #endif
 		else
 			return (NFSERR_AUTHERR | AUTH_TOOWEAK);
 	}
 
 	/*
 	 * RFC2623 suggests that the NFSv3 Fsinfo RPC be allowed to use
 	 * AUTH_NONE or AUTH_SYS for file systems requiring RPCSEC_GSS.
 	 */
 	if ((nd->nd_flag & ND_NFSV3) != 0 && nd->nd_procnum == NFSPROC_FSINFO)
 		return (0);
 
 	/*
 	 * This seems odd, but allow the case where the security flavor
 	 * list is empty. This happens when NFSv4 is traversing non-exported
 	 * file systems. Exported file systems should always have a non-empty
 	 * security flavor list.
 	 */
 	if (exp->nes_numsecflavor == 0)
 		return (0);
 
 	for (i = 0; i < exp->nes_numsecflavor; i++) {
 		/*
 		 * The tests for privacy and integrity must be first,
 		 * since ND_GSS is set for everything but AUTH_SYS.
 		 */
 		if (exp->nes_secflavors[i] == RPCSEC_GSS_KRB5P &&
 		    (nd->nd_flag & ND_GSSPRIVACY))
 			return (0);
 		if (exp->nes_secflavors[i] == RPCSEC_GSS_KRB5I &&
 		    (nd->nd_flag & ND_GSSINTEGRITY))
 			return (0);
 		if (exp->nes_secflavors[i] == RPCSEC_GSS_KRB5 &&
 		    (nd->nd_flag & ND_GSS))
 			return (0);
 		if (exp->nes_secflavors[i] == AUTH_SYS &&
 		    (nd->nd_flag & ND_GSS) == 0)
 			return (0);
 	}
 	if ((nd->nd_flag & ND_NFSV4) != 0)
 		return (NFSERR_WRONGSEC);
 	return (NFSERR_AUTHERR | AUTH_TOOWEAK);
 }
 
 /*
  * Calculate a hash value for the fid in a file handle.
  */
 uint32_t
 nfsrv_hashfh(fhandle_t *fhp)
 {
 	uint32_t hashval;
 
 	hashval = hash32_buf(&fhp->fh_fid, sizeof(struct fid), 0);
 	return (hashval);
 }
 
 /*
  * Calculate a hash value for the sessionid.
  */
 uint32_t
 nfsrv_hashsessionid(uint8_t *sessionid)
 {
 	uint32_t hashval;
 
 	hashval = hash32_buf(sessionid, NFSX_V4SESSIONID, 0);
 	return (hashval);
 }
 
 /*
  * Signal the userland master nfsd to backup the stable restart file.
  */
 void
 nfsrv_backupstable(void)
 {
 	struct proc *procp;
 
 	if (nfsd_master_proc != NULL) {
 		procp = pfind(nfsd_master_pid);
 		/* Try to make sure it is the correct process. */
 		if (procp == nfsd_master_proc &&
 		    procp->p_stats->p_start.tv_sec ==
 		    nfsd_master_start.tv_sec &&
 		    procp->p_stats->p_start.tv_usec ==
 		    nfsd_master_start.tv_usec &&
 		    strcmp(procp->p_comm, nfsd_master_comm) == 0)
 			kern_psignal(procp, SIGUSR2);
 		else
 			nfsd_master_proc = NULL;
 
 		if (procp != NULL)
 			PROC_UNLOCK(procp);
 	}
 }
 
 /*
  * Create a DS data file for nfsrv_pnfscreate(). Called for each mirror.
  * The arguments are in a structure, so that they can be passed through
  * taskqueue for a kernel process to execute this function.
  */
 struct nfsrvdscreate {
 	int			done;
 	int			inprog;
 	struct task		tsk;
 	struct ucred		*tcred;
 	struct vnode		*dvp;
 	NFSPROC_T		*p;
 	struct pnfsdsfile	*pf;
 	int			err;
 	fhandle_t		fh;
 	struct vattr		va;
 	struct vattr		createva;
 };
 
 int
 nfsrv_dscreate(struct vnode *dvp, struct vattr *vap, struct vattr *nvap,
     fhandle_t *fhp, struct pnfsdsfile *pf, struct pnfsdsattr *dsa,
     char *fnamep, struct ucred *tcred, NFSPROC_T *p, struct vnode **nvpp)
 {
 	struct vnode *nvp;
 	struct nameidata named;
 	struct vattr va;
 	char *bufp;
 	u_long *hashp;
 	struct nfsnode *np;
 	struct nfsmount *nmp;
 	int error;
 
 	NFSNAMEICNDSET(&named.ni_cnd, tcred, CREATE,
 	    LOCKPARENT | LOCKLEAF | SAVESTART | NOCACHE);
 	nfsvno_setpathbuf(&named, &bufp, &hashp);
 	named.ni_cnd.cn_lkflags = LK_EXCLUSIVE;
 	named.ni_cnd.cn_nameptr = bufp;
 	if (fnamep != NULL) {
 		strlcpy(bufp, fnamep, PNFS_FILENAME_LEN + 1);
 		named.ni_cnd.cn_namelen = strlen(bufp);
 	} else
 		named.ni_cnd.cn_namelen = nfsrv_putfhname(fhp, bufp);
 	NFSD_DEBUG(4, "nfsrv_dscreate: dvp=%p fname=%s\n", dvp, bufp);
 
 	/* Create the date file in the DS mount. */
 	error = NFSVOPLOCK(dvp, LK_EXCLUSIVE);
 	if (error == 0) {
 		error = VOP_CREATE(dvp, &nvp, &named.ni_cnd, vap);
 		vref(dvp);
 		VOP_VPUT_PAIR(dvp, error == 0 ? &nvp : NULL, false);
 		if (error == 0) {
 			/* Set the ownership of the file. */
 			error = VOP_SETATTR(nvp, nvap, tcred);
 			NFSD_DEBUG(4, "nfsrv_dscreate:"
 			    " setattr-uid=%d\n", error);
 			if (error != 0)
 				vput(nvp);
 		}
 		if (error != 0)
 			printf("pNFS: pnfscreate failed=%d\n", error);
 	} else
 		printf("pNFS: pnfscreate vnlock=%d\n", error);
 	if (error == 0) {
 		np = VTONFS(nvp);
 		nmp = VFSTONFS(nvp->v_mount);
 		if (strcmp(nvp->v_mount->mnt_vfc->vfc_name, "nfs")
 		    != 0 || nmp->nm_nam->sa_len > sizeof(
 		    struct sockaddr_in6) ||
 		    np->n_fhp->nfh_len != NFSX_MYFH) {
 			printf("Bad DS file: fstype=%s salen=%d"
 			    " fhlen=%d\n",
 			    nvp->v_mount->mnt_vfc->vfc_name,
 			    nmp->nm_nam->sa_len, np->n_fhp->nfh_len);
 			error = ENOENT;
 		}
 
 		/* Set extattrs for the DS on the MDS file. */
 		if (error == 0) {
 			if (dsa != NULL) {
 				error = VOP_GETATTR(nvp, &va, tcred);
 				if (error == 0) {
 					dsa->dsa_filerev = va.va_filerev;
 					dsa->dsa_size = va.va_size;
 					dsa->dsa_atime = va.va_atime;
 					dsa->dsa_mtime = va.va_mtime;
 					dsa->dsa_bytes = va.va_bytes;
 				}
 			}
 			if (error == 0) {
 				NFSBCOPY(np->n_fhp->nfh_fh, &pf->dsf_fh,
 				    NFSX_MYFH);
 				NFSBCOPY(nmp->nm_nam, &pf->dsf_sin,
 				    nmp->nm_nam->sa_len);
 				NFSBCOPY(named.ni_cnd.cn_nameptr,
 				    pf->dsf_filename,
 				    sizeof(pf->dsf_filename));
 			}
 		} else
 			printf("pNFS: pnfscreate can't get DS"
 			    " attr=%d\n", error);
 		if (nvpp != NULL && error == 0)
 			*nvpp = nvp;
 		else
 			vput(nvp);
 	}
 	nfsvno_relpathbuf(&named);
 	return (error);
 }
 
 /*
  * Start up the thread that will execute nfsrv_dscreate().
  */
 static void
 start_dscreate(void *arg, int pending)
 {
 	struct nfsrvdscreate *dsc;
 
 	dsc = (struct nfsrvdscreate *)arg;
 	dsc->err = nfsrv_dscreate(dsc->dvp, &dsc->createva, &dsc->va, &dsc->fh,
 	    dsc->pf, NULL, NULL, dsc->tcred, dsc->p, NULL);
 	dsc->done = 1;
 	NFSD_DEBUG(4, "start_dscreate: err=%d\n", dsc->err);
 }
 
 /*
  * Create a pNFS data file on the Data Server(s).
  */
 static void
 nfsrv_pnfscreate(struct vnode *vp, struct vattr *vap, struct ucred *cred,
     NFSPROC_T *p)
 {
 	struct nfsrvdscreate *dsc, *tdsc = NULL;
 	struct nfsdevice *ds, *tds, *fds;
 	struct mount *mp;
 	struct pnfsdsfile *pf, *tpf;
 	struct pnfsdsattr dsattr;
 	struct vattr va;
 	struct vnode *dvp[NFSDEV_MAXMIRRORS];
 	struct nfsmount *nmp;
 	fhandle_t fh;
 	uid_t vauid;
 	gid_t vagid;
 	u_short vamode;
 	struct ucred *tcred;
 	int dsdir[NFSDEV_MAXMIRRORS], error, i, mirrorcnt, ret;
 	int failpos, timo;
 
 	/* Get a DS server directory in a round-robin order. */
 	mirrorcnt = 1;
 	mp = vp->v_mount;
 	ds = fds = NULL;
 	NFSDDSLOCK();
 	/*
 	 * Search for the first entry that handles this MDS fs, but use the
 	 * first entry for all MDS fs's otherwise.
 	 */
 	TAILQ_FOREACH(tds, &nfsrv_devidhead, nfsdev_list) {
 		if (tds->nfsdev_nmp != NULL) {
 			if (tds->nfsdev_mdsisset == 0 && ds == NULL)
 				ds = tds;
 			else if (tds->nfsdev_mdsisset != 0 && fsidcmp(
 			    &mp->mnt_stat.f_fsid, &tds->nfsdev_mdsfsid) == 0) {
 				ds = fds = tds;
 				break;
 			}
 		}
 	}
 	if (ds == NULL) {
 		NFSDDSUNLOCK();
 		NFSD_DEBUG(4, "nfsrv_pnfscreate: no srv\n");
 		return;
 	}
 	i = dsdir[0] = ds->nfsdev_nextdir;
 	ds->nfsdev_nextdir = (ds->nfsdev_nextdir + 1) % nfsrv_dsdirsize;
 	dvp[0] = ds->nfsdev_dsdir[i];
 	tds = TAILQ_NEXT(ds, nfsdev_list);
 	if (nfsrv_maxpnfsmirror > 1 && tds != NULL) {
 		TAILQ_FOREACH_FROM(tds, &nfsrv_devidhead, nfsdev_list) {
 			if (tds->nfsdev_nmp != NULL &&
 			    ((tds->nfsdev_mdsisset == 0 && fds == NULL) ||
 			     (tds->nfsdev_mdsisset != 0 && fds != NULL &&
 			      fsidcmp(&mp->mnt_stat.f_fsid,
 			      &tds->nfsdev_mdsfsid) == 0))) {
 				dsdir[mirrorcnt] = i;
 				dvp[mirrorcnt] = tds->nfsdev_dsdir[i];
 				mirrorcnt++;
 				if (mirrorcnt >= nfsrv_maxpnfsmirror)
 					break;
 			}
 		}
 	}
 	/* Put at end of list to implement round-robin usage. */
 	TAILQ_REMOVE(&nfsrv_devidhead, ds, nfsdev_list);
 	TAILQ_INSERT_TAIL(&nfsrv_devidhead, ds, nfsdev_list);
 	NFSDDSUNLOCK();
 	dsc = NULL;
 	if (mirrorcnt > 1)
 		tdsc = dsc = malloc(sizeof(*dsc) * (mirrorcnt - 1), M_TEMP,
 		    M_WAITOK | M_ZERO);
 	tpf = pf = malloc(sizeof(*pf) * nfsrv_maxpnfsmirror, M_TEMP, M_WAITOK |
 	    M_ZERO);
 
 	error = nfsvno_getfh(vp, &fh, p);
 	if (error == 0)
 		error = VOP_GETATTR(vp, &va, cred);
 	if (error == 0) {
 		/* Set the attributes for "vp" to Setattr the DS vp. */
 		vauid = va.va_uid;
 		vagid = va.va_gid;
 		vamode = va.va_mode;
 		VATTR_NULL(&va);
 		va.va_uid = vauid;
 		va.va_gid = vagid;
 		va.va_mode = vamode;
 		va.va_size = 0;
 	} else
 		printf("pNFS: pnfscreate getfh+attr=%d\n", error);
 
 	NFSD_DEBUG(4, "nfsrv_pnfscreate: cruid=%d crgid=%d\n", cred->cr_uid,
 	    cred->cr_gid);
 	/* Make data file name based on FH. */
 	tcred = newnfs_getcred();
 
 	/*
 	 * Create the file on each DS mirror, using kernel process(es) for the
 	 * additional mirrors.
 	 */
 	failpos = -1;
 	for (i = 0; i < mirrorcnt - 1 && error == 0; i++, tpf++, tdsc++) {
 		tpf->dsf_dir = dsdir[i];
 		tdsc->tcred = tcred;
 		tdsc->p = p;
 		tdsc->pf = tpf;
 		tdsc->createva = *vap;
 		NFSBCOPY(&fh, &tdsc->fh, sizeof(fh));
 		tdsc->va = va;
 		tdsc->dvp = dvp[i];
 		tdsc->done = 0;
 		tdsc->inprog = 0;
 		tdsc->err = 0;
 		ret = EIO;
 		if (nfs_pnfsiothreads != 0) {
 			ret = nfs_pnfsio(start_dscreate, tdsc);
 			NFSD_DEBUG(4, "nfsrv_pnfscreate: nfs_pnfsio=%d\n", ret);
 		}
 		if (ret != 0) {
 			ret = nfsrv_dscreate(dvp[i], vap, &va, &fh, tpf, NULL,
 			    NULL, tcred, p, NULL);
 			if (ret != 0) {
 				KASSERT(error == 0, ("nfsrv_dscreate err=%d",
 				    error));
 				if (failpos == -1 && nfsds_failerr(ret))
 					failpos = i;
 				else
 					error = ret;
 			}
 		}
 	}
 	if (error == 0) {
 		tpf->dsf_dir = dsdir[mirrorcnt - 1];
 		error = nfsrv_dscreate(dvp[mirrorcnt - 1], vap, &va, &fh, tpf,
 		    &dsattr, NULL, tcred, p, NULL);
 		if (failpos == -1 && mirrorcnt > 1 && nfsds_failerr(error)) {
 			failpos = mirrorcnt - 1;
 			error = 0;
 		}
 	}
 	timo = hz / 50;		/* Wait for 20msec. */
 	if (timo < 1)
 		timo = 1;
 	/* Wait for kernel task(s) to complete. */
 	for (tdsc = dsc, i = 0; i < mirrorcnt - 1; i++, tdsc++) {
 		while (tdsc->inprog != 0 && tdsc->done == 0)
 			tsleep(&tdsc->tsk, PVFS, "srvdcr", timo);
 		if (tdsc->err != 0) {
 			if (failpos == -1 && nfsds_failerr(tdsc->err))
 				failpos = i;
 			else if (error == 0)
 				error = tdsc->err;
 		}
 	}
 
 	/*
 	 * If failpos has been set, that mirror has failed, so it needs
 	 * to be disabled.
 	 */
 	if (failpos >= 0) {
 		nmp = VFSTONFS(dvp[failpos]->v_mount);
 		NFSLOCKMNT(nmp);
 		if ((nmp->nm_privflag & (NFSMNTP_FORCEDISM |
 		     NFSMNTP_CANCELRPCS)) == 0) {
 			nmp->nm_privflag |= NFSMNTP_CANCELRPCS;
 			NFSUNLOCKMNT(nmp);
 			ds = nfsrv_deldsnmp(PNFSDOP_DELDSSERVER, nmp, p);
 			NFSD_DEBUG(4, "dscreatfail fail=%d ds=%p\n", failpos,
 			    ds);
 			if (ds != NULL)
 				nfsrv_killrpcs(nmp);
 			NFSLOCKMNT(nmp);
 			nmp->nm_privflag &= ~NFSMNTP_CANCELRPCS;
 			wakeup(nmp);
 		}
 		NFSUNLOCKMNT(nmp);
 	}
 
 	NFSFREECRED(tcred);
 	if (error == 0) {
 		ASSERT_VOP_ELOCKED(vp, "nfsrv_pnfscreate vp");
 
 		NFSD_DEBUG(4, "nfsrv_pnfscreate: mirrorcnt=%d maxmirror=%d\n",
 		    mirrorcnt, nfsrv_maxpnfsmirror);
 		/*
 		 * For all mirrors that couldn't be created, fill in the
 		 * *pf structure, but with an IP address == 0.0.0.0.
 		 */
 		tpf = pf + mirrorcnt;
 		for (i = mirrorcnt; i < nfsrv_maxpnfsmirror; i++, tpf++) {
 			*tpf = *pf;
 			tpf->dsf_sin.sin_family = AF_INET;
 			tpf->dsf_sin.sin_len = sizeof(struct sockaddr_in);
 			tpf->dsf_sin.sin_addr.s_addr = 0;
 			tpf->dsf_sin.sin_port = 0;
 		}
 
 		error = vn_extattr_set(vp, IO_NODELOCKED,
 		    EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsfile",
 		    sizeof(*pf) * nfsrv_maxpnfsmirror, (char *)pf, p);
 		if (error == 0)
 			error = vn_extattr_set(vp, IO_NODELOCKED,
 			    EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsattr",
 			    sizeof(dsattr), (char *)&dsattr, p);
 		if (error != 0)
 			printf("pNFS: pnfscreate setextattr=%d\n",
 			    error);
 	} else
 		printf("pNFS: pnfscreate=%d\n", error);
 	free(pf, M_TEMP);
 	free(dsc, M_TEMP);
 }
 
 /*
  * Get the information needed to remove the pNFS Data Server file from the
  * Metadata file.  Upon success, ddvp is set non-NULL to the locked
  * DS directory vnode.  The caller must unlock *ddvp when done with it.
  */
 static void
 nfsrv_pnfsremovesetup(struct vnode *vp, NFSPROC_T *p, struct vnode **dvpp,
     int *mirrorcntp, char *fname, fhandle_t *fhp)
 {
 	struct vattr va;
 	struct ucred *tcred;
 	char *buf;
 	int buflen, error;
 
 	dvpp[0] = NULL;
 	/* If not an exported regular file or not a pNFS server, just return. */
 	if (vp->v_type != VREG || (vp->v_mount->mnt_flag & MNT_EXPORTED) == 0 ||
 	    nfsrv_devidcnt == 0)
 		return;
 
 	/* Check to see if this is the last hard link. */
 	tcred = newnfs_getcred();
 	error = VOP_GETATTR(vp, &va, tcred);
 	NFSFREECRED(tcred);
 	if (error != 0) {
 		printf("pNFS: nfsrv_pnfsremovesetup getattr=%d\n", error);
 		return;
 	}
 	if (va.va_nlink > 1)
 		return;
 
 	error = nfsvno_getfh(vp, fhp, p);
 	if (error != 0) {
 		printf("pNFS: nfsrv_pnfsremovesetup getfh=%d\n", error);
 		return;
 	}
 
 	buflen = 1024;
 	buf = malloc(buflen, M_TEMP, M_WAITOK);
 	/* Get the directory vnode for the DS mount and the file handle. */
 	error = nfsrv_dsgetsockmnt(vp, 0, buf, &buflen, mirrorcntp, p, dvpp,
 	    NULL, NULL, fname, NULL, NULL, NULL, NULL, NULL);
 	free(buf, M_TEMP);
 	if (error != 0)
 		printf("pNFS: nfsrv_pnfsremovesetup getsockmnt=%d\n", error);
 }
 
 /*
  * Remove a DS data file for nfsrv_pnfsremove(). Called for each mirror.
  * The arguments are in a structure, so that they can be passed through
  * taskqueue for a kernel process to execute this function.
  */
 struct nfsrvdsremove {
 	int			done;
 	int			inprog;
 	struct task		tsk;
 	struct ucred		*tcred;
 	struct vnode		*dvp;
 	NFSPROC_T		*p;
 	int			err;
 	char			fname[PNFS_FILENAME_LEN + 1];
 };
 
 static int
 nfsrv_dsremove(struct vnode *dvp, char *fname, struct ucred *tcred,
     NFSPROC_T *p)
 {
 	struct nameidata named;
 	struct vnode *nvp;
 	char *bufp;
 	u_long *hashp;
 	int error;
 
 	error = NFSVOPLOCK(dvp, LK_EXCLUSIVE);
 	if (error != 0)
 		return (error);
 	named.ni_cnd.cn_nameiop = DELETE;
 	named.ni_cnd.cn_lkflags = LK_EXCLUSIVE | LK_RETRY;
 	named.ni_cnd.cn_cred = tcred;
-	named.ni_cnd.cn_flags = ISLASTCN | LOCKPARENT | LOCKLEAF | SAVENAME;
+	named.ni_cnd.cn_flags = ISLASTCN | LOCKPARENT | LOCKLEAF;
 	nfsvno_setpathbuf(&named, &bufp, &hashp);
 	named.ni_cnd.cn_nameptr = bufp;
 	named.ni_cnd.cn_namelen = strlen(fname);
 	strlcpy(bufp, fname, NAME_MAX);
 	NFSD_DEBUG(4, "nfsrv_pnfsremove: filename=%s\n", bufp);
 	error = VOP_LOOKUP(dvp, &nvp, &named.ni_cnd);
 	NFSD_DEBUG(4, "nfsrv_pnfsremove: aft LOOKUP=%d\n", error);
 	if (error == 0) {
 		error = VOP_REMOVE(dvp, nvp, &named.ni_cnd);
 		vput(nvp);
 	}
 	NFSVOPUNLOCK(dvp);
 	nfsvno_relpathbuf(&named);
 	if (error != 0)
 		printf("pNFS: nfsrv_pnfsremove failed=%d\n", error);
 	return (error);
 }
 
 /*
  * Start up the thread that will execute nfsrv_dsremove().
  */
 static void
 start_dsremove(void *arg, int pending)
 {
 	struct nfsrvdsremove *dsrm;
 
 	dsrm = (struct nfsrvdsremove *)arg;
 	dsrm->err = nfsrv_dsremove(dsrm->dvp, dsrm->fname, dsrm->tcred,
 	    dsrm->p);
 	dsrm->done = 1;
 	NFSD_DEBUG(4, "start_dsremove: err=%d\n", dsrm->err);
 }
 
 /*
  * Remove a pNFS data file from a Data Server.
  * nfsrv_pnfsremovesetup() must have been called before the MDS file was
  * removed to set up the dvp and fill in the FH.
  */
 static void
 nfsrv_pnfsremove(struct vnode **dvp, int mirrorcnt, char *fname, fhandle_t *fhp,
     NFSPROC_T *p)
 {
 	struct ucred *tcred;
 	struct nfsrvdsremove *dsrm, *tdsrm;
 	struct nfsdevice *ds;
 	struct nfsmount *nmp;
 	int failpos, i, ret, timo;
 
 	tcred = newnfs_getcred();
 	dsrm = NULL;
 	if (mirrorcnt > 1)
 		dsrm = malloc(sizeof(*dsrm) * mirrorcnt - 1, M_TEMP, M_WAITOK);
 	/*
 	 * Remove the file on each DS mirror, using kernel process(es) for the
 	 * additional mirrors.
 	 */
 	failpos = -1;
 	for (tdsrm = dsrm, i = 0; i < mirrorcnt - 1; i++, tdsrm++) {
 		tdsrm->tcred = tcred;
 		tdsrm->p = p;
 		tdsrm->dvp = dvp[i];
 		strlcpy(tdsrm->fname, fname, PNFS_FILENAME_LEN + 1);
 		tdsrm->inprog = 0;
 		tdsrm->done = 0;
 		tdsrm->err = 0;
 		ret = EIO;
 		if (nfs_pnfsiothreads != 0) {
 			ret = nfs_pnfsio(start_dsremove, tdsrm);
 			NFSD_DEBUG(4, "nfsrv_pnfsremove: nfs_pnfsio=%d\n", ret);
 		}
 		if (ret != 0) {
 			ret = nfsrv_dsremove(dvp[i], fname, tcred, p);
 			if (failpos == -1 && nfsds_failerr(ret))
 				failpos = i;
 		}
 	}
 	ret = nfsrv_dsremove(dvp[mirrorcnt - 1], fname, tcred, p);
 	if (failpos == -1 && mirrorcnt > 1 && nfsds_failerr(ret))
 		failpos = mirrorcnt - 1;
 	timo = hz / 50;		/* Wait for 20msec. */
 	if (timo < 1)
 		timo = 1;
 	/* Wait for kernel task(s) to complete. */
 	for (tdsrm = dsrm, i = 0; i < mirrorcnt - 1; i++, tdsrm++) {
 		while (tdsrm->inprog != 0 && tdsrm->done == 0)
 			tsleep(&tdsrm->tsk, PVFS, "srvdsrm", timo);
 		if (failpos == -1 && nfsds_failerr(tdsrm->err))
 			failpos = i;
 	}
 
 	/*
 	 * If failpos has been set, that mirror has failed, so it needs
 	 * to be disabled.
 	 */
 	if (failpos >= 0) {
 		nmp = VFSTONFS(dvp[failpos]->v_mount);
 		NFSLOCKMNT(nmp);
 		if ((nmp->nm_privflag & (NFSMNTP_FORCEDISM |
 		     NFSMNTP_CANCELRPCS)) == 0) {
 			nmp->nm_privflag |= NFSMNTP_CANCELRPCS;
 			NFSUNLOCKMNT(nmp);
 			ds = nfsrv_deldsnmp(PNFSDOP_DELDSSERVER, nmp, p);
 			NFSD_DEBUG(4, "dsremovefail fail=%d ds=%p\n", failpos,
 			    ds);
 			if (ds != NULL)
 				nfsrv_killrpcs(nmp);
 			NFSLOCKMNT(nmp);
 			nmp->nm_privflag &= ~NFSMNTP_CANCELRPCS;
 			wakeup(nmp);
 		}
 		NFSUNLOCKMNT(nmp);
 	}
 
 	/* Get rid all layouts for the file. */
 	nfsrv_freefilelayouts(fhp);
 
 	NFSFREECRED(tcred);
 	free(dsrm, M_TEMP);
 }
 
 /*
  * Generate a file name based on the file handle and put it in *bufp.
  * Return the number of bytes generated.
  */
 static int
 nfsrv_putfhname(fhandle_t *fhp, char *bufp)
 {
 	int i;
 	uint8_t *cp;
 	const uint8_t *hexdigits = "0123456789abcdef";
 
 	cp = (uint8_t *)fhp;
 	for (i = 0; i < sizeof(*fhp); i++) {
 		bufp[2 * i] = hexdigits[(*cp >> 4) & 0xf];
 		bufp[2 * i + 1] = hexdigits[*cp++ & 0xf];
 	}
 	bufp[2 * i] = '\0';
 	return (2 * i);
 }
 
 /*
  * Update the Metadata file's attributes from the DS file when a Read/Write
  * layout is returned.
  * Basically just call nfsrv_proxyds() with procedure == NFSPROC_LAYOUTRETURN
  * so that it does a nfsrv_getattrdsrpc() and nfsrv_setextattr() on the DS file.
  */
 int
 nfsrv_updatemdsattr(struct vnode *vp, struct nfsvattr *nap, NFSPROC_T *p)
 {
 	struct ucred *tcred;
 	int error;
 
 	/* Do this as root so that it won't fail with EACCES. */
 	tcred = newnfs_getcred();
 	error = nfsrv_proxyds(vp, 0, 0, tcred, p, NFSPROC_LAYOUTRETURN,
 	    NULL, NULL, NULL, nap, NULL, NULL, 0, NULL);
 	NFSFREECRED(tcred);
 	return (error);
 }
 
 /*
  * Set the NFSv4 ACL on the DS file to the same ACL as the MDS file.
  */
 static int
 nfsrv_dssetacl(struct vnode *vp, struct acl *aclp, struct ucred *cred,
     NFSPROC_T *p)
 {
 	int error;
 
 	error = nfsrv_proxyds(vp, 0, 0, cred, p, NFSPROC_SETACL,
 	    NULL, NULL, NULL, NULL, aclp, NULL, 0, NULL);
 	return (error);
 }
 
 static int
 nfsrv_proxyds(struct vnode *vp, off_t off, int cnt, struct ucred *cred,
     struct thread *p, int ioproc, struct mbuf **mpp, char *cp,
     struct mbuf **mpp2, struct nfsvattr *nap, struct acl *aclp,
     off_t *offp, int content, bool *eofp)
 {
 	struct nfsmount *nmp[NFSDEV_MAXMIRRORS], *failnmp;
 	fhandle_t fh[NFSDEV_MAXMIRRORS];
 	struct vnode *dvp[NFSDEV_MAXMIRRORS];
 	struct nfsdevice *ds;
 	struct pnfsdsattr dsattr;
 	struct opnfsdsattr odsattr;
 	char *buf;
 	int buflen, error, failpos, i, mirrorcnt, origmircnt, trycnt;
 
 	NFSD_DEBUG(4, "in nfsrv_proxyds\n");
 	/*
 	 * If not a regular file, not exported or not a pNFS server,
 	 * just return ENOENT.
 	 */
 	if (vp->v_type != VREG || (vp->v_mount->mnt_flag & MNT_EXPORTED) == 0 ||
 	    nfsrv_devidcnt == 0)
 		return (ENOENT);
 
 	buflen = 1024;
 	buf = malloc(buflen, M_TEMP, M_WAITOK);
 	error = 0;
 
 	/*
 	 * For Getattr, get the Change attribute (va_filerev) and size (va_size)
 	 * from the MetaData file's extended attribute.
 	 */
 	if (ioproc == NFSPROC_GETATTR) {
 		error = vn_extattr_get(vp, IO_NODELOCKED,
 		    EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsattr", &buflen, buf,
 		    p);
 		if (error == 0) {
 			if (buflen == sizeof(odsattr)) {
 				NFSBCOPY(buf, &odsattr, buflen);
 				nap->na_filerev = odsattr.dsa_filerev;
 				nap->na_size = odsattr.dsa_size;
 				nap->na_atime = odsattr.dsa_atime;
 				nap->na_mtime = odsattr.dsa_mtime;
 				/*
 				 * Fake na_bytes by rounding up na_size.
 				 * Since we don't know the block size, just
 				 * use BLKDEV_IOSIZE.
 				 */
 				nap->na_bytes = (odsattr.dsa_size +
 				    BLKDEV_IOSIZE - 1) & ~(BLKDEV_IOSIZE - 1);
 			} else if (buflen == sizeof(dsattr)) {
 				NFSBCOPY(buf, &dsattr, buflen);
 				nap->na_filerev = dsattr.dsa_filerev;
 				nap->na_size = dsattr.dsa_size;
 				nap->na_atime = dsattr.dsa_atime;
 				nap->na_mtime = dsattr.dsa_mtime;
 				nap->na_bytes = dsattr.dsa_bytes;
 			} else
 				error = ENXIO;
 		}
 		if (error == 0) {
 			/*
 			 * If nfsrv_pnfsgetdsattr is 0 or nfsrv_checkdsattr()
 			 * returns 0, just return now.  nfsrv_checkdsattr()
 			 * returns 0 if there is no Read/Write layout
 			 * plus either an Open/Write_access or Write
 			 * delegation issued to a client for the file.
 			 */
 			if (nfsrv_pnfsgetdsattr == 0 ||
 			    nfsrv_checkdsattr(vp, p) == 0) {
 				free(buf, M_TEMP);
 				return (error);
 			}
 		}
 
 		/*
 		 * Clear ENOATTR so the code below will attempt to do a
 		 * nfsrv_getattrdsrpc() to get the attributes and (re)create
 		 * the extended attribute.
 		 */
 		if (error == ENOATTR)
 			error = 0;
 	}
 
 	origmircnt = -1;
 	trycnt = 0;
 tryagain:
 	if (error == 0) {
 		buflen = 1024;
 		if (ioproc == NFSPROC_READDS && NFSVOPISLOCKED(vp) ==
 		    LK_EXCLUSIVE)
 			printf("nfsrv_proxyds: Readds vp exclusively locked\n");
 		error = nfsrv_dsgetsockmnt(vp, LK_SHARED, buf, &buflen,
 		    &mirrorcnt, p, dvp, fh, NULL, NULL, NULL, NULL, NULL,
 		    NULL, NULL);
 		if (error == 0) {
 			for (i = 0; i < mirrorcnt; i++)
 				nmp[i] = VFSTONFS(dvp[i]->v_mount);
 		} else
 			printf("pNFS: proxy getextattr sockaddr=%d\n", error);
 	} else
 		printf("pNFS: nfsrv_dsgetsockmnt=%d\n", error);
 	if (error == 0) {
 		failpos = -1;
 		if (origmircnt == -1)
 			origmircnt = mirrorcnt;
 		/*
 		 * If failpos is set to a mirror#, then that mirror has
 		 * failed and will be disabled. For Read, Getattr and Seek, the
 		 * function only tries one mirror, so if that mirror has
 		 * failed, it will need to be retried. As such, increment
 		 * tryitagain for these cases.
 		 * For Write, Setattr and Setacl, the function tries all
 		 * mirrors and will not return an error for the case where
 		 * one mirror has failed. For these cases, the functioning
 		 * mirror(s) will have been modified, so a retry isn't
 		 * necessary. These functions will set failpos for the
 		 * failed mirror#.
 		 */
 		if (ioproc == NFSPROC_READDS) {
 			error = nfsrv_readdsrpc(fh, off, cnt, cred, p, nmp[0],
 			    mpp, mpp2);
 			if (nfsds_failerr(error) && mirrorcnt > 1) {
 				/*
 				 * Setting failpos will cause the mirror
 				 * to be disabled and then a retry of this
 				 * read is required.
 				 */
 				failpos = 0;
 				error = 0;
 				trycnt++;
 			}
 		} else if (ioproc == NFSPROC_WRITEDS)
 			error = nfsrv_writedsrpc(fh, off, cnt, cred, p, vp,
 			    &nmp[0], mirrorcnt, mpp, cp, &failpos);
 		else if (ioproc == NFSPROC_SETATTR)
 			error = nfsrv_setattrdsrpc(fh, cred, p, vp, &nmp[0],
 			    mirrorcnt, nap, &failpos);
 		else if (ioproc == NFSPROC_SETACL)
 			error = nfsrv_setacldsrpc(fh, cred, p, vp, &nmp[0],
 			    mirrorcnt, aclp, &failpos);
 		else if (ioproc == NFSPROC_SEEKDS) {
 			error = nfsrv_seekdsrpc(fh, offp, content, eofp, cred,
 			    p, nmp[0]);
 			if (nfsds_failerr(error) && mirrorcnt > 1) {
 				/*
 				 * Setting failpos will cause the mirror
 				 * to be disabled and then a retry of this
 				 * read is required.
 				 */
 				failpos = 0;
 				error = 0;
 				trycnt++;
 			}
 		} else if (ioproc == NFSPROC_ALLOCATE)
 			error = nfsrv_allocatedsrpc(fh, off, *offp, cred, p, vp,
 			    &nmp[0], mirrorcnt, &failpos);
 		else if (ioproc == NFSPROC_DEALLOCATE)
 			error = nfsrv_deallocatedsrpc(fh, off, *offp, cred, p,
 			    vp, &nmp[0], mirrorcnt, &failpos);
 		else {
 			error = nfsrv_getattrdsrpc(&fh[mirrorcnt - 1], cred, p,
 			    vp, nmp[mirrorcnt - 1], nap);
 			if (nfsds_failerr(error) && mirrorcnt > 1) {
 				/*
 				 * Setting failpos will cause the mirror
 				 * to be disabled and then a retry of this
 				 * getattr is required.
 				 */
 				failpos = mirrorcnt - 1;
 				error = 0;
 				trycnt++;
 			}
 		}
 		ds = NULL;
 		if (failpos >= 0) {
 			failnmp = nmp[failpos];
 			NFSLOCKMNT(failnmp);
 			if ((failnmp->nm_privflag & (NFSMNTP_FORCEDISM |
 			     NFSMNTP_CANCELRPCS)) == 0) {
 				failnmp->nm_privflag |= NFSMNTP_CANCELRPCS;
 				NFSUNLOCKMNT(failnmp);
 				ds = nfsrv_deldsnmp(PNFSDOP_DELDSSERVER,
 				    failnmp, p);
 				NFSD_DEBUG(4, "dsldsnmp fail=%d ds=%p\n",
 				    failpos, ds);
 				if (ds != NULL)
 					nfsrv_killrpcs(failnmp);
 				NFSLOCKMNT(failnmp);
 				failnmp->nm_privflag &= ~NFSMNTP_CANCELRPCS;
 				wakeup(failnmp);
 			}
 			NFSUNLOCKMNT(failnmp);
 		}
 		for (i = 0; i < mirrorcnt; i++)
 			NFSVOPUNLOCK(dvp[i]);
 		NFSD_DEBUG(4, "nfsrv_proxyds: aft RPC=%d trya=%d\n", error,
 		    trycnt);
 		/* Try the Read/Getattr again if a mirror was deleted. */
 		if (ds != NULL && trycnt > 0 && trycnt < origmircnt)
 			goto tryagain;
 	} else {
 		/* Return ENOENT for any Extended Attribute error. */
 		error = ENOENT;
 	}
 	free(buf, M_TEMP);
 	NFSD_DEBUG(4, "nfsrv_proxyds: error=%d\n", error);
 	return (error);
 }
 
 /*
  * Get the DS mount point, fh and directory from the "pnfsd.dsfile" extended
  * attribute.
  * newnmpp - If it points to a non-NULL nmp, that is the destination and needs
  *           to be checked.  If it points to a NULL nmp, then it returns
  *           a suitable destination.
  * curnmp - If non-NULL, it is the source mount for the copy.
  */
 int
 nfsrv_dsgetsockmnt(struct vnode *vp, int lktype, char *buf, int *buflenp,
     int *mirrorcntp, NFSPROC_T *p, struct vnode **dvpp, fhandle_t *fhp,
     char *devid, char *fnamep, struct vnode **nvpp, struct nfsmount **newnmpp,
     struct nfsmount *curnmp, int *ippos, int *dsdirp)
 {
 	struct vnode *dvp, *nvp = NULL, **tdvpp;
 	struct mount *mp;
 	struct nfsmount *nmp, *newnmp;
 	struct sockaddr *sad;
 	struct sockaddr_in *sin;
 	struct nfsdevice *ds, *tds, *fndds;
 	struct pnfsdsfile *pf;
 	uint32_t dsdir;
 	int error, fhiszero, fnd, gotone, i, mirrorcnt;
 
 	ASSERT_VOP_LOCKED(vp, "nfsrv_dsgetsockmnt vp");
 	*mirrorcntp = 1;
 	tdvpp = dvpp;
 	if (nvpp != NULL)
 		*nvpp = NULL;
 	if (dvpp != NULL)
 		*dvpp = NULL;
 	if (ippos != NULL)
 		*ippos = -1;
 	if (newnmpp != NULL)
 		newnmp = *newnmpp;
 	else
 		newnmp = NULL;
 	mp = vp->v_mount;
 	error = vn_extattr_get(vp, IO_NODELOCKED, EXTATTR_NAMESPACE_SYSTEM,
 	    "pnfsd.dsfile", buflenp, buf, p);
 	mirrorcnt = *buflenp / sizeof(*pf);
 	if (error == 0 && (mirrorcnt < 1 || mirrorcnt > NFSDEV_MAXMIRRORS ||
 	    *buflenp != sizeof(*pf) * mirrorcnt))
 		error = ENOATTR;
 
 	pf = (struct pnfsdsfile *)buf;
 	/* If curnmp != NULL, check for a match in the mirror list. */
 	if (curnmp != NULL && error == 0) {
 		fnd = 0;
 		for (i = 0; i < mirrorcnt; i++, pf++) {
 			sad = (struct sockaddr *)&pf->dsf_sin;
 			if (nfsaddr2_match(sad, curnmp->nm_nam)) {
 				if (ippos != NULL)
 					*ippos = i;
 				fnd = 1;
 				break;
 			}
 		}
 		if (fnd == 0)
 			error = ENXIO;
 	}
 
 	gotone = 0;
 	pf = (struct pnfsdsfile *)buf;
 	NFSD_DEBUG(4, "nfsrv_dsgetsockmnt: mirrorcnt=%d err=%d\n", mirrorcnt,
 	    error);
 	for (i = 0; i < mirrorcnt && error == 0; i++, pf++) {
 		fhiszero = 0;
 		sad = (struct sockaddr *)&pf->dsf_sin;
 		sin = &pf->dsf_sin;
 		dsdir = pf->dsf_dir;
 		if (dsdir >= nfsrv_dsdirsize) {
 			printf("nfsrv_dsgetsockmnt: dsdir=%d\n", dsdir);
 			error = ENOATTR;
 		} else if (nvpp != NULL && newnmp != NULL &&
 		    nfsaddr2_match(sad, newnmp->nm_nam))
 			error = EEXIST;
 		if (error == 0) {
 			if (ippos != NULL && curnmp == NULL &&
 			    sad->sa_family == AF_INET &&
 			    sin->sin_addr.s_addr == 0)
 				*ippos = i;
 			if (NFSBCMP(&zerofh, &pf->dsf_fh, sizeof(zerofh)) == 0)
 				fhiszero = 1;
 			/* Use the socket address to find the mount point. */
 			fndds = NULL;
 			NFSDDSLOCK();
 			/* Find a match for the IP address. */
 			TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) {
 				if (ds->nfsdev_nmp != NULL) {
 					dvp = ds->nfsdev_dvp;
 					nmp = VFSTONFS(dvp->v_mount);
 					if (nmp != ds->nfsdev_nmp)
 						printf("different2 nmp %p %p\n",
 						    nmp, ds->nfsdev_nmp);
 					if (nfsaddr2_match(sad, nmp->nm_nam)) {
 						fndds = ds;
 						break;
 					}
 				}
 			}
 			if (fndds != NULL && newnmpp != NULL &&
 			    newnmp == NULL) {
 				/* Search for a place to make a mirror copy. */
 				TAILQ_FOREACH(tds, &nfsrv_devidhead,
 				    nfsdev_list) {
 					if (tds->nfsdev_nmp != NULL &&
 					    fndds != tds &&
 					    ((tds->nfsdev_mdsisset == 0 &&
 					      fndds->nfsdev_mdsisset == 0) ||
 					     (tds->nfsdev_mdsisset != 0 &&
 					      fndds->nfsdev_mdsisset != 0 &&
 					      fsidcmp(&tds->nfsdev_mdsfsid,
 					      &mp->mnt_stat.f_fsid) == 0))) {
 						*newnmpp = tds->nfsdev_nmp;
 						break;
 					}
 				}
 				if (tds != NULL) {
 					/*
 					 * Move this entry to the end of the
 					 * list, so it won't be selected as
 					 * easily the next time.
 					 */
 					TAILQ_REMOVE(&nfsrv_devidhead, tds,
 					    nfsdev_list);
 					TAILQ_INSERT_TAIL(&nfsrv_devidhead, tds,
 					    nfsdev_list);
 				}
 			}
 			NFSDDSUNLOCK();
 			if (fndds != NULL) {
 				dvp = fndds->nfsdev_dsdir[dsdir];
 				if (lktype != 0 || fhiszero != 0 ||
 				    (nvpp != NULL && *nvpp == NULL)) {
 					if (fhiszero != 0)
 						error = vn_lock(dvp,
 						    LK_EXCLUSIVE);
 					else if (lktype != 0)
 						error = vn_lock(dvp, lktype);
 					else
 						error = vn_lock(dvp, LK_SHARED);
 					/*
 					 * If the file handle is all 0's, try to
 					 * do a Lookup against the DS to acquire
 					 * it.
 					 * If dvpp == NULL or the Lookup fails,
 					 * unlock dvp after the call.
 					 */
 					if (error == 0 && (fhiszero != 0 ||
 					    (nvpp != NULL && *nvpp == NULL))) {
 						error = nfsrv_pnfslookupds(vp,
 						    dvp, pf, &nvp, p);
 						if (error == 0) {
 							if (fhiszero != 0)
 								nfsrv_pnfssetfh(
 								    vp, pf,
 								    devid,
 								    fnamep,
 								    nvp, p);
 							if (nvpp != NULL &&
 							    *nvpp == NULL) {
 								*nvpp = nvp;
 								*dsdirp = dsdir;
 							} else
 								vput(nvp);
 						}
 						if (error != 0 || lktype == 0)
 							NFSVOPUNLOCK(dvp);
 					}
 				}
 				if (error == 0) {
 					gotone++;
 					NFSD_DEBUG(4, "gotone=%d\n", gotone);
 					if (devid != NULL) {
 						NFSBCOPY(fndds->nfsdev_deviceid,
 						    devid, NFSX_V4DEVICEID);
 						devid += NFSX_V4DEVICEID;
 					}
 					if (dvpp != NULL)
 						*tdvpp++ = dvp;
 					if (fhp != NULL)
 						NFSBCOPY(&pf->dsf_fh, fhp++,
 						    NFSX_MYFH);
 					if (fnamep != NULL && gotone == 1)
 						strlcpy(fnamep,
 						    pf->dsf_filename,
 						    sizeof(pf->dsf_filename));
 				} else
 					NFSD_DEBUG(4, "nfsrv_dsgetsockmnt "
 					    "err=%d\n", error);
 			}
 		}
 	}
 	if (error == 0 && gotone == 0)
 		error = ENOENT;
 
 	NFSD_DEBUG(4, "eo nfsrv_dsgetsockmnt: gotone=%d err=%d\n", gotone,
 	    error);
 	if (error == 0)
 		*mirrorcntp = gotone;
 	else {
 		if (gotone > 0 && dvpp != NULL) {
 			/*
 			 * If the error didn't occur on the first one and
 			 * dvpp != NULL, the one(s) prior to the failure will
 			 * have locked dvp's that need to be unlocked.
 			 */
 			for (i = 0; i < gotone; i++) {
 				NFSVOPUNLOCK(*dvpp);
 				*dvpp++ = NULL;
 			}
 		}
 		/*
 		 * If it found the vnode to be copied from before a failure,
 		 * it needs to be vput()'d.
 		 */
 		if (nvpp != NULL && *nvpp != NULL) {
 			vput(*nvpp);
 			*nvpp = NULL;
 		}
 	}
 	return (error);
 }
 
 /*
  * Set the extended attribute for the Change attribute.
  */
 static int
 nfsrv_setextattr(struct vnode *vp, struct nfsvattr *nap, NFSPROC_T *p)
 {
 	struct pnfsdsattr dsattr;
 	int error;
 
 	ASSERT_VOP_ELOCKED(vp, "nfsrv_setextattr vp");
 	dsattr.dsa_filerev = nap->na_filerev;
 	dsattr.dsa_size = nap->na_size;
 	dsattr.dsa_atime = nap->na_atime;
 	dsattr.dsa_mtime = nap->na_mtime;
 	dsattr.dsa_bytes = nap->na_bytes;
 	error = vn_extattr_set(vp, IO_NODELOCKED, EXTATTR_NAMESPACE_SYSTEM,
 	    "pnfsd.dsattr", sizeof(dsattr), (char *)&dsattr, p);
 	if (error != 0)
 		printf("pNFS: setextattr=%d\n", error);
 	return (error);
 }
 
 static int
 nfsrv_readdsrpc(fhandle_t *fhp, off_t off, int len, struct ucred *cred,
     NFSPROC_T *p, struct nfsmount *nmp, struct mbuf **mpp, struct mbuf **mpendp)
 {
 	uint32_t *tl;
 	struct nfsrv_descript *nd;
 	nfsv4stateid_t st;
 	struct mbuf *m, *m2;
 	int error = 0, retlen, tlen, trimlen;
 
 	NFSD_DEBUG(4, "in nfsrv_readdsrpc\n");
 	nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO);
 	*mpp = NULL;
 	/*
 	 * Use a stateid where other is an alternating 01010 pattern and
 	 * seqid is 0xffffffff.  This value is not defined as special by
 	 * the RFC and is used by the FreeBSD NFS server to indicate an
 	 * MDS->DS proxy operation.
 	 */
 	st.other[0] = 0x55555555;
 	st.other[1] = 0x55555555;
 	st.other[2] = 0x55555555;
 	st.seqid = 0xffffffff;
 	nfscl_reqstart(nd, NFSPROC_READDS, nmp, (u_int8_t *)fhp, sizeof(*fhp),
 	    NULL, NULL, 0, 0, cred);
 	nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID);
 	NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED * 3);
 	txdr_hyper(off, tl);
 	*(tl + 2) = txdr_unsigned(len);
 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
 	if (error != 0) {
 		free(nd, M_TEMP);
 		return (error);
 	}
 	if (nd->nd_repstat == 0) {
 		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 		NFSM_STRSIZ(retlen, len);
 		if (retlen > 0) {
 			/* Trim off the pre-data XDR from the mbuf chain. */
 			m = nd->nd_mrep;
 			while (m != NULL && m != nd->nd_md) {
 				if (m->m_next == nd->nd_md) {
 					m->m_next = NULL;
 					m_freem(nd->nd_mrep);
 					nd->nd_mrep = m = nd->nd_md;
 				} else
 					m = m->m_next;
 			}
 			if (m == NULL) {
 				printf("nfsrv_readdsrpc: busted mbuf list\n");
 				error = ENOENT;
 				goto nfsmout;
 			}
 
 			/*
 			 * Now, adjust first mbuf so that any XDR before the
 			 * read data is skipped over.
 			 */
 			trimlen = nd->nd_dpos - mtod(m, char *);
 			if (trimlen > 0) {
 				m->m_len -= trimlen;
 				NFSM_DATAP(m, trimlen);
 			}
 
 			/*
 			 * Truncate the mbuf chain at retlen bytes of data,
 			 * plus XDR padding that brings the length up to a
 			 * multiple of 4.
 			 */
 			tlen = NFSM_RNDUP(retlen);
 			do {
 				if (m->m_len >= tlen) {
 					m->m_len = tlen;
 					tlen = 0;
 					m2 = m->m_next;
 					m->m_next = NULL;
 					m_freem(m2);
 					break;
 				}
 				tlen -= m->m_len;
 				m = m->m_next;
 			} while (m != NULL);
 			if (tlen > 0) {
 				printf("nfsrv_readdsrpc: busted mbuf list\n");
 				error = ENOENT;
 				goto nfsmout;
 			}
 			*mpp = nd->nd_mrep;
 			*mpendp = m;
 			nd->nd_mrep = NULL;
 		}
 	} else
 		error = nd->nd_repstat;
 nfsmout:
 	/* If nd->nd_mrep is already NULL, this is a no-op. */
 	m_freem(nd->nd_mrep);
 	free(nd, M_TEMP);
 	NFSD_DEBUG(4, "nfsrv_readdsrpc error=%d\n", error);
 	return (error);
 }
 
 /*
  * Do a write RPC on a DS data file, using this structure for the arguments,
  * so that this function can be executed by a separate kernel process.
  */
 struct nfsrvwritedsdorpc {
 	int			done;
 	int			inprog;
 	struct task		tsk;
 	fhandle_t		fh;
 	off_t			off;
 	int			len;
 	struct nfsmount		*nmp;
 	struct ucred		*cred;
 	NFSPROC_T		*p;
 	struct mbuf		*m;
 	int			err;
 };
 
 static int
 nfsrv_writedsdorpc(struct nfsmount *nmp, fhandle_t *fhp, off_t off, int len,
     struct nfsvattr *nap, struct mbuf *m, struct ucred *cred, NFSPROC_T *p)
 {
 	uint32_t *tl;
 	struct nfsrv_descript *nd;
 	nfsattrbit_t attrbits;
 	nfsv4stateid_t st;
 	int commit, error, retlen;
 
 	nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO);
 	nfscl_reqstart(nd, NFSPROC_WRITE, nmp, (u_int8_t *)fhp,
 	    sizeof(fhandle_t), NULL, NULL, 0, 0, cred);
 
 	/*
 	 * Use a stateid where other is an alternating 01010 pattern and
 	 * seqid is 0xffffffff.  This value is not defined as special by
 	 * the RFC and is used by the FreeBSD NFS server to indicate an
 	 * MDS->DS proxy operation.
 	 */
 	st.other[0] = 0x55555555;
 	st.other[1] = 0x55555555;
 	st.other[2] = 0x55555555;
 	st.seqid = 0xffffffff;
 	nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID);
 	NFSM_BUILD(tl, u_int32_t *, NFSX_HYPER + 2 * NFSX_UNSIGNED);
 	txdr_hyper(off, tl);
 	tl += 2;
 	/*
 	 * Do all writes FileSync, since the server doesn't hold onto dirty
 	 * buffers.  Since clients should be accessing the DS servers directly
 	 * using the pNFS layouts, this just needs to work correctly as a
 	 * fallback.
 	 */
 	*tl++ = txdr_unsigned(NFSWRITE_FILESYNC);
 	*tl = txdr_unsigned(len);
 	NFSD_DEBUG(4, "nfsrv_writedsdorpc: len=%d\n", len);
 
 	/* Put data in mbuf chain. */
 	nd->nd_mb->m_next = m;
 
 	/* Set nd_mb and nd_bpos to end of data. */
 	while (m->m_next != NULL)
 		m = m->m_next;
 	nd->nd_mb = m;
 	nfsm_set(nd, m->m_len);
 	NFSD_DEBUG(4, "nfsrv_writedsdorpc: lastmb len=%d\n", m->m_len);
 
 	/* Do a Getattr for the attributes that change upon writing. */
 	NFSZERO_ATTRBIT(&attrbits);
 	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SIZE);
 	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE);
 	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEACCESS);
 	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY);
 	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SPACEUSED);
 	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
 	*tl = txdr_unsigned(NFSV4OP_GETATTR);
 	(void) nfsrv_putattrbit(nd, &attrbits);
 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p,
 	    cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
 	if (error != 0) {
 		free(nd, M_TEMP);
 		return (error);
 	}
 	NFSD_DEBUG(4, "nfsrv_writedsdorpc: aft writerpc=%d\n", nd->nd_repstat);
 	/* Get rid of weak cache consistency data for now. */
 	if ((nd->nd_flag & (ND_NOMOREDATA | ND_NFSV4 | ND_V4WCCATTR)) ==
 	    (ND_NFSV4 | ND_V4WCCATTR)) {
 		error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL,
 		    NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL);
 		NFSD_DEBUG(4, "nfsrv_writedsdorpc: wcc attr=%d\n", error);
 		if (error != 0)
 			goto nfsmout;
 		/*
 		 * Get rid of Op# and status for next op.
 		 */
 		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
 		if (*++tl != 0)
 			nd->nd_flag |= ND_NOMOREDATA;
 	}
 	if (nd->nd_repstat == 0) {
 		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED + NFSX_VERF);
 		retlen = fxdr_unsigned(int, *tl++);
 		commit = fxdr_unsigned(int, *tl);
 		if (commit != NFSWRITE_FILESYNC)
 			error = NFSERR_IO;
 		NFSD_DEBUG(4, "nfsrv_writedsdorpc:retlen=%d commit=%d err=%d\n",
 		    retlen, commit, error);
 	} else
 		error = nd->nd_repstat;
 	/* We have no use for the Write Verifier since we use FileSync. */
 
 	/*
 	 * Get the Change, Size, Access Time and Modify Time attributes and set
 	 * on the Metadata file, so its attributes will be what the file's
 	 * would be if it had been written.
 	 */
 	if (error == 0) {
 		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
 		error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL,
 		    NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL);
 	}
 	NFSD_DEBUG(4, "nfsrv_writedsdorpc: aft loadattr=%d\n", error);
 nfsmout:
 	m_freem(nd->nd_mrep);
 	free(nd, M_TEMP);
 	NFSD_DEBUG(4, "nfsrv_writedsdorpc error=%d\n", error);
 	return (error);
 }
 
 /*
  * Start up the thread that will execute nfsrv_writedsdorpc().
  */
 static void
 start_writedsdorpc(void *arg, int pending)
 {
 	struct nfsrvwritedsdorpc *drpc;
 
 	drpc = (struct nfsrvwritedsdorpc *)arg;
 	drpc->err = nfsrv_writedsdorpc(drpc->nmp, &drpc->fh, drpc->off,
 	    drpc->len, NULL, drpc->m, drpc->cred, drpc->p);
 	drpc->done = 1;
 	NFSD_DEBUG(4, "start_writedsdorpc: err=%d\n", drpc->err);
 }
 
 static int
 nfsrv_writedsrpc(fhandle_t *fhp, off_t off, int len, struct ucred *cred,
     NFSPROC_T *p, struct vnode *vp, struct nfsmount **nmpp, int mirrorcnt,
     struct mbuf **mpp, char *cp, int *failposp)
 {
 	struct nfsrvwritedsdorpc *drpc, *tdrpc = NULL;
 	struct nfsvattr na;
 	struct mbuf *m;
 	int error, i, offs, ret, timo;
 
 	NFSD_DEBUG(4, "in nfsrv_writedsrpc\n");
 	KASSERT(*mpp != NULL, ("nfsrv_writedsrpc: NULL mbuf chain"));
 	drpc = NULL;
 	if (mirrorcnt > 1)
 		tdrpc = drpc = malloc(sizeof(*drpc) * (mirrorcnt - 1), M_TEMP,
 		    M_WAITOK);
 
 	/* Calculate offset in mbuf chain that data starts. */
 	offs = cp - mtod(*mpp, char *);
 	NFSD_DEBUG(4, "nfsrv_writedsrpc: mcopy offs=%d len=%d\n", offs, len);
 
 	/*
 	 * Do the write RPC for every DS, using a separate kernel process
 	 * for every DS except the last one.
 	 */
 	error = 0;
 	for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) {
 		tdrpc->done = 0;
 		NFSBCOPY(fhp, &tdrpc->fh, sizeof(*fhp));
 		tdrpc->off = off;
 		tdrpc->len = len;
 		tdrpc->nmp = *nmpp;
 		tdrpc->cred = cred;
 		tdrpc->p = p;
 		tdrpc->inprog = 0;
 		tdrpc->err = 0;
 		tdrpc->m = m_copym(*mpp, offs, NFSM_RNDUP(len), M_WAITOK);
 		ret = EIO;
 		if (nfs_pnfsiothreads != 0) {
 			ret = nfs_pnfsio(start_writedsdorpc, tdrpc);
 			NFSD_DEBUG(4, "nfsrv_writedsrpc: nfs_pnfsio=%d\n",
 			    ret);
 		}
 		if (ret != 0) {
 			ret = nfsrv_writedsdorpc(*nmpp, fhp, off, len, NULL,
 			    tdrpc->m, cred, p);
 			if (nfsds_failerr(ret) && *failposp == -1)
 				*failposp = i;
 			else if (error == 0 && ret != 0)
 				error = ret;
 		}
 		nmpp++;
 		fhp++;
 	}
 	m = m_copym(*mpp, offs, NFSM_RNDUP(len), M_WAITOK);
 	ret = nfsrv_writedsdorpc(*nmpp, fhp, off, len, &na, m, cred, p);
 	if (nfsds_failerr(ret) && *failposp == -1 && mirrorcnt > 1)
 		*failposp = mirrorcnt - 1;
 	else if (error == 0 && ret != 0)
 		error = ret;
 	if (error == 0)
 		error = nfsrv_setextattr(vp, &na, p);
 	NFSD_DEBUG(4, "nfsrv_writedsrpc: aft setextat=%d\n", error);
 	tdrpc = drpc;
 	timo = hz / 50;		/* Wait for 20msec. */
 	if (timo < 1)
 		timo = 1;
 	for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) {
 		/* Wait for RPCs on separate threads to complete. */
 		while (tdrpc->inprog != 0 && tdrpc->done == 0)
 			tsleep(&tdrpc->tsk, PVFS, "srvwrds", timo);
 		if (nfsds_failerr(tdrpc->err) && *failposp == -1)
 			*failposp = i;
 		else if (error == 0 && tdrpc->err != 0)
 			error = tdrpc->err;
 	}
 	free(drpc, M_TEMP);
 	return (error);
 }
 
 /*
  * Do a allocate RPC on a DS data file, using this structure for the arguments,
  * so that this function can be executed by a separate kernel process.
  */
 struct nfsrvallocatedsdorpc {
 	int			done;
 	int			inprog;
 	struct task		tsk;
 	fhandle_t		fh;
 	off_t			off;
 	off_t			len;
 	struct nfsmount		*nmp;
 	struct ucred		*cred;
 	NFSPROC_T		*p;
 	int			err;
 };
 
 static int
 nfsrv_allocatedsdorpc(struct nfsmount *nmp, fhandle_t *fhp, off_t off,
     off_t len, struct nfsvattr *nap, struct ucred *cred, NFSPROC_T *p)
 {
 	uint32_t *tl;
 	struct nfsrv_descript *nd;
 	nfsattrbit_t attrbits;
 	nfsv4stateid_t st;
 	int error;
 
 	nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO);
 	nfscl_reqstart(nd, NFSPROC_ALLOCATE, nmp, (u_int8_t *)fhp,
 	    sizeof(fhandle_t), NULL, NULL, 0, 0, cred);
 
 	/*
 	 * Use a stateid where other is an alternating 01010 pattern and
 	 * seqid is 0xffffffff.  This value is not defined as special by
 	 * the RFC and is used by the FreeBSD NFS server to indicate an
 	 * MDS->DS proxy operation.
 	 */
 	st.other[0] = 0x55555555;
 	st.other[1] = 0x55555555;
 	st.other[2] = 0x55555555;
 	st.seqid = 0xffffffff;
 	nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID);
 	NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER + NFSX_UNSIGNED);
 	txdr_hyper(off, tl); tl += 2;
 	txdr_hyper(len, tl); tl += 2;
 	NFSD_DEBUG(4, "nfsrv_allocatedsdorpc: len=%jd\n", (intmax_t)len);
 
 	*tl = txdr_unsigned(NFSV4OP_GETATTR);
 	NFSGETATTR_ATTRBIT(&attrbits);
 	nfsrv_putattrbit(nd, &attrbits);
 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p,
 	    cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
 	if (error != 0) {
 		free(nd, M_TEMP);
 		return (error);
 	}
 	NFSD_DEBUG(4, "nfsrv_allocatedsdorpc: aft allocaterpc=%d\n",
 	    nd->nd_repstat);
 	if (nd->nd_repstat == 0) {
 		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
 		error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL,
 		    NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL);
 	} else
 		error = nd->nd_repstat;
 	NFSD_DEBUG(4, "nfsrv_allocatedsdorpc: aft loadattr=%d\n", error);
 nfsmout:
 	m_freem(nd->nd_mrep);
 	free(nd, M_TEMP);
 	NFSD_DEBUG(4, "nfsrv_allocatedsdorpc error=%d\n", error);
 	return (error);
 }
 
 /*
  * Start up the thread that will execute nfsrv_allocatedsdorpc().
  */
 static void
 start_allocatedsdorpc(void *arg, int pending)
 {
 	struct nfsrvallocatedsdorpc *drpc;
 
 	drpc = (struct nfsrvallocatedsdorpc *)arg;
 	drpc->err = nfsrv_allocatedsdorpc(drpc->nmp, &drpc->fh, drpc->off,
 	    drpc->len, NULL, drpc->cred, drpc->p);
 	drpc->done = 1;
 	NFSD_DEBUG(4, "start_allocatedsdorpc: err=%d\n", drpc->err);
 }
 
 static int
 nfsrv_allocatedsrpc(fhandle_t *fhp, off_t off, off_t len, struct ucred *cred,
     NFSPROC_T *p, struct vnode *vp, struct nfsmount **nmpp, int mirrorcnt,
     int *failposp)
 {
 	struct nfsrvallocatedsdorpc *drpc, *tdrpc = NULL;
 	struct nfsvattr na;
 	int error, i, ret, timo;
 
 	NFSD_DEBUG(4, "in nfsrv_allocatedsrpc\n");
 	drpc = NULL;
 	if (mirrorcnt > 1)
 		tdrpc = drpc = malloc(sizeof(*drpc) * (mirrorcnt - 1), M_TEMP,
 		    M_WAITOK);
 
 	/*
 	 * Do the allocate RPC for every DS, using a separate kernel process
 	 * for every DS except the last one.
 	 */
 	error = 0;
 	for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) {
 		tdrpc->done = 0;
 		NFSBCOPY(fhp, &tdrpc->fh, sizeof(*fhp));
 		tdrpc->off = off;
 		tdrpc->len = len;
 		tdrpc->nmp = *nmpp;
 		tdrpc->cred = cred;
 		tdrpc->p = p;
 		tdrpc->inprog = 0;
 		tdrpc->err = 0;
 		ret = EIO;
 		if (nfs_pnfsiothreads != 0) {
 			ret = nfs_pnfsio(start_allocatedsdorpc, tdrpc);
 			NFSD_DEBUG(4, "nfsrv_allocatedsrpc: nfs_pnfsio=%d\n",
 			    ret);
 		}
 		if (ret != 0) {
 			ret = nfsrv_allocatedsdorpc(*nmpp, fhp, off, len, NULL,
 			    cred, p);
 			if (nfsds_failerr(ret) && *failposp == -1)
 				*failposp = i;
 			else if (error == 0 && ret != 0)
 				error = ret;
 		}
 		nmpp++;
 		fhp++;
 	}
 	ret = nfsrv_allocatedsdorpc(*nmpp, fhp, off, len, &na, cred, p);
 	if (nfsds_failerr(ret) && *failposp == -1 && mirrorcnt > 1)
 		*failposp = mirrorcnt - 1;
 	else if (error == 0 && ret != 0)
 		error = ret;
 	if (error == 0)
 		error = nfsrv_setextattr(vp, &na, p);
 	NFSD_DEBUG(4, "nfsrv_allocatedsrpc: aft setextat=%d\n", error);
 	tdrpc = drpc;
 	timo = hz / 50;		/* Wait for 20msec. */
 	if (timo < 1)
 		timo = 1;
 	for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) {
 		/* Wait for RPCs on separate threads to complete. */
 		while (tdrpc->inprog != 0 && tdrpc->done == 0)
 			tsleep(&tdrpc->tsk, PVFS, "srvalds", timo);
 		if (nfsds_failerr(tdrpc->err) && *failposp == -1)
 			*failposp = i;
 		else if (error == 0 && tdrpc->err != 0)
 			error = tdrpc->err;
 	}
 	free(drpc, M_TEMP);
 	return (error);
 }
 
 /*
  * Do a deallocate RPC on a DS data file, using this structure for the
  * arguments, so that this function can be executed by a separate kernel
  * process.
  */
 struct nfsrvdeallocatedsdorpc {
 	int			done;
 	int			inprog;
 	struct task		tsk;
 	fhandle_t		fh;
 	off_t			off;
 	off_t			len;
 	struct nfsmount		*nmp;
 	struct ucred		*cred;
 	NFSPROC_T		*p;
 	int			err;
 };
 
 static int
 nfsrv_deallocatedsdorpc(struct nfsmount *nmp, fhandle_t *fhp, off_t off,
     off_t len, struct nfsvattr *nap, struct ucred *cred, NFSPROC_T *p)
 {
 	uint32_t *tl;
 	struct nfsrv_descript *nd;
 	nfsattrbit_t attrbits;
 	nfsv4stateid_t st;
 	int error;
 
 	nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO);
 	nfscl_reqstart(nd, NFSPROC_DEALLOCATE, nmp, (u_int8_t *)fhp,
 	    sizeof(fhandle_t), NULL, NULL, 0, 0, cred);
 
 	/*
 	 * Use a stateid where other is an alternating 01010 pattern and
 	 * seqid is 0xffffffff.  This value is not defined as special by
 	 * the RFC and is used by the FreeBSD NFS server to indicate an
 	 * MDS->DS proxy operation.
 	 */
 	st.other[0] = 0x55555555;
 	st.other[1] = 0x55555555;
 	st.other[2] = 0x55555555;
 	st.seqid = 0xffffffff;
 	nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID);
 	NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER + NFSX_UNSIGNED);
 	txdr_hyper(off, tl); tl += 2;
 	txdr_hyper(len, tl); tl += 2;
 	NFSD_DEBUG(4, "nfsrv_deallocatedsdorpc: len=%jd\n", (intmax_t)len);
 
 	/* Do a Getattr for the attributes that change upon writing. */
 	NFSZERO_ATTRBIT(&attrbits);
 	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SIZE);
 	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE);
 	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEACCESS);
 	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY);
 	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SPACEUSED);
 	*tl = txdr_unsigned(NFSV4OP_GETATTR);
 	nfsrv_putattrbit(nd, &attrbits);
 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p,
 	    cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
 	if (error != 0) {
 		free(nd, M_TEMP);
 		return (error);
 	}
 	NFSD_DEBUG(4, "nfsrv_deallocatedsdorpc: aft deallocaterpc=%d\n",
 	    nd->nd_repstat);
 	/* Get rid of weak cache consistency data for now. */
 	if ((nd->nd_flag & (ND_NOMOREDATA | ND_NFSV4 | ND_V4WCCATTR)) ==
 	    (ND_NFSV4 | ND_V4WCCATTR)) {
 		error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL,
 		    NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL);
 		NFSD_DEBUG(4, "nfsrv_deallocatedsdorpc: wcc attr=%d\n", error);
 		if (error != 0)
 			goto nfsmout;
 		/*
 		 * Get rid of Op# and status for next op.
 		 */
 		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
 		if (*++tl != 0)
 			nd->nd_flag |= ND_NOMOREDATA;
 	}
 	if (nd->nd_repstat == 0) {
 		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
 		error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL,
 		    NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL);
 	} else
 		error = nd->nd_repstat;
 	NFSD_DEBUG(4, "nfsrv_deallocatedsdorpc: aft loadattr=%d\n", error);
 nfsmout:
 	m_freem(nd->nd_mrep);
 	free(nd, M_TEMP);
 	NFSD_DEBUG(4, "nfsrv_deallocatedsdorpc error=%d\n", error);
 	return (error);
 }
 
 /*
  * Start up the thread that will execute nfsrv_deallocatedsdorpc().
  */
 static void
 start_deallocatedsdorpc(void *arg, int pending)
 {
 	struct nfsrvdeallocatedsdorpc *drpc;
 
 	drpc = (struct nfsrvdeallocatedsdorpc *)arg;
 	drpc->err = nfsrv_deallocatedsdorpc(drpc->nmp, &drpc->fh, drpc->off,
 	    drpc->len, NULL, drpc->cred, drpc->p);
 	drpc->done = 1;
 	NFSD_DEBUG(4, "start_deallocatedsdorpc: err=%d\n", drpc->err);
 }
 
 static int
 nfsrv_deallocatedsrpc(fhandle_t *fhp, off_t off, off_t len, struct ucred *cred,
     NFSPROC_T *p, struct vnode *vp, struct nfsmount **nmpp, int mirrorcnt,
     int *failposp)
 {
 	struct nfsrvdeallocatedsdorpc *drpc, *tdrpc = NULL;
 	struct nfsvattr na;
 	int error, i, ret, timo;
 
 	NFSD_DEBUG(4, "in nfsrv_deallocatedsrpc\n");
 	drpc = NULL;
 	if (mirrorcnt > 1)
 		tdrpc = drpc = malloc(sizeof(*drpc) * (mirrorcnt - 1), M_TEMP,
 		    M_WAITOK);
 
 	/*
 	 * Do the deallocate RPC for every DS, using a separate kernel process
 	 * for every DS except the last one.
 	 */
 	error = 0;
 	for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) {
 		tdrpc->done = 0;
 		NFSBCOPY(fhp, &tdrpc->fh, sizeof(*fhp));
 		tdrpc->off = off;
 		tdrpc->len = len;
 		tdrpc->nmp = *nmpp;
 		tdrpc->cred = cred;
 		tdrpc->p = p;
 		tdrpc->inprog = 0;
 		tdrpc->err = 0;
 		ret = EIO;
 		if (nfs_pnfsiothreads != 0) {
 			ret = nfs_pnfsio(start_deallocatedsdorpc, tdrpc);
 			NFSD_DEBUG(4, "nfsrv_deallocatedsrpc: nfs_pnfsio=%d\n",
 			    ret);
 		}
 		if (ret != 0) {
 			ret = nfsrv_deallocatedsdorpc(*nmpp, fhp, off, len,
 			    NULL, cred, p);
 			if (nfsds_failerr(ret) && *failposp == -1)
 				*failposp = i;
 			else if (error == 0 && ret != 0)
 				error = ret;
 		}
 		nmpp++;
 		fhp++;
 	}
 	ret = nfsrv_deallocatedsdorpc(*nmpp, fhp, off, len, &na, cred, p);
 	if (nfsds_failerr(ret) && *failposp == -1 && mirrorcnt > 1)
 		*failposp = mirrorcnt - 1;
 	else if (error == 0 && ret != 0)
 		error = ret;
 	if (error == 0)
 		error = nfsrv_setextattr(vp, &na, p);
 	NFSD_DEBUG(4, "nfsrv_deallocatedsrpc: aft setextat=%d\n", error);
 	tdrpc = drpc;
 	timo = hz / 50;		/* Wait for 20msec. */
 	if (timo < 1)
 		timo = 1;
 	for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) {
 		/* Wait for RPCs on separate threads to complete. */
 		while (tdrpc->inprog != 0 && tdrpc->done == 0)
 			tsleep(&tdrpc->tsk, PVFS, "srvalds", timo);
 		if (nfsds_failerr(tdrpc->err) && *failposp == -1)
 			*failposp = i;
 		else if (error == 0 && tdrpc->err != 0)
 			error = tdrpc->err;
 	}
 	free(drpc, M_TEMP);
 	return (error);
 }
 
 static int
 nfsrv_setattrdsdorpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p,
     struct vnode *vp, struct nfsmount *nmp, struct nfsvattr *nap,
     struct nfsvattr *dsnap)
 {
 	uint32_t *tl;
 	struct nfsrv_descript *nd;
 	nfsv4stateid_t st;
 	nfsattrbit_t attrbits;
 	int error;
 
 	NFSD_DEBUG(4, "in nfsrv_setattrdsdorpc\n");
 	nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO);
 	/*
 	 * Use a stateid where other is an alternating 01010 pattern and
 	 * seqid is 0xffffffff.  This value is not defined as special by
 	 * the RFC and is used by the FreeBSD NFS server to indicate an
 	 * MDS->DS proxy operation.
 	 */
 	st.other[0] = 0x55555555;
 	st.other[1] = 0x55555555;
 	st.other[2] = 0x55555555;
 	st.seqid = 0xffffffff;
 	nfscl_reqstart(nd, NFSPROC_SETATTR, nmp, (u_int8_t *)fhp, sizeof(*fhp),
 	    NULL, NULL, 0, 0, cred);
 	nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID);
 	nfscl_fillsattr(nd, &nap->na_vattr, vp, NFSSATTR_FULL, 0);
 
 	/* Do a Getattr for the attributes that change due to writing. */
 	NFSZERO_ATTRBIT(&attrbits);
 	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SIZE);
 	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE);
 	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEACCESS);
 	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY);
 	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SPACEUSED);
 	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
 	*tl = txdr_unsigned(NFSV4OP_GETATTR);
 	(void) nfsrv_putattrbit(nd, &attrbits);
 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
 	if (error != 0) {
 		free(nd, M_TEMP);
 		return (error);
 	}
 	NFSD_DEBUG(4, "nfsrv_setattrdsdorpc: aft setattrrpc=%d\n",
 	    nd->nd_repstat);
 	/* Get rid of weak cache consistency data for now. */
 	if ((nd->nd_flag & (ND_NOMOREDATA | ND_NFSV4 | ND_V4WCCATTR)) ==
 	    (ND_NFSV4 | ND_V4WCCATTR)) {
 		error = nfsv4_loadattr(nd, NULL, dsnap, NULL, NULL, 0, NULL,
 		    NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL);
 		NFSD_DEBUG(4, "nfsrv_setattrdsdorpc: wcc attr=%d\n", error);
 		if (error != 0)
 			goto nfsmout;
 		/*
 		 * Get rid of Op# and status for next op.
 		 */
 		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
 		if (*++tl != 0)
 			nd->nd_flag |= ND_NOMOREDATA;
 	}
 	error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
 	if (error != 0)
 		goto nfsmout;
 	if (nd->nd_repstat != 0)
 		error = nd->nd_repstat;
 	/*
 	 * Get the Change, Size, Access Time and Modify Time attributes and set
 	 * on the Metadata file, so its attributes will be what the file's
 	 * would be if it had been written.
 	 */
 	if (error == 0) {
 		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
 		error = nfsv4_loadattr(nd, NULL, dsnap, NULL, NULL, 0, NULL,
 		    NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL);
 	}
 	NFSD_DEBUG(4, "nfsrv_setattrdsdorpc: aft setattr loadattr=%d\n", error);
 nfsmout:
 	m_freem(nd->nd_mrep);
 	free(nd, M_TEMP);
 	NFSD_DEBUG(4, "nfsrv_setattrdsdorpc error=%d\n", error);
 	return (error);
 }
 
 struct nfsrvsetattrdsdorpc {
 	int			done;
 	int			inprog;
 	struct task		tsk;
 	fhandle_t		fh;
 	struct nfsmount		*nmp;
 	struct vnode		*vp;
 	struct ucred		*cred;
 	NFSPROC_T		*p;
 	struct nfsvattr		na;
 	struct nfsvattr		dsna;
 	int			err;
 };
 
 /*
  * Start up the thread that will execute nfsrv_setattrdsdorpc().
  */
 static void
 start_setattrdsdorpc(void *arg, int pending)
 {
 	struct nfsrvsetattrdsdorpc *drpc;
 
 	drpc = (struct nfsrvsetattrdsdorpc *)arg;
 	drpc->err = nfsrv_setattrdsdorpc(&drpc->fh, drpc->cred, drpc->p,
 	    drpc->vp, drpc->nmp, &drpc->na, &drpc->dsna);
 	drpc->done = 1;
 }
 
 static int
 nfsrv_setattrdsrpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p,
     struct vnode *vp, struct nfsmount **nmpp, int mirrorcnt,
     struct nfsvattr *nap, int *failposp)
 {
 	struct nfsrvsetattrdsdorpc *drpc, *tdrpc = NULL;
 	struct nfsvattr na;
 	int error, i, ret, timo;
 
 	NFSD_DEBUG(4, "in nfsrv_setattrdsrpc\n");
 	drpc = NULL;
 	if (mirrorcnt > 1)
 		tdrpc = drpc = malloc(sizeof(*drpc) * (mirrorcnt - 1), M_TEMP,
 		    M_WAITOK);
 
 	/*
 	 * Do the setattr RPC for every DS, using a separate kernel process
 	 * for every DS except the last one.
 	 */
 	error = 0;
 	for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) {
 		tdrpc->done = 0;
 		tdrpc->inprog = 0;
 		NFSBCOPY(fhp, &tdrpc->fh, sizeof(*fhp));
 		tdrpc->nmp = *nmpp;
 		tdrpc->vp = vp;
 		tdrpc->cred = cred;
 		tdrpc->p = p;
 		tdrpc->na = *nap;
 		tdrpc->err = 0;
 		ret = EIO;
 		if (nfs_pnfsiothreads != 0) {
 			ret = nfs_pnfsio(start_setattrdsdorpc, tdrpc);
 			NFSD_DEBUG(4, "nfsrv_setattrdsrpc: nfs_pnfsio=%d\n",
 			    ret);
 		}
 		if (ret != 0) {
 			ret = nfsrv_setattrdsdorpc(fhp, cred, p, vp, *nmpp, nap,
 			    &na);
 			if (nfsds_failerr(ret) && *failposp == -1)
 				*failposp = i;
 			else if (error == 0 && ret != 0)
 				error = ret;
 		}
 		nmpp++;
 		fhp++;
 	}
 	ret = nfsrv_setattrdsdorpc(fhp, cred, p, vp, *nmpp, nap, &na);
 	if (nfsds_failerr(ret) && *failposp == -1 && mirrorcnt > 1)
 		*failposp = mirrorcnt - 1;
 	else if (error == 0 && ret != 0)
 		error = ret;
 	if (error == 0)
 		error = nfsrv_setextattr(vp, &na, p);
 	NFSD_DEBUG(4, "nfsrv_setattrdsrpc: aft setextat=%d\n", error);
 	tdrpc = drpc;
 	timo = hz / 50;		/* Wait for 20msec. */
 	if (timo < 1)
 		timo = 1;
 	for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) {
 		/* Wait for RPCs on separate threads to complete. */
 		while (tdrpc->inprog != 0 && tdrpc->done == 0)
 			tsleep(&tdrpc->tsk, PVFS, "srvsads", timo);
 		if (nfsds_failerr(tdrpc->err) && *failposp == -1)
 			*failposp = i;
 		else if (error == 0 && tdrpc->err != 0)
 			error = tdrpc->err;
 	}
 	free(drpc, M_TEMP);
 	return (error);
 }
 
 /*
  * Do a Setattr of an NFSv4 ACL on the DS file.
  */
 static int
 nfsrv_setacldsdorpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p,
     struct vnode *vp, struct nfsmount *nmp, struct acl *aclp)
 {
 	struct nfsrv_descript *nd;
 	nfsv4stateid_t st;
 	nfsattrbit_t attrbits;
 	int error;
 
 	NFSD_DEBUG(4, "in nfsrv_setacldsdorpc\n");
 	nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO);
 	/*
 	 * Use a stateid where other is an alternating 01010 pattern and
 	 * seqid is 0xffffffff.  This value is not defined as special by
 	 * the RFC and is used by the FreeBSD NFS server to indicate an
 	 * MDS->DS proxy operation.
 	 */
 	st.other[0] = 0x55555555;
 	st.other[1] = 0x55555555;
 	st.other[2] = 0x55555555;
 	st.seqid = 0xffffffff;
 	nfscl_reqstart(nd, NFSPROC_SETACL, nmp, (u_int8_t *)fhp, sizeof(*fhp),
 	    NULL, NULL, 0, 0, cred);
 	nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID);
 	NFSZERO_ATTRBIT(&attrbits);
 	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_ACL);
 	/*
 	 * The "vp" argument to nfsv4_fillattr() is only used for vnode_type(),
 	 * so passing in the metadata "vp" will be ok, since it is of
 	 * the same type (VREG).
 	 */
 	nfsv4_fillattr(nd, NULL, vp, aclp, NULL, NULL, 0, &attrbits, NULL,
 	    NULL, 0, 0, 0, 0, 0, NULL);
 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
 	if (error != 0) {
 		free(nd, M_TEMP);
 		return (error);
 	}
 	NFSD_DEBUG(4, "nfsrv_setacldsdorpc: aft setaclrpc=%d\n",
 	    nd->nd_repstat);
 	error = nd->nd_repstat;
 	m_freem(nd->nd_mrep);
 	free(nd, M_TEMP);
 	return (error);
 }
 
 struct nfsrvsetacldsdorpc {
 	int			done;
 	int			inprog;
 	struct task		tsk;
 	fhandle_t		fh;
 	struct nfsmount		*nmp;
 	struct vnode		*vp;
 	struct ucred		*cred;
 	NFSPROC_T		*p;
 	struct acl		*aclp;
 	int			err;
 };
 
 /*
  * Start up the thread that will execute nfsrv_setacldsdorpc().
  */
 static void
 start_setacldsdorpc(void *arg, int pending)
 {
 	struct nfsrvsetacldsdorpc *drpc;
 
 	drpc = (struct nfsrvsetacldsdorpc *)arg;
 	drpc->err = nfsrv_setacldsdorpc(&drpc->fh, drpc->cred, drpc->p,
 	    drpc->vp, drpc->nmp, drpc->aclp);
 	drpc->done = 1;
 }
 
 static int
 nfsrv_setacldsrpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p,
     struct vnode *vp, struct nfsmount **nmpp, int mirrorcnt, struct acl *aclp,
     int *failposp)
 {
 	struct nfsrvsetacldsdorpc *drpc, *tdrpc = NULL;
 	int error, i, ret, timo;
 
 	NFSD_DEBUG(4, "in nfsrv_setacldsrpc\n");
 	drpc = NULL;
 	if (mirrorcnt > 1)
 		tdrpc = drpc = malloc(sizeof(*drpc) * (mirrorcnt - 1), M_TEMP,
 		    M_WAITOK);
 
 	/*
 	 * Do the setattr RPC for every DS, using a separate kernel process
 	 * for every DS except the last one.
 	 */
 	error = 0;
 	for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) {
 		tdrpc->done = 0;
 		tdrpc->inprog = 0;
 		NFSBCOPY(fhp, &tdrpc->fh, sizeof(*fhp));
 		tdrpc->nmp = *nmpp;
 		tdrpc->vp = vp;
 		tdrpc->cred = cred;
 		tdrpc->p = p;
 		tdrpc->aclp = aclp;
 		tdrpc->err = 0;
 		ret = EIO;
 		if (nfs_pnfsiothreads != 0) {
 			ret = nfs_pnfsio(start_setacldsdorpc, tdrpc);
 			NFSD_DEBUG(4, "nfsrv_setacldsrpc: nfs_pnfsio=%d\n",
 			    ret);
 		}
 		if (ret != 0) {
 			ret = nfsrv_setacldsdorpc(fhp, cred, p, vp, *nmpp,
 			    aclp);
 			if (nfsds_failerr(ret) && *failposp == -1)
 				*failposp = i;
 			else if (error == 0 && ret != 0)
 				error = ret;
 		}
 		nmpp++;
 		fhp++;
 	}
 	ret = nfsrv_setacldsdorpc(fhp, cred, p, vp, *nmpp, aclp);
 	if (nfsds_failerr(ret) && *failposp == -1 && mirrorcnt > 1)
 		*failposp = mirrorcnt - 1;
 	else if (error == 0 && ret != 0)
 		error = ret;
 	NFSD_DEBUG(4, "nfsrv_setacldsrpc: aft setextat=%d\n", error);
 	tdrpc = drpc;
 	timo = hz / 50;		/* Wait for 20msec. */
 	if (timo < 1)
 		timo = 1;
 	for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) {
 		/* Wait for RPCs on separate threads to complete. */
 		while (tdrpc->inprog != 0 && tdrpc->done == 0)
 			tsleep(&tdrpc->tsk, PVFS, "srvacds", timo);
 		if (nfsds_failerr(tdrpc->err) && *failposp == -1)
 			*failposp = i;
 		else if (error == 0 && tdrpc->err != 0)
 			error = tdrpc->err;
 	}
 	free(drpc, M_TEMP);
 	return (error);
 }
 
 /*
  * Getattr call to the DS for the attributes that change due to writing.
  */
 static int
 nfsrv_getattrdsrpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p,
     struct vnode *vp, struct nfsmount *nmp, struct nfsvattr *nap)
 {
 	struct nfsrv_descript *nd;
 	int error;
 	nfsattrbit_t attrbits;
 
 	NFSD_DEBUG(4, "in nfsrv_getattrdsrpc\n");
 	nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO);
 	nfscl_reqstart(nd, NFSPROC_GETATTR, nmp, (u_int8_t *)fhp,
 	    sizeof(fhandle_t), NULL, NULL, 0, 0, cred);
 	NFSZERO_ATTRBIT(&attrbits);
 	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SIZE);
 	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE);
 	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEACCESS);
 	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFY);
 	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SPACEUSED);
 	(void) nfsrv_putattrbit(nd, &attrbits);
 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
 	if (error != 0) {
 		free(nd, M_TEMP);
 		return (error);
 	}
 	NFSD_DEBUG(4, "nfsrv_getattrdsrpc: aft getattrrpc=%d\n",
 	    nd->nd_repstat);
 	if (nd->nd_repstat == 0) {
 		error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0,
 		    NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL,
 		    NULL, NULL);
 		/*
 		 * We can only save the updated values in the extended
 		 * attribute if the vp is exclusively locked.
 		 * This should happen when any of the following operations
 		 * occur on the vnode:
 		 *    Close, Delegreturn, LayoutCommit, LayoutReturn
 		 * As such, the updated extended attribute should get saved
 		 * before nfsrv_checkdsattr() returns 0 and allows the cached
 		 * attributes to be returned without calling this function.
 		 */
 		if (error == 0 && VOP_ISLOCKED(vp) == LK_EXCLUSIVE) {
 			error = nfsrv_setextattr(vp, nap, p);
 			NFSD_DEBUG(4, "nfsrv_getattrdsrpc: aft setextat=%d\n",
 			    error);
 		}
 	} else
 		error = nd->nd_repstat;
 	m_freem(nd->nd_mrep);
 	free(nd, M_TEMP);
 	NFSD_DEBUG(4, "nfsrv_getattrdsrpc error=%d\n", error);
 	return (error);
 }
 
 /*
  * Seek call to a DS.
  */
 static int
 nfsrv_seekdsrpc(fhandle_t *fhp, off_t *offp, int content, bool *eofp,
     struct ucred *cred, NFSPROC_T *p, struct nfsmount *nmp)
 {
 	uint32_t *tl;
 	struct nfsrv_descript *nd;
 	nfsv4stateid_t st;
 	int error;
 
 	NFSD_DEBUG(4, "in nfsrv_seekdsrpc\n");
 	/*
 	 * Use a stateid where other is an alternating 01010 pattern and
 	 * seqid is 0xffffffff.  This value is not defined as special by
 	 * the RFC and is used by the FreeBSD NFS server to indicate an
 	 * MDS->DS proxy operation.
 	 */
 	st.other[0] = 0x55555555;
 	st.other[1] = 0x55555555;
 	st.other[2] = 0x55555555;
 	st.seqid = 0xffffffff;
 	nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO);
 	nfscl_reqstart(nd, NFSPROC_SEEKDS, nmp, (u_int8_t *)fhp,
 	    sizeof(fhandle_t), NULL, NULL, 0, 0, cred);
 	nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID);
 	NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + NFSX_UNSIGNED);
 	txdr_hyper(*offp, tl); tl += 2;
 	*tl = txdr_unsigned(content);
 	error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, cred,
 	    NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL);
 	if (error != 0) {
 		free(nd, M_TEMP);
 		return (error);
 	}
 	NFSD_DEBUG(4, "nfsrv_seekdsrpc: aft seekrpc=%d\n", nd->nd_repstat);
 	if (nd->nd_repstat == 0) {
 		NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED + NFSX_HYPER);
 		if (*tl++ == newnfs_true)
 			*eofp = true;
 		else
 			*eofp = false;
 		*offp = fxdr_hyper(tl);
 	} else
 		error = nd->nd_repstat;
 nfsmout:
 	m_freem(nd->nd_mrep);
 	free(nd, M_TEMP);
 	NFSD_DEBUG(4, "nfsrv_seekdsrpc error=%d\n", error);
 	return (error);
 }
 
 /*
  * Get the device id and file handle for a DS file.
  */
 int
 nfsrv_dsgetdevandfh(struct vnode *vp, NFSPROC_T *p, int *mirrorcntp,
     fhandle_t *fhp, char *devid)
 {
 	int buflen, error;
 	char *buf;
 
 	buflen = 1024;
 	buf = malloc(buflen, M_TEMP, M_WAITOK);
 	error = nfsrv_dsgetsockmnt(vp, 0, buf, &buflen, mirrorcntp, p, NULL,
 	    fhp, devid, NULL, NULL, NULL, NULL, NULL, NULL);
 	free(buf, M_TEMP);
 	return (error);
 }
 
 /*
  * Do a Lookup against the DS for the filename.
  */
 static int
 nfsrv_pnfslookupds(struct vnode *vp, struct vnode *dvp, struct pnfsdsfile *pf,
     struct vnode **nvpp, NFSPROC_T *p)
 {
 	struct nameidata named;
 	struct ucred *tcred;
 	char *bufp;
 	u_long *hashp;
 	struct vnode *nvp;
 	int error;
 
 	tcred = newnfs_getcred();
 	named.ni_cnd.cn_nameiop = LOOKUP;
 	named.ni_cnd.cn_lkflags = LK_SHARED | LK_RETRY;
 	named.ni_cnd.cn_cred = tcred;
-	named.ni_cnd.cn_flags = ISLASTCN | LOCKPARENT | LOCKLEAF | SAVENAME;
+	named.ni_cnd.cn_flags = ISLASTCN | LOCKPARENT | LOCKLEAF;
 	nfsvno_setpathbuf(&named, &bufp, &hashp);
 	named.ni_cnd.cn_nameptr = bufp;
 	named.ni_cnd.cn_namelen = strlen(pf->dsf_filename);
 	strlcpy(bufp, pf->dsf_filename, NAME_MAX);
 	NFSD_DEBUG(4, "nfsrv_pnfslookupds: filename=%s\n", bufp);
 	error = VOP_LOOKUP(dvp, &nvp, &named.ni_cnd);
 	NFSD_DEBUG(4, "nfsrv_pnfslookupds: aft LOOKUP=%d\n", error);
 	NFSFREECRED(tcred);
 	nfsvno_relpathbuf(&named);
 	if (error == 0)
 		*nvpp = nvp;
 	NFSD_DEBUG(4, "eo nfsrv_pnfslookupds=%d\n", error);
 	return (error);
 }
 
 /*
  * Set the file handle to the correct one.
  */
 static void
 nfsrv_pnfssetfh(struct vnode *vp, struct pnfsdsfile *pf, char *devid,
     char *fnamep, struct vnode *nvp, NFSPROC_T *p)
 {
 	struct nfsnode *np;
 	int ret = 0;
 
 	np = VTONFS(nvp);
 	NFSBCOPY(np->n_fhp->nfh_fh, &pf->dsf_fh, NFSX_MYFH);
 	/*
 	 * We can only do a vn_set_extattr() if the vnode is exclusively
 	 * locked and vn_start_write() has been done.  If devid != NULL or
 	 * fnamep != NULL or the vnode is shared locked, vn_start_write()
 	 * may not have been done.
 	 * If not done now, it will be done on a future call.
 	 */
 	if (devid == NULL && fnamep == NULL && NFSVOPISLOCKED(vp) ==
 	    LK_EXCLUSIVE)
 		ret = vn_extattr_set(vp, IO_NODELOCKED,
 		    EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsfile", sizeof(*pf),
 		    (char *)pf, p);
 	NFSD_DEBUG(4, "eo nfsrv_pnfssetfh=%d\n", ret);
 }
 
 /*
  * Cause RPCs waiting on "nmp" to fail.  This is called for a DS mount point
  * when the DS has failed.
  */
 void
 nfsrv_killrpcs(struct nfsmount *nmp)
 {
 
 	/*
 	 * Call newnfs_nmcancelreqs() to cause
 	 * any RPCs in progress on the mount point to
 	 * fail.
 	 * This will cause any process waiting for an
 	 * RPC to complete while holding a vnode lock
 	 * on the mounted-on vnode (such as "df" or
 	 * a non-forced "umount") to fail.
 	 * This will unlock the mounted-on vnode so
 	 * a forced dismount can succeed.
 	 * The NFSMNTP_CANCELRPCS flag should be set when this function is
 	 * called.
 	 */
 	newnfs_nmcancelreqs(nmp);
 }
 
 /*
  * Sum up the statfs info for each of the DSs, so that the client will
  * receive the total for all DSs.
  */
 static int
 nfsrv_pnfsstatfs(struct statfs *sf, struct mount *mp)
 {
 	struct statfs *tsf;
 	struct nfsdevice *ds;
 	struct vnode **dvpp, **tdvpp, *dvp;
 	uint64_t tot;
 	int cnt, error = 0, i;
 
 	if (nfsrv_devidcnt <= 0)
 		return (ENXIO);
 	dvpp = mallocarray(nfsrv_devidcnt, sizeof(*dvpp), M_TEMP, M_WAITOK);
 	tsf = malloc(sizeof(*tsf), M_TEMP, M_WAITOK);
 
 	/* Get an array of the dvps for the DSs. */
 	tdvpp = dvpp;
 	i = 0;
 	NFSDDSLOCK();
 	/* First, search for matches for same file system. */
 	TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) {
 		if (ds->nfsdev_nmp != NULL && ds->nfsdev_mdsisset != 0 &&
 		    fsidcmp(&ds->nfsdev_mdsfsid, &mp->mnt_stat.f_fsid) == 0) {
 			if (++i > nfsrv_devidcnt)
 				break;
 			*tdvpp++ = ds->nfsdev_dvp;
 		}
 	}
 	/*
 	 * If no matches for same file system, total all servers not assigned
 	 * to a file system.
 	 */
 	if (i == 0) {
 		TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) {
 			if (ds->nfsdev_nmp != NULL &&
 			    ds->nfsdev_mdsisset == 0) {
 				if (++i > nfsrv_devidcnt)
 					break;
 				*tdvpp++ = ds->nfsdev_dvp;
 			}
 		}
 	}
 	NFSDDSUNLOCK();
 	cnt = i;
 
 	/* Do a VFS_STATFS() for each of the DSs and sum them up. */
 	tdvpp = dvpp;
 	for (i = 0; i < cnt && error == 0; i++) {
 		dvp = *tdvpp++;
 		error = VFS_STATFS(dvp->v_mount, tsf);
 		if (error == 0) {
 			if (sf->f_bsize == 0) {
 				if (tsf->f_bsize > 0)
 					sf->f_bsize = tsf->f_bsize;
 				else
 					sf->f_bsize = 8192;
 			}
 			if (tsf->f_blocks > 0) {
 				if (sf->f_bsize != tsf->f_bsize) {
 					tot = tsf->f_blocks * tsf->f_bsize;
 					sf->f_blocks += (tot / sf->f_bsize);
 				} else
 					sf->f_blocks += tsf->f_blocks;
 			}
 			if (tsf->f_bfree > 0) {
 				if (sf->f_bsize != tsf->f_bsize) {
 					tot = tsf->f_bfree * tsf->f_bsize;
 					sf->f_bfree += (tot / sf->f_bsize);
 				} else
 					sf->f_bfree += tsf->f_bfree;
 			}
 			if (tsf->f_bavail > 0) {
 				if (sf->f_bsize != tsf->f_bsize) {
 					tot = tsf->f_bavail * tsf->f_bsize;
 					sf->f_bavail += (tot / sf->f_bsize);
 				} else
 					sf->f_bavail += tsf->f_bavail;
 			}
 		}
 	}
 	free(tsf, M_TEMP);
 	free(dvpp, M_TEMP);
 	return (error);
 }
 
 /*
  * Set an NFSv4 acl.
  */
 int
 nfsrv_setacl(struct vnode *vp, NFSACL_T *aclp, struct ucred *cred, NFSPROC_T *p)
 {
 	int error;
 
 	if (nfsrv_useacl == 0 || nfs_supportsnfsv4acls(vp) == 0) {
 		error = NFSERR_ATTRNOTSUPP;
 		goto out;
 	}
 	/*
 	 * With NFSv4 ACLs, chmod(2) may need to add additional entries.
 	 * Make sure it has enough room for that - splitting every entry
 	 * into two and appending "canonical six" entries at the end.
 	 * Cribbed out of kern/vfs_acl.c - Rick M.
 	 */
 	if (aclp->acl_cnt > (ACL_MAX_ENTRIES - 6) / 2) {
 		error = NFSERR_ATTRNOTSUPP;
 		goto out;
 	}
 	error = VOP_SETACL(vp, ACL_TYPE_NFS4, aclp, cred, p);
 	if (error == 0) {
 		error = nfsrv_dssetacl(vp, aclp, cred, p);
 		if (error == ENOENT)
 			error = 0;
 	}
 
 out:
 	NFSEXITCODE(error);
 	return (error);
 }
 
 /*
  * Seek vnode op call (actually it is a VOP_IOCTL()).
  * This function is called with the vnode locked, but unlocks and vrele()s
  * the vp before returning.
  */
 int
 nfsvno_seek(struct nfsrv_descript *nd, struct vnode *vp, u_long cmd,
     off_t *offp, int content, bool *eofp, struct ucred *cred, NFSPROC_T *p)
 {
 	struct nfsvattr at;
 	int error, ret;
 
 	ASSERT_VOP_LOCKED(vp, "nfsvno_seek vp");
 	/*
 	 * Attempt to seek on a DS file. A return of ENOENT implies
 	 * there is no DS file to seek on.
 	 */
 	error = nfsrv_proxyds(vp, 0, 0, cred, p, NFSPROC_SEEKDS, NULL,
 	    NULL, NULL, NULL, NULL, offp, content, eofp);
 	if (error != ENOENT) {
 		vput(vp);
 		return (error);
 	}
 
 	/*
 	 * Do the VOP_IOCTL() call.  For the case where *offp == file_size,
 	 * VOP_IOCTL() will return ENXIO.  However, the correct reply for
 	 * NFSv4.2 is *eofp == true and error == 0 for this case.
 	 */
 	NFSVOPUNLOCK(vp);
 	error = VOP_IOCTL(vp, cmd, offp, 0, cred, p);
 	*eofp = false;
 	if (error == ENXIO || (error == 0 && cmd == FIOSEEKHOLE)) {
 		/* Handle the cases where we might be at EOF. */
 		ret = nfsvno_getattr(vp, &at, nd, p, 0, NULL);
 		if (ret == 0 && *offp == at.na_size) {
 			*eofp = true;
 			error = 0;
 		}
 		if (ret != 0 && error == 0)
 			error = ret;
 	}
 	vrele(vp);
 	NFSEXITCODE(error);
 	return (error);
 }
 
 /*
  * Allocate vnode op call.
  */
 int
 nfsvno_allocate(struct vnode *vp, off_t off, off_t len, struct ucred *cred,
     NFSPROC_T *p)
 {
 	int error;
 	off_t olen;
 
 	ASSERT_VOP_ELOCKED(vp, "nfsvno_allocate vp");
 	/*
 	 * Attempt to allocate on a DS file. A return of ENOENT implies
 	 * there is no DS file to allocate on.
 	 */
 	error = nfsrv_proxyds(vp, off, 0, cred, p, NFSPROC_ALLOCATE, NULL,
 	    NULL, NULL, NULL, NULL, &len, 0, NULL);
 	if (error != ENOENT)
 		return (error);
 
 	/*
 	 * Do the actual VOP_ALLOCATE(), looping so long as
 	 * progress is being made, to achieve completion.
 	 */
 	do {
 		olen = len;
 		error = VOP_ALLOCATE(vp, &off, &len, IO_SYNC, cred);
 		if (error == 0 && len > 0 && olen > len)
 			maybe_yield();
 	} while (error == 0 && len > 0 && olen > len);
 	if (error == 0 && len > 0)
 		error = NFSERR_IO;
 	NFSEXITCODE(error);
 	return (error);
 }
 
 /*
  * Deallocate vnode op call.
  */
 int
 nfsvno_deallocate(struct vnode *vp, off_t off, off_t len, struct ucred *cred,
     NFSPROC_T *p)
 {
 	int error;
 	off_t olen;
 
 	ASSERT_VOP_ELOCKED(vp, "nfsvno_deallocate vp");
 	/*
 	 * Attempt to deallocate on a DS file. A return of ENOENT implies
 	 * there is no DS file to deallocate on.
 	 */
 	error = nfsrv_proxyds(vp, off, 0, cred, p, NFSPROC_DEALLOCATE, NULL,
 	    NULL, NULL, NULL, NULL, &len, 0, NULL);
 	if (error != ENOENT)
 		return (error);
 
 	/*
 	 * Do the actual VOP_DEALLOCATE(), looping so long as
 	 * progress is being made, to achieve completion.
 	 */
 	do {
 		olen = len;
 		error = VOP_DEALLOCATE(vp, &off, &len, 0, IO_SYNC, cred);
 		if (error == 0 && len > 0 && olen > len)
 			maybe_yield();
 	} while (error == 0 && len > 0 && olen > len);
 	if (error == 0 && len > 0)
 		error = NFSERR_IO;
 	NFSEXITCODE(error);
 	return (error);
 }
 
 /*
  * Get Extended Atribute vnode op into an mbuf list.
  */
 int
 nfsvno_getxattr(struct vnode *vp, char *name, uint32_t maxresp,
     struct ucred *cred, uint64_t flag, int maxextsiz, struct thread *p,
     struct mbuf **mpp, struct mbuf **mpendp, int *lenp)
 {
 	struct iovec *iv;
 	struct uio io, *uiop = &io;
 	struct mbuf *m, *m2;
 	int alen, error, len, tlen;
 	size_t siz;
 
 	/* First, find out the size of the extended attribute. */
 	error = VOP_GETEXTATTR(vp, EXTATTR_NAMESPACE_USER, name, NULL,
 	    &siz, cred, p);
 	if (error != 0)
 		return (NFSERR_NOXATTR);
 	if (siz > maxresp - NFS_MAXXDR)
 		return (NFSERR_XATTR2BIG);
 	len = siz;
 	tlen = NFSM_RNDUP(len);
 	if (tlen > 0) {
 		/*
 		 * If cnt > MCLBYTES and the reply will not be saved, use
 		 * ext_pgs mbufs for TLS.
 		 * For NFSv4.0, we do not know for sure if the reply will
 		 * be saved, so do not use ext_pgs mbufs for NFSv4.0.
 		 * Always use ext_pgs mbufs if ND_EXTPG is set.
 		 */
 		if ((flag & ND_EXTPG) != 0 || (tlen > MCLBYTES &&
 		    (flag & (ND_TLS | ND_SAVEREPLY)) == ND_TLS &&
 		    (flag & (ND_NFSV4 | ND_NFSV41)) != ND_NFSV4))
 			uiop->uio_iovcnt = nfsrv_createiovec_extpgs(tlen,
 			    maxextsiz, &m, &m2, &iv);
 		else
 			uiop->uio_iovcnt = nfsrv_createiovec(tlen, &m, &m2,
 			    &iv);
 		uiop->uio_iov = iv;
 	} else {
 		uiop->uio_iovcnt = 0;
 		uiop->uio_iov = iv = NULL;
 		m = m2 = NULL;
 	}
 	uiop->uio_offset = 0;
 	uiop->uio_resid = tlen;
 	uiop->uio_rw = UIO_READ;
 	uiop->uio_segflg = UIO_SYSSPACE;
 	uiop->uio_td = p;
 #ifdef MAC
 	error = mac_vnode_check_getextattr(cred, vp, EXTATTR_NAMESPACE_USER,
 	    name);
 	if (error != 0)
 		goto out;
 #endif
 
 	if (tlen > 0)
 		error = VOP_GETEXTATTR(vp, EXTATTR_NAMESPACE_USER, name, uiop,
 		    NULL, cred, p);
 	if (error != 0)
 		goto out;
 	if (uiop->uio_resid > 0) {
 		alen = tlen;
 		len = tlen - uiop->uio_resid;
 		tlen = NFSM_RNDUP(len);
 		if (alen != tlen)
 			printf("nfsvno_getxattr: weird size read\n");
 		if (tlen == 0) {
 			m_freem(m);
 			m = m2 = NULL;
 		} else if (alen != tlen || tlen != len)
 			m2 = nfsrv_adj(m, alen - tlen, tlen - len);
 	}
 	*lenp = len;
 	*mpp = m;
 	*mpendp = m2;
 
 out:
 	if (error != 0) {
 		if (m != NULL)
 			m_freem(m);
 		*lenp = 0;
 	}
 	free(iv, M_TEMP);
 	NFSEXITCODE(error);
 	return (error);
 }
 
 /*
  * Set Extended attribute vnode op from an mbuf list.
  */
 int
 nfsvno_setxattr(struct vnode *vp, char *name, int len, struct mbuf *m,
     char *cp, struct ucred *cred, struct thread *p)
 {
 	struct iovec *iv;
 	struct uio uio, *uiop = &uio;
 	int cnt, error;
 
 	error = 0;
 #ifdef MAC
 	error = mac_vnode_check_setextattr(cred, vp, EXTATTR_NAMESPACE_USER,
 	    name);
 #endif
 	if (error != 0)
 		goto out;
 
 	uiop->uio_rw = UIO_WRITE;
 	uiop->uio_segflg = UIO_SYSSPACE;
 	uiop->uio_td = p;
 	uiop->uio_offset = 0;
 	uiop->uio_resid = len;
 	if (len > 0) {
 		error = nfsrv_createiovecw(len, m, cp, &iv, &cnt);
 		uiop->uio_iov = iv;
 		uiop->uio_iovcnt = cnt;
 	} else {
 		uiop->uio_iov = iv = NULL;
 		uiop->uio_iovcnt = 0;
 	}
 	if (error == 0) {
 		error = VOP_SETEXTATTR(vp, EXTATTR_NAMESPACE_USER, name, uiop,
 		    cred, p);
 		free(iv, M_TEMP);
 	}
 
 out:
 	NFSEXITCODE(error);
 	return (error);
 }
 
 /*
  * Remove Extended attribute vnode op.
  */
 int
 nfsvno_rmxattr(struct nfsrv_descript *nd, struct vnode *vp, char *name,
     struct ucred *cred, struct thread *p)
 {
 	int error;
 
 	/*
 	 * Get rid of any delegations.  I am not sure why this is required,
 	 * but RFC-8276 says so.
 	 */
 	error = nfsrv_checkremove(vp, 0, nd, nd->nd_clientid, p);
 	if (error != 0)
 		goto out;
 #ifdef MAC
 	error = mac_vnode_check_deleteextattr(cred, vp, EXTATTR_NAMESPACE_USER,
 	    name);
 	if (error != 0)
 		goto out;
 #endif
 
 	error = VOP_DELETEEXTATTR(vp, EXTATTR_NAMESPACE_USER, name, cred, p);
 	if (error == EOPNOTSUPP)
 		error = VOP_SETEXTATTR(vp, EXTATTR_NAMESPACE_USER, name, NULL,
 		    cred, p);
 out:
 	NFSEXITCODE(error);
 	return (error);
 }
 
 /*
  * List Extended Atribute vnode op into an mbuf list.
  */
 int
 nfsvno_listxattr(struct vnode *vp, uint64_t cookie, struct ucred *cred,
     struct thread *p, u_char **bufp, uint32_t *lenp, bool *eofp)
 {
 	struct iovec iv;
 	struct uio io;
 	int error;
 	size_t siz;
 
 	*bufp = NULL;
 	/* First, find out the size of the extended attribute. */
 	error = VOP_LISTEXTATTR(vp, EXTATTR_NAMESPACE_USER, NULL, &siz, cred,
 	    p);
 	if (error != 0)
 		return (NFSERR_NOXATTR);
 	if (siz <= cookie) {
 		*lenp = 0;
 		*eofp = true;
 		goto out;
 	}
 	if (siz > cookie + *lenp) {
 		siz = cookie + *lenp;
 		*eofp = false;
 	} else
 		*eofp = true;
 	/* Just choose a sanity limit of 10Mbytes for malloc(M_TEMP). */
 	if (siz > 10 * 1024 * 1024) {
 		error = NFSERR_XATTR2BIG;
 		goto out;
 	}
 	*bufp = malloc(siz, M_TEMP, M_WAITOK);
 	iv.iov_base = *bufp;
 	iv.iov_len = siz;
 	io.uio_iovcnt = 1;
 	io.uio_iov = &iv;
 	io.uio_offset = 0;
 	io.uio_resid = siz;
 	io.uio_rw = UIO_READ;
 	io.uio_segflg = UIO_SYSSPACE;
 	io.uio_td = p;
 #ifdef MAC
 	error = mac_vnode_check_listextattr(cred, vp, EXTATTR_NAMESPACE_USER);
 	if (error != 0)
 		goto out;
 #endif
 
 	error = VOP_LISTEXTATTR(vp, EXTATTR_NAMESPACE_USER, &io, NULL, cred,
 	    p);
 	if (error != 0)
 		goto out;
 	if (io.uio_resid > 0)
 		siz -= io.uio_resid;
 	*lenp = siz;
 
 out:
 	if (error != 0) {
 		free(*bufp, M_TEMP);
 		*bufp = NULL;
 	}
 	NFSEXITCODE(error);
 	return (error);
 }
 
 /*
  * Trim trailing data off the mbuf list being built.
  */
 void
 nfsm_trimtrailing(struct nfsrv_descript *nd, struct mbuf *mb, char *bpos,
     int bextpg, int bextpgsiz)
 {
 	vm_page_t pg;
 	int fullpgsiz, i;
 
 	if (mb->m_next != NULL) {
 		m_freem(mb->m_next);
 		mb->m_next = NULL;
 	}
 	if ((mb->m_flags & M_EXTPG) != 0) {
 		KASSERT(bextpg >= 0 && bextpg < mb->m_epg_npgs,
 		    ("nfsm_trimtrailing: bextpg out of range"));
 		KASSERT(bpos == (char *)(void *)
 		    PHYS_TO_DMAP(mb->m_epg_pa[bextpg]) + PAGE_SIZE - bextpgsiz,
 		    ("nfsm_trimtrailing: bextpgsiz bad!"));
 
 		/* First, get rid of any pages after this position. */
 		for (i = mb->m_epg_npgs - 1; i > bextpg; i--) {
 			pg = PHYS_TO_VM_PAGE(mb->m_epg_pa[i]);
 			vm_page_unwire_noq(pg);
 			vm_page_free(pg);
 		}
 		mb->m_epg_npgs = bextpg + 1;
 		if (bextpg == 0)
 			fullpgsiz = PAGE_SIZE - mb->m_epg_1st_off;
 		else
 			fullpgsiz = PAGE_SIZE;
 		mb->m_epg_last_len = fullpgsiz - bextpgsiz;
 		mb->m_len = m_epg_pagelen(mb, 0, mb->m_epg_1st_off);
 		for (i = 1; i < mb->m_epg_npgs; i++)
 			mb->m_len += m_epg_pagelen(mb, i, 0);
 		nd->nd_bextpgsiz = bextpgsiz;
 		nd->nd_bextpg = bextpg;
 	} else
 		mb->m_len = bpos - mtod(mb, char *);
 	nd->nd_mb = mb;
 	nd->nd_bpos = bpos;
 }
 
 
 /*
  * Check to see if a put file handle operation should test for
  * NFSERR_WRONGSEC, although NFSv3 actually returns NFSERR_AUTHERR.
  * When Open is the next operation, NFSERR_WRONGSEC cannot be
  * replied for the Open cases that use a component.  This can
  * be identified by the fact that the file handle's type is VDIR.
  */
 bool
 nfsrv_checkwrongsec(struct nfsrv_descript *nd, int nextop, enum vtype vtyp)
 {
 
 	if ((nd->nd_flag & ND_NFSV4) == 0)
 		return (true);
 
 	if ((nd->nd_flag & ND_LASTOP) != 0)
 		return (false);
 
 	if (nextop == NFSV4OP_PUTROOTFH || nextop == NFSV4OP_PUTFH ||
 	    nextop == NFSV4OP_PUTPUBFH || nextop == NFSV4OP_RESTOREFH ||
 	    nextop == NFSV4OP_LOOKUP || nextop == NFSV4OP_LOOKUPP ||
 	    nextop == NFSV4OP_SECINFO || nextop == NFSV4OP_SECINFONONAME)
 		return (false);
 	if (nextop == NFSV4OP_OPEN && vtyp == VDIR)
 		return (false);
 	return (true);
 }
 
 /*
  * Check DSs marked no space.
  */
 void
 nfsrv_checknospc(void)
 {
 	struct statfs *tsf;
 	struct nfsdevice *ds;
 	struct vnode **dvpp, **tdvpp, *dvp;
 	char *devid, *tdevid;
 	int cnt, error = 0, i;
 
 	if (nfsrv_devidcnt <= 0)
 		return;
 	dvpp = mallocarray(nfsrv_devidcnt, sizeof(*dvpp), M_TEMP, M_WAITOK);
 	devid = malloc(nfsrv_devidcnt * NFSX_V4DEVICEID, M_TEMP, M_WAITOK);
 	tsf = malloc(sizeof(*tsf), M_TEMP, M_WAITOK);
 
 	/* Get an array of the dvps for the DSs. */
 	tdvpp = dvpp;
 	tdevid = devid;
 	i = 0;
 	NFSDDSLOCK();
 	/* First, search for matches for same file system. */
 	TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) {
 		if (ds->nfsdev_nmp != NULL && ds->nfsdev_nospc) {
 			if (++i > nfsrv_devidcnt)
 				break;
 			*tdvpp++ = ds->nfsdev_dvp;
 			NFSBCOPY(ds->nfsdev_deviceid, tdevid, NFSX_V4DEVICEID);
 			tdevid += NFSX_V4DEVICEID;
 		}
 	}
 	NFSDDSUNLOCK();
 
 	/* Do a VFS_STATFS() for each of the DSs and clear no space. */
 	cnt = i;
 	tdvpp = dvpp;
 	tdevid = devid;
 	for (i = 0; i < cnt && error == 0; i++) {
 		dvp = *tdvpp++;
 		error = VFS_STATFS(dvp->v_mount, tsf);
 		if (error == 0 && tsf->f_bavail > 0) {
 			NFSD_DEBUG(1, "nfsrv_checknospc: reset nospc\n");
 			nfsrv_marknospc(tdevid, false);
 		}
 		tdevid += NFSX_V4DEVICEID;
 	}
 	free(tsf, M_TEMP);
 	free(dvpp, M_TEMP);
 	free(devid, M_TEMP);
 }
 
 extern int (*nfsd_call_nfsd)(struct thread *, struct nfssvc_args *);
 
 /*
  * Called once to initialize data structures...
  */
 static int
 nfsd_modevent(module_t mod, int type, void *data)
 {
 	int error = 0, i;
 	static int loaded = 0;
 
 	switch (type) {
 	case MOD_LOAD:
 		if (loaded)
 			goto out;
 		newnfs_portinit();
 		for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) {
 			mtx_init(&nfsrchash_table[i].mtx, "nfsrtc", NULL,
 			    MTX_DEF);
 			mtx_init(&nfsrcahash_table[i].mtx, "nfsrtca", NULL,
 			    MTX_DEF);
 		}
 		mtx_init(&nfsrc_udpmtx, "nfsuc", NULL, MTX_DEF);
 		mtx_init(&nfs_v4root_mutex, "nfs4rt", NULL, MTX_DEF);
 		mtx_init(&nfsv4root_mnt.mnt_mtx, "nfs4mnt", NULL, MTX_DEF);
 		mtx_init(&nfsrv_dontlistlock_mtx, "nfs4dnl", NULL, MTX_DEF);
 		mtx_init(&nfsrv_recalllock_mtx, "nfs4rec", NULL, MTX_DEF);
 		lockinit(&nfsv4root_mnt.mnt_explock, PVFS, "explock", 0, 0);
 		callout_init(&nfsd_callout, 1);
 		nfsrvd_initcache();
 		nfsd_init();
 		NFSD_LOCK();
 		nfsrvd_init(0);
 		NFSD_UNLOCK();
 		nfsd_mntinit();
 #ifdef VV_DISABLEDELEG
 		vn_deleg_ops.vndeleg_recall = nfsd_recalldelegation;
 		vn_deleg_ops.vndeleg_disable = nfsd_disabledelegation;
 #endif
 		nfsd_call_nfsd = nfssvc_nfsd;
 		loaded = 1;
 		break;
 
 	case MOD_UNLOAD:
 		if (newnfs_numnfsd != 0) {
 			error = EBUSY;
 			break;
 		}
 
 #ifdef VV_DISABLEDELEG
 		vn_deleg_ops.vndeleg_recall = NULL;
 		vn_deleg_ops.vndeleg_disable = NULL;
 #endif
 		nfsd_call_nfsd = NULL;
 		callout_drain(&nfsd_callout);
 
 		/* Clean out all NFSv4 state. */
 		nfsrv_throwawayallstate(curthread);
 
 		/* Clean the NFS server reply cache */
 		nfsrvd_cleancache();
 
 		/* Free up the krpc server pool. */
 		if (nfsrvd_pool != NULL)
 			svcpool_destroy(nfsrvd_pool);
 
 		/* and get rid of the locks */
 		for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) {
 			mtx_destroy(&nfsrchash_table[i].mtx);
 			mtx_destroy(&nfsrcahash_table[i].mtx);
 		}
 		mtx_destroy(&nfsrc_udpmtx);
 		mtx_destroy(&nfs_v4root_mutex);
 		mtx_destroy(&nfsv4root_mnt.mnt_mtx);
 		mtx_destroy(&nfsrv_dontlistlock_mtx);
 		mtx_destroy(&nfsrv_recalllock_mtx);
 		for (i = 0; i < nfsrv_sessionhashsize; i++)
 			mtx_destroy(&nfssessionhash[i].mtx);
 		if (nfslayouthash != NULL) {
 			for (i = 0; i < nfsrv_layouthashsize; i++)
 				mtx_destroy(&nfslayouthash[i].mtx);
 			free(nfslayouthash, M_NFSDSESSION);
 		}
 		lockdestroy(&nfsv4root_mnt.mnt_explock);
 		free(nfsclienthash, M_NFSDCLIENT);
 		free(nfslockhash, M_NFSDLOCKFILE);
 		free(nfssessionhash, M_NFSDSESSION);
 		loaded = 0;
 		break;
 	default:
 		error = EOPNOTSUPP;
 		break;
 	}
 
 out:
 	NFSEXITCODE(error);
 	return (error);
 }
 static moduledata_t nfsd_mod = {
 	"nfsd",
 	nfsd_modevent,
 	NULL,
 };
 DECLARE_MODULE(nfsd, nfsd_mod, SI_SUB_VFS, SI_ORDER_ANY);
 
 /* So that loader and kldload(2) can find us, wherever we are.. */
 MODULE_VERSION(nfsd, 1);
 MODULE_DEPEND(nfsd, nfscommon, 1, 1, 1);
 MODULE_DEPEND(nfsd, nfslockd, 1, 1, 1);
 MODULE_DEPEND(nfsd, krpc, 1, 1, 1);
 MODULE_DEPEND(nfsd, nfssvc, 1, 1, 1);
diff --git a/sys/fs/nfsserver/nfs_nfsdserv.c b/sys/fs/nfsserver/nfs_nfsdserv.c
index db1075596b91..23360f9e3909 100644
--- a/sys/fs/nfsserver/nfs_nfsdserv.c
+++ b/sys/fs/nfsserver/nfs_nfsdserv.c
@@ -1,6188 +1,6186 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * Rick Macklem at The University of Guelph.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 /*
  * nfs version 2, 3 and 4 server calls to vnode ops
  * - these routines generally have 3 phases
  *   1 - break down and validate rpc request in mbuf list
  *   2 - do the vnode ops for the request, usually by calling a nfsvno_XXX()
  *       function in nfsd_port.c
  *   3 - build the rpc reply in an mbuf list
  * For nfsv4, these functions are called for each Op within the Compound RPC.
  */
 
 #include <fs/nfs/nfsport.h>
 #include <sys/extattr.h>
 #include <sys/filio.h>
 
 /* Global vars */
 extern u_int32_t newnfs_false, newnfs_true;
 extern enum vtype nv34tov_type[8];
 extern struct timeval nfsboottime;
 extern int nfsrv_enable_crossmntpt;
 extern int nfsrv_statehashsize;
 extern int nfsrv_layouthashsize;
 extern time_t nfsdev_time;
 extern volatile int nfsrv_devidcnt;
 extern int nfsd_debuglevel;
 extern u_long sb_max_adj;
 extern int nfsrv_pnfsatime;
 extern int nfsrv_maxpnfsmirror;
 extern uint32_t nfs_srvmaxio;
 
 static int	nfs_async = 0;
 SYSCTL_DECL(_vfs_nfsd);
 SYSCTL_INT(_vfs_nfsd, OID_AUTO, async, CTLFLAG_RW, &nfs_async, 0,
     "Tell client that writes were synced even though they were not");
 extern int	nfsrv_doflexfile;
 SYSCTL_INT(_vfs_nfsd, OID_AUTO, default_flexfile, CTLFLAG_RW,
     &nfsrv_doflexfile, 0, "Make Flex File Layout the default for pNFS");
 static int	nfsrv_linux42server = 1;
 SYSCTL_INT(_vfs_nfsd, OID_AUTO, linux42server, CTLFLAG_RW,
     &nfsrv_linux42server, 0,
     "Enable Linux style NFSv4.2 server (non-RFC compliant)");
 static bool	nfsrv_openaccess = true;
 SYSCTL_BOOL(_vfs_nfsd, OID_AUTO, v4openaccess, CTLFLAG_RW,
     &nfsrv_openaccess, 0,
     "Enable Linux style NFSv4 Open access check");
 static char nfsrv_scope[NFSV4_OPAQUELIMIT];
 SYSCTL_STRING(_vfs_nfsd, OID_AUTO, scope, CTLFLAG_RWTUN,
     &nfsrv_scope, NFSV4_OPAQUELIMIT, "Server scope");
 static char nfsrv_owner_major[NFSV4_OPAQUELIMIT];
 SYSCTL_STRING(_vfs_nfsd, OID_AUTO, owner_major, CTLFLAG_RWTUN,
     &nfsrv_owner_major, NFSV4_OPAQUELIMIT, "Server owner major");
 static uint64_t nfsrv_owner_minor;
 SYSCTL_U64(_vfs_nfsd, OID_AUTO, owner_minor, CTLFLAG_RWTUN,
     &nfsrv_owner_minor, 0, "Server owner minor");
 /*
  * Only enable this if all your exported file systems
  * (or pNFS DSs for the pNFS case) support VOP_ALLOCATE.
  */
 static bool	nfsrv_doallocate = false;
 SYSCTL_BOOL(_vfs_nfsd, OID_AUTO, enable_v42allocate, CTLFLAG_RW,
     &nfsrv_doallocate, 0,
     "Enable NFSv4.2 Allocate operation");
 
 /*
  * This list defines the GSS mechanisms supported.
  * (Don't ask me how you get these strings from the RFC stuff like
  *  iso(1), org(3)... but someone did it, so I don't need to know.)
  */
 static struct nfsgss_mechlist nfsgss_mechlist[] = {
 	{ 9, "\052\206\110\206\367\022\001\002\002", 11 },
 	{ 0, "", 0 },
 };
 
 /* local functions */
 static void nfsrvd_symlinksub(struct nfsrv_descript *nd, struct nameidata *ndp,
     struct nfsvattr *nvap, fhandle_t *fhp, vnode_t *vpp,
     vnode_t dirp, struct nfsvattr *dirforp, struct nfsvattr *diraftp,
     int *diraft_retp, nfsattrbit_t *attrbitp,
     NFSACL_T *aclp, NFSPROC_T *p, struct nfsexstuff *exp, char *pathcp,
     int pathlen);
 static void nfsrvd_mkdirsub(struct nfsrv_descript *nd, struct nameidata *ndp,
     struct nfsvattr *nvap, fhandle_t *fhp, vnode_t *vpp,
     vnode_t dirp, struct nfsvattr *dirforp, struct nfsvattr *diraftp,
     int *diraft_retp, nfsattrbit_t *attrbitp, NFSACL_T *aclp,
     NFSPROC_T *p, struct nfsexstuff *exp);
 
 /*
  * nfs access service (not a part of NFS V2)
  */
 int
 nfsrvd_access(struct nfsrv_descript *nd, __unused int isdgram,
     vnode_t vp, struct nfsexstuff *exp)
 {
 	u_int32_t *tl;
 	int getret, error = 0;
 	struct nfsvattr nva;
 	u_int32_t testmode, nfsmode, supported = 0;
 	accmode_t deletebit;
 	struct thread *p = curthread;
 
 	if (nd->nd_repstat) {
 		nfsrv_postopattr(nd, 1, &nva);
 		goto out;
 	}
 	NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 	nfsmode = fxdr_unsigned(u_int32_t, *tl);
 	if ((nd->nd_flag & ND_NFSV4) &&
 	    (nfsmode & ~(NFSACCESS_READ | NFSACCESS_LOOKUP |
 	     NFSACCESS_MODIFY | NFSACCESS_EXTEND | NFSACCESS_DELETE |
 	     NFSACCESS_EXECUTE | NFSACCESS_XAREAD | NFSACCESS_XAWRITE |
 	     NFSACCESS_XALIST))) {
 		nd->nd_repstat = NFSERR_INVAL;
 		vput(vp);
 		goto out;
 	}
 	if (nfsmode & NFSACCESS_READ) {
 		supported |= NFSACCESS_READ;
 		if (nfsvno_accchk(vp, VREAD, nd->nd_cred, exp, p,
 		    NFSACCCHK_NOOVERRIDE, NFSACCCHK_VPISLOCKED, &supported))
 			nfsmode &= ~NFSACCESS_READ;
 	}
 	if (nfsmode & NFSACCESS_MODIFY) {
 		supported |= NFSACCESS_MODIFY;
 		if (nfsvno_accchk(vp, VWRITE, nd->nd_cred, exp, p,
 		    NFSACCCHK_NOOVERRIDE, NFSACCCHK_VPISLOCKED, &supported))
 			nfsmode &= ~NFSACCESS_MODIFY;
 	}
 	if (nfsmode & NFSACCESS_EXTEND) {
 		supported |= NFSACCESS_EXTEND;
 		if (nfsvno_accchk(vp, VWRITE | VAPPEND, nd->nd_cred, exp, p,
 		    NFSACCCHK_NOOVERRIDE, NFSACCCHK_VPISLOCKED, &supported))
 			nfsmode &= ~NFSACCESS_EXTEND;
 	}
 	if (nfsmode & NFSACCESS_XAREAD) {
 		supported |= NFSACCESS_XAREAD;
 		if (nfsvno_accchk(vp, VREAD, nd->nd_cred, exp, p,
 		    NFSACCCHK_NOOVERRIDE, NFSACCCHK_VPISLOCKED, &supported))
 			nfsmode &= ~NFSACCESS_XAREAD;
 	}
 	if (nfsmode & NFSACCESS_XAWRITE) {
 		supported |= NFSACCESS_XAWRITE;
 		if (nfsvno_accchk(vp, VWRITE, nd->nd_cred, exp, p,
 		    NFSACCCHK_NOOVERRIDE, NFSACCCHK_VPISLOCKED, &supported))
 			nfsmode &= ~NFSACCESS_XAWRITE;
 	}
 	if (nfsmode & NFSACCESS_XALIST) {
 		supported |= NFSACCESS_XALIST;
 		if (nfsvno_accchk(vp, VREAD, nd->nd_cred, exp, p,
 		    NFSACCCHK_NOOVERRIDE, NFSACCCHK_VPISLOCKED, &supported))
 			nfsmode &= ~NFSACCESS_XALIST;
 	}
 	if (nfsmode & NFSACCESS_DELETE) {
 		supported |= NFSACCESS_DELETE;
 		if (vp->v_type == VDIR)
 			deletebit = VDELETE_CHILD;
 		else
 			deletebit = VDELETE;
 		if (nfsvno_accchk(vp, deletebit, nd->nd_cred, exp, p,
 		    NFSACCCHK_NOOVERRIDE, NFSACCCHK_VPISLOCKED, &supported))
 			nfsmode &= ~NFSACCESS_DELETE;
 	}
 	if (vp->v_type == VDIR)
 		testmode = NFSACCESS_LOOKUP;
 	else
 		testmode = NFSACCESS_EXECUTE;
 	if (nfsmode & testmode) {
 		supported |= (nfsmode & testmode);
 		if (nfsvno_accchk(vp, VEXEC, nd->nd_cred, exp, p,
 		    NFSACCCHK_NOOVERRIDE, NFSACCCHK_VPISLOCKED, &supported))
 			nfsmode &= ~testmode;
 	}
 	nfsmode &= supported;
 	if (nd->nd_flag & ND_NFSV3) {
 		getret = nfsvno_getattr(vp, &nva, nd, p, 1, NULL);
 		nfsrv_postopattr(nd, getret, &nva);
 	}
 	vput(vp);
 	if (nd->nd_flag & ND_NFSV4) {
 		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 		*tl++ = txdr_unsigned(supported);
 	} else
 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
 	*tl = txdr_unsigned(nfsmode);
 
 out:
 	NFSEXITCODE2(0, nd);
 	return (0);
 nfsmout:
 	vput(vp);
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * nfs getattr service
  */
 int
 nfsrvd_getattr(struct nfsrv_descript *nd, int isdgram,
     vnode_t vp, __unused struct nfsexstuff *exp)
 {
 	struct nfsvattr nva;
 	fhandle_t fh;
 	int at_root = 0, error = 0, supports_nfsv4acls;
 	struct nfsreferral *refp;
 	nfsattrbit_t attrbits, tmpbits;
 	struct mount *mp;
 	struct vnode *tvp = NULL;
 	struct vattr va;
 	uint64_t mounted_on_fileno = 0;
 	accmode_t accmode;
 	struct thread *p = curthread;
 
 	if (nd->nd_repstat)
 		goto out;
 	if (nd->nd_flag & ND_NFSV4) {
 		error = nfsrv_getattrbits(nd, &attrbits, NULL, NULL);
 		if (error) {
 			vput(vp);
 			goto out;
 		}
 
 		/*
 		 * Check for a referral.
 		 */
 		refp = nfsv4root_getreferral(vp, NULL, 0);
 		if (refp != NULL) {
 			(void) nfsrv_putreferralattr(nd, &attrbits, refp, 1,
 			    &nd->nd_repstat);
 			vput(vp);
 			goto out;
 		}
 		if (nd->nd_repstat == 0) {
 			accmode = 0;
 			NFSSET_ATTRBIT(&tmpbits, &attrbits);
 
 			/*
 			 * GETATTR with write-only attr time_access_set and time_modify_set
 			 * should return NFS4ERR_INVAL.
 			 */
 			if (NFSISSET_ATTRBIT(&tmpbits, NFSATTRBIT_TIMEACCESSSET) ||
 					NFSISSET_ATTRBIT(&tmpbits, NFSATTRBIT_TIMEMODIFYSET)){
 				error = NFSERR_INVAL;
 				vput(vp);
 				goto out;
 			}
 			if (NFSISSET_ATTRBIT(&tmpbits, NFSATTRBIT_ACL)) {
 				NFSCLRBIT_ATTRBIT(&tmpbits, NFSATTRBIT_ACL);
 				accmode |= VREAD_ACL;
 			}
 			if (NFSNONZERO_ATTRBIT(&tmpbits))
 				accmode |= VREAD_ATTRIBUTES;
 			if (accmode != 0)
 				nd->nd_repstat = nfsvno_accchk(vp, accmode,
 				    nd->nd_cred, exp, p, NFSACCCHK_NOOVERRIDE,
 				    NFSACCCHK_VPISLOCKED, NULL);
 		}
 	}
 	if (!nd->nd_repstat)
 		nd->nd_repstat = nfsvno_getattr(vp, &nva, nd, p, 1, &attrbits);
 	if (!nd->nd_repstat) {
 		if (nd->nd_flag & ND_NFSV4) {
 			if (NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_FILEHANDLE))
 				nd->nd_repstat = nfsvno_getfh(vp, &fh, p);
 			if (!nd->nd_repstat)
 				nd->nd_repstat = nfsrv_checkgetattr(nd, vp,
 				    &nva, &attrbits, p);
 			if (nd->nd_repstat == 0) {
 				supports_nfsv4acls = nfs_supportsnfsv4acls(vp);
 				mp = vp->v_mount;
 				if (nfsrv_enable_crossmntpt != 0 &&
 				    vp->v_type == VDIR &&
 				    (vp->v_vflag & VV_ROOT) != 0 &&
 				    vp != rootvnode) {
 					tvp = mp->mnt_vnodecovered;
 					VREF(tvp);
 					at_root = 1;
 				} else
 					at_root = 0;
 				vfs_ref(mp);
 				NFSVOPUNLOCK(vp);
 				if (at_root != 0) {
 					if ((nd->nd_repstat =
 					     NFSVOPLOCK(tvp, LK_SHARED)) == 0) {
 						nd->nd_repstat = VOP_GETATTR(
 						    tvp, &va, nd->nd_cred);
 						vput(tvp);
 					} else
 						vrele(tvp);
 					if (nd->nd_repstat == 0)
 						mounted_on_fileno = (uint64_t)
 						    va.va_fileid;
 					else
 						at_root = 0;
 				}
 				if (nd->nd_repstat == 0)
 					nd->nd_repstat = vfs_busy(mp, 0);
 				vfs_rel(mp);
 				if (nd->nd_repstat == 0) {
 					(void)nfsvno_fillattr(nd, mp, vp, &nva,
 					    &fh, 0, &attrbits, nd->nd_cred, p,
 					    isdgram, 1, supports_nfsv4acls,
 					    at_root, mounted_on_fileno);
 					vfs_unbusy(mp);
 				}
 				vrele(vp);
 			} else
 				vput(vp);
 		} else {
 			nfsrv_fillattr(nd, &nva);
 			vput(vp);
 		}
 	} else {
 		vput(vp);
 	}
 
 out:
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * nfs setattr service
  */
 int
 nfsrvd_setattr(struct nfsrv_descript *nd, __unused int isdgram,
     vnode_t vp, struct nfsexstuff *exp)
 {
 	struct nfsvattr nva, nva2;
 	u_int32_t *tl;
 	int preat_ret = 1, postat_ret = 1, gcheck = 0, error = 0;
 	int gotproxystateid;
 	struct timespec guard = { 0, 0 };
 	nfsattrbit_t attrbits, retbits;
 	nfsv4stateid_t stateid;
 	NFSACL_T *aclp = NULL;
 	struct thread *p = curthread;
 
 	if (nd->nd_repstat) {
 		nfsrv_wcc(nd, preat_ret, &nva2, postat_ret, &nva);
 		goto out;
 	}
 #ifdef NFS4_ACL_EXTATTR_NAME
 	aclp = acl_alloc(M_WAITOK);
 	aclp->acl_cnt = 0;
 #endif
 	gotproxystateid = 0;
 	NFSVNO_ATTRINIT(&nva);
 	if (nd->nd_flag & ND_NFSV4) {
 		NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID);
 		stateid.seqid = fxdr_unsigned(u_int32_t, *tl++);
 		stateid.other[0] = *tl++;
 		stateid.other[1] = *tl++;
 		stateid.other[2] = *tl;
 		if (stateid.other[0] == 0x55555555 &&
 		    stateid.other[1] == 0x55555555 &&
 		    stateid.other[2] == 0x55555555 &&
 		    stateid.seqid == 0xffffffff)
 			gotproxystateid = 1;
 	}
 	error = nfsrv_sattr(nd, vp, &nva, &attrbits, aclp, p);
 	if (error)
 		goto nfsmout;
 
 	/* For NFSv4, only va_uid is used from nva2. */
 	NFSZERO_ATTRBIT(&retbits);
 	NFSSETBIT_ATTRBIT(&retbits, NFSATTRBIT_OWNER);
 	preat_ret = nfsvno_getattr(vp, &nva2, nd, p, 1, &retbits);
 	if (!nd->nd_repstat)
 		nd->nd_repstat = preat_ret;
 
 	NFSZERO_ATTRBIT(&retbits);
 	if (nd->nd_flag & ND_NFSV3) {
 		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 		gcheck = fxdr_unsigned(int, *tl);
 		if (gcheck) {
 			NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 			fxdr_nfsv3time(tl, &guard);
 		}
 		if (!nd->nd_repstat && gcheck &&
 		    (nva2.na_ctime.tv_sec != guard.tv_sec ||
 		     nva2.na_ctime.tv_nsec != guard.tv_nsec))
 			nd->nd_repstat = NFSERR_NOT_SYNC;
 		if (nd->nd_repstat) {
 			vput(vp);
 #ifdef NFS4_ACL_EXTATTR_NAME
 			acl_free(aclp);
 #endif
 			nfsrv_wcc(nd, preat_ret, &nva2, postat_ret, &nva);
 			goto out;
 		}
 	} else if (!nd->nd_repstat && (nd->nd_flag & ND_NFSV4))
 		nd->nd_repstat = nfsrv_checkuidgid(nd, &nva);
 
 	/*
 	 * Now that we have all the fields, lets do it.
 	 * If the size is being changed write access is required, otherwise
 	 * just check for a read only file system.
 	 */
 	if (!nd->nd_repstat) {
 		if (NFSVNO_NOTSETSIZE(&nva)) {
 			if (NFSVNO_EXRDONLY(exp) ||
 			    (vp->v_mount->mnt_flag & MNT_RDONLY))
 				nd->nd_repstat = EROFS;
 		} else {
 			if (vp->v_type != VREG)
 				nd->nd_repstat = EINVAL;
 			else if (nva2.na_uid != nd->nd_cred->cr_uid ||
 			    NFSVNO_EXSTRICTACCESS(exp))
 				nd->nd_repstat = nfsvno_accchk(vp,
 				    VWRITE, nd->nd_cred, exp, p,
 				    NFSACCCHK_NOOVERRIDE,
 				    NFSACCCHK_VPISLOCKED, NULL);
 		}
 	}
 	/*
 	 * Proxy operations from the MDS are allowed via the all 0s special
 	 * stateid.
 	 */
 	if (nd->nd_repstat == 0 && (nd->nd_flag & ND_NFSV4) != 0 &&
 	    gotproxystateid == 0)
 		nd->nd_repstat = nfsrv_checksetattr(vp, nd, &stateid,
 		    &nva, &attrbits, exp, p);
 
 	if (!nd->nd_repstat && (nd->nd_flag & ND_NFSV4)) {
 	    /*
 	     * For V4, try setting the attrbutes in sets, so that the
 	     * reply bitmap will be correct for an error case.
 	     */
 	    if (NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_OWNER) ||
 		NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_OWNERGROUP)) {
 		NFSVNO_ATTRINIT(&nva2);
 		NFSVNO_SETATTRVAL(&nva2, uid, nva.na_uid);
 		NFSVNO_SETATTRVAL(&nva2, gid, nva.na_gid);
 		nd->nd_repstat = nfsvno_setattr(vp, &nva2, nd->nd_cred, p,
 		    exp);
 		if (!nd->nd_repstat) {
 		    if (NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_OWNER))
 			NFSSETBIT_ATTRBIT(&retbits, NFSATTRBIT_OWNER);
 		    if (NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_OWNERGROUP))
 			NFSSETBIT_ATTRBIT(&retbits, NFSATTRBIT_OWNERGROUP);
 		}
 	    }
 	    if (!nd->nd_repstat &&
 		NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_SIZE)) {
 		NFSVNO_ATTRINIT(&nva2);
 		NFSVNO_SETATTRVAL(&nva2, size, nva.na_size);
 		nd->nd_repstat = nfsvno_setattr(vp, &nva2, nd->nd_cred, p,
 		    exp);
 		if (!nd->nd_repstat)
 		    NFSSETBIT_ATTRBIT(&retbits, NFSATTRBIT_SIZE);
 	    }
 	    if (!nd->nd_repstat &&
 		(NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_TIMEACCESSSET) ||
 		 NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFYSET))) {
 		NFSVNO_ATTRINIT(&nva2);
 		NFSVNO_SETATTRVAL(&nva2, atime, nva.na_atime);
 		NFSVNO_SETATTRVAL(&nva2, mtime, nva.na_mtime);
 		if (nva.na_vaflags & VA_UTIMES_NULL) {
 			nva2.na_vaflags |= VA_UTIMES_NULL;
 			NFSVNO_SETACTIVE(&nva2, vaflags);
 		}
 		nd->nd_repstat = nfsvno_setattr(vp, &nva2, nd->nd_cred, p,
 		    exp);
 		if (!nd->nd_repstat) {
 		    if (NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_TIMEACCESSSET))
 			NFSSETBIT_ATTRBIT(&retbits, NFSATTRBIT_TIMEACCESSSET);
 		    if (NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_TIMEMODIFYSET))
 			NFSSETBIT_ATTRBIT(&retbits, NFSATTRBIT_TIMEMODIFYSET);
 		}
 	    }
 	    if (!nd->nd_repstat &&
 		NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_TIMECREATE)) {
 		NFSVNO_ATTRINIT(&nva2);
 		NFSVNO_SETATTRVAL(&nva2, btime, nva.na_btime);
 		nd->nd_repstat = nfsvno_setattr(vp, &nva2, nd->nd_cred, p,
 		    exp);
 		if (!nd->nd_repstat)
 		    NFSSETBIT_ATTRBIT(&retbits, NFSATTRBIT_TIMECREATE);
 	    }
 	    if (!nd->nd_repstat &&
 		(NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_MODE) ||
 		 NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_MODESETMASKED))) {
 		NFSVNO_ATTRINIT(&nva2);
 		NFSVNO_SETATTRVAL(&nva2, mode, nva.na_mode);
 		nd->nd_repstat = nfsvno_setattr(vp, &nva2, nd->nd_cred, p,
 		    exp);
 		if (!nd->nd_repstat) {
 		    if (NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_MODE))
 			NFSSETBIT_ATTRBIT(&retbits, NFSATTRBIT_MODE);
 		    if (NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_MODESETMASKED))
 			NFSSETBIT_ATTRBIT(&retbits, NFSATTRBIT_MODESETMASKED);
 		}
 	    }
 
 #ifdef NFS4_ACL_EXTATTR_NAME
 	    if (!nd->nd_repstat && aclp->acl_cnt > 0 &&
 		NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_ACL)) {
 		nd->nd_repstat = nfsrv_setacl(vp, aclp, nd->nd_cred, p);
 		if (!nd->nd_repstat) 
 		    NFSSETBIT_ATTRBIT(&retbits, NFSATTRBIT_ACL);
 	    }
 #endif
 	} else if (!nd->nd_repstat) {
 		nd->nd_repstat = nfsvno_setattr(vp, &nva, nd->nd_cred, p,
 		    exp);
 	}
 	if (nd->nd_flag & (ND_NFSV2 | ND_NFSV3)) {
 		postat_ret = nfsvno_getattr(vp, &nva, nd, p, 1, NULL);
 		if (!nd->nd_repstat)
 			nd->nd_repstat = postat_ret;
 	}
 	vput(vp);
 #ifdef NFS4_ACL_EXTATTR_NAME
 	acl_free(aclp);
 #endif
 	if (nd->nd_flag & ND_NFSV3)
 		nfsrv_wcc(nd, preat_ret, &nva2, postat_ret, &nva);
 	else if (nd->nd_flag & ND_NFSV4)
 		(void) nfsrv_putattrbit(nd, &retbits);
 	else if (!nd->nd_repstat)
 		nfsrv_fillattr(nd, &nva);
 
 out:
 	NFSEXITCODE2(0, nd);
 	return (0);
 nfsmout:
 	vput(vp);
 #ifdef NFS4_ACL_EXTATTR_NAME
 	acl_free(aclp);
 #endif
 	if (nd->nd_flag & ND_NFSV4) {
 		/*
 		 * For all nd_repstat, the V4 reply includes a bitmap,
 		 * even NFSERR_BADXDR, which is what this will end up
 		 * returning.
 		 */
 		(void) nfsrv_putattrbit(nd, &retbits);
 	}
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * nfs lookup rpc
  * (Also performs lookup parent for v4)
  */
 int
 nfsrvd_lookup(struct nfsrv_descript *nd, __unused int isdgram,
     vnode_t dp, vnode_t *vpp, fhandle_t *fhp, struct nfsexstuff *exp)
 {
 	struct nameidata named;
 	vnode_t vp, dirp = NULL;
 	int error = 0, dattr_ret = 1;
 	struct nfsvattr nva, dattr;
 	char *bufp;
 	u_long *hashp;
 	struct thread *p = curthread;
 
 	if (nd->nd_repstat) {
 		nfsrv_postopattr(nd, dattr_ret, &dattr);
 		goto out;
 	}
 
 	/*
 	 * For some reason, if dp is a symlink, the error
 	 * returned is supposed to be NFSERR_SYMLINK and not NFSERR_NOTDIR.
 	 */
 	if (dp->v_type == VLNK && (nd->nd_flag & ND_NFSV4)) {
 		nd->nd_repstat = NFSERR_SYMLINK;
 		vrele(dp);
 		goto out;
 	}
 
 	NFSNAMEICNDSET(&named.ni_cnd, nd->nd_cred, LOOKUP,
 	    LOCKLEAF | SAVESTART);
 	nfsvno_setpathbuf(&named, &bufp, &hashp);
 	error = nfsrv_parsename(nd, bufp, hashp, &named.ni_pathlen);
 	if (error) {
 		vrele(dp);
 		nfsvno_relpathbuf(&named);
 		goto out;
 	}
 	if (!nd->nd_repstat) {
 		nd->nd_repstat = nfsvno_namei(nd, &named, dp, 0, exp, &dirp);
 	} else {
 		vrele(dp);
 		nfsvno_relpathbuf(&named);
 	}
 	if (nd->nd_repstat) {
 		if (dirp) {
 			if (nd->nd_flag & ND_NFSV3)
 				dattr_ret = nfsvno_getattr(dirp, &dattr, nd, p,
 				    0, NULL);
 			vrele(dirp);
 		}
 		if (nd->nd_flag & ND_NFSV3)
 			nfsrv_postopattr(nd, dattr_ret, &dattr);
 		goto out;
 	}
 	if (named.ni_startdir)
 		vrele(named.ni_startdir);
 	nfsvno_relpathbuf(&named);
 	vp = named.ni_vp;
 	if ((nd->nd_flag & ND_NFSV4) != 0 && !NFSVNO_EXPORTED(exp) &&
 	    vp->v_type != VDIR && vp->v_type != VLNK)
 		/*
 		 * Only allow lookup of VDIR and VLNK for traversal of
 		 * non-exported volumes during NFSv4 mounting.
 		 */
 		nd->nd_repstat = ENOENT;
 	if (nd->nd_repstat == 0)
 		nd->nd_repstat = nfsvno_getfh(vp, fhp, p);
 	if (!(nd->nd_flag & ND_NFSV4) && !nd->nd_repstat)
 		nd->nd_repstat = nfsvno_getattr(vp, &nva, nd, p, 1, NULL);
 	if (vpp != NULL && nd->nd_repstat == 0)
 		*vpp = vp;
 	else
 		vput(vp);
 	if (dirp) {
 		if (nd->nd_flag & ND_NFSV3)
 			dattr_ret = nfsvno_getattr(dirp, &dattr, nd, p, 0,
 			    NULL);
 		vrele(dirp);
 	}
 	if (nd->nd_repstat) {
 		if (nd->nd_flag & ND_NFSV3)
 			nfsrv_postopattr(nd, dattr_ret, &dattr);
 		goto out;
 	}
 	if (nd->nd_flag & ND_NFSV2) {
 		(void) nfsm_fhtom(nd, (u_int8_t *)fhp, 0, 0);
 		nfsrv_fillattr(nd, &nva);
 	} else if (nd->nd_flag & ND_NFSV3) {
 		(void) nfsm_fhtom(nd, (u_int8_t *)fhp, 0, 0);
 		nfsrv_postopattr(nd, 0, &nva);
 		nfsrv_postopattr(nd, dattr_ret, &dattr);
 	}
 
 out:
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * nfs readlink service
  */
 int
 nfsrvd_readlink(struct nfsrv_descript *nd, __unused int isdgram,
     vnode_t vp, __unused struct nfsexstuff *exp)
 {
 	u_int32_t *tl;
 	struct mbuf *mp = NULL, *mpend = NULL;
 	int getret = 1, len;
 	struct nfsvattr nva;
 	struct thread *p = curthread;
 	uint16_t off;
 
 	if (nd->nd_repstat) {
 		nfsrv_postopattr(nd, getret, &nva);
 		goto out;
 	}
 	if (vp->v_type != VLNK) {
 		if (nd->nd_flag & ND_NFSV2)
 			nd->nd_repstat = ENXIO;
 		else
 			nd->nd_repstat = EINVAL;
 	}
 	if (nd->nd_repstat == 0) {
 		if ((nd->nd_flag & ND_EXTPG) != 0)
 			nd->nd_repstat = nfsvno_readlink(vp, nd->nd_cred,
 			    nd->nd_maxextsiz, p, &mp, &mpend, &len);
 		else
 			nd->nd_repstat = nfsvno_readlink(vp, nd->nd_cred,
 			    0, p, &mp, &mpend, &len);
 	}
 	if (nd->nd_flag & ND_NFSV3)
 		getret = nfsvno_getattr(vp, &nva, nd, p, 1, NULL);
 	vput(vp);
 	if (nd->nd_flag & ND_NFSV3)
 		nfsrv_postopattr(nd, getret, &nva);
 	if (nd->nd_repstat)
 		goto out;
 	NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
 	*tl = txdr_unsigned(len);
 	if (mp != NULL) {
 		nd->nd_mb->m_next = mp;
 		nd->nd_mb = mpend;
 		if ((mpend->m_flags & M_EXTPG) != 0) {
 			nd->nd_bextpg = mpend->m_epg_npgs - 1;
 			nd->nd_bpos = (char *)(void *)
 			    PHYS_TO_DMAP(mpend->m_epg_pa[nd->nd_bextpg]);
 			off = (nd->nd_bextpg == 0) ? mpend->m_epg_1st_off : 0;
 			nd->nd_bpos += off + mpend->m_epg_last_len;
 			nd->nd_bextpgsiz = PAGE_SIZE - mpend->m_epg_last_len -
 			    off;
 		} else
 			nd->nd_bpos = mtod(mpend, char *) + mpend->m_len;
 	}
 
 out:
 	NFSEXITCODE2(0, nd);
 	return (0);
 }
 
 /*
  * nfs read service
  */
 int
 nfsrvd_read(struct nfsrv_descript *nd, __unused int isdgram,
     vnode_t vp, struct nfsexstuff *exp)
 {
 	u_int32_t *tl;
 	int error = 0, cnt, getret = 1, gotproxystateid, reqlen, eof = 0;
 	struct mbuf *m2, *m3;
 	struct nfsvattr nva;
 	off_t off = 0x0;
 	struct nfsstate st, *stp = &st;
 	struct nfslock lo, *lop = &lo;
 	nfsv4stateid_t stateid;
 	nfsquad_t clientid;
 	struct thread *p = curthread;
 	uint16_t poff;
 
 	if (nd->nd_repstat) {
 		nfsrv_postopattr(nd, getret, &nva);
 		goto out;
 	}
 	if (nd->nd_flag & ND_NFSV2) {
 		NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 		off = (off_t)fxdr_unsigned(u_int32_t, *tl++);
 		reqlen = fxdr_unsigned(int, *tl);
 	} else if (nd->nd_flag & ND_NFSV3) {
 		NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
 		off = fxdr_hyper(tl);
 		tl += 2;
 		reqlen = fxdr_unsigned(int, *tl);
 	} else {
 		NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID + 3*NFSX_UNSIGNED);
 		reqlen = fxdr_unsigned(int, *(tl + 6));
 	}
 	if (reqlen > NFS_SRVMAXDATA(nd)) {
 		reqlen = NFS_SRVMAXDATA(nd);
 	} else if (reqlen < 0) {
 		error = EBADRPC;
 		goto nfsmout;
 	}
 	gotproxystateid = 0;
 	if (nd->nd_flag & ND_NFSV4) {
 		stp->ls_flags = (NFSLCK_CHECK | NFSLCK_READACCESS);
 		lop->lo_flags = NFSLCK_READ;
 		stp->ls_ownerlen = 0;
 		stp->ls_op = NULL;
 		stp->ls_uid = nd->nd_cred->cr_uid;
 		stp->ls_stateid.seqid = fxdr_unsigned(u_int32_t, *tl++);
 		clientid.lval[0] = stp->ls_stateid.other[0] = *tl++;
 		clientid.lval[1] = stp->ls_stateid.other[1] = *tl++;
 		if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) {
 			if ((nd->nd_flag & ND_NFSV41) != 0)
 				clientid.qval = nd->nd_clientid.qval;
 			else if (nd->nd_clientid.qval != clientid.qval)
 				printf("EEK1 multiple clids\n");
 		} else {
 			if ((nd->nd_flag & ND_NFSV41) != 0)
 				printf("EEK! no clientid from session\n");
 			nd->nd_flag |= ND_IMPLIEDCLID;
 			nd->nd_clientid.qval = clientid.qval;
 		}
 		stp->ls_stateid.other[2] = *tl++;
 		/*
 		 * Don't allow the client to use a special stateid for a DS op.
 		 */
 		if ((nd->nd_flag & ND_DSSERVER) != 0 &&
 		    ((stp->ls_stateid.other[0] == 0x0 &&
 		    stp->ls_stateid.other[1] == 0x0 &&
 		    stp->ls_stateid.other[2] == 0x0) ||
 		    (stp->ls_stateid.other[0] == 0xffffffff &&
 		    stp->ls_stateid.other[1] == 0xffffffff &&
 		    stp->ls_stateid.other[2] == 0xffffffff) ||
 		    stp->ls_stateid.seqid != 0))
 			nd->nd_repstat = NFSERR_BADSTATEID;
 		/* However, allow the proxy stateid. */
 		if (stp->ls_stateid.seqid == 0xffffffff &&
 		    stp->ls_stateid.other[0] == 0x55555555 &&
 		    stp->ls_stateid.other[1] == 0x55555555 &&
 		    stp->ls_stateid.other[2] == 0x55555555)
 			gotproxystateid = 1;
 		off = fxdr_hyper(tl);
 		lop->lo_first = off;
 		tl += 2;
 		lop->lo_end = off + reqlen;
 		/*
 		 * Paranoia, just in case it wraps around.
 		 */
 		if (lop->lo_end < off)
 			lop->lo_end = NFS64BITSSET;
 	}
 	if (vp->v_type != VREG) {
 		if (nd->nd_flag & ND_NFSV3)
 			nd->nd_repstat = EINVAL;
 		else
 			nd->nd_repstat = (vp->v_type == VDIR) ? EISDIR :
 			    EINVAL;
 	}
 	getret = nfsvno_getattr(vp, &nva, nd, p, 1, NULL);
 	if (!nd->nd_repstat)
 		nd->nd_repstat = getret;
 	if (!nd->nd_repstat &&
 	    (nva.na_uid != nd->nd_cred->cr_uid ||
 	     NFSVNO_EXSTRICTACCESS(exp))) {
 		nd->nd_repstat = nfsvno_accchk(vp, VREAD,
 		    nd->nd_cred, exp, p,
 		    NFSACCCHK_ALLOWOWNER, NFSACCCHK_VPISLOCKED, NULL);
 		if (nd->nd_repstat)
 			nd->nd_repstat = nfsvno_accchk(vp, VEXEC,
 			    nd->nd_cred, exp, p, NFSACCCHK_ALLOWOWNER,
 			    NFSACCCHK_VPISLOCKED, NULL);
 	}
 	/*
 	 * DS reads are marked by ND_DSSERVER or use the proxy special
 	 * stateid.
 	 */
 	if (nd->nd_repstat == 0 && (nd->nd_flag & (ND_NFSV4 | ND_DSSERVER)) ==
 	    ND_NFSV4 && gotproxystateid == 0)
 		nd->nd_repstat = nfsrv_lockctrl(vp, &stp, &lop, NULL, clientid,
 		    &stateid, exp, nd, p);
 	if (nd->nd_repstat) {
 		vput(vp);
 		if (nd->nd_flag & ND_NFSV3)
 			nfsrv_postopattr(nd, getret, &nva);
 		goto out;
 	}
 	if (off >= nva.na_size) {
 		cnt = 0;
 		eof = 1;
 	} else if (reqlen == 0)
 		cnt = 0;
 	else if ((off + reqlen) >= nva.na_size) {
 		cnt = nva.na_size - off;
 		eof = 1;
 	} else
 		cnt = reqlen;
 	m3 = NULL;
 	if (cnt > 0) {
 		/*
 		 * If cnt > MCLBYTES and the reply will not be saved, use
 		 * ext_pgs mbufs for TLS.
 		 * For NFSv4.0, we do not know for sure if the reply will
 		 * be saved, so do not use ext_pgs mbufs for NFSv4.0.
 		 * Always use ext_pgs mbufs if ND_EXTPG is set.
 		 */
 		if ((nd->nd_flag & ND_EXTPG) != 0 || (cnt > MCLBYTES &&
 		    (nd->nd_flag & (ND_TLS | ND_SAVEREPLY)) == ND_TLS &&
 		    (nd->nd_flag & (ND_NFSV4 | ND_NFSV41)) != ND_NFSV4))
 			nd->nd_repstat = nfsvno_read(vp, off, cnt, nd->nd_cred,
 			    nd->nd_maxextsiz, p, &m3, &m2);
 		else
 			nd->nd_repstat = nfsvno_read(vp, off, cnt, nd->nd_cred,
 			    0, p, &m3, &m2);
 		if (!(nd->nd_flag & ND_NFSV4)) {
 			getret = nfsvno_getattr(vp, &nva, nd, p, 1, NULL);
 			if (!nd->nd_repstat)
 				nd->nd_repstat = getret;
 		}
 		if (nd->nd_repstat) {
 			vput(vp);
 			if (m3)
 				m_freem(m3);
 			if (nd->nd_flag & ND_NFSV3)
 				nfsrv_postopattr(nd, getret, &nva);
 			goto out;
 		}
 	}
 	vput(vp);
 	if (nd->nd_flag & ND_NFSV2) {
 		nfsrv_fillattr(nd, &nva);
 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
 	} else {
 		if (nd->nd_flag & ND_NFSV3) {
 			nfsrv_postopattr(nd, getret, &nva);
 			NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
 			*tl++ = txdr_unsigned(cnt);
 		} else
 			NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 		if (eof)
 			*tl++ = newnfs_true;
 		else
 			*tl++ = newnfs_false;
 	}
 	*tl = txdr_unsigned(cnt);
 	if (m3) {
 		nd->nd_mb->m_next = m3;
 		nd->nd_mb = m2;
 		if ((m2->m_flags & M_EXTPG) != 0) {
 			nd->nd_flag |= ND_EXTPG;
 			nd->nd_bextpg = m2->m_epg_npgs - 1;
 			nd->nd_bpos = (char *)(void *)
 			    PHYS_TO_DMAP(m2->m_epg_pa[nd->nd_bextpg]);
 			poff = (nd->nd_bextpg == 0) ? m2->m_epg_1st_off : 0;
 			nd->nd_bpos += poff + m2->m_epg_last_len;
 			nd->nd_bextpgsiz = PAGE_SIZE - m2->m_epg_last_len -
 			    poff;
 		} else
 			nd->nd_bpos = mtod(m2, char *) + m2->m_len;
 	}
 
 out:
 	NFSEXITCODE2(0, nd);
 	return (0);
 nfsmout:
 	vput(vp);
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * nfs write service
  */
 int
 nfsrvd_write(struct nfsrv_descript *nd, __unused int isdgram,
     vnode_t vp, struct nfsexstuff *exp)
 {
 	u_int32_t *tl;
 	struct nfsvattr nva, forat;
 	int aftat_ret = 1, retlen, len, error = 0, forat_ret = 1;
 	int gotproxystateid, stable = NFSWRITE_FILESYNC;
 	off_t off;
 	struct nfsstate st, *stp = &st;
 	struct nfslock lo, *lop = &lo;
 	nfsv4stateid_t stateid;
 	nfsquad_t clientid;
 	nfsattrbit_t attrbits;
 	struct thread *p = curthread;
 
 	if (nd->nd_repstat) {
 		nfsrv_wcc(nd, forat_ret, &forat, aftat_ret, &nva);
 		goto out;
 	}
 	gotproxystateid = 0;
 	if (nd->nd_flag & ND_NFSV2) {
 		NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
 		off = (off_t)fxdr_unsigned(u_int32_t, *++tl);
 		tl += 2;
 		retlen = len = fxdr_unsigned(int32_t, *tl);
 	} else if (nd->nd_flag & ND_NFSV3) {
 		NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
 		off = fxdr_hyper(tl);
 		tl += 3;
 		stable = fxdr_unsigned(int, *tl++);
 		retlen = len = fxdr_unsigned(int32_t, *tl);
 	} else {
 		NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID + 4 * NFSX_UNSIGNED);
 		stp->ls_flags = (NFSLCK_CHECK | NFSLCK_WRITEACCESS);
 		lop->lo_flags = NFSLCK_WRITE;
 		stp->ls_ownerlen = 0;
 		stp->ls_op = NULL;
 		stp->ls_uid = nd->nd_cred->cr_uid;
 		stp->ls_stateid.seqid = fxdr_unsigned(u_int32_t, *tl++);
 		clientid.lval[0] = stp->ls_stateid.other[0] = *tl++;
 		clientid.lval[1] = stp->ls_stateid.other[1] = *tl++;
 		if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) {
 			if ((nd->nd_flag & ND_NFSV41) != 0)
 				clientid.qval = nd->nd_clientid.qval;
 			else if (nd->nd_clientid.qval != clientid.qval)
 				printf("EEK2 multiple clids\n");
 		} else {
 			if ((nd->nd_flag & ND_NFSV41) != 0)
 				printf("EEK! no clientid from session\n");
 			nd->nd_flag |= ND_IMPLIEDCLID;
 			nd->nd_clientid.qval = clientid.qval;
 		}
 		stp->ls_stateid.other[2] = *tl++;
 		/*
 		 * Don't allow the client to use a special stateid for a DS op.
 		 */
 		if ((nd->nd_flag & ND_DSSERVER) != 0 &&
 		    ((stp->ls_stateid.other[0] == 0x0 &&
 		    stp->ls_stateid.other[1] == 0x0 &&
 		    stp->ls_stateid.other[2] == 0x0) ||
 		    (stp->ls_stateid.other[0] == 0xffffffff &&
 		    stp->ls_stateid.other[1] == 0xffffffff &&
 		    stp->ls_stateid.other[2] == 0xffffffff) ||
 		    stp->ls_stateid.seqid != 0))
 			nd->nd_repstat = NFSERR_BADSTATEID;
 		/* However, allow the proxy stateid. */
 		if (stp->ls_stateid.seqid == 0xffffffff &&
 		    stp->ls_stateid.other[0] == 0x55555555 &&
 		    stp->ls_stateid.other[1] == 0x55555555 &&
 		    stp->ls_stateid.other[2] == 0x55555555)
 			gotproxystateid = 1;
 		off = fxdr_hyper(tl);
 		lop->lo_first = off;
 		tl += 2;
 		stable = fxdr_unsigned(int, *tl++);
 		retlen = len = fxdr_unsigned(int32_t, *tl);
 		lop->lo_end = off + len;
 		/*
 		 * Paranoia, just in case it wraps around, which shouldn't
 		 * ever happen anyhow.
 		 */
 		if (lop->lo_end < lop->lo_first)
 			lop->lo_end = NFS64BITSSET;
 	}
 
 	if (retlen > nfs_srvmaxio || retlen < 0)
 		nd->nd_repstat = EIO;
 	if (vp->v_type != VREG && !nd->nd_repstat) {
 		if (nd->nd_flag & ND_NFSV3)
 			nd->nd_repstat = EINVAL;
 		else
 			nd->nd_repstat = (vp->v_type == VDIR) ? EISDIR :
 			    EINVAL;
 	}
 	NFSZERO_ATTRBIT(&attrbits);
 	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_OWNER);
 	forat_ret = nfsvno_getattr(vp, &forat, nd, p, 1, &attrbits);
 	if (!nd->nd_repstat)
 		nd->nd_repstat = forat_ret;
 	if (!nd->nd_repstat &&
 	    (forat.na_uid != nd->nd_cred->cr_uid ||
 	     NFSVNO_EXSTRICTACCESS(exp)))
 		nd->nd_repstat = nfsvno_accchk(vp, VWRITE,
 		    nd->nd_cred, exp, p,
 		    NFSACCCHK_ALLOWOWNER, NFSACCCHK_VPISLOCKED, NULL);
 	/*
 	 * DS reads are marked by ND_DSSERVER or use the proxy special
 	 * stateid.
 	 */
 	if (nd->nd_repstat == 0 && (nd->nd_flag & (ND_NFSV4 | ND_DSSERVER)) ==
 	    ND_NFSV4 && gotproxystateid == 0)
 		nd->nd_repstat = nfsrv_lockctrl(vp, &stp, &lop, NULL, clientid,
 		    &stateid, exp, nd, p);
 	if (nd->nd_repstat) {
 		vput(vp);
 		if (nd->nd_flag & ND_NFSV3)
 			nfsrv_wcc(nd, forat_ret, &forat, aftat_ret, &nva);
 		goto out;
 	}
 
 	/*
 	 * For NFS Version 2, it is not obvious what a write of zero length
 	 * should do, but I might as well be consistent with Version 3,
 	 * which is to return ok so long as there are no permission problems.
 	 */
 	if (retlen > 0) {
 		nd->nd_repstat = nfsvno_write(vp, off, retlen, &stable,
 		    nd->nd_md, nd->nd_dpos, nd->nd_cred, p);
 		error = nfsm_advance(nd, NFSM_RNDUP(retlen), -1);
 		if (error)
 			goto nfsmout;
 	}
 	if (nd->nd_flag & ND_NFSV4)
 		aftat_ret = 0;
 	else
 		aftat_ret = nfsvno_getattr(vp, &nva, nd, p, 1, NULL);
 	vput(vp);
 	if (!nd->nd_repstat)
 		nd->nd_repstat = aftat_ret;
 	if (nd->nd_flag & (ND_NFSV3 | ND_NFSV4)) {
 		if (nd->nd_flag & ND_NFSV3)
 			nfsrv_wcc(nd, forat_ret, &forat, aftat_ret, &nva);
 		if (nd->nd_repstat)
 			goto out;
 		NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
 		*tl++ = txdr_unsigned(retlen);
 		/*
 		 * If nfs_async is set, then pretend the write was FILESYNC.
 		 * Warning: Doing this violates RFC1813 and runs a risk
 		 * of data written by a client being lost when the server
 		 * crashes/reboots.
 		 */
 		if (stable == NFSWRITE_UNSTABLE && nfs_async == 0)
 			*tl++ = txdr_unsigned(stable);
 		else
 			*tl++ = txdr_unsigned(NFSWRITE_FILESYNC);
 		/*
 		 * Actually, there is no need to txdr these fields,
 		 * but it may make the values more human readable,
 		 * for debugging purposes.
 		 */
 		*tl++ = txdr_unsigned(nfsboottime.tv_sec);
 		*tl = txdr_unsigned(nfsboottime.tv_usec);
 	} else if (!nd->nd_repstat)
 		nfsrv_fillattr(nd, &nva);
 
 out:
 	NFSEXITCODE2(0, nd);
 	return (0);
 nfsmout:
 	vput(vp);
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * nfs create service (creates regular files for V2 and V3. Spec. files for V2.)
  * now does a truncate to 0 length via. setattr if it already exists
  * The core creation routine has been extracted out into nfsrv_creatsub(),
  * so it can also be used by nfsrv_open() for V4.
  */
 int
 nfsrvd_create(struct nfsrv_descript *nd, __unused int isdgram,
     vnode_t dp, struct nfsexstuff *exp)
 {
 	struct nfsvattr nva, dirfor, diraft;
 	struct nfsv2_sattr *sp;
 	struct nameidata named;
 	u_int32_t *tl;
 	int error = 0, tsize, dirfor_ret = 1, diraft_ret = 1;
 	int how = NFSCREATE_UNCHECKED, exclusive_flag = 0;
 	NFSDEV_T rdev = 0;
 	vnode_t vp = NULL, dirp = NULL;
 	fhandle_t fh;
 	char *bufp;
 	u_long *hashp;
 	enum vtype vtyp;
 	int32_t cverf[2], tverf[2] = { 0, 0 };
 	struct thread *p = curthread;
 
 	if (nd->nd_repstat) {
 		nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret, &diraft);
 		goto out;
 	}
 	NFSNAMEICNDSET(&named.ni_cnd, nd->nd_cred, CREATE,
 	    LOCKPARENT | LOCKLEAF | SAVESTART | NOCACHE);
 	nfsvno_setpathbuf(&named, &bufp, &hashp);
 	error = nfsrv_parsename(nd, bufp, hashp, &named.ni_pathlen);
 	if (error)
 		goto nfsmout;
 	if (!nd->nd_repstat) {
 		NFSVNO_ATTRINIT(&nva);
 		if (nd->nd_flag & ND_NFSV2) {
 			NFSM_DISSECT(sp, struct nfsv2_sattr *, NFSX_V2SATTR);
 			vtyp = IFTOVT(fxdr_unsigned(u_int32_t, sp->sa_mode));
 			if (vtyp == VNON)
 				vtyp = VREG;
 			NFSVNO_SETATTRVAL(&nva, type, vtyp);
 			NFSVNO_SETATTRVAL(&nva, mode,
 			    nfstov_mode(sp->sa_mode));
 			switch (nva.na_type) {
 			case VREG:
 				tsize = fxdr_unsigned(int32_t, sp->sa_size);
 				if (tsize != -1)
 					NFSVNO_SETATTRVAL(&nva, size,
 					    (u_quad_t)tsize);
 				break;
 			case VCHR:
 			case VBLK:
 			case VFIFO:
 				rdev = fxdr_unsigned(NFSDEV_T, sp->sa_size);
 				break;
 			default:
 				break;
 			}
 		} else {
 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 			how = fxdr_unsigned(int, *tl);
 			switch (how) {
 			case NFSCREATE_GUARDED:
 			case NFSCREATE_UNCHECKED:
 				error = nfsrv_sattr(nd, NULL, &nva, NULL, NULL, p);
 				if (error)
 					goto nfsmout;
 				break;
 			case NFSCREATE_EXCLUSIVE:
 				NFSM_DISSECT(tl, u_int32_t *, NFSX_VERF);
 				cverf[0] = *tl++;
 				cverf[1] = *tl;
 				exclusive_flag = 1;
 				break;
 			}
 			NFSVNO_SETATTRVAL(&nva, type, VREG);
 		}
 	}
 	if (nd->nd_repstat) {
 		nfsvno_relpathbuf(&named);
 		if (nd->nd_flag & ND_NFSV3) {
 			dirfor_ret = nfsvno_getattr(dp, &dirfor, nd, p, 1,
 			    NULL);
 			nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret,
 			    &diraft);
 		}
 		vput(dp);
 		goto out;
 	}
 
 	nd->nd_repstat = nfsvno_namei(nd, &named, dp, 1, exp, &dirp);
 	if (dirp) {
 		if (nd->nd_flag & ND_NFSV2) {
 			vrele(dirp);
 			dirp = NULL;
 		} else {
 			dirfor_ret = nfsvno_getattr(dirp, &dirfor, nd, p, 0,
 			    NULL);
 		}
 	}
 	if (nd->nd_repstat) {
 		if (nd->nd_flag & ND_NFSV3)
 			nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret,
 			    &diraft);
 		if (dirp)
 			vrele(dirp);
 		goto out;
 	}
 
 	if (!(nd->nd_flag & ND_NFSV2)) {
 		switch (how) {
 		case NFSCREATE_GUARDED:
 			if (named.ni_vp)
 				nd->nd_repstat = EEXIST;
 			break;
 		case NFSCREATE_UNCHECKED:
 			break;
 		case NFSCREATE_EXCLUSIVE:
 			if (named.ni_vp == NULL)
 				NFSVNO_SETATTRVAL(&nva, mode, 0);
 			break;
 		}
 	}
 
 	/*
 	 * Iff doesn't exist, create it
 	 * otherwise just truncate to 0 length
 	 *   should I set the mode too ?
 	 */
 	nd->nd_repstat = nfsvno_createsub(nd, &named, &vp, &nva,
 	    &exclusive_flag, cverf, rdev, exp);
 
 	if (!nd->nd_repstat) {
 		nd->nd_repstat = nfsvno_getfh(vp, &fh, p);
 		if (!nd->nd_repstat)
 			nd->nd_repstat = nfsvno_getattr(vp, &nva, nd, p, 1,
 			    NULL);
 		vput(vp);
 		if (!nd->nd_repstat) {
 			tverf[0] = nva.na_atime.tv_sec;
 			tverf[1] = nva.na_atime.tv_nsec;
 		}
 	}
 	if (nd->nd_flag & ND_NFSV2) {
 		if (!nd->nd_repstat) {
 			(void) nfsm_fhtom(nd, (u_int8_t *)&fh, 0, 0);
 			nfsrv_fillattr(nd, &nva);
 		}
 	} else {
 		if (exclusive_flag && !nd->nd_repstat && (cverf[0] != tverf[0]
 		    || cverf[1] != tverf[1]))
 			nd->nd_repstat = EEXIST;
 		diraft_ret = nfsvno_getattr(dirp, &diraft, nd, p, 0, NULL);
 		vrele(dirp);
 		if (!nd->nd_repstat) {
 			(void) nfsm_fhtom(nd, (u_int8_t *)&fh, 0, 1);
 			nfsrv_postopattr(nd, 0, &nva);
 		}
 		nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret, &diraft);
 	}
 
 out:
 	NFSEXITCODE2(0, nd);
 	return (0);
 nfsmout:
 	vput(dp);
 	nfsvno_relpathbuf(&named);
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * nfs v3 mknod service (and v4 create)
  */
 int
 nfsrvd_mknod(struct nfsrv_descript *nd, __unused int isdgram,
     vnode_t dp, vnode_t *vpp, fhandle_t *fhp, struct nfsexstuff *exp)
 {
 	struct nfsvattr nva, dirfor, diraft;
 	u_int32_t *tl;
 	struct nameidata named;
 	int error = 0, dirfor_ret = 1, diraft_ret = 1, pathlen;
 	u_int32_t major, minor;
 	enum vtype vtyp = VNON;
 	nfstype nfs4type = NFNON;
 	vnode_t vp, dirp = NULL;
 	nfsattrbit_t attrbits;
 	char *bufp = NULL, *pathcp = NULL;
 	u_long *hashp, cnflags;
 	NFSACL_T *aclp = NULL;
 	struct thread *p = curthread;
 
 	NFSVNO_ATTRINIT(&nva);
 	cnflags = (LOCKPARENT | SAVESTART);
 	if (nd->nd_repstat) {
 		nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret, &diraft);
 		goto out;
 	}
 #ifdef NFS4_ACL_EXTATTR_NAME
 	aclp = acl_alloc(M_WAITOK);
 	aclp->acl_cnt = 0;
 #endif
 
 	/*
 	 * For V4, the creation stuff is here, Yuck!
 	 */
 	if (nd->nd_flag & ND_NFSV4) {
 		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 		vtyp = nfsv34tov_type(*tl);
 		nfs4type = fxdr_unsigned(nfstype, *tl);
 		switch (nfs4type) {
 		case NFLNK:
 			error = nfsvno_getsymlink(nd, &nva, p, &pathcp,
 			    &pathlen);
 			if (error)
 				goto nfsmout;
 			break;
 		case NFCHR:
 		case NFBLK:
 			NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 			major = fxdr_unsigned(u_int32_t, *tl++);
 			minor = fxdr_unsigned(u_int32_t, *tl);
 			nva.na_rdev = NFSMAKEDEV(major, minor);
 			break;
 		case NFSOCK:
 		case NFFIFO:
 			break;
 		case NFDIR:
-			cnflags = (LOCKPARENT | SAVENAME);
+			cnflags = LOCKPARENT;
 			break;
 		default:
 			nd->nd_repstat = NFSERR_BADTYPE;
 			vrele(dp);
 #ifdef NFS4_ACL_EXTATTR_NAME
 			acl_free(aclp);
 #endif
 			goto out;
 		}
 	}
 	NFSNAMEICNDSET(&named.ni_cnd, nd->nd_cred, CREATE, cnflags | NOCACHE);
 	nfsvno_setpathbuf(&named, &bufp, &hashp);
 	error = nfsrv_parsename(nd, bufp, hashp, &named.ni_pathlen);
 	if (error)
 		goto nfsmout;
 	if (!nd->nd_repstat) {
 		if (nd->nd_flag & ND_NFSV3) {
 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 			vtyp = nfsv34tov_type(*tl);
 		}
 		error = nfsrv_sattr(nd, NULL, &nva, &attrbits, aclp, p);
 		if (error)
 			goto nfsmout;
 		nva.na_type = vtyp;
 		if (!nd->nd_repstat && (nd->nd_flag & ND_NFSV3) &&
 		    (vtyp == VCHR || vtyp == VBLK)) {
 			NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 			major = fxdr_unsigned(u_int32_t, *tl++);
 			minor = fxdr_unsigned(u_int32_t, *tl);
 			nva.na_rdev = NFSMAKEDEV(major, minor);
 		}
 	}
 
 	dirfor_ret = nfsvno_getattr(dp, &dirfor, nd, p, 0, NULL);
 	if (!nd->nd_repstat && (nd->nd_flag & ND_NFSV4)) {
 		if (!dirfor_ret && NFSVNO_ISSETGID(&nva) &&
 		    dirfor.na_gid == nva.na_gid)
 			NFSVNO_UNSET(&nva, gid);
 		nd->nd_repstat = nfsrv_checkuidgid(nd, &nva);
 	}
 	if (nd->nd_repstat) {
 		vrele(dp);
 #ifdef NFS4_ACL_EXTATTR_NAME
 		acl_free(aclp);
 #endif
 		nfsvno_relpathbuf(&named);
 		if (pathcp)
 			free(pathcp, M_TEMP);
 		if (nd->nd_flag & ND_NFSV3)
 			nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret,
 			    &diraft);
 		goto out;
 	}
 
 	/*
 	 * Yuck! For V4, mkdir and link are here and some V4 clients don't fill
 	 * in va_mode, so we'll have to set a default here.
 	 */
 	if (NFSVNO_NOTSETMODE(&nva)) {
 		if (vtyp == VLNK)
 			nva.na_mode = 0755;
 		else
 			nva.na_mode = 0400;
 	}
 
 	if (vtyp == VDIR)
 		named.ni_cnd.cn_flags |= WILLBEDIR;
 	nd->nd_repstat = nfsvno_namei(nd, &named, dp, 0, exp, &dirp);
 	if (nd->nd_repstat) {
 		if (dirp) {
 			if (nd->nd_flag & ND_NFSV3)
 				dirfor_ret = nfsvno_getattr(dirp, &dirfor, nd,
 				    p, 0, NULL);
 			vrele(dirp);
 		}
 #ifdef NFS4_ACL_EXTATTR_NAME
 		acl_free(aclp);
 #endif
 		if (nd->nd_flag & ND_NFSV3)
 			nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret,
 			    &diraft);
 		goto out;
 	}
 	if (dirp)
 		dirfor_ret = nfsvno_getattr(dirp, &dirfor, nd, p, 0, NULL);
 
 	if ((nd->nd_flag & ND_NFSV4) && (vtyp == VDIR || vtyp == VLNK)) {
 		if (vtyp == VDIR) {
 			nfsrvd_mkdirsub(nd, &named, &nva, fhp, vpp, dirp,
 			    &dirfor, &diraft, &diraft_ret, &attrbits, aclp, p,
 			    exp);
 #ifdef NFS4_ACL_EXTATTR_NAME
 			acl_free(aclp);
 #endif
 			goto out;
 		} else if (vtyp == VLNK) {
 			nfsrvd_symlinksub(nd, &named, &nva, fhp, vpp, dirp,
 			    &dirfor, &diraft, &diraft_ret, &attrbits,
 			    aclp, p, exp, pathcp, pathlen);
 #ifdef NFS4_ACL_EXTATTR_NAME
 			acl_free(aclp);
 #endif
 			free(pathcp, M_TEMP);
 			goto out;
 		}
 	}
 
 	nd->nd_repstat = nfsvno_mknod(&named, &nva, nd->nd_cred, p);
 	if (!nd->nd_repstat) {
 		vp = named.ni_vp;
 		nfsrv_fixattr(nd, vp, &nva, aclp, p, &attrbits, exp);
 		nd->nd_repstat = nfsvno_getfh(vp, fhp, p);
 		if ((nd->nd_flag & ND_NFSV3) && !nd->nd_repstat)
 			nd->nd_repstat = nfsvno_getattr(vp, &nva, nd, p, 1,
 			    NULL);
 		if (vpp != NULL && nd->nd_repstat == 0) {
 			NFSVOPUNLOCK(vp);
 			*vpp = vp;
 		} else
 			vput(vp);
 	}
 
 	diraft_ret = nfsvno_getattr(dirp, &diraft, nd, p, 0, NULL);
 	vrele(dirp);
 	if (!nd->nd_repstat) {
 		if (nd->nd_flag & ND_NFSV3) {
 			(void) nfsm_fhtom(nd, (u_int8_t *)fhp, 0, 1);
 			nfsrv_postopattr(nd, 0, &nva);
 		} else {
 			NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
 			*tl++ = newnfs_false;
 			txdr_hyper(dirfor.na_filerev, tl);
 			tl += 2;
 			txdr_hyper(diraft.na_filerev, tl);
 			(void) nfsrv_putattrbit(nd, &attrbits);
 		}
 	}
 	if (nd->nd_flag & ND_NFSV3)
 		nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret, &diraft);
 #ifdef NFS4_ACL_EXTATTR_NAME
 	acl_free(aclp);
 #endif
 
 out:
 	NFSEXITCODE2(0, nd);
 	return (0);
 nfsmout:
 	vrele(dp);
 #ifdef NFS4_ACL_EXTATTR_NAME
 	acl_free(aclp);
 #endif
 	if (bufp)
 		nfsvno_relpathbuf(&named);
 	if (pathcp)
 		free(pathcp, M_TEMP);
 
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * nfs remove service
  */
 int
 nfsrvd_remove(struct nfsrv_descript *nd, __unused int isdgram,
     vnode_t dp, struct nfsexstuff *exp)
 {
 	struct nameidata named;
 	u_int32_t *tl;
 	int error = 0, dirfor_ret = 1, diraft_ret = 1;
 	vnode_t dirp = NULL;
 	struct nfsvattr dirfor, diraft;
 	char *bufp;
 	u_long *hashp;
 	struct thread *p = curthread;
 
 	if (nd->nd_repstat) {
 		nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret, &diraft);
 		goto out;
 	}
 	NFSNAMEICNDSET(&named.ni_cnd, nd->nd_cred, DELETE,
 	    LOCKPARENT | LOCKLEAF);
 	nfsvno_setpathbuf(&named, &bufp, &hashp);
 	error = nfsrv_parsename(nd, bufp, hashp, &named.ni_pathlen);
 	if (error) {
 		vput(dp);
 		nfsvno_relpathbuf(&named);
 		goto out;
 	}
 	if (!nd->nd_repstat) {
 		nd->nd_repstat = nfsvno_namei(nd, &named, dp, 1, exp, &dirp);
 	} else {
 		vput(dp);
 		nfsvno_relpathbuf(&named);
 	}
 	if (dirp) {
 		if (!(nd->nd_flag & ND_NFSV2)) {
 			dirfor_ret = nfsvno_getattr(dirp, &dirfor, nd, p, 0,
 			    NULL);
 		} else {
 			vrele(dirp);
 			dirp = NULL;
 		}
 	}
 	if (!nd->nd_repstat) {
 		if (nd->nd_flag & ND_NFSV4) {
 			if (named.ni_vp->v_type == VDIR)
 				nd->nd_repstat = nfsvno_rmdirsub(&named, 1,
 				    nd->nd_cred, p, exp);
 			else
 				nd->nd_repstat = nfsvno_removesub(&named, 1,
 				    nd->nd_cred, p, exp);
 		} else if (nd->nd_procnum == NFSPROC_RMDIR) {
 			nd->nd_repstat = nfsvno_rmdirsub(&named, 0,
 			    nd->nd_cred, p, exp);
 		} else {
 			nd->nd_repstat = nfsvno_removesub(&named, 0,
 			    nd->nd_cred, p, exp);
 		}
 	}
 	if (!(nd->nd_flag & ND_NFSV2)) {
 		if (dirp) {
 			diraft_ret = nfsvno_getattr(dirp, &diraft, nd, p, 0,
 			    NULL);
 			vrele(dirp);
 		}
 		if (nd->nd_flag & ND_NFSV3) {
 			nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret,
 			    &diraft);
 		} else if (!nd->nd_repstat) {
 			NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
 			*tl++ = newnfs_false;
 			txdr_hyper(dirfor.na_filerev, tl);
 			tl += 2;
 			txdr_hyper(diraft.na_filerev, tl);
 		}
 	}
 
 out:
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * nfs rename service
  */
 int
 nfsrvd_rename(struct nfsrv_descript *nd, int isdgram,
     vnode_t dp, vnode_t todp, struct nfsexstuff *exp, struct nfsexstuff *toexp)
 {
 	u_int32_t *tl;
 	int error = 0, fdirfor_ret = 1, fdiraft_ret = 1;
 	int tdirfor_ret = 1, tdiraft_ret = 1;
 	struct nameidata fromnd, tond;
 	vnode_t fdirp = NULL, tdirp = NULL, tdp = NULL;
 	struct nfsvattr fdirfor, fdiraft, tdirfor, tdiraft;
 	struct nfsexstuff tnes;
 	struct nfsrvfh tfh;
 	char *bufp, *tbufp = NULL;
 	u_long *hashp;
 	fhandle_t fh;
 	struct thread *p = curthread;
 
 	if (nd->nd_repstat) {
 		nfsrv_wcc(nd, fdirfor_ret, &fdirfor, fdiraft_ret, &fdiraft);
 		nfsrv_wcc(nd, tdirfor_ret, &tdirfor, tdiraft_ret, &tdiraft);
 		goto out;
 	}
 	if (!(nd->nd_flag & ND_NFSV2))
 		fdirfor_ret = nfsvno_getattr(dp, &fdirfor, nd, p, 1, NULL);
 	tond.ni_cnd.cn_nameiop = 0;
 	tond.ni_startdir = NULL;
 	NFSNAMEICNDSET(&fromnd.ni_cnd, nd->nd_cred, DELETE, WANTPARENT | SAVESTART);
 	nfsvno_setpathbuf(&fromnd, &bufp, &hashp);
 	error = nfsrv_parsename(nd, bufp, hashp, &fromnd.ni_pathlen);
 	if (error) {
 		vput(dp);
 		if (todp)
 			vrele(todp);
 		nfsvno_relpathbuf(&fromnd);
 		goto out;
 	}
 	/*
 	 * Unlock dp in this code section, so it is unlocked before
 	 * tdp gets locked. This avoids a potential LOR if tdp is the
 	 * parent directory of dp.
 	 */
 	if (nd->nd_flag & ND_NFSV4) {
 		tdp = todp;
 		tnes = *toexp;
 		if (dp != tdp) {
 			NFSVOPUNLOCK(dp);
 			/* Might lock tdp. */
 			tdirfor_ret = nfsvno_getattr(tdp, &tdirfor, nd, p, 0,
 			    NULL);
 		} else {
 			tdirfor_ret = nfsvno_getattr(tdp, &tdirfor, nd, p, 1,
 			    NULL);
 			NFSVOPUNLOCK(dp);
 		}
 	} else {
 		tfh.nfsrvfh_len = 0;
 		error = nfsrv_mtofh(nd, &tfh);
 		if (error == 0)
 			error = nfsvno_getfh(dp, &fh, p);
 		if (error) {
 			vput(dp);
 			/* todp is always NULL except NFSv4 */
 			nfsvno_relpathbuf(&fromnd);
 			goto out;
 		}
 
 		/* If this is the same file handle, just VREF() the vnode. */
 		if (tfh.nfsrvfh_len == NFSX_MYFH &&
 		    !NFSBCMP(tfh.nfsrvfh_data, &fh, NFSX_MYFH)) {
 			VREF(dp);
 			tdp = dp;
 			tnes = *exp;
 			tdirfor_ret = nfsvno_getattr(tdp, &tdirfor, nd, p, 1,
 			    NULL);
 			NFSVOPUNLOCK(dp);
 		} else {
 			NFSVOPUNLOCK(dp);
 			nd->nd_cred->cr_uid = nd->nd_saveduid;
 			nfsd_fhtovp(nd, &tfh, LK_EXCLUSIVE, &tdp, &tnes, NULL,
 			    0, -1);	/* Locks tdp. */
 			if (tdp) {
 				tdirfor_ret = nfsvno_getattr(tdp, &tdirfor, nd,
 				    p, 1, NULL);
 				NFSVOPUNLOCK(tdp);
 			}
 		}
 	}
 	NFSNAMEICNDSET(&tond.ni_cnd, nd->nd_cred, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART);
 	nfsvno_setpathbuf(&tond, &tbufp, &hashp);
 	if (!nd->nd_repstat) {
 		error = nfsrv_parsename(nd, tbufp, hashp, &tond.ni_pathlen);
 		if (error) {
 			if (tdp)
 				vrele(tdp);
 			vrele(dp);
 			nfsvno_relpathbuf(&fromnd);
 			nfsvno_relpathbuf(&tond);
 			goto out;
 		}
 	}
 	if (nd->nd_repstat) {
 		if (nd->nd_flag & ND_NFSV3) {
 			nfsrv_wcc(nd, fdirfor_ret, &fdirfor, fdiraft_ret,
 			    &fdiraft);
 			nfsrv_wcc(nd, tdirfor_ret, &tdirfor, tdiraft_ret,
 			    &tdiraft);
 		}
 		if (tdp)
 			vrele(tdp);
 		vrele(dp);
 		nfsvno_relpathbuf(&fromnd);
 		nfsvno_relpathbuf(&tond);
 		goto out;
 	}
 
 	/*
 	 * Done parsing, now down to business.
 	 */
 	nd->nd_repstat = nfsvno_namei(nd, &fromnd, dp, 0, exp, &fdirp);
 	if (nd->nd_repstat) {
 		if (nd->nd_flag & ND_NFSV3) {
 			nfsrv_wcc(nd, fdirfor_ret, &fdirfor, fdiraft_ret,
 			    &fdiraft);
 			nfsrv_wcc(nd, tdirfor_ret, &tdirfor, tdiraft_ret,
 			    &tdiraft);
 		}
 		if (fdirp)
 			vrele(fdirp);
 		if (tdp)
 			vrele(tdp);
 		nfsvno_relpathbuf(&tond);
 		goto out;
 	}
 	if (fromnd.ni_vp->v_type == VDIR)
 		tond.ni_cnd.cn_flags |= WILLBEDIR;
 	nd->nd_repstat = nfsvno_namei(nd, &tond, tdp, 0, &tnes, &tdirp);
 	nd->nd_repstat = nfsvno_rename(&fromnd, &tond, nd->nd_repstat,
 	    nd->nd_flag, nd->nd_cred, p);
 	if (fdirp)
 		fdiraft_ret = nfsvno_getattr(fdirp, &fdiraft, nd, p, 0, NULL);
 	if (tdirp)
 		tdiraft_ret = nfsvno_getattr(tdirp, &tdiraft, nd, p, 0, NULL);
 	if (fdirp)
 		vrele(fdirp);
 	if (tdirp)
 		vrele(tdirp);
 	if (nd->nd_flag & ND_NFSV3) {
 		nfsrv_wcc(nd, fdirfor_ret, &fdirfor, fdiraft_ret, &fdiraft);
 		nfsrv_wcc(nd, tdirfor_ret, &tdirfor, tdiraft_ret, &tdiraft);
 	} else if ((nd->nd_flag & ND_NFSV4) && !nd->nd_repstat) {
 		NFSM_BUILD(tl, u_int32_t *, 10 * NFSX_UNSIGNED);
 		*tl++ = newnfs_false;
 		txdr_hyper(fdirfor.na_filerev, tl);
 		tl += 2;
 		txdr_hyper(fdiraft.na_filerev, tl);
 		tl += 2;
 		*tl++ = newnfs_false;
 		txdr_hyper(tdirfor.na_filerev, tl);
 		tl += 2;
 		txdr_hyper(tdiraft.na_filerev, tl);
 	}
 
 out:
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * nfs link service
  */
 int
 nfsrvd_link(struct nfsrv_descript *nd, int isdgram,
     vnode_t vp, vnode_t tovp, struct nfsexstuff *exp, struct nfsexstuff *toexp)
 {
 	struct nameidata named;
 	u_int32_t *tl;
 	int error = 0, dirfor_ret = 1, diraft_ret = 1, getret = 1;
 	vnode_t dirp = NULL, dp = NULL;
 	struct nfsvattr dirfor, diraft, at;
 	struct nfsexstuff tnes;
 	struct nfsrvfh dfh;
 	char *bufp;
 	u_long *hashp;
 	struct thread *p = curthread;
 
 	if (nd->nd_repstat) {
 		nfsrv_postopattr(nd, getret, &at);
 		nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret, &diraft);
 		goto out;
 	}
 	NFSVOPUNLOCK(vp);
 	if (vp->v_type == VDIR) {
 		if (nd->nd_flag & ND_NFSV4)
 			nd->nd_repstat = NFSERR_ISDIR;
 		else
 			nd->nd_repstat = NFSERR_INVAL;
 		if (tovp)
 			vrele(tovp);
 	}
 	if (!nd->nd_repstat) {
 		if (nd->nd_flag & ND_NFSV4) {
 			dp = tovp;
 			tnes = *toexp;
 		} else {
 			error = nfsrv_mtofh(nd, &dfh);
 			if (error) {
 				vrele(vp);
 				/* tovp is always NULL unless NFSv4 */
 				goto out;
 			}
 			nfsd_fhtovp(nd, &dfh, LK_EXCLUSIVE, &dp, &tnes, NULL,
 			    0, -1);
 			if (dp)
 				NFSVOPUNLOCK(dp);
 		}
 	}
-	NFSNAMEICNDSET(&named.ni_cnd, nd->nd_cred, CREATE,
-	    LOCKPARENT | SAVENAME | NOCACHE);
+	NFSNAMEICNDSET(&named.ni_cnd, nd->nd_cred, CREATE, LOCKPARENT | NOCACHE);
 	if (!nd->nd_repstat) {
 		nfsvno_setpathbuf(&named, &bufp, &hashp);
 		error = nfsrv_parsename(nd, bufp, hashp, &named.ni_pathlen);
 		if (error) {
 			vrele(vp);
 			if (dp)
 				vrele(dp);
 			nfsvno_relpathbuf(&named);
 			goto out;
 		}
 		if (!nd->nd_repstat) {
 			nd->nd_repstat = nfsvno_namei(nd, &named, dp, 0, &tnes,
 			    &dirp);
 		} else {
 			if (dp)
 				vrele(dp);
 			nfsvno_relpathbuf(&named);
 		}
 	}
 	if (dirp) {
 		if (nd->nd_flag & ND_NFSV2) {
 			vrele(dirp);
 			dirp = NULL;
 		} else {
 			dirfor_ret = nfsvno_getattr(dirp, &dirfor, nd, p, 0,
 			    NULL);
 		}
 	}
 	if (!nd->nd_repstat)
 		nd->nd_repstat = nfsvno_link(&named, vp, nd->nd_cred, p, exp);
 	if (nd->nd_flag & ND_NFSV3)
 		getret = nfsvno_getattr(vp, &at, nd, p, 0, NULL);
 	if (dirp) {
 		diraft_ret = nfsvno_getattr(dirp, &diraft, nd, p, 0, NULL);
 		vrele(dirp);
 	}
 	vrele(vp);
 	if (nd->nd_flag & ND_NFSV3) {
 		nfsrv_postopattr(nd, getret, &at);
 		nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret, &diraft);
 	} else if ((nd->nd_flag & ND_NFSV4) && !nd->nd_repstat) {
 		NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
 		*tl++ = newnfs_false;
 		txdr_hyper(dirfor.na_filerev, tl);
 		tl += 2;
 		txdr_hyper(diraft.na_filerev, tl);
 	}
 
 out:
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * nfs symbolic link service
  */
 int
 nfsrvd_symlink(struct nfsrv_descript *nd, __unused int isdgram,
     vnode_t dp, vnode_t *vpp, fhandle_t *fhp, struct nfsexstuff *exp)
 {
 	struct nfsvattr nva, dirfor, diraft;
 	struct nameidata named;
 	int error = 0, dirfor_ret = 1, diraft_ret = 1, pathlen;
 	vnode_t dirp = NULL;
 	char *bufp, *pathcp = NULL;
 	u_long *hashp;
 	struct thread *p = curthread;
 
 	if (nd->nd_repstat) {
 		nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret, &diraft);
 		goto out;
 	}
 	if (vpp)
 		*vpp = NULL;
 	NFSVNO_ATTRINIT(&nva);
 	NFSNAMEICNDSET(&named.ni_cnd, nd->nd_cred, CREATE,
 	    LOCKPARENT | SAVESTART | NOCACHE);
 	nfsvno_setpathbuf(&named, &bufp, &hashp);
 	error = nfsrv_parsename(nd, bufp, hashp, &named.ni_pathlen);
 	if (!error && !nd->nd_repstat)
 		error = nfsvno_getsymlink(nd, &nva, p, &pathcp, &pathlen);
 	if (error) {
 		vrele(dp);
 		nfsvno_relpathbuf(&named);
 		goto out;
 	}
 	if (!nd->nd_repstat) {
 		nd->nd_repstat = nfsvno_namei(nd, &named, dp, 0, exp, &dirp);
 	} else {
 		vrele(dp);
 		nfsvno_relpathbuf(&named);
 	}
 	if (dirp != NULL && !(nd->nd_flag & ND_NFSV3)) {
 		vrele(dirp);
 		dirp = NULL;
 	}
 
 	/*
 	 * And call nfsrvd_symlinksub() to do the common code. It will
 	 * return EBADRPC upon a parsing error, 0 otherwise.
 	 */
 	if (!nd->nd_repstat) {
 		if (dirp != NULL)
 			dirfor_ret = nfsvno_getattr(dirp, &dirfor, nd, p, 0,
 			    NULL);
 		nfsrvd_symlinksub(nd, &named, &nva, fhp, vpp, dirp,
 		    &dirfor, &diraft, &diraft_ret, NULL, NULL, p, exp,
 		    pathcp, pathlen);
 	} else if (dirp != NULL) {
 		dirfor_ret = nfsvno_getattr(dirp, &dirfor, nd, p, 0, NULL);
 		vrele(dirp);
 	}
 	if (pathcp)
 		free(pathcp, M_TEMP);
 
 	if (nd->nd_flag & ND_NFSV3) {
 		if (!nd->nd_repstat) {
 			(void) nfsm_fhtom(nd, (u_int8_t *)fhp, 0, 1);
 			nfsrv_postopattr(nd, 0, &nva);
 		}
 		nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret, &diraft);
 	}
 
 out:
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * Common code for creating a symbolic link.
  */
 static void
 nfsrvd_symlinksub(struct nfsrv_descript *nd, struct nameidata *ndp,
     struct nfsvattr *nvap, fhandle_t *fhp, vnode_t *vpp,
     vnode_t dirp, struct nfsvattr *dirforp, struct nfsvattr *diraftp,
     int *diraft_retp, nfsattrbit_t *attrbitp,
     NFSACL_T *aclp, NFSPROC_T *p, struct nfsexstuff *exp, char *pathcp,
     int pathlen)
 {
 	u_int32_t *tl;
 
 	nd->nd_repstat = nfsvno_symlink(ndp, nvap, pathcp, pathlen,
 	    !(nd->nd_flag & ND_NFSV2), nd->nd_saveduid, nd->nd_cred, p, exp);
 	if (!nd->nd_repstat && !(nd->nd_flag & ND_NFSV2)) {
 		nfsrv_fixattr(nd, ndp->ni_vp, nvap, aclp, p, attrbitp, exp);
 		if (nd->nd_flag & ND_NFSV3) {
 			nd->nd_repstat = nfsvno_getfh(ndp->ni_vp, fhp, p);
 			if (!nd->nd_repstat)
 				nd->nd_repstat = nfsvno_getattr(ndp->ni_vp,
 				    nvap, nd, p, 1, NULL);
 		}
 		if (vpp != NULL && nd->nd_repstat == 0) {
 			NFSVOPUNLOCK(ndp->ni_vp);
 			*vpp = ndp->ni_vp;
 		} else
 			vput(ndp->ni_vp);
 	}
 	if (dirp) {
 		*diraft_retp = nfsvno_getattr(dirp, diraftp, nd, p, 0, NULL);
 		vrele(dirp);
 	}
 	if ((nd->nd_flag & ND_NFSV4) && !nd->nd_repstat) {
 		NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
 		*tl++ = newnfs_false;
 		txdr_hyper(dirforp->na_filerev, tl);
 		tl += 2;
 		txdr_hyper(diraftp->na_filerev, tl);
 		(void) nfsrv_putattrbit(nd, attrbitp);
 	}
 
 	NFSEXITCODE2(0, nd);
 }
 
 /*
  * nfs mkdir service
  */
 int
 nfsrvd_mkdir(struct nfsrv_descript *nd, __unused int isdgram,
     vnode_t dp, vnode_t *vpp, fhandle_t *fhp, struct nfsexstuff *exp)
 {
 	struct nfsvattr nva, dirfor, diraft;
 	struct nameidata named;
 	u_int32_t *tl;
 	int error = 0, dirfor_ret = 1, diraft_ret = 1;
 	vnode_t dirp = NULL;
 	char *bufp;
 	u_long *hashp;
 	struct thread *p = curthread;
 
 	if (nd->nd_repstat) {
 		nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret, &diraft);
 		goto out;
 	}
-	NFSNAMEICNDSET(&named.ni_cnd, nd->nd_cred, CREATE,
-	    LOCKPARENT | SAVENAME | NOCACHE);
+	NFSNAMEICNDSET(&named.ni_cnd, nd->nd_cred, CREATE, LOCKPARENT | NOCACHE);
 	nfsvno_setpathbuf(&named, &bufp, &hashp);
 	error = nfsrv_parsename(nd, bufp, hashp, &named.ni_pathlen);
 	if (error)
 		goto nfsmout;
 	if (!nd->nd_repstat) {
 		NFSVNO_ATTRINIT(&nva);
 		if (nd->nd_flag & ND_NFSV3) {
 			error = nfsrv_sattr(nd, NULL, &nva, NULL, NULL, p);
 			if (error)
 				goto nfsmout;
 		} else {
 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 			nva.na_mode = nfstov_mode(*tl++);
 		}
 	}
 	if (!nd->nd_repstat) {
 		nd->nd_repstat = nfsvno_namei(nd, &named, dp, 0, exp, &dirp);
 	} else {
 		vrele(dp);
 		nfsvno_relpathbuf(&named);
 	}
 	if (dirp != NULL && !(nd->nd_flag & ND_NFSV3)) {
 		vrele(dirp);
 		dirp = NULL;
 	}
 	if (nd->nd_repstat) {
 		if (dirp != NULL) {
 			dirfor_ret = nfsvno_getattr(dirp, &dirfor, nd, p, 0,
 			    NULL);
 			vrele(dirp);
 		}
 		if (nd->nd_flag & ND_NFSV3)
 			nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret,
 			    &diraft);
 		goto out;
 	}
 	if (dirp != NULL)
 		dirfor_ret = nfsvno_getattr(dirp, &dirfor, nd, p, 0, NULL);
 
 	/*
 	 * Call nfsrvd_mkdirsub() for the code common to V4 as well.
 	 */
 	nfsrvd_mkdirsub(nd, &named, &nva, fhp, vpp, dirp, &dirfor, &diraft,
 	    &diraft_ret, NULL, NULL, p, exp);
 
 	if (nd->nd_flag & ND_NFSV3) {
 		if (!nd->nd_repstat) {
 			(void) nfsm_fhtom(nd, (u_int8_t *)fhp, 0, 1);
 			nfsrv_postopattr(nd, 0, &nva);
 		}
 		nfsrv_wcc(nd, dirfor_ret, &dirfor, diraft_ret, &diraft);
 	} else if (!nd->nd_repstat) {
 		(void) nfsm_fhtom(nd, (u_int8_t *)fhp, 0, 0);
 		nfsrv_fillattr(nd, &nva);
 	}
 
 out:
 	NFSEXITCODE2(0, nd);
 	return (0);
 nfsmout:
 	vrele(dp);
 	nfsvno_relpathbuf(&named);
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * Code common to mkdir for V2,3 and 4.
  */
 static void
 nfsrvd_mkdirsub(struct nfsrv_descript *nd, struct nameidata *ndp,
     struct nfsvattr *nvap, fhandle_t *fhp, vnode_t *vpp,
     vnode_t dirp, struct nfsvattr *dirforp, struct nfsvattr *diraftp,
     int *diraft_retp, nfsattrbit_t *attrbitp, NFSACL_T *aclp,
     NFSPROC_T *p, struct nfsexstuff *exp)
 {
 	vnode_t vp;
 	u_int32_t *tl;
 
 	NFSVNO_SETATTRVAL(nvap, type, VDIR);
 	nd->nd_repstat = nfsvno_mkdir(ndp, nvap, nd->nd_saveduid,
 	    nd->nd_cred, p, exp);
 	if (!nd->nd_repstat) {
 		vp = ndp->ni_vp;
 		nfsrv_fixattr(nd, vp, nvap, aclp, p, attrbitp, exp);
 		nd->nd_repstat = nfsvno_getfh(vp, fhp, p);
 		if (!(nd->nd_flag & ND_NFSV4) && !nd->nd_repstat)
 			nd->nd_repstat = nfsvno_getattr(vp, nvap, nd, p, 1,
 			    NULL);
 		if (vpp && !nd->nd_repstat) {
 			NFSVOPUNLOCK(vp);
 			*vpp = vp;
 		} else {
 			vput(vp);
 		}
 	}
 	if (dirp) {
 		*diraft_retp = nfsvno_getattr(dirp, diraftp, nd, p, 0, NULL);
 		vrele(dirp);
 	}
 	if ((nd->nd_flag & ND_NFSV4) && !nd->nd_repstat) {
 		NFSM_BUILD(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
 		*tl++ = newnfs_false;
 		txdr_hyper(dirforp->na_filerev, tl);
 		tl += 2;
 		txdr_hyper(diraftp->na_filerev, tl);
 		(void) nfsrv_putattrbit(nd, attrbitp);
 	}
 
 	NFSEXITCODE2(0, nd);
 }
 
 /*
  * nfs commit service
  */
 int
 nfsrvd_commit(struct nfsrv_descript *nd, __unused int isdgram,
     vnode_t vp, __unused struct nfsexstuff *exp)
 {
 	struct nfsvattr bfor, aft;
 	u_int32_t *tl;
 	int error = 0, for_ret = 1, aft_ret = 1, cnt;
 	u_int64_t off;
 	struct thread *p = curthread;
 
        if (nd->nd_repstat) {
 		nfsrv_wcc(nd, for_ret, &bfor, aft_ret, &aft);
 		goto out;
 	}
 
 	/* Return NFSERR_ISDIR in NFSv4 when commit on a directory. */
 	if (vp->v_type != VREG) {
 		if (nd->nd_flag & ND_NFSV3)
 			error = NFSERR_NOTSUPP;
 		else
 			error = (vp->v_type == VDIR) ? NFSERR_ISDIR : NFSERR_INVAL;
 		goto nfsmout;
 	}
 	NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
 
 	/*
 	 * XXX At this time VOP_FSYNC() does not accept offset and byte
 	 * count parameters, so these arguments are useless (someday maybe).
 	 */
 	off = fxdr_hyper(tl);
 	tl += 2;
 	cnt = fxdr_unsigned(int, *tl);
 	if (nd->nd_flag & ND_NFSV3)
 		for_ret = nfsvno_getattr(vp, &bfor, nd, p, 1, NULL);
 	nd->nd_repstat = nfsvno_fsync(vp, off, cnt, nd->nd_cred, p);
 	if (nd->nd_flag & ND_NFSV3) {
 		aft_ret = nfsvno_getattr(vp, &aft, nd, p, 1, NULL);
 		nfsrv_wcc(nd, for_ret, &bfor, aft_ret, &aft);
 	}
 	vput(vp);
 	if (!nd->nd_repstat) {
 		NFSM_BUILD(tl, u_int32_t *, NFSX_VERF);
 		*tl++ = txdr_unsigned(nfsboottime.tv_sec);
 		*tl = txdr_unsigned(nfsboottime.tv_usec);
 	}
 
 out:
 	NFSEXITCODE2(0, nd);
 	return (0);
 nfsmout:
 	vput(vp);
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * nfs statfs service
  */
 int
 nfsrvd_statfs(struct nfsrv_descript *nd, __unused int isdgram,
     vnode_t vp, __unused struct nfsexstuff *exp)
 {
 	struct statfs *sf;
 	u_int32_t *tl;
 	int getret = 1;
 	struct nfsvattr at;
 	u_quad_t tval;
 	struct thread *p = curthread;
 
 	sf = NULL;
 	if (nd->nd_repstat) {
 		nfsrv_postopattr(nd, getret, &at);
 		goto out;
 	}
 	sf = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK);
 	nd->nd_repstat = nfsvno_statfs(vp, sf);
 	getret = nfsvno_getattr(vp, &at, nd, p, 1, NULL);
 	vput(vp);
 	if (nd->nd_flag & ND_NFSV3)
 		nfsrv_postopattr(nd, getret, &at);
 	if (nd->nd_repstat)
 		goto out;
 	if (nd->nd_flag & ND_NFSV2) {
 		NFSM_BUILD(tl, u_int32_t *, NFSX_V2STATFS);
 		*tl++ = txdr_unsigned(NFS_V2MAXDATA);
 		*tl++ = txdr_unsigned(sf->f_bsize);
 		*tl++ = txdr_unsigned(sf->f_blocks);
 		*tl++ = txdr_unsigned(sf->f_bfree);
 		*tl = txdr_unsigned(sf->f_bavail);
 	} else {
 		NFSM_BUILD(tl, u_int32_t *, NFSX_V3STATFS);
 		tval = (u_quad_t)sf->f_blocks;
 		tval *= (u_quad_t)sf->f_bsize;
 		txdr_hyper(tval, tl); tl += 2;
 		tval = (u_quad_t)sf->f_bfree;
 		tval *= (u_quad_t)sf->f_bsize;
 		txdr_hyper(tval, tl); tl += 2;
 		tval = (u_quad_t)sf->f_bavail;
 		tval *= (u_quad_t)sf->f_bsize;
 		txdr_hyper(tval, tl); tl += 2;
 		tval = (u_quad_t)sf->f_files;
 		txdr_hyper(tval, tl); tl += 2;
 		tval = (u_quad_t)sf->f_ffree;
 		txdr_hyper(tval, tl); tl += 2;
 		tval = (u_quad_t)sf->f_ffree;
 		txdr_hyper(tval, tl); tl += 2;
 		*tl = 0;
 	}
 
 out:
 	free(sf, M_STATFS);
 	NFSEXITCODE2(0, nd);
 	return (0);
 }
 
 /*
  * nfs fsinfo service
  */
 int
 nfsrvd_fsinfo(struct nfsrv_descript *nd, int isdgram,
     vnode_t vp, __unused struct nfsexstuff *exp)
 {
 	u_int32_t *tl;
 	struct nfsfsinfo fs;
 	int getret = 1;
 	struct nfsvattr at;
 	struct thread *p = curthread;
 
 	if (nd->nd_repstat) {
 		nfsrv_postopattr(nd, getret, &at);
 		goto out;
 	}
 	getret = nfsvno_getattr(vp, &at, nd, p, 1, NULL);
 	nfsvno_getfs(&fs, isdgram);
 	vput(vp);
 	nfsrv_postopattr(nd, getret, &at);
 	NFSM_BUILD(tl, u_int32_t *, NFSX_V3FSINFO);
 	*tl++ = txdr_unsigned(fs.fs_rtmax);
 	*tl++ = txdr_unsigned(fs.fs_rtpref);
 	*tl++ = txdr_unsigned(fs.fs_rtmult);
 	*tl++ = txdr_unsigned(fs.fs_wtmax);
 	*tl++ = txdr_unsigned(fs.fs_wtpref);
 	*tl++ = txdr_unsigned(fs.fs_wtmult);
 	*tl++ = txdr_unsigned(fs.fs_dtpref);
 	txdr_hyper(fs.fs_maxfilesize, tl);
 	tl += 2;
 	txdr_nfsv3time(&fs.fs_timedelta, tl);
 	tl += 2;
 	*tl = txdr_unsigned(fs.fs_properties);
 
 out:
 	NFSEXITCODE2(0, nd);
 	return (0);
 }
 
 /*
  * nfs pathconf service
  */
 int
 nfsrvd_pathconf(struct nfsrv_descript *nd, __unused int isdgram,
     vnode_t vp, __unused struct nfsexstuff *exp)
 {
 	struct nfsv3_pathconf *pc;
 	int getret = 1;
 	long linkmax, namemax, chownres, notrunc;
 	struct nfsvattr at;
 	struct thread *p = curthread;
 
 	if (nd->nd_repstat) {
 		nfsrv_postopattr(nd, getret, &at);
 		goto out;
 	}
 	nd->nd_repstat = nfsvno_pathconf(vp, _PC_LINK_MAX, &linkmax,
 	    nd->nd_cred, p);
 	if (!nd->nd_repstat)
 		nd->nd_repstat = nfsvno_pathconf(vp, _PC_NAME_MAX, &namemax,
 		    nd->nd_cred, p);
 	if (!nd->nd_repstat)
 		nd->nd_repstat=nfsvno_pathconf(vp, _PC_CHOWN_RESTRICTED,
 		    &chownres, nd->nd_cred, p);
 	if (!nd->nd_repstat)
 		nd->nd_repstat = nfsvno_pathconf(vp, _PC_NO_TRUNC, &notrunc,
 		    nd->nd_cred, p);
 	getret = nfsvno_getattr(vp, &at, nd, p, 1, NULL);
 	vput(vp);
 	nfsrv_postopattr(nd, getret, &at);
 	if (!nd->nd_repstat) {
 		NFSM_BUILD(pc, struct nfsv3_pathconf *, NFSX_V3PATHCONF);
 		pc->pc_linkmax = txdr_unsigned(linkmax);
 		pc->pc_namemax = txdr_unsigned(namemax);
 		pc->pc_notrunc = txdr_unsigned(notrunc);
 		pc->pc_chownrestricted = txdr_unsigned(chownres);
 
 		/*
 		 * These should probably be supported by VOP_PATHCONF(), but
 		 * until msdosfs is exportable (why would you want to?), the
 		 * Unix defaults should be ok.
 		 */
 		pc->pc_caseinsensitive = newnfs_false;
 		pc->pc_casepreserving = newnfs_true;
 	}
 
 out:
 	NFSEXITCODE2(0, nd);
 	return (0);
 }
 
 /*
  * nfsv4 lock service
  */
 int
 nfsrvd_lock(struct nfsrv_descript *nd, __unused int isdgram,
     vnode_t vp, struct nfsexstuff *exp)
 {
 	u_int32_t *tl;
 	int i;
 	struct nfsstate *stp = NULL;
 	struct nfslock *lop;
 	struct nfslockconflict cf;
 	int error = 0;
 	u_short flags = NFSLCK_LOCK, lflags;
 	u_int64_t offset, len;
 	nfsv4stateid_t stateid;
 	nfsquad_t clientid;
 	struct thread *p = curthread;
 
 	NFSM_DISSECT(tl, u_int32_t *, 7 * NFSX_UNSIGNED);
 	i = fxdr_unsigned(int, *tl++);
 	switch (i) {
 	case NFSV4LOCKT_READW:
 		flags |= NFSLCK_BLOCKING;
 	case NFSV4LOCKT_READ:
 		lflags = NFSLCK_READ;
 		break;
 	case NFSV4LOCKT_WRITEW:
 		flags |= NFSLCK_BLOCKING;
 	case NFSV4LOCKT_WRITE:
 		lflags = NFSLCK_WRITE;
 		break;
 	default:
 		nd->nd_repstat = NFSERR_BADXDR;
 		goto nfsmout;
 	}
 	if (*tl++ == newnfs_true)
 		flags |= NFSLCK_RECLAIM;
 	offset = fxdr_hyper(tl);
 	tl += 2;
 	len = fxdr_hyper(tl);
 	tl += 2;
 	if (*tl == newnfs_true)
 		flags |= NFSLCK_OPENTOLOCK;
 	if (flags & NFSLCK_OPENTOLOCK) {
 		NFSM_DISSECT(tl, u_int32_t *, 5 * NFSX_UNSIGNED + NFSX_STATEID);
 		i = fxdr_unsigned(int, *(tl+4+(NFSX_STATEID / NFSX_UNSIGNED)));
 		if (i <= 0 || i > NFSV4_OPAQUELIMIT) {
 			nd->nd_repstat = NFSERR_BADXDR;
 			goto nfsmout;
 		}
 		stp = malloc(sizeof (struct nfsstate) + i,
 			M_NFSDSTATE, M_WAITOK);
 		stp->ls_ownerlen = i;
 		stp->ls_op = nd->nd_rp;
 		stp->ls_seq = fxdr_unsigned(int, *tl++);
 		stp->ls_stateid.seqid = fxdr_unsigned(u_int32_t, *tl++);
 		NFSBCOPY((caddr_t)tl, (caddr_t)stp->ls_stateid.other,
 			NFSX_STATEIDOTHER);
 		tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED);
 
 		/*
 		 * For the special stateid of other all 0s and seqid == 1, set
 		 * the stateid to the current stateid, if it is set.
 		 */
 		if ((nd->nd_flag & ND_NFSV41) != 0 &&
 		    stp->ls_stateid.seqid == 1 &&
 		    stp->ls_stateid.other[0] == 0 &&
 		    stp->ls_stateid.other[1] == 0 &&
 		    stp->ls_stateid.other[2] == 0) {
 			if ((nd->nd_flag & ND_CURSTATEID) != 0) {
 				stp->ls_stateid = nd->nd_curstateid;
 				stp->ls_stateid.seqid = 0;
 			} else {
 				nd->nd_repstat = NFSERR_BADSTATEID;
 				goto nfsmout;
 			}
 		}
 
 		stp->ls_opentolockseq = fxdr_unsigned(int, *tl++);
 		clientid.lval[0] = *tl++;
 		clientid.lval[1] = *tl++;
 		if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) {
 			if ((nd->nd_flag & ND_NFSV41) != 0)
 				clientid.qval = nd->nd_clientid.qval;
 			else if (nd->nd_clientid.qval != clientid.qval)
 				printf("EEK3 multiple clids\n");
 		} else {
 			if ((nd->nd_flag & ND_NFSV41) != 0)
 				printf("EEK! no clientid from session\n");
 			nd->nd_flag |= ND_IMPLIEDCLID;
 			nd->nd_clientid.qval = clientid.qval;
 		}
 		error = nfsrv_mtostr(nd, stp->ls_owner, stp->ls_ownerlen);
 		if (error)
 			goto nfsmout;
 	} else {
 		NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID + NFSX_UNSIGNED);
 		stp = malloc(sizeof (struct nfsstate),
 			M_NFSDSTATE, M_WAITOK);
 		stp->ls_ownerlen = 0;
 		stp->ls_op = nd->nd_rp;
 		stp->ls_stateid.seqid = fxdr_unsigned(u_int32_t, *tl++);
 		NFSBCOPY((caddr_t)tl, (caddr_t)stp->ls_stateid.other,
 			NFSX_STATEIDOTHER);
 		tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED);
 
 		/*
 		 * For the special stateid of other all 0s and seqid == 1, set
 		 * the stateid to the current stateid, if it is set.
 		 */
 		if ((nd->nd_flag & ND_NFSV41) != 0 &&
 		    stp->ls_stateid.seqid == 1 &&
 		    stp->ls_stateid.other[0] == 0 &&
 		    stp->ls_stateid.other[1] == 0 &&
 		    stp->ls_stateid.other[2] == 0) {
 			if ((nd->nd_flag & ND_CURSTATEID) != 0) {
 				stp->ls_stateid = nd->nd_curstateid;
 				stp->ls_stateid.seqid = 0;
 			} else {
 				nd->nd_repstat = NFSERR_BADSTATEID;
 				goto nfsmout;
 			}
 		}
 
 		stp->ls_seq = fxdr_unsigned(int, *tl);
 		clientid.lval[0] = stp->ls_stateid.other[0];
 		clientid.lval[1] = stp->ls_stateid.other[1];
 		if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) {
 			if ((nd->nd_flag & ND_NFSV41) != 0)
 				clientid.qval = nd->nd_clientid.qval;
 			else if (nd->nd_clientid.qval != clientid.qval)
 				printf("EEK4 multiple clids\n");
 		} else {
 			if ((nd->nd_flag & ND_NFSV41) != 0)
 				printf("EEK! no clientid from session\n");
 			nd->nd_flag |= ND_IMPLIEDCLID;
 			nd->nd_clientid.qval = clientid.qval;
 		}
 	}
 	lop = malloc(sizeof (struct nfslock),
 		M_NFSDLOCK, M_WAITOK);
 	lop->lo_first = offset;
 	if (len == NFS64BITSSET) {
 		lop->lo_end = NFS64BITSSET;
 	} else {
 		lop->lo_end = offset + len;
 		if (lop->lo_end <= lop->lo_first)
 			nd->nd_repstat = NFSERR_INVAL;
 	}
 	lop->lo_flags = lflags;
 	stp->ls_flags = flags;
 	stp->ls_uid = nd->nd_cred->cr_uid;
 
 	/*
 	 * Do basic access checking.
 	 */
 	if (!nd->nd_repstat && vp->v_type != VREG) {
 	    if (vp->v_type == VDIR)
 		nd->nd_repstat = NFSERR_ISDIR;
 	    else
 		nd->nd_repstat = NFSERR_INVAL;
 	}
 	if (!nd->nd_repstat) {
 	    if (lflags & NFSLCK_WRITE) {
 		nd->nd_repstat = nfsvno_accchk(vp, VWRITE,
 		    nd->nd_cred, exp, p, NFSACCCHK_ALLOWOWNER,
 		    NFSACCCHK_VPISLOCKED, NULL);
 	    } else {
 		nd->nd_repstat = nfsvno_accchk(vp, VREAD,
 		    nd->nd_cred, exp, p, NFSACCCHK_ALLOWOWNER,
 		    NFSACCCHK_VPISLOCKED, NULL);
 		if (nd->nd_repstat)
 		    nd->nd_repstat = nfsvno_accchk(vp, VEXEC,
 			nd->nd_cred, exp, p, NFSACCCHK_ALLOWOWNER,
 			NFSACCCHK_VPISLOCKED, NULL);
 	    }
 	}
 
 	/*
 	 * We call nfsrv_lockctrl() even if nd_repstat set, so that the
 	 * seqid# gets updated. nfsrv_lockctrl() will return the value
 	 * of nd_repstat, if it gets that far.
 	 */
 	nd->nd_repstat = nfsrv_lockctrl(vp, &stp, &lop, &cf, clientid, 
 		&stateid, exp, nd, p);
 	if (lop)
 		free(lop, M_NFSDLOCK);
 	if (stp)
 		free(stp, M_NFSDSTATE);
 	if (!nd->nd_repstat) {
 		/* For NFSv4.1, set the Current StateID. */
 		if ((nd->nd_flag & ND_NFSV41) != 0) {
 			nd->nd_curstateid = stateid;
 			nd->nd_flag |= ND_CURSTATEID;
 		}
 		NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID);
 		*tl++ = txdr_unsigned(stateid.seqid);
 		NFSBCOPY((caddr_t)stateid.other,(caddr_t)tl,NFSX_STATEIDOTHER);
 	} else if (nd->nd_repstat == NFSERR_DENIED) {
 		NFSM_BUILD(tl, u_int32_t *, 7 * NFSX_UNSIGNED);
 		txdr_hyper(cf.cl_first, tl);
 		tl += 2;
 		if (cf.cl_end == NFS64BITSSET)
 			len = NFS64BITSSET;
 		else
 			len = cf.cl_end - cf.cl_first;
 		txdr_hyper(len, tl);
 		tl += 2;
 		if (cf.cl_flags == NFSLCK_WRITE)
 			*tl++ = txdr_unsigned(NFSV4LOCKT_WRITE);
 		else
 			*tl++ = txdr_unsigned(NFSV4LOCKT_READ);
 		*tl++ = stateid.other[0];
 		*tl = stateid.other[1];
 		(void) nfsm_strtom(nd, cf.cl_owner, cf.cl_ownerlen);
 	}
 	vput(vp);
 	NFSEXITCODE2(0, nd);
 	return (0);
 nfsmout:
 	vput(vp);
 	if (stp)
 		free(stp, M_NFSDSTATE);
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * nfsv4 lock test service
  */
 int
 nfsrvd_lockt(struct nfsrv_descript *nd, __unused int isdgram,
     vnode_t vp, struct nfsexstuff *exp)
 {
 	u_int32_t *tl;
 	int i;
 	struct nfsstate *stp = NULL;
 	struct nfslock lo, *lop = &lo;
 	struct nfslockconflict cf;
 	int error = 0;
 	nfsv4stateid_t stateid;
 	nfsquad_t clientid;
 	u_int64_t len;
 	struct thread *p = curthread;
 
 	NFSM_DISSECT(tl, u_int32_t *, 8 * NFSX_UNSIGNED);
 	i = fxdr_unsigned(int, *(tl + 7));
 	if (i <= 0 || i > NFSV4_OPAQUELIMIT) {
 		nd->nd_repstat = NFSERR_BADXDR;
 		goto nfsmout;
 	}
 	stp = malloc(sizeof (struct nfsstate) + i,
 	    M_NFSDSTATE, M_WAITOK);
 	stp->ls_ownerlen = i;
 	stp->ls_op = NULL;
 	stp->ls_flags = NFSLCK_TEST;
 	stp->ls_uid = nd->nd_cred->cr_uid;
 	i = fxdr_unsigned(int, *tl++);
 	switch (i) {
 	case NFSV4LOCKT_READW:
 		stp->ls_flags |= NFSLCK_BLOCKING;
 	case NFSV4LOCKT_READ:
 		lo.lo_flags = NFSLCK_READ;
 		break;
 	case NFSV4LOCKT_WRITEW:
 		stp->ls_flags |= NFSLCK_BLOCKING;
 	case NFSV4LOCKT_WRITE:
 		lo.lo_flags = NFSLCK_WRITE;
 		break;
 	default:
 		nd->nd_repstat = NFSERR_BADXDR;
 		goto nfsmout;
 	}
 	lo.lo_first = fxdr_hyper(tl);
 	tl += 2;
 	len = fxdr_hyper(tl);
 	if (len == NFS64BITSSET) {
 		lo.lo_end = NFS64BITSSET;
 	} else {
 		lo.lo_end = lo.lo_first + len;
 		if (lo.lo_end <= lo.lo_first)
 			nd->nd_repstat = NFSERR_INVAL;
 	}
 	tl += 2;
 	clientid.lval[0] = *tl++;
 	clientid.lval[1] = *tl;
 	if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) {
 		if ((nd->nd_flag & ND_NFSV41) != 0)
 			clientid.qval = nd->nd_clientid.qval;
 		else if (nd->nd_clientid.qval != clientid.qval)
 			printf("EEK5 multiple clids\n");
 	} else {
 		if ((nd->nd_flag & ND_NFSV41) != 0)
 			printf("EEK! no clientid from session\n");
 		nd->nd_flag |= ND_IMPLIEDCLID;
 		nd->nd_clientid.qval = clientid.qval;
 	}
 	error = nfsrv_mtostr(nd, stp->ls_owner, stp->ls_ownerlen);
 	if (error)
 		goto nfsmout;
 	if (!nd->nd_repstat && vp->v_type != VREG) {
 	    if (vp->v_type == VDIR)
 		nd->nd_repstat = NFSERR_ISDIR;
 	    else
 		nd->nd_repstat = NFSERR_INVAL;
 	}
 	if (!nd->nd_repstat)
 	  nd->nd_repstat = nfsrv_lockctrl(vp, &stp, &lop, &cf, clientid,
 	    &stateid, exp, nd, p);
 	if (nd->nd_repstat) {
 	    if (nd->nd_repstat == NFSERR_DENIED) {
 		NFSM_BUILD(tl, u_int32_t *, 7 * NFSX_UNSIGNED);
 		txdr_hyper(cf.cl_first, tl);
 		tl += 2;
 		if (cf.cl_end == NFS64BITSSET)
 			len = NFS64BITSSET;
 		else
 			len = cf.cl_end - cf.cl_first;
 		txdr_hyper(len, tl);
 		tl += 2;
 		if (cf.cl_flags == NFSLCK_WRITE)
 			*tl++ = txdr_unsigned(NFSV4LOCKT_WRITE);
 		else
 			*tl++ = txdr_unsigned(NFSV4LOCKT_READ);
 		*tl++ = stp->ls_stateid.other[0];
 		*tl = stp->ls_stateid.other[1];
 		(void) nfsm_strtom(nd, cf.cl_owner, cf.cl_ownerlen);
 	    }
 	}
 	vput(vp);
 	if (stp)
 		free(stp, M_NFSDSTATE);
 	NFSEXITCODE2(0, nd);
 	return (0);
 nfsmout:
 	vput(vp);
 	if (stp)
 		free(stp, M_NFSDSTATE);
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * nfsv4 unlock service
  */
 int
 nfsrvd_locku(struct nfsrv_descript *nd, __unused int isdgram,
     vnode_t vp, struct nfsexstuff *exp)
 {
 	u_int32_t *tl;
 	int i;
 	struct nfsstate *stp;
 	struct nfslock *lop;
 	int error = 0;
 	nfsv4stateid_t stateid;
 	nfsquad_t clientid;
 	u_int64_t len;
 	struct thread *p = curthread;
 
 	NFSM_DISSECT(tl, u_int32_t *, 6 * NFSX_UNSIGNED + NFSX_STATEID);
 	stp = malloc(sizeof (struct nfsstate),
 	    M_NFSDSTATE, M_WAITOK);
 	lop = malloc(sizeof (struct nfslock),
 	    M_NFSDLOCK, M_WAITOK);
 	stp->ls_flags = NFSLCK_UNLOCK;
 	lop->lo_flags = NFSLCK_UNLOCK;
 	stp->ls_op = nd->nd_rp;
 	i = fxdr_unsigned(int, *tl++);
 	switch (i) {
 	case NFSV4LOCKT_READW:
 		stp->ls_flags |= NFSLCK_BLOCKING;
 	case NFSV4LOCKT_READ:
 		break;
 	case NFSV4LOCKT_WRITEW:
 		stp->ls_flags |= NFSLCK_BLOCKING;
 	case NFSV4LOCKT_WRITE:
 		break;
 	default:
 		nd->nd_repstat = NFSERR_BADXDR;
 		free(stp, M_NFSDSTATE);
 		free(lop, M_NFSDLOCK);
 		goto nfsmout;
 	}
 	stp->ls_ownerlen = 0;
 	stp->ls_uid = nd->nd_cred->cr_uid;
 	stp->ls_seq = fxdr_unsigned(int, *tl++);
 	stp->ls_stateid.seqid = fxdr_unsigned(u_int32_t, *tl++);
 	NFSBCOPY((caddr_t)tl, (caddr_t)stp->ls_stateid.other,
 	    NFSX_STATEIDOTHER);
 	tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED);
 
 	/*
 	 * For the special stateid of other all 0s and seqid == 1, set the
 	 * stateid to the current stateid, if it is set.
 	 */
 	if ((nd->nd_flag & ND_NFSV41) != 0 && stp->ls_stateid.seqid == 1 &&
 	    stp->ls_stateid.other[0] == 0 && stp->ls_stateid.other[1] == 0 &&
 	    stp->ls_stateid.other[2] == 0) {
 		if ((nd->nd_flag & ND_CURSTATEID) != 0) {
 			stp->ls_stateid = nd->nd_curstateid;
 			stp->ls_stateid.seqid = 0;
 		} else {
 			nd->nd_repstat = NFSERR_BADSTATEID;
 			free(stp, M_NFSDSTATE);
 			free(lop, M_NFSDLOCK);
 			goto nfsmout;
 		}
 	}
 
 	lop->lo_first = fxdr_hyper(tl);
 	tl += 2;
 	len = fxdr_hyper(tl);
 	if (len == NFS64BITSSET) {
 		lop->lo_end = NFS64BITSSET;
 	} else {
 		lop->lo_end = lop->lo_first + len;
 		if (lop->lo_end <= lop->lo_first)
 			nd->nd_repstat = NFSERR_INVAL;
 	}
 	clientid.lval[0] = stp->ls_stateid.other[0];
 	clientid.lval[1] = stp->ls_stateid.other[1];
 	if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) {
 		if ((nd->nd_flag & ND_NFSV41) != 0)
 			clientid.qval = nd->nd_clientid.qval;
 		else if (nd->nd_clientid.qval != clientid.qval)
 			printf("EEK6 multiple clids\n");
 	} else {
 		if ((nd->nd_flag & ND_NFSV41) != 0)
 			printf("EEK! no clientid from session\n");
 		nd->nd_flag |= ND_IMPLIEDCLID;
 		nd->nd_clientid.qval = clientid.qval;
 	}
 	if (!nd->nd_repstat && vp->v_type != VREG) {
 	    if (vp->v_type == VDIR)
 		nd->nd_repstat = NFSERR_ISDIR;
 	    else
 		nd->nd_repstat = NFSERR_INVAL;
 	}
 	/*
 	 * Call nfsrv_lockctrl() even if nd_repstat is set, so that the
 	 * seqid# gets incremented. nfsrv_lockctrl() will return the
 	 * value of nd_repstat, if it gets that far.
 	 */
 	nd->nd_repstat = nfsrv_lockctrl(vp, &stp, &lop, NULL, clientid,
 	    &stateid, exp, nd, p);
 	if (stp)
 		free(stp, M_NFSDSTATE);
 	if (lop)
 		free(lop, M_NFSDLOCK);
 	if (!nd->nd_repstat) {
 		NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID);
 		*tl++ = txdr_unsigned(stateid.seqid);
 		NFSBCOPY((caddr_t)stateid.other,(caddr_t)tl,NFSX_STATEIDOTHER);
 	}
 nfsmout:
 	vput(vp);
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * nfsv4 open service
  */
 int
 nfsrvd_open(struct nfsrv_descript *nd, __unused int isdgram,
     vnode_t dp, vnode_t *vpp, __unused fhandle_t *fhp, struct nfsexstuff *exp)
 {
 	u_int32_t *tl;
 	int i, retext;
 	struct nfsstate *stp = NULL;
 	int error = 0, create, claim, exclusive_flag = 0, override;
 	u_int32_t rflags = NFSV4OPEN_LOCKTYPEPOSIX, acemask;
 	int how = NFSCREATE_UNCHECKED;
 	int32_t cverf[2], tverf[2] = { 0, 0 };
 	vnode_t vp = NULL, dirp = NULL;
 	struct nfsvattr nva, dirfor, diraft;
 	struct nameidata named;
 	nfsv4stateid_t stateid, delegstateid;
 	nfsattrbit_t attrbits;
 	nfsquad_t clientid;
 	char *bufp = NULL;
 	u_long *hashp;
 	NFSACL_T *aclp = NULL;
 	struct thread *p = curthread;
 
 #ifdef NFS4_ACL_EXTATTR_NAME
 	aclp = acl_alloc(M_WAITOK);
 	aclp->acl_cnt = 0;
 #endif
 	NFSZERO_ATTRBIT(&attrbits);
 	named.ni_startdir = NULL;
 	named.ni_cnd.cn_nameiop = 0;
 	NFSM_DISSECT(tl, u_int32_t *, 6 * NFSX_UNSIGNED);
 	i = fxdr_unsigned(int, *(tl + 5));
 	if (i <= 0 || i > NFSV4_OPAQUELIMIT) {
 		nd->nd_repstat = NFSERR_BADXDR;
 		goto nfsmout;
 	}
 	stp = malloc(sizeof (struct nfsstate) + i,
 	    M_NFSDSTATE, M_WAITOK);
 	stp->ls_ownerlen = i;
 	stp->ls_op = nd->nd_rp;
 	stp->ls_flags = NFSLCK_OPEN;
 	stp->ls_uid = nd->nd_cred->cr_uid;
 	stp->ls_seq = fxdr_unsigned(u_int32_t, *tl++);
 	i = fxdr_unsigned(int, *tl++);
 	retext = 0;
 	if ((i & (NFSV4OPEN_WANTDELEGMASK | NFSV4OPEN_WANTSIGNALDELEG |
 	    NFSV4OPEN_WANTPUSHDELEG)) != 0 && (nd->nd_flag & ND_NFSV41) != 0) {
 		retext = 1;
 		/* For now, ignore these. */
 		i &= ~(NFSV4OPEN_WANTPUSHDELEG | NFSV4OPEN_WANTSIGNALDELEG);
 		switch (i & NFSV4OPEN_WANTDELEGMASK) {
 		case NFSV4OPEN_WANTANYDELEG:
 			stp->ls_flags |= (NFSLCK_WANTRDELEG |
 			    NFSLCK_WANTWDELEG);
 			i &= ~NFSV4OPEN_WANTDELEGMASK;
 			break;
 		case NFSV4OPEN_WANTREADDELEG:
 			stp->ls_flags |= NFSLCK_WANTRDELEG;
 			i &= ~NFSV4OPEN_WANTDELEGMASK;
 			break;
 		case NFSV4OPEN_WANTWRITEDELEG:
 			stp->ls_flags |= NFSLCK_WANTWDELEG;
 			i &= ~NFSV4OPEN_WANTDELEGMASK;
 			break;
 		case NFSV4OPEN_WANTNODELEG:
 			stp->ls_flags |= NFSLCK_WANTNODELEG;
 			i &= ~NFSV4OPEN_WANTDELEGMASK;
 			break;
 		case NFSV4OPEN_WANTCANCEL:
 			printf("NFSv4: ignore Open WantCancel\n");
 			i &= ~NFSV4OPEN_WANTDELEGMASK;
 			break;
 		default:
 			/* nd_repstat will be set to NFSERR_INVAL below. */
 			break;
 		}
 	}
 	switch (i) {
 	case NFSV4OPEN_ACCESSREAD:
 		stp->ls_flags |= NFSLCK_READACCESS;
 		break;
 	case NFSV4OPEN_ACCESSWRITE:
 		stp->ls_flags |= NFSLCK_WRITEACCESS;
 		break;
 	case NFSV4OPEN_ACCESSBOTH:
 		stp->ls_flags |= (NFSLCK_READACCESS | NFSLCK_WRITEACCESS);
 		break;
 	default:
 		nd->nd_repstat = NFSERR_INVAL;
 	}
 	i = fxdr_unsigned(int, *tl++);
 	switch (i) {
 	case NFSV4OPEN_DENYNONE:
 		break;
 	case NFSV4OPEN_DENYREAD:
 		stp->ls_flags |= NFSLCK_READDENY;
 		break;
 	case NFSV4OPEN_DENYWRITE:
 		stp->ls_flags |= NFSLCK_WRITEDENY;
 		break;
 	case NFSV4OPEN_DENYBOTH:
 		stp->ls_flags |= (NFSLCK_READDENY | NFSLCK_WRITEDENY);
 		break;
 	default:
 		nd->nd_repstat = NFSERR_INVAL;
 	}
 	clientid.lval[0] = *tl++;
 	clientid.lval[1] = *tl;
 	if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) {
 		if ((nd->nd_flag & ND_NFSV41) != 0)
 			clientid.qval = nd->nd_clientid.qval;
 		else if (nd->nd_clientid.qval != clientid.qval)
 			printf("EEK7 multiple clids\n");
 	} else {
 		if ((nd->nd_flag & ND_NFSV41) != 0)
 			printf("EEK! no clientid from session\n");
 		nd->nd_flag |= ND_IMPLIEDCLID;
 		nd->nd_clientid.qval = clientid.qval;
 	}
 	error = nfsrv_mtostr(nd, stp->ls_owner, stp->ls_ownerlen);
 	if (error)
 		goto nfsmout;
 	NFSVNO_ATTRINIT(&nva);
 	NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 	create = fxdr_unsigned(int, *tl);
 	if (!nd->nd_repstat)
 		nd->nd_repstat = nfsvno_getattr(dp, &dirfor, nd, p, 0, NULL);
 	if (create == NFSV4OPEN_CREATE) {
 		nva.na_type = VREG;
 		nva.na_mode = 0;
 		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 		how = fxdr_unsigned(int, *tl);
 		switch (how) {
 		case NFSCREATE_UNCHECKED:
 		case NFSCREATE_GUARDED:
 			error = nfsv4_sattr(nd, NULL, &nva, &attrbits, aclp, p);
 			if (error)
 				goto nfsmout;
 			/*
 			 * If the na_gid being set is the same as that of
 			 * the directory it is going in, clear it, since
 			 * that is what will be set by default. This allows
 			 * a user that isn't in that group to do the create.
 			 */
 			if (!nd->nd_repstat && NFSVNO_ISSETGID(&nva) &&
 			    nva.na_gid == dirfor.na_gid)
 				NFSVNO_UNSET(&nva, gid);
 			if (!nd->nd_repstat)
 				nd->nd_repstat = nfsrv_checkuidgid(nd, &nva);
 			break;
 		case NFSCREATE_EXCLUSIVE:
 			NFSM_DISSECT(tl, u_int32_t *, NFSX_VERF);
 			cverf[0] = *tl++;
 			cverf[1] = *tl;
 			break;
 		case NFSCREATE_EXCLUSIVE41:
 			NFSM_DISSECT(tl, u_int32_t *, NFSX_VERF);
 			cverf[0] = *tl++;
 			cverf[1] = *tl;
 			error = nfsv4_sattr(nd, NULL, &nva, &attrbits, aclp, p);
 			if (error != 0)
 				goto nfsmout;
 			if (NFSISSET_ATTRBIT(&attrbits,
 			    NFSATTRBIT_TIMEACCESSSET))
 				nd->nd_repstat = NFSERR_INVAL;
 			/*
 			 * If the na_gid being set is the same as that of
 			 * the directory it is going in, clear it, since
 			 * that is what will be set by default. This allows
 			 * a user that isn't in that group to do the create.
 			 */
 			if (nd->nd_repstat == 0 && NFSVNO_ISSETGID(&nva) &&
 			    nva.na_gid == dirfor.na_gid)
 				NFSVNO_UNSET(&nva, gid);
 			if (nd->nd_repstat == 0)
 				nd->nd_repstat = nfsrv_checkuidgid(nd, &nva);
 			break;
 		default:
 			nd->nd_repstat = NFSERR_BADXDR;
 			goto nfsmout;
 		}
 	} else if (create != NFSV4OPEN_NOCREATE) {
 		nd->nd_repstat = NFSERR_BADXDR;
 		goto nfsmout;
 	}
 
 	/*
 	 * Now, handle the claim, which usually includes looking up a
 	 * name in the directory referenced by dp. The exception is
 	 * NFSV4OPEN_CLAIMPREVIOUS.
 	 */
 	NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 	claim = fxdr_unsigned(int, *tl);
 	if (claim == NFSV4OPEN_CLAIMDELEGATECUR || claim ==
 	    NFSV4OPEN_CLAIMDELEGATECURFH) {
 		NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID);
 		stateid.seqid = fxdr_unsigned(u_int32_t, *tl++);
 		NFSBCOPY((caddr_t)tl,(caddr_t)stateid.other,NFSX_STATEIDOTHER);
 		stp->ls_flags |= NFSLCK_DELEGCUR;
 	} else if (claim == NFSV4OPEN_CLAIMDELEGATEPREV || claim ==
 	    NFSV4OPEN_CLAIMDELEGATEPREVFH) {
 		stp->ls_flags |= NFSLCK_DELEGPREV;
 	}
 	if (claim == NFSV4OPEN_CLAIMNULL || claim == NFSV4OPEN_CLAIMDELEGATECUR
 	    || claim == NFSV4OPEN_CLAIMDELEGATEPREV) {
 		if (!nd->nd_repstat && create == NFSV4OPEN_CREATE &&
 		    claim != NFSV4OPEN_CLAIMNULL)
 			nd->nd_repstat = NFSERR_INVAL;
 		if (nd->nd_repstat) {
 			nd->nd_repstat = nfsrv_opencheck(clientid,
 			    &stateid, stp, NULL, nd, p, nd->nd_repstat);
 			goto nfsmout;
 		}
 		if (create == NFSV4OPEN_CREATE)
 		    NFSNAMEICNDSET(&named.ni_cnd, nd->nd_cred, CREATE,
 			LOCKPARENT | LOCKLEAF | SAVESTART | NOCACHE);
 		else
 		    NFSNAMEICNDSET(&named.ni_cnd, nd->nd_cred, LOOKUP,
 			LOCKLEAF | SAVESTART);
 		nfsvno_setpathbuf(&named, &bufp, &hashp);
 		error = nfsrv_parsename(nd, bufp, hashp, &named.ni_pathlen);
 		if (error) {
 			vrele(dp);
 #ifdef NFS4_ACL_EXTATTR_NAME
 			acl_free(aclp);
 #endif
 			free(stp, M_NFSDSTATE);
 			nfsvno_relpathbuf(&named);
 			NFSEXITCODE2(error, nd);
 			return (error);
 		}
 		if (!nd->nd_repstat) {
 			nd->nd_repstat = nfsvno_namei(nd, &named, dp, 0, exp,
 			    &dirp);
 		} else {
 			vrele(dp);
 			nfsvno_relpathbuf(&named);
 		}
 		if (create == NFSV4OPEN_CREATE) {
 		    switch (how) {
 		    case NFSCREATE_UNCHECKED:
 			if (named.ni_vp) {
 				/*
 				 * Clear the setable attribute bits, except
 				 * for Size, if it is being truncated.
 				 */
 				NFSZERO_ATTRBIT(&attrbits);
 				if (NFSVNO_ISSETSIZE(&nva))
 					NFSSETBIT_ATTRBIT(&attrbits,
 					    NFSATTRBIT_SIZE);
 			}
 			break;
 		    case NFSCREATE_GUARDED:
 			if (named.ni_vp && !nd->nd_repstat)
 				nd->nd_repstat = EEXIST;
 			break;
 		    case NFSCREATE_EXCLUSIVE:
 			exclusive_flag = 1;
 			if (!named.ni_vp)
 				nva.na_mode = 0;
 			break;
 		    case NFSCREATE_EXCLUSIVE41:
 			exclusive_flag = 1;
 			break;
 		    }
 		}
 		nfsvno_open(nd, &named, clientid, &stateid, stp,
 		    &exclusive_flag, &nva, cverf, create, aclp, &attrbits,
 		    nd->nd_cred, exp, &vp);
 	} else if (claim == NFSV4OPEN_CLAIMPREVIOUS || claim ==
 	    NFSV4OPEN_CLAIMFH || claim == NFSV4OPEN_CLAIMDELEGATECURFH ||
 	    claim == NFSV4OPEN_CLAIMDELEGATEPREVFH) {
 		if (claim == NFSV4OPEN_CLAIMPREVIOUS) {
 			NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 			i = fxdr_unsigned(int, *tl);
 			switch (i) {
 			case NFSV4OPEN_DELEGATEREAD:
 				stp->ls_flags |= NFSLCK_DELEGREAD;
 				break;
 			case NFSV4OPEN_DELEGATEWRITE:
 				stp->ls_flags |= NFSLCK_DELEGWRITE;
 			case NFSV4OPEN_DELEGATENONE:
 				break;
 			default:
 				nd->nd_repstat = NFSERR_BADXDR;
 				goto nfsmout;
 			}
 			stp->ls_flags |= NFSLCK_RECLAIM;
 		} else {
 			if (nd->nd_repstat == 0 && create == NFSV4OPEN_CREATE)
 				nd->nd_repstat = NFSERR_INVAL;
 		}
 		vp = dp;
 		NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
 		if (!VN_IS_DOOMED(vp))
 			nd->nd_repstat = nfsrv_opencheck(clientid, &stateid,
 			    stp, vp, nd, p, nd->nd_repstat);
 		else
 			nd->nd_repstat = NFSERR_PERM;
 	} else {
 		nd->nd_repstat = NFSERR_BADXDR;
 		goto nfsmout;
 	}
 
 	/*
 	 * Do basic access checking.
 	 */
 	if (!nd->nd_repstat && vp->v_type != VREG) {
 		/*
 		 * The IETF working group decided that this is the correct
 		 * error return for all non-regular files.
 		 */
 		nd->nd_repstat = (vp->v_type == VDIR) ? NFSERR_ISDIR : NFSERR_SYMLINK;
 	}
 
 	/*
 	 * If the Open is being done for a file that already exists, apply
 	 * normal permission checking including for the file owner, if
 	 * vfs.nfsd.v4openaccess is set.
 	 * Previously, the owner was always allowed to open the file to
 	 * be consistent with the NFS tradition of always allowing the
 	 * owner of the file to write to the file regardless of permissions.
 	 * It now appears that the Linux client expects the owner
 	 * permissions to be checked for opens that are not creating the
 	 * file.  I believe the correct approach is to use the Access
 	 * operation's results to be consistent with NFSv3, but that is
 	 * not what the current Linux client appears to be doing.
 	 * Since both the Linux and OpenSolaris NFSv4 servers do this check,
 	 * I have enabled it by default.  Since Linux does not apply this
 	 * check for claim_delegate_cur, this code does the same.
 	 * If this semantic change causes a problem, it can be disabled by
 	 * setting the sysctl vfs.nfsd.v4openaccess to 0 to re-enable the
 	 * previous semantics.
 	 */
 	if (nfsrv_openaccess && create == NFSV4OPEN_NOCREATE &&
 	    (stp->ls_flags & NFSLCK_DELEGCUR) == 0)
 		override = NFSACCCHK_NOOVERRIDE;
 	else
 		override = NFSACCCHK_ALLOWOWNER;
 	if (!nd->nd_repstat && (stp->ls_flags & NFSLCK_WRITEACCESS))
 	    nd->nd_repstat = nfsvno_accchk(vp, VWRITE, nd->nd_cred,
 	        exp, p, override, NFSACCCHK_VPISLOCKED, NULL);
 	if (!nd->nd_repstat && (stp->ls_flags & NFSLCK_READACCESS)) {
 	    nd->nd_repstat = nfsvno_accchk(vp, VREAD, nd->nd_cred,
 	        exp, p, override, NFSACCCHK_VPISLOCKED, NULL);
 	    if (nd->nd_repstat)
 		nd->nd_repstat = nfsvno_accchk(vp, VEXEC,
 		    nd->nd_cred, exp, p, override,
 		    NFSACCCHK_VPISLOCKED, NULL);
 	}
 
 	if (!nd->nd_repstat) {
 		nd->nd_repstat = nfsvno_getattr(vp, &nva, nd, p, 1, NULL);
 		if (!nd->nd_repstat) {
 			tverf[0] = nva.na_atime.tv_sec;
 			tverf[1] = nva.na_atime.tv_nsec;
 		}
 	}
 	if (!nd->nd_repstat && exclusive_flag && (cverf[0] != tverf[0] ||
 	    cverf[1] != tverf[1]))
 		nd->nd_repstat = EEXIST;
 	/*
 	 * Do the open locking/delegation stuff.
 	 */
 	if (!nd->nd_repstat)
 	    nd->nd_repstat = nfsrv_openctrl(nd, vp, &stp, clientid, &stateid,
 		&delegstateid, &rflags, exp, p, nva.na_filerev);
 
 	/*
 	 * vp must be unlocked before the call to nfsvno_getattr(dirp,...)
 	 * below, to avoid a deadlock with the lookup in nfsvno_namei() above.
 	 * (ie: Leave the NFSVOPUNLOCK() about here.)
 	 */
 	if (vp)
 		NFSVOPUNLOCK(vp);
 	if (stp)
 		free(stp, M_NFSDSTATE);
 	if (!nd->nd_repstat && dirp)
 		nd->nd_repstat = nfsvno_getattr(dirp, &diraft, nd, p, 0, NULL);
 	if (!nd->nd_repstat) {
 		/* For NFSv4.1, set the Current StateID. */
 		if ((nd->nd_flag & ND_NFSV41) != 0) {
 			nd->nd_curstateid = stateid;
 			nd->nd_flag |= ND_CURSTATEID;
 		}
 		NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID + 6 * NFSX_UNSIGNED);
 		*tl++ = txdr_unsigned(stateid.seqid);
 		NFSBCOPY((caddr_t)stateid.other,(caddr_t)tl,NFSX_STATEIDOTHER);
 		tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED);
 		if (claim == NFSV4OPEN_CLAIMPREVIOUS) {
 			*tl++ = newnfs_true;
 			*tl++ = 0;
 			*tl++ = 0;
 			*tl++ = 0;
 			*tl++ = 0;
 		} else {
 			*tl++ = newnfs_false;	/* Since dirp is not locked */
 			txdr_hyper(dirfor.na_filerev, tl);
 			tl += 2;
 			txdr_hyper(diraft.na_filerev, tl);
 			tl += 2;
 		}
 		*tl = txdr_unsigned(rflags & NFSV4OPEN_RFLAGS);
 		(void) nfsrv_putattrbit(nd, &attrbits);
 		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
 		if (rflags & NFSV4OPEN_READDELEGATE)
 			*tl = txdr_unsigned(NFSV4OPEN_DELEGATEREAD);
 		else if (rflags & NFSV4OPEN_WRITEDELEGATE)
 			*tl = txdr_unsigned(NFSV4OPEN_DELEGATEWRITE);
 		else if (retext != 0) {
 			*tl = txdr_unsigned(NFSV4OPEN_DELEGATENONEEXT);
 			if ((rflags & NFSV4OPEN_WDNOTWANTED) != 0) {
 				NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
 				*tl = txdr_unsigned(NFSV4OPEN_NOTWANTED);
 			} else if ((rflags & NFSV4OPEN_WDSUPPFTYPE) != 0) {
 				NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
 				*tl = txdr_unsigned(NFSV4OPEN_NOTSUPPFTYPE);
 			} else if ((rflags & NFSV4OPEN_WDCONTENTION) != 0) {
 				NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 				*tl++ = txdr_unsigned(NFSV4OPEN_CONTENTION);
 				*tl = newnfs_false;
 			} else if ((rflags & NFSV4OPEN_WDRESOURCE) != 0) {
 				NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 				*tl++ = txdr_unsigned(NFSV4OPEN_RESOURCE);
 				*tl = newnfs_false;
 			} else {
 				NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
 				*tl = txdr_unsigned(NFSV4OPEN_NOTWANTED);
 			}
 		} else
 			*tl = txdr_unsigned(NFSV4OPEN_DELEGATENONE);
 		if (rflags & (NFSV4OPEN_READDELEGATE|NFSV4OPEN_WRITEDELEGATE)) {
 			NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID+NFSX_UNSIGNED);
 			*tl++ = txdr_unsigned(delegstateid.seqid);
 			NFSBCOPY((caddr_t)delegstateid.other, (caddr_t)tl,
 			    NFSX_STATEIDOTHER);
 			tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED);
 			if (rflags & NFSV4OPEN_RECALL)
 				*tl = newnfs_true;
 			else
 				*tl = newnfs_false;
 			if (rflags & NFSV4OPEN_WRITEDELEGATE) {
 				NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
 				*tl++ = txdr_unsigned(NFSV4OPEN_LIMITSIZE);
 				txdr_hyper(nva.na_size, tl);
 			}
 			NFSM_BUILD(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
 			*tl++ = txdr_unsigned(NFSV4ACE_ALLOWEDTYPE);
 			*tl++ = txdr_unsigned(0x0);
 			acemask = NFSV4ACE_ALLFILESMASK;
 			if (nva.na_mode & S_IRUSR)
 			    acemask |= NFSV4ACE_READMASK;
 			if (nva.na_mode & S_IWUSR)
 			    acemask |= NFSV4ACE_WRITEMASK;
 			if (nva.na_mode & S_IXUSR)
 			    acemask |= NFSV4ACE_EXECUTEMASK;
 			*tl = txdr_unsigned(acemask);
 			(void) nfsm_strtom(nd, "OWNER@", 6);
 		}
 		*vpp = vp;
 	} else if (vp) {
 		vrele(vp);
 	}
 	if (dirp)
 		vrele(dirp);
 #ifdef NFS4_ACL_EXTATTR_NAME
 	acl_free(aclp);
 #endif
 	NFSEXITCODE2(0, nd);
 	return (0);
 nfsmout:
 	vrele(dp);
 #ifdef NFS4_ACL_EXTATTR_NAME
 	acl_free(aclp);
 #endif
 	if (stp)
 		free(stp, M_NFSDSTATE);
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * nfsv4 close service
  */
 int
 nfsrvd_close(struct nfsrv_descript *nd, __unused int isdgram,
     vnode_t vp, __unused struct nfsexstuff *exp)
 {
 	u_int32_t *tl;
 	struct nfsstate st, *stp = &st;
 	int error = 0, writeacc;
 	nfsv4stateid_t stateid;
 	nfsquad_t clientid;
 	struct nfsvattr na;
 	struct thread *p = curthread;
 
 	NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED + NFSX_STATEID);
 	stp->ls_seq = fxdr_unsigned(u_int32_t, *tl++);
 	stp->ls_ownerlen = 0;
 	stp->ls_op = nd->nd_rp;
 	stp->ls_uid = nd->nd_cred->cr_uid;
 	stp->ls_stateid.seqid = fxdr_unsigned(u_int32_t, *tl++);
 	NFSBCOPY((caddr_t)tl, (caddr_t)stp->ls_stateid.other,
 	    NFSX_STATEIDOTHER);
 
 	/*
 	 * For the special stateid of other all 0s and seqid == 1, set the
 	 * stateid to the current stateid, if it is set.
 	 */
 	if ((nd->nd_flag & ND_NFSV41) != 0 && stp->ls_stateid.seqid == 1 &&
 	    stp->ls_stateid.other[0] == 0 && stp->ls_stateid.other[1] == 0 &&
 	    stp->ls_stateid.other[2] == 0) {
 		if ((nd->nd_flag & ND_CURSTATEID) != 0)
 			stp->ls_stateid = nd->nd_curstateid;
 		else {
 			nd->nd_repstat = NFSERR_BADSTATEID;
 			goto nfsmout;
 		}
 	}
 
 	stp->ls_flags = NFSLCK_CLOSE;
 	clientid.lval[0] = stp->ls_stateid.other[0];
 	clientid.lval[1] = stp->ls_stateid.other[1];
 	if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) {
 		if ((nd->nd_flag & ND_NFSV41) != 0)
 			clientid.qval = nd->nd_clientid.qval;
 		else if (nd->nd_clientid.qval != clientid.qval)
 			printf("EEK8 multiple clids\n");
 	} else {
 		if ((nd->nd_flag & ND_NFSV41) != 0)
 			printf("EEK! no clientid from session\n");
 		nd->nd_flag |= ND_IMPLIEDCLID;
 		nd->nd_clientid.qval = clientid.qval;
 	}
 	nd->nd_repstat = nfsrv_openupdate(vp, stp, clientid, &stateid, nd, p,
 	    &writeacc);
 	/* For pNFS, update the attributes. */
 	if (writeacc != 0 || nfsrv_pnfsatime != 0)
 		nfsrv_updatemdsattr(vp, &na, p);
 	vput(vp);
 	if (!nd->nd_repstat) {
 		/*
 		 * If the stateid that has been closed is the current stateid,
 		 * unset it.
 		 */
 		if ((nd->nd_flag & ND_CURSTATEID) != 0 &&
 		    stateid.other[0] == nd->nd_curstateid.other[0] &&
 		    stateid.other[1] == nd->nd_curstateid.other[1] &&
 		    stateid.other[2] == nd->nd_curstateid.other[2])
 			nd->nd_flag &= ~ND_CURSTATEID;
 		NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID);
 		*tl++ = txdr_unsigned(stateid.seqid);
 		NFSBCOPY((caddr_t)stateid.other,(caddr_t)tl,NFSX_STATEIDOTHER);
 	}
 	NFSEXITCODE2(0, nd);
 	return (0);
 nfsmout:
 	vput(vp);
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * nfsv4 delegpurge service
  */
 int
 nfsrvd_delegpurge(struct nfsrv_descript *nd, __unused int isdgram,
     __unused vnode_t vp, __unused struct nfsexstuff *exp)
 {
 	u_int32_t *tl;
 	int error = 0;
 	nfsquad_t clientid;
 	struct thread *p = curthread;
 
 	if ((nd->nd_repstat = nfsd_checkrootexp(nd)) != 0)
 		goto nfsmout;
 	NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 	clientid.lval[0] = *tl++;
 	clientid.lval[1] = *tl;
 	if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) {
 		if ((nd->nd_flag & ND_NFSV41) != 0)
 			clientid.qval = nd->nd_clientid.qval;
 		else if (nd->nd_clientid.qval != clientid.qval)
 			printf("EEK9 multiple clids\n");
 	} else {
 		if ((nd->nd_flag & ND_NFSV41) != 0)
 			printf("EEK! no clientid from session\n");
 		nd->nd_flag |= ND_IMPLIEDCLID;
 		nd->nd_clientid.qval = clientid.qval;
 	}
 	nd->nd_repstat = nfsrv_delegupdate(nd, clientid, NULL, NULL,
 	    NFSV4OP_DELEGPURGE, nd->nd_cred, p, NULL);
 nfsmout:
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * nfsv4 delegreturn service
  */
 int
 nfsrvd_delegreturn(struct nfsrv_descript *nd, __unused int isdgram,
     vnode_t vp, __unused struct nfsexstuff *exp)
 {
 	u_int32_t *tl;
 	int error = 0, writeacc;
 	nfsv4stateid_t stateid;
 	nfsquad_t clientid;
 	struct nfsvattr na;
 	struct thread *p = curthread;
 
 	NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID);
 	stateid.seqid = fxdr_unsigned(u_int32_t, *tl++);
 	NFSBCOPY((caddr_t)tl, (caddr_t)stateid.other, NFSX_STATEIDOTHER);
 	clientid.lval[0] = stateid.other[0];
 	clientid.lval[1] = stateid.other[1];
 	if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) {
 		if ((nd->nd_flag & ND_NFSV41) != 0)
 			clientid.qval = nd->nd_clientid.qval;
 		else if (nd->nd_clientid.qval != clientid.qval)
 			printf("EEK10 multiple clids\n");
 	} else {
 		if ((nd->nd_flag & ND_NFSV41) != 0)
 			printf("EEK! no clientid from session\n");
 		nd->nd_flag |= ND_IMPLIEDCLID;
 		nd->nd_clientid.qval = clientid.qval;
 	}
 	nd->nd_repstat = nfsrv_delegupdate(nd, clientid, &stateid, vp,
 	    NFSV4OP_DELEGRETURN, nd->nd_cred, p, &writeacc);
 	/* For pNFS, update the attributes. */
 	if (writeacc != 0 || nfsrv_pnfsatime != 0)
 		nfsrv_updatemdsattr(vp, &na, p);
 nfsmout:
 	vput(vp);
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * nfsv4 get file handle service
  */
 int
 nfsrvd_getfh(struct nfsrv_descript *nd, __unused int isdgram,
     vnode_t vp, __unused struct nfsexstuff *exp)
 {
 	fhandle_t fh;
 	struct thread *p = curthread;
 
 	nd->nd_repstat = nfsvno_getfh(vp, &fh, p);
 	vput(vp);
 	if (!nd->nd_repstat)
 		(void) nfsm_fhtom(nd, (u_int8_t *)&fh, 0, 0);
 	NFSEXITCODE2(0, nd);
 	return (0);
 }
 
 /*
  * nfsv4 open confirm service
  */
 int
 nfsrvd_openconfirm(struct nfsrv_descript *nd, __unused int isdgram,
     vnode_t vp, __unused struct nfsexstuff *exp)
 {
 	u_int32_t *tl;
 	struct nfsstate st, *stp = &st;
 	int error = 0;
 	nfsv4stateid_t stateid;
 	nfsquad_t clientid;
 	struct thread *p = curthread;
 
 	if ((nd->nd_flag & ND_NFSV41) != 0) {
 		nd->nd_repstat = NFSERR_NOTSUPP;
 		goto nfsmout;
 	}
 	NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID + NFSX_UNSIGNED);
 	stp->ls_ownerlen = 0;
 	stp->ls_op = nd->nd_rp;
 	stp->ls_uid = nd->nd_cred->cr_uid;
 	stp->ls_stateid.seqid = fxdr_unsigned(u_int32_t, *tl++);
 	NFSBCOPY((caddr_t)tl, (caddr_t)stp->ls_stateid.other,
 	    NFSX_STATEIDOTHER);
 	tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED);
 	stp->ls_seq = fxdr_unsigned(u_int32_t, *tl);
 	stp->ls_flags = NFSLCK_CONFIRM;
 	clientid.lval[0] = stp->ls_stateid.other[0];
 	clientid.lval[1] = stp->ls_stateid.other[1];
 	if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) {
 		if ((nd->nd_flag & ND_NFSV41) != 0)
 			clientid.qval = nd->nd_clientid.qval;
 		else if (nd->nd_clientid.qval != clientid.qval)
 			printf("EEK11 multiple clids\n");
 	} else {
 		if ((nd->nd_flag & ND_NFSV41) != 0)
 			printf("EEK! no clientid from session\n");
 		nd->nd_flag |= ND_IMPLIEDCLID;
 		nd->nd_clientid.qval = clientid.qval;
 	}
 	nd->nd_repstat = nfsrv_openupdate(vp, stp, clientid, &stateid, nd, p,
 	    NULL);
 	if (!nd->nd_repstat) {
 		NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID);
 		*tl++ = txdr_unsigned(stateid.seqid);
 		NFSBCOPY((caddr_t)stateid.other,(caddr_t)tl,NFSX_STATEIDOTHER);
 	}
 nfsmout:
 	vput(vp);
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * nfsv4 open downgrade service
  */
 int
 nfsrvd_opendowngrade(struct nfsrv_descript *nd, __unused int isdgram,
     vnode_t vp, __unused struct nfsexstuff *exp)
 {
 	u_int32_t *tl;
 	int i;
 	struct nfsstate st, *stp = &st;
 	int error = 0;
 	nfsv4stateid_t stateid;
 	nfsquad_t clientid;
 	struct thread *p = curthread;
 
 	/* opendowngrade can only work on a file object.*/
 	if (vp->v_type != VREG) {
 		error = NFSERR_INVAL;
 		goto nfsmout;
 	}
 	NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID + 3 * NFSX_UNSIGNED);
 	stp->ls_ownerlen = 0;
 	stp->ls_op = nd->nd_rp;
 	stp->ls_uid = nd->nd_cred->cr_uid;
 	stp->ls_stateid.seqid = fxdr_unsigned(u_int32_t, *tl++);
 	NFSBCOPY((caddr_t)tl, (caddr_t)stp->ls_stateid.other,
 	    NFSX_STATEIDOTHER);
 	tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED);
 
 	/*
 	 * For the special stateid of other all 0s and seqid == 1, set the
 	 * stateid to the current stateid, if it is set.
 	 */
 	if ((nd->nd_flag & ND_NFSV41) != 0 && stp->ls_stateid.seqid == 1 &&
 	    stp->ls_stateid.other[0] == 0 && stp->ls_stateid.other[1] == 0 &&
 	    stp->ls_stateid.other[2] == 0) {
 		if ((nd->nd_flag & ND_CURSTATEID) != 0)
 			stp->ls_stateid = nd->nd_curstateid;
 		else {
 			nd->nd_repstat = NFSERR_BADSTATEID;
 			goto nfsmout;
 		}
 	}
 
 	stp->ls_seq = fxdr_unsigned(u_int32_t, *tl++);
 	i = fxdr_unsigned(int, *tl++);
 	if ((nd->nd_flag & ND_NFSV41) != 0)
 		i &= ~NFSV4OPEN_WANTDELEGMASK;
 	switch (i) {
 	case NFSV4OPEN_ACCESSREAD:
 		stp->ls_flags = (NFSLCK_READACCESS | NFSLCK_DOWNGRADE);
 		break;
 	case NFSV4OPEN_ACCESSWRITE:
 		stp->ls_flags = (NFSLCK_WRITEACCESS | NFSLCK_DOWNGRADE);
 		break;
 	case NFSV4OPEN_ACCESSBOTH:
 		stp->ls_flags = (NFSLCK_READACCESS | NFSLCK_WRITEACCESS |
 		    NFSLCK_DOWNGRADE);
 		break;
 	default:
 		nd->nd_repstat = NFSERR_INVAL;
 	}
 	i = fxdr_unsigned(int, *tl);
 	switch (i) {
 	case NFSV4OPEN_DENYNONE:
 		break;
 	case NFSV4OPEN_DENYREAD:
 		stp->ls_flags |= NFSLCK_READDENY;
 		break;
 	case NFSV4OPEN_DENYWRITE:
 		stp->ls_flags |= NFSLCK_WRITEDENY;
 		break;
 	case NFSV4OPEN_DENYBOTH:
 		stp->ls_flags |= (NFSLCK_READDENY | NFSLCK_WRITEDENY);
 		break;
 	default:
 		nd->nd_repstat = NFSERR_INVAL;
 	}
 
 	clientid.lval[0] = stp->ls_stateid.other[0];
 	clientid.lval[1] = stp->ls_stateid.other[1];
 	if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) {
 		if ((nd->nd_flag & ND_NFSV41) != 0)
 			clientid.qval = nd->nd_clientid.qval;
 		else if (nd->nd_clientid.qval != clientid.qval)
 			printf("EEK12 multiple clids\n");
 	} else {
 		if ((nd->nd_flag & ND_NFSV41) != 0)
 			printf("EEK! no clientid from session\n");
 		nd->nd_flag |= ND_IMPLIEDCLID;
 		nd->nd_clientid.qval = clientid.qval;
 	}
 	if (!nd->nd_repstat)
 		nd->nd_repstat = nfsrv_openupdate(vp, stp, clientid, &stateid,
 		    nd, p, NULL);
 	if (!nd->nd_repstat) {
 		/* For NFSv4.1, set the Current StateID. */
 		if ((nd->nd_flag & ND_NFSV41) != 0) {
 			nd->nd_curstateid = stateid;
 			nd->nd_flag |= ND_CURSTATEID;
 		}
 		NFSM_BUILD(tl, u_int32_t *, NFSX_STATEID);
 		*tl++ = txdr_unsigned(stateid.seqid);
 		NFSBCOPY((caddr_t)stateid.other,(caddr_t)tl,NFSX_STATEIDOTHER);
 	}
 nfsmout:
 	vput(vp);
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * nfsv4 renew lease service
  */
 int
 nfsrvd_renew(struct nfsrv_descript *nd, __unused int isdgram,
     __unused vnode_t vp, __unused struct nfsexstuff *exp)
 {
 	u_int32_t *tl;
 	int error = 0;
 	nfsquad_t clientid;
 	struct thread *p = curthread;
 
 	if ((nd->nd_flag & ND_NFSV41) != 0) {
 		nd->nd_repstat = NFSERR_NOTSUPP;
 		goto nfsmout;
 	}
 	if ((nd->nd_repstat = nfsd_checkrootexp(nd)) != 0)
 		goto nfsmout;
 	NFSM_DISSECT(tl, u_int32_t *, NFSX_HYPER);
 	clientid.lval[0] = *tl++;
 	clientid.lval[1] = *tl;
 	if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) {
 		if ((nd->nd_flag & ND_NFSV41) != 0)
 			clientid.qval = nd->nd_clientid.qval;
 		else if (nd->nd_clientid.qval != clientid.qval)
 			printf("EEK13 multiple clids\n");
 	} else {
 		if ((nd->nd_flag & ND_NFSV41) != 0)
 			printf("EEK! no clientid from session\n");
 		nd->nd_flag |= ND_IMPLIEDCLID;
 		nd->nd_clientid.qval = clientid.qval;
 	}
 	nd->nd_repstat = nfsrv_getclient(clientid, (CLOPS_RENEWOP|CLOPS_RENEW),
 	    NULL, NULL, (nfsquad_t)((u_quad_t)0), 0, nd, p);
 nfsmout:
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * nfsv4 security info service
  */
 int
 nfsrvd_secinfo(struct nfsrv_descript *nd, int isdgram,
     vnode_t dp, struct nfsexstuff *exp)
 {
 	u_int32_t *tl;
 	int len;
 	struct nameidata named;
 	vnode_t dirp = NULL, vp;
 	struct nfsrvfh fh;
 	struct nfsexstuff retnes;
 	u_int32_t *sizp;
 	int error = 0, i;
 	uint64_t savflag;
 	char *bufp;
 	u_long *hashp;
 	struct thread *p = curthread;
 
 	/*
 	 * All this just to get the export flags for the name.
 	 */
 	NFSNAMEICNDSET(&named.ni_cnd, nd->nd_cred, LOOKUP,
 	    LOCKLEAF | SAVESTART);
 	nfsvno_setpathbuf(&named, &bufp, &hashp);
 	error = nfsrv_parsename(nd, bufp, hashp, &named.ni_pathlen);
 	if (error) {
 		vput(dp);
 		nfsvno_relpathbuf(&named);
 		goto out;
 	}
 	if (!nd->nd_repstat) {
 		nd->nd_repstat = nfsvno_namei(nd, &named, dp, 1, exp, &dirp);
 	} else {
 		vput(dp);
 		nfsvno_relpathbuf(&named);
 	}
 	if (dirp)
 		vrele(dirp);
 	if (nd->nd_repstat)
 		goto out;
 	vrele(named.ni_startdir);
 	nfsvno_relpathbuf(&named);
 	fh.nfsrvfh_len = NFSX_MYFH;
 	vp = named.ni_vp;
 	nd->nd_repstat = nfsvno_getfh(vp, (fhandle_t *)fh.nfsrvfh_data, p);
 	vput(vp);
 	savflag = nd->nd_flag;
 	if (!nd->nd_repstat) {
 		/*
 		 * Pretend the next op is Secinfo, so that no wrongsec
 		 * test will be done.
 		 */
 		nfsd_fhtovp(nd, &fh, LK_SHARED, &vp, &retnes, NULL, 0,
 		    NFSV4OP_SECINFO);
 		if (vp)
 			vput(vp);
 	}
 	nd->nd_flag = savflag;
 	if (nd->nd_repstat)
 		goto out;
 
 	/*
 	 * Finally have the export flags for name, so we can create
 	 * the security info.
 	 */
 	len = 0;
 	NFSM_BUILD(sizp, u_int32_t *, NFSX_UNSIGNED);
 
 	/* If nes_numsecflavor == 0, all are allowed. */
 	if (retnes.nes_numsecflavor == 0) {
 		NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED);
 		*tl++ = txdr_unsigned(RPCAUTH_UNIX);
 		*tl = txdr_unsigned(RPCAUTH_GSS);
 		nfsm_strtom(nd, nfsgss_mechlist[KERBV_MECH].str,
 		    nfsgss_mechlist[KERBV_MECH].len);
 		NFSM_BUILD(tl, uint32_t *, 3 * NFSX_UNSIGNED);
 		*tl++ = txdr_unsigned(GSS_KERBV_QOP);
 		*tl++ = txdr_unsigned(RPCAUTHGSS_SVCNONE);
 		*tl = txdr_unsigned(RPCAUTH_GSS);
 		nfsm_strtom(nd, nfsgss_mechlist[KERBV_MECH].str,
 		    nfsgss_mechlist[KERBV_MECH].len);
 		NFSM_BUILD(tl, uint32_t *, 3 * NFSX_UNSIGNED);
 		*tl++ = txdr_unsigned(GSS_KERBV_QOP);
 		*tl++ = txdr_unsigned(RPCAUTHGSS_SVCINTEGRITY);
 		*tl = txdr_unsigned(RPCAUTH_GSS);
 		nfsm_strtom(nd, nfsgss_mechlist[KERBV_MECH].str,
 		    nfsgss_mechlist[KERBV_MECH].len);
 		NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED);
 		*tl++ = txdr_unsigned(GSS_KERBV_QOP);
 		*tl = txdr_unsigned(RPCAUTHGSS_SVCPRIVACY);
 		len = 4;
 	}
 	for (i = 0; i < retnes.nes_numsecflavor; i++) {
 		if (retnes.nes_secflavors[i] == AUTH_SYS) {
 			NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
 			*tl = txdr_unsigned(RPCAUTH_UNIX);
 			len++;
 		} else if (retnes.nes_secflavors[i] == RPCSEC_GSS_KRB5) {
 			NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
 			*tl++ = txdr_unsigned(RPCAUTH_GSS);
 			(void) nfsm_strtom(nd, nfsgss_mechlist[KERBV_MECH].str,
 			    nfsgss_mechlist[KERBV_MECH].len);
 			NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 			*tl++ = txdr_unsigned(GSS_KERBV_QOP);
 			*tl = txdr_unsigned(RPCAUTHGSS_SVCNONE);
 			len++;
 		} else if (retnes.nes_secflavors[i] == RPCSEC_GSS_KRB5I) {
 			NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
 			*tl++ = txdr_unsigned(RPCAUTH_GSS);
 			(void) nfsm_strtom(nd, nfsgss_mechlist[KERBV_MECH].str,
 			    nfsgss_mechlist[KERBV_MECH].len);
 			NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 			*tl++ = txdr_unsigned(GSS_KERBV_QOP);
 			*tl = txdr_unsigned(RPCAUTHGSS_SVCINTEGRITY);
 			len++;
 		} else if (retnes.nes_secflavors[i] == RPCSEC_GSS_KRB5P) {
 			NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
 			*tl++ = txdr_unsigned(RPCAUTH_GSS);
 			(void) nfsm_strtom(nd, nfsgss_mechlist[KERBV_MECH].str,
 			    nfsgss_mechlist[KERBV_MECH].len);
 			NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 			*tl++ = txdr_unsigned(GSS_KERBV_QOP);
 			*tl = txdr_unsigned(RPCAUTHGSS_SVCPRIVACY);
 			len++;
 		}
 	}
 	*sizp = txdr_unsigned(len);
 
 out:
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * nfsv4 security info no name service
  */
 int
 nfsrvd_secinfononame(struct nfsrv_descript *nd, int isdgram,
     vnode_t dp, struct nfsexstuff *exp)
 {
 	uint32_t *tl, *sizp;
 	struct nameidata named;
 	vnode_t dirp = NULL, vp;
 	struct nfsrvfh fh;
 	struct nfsexstuff retnes;
 	int error = 0, fhstyle, i, len;
 	uint64_t savflag;
 	char *bufp;
 	u_long *hashp;
 	struct thread *p = curthread;
 
 	NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
 	fhstyle = fxdr_unsigned(int, *tl);
 	switch (fhstyle) {
 	case NFSSECINFONONAME_PARENT:
 		if (dp->v_type != VDIR) {
 			vput(dp);
 			nd->nd_repstat = NFSERR_NOTDIR;
 			goto nfsmout;
 		}
 		NFSNAMEICNDSET(&named.ni_cnd, nd->nd_cred, LOOKUP,
 		    LOCKLEAF | SAVESTART);
 		nfsvno_setpathbuf(&named, &bufp, &hashp);
 		error = nfsrv_parsename(nd, bufp, hashp, &named.ni_pathlen);
 		if (error != 0) {
 			vput(dp);
 			nfsvno_relpathbuf(&named);
 			goto nfsmout;
 		}
 		if (nd->nd_repstat == 0)
 			nd->nd_repstat = nfsvno_namei(nd, &named, dp, 1, exp, &dirp);
 		else
 			vput(dp);
 		if (dirp != NULL)
 			vrele(dirp);
 		vrele(named.ni_startdir);
 		nfsvno_relpathbuf(&named);
 		vp = named.ni_vp;
 		break;
 	case NFSSECINFONONAME_CURFH:
 		vp = dp;
 		break;
 	default:
 		nd->nd_repstat = NFSERR_INVAL;
 		vput(dp);
 	}
 	if (nd->nd_repstat != 0)
 		goto nfsmout;
 	fh.nfsrvfh_len = NFSX_MYFH;
 	nd->nd_repstat = nfsvno_getfh(vp, (fhandle_t *)fh.nfsrvfh_data, p);
 	vput(vp);
 	savflag = nd->nd_flag;
 	if (nd->nd_repstat == 0) {
 		/*
 		 * Pretend the next op is Secinfo, so that no wrongsec
 		 * test will be done.
 		 */
 		nfsd_fhtovp(nd, &fh, LK_SHARED, &vp, &retnes, NULL, 0,
 		    NFSV4OP_SECINFO);
 		if (vp != NULL)
 			vput(vp);
 	}
 	nd->nd_flag = savflag;
 	if (nd->nd_repstat != 0)
 		goto nfsmout;
 
 	/*
 	 * Finally have the export flags for fh/parent, so we can create
 	 * the security info.
 	 */
 	len = 0;
 	NFSM_BUILD(sizp, uint32_t *, NFSX_UNSIGNED);
 
 	/* If nes_numsecflavor == 0, all are allowed. */
 	if (retnes.nes_numsecflavor == 0) {
 		NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED);
 		*tl++ = txdr_unsigned(RPCAUTH_UNIX);
 		*tl = txdr_unsigned(RPCAUTH_GSS);
 		nfsm_strtom(nd, nfsgss_mechlist[KERBV_MECH].str,
 		    nfsgss_mechlist[KERBV_MECH].len);
 		NFSM_BUILD(tl, uint32_t *, 3 * NFSX_UNSIGNED);
 		*tl++ = txdr_unsigned(GSS_KERBV_QOP);
 		*tl++ = txdr_unsigned(RPCAUTHGSS_SVCNONE);
 		*tl = txdr_unsigned(RPCAUTH_GSS);
 		nfsm_strtom(nd, nfsgss_mechlist[KERBV_MECH].str,
 		    nfsgss_mechlist[KERBV_MECH].len);
 		NFSM_BUILD(tl, uint32_t *, 3 * NFSX_UNSIGNED);
 		*tl++ = txdr_unsigned(GSS_KERBV_QOP);
 		*tl++ = txdr_unsigned(RPCAUTHGSS_SVCINTEGRITY);
 		*tl = txdr_unsigned(RPCAUTH_GSS);
 		nfsm_strtom(nd, nfsgss_mechlist[KERBV_MECH].str,
 		    nfsgss_mechlist[KERBV_MECH].len);
 		NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED);
 		*tl++ = txdr_unsigned(GSS_KERBV_QOP);
 		*tl = txdr_unsigned(RPCAUTHGSS_SVCPRIVACY);
 		len = 4;
 	}
 	for (i = 0; i < retnes.nes_numsecflavor; i++) {
 		if (retnes.nes_secflavors[i] == AUTH_SYS) {
 			NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
 			*tl = txdr_unsigned(RPCAUTH_UNIX);
 			len++;
 		} else if (retnes.nes_secflavors[i] == RPCSEC_GSS_KRB5) {
 			NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
 			*tl = txdr_unsigned(RPCAUTH_GSS);
 			nfsm_strtom(nd, nfsgss_mechlist[KERBV_MECH].str,
 			    nfsgss_mechlist[KERBV_MECH].len);
 			NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED);
 			*tl++ = txdr_unsigned(GSS_KERBV_QOP);
 			*tl = txdr_unsigned(RPCAUTHGSS_SVCNONE);
 			len++;
 		} else if (retnes.nes_secflavors[i] == RPCSEC_GSS_KRB5I) {
 			NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
 			*tl = txdr_unsigned(RPCAUTH_GSS);
 			nfsm_strtom(nd, nfsgss_mechlist[KERBV_MECH].str,
 			    nfsgss_mechlist[KERBV_MECH].len);
 			NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED);
 			*tl++ = txdr_unsigned(GSS_KERBV_QOP);
 			*tl = txdr_unsigned(RPCAUTHGSS_SVCINTEGRITY);
 			len++;
 		} else if (retnes.nes_secflavors[i] == RPCSEC_GSS_KRB5P) {
 			NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
 			*tl = txdr_unsigned(RPCAUTH_GSS);
 			nfsm_strtom(nd, nfsgss_mechlist[KERBV_MECH].str,
 			    nfsgss_mechlist[KERBV_MECH].len);
 			NFSM_BUILD(tl, uint32_t *, 2 * NFSX_UNSIGNED);
 			*tl++ = txdr_unsigned(GSS_KERBV_QOP);
 			*tl = txdr_unsigned(RPCAUTHGSS_SVCPRIVACY);
 			len++;
 		}
 	}
 	*sizp = txdr_unsigned(len);
 
 nfsmout:
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * nfsv4 set client id service
  */
 int
 nfsrvd_setclientid(struct nfsrv_descript *nd, __unused int isdgram,
     __unused vnode_t vp, __unused struct nfsexstuff *exp)
 {
 	u_int32_t *tl;
 	int i;
 	int error = 0, idlen;
 	struct nfsclient *clp = NULL;
 #ifdef INET
 	struct sockaddr_in *rin;
 #endif
 #ifdef INET6
 	struct sockaddr_in6 *rin6;
 #endif
 #if defined(INET) || defined(INET6)
 	u_char *ucp, *ucp2;
 #endif
 	u_char *verf, *addrbuf;
 	nfsquad_t clientid, confirm;
 	struct thread *p = curthread;
 
 	if ((nd->nd_flag & ND_NFSV41) != 0) {
 		nd->nd_repstat = NFSERR_NOTSUPP;
 		goto nfsmout;
 	}
 	if ((nd->nd_repstat = nfsd_checkrootexp(nd)) != 0)
 		goto out;
 	NFSM_DISSECT(tl, u_int32_t *, NFSX_VERF + NFSX_UNSIGNED);
 	verf = (u_char *)tl;
 	tl += (NFSX_VERF / NFSX_UNSIGNED);
 	i = fxdr_unsigned(int, *tl);
 	if (i > NFSV4_OPAQUELIMIT || i <= 0) {
 		nd->nd_repstat = NFSERR_BADXDR;
 		goto nfsmout;
 	}
 	idlen = i;
 	if (nd->nd_flag & ND_GSS)
 		i += nd->nd_princlen;
 	clp = malloc(sizeof(struct nfsclient) + i, M_NFSDCLIENT, M_WAITOK |
 	    M_ZERO);
 	clp->lc_stateid = malloc(sizeof(struct nfsstatehead) *
 	    nfsrv_statehashsize, M_NFSDCLIENT, M_WAITOK);
 	NFSINITSOCKMUTEX(&clp->lc_req.nr_mtx);
 	/* Allocated large enough for an AF_INET or AF_INET6 socket. */
 	clp->lc_req.nr_nam = malloc(sizeof(struct sockaddr_in6), M_SONAME,
 	    M_WAITOK | M_ZERO);
 	clp->lc_req.nr_cred = NULL;
 	NFSBCOPY(verf, clp->lc_verf, NFSX_VERF);
 	clp->lc_idlen = idlen;
 	error = nfsrv_mtostr(nd, clp->lc_id, idlen);
 	if (error)
 		goto nfsmout;
 	if (nd->nd_flag & ND_GSS) {
 		clp->lc_flags = LCL_GSS;
 		if (nd->nd_flag & ND_GSSINTEGRITY)
 			clp->lc_flags |= LCL_GSSINTEGRITY;
 		else if (nd->nd_flag & ND_GSSPRIVACY)
 			clp->lc_flags |= LCL_GSSPRIVACY;
 	} else {
 		clp->lc_flags = 0;
 	}
 	if ((nd->nd_flag & ND_GSS) && nd->nd_princlen > 0) {
 		clp->lc_flags |= LCL_NAME;
 		clp->lc_namelen = nd->nd_princlen;
 		clp->lc_name = &clp->lc_id[idlen];
 		NFSBCOPY(nd->nd_principal, clp->lc_name, clp->lc_namelen);
 	} else {
 		clp->lc_uid = nd->nd_cred->cr_uid;
 		clp->lc_gid = nd->nd_cred->cr_gid;
 	}
 
 	/* If the client is using TLS, do so for the callback connection. */
 	if (nd->nd_flag & ND_TLS)
 		clp->lc_flags |= LCL_TLSCB;
 
 	NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 	clp->lc_program = fxdr_unsigned(u_int32_t, *tl);
 	error = nfsrv_getclientipaddr(nd, clp);
 	if (error)
 		goto nfsmout;
 	NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 	clp->lc_callback = fxdr_unsigned(u_int32_t, *tl);
 
 	/*
 	 * nfsrv_setclient() does the actual work of adding it to the
 	 * client list. If there is no error, the structure has been
 	 * linked into the client list and clp should no longer be used
 	 * here. When an error is returned, it has not been linked in,
 	 * so it should be free'd.
 	 */
 	nd->nd_repstat = nfsrv_setclient(nd, &clp, &clientid, &confirm, p);
 	if (nd->nd_repstat == NFSERR_CLIDINUSE) {
 		/*
 		 * 8 is the maximum length of the port# string.
 		 */
 		addrbuf = malloc(INET6_ADDRSTRLEN + 8, M_TEMP, M_WAITOK);
 		switch (clp->lc_req.nr_nam->sa_family) {
 #ifdef INET
 		case AF_INET:
 			if (clp->lc_flags & LCL_TCPCALLBACK)
 				(void) nfsm_strtom(nd, "tcp", 3);
 			else 
 				(void) nfsm_strtom(nd, "udp", 3);
 			rin = (struct sockaddr_in *)clp->lc_req.nr_nam;
 			ucp = (u_char *)&rin->sin_addr.s_addr;
 			ucp2 = (u_char *)&rin->sin_port;
 			sprintf(addrbuf, "%d.%d.%d.%d.%d.%d", ucp[0] & 0xff,
 			    ucp[1] & 0xff, ucp[2] & 0xff, ucp[3] & 0xff,
 			    ucp2[0] & 0xff, ucp2[1] & 0xff);
 			break;
 #endif
 #ifdef INET6
 		case AF_INET6:
 			if (clp->lc_flags & LCL_TCPCALLBACK)
 				(void) nfsm_strtom(nd, "tcp6", 4);
 			else 
 				(void) nfsm_strtom(nd, "udp6", 4);
 			rin6 = (struct sockaddr_in6 *)clp->lc_req.nr_nam;
 			ucp = inet_ntop(AF_INET6, &rin6->sin6_addr, addrbuf,
 			    INET6_ADDRSTRLEN);
 			if (ucp != NULL)
 				i = strlen(ucp);
 			else
 				i = 0;
 			ucp2 = (u_char *)&rin6->sin6_port;
 			sprintf(&addrbuf[i], ".%d.%d", ucp2[0] & 0xff,
 			    ucp2[1] & 0xff);
 			break;
 #endif
 		}
 		(void) nfsm_strtom(nd, addrbuf, strlen(addrbuf));
 		free(addrbuf, M_TEMP);
 	}
 	if (clp) {
 		free(clp->lc_req.nr_nam, M_SONAME);
 		NFSFREEMUTEX(&clp->lc_req.nr_mtx);
 		free(clp->lc_stateid, M_NFSDCLIENT);
 		free(clp, M_NFSDCLIENT);
 	}
 	if (!nd->nd_repstat) {
 		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_HYPER);
 		*tl++ = clientid.lval[0];
 		*tl++ = clientid.lval[1];
 		*tl++ = confirm.lval[0];
 		*tl = confirm.lval[1];
 	}
 
 out:
 	NFSEXITCODE2(0, nd);
 	return (0);
 nfsmout:
 	if (clp) {
 		free(clp->lc_req.nr_nam, M_SONAME);
 		NFSFREEMUTEX(&clp->lc_req.nr_mtx);
 		free(clp->lc_stateid, M_NFSDCLIENT);
 		free(clp, M_NFSDCLIENT);
 	}
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * nfsv4 set client id confirm service
  */
 int
 nfsrvd_setclientidcfrm(struct nfsrv_descript *nd,
     __unused int isdgram, __unused vnode_t vp,
     __unused struct nfsexstuff *exp)
 {
 	u_int32_t *tl;
 	int error = 0;
 	nfsquad_t clientid, confirm;
 	struct thread *p = curthread;
 
 	if ((nd->nd_flag & ND_NFSV41) != 0) {
 		nd->nd_repstat = NFSERR_NOTSUPP;
 		goto nfsmout;
 	}
 	if ((nd->nd_repstat = nfsd_checkrootexp(nd)) != 0)
 		goto nfsmout;
 	NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_HYPER);
 	clientid.lval[0] = *tl++;
 	clientid.lval[1] = *tl++;
 	confirm.lval[0] = *tl++;
 	confirm.lval[1] = *tl;
 
 	/*
 	 * nfsrv_getclient() searches the client list for a match and
 	 * returns the appropriate NFSERR status.
 	 */
 	nd->nd_repstat = nfsrv_getclient(clientid, (CLOPS_CONFIRM|CLOPS_RENEW),
 	    NULL, NULL, confirm, 0, nd, p);
 nfsmout:
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * nfsv4 verify service
  */
 int
 nfsrvd_verify(struct nfsrv_descript *nd, int isdgram,
     vnode_t vp, __unused struct nfsexstuff *exp)
 {
 	int error = 0, ret, fhsize = NFSX_MYFH;
 	struct nfsvattr nva;
 	struct statfs *sf;
 	struct nfsfsinfo fs;
 	fhandle_t fh;
 	struct thread *p = curthread;
 
 	sf = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK);
 	nd->nd_repstat = nfsvno_getattr(vp, &nva, nd, p, 1, NULL);
 	if (!nd->nd_repstat)
 		nd->nd_repstat = nfsvno_statfs(vp, sf);
 	if (!nd->nd_repstat)
 		nd->nd_repstat = nfsvno_getfh(vp, &fh, p);
 	if (!nd->nd_repstat) {
 		nfsvno_getfs(&fs, isdgram);
 		error = nfsv4_loadattr(nd, vp, &nva, NULL, &fh, fhsize, NULL,
 		    sf, NULL, &fs, NULL, 1, &ret, NULL, NULL, p, nd->nd_cred);
 		if (!error) {
 			if (nd->nd_procnum == NFSV4OP_NVERIFY) {
 				if (ret == 0)
 					nd->nd_repstat = NFSERR_SAME;
 				else if (ret != NFSERR_NOTSAME)
 					nd->nd_repstat = ret;
 			} else if (ret)
 				nd->nd_repstat = ret;
 		}
 	}
 	vput(vp);
 	free(sf, M_STATFS);
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * nfs openattr rpc
  */
 int
 nfsrvd_openattr(struct nfsrv_descript *nd, __unused int isdgram,
     vnode_t dp, __unused vnode_t *vpp, __unused fhandle_t *fhp,
     __unused struct nfsexstuff *exp)
 {
 	u_int32_t *tl;
 	int error = 0, createdir __unused;
 
 	NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
 	createdir = fxdr_unsigned(int, *tl);
 	nd->nd_repstat = NFSERR_NOTSUPP;
 nfsmout:
 	vrele(dp);
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * nfsv4 release lock owner service
  */
 int
 nfsrvd_releaselckown(struct nfsrv_descript *nd, __unused int isdgram,
     __unused vnode_t vp, __unused struct nfsexstuff *exp)
 {
 	u_int32_t *tl;
 	struct nfsstate *stp = NULL;
 	int error = 0, len;
 	nfsquad_t clientid;
 	struct thread *p = curthread;
 
 	if ((nd->nd_flag & ND_NFSV41) != 0) {
 		nd->nd_repstat = NFSERR_NOTSUPP;
 		goto nfsmout;
 	}
 	if ((nd->nd_repstat = nfsd_checkrootexp(nd)) != 0)
 		goto nfsmout;
 	NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
 	len = fxdr_unsigned(int, *(tl + 2));
 	if (len <= 0 || len > NFSV4_OPAQUELIMIT) {
 		nd->nd_repstat = NFSERR_BADXDR;
 		goto nfsmout;
 	}
 	stp = malloc(sizeof (struct nfsstate) + len,
 	    M_NFSDSTATE, M_WAITOK);
 	stp->ls_ownerlen = len;
 	stp->ls_op = NULL;
 	stp->ls_flags = NFSLCK_RELEASE;
 	stp->ls_uid = nd->nd_cred->cr_uid;
 	clientid.lval[0] = *tl++;
 	clientid.lval[1] = *tl;
 	if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) {
 		if ((nd->nd_flag & ND_NFSV41) != 0)
 			clientid.qval = nd->nd_clientid.qval;
 		else if (nd->nd_clientid.qval != clientid.qval)
 			printf("EEK14 multiple clids\n");
 	} else {
 		if ((nd->nd_flag & ND_NFSV41) != 0)
 			printf("EEK! no clientid from session\n");
 		nd->nd_flag |= ND_IMPLIEDCLID;
 		nd->nd_clientid.qval = clientid.qval;
 	}
 	error = nfsrv_mtostr(nd, stp->ls_owner, len);
 	if (error)
 		goto nfsmout;
 	nd->nd_repstat = nfsrv_releaselckown(stp, clientid, p);
 	free(stp, M_NFSDSTATE);
 
 	NFSEXITCODE2(0, nd);
 	return (0);
 nfsmout:
 	if (stp)
 		free(stp, M_NFSDSTATE);
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * nfsv4 exchange_id service
  */
 int
 nfsrvd_exchangeid(struct nfsrv_descript *nd, __unused int isdgram,
     __unused vnode_t vp, __unused struct nfsexstuff *exp)
 {
 	uint32_t *tl;
 	int error = 0, i, idlen;
 	struct nfsclient *clp = NULL;
 	nfsquad_t clientid, confirm;
 	uint8_t *verf;
 	uint32_t sp4type, v41flags;
 	struct timespec verstime;
 #ifdef INET
 	struct sockaddr_in *sin, *rin;
 #endif
 #ifdef INET6
 	struct sockaddr_in6 *sin6, *rin6;
 #endif
 	struct thread *p = curthread;
 	char *s;
 
 	if ((nd->nd_repstat = nfsd_checkrootexp(nd)) != 0)
 		goto nfsmout;
 	NFSM_DISSECT(tl, u_int32_t *, NFSX_VERF + NFSX_UNSIGNED);
 	verf = (uint8_t *)tl;
 	tl += (NFSX_VERF / NFSX_UNSIGNED);
 	i = fxdr_unsigned(int, *tl);
 	if (i > NFSV4_OPAQUELIMIT || i <= 0) {
 		nd->nd_repstat = NFSERR_BADXDR;
 		goto nfsmout;
 	}
 	idlen = i;
 	if (nd->nd_flag & ND_GSS)
 		i += nd->nd_princlen;
 	clp = malloc(sizeof(struct nfsclient) + i, M_NFSDCLIENT, M_WAITOK |
 	    M_ZERO);
 	clp->lc_stateid = malloc(sizeof(struct nfsstatehead) *
 	    nfsrv_statehashsize, M_NFSDCLIENT, M_WAITOK);
 	NFSINITSOCKMUTEX(&clp->lc_req.nr_mtx);
 	/* Allocated large enough for an AF_INET or AF_INET6 socket. */
 	clp->lc_req.nr_nam = malloc(sizeof(struct sockaddr_in6), M_SONAME,
 	    M_WAITOK | M_ZERO);
 	switch (nd->nd_nam->sa_family) {
 #ifdef INET
 	case AF_INET:
 		rin = (struct sockaddr_in *)clp->lc_req.nr_nam;
 		sin = (struct sockaddr_in *)nd->nd_nam;
 		rin->sin_family = AF_INET;
 		rin->sin_len = sizeof(struct sockaddr_in);
 		rin->sin_port = 0;
 		rin->sin_addr.s_addr = sin->sin_addr.s_addr;
 		break;
 #endif
 #ifdef INET6
 	case AF_INET6:
 		rin6 = (struct sockaddr_in6 *)clp->lc_req.nr_nam;
 		sin6 = (struct sockaddr_in6 *)nd->nd_nam;
 		rin6->sin6_family = AF_INET6;
 		rin6->sin6_len = sizeof(struct sockaddr_in6);
 		rin6->sin6_port = 0;
 		rin6->sin6_addr = sin6->sin6_addr;
 		break;
 #endif
 	}
 	clp->lc_req.nr_cred = NULL;
 	NFSBCOPY(verf, clp->lc_verf, NFSX_VERF);
 	clp->lc_idlen = idlen;
 	error = nfsrv_mtostr(nd, clp->lc_id, idlen);
 	if (error != 0)
 		goto nfsmout;
 	if ((nd->nd_flag & ND_GSS) != 0) {
 		clp->lc_flags = LCL_GSS | LCL_NFSV41;
 		if ((nd->nd_flag & ND_GSSINTEGRITY) != 0)
 			clp->lc_flags |= LCL_GSSINTEGRITY;
 		else if ((nd->nd_flag & ND_GSSPRIVACY) != 0)
 			clp->lc_flags |= LCL_GSSPRIVACY;
 	} else
 		clp->lc_flags = LCL_NFSV41;
 	if ((nd->nd_flag & ND_NFSV42) != 0)
 		clp->lc_flags |= LCL_NFSV42;
 	if ((nd->nd_flag & ND_GSS) != 0 && nd->nd_princlen > 0) {
 		clp->lc_flags |= LCL_NAME;
 		clp->lc_namelen = nd->nd_princlen;
 		clp->lc_name = &clp->lc_id[idlen];
 		NFSBCOPY(nd->nd_principal, clp->lc_name, clp->lc_namelen);
 	} else {
 		clp->lc_uid = nd->nd_cred->cr_uid;
 		clp->lc_gid = nd->nd_cred->cr_gid;
 	}
 	NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 	v41flags = fxdr_unsigned(uint32_t, *tl++);
 	if ((v41flags & ~(NFSV4EXCH_SUPPMOVEDREFER | NFSV4EXCH_SUPPMOVEDMIGR |
 	    NFSV4EXCH_BINDPRINCSTATEID | NFSV4EXCH_MASKPNFS |
 	    NFSV4EXCH_UPDCONFIRMEDRECA)) != 0) {
 		nd->nd_repstat = NFSERR_INVAL;
 		goto nfsmout;
 	}
 	if ((v41flags & NFSV4EXCH_UPDCONFIRMEDRECA) != 0)
 		confirm.lval[1] = 1;
 	else
 		confirm.lval[1] = 0;
 	if (nfsrv_devidcnt == 0)
 		v41flags = NFSV4EXCH_USENONPNFS | NFSV4EXCH_USEPNFSDS;
  	else
  		v41flags = NFSV4EXCH_USEPNFSMDS;
 	sp4type = fxdr_unsigned(uint32_t, *tl);
 	if (sp4type != NFSV4EXCH_SP4NONE) {
 		nd->nd_repstat = NFSERR_NOTSUPP;
 		goto nfsmout;
 	}
 
 	/*
 	 * nfsrv_setclient() does the actual work of adding it to the
 	 * client list. If there is no error, the structure has been
 	 * linked into the client list and clp should no longer be used
 	 * here. When an error is returned, it has not been linked in,
 	 * so it should be free'd.
 	 */
 	nd->nd_repstat = nfsrv_setclient(nd, &clp, &clientid, &confirm, p);
 	if (clp != NULL) {
 		free(clp->lc_req.nr_nam, M_SONAME);
 		NFSFREEMUTEX(&clp->lc_req.nr_mtx);
 		free(clp->lc_stateid, M_NFSDCLIENT);
 		free(clp, M_NFSDCLIENT);
 	}
 	if (nd->nd_repstat == 0) {
 		if (confirm.lval[1] != 0)
 			v41flags |= NFSV4EXCH_CONFIRMEDR;
 		NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER + 3 * NFSX_UNSIGNED);
 		*tl++ = clientid.lval[0];			/* ClientID */
 		*tl++ = clientid.lval[1];
 		*tl++ = txdr_unsigned(confirm.lval[0]);		/* SequenceID */
 		*tl++ = txdr_unsigned(v41flags);		/* Exch flags */
 		*tl++ = txdr_unsigned(NFSV4EXCH_SP4NONE);	/* No SSV */
 		txdr_hyper(nfsrv_owner_minor, tl);	/* Owner Minor */
 		if (nfsrv_owner_major[0] != 0)
 			s = nfsrv_owner_major;
 		else
 			s = nd->nd_cred->cr_prison->pr_hostuuid;
 		nfsm_strtom(nd, s, strlen(s));		/* Owner Major */
 		if (nfsrv_scope[0] != 0)
 			s = nfsrv_scope;
 		else
 			s = nd->nd_cred->cr_prison->pr_hostuuid;
 		nfsm_strtom(nd, s, strlen(s)	);		/* Scope */
 		NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
 		*tl = txdr_unsigned(1);
 		(void)nfsm_strtom(nd, "freebsd.org", strlen("freebsd.org"));
 		(void)nfsm_strtom(nd, version, strlen(version));
 		NFSM_BUILD(tl, uint32_t *, NFSX_V4TIME);
 		verstime.tv_sec = 1293840000;		/* Jan 1, 2011 */
 		verstime.tv_nsec = 0;
 		txdr_nfsv4time(&verstime, tl);
 	}
 	NFSEXITCODE2(0, nd);
 	return (0);
 nfsmout:
 	if (clp != NULL) {
 		free(clp->lc_req.nr_nam, M_SONAME);
 		NFSFREEMUTEX(&clp->lc_req.nr_mtx);
 		free(clp->lc_stateid, M_NFSDCLIENT);
 		free(clp, M_NFSDCLIENT);
 	}
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * nfsv4 create session service
  */
 int
 nfsrvd_createsession(struct nfsrv_descript *nd, __unused int isdgram,
     __unused vnode_t vp, __unused struct nfsexstuff *exp)
 {
 	uint32_t *tl;
 	int error = 0;
 	nfsquad_t clientid, confirm;
 	struct nfsdsession *sep = NULL;
 	uint32_t rdmacnt;
 	struct thread *p = curthread;
 	static bool do_printf = true;
 
 	if ((nd->nd_repstat = nfsd_checkrootexp(nd)) != 0)
 		goto nfsmout;
 	sep = (struct nfsdsession *)malloc(sizeof(struct nfsdsession),
 	    M_NFSDSESSION, M_WAITOK | M_ZERO);
 	sep->sess_refcnt = 1;
 	mtx_init(&sep->sess_cbsess.nfsess_mtx, "nfscbsession", NULL, MTX_DEF);
 	NFSM_DISSECT(tl, uint32_t *, NFSX_HYPER + 2 * NFSX_UNSIGNED);
 	clientid.lval[0] = *tl++;
 	clientid.lval[1] = *tl++;
 	confirm.lval[0] = fxdr_unsigned(uint32_t, *tl++);
 	sep->sess_crflags = fxdr_unsigned(uint32_t, *tl);
 	/* Persistent sessions and RDMA are not supported. */
 	sep->sess_crflags &= NFSV4CRSESS_CONNBACKCHAN;
 
 	/* Fore channel attributes. */
 	NFSM_DISSECT(tl, uint32_t *, 7 * NFSX_UNSIGNED);
 	tl++;					/* Header pad always 0. */
 	sep->sess_maxreq = fxdr_unsigned(uint32_t, *tl++);
 	if (sep->sess_maxreq > sb_max_adj - NFS_MAXXDR) {
 		sep->sess_maxreq = sb_max_adj - NFS_MAXXDR;
 		if (do_printf)
 			printf("Consider increasing kern.ipc.maxsockbuf\n");
 		do_printf = false;
 	}
 	sep->sess_maxresp = fxdr_unsigned(uint32_t, *tl++);
 	if (sep->sess_maxresp > sb_max_adj - NFS_MAXXDR) {
 		sep->sess_maxresp = sb_max_adj - NFS_MAXXDR;
 		if (do_printf)
 			printf("Consider increasing kern.ipc.maxsockbuf\n");
 		do_printf = false;
 	}
 	sep->sess_maxrespcached = fxdr_unsigned(uint32_t, *tl++);
 	sep->sess_maxops = fxdr_unsigned(uint32_t, *tl++);
 	sep->sess_maxslots = fxdr_unsigned(uint32_t, *tl++);
 	if (sep->sess_maxslots > NFSV4_SLOTS)
 		sep->sess_maxslots = NFSV4_SLOTS;
 	rdmacnt = fxdr_unsigned(uint32_t, *tl);
 	if (rdmacnt > 1) {
 		nd->nd_repstat = NFSERR_BADXDR;
 		goto nfsmout;
 	} else if (rdmacnt == 1)
 		NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
 
 	/* Back channel attributes. */
 	NFSM_DISSECT(tl, uint32_t *, 7 * NFSX_UNSIGNED);
 	tl++;					/* Header pad always 0. */
 	sep->sess_cbmaxreq = fxdr_unsigned(uint32_t, *tl++);
 	sep->sess_cbmaxresp = fxdr_unsigned(uint32_t, *tl++);
 	sep->sess_cbmaxrespcached = fxdr_unsigned(uint32_t, *tl++);
 	sep->sess_cbmaxops = fxdr_unsigned(uint32_t, *tl++);
 	sep->sess_cbsess.nfsess_foreslots = fxdr_unsigned(uint32_t, *tl++);
 	rdmacnt = fxdr_unsigned(uint32_t, *tl);
 	if (rdmacnt > 1) {
 		nd->nd_repstat = NFSERR_BADXDR;
 		goto nfsmout;
 	} else if (rdmacnt == 1)
 		NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
 
 	NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
 	sep->sess_cbprogram = fxdr_unsigned(uint32_t, *tl);
 
 	/*
 	 * nfsrv_getclient() searches the client list for a match and
 	 * returns the appropriate NFSERR status.
 	 */
 	nd->nd_repstat = nfsrv_getclient(clientid, CLOPS_CONFIRM | CLOPS_RENEW,
 	    NULL, sep, confirm, sep->sess_cbprogram, nd, p);
 	if (nd->nd_repstat == 0) {
 		NFSM_BUILD(tl, uint32_t *, NFSX_V4SESSIONID);
 		NFSBCOPY(sep->sess_sessionid, tl, NFSX_V4SESSIONID);
 		NFSM_BUILD(tl, uint32_t *, 18 * NFSX_UNSIGNED);
 		*tl++ = txdr_unsigned(confirm.lval[0]);	/* sequenceid */
 		*tl++ = txdr_unsigned(sep->sess_crflags);
 
 		/* Fore channel attributes. */
 		*tl++ = 0;
 		*tl++ = txdr_unsigned(sep->sess_maxreq);
 		*tl++ = txdr_unsigned(sep->sess_maxresp);
 		*tl++ = txdr_unsigned(sep->sess_maxrespcached);
 		*tl++ = txdr_unsigned(sep->sess_maxops);
 		*tl++ = txdr_unsigned(sep->sess_maxslots);
 		*tl++ = txdr_unsigned(1);
 		*tl++ = txdr_unsigned(0);			/* No RDMA. */
 
 		/* Back channel attributes. */
 		*tl++ = 0;
 		*tl++ = txdr_unsigned(sep->sess_cbmaxreq);
 		*tl++ = txdr_unsigned(sep->sess_cbmaxresp);
 		*tl++ = txdr_unsigned(sep->sess_cbmaxrespcached);
 		*tl++ = txdr_unsigned(sep->sess_cbmaxops);
 		*tl++ = txdr_unsigned(sep->sess_cbsess.nfsess_foreslots);
 		*tl++ = txdr_unsigned(1);
 		*tl = txdr_unsigned(0);			/* No RDMA. */
 	}
 nfsmout:
 	if (nd->nd_repstat != 0 && sep != NULL)
 		free(sep, M_NFSDSESSION);
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * nfsv4 sequence service
  */
 int
 nfsrvd_sequence(struct nfsrv_descript *nd, __unused int isdgram,
     __unused vnode_t vp, __unused struct nfsexstuff *exp)
 {
 	uint32_t *tl;
 	uint32_t highest_slotid, sequenceid, sflags, target_highest_slotid;
 	int cache_this, error = 0;
 	struct thread *p = curthread;
 
 	if ((nd->nd_repstat = nfsd_checkrootexp(nd)) != 0)
 		goto nfsmout;
 	NFSM_DISSECT(tl, uint32_t *, NFSX_V4SESSIONID);
 	NFSBCOPY(tl, nd->nd_sessionid, NFSX_V4SESSIONID);
 	NFSM_DISSECT(tl, uint32_t *, 4 * NFSX_UNSIGNED);
 	sequenceid = fxdr_unsigned(uint32_t, *tl++);
 	nd->nd_slotid = fxdr_unsigned(uint32_t, *tl++);
 	highest_slotid = fxdr_unsigned(uint32_t, *tl++);
 	if (*tl == newnfs_true)
 		cache_this = 1;
 	else
 		cache_this = 0;
 	nd->nd_repstat = nfsrv_checksequence(nd, sequenceid, &highest_slotid,
 	    &target_highest_slotid, cache_this, &sflags, p);
 	if (nd->nd_repstat != NFSERR_BADSLOT)
 		nd->nd_flag |= ND_HASSEQUENCE;
 	if (nd->nd_repstat == 0) {
 		NFSM_BUILD(tl, uint32_t *, NFSX_V4SESSIONID);
 		NFSBCOPY(nd->nd_sessionid, tl, NFSX_V4SESSIONID);
 		NFSM_BUILD(tl, uint32_t *, 5 * NFSX_UNSIGNED);
 		*tl++ = txdr_unsigned(sequenceid);
 		*tl++ = txdr_unsigned(nd->nd_slotid);
 		*tl++ = txdr_unsigned(highest_slotid);
 		*tl++ = txdr_unsigned(target_highest_slotid);
 		*tl = txdr_unsigned(sflags);
 	}
 nfsmout:
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * nfsv4 reclaim complete service
  */
 int
 nfsrvd_reclaimcomplete(struct nfsrv_descript *nd, __unused int isdgram,
     __unused vnode_t vp, __unused struct nfsexstuff *exp)
 {
 	uint32_t *tl;
 	int error = 0, onefs;
 
 	NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
 	/*
 	 * I believe that a ReclaimComplete with rca_one_fs == TRUE is only
 	 * to be used after a file system has been transferred to a different
 	 * file server.  However, RFC5661 is somewhat vague w.r.t. this and
 	 * the ESXi 6.7 client does both a ReclaimComplete with rca_one_fs
 	 * == TRUE and one with ReclaimComplete with rca_one_fs == FALSE.
 	 * Therefore, just ignore the rca_one_fs == TRUE operation and return
 	 * NFS_OK without doing anything.
 	 */
 	onefs = 0;
 	if (*tl == newnfs_true)
 		onefs = 1;
 	nd->nd_repstat = nfsrv_checkreclaimcomplete(nd, onefs);
 nfsmout:
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * nfsv4 destroy clientid service
  */
 int
 nfsrvd_destroyclientid(struct nfsrv_descript *nd, __unused int isdgram,
     __unused vnode_t vp, __unused struct nfsexstuff *exp)
 {
 	uint32_t *tl;
 	nfsquad_t clientid;
 	int error = 0;
 	struct thread *p = curthread;
 
 	if ((nd->nd_repstat = nfsd_checkrootexp(nd)) != 0)
 		goto nfsmout;
 	NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
 	clientid.lval[0] = *tl++;
 	clientid.lval[1] = *tl;
 	nd->nd_repstat = nfsrv_destroyclient(clientid, p);
 nfsmout:
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * nfsv4 bind connection to session service
  */
 int
 nfsrvd_bindconnsess(struct nfsrv_descript *nd, __unused int isdgram,
     __unused vnode_t vp, __unused struct nfsexstuff *exp)
 {
 	uint32_t *tl;
 	uint8_t sessid[NFSX_V4SESSIONID];
 	int error = 0, foreaft;
 
 	if ((nd->nd_repstat = nfsd_checkrootexp(nd)) != 0)
 		goto nfsmout;
 	NFSM_DISSECT(tl, uint32_t *, NFSX_V4SESSIONID + 2 * NFSX_UNSIGNED);
 	NFSBCOPY(tl, sessid, NFSX_V4SESSIONID);
 	tl += (NFSX_V4SESSIONID / NFSX_UNSIGNED);
 	foreaft = fxdr_unsigned(int, *tl++);
 	if (*tl == newnfs_true) {
 		/* RDMA is not supported. */
 		nd->nd_repstat = NFSERR_NOTSUPP;
 		goto nfsmout;
 	}
 
 	nd->nd_repstat = nfsrv_bindconnsess(nd, sessid, &foreaft);
 	if (nd->nd_repstat == 0) {
 		NFSM_BUILD(tl, uint32_t *, NFSX_V4SESSIONID + 2 *
 		    NFSX_UNSIGNED);
 		NFSBCOPY(sessid, tl, NFSX_V4SESSIONID);
 		tl += (NFSX_V4SESSIONID / NFSX_UNSIGNED);
 		*tl++ = txdr_unsigned(foreaft);
 		*tl = newnfs_false;
 	}
 nfsmout:
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * nfsv4 destroy session service
  */
 int
 nfsrvd_destroysession(struct nfsrv_descript *nd, __unused int isdgram,
     __unused vnode_t vp, __unused struct nfsexstuff *exp)
 {
 	uint8_t *cp, sessid[NFSX_V4SESSIONID];
 	int error = 0;
 
 	if ((nd->nd_repstat = nfsd_checkrootexp(nd)) != 0)
 		goto nfsmout;
 	NFSM_DISSECT(cp, uint8_t *, NFSX_V4SESSIONID);
 	NFSBCOPY(cp, sessid, NFSX_V4SESSIONID);
 	nd->nd_repstat = nfsrv_destroysession(nd, sessid);
 nfsmout:
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * nfsv4 free stateid service
  */
 int
 nfsrvd_freestateid(struct nfsrv_descript *nd, __unused int isdgram,
     __unused vnode_t vp, __unused struct nfsexstuff *exp)
 {
 	uint32_t *tl;
 	nfsv4stateid_t stateid;
 	int error = 0;
 	struct thread *p = curthread;
 
 	NFSM_DISSECT(tl, uint32_t *, NFSX_STATEID);
 	stateid.seqid = fxdr_unsigned(uint32_t, *tl++);
 	NFSBCOPY(tl, stateid.other, NFSX_STATEIDOTHER);
 
 	/*
 	 * For the special stateid of other all 0s and seqid == 1, set the
 	 * stateid to the current stateid, if it is set.
 	 */
 	if (stateid.seqid == 1 && stateid.other[0] == 0 &&
 	    stateid.other[1] == 0 && stateid.other[2] == 0) {
 		if ((nd->nd_flag & ND_CURSTATEID) != 0) {
 			stateid = nd->nd_curstateid;
 			stateid.seqid = 0;
 		} else {
 			nd->nd_repstat = NFSERR_BADSTATEID;
 			goto nfsmout;
 		}
 	}
 
 	nd->nd_repstat = nfsrv_freestateid(nd, &stateid, p);
 
 	/* If the current stateid has been free'd, unset it. */
 	if (nd->nd_repstat == 0 && (nd->nd_flag & ND_CURSTATEID) != 0 &&
 	    stateid.other[0] == nd->nd_curstateid.other[0] &&
 	    stateid.other[1] == nd->nd_curstateid.other[1] &&
 	    stateid.other[2] == nd->nd_curstateid.other[2])
 		nd->nd_flag &= ~ND_CURSTATEID;
 nfsmout:
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * nfsv4 layoutget service
  */
 int
 nfsrvd_layoutget(struct nfsrv_descript *nd, __unused int isdgram,
     vnode_t vp, struct nfsexstuff *exp)
 {
 	uint32_t *tl;
 	nfsv4stateid_t stateid;
 	int error = 0, layoutlen, layouttype, iomode, maxcnt, retonclose;
 	uint64_t offset, len, minlen;
 	char *layp;
 	struct thread *p = curthread;
 
 	NFSM_DISSECT(tl, uint32_t *, 4 * NFSX_UNSIGNED + 3 * NFSX_HYPER +
 	    NFSX_STATEID);
 	tl++;		/* Signal layout available. Ignore for now. */
 	layouttype = fxdr_unsigned(int, *tl++);
 	iomode = fxdr_unsigned(int, *tl++);
 	offset = fxdr_hyper(tl); tl += 2;
 	len = fxdr_hyper(tl); tl += 2;
 	minlen = fxdr_hyper(tl); tl += 2;
 	stateid.seqid = fxdr_unsigned(uint32_t, *tl++);
 	NFSBCOPY(tl, stateid.other, NFSX_STATEIDOTHER);
 	tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED);
 	maxcnt = fxdr_unsigned(int, *tl);
 	NFSD_DEBUG(4, "layoutget ltyp=%d iom=%d off=%ju len=%ju mlen=%ju\n",
 	    layouttype, iomode, (uintmax_t)offset, (uintmax_t)len,
 	    (uintmax_t)minlen);
 	if (len < minlen ||
 	    (minlen != UINT64_MAX && offset + minlen < offset) ||
 	    (len != UINT64_MAX && offset + len < offset)) {
 		nd->nd_repstat = NFSERR_INVAL;
 		goto nfsmout;
 	}
 
 	/*
 	 * For the special stateid of other all 0s and seqid == 1, set the
 	 * stateid to the current stateid, if it is set.
 	 */
 	if (stateid.seqid == 1 && stateid.other[0] == 0 &&
 	    stateid.other[1] == 0 && stateid.other[2] == 0) {
 		if ((nd->nd_flag & ND_CURSTATEID) != 0) {
 			stateid = nd->nd_curstateid;
 			stateid.seqid = 0;
 		} else {
 			nd->nd_repstat = NFSERR_BADSTATEID;
 			goto nfsmout;
 		}
 	}
 
 	layp = NULL;
 	if (layouttype == NFSLAYOUT_NFSV4_1_FILES && nfsrv_maxpnfsmirror == 1)
 		layp = malloc(NFSX_V4FILELAYOUT, M_TEMP, M_WAITOK);
 	else if (layouttype == NFSLAYOUT_FLEXFILE)
 		layp = malloc(NFSX_V4FLEXLAYOUT(nfsrv_maxpnfsmirror), M_TEMP,
 		    M_WAITOK);
 	else
 		nd->nd_repstat = NFSERR_UNKNLAYOUTTYPE;
 	if (layp != NULL)
 		nd->nd_repstat = nfsrv_layoutget(nd, vp, exp, layouttype,
 		    &iomode, &offset, &len, minlen, &stateid, maxcnt,
 		    &retonclose, &layoutlen, layp, nd->nd_cred, p);
 	NFSD_DEBUG(4, "nfsrv_layoutget stat=%u layoutlen=%d\n", nd->nd_repstat,
 	    layoutlen);
 	if (nd->nd_repstat == 0) {
 		/* For NFSv4.1, set the Current StateID. */
 		if ((nd->nd_flag & ND_NFSV41) != 0) {
 			nd->nd_curstateid = stateid;
 			nd->nd_flag |= ND_CURSTATEID;
 		}
 		NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED + NFSX_STATEID +
 		    2 * NFSX_HYPER);
 		*tl++ = txdr_unsigned(retonclose);
 		*tl++ = txdr_unsigned(stateid.seqid);
 		NFSBCOPY(stateid.other, tl, NFSX_STATEIDOTHER);
 		tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED);
 		*tl++ = txdr_unsigned(1);	/* Only returns one layout. */
 		txdr_hyper(offset, tl); tl += 2;
 		txdr_hyper(len, tl); tl += 2;
 		*tl++ = txdr_unsigned(iomode);
 		*tl = txdr_unsigned(layouttype);
 		nfsm_strtom(nd, layp, layoutlen);
 	} else if (nd->nd_repstat == NFSERR_LAYOUTTRYLATER) {
 		NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
 		*tl = newnfs_false;
 	}
 	free(layp, M_TEMP);
 nfsmout:
 	vput(vp);
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * nfsv4 layoutcommit service
  */
 int
 nfsrvd_layoutcommit(struct nfsrv_descript *nd, __unused int isdgram,
     vnode_t vp, struct nfsexstuff *exp)
 {
 	uint32_t *tl;
 	nfsv4stateid_t stateid;
 	int error = 0, hasnewoff, hasnewmtime, layouttype, maxcnt, reclaim;
 	int hasnewsize;
 	uint64_t offset, len, newoff = 0, newsize;
 	struct timespec newmtime;
 	char *layp;
 	struct thread *p = curthread;
 
 	layp = NULL;
 	NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED + 2 * NFSX_HYPER +
 	    NFSX_STATEID);
 	offset = fxdr_hyper(tl); tl += 2;
 	len = fxdr_hyper(tl); tl += 2;
 	reclaim = fxdr_unsigned(int, *tl++);
 	stateid.seqid = fxdr_unsigned(uint32_t, *tl++);
 	NFSBCOPY(tl, stateid.other, NFSX_STATEIDOTHER);
 	tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED);
 	/*
 	 * For the special stateid of other all 0s and seqid == 1, set the
 	 * stateid to the current stateid, if it is set.
 	 */
 	if (stateid.seqid == 1 && stateid.other[0] == 0 &&
 	    stateid.other[1] == 0 && stateid.other[2] == 0) {
 		if ((nd->nd_flag & ND_CURSTATEID) != 0) {
 			stateid = nd->nd_curstateid;
 			stateid.seqid = 0;
 		} else {
 			nd->nd_repstat = NFSERR_BADSTATEID;
 			goto nfsmout;
 		}
 	}
 
 	hasnewoff = fxdr_unsigned(int, *tl);
 	if (hasnewoff != 0) {
 		NFSM_DISSECT(tl, uint32_t *, NFSX_HYPER + NFSX_UNSIGNED);
 		newoff = fxdr_hyper(tl); tl += 2;
 	} else
 		NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
 	hasnewmtime = fxdr_unsigned(int, *tl);
 	if (hasnewmtime != 0) {
 		NFSM_DISSECT(tl, uint32_t *, NFSX_V4TIME + 2 * NFSX_UNSIGNED);
 		fxdr_nfsv4time(tl, &newmtime);
 		tl += (NFSX_V4TIME / NFSX_UNSIGNED);
 	} else
 		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
 	layouttype = fxdr_unsigned(int, *tl++);
 	maxcnt = fxdr_unsigned(int, *tl);
 	if (maxcnt > 0) {
 		layp = malloc(maxcnt + 1, M_TEMP, M_WAITOK);
 		error = nfsrv_mtostr(nd, layp, maxcnt);
 		if (error != 0)
 			goto nfsmout;
 	}
 	nd->nd_repstat = nfsrv_layoutcommit(nd, vp, layouttype, hasnewoff,
 	    newoff, offset, len, hasnewmtime, &newmtime, reclaim, &stateid,
 	    maxcnt, layp, &hasnewsize, &newsize, nd->nd_cred, p);
 	NFSD_DEBUG(4, "nfsrv_layoutcommit stat=%u\n", nd->nd_repstat);
 	if (nd->nd_repstat == 0) {
 		if (hasnewsize != 0) {
 			NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED + NFSX_HYPER);
 			*tl++ = newnfs_true;
 			txdr_hyper(newsize, tl);
 		} else {
 			NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
 			*tl = newnfs_false;
 		}
 	}
 nfsmout:
 	free(layp, M_TEMP);
 	vput(vp);
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * nfsv4 layoutreturn service
  */
 int
 nfsrvd_layoutreturn(struct nfsrv_descript *nd, __unused int isdgram,
     vnode_t vp, struct nfsexstuff *exp)
 {
 	uint32_t *tl, *layp;
 	nfsv4stateid_t stateid;
 	int error = 0, fnd, kind, layouttype, iomode, maxcnt, reclaim;
 	uint64_t offset, len;
 	struct thread *p = curthread;
 
 	layp = NULL;
 	NFSM_DISSECT(tl, uint32_t *, 4 * NFSX_UNSIGNED);
 	reclaim = *tl++;
 	layouttype = fxdr_unsigned(int, *tl++);
 	iomode = fxdr_unsigned(int, *tl++);
 	kind = fxdr_unsigned(int, *tl);
 	NFSD_DEBUG(4, "layoutreturn recl=%d ltyp=%d iom=%d kind=%d\n", reclaim,
 	    layouttype, iomode, kind);
 	if (kind == NFSV4LAYOUTRET_FILE) {
 		NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_HYPER + NFSX_STATEID +
 		    NFSX_UNSIGNED);
 		offset = fxdr_hyper(tl); tl += 2;
 		len = fxdr_hyper(tl); tl += 2;
 		stateid.seqid = fxdr_unsigned(uint32_t, *tl++);
 		NFSBCOPY(tl, stateid.other, NFSX_STATEIDOTHER);
 		tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED);
 
 		/*
 		 * For the special stateid of other all 0s and seqid == 1, set
 		 * the stateid to the current stateid, if it is set.
 		 */
 		if (stateid.seqid == 1 && stateid.other[0] == 0 &&
 		    stateid.other[1] == 0 && stateid.other[2] == 0) {
 			if ((nd->nd_flag & ND_CURSTATEID) != 0) {
 				stateid = nd->nd_curstateid;
 				stateid.seqid = 0;
 			} else {
 				nd->nd_repstat = NFSERR_BADSTATEID;
 				goto nfsmout;
 			}
 		}
 
 		maxcnt = fxdr_unsigned(int, *tl);
 		/*
 		 * There is no fixed upper bound defined in the RFCs,
 		 * but 128Kbytes should be more than sufficient.
 		 */
 		if (maxcnt < 0 || maxcnt > 131072)
 			maxcnt = 0;
 		if (maxcnt > 0) {
 			layp = malloc(maxcnt + 1, M_TEMP, M_WAITOK);
 			error = nfsrv_mtostr(nd, (char *)layp, maxcnt);
 			if (error != 0)
 				goto nfsmout;
 		}
 	} else {
 		if (reclaim == newnfs_true) {
 			nd->nd_repstat = NFSERR_INVAL;
 			goto nfsmout;
 		}
 		offset = len = 0;
 		maxcnt = 0;
 	}
 	nd->nd_repstat = nfsrv_layoutreturn(nd, vp, layouttype, iomode,
 	    offset, len, reclaim, kind, &stateid, maxcnt, layp, &fnd,
 	    nd->nd_cred, p);
 	NFSD_DEBUG(4, "nfsrv_layoutreturn stat=%u fnd=%d\n", nd->nd_repstat,
 	    fnd);
 	if (nd->nd_repstat == 0) {
 		NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
 		if (fnd != 0) {
 			*tl = newnfs_true;
 			NFSM_BUILD(tl, uint32_t *, NFSX_STATEID);
 			*tl++ = txdr_unsigned(stateid.seqid);
 			NFSBCOPY(stateid.other, tl, NFSX_STATEIDOTHER);
 		} else
 			*tl = newnfs_false;
 	}
 nfsmout:
 	free(layp, M_TEMP);
 	vput(vp);
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * nfsv4 layout error service
  */
 int
 nfsrvd_layouterror(struct nfsrv_descript *nd, __unused int isdgram,
     vnode_t vp, struct nfsexstuff *exp)
 {
 	uint32_t *tl;
 	nfsv4stateid_t stateid;
 	int cnt, error = 0, i, stat;
 	int opnum __unused;
 	char devid[NFSX_V4DEVICEID];
 	uint64_t offset, len;
 
 	NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_HYPER + NFSX_STATEID +
 	    NFSX_UNSIGNED);
 	offset = fxdr_hyper(tl); tl += 2;
 	len = fxdr_hyper(tl); tl += 2;
 	stateid.seqid = fxdr_unsigned(uint32_t, *tl++);
 	NFSBCOPY(tl, stateid.other, NFSX_STATEIDOTHER);
 	tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED);
 	cnt = fxdr_unsigned(int, *tl);
 	NFSD_DEBUG(4, "layouterror off=%ju len=%ju cnt=%d\n", (uintmax_t)offset,
 	    (uintmax_t)len, cnt);
 	/*
 	 * For the special stateid of other all 0s and seqid == 1, set
 	 * the stateid to the current stateid, if it is set.
 	 */
 	if (stateid.seqid == 1 && stateid.other[0] == 0 &&
 	    stateid.other[1] == 0 && stateid.other[2] == 0) {
 		if ((nd->nd_flag & ND_CURSTATEID) != 0) {
 			stateid = nd->nd_curstateid;
 			stateid.seqid = 0;
 		} else {
 			nd->nd_repstat = NFSERR_BADSTATEID;
 			goto nfsmout;
 		}
 	}
 
 	/*
 	 * Ignore offset, len and stateid for now.
 	 */
 	for (i = 0; i < cnt; i++) {
 		NFSM_DISSECT(tl, uint32_t *, NFSX_V4DEVICEID + 2 *
 		    NFSX_UNSIGNED);
 		NFSBCOPY(tl, devid, NFSX_V4DEVICEID);
 		tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED);
 		stat = fxdr_unsigned(int, *tl++);
 		opnum = fxdr_unsigned(int, *tl);
 		NFSD_DEBUG(4, "nfsrvd_layouterr op=%d stat=%d\n", opnum, stat);
 		/*
 		 * Except for NFSERR_ACCES, NFSERR_STALE and NFSERR_NOSPC
 		 * errors, disable the mirror.
 		 */
 		if (stat != NFSERR_ACCES && stat != NFSERR_STALE &&
 		    stat != NFSERR_NOSPC)
 			nfsrv_delds(devid, curthread);
 
 		/* For NFSERR_NOSPC, mark all deviceids and layouts. */
 		if (stat == NFSERR_NOSPC)
 			nfsrv_marknospc(devid, true);
 	}
 nfsmout:
 	vput(vp);
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * nfsv4 layout stats service
  */
 int
 nfsrvd_layoutstats(struct nfsrv_descript *nd, __unused int isdgram,
     vnode_t vp, struct nfsexstuff *exp)
 {
 	uint32_t *tl;
 	nfsv4stateid_t stateid;
 	int cnt, error = 0;
 	int layouttype __unused;
 	char devid[NFSX_V4DEVICEID] __unused;
 	uint64_t offset __unused, len __unused, readcount __unused;
 	uint64_t readbytes __unused, writecount __unused, writebytes __unused;
 
 	NFSM_DISSECT(tl, uint32_t *, 6 * NFSX_HYPER + NFSX_STATEID +
 	    NFSX_V4DEVICEID + 2 * NFSX_UNSIGNED);
 	offset = fxdr_hyper(tl); tl += 2;
 	len = fxdr_hyper(tl); tl += 2;
 	stateid.seqid = fxdr_unsigned(uint32_t, *tl++);
 	NFSBCOPY(tl, stateid.other, NFSX_STATEIDOTHER);
 	tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED);
 	readcount = fxdr_hyper(tl); tl += 2;
 	readbytes = fxdr_hyper(tl); tl += 2;
 	writecount = fxdr_hyper(tl); tl += 2;
 	writebytes = fxdr_hyper(tl); tl += 2;
 	NFSBCOPY(tl, devid, NFSX_V4DEVICEID);
 	tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED);
 	layouttype = fxdr_unsigned(int, *tl++);
 	cnt = fxdr_unsigned(int, *tl);
 	error = nfsm_advance(nd, NFSM_RNDUP(cnt), -1);
 	if (error != 0)
 		goto nfsmout;
 	NFSD_DEBUG(4, "layoutstats cnt=%d\n", cnt);
 	/*
 	 * For the special stateid of other all 0s and seqid == 1, set
 	 * the stateid to the current stateid, if it is set.
 	 */
 	if (stateid.seqid == 1 && stateid.other[0] == 0 &&
 	    stateid.other[1] == 0 && stateid.other[2] == 0) {
 		if ((nd->nd_flag & ND_CURSTATEID) != 0) {
 			stateid = nd->nd_curstateid;
 			stateid.seqid = 0;
 		} else {
 			nd->nd_repstat = NFSERR_BADSTATEID;
 			goto nfsmout;
 		}
 	}
 
 	/*
 	 * No use for the stats for now.
 	 */
 nfsmout:
 	vput(vp);
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * nfsv4 io_advise service
  */
 int
 nfsrvd_ioadvise(struct nfsrv_descript *nd, __unused int isdgram,
     vnode_t vp, struct nfsexstuff *exp)
 {
 	uint32_t *tl;
 	nfsv4stateid_t stateid;
 	nfsattrbit_t hints;
 	int error = 0, ret;
 	off_t offset, len;
 
 	NFSM_DISSECT(tl, uint32_t *, NFSX_STATEID + 2 * NFSX_HYPER);
 	stateid.seqid = fxdr_unsigned(uint32_t, *tl++);
 	NFSBCOPY(tl, stateid.other, NFSX_STATEIDOTHER);
 	tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED);
 	offset = fxdr_hyper(tl); tl += 2;
 	len = fxdr_hyper(tl);
 	error = nfsrv_getattrbits(nd, &hints, NULL, NULL);
 	if (error != 0)
 		goto nfsmout;
 	/*
 	 * For the special stateid of other all 0s and seqid == 1, set
 	 * the stateid to the current stateid, if it is set.
 	 */
 	if (stateid.seqid == 1 && stateid.other[0] == 0 &&
 	    stateid.other[1] == 0 && stateid.other[2] == 0) {
 		if ((nd->nd_flag & ND_CURSTATEID) != 0) {
 			stateid = nd->nd_curstateid;
 			stateid.seqid = 0;
 		} else {
 			nd->nd_repstat = NFSERR_BADSTATEID;
 			goto nfsmout;
 		}
 	}
 
 	if (offset < 0) {
 		nd->nd_repstat = NFSERR_INVAL;
 		goto nfsmout;
 	}
 	if (len < 0)
 		len = 0;
 	if (vp->v_type != VREG) {
 		if (vp->v_type == VDIR)
 			nd->nd_repstat = NFSERR_ISDIR;
 		else
 			nd->nd_repstat = NFSERR_WRONGTYPE;
 		goto nfsmout;
 	}
 
 	/*
 	 * For now, we can only handle WILLNEED and DONTNEED and don't use
 	 * the stateid.
 	 */
 	if ((NFSISSET_ATTRBIT(&hints, NFSV4IOHINT_WILLNEED) &&
 	    !NFSISSET_ATTRBIT(&hints, NFSV4IOHINT_DONTNEED)) ||
 	    (NFSISSET_ATTRBIT(&hints, NFSV4IOHINT_DONTNEED) &&
 	    !NFSISSET_ATTRBIT(&hints, NFSV4IOHINT_WILLNEED))) {
 		NFSVOPUNLOCK(vp);
 		if (NFSISSET_ATTRBIT(&hints, NFSV4IOHINT_WILLNEED)) {
 			ret = VOP_ADVISE(vp, offset, len, POSIX_FADV_WILLNEED);
 			NFSZERO_ATTRBIT(&hints);
 			if (ret == 0)
 				NFSSETBIT_ATTRBIT(&hints, NFSV4IOHINT_WILLNEED);
 			else
 				NFSSETBIT_ATTRBIT(&hints, NFSV4IOHINT_NORMAL);
 		} else {
 			ret = VOP_ADVISE(vp, offset, len, POSIX_FADV_DONTNEED);
 			NFSZERO_ATTRBIT(&hints);
 			if (ret == 0)
 				NFSSETBIT_ATTRBIT(&hints, NFSV4IOHINT_DONTNEED);
 			else
 				NFSSETBIT_ATTRBIT(&hints, NFSV4IOHINT_NORMAL);
 		}
 		vrele(vp);
 	} else {
 		NFSZERO_ATTRBIT(&hints);
 		NFSSETBIT_ATTRBIT(&hints, NFSV4IOHINT_NORMAL);
 		vput(vp);
 	}
 	nfsrv_putattrbit(nd, &hints);
 	NFSEXITCODE2(error, nd);
 	return (error);
 nfsmout:
 	vput(vp);
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * nfsv4 getdeviceinfo service
  */
 int
 nfsrvd_getdevinfo(struct nfsrv_descript *nd, __unused int isdgram,
     __unused vnode_t vp, __unused struct nfsexstuff *exp)
 {
 	uint32_t *tl, maxcnt, notify[NFSV4_NOTIFYBITMAP];
 	int cnt, devaddrlen, error = 0, i, layouttype;
 	char devid[NFSX_V4DEVICEID], *devaddr;
 	time_t dev_time;
 
 	NFSM_DISSECT(tl, uint32_t *, 3 * NFSX_UNSIGNED + NFSX_V4DEVICEID);
 	NFSBCOPY(tl, devid, NFSX_V4DEVICEID);
 	tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED);
 	layouttype = fxdr_unsigned(int, *tl++);
 	maxcnt = fxdr_unsigned(uint32_t, *tl++);
 	cnt = fxdr_unsigned(int, *tl);
 	NFSD_DEBUG(4, "getdevinfo ltyp=%d maxcnt=%u bitcnt=%d\n", layouttype,
 	    maxcnt, cnt);
 	if (cnt > NFSV4_NOTIFYBITMAP || cnt < 0) {
 		nd->nd_repstat = NFSERR_INVAL;
 		goto nfsmout;
 	}
 	if (cnt > 0) {
 		NFSM_DISSECT(tl, uint32_t *, cnt * NFSX_UNSIGNED);
 		for (i = 0; i < cnt; i++)
 			notify[i] = fxdr_unsigned(uint32_t, *tl++);
 	}
 	for (i = cnt; i < NFSV4_NOTIFYBITMAP; i++)
 		notify[i] = 0;
 
 	/*
 	 * Check that the device id is not stale.  Device ids are recreated
 	 * each time the nfsd threads are restarted.
 	 */
 	NFSBCOPY(devid, &dev_time, sizeof(dev_time));
 	if (dev_time != nfsdev_time) {
 		nd->nd_repstat = NFSERR_NOENT;
 		goto nfsmout;
 	}
 
 	/* Look for the device id. */
 	nd->nd_repstat = nfsrv_getdevinfo(devid, layouttype, &maxcnt,
 	    notify, &devaddrlen, &devaddr);
 	NFSD_DEBUG(4, "nfsrv_getdevinfo stat=%u\n", nd->nd_repstat);
 	if (nd->nd_repstat == 0) {
 		NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
 		*tl = txdr_unsigned(layouttype);
 		nfsm_strtom(nd, devaddr, devaddrlen);
 		cnt = 0;
 		for (i = 0; i < NFSV4_NOTIFYBITMAP; i++) {
 			if (notify[i] != 0)
 				cnt = i + 1;
 		}
 		NFSM_BUILD(tl, uint32_t *, (cnt + 1) * NFSX_UNSIGNED);
 		*tl++ = txdr_unsigned(cnt);
 		for (i = 0; i < cnt; i++)
 			*tl++ = txdr_unsigned(notify[i]);
 	} else if (nd->nd_repstat == NFSERR_TOOSMALL) {
 		NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
 		*tl = txdr_unsigned(maxcnt);
 	}
 nfsmout:
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * nfsv4 test stateid service
  */
 int
 nfsrvd_teststateid(struct nfsrv_descript *nd, __unused int isdgram,
     __unused vnode_t vp, __unused struct nfsexstuff *exp)
 {
 	uint32_t *tl;
 	nfsv4stateid_t *stateidp = NULL, *tstateidp;
 	int cnt, error = 0, i, ret;
 	struct thread *p = curthread;
 
 	NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
 	cnt = fxdr_unsigned(int, *tl);
 	if (cnt <= 0 || cnt > 1024) {
 		nd->nd_repstat = NFSERR_BADXDR;
 		goto nfsmout;
 	}
 	stateidp = mallocarray(cnt, sizeof(nfsv4stateid_t), M_TEMP, M_WAITOK);
 	tstateidp = stateidp;
 	for (i = 0; i < cnt; i++) {
 		NFSM_DISSECT(tl, uint32_t *, NFSX_STATEID);
 		tstateidp->seqid = fxdr_unsigned(uint32_t, *tl++);
 		NFSBCOPY(tl, tstateidp->other, NFSX_STATEIDOTHER);
 		tstateidp++;
 	}
 	NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
 	*tl = txdr_unsigned(cnt);
 	tstateidp = stateidp;
 	for (i = 0; i < cnt; i++) {
 		ret = nfsrv_teststateid(nd, tstateidp, p);
 		NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
 		*tl = txdr_unsigned(ret);
 		tstateidp++;
 	}
 nfsmout:
 	free(stateidp, M_TEMP);
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * nfs allocate service
  */
 int
 nfsrvd_allocate(struct nfsrv_descript *nd, __unused int isdgram,
     vnode_t vp, struct nfsexstuff *exp)
 {
 	uint32_t *tl;
 	struct nfsvattr forat;
 	int error = 0, forat_ret = 1, gotproxystateid;
 	off_t off, len;
 	struct nfsstate st, *stp = &st;
 	struct nfslock lo, *lop = &lo;
 	nfsv4stateid_t stateid;
 	nfsquad_t clientid;
 	nfsattrbit_t attrbits;
 
 	if (!nfsrv_doallocate) {
 		/*
 		 * If any exported file system, such as a ZFS one, cannot
 		 * do VOP_ALLOCATE(), this operation cannot be supported
 		 * for NFSv4.2.  This cannot be done 'per filesystem', but
 		 * must be for the entire nfsd NFSv4.2 service.
 		 */
 		nd->nd_repstat = NFSERR_NOTSUPP;
 		goto nfsmout;
 	}
 	gotproxystateid = 0;
 	NFSM_DISSECT(tl, uint32_t *, NFSX_STATEID + 2 * NFSX_HYPER);
 	stp->ls_flags = (NFSLCK_CHECK | NFSLCK_WRITEACCESS);
 	lop->lo_flags = NFSLCK_WRITE;
 	stp->ls_ownerlen = 0;
 	stp->ls_op = NULL;
 	stp->ls_uid = nd->nd_cred->cr_uid;
 	stp->ls_stateid.seqid = fxdr_unsigned(u_int32_t, *tl++);
 	clientid.lval[0] = stp->ls_stateid.other[0] = *tl++;
 	clientid.lval[1] = stp->ls_stateid.other[1] = *tl++;
 	if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) {
 		if ((nd->nd_flag & ND_NFSV41) != 0)
 			clientid.qval = nd->nd_clientid.qval;
 		else if (nd->nd_clientid.qval != clientid.qval)
 			printf("EEK2 multiple clids\n");
 	} else {
 		if ((nd->nd_flag & ND_NFSV41) != 0)
 			printf("EEK! no clientid from session\n");
 		nd->nd_flag |= ND_IMPLIEDCLID;
 		nd->nd_clientid.qval = clientid.qval;
 	}
 	stp->ls_stateid.other[2] = *tl++;
 	/*
 	 * Don't allow this to be done for a DS.
 	 */
 	if ((nd->nd_flag & ND_DSSERVER) != 0)
 		nd->nd_repstat = NFSERR_NOTSUPP;
 	/* However, allow the proxy stateid. */
 	if (stp->ls_stateid.seqid == 0xffffffff &&
 	    stp->ls_stateid.other[0] == 0x55555555 &&
 	    stp->ls_stateid.other[1] == 0x55555555 &&
 	    stp->ls_stateid.other[2] == 0x55555555)
 		gotproxystateid = 1;
 	off = fxdr_hyper(tl); tl += 2;
 	lop->lo_first = off;
 	len = fxdr_hyper(tl);
 	lop->lo_end = lop->lo_first + len;
 	/*
 	 * Sanity check the offset and length.
 	 * off and len are off_t (signed int64_t) whereas
 	 * lo_first and lo_end are uint64_t and, as such,
 	 * if off >= 0 && len > 0, lo_end cannot overflow
 	 * unless off_t is changed to something other than
 	 * int64_t.  Check lo_end < lo_first in case that
 	 * is someday the case.
 	 */
 	if (nd->nd_repstat == 0 && (len <= 0 || off < 0 || lop->lo_end >
 	    OFF_MAX || lop->lo_end < lop->lo_first))
 		nd->nd_repstat = NFSERR_INVAL;
 
 	if (nd->nd_repstat == 0 && vp->v_type != VREG)
 		nd->nd_repstat = NFSERR_WRONGTYPE;
 	NFSZERO_ATTRBIT(&attrbits);
 	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_OWNER);
 	forat_ret = nfsvno_getattr(vp, &forat, nd, curthread, 1, &attrbits);
 	if (nd->nd_repstat == 0)
 		nd->nd_repstat = forat_ret;
 	if (nd->nd_repstat == 0 && (forat.na_uid != nd->nd_cred->cr_uid ||
 	     NFSVNO_EXSTRICTACCESS(exp)))
 		nd->nd_repstat = nfsvno_accchk(vp, VWRITE, nd->nd_cred, exp,
 		    curthread, NFSACCCHK_ALLOWOWNER, NFSACCCHK_VPISLOCKED,
 		    NULL);
 	if (nd->nd_repstat == 0 && gotproxystateid == 0)
 		nd->nd_repstat = nfsrv_lockctrl(vp, &stp, &lop, NULL, clientid,
 		    &stateid, exp, nd, curthread);
 
 	NFSD_DEBUG(4, "nfsrvd_allocate: off=%jd len=%jd stat=%d\n",
 	    (intmax_t)off, (intmax_t)len, nd->nd_repstat);
 	if (nd->nd_repstat == 0)
 		nd->nd_repstat = nfsvno_allocate(vp, off, len, nd->nd_cred,
 		    curthread);
 	NFSD_DEBUG(4, "nfsrvd_allocate: aft nfsvno_allocate=%d\n",
 	    nd->nd_repstat);
 	vput(vp);
 	NFSEXITCODE2(0, nd);
 	return (0);
 nfsmout:
 	vput(vp);
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * nfs deallocate service
  */
 int
 nfsrvd_deallocate(struct nfsrv_descript *nd, __unused int isdgram,
     vnode_t vp, struct nfsexstuff *exp)
 {
 	uint32_t *tl;
 	struct nfsvattr forat;
 	int error = 0, forat_ret = 1, gotproxystateid;
 	off_t off, len;
 	struct nfsstate st, *stp = &st;
 	struct nfslock lo, *lop = &lo;
 	nfsv4stateid_t stateid;
 	nfsquad_t clientid;
 	nfsattrbit_t attrbits;
 
 	gotproxystateid = 0;
 	NFSM_DISSECT(tl, uint32_t *, NFSX_STATEID + 2 * NFSX_HYPER);
 	stp->ls_flags = (NFSLCK_CHECK | NFSLCK_WRITEACCESS);
 	lop->lo_flags = NFSLCK_WRITE;
 	stp->ls_ownerlen = 0;
 	stp->ls_op = NULL;
 	stp->ls_uid = nd->nd_cred->cr_uid;
 	stp->ls_stateid.seqid = fxdr_unsigned(u_int32_t, *tl++);
 	clientid.lval[0] = stp->ls_stateid.other[0] = *tl++;
 	clientid.lval[1] = stp->ls_stateid.other[1] = *tl++;
 	if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) {
 		if ((nd->nd_flag & ND_NFSV41) != 0)
 			clientid.qval = nd->nd_clientid.qval;
 		else if (nd->nd_clientid.qval != clientid.qval)
 			printf("EEK2 multiple clids\n");
 	} else {
 		if ((nd->nd_flag & ND_NFSV41) != 0)
 			printf("EEK! no clientid from session\n");
 		nd->nd_flag |= ND_IMPLIEDCLID;
 		nd->nd_clientid.qval = clientid.qval;
 	}
 	stp->ls_stateid.other[2] = *tl++;
 	/*
 	 * Don't allow this to be done for a DS.
 	 */
 	if ((nd->nd_flag & ND_DSSERVER) != 0)
 		nd->nd_repstat = NFSERR_NOTSUPP;
 	/* However, allow the proxy stateid. */
 	if (stp->ls_stateid.seqid == 0xffffffff &&
 	    stp->ls_stateid.other[0] == 0x55555555 &&
 	    stp->ls_stateid.other[1] == 0x55555555 &&
 	    stp->ls_stateid.other[2] == 0x55555555)
 		gotproxystateid = 1;
 	off = fxdr_hyper(tl); tl += 2;
 	lop->lo_first = off;
 	len = fxdr_hyper(tl);
 	if (len < 0)
 		len = OFF_MAX;
 	NFSD_DEBUG(4, "dealloc: off=%jd len=%jd\n", (intmax_t)off,
 	    (intmax_t)len);
 	lop->lo_end = lop->lo_first + len;
 	/*
 	 * Sanity check the offset and length.
 	 * off and len are off_t (signed int64_t) whereas
 	 * lo_first and lo_end are uint64_t and, as such,
 	 * if off >= 0 && len > 0, lo_end cannot overflow
 	 * unless off_t is changed to something other than
 	 * int64_t.  Check lo_end < lo_first in case that
 	 * is someday the case.
 	 * The error to return is not specified by RFC 7862 so I
 	 * made this compatible with the Linux knfsd.
 	 */
 	if (nd->nd_repstat == 0) {
 		if (off < 0 || lop->lo_end > NFSRV_MAXFILESIZE)
 			nd->nd_repstat = NFSERR_FBIG;
 		else if (len == 0 || lop->lo_end < lop->lo_first)
 			nd->nd_repstat = NFSERR_INVAL;
 	}
 
 	if (nd->nd_repstat == 0 && vp->v_type != VREG)
 		nd->nd_repstat = NFSERR_WRONGTYPE;
 	NFSZERO_ATTRBIT(&attrbits);
 	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_OWNER);
 	forat_ret = nfsvno_getattr(vp, &forat, nd, curthread, 1, &attrbits);
 	if (nd->nd_repstat == 0)
 		nd->nd_repstat = forat_ret;
 	if (nd->nd_repstat == 0 && (forat.na_uid != nd->nd_cred->cr_uid ||
 	     NFSVNO_EXSTRICTACCESS(exp)))
 		nd->nd_repstat = nfsvno_accchk(vp, VWRITE, nd->nd_cred, exp,
 		    curthread, NFSACCCHK_ALLOWOWNER, NFSACCCHK_VPISLOCKED,
 		    NULL);
 	if (nd->nd_repstat == 0 && gotproxystateid == 0)
 		nd->nd_repstat = nfsrv_lockctrl(vp, &stp, &lop, NULL, clientid,
 		    &stateid, exp, nd, curthread);
 
 	if (nd->nd_repstat == 0)
 		nd->nd_repstat = nfsvno_deallocate(vp, off, len, nd->nd_cred,
 		    curthread);
 	vput(vp);
 	NFSD_DEBUG(4, "eo deallocate=%d\n", nd->nd_repstat);
 	NFSEXITCODE2(0, nd);
 	return (0);
 nfsmout:
 	vput(vp);
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * nfs copy service
  */
 int
 nfsrvd_copy_file_range(struct nfsrv_descript *nd, __unused int isdgram,
     vnode_t vp, vnode_t tovp, struct nfsexstuff *exp, struct nfsexstuff *toexp)
 {
 	uint32_t *tl;
 	struct nfsvattr at;
 	int cnt, error = 0, ret;
 	off_t inoff, outoff;
 	uint64_t len;
 	size_t xfer;
 	struct nfsstate inst, outst, *instp = &inst, *outstp = &outst;
 	struct nfslock inlo, outlo, *inlop = &inlo, *outlop = &outlo;
 	nfsquad_t clientid;
 	nfsv4stateid_t stateid;
 	nfsattrbit_t attrbits;
 	void *rl_rcookie, *rl_wcookie;
 
 	rl_rcookie = rl_wcookie = NULL;
 	if (nfsrv_devidcnt > 0) {
 		/*
 		 * For a pNFS server, reply NFSERR_NOTSUPP so that the client
 		 * will do the copy via I/O on the DS(s).
 		 */
 		nd->nd_repstat = NFSERR_NOTSUPP;
 		goto nfsmout;
 	}
 	if (vp == tovp) {
 		/* Copying a byte range within the same file is not allowed. */
 		nd->nd_repstat = NFSERR_INVAL;
 		goto nfsmout;
 	}
 	NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_STATEID + 3 * NFSX_HYPER +
 	    3 * NFSX_UNSIGNED);
 	instp->ls_flags = (NFSLCK_CHECK | NFSLCK_READACCESS);
 	inlop->lo_flags = NFSLCK_READ;
 	instp->ls_ownerlen = 0;
 	instp->ls_op = NULL;
 	instp->ls_uid = nd->nd_cred->cr_uid;
 	instp->ls_stateid.seqid = fxdr_unsigned(uint32_t, *tl++);
 	clientid.lval[0] = instp->ls_stateid.other[0] = *tl++;
 	clientid.lval[1] = instp->ls_stateid.other[1] = *tl++;
 	if ((nd->nd_flag & ND_IMPLIEDCLID) != 0)
 		clientid.qval = nd->nd_clientid.qval;
 	instp->ls_stateid.other[2] = *tl++;
 	outstp->ls_flags = (NFSLCK_CHECK | NFSLCK_WRITEACCESS);
 	outlop->lo_flags = NFSLCK_WRITE;
 	outstp->ls_ownerlen = 0;
 	outstp->ls_op = NULL;
 	outstp->ls_uid = nd->nd_cred->cr_uid;
 	outstp->ls_stateid.seqid = fxdr_unsigned(uint32_t, *tl++);
 	outstp->ls_stateid.other[0] = *tl++;
 	outstp->ls_stateid.other[1] = *tl++;
 	outstp->ls_stateid.other[2] = *tl++;
 	inoff = fxdr_hyper(tl); tl += 2;
 	inlop->lo_first = inoff;
 	outoff = fxdr_hyper(tl); tl += 2;
 	outlop->lo_first = outoff;
 	len = fxdr_hyper(tl); tl += 2;
 	if (len == 0) {
 		/* len == 0 means to EOF. */
 		inlop->lo_end = OFF_MAX;
 		outlop->lo_end = OFF_MAX;
 	} else {
 		inlop->lo_end = inlop->lo_first + len;
 		outlop->lo_end = outlop->lo_first + len;
 	}
 
 	/*
 	 * At this time only consecutive, synchronous copy is supported,
 	 * so ca_consecutive and ca_synchronous can be ignored.
 	 */
 	tl += 2;
 
 	cnt = fxdr_unsigned(int, *tl);
 	if ((nd->nd_flag & ND_DSSERVER) != 0 || cnt != 0)
 		nd->nd_repstat = NFSERR_NOTSUPP;
 	if (nd->nd_repstat == 0 && (inoff > OFF_MAX || outoff > OFF_MAX ||
 	    inlop->lo_end > OFF_MAX || outlop->lo_end > OFF_MAX ||
 	    inlop->lo_end < inlop->lo_first || outlop->lo_end <
 	    outlop->lo_first))
 		nd->nd_repstat = NFSERR_INVAL;
 
 	if (nd->nd_repstat == 0 && vp->v_type != VREG)
 		nd->nd_repstat = NFSERR_WRONGTYPE;
 
 	/* Check permissions for the input file. */
 	NFSZERO_ATTRBIT(&attrbits);
 	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_OWNER);
 	ret = nfsvno_getattr(vp, &at, nd, curthread, 1, &attrbits);
 	if (nd->nd_repstat == 0)
 		nd->nd_repstat = ret;
 	if (nd->nd_repstat == 0 && (at.na_uid != nd->nd_cred->cr_uid ||
 	     NFSVNO_EXSTRICTACCESS(exp)))
 		nd->nd_repstat = nfsvno_accchk(vp, VREAD, nd->nd_cred, exp,
 		    curthread, NFSACCCHK_ALLOWOWNER, NFSACCCHK_VPISLOCKED,
 		    NULL);
 	if (nd->nd_repstat == 0)
 		nd->nd_repstat = nfsrv_lockctrl(vp, &instp, &inlop, NULL,
 		    clientid, &stateid, exp, nd, curthread);
 	NFSVOPUNLOCK(vp);
 	if (nd->nd_repstat != 0)
 		goto out;
 
 	error = NFSVOPLOCK(tovp, LK_SHARED);
 	if (error != 0)
 		goto out;
 	if (tovp->v_type != VREG)
 		nd->nd_repstat = NFSERR_WRONGTYPE;
 
 	/* For the output file, we only need the Owner attribute. */
 	ret = nfsvno_getattr(tovp, &at, nd, curthread, 1, &attrbits);
 	if (nd->nd_repstat == 0)
 		nd->nd_repstat = ret;
 	if (nd->nd_repstat == 0 && (at.na_uid != nd->nd_cred->cr_uid ||
 	     NFSVNO_EXSTRICTACCESS(exp)))
 		nd->nd_repstat = nfsvno_accchk(tovp, VWRITE, nd->nd_cred, toexp,
 		    curthread, NFSACCCHK_ALLOWOWNER, NFSACCCHK_VPISLOCKED,
 		    NULL);
 	if (nd->nd_repstat == 0)
 		nd->nd_repstat = nfsrv_lockctrl(tovp, &outstp, &outlop, NULL,
 		    clientid, &stateid, toexp, nd, curthread);
 	NFSVOPUNLOCK(tovp);
 
 	/* Range lock the byte ranges for both invp and outvp. */
 	if (nd->nd_repstat == 0) {
 		for (;;) {
 			if (len == 0) {
 				rl_wcookie = vn_rangelock_wlock(tovp, outoff,
 				    OFF_MAX);
 				rl_rcookie = vn_rangelock_tryrlock(vp, inoff,
 				    OFF_MAX);
 			} else {
 				rl_wcookie = vn_rangelock_wlock(tovp, outoff,
 				    outoff + len);
 				rl_rcookie = vn_rangelock_tryrlock(vp, inoff,
 				    inoff + len);
 			}
 			if (rl_rcookie != NULL)
 				break;
 			vn_rangelock_unlock(tovp, rl_wcookie);
 			if (len == 0)
 				rl_rcookie = vn_rangelock_rlock(vp, inoff,
 				    OFF_MAX);
 			else
 				rl_rcookie = vn_rangelock_rlock(vp, inoff,
 				    inoff + len);
 			vn_rangelock_unlock(vp, rl_rcookie);
 		}
 
 		error = NFSVOPLOCK(vp, LK_SHARED);
 		if (error == 0) {
 			ret = nfsvno_getattr(vp, &at, nd, curthread, 1, NULL);
 			if (ret == 0) {
 				/*
 				 * Since invp is range locked, na_size should
 				 * not change.
 				 */
 				if (len == 0 && at.na_size > inoff) {
 					/*
 					 * If len == 0, set it based on invp's 
 					 * size. If offset is past EOF, just
 					 * leave len == 0.
 					 */
 					len = at.na_size - inoff;
 				} else if (nfsrv_linux42server == 0 &&
 				    inoff + len > at.na_size) {
 					/*
 					 * RFC-7862 says that NFSERR_INVAL must
 					 * be returned when inoff + len exceeds
 					 * the file size, however the NFSv4.2
 					 * Linux client likes to do this, so
 					 * only check if nfsrv_linux42server
 					 * is not set.
 					 */
 					nd->nd_repstat = NFSERR_INVAL;
 				}
 			}
 			NFSVOPUNLOCK(vp);
 			if (ret != 0 && nd->nd_repstat == 0)
 				nd->nd_repstat = ret;
 		} else if (nd->nd_repstat == 0)
 			nd->nd_repstat = error;
 	}
 
 	xfer = len;
 	if (nd->nd_repstat == 0) {
 		nd->nd_repstat = vn_copy_file_range(vp, &inoff, tovp, &outoff,
 		    &xfer, COPY_FILE_RANGE_TIMEO1SEC, nd->nd_cred, nd->nd_cred,
 		    NULL);
 		if (nd->nd_repstat == 0)
 			len = xfer;
 	}
 
 	/* Unlock the ranges. */
 	if (rl_rcookie != NULL)
 		vn_rangelock_unlock(vp, rl_rcookie);
 	if (rl_wcookie != NULL)
 		vn_rangelock_unlock(tovp, rl_wcookie);
 
 	if (nd->nd_repstat == 0) {
 		NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED + NFSX_HYPER +
 		    NFSX_VERF);
 		*tl++ = txdr_unsigned(0);	/* No callback ids. */
 		txdr_hyper(len, tl); tl += 2;
 		*tl++ = txdr_unsigned(NFSWRITE_UNSTABLE);
 		*tl++ = txdr_unsigned(nfsboottime.tv_sec);
 		*tl++ = txdr_unsigned(nfsboottime.tv_usec);
 		*tl++ = newnfs_true;
 		*tl = newnfs_true;
 	}
 out:
 	vrele(vp);
 	vrele(tovp);
 	NFSEXITCODE2(error, nd);
 	return (error);
 nfsmout:
 	vput(vp);
 	vrele(tovp);
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * nfs seek service
  */
 int
 nfsrvd_seek(struct nfsrv_descript *nd, __unused int isdgram,
     vnode_t vp, struct nfsexstuff *exp)
 {
 	uint32_t *tl;
 	struct nfsvattr at;
 	int content, error = 0;
 	off_t off;
 	u_long cmd;
 	nfsattrbit_t attrbits;
 	bool eof;
 
 	NFSM_DISSECT(tl, uint32_t *, NFSX_STATEID + NFSX_HYPER + NFSX_UNSIGNED);
 	/* Ignore the stateid for now. */
 	tl += (NFSX_STATEID / NFSX_UNSIGNED);
 	off = fxdr_hyper(tl); tl += 2;
 	content = fxdr_unsigned(int, *tl);
 	if (content == NFSV4CONTENT_DATA)
 		cmd = FIOSEEKDATA;
 	else if (content == NFSV4CONTENT_HOLE)
 		cmd = FIOSEEKHOLE;
 	else
 		nd->nd_repstat = NFSERR_BADXDR;
 	if (nd->nd_repstat == 0 && vp->v_type == VDIR)
 		nd->nd_repstat = NFSERR_ISDIR;
 	if (nd->nd_repstat == 0 && vp->v_type != VREG)
 		nd->nd_repstat = NFSERR_WRONGTYPE;
 	if (nd->nd_repstat == 0 && off < 0)
 		nd->nd_repstat = NFSERR_NXIO;
 	if (nd->nd_repstat == 0) {
 		/* Check permissions for the input file. */
 		NFSZERO_ATTRBIT(&attrbits);
 		NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_OWNER);
 		nd->nd_repstat = nfsvno_getattr(vp, &at, nd, curthread, 1,
 		    &attrbits);
 	}
 	if (nd->nd_repstat == 0 && (at.na_uid != nd->nd_cred->cr_uid ||
 	     NFSVNO_EXSTRICTACCESS(exp)))
 		nd->nd_repstat = nfsvno_accchk(vp, VREAD, nd->nd_cred, exp,
 		    curthread, NFSACCCHK_ALLOWOWNER, NFSACCCHK_VPISLOCKED,
 		    NULL);
 	if (nd->nd_repstat != 0)
 		goto nfsmout;
 
 	/* nfsvno_seek() unlocks and vrele()s the vp. */
 	nd->nd_repstat = nfsvno_seek(nd, vp, cmd, &off, content, &eof,
 	    nd->nd_cred, curthread);
 	if (nd->nd_repstat == 0 && eof && content == NFSV4CONTENT_DATA &&
 	    nfsrv_linux42server != 0)
 		nd->nd_repstat = NFSERR_NXIO;
 	if (nd->nd_repstat == 0) {
 		NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED + NFSX_HYPER);
 		if (eof)
 			*tl++ = newnfs_true;
 		else
 			*tl++ = newnfs_false;
 		txdr_hyper(off, tl);
 	}
 	NFSEXITCODE2(error, nd);
 	return (error);
 nfsmout:
 	vput(vp);
 	NFSEXITCODE2(error, nd);
 	return (error);
 }
 
 /*
  * nfs get extended attribute service
  */
 int
 nfsrvd_getxattr(struct nfsrv_descript *nd, __unused int isdgram,
     vnode_t vp, __unused struct nfsexstuff *exp)
 {
 	uint32_t *tl;
 	struct mbuf *mp = NULL, *mpend = NULL;
 	int error, len;
 	char *name;
 	struct thread *p = curthread;
 	uint16_t off;
 
 	error = 0;
 	NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
 	len = fxdr_unsigned(int, *tl);
 	if (len <= 0) {
 		nd->nd_repstat = NFSERR_BADXDR;
 		goto nfsmout;
 	}
 	if (len > EXTATTR_MAXNAMELEN) {
 		nd->nd_repstat = NFSERR_NOXATTR;
 		goto nfsmout;
 	}
 	name = malloc(len + 1, M_TEMP, M_WAITOK);
 	nd->nd_repstat = nfsrv_mtostr(nd, name, len);
 	if (nd->nd_repstat == 0)
 		nd->nd_repstat = nfsvno_getxattr(vp, name,
 		    nd->nd_maxresp, nd->nd_cred, nd->nd_flag,
 		    nd->nd_maxextsiz, p, &mp, &mpend, &len);
 	if (nd->nd_repstat == ENOATTR)
 		nd->nd_repstat = NFSERR_NOXATTR;
 	else if (nd->nd_repstat == EOPNOTSUPP)
 		nd->nd_repstat = NFSERR_NOTSUPP;
 	if (nd->nd_repstat == 0) {
 		NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
 		*tl = txdr_unsigned(len);
 		if (len > 0) {
 			nd->nd_mb->m_next = mp;
 			nd->nd_mb = mpend;
 			if ((mpend->m_flags & M_EXTPG) != 0) {
 				nd->nd_flag |= ND_EXTPG;
 				nd->nd_bextpg = mpend->m_epg_npgs - 1;
 				nd->nd_bpos = (char *)(void *)
 				   PHYS_TO_DMAP(mpend->m_epg_pa[nd->nd_bextpg]);
 				off = (nd->nd_bextpg == 0) ?
 				    mpend->m_epg_1st_off : 0;
 				nd->nd_bpos += off + mpend->m_epg_last_len;
 				nd->nd_bextpgsiz = PAGE_SIZE -
 				    mpend->m_epg_last_len - off;
 			} else
 				nd->nd_bpos = mtod(mpend, char *) +
 				    mpend->m_len;
 		}
 	}
 	free(name, M_TEMP);
 
 nfsmout:
 	if (nd->nd_repstat == 0)
 		nd->nd_repstat = error;
 	vput(vp);
 	NFSEXITCODE2(0, nd);
 	return (0);
 }
 
 /*
  * nfs set extended attribute service
  */
 int
 nfsrvd_setxattr(struct nfsrv_descript *nd, __unused int isdgram,
     vnode_t vp, __unused struct nfsexstuff *exp)
 {
 	uint32_t *tl;
 	struct nfsvattr ova, nva;
 	nfsattrbit_t attrbits;
 	int error, len, opt;
 	char *name;
 	size_t siz;
 	struct thread *p = curthread;
 
 	error = 0;
 	name = NULL;
 	NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
 	opt = fxdr_unsigned(int, *tl++);
 	len = fxdr_unsigned(int, *tl);
 	if (len <= 0) {
 		nd->nd_repstat = NFSERR_BADXDR;
 		goto nfsmout;
 	}
 	if (len > EXTATTR_MAXNAMELEN) {
 		nd->nd_repstat = NFSERR_NOXATTR;
 		goto nfsmout;
 	}
 	name = malloc(len + 1, M_TEMP, M_WAITOK);
 	error = nfsrv_mtostr(nd, name, len);
 	if (error != 0)
 		goto nfsmout;
 	NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
 	len = fxdr_unsigned(int, *tl);
 	if (len < 0 || len > IOSIZE_MAX) {
 		nd->nd_repstat = NFSERR_XATTR2BIG;
 		goto nfsmout;
 	}
 	switch (opt) {
 	case NFSV4SXATTR_CREATE:
 		error = VOP_GETEXTATTR(vp, EXTATTR_NAMESPACE_USER, name, NULL,
 		    &siz, nd->nd_cred, p);
 		if (error != ENOATTR)
 			nd->nd_repstat = NFSERR_EXIST;
 		error = 0;
 		break;
 	case NFSV4SXATTR_REPLACE:
 		error = VOP_GETEXTATTR(vp, EXTATTR_NAMESPACE_USER, name, NULL,
 		    &siz, nd->nd_cred, p);
 		if (error != 0)
 			nd->nd_repstat = NFSERR_NOXATTR;
 		break;
 	case NFSV4SXATTR_EITHER:
 		break;
 	default:
 		nd->nd_repstat = NFSERR_BADXDR;
 	}
 	if (nd->nd_repstat != 0)
 		goto nfsmout;
 
 	/* Now, do the Set Extended attribute, with Change before and after. */
 	NFSZERO_ATTRBIT(&attrbits);
 	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE);
 	nd->nd_repstat = nfsvno_getattr(vp, &ova, nd, p, 1, &attrbits);
 	if (nd->nd_repstat == 0) {
 		nd->nd_repstat = nfsvno_setxattr(vp, name, len, nd->nd_md,
 		    nd->nd_dpos, nd->nd_cred, p);
 		if (nd->nd_repstat == ENXIO)
 			nd->nd_repstat = NFSERR_XATTR2BIG;
 	}
 	if (nd->nd_repstat == 0 && len > 0)
 		nd->nd_repstat = nfsm_advance(nd, NFSM_RNDUP(len), -1);
 	if (nd->nd_repstat == 0)
 		nd->nd_repstat = nfsvno_getattr(vp, &nva, nd, p, 1, &attrbits);
 	if (nd->nd_repstat == 0) {
 		NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER + NFSX_UNSIGNED);
 		*tl++ = newnfs_true;
 		txdr_hyper(ova.na_filerev, tl); tl += 2;
 		txdr_hyper(nva.na_filerev, tl);
 	}
 
 nfsmout:
 	free(name, M_TEMP);
 	if (nd->nd_repstat == 0)
 		nd->nd_repstat = error;
 	vput(vp);
 	NFSEXITCODE2(0, nd);
 	return (0);
 }
 
 /*
  * nfs remove extended attribute service
  */
 int
 nfsrvd_rmxattr(struct nfsrv_descript *nd, __unused int isdgram,
     vnode_t vp, __unused struct nfsexstuff *exp)
 {
 	uint32_t *tl;
 	struct nfsvattr ova, nva;
 	nfsattrbit_t attrbits;
 	int error, len;
 	char *name;
 	struct thread *p = curthread;
 
 	error = 0;
 	name = NULL;
 	NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
 	len = fxdr_unsigned(int, *tl);
 	if (len <= 0) {
 		nd->nd_repstat = NFSERR_BADXDR;
 		goto nfsmout;
 	}
 	if (len > EXTATTR_MAXNAMELEN) {
 		nd->nd_repstat = NFSERR_NOXATTR;
 		goto nfsmout;
 	}
 	name = malloc(len + 1, M_TEMP, M_WAITOK);
 	error = nfsrv_mtostr(nd, name, len);
 	if (error != 0)
 		goto nfsmout;
 
 	if ((nd->nd_flag & ND_IMPLIEDCLID) == 0) {
 		printf("EEK! nfsrvd_rmxattr: no implied clientid\n");
 		error = NFSERR_NOXATTR;
 		goto nfsmout;
 	}
 	/*
 	 * Now, do the Remove Extended attribute, with Change before and
 	 * after.
 	*/
 	NFSZERO_ATTRBIT(&attrbits);
 	NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_CHANGE);
 	nd->nd_repstat = nfsvno_getattr(vp, &ova, nd, p, 1, &attrbits);
 	if (nd->nd_repstat == 0) {
 		nd->nd_repstat = nfsvno_rmxattr(nd, vp, name, nd->nd_cred, p);
 		if (nd->nd_repstat == ENOATTR)
 			nd->nd_repstat = NFSERR_NOXATTR;
 	}
 	if (nd->nd_repstat == 0)
 		nd->nd_repstat = nfsvno_getattr(vp, &nva, nd, p, 1, &attrbits);
 	if (nd->nd_repstat == 0) {
 		NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER + NFSX_UNSIGNED);
 		*tl++ = newnfs_true;
 		txdr_hyper(ova.na_filerev, tl); tl += 2;
 		txdr_hyper(nva.na_filerev, tl);
 	}
 
 nfsmout:
 	free(name, M_TEMP);
 	if (nd->nd_repstat == 0)
 		nd->nd_repstat = error;
 	vput(vp);
 	NFSEXITCODE2(0, nd);
 	return (0);
 }
 
 /*
  * nfs list extended attribute service
  */
 int
 nfsrvd_listxattr(struct nfsrv_descript *nd, __unused int isdgram,
     vnode_t vp, __unused struct nfsexstuff *exp)
 {
 	uint32_t cnt, *tl, len, len2, i, pos, retlen;
 	int error;
 	uint64_t cookie, cookie2;
 	u_char *buf;
 	bool eof;
 	struct thread *p = curthread;
 
 	error = 0;
 	buf = NULL;
 	NFSM_DISSECT(tl, uint32_t *, NFSX_HYPER + NFSX_UNSIGNED);
 	/*
 	 * The cookie doesn't need to be in net byte order, but FreeBSD
 	 * does so to make it more readable in packet traces.
 	 */
 	cookie = fxdr_hyper(tl); tl += 2;
 	len = fxdr_unsigned(uint32_t, *tl);
 	if (len == 0 || cookie >= IOSIZE_MAX) {
 		nd->nd_repstat = NFSERR_BADXDR;
 		goto nfsmout;
 	}
 	if (len > nd->nd_maxresp - NFS_MAXXDR)
 		len = nd->nd_maxresp - NFS_MAXXDR;
 	len2 = len;
 	nd->nd_repstat = nfsvno_listxattr(vp, cookie, nd->nd_cred, p, &buf,
 	    &len, &eof);
 	if (nd->nd_repstat == EOPNOTSUPP)
 		nd->nd_repstat = NFSERR_NOTSUPP;
 	if (nd->nd_repstat == 0) {
 		cookie2 = cookie + len;
 		if (cookie2 < cookie)
 			nd->nd_repstat = NFSERR_BADXDR;
 	}
 	retlen = NFSX_HYPER + 2 * NFSX_UNSIGNED;
 	if (nd->nd_repstat == 0 && len2 < retlen)
 		nd->nd_repstat = NFSERR_TOOSMALL;
 	if (nd->nd_repstat == 0) {
 		/* Now copy the entries out. */
 		if (len == 0) {
 			/* The cookie was at eof. */
 			NFSM_BUILD(tl, uint32_t *, NFSX_HYPER + 2 *
 			    NFSX_UNSIGNED);
 			txdr_hyper(cookie2, tl); tl += 2;
 			*tl++ = txdr_unsigned(0);
 			*tl = newnfs_true;
 			goto nfsmout;
 		}
 
 		/* Sanity check the cookie. */
 		for (pos = 0; pos < len; pos += (i + 1)) {
 			if (pos == cookie)
 				break;
 			i = buf[pos];
 		}
 		if (pos != cookie) {
 			nd->nd_repstat = NFSERR_INVAL;
 			goto nfsmout;
 		}
 
 		/* Loop around copying the entrie(s) out. */
 		cnt = 0;
 		len -= cookie;
 		i = buf[pos];
 		while (i < len && len2 >= retlen + NFSM_RNDUP(i) +
 		    NFSX_UNSIGNED) {
 			if (cnt == 0) {
 				NFSM_BUILD(tl, uint32_t *, NFSX_HYPER +
 				    NFSX_UNSIGNED);
 				txdr_hyper(cookie2, tl); tl += 2;
 			}
 			retlen += nfsm_strtom(nd, &buf[pos + 1], i);
 			len -= (i + 1);
 			pos += (i + 1);
 			i = buf[pos];
 			cnt++;
 		}
 		/*
 		 * eof is set true/false by nfsvno_listxattr(), but if we
 		 * can't copy all entries returned by nfsvno_listxattr(),
 		 * we are not at eof.
 		 */
 		if (len > 0)
 			eof = false;
 		if (cnt > 0) {
 			/* *tl is set above. */
 			*tl = txdr_unsigned(cnt);
 			NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED);
 			if (eof)
 				*tl = newnfs_true;
 			else
 				*tl = newnfs_false;
 		} else
 			nd->nd_repstat = NFSERR_TOOSMALL;
 	}
 
 nfsmout:
 	free(buf, M_TEMP);
 	if (nd->nd_repstat == 0)
 		nd->nd_repstat = error;
 	vput(vp);
 	NFSEXITCODE2(0, nd);
 	return (0);
 }
 
 /*
  * nfsv4 service not supported
  */
 int
 nfsrvd_notsupp(struct nfsrv_descript *nd, __unused int isdgram,
     __unused vnode_t vp, __unused struct nfsexstuff *exp)
 {
 
 	nd->nd_repstat = NFSERR_NOTSUPP;
 	NFSEXITCODE2(0, nd);
 	return (0);
 }
diff --git a/sys/fs/smbfs/smbfs_vnops.c b/sys/fs/smbfs/smbfs_vnops.c
index 8df399b392f9..044745111543 100644
--- a/sys/fs/smbfs/smbfs_vnops.c
+++ b/sys/fs/smbfs/smbfs_vnops.c
@@ -1,1390 +1,1385 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2000-2001 Boris Popov
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/namei.h>
 #include <sys/kernel.h>
 #include <sys/proc.h>
 #include <sys/bio.h>
 #include <sys/buf.h>
 #include <sys/fcntl.h>
 #include <sys/mount.h>
 #include <sys/unistd.h>
 #include <sys/vnode.h>
 #include <sys/limits.h>
 #include <sys/lockf.h>
 #include <sys/stat.h>
 
 #include <vm/vm.h>
 #include <vm/vm_extern.h>
 
 #include <netsmb/smb.h>
 #include <netsmb/smb_conn.h>
 #include <netsmb/smb_subr.h>
 
 #include <fs/smbfs/smbfs.h>
 #include <fs/smbfs/smbfs_node.h>
 #include <fs/smbfs/smbfs_subr.h>
 
 /*
  * Prototypes for SMBFS vnode operations
  */
 static vop_create_t	smbfs_create;
 static vop_mknod_t	smbfs_mknod;
 static vop_open_t	smbfs_open;
 static vop_close_t	smbfs_close;
 static vop_access_t	smbfs_access;
 static vop_getattr_t	smbfs_getattr;
 static vop_setattr_t	smbfs_setattr;
 static vop_read_t	smbfs_read;
 static vop_write_t	smbfs_write;
 static vop_fsync_t	smbfs_fsync;
 static vop_remove_t	smbfs_remove;
 static vop_link_t	smbfs_link;
 static vop_lookup_t	smbfs_lookup;
 static vop_rename_t	smbfs_rename;
 static vop_mkdir_t	smbfs_mkdir;
 static vop_rmdir_t	smbfs_rmdir;
 static vop_symlink_t	smbfs_symlink;
 static vop_readdir_t	smbfs_readdir;
 static vop_strategy_t	smbfs_strategy;
 static vop_print_t	smbfs_print;
 static vop_pathconf_t	smbfs_pathconf;
 static vop_advlock_t	smbfs_advlock;
 static vop_getextattr_t	smbfs_getextattr;
 
 struct vop_vector smbfs_vnodeops = {
 	.vop_default =		&default_vnodeops,
 
 	.vop_access =		smbfs_access,
 	.vop_advlock =		smbfs_advlock,
 	.vop_close =		smbfs_close,
 	.vop_create =		smbfs_create,
 	.vop_fsync =		smbfs_fsync,
 	.vop_getattr =		smbfs_getattr,
 	.vop_getextattr = 	smbfs_getextattr,
 	.vop_getpages =		smbfs_getpages,
 	.vop_inactive =		smbfs_inactive,
 	.vop_ioctl =		smbfs_ioctl,
 	.vop_link =		smbfs_link,
 	.vop_lookup =		smbfs_lookup,
 	.vop_mkdir =		smbfs_mkdir,
 	.vop_mknod =		smbfs_mknod,
 	.vop_open =		smbfs_open,
 	.vop_pathconf =		smbfs_pathconf,
 	.vop_print =		smbfs_print,
 	.vop_putpages =		smbfs_putpages,
 	.vop_read =		smbfs_read,
 	.vop_readdir =		smbfs_readdir,
 	.vop_reclaim =		smbfs_reclaim,
 	.vop_remove =		smbfs_remove,
 	.vop_rename =		smbfs_rename,
 	.vop_rmdir =		smbfs_rmdir,
 	.vop_setattr =		smbfs_setattr,
 /*	.vop_setextattr =	smbfs_setextattr,*/
 	.vop_strategy =		smbfs_strategy,
 	.vop_symlink =		smbfs_symlink,
 	.vop_write =		smbfs_write,
 };
 VFS_VOP_VECTOR_REGISTER(smbfs_vnodeops);
 
 static int
 smbfs_access(ap)
 	struct vop_access_args /* {
 		struct vnode *a_vp;
 		accmode_t a_accmode;
 		struct ucred *a_cred;
 		struct thread *a_td;
 	} */ *ap;
 {
 	struct vnode *vp = ap->a_vp;
 	accmode_t accmode = ap->a_accmode;
 	mode_t mpmode;
 	struct smbmount *smp = VTOSMBFS(vp);
 
 	SMBVDEBUG("\n");
 	if ((accmode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY)) {
 		switch (vp->v_type) {
 		    case VREG: case VDIR: case VLNK:
 			return EROFS;
 		    default:
 			break;
 		}
 	}
 	mpmode = vp->v_type == VREG ? smp->sm_file_mode : smp->sm_dir_mode;
 	return (vaccess(vp->v_type, mpmode, smp->sm_uid,
 	    smp->sm_gid, ap->a_accmode, ap->a_cred));
 }
 
 /* ARGSUSED */
 static int
 smbfs_open(ap)
 	struct vop_open_args /* {
 		struct vnode *a_vp;
 		int  a_mode;
 		struct ucred *a_cred;
 		struct thread *a_td;
 	} */ *ap;
 {
 	struct vnode *vp = ap->a_vp;
 	struct smbnode *np = VTOSMB(vp);
 	struct smb_cred *scred;
 	struct vattr vattr;
 	int mode = ap->a_mode;
 	int error, accmode;
 
 	SMBVDEBUG("%s,%d\n", np->n_name, (np->n_flag & NOPEN) != 0);
 	if (vp->v_type != VREG && vp->v_type != VDIR) { 
 		SMBFSERR("open eacces vtype=%d\n", vp->v_type);
 		return EACCES;
 	}
 	if (vp->v_type == VDIR) {
 		np->n_flag |= NOPEN;
 		return 0;
 	}
 	if (np->n_flag & NMODIFIED) {
 		if ((error = smbfs_vinvalbuf(vp, ap->a_td)) == EINTR)
 			return error;
 		smbfs_attr_cacheremove(vp);
 		error = VOP_GETATTR(vp, &vattr, ap->a_cred);
 		if (error)
 			return error;
 		np->n_mtime.tv_sec = vattr.va_mtime.tv_sec;
 	} else {
 		error = VOP_GETATTR(vp, &vattr, ap->a_cred);
 		if (error)
 			return error;
 		if (np->n_mtime.tv_sec != vattr.va_mtime.tv_sec) {
 			error = smbfs_vinvalbuf(vp, ap->a_td);
 			if (error == EINTR)
 				return error;
 			np->n_mtime.tv_sec = vattr.va_mtime.tv_sec;
 		}
 	}
 	if ((np->n_flag & NOPEN) != 0)
 		return 0;
 	/*
 	 * Use DENYNONE to give unixy semantics of permitting
 	 * everything not forbidden by permissions.  Ie denial
 	 * is up to server with clients/openers needing to use
 	 * advisory locks for further control.
 	 */
 	accmode = SMB_SM_DENYNONE|SMB_AM_OPENREAD;
 	if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0)
 		accmode = SMB_SM_DENYNONE|SMB_AM_OPENRW;
 	scred = smbfs_malloc_scred();
 	smb_makescred(scred, ap->a_td, ap->a_cred);
 	error = smbfs_smb_open(np, accmode, scred);
 	if (error) {
 		if (mode & FWRITE)
 			return EACCES;
 		else if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
 			accmode = SMB_SM_DENYNONE|SMB_AM_OPENREAD;
 			error = smbfs_smb_open(np, accmode, scred);
 		}
 	}
 	if (error == 0) {
 		np->n_flag |= NOPEN;
 		vnode_create_vobject(ap->a_vp, vattr.va_size, ap->a_td);
 	}
 	smbfs_attr_cacheremove(vp);
 	smbfs_free_scred(scred);
 	return error;
 }
 
 static int
 smbfs_close(ap)
 	struct vop_close_args /* {
 		struct vnodeop_desc *a_desc;
 		struct vnode *a_vp;
 		int  a_fflag;
 		struct ucred *a_cred;
 		struct thread *a_td;
 	} */ *ap;
 {
 	struct vnode *vp = ap->a_vp;
 	struct thread *td = ap->a_td;
 	struct smbnode *np = VTOSMB(vp);
 	struct smb_cred *scred;
 
 	if (vp->v_type == VDIR && (np->n_flag & NOPEN) != 0 &&
 	    np->n_dirseq != NULL) {
 		scred = smbfs_malloc_scred();
 		smb_makescred(scred, td, ap->a_cred);
 		smbfs_findclose(np->n_dirseq, scred);
 		smbfs_free_scred(scred);
 		np->n_dirseq = NULL;
 	}
 	return 0;
 }
 
 /*
  * smbfs_getattr call from vfs.
  */
 static int
 smbfs_getattr(ap)
 	struct vop_getattr_args /* {
 		struct vnode *a_vp;
 		struct vattr *a_vap;
 		struct ucred *a_cred;
 	} */ *ap;
 {
 	struct vnode *vp = ap->a_vp;
 	struct smbnode *np = VTOSMB(vp);
 	struct vattr *va=ap->a_vap;
 	struct smbfattr fattr;
 	struct smb_cred *scred;
 	u_quad_t oldsize;
 	int error;
 
 	SMBVDEBUG("%lx: '%s' %d\n", (long)vp, np->n_name, (vp->v_vflag & VV_ROOT) != 0);
 	error = smbfs_attr_cachelookup(vp, va);
 	if (!error)
 		return 0;
 	SMBVDEBUG("not in the cache\n");
 	scred = smbfs_malloc_scred();
 	smb_makescred(scred, curthread, ap->a_cred);
 	oldsize = np->n_size;
 	error = smbfs_smb_lookup(np, NULL, 0, &fattr, scred);
 	if (error) {
 		SMBVDEBUG("error %d\n", error);
 		smbfs_free_scred(scred);
 		return error;
 	}
 	smbfs_attr_cacheenter(vp, &fattr);
 	smbfs_attr_cachelookup(vp, va);
 	if (np->n_flag & NOPEN)
 		np->n_size = oldsize;
 	smbfs_free_scred(scred);
 	return 0;
 }
 
 static int
 smbfs_setattr(ap)
 	struct vop_setattr_args /* {
 		struct vnode *a_vp;
 		struct vattr *a_vap;
 		struct ucred *a_cred;
 	} */ *ap;
 {
 	struct vnode *vp = ap->a_vp;
 	struct smbnode *np = VTOSMB(vp);
 	struct vattr *vap = ap->a_vap;
 	struct timespec *mtime, *atime;
 	struct smb_cred *scred;
 	struct smb_share *ssp = np->n_mount->sm_share;
 	struct smb_vc *vcp = SSTOVC(ssp);
 	struct thread *td = curthread;
 	u_quad_t tsize = 0;
 	int isreadonly, doclose, error = 0;
 	int old_n_dosattr;
 
 	SMBVDEBUG("\n");
 	isreadonly = (vp->v_mount->mnt_flag & MNT_RDONLY);
 	/*
 	 * Disallow write attempts if the filesystem is mounted read-only.
 	 */
   	if ((vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL || 
 	     vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL ||
 	     vap->va_mode != (mode_t)VNOVAL || vap->va_flags != VNOVAL) &&
 	     isreadonly)
 		return EROFS;
 
 	/*
 	 * We only support setting four flags.  Don't allow setting others.
 	 *
 	 * We map UF_READONLY to SMB_FA_RDONLY, unlike the MacOS X version
 	 * of this code, which maps both UF_IMMUTABLE AND SF_IMMUTABLE to
 	 * SMB_FA_RDONLY.  The immutable flags have different semantics
 	 * than readonly, which is the reason for the difference.
 	 */
 	if (vap->va_flags != VNOVAL) {
 		if (vap->va_flags & ~(UF_HIDDEN|UF_SYSTEM|UF_ARCHIVE|
 				      UF_READONLY))
 			return EINVAL;
 	}
 
 	scred = smbfs_malloc_scred();
 	smb_makescred(scred, td, ap->a_cred);
 	if (vap->va_size != VNOVAL) {
  		switch (vp->v_type) {
  		    case VDIR:
  			error = EISDIR;
 			goto out;
  		    case VREG:
 			break;
  		    default:
 			error = EINVAL;
 			goto out;
   		}
 		if (isreadonly) {
 			error = EROFS;
 			goto out;
 		}
 		doclose = 0;
 		vnode_pager_setsize(vp, (u_long)vap->va_size);
  		tsize = np->n_size;
  		np->n_size = vap->va_size;
 		if ((np->n_flag & NOPEN) == 0) {
 			error = smbfs_smb_open(np,
 					       SMB_SM_DENYNONE|SMB_AM_OPENRW,
 					       scred);
 			if (error == 0)
 				doclose = 1;
 		}
 		if (error == 0)
 			error = smbfs_smb_setfsize(np,
 			    (int64_t)vap->va_size, scred);
 		if (doclose)
 			smbfs_smb_close(ssp, np->n_fid, NULL, scred);
 		if (error) {
 			np->n_size = tsize;
 			vnode_pager_setsize(vp, (u_long)tsize);
 			goto out;
 		}
   	}
 	if ((vap->va_flags != VNOVAL) || (vap->va_mode != (mode_t)VNOVAL)) {
 		old_n_dosattr = np->n_dosattr;
 
 		if (vap->va_mode != (mode_t)VNOVAL) {
 			if (vap->va_mode & S_IWUSR)
 				np->n_dosattr &= ~SMB_FA_RDONLY;
 			else
 				np->n_dosattr |= SMB_FA_RDONLY;
 		}
 
 		if (vap->va_flags != VNOVAL) {
 			if (vap->va_flags & UF_HIDDEN)
 				np->n_dosattr |= SMB_FA_HIDDEN;
 			else
 				np->n_dosattr &= ~SMB_FA_HIDDEN;
 
 			if (vap->va_flags & UF_SYSTEM)
 				np->n_dosattr |= SMB_FA_SYSTEM;
 			else
 				np->n_dosattr &= ~SMB_FA_SYSTEM;
 
 			if (vap->va_flags & UF_ARCHIVE)
 				np->n_dosattr |= SMB_FA_ARCHIVE;
 			else
 				np->n_dosattr &= ~SMB_FA_ARCHIVE;
 
 			/*
 			 * We only support setting the immutable / readonly
 			 * bit for regular files.  According to comments in
 			 * the MacOS X version of this code, supporting the
 			 * readonly bit on directories doesn't do the same
 			 * thing in Windows as in Unix.
 			 */
 			if (vp->v_type == VREG) {
 				if (vap->va_flags & UF_READONLY)
 					np->n_dosattr |= SMB_FA_RDONLY;
 				else
 					np->n_dosattr &= ~SMB_FA_RDONLY;
 			}
 		}
 
 		if (np->n_dosattr != old_n_dosattr) {
 			error = smbfs_smb_setpattr(np, np->n_dosattr, NULL, scred);
 			if (error)
 				goto out;
 		}
 	}
 	mtime = atime = NULL;
 	if (vap->va_mtime.tv_sec != VNOVAL)
 		mtime = &vap->va_mtime;
 	if (vap->va_atime.tv_sec != VNOVAL)
 		atime = &vap->va_atime;
 	if (mtime != atime) {
 		if (vap->va_vaflags & VA_UTIMES_NULL) {
 			error = VOP_ACCESS(vp, VADMIN, ap->a_cred, td);
 			if (error)
 				error = VOP_ACCESS(vp, VWRITE, ap->a_cred, td);
 		} else
 			error = VOP_ACCESS(vp, VADMIN, ap->a_cred, td);
 #if 0
 		if (mtime == NULL)
 			mtime = &np->n_mtime;
 		if (atime == NULL)
 			atime = &np->n_atime;
 #endif
 		/*
 		 * If file is opened, then we can use handle based calls.
 		 * If not, use path based ones.
 		 */
 		if ((np->n_flag & NOPEN) == 0) {
 			if (vcp->vc_flags & SMBV_WIN95) {
 				error = VOP_OPEN(vp, FWRITE, ap->a_cred, td,
 				    NULL);
 				if (!error) {
 /*					error = smbfs_smb_setfattrNT(np, 0,
 					    mtime, atime, scred);
 					VOP_GETATTR(vp, &vattr, ap->a_cred); */
 					if (mtime)
 						np->n_mtime = *mtime;
 					VOP_CLOSE(vp, FWRITE, ap->a_cred, td);
 				}
 			} else if ((vcp->vc_sopt.sv_caps & SMB_CAP_NT_SMBS)) {
 				error = smbfs_smb_setptime2(np, mtime, atime, 0, scred);
 /*				error = smbfs_smb_setpattrNT(np, 0, mtime, atime, scred);*/
 			} else if (SMB_DIALECT(vcp) >= SMB_DIALECT_LANMAN2_0) {
 				error = smbfs_smb_setptime2(np, mtime, atime, 0, scred);
 			} else {
 				error = smbfs_smb_setpattr(np, 0, mtime, scred);
 			}
 		} else {
 			if (vcp->vc_sopt.sv_caps & SMB_CAP_NT_SMBS) {
 				error = smbfs_smb_setfattrNT(np, 0, mtime, atime, scred);
 			} else if (SMB_DIALECT(vcp) >= SMB_DIALECT_LANMAN1_0) {
 				error = smbfs_smb_setftime(np, mtime, atime, scred);
 			} else {
 				/*
 				 * I have no idea how to handle this for core
 				 * level servers. The possible solution is to
 				 * update mtime after file is closed.
 				 */
 				 SMBERROR("can't update times on an opened file\n");
 			}
 		}
 	}
 	/*
 	 * Invalidate attribute cache in case if server doesn't set
 	 * required attributes.
 	 */
 	smbfs_attr_cacheremove(vp);	/* invalidate cache */
 	VOP_GETATTR(vp, vap, ap->a_cred);
 	np->n_mtime.tv_sec = vap->va_mtime.tv_sec;
 out:
 	smbfs_free_scred(scred);
 	return error;
 }
 /*
  * smbfs_read call.
  */
 static int
 smbfs_read(ap)
 	struct vop_read_args /* {
 		struct vnode *a_vp;
 		struct uio *a_uio;
 		int  a_ioflag;
 		struct ucred *a_cred;
 	} */ *ap;
 {
 	struct vnode *vp = ap->a_vp;
 	struct uio *uio = ap->a_uio;
 
 	SMBVDEBUG("\n");
 	if (vp->v_type != VREG && vp->v_type != VDIR)
 		return EPERM;
 	return smbfs_readvnode(vp, uio, ap->a_cred);
 }
 
 static int
 smbfs_write(ap)
 	struct vop_write_args /* {
 		struct vnode *a_vp;
 		struct uio *a_uio;
 		int  a_ioflag;
 		struct ucred *a_cred;
 	} */ *ap;
 {
 	struct vnode *vp = ap->a_vp;
 	struct uio *uio = ap->a_uio;
 
 	SMBVDEBUG("%d,ofs=%jd,sz=%zd\n",vp->v_type, (intmax_t)uio->uio_offset, 
 	    uio->uio_resid);
 	if (vp->v_type != VREG)
 		return (EPERM);
 	return smbfs_writevnode(vp, uio, ap->a_cred,ap->a_ioflag);
 }
 /*
  * smbfs_create call
  * Create a regular file. On entry the directory to contain the file being
  * created is locked.  We must release before we return. We must also free
  * the pathname buffer pointed at by cnp->cn_pnbuf, always on error, or
  * only if the SAVESTART bit in cn_flags is clear on success.
  */
 static int
 smbfs_create(ap)
 	struct vop_create_args /* {
 		struct vnode *a_dvp;
 		struct vnode **a_vpp;
 		struct componentname *a_cnp;
 		struct vattr *a_vap;
 	} */ *ap;
 {
 	struct vnode *dvp = ap->a_dvp;
 	struct vattr *vap = ap->a_vap;
 	struct vnode **vpp=ap->a_vpp;
 	struct componentname *cnp = ap->a_cnp;
 	struct smbnode *dnp = VTOSMB(dvp);
 	struct vnode *vp;
 	struct vattr vattr;
 	struct smbfattr fattr;
 	struct smb_cred *scred;
 	char *name = cnp->cn_nameptr;
 	int nmlen = cnp->cn_namelen;
 	int error;
 
 	SMBVDEBUG("\n");
 	*vpp = NULL;
 	if (vap->va_type != VREG)
 		return EOPNOTSUPP;
 	if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred)))
 		return error;
 	scred = smbfs_malloc_scred();
 	smb_makescred(scred, curthread, cnp->cn_cred);
 
 	error = smbfs_smb_create(dnp, name, nmlen, scred);
 	if (error)
 		goto out;
 	error = smbfs_smb_lookup(dnp, name, nmlen, &fattr, scred);
 	if (error)
 		goto out;
 	error = smbfs_nget(VTOVFS(dvp), dvp, name, nmlen, &fattr, &vp);
 	if (error)
 		goto out;
 	*vpp = vp;
 	if (cnp->cn_flags & MAKEENTRY)
 		cache_enter(dvp, vp, cnp);
 out:
 	smbfs_free_scred(scred);
 	return error;
 }
 
 static int
 smbfs_remove(ap)
 	struct vop_remove_args /* {
 		struct vnodeop_desc *a_desc;
 		struct vnode * a_dvp;
 		struct vnode * a_vp;
 		struct componentname * a_cnp;
 	} */ *ap;
 {
 	struct vnode *vp = ap->a_vp;
 /*	struct vnode *dvp = ap->a_dvp;*/
 	struct componentname *cnp = ap->a_cnp;
 	struct smbnode *np = VTOSMB(vp);
 	struct smb_cred *scred;
 	int error;
 
 	if (vp->v_type == VDIR || (np->n_flag & NOPEN) != 0 || vrefcnt(vp) != 1)
 		return EPERM;
 	scred = smbfs_malloc_scred();
 	smb_makescred(scred, curthread, cnp->cn_cred);
 	error = smbfs_smb_delete(np, scred);
 	if (error == 0)
 		np->n_flag |= NGONE;
 	cache_purge(vp);
 	smbfs_free_scred(scred);
 	return error;
 }
 
 /*
  * smbfs_file rename call
  */
 static int
 smbfs_rename(ap)
 	struct vop_rename_args  /* {
 		struct vnode *a_fdvp;
 		struct vnode *a_fvp;
 		struct componentname *a_fcnp;
 		struct vnode *a_tdvp;
 		struct vnode *a_tvp;
 		struct componentname *a_tcnp;
 	} */ *ap;
 {
 	struct vnode *fvp = ap->a_fvp;
 	struct vnode *tvp = ap->a_tvp;
 	struct vnode *fdvp = ap->a_fdvp;
 	struct vnode *tdvp = ap->a_tdvp;
 	struct componentname *tcnp = ap->a_tcnp;
 /*	struct componentname *fcnp = ap->a_fcnp;*/
 	struct smb_cred *scred;
 #ifdef notnow
 	u_int16_t flags = 6;
 #endif
 	int error=0;
 
 	scred = NULL;
 	/* Check for cross-device rename */
 	if ((fvp->v_mount != tdvp->v_mount) ||
 	    (tvp && (fvp->v_mount != tvp->v_mount))) {
 		error = EXDEV;
 		goto out;
 	}
 
 	if (tvp && vrefcnt(tvp) > 1) {
 		error = EBUSY;
 		goto out;
 	}
 #ifdef notnow
 	flags = 0x10;			/* verify all writes */
 #endif
 	if (fvp->v_type == VDIR) {
 #ifdef notnow
 		flags |= 2;
 #endif
 	} else if (fvp->v_type == VREG) {
 #ifdef notnow
 		flags |= 1;
 #endif
 	} else {
 		return EINVAL;
 	}
 	scred = smbfs_malloc_scred();
 	smb_makescred(scred, curthread, tcnp->cn_cred);
 	/*
 	 * It seems that Samba doesn't implement SMB_COM_MOVE call...
 	 */
 #ifdef notnow
 	if (SMB_DIALECT(SSTOCN(smp->sm_share)) >= SMB_DIALECT_LANMAN1_0) {
 		error = smbfs_smb_move(VTOSMB(fvp), VTOSMB(tdvp),
 		    tcnp->cn_nameptr, tcnp->cn_namelen, flags, scred);
 	} else
 #endif
 	{
 		/*
 		 * We have to do the work atomicaly
 		 */
 		if (tvp && tvp != fvp) {
 			error = smbfs_smb_delete(VTOSMB(tvp), scred);
 			if (error)
 				goto out_cacherem;
 			VTOSMB(fvp)->n_flag |= NGONE;
 		}
 		error = smbfs_smb_rename(VTOSMB(fvp), VTOSMB(tdvp),
 		    tcnp->cn_nameptr, tcnp->cn_namelen, scred);
 	}
 
 	if (fvp->v_type == VDIR) {
 		if (tvp != NULL && tvp->v_type == VDIR)
 			cache_purge(tdvp);
 		cache_purge(fdvp);
 	}
 
 out_cacherem:
 	smbfs_attr_cacheremove(fdvp);
 	smbfs_attr_cacheremove(tdvp);
 out:
 	smbfs_free_scred(scred);
 	if (tdvp == tvp)
 		vrele(tdvp);
 	else
 		vput(tdvp);
 	if (tvp)
 		vput(tvp);
 	vrele(fdvp);
 	vrele(fvp);
 #ifdef possible_mistake
 	vgone(fvp);
 	if (tvp)
 		vgone(tvp);
 #endif
 	return error;
 }
 
 /*
  * somtime it will come true...
  */
 static int
 smbfs_link(ap)
 	struct vop_link_args /* {
 		struct vnode *a_tdvp;
 		struct vnode *a_vp;
 		struct componentname *a_cnp;
 	} */ *ap;
 {
 	return EOPNOTSUPP;
 }
 
 /*
  * smbfs_symlink link create call.
  * Sometime it will be functional...
  */
 static int
 smbfs_symlink(ap)
 	struct vop_symlink_args /* {
 		struct vnode *a_dvp;
 		struct vnode **a_vpp;
 		struct componentname *a_cnp;
 		struct vattr *a_vap;
 		char *a_target;
 	} */ *ap;
 {
 	return EOPNOTSUPP;
 }
 
 static int
 smbfs_mknod(ap) 
 	struct vop_mknod_args /* {
 	} */ *ap;
 {
 	return EOPNOTSUPP;
 }
 
 static int
 smbfs_mkdir(ap)
 	struct vop_mkdir_args /* {
 		struct vnode *a_dvp;
 		struct vnode **a_vpp;
 		struct componentname *a_cnp;
 		struct vattr *a_vap;
 	} */ *ap;
 {
 	struct vnode *dvp = ap->a_dvp;
 /*	struct vattr *vap = ap->a_vap;*/
 	struct vnode *vp;
 	struct componentname *cnp = ap->a_cnp;
 	struct smbnode *dnp = VTOSMB(dvp);
 	struct vattr vattr;
 	struct smb_cred *scred;
 	struct smbfattr fattr;
 	char *name = cnp->cn_nameptr;
 	int len = cnp->cn_namelen;
 	int error;
 
 	if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred))) {
 		return error;
 	}	
 	if ((name[0] == '.') && ((len == 1) || ((len == 2) && (name[1] == '.'))))
 		return EEXIST;
 	scred = smbfs_malloc_scred();
 	smb_makescred(scred, curthread, cnp->cn_cred);
 	error = smbfs_smb_mkdir(dnp, name, len, scred);
 	if (error)
 		goto out;
 	error = smbfs_smb_lookup(dnp, name, len, &fattr, scred);
 	if (error)
 		goto out;
 	error = smbfs_nget(VTOVFS(dvp), dvp, name, len, &fattr, &vp);
 	if (error)
 		goto out;
 	*ap->a_vpp = vp;
 out:
 	smbfs_free_scred(scred);
 	return error;
 }
 
 /*
  * smbfs_remove directory call
  */
 static int
 smbfs_rmdir(ap)
 	struct vop_rmdir_args /* {
 		struct vnode *a_dvp;
 		struct vnode *a_vp;
 		struct componentname *a_cnp;
 	} */ *ap;
 {
 	struct vnode *vp = ap->a_vp;
 	struct vnode *dvp = ap->a_dvp;
 	struct componentname *cnp = ap->a_cnp;
 /*	struct smbmount *smp = VTOSMBFS(vp);*/
 	struct smbnode *dnp = VTOSMB(dvp);
 	struct smbnode *np = VTOSMB(vp);
 	struct smb_cred *scred;
 	int error;
 
 	if (dvp == vp)
 		return EINVAL;
 
 	scred = smbfs_malloc_scred();
 	smb_makescred(scred, curthread, cnp->cn_cred);
 	error = smbfs_smb_rmdir(np, scred);
 	if (error == 0)
 		np->n_flag |= NGONE;
 	dnp->n_flag |= NMODIFIED;
 	smbfs_attr_cacheremove(dvp);
 /*	cache_purge(dvp);*/
 	cache_purge(vp);
 	smbfs_free_scred(scred);
 	return error;
 }
 
 /*
  * smbfs_readdir call
  */
 static int
 smbfs_readdir(ap)
 	struct vop_readdir_args /* {
 		struct vnode *a_vp;
 		struct uio *a_uio;
 		struct ucred *a_cred;
 		int *a_eofflag;
 		uint64_t *a_cookies;
 		int a_ncookies;
 	} */ *ap;
 {
 	struct vnode *vp = ap->a_vp;
 	struct uio *uio = ap->a_uio;
 	int error;
 
 	if (vp->v_type != VDIR)
 		return (EPERM);
 #ifdef notnow
 	if (ap->a_ncookies) {
 		printf("smbfs_readdir: no support for cookies now...");
 		return (EOPNOTSUPP);
 	}
 #endif
 	error = smbfs_readvnode(vp, uio, ap->a_cred);
 	return error;
 }
 
 /* ARGSUSED */
 static int
 smbfs_fsync(ap)
 	struct vop_fsync_args /* {
 		struct vnodeop_desc *a_desc;
 		struct vnode * a_vp;
 		struct ucred * a_cred;
 		int  a_waitfor;
 		struct thread * a_td;
 	} */ *ap;
 {
 /*	return (smb_flush(ap->a_vp, ap->a_cred, ap->a_waitfor, ap->a_td, 1));*/
     return (0);
 }
 
 static 
 int smbfs_print (ap) 
 	struct vop_print_args /* {
 	struct vnode *a_vp;
 	} */ *ap;
 {
 	struct vnode *vp = ap->a_vp;
 	struct smbnode *np = VTOSMB(vp);
 
 	if (np == NULL) {
 		printf("no smbnode data\n");
 		return (0);
 	}
 	printf("\tname = %s, parent = %p, open = %d\n", np->n_name,
 	    np->n_parent ? np->n_parent : NULL, (np->n_flag & NOPEN) != 0);
 	return (0);
 }
 
 static int
 smbfs_pathconf (ap)
 	struct vop_pathconf_args  /* {
 	struct vnode *vp;
 	int name;
 	register_t *retval;
 	} */ *ap;
 {
 	struct smbmount *smp = VFSTOSMBFS(VTOVFS(ap->a_vp));
 	struct smb_vc *vcp = SSTOVC(smp->sm_share);
 	long *retval = ap->a_retval;
 	int error = 0;
 
 	switch (ap->a_name) {
 	    case _PC_FILESIZEBITS:
 		if (vcp->vc_sopt.sv_caps & (SMB_CAP_LARGE_READX |
 		    SMB_CAP_LARGE_WRITEX))
 		    *retval = 64;
 		else
 		    *retval = 32;
 		break;
 	    case _PC_NAME_MAX:
 		*retval = (vcp->vc_hflags2 & SMB_FLAGS2_KNOWS_LONG_NAMES) ? 255 : 12;
 		break;
 	    case _PC_PATH_MAX:
 		*retval = 800;	/* XXX: a correct one ? */
 		break;
 	    case _PC_NO_TRUNC:
 		*retval = 1;
 		break;
 	    default:
 		error = vop_stdpathconf(ap);
 	}
 	return error;
 }
 
 static int
 smbfs_strategy (ap) 
 	struct vop_strategy_args /* {
 	struct buf *a_bp
 	} */ *ap;
 {
 	struct buf *bp=ap->a_bp;
 	struct ucred *cr;
 	struct thread *td;
 
 	SMBVDEBUG("\n");
 	if (bp->b_flags & B_ASYNC)
 		td = (struct thread *)0;
 	else
 		td = curthread;	/* XXX */
 	if (bp->b_iocmd == BIO_READ)
 		cr = bp->b_rcred;
 	else
 		cr = bp->b_wcred;
 
 	if ((bp->b_flags & B_ASYNC) == 0 )
 		(void)smbfs_doio(ap->a_vp, bp, cr, td);
 	return (0);
 }
 
 int
 smbfs_ioctl(ap)
 	struct vop_ioctl_args /* {
 		struct vnode *a_vp;
 		u_long a_command;
 		caddr_t a_data;
 		int fflag;
 		struct ucred *cred;
 		struct thread *td;
 	} */ *ap;
 {
 	return ENOTTY;
 }
 
 static char smbfs_atl[] = "rhsvda";
 static int
 smbfs_getextattr(struct vop_getextattr_args *ap)
 /* {
         IN struct vnode *a_vp;
         IN char *a_name;
         INOUT struct uio *a_uio;
         IN struct ucred *a_cred;
         IN struct thread *a_td;
 };
 */
 {
 	struct vnode *vp = ap->a_vp;
 	struct thread *td = ap->a_td;
 	struct ucred *cred = ap->a_cred;
 	struct uio *uio = ap->a_uio;
 	const char *name = ap->a_name;
 	struct smbnode *np = VTOSMB(vp);
 	struct vattr vattr;
 	char buf[10];
 	int i, attr, error;
 
 	error = VOP_ACCESS(vp, VREAD, cred, td);
 	if (error)
 		return error;
 	error = VOP_GETATTR(vp, &vattr, cred);
 	if (error)
 		return error;
 	if (strcmp(name, "dosattr") == 0) {
 		attr = np->n_dosattr;
 		for (i = 0; i < 6; i++, attr >>= 1)
 			buf[i] = (attr & 1) ? smbfs_atl[i] : '-';
 		buf[i] = 0;
 		error = uiomove(buf, i, uio);
 		
 	} else
 		error = EINVAL;
 	return error;
 }
 
 /*
  * Since we expected to support F_GETLK (and SMB protocol has no such function),
  * it is necessary to use lf_advlock(). It would be nice if this function had
  * a callback mechanism because it will help to improve a level of consistency.
  */
 int
 smbfs_advlock(ap)
 	struct vop_advlock_args /* {
 		struct vnode *a_vp;
 		caddr_t  a_id;
 		int  a_op;
 		struct flock *a_fl;
 		int  a_flags;
 	} */ *ap;
 {
 	struct vnode *vp = ap->a_vp;
 	struct smbnode *np = VTOSMB(vp);
 	struct flock *fl = ap->a_fl;
 	caddr_t id = (caddr_t)1 /* ap->a_id */;
 /*	int flags = ap->a_flags;*/
 	struct thread *td = curthread;
 	struct smb_cred *scred;
 	u_quad_t size;
 	off_t start, end, oadd;
 	int error, lkop;
 
 	if (vp->v_type == VDIR) {
 		/*
 		 * SMB protocol have no support for directory locking.
 		 * Although locks can be processed on local machine, I don't
 		 * think that this is a good idea, because some programs
 		 * can work wrong assuming directory is locked. So, we just
 		 * return 'operation not supported
 		 */
 		 return EOPNOTSUPP;
 	}
 	size = np->n_size;
 	switch (fl->l_whence) {
 	case SEEK_SET:
 	case SEEK_CUR:
 		start = fl->l_start;
 		break;
 
 	case SEEK_END:
 		if (size > OFF_MAX ||
 		    (fl->l_start > 0 && size > OFF_MAX - fl->l_start))
 			return EOVERFLOW;
 		start = size + fl->l_start;
 		break;
 
 	default:
 		return EINVAL;
 	}
 	if (start < 0)
 		return EINVAL;
 	if (fl->l_len < 0) {
 		if (start == 0)
 			return EINVAL;
 		end = start - 1;
 		start += fl->l_len;
 		if (start < 0)
 			return EINVAL;
 	} else if (fl->l_len == 0)
 		end = -1;
 	else {
 		oadd = fl->l_len - 1;
 		if (oadd > OFF_MAX - start)
 			return EOVERFLOW;
 		end = start + oadd;
 	}
 	scred = smbfs_malloc_scred();
 	smb_makescred(scred, td, td->td_ucred);
 	switch (ap->a_op) {
 	    case F_SETLK:
 		switch (fl->l_type) {
 		    case F_WRLCK:
 			lkop = SMB_LOCK_EXCL;
 			break;
 		    case F_RDLCK:
 			lkop = SMB_LOCK_SHARED;
 			break;
 		    case F_UNLCK:
 			lkop = SMB_LOCK_RELEASE;
 			break;
 		    default:
 			smbfs_free_scred(scred);
 			return EINVAL;
 		}
 		error = lf_advlock(ap, &vp->v_lockf, size);
 		if (error)
 			break;
 		lkop = SMB_LOCK_EXCL;
 		error = smbfs_smb_lock(np, lkop, id, start, end, scred);
 		if (error) {
 			int oldtype = fl->l_type;
 			fl->l_type = F_UNLCK;
 			ap->a_op = F_UNLCK;
 			lf_advlock(ap, &vp->v_lockf, size);
 			fl->l_type = oldtype;
 		}
 		break;
 	    case F_UNLCK:
 		lf_advlock(ap, &vp->v_lockf, size);
 		error = smbfs_smb_lock(np, SMB_LOCK_RELEASE, id, start, end, scred);
 		break;
 	    case F_GETLK:
 		error = lf_advlock(ap, &vp->v_lockf, size);
 		break;
 	    default:
 		smbfs_free_scred(scred);
 		return EINVAL;
 	}
 	smbfs_free_scred(scred);
 	return error;
 }
 
 static int
 smbfs_pathcheck(struct smbmount *smp, const char *name, int nmlen, int nameiop)
 {
 	static const char *badchars = "*/:<>?";
 	static const char *badchars83 = " +|,[]=;";
 	const char *cp;
 	int i, error;
 
 	/*
 	 * Backslash characters, being a path delimiter, are prohibited
 	 * within a path component even for LOOKUP operations.
 	 */
 	if (strchr(name, '\\') != NULL)
 		return ENOENT;
 
 	if (nameiop == LOOKUP)
 		return 0;
 	error = ENOENT;
 	if (SMB_DIALECT(SSTOVC(smp->sm_share)) < SMB_DIALECT_LANMAN2_0) {
 		/*
 		 * Name should conform 8.3 format
 		 */
 		if (nmlen > 12)
 			return ENAMETOOLONG;
 		cp = strchr(name, '.');
 		if (cp == NULL)
 			return error;
 		if (cp == name || (cp - name) > 8)
 			return error;
 		cp = strchr(cp + 1, '.');
 		if (cp != NULL)
 			return error;
 		for (cp = name, i = 0; i < nmlen; i++, cp++)
 			if (strchr(badchars83, *cp) != NULL)
 				return error;
 	}
 	for (cp = name, i = 0; i < nmlen; i++, cp++)
 		if (strchr(badchars, *cp) != NULL)
 			return error;
 	return 0;
 }
 
 /*
  * Things go even weird without fixed inode numbers...
  */
 int
 smbfs_lookup(ap)
 	struct vop_lookup_args /* {
 		struct vnodeop_desc *a_desc;
 		struct vnode *a_dvp;
 		struct vnode **a_vpp;
 		struct componentname *a_cnp;
 	} */ *ap;
 {
 	struct componentname *cnp = ap->a_cnp;
 	struct thread *td = curthread;
 	struct vnode *dvp = ap->a_dvp;
 	struct vnode **vpp = ap->a_vpp;
 	struct vnode *vp;
 	struct smbmount *smp;
 	struct mount *mp = dvp->v_mount;
 	struct smbnode *dnp;
 	struct smbfattr fattr, *fap;
 	struct smb_cred *scred;
 	char *name = cnp->cn_nameptr;
 	int flags = cnp->cn_flags;
 	int nameiop = cnp->cn_nameiop;
 	int nmlen = cnp->cn_namelen;
 	int error, islastcn, isdot;
 	int killit;
 
 	SMBVDEBUG("\n");
 	if (dvp->v_type != VDIR)
 		return ENOTDIR;
 	if ((flags & ISDOTDOT) && (dvp->v_vflag & VV_ROOT)) {
 		SMBFSERR("invalid '..'\n");
 		return EIO;
 	}
 	islastcn = flags & ISLASTCN;
 	if (islastcn && (mp->mnt_flag & MNT_RDONLY) && (nameiop != LOOKUP))
 		return EROFS;
 	error = vn_dir_check_exec(dvp, cnp);
 	if (error != 0)
 		return error;
 	smp = VFSTOSMBFS(mp);
 	dnp = VTOSMB(dvp);
 	isdot = (nmlen == 1 && name[0] == '.');
 
 	error = smbfs_pathcheck(smp, cnp->cn_nameptr, cnp->cn_namelen, nameiop);
 
 	if (error) 
 		return ENOENT;
 
 	error = cache_lookup(dvp, vpp, cnp, NULL, NULL);
 	SMBVDEBUG("cache_lookup returned %d\n", error);
 	if (error > 0)
 		return error;
 	if (error) {		/* name was found */
 		struct vattr vattr;
 
 		killit = 0;
 		vp = *vpp;
 		error = VOP_GETATTR(vp, &vattr, cnp->cn_cred);
 		/*
 		 * If the file type on the server is inconsistent
 		 * with what it was when we created the vnode,
 		 * kill the bogus vnode now and fall through to
 		 * the code below to create a new one with the
 		 * right type.
 		 */
 		if (error == 0 &&
 		   ((vp->v_type == VDIR &&
 		   (VTOSMB(vp)->n_dosattr & SMB_FA_DIR) == 0) ||
 		   (vp->v_type == VREG &&
 		   (VTOSMB(vp)->n_dosattr & SMB_FA_DIR) != 0)))
 		   killit = 1;
 		else if (error == 0
 	     /*    && vattr.va_ctime.tv_sec == VTOSMB(vp)->n_ctime*/) {
-		     if (nameiop != LOOKUP && islastcn)
-			     cnp->cn_flags |= SAVENAME;
 		     SMBVDEBUG("use cached vnode\n");
 		     return (0);
 		}
 		cache_purge(vp);
 		/*
 		 * XXX This is not quite right, if '.' is
 		 * inconsistent, we really need to start the lookup
 		 * all over again.  Hopefully there is some other
 		 * guarantee that prevents this case from happening.
 		 */
 		if (killit && vp != dvp)
 			vgone(vp);
 		if (vp != dvp)
 			vput(vp);
 		else
 			vrele(vp);
 		*vpp = NULLVP;
 	}
 	/* 
 	 * entry is not in the cache or has been expired
 	 */
 	error = 0;
 	*vpp = NULLVP;
 	scred = smbfs_malloc_scred();
 	smb_makescred(scred, td, cnp->cn_cred);
 	fap = &fattr;
 	if (flags & ISDOTDOT) {
 		/*
 		 * In the DOTDOT case, don't go over-the-wire
 		 * in order to request attributes. We already
 		 * know it's a directory and subsequent call to
 		 * smbfs_getattr() will restore consistency.
 		 *
 		 */
 		SMBVDEBUG("smbfs_smb_lookup: dotdot\n");
 	} else if (isdot) {
 		error = smbfs_smb_lookup(dnp, NULL, 0, fap, scred);
 		SMBVDEBUG("result of smbfs_smb_lookup: %d\n", error);
 	}
 	else {
 		error = smbfs_smb_lookup(dnp, name, nmlen, fap, scred);
 		SMBVDEBUG("result of smbfs_smb_lookup: %d\n", error);
 	}
 	if (error && error != ENOENT)
 		goto out;
 	if (error) {			/* entry not found */
 		/*
 		 * Handle RENAME or CREATE case...
 		 */
 		if ((nameiop == CREATE || nameiop == RENAME) && islastcn) {
 			error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred, td);
 			if (error)
 				goto out;
-			cnp->cn_flags |= SAVENAME;
 			error = EJUSTRETURN;
 			goto out;
 		}
 		error = ENOENT;
 		goto out;
 	}/* else {
 		SMBVDEBUG("Found entry %s with id=%d\n", fap->entryName, fap->dirEntNum);
 	}*/
 	/*
 	 * handle DELETE case ...
 	 */
 	if (nameiop == DELETE && islastcn) { 	/* delete last component */
 		error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred, td);
 		if (error)
 			goto out;
 		if (isdot) {
 			VREF(dvp);
 			*vpp = dvp;
 			goto out;
 		}
 		error = smbfs_nget(mp, dvp, name, nmlen, fap, &vp);
 		if (error)
 			goto out;
 		*vpp = vp;
-		cnp->cn_flags |= SAVENAME;
 		goto out;
 	}
 	if (nameiop == RENAME && islastcn) {
 		error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred, td);
 		if (error)
 			goto out;
 		if (isdot) {
 			error = EISDIR;
 			goto out;
 		}
 		error = smbfs_nget(mp, dvp, name, nmlen, fap, &vp);
 		if (error)
 			goto out;
 		*vpp = vp;
-		cnp->cn_flags |= SAVENAME;
 		goto out;
 	}
 	if (flags & ISDOTDOT) {
 		mp = dvp->v_mount;
 		error = vfs_busy(mp, MBF_NOWAIT);
 		if (error != 0) {
 			vfs_ref(mp);
 			VOP_UNLOCK(dvp);
 			error = vfs_busy(mp, 0);
 			vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
 			vfs_rel(mp);
 			if (error) {
 				error = ENOENT;
 				goto out;
 			}
 			if (VN_IS_DOOMED(dvp)) {
 				vfs_unbusy(mp);
 				error = ENOENT;
 				goto out;
 			}
 		}	
 		VOP_UNLOCK(dvp);
 		error = smbfs_nget(mp, dvp, name, nmlen, NULL, &vp);
 		vfs_unbusy(mp);
 		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
 		if (VN_IS_DOOMED(dvp)) {
 			if (error == 0)
 				vput(vp);
 			error = ENOENT;
 		}
 		if (error)
 			goto out;
 		*vpp = vp;
 	} else if (isdot) {
 		vref(dvp);
 		*vpp = dvp;
 	} else {
 		error = smbfs_nget(mp, dvp, name, nmlen, fap, &vp);
 		if (error)
 			goto out;
 		*vpp = vp;
 		SMBVDEBUG("lookup: getnewvp!\n");
 	}
 	if ((cnp->cn_flags & MAKEENTRY)/* && !islastcn*/) {
 /*		VTOSMB(*vpp)->n_ctime = VTOSMB(*vpp)->n_vattr.va_ctime.tv_sec;*/
 		cache_enter(dvp, *vpp, cnp);
 	}
 out:
 	smbfs_free_scred(scred);
 	return (error);
 }
diff --git a/sys/fs/tmpfs/tmpfs_subr.c b/sys/fs/tmpfs/tmpfs_subr.c
index 7ec5f9bf3ffa..82036a35b4f0 100644
--- a/sys/fs/tmpfs/tmpfs_subr.c
+++ b/sys/fs/tmpfs/tmpfs_subr.c
@@ -1,2271 +1,2270 @@
 /*	$NetBSD: tmpfs_subr.c,v 1.35 2007/07/09 21:10:50 ad Exp $	*/
 
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-NetBSD
  *
  * Copyright (c) 2005 The NetBSD Foundation, Inc.
  * All rights reserved.
  *
  * This code is derived from software contributed to The NetBSD Foundation
  * by Julio M. Merino Vidal, developed as part of Google's Summer of Code
  * 2005 program.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
 /*
  * Efficient memory file system supporting functions.
  */
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/dirent.h>
 #include <sys/fnv_hash.h>
 #include <sys/lock.h>
 #include <sys/limits.h>
 #include <sys/mount.h>
 #include <sys/namei.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/random.h>
 #include <sys/refcount.h>
 #include <sys/rwlock.h>
 #include <sys/smr.h>
 #include <sys/stat.h>
 #include <sys/sysctl.h>
 #include <sys/user.h>
 #include <sys/vnode.h>
 #include <sys/vmmeter.h>
 
 #include <vm/vm.h>
 #include <vm/vm_param.h>
 #include <vm/vm_object.h>
 #include <vm/vm_page.h>
 #include <vm/vm_pageout.h>
 #include <vm/vm_pager.h>
 #include <vm/vm_extern.h>
 #include <vm/swap_pager.h>
 
 #include <fs/tmpfs/tmpfs.h>
 #include <fs/tmpfs/tmpfs_fifoops.h>
 #include <fs/tmpfs/tmpfs_vnops.h>
 
 SYSCTL_NODE(_vfs, OID_AUTO, tmpfs, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
     "tmpfs file system");
 
 static long tmpfs_pages_reserved = TMPFS_PAGES_MINRESERVED;
 
 MALLOC_DEFINE(M_TMPFSDIR, "tmpfs dir", "tmpfs dirent structure");
 static uma_zone_t tmpfs_node_pool;
 VFS_SMR_DECLARE;
 
 int tmpfs_pager_type = -1;
 
 static vm_object_t
 tmpfs_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot,
     vm_ooffset_t offset, struct ucred *cred)
 {
 	vm_object_t object;
 
 	MPASS(handle == NULL);
 	MPASS(offset == 0);
 	object = vm_object_allocate_dyn(tmpfs_pager_type, size,
 	    OBJ_COLORED | OBJ_SWAP);
 	if (!swap_pager_init_object(object, NULL, NULL, size, 0)) {
 		vm_object_deallocate(object);
 		object = NULL;
 	}
 	return (object);
 }
 
 /*
  * Make sure tmpfs vnodes with writable mappings can be found on the lazy list.
  *
  * This allows for periodic mtime updates while only scanning vnodes which are
  * plausibly dirty, see tmpfs_update_mtime_lazy.
  */
 static void
 tmpfs_pager_writecount_recalc(vm_object_t object, vm_offset_t old,
     vm_offset_t new)
 {
 	struct vnode *vp;
 
 	VM_OBJECT_ASSERT_WLOCKED(object);
 
 	vp = object->un_pager.swp.swp_tmpfs;
 
 	/*
 	 * Forced unmount?
 	 */
 	if (vp == NULL) {
 		KASSERT((object->flags & OBJ_TMPFS_VREF) == 0,
 		    ("object %p with OBJ_TMPFS_VREF but without vnode", object));
 		VM_OBJECT_WUNLOCK(object);
 		return;
 	}
 
 	if (old == 0) {
 		VNASSERT((object->flags & OBJ_TMPFS_VREF) == 0, vp,
 		    ("object without writable mappings has a reference"));
 		VNPASS(vp->v_usecount > 0, vp);
 	} else {
 		VNASSERT((object->flags & OBJ_TMPFS_VREF) != 0, vp,
 		    ("object with writable mappings does not have a reference"));
 	}
 
 	if (old == new) {
 		VM_OBJECT_WUNLOCK(object);
 		return;
 	}
 
 	if (new == 0) {
 		vm_object_clear_flag(object, OBJ_TMPFS_VREF);
 		VM_OBJECT_WUNLOCK(object);
 		vrele(vp);
 	} else {
 		if ((object->flags & OBJ_TMPFS_VREF) == 0) {
 			vref(vp);
 			vlazy(vp);
 			vm_object_set_flag(object, OBJ_TMPFS_VREF);
 		}
 		VM_OBJECT_WUNLOCK(object);
 	}
 }
 
 static void
 tmpfs_pager_update_writecount(vm_object_t object, vm_offset_t start,
     vm_offset_t end)
 {
 	vm_offset_t new, old;
 
 	VM_OBJECT_WLOCK(object);
 	KASSERT((object->flags & OBJ_ANON) == 0,
 	    ("%s: object %p with OBJ_ANON", __func__, object));
 	old = object->un_pager.swp.writemappings;
 	object->un_pager.swp.writemappings += (vm_ooffset_t)end - start;
 	new = object->un_pager.swp.writemappings;
 	tmpfs_pager_writecount_recalc(object, old, new);
 	VM_OBJECT_ASSERT_UNLOCKED(object);
 }
 
 static void
 tmpfs_pager_release_writecount(vm_object_t object, vm_offset_t start,
     vm_offset_t end)
 {
 	vm_offset_t new, old;
 
 	VM_OBJECT_WLOCK(object);
 	KASSERT((object->flags & OBJ_ANON) == 0,
 	    ("%s: object %p with OBJ_ANON", __func__, object));
 	old = object->un_pager.swp.writemappings;
 	object->un_pager.swp.writemappings -= (vm_ooffset_t)end - start;
 	new = object->un_pager.swp.writemappings;
 	tmpfs_pager_writecount_recalc(object, old, new);
 	VM_OBJECT_ASSERT_UNLOCKED(object);
 }
 
 static void
 tmpfs_pager_getvp(vm_object_t object, struct vnode **vpp, bool *vp_heldp)
 {
 	struct vnode *vp;
 
 	/*
 	 * Tmpfs VREG node, which was reclaimed, has tmpfs_pager_type
 	 * type, but not OBJ_TMPFS flag.  In this case there is no
 	 * v_writecount to adjust.
 	 */
 	if (vp_heldp != NULL)
 		VM_OBJECT_RLOCK(object);
 	else
 		VM_OBJECT_ASSERT_LOCKED(object);
 	if ((object->flags & OBJ_TMPFS) != 0) {
 		vp = object->un_pager.swp.swp_tmpfs;
 		if (vp != NULL) {
 			*vpp = vp;
 			if (vp_heldp != NULL) {
 				vhold(vp);
 				*vp_heldp = true;
 			}
 		}
 	}
 	if (vp_heldp != NULL)
 		VM_OBJECT_RUNLOCK(object);
 }
 
 struct pagerops tmpfs_pager_ops = {
 	.pgo_kvme_type = KVME_TYPE_VNODE,
 	.pgo_alloc = tmpfs_pager_alloc,
 	.pgo_set_writeable_dirty = vm_object_set_writeable_dirty_,
 	.pgo_update_writecount = tmpfs_pager_update_writecount,
 	.pgo_release_writecount = tmpfs_pager_release_writecount,
 	.pgo_mightbedirty = vm_object_mightbedirty_,
 	.pgo_getvp = tmpfs_pager_getvp,
 };
 
 static int
 tmpfs_node_ctor(void *mem, int size, void *arg, int flags)
 {
 	struct tmpfs_node *node;
 
 	node = mem;
 	node->tn_gen++;
 	node->tn_size = 0;
 	node->tn_status = 0;
 	node->tn_accessed = false;
 	node->tn_flags = 0;
 	node->tn_links = 0;
 	node->tn_vnode = NULL;
 	node->tn_vpstate = 0;
 	return (0);
 }
 
 static void
 tmpfs_node_dtor(void *mem, int size, void *arg)
 {
 	struct tmpfs_node *node;
 
 	node = mem;
 	node->tn_type = VNON;
 }
 
 static int
 tmpfs_node_init(void *mem, int size, int flags)
 {
 	struct tmpfs_node *node;
 
 	node = mem;
 	node->tn_id = 0;
 	mtx_init(&node->tn_interlock, "tmpfsni", NULL, MTX_DEF);
 	node->tn_gen = arc4random();
 	return (0);
 }
 
 static void
 tmpfs_node_fini(void *mem, int size)
 {
 	struct tmpfs_node *node;
 
 	node = mem;
 	mtx_destroy(&node->tn_interlock);
 }
 
 int
 tmpfs_subr_init(void)
 {
 	tmpfs_pager_type = vm_pager_alloc_dyn_type(&tmpfs_pager_ops,
 	    OBJT_SWAP);
 	if (tmpfs_pager_type == -1)
 		return (EINVAL);
 	tmpfs_node_pool = uma_zcreate("TMPFS node",
 	    sizeof(struct tmpfs_node), tmpfs_node_ctor, tmpfs_node_dtor,
 	    tmpfs_node_init, tmpfs_node_fini, UMA_ALIGN_PTR, 0);
 	VFS_SMR_ZONE_SET(tmpfs_node_pool);
 	return (0);
 }
 
 void
 tmpfs_subr_uninit(void)
 {
 	if (tmpfs_pager_type != -1)
 		vm_pager_free_dyn_type(tmpfs_pager_type);
 	tmpfs_pager_type = -1;
 	uma_zdestroy(tmpfs_node_pool);
 }
 
 static int
 sysctl_mem_reserved(SYSCTL_HANDLER_ARGS)
 {
 	int error;
 	long pages, bytes;
 
 	pages = *(long *)arg1;
 	bytes = pages * PAGE_SIZE;
 
 	error = sysctl_handle_long(oidp, &bytes, 0, req);
 	if (error || !req->newptr)
 		return (error);
 
 	pages = bytes / PAGE_SIZE;
 	if (pages < TMPFS_PAGES_MINRESERVED)
 		return (EINVAL);
 
 	*(long *)arg1 = pages;
 	return (0);
 }
 
 SYSCTL_PROC(_vfs_tmpfs, OID_AUTO, memory_reserved,
     CTLTYPE_LONG|CTLFLAG_MPSAFE|CTLFLAG_RW, &tmpfs_pages_reserved, 0,
     sysctl_mem_reserved, "L",
     "Amount of available memory and swap below which tmpfs growth stops");
 
 static __inline int tmpfs_dirtree_cmp(struct tmpfs_dirent *a,
     struct tmpfs_dirent *b);
 RB_PROTOTYPE_STATIC(tmpfs_dir, tmpfs_dirent, uh.td_entries, tmpfs_dirtree_cmp);
 
 size_t
 tmpfs_mem_avail(void)
 {
 	size_t avail;
 	long reserved;
 
 	avail = swap_pager_avail + vm_free_count();
 	reserved = atomic_load_long(&tmpfs_pages_reserved);
 	if (__predict_false(avail < reserved))
 		return (0);
 	return (avail - reserved);
 }
 
 size_t
 tmpfs_pages_used(struct tmpfs_mount *tmp)
 {
 	const size_t node_size = sizeof(struct tmpfs_node) +
 	    sizeof(struct tmpfs_dirent);
 	size_t meta_pages;
 
 	meta_pages = howmany((uintmax_t)tmp->tm_nodes_inuse * node_size,
 	    PAGE_SIZE);
 	return (meta_pages + tmp->tm_pages_used);
 }
 
 static size_t
 tmpfs_pages_check_avail(struct tmpfs_mount *tmp, size_t req_pages)
 {
 	if (tmpfs_mem_avail() < req_pages)
 		return (0);
 
 	if (tmp->tm_pages_max != ULONG_MAX &&
 	    tmp->tm_pages_max < req_pages + tmpfs_pages_used(tmp))
 			return (0);
 
 	return (1);
 }
 
 static int
 tmpfs_partial_page_invalidate(vm_object_t object, vm_pindex_t idx, int base,
     int end, boolean_t ignerr)
 {
 	vm_page_t m;
 	int rv, error;
 
 	VM_OBJECT_ASSERT_WLOCKED(object);
 	KASSERT(base >= 0, ("%s: base %d", __func__, base));
 	KASSERT(end - base <= PAGE_SIZE, ("%s: base %d end %d", __func__, base,
 	    end));
 	error = 0;
 
 retry:
 	m = vm_page_grab(object, idx, VM_ALLOC_NOCREAT);
 	if (m != NULL) {
 		MPASS(vm_page_all_valid(m));
 	} else if (vm_pager_has_page(object, idx, NULL, NULL)) {
 		m = vm_page_alloc(object, idx, VM_ALLOC_NORMAL |
 		    VM_ALLOC_WAITFAIL);
 		if (m == NULL)
 			goto retry;
 		vm_object_pip_add(object, 1);
 		VM_OBJECT_WUNLOCK(object);
 		rv = vm_pager_get_pages(object, &m, 1, NULL, NULL);
 		VM_OBJECT_WLOCK(object);
 		vm_object_pip_wakeup(object);
 		if (rv == VM_PAGER_OK) {
 			/*
 			 * Since the page was not resident, and therefore not
 			 * recently accessed, immediately enqueue it for
 			 * asynchronous laundering.  The current operation is
 			 * not regarded as an access.
 			 */
 			vm_page_launder(m);
 		} else {
 			vm_page_free(m);
 			m = NULL;
 			if (!ignerr)
 				error = EIO;
 		}
 	}
 	if (m != NULL) {
 		pmap_zero_page_area(m, base, end - base);
 		vm_page_set_dirty(m);
 		vm_page_xunbusy(m);
 	}
 
 	return (error);
 }
 
 void
 tmpfs_ref_node(struct tmpfs_node *node)
 {
 #ifdef INVARIANTS
 	u_int old;
 
 	old =
 #endif
 	refcount_acquire(&node->tn_refcount);
 #ifdef INVARIANTS
 	KASSERT(old > 0, ("node %p zero refcount", node));
 #endif
 }
 
 /*
  * Allocates a new node of type 'type' inside the 'tmp' mount point, with
  * its owner set to 'uid', its group to 'gid' and its mode set to 'mode',
  * using the credentials of the process 'p'.
  *
  * If the node type is set to 'VDIR', then the parent parameter must point
  * to the parent directory of the node being created.  It may only be NULL
  * while allocating the root node.
  *
  * If the node type is set to 'VBLK' or 'VCHR', then the rdev parameter
  * specifies the device the node represents.
  *
  * If the node type is set to 'VLNK', then the parameter target specifies
  * the file name of the target file for the symbolic link that is being
  * created.
  *
  * Note that new nodes are retrieved from the available list if it has
  * items or, if it is empty, from the node pool as long as there is enough
  * space to create them.
  *
  * Returns zero on success or an appropriate error code on failure.
  */
 int
 tmpfs_alloc_node(struct mount *mp, struct tmpfs_mount *tmp, enum vtype type,
     uid_t uid, gid_t gid, mode_t mode, struct tmpfs_node *parent,
     const char *target, dev_t rdev, struct tmpfs_node **node)
 {
 	struct tmpfs_node *nnode;
 	char *symlink;
 	char symlink_smr;
 
 	/* If the root directory of the 'tmp' file system is not yet
 	 * allocated, this must be the request to do it. */
 	MPASS(IMPLIES(tmp->tm_root == NULL, parent == NULL && type == VDIR));
 
 	MPASS(IFF(type == VLNK, target != NULL));
 	MPASS(IFF(type == VBLK || type == VCHR, rdev != VNOVAL));
 
 	if (tmp->tm_nodes_inuse >= tmp->tm_nodes_max)
 		return (ENOSPC);
 	if (tmpfs_pages_check_avail(tmp, 1) == 0)
 		return (ENOSPC);
 
 	if ((mp->mnt_kern_flag & MNTK_UNMOUNT) != 0) {
 		/*
 		 * When a new tmpfs node is created for fully
 		 * constructed mount point, there must be a parent
 		 * node, which vnode is locked exclusively.  As
 		 * consequence, if the unmount is executing in
 		 * parallel, vflush() cannot reclaim the parent vnode.
 		 * Due to this, the check for MNTK_UNMOUNT flag is not
 		 * racy: if we did not see MNTK_UNMOUNT flag, then tmp
 		 * cannot be destroyed until node construction is
 		 * finished and the parent vnode unlocked.
 		 *
 		 * Tmpfs does not need to instantiate new nodes during
 		 * unmount.
 		 */
 		return (EBUSY);
 	}
 	if ((mp->mnt_kern_flag & MNT_RDONLY) != 0)
 		return (EROFS);
 
 	nnode = uma_zalloc_smr(tmpfs_node_pool, M_WAITOK);
 
 	/* Generic initialization. */
 	nnode->tn_type = type;
 	vfs_timestamp(&nnode->tn_atime);
 	nnode->tn_birthtime = nnode->tn_ctime = nnode->tn_mtime =
 	    nnode->tn_atime;
 	nnode->tn_uid = uid;
 	nnode->tn_gid = gid;
 	nnode->tn_mode = mode;
 	nnode->tn_id = alloc_unr64(&tmp->tm_ino_unr);
 	nnode->tn_refcount = 1;
 
 	/* Type-specific initialization. */
 	switch (nnode->tn_type) {
 	case VBLK:
 	case VCHR:
 		nnode->tn_rdev = rdev;
 		break;
 
 	case VDIR:
 		RB_INIT(&nnode->tn_dir.tn_dirhead);
 		LIST_INIT(&nnode->tn_dir.tn_dupindex);
 		MPASS(parent != nnode);
 		MPASS(IMPLIES(parent == NULL, tmp->tm_root == NULL));
 		nnode->tn_dir.tn_parent = (parent == NULL) ? nnode : parent;
 		nnode->tn_dir.tn_readdir_lastn = 0;
 		nnode->tn_dir.tn_readdir_lastp = NULL;
 		nnode->tn_links++;
 		TMPFS_NODE_LOCK(nnode->tn_dir.tn_parent);
 		nnode->tn_dir.tn_parent->tn_links++;
 		TMPFS_NODE_UNLOCK(nnode->tn_dir.tn_parent);
 		break;
 
 	case VFIFO:
 		/* FALLTHROUGH */
 	case VSOCK:
 		break;
 
 	case VLNK:
 		MPASS(strlen(target) < MAXPATHLEN);
 		nnode->tn_size = strlen(target);
 
 		symlink = NULL;
 		if (!tmp->tm_nonc) {
 			symlink = cache_symlink_alloc(nnode->tn_size + 1, M_WAITOK);
 			symlink_smr = true;
 		}
 		if (symlink == NULL) {
 			symlink = malloc(nnode->tn_size + 1, M_TMPFSNAME, M_WAITOK);
 			symlink_smr = false;
 		}
 		memcpy(symlink, target, nnode->tn_size + 1);
 
 		/*
 		 * Allow safe symlink resolving for lockless lookup.
 		 * tmpfs_fplookup_symlink references this comment.
 		 *
 		 * 1. nnode is not yet visible to the world
 		 * 2. both tn_link_target and tn_link_smr get populated
 		 * 3. release fence publishes their content
 		 * 4. tn_link_target content is immutable until node destruction,
 		 *    where the pointer gets set to NULL
 		 * 5. tn_link_smr is never changed once set
 		 *
 		 * As a result it is sufficient to issue load consume on the node
 		 * pointer to also get the above content in a stable manner.
 		 * Worst case tn_link_smr flag may be set to true despite being stale,
 		 * while the target buffer is already cleared out.
 		 */
 		atomic_store_ptr(&nnode->tn_link_target, symlink);
 		atomic_store_char((char *)&nnode->tn_link_smr, symlink_smr);
 		atomic_thread_fence_rel();
 		break;
 
 	case VREG:
 		nnode->tn_reg.tn_aobj =
 		    vm_pager_allocate(tmpfs_pager_type, NULL, 0,
 			VM_PROT_DEFAULT, 0,
 			NULL /* XXXKIB - tmpfs needs swap reservation */);
 		/* OBJ_TMPFS is set together with the setting of vp->v_object */
 		nnode->tn_reg.tn_tmp = tmp;
 		break;
 
 	default:
 		panic("tmpfs_alloc_node: type %p %d", nnode,
 		    (int)nnode->tn_type);
 	}
 
 	TMPFS_LOCK(tmp);
 	LIST_INSERT_HEAD(&tmp->tm_nodes_used, nnode, tn_entries);
 	nnode->tn_attached = true;
 	tmp->tm_nodes_inuse++;
 	tmp->tm_refcount++;
 	TMPFS_UNLOCK(tmp);
 
 	*node = nnode;
 	return (0);
 }
 
 /*
  * Destroys the node pointed to by node from the file system 'tmp'.
  * If the node references a directory, no entries are allowed.
  */
 void
 tmpfs_free_node(struct tmpfs_mount *tmp, struct tmpfs_node *node)
 {
 	if (refcount_release_if_not_last(&node->tn_refcount))
 		return;
 
 	TMPFS_LOCK(tmp);
 	TMPFS_NODE_LOCK(node);
 	if (!tmpfs_free_node_locked(tmp, node, false)) {
 		TMPFS_NODE_UNLOCK(node);
 		TMPFS_UNLOCK(tmp);
 	}
 }
 
 bool
 tmpfs_free_node_locked(struct tmpfs_mount *tmp, struct tmpfs_node *node,
     bool detach)
 {
 	vm_object_t uobj;
 	char *symlink;
 	bool last;
 
 	TMPFS_MP_ASSERT_LOCKED(tmp);
 	TMPFS_NODE_ASSERT_LOCKED(node);
 
 	last = refcount_release(&node->tn_refcount);
 	if (node->tn_attached && (detach || last)) {
 		MPASS(tmp->tm_nodes_inuse > 0);
 		tmp->tm_nodes_inuse--;
 		LIST_REMOVE(node, tn_entries);
 		node->tn_attached = false;
 	}
 	if (!last)
 		return (false);
 
 	TMPFS_NODE_UNLOCK(node);
 
 #ifdef INVARIANTS
 	MPASS(node->tn_vnode == NULL);
 	MPASS((node->tn_vpstate & TMPFS_VNODE_ALLOCATING) == 0);
 
 	/*
 	 * Make sure this is a node type we can deal with. Everything is explicitly
 	 * enumerated without the 'default' clause so the compiler can throw an
 	 * error in case a new type is added.
 	 */
 	switch (node->tn_type) {
 	case VBLK:
 	case VCHR:
 	case VDIR:
 	case VFIFO:
 	case VSOCK:
 	case VLNK:
 	case VREG:
 		break;
 	case VNON:
 	case VBAD:
 	case VMARKER:
 		panic("%s: bad type %d for node %p", __func__, (int)node->tn_type, node);
 	}
 #endif
 
 	switch (node->tn_type) {
 	case VREG:
 		uobj = node->tn_reg.tn_aobj;
 		if (uobj != NULL) {
 			if (uobj->size != 0)
 				atomic_subtract_long(&tmp->tm_pages_used, uobj->size);
 		}
 
 		tmpfs_free_tmp(tmp);
 
 		if (uobj != NULL) {
 			KASSERT((uobj->flags & OBJ_TMPFS) == 0,
 			    ("leaked OBJ_TMPFS node %p vm_obj %p", node, uobj));
 			vm_object_deallocate(uobj);
 		}
 		break;
 	case VLNK:
 		tmpfs_free_tmp(tmp);
 
 		symlink = node->tn_link_target;
 		atomic_store_ptr(&node->tn_link_target, NULL);
 		if (atomic_load_char(&node->tn_link_smr)) {
 			cache_symlink_free(symlink, node->tn_size + 1);
 		} else {
 			free(symlink, M_TMPFSNAME);
 		}
 		break;
 	default:
 		tmpfs_free_tmp(tmp);
 		break;
 	}
 
 	uma_zfree_smr(tmpfs_node_pool, node);
 	return (true);
 }
 
 static __inline uint32_t
 tmpfs_dirent_hash(const char *name, u_int len)
 {
 	uint32_t hash;
 
 	hash = fnv_32_buf(name, len, FNV1_32_INIT + len) & TMPFS_DIRCOOKIE_MASK;
 #ifdef TMPFS_DEBUG_DIRCOOKIE_DUP
 	hash &= 0xf;
 #endif
 	if (hash < TMPFS_DIRCOOKIE_MIN)
 		hash += TMPFS_DIRCOOKIE_MIN;
 
 	return (hash);
 }
 
 static __inline off_t
 tmpfs_dirent_cookie(struct tmpfs_dirent *de)
 {
 	if (de == NULL)
 		return (TMPFS_DIRCOOKIE_EOF);
 
 	MPASS(de->td_cookie >= TMPFS_DIRCOOKIE_MIN);
 
 	return (de->td_cookie);
 }
 
 static __inline boolean_t
 tmpfs_dirent_dup(struct tmpfs_dirent *de)
 {
 	return ((de->td_cookie & TMPFS_DIRCOOKIE_DUP) != 0);
 }
 
 static __inline boolean_t
 tmpfs_dirent_duphead(struct tmpfs_dirent *de)
 {
 	return ((de->td_cookie & TMPFS_DIRCOOKIE_DUPHEAD) != 0);
 }
 
 void
 tmpfs_dirent_init(struct tmpfs_dirent *de, const char *name, u_int namelen)
 {
 	de->td_hash = de->td_cookie = tmpfs_dirent_hash(name, namelen);
 	memcpy(de->ud.td_name, name, namelen);
 	de->td_namelen = namelen;
 }
 
 /*
  * Allocates a new directory entry for the node node with a name of name.
  * The new directory entry is returned in *de.
  *
  * The link count of node is increased by one to reflect the new object
  * referencing it.
  *
  * Returns zero on success or an appropriate error code on failure.
  */
 int
 tmpfs_alloc_dirent(struct tmpfs_mount *tmp, struct tmpfs_node *node,
     const char *name, u_int len, struct tmpfs_dirent **de)
 {
 	struct tmpfs_dirent *nde;
 
 	nde = malloc(sizeof(*nde), M_TMPFSDIR, M_WAITOK);
 	nde->td_node = node;
 	if (name != NULL) {
 		nde->ud.td_name = malloc(len, M_TMPFSNAME, M_WAITOK);
 		tmpfs_dirent_init(nde, name, len);
 	} else
 		nde->td_namelen = 0;
 	if (node != NULL)
 		node->tn_links++;
 
 	*de = nde;
 
 	return (0);
 }
 
 /*
  * Frees a directory entry.  It is the caller's responsibility to destroy
  * the node referenced by it if needed.
  *
  * The link count of node is decreased by one to reflect the removal of an
  * object that referenced it.  This only happens if 'node_exists' is true;
  * otherwise the function will not access the node referred to by the
  * directory entry, as it may already have been released from the outside.
  */
 void
 tmpfs_free_dirent(struct tmpfs_mount *tmp, struct tmpfs_dirent *de)
 {
 	struct tmpfs_node *node;
 
 	node = de->td_node;
 	if (node != NULL) {
 		MPASS(node->tn_links > 0);
 		node->tn_links--;
 	}
 	if (!tmpfs_dirent_duphead(de) && de->ud.td_name != NULL)
 		free(de->ud.td_name, M_TMPFSNAME);
 	free(de, M_TMPFSDIR);
 }
 
 void
 tmpfs_destroy_vobject(struct vnode *vp, vm_object_t obj)
 {
 	bool want_vrele;
 
 	ASSERT_VOP_ELOCKED(vp, "tmpfs_destroy_vobject");
 	if (vp->v_type != VREG || obj == NULL)
 		return;
 
 	VM_OBJECT_WLOCK(obj);
 	VI_LOCK(vp);
 	/*
 	 * May be going through forced unmount.
 	 */
 	want_vrele = false;
 	if ((obj->flags & OBJ_TMPFS_VREF) != 0) {
 		vm_object_clear_flag(obj, OBJ_TMPFS_VREF);
 		want_vrele = true;
 	}
 
 	vm_object_clear_flag(obj, OBJ_TMPFS);
 	obj->un_pager.swp.swp_tmpfs = NULL;
 	if (vp->v_writecount < 0)
 		vp->v_writecount = 0;
 	VI_UNLOCK(vp);
 	VM_OBJECT_WUNLOCK(obj);
 	if (want_vrele) {
 		vrele(vp);
 	}
 }
 
 /*
  * Allocates a new vnode for the node node or returns a new reference to
  * an existing one if the node had already a vnode referencing it.  The
  * resulting locked vnode is returned in *vpp.
  *
  * Returns zero on success or an appropriate error code on failure.
  */
 int
 tmpfs_alloc_vp(struct mount *mp, struct tmpfs_node *node, int lkflag,
     struct vnode **vpp)
 {
 	struct vnode *vp;
 	enum vgetstate vs;
 	struct tmpfs_mount *tm;
 	vm_object_t object;
 	int error;
 
 	error = 0;
 	tm = VFS_TO_TMPFS(mp);
 	TMPFS_NODE_LOCK(node);
 	tmpfs_ref_node(node);
 loop:
 	TMPFS_NODE_ASSERT_LOCKED(node);
 	if ((vp = node->tn_vnode) != NULL) {
 		MPASS((node->tn_vpstate & TMPFS_VNODE_DOOMED) == 0);
 		if ((node->tn_type == VDIR && node->tn_dir.tn_parent == NULL) ||
 		    (VN_IS_DOOMED(vp) &&
 		     (lkflag & LK_NOWAIT) != 0)) {
 			TMPFS_NODE_UNLOCK(node);
 			error = ENOENT;
 			vp = NULL;
 			goto out;
 		}
 		if (VN_IS_DOOMED(vp)) {
 			node->tn_vpstate |= TMPFS_VNODE_WRECLAIM;
 			while ((node->tn_vpstate & TMPFS_VNODE_WRECLAIM) != 0) {
 				msleep(&node->tn_vnode, TMPFS_NODE_MTX(node),
 				    0, "tmpfsE", 0);
 			}
 			goto loop;
 		}
 		vs = vget_prep(vp);
 		TMPFS_NODE_UNLOCK(node);
 		error = vget_finish(vp, lkflag, vs);
 		if (error == ENOENT) {
 			TMPFS_NODE_LOCK(node);
 			goto loop;
 		}
 		if (error != 0) {
 			vp = NULL;
 			goto out;
 		}
 
 		/*
 		 * Make sure the vnode is still there after
 		 * getting the interlock to avoid racing a free.
 		 */
 		if (node->tn_vnode != vp) {
 			vput(vp);
 			TMPFS_NODE_LOCK(node);
 			goto loop;
 		}
 
 		goto out;
 	}
 
 	if ((node->tn_vpstate & TMPFS_VNODE_DOOMED) ||
 	    (node->tn_type == VDIR && node->tn_dir.tn_parent == NULL)) {
 		TMPFS_NODE_UNLOCK(node);
 		error = ENOENT;
 		vp = NULL;
 		goto out;
 	}
 
 	/*
 	 * otherwise lock the vp list while we call getnewvnode
 	 * since that can block.
 	 */
 	if (node->tn_vpstate & TMPFS_VNODE_ALLOCATING) {
 		node->tn_vpstate |= TMPFS_VNODE_WANT;
 		error = msleep((caddr_t) &node->tn_vpstate,
 		    TMPFS_NODE_MTX(node), 0, "tmpfs_alloc_vp", 0);
 		if (error != 0)
 			goto out;
 		goto loop;
 	} else
 		node->tn_vpstate |= TMPFS_VNODE_ALLOCATING;
 
 	TMPFS_NODE_UNLOCK(node);
 
 	/* Get a new vnode and associate it with our node. */
 	error = getnewvnode("tmpfs", mp, VFS_TO_TMPFS(mp)->tm_nonc ?
 	    &tmpfs_vnodeop_nonc_entries : &tmpfs_vnodeop_entries, &vp);
 	if (error != 0)
 		goto unlock;
 	MPASS(vp != NULL);
 
 	/* lkflag is ignored, the lock is exclusive */
 	(void) vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 
 	vp->v_data = node;
 	vp->v_type = node->tn_type;
 
 	/* Type-specific initialization. */
 	switch (node->tn_type) {
 	case VBLK:
 		/* FALLTHROUGH */
 	case VCHR:
 		/* FALLTHROUGH */
 	case VLNK:
 		/* FALLTHROUGH */
 	case VSOCK:
 		break;
 	case VFIFO:
 		vp->v_op = &tmpfs_fifoop_entries;
 		break;
 	case VREG:
 		object = node->tn_reg.tn_aobj;
 		VM_OBJECT_WLOCK(object);
 		KASSERT((object->flags & OBJ_TMPFS_VREF) == 0,
 		    ("%s: object %p with OBJ_TMPFS_VREF but without vnode",
 		    __func__, object));
 		KASSERT(object->un_pager.swp.writemappings == 0,
 		    ("%s: object %p has writemappings",
 		    __func__, object));
 		VI_LOCK(vp);
 		KASSERT(vp->v_object == NULL, ("Not NULL v_object in tmpfs"));
 		vp->v_object = object;
 		object->un_pager.swp.swp_tmpfs = vp;
 		vm_object_set_flag(object, OBJ_TMPFS);
 		vn_irflag_set_locked(vp, VIRF_PGREAD | VIRF_TEXT_REF);
 		VI_UNLOCK(vp);
 		VM_OBJECT_WUNLOCK(object);
 		break;
 	case VDIR:
 		MPASS(node->tn_dir.tn_parent != NULL);
 		if (node->tn_dir.tn_parent == node)
 			vp->v_vflag |= VV_ROOT;
 		break;
 
 	default:
 		panic("tmpfs_alloc_vp: type %p %d", node, (int)node->tn_type);
 	}
 	if (vp->v_type != VFIFO)
 		VN_LOCK_ASHARE(vp);
 
 	error = insmntque1(vp, mp);
 	if (error != 0) {
 		/* Need to clear v_object for insmntque failure. */
 		tmpfs_destroy_vobject(vp, vp->v_object);
 		vp->v_object = NULL;
 		vp->v_data = NULL;
 		vp->v_op = &dead_vnodeops;
 		vgone(vp);
 		vput(vp);
 		vp = NULL;
 	}
 
 unlock:
 	TMPFS_NODE_LOCK(node);
 
 	MPASS(node->tn_vpstate & TMPFS_VNODE_ALLOCATING);
 	node->tn_vpstate &= ~TMPFS_VNODE_ALLOCATING;
 	node->tn_vnode = vp;
 
 	if (node->tn_vpstate & TMPFS_VNODE_WANT) {
 		node->tn_vpstate &= ~TMPFS_VNODE_WANT;
 		TMPFS_NODE_UNLOCK(node);
 		wakeup((caddr_t) &node->tn_vpstate);
 	} else
 		TMPFS_NODE_UNLOCK(node);
 
 out:
 	if (error == 0) {
 		*vpp = vp;
 
 #ifdef INVARIANTS
 		MPASS(*vpp != NULL && VOP_ISLOCKED(*vpp));
 		TMPFS_NODE_LOCK(node);
 		MPASS(*vpp == node->tn_vnode);
 		TMPFS_NODE_UNLOCK(node);
 #endif
 	}
 	tmpfs_free_node(tm, node);
 
 	return (error);
 }
 
 /*
  * Destroys the association between the vnode vp and the node it
  * references.
  */
 void
 tmpfs_free_vp(struct vnode *vp)
 {
 	struct tmpfs_node *node;
 
 	node = VP_TO_TMPFS_NODE(vp);
 
 	TMPFS_NODE_ASSERT_LOCKED(node);
 	node->tn_vnode = NULL;
 	if ((node->tn_vpstate & TMPFS_VNODE_WRECLAIM) != 0)
 		wakeup(&node->tn_vnode);
 	node->tn_vpstate &= ~TMPFS_VNODE_WRECLAIM;
 	vp->v_data = NULL;
 }
 
 /*
  * Allocates a new file of type 'type' and adds it to the parent directory
  * 'dvp'; this addition is done using the component name given in 'cnp'.
  * The ownership of the new file is automatically assigned based on the
  * credentials of the caller (through 'cnp'), the group is set based on
  * the parent directory and the mode is determined from the 'vap' argument.
  * If successful, *vpp holds a vnode to the newly created file and zero
  * is returned.  Otherwise *vpp is NULL and the function returns an
  * appropriate error code.
  */
 int
 tmpfs_alloc_file(struct vnode *dvp, struct vnode **vpp, struct vattr *vap,
     struct componentname *cnp, const char *target)
 {
 	int error;
 	struct tmpfs_dirent *de;
 	struct tmpfs_mount *tmp;
 	struct tmpfs_node *dnode;
 	struct tmpfs_node *node;
 	struct tmpfs_node *parent;
 
 	ASSERT_VOP_ELOCKED(dvp, "tmpfs_alloc_file");
-	MPASS(cnp->cn_flags & HASBUF);
 
 	tmp = VFS_TO_TMPFS(dvp->v_mount);
 	dnode = VP_TO_TMPFS_DIR(dvp);
 	*vpp = NULL;
 
 	/* If the entry we are creating is a directory, we cannot overflow
 	 * the number of links of its parent, because it will get a new
 	 * link. */
 	if (vap->va_type == VDIR) {
 		/* Ensure that we do not overflow the maximum number of links
 		 * imposed by the system. */
 		MPASS(dnode->tn_links <= TMPFS_LINK_MAX);
 		if (dnode->tn_links == TMPFS_LINK_MAX) {
 			return (EMLINK);
 		}
 
 		parent = dnode;
 		MPASS(parent != NULL);
 	} else
 		parent = NULL;
 
 	/* Allocate a node that represents the new file. */
 	error = tmpfs_alloc_node(dvp->v_mount, tmp, vap->va_type,
 	    cnp->cn_cred->cr_uid, dnode->tn_gid, vap->va_mode, parent,
 	    target, vap->va_rdev, &node);
 	if (error != 0)
 		return (error);
 
 	/* Allocate a directory entry that points to the new file. */
 	error = tmpfs_alloc_dirent(tmp, node, cnp->cn_nameptr, cnp->cn_namelen,
 	    &de);
 	if (error != 0) {
 		tmpfs_free_node(tmp, node);
 		return (error);
 	}
 
 	/* Allocate a vnode for the new file. */
 	error = tmpfs_alloc_vp(dvp->v_mount, node, LK_EXCLUSIVE, vpp);
 	if (error != 0) {
 		tmpfs_free_dirent(tmp, de);
 		tmpfs_free_node(tmp, node);
 		return (error);
 	}
 
 	/* Now that all required items are allocated, we can proceed to
 	 * insert the new node into the directory, an operation that
 	 * cannot fail. */
 	if (cnp->cn_flags & ISWHITEOUT)
 		tmpfs_dir_whiteout_remove(dvp, cnp);
 	tmpfs_dir_attach(dvp, de);
 	return (0);
 }
 
 struct tmpfs_dirent *
 tmpfs_dir_first(struct tmpfs_node *dnode, struct tmpfs_dir_cursor *dc)
 {
 	struct tmpfs_dirent *de;
 
 	de = RB_MIN(tmpfs_dir, &dnode->tn_dir.tn_dirhead);
 	dc->tdc_tree = de;
 	if (de != NULL && tmpfs_dirent_duphead(de))
 		de = LIST_FIRST(&de->ud.td_duphead);
 	dc->tdc_current = de;
 
 	return (dc->tdc_current);
 }
 
 struct tmpfs_dirent *
 tmpfs_dir_next(struct tmpfs_node *dnode, struct tmpfs_dir_cursor *dc)
 {
 	struct tmpfs_dirent *de;
 
 	MPASS(dc->tdc_tree != NULL);
 	if (tmpfs_dirent_dup(dc->tdc_current)) {
 		dc->tdc_current = LIST_NEXT(dc->tdc_current, uh.td_dup.entries);
 		if (dc->tdc_current != NULL)
 			return (dc->tdc_current);
 	}
 	dc->tdc_tree = dc->tdc_current = RB_NEXT(tmpfs_dir,
 	    &dnode->tn_dir.tn_dirhead, dc->tdc_tree);
 	if ((de = dc->tdc_current) != NULL && tmpfs_dirent_duphead(de)) {
 		dc->tdc_current = LIST_FIRST(&de->ud.td_duphead);
 		MPASS(dc->tdc_current != NULL);
 	}
 
 	return (dc->tdc_current);
 }
 
 /* Lookup directory entry in RB-Tree. Function may return duphead entry. */
 static struct tmpfs_dirent *
 tmpfs_dir_xlookup_hash(struct tmpfs_node *dnode, uint32_t hash)
 {
 	struct tmpfs_dirent *de, dekey;
 
 	dekey.td_hash = hash;
 	de = RB_FIND(tmpfs_dir, &dnode->tn_dir.tn_dirhead, &dekey);
 	return (de);
 }
 
 /* Lookup directory entry by cookie, initialize directory cursor accordingly. */
 static struct tmpfs_dirent *
 tmpfs_dir_lookup_cookie(struct tmpfs_node *node, off_t cookie,
     struct tmpfs_dir_cursor *dc)
 {
 	struct tmpfs_dir *dirhead = &node->tn_dir.tn_dirhead;
 	struct tmpfs_dirent *de, dekey;
 
 	MPASS(cookie >= TMPFS_DIRCOOKIE_MIN);
 
 	if (cookie == node->tn_dir.tn_readdir_lastn &&
 	    (de = node->tn_dir.tn_readdir_lastp) != NULL) {
 		/* Protect against possible race, tn_readdir_last[pn]
 		 * may be updated with only shared vnode lock held. */
 		if (cookie == tmpfs_dirent_cookie(de))
 			goto out;
 	}
 
 	if ((cookie & TMPFS_DIRCOOKIE_DUP) != 0) {
 		LIST_FOREACH(de, &node->tn_dir.tn_dupindex,
 		    uh.td_dup.index_entries) {
 			MPASS(tmpfs_dirent_dup(de));
 			if (de->td_cookie == cookie)
 				goto out;
 			/* dupindex list is sorted. */
 			if (de->td_cookie < cookie) {
 				de = NULL;
 				goto out;
 			}
 		}
 		MPASS(de == NULL);
 		goto out;
 	}
 
 	if ((cookie & TMPFS_DIRCOOKIE_MASK) != cookie) {
 		de = NULL;
 	} else {
 		dekey.td_hash = cookie;
 		/* Recover if direntry for cookie was removed */
 		de = RB_NFIND(tmpfs_dir, dirhead, &dekey);
 	}
 	dc->tdc_tree = de;
 	dc->tdc_current = de;
 	if (de != NULL && tmpfs_dirent_duphead(de)) {
 		dc->tdc_current = LIST_FIRST(&de->ud.td_duphead);
 		MPASS(dc->tdc_current != NULL);
 	}
 	return (dc->tdc_current);
 
 out:
 	dc->tdc_tree = de;
 	dc->tdc_current = de;
 	if (de != NULL && tmpfs_dirent_dup(de))
 		dc->tdc_tree = tmpfs_dir_xlookup_hash(node,
 		    de->td_hash);
 	return (dc->tdc_current);
 }
 
 /*
  * Looks for a directory entry in the directory represented by node.
  * 'cnp' describes the name of the entry to look for.  Note that the .
  * and .. components are not allowed as they do not physically exist
  * within directories.
  *
  * Returns a pointer to the entry when found, otherwise NULL.
  */
 struct tmpfs_dirent *
 tmpfs_dir_lookup(struct tmpfs_node *node, struct tmpfs_node *f,
     struct componentname *cnp)
 {
 	struct tmpfs_dir_duphead *duphead;
 	struct tmpfs_dirent *de;
 	uint32_t hash;
 
 	MPASS(IMPLIES(cnp->cn_namelen == 1, cnp->cn_nameptr[0] != '.'));
 	MPASS(IMPLIES(cnp->cn_namelen == 2, !(cnp->cn_nameptr[0] == '.' &&
 	    cnp->cn_nameptr[1] == '.')));
 	TMPFS_VALIDATE_DIR(node);
 
 	hash = tmpfs_dirent_hash(cnp->cn_nameptr, cnp->cn_namelen);
 	de = tmpfs_dir_xlookup_hash(node, hash);
 	if (de != NULL && tmpfs_dirent_duphead(de)) {
 		duphead = &de->ud.td_duphead;
 		LIST_FOREACH(de, duphead, uh.td_dup.entries) {
 			if (TMPFS_DIRENT_MATCHES(de, cnp->cn_nameptr,
 			    cnp->cn_namelen))
 				break;
 		}
 	} else if (de != NULL) {
 		if (!TMPFS_DIRENT_MATCHES(de, cnp->cn_nameptr,
 		    cnp->cn_namelen))
 			de = NULL;
 	}
 	if (de != NULL && f != NULL && de->td_node != f)
 		de = NULL;
 
 	return (de);
 }
 
 /*
  * Attach duplicate-cookie directory entry nde to dnode and insert to dupindex
  * list, allocate new cookie value.
  */
 static void
 tmpfs_dir_attach_dup(struct tmpfs_node *dnode,
     struct tmpfs_dir_duphead *duphead, struct tmpfs_dirent *nde)
 {
 	struct tmpfs_dir_duphead *dupindex;
 	struct tmpfs_dirent *de, *pde;
 
 	dupindex = &dnode->tn_dir.tn_dupindex;
 	de = LIST_FIRST(dupindex);
 	if (de == NULL || de->td_cookie < TMPFS_DIRCOOKIE_DUP_MAX) {
 		if (de == NULL)
 			nde->td_cookie = TMPFS_DIRCOOKIE_DUP_MIN;
 		else
 			nde->td_cookie = de->td_cookie + 1;
 		MPASS(tmpfs_dirent_dup(nde));
 		LIST_INSERT_HEAD(dupindex, nde, uh.td_dup.index_entries);
 		LIST_INSERT_HEAD(duphead, nde, uh.td_dup.entries);
 		return;
 	}
 
 	/*
 	 * Cookie numbers are near exhaustion. Scan dupindex list for unused
 	 * numbers. dupindex list is sorted in descending order. Keep it so
 	 * after inserting nde.
 	 */
 	while (1) {
 		pde = de;
 		de = LIST_NEXT(de, uh.td_dup.index_entries);
 		if (de == NULL && pde->td_cookie != TMPFS_DIRCOOKIE_DUP_MIN) {
 			/*
 			 * Last element of the index doesn't have minimal cookie
 			 * value, use it.
 			 */
 			nde->td_cookie = TMPFS_DIRCOOKIE_DUP_MIN;
 			LIST_INSERT_AFTER(pde, nde, uh.td_dup.index_entries);
 			LIST_INSERT_HEAD(duphead, nde, uh.td_dup.entries);
 			return;
 		} else if (de == NULL) {
 			/*
 			 * We are so lucky have 2^30 hash duplicates in single
 			 * directory :) Return largest possible cookie value.
 			 * It should be fine except possible issues with
 			 * VOP_READDIR restart.
 			 */
 			nde->td_cookie = TMPFS_DIRCOOKIE_DUP_MAX;
 			LIST_INSERT_HEAD(dupindex, nde,
 			    uh.td_dup.index_entries);
 			LIST_INSERT_HEAD(duphead, nde, uh.td_dup.entries);
 			return;
 		}
 		if (de->td_cookie + 1 == pde->td_cookie ||
 		    de->td_cookie >= TMPFS_DIRCOOKIE_DUP_MAX)
 			continue;	/* No hole or invalid cookie. */
 		nde->td_cookie = de->td_cookie + 1;
 		MPASS(tmpfs_dirent_dup(nde));
 		MPASS(pde->td_cookie > nde->td_cookie);
 		MPASS(nde->td_cookie > de->td_cookie);
 		LIST_INSERT_BEFORE(de, nde, uh.td_dup.index_entries);
 		LIST_INSERT_HEAD(duphead, nde, uh.td_dup.entries);
 		return;
 	}
 }
 
 /*
  * Attaches the directory entry de to the directory represented by vp.
  * Note that this does not change the link count of the node pointed by
  * the directory entry, as this is done by tmpfs_alloc_dirent.
  */
 void
 tmpfs_dir_attach(struct vnode *vp, struct tmpfs_dirent *de)
 {
 	struct tmpfs_node *dnode;
 	struct tmpfs_dirent *xde, *nde;
 
 	ASSERT_VOP_ELOCKED(vp, __func__);
 	MPASS(de->td_namelen > 0);
 	MPASS(de->td_hash >= TMPFS_DIRCOOKIE_MIN);
 	MPASS(de->td_cookie == de->td_hash);
 
 	dnode = VP_TO_TMPFS_DIR(vp);
 	dnode->tn_dir.tn_readdir_lastn = 0;
 	dnode->tn_dir.tn_readdir_lastp = NULL;
 
 	MPASS(!tmpfs_dirent_dup(de));
 	xde = RB_INSERT(tmpfs_dir, &dnode->tn_dir.tn_dirhead, de);
 	if (xde != NULL && tmpfs_dirent_duphead(xde))
 		tmpfs_dir_attach_dup(dnode, &xde->ud.td_duphead, de);
 	else if (xde != NULL) {
 		/*
 		 * Allocate new duphead. Swap xde with duphead to avoid
 		 * adding/removing elements with the same hash.
 		 */
 		MPASS(!tmpfs_dirent_dup(xde));
 		tmpfs_alloc_dirent(VFS_TO_TMPFS(vp->v_mount), NULL, NULL, 0,
 		    &nde);
 		/* *nde = *xde; XXX gcc 4.2.1 may generate invalid code. */
 		memcpy(nde, xde, sizeof(*xde));
 		xde->td_cookie |= TMPFS_DIRCOOKIE_DUPHEAD;
 		LIST_INIT(&xde->ud.td_duphead);
 		xde->td_namelen = 0;
 		xde->td_node = NULL;
 		tmpfs_dir_attach_dup(dnode, &xde->ud.td_duphead, nde);
 		tmpfs_dir_attach_dup(dnode, &xde->ud.td_duphead, de);
 	}
 	dnode->tn_size += sizeof(struct tmpfs_dirent);
 	dnode->tn_status |= TMPFS_NODE_CHANGED | TMPFS_NODE_MODIFIED;
 	dnode->tn_accessed = true;
 	tmpfs_update(vp);
 }
 
 /*
  * Detaches the directory entry de from the directory represented by vp.
  * Note that this does not change the link count of the node pointed by
  * the directory entry, as this is done by tmpfs_free_dirent.
  */
 void
 tmpfs_dir_detach(struct vnode *vp, struct tmpfs_dirent *de)
 {
 	struct tmpfs_mount *tmp;
 	struct tmpfs_dir *head;
 	struct tmpfs_node *dnode;
 	struct tmpfs_dirent *xde;
 
 	ASSERT_VOP_ELOCKED(vp, __func__);
 
 	dnode = VP_TO_TMPFS_DIR(vp);
 	head = &dnode->tn_dir.tn_dirhead;
 	dnode->tn_dir.tn_readdir_lastn = 0;
 	dnode->tn_dir.tn_readdir_lastp = NULL;
 
 	if (tmpfs_dirent_dup(de)) {
 		/* Remove duphead if de was last entry. */
 		if (LIST_NEXT(de, uh.td_dup.entries) == NULL) {
 			xde = tmpfs_dir_xlookup_hash(dnode, de->td_hash);
 			MPASS(tmpfs_dirent_duphead(xde));
 		} else
 			xde = NULL;
 		LIST_REMOVE(de, uh.td_dup.entries);
 		LIST_REMOVE(de, uh.td_dup.index_entries);
 		if (xde != NULL) {
 			if (LIST_EMPTY(&xde->ud.td_duphead)) {
 				RB_REMOVE(tmpfs_dir, head, xde);
 				tmp = VFS_TO_TMPFS(vp->v_mount);
 				MPASS(xde->td_node == NULL);
 				tmpfs_free_dirent(tmp, xde);
 			}
 		}
 		de->td_cookie = de->td_hash;
 	} else
 		RB_REMOVE(tmpfs_dir, head, de);
 
 	dnode->tn_size -= sizeof(struct tmpfs_dirent);
 	dnode->tn_status |= TMPFS_NODE_CHANGED | TMPFS_NODE_MODIFIED;
 	dnode->tn_accessed = true;
 	tmpfs_update(vp);
 }
 
 void
 tmpfs_dir_destroy(struct tmpfs_mount *tmp, struct tmpfs_node *dnode)
 {
 	struct tmpfs_dirent *de, *dde, *nde;
 
 	RB_FOREACH_SAFE(de, tmpfs_dir, &dnode->tn_dir.tn_dirhead, nde) {
 		RB_REMOVE(tmpfs_dir, &dnode->tn_dir.tn_dirhead, de);
 		/* Node may already be destroyed. */
 		de->td_node = NULL;
 		if (tmpfs_dirent_duphead(de)) {
 			while ((dde = LIST_FIRST(&de->ud.td_duphead)) != NULL) {
 				LIST_REMOVE(dde, uh.td_dup.entries);
 				dde->td_node = NULL;
 				tmpfs_free_dirent(tmp, dde);
 			}
 		}
 		tmpfs_free_dirent(tmp, de);
 	}
 }
 
 /*
  * Helper function for tmpfs_readdir.  Creates a '.' entry for the given
  * directory and returns it in the uio space.  The function returns 0
  * on success, -1 if there was not enough space in the uio structure to
  * hold the directory entry or an appropriate error code if another
  * error happens.
  */
 static int
 tmpfs_dir_getdotdent(struct tmpfs_mount *tm, struct tmpfs_node *node,
     struct uio *uio)
 {
 	int error;
 	struct dirent dent;
 
 	TMPFS_VALIDATE_DIR(node);
 	MPASS(uio->uio_offset == TMPFS_DIRCOOKIE_DOT);
 
 	dent.d_fileno = node->tn_id;
 	dent.d_off = TMPFS_DIRCOOKIE_DOTDOT;
 	dent.d_type = DT_DIR;
 	dent.d_namlen = 1;
 	dent.d_name[0] = '.';
 	dent.d_reclen = GENERIC_DIRSIZ(&dent);
 	dirent_terminate(&dent);
 
 	if (dent.d_reclen > uio->uio_resid)
 		error = EJUSTRETURN;
 	else
 		error = uiomove(&dent, dent.d_reclen, uio);
 
 	tmpfs_set_accessed(tm, node);
 
 	return (error);
 }
 
 /*
  * Helper function for tmpfs_readdir.  Creates a '..' entry for the given
  * directory and returns it in the uio space.  The function returns 0
  * on success, -1 if there was not enough space in the uio structure to
  * hold the directory entry or an appropriate error code if another
  * error happens.
  */
 static int
 tmpfs_dir_getdotdotdent(struct tmpfs_mount *tm, struct tmpfs_node *node,
     struct uio *uio, off_t next)
 {
 	struct tmpfs_node *parent;
 	struct dirent dent;
 	int error;
 
 	TMPFS_VALIDATE_DIR(node);
 	MPASS(uio->uio_offset == TMPFS_DIRCOOKIE_DOTDOT);
 
 	/*
 	 * Return ENOENT if the current node is already removed.
 	 */
 	TMPFS_ASSERT_LOCKED(node);
 	parent = node->tn_dir.tn_parent;
 	if (parent == NULL)
 		return (ENOENT);
 
 	dent.d_fileno = parent->tn_id;
 	dent.d_off = next;
 	dent.d_type = DT_DIR;
 	dent.d_namlen = 2;
 	dent.d_name[0] = '.';
 	dent.d_name[1] = '.';
 	dent.d_reclen = GENERIC_DIRSIZ(&dent);
 	dirent_terminate(&dent);
 
 	if (dent.d_reclen > uio->uio_resid)
 		error = EJUSTRETURN;
 	else
 		error = uiomove(&dent, dent.d_reclen, uio);
 
 	tmpfs_set_accessed(tm, node);
 
 	return (error);
 }
 
 /*
  * Helper function for tmpfs_readdir.  Returns as much directory entries
  * as can fit in the uio space.  The read starts at uio->uio_offset.
  * The function returns 0 on success, -1 if there was not enough space
  * in the uio structure to hold the directory entry or an appropriate
  * error code if another error happens.
  */
 int
 tmpfs_dir_getdents(struct tmpfs_mount *tm, struct tmpfs_node *node,
     struct uio *uio, int maxcookies, uint64_t *cookies, int *ncookies)
 {
 	struct tmpfs_dir_cursor dc;
 	struct tmpfs_dirent *de, *nde;
 	off_t off;
 	int error;
 
 	TMPFS_VALIDATE_DIR(node);
 
 	off = 0;
 
 	/*
 	 * Lookup the node from the current offset.  The starting offset of
 	 * 0 will lookup both '.' and '..', and then the first real entry,
 	 * or EOF if there are none.  Then find all entries for the dir that
 	 * fit into the buffer.  Once no more entries are found (de == NULL),
 	 * the offset is set to TMPFS_DIRCOOKIE_EOF, which will cause the next
 	 * call to return 0.
 	 */
 	switch (uio->uio_offset) {
 	case TMPFS_DIRCOOKIE_DOT:
 		error = tmpfs_dir_getdotdent(tm, node, uio);
 		if (error != 0)
 			return (error);
 		uio->uio_offset = off = TMPFS_DIRCOOKIE_DOTDOT;
 		if (cookies != NULL)
 			cookies[(*ncookies)++] = off;
 		/* FALLTHROUGH */
 	case TMPFS_DIRCOOKIE_DOTDOT:
 		de = tmpfs_dir_first(node, &dc);
 		off = tmpfs_dirent_cookie(de);
 		error = tmpfs_dir_getdotdotdent(tm, node, uio, off);
 		if (error != 0)
 			return (error);
 		uio->uio_offset = off;
 		if (cookies != NULL)
 			cookies[(*ncookies)++] = off;
 		/* EOF. */
 		if (de == NULL)
 			return (0);
 		break;
 	case TMPFS_DIRCOOKIE_EOF:
 		return (0);
 	default:
 		de = tmpfs_dir_lookup_cookie(node, uio->uio_offset, &dc);
 		if (de == NULL)
 			return (EINVAL);
 		if (cookies != NULL)
 			off = tmpfs_dirent_cookie(de);
 	}
 
 	/*
 	 * Read as much entries as possible; i.e., until we reach the end of the
 	 * directory or we exhaust uio space.
 	 */
 	do {
 		struct dirent d;
 
 		/*
 		 * Create a dirent structure representing the current tmpfs_node
 		 * and fill it.
 		 */
 		if (de->td_node == NULL) {
 			d.d_fileno = 1;
 			d.d_type = DT_WHT;
 		} else {
 			d.d_fileno = de->td_node->tn_id;
 			switch (de->td_node->tn_type) {
 			case VBLK:
 				d.d_type = DT_BLK;
 				break;
 
 			case VCHR:
 				d.d_type = DT_CHR;
 				break;
 
 			case VDIR:
 				d.d_type = DT_DIR;
 				break;
 
 			case VFIFO:
 				d.d_type = DT_FIFO;
 				break;
 
 			case VLNK:
 				d.d_type = DT_LNK;
 				break;
 
 			case VREG:
 				d.d_type = DT_REG;
 				break;
 
 			case VSOCK:
 				d.d_type = DT_SOCK;
 				break;
 
 			default:
 				panic("tmpfs_dir_getdents: type %p %d",
 				    de->td_node, (int)de->td_node->tn_type);
 			}
 		}
 		d.d_namlen = de->td_namelen;
 		MPASS(de->td_namelen < sizeof(d.d_name));
 		(void)memcpy(d.d_name, de->ud.td_name, de->td_namelen);
 		d.d_reclen = GENERIC_DIRSIZ(&d);
 
 		/*
 		 * Stop reading if the directory entry we are treating is bigger
 		 * than the amount of data that can be returned.
 		 */
 		if (d.d_reclen > uio->uio_resid) {
 			error = EJUSTRETURN;
 			break;
 		}
 
 		nde = tmpfs_dir_next(node, &dc);
 		d.d_off = tmpfs_dirent_cookie(nde);
 		dirent_terminate(&d);
 
 		/*
 		 * Copy the new dirent structure into the output buffer and
 		 * advance pointers.
 		 */
 		error = uiomove(&d, d.d_reclen, uio);
 		if (error == 0) {
 			de = nde;
 			if (cookies != NULL) {
 				off = tmpfs_dirent_cookie(de);
 				MPASS(*ncookies < maxcookies);
 				cookies[(*ncookies)++] = off;
 			}
 		}
 	} while (error == 0 && uio->uio_resid > 0 && de != NULL);
 
 	/* Skip setting off when using cookies as it is already done above. */
 	if (cookies == NULL)
 		off = tmpfs_dirent_cookie(de);
 
 	/* Update the offset and cache. */
 	uio->uio_offset = off;
 	node->tn_dir.tn_readdir_lastn = off;
 	node->tn_dir.tn_readdir_lastp = de;
 
 	tmpfs_set_accessed(tm, node);
 	return (error);
 }
 
 int
 tmpfs_dir_whiteout_add(struct vnode *dvp, struct componentname *cnp)
 {
 	struct tmpfs_dirent *de;
 	int error;
 
 	error = tmpfs_alloc_dirent(VFS_TO_TMPFS(dvp->v_mount), NULL,
 	    cnp->cn_nameptr, cnp->cn_namelen, &de);
 	if (error != 0)
 		return (error);
 	tmpfs_dir_attach(dvp, de);
 	return (0);
 }
 
 void
 tmpfs_dir_whiteout_remove(struct vnode *dvp, struct componentname *cnp)
 {
 	struct tmpfs_dirent *de;
 
 	de = tmpfs_dir_lookup(VP_TO_TMPFS_DIR(dvp), NULL, cnp);
 	MPASS(de != NULL && de->td_node == NULL);
 	tmpfs_dir_detach(dvp, de);
 	tmpfs_free_dirent(VFS_TO_TMPFS(dvp->v_mount), de);
 }
 
 /*
  * Resizes the aobj associated with the regular file pointed to by 'vp' to the
  * size 'newsize'.  'vp' must point to a vnode that represents a regular file.
  * 'newsize' must be positive.
  *
  * Returns zero on success or an appropriate error code on failure.
  */
 int
 tmpfs_reg_resize(struct vnode *vp, off_t newsize, boolean_t ignerr)
 {
 	struct tmpfs_mount *tmp;
 	struct tmpfs_node *node;
 	vm_object_t uobj;
 	vm_pindex_t idx, newpages, oldpages;
 	off_t oldsize;
 	int base, error;
 
 	MPASS(vp->v_type == VREG);
 	MPASS(newsize >= 0);
 
 	node = VP_TO_TMPFS_NODE(vp);
 	uobj = node->tn_reg.tn_aobj;
 	tmp = VFS_TO_TMPFS(vp->v_mount);
 
 	/*
 	 * Convert the old and new sizes to the number of pages needed to
 	 * store them.  It may happen that we do not need to do anything
 	 * because the last allocated page can accommodate the change on
 	 * its own.
 	 */
 	oldsize = node->tn_size;
 	oldpages = OFF_TO_IDX(oldsize + PAGE_MASK);
 	MPASS(oldpages == uobj->size);
 	newpages = OFF_TO_IDX(newsize + PAGE_MASK);
 
 	if (__predict_true(newpages == oldpages && newsize >= oldsize)) {
 		node->tn_size = newsize;
 		return (0);
 	}
 
 	if (newpages > oldpages &&
 	    tmpfs_pages_check_avail(tmp, newpages - oldpages) == 0)
 		return (ENOSPC);
 
 	VM_OBJECT_WLOCK(uobj);
 	if (newsize < oldsize) {
 		/*
 		 * Zero the truncated part of the last page.
 		 */
 		base = newsize & PAGE_MASK;
 		if (base != 0) {
 			idx = OFF_TO_IDX(newsize);
 			error = tmpfs_partial_page_invalidate(uobj, idx, base,
 			    PAGE_SIZE, ignerr);
 			if (error != 0) {
 				VM_OBJECT_WUNLOCK(uobj);
 				return (error);
 			}
 		}
 
 		/*
 		 * Release any swap space and free any whole pages.
 		 */
 		if (newpages < oldpages)
 			vm_object_page_remove(uobj, newpages, 0, 0);
 	}
 	uobj->size = newpages;
 	VM_OBJECT_WUNLOCK(uobj);
 
 	atomic_add_long(&tmp->tm_pages_used, newpages - oldpages);
 
 	node->tn_size = newsize;
 	return (0);
 }
 
 /*
  * Punch hole in the aobj associated with the regular file pointed to by 'vp'.
  * Requests completely beyond the end-of-file are converted to no-op.
  *
  * Returns 0 on success or error code from tmpfs_partial_page_invalidate() on
  * failure.
  */
 int
 tmpfs_reg_punch_hole(struct vnode *vp, off_t *offset, off_t *length)
 {
 	struct tmpfs_node *node;
 	vm_object_t object;
 	vm_pindex_t pistart, pi, piend;
 	int startofs, endofs, end;
 	off_t off, len;
 	int error;
 
 	KASSERT(*length <= OFF_MAX - *offset, ("%s: offset + length overflows",
 	    __func__));
 	node = VP_TO_TMPFS_NODE(vp);
 	KASSERT(node->tn_type == VREG, ("%s: node is not regular file",
 	    __func__));
 	object = node->tn_reg.tn_aobj;
 	off = *offset;
 	len = omin(node->tn_size - off, *length);
 	startofs = off & PAGE_MASK;
 	endofs = (off + len) & PAGE_MASK;
 	pistart = OFF_TO_IDX(off);
 	piend = OFF_TO_IDX(off + len);
 	pi = OFF_TO_IDX((vm_ooffset_t)off + PAGE_MASK);
 	error = 0;
 
 	/* Handle the case when offset is on or beyond file size. */
 	if (len <= 0) {
 		*length = 0;
 		return (0);
 	}
 
 	VM_OBJECT_WLOCK(object);
 
 	/*
 	 * If there is a partial page at the beginning of the hole-punching
 	 * request, fill the partial page with zeroes.
 	 */
 	if (startofs != 0) {
 		end = pistart != piend ? PAGE_SIZE : endofs;
 		error = tmpfs_partial_page_invalidate(object, pistart, startofs,
 		    end, FALSE);
 		if (error != 0)
 			goto out;
 		off += end - startofs;
 		len -= end - startofs;
 	}
 
 	/*
 	 * Toss away the full pages in the affected area.
 	 */
 	if (pi < piend) {
 		vm_object_page_remove(object, pi, piend, 0);
 		off += IDX_TO_OFF(piend - pi);
 		len -= IDX_TO_OFF(piend - pi);
 	}
 
 	/*
 	 * If there is a partial page at the end of the hole-punching request,
 	 * fill the partial page with zeroes.
 	 */
 	if (endofs != 0 && pistart != piend) {
 		error = tmpfs_partial_page_invalidate(object, piend, 0, endofs,
 		    FALSE);
 		if (error != 0)
 			goto out;
 		off += endofs;
 		len -= endofs;
 	}
 
 out:
 	VM_OBJECT_WUNLOCK(object);
 	*offset = off;
 	*length = len;
 	return (error);
 }
 
 void
 tmpfs_check_mtime(struct vnode *vp)
 {
 	struct tmpfs_node *node;
 	struct vm_object *obj;
 
 	ASSERT_VOP_ELOCKED(vp, "check_mtime");
 	if (vp->v_type != VREG)
 		return;
 	obj = vp->v_object;
 	KASSERT(obj->type == tmpfs_pager_type &&
 	    (obj->flags & (OBJ_SWAP | OBJ_TMPFS)) ==
 	    (OBJ_SWAP | OBJ_TMPFS), ("non-tmpfs obj"));
 	/* unlocked read */
 	if (obj->generation != obj->cleangeneration) {
 		VM_OBJECT_WLOCK(obj);
 		if (obj->generation != obj->cleangeneration) {
 			obj->cleangeneration = obj->generation;
 			node = VP_TO_TMPFS_NODE(vp);
 			node->tn_status |= TMPFS_NODE_MODIFIED |
 			    TMPFS_NODE_CHANGED;
 		}
 		VM_OBJECT_WUNLOCK(obj);
 	}
 }
 
 /*
  * Change flags of the given vnode.
  * Caller should execute tmpfs_update on vp after a successful execution.
  * The vnode must be locked on entry and remain locked on exit.
  */
 int
 tmpfs_chflags(struct vnode *vp, u_long flags, struct ucred *cred,
     struct thread *p)
 {
 	int error;
 	struct tmpfs_node *node;
 
 	ASSERT_VOP_ELOCKED(vp, "chflags");
 
 	node = VP_TO_TMPFS_NODE(vp);
 
 	if ((flags & ~(SF_APPEND | SF_ARCHIVED | SF_IMMUTABLE | SF_NOUNLINK |
 	    UF_APPEND | UF_ARCHIVE | UF_HIDDEN | UF_IMMUTABLE | UF_NODUMP |
 	    UF_NOUNLINK | UF_OFFLINE | UF_OPAQUE | UF_READONLY | UF_REPARSE |
 	    UF_SPARSE | UF_SYSTEM)) != 0)
 		return (EOPNOTSUPP);
 
 	/* Disallow this operation if the file system is mounted read-only. */
 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
 		return (EROFS);
 
 	/*
 	 * Callers may only modify the file flags on objects they
 	 * have VADMIN rights for.
 	 */
 	if ((error = VOP_ACCESS(vp, VADMIN, cred, p)))
 		return (error);
 	/*
 	 * Unprivileged processes are not permitted to unset system
 	 * flags, or modify flags if any system flags are set.
 	 */
 	if (!priv_check_cred(cred, PRIV_VFS_SYSFLAGS)) {
 		if (node->tn_flags &
 		    (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND)) {
 			error = securelevel_gt(cred, 0);
 			if (error)
 				return (error);
 		}
 	} else {
 		if (node->tn_flags &
 		    (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND) ||
 		    ((flags ^ node->tn_flags) & SF_SETTABLE))
 			return (EPERM);
 	}
 	node->tn_flags = flags;
 	node->tn_status |= TMPFS_NODE_CHANGED;
 
 	ASSERT_VOP_ELOCKED(vp, "chflags2");
 
 	return (0);
 }
 
 /*
  * Change access mode on the given vnode.
  * Caller should execute tmpfs_update on vp after a successful execution.
  * The vnode must be locked on entry and remain locked on exit.
  */
 int
 tmpfs_chmod(struct vnode *vp, mode_t mode, struct ucred *cred, struct thread *p)
 {
 	int error;
 	struct tmpfs_node *node;
 	mode_t newmode;
 
 	ASSERT_VOP_ELOCKED(vp, "chmod");
 	ASSERT_VOP_IN_SEQC(vp);
 
 	node = VP_TO_TMPFS_NODE(vp);
 
 	/* Disallow this operation if the file system is mounted read-only. */
 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
 		return (EROFS);
 
 	/* Immutable or append-only files cannot be modified, either. */
 	if (node->tn_flags & (IMMUTABLE | APPEND))
 		return (EPERM);
 
 	/*
 	 * To modify the permissions on a file, must possess VADMIN
 	 * for that file.
 	 */
 	if ((error = VOP_ACCESS(vp, VADMIN, cred, p)))
 		return (error);
 
 	/*
 	 * Privileged processes may set the sticky bit on non-directories,
 	 * as well as set the setgid bit on a file with a group that the
 	 * process is not a member of.
 	 */
 	if (vp->v_type != VDIR && (mode & S_ISTXT)) {
 		if (priv_check_cred(cred, PRIV_VFS_STICKYFILE))
 			return (EFTYPE);
 	}
 	if (!groupmember(node->tn_gid, cred) && (mode & S_ISGID)) {
 		error = priv_check_cred(cred, PRIV_VFS_SETGID);
 		if (error)
 			return (error);
 	}
 
 	newmode = node->tn_mode & ~ALLPERMS;
 	newmode |= mode & ALLPERMS;
 	atomic_store_short(&node->tn_mode, newmode);
 
 	node->tn_status |= TMPFS_NODE_CHANGED;
 
 	ASSERT_VOP_ELOCKED(vp, "chmod2");
 
 	return (0);
 }
 
 /*
  * Change ownership of the given vnode.  At least one of uid or gid must
  * be different than VNOVAL.  If one is set to that value, the attribute
  * is unchanged.
  * Caller should execute tmpfs_update on vp after a successful execution.
  * The vnode must be locked on entry and remain locked on exit.
  */
 int
 tmpfs_chown(struct vnode *vp, uid_t uid, gid_t gid, struct ucred *cred,
     struct thread *p)
 {
 	int error;
 	struct tmpfs_node *node;
 	uid_t ouid;
 	gid_t ogid;
 	mode_t newmode;
 
 	ASSERT_VOP_ELOCKED(vp, "chown");
 	ASSERT_VOP_IN_SEQC(vp);
 
 	node = VP_TO_TMPFS_NODE(vp);
 
 	/* Assign default values if they are unknown. */
 	MPASS(uid != VNOVAL || gid != VNOVAL);
 	if (uid == VNOVAL)
 		uid = node->tn_uid;
 	if (gid == VNOVAL)
 		gid = node->tn_gid;
 	MPASS(uid != VNOVAL && gid != VNOVAL);
 
 	/* Disallow this operation if the file system is mounted read-only. */
 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
 		return (EROFS);
 
 	/* Immutable or append-only files cannot be modified, either. */
 	if (node->tn_flags & (IMMUTABLE | APPEND))
 		return (EPERM);
 
 	/*
 	 * To modify the ownership of a file, must possess VADMIN for that
 	 * file.
 	 */
 	if ((error = VOP_ACCESS(vp, VADMIN, cred, p)))
 		return (error);
 
 	/*
 	 * To change the owner of a file, or change the group of a file to a
 	 * group of which we are not a member, the caller must have
 	 * privilege.
 	 */
 	if ((uid != node->tn_uid ||
 	    (gid != node->tn_gid && !groupmember(gid, cred))) &&
 	    (error = priv_check_cred(cred, PRIV_VFS_CHOWN)))
 		return (error);
 
 	ogid = node->tn_gid;
 	ouid = node->tn_uid;
 
 	node->tn_uid = uid;
 	node->tn_gid = gid;
 
 	node->tn_status |= TMPFS_NODE_CHANGED;
 
 	if ((node->tn_mode & (S_ISUID | S_ISGID)) && (ouid != uid || ogid != gid)) {
 		if (priv_check_cred(cred, PRIV_VFS_RETAINSUGID)) {
 			newmode = node->tn_mode & ~(S_ISUID | S_ISGID);
 			atomic_store_short(&node->tn_mode, newmode);
 		}
 	}
 
 	ASSERT_VOP_ELOCKED(vp, "chown2");
 
 	return (0);
 }
 
 /*
  * Change size of the given vnode.
  * Caller should execute tmpfs_update on vp after a successful execution.
  * The vnode must be locked on entry and remain locked on exit.
  */
 int
 tmpfs_chsize(struct vnode *vp, u_quad_t size, struct ucred *cred,
     struct thread *p)
 {
 	int error;
 	struct tmpfs_node *node;
 
 	ASSERT_VOP_ELOCKED(vp, "chsize");
 
 	node = VP_TO_TMPFS_NODE(vp);
 
 	/* Decide whether this is a valid operation based on the file type. */
 	error = 0;
 	switch (vp->v_type) {
 	case VDIR:
 		return (EISDIR);
 
 	case VREG:
 		if (vp->v_mount->mnt_flag & MNT_RDONLY)
 			return (EROFS);
 		break;
 
 	case VBLK:
 		/* FALLTHROUGH */
 	case VCHR:
 		/* FALLTHROUGH */
 	case VFIFO:
 		/*
 		 * Allow modifications of special files even if in the file
 		 * system is mounted read-only (we are not modifying the
 		 * files themselves, but the objects they represent).
 		 */
 		return (0);
 
 	default:
 		/* Anything else is unsupported. */
 		return (EOPNOTSUPP);
 	}
 
 	/* Immutable or append-only files cannot be modified, either. */
 	if (node->tn_flags & (IMMUTABLE | APPEND))
 		return (EPERM);
 
 	error = tmpfs_truncate(vp, size);
 	/*
 	 * tmpfs_truncate will raise the NOTE_EXTEND and NOTE_ATTRIB kevents
 	 * for us, as will update tn_status; no need to do that here.
 	 */
 
 	ASSERT_VOP_ELOCKED(vp, "chsize2");
 
 	return (error);
 }
 
 /*
  * Change access and modification times of the given vnode.
  * Caller should execute tmpfs_update on vp after a successful execution.
  * The vnode must be locked on entry and remain locked on exit.
  */
 int
 tmpfs_chtimes(struct vnode *vp, struct vattr *vap,
     struct ucred *cred, struct thread *l)
 {
 	int error;
 	struct tmpfs_node *node;
 
 	ASSERT_VOP_ELOCKED(vp, "chtimes");
 
 	node = VP_TO_TMPFS_NODE(vp);
 
 	/* Disallow this operation if the file system is mounted read-only. */
 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
 		return (EROFS);
 
 	/* Immutable or append-only files cannot be modified, either. */
 	if (node->tn_flags & (IMMUTABLE | APPEND))
 		return (EPERM);
 
 	error = vn_utimes_perm(vp, vap, cred, l);
 	if (error != 0)
 		return (error);
 
 	if (vap->va_atime.tv_sec != VNOVAL)
 		node->tn_accessed = true;
 
 	if (vap->va_mtime.tv_sec != VNOVAL)
 		node->tn_status |= TMPFS_NODE_MODIFIED;
 
 	if (vap->va_birthtime.tv_sec != VNOVAL)
 		node->tn_status |= TMPFS_NODE_MODIFIED;
 
 	tmpfs_itimes(vp, &vap->va_atime, &vap->va_mtime);
 
 	if (vap->va_birthtime.tv_sec != VNOVAL)
 		node->tn_birthtime = vap->va_birthtime;
 	ASSERT_VOP_ELOCKED(vp, "chtimes2");
 
 	return (0);
 }
 
 void
 tmpfs_set_status(struct tmpfs_mount *tm, struct tmpfs_node *node, int status)
 {
 
 	if ((node->tn_status & status) == status || tm->tm_ronly)
 		return;
 	TMPFS_NODE_LOCK(node);
 	node->tn_status |= status;
 	TMPFS_NODE_UNLOCK(node);
 }
 
 void
 tmpfs_set_accessed(struct tmpfs_mount *tm, struct tmpfs_node *node)
 {
 	if (node->tn_accessed || tm->tm_ronly)
 		return;
 	atomic_store_8(&node->tn_accessed, true);
 }
 
 /* Sync timestamps */
 void
 tmpfs_itimes(struct vnode *vp, const struct timespec *acc,
     const struct timespec *mod)
 {
 	struct tmpfs_node *node;
 	struct timespec now;
 
 	ASSERT_VOP_LOCKED(vp, "tmpfs_itimes");
 	node = VP_TO_TMPFS_NODE(vp);
 
 	if (!node->tn_accessed &&
 	    (node->tn_status & (TMPFS_NODE_MODIFIED | TMPFS_NODE_CHANGED)) == 0)
 		return;
 
 	vfs_timestamp(&now);
 	TMPFS_NODE_LOCK(node);
 	if (node->tn_accessed) {
 		if (acc == NULL)
 			 acc = &now;
 		node->tn_atime = *acc;
 	}
 	if (node->tn_status & TMPFS_NODE_MODIFIED) {
 		if (mod == NULL)
 			mod = &now;
 		node->tn_mtime = *mod;
 	}
 	if (node->tn_status & TMPFS_NODE_CHANGED)
 		node->tn_ctime = now;
 	node->tn_status &= ~(TMPFS_NODE_MODIFIED | TMPFS_NODE_CHANGED);
 	node->tn_accessed = false;
 	TMPFS_NODE_UNLOCK(node);
 
 	/* XXX: FIX? The entropy here is desirable, but the harvesting may be expensive */
 	random_harvest_queue(node, sizeof(*node), RANDOM_FS_ATIME);
 }
 
 int
 tmpfs_truncate(struct vnode *vp, off_t length)
 {
 	int error;
 	struct tmpfs_node *node;
 
 	node = VP_TO_TMPFS_NODE(vp);
 
 	if (length < 0) {
 		error = EINVAL;
 		goto out;
 	}
 
 	if (node->tn_size == length) {
 		error = 0;
 		goto out;
 	}
 
 	if (length > VFS_TO_TMPFS(vp->v_mount)->tm_maxfilesize)
 		return (EFBIG);
 
 	error = tmpfs_reg_resize(vp, length, FALSE);
 	if (error == 0)
 		node->tn_status |= TMPFS_NODE_CHANGED | TMPFS_NODE_MODIFIED;
 
 out:
 	tmpfs_update(vp);
 
 	return (error);
 }
 
 static __inline int
 tmpfs_dirtree_cmp(struct tmpfs_dirent *a, struct tmpfs_dirent *b)
 {
 	if (a->td_hash > b->td_hash)
 		return (1);
 	else if (a->td_hash < b->td_hash)
 		return (-1);
 	return (0);
 }
 
 RB_GENERATE_STATIC(tmpfs_dir, tmpfs_dirent, uh.td_entries, tmpfs_dirtree_cmp);
diff --git a/sys/fs/tmpfs/tmpfs_vnops.c b/sys/fs/tmpfs/tmpfs_vnops.c
index bdad78f66ea5..8077689caeac 100644
--- a/sys/fs/tmpfs/tmpfs_vnops.c
+++ b/sys/fs/tmpfs/tmpfs_vnops.c
@@ -1,1882 +1,1872 @@
 /*	$NetBSD: tmpfs_vnops.c,v 1.39 2007/07/23 15:41:01 jmmv Exp $	*/
 
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-NetBSD
  *
  * Copyright (c) 2005, 2006 The NetBSD Foundation, Inc.
  * All rights reserved.
  *
  * This code is derived from software contributed to The NetBSD Foundation
  * by Julio M. Merino Vidal, developed as part of Google's Summer of Code
  * 2005 program.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
 /*
  * tmpfs vnode interface.
  */
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/dirent.h>
 #include <sys/fcntl.h>
 #include <sys/file.h>
 #include <sys/limits.h>
 #include <sys/lockf.h>
 #include <sys/lock.h>
 #include <sys/mount.h>
 #include <sys/namei.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/rwlock.h>
 #include <sys/sched.h>
 #include <sys/stat.h>
 #include <sys/sysctl.h>
 #include <sys/unistd.h>
 #include <sys/vnode.h>
 #include <sys/smr.h>
 #include <security/audit/audit.h>
 #include <security/mac/mac_framework.h>
 
 #include <vm/vm.h>
 #include <vm/vm_param.h>
 #include <vm/vm_object.h>
 
 #include <fs/tmpfs/tmpfs_vnops.h>
 #include <fs/tmpfs/tmpfs.h>
 
 SYSCTL_DECL(_vfs_tmpfs);
 VFS_SMR_DECLARE;
 
 static volatile int tmpfs_rename_restarts;
 SYSCTL_INT(_vfs_tmpfs, OID_AUTO, rename_restarts, CTLFLAG_RD,
     __DEVOLATILE(int *, &tmpfs_rename_restarts), 0,
     "Times rename had to restart due to lock contention");
 
 static int
 tmpfs_vn_get_ino_alloc(struct mount *mp, void *arg, int lkflags,
     struct vnode **rvp)
 {
 
 	return (tmpfs_alloc_vp(mp, arg, lkflags, rvp));
 }
 
 static int
 tmpfs_lookup1(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp)
 {
 	struct tmpfs_dirent *de;
 	struct tmpfs_node *dnode, *pnode;
 	struct tmpfs_mount *tm;
 	int error;
 
 	/* Caller assumes responsibility for ensuring access (VEXEC). */
 	dnode = VP_TO_TMPFS_DIR(dvp);
 	*vpp = NULLVP;
 
 	/* We cannot be requesting the parent directory of the root node. */
 	MPASS(IMPLIES(dnode->tn_type == VDIR &&
 	    dnode->tn_dir.tn_parent == dnode,
 	    !(cnp->cn_flags & ISDOTDOT)));
 
 	TMPFS_ASSERT_LOCKED(dnode);
 	if (dnode->tn_dir.tn_parent == NULL) {
 		error = ENOENT;
 		goto out;
 	}
 	if (cnp->cn_flags & ISDOTDOT) {
 		tm = VFS_TO_TMPFS(dvp->v_mount);
 		pnode = dnode->tn_dir.tn_parent;
 		tmpfs_ref_node(pnode);
 		error = vn_vget_ino_gen(dvp, tmpfs_vn_get_ino_alloc,
 		    pnode, cnp->cn_lkflags, vpp);
 		tmpfs_free_node(tm, pnode);
 		if (error != 0)
 			goto out;
 	} else if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') {
 		VREF(dvp);
 		*vpp = dvp;
 		error = 0;
 	} else {
 		de = tmpfs_dir_lookup(dnode, NULL, cnp);
 		if (de != NULL && de->td_node == NULL)
 			cnp->cn_flags |= ISWHITEOUT;
 		if (de == NULL || de->td_node == NULL) {
 			/*
 			 * The entry was not found in the directory.
 			 * This is OK if we are creating or renaming an
 			 * entry and are working on the last component of
 			 * the path name.
 			 */
 			if ((cnp->cn_flags & ISLASTCN) &&
 			    (cnp->cn_nameiop == CREATE || \
 			    cnp->cn_nameiop == RENAME ||
 			    (cnp->cn_nameiop == DELETE &&
 			    cnp->cn_flags & DOWHITEOUT &&
 			    cnp->cn_flags & ISWHITEOUT))) {
 				error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred,
 				    curthread);
 				if (error != 0)
 					goto out;
 
-				/*
-				 * Keep the component name in the buffer for
-				 * future uses.
-				 */
-				cnp->cn_flags |= SAVENAME;
-
 				error = EJUSTRETURN;
 			} else
 				error = ENOENT;
 		} else {
 			struct tmpfs_node *tnode;
 
 			/*
 			 * The entry was found, so get its associated
 			 * tmpfs_node.
 			 */
 			tnode = de->td_node;
 
 			/*
 			 * If we are not at the last path component and
 			 * found a non-directory or non-link entry (which
 			 * may itself be pointing to a directory), raise
 			 * an error.
 			 */
 			if ((tnode->tn_type != VDIR &&
 			    tnode->tn_type != VLNK) &&
 			    !(cnp->cn_flags & ISLASTCN)) {
 				error = ENOTDIR;
 				goto out;
 			}
 
 			/*
 			 * If we are deleting or renaming the entry, keep
 			 * track of its tmpfs_dirent so that it can be
 			 * easily deleted later.
 			 */
 			if ((cnp->cn_flags & ISLASTCN) &&
 			    (cnp->cn_nameiop == DELETE ||
 			    cnp->cn_nameiop == RENAME)) {
 				error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred,
 				    curthread);
 				if (error != 0)
 					goto out;
 
 				/* Allocate a new vnode on the matching entry. */
 				error = tmpfs_alloc_vp(dvp->v_mount, tnode,
 				    cnp->cn_lkflags, vpp);
 				if (error != 0)
 					goto out;
 
 				if ((dnode->tn_mode & S_ISTXT) &&
 				  VOP_ACCESS(dvp, VADMIN, cnp->cn_cred,
 				  curthread) && VOP_ACCESS(*vpp, VADMIN,
 				  cnp->cn_cred, curthread)) {
 					error = EPERM;
 					vput(*vpp);
 					*vpp = NULL;
 					goto out;
 				}
-				cnp->cn_flags |= SAVENAME;
 			} else {
 				error = tmpfs_alloc_vp(dvp->v_mount, tnode,
 				    cnp->cn_lkflags, vpp);
 				if (error != 0)
 					goto out;
 			}
 		}
 	}
 
 	/*
 	 * Store the result of this lookup in the cache.  Avoid this if the
 	 * request was for creation, as it does not improve timings on
 	 * emprical tests.
 	 */
 	if ((cnp->cn_flags & MAKEENTRY) != 0 && tmpfs_use_nc(dvp))
 		cache_enter(dvp, *vpp, cnp);
 
 out:
 	/*
 	 * If there were no errors, *vpp cannot be null and it must be
 	 * locked.
 	 */
 	MPASS(IFF(error == 0, *vpp != NULLVP && VOP_ISLOCKED(*vpp)));
 
 	return (error);
 }
 
 static int
 tmpfs_cached_lookup(struct vop_cachedlookup_args *v)
 {
 
 	return (tmpfs_lookup1(v->a_dvp, v->a_vpp, v->a_cnp));
 }
 
 static int
 tmpfs_lookup(struct vop_lookup_args *v)
 {
 	struct vnode *dvp = v->a_dvp;
 	struct vnode **vpp = v->a_vpp;
 	struct componentname *cnp = v->a_cnp;
 	int error;
 
 	/* Check accessibility of requested node as a first step. */
 	error = vn_dir_check_exec(dvp, cnp);
 	if (error != 0)
 		return (error);
 
 	return (tmpfs_lookup1(dvp, vpp, cnp));
 }
 
 static int
 tmpfs_create(struct vop_create_args *v)
 {
 	struct vnode *dvp = v->a_dvp;
 	struct vnode **vpp = v->a_vpp;
 	struct componentname *cnp = v->a_cnp;
 	struct vattr *vap = v->a_vap;
 	int error;
 
 	MPASS(vap->va_type == VREG || vap->va_type == VSOCK);
 
 	error = tmpfs_alloc_file(dvp, vpp, vap, cnp, NULL);
 	if (error == 0 && (cnp->cn_flags & MAKEENTRY) != 0 && tmpfs_use_nc(dvp))
 		cache_enter(dvp, *vpp, cnp);
 	return (error);
 }
 
 static int
 tmpfs_mknod(struct vop_mknod_args *v)
 {
 	struct vnode *dvp = v->a_dvp;
 	struct vnode **vpp = v->a_vpp;
 	struct componentname *cnp = v->a_cnp;
 	struct vattr *vap = v->a_vap;
 
 	if (vap->va_type != VBLK && vap->va_type != VCHR &&
 	    vap->va_type != VFIFO)
 		return (EINVAL);
 
 	return (tmpfs_alloc_file(dvp, vpp, vap, cnp, NULL));
 }
 
 struct fileops tmpfs_fnops;
 
 static int
 tmpfs_open(struct vop_open_args *v)
 {
 	struct vnode *vp;
 	struct tmpfs_node *node;
 	struct file *fp;
 	int error, mode;
 
 	vp = v->a_vp;
 	mode = v->a_mode;
 	node = VP_TO_TMPFS_NODE(vp);
 
 	/*
 	 * The file is still active but all its names have been removed
 	 * (e.g. by a "rmdir $(pwd)").  It cannot be opened any more as
 	 * it is about to die.
 	 */
 	if (node->tn_links < 1)
 		return (ENOENT);
 
 	/* If the file is marked append-only, deny write requests. */
 	if (node->tn_flags & APPEND && (mode & (FWRITE | O_APPEND)) == FWRITE)
 		error = EPERM;
 	else {
 		error = 0;
 		/* For regular files, the call below is nop. */
 		KASSERT(vp->v_type != VREG || (node->tn_reg.tn_aobj->flags &
 		    OBJ_DEAD) == 0, ("dead object"));
 		vnode_create_vobject(vp, node->tn_size, v->a_td);
 	}
 
 	fp = v->a_fp;
 	MPASS(fp == NULL || fp->f_data == NULL);
 	if (error == 0 && fp != NULL && vp->v_type == VREG) {
 		tmpfs_ref_node(node);
 		finit_vnode(fp, mode, node, &tmpfs_fnops);
 	}
 
 	return (error);
 }
 
 static int
 tmpfs_close(struct vop_close_args *v)
 {
 	struct vnode *vp = v->a_vp;
 
 	/* Update node times. */
 	tmpfs_update(vp);
 
 	return (0);
 }
 
 int
 tmpfs_fo_close(struct file *fp, struct thread *td)
 {
 	struct tmpfs_node *node;
 
 	node = fp->f_data;
 	if (node != NULL) {
 		MPASS(node->tn_type == VREG);
 		tmpfs_free_node(node->tn_reg.tn_tmp, node);
 	}
 	return (vnops.fo_close(fp, td));
 }
 
 /*
  * VOP_FPLOOKUP_VEXEC routines are subject to special circumstances, see
  * the comment above cache_fplookup for details.
  */
 int
 tmpfs_fplookup_vexec(struct vop_fplookup_vexec_args *v)
 {
 	struct vnode *vp;
 	struct tmpfs_node *node;
 	struct ucred *cred;
 	mode_t all_x, mode;
 
 	vp = v->a_vp;
 	node = VP_TO_TMPFS_NODE_SMR(vp);
 	if (__predict_false(node == NULL))
 		return (EAGAIN);
 
 	all_x = S_IXUSR | S_IXGRP | S_IXOTH;
 	mode = atomic_load_short(&node->tn_mode);
 	if (__predict_true((mode & all_x) == all_x))
 		return (0);
 
 	cred = v->a_cred;
 	return (vaccess_vexec_smr(mode, node->tn_uid, node->tn_gid, cred));
 }
 
 int
 tmpfs_access(struct vop_access_args *v)
 {
 	struct vnode *vp = v->a_vp;
 	accmode_t accmode = v->a_accmode;
 	struct ucred *cred = v->a_cred;
 	mode_t all_x = S_IXUSR | S_IXGRP | S_IXOTH;
 	int error;
 	struct tmpfs_node *node;
 
 	MPASS(VOP_ISLOCKED(vp));
 
 	node = VP_TO_TMPFS_NODE(vp);
 
 	/*
 	 * Common case path lookup.
 	 */
 	if (__predict_true(accmode == VEXEC && (node->tn_mode & all_x) == all_x))
 		return (0);
 
 	switch (vp->v_type) {
 	case VDIR:
 		/* FALLTHROUGH */
 	case VLNK:
 		/* FALLTHROUGH */
 	case VREG:
 		if (accmode & VWRITE && vp->v_mount->mnt_flag & MNT_RDONLY) {
 			error = EROFS;
 			goto out;
 		}
 		break;
 
 	case VBLK:
 		/* FALLTHROUGH */
 	case VCHR:
 		/* FALLTHROUGH */
 	case VSOCK:
 		/* FALLTHROUGH */
 	case VFIFO:
 		break;
 
 	default:
 		error = EINVAL;
 		goto out;
 	}
 
 	if (accmode & VWRITE && node->tn_flags & IMMUTABLE) {
 		error = EPERM;
 		goto out;
 	}
 
 	error = vaccess(vp->v_type, node->tn_mode, node->tn_uid, node->tn_gid,
 	    accmode, cred);
 
 out:
 	MPASS(VOP_ISLOCKED(vp));
 
 	return (error);
 }
 
 int
 tmpfs_stat(struct vop_stat_args *v)
 {
 	struct vnode *vp = v->a_vp;
 	struct stat *sb = v->a_sb;
 	vm_object_t obj;
 	struct tmpfs_node *node;
 	int error;
 
 	node = VP_TO_TMPFS_NODE(vp);
 
 	tmpfs_update_getattr(vp);
 
 	error = vop_stat_helper_pre(v);
 	if (__predict_false(error))
 		return (error);
 
 	sb->st_dev = vp->v_mount->mnt_stat.f_fsid.val[0];
 	sb->st_ino = node->tn_id;
 	sb->st_mode = node->tn_mode | VTTOIF(vp->v_type);
 	sb->st_nlink = node->tn_links;
 	sb->st_uid = node->tn_uid;
 	sb->st_gid = node->tn_gid;
 	sb->st_rdev = (vp->v_type == VBLK || vp->v_type == VCHR) ?
 		node->tn_rdev : NODEV;
 	sb->st_size = node->tn_size;
 	sb->st_atim.tv_sec = node->tn_atime.tv_sec;
 	sb->st_atim.tv_nsec = node->tn_atime.tv_nsec;
 	sb->st_mtim.tv_sec = node->tn_mtime.tv_sec;
 	sb->st_mtim.tv_nsec = node->tn_mtime.tv_nsec;
 	sb->st_ctim.tv_sec = node->tn_ctime.tv_sec;
 	sb->st_ctim.tv_nsec = node->tn_ctime.tv_nsec;
 	sb->st_birthtim.tv_sec = node->tn_birthtime.tv_sec;
 	sb->st_birthtim.tv_nsec = node->tn_birthtime.tv_nsec;
 	sb->st_blksize = PAGE_SIZE;
 	sb->st_flags = node->tn_flags;
 	sb->st_gen = node->tn_gen;
 	if (vp->v_type == VREG) {
 		obj = node->tn_reg.tn_aobj;
 		sb->st_blocks = (u_quad_t)obj->resident_page_count * PAGE_SIZE;
 	} else
 		sb->st_blocks = node->tn_size;
 	sb->st_blocks /= S_BLKSIZE;
 	return (vop_stat_helper_post(v, error));
 }
 
 int
 tmpfs_getattr(struct vop_getattr_args *v)
 {
 	struct vnode *vp = v->a_vp;
 	struct vattr *vap = v->a_vap;
 	vm_object_t obj;
 	struct tmpfs_node *node;
 
 	node = VP_TO_TMPFS_NODE(vp);
 
 	tmpfs_update_getattr(vp);
 
 	vap->va_type = vp->v_type;
 	vap->va_mode = node->tn_mode;
 	vap->va_nlink = node->tn_links;
 	vap->va_uid = node->tn_uid;
 	vap->va_gid = node->tn_gid;
 	vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
 	vap->va_fileid = node->tn_id;
 	vap->va_size = node->tn_size;
 	vap->va_blocksize = PAGE_SIZE;
 	vap->va_atime = node->tn_atime;
 	vap->va_mtime = node->tn_mtime;
 	vap->va_ctime = node->tn_ctime;
 	vap->va_birthtime = node->tn_birthtime;
 	vap->va_gen = node->tn_gen;
 	vap->va_flags = node->tn_flags;
 	vap->va_rdev = (vp->v_type == VBLK || vp->v_type == VCHR) ?
 		node->tn_rdev : NODEV;
 	if (vp->v_type == VREG) {
 		obj = node->tn_reg.tn_aobj;
 		vap->va_bytes = (u_quad_t)obj->resident_page_count * PAGE_SIZE;
 	} else
 		vap->va_bytes = node->tn_size;
 	vap->va_filerev = 0;
 
 	return (0);
 }
 
 int
 tmpfs_setattr(struct vop_setattr_args *v)
 {
 	struct vnode *vp = v->a_vp;
 	struct vattr *vap = v->a_vap;
 	struct ucred *cred = v->a_cred;
 	struct thread *td = curthread;
 
 	int error;
 
 	MPASS(VOP_ISLOCKED(vp));
 	ASSERT_VOP_IN_SEQC(vp);
 
 	error = 0;
 
 	/* Abort if any unsettable attribute is given. */
 	if (vap->va_type != VNON ||
 	    vap->va_nlink != VNOVAL ||
 	    vap->va_fsid != VNOVAL ||
 	    vap->va_fileid != VNOVAL ||
 	    vap->va_blocksize != VNOVAL ||
 	    vap->va_gen != VNOVAL ||
 	    vap->va_rdev != VNOVAL ||
 	    vap->va_bytes != VNOVAL)
 		error = EINVAL;
 
 	if (error == 0 && (vap->va_flags != VNOVAL))
 		error = tmpfs_chflags(vp, vap->va_flags, cred, td);
 
 	if (error == 0 && (vap->va_size != VNOVAL))
 		error = tmpfs_chsize(vp, vap->va_size, cred, td);
 
 	if (error == 0 && (vap->va_uid != VNOVAL || vap->va_gid != VNOVAL))
 		error = tmpfs_chown(vp, vap->va_uid, vap->va_gid, cred, td);
 
 	if (error == 0 && (vap->va_mode != (mode_t)VNOVAL))
 		error = tmpfs_chmod(vp, vap->va_mode, cred, td);
 
 	if (error == 0 && ((vap->va_atime.tv_sec != VNOVAL &&
 	    vap->va_atime.tv_nsec != VNOVAL) ||
 	    (vap->va_mtime.tv_sec != VNOVAL &&
 	    vap->va_mtime.tv_nsec != VNOVAL) ||
 	    (vap->va_birthtime.tv_sec != VNOVAL &&
 	    vap->va_birthtime.tv_nsec != VNOVAL)))
 		error = tmpfs_chtimes(vp, vap, cred, td);
 
 	/*
 	 * Update the node times.  We give preference to the error codes
 	 * generated by this function rather than the ones that may arise
 	 * from tmpfs_update.
 	 */
 	tmpfs_update(vp);
 
 	MPASS(VOP_ISLOCKED(vp));
 
 	return (error);
 }
 
 static int
 tmpfs_read(struct vop_read_args *v)
 {
 	struct vnode *vp;
 	struct uio *uio;
 	struct tmpfs_node *node;
 
 	vp = v->a_vp;
 	if (vp->v_type != VREG)
 		return (EISDIR);
 	uio = v->a_uio;
 	if (uio->uio_offset < 0)
 		return (EINVAL);
 	node = VP_TO_TMPFS_NODE(vp);
 	tmpfs_set_accessed(VFS_TO_TMPFS(vp->v_mount), node);
 	return (uiomove_object(node->tn_reg.tn_aobj, node->tn_size, uio));
 }
 
 static int
 tmpfs_read_pgcache(struct vop_read_pgcache_args *v)
 {
 	struct vnode *vp;
 	struct tmpfs_node *node;
 	vm_object_t object;
 	off_t size;
 	int error;
 
 	vp = v->a_vp;
 	VNPASS((vn_irflag_read(vp) & VIRF_PGREAD) != 0, vp);
 
 	if (v->a_uio->uio_offset < 0)
 		return (EINVAL);
 
 	error = EJUSTRETURN;
 	vfs_smr_enter();
 
 	node = VP_TO_TMPFS_NODE_SMR(vp);
 	if (node == NULL)
 		goto out_smr;
 	MPASS(node->tn_type == VREG);
 	MPASS(node->tn_refcount >= 1);
 	object = node->tn_reg.tn_aobj;
 	if (object == NULL)
 		goto out_smr;
 
 	MPASS(object->type == tmpfs_pager_type);
 	MPASS((object->flags & (OBJ_ANON | OBJ_DEAD | OBJ_SWAP)) ==
 	    OBJ_SWAP);
 	if (!VN_IS_DOOMED(vp)) {
 		/* size cannot become shorter due to rangelock. */
 		size = node->tn_size;
 		tmpfs_set_accessed(node->tn_reg.tn_tmp, node);
 		vfs_smr_exit();
 		error = uiomove_object(object, size, v->a_uio);
 		return (error);
 	}
 out_smr:
 	vfs_smr_exit();
 	return (error);
 }
 
 static int
 tmpfs_write(struct vop_write_args *v)
 {
 	struct vnode *vp;
 	struct uio *uio;
 	struct tmpfs_node *node;
 	off_t oldsize;
 	int error, ioflag;
 	mode_t newmode;
 
 	vp = v->a_vp;
 	uio = v->a_uio;
 	ioflag = v->a_ioflag;
 	error = 0;
 	node = VP_TO_TMPFS_NODE(vp);
 	oldsize = node->tn_size;
 
 	if (uio->uio_offset < 0 || vp->v_type != VREG)
 		return (EINVAL);
 	if (uio->uio_resid == 0)
 		return (0);
 	if (ioflag & IO_APPEND)
 		uio->uio_offset = node->tn_size;
 	if (uio->uio_offset + uio->uio_resid >
 	  VFS_TO_TMPFS(vp->v_mount)->tm_maxfilesize)
 		return (EFBIG);
 	if (vn_rlimit_fsize(vp, uio, uio->uio_td))
 		return (EFBIG);
 	if (uio->uio_offset + uio->uio_resid > node->tn_size) {
 		error = tmpfs_reg_resize(vp, uio->uio_offset + uio->uio_resid,
 		    FALSE);
 		if (error != 0)
 			goto out;
 	}
 
 	error = uiomove_object(node->tn_reg.tn_aobj, node->tn_size, uio);
 	node->tn_status |= TMPFS_NODE_MODIFIED | TMPFS_NODE_CHANGED;
 	node->tn_accessed = true;
 	if (node->tn_mode & (S_ISUID | S_ISGID)) {
 		if (priv_check_cred(v->a_cred, PRIV_VFS_RETAINSUGID)) {
 			newmode = node->tn_mode & ~(S_ISUID | S_ISGID);
 			vn_seqc_write_begin(vp);
 			atomic_store_short(&node->tn_mode, newmode);
 			vn_seqc_write_end(vp);
 		}
 	}
 	if (error != 0)
 		(void)tmpfs_reg_resize(vp, oldsize, TRUE);
 
 out:
 	MPASS(IMPLIES(error == 0, uio->uio_resid == 0));
 	MPASS(IMPLIES(error != 0, oldsize == node->tn_size));
 
 	return (error);
 }
 
 static int
 tmpfs_deallocate(struct vop_deallocate_args *v)
 {
 	return (tmpfs_reg_punch_hole(v->a_vp, v->a_offset, v->a_len));
 }
 
 static int
 tmpfs_fsync(struct vop_fsync_args *v)
 {
 	struct vnode *vp = v->a_vp;
 
 	MPASS(VOP_ISLOCKED(vp));
 
 	tmpfs_check_mtime(vp);
 	tmpfs_update(vp);
 
 	return (0);
 }
 
 static int
 tmpfs_remove(struct vop_remove_args *v)
 {
 	struct vnode *dvp = v->a_dvp;
 	struct vnode *vp = v->a_vp;
 
 	int error;
 	struct tmpfs_dirent *de;
 	struct tmpfs_mount *tmp;
 	struct tmpfs_node *dnode;
 	struct tmpfs_node *node;
 
 	MPASS(VOP_ISLOCKED(dvp));
 	MPASS(VOP_ISLOCKED(vp));
 
 	if (vp->v_type == VDIR) {
 		error = EISDIR;
 		goto out;
 	}
 
 	dnode = VP_TO_TMPFS_DIR(dvp);
 	node = VP_TO_TMPFS_NODE(vp);
 	tmp = VFS_TO_TMPFS(vp->v_mount);
 	de = tmpfs_dir_lookup(dnode, node, v->a_cnp);
 	MPASS(de != NULL);
 
 	/* Files marked as immutable or append-only cannot be deleted. */
 	if ((node->tn_flags & (IMMUTABLE | APPEND | NOUNLINK)) ||
 	    (dnode->tn_flags & APPEND)) {
 		error = EPERM;
 		goto out;
 	}
 
 	/* Remove the entry from the directory; as it is a file, we do not
 	 * have to change the number of hard links of the directory. */
 	tmpfs_dir_detach(dvp, de);
 	if (v->a_cnp->cn_flags & DOWHITEOUT)
 		tmpfs_dir_whiteout_add(dvp, v->a_cnp);
 
 	/* Free the directory entry we just deleted.  Note that the node
 	 * referred by it will not be removed until the vnode is really
 	 * reclaimed. */
 	tmpfs_free_dirent(tmp, de);
 
 	node->tn_status |= TMPFS_NODE_CHANGED;
 	node->tn_accessed = true;
 	error = 0;
 
 out:
 	return (error);
 }
 
 static int
 tmpfs_link(struct vop_link_args *v)
 {
 	struct vnode *dvp = v->a_tdvp;
 	struct vnode *vp = v->a_vp;
 	struct componentname *cnp = v->a_cnp;
 
 	int error;
 	struct tmpfs_dirent *de;
 	struct tmpfs_node *node;
 
 	MPASS(VOP_ISLOCKED(dvp));
-	MPASS(cnp->cn_flags & HASBUF);
 	MPASS(dvp != vp); /* XXX When can this be false? */
 	node = VP_TO_TMPFS_NODE(vp);
 
 	/* Ensure that we do not overflow the maximum number of links imposed
 	 * by the system. */
 	MPASS(node->tn_links <= TMPFS_LINK_MAX);
 	if (node->tn_links == TMPFS_LINK_MAX) {
 		error = EMLINK;
 		goto out;
 	}
 
 	/* We cannot create links of files marked immutable or append-only. */
 	if (node->tn_flags & (IMMUTABLE | APPEND)) {
 		error = EPERM;
 		goto out;
 	}
 
 	/* Allocate a new directory entry to represent the node. */
 	error = tmpfs_alloc_dirent(VFS_TO_TMPFS(vp->v_mount), node,
 	    cnp->cn_nameptr, cnp->cn_namelen, &de);
 	if (error != 0)
 		goto out;
 
 	/* Insert the new directory entry into the appropriate directory. */
 	if (cnp->cn_flags & ISWHITEOUT)
 		tmpfs_dir_whiteout_remove(dvp, cnp);
 	tmpfs_dir_attach(dvp, de);
 
 	/* vp link count has changed, so update node times. */
 	node->tn_status |= TMPFS_NODE_CHANGED;
 	tmpfs_update(vp);
 
 	error = 0;
 
 out:
 	return (error);
 }
 
 /*
  * We acquire all but fdvp locks using non-blocking acquisitions.  If we
  * fail to acquire any lock in the path we will drop all held locks,
  * acquire the new lock in a blocking fashion, and then release it and
  * restart the rename.  This acquire/release step ensures that we do not
  * spin on a lock waiting for release.  On error release all vnode locks
  * and decrement references the way tmpfs_rename() would do.
  */
 static int
 tmpfs_rename_relock(struct vnode *fdvp, struct vnode **fvpp,
     struct vnode *tdvp, struct vnode **tvpp,
     struct componentname *fcnp, struct componentname *tcnp)
 {
 	struct vnode *nvp;
 	struct mount *mp;
 	struct tmpfs_dirent *de;
 	int error, restarts = 0;
 
 	VOP_UNLOCK(tdvp);
 	if (*tvpp != NULL && *tvpp != tdvp)
 		VOP_UNLOCK(*tvpp);
 	mp = fdvp->v_mount;
 
 relock:
 	restarts += 1;
 	error = vn_lock(fdvp, LK_EXCLUSIVE);
 	if (error)
 		goto releout;
 	if (vn_lock(tdvp, LK_EXCLUSIVE | LK_NOWAIT) != 0) {
 		VOP_UNLOCK(fdvp);
 		error = vn_lock(tdvp, LK_EXCLUSIVE);
 		if (error)
 			goto releout;
 		VOP_UNLOCK(tdvp);
 		goto relock;
 	}
 	/*
 	 * Re-resolve fvp to be certain it still exists and fetch the
 	 * correct vnode.
 	 */
 	de = tmpfs_dir_lookup(VP_TO_TMPFS_DIR(fdvp), NULL, fcnp);
 	if (de == NULL) {
 		VOP_UNLOCK(fdvp);
 		VOP_UNLOCK(tdvp);
 		if ((fcnp->cn_flags & ISDOTDOT) != 0 ||
 		    (fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.'))
 			error = EINVAL;
 		else
 			error = ENOENT;
 		goto releout;
 	}
 	error = tmpfs_alloc_vp(mp, de->td_node, LK_EXCLUSIVE | LK_NOWAIT, &nvp);
 	if (error != 0) {
 		VOP_UNLOCK(fdvp);
 		VOP_UNLOCK(tdvp);
 		if (error != EBUSY)
 			goto releout;
 		error = tmpfs_alloc_vp(mp, de->td_node, LK_EXCLUSIVE, &nvp);
 		if (error != 0)
 			goto releout;
 		VOP_UNLOCK(nvp);
 		/*
 		 * Concurrent rename race.
 		 */
 		if (nvp == tdvp) {
 			vrele(nvp);
 			error = EINVAL;
 			goto releout;
 		}
 		vrele(*fvpp);
 		*fvpp = nvp;
 		goto relock;
 	}
 	vrele(*fvpp);
 	*fvpp = nvp;
 	VOP_UNLOCK(*fvpp);
 	/*
 	 * Re-resolve tvp and acquire the vnode lock if present.
 	 */
 	de = tmpfs_dir_lookup(VP_TO_TMPFS_DIR(tdvp), NULL, tcnp);
 	/*
 	 * If tvp disappeared we just carry on.
 	 */
 	if (de == NULL && *tvpp != NULL) {
 		vrele(*tvpp);
 		*tvpp = NULL;
 	}
 	/*
 	 * Get the tvp ino if the lookup succeeded.  We may have to restart
 	 * if the non-blocking acquire fails.
 	 */
 	if (de != NULL) {
 		nvp = NULL;
 		error = tmpfs_alloc_vp(mp, de->td_node,
 		    LK_EXCLUSIVE | LK_NOWAIT, &nvp);
 		if (*tvpp != NULL)
 			vrele(*tvpp);
 		*tvpp = nvp;
 		if (error != 0) {
 			VOP_UNLOCK(fdvp);
 			VOP_UNLOCK(tdvp);
 			if (error != EBUSY)
 				goto releout;
 			error = tmpfs_alloc_vp(mp, de->td_node, LK_EXCLUSIVE,
 			    &nvp);
 			if (error != 0)
 				goto releout;
 			VOP_UNLOCK(nvp);
 			/*
 			 * fdvp contains fvp, thus tvp (=fdvp) is not empty.
 			 */
 			if (nvp == fdvp) {
 				error = ENOTEMPTY;
 				goto releout;
 			}
 			goto relock;
 		}
 	}
 	tmpfs_rename_restarts += restarts;
 
 	return (0);
 
 releout:
 	vrele(fdvp);
 	vrele(*fvpp);
 	vrele(tdvp);
 	if (*tvpp != NULL)
 		vrele(*tvpp);
 	tmpfs_rename_restarts += restarts;
 
 	return (error);
 }
 
 static int
 tmpfs_rename(struct vop_rename_args *v)
 {
 	struct vnode *fdvp = v->a_fdvp;
 	struct vnode *fvp = v->a_fvp;
 	struct componentname *fcnp = v->a_fcnp;
 	struct vnode *tdvp = v->a_tdvp;
 	struct vnode *tvp = v->a_tvp;
 	struct componentname *tcnp = v->a_tcnp;
 	char *newname;
 	struct tmpfs_dirent *de;
 	struct tmpfs_mount *tmp;
 	struct tmpfs_node *fdnode;
 	struct tmpfs_node *fnode;
 	struct tmpfs_node *tnode;
 	struct tmpfs_node *tdnode;
 	int error;
 	bool want_seqc_end;
 
 	MPASS(VOP_ISLOCKED(tdvp));
 	MPASS(IMPLIES(tvp != NULL, VOP_ISLOCKED(tvp)));
-	MPASS(fcnp->cn_flags & HASBUF);
-	MPASS(tcnp->cn_flags & HASBUF);
 
 	want_seqc_end = false;
 
 	/*
 	 * Disallow cross-device renames.
 	 * XXX Why isn't this done by the caller?
 	 */
 	if (fvp->v_mount != tdvp->v_mount ||
 	    (tvp != NULL && fvp->v_mount != tvp->v_mount)) {
 		error = EXDEV;
 		goto out;
 	}
 
 	/* If source and target are the same file, there is nothing to do. */
 	if (fvp == tvp) {
 		error = 0;
 		goto out;
 	}
 
 	/*
 	 * If we need to move the directory between entries, lock the
 	 * source so that we can safely operate on it.
 	 */
 	if (fdvp != tdvp && fdvp != tvp) {
 		if (vn_lock(fdvp, LK_EXCLUSIVE | LK_NOWAIT) != 0) {
 			error = tmpfs_rename_relock(fdvp, &fvp, tdvp, &tvp,
 			    fcnp, tcnp);
 			if (error != 0)
 				return (error);
 			ASSERT_VOP_ELOCKED(fdvp,
 			    "tmpfs_rename: fdvp not locked");
 			ASSERT_VOP_ELOCKED(tdvp,
 			    "tmpfs_rename: tdvp not locked");
 			if (tvp != NULL)
 				ASSERT_VOP_ELOCKED(tvp,
 				    "tmpfs_rename: tvp not locked");
 			if (fvp == tvp) {
 				error = 0;
 				goto out_locked;
 			}
 		}
 	}
 
 	if (tvp != NULL)
 		vn_seqc_write_begin(tvp);
 	vn_seqc_write_begin(tdvp);
 	vn_seqc_write_begin(fvp);
 	vn_seqc_write_begin(fdvp);
 	want_seqc_end = true;
 
 	tmp = VFS_TO_TMPFS(tdvp->v_mount);
 	tdnode = VP_TO_TMPFS_DIR(tdvp);
 	tnode = (tvp == NULL) ? NULL : VP_TO_TMPFS_NODE(tvp);
 	fdnode = VP_TO_TMPFS_DIR(fdvp);
 	fnode = VP_TO_TMPFS_NODE(fvp);
 	de = tmpfs_dir_lookup(fdnode, fnode, fcnp);
 
 	/*
 	 * Entry can disappear before we lock fdvp,
 	 * also avoid manipulating '.' and '..' entries.
 	 */
 	if (de == NULL) {
 		if ((fcnp->cn_flags & ISDOTDOT) != 0 ||
 		    (fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.'))
 			error = EINVAL;
 		else
 			error = ENOENT;
 		goto out_locked;
 	}
 	MPASS(de->td_node == fnode);
 
 	/*
 	 * If re-naming a directory to another preexisting directory
 	 * ensure that the target directory is empty so that its
 	 * removal causes no side effects.
 	 * Kern_rename guarantees the destination to be a directory
 	 * if the source is one.
 	 */
 	if (tvp != NULL) {
 		MPASS(tnode != NULL);
 
 		if ((tnode->tn_flags & (NOUNLINK | IMMUTABLE | APPEND)) ||
 		    (tdnode->tn_flags & (APPEND | IMMUTABLE))) {
 			error = EPERM;
 			goto out_locked;
 		}
 
 		if (fnode->tn_type == VDIR && tnode->tn_type == VDIR) {
 			if (tnode->tn_size > 0) {
 				error = ENOTEMPTY;
 				goto out_locked;
 			}
 		} else if (fnode->tn_type == VDIR && tnode->tn_type != VDIR) {
 			error = ENOTDIR;
 			goto out_locked;
 		} else if (fnode->tn_type != VDIR && tnode->tn_type == VDIR) {
 			error = EISDIR;
 			goto out_locked;
 		} else {
 			MPASS(fnode->tn_type != VDIR &&
 				tnode->tn_type != VDIR);
 		}
 	}
 
 	if ((fnode->tn_flags & (NOUNLINK | IMMUTABLE | APPEND))
 	    || (fdnode->tn_flags & (APPEND | IMMUTABLE))) {
 		error = EPERM;
 		goto out_locked;
 	}
 
 	/*
 	 * Ensure that we have enough memory to hold the new name, if it
 	 * has to be changed.
 	 */
 	if (fcnp->cn_namelen != tcnp->cn_namelen ||
 	    bcmp(fcnp->cn_nameptr, tcnp->cn_nameptr, fcnp->cn_namelen) != 0) {
 		newname = malloc(tcnp->cn_namelen, M_TMPFSNAME, M_WAITOK);
 	} else
 		newname = NULL;
 
 	/*
 	 * If the node is being moved to another directory, we have to do
 	 * the move.
 	 */
 	if (fdnode != tdnode) {
 		/*
 		 * In case we are moving a directory, we have to adjust its
 		 * parent to point to the new parent.
 		 */
 		if (de->td_node->tn_type == VDIR) {
 			struct tmpfs_node *n;
 
 			/*
 			 * Ensure the target directory is not a child of the
 			 * directory being moved.  Otherwise, we'd end up
 			 * with stale nodes.
 			 */
 			n = tdnode;
 			/*
 			 * TMPFS_LOCK guaranties that no nodes are freed while
 			 * traversing the list. Nodes can only be marked as
 			 * removed: tn_parent == NULL.
 			 */
 			TMPFS_LOCK(tmp);
 			TMPFS_NODE_LOCK(n);
 			while (n != n->tn_dir.tn_parent) {
 				struct tmpfs_node *parent;
 
 				if (n == fnode) {
 					TMPFS_NODE_UNLOCK(n);
 					TMPFS_UNLOCK(tmp);
 					error = EINVAL;
 					if (newname != NULL)
 						    free(newname, M_TMPFSNAME);
 					goto out_locked;
 				}
 				parent = n->tn_dir.tn_parent;
 				TMPFS_NODE_UNLOCK(n);
 				if (parent == NULL) {
 					n = NULL;
 					break;
 				}
 				TMPFS_NODE_LOCK(parent);
 				if (parent->tn_dir.tn_parent == NULL) {
 					TMPFS_NODE_UNLOCK(parent);
 					n = NULL;
 					break;
 				}
 				n = parent;
 			}
 			TMPFS_UNLOCK(tmp);
 			if (n == NULL) {
 				error = EINVAL;
 				if (newname != NULL)
 					    free(newname, M_TMPFSNAME);
 				goto out_locked;
 			}
 			TMPFS_NODE_UNLOCK(n);
 
 			/* Adjust the parent pointer. */
 			TMPFS_VALIDATE_DIR(fnode);
 			TMPFS_NODE_LOCK(de->td_node);
 			de->td_node->tn_dir.tn_parent = tdnode;
 			TMPFS_NODE_UNLOCK(de->td_node);
 
 			/*
 			 * As a result of changing the target of the '..'
 			 * entry, the link count of the source and target
 			 * directories has to be adjusted.
 			 */
 			TMPFS_NODE_LOCK(tdnode);
 			TMPFS_ASSERT_LOCKED(tdnode);
 			tdnode->tn_links++;
 			TMPFS_NODE_UNLOCK(tdnode);
 
 			TMPFS_NODE_LOCK(fdnode);
 			TMPFS_ASSERT_LOCKED(fdnode);
 			fdnode->tn_links--;
 			TMPFS_NODE_UNLOCK(fdnode);
 		}
 	}
 
 	/*
 	 * Do the move: just remove the entry from the source directory
 	 * and insert it into the target one.
 	 */
 	tmpfs_dir_detach(fdvp, de);
 
 	if (fcnp->cn_flags & DOWHITEOUT)
 		tmpfs_dir_whiteout_add(fdvp, fcnp);
 	if (tcnp->cn_flags & ISWHITEOUT)
 		tmpfs_dir_whiteout_remove(tdvp, tcnp);
 
 	/*
 	 * If the name has changed, we need to make it effective by changing
 	 * it in the directory entry.
 	 */
 	if (newname != NULL) {
 		MPASS(tcnp->cn_namelen <= MAXNAMLEN);
 
 		free(de->ud.td_name, M_TMPFSNAME);
 		de->ud.td_name = newname;
 		tmpfs_dirent_init(de, tcnp->cn_nameptr, tcnp->cn_namelen);
 
 		fnode->tn_status |= TMPFS_NODE_CHANGED;
 		tdnode->tn_status |= TMPFS_NODE_MODIFIED;
 	}
 
 	/*
 	 * If we are overwriting an entry, we have to remove the old one
 	 * from the target directory.
 	 */
 	if (tvp != NULL) {
 		struct tmpfs_dirent *tde;
 
 		/* Remove the old entry from the target directory. */
 		tde = tmpfs_dir_lookup(tdnode, tnode, tcnp);
 		tmpfs_dir_detach(tdvp, tde);
 
 		/*
 		 * Free the directory entry we just deleted.  Note that the
 		 * node referred by it will not be removed until the vnode is
 		 * really reclaimed.
 		 */
 		tmpfs_free_dirent(VFS_TO_TMPFS(tvp->v_mount), tde);
 	}
 
 	tmpfs_dir_attach(tdvp, de);
 
 	if (tmpfs_use_nc(fvp)) {
 		cache_vop_rename(fdvp, fvp, tdvp, tvp, fcnp, tcnp);
 	}
 
 	error = 0;
 
 out_locked:
 	if (fdvp != tdvp && fdvp != tvp)
 		VOP_UNLOCK(fdvp);
 
 out:
 	if (want_seqc_end) {
 		if (tvp != NULL)
 			vn_seqc_write_end(tvp);
 		vn_seqc_write_end(tdvp);
 		vn_seqc_write_end(fvp);
 		vn_seqc_write_end(fdvp);
 	}
 
 	/*
 	 * Release target nodes.
 	 * XXX: I don't understand when tdvp can be the same as tvp, but
 	 * other code takes care of this...
 	 */
 	if (tdvp == tvp)
 		vrele(tdvp);
 	else
 		vput(tdvp);
 	if (tvp != NULL)
 		vput(tvp);
 
 	/* Release source nodes. */
 	vrele(fdvp);
 	vrele(fvp);
 
 	return (error);
 }
 
 static int
 tmpfs_mkdir(struct vop_mkdir_args *v)
 {
 	struct vnode *dvp = v->a_dvp;
 	struct vnode **vpp = v->a_vpp;
 	struct componentname *cnp = v->a_cnp;
 	struct vattr *vap = v->a_vap;
 
 	MPASS(vap->va_type == VDIR);
 
 	return (tmpfs_alloc_file(dvp, vpp, vap, cnp, NULL));
 }
 
 static int
 tmpfs_rmdir(struct vop_rmdir_args *v)
 {
 	struct vnode *dvp = v->a_dvp;
 	struct vnode *vp = v->a_vp;
 
 	int error;
 	struct tmpfs_dirent *de;
 	struct tmpfs_mount *tmp;
 	struct tmpfs_node *dnode;
 	struct tmpfs_node *node;
 
 	MPASS(VOP_ISLOCKED(dvp));
 	MPASS(VOP_ISLOCKED(vp));
 
 	tmp = VFS_TO_TMPFS(dvp->v_mount);
 	dnode = VP_TO_TMPFS_DIR(dvp);
 	node = VP_TO_TMPFS_DIR(vp);
 
 	/* Directories with more than two entries ('.' and '..') cannot be
 	 * removed. */
 	 if (node->tn_size > 0) {
 		 error = ENOTEMPTY;
 		 goto out;
 	 }
 
 	if ((dnode->tn_flags & APPEND)
 	    || (node->tn_flags & (NOUNLINK | IMMUTABLE | APPEND))) {
 		error = EPERM;
 		goto out;
 	}
 
 	/* This invariant holds only if we are not trying to remove "..".
 	  * We checked for that above so this is safe now. */
 	MPASS(node->tn_dir.tn_parent == dnode);
 
 	/* Get the directory entry associated with node (vp).  This was
 	 * filled by tmpfs_lookup while looking up the entry. */
 	de = tmpfs_dir_lookup(dnode, node, v->a_cnp);
 	MPASS(TMPFS_DIRENT_MATCHES(de,
 	    v->a_cnp->cn_nameptr,
 	    v->a_cnp->cn_namelen));
 
 	/* Check flags to see if we are allowed to remove the directory. */
 	if ((dnode->tn_flags & APPEND) != 0 ||
 	    (node->tn_flags & (NOUNLINK | IMMUTABLE | APPEND)) != 0) {
 		error = EPERM;
 		goto out;
 	}
 
 	/* Detach the directory entry from the directory (dnode). */
 	tmpfs_dir_detach(dvp, de);
 	if (v->a_cnp->cn_flags & DOWHITEOUT)
 		tmpfs_dir_whiteout_add(dvp, v->a_cnp);
 
 	/* No vnode should be allocated for this entry from this point */
 	TMPFS_NODE_LOCK(node);
 	node->tn_links--;
 	node->tn_dir.tn_parent = NULL;
 	node->tn_status |= TMPFS_NODE_CHANGED | TMPFS_NODE_MODIFIED;
 	node->tn_accessed = true;
 
 	TMPFS_NODE_UNLOCK(node);
 
 	TMPFS_NODE_LOCK(dnode);
 	dnode->tn_links--;
 	dnode->tn_status |= TMPFS_NODE_CHANGED | TMPFS_NODE_MODIFIED;
 	dnode->tn_accessed = true;
 	TMPFS_NODE_UNLOCK(dnode);
 
 	if (tmpfs_use_nc(dvp)) {
 		cache_vop_rmdir(dvp, vp);
 	}
 
 	/* Free the directory entry we just deleted.  Note that the node
 	 * referred by it will not be removed until the vnode is really
 	 * reclaimed. */
 	tmpfs_free_dirent(tmp, de);
 
 	/* Release the deleted vnode (will destroy the node, notify
 	 * interested parties and clean it from the cache). */
 
 	dnode->tn_status |= TMPFS_NODE_CHANGED;
 	tmpfs_update(dvp);
 
 	error = 0;
 
 out:
 	return (error);
 }
 
 static int
 tmpfs_symlink(struct vop_symlink_args *v)
 {
 	struct vnode *dvp = v->a_dvp;
 	struct vnode **vpp = v->a_vpp;
 	struct componentname *cnp = v->a_cnp;
 	struct vattr *vap = v->a_vap;
 	const char *target = v->a_target;
 
 #ifdef notyet /* XXX FreeBSD BUG: kern_symlink is not setting VLNK */
 	MPASS(vap->va_type == VLNK);
 #else
 	vap->va_type = VLNK;
 #endif
 
 	return (tmpfs_alloc_file(dvp, vpp, vap, cnp, target));
 }
 
 static int
 tmpfs_readdir(struct vop_readdir_args *va)
 {
 	struct vnode *vp;
 	struct uio *uio;
 	struct tmpfs_mount *tm;
 	struct tmpfs_node *node;
 	uint64_t **cookies;
 	int *eofflag, *ncookies;
 	ssize_t startresid;
 	int error, maxcookies;
 
 	vp = va->a_vp;
 	uio = va->a_uio;
 	eofflag = va->a_eofflag;
 	cookies = va->a_cookies;
 	ncookies = va->a_ncookies;
 
 	/* This operation only makes sense on directory nodes. */
 	if (vp->v_type != VDIR)
 		return (ENOTDIR);
 
 	maxcookies = 0;
 	node = VP_TO_TMPFS_DIR(vp);
 	tm = VFS_TO_TMPFS(vp->v_mount);
 
 	startresid = uio->uio_resid;
 
 	/* Allocate cookies for NFS and compat modules. */
 	if (cookies != NULL && ncookies != NULL) {
 		maxcookies = howmany(node->tn_size,
 		    sizeof(struct tmpfs_dirent)) + 2;
 		*cookies = malloc(maxcookies * sizeof(**cookies), M_TEMP,
 		    M_WAITOK);
 		*ncookies = 0;
 	}
 
 	if (cookies == NULL)
 		error = tmpfs_dir_getdents(tm, node, uio, 0, NULL, NULL);
 	else
 		error = tmpfs_dir_getdents(tm, node, uio, maxcookies, *cookies,
 		    ncookies);
 
 	/* Buffer was filled without hitting EOF. */
 	if (error == EJUSTRETURN)
 		error = (uio->uio_resid != startresid) ? 0 : EINVAL;
 
 	if (error != 0 && cookies != NULL && ncookies != NULL) {
 		free(*cookies, M_TEMP);
 		*cookies = NULL;
 		*ncookies = 0;
 	}
 
 	if (eofflag != NULL)
 		*eofflag =
 		    (error == 0 && uio->uio_offset == TMPFS_DIRCOOKIE_EOF);
 
 	return (error);
 }
 
 static int
 tmpfs_readlink(struct vop_readlink_args *v)
 {
 	struct vnode *vp = v->a_vp;
 	struct uio *uio = v->a_uio;
 
 	int error;
 	struct tmpfs_node *node;
 
 	MPASS(uio->uio_offset == 0);
 	MPASS(vp->v_type == VLNK);
 
 	node = VP_TO_TMPFS_NODE(vp);
 
 	error = uiomove(node->tn_link_target, MIN(node->tn_size, uio->uio_resid),
 	    uio);
 	tmpfs_set_accessed(VFS_TO_TMPFS(vp->v_mount), node);
 
 	return (error);
 }
 
 /*
  * VOP_FPLOOKUP_SYMLINK routines are subject to special circumstances, see
  * the comment above cache_fplookup for details.
  *
  * Check tmpfs_alloc_node for tmpfs-specific synchronisation notes.
  */
 static int
 tmpfs_fplookup_symlink(struct vop_fplookup_symlink_args *v)
 {
 	struct vnode *vp;
 	struct tmpfs_node *node;
 	char *symlink;
 
 	vp = v->a_vp;
 	node = VP_TO_TMPFS_NODE_SMR(vp);
 	if (__predict_false(node == NULL))
 		return (EAGAIN);
 	if (!atomic_load_char(&node->tn_link_smr))
 		return (EAGAIN);
 	symlink = atomic_load_ptr(&node->tn_link_target);
 	if (symlink == NULL)
 		return (EAGAIN);
 
 	return (cache_symlink_resolve(v->a_fpl, symlink, node->tn_size));
 }
 
 static int
 tmpfs_inactive(struct vop_inactive_args *v)
 {
 	struct vnode *vp;
 	struct tmpfs_node *node;
 
 	vp = v->a_vp;
 	node = VP_TO_TMPFS_NODE(vp);
 	if (node->tn_links == 0)
 		vrecycle(vp);
 	else
 		tmpfs_check_mtime(vp);
 	return (0);
 }
 
 static int
 tmpfs_need_inactive(struct vop_need_inactive_args *ap)
 {
 	struct vnode *vp;
 	struct tmpfs_node *node;
 	struct vm_object *obj;
 
 	vp = ap->a_vp;
 	node = VP_TO_TMPFS_NODE(vp);
 	if (node->tn_links == 0)
 		goto need;
 	if (vp->v_type == VREG) {
 		obj = vp->v_object;
 		if (obj->generation != obj->cleangeneration)
 			goto need;
 	}
 	return (0);
 need:
 	return (1);
 }
 
 int
 tmpfs_reclaim(struct vop_reclaim_args *v)
 {
 	struct vnode *vp;
 	struct tmpfs_mount *tmp;
 	struct tmpfs_node *node;
 	bool unlock;
 
 	vp = v->a_vp;
 	node = VP_TO_TMPFS_NODE(vp);
 	tmp = VFS_TO_TMPFS(vp->v_mount);
 
 	if (vp->v_type == VREG)
 		tmpfs_destroy_vobject(vp, node->tn_reg.tn_aobj);
 	vp->v_object = NULL;
 
 	TMPFS_LOCK(tmp);
 	TMPFS_NODE_LOCK(node);
 	tmpfs_free_vp(vp);
 
 	/*
 	 * If the node referenced by this vnode was deleted by the user,
 	 * we must free its associated data structures (now that the vnode
 	 * is being reclaimed).
 	 */
 	unlock = true;
 	if (node->tn_links == 0 &&
 	    (node->tn_vpstate & TMPFS_VNODE_ALLOCATING) == 0) {
 		node->tn_vpstate = TMPFS_VNODE_DOOMED;
 		unlock = !tmpfs_free_node_locked(tmp, node, true);
 	}
 
 	if (unlock) {
 		TMPFS_NODE_UNLOCK(node);
 		TMPFS_UNLOCK(tmp);
 	}
 
 	MPASS(vp->v_data == NULL);
 	return (0);
 }
 
 int
 tmpfs_print(struct vop_print_args *v)
 {
 	struct vnode *vp = v->a_vp;
 
 	struct tmpfs_node *node;
 
 	node = VP_TO_TMPFS_NODE(vp);
 
 	printf("tag VT_TMPFS, tmpfs_node %p, flags 0x%lx, links %jd\n",
 	    node, node->tn_flags, (uintmax_t)node->tn_links);
 	printf("\tmode 0%o, owner %d, group %d, size %jd, status 0x%x\n",
 	    node->tn_mode, node->tn_uid, node->tn_gid,
 	    (intmax_t)node->tn_size, node->tn_status);
 
 	if (vp->v_type == VFIFO)
 		fifo_printinfo(vp);
 
 	printf("\n");
 
 	return (0);
 }
 
 int
 tmpfs_pathconf(struct vop_pathconf_args *v)
 {
 	struct vnode *vp = v->a_vp;
 	int name = v->a_name;
 	long *retval = v->a_retval;
 
 	int error;
 
 	error = 0;
 
 	switch (name) {
 	case _PC_LINK_MAX:
 		*retval = TMPFS_LINK_MAX;
 		break;
 
 	case _PC_SYMLINK_MAX:
 		*retval = MAXPATHLEN;
 		break;
 
 	case _PC_NAME_MAX:
 		*retval = NAME_MAX;
 		break;
 
 	case _PC_PIPE_BUF:
 		if (vp->v_type == VDIR || vp->v_type == VFIFO)
 			*retval = PIPE_BUF;
 		else
 			error = EINVAL;
 		break;
 
 	case _PC_CHOWN_RESTRICTED:
 		*retval = 1;
 		break;
 
 	case _PC_NO_TRUNC:
 		*retval = 1;
 		break;
 
 	case _PC_SYNC_IO:
 		*retval = 1;
 		break;
 
 	case _PC_FILESIZEBITS:
 		*retval = 64;
 		break;
 
 	default:
 		error = vop_stdpathconf(v);
 	}
 
 	return (error);
 }
 
 static int
 tmpfs_vptofh(struct vop_vptofh_args *ap)
 /*
 vop_vptofh {
 	IN struct vnode *a_vp;
 	IN struct fid *a_fhp;
 };
 */
 {
 	struct tmpfs_fid_data tfd;
 	struct tmpfs_node *node;
 	struct fid *fhp;
 
 	node = VP_TO_TMPFS_NODE(ap->a_vp);
 	fhp = ap->a_fhp;
 	fhp->fid_len = sizeof(tfd);
 
 	/*
 	 * Copy into fid_data from the stack to avoid unaligned pointer use.
 	 * See the comment in sys/mount.h on struct fid for details.
 	 */
 	tfd.tfd_id = node->tn_id;
 	tfd.tfd_gen = node->tn_gen;
 	memcpy(fhp->fid_data, &tfd, fhp->fid_len);
 
 	return (0);
 }
 
 static int
 tmpfs_whiteout(struct vop_whiteout_args *ap)
 {
 	struct vnode *dvp = ap->a_dvp;
 	struct componentname *cnp = ap->a_cnp;
 	struct tmpfs_dirent *de;
 
 	switch (ap->a_flags) {
 	case LOOKUP:
 		return (0);
 	case CREATE:
 		de = tmpfs_dir_lookup(VP_TO_TMPFS_DIR(dvp), NULL, cnp);
 		if (de != NULL)
 			return (de->td_node == NULL ? 0 : EEXIST);
 		return (tmpfs_dir_whiteout_add(dvp, cnp));
 	case DELETE:
 		tmpfs_dir_whiteout_remove(dvp, cnp);
 		return (0);
 	default:
 		panic("tmpfs_whiteout: unknown op");
 	}
 }
 
 static int
 tmpfs_vptocnp_dir(struct tmpfs_node *tn, struct tmpfs_node *tnp,
     struct tmpfs_dirent **pde)
 {
 	struct tmpfs_dir_cursor dc;
 	struct tmpfs_dirent *de;
 
 	for (de = tmpfs_dir_first(tnp, &dc); de != NULL;
 	     de = tmpfs_dir_next(tnp, &dc)) {
 		if (de->td_node == tn) {
 			*pde = de;
 			return (0);
 		}
 	}
 	return (ENOENT);
 }
 
 static int
 tmpfs_vptocnp_fill(struct vnode *vp, struct tmpfs_node *tn,
     struct tmpfs_node *tnp, char *buf, size_t *buflen, struct vnode **dvp)
 {
 	struct tmpfs_dirent *de;
 	int error, i;
 
 	error = vn_vget_ino_gen(vp, tmpfs_vn_get_ino_alloc, tnp, LK_SHARED,
 	    dvp);
 	if (error != 0)
 		return (error);
 	error = tmpfs_vptocnp_dir(tn, tnp, &de);
 	if (error == 0) {
 		i = *buflen;
 		i -= de->td_namelen;
 		if (i < 0) {
 			error = ENOMEM;
 		} else {
 			bcopy(de->ud.td_name, buf + i, de->td_namelen);
 			*buflen = i;
 		}
 	}
 	if (error == 0) {
 		if (vp != *dvp)
 			VOP_UNLOCK(*dvp);
 	} else {
 		if (vp != *dvp)
 			vput(*dvp);
 		else
 			vrele(vp);
 	}
 	return (error);
 }
 
 static int
 tmpfs_vptocnp(struct vop_vptocnp_args *ap)
 {
 	struct vnode *vp, **dvp;
 	struct tmpfs_node *tn, *tnp, *tnp1;
 	struct tmpfs_dirent *de;
 	struct tmpfs_mount *tm;
 	char *buf;
 	size_t *buflen;
 	int error;
 
 	vp = ap->a_vp;
 	dvp = ap->a_vpp;
 	buf = ap->a_buf;
 	buflen = ap->a_buflen;
 
 	tm = VFS_TO_TMPFS(vp->v_mount);
 	tn = VP_TO_TMPFS_NODE(vp);
 	if (tn->tn_type == VDIR) {
 		tnp = tn->tn_dir.tn_parent;
 		if (tnp == NULL)
 			return (ENOENT);
 		tmpfs_ref_node(tnp);
 		error = tmpfs_vptocnp_fill(vp, tn, tn->tn_dir.tn_parent, buf,
 		    buflen, dvp);
 		tmpfs_free_node(tm, tnp);
 		return (error);
 	}
 restart:
 	TMPFS_LOCK(tm);
 restart_locked:
 	LIST_FOREACH_SAFE(tnp, &tm->tm_nodes_used, tn_entries, tnp1) {
 		if (tnp->tn_type != VDIR)
 			continue;
 		TMPFS_NODE_LOCK(tnp);
 		tmpfs_ref_node(tnp);
 
 		/*
 		 * tn_vnode cannot be instantiated while we hold the
 		 * node lock, so the directory cannot be changed while
 		 * we iterate over it.  Do this to avoid instantiating
 		 * vnode for directories which cannot point to our
 		 * node.
 		 */
 		error = tnp->tn_vnode == NULL ? tmpfs_vptocnp_dir(tn, tnp,
 		    &de) : 0;
 
 		if (error == 0) {
 			TMPFS_NODE_UNLOCK(tnp);
 			TMPFS_UNLOCK(tm);
 			error = tmpfs_vptocnp_fill(vp, tn, tnp, buf, buflen,
 			    dvp);
 			if (error == 0) {
 				tmpfs_free_node(tm, tnp);
 				return (0);
 			}
 			if (VN_IS_DOOMED(vp)) {
 				tmpfs_free_node(tm, tnp);
 				return (ENOENT);
 			}
 			TMPFS_LOCK(tm);
 			TMPFS_NODE_LOCK(tnp);
 		}
 		if (tmpfs_free_node_locked(tm, tnp, false)) {
 			goto restart;
 		} else {
 			KASSERT(tnp->tn_refcount > 0,
 			    ("node %p refcount zero", tnp));
 			if (tnp->tn_attached) {
 				tnp1 = LIST_NEXT(tnp, tn_entries);
 				TMPFS_NODE_UNLOCK(tnp);
 			} else {
 				TMPFS_NODE_UNLOCK(tnp);
 				goto restart_locked;
 			}
 		}
 	}
 	TMPFS_UNLOCK(tm);
 	return (ENOENT);
 }
 
 /*
  * Vnode operations vector used for files stored in a tmpfs file system.
  */
 struct vop_vector tmpfs_vnodeop_entries = {
 	.vop_default =			&default_vnodeops,
 	.vop_lookup =			vfs_cache_lookup,
 	.vop_cachedlookup =		tmpfs_cached_lookup,
 	.vop_create =			tmpfs_create,
 	.vop_mknod =			tmpfs_mknod,
 	.vop_open =			tmpfs_open,
 	.vop_close =			tmpfs_close,
 	.vop_fplookup_vexec =		tmpfs_fplookup_vexec,
 	.vop_fplookup_symlink =		tmpfs_fplookup_symlink,
 	.vop_access =			tmpfs_access,
 	.vop_stat =			tmpfs_stat,
 	.vop_getattr =			tmpfs_getattr,
 	.vop_setattr =			tmpfs_setattr,
 	.vop_read =			tmpfs_read,
 	.vop_read_pgcache =		tmpfs_read_pgcache,
 	.vop_write =			tmpfs_write,
 	.vop_deallocate =		tmpfs_deallocate,
 	.vop_fsync =			tmpfs_fsync,
 	.vop_remove =			tmpfs_remove,
 	.vop_link =			tmpfs_link,
 	.vop_rename =			tmpfs_rename,
 	.vop_mkdir =			tmpfs_mkdir,
 	.vop_rmdir =			tmpfs_rmdir,
 	.vop_symlink =			tmpfs_symlink,
 	.vop_readdir =			tmpfs_readdir,
 	.vop_readlink =			tmpfs_readlink,
 	.vop_inactive =			tmpfs_inactive,
 	.vop_need_inactive =		tmpfs_need_inactive,
 	.vop_reclaim =			tmpfs_reclaim,
 	.vop_print =			tmpfs_print,
 	.vop_pathconf =			tmpfs_pathconf,
 	.vop_vptofh =			tmpfs_vptofh,
 	.vop_whiteout =			tmpfs_whiteout,
 	.vop_bmap =			VOP_EOPNOTSUPP,
 	.vop_vptocnp =			tmpfs_vptocnp,
 	.vop_lock1 =			vop_lock,
 	.vop_unlock = 			vop_unlock,
 	.vop_islocked = 		vop_islocked,
 	.vop_add_writecount =		vop_stdadd_writecount_nomsync,
 };
 VFS_VOP_VECTOR_REGISTER(tmpfs_vnodeop_entries);
 
 /*
  * Same vector for mounts which do not use namecache.
  */
 struct vop_vector tmpfs_vnodeop_nonc_entries = {
 	.vop_default =			&tmpfs_vnodeop_entries,
 	.vop_lookup =			tmpfs_lookup,
 };
 VFS_VOP_VECTOR_REGISTER(tmpfs_vnodeop_nonc_entries);
diff --git a/sys/fs/unionfs/union_subr.c b/sys/fs/unionfs/union_subr.c
index a3a5feef3291..ac00cc1bc093 100644
--- a/sys/fs/unionfs/union_subr.c
+++ b/sys/fs/unionfs/union_subr.c
@@ -1,1336 +1,1319 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 1994 Jan-Simon Pendry
  * Copyright (c) 1994
  *	The Regents of the University of California.  All rights reserved.
  * Copyright (c) 2005, 2006, 2012 Masanori Ozawa <ozawa@ongs.co.jp>, ONGS Inc.
  * Copyright (c) 2006, 2012 Daichi Goto <daichi@freebsd.org>
  *
  * This code is derived from software contributed to Berkeley by
  * Jan-Simon Pendry.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)union_subr.c	8.20 (Berkeley) 5/20/95
  * $FreeBSD$
  */
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/malloc.h>
 #include <sys/mount.h>
 #include <sys/namei.h>
 #include <sys/proc.h>
 #include <sys/vnode.h>
 #include <sys/dirent.h>
 #include <sys/fcntl.h>
 #include <sys/filedesc.h>
 #include <sys/stat.h>
 #include <sys/sysctl.h>
 #include <sys/taskqueue.h>
 #include <sys/resourcevar.h>
 
 #include <machine/atomic.h>
 
 #include <security/mac/mac_framework.h>
 
 #include <vm/uma.h>
 
 #include <fs/unionfs/union.h>
 
 #define NUNIONFSNODECACHE 16
 #define UNIONFSHASHMASK (NUNIONFSNODECACHE - 1)
 
 static MALLOC_DEFINE(M_UNIONFSHASH, "UNIONFS hash", "UNIONFS hash table");
 MALLOC_DEFINE(M_UNIONFSNODE, "UNIONFS node", "UNIONFS vnode private part");
 MALLOC_DEFINE(M_UNIONFSPATH, "UNIONFS path", "UNIONFS path private part");
 
 static struct task unionfs_deferred_rele_task;
 static struct mtx unionfs_deferred_rele_lock;
 static STAILQ_HEAD(, unionfs_node) unionfs_deferred_rele_list =
     STAILQ_HEAD_INITIALIZER(unionfs_deferred_rele_list);
 static TASKQUEUE_DEFINE_THREAD(unionfs_rele);
 
 unsigned int unionfs_ndeferred = 0;
 SYSCTL_UINT(_vfs, OID_AUTO, unionfs_ndeferred, CTLFLAG_RD,
     &unionfs_ndeferred, 0, "unionfs deferred vnode release");
 
 static void unionfs_deferred_rele(void *, int);
 
 /*
  * Initialize
  */
 int 
 unionfs_init(struct vfsconf *vfsp)
 {
 	UNIONFSDEBUG("unionfs_init\n");	/* printed during system boot */
 	TASK_INIT(&unionfs_deferred_rele_task, 0, unionfs_deferred_rele, NULL);
 	mtx_init(&unionfs_deferred_rele_lock, "uniondefr", NULL, MTX_DEF); 
 	return (0);
 }
 
 /*
  * Uninitialize
  */
 int 
 unionfs_uninit(struct vfsconf *vfsp)
 {
 	taskqueue_quiesce(taskqueue_unionfs_rele);
 	taskqueue_free(taskqueue_unionfs_rele);
 	mtx_destroy(&unionfs_deferred_rele_lock);
 	return (0);
 }
 
 static void
 unionfs_deferred_rele(void *arg __unused, int pending __unused)
 {
 	STAILQ_HEAD(, unionfs_node) local_rele_list;
 	struct unionfs_node *unp, *tunp;
 	unsigned int ndeferred;
 
 	ndeferred = 0;
 	STAILQ_INIT(&local_rele_list);
 	mtx_lock(&unionfs_deferred_rele_lock);
 	STAILQ_CONCAT(&local_rele_list, &unionfs_deferred_rele_list);
 	mtx_unlock(&unionfs_deferred_rele_lock);
 	STAILQ_FOREACH_SAFE(unp, &local_rele_list, un_rele, tunp) {
 		++ndeferred;
 		MPASS(unp->un_dvp != NULL);
 		vrele(unp->un_dvp);
 		free(unp, M_UNIONFSNODE);
 	}
 
 	/* We expect this function to be single-threaded, thus no atomic */
 	unionfs_ndeferred += ndeferred;
 }
 
 static struct unionfs_node_hashhead *
 unionfs_get_hashhead(struct vnode *dvp, struct vnode *lookup)
 {
 	struct unionfs_node *unp;
 
 	unp = VTOUNIONFS(dvp);
 
 	return (&(unp->un_hashtbl[vfs_hash_index(lookup) & UNIONFSHASHMASK]));
 }
 
 /*
  * Attempt to lookup a cached unionfs vnode by upper/lower vp
  * from dvp, with dvp's interlock held.
  */
 static struct vnode *
 unionfs_get_cached_vnode_locked(struct vnode *lookup, struct vnode *dvp)
 {
 	struct unionfs_node *unp;
 	struct unionfs_node_hashhead *hd;
 	struct vnode *vp;
 
 	hd = unionfs_get_hashhead(dvp, lookup);
 
 	LIST_FOREACH(unp, hd, un_hash) {
 		if (unp->un_uppervp == lookup ||
 		    unp->un_lowervp == lookup) {
 			vp = UNIONFSTOV(unp);
 			VI_LOCK_FLAGS(vp, MTX_DUPOK);
 			vp->v_iflag &= ~VI_OWEINACT;
 			if (VN_IS_DOOMED(vp) ||
 			    ((vp->v_iflag & VI_DOINGINACT) != 0)) {
 				VI_UNLOCK(vp);
 				vp = NULLVP;
 			} else {
 				vrefl(vp);
 				VI_UNLOCK(vp);
 			}
 			return (vp);
 		}
 	}
 
 	return (NULLVP);
 }
 
 
 /*
  * Get the cached vnode.
  */
 static struct vnode *
 unionfs_get_cached_vnode(struct vnode *uvp, struct vnode *lvp,
     struct vnode *dvp)
 {
 	struct vnode *vp;
 
 	vp = NULLVP;
 	VI_LOCK(dvp);
 	if (uvp != NULLVP)
 		vp = unionfs_get_cached_vnode_locked(uvp, dvp);
 	else if (lvp != NULLVP)
 		vp = unionfs_get_cached_vnode_locked(lvp, dvp);
 	VI_UNLOCK(dvp);
 
 	return (vp);
 }
 
 /*
  * Add the new vnode into cache.
  */
 static struct vnode *
 unionfs_ins_cached_vnode(struct unionfs_node *uncp,
     struct vnode *dvp)
 {
 	struct unionfs_node_hashhead *hd;
 	struct vnode *vp;
 
 	ASSERT_VOP_ELOCKED(uncp->un_uppervp, __func__);
 	ASSERT_VOP_ELOCKED(uncp->un_lowervp, __func__);
 	KASSERT(uncp->un_uppervp == NULLVP || uncp->un_uppervp->v_type == VDIR,
 	    ("%s: v_type != VDIR", __func__));
 	KASSERT(uncp->un_lowervp == NULLVP || uncp->un_lowervp->v_type == VDIR,
 	    ("%s: v_type != VDIR", __func__));
 
 	vp = NULLVP;
 	VI_LOCK(dvp);
 	if (uncp->un_uppervp != NULL)
 		vp = unionfs_get_cached_vnode_locked(uncp->un_uppervp, dvp);
 	else if (uncp->un_lowervp != NULL)
 		vp = unionfs_get_cached_vnode_locked(uncp->un_lowervp, dvp);
 	if (vp == NULLVP) {
 		hd = unionfs_get_hashhead(dvp, (uncp->un_uppervp != NULLVP ?
 		    uncp->un_uppervp : uncp->un_lowervp));
 		LIST_INSERT_HEAD(hd, uncp, un_hash);
 	}
 	VI_UNLOCK(dvp);
 
 	return (vp);
 }
 
 /*
  * Remove the vnode.
  */
 static void
 unionfs_rem_cached_vnode(struct unionfs_node *unp, struct vnode *dvp)
 {
 	KASSERT(unp != NULL, ("%s: null node", __func__));
 	KASSERT(dvp != NULLVP,
 	    ("%s: null parent vnode", __func__));
 
 	VI_LOCK(dvp);
 	if (unp->un_hash.le_prev != NULL) {
 		LIST_REMOVE(unp, un_hash);
 		unp->un_hash.le_next = NULL;
 		unp->un_hash.le_prev = NULL;
 	}
 	VI_UNLOCK(dvp);
 }
 
 /*
  * Common cleanup handling for unionfs_nodeget
  * Upper, lower, and parent directory vnodes are expected to be referenced by
  * the caller.  Upper and lower vnodes, if non-NULL, are also expected to be
  * exclusively locked by the caller.
  * This function will return with the caller's locks and references undone.
  */
 static void
 unionfs_nodeget_cleanup(struct vnode *vp, struct unionfs_node *unp)
 {
 
 	/*
 	 * Lock and reset the default vnode lock; vgone() expects a locked
 	 * vnode, and we're going to reset the vnode ops.
 	 */
 	lockmgr(&vp->v_lock, LK_EXCLUSIVE, NULL);
 
 	/*
 	 * Clear out private data and reset the vnode ops to avoid use of
 	 * unionfs vnode ops on a partially constructed vnode.
 	 */
 	VI_LOCK(vp);
 	vp->v_data = NULL;
 	vp->v_vnlock = &vp->v_lock;
 	vp->v_op = &dead_vnodeops;
 	VI_UNLOCK(vp);
 	vgone(vp);
 	vput(vp);
 
 	if (unp->un_dvp != NULLVP)
 		vrele(unp->un_dvp);
 	if (unp->un_uppervp != NULLVP)
 		vput(unp->un_uppervp);
 	if (unp->un_lowervp != NULLVP)
 		vput(unp->un_lowervp);
 	if (unp->un_hashtbl != NULL)
 		hashdestroy(unp->un_hashtbl, M_UNIONFSHASH, UNIONFSHASHMASK);
 	free(unp->un_path, M_UNIONFSPATH);
 	free(unp, M_UNIONFSNODE);
 }
 
 /*
  * Make a new or get existing unionfs node.
  * 
  * uppervp and lowervp should be unlocked. Because if new unionfs vnode is
  * locked, uppervp or lowervp is locked too. In order to prevent dead lock,
  * you should not lock plurality simultaneously.
  */
 int
 unionfs_nodeget(struct mount *mp, struct vnode *uppervp,
     struct vnode *lowervp, struct vnode *dvp, struct vnode **vpp,
     struct componentname *cnp)
 {
 	char	       *path;
 	struct unionfs_mount *ump;
 	struct unionfs_node *unp;
 	struct vnode   *vp;
 	u_long		hashmask;
 	int		error;
 	int		lkflags;
 	enum vtype	vt;
 
 	error = 0;
 	ump = MOUNTTOUNIONFSMOUNT(mp);
 	lkflags = (cnp ? cnp->cn_lkflags : 0);
 	path = (cnp ? cnp->cn_nameptr : NULL);
 	*vpp = NULLVP;
 
 	if (uppervp == NULLVP && lowervp == NULLVP)
 		panic("%s: upper and lower is null", __func__);
 
 	vt = (uppervp != NULLVP ? uppervp->v_type : lowervp->v_type);
 
 	/* If it has no ISLASTCN flag, path check is skipped. */
 	if (cnp && !(cnp->cn_flags & ISLASTCN))
 		path = NULL;
 
 	/* check the cache */
 	if (dvp != NULLVP && vt == VDIR) {
 		vp = unionfs_get_cached_vnode(uppervp, lowervp, dvp);
 		if (vp != NULLVP) {
 			*vpp = vp;
 			goto unionfs_nodeget_out;
 		}
 	}
 
 	unp = malloc(sizeof(struct unionfs_node),
 	    M_UNIONFSNODE, M_WAITOK | M_ZERO);
 
 	error = getnewvnode("unionfs", mp, &unionfs_vnodeops, &vp);
 	if (error != 0) {
 		free(unp, M_UNIONFSNODE);
 		return (error);
 	}
 	if (dvp != NULLVP)
 		vref(dvp);
 	if (uppervp != NULLVP)
 		vref(uppervp);
 	if (lowervp != NULLVP)
 		vref(lowervp);
 
 	if (vt == VDIR) {
 		unp->un_hashtbl = hashinit(NUNIONFSNODECACHE, M_UNIONFSHASH,
 		    &hashmask);
 		KASSERT(hashmask == UNIONFSHASHMASK,
 		    ("unexpected unionfs hash mask 0x%lx", hashmask));
 	}
 
 	unp->un_vnode = vp;
 	unp->un_uppervp = uppervp;
 	unp->un_lowervp = lowervp;
 	unp->un_dvp = dvp;
 	if (uppervp != NULLVP)
 		vp->v_vnlock = uppervp->v_vnlock;
 	else
 		vp->v_vnlock = lowervp->v_vnlock;
 
 	if (path != NULL) {
 		unp->un_path = malloc(cnp->cn_namelen + 1,
 		    M_UNIONFSPATH, M_WAITOK | M_ZERO);
 		bcopy(cnp->cn_nameptr, unp->un_path, cnp->cn_namelen);
 		unp->un_path[cnp->cn_namelen] = '\0';
 		unp->un_pathlen = cnp->cn_namelen;
 	}
 	vp->v_type = vt;
 	vp->v_data = unp;
 
 	/*
 	 * TODO: This is an imperfect check, as there's no guarantee that
 	 * the underlying filesystems will always return vnode pointers
 	 * for the root inodes that match our cached values.  To reduce
 	 * the likelihood of failure, for example in the case where either
 	 * vnode has been forcibly doomed, we check both pointers and set
 	 * VV_ROOT if either matches.
 	 */
 	if (ump->um_uppervp == uppervp || ump->um_lowervp == lowervp)
 		vp->v_vflag |= VV_ROOT;
 	KASSERT(dvp != NULL || (vp->v_vflag & VV_ROOT) != 0,
 	    ("%s: NULL dvp for non-root vp %p", __func__, vp));
 
 	vn_lock_pair(lowervp, false, uppervp, false); 
 	error = insmntque1(vp, mp);
 	if (error != 0) {
 		unionfs_nodeget_cleanup(vp, unp);
 		return (error);
 	}
 	if (lowervp != NULL && VN_IS_DOOMED(lowervp)) {
 		vput(lowervp);
 		unp->un_lowervp = NULL;
 	}
 	if (uppervp != NULL && VN_IS_DOOMED(uppervp)) {
 		vput(uppervp);
 		unp->un_uppervp = NULL;
 	}
 	if (unp->un_lowervp == NULL && unp->un_uppervp == NULL) {
 		unionfs_nodeget_cleanup(vp, unp);
 		return (ENOENT);
 	}
 
 	if (dvp != NULLVP && vt == VDIR)
 		*vpp = unionfs_ins_cached_vnode(unp, dvp);
 	if (*vpp != NULLVP) {
 		unionfs_nodeget_cleanup(vp, unp);
 		vp = *vpp;
 	} else {
 		if (uppervp != NULL)
 			VOP_UNLOCK(uppervp);
 		if (lowervp != NULL)
 			VOP_UNLOCK(lowervp);
 		*vpp = vp;
 	}
 
 unionfs_nodeget_out:
 	if (lkflags & LK_TYPE_MASK)
 		vn_lock(vp, lkflags | LK_RETRY);
 
 	return (0);
 }
 
 /*
  * Clean up the unionfs node.
  */
 void
 unionfs_noderem(struct vnode *vp)
 {
 	struct unionfs_node *unp, *unp_t1, *unp_t2;
 	struct unionfs_node_hashhead *hd;
 	struct unionfs_node_status *unsp, *unsp_tmp;
 	struct vnode   *lvp;
 	struct vnode   *uvp;
 	struct vnode   *dvp;
 	int		count;
 	int		writerefs;
 
 	/*
 	 * The root vnode lock may be recursed during unmount, because
 	 * it may share the same lock as the unionfs mount's covered vnode,
 	 * which is locked across VFS_UNMOUNT().  This lock will then be
 	 * recursively taken during the vflush() issued by unionfs_unmount().
 	 * But we still only need to lock the unionfs lock once, because only
 	 * one of those lock operations was taken against a unionfs vnode and
 	 * will be undone against a unionfs vnode.
 	 */
 	KASSERT(vp->v_vnlock->lk_recurse == 0 || (vp->v_vflag & VV_ROOT) != 0,
 	    ("%s: vnode %p locked recursively", __func__, vp));
 	if (lockmgr(&vp->v_lock, LK_EXCLUSIVE | LK_NOWAIT, NULL) != 0)
 		panic("%s: failed to acquire lock for vnode lock", __func__);
 
 	/*
 	 * Use the interlock to protect the clearing of v_data to
 	 * prevent faults in unionfs_lock().
 	 */
 	VI_LOCK(vp);
 	unp = VTOUNIONFS(vp);
 	lvp = unp->un_lowervp;
 	uvp = unp->un_uppervp;
 	dvp = unp->un_dvp;
 	unp->un_lowervp = unp->un_uppervp = NULLVP;
 	vp->v_vnlock = &(vp->v_lock);
 	vp->v_data = NULL;
 	vp->v_object = NULL;
 	if (unp->un_hashtbl != NULL) {
 		/*
 		 * Clear out any cached child vnodes.  This should only
 		 * be necessary during forced unmount, when the vnode may
 		 * be reclaimed with a non-zero use count.  Otherwise the
 		 * reference held by each child should prevent reclamation.
 		 */
 		for (count = 0; count <= UNIONFSHASHMASK; count++) {
 			hd = unp->un_hashtbl + count;
 			LIST_FOREACH_SAFE(unp_t1, hd, un_hash, unp_t2) {
 				LIST_REMOVE(unp_t1, un_hash);
 				unp_t1->un_hash.le_next = NULL;
 				unp_t1->un_hash.le_prev = NULL;
 			}
 		}
 	}
 	VI_UNLOCK(vp);
 
 	writerefs = atomic_load_int(&vp->v_writecount);
 	VNASSERT(writerefs >= 0, vp,
 	    ("%s: write count %d, unexpected text ref", __func__, writerefs));
 	/*
 	 * If we were opened for write, we leased the write reference
 	 * to the lower vnode.  If this is a reclamation due to the
 	 * forced unmount, undo the reference now.
 	 */
 	if (writerefs > 0) {
 		VNASSERT(uvp != NULL, vp,
 		    ("%s: write reference without upper vnode", __func__));
 		VOP_ADD_WRITECOUNT(uvp, -writerefs);
 	}
 	if (lvp != NULLVP)
 		VOP_UNLOCK(lvp);
 	if (uvp != NULLVP)
 		VOP_UNLOCK(uvp);
 
 	if (dvp != NULLVP)
 		unionfs_rem_cached_vnode(unp, dvp);
 
 	if (lvp != NULLVP)
 		vrele(lvp);
 	if (uvp != NULLVP)
 		vrele(uvp);
 	if (unp->un_path != NULL) {
 		free(unp->un_path, M_UNIONFSPATH);
 		unp->un_path = NULL;
 		unp->un_pathlen = 0;
 	}
 
 	if (unp->un_hashtbl != NULL) {
 		hashdestroy(unp->un_hashtbl, M_UNIONFSHASH, UNIONFSHASHMASK);
 	}
 
 	LIST_FOREACH_SAFE(unsp, &(unp->un_unshead), uns_list, unsp_tmp) {
 		LIST_REMOVE(unsp, uns_list);
 		free(unsp, M_TEMP);
 	}
 	if (dvp != NULLVP) {
 		mtx_lock(&unionfs_deferred_rele_lock);
 		STAILQ_INSERT_TAIL(&unionfs_deferred_rele_list, unp, un_rele);
 		mtx_unlock(&unionfs_deferred_rele_lock);
 		taskqueue_enqueue(taskqueue_unionfs_rele,
 		    &unionfs_deferred_rele_task);
 	} else
 		free(unp, M_UNIONFSNODE);
 }
 
 /*
  * Get the unionfs node status object for the vnode corresponding to unp,
  * for the process that owns td.  Allocate a new status object if one
  * does not already exist.
  */
 void
 unionfs_get_node_status(struct unionfs_node *unp, struct thread *td,
     struct unionfs_node_status **unspp)
 {
 	struct unionfs_node_status *unsp;
 	pid_t pid;
 
 	pid = td->td_proc->p_pid;
 
 	KASSERT(NULL != unspp, ("%s: NULL status", __func__));
 	ASSERT_VOP_ELOCKED(UNIONFSTOV(unp), __func__);
 
 	LIST_FOREACH(unsp, &(unp->un_unshead), uns_list) {
 		if (unsp->uns_pid == pid) {
 			*unspp = unsp;
 			return;
 		}
 	}
 
 	/* create a new unionfs node status */
 	unsp = malloc(sizeof(struct unionfs_node_status),
 	    M_TEMP, M_WAITOK | M_ZERO);
 
 	unsp->uns_pid = pid;
 	LIST_INSERT_HEAD(&(unp->un_unshead), unsp, uns_list);
 
 	*unspp = unsp;
 }
 
 /*
  * Remove the unionfs node status, if you can.
  * You need exclusive lock this vnode.
  */
 void
 unionfs_tryrem_node_status(struct unionfs_node *unp,
     struct unionfs_node_status *unsp)
 {
 	KASSERT(NULL != unsp, ("%s: NULL status", __func__));
 	ASSERT_VOP_ELOCKED(UNIONFSTOV(unp), __func__);
 
 	if (0 < unsp->uns_lower_opencnt || 0 < unsp->uns_upper_opencnt)
 		return;
 
 	LIST_REMOVE(unsp, uns_list);
 	free(unsp, M_TEMP);
 }
 
 /*
  * Create upper node attr.
  */
 void
 unionfs_create_uppervattr_core(struct unionfs_mount *ump, struct vattr *lva,
     struct vattr *uva, struct thread *td)
 {
 	VATTR_NULL(uva);
 	uva->va_type = lva->va_type;
 	uva->va_atime = lva->va_atime;
 	uva->va_mtime = lva->va_mtime;
 	uva->va_ctime = lva->va_ctime;
 
 	switch (ump->um_copymode) {
 	case UNIONFS_TRANSPARENT:
 		uva->va_mode = lva->va_mode;
 		uva->va_uid = lva->va_uid;
 		uva->va_gid = lva->va_gid;
 		break;
 	case UNIONFS_MASQUERADE:
 		if (ump->um_uid == lva->va_uid) {
 			uva->va_mode = lva->va_mode & 077077;
 			uva->va_mode |= (lva->va_type == VDIR ?
 			    ump->um_udir : ump->um_ufile) & 0700;
 			uva->va_uid = lva->va_uid;
 			uva->va_gid = lva->va_gid;
 		} else {
 			uva->va_mode = (lva->va_type == VDIR ?
 			    ump->um_udir : ump->um_ufile);
 			uva->va_uid = ump->um_uid;
 			uva->va_gid = ump->um_gid;
 		}
 		break;
 	default:		/* UNIONFS_TRADITIONAL */
 		uva->va_mode = 0777 & ~td->td_proc->p_pd->pd_cmask;
 		uva->va_uid = ump->um_uid;
 		uva->va_gid = ump->um_gid;
 		break;
 	}
 }
 
 /*
  * Create upper node attr.
  */
 int
 unionfs_create_uppervattr(struct unionfs_mount *ump, struct vnode *lvp,
     struct vattr *uva, struct ucred *cred, struct thread *td)
 {
 	struct vattr	lva;
 	int		error;
 
 	if ((error = VOP_GETATTR(lvp, &lva, cred)))
 		return (error);
 
 	unionfs_create_uppervattr_core(ump, &lva, uva, td);
 
 	return (error);
 }
 
 /*
  * relookup
  * 
  * dvp should be locked on entry and will be locked on return.
  * 
  * If an error is returned, *vpp will be invalid, otherwise it will hold a
  * locked, referenced vnode. If *vpp == dvp then remember that only one
  * LK_EXCLUSIVE lock is held.
  */
 int
 unionfs_relookup(struct vnode *dvp, struct vnode **vpp,
     struct componentname *cnp, struct componentname *cn, struct thread *td,
     char *path, int pathlen, u_long nameiop)
 {
 	int error;
 
 	cn->cn_namelen = pathlen;
 	cn->cn_pnbuf = path;
 	cn->cn_nameiop = nameiop;
-	cn->cn_flags = (LOCKPARENT | LOCKLEAF | HASBUF | SAVENAME | ISLASTCN);
+	cn->cn_flags = (LOCKPARENT | LOCKLEAF | ISLASTCN);
 	cn->cn_lkflags = LK_EXCLUSIVE;
 	cn->cn_cred = cnp->cn_cred;
 	cn->cn_nameptr = cn->cn_pnbuf;
 
 	if (nameiop == DELETE)
 		cn->cn_flags |= (cnp->cn_flags & (DOWHITEOUT | SAVESTART));
 	else if (RENAME == nameiop)
 		cn->cn_flags |= (cnp->cn_flags & SAVESTART);
 	else if (nameiop == CREATE)
 		cn->cn_flags |= NOCACHE;
 
 	vref(dvp);
 	VOP_UNLOCK(dvp);
 
 	if ((error = vfs_relookup(dvp, vpp, cn))) {
 		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
 	} else
 		vrele(dvp);
 
-	KASSERT((cn->cn_flags & HASBUF) != 0,
-	    ("%s: HASBUF cleared", __func__));
-	KASSERT((cn->cn_flags & SAVENAME) != 0,
-	    ("%s: SAVENAME cleared", __func__));
 	KASSERT(cn->cn_pnbuf == path, ("%s: cn_pnbuf changed", __func__));
 
 	return (error);
 }
 
 /*
  * relookup for CREATE namei operation.
  *
  * dvp is unionfs vnode. dvp should be locked.
  *
  * If it called 'unionfs_copyfile' function by unionfs_link etc,
  * VOP_LOOKUP information is broken.
  * So it need relookup in order to create link etc.
  */
 int
 unionfs_relookup_for_create(struct vnode *dvp, struct componentname *cnp,
     struct thread *td)
 {
 	struct vnode *udvp;
 	struct vnode *vp;
 	struct componentname cn;
 	int error;
 
 	udvp = UNIONFSVPTOUPPERVP(dvp);
 	vp = NULLVP;
 
-	KASSERT((cnp->cn_flags & HASBUF) != 0,
-	    ("%s called without HASBUF", __func__));
 	error = unionfs_relookup(udvp, &vp, cnp, &cn, td, cnp->cn_nameptr,
 	    cnp->cn_namelen, CREATE);
 	if (error)
 		return (error);
 
 	if (vp != NULLVP) {
 		if (udvp == vp)
 			vrele(vp);
 		else
 			vput(vp);
 
 		error = EEXIST;
 	}
 
 	return (error);
 }
 
 /*
  * relookup for DELETE namei operation.
  *
  * dvp is unionfs vnode. dvp should be locked.
  */
 int
 unionfs_relookup_for_delete(struct vnode *dvp, struct componentname *cnp,
     struct thread *td)
 {
 	struct vnode *udvp;
 	struct vnode *vp;
 	struct componentname cn;
 	int error;
 
 	udvp = UNIONFSVPTOUPPERVP(dvp);
 	vp = NULLVP;
 
-	KASSERT((cnp->cn_flags & HASBUF) != 0,
-	    ("%s called without HASBUF", __func__));
 	error = unionfs_relookup(udvp, &vp, cnp, &cn, td, cnp->cn_nameptr,
 	    cnp->cn_namelen, DELETE);
 	if (error)
 		return (error);
 
 	if (vp == NULLVP)
 		error = ENOENT;
 	else {
 		if (udvp == vp)
 			vrele(vp);
 		else
 			vput(vp);
 	}
 
 	return (error);
 }
 
 /*
  * relookup for RENAME namei operation.
  *
  * dvp is unionfs vnode. dvp should be locked.
  */
 int
 unionfs_relookup_for_rename(struct vnode *dvp, struct componentname *cnp,
     struct thread *td)
 {
 	struct vnode *udvp;
 	struct vnode *vp;
 	struct componentname cn;
 	int error;
 
 	udvp = UNIONFSVPTOUPPERVP(dvp);
 	vp = NULLVP;
 
-	KASSERT((cnp->cn_flags & HASBUF) != 0,
-	    ("%s called without HASBUF", __func__));
 	error = unionfs_relookup(udvp, &vp, cnp, &cn, td, cnp->cn_nameptr,
 	    cnp->cn_namelen, RENAME);
 	if (error)
 		return (error);
 
 	if (vp != NULLVP) {
 		if (udvp == vp)
 			vrele(vp);
 		else
 			vput(vp);
 	}
 
 	return (error);
 }
 
 /*
  * Update the unionfs_node.
  * 
  * uvp is new locked upper vnode. unionfs vnode's lock will be exchanged to the
  * uvp's lock and lower's lock will be unlocked.
  */
 static void
 unionfs_node_update(struct unionfs_node *unp, struct vnode *uvp,
     struct thread *td)
 {
 	struct unionfs_node_hashhead *hd;
 	struct vnode   *vp;
 	struct vnode   *lvp;
 	struct vnode   *dvp;
 	unsigned	count, lockrec;
 
 	vp = UNIONFSTOV(unp);
 	lvp = unp->un_lowervp;
 	ASSERT_VOP_ELOCKED(lvp, __func__);
 	ASSERT_VOP_ELOCKED(uvp, __func__);
 	dvp = unp->un_dvp;
 
 	VNASSERT(vp->v_writecount == 0, vp,
 	    ("%s: non-zero writecount", __func__));
 	/*
 	 * Update the upper vnode's lock state to match the lower vnode,
 	 * and then switch the unionfs vnode's lock to the upper vnode.
 	 */
 	lockrec = lvp->v_vnlock->lk_recurse;
 	for (count = 0; count < lockrec; count++)
 		vn_lock(uvp, LK_EXCLUSIVE | LK_CANRECURSE | LK_RETRY);
 	VI_LOCK(vp);
 	unp->un_uppervp = uvp;
 	vp->v_vnlock = uvp->v_vnlock;
 	VI_UNLOCK(vp);
 
 	/*
 	 * Re-cache the unionfs vnode against the upper vnode
 	 */
 	if (dvp != NULLVP && vp->v_type == VDIR) {
 		VI_LOCK(dvp);
 		if (unp->un_hash.le_prev != NULL) {
 			LIST_REMOVE(unp, un_hash);
 			hd = unionfs_get_hashhead(dvp, uvp);
 			LIST_INSERT_HEAD(hd, unp, un_hash);
 		}
 		VI_UNLOCK(unp->un_dvp);
 	}
 }
 
 /*
  * Create a new shadow dir.
  * 
  * udvp should be locked on entry and will be locked on return.
  * 
  * If no error returned, unp will be updated.
  */
 int
 unionfs_mkshadowdir(struct unionfs_mount *ump, struct vnode *udvp,
     struct unionfs_node *unp, struct componentname *cnp, struct thread *td)
 {
 	struct vnode   *lvp;
 	struct vnode   *uvp;
 	struct vattr	va;
 	struct vattr	lva;
 	struct nameidata nd;
 	struct mount   *mp;
 	struct ucred   *cred;
 	struct ucred   *credbk;
 	struct uidinfo *rootinfo;
 	int		error;
 
 	if (unp->un_uppervp != NULLVP)
 		return (EEXIST);
 
 	lvp = unp->un_lowervp;
 	uvp = NULLVP;
 	credbk = cnp->cn_cred;
 
 	/* Authority change to root */
 	rootinfo = uifind((uid_t)0);
 	cred = crdup(cnp->cn_cred);
 	/*
 	 * The calls to chgproccnt() are needed to compensate for change_ruid()
 	 * calling chgproccnt().
 	 */
 	chgproccnt(cred->cr_ruidinfo, 1, 0);
 	change_euid(cred, rootinfo);
 	change_ruid(cred, rootinfo);
 	change_svuid(cred, (uid_t)0);
 	uifree(rootinfo);
 	cnp->cn_cred = cred;
 
 	memset(&nd.ni_cnd, 0, sizeof(struct componentname));
 	NDPREINIT(&nd);
 
 	if ((error = VOP_GETATTR(lvp, &lva, cnp->cn_cred)))
 		goto unionfs_mkshadowdir_abort;
 
 	if ((error = unionfs_relookup(udvp, &uvp, cnp, &nd.ni_cnd, td,
 	    cnp->cn_nameptr, cnp->cn_namelen, CREATE)))
 		goto unionfs_mkshadowdir_abort;
 	if (uvp != NULLVP) {
 		if (udvp == uvp)
 			vrele(uvp);
 		else
 			vput(uvp);
 
 		error = EEXIST;
 		goto unionfs_mkshadowdir_abort;
 	}
 
 	if ((error = vn_start_write(udvp, &mp, V_WAIT | PCATCH)))
 		goto unionfs_mkshadowdir_abort;
 	unionfs_create_uppervattr_core(ump, &lva, &va, td);
 
 	error = VOP_MKDIR(udvp, &uvp, &nd.ni_cnd, &va);
 
 	if (!error) {
 		unionfs_node_update(unp, uvp, td);
 
 		/*
 		 * XXX The bug which cannot set uid/gid was corrected.
 		 * Ignore errors.
 		 */
 		va.va_type = VNON;
 		VOP_SETATTR(uvp, &va, nd.ni_cnd.cn_cred);
 	}
 	vn_finished_write(mp);
 
 unionfs_mkshadowdir_abort:
 	cnp->cn_cred = credbk;
 	chgproccnt(cred->cr_ruidinfo, -1, 0);
 	crfree(cred);
 
 	return (error);
 }
 
 /*
  * Create a new whiteout.
  * 
  * dvp should be locked on entry and will be locked on return.
  */
 int
 unionfs_mkwhiteout(struct vnode *dvp, struct componentname *cnp,
     struct thread *td, char *path, int pathlen)
 {
 	struct vnode   *wvp;
 	struct nameidata nd;
 	struct mount   *mp;
 	int		error;
 
 	wvp = NULLVP;
 	NDPREINIT(&nd);
 	if ((error = unionfs_relookup(dvp, &wvp, cnp, &nd.ni_cnd, td, path,
 	    pathlen, CREATE))) {
 		return (error);
 	}
 	if (wvp != NULLVP) {
 		if (dvp == wvp)
 			vrele(wvp);
 		else
 			vput(wvp);
 
 		return (EEXIST);
 	}
 
 	if ((error = vn_start_write(dvp, &mp, V_WAIT | PCATCH)))
 		goto unionfs_mkwhiteout_free_out;
 	error = VOP_WHITEOUT(dvp, &nd.ni_cnd, CREATE);
 
 	vn_finished_write(mp);
 
 unionfs_mkwhiteout_free_out:
 	return (error);
 }
 
 /*
  * Create a new vnode for create a new shadow file.
  * 
  * If an error is returned, *vpp will be invalid, otherwise it will hold a
  * locked, referenced and opened vnode.
  * 
  * unp is never updated.
  */
 static int
 unionfs_vn_create_on_upper(struct vnode **vpp, struct vnode *udvp,
     struct unionfs_node *unp, struct vattr *uvap, struct thread *td)
 {
 	struct unionfs_mount *ump;
 	struct vnode   *vp;
 	struct vnode   *lvp;
 	struct ucred   *cred;
 	struct vattr	lva;
 	struct nameidata nd;
 	int		fmode;
 	int		error;
 
 	ump = MOUNTTOUNIONFSMOUNT(UNIONFSTOV(unp)->v_mount);
 	vp = NULLVP;
 	lvp = unp->un_lowervp;
 	cred = td->td_ucred;
 	fmode = FFLAGS(O_WRONLY | O_CREAT | O_TRUNC | O_EXCL);
 	error = 0;
 
 	if ((error = VOP_GETATTR(lvp, &lva, cred)) != 0)
 		return (error);
 	unionfs_create_uppervattr_core(ump, &lva, uvap, td);
 
 	if (unp->un_path == NULL)
 		panic("%s: NULL un_path", __func__);
 
 	nd.ni_cnd.cn_namelen = unp->un_pathlen;
 	nd.ni_cnd.cn_pnbuf = unp->un_path;
 	nd.ni_cnd.cn_nameiop = CREATE;
-	nd.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF | HASBUF | SAVENAME |
-	    ISLASTCN;
+	nd.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF | ISLASTCN;
 	nd.ni_cnd.cn_lkflags = LK_EXCLUSIVE;
 	nd.ni_cnd.cn_cred = cred;
 	nd.ni_cnd.cn_nameptr = nd.ni_cnd.cn_pnbuf;
 	NDPREINIT(&nd);
 
 	vref(udvp);
 	if ((error = vfs_relookup(udvp, &vp, &nd.ni_cnd)) != 0)
 		goto unionfs_vn_create_on_upper_free_out2;
 	vrele(udvp);
 
 	if (vp != NULLVP) {
 		if (vp == udvp)
 			vrele(vp);
 		else
 			vput(vp);
 		error = EEXIST;
 		goto unionfs_vn_create_on_upper_free_out1;
 	}
 
 	if ((error = VOP_CREATE(udvp, &vp, &nd.ni_cnd, uvap)) != 0)
 		goto unionfs_vn_create_on_upper_free_out1;
 
 	if ((error = VOP_OPEN(vp, fmode, cred, td, NULL)) != 0) {
 		vput(vp);
 		goto unionfs_vn_create_on_upper_free_out1;
 	}
 	error = VOP_ADD_WRITECOUNT(vp, 1);
 	CTR3(KTR_VFS, "%s: vp %p v_writecount increased to %d",
 	    __func__, vp, vp->v_writecount);
 	if (error == 0) {
 		*vpp = vp;
 	} else {
 		VOP_CLOSE(vp, fmode, cred, td);
 	}
 
 unionfs_vn_create_on_upper_free_out1:
 	VOP_UNLOCK(udvp);
 
 unionfs_vn_create_on_upper_free_out2:
-	KASSERT((nd.ni_cnd.cn_flags & HASBUF) != 0,
-	    ("%s: HASBUF cleared", __func__));
-	KASSERT((nd.ni_cnd.cn_flags & SAVENAME) != 0,
-	    ("%s: SAVENAME cleared", __func__));
 	KASSERT(nd.ni_cnd.cn_pnbuf == unp->un_path,
 	    ("%s: cn_pnbuf changed", __func__));
 
 	return (error);
 }
 
 /*
  * Copy from lvp to uvp.
  * 
  * lvp and uvp should be locked and opened on entry and will be locked and
  * opened on return.
  */
 static int
 unionfs_copyfile_core(struct vnode *lvp, struct vnode *uvp,
     struct ucred *cred, struct thread *td)
 {
 	char           *buf;
 	struct uio	uio;
 	struct iovec	iov;
 	off_t		offset;
 	int		count;
 	int		error;
 	int		bufoffset;
 
 	error = 0;
 	memset(&uio, 0, sizeof(uio));
 
 	uio.uio_td = td;
 	uio.uio_segflg = UIO_SYSSPACE;
 	uio.uio_offset = 0;
 
 	buf = malloc(MAXBSIZE, M_TEMP, M_WAITOK);
 
 	while (error == 0) {
 		offset = uio.uio_offset;
 
 		uio.uio_iov = &iov;
 		uio.uio_iovcnt = 1;
 		iov.iov_base = buf;
 		iov.iov_len = MAXBSIZE;
 		uio.uio_resid = iov.iov_len;
 		uio.uio_rw = UIO_READ;
 
 		if ((error = VOP_READ(lvp, &uio, 0, cred)) != 0)
 			break;
 		if ((count = MAXBSIZE - uio.uio_resid) == 0)
 			break;
 
 		bufoffset = 0;
 		while (bufoffset < count) {
 			uio.uio_iov = &iov;
 			uio.uio_iovcnt = 1;
 			iov.iov_base = buf + bufoffset;
 			iov.iov_len = count - bufoffset;
 			uio.uio_offset = offset + bufoffset;
 			uio.uio_resid = iov.iov_len;
 			uio.uio_rw = UIO_WRITE;
 
 			if ((error = VOP_WRITE(uvp, &uio, 0, cred)) != 0)
 				break;
 
 			bufoffset += (count - bufoffset) - uio.uio_resid;
 		}
 
 		uio.uio_offset = offset + bufoffset;
 	}
 
 	free(buf, M_TEMP);
 
 	return (error);
 }
 
 /*
  * Copy file from lower to upper.
  * 
  * If you need copy of the contents, set 1 to docopy. Otherwise, set 0 to
  * docopy.
  * 
  * If no error returned, unp will be updated.
  */
 int
 unionfs_copyfile(struct unionfs_node *unp, int docopy, struct ucred *cred,
     struct thread *td)
 {
 	struct mount   *mp;
 	struct vnode   *udvp;
 	struct vnode   *lvp;
 	struct vnode   *uvp;
 	struct vattr	uva;
 	int		error;
 
 	lvp = unp->un_lowervp;
 	uvp = NULLVP;
 
 	if ((UNIONFSTOV(unp)->v_mount->mnt_flag & MNT_RDONLY))
 		return (EROFS);
 	if (unp->un_dvp == NULLVP)
 		return (EINVAL);
 	if (unp->un_uppervp != NULLVP)
 		return (EEXIST);
 	udvp = VTOUNIONFS(unp->un_dvp)->un_uppervp;
 	if (udvp == NULLVP)
 		return (EROFS);
 	if ((udvp->v_mount->mnt_flag & MNT_RDONLY))
 		return (EROFS);
 
 	error = VOP_ACCESS(lvp, VREAD, cred, td);
 	if (error != 0)
 		return (error);
 
 	if ((error = vn_start_write(udvp, &mp, V_WAIT | PCATCH)) != 0)
 		return (error);
 	error = unionfs_vn_create_on_upper(&uvp, udvp, unp, &uva, td);
 	if (error != 0) {
 		vn_finished_write(mp);
 		return (error);
 	}
 
 	if (docopy != 0) {
 		error = VOP_OPEN(lvp, FREAD, cred, td, NULL);
 		if (error == 0) {
 			error = unionfs_copyfile_core(lvp, uvp, cred, td);
 			VOP_CLOSE(lvp, FREAD, cred, td);
 		}
 	}
 	VOP_CLOSE(uvp, FWRITE, cred, td);
 	VOP_ADD_WRITECOUNT_CHECKED(uvp, -1);
 	CTR3(KTR_VFS, "%s: vp %p v_writecount decreased to %d",
 	    __func__, uvp, uvp->v_writecount);
 
 	vn_finished_write(mp);
 
 	if (error == 0) {
 		/* Reset the attributes. Ignore errors. */
 		uva.va_type = VNON;
 		VOP_SETATTR(uvp, &uva, cred);
 	}
 
 	unionfs_node_update(unp, uvp, td);
 
 	return (error);
 }
 
 /*
  * It checks whether vp can rmdir. (check empty)
  *
  * vp is unionfs vnode.
  * vp should be locked.
  */
 int
 unionfs_check_rmdir(struct vnode *vp, struct ucred *cred, struct thread *td)
 {
 	struct vnode   *uvp;
 	struct vnode   *lvp;
 	struct vnode   *tvp;
 	struct dirent  *dp;
 	struct dirent  *edp;
 	struct componentname cn;
 	struct iovec	iov;
 	struct uio	uio;
 	struct vattr	va;
 	int		error;
 	int		eofflag;
 	int		lookuperr;
 
 	/*
 	 * The size of buf needs to be larger than DIRBLKSIZ.
 	 */
 	char		buf[256 * 6];
 
 	ASSERT_VOP_ELOCKED(vp, __func__);
 
 	eofflag = 0;
 	uvp = UNIONFSVPTOUPPERVP(vp);
 	lvp = UNIONFSVPTOLOWERVP(vp);
 
 	/* check opaque */
 	if ((error = VOP_GETATTR(uvp, &va, cred)) != 0)
 		return (error);
 	if (va.va_flags & OPAQUE)
 		return (0);
 
 	/* open vnode */
 #ifdef MAC
 	if ((error = mac_vnode_check_open(cred, vp, VEXEC|VREAD)) != 0)
 		return (error);
 #endif
 	if ((error = VOP_ACCESS(vp, VEXEC|VREAD, cred, td)) != 0)
 		return (error);
 	if ((error = VOP_OPEN(vp, FREAD, cred, td, NULL)) != 0)
 		return (error);
 
 	uio.uio_rw = UIO_READ;
 	uio.uio_segflg = UIO_SYSSPACE;
 	uio.uio_td = td;
 	uio.uio_offset = 0;
 
 #ifdef MAC
 	error = mac_vnode_check_readdir(td->td_ucred, lvp);
 #endif
 	while (!error && !eofflag) {
 		iov.iov_base = buf;
 		iov.iov_len = sizeof(buf);
 		uio.uio_iov = &iov;
 		uio.uio_iovcnt = 1;
 		uio.uio_resid = iov.iov_len;
 
 		error = VOP_READDIR(lvp, &uio, cred, &eofflag, NULL, NULL);
 		if (error != 0)
 			break;
 		KASSERT(eofflag != 0 || uio.uio_resid < sizeof(buf),
 		    ("%s: empty read from lower FS", __func__));
 
 		edp = (struct dirent*)&buf[sizeof(buf) - uio.uio_resid];
 		for (dp = (struct dirent*)buf; !error && dp < edp;
 		     dp = (struct dirent*)((caddr_t)dp + dp->d_reclen)) {
 			if (dp->d_type == DT_WHT || dp->d_fileno == 0 ||
 			    (dp->d_namlen == 1 && dp->d_name[0] == '.') ||
 			    (dp->d_namlen == 2 && !bcmp(dp->d_name, "..", 2)))
 				continue;
 
 			cn.cn_namelen = dp->d_namlen;
 			cn.cn_pnbuf = NULL;
 			cn.cn_nameptr = dp->d_name;
 			cn.cn_nameiop = LOOKUP;
-			cn.cn_flags = LOCKPARENT | LOCKLEAF | SAVENAME |
-			    RDONLY | ISLASTCN;
+			cn.cn_flags = LOCKPARENT | LOCKLEAF | RDONLY | ISLASTCN;
 			cn.cn_lkflags = LK_EXCLUSIVE;
 			cn.cn_cred = cred;
 
 			/*
 			 * check entry in lower.
 			 * Sometimes, readdir function returns
 			 * wrong entry.
 			 */
 			lookuperr = VOP_LOOKUP(lvp, &tvp, &cn);
 
 			if (!lookuperr)
 				vput(tvp);
 			else
 				continue; /* skip entry */
 
 			/*
 			 * check entry
 			 * If it has no exist/whiteout entry in upper,
 			 * directory is not empty.
 			 */
-			cn.cn_flags = LOCKPARENT | LOCKLEAF | SAVENAME |
-			    RDONLY | ISLASTCN;
+			cn.cn_flags = LOCKPARENT | LOCKLEAF | RDONLY | ISLASTCN;
 			lookuperr = VOP_LOOKUP(uvp, &tvp, &cn);
 
 			if (!lookuperr)
 				vput(tvp);
 
 			/* ignore exist or whiteout entry */
 			if (!lookuperr ||
 			    (lookuperr == ENOENT && (cn.cn_flags & ISWHITEOUT)))
 				continue;
 
 			error = ENOTEMPTY;
 		}
 	}
 
 	/* close vnode */
 	VOP_CLOSE(vp, FREAD, cred, td);
 
 	return (error);
 }
 
diff --git a/sys/fs/unionfs/union_vnops.c b/sys/fs/unionfs/union_vnops.c
index 8403805a99d4..04d788959892 100644
--- a/sys/fs/unionfs/union_vnops.c
+++ b/sys/fs/unionfs/union_vnops.c
@@ -1,2829 +1,2820 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 1992, 1993, 1994, 1995 Jan-Simon Pendry.
  * Copyright (c) 1992, 1993, 1994, 1995
  *      The Regents of the University of California.
  * Copyright (c) 2005, 2006, 2012 Masanori Ozawa <ozawa@ongs.co.jp>, ONGS Inc.
  * Copyright (c) 2006, 2012 Daichi Goto <daichi@freebsd.org>
  * All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * Jan-Simon Pendry.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)union_vnops.c	8.32 (Berkeley) 6/23/95
  * $FreeBSD$
  *
  */
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/conf.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mount.h>
 #include <sys/mutex.h>
 #include <sys/namei.h>
 #include <sys/sysctl.h>
 #include <sys/vnode.h>
 #include <sys/kdb.h>
 #include <sys/fcntl.h>
 #include <sys/stat.h>
 #include <sys/dirent.h>
 #include <sys/proc.h>
 #include <sys/bio.h>
 #include <sys/buf.h>
 
 #include <fs/unionfs/union.h>
 
 #include <machine/atomic.h>
 
 #include <vm/vm.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_object.h>
 #include <vm/vnode_pager.h>
 
 #if 0
 #define UNIONFS_INTERNAL_DEBUG(msg, args...)    printf(msg, ## args)
 #define UNIONFS_IDBG_RENAME
 #else
 #define UNIONFS_INTERNAL_DEBUG(msg, args...)
 #endif
 
 #define KASSERT_UNIONFS_VNODE(vp) \
 	VNASSERT(((vp)->v_op == &unionfs_vnodeops), vp, \
 	    ("%s: non-unionfs vnode", __func__))
 
 static int
 unionfs_lookup(struct vop_cachedlookup_args *ap)
 {
 	struct unionfs_node *dunp;
 	struct vnode   *dvp, *udvp, *ldvp, *vp, *uvp, *lvp, *dtmpvp;
 	struct vattr	va;
 	struct componentname *cnp;
 	struct thread  *td;
 	u_long		nameiop;
 	u_long		cnflags, cnflagsbk;
 	int		iswhiteout;
 	int		lockflag;
 	int		error , uerror, lerror;
 
 	iswhiteout = 0;
 	lockflag = 0;
 	error = uerror = lerror = ENOENT;
 	cnp = ap->a_cnp;
 	nameiop = cnp->cn_nameiop;
 	cnflags = cnp->cn_flags;
 	dvp = ap->a_dvp;
 	dunp = VTOUNIONFS(dvp);
 	udvp = dunp->un_uppervp;
 	ldvp = dunp->un_lowervp;
 	vp = uvp = lvp = NULLVP;
 	td = curthread;
 	*(ap->a_vpp) = NULLVP;
 
 	UNIONFS_INTERNAL_DEBUG(
 	    "unionfs_lookup: enter: nameiop=%ld, flags=%lx, path=%s\n",
 	    nameiop, cnflags, cnp->cn_nameptr);
 
 	if (dvp->v_type != VDIR)
 		return (ENOTDIR);
 
 	/*
 	 * If read-only and op is not LOOKUP, will return EROFS.
 	 */
 	if ((cnflags & ISLASTCN) &&
 	    (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
 	    LOOKUP != nameiop)
 		return (EROFS);
 
 	/*
 	 * lookup dotdot
 	 */
 	if (cnflags & ISDOTDOT) {
 		if (LOOKUP != nameiop && udvp == NULLVP)
 			return (EROFS);
 
 		if (udvp != NULLVP) {
 			dtmpvp = udvp;
 			if (ldvp != NULLVP)
 				VOP_UNLOCK(ldvp);
 		}
 		else
 			dtmpvp = ldvp;
 
 		error = VOP_LOOKUP(dtmpvp, &vp, cnp);
 
 		if (dtmpvp == udvp && ldvp != NULLVP) {
 			VOP_UNLOCK(udvp);
 			vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
 		}
 
 		if (error == 0) {
 			/*
 			 * Exchange lock and reference from vp to
 			 * dunp->un_dvp. vp is upper/lower vnode, but it
 			 * will need to return the unionfs vnode.
 			 */
 			if (nameiop == DELETE  || nameiop == RENAME ||
 			    (cnp->cn_lkflags & LK_TYPE_MASK))
 				VOP_UNLOCK(vp);
 			vrele(vp);
 
 			VOP_UNLOCK(dvp);
 			*(ap->a_vpp) = dunp->un_dvp;
 			vref(dunp->un_dvp);
 
 			if (nameiop == DELETE || nameiop == RENAME)
 				vn_lock(dunp->un_dvp, LK_EXCLUSIVE | LK_RETRY);
 			else if (cnp->cn_lkflags & LK_TYPE_MASK)
 				vn_lock(dunp->un_dvp, cnp->cn_lkflags |
 				    LK_RETRY);
 
 			vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
 		} else if (error == ENOENT && (cnflags & MAKEENTRY) != 0)
 			cache_enter(dvp, NULLVP, cnp);
 
 		goto unionfs_lookup_return;
 	}
 
 	/*
 	 * lookup upper layer
 	 */
 	if (udvp != NULLVP) {
 		uerror = VOP_LOOKUP(udvp, &uvp, cnp);
 
 		if (uerror == 0) {
 			if (udvp == uvp) {	/* is dot */
 				vrele(uvp);
 				*(ap->a_vpp) = dvp;
 				vref(dvp);
 
 				error = uerror;
 				goto unionfs_lookup_return;
 			}
 			if (nameiop == DELETE || nameiop == RENAME ||
 			    (cnp->cn_lkflags & LK_TYPE_MASK))
 				VOP_UNLOCK(uvp);
 		}
 
 		/* check whiteout */
 		if (uerror == ENOENT || uerror == EJUSTRETURN)
 			if (cnp->cn_flags & ISWHITEOUT)
 				iswhiteout = 1;	/* don't lookup lower */
 		if (iswhiteout == 0 && ldvp != NULLVP)
 			if (!VOP_GETATTR(udvp, &va, cnp->cn_cred) &&
 			    (va.va_flags & OPAQUE))
 				iswhiteout = 1;	/* don't lookup lower */
 #if 0
 		UNIONFS_INTERNAL_DEBUG(
 		    "unionfs_lookup: debug: whiteout=%d, path=%s\n",
 		    iswhiteout, cnp->cn_nameptr);
 #endif
 	}
 
 	/*
 	 * lookup lower layer
 	 */
 	if (ldvp != NULLVP && !(cnflags & DOWHITEOUT) && iswhiteout == 0) {
 		/* always op is LOOKUP */
 		cnp->cn_nameiop = LOOKUP;
 		cnflagsbk = cnp->cn_flags;
 		cnp->cn_flags = cnflags;
 
 		lerror = VOP_LOOKUP(ldvp, &lvp, cnp);
 
 		cnp->cn_nameiop = nameiop;
 		if (udvp != NULLVP && (uerror == 0 || uerror == EJUSTRETURN))
 			cnp->cn_flags = cnflagsbk;
 
 		if (lerror == 0) {
 			if (ldvp == lvp) {	/* is dot */
 				if (uvp != NULLVP)
 					vrele(uvp);	/* no need? */
 				vrele(lvp);
 				*(ap->a_vpp) = dvp;
 				vref(dvp);
 
 				UNIONFS_INTERNAL_DEBUG(
 				    "unionfs_lookup: leave (%d)\n", lerror);
 
 				return (lerror);
 			}
 			if (cnp->cn_lkflags & LK_TYPE_MASK)
 				VOP_UNLOCK(lvp);
 		}
 	}
 
 	/*
 	 * check lookup result
 	 */
 	if (uvp == NULLVP && lvp == NULLVP) {
 		error = (udvp != NULLVP ? uerror : lerror);
 		goto unionfs_lookup_return;
 	}
 
 	/*
 	 * check vnode type
 	 */
 	if (uvp != NULLVP && lvp != NULLVP && uvp->v_type != lvp->v_type) {
 		vrele(lvp);
 		lvp = NULLVP;
 	}
 
 	/*
 	 * check shadow dir
 	 */
 	if (uerror != 0 && uerror != EJUSTRETURN && udvp != NULLVP &&
 	    lerror == 0 && lvp != NULLVP && lvp->v_type == VDIR &&
 	    !(dvp->v_mount->mnt_flag & MNT_RDONLY) &&
 	    (1 < cnp->cn_namelen || '.' != *(cnp->cn_nameptr))) {
 		/* get unionfs vnode in order to create a new shadow dir. */
 		error = unionfs_nodeget(dvp->v_mount, NULLVP, lvp, dvp, &vp,
 		    cnp);
 		if (error != 0)
 			goto unionfs_lookup_cleanup;
 
 		if (LK_SHARED == (cnp->cn_lkflags & LK_TYPE_MASK))
 			VOP_UNLOCK(vp);
 		if (LK_EXCLUSIVE != VOP_ISLOCKED(vp)) {
 			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 			lockflag = 1;
 		}
 		error = unionfs_mkshadowdir(MOUNTTOUNIONFSMOUNT(dvp->v_mount),
 		    udvp, VTOUNIONFS(vp), cnp, td);
 		if (lockflag != 0)
 			VOP_UNLOCK(vp);
 		if (error != 0) {
 			UNIONFSDEBUG(
 			    "unionfs_lookup: Unable to create shadow dir.");
 			if ((cnp->cn_lkflags & LK_TYPE_MASK) == LK_EXCLUSIVE)
 				vput(vp);
 			else
 				vrele(vp);
 			goto unionfs_lookup_cleanup;
 		}
 		if ((cnp->cn_lkflags & LK_TYPE_MASK) == LK_SHARED)
 			vn_lock(vp, LK_SHARED | LK_RETRY);
 	}
 	/*
 	 * get unionfs vnode.
 	 */
 	else {
 		if (uvp != NULLVP)
 			error = uerror;
 		else
 			error = lerror;
 		if (error != 0)
 			goto unionfs_lookup_cleanup;
 		/*
 		 * get socket vnode.
 		 */
 		if (uvp != NULLVP && uvp->v_type == VSOCK) {
 			vp = uvp;
 			vref(vp);
 			if (cnp->cn_lkflags & LK_TYPE_MASK)
 				vn_lock(vp, cnp->cn_lkflags | LK_RETRY);
 		}
 		else if (lvp != NULLVP && lvp->v_type == VSOCK) {
 			vp = lvp;
 			vref(vp);
 			if (cnp->cn_lkflags & LK_TYPE_MASK)
 				vn_lock(vp, cnp->cn_lkflags | LK_RETRY);
 		}
 		/*
 		 * get unionfs vnode.
 		 */
 		else
 			error = unionfs_nodeget(dvp->v_mount, uvp, lvp,
 			    dvp, &vp, cnp);
 		if (error != 0) {
 			UNIONFSDEBUG(
 			    "unionfs_lookup: Unable to create unionfs vnode.");
 			goto unionfs_lookup_cleanup;
 		}
 		if ((nameiop == DELETE || nameiop == RENAME) &&
 		    (cnp->cn_lkflags & LK_TYPE_MASK) == 0)
 			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 	}
 
 	*(ap->a_vpp) = vp;
 
 	if ((cnflags & MAKEENTRY) && vp->v_type != VSOCK)
 		cache_enter(dvp, vp, cnp);
 
 unionfs_lookup_cleanup:
 	if (uvp != NULLVP)
 		vrele(uvp);
 	if (lvp != NULLVP)
 		vrele(lvp);
 
 	if (error == ENOENT && (cnflags & MAKEENTRY) != 0)
 		cache_enter(dvp, NULLVP, cnp);
 
 unionfs_lookup_return:
 
-	/* Ensure subsequent vnops will get a valid pathname buffer. */
-	if (nameiop != LOOKUP && (error == 0 || error == EJUSTRETURN))
-		cnp->cn_flags |= SAVENAME;
-
 	UNIONFS_INTERNAL_DEBUG("unionfs_lookup: leave (%d)\n", error);
 
 	return (error);
 }
 
 static int
 unionfs_create(struct vop_create_args *ap)
 {
 	struct unionfs_node *dunp;
 	struct componentname *cnp;
 	struct vnode   *udvp;
 	struct vnode   *vp;
 	int		error;
 
 	UNIONFS_INTERNAL_DEBUG("unionfs_create: enter\n");
 
 	KASSERT_UNIONFS_VNODE(ap->a_dvp);
 
 	dunp = VTOUNIONFS(ap->a_dvp);
 	cnp = ap->a_cnp;
 	udvp = dunp->un_uppervp;
 	error = EROFS;
 
 	if (udvp != NULLVP) {
 		error = VOP_CREATE(udvp, &vp, cnp, ap->a_vap);
 		if (error != 0)
 			goto unionfs_create_abort;
 
 		if (vp->v_type == VSOCK)
 			*(ap->a_vpp) = vp;
 		else {
 			VOP_UNLOCK(vp);
 			error = unionfs_nodeget(ap->a_dvp->v_mount, vp, NULLVP,
 			    ap->a_dvp, ap->a_vpp, cnp);
 			vrele(vp);
 		}
 	}
 
 unionfs_create_abort:
 	UNIONFS_INTERNAL_DEBUG("unionfs_create: leave (%d)\n", error);
 
 	return (error);
 }
 
 static int
 unionfs_whiteout(struct vop_whiteout_args *ap)
 {
 	struct unionfs_node *dunp;
 	struct componentname *cnp;
 	struct vnode   *udvp;
 	int		error;
 
 	UNIONFS_INTERNAL_DEBUG("unionfs_whiteout: enter\n");
 
 	KASSERT_UNIONFS_VNODE(ap->a_dvp);
 
 	dunp = VTOUNIONFS(ap->a_dvp);
 	cnp = ap->a_cnp;
 	udvp = dunp->un_uppervp;
 	error = EOPNOTSUPP;
 
 	if (udvp != NULLVP) {
 		switch (ap->a_flags) {
 		case CREATE:
 		case DELETE:
 		case LOOKUP:
 			error = VOP_WHITEOUT(udvp, cnp, ap->a_flags);
 			break;
 		default:
 			error = EINVAL;
 			break;
 		}
 	}
 
 	UNIONFS_INTERNAL_DEBUG("unionfs_whiteout: leave (%d)\n", error);
 
 	return (error);
 }
 
 static int
 unionfs_mknod(struct vop_mknod_args *ap)
 {
 	struct unionfs_node *dunp;
 	struct componentname *cnp;
 	struct vnode   *udvp;
 	struct vnode   *vp;
 	int		error;
 
 	UNIONFS_INTERNAL_DEBUG("unionfs_mknod: enter\n");
 
 	KASSERT_UNIONFS_VNODE(ap->a_dvp);
 
 	dunp = VTOUNIONFS(ap->a_dvp);
 	cnp = ap->a_cnp;
 	udvp = dunp->un_uppervp;
 	error = EROFS;
 
 	if (udvp != NULLVP) {
 		error = VOP_MKNOD(udvp, &vp, cnp, ap->a_vap);
 		if (error != 0)
 			goto unionfs_mknod_abort;
 
 		if (vp->v_type == VSOCK)
 			*(ap->a_vpp) = vp;
 		else {
 			VOP_UNLOCK(vp);
 			error = unionfs_nodeget(ap->a_dvp->v_mount, vp, NULLVP,
 			    ap->a_dvp, ap->a_vpp, cnp);
 			vrele(vp);
 		}
 	}
 
 unionfs_mknod_abort:
 	UNIONFS_INTERNAL_DEBUG("unionfs_mknod: leave (%d)\n", error);
 
 	return (error);
 }
 
 enum unionfs_lkupgrade {
 	UNIONFS_LKUPGRADE_SUCCESS, /* lock successfully upgraded */
 	UNIONFS_LKUPGRADE_ALREADY, /* lock already held exclusive */
 	UNIONFS_LKUPGRADE_DOOMED   /* lock was upgraded, but vnode reclaimed */
 };
 
 static inline enum unionfs_lkupgrade
 unionfs_upgrade_lock(struct vnode *vp)
 {
 	ASSERT_VOP_LOCKED(vp, __func__);
 
 	if (VOP_ISLOCKED(vp) == LK_EXCLUSIVE)
 		return (UNIONFS_LKUPGRADE_ALREADY);
 
 	if (vn_lock(vp, LK_UPGRADE) != 0) {
 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 		if (VN_IS_DOOMED(vp))
 			return (UNIONFS_LKUPGRADE_DOOMED);
 	}
 	return (UNIONFS_LKUPGRADE_SUCCESS);
 }
 
 static inline void
 unionfs_downgrade_lock(struct vnode *vp, enum unionfs_lkupgrade status)
 {
 	if (status != UNIONFS_LKUPGRADE_ALREADY)
 		vn_lock(vp, LK_DOWNGRADE | LK_RETRY);
 }
 
 static int
 unionfs_open(struct vop_open_args *ap)
 {
 	struct unionfs_node *unp;
 	struct unionfs_node_status *unsp;
 	struct vnode   *vp;
 	struct vnode   *uvp;
 	struct vnode   *lvp;
 	struct vnode   *targetvp;
 	struct ucred   *cred;
 	struct thread  *td;
 	int		error;
 	enum unionfs_lkupgrade lkstatus;
 
 	UNIONFS_INTERNAL_DEBUG("unionfs_open: enter\n");
 
 	KASSERT_UNIONFS_VNODE(ap->a_vp);
 
 	error = 0;
 	vp = ap->a_vp;
 	targetvp = NULLVP;
 	cred = ap->a_cred;
 	td = ap->a_td;
 
 	/*
 	 * The executable loader path may call this function with vp locked
 	 * shared.  If the vnode is reclaimed while upgrading, we can't safely
 	 * use unp or do anything else unionfs- specific.
 	 */
 	lkstatus = unionfs_upgrade_lock(vp);
 	if (lkstatus == UNIONFS_LKUPGRADE_DOOMED) {
 		error = ENOENT;
 		goto unionfs_open_cleanup;
 	}
 
 	unp = VTOUNIONFS(vp);
 	uvp = unp->un_uppervp;
 	lvp = unp->un_lowervp;
 	unionfs_get_node_status(unp, td, &unsp);
 
 	if (unsp->uns_lower_opencnt > 0 || unsp->uns_upper_opencnt > 0) {
 		/* vnode is already opend. */
 		if (unsp->uns_upper_opencnt > 0)
 			targetvp = uvp;
 		else
 			targetvp = lvp;
 
 		if (targetvp == lvp &&
 		    (ap->a_mode & FWRITE) && lvp->v_type == VREG)
 			targetvp = NULLVP;
 	}
 	if (targetvp == NULLVP) {
 		if (uvp == NULLVP) {
 			if ((ap->a_mode & FWRITE) && lvp->v_type == VREG) {
 				error = unionfs_copyfile(unp,
 				    !(ap->a_mode & O_TRUNC), cred, td);
 				if (error != 0)
 					goto unionfs_open_abort;
 				targetvp = uvp = unp->un_uppervp;
 			} else
 				targetvp = lvp;
 		} else
 			targetvp = uvp;
 	}
 
 	error = VOP_OPEN(targetvp, ap->a_mode, cred, td, ap->a_fp);
 	if (error == 0) {
 		if (targetvp == uvp) {
 			if (uvp->v_type == VDIR && lvp != NULLVP &&
 			    unsp->uns_lower_opencnt <= 0) {
 				/* open lower for readdir */
 				error = VOP_OPEN(lvp, FREAD, cred, td, NULL);
 				if (error != 0) {
 					VOP_CLOSE(uvp, ap->a_mode, cred, td);
 					goto unionfs_open_abort;
 				}
 				unsp->uns_node_flag |= UNS_OPENL_4_READDIR;
 				unsp->uns_lower_opencnt++;
 			}
 			unsp->uns_upper_opencnt++;
 		} else {
 			unsp->uns_lower_opencnt++;
 			unsp->uns_lower_openmode = ap->a_mode;
 		}
 		vp->v_object = targetvp->v_object;
 	}
 
 unionfs_open_abort:
 	if (error != 0)
 		unionfs_tryrem_node_status(unp, unsp);
 
 unionfs_open_cleanup:
 	unionfs_downgrade_lock(vp, lkstatus);
 
 	UNIONFS_INTERNAL_DEBUG("unionfs_open: leave (%d)\n", error);
 
 	return (error);
 }
 
 static int
 unionfs_close(struct vop_close_args *ap)
 {
 	struct unionfs_node *unp;
 	struct unionfs_node_status *unsp;
 	struct ucred   *cred;
 	struct thread  *td;
 	struct vnode   *vp;
 	struct vnode   *ovp;
 	int		error;
 	enum unionfs_lkupgrade lkstatus;;
 
 	UNIONFS_INTERNAL_DEBUG("unionfs_close: enter\n");
 
 	KASSERT_UNIONFS_VNODE(ap->a_vp);
 
 	vp = ap->a_vp;
 	cred = ap->a_cred;
 	td = ap->a_td;
 	error = 0;
 
 	/*
 	 * If the vnode is reclaimed while upgrading, we can't safely use unp
 	 * or do anything else unionfs- specific.
 	 */
 	lkstatus = unionfs_upgrade_lock(vp);
 	if (lkstatus == UNIONFS_LKUPGRADE_DOOMED)
 		goto unionfs_close_cleanup;
 
 	unp = VTOUNIONFS(vp);
 	unionfs_get_node_status(unp, td, &unsp);
 
 	if (unsp->uns_lower_opencnt <= 0 && unsp->uns_upper_opencnt <= 0) {
 #ifdef DIAGNOSTIC
 		printf("unionfs_close: warning: open count is 0\n");
 #endif
 		if (unp->un_uppervp != NULLVP)
 			ovp = unp->un_uppervp;
 		else
 			ovp = unp->un_lowervp;
 	} else if (unsp->uns_upper_opencnt > 0)
 		ovp = unp->un_uppervp;
 	else
 		ovp = unp->un_lowervp;
 
 	error = VOP_CLOSE(ovp, ap->a_fflag, cred, td);
 
 	if (error != 0)
 		goto unionfs_close_abort;
 
 	vp->v_object = ovp->v_object;
 
 	if (ovp == unp->un_uppervp) {
 		unsp->uns_upper_opencnt--;
 		if (unsp->uns_upper_opencnt == 0) {
 			if (unsp->uns_node_flag & UNS_OPENL_4_READDIR) {
 				VOP_CLOSE(unp->un_lowervp, FREAD, cred, td);
 				unsp->uns_node_flag &= ~UNS_OPENL_4_READDIR;
 				unsp->uns_lower_opencnt--;
 			}
 			if (unsp->uns_lower_opencnt > 0)
 				vp->v_object = unp->un_lowervp->v_object;
 		}
 	} else
 		unsp->uns_lower_opencnt--;
 
 unionfs_close_abort:
 	unionfs_tryrem_node_status(unp, unsp);
 
 unionfs_close_cleanup:
 	unionfs_downgrade_lock(vp, lkstatus);
 
 	UNIONFS_INTERNAL_DEBUG("unionfs_close: leave (%d)\n", error);
 
 	return (error);
 }
 
 /*
  * Check the access mode toward shadow file/dir.
  */
 static int
 unionfs_check_corrected_access(accmode_t accmode, struct vattr *va,
     struct ucred *cred)
 {
 	uid_t		uid;	/* upper side vnode's uid */
 	gid_t		gid;	/* upper side vnode's gid */
 	u_short		vmode;	/* upper side vnode's mode */
 	u_short		mask;
 
 	mask = 0;
 	uid = va->va_uid;
 	gid = va->va_gid;
 	vmode = va->va_mode;
 
 	/* check owner */
 	if (cred->cr_uid == uid) {
 		if (accmode & VEXEC)
 			mask |= S_IXUSR;
 		if (accmode & VREAD)
 			mask |= S_IRUSR;
 		if (accmode & VWRITE)
 			mask |= S_IWUSR;
 		return ((vmode & mask) == mask ? 0 : EACCES);
 	}
 
 	/* check group */
 	if (groupmember(gid, cred)) {
 		if (accmode & VEXEC)
 			mask |= S_IXGRP;
 		if (accmode & VREAD)
 			mask |= S_IRGRP;
 		if (accmode & VWRITE)
 			mask |= S_IWGRP;
 		return ((vmode & mask) == mask ? 0 : EACCES);
 	}
 
 	/* check other */
 	if (accmode & VEXEC)
 		mask |= S_IXOTH;
 	if (accmode & VREAD)
 		mask |= S_IROTH;
 	if (accmode & VWRITE)
 		mask |= S_IWOTH;
 
 	return ((vmode & mask) == mask ? 0 : EACCES);
 }
 
 static int
 unionfs_access(struct vop_access_args *ap)
 {
 	struct unionfs_mount *ump;
 	struct unionfs_node *unp;
 	struct vnode   *uvp;
 	struct vnode   *lvp;
 	struct thread  *td;
 	struct vattr	va;
 	accmode_t	accmode;
 	int		error;
 
 	UNIONFS_INTERNAL_DEBUG("unionfs_access: enter\n");
 
 	KASSERT_UNIONFS_VNODE(ap->a_vp);
 
 	ump = MOUNTTOUNIONFSMOUNT(ap->a_vp->v_mount);
 	unp = VTOUNIONFS(ap->a_vp);
 	uvp = unp->un_uppervp;
 	lvp = unp->un_lowervp;
 	td = ap->a_td;
 	accmode = ap->a_accmode;
 	error = EACCES;
 
 	if ((accmode & VWRITE) &&
 	    (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY)) {
 		switch (ap->a_vp->v_type) {
 		case VREG:
 		case VDIR:
 		case VLNK:
 			return (EROFS);
 		default:
 			break;
 		}
 	}
 
 	if (uvp != NULLVP) {
 		error = VOP_ACCESS(uvp, accmode, ap->a_cred, td);
 
 		UNIONFS_INTERNAL_DEBUG("unionfs_access: leave (%d)\n", error);
 
 		return (error);
 	}
 
 	if (lvp != NULLVP) {
 		if (accmode & VWRITE) {
 			if (ump->um_uppervp->v_mount->mnt_flag & MNT_RDONLY) {
 				switch (ap->a_vp->v_type) {
 				case VREG:
 				case VDIR:
 				case VLNK:
 					return (EROFS);
 				default:
 					break;
 				}
 			} else if (ap->a_vp->v_type == VREG ||
 			    ap->a_vp->v_type == VDIR) {
 				/* check shadow file/dir */
 				if (ump->um_copymode != UNIONFS_TRANSPARENT) {
 					error = unionfs_create_uppervattr(ump,
 					    lvp, &va, ap->a_cred, td);
 					if (error != 0)
 						return (error);
 
 					error = unionfs_check_corrected_access(
 					    accmode, &va, ap->a_cred);
 					if (error != 0)
 						return (error);
 				}
 			}
 			accmode &= ~(VWRITE | VAPPEND);
 			accmode |= VREAD; /* will copy to upper */
 		}
 		error = VOP_ACCESS(lvp, accmode, ap->a_cred, td);
 	}
 
 	UNIONFS_INTERNAL_DEBUG("unionfs_access: leave (%d)\n", error);
 
 	return (error);
 }
 
 static int
 unionfs_getattr(struct vop_getattr_args *ap)
 {
 	struct unionfs_node *unp;
 	struct unionfs_mount *ump;
 	struct vnode   *uvp;
 	struct vnode   *lvp;
 	struct thread  *td;
 	struct vattr	va;
 	int		error;
 
 	UNIONFS_INTERNAL_DEBUG("unionfs_getattr: enter\n");
 
 	KASSERT_UNIONFS_VNODE(ap->a_vp);
 
 	unp = VTOUNIONFS(ap->a_vp);
 	ump = MOUNTTOUNIONFSMOUNT(ap->a_vp->v_mount);
 	uvp = unp->un_uppervp;
 	lvp = unp->un_lowervp;
 	td = curthread;
 
 	if (uvp != NULLVP) {
 		if ((error = VOP_GETATTR(uvp, ap->a_vap, ap->a_cred)) == 0)
 			ap->a_vap->va_fsid =
 			    ap->a_vp->v_mount->mnt_stat.f_fsid.val[0];
 
 		UNIONFS_INTERNAL_DEBUG(
 		    "unionfs_getattr: leave mode=%o, uid=%d, gid=%d (%d)\n",
 		    ap->a_vap->va_mode, ap->a_vap->va_uid,
 		    ap->a_vap->va_gid, error);
 
 		return (error);
 	}
 
 	error = VOP_GETATTR(lvp, ap->a_vap, ap->a_cred);
 
 	if (error == 0 && !(ump->um_uppervp->v_mount->mnt_flag & MNT_RDONLY)) {
 		/* correct the attr toward shadow file/dir. */
 		if (ap->a_vp->v_type == VREG || ap->a_vp->v_type == VDIR) {
 			unionfs_create_uppervattr_core(ump, ap->a_vap, &va, td);
 			ap->a_vap->va_mode = va.va_mode;
 			ap->a_vap->va_uid = va.va_uid;
 			ap->a_vap->va_gid = va.va_gid;
 		}
 	}
 
 	if (error == 0)
 		ap->a_vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsid.val[0];
 
 	UNIONFS_INTERNAL_DEBUG(
 	    "unionfs_getattr: leave mode=%o, uid=%d, gid=%d (%d)\n",
 	    ap->a_vap->va_mode, ap->a_vap->va_uid, ap->a_vap->va_gid, error);
 
 	return (error);
 }
 
 static int
 unionfs_setattr(struct vop_setattr_args *ap)
 {
 	struct unionfs_node *unp;
 	struct vnode   *uvp;
 	struct vnode   *lvp;
 	struct thread  *td;
 	struct vattr   *vap;
 	int		error;
 
 	UNIONFS_INTERNAL_DEBUG("unionfs_setattr: enter\n");
 
 	KASSERT_UNIONFS_VNODE(ap->a_vp);
 
 	error = EROFS;
 	unp = VTOUNIONFS(ap->a_vp);
 	uvp = unp->un_uppervp;
 	lvp = unp->un_lowervp;
 	td = curthread;
 	vap = ap->a_vap;
 
 	if ((ap->a_vp->v_mount->mnt_flag & MNT_RDONLY) &&
 	    (vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL ||
 	     vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL ||
 	     vap->va_mtime.tv_sec != VNOVAL || vap->va_mode != (mode_t)VNOVAL))
 		return (EROFS);
 
 	if (uvp == NULLVP && lvp->v_type == VREG) {
 		error = unionfs_copyfile(unp, (vap->va_size != 0),
 		    ap->a_cred, td);
 		if (error != 0)
 			return (error);
 		uvp = unp->un_uppervp;
 	}
 
 	if (uvp != NULLVP)
 		error = VOP_SETATTR(uvp, vap, ap->a_cred);
 
 	UNIONFS_INTERNAL_DEBUG("unionfs_setattr: leave (%d)\n", error);
 
 	return (error);
 }
 
 static int
 unionfs_read(struct vop_read_args *ap)
 {
 	struct unionfs_node *unp;
 	struct vnode   *tvp;
 	int		error;
 
 	/* UNIONFS_INTERNAL_DEBUG("unionfs_read: enter\n"); */
 
 	KASSERT_UNIONFS_VNODE(ap->a_vp);
 
 	unp = VTOUNIONFS(ap->a_vp);
 	tvp = (unp->un_uppervp != NULLVP ? unp->un_uppervp : unp->un_lowervp);
 
 	error = VOP_READ(tvp, ap->a_uio, ap->a_ioflag, ap->a_cred);
 
 	/* UNIONFS_INTERNAL_DEBUG("unionfs_read: leave (%d)\n", error); */
 
 	return (error);
 }
 
 static int
 unionfs_write(struct vop_write_args *ap)
 {
 	struct unionfs_node *unp;
 	struct vnode   *tvp;
 	int		error;
 
 	/* UNIONFS_INTERNAL_DEBUG("unionfs_write: enter\n"); */
 
 	KASSERT_UNIONFS_VNODE(ap->a_vp);
 
 	unp = VTOUNIONFS(ap->a_vp);
 	tvp = (unp->un_uppervp != NULLVP ? unp->un_uppervp : unp->un_lowervp);
 
 	error = VOP_WRITE(tvp, ap->a_uio, ap->a_ioflag, ap->a_cred);
 
 	/* UNIONFS_INTERNAL_DEBUG("unionfs_write: leave (%d)\n", error); */
 
 	return (error);
 }
 
 static int
 unionfs_ioctl(struct vop_ioctl_args *ap)
 {
 	struct unionfs_node *unp;
 	struct unionfs_node_status *unsp;
 	struct vnode   *ovp;
 	int error;
 
 	UNIONFS_INTERNAL_DEBUG("unionfs_ioctl: enter\n");
 
 	KASSERT_UNIONFS_VNODE(ap->a_vp);
 
  	vn_lock(ap->a_vp, LK_EXCLUSIVE | LK_RETRY);
 	unp = VTOUNIONFS(ap->a_vp);
 	unionfs_get_node_status(unp, ap->a_td, &unsp);
 	ovp = (unsp->uns_upper_opencnt ? unp->un_uppervp : unp->un_lowervp);
 	unionfs_tryrem_node_status(unp, unsp);
 	VOP_UNLOCK(ap->a_vp);
 
 	if (ovp == NULLVP)
 		return (EBADF);
 
 	error = VOP_IOCTL(ovp, ap->a_command, ap->a_data, ap->a_fflag,
 	    ap->a_cred, ap->a_td);
 
 	UNIONFS_INTERNAL_DEBUG("unionfs_ioctl: leave (%d)\n", error);
 
 	return (error);
 }
 
 static int
 unionfs_poll(struct vop_poll_args *ap)
 {
 	struct unionfs_node *unp;
 	struct unionfs_node_status *unsp;
 	struct vnode *ovp;
 
 	KASSERT_UNIONFS_VNODE(ap->a_vp);
 
  	vn_lock(ap->a_vp, LK_EXCLUSIVE | LK_RETRY);
 	unp = VTOUNIONFS(ap->a_vp);
 	unionfs_get_node_status(unp, ap->a_td, &unsp);
 	ovp = (unsp->uns_upper_opencnt ? unp->un_uppervp : unp->un_lowervp);
 	unionfs_tryrem_node_status(unp, unsp);
 	VOP_UNLOCK(ap->a_vp);
 
 	if (ovp == NULLVP)
 		return (EBADF);
 
 	return (VOP_POLL(ovp, ap->a_events, ap->a_cred, ap->a_td));
 }
 
 static int
 unionfs_fsync(struct vop_fsync_args *ap)
 {
 	struct unionfs_node *unp;
 	struct unionfs_node_status *unsp;
 	struct vnode *ovp;
 
 	KASSERT_UNIONFS_VNODE(ap->a_vp);
 
 	unp = VTOUNIONFS(ap->a_vp);
 	unionfs_get_node_status(unp, ap->a_td, &unsp);
 	ovp = (unsp->uns_upper_opencnt ? unp->un_uppervp : unp->un_lowervp);
 	unionfs_tryrem_node_status(unp, unsp);
 
 	if (ovp == NULLVP)
 		return (EBADF);
 
 	return (VOP_FSYNC(ovp, ap->a_waitfor, ap->a_td));
 }
 
 static int
 unionfs_remove(struct vop_remove_args *ap)
 {
 	char	       *path;
 	struct unionfs_node *dunp;
 	struct unionfs_node *unp;
 	struct unionfs_mount *ump;
 	struct vnode   *udvp;
 	struct vnode   *uvp;
 	struct vnode   *lvp;
 	struct vnode   *vp;
 	struct componentname *cnp;
 	struct componentname cn;
 	struct thread  *td;
 	int		error;
 	int		pathlen;
 
 	UNIONFS_INTERNAL_DEBUG("unionfs_remove: enter\n");
 
 	KASSERT_UNIONFS_VNODE(ap->a_dvp);
 
 	error = 0;
 	dunp = VTOUNIONFS(ap->a_dvp);
 	udvp = dunp->un_uppervp;
 	cnp = ap->a_cnp;
 	td = curthread;
 
 	if (ap->a_vp->v_op != &unionfs_vnodeops) {
 		if (ap->a_vp->v_type != VSOCK)
 			return (EINVAL);
 		ump = NULL;
 		vp = uvp = lvp = NULLVP;
 		/* search vnode */
 		VOP_UNLOCK(ap->a_vp);
 		error = unionfs_relookup(udvp, &vp, cnp, &cn, td,
 		    cnp->cn_nameptr, cnp->cn_namelen, DELETE);
 		if (error != 0 && error != ENOENT) {
 			vn_lock(ap->a_vp, LK_EXCLUSIVE | LK_RETRY);
 			return (error);
 		}
 
 		if (error == 0 && vp == ap->a_vp) {
 			/* target vnode in upper */
 			uvp = vp;
 			vrele(vp);
 		} else {
 			/* target vnode in lower */
 			if (vp != NULLVP) {
 				if (udvp == vp)
 					vrele(vp);
 				else
 					vput(vp);
 			}
 			vn_lock(ap->a_vp, LK_EXCLUSIVE | LK_RETRY);
 			lvp = ap->a_vp;
 		}
 		path = cnp->cn_nameptr;
 		pathlen = cnp->cn_namelen;
 	} else {
 		ump = MOUNTTOUNIONFSMOUNT(ap->a_vp->v_mount);
 		unp = VTOUNIONFS(ap->a_vp);
 		uvp = unp->un_uppervp;
 		lvp = unp->un_lowervp;
 		path = unp->un_path;
 		pathlen = unp->un_pathlen;
 	}
 
 	if (udvp == NULLVP)
 		return (EROFS);
 
 	if (uvp != NULLVP) {
 		/*
 		 * XXX: if the vnode type is VSOCK, it will create whiteout
 		 *      after remove.
 		 */
 		if (ump == NULL || ump->um_whitemode == UNIONFS_WHITE_ALWAYS ||
 		    lvp != NULLVP)
 			cnp->cn_flags |= DOWHITEOUT;
 		error = VOP_REMOVE(udvp, uvp, cnp);
 	} else if (lvp != NULLVP)
 		error = unionfs_mkwhiteout(udvp, cnp, td, path, pathlen);
 
 	UNIONFS_INTERNAL_DEBUG("unionfs_remove: leave (%d)\n", error);
 
 	return (error);
 }
 
 static int
 unionfs_link(struct vop_link_args *ap)
 {
 	struct unionfs_node *dunp;
 	struct unionfs_node *unp;
 	struct vnode   *udvp;
 	struct vnode   *uvp;
 	struct componentname *cnp;
 	struct thread  *td;
 	int		error;
 	int		needrelookup;
 
 	UNIONFS_INTERNAL_DEBUG("unionfs_link: enter\n");
 
 	KASSERT_UNIONFS_VNODE(ap->a_tdvp);
 	KASSERT_UNIONFS_VNODE(ap->a_vp);
 
 	error = 0;
 	needrelookup = 0;
 	dunp = VTOUNIONFS(ap->a_tdvp);
 	unp = NULL;
 	udvp = dunp->un_uppervp;
 	uvp = NULLVP;
 	cnp = ap->a_cnp;
 	td = curthread;
 
 	if (udvp == NULLVP)
 		return (EROFS);
 
 	if (ap->a_vp->v_op != &unionfs_vnodeops)
 		uvp = ap->a_vp;
 	else {
 		unp = VTOUNIONFS(ap->a_vp);
 
 		if (unp->un_uppervp == NULLVP) {
 			if (ap->a_vp->v_type != VREG)
 				return (EOPNOTSUPP);
 
 			error = unionfs_copyfile(unp, 1, cnp->cn_cred, td);
 			if (error != 0)
 				return (error);
 			needrelookup = 1;
 		}
 		uvp = unp->un_uppervp;
 	}
 
 	if (needrelookup != 0)
 		error = unionfs_relookup_for_create(ap->a_tdvp, cnp, td);
 
 	if (error == 0)
 		error = VOP_LINK(udvp, uvp, cnp);
 
 	UNIONFS_INTERNAL_DEBUG("unionfs_link: leave (%d)\n", error);
 
 	return (error);
 }
 
 static int
 unionfs_rename(struct vop_rename_args *ap)
 {
 	struct vnode   *fdvp;
 	struct vnode   *fvp;
 	struct componentname *fcnp;
 	struct vnode   *tdvp;
 	struct vnode   *tvp;
 	struct componentname *tcnp;
 	struct vnode   *ltdvp;
 	struct vnode   *ltvp;
 	struct thread  *td;
 
 	/* rename target vnodes */
 	struct vnode   *rfdvp;
 	struct vnode   *rfvp;
 	struct vnode   *rtdvp;
 	struct vnode   *rtvp;
 
 	struct unionfs_mount *ump;
 	struct unionfs_node *unp;
 	int		error;
 	int		needrelookup;
 
 	UNIONFS_INTERNAL_DEBUG("unionfs_rename: enter\n");
 
 	error = 0;
 	fdvp = ap->a_fdvp;
 	fvp = ap->a_fvp;
 	fcnp = ap->a_fcnp;
 	tdvp = ap->a_tdvp;
 	tvp = ap->a_tvp;
 	tcnp = ap->a_tcnp;
 	ltdvp = NULLVP;
 	ltvp = NULLVP;
 	td = curthread;
 	rfdvp = fdvp;
 	rfvp = fvp;
 	rtdvp = tdvp;
 	rtvp = tvp;
 	needrelookup = 0;
 
-#ifdef DIAGNOSTIC
-	if (!(fcnp->cn_flags & HASBUF) || !(tcnp->cn_flags & HASBUF))
-		panic("unionfs_rename: no name");
-#endif
-
 	/* check for cross device rename */
 	if (fvp->v_mount != tdvp->v_mount ||
 	    (tvp != NULLVP && fvp->v_mount != tvp->v_mount)) {
 		if (fvp->v_op != &unionfs_vnodeops)
 			error = ENODEV;
 		else
 			error = EXDEV;
 		goto unionfs_rename_abort;
 	}
 
 	/* Renaming a file to itself has no effect. */
 	if (fvp == tvp)
 		goto unionfs_rename_abort;
 
 	/*
 	 * from/to vnode is unionfs node.
 	 */
 
 	KASSERT_UNIONFS_VNODE(fdvp);
 	KASSERT_UNIONFS_VNODE(fvp);
 	KASSERT_UNIONFS_VNODE(tdvp);
 	if (tvp != NULLVP)
 		KASSERT_UNIONFS_VNODE(tvp);
 
 	unp = VTOUNIONFS(fdvp);
 #ifdef UNIONFS_IDBG_RENAME
 	UNIONFS_INTERNAL_DEBUG("fdvp=%p, ufdvp=%p, lfdvp=%p\n",
 	    fdvp, unp->un_uppervp, unp->un_lowervp);
 #endif
 	if (unp->un_uppervp == NULLVP) {
 		error = ENODEV;
 		goto unionfs_rename_abort;
 	}
 	rfdvp = unp->un_uppervp;
 	vref(rfdvp);
 
 	unp = VTOUNIONFS(fvp);
 #ifdef UNIONFS_IDBG_RENAME
 	UNIONFS_INTERNAL_DEBUG("fvp=%p, ufvp=%p, lfvp=%p\n",
 	    fvp, unp->un_uppervp, unp->un_lowervp);
 #endif
 	ump = MOUNTTOUNIONFSMOUNT(fvp->v_mount);
 	if (unp->un_uppervp == NULLVP) {
 		switch (fvp->v_type) {
 		case VREG:
 			if ((error = vn_lock(fvp, LK_EXCLUSIVE)) != 0)
 				goto unionfs_rename_abort;
 			error = unionfs_copyfile(unp, 1, fcnp->cn_cred, td);
 			VOP_UNLOCK(fvp);
 			if (error != 0)
 				goto unionfs_rename_abort;
 			break;
 		case VDIR:
 			if ((error = vn_lock(fvp, LK_EXCLUSIVE)) != 0)
 				goto unionfs_rename_abort;
 			error = unionfs_mkshadowdir(ump, rfdvp, unp, fcnp, td);
 			VOP_UNLOCK(fvp);
 			if (error != 0)
 				goto unionfs_rename_abort;
 			break;
 		default:
 			error = ENODEV;
 			goto unionfs_rename_abort;
 		}
 
 		needrelookup = 1;
 	}
 
 	if (unp->un_lowervp != NULLVP)
 		fcnp->cn_flags |= DOWHITEOUT;
 	rfvp = unp->un_uppervp;
 	vref(rfvp);
 
 	unp = VTOUNIONFS(tdvp);
 #ifdef UNIONFS_IDBG_RENAME
 	UNIONFS_INTERNAL_DEBUG("tdvp=%p, utdvp=%p, ltdvp=%p\n",
 	    tdvp, unp->un_uppervp, unp->un_lowervp);
 #endif
 	if (unp->un_uppervp == NULLVP) {
 		error = ENODEV;
 		goto unionfs_rename_abort;
 	}
 	rtdvp = unp->un_uppervp;
 	ltdvp = unp->un_lowervp;
 	vref(rtdvp);
 
 	if (tdvp == tvp) {
 		rtvp = rtdvp;
 		vref(rtvp);
 	} else if (tvp != NULLVP) {
 		unp = VTOUNIONFS(tvp);
 #ifdef UNIONFS_IDBG_RENAME
 		UNIONFS_INTERNAL_DEBUG("tvp=%p, utvp=%p, ltvp=%p\n",
 		    tvp, unp->un_uppervp, unp->un_lowervp);
 #endif
 		if (unp->un_uppervp == NULLVP)
 			rtvp = NULLVP;
 		else {
 			if (tvp->v_type == VDIR) {
 				error = EINVAL;
 				goto unionfs_rename_abort;
 			}
 			rtvp = unp->un_uppervp;
 			ltvp = unp->un_lowervp;
 			vref(rtvp);
 		}
 	}
 
 	if (rfvp == rtvp)
 		goto unionfs_rename_abort;
 
 	if (needrelookup != 0) {
 		if ((error = vn_lock(fdvp, LK_EXCLUSIVE)) != 0)
 			goto unionfs_rename_abort;
 		error = unionfs_relookup_for_delete(fdvp, fcnp, td);
 		VOP_UNLOCK(fdvp);
 		if (error != 0)
 			goto unionfs_rename_abort;
 
 		/* Lock of tvp is canceled in order to avoid recursive lock. */
 		if (tvp != NULLVP && tvp != tdvp)
 			VOP_UNLOCK(tvp);
 		error = unionfs_relookup_for_rename(tdvp, tcnp, td);
 		if (tvp != NULLVP && tvp != tdvp)
 			vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY);
 		if (error != 0)
 			goto unionfs_rename_abort;
 	}
 
 	error = VOP_RENAME(rfdvp, rfvp, fcnp, rtdvp, rtvp, tcnp);
 
 	if (error == 0) {
 		if (rtvp != NULLVP && rtvp->v_type == VDIR)
 			cache_purge(tdvp);
 		if (fvp->v_type == VDIR && fdvp != tdvp)
 			cache_purge(fdvp);
 	}
 
 	if (ltdvp != NULLVP)
 		VOP_UNLOCK(ltdvp);
 	if (tdvp != rtdvp)
 		vrele(tdvp);
 	if (ltvp != NULLVP)
 		VOP_UNLOCK(ltvp);
 	if (tvp != rtvp && tvp != NULLVP) {
 		if (rtvp == NULLVP)
 			vput(tvp);
 		else
 			vrele(tvp);
 	}
 	if (fdvp != rfdvp)
 		vrele(fdvp);
 	if (fvp != rfvp)
 		vrele(fvp);
 
 	UNIONFS_INTERNAL_DEBUG("unionfs_rename: leave (%d)\n", error);
 
 	return (error);
 
 unionfs_rename_abort:
 	vput(tdvp);
 	if (tdvp != rtdvp)
 		vrele(rtdvp);
 	if (tvp != NULLVP) {
 		if (tdvp != tvp)
 			vput(tvp);
 		else
 			vrele(tvp);
 	}
 	if (tvp != rtvp && rtvp != NULLVP)
 		vrele(rtvp);
 	if (fdvp != rfdvp)
 		vrele(rfdvp);
 	if (fvp != rfvp)
 		vrele(rfvp);
 	vrele(fdvp);
 	vrele(fvp);
 
 	UNIONFS_INTERNAL_DEBUG("unionfs_rename: leave (%d)\n", error);
 
 	return (error);
 }
 
 static int
 unionfs_mkdir(struct vop_mkdir_args *ap)
 {
 	struct unionfs_node *dunp;
 	struct componentname *cnp;
 	struct vnode   *udvp;
 	struct vnode   *uvp;
 	struct vattr	va;
 	int		error;
 	int		lkflags;
 
 	UNIONFS_INTERNAL_DEBUG("unionfs_mkdir: enter\n");
 
 	KASSERT_UNIONFS_VNODE(ap->a_dvp);
 
 	error = EROFS;
 	dunp = VTOUNIONFS(ap->a_dvp);
 	cnp = ap->a_cnp;
 	lkflags = cnp->cn_lkflags;
 	udvp = dunp->un_uppervp;
 
 	if (udvp != NULLVP) {
 		/* check opaque */
 		if (!(cnp->cn_flags & ISWHITEOUT)) {
 			error = VOP_GETATTR(udvp, &va, cnp->cn_cred);
 			if (error != 0)
 				return (error);
 			if ((va.va_flags & OPAQUE) != 0)
 				cnp->cn_flags |= ISWHITEOUT;
 		}
 
 		if ((error = VOP_MKDIR(udvp, &uvp, cnp, ap->a_vap)) == 0) {
 			VOP_UNLOCK(uvp);
 			cnp->cn_lkflags = LK_EXCLUSIVE;
 			error = unionfs_nodeget(ap->a_dvp->v_mount, uvp, NULLVP,
 			    ap->a_dvp, ap->a_vpp, cnp);
 			cnp->cn_lkflags = lkflags;
 			vrele(uvp);
 		}
 	}
 
 	UNIONFS_INTERNAL_DEBUG("unionfs_mkdir: leave (%d)\n", error);
 
 	return (error);
 }
 
 static int
 unionfs_rmdir(struct vop_rmdir_args *ap)
 {
 	struct unionfs_node *dunp;
 	struct unionfs_node *unp;
 	struct unionfs_mount *ump;
 	struct componentname *cnp;
 	struct thread  *td;
 	struct vnode   *udvp;
 	struct vnode   *uvp;
 	struct vnode   *lvp;
 	int		error;
 
 	UNIONFS_INTERNAL_DEBUG("unionfs_rmdir: enter\n");
 
 	KASSERT_UNIONFS_VNODE(ap->a_dvp);
 	KASSERT_UNIONFS_VNODE(ap->a_vp);
 
 	error = 0;
 	dunp = VTOUNIONFS(ap->a_dvp);
 	unp = VTOUNIONFS(ap->a_vp);
 	cnp = ap->a_cnp;
 	td = curthread;
 	udvp = dunp->un_uppervp;
 	uvp = unp->un_uppervp;
 	lvp = unp->un_lowervp;
 
 	if (udvp == NULLVP)
 		return (EROFS);
 
 	if (udvp == uvp)
 		return (EOPNOTSUPP);
 
 	if (uvp != NULLVP) {
 		if (lvp != NULLVP) {
 			error = unionfs_check_rmdir(ap->a_vp, cnp->cn_cred, td);
 			if (error != 0)
 				return (error);
 		}
 		ump = MOUNTTOUNIONFSMOUNT(ap->a_vp->v_mount);
 		if (ump->um_whitemode == UNIONFS_WHITE_ALWAYS || lvp != NULLVP)
 			cnp->cn_flags |= DOWHITEOUT;
 		/*
 		 * The relookup path will need to relock the parent dvp and
 		 * possibly the vp as well.  Locking is expected to be done
 		 * in parent->child order; drop the lock on vp to avoid LOR
 		 * and potential recursion on vp's lock.
 		 * vp is expected to remain referenced during VOP_RMDIR(),
 		 * so vref/vrele should not be necessary here.
 		 */
 		VOP_UNLOCK(ap->a_vp);
 		VNPASS(vrefcnt(ap->a_vp) > 0, ap->a_vp);
 		error = unionfs_relookup_for_delete(ap->a_dvp, cnp, td);
 		vn_lock(ap->a_vp, LK_EXCLUSIVE | LK_RETRY);
 		/*
 		 * VOP_RMDIR is dispatched against udvp, so if uvp became
 		 * doomed while the lock was dropped above the target
 		 * filesystem may not be able to cope.
 		 */
 		if (error == 0 && VN_IS_DOOMED(uvp))
 			error = ENOENT;
 		if (error == 0)
 			error = VOP_RMDIR(udvp, uvp, cnp);
 	} else if (lvp != NULLVP)
 		error = unionfs_mkwhiteout(udvp, cnp, td,
 		    unp->un_path, unp->un_pathlen);
 
 	if (error == 0) {
 		cache_purge(ap->a_dvp);
 		cache_purge(ap->a_vp);
 	}
 
 	UNIONFS_INTERNAL_DEBUG("unionfs_rmdir: leave (%d)\n", error);
 
 	return (error);
 }
 
 static int
 unionfs_symlink(struct vop_symlink_args *ap)
 {
 	struct unionfs_node *dunp;
 	struct componentname *cnp;
 	struct vnode   *udvp;
 	struct vnode   *uvp;
 	int		error;
 	int		lkflags;
 
 	UNIONFS_INTERNAL_DEBUG("unionfs_symlink: enter\n");
 
 	KASSERT_UNIONFS_VNODE(ap->a_dvp);
 
 	error = EROFS;
 	dunp = VTOUNIONFS(ap->a_dvp);
 	cnp = ap->a_cnp;
 	lkflags = cnp->cn_lkflags;
 	udvp = dunp->un_uppervp;
 
 	if (udvp != NULLVP) {
 		error = VOP_SYMLINK(udvp, &uvp, cnp, ap->a_vap, ap->a_target);
 		if (error == 0) {
 			VOP_UNLOCK(uvp);
 			cnp->cn_lkflags = LK_EXCLUSIVE;
 			error = unionfs_nodeget(ap->a_dvp->v_mount, uvp, NULLVP,
 			    ap->a_dvp, ap->a_vpp, cnp);
 			cnp->cn_lkflags = lkflags;
 			vrele(uvp);
 		}
 	}
 
 	UNIONFS_INTERNAL_DEBUG("unionfs_symlink: leave (%d)\n", error);
 
 	return (error);
 }
 
 static int
 unionfs_readdir(struct vop_readdir_args *ap)
 {
 	struct unionfs_node *unp;
 	struct unionfs_node_status *unsp;
 	struct uio     *uio;
 	struct vnode   *vp;
 	struct vnode   *uvp;
 	struct vnode   *lvp;
 	struct thread  *td;
 	struct vattr    va;
 
 	uint64_t	*cookies_bk;
 	int		error;
 	int		eofflag;
 	int		ncookies_bk;
 	int		uio_offset_bk;
 	enum unionfs_lkupgrade lkstatus;
 
 	UNIONFS_INTERNAL_DEBUG("unionfs_readdir: enter\n");
 
 	KASSERT_UNIONFS_VNODE(ap->a_vp);
 
 	error = 0;
 	eofflag = 0;
 	uio_offset_bk = 0;
 	uio = ap->a_uio;
 	uvp = NULLVP;
 	lvp = NULLVP;
 	td = uio->uio_td;
 	ncookies_bk = 0;
 	cookies_bk = NULL;
 
 	vp = ap->a_vp;
 	if (vp->v_type != VDIR)
 		return (ENOTDIR);
 
 	/*
 	 * If the vnode is reclaimed while upgrading, we can't safely use unp
 	 * or do anything else unionfs- specific.
 	 */
 	lkstatus = unionfs_upgrade_lock(vp);
 	if (lkstatus == UNIONFS_LKUPGRADE_DOOMED)
 		error = EBADF;
 	if (error == 0) {
 		unp = VTOUNIONFS(vp);
 		uvp = unp->un_uppervp;
 		lvp = unp->un_lowervp;
 		/* check the open count. unionfs needs open before readdir. */
 		unionfs_get_node_status(unp, td, &unsp);
 		if ((uvp != NULLVP && unsp->uns_upper_opencnt <= 0) ||
 			(lvp != NULLVP && unsp->uns_lower_opencnt <= 0)) {
 			unionfs_tryrem_node_status(unp, unsp);
 			error = EBADF;
 		}
 	}
 	unionfs_downgrade_lock(vp, lkstatus);
 	if (error != 0)
 		goto unionfs_readdir_exit;
 
 	/* check opaque */
 	if (uvp != NULLVP && lvp != NULLVP) {
 		if ((error = VOP_GETATTR(uvp, &va, ap->a_cred)) != 0)
 			goto unionfs_readdir_exit;
 		if (va.va_flags & OPAQUE)
 			lvp = NULLVP;
 	}
 
 	/* upper only */
 	if (uvp != NULLVP && lvp == NULLVP) {
 		error = VOP_READDIR(uvp, uio, ap->a_cred, ap->a_eofflag,
 		    ap->a_ncookies, ap->a_cookies);
 		unsp->uns_readdir_status = 0;
 
 		goto unionfs_readdir_exit;
 	}
 
 	/* lower only */
 	if (uvp == NULLVP && lvp != NULLVP) {
 		error = VOP_READDIR(lvp, uio, ap->a_cred, ap->a_eofflag,
 		    ap->a_ncookies, ap->a_cookies);
 		unsp->uns_readdir_status = 2;
 
 		goto unionfs_readdir_exit;
 	}
 
 	/*
 	 * readdir upper and lower
 	 */
 	KASSERT(uvp != NULLVP, ("unionfs_readdir: null upper vp"));
 	KASSERT(lvp != NULLVP, ("unionfs_readdir: null lower vp"));
 	if (uio->uio_offset == 0)
 		unsp->uns_readdir_status = 0;
 
 	if (unsp->uns_readdir_status == 0) {
 		/* read upper */
 		error = VOP_READDIR(uvp, uio, ap->a_cred, &eofflag,
 				    ap->a_ncookies, ap->a_cookies);
 
 		if (error != 0 || eofflag == 0)
 			goto unionfs_readdir_exit;
 		unsp->uns_readdir_status = 1;
 
 		/*
 		 * UFS(and other FS) needs size of uio_resid larger than
 		 * DIRBLKSIZ.
 		 * size of DIRBLKSIZ equals DEV_BSIZE.
 		 * (see: ufs/ufs/ufs_vnops.c ufs_readdir func , ufs/ufs/dir.h)
 		 */
 		if (uio->uio_resid <= (uio->uio_resid & (DEV_BSIZE -1)))
 			goto unionfs_readdir_exit;
 
 		/*
 		 * Backup cookies.
 		 * It prepares to readdir in lower.
 		 */
 		if (ap->a_ncookies != NULL) {
 			ncookies_bk = *(ap->a_ncookies);
 			*(ap->a_ncookies) = 0;
 		}
 		if (ap->a_cookies != NULL) {
 			cookies_bk = *(ap->a_cookies);
 			*(ap->a_cookies) = NULL;
 		}
 	}
 
 	/* initialize for readdir in lower */
 	if (unsp->uns_readdir_status == 1) {
 		unsp->uns_readdir_status = 2;
 		/*
 		 * Backup uio_offset. See the comment after the
 		 * VOP_READDIR call on the lower layer.
 		 */
 		uio_offset_bk = uio->uio_offset;
 		uio->uio_offset = 0;
 	}
 
 	if (lvp == NULLVP) {
 		error = EBADF;
 		goto unionfs_readdir_exit;
 	}
 	/* read lower */
 	error = VOP_READDIR(lvp, uio, ap->a_cred, ap->a_eofflag,
 			    ap->a_ncookies, ap->a_cookies);
 
 	/*
 	 * We can't return an uio_offset of 0: this would trigger an
 	 * infinite loop, because the next call to unionfs_readdir would
 	 * always restart with the upper layer (uio_offset == 0) and
 	 * always return some data.
 	 *
 	 * This happens when the lower layer root directory is removed.
 	 * (A root directory deleting of unionfs should not be permitted.
 	 *  But current VFS can not do it.)
 	 */
 	if (uio->uio_offset == 0)
 		uio->uio_offset = uio_offset_bk;
 
 	if (cookies_bk != NULL) {
 		/* merge cookies */
 		int		size;
 		uint64_t         *newcookies, *pos;
 
 		size = *(ap->a_ncookies) + ncookies_bk;
 		newcookies = (uint64_t *) malloc(size * sizeof(*newcookies),
 		    M_TEMP, M_WAITOK);
 		pos = newcookies;
 
 		memcpy(pos, cookies_bk, ncookies_bk * sizeof(*newcookies));
 		pos += ncookies_bk;
 		memcpy(pos, *(ap->a_cookies),
 		    *(ap->a_ncookies) * sizeof(*newcookies));
 		free(cookies_bk, M_TEMP);
 		free(*(ap->a_cookies), M_TEMP);
 		*(ap->a_ncookies) = size;
 		*(ap->a_cookies) = newcookies;
 	}
 
 unionfs_readdir_exit:
 	if (error != 0 && ap->a_eofflag != NULL)
 		*(ap->a_eofflag) = 1;
 
 	UNIONFS_INTERNAL_DEBUG("unionfs_readdir: leave (%d)\n", error);
 
 	return (error);
 }
 
 static int
 unionfs_readlink(struct vop_readlink_args *ap)
 {
 	struct unionfs_node *unp;
 	struct vnode   *vp;
 	int error;
 
 	UNIONFS_INTERNAL_DEBUG("unionfs_readlink: enter\n");
 
 	KASSERT_UNIONFS_VNODE(ap->a_vp);
 
 	unp = VTOUNIONFS(ap->a_vp);
 	vp = (unp->un_uppervp != NULLVP ? unp->un_uppervp : unp->un_lowervp);
 
 	error = VOP_READLINK(vp, ap->a_uio, ap->a_cred);
 
 	UNIONFS_INTERNAL_DEBUG("unionfs_readlink: leave (%d)\n", error);
 
 	return (error);
 }
 
 static int
 unionfs_getwritemount(struct vop_getwritemount_args *ap)
 {
 	struct unionfs_node *unp;
 	struct vnode   *uvp;
 	struct vnode   *vp, *ovp;
 	int		error;
 
 	UNIONFS_INTERNAL_DEBUG("unionfs_getwritemount: enter\n");
 
 	error = 0;
 	vp = ap->a_vp;
 	uvp = NULLVP;
 
 	VI_LOCK(vp);
 	unp = VTOUNIONFS(vp);
 	if (unp != NULL)
 		uvp = unp->un_uppervp;
 
 	/*
 	 * If our node has no upper vnode, check the parent directory.
 	 * We may be initiating a write operation that will produce a
 	 * new upper vnode through CoW.
 	 */
 	if (uvp == NULLVP && unp != NULL) {
 		ovp = vp;
 		vp = unp->un_dvp;
 		/*
 		 * Only the root vnode should have an empty parent, but it
 		 * should not have an empty uppervp, so we shouldn't get here.
 		 */
 		VNASSERT(vp != NULL, ovp, ("%s: NULL parent vnode", __func__));
 		VI_UNLOCK(ovp);
 		VI_LOCK(vp);
 		unp = VTOUNIONFS(vp);
 		if (unp != NULL)
 			uvp = unp->un_uppervp;
 		if (uvp == NULLVP)
 			error = EACCES;
 	}
 
 	if (uvp != NULLVP) {
 		vholdnz(uvp);
 		VI_UNLOCK(vp);
 		error = VOP_GETWRITEMOUNT(uvp, ap->a_mpp);
 		vdrop(uvp);
 	} else {
 		VI_UNLOCK(vp);
 		*(ap->a_mpp) = NULL;
 	}
 
 	UNIONFS_INTERNAL_DEBUG("unionfs_getwritemount: leave (%d)\n", error);
 
 	return (error);
 }
 
 static int
 unionfs_inactive(struct vop_inactive_args *ap)
 {
 	ap->a_vp->v_object = NULL;
 	vrecycle(ap->a_vp);
 	return (0);
 }
 
 static int
 unionfs_reclaim(struct vop_reclaim_args *ap)
 {
 	/* UNIONFS_INTERNAL_DEBUG("unionfs_reclaim: enter\n"); */
 
 	unionfs_noderem(ap->a_vp);
 
 	/* UNIONFS_INTERNAL_DEBUG("unionfs_reclaim: leave\n"); */
 
 	return (0);
 }
 
 static int
 unionfs_print(struct vop_print_args *ap)
 {
 	struct unionfs_node *unp;
 	/* struct unionfs_node_status *unsp; */
 
 	unp = VTOUNIONFS(ap->a_vp);
 	/* unionfs_get_node_status(unp, curthread, &unsp); */
 
 	printf("unionfs_vp=%p, uppervp=%p, lowervp=%p\n",
 	    ap->a_vp, unp->un_uppervp, unp->un_lowervp);
 	/*
 	printf("unionfs opencnt: uppervp=%d, lowervp=%d\n",
 	    unsp->uns_upper_opencnt, unsp->uns_lower_opencnt);
 	*/
 
 	if (unp->un_uppervp != NULLVP)
 		vn_printf(unp->un_uppervp, "unionfs: upper ");
 	if (unp->un_lowervp != NULLVP)
 		vn_printf(unp->un_lowervp, "unionfs: lower ");
 
 	return (0);
 }
 
 static int
 unionfs_islocked(struct vop_islocked_args *ap)
 {
 	struct unionfs_node *unp;
 
 	KASSERT_UNIONFS_VNODE(ap->a_vp);
 
 	unp = VTOUNIONFS(ap->a_vp);
 	if (unp == NULL)
 		return (vop_stdislocked(ap));
 
 	if (unp->un_uppervp != NULLVP)
 		return (VOP_ISLOCKED(unp->un_uppervp));
 	if (unp->un_lowervp != NULLVP)
 		return (VOP_ISLOCKED(unp->un_lowervp));
 	return (vop_stdislocked(ap));
 }
 
 static int
 unionfs_get_llt_revlock(struct vnode *vp, int flags)
 {
 	int revlock;
 
 	revlock = 0;
 
 	switch (flags & LK_TYPE_MASK) {
 	case LK_SHARED:
 		if (VOP_ISLOCKED(vp) == LK_EXCLUSIVE)
 			revlock = LK_UPGRADE;
 		else
 			revlock = LK_RELEASE;
 		break;
 	case LK_EXCLUSIVE:
 	case LK_UPGRADE:
 		revlock = LK_RELEASE;
 		break;
 	case LK_DOWNGRADE:
 		revlock = LK_UPGRADE;
 		break;
 	default:
 		break;
 	}
 
 	return (revlock);
 }
 
 /*
  * The state of an acquired lock is adjusted similarly to
  * the time of error generating. 
  * flags: LK_RELEASE or LK_UPGRADE
  */
 static void
 unionfs_revlock(struct vnode *vp, int flags)
 {
 	if (flags & LK_RELEASE)
 		VOP_UNLOCK_FLAGS(vp, flags);
 	else {
 		/* UPGRADE */
 		if (vn_lock(vp, flags) != 0)
 			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 	}
 }
 
 static int
 unionfs_lock(struct vop_lock1_args *ap)
 {
 	struct unionfs_node *unp;
 	struct vnode   *vp;
 	struct vnode   *uvp;
 	struct vnode   *lvp;
 	int		error;
 	int		flags;
 	int		revlock;
 	int		interlock;
 	int		uhold;
 
 	/*
 	 * TODO: rework the unionfs locking scheme.
 	 * It's not guaranteed to be safe to blindly lock two vnodes on
 	 * different mounts as is done here.  Further, the entanglement
 	 * of locking both vnodes with the various options that can be
 	 * passed to VOP_LOCK() makes this code hard to reason about.
 	 * Instead, consider locking only the upper vnode, or the lower
 	 * vnode is the upper is not present, and taking separate measures
 	 * to lock both vnodes in the few cases when that is needed.
 	 */
 	error = 0;
 	interlock = 1;
 	uhold = 0;
 	flags = ap->a_flags;
 	vp = ap->a_vp;
 
 	if (LK_RELEASE == (flags & LK_TYPE_MASK) || !(flags & LK_TYPE_MASK))
 		return (VOP_UNLOCK_FLAGS(vp, flags | LK_RELEASE));
 
 	if ((flags & LK_INTERLOCK) == 0)
 		VI_LOCK(vp);
 
 	unp = VTOUNIONFS(vp);
 	if (unp == NULL)
 		goto unionfs_lock_null_vnode;
 
 	KASSERT_UNIONFS_VNODE(ap->a_vp);
 
 	lvp = unp->un_lowervp;
 	uvp = unp->un_uppervp;
 
 	if ((revlock = unionfs_get_llt_revlock(vp, flags)) == 0)
 		panic("unknown lock type: 0x%x", flags & LK_TYPE_MASK);
 
 	/*
 	 * During unmount, the root vnode lock may be taken recursively,
 	 * because it may share the same v_vnlock field as the vnode covered by
 	 * the unionfs mount.  The covered vnode is locked across VFS_UNMOUNT(),
 	 * and the same lock may be taken recursively here during vflush()
 	 * issued by unionfs_unmount().
 	 */
 	if ((flags & LK_TYPE_MASK) == LK_EXCLUSIVE &&
 	    (vp->v_vflag & VV_ROOT) != 0)
 		flags |= LK_CANRECURSE;
 
 	if (lvp != NULLVP) {
 		if (uvp != NULLVP && flags & LK_UPGRADE) {
 			/*
 			 * Share Lock is once released and a deadlock is
 			 * avoided.
 			 */
 			vholdnz(uvp);
 			uhold = 1;
 			VOP_UNLOCK(uvp);
 			unp = VTOUNIONFS(vp);
 			if (unp == NULL) {
 				/* vnode is released. */
 				VI_UNLOCK(vp);
 				VOP_UNLOCK(lvp);
 				vdrop(uvp);
 				return (EBUSY);
 			}
 		}
 		VI_LOCK_FLAGS(lvp, MTX_DUPOK);
 		flags |= LK_INTERLOCK;
 		vholdl(lvp);
 
 		VI_UNLOCK(vp);
 		ap->a_flags &= ~LK_INTERLOCK;
 
 		error = VOP_LOCK(lvp, flags);
 
 		VI_LOCK(vp);
 		unp = VTOUNIONFS(vp);
 		if (unp == NULL) {
 			/* vnode is released. */
 			VI_UNLOCK(vp);
 			if (error == 0)
 				VOP_UNLOCK(lvp);
 			vdrop(lvp);
 			if (uhold != 0)
 				vdrop(uvp);
 			return (vop_stdlock(ap));
 		}
 	}
 
 	if (error == 0 && uvp != NULLVP) {
 		if (uhold && flags & LK_UPGRADE) {
 			flags &= ~LK_TYPE_MASK;
 			flags |= LK_EXCLUSIVE;
 		}
 		VI_LOCK_FLAGS(uvp, MTX_DUPOK);
 		flags |= LK_INTERLOCK;
 		if (uhold == 0) {
 			vholdl(uvp);
 			uhold = 1;
 		}
 
 		VI_UNLOCK(vp);
 		ap->a_flags &= ~LK_INTERLOCK;
 
 		error = VOP_LOCK(uvp, flags);
 
 		VI_LOCK(vp);
 		unp = VTOUNIONFS(vp);
 		if (unp == NULL) {
 			/* vnode is released. */
 			VI_UNLOCK(vp);
 			if (error == 0)
 				VOP_UNLOCK(uvp);
 			vdrop(uvp);
 			if (lvp != NULLVP) {
 				VOP_UNLOCK(lvp);
 				vdrop(lvp);
 			}
 			return (vop_stdlock(ap));
 		}
 		if (error != 0 && lvp != NULLVP) {
 			/* rollback */
 			VI_UNLOCK(vp);
 			unionfs_revlock(lvp, revlock);
 			interlock = 0;
 		}
 	}
 
 	if (interlock)
 		VI_UNLOCK(vp);
 	if (lvp != NULLVP)
 		vdrop(lvp);
 	if (uhold != 0)
 		vdrop(uvp);
 
 	return (error);
 
 unionfs_lock_null_vnode:
 	ap->a_flags |= LK_INTERLOCK;
 	return (vop_stdlock(ap));
 }
 
 static int
 unionfs_unlock(struct vop_unlock_args *ap)
 {
 	struct vnode   *vp;
 	struct vnode   *lvp;
 	struct vnode   *uvp;
 	struct unionfs_node *unp;
 	int		error;
 	int		uhold;
 
 	KASSERT_UNIONFS_VNODE(ap->a_vp);
 
 	error = 0;
 	uhold = 0;
 	vp = ap->a_vp;
 
 	unp = VTOUNIONFS(vp);
 	if (unp == NULL)
 		goto unionfs_unlock_null_vnode;
 	lvp = unp->un_lowervp;
 	uvp = unp->un_uppervp;
 
 	if (lvp != NULLVP) {
 		vholdnz(lvp);
 		error = VOP_UNLOCK(lvp);
 	}
 
 	if (error == 0 && uvp != NULLVP) {
 		vholdnz(uvp);
 		uhold = 1;
 		error = VOP_UNLOCK(uvp);
 	}
 
 	if (lvp != NULLVP)
 		vdrop(lvp);
 	if (uhold != 0)
 		vdrop(uvp);
 
 	return error;
 
 unionfs_unlock_null_vnode:
 	return (vop_stdunlock(ap));
 }
 
 static int
 unionfs_pathconf(struct vop_pathconf_args *ap)
 {
 	struct unionfs_node *unp;
 	struct vnode   *vp;
 
 	KASSERT_UNIONFS_VNODE(ap->a_vp);
 
 	unp = VTOUNIONFS(ap->a_vp);
 	vp = (unp->un_uppervp != NULLVP ? unp->un_uppervp : unp->un_lowervp);
 
 	return (VOP_PATHCONF(vp, ap->a_name, ap->a_retval));
 }
 
 static int
 unionfs_advlock(struct vop_advlock_args *ap)
 {
 	struct unionfs_node *unp;
 	struct unionfs_node_status *unsp;
 	struct vnode   *vp;
 	struct vnode   *uvp;
 	struct thread  *td;
 	int error;
 
 	UNIONFS_INTERNAL_DEBUG("unionfs_advlock: enter\n");
 
 	KASSERT_UNIONFS_VNODE(ap->a_vp);
 
 	vp = ap->a_vp;
 	td = curthread;
 
 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 
 	unp = VTOUNIONFS(ap->a_vp);
 	uvp = unp->un_uppervp;
 
 	if (uvp == NULLVP) {
 		error = unionfs_copyfile(unp, 1, td->td_ucred, td);
 		if (error != 0)
 			goto unionfs_advlock_abort;
 		uvp = unp->un_uppervp;
 
 		unionfs_get_node_status(unp, td, &unsp);
 		if (unsp->uns_lower_opencnt > 0) {
 			/* try reopen the vnode */
 			error = VOP_OPEN(uvp, unsp->uns_lower_openmode,
 				td->td_ucred, td, NULL);
 			if (error)
 				goto unionfs_advlock_abort;
 			unsp->uns_upper_opencnt++;
 			VOP_CLOSE(unp->un_lowervp, unsp->uns_lower_openmode,
 			    td->td_ucred, td);
 			unsp->uns_lower_opencnt--;
 		} else
 			unionfs_tryrem_node_status(unp, unsp);
 	}
 
 	VOP_UNLOCK(vp);
 
 	error = VOP_ADVLOCK(uvp, ap->a_id, ap->a_op, ap->a_fl, ap->a_flags);
 
 	UNIONFS_INTERNAL_DEBUG("unionfs_advlock: leave (%d)\n", error);
 
 	return error;
 
 unionfs_advlock_abort:
 	VOP_UNLOCK(vp);
 
 	UNIONFS_INTERNAL_DEBUG("unionfs_advlock: leave (%d)\n", error);
 
 	return error;
 }
 
 static int
 unionfs_strategy(struct vop_strategy_args *ap)
 {
 	struct unionfs_node *unp;
 	struct vnode   *vp;
 
 	KASSERT_UNIONFS_VNODE(ap->a_vp);
 
 	unp = VTOUNIONFS(ap->a_vp);
 	vp = (unp->un_uppervp != NULLVP ? unp->un_uppervp : unp->un_lowervp);
 
 #ifdef DIAGNOSTIC
 	if (vp == NULLVP)
 		panic("unionfs_strategy: nullvp");
 
 	if (ap->a_bp->b_iocmd == BIO_WRITE && vp == unp->un_lowervp)
 		panic("unionfs_strategy: writing to lowervp");
 #endif
 
 	return (VOP_STRATEGY(vp, ap->a_bp));
 }
 
 static int
 unionfs_getacl(struct vop_getacl_args *ap)
 {
 	struct unionfs_node *unp;
 	struct vnode   *vp;
 	int		error;
 
 	KASSERT_UNIONFS_VNODE(ap->a_vp);
 
 	unp = VTOUNIONFS(ap->a_vp);
 	vp = (unp->un_uppervp != NULLVP ? unp->un_uppervp : unp->un_lowervp);
 
 	UNIONFS_INTERNAL_DEBUG("unionfs_getacl: enter\n");
 
 	error = VOP_GETACL(vp, ap->a_type, ap->a_aclp, ap->a_cred, ap->a_td);
 
 	UNIONFS_INTERNAL_DEBUG("unionfs_getacl: leave (%d)\n", error);
 
 	return (error);
 }
 
 static int
 unionfs_setacl(struct vop_setacl_args *ap)
 {
 	struct unionfs_node *unp;
 	struct vnode   *uvp;
 	struct vnode   *lvp;
 	struct thread  *td;
 	int		error;
 
 	UNIONFS_INTERNAL_DEBUG("unionfs_setacl: enter\n");
 
 	KASSERT_UNIONFS_VNODE(ap->a_vp);
 
 	error = EROFS;
 	unp = VTOUNIONFS(ap->a_vp);
 	uvp = unp->un_uppervp;
 	lvp = unp->un_lowervp;
 	td = ap->a_td;
 
 	if (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY)
 		return (EROFS);
 
 	if (uvp == NULLVP && lvp->v_type == VREG) {
 		if ((error = unionfs_copyfile(unp, 1, ap->a_cred, td)) != 0)
 			return (error);
 		uvp = unp->un_uppervp;
 	}
 
 	if (uvp != NULLVP)
 		error = VOP_SETACL(uvp, ap->a_type, ap->a_aclp, ap->a_cred, td);
 
 	UNIONFS_INTERNAL_DEBUG("unionfs_setacl: leave (%d)\n", error);
 
 	return (error);
 }
 
 static int
 unionfs_aclcheck(struct vop_aclcheck_args *ap)
 {
 	struct unionfs_node *unp;
 	struct vnode   *vp;
 	int		error;
 
 	UNIONFS_INTERNAL_DEBUG("unionfs_aclcheck: enter\n");
 
 	KASSERT_UNIONFS_VNODE(ap->a_vp);
 
 	unp = VTOUNIONFS(ap->a_vp);
 	vp = (unp->un_uppervp != NULLVP ? unp->un_uppervp : unp->un_lowervp);
 
 	error = VOP_ACLCHECK(vp, ap->a_type, ap->a_aclp, ap->a_cred, ap->a_td);
 
 	UNIONFS_INTERNAL_DEBUG("unionfs_aclcheck: leave (%d)\n", error);
 
 	return (error);
 }
 
 static int
 unionfs_openextattr(struct vop_openextattr_args *ap)
 {
 	struct unionfs_node *unp;
 	struct vnode   *vp;
 	struct vnode   *tvp;
 	int		error;
 
 	KASSERT_UNIONFS_VNODE(ap->a_vp);
 
 	vp = ap->a_vp;
 	unp = VTOUNIONFS(vp);
 	tvp = (unp->un_uppervp != NULLVP ? unp->un_uppervp : unp->un_lowervp);
 
 	if ((tvp == unp->un_uppervp && (unp->un_flag & UNIONFS_OPENEXTU)) ||
 	    (tvp == unp->un_lowervp && (unp->un_flag & UNIONFS_OPENEXTL)))
 		return (EBUSY);
 
 	error = VOP_OPENEXTATTR(tvp, ap->a_cred, ap->a_td);
 
 	if (error == 0) {
 		if (vn_lock(vp, LK_UPGRADE) != 0)
 			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 		if (!VN_IS_DOOMED(vp)) {
 			if (tvp == unp->un_uppervp)
 				unp->un_flag |= UNIONFS_OPENEXTU;
 			else
 				unp->un_flag |= UNIONFS_OPENEXTL;
 		}
 		vn_lock(vp, LK_DOWNGRADE | LK_RETRY);
 	}
 
 	return (error);
 }
 
 static int
 unionfs_closeextattr(struct vop_closeextattr_args *ap)
 {
 	struct unionfs_node *unp;
 	struct vnode   *vp;
 	struct vnode   *tvp;
 	int		error;
 
 	KASSERT_UNIONFS_VNODE(ap->a_vp);
 
 	vp = ap->a_vp;
 	unp = VTOUNIONFS(vp);
 	tvp = NULLVP;
 
 	if (unp->un_flag & UNIONFS_OPENEXTU)
 		tvp = unp->un_uppervp;
 	else if (unp->un_flag & UNIONFS_OPENEXTL)
 		tvp = unp->un_lowervp;
 
 	if (tvp == NULLVP)
 		return (EOPNOTSUPP);
 
 	error = VOP_CLOSEEXTATTR(tvp, ap->a_commit, ap->a_cred, ap->a_td);
 
 	if (error == 0) {
 		if (vn_lock(vp, LK_UPGRADE) != 0)
 			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 		if (!VN_IS_DOOMED(vp)) {
 			if (tvp == unp->un_uppervp)
 				unp->un_flag &= ~UNIONFS_OPENEXTU;
 			else
 				unp->un_flag &= ~UNIONFS_OPENEXTL;
 		}
 		vn_lock(vp, LK_DOWNGRADE | LK_RETRY);
 	}
 
 	return (error);
 }
 
 static int
 unionfs_getextattr(struct vop_getextattr_args *ap)
 {
 	struct unionfs_node *unp;
 	struct vnode   *vp;
 
 	KASSERT_UNIONFS_VNODE(ap->a_vp);
 
 	unp = VTOUNIONFS(ap->a_vp);
 	vp = NULLVP;
 
 	if (unp->un_flag & UNIONFS_OPENEXTU)
 		vp = unp->un_uppervp;
 	else if (unp->un_flag & UNIONFS_OPENEXTL)
 		vp = unp->un_lowervp;
 
 	if (vp == NULLVP)
 		return (EOPNOTSUPP);
 
 	return (VOP_GETEXTATTR(vp, ap->a_attrnamespace, ap->a_name,
 	    ap->a_uio, ap->a_size, ap->a_cred, ap->a_td));
 }
 
 static int
 unionfs_setextattr(struct vop_setextattr_args *ap)
 {
 	struct unionfs_node *unp;
 	struct vnode   *uvp;
 	struct vnode   *lvp;
 	struct vnode   *ovp;
 	struct ucred   *cred;
 	struct thread  *td;
 	int		error;
 
 	KASSERT_UNIONFS_VNODE(ap->a_vp);
 
 	error = EROFS;
 	unp = VTOUNIONFS(ap->a_vp);
 	uvp = unp->un_uppervp;
 	lvp = unp->un_lowervp;
 	ovp = NULLVP;
 	cred = ap->a_cred;
 	td = ap->a_td;
 
 	UNIONFS_INTERNAL_DEBUG("unionfs_setextattr: enter (un_flag=%x)\n",
 	    unp->un_flag);
 
 	if (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY)
 		return (EROFS);
 
 	if (unp->un_flag & UNIONFS_OPENEXTU)
 		ovp = unp->un_uppervp;
 	else if (unp->un_flag & UNIONFS_OPENEXTL)
 		ovp = unp->un_lowervp;
 
 	if (ovp == NULLVP)
 		return (EOPNOTSUPP);
 
 	if (ovp == lvp && lvp->v_type == VREG) {
 		VOP_CLOSEEXTATTR(lvp, 0, cred, td);
 		if (uvp == NULLVP &&
 		    (error = unionfs_copyfile(unp, 1, cred, td)) != 0) {
 unionfs_setextattr_reopen:
 			if ((unp->un_flag & UNIONFS_OPENEXTL) &&
 			    VOP_OPENEXTATTR(lvp, cred, td)) {
 #ifdef DIAGNOSTIC
 				panic("unionfs: VOP_OPENEXTATTR failed");
 #endif
 				unp->un_flag &= ~UNIONFS_OPENEXTL;
 			}
 			goto unionfs_setextattr_abort;
 		}
 		uvp = unp->un_uppervp;
 		if ((error = VOP_OPENEXTATTR(uvp, cred, td)) != 0)
 			goto unionfs_setextattr_reopen;
 		unp->un_flag &= ~UNIONFS_OPENEXTL;
 		unp->un_flag |= UNIONFS_OPENEXTU;
 		ovp = uvp;
 	}
 
 	if (ovp == uvp)
 		error = VOP_SETEXTATTR(ovp, ap->a_attrnamespace, ap->a_name,
 		    ap->a_uio, cred, td);
 
 unionfs_setextattr_abort:
 	UNIONFS_INTERNAL_DEBUG("unionfs_setextattr: leave (%d)\n", error);
 
 	return (error);
 }
 
 static int
 unionfs_listextattr(struct vop_listextattr_args *ap)
 {
 	struct unionfs_node *unp;
 	struct vnode *vp;
 
 	KASSERT_UNIONFS_VNODE(ap->a_vp);
 
 	unp = VTOUNIONFS(ap->a_vp);
 	vp = NULLVP;
 
 	if (unp->un_flag & UNIONFS_OPENEXTU)
 		vp = unp->un_uppervp;
 	else if (unp->un_flag & UNIONFS_OPENEXTL)
 		vp = unp->un_lowervp;
 
 	if (vp == NULLVP)
 		return (EOPNOTSUPP);
 
 	return (VOP_LISTEXTATTR(vp, ap->a_attrnamespace, ap->a_uio,
 	    ap->a_size, ap->a_cred, ap->a_td));
 }
 
 static int
 unionfs_deleteextattr(struct vop_deleteextattr_args *ap)
 {
 	struct unionfs_node *unp;
 	struct vnode   *uvp;
 	struct vnode   *lvp;
 	struct vnode   *ovp;
 	struct ucred   *cred;
 	struct thread  *td;
 	int		error;
 
 	KASSERT_UNIONFS_VNODE(ap->a_vp);
 
 	error = EROFS;
 	unp = VTOUNIONFS(ap->a_vp);
 	uvp = unp->un_uppervp;
 	lvp = unp->un_lowervp;
 	ovp = NULLVP;
 	cred = ap->a_cred;
 	td = ap->a_td;
 
 	UNIONFS_INTERNAL_DEBUG("unionfs_deleteextattr: enter (un_flag=%x)\n",
 	    unp->un_flag);
 
 	if (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY)
 		return (EROFS);
 
 	if (unp->un_flag & UNIONFS_OPENEXTU)
 		ovp = unp->un_uppervp;
 	else if (unp->un_flag & UNIONFS_OPENEXTL)
 		ovp = unp->un_lowervp;
 
 	if (ovp == NULLVP)
 		return (EOPNOTSUPP);
 
 	if (ovp == lvp && lvp->v_type == VREG) {
 		VOP_CLOSEEXTATTR(lvp, 0, cred, td);
 		if (uvp == NULLVP &&
 		    (error = unionfs_copyfile(unp, 1, cred, td)) != 0) {
 unionfs_deleteextattr_reopen:
 			if ((unp->un_flag & UNIONFS_OPENEXTL) &&
 			    VOP_OPENEXTATTR(lvp, cred, td)) {
 #ifdef DIAGNOSTIC
 				panic("unionfs: VOP_OPENEXTATTR failed");
 #endif
 				unp->un_flag &= ~UNIONFS_OPENEXTL;
 			}
 			goto unionfs_deleteextattr_abort;
 		}
 		uvp = unp->un_uppervp;
 		if ((error = VOP_OPENEXTATTR(uvp, cred, td)) != 0)
 			goto unionfs_deleteextattr_reopen;
 		unp->un_flag &= ~UNIONFS_OPENEXTL;
 		unp->un_flag |= UNIONFS_OPENEXTU;
 		ovp = uvp;
 	}
 
 	if (ovp == uvp)
 		error = VOP_DELETEEXTATTR(ovp, ap->a_attrnamespace, ap->a_name,
 		    ap->a_cred, ap->a_td);
 
 unionfs_deleteextattr_abort:
 	UNIONFS_INTERNAL_DEBUG("unionfs_deleteextattr: leave (%d)\n", error);
 
 	return (error);
 }
 
 static int
 unionfs_setlabel(struct vop_setlabel_args *ap)
 {
 	struct unionfs_node *unp;
 	struct vnode   *uvp;
 	struct vnode   *lvp;
 	struct thread  *td;
 	int		error;
 
 	UNIONFS_INTERNAL_DEBUG("unionfs_setlabel: enter\n");
 
 	KASSERT_UNIONFS_VNODE(ap->a_vp);
 
 	error = EROFS;
 	unp = VTOUNIONFS(ap->a_vp);
 	uvp = unp->un_uppervp;
 	lvp = unp->un_lowervp;
 	td = ap->a_td;
 
 	if (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY)
 		return (EROFS);
 
 	if (uvp == NULLVP && lvp->v_type == VREG) {
 		if ((error = unionfs_copyfile(unp, 1, ap->a_cred, td)) != 0)
 			return (error);
 		uvp = unp->un_uppervp;
 	}
 
 	if (uvp != NULLVP)
 		error = VOP_SETLABEL(uvp, ap->a_label, ap->a_cred, td);
 
 	UNIONFS_INTERNAL_DEBUG("unionfs_setlabel: leave (%d)\n", error);
 
 	return (error);
 }
 
 static int
 unionfs_vptofh(struct vop_vptofh_args *ap)
 {
 	return (EOPNOTSUPP);
 }
 
 static int
 unionfs_add_writecount(struct vop_add_writecount_args *ap)
 {
 	struct vnode *tvp, *vp;
 	struct unionfs_node *unp;
 	int error, writerefs __diagused;
 
 	vp = ap->a_vp;
 	unp = VTOUNIONFS(vp);
 	tvp = unp->un_uppervp;
 	KASSERT(tvp != NULL,
 	    ("%s: adding write ref without upper vnode", __func__));
 	error = VOP_ADD_WRITECOUNT(tvp, ap->a_inc);
 	if (error != 0)
 		return (error);
 	/*
 	 * We need to track the write refs we've passed to the underlying
 	 * vnodes so that we can undo them in case we are forcibly unmounted.
 	 */
 	writerefs = atomic_fetchadd_int(&vp->v_writecount, ap->a_inc);
 	/* text refs are bypassed to lowervp */
 	VNASSERT(writerefs >= 0, vp,
 	    ("%s: invalid write count %d", __func__, writerefs));
 	VNASSERT(writerefs + ap->a_inc >= 0, vp,
 	    ("%s: invalid write count inc %d + %d", __func__,
 	    writerefs, ap->a_inc));
 	return (0);
 }
 
 static int
 unionfs_vput_pair(struct vop_vput_pair_args *ap)
 {
 	struct mount *mp;
 	struct vnode *dvp, *vp, **vpp, *lvp, *ldvp, *uvp, *udvp, *tempvp;
 	struct unionfs_node *dunp, *unp;
 	int error, res;
 
 	dvp = ap->a_dvp;
 	vpp = ap->a_vpp;
 	vp = NULLVP;
 	lvp = NULLVP;
 	uvp = NULLVP;
 	unp = NULL;
 
 	dunp = VTOUNIONFS(dvp);
 	udvp = dunp->un_uppervp;
 	ldvp = dunp->un_lowervp;
 
 	/*
 	 * Underlying vnodes should be locked because the encompassing unionfs
 	 * node is locked, but will not be referenced, as the reference will
 	 * only be on the unionfs node.  Reference them now so that the vput()s
 	 * performed by VOP_VPUT_PAIR() will have a reference to drop.
 	 */
 	if (udvp != NULLVP)
 		vref(udvp);
 	if (ldvp != NULLVP)
 		vref(ldvp);
 
 	if (vpp != NULL)
 		vp = *vpp;
 
 	if (vp != NULLVP) {
 		unp = VTOUNIONFS(vp);
 		uvp = unp->un_uppervp;
 		lvp = unp->un_lowervp;
 		if (uvp != NULLVP)
 			vref(uvp);
 		if (lvp != NULLVP)
 			vref(lvp);
 
 		/*
 		 * If we're being asked to return a locked child vnode, then
 		 * we may need to create a replacement vnode in case the
 		 * original is reclaimed while the lock is dropped.  In that
 		 * case we'll need to ensure the mount and the underlying
 		 * vnodes aren't also recycled during that window.
 		 */
 		if (!ap->a_unlock_vp) {
 			vhold(vp);
 			if (uvp != NULLVP)
 				vhold(uvp);
 			if (lvp != NULLVP)
 				vhold(lvp);
 			mp = vp->v_mount;
 			vfs_ref(mp);
 		}
 	}
 
 	/*
 	 * TODO: Because unionfs_lock() locks both the lower and upper vnodes
 	 * (if available), we must also call VOP_VPUT_PAIR() on both the lower
 	 * and upper parent/child pairs.  If unionfs_lock() is reworked to lock
 	 * only a single vnode, this code will need to change to also only
 	 * operate on one vnode pair.
 	 */
 	ASSERT_VOP_LOCKED(ldvp, __func__);
 	ASSERT_VOP_LOCKED(udvp, __func__);
 	ASSERT_VOP_LOCKED(lvp, __func__);
 	ASSERT_VOP_LOCKED(uvp, __func__);
 
 	KASSERT(lvp == NULLVP || ldvp != NULLVP,
 	    ("%s: NULL ldvp with non-NULL lvp", __func__));
 	if (ldvp != NULLVP)
 		res = VOP_VPUT_PAIR(ldvp, lvp != NULLVP ? &lvp : NULL, true);
 	KASSERT(uvp == NULLVP || udvp != NULLVP,
 	    ("%s: NULL udvp with non-NULL uvp", __func__));
 	if (udvp != NULLVP)
 		res = VOP_VPUT_PAIR(udvp, uvp != NULLVP ? &uvp : NULL, true);
 
 	ASSERT_VOP_UNLOCKED(ldvp, __func__);
 	ASSERT_VOP_UNLOCKED(udvp, __func__);
 	ASSERT_VOP_UNLOCKED(lvp, __func__);
 	ASSERT_VOP_UNLOCKED(uvp, __func__);
 
 	/*
 	 * VOP_VPUT_PAIR() dropped the references we added to the underlying
 	 * vnodes, now drop the caller's reference to the unionfs vnodes.
 	 */
 	if (vp != NULLVP && ap->a_unlock_vp)
 		vrele(vp);
 	vrele(dvp);
 
 	if (vp == NULLVP || ap->a_unlock_vp)
 		return (res);
 
 	/*
 	 * We're being asked to return a locked vnode.  At this point, the
 	 * underlying vnodes have been unlocked, so vp may have been reclaimed.
 	 */
 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 	if (vp->v_data == NULL && vfs_busy(mp, MBF_NOWAIT) == 0) {
 		vput(vp);
 		error = unionfs_nodeget(mp, uvp, lvp, dvp, &tempvp, NULL);
 		if (error == 0) {
 			vn_lock(tempvp, LK_EXCLUSIVE | LK_RETRY);
 			*vpp = tempvp;
 		} else
 			vget(vp, LK_EXCLUSIVE | LK_RETRY);
 		vfs_unbusy(mp);
 	}
 	if (lvp != NULLVP)
 		vdrop(lvp);
 	if (uvp != NULLVP)
 		vdrop(uvp);
 	vdrop(vp);
 	vfs_rel(mp);
 
 	return (res);
 }
 
 static int
 unionfs_set_text(struct vop_set_text_args *ap)
 {
 	struct vnode *tvp;
 	struct unionfs_node *unp;
 	int error;
 
 	/*
 	 * We assume text refs are managed against lvp/uvp through the
 	 * executable mapping backed by its VM object.  We therefore don't
 	 * need to track leased text refs in the case of a forcible unmount.
 	 */
 	unp = VTOUNIONFS(ap->a_vp);
 	ASSERT_VOP_LOCKED(ap->a_vp, __func__);
 	tvp = unp->un_uppervp != NULL ? unp->un_uppervp : unp->un_lowervp;
 	error = VOP_SET_TEXT(tvp);
 	return (error);
 }
 
 static int
 unionfs_unset_text(struct vop_unset_text_args *ap)
 {
 	struct vnode *tvp;
 	struct unionfs_node *unp;
 
 	ASSERT_VOP_LOCKED(ap->a_vp, __func__);
 	unp = VTOUNIONFS(ap->a_vp);
 	tvp = unp->un_uppervp != NULL ? unp->un_uppervp : unp->un_lowervp;
 	VOP_UNSET_TEXT_CHECKED(tvp);
 	return (0);
 }
 
 struct vop_vector unionfs_vnodeops = {
 	.vop_default =		&default_vnodeops,
 
 	.vop_access =		unionfs_access,
 	.vop_aclcheck =		unionfs_aclcheck,
 	.vop_advlock =		unionfs_advlock,
 	.vop_bmap =		VOP_EOPNOTSUPP,
 	.vop_cachedlookup =	unionfs_lookup,
 	.vop_close =		unionfs_close,
 	.vop_closeextattr =	unionfs_closeextattr,
 	.vop_create =		unionfs_create,
 	.vop_deleteextattr =	unionfs_deleteextattr,
 	.vop_fsync =		unionfs_fsync,
 	.vop_getacl =		unionfs_getacl,
 	.vop_getattr =		unionfs_getattr,
 	.vop_getextattr =	unionfs_getextattr,
 	.vop_getwritemount =	unionfs_getwritemount,
 	.vop_inactive =		unionfs_inactive,
 	.vop_need_inactive =	vop_stdneed_inactive,
 	.vop_islocked =		unionfs_islocked,
 	.vop_ioctl =		unionfs_ioctl,
 	.vop_link =		unionfs_link,
 	.vop_listextattr =	unionfs_listextattr,
 	.vop_lock1 =		unionfs_lock,
 	.vop_lookup =		vfs_cache_lookup,
 	.vop_mkdir =		unionfs_mkdir,
 	.vop_mknod =		unionfs_mknod,
 	.vop_open =		unionfs_open,
 	.vop_openextattr =	unionfs_openextattr,
 	.vop_pathconf =		unionfs_pathconf,
 	.vop_poll =		unionfs_poll,
 	.vop_print =		unionfs_print,
 	.vop_read =		unionfs_read,
 	.vop_readdir =		unionfs_readdir,
 	.vop_readlink =		unionfs_readlink,
 	.vop_reclaim =		unionfs_reclaim,
 	.vop_remove =		unionfs_remove,
 	.vop_rename =		unionfs_rename,
 	.vop_rmdir =		unionfs_rmdir,
 	.vop_setacl =		unionfs_setacl,
 	.vop_setattr =		unionfs_setattr,
 	.vop_setextattr =	unionfs_setextattr,
 	.vop_setlabel =		unionfs_setlabel,
 	.vop_strategy =		unionfs_strategy,
 	.vop_symlink =		unionfs_symlink,
 	.vop_unlock =		unionfs_unlock,
 	.vop_whiteout =		unionfs_whiteout,
 	.vop_write =		unionfs_write,
 	.vop_vptofh =		unionfs_vptofh,
 	.vop_add_writecount =	unionfs_add_writecount,
 	.vop_vput_pair =	unionfs_vput_pair,
 	.vop_set_text =		unionfs_set_text,
 	.vop_unset_text = 	unionfs_unset_text,
 };
 VFS_VOP_VECTOR_REGISTER(unionfs_vnodeops);
diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c
index 364c10987ea6..cb45d18fbb85 100644
--- a/sys/kern/kern_exec.c
+++ b/sys/kern/kern_exec.c
@@ -1,2086 +1,2086 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 1993, David Greenman
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_capsicum.h"
 #include "opt_hwpmc_hooks.h"
 #include "opt_ktrace.h"
 #include "opt_vm.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/acct.h>
 #include <sys/asan.h>
 #include <sys/capsicum.h>
 #include <sys/compressor.h>
 #include <sys/eventhandler.h>
 #include <sys/exec.h>
 #include <sys/fcntl.h>
 #include <sys/filedesc.h>
 #include <sys/imgact.h>
 #include <sys/imgact_elf.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mman.h>
 #include <sys/mount.h>
 #include <sys/mutex.h>
 #include <sys/namei.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/ptrace.h>
 #include <sys/reg.h>
 #include <sys/resourcevar.h>
 #include <sys/rwlock.h>
 #include <sys/sched.h>
 #include <sys/sdt.h>
 #include <sys/sf_buf.h>
 #include <sys/shm.h>
 #include <sys/signalvar.h>
 #include <sys/smp.h>
 #include <sys/stat.h>
 #include <sys/syscallsubr.h>
 #include <sys/sysctl.h>
 #include <sys/sysent.h>
 #include <sys/sysproto.h>
 #include <sys/timers.h>
 #include <sys/umtxvar.h>
 #include <sys/vnode.h>
 #include <sys/wait.h>
 #ifdef KTRACE
 #include <sys/ktrace.h>
 #endif
 
 #include <vm/vm.h>
 #include <vm/vm_param.h>
 #include <vm/pmap.h>
 #include <vm/vm_page.h>
 #include <vm/vm_map.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_object.h>
 #include <vm/vm_pager.h>
 
 #ifdef	HWPMC_HOOKS
 #include <sys/pmckern.h>
 #endif
 
 #include <security/audit/audit.h>
 #include <security/mac/mac_framework.h>
 
 #ifdef KDTRACE_HOOKS
 #include <sys/dtrace_bsd.h>
 dtrace_execexit_func_t	dtrace_fasttrap_exec;
 #endif
 
 SDT_PROVIDER_DECLARE(proc);
 SDT_PROBE_DEFINE1(proc, , , exec, "char *");
 SDT_PROBE_DEFINE1(proc, , , exec__failure, "int");
 SDT_PROBE_DEFINE1(proc, , , exec__success, "char *");
 
 MALLOC_DEFINE(M_PARGS, "proc-args", "Process arguments");
 
 int coredump_pack_fileinfo = 1;
 SYSCTL_INT(_kern, OID_AUTO, coredump_pack_fileinfo, CTLFLAG_RWTUN,
     &coredump_pack_fileinfo, 0,
     "Enable file path packing in 'procstat -f' coredump notes");
 
 int coredump_pack_vmmapinfo = 1;
 SYSCTL_INT(_kern, OID_AUTO, coredump_pack_vmmapinfo, CTLFLAG_RWTUN,
     &coredump_pack_vmmapinfo, 0,
     "Enable file path packing in 'procstat -v' coredump notes");
 
 static int sysctl_kern_ps_strings(SYSCTL_HANDLER_ARGS);
 static int sysctl_kern_usrstack(SYSCTL_HANDLER_ARGS);
 static int sysctl_kern_stackprot(SYSCTL_HANDLER_ARGS);
 static int do_execve(struct thread *td, struct image_args *args,
     struct mac *mac_p, struct vmspace *oldvmspace);
 
 /* XXX This should be vm_size_t. */
 SYSCTL_PROC(_kern, KERN_PS_STRINGS, ps_strings, CTLTYPE_ULONG|CTLFLAG_RD|
     CTLFLAG_CAPRD|CTLFLAG_MPSAFE, NULL, 0, sysctl_kern_ps_strings, "LU",
     "Location of process' ps_strings structure");
 
 /* XXX This should be vm_size_t. */
 SYSCTL_PROC(_kern, KERN_USRSTACK, usrstack, CTLTYPE_ULONG|CTLFLAG_RD|
     CTLFLAG_CAPRD|CTLFLAG_MPSAFE, NULL, 0, sysctl_kern_usrstack, "LU",
     "Top of process stack");
 
 SYSCTL_PROC(_kern, OID_AUTO, stackprot, CTLTYPE_INT|CTLFLAG_RD|CTLFLAG_MPSAFE,
     NULL, 0, sysctl_kern_stackprot, "I",
     "Stack memory permissions");
 
 u_long ps_arg_cache_limit = PAGE_SIZE / 16;
 SYSCTL_ULONG(_kern, OID_AUTO, ps_arg_cache_limit, CTLFLAG_RW, 
     &ps_arg_cache_limit, 0,
     "Process' command line characters cache limit");
 
 static int disallow_high_osrel;
 SYSCTL_INT(_kern, OID_AUTO, disallow_high_osrel, CTLFLAG_RW,
     &disallow_high_osrel, 0,
     "Disallow execution of binaries built for higher version of the world");
 
 static int map_at_zero = 0;
 SYSCTL_INT(_security_bsd, OID_AUTO, map_at_zero, CTLFLAG_RWTUN, &map_at_zero, 0,
     "Permit processes to map an object at virtual address 0.");
 
 static int core_dump_can_intr = 1;
 SYSCTL_INT(_kern, OID_AUTO, core_dump_can_intr, CTLFLAG_RWTUN,
     &core_dump_can_intr, 0,
     "Core dumping interruptible with SIGKILL");
 
 static int
 sysctl_kern_ps_strings(SYSCTL_HANDLER_ARGS)
 {
 	struct proc *p;
 	vm_offset_t ps_strings;
 
 	p = curproc;
 #ifdef SCTL_MASK32
 	if (req->flags & SCTL_MASK32) {
 		unsigned int val;
 		val = (unsigned int)PROC_PS_STRINGS(p);
 		return (SYSCTL_OUT(req, &val, sizeof(val)));
 	}
 #endif
 	ps_strings = PROC_PS_STRINGS(p);
 	return (SYSCTL_OUT(req, &ps_strings, sizeof(ps_strings)));
 }
 
 static int
 sysctl_kern_usrstack(SYSCTL_HANDLER_ARGS)
 {
 	struct proc *p;
 	vm_offset_t val;
 
 	p = curproc;
 #ifdef SCTL_MASK32
 	if (req->flags & SCTL_MASK32) {
 		unsigned int val32;
 
 		val32 = round_page((unsigned int)p->p_vmspace->vm_stacktop);
 		return (SYSCTL_OUT(req, &val32, sizeof(val32)));
 	}
 #endif
 	val = round_page(p->p_vmspace->vm_stacktop);
 	return (SYSCTL_OUT(req, &val, sizeof(val)));
 }
 
 static int
 sysctl_kern_stackprot(SYSCTL_HANDLER_ARGS)
 {
 	struct proc *p;
 
 	p = curproc;
 	return (SYSCTL_OUT(req, &p->p_sysent->sv_stackprot,
 	    sizeof(p->p_sysent->sv_stackprot)));
 }
 
 /*
  * Each of the items is a pointer to a `const struct execsw', hence the
  * double pointer here.
  */
 static const struct execsw **execsw;
 
 #ifndef _SYS_SYSPROTO_H_
 struct execve_args {
 	char    *fname;
 	char    **argv;
 	char    **envv;
 };
 #endif
 
 int
 sys_execve(struct thread *td, struct execve_args *uap)
 {
 	struct image_args args;
 	struct vmspace *oldvmspace;
 	int error;
 
 	error = pre_execve(td, &oldvmspace);
 	if (error != 0)
 		return (error);
 	error = exec_copyin_args(&args, uap->fname, UIO_USERSPACE,
 	    uap->argv, uap->envv);
 	if (error == 0)
 		error = kern_execve(td, &args, NULL, oldvmspace);
 	post_execve(td, error, oldvmspace);
 	AUDIT_SYSCALL_EXIT(error == EJUSTRETURN ? 0 : error, td);
 	return (error);
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct fexecve_args {
 	int	fd;
 	char	**argv;
 	char	**envv;
 };
 #endif
 int
 sys_fexecve(struct thread *td, struct fexecve_args *uap)
 {
 	struct image_args args;
 	struct vmspace *oldvmspace;
 	int error;
 
 	error = pre_execve(td, &oldvmspace);
 	if (error != 0)
 		return (error);
 	error = exec_copyin_args(&args, NULL, UIO_SYSSPACE,
 	    uap->argv, uap->envv);
 	if (error == 0) {
 		args.fd = uap->fd;
 		error = kern_execve(td, &args, NULL, oldvmspace);
 	}
 	post_execve(td, error, oldvmspace);
 	AUDIT_SYSCALL_EXIT(error == EJUSTRETURN ? 0 : error, td);
 	return (error);
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct __mac_execve_args {
 	char	*fname;
 	char	**argv;
 	char	**envv;
 	struct mac	*mac_p;
 };
 #endif
 
 int
 sys___mac_execve(struct thread *td, struct __mac_execve_args *uap)
 {
 #ifdef MAC
 	struct image_args args;
 	struct vmspace *oldvmspace;
 	int error;
 
 	error = pre_execve(td, &oldvmspace);
 	if (error != 0)
 		return (error);
 	error = exec_copyin_args(&args, uap->fname, UIO_USERSPACE,
 	    uap->argv, uap->envv);
 	if (error == 0)
 		error = kern_execve(td, &args, uap->mac_p, oldvmspace);
 	post_execve(td, error, oldvmspace);
 	AUDIT_SYSCALL_EXIT(error == EJUSTRETURN ? 0 : error, td);
 	return (error);
 #else
 	return (ENOSYS);
 #endif
 }
 
 int
 pre_execve(struct thread *td, struct vmspace **oldvmspace)
 {
 	struct proc *p;
 	int error;
 
 	KASSERT(td == curthread, ("non-current thread %p", td));
 	error = 0;
 	p = td->td_proc;
 	if ((p->p_flag & P_HADTHREADS) != 0) {
 		PROC_LOCK(p);
 		if (thread_single(p, SINGLE_BOUNDARY) != 0)
 			error = ERESTART;
 		PROC_UNLOCK(p);
 	}
 	KASSERT(error != 0 || (td->td_pflags & TDP_EXECVMSPC) == 0,
 	    ("nested execve"));
 	*oldvmspace = p->p_vmspace;
 	return (error);
 }
 
 void
 post_execve(struct thread *td, int error, struct vmspace *oldvmspace)
 {
 	struct proc *p;
 
 	KASSERT(td == curthread, ("non-current thread %p", td));
 	p = td->td_proc;
 	if ((p->p_flag & P_HADTHREADS) != 0) {
 		PROC_LOCK(p);
 		/*
 		 * If success, we upgrade to SINGLE_EXIT state to
 		 * force other threads to suicide.
 		 */
 		if (error == EJUSTRETURN)
 			thread_single(p, SINGLE_EXIT);
 		else
 			thread_single_end(p, SINGLE_BOUNDARY);
 		PROC_UNLOCK(p);
 	}
 	exec_cleanup(td, oldvmspace);
 }
 
 /*
  * kern_execve() has the astonishing property of not always returning to
  * the caller.  If sufficiently bad things happen during the call to
  * do_execve(), it can end up calling exit1(); as a result, callers must
  * avoid doing anything which they might need to undo (e.g., allocating
  * memory).
  */
 int
 kern_execve(struct thread *td, struct image_args *args, struct mac *mac_p,
     struct vmspace *oldvmspace)
 {
 
 	TSEXEC(td->td_proc->p_pid, args->begin_argv);
 	AUDIT_ARG_ARGV(args->begin_argv, args->argc,
 	    exec_args_get_begin_envv(args) - args->begin_argv);
 	AUDIT_ARG_ENVV(exec_args_get_begin_envv(args), args->envc,
 	    args->endp - exec_args_get_begin_envv(args));
 
 	/* Must have at least one argument. */
 	if (args->argc == 0) {
 		exec_free_args(args);
 		return (EINVAL);
 	}
 	return (do_execve(td, args, mac_p, oldvmspace));
 }
 
 static void
 execve_nosetid(struct image_params *imgp)
 {
 	imgp->credential_setid = false;
 	if (imgp->newcred != NULL) {
 		crfree(imgp->newcred);
 		imgp->newcred = NULL;
 	}
 }
 
 /*
  * In-kernel implementation of execve().  All arguments are assumed to be
  * userspace pointers from the passed thread.
  */
 static int
 do_execve(struct thread *td, struct image_args *args, struct mac *mac_p,
     struct vmspace *oldvmspace)
 {
 	struct proc *p = td->td_proc;
 	struct nameidata nd;
 	struct ucred *oldcred;
 	struct uidinfo *euip = NULL;
 	uintptr_t stack_base;
 	struct image_params image_params, *imgp;
 	struct vattr attr;
 	int (*img_first)(struct image_params *);
 	struct pargs *oldargs = NULL, *newargs = NULL;
 	struct sigacts *oldsigacts = NULL, *newsigacts = NULL;
 #ifdef KTRACE
 	struct ktr_io_params *kiop;
 #endif
 	struct vnode *oldtextvp, *newtextvp;
 	struct vnode *oldtextdvp, *newtextdvp;
 	char *oldbinname, *newbinname;
 	bool credential_changing;
 #ifdef MAC
 	struct label *interpvplabel = NULL;
 	bool will_transition;
 #endif
 #ifdef HWPMC_HOOKS
 	struct pmckern_procexec pe;
 #endif
 	int error, i, orig_osrel;
 	uint32_t orig_fctl0;
 	Elf_Brandinfo *orig_brandinfo;
 	size_t freepath_size;
 	static const char fexecv_proc_title[] = "(fexecv)";
 
 	imgp = &image_params;
 	oldtextvp = oldtextdvp = NULL;
 	newtextvp = newtextdvp = NULL;
 	newbinname = oldbinname = NULL;
 #ifdef KTRACE
 	kiop = NULL;
 #endif
 
 	/*
 	 * Lock the process and set the P_INEXEC flag to indicate that
 	 * it should be left alone until we're done here.  This is
 	 * necessary to avoid race conditions - e.g. in ptrace() -
 	 * that might allow a local user to illicitly obtain elevated
 	 * privileges.
 	 */
 	PROC_LOCK(p);
 	KASSERT((p->p_flag & P_INEXEC) == 0,
 	    ("%s(): process already has P_INEXEC flag", __func__));
 	p->p_flag |= P_INEXEC;
 	PROC_UNLOCK(p);
 
 	/*
 	 * Initialize part of the common data
 	 */
 	bzero(imgp, sizeof(*imgp));
 	imgp->proc = p;
 	imgp->attr = &attr;
 	imgp->args = args;
 	oldcred = p->p_ucred;
 	orig_osrel = p->p_osrel;
 	orig_fctl0 = p->p_fctl0;
 	orig_brandinfo = p->p_elf_brandinfo;
 
 #ifdef MAC
 	error = mac_execve_enter(imgp, mac_p);
 	if (error)
 		goto exec_fail;
 #endif
 
 	SDT_PROBE1(proc, , , exec, args->fname);
 
 interpret:
 	if (args->fname != NULL) {
 #ifdef CAPABILITY_MODE
 		/*
 		 * While capability mode can't reach this point via direct
 		 * path arguments to execve(), we also don't allow
 		 * interpreters to be used in capability mode (for now).
 		 * Catch indirect lookups and return a permissions error.
 		 */
 		if (IN_CAPABILITY_MODE(td)) {
 			error = ECAPMODE;
 			goto exec_fail;
 		}
 #endif
 
 		/*
 		 * Translate the file name. namei() returns a vnode
 		 * pointer in ni_vp among other things.
 		 */
 		NDINIT(&nd, LOOKUP, ISOPEN | LOCKLEAF | LOCKSHARED | FOLLOW |
-		    SAVENAME | AUDITVNODE1 | WANTPARENT, UIO_SYSSPACE,
+		    AUDITVNODE1 | WANTPARENT, UIO_SYSSPACE,
 		    args->fname);
 
 		error = namei(&nd);
 		if (error)
 			goto exec_fail;
 
 		newtextvp = nd.ni_vp;
 		newtextdvp = nd.ni_dvp;
 		nd.ni_dvp = NULL;
 		newbinname = malloc(nd.ni_cnd.cn_namelen + 1, M_PARGS,
 		    M_WAITOK);
 		memcpy(newbinname, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen);
 		newbinname[nd.ni_cnd.cn_namelen] = '\0';
 		imgp->vp = newtextvp;
 
 		/*
 		 * Do the best to calculate the full path to the image file.
 		 */
 		if (args->fname[0] == '/') {
 			imgp->execpath = args->fname;
 		} else {
 			VOP_UNLOCK(imgp->vp);
 			freepath_size = MAXPATHLEN;
 			if (vn_fullpath_hardlink(newtextvp, newtextdvp,
 			    newbinname, nd.ni_cnd.cn_namelen, &imgp->execpath,
 			    &imgp->freepath, &freepath_size) != 0)
 				imgp->execpath = args->fname;
 			vn_lock(imgp->vp, LK_SHARED | LK_RETRY);
 		}
 	} else {
 		AUDIT_ARG_FD(args->fd);
 
 		/*
 		 * If the descriptors was not opened with O_PATH, then
 		 * we require that it was opened with O_EXEC or
 		 * O_RDONLY.  In either case, exec_check_permissions()
 		 * below checks _current_ file access mode regardless
 		 * of the permissions additionally checked at the
 		 * open(2).
 		 */
 		error = fgetvp_exec(td, args->fd, &cap_fexecve_rights,
 		    &newtextvp);
 		if (error != 0)
 			goto exec_fail;
 
 		if (vn_fullpath(newtextvp, &imgp->execpath,
 		    &imgp->freepath) != 0)
 			imgp->execpath = args->fname;
 		vn_lock(newtextvp, LK_SHARED | LK_RETRY);
 		AUDIT_ARG_VNODE1(newtextvp);
 		imgp->vp = newtextvp;
 	}
 
 	/*
 	 * Check file permissions.  Also 'opens' file and sets its vnode to
 	 * text mode.
 	 */
 	error = exec_check_permissions(imgp);
 	if (error)
 		goto exec_fail_dealloc;
 
 	imgp->object = imgp->vp->v_object;
 	if (imgp->object != NULL)
 		vm_object_reference(imgp->object);
 
 	error = exec_map_first_page(imgp);
 	if (error)
 		goto exec_fail_dealloc;
 
 	imgp->proc->p_osrel = 0;
 	imgp->proc->p_fctl0 = 0;
 	imgp->proc->p_elf_brandinfo = NULL;
 
 	/*
 	 * Implement image setuid/setgid.
 	 *
 	 * Determine new credentials before attempting image activators
 	 * so that it can be used by process_exec handlers to determine
 	 * credential/setid changes.
 	 *
 	 * Don't honor setuid/setgid if the filesystem prohibits it or if
 	 * the process is being traced.
 	 *
 	 * We disable setuid/setgid/etc in capability mode on the basis
 	 * that most setugid applications are not written with that
 	 * environment in mind, and will therefore almost certainly operate
 	 * incorrectly. In principle there's no reason that setugid
 	 * applications might not be useful in capability mode, so we may want
 	 * to reconsider this conservative design choice in the future.
 	 *
 	 * XXXMAC: For the time being, use NOSUID to also prohibit
 	 * transitions on the file system.
 	 */
 	credential_changing = false;
 	credential_changing |= (attr.va_mode & S_ISUID) &&
 	    oldcred->cr_uid != attr.va_uid;
 	credential_changing |= (attr.va_mode & S_ISGID) &&
 	    oldcred->cr_gid != attr.va_gid;
 #ifdef MAC
 	will_transition = mac_vnode_execve_will_transition(oldcred, imgp->vp,
 	    interpvplabel, imgp) != 0;
 	credential_changing |= will_transition;
 #endif
 
 	/* Don't inherit PROC_PDEATHSIG_CTL value if setuid/setgid. */
 	if (credential_changing)
 		imgp->proc->p_pdeathsig = 0;
 
 	if (credential_changing &&
 #ifdef CAPABILITY_MODE
 	    ((oldcred->cr_flags & CRED_FLAG_CAPMODE) == 0) &&
 #endif
 	    (imgp->vp->v_mount->mnt_flag & MNT_NOSUID) == 0 &&
 	    (p->p_flag & P_TRACED) == 0) {
 		imgp->credential_setid = true;
 		VOP_UNLOCK(imgp->vp);
 		imgp->newcred = crdup(oldcred);
 		if (attr.va_mode & S_ISUID) {
 			euip = uifind(attr.va_uid);
 			change_euid(imgp->newcred, euip);
 		}
 		vn_lock(imgp->vp, LK_SHARED | LK_RETRY);
 		if (attr.va_mode & S_ISGID)
 			change_egid(imgp->newcred, attr.va_gid);
 		/*
 		 * Implement correct POSIX saved-id behavior.
 		 *
 		 * XXXMAC: Note that the current logic will save the
 		 * uid and gid if a MAC domain transition occurs, even
 		 * though maybe it shouldn't.
 		 */
 		change_svuid(imgp->newcred, imgp->newcred->cr_uid);
 		change_svgid(imgp->newcred, imgp->newcred->cr_gid);
 	} else {
 		/*
 		 * Implement correct POSIX saved-id behavior.
 		 *
 		 * XXX: It's not clear that the existing behavior is
 		 * POSIX-compliant.  A number of sources indicate that the
 		 * saved uid/gid should only be updated if the new ruid is
 		 * not equal to the old ruid, or the new euid is not equal
 		 * to the old euid and the new euid is not equal to the old
 		 * ruid.  The FreeBSD code always updates the saved uid/gid.
 		 * Also, this code uses the new (replaced) euid and egid as
 		 * the source, which may or may not be the right ones to use.
 		 */
 		if (oldcred->cr_svuid != oldcred->cr_uid ||
 		    oldcred->cr_svgid != oldcred->cr_gid) {
 			VOP_UNLOCK(imgp->vp);
 			imgp->newcred = crdup(oldcred);
 			vn_lock(imgp->vp, LK_SHARED | LK_RETRY);
 			change_svuid(imgp->newcred, imgp->newcred->cr_uid);
 			change_svgid(imgp->newcred, imgp->newcred->cr_gid);
 		}
 	}
 	/* The new credentials are installed into the process later. */
 
 	/*
 	 *	If the current process has a special image activator it
 	 *	wants to try first, call it.   For example, emulating shell
 	 *	scripts differently.
 	 */
 	error = -1;
 	if ((img_first = imgp->proc->p_sysent->sv_imgact_try) != NULL)
 		error = img_first(imgp);
 
 	/*
 	 *	Loop through the list of image activators, calling each one.
 	 *	An activator returns -1 if there is no match, 0 on success,
 	 *	and an error otherwise.
 	 */
 	for (i = 0; error == -1 && execsw[i]; ++i) {
 		if (execsw[i]->ex_imgact == NULL ||
 		    execsw[i]->ex_imgact == img_first) {
 			continue;
 		}
 		error = (*execsw[i]->ex_imgact)(imgp);
 	}
 
 	if (error) {
 		if (error == -1)
 			error = ENOEXEC;
 		goto exec_fail_dealloc;
 	}
 
 	/*
 	 * Special interpreter operation, cleanup and loop up to try to
 	 * activate the interpreter.
 	 */
 	if (imgp->interpreted) {
 		exec_unmap_first_page(imgp);
 		/*
 		 * The text reference needs to be removed for scripts.
 		 * There is a short period before we determine that
 		 * something is a script where text reference is active.
 		 * The vnode lock is held over this entire period
 		 * so nothing should illegitimately be blocked.
 		 */
 		MPASS(imgp->textset);
 		VOP_UNSET_TEXT_CHECKED(newtextvp);
 		imgp->textset = false;
 		/* free name buffer and old vnode */
 #ifdef MAC
 		mac_execve_interpreter_enter(newtextvp, &interpvplabel);
 #endif
 		if (imgp->opened) {
 			VOP_CLOSE(newtextvp, FREAD, td->td_ucred, td);
 			imgp->opened = false;
 		}
 		vput(newtextvp);
 		imgp->vp = newtextvp = NULL;
 		if (args->fname != NULL) {
 			if (newtextdvp != NULL) {
 				vrele(newtextdvp);
 				newtextdvp = NULL;
 			}
 			NDFREE_PNBUF(&nd);
 			free(newbinname, M_PARGS);
 			newbinname = NULL;
 		}
 		vm_object_deallocate(imgp->object);
 		imgp->object = NULL;
 		execve_nosetid(imgp);
 		imgp->execpath = NULL;
 		free(imgp->freepath, M_TEMP);
 		imgp->freepath = NULL;
 		/* set new name to that of the interpreter */
 		args->fname = imgp->interpreter_name;
 		goto interpret;
 	}
 
 	/*
 	 * NB: We unlock the vnode here because it is believed that none
 	 * of the sv_copyout_strings/sv_fixup operations require the vnode.
 	 */
 	VOP_UNLOCK(imgp->vp);
 
 	if (disallow_high_osrel &&
 	    P_OSREL_MAJOR(p->p_osrel) > P_OSREL_MAJOR(__FreeBSD_version)) {
 		error = ENOEXEC;
 		uprintf("Osrel %d for image %s too high\n", p->p_osrel,
 		    imgp->execpath != NULL ? imgp->execpath : "<unresolved>");
 		vn_lock(imgp->vp, LK_SHARED | LK_RETRY);
 		goto exec_fail_dealloc;
 	}
 
 	/*
 	 * Copy out strings (args and env) and initialize stack base.
 	 */
 	error = (*p->p_sysent->sv_copyout_strings)(imgp, &stack_base);
 	if (error != 0) {
 		vn_lock(imgp->vp, LK_SHARED | LK_RETRY);
 		goto exec_fail_dealloc;
 	}
 
 	/*
 	 * Stack setup.
 	 */
 	error = (*p->p_sysent->sv_fixup)(&stack_base, imgp);
 	if (error != 0) {
 		vn_lock(imgp->vp, LK_SHARED | LK_RETRY);
 		goto exec_fail_dealloc;
 	}
 
 	/*
 	 * For security and other reasons, the file descriptor table cannot be
 	 * shared after an exec.
 	 */
 	fdunshare(td);
 	pdunshare(td);
 	/* close files on exec */
 	fdcloseexec(td);
 
 	/*
 	 * Malloc things before we need locks.
 	 */
 	i = exec_args_get_begin_envv(imgp->args) - imgp->args->begin_argv;
 	/* Cache arguments if they fit inside our allowance */
 	if (ps_arg_cache_limit >= i + sizeof(struct pargs)) {
 		newargs = pargs_alloc(i);
 		bcopy(imgp->args->begin_argv, newargs->ar_args, i);
 	}
 
 	/*
 	 * For security and other reasons, signal handlers cannot
 	 * be shared after an exec. The new process gets a copy of the old
 	 * handlers. In execsigs(), the new process will have its signals
 	 * reset.
 	 */
 	if (sigacts_shared(p->p_sigacts)) {
 		oldsigacts = p->p_sigacts;
 		newsigacts = sigacts_alloc();
 		sigacts_copy(newsigacts, oldsigacts);
 	}
 
 	vn_lock(imgp->vp, LK_SHARED | LK_RETRY);
 
 	PROC_LOCK(p);
 	if (oldsigacts)
 		p->p_sigacts = newsigacts;
 	/* Stop profiling */
 	stopprofclock(p);
 
 	/* reset caught signals */
 	execsigs(p);
 
 	/* name this process - nameiexec(p, ndp) */
 	bzero(p->p_comm, sizeof(p->p_comm));
 	if (args->fname)
 		bcopy(nd.ni_cnd.cn_nameptr, p->p_comm,
 		    min(nd.ni_cnd.cn_namelen, MAXCOMLEN));
 	else if (vn_commname(newtextvp, p->p_comm, sizeof(p->p_comm)) != 0)
 		bcopy(fexecv_proc_title, p->p_comm, sizeof(fexecv_proc_title));
 	bcopy(p->p_comm, td->td_name, sizeof(td->td_name));
 #ifdef KTR
 	sched_clear_tdname(td);
 #endif
 
 	/*
 	 * mark as execed, wakeup the process that vforked (if any) and tell
 	 * it that it now has its own resources back
 	 */
 	p->p_flag |= P_EXEC;
 	if ((p->p_flag2 & P2_NOTRACE_EXEC) == 0)
 		p->p_flag2 &= ~P2_NOTRACE;
 	if ((p->p_flag2 & P2_STKGAP_DISABLE_EXEC) == 0)
 		p->p_flag2 &= ~P2_STKGAP_DISABLE;
 	if (p->p_flag & P_PPWAIT) {
 		p->p_flag &= ~(P_PPWAIT | P_PPTRACE);
 		cv_broadcast(&p->p_pwait);
 		/* STOPs are no longer ignored, arrange for AST */
 		signotify(td);
 	}
 
 	if ((imgp->sysent->sv_setid_allowed != NULL &&
 	    !(*imgp->sysent->sv_setid_allowed)(td, imgp)) ||
 	    (p->p_flag2 & P2_NO_NEW_PRIVS) != 0)
 		execve_nosetid(imgp);
 
 	/*
 	 * Implement image setuid/setgid installation.
 	 */
 	if (imgp->credential_setid) {
 		/*
 		 * Turn off syscall tracing for set-id programs, except for
 		 * root.  Record any set-id flags first to make sure that
 		 * we do not regain any tracing during a possible block.
 		 */
 		setsugid(p);
 #ifdef KTRACE
 		kiop = ktrprocexec(p);
 #endif
 		/*
 		 * Close any file descriptors 0..2 that reference procfs,
 		 * then make sure file descriptors 0..2 are in use.
 		 *
 		 * Both fdsetugidsafety() and fdcheckstd() may call functions
 		 * taking sleepable locks, so temporarily drop our locks.
 		 */
 		PROC_UNLOCK(p);
 		VOP_UNLOCK(imgp->vp);
 		fdsetugidsafety(td);
 		error = fdcheckstd(td);
 		vn_lock(imgp->vp, LK_SHARED | LK_RETRY);
 		if (error != 0)
 			goto exec_fail_dealloc;
 		PROC_LOCK(p);
 #ifdef MAC
 		if (will_transition) {
 			mac_vnode_execve_transition(oldcred, imgp->newcred,
 			    imgp->vp, interpvplabel, imgp);
 		}
 #endif
 	} else {
 		if (oldcred->cr_uid == oldcred->cr_ruid &&
 		    oldcred->cr_gid == oldcred->cr_rgid)
 			p->p_flag &= ~P_SUGID;
 	}
 	/*
 	 * Set the new credentials.
 	 */
 	if (imgp->newcred != NULL) {
 		proc_set_cred(p, imgp->newcred);
 		crfree(oldcred);
 		oldcred = NULL;
 	}
 
 	/*
 	 * Store the vp for use in kern.proc.pathname.  This vnode was
 	 * referenced by namei() or by fexecve variant of fname handling.
 	 */
 	oldtextvp = p->p_textvp;
 	p->p_textvp = newtextvp;
 	oldtextdvp = p->p_textdvp;
 	p->p_textdvp = newtextdvp;
 	newtextdvp = NULL;
 	oldbinname = p->p_binname;
 	p->p_binname = newbinname;
 	newbinname = NULL;
 
 #ifdef KDTRACE_HOOKS
 	/*
 	 * Tell the DTrace fasttrap provider about the exec if it
 	 * has declared an interest.
 	 */
 	if (dtrace_fasttrap_exec)
 		dtrace_fasttrap_exec(p);
 #endif
 
 	/*
 	 * Notify others that we exec'd, and clear the P_INEXEC flag
 	 * as we're now a bona fide freshly-execed process.
 	 */
 	KNOTE_LOCKED(p->p_klist, NOTE_EXEC);
 	p->p_flag &= ~P_INEXEC;
 
 	/* clear "fork but no exec" flag, as we _are_ execing */
 	p->p_acflag &= ~AFORK;
 
 	/*
 	 * Free any previous argument cache and replace it with
 	 * the new argument cache, if any.
 	 */
 	oldargs = p->p_args;
 	p->p_args = newargs;
 	newargs = NULL;
 
 	PROC_UNLOCK(p);
 
 #ifdef	HWPMC_HOOKS
 	/*
 	 * Check if system-wide sampling is in effect or if the
 	 * current process is using PMCs.  If so, do exec() time
 	 * processing.  This processing needs to happen AFTER the
 	 * P_INEXEC flag is cleared.
 	 */
 	if (PMC_SYSTEM_SAMPLING_ACTIVE() || PMC_PROC_IS_USING_PMCS(p)) {
 		VOP_UNLOCK(imgp->vp);
 		pe.pm_credentialschanged = credential_changing;
 		pe.pm_entryaddr = imgp->entry_addr;
 
 		PMC_CALL_HOOK_X(td, PMC_FN_PROCESS_EXEC, (void *) &pe);
 		vn_lock(imgp->vp, LK_SHARED | LK_RETRY);
 	}
 #endif
 
 	/* Set values passed into the program in registers. */
 	(*p->p_sysent->sv_setregs)(td, imgp, stack_base);
 
 	VOP_MMAPPED(imgp->vp);
 
 	SDT_PROBE1(proc, , , exec__success, args->fname);
 
 exec_fail_dealloc:
 	if (error != 0) {
 		p->p_osrel = orig_osrel;
 		p->p_fctl0 = orig_fctl0;
 		p->p_elf_brandinfo = orig_brandinfo;
 	}
 
 	if (imgp->firstpage != NULL)
 		exec_unmap_first_page(imgp);
 
 	if (imgp->vp != NULL) {
 		if (imgp->opened)
 			VOP_CLOSE(imgp->vp, FREAD, td->td_ucred, td);
 		if (imgp->textset)
 			VOP_UNSET_TEXT_CHECKED(imgp->vp);
 		if (error != 0)
 			vput(imgp->vp);
 		else
 			VOP_UNLOCK(imgp->vp);
 		if (args->fname != NULL)
 			NDFREE_PNBUF(&nd);
 		if (newtextdvp != NULL)
 			vrele(newtextdvp);
 		free(newbinname, M_PARGS);
 	}
 
 	if (imgp->object != NULL)
 		vm_object_deallocate(imgp->object);
 
 	free(imgp->freepath, M_TEMP);
 
 	if (error == 0) {
 		if (p->p_ptevents & PTRACE_EXEC) {
 			PROC_LOCK(p);
 			if (p->p_ptevents & PTRACE_EXEC)
 				td->td_dbgflags |= TDB_EXEC;
 			PROC_UNLOCK(p);
 		}
 	} else {
 exec_fail:
 		/* we're done here, clear P_INEXEC */
 		PROC_LOCK(p);
 		p->p_flag &= ~P_INEXEC;
 		PROC_UNLOCK(p);
 
 		SDT_PROBE1(proc, , , exec__failure, error);
 	}
 
 	if (imgp->newcred != NULL && oldcred != NULL)
 		crfree(imgp->newcred);
 
 #ifdef MAC
 	mac_execve_exit(imgp);
 	mac_execve_interpreter_exit(interpvplabel);
 #endif
 	exec_free_args(args);
 
 	/*
 	 * Handle deferred decrement of ref counts.
 	 */
 	if (oldtextvp != NULL)
 		vrele(oldtextvp);
 	if (oldtextdvp != NULL)
 		vrele(oldtextdvp);
 	free(oldbinname, M_PARGS);
 #ifdef KTRACE
 	ktr_io_params_free(kiop);
 #endif
 	pargs_drop(oldargs);
 	pargs_drop(newargs);
 	if (oldsigacts != NULL)
 		sigacts_free(oldsigacts);
 	if (euip != NULL)
 		uifree(euip);
 
 	if (error && imgp->vmspace_destroyed) {
 		/* sorry, no more process anymore. exit gracefully */
 		exec_cleanup(td, oldvmspace);
 		exit1(td, 0, SIGABRT);
 		/* NOT REACHED */
 	}
 
 #ifdef KTRACE
 	if (error == 0)
 		ktrprocctor(p);
 #endif
 
 	/*
 	 * We don't want cpu_set_syscall_retval() to overwrite any of
 	 * the register values put in place by exec_setregs().
 	 * Implementations of cpu_set_syscall_retval() will leave
 	 * registers unmodified when returning EJUSTRETURN.
 	 */
 	return (error == 0 ? EJUSTRETURN : error);
 }
 
 void
 exec_cleanup(struct thread *td, struct vmspace *oldvmspace)
 {
 	if ((td->td_pflags & TDP_EXECVMSPC) != 0) {
 		KASSERT(td->td_proc->p_vmspace != oldvmspace,
 		    ("oldvmspace still used"));
 		vmspace_free(oldvmspace);
 		td->td_pflags &= ~TDP_EXECVMSPC;
 	}
 }
 
 int
 exec_map_first_page(struct image_params *imgp)
 {
 	vm_object_t object;
 	vm_page_t m;
 	int error;
 
 	if (imgp->firstpage != NULL)
 		exec_unmap_first_page(imgp);
 
 	object = imgp->vp->v_object;
 	if (object == NULL)
 		return (EACCES);
 #if VM_NRESERVLEVEL > 0
 	if ((object->flags & OBJ_COLORED) == 0) {
 		VM_OBJECT_WLOCK(object);
 		vm_object_color(object, 0);
 		VM_OBJECT_WUNLOCK(object);
 	}
 #endif
 	error = vm_page_grab_valid_unlocked(&m, object, 0,
 	    VM_ALLOC_COUNT(VM_INITIAL_PAGEIN) |
 	    VM_ALLOC_NORMAL | VM_ALLOC_NOBUSY | VM_ALLOC_WIRED);
 
 	if (error != VM_PAGER_OK)
 		return (EIO);
 	imgp->firstpage = sf_buf_alloc(m, 0);
 	imgp->image_header = (char *)sf_buf_kva(imgp->firstpage);
 
 	return (0);
 }
 
 void
 exec_unmap_first_page(struct image_params *imgp)
 {
 	vm_page_t m;
 
 	if (imgp->firstpage != NULL) {
 		m = sf_buf_page(imgp->firstpage);
 		sf_buf_free(imgp->firstpage);
 		imgp->firstpage = NULL;
 		vm_page_unwire(m, PQ_ACTIVE);
 	}
 }
 
 void
 exec_onexec_old(struct thread *td)
 {
 	sigfastblock_clear(td);
 	umtx_exec(td->td_proc);
 }
 
 /*
  * This is an optimization which removes the unmanaged shared page
  * mapping. In combination with pmap_remove_pages(), which cleans all
  * managed mappings in the process' vmspace pmap, no work will be left
  * for pmap_remove(min, max).
  */
 void
 exec_free_abi_mappings(struct proc *p)
 {
 	struct vmspace *vmspace;
 
 	vmspace = p->p_vmspace;
 	if (refcount_load(&vmspace->vm_refcnt) != 1)
 		return;
 
 	if (!PROC_HAS_SHP(p))
 		return;
 
 	pmap_remove(vmspace_pmap(vmspace), vmspace->vm_shp_base,
 	    vmspace->vm_shp_base + p->p_sysent->sv_shared_page_len);
 }
 
 /*
  * Run down the current address space and install a new one.
  */
 int
 exec_new_vmspace(struct image_params *imgp, struct sysentvec *sv)
 {
 	int error;
 	struct proc *p = imgp->proc;
 	struct vmspace *vmspace = p->p_vmspace;
 	struct thread *td = curthread;
 	vm_offset_t sv_minuser;
 	vm_map_t map;
 
 	imgp->vmspace_destroyed = true;
 	imgp->sysent = sv;
 
 	if (p->p_sysent->sv_onexec_old != NULL)
 		p->p_sysent->sv_onexec_old(td);
 	itimers_exec(p);
 
 	EVENTHANDLER_DIRECT_INVOKE(process_exec, p, imgp);
 
 	/*
 	 * Blow away entire process VM, if address space not shared,
 	 * otherwise, create a new VM space so that other threads are
 	 * not disrupted
 	 */
 	map = &vmspace->vm_map;
 	if (map_at_zero)
 		sv_minuser = sv->sv_minuser;
 	else
 		sv_minuser = MAX(sv->sv_minuser, PAGE_SIZE);
 	if (refcount_load(&vmspace->vm_refcnt) == 1 &&
 	    vm_map_min(map) == sv_minuser &&
 	    vm_map_max(map) == sv->sv_maxuser &&
 	    cpu_exec_vmspace_reuse(p, map)) {
 		exec_free_abi_mappings(p);
 		shmexit(vmspace);
 		pmap_remove_pages(vmspace_pmap(vmspace));
 		vm_map_remove(map, vm_map_min(map), vm_map_max(map));
 		/*
 		 * An exec terminates mlockall(MCL_FUTURE).
 		 * ASLR and W^X states must be re-evaluated.
 		 */
 		vm_map_lock(map);
 		vm_map_modflags(map, 0, MAP_WIREFUTURE | MAP_ASLR |
 		    MAP_ASLR_IGNSTART | MAP_ASLR_STACK | MAP_WXORX);
 		vm_map_unlock(map);
 	} else {
 		error = vmspace_exec(p, sv_minuser, sv->sv_maxuser);
 		if (error)
 			return (error);
 		vmspace = p->p_vmspace;
 		map = &vmspace->vm_map;
 	}
 	map->flags |= imgp->map_flags;
 
 	return (sv->sv_onexec != NULL ? sv->sv_onexec(p, imgp) : 0);
 }
 
 /*
  * Compute the stack size limit and map the main process stack.
  * Map the shared page.
  */
 int
 exec_map_stack(struct image_params *imgp)
 {
 	struct rlimit rlim_stack;
 	struct sysentvec *sv;
 	struct proc *p;
 	vm_map_t map;
 	struct vmspace *vmspace;
 	vm_offset_t stack_addr, stack_top;
 	vm_offset_t sharedpage_addr;
 	u_long ssiz;
 	int error, find_space, stack_off;
 	vm_prot_t stack_prot;
 	vm_object_t obj;
 
 	p = imgp->proc;
 	sv = p->p_sysent;
 
 	if (imgp->stack_sz != 0) {
 		ssiz = trunc_page(imgp->stack_sz);
 		PROC_LOCK(p);
 		lim_rlimit_proc(p, RLIMIT_STACK, &rlim_stack);
 		PROC_UNLOCK(p);
 		if (ssiz > rlim_stack.rlim_max)
 			ssiz = rlim_stack.rlim_max;
 		if (ssiz > rlim_stack.rlim_cur) {
 			rlim_stack.rlim_cur = ssiz;
 			kern_setrlimit(curthread, RLIMIT_STACK, &rlim_stack);
 		}
 	} else if (sv->sv_maxssiz != NULL) {
 		ssiz = *sv->sv_maxssiz;
 	} else {
 		ssiz = maxssiz;
 	}
 
 	vmspace = p->p_vmspace;
 	map = &vmspace->vm_map;
 
 	stack_prot = sv->sv_shared_page_obj != NULL && imgp->stack_prot != 0 ?
 	    imgp->stack_prot : sv->sv_stackprot;
 	if ((map->flags & MAP_ASLR_STACK) != 0) {
 		stack_addr = round_page((vm_offset_t)p->p_vmspace->vm_daddr +
 		    lim_max(curthread, RLIMIT_DATA));
 		find_space = VMFS_ANY_SPACE;
 	} else {
 		stack_addr = sv->sv_usrstack - ssiz;
 		find_space = VMFS_NO_SPACE;
 	}
 	error = vm_map_find(map, NULL, 0, &stack_addr, (vm_size_t)ssiz,
 	    sv->sv_usrstack, find_space, stack_prot, VM_PROT_ALL,
 	    MAP_STACK_GROWS_DOWN);
 	if (error != KERN_SUCCESS) {
 		uprintf("exec_new_vmspace: mapping stack size %#jx prot %#x "
 		    "failed, mach error %d errno %d\n", (uintmax_t)ssiz,
 		    stack_prot, error, vm_mmap_to_errno(error));
 		return (vm_mmap_to_errno(error));
 	}
 
 	stack_top = stack_addr + ssiz;
 	if ((map->flags & MAP_ASLR_STACK) != 0) {
 		/* Randomize within the first page of the stack. */
 		arc4rand(&stack_off, sizeof(stack_off), 0);
 		stack_top -= rounddown2(stack_off & PAGE_MASK, sizeof(void *));
 	}
 
 	/* Map a shared page */
 	obj = sv->sv_shared_page_obj;
 	if (obj == NULL) {
 		sharedpage_addr = 0;
 		goto out;
 	}
 
 	/*
 	 * If randomization is disabled then the shared page will
 	 * be mapped at address specified in sysentvec.
 	 * Otherwise any address above .data section can be selected.
 	 * Same logic is used for stack address randomization.
 	 * If the address randomization is applied map a guard page
 	 * at the top of UVA.
 	 */
 	vm_object_reference(obj);
 	if ((imgp->imgp_flags & IMGP_ASLR_SHARED_PAGE) != 0) {
 		sharedpage_addr = round_page((vm_offset_t)p->p_vmspace->vm_daddr +
 		    lim_max(curthread, RLIMIT_DATA));
 
 		error = vm_map_fixed(map, NULL, 0,
 		    sv->sv_maxuser - PAGE_SIZE, PAGE_SIZE,
 		    VM_PROT_NONE, VM_PROT_NONE, MAP_CREATE_GUARD);
 		if (error != KERN_SUCCESS) {
 			/*
 			 * This is not fatal, so let's just print a warning
 			 * and continue.
 			 */
 			uprintf("%s: Mapping guard page at the top of UVA failed"
 			    " mach error %d errno %d",
 			    __func__, error, vm_mmap_to_errno(error));
 		}
 
 		error = vm_map_find(map, obj, 0,
 		    &sharedpage_addr, sv->sv_shared_page_len,
 		    sv->sv_maxuser, VMFS_ANY_SPACE,
 		    VM_PROT_READ | VM_PROT_EXECUTE,
 		    VM_PROT_READ | VM_PROT_EXECUTE,
 		    MAP_INHERIT_SHARE | MAP_ACC_NO_CHARGE);
 	} else {
 		sharedpage_addr = sv->sv_shared_page_base;
 		vm_map_fixed(map, obj, 0,
 		    sharedpage_addr, sv->sv_shared_page_len,
 		    VM_PROT_READ | VM_PROT_EXECUTE,
 		    VM_PROT_READ | VM_PROT_EXECUTE,
 		    MAP_INHERIT_SHARE | MAP_ACC_NO_CHARGE);
 	}
 	if (error != KERN_SUCCESS) {
 		uprintf("%s: mapping shared page at addr: %p"
 		    "failed, mach error %d errno %d\n", __func__,
 		    (void *)sharedpage_addr, error, vm_mmap_to_errno(error));
 		vm_object_deallocate(obj);
 		return (vm_mmap_to_errno(error));
 	}
 out:
 	/*
 	 * vm_ssize and vm_maxsaddr are somewhat antiquated concepts, but they
 	 * are still used to enforce the stack rlimit on the process stack.
 	 */
 	vmspace->vm_maxsaddr = (char *)stack_addr;
 	vmspace->vm_stacktop = stack_top;
 	vmspace->vm_ssize = sgrowsiz >> PAGE_SHIFT;
 	vmspace->vm_shp_base = sharedpage_addr;
 
 	return (0);
 }
 
 /*
  * Copy out argument and environment strings from the old process address
  * space into the temporary string buffer.
  */
 int
 exec_copyin_args(struct image_args *args, const char *fname,
     enum uio_seg segflg, char **argv, char **envv)
 {
 	u_long arg, env;
 	int error;
 
 	bzero(args, sizeof(*args));
 	if (argv == NULL)
 		return (EFAULT);
 
 	/*
 	 * Allocate demand-paged memory for the file name, argument, and
 	 * environment strings.
 	 */
 	error = exec_alloc_args(args);
 	if (error != 0)
 		return (error);
 
 	/*
 	 * Copy the file name.
 	 */
 	error = exec_args_add_fname(args, fname, segflg);
 	if (error != 0)
 		goto err_exit;
 
 	/*
 	 * extract arguments first
 	 */
 	for (;;) {
 		error = fueword(argv++, &arg);
 		if (error == -1) {
 			error = EFAULT;
 			goto err_exit;
 		}
 		if (arg == 0)
 			break;
 		error = exec_args_add_arg(args, (char *)(uintptr_t)arg,
 		    UIO_USERSPACE);
 		if (error != 0)
 			goto err_exit;
 	}
 
 	/*
 	 * extract environment strings
 	 */
 	if (envv) {
 		for (;;) {
 			error = fueword(envv++, &env);
 			if (error == -1) {
 				error = EFAULT;
 				goto err_exit;
 			}
 			if (env == 0)
 				break;
 			error = exec_args_add_env(args,
 			    (char *)(uintptr_t)env, UIO_USERSPACE);
 			if (error != 0)
 				goto err_exit;
 		}
 	}
 
 	return (0);
 
 err_exit:
 	exec_free_args(args);
 	return (error);
 }
 
 struct exec_args_kva {
 	vm_offset_t addr;
 	u_int gen;
 	SLIST_ENTRY(exec_args_kva) next;
 };
 
 DPCPU_DEFINE_STATIC(struct exec_args_kva *, exec_args_kva);
 
 static SLIST_HEAD(, exec_args_kva) exec_args_kva_freelist;
 static struct mtx exec_args_kva_mtx;
 static u_int exec_args_gen;
 
 static void
 exec_prealloc_args_kva(void *arg __unused)
 {
 	struct exec_args_kva *argkva;
 	u_int i;
 
 	SLIST_INIT(&exec_args_kva_freelist);
 	mtx_init(&exec_args_kva_mtx, "exec args kva", NULL, MTX_DEF);
 	for (i = 0; i < exec_map_entries; i++) {
 		argkva = malloc(sizeof(*argkva), M_PARGS, M_WAITOK);
 		argkva->addr = kmap_alloc_wait(exec_map, exec_map_entry_size);
 		argkva->gen = exec_args_gen;
 		SLIST_INSERT_HEAD(&exec_args_kva_freelist, argkva, next);
 	}
 }
 SYSINIT(exec_args_kva, SI_SUB_EXEC, SI_ORDER_ANY, exec_prealloc_args_kva, NULL);
 
 static vm_offset_t
 exec_alloc_args_kva(void **cookie)
 {
 	struct exec_args_kva *argkva;
 
 	argkva = (void *)atomic_readandclear_ptr(
 	    (uintptr_t *)DPCPU_PTR(exec_args_kva));
 	if (argkva == NULL) {
 		mtx_lock(&exec_args_kva_mtx);
 		while ((argkva = SLIST_FIRST(&exec_args_kva_freelist)) == NULL)
 			(void)mtx_sleep(&exec_args_kva_freelist,
 			    &exec_args_kva_mtx, 0, "execkva", 0);
 		SLIST_REMOVE_HEAD(&exec_args_kva_freelist, next);
 		mtx_unlock(&exec_args_kva_mtx);
 	}
 	kasan_mark((void *)argkva->addr, exec_map_entry_size,
 	    exec_map_entry_size, 0);
 	*(struct exec_args_kva **)cookie = argkva;
 	return (argkva->addr);
 }
 
 static void
 exec_release_args_kva(struct exec_args_kva *argkva, u_int gen)
 {
 	vm_offset_t base;
 
 	base = argkva->addr;
 	kasan_mark((void *)argkva->addr, 0, exec_map_entry_size,
 	    KASAN_EXEC_ARGS_FREED);
 	if (argkva->gen != gen) {
 		(void)vm_map_madvise(exec_map, base, base + exec_map_entry_size,
 		    MADV_FREE);
 		argkva->gen = gen;
 	}
 	if (!atomic_cmpset_ptr((uintptr_t *)DPCPU_PTR(exec_args_kva),
 	    (uintptr_t)NULL, (uintptr_t)argkva)) {
 		mtx_lock(&exec_args_kva_mtx);
 		SLIST_INSERT_HEAD(&exec_args_kva_freelist, argkva, next);
 		wakeup_one(&exec_args_kva_freelist);
 		mtx_unlock(&exec_args_kva_mtx);
 	}
 }
 
 static void
 exec_free_args_kva(void *cookie)
 {
 
 	exec_release_args_kva(cookie, exec_args_gen);
 }
 
 static void
 exec_args_kva_lowmem(void *arg __unused)
 {
 	SLIST_HEAD(, exec_args_kva) head;
 	struct exec_args_kva *argkva;
 	u_int gen;
 	int i;
 
 	gen = atomic_fetchadd_int(&exec_args_gen, 1) + 1;
 
 	/*
 	 * Force an madvise of each KVA range. Any currently allocated ranges
 	 * will have MADV_FREE applied once they are freed.
 	 */
 	SLIST_INIT(&head);
 	mtx_lock(&exec_args_kva_mtx);
 	SLIST_SWAP(&head, &exec_args_kva_freelist, exec_args_kva);
 	mtx_unlock(&exec_args_kva_mtx);
 	while ((argkva = SLIST_FIRST(&head)) != NULL) {
 		SLIST_REMOVE_HEAD(&head, next);
 		exec_release_args_kva(argkva, gen);
 	}
 
 	CPU_FOREACH(i) {
 		argkva = (void *)atomic_readandclear_ptr(
 		    (uintptr_t *)DPCPU_ID_PTR(i, exec_args_kva));
 		if (argkva != NULL)
 			exec_release_args_kva(argkva, gen);
 	}
 }
 EVENTHANDLER_DEFINE(vm_lowmem, exec_args_kva_lowmem, NULL,
     EVENTHANDLER_PRI_ANY);
 
 /*
  * Allocate temporary demand-paged, zero-filled memory for the file name,
  * argument, and environment strings.
  */
 int
 exec_alloc_args(struct image_args *args)
 {
 
 	args->buf = (char *)exec_alloc_args_kva(&args->bufkva);
 	return (0);
 }
 
 void
 exec_free_args(struct image_args *args)
 {
 
 	if (args->buf != NULL) {
 		exec_free_args_kva(args->bufkva);
 		args->buf = NULL;
 	}
 	if (args->fname_buf != NULL) {
 		free(args->fname_buf, M_TEMP);
 		args->fname_buf = NULL;
 	}
 }
 
 /*
  * A set to functions to fill struct image args.
  *
  * NOTE: exec_args_add_fname() must be called (possibly with a NULL
  * fname) before the other functions.  All exec_args_add_arg() calls must
  * be made before any exec_args_add_env() calls.  exec_args_adjust_args()
  * may be called any time after exec_args_add_fname().
  *
  * exec_args_add_fname() - install path to be executed
  * exec_args_add_arg() - append an argument string
  * exec_args_add_env() - append an env string
  * exec_args_adjust_args() - adjust location of the argument list to
  *                           allow new arguments to be prepended
  */
 int
 exec_args_add_fname(struct image_args *args, const char *fname,
     enum uio_seg segflg)
 {
 	int error;
 	size_t length;
 
 	KASSERT(args->fname == NULL, ("fname already appended"));
 	KASSERT(args->endp == NULL, ("already appending to args"));
 
 	if (fname != NULL) {
 		args->fname = args->buf;
 		error = segflg == UIO_SYSSPACE ?
 		    copystr(fname, args->fname, PATH_MAX, &length) :
 		    copyinstr(fname, args->fname, PATH_MAX, &length);
 		if (error != 0)
 			return (error == ENAMETOOLONG ? E2BIG : error);
 	} else
 		length = 0;
 
 	/* Set up for _arg_*()/_env_*() */
 	args->endp = args->buf + length;
 	/* begin_argv must be set and kept updated */
 	args->begin_argv = args->endp;
 	KASSERT(exec_map_entry_size - length >= ARG_MAX,
 	    ("too little space remaining for arguments %zu < %zu",
 	    exec_map_entry_size - length, (size_t)ARG_MAX));
 	args->stringspace = ARG_MAX;
 
 	return (0);
 }
 
 static int
 exec_args_add_str(struct image_args *args, const char *str,
     enum uio_seg segflg, int *countp)
 {
 	int error;
 	size_t length;
 
 	KASSERT(args->endp != NULL, ("endp not initialized"));
 	KASSERT(args->begin_argv != NULL, ("begin_argp not initialized"));
 
 	error = (segflg == UIO_SYSSPACE) ?
 	    copystr(str, args->endp, args->stringspace, &length) :
 	    copyinstr(str, args->endp, args->stringspace, &length);
 	if (error != 0)
 		return (error == ENAMETOOLONG ? E2BIG : error);
 	args->stringspace -= length;
 	args->endp += length;
 	(*countp)++;
 
 	return (0);
 }
 
 int
 exec_args_add_arg(struct image_args *args, const char *argp,
     enum uio_seg segflg)
 {
 
 	KASSERT(args->envc == 0, ("appending args after env"));
 
 	return (exec_args_add_str(args, argp, segflg, &args->argc));
 }
 
 int
 exec_args_add_env(struct image_args *args, const char *envp,
     enum uio_seg segflg)
 {
 
 	if (args->envc == 0)
 		args->begin_envv = args->endp;
 
 	return (exec_args_add_str(args, envp, segflg, &args->envc));
 }
 
 int
 exec_args_adjust_args(struct image_args *args, size_t consume, ssize_t extend)
 {
 	ssize_t offset;
 
 	KASSERT(args->endp != NULL, ("endp not initialized"));
 	KASSERT(args->begin_argv != NULL, ("begin_argp not initialized"));
 
 	offset = extend - consume;
 	if (args->stringspace < offset)
 		return (E2BIG);
 	memmove(args->begin_argv + extend, args->begin_argv + consume,
 	    args->endp - args->begin_argv + consume);
 	if (args->envc > 0)
 		args->begin_envv += offset;
 	args->endp += offset;
 	args->stringspace -= offset;
 	return (0);
 }
 
 char *
 exec_args_get_begin_envv(struct image_args *args)
 {
 
 	KASSERT(args->endp != NULL, ("endp not initialized"));
 
 	if (args->envc > 0)
 		return (args->begin_envv);
 	return (args->endp);
 }
 
 /*
  * Copy strings out to the new process address space, constructing new arg
  * and env vector tables. Return a pointer to the base so that it can be used
  * as the initial stack pointer.
  */
 int
 exec_copyout_strings(struct image_params *imgp, uintptr_t *stack_base)
 {
 	int argc, envc;
 	char **vectp;
 	char *stringp;
 	uintptr_t destp, ustringp;
 	struct ps_strings *arginfo;
 	struct proc *p;
 	struct sysentvec *sysent;
 	size_t execpath_len;
 	int error, szsigcode;
 	char canary[sizeof(long) * 8];
 
 	p = imgp->proc;
 	sysent = p->p_sysent;
 
 	destp =	PROC_PS_STRINGS(p);
 	arginfo = imgp->ps_strings = (void *)destp;
 
 	/*
 	 * Install sigcode.
 	 */
 	if (sysent->sv_shared_page_base == 0 && sysent->sv_szsigcode != NULL) {
 		szsigcode = *(sysent->sv_szsigcode);
 		destp -= szsigcode;
 		destp = rounddown2(destp, sizeof(void *));
 		error = copyout(sysent->sv_sigcode, (void *)destp, szsigcode);
 		if (error != 0)
 			return (error);
 	}
 
 	/*
 	 * Copy the image path for the rtld.
 	 */
 	if (imgp->execpath != NULL && imgp->auxargs != NULL) {
 		execpath_len = strlen(imgp->execpath) + 1;
 		destp -= execpath_len;
 		destp = rounddown2(destp, sizeof(void *));
 		imgp->execpathp = (void *)destp;
 		error = copyout(imgp->execpath, imgp->execpathp, execpath_len);
 		if (error != 0)
 			return (error);
 	}
 
 	/*
 	 * Prepare the canary for SSP.
 	 */
 	arc4rand(canary, sizeof(canary), 0);
 	destp -= sizeof(canary);
 	imgp->canary = (void *)destp;
 	error = copyout(canary, imgp->canary, sizeof(canary));
 	if (error != 0)
 		return (error);
 	imgp->canarylen = sizeof(canary);
 
 	/*
 	 * Prepare the pagesizes array.
 	 */
 	imgp->pagesizeslen = sizeof(pagesizes[0]) * MAXPAGESIZES;
 	destp -= imgp->pagesizeslen;
 	destp = rounddown2(destp, sizeof(void *));
 	imgp->pagesizes = (void *)destp;
 	error = copyout(pagesizes, imgp->pagesizes, imgp->pagesizeslen);
 	if (error != 0)
 		return (error);
 
 	/*
 	 * Allocate room for the argument and environment strings.
 	 */
 	destp -= ARG_MAX - imgp->args->stringspace;
 	destp = rounddown2(destp, sizeof(void *));
 	ustringp = destp;
 
 	if (imgp->auxargs) {
 		/*
 		 * Allocate room on the stack for the ELF auxargs
 		 * array.  It has up to AT_COUNT entries.
 		 */
 		destp -= AT_COUNT * sizeof(Elf_Auxinfo);
 		destp = rounddown2(destp, sizeof(void *));
 	}
 
 	vectp = (char **)destp;
 
 	/*
 	 * Allocate room for the argv[] and env vectors including the
 	 * terminating NULL pointers.
 	 */
 	vectp -= imgp->args->argc + 1 + imgp->args->envc + 1;
 
 	/*
 	 * vectp also becomes our initial stack base
 	 */
 	*stack_base = (uintptr_t)vectp;
 
 	stringp = imgp->args->begin_argv;
 	argc = imgp->args->argc;
 	envc = imgp->args->envc;
 
 	/*
 	 * Copy out strings - arguments and environment.
 	 */
 	error = copyout(stringp, (void *)ustringp,
 	    ARG_MAX - imgp->args->stringspace);
 	if (error != 0)
 		return (error);
 
 	/*
 	 * Fill in "ps_strings" struct for ps, w, etc.
 	 */
 	imgp->argv = vectp;
 	if (suword(&arginfo->ps_argvstr, (long)(intptr_t)vectp) != 0 ||
 	    suword32(&arginfo->ps_nargvstr, argc) != 0)
 		return (EFAULT);
 
 	/*
 	 * Fill in argument portion of vector table.
 	 */
 	for (; argc > 0; --argc) {
 		if (suword(vectp++, ustringp) != 0)
 			return (EFAULT);
 		while (*stringp++ != 0)
 			ustringp++;
 		ustringp++;
 	}
 
 	/* a null vector table pointer separates the argp's from the envp's */
 	if (suword(vectp++, 0) != 0)
 		return (EFAULT);
 
 	imgp->envv = vectp;
 	if (suword(&arginfo->ps_envstr, (long)(intptr_t)vectp) != 0 ||
 	    suword32(&arginfo->ps_nenvstr, envc) != 0)
 		return (EFAULT);
 
 	/*
 	 * Fill in environment portion of vector table.
 	 */
 	for (; envc > 0; --envc) {
 		if (suword(vectp++, ustringp) != 0)
 			return (EFAULT);
 		while (*stringp++ != 0)
 			ustringp++;
 		ustringp++;
 	}
 
 	/* end of vector table is a null pointer */
 	if (suword(vectp, 0) != 0)
 		return (EFAULT);
 
 	if (imgp->auxargs) {
 		vectp++;
 		error = imgp->sysent->sv_copyout_auxargs(imgp,
 		    (uintptr_t)vectp);
 		if (error != 0)
 			return (error);
 	}
 
 	return (0);
 }
 
 /*
  * Check permissions of file to execute.
  *	Called with imgp->vp locked.
  *	Return 0 for success or error code on failure.
  */
 int
 exec_check_permissions(struct image_params *imgp)
 {
 	struct vnode *vp = imgp->vp;
 	struct vattr *attr = imgp->attr;
 	struct thread *td;
 	int error;
 
 	td = curthread;
 
 	/* Get file attributes */
 	error = VOP_GETATTR(vp, attr, td->td_ucred);
 	if (error)
 		return (error);
 
 #ifdef MAC
 	error = mac_vnode_check_exec(td->td_ucred, imgp->vp, imgp);
 	if (error)
 		return (error);
 #endif
 
 	/*
 	 * 1) Check if file execution is disabled for the filesystem that
 	 *    this file resides on.
 	 * 2) Ensure that at least one execute bit is on. Otherwise, a
 	 *    privileged user will always succeed, and we don't want this
 	 *    to happen unless the file really is executable.
 	 * 3) Ensure that the file is a regular file.
 	 */
 	if ((vp->v_mount->mnt_flag & MNT_NOEXEC) ||
 	    (attr->va_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) == 0 ||
 	    (attr->va_type != VREG))
 		return (EACCES);
 
 	/*
 	 * Zero length files can't be exec'd
 	 */
 	if (attr->va_size == 0)
 		return (ENOEXEC);
 
 	/*
 	 *  Check for execute permission to file based on current credentials.
 	 */
 	error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
 	if (error)
 		return (error);
 
 	/*
 	 * Check number of open-for-writes on the file and deny execution
 	 * if there are any.
 	 *
 	 * Add a text reference now so no one can write to the
 	 * executable while we're activating it.
 	 *
 	 * Remember if this was set before and unset it in case this is not
 	 * actually an executable image.
 	 */
 	error = VOP_SET_TEXT(vp);
 	if (error != 0)
 		return (error);
 	imgp->textset = true;
 
 	/*
 	 * Call filesystem specific open routine (which does nothing in the
 	 * general case).
 	 */
 	error = VOP_OPEN(vp, FREAD, td->td_ucred, td, NULL);
 	if (error == 0)
 		imgp->opened = true;
 	return (error);
 }
 
 /*
  * Exec handler registration
  */
 int
 exec_register(const struct execsw *execsw_arg)
 {
 	const struct execsw **es, **xs, **newexecsw;
 	u_int count = 2;	/* New slot and trailing NULL */
 
 	if (execsw)
 		for (es = execsw; *es; es++)
 			count++;
 	newexecsw = malloc(count * sizeof(*es), M_TEMP, M_WAITOK);
 	xs = newexecsw;
 	if (execsw)
 		for (es = execsw; *es; es++)
 			*xs++ = *es;
 	*xs++ = execsw_arg;
 	*xs = NULL;
 	if (execsw)
 		free(execsw, M_TEMP);
 	execsw = newexecsw;
 	return (0);
 }
 
 int
 exec_unregister(const struct execsw *execsw_arg)
 {
 	const struct execsw **es, **xs, **newexecsw;
 	int count = 1;
 
 	if (execsw == NULL)
 		panic("unregister with no handlers left?\n");
 
 	for (es = execsw; *es; es++) {
 		if (*es == execsw_arg)
 			break;
 	}
 	if (*es == NULL)
 		return (ENOENT);
 	for (es = execsw; *es; es++)
 		if (*es != execsw_arg)
 			count++;
 	newexecsw = malloc(count * sizeof(*es), M_TEMP, M_WAITOK);
 	xs = newexecsw;
 	for (es = execsw; *es; es++)
 		if (*es != execsw_arg)
 			*xs++ = *es;
 	*xs = NULL;
 	if (execsw)
 		free(execsw, M_TEMP);
 	execsw = newexecsw;
 	return (0);
 }
 
 /*
  * Write out a core segment to the compression stream.
  */
 static int
 compress_chunk(struct coredump_params *cp, char *base, char *buf, size_t len)
 {
 	size_t chunk_len;
 	int error;
 
 	while (len > 0) {
 		chunk_len = MIN(len, CORE_BUF_SIZE);
 
 		/*
 		 * We can get EFAULT error here.
 		 * In that case zero out the current chunk of the segment.
 		 */
 		error = copyin(base, buf, chunk_len);
 		if (error != 0)
 			bzero(buf, chunk_len);
 		error = compressor_write(cp->comp, buf, chunk_len);
 		if (error != 0)
 			break;
 		base += chunk_len;
 		len -= chunk_len;
 	}
 	return (error);
 }
 
 int
 core_write(struct coredump_params *cp, const void *base, size_t len,
     off_t offset, enum uio_seg seg, size_t *resid)
 {
 
 	return (vn_rdwr_inchunks(UIO_WRITE, cp->vp, __DECONST(void *, base),
 	    len, offset, seg, IO_UNIT | IO_DIRECT | IO_RANGELOCKED,
 	    cp->active_cred, cp->file_cred, resid, cp->td));
 }
 
 int
 core_output(char *base, size_t len, off_t offset, struct coredump_params *cp,
     void *tmpbuf)
 {
 	vm_map_t map;
 	struct mount *mp;
 	size_t resid, runlen;
 	int error;
 	bool success;
 
 	KASSERT((uintptr_t)base % PAGE_SIZE == 0,
 	    ("%s: user address %p is not page-aligned", __func__, base));
 
 	if (cp->comp != NULL)
 		return (compress_chunk(cp, base, tmpbuf, len));
 
 	map = &cp->td->td_proc->p_vmspace->vm_map;
 	for (; len > 0; base += runlen, offset += runlen, len -= runlen) {
 		/*
 		 * Attempt to page in all virtual pages in the range.  If a
 		 * virtual page is not backed by the pager, it is represented as
 		 * a hole in the file.  This can occur with zero-filled
 		 * anonymous memory or truncated files, for example.
 		 */
 		for (runlen = 0; runlen < len; runlen += PAGE_SIZE) {
 			if (core_dump_can_intr && curproc_sigkilled())
 				return (EINTR);
 			error = vm_fault(map, (uintptr_t)base + runlen,
 			    VM_PROT_READ, VM_FAULT_NOFILL, NULL);
 			if (runlen == 0)
 				success = error == KERN_SUCCESS;
 			else if ((error == KERN_SUCCESS) != success)
 				break;
 		}
 
 		if (success) {
 			error = core_write(cp, base, runlen, offset,
 			    UIO_USERSPACE, &resid);
 			if (error != 0) {
 				if (error != EFAULT)
 					break;
 
 				/*
 				 * EFAULT may be returned if the user mapping
 				 * could not be accessed, e.g., because a mapped
 				 * file has been truncated.  Skip the page if no
 				 * progress was made, to protect against a
 				 * hypothetical scenario where vm_fault() was
 				 * successful but core_write() returns EFAULT
 				 * anyway.
 				 */
 				runlen -= resid;
 				if (runlen == 0) {
 					success = false;
 					runlen = PAGE_SIZE;
 				}
 			}
 		}
 		if (!success) {
 			error = vn_start_write(cp->vp, &mp, V_WAIT);
 			if (error != 0)
 				break;
 			vn_lock(cp->vp, LK_EXCLUSIVE | LK_RETRY);
 			error = vn_truncate_locked(cp->vp, offset + runlen,
 			    false, cp->td->td_ucred);
 			VOP_UNLOCK(cp->vp);
 			vn_finished_write(mp);
 			if (error != 0)
 				break;
 		}
 	}
 	return (error);
 }
 
 /*
  * Drain into a core file.
  */
 int
 sbuf_drain_core_output(void *arg, const char *data, int len)
 {
 	struct coredump_params *cp;
 	struct proc *p;
 	int error, locked;
 
 	cp = arg;
 	p = cp->td->td_proc;
 
 	/*
 	 * Some kern_proc out routines that print to this sbuf may
 	 * call us with the process lock held. Draining with the
 	 * non-sleepable lock held is unsafe. The lock is needed for
 	 * those routines when dumping a live process. In our case we
 	 * can safely release the lock before draining and acquire
 	 * again after.
 	 */
 	locked = PROC_LOCKED(p);
 	if (locked)
 		PROC_UNLOCK(p);
 	if (cp->comp != NULL)
 		error = compressor_write(cp->comp, __DECONST(char *, data),
 		    len);
 	else
 		error = core_write(cp, __DECONST(void *, data), len, cp->offset,
 		    UIO_SYSSPACE, NULL);
 	if (locked)
 		PROC_LOCK(p);
 	if (error != 0)
 		return (-error);
 	cp->offset += len;
 	return (len);
 }
diff --git a/sys/kern/uipc_mqueue.c b/sys/kern/uipc_mqueue.c
index 745c1174638e..29c7bcdd4289 100644
--- a/sys/kern/uipc_mqueue.c
+++ b/sys/kern/uipc_mqueue.c
@@ -1,2944 +1,2939 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2005 David Xu <davidxu@freebsd.org>
  * Copyright (c) 2016-2017 Robert N. M. Watson
  * All rights reserved.
  *
  * Portions of this software were developed by BAE Systems, the University of
  * Cambridge Computer Laboratory, and Memorial University under DARPA/AFRL
  * contract FA8650-15-C-7558 ("CADETS"), as part of the DARPA Transparent
  * Computing (TC) research program.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  */
 
 /*
  * POSIX message queue implementation.
  *
  * 1) A mqueue filesystem can be mounted, each message queue appears
  *    in mounted directory, user can change queue's permission and
  *    ownership, or remove a queue. Manually creating a file in the
  *    directory causes a message queue to be created in the kernel with
  *    default message queue attributes applied and same name used, this
  *    method is not advocated since mq_open syscall allows user to specify
  *    different attributes. Also the file system can be mounted multiple
  *    times at different mount points but shows same contents.
  *
  * 2) Standard POSIX message queue API. The syscalls do not use vfs layer,
  *    but directly operate on internal data structure, this allows user to
  *    use the IPC facility without having to mount mqueue file system.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_capsicum.h"
 
 #include <sys/param.h>
 #include <sys/kernel.h>
 #include <sys/systm.h>
 #include <sys/limits.h>
 #include <sys/malloc.h>
 #include <sys/buf.h>
 #include <sys/capsicum.h>
 #include <sys/dirent.h>
 #include <sys/event.h>
 #include <sys/eventhandler.h>
 #include <sys/fcntl.h>
 #include <sys/file.h>
 #include <sys/filedesc.h>
 #include <sys/jail.h>
 #include <sys/lock.h>
 #include <sys/module.h>
 #include <sys/mount.h>
 #include <sys/mqueue.h>
 #include <sys/mutex.h>
 #include <sys/namei.h>
 #include <sys/posix4.h>
 #include <sys/poll.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/queue.h>
 #include <sys/sysproto.h>
 #include <sys/stat.h>
 #include <sys/syscall.h>
 #include <sys/syscallsubr.h>
 #include <sys/sysent.h>
 #include <sys/sx.h>
 #include <sys/sysctl.h>
 #include <sys/taskqueue.h>
 #include <sys/unistd.h>
 #include <sys/user.h>
 #include <sys/vnode.h>
 #include <machine/atomic.h>
 
 #include <security/audit/audit.h>
 
 FEATURE(p1003_1b_mqueue, "POSIX P1003.1B message queues support");
 
 /*
  * Limits and constants
  */
 #define	MQFS_NAMELEN		NAME_MAX
 #define MQFS_DELEN		(8 + MQFS_NAMELEN)
 
 /* node types */
 typedef enum {
 	mqfstype_none = 0,
 	mqfstype_root,
 	mqfstype_dir,
 	mqfstype_this,
 	mqfstype_parent,
 	mqfstype_file,
 	mqfstype_symlink,
 } mqfs_type_t;
 
 struct mqfs_node;
 
 /*
  * mqfs_info: describes a mqfs instance
  */
 struct mqfs_info {
 	struct sx		mi_lock;
 	struct mqfs_node	*mi_root;
 	struct unrhdr		*mi_unrhdr;
 };
 
 struct mqfs_vdata {
 	LIST_ENTRY(mqfs_vdata)	mv_link;
 	struct mqfs_node	*mv_node;
 	struct vnode		*mv_vnode;
 	struct task		mv_task;
 };
 
 /*
  * mqfs_node: describes a node (file or directory) within a mqfs
  */
 struct mqfs_node {
 	char			mn_name[MQFS_NAMELEN+1];
 	struct mqfs_info	*mn_info;
 	struct mqfs_node	*mn_parent;
 	LIST_HEAD(,mqfs_node)	mn_children;
 	LIST_ENTRY(mqfs_node)	mn_sibling;
 	LIST_HEAD(,mqfs_vdata)	mn_vnodes;
 	const void		*mn_pr_root;
 	int			mn_refcount;
 	mqfs_type_t		mn_type;
 	int			mn_deleted;
 	uint32_t		mn_fileno;
 	void			*mn_data;
 	struct timespec		mn_birth;
 	struct timespec		mn_ctime;
 	struct timespec		mn_atime;
 	struct timespec		mn_mtime;
 	uid_t			mn_uid;
 	gid_t			mn_gid;
 	int			mn_mode;
 };
 
 #define	VTON(vp)	(((struct mqfs_vdata *)((vp)->v_data))->mv_node)
 #define VTOMQ(vp) 	((struct mqueue *)(VTON(vp)->mn_data))
 #define	VFSTOMQFS(m)	((struct mqfs_info *)((m)->mnt_data))
 #define	FPTOMQ(fp)	((struct mqueue *)(((struct mqfs_node *) \
 				(fp)->f_data)->mn_data))
 
 TAILQ_HEAD(msgq, mqueue_msg);
 
 struct mqueue;
 
 struct mqueue_notifier {
 	LIST_ENTRY(mqueue_notifier)	nt_link;
 	struct sigevent			nt_sigev;
 	ksiginfo_t			nt_ksi;
 	struct proc			*nt_proc;
 };
 
 struct mqueue {
 	struct mtx	mq_mutex;
 	int		mq_flags;
 	long		mq_maxmsg;
 	long		mq_msgsize;
 	long		mq_curmsgs;
 	long		mq_totalbytes;
 	struct msgq	mq_msgq;
 	int		mq_receivers;
 	int		mq_senders;
 	struct selinfo	mq_rsel;
 	struct selinfo	mq_wsel;
 	struct mqueue_notifier	*mq_notifier;
 };
 
 #define	MQ_RSEL		0x01
 #define	MQ_WSEL		0x02
 
 struct mqueue_msg {
 	TAILQ_ENTRY(mqueue_msg)	msg_link;
 	unsigned int	msg_prio;
 	unsigned int	msg_size;
 	/* following real data... */
 };
 
 static SYSCTL_NODE(_kern, OID_AUTO, mqueue, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
 	"POSIX real time message queue");
 
 static int	default_maxmsg  = 10;
 static int	default_msgsize = 1024;
 
 static int	maxmsg = 100;
 SYSCTL_INT(_kern_mqueue, OID_AUTO, maxmsg, CTLFLAG_RW,
     &maxmsg, 0, "Default maximum messages in queue");
 static int	maxmsgsize = 16384;
 SYSCTL_INT(_kern_mqueue, OID_AUTO, maxmsgsize, CTLFLAG_RW,
     &maxmsgsize, 0, "Default maximum message size");
 static int	maxmq = 100;
 SYSCTL_INT(_kern_mqueue, OID_AUTO, maxmq, CTLFLAG_RW,
     &maxmq, 0, "maximum message queues");
 static int	curmq = 0;
 SYSCTL_INT(_kern_mqueue, OID_AUTO, curmq, CTLFLAG_RW,
     &curmq, 0, "current message queue number");
 static int	unloadable = 0;
 static MALLOC_DEFINE(M_MQUEUEDATA, "mqdata", "mqueue data");
 
 static eventhandler_tag exit_tag;
 
 /* Only one instance per-system */
 static struct mqfs_info		mqfs_data;
 static uma_zone_t		mqnode_zone;
 static uma_zone_t		mqueue_zone;
 static uma_zone_t		mvdata_zone;
 static uma_zone_t		mqnoti_zone;
 static struct vop_vector	mqfs_vnodeops;
 static struct fileops		mqueueops;
 static unsigned			mqfs_osd_jail_slot;
 
 /*
  * Directory structure construction and manipulation
  */
 #ifdef notyet
 static struct mqfs_node	*mqfs_create_dir(struct mqfs_node *parent,
 	const char *name, int namelen, struct ucred *cred, int mode);
 static struct mqfs_node	*mqfs_create_link(struct mqfs_node *parent,
 	const char *name, int namelen, struct ucred *cred, int mode);
 #endif
 
 static struct mqfs_node	*mqfs_create_file(struct mqfs_node *parent,
 	const char *name, int namelen, struct ucred *cred, int mode);
 static int	mqfs_destroy(struct mqfs_node *mn);
 static void	mqfs_fileno_alloc(struct mqfs_info *mi, struct mqfs_node *mn);
 static void	mqfs_fileno_free(struct mqfs_info *mi, struct mqfs_node *mn);
 static int	mqfs_allocv(struct mount *mp, struct vnode **vpp, struct mqfs_node *pn);
 static int	mqfs_prison_remove(void *obj, void *data);
 
 /*
  * Message queue construction and maniplation
  */
 static struct mqueue	*mqueue_alloc(const struct mq_attr *attr);
 static void	mqueue_free(struct mqueue *mq);
 static int	mqueue_send(struct mqueue *mq, const char *msg_ptr,
 			size_t msg_len, unsigned msg_prio, int waitok,
 			const struct timespec *abs_timeout);
 static int	mqueue_receive(struct mqueue *mq, char *msg_ptr,
 			size_t msg_len, unsigned *msg_prio, int waitok,
 			const struct timespec *abs_timeout);
 static int	_mqueue_send(struct mqueue *mq, struct mqueue_msg *msg,
 			int timo);
 static int	_mqueue_recv(struct mqueue *mq, struct mqueue_msg **msg,
 			int timo);
 static void	mqueue_send_notification(struct mqueue *mq);
 static void	mqueue_fdclose(struct thread *td, int fd, struct file *fp);
 static void	mq_proc_exit(void *arg, struct proc *p);
 
 /*
  * kqueue filters
  */
 static void	filt_mqdetach(struct knote *kn);
 static int	filt_mqread(struct knote *kn, long hint);
 static int	filt_mqwrite(struct knote *kn, long hint);
 
 struct filterops mq_rfiltops = {
 	.f_isfd = 1,
 	.f_detach = filt_mqdetach,
 	.f_event = filt_mqread,
 };
 struct filterops mq_wfiltops = {
 	.f_isfd = 1,
 	.f_detach = filt_mqdetach,
 	.f_event = filt_mqwrite,
 };
 
 /*
  * Initialize fileno bitmap
  */
 static void
 mqfs_fileno_init(struct mqfs_info *mi)
 {
 	struct unrhdr *up;
 
 	up = new_unrhdr(1, INT_MAX, NULL);
 	mi->mi_unrhdr = up;
 }
 
 /*
  * Tear down fileno bitmap
  */
 static void
 mqfs_fileno_uninit(struct mqfs_info *mi)
 {
 	struct unrhdr *up;
 
 	up = mi->mi_unrhdr;
 	mi->mi_unrhdr = NULL;
 	delete_unrhdr(up);
 }
 
 /*
  * Allocate a file number
  */
 static void
 mqfs_fileno_alloc(struct mqfs_info *mi, struct mqfs_node *mn)
 {
 	/* make sure our parent has a file number */
 	if (mn->mn_parent && !mn->mn_parent->mn_fileno)
 		mqfs_fileno_alloc(mi, mn->mn_parent);
 
 	switch (mn->mn_type) {
 	case mqfstype_root:
 	case mqfstype_dir:
 	case mqfstype_file:
 	case mqfstype_symlink:
 		mn->mn_fileno = alloc_unr(mi->mi_unrhdr);
 		break;
 	case mqfstype_this:
 		KASSERT(mn->mn_parent != NULL,
 		    ("mqfstype_this node has no parent"));
 		mn->mn_fileno = mn->mn_parent->mn_fileno;
 		break;
 	case mqfstype_parent:
 		KASSERT(mn->mn_parent != NULL,
 		    ("mqfstype_parent node has no parent"));
 		if (mn->mn_parent == mi->mi_root) {
 			mn->mn_fileno = mn->mn_parent->mn_fileno;
 			break;
 		}
 		KASSERT(mn->mn_parent->mn_parent != NULL,
 		    ("mqfstype_parent node has no grandparent"));
 		mn->mn_fileno = mn->mn_parent->mn_parent->mn_fileno;
 		break;
 	default:
 		KASSERT(0,
 		    ("mqfs_fileno_alloc() called for unknown type node: %d",
 			mn->mn_type));
 		break;
 	}
 }
 
 /*
  * Release a file number
  */
 static void
 mqfs_fileno_free(struct mqfs_info *mi, struct mqfs_node *mn)
 {
 	switch (mn->mn_type) {
 	case mqfstype_root:
 	case mqfstype_dir:
 	case mqfstype_file:
 	case mqfstype_symlink:
 		free_unr(mi->mi_unrhdr, mn->mn_fileno);
 		break;
 	case mqfstype_this:
 	case mqfstype_parent:
 		/* ignore these, as they don't "own" their file number */
 		break;
 	default:
 		KASSERT(0,
 		    ("mqfs_fileno_free() called for unknown type node: %d", 
 			mn->mn_type));
 		break;
 	}
 }
 
 static __inline struct mqfs_node *
 mqnode_alloc(void)
 {
 	return uma_zalloc(mqnode_zone, M_WAITOK | M_ZERO);
 }
 
 static __inline void
 mqnode_free(struct mqfs_node *node)
 {
 	uma_zfree(mqnode_zone, node);
 }
 
 static __inline void
 mqnode_addref(struct mqfs_node *node)
 {
 	atomic_add_int(&node->mn_refcount, 1);
 }
 
 static __inline void
 mqnode_release(struct mqfs_node *node)
 {
 	struct mqfs_info *mqfs;
 	int old, exp;
 
 	mqfs = node->mn_info;
 	old = atomic_fetchadd_int(&node->mn_refcount, -1);
 	if (node->mn_type == mqfstype_dir ||
 	    node->mn_type == mqfstype_root)
 		exp = 3; /* include . and .. */
 	else
 		exp = 1;
 	if (old == exp) {
 		int locked = sx_xlocked(&mqfs->mi_lock);
 		if (!locked)
 			sx_xlock(&mqfs->mi_lock);
 		mqfs_destroy(node);
 		if (!locked)
 			sx_xunlock(&mqfs->mi_lock);
 	}
 }
 
 /*
  * Add a node to a directory
  */
 static int
 mqfs_add_node(struct mqfs_node *parent, struct mqfs_node *node)
 {
 	KASSERT(parent != NULL, ("%s(): parent is NULL", __func__));
 	KASSERT(parent->mn_info != NULL,
 	    ("%s(): parent has no mn_info", __func__));
 	KASSERT(parent->mn_type == mqfstype_dir ||
 	    parent->mn_type == mqfstype_root,
 	    ("%s(): parent is not a directory", __func__));
 
 	node->mn_info = parent->mn_info;
 	node->mn_parent = parent;
 	LIST_INIT(&node->mn_children);
 	LIST_INIT(&node->mn_vnodes);
 	LIST_INSERT_HEAD(&parent->mn_children, node, mn_sibling);
 	mqnode_addref(parent);
 	return (0);
 }
 
 static struct mqfs_node *
 mqfs_create_node(const char *name, int namelen, struct ucred *cred, int mode,
 	int nodetype)
 {
 	struct mqfs_node *node;
 
 	node = mqnode_alloc();
 	strncpy(node->mn_name, name, namelen);
 	node->mn_pr_root = cred->cr_prison->pr_root;
 	node->mn_type = nodetype;
 	node->mn_refcount = 1;
 	vfs_timestamp(&node->mn_birth);
 	node->mn_ctime = node->mn_atime = node->mn_mtime
 		= node->mn_birth;
 	node->mn_uid = cred->cr_uid;
 	node->mn_gid = cred->cr_gid;
 	node->mn_mode = mode;
 	return (node);
 }
 
 /*
  * Create a file
  */
 static struct mqfs_node *
 mqfs_create_file(struct mqfs_node *parent, const char *name, int namelen,
 	struct ucred *cred, int mode)
 {
 	struct mqfs_node *node;
 
 	node = mqfs_create_node(name, namelen, cred, mode, mqfstype_file);
 	if (mqfs_add_node(parent, node) != 0) {
 		mqnode_free(node);
 		return (NULL);
 	}
 	return (node);
 }
 
 /*
  * Add . and .. to a directory
  */
 static int
 mqfs_fixup_dir(struct mqfs_node *parent)
 {
 	struct mqfs_node *dir;
 
 	dir = mqnode_alloc();
 	dir->mn_name[0] = '.';
 	dir->mn_type = mqfstype_this;
 	dir->mn_refcount = 1;
 	if (mqfs_add_node(parent, dir) != 0) {
 		mqnode_free(dir);
 		return (-1);
 	}
 
 	dir = mqnode_alloc();
 	dir->mn_name[0] = dir->mn_name[1] = '.';
 	dir->mn_type = mqfstype_parent;
 	dir->mn_refcount = 1;
 
 	if (mqfs_add_node(parent, dir) != 0) {
 		mqnode_free(dir);
 		return (-1);
 	}
 
 	return (0);
 }
 
 #ifdef notyet
 
 /*
  * Create a directory
  */
 static struct mqfs_node *
 mqfs_create_dir(struct mqfs_node *parent, const char *name, int namelen,
 	struct ucred *cred, int mode)
 {
 	struct mqfs_node *node;
 
 	node = mqfs_create_node(name, namelen, cred, mode, mqfstype_dir);
 	if (mqfs_add_node(parent, node) != 0) {
 		mqnode_free(node);
 		return (NULL);
 	}
 
 	if (mqfs_fixup_dir(node) != 0) {
 		mqfs_destroy(node);
 		return (NULL);
 	}
 	return (node);
 }
 
 /*
  * Create a symlink
  */
 static struct mqfs_node *
 mqfs_create_link(struct mqfs_node *parent, const char *name, int namelen,
 	struct ucred *cred, int mode)
 {
 	struct mqfs_node *node;
 
 	node = mqfs_create_node(name, namelen, cred, mode, mqfstype_symlink);
 	if (mqfs_add_node(parent, node) != 0) {
 		mqnode_free(node);
 		return (NULL);
 	}
 	return (node);
 }
 
 #endif
 
 /*
  * Destroy a node or a tree of nodes
  */
 static int
 mqfs_destroy(struct mqfs_node *node)
 {
 	struct mqfs_node *parent;
 
 	KASSERT(node != NULL,
 	    ("%s(): node is NULL", __func__));
 	KASSERT(node->mn_info != NULL,
 	    ("%s(): node has no mn_info", __func__));
 
 	/* destroy children */
 	if (node->mn_type == mqfstype_dir || node->mn_type == mqfstype_root)
 		while (! LIST_EMPTY(&node->mn_children))
 			mqfs_destroy(LIST_FIRST(&node->mn_children));
 
 	/* unlink from parent */
 	if ((parent = node->mn_parent) != NULL) {
 		KASSERT(parent->mn_info == node->mn_info,
 		    ("%s(): parent has different mn_info", __func__));
 		LIST_REMOVE(node, mn_sibling);
 	}
 
 	if (node->mn_fileno != 0)
 		mqfs_fileno_free(node->mn_info, node);
 	if (node->mn_data != NULL)
 		mqueue_free(node->mn_data);
 	mqnode_free(node);
 	return (0);
 }
 
 /*
  * Mount a mqfs instance
  */
 static int
 mqfs_mount(struct mount *mp)
 {
 	struct statfs *sbp;
 
 	if (mp->mnt_flag & MNT_UPDATE)
 		return (EOPNOTSUPP);
 
 	mp->mnt_data = &mqfs_data;
 	MNT_ILOCK(mp);
 	mp->mnt_flag |= MNT_LOCAL;
 	MNT_IUNLOCK(mp);
 	vfs_getnewfsid(mp);
 
 	sbp = &mp->mnt_stat;
 	vfs_mountedfrom(mp, "mqueue");
 	sbp->f_bsize = PAGE_SIZE;
 	sbp->f_iosize = PAGE_SIZE;
 	sbp->f_blocks = 1;
 	sbp->f_bfree = 0;
 	sbp->f_bavail = 0;
 	sbp->f_files = 1;
 	sbp->f_ffree = 0;
 	return (0);
 }
 
 /*
  * Unmount a mqfs instance
  */
 static int
 mqfs_unmount(struct mount *mp, int mntflags)
 {
 	int error;
 
 	error = vflush(mp, 0, (mntflags & MNT_FORCE) ?  FORCECLOSE : 0,
 	    curthread);
 	return (error);
 }
 
 /*
  * Return a root vnode
  */
 static int
 mqfs_root(struct mount *mp, int flags, struct vnode **vpp)
 {
 	struct mqfs_info *mqfs;
 	int ret;
 
 	mqfs = VFSTOMQFS(mp);
 	ret = mqfs_allocv(mp, vpp, mqfs->mi_root);
 	return (ret);
 }
 
 /*
  * Return filesystem stats
  */
 static int
 mqfs_statfs(struct mount *mp, struct statfs *sbp)
 {
 	/* XXX update statistics */
 	return (0);
 }
 
 /*
  * Initialize a mqfs instance
  */
 static int
 mqfs_init(struct vfsconf *vfc)
 {
 	struct mqfs_node *root;
 	struct mqfs_info *mi;
 	osd_method_t methods[PR_MAXMETHOD] = {
 	    [PR_METHOD_REMOVE] = mqfs_prison_remove,
 	};
 
 	mqnode_zone = uma_zcreate("mqnode", sizeof(struct mqfs_node),
 		NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
 	mqueue_zone = uma_zcreate("mqueue", sizeof(struct mqueue),
 		NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
 	mvdata_zone = uma_zcreate("mvdata",
 		sizeof(struct mqfs_vdata), NULL, NULL, NULL,
 		NULL, UMA_ALIGN_PTR, 0);
 	mqnoti_zone = uma_zcreate("mqnotifier", sizeof(struct mqueue_notifier),
 		NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
 	mi = &mqfs_data;
 	sx_init(&mi->mi_lock, "mqfs lock");
 	/* set up the root diretory */
 	root = mqfs_create_node("/", 1, curthread->td_ucred, 01777,
 		mqfstype_root);
 	root->mn_info = mi;
 	LIST_INIT(&root->mn_children);
 	LIST_INIT(&root->mn_vnodes);
 	mi->mi_root = root;
 	mqfs_fileno_init(mi);
 	mqfs_fileno_alloc(mi, root);
 	mqfs_fixup_dir(root);
 	exit_tag = EVENTHANDLER_REGISTER(process_exit, mq_proc_exit, NULL,
 	    EVENTHANDLER_PRI_ANY);
 	mq_fdclose = mqueue_fdclose;
 	p31b_setcfg(CTL_P1003_1B_MESSAGE_PASSING, _POSIX_MESSAGE_PASSING);
 	mqfs_osd_jail_slot = osd_jail_register(NULL, methods);
 	return (0);
 }
 
 /*
  * Destroy a mqfs instance
  */
 static int
 mqfs_uninit(struct vfsconf *vfc)
 {
 	struct mqfs_info *mi;
 
 	if (!unloadable)
 		return (EOPNOTSUPP);
 	osd_jail_deregister(mqfs_osd_jail_slot);
 	EVENTHANDLER_DEREGISTER(process_exit, exit_tag);
 	mi = &mqfs_data;
 	mqfs_destroy(mi->mi_root);
 	mi->mi_root = NULL;
 	mqfs_fileno_uninit(mi);
 	sx_destroy(&mi->mi_lock);
 	uma_zdestroy(mqnode_zone);
 	uma_zdestroy(mqueue_zone);
 	uma_zdestroy(mvdata_zone);
 	uma_zdestroy(mqnoti_zone);
 	return (0);
 }
 
 /*
  * task routine
  */
 static void
 do_recycle(void *context, int pending __unused)
 {
 	struct vnode *vp = (struct vnode *)context;
 
 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 	vrecycle(vp);
 	VOP_UNLOCK(vp);
 	vdrop(vp);
 }
 
 /*
  * Allocate a vnode
  */
 static int
 mqfs_allocv(struct mount *mp, struct vnode **vpp, struct mqfs_node *pn)
 {
 	struct mqfs_vdata *vd;
 	struct mqfs_info  *mqfs;
 	struct vnode *newvpp;
 	int error;
 
 	mqfs = pn->mn_info;
 	*vpp = NULL;
 	sx_xlock(&mqfs->mi_lock);
 	LIST_FOREACH(vd, &pn->mn_vnodes, mv_link) {
 		if (vd->mv_vnode->v_mount == mp) {
 			vhold(vd->mv_vnode);
 			break;
 		}
 	}
 
 	if (vd != NULL) {
 found:
 		*vpp = vd->mv_vnode;
 		sx_xunlock(&mqfs->mi_lock);
 		error = vget(*vpp, LK_RETRY | LK_EXCLUSIVE);
 		vdrop(*vpp);
 		return (error);
 	}
 	sx_xunlock(&mqfs->mi_lock);
 
 	error = getnewvnode("mqueue", mp, &mqfs_vnodeops, &newvpp);
 	if (error)
 		return (error);
 	vn_lock(newvpp, LK_EXCLUSIVE | LK_RETRY);
 	error = insmntque(newvpp, mp);
 	if (error != 0)
 		return (error);
 
 	sx_xlock(&mqfs->mi_lock);
 	/*
 	 * Check if it has already been allocated
 	 * while we were blocked.
 	 */
 	LIST_FOREACH(vd, &pn->mn_vnodes, mv_link) {
 		if (vd->mv_vnode->v_mount == mp) {
 			vhold(vd->mv_vnode);
 			sx_xunlock(&mqfs->mi_lock);
 
 			vgone(newvpp);
 			vput(newvpp);
 			goto found;
 		}
 	}
 
 	*vpp = newvpp;
 
 	vd = uma_zalloc(mvdata_zone, M_WAITOK);
 	(*vpp)->v_data = vd;
 	vd->mv_vnode = *vpp;
 	vd->mv_node = pn;
 	TASK_INIT(&vd->mv_task, 0, do_recycle, *vpp);
 	LIST_INSERT_HEAD(&pn->mn_vnodes, vd, mv_link);
 	mqnode_addref(pn);
 	switch (pn->mn_type) {
 	case mqfstype_root:
 		(*vpp)->v_vflag = VV_ROOT;
 		/* fall through */
 	case mqfstype_dir:
 	case mqfstype_this:
 	case mqfstype_parent:
 		(*vpp)->v_type = VDIR;
 		break;
 	case mqfstype_file:
 		(*vpp)->v_type = VREG;
 		break;
 	case mqfstype_symlink:
 		(*vpp)->v_type = VLNK;
 		break;
 	case mqfstype_none:
 		KASSERT(0, ("mqfs_allocf called for null node\n"));
 	default:
 		panic("%s has unexpected type: %d", pn->mn_name, pn->mn_type);
 	}
 	sx_xunlock(&mqfs->mi_lock);
 	return (0);
 }
 
 /* 
  * Search a directory entry
  */
 static struct mqfs_node *
 mqfs_search(struct mqfs_node *pd, const char *name, int len, struct ucred *cred)
 {
 	struct mqfs_node *pn;
 	const void *pr_root;
 
 	sx_assert(&pd->mn_info->mi_lock, SX_LOCKED);
 	pr_root = cred->cr_prison->pr_root;
 	LIST_FOREACH(pn, &pd->mn_children, mn_sibling) {
 		/* Only match names within the same prison root directory */
 		if ((pn->mn_pr_root == NULL || pn->mn_pr_root == pr_root) &&
 		    strncmp(pn->mn_name, name, len) == 0 &&
 		    pn->mn_name[len] == '\0')
 			return (pn);
 	}
 	return (NULL);
 }
 
 /*
  * Look up a file or directory.
  */
 static int
 mqfs_lookupx(struct vop_cachedlookup_args *ap)
 {
 	struct componentname *cnp;
 	struct vnode *dvp, **vpp;
 	struct mqfs_node *pd;
 	struct mqfs_node *pn;
 	struct mqfs_info *mqfs;
 	int nameiop, flags, error, namelen;
 	char *pname;
 	struct thread *td;
 
 	td = curthread;
 	cnp = ap->a_cnp;
 	vpp = ap->a_vpp;
 	dvp = ap->a_dvp;
 	pname = cnp->cn_nameptr;
 	namelen = cnp->cn_namelen;
 	flags = cnp->cn_flags;
 	nameiop = cnp->cn_nameiop;
 	pd = VTON(dvp);
 	pn = NULL;
 	mqfs = pd->mn_info;
 	*vpp = NULLVP;
 
 	if (dvp->v_type != VDIR)
 		return (ENOTDIR);
 
 	error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, td);
 	if (error)
 		return (error);
 
 	/* shortcut: check if the name is too long */
 	if (cnp->cn_namelen >= MQFS_NAMELEN)
 		return (ENOENT);
 
 	/* self */
 	if (namelen == 1 && pname[0] == '.') {
 		if ((flags & ISLASTCN) && nameiop != LOOKUP)
 			return (EINVAL);
 		pn = pd;
 		*vpp = dvp;
 		VREF(dvp);
 		return (0);
 	}
 
 	/* parent */
 	if (cnp->cn_flags & ISDOTDOT) {
 		if (dvp->v_vflag & VV_ROOT)
 			return (EIO);
 		if ((flags & ISLASTCN) && nameiop != LOOKUP)
 			return (EINVAL);
 		VOP_UNLOCK(dvp);
 		KASSERT(pd->mn_parent, ("non-root directory has no parent"));
 		pn = pd->mn_parent;
 		error = mqfs_allocv(dvp->v_mount, vpp, pn);
 		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
 		return (error);
 	}
 
 	/* named node */
 	sx_xlock(&mqfs->mi_lock);
 	pn = mqfs_search(pd, pname, namelen, cnp->cn_cred);
 	if (pn != NULL)
 		mqnode_addref(pn);
 	sx_xunlock(&mqfs->mi_lock);
 
 	/* found */
 	if (pn != NULL) {
 		/* DELETE */
 		if (nameiop == DELETE && (flags & ISLASTCN)) {
 			error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred, td);
 			if (error) {
 				mqnode_release(pn);
 				return (error);
 			}
 			if (*vpp == dvp) {
 				VREF(dvp);
 				*vpp = dvp;
 				mqnode_release(pn);
 				return (0);
 			}
 		}
 
 		/* allocate vnode */
 		error = mqfs_allocv(dvp->v_mount, vpp, pn);
 		mqnode_release(pn);
 		if (error == 0 && cnp->cn_flags & MAKEENTRY)
 			cache_enter(dvp, *vpp, cnp);
 		return (error);
 	}
 
 	/* not found */
 
 	/* will create a new entry in the directory ? */
 	if ((nameiop == CREATE || nameiop == RENAME) && (flags & LOCKPARENT)
 	    && (flags & ISLASTCN)) {
 		error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred, td);
 		if (error)
 			return (error);
-		cnp->cn_flags |= SAVENAME;
 		return (EJUSTRETURN);
 	}
 	return (ENOENT);
 }
 
 #if 0
 struct vop_lookup_args {
 	struct vop_generic_args a_gen;
 	struct vnode *a_dvp;
 	struct vnode **a_vpp;
 	struct componentname *a_cnp;
 };
 #endif
 
 /*
  * vnode lookup operation
  */
 static int
 mqfs_lookup(struct vop_cachedlookup_args *ap)
 {
 	int rc;
 
 	rc = mqfs_lookupx(ap);
 	return (rc);
 }
 
 #if 0
 struct vop_create_args {
 	struct vnode *a_dvp;
 	struct vnode **a_vpp;
 	struct componentname *a_cnp;
 	struct vattr *a_vap;
 };
 #endif
 
 /*
  * vnode creation operation
  */
 static int
 mqfs_create(struct vop_create_args *ap)
 {
 	struct mqfs_info *mqfs = VFSTOMQFS(ap->a_dvp->v_mount);
 	struct componentname *cnp = ap->a_cnp;
 	struct mqfs_node *pd;
 	struct mqfs_node *pn;
 	struct mqueue *mq;
 	int error;
 
 	pd = VTON(ap->a_dvp);
 	if (pd->mn_type != mqfstype_root && pd->mn_type != mqfstype_dir)
 		return (ENOTDIR);
 	mq = mqueue_alloc(NULL);
 	if (mq == NULL)
 		return (EAGAIN);
 	sx_xlock(&mqfs->mi_lock);
-	if ((cnp->cn_flags & HASBUF) == 0)
-		panic("%s: no name", __func__);
 	pn = mqfs_create_file(pd, cnp->cn_nameptr, cnp->cn_namelen,
 		cnp->cn_cred, ap->a_vap->va_mode);
 	if (pn == NULL) {
 		sx_xunlock(&mqfs->mi_lock);
 		error = ENOSPC;
 	} else {
 		mqnode_addref(pn);
 		sx_xunlock(&mqfs->mi_lock);
 		error = mqfs_allocv(ap->a_dvp->v_mount, ap->a_vpp, pn);
 		mqnode_release(pn);
 		if (error)
 			mqfs_destroy(pn);
 		else
 			pn->mn_data = mq;
 	}
 	if (error)
 		mqueue_free(mq);
 	return (error);
 }
 
 /*
  * Remove an entry
  */
 static
 int do_unlink(struct mqfs_node *pn, struct ucred *ucred)
 {
 	struct mqfs_node *parent;
 	struct mqfs_vdata *vd;
 	int error = 0;
 
 	sx_assert(&pn->mn_info->mi_lock, SX_LOCKED);
 
 	if (ucred->cr_uid != pn->mn_uid &&
 	    (error = priv_check_cred(ucred, PRIV_MQ_ADMIN)) != 0)
 		error = EACCES;
 	else if (!pn->mn_deleted) {
 		parent = pn->mn_parent;
 		pn->mn_parent = NULL;
 		pn->mn_deleted = 1;
 		LIST_REMOVE(pn, mn_sibling);
 		LIST_FOREACH(vd, &pn->mn_vnodes, mv_link) {
 			cache_purge(vd->mv_vnode);
 			vhold(vd->mv_vnode);
 			taskqueue_enqueue(taskqueue_thread, &vd->mv_task);
 		}
 		mqnode_release(pn);
 		mqnode_release(parent);
 	} else
 		error = ENOENT;
 	return (error);
 }
 
 #if 0
 struct vop_remove_args {
 	struct vnode *a_dvp;
 	struct vnode *a_vp;
 	struct componentname *a_cnp;
 };
 #endif
 
 /*
  * vnode removal operation
  */
 static int
 mqfs_remove(struct vop_remove_args *ap)
 {
 	struct mqfs_info *mqfs = VFSTOMQFS(ap->a_dvp->v_mount);
 	struct mqfs_node *pn;
 	int error;
 
 	if (ap->a_vp->v_type == VDIR)
                 return (EPERM);
 	pn = VTON(ap->a_vp);
 	sx_xlock(&mqfs->mi_lock);
 	error = do_unlink(pn, ap->a_cnp->cn_cred);
 	sx_xunlock(&mqfs->mi_lock);
 	return (error);
 }
 
 #if 0
 struct vop_inactive_args {
 	struct vnode *a_vp;
 	struct thread *a_td;
 };
 #endif
 
 static int
 mqfs_inactive(struct vop_inactive_args *ap)
 {
 	struct mqfs_node *pn = VTON(ap->a_vp);
 
 	if (pn->mn_deleted)
 		vrecycle(ap->a_vp);
 	return (0);
 }
 
 #if 0
 struct vop_reclaim_args {
 	struct vop_generic_args a_gen;
 	struct vnode *a_vp;
 };
 #endif
 
 static int
 mqfs_reclaim(struct vop_reclaim_args *ap)
 {
 	struct mqfs_info *mqfs = VFSTOMQFS(ap->a_vp->v_mount);
 	struct vnode *vp = ap->a_vp;
 	struct mqfs_node *pn;
 	struct mqfs_vdata *vd;
 
 	vd = vp->v_data;
 	pn = vd->mv_node;
 	sx_xlock(&mqfs->mi_lock);
 	vp->v_data = NULL;
 	LIST_REMOVE(vd, mv_link);
 	uma_zfree(mvdata_zone, vd);
 	mqnode_release(pn);
 	sx_xunlock(&mqfs->mi_lock);
 	return (0);
 }
 
 #if 0
 struct vop_open_args {
 	struct vop_generic_args a_gen;
 	struct vnode *a_vp;
 	int a_mode;
 	struct ucred *a_cred;
 	struct thread *a_td;
 	struct file *a_fp;
 };
 #endif
 
 static int
 mqfs_open(struct vop_open_args *ap)
 {
 	return (0);
 }
 
 #if 0
 struct vop_close_args {
 	struct vop_generic_args a_gen;
 	struct vnode *a_vp;
 	int a_fflag;
 	struct ucred *a_cred;
 	struct thread *a_td;
 };
 #endif
 
 static int
 mqfs_close(struct vop_close_args *ap)
 {
 	return (0);
 }
 
 #if 0
 struct vop_access_args {
 	struct vop_generic_args a_gen;
 	struct vnode *a_vp;
 	accmode_t a_accmode;
 	struct ucred *a_cred;
 	struct thread *a_td;
 };
 #endif
 
 /*
  * Verify permissions
  */
 static int
 mqfs_access(struct vop_access_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct vattr vattr;
 	int error;
 
 	error = VOP_GETATTR(vp, &vattr, ap->a_cred);
 	if (error)
 		return (error);
 	error = vaccess(vp->v_type, vattr.va_mode, vattr.va_uid, vattr.va_gid,
 	    ap->a_accmode, ap->a_cred);
 	return (error);
 }
 
 #if 0
 struct vop_getattr_args {
 	struct vop_generic_args a_gen;
 	struct vnode *a_vp;
 	struct vattr *a_vap;
 	struct ucred *a_cred;
 };
 #endif
 
 /*
  * Get file attributes
  */
 static int
 mqfs_getattr(struct vop_getattr_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct mqfs_node *pn = VTON(vp);
 	struct vattr *vap = ap->a_vap;
 	int error = 0;
 
 	vap->va_type = vp->v_type;
 	vap->va_mode = pn->mn_mode;
 	vap->va_nlink = 1;
 	vap->va_uid = pn->mn_uid;
 	vap->va_gid = pn->mn_gid;
 	vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
 	vap->va_fileid = pn->mn_fileno;
 	vap->va_size = 0;
 	vap->va_blocksize = PAGE_SIZE;
 	vap->va_bytes = vap->va_size = 0;
 	vap->va_atime = pn->mn_atime;
 	vap->va_mtime = pn->mn_mtime;
 	vap->va_ctime = pn->mn_ctime;
 	vap->va_birthtime = pn->mn_birth;
 	vap->va_gen = 0;
 	vap->va_flags = 0;
 	vap->va_rdev = NODEV;
 	vap->va_bytes = 0;
 	vap->va_filerev = 0;
 	return (error);
 }
 
 #if 0
 struct vop_setattr_args {
 	struct vop_generic_args a_gen;
 	struct vnode *a_vp;
 	struct vattr *a_vap;
 	struct ucred *a_cred;
 };
 #endif
 /*
  * Set attributes
  */
 static int
 mqfs_setattr(struct vop_setattr_args *ap)
 {
 	struct mqfs_node *pn;
 	struct vattr *vap;
 	struct vnode *vp;
 	struct thread *td;
 	int c, error;
 	uid_t uid;
 	gid_t gid;
 
 	td = curthread;
 	vap = ap->a_vap;
 	vp = ap->a_vp;
 	if ((vap->va_type != VNON) ||
 	    (vap->va_nlink != VNOVAL) ||
 	    (vap->va_fsid != VNOVAL) ||
 	    (vap->va_fileid != VNOVAL) ||
 	    (vap->va_blocksize != VNOVAL) ||
 	    (vap->va_flags != VNOVAL && vap->va_flags != 0) ||
 	    (vap->va_rdev != VNOVAL) ||
 	    ((int)vap->va_bytes != VNOVAL) ||
 	    (vap->va_gen != VNOVAL)) {
 		return (EINVAL);
 	}
 
 	pn = VTON(vp);
 
 	error = c = 0;
 	if (vap->va_uid == (uid_t)VNOVAL)
 		uid = pn->mn_uid;
 	else
 		uid = vap->va_uid;
 	if (vap->va_gid == (gid_t)VNOVAL)
 		gid = pn->mn_gid;
 	else
 		gid = vap->va_gid;
 
 	if (uid != pn->mn_uid || gid != pn->mn_gid) {
 		/*
 		 * To modify the ownership of a file, must possess VADMIN
 		 * for that file.
 		 */
 		if ((error = VOP_ACCESS(vp, VADMIN, ap->a_cred, td)))
 			return (error);
 
 		/*
 		 * XXXRW: Why is there a privilege check here: shouldn't the
 		 * check in VOP_ACCESS() be enough?  Also, are the group bits
 		 * below definitely right?
 		 */
 		if (((ap->a_cred->cr_uid != pn->mn_uid) || uid != pn->mn_uid ||
 		    (gid != pn->mn_gid && !groupmember(gid, ap->a_cred))) &&
 		    (error = priv_check(td, PRIV_MQ_ADMIN)) != 0)
 			return (error);
 		pn->mn_uid = uid;
 		pn->mn_gid = gid;
 		c = 1;
 	}
 
 	if (vap->va_mode != (mode_t)VNOVAL) {
 		if ((ap->a_cred->cr_uid != pn->mn_uid) &&
 		    (error = priv_check(td, PRIV_MQ_ADMIN)))
 			return (error);
 		pn->mn_mode = vap->va_mode;
 		c = 1;
 	}
 
 	if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL) {
 		/* See the comment in ufs_vnops::ufs_setattr(). */
 		if ((error = VOP_ACCESS(vp, VADMIN, ap->a_cred, td)) &&
 		    ((vap->va_vaflags & VA_UTIMES_NULL) == 0 ||
 		    (error = VOP_ACCESS(vp, VWRITE, ap->a_cred, td))))
 			return (error);
 		if (vap->va_atime.tv_sec != VNOVAL) {
 			pn->mn_atime = vap->va_atime;
 		}
 		if (vap->va_mtime.tv_sec != VNOVAL) {
 			pn->mn_mtime = vap->va_mtime;
 		}
 		c = 1;
 	}
 	if (c) {
 		vfs_timestamp(&pn->mn_ctime);
 	}
 	return (0);
 }
 
 #if 0
 struct vop_read_args {
 	struct vop_generic_args a_gen;
 	struct vnode *a_vp;
 	struct uio *a_uio;
 	int a_ioflag;
 	struct ucred *a_cred;
 };
 #endif
 
 /*
  * Read from a file
  */
 static int
 mqfs_read(struct vop_read_args *ap)
 {
 	char buf[80];
 	struct vnode *vp = ap->a_vp;
 	struct uio *uio = ap->a_uio;
 	struct mqueue *mq;
 	int len, error;
 
 	if (vp->v_type != VREG)
 		return (EINVAL);
 
 	mq = VTOMQ(vp);
 	snprintf(buf, sizeof(buf),
 		"QSIZE:%-10ld MAXMSG:%-10ld CURMSG:%-10ld MSGSIZE:%-10ld\n",
 		mq->mq_totalbytes,
 		mq->mq_maxmsg,
 		mq->mq_curmsgs,
 		mq->mq_msgsize);
 	buf[sizeof(buf)-1] = '\0';
 	len = strlen(buf);
 	error = uiomove_frombuf(buf, len, uio);
 	return (error);
 }
 
 #if 0
 struct vop_readdir_args {
 	struct vop_generic_args a_gen;
 	struct vnode *a_vp;
 	struct uio *a_uio;
 	struct ucred *a_cred;
 	int *a_eofflag;
 	int *a_ncookies;
 	uint64_t **a_cookies;
 };
 #endif
 
 /*
  * Return directory entries.
  */
 static int
 mqfs_readdir(struct vop_readdir_args *ap)
 {
 	struct vnode *vp;
 	struct mqfs_info *mi;
 	struct mqfs_node *pd;
 	struct mqfs_node *pn;
 	struct dirent entry;
 	struct uio *uio;
 	const void *pr_root;
 	int *tmp_ncookies = NULL;
 	off_t offset;
 	int error, i;
 
 	vp = ap->a_vp;
 	mi = VFSTOMQFS(vp->v_mount);
 	pd = VTON(vp);
 	uio = ap->a_uio;
 
 	if (vp->v_type != VDIR)
 		return (ENOTDIR);
 
 	if (uio->uio_offset < 0)
 		return (EINVAL);
 
 	if (ap->a_ncookies != NULL) {
 		tmp_ncookies = ap->a_ncookies;
 		*ap->a_ncookies = 0;
 		ap->a_ncookies = NULL;
         }
 
 	error = 0;
 	offset = 0;
 
 	pr_root = ap->a_cred->cr_prison->pr_root;
 	sx_xlock(&mi->mi_lock);
 
 	LIST_FOREACH(pn, &pd->mn_children, mn_sibling) {
 		entry.d_reclen = sizeof(entry);
 
 		/*
 		 * Only show names within the same prison root directory
 		 * (or not associated with a prison, e.g. "." and "..").
 		 */
 		if (pn->mn_pr_root != NULL && pn->mn_pr_root != pr_root)
 			continue;
 		if (!pn->mn_fileno)
 			mqfs_fileno_alloc(mi, pn);
 		entry.d_fileno = pn->mn_fileno;
 		entry.d_off = offset + entry.d_reclen;
 		for (i = 0; i < MQFS_NAMELEN - 1 && pn->mn_name[i] != '\0'; ++i)
 			entry.d_name[i] = pn->mn_name[i];
 		entry.d_namlen = i;
 		switch (pn->mn_type) {
 		case mqfstype_root:
 		case mqfstype_dir:
 		case mqfstype_this:
 		case mqfstype_parent:
 			entry.d_type = DT_DIR;
 			break;
 		case mqfstype_file:
 			entry.d_type = DT_REG;
 			break;
 		case mqfstype_symlink:
 			entry.d_type = DT_LNK;
 			break;
 		default:
 			panic("%s has unexpected node type: %d", pn->mn_name,
 				pn->mn_type);
 		}
 		dirent_terminate(&entry);
 		if (entry.d_reclen > uio->uio_resid)
                         break;
 		if (offset >= uio->uio_offset) {
 			error = vfs_read_dirent(ap, &entry, offset);
                         if (error)
                                 break;
                 }
                 offset += entry.d_reclen;
 	}
 	sx_xunlock(&mi->mi_lock);
 
 	uio->uio_offset = offset;
 
 	if (tmp_ncookies != NULL)
 		ap->a_ncookies = tmp_ncookies;
 
 	return (error);
 }
 
 #ifdef notyet
 
 #if 0
 struct vop_mkdir_args {
 	struct vnode *a_dvp;
 	struvt vnode **a_vpp;
 	struvt componentname *a_cnp;
 	struct vattr *a_vap;
 };
 #endif
 
 /*
  * Create a directory.
  */
 static int
 mqfs_mkdir(struct vop_mkdir_args *ap)
 {
 	struct mqfs_info *mqfs = VFSTOMQFS(ap->a_dvp->v_mount);
 	struct componentname *cnp = ap->a_cnp;
 	struct mqfs_node *pd = VTON(ap->a_dvp);
 	struct mqfs_node *pn;
 	int error;
 
 	if (pd->mn_type != mqfstype_root && pd->mn_type != mqfstype_dir)
 		return (ENOTDIR);
 	sx_xlock(&mqfs->mi_lock);
-	if ((cnp->cn_flags & HASBUF) == 0)
-		panic("%s: no name", __func__);
 	pn = mqfs_create_dir(pd, cnp->cn_nameptr, cnp->cn_namelen,
 		ap->a_vap->cn_cred, ap->a_vap->va_mode);
 	if (pn != NULL)
 		mqnode_addref(pn);
 	sx_xunlock(&mqfs->mi_lock);
 	if (pn == NULL) {
 		error = ENOSPC;
 	} else {
 		error = mqfs_allocv(ap->a_dvp->v_mount, ap->a_vpp, pn);
 		mqnode_release(pn);
 	}
 	return (error);
 }
 
 #if 0
 struct vop_rmdir_args {
 	struct vnode *a_dvp;
 	struct vnode *a_vp;
 	struct componentname *a_cnp;
 };
 #endif
 
 /*
  * Remove a directory.
  */
 static int
 mqfs_rmdir(struct vop_rmdir_args *ap)
 {
 	struct mqfs_info *mqfs = VFSTOMQFS(ap->a_dvp->v_mount);
 	struct mqfs_node *pn = VTON(ap->a_vp);
 	struct mqfs_node *pt;
 
 	if (pn->mn_type != mqfstype_dir)
 		return (ENOTDIR);
 
 	sx_xlock(&mqfs->mi_lock);
 	if (pn->mn_deleted) {
 		sx_xunlock(&mqfs->mi_lock);
 		return (ENOENT);
 	}
 
 	pt = LIST_FIRST(&pn->mn_children);
 	pt = LIST_NEXT(pt, mn_sibling);
 	pt = LIST_NEXT(pt, mn_sibling);
 	if (pt != NULL) {
 		sx_xunlock(&mqfs->mi_lock);
 		return (ENOTEMPTY);
 	}
 	pt = pn->mn_parent;
 	pn->mn_parent = NULL;
 	pn->mn_deleted = 1;
 	LIST_REMOVE(pn, mn_sibling);
 	mqnode_release(pn);
 	mqnode_release(pt);
 	sx_xunlock(&mqfs->mi_lock);
 	cache_purge(ap->a_vp);
 	return (0);
 }
 
 #endif /* notyet */
 
 /*
  * See if this prison root is obsolete, and clean up associated queues if it is.
  */
 static int
 mqfs_prison_remove(void *obj, void *data __unused)
 {
 	const struct prison *pr = obj;
 	struct prison *tpr;
 	struct mqfs_node *pn, *tpn;
 	struct vnode *pr_root;
 
 	pr_root = pr->pr_root;
 	if (pr->pr_parent->pr_root == pr_root)
 		return (0);
 	TAILQ_FOREACH(tpr, &allprison, pr_list) {
 		if (tpr != pr && tpr->pr_root == pr_root)
 			return (0);
 	}
 	/*
 	 * No jails are rooted in this directory anymore,
 	 * so no queues should be either.
 	 */
 	sx_xlock(&mqfs_data.mi_lock);
 	LIST_FOREACH_SAFE(pn, &mqfs_data.mi_root->mn_children,
 	    mn_sibling, tpn) {
 		if (pn->mn_pr_root == pr_root)
 			(void)do_unlink(pn, curthread->td_ucred);
 	}
 	sx_xunlock(&mqfs_data.mi_lock);
 	return (0);
 }
 
 /*
  * Allocate a message queue
  */
 static struct mqueue *
 mqueue_alloc(const struct mq_attr *attr)
 {
 	struct mqueue *mq;
 
 	if (curmq >= maxmq)
 		return (NULL);
 	mq = uma_zalloc(mqueue_zone, M_WAITOK | M_ZERO);
 	TAILQ_INIT(&mq->mq_msgq);
 	if (attr != NULL) {
 		mq->mq_maxmsg = attr->mq_maxmsg;
 		mq->mq_msgsize = attr->mq_msgsize;
 	} else {
 		mq->mq_maxmsg = default_maxmsg;
 		mq->mq_msgsize = default_msgsize;
 	}
 	mtx_init(&mq->mq_mutex, "mqueue lock", NULL, MTX_DEF);
 	knlist_init_mtx(&mq->mq_rsel.si_note, &mq->mq_mutex);
 	knlist_init_mtx(&mq->mq_wsel.si_note, &mq->mq_mutex);
 	atomic_add_int(&curmq, 1);
 	return (mq);
 }
 
 /*
  * Destroy a message queue
  */
 static void
 mqueue_free(struct mqueue *mq)
 {
 	struct mqueue_msg *msg;
 
 	while ((msg = TAILQ_FIRST(&mq->mq_msgq)) != NULL) {
 		TAILQ_REMOVE(&mq->mq_msgq, msg, msg_link);
 		free(msg, M_MQUEUEDATA);
 	}
 
 	mtx_destroy(&mq->mq_mutex);
 	seldrain(&mq->mq_rsel);
 	seldrain(&mq->mq_wsel);
 	knlist_destroy(&mq->mq_rsel.si_note);
 	knlist_destroy(&mq->mq_wsel.si_note);
 	uma_zfree(mqueue_zone, mq);
 	atomic_add_int(&curmq, -1);
 }
 
 /*
  * Load a message from user space
  */
 static struct mqueue_msg *
 mqueue_loadmsg(const char *msg_ptr, size_t msg_size, int msg_prio)
 {
 	struct mqueue_msg *msg;
 	size_t len;
 	int error;
 
 	len = sizeof(struct mqueue_msg) + msg_size;
 	msg = malloc(len, M_MQUEUEDATA, M_WAITOK);
 	error = copyin(msg_ptr, ((char *)msg) + sizeof(struct mqueue_msg),
 	    msg_size);
 	if (error) {
 		free(msg, M_MQUEUEDATA);
 		msg = NULL;
 	} else {
 		msg->msg_size = msg_size;
 		msg->msg_prio = msg_prio;
 	}
 	return (msg);
 }
 
 /*
  * Save a message to user space
  */
 static int
 mqueue_savemsg(struct mqueue_msg *msg, char *msg_ptr, int *msg_prio)
 {
 	int error;
 
 	error = copyout(((char *)msg) + sizeof(*msg), msg_ptr,
 		msg->msg_size);
 	if (error == 0 && msg_prio != NULL)
 		error = copyout(&msg->msg_prio, msg_prio, sizeof(int));
 	return (error);
 }
 
 /*
  * Free a message's memory
  */
 static __inline void
 mqueue_freemsg(struct mqueue_msg *msg)
 {
 	free(msg, M_MQUEUEDATA);
 }
 
 /*
  * Send a message. if waitok is false, thread will not be
  * blocked if there is no data in queue, otherwise, absolute
  * time will be checked.
  */
 int
 mqueue_send(struct mqueue *mq, const char *msg_ptr,
 	size_t msg_len, unsigned msg_prio, int waitok,
 	const struct timespec *abs_timeout)
 {
 	struct mqueue_msg *msg;
 	struct timespec ts, ts2;
 	struct timeval tv;
 	int error;
 
 	if (msg_prio >= MQ_PRIO_MAX)
 		return (EINVAL);
 	if (msg_len > mq->mq_msgsize)
 		return (EMSGSIZE);
 	msg = mqueue_loadmsg(msg_ptr, msg_len, msg_prio);
 	if (msg == NULL)
 		return (EFAULT);
 
 	/* O_NONBLOCK case */
 	if (!waitok) {
 		error = _mqueue_send(mq, msg, -1);
 		if (error)
 			goto bad;
 		return (0);
 	}
 
 	/* we allow a null timeout (wait forever) */
 	if (abs_timeout == NULL) {
 		error = _mqueue_send(mq, msg, 0);
 		if (error)
 			goto bad;
 		return (0);
 	}
 
 	/* send it before checking time */
 	error = _mqueue_send(mq, msg, -1);
 	if (error == 0)
 		return (0);
 
 	if (error != EAGAIN)
 		goto bad;
 
 	if (abs_timeout->tv_nsec >= 1000000000 || abs_timeout->tv_nsec < 0) {
 		error = EINVAL;
 		goto bad;
 	}
 	for (;;) {
 		getnanotime(&ts);
 		timespecsub(abs_timeout, &ts, &ts2);
 		if (ts2.tv_sec < 0 || (ts2.tv_sec == 0 && ts2.tv_nsec <= 0)) {
 			error = ETIMEDOUT;
 			break;
 		}
 		TIMESPEC_TO_TIMEVAL(&tv, &ts2);
 		error = _mqueue_send(mq, msg, tvtohz(&tv));
 		if (error != ETIMEDOUT)
 			break;
 	}
 	if (error == 0)
 		return (0);
 bad:
 	mqueue_freemsg(msg);
 	return (error);
 }
 
 /*
  * Common routine to send a message
  */
 static int
 _mqueue_send(struct mqueue *mq, struct mqueue_msg *msg, int timo)
 {	
 	struct mqueue_msg *msg2;
 	int error = 0;
 
 	mtx_lock(&mq->mq_mutex);
 	while (mq->mq_curmsgs >= mq->mq_maxmsg && error == 0) {
 		if (timo < 0) {
 			mtx_unlock(&mq->mq_mutex);
 			return (EAGAIN);
 		}
 		mq->mq_senders++;
 		error = msleep(&mq->mq_senders, &mq->mq_mutex,
 			    PCATCH, "mqsend", timo);
 		mq->mq_senders--;
 		if (error == EAGAIN)
 			error = ETIMEDOUT;
 	}
 	if (mq->mq_curmsgs >= mq->mq_maxmsg) {
 		mtx_unlock(&mq->mq_mutex);
 		return (error);
 	}
 	error = 0;
 	if (TAILQ_EMPTY(&mq->mq_msgq)) {
 		TAILQ_INSERT_HEAD(&mq->mq_msgq, msg, msg_link);
 	} else {
 		if (msg->msg_prio <= TAILQ_LAST(&mq->mq_msgq, msgq)->msg_prio) {
 			TAILQ_INSERT_TAIL(&mq->mq_msgq, msg, msg_link);
 		} else {
 			TAILQ_FOREACH(msg2, &mq->mq_msgq, msg_link) {
 				if (msg2->msg_prio < msg->msg_prio)
 					break;
 			}
 			TAILQ_INSERT_BEFORE(msg2, msg, msg_link);
 		}
 	}
 	mq->mq_curmsgs++;
 	mq->mq_totalbytes += msg->msg_size;
 	if (mq->mq_receivers)
 		wakeup_one(&mq->mq_receivers);
 	else if (mq->mq_notifier != NULL)
 		mqueue_send_notification(mq);
 	if (mq->mq_flags & MQ_RSEL) {
 		mq->mq_flags &= ~MQ_RSEL;
 		selwakeup(&mq->mq_rsel);
 	}
 	KNOTE_LOCKED(&mq->mq_rsel.si_note, 0);
 	mtx_unlock(&mq->mq_mutex);
 	return (0);
 }
 
 /*
  * Send realtime a signal to process which registered itself
  * successfully by mq_notify.
  */
 static void
 mqueue_send_notification(struct mqueue *mq)
 {
 	struct mqueue_notifier *nt;
 	struct thread *td;
 	struct proc *p;
 	int error;
 
 	mtx_assert(&mq->mq_mutex, MA_OWNED);
 	nt = mq->mq_notifier;
 	if (nt->nt_sigev.sigev_notify != SIGEV_NONE) {
 		p = nt->nt_proc;
 		error = sigev_findtd(p, &nt->nt_sigev, &td);
 		if (error) {
 			mq->mq_notifier = NULL;
 			return;
 		}
 		if (!KSI_ONQ(&nt->nt_ksi)) {
 			ksiginfo_set_sigev(&nt->nt_ksi, &nt->nt_sigev);
 			tdsendsignal(p, td, nt->nt_ksi.ksi_signo, &nt->nt_ksi);
 		}
 		PROC_UNLOCK(p);
 	}
 	mq->mq_notifier = NULL;
 }
 
 /*
  * Get a message. if waitok is false, thread will not be
  * blocked if there is no data in queue, otherwise, absolute
  * time will be checked.
  */
 int
 mqueue_receive(struct mqueue *mq, char *msg_ptr,
 	size_t msg_len, unsigned *msg_prio, int waitok,
 	const struct timespec *abs_timeout)
 {
 	struct mqueue_msg *msg;
 	struct timespec ts, ts2;
 	struct timeval tv;
 	int error;
 
 	if (msg_len < mq->mq_msgsize)
 		return (EMSGSIZE);
 
 	/* O_NONBLOCK case */
 	if (!waitok) {
 		error = _mqueue_recv(mq, &msg, -1);
 		if (error)
 			return (error);
 		goto received;
 	}
 
 	/* we allow a null timeout (wait forever). */
 	if (abs_timeout == NULL) {
 		error = _mqueue_recv(mq, &msg, 0);
 		if (error)
 			return (error);
 		goto received;
 	}
 
 	/* try to get a message before checking time */
 	error = _mqueue_recv(mq, &msg, -1);
 	if (error == 0)
 		goto received;
 
 	if (error != EAGAIN)
 		return (error);
 
 	if (abs_timeout->tv_nsec >= 1000000000 || abs_timeout->tv_nsec < 0) {
 		error = EINVAL;
 		return (error);
 	}
 
 	for (;;) {
 		getnanotime(&ts);
 		timespecsub(abs_timeout, &ts, &ts2);
 		if (ts2.tv_sec < 0 || (ts2.tv_sec == 0 && ts2.tv_nsec <= 0)) {
 			error = ETIMEDOUT;
 			return (error);
 		}
 		TIMESPEC_TO_TIMEVAL(&tv, &ts2);
 		error = _mqueue_recv(mq, &msg, tvtohz(&tv));
 		if (error == 0)
 			break;
 		if (error != ETIMEDOUT)
 			return (error);
 	}
 
 received:
 	error = mqueue_savemsg(msg, msg_ptr, msg_prio);
 	if (error == 0) {
 		curthread->td_retval[0] = msg->msg_size;
 		curthread->td_retval[1] = 0;
 	}
 	mqueue_freemsg(msg);
 	return (error);
 }
 
 /*
  * Common routine to receive a message
  */
 static int
 _mqueue_recv(struct mqueue *mq, struct mqueue_msg **msg, int timo)
 {	
 	int error = 0;
 
 	mtx_lock(&mq->mq_mutex);
 	while ((*msg = TAILQ_FIRST(&mq->mq_msgq)) == NULL && error == 0) {
 		if (timo < 0) {
 			mtx_unlock(&mq->mq_mutex);
 			return (EAGAIN);
 		}
 		mq->mq_receivers++;
 		error = msleep(&mq->mq_receivers, &mq->mq_mutex,
 			    PCATCH, "mqrecv", timo);
 		mq->mq_receivers--;
 		if (error == EAGAIN)
 			error = ETIMEDOUT;
 	}
 	if (*msg != NULL) {
 		error = 0;
 		TAILQ_REMOVE(&mq->mq_msgq, *msg, msg_link);
 		mq->mq_curmsgs--;
 		mq->mq_totalbytes -= (*msg)->msg_size;
 		if (mq->mq_senders)
 			wakeup_one(&mq->mq_senders);
 		if (mq->mq_flags & MQ_WSEL) {
 			mq->mq_flags &= ~MQ_WSEL;
 			selwakeup(&mq->mq_wsel);
 		}
 		KNOTE_LOCKED(&mq->mq_wsel.si_note, 0);
 	}
 	if (mq->mq_notifier != NULL && mq->mq_receivers == 0 &&
 	    !TAILQ_EMPTY(&mq->mq_msgq)) {
 		mqueue_send_notification(mq);
 	}
 	mtx_unlock(&mq->mq_mutex);
 	return (error);
 }
 
 static __inline struct mqueue_notifier *
 notifier_alloc(void)
 {
 	return (uma_zalloc(mqnoti_zone, M_WAITOK | M_ZERO));
 }
 
 static __inline void
 notifier_free(struct mqueue_notifier *p)
 {
 	uma_zfree(mqnoti_zone, p);
 }
 
 static struct mqueue_notifier *
 notifier_search(struct proc *p, int fd)
 {
 	struct mqueue_notifier *nt;
 
 	LIST_FOREACH(nt, &p->p_mqnotifier, nt_link) {
 		if (nt->nt_ksi.ksi_mqd == fd)
 			break;
 	}
 	return (nt);
 }
 
 static __inline void
 notifier_insert(struct proc *p, struct mqueue_notifier *nt)
 {
 	LIST_INSERT_HEAD(&p->p_mqnotifier, nt, nt_link);
 }
 
 static __inline void
 notifier_delete(struct proc *p, struct mqueue_notifier *nt)
 {
 	LIST_REMOVE(nt, nt_link);
 	notifier_free(nt);
 }
 
 static void
 notifier_remove(struct proc *p, struct mqueue *mq, int fd)
 {
 	struct mqueue_notifier *nt;
 
 	mtx_assert(&mq->mq_mutex, MA_OWNED);
 	PROC_LOCK(p);
 	nt = notifier_search(p, fd);
 	if (nt != NULL) {
 		if (mq->mq_notifier == nt)
 			mq->mq_notifier = NULL;
 		sigqueue_take(&nt->nt_ksi);
 		notifier_delete(p, nt);
 	}
 	PROC_UNLOCK(p);
 }
 
 static int
 kern_kmq_open(struct thread *td, const char *upath, int flags, mode_t mode,
     const struct mq_attr *attr)
 {
 	char path[MQFS_NAMELEN + 1];
 	struct mqfs_node *pn;
 	struct pwddesc *pdp;
 	struct file *fp;
 	struct mqueue *mq;
 	int fd, error, len, cmode;
 
 	AUDIT_ARG_FFLAGS(flags);
 	AUDIT_ARG_MODE(mode);
 
 	pdp = td->td_proc->p_pd;
 	cmode = (((mode & ~pdp->pd_cmask) & ALLPERMS) & ~S_ISTXT);
 	mq = NULL;
 	if ((flags & O_CREAT) != 0 && attr != NULL) {
 		if (attr->mq_maxmsg <= 0 || attr->mq_maxmsg > maxmsg)
 			return (EINVAL);
 		if (attr->mq_msgsize <= 0 || attr->mq_msgsize > maxmsgsize)
 			return (EINVAL);
 	}
 
 	error = copyinstr(upath, path, MQFS_NAMELEN + 1, NULL);
         if (error)
 		return (error);
 
 	/*
 	 * The first character of name must be a slash  (/) character
 	 * and the remaining characters of name cannot include any slash
 	 * characters. 
 	 */
 	len = strlen(path);
 	if (len < 2 || path[0] != '/' || strchr(path + 1, '/') != NULL)
 		return (EINVAL);
 	/*
 	 * "." and ".." are magic directories, populated on the fly, and cannot
 	 * be opened as queues.
 	 */
 	if (strcmp(path, "/.") == 0 || strcmp(path, "/..") == 0)
 		return (EINVAL);
 	AUDIT_ARG_UPATH1_CANON(path);
 
 	error = falloc(td, &fp, &fd, O_CLOEXEC);
 	if (error)
 		return (error);
 
 	sx_xlock(&mqfs_data.mi_lock);
 	pn = mqfs_search(mqfs_data.mi_root, path + 1, len - 1, td->td_ucred);
 	if (pn == NULL) {
 		if (!(flags & O_CREAT)) {
 			error = ENOENT;
 		} else {
 			mq = mqueue_alloc(attr);
 			if (mq == NULL) {
 				error = ENFILE;
 			} else {
 				pn = mqfs_create_file(mqfs_data.mi_root,
 				         path + 1, len - 1, td->td_ucred,
 					 cmode);
 				if (pn == NULL) {
 					error = ENOSPC;
 					mqueue_free(mq);
 				}
 			}
 		}
 
 		if (error == 0) {
 			pn->mn_data = mq;
 		}
 	} else {
 		if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL)) {
 			error = EEXIST;
 		} else {
 			accmode_t accmode = 0;
 
 			if (flags & FREAD)
 				accmode |= VREAD;
 			if (flags & FWRITE)
 				accmode |= VWRITE;
 			error = vaccess(VREG, pn->mn_mode, pn->mn_uid,
 			    pn->mn_gid, accmode, td->td_ucred);
 		}
 	}
 
 	if (error) {
 		sx_xunlock(&mqfs_data.mi_lock);
 		fdclose(td, fp, fd);
 		fdrop(fp, td);
 		return (error);
 	}
 
 	mqnode_addref(pn);
 	sx_xunlock(&mqfs_data.mi_lock);
 
 	finit(fp, flags & (FREAD | FWRITE | O_NONBLOCK), DTYPE_MQUEUE, pn,
 	    &mqueueops);
 
 	td->td_retval[0] = fd;
 	fdrop(fp, td);
 	return (0);
 }
 
 /*
  * Syscall to open a message queue.
  */
 int
 sys_kmq_open(struct thread *td, struct kmq_open_args *uap)
 {
 	struct mq_attr attr;
 	int flags, error;
 
 	if ((uap->flags & O_ACCMODE) == O_ACCMODE || uap->flags & O_EXEC)
 		return (EINVAL);
 	flags = FFLAGS(uap->flags);
 	if ((flags & O_CREAT) != 0 && uap->attr != NULL) {
 		error = copyin(uap->attr, &attr, sizeof(attr));
 		if (error)
 			return (error);
 	}
 	return (kern_kmq_open(td, uap->path, flags, uap->mode,
 	    uap->attr != NULL ? &attr : NULL));
 }
 
 /*
  * Syscall to unlink a message queue.
  */
 int
 sys_kmq_unlink(struct thread *td, struct kmq_unlink_args *uap)
 {
 	char path[MQFS_NAMELEN+1];
 	struct mqfs_node *pn;
 	int error, len;
 
 	error = copyinstr(uap->path, path, MQFS_NAMELEN + 1, NULL);
         if (error)
 		return (error);
 
 	len = strlen(path);
 	if (len < 2 || path[0] != '/' || strchr(path + 1, '/') != NULL)
 		return (EINVAL);
 	if (strcmp(path, "/.") == 0 || strcmp(path, "/..") == 0)
 		return (EINVAL);
 	AUDIT_ARG_UPATH1_CANON(path);
 
 	sx_xlock(&mqfs_data.mi_lock);
 	pn = mqfs_search(mqfs_data.mi_root, path + 1, len - 1, td->td_ucred);
 	if (pn != NULL)
 		error = do_unlink(pn, td->td_ucred);
 	else
 		error = ENOENT;
 	sx_xunlock(&mqfs_data.mi_lock);
 	return (error);
 }
 
 typedef int (*_fgetf)(struct thread *, int, cap_rights_t *, struct file **);
 
 /*
  * Get message queue by giving file slot
  */
 static int
 _getmq(struct thread *td, int fd, cap_rights_t *rightsp, _fgetf func,
        struct file **fpp, struct mqfs_node **ppn, struct mqueue **pmq)
 {
 	struct mqfs_node *pn;
 	int error;
 
 	error = func(td, fd, rightsp, fpp);
 	if (error)
 		return (error);
 	if (&mqueueops != (*fpp)->f_ops) {
 		fdrop(*fpp, td);
 		return (EBADF);
 	}
 	pn = (*fpp)->f_data;
 	if (ppn)
 		*ppn = pn;
 	if (pmq)
 		*pmq = pn->mn_data;
 	return (0);
 }
 
 static __inline int
 getmq(struct thread *td, int fd, struct file **fpp, struct mqfs_node **ppn,
 	struct mqueue **pmq)
 {
 
 	return _getmq(td, fd, &cap_event_rights, fget,
 	    fpp, ppn, pmq);
 }
 
 static __inline int
 getmq_read(struct thread *td, int fd, struct file **fpp,
 	 struct mqfs_node **ppn, struct mqueue **pmq)
 {
 
 	return _getmq(td, fd, &cap_read_rights, fget_read,
 	    fpp, ppn, pmq);
 }
 
 static __inline int
 getmq_write(struct thread *td, int fd, struct file **fpp,
 	struct mqfs_node **ppn, struct mqueue **pmq)
 {
 
 	return _getmq(td, fd, &cap_write_rights, fget_write,
 	    fpp, ppn, pmq);
 }
 
 static int
 kern_kmq_setattr(struct thread *td, int mqd, const struct mq_attr *attr,
     struct mq_attr *oattr)
 {
 	struct mqueue *mq;
 	struct file *fp;
 	u_int oflag, flag;
 	int error;
 
 	AUDIT_ARG_FD(mqd);
 	if (attr != NULL && (attr->mq_flags & ~O_NONBLOCK) != 0)
 		return (EINVAL);
 	error = getmq(td, mqd, &fp, NULL, &mq);
 	if (error)
 		return (error);
 	oattr->mq_maxmsg  = mq->mq_maxmsg;
 	oattr->mq_msgsize = mq->mq_msgsize;
 	oattr->mq_curmsgs = mq->mq_curmsgs;
 	if (attr != NULL) {
 		do {
 			oflag = flag = fp->f_flag;
 			flag &= ~O_NONBLOCK;
 			flag |= (attr->mq_flags & O_NONBLOCK);
 		} while (atomic_cmpset_int(&fp->f_flag, oflag, flag) == 0);
 	} else
 		oflag = fp->f_flag;
 	oattr->mq_flags = (O_NONBLOCK & oflag);
 	fdrop(fp, td);
 	return (error);
 }
 
 int
 sys_kmq_setattr(struct thread *td, struct kmq_setattr_args *uap)
 {
 	struct mq_attr attr, oattr;
 	int error;
 
 	if (uap->attr != NULL) {
 		error = copyin(uap->attr, &attr, sizeof(attr));
 		if (error != 0)
 			return (error);
 	}
 	error = kern_kmq_setattr(td, uap->mqd, uap->attr != NULL ? &attr : NULL,
 	    &oattr);
 	if (error == 0 && uap->oattr != NULL) {
 		bzero(oattr.__reserved, sizeof(oattr.__reserved));
 		error = copyout(&oattr, uap->oattr, sizeof(oattr));
 	}
 	return (error);
 }
 
 int
 sys_kmq_timedreceive(struct thread *td, struct kmq_timedreceive_args *uap)
 {
 	struct mqueue *mq;
 	struct file *fp;
 	struct timespec *abs_timeout, ets;
 	int error;
 	int waitok;
 
 	AUDIT_ARG_FD(uap->mqd);
 	error = getmq_read(td, uap->mqd, &fp, NULL, &mq);
 	if (error)
 		return (error);
 	if (uap->abs_timeout != NULL) {
 		error = copyin(uap->abs_timeout, &ets, sizeof(ets));
 		if (error != 0)
 			goto out;
 		abs_timeout = &ets;
 	} else
 		abs_timeout = NULL;
 	waitok = !(fp->f_flag & O_NONBLOCK);
 	error = mqueue_receive(mq, uap->msg_ptr, uap->msg_len,
 		uap->msg_prio, waitok, abs_timeout);
 out:
 	fdrop(fp, td);
 	return (error);
 }
 
 int
 sys_kmq_timedsend(struct thread *td, struct kmq_timedsend_args *uap)
 {
 	struct mqueue *mq;
 	struct file *fp;
 	struct timespec *abs_timeout, ets;
 	int error, waitok;
 
 	AUDIT_ARG_FD(uap->mqd);
 	error = getmq_write(td, uap->mqd, &fp, NULL, &mq);
 	if (error)
 		return (error);
 	if (uap->abs_timeout != NULL) {
 		error = copyin(uap->abs_timeout, &ets, sizeof(ets));
 		if (error != 0)
 			goto out;
 		abs_timeout = &ets;
 	} else
 		abs_timeout = NULL;
 	waitok = !(fp->f_flag & O_NONBLOCK);
 	error = mqueue_send(mq, uap->msg_ptr, uap->msg_len,
 		uap->msg_prio, waitok, abs_timeout);
 out:
 	fdrop(fp, td);
 	return (error);
 }
 
 static int
 kern_kmq_notify(struct thread *td, int mqd, struct sigevent *sigev)
 {
 	struct filedesc *fdp;
 	struct proc *p;
 	struct mqueue *mq;
 	struct file *fp, *fp2;
 	struct mqueue_notifier *nt, *newnt = NULL;
 	int error;
 
 	AUDIT_ARG_FD(mqd);
 	if (sigev != NULL) {
 		if (sigev->sigev_notify != SIGEV_SIGNAL &&
 		    sigev->sigev_notify != SIGEV_THREAD_ID &&
 		    sigev->sigev_notify != SIGEV_NONE)
 			return (EINVAL);
 		if ((sigev->sigev_notify == SIGEV_SIGNAL ||
 		    sigev->sigev_notify == SIGEV_THREAD_ID) &&
 		    !_SIG_VALID(sigev->sigev_signo))
 			return (EINVAL);
 	}
 	p = td->td_proc;
 	fdp = td->td_proc->p_fd;
 	error = getmq(td, mqd, &fp, NULL, &mq);
 	if (error)
 		return (error);
 again:
 	FILEDESC_SLOCK(fdp);
 	fp2 = fget_noref(fdp, mqd);
 	if (fp2 == NULL) {
 		FILEDESC_SUNLOCK(fdp);
 		error = EBADF;
 		goto out;
 	}
 #ifdef CAPABILITIES
 	error = cap_check(cap_rights(fdp, mqd), &cap_event_rights);
 	if (error) {
 		FILEDESC_SUNLOCK(fdp);
 		goto out;
 	}
 #endif
 	if (fp2 != fp) {
 		FILEDESC_SUNLOCK(fdp);
 		error = EBADF;
 		goto out;
 	}
 	mtx_lock(&mq->mq_mutex);
 	FILEDESC_SUNLOCK(fdp);
 	if (sigev != NULL) {
 		if (mq->mq_notifier != NULL) {
 			error = EBUSY;
 		} else {
 			PROC_LOCK(p);
 			nt = notifier_search(p, mqd);
 			if (nt == NULL) {
 				if (newnt == NULL) {
 					PROC_UNLOCK(p);
 					mtx_unlock(&mq->mq_mutex);
 					newnt = notifier_alloc();
 					goto again;
 				}
 			}
 
 			if (nt != NULL) {
 				sigqueue_take(&nt->nt_ksi);
 				if (newnt != NULL) {
 					notifier_free(newnt);
 					newnt = NULL;
 				}
 			} else {
 				nt = newnt;
 				newnt = NULL;
 				ksiginfo_init(&nt->nt_ksi);
 				nt->nt_ksi.ksi_flags |= KSI_INS | KSI_EXT;
 				nt->nt_ksi.ksi_code = SI_MESGQ;
 				nt->nt_proc = p;
 				nt->nt_ksi.ksi_mqd = mqd;
 				notifier_insert(p, nt);
 			}
 			nt->nt_sigev = *sigev;
 			mq->mq_notifier = nt;
 			PROC_UNLOCK(p);
 			/*
 			 * if there is no receivers and message queue
 			 * is not empty, we should send notification
 			 * as soon as possible.
 			 */
 			if (mq->mq_receivers == 0 &&
 			    !TAILQ_EMPTY(&mq->mq_msgq))
 				mqueue_send_notification(mq);
 		}
 	} else {
 		notifier_remove(p, mq, mqd);
 	}
 	mtx_unlock(&mq->mq_mutex);
 
 out:
 	fdrop(fp, td);
 	if (newnt != NULL)
 		notifier_free(newnt);
 	return (error);
 }
 
 int
 sys_kmq_notify(struct thread *td, struct kmq_notify_args *uap)
 {
 	struct sigevent ev, *evp;
 	int error;
 
 	if (uap->sigev == NULL) {
 		evp = NULL;
 	} else {
 		error = copyin(uap->sigev, &ev, sizeof(ev));
 		if (error != 0)
 			return (error);
 		evp = &ev;
 	}
 	return (kern_kmq_notify(td, uap->mqd, evp));
 }
 
 static void
 mqueue_fdclose(struct thread *td, int fd, struct file *fp)
 {
 	struct mqueue *mq;
 #ifdef INVARIANTS
 	struct filedesc *fdp;
 
 	fdp = td->td_proc->p_fd;
 	FILEDESC_LOCK_ASSERT(fdp);
 #endif
 
 	if (fp->f_ops == &mqueueops) {
 		mq = FPTOMQ(fp);
 		mtx_lock(&mq->mq_mutex);
 		notifier_remove(td->td_proc, mq, fd);
 
 		/* have to wakeup thread in same process */
 		if (mq->mq_flags & MQ_RSEL) {
 			mq->mq_flags &= ~MQ_RSEL;
 			selwakeup(&mq->mq_rsel);
 		}
 		if (mq->mq_flags & MQ_WSEL) {
 			mq->mq_flags &= ~MQ_WSEL;
 			selwakeup(&mq->mq_wsel);
 		}
 		mtx_unlock(&mq->mq_mutex);
 	}
 }
 
 static void
 mq_proc_exit(void *arg __unused, struct proc *p)
 {
 	struct filedesc *fdp;
 	struct file *fp;
 	struct mqueue *mq;
 	int i;
 
 	fdp = p->p_fd;
 	FILEDESC_SLOCK(fdp);
 	for (i = 0; i < fdp->fd_nfiles; ++i) {
 		fp = fget_noref(fdp, i);
 		if (fp != NULL && fp->f_ops == &mqueueops) {
 			mq = FPTOMQ(fp);
 			mtx_lock(&mq->mq_mutex);
 			notifier_remove(p, FPTOMQ(fp), i);
 			mtx_unlock(&mq->mq_mutex);
 		}
 	}
 	FILEDESC_SUNLOCK(fdp);
 	KASSERT(LIST_EMPTY(&p->p_mqnotifier), ("mq notifiers left"));
 }
 
 static int
 mqf_poll(struct file *fp, int events, struct ucred *active_cred,
 	struct thread *td)
 {
 	struct mqueue *mq = FPTOMQ(fp);
 	int revents = 0;
 
 	mtx_lock(&mq->mq_mutex);
 	if (events & (POLLIN | POLLRDNORM)) {
 		if (mq->mq_curmsgs) {
 			revents |= events & (POLLIN | POLLRDNORM);
 		} else {
 			mq->mq_flags |= MQ_RSEL;
 			selrecord(td, &mq->mq_rsel);
  		}
 	}
 	if (events & POLLOUT) {
 		if (mq->mq_curmsgs < mq->mq_maxmsg)
 			revents |= POLLOUT;
 		else {
 			mq->mq_flags |= MQ_WSEL;
 			selrecord(td, &mq->mq_wsel);
 		}
 	}
 	mtx_unlock(&mq->mq_mutex);
 	return (revents);
 }
 
 static int
 mqf_close(struct file *fp, struct thread *td)
 {
 	struct mqfs_node *pn;
 
 	fp->f_ops = &badfileops;
 	pn = fp->f_data;
 	fp->f_data = NULL;
 	sx_xlock(&mqfs_data.mi_lock);
 	mqnode_release(pn);
 	sx_xunlock(&mqfs_data.mi_lock);
 	return (0);
 }
 
 static int
 mqf_stat(struct file *fp, struct stat *st, struct ucred *active_cred)
 {
 	struct mqfs_node *pn = fp->f_data;
 
 	bzero(st, sizeof *st);
 	sx_xlock(&mqfs_data.mi_lock);
 	st->st_atim = pn->mn_atime;
 	st->st_mtim = pn->mn_mtime;
 	st->st_ctim = pn->mn_ctime;
 	st->st_birthtim = pn->mn_birth;
 	st->st_uid = pn->mn_uid;
 	st->st_gid = pn->mn_gid;
 	st->st_mode = S_IFIFO | pn->mn_mode;
 	sx_xunlock(&mqfs_data.mi_lock);
 	return (0);
 }
 
 static int
 mqf_chmod(struct file *fp, mode_t mode, struct ucred *active_cred,
     struct thread *td)
 {
 	struct mqfs_node *pn;
 	int error;
 
 	error = 0;
 	pn = fp->f_data;
 	sx_xlock(&mqfs_data.mi_lock);
 	error = vaccess(VREG, pn->mn_mode, pn->mn_uid, pn->mn_gid, VADMIN,
 	    active_cred);
 	if (error != 0)
 		goto out;
 	pn->mn_mode = mode & ACCESSPERMS;
 out:
 	sx_xunlock(&mqfs_data.mi_lock);
 	return (error);
 }
 
 static int
 mqf_chown(struct file *fp, uid_t uid, gid_t gid, struct ucred *active_cred,
     struct thread *td)
 {
 	struct mqfs_node *pn;
 	int error;
 
 	error = 0;
 	pn = fp->f_data;
 	sx_xlock(&mqfs_data.mi_lock);
 	if (uid == (uid_t)-1)
 		uid = pn->mn_uid;
 	if (gid == (gid_t)-1)
 		gid = pn->mn_gid;
 	if (((uid != pn->mn_uid && uid != active_cred->cr_uid) ||
 	    (gid != pn->mn_gid && !groupmember(gid, active_cred))) &&
 	    (error = priv_check_cred(active_cred, PRIV_VFS_CHOWN)))
 		goto out;
 	pn->mn_uid = uid;
 	pn->mn_gid = gid;
 out:
 	sx_xunlock(&mqfs_data.mi_lock);
 	return (error);
 }
 
 static int
 mqf_kqfilter(struct file *fp, struct knote *kn)
 {
 	struct mqueue *mq = FPTOMQ(fp);
 	int error = 0;
 
 	if (kn->kn_filter == EVFILT_READ) {
 		kn->kn_fop = &mq_rfiltops;
 		knlist_add(&mq->mq_rsel.si_note, kn, 0);
 	} else if (kn->kn_filter == EVFILT_WRITE) {
 		kn->kn_fop = &mq_wfiltops;
 		knlist_add(&mq->mq_wsel.si_note, kn, 0);
 	} else
 		error = EINVAL;
 	return (error);
 }
 
 static void
 filt_mqdetach(struct knote *kn)
 {
 	struct mqueue *mq = FPTOMQ(kn->kn_fp);
 
 	if (kn->kn_filter == EVFILT_READ)
 		knlist_remove(&mq->mq_rsel.si_note, kn, 0);
 	else if (kn->kn_filter == EVFILT_WRITE)
 		knlist_remove(&mq->mq_wsel.si_note, kn, 0);
 	else
 		panic("filt_mqdetach");
 }
 
 static int
 filt_mqread(struct knote *kn, long hint)
 {
 	struct mqueue *mq = FPTOMQ(kn->kn_fp);
 
 	mtx_assert(&mq->mq_mutex, MA_OWNED);
 	return (mq->mq_curmsgs != 0);
 }
 
 static int
 filt_mqwrite(struct knote *kn, long hint)
 {
 	struct mqueue *mq = FPTOMQ(kn->kn_fp);
 
 	mtx_assert(&mq->mq_mutex, MA_OWNED);
 	return (mq->mq_curmsgs < mq->mq_maxmsg);
 }
 
 static int
 mqf_fill_kinfo(struct file *fp, struct kinfo_file *kif, struct filedesc *fdp)
 {
 
 	kif->kf_type = KF_TYPE_MQUEUE;
 	return (0);
 }
 
 static struct fileops mqueueops = {
 	.fo_read		= invfo_rdwr,
 	.fo_write		= invfo_rdwr,
 	.fo_truncate		= invfo_truncate,
 	.fo_ioctl		= invfo_ioctl,
 	.fo_poll		= mqf_poll,
 	.fo_kqfilter		= mqf_kqfilter,
 	.fo_stat		= mqf_stat,
 	.fo_close		= mqf_close,
 	.fo_chmod		= mqf_chmod,
 	.fo_chown		= mqf_chown,
 	.fo_sendfile		= invfo_sendfile,
 	.fo_fill_kinfo		= mqf_fill_kinfo,
 	.fo_flags		= DFLAG_PASSABLE,
 };
 
 static struct vop_vector mqfs_vnodeops = {
 	.vop_default 		= &default_vnodeops,
 	.vop_access		= mqfs_access,
 	.vop_cachedlookup	= mqfs_lookup,
 	.vop_lookup		= vfs_cache_lookup,
 	.vop_reclaim		= mqfs_reclaim,
 	.vop_create		= mqfs_create,
 	.vop_remove		= mqfs_remove,
 	.vop_inactive		= mqfs_inactive,
 	.vop_open		= mqfs_open,
 	.vop_close		= mqfs_close,
 	.vop_getattr		= mqfs_getattr,
 	.vop_setattr		= mqfs_setattr,
 	.vop_read		= mqfs_read,
 	.vop_write		= VOP_EOPNOTSUPP,
 	.vop_readdir		= mqfs_readdir,
 	.vop_mkdir		= VOP_EOPNOTSUPP,
 	.vop_rmdir		= VOP_EOPNOTSUPP
 };
 VFS_VOP_VECTOR_REGISTER(mqfs_vnodeops);
 
 static struct vfsops mqfs_vfsops = {
 	.vfs_init 		= mqfs_init,
 	.vfs_uninit		= mqfs_uninit,
 	.vfs_mount		= mqfs_mount,
 	.vfs_unmount		= mqfs_unmount,
 	.vfs_root		= mqfs_root,
 	.vfs_statfs		= mqfs_statfs,
 };
 
 static struct vfsconf mqueuefs_vfsconf = {
 	.vfc_version = VFS_VERSION,
 	.vfc_name = "mqueuefs",
 	.vfc_vfsops = &mqfs_vfsops,
 	.vfc_typenum = -1,
 	.vfc_flags = VFCF_SYNTHETIC
 };
 
 static struct syscall_helper_data mq_syscalls[] = {
 	SYSCALL_INIT_HELPER(kmq_open),
 	SYSCALL_INIT_HELPER_F(kmq_setattr, SYF_CAPENABLED),
 	SYSCALL_INIT_HELPER_F(kmq_timedsend, SYF_CAPENABLED),
 	SYSCALL_INIT_HELPER_F(kmq_timedreceive, SYF_CAPENABLED),
 	SYSCALL_INIT_HELPER_F(kmq_notify, SYF_CAPENABLED),
 	SYSCALL_INIT_HELPER(kmq_unlink),
 	SYSCALL_INIT_LAST
 };
 
 #ifdef COMPAT_FREEBSD32
 #include <compat/freebsd32/freebsd32.h>
 #include <compat/freebsd32/freebsd32_proto.h>
 #include <compat/freebsd32/freebsd32_signal.h>
 #include <compat/freebsd32/freebsd32_syscall.h>
 #include <compat/freebsd32/freebsd32_util.h>
 
 static void
 mq_attr_from32(const struct mq_attr32 *from, struct mq_attr *to)
 {
 
 	to->mq_flags = from->mq_flags;
 	to->mq_maxmsg = from->mq_maxmsg;
 	to->mq_msgsize = from->mq_msgsize;
 	to->mq_curmsgs = from->mq_curmsgs;
 }
 
 static void
 mq_attr_to32(const struct mq_attr *from, struct mq_attr32 *to)
 {
 
 	to->mq_flags = from->mq_flags;
 	to->mq_maxmsg = from->mq_maxmsg;
 	to->mq_msgsize = from->mq_msgsize;
 	to->mq_curmsgs = from->mq_curmsgs;
 }
 
 int
 freebsd32_kmq_open(struct thread *td, struct freebsd32_kmq_open_args *uap)
 {
 	struct mq_attr attr;
 	struct mq_attr32 attr32;
 	int flags, error;
 
 	if ((uap->flags & O_ACCMODE) == O_ACCMODE || uap->flags & O_EXEC)
 		return (EINVAL);
 	flags = FFLAGS(uap->flags);
 	if ((flags & O_CREAT) != 0 && uap->attr != NULL) {
 		error = copyin(uap->attr, &attr32, sizeof(attr32));
 		if (error)
 			return (error);
 		mq_attr_from32(&attr32, &attr);
 	}
 	return (kern_kmq_open(td, uap->path, flags, uap->mode,
 	    uap->attr != NULL ? &attr : NULL));
 }
 
 int
 freebsd32_kmq_setattr(struct thread *td, struct freebsd32_kmq_setattr_args *uap)
 {
 	struct mq_attr attr, oattr;
 	struct mq_attr32 attr32, oattr32;
 	int error;
 
 	if (uap->attr != NULL) {
 		error = copyin(uap->attr, &attr32, sizeof(attr32));
 		if (error != 0)
 			return (error);
 		mq_attr_from32(&attr32, &attr);
 	}
 	error = kern_kmq_setattr(td, uap->mqd, uap->attr != NULL ? &attr : NULL,
 	    &oattr);
 	if (error == 0 && uap->oattr != NULL) {
 		mq_attr_to32(&oattr, &oattr32);
 		bzero(oattr32.__reserved, sizeof(oattr32.__reserved));
 		error = copyout(&oattr32, uap->oattr, sizeof(oattr32));
 	}
 	return (error);
 }
 
 int
 freebsd32_kmq_timedsend(struct thread *td,
     struct freebsd32_kmq_timedsend_args *uap)
 {
 	struct mqueue *mq;
 	struct file *fp;
 	struct timespec32 ets32;
 	struct timespec *abs_timeout, ets;
 	int error;
 	int waitok;
 
 	AUDIT_ARG_FD(uap->mqd);
 	error = getmq_write(td, uap->mqd, &fp, NULL, &mq);
 	if (error)
 		return (error);
 	if (uap->abs_timeout != NULL) {
 		error = copyin(uap->abs_timeout, &ets32, sizeof(ets32));
 		if (error != 0)
 			goto out;
 		CP(ets32, ets, tv_sec);
 		CP(ets32, ets, tv_nsec);
 		abs_timeout = &ets;
 	} else
 		abs_timeout = NULL;
 	waitok = !(fp->f_flag & O_NONBLOCK);
 	error = mqueue_send(mq, uap->msg_ptr, uap->msg_len,
 		uap->msg_prio, waitok, abs_timeout);
 out:
 	fdrop(fp, td);
 	return (error);
 }
 
 int
 freebsd32_kmq_timedreceive(struct thread *td,
     struct freebsd32_kmq_timedreceive_args *uap)
 {
 	struct mqueue *mq;
 	struct file *fp;
 	struct timespec32 ets32;
 	struct timespec *abs_timeout, ets;
 	int error, waitok;
 
 	AUDIT_ARG_FD(uap->mqd);
 	error = getmq_read(td, uap->mqd, &fp, NULL, &mq);
 	if (error)
 		return (error);
 	if (uap->abs_timeout != NULL) {
 		error = copyin(uap->abs_timeout, &ets32, sizeof(ets32));
 		if (error != 0)
 			goto out;
 		CP(ets32, ets, tv_sec);
 		CP(ets32, ets, tv_nsec);
 		abs_timeout = &ets;
 	} else
 		abs_timeout = NULL;
 	waitok = !(fp->f_flag & O_NONBLOCK);
 	error = mqueue_receive(mq, uap->msg_ptr, uap->msg_len,
 		uap->msg_prio, waitok, abs_timeout);
 out:
 	fdrop(fp, td);
 	return (error);
 }
 
 int
 freebsd32_kmq_notify(struct thread *td, struct freebsd32_kmq_notify_args *uap)
 {
 	struct sigevent ev, *evp;
 	struct sigevent32 ev32;
 	int error;
 
 	if (uap->sigev == NULL) {
 		evp = NULL;
 	} else {
 		error = copyin(uap->sigev, &ev32, sizeof(ev32));
 		if (error != 0)
 			return (error);
 		error = convert_sigevent32(&ev32, &ev);
 		if (error != 0)
 			return (error);
 		evp = &ev;
 	}
 	return (kern_kmq_notify(td, uap->mqd, evp));
 }
 
 static struct syscall_helper_data mq32_syscalls[] = {
 	SYSCALL32_INIT_HELPER(freebsd32_kmq_open),
 	SYSCALL32_INIT_HELPER_F(freebsd32_kmq_setattr, SYF_CAPENABLED),
 	SYSCALL32_INIT_HELPER_F(freebsd32_kmq_timedsend, SYF_CAPENABLED),
 	SYSCALL32_INIT_HELPER_F(freebsd32_kmq_timedreceive, SYF_CAPENABLED),
 	SYSCALL32_INIT_HELPER_F(freebsd32_kmq_notify, SYF_CAPENABLED),
 	SYSCALL32_INIT_HELPER_COMPAT(kmq_unlink),
 	SYSCALL_INIT_LAST
 };
 #endif
 
 static int
 mqinit(void)
 {
 	int error;
 
 	error = syscall_helper_register(mq_syscalls, SY_THR_STATIC_KLD);
 	if (error != 0)
 		return (error);
 #ifdef COMPAT_FREEBSD32
 	error = syscall32_helper_register(mq32_syscalls, SY_THR_STATIC_KLD);
 	if (error != 0)
 		return (error);
 #endif
 	return (0);
 }
 
 static int
 mqunload(void)
 {
 
 #ifdef COMPAT_FREEBSD32
 	syscall32_helper_unregister(mq32_syscalls);
 #endif
 	syscall_helper_unregister(mq_syscalls);
 	return (0);
 }
 
 static int
 mq_modload(struct module *module, int cmd, void *arg)
 {
 	int error = 0;
 
 	error = vfs_modevent(module, cmd, arg);
 	if (error != 0)
 		return (error);
 
 	switch (cmd) {
 	case MOD_LOAD:
 		error = mqinit();
 		if (error != 0)
 			mqunload();
 		break;
 	case MOD_UNLOAD:
 		error = mqunload();
 		break;
 	default:
 		break;
 	}
 	return (error);
 }
 
 static moduledata_t mqueuefs_mod = {
 	"mqueuefs",
 	mq_modload,
 	&mqueuefs_vfsconf
 };
 DECLARE_MODULE(mqueuefs, mqueuefs_mod, SI_SUB_VFS, SI_ORDER_MIDDLE);
 MODULE_VERSION(mqueuefs, 1);
diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c
index 2b78c3e51907..1a4d2d5adc0a 100644
--- a/sys/kern/uipc_usrreq.c
+++ b/sys/kern/uipc_usrreq.c
@@ -1,3565 +1,3565 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 1982, 1986, 1989, 1991, 1993
  *	The Regents of the University of California. All Rights Reserved.
  * Copyright (c) 2004-2009 Robert N. M. Watson All Rights Reserved.
  * Copyright (c) 2018 Matthew Macy
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	From: @(#)uipc_usrreq.c	8.3 (Berkeley) 1/4/94
  */
 
 /*
  * UNIX Domain (Local) Sockets
  *
  * This is an implementation of UNIX (local) domain sockets.  Each socket has
  * an associated struct unpcb (UNIX protocol control block).  Stream sockets
  * may be connected to 0 or 1 other socket.  Datagram sockets may be
  * connected to 0, 1, or many other sockets.  Sockets may be created and
  * connected in pairs (socketpair(2)), or bound/connected to using the file
  * system name space.  For most purposes, only the receive socket buffer is
  * used, as sending on one socket delivers directly to the receive socket
  * buffer of a second socket.
  *
  * The implementation is substantially complicated by the fact that
  * "ancillary data", such as file descriptors or credentials, may be passed
  * across UNIX domain sockets.  The potential for passing UNIX domain sockets
  * over other UNIX domain sockets requires the implementation of a simple
  * garbage collector to find and tear down cycles of disconnected sockets.
  *
  * TODO:
  *	RDM
  *	rethink name space problems
  *	need a proper out-of-band
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_ddb.h"
 
 #include <sys/param.h>
 #include <sys/capsicum.h>
 #include <sys/domain.h>
 #include <sys/eventhandler.h>
 #include <sys/fcntl.h>
 #include <sys/file.h>
 #include <sys/filedesc.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/mount.h>
 #include <sys/mutex.h>
 #include <sys/namei.h>
 #include <sys/proc.h>
 #include <sys/protosw.h>
 #include <sys/queue.h>
 #include <sys/resourcevar.h>
 #include <sys/rwlock.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/signalvar.h>
 #include <sys/stat.h>
 #include <sys/sx.h>
 #include <sys/sysctl.h>
 #include <sys/systm.h>
 #include <sys/taskqueue.h>
 #include <sys/un.h>
 #include <sys/unpcb.h>
 #include <sys/vnode.h>
 
 #include <net/vnet.h>
 
 #ifdef DDB
 #include <ddb/ddb.h>
 #endif
 
 #include <security/mac/mac_framework.h>
 
 #include <vm/uma.h>
 
 MALLOC_DECLARE(M_FILECAPS);
 
 static struct domain localdomain;
 
 static uma_zone_t	unp_zone;
 static unp_gen_t	unp_gencnt;	/* (l) */
 static u_int		unp_count;	/* (l) Count of local sockets. */
 static ino_t		unp_ino;	/* Prototype for fake inode numbers. */
 static int		unp_rights;	/* (g) File descriptors in flight. */
 static struct unp_head	unp_shead;	/* (l) List of stream sockets. */
 static struct unp_head	unp_dhead;	/* (l) List of datagram sockets. */
 static struct unp_head	unp_sphead;	/* (l) List of seqpacket sockets. */
 
 struct unp_defer {
 	SLIST_ENTRY(unp_defer) ud_link;
 	struct file *ud_fp;
 };
 static SLIST_HEAD(, unp_defer) unp_defers;
 static int unp_defers_count;
 
 static const struct sockaddr	sun_noname = { sizeof(sun_noname), AF_LOCAL };
 
 /*
  * Garbage collection of cyclic file descriptor/socket references occurs
  * asynchronously in a taskqueue context in order to avoid recursion and
  * reentrance in the UNIX domain socket, file descriptor, and socket layer
  * code.  See unp_gc() for a full description.
  */
 static struct timeout_task unp_gc_task;
 
 /*
  * The close of unix domain sockets attached as SCM_RIGHTS is
  * postponed to the taskqueue, to avoid arbitrary recursion depth.
  * The attached sockets might have another sockets attached.
  */
 static struct task	unp_defer_task;
 
 /*
  * Both send and receive buffers are allocated PIPSIZ bytes of buffering for
  * stream sockets, although the total for sender and receiver is actually
  * only PIPSIZ.
  *
  * Datagram sockets really use the sendspace as the maximum datagram size,
  * and don't really want to reserve the sendspace.  Their recvspace should be
  * large enough for at least one max-size datagram plus address.
  */
 #ifndef PIPSIZ
 #define	PIPSIZ	8192
 #endif
 static u_long	unpst_sendspace = PIPSIZ;
 static u_long	unpst_recvspace = PIPSIZ;
 static u_long	unpdg_maxdgram = 2*1024;
 static u_long	unpdg_recvspace = 16*1024;	/* support 8KB syslog msgs */
 static u_long	unpsp_sendspace = PIPSIZ;	/* really max datagram size */
 static u_long	unpsp_recvspace = PIPSIZ;
 
 static SYSCTL_NODE(_net, PF_LOCAL, local, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
     "Local domain");
 static SYSCTL_NODE(_net_local, SOCK_STREAM, stream,
     CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
     "SOCK_STREAM");
 static SYSCTL_NODE(_net_local, SOCK_DGRAM, dgram,
     CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
     "SOCK_DGRAM");
 static SYSCTL_NODE(_net_local, SOCK_SEQPACKET, seqpacket,
     CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
     "SOCK_SEQPACKET");
 
 SYSCTL_ULONG(_net_local_stream, OID_AUTO, sendspace, CTLFLAG_RW,
 	   &unpst_sendspace, 0, "Default stream send space.");
 SYSCTL_ULONG(_net_local_stream, OID_AUTO, recvspace, CTLFLAG_RW,
 	   &unpst_recvspace, 0, "Default stream receive space.");
 SYSCTL_ULONG(_net_local_dgram, OID_AUTO, maxdgram, CTLFLAG_RW,
 	   &unpdg_maxdgram, 0, "Maximum datagram size.");
 SYSCTL_ULONG(_net_local_dgram, OID_AUTO, recvspace, CTLFLAG_RW,
 	   &unpdg_recvspace, 0, "Default datagram receive space.");
 SYSCTL_ULONG(_net_local_seqpacket, OID_AUTO, maxseqpacket, CTLFLAG_RW,
 	   &unpsp_sendspace, 0, "Default seqpacket send space.");
 SYSCTL_ULONG(_net_local_seqpacket, OID_AUTO, recvspace, CTLFLAG_RW,
 	   &unpsp_recvspace, 0, "Default seqpacket receive space.");
 SYSCTL_INT(_net_local, OID_AUTO, inflight, CTLFLAG_RD, &unp_rights, 0,
     "File descriptors in flight.");
 SYSCTL_INT(_net_local, OID_AUTO, deferred, CTLFLAG_RD,
     &unp_defers_count, 0,
     "File descriptors deferred to taskqueue for close.");
 
 /*
  * Locking and synchronization:
  *
  * Several types of locks exist in the local domain socket implementation:
  * - a global linkage lock
  * - a global connection list lock
  * - the mtxpool lock
  * - per-unpcb mutexes
  *
  * The linkage lock protects the global socket lists, the generation number
  * counter and garbage collector state.
  *
  * The connection list lock protects the list of referring sockets in a datagram
  * socket PCB.  This lock is also overloaded to protect a global list of
  * sockets whose buffers contain socket references in the form of SCM_RIGHTS
  * messages.  To avoid recursion, such references are released by a dedicated
  * thread.
  *
  * The mtxpool lock protects the vnode from being modified while referenced.
  * Lock ordering rules require that it be acquired before any PCB locks.
  *
  * The unpcb lock (unp_mtx) protects the most commonly referenced fields in the
  * unpcb.  This includes the unp_conn field, which either links two connected
  * PCBs together (for connected socket types) or points at the destination
  * socket (for connectionless socket types).  The operations of creating or
  * destroying a connection therefore involve locking multiple PCBs.  To avoid
  * lock order reversals, in some cases this involves dropping a PCB lock and
  * using a reference counter to maintain liveness.
  *
  * UNIX domain sockets each have an unpcb hung off of their so_pcb pointer,
  * allocated in pr_attach() and freed in pr_detach().  The validity of that
  * pointer is an invariant, so no lock is required to dereference the so_pcb
  * pointer if a valid socket reference is held by the caller.  In practice,
  * this is always true during operations performed on a socket.  Each unpcb
  * has a back-pointer to its socket, unp_socket, which will be stable under
  * the same circumstances.
  *
  * This pointer may only be safely dereferenced as long as a valid reference
  * to the unpcb is held.  Typically, this reference will be from the socket,
  * or from another unpcb when the referring unpcb's lock is held (in order
  * that the reference not be invalidated during use).  For example, to follow
  * unp->unp_conn->unp_socket, you need to hold a lock on unp_conn to guarantee
  * that detach is not run clearing unp_socket.
  *
  * Blocking with UNIX domain sockets is a tricky issue: unlike most network
  * protocols, bind() is a non-atomic operation, and connect() requires
  * potential sleeping in the protocol, due to potentially waiting on local or
  * distributed file systems.  We try to separate "lookup" operations, which
  * may sleep, and the IPC operations themselves, which typically can occur
  * with relative atomicity as locks can be held over the entire operation.
  *
  * Another tricky issue is simultaneous multi-threaded or multi-process
  * access to a single UNIX domain socket.  These are handled by the flags
  * UNP_CONNECTING and UNP_BINDING, which prevent concurrent connecting or
  * binding, both of which involve dropping UNIX domain socket locks in order
  * to perform namei() and other file system operations.
  */
 static struct rwlock	unp_link_rwlock;
 static struct mtx	unp_defers_lock;
 
 #define	UNP_LINK_LOCK_INIT()		rw_init(&unp_link_rwlock,	\
 					    "unp_link_rwlock")
 
 #define	UNP_LINK_LOCK_ASSERT()		rw_assert(&unp_link_rwlock,	\
 					    RA_LOCKED)
 #define	UNP_LINK_UNLOCK_ASSERT()	rw_assert(&unp_link_rwlock,	\
 					    RA_UNLOCKED)
 
 #define	UNP_LINK_RLOCK()		rw_rlock(&unp_link_rwlock)
 #define	UNP_LINK_RUNLOCK()		rw_runlock(&unp_link_rwlock)
 #define	UNP_LINK_WLOCK()		rw_wlock(&unp_link_rwlock)
 #define	UNP_LINK_WUNLOCK()		rw_wunlock(&unp_link_rwlock)
 #define	UNP_LINK_WLOCK_ASSERT()		rw_assert(&unp_link_rwlock,	\
 					    RA_WLOCKED)
 #define	UNP_LINK_WOWNED()		rw_wowned(&unp_link_rwlock)
 
 #define	UNP_DEFERRED_LOCK_INIT()	mtx_init(&unp_defers_lock, \
 					    "unp_defer", NULL, MTX_DEF)
 #define	UNP_DEFERRED_LOCK()		mtx_lock(&unp_defers_lock)
 #define	UNP_DEFERRED_UNLOCK()		mtx_unlock(&unp_defers_lock)
 
 #define UNP_REF_LIST_LOCK()		UNP_DEFERRED_LOCK();
 #define UNP_REF_LIST_UNLOCK()		UNP_DEFERRED_UNLOCK();
 
 #define UNP_PCB_LOCK_INIT(unp)		mtx_init(&(unp)->unp_mtx,	\
 					    "unp", "unp",	\
 					    MTX_DUPOK|MTX_DEF)
 #define	UNP_PCB_LOCK_DESTROY(unp)	mtx_destroy(&(unp)->unp_mtx)
 #define	UNP_PCB_LOCKPTR(unp)		(&(unp)->unp_mtx)
 #define	UNP_PCB_LOCK(unp)		mtx_lock(&(unp)->unp_mtx)
 #define	UNP_PCB_TRYLOCK(unp)		mtx_trylock(&(unp)->unp_mtx)
 #define	UNP_PCB_UNLOCK(unp)		mtx_unlock(&(unp)->unp_mtx)
 #define	UNP_PCB_OWNED(unp)		mtx_owned(&(unp)->unp_mtx)
 #define	UNP_PCB_LOCK_ASSERT(unp)	mtx_assert(&(unp)->unp_mtx, MA_OWNED)
 #define	UNP_PCB_UNLOCK_ASSERT(unp)	mtx_assert(&(unp)->unp_mtx, MA_NOTOWNED)
 
 static int	uipc_connect2(struct socket *, struct socket *);
 static int	uipc_ctloutput(struct socket *, struct sockopt *);
 static int	unp_connect(struct socket *, struct sockaddr *,
 		    struct thread *);
 static int	unp_connectat(int, struct socket *, struct sockaddr *,
 		    struct thread *, bool);
 typedef enum { PRU_CONNECT, PRU_CONNECT2 } conn2_how;
 static void	unp_connect2(struct socket *so, struct socket *so2, conn2_how);
 static void	unp_disconnect(struct unpcb *unp, struct unpcb *unp2);
 static void	unp_dispose(struct socket *so);
 static void	unp_shutdown(struct unpcb *);
 static void	unp_drop(struct unpcb *);
 static void	unp_gc(__unused void *, int);
 static void	unp_scan(struct mbuf *, void (*)(struct filedescent **, int));
 static void	unp_discard(struct file *);
 static void	unp_freerights(struct filedescent **, int);
 static int	unp_internalize(struct mbuf **, struct thread *,
 		    struct mbuf **, u_int *, u_int *);
 static void	unp_internalize_fp(struct file *);
 static int	unp_externalize(struct mbuf *, struct mbuf **, int);
 static int	unp_externalize_fp(struct file *);
 static struct mbuf	*unp_addsockcred(struct thread *, struct mbuf *,
 		    int, struct mbuf **, u_int *, u_int *);
 static void	unp_process_defers(void * __unused, int);
 
 static void
 unp_pcb_hold(struct unpcb *unp)
 {
 	u_int old __unused;
 
 	old = refcount_acquire(&unp->unp_refcount);
 	KASSERT(old > 0, ("%s: unpcb %p has no references", __func__, unp));
 }
 
 static __result_use_check bool
 unp_pcb_rele(struct unpcb *unp)
 {
 	bool ret;
 
 	UNP_PCB_LOCK_ASSERT(unp);
 
 	if ((ret = refcount_release(&unp->unp_refcount))) {
 		UNP_PCB_UNLOCK(unp);
 		UNP_PCB_LOCK_DESTROY(unp);
 		uma_zfree(unp_zone, unp);
 	}
 	return (ret);
 }
 
 static void
 unp_pcb_rele_notlast(struct unpcb *unp)
 {
 	bool ret __unused;
 
 	ret = refcount_release(&unp->unp_refcount);
 	KASSERT(!ret, ("%s: unpcb %p has no references", __func__, unp));
 }
 
 static void
 unp_pcb_lock_pair(struct unpcb *unp, struct unpcb *unp2)
 {
 	UNP_PCB_UNLOCK_ASSERT(unp);
 	UNP_PCB_UNLOCK_ASSERT(unp2);
 
 	if (unp == unp2) {
 		UNP_PCB_LOCK(unp);
 	} else if ((uintptr_t)unp2 > (uintptr_t)unp) {
 		UNP_PCB_LOCK(unp);
 		UNP_PCB_LOCK(unp2);
 	} else {
 		UNP_PCB_LOCK(unp2);
 		UNP_PCB_LOCK(unp);
 	}
 }
 
 static void
 unp_pcb_unlock_pair(struct unpcb *unp, struct unpcb *unp2)
 {
 	UNP_PCB_UNLOCK(unp);
 	if (unp != unp2)
 		UNP_PCB_UNLOCK(unp2);
 }
 
 /*
  * Try to lock the connected peer of an already locked socket.  In some cases
  * this requires that we unlock the current socket.  The pairbusy counter is
  * used to block concurrent connection attempts while the lock is dropped.  The
  * caller must be careful to revalidate PCB state.
  */
 static struct unpcb *
 unp_pcb_lock_peer(struct unpcb *unp)
 {
 	struct unpcb *unp2;
 
 	UNP_PCB_LOCK_ASSERT(unp);
 	unp2 = unp->unp_conn;
 	if (unp2 == NULL)
 		return (NULL);
 	if (__predict_false(unp == unp2))
 		return (unp);
 
 	UNP_PCB_UNLOCK_ASSERT(unp2);
 
 	if (__predict_true(UNP_PCB_TRYLOCK(unp2)))
 		return (unp2);
 	if ((uintptr_t)unp2 > (uintptr_t)unp) {
 		UNP_PCB_LOCK(unp2);
 		return (unp2);
 	}
 	unp->unp_pairbusy++;
 	unp_pcb_hold(unp2);
 	UNP_PCB_UNLOCK(unp);
 
 	UNP_PCB_LOCK(unp2);
 	UNP_PCB_LOCK(unp);
 	KASSERT(unp->unp_conn == unp2 || unp->unp_conn == NULL,
 	    ("%s: socket %p was reconnected", __func__, unp));
 	if (--unp->unp_pairbusy == 0 && (unp->unp_flags & UNP_WAITING) != 0) {
 		unp->unp_flags &= ~UNP_WAITING;
 		wakeup(unp);
 	}
 	if (unp_pcb_rele(unp2)) {
 		/* unp2 is unlocked. */
 		return (NULL);
 	}
 	if (unp->unp_conn == NULL) {
 		UNP_PCB_UNLOCK(unp2);
 		return (NULL);
 	}
 	return (unp2);
 }
 
 static void
 uipc_abort(struct socket *so)
 {
 	struct unpcb *unp, *unp2;
 
 	unp = sotounpcb(so);
 	KASSERT(unp != NULL, ("uipc_abort: unp == NULL"));
 	UNP_PCB_UNLOCK_ASSERT(unp);
 
 	UNP_PCB_LOCK(unp);
 	unp2 = unp->unp_conn;
 	if (unp2 != NULL) {
 		unp_pcb_hold(unp2);
 		UNP_PCB_UNLOCK(unp);
 		unp_drop(unp2);
 	} else
 		UNP_PCB_UNLOCK(unp);
 }
 
 static int
 uipc_accept(struct socket *so, struct sockaddr **nam)
 {
 	struct unpcb *unp, *unp2;
 	const struct sockaddr *sa;
 
 	/*
 	 * Pass back name of connected socket, if it was bound and we are
 	 * still connected (our peer may have closed already!).
 	 */
 	unp = sotounpcb(so);
 	KASSERT(unp != NULL, ("uipc_accept: unp == NULL"));
 
 	*nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK);
 	UNP_PCB_LOCK(unp);
 	unp2 = unp_pcb_lock_peer(unp);
 	if (unp2 != NULL && unp2->unp_addr != NULL)
 		sa = (struct sockaddr *)unp2->unp_addr;
 	else
 		sa = &sun_noname;
 	bcopy(sa, *nam, sa->sa_len);
 	if (unp2 != NULL)
 		unp_pcb_unlock_pair(unp, unp2);
 	else
 		UNP_PCB_UNLOCK(unp);
 	return (0);
 }
 
 static int
 uipc_attach(struct socket *so, int proto, struct thread *td)
 {
 	u_long sendspace, recvspace;
 	struct unpcb *unp;
 	int error;
 	bool locked;
 
 	KASSERT(so->so_pcb == NULL, ("uipc_attach: so_pcb != NULL"));
 	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
 		switch (so->so_type) {
 		case SOCK_STREAM:
 			sendspace = unpst_sendspace;
 			recvspace = unpst_recvspace;
 			break;
 
 		case SOCK_DGRAM:
 			STAILQ_INIT(&so->so_rcv.uxdg_mb);
 			STAILQ_INIT(&so->so_snd.uxdg_mb);
 			TAILQ_INIT(&so->so_rcv.uxdg_conns);
 			/*
 			 * Since send buffer is either bypassed or is a part
 			 * of one-to-many receive buffer, we assign both space
 			 * limits to unpdg_recvspace.
 			 */
 			sendspace = recvspace = unpdg_recvspace;
 			break;
 
 		case SOCK_SEQPACKET:
 			sendspace = unpsp_sendspace;
 			recvspace = unpsp_recvspace;
 			break;
 
 		default:
 			panic("uipc_attach");
 		}
 		error = soreserve(so, sendspace, recvspace);
 		if (error)
 			return (error);
 	}
 	unp = uma_zalloc(unp_zone, M_NOWAIT | M_ZERO);
 	if (unp == NULL)
 		return (ENOBUFS);
 	LIST_INIT(&unp->unp_refs);
 	UNP_PCB_LOCK_INIT(unp);
 	unp->unp_socket = so;
 	so->so_pcb = unp;
 	refcount_init(&unp->unp_refcount, 1);
 
 	if ((locked = UNP_LINK_WOWNED()) == false)
 		UNP_LINK_WLOCK();
 
 	unp->unp_gencnt = ++unp_gencnt;
 	unp->unp_ino = ++unp_ino;
 	unp_count++;
 	switch (so->so_type) {
 	case SOCK_STREAM:
 		LIST_INSERT_HEAD(&unp_shead, unp, unp_link);
 		break;
 
 	case SOCK_DGRAM:
 		LIST_INSERT_HEAD(&unp_dhead, unp, unp_link);
 		break;
 
 	case SOCK_SEQPACKET:
 		LIST_INSERT_HEAD(&unp_sphead, unp, unp_link);
 		break;
 
 	default:
 		panic("uipc_attach");
 	}
 
 	if (locked == false)
 		UNP_LINK_WUNLOCK();
 
 	return (0);
 }
 
 static int
 uipc_bindat(int fd, struct socket *so, struct sockaddr *nam, struct thread *td)
 {
 	struct sockaddr_un *soun = (struct sockaddr_un *)nam;
 	struct vattr vattr;
 	int error, namelen;
 	struct nameidata nd;
 	struct unpcb *unp;
 	struct vnode *vp;
 	struct mount *mp;
 	cap_rights_t rights;
 	char *buf;
 
 	if (nam->sa_family != AF_UNIX)
 		return (EAFNOSUPPORT);
 
 	unp = sotounpcb(so);
 	KASSERT(unp != NULL, ("uipc_bind: unp == NULL"));
 
 	if (soun->sun_len > sizeof(struct sockaddr_un))
 		return (EINVAL);
 	namelen = soun->sun_len - offsetof(struct sockaddr_un, sun_path);
 	if (namelen <= 0)
 		return (EINVAL);
 
 	/*
 	 * We don't allow simultaneous bind() calls on a single UNIX domain
 	 * socket, so flag in-progress operations, and return an error if an
 	 * operation is already in progress.
 	 *
 	 * Historically, we have not allowed a socket to be rebound, so this
 	 * also returns an error.  Not allowing re-binding simplifies the
 	 * implementation and avoids a great many possible failure modes.
 	 */
 	UNP_PCB_LOCK(unp);
 	if (unp->unp_vnode != NULL) {
 		UNP_PCB_UNLOCK(unp);
 		return (EINVAL);
 	}
 	if (unp->unp_flags & UNP_BINDING) {
 		UNP_PCB_UNLOCK(unp);
 		return (EALREADY);
 	}
 	unp->unp_flags |= UNP_BINDING;
 	UNP_PCB_UNLOCK(unp);
 
 	buf = malloc(namelen + 1, M_TEMP, M_WAITOK);
 	bcopy(soun->sun_path, buf, namelen);
 	buf[namelen] = 0;
 
 restart:
-	NDINIT_ATRIGHTS(&nd, CREATE, NOFOLLOW | LOCKPARENT | SAVENAME | NOCACHE,
+	NDINIT_ATRIGHTS(&nd, CREATE, NOFOLLOW | LOCKPARENT | NOCACHE,
 	    UIO_SYSSPACE, buf, fd, cap_rights_init_one(&rights, CAP_BINDAT));
 /* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
 	error = namei(&nd);
 	if (error)
 		goto error;
 	vp = nd.ni_vp;
 	if (vp != NULL || vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 		NDFREE_PNBUF(&nd);
 		if (nd.ni_dvp == vp)
 			vrele(nd.ni_dvp);
 		else
 			vput(nd.ni_dvp);
 		if (vp != NULL) {
 			vrele(vp);
 			error = EADDRINUSE;
 			goto error;
 		}
 		error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH);
 		if (error)
 			goto error;
 		goto restart;
 	}
 	VATTR_NULL(&vattr);
 	vattr.va_type = VSOCK;
 	vattr.va_mode = (ACCESSPERMS & ~td->td_proc->p_pd->pd_cmask);
 #ifdef MAC
 	error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
 	    &vattr);
 #endif
 	if (error == 0)
 		error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
 	NDFREE_PNBUF(&nd);
 	if (error) {
 		VOP_VPUT_PAIR(nd.ni_dvp, NULL, true);
 		vn_finished_write(mp);
 		if (error == ERELOOKUP)
 			goto restart;
 		goto error;
 	}
 	vp = nd.ni_vp;
 	ASSERT_VOP_ELOCKED(vp, "uipc_bind");
 	soun = (struct sockaddr_un *)sodupsockaddr(nam, M_WAITOK);
 
 	UNP_PCB_LOCK(unp);
 	VOP_UNP_BIND(vp, unp);
 	unp->unp_vnode = vp;
 	unp->unp_addr = soun;
 	unp->unp_flags &= ~UNP_BINDING;
 	UNP_PCB_UNLOCK(unp);
 	vref(vp);
 	VOP_VPUT_PAIR(nd.ni_dvp, &vp, true);
 	vn_finished_write(mp);
 	free(buf, M_TEMP);
 	return (0);
 
 error:
 	UNP_PCB_LOCK(unp);
 	unp->unp_flags &= ~UNP_BINDING;
 	UNP_PCB_UNLOCK(unp);
 	free(buf, M_TEMP);
 	return (error);
 }
 
 static int
 uipc_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
 {
 
 	return (uipc_bindat(AT_FDCWD, so, nam, td));
 }
 
 static int
 uipc_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
 {
 	int error;
 
 	KASSERT(td == curthread, ("uipc_connect: td != curthread"));
 	error = unp_connect(so, nam, td);
 	return (error);
 }
 
 static int
 uipc_connectat(int fd, struct socket *so, struct sockaddr *nam,
     struct thread *td)
 {
 	int error;
 
 	KASSERT(td == curthread, ("uipc_connectat: td != curthread"));
 	error = unp_connectat(fd, so, nam, td, false);
 	return (error);
 }
 
 static void
 uipc_close(struct socket *so)
 {
 	struct unpcb *unp, *unp2;
 	struct vnode *vp = NULL;
 	struct mtx *vplock;
 
 	unp = sotounpcb(so);
 	KASSERT(unp != NULL, ("uipc_close: unp == NULL"));
 
 	vplock = NULL;
 	if ((vp = unp->unp_vnode) != NULL) {
 		vplock = mtx_pool_find(mtxpool_sleep, vp);
 		mtx_lock(vplock);
 	}
 	UNP_PCB_LOCK(unp);
 	if (vp && unp->unp_vnode == NULL) {
 		mtx_unlock(vplock);
 		vp = NULL;
 	}
 	if (vp != NULL) {
 		VOP_UNP_DETACH(vp);
 		unp->unp_vnode = NULL;
 	}
 	if ((unp2 = unp_pcb_lock_peer(unp)) != NULL)
 		unp_disconnect(unp, unp2);
 	else
 		UNP_PCB_UNLOCK(unp);
 	if (vp) {
 		mtx_unlock(vplock);
 		vrele(vp);
 	}
 }
 
 static int
 uipc_connect2(struct socket *so1, struct socket *so2)
 {
 	struct unpcb *unp, *unp2;
 
 	if (so1->so_type != so2->so_type)
 		return (EPROTOTYPE);
 
 	unp = so1->so_pcb;
 	KASSERT(unp != NULL, ("uipc_connect2: unp == NULL"));
 	unp2 = so2->so_pcb;
 	KASSERT(unp2 != NULL, ("uipc_connect2: unp2 == NULL"));
 	unp_pcb_lock_pair(unp, unp2);
 	unp_connect2(so1, so2, PRU_CONNECT2);
 	unp_pcb_unlock_pair(unp, unp2);
 
 	return (0);
 }
 
 static void
 uipc_detach(struct socket *so)
 {
 	struct unpcb *unp, *unp2;
 	struct mtx *vplock;
 	struct vnode *vp;
 	int local_unp_rights;
 
 	unp = sotounpcb(so);
 	KASSERT(unp != NULL, ("uipc_detach: unp == NULL"));
 
 	vp = NULL;
 	vplock = NULL;
 
 	UNP_LINK_WLOCK();
 	LIST_REMOVE(unp, unp_link);
 	if (unp->unp_gcflag & UNPGC_DEAD)
 		LIST_REMOVE(unp, unp_dead);
 	unp->unp_gencnt = ++unp_gencnt;
 	--unp_count;
 	UNP_LINK_WUNLOCK();
 
 	UNP_PCB_UNLOCK_ASSERT(unp);
  restart:
 	if ((vp = unp->unp_vnode) != NULL) {
 		vplock = mtx_pool_find(mtxpool_sleep, vp);
 		mtx_lock(vplock);
 	}
 	UNP_PCB_LOCK(unp);
 	if (unp->unp_vnode != vp && unp->unp_vnode != NULL) {
 		if (vplock)
 			mtx_unlock(vplock);
 		UNP_PCB_UNLOCK(unp);
 		goto restart;
 	}
 	if ((vp = unp->unp_vnode) != NULL) {
 		VOP_UNP_DETACH(vp);
 		unp->unp_vnode = NULL;
 	}
 	if ((unp2 = unp_pcb_lock_peer(unp)) != NULL)
 		unp_disconnect(unp, unp2);
 	else
 		UNP_PCB_UNLOCK(unp);
 
 	UNP_REF_LIST_LOCK();
 	while (!LIST_EMPTY(&unp->unp_refs)) {
 		struct unpcb *ref = LIST_FIRST(&unp->unp_refs);
 
 		unp_pcb_hold(ref);
 		UNP_REF_LIST_UNLOCK();
 
 		MPASS(ref != unp);
 		UNP_PCB_UNLOCK_ASSERT(ref);
 		unp_drop(ref);
 		UNP_REF_LIST_LOCK();
 	}
 	UNP_REF_LIST_UNLOCK();
 
 	UNP_PCB_LOCK(unp);
 	local_unp_rights = unp_rights;
 	unp->unp_socket->so_pcb = NULL;
 	unp->unp_socket = NULL;
 	free(unp->unp_addr, M_SONAME);
 	unp->unp_addr = NULL;
 	if (!unp_pcb_rele(unp))
 		UNP_PCB_UNLOCK(unp);
 	if (vp) {
 		mtx_unlock(vplock);
 		vrele(vp);
 	}
 	if (local_unp_rights)
 		taskqueue_enqueue_timeout(taskqueue_thread, &unp_gc_task, -1);
 
 	switch (so->so_type) {
 	case SOCK_DGRAM:
 		/*
 		 * Everything should have been unlinked/freed by unp_dispose()
 		 * and/or unp_disconnect().
 		 */
 		MPASS(so->so_rcv.uxdg_peeked == NULL);
 		MPASS(STAILQ_EMPTY(&so->so_rcv.uxdg_mb));
 		MPASS(TAILQ_EMPTY(&so->so_rcv.uxdg_conns));
 		MPASS(STAILQ_EMPTY(&so->so_snd.uxdg_mb));
 	}
 }
 
 static int
 uipc_disconnect(struct socket *so)
 {
 	struct unpcb *unp, *unp2;
 
 	unp = sotounpcb(so);
 	KASSERT(unp != NULL, ("uipc_disconnect: unp == NULL"));
 
 	UNP_PCB_LOCK(unp);
 	if ((unp2 = unp_pcb_lock_peer(unp)) != NULL)
 		unp_disconnect(unp, unp2);
 	else
 		UNP_PCB_UNLOCK(unp);
 	return (0);
 }
 
 static int
 uipc_listen(struct socket *so, int backlog, struct thread *td)
 {
 	struct unpcb *unp;
 	int error;
 
 	MPASS(so->so_type != SOCK_DGRAM);
 
 	/*
 	 * Synchronize with concurrent connection attempts.
 	 */
 	error = 0;
 	unp = sotounpcb(so);
 	UNP_PCB_LOCK(unp);
 	if (unp->unp_conn != NULL || (unp->unp_flags & UNP_CONNECTING) != 0)
 		error = EINVAL;
 	else if (unp->unp_vnode == NULL)
 		error = EDESTADDRREQ;
 	if (error != 0) {
 		UNP_PCB_UNLOCK(unp);
 		return (error);
 	}
 
 	SOCK_LOCK(so);
 	error = solisten_proto_check(so);
 	if (error == 0) {
 		cru2xt(td, &unp->unp_peercred);
 		solisten_proto(so, backlog);
 	}
 	SOCK_UNLOCK(so);
 	UNP_PCB_UNLOCK(unp);
 	return (error);
 }
 
 static int
 uipc_peeraddr(struct socket *so, struct sockaddr **nam)
 {
 	struct unpcb *unp, *unp2;
 	const struct sockaddr *sa;
 
 	unp = sotounpcb(so);
 	KASSERT(unp != NULL, ("uipc_peeraddr: unp == NULL"));
 
 	*nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK);
 	UNP_LINK_RLOCK();
 	/*
 	 * XXX: It seems that this test always fails even when connection is
 	 * established.  So, this else clause is added as workaround to
 	 * return PF_LOCAL sockaddr.
 	 */
 	unp2 = unp->unp_conn;
 	if (unp2 != NULL) {
 		UNP_PCB_LOCK(unp2);
 		if (unp2->unp_addr != NULL)
 			sa = (struct sockaddr *) unp2->unp_addr;
 		else
 			sa = &sun_noname;
 		bcopy(sa, *nam, sa->sa_len);
 		UNP_PCB_UNLOCK(unp2);
 	} else {
 		sa = &sun_noname;
 		bcopy(sa, *nam, sa->sa_len);
 	}
 	UNP_LINK_RUNLOCK();
 	return (0);
 }
 
 static int
 uipc_rcvd(struct socket *so, int flags)
 {
 	struct unpcb *unp, *unp2;
 	struct socket *so2;
 	u_int mbcnt, sbcc;
 
 	unp = sotounpcb(so);
 	KASSERT(unp != NULL, ("%s: unp == NULL", __func__));
 	KASSERT(so->so_type == SOCK_STREAM || so->so_type == SOCK_SEQPACKET,
 	    ("%s: socktype %d", __func__, so->so_type));
 
 	/*
 	 * Adjust backpressure on sender and wakeup any waiting to write.
 	 *
 	 * The unp lock is acquired to maintain the validity of the unp_conn
 	 * pointer; no lock on unp2 is required as unp2->unp_socket will be
 	 * static as long as we don't permit unp2 to disconnect from unp,
 	 * which is prevented by the lock on unp.  We cache values from
 	 * so_rcv to avoid holding the so_rcv lock over the entire
 	 * transaction on the remote so_snd.
 	 */
 	SOCKBUF_LOCK(&so->so_rcv);
 	mbcnt = so->so_rcv.sb_mbcnt;
 	sbcc = sbavail(&so->so_rcv);
 	SOCKBUF_UNLOCK(&so->so_rcv);
 	/*
 	 * There is a benign race condition at this point.  If we're planning to
 	 * clear SB_STOP, but uipc_send is called on the connected socket at
 	 * this instant, it might add data to the sockbuf and set SB_STOP.  Then
 	 * we would erroneously clear SB_STOP below, even though the sockbuf is
 	 * full.  The race is benign because the only ill effect is to allow the
 	 * sockbuf to exceed its size limit, and the size limits are not
 	 * strictly guaranteed anyway.
 	 */
 	UNP_PCB_LOCK(unp);
 	unp2 = unp->unp_conn;
 	if (unp2 == NULL) {
 		UNP_PCB_UNLOCK(unp);
 		return (0);
 	}
 	so2 = unp2->unp_socket;
 	SOCKBUF_LOCK(&so2->so_snd);
 	if (sbcc < so2->so_snd.sb_hiwat && mbcnt < so2->so_snd.sb_mbmax)
 		so2->so_snd.sb_flags &= ~SB_STOP;
 	sowwakeup_locked(so2);
 	UNP_PCB_UNLOCK(unp);
 	return (0);
 }
 
 static int
 uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
     struct mbuf *control, struct thread *td)
 {
 	struct unpcb *unp, *unp2;
 	struct socket *so2;
 	u_int mbcnt, sbcc;
 	int error;
 
 	unp = sotounpcb(so);
 	KASSERT(unp != NULL, ("%s: unp == NULL", __func__));
 	KASSERT(so->so_type == SOCK_STREAM || so->so_type == SOCK_SEQPACKET,
 	    ("%s: socktype %d", __func__, so->so_type));
 
 	error = 0;
 	if (flags & PRUS_OOB) {
 		error = EOPNOTSUPP;
 		goto release;
 	}
 	if (control != NULL &&
 	    (error = unp_internalize(&control, td, NULL, NULL, NULL)))
 		goto release;
 
 	unp2 = NULL;
 	if ((so->so_state & SS_ISCONNECTED) == 0) {
 		if (nam != NULL) {
 			if ((error = unp_connect(so, nam, td)) != 0)
 				goto out;
 		} else {
 			error = ENOTCONN;
 			goto out;
 		}
 	}
 
 	UNP_PCB_LOCK(unp);
 	if ((unp2 = unp_pcb_lock_peer(unp)) == NULL) {
 		UNP_PCB_UNLOCK(unp);
 		error = ENOTCONN;
 		goto out;
 	} else if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
 		unp_pcb_unlock_pair(unp, unp2);
 		error = EPIPE;
 		goto out;
 	}
 	UNP_PCB_UNLOCK(unp);
 	if ((so2 = unp2->unp_socket) == NULL) {
 		UNP_PCB_UNLOCK(unp2);
 		error = ENOTCONN;
 		goto out;
 	}
 	SOCKBUF_LOCK(&so2->so_rcv);
 	if (unp2->unp_flags & UNP_WANTCRED_MASK) {
 		/*
 		 * Credentials are passed only once on SOCK_STREAM and
 		 * SOCK_SEQPACKET (LOCAL_CREDS => WANTCRED_ONESHOT), or
 		 * forever (LOCAL_CREDS_PERSISTENT => WANTCRED_ALWAYS).
 		 */
 		control = unp_addsockcred(td, control, unp2->unp_flags, NULL,
 		    NULL, NULL);
 		unp2->unp_flags &= ~UNP_WANTCRED_ONESHOT;
 	}
 
 	/*
 	 * Send to paired receive port and wake up readers.  Don't
 	 * check for space available in the receive buffer if we're
 	 * attaching ancillary data; Unix domain sockets only check
 	 * for space in the sending sockbuf, and that check is
 	 * performed one level up the stack.  At that level we cannot
 	 * precisely account for the amount of buffer space used
 	 * (e.g., because control messages are not yet internalized).
 	 */
 	switch (so->so_type) {
 	case SOCK_STREAM:
 		if (control != NULL) {
 			sbappendcontrol_locked(&so2->so_rcv, m,
 			    control, flags);
 			control = NULL;
 		} else
 			sbappend_locked(&so2->so_rcv, m, flags);
 		break;
 
 	case SOCK_SEQPACKET:
 		if (sbappendaddr_nospacecheck_locked(&so2->so_rcv,
 		    &sun_noname, m, control))
 			control = NULL;
 		break;
 	}
 
 	mbcnt = so2->so_rcv.sb_mbcnt;
 	sbcc = sbavail(&so2->so_rcv);
 	if (sbcc)
 		sorwakeup_locked(so2);
 	else
 		SOCKBUF_UNLOCK(&so2->so_rcv);
 
 	/*
 	 * The PCB lock on unp2 protects the SB_STOP flag.  Without it,
 	 * it would be possible for uipc_rcvd to be called at this
 	 * point, drain the receiving sockbuf, clear SB_STOP, and then
 	 * we would set SB_STOP below.  That could lead to an empty
 	 * sockbuf having SB_STOP set
 	 */
 	SOCKBUF_LOCK(&so->so_snd);
 	if (sbcc >= so->so_snd.sb_hiwat || mbcnt >= so->so_snd.sb_mbmax)
 		so->so_snd.sb_flags |= SB_STOP;
 	SOCKBUF_UNLOCK(&so->so_snd);
 	UNP_PCB_UNLOCK(unp2);
 	m = NULL;
 out:
 	/*
 	 * PRUS_EOF is equivalent to pr_send followed by pr_shutdown.
 	 */
 	if (flags & PRUS_EOF) {
 		UNP_PCB_LOCK(unp);
 		socantsendmore(so);
 		unp_shutdown(unp);
 		UNP_PCB_UNLOCK(unp);
 	}
 	if (control != NULL && error != 0)
 		unp_scan(control, unp_freerights);
 
 release:
 	if (control != NULL)
 		m_freem(control);
 	/*
 	 * In case of PRUS_NOTREADY, uipc_ready() is responsible
 	 * for freeing memory.
 	 */   
 	if (m != NULL && (flags & PRUS_NOTREADY) == 0)
 		m_freem(m);
 	return (error);
 }
 
 /* PF_UNIX/SOCK_DGRAM version of sbspace() */
 static inline bool
 uipc_dgram_sbspace(struct sockbuf *sb, u_int cc, u_int mbcnt)
 {
 	u_int bleft, mleft;
 
 	/*
 	 * Negative space may happen if send(2) is followed by
 	 * setsockopt(SO_SNDBUF/SO_RCVBUF) that shrinks maximum.
 	 */
 	if (__predict_false(sb->sb_hiwat < sb->uxdg_cc ||
 	    sb->sb_mbmax < sb->uxdg_mbcnt))
 		return (false);
 
 	if (__predict_false(sb->sb_state & SBS_CANTRCVMORE))
 		return (false);
 
 	bleft = sb->sb_hiwat - sb->uxdg_cc;
 	mleft = sb->sb_mbmax - sb->uxdg_mbcnt;
 
 	return (bleft >= cc && mleft >= mbcnt);
 }
 
 /*
  * PF_UNIX/SOCK_DGRAM send
  *
  * Allocate a record consisting of 3 mbufs in the sequence of
  * from -> control -> data and append it to the socket buffer.
  *
  * The first mbuf carries sender's name and is a pkthdr that stores
  * overall length of datagram, its memory consumption and control length.
  */
 #define	ctllen	PH_loc.thirtytwo[1]
 _Static_assert(offsetof(struct pkthdr, memlen) + sizeof(u_int) <=
     offsetof(struct pkthdr, ctllen), "unix/dgram can not store ctllen");
 static int
 uipc_sosend_dgram(struct socket *so, struct sockaddr *addr, struct uio *uio,
     struct mbuf *m, struct mbuf *c, int flags, struct thread *td)
 {
 	struct unpcb *unp, *unp2;
 	const struct sockaddr *from;
 	struct socket *so2;
 	struct sockbuf *sb;
 	struct mbuf *f, *clast;
 	u_int cc, ctl, mbcnt;
 	u_int dcc __diagused, dctl __diagused, dmbcnt __diagused;
 	int error;
 
 	MPASS((uio != NULL && m == NULL) || (m != NULL && uio == NULL));
 
 	error = 0;
 	f = NULL;
 	ctl = 0;
 
 	if (__predict_false(flags & MSG_OOB)) {
 		error = EOPNOTSUPP;
 		goto out;
 	}
 	if (m == NULL) {
 		if (__predict_false(uio->uio_resid > unpdg_maxdgram)) {
 			error = EMSGSIZE;
 			goto out;
 		}
 		m = m_uiotombuf(uio, M_WAITOK, 0, max_hdr, M_PKTHDR);
 		if (__predict_false(m == NULL)) {
 			error = EFAULT;
 			goto out;
 		}
 		f = m_gethdr(M_WAITOK, MT_SONAME);
 		cc = m->m_pkthdr.len;
 		mbcnt = MSIZE + m->m_pkthdr.memlen;
 		if (c != NULL &&
 		    (error = unp_internalize(&c, td, &clast, &ctl, &mbcnt)))
 			goto out;
 	} else {
 		/* pr_sosend() with mbuf usually is a kernel thread. */
 
 		M_ASSERTPKTHDR(m);
 		if (__predict_false(c != NULL))
 			panic("%s: control from a kernel thread", __func__);
 
 		if (__predict_false(m->m_pkthdr.len > unpdg_maxdgram)) {
 			error = EMSGSIZE;
 			goto out;
 		}
 		if ((f = m_gethdr(M_NOWAIT, MT_SONAME)) == NULL) {
 			error = ENOBUFS;
 			goto out;
 		}
 		/* Condition the foreign mbuf to our standards. */
 		m_clrprotoflags(m);
 		m_tag_delete_chain(m, NULL);
 		m->m_pkthdr.rcvif = NULL;
 		m->m_pkthdr.flowid = 0;
 		m->m_pkthdr.csum_flags = 0;
 		m->m_pkthdr.fibnum = 0;
 		m->m_pkthdr.rsstype = 0;
 
 		cc = m->m_pkthdr.len;
 		mbcnt = MSIZE;
 		for (struct mbuf *mb = m; mb != NULL; mb = mb->m_next) {
 			mbcnt += MSIZE;
 			if (mb->m_flags & M_EXT)
 				mbcnt += mb->m_ext.ext_size;
 		}
 	}
 
 	unp = sotounpcb(so);
 	MPASS(unp);
 
 	/*
 	 * XXXGL: would be cool to fully remove so_snd out of the equation
 	 * and avoid this lock, which is not only extraneous, but also being
 	 * released, thus still leaving possibility for a race.  We can easily
 	 * handle SBS_CANTSENDMORE/SS_ISCONNECTED complement in unpcb, but it
 	 * is more difficult to invent something to handle so_error.
 	 */
 	error = SOCK_IO_SEND_LOCK(so, SBLOCKWAIT(flags));
 	if (error)
 		goto out2;
 	SOCK_SENDBUF_LOCK(so);
 	if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
 		SOCK_SENDBUF_UNLOCK(so);
 		error = EPIPE;
 		goto out3;
 	}
 	if (so->so_error != 0) {
 		error = so->so_error;
 		so->so_error = 0;
 		SOCK_SENDBUF_UNLOCK(so);
 		goto out3;
 	}
 	if (((so->so_state & SS_ISCONNECTED) == 0) && addr == NULL) {
 		SOCK_SENDBUF_UNLOCK(so);
 		error = EDESTADDRREQ;
 		goto out3;
 	}
 	SOCK_SENDBUF_UNLOCK(so);
 
 	if (addr != NULL) {
 		if ((error = unp_connectat(AT_FDCWD, so, addr, td, true)))
 			goto out3;
 		UNP_PCB_LOCK_ASSERT(unp);
 		unp2 = unp->unp_conn;
 		UNP_PCB_LOCK_ASSERT(unp2);
 	} else {
 		UNP_PCB_LOCK(unp);
 		unp2 = unp_pcb_lock_peer(unp);
 		if (unp2 == NULL) {
 			UNP_PCB_UNLOCK(unp);
 			error = ENOTCONN;
 			goto out3;
 		}
 	}
 
 	if (unp2->unp_flags & UNP_WANTCRED_MASK)
 		c = unp_addsockcred(td, c, unp2->unp_flags, &clast, &ctl,
 		    &mbcnt);
 	if (unp->unp_addr != NULL)
 		from = (struct sockaddr *)unp->unp_addr;
 	else
 		from = &sun_noname;
 	f->m_len = from->sa_len;
 	MPASS(from->sa_len <= MLEN);
 	bcopy(from, mtod(f, void *), from->sa_len);
 	ctl += f->m_len;
 
 	/*
 	 * Concatenate mbufs: from -> control -> data.
 	 * Save overall cc and mbcnt in "from" mbuf.
 	 */
 	if (c != NULL) {
 #ifdef INVARIANTS
 		struct mbuf *mc;
 
 		for (mc = c; mc->m_next != NULL; mc = mc->m_next);
 		MPASS(mc == clast);
 #endif
 		f->m_next = c;
 		clast->m_next = m;
 		c = NULL;
 	} else
 		f->m_next = m;
 	m = NULL;
 #ifdef INVARIANTS
 	dcc = dctl = dmbcnt = 0;
 	for (struct mbuf *mb = f; mb != NULL; mb = mb->m_next) {
 		if (mb->m_type == MT_DATA)
 			dcc += mb->m_len;
 		else
 			dctl += mb->m_len;
 		dmbcnt += MSIZE;
 		if (mb->m_flags & M_EXT)
 			dmbcnt += mb->m_ext.ext_size;
 	}
 	MPASS(dcc == cc);
 	MPASS(dctl == ctl);
 	MPASS(dmbcnt == mbcnt);
 #endif
 	f->m_pkthdr.len = cc + ctl;
 	f->m_pkthdr.memlen = mbcnt;
 	f->m_pkthdr.ctllen = ctl;
 
 	/*
 	 * Destination socket buffer selection.
 	 *
 	 * Unconnected sends, when !(so->so_state & SS_ISCONNECTED) and the
 	 * destination address is supplied, create a temporary connection for
 	 * the run time of the function (see call to unp_connectat() above and
 	 * to unp_disconnect() below).  We distinguish them by condition of
 	 * (addr != NULL).  We intentionally avoid adding 'bool connected' for
 	 * that condition, since, again, through the run time of this code we
 	 * are always connected.  For such "unconnected" sends, the destination
 	 * buffer would be the receive buffer of destination socket so2.
 	 *
 	 * For connected sends, data lands on the send buffer of the sender's
 	 * socket "so".  Then, if we just added the very first datagram
 	 * on this send buffer, we need to add the send buffer on to the
 	 * receiving socket's buffer list.  We put ourselves on top of the
 	 * list.  Such logic gives infrequent senders priority over frequent
 	 * senders.
 	 *
 	 * Note on byte count management. As long as event methods kevent(2),
 	 * select(2) are not protocol specific (yet), we need to maintain
 	 * meaningful values on the receive buffer.  So, the receive buffer
 	 * would accumulate counters from all connected buffers potentially
 	 * having sb_ccc > sb_hiwat or sb_mbcnt > sb_mbmax.
 	 */
 	so2 = unp2->unp_socket;
 	sb = (addr == NULL) ? &so->so_snd : &so2->so_rcv;
 	SOCK_RECVBUF_LOCK(so2);
 	if (uipc_dgram_sbspace(sb, cc + ctl, mbcnt)) {
 		if (addr == NULL && STAILQ_EMPTY(&sb->uxdg_mb))
 			TAILQ_INSERT_HEAD(&so2->so_rcv.uxdg_conns, &so->so_snd,
 			    uxdg_clist);
 		STAILQ_INSERT_TAIL(&sb->uxdg_mb, f, m_stailqpkt);
 		sb->uxdg_cc += cc + ctl;
 		sb->uxdg_ctl += ctl;
 		sb->uxdg_mbcnt += mbcnt;
 		so2->so_rcv.sb_acc += cc + ctl;
 		so2->so_rcv.sb_ccc += cc + ctl;
 		so2->so_rcv.sb_ctl += ctl;
 		so2->so_rcv.sb_mbcnt += mbcnt;
 		sorwakeup_locked(so2);
 		f = NULL;
 	} else {
 		soroverflow_locked(so2);
 		error = (so->so_state & SS_NBIO) ? EAGAIN : ENOBUFS;
 	}
 
 	if (addr != NULL)
 		unp_disconnect(unp, unp2);
 	else
 		unp_pcb_unlock_pair(unp, unp2);
 
 	td->td_ru.ru_msgsnd++;
 
 out3:
 	SOCK_IO_SEND_UNLOCK(so);
 out2:
 	if (c)
 		unp_scan(c, unp_freerights);
 out:
 	if (f)
 		m_freem(f);
 	if (c)
 		m_freem(c);
 	if (m)
 		m_freem(m);
 
 	return (error);
 }
 
 /*
  * PF_UNIX/SOCK_DGRAM receive with MSG_PEEK.
  * The mbuf has already been unlinked from the uxdg_mb of socket buffer
  * and needs to be linked onto uxdg_peeked of receive socket buffer.
  */
 static int
 uipc_peek_dgram(struct socket *so, struct mbuf *m, struct sockaddr **psa,
     struct uio *uio, struct mbuf **controlp, int *flagsp)
 {
 	ssize_t len = 0;
 	int error;
 
 	so->so_rcv.uxdg_peeked = m;
 	so->so_rcv.uxdg_cc += m->m_pkthdr.len;
 	so->so_rcv.uxdg_ctl += m->m_pkthdr.ctllen;
 	so->so_rcv.uxdg_mbcnt += m->m_pkthdr.memlen;
 	SOCK_RECVBUF_UNLOCK(so);
 
 	KASSERT(m->m_type == MT_SONAME, ("m->m_type == %d", m->m_type));
 	if (psa != NULL)
 		*psa = sodupsockaddr(mtod(m, struct sockaddr *), M_WAITOK);
 
 	m = m->m_next;
 	KASSERT(m, ("%s: no data or control after soname", __func__));
 
 	/*
 	 * With MSG_PEEK the control isn't executed, just copied.
 	 */
 	while (m != NULL && m->m_type == MT_CONTROL) {
 		if (controlp != NULL) {
 			*controlp = m_copym(m, 0, m->m_len, M_WAITOK);
 			controlp = &(*controlp)->m_next;
 		}
 		m = m->m_next;
 	}
 	KASSERT(m == NULL || m->m_type == MT_DATA,
 	    ("%s: not MT_DATA mbuf %p", __func__, m));
 	while (m != NULL && uio->uio_resid > 0) {
 		len = uio->uio_resid;
 		if (len > m->m_len)
 			len = m->m_len;
 		error = uiomove(mtod(m, char *), (int)len, uio);
 		if (error) {
 			SOCK_IO_RECV_UNLOCK(so);
 			return (error);
 		}
 		if (len == m->m_len)
 			m = m->m_next;
 	}
 	SOCK_IO_RECV_UNLOCK(so);
 
 	if (flagsp != NULL) {
 		if (m != NULL) {
 			if (*flagsp & MSG_TRUNC) {
 				/* Report real length of the packet */
 				uio->uio_resid -= m_length(m, NULL) - len;
 			}
 			*flagsp |= MSG_TRUNC;
 		} else
 			*flagsp &= ~MSG_TRUNC;
 	}
 
 	return (0);
 }
 
 /*
  * PF_UNIX/SOCK_DGRAM receive
  */
 static int
 uipc_soreceive_dgram(struct socket *so, struct sockaddr **psa, struct uio *uio,
     struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
 {
 	struct sockbuf *sb = NULL;
 	struct mbuf *m;
 	int flags, error;
 	ssize_t len = 0;
 	bool nonblock;
 
 	MPASS(mp0 == NULL);
 
 	if (psa != NULL)
 		*psa = NULL;
 	if (controlp != NULL)
 		*controlp = NULL;
 
 	flags = flagsp != NULL ? *flagsp : 0;
 	nonblock = (so->so_state & SS_NBIO) ||
 	    (flags & (MSG_DONTWAIT | MSG_NBIO));
 
 	error = SOCK_IO_RECV_LOCK(so, SBLOCKWAIT(flags));
 	if (__predict_false(error))
 		return (error);
 
 	/*
 	 * Loop blocking while waiting for a datagram.  Prioritize connected
 	 * peers over unconnected sends.  Set sb to selected socket buffer
 	 * containing an mbuf on exit from the wait loop.  A datagram that
 	 * had already been peeked at has top priority.
 	 */
 	SOCK_RECVBUF_LOCK(so);
 	while ((m = so->so_rcv.uxdg_peeked) == NULL &&
 	    (sb = TAILQ_FIRST(&so->so_rcv.uxdg_conns)) == NULL &&
 	    (m = STAILQ_FIRST(&so->so_rcv.uxdg_mb)) == NULL) {
 		if (so->so_error) {
 			error = so->so_error;
 			so->so_error = 0;
 			SOCK_RECVBUF_UNLOCK(so);
 			SOCK_IO_RECV_UNLOCK(so);
 			return (error);
 		}
 		if (so->so_rcv.sb_state & SBS_CANTRCVMORE ||
 		    uio->uio_resid == 0) {
 			SOCK_RECVBUF_UNLOCK(so);
 			SOCK_IO_RECV_UNLOCK(so);
 			return (0);
 		}
 		if (nonblock) {
 			SOCK_RECVBUF_UNLOCK(so);
 			SOCK_IO_RECV_UNLOCK(so);
 			return (EWOULDBLOCK);
 		}
 		error = sbwait(so, SO_RCV);
 		if (error) {
 			SOCK_RECVBUF_UNLOCK(so);
 			SOCK_IO_RECV_UNLOCK(so);
 			return (error);
 		}
 	}
 
 	if (sb == NULL)
 		sb = &so->so_rcv;
 	else if (m == NULL)
 		m = STAILQ_FIRST(&sb->uxdg_mb);
 	else
 		MPASS(m == so->so_rcv.uxdg_peeked);
 
 	MPASS(sb->uxdg_cc > 0);
 	M_ASSERTPKTHDR(m);
 	KASSERT(m->m_type == MT_SONAME, ("m->m_type == %d", m->m_type));
 
 	if (uio->uio_td)
 		uio->uio_td->td_ru.ru_msgrcv++;
 
 	if (__predict_true(m != so->so_rcv.uxdg_peeked)) {
 		STAILQ_REMOVE_HEAD(&sb->uxdg_mb, m_stailqpkt);
 		if (STAILQ_EMPTY(&sb->uxdg_mb) && sb != &so->so_rcv)
 			TAILQ_REMOVE(&so->so_rcv.uxdg_conns, sb, uxdg_clist);
 	} else
 		so->so_rcv.uxdg_peeked = NULL;
 
 	sb->uxdg_cc -= m->m_pkthdr.len;
 	sb->uxdg_ctl -= m->m_pkthdr.ctllen;
 	sb->uxdg_mbcnt -= m->m_pkthdr.memlen;
 
 	if (__predict_false(flags & MSG_PEEK))
 		return (uipc_peek_dgram(so, m, psa, uio, controlp, flagsp));
 
 	so->so_rcv.sb_acc -= m->m_pkthdr.len;
 	so->so_rcv.sb_ccc -= m->m_pkthdr.len;
 	so->so_rcv.sb_ctl -= m->m_pkthdr.ctllen;
 	so->so_rcv.sb_mbcnt -= m->m_pkthdr.memlen;
 	SOCK_RECVBUF_UNLOCK(so);
 
 	if (psa != NULL)
 		*psa = sodupsockaddr(mtod(m, struct sockaddr *), M_WAITOK);
 	m = m_free(m);
 	KASSERT(m, ("%s: no data or control after soname", __func__));
 
 	/*
 	 * Packet to copyout() is now in 'm' and it is disconnected from the
 	 * queue.
 	 *
 	 * Process one or more MT_CONTROL mbufs present before any data mbufs
 	 * in the first mbuf chain on the socket buffer.  We call into the
 	 * unp_externalize() to perform externalization (or freeing if
 	 * controlp == NULL). In some cases there can be only MT_CONTROL mbufs
 	 * without MT_DATA mbufs.
 	 */
 	while (m != NULL && m->m_type == MT_CONTROL) {
 		struct mbuf *cm;
 
 		/* XXXGL: unp_externalize() is also dom_externalize() KBI and
 		 * it frees whole chain, so we must disconnect the mbuf.
 		 */
 		cm = m; m = m->m_next; cm->m_next = NULL;
 		error = unp_externalize(cm, controlp, flags);
 		if (error != 0) {
 			SOCK_IO_RECV_UNLOCK(so);
 			unp_scan(m, unp_freerights);
 			m_freem(m);
 			return (error);
 		}
 		if (controlp != NULL) {
 			while (*controlp != NULL)
 				controlp = &(*controlp)->m_next;
 		}
 	}
 	KASSERT(m == NULL || m->m_type == MT_DATA,
 	    ("%s: not MT_DATA mbuf %p", __func__, m));
 	while (m != NULL && uio->uio_resid > 0) {
 		len = uio->uio_resid;
 		if (len > m->m_len)
 			len = m->m_len;
 		error = uiomove(mtod(m, char *), (int)len, uio);
 		if (error) {
 			SOCK_IO_RECV_UNLOCK(so);
 			m_freem(m);
 			return (error);
 		}
 		if (len == m->m_len)
 			m = m_free(m);
 		else {
 			m->m_data += len;
 			m->m_len -= len;
 		}
 	}
 	SOCK_IO_RECV_UNLOCK(so);
 
 	if (m != NULL) {
 		if (flagsp != NULL) {
 			if (flags & MSG_TRUNC) {
 				/* Report real length of the packet */
 				uio->uio_resid -= m_length(m, NULL);
 			}
 			*flagsp |= MSG_TRUNC;
 		}
 		m_freem(m);
 	} else if (flagsp != NULL)
 		*flagsp &= ~MSG_TRUNC;
 
 	return (0);
 }
 
 static bool
 uipc_ready_scan(struct socket *so, struct mbuf *m, int count, int *errorp)
 {
 	struct mbuf *mb, *n;
 	struct sockbuf *sb;
 
 	SOCK_LOCK(so);
 	if (SOLISTENING(so)) {
 		SOCK_UNLOCK(so);
 		return (false);
 	}
 	mb = NULL;
 	sb = &so->so_rcv;
 	SOCKBUF_LOCK(sb);
 	if (sb->sb_fnrdy != NULL) {
 		for (mb = sb->sb_mb, n = mb->m_nextpkt; mb != NULL;) {
 			if (mb == m) {
 				*errorp = sbready(sb, m, count);
 				break;
 			}
 			mb = mb->m_next;
 			if (mb == NULL) {
 				mb = n;
 				if (mb != NULL)
 					n = mb->m_nextpkt;
 			}
 		}
 	}
 	SOCKBUF_UNLOCK(sb);
 	SOCK_UNLOCK(so);
 	return (mb != NULL);
 }
 
 static int
 uipc_ready(struct socket *so, struct mbuf *m, int count)
 {
 	struct unpcb *unp, *unp2;
 	struct socket *so2;
 	int error, i;
 
 	unp = sotounpcb(so);
 
 	KASSERT(so->so_type == SOCK_STREAM,
 	    ("%s: unexpected socket type for %p", __func__, so));
 
 	UNP_PCB_LOCK(unp);
 	if ((unp2 = unp_pcb_lock_peer(unp)) != NULL) {
 		UNP_PCB_UNLOCK(unp);
 		so2 = unp2->unp_socket;
 		SOCKBUF_LOCK(&so2->so_rcv);
 		if ((error = sbready(&so2->so_rcv, m, count)) == 0)
 			sorwakeup_locked(so2);
 		else
 			SOCKBUF_UNLOCK(&so2->so_rcv);
 		UNP_PCB_UNLOCK(unp2);
 		return (error);
 	}
 	UNP_PCB_UNLOCK(unp);
 
 	/*
 	 * The receiving socket has been disconnected, but may still be valid.
 	 * In this case, the now-ready mbufs are still present in its socket
 	 * buffer, so perform an exhaustive search before giving up and freeing
 	 * the mbufs.
 	 */
 	UNP_LINK_RLOCK();
 	LIST_FOREACH(unp, &unp_shead, unp_link) {
 		if (uipc_ready_scan(unp->unp_socket, m, count, &error))
 			break;
 	}
 	UNP_LINK_RUNLOCK();
 
 	if (unp == NULL) {
 		for (i = 0; i < count; i++)
 			m = m_free(m);
 		error = ECONNRESET;
 	}
 	return (error);
 }
 
 static int
 uipc_sense(struct socket *so, struct stat *sb)
 {
 	struct unpcb *unp;
 
 	unp = sotounpcb(so);
 	KASSERT(unp != NULL, ("uipc_sense: unp == NULL"));
 
 	sb->st_blksize = so->so_snd.sb_hiwat;
 	sb->st_dev = NODEV;
 	sb->st_ino = unp->unp_ino;
 	return (0);
 }
 
 static int
 uipc_shutdown(struct socket *so)
 {
 	struct unpcb *unp;
 
 	unp = sotounpcb(so);
 	KASSERT(unp != NULL, ("uipc_shutdown: unp == NULL"));
 
 	UNP_PCB_LOCK(unp);
 	socantsendmore(so);
 	unp_shutdown(unp);
 	UNP_PCB_UNLOCK(unp);
 	return (0);
 }
 
 static int
 uipc_sockaddr(struct socket *so, struct sockaddr **nam)
 {
 	struct unpcb *unp;
 	const struct sockaddr *sa;
 
 	unp = sotounpcb(so);
 	KASSERT(unp != NULL, ("uipc_sockaddr: unp == NULL"));
 
 	*nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK);
 	UNP_PCB_LOCK(unp);
 	if (unp->unp_addr != NULL)
 		sa = (struct sockaddr *) unp->unp_addr;
 	else
 		sa = &sun_noname;
 	bcopy(sa, *nam, sa->sa_len);
 	UNP_PCB_UNLOCK(unp);
 	return (0);
 }
 
 static int
 uipc_ctloutput(struct socket *so, struct sockopt *sopt)
 {
 	struct unpcb *unp;
 	struct xucred xu;
 	int error, optval;
 
 	if (sopt->sopt_level != SOL_LOCAL)
 		return (EINVAL);
 
 	unp = sotounpcb(so);
 	KASSERT(unp != NULL, ("uipc_ctloutput: unp == NULL"));
 	error = 0;
 	switch (sopt->sopt_dir) {
 	case SOPT_GET:
 		switch (sopt->sopt_name) {
 		case LOCAL_PEERCRED:
 			UNP_PCB_LOCK(unp);
 			if (unp->unp_flags & UNP_HAVEPC)
 				xu = unp->unp_peercred;
 			else {
 				if (so->so_type == SOCK_STREAM)
 					error = ENOTCONN;
 				else
 					error = EINVAL;
 			}
 			UNP_PCB_UNLOCK(unp);
 			if (error == 0)
 				error = sooptcopyout(sopt, &xu, sizeof(xu));
 			break;
 
 		case LOCAL_CREDS:
 			/* Unlocked read. */
 			optval = unp->unp_flags & UNP_WANTCRED_ONESHOT ? 1 : 0;
 			error = sooptcopyout(sopt, &optval, sizeof(optval));
 			break;
 
 		case LOCAL_CREDS_PERSISTENT:
 			/* Unlocked read. */
 			optval = unp->unp_flags & UNP_WANTCRED_ALWAYS ? 1 : 0;
 			error = sooptcopyout(sopt, &optval, sizeof(optval));
 			break;
 
 		case LOCAL_CONNWAIT:
 			/* Unlocked read. */
 			optval = unp->unp_flags & UNP_CONNWAIT ? 1 : 0;
 			error = sooptcopyout(sopt, &optval, sizeof(optval));
 			break;
 
 		default:
 			error = EOPNOTSUPP;
 			break;
 		}
 		break;
 
 	case SOPT_SET:
 		switch (sopt->sopt_name) {
 		case LOCAL_CREDS:
 		case LOCAL_CREDS_PERSISTENT:
 		case LOCAL_CONNWAIT:
 			error = sooptcopyin(sopt, &optval, sizeof(optval),
 					    sizeof(optval));
 			if (error)
 				break;
 
 #define	OPTSET(bit, exclusive) do {					\
 	UNP_PCB_LOCK(unp);						\
 	if (optval) {							\
 		if ((unp->unp_flags & (exclusive)) != 0) {		\
 			UNP_PCB_UNLOCK(unp);				\
 			error = EINVAL;					\
 			break;						\
 		}							\
 		unp->unp_flags |= (bit);				\
 	} else								\
 		unp->unp_flags &= ~(bit);				\
 	UNP_PCB_UNLOCK(unp);						\
 } while (0)
 
 			switch (sopt->sopt_name) {
 			case LOCAL_CREDS:
 				OPTSET(UNP_WANTCRED_ONESHOT, UNP_WANTCRED_ALWAYS);
 				break;
 
 			case LOCAL_CREDS_PERSISTENT:
 				OPTSET(UNP_WANTCRED_ALWAYS, UNP_WANTCRED_ONESHOT);
 				break;
 
 			case LOCAL_CONNWAIT:
 				OPTSET(UNP_CONNWAIT, 0);
 				break;
 
 			default:
 				break;
 			}
 			break;
 #undef	OPTSET
 		default:
 			error = ENOPROTOOPT;
 			break;
 		}
 		break;
 
 	default:
 		error = EOPNOTSUPP;
 		break;
 	}
 	return (error);
 }
 
 static int
 unp_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
 {
 
 	return (unp_connectat(AT_FDCWD, so, nam, td, false));
 }
 
 static int
 unp_connectat(int fd, struct socket *so, struct sockaddr *nam,
     struct thread *td, bool return_locked)
 {
 	struct mtx *vplock;
 	struct sockaddr_un *soun;
 	struct vnode *vp;
 	struct socket *so2;
 	struct unpcb *unp, *unp2, *unp3;
 	struct nameidata nd;
 	char buf[SOCK_MAXADDRLEN];
 	struct sockaddr *sa;
 	cap_rights_t rights;
 	int error, len;
 	bool connreq;
 
 	if (nam->sa_family != AF_UNIX)
 		return (EAFNOSUPPORT);
 	if (nam->sa_len > sizeof(struct sockaddr_un))
 		return (EINVAL);
 	len = nam->sa_len - offsetof(struct sockaddr_un, sun_path);
 	if (len <= 0)
 		return (EINVAL);
 	soun = (struct sockaddr_un *)nam;
 	bcopy(soun->sun_path, buf, len);
 	buf[len] = 0;
 
 	error = 0;
 	unp = sotounpcb(so);
 	UNP_PCB_LOCK(unp);
 	for (;;) {
 		/*
 		 * Wait for connection state to stabilize.  If a connection
 		 * already exists, give up.  For datagram sockets, which permit
 		 * multiple consecutive connect(2) calls, upper layers are
 		 * responsible for disconnecting in advance of a subsequent
 		 * connect(2), but this is not synchronized with PCB connection
 		 * state.
 		 *
 		 * Also make sure that no threads are currently attempting to
 		 * lock the peer socket, to ensure that unp_conn cannot
 		 * transition between two valid sockets while locks are dropped.
 		 */
 		if (SOLISTENING(so))
 			error = EOPNOTSUPP;
 		else if (unp->unp_conn != NULL)
 			error = EISCONN;
 		else if ((unp->unp_flags & UNP_CONNECTING) != 0) {
 			error = EALREADY;
 		}
 		if (error != 0) {
 			UNP_PCB_UNLOCK(unp);
 			return (error);
 		}
 		if (unp->unp_pairbusy > 0) {
 			unp->unp_flags |= UNP_WAITING;
 			mtx_sleep(unp, UNP_PCB_LOCKPTR(unp), 0, "unpeer", 0);
 			continue;
 		}
 		break;
 	}
 	unp->unp_flags |= UNP_CONNECTING;
 	UNP_PCB_UNLOCK(unp);
 
 	connreq = (so->so_proto->pr_flags & PR_CONNREQUIRED) != 0;
 	if (connreq)
 		sa = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK);
 	else
 		sa = NULL;
 	NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF,
 	    UIO_SYSSPACE, buf, fd, cap_rights_init_one(&rights, CAP_CONNECTAT));
 	error = namei(&nd);
 	if (error)
 		vp = NULL;
 	else
 		vp = nd.ni_vp;
 	ASSERT_VOP_LOCKED(vp, "unp_connect");
-	NDFREE_NOTHING(&nd);
 	if (error)
 		goto bad;
+	NDFREE_PNBUF(&nd);
 
 	if (vp->v_type != VSOCK) {
 		error = ENOTSOCK;
 		goto bad;
 	}
 #ifdef MAC
 	error = mac_vnode_check_open(td->td_ucred, vp, VWRITE | VREAD);
 	if (error)
 		goto bad;
 #endif
 	error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td);
 	if (error)
 		goto bad;
 
 	unp = sotounpcb(so);
 	KASSERT(unp != NULL, ("unp_connect: unp == NULL"));
 
 	vplock = mtx_pool_find(mtxpool_sleep, vp);
 	mtx_lock(vplock);
 	VOP_UNP_CONNECT(vp, &unp2);
 	if (unp2 == NULL) {
 		error = ECONNREFUSED;
 		goto bad2;
 	}
 	so2 = unp2->unp_socket;
 	if (so->so_type != so2->so_type) {
 		error = EPROTOTYPE;
 		goto bad2;
 	}
 	if (connreq) {
 		if (SOLISTENING(so2)) {
 			CURVNET_SET(so2->so_vnet);
 			so2 = sonewconn(so2, 0);
 			CURVNET_RESTORE();
 		} else
 			so2 = NULL;
 		if (so2 == NULL) {
 			error = ECONNREFUSED;
 			goto bad2;
 		}
 		unp3 = sotounpcb(so2);
 		unp_pcb_lock_pair(unp2, unp3);
 		if (unp2->unp_addr != NULL) {
 			bcopy(unp2->unp_addr, sa, unp2->unp_addr->sun_len);
 			unp3->unp_addr = (struct sockaddr_un *) sa;
 			sa = NULL;
 		}
 
 		unp_copy_peercred(td, unp3, unp, unp2);
 
 		UNP_PCB_UNLOCK(unp2);
 		unp2 = unp3;
 
 		/*
 		 * It is safe to block on the PCB lock here since unp2 is
 		 * nascent and cannot be connected to any other sockets.
 		 */
 		UNP_PCB_LOCK(unp);
 #ifdef MAC
 		mac_socketpeer_set_from_socket(so, so2);
 		mac_socketpeer_set_from_socket(so2, so);
 #endif
 	} else {
 		unp_pcb_lock_pair(unp, unp2);
 	}
 	KASSERT(unp2 != NULL && so2 != NULL && unp2->unp_socket == so2 &&
 	    sotounpcb(so2) == unp2,
 	    ("%s: unp2 %p so2 %p", __func__, unp2, so2));
 	unp_connect2(so, so2, PRU_CONNECT);
 	KASSERT((unp->unp_flags & UNP_CONNECTING) != 0,
 	    ("%s: unp %p has UNP_CONNECTING clear", __func__, unp));
 	unp->unp_flags &= ~UNP_CONNECTING;
 	if (!return_locked)
 		unp_pcb_unlock_pair(unp, unp2);
 bad2:
 	mtx_unlock(vplock);
 bad:
 	if (vp != NULL) {
 		/*
 		 * If we are returning locked (called via uipc_sosend_dgram()),
 		 * we need to be sure that vput() won't sleep.  This is
 		 * guaranteed by VOP_UNP_CONNECT() call above and unp2 lock.
 		 * SOCK_STREAM/SEQPACKET can't request return_locked (yet).
 		 */
 		MPASS(!(return_locked && connreq));
 		vput(vp);
 	}
 	free(sa, M_SONAME);
 	if (__predict_false(error)) {
 		UNP_PCB_LOCK(unp);
 		KASSERT((unp->unp_flags & UNP_CONNECTING) != 0,
 		    ("%s: unp %p has UNP_CONNECTING clear", __func__, unp));
 		unp->unp_flags &= ~UNP_CONNECTING;
 		UNP_PCB_UNLOCK(unp);
 	}
 	return (error);
 }
 
 /*
  * Set socket peer credentials at connection time.
  *
  * The client's PCB credentials are copied from its process structure.  The
  * server's PCB credentials are copied from the socket on which it called
  * listen(2).  uipc_listen cached that process's credentials at the time.
  */
 void
 unp_copy_peercred(struct thread *td, struct unpcb *client_unp,
     struct unpcb *server_unp, struct unpcb *listen_unp)
 {
 	cru2xt(td, &client_unp->unp_peercred);
 	client_unp->unp_flags |= UNP_HAVEPC;
 
 	memcpy(&server_unp->unp_peercred, &listen_unp->unp_peercred,
 	    sizeof(server_unp->unp_peercred));
 	server_unp->unp_flags |= UNP_HAVEPC;
 	client_unp->unp_flags |= (listen_unp->unp_flags & UNP_WANTCRED_MASK);
 }
 
 static void
 unp_connect2(struct socket *so, struct socket *so2, conn2_how req)
 {
 	struct unpcb *unp;
 	struct unpcb *unp2;
 
 	MPASS(so2->so_type == so->so_type);
 	unp = sotounpcb(so);
 	KASSERT(unp != NULL, ("unp_connect2: unp == NULL"));
 	unp2 = sotounpcb(so2);
 	KASSERT(unp2 != NULL, ("unp_connect2: unp2 == NULL"));
 
 	UNP_PCB_LOCK_ASSERT(unp);
 	UNP_PCB_LOCK_ASSERT(unp2);
 	KASSERT(unp->unp_conn == NULL,
 	    ("%s: socket %p is already connected", __func__, unp));
 
 	unp->unp_conn = unp2;
 	unp_pcb_hold(unp2);
 	unp_pcb_hold(unp);
 	switch (so->so_type) {
 	case SOCK_DGRAM:
 		UNP_REF_LIST_LOCK();
 		LIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_reflink);
 		UNP_REF_LIST_UNLOCK();
 		soisconnected(so);
 		break;
 
 	case SOCK_STREAM:
 	case SOCK_SEQPACKET:
 		KASSERT(unp2->unp_conn == NULL,
 		    ("%s: socket %p is already connected", __func__, unp2));
 		unp2->unp_conn = unp;
 		if (req == PRU_CONNECT &&
 		    ((unp->unp_flags | unp2->unp_flags) & UNP_CONNWAIT))
 			soisconnecting(so);
 		else
 			soisconnected(so);
 		soisconnected(so2);
 		break;
 
 	default:
 		panic("unp_connect2");
 	}
 }
 
 static void
 unp_disconnect(struct unpcb *unp, struct unpcb *unp2)
 {
 	struct socket *so, *so2;
 	struct mbuf *m = NULL;
 #ifdef INVARIANTS
 	struct unpcb *unptmp;
 #endif
 
 	UNP_PCB_LOCK_ASSERT(unp);
 	UNP_PCB_LOCK_ASSERT(unp2);
 	KASSERT(unp->unp_conn == unp2,
 	    ("%s: unpcb %p is not connected to %p", __func__, unp, unp2));
 
 	unp->unp_conn = NULL;
 	so = unp->unp_socket;
 	so2 = unp2->unp_socket;
 	switch (unp->unp_socket->so_type) {
 	case SOCK_DGRAM:
 		/*
 		 * Remove our send socket buffer from the peer's receive buffer.
 		 * Move the data to the receive buffer only if it is empty.
 		 * This is a protection against a scenario where a peer
 		 * connects, floods and disconnects, effectively blocking
 		 * sendto() from unconnected sockets.
 		 */
 		SOCK_RECVBUF_LOCK(so2);
 		if (!STAILQ_EMPTY(&so->so_snd.uxdg_mb)) {
 			TAILQ_REMOVE(&so2->so_rcv.uxdg_conns, &so->so_snd,
 			    uxdg_clist);
 			if (__predict_true((so2->so_rcv.sb_state &
 			    SBS_CANTRCVMORE) == 0) &&
 			    STAILQ_EMPTY(&so2->so_rcv.uxdg_mb)) {
 				STAILQ_CONCAT(&so2->so_rcv.uxdg_mb,
 				    &so->so_snd.uxdg_mb);
 				so2->so_rcv.uxdg_cc += so->so_snd.uxdg_cc;
 				so2->so_rcv.uxdg_ctl += so->so_snd.uxdg_ctl;
 				so2->so_rcv.uxdg_mbcnt += so->so_snd.uxdg_mbcnt;
 			} else {
 				m = STAILQ_FIRST(&so->so_snd.uxdg_mb);
 				STAILQ_INIT(&so->so_snd.uxdg_mb);
 				so2->so_rcv.sb_acc -= so->so_snd.uxdg_cc;
 				so2->so_rcv.sb_ccc -= so->so_snd.uxdg_cc;
 				so2->so_rcv.sb_ctl -= so->so_snd.uxdg_ctl;
 				so2->so_rcv.sb_mbcnt -= so->so_snd.uxdg_mbcnt;
 			}
 			/* Note: so may reconnect. */
 			so->so_snd.uxdg_cc = 0;
 			so->so_snd.uxdg_ctl = 0;
 			so->so_snd.uxdg_mbcnt = 0;
 		}
 		SOCK_RECVBUF_UNLOCK(so2);
 		UNP_REF_LIST_LOCK();
 #ifdef INVARIANTS
 		LIST_FOREACH(unptmp, &unp2->unp_refs, unp_reflink) {
 			if (unptmp == unp)
 				break;
 		}
 		KASSERT(unptmp != NULL,
 		    ("%s: %p not found in reflist of %p", __func__, unp, unp2));
 #endif
 		LIST_REMOVE(unp, unp_reflink);
 		UNP_REF_LIST_UNLOCK();
 		if (so) {
 			SOCK_LOCK(so);
 			so->so_state &= ~SS_ISCONNECTED;
 			SOCK_UNLOCK(so);
 		}
 		break;
 
 	case SOCK_STREAM:
 	case SOCK_SEQPACKET:
 		if (so)
 			soisdisconnected(so);
 		MPASS(unp2->unp_conn == unp);
 		unp2->unp_conn = NULL;
 		if (so2)
 			soisdisconnected(so2);
 		break;
 	}
 
 	if (unp == unp2) {
 		unp_pcb_rele_notlast(unp);
 		if (!unp_pcb_rele(unp))
 			UNP_PCB_UNLOCK(unp);
 	} else {
 		if (!unp_pcb_rele(unp))
 			UNP_PCB_UNLOCK(unp);
 		if (!unp_pcb_rele(unp2))
 			UNP_PCB_UNLOCK(unp2);
 	}
 
 	if (m != NULL) {
 		unp_scan(m, unp_freerights);
 		m_freem(m);
 	}
 }
 
 /*
  * unp_pcblist() walks the global list of struct unpcb's to generate a
  * pointer list, bumping the refcount on each unpcb.  It then copies them out
  * sequentially, validating the generation number on each to see if it has
  * been detached.  All of this is necessary because copyout() may sleep on
  * disk I/O.
  */
 static int
 unp_pcblist(SYSCTL_HANDLER_ARGS)
 {
 	struct unpcb *unp, **unp_list;
 	unp_gen_t gencnt;
 	struct xunpgen *xug;
 	struct unp_head *head;
 	struct xunpcb *xu;
 	u_int i;
 	int error, n;
 
 	switch ((intptr_t)arg1) {
 	case SOCK_STREAM:
 		head = &unp_shead;
 		break;
 
 	case SOCK_DGRAM:
 		head = &unp_dhead;
 		break;
 
 	case SOCK_SEQPACKET:
 		head = &unp_sphead;
 		break;
 
 	default:
 		panic("unp_pcblist: arg1 %d", (int)(intptr_t)arg1);
 	}
 
 	/*
 	 * The process of preparing the PCB list is too time-consuming and
 	 * resource-intensive to repeat twice on every request.
 	 */
 	if (req->oldptr == NULL) {
 		n = unp_count;
 		req->oldidx = 2 * (sizeof *xug)
 			+ (n + n/8) * sizeof(struct xunpcb);
 		return (0);
 	}
 
 	if (req->newptr != NULL)
 		return (EPERM);
 
 	/*
 	 * OK, now we're committed to doing something.
 	 */
 	xug = malloc(sizeof(*xug), M_TEMP, M_WAITOK | M_ZERO);
 	UNP_LINK_RLOCK();
 	gencnt = unp_gencnt;
 	n = unp_count;
 	UNP_LINK_RUNLOCK();
 
 	xug->xug_len = sizeof *xug;
 	xug->xug_count = n;
 	xug->xug_gen = gencnt;
 	xug->xug_sogen = so_gencnt;
 	error = SYSCTL_OUT(req, xug, sizeof *xug);
 	if (error) {
 		free(xug, M_TEMP);
 		return (error);
 	}
 
 	unp_list = malloc(n * sizeof *unp_list, M_TEMP, M_WAITOK);
 
 	UNP_LINK_RLOCK();
 	for (unp = LIST_FIRST(head), i = 0; unp && i < n;
 	     unp = LIST_NEXT(unp, unp_link)) {
 		UNP_PCB_LOCK(unp);
 		if (unp->unp_gencnt <= gencnt) {
 			if (cr_cansee(req->td->td_ucred,
 			    unp->unp_socket->so_cred)) {
 				UNP_PCB_UNLOCK(unp);
 				continue;
 			}
 			unp_list[i++] = unp;
 			unp_pcb_hold(unp);
 		}
 		UNP_PCB_UNLOCK(unp);
 	}
 	UNP_LINK_RUNLOCK();
 	n = i;			/* In case we lost some during malloc. */
 
 	error = 0;
 	xu = malloc(sizeof(*xu), M_TEMP, M_WAITOK | M_ZERO);
 	for (i = 0; i < n; i++) {
 		unp = unp_list[i];
 		UNP_PCB_LOCK(unp);
 		if (unp_pcb_rele(unp))
 			continue;
 
 		if (unp->unp_gencnt <= gencnt) {
 			xu->xu_len = sizeof *xu;
 			xu->xu_unpp = (uintptr_t)unp;
 			/*
 			 * XXX - need more locking here to protect against
 			 * connect/disconnect races for SMP.
 			 */
 			if (unp->unp_addr != NULL)
 				bcopy(unp->unp_addr, &xu->xu_addr,
 				      unp->unp_addr->sun_len);
 			else
 				bzero(&xu->xu_addr, sizeof(xu->xu_addr));
 			if (unp->unp_conn != NULL &&
 			    unp->unp_conn->unp_addr != NULL)
 				bcopy(unp->unp_conn->unp_addr,
 				      &xu->xu_caddr,
 				      unp->unp_conn->unp_addr->sun_len);
 			else
 				bzero(&xu->xu_caddr, sizeof(xu->xu_caddr));
 			xu->unp_vnode = (uintptr_t)unp->unp_vnode;
 			xu->unp_conn = (uintptr_t)unp->unp_conn;
 			xu->xu_firstref = (uintptr_t)LIST_FIRST(&unp->unp_refs);
 			xu->xu_nextref = (uintptr_t)LIST_NEXT(unp, unp_reflink);
 			xu->unp_gencnt = unp->unp_gencnt;
 			sotoxsocket(unp->unp_socket, &xu->xu_socket);
 			UNP_PCB_UNLOCK(unp);
 			error = SYSCTL_OUT(req, xu, sizeof *xu);
 		} else {
 			UNP_PCB_UNLOCK(unp);
 		}
 	}
 	free(xu, M_TEMP);
 	if (!error) {
 		/*
 		 * Give the user an updated idea of our state.  If the
 		 * generation differs from what we told her before, she knows
 		 * that something happened while we were processing this
 		 * request, and it might be necessary to retry.
 		 */
 		xug->xug_gen = unp_gencnt;
 		xug->xug_sogen = so_gencnt;
 		xug->xug_count = unp_count;
 		error = SYSCTL_OUT(req, xug, sizeof *xug);
 	}
 	free(unp_list, M_TEMP);
 	free(xug, M_TEMP);
 	return (error);
 }
 
 SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist,
     CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE,
     (void *)(intptr_t)SOCK_DGRAM, 0, unp_pcblist, "S,xunpcb",
     "List of active local datagram sockets");
 SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist,
     CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE,
     (void *)(intptr_t)SOCK_STREAM, 0, unp_pcblist, "S,xunpcb",
     "List of active local stream sockets");
 SYSCTL_PROC(_net_local_seqpacket, OID_AUTO, pcblist,
     CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE,
     (void *)(intptr_t)SOCK_SEQPACKET, 0, unp_pcblist, "S,xunpcb",
     "List of active local seqpacket sockets");
 
 static void
 unp_shutdown(struct unpcb *unp)
 {
 	struct unpcb *unp2;
 	struct socket *so;
 
 	UNP_PCB_LOCK_ASSERT(unp);
 
 	unp2 = unp->unp_conn;
 	if ((unp->unp_socket->so_type == SOCK_STREAM ||
 	    (unp->unp_socket->so_type == SOCK_SEQPACKET)) && unp2 != NULL) {
 		so = unp2->unp_socket;
 		if (so != NULL)
 			socantrcvmore(so);
 	}
 }
 
 static void
 unp_drop(struct unpcb *unp)
 {
 	struct socket *so;
 	struct unpcb *unp2;
 
 	/*
 	 * Regardless of whether the socket's peer dropped the connection
 	 * with this socket by aborting or disconnecting, POSIX requires
 	 * that ECONNRESET is returned.
 	 */
 
 	UNP_PCB_LOCK(unp);
 	so = unp->unp_socket;
 	if (so)
 		so->so_error = ECONNRESET;
 	if ((unp2 = unp_pcb_lock_peer(unp)) != NULL) {
 		/* Last reference dropped in unp_disconnect(). */
 		unp_pcb_rele_notlast(unp);
 		unp_disconnect(unp, unp2);
 	} else if (!unp_pcb_rele(unp)) {
 		UNP_PCB_UNLOCK(unp);
 	}
 }
 
 static void
 unp_freerights(struct filedescent **fdep, int fdcount)
 {
 	struct file *fp;
 	int i;
 
 	KASSERT(fdcount > 0, ("%s: fdcount %d", __func__, fdcount));
 
 	for (i = 0; i < fdcount; i++) {
 		fp = fdep[i]->fde_file;
 		filecaps_free(&fdep[i]->fde_caps);
 		unp_discard(fp);
 	}
 	free(fdep[0], M_FILECAPS);
 }
 
 static int
 unp_externalize(struct mbuf *control, struct mbuf **controlp, int flags)
 {
 	struct thread *td = curthread;		/* XXX */
 	struct cmsghdr *cm = mtod(control, struct cmsghdr *);
 	int i;
 	int *fdp;
 	struct filedesc *fdesc = td->td_proc->p_fd;
 	struct filedescent **fdep;
 	void *data;
 	socklen_t clen = control->m_len, datalen;
 	int error, newfds;
 	u_int newlen;
 
 	UNP_LINK_UNLOCK_ASSERT();
 
 	error = 0;
 	if (controlp != NULL) /* controlp == NULL => free control messages */
 		*controlp = NULL;
 	while (cm != NULL) {
 		MPASS(clen >= sizeof(*cm) && clen >= cm->cmsg_len);
 
 		data = CMSG_DATA(cm);
 		datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data;
 		if (cm->cmsg_level == SOL_SOCKET
 		    && cm->cmsg_type == SCM_RIGHTS) {
 			newfds = datalen / sizeof(*fdep);
 			if (newfds == 0)
 				goto next;
 			fdep = data;
 
 			/* If we're not outputting the descriptors free them. */
 			if (error || controlp == NULL) {
 				unp_freerights(fdep, newfds);
 				goto next;
 			}
 			FILEDESC_XLOCK(fdesc);
 
 			/*
 			 * Now change each pointer to an fd in the global
 			 * table to an integer that is the index to the local
 			 * fd table entry that we set up to point to the
 			 * global one we are transferring.
 			 */
 			newlen = newfds * sizeof(int);
 			*controlp = sbcreatecontrol(NULL, newlen,
 			    SCM_RIGHTS, SOL_SOCKET, M_WAITOK);
 
 			fdp = (int *)
 			    CMSG_DATA(mtod(*controlp, struct cmsghdr *));
 			if ((error = fdallocn(td, 0, fdp, newfds))) {
 				FILEDESC_XUNLOCK(fdesc);
 				unp_freerights(fdep, newfds);
 				m_freem(*controlp);
 				*controlp = NULL;
 				goto next;
 			}
 			for (i = 0; i < newfds; i++, fdp++) {
 				_finstall(fdesc, fdep[i]->fde_file, *fdp,
 				    (flags & MSG_CMSG_CLOEXEC) != 0 ? O_CLOEXEC : 0,
 				    &fdep[i]->fde_caps);
 				unp_externalize_fp(fdep[i]->fde_file);
 			}
 
 			/*
 			 * The new type indicates that the mbuf data refers to
 			 * kernel resources that may need to be released before
 			 * the mbuf is freed.
 			 */
 			m_chtype(*controlp, MT_EXTCONTROL);
 			FILEDESC_XUNLOCK(fdesc);
 			free(fdep[0], M_FILECAPS);
 		} else {
 			/* We can just copy anything else across. */
 			if (error || controlp == NULL)
 				goto next;
 			*controlp = sbcreatecontrol(NULL, datalen,
 			    cm->cmsg_type, cm->cmsg_level, M_WAITOK);
 			bcopy(data,
 			    CMSG_DATA(mtod(*controlp, struct cmsghdr *)),
 			    datalen);
 		}
 		controlp = &(*controlp)->m_next;
 
 next:
 		if (CMSG_SPACE(datalen) < clen) {
 			clen -= CMSG_SPACE(datalen);
 			cm = (struct cmsghdr *)
 			    ((caddr_t)cm + CMSG_SPACE(datalen));
 		} else {
 			clen = 0;
 			cm = NULL;
 		}
 	}
 
 	m_freem(control);
 	return (error);
 }
 
 static void
 unp_zone_change(void *tag)
 {
 
 	uma_zone_set_max(unp_zone, maxsockets);
 }
 
 #ifdef INVARIANTS
 static void
 unp_zdtor(void *mem, int size __unused, void *arg __unused)
 {
 	struct unpcb *unp;
 
 	unp = mem;
 
 	KASSERT(LIST_EMPTY(&unp->unp_refs),
 	    ("%s: unpcb %p has lingering refs", __func__, unp));
 	KASSERT(unp->unp_socket == NULL,
 	    ("%s: unpcb %p has socket backpointer", __func__, unp));
 	KASSERT(unp->unp_vnode == NULL,
 	    ("%s: unpcb %p has vnode references", __func__, unp));
 	KASSERT(unp->unp_conn == NULL,
 	    ("%s: unpcb %p is still connected", __func__, unp));
 	KASSERT(unp->unp_addr == NULL,
 	    ("%s: unpcb %p has leaked addr", __func__, unp));
 }
 #endif
 
 static void
 unp_init(void *arg __unused)
 {
 	uma_dtor dtor;
 
 #ifdef INVARIANTS
 	dtor = unp_zdtor;
 #else
 	dtor = NULL;
 #endif
 	unp_zone = uma_zcreate("unpcb", sizeof(struct unpcb), NULL, dtor,
 	    NULL, NULL, UMA_ALIGN_CACHE, 0);
 	uma_zone_set_max(unp_zone, maxsockets);
 	uma_zone_set_warning(unp_zone, "kern.ipc.maxsockets limit reached");
 	EVENTHANDLER_REGISTER(maxsockets_change, unp_zone_change,
 	    NULL, EVENTHANDLER_PRI_ANY);
 	LIST_INIT(&unp_dhead);
 	LIST_INIT(&unp_shead);
 	LIST_INIT(&unp_sphead);
 	SLIST_INIT(&unp_defers);
 	TIMEOUT_TASK_INIT(taskqueue_thread, &unp_gc_task, 0, unp_gc, NULL);
 	TASK_INIT(&unp_defer_task, 0, unp_process_defers, NULL);
 	UNP_LINK_LOCK_INIT();
 	UNP_DEFERRED_LOCK_INIT();
 }
 SYSINIT(unp_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_SECOND, unp_init, NULL);
 
 static void
 unp_internalize_cleanup_rights(struct mbuf *control)
 {
 	struct cmsghdr *cp;
 	struct mbuf *m;
 	void *data;
 	socklen_t datalen;
 
 	for (m = control; m != NULL; m = m->m_next) {
 		cp = mtod(m, struct cmsghdr *);
 		if (cp->cmsg_level != SOL_SOCKET ||
 		    cp->cmsg_type != SCM_RIGHTS)
 			continue;
 		data = CMSG_DATA(cp);
 		datalen = (caddr_t)cp + cp->cmsg_len - (caddr_t)data;
 		unp_freerights(data, datalen / sizeof(struct filedesc *));
 	}
 }
 
 static int
 unp_internalize(struct mbuf **controlp, struct thread *td,
     struct mbuf **clast, u_int *space, u_int *mbcnt)
 {
 	struct mbuf *control, **initial_controlp;
 	struct proc *p;
 	struct filedesc *fdesc;
 	struct bintime *bt;
 	struct cmsghdr *cm;
 	struct cmsgcred *cmcred;
 	struct filedescent *fde, **fdep, *fdev;
 	struct file *fp;
 	struct timeval *tv;
 	struct timespec *ts;
 	void *data;
 	socklen_t clen, datalen;
 	int i, j, error, *fdp, oldfds;
 	u_int newlen;
 
 	MPASS((*controlp)->m_next == NULL); /* COMPAT_OLDSOCK may violate */
 	UNP_LINK_UNLOCK_ASSERT();
 
 	p = td->td_proc;
 	fdesc = p->p_fd;
 	error = 0;
 	control = *controlp;
 	*controlp = NULL;
 	initial_controlp = controlp;
 	for (clen = control->m_len, cm = mtod(control, struct cmsghdr *),
 	    data = CMSG_DATA(cm);
 
 	    clen >= sizeof(*cm) && cm->cmsg_level == SOL_SOCKET &&
 	    clen >= cm->cmsg_len && cm->cmsg_len >= sizeof(*cm) &&
 	    (char *)cm + cm->cmsg_len >= (char *)data;
 
 	    clen -= min(CMSG_SPACE(datalen), clen),
 	    cm = (struct cmsghdr *) ((char *)cm + CMSG_SPACE(datalen)),
 	    data = CMSG_DATA(cm)) {
 		datalen = (char *)cm + cm->cmsg_len - (char *)data;
 		switch (cm->cmsg_type) {
 		case SCM_CREDS:
 			*controlp = sbcreatecontrol(NULL, sizeof(*cmcred),
 			    SCM_CREDS, SOL_SOCKET, M_WAITOK);
 			cmcred = (struct cmsgcred *)
 			    CMSG_DATA(mtod(*controlp, struct cmsghdr *));
 			cmcred->cmcred_pid = p->p_pid;
 			cmcred->cmcred_uid = td->td_ucred->cr_ruid;
 			cmcred->cmcred_gid = td->td_ucred->cr_rgid;
 			cmcred->cmcred_euid = td->td_ucred->cr_uid;
 			cmcred->cmcred_ngroups = MIN(td->td_ucred->cr_ngroups,
 			    CMGROUP_MAX);
 			for (i = 0; i < cmcred->cmcred_ngroups; i++)
 				cmcred->cmcred_groups[i] =
 				    td->td_ucred->cr_groups[i];
 			break;
 
 		case SCM_RIGHTS:
 			oldfds = datalen / sizeof (int);
 			if (oldfds == 0)
 				continue;
 			/* On some machines sizeof pointer is bigger than
 			 * sizeof int, so we need to check if data fits into
 			 * single mbuf.  We could allocate several mbufs, and
 			 * unp_externalize() should even properly handle that.
 			 * But it is not worth to complicate the code for an
 			 * insane scenario of passing over 200 file descriptors
 			 * at once.
 			 */
 			newlen = oldfds * sizeof(fdep[0]);
 			if (CMSG_SPACE(newlen) > MCLBYTES) {
 				error = EMSGSIZE;
 				goto out;
 			}
 			/*
 			 * Check that all the FDs passed in refer to legal
 			 * files.  If not, reject the entire operation.
 			 */
 			fdp = data;
 			FILEDESC_SLOCK(fdesc);
 			for (i = 0; i < oldfds; i++, fdp++) {
 				fp = fget_noref(fdesc, *fdp);
 				if (fp == NULL) {
 					FILEDESC_SUNLOCK(fdesc);
 					error = EBADF;
 					goto out;
 				}
 				if (!(fp->f_ops->fo_flags & DFLAG_PASSABLE)) {
 					FILEDESC_SUNLOCK(fdesc);
 					error = EOPNOTSUPP;
 					goto out;
 				}
 			}
 
 			/*
 			 * Now replace the integer FDs with pointers to the
 			 * file structure and capability rights.
 			 */
 			*controlp = sbcreatecontrol(NULL, newlen,
 			    SCM_RIGHTS, SOL_SOCKET, M_WAITOK);
 			fdp = data;
 			for (i = 0; i < oldfds; i++, fdp++) {
 				if (!fhold(fdesc->fd_ofiles[*fdp].fde_file)) {
 					fdp = data;
 					for (j = 0; j < i; j++, fdp++) {
 						fdrop(fdesc->fd_ofiles[*fdp].
 						    fde_file, td);
 					}
 					FILEDESC_SUNLOCK(fdesc);
 					error = EBADF;
 					goto out;
 				}
 			}
 			fdp = data;
 			fdep = (struct filedescent **)
 			    CMSG_DATA(mtod(*controlp, struct cmsghdr *));
 			fdev = malloc(sizeof(*fdev) * oldfds, M_FILECAPS,
 			    M_WAITOK);
 			for (i = 0; i < oldfds; i++, fdev++, fdp++) {
 				fde = &fdesc->fd_ofiles[*fdp];
 				fdep[i] = fdev;
 				fdep[i]->fde_file = fde->fde_file;
 				filecaps_copy(&fde->fde_caps,
 				    &fdep[i]->fde_caps, true);
 				unp_internalize_fp(fdep[i]->fde_file);
 			}
 			FILEDESC_SUNLOCK(fdesc);
 			break;
 
 		case SCM_TIMESTAMP:
 			*controlp = sbcreatecontrol(NULL, sizeof(*tv),
 			    SCM_TIMESTAMP, SOL_SOCKET, M_WAITOK);
 			tv = (struct timeval *)
 			    CMSG_DATA(mtod(*controlp, struct cmsghdr *));
 			microtime(tv);
 			break;
 
 		case SCM_BINTIME:
 			*controlp = sbcreatecontrol(NULL, sizeof(*bt),
 			    SCM_BINTIME, SOL_SOCKET, M_WAITOK);
 			bt = (struct bintime *)
 			    CMSG_DATA(mtod(*controlp, struct cmsghdr *));
 			bintime(bt);
 			break;
 
 		case SCM_REALTIME:
 			*controlp = sbcreatecontrol(NULL, sizeof(*ts),
 			    SCM_REALTIME, SOL_SOCKET, M_WAITOK);
 			ts = (struct timespec *)
 			    CMSG_DATA(mtod(*controlp, struct cmsghdr *));
 			nanotime(ts);
 			break;
 
 		case SCM_MONOTONIC:
 			*controlp = sbcreatecontrol(NULL, sizeof(*ts),
 			    SCM_MONOTONIC, SOL_SOCKET, M_WAITOK);
 			ts = (struct timespec *)
 			    CMSG_DATA(mtod(*controlp, struct cmsghdr *));
 			nanouptime(ts);
 			break;
 
 		default:
 			error = EINVAL;
 			goto out;
 		}
 
 		if (space != NULL) {
 			*space += (*controlp)->m_len;
 			*mbcnt += MSIZE;
 			if ((*controlp)->m_flags & M_EXT)
 				*mbcnt += (*controlp)->m_ext.ext_size;
 			*clast = *controlp;
 		}
 		controlp = &(*controlp)->m_next;
 	}
 	if (clen > 0)
 		error = EINVAL;
 
 out:
 	if (error != 0 && initial_controlp != NULL)
 		unp_internalize_cleanup_rights(*initial_controlp);
 	m_freem(control);
 	return (error);
 }
 
 static struct mbuf *
 unp_addsockcred(struct thread *td, struct mbuf *control, int mode,
     struct mbuf **clast, u_int *space, u_int *mbcnt)
 {
 	struct mbuf *m, *n, *n_prev;
 	const struct cmsghdr *cm;
 	int ngroups, i, cmsgtype;
 	size_t ctrlsz;
 
 	ngroups = MIN(td->td_ucred->cr_ngroups, CMGROUP_MAX);
 	if (mode & UNP_WANTCRED_ALWAYS) {
 		ctrlsz = SOCKCRED2SIZE(ngroups);
 		cmsgtype = SCM_CREDS2;
 	} else {
 		ctrlsz = SOCKCREDSIZE(ngroups);
 		cmsgtype = SCM_CREDS;
 	}
 
 	m = sbcreatecontrol(NULL, ctrlsz, cmsgtype, SOL_SOCKET, M_NOWAIT);
 	if (m == NULL)
 		return (control);
 	MPASS((m->m_flags & M_EXT) == 0 && m->m_next == NULL);
 
 	if (mode & UNP_WANTCRED_ALWAYS) {
 		struct sockcred2 *sc;
 
 		sc = (void *)CMSG_DATA(mtod(m, struct cmsghdr *));
 		sc->sc_version = 0;
 		sc->sc_pid = td->td_proc->p_pid;
 		sc->sc_uid = td->td_ucred->cr_ruid;
 		sc->sc_euid = td->td_ucred->cr_uid;
 		sc->sc_gid = td->td_ucred->cr_rgid;
 		sc->sc_egid = td->td_ucred->cr_gid;
 		sc->sc_ngroups = ngroups;
 		for (i = 0; i < sc->sc_ngroups; i++)
 			sc->sc_groups[i] = td->td_ucred->cr_groups[i];
 	} else {
 		struct sockcred *sc;
 
 		sc = (void *)CMSG_DATA(mtod(m, struct cmsghdr *));
 		sc->sc_uid = td->td_ucred->cr_ruid;
 		sc->sc_euid = td->td_ucred->cr_uid;
 		sc->sc_gid = td->td_ucred->cr_rgid;
 		sc->sc_egid = td->td_ucred->cr_gid;
 		sc->sc_ngroups = ngroups;
 		for (i = 0; i < sc->sc_ngroups; i++)
 			sc->sc_groups[i] = td->td_ucred->cr_groups[i];
 	}
 
 	/*
 	 * Unlink SCM_CREDS control messages (struct cmsgcred), since just
 	 * created SCM_CREDS control message (struct sockcred) has another
 	 * format.
 	 */
 	if (control != NULL && cmsgtype == SCM_CREDS)
 		for (n = control, n_prev = NULL; n != NULL;) {
 			cm = mtod(n, struct cmsghdr *);
     			if (cm->cmsg_level == SOL_SOCKET &&
 			    cm->cmsg_type == SCM_CREDS) {
     				if (n_prev == NULL)
 					control = n->m_next;
 				else
 					n_prev->m_next = n->m_next;
 				if (space != NULL) {
 					MPASS(*space >= n->m_len);
 					*space -= n->m_len;
 					MPASS(*mbcnt >= MSIZE);
 					*mbcnt -= MSIZE;
 					if (n->m_flags & M_EXT) {
 						MPASS(*mbcnt >=
 						    n->m_ext.ext_size);
 						*mbcnt -= n->m_ext.ext_size;
 					}
 					MPASS(clast);
 					if (*clast == n) {
 						MPASS(n->m_next == NULL);
 						if (n_prev == NULL)
 							*clast = m;
 						else
 							*clast = n_prev;
 					}
 				}
 				n = m_free(n);
 			} else {
 				n_prev = n;
 				n = n->m_next;
 			}
 		}
 
 	/* Prepend it to the head. */
 	m->m_next = control;
 	if (space != NULL) {
 		*space += m->m_len;
 		*mbcnt += MSIZE;
 		if (control == NULL)
 			*clast = m;
 	}
 	return (m);
 }
 
 static struct unpcb *
 fptounp(struct file *fp)
 {
 	struct socket *so;
 
 	if (fp->f_type != DTYPE_SOCKET)
 		return (NULL);
 	if ((so = fp->f_data) == NULL)
 		return (NULL);
 	if (so->so_proto->pr_domain != &localdomain)
 		return (NULL);
 	return sotounpcb(so);
 }
 
 static void
 unp_discard(struct file *fp)
 {
 	struct unp_defer *dr;
 
 	if (unp_externalize_fp(fp)) {
 		dr = malloc(sizeof(*dr), M_TEMP, M_WAITOK);
 		dr->ud_fp = fp;
 		UNP_DEFERRED_LOCK();
 		SLIST_INSERT_HEAD(&unp_defers, dr, ud_link);
 		UNP_DEFERRED_UNLOCK();
 		atomic_add_int(&unp_defers_count, 1);
 		taskqueue_enqueue(taskqueue_thread, &unp_defer_task);
 	} else
 		closef_nothread(fp);
 }
 
 static void
 unp_process_defers(void *arg __unused, int pending)
 {
 	struct unp_defer *dr;
 	SLIST_HEAD(, unp_defer) drl;
 	int count;
 
 	SLIST_INIT(&drl);
 	for (;;) {
 		UNP_DEFERRED_LOCK();
 		if (SLIST_FIRST(&unp_defers) == NULL) {
 			UNP_DEFERRED_UNLOCK();
 			break;
 		}
 		SLIST_SWAP(&unp_defers, &drl, unp_defer);
 		UNP_DEFERRED_UNLOCK();
 		count = 0;
 		while ((dr = SLIST_FIRST(&drl)) != NULL) {
 			SLIST_REMOVE_HEAD(&drl, ud_link);
 			closef_nothread(dr->ud_fp);
 			free(dr, M_TEMP);
 			count++;
 		}
 		atomic_add_int(&unp_defers_count, -count);
 	}
 }
 
 static void
 unp_internalize_fp(struct file *fp)
 {
 	struct unpcb *unp;
 
 	UNP_LINK_WLOCK();
 	if ((unp = fptounp(fp)) != NULL) {
 		unp->unp_file = fp;
 		unp->unp_msgcount++;
 	}
 	unp_rights++;
 	UNP_LINK_WUNLOCK();
 }
 
 static int
 unp_externalize_fp(struct file *fp)
 {
 	struct unpcb *unp;
 	int ret;
 
 	UNP_LINK_WLOCK();
 	if ((unp = fptounp(fp)) != NULL) {
 		unp->unp_msgcount--;
 		ret = 1;
 	} else
 		ret = 0;
 	unp_rights--;
 	UNP_LINK_WUNLOCK();
 	return (ret);
 }
 
 /*
  * unp_defer indicates whether additional work has been defered for a future
  * pass through unp_gc().  It is thread local and does not require explicit
  * synchronization.
  */
 static int	unp_marked;
 
 static void
 unp_remove_dead_ref(struct filedescent **fdep, int fdcount)
 {
 	struct unpcb *unp;
 	struct file *fp;
 	int i;
 
 	/*
 	 * This function can only be called from the gc task.
 	 */
 	KASSERT(taskqueue_member(taskqueue_thread, curthread) != 0,
 	    ("%s: not on gc callout", __func__));
 	UNP_LINK_LOCK_ASSERT();
 
 	for (i = 0; i < fdcount; i++) {
 		fp = fdep[i]->fde_file;
 		if ((unp = fptounp(fp)) == NULL)
 			continue;
 		if ((unp->unp_gcflag & UNPGC_DEAD) == 0)
 			continue;
 		unp->unp_gcrefs--;
 	}
 }
 
 static void
 unp_restore_undead_ref(struct filedescent **fdep, int fdcount)
 {
 	struct unpcb *unp;
 	struct file *fp;
 	int i;
 
 	/*
 	 * This function can only be called from the gc task.
 	 */
 	KASSERT(taskqueue_member(taskqueue_thread, curthread) != 0,
 	    ("%s: not on gc callout", __func__));
 	UNP_LINK_LOCK_ASSERT();
 
 	for (i = 0; i < fdcount; i++) {
 		fp = fdep[i]->fde_file;
 		if ((unp = fptounp(fp)) == NULL)
 			continue;
 		if ((unp->unp_gcflag & UNPGC_DEAD) == 0)
 			continue;
 		unp->unp_gcrefs++;
 		unp_marked++;
 	}
 }
 
 static void
 unp_scan_socket(struct socket *so, void (*op)(struct filedescent **, int))
 {
 	struct sockbuf *sb;
 
 	SOCK_LOCK_ASSERT(so);
 
 	if (sotounpcb(so)->unp_gcflag & UNPGC_IGNORE_RIGHTS)
 		return;
 
 	SOCK_RECVBUF_LOCK(so);
 	switch (so->so_type) {
 	case SOCK_DGRAM:
 		unp_scan(STAILQ_FIRST(&so->so_rcv.uxdg_mb), op);
 		unp_scan(so->so_rcv.uxdg_peeked, op);
 		TAILQ_FOREACH(sb, &so->so_rcv.uxdg_conns, uxdg_clist)
 			unp_scan(STAILQ_FIRST(&sb->uxdg_mb), op);
 		break;
 	case SOCK_STREAM:
 	case SOCK_SEQPACKET:
 		unp_scan(so->so_rcv.sb_mb, op);
 		break;
 	}
 	SOCK_RECVBUF_UNLOCK(so);
 }
 
 static void
 unp_gc_scan(struct unpcb *unp, void (*op)(struct filedescent **, int))
 {
 	struct socket *so, *soa;
 
 	so = unp->unp_socket;
 	SOCK_LOCK(so);
 	if (SOLISTENING(so)) {
 		/*
 		 * Mark all sockets in our accept queue.
 		 */
 		TAILQ_FOREACH(soa, &so->sol_comp, so_list)
 			unp_scan_socket(soa, op);
 	} else {
 		/*
 		 * Mark all sockets we reference with RIGHTS.
 		 */
 		unp_scan_socket(so, op);
 	}
 	SOCK_UNLOCK(so);
 }
 
 static int unp_recycled;
 SYSCTL_INT(_net_local, OID_AUTO, recycled, CTLFLAG_RD, &unp_recycled, 0, 
     "Number of unreachable sockets claimed by the garbage collector.");
 
 static int unp_taskcount;
 SYSCTL_INT(_net_local, OID_AUTO, taskcount, CTLFLAG_RD, &unp_taskcount, 0, 
     "Number of times the garbage collector has run.");
 
 SYSCTL_UINT(_net_local, OID_AUTO, sockcount, CTLFLAG_RD, &unp_count, 0, 
     "Number of active local sockets.");
 
 static void
 unp_gc(__unused void *arg, int pending)
 {
 	struct unp_head *heads[] = { &unp_dhead, &unp_shead, &unp_sphead,
 				    NULL };
 	struct unp_head **head;
 	struct unp_head unp_deadhead;	/* List of potentially-dead sockets. */
 	struct file *f, **unref;
 	struct unpcb *unp, *unptmp;
 	int i, total, unp_unreachable;
 
 	LIST_INIT(&unp_deadhead);
 	unp_taskcount++;
 	UNP_LINK_RLOCK();
 	/*
 	 * First determine which sockets may be in cycles.
 	 */
 	unp_unreachable = 0;
 
 	for (head = heads; *head != NULL; head++)
 		LIST_FOREACH(unp, *head, unp_link) {
 			KASSERT((unp->unp_gcflag & ~UNPGC_IGNORE_RIGHTS) == 0,
 			    ("%s: unp %p has unexpected gc flags 0x%x",
 			    __func__, unp, (unsigned int)unp->unp_gcflag));
 
 			f = unp->unp_file;
 
 			/*
 			 * Check for an unreachable socket potentially in a
 			 * cycle.  It must be in a queue as indicated by
 			 * msgcount, and this must equal the file reference
 			 * count.  Note that when msgcount is 0 the file is
 			 * NULL.
 			 */
 			if (f != NULL && unp->unp_msgcount != 0 &&
 			    refcount_load(&f->f_count) == unp->unp_msgcount) {
 				LIST_INSERT_HEAD(&unp_deadhead, unp, unp_dead);
 				unp->unp_gcflag |= UNPGC_DEAD;
 				unp->unp_gcrefs = unp->unp_msgcount;
 				unp_unreachable++;
 			}
 		}
 
 	/*
 	 * Scan all sockets previously marked as potentially being in a cycle
 	 * and remove the references each socket holds on any UNPGC_DEAD
 	 * sockets in its queue.  After this step, all remaining references on
 	 * sockets marked UNPGC_DEAD should not be part of any cycle.
 	 */
 	LIST_FOREACH(unp, &unp_deadhead, unp_dead)
 		unp_gc_scan(unp, unp_remove_dead_ref);
 
 	/*
 	 * If a socket still has a non-negative refcount, it cannot be in a
 	 * cycle.  In this case increment refcount of all children iteratively.
 	 * Stop the scan once we do a complete loop without discovering
 	 * a new reachable socket.
 	 */
 	do {
 		unp_marked = 0;
 		LIST_FOREACH_SAFE(unp, &unp_deadhead, unp_dead, unptmp)
 			if (unp->unp_gcrefs > 0) {
 				unp->unp_gcflag &= ~UNPGC_DEAD;
 				LIST_REMOVE(unp, unp_dead);
 				KASSERT(unp_unreachable > 0,
 				    ("%s: unp_unreachable underflow.",
 				    __func__));
 				unp_unreachable--;
 				unp_gc_scan(unp, unp_restore_undead_ref);
 			}
 	} while (unp_marked);
 
 	UNP_LINK_RUNLOCK();
 
 	if (unp_unreachable == 0)
 		return;
 
 	/*
 	 * Allocate space for a local array of dead unpcbs.
 	 * TODO: can this path be simplified by instead using the local
 	 * dead list at unp_deadhead, after taking out references
 	 * on the file object and/or unpcb and dropping the link lock?
 	 */
 	unref = malloc(unp_unreachable * sizeof(struct file *),
 	    M_TEMP, M_WAITOK);
 
 	/*
 	 * Iterate looking for sockets which have been specifically marked
 	 * as unreachable and store them locally.
 	 */
 	UNP_LINK_RLOCK();
 	total = 0;
 	LIST_FOREACH(unp, &unp_deadhead, unp_dead) {
 		KASSERT((unp->unp_gcflag & UNPGC_DEAD) != 0,
 		    ("%s: unp %p not marked UNPGC_DEAD", __func__, unp));
 		unp->unp_gcflag &= ~UNPGC_DEAD;
 		f = unp->unp_file;
 		if (unp->unp_msgcount == 0 || f == NULL ||
 		    refcount_load(&f->f_count) != unp->unp_msgcount ||
 		    !fhold(f))
 			continue;
 		unref[total++] = f;
 		KASSERT(total <= unp_unreachable,
 		    ("%s: incorrect unreachable count.", __func__));
 	}
 	UNP_LINK_RUNLOCK();
 
 	/*
 	 * Now flush all sockets, free'ing rights.  This will free the
 	 * struct files associated with these sockets but leave each socket
 	 * with one remaining ref.
 	 */
 	for (i = 0; i < total; i++) {
 		struct socket *so;
 
 		so = unref[i]->f_data;
 		CURVNET_SET(so->so_vnet);
 		sorflush(so);
 		CURVNET_RESTORE();
 	}
 
 	/*
 	 * And finally release the sockets so they can be reclaimed.
 	 */
 	for (i = 0; i < total; i++)
 		fdrop(unref[i], NULL);
 	unp_recycled += total;
 	free(unref, M_TEMP);
 }
 
 /*
  * Synchronize against unp_gc, which can trip over data as we are freeing it.
  */
 static void
 unp_dispose(struct socket *so)
 {
 	struct sockbuf *sb;
 	struct unpcb *unp;
 	struct mbuf *m;
 
 	MPASS(!SOLISTENING(so));
 
 	unp = sotounpcb(so);
 	UNP_LINK_WLOCK();
 	unp->unp_gcflag |= UNPGC_IGNORE_RIGHTS;
 	UNP_LINK_WUNLOCK();
 
 	/*
 	 * Grab our special mbufs before calling sbrelease().
 	 */
 	SOCK_RECVBUF_LOCK(so);
 	switch (so->so_type) {
 	case SOCK_DGRAM:
 		while ((sb = TAILQ_FIRST(&so->so_rcv.uxdg_conns)) != NULL) {
 			STAILQ_CONCAT(&so->so_rcv.uxdg_mb, &sb->uxdg_mb);
 			TAILQ_REMOVE(&so->so_rcv.uxdg_conns, sb, uxdg_clist);
 			/* Note: socket of sb may reconnect. */
 			sb->uxdg_cc = sb->uxdg_ctl = sb->uxdg_mbcnt = 0;
 		}
 		sb = &so->so_rcv;
 		if (sb->uxdg_peeked != NULL) {
 			STAILQ_INSERT_HEAD(&sb->uxdg_mb, sb->uxdg_peeked,
 			    m_stailqpkt);
 			sb->uxdg_peeked = NULL;
 		}
 		m = STAILQ_FIRST(&sb->uxdg_mb);
 		STAILQ_INIT(&sb->uxdg_mb);
 		/* XXX: our shortened sbrelease() */
 		(void)chgsbsize(so->so_cred->cr_uidinfo, &sb->sb_hiwat, 0,
 		    RLIM_INFINITY);
 		/*
 		 * XXXGL Mark sb with SBS_CANTRCVMORE.  This is needed to
 		 * prevent uipc_sosend_dgram() or unp_disconnect() adding more
 		 * data to the socket.
 		 * We are now in dom_dispose and it could be a call from
 		 * soshutdown() or from the final sofree().  The sofree() case
 		 * is simple as it guarantees that no more sends will happen,
 		 * however we can race with unp_disconnect() from our peer.
 		 * The shutdown(2) case is more exotic.  It would call into
 		 * dom_dispose() only if socket is SS_ISCONNECTED.  This is
 		 * possible if we did connect(2) on this socket and we also
 		 * had it bound with bind(2) and receive connections from other
 		 * sockets.  Because soshutdown() violates POSIX (see comment
 		 * there) we will end up here shutting down our receive side.
 		 * Of course this will have affect not only on the peer we
 		 * connect(2)ed to, but also on all of the peers who had
 		 * connect(2)ed to us.  Their sends would end up with ENOBUFS.
 		 */
 		sb->sb_state |= SBS_CANTRCVMORE;
 		break;
 	case SOCK_STREAM:
 	case SOCK_SEQPACKET:
 		sb = &so->so_rcv;
 		m = sbcut_locked(sb, sb->sb_ccc);
 		KASSERT(sb->sb_ccc == 0 && sb->sb_mb == 0 && sb->sb_mbcnt == 0,
 		    ("%s: ccc %u mb %p mbcnt %u", __func__,
 		    sb->sb_ccc, (void *)sb->sb_mb, sb->sb_mbcnt));
 		sbrelease_locked(so, SO_RCV);
 		break;
 	}
 	SOCK_RECVBUF_UNLOCK(so);
 	if (SOCK_IO_RECV_OWNED(so))
 		SOCK_IO_RECV_UNLOCK(so);
 
 	if (m != NULL) {
 		unp_scan(m, unp_freerights);
 		m_freem(m);
 	}
 }
 
 static void
 unp_scan(struct mbuf *m0, void (*op)(struct filedescent **, int))
 {
 	struct mbuf *m;
 	struct cmsghdr *cm;
 	void *data;
 	socklen_t clen, datalen;
 
 	while (m0 != NULL) {
 		for (m = m0; m; m = m->m_next) {
 			if (m->m_type != MT_CONTROL)
 				continue;
 
 			cm = mtod(m, struct cmsghdr *);
 			clen = m->m_len;
 
 			while (cm != NULL) {
 				if (sizeof(*cm) > clen || cm->cmsg_len > clen)
 					break;
 
 				data = CMSG_DATA(cm);
 				datalen = (caddr_t)cm + cm->cmsg_len
 				    - (caddr_t)data;
 
 				if (cm->cmsg_level == SOL_SOCKET &&
 				    cm->cmsg_type == SCM_RIGHTS) {
 					(*op)(data, datalen /
 					    sizeof(struct filedescent *));
 				}
 
 				if (CMSG_SPACE(datalen) < clen) {
 					clen -= CMSG_SPACE(datalen);
 					cm = (struct cmsghdr *)
 					    ((caddr_t)cm + CMSG_SPACE(datalen));
 				} else {
 					clen = 0;
 					cm = NULL;
 				}
 			}
 		}
 		m0 = m0->m_nextpkt;
 	}
 }
 
 /*
  * Definitions of protocols supported in the LOCAL domain.
  */
 static struct protosw streamproto = {
 	.pr_type =		SOCK_STREAM,
 	.pr_flags =		PR_CONNREQUIRED|PR_WANTRCVD|PR_RIGHTS|
 				    PR_CAPATTACH,
 	.pr_ctloutput =		&uipc_ctloutput,
 	.pr_abort = 		uipc_abort,
 	.pr_accept =		uipc_accept,
 	.pr_attach =		uipc_attach,
 	.pr_bind =		uipc_bind,
 	.pr_bindat =		uipc_bindat,
 	.pr_connect =		uipc_connect,
 	.pr_connectat =		uipc_connectat,
 	.pr_connect2 =		uipc_connect2,
 	.pr_detach =		uipc_detach,
 	.pr_disconnect =	uipc_disconnect,
 	.pr_listen =		uipc_listen,
 	.pr_peeraddr =		uipc_peeraddr,
 	.pr_rcvd =		uipc_rcvd,
 	.pr_send =		uipc_send,
 	.pr_ready =		uipc_ready,
 	.pr_sense =		uipc_sense,
 	.pr_shutdown =		uipc_shutdown,
 	.pr_sockaddr =		uipc_sockaddr,
 	.pr_soreceive =		soreceive_generic,
 	.pr_close =		uipc_close,
 };
 
 static struct protosw dgramproto = {
 	.pr_type =		SOCK_DGRAM,
 	.pr_flags =		PR_ATOMIC | PR_ADDR |PR_RIGHTS | PR_CAPATTACH |
 				    PR_SOCKBUF,
 	.pr_ctloutput =		&uipc_ctloutput,
 	.pr_abort = 		uipc_abort,
 	.pr_accept =		uipc_accept,
 	.pr_attach =		uipc_attach,
 	.pr_bind =		uipc_bind,
 	.pr_bindat =		uipc_bindat,
 	.pr_connect =		uipc_connect,
 	.pr_connectat =		uipc_connectat,
 	.pr_connect2 =		uipc_connect2,
 	.pr_detach =		uipc_detach,
 	.pr_disconnect =	uipc_disconnect,
 	.pr_peeraddr =		uipc_peeraddr,
 	.pr_sosend =		uipc_sosend_dgram,
 	.pr_sense =		uipc_sense,
 	.pr_shutdown =		uipc_shutdown,
 	.pr_sockaddr =		uipc_sockaddr,
 	.pr_soreceive =		uipc_soreceive_dgram,
 	.pr_close =		uipc_close,
 };
 
 static struct protosw seqpacketproto = {
 	.pr_type =		SOCK_SEQPACKET,
 	/*
 	 * XXXRW: For now, PR_ADDR because soreceive will bump into them
 	 * due to our use of sbappendaddr.  A new sbappend variants is needed
 	 * that supports both atomic record writes and control data.
 	 */
 	.pr_flags =		PR_ADDR|PR_ATOMIC|PR_CONNREQUIRED|
 				    PR_WANTRCVD|PR_RIGHTS|PR_CAPATTACH,
 	.pr_ctloutput =		&uipc_ctloutput,
 	.pr_abort =		uipc_abort,
 	.pr_accept =		uipc_accept,
 	.pr_attach =		uipc_attach,
 	.pr_bind =		uipc_bind,
 	.pr_bindat =		uipc_bindat,
 	.pr_connect =		uipc_connect,
 	.pr_connectat =		uipc_connectat,
 	.pr_connect2 =		uipc_connect2,
 	.pr_detach =		uipc_detach,
 	.pr_disconnect =	uipc_disconnect,
 	.pr_listen =		uipc_listen,
 	.pr_peeraddr =		uipc_peeraddr,
 	.pr_rcvd =		uipc_rcvd,
 	.pr_send =		uipc_send,
 	.pr_sense =		uipc_sense,
 	.pr_shutdown =		uipc_shutdown,
 	.pr_sockaddr =		uipc_sockaddr,
 	.pr_soreceive =		soreceive_generic,	/* XXX: or...? */
 	.pr_close =		uipc_close,
 };
 
 static struct domain localdomain = {
 	.dom_family =		AF_LOCAL,
 	.dom_name =		"local",
 	.dom_externalize =	unp_externalize,
 	.dom_dispose =		unp_dispose,
 	.dom_nprotosw =		3,
 	.dom_protosw =		{
 		&streamproto,
 		&dgramproto,
 		&seqpacketproto,
 	}
 };
 DOMAIN_SET(local);
 
 /*
  * A helper function called by VFS before socket-type vnode reclamation.
  * For an active vnode it clears unp_vnode pointer and decrements unp_vnode
  * use count.
  */
 void
 vfs_unp_reclaim(struct vnode *vp)
 {
 	struct unpcb *unp;
 	int active;
 	struct mtx *vplock;
 
 	ASSERT_VOP_ELOCKED(vp, "vfs_unp_reclaim");
 	KASSERT(vp->v_type == VSOCK,
 	    ("vfs_unp_reclaim: vp->v_type != VSOCK"));
 
 	active = 0;
 	vplock = mtx_pool_find(mtxpool_sleep, vp);
 	mtx_lock(vplock);
 	VOP_UNP_CONNECT(vp, &unp);
 	if (unp == NULL)
 		goto done;
 	UNP_PCB_LOCK(unp);
 	if (unp->unp_vnode == vp) {
 		VOP_UNP_DETACH(vp);
 		unp->unp_vnode = NULL;
 		active = 1;
 	}
 	UNP_PCB_UNLOCK(unp);
  done:
 	mtx_unlock(vplock);
 	if (active)
 		vunref(vp);
 }
 
 #ifdef DDB
 static void
 db_print_indent(int indent)
 {
 	int i;
 
 	for (i = 0; i < indent; i++)
 		db_printf(" ");
 }
 
 static void
 db_print_unpflags(int unp_flags)
 {
 	int comma;
 
 	comma = 0;
 	if (unp_flags & UNP_HAVEPC) {
 		db_printf("%sUNP_HAVEPC", comma ? ", " : "");
 		comma = 1;
 	}
 	if (unp_flags & UNP_WANTCRED_ALWAYS) {
 		db_printf("%sUNP_WANTCRED_ALWAYS", comma ? ", " : "");
 		comma = 1;
 	}
 	if (unp_flags & UNP_WANTCRED_ONESHOT) {
 		db_printf("%sUNP_WANTCRED_ONESHOT", comma ? ", " : "");
 		comma = 1;
 	}
 	if (unp_flags & UNP_CONNWAIT) {
 		db_printf("%sUNP_CONNWAIT", comma ? ", " : "");
 		comma = 1;
 	}
 	if (unp_flags & UNP_CONNECTING) {
 		db_printf("%sUNP_CONNECTING", comma ? ", " : "");
 		comma = 1;
 	}
 	if (unp_flags & UNP_BINDING) {
 		db_printf("%sUNP_BINDING", comma ? ", " : "");
 		comma = 1;
 	}
 }
 
 static void
 db_print_xucred(int indent, struct xucred *xu)
 {
 	int comma, i;
 
 	db_print_indent(indent);
 	db_printf("cr_version: %u   cr_uid: %u   cr_pid: %d   cr_ngroups: %d\n",
 	    xu->cr_version, xu->cr_uid, xu->cr_pid, xu->cr_ngroups);
 	db_print_indent(indent);
 	db_printf("cr_groups: ");
 	comma = 0;
 	for (i = 0; i < xu->cr_ngroups; i++) {
 		db_printf("%s%u", comma ? ", " : "", xu->cr_groups[i]);
 		comma = 1;
 	}
 	db_printf("\n");
 }
 
 static void
 db_print_unprefs(int indent, struct unp_head *uh)
 {
 	struct unpcb *unp;
 	int counter;
 
 	counter = 0;
 	LIST_FOREACH(unp, uh, unp_reflink) {
 		if (counter % 4 == 0)
 			db_print_indent(indent);
 		db_printf("%p  ", unp);
 		if (counter % 4 == 3)
 			db_printf("\n");
 		counter++;
 	}
 	if (counter != 0 && counter % 4 != 0)
 		db_printf("\n");
 }
 
 DB_SHOW_COMMAND(unpcb, db_show_unpcb)
 {
 	struct unpcb *unp;
 
         if (!have_addr) {
                 db_printf("usage: show unpcb <addr>\n");
                 return;
         }
         unp = (struct unpcb *)addr;
 
 	db_printf("unp_socket: %p   unp_vnode: %p\n", unp->unp_socket,
 	    unp->unp_vnode);
 
 	db_printf("unp_ino: %ju   unp_conn: %p\n", (uintmax_t)unp->unp_ino,
 	    unp->unp_conn);
 
 	db_printf("unp_refs:\n");
 	db_print_unprefs(2, &unp->unp_refs);
 
 	/* XXXRW: Would be nice to print the full address, if any. */
 	db_printf("unp_addr: %p\n", unp->unp_addr);
 
 	db_printf("unp_gencnt: %llu\n",
 	    (unsigned long long)unp->unp_gencnt);
 
 	db_printf("unp_flags: %x (", unp->unp_flags);
 	db_print_unpflags(unp->unp_flags);
 	db_printf(")\n");
 
 	db_printf("unp_peercred:\n");
 	db_print_xucred(2, &unp->unp_peercred);
 
 	db_printf("unp_refcount: %u\n", unp->unp_refcount);
 }
 #endif
diff --git a/sys/kern/vfs_cache.c b/sys/kern/vfs_cache.c
index 9e25d6f8fefb..5f72506549bd 100644
--- a/sys/kern/vfs_cache.c
+++ b/sys/kern/vfs_cache.c
@@ -1,6135 +1,6117 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 1989, 1993, 1995
  *	The Regents of the University of California.  All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * Poul-Henning Kamp of the FreeBSD Project.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)vfs_cache.c	8.5 (Berkeley) 3/22/95
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_ddb.h"
 #include "opt_ktrace.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/capsicum.h>
 #include <sys/counter.h>
 #include <sys/filedesc.h>
 #include <sys/fnv_hash.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/fcntl.h>
 #include <sys/jail.h>
 #include <sys/mount.h>
 #include <sys/namei.h>
 #include <sys/proc.h>
 #include <sys/seqc.h>
 #include <sys/sdt.h>
 #include <sys/smr.h>
 #include <sys/smp.h>
 #include <sys/syscallsubr.h>
 #include <sys/sysctl.h>
 #include <sys/sysproto.h>
 #include <sys/vnode.h>
 #include <ck_queue.h>
 #ifdef KTRACE
 #include <sys/ktrace.h>
 #endif
 #ifdef INVARIANTS
 #include <machine/_inttypes.h>
 #endif
 
 #include <sys/capsicum.h>
 
 #include <security/audit/audit.h>
 #include <security/mac/mac_framework.h>
 
 #ifdef DDB
 #include <ddb/ddb.h>
 #endif
 
 #include <vm/uma.h>
 
 /*
  * High level overview of name caching in the VFS layer.
  *
  * Originally caching was implemented as part of UFS, later extracted to allow
  * use by other filesystems. A decision was made to make it optional and
  * completely detached from the rest of the kernel, which comes with limitations
  * outlined near the end of this comment block.
  *
  * This fundamental choice needs to be revisited. In the meantime, the current
  * state is described below. Significance of all notable routines is explained
  * in comments placed above their implementation. Scattered thoroughout the
  * file are TODO comments indicating shortcomings which can be fixed without
  * reworking everything (most of the fixes will likely be reusable). Various
  * details are omitted from this explanation to not clutter the overview, they
  * have to be checked by reading the code and associated commentary.
  *
  * Keep in mind that it's individual path components which are cached, not full
  * paths. That is, for a fully cached path "foo/bar/baz" there are 3 entries,
  * one for each name.
  *
  * I. Data organization
  *
  * Entries are described by "struct namecache" objects and stored in a hash
  * table. See cache_get_hash for more information.
  *
  * "struct vnode" contains pointers to source entries (names which can be found
  * when traversing through said vnode), destination entries (names of that
  * vnode (see "Limitations" for a breakdown on the subject) and a pointer to
  * the parent vnode.
  *
  * The (directory vnode; name) tuple reliably determines the target entry if
  * it exists.
  *
  * Since there are no small locks at this time (all are 32 bytes in size on
  * LP64), the code works around the problem by introducing lock arrays to
  * protect hash buckets and vnode lists.
  *
  * II. Filesystem integration
  *
  * Filesystems participating in name caching do the following:
  * - set vop_lookup routine to vfs_cache_lookup
  * - set vop_cachedlookup to whatever can perform the lookup if the above fails
  * - if they support lockless lookup (see below), vop_fplookup_vexec and
  *   vop_fplookup_symlink are set along with the MNTK_FPLOOKUP flag on the
  *   mount point
  * - call cache_purge or cache_vop_* routines to eliminate stale entries as
  *   applicable
  * - call cache_enter to add entries depending on the MAKEENTRY flag
  *
  * With the above in mind, there are 2 entry points when doing lookups:
  * - ... -> namei -> cache_fplookup -- this is the default
  * - ... -> VOP_LOOKUP -> vfs_cache_lookup -- normally only called by namei
  *   should the above fail
  *
  * Example code flow how an entry is added:
  * ... -> namei -> cache_fplookup -> cache_fplookup_noentry -> VOP_LOOKUP ->
  * vfs_cache_lookup -> VOP_CACHEDLOOKUP -> ufs_lookup_ino -> cache_enter
  *
  * III. Performance considerations
  *
  * For lockless case forward lookup avoids any writes to shared areas apart
  * from the terminal path component. In other words non-modifying lookups of
  * different files don't suffer any scalability problems in the namecache.
  * Looking up the same file is limited by VFS and goes beyond the scope of this
  * file.
  *
  * At least on amd64 the single-threaded bottleneck for long paths is hashing
  * (see cache_get_hash). There are cases where the code issues acquire fence
  * multiple times, they can be combined on architectures which suffer from it.
  *
  * For locked case each encountered vnode has to be referenced and locked in
  * order to be handed out to the caller (normally that's namei). This
  * introduces significant hit single-threaded and serialization multi-threaded.
  *
  * Reverse lookup (e.g., "getcwd") fully scales provided it is fully cached --
  * avoids any writes to shared areas to any components.
  *
  * Unrelated insertions are partially serialized on updating the global entry
  * counter and possibly serialized on colliding bucket or vnode locks.
  *
  * IV. Observability
  *
  * Note not everything has an explicit dtrace probe nor it should have, thus
  * some of the one-liners below depend on implementation details.
  *
  * Examples:
  *
  * # Check what lookups failed to be handled in a lockless manner. Column 1 is
  * # line number, column 2 is status code (see cache_fpl_status)
  * dtrace -n 'vfs:fplookup:lookup:done { @[arg1, arg2] = count(); }'
  *
  * # Lengths of names added by binary name
  * dtrace -n 'fbt::cache_enter_time:entry { @[execname] = quantize(args[2]->cn_namelen); }'
  *
  * # Same as above but only those which exceed 64 characters
  * dtrace -n 'fbt::cache_enter_time:entry /args[2]->cn_namelen > 64/ { @[execname] = quantize(args[2]->cn_namelen); }'
  *
  * # Who is performing lookups with spurious slashes (e.g., "foo//bar") and what
  * # path is it
  * dtrace -n 'fbt::cache_fplookup_skip_slashes:entry { @[execname, stringof(args[0]->cnp->cn_pnbuf)] = count(); }'
  *
  * V. Limitations and implementation defects
  *
  * - since it is possible there is no entry for an open file, tools like
  *   "procstat" may fail to resolve fd -> vnode -> path to anything
  * - even if a filesystem adds an entry, it may get purged (e.g., due to memory
  *   shortage) in which case the above problem applies
  * - hardlinks are not tracked, thus if a vnode is reachable in more than one
  *   way, resolving a name may return a different path than the one used to
  *   open it (even if said path is still valid)
  * - by default entries are not added for newly created files
  * - adding an entry may need to evict negative entry first, which happens in 2
  *   distinct places (evicting on lookup, adding in a later VOP) making it
  *   impossible to simply reuse it
  * - there is a simple scheme to evict negative entries as the cache is approaching
  *   its capacity, but it is very unclear if doing so is a good idea to begin with
  * - vnodes are subject to being recycled even if target inode is left in memory,
  *   which loses the name cache entries when it perhaps should not. in case of tmpfs
  *   names get duplicated -- kept by filesystem itself and namecache separately
  * - struct namecache has a fixed size and comes in 2 variants, often wasting space.
  *   now hard to replace with malloc due to dependence on SMR.
  * - lack of better integration with the kernel also turns nullfs into a layered
  *   filesystem instead of something which can take advantage of caching
  */
 
 static SYSCTL_NODE(_vfs, OID_AUTO, cache, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
     "Name cache");
 
 SDT_PROVIDER_DECLARE(vfs);
 SDT_PROBE_DEFINE3(vfs, namecache, enter, done, "struct vnode *", "char *",
     "struct vnode *");
 SDT_PROBE_DEFINE3(vfs, namecache, enter, duplicate, "struct vnode *", "char *",
     "struct vnode *");
 SDT_PROBE_DEFINE2(vfs, namecache, enter_negative, done, "struct vnode *",
     "char *");
 SDT_PROBE_DEFINE2(vfs, namecache, fullpath_smr, hit, "struct vnode *",
     "const char *");
 SDT_PROBE_DEFINE4(vfs, namecache, fullpath_smr, miss, "struct vnode *",
     "struct namecache *", "int", "int");
 SDT_PROBE_DEFINE1(vfs, namecache, fullpath, entry, "struct vnode *");
 SDT_PROBE_DEFINE3(vfs, namecache, fullpath, hit, "struct vnode *",
     "char *", "struct vnode *");
 SDT_PROBE_DEFINE1(vfs, namecache, fullpath, miss, "struct vnode *");
 SDT_PROBE_DEFINE3(vfs, namecache, fullpath, return, "int",
     "struct vnode *", "char *");
 SDT_PROBE_DEFINE3(vfs, namecache, lookup, hit, "struct vnode *", "char *",
     "struct vnode *");
 SDT_PROBE_DEFINE2(vfs, namecache, lookup, hit__negative,
     "struct vnode *", "char *");
 SDT_PROBE_DEFINE2(vfs, namecache, lookup, miss, "struct vnode *",
     "char *");
 SDT_PROBE_DEFINE2(vfs, namecache, removecnp, hit, "struct vnode *",
     "struct componentname *");
 SDT_PROBE_DEFINE2(vfs, namecache, removecnp, miss, "struct vnode *",
     "struct componentname *");
 SDT_PROBE_DEFINE3(vfs, namecache, purge, done, "struct vnode *", "size_t", "size_t");
 SDT_PROBE_DEFINE1(vfs, namecache, purge, batch, "int");
 SDT_PROBE_DEFINE1(vfs, namecache, purge_negative, done, "struct vnode *");
 SDT_PROBE_DEFINE1(vfs, namecache, purgevfs, done, "struct mount *");
 SDT_PROBE_DEFINE3(vfs, namecache, zap, done, "struct vnode *", "char *",
     "struct vnode *");
 SDT_PROBE_DEFINE2(vfs, namecache, zap_negative, done, "struct vnode *",
     "char *");
 SDT_PROBE_DEFINE2(vfs, namecache, evict_negative, done, "struct vnode *",
     "char *");
 SDT_PROBE_DEFINE1(vfs, namecache, symlink, alloc__fail, "size_t");
 
 SDT_PROBE_DEFINE3(vfs, fplookup, lookup, done, "struct nameidata", "int", "bool");
 SDT_PROBE_DECLARE(vfs, namei, lookup, entry);
 SDT_PROBE_DECLARE(vfs, namei, lookup, return);
 
 static char __read_frequently cache_fast_lookup_enabled = true;
 
 /*
  * This structure describes the elements in the cache of recent
  * names looked up by namei.
  */
 struct negstate {
 	u_char neg_flag;
 	u_char neg_hit;
 };
 _Static_assert(sizeof(struct negstate) <= sizeof(struct vnode *),
     "the state must fit in a union with a pointer without growing it");
 
 struct	namecache {
 	LIST_ENTRY(namecache) nc_src;	/* source vnode list */
 	TAILQ_ENTRY(namecache) nc_dst;	/* destination vnode list */
 	CK_SLIST_ENTRY(namecache) nc_hash;/* hash chain */
 	struct	vnode *nc_dvp;		/* vnode of parent of name */
 	union {
 		struct	vnode *nu_vp;	/* vnode the name refers to */
 		struct	negstate nu_neg;/* negative entry state */
 	} n_un;
 	u_char	nc_flag;		/* flag bits */
 	u_char	nc_nlen;		/* length of name */
 	char	nc_name[];		/* segment name + nul */
 };
 
 /*
  * struct namecache_ts repeats struct namecache layout up to the
  * nc_nlen member.
  * struct namecache_ts is used in place of struct namecache when time(s) need
  * to be stored.  The nc_dotdottime field is used when a cache entry is mapping
  * both a non-dotdot directory name plus dotdot for the directory's
  * parent.
  *
  * See below for alignment requirement.
  */
 struct	namecache_ts {
 	struct	timespec nc_time;	/* timespec provided by fs */
 	struct	timespec nc_dotdottime;	/* dotdot timespec provided by fs */
 	int	nc_ticks;		/* ticks value when entry was added */
 	int	nc_pad;
 	struct namecache nc_nc;
 };
 
 TAILQ_HEAD(cache_freebatch, namecache);
 
 /*
  * At least mips n32 performs 64-bit accesses to timespec as found
  * in namecache_ts and requires them to be aligned. Since others
  * may be in the same spot suffer a little bit and enforce the
  * alignment for everyone. Note this is a nop for 64-bit platforms.
  */
 #define CACHE_ZONE_ALIGNMENT	UMA_ALIGNOF(time_t)
 
 /*
  * TODO: the initial value of CACHE_PATH_CUTOFF was inherited from the
  * 4.4 BSD codebase. Later on struct namecache was tweaked to become
  * smaller and the value was bumped to retain the total size, but it
  * was never re-evaluated for suitability. A simple test counting
  * lengths during package building shows that the value of 45 covers
  * about 86% of all added entries, reaching 99% at 65.
  *
  * Regardless of the above, use of dedicated zones instead of malloc may be
  * inducing additional waste. This may be hard to address as said zones are
  * tied to VFS SMR. Even if retaining them, the current split should be
  * re-evaluated.
  */
 #ifdef __LP64__
 #define	CACHE_PATH_CUTOFF	45
 #define	CACHE_LARGE_PAD		6
 #else
 #define	CACHE_PATH_CUTOFF	41
 #define	CACHE_LARGE_PAD		2
 #endif
 
 #define CACHE_ZONE_SMALL_SIZE		(offsetof(struct namecache, nc_name) + CACHE_PATH_CUTOFF + 1)
 #define CACHE_ZONE_SMALL_TS_SIZE	(offsetof(struct namecache_ts, nc_nc) + CACHE_ZONE_SMALL_SIZE)
 #define CACHE_ZONE_LARGE_SIZE		(offsetof(struct namecache, nc_name) + NAME_MAX + 1 + CACHE_LARGE_PAD)
 #define CACHE_ZONE_LARGE_TS_SIZE	(offsetof(struct namecache_ts, nc_nc) + CACHE_ZONE_LARGE_SIZE)
 
 _Static_assert((CACHE_ZONE_SMALL_SIZE % (CACHE_ZONE_ALIGNMENT + 1)) == 0, "bad zone size");
 _Static_assert((CACHE_ZONE_SMALL_TS_SIZE % (CACHE_ZONE_ALIGNMENT + 1)) == 0, "bad zone size");
 _Static_assert((CACHE_ZONE_LARGE_SIZE % (CACHE_ZONE_ALIGNMENT + 1)) == 0, "bad zone size");
 _Static_assert((CACHE_ZONE_LARGE_TS_SIZE % (CACHE_ZONE_ALIGNMENT + 1)) == 0, "bad zone size");
 
 #define	nc_vp		n_un.nu_vp
 #define	nc_neg		n_un.nu_neg
 
 /*
  * Flags in namecache.nc_flag
  */
 #define NCF_WHITE	0x01
 #define NCF_ISDOTDOT	0x02
 #define	NCF_TS		0x04
 #define	NCF_DTS		0x08
 #define	NCF_DVDROP	0x10
 #define	NCF_NEGATIVE	0x20
 #define	NCF_INVALID	0x40
 #define	NCF_WIP		0x80
 
 /*
  * Flags in negstate.neg_flag
  */
 #define NEG_HOT		0x01
 
 static bool	cache_neg_evict_cond(u_long lnumcache);
 
 /*
  * Mark an entry as invalid.
  *
  * This is called before it starts getting deconstructed.
  */
 static void
 cache_ncp_invalidate(struct namecache *ncp)
 {
 
 	KASSERT((ncp->nc_flag & NCF_INVALID) == 0,
 	    ("%s: entry %p already invalid", __func__, ncp));
 	atomic_store_char(&ncp->nc_flag, ncp->nc_flag | NCF_INVALID);
 	atomic_thread_fence_rel();
 }
 
 /*
  * Check whether the entry can be safely used.
  *
  * All places which elide locks are supposed to call this after they are
  * done with reading from an entry.
  */
 #define cache_ncp_canuse(ncp)	({					\
 	struct namecache *_ncp = (ncp);					\
 	u_char _nc_flag;						\
 									\
 	atomic_thread_fence_acq();					\
 	_nc_flag = atomic_load_char(&_ncp->nc_flag);			\
 	__predict_true((_nc_flag & (NCF_INVALID | NCF_WIP)) == 0);	\
 })
 
 /*
  * Like the above but also checks NCF_WHITE.
  */
 #define cache_fpl_neg_ncp_canuse(ncp)	({				\
 	struct namecache *_ncp = (ncp);					\
 	u_char _nc_flag;						\
 									\
 	atomic_thread_fence_acq();					\
 	_nc_flag = atomic_load_char(&_ncp->nc_flag);			\
 	__predict_true((_nc_flag & (NCF_INVALID | NCF_WIP | NCF_WHITE)) == 0);	\
 })
 
 VFS_SMR_DECLARE;
 
 static SYSCTL_NODE(_vfs_cache, OID_AUTO, param, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
     "Name cache parameters");
 
 static u_int __read_mostly	ncsize; /* the size as computed on creation or resizing */
 SYSCTL_UINT(_vfs_cache_param, OID_AUTO, size, CTLFLAG_RW, &ncsize, 0,
     "Total namecache capacity");
 
 u_int ncsizefactor = 2;
 SYSCTL_UINT(_vfs_cache_param, OID_AUTO, sizefactor, CTLFLAG_RW, &ncsizefactor, 0,
     "Size factor for namecache");
 
 static u_long __read_mostly	ncnegfactor = 5; /* ratio of negative entries */
 SYSCTL_ULONG(_vfs_cache_param, OID_AUTO, negfactor, CTLFLAG_RW, &ncnegfactor, 0,
     "Ratio of negative namecache entries");
 
 /*
  * Negative entry % of namecache capacity above which automatic eviction is allowed.
  *
  * Check cache_neg_evict_cond for details.
  */
 static u_int ncnegminpct = 3;
 
 static u_int __read_mostly     neg_min; /* the above recomputed against ncsize */
 SYSCTL_UINT(_vfs_cache_param, OID_AUTO, negmin, CTLFLAG_RD, &neg_min, 0,
     "Negative entry count above which automatic eviction is allowed");
 
 /*
  * Structures associated with name caching.
  */
 #define NCHHASH(hash) \
 	(&nchashtbl[(hash) & nchash])
 static __read_mostly CK_SLIST_HEAD(nchashhead, namecache) *nchashtbl;/* Hash Table */
 static u_long __read_mostly	nchash;			/* size of hash table */
 SYSCTL_ULONG(_debug, OID_AUTO, nchash, CTLFLAG_RD, &nchash, 0,
     "Size of namecache hash table");
 static u_long __exclusive_cache_line	numneg;	/* number of negative entries allocated */
 static u_long __exclusive_cache_line	numcache;/* number of cache entries allocated */
 
 struct nchstats	nchstats;		/* cache effectiveness statistics */
 
 static u_int __exclusive_cache_line neg_cycle;
 
 #define ncneghash	3
 #define	numneglists	(ncneghash + 1)
 
 struct neglist {
 	struct mtx		nl_evict_lock;
 	struct mtx		nl_lock __aligned(CACHE_LINE_SIZE);
 	TAILQ_HEAD(, namecache) nl_list;
 	TAILQ_HEAD(, namecache) nl_hotlist;
 	u_long			nl_hotnum;
 } __aligned(CACHE_LINE_SIZE);
 
 static struct neglist neglists[numneglists];
 
 static inline struct neglist *
 NCP2NEGLIST(struct namecache *ncp)
 {
 
 	return (&neglists[(((uintptr_t)(ncp) >> 8) & ncneghash)]);
 }
 
 static inline struct negstate *
 NCP2NEGSTATE(struct namecache *ncp)
 {
 
 	MPASS(atomic_load_char(&ncp->nc_flag) & NCF_NEGATIVE);
 	return (&ncp->nc_neg);
 }
 
 #define	numbucketlocks (ncbuckethash + 1)
 static u_int __read_mostly  ncbuckethash;
 static struct mtx_padalign __read_mostly  *bucketlocks;
 #define	HASH2BUCKETLOCK(hash) \
 	((struct mtx *)(&bucketlocks[((hash) & ncbuckethash)]))
 
 #define	numvnodelocks (ncvnodehash + 1)
 static u_int __read_mostly  ncvnodehash;
 static struct mtx __read_mostly *vnodelocks;
 static inline struct mtx *
 VP2VNODELOCK(struct vnode *vp)
 {
 
 	return (&vnodelocks[(((uintptr_t)(vp) >> 8) & ncvnodehash)]);
 }
 
 static void
 cache_out_ts(struct namecache *ncp, struct timespec *tsp, int *ticksp)
 {
 	struct namecache_ts *ncp_ts;
 
 	KASSERT((ncp->nc_flag & NCF_TS) != 0 ||
 	    (tsp == NULL && ticksp == NULL),
 	    ("No NCF_TS"));
 
 	if (tsp == NULL)
 		return;
 
 	ncp_ts = __containerof(ncp, struct namecache_ts, nc_nc);
 	*tsp = ncp_ts->nc_time;
 	*ticksp = ncp_ts->nc_ticks;
 }
 
 #ifdef DEBUG_CACHE
 static int __read_mostly	doingcache = 1;	/* 1 => enable the cache */
 SYSCTL_INT(_debug, OID_AUTO, vfscache, CTLFLAG_RW, &doingcache, 0,
     "VFS namecache enabled");
 #endif
 
 /* Export size information to userland */
 SYSCTL_INT(_debug_sizeof, OID_AUTO, namecache, CTLFLAG_RD, SYSCTL_NULL_INT_PTR,
     sizeof(struct namecache), "sizeof(struct namecache)");
 
 /*
  * The new name cache statistics
  */
 static SYSCTL_NODE(_vfs_cache, OID_AUTO, stats, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
     "Name cache statistics");
 
 #define STATNODE_ULONG(name, varname, descr)					\
 	SYSCTL_ULONG(_vfs_cache_stats, OID_AUTO, name, CTLFLAG_RD, &varname, 0, descr);
 #define STATNODE_COUNTER(name, varname, descr)					\
 	static COUNTER_U64_DEFINE_EARLY(varname);				\
 	SYSCTL_COUNTER_U64(_vfs_cache_stats, OID_AUTO, name, CTLFLAG_RD, &varname, \
 	    descr);
 STATNODE_ULONG(neg, numneg, "Number of negative cache entries");
 STATNODE_ULONG(count, numcache, "Number of cache entries");
 STATNODE_COUNTER(heldvnodes, numcachehv, "Number of namecache entries with vnodes held");
 STATNODE_COUNTER(drops, numdrops, "Number of dropped entries due to reaching the limit");
 STATNODE_COUNTER(dothits, dothits, "Number of '.' hits");
 STATNODE_COUNTER(dotdothis, dotdothits, "Number of '..' hits");
 STATNODE_COUNTER(miss, nummiss, "Number of cache misses");
 STATNODE_COUNTER(misszap, nummisszap, "Number of cache misses we do not want to cache");
 STATNODE_COUNTER(posszaps, numposzaps,
     "Number of cache hits (positive) we do not want to cache");
 STATNODE_COUNTER(poshits, numposhits, "Number of cache hits (positive)");
 STATNODE_COUNTER(negzaps, numnegzaps,
     "Number of cache hits (negative) we do not want to cache");
 STATNODE_COUNTER(neghits, numneghits, "Number of cache hits (negative)");
 /* These count for vn_getcwd(), too. */
 STATNODE_COUNTER(fullpathcalls, numfullpathcalls, "Number of fullpath search calls");
 STATNODE_COUNTER(fullpathfail1, numfullpathfail1, "Number of fullpath search errors (ENOTDIR)");
 STATNODE_COUNTER(fullpathfail2, numfullpathfail2,
     "Number of fullpath search errors (VOP_VPTOCNP failures)");
 STATNODE_COUNTER(fullpathfail4, numfullpathfail4, "Number of fullpath search errors (ENOMEM)");
 STATNODE_COUNTER(fullpathfound, numfullpathfound, "Number of successful fullpath calls");
 STATNODE_COUNTER(symlinktoobig, symlinktoobig, "Number of times symlink did not fit the cache");
 
 /*
  * Debug or developer statistics.
  */
 static SYSCTL_NODE(_vfs_cache, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
     "Name cache debugging");
 #define DEBUGNODE_ULONG(name, varname, descr)					\
 	SYSCTL_ULONG(_vfs_cache_debug, OID_AUTO, name, CTLFLAG_RD, &varname, 0, descr);
 #define DEBUGNODE_COUNTER(name, varname, descr)					\
 	static COUNTER_U64_DEFINE_EARLY(varname);				\
 	SYSCTL_COUNTER_U64(_vfs_cache_debug, OID_AUTO, name, CTLFLAG_RD, &varname, \
 	    descr);
 DEBUGNODE_COUNTER(zap_bucket_relock_success, zap_bucket_relock_success,
     "Number of successful removals after relocking");
 static long zap_bucket_fail;
 DEBUGNODE_ULONG(zap_bucket_fail, zap_bucket_fail, "");
 static long zap_bucket_fail2;
 DEBUGNODE_ULONG(zap_bucket_fail2, zap_bucket_fail2, "");
 static long cache_lock_vnodes_cel_3_failures;
 DEBUGNODE_ULONG(vnodes_cel_3_failures, cache_lock_vnodes_cel_3_failures,
     "Number of times 3-way vnode locking failed");
 
 static void cache_zap_locked(struct namecache *ncp);
 static int vn_fullpath_any_smr(struct vnode *vp, struct vnode *rdir, char *buf,
     char **retbuf, size_t *buflen, size_t addend);
 static int vn_fullpath_any(struct vnode *vp, struct vnode *rdir, char *buf,
     char **retbuf, size_t *buflen);
 static int vn_fullpath_dir(struct vnode *vp, struct vnode *rdir, char *buf,
     char **retbuf, size_t *len, size_t addend);
 
 static MALLOC_DEFINE(M_VFSCACHE, "vfscache", "VFS name cache entries");
 
 static inline void
 cache_assert_vlp_locked(struct mtx *vlp)
 {
 
 	if (vlp != NULL)
 		mtx_assert(vlp, MA_OWNED);
 }
 
 static inline void
 cache_assert_vnode_locked(struct vnode *vp)
 {
 	struct mtx *vlp;
 
 	vlp = VP2VNODELOCK(vp);
 	cache_assert_vlp_locked(vlp);
 }
 
 /*
  * Directory vnodes with entries are held for two reasons:
  * 1. make them less of a target for reclamation in vnlru
  * 2. suffer smaller performance penalty in locked lookup as requeieing is avoided
  *
  * It will be feasible to stop doing it altogether if all filesystems start
  * supporting lockless lookup.
  */
 static void
 cache_hold_vnode(struct vnode *vp)
 {
 
 	cache_assert_vnode_locked(vp);
 	VNPASS(LIST_EMPTY(&vp->v_cache_src), vp);
 	vhold(vp);
 	counter_u64_add(numcachehv, 1);
 }
 
 static void
 cache_drop_vnode(struct vnode *vp)
 {
 
 	/*
 	 * Called after all locks are dropped, meaning we can't assert
 	 * on the state of v_cache_src.
 	 */
 	vdrop(vp);
 	counter_u64_add(numcachehv, -1);
 }
 
 /*
  * UMA zones.
  */
 static uma_zone_t __read_mostly cache_zone_small;
 static uma_zone_t __read_mostly cache_zone_small_ts;
 static uma_zone_t __read_mostly cache_zone_large;
 static uma_zone_t __read_mostly cache_zone_large_ts;
 
 char *
 cache_symlink_alloc(size_t size, int flags)
 {
 
 	if (size < CACHE_ZONE_SMALL_SIZE) {
 		return (uma_zalloc_smr(cache_zone_small, flags));
 	}
 	if (size < CACHE_ZONE_LARGE_SIZE) {
 		return (uma_zalloc_smr(cache_zone_large, flags));
 	}
 	counter_u64_add(symlinktoobig, 1);
 	SDT_PROBE1(vfs, namecache, symlink, alloc__fail, size);
 	return (NULL);
 }
 
 void
 cache_symlink_free(char *string, size_t size)
 {
 
 	MPASS(string != NULL);
 	KASSERT(size < CACHE_ZONE_LARGE_SIZE,
 	    ("%s: size %zu too big", __func__, size));
 
 	if (size < CACHE_ZONE_SMALL_SIZE) {
 		uma_zfree_smr(cache_zone_small, string);
 		return;
 	}
 	if (size < CACHE_ZONE_LARGE_SIZE) {
 		uma_zfree_smr(cache_zone_large, string);
 		return;
 	}
 	__assert_unreachable();
 }
 
 static struct namecache *
 cache_alloc_uma(int len, bool ts)
 {
 	struct namecache_ts *ncp_ts;
 	struct namecache *ncp;
 
 	if (__predict_false(ts)) {
 		if (len <= CACHE_PATH_CUTOFF)
 			ncp_ts = uma_zalloc_smr(cache_zone_small_ts, M_WAITOK);
 		else
 			ncp_ts = uma_zalloc_smr(cache_zone_large_ts, M_WAITOK);
 		ncp = &ncp_ts->nc_nc;
 	} else {
 		if (len <= CACHE_PATH_CUTOFF)
 			ncp = uma_zalloc_smr(cache_zone_small, M_WAITOK);
 		else
 			ncp = uma_zalloc_smr(cache_zone_large, M_WAITOK);
 	}
 	return (ncp);
 }
 
 static void
 cache_free_uma(struct namecache *ncp)
 {
 	struct namecache_ts *ncp_ts;
 
 	if (__predict_false(ncp->nc_flag & NCF_TS)) {
 		ncp_ts = __containerof(ncp, struct namecache_ts, nc_nc);
 		if (ncp->nc_nlen <= CACHE_PATH_CUTOFF)
 			uma_zfree_smr(cache_zone_small_ts, ncp_ts);
 		else
 			uma_zfree_smr(cache_zone_large_ts, ncp_ts);
 	} else {
 		if (ncp->nc_nlen <= CACHE_PATH_CUTOFF)
 			uma_zfree_smr(cache_zone_small, ncp);
 		else
 			uma_zfree_smr(cache_zone_large, ncp);
 	}
 }
 
 static struct namecache *
 cache_alloc(int len, bool ts)
 {
 	u_long lnumcache;
 
 	/*
 	 * Avoid blowout in namecache entries.
 	 *
 	 * Bugs:
 	 * 1. filesystems may end up trying to add an already existing entry
 	 * (for example this can happen after a cache miss during concurrent
 	 * lookup), in which case we will call cache_neg_evict despite not
 	 * adding anything.
 	 * 2. the routine may fail to free anything and no provisions are made
 	 * to make it try harder (see the inside for failure modes)
 	 * 3. it only ever looks at negative entries.
 	 */
 	lnumcache = atomic_fetchadd_long(&numcache, 1) + 1;
 	if (cache_neg_evict_cond(lnumcache)) {
 		lnumcache = atomic_load_long(&numcache);
 	}
 	if (__predict_false(lnumcache >= ncsize)) {
 		atomic_subtract_long(&numcache, 1);
 		counter_u64_add(numdrops, 1);
 		return (NULL);
 	}
 	return (cache_alloc_uma(len, ts));
 }
 
 static void
 cache_free(struct namecache *ncp)
 {
 
 	MPASS(ncp != NULL);
 	if ((ncp->nc_flag & NCF_DVDROP) != 0) {
 		cache_drop_vnode(ncp->nc_dvp);
 	}
 	cache_free_uma(ncp);
 	atomic_subtract_long(&numcache, 1);
 }
 
 static void
 cache_free_batch(struct cache_freebatch *batch)
 {
 	struct namecache *ncp, *nnp;
 	int i;
 
 	i = 0;
 	if (TAILQ_EMPTY(batch))
 		goto out;
 	TAILQ_FOREACH_SAFE(ncp, batch, nc_dst, nnp) {
 		if ((ncp->nc_flag & NCF_DVDROP) != 0) {
 			cache_drop_vnode(ncp->nc_dvp);
 		}
 		cache_free_uma(ncp);
 		i++;
 	}
 	atomic_subtract_long(&numcache, i);
 out:
 	SDT_PROBE1(vfs, namecache, purge, batch, i);
 }
 
 /*
  * Hashing.
  *
  * The code was made to use FNV in 2001 and this choice needs to be revisited.
  *
  * Short summary of the difficulty:
  * The longest name which can be inserted is NAME_MAX characters in length (or
  * 255 at the time of writing this comment), while majority of names used in
  * practice are significantly shorter (mostly below 10). More importantly
  * majority of lookups performed find names are even shorter than that.
  *
  * This poses a problem where hashes which do better than FNV past word size
  * (or so) tend to come with additional overhead when finalizing the result,
  * making them noticeably slower for the most commonly used range.
  *
  * Consider a path like: /usr/obj/usr/src/sys/amd64/GENERIC/vnode_if.c
  *
  * When looking it up the most time consuming part by a large margin (at least
  * on amd64) is hashing.  Replacing FNV with something which pessimizes short
  * input would make the slowest part stand out even more.
  */
 
 /*
  * TODO: With the value stored we can do better than computing the hash based
  * on the address.
  */
 static void
 cache_prehash(struct vnode *vp)
 {
 
 	vp->v_nchash = fnv_32_buf(&vp, sizeof(vp), FNV1_32_INIT);
 }
 
 static uint32_t
 cache_get_hash(char *name, u_char len, struct vnode *dvp)
 {
 
 	return (fnv_32_buf(name, len, dvp->v_nchash));
 }
 
 static uint32_t
 cache_get_hash_iter_start(struct vnode *dvp)
 {
 
 	return (dvp->v_nchash);
 }
 
 static uint32_t
 cache_get_hash_iter(char c, uint32_t hash)
 {
 
 	return (fnv_32_buf(&c, 1, hash));
 }
 
 static uint32_t
 cache_get_hash_iter_finish(uint32_t hash)
 {
 
 	return (hash);
 }
 
 static inline struct nchashhead *
 NCP2BUCKET(struct namecache *ncp)
 {
 	uint32_t hash;
 
 	hash = cache_get_hash(ncp->nc_name, ncp->nc_nlen, ncp->nc_dvp);
 	return (NCHHASH(hash));
 }
 
 static inline struct mtx *
 NCP2BUCKETLOCK(struct namecache *ncp)
 {
 	uint32_t hash;
 
 	hash = cache_get_hash(ncp->nc_name, ncp->nc_nlen, ncp->nc_dvp);
 	return (HASH2BUCKETLOCK(hash));
 }
 
 #ifdef INVARIANTS
 static void
 cache_assert_bucket_locked(struct namecache *ncp)
 {
 	struct mtx *blp;
 
 	blp = NCP2BUCKETLOCK(ncp);
 	mtx_assert(blp, MA_OWNED);
 }
 
 static void
 cache_assert_bucket_unlocked(struct namecache *ncp)
 {
 	struct mtx *blp;
 
 	blp = NCP2BUCKETLOCK(ncp);
 	mtx_assert(blp, MA_NOTOWNED);
 }
 #else
 #define cache_assert_bucket_locked(x) do { } while (0)
 #define cache_assert_bucket_unlocked(x) do { } while (0)
 #endif
 
 #define cache_sort_vnodes(x, y)	_cache_sort_vnodes((void **)(x), (void **)(y))
 static void
 _cache_sort_vnodes(void **p1, void **p2)
 {
 	void *tmp;
 
 	MPASS(*p1 != NULL || *p2 != NULL);
 
 	if (*p1 > *p2) {
 		tmp = *p2;
 		*p2 = *p1;
 		*p1 = tmp;
 	}
 }
 
 static void
 cache_lock_all_buckets(void)
 {
 	u_int i;
 
 	for (i = 0; i < numbucketlocks; i++)
 		mtx_lock(&bucketlocks[i]);
 }
 
 static void
 cache_unlock_all_buckets(void)
 {
 	u_int i;
 
 	for (i = 0; i < numbucketlocks; i++)
 		mtx_unlock(&bucketlocks[i]);
 }
 
 static void
 cache_lock_all_vnodes(void)
 {
 	u_int i;
 
 	for (i = 0; i < numvnodelocks; i++)
 		mtx_lock(&vnodelocks[i]);
 }
 
 static void
 cache_unlock_all_vnodes(void)
 {
 	u_int i;
 
 	for (i = 0; i < numvnodelocks; i++)
 		mtx_unlock(&vnodelocks[i]);
 }
 
 static int
 cache_trylock_vnodes(struct mtx *vlp1, struct mtx *vlp2)
 {
 
 	cache_sort_vnodes(&vlp1, &vlp2);
 
 	if (vlp1 != NULL) {
 		if (!mtx_trylock(vlp1))
 			return (EAGAIN);
 	}
 	if (!mtx_trylock(vlp2)) {
 		if (vlp1 != NULL)
 			mtx_unlock(vlp1);
 		return (EAGAIN);
 	}
 
 	return (0);
 }
 
 static void
 cache_lock_vnodes(struct mtx *vlp1, struct mtx *vlp2)
 {
 
 	MPASS(vlp1 != NULL || vlp2 != NULL);
 	MPASS(vlp1 <= vlp2);
 
 	if (vlp1 != NULL)
 		mtx_lock(vlp1);
 	if (vlp2 != NULL)
 		mtx_lock(vlp2);
 }
 
 static void
 cache_unlock_vnodes(struct mtx *vlp1, struct mtx *vlp2)
 {
 
 	MPASS(vlp1 != NULL || vlp2 != NULL);
 
 	if (vlp1 != NULL)
 		mtx_unlock(vlp1);
 	if (vlp2 != NULL)
 		mtx_unlock(vlp2);
 }
 
 static int
 sysctl_nchstats(SYSCTL_HANDLER_ARGS)
 {
 	struct nchstats snap;
 
 	if (req->oldptr == NULL)
 		return (SYSCTL_OUT(req, 0, sizeof(snap)));
 
 	snap = nchstats;
 	snap.ncs_goodhits = counter_u64_fetch(numposhits);
 	snap.ncs_neghits = counter_u64_fetch(numneghits);
 	snap.ncs_badhits = counter_u64_fetch(numposzaps) +
 	    counter_u64_fetch(numnegzaps);
 	snap.ncs_miss = counter_u64_fetch(nummisszap) +
 	    counter_u64_fetch(nummiss);
 
 	return (SYSCTL_OUT(req, &snap, sizeof(snap)));
 }
 SYSCTL_PROC(_vfs_cache, OID_AUTO, nchstats, CTLTYPE_OPAQUE | CTLFLAG_RD |
     CTLFLAG_MPSAFE, 0, 0, sysctl_nchstats, "LU",
     "VFS cache effectiveness statistics");
 
 static void
 cache_recalc_neg_min(u_int val)
 {
 
 	neg_min = (ncsize * val) / 100;
 }
 
 static int
 sysctl_negminpct(SYSCTL_HANDLER_ARGS)
 {
 	u_int val;
 	int error;
 
 	val = ncnegminpct;
 	error = sysctl_handle_int(oidp, &val, 0, req);
 	if (error != 0 || req->newptr == NULL)
 		return (error);
 
 	if (val == ncnegminpct)
 		return (0);
 	if (val < 0 || val > 99)
 		return (EINVAL);
 	ncnegminpct = val;
 	cache_recalc_neg_min(val);
 	return (0);
 }
 
 SYSCTL_PROC(_vfs_cache_param, OID_AUTO, negminpct,
     CTLTYPE_INT | CTLFLAG_MPSAFE | CTLFLAG_RW, NULL, 0, sysctl_negminpct,
     "I", "Negative entry \% of namecache capacity above which automatic eviction is allowed");
 
 #ifdef DEBUG_CACHE
 /*
  * Grab an atomic snapshot of the name cache hash chain lengths
  */
 static SYSCTL_NODE(_debug, OID_AUTO, hashstat,
     CTLFLAG_RW | CTLFLAG_MPSAFE, NULL,
     "hash table stats");
 
 static int
 sysctl_debug_hashstat_rawnchash(SYSCTL_HANDLER_ARGS)
 {
 	struct nchashhead *ncpp;
 	struct namecache *ncp;
 	int i, error, n_nchash, *cntbuf;
 
 retry:
 	n_nchash = nchash + 1;	/* nchash is max index, not count */
 	if (req->oldptr == NULL)
 		return SYSCTL_OUT(req, 0, n_nchash * sizeof(int));
 	cntbuf = malloc(n_nchash * sizeof(int), M_TEMP, M_ZERO | M_WAITOK);
 	cache_lock_all_buckets();
 	if (n_nchash != nchash + 1) {
 		cache_unlock_all_buckets();
 		free(cntbuf, M_TEMP);
 		goto retry;
 	}
 	/* Scan hash tables counting entries */
 	for (ncpp = nchashtbl, i = 0; i < n_nchash; ncpp++, i++)
 		CK_SLIST_FOREACH(ncp, ncpp, nc_hash)
 			cntbuf[i]++;
 	cache_unlock_all_buckets();
 	for (error = 0, i = 0; i < n_nchash; i++)
 		if ((error = SYSCTL_OUT(req, &cntbuf[i], sizeof(int))) != 0)
 			break;
 	free(cntbuf, M_TEMP);
 	return (error);
 }
 SYSCTL_PROC(_debug_hashstat, OID_AUTO, rawnchash, CTLTYPE_INT|CTLFLAG_RD|
     CTLFLAG_MPSAFE, 0, 0, sysctl_debug_hashstat_rawnchash, "S,int",
     "nchash chain lengths");
 
 static int
 sysctl_debug_hashstat_nchash(SYSCTL_HANDLER_ARGS)
 {
 	int error;
 	struct nchashhead *ncpp;
 	struct namecache *ncp;
 	int n_nchash;
 	int count, maxlength, used, pct;
 
 	if (!req->oldptr)
 		return SYSCTL_OUT(req, 0, 4 * sizeof(int));
 
 	cache_lock_all_buckets();
 	n_nchash = nchash + 1;	/* nchash is max index, not count */
 	used = 0;
 	maxlength = 0;
 
 	/* Scan hash tables for applicable entries */
 	for (ncpp = nchashtbl; n_nchash > 0; n_nchash--, ncpp++) {
 		count = 0;
 		CK_SLIST_FOREACH(ncp, ncpp, nc_hash) {
 			count++;
 		}
 		if (count)
 			used++;
 		if (maxlength < count)
 			maxlength = count;
 	}
 	n_nchash = nchash + 1;
 	cache_unlock_all_buckets();
 	pct = (used * 100) / (n_nchash / 100);
 	error = SYSCTL_OUT(req, &n_nchash, sizeof(n_nchash));
 	if (error)
 		return (error);
 	error = SYSCTL_OUT(req, &used, sizeof(used));
 	if (error)
 		return (error);
 	error = SYSCTL_OUT(req, &maxlength, sizeof(maxlength));
 	if (error)
 		return (error);
 	error = SYSCTL_OUT(req, &pct, sizeof(pct));
 	if (error)
 		return (error);
 	return (0);
 }
 SYSCTL_PROC(_debug_hashstat, OID_AUTO, nchash, CTLTYPE_INT|CTLFLAG_RD|
     CTLFLAG_MPSAFE, 0, 0, sysctl_debug_hashstat_nchash, "I",
     "nchash statistics (number of total/used buckets, maximum chain length, usage percentage)");
 #endif
 
 /*
  * Negative entries management
  *
  * Various workloads create plenty of negative entries and barely use them
  * afterwards. Moreover malicious users can keep performing bogus lookups
  * adding even more entries. For example "make tinderbox" as of writing this
  * comment ends up with 2.6M namecache entries in total, 1.2M of which are
  * negative.
  *
  * As such, a rather aggressive eviction method is needed. The currently
  * employed method is a placeholder.
  *
  * Entries are split over numneglists separate lists, each of which is further
  * split into hot and cold entries. Entries get promoted after getting a hit.
  * Eviction happens on addition of new entry.
  */
 static SYSCTL_NODE(_vfs_cache, OID_AUTO, neg, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
     "Name cache negative entry statistics");
 
 SYSCTL_ULONG(_vfs_cache_neg, OID_AUTO, count, CTLFLAG_RD, &numneg, 0,
     "Number of negative cache entries");
 
 static COUNTER_U64_DEFINE_EARLY(neg_created);
 SYSCTL_COUNTER_U64(_vfs_cache_neg, OID_AUTO, created, CTLFLAG_RD, &neg_created,
     "Number of created negative entries");
 
 static COUNTER_U64_DEFINE_EARLY(neg_evicted);
 SYSCTL_COUNTER_U64(_vfs_cache_neg, OID_AUTO, evicted, CTLFLAG_RD, &neg_evicted,
     "Number of evicted negative entries");
 
 static COUNTER_U64_DEFINE_EARLY(neg_evict_skipped_empty);
 SYSCTL_COUNTER_U64(_vfs_cache_neg, OID_AUTO, evict_skipped_empty, CTLFLAG_RD,
     &neg_evict_skipped_empty,
     "Number of times evicting failed due to lack of entries");
 
 static COUNTER_U64_DEFINE_EARLY(neg_evict_skipped_missed);
 SYSCTL_COUNTER_U64(_vfs_cache_neg, OID_AUTO, evict_skipped_missed, CTLFLAG_RD,
     &neg_evict_skipped_missed,
     "Number of times evicting failed due to target entry disappearing");
 
 static COUNTER_U64_DEFINE_EARLY(neg_evict_skipped_contended);
 SYSCTL_COUNTER_U64(_vfs_cache_neg, OID_AUTO, evict_skipped_contended, CTLFLAG_RD,
     &neg_evict_skipped_contended,
     "Number of times evicting failed due to contention");
 
 SYSCTL_COUNTER_U64(_vfs_cache_neg, OID_AUTO, hits, CTLFLAG_RD, &numneghits,
     "Number of cache hits (negative)");
 
 static int
 sysctl_neg_hot(SYSCTL_HANDLER_ARGS)
 {
 	int i, out;
 
 	out = 0;
 	for (i = 0; i < numneglists; i++)
 		out += neglists[i].nl_hotnum;
 
 	return (SYSCTL_OUT(req, &out, sizeof(out)));
 }
 SYSCTL_PROC(_vfs_cache_neg, OID_AUTO, hot, CTLTYPE_INT | CTLFLAG_RD |
     CTLFLAG_MPSAFE, 0, 0, sysctl_neg_hot, "I",
     "Number of hot negative entries");
 
 static void
 cache_neg_init(struct namecache *ncp)
 {
 	struct negstate *ns;
 
 	ncp->nc_flag |= NCF_NEGATIVE;
 	ns = NCP2NEGSTATE(ncp);
 	ns->neg_flag = 0;
 	ns->neg_hit = 0;
 	counter_u64_add(neg_created, 1);
 }
 
 #define CACHE_NEG_PROMOTION_THRESH 2
 
 static bool
 cache_neg_hit_prep(struct namecache *ncp)
 {
 	struct negstate *ns;
 	u_char n;
 
 	ns = NCP2NEGSTATE(ncp);
 	n = atomic_load_char(&ns->neg_hit);
 	for (;;) {
 		if (n >= CACHE_NEG_PROMOTION_THRESH)
 			return (false);
 		if (atomic_fcmpset_8(&ns->neg_hit, &n, n + 1))
 			break;
 	}
 	return (n + 1 == CACHE_NEG_PROMOTION_THRESH);
 }
 
 /*
  * Nothing to do here but it is provided for completeness as some
  * cache_neg_hit_prep callers may end up returning without even
  * trying to promote.
  */
 #define cache_neg_hit_abort(ncp)	do { } while (0)
 
 static void
 cache_neg_hit_finish(struct namecache *ncp)
 {
 
 	SDT_PROBE2(vfs, namecache, lookup, hit__negative, ncp->nc_dvp, ncp->nc_name);
 	counter_u64_add(numneghits, 1);
 }
 
 /*
  * Move a negative entry to the hot list.
  */
 static void
 cache_neg_promote_locked(struct namecache *ncp)
 {
 	struct neglist *nl;
 	struct negstate *ns;
 
 	ns = NCP2NEGSTATE(ncp);
 	nl = NCP2NEGLIST(ncp);
 	mtx_assert(&nl->nl_lock, MA_OWNED);
 	if ((ns->neg_flag & NEG_HOT) == 0) {
 		TAILQ_REMOVE(&nl->nl_list, ncp, nc_dst);
 		TAILQ_INSERT_TAIL(&nl->nl_hotlist, ncp, nc_dst);
 		nl->nl_hotnum++;
 		ns->neg_flag |= NEG_HOT;
 	}
 }
 
 /*
  * Move a hot negative entry to the cold list.
  */
 static void
 cache_neg_demote_locked(struct namecache *ncp)
 {
 	struct neglist *nl;
 	struct negstate *ns;
 
 	ns = NCP2NEGSTATE(ncp);
 	nl = NCP2NEGLIST(ncp);
 	mtx_assert(&nl->nl_lock, MA_OWNED);
 	MPASS(ns->neg_flag & NEG_HOT);
 	TAILQ_REMOVE(&nl->nl_hotlist, ncp, nc_dst);
 	TAILQ_INSERT_TAIL(&nl->nl_list, ncp, nc_dst);
 	nl->nl_hotnum--;
 	ns->neg_flag &= ~NEG_HOT;
 	atomic_store_char(&ns->neg_hit, 0);
 }
 
 /*
  * Move a negative entry to the hot list if it matches the lookup.
  *
  * We have to take locks, but they may be contended and in the worst
  * case we may need to go off CPU. We don't want to spin within the
  * smr section and we can't block with it. Exiting the section means
  * the found entry could have been evicted. We are going to look it
  * up again.
  */
 static bool
 cache_neg_promote_cond(struct vnode *dvp, struct componentname *cnp,
     struct namecache *oncp, uint32_t hash)
 {
 	struct namecache *ncp;
 	struct neglist *nl;
 	u_char nc_flag;
 
 	nl = NCP2NEGLIST(oncp);
 
 	mtx_lock(&nl->nl_lock);
 	/*
 	 * For hash iteration.
 	 */
 	vfs_smr_enter();
 
 	/*
 	 * Avoid all surprises by only succeeding if we got the same entry and
 	 * bailing completely otherwise.
 	 * XXX There are no provisions to keep the vnode around, meaning we may
 	 * end up promoting a negative entry for a *new* vnode and returning
 	 * ENOENT on its account. This is the error we want to return anyway
 	 * and promotion is harmless.
 	 *
 	 * In particular at this point there can be a new ncp which matches the
 	 * search but hashes to a different neglist.
 	 */
 	CK_SLIST_FOREACH(ncp, (NCHHASH(hash)), nc_hash) {
 		if (ncp == oncp)
 			break;
 	}
 
 	/*
 	 * No match to begin with.
 	 */
 	if (__predict_false(ncp == NULL)) {
 		goto out_abort;
 	}
 
 	/*
 	 * The newly found entry may be something different...
 	 */
 	if (!(ncp->nc_dvp == dvp && ncp->nc_nlen == cnp->cn_namelen &&
 	    !bcmp(ncp->nc_name, cnp->cn_nameptr, ncp->nc_nlen))) {
 		goto out_abort;
 	}
 
 	/*
 	 * ... and not even negative.
 	 */
 	nc_flag = atomic_load_char(&ncp->nc_flag);
 	if ((nc_flag & NCF_NEGATIVE) == 0) {
 		goto out_abort;
 	}
 
 	if (!cache_ncp_canuse(ncp)) {
 		goto out_abort;
 	}
 
 	cache_neg_promote_locked(ncp);
 	cache_neg_hit_finish(ncp);
 	vfs_smr_exit();
 	mtx_unlock(&nl->nl_lock);
 	return (true);
 out_abort:
 	vfs_smr_exit();
 	mtx_unlock(&nl->nl_lock);
 	return (false);
 }
 
 static void
 cache_neg_promote(struct namecache *ncp)
 {
 	struct neglist *nl;
 
 	nl = NCP2NEGLIST(ncp);
 	mtx_lock(&nl->nl_lock);
 	cache_neg_promote_locked(ncp);
 	mtx_unlock(&nl->nl_lock);
 }
 
 static void
 cache_neg_insert(struct namecache *ncp)
 {
 	struct neglist *nl;
 
 	MPASS(ncp->nc_flag & NCF_NEGATIVE);
 	cache_assert_bucket_locked(ncp);
 	nl = NCP2NEGLIST(ncp);
 	mtx_lock(&nl->nl_lock);
 	TAILQ_INSERT_TAIL(&nl->nl_list, ncp, nc_dst);
 	mtx_unlock(&nl->nl_lock);
 	atomic_add_long(&numneg, 1);
 }
 
 static void
 cache_neg_remove(struct namecache *ncp)
 {
 	struct neglist *nl;
 	struct negstate *ns;
 
 	cache_assert_bucket_locked(ncp);
 	nl = NCP2NEGLIST(ncp);
 	ns = NCP2NEGSTATE(ncp);
 	mtx_lock(&nl->nl_lock);
 	if ((ns->neg_flag & NEG_HOT) != 0) {
 		TAILQ_REMOVE(&nl->nl_hotlist, ncp, nc_dst);
 		nl->nl_hotnum--;
 	} else {
 		TAILQ_REMOVE(&nl->nl_list, ncp, nc_dst);
 	}
 	mtx_unlock(&nl->nl_lock);
 	atomic_subtract_long(&numneg, 1);
 }
 
 static struct neglist *
 cache_neg_evict_select_list(void)
 {
 	struct neglist *nl;
 	u_int c;
 
 	c = atomic_fetchadd_int(&neg_cycle, 1) + 1;
 	nl = &neglists[c % numneglists];
 	if (!mtx_trylock(&nl->nl_evict_lock)) {
 		counter_u64_add(neg_evict_skipped_contended, 1);
 		return (NULL);
 	}
 	return (nl);
 }
 
 static struct namecache *
 cache_neg_evict_select_entry(struct neglist *nl)
 {
 	struct namecache *ncp, *lncp;
 	struct negstate *ns, *lns;
 	int i;
 
 	mtx_assert(&nl->nl_evict_lock, MA_OWNED);
 	mtx_assert(&nl->nl_lock, MA_OWNED);
 	ncp = TAILQ_FIRST(&nl->nl_list);
 	if (ncp == NULL)
 		return (NULL);
 	lncp = ncp;
 	lns = NCP2NEGSTATE(lncp);
 	for (i = 1; i < 4; i++) {
 		ncp = TAILQ_NEXT(ncp, nc_dst);
 		if (ncp == NULL)
 			break;
 		ns = NCP2NEGSTATE(ncp);
 		if (ns->neg_hit < lns->neg_hit) {
 			lncp = ncp;
 			lns = ns;
 		}
 	}
 	return (lncp);
 }
 
 static bool
 cache_neg_evict(void)
 {
 	struct namecache *ncp, *ncp2;
 	struct neglist *nl;
 	struct vnode *dvp;
 	struct mtx *dvlp;
 	struct mtx *blp;
 	uint32_t hash;
 	u_char nlen;
 	bool evicted;
 
 	nl = cache_neg_evict_select_list();
 	if (nl == NULL) {
 		return (false);
 	}
 
 	mtx_lock(&nl->nl_lock);
 	ncp = TAILQ_FIRST(&nl->nl_hotlist);
 	if (ncp != NULL) {
 		cache_neg_demote_locked(ncp);
 	}
 	ncp = cache_neg_evict_select_entry(nl);
 	if (ncp == NULL) {
 		counter_u64_add(neg_evict_skipped_empty, 1);
 		mtx_unlock(&nl->nl_lock);
 		mtx_unlock(&nl->nl_evict_lock);
 		return (false);
 	}
 	nlen = ncp->nc_nlen;
 	dvp = ncp->nc_dvp;
 	hash = cache_get_hash(ncp->nc_name, nlen, dvp);
 	dvlp = VP2VNODELOCK(dvp);
 	blp = HASH2BUCKETLOCK(hash);
 	mtx_unlock(&nl->nl_lock);
 	mtx_unlock(&nl->nl_evict_lock);
 	mtx_lock(dvlp);
 	mtx_lock(blp);
 	/*
 	 * Note that since all locks were dropped above, the entry may be
 	 * gone or reallocated to be something else.
 	 */
 	CK_SLIST_FOREACH(ncp2, (NCHHASH(hash)), nc_hash) {
 		if (ncp2 == ncp && ncp2->nc_dvp == dvp &&
 		    ncp2->nc_nlen == nlen && (ncp2->nc_flag & NCF_NEGATIVE) != 0)
 			break;
 	}
 	if (ncp2 == NULL) {
 		counter_u64_add(neg_evict_skipped_missed, 1);
 		ncp = NULL;
 		evicted = false;
 	} else {
 		MPASS(dvlp == VP2VNODELOCK(ncp->nc_dvp));
 		MPASS(blp == NCP2BUCKETLOCK(ncp));
 		SDT_PROBE2(vfs, namecache, evict_negative, done, ncp->nc_dvp,
 		    ncp->nc_name);
 		cache_zap_locked(ncp);
 		counter_u64_add(neg_evicted, 1);
 		evicted = true;
 	}
 	mtx_unlock(blp);
 	mtx_unlock(dvlp);
 	if (ncp != NULL)
 		cache_free(ncp);
 	return (evicted);
 }
 
 /*
  * Maybe evict a negative entry to create more room.
  *
  * The ncnegfactor parameter limits what fraction of the total count
  * can comprise of negative entries. However, if the cache is just
  * warming up this leads to excessive evictions.  As such, ncnegminpct
  * (recomputed to neg_min) dictates whether the above should be
  * applied.
  *
  * Try evicting if the cache is close to full capacity regardless of
  * other considerations.
  */
 static bool
 cache_neg_evict_cond(u_long lnumcache)
 {
 	u_long lnumneg;
 
 	if (ncsize - 1000 < lnumcache)
 		goto out_evict;
 	lnumneg = atomic_load_long(&numneg);
 	if (lnumneg < neg_min)
 		return (false);
 	if (lnumneg * ncnegfactor < lnumcache)
 		return (false);
 out_evict:
 	return (cache_neg_evict());
 }
 
 /*
  * cache_zap_locked():
  *
  *   Removes a namecache entry from cache, whether it contains an actual
  *   pointer to a vnode or if it is just a negative cache entry.
  */
 static void
 cache_zap_locked(struct namecache *ncp)
 {
 	struct nchashhead *ncpp;
 	struct vnode *dvp, *vp;
 
 	dvp = ncp->nc_dvp;
 	vp = ncp->nc_vp;
 
 	if (!(ncp->nc_flag & NCF_NEGATIVE))
 		cache_assert_vnode_locked(vp);
 	cache_assert_vnode_locked(dvp);
 	cache_assert_bucket_locked(ncp);
 
 	cache_ncp_invalidate(ncp);
 
 	ncpp = NCP2BUCKET(ncp);
 	CK_SLIST_REMOVE(ncpp, ncp, namecache, nc_hash);
 	if (!(ncp->nc_flag & NCF_NEGATIVE)) {
 		SDT_PROBE3(vfs, namecache, zap, done, dvp, ncp->nc_name, vp);
 		TAILQ_REMOVE(&vp->v_cache_dst, ncp, nc_dst);
 		if (ncp == vp->v_cache_dd) {
 			atomic_store_ptr(&vp->v_cache_dd, NULL);
 		}
 	} else {
 		SDT_PROBE2(vfs, namecache, zap_negative, done, dvp, ncp->nc_name);
 		cache_neg_remove(ncp);
 	}
 	if (ncp->nc_flag & NCF_ISDOTDOT) {
 		if (ncp == dvp->v_cache_dd) {
 			atomic_store_ptr(&dvp->v_cache_dd, NULL);
 		}
 	} else {
 		LIST_REMOVE(ncp, nc_src);
 		if (LIST_EMPTY(&dvp->v_cache_src)) {
 			ncp->nc_flag |= NCF_DVDROP;
 		}
 	}
 }
 
 static void
 cache_zap_negative_locked_vnode_kl(struct namecache *ncp, struct vnode *vp)
 {
 	struct mtx *blp;
 
 	MPASS(ncp->nc_dvp == vp);
 	MPASS(ncp->nc_flag & NCF_NEGATIVE);
 	cache_assert_vnode_locked(vp);
 
 	blp = NCP2BUCKETLOCK(ncp);
 	mtx_lock(blp);
 	cache_zap_locked(ncp);
 	mtx_unlock(blp);
 }
 
 static bool
 cache_zap_locked_vnode_kl2(struct namecache *ncp, struct vnode *vp,
     struct mtx **vlpp)
 {
 	struct mtx *pvlp, *vlp1, *vlp2, *to_unlock;
 	struct mtx *blp;
 
 	MPASS(vp == ncp->nc_dvp || vp == ncp->nc_vp);
 	cache_assert_vnode_locked(vp);
 
 	if (ncp->nc_flag & NCF_NEGATIVE) {
 		if (*vlpp != NULL) {
 			mtx_unlock(*vlpp);
 			*vlpp = NULL;
 		}
 		cache_zap_negative_locked_vnode_kl(ncp, vp);
 		return (true);
 	}
 
 	pvlp = VP2VNODELOCK(vp);
 	blp = NCP2BUCKETLOCK(ncp);
 	vlp1 = VP2VNODELOCK(ncp->nc_dvp);
 	vlp2 = VP2VNODELOCK(ncp->nc_vp);
 
 	if (*vlpp == vlp1 || *vlpp == vlp2) {
 		to_unlock = *vlpp;
 		*vlpp = NULL;
 	} else {
 		if (*vlpp != NULL) {
 			mtx_unlock(*vlpp);
 			*vlpp = NULL;
 		}
 		cache_sort_vnodes(&vlp1, &vlp2);
 		if (vlp1 == pvlp) {
 			mtx_lock(vlp2);
 			to_unlock = vlp2;
 		} else {
 			if (!mtx_trylock(vlp1))
 				goto out_relock;
 			to_unlock = vlp1;
 		}
 	}
 	mtx_lock(blp);
 	cache_zap_locked(ncp);
 	mtx_unlock(blp);
 	if (to_unlock != NULL)
 		mtx_unlock(to_unlock);
 	return (true);
 
 out_relock:
 	mtx_unlock(vlp2);
 	mtx_lock(vlp1);
 	mtx_lock(vlp2);
 	MPASS(*vlpp == NULL);
 	*vlpp = vlp1;
 	return (false);
 }
 
 /*
  * If trylocking failed we can get here. We know enough to take all needed locks
  * in the right order and re-lookup the entry.
  */
 static int
 cache_zap_unlocked_bucket(struct namecache *ncp, struct componentname *cnp,
     struct vnode *dvp, struct mtx *dvlp, struct mtx *vlp, uint32_t hash,
     struct mtx *blp)
 {
 	struct namecache *rncp;
 
 	cache_assert_bucket_unlocked(ncp);
 
 	cache_sort_vnodes(&dvlp, &vlp);
 	cache_lock_vnodes(dvlp, vlp);
 	mtx_lock(blp);
 	CK_SLIST_FOREACH(rncp, (NCHHASH(hash)), nc_hash) {
 		if (rncp == ncp && rncp->nc_dvp == dvp &&
 		    rncp->nc_nlen == cnp->cn_namelen &&
 		    !bcmp(rncp->nc_name, cnp->cn_nameptr, rncp->nc_nlen))
 			break;
 	}
 	if (rncp != NULL) {
 		cache_zap_locked(rncp);
 		mtx_unlock(blp);
 		cache_unlock_vnodes(dvlp, vlp);
 		counter_u64_add(zap_bucket_relock_success, 1);
 		return (0);
 	}
 
 	mtx_unlock(blp);
 	cache_unlock_vnodes(dvlp, vlp);
 	return (EAGAIN);
 }
 
 static int __noinline
 cache_zap_locked_bucket(struct namecache *ncp, struct componentname *cnp,
     uint32_t hash, struct mtx *blp)
 {
 	struct mtx *dvlp, *vlp;
 	struct vnode *dvp;
 
 	cache_assert_bucket_locked(ncp);
 
 	dvlp = VP2VNODELOCK(ncp->nc_dvp);
 	vlp = NULL;
 	if (!(ncp->nc_flag & NCF_NEGATIVE))
 		vlp = VP2VNODELOCK(ncp->nc_vp);
 	if (cache_trylock_vnodes(dvlp, vlp) == 0) {
 		cache_zap_locked(ncp);
 		mtx_unlock(blp);
 		cache_unlock_vnodes(dvlp, vlp);
 		return (0);
 	}
 
 	dvp = ncp->nc_dvp;
 	mtx_unlock(blp);
 	return (cache_zap_unlocked_bucket(ncp, cnp, dvp, dvlp, vlp, hash, blp));
 }
 
 static __noinline int
 cache_remove_cnp(struct vnode *dvp, struct componentname *cnp)
 {
 	struct namecache *ncp;
 	struct mtx *blp;
 	struct mtx *dvlp, *dvlp2;
 	uint32_t hash;
 	int error;
 
 	if (cnp->cn_namelen == 2 &&
 	    cnp->cn_nameptr[0] == '.' && cnp->cn_nameptr[1] == '.') {
 		dvlp = VP2VNODELOCK(dvp);
 		dvlp2 = NULL;
 		mtx_lock(dvlp);
 retry_dotdot:
 		ncp = dvp->v_cache_dd;
 		if (ncp == NULL) {
 			mtx_unlock(dvlp);
 			if (dvlp2 != NULL)
 				mtx_unlock(dvlp2);
 			SDT_PROBE2(vfs, namecache, removecnp, miss, dvp, cnp);
 			return (0);
 		}
 		if ((ncp->nc_flag & NCF_ISDOTDOT) != 0) {
 			if (!cache_zap_locked_vnode_kl2(ncp, dvp, &dvlp2))
 				goto retry_dotdot;
 			MPASS(dvp->v_cache_dd == NULL);
 			mtx_unlock(dvlp);
 			if (dvlp2 != NULL)
 				mtx_unlock(dvlp2);
 			cache_free(ncp);
 		} else {
 			atomic_store_ptr(&dvp->v_cache_dd, NULL);
 			mtx_unlock(dvlp);
 			if (dvlp2 != NULL)
 				mtx_unlock(dvlp2);
 		}
 		SDT_PROBE2(vfs, namecache, removecnp, hit, dvp, cnp);
 		return (1);
 	}
 
 	hash = cache_get_hash(cnp->cn_nameptr, cnp->cn_namelen, dvp);
 	blp = HASH2BUCKETLOCK(hash);
 retry:
 	if (CK_SLIST_EMPTY(NCHHASH(hash)))
 		goto out_no_entry;
 
 	mtx_lock(blp);
 
 	CK_SLIST_FOREACH(ncp, (NCHHASH(hash)), nc_hash) {
 		if (ncp->nc_dvp == dvp && ncp->nc_nlen == cnp->cn_namelen &&
 		    !bcmp(ncp->nc_name, cnp->cn_nameptr, ncp->nc_nlen))
 			break;
 	}
 
 	if (ncp == NULL) {
 		mtx_unlock(blp);
 		goto out_no_entry;
 	}
 
 	error = cache_zap_locked_bucket(ncp, cnp, hash, blp);
 	if (__predict_false(error != 0)) {
 		zap_bucket_fail++;
 		goto retry;
 	}
 	counter_u64_add(numposzaps, 1);
 	SDT_PROBE2(vfs, namecache, removecnp, hit, dvp, cnp);
 	cache_free(ncp);
 	return (1);
 out_no_entry:
 	counter_u64_add(nummisszap, 1);
 	SDT_PROBE2(vfs, namecache, removecnp, miss, dvp, cnp);
 	return (0);
 }
 
 static int __noinline
 cache_lookup_dot(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp,
     struct timespec *tsp, int *ticksp)
 {
 	int ltype;
 
 	*vpp = dvp;
 	counter_u64_add(dothits, 1);
 	SDT_PROBE3(vfs, namecache, lookup, hit, dvp, ".", *vpp);
 	if (tsp != NULL)
 		timespecclear(tsp);
 	if (ticksp != NULL)
 		*ticksp = ticks;
 	vrefact(*vpp);
 	/*
 	 * When we lookup "." we still can be asked to lock it
 	 * differently...
 	 */
 	ltype = cnp->cn_lkflags & LK_TYPE_MASK;
 	if (ltype != VOP_ISLOCKED(*vpp)) {
 		if (ltype == LK_EXCLUSIVE) {
 			vn_lock(*vpp, LK_UPGRADE | LK_RETRY);
 			if (VN_IS_DOOMED((*vpp))) {
 				/* forced unmount */
 				vrele(*vpp);
 				*vpp = NULL;
 				return (ENOENT);
 			}
 		} else
 			vn_lock(*vpp, LK_DOWNGRADE | LK_RETRY);
 	}
 	return (-1);
 }
 
 static int __noinline
 cache_lookup_dotdot(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp,
     struct timespec *tsp, int *ticksp)
 {
 	struct namecache_ts *ncp_ts;
 	struct namecache *ncp;
 	struct mtx *dvlp;
 	enum vgetstate vs;
 	int error, ltype;
 	bool whiteout;
 
 	MPASS((cnp->cn_flags & ISDOTDOT) != 0);
 
 	if ((cnp->cn_flags & MAKEENTRY) == 0) {
 		cache_remove_cnp(dvp, cnp);
 		return (0);
 	}
 
 	counter_u64_add(dotdothits, 1);
 retry:
 	dvlp = VP2VNODELOCK(dvp);
 	mtx_lock(dvlp);
 	ncp = dvp->v_cache_dd;
 	if (ncp == NULL) {
 		SDT_PROBE2(vfs, namecache, lookup, miss, dvp, "..");
 		mtx_unlock(dvlp);
 		return (0);
 	}
 	if ((ncp->nc_flag & NCF_ISDOTDOT) != 0) {
 		if (ncp->nc_flag & NCF_NEGATIVE)
 			*vpp = NULL;
 		else
 			*vpp = ncp->nc_vp;
 	} else
 		*vpp = ncp->nc_dvp;
 	if (*vpp == NULL)
 		goto negative_success;
 	SDT_PROBE3(vfs, namecache, lookup, hit, dvp, "..", *vpp);
 	cache_out_ts(ncp, tsp, ticksp);
 	if ((ncp->nc_flag & (NCF_ISDOTDOT | NCF_DTS)) ==
 	    NCF_DTS && tsp != NULL) {
 		ncp_ts = __containerof(ncp, struct namecache_ts, nc_nc);
 		*tsp = ncp_ts->nc_dotdottime;
 	}
 
 	MPASS(dvp != *vpp);
 	ltype = VOP_ISLOCKED(dvp);
 	VOP_UNLOCK(dvp);
 	vs = vget_prep(*vpp);
 	mtx_unlock(dvlp);
 	error = vget_finish(*vpp, cnp->cn_lkflags, vs);
 	vn_lock(dvp, ltype | LK_RETRY);
 	if (VN_IS_DOOMED(dvp)) {
 		if (error == 0)
 			vput(*vpp);
 		*vpp = NULL;
 		return (ENOENT);
 	}
 	if (error) {
 		*vpp = NULL;
 		goto retry;
 	}
 	return (-1);
 negative_success:
 	if (__predict_false(cnp->cn_nameiop == CREATE)) {
 		if (cnp->cn_flags & ISLASTCN) {
 			counter_u64_add(numnegzaps, 1);
 			cache_zap_negative_locked_vnode_kl(ncp, dvp);
 			mtx_unlock(dvlp);
 			cache_free(ncp);
 			return (0);
 		}
 	}
 
 	whiteout = (ncp->nc_flag & NCF_WHITE);
 	cache_out_ts(ncp, tsp, ticksp);
 	if (cache_neg_hit_prep(ncp))
 		cache_neg_promote(ncp);
 	else
 		cache_neg_hit_finish(ncp);
 	mtx_unlock(dvlp);
 	if (whiteout)
 		cnp->cn_flags |= ISWHITEOUT;
 	return (ENOENT);
 }
 
 /**
  * Lookup a name in the name cache
  *
  * # Arguments
  *
  * - dvp:	Parent directory in which to search.
  * - vpp:	Return argument.  Will contain desired vnode on cache hit.
  * - cnp:	Parameters of the name search.  The most interesting bits of
  *   		the cn_flags field have the following meanings:
  *   	- MAKEENTRY:	If clear, free an entry from the cache rather than look
  *   			it up.
  *   	- ISDOTDOT:	Must be set if and only if cn_nameptr == ".."
  * - tsp:	Return storage for cache timestamp.  On a successful (positive
  *   		or negative) lookup, tsp will be filled with any timespec that
  *   		was stored when this cache entry was created.  However, it will
  *   		be clear for "." entries.
  * - ticks:	Return storage for alternate cache timestamp.  On a successful
  *   		(positive or negative) lookup, it will contain the ticks value
  *   		that was current when the cache entry was created, unless cnp
  *   		was ".".
  *
  * Either both tsp and ticks have to be provided or neither of them.
  *
  * # Returns
  *
  * - -1:	A positive cache hit.  vpp will contain the desired vnode.
  * - ENOENT:	A negative cache hit, or dvp was recycled out from under us due
  *		to a forced unmount.  vpp will not be modified.  If the entry
  *		is a whiteout, then the ISWHITEOUT flag will be set in
  *		cnp->cn_flags.
  * - 0:		A cache miss.  vpp will not be modified.
  *
  * # Locking
  *
  * On a cache hit, vpp will be returned locked and ref'd.  If we're looking up
  * .., dvp is unlocked.  If we're looking up . an extra ref is taken, but the
  * lock is not recursively acquired.
  */
 static int __noinline
 cache_lookup_fallback(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp,
     struct timespec *tsp, int *ticksp)
 {
 	struct namecache *ncp;
 	struct mtx *blp;
 	uint32_t hash;
 	enum vgetstate vs;
 	int error;
 	bool whiteout;
 
 	MPASS((cnp->cn_flags & ISDOTDOT) == 0);
 	MPASS((cnp->cn_flags & (MAKEENTRY | NC_KEEPPOSENTRY)) != 0);
 
 retry:
 	hash = cache_get_hash(cnp->cn_nameptr, cnp->cn_namelen, dvp);
 	blp = HASH2BUCKETLOCK(hash);
 	mtx_lock(blp);
 
 	CK_SLIST_FOREACH(ncp, (NCHHASH(hash)), nc_hash) {
 		if (ncp->nc_dvp == dvp && ncp->nc_nlen == cnp->cn_namelen &&
 		    !bcmp(ncp->nc_name, cnp->cn_nameptr, ncp->nc_nlen))
 			break;
 	}
 
 	if (__predict_false(ncp == NULL)) {
 		mtx_unlock(blp);
 		SDT_PROBE2(vfs, namecache, lookup, miss, dvp, cnp->cn_nameptr);
 		counter_u64_add(nummiss, 1);
 		return (0);
 	}
 
 	if (ncp->nc_flag & NCF_NEGATIVE)
 		goto negative_success;
 
 	counter_u64_add(numposhits, 1);
 	*vpp = ncp->nc_vp;
 	SDT_PROBE3(vfs, namecache, lookup, hit, dvp, ncp->nc_name, *vpp);
 	cache_out_ts(ncp, tsp, ticksp);
 	MPASS(dvp != *vpp);
 	vs = vget_prep(*vpp);
 	mtx_unlock(blp);
 	error = vget_finish(*vpp, cnp->cn_lkflags, vs);
 	if (error) {
 		*vpp = NULL;
 		goto retry;
 	}
 	return (-1);
 negative_success:
 	/*
 	 * We don't get here with regular lookup apart from corner cases.
 	 */
 	if (__predict_true(cnp->cn_nameiop == CREATE)) {
 		if (cnp->cn_flags & ISLASTCN) {
 			counter_u64_add(numnegzaps, 1);
 			error = cache_zap_locked_bucket(ncp, cnp, hash, blp);
 			if (__predict_false(error != 0)) {
 				zap_bucket_fail2++;
 				goto retry;
 			}
 			cache_free(ncp);
 			return (0);
 		}
 	}
 
 	whiteout = (ncp->nc_flag & NCF_WHITE);
 	cache_out_ts(ncp, tsp, ticksp);
 	if (cache_neg_hit_prep(ncp))
 		cache_neg_promote(ncp);
 	else
 		cache_neg_hit_finish(ncp);
 	mtx_unlock(blp);
 	if (whiteout)
 		cnp->cn_flags |= ISWHITEOUT;
 	return (ENOENT);
 }
 
 int
 cache_lookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp,
     struct timespec *tsp, int *ticksp)
 {
 	struct namecache *ncp;
 	uint32_t hash;
 	enum vgetstate vs;
 	int error;
 	bool whiteout, neg_promote;
 	u_short nc_flag;
 
 	MPASS((tsp == NULL && ticksp == NULL) || (tsp != NULL && ticksp != NULL));
 
 #ifdef DEBUG_CACHE
 	if (__predict_false(!doingcache)) {
 		cnp->cn_flags &= ~MAKEENTRY;
 		return (0);
 	}
 #endif
 
 	if (__predict_false(cnp->cn_nameptr[0] == '.')) {
 		if (cnp->cn_namelen == 1)
 			return (cache_lookup_dot(dvp, vpp, cnp, tsp, ticksp));
 		if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.')
 			return (cache_lookup_dotdot(dvp, vpp, cnp, tsp, ticksp));
 	}
 
 	MPASS((cnp->cn_flags & ISDOTDOT) == 0);
 
 	if ((cnp->cn_flags & (MAKEENTRY | NC_KEEPPOSENTRY)) == 0) {
 		cache_remove_cnp(dvp, cnp);
 		return (0);
 	}
 
 	hash = cache_get_hash(cnp->cn_nameptr, cnp->cn_namelen, dvp);
 	vfs_smr_enter();
 
 	CK_SLIST_FOREACH(ncp, (NCHHASH(hash)), nc_hash) {
 		if (ncp->nc_dvp == dvp && ncp->nc_nlen == cnp->cn_namelen &&
 		    !bcmp(ncp->nc_name, cnp->cn_nameptr, ncp->nc_nlen))
 			break;
 	}
 
 	if (__predict_false(ncp == NULL)) {
 		vfs_smr_exit();
 		SDT_PROBE2(vfs, namecache, lookup, miss, dvp, cnp->cn_nameptr);
 		counter_u64_add(nummiss, 1);
 		return (0);
 	}
 
 	nc_flag = atomic_load_char(&ncp->nc_flag);
 	if (nc_flag & NCF_NEGATIVE)
 		goto negative_success;
 
 	counter_u64_add(numposhits, 1);
 	*vpp = ncp->nc_vp;
 	SDT_PROBE3(vfs, namecache, lookup, hit, dvp, ncp->nc_name, *vpp);
 	cache_out_ts(ncp, tsp, ticksp);
 	MPASS(dvp != *vpp);
 	if (!cache_ncp_canuse(ncp)) {
 		vfs_smr_exit();
 		*vpp = NULL;
 		goto out_fallback;
 	}
 	vs = vget_prep_smr(*vpp);
 	vfs_smr_exit();
 	if (__predict_false(vs == VGET_NONE)) {
 		*vpp = NULL;
 		goto out_fallback;
 	}
 	error = vget_finish(*vpp, cnp->cn_lkflags, vs);
 	if (error) {
 		*vpp = NULL;
 		goto out_fallback;
 	}
 	return (-1);
 negative_success:
 	if (cnp->cn_nameiop == CREATE) {
 		if (cnp->cn_flags & ISLASTCN) {
 			vfs_smr_exit();
 			goto out_fallback;
 		}
 	}
 
 	cache_out_ts(ncp, tsp, ticksp);
 	whiteout = (atomic_load_char(&ncp->nc_flag) & NCF_WHITE);
 	neg_promote = cache_neg_hit_prep(ncp);
 	if (!cache_ncp_canuse(ncp)) {
 		cache_neg_hit_abort(ncp);
 		vfs_smr_exit();
 		goto out_fallback;
 	}
 	if (neg_promote) {
 		vfs_smr_exit();
 		if (!cache_neg_promote_cond(dvp, cnp, ncp, hash))
 			goto out_fallback;
 	} else {
 		cache_neg_hit_finish(ncp);
 		vfs_smr_exit();
 	}
 	if (whiteout)
 		cnp->cn_flags |= ISWHITEOUT;
 	return (ENOENT);
 out_fallback:
 	return (cache_lookup_fallback(dvp, vpp, cnp, tsp, ticksp));
 }
 
 struct celockstate {
 	struct mtx *vlp[3];
 	struct mtx *blp[2];
 };
 CTASSERT((nitems(((struct celockstate *)0)->vlp) == 3));
 CTASSERT((nitems(((struct celockstate *)0)->blp) == 2));
 
 static inline void
 cache_celockstate_init(struct celockstate *cel)
 {
 
 	bzero(cel, sizeof(*cel));
 }
 
 static void
 cache_lock_vnodes_cel(struct celockstate *cel, struct vnode *vp,
     struct vnode *dvp)
 {
 	struct mtx *vlp1, *vlp2;
 
 	MPASS(cel->vlp[0] == NULL);
 	MPASS(cel->vlp[1] == NULL);
 	MPASS(cel->vlp[2] == NULL);
 
 	MPASS(vp != NULL || dvp != NULL);
 
 	vlp1 = VP2VNODELOCK(vp);
 	vlp2 = VP2VNODELOCK(dvp);
 	cache_sort_vnodes(&vlp1, &vlp2);
 
 	if (vlp1 != NULL) {
 		mtx_lock(vlp1);
 		cel->vlp[0] = vlp1;
 	}
 	mtx_lock(vlp2);
 	cel->vlp[1] = vlp2;
 }
 
 static void
 cache_unlock_vnodes_cel(struct celockstate *cel)
 {
 
 	MPASS(cel->vlp[0] != NULL || cel->vlp[1] != NULL);
 
 	if (cel->vlp[0] != NULL)
 		mtx_unlock(cel->vlp[0]);
 	if (cel->vlp[1] != NULL)
 		mtx_unlock(cel->vlp[1]);
 	if (cel->vlp[2] != NULL)
 		mtx_unlock(cel->vlp[2]);
 }
 
 static bool
 cache_lock_vnodes_cel_3(struct celockstate *cel, struct vnode *vp)
 {
 	struct mtx *vlp;
 	bool ret;
 
 	cache_assert_vlp_locked(cel->vlp[0]);
 	cache_assert_vlp_locked(cel->vlp[1]);
 	MPASS(cel->vlp[2] == NULL);
 
 	MPASS(vp != NULL);
 	vlp = VP2VNODELOCK(vp);
 
 	ret = true;
 	if (vlp >= cel->vlp[1]) {
 		mtx_lock(vlp);
 	} else {
 		if (mtx_trylock(vlp))
 			goto out;
 		cache_lock_vnodes_cel_3_failures++;
 		cache_unlock_vnodes_cel(cel);
 		if (vlp < cel->vlp[0]) {
 			mtx_lock(vlp);
 			mtx_lock(cel->vlp[0]);
 			mtx_lock(cel->vlp[1]);
 		} else {
 			if (cel->vlp[0] != NULL)
 				mtx_lock(cel->vlp[0]);
 			mtx_lock(vlp);
 			mtx_lock(cel->vlp[1]);
 		}
 		ret = false;
 	}
 out:
 	cel->vlp[2] = vlp;
 	return (ret);
 }
 
 static void
 cache_lock_buckets_cel(struct celockstate *cel, struct mtx *blp1,
     struct mtx *blp2)
 {
 
 	MPASS(cel->blp[0] == NULL);
 	MPASS(cel->blp[1] == NULL);
 
 	cache_sort_vnodes(&blp1, &blp2);
 
 	if (blp1 != NULL) {
 		mtx_lock(blp1);
 		cel->blp[0] = blp1;
 	}
 	mtx_lock(blp2);
 	cel->blp[1] = blp2;
 }
 
 static void
 cache_unlock_buckets_cel(struct celockstate *cel)
 {
 
 	if (cel->blp[0] != NULL)
 		mtx_unlock(cel->blp[0]);
 	mtx_unlock(cel->blp[1]);
 }
 
 /*
  * Lock part of the cache affected by the insertion.
  *
  * This means vnodelocks for dvp, vp and the relevant bucketlock.
  * However, insertion can result in removal of an old entry. In this
  * case we have an additional vnode and bucketlock pair to lock.
  *
  * That is, in the worst case we have to lock 3 vnodes and 2 bucketlocks, while
  * preserving the locking order (smaller address first).
  */
 static void
 cache_enter_lock(struct celockstate *cel, struct vnode *dvp, struct vnode *vp,
     uint32_t hash)
 {
 	struct namecache *ncp;
 	struct mtx *blps[2];
 	u_char nc_flag;
 
 	blps[0] = HASH2BUCKETLOCK(hash);
 	for (;;) {
 		blps[1] = NULL;
 		cache_lock_vnodes_cel(cel, dvp, vp);
 		if (vp == NULL || vp->v_type != VDIR)
 			break;
 		ncp = atomic_load_consume_ptr(&vp->v_cache_dd);
 		if (ncp == NULL)
 			break;
 		nc_flag = atomic_load_char(&ncp->nc_flag);
 		if ((nc_flag & NCF_ISDOTDOT) == 0)
 			break;
 		MPASS(ncp->nc_dvp == vp);
 		blps[1] = NCP2BUCKETLOCK(ncp);
 		if ((nc_flag & NCF_NEGATIVE) != 0)
 			break;
 		if (cache_lock_vnodes_cel_3(cel, ncp->nc_vp))
 			break;
 		/*
 		 * All vnodes got re-locked. Re-validate the state and if
 		 * nothing changed we are done. Otherwise restart.
 		 */
 		if (ncp == vp->v_cache_dd &&
 		    (ncp->nc_flag & NCF_ISDOTDOT) != 0 &&
 		    blps[1] == NCP2BUCKETLOCK(ncp) &&
 		    VP2VNODELOCK(ncp->nc_vp) == cel->vlp[2])
 			break;
 		cache_unlock_vnodes_cel(cel);
 		cel->vlp[0] = NULL;
 		cel->vlp[1] = NULL;
 		cel->vlp[2] = NULL;
 	}
 	cache_lock_buckets_cel(cel, blps[0], blps[1]);
 }
 
 static void
 cache_enter_lock_dd(struct celockstate *cel, struct vnode *dvp, struct vnode *vp,
     uint32_t hash)
 {
 	struct namecache *ncp;
 	struct mtx *blps[2];
 	u_char nc_flag;
 
 	blps[0] = HASH2BUCKETLOCK(hash);
 	for (;;) {
 		blps[1] = NULL;
 		cache_lock_vnodes_cel(cel, dvp, vp);
 		ncp = atomic_load_consume_ptr(&dvp->v_cache_dd);
 		if (ncp == NULL)
 			break;
 		nc_flag = atomic_load_char(&ncp->nc_flag);
 		if ((nc_flag & NCF_ISDOTDOT) == 0)
 			break;
 		MPASS(ncp->nc_dvp == dvp);
 		blps[1] = NCP2BUCKETLOCK(ncp);
 		if ((nc_flag & NCF_NEGATIVE) != 0)
 			break;
 		if (cache_lock_vnodes_cel_3(cel, ncp->nc_vp))
 			break;
 		if (ncp == dvp->v_cache_dd &&
 		    (ncp->nc_flag & NCF_ISDOTDOT) != 0 &&
 		    blps[1] == NCP2BUCKETLOCK(ncp) &&
 		    VP2VNODELOCK(ncp->nc_vp) == cel->vlp[2])
 			break;
 		cache_unlock_vnodes_cel(cel);
 		cel->vlp[0] = NULL;
 		cel->vlp[1] = NULL;
 		cel->vlp[2] = NULL;
 	}
 	cache_lock_buckets_cel(cel, blps[0], blps[1]);
 }
 
 static void
 cache_enter_unlock(struct celockstate *cel)
 {
 
 	cache_unlock_buckets_cel(cel);
 	cache_unlock_vnodes_cel(cel);
 }
 
 static void __noinline
 cache_enter_dotdot_prep(struct vnode *dvp, struct vnode *vp,
     struct componentname *cnp)
 {
 	struct celockstate cel;
 	struct namecache *ncp;
 	uint32_t hash;
 	int len;
 
 	if (atomic_load_ptr(&dvp->v_cache_dd) == NULL)
 		return;
 	len = cnp->cn_namelen;
 	cache_celockstate_init(&cel);
 	hash = cache_get_hash(cnp->cn_nameptr, len, dvp);
 	cache_enter_lock_dd(&cel, dvp, vp, hash);
 	ncp = dvp->v_cache_dd;
 	if (ncp != NULL && (ncp->nc_flag & NCF_ISDOTDOT)) {
 		KASSERT(ncp->nc_dvp == dvp, ("wrong isdotdot parent"));
 		cache_zap_locked(ncp);
 	} else {
 		ncp = NULL;
 	}
 	atomic_store_ptr(&dvp->v_cache_dd, NULL);
 	cache_enter_unlock(&cel);
 	if (ncp != NULL)
 		cache_free(ncp);
 }
 
 /*
  * Add an entry to the cache.
  */
 void
 cache_enter_time(struct vnode *dvp, struct vnode *vp, struct componentname *cnp,
     struct timespec *tsp, struct timespec *dtsp)
 {
 	struct celockstate cel;
 	struct namecache *ncp, *n2, *ndd;
 	struct namecache_ts *ncp_ts;
 	struct nchashhead *ncpp;
 	uint32_t hash;
 	int flag;
 	int len;
 
 	KASSERT(cnp->cn_namelen <= NAME_MAX,
 	    ("%s: passed len %ld exceeds NAME_MAX (%d)", __func__, cnp->cn_namelen,
 	    NAME_MAX));
 	VNPASS(!VN_IS_DOOMED(dvp), dvp);
 	VNPASS(dvp->v_type != VNON, dvp);
 	if (vp != NULL) {
 		VNPASS(!VN_IS_DOOMED(vp), vp);
 		VNPASS(vp->v_type != VNON, vp);
 	}
 	if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') {
 		KASSERT(dvp == vp,
 		    ("%s: different vnodes for dot entry (%p; %p)\n", __func__,
 		    dvp, vp));
 	} else {
 		KASSERT(dvp != vp,
 		    ("%s: same vnode for non-dot entry [%s] (%p)\n", __func__,
 		    cnp->cn_nameptr, dvp));
 	}
 
 #ifdef DEBUG_CACHE
 	if (__predict_false(!doingcache))
 		return;
 #endif
 
 	flag = 0;
 	if (__predict_false(cnp->cn_nameptr[0] == '.')) {
 		if (cnp->cn_namelen == 1)
 			return;
 		if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') {
 			cache_enter_dotdot_prep(dvp, vp, cnp);
 			flag = NCF_ISDOTDOT;
 		}
 	}
 
 	ncp = cache_alloc(cnp->cn_namelen, tsp != NULL);
 	if (ncp == NULL)
 		return;
 
 	cache_celockstate_init(&cel);
 	ndd = NULL;
 	ncp_ts = NULL;
 
 	/*
 	 * Calculate the hash key and setup as much of the new
 	 * namecache entry as possible before acquiring the lock.
 	 */
 	ncp->nc_flag = flag | NCF_WIP;
 	ncp->nc_vp = vp;
 	if (vp == NULL)
 		cache_neg_init(ncp);
 	ncp->nc_dvp = dvp;
 	if (tsp != NULL) {
 		ncp_ts = __containerof(ncp, struct namecache_ts, nc_nc);
 		ncp_ts->nc_time = *tsp;
 		ncp_ts->nc_ticks = ticks;
 		ncp_ts->nc_nc.nc_flag |= NCF_TS;
 		if (dtsp != NULL) {
 			ncp_ts->nc_dotdottime = *dtsp;
 			ncp_ts->nc_nc.nc_flag |= NCF_DTS;
 		}
 	}
 	len = ncp->nc_nlen = cnp->cn_namelen;
 	hash = cache_get_hash(cnp->cn_nameptr, len, dvp);
 	memcpy(ncp->nc_name, cnp->cn_nameptr, len);
 	ncp->nc_name[len] = '\0';
 	cache_enter_lock(&cel, dvp, vp, hash);
 
 	/*
 	 * See if this vnode or negative entry is already in the cache
 	 * with this name.  This can happen with concurrent lookups of
 	 * the same path name.
 	 */
 	ncpp = NCHHASH(hash);
 	CK_SLIST_FOREACH(n2, ncpp, nc_hash) {
 		if (n2->nc_dvp == dvp &&
 		    n2->nc_nlen == cnp->cn_namelen &&
 		    !bcmp(n2->nc_name, cnp->cn_nameptr, n2->nc_nlen)) {
 			MPASS(cache_ncp_canuse(n2));
 			if ((n2->nc_flag & NCF_NEGATIVE) != 0)
 				KASSERT(vp == NULL,
 				    ("%s: found entry pointing to a different vnode (%p != %p) ; name [%s]",
 				    __func__, NULL, vp, cnp->cn_nameptr));
 			else
 				KASSERT(n2->nc_vp == vp,
 				    ("%s: found entry pointing to a different vnode (%p != %p) ; name [%s]",
 				    __func__, n2->nc_vp, vp, cnp->cn_nameptr));
 			/*
 			 * Entries are supposed to be immutable unless in the
 			 * process of getting destroyed. Accommodating for
 			 * changing timestamps is possible but not worth it.
 			 * This should be harmless in terms of correctness, in
 			 * the worst case resulting in an earlier expiration.
 			 * Alternatively, the found entry can be replaced
 			 * altogether.
 			 */
 			MPASS((n2->nc_flag & (NCF_TS | NCF_DTS)) == (ncp->nc_flag & (NCF_TS | NCF_DTS)));
 #if 0
 			if (tsp != NULL) {
 				KASSERT((n2->nc_flag & NCF_TS) != 0,
 				    ("no NCF_TS"));
 				n2_ts = __containerof(n2, struct namecache_ts, nc_nc);
 				n2_ts->nc_time = ncp_ts->nc_time;
 				n2_ts->nc_ticks = ncp_ts->nc_ticks;
 				if (dtsp != NULL) {
 					n2_ts->nc_dotdottime = ncp_ts->nc_dotdottime;
 					n2_ts->nc_nc.nc_flag |= NCF_DTS;
 				}
 			}
 #endif
 			SDT_PROBE3(vfs, namecache, enter, duplicate, dvp, ncp->nc_name,
 			    vp);
 			goto out_unlock_free;
 		}
 	}
 
 	if (flag == NCF_ISDOTDOT) {
 		/*
 		 * See if we are trying to add .. entry, but some other lookup
 		 * has populated v_cache_dd pointer already.
 		 */
 		if (dvp->v_cache_dd != NULL)
 			goto out_unlock_free;
 		KASSERT(vp == NULL || vp->v_type == VDIR,
 		    ("wrong vnode type %p", vp));
 		atomic_thread_fence_rel();
 		atomic_store_ptr(&dvp->v_cache_dd, ncp);
 	}
 
 	if (vp != NULL) {
 		if (flag != NCF_ISDOTDOT) {
 			/*
 			 * For this case, the cache entry maps both the
 			 * directory name in it and the name ".." for the
 			 * directory's parent.
 			 */
 			if ((ndd = vp->v_cache_dd) != NULL) {
 				if ((ndd->nc_flag & NCF_ISDOTDOT) != 0)
 					cache_zap_locked(ndd);
 				else
 					ndd = NULL;
 			}
 			atomic_thread_fence_rel();
 			atomic_store_ptr(&vp->v_cache_dd, ncp);
 		} else if (vp->v_type != VDIR) {
 			if (vp->v_cache_dd != NULL) {
 				atomic_store_ptr(&vp->v_cache_dd, NULL);
 			}
 		}
 	}
 
 	if (flag != NCF_ISDOTDOT) {
 		if (LIST_EMPTY(&dvp->v_cache_src)) {
 			cache_hold_vnode(dvp);
 		}
 		LIST_INSERT_HEAD(&dvp->v_cache_src, ncp, nc_src);
 	}
 
 	/*
 	 * If the entry is "negative", we place it into the
 	 * "negative" cache queue, otherwise, we place it into the
 	 * destination vnode's cache entries queue.
 	 */
 	if (vp != NULL) {
 		TAILQ_INSERT_HEAD(&vp->v_cache_dst, ncp, nc_dst);
 		SDT_PROBE3(vfs, namecache, enter, done, dvp, ncp->nc_name,
 		    vp);
 	} else {
 		if (cnp->cn_flags & ISWHITEOUT)
 			atomic_store_char(&ncp->nc_flag, ncp->nc_flag | NCF_WHITE);
 		cache_neg_insert(ncp);
 		SDT_PROBE2(vfs, namecache, enter_negative, done, dvp,
 		    ncp->nc_name);
 	}
 
 	/*
 	 * Insert the new namecache entry into the appropriate chain
 	 * within the cache entries table.
 	 */
 	CK_SLIST_INSERT_HEAD(ncpp, ncp, nc_hash);
 
 	atomic_thread_fence_rel();
 	/*
 	 * Mark the entry as fully constructed.
 	 * It is immutable past this point until its removal.
 	 */
 	atomic_store_char(&ncp->nc_flag, ncp->nc_flag & ~NCF_WIP);
 
 	cache_enter_unlock(&cel);
 	if (ndd != NULL)
 		cache_free(ndd);
 	return;
 out_unlock_free:
 	cache_enter_unlock(&cel);
 	cache_free(ncp);
 	return;
 }
 
 /*
  * A variant of the above accepting flags.
  *
  * - VFS_CACHE_DROPOLD -- if a conflicting entry is found, drop it.
  *
  * TODO: this routine is a hack. It blindly removes the old entry, even if it
  * happens to match and it is doing it in an inefficient manner. It was added
  * to accommodate NFS which runs into a case where the target for a given name
  * may change from under it. Note this does nothing to solve the following
  * race: 2 callers of cache_enter_time_flags pass a different target vnode for
  * the same [dvp, cnp]. It may be argued that code doing this is broken.
  */
 void
 cache_enter_time_flags(struct vnode *dvp, struct vnode *vp, struct componentname *cnp,
     struct timespec *tsp, struct timespec *dtsp, int flags)
 {
 
 	MPASS((flags & ~(VFS_CACHE_DROPOLD)) == 0);
 
 	if (flags & VFS_CACHE_DROPOLD)
 		cache_remove_cnp(dvp, cnp);
 	cache_enter_time(dvp, vp, cnp, tsp, dtsp);
 }
 
 static u_int
 cache_roundup_2(u_int val)
 {
 	u_int res;
 
 	for (res = 1; res <= val; res <<= 1)
 		continue;
 
 	return (res);
 }
 
 static struct nchashhead *
 nchinittbl(u_long elements, u_long *hashmask)
 {
 	struct nchashhead *hashtbl;
 	u_long hashsize, i;
 
 	hashsize = cache_roundup_2(elements) / 2;
 
 	hashtbl = malloc((u_long)hashsize * sizeof(*hashtbl), M_VFSCACHE, M_WAITOK);
 	for (i = 0; i < hashsize; i++)
 		CK_SLIST_INIT(&hashtbl[i]);
 	*hashmask = hashsize - 1;
 	return (hashtbl);
 }
 
 static void
 ncfreetbl(struct nchashhead *hashtbl)
 {
 
 	free(hashtbl, M_VFSCACHE);
 }
 
 /*
  * Name cache initialization, from vfs_init() when we are booting
  */
 static void
 nchinit(void *dummy __unused)
 {
 	u_int i;
 
 	cache_zone_small = uma_zcreate("S VFS Cache", CACHE_ZONE_SMALL_SIZE,
 	    NULL, NULL, NULL, NULL, CACHE_ZONE_ALIGNMENT, UMA_ZONE_ZINIT);
 	cache_zone_small_ts = uma_zcreate("STS VFS Cache", CACHE_ZONE_SMALL_TS_SIZE,
 	    NULL, NULL, NULL, NULL, CACHE_ZONE_ALIGNMENT, UMA_ZONE_ZINIT);
 	cache_zone_large = uma_zcreate("L VFS Cache", CACHE_ZONE_LARGE_SIZE,
 	    NULL, NULL, NULL, NULL, CACHE_ZONE_ALIGNMENT, UMA_ZONE_ZINIT);
 	cache_zone_large_ts = uma_zcreate("LTS VFS Cache", CACHE_ZONE_LARGE_TS_SIZE,
 	    NULL, NULL, NULL, NULL, CACHE_ZONE_ALIGNMENT, UMA_ZONE_ZINIT);
 
 	VFS_SMR_ZONE_SET(cache_zone_small);
 	VFS_SMR_ZONE_SET(cache_zone_small_ts);
 	VFS_SMR_ZONE_SET(cache_zone_large);
 	VFS_SMR_ZONE_SET(cache_zone_large_ts);
 
 	ncsize = desiredvnodes * ncsizefactor;
 	cache_recalc_neg_min(ncnegminpct);
 	nchashtbl = nchinittbl(desiredvnodes * 2, &nchash);
 	ncbuckethash = cache_roundup_2(mp_ncpus * mp_ncpus) - 1;
 	if (ncbuckethash < 7) /* arbitrarily chosen to avoid having one lock */
 		ncbuckethash = 7;
 	if (ncbuckethash > nchash)
 		ncbuckethash = nchash;
 	bucketlocks = malloc(sizeof(*bucketlocks) * numbucketlocks, M_VFSCACHE,
 	    M_WAITOK | M_ZERO);
 	for (i = 0; i < numbucketlocks; i++)
 		mtx_init(&bucketlocks[i], "ncbuc", NULL, MTX_DUPOK | MTX_RECURSE);
 	ncvnodehash = ncbuckethash;
 	vnodelocks = malloc(sizeof(*vnodelocks) * numvnodelocks, M_VFSCACHE,
 	    M_WAITOK | M_ZERO);
 	for (i = 0; i < numvnodelocks; i++)
 		mtx_init(&vnodelocks[i], "ncvn", NULL, MTX_DUPOK | MTX_RECURSE);
 
 	for (i = 0; i < numneglists; i++) {
 		mtx_init(&neglists[i].nl_evict_lock, "ncnege", NULL, MTX_DEF);
 		mtx_init(&neglists[i].nl_lock, "ncnegl", NULL, MTX_DEF);
 		TAILQ_INIT(&neglists[i].nl_list);
 		TAILQ_INIT(&neglists[i].nl_hotlist);
 	}
 }
 SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_SECOND, nchinit, NULL);
 
 void
 cache_vnode_init(struct vnode *vp)
 {
 
 	LIST_INIT(&vp->v_cache_src);
 	TAILQ_INIT(&vp->v_cache_dst);
 	vp->v_cache_dd = NULL;
 	cache_prehash(vp);
 }
 
 /*
  * Induce transient cache misses for lockless operation in cache_lookup() by
  * using a temporary hash table.
  *
  * This will force a fs lookup.
  *
  * Synchronisation is done in 2 steps, calling vfs_smr_synchronize each time
  * to observe all CPUs not performing the lookup.
  */
 static void
 cache_changesize_set_temp(struct nchashhead *temptbl, u_long temphash)
 {
 
 	MPASS(temphash < nchash);
 	/*
 	 * Change the size. The new size is smaller and can safely be used
 	 * against the existing table. All lookups which now hash wrong will
 	 * result in a cache miss, which all callers are supposed to know how
 	 * to handle.
 	 */
 	atomic_store_long(&nchash, temphash);
 	atomic_thread_fence_rel();
 	vfs_smr_synchronize();
 	/*
 	 * At this point everyone sees the updated hash value, but they still
 	 * see the old table.
 	 */
 	atomic_store_ptr(&nchashtbl, temptbl);
 	atomic_thread_fence_rel();
 	vfs_smr_synchronize();
 	/*
 	 * At this point everyone sees the updated table pointer and size pair.
 	 */
 }
 
 /*
  * Set the new hash table.
  *
  * Similarly to cache_changesize_set_temp(), this has to synchronize against
  * lockless operation in cache_lookup().
  */
 static void
 cache_changesize_set_new(struct nchashhead *new_tbl, u_long new_hash)
 {
 
 	MPASS(nchash < new_hash);
 	/*
 	 * Change the pointer first. This wont result in out of bounds access
 	 * since the temporary table is guaranteed to be smaller.
 	 */
 	atomic_store_ptr(&nchashtbl, new_tbl);
 	atomic_thread_fence_rel();
 	vfs_smr_synchronize();
 	/*
 	 * At this point everyone sees the updated pointer value, but they
 	 * still see the old size.
 	 */
 	atomic_store_long(&nchash, new_hash);
 	atomic_thread_fence_rel();
 	vfs_smr_synchronize();
 	/*
 	 * At this point everyone sees the updated table pointer and size pair.
 	 */
 }
 
 void
 cache_changesize(u_long newmaxvnodes)
 {
 	struct nchashhead *new_nchashtbl, *old_nchashtbl, *temptbl;
 	u_long new_nchash, old_nchash, temphash;
 	struct namecache *ncp;
 	uint32_t hash;
 	u_long newncsize;
 	int i;
 
 	newncsize = newmaxvnodes * ncsizefactor;
 	newmaxvnodes = cache_roundup_2(newmaxvnodes * 2);
 	if (newmaxvnodes < numbucketlocks)
 		newmaxvnodes = numbucketlocks;
 
 	new_nchashtbl = nchinittbl(newmaxvnodes, &new_nchash);
 	/* If same hash table size, nothing to do */
 	if (nchash == new_nchash) {
 		ncfreetbl(new_nchashtbl);
 		return;
 	}
 
 	temptbl = nchinittbl(1, &temphash);
 
 	/*
 	 * Move everything from the old hash table to the new table.
 	 * None of the namecache entries in the table can be removed
 	 * because to do so, they have to be removed from the hash table.
 	 */
 	cache_lock_all_vnodes();
 	cache_lock_all_buckets();
 	old_nchashtbl = nchashtbl;
 	old_nchash = nchash;
 	cache_changesize_set_temp(temptbl, temphash);
 	for (i = 0; i <= old_nchash; i++) {
 		while ((ncp = CK_SLIST_FIRST(&old_nchashtbl[i])) != NULL) {
 			hash = cache_get_hash(ncp->nc_name, ncp->nc_nlen,
 			    ncp->nc_dvp);
 			CK_SLIST_REMOVE(&old_nchashtbl[i], ncp, namecache, nc_hash);
 			CK_SLIST_INSERT_HEAD(&new_nchashtbl[hash & new_nchash], ncp, nc_hash);
 		}
 	}
 	ncsize = newncsize;
 	cache_recalc_neg_min(ncnegminpct);
 	cache_changesize_set_new(new_nchashtbl, new_nchash);
 	cache_unlock_all_buckets();
 	cache_unlock_all_vnodes();
 	ncfreetbl(old_nchashtbl);
 	ncfreetbl(temptbl);
 }
 
 /*
  * Remove all entries from and to a particular vnode.
  */
 static void
 cache_purge_impl(struct vnode *vp)
 {
 	struct cache_freebatch batch;
 	struct namecache *ncp;
 	struct mtx *vlp, *vlp2;
 
 	TAILQ_INIT(&batch);
 	vlp = VP2VNODELOCK(vp);
 	vlp2 = NULL;
 	mtx_lock(vlp);
 retry:
 	while (!LIST_EMPTY(&vp->v_cache_src)) {
 		ncp = LIST_FIRST(&vp->v_cache_src);
 		if (!cache_zap_locked_vnode_kl2(ncp, vp, &vlp2))
 			goto retry;
 		TAILQ_INSERT_TAIL(&batch, ncp, nc_dst);
 	}
 	while (!TAILQ_EMPTY(&vp->v_cache_dst)) {
 		ncp = TAILQ_FIRST(&vp->v_cache_dst);
 		if (!cache_zap_locked_vnode_kl2(ncp, vp, &vlp2))
 			goto retry;
 		TAILQ_INSERT_TAIL(&batch, ncp, nc_dst);
 	}
 	ncp = vp->v_cache_dd;
 	if (ncp != NULL) {
 		KASSERT(ncp->nc_flag & NCF_ISDOTDOT,
 		   ("lost dotdot link"));
 		if (!cache_zap_locked_vnode_kl2(ncp, vp, &vlp2))
 			goto retry;
 		TAILQ_INSERT_TAIL(&batch, ncp, nc_dst);
 	}
 	KASSERT(vp->v_cache_dd == NULL, ("incomplete purge"));
 	mtx_unlock(vlp);
 	if (vlp2 != NULL)
 		mtx_unlock(vlp2);
 	cache_free_batch(&batch);
 }
 
 /*
  * Opportunistic check to see if there is anything to do.
  */
 static bool
 cache_has_entries(struct vnode *vp)
 {
 
 	if (LIST_EMPTY(&vp->v_cache_src) && TAILQ_EMPTY(&vp->v_cache_dst) &&
 	    atomic_load_ptr(&vp->v_cache_dd) == NULL)
 		return (false);
 	return (true);
 }
 
 void
 cache_purge(struct vnode *vp)
 {
 
 	SDT_PROBE1(vfs, namecache, purge, done, vp);
 	if (!cache_has_entries(vp))
 		return;
 	cache_purge_impl(vp);
 }
 
 /*
  * Only to be used by vgone.
  */
 void
 cache_purge_vgone(struct vnode *vp)
 {
 	struct mtx *vlp;
 
 	VNPASS(VN_IS_DOOMED(vp), vp);
 	if (cache_has_entries(vp)) {
 		cache_purge_impl(vp);
 		return;
 	}
 
 	/*
 	 * Serialize against a potential thread doing cache_purge.
 	 */
 	vlp = VP2VNODELOCK(vp);
 	mtx_wait_unlocked(vlp);
 	if (cache_has_entries(vp)) {
 		cache_purge_impl(vp);
 		return;
 	}
 	return;
 }
 
 /*
  * Remove all negative entries for a particular directory vnode.
  */
 void
 cache_purge_negative(struct vnode *vp)
 {
 	struct cache_freebatch batch;
 	struct namecache *ncp, *nnp;
 	struct mtx *vlp;
 
 	SDT_PROBE1(vfs, namecache, purge_negative, done, vp);
 	if (LIST_EMPTY(&vp->v_cache_src))
 		return;
 	TAILQ_INIT(&batch);
 	vlp = VP2VNODELOCK(vp);
 	mtx_lock(vlp);
 	LIST_FOREACH_SAFE(ncp, &vp->v_cache_src, nc_src, nnp) {
 		if (!(ncp->nc_flag & NCF_NEGATIVE))
 			continue;
 		cache_zap_negative_locked_vnode_kl(ncp, vp);
 		TAILQ_INSERT_TAIL(&batch, ncp, nc_dst);
 	}
 	mtx_unlock(vlp);
 	cache_free_batch(&batch);
 }
 
 /*
  * Entry points for modifying VOP operations.
  */
 void
 cache_vop_rename(struct vnode *fdvp, struct vnode *fvp, struct vnode *tdvp,
     struct vnode *tvp, struct componentname *fcnp, struct componentname *tcnp)
 {
 
 	ASSERT_VOP_IN_SEQC(fdvp);
 	ASSERT_VOP_IN_SEQC(fvp);
 	ASSERT_VOP_IN_SEQC(tdvp);
 	if (tvp != NULL)
 		ASSERT_VOP_IN_SEQC(tvp);
 
 	cache_purge(fvp);
 	if (tvp != NULL) {
 		cache_purge(tvp);
 		KASSERT(!cache_remove_cnp(tdvp, tcnp),
 		    ("%s: lingering negative entry", __func__));
 	} else {
 		cache_remove_cnp(tdvp, tcnp);
 	}
 
 	/*
 	 * TODO
 	 *
 	 * Historically renaming was always purging all revelang entries,
 	 * but that's quite wasteful. In particular turns out that in many cases
 	 * the target file is immediately accessed after rename, inducing a cache
 	 * miss.
 	 *
 	 * Recode this to reduce relocking and reuse the existing entry (if any)
 	 * instead of just removing it above and allocating a new one here.
 	 */
 	cache_enter(tdvp, fvp, tcnp);
 }
 
 void
 cache_vop_rmdir(struct vnode *dvp, struct vnode *vp)
 {
 
 	ASSERT_VOP_IN_SEQC(dvp);
 	ASSERT_VOP_IN_SEQC(vp);
 	cache_purge(vp);
 }
 
 #ifdef INVARIANTS
 /*
  * Validate that if an entry exists it matches.
  */
 void
 cache_validate(struct vnode *dvp, struct vnode *vp, struct componentname *cnp)
 {
 	struct namecache *ncp;
 	struct mtx *blp;
 	uint32_t hash;
 
 	hash = cache_get_hash(cnp->cn_nameptr, cnp->cn_namelen, dvp);
 	if (CK_SLIST_EMPTY(NCHHASH(hash)))
 		return;
 	blp = HASH2BUCKETLOCK(hash);
 	mtx_lock(blp);
 	CK_SLIST_FOREACH(ncp, (NCHHASH(hash)), nc_hash) {
 		if (ncp->nc_dvp == dvp && ncp->nc_nlen == cnp->cn_namelen &&
 		    !bcmp(ncp->nc_name, cnp->cn_nameptr, ncp->nc_nlen)) {
 			if (ncp->nc_vp != vp)
 				panic("%s: mismatch (%p != %p); ncp %p [%s] dvp %p\n",
 				    __func__, vp, ncp->nc_vp, ncp, ncp->nc_name, ncp->nc_dvp);
 		}
 	}
 	mtx_unlock(blp);
 }
 #endif
 
 /*
  * Flush all entries referencing a particular filesystem.
  */
 void
 cache_purgevfs(struct mount *mp)
 {
 	struct vnode *vp, *mvp;
 	size_t visited, purged;
 
 	visited = purged = 0;
 	/*
 	 * Somewhat wasteful iteration over all vnodes. Would be better to
 	 * support filtering and avoid the interlock to begin with.
 	 */
 	MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
 		visited++;
 		if (!cache_has_entries(vp)) {
 			VI_UNLOCK(vp);
 			continue;
 		}
 		vholdl(vp);
 		VI_UNLOCK(vp);
 		cache_purge(vp);
 		purged++;
 		vdrop(vp);
 	}
 
 	SDT_PROBE3(vfs, namecache, purgevfs, done, mp, visited, purged);
 }
 
 /*
  * Perform canonical checks and cache lookup and pass on to filesystem
  * through the vop_cachedlookup only if needed.
  */
 
 int
 vfs_cache_lookup(struct vop_lookup_args *ap)
 {
 	struct vnode *dvp;
 	int error;
 	struct vnode **vpp = ap->a_vpp;
 	struct componentname *cnp = ap->a_cnp;
 	int flags = cnp->cn_flags;
 
 	*vpp = NULL;
 	dvp = ap->a_dvp;
 
 	if (dvp->v_type != VDIR)
 		return (ENOTDIR);
 
 	if ((flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
 	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
 		return (EROFS);
 
 	error = vn_dir_check_exec(dvp, cnp);
 	if (error != 0)
 		return (error);
 
 	error = cache_lookup(dvp, vpp, cnp, NULL, NULL);
 	if (error == 0)
 		return (VOP_CACHEDLOOKUP(dvp, vpp, cnp));
 	if (error == -1)
 		return (0);
 	return (error);
 }
 
 /* Implementation of the getcwd syscall. */
 int
 sys___getcwd(struct thread *td, struct __getcwd_args *uap)
 {
 	char *buf, *retbuf;
 	size_t buflen;
 	int error;
 
 	buflen = uap->buflen;
 	if (__predict_false(buflen < 2))
 		return (EINVAL);
 	if (buflen > MAXPATHLEN)
 		buflen = MAXPATHLEN;
 
 	buf = uma_zalloc(namei_zone, M_WAITOK);
 	error = vn_getcwd(buf, &retbuf, &buflen);
 	if (error == 0)
 		error = copyout(retbuf, uap->buf, buflen);
 	uma_zfree(namei_zone, buf);
 	return (error);
 }
 
 int
 vn_getcwd(char *buf, char **retbuf, size_t *buflen)
 {
 	struct pwd *pwd;
 	int error;
 
 	vfs_smr_enter();
 	pwd = pwd_get_smr();
 	error = vn_fullpath_any_smr(pwd->pwd_cdir, pwd->pwd_rdir, buf, retbuf,
 	    buflen, 0);
 	VFS_SMR_ASSERT_NOT_ENTERED();
 	if (error < 0) {
 		pwd = pwd_hold(curthread);
 		error = vn_fullpath_any(pwd->pwd_cdir, pwd->pwd_rdir, buf,
 		    retbuf, buflen);
 		pwd_drop(pwd);
 	}
 
 #ifdef KTRACE
 	if (KTRPOINT(curthread, KTR_NAMEI) && error == 0)
 		ktrnamei(*retbuf);
 #endif
 	return (error);
 }
 
 /*
  * Canonicalize a path by walking it forward and back.
  *
  * BUGS:
  * - Nothing guarantees the integrity of the entire chain. Consider the case
  *   where the path "foo/bar/baz/qux" is passed, but "bar" is moved out of
  *   "foo" into "quux" during the backwards walk. The result will be
  *   "quux/bar/baz/qux", which could not have been obtained by an incremental
  *   walk in userspace. Moreover, the path we return is inaccessible if the
  *   calling thread lacks permission to traverse "quux".
  */
 static int
 kern___realpathat(struct thread *td, int fd, const char *path, char *buf,
     size_t size, int flags, enum uio_seg pathseg)
 {
 	struct nameidata nd;
 	char *retbuf, *freebuf;
 	int error;
 
 	if (flags != 0)
 		return (EINVAL);
-	NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | SAVENAME | WANTPARENT | AUDITVNODE1,
+	NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | WANTPARENT | AUDITVNODE1,
 	    pathseg, path, fd, &cap_fstat_rights);
 	if ((error = namei(&nd)) != 0)
 		return (error);
 	error = vn_fullpath_hardlink(nd.ni_vp, nd.ni_dvp, nd.ni_cnd.cn_nameptr,
 	    nd.ni_cnd.cn_namelen, &retbuf, &freebuf, &size);
 	if (error == 0) {
 		error = copyout(retbuf, buf, size);
 		free(freebuf, M_TEMP);
 	}
 	NDFREE(&nd, 0);
 	return (error);
 }
 
 int
 sys___realpathat(struct thread *td, struct __realpathat_args *uap)
 {
 
 	return (kern___realpathat(td, uap->fd, uap->path, uap->buf, uap->size,
 	    uap->flags, UIO_USERSPACE));
 }
 
 /*
  * Retrieve the full filesystem path that correspond to a vnode from the name
  * cache (if available)
  */
 int
 vn_fullpath(struct vnode *vp, char **retbuf, char **freebuf)
 {
 	struct pwd *pwd;
 	char *buf;
 	size_t buflen;
 	int error;
 
 	if (__predict_false(vp == NULL))
 		return (EINVAL);
 
 	buflen = MAXPATHLEN;
 	buf = malloc(buflen, M_TEMP, M_WAITOK);
 	vfs_smr_enter();
 	pwd = pwd_get_smr();
 	error = vn_fullpath_any_smr(vp, pwd->pwd_rdir, buf, retbuf, &buflen, 0);
 	VFS_SMR_ASSERT_NOT_ENTERED();
 	if (error < 0) {
 		pwd = pwd_hold(curthread);
 		error = vn_fullpath_any(vp, pwd->pwd_rdir, buf, retbuf, &buflen);
 		pwd_drop(pwd);
 	}
 	if (error == 0)
 		*freebuf = buf;
 	else
 		free(buf, M_TEMP);
 	return (error);
 }
 
 /*
  * This function is similar to vn_fullpath, but it attempts to lookup the
  * pathname relative to the global root mount point.  This is required for the
  * auditing sub-system, as audited pathnames must be absolute, relative to the
  * global root mount point.
  */
 int
 vn_fullpath_global(struct vnode *vp, char **retbuf, char **freebuf)
 {
 	char *buf;
 	size_t buflen;
 	int error;
 
 	if (__predict_false(vp == NULL))
 		return (EINVAL);
 	buflen = MAXPATHLEN;
 	buf = malloc(buflen, M_TEMP, M_WAITOK);
 	vfs_smr_enter();
 	error = vn_fullpath_any_smr(vp, rootvnode, buf, retbuf, &buflen, 0);
 	VFS_SMR_ASSERT_NOT_ENTERED();
 	if (error < 0) {
 		error = vn_fullpath_any(vp, rootvnode, buf, retbuf, &buflen);
 	}
 	if (error == 0)
 		*freebuf = buf;
 	else
 		free(buf, M_TEMP);
 	return (error);
 }
 
 static struct namecache *
 vn_dd_from_dst(struct vnode *vp)
 {
 	struct namecache *ncp;
 
 	cache_assert_vnode_locked(vp);
 	TAILQ_FOREACH(ncp, &vp->v_cache_dst, nc_dst) {
 		if ((ncp->nc_flag & NCF_ISDOTDOT) == 0)
 			return (ncp);
 	}
 	return (NULL);
 }
 
 int
 vn_vptocnp(struct vnode **vp, char *buf, size_t *buflen)
 {
 	struct vnode *dvp;
 	struct namecache *ncp;
 	struct mtx *vlp;
 	int error;
 
 	vlp = VP2VNODELOCK(*vp);
 	mtx_lock(vlp);
 	ncp = (*vp)->v_cache_dd;
 	if (ncp != NULL && (ncp->nc_flag & NCF_ISDOTDOT) == 0) {
 		KASSERT(ncp == vn_dd_from_dst(*vp),
 		    ("%s: mismatch for dd entry (%p != %p)", __func__,
 		    ncp, vn_dd_from_dst(*vp)));
 	} else {
 		ncp = vn_dd_from_dst(*vp);
 	}
 	if (ncp != NULL) {
 		if (*buflen < ncp->nc_nlen) {
 			mtx_unlock(vlp);
 			vrele(*vp);
 			counter_u64_add(numfullpathfail4, 1);
 			error = ENOMEM;
 			SDT_PROBE3(vfs, namecache, fullpath, return, error,
 			    vp, NULL);
 			return (error);
 		}
 		*buflen -= ncp->nc_nlen;
 		memcpy(buf + *buflen, ncp->nc_name, ncp->nc_nlen);
 		SDT_PROBE3(vfs, namecache, fullpath, hit, ncp->nc_dvp,
 		    ncp->nc_name, vp);
 		dvp = *vp;
 		*vp = ncp->nc_dvp;
 		vref(*vp);
 		mtx_unlock(vlp);
 		vrele(dvp);
 		return (0);
 	}
 	SDT_PROBE1(vfs, namecache, fullpath, miss, vp);
 
 	mtx_unlock(vlp);
 	vn_lock(*vp, LK_SHARED | LK_RETRY);
 	error = VOP_VPTOCNP(*vp, &dvp, buf, buflen);
 	vput(*vp);
 	if (error) {
 		counter_u64_add(numfullpathfail2, 1);
 		SDT_PROBE3(vfs, namecache, fullpath, return,  error, vp, NULL);
 		return (error);
 	}
 
 	*vp = dvp;
 	if (VN_IS_DOOMED(dvp)) {
 		/* forced unmount */
 		vrele(dvp);
 		error = ENOENT;
 		SDT_PROBE3(vfs, namecache, fullpath, return, error, vp, NULL);
 		return (error);
 	}
 	/*
 	 * *vp has its use count incremented still.
 	 */
 
 	return (0);
 }
 
 /*
  * Resolve a directory to a pathname.
  *
  * The name of the directory can always be found in the namecache or fetched
  * from the filesystem. There is also guaranteed to be only one parent, meaning
  * we can just follow vnodes up until we find the root.
  *
  * The vnode must be referenced.
  */
 static int
 vn_fullpath_dir(struct vnode *vp, struct vnode *rdir, char *buf, char **retbuf,
     size_t *len, size_t addend)
 {
 #ifdef KDTRACE_HOOKS
 	struct vnode *startvp = vp;
 #endif
 	struct vnode *vp1;
 	size_t buflen;
 	int error;
 	bool slash_prefixed;
 
 	VNPASS(vp->v_type == VDIR || VN_IS_DOOMED(vp), vp);
 	VNPASS(vp->v_usecount > 0, vp);
 
 	buflen = *len;
 
 	slash_prefixed = true;
 	if (addend == 0) {
 		MPASS(*len >= 2);
 		buflen--;
 		buf[buflen] = '\0';
 		slash_prefixed = false;
 	}
 
 	error = 0;
 
 	SDT_PROBE1(vfs, namecache, fullpath, entry, vp);
 	counter_u64_add(numfullpathcalls, 1);
 	while (vp != rdir && vp != rootvnode) {
 		/*
 		 * The vp vnode must be already fully constructed,
 		 * since it is either found in namecache or obtained
 		 * from VOP_VPTOCNP().  We may test for VV_ROOT safely
 		 * without obtaining the vnode lock.
 		 */
 		if ((vp->v_vflag & VV_ROOT) != 0) {
 			vn_lock(vp, LK_RETRY | LK_SHARED);
 
 			/*
 			 * With the vnode locked, check for races with
 			 * unmount, forced or not.  Note that we
 			 * already verified that vp is not equal to
 			 * the root vnode, which means that
 			 * mnt_vnodecovered can be NULL only for the
 			 * case of unmount.
 			 */
 			if (VN_IS_DOOMED(vp) ||
 			    (vp1 = vp->v_mount->mnt_vnodecovered) == NULL ||
 			    vp1->v_mountedhere != vp->v_mount) {
 				vput(vp);
 				error = ENOENT;
 				SDT_PROBE3(vfs, namecache, fullpath, return,
 				    error, vp, NULL);
 				break;
 			}
 
 			vref(vp1);
 			vput(vp);
 			vp = vp1;
 			continue;
 		}
 		if (vp->v_type != VDIR) {
 			vrele(vp);
 			counter_u64_add(numfullpathfail1, 1);
 			error = ENOTDIR;
 			SDT_PROBE3(vfs, namecache, fullpath, return,
 			    error, vp, NULL);
 			break;
 		}
 		error = vn_vptocnp(&vp, buf, &buflen);
 		if (error)
 			break;
 		if (buflen == 0) {
 			vrele(vp);
 			error = ENOMEM;
 			SDT_PROBE3(vfs, namecache, fullpath, return, error,
 			    startvp, NULL);
 			break;
 		}
 		buf[--buflen] = '/';
 		slash_prefixed = true;
 	}
 	if (error)
 		return (error);
 	if (!slash_prefixed) {
 		if (buflen == 0) {
 			vrele(vp);
 			counter_u64_add(numfullpathfail4, 1);
 			SDT_PROBE3(vfs, namecache, fullpath, return, ENOMEM,
 			    startvp, NULL);
 			return (ENOMEM);
 		}
 		buf[--buflen] = '/';
 	}
 	counter_u64_add(numfullpathfound, 1);
 	vrele(vp);
 
 	*retbuf = buf + buflen;
 	SDT_PROBE3(vfs, namecache, fullpath, return, 0, startvp, *retbuf);
 	*len -= buflen;
 	*len += addend;
 	return (0);
 }
 
 /*
  * Resolve an arbitrary vnode to a pathname.
  *
  * Note 2 caveats:
  * - hardlinks are not tracked, thus if the vnode is not a directory this can
  *   resolve to a different path than the one used to find it
  * - namecache is not mandatory, meaning names are not guaranteed to be added
  *   (in which case resolving fails)
  */
 static void __inline
 cache_rev_failed_impl(int *reason, int line)
 {
 
 	*reason = line;
 }
 #define cache_rev_failed(var)	cache_rev_failed_impl((var), __LINE__)
 
 static int
 vn_fullpath_any_smr(struct vnode *vp, struct vnode *rdir, char *buf,
     char **retbuf, size_t *buflen, size_t addend)
 {
 #ifdef KDTRACE_HOOKS
 	struct vnode *startvp = vp;
 #endif
 	struct vnode *tvp;
 	struct mount *mp;
 	struct namecache *ncp;
 	size_t orig_buflen;
 	int reason;
 	int error;
 #ifdef KDTRACE_HOOKS
 	int i;
 #endif
 	seqc_t vp_seqc, tvp_seqc;
 	u_char nc_flag;
 
 	VFS_SMR_ASSERT_ENTERED();
 
 	if (!atomic_load_char(&cache_fast_lookup_enabled)) {
 		vfs_smr_exit();
 		return (-1);
 	}
 
 	orig_buflen = *buflen;
 
 	if (addend == 0) {
 		MPASS(*buflen >= 2);
 		*buflen -= 1;
 		buf[*buflen] = '\0';
 	}
 
 	if (vp == rdir || vp == rootvnode) {
 		if (addend == 0) {
 			*buflen -= 1;
 			buf[*buflen] = '/';
 		}
 		goto out_ok;
 	}
 
 #ifdef KDTRACE_HOOKS
 	i = 0;
 #endif
 	error = -1;
 	ncp = NULL; /* for sdt probe down below */
 	vp_seqc = vn_seqc_read_any(vp);
 	if (seqc_in_modify(vp_seqc)) {
 		cache_rev_failed(&reason);
 		goto out_abort;
 	}
 
 	for (;;) {
 #ifdef KDTRACE_HOOKS
 		i++;
 #endif
 		if ((vp->v_vflag & VV_ROOT) != 0) {
 			mp = atomic_load_ptr(&vp->v_mount);
 			if (mp == NULL) {
 				cache_rev_failed(&reason);
 				goto out_abort;
 			}
 			tvp = atomic_load_ptr(&mp->mnt_vnodecovered);
 			tvp_seqc = vn_seqc_read_any(tvp);
 			if (seqc_in_modify(tvp_seqc)) {
 				cache_rev_failed(&reason);
 				goto out_abort;
 			}
 			if (!vn_seqc_consistent(vp, vp_seqc)) {
 				cache_rev_failed(&reason);
 				goto out_abort;
 			}
 			vp = tvp;
 			vp_seqc = tvp_seqc;
 			continue;
 		}
 		ncp = atomic_load_consume_ptr(&vp->v_cache_dd);
 		if (ncp == NULL) {
 			cache_rev_failed(&reason);
 			goto out_abort;
 		}
 		nc_flag = atomic_load_char(&ncp->nc_flag);
 		if ((nc_flag & NCF_ISDOTDOT) != 0) {
 			cache_rev_failed(&reason);
 			goto out_abort;
 		}
 		if (ncp->nc_nlen >= *buflen) {
 			cache_rev_failed(&reason);
 			error = ENOMEM;
 			goto out_abort;
 		}
 		*buflen -= ncp->nc_nlen;
 		memcpy(buf + *buflen, ncp->nc_name, ncp->nc_nlen);
 		*buflen -= 1;
 		buf[*buflen] = '/';
 		tvp = ncp->nc_dvp;
 		tvp_seqc = vn_seqc_read_any(tvp);
 		if (seqc_in_modify(tvp_seqc)) {
 			cache_rev_failed(&reason);
 			goto out_abort;
 		}
 		if (!vn_seqc_consistent(vp, vp_seqc)) {
 			cache_rev_failed(&reason);
 			goto out_abort;
 		}
 		/*
 		 * Acquire fence provided by vn_seqc_read_any above.
 		 */
 		if (__predict_false(atomic_load_ptr(&vp->v_cache_dd) != ncp)) {
 			cache_rev_failed(&reason);
 			goto out_abort;
 		}
 		if (!cache_ncp_canuse(ncp)) {
 			cache_rev_failed(&reason);
 			goto out_abort;
 		}
 		vp = tvp;
 		vp_seqc = tvp_seqc;
 		if (vp == rdir || vp == rootvnode)
 			break;
 	}
 out_ok:
 	vfs_smr_exit();
 	*retbuf = buf + *buflen;
 	*buflen = orig_buflen - *buflen + addend;
 	SDT_PROBE2(vfs, namecache, fullpath_smr, hit, startvp, *retbuf);
 	return (0);
 
 out_abort:
 	*buflen = orig_buflen;
 	SDT_PROBE4(vfs, namecache, fullpath_smr, miss, startvp, ncp, reason, i);
 	vfs_smr_exit();
 	return (error);
 }
 
 static int
 vn_fullpath_any(struct vnode *vp, struct vnode *rdir, char *buf, char **retbuf,
     size_t *buflen)
 {
 	size_t orig_buflen, addend;
 	int error;
 
 	if (*buflen < 2)
 		return (EINVAL);
 
 	orig_buflen = *buflen;
 
 	vref(vp);
 	addend = 0;
 	if (vp->v_type != VDIR) {
 		*buflen -= 1;
 		buf[*buflen] = '\0';
 		error = vn_vptocnp(&vp, buf, buflen);
 		if (error)
 			return (error);
 		if (*buflen == 0) {
 			vrele(vp);
 			return (ENOMEM);
 		}
 		*buflen -= 1;
 		buf[*buflen] = '/';
 		addend = orig_buflen - *buflen;
 	}
 
 	return (vn_fullpath_dir(vp, rdir, buf, retbuf, buflen, addend));
 }
 
 /*
  * Resolve an arbitrary vnode to a pathname (taking care of hardlinks).
  *
- * Since the namecache does not track hardlinks, the caller is
- * expected to first look up the target vnode with SAVENAME |
- * WANTPARENT flags passed to namei to get dvp and vp.
+ * Since the namecache does not track hardlinks, the caller is expected to
+ * first look up the target vnode with WANTPARENT flag passed to namei to get
+ * dvp and vp.
  *
  * Then we have 2 cases:
  * - if the found vnode is a directory, the path can be constructed just by
  *   following names up the chain
  * - otherwise we populate the buffer with the saved name and start resolving
  *   from the parent
  */
 int
 vn_fullpath_hardlink(struct vnode *vp, struct vnode *dvp,
     const char *hrdl_name, size_t hrdl_name_length,
     char **retbuf, char **freebuf, size_t *buflen)
 {
 	char *buf, *tmpbuf;
 	struct pwd *pwd;
 	size_t addend;
 	int error;
 	enum vtype type;
 
 	if (*buflen < 2)
 		return (EINVAL);
 	if (*buflen > MAXPATHLEN)
 		*buflen = MAXPATHLEN;
 
 	buf = malloc(*buflen, M_TEMP, M_WAITOK);
 
 	addend = 0;
 
 	/*
 	 * Check for VBAD to work around the vp_crossmp bug in lookup().
 	 *
 	 * For example consider tmpfs on /tmp and realpath /tmp. ni_vp will be
 	 * set to mount point's root vnode while ni_dvp will be vp_crossmp.
 	 * If the type is VDIR (like in this very case) we can skip looking
 	 * at ni_dvp in the first place. However, since vnodes get passed here
 	 * unlocked the target may transition to doomed state (type == VBAD)
 	 * before we get to evaluate the condition. If this happens, we will
 	 * populate part of the buffer and descend to vn_fullpath_dir with
 	 * vp == vp_crossmp. Prevent the problem by checking for VBAD.
 	 *
 	 * This should be atomic_load(&vp->v_type) but it is illegal to take
 	 * an address of a bit field, even if said field is sized to char.
 	 * Work around the problem by reading the value into a full-sized enum
 	 * and then re-reading it with atomic_load which will still prevent
 	 * the compiler from re-reading down the road.
 	 */
 	type = vp->v_type;
 	type = atomic_load_int(&type);
 	if (type == VBAD) {
 		error = ENOENT;
 		goto out_bad;
 	}
 	if (type != VDIR) {
 		addend = hrdl_name_length + 2;
 		if (*buflen < addend) {
 			error = ENOMEM;
 			goto out_bad;
 		}
 		*buflen -= addend;
 		tmpbuf = buf + *buflen;
 		tmpbuf[0] = '/';
 		memcpy(&tmpbuf[1], hrdl_name, hrdl_name_length);
 		tmpbuf[addend - 1] = '\0';
 		vp = dvp;
 	}
 
 	vfs_smr_enter();
 	pwd = pwd_get_smr();
 	error = vn_fullpath_any_smr(vp, pwd->pwd_rdir, buf, retbuf, buflen,
 	    addend);
 	VFS_SMR_ASSERT_NOT_ENTERED();
 	if (error < 0) {
 		pwd = pwd_hold(curthread);
 		vref(vp);
 		error = vn_fullpath_dir(vp, pwd->pwd_rdir, buf, retbuf, buflen,
 		    addend);
 		pwd_drop(pwd);
 	}
 	if (error != 0)
 		goto out_bad;
 
 	*freebuf = buf;
 
 	return (0);
 out_bad:
 	free(buf, M_TEMP);
 	return (error);
 }
 
 struct vnode *
 vn_dir_dd_ino(struct vnode *vp)
 {
 	struct namecache *ncp;
 	struct vnode *ddvp;
 	struct mtx *vlp;
 	enum vgetstate vs;
 
 	ASSERT_VOP_LOCKED(vp, "vn_dir_dd_ino");
 	vlp = VP2VNODELOCK(vp);
 	mtx_lock(vlp);
 	TAILQ_FOREACH(ncp, &(vp->v_cache_dst), nc_dst) {
 		if ((ncp->nc_flag & NCF_ISDOTDOT) != 0)
 			continue;
 		ddvp = ncp->nc_dvp;
 		vs = vget_prep(ddvp);
 		mtx_unlock(vlp);
 		if (vget_finish(ddvp, LK_SHARED | LK_NOWAIT, vs))
 			return (NULL);
 		return (ddvp);
 	}
 	mtx_unlock(vlp);
 	return (NULL);
 }
 
 int
 vn_commname(struct vnode *vp, char *buf, u_int buflen)
 {
 	struct namecache *ncp;
 	struct mtx *vlp;
 	int l;
 
 	vlp = VP2VNODELOCK(vp);
 	mtx_lock(vlp);
 	TAILQ_FOREACH(ncp, &vp->v_cache_dst, nc_dst)
 		if ((ncp->nc_flag & NCF_ISDOTDOT) == 0)
 			break;
 	if (ncp == NULL) {
 		mtx_unlock(vlp);
 		return (ENOENT);
 	}
 	l = min(ncp->nc_nlen, buflen - 1);
 	memcpy(buf, ncp->nc_name, l);
 	mtx_unlock(vlp);
 	buf[l] = '\0';
 	return (0);
 }
 
 /*
  * This function updates path string to vnode's full global path
  * and checks the size of the new path string against the pathlen argument.
  *
  * Requires a locked, referenced vnode.
  * Vnode is re-locked on success or ENODEV, otherwise unlocked.
  *
  * If vp is a directory, the call to vn_fullpath_global() always succeeds
  * because it falls back to the ".." lookup if the namecache lookup fails.
  */
 int
 vn_path_to_global_path(struct thread *td, struct vnode *vp, char *path,
     u_int pathlen)
 {
 	struct nameidata nd;
 	struct vnode *vp1;
 	char *rpath, *fbuf;
 	int error;
 
 	ASSERT_VOP_ELOCKED(vp, __func__);
 
 	/* Construct global filesystem path from vp. */
 	VOP_UNLOCK(vp);
 	error = vn_fullpath_global(vp, &rpath, &fbuf);
 
 	if (error != 0) {
 		vrele(vp);
 		return (error);
 	}
 
 	if (strlen(rpath) >= pathlen) {
 		vrele(vp);
 		error = ENAMETOOLONG;
 		goto out;
 	}
 
 	/*
 	 * Re-lookup the vnode by path to detect a possible rename.
 	 * As a side effect, the vnode is relocked.
 	 * If vnode was renamed, return ENOENT.
 	 */
 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_SYSSPACE, path);
 	error = namei(&nd);
 	if (error != 0) {
 		vrele(vp);
 		goto out;
 	}
 	NDFREE_PNBUF(&nd);
 	vp1 = nd.ni_vp;
 	vrele(vp);
 	if (vp1 == vp)
 		strcpy(path, rpath);
 	else {
 		vput(vp1);
 		error = ENOENT;
 	}
 
 out:
 	free(fbuf, M_TEMP);
 	return (error);
 }
 
 #ifdef DDB
 static void
 db_print_vpath(struct vnode *vp)
 {
 
 	while (vp != NULL) {
 		db_printf("%p: ", vp);
 		if (vp == rootvnode) {
 			db_printf("/");
 			vp = NULL;
 		} else {
 			if (vp->v_vflag & VV_ROOT) {
 				db_printf("<mount point>");
 				vp = vp->v_mount->mnt_vnodecovered;
 			} else {
 				struct namecache *ncp;
 				char *ncn;
 				int i;
 
 				ncp = TAILQ_FIRST(&vp->v_cache_dst);
 				if (ncp != NULL) {
 					ncn = ncp->nc_name;
 					for (i = 0; i < ncp->nc_nlen; i++)
 						db_printf("%c", *ncn++);
 					vp = ncp->nc_dvp;
 				} else {
 					vp = NULL;
 				}
 			}
 		}
 		db_printf("\n");
 	}
 
 	return;
 }
 
 DB_SHOW_COMMAND(vpath, db_show_vpath)
 {
 	struct vnode *vp;
 
 	if (!have_addr) {
 		db_printf("usage: show vpath <struct vnode *>\n");
 		return;
 	}
 
 	vp = (struct vnode *)addr;
 	db_print_vpath(vp);
 }
 
 #endif
 
 static int cache_fast_lookup = 1;
 
 #define CACHE_FPL_FAILED	-2020
 
 void
 cache_fast_lookup_enabled_recalc(void)
 {
 	int lookup_flag;
 	int mac_on;
 
 #ifdef MAC
 	mac_on = mac_vnode_check_lookup_enabled();
 	mac_on |= mac_vnode_check_readlink_enabled();
 #else
 	mac_on = 0;
 #endif
 
 	lookup_flag = atomic_load_int(&cache_fast_lookup);
 	if (lookup_flag && !mac_on) {
 		atomic_store_char(&cache_fast_lookup_enabled, true);
 	} else {
 		atomic_store_char(&cache_fast_lookup_enabled, false);
 	}
 }
 
 static int
 syscal_vfs_cache_fast_lookup(SYSCTL_HANDLER_ARGS)
 {
 	int error, old;
 
 	old = atomic_load_int(&cache_fast_lookup);
 	error = sysctl_handle_int(oidp, arg1, arg2, req);
 	if (error == 0 && req->newptr && old != atomic_load_int(&cache_fast_lookup))
 		cache_fast_lookup_enabled_recalc();
 	return (error);
 }
 SYSCTL_PROC(_vfs, OID_AUTO, cache_fast_lookup, CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_MPSAFE,
     &cache_fast_lookup, 0, syscal_vfs_cache_fast_lookup, "IU", "");
 
 /*
  * Components of nameidata (or objects it can point to) which may
  * need restoring in case fast path lookup fails.
  */
 struct nameidata_outer {
 	size_t ni_pathlen;
 	int cn_flags;
 };
 
 struct nameidata_saved {
 #ifdef INVARIANTS
 	char *cn_nameptr;
 	size_t ni_pathlen;
 #endif
 };
 
 #ifdef INVARIANTS
 struct cache_fpl_debug {
 	size_t ni_pathlen;
 };
 #endif
 
 struct cache_fpl {
 	struct nameidata *ndp;
 	struct componentname *cnp;
 	char *nulchar;
 	struct vnode *dvp;
 	struct vnode *tvp;
 	seqc_t dvp_seqc;
 	seqc_t tvp_seqc;
 	uint32_t hash;
 	struct nameidata_saved snd;
 	struct nameidata_outer snd_outer;
 	int line;
 	enum cache_fpl_status status:8;
 	bool in_smr;
 	bool fsearch;
-	bool savename;
 	struct pwd **pwd;
 #ifdef INVARIANTS
 	struct cache_fpl_debug debug;
 #endif
 };
 
 static bool cache_fplookup_mp_supported(struct mount *mp);
 static bool cache_fplookup_is_mp(struct cache_fpl *fpl);
 static int cache_fplookup_cross_mount(struct cache_fpl *fpl);
 static int cache_fplookup_partial_setup(struct cache_fpl *fpl);
 static int cache_fplookup_skip_slashes(struct cache_fpl *fpl);
 static int cache_fplookup_trailingslash(struct cache_fpl *fpl);
 static void cache_fpl_pathlen_dec(struct cache_fpl *fpl);
 static void cache_fpl_pathlen_inc(struct cache_fpl *fpl);
 static void cache_fpl_pathlen_add(struct cache_fpl *fpl, size_t n);
 static void cache_fpl_pathlen_sub(struct cache_fpl *fpl, size_t n);
 
 static void
 cache_fpl_cleanup_cnp(struct componentname *cnp)
 {
 
 	uma_zfree(namei_zone, cnp->cn_pnbuf);
-#ifdef DIAGNOSTIC
 	cnp->cn_pnbuf = NULL;
 	cnp->cn_nameptr = NULL;
-#endif
 }
 
 static struct vnode *
 cache_fpl_handle_root(struct cache_fpl *fpl)
 {
 	struct nameidata *ndp;
 	struct componentname *cnp;
 
 	ndp = fpl->ndp;
 	cnp = fpl->cnp;
 
 	MPASS(*(cnp->cn_nameptr) == '/');
 	cnp->cn_nameptr++;
 	cache_fpl_pathlen_dec(fpl);
 
 	if (__predict_false(*(cnp->cn_nameptr) == '/')) {
 		do {
 			cnp->cn_nameptr++;
 			cache_fpl_pathlen_dec(fpl);
 		} while (*(cnp->cn_nameptr) == '/');
 	}
 
 	return (ndp->ni_rootdir);
 }
 
 static void
 cache_fpl_checkpoint_outer(struct cache_fpl *fpl)
 {
 
 	fpl->snd_outer.ni_pathlen = fpl->ndp->ni_pathlen;
 	fpl->snd_outer.cn_flags = fpl->ndp->ni_cnd.cn_flags;
 }
 
 static void
 cache_fpl_checkpoint(struct cache_fpl *fpl)
 {
 
 #ifdef INVARIANTS
 	fpl->snd.cn_nameptr = fpl->ndp->ni_cnd.cn_nameptr;
 	fpl->snd.ni_pathlen = fpl->debug.ni_pathlen;
 #endif
 }
 
 static void
 cache_fpl_restore_partial(struct cache_fpl *fpl)
 {
 
 	fpl->ndp->ni_cnd.cn_flags = fpl->snd_outer.cn_flags;
 #ifdef INVARIANTS
 	fpl->debug.ni_pathlen = fpl->snd.ni_pathlen;
 #endif
 }
 
 static void
 cache_fpl_restore_abort(struct cache_fpl *fpl)
 {
 
 	cache_fpl_restore_partial(fpl);
 	/*
 	 * It is 0 on entry by API contract.
 	 */
 	fpl->ndp->ni_resflags = 0;
 	fpl->ndp->ni_cnd.cn_nameptr = fpl->ndp->ni_cnd.cn_pnbuf;
 	fpl->ndp->ni_pathlen = fpl->snd_outer.ni_pathlen;
 }
 
 #ifdef INVARIANTS
 #define cache_fpl_smr_assert_entered(fpl) ({			\
 	struct cache_fpl *_fpl = (fpl);				\
 	MPASS(_fpl->in_smr == true);				\
 	VFS_SMR_ASSERT_ENTERED();				\
 })
 #define cache_fpl_smr_assert_not_entered(fpl) ({		\
 	struct cache_fpl *_fpl = (fpl);				\
 	MPASS(_fpl->in_smr == false);				\
 	VFS_SMR_ASSERT_NOT_ENTERED();				\
 })
 static void
 cache_fpl_assert_status(struct cache_fpl *fpl)
 {
 
 	switch (fpl->status) {
 	case CACHE_FPL_STATUS_UNSET:
 		__assert_unreachable();
 		break;
 	case CACHE_FPL_STATUS_DESTROYED:
 	case CACHE_FPL_STATUS_ABORTED:
 	case CACHE_FPL_STATUS_PARTIAL:
 	case CACHE_FPL_STATUS_HANDLED:
 		break;
 	}
 }
 #else
 #define cache_fpl_smr_assert_entered(fpl) do { } while (0)
 #define cache_fpl_smr_assert_not_entered(fpl) do { } while (0)
 #define cache_fpl_assert_status(fpl) do { } while (0)
 #endif
 
 #define cache_fpl_smr_enter_initial(fpl) ({			\
 	struct cache_fpl *_fpl = (fpl);				\
 	vfs_smr_enter();					\
 	_fpl->in_smr = true;					\
 })
 
 #define cache_fpl_smr_enter(fpl) ({				\
 	struct cache_fpl *_fpl = (fpl);				\
 	MPASS(_fpl->in_smr == false);				\
 	vfs_smr_enter();					\
 	_fpl->in_smr = true;					\
 })
 
 #define cache_fpl_smr_exit(fpl) ({				\
 	struct cache_fpl *_fpl = (fpl);				\
 	MPASS(_fpl->in_smr == true);				\
 	vfs_smr_exit();						\
 	_fpl->in_smr = false;					\
 })
 
 static int
 cache_fpl_aborted_early_impl(struct cache_fpl *fpl, int line)
 {
 
 	if (fpl->status != CACHE_FPL_STATUS_UNSET) {
 		KASSERT(fpl->status == CACHE_FPL_STATUS_PARTIAL,
 		    ("%s: converting to abort from %d at %d, set at %d\n",
 		    __func__, fpl->status, line, fpl->line));
 	}
 	cache_fpl_smr_assert_not_entered(fpl);
 	fpl->status = CACHE_FPL_STATUS_ABORTED;
 	fpl->line = line;
 	return (CACHE_FPL_FAILED);
 }
 
 #define cache_fpl_aborted_early(x)	cache_fpl_aborted_early_impl((x), __LINE__)
 
 static int __noinline
 cache_fpl_aborted_impl(struct cache_fpl *fpl, int line)
 {
 	struct nameidata *ndp;
 	struct componentname *cnp;
 
 	ndp = fpl->ndp;
 	cnp = fpl->cnp;
 
 	if (fpl->status != CACHE_FPL_STATUS_UNSET) {
 		KASSERT(fpl->status == CACHE_FPL_STATUS_PARTIAL,
 		    ("%s: converting to abort from %d at %d, set at %d\n",
 		    __func__, fpl->status, line, fpl->line));
 	}
 	fpl->status = CACHE_FPL_STATUS_ABORTED;
 	fpl->line = line;
 	if (fpl->in_smr)
 		cache_fpl_smr_exit(fpl);
 	cache_fpl_restore_abort(fpl);
 	/*
 	 * Resolving symlinks overwrites data passed by the caller.
 	 * Let namei know.
 	 */
 	if (ndp->ni_loopcnt > 0) {
 		fpl->status = CACHE_FPL_STATUS_DESTROYED;
 		cache_fpl_cleanup_cnp(cnp);
 	}
 	return (CACHE_FPL_FAILED);
 }
 
 #define cache_fpl_aborted(x)	cache_fpl_aborted_impl((x), __LINE__)
 
 static int __noinline
 cache_fpl_partial_impl(struct cache_fpl *fpl, int line)
 {
 
 	KASSERT(fpl->status == CACHE_FPL_STATUS_UNSET,
 	    ("%s: setting to partial at %d, but already set to %d at %d\n",
 	    __func__, line, fpl->status, fpl->line));
 	cache_fpl_smr_assert_entered(fpl);
 	fpl->status = CACHE_FPL_STATUS_PARTIAL;
 	fpl->line = line;
 	return (cache_fplookup_partial_setup(fpl));
 }
 
 #define cache_fpl_partial(x)	cache_fpl_partial_impl((x), __LINE__)
 
 static int
 cache_fpl_handled_impl(struct cache_fpl *fpl, int line)
 {
 
 	KASSERT(fpl->status == CACHE_FPL_STATUS_UNSET,
 	    ("%s: setting to handled at %d, but already set to %d at %d\n",
 	    __func__, line, fpl->status, fpl->line));
 	cache_fpl_smr_assert_not_entered(fpl);
 	fpl->status = CACHE_FPL_STATUS_HANDLED;
 	fpl->line = line;
 	return (0);
 }
 
 #define cache_fpl_handled(x)	cache_fpl_handled_impl((x), __LINE__)
 
 static int
 cache_fpl_handled_error_impl(struct cache_fpl *fpl, int error, int line)
 {
 
 	KASSERT(fpl->status == CACHE_FPL_STATUS_UNSET,
 	    ("%s: setting to handled at %d, but already set to %d at %d\n",
 	    __func__, line, fpl->status, fpl->line));
 	MPASS(error != 0);
 	MPASS(error != CACHE_FPL_FAILED);
 	cache_fpl_smr_assert_not_entered(fpl);
 	fpl->status = CACHE_FPL_STATUS_HANDLED;
 	fpl->line = line;
 	fpl->dvp = NULL;
 	fpl->tvp = NULL;
-	fpl->savename = false;
 	return (error);
 }
 
 #define cache_fpl_handled_error(x, e)	cache_fpl_handled_error_impl((x), (e), __LINE__)
 
 static bool
 cache_fpl_terminated(struct cache_fpl *fpl)
 {
 
 	return (fpl->status != CACHE_FPL_STATUS_UNSET);
 }
 
 #define CACHE_FPL_SUPPORTED_CN_FLAGS \
 	(NC_NOMAKEENTRY | NC_KEEPPOSENTRY | LOCKLEAF | LOCKPARENT | WANTPARENT | \
-	 FAILIFEXISTS | FOLLOW | EMPTYPATH | LOCKSHARED | SAVENAME | SAVESTART | \
-	 WILLBEDIR | ISOPEN | NOMACCHECK | AUDITVNODE1 | AUDITVNODE2 | NOCAPCHECK | \
-	 OPENREAD | OPENWRITE | WANTIOCTLCAPS)
+	 FAILIFEXISTS | FOLLOW | EMPTYPATH | LOCKSHARED | SAVESTART | WILLBEDIR | \
+	 ISOPEN | NOMACCHECK | AUDITVNODE1 | AUDITVNODE2 | NOCAPCHECK | OPENREAD | \
+	 OPENWRITE | WANTIOCTLCAPS)
 
 #define CACHE_FPL_INTERNAL_CN_FLAGS \
 	(ISDOTDOT | MAKEENTRY | ISLASTCN)
 
 _Static_assert((CACHE_FPL_SUPPORTED_CN_FLAGS & CACHE_FPL_INTERNAL_CN_FLAGS) == 0,
     "supported and internal flags overlap");
 
 static bool
 cache_fpl_islastcn(struct nameidata *ndp)
 {
 
 	return (*ndp->ni_next == 0);
 }
 
 static bool
 cache_fpl_istrailingslash(struct cache_fpl *fpl)
 {
 
 	MPASS(fpl->nulchar > fpl->cnp->cn_pnbuf);
 	return (*(fpl->nulchar - 1) == '/');
 }
 
 static bool
 cache_fpl_isdotdot(struct componentname *cnp)
 {
 
 	if (cnp->cn_namelen == 2 &&
 	    cnp->cn_nameptr[1] == '.' && cnp->cn_nameptr[0] == '.')
 		return (true);
 	return (false);
 }
 
 static bool
 cache_can_fplookup(struct cache_fpl *fpl)
 {
 	struct nameidata *ndp;
 	struct componentname *cnp;
 	struct thread *td;
 
 	ndp = fpl->ndp;
 	cnp = fpl->cnp;
 	td = curthread;
 
 	if (!atomic_load_char(&cache_fast_lookup_enabled)) {
 		cache_fpl_aborted_early(fpl);
 		return (false);
 	}
 	if ((cnp->cn_flags & ~CACHE_FPL_SUPPORTED_CN_FLAGS) != 0) {
 		cache_fpl_aborted_early(fpl);
 		return (false);
 	}
 	if (IN_CAPABILITY_MODE(td)) {
 		cache_fpl_aborted_early(fpl);
 		return (false);
 	}
 	if (AUDITING_TD(td)) {
 		cache_fpl_aborted_early(fpl);
 		return (false);
 	}
 	if (ndp->ni_startdir != NULL) {
 		cache_fpl_aborted_early(fpl);
 		return (false);
 	}
 	return (true);
 }
 
 static int __noinline
 cache_fplookup_dirfd(struct cache_fpl *fpl, struct vnode **vpp)
 {
 	struct nameidata *ndp;
 	struct componentname *cnp;
 	int error;
 	bool fsearch;
 
 	ndp = fpl->ndp;
 	cnp = fpl->cnp;
 
 	error = fgetvp_lookup_smr(ndp->ni_dirfd, ndp, vpp, &fsearch);
 	if (__predict_false(error != 0)) {
 		return (cache_fpl_aborted(fpl));
 	}
 	fpl->fsearch = fsearch;
 	if ((*vpp)->v_type != VDIR) {
 		if (!((cnp->cn_flags & EMPTYPATH) != 0 && cnp->cn_pnbuf[0] == '\0')) {
 			cache_fpl_smr_exit(fpl);
 			return (cache_fpl_handled_error(fpl, ENOTDIR));
 		}
 	}
 	return (0);
 }
 
 static int __noinline
 cache_fplookup_negative_promote(struct cache_fpl *fpl, struct namecache *oncp,
     uint32_t hash)
 {
 	struct componentname *cnp;
 	struct vnode *dvp;
 
 	cnp = fpl->cnp;
 	dvp = fpl->dvp;
 
 	cache_fpl_smr_exit(fpl);
 	if (cache_neg_promote_cond(dvp, cnp, oncp, hash))
 		return (cache_fpl_handled_error(fpl, ENOENT));
 	else
 		return (cache_fpl_aborted(fpl));
 }
 
 /*
  * The target vnode is not supported, prepare for the slow path to take over.
  */
 static int __noinline
 cache_fplookup_partial_setup(struct cache_fpl *fpl)
 {
 	struct nameidata *ndp;
 	struct componentname *cnp;
 	enum vgetstate dvs;
 	struct vnode *dvp;
 	struct pwd *pwd;
 	seqc_t dvp_seqc;
 
 	ndp = fpl->ndp;
 	cnp = fpl->cnp;
 	pwd = *(fpl->pwd);
 	dvp = fpl->dvp;
 	dvp_seqc = fpl->dvp_seqc;
 
 	if (!pwd_hold_smr(pwd)) {
 		return (cache_fpl_aborted(fpl));
 	}
 
 	/*
 	 * Note that seqc is checked before the vnode is locked, so by
 	 * the time regular lookup gets to it it may have moved.
 	 *
 	 * Ultimately this does not affect correctness, any lookup errors
 	 * are userspace racing with itself. It is guaranteed that any
 	 * path which ultimately gets found could also have been found
 	 * by regular lookup going all the way in absence of concurrent
 	 * modifications.
 	 */
 	dvs = vget_prep_smr(dvp);
 	cache_fpl_smr_exit(fpl);
 	if (__predict_false(dvs == VGET_NONE)) {
 		pwd_drop(pwd);
 		return (cache_fpl_aborted(fpl));
 	}
 
 	vget_finish_ref(dvp, dvs);
 	if (!vn_seqc_consistent(dvp, dvp_seqc)) {
 		vrele(dvp);
 		pwd_drop(pwd);
 		return (cache_fpl_aborted(fpl));
 	}
 
 	cache_fpl_restore_partial(fpl);
 #ifdef INVARIANTS
 	if (cnp->cn_nameptr != fpl->snd.cn_nameptr) {
 		panic("%s: cn_nameptr mismatch (%p != %p) full [%s]\n", __func__,
 		    cnp->cn_nameptr, fpl->snd.cn_nameptr, cnp->cn_pnbuf);
 	}
 #endif
 
 	ndp->ni_startdir = dvp;
 	cnp->cn_flags |= MAKEENTRY;
 	if (cache_fpl_islastcn(ndp))
 		cnp->cn_flags |= ISLASTCN;
 	if (cache_fpl_isdotdot(cnp))
 		cnp->cn_flags |= ISDOTDOT;
 
 	/*
 	 * Skip potential extra slashes parsing did not take care of.
 	 * cache_fplookup_skip_slashes explains the mechanism.
 	 */
 	if (__predict_false(*(cnp->cn_nameptr) == '/')) {
 		do {
 			cnp->cn_nameptr++;
 			cache_fpl_pathlen_dec(fpl);
 		} while (*(cnp->cn_nameptr) == '/');
 	}
 
 	ndp->ni_pathlen = fpl->nulchar - cnp->cn_nameptr + 1;
 #ifdef INVARIANTS
 	if (ndp->ni_pathlen != fpl->debug.ni_pathlen) {
 		panic("%s: mismatch (%zu != %zu) nulchar %p nameptr %p [%s] ; full string [%s]\n",
 		    __func__, ndp->ni_pathlen, fpl->debug.ni_pathlen, fpl->nulchar,
 		    cnp->cn_nameptr, cnp->cn_nameptr, cnp->cn_pnbuf);
 	}
 #endif
 	return (0);
 }
 
 static int
 cache_fplookup_final_child(struct cache_fpl *fpl, enum vgetstate tvs)
 {
 	struct componentname *cnp;
 	struct vnode *tvp;
 	seqc_t tvp_seqc;
 	int error, lkflags;
 
 	cnp = fpl->cnp;
 	tvp = fpl->tvp;
 	tvp_seqc = fpl->tvp_seqc;
 
 	if ((cnp->cn_flags & LOCKLEAF) != 0) {
 		lkflags = LK_SHARED;
 		if ((cnp->cn_flags & LOCKSHARED) == 0)
 			lkflags = LK_EXCLUSIVE;
 		error = vget_finish(tvp, lkflags, tvs);
 		if (__predict_false(error != 0)) {
 			return (cache_fpl_aborted(fpl));
 		}
 	} else {
 		vget_finish_ref(tvp, tvs);
 	}
 
 	if (!vn_seqc_consistent(tvp, tvp_seqc)) {
 		if ((cnp->cn_flags & LOCKLEAF) != 0)
 			vput(tvp);
 		else
 			vrele(tvp);
 		return (cache_fpl_aborted(fpl));
 	}
 
 	return (cache_fpl_handled(fpl));
 }
 
 /*
  * They want to possibly modify the state of the namecache.
  */
 static int __noinline
 cache_fplookup_final_modifying(struct cache_fpl *fpl)
 {
 	struct nameidata *ndp;
 	struct componentname *cnp;
 	enum vgetstate dvs;
 	struct vnode *dvp, *tvp;
 	struct mount *mp;
 	seqc_t dvp_seqc;
 	int error;
 	bool docache;
 
 	ndp = fpl->ndp;
 	cnp = fpl->cnp;
 	dvp = fpl->dvp;
 	dvp_seqc = fpl->dvp_seqc;
 
 	MPASS(*(cnp->cn_nameptr) != '/');
 	MPASS(cache_fpl_islastcn(ndp));
 	if ((cnp->cn_flags & LOCKPARENT) == 0)
 		MPASS((cnp->cn_flags & WANTPARENT) != 0);
 	MPASS((cnp->cn_flags & TRAILINGSLASH) == 0);
 	MPASS(cnp->cn_nameiop == CREATE || cnp->cn_nameiop == DELETE ||
 	    cnp->cn_nameiop == RENAME);
 	MPASS((cnp->cn_flags & MAKEENTRY) == 0);
 	MPASS((cnp->cn_flags & ISDOTDOT) == 0);
 
 	docache = (cnp->cn_flags & NOCACHE) ^ NOCACHE;
 	if (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)
 		docache = false;
 
 	/*
 	 * Regular lookup nulifies the slash, which we don't do here.
 	 * Don't take chances with filesystem routines seeing it for
 	 * the last entry.
 	 */
 	if (cache_fpl_istrailingslash(fpl)) {
 		return (cache_fpl_partial(fpl));
 	}
 
 	mp = atomic_load_ptr(&dvp->v_mount);
 	if (__predict_false(mp == NULL)) {
 		return (cache_fpl_aborted(fpl));
 	}
 
 	if (__predict_false(mp->mnt_flag & MNT_RDONLY)) {
 		cache_fpl_smr_exit(fpl);
 		/*
 		 * Original code keeps not checking for CREATE which
 		 * might be a bug. For now let the old lookup decide.
 		 */
 		if (cnp->cn_nameiop == CREATE) {
 			return (cache_fpl_aborted(fpl));
 		}
 		return (cache_fpl_handled_error(fpl, EROFS));
 	}
 
 	if (fpl->tvp != NULL && (cnp->cn_flags & FAILIFEXISTS) != 0) {
 		cache_fpl_smr_exit(fpl);
 		return (cache_fpl_handled_error(fpl, EEXIST));
 	}
 
 	/*
 	 * Secure access to dvp; check cache_fplookup_partial_setup for
 	 * reasoning.
 	 *
 	 * XXX At least UFS requires its lookup routine to be called for
 	 * the last path component, which leads to some level of complication
 	 * and inefficiency:
 	 * - the target routine always locks the target vnode, but our caller
 	 *   may not need it locked
 	 * - some of the VOP machinery asserts that the parent is locked, which
 	 *   once more may be not required
 	 *
 	 * TODO: add a flag for filesystems which don't need this.
 	 */
 	dvs = vget_prep_smr(dvp);
 	cache_fpl_smr_exit(fpl);
 	if (__predict_false(dvs == VGET_NONE)) {
 		return (cache_fpl_aborted(fpl));
 	}
 
 	vget_finish_ref(dvp, dvs);
 	if (!vn_seqc_consistent(dvp, dvp_seqc)) {
 		vrele(dvp);
 		return (cache_fpl_aborted(fpl));
 	}
 
 	error = vn_lock(dvp, LK_EXCLUSIVE);
 	if (__predict_false(error != 0)) {
 		vrele(dvp);
 		return (cache_fpl_aborted(fpl));
 	}
 
 	tvp = NULL;
 	cnp->cn_flags |= ISLASTCN;
 	if (docache)
 		cnp->cn_flags |= MAKEENTRY;
 	if (cache_fpl_isdotdot(cnp))
 		cnp->cn_flags |= ISDOTDOT;
 	cnp->cn_lkflags = LK_EXCLUSIVE;
 	error = VOP_LOOKUP(dvp, &tvp, cnp);
 	switch (error) {
 	case EJUSTRETURN:
 	case 0:
 		break;
 	case ENOTDIR:
 	case ENOENT:
 		vput(dvp);
 		return (cache_fpl_handled_error(fpl, error));
 	default:
 		vput(dvp);
 		return (cache_fpl_aborted(fpl));
 	}
 
 	fpl->tvp = tvp;
-	fpl->savename = (cnp->cn_flags & SAVENAME) != 0;
 
 	if (tvp == NULL) {
 		if ((cnp->cn_flags & SAVESTART) != 0) {
 			ndp->ni_startdir = dvp;
 			vrefact(ndp->ni_startdir);
-			cnp->cn_flags |= SAVENAME;
-			fpl->savename = true;
 		}
 		MPASS(error == EJUSTRETURN);
 		if ((cnp->cn_flags & LOCKPARENT) == 0) {
 			VOP_UNLOCK(dvp);
 		}
 		return (cache_fpl_handled(fpl));
 	}
 
 	/*
 	 * There are very hairy corner cases concerning various flag combinations
 	 * and locking state. In particular here we only hold one lock instead of
 	 * two.
 	 *
 	 * Skip the complexity as it is of no significance for normal workloads.
 	 */
 	if (__predict_false(tvp == dvp)) {
 		vput(dvp);
 		vrele(tvp);
 		return (cache_fpl_aborted(fpl));
 	}
 
 	/*
 	 * If they want the symlink itself we are fine, but if they want to
 	 * follow it regular lookup has to be engaged.
 	 */
 	if (tvp->v_type == VLNK) {
 		if ((cnp->cn_flags & FOLLOW) != 0) {
 			vput(dvp);
 			vput(tvp);
 			return (cache_fpl_aborted(fpl));
 		}
 	}
 
 	/*
 	 * Since we expect this to be the terminal vnode it should almost never
 	 * be a mount point.
 	 */
 	if (__predict_false(cache_fplookup_is_mp(fpl))) {
 		vput(dvp);
 		vput(tvp);
 		return (cache_fpl_aborted(fpl));
 	}
 
 	if ((cnp->cn_flags & FAILIFEXISTS) != 0) {
 		vput(dvp);
 		vput(tvp);
 		return (cache_fpl_handled_error(fpl, EEXIST));
 	}
 
 	if ((cnp->cn_flags & LOCKLEAF) == 0) {
 		VOP_UNLOCK(tvp);
 	}
 
 	if ((cnp->cn_flags & LOCKPARENT) == 0) {
 		VOP_UNLOCK(dvp);
 	}
 
 	if ((cnp->cn_flags & SAVESTART) != 0) {
 		ndp->ni_startdir = dvp;
 		vrefact(ndp->ni_startdir);
-		cnp->cn_flags |= SAVENAME;
-		fpl->savename = true;
 	}
 
 	return (cache_fpl_handled(fpl));
 }
 
 static int __noinline
 cache_fplookup_modifying(struct cache_fpl *fpl)
 {
 	struct nameidata *ndp;
 
 	ndp = fpl->ndp;
 
 	if (!cache_fpl_islastcn(ndp)) {
 		return (cache_fpl_partial(fpl));
 	}
 	return (cache_fplookup_final_modifying(fpl));
 }
 
 static int __noinline
 cache_fplookup_final_withparent(struct cache_fpl *fpl)
 {
 	struct componentname *cnp;
 	enum vgetstate dvs, tvs;
 	struct vnode *dvp, *tvp;
 	seqc_t dvp_seqc;
 	int error;
 
 	cnp = fpl->cnp;
 	dvp = fpl->dvp;
 	dvp_seqc = fpl->dvp_seqc;
 	tvp = fpl->tvp;
 
 	MPASS((cnp->cn_flags & (LOCKPARENT|WANTPARENT)) != 0);
 
 	/*
 	 * This is less efficient than it can be for simplicity.
 	 */
 	dvs = vget_prep_smr(dvp);
 	if (__predict_false(dvs == VGET_NONE)) {
 		return (cache_fpl_aborted(fpl));
 	}
 	tvs = vget_prep_smr(tvp);
 	if (__predict_false(tvs == VGET_NONE)) {
 		cache_fpl_smr_exit(fpl);
 		vget_abort(dvp, dvs);
 		return (cache_fpl_aborted(fpl));
 	}
 
 	cache_fpl_smr_exit(fpl);
 
 	if ((cnp->cn_flags & LOCKPARENT) != 0) {
 		error = vget_finish(dvp, LK_EXCLUSIVE, dvs);
 		if (__predict_false(error != 0)) {
 			vget_abort(tvp, tvs);
 			return (cache_fpl_aborted(fpl));
 		}
 	} else {
 		vget_finish_ref(dvp, dvs);
 	}
 
 	if (!vn_seqc_consistent(dvp, dvp_seqc)) {
 		vget_abort(tvp, tvs);
 		if ((cnp->cn_flags & LOCKPARENT) != 0)
 			vput(dvp);
 		else
 			vrele(dvp);
 		return (cache_fpl_aborted(fpl));
 	}
 
 	error = cache_fplookup_final_child(fpl, tvs);
 	if (__predict_false(error != 0)) {
 		MPASS(fpl->status == CACHE_FPL_STATUS_ABORTED ||
 		    fpl->status == CACHE_FPL_STATUS_DESTROYED);
 		if ((cnp->cn_flags & LOCKPARENT) != 0)
 			vput(dvp);
 		else
 			vrele(dvp);
 		return (error);
 	}
 
 	MPASS(fpl->status == CACHE_FPL_STATUS_HANDLED);
 	return (0);
 }
 
 static int
 cache_fplookup_final(struct cache_fpl *fpl)
 {
 	struct componentname *cnp;
 	enum vgetstate tvs;
 	struct vnode *dvp, *tvp;
 	seqc_t dvp_seqc;
 
 	cnp = fpl->cnp;
 	dvp = fpl->dvp;
 	dvp_seqc = fpl->dvp_seqc;
 	tvp = fpl->tvp;
 
 	MPASS(*(cnp->cn_nameptr) != '/');
 
 	if (cnp->cn_nameiop != LOOKUP) {
 		return (cache_fplookup_final_modifying(fpl));
 	}
 
 	if ((cnp->cn_flags & (LOCKPARENT|WANTPARENT)) != 0)
 		return (cache_fplookup_final_withparent(fpl));
 
 	tvs = vget_prep_smr(tvp);
 	if (__predict_false(tvs == VGET_NONE)) {
 		return (cache_fpl_partial(fpl));
 	}
 
 	if (!vn_seqc_consistent(dvp, dvp_seqc)) {
 		cache_fpl_smr_exit(fpl);
 		vget_abort(tvp, tvs);
 		return (cache_fpl_aborted(fpl));
 	}
 
 	cache_fpl_smr_exit(fpl);
 	return (cache_fplookup_final_child(fpl, tvs));
 }
 
 /*
  * Comment from locked lookup:
  * Check for degenerate name (e.g. / or "") which is a way of talking about a
  * directory, e.g. like "/." or ".".
  */
 static int __noinline
 cache_fplookup_degenerate(struct cache_fpl *fpl)
 {
 	struct componentname *cnp;
 	struct vnode *dvp;
 	enum vgetstate dvs;
 	int error, lkflags;
 #ifdef INVARIANTS
 	char *cp;
 #endif
 
 	fpl->tvp = fpl->dvp;
 	fpl->tvp_seqc = fpl->dvp_seqc;
 
 	cnp = fpl->cnp;
 	dvp = fpl->dvp;
 
 #ifdef INVARIANTS
 	for (cp = cnp->cn_pnbuf; *cp != '\0'; cp++) {
 		KASSERT(*cp == '/',
 		    ("%s: encountered non-slash; string [%s]\n", __func__,
 		    cnp->cn_pnbuf));
 	}
 #endif
 
 	if (__predict_false(cnp->cn_nameiop != LOOKUP)) {
 		cache_fpl_smr_exit(fpl);
 		return (cache_fpl_handled_error(fpl, EISDIR));
 	}
 
 	MPASS((cnp->cn_flags & SAVESTART) == 0);
 
 	if ((cnp->cn_flags & (LOCKPARENT|WANTPARENT)) != 0) {
 		return (cache_fplookup_final_withparent(fpl));
 	}
 
 	dvs = vget_prep_smr(dvp);
 	cache_fpl_smr_exit(fpl);
 	if (__predict_false(dvs == VGET_NONE)) {
 		return (cache_fpl_aborted(fpl));
 	}
 
 	if ((cnp->cn_flags & LOCKLEAF) != 0) {
 		lkflags = LK_SHARED;
 		if ((cnp->cn_flags & LOCKSHARED) == 0)
 			lkflags = LK_EXCLUSIVE;
 		error = vget_finish(dvp, lkflags, dvs);
 		if (__predict_false(error != 0)) {
 			return (cache_fpl_aborted(fpl));
 		}
 	} else {
 		vget_finish_ref(dvp, dvs);
 	}
 	return (cache_fpl_handled(fpl));
 }
 
 static int __noinline
 cache_fplookup_emptypath(struct cache_fpl *fpl)
 {
 	struct nameidata *ndp;
 	struct componentname *cnp;
 	enum vgetstate tvs;
 	struct vnode *tvp;
 	int error, lkflags;
 
 	fpl->tvp = fpl->dvp;
 	fpl->tvp_seqc = fpl->dvp_seqc;
 
 	ndp = fpl->ndp;
 	cnp = fpl->cnp;
 	tvp = fpl->tvp;
 
 	MPASS(*cnp->cn_pnbuf == '\0');
 
 	if (__predict_false((cnp->cn_flags & EMPTYPATH) == 0)) {
 		cache_fpl_smr_exit(fpl);
 		return (cache_fpl_handled_error(fpl, ENOENT));
 	}
 
 	MPASS((cnp->cn_flags & (LOCKPARENT | WANTPARENT)) == 0);
 
 	tvs = vget_prep_smr(tvp);
 	cache_fpl_smr_exit(fpl);
 	if (__predict_false(tvs == VGET_NONE)) {
 		return (cache_fpl_aborted(fpl));
 	}
 
 	if ((cnp->cn_flags & LOCKLEAF) != 0) {
 		lkflags = LK_SHARED;
 		if ((cnp->cn_flags & LOCKSHARED) == 0)
 			lkflags = LK_EXCLUSIVE;
 		error = vget_finish(tvp, lkflags, tvs);
 		if (__predict_false(error != 0)) {
 			return (cache_fpl_aborted(fpl));
 		}
 	} else {
 		vget_finish_ref(tvp, tvs);
 	}
 
 	ndp->ni_resflags |= NIRES_EMPTYPATH;
 	return (cache_fpl_handled(fpl));
 }
 
 static int __noinline
 cache_fplookup_noentry(struct cache_fpl *fpl)
 {
 	struct nameidata *ndp;
 	struct componentname *cnp;
 	enum vgetstate dvs;
 	struct vnode *dvp, *tvp;
 	seqc_t dvp_seqc;
 	int error;
 
 	ndp = fpl->ndp;
 	cnp = fpl->cnp;
 	dvp = fpl->dvp;
 	dvp_seqc = fpl->dvp_seqc;
 
 	MPASS((cnp->cn_flags & MAKEENTRY) == 0);
 	MPASS((cnp->cn_flags & ISDOTDOT) == 0);
 	if (cnp->cn_nameiop == LOOKUP)
 		MPASS((cnp->cn_flags & NOCACHE) == 0);
 	MPASS(!cache_fpl_isdotdot(cnp));
 
 	/*
 	 * Hack: delayed name len checking.
 	 */
 	if (__predict_false(cnp->cn_namelen > NAME_MAX)) {
 		cache_fpl_smr_exit(fpl);
 		return (cache_fpl_handled_error(fpl, ENAMETOOLONG));
 	}
 
 	if (cnp->cn_nameptr[0] == '/') {
 		return (cache_fplookup_skip_slashes(fpl));
 	}
 
 	if (cnp->cn_pnbuf[0] == '\0') {
 		return (cache_fplookup_emptypath(fpl));
 	}
 
 	if (cnp->cn_nameptr[0] == '\0') {
 		if (fpl->tvp == NULL) {
 			return (cache_fplookup_degenerate(fpl));
 		}
 		return (cache_fplookup_trailingslash(fpl));
 	}
 
 	if (cnp->cn_nameiop != LOOKUP) {
 		fpl->tvp = NULL;
 		return (cache_fplookup_modifying(fpl));
 	}
 
 	MPASS((cnp->cn_flags & SAVESTART) == 0);
 
 	/*
 	 * Only try to fill in the component if it is the last one,
 	 * otherwise not only there may be several to handle but the
 	 * walk may be complicated.
 	 */
 	if (!cache_fpl_islastcn(ndp)) {
 		return (cache_fpl_partial(fpl));
 	}
 
 	/*
 	 * Regular lookup nulifies the slash, which we don't do here.
 	 * Don't take chances with filesystem routines seeing it for
 	 * the last entry.
 	 */
 	if (cache_fpl_istrailingslash(fpl)) {
 		return (cache_fpl_partial(fpl));
 	}
 
 	/*
 	 * Secure access to dvp; check cache_fplookup_partial_setup for
 	 * reasoning.
 	 */
 	dvs = vget_prep_smr(dvp);
 	cache_fpl_smr_exit(fpl);
 	if (__predict_false(dvs == VGET_NONE)) {
 		return (cache_fpl_aborted(fpl));
 	}
 
 	vget_finish_ref(dvp, dvs);
 	if (!vn_seqc_consistent(dvp, dvp_seqc)) {
 		vrele(dvp);
 		return (cache_fpl_aborted(fpl));
 	}
 
 	error = vn_lock(dvp, LK_SHARED);
 	if (__predict_false(error != 0)) {
 		vrele(dvp);
 		return (cache_fpl_aborted(fpl));
 	}
 
 	tvp = NULL;
 	/*
 	 * TODO: provide variants which don't require locking either vnode.
 	 */
 	cnp->cn_flags |= ISLASTCN | MAKEENTRY;
 	cnp->cn_lkflags = LK_SHARED;
 	if ((cnp->cn_flags & LOCKSHARED) == 0) {
 		cnp->cn_lkflags = LK_EXCLUSIVE;
 	}
 	error = VOP_LOOKUP(dvp, &tvp, cnp);
 	switch (error) {
 	case EJUSTRETURN:
 	case 0:
 		break;
 	case ENOTDIR:
 	case ENOENT:
 		vput(dvp);
 		return (cache_fpl_handled_error(fpl, error));
 	default:
 		vput(dvp);
 		return (cache_fpl_aborted(fpl));
 	}
 
 	fpl->tvp = tvp;
-	if (!fpl->savename) {
-		MPASS((cnp->cn_flags & SAVENAME) == 0);
-	}
 
 	if (tvp == NULL) {
 		MPASS(error == EJUSTRETURN);
 		if ((cnp->cn_flags & (WANTPARENT | LOCKPARENT)) == 0) {
 			vput(dvp);
 		} else if ((cnp->cn_flags & LOCKPARENT) == 0) {
 			VOP_UNLOCK(dvp);
 		}
 		return (cache_fpl_handled(fpl));
 	}
 
 	if (tvp->v_type == VLNK) {
 		if ((cnp->cn_flags & FOLLOW) != 0) {
 			vput(dvp);
 			vput(tvp);
 			return (cache_fpl_aborted(fpl));
 		}
 	}
 
 	if (__predict_false(cache_fplookup_is_mp(fpl))) {
 		vput(dvp);
 		vput(tvp);
 		return (cache_fpl_aborted(fpl));
 	}
 
 	if ((cnp->cn_flags & LOCKLEAF) == 0) {
 		VOP_UNLOCK(tvp);
 	}
 
 	if ((cnp->cn_flags & (WANTPARENT | LOCKPARENT)) == 0) {
 		vput(dvp);
 	} else if ((cnp->cn_flags & LOCKPARENT) == 0) {
 		VOP_UNLOCK(dvp);
 	}
 	return (cache_fpl_handled(fpl));
 }
 
 static int __noinline
 cache_fplookup_dot(struct cache_fpl *fpl)
 {
 	int error;
 
 	MPASS(!seqc_in_modify(fpl->dvp_seqc));
 	/*
 	 * Just re-assign the value. seqc will be checked later for the first
 	 * non-dot path component in line and/or before deciding to return the
 	 * vnode.
 	 */
 	fpl->tvp = fpl->dvp;
 	fpl->tvp_seqc = fpl->dvp_seqc;
 
 	counter_u64_add(dothits, 1);
 	SDT_PROBE3(vfs, namecache, lookup, hit, fpl->dvp, ".", fpl->dvp);
 
 	error = 0;
 	if (cache_fplookup_is_mp(fpl)) {
 		error = cache_fplookup_cross_mount(fpl);
 	}
 	return (error);
 }
 
 static int __noinline
 cache_fplookup_dotdot(struct cache_fpl *fpl)
 {
 	struct nameidata *ndp;
 	struct componentname *cnp;
 	struct namecache *ncp;
 	struct vnode *dvp;
 	struct prison *pr;
 	u_char nc_flag;
 
 	ndp = fpl->ndp;
 	cnp = fpl->cnp;
 	dvp = fpl->dvp;
 
 	MPASS(cache_fpl_isdotdot(cnp));
 
 	/*
 	 * XXX this is racy the same way regular lookup is
 	 */
 	for (pr = cnp->cn_cred->cr_prison; pr != NULL;
 	    pr = pr->pr_parent)
 		if (dvp == pr->pr_root)
 			break;
 
 	if (dvp == ndp->ni_rootdir ||
 	    dvp == ndp->ni_topdir ||
 	    dvp == rootvnode ||
 	    pr != NULL) {
 		fpl->tvp = dvp;
 		fpl->tvp_seqc = vn_seqc_read_any(dvp);
 		if (seqc_in_modify(fpl->tvp_seqc)) {
 			return (cache_fpl_aborted(fpl));
 		}
 		return (0);
 	}
 
 	if ((dvp->v_vflag & VV_ROOT) != 0) {
 		/*
 		 * TODO
 		 * The opposite of climb mount is needed here.
 		 */
 		return (cache_fpl_partial(fpl));
 	}
 
 	ncp = atomic_load_consume_ptr(&dvp->v_cache_dd);
 	if (ncp == NULL) {
 		return (cache_fpl_aborted(fpl));
 	}
 
 	nc_flag = atomic_load_char(&ncp->nc_flag);
 	if ((nc_flag & NCF_ISDOTDOT) != 0) {
 		if ((nc_flag & NCF_NEGATIVE) != 0)
 			return (cache_fpl_aborted(fpl));
 		fpl->tvp = ncp->nc_vp;
 	} else {
 		fpl->tvp = ncp->nc_dvp;
 	}
 
 	fpl->tvp_seqc = vn_seqc_read_any(fpl->tvp);
 	if (seqc_in_modify(fpl->tvp_seqc)) {
 		return (cache_fpl_partial(fpl));
 	}
 
 	/*
 	 * Acquire fence provided by vn_seqc_read_any above.
 	 */
 	if (__predict_false(atomic_load_ptr(&dvp->v_cache_dd) != ncp)) {
 		return (cache_fpl_aborted(fpl));
 	}
 
 	if (!cache_ncp_canuse(ncp)) {
 		return (cache_fpl_aborted(fpl));
 	}
 
 	counter_u64_add(dotdothits, 1);
 	return (0);
 }
 
 static int __noinline
 cache_fplookup_neg(struct cache_fpl *fpl, struct namecache *ncp, uint32_t hash)
 {
 	u_char nc_flag __diagused;
 	bool neg_promote;
 
 #ifdef INVARIANTS
 	nc_flag = atomic_load_char(&ncp->nc_flag);
 	MPASS((nc_flag & NCF_NEGATIVE) != 0);
 #endif
 	/*
 	 * If they want to create an entry we need to replace this one.
 	 */
 	if (__predict_false(fpl->cnp->cn_nameiop != LOOKUP)) {
 		fpl->tvp = NULL;
 		return (cache_fplookup_modifying(fpl));
 	}
 	neg_promote = cache_neg_hit_prep(ncp);
 	if (!cache_fpl_neg_ncp_canuse(ncp)) {
 		cache_neg_hit_abort(ncp);
 		return (cache_fpl_partial(fpl));
 	}
 	if (neg_promote) {
 		return (cache_fplookup_negative_promote(fpl, ncp, hash));
 	}
 	cache_neg_hit_finish(ncp);
 	cache_fpl_smr_exit(fpl);
 	return (cache_fpl_handled_error(fpl, ENOENT));
 }
 
 /*
  * Resolve a symlink. Called by filesystem-specific routines.
  *
  * Code flow is:
  * ... -> cache_fplookup_symlink -> VOP_FPLOOKUP_SYMLINK -> cache_symlink_resolve
  */
 int
 cache_symlink_resolve(struct cache_fpl *fpl, const char *string, size_t len)
 {
 	struct nameidata *ndp;
 	struct componentname *cnp;
 	size_t adjust;
 
 	ndp = fpl->ndp;
 	cnp = fpl->cnp;
 
 	if (__predict_false(len == 0)) {
 		return (ENOENT);
 	}
 
 	if (__predict_false(len > MAXPATHLEN - 2)) {
 		if (cache_fpl_istrailingslash(fpl)) {
 			return (EAGAIN);
 		}
 	}
 
 	ndp->ni_pathlen = fpl->nulchar - cnp->cn_nameptr - cnp->cn_namelen + 1;
 #ifdef INVARIANTS
 	if (ndp->ni_pathlen != fpl->debug.ni_pathlen) {
 		panic("%s: mismatch (%zu != %zu) nulchar %p nameptr %p [%s] ; full string [%s]\n",
 		    __func__, ndp->ni_pathlen, fpl->debug.ni_pathlen, fpl->nulchar,
 		    cnp->cn_nameptr, cnp->cn_nameptr, cnp->cn_pnbuf);
 	}
 #endif
 
 	if (__predict_false(len + ndp->ni_pathlen > MAXPATHLEN)) {
 		return (ENAMETOOLONG);
 	}
 
 	if (__predict_false(ndp->ni_loopcnt++ >= MAXSYMLINKS)) {
 		return (ELOOP);
 	}
 
 	adjust = len;
 	if (ndp->ni_pathlen > 1) {
 		bcopy(ndp->ni_next, cnp->cn_pnbuf + len, ndp->ni_pathlen);
 	} else {
 		if (cache_fpl_istrailingslash(fpl)) {
 			adjust = len + 1;
 			cnp->cn_pnbuf[len] = '/';
 			cnp->cn_pnbuf[len + 1] = '\0';
 		} else {
 			cnp->cn_pnbuf[len] = '\0';
 		}
 	}
 	bcopy(string, cnp->cn_pnbuf, len);
 
 	ndp->ni_pathlen += adjust;
 	cache_fpl_pathlen_add(fpl, adjust);
 	cnp->cn_nameptr = cnp->cn_pnbuf;
 	fpl->nulchar = &cnp->cn_nameptr[ndp->ni_pathlen - 1];
 	fpl->tvp = NULL;
 	return (0);
 }
 
 static int __noinline
 cache_fplookup_symlink(struct cache_fpl *fpl)
 {
 	struct mount *mp;
 	struct nameidata *ndp;
 	struct componentname *cnp;
 	struct vnode *dvp, *tvp;
 	int error;
 
 	ndp = fpl->ndp;
 	cnp = fpl->cnp;
 	dvp = fpl->dvp;
 	tvp = fpl->tvp;
 
 	if (cache_fpl_islastcn(ndp)) {
 		if ((cnp->cn_flags & FOLLOW) == 0) {
 			return (cache_fplookup_final(fpl));
 		}
 	}
 
 	mp = atomic_load_ptr(&dvp->v_mount);
 	if (__predict_false(mp == NULL)) {
 		return (cache_fpl_aborted(fpl));
 	}
 
 	/*
 	 * Note this check races against setting the flag just like regular
 	 * lookup.
 	 */
 	if (__predict_false((mp->mnt_flag & MNT_NOSYMFOLLOW) != 0)) {
 		cache_fpl_smr_exit(fpl);
 		return (cache_fpl_handled_error(fpl, EACCES));
 	}
 
 	error = VOP_FPLOOKUP_SYMLINK(tvp, fpl);
 	if (__predict_false(error != 0)) {
 		switch (error) {
 		case EAGAIN:
 			return (cache_fpl_partial(fpl));
 		case ENOENT:
 		case ENAMETOOLONG:
 		case ELOOP:
 			cache_fpl_smr_exit(fpl);
 			return (cache_fpl_handled_error(fpl, error));
 		default:
 			return (cache_fpl_aborted(fpl));
 		}
 	}
 
 	if (*(cnp->cn_nameptr) == '/') {
 		fpl->dvp = cache_fpl_handle_root(fpl);
 		fpl->dvp_seqc = vn_seqc_read_any(fpl->dvp);
 		if (seqc_in_modify(fpl->dvp_seqc)) {
 			return (cache_fpl_aborted(fpl));
 		}
 		/*
 		 * The main loop assumes that ->dvp points to a vnode belonging
 		 * to a filesystem which can do lockless lookup, but the absolute
 		 * symlink can be wandering off to one which does not.
 		 */
 		mp = atomic_load_ptr(&fpl->dvp->v_mount);
 		if (__predict_false(mp == NULL)) {
 			return (cache_fpl_aborted(fpl));
 		}
 		if (!cache_fplookup_mp_supported(mp)) {
 			cache_fpl_checkpoint(fpl);
 			return (cache_fpl_partial(fpl));
 		}
 	}
 	return (0);
 }
 
 static int
 cache_fplookup_next(struct cache_fpl *fpl)
 {
 	struct componentname *cnp;
 	struct namecache *ncp;
 	struct vnode *dvp, *tvp;
 	u_char nc_flag;
 	uint32_t hash;
 	int error;
 
 	cnp = fpl->cnp;
 	dvp = fpl->dvp;
 	hash = fpl->hash;
 
 	if (__predict_false(cnp->cn_nameptr[0] == '.')) {
 		if (cnp->cn_namelen == 1) {
 			return (cache_fplookup_dot(fpl));
 		}
 		if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') {
 			return (cache_fplookup_dotdot(fpl));
 		}
 	}
 
 	MPASS(!cache_fpl_isdotdot(cnp));
 
 	CK_SLIST_FOREACH(ncp, (NCHHASH(hash)), nc_hash) {
 		if (ncp->nc_dvp == dvp && ncp->nc_nlen == cnp->cn_namelen &&
 		    !bcmp(ncp->nc_name, cnp->cn_nameptr, ncp->nc_nlen))
 			break;
 	}
 
 	if (__predict_false(ncp == NULL)) {
 		return (cache_fplookup_noentry(fpl));
 	}
 
 	tvp = atomic_load_ptr(&ncp->nc_vp);
 	nc_flag = atomic_load_char(&ncp->nc_flag);
 	if ((nc_flag & NCF_NEGATIVE) != 0) {
 		return (cache_fplookup_neg(fpl, ncp, hash));
 	}
 
 	if (!cache_ncp_canuse(ncp)) {
 		return (cache_fpl_partial(fpl));
 	}
 
 	fpl->tvp = tvp;
 	fpl->tvp_seqc = vn_seqc_read_any(tvp);
 	if (seqc_in_modify(fpl->tvp_seqc)) {
 		return (cache_fpl_partial(fpl));
 	}
 
 	counter_u64_add(numposhits, 1);
 	SDT_PROBE3(vfs, namecache, lookup, hit, dvp, ncp->nc_name, tvp);
 
 	error = 0;
 	if (cache_fplookup_is_mp(fpl)) {
 		error = cache_fplookup_cross_mount(fpl);
 	}
 	return (error);
 }
 
 static bool
 cache_fplookup_mp_supported(struct mount *mp)
 {
 
 	MPASS(mp != NULL);
 	if ((mp->mnt_kern_flag & MNTK_FPLOOKUP) == 0)
 		return (false);
 	return (true);
 }
 
 /*
  * Walk up the mount stack (if any).
  *
  * Correctness is provided in the following ways:
  * - all vnodes are protected from freeing with SMR
  * - struct mount objects are type stable making them always safe to access
  * - stability of the particular mount is provided by busying it
  * - relationship between the vnode which is mounted on and the mount is
  *   verified with the vnode sequence counter after busying
  * - association between root vnode of the mount and the mount is protected
  *   by busy
  *
  * From that point on we can read the sequence counter of the root vnode
  * and get the next mount on the stack (if any) using the same protection.
  *
  * By the end of successful walk we are guaranteed the reached state was
  * indeed present at least at some point which matches the regular lookup.
  */
 static int __noinline
 cache_fplookup_climb_mount(struct cache_fpl *fpl)
 {
 	struct mount *mp, *prev_mp;
 	struct mount_pcpu *mpcpu, *prev_mpcpu;
 	struct vnode *vp;
 	seqc_t vp_seqc;
 
 	vp = fpl->tvp;
 	vp_seqc = fpl->tvp_seqc;
 
 	VNPASS(vp->v_type == VDIR || vp->v_type == VBAD, vp);
 	mp = atomic_load_ptr(&vp->v_mountedhere);
 	if (__predict_false(mp == NULL)) {
 		return (0);
 	}
 
 	prev_mp = NULL;
 	for (;;) {
 		if (!vfs_op_thread_enter_crit(mp, mpcpu)) {
 			if (prev_mp != NULL)
 				vfs_op_thread_exit_crit(prev_mp, prev_mpcpu);
 			return (cache_fpl_partial(fpl));
 		}
 		if (prev_mp != NULL)
 			vfs_op_thread_exit_crit(prev_mp, prev_mpcpu);
 		if (!vn_seqc_consistent(vp, vp_seqc)) {
 			vfs_op_thread_exit_crit(mp, mpcpu);
 			return (cache_fpl_partial(fpl));
 		}
 		if (!cache_fplookup_mp_supported(mp)) {
 			vfs_op_thread_exit_crit(mp, mpcpu);
 			return (cache_fpl_partial(fpl));
 		}
 		vp = atomic_load_ptr(&mp->mnt_rootvnode);
 		if (vp == NULL) {
 			vfs_op_thread_exit_crit(mp, mpcpu);
 			return (cache_fpl_partial(fpl));
 		}
 		vp_seqc = vn_seqc_read_any(vp);
 		if (seqc_in_modify(vp_seqc)) {
 			vfs_op_thread_exit_crit(mp, mpcpu);
 			return (cache_fpl_partial(fpl));
 		}
 		prev_mp = mp;
 		prev_mpcpu = mpcpu;
 		mp = atomic_load_ptr(&vp->v_mountedhere);
 		if (mp == NULL)
 			break;
 	}
 
 	vfs_op_thread_exit_crit(prev_mp, prev_mpcpu);
 	fpl->tvp = vp;
 	fpl->tvp_seqc = vp_seqc;
 	return (0);
 }
 
 static int __noinline
 cache_fplookup_cross_mount(struct cache_fpl *fpl)
 {
 	struct mount *mp;
 	struct mount_pcpu *mpcpu;
 	struct vnode *vp;
 	seqc_t vp_seqc;
 
 	vp = fpl->tvp;
 	vp_seqc = fpl->tvp_seqc;
 
 	VNPASS(vp->v_type == VDIR || vp->v_type == VBAD, vp);
 	mp = atomic_load_ptr(&vp->v_mountedhere);
 	if (__predict_false(mp == NULL)) {
 		return (0);
 	}
 
 	if (!vfs_op_thread_enter_crit(mp, mpcpu)) {
 		return (cache_fpl_partial(fpl));
 	}
 	if (!vn_seqc_consistent(vp, vp_seqc)) {
 		vfs_op_thread_exit_crit(mp, mpcpu);
 		return (cache_fpl_partial(fpl));
 	}
 	if (!cache_fplookup_mp_supported(mp)) {
 		vfs_op_thread_exit_crit(mp, mpcpu);
 		return (cache_fpl_partial(fpl));
 	}
 	vp = atomic_load_ptr(&mp->mnt_rootvnode);
 	if (__predict_false(vp == NULL)) {
 		vfs_op_thread_exit_crit(mp, mpcpu);
 		return (cache_fpl_partial(fpl));
 	}
 	vp_seqc = vn_seqc_read_any(vp);
 	vfs_op_thread_exit_crit(mp, mpcpu);
 	if (seqc_in_modify(vp_seqc)) {
 		return (cache_fpl_partial(fpl));
 	}
 	mp = atomic_load_ptr(&vp->v_mountedhere);
 	if (__predict_false(mp != NULL)) {
 		/*
 		 * There are possibly more mount points on top.
 		 * Normally this does not happen so for simplicity just start
 		 * over.
 		 */
 		return (cache_fplookup_climb_mount(fpl));
 	}
 
 	fpl->tvp = vp;
 	fpl->tvp_seqc = vp_seqc;
 	return (0);
 }
 
 /*
  * Check if a vnode is mounted on.
  */
 static bool
 cache_fplookup_is_mp(struct cache_fpl *fpl)
 {
 	struct vnode *vp;
 
 	vp = fpl->tvp;
 	return ((vn_irflag_read(vp) & VIRF_MOUNTPOINT) != 0);
 }
 
 /*
  * Parse the path.
  *
  * The code was originally copy-pasted from regular lookup and despite
  * clean ups leaves performance on the table. Any modifications here
  * must take into account that in case off fallback the resulting
  * nameidata state has to be compatible with the original.
  */
 
 /*
  * Debug ni_pathlen tracking.
  */
 #ifdef INVARIANTS
 static void
 cache_fpl_pathlen_add(struct cache_fpl *fpl, size_t n)
 {
 
 	fpl->debug.ni_pathlen += n;
 	KASSERT(fpl->debug.ni_pathlen <= PATH_MAX,
 	    ("%s: pathlen overflow to %zd\n", __func__, fpl->debug.ni_pathlen));
 }
 
 static void
 cache_fpl_pathlen_sub(struct cache_fpl *fpl, size_t n)
 {
 
 	fpl->debug.ni_pathlen -= n;
 	KASSERT(fpl->debug.ni_pathlen <= PATH_MAX,
 	    ("%s: pathlen underflow to %zd\n", __func__, fpl->debug.ni_pathlen));
 }
 
 static void
 cache_fpl_pathlen_inc(struct cache_fpl *fpl)
 {
 
 	cache_fpl_pathlen_add(fpl, 1);
 }
 
 static void
 cache_fpl_pathlen_dec(struct cache_fpl *fpl)
 {
 
 	cache_fpl_pathlen_sub(fpl, 1);
 }
 #else
 static void
 cache_fpl_pathlen_add(struct cache_fpl *fpl, size_t n)
 {
 }
 
 static void
 cache_fpl_pathlen_sub(struct cache_fpl *fpl, size_t n)
 {
 }
 
 static void
 cache_fpl_pathlen_inc(struct cache_fpl *fpl)
 {
 }
 
 static void
 cache_fpl_pathlen_dec(struct cache_fpl *fpl)
 {
 }
 #endif
 
 static void
 cache_fplookup_parse(struct cache_fpl *fpl)
 {
 	struct nameidata *ndp;
 	struct componentname *cnp;
 	struct vnode *dvp;
 	char *cp;
 	uint32_t hash;
 
 	ndp = fpl->ndp;
 	cnp = fpl->cnp;
 	dvp = fpl->dvp;
 
 	/*
 	 * Find the end of this path component, it is either / or nul.
 	 *
 	 * Store / as a temporary sentinel so that we only have one character
 	 * to test for. Pathnames tend to be short so this should not be
 	 * resulting in cache misses.
 	 *
 	 * TODO: fix this to be word-sized.
 	 */
 	MPASS(&cnp->cn_nameptr[fpl->debug.ni_pathlen - 1] >= cnp->cn_pnbuf);
 	KASSERT(&cnp->cn_nameptr[fpl->debug.ni_pathlen - 1] == fpl->nulchar,
 	    ("%s: mismatch between pathlen (%zu) and nulchar (%p != %p), string [%s]\n",
 	    __func__, fpl->debug.ni_pathlen, &cnp->cn_nameptr[fpl->debug.ni_pathlen - 1],
 	    fpl->nulchar, cnp->cn_pnbuf));
 	KASSERT(*fpl->nulchar == '\0',
 	    ("%s: expected nul at %p; string [%s]\n", __func__, fpl->nulchar,
 	    cnp->cn_pnbuf));
 	hash = cache_get_hash_iter_start(dvp);
 	*fpl->nulchar = '/';
 	for (cp = cnp->cn_nameptr; *cp != '/'; cp++) {
 		KASSERT(*cp != '\0',
 		    ("%s: encountered unexpected nul; string [%s]\n", __func__,
 		    cnp->cn_nameptr));
 		hash = cache_get_hash_iter(*cp, hash);
 		continue;
 	}
 	*fpl->nulchar = '\0';
 	fpl->hash = cache_get_hash_iter_finish(hash);
 
 	cnp->cn_namelen = cp - cnp->cn_nameptr;
 	cache_fpl_pathlen_sub(fpl, cnp->cn_namelen);
 
 #ifdef INVARIANTS
 	/*
 	 * cache_get_hash only accepts lengths up to NAME_MAX. This is fine since
 	 * we are going to fail this lookup with ENAMETOOLONG (see below).
 	 */
 	if (cnp->cn_namelen <= NAME_MAX) {
 		if (fpl->hash != cache_get_hash(cnp->cn_nameptr, cnp->cn_namelen, dvp)) {
 			panic("%s: mismatched hash for [%s] len %ld", __func__,
 			    cnp->cn_nameptr, cnp->cn_namelen);
 		}
 	}
 #endif
 
 	/*
 	 * Hack: we have to check if the found path component's length exceeds
 	 * NAME_MAX. However, the condition is very rarely true and check can
 	 * be elided in the common case -- if an entry was found in the cache,
 	 * then it could not have been too long to begin with.
 	 */
 	ndp->ni_next = cp;
 }
 
 static void
 cache_fplookup_parse_advance(struct cache_fpl *fpl)
 {
 	struct nameidata *ndp;
 	struct componentname *cnp;
 
 	ndp = fpl->ndp;
 	cnp = fpl->cnp;
 
 	cnp->cn_nameptr = ndp->ni_next;
 	KASSERT(*(cnp->cn_nameptr) == '/',
 	    ("%s: should have seen slash at %p ; buf %p [%s]\n", __func__,
 	    cnp->cn_nameptr, cnp->cn_pnbuf, cnp->cn_pnbuf));
 	cnp->cn_nameptr++;
 	cache_fpl_pathlen_dec(fpl);
 }
 
 /*
  * Skip spurious slashes in a pathname (e.g., "foo///bar") and retry.
  *
  * Lockless lookup tries to elide checking for spurious slashes and should they
  * be present is guaranteed to fail to find an entry. In this case the caller
  * must check if the name starts with a slash and call this routine.  It is
  * going to fast forward across the spurious slashes and set the state up for
  * retry.
  */
 static int __noinline
 cache_fplookup_skip_slashes(struct cache_fpl *fpl)
 {
 	struct nameidata *ndp;
 	struct componentname *cnp;
 
 	ndp = fpl->ndp;
 	cnp = fpl->cnp;
 
 	MPASS(*(cnp->cn_nameptr) == '/');
 	do {
 		cnp->cn_nameptr++;
 		cache_fpl_pathlen_dec(fpl);
 	} while (*(cnp->cn_nameptr) == '/');
 
 	/*
 	 * Go back to one slash so that cache_fplookup_parse_advance has
 	 * something to skip.
 	 */
 	cnp->cn_nameptr--;
 	cache_fpl_pathlen_inc(fpl);
 
 	/*
 	 * cache_fplookup_parse_advance starts from ndp->ni_next
 	 */
 	ndp->ni_next = cnp->cn_nameptr;
 
 	/*
 	 * See cache_fplookup_dot.
 	 */
 	fpl->tvp = fpl->dvp;
 	fpl->tvp_seqc = fpl->dvp_seqc;
 
 	return (0);
 }
 
 /*
  * Handle trailing slashes (e.g., "foo/").
  *
  * If a trailing slash is found the terminal vnode must be a directory.
  * Regular lookup shortens the path by nulifying the first trailing slash and
  * sets the TRAILINGSLASH flag to denote this took place. There are several
  * checks on it performed later.
  *
  * Similarly to spurious slashes, lockless lookup handles this in a speculative
  * manner relying on an invariant that a non-directory vnode will get a miss.
  * In this case cn_nameptr[0] == '\0' and cn_namelen == 0.
  *
  * Thus for a path like "foo/bar/" the code unwinds the state back to "bar/"
  * and denotes this is the last path component, which avoids looping back.
  *
  * Only plain lookups are supported for now to restrict corner cases to handle.
  */
 static int __noinline
 cache_fplookup_trailingslash(struct cache_fpl *fpl)
 {
 #ifdef INVARIANTS
 	size_t ni_pathlen;
 #endif
 	struct nameidata *ndp;
 	struct componentname *cnp;
 	struct namecache *ncp;
 	struct vnode *tvp;
 	char *cn_nameptr_orig, *cn_nameptr_slash;
 	seqc_t tvp_seqc;
 	u_char nc_flag;
 
 	ndp = fpl->ndp;
 	cnp = fpl->cnp;
 	tvp = fpl->tvp;
 	tvp_seqc = fpl->tvp_seqc;
 
 	MPASS(fpl->dvp == fpl->tvp);
 	KASSERT(cache_fpl_istrailingslash(fpl),
 	    ("%s: expected trailing slash at %p; string [%s]\n", __func__, fpl->nulchar - 1,
 	    cnp->cn_pnbuf));
 	KASSERT(cnp->cn_nameptr[0] == '\0',
 	    ("%s: expected nul char at %p; string [%s]\n", __func__, &cnp->cn_nameptr[0],
 	    cnp->cn_pnbuf));
 	KASSERT(cnp->cn_namelen == 0,
 	    ("%s: namelen 0 but got %ld; string [%s]\n", __func__, cnp->cn_namelen,
 	    cnp->cn_pnbuf));
 	MPASS(cnp->cn_nameptr > cnp->cn_pnbuf);
 
 	if (cnp->cn_nameiop != LOOKUP) {
 		return (cache_fpl_aborted(fpl));
 	}
 
 	if (__predict_false(tvp->v_type != VDIR)) {
 		if (!vn_seqc_consistent(tvp, tvp_seqc)) {
 			return (cache_fpl_aborted(fpl));
 		}
 		cache_fpl_smr_exit(fpl);
 		return (cache_fpl_handled_error(fpl, ENOTDIR));
 	}
 
 	/*
 	 * Denote the last component.
 	 */
 	ndp->ni_next = &cnp->cn_nameptr[0];
 	MPASS(cache_fpl_islastcn(ndp));
 
 	/*
 	 * Unwind trailing slashes.
 	 */
 	cn_nameptr_orig = cnp->cn_nameptr;
 	while (cnp->cn_nameptr >= cnp->cn_pnbuf) {
 		cnp->cn_nameptr--;
 		if (cnp->cn_nameptr[0] != '/') {
 			break;
 		}
 	}
 
 	/*
 	 * Unwind to the beginning of the path component.
 	 *
 	 * Note the path may or may not have started with a slash.
 	 */
 	cn_nameptr_slash = cnp->cn_nameptr;
 	while (cnp->cn_nameptr > cnp->cn_pnbuf) {
 		cnp->cn_nameptr--;
 		if (cnp->cn_nameptr[0] == '/') {
 			break;
 		}
 	}
 	if (cnp->cn_nameptr[0] == '/') {
 		cnp->cn_nameptr++;
 	}
 
 	cnp->cn_namelen = cn_nameptr_slash - cnp->cn_nameptr + 1;
 	cache_fpl_pathlen_add(fpl, cn_nameptr_orig - cnp->cn_nameptr);
 	cache_fpl_checkpoint(fpl);
 
 #ifdef INVARIANTS
 	ni_pathlen = fpl->nulchar - cnp->cn_nameptr + 1;
 	if (ni_pathlen != fpl->debug.ni_pathlen) {
 		panic("%s: mismatch (%zu != %zu) nulchar %p nameptr %p [%s] ; full string [%s]\n",
 		    __func__, ni_pathlen, fpl->debug.ni_pathlen, fpl->nulchar,
 		    cnp->cn_nameptr, cnp->cn_nameptr, cnp->cn_pnbuf);
 	}
 #endif
 
 	/*
 	 * If this was a "./" lookup the parent directory is already correct.
 	 */
 	if (cnp->cn_nameptr[0] == '.' && cnp->cn_namelen == 1) {
 		return (0);
 	}
 
 	/*
 	 * Otherwise we need to look it up.
 	 */
 	tvp = fpl->tvp;
 	ncp = atomic_load_consume_ptr(&tvp->v_cache_dd);
 	if (__predict_false(ncp == NULL)) {
 		return (cache_fpl_aborted(fpl));
 	}
 	nc_flag = atomic_load_char(&ncp->nc_flag);
 	if ((nc_flag & NCF_ISDOTDOT) != 0) {
 		return (cache_fpl_aborted(fpl));
 	}
 	fpl->dvp = ncp->nc_dvp;
 	fpl->dvp_seqc = vn_seqc_read_any(fpl->dvp);
 	if (seqc_in_modify(fpl->dvp_seqc)) {
 		return (cache_fpl_aborted(fpl));
 	}
 	return (0);
 }
 
 /*
  * See the API contract for VOP_FPLOOKUP_VEXEC.
  */
 static int __noinline
 cache_fplookup_failed_vexec(struct cache_fpl *fpl, int error)
 {
 	struct componentname *cnp;
 	struct vnode *dvp;
 	seqc_t dvp_seqc;
 
 	cnp = fpl->cnp;
 	dvp = fpl->dvp;
 	dvp_seqc = fpl->dvp_seqc;
 
 	/*
 	 * Hack: delayed empty path checking.
 	 */
 	if (cnp->cn_pnbuf[0] == '\0') {
 		return (cache_fplookup_emptypath(fpl));
 	}
 
 	/*
 	 * TODO: Due to ignoring trailing slashes lookup will perform a
 	 * permission check on the last dir when it should not be doing it.  It
 	 * may fail, but said failure should be ignored. It is possible to fix
 	 * it up fully without resorting to regular lookup, but for now just
 	 * abort.
 	 */
 	if (cache_fpl_istrailingslash(fpl)) {
 		return (cache_fpl_aborted(fpl));
 	}
 
 	/*
 	 * Hack: delayed degenerate path checking.
 	 */
 	if (cnp->cn_nameptr[0] == '\0' && fpl->tvp == NULL) {
 		return (cache_fplookup_degenerate(fpl));
 	}
 
 	/*
 	 * Hack: delayed name len checking.
 	 */
 	if (__predict_false(cnp->cn_namelen > NAME_MAX)) {
 		cache_fpl_smr_exit(fpl);
 		return (cache_fpl_handled_error(fpl, ENAMETOOLONG));
 	}
 
 	/*
 	 * Hack: they may be looking up foo/bar, where foo is not a directory.
 	 * In such a case we need to return ENOTDIR, but we may happen to get
 	 * here with a different error.
 	 */
 	if (dvp->v_type != VDIR) {
 		error = ENOTDIR;
 	}
 
 	/*
 	 * Hack: handle O_SEARCH.
 	 *
 	 * Open Group Base Specifications Issue 7, 2018 edition states:
 	 * <quote>
 	 * If the access mode of the open file description associated with the
 	 * file descriptor is not O_SEARCH, the function shall check whether
 	 * directory searches are permitted using the current permissions of
 	 * the directory underlying the file descriptor. If the access mode is
 	 * O_SEARCH, the function shall not perform the check.
 	 * </quote>
 	 *
 	 * Regular lookup tests for the NOEXECCHECK flag for every path
 	 * component to decide whether to do the permission check. However,
 	 * since most lookups never have the flag (and when they do it is only
 	 * present for the first path component), lockless lookup only acts on
 	 * it if there is a permission problem. Here the flag is represented
 	 * with a boolean so that we don't have to clear it on the way out.
 	 *
 	 * For simplicity this always aborts.
 	 * TODO: check if this is the first lookup and ignore the permission
 	 * problem. Note the flag has to survive fallback (if it happens to be
 	 * performed).
 	 */
 	if (fpl->fsearch) {
 		return (cache_fpl_aborted(fpl));
 	}
 
 	switch (error) {
 	case EAGAIN:
 		if (!vn_seqc_consistent(dvp, dvp_seqc)) {
 			error = cache_fpl_aborted(fpl);
 		} else {
 			cache_fpl_partial(fpl);
 		}
 		break;
 	default:
 		if (!vn_seqc_consistent(dvp, dvp_seqc)) {
 			error = cache_fpl_aborted(fpl);
 		} else {
 			cache_fpl_smr_exit(fpl);
 			cache_fpl_handled_error(fpl, error);
 		}
 		break;
 	}
 	return (error);
 }
 
 static int
 cache_fplookup_impl(struct vnode *dvp, struct cache_fpl *fpl)
 {
 	struct nameidata *ndp;
 	struct componentname *cnp;
 	struct mount *mp;
 	int error;
 
 	ndp = fpl->ndp;
 	cnp = fpl->cnp;
 
 	cache_fpl_checkpoint(fpl);
 
 	/*
 	 * The vnode at hand is almost always stable, skip checking for it.
 	 * Worst case this postpones the check towards the end of the iteration
 	 * of the main loop.
 	 */
 	fpl->dvp = dvp;
 	fpl->dvp_seqc = vn_seqc_read_notmodify(fpl->dvp);
 
 	mp = atomic_load_ptr(&dvp->v_mount);
 	if (__predict_false(mp == NULL || !cache_fplookup_mp_supported(mp))) {
 		return (cache_fpl_aborted(fpl));
 	}
 
 	MPASS(fpl->tvp == NULL);
 
 	for (;;) {
 		cache_fplookup_parse(fpl);
 
 		error = VOP_FPLOOKUP_VEXEC(fpl->dvp, cnp->cn_cred);
 		if (__predict_false(error != 0)) {
 			error = cache_fplookup_failed_vexec(fpl, error);
 			break;
 		}
 
 		error = cache_fplookup_next(fpl);
 		if (__predict_false(cache_fpl_terminated(fpl))) {
 			break;
 		}
 
 		VNPASS(!seqc_in_modify(fpl->tvp_seqc), fpl->tvp);
 
 		if (fpl->tvp->v_type == VLNK) {
 			error = cache_fplookup_symlink(fpl);
 			if (cache_fpl_terminated(fpl)) {
 				break;
 			}
 		} else {
 			if (cache_fpl_islastcn(ndp)) {
 				error = cache_fplookup_final(fpl);
 				break;
 			}
 
 			if (!vn_seqc_consistent(fpl->dvp, fpl->dvp_seqc)) {
 				error = cache_fpl_aborted(fpl);
 				break;
 			}
 
 			fpl->dvp = fpl->tvp;
 			fpl->dvp_seqc = fpl->tvp_seqc;
 			cache_fplookup_parse_advance(fpl);
 		}
 
 		cache_fpl_checkpoint(fpl);
 	}
 
 	return (error);
 }
 
 /*
  * Fast path lookup protected with SMR and sequence counters.
  *
  * Note: all VOP_FPLOOKUP_VEXEC routines have a comment referencing this one.
  *
  * Filesystems can opt in by setting the MNTK_FPLOOKUP flag and meeting criteria
  * outlined below.
  *
  * Traditional vnode lookup conceptually looks like this:
  *
  * vn_lock(current);
  * for (;;) {
  *	next = find();
  *	vn_lock(next);
  *	vn_unlock(current);
  *	current = next;
  *	if (last)
  *	    break;
  * }
  * return (current);
  *
  * Each jump to the next vnode is safe memory-wise and atomic with respect to
  * any modifications thanks to holding respective locks.
  *
  * The same guarantee can be provided with a combination of safe memory
  * reclamation and sequence counters instead. If all operations which affect
  * the relationship between the current vnode and the one we are looking for
  * also modify the counter, we can verify whether all the conditions held as
  * we made the jump. This includes things like permissions, mount points etc.
  * Counter modification is provided by enclosing relevant places in
  * vn_seqc_write_begin()/end() calls.
  *
  * Thus this translates to:
  *
  * vfs_smr_enter();
  * dvp_seqc = seqc_read_any(dvp);
  * if (seqc_in_modify(dvp_seqc)) // someone is altering the vnode
  *     abort();
  * for (;;) {
  * 	tvp = find();
  * 	tvp_seqc = seqc_read_any(tvp);
  * 	if (seqc_in_modify(tvp_seqc)) // someone is altering the target vnode
  * 	    abort();
  * 	if (!seqc_consistent(dvp, dvp_seqc) // someone is altering the vnode
  * 	    abort();
  * 	dvp = tvp; // we know nothing of importance has changed
  * 	dvp_seqc = tvp_seqc; // store the counter for the tvp iteration
  * 	if (last)
  * 	    break;
  * }
  * vget(); // secure the vnode
  * if (!seqc_consistent(tvp, tvp_seqc) // final check
  * 	    abort();
  * // at this point we know nothing has changed for any parent<->child pair
  * // as they were crossed during the lookup, meaning we matched the guarantee
  * // of the locked variant
  * return (tvp);
  *
  * The API contract for VOP_FPLOOKUP_VEXEC routines is as follows:
  * - they are called while within vfs_smr protection which they must never exit
  * - EAGAIN can be returned to denote checking could not be performed, it is
  *   always valid to return it
  * - if the sequence counter has not changed the result must be valid
  * - if the sequence counter has changed both false positives and false negatives
  *   are permitted (since the result will be rejected later)
  * - for simple cases of unix permission checks vaccess_vexec_smr can be used
  *
  * Caveats to watch out for:
  * - vnodes are passed unlocked and unreferenced with nothing stopping
  *   VOP_RECLAIM, in turn meaning that ->v_data can become NULL. It is advised
  *   to use atomic_load_ptr to fetch it.
  * - the aforementioned object can also get freed, meaning absent other means it
  *   should be protected with vfs_smr
  * - either safely checking permissions as they are modified or guaranteeing
  *   their stability is left to the routine
  */
 int
 cache_fplookup(struct nameidata *ndp, enum cache_fpl_status *status,
     struct pwd **pwdp)
 {
 	struct cache_fpl fpl;
 	struct pwd *pwd;
 	struct vnode *dvp;
 	struct componentname *cnp;
 	int error;
 
 	fpl.status = CACHE_FPL_STATUS_UNSET;
 	fpl.in_smr = false;
 	fpl.ndp = ndp;
 	fpl.cnp = cnp = &ndp->ni_cnd;
 	MPASS(ndp->ni_lcf == 0);
 	KASSERT ((cnp->cn_flags & CACHE_FPL_INTERNAL_CN_FLAGS) == 0,
 	    ("%s: internal flags found in cn_flags %" PRIx64, __func__,
 	    cnp->cn_flags));
 	if ((cnp->cn_flags & SAVESTART) != 0) {
 		MPASS(cnp->cn_nameiop != LOOKUP);
 	}
 	MPASS(cnp->cn_nameptr == cnp->cn_pnbuf);
 
 	if (__predict_false(!cache_can_fplookup(&fpl))) {
 		*status = fpl.status;
 		SDT_PROBE3(vfs, fplookup, lookup, done, ndp, fpl.line, fpl.status);
 		return (EOPNOTSUPP);
 	}
 
 	cache_fpl_checkpoint_outer(&fpl);
 
 	cache_fpl_smr_enter_initial(&fpl);
 #ifdef INVARIANTS
 	fpl.debug.ni_pathlen = ndp->ni_pathlen;
 #endif
 	fpl.nulchar = &cnp->cn_nameptr[ndp->ni_pathlen - 1];
 	fpl.fsearch = false;
-	fpl.savename = (cnp->cn_flags & SAVENAME) != 0;
 	fpl.tvp = NULL; /* for degenerate path handling */
 	fpl.pwd = pwdp;
 	pwd = pwd_get_smr();
 	*(fpl.pwd) = pwd;
 	ndp->ni_rootdir = pwd->pwd_rdir;
 	ndp->ni_topdir = pwd->pwd_jdir;
 
 	if (cnp->cn_pnbuf[0] == '/') {
 		dvp = cache_fpl_handle_root(&fpl);
 		MPASS(ndp->ni_resflags == 0);
 		ndp->ni_resflags = NIRES_ABS;
 	} else {
 		if (ndp->ni_dirfd == AT_FDCWD) {
 			dvp = pwd->pwd_cdir;
 		} else {
 			error = cache_fplookup_dirfd(&fpl, &dvp);
 			if (__predict_false(error != 0)) {
 				goto out;
 			}
 		}
 	}
 
 	SDT_PROBE4(vfs, namei, lookup, entry, dvp, cnp->cn_pnbuf, cnp->cn_flags, true);
 	error = cache_fplookup_impl(dvp, &fpl);
 out:
 	cache_fpl_smr_assert_not_entered(&fpl);
 	cache_fpl_assert_status(&fpl);
 	*status = fpl.status;
 	if (SDT_PROBES_ENABLED()) {
 		SDT_PROBE3(vfs, fplookup, lookup, done, ndp, fpl.line, fpl.status);
 		if (fpl.status == CACHE_FPL_STATUS_HANDLED)
 			SDT_PROBE4(vfs, namei, lookup, return, error, ndp->ni_vp, true,
 			    ndp);
 	}
 
 	if (__predict_true(fpl.status == CACHE_FPL_STATUS_HANDLED)) {
 		MPASS(error != CACHE_FPL_FAILED);
 		if (error != 0) {
+			cache_fpl_cleanup_cnp(fpl.cnp);
 			MPASS(fpl.dvp == NULL);
 			MPASS(fpl.tvp == NULL);
-			MPASS(fpl.savename == false);
 		}
 		ndp->ni_dvp = fpl.dvp;
 		ndp->ni_vp = fpl.tvp;
-		if (fpl.savename) {
-			cnp->cn_flags |= HASBUF;
-		} else {
-			cache_fpl_cleanup_cnp(cnp);
-		}
 	}
 	return (error);
 }
diff --git a/sys/kern/vfs_lookup.c b/sys/kern/vfs_lookup.c
index 79c7fd8365fa..7fac64ff1b38 100644
--- a/sys/kern/vfs_lookup.c
+++ b/sys/kern/vfs_lookup.c
@@ -1,1810 +1,1705 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 1982, 1986, 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
  * (c) UNIX System Laboratories, Inc.
  * All or some portions of this file are derived from material licensed
  * to the University of California by American Telephone and Telegraph
  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
  * the permission of UNIX System Laboratories, Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)vfs_lookup.c	8.4 (Berkeley) 2/16/94
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_capsicum.h"
 #include "opt_ktrace.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/dirent.h>
 #include <sys/kernel.h>
 #include <sys/capsicum.h>
 #include <sys/fcntl.h>
 #include <sys/jail.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/namei.h>
 #include <sys/vnode.h>
 #include <sys/mount.h>
 #include <sys/filedesc.h>
 #include <sys/proc.h>
 #include <sys/sdt.h>
 #include <sys/syscallsubr.h>
 #include <sys/sysctl.h>
 #ifdef KTRACE
 #include <sys/ktrace.h>
 #endif
 #ifdef INVARIANTS
 #include <machine/_inttypes.h>
 #endif
 
 #include <security/audit/audit.h>
 #include <security/mac/mac_framework.h>
 
 #include <vm/uma.h>
 
 #define	NAMEI_DIAGNOSTIC 1
 #undef NAMEI_DIAGNOSTIC
 
 #ifdef INVARIANTS
-static void NDVALIDATE(struct nameidata *);
-#else
-#define NDVALIDATE(ndp) do { } while (0)
+static void NDVALIDATE_impl(struct nameidata *, int);
 #endif
 
+#define NDVALIDATE(ndp) NDVALIDATE_impl(ndp, __LINE__)
+
+
 SDT_PROVIDER_DEFINE(vfs);
 SDT_PROBE_DEFINE4(vfs, namei, lookup, entry, "struct vnode *", "char *",
     "unsigned long", "bool");
 SDT_PROBE_DEFINE4(vfs, namei, lookup, return, "int", "struct vnode *", "bool",
     "struct nameidata");
 
 /* Allocation zone for namei. */
 uma_zone_t namei_zone;
 
 /* Placeholder vnode for mp traversal. */
 static struct vnode *vp_crossmp;
 
 static int
 crossmp_vop_islocked(struct vop_islocked_args *ap)
 {
 
 	return (LK_SHARED);
 }
 
 static int
 crossmp_vop_lock1(struct vop_lock1_args *ap)
 {
 	struct vnode *vp;
 	struct lock *lk __diagused;
 	const char *file __witness_used;
 	int flags, line __witness_used;
 
 	vp = ap->a_vp;
 	lk = vp->v_vnlock;
 	flags = ap->a_flags;
 	file = ap->a_file;
 	line = ap->a_line;
 
 	if ((flags & LK_SHARED) == 0)
 		panic("invalid lock request for crossmp");
 
 	WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER, file, line,
 	    flags & LK_INTERLOCK ? &VI_MTX(vp)->lock_object : NULL);
 	WITNESS_LOCK(&lk->lock_object, 0, file, line);
 	if ((flags & LK_INTERLOCK) != 0)
 		VI_UNLOCK(vp);
 	LOCK_LOG_LOCK("SLOCK", &lk->lock_object, 0, 0, ap->a_file, ap->a_line);
 	return (0);
 }
 
 static int
 crossmp_vop_unlock(struct vop_unlock_args *ap)
 {
 	struct vnode *vp;
 	struct lock *lk __diagused;
 
 	vp = ap->a_vp;
 	lk = vp->v_vnlock;
 
 	WITNESS_UNLOCK(&lk->lock_object, 0, LOCK_FILE, LOCK_LINE);
 	LOCK_LOG_LOCK("SUNLOCK", &lk->lock_object, 0, 0, LOCK_FILE,
 	    LOCK_LINE);
 	return (0);
 }
 
 static struct vop_vector crossmp_vnodeops = {
 	.vop_default =		&default_vnodeops,
 	.vop_islocked =		crossmp_vop_islocked,
 	.vop_lock1 =		crossmp_vop_lock1,
 	.vop_unlock =		crossmp_vop_unlock,
 };
 /*
  * VFS_VOP_VECTOR_REGISTER(crossmp_vnodeops) is not used here since the vnode
  * gets allocated early. See nameiinit for the direct call below.
  */
 
 struct nameicap_tracker {
 	struct vnode *dp;
 	TAILQ_ENTRY(nameicap_tracker) nm_link;
 };
 
 /* Zone for cap mode tracker elements used for dotdot capability checks. */
 MALLOC_DEFINE(M_NAMEITRACKER, "namei_tracker", "namei tracking for dotdot");
 
 static void
 nameiinit(void *dummy __unused)
 {
 
 	namei_zone = uma_zcreate("NAMEI", MAXPATHLEN, NULL, NULL, NULL, NULL,
 	    UMA_ALIGN_PTR, 0);
 	vfs_vector_op_register(&crossmp_vnodeops);
 	getnewvnode("crossmp", NULL, &crossmp_vnodeops, &vp_crossmp);
 }
 SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_SECOND, nameiinit, NULL);
 
 static int lookup_cap_dotdot = 1;
 SYSCTL_INT(_vfs, OID_AUTO, lookup_cap_dotdot, CTLFLAG_RWTUN,
     &lookup_cap_dotdot, 0,
     "enables \"..\" components in path lookup in capability mode");
 static int lookup_cap_dotdot_nonlocal = 1;
 SYSCTL_INT(_vfs, OID_AUTO, lookup_cap_dotdot_nonlocal, CTLFLAG_RWTUN,
     &lookup_cap_dotdot_nonlocal, 0,
     "enables \"..\" components in path lookup in capability mode "
     "on non-local mount");
 
 static void
 nameicap_tracker_add(struct nameidata *ndp, struct vnode *dp)
 {
 	struct nameicap_tracker *nt;
 
 	if ((ndp->ni_lcf & NI_LCF_CAP_DOTDOT) == 0 || dp->v_type != VDIR)
 		return;
 	nt = TAILQ_LAST(&ndp->ni_cap_tracker, nameicap_tracker_head);
 	if (nt != NULL && nt->dp == dp)
 		return;
 	nt = malloc(sizeof(*nt), M_NAMEITRACKER, M_WAITOK);
 	vhold(dp);
 	nt->dp = dp;
 	TAILQ_INSERT_TAIL(&ndp->ni_cap_tracker, nt, nm_link);
 }
 
 static void
 nameicap_cleanup_from(struct nameidata *ndp, struct nameicap_tracker *first)
 {
 	struct nameicap_tracker *nt, *nt1;
 
 	nt = first;
 	TAILQ_FOREACH_FROM_SAFE(nt, &ndp->ni_cap_tracker, nm_link, nt1) {
 		TAILQ_REMOVE(&ndp->ni_cap_tracker, nt, nm_link);
 		vdrop(nt->dp);
 		free(nt, M_NAMEITRACKER);
 	}
 }
 
 static void
 nameicap_cleanup(struct nameidata *ndp)
 {
 	KASSERT(TAILQ_EMPTY(&ndp->ni_cap_tracker) ||
 	    (ndp->ni_lcf & NI_LCF_CAP_DOTDOT) != 0, ("not strictrelative"));
 	nameicap_cleanup_from(ndp, NULL);
 }
 
 /*
  * For dotdot lookups in capability mode, only allow the component
  * lookup to succeed if the resulting directory was already traversed
  * during the operation.  This catches situations where already
  * traversed directory is moved to different parent, and then we walk
  * over it with dotdots.
  *
  * Also allow to force failure of dotdot lookups for non-local
  * filesystems, where external agents might assist local lookups to
  * escape the compartment.
  */
 static int
 nameicap_check_dotdot(struct nameidata *ndp, struct vnode *dp)
 {
 	struct nameicap_tracker *nt;
 	struct mount *mp;
 
 	if (dp == NULL || dp->v_type != VDIR || (ndp->ni_lcf &
 	    NI_LCF_STRICTRELATIVE) == 0)
 		return (0);
 	if ((ndp->ni_lcf & NI_LCF_CAP_DOTDOT) == 0)
 		return (ENOTCAPABLE);
 	mp = dp->v_mount;
 	if (lookup_cap_dotdot_nonlocal == 0 && mp != NULL &&
 	    (mp->mnt_flag & MNT_LOCAL) == 0)
 		return (ENOTCAPABLE);
 	TAILQ_FOREACH_REVERSE(nt, &ndp->ni_cap_tracker, nameicap_tracker_head,
 	    nm_link) {
 		if (dp == nt->dp) {
 			nt = TAILQ_NEXT(nt, nm_link);
 			if (nt != NULL)
 				nameicap_cleanup_from(ndp, nt);
 			return (0);
 		}
 	}
 	return (ENOTCAPABLE);
 }
 
 static void
 namei_cleanup_cnp(struct componentname *cnp)
 {
 
 	uma_zfree(namei_zone, cnp->cn_pnbuf);
-#ifdef DIAGNOSTIC
 	cnp->cn_pnbuf = NULL;
 	cnp->cn_nameptr = NULL;
-#endif
 }
 
 static int
 namei_handle_root(struct nameidata *ndp, struct vnode **dpp)
 {
 	struct componentname *cnp;
 
 	cnp = &ndp->ni_cnd;
 	if ((ndp->ni_lcf & NI_LCF_STRICTRELATIVE) != 0) {
 #ifdef KTRACE
 		if (KTRPOINT(curthread, KTR_CAPFAIL))
 			ktrcapfail(CAPFAIL_LOOKUP, NULL, NULL);
 #endif
 		return (ENOTCAPABLE);
 	}
 	while (*(cnp->cn_nameptr) == '/') {
 		cnp->cn_nameptr++;
 		ndp->ni_pathlen--;
 	}
 	*dpp = ndp->ni_rootdir;
 	vrefact(*dpp);
 	return (0);
 }
 
 static int
 namei_setup(struct nameidata *ndp, struct vnode **dpp, struct pwd **pwdp)
 {
 	struct componentname *cnp;
 	struct thread *td;
 	struct pwd *pwd;
 	int error;
 	bool startdir_used;
 
 	cnp = &ndp->ni_cnd;
 	td = curthread;
 
 	startdir_used = false;
 	*pwdp = NULL;
 	*dpp = NULL;
 
 #ifdef CAPABILITY_MODE
 	/*
 	 * In capability mode, lookups must be restricted to happen in
 	 * the subtree with the root specified by the file descriptor:
 	 * - The root must be real file descriptor, not the pseudo-descriptor
 	 *   AT_FDCWD.
 	 * - The passed path must be relative and not absolute.
 	 * - If lookup_cap_dotdot is disabled, path must not contain the
 	 *   '..' components.
 	 * - If lookup_cap_dotdot is enabled, we verify that all '..'
 	 *   components lookups result in the directories which were
 	 *   previously walked by us, which prevents an escape from
 	 *   the relative root.
 	 */
 	if (IN_CAPABILITY_MODE(td) && (cnp->cn_flags & NOCAPCHECK) == 0) {
 		ndp->ni_lcf |= NI_LCF_STRICTRELATIVE;
 		ndp->ni_resflags |= NIRES_STRICTREL;
 		if (ndp->ni_dirfd == AT_FDCWD) {
 #ifdef KTRACE
 			if (KTRPOINT(td, KTR_CAPFAIL))
 				ktrcapfail(CAPFAIL_LOOKUP, NULL, NULL);
 #endif
 			return (ECAPMODE);
 		}
 	}
 #endif
 	error = 0;
 
 	/*
 	 * Get starting point for the translation.
 	 */
 	pwd = pwd_hold(td);
 	/*
 	 * The reference on ni_rootdir is acquired in the block below to avoid
 	 * back-to-back atomics for absolute lookups.
 	 */
 	ndp->ni_rootdir = pwd->pwd_rdir;
 	ndp->ni_topdir = pwd->pwd_jdir;
 
 	if (cnp->cn_pnbuf[0] == '/') {
 		ndp->ni_resflags |= NIRES_ABS;
 		error = namei_handle_root(ndp, dpp);
 	} else {
 		if (ndp->ni_startdir != NULL) {
 			*dpp = ndp->ni_startdir;
 			startdir_used = true;
 		} else if (ndp->ni_dirfd == AT_FDCWD) {
 			*dpp = pwd->pwd_cdir;
 			vrefact(*dpp);
 		} else {
 			if (cnp->cn_flags & AUDITVNODE1)
 				AUDIT_ARG_ATFD1(ndp->ni_dirfd);
 			if (cnp->cn_flags & AUDITVNODE2)
 				AUDIT_ARG_ATFD2(ndp->ni_dirfd);
 
 			error = fgetvp_lookup(ndp->ni_dirfd, ndp, dpp);
 		}
 		if (error == 0 && (*dpp)->v_type != VDIR &&
 		    (cnp->cn_pnbuf[0] != '\0' ||
 		    (cnp->cn_flags & EMPTYPATH) == 0))
 			error = ENOTDIR;
 	}
 	if (error == 0 && (cnp->cn_flags & RBENEATH) != 0) {
 		if (cnp->cn_pnbuf[0] == '/') {
 			error = ENOTCAPABLE;
 		} else if ((ndp->ni_lcf & NI_LCF_STRICTRELATIVE) == 0) {
 			ndp->ni_lcf |= NI_LCF_STRICTRELATIVE |
 			    NI_LCF_CAP_DOTDOT;
 		}
 	}
 
 	/*
 	 * If we are auditing the kernel pathname, save the user pathname.
 	 */
 	if (cnp->cn_flags & AUDITVNODE1)
 		AUDIT_ARG_UPATH1_VP(td, ndp->ni_rootdir, *dpp, cnp->cn_pnbuf);
 	if (cnp->cn_flags & AUDITVNODE2)
 		AUDIT_ARG_UPATH2_VP(td, ndp->ni_rootdir, *dpp, cnp->cn_pnbuf);
 	if (ndp->ni_startdir != NULL && !startdir_used)
 		vrele(ndp->ni_startdir);
 	if (error != 0) {
 		if (*dpp != NULL)
 			vrele(*dpp);
 		pwd_drop(pwd);
 		return (error);
 	}
 	if ((ndp->ni_lcf & NI_LCF_STRICTRELATIVE) != 0 &&
 	    lookup_cap_dotdot != 0)
 		ndp->ni_lcf |= NI_LCF_CAP_DOTDOT;
 	SDT_PROBE4(vfs, namei, lookup, entry, *dpp, cnp->cn_pnbuf,
 	    cnp->cn_flags, false);
 	*pwdp = pwd;
 	return (0);
 }
 
 static int
 namei_getpath(struct nameidata *ndp)
 {
 	struct componentname *cnp;
 	int error;
 
 	cnp = &ndp->ni_cnd;
 
 	/*
 	 * Get a buffer for the name to be translated, and copy the
 	 * name into the buffer.
 	 */
 	cnp->cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK);
 	if (ndp->ni_segflg == UIO_SYSSPACE) {
 		error = copystr(ndp->ni_dirp, cnp->cn_pnbuf, MAXPATHLEN,
 		    &ndp->ni_pathlen);
 	} else {
 		error = copyinstr(ndp->ni_dirp, cnp->cn_pnbuf, MAXPATHLEN,
 		    &ndp->ni_pathlen);
 	}
 
 	return (error);
 }
 
 static int
 namei_emptypath(struct nameidata *ndp)
 {
 	struct componentname *cnp;
 	struct pwd *pwd;
 	struct vnode *dp;
 	int error;
 
 	cnp = &ndp->ni_cnd;
 	MPASS(*cnp->cn_pnbuf == '\0');
 	MPASS((cnp->cn_flags & EMPTYPATH) != 0);
 	MPASS((cnp->cn_flags & (LOCKPARENT | WANTPARENT)) == 0);
 
 	ndp->ni_resflags |= NIRES_EMPTYPATH;
 	error = namei_setup(ndp, &dp, &pwd);
 	if (error != 0) {
-		namei_cleanup_cnp(cnp);
 		goto errout;
 	}
 
 	/*
 	 * Usecount on dp already provided by namei_setup.
 	 */
 	ndp->ni_vp = dp;
-	namei_cleanup_cnp(cnp);
 	pwd_drop(pwd);
 	NDVALIDATE(ndp);
 	if ((cnp->cn_flags & LOCKLEAF) != 0) {
 		VOP_LOCK(dp, (cnp->cn_flags & LOCKSHARED) != 0 ?
 		    LK_SHARED : LK_EXCLUSIVE);
 		if (VN_IS_DOOMED(dp)) {
 			vput(dp);
 			error = ENOENT;
 			goto errout;
 		}
 	}
 	SDT_PROBE4(vfs, namei, lookup, return, 0, ndp->ni_vp, false, ndp);
 	return (0);
 
 errout:
 	SDT_PROBE4(vfs, namei, lookup, return, error, NULL, false, ndp);
+	namei_cleanup_cnp(cnp);
 	return (error);
 }
 
 static int __noinline
 namei_follow_link(struct nameidata *ndp)
 {
 	char *cp;
 	struct iovec aiov;
 	struct uio auio;
 	struct componentname *cnp;
 	struct thread *td;
 	int error, linklen;
 
 	error = 0;
 	cnp = &ndp->ni_cnd;
 	td = curthread;
 
 	if (ndp->ni_loopcnt++ >= MAXSYMLINKS) {
 		error = ELOOP;
 		goto out;
 	}
 #ifdef MAC
 	if ((cnp->cn_flags & NOMACCHECK) == 0) {
 		error = mac_vnode_check_readlink(td->td_ucred, ndp->ni_vp);
 		if (error != 0)
 			goto out;
 	}
 #endif
 	if (ndp->ni_pathlen > 1)
 		cp = uma_zalloc(namei_zone, M_WAITOK);
 	else
 		cp = cnp->cn_pnbuf;
 	aiov.iov_base = cp;
 	aiov.iov_len = MAXPATHLEN;
 	auio.uio_iov = &aiov;
 	auio.uio_iovcnt = 1;
 	auio.uio_offset = 0;
 	auio.uio_rw = UIO_READ;
 	auio.uio_segflg = UIO_SYSSPACE;
 	auio.uio_td = td;
 	auio.uio_resid = MAXPATHLEN;
 	error = VOP_READLINK(ndp->ni_vp, &auio, cnp->cn_cred);
 	if (error != 0) {
 		if (ndp->ni_pathlen > 1)
 			uma_zfree(namei_zone, cp);
 		goto out;
 	}
 	linklen = MAXPATHLEN - auio.uio_resid;
 	if (linklen == 0) {
 		if (ndp->ni_pathlen > 1)
 			uma_zfree(namei_zone, cp);
 		error = ENOENT;
 		goto out;
 	}
 	if (linklen + ndp->ni_pathlen > MAXPATHLEN) {
 		if (ndp->ni_pathlen > 1)
 			uma_zfree(namei_zone, cp);
 		error = ENAMETOOLONG;
 		goto out;
 	}
 	if (ndp->ni_pathlen > 1) {
 		bcopy(ndp->ni_next, cp + linklen, ndp->ni_pathlen);
 		uma_zfree(namei_zone, cnp->cn_pnbuf);
 		cnp->cn_pnbuf = cp;
 	} else
 		cnp->cn_pnbuf[linklen] = '\0';
 	ndp->ni_pathlen += linklen;
 out:
 	return (error);
 }
 
 /*
  * Convert a pathname into a pointer to a locked vnode.
  *
  * The FOLLOW flag is set when symbolic links are to be followed
  * when they occur at the end of the name translation process.
  * Symbolic links are always followed for all other pathname
  * components other than the last.
  *
  * The segflg defines whether the name is to be copied from user
  * space or kernel space.
  *
  * Overall outline of namei:
  *
  *	copy in name
  *	get starting directory
  *	while (!done && !error) {
  *		call lookup to search path.
  *		if symbolic link, massage name in buffer and continue
  *	}
  */
 int
 namei(struct nameidata *ndp)
 {
 	struct vnode *dp;	/* the directory we are searching */
 	struct componentname *cnp;
 	struct thread *td;
 	struct pwd *pwd;
 	int error;
 	enum cache_fpl_status status;
 
 	cnp = &ndp->ni_cnd;
 	td = curthread;
 #ifdef INVARIANTS
 	KASSERT((ndp->ni_debugflags & NAMEI_DBG_CALLED) == 0,
 	    ("%s: repeated call to namei without NDREINIT", __func__));
 	KASSERT(ndp->ni_debugflags == NAMEI_DBG_INITED,
 	    ("%s: bad debugflags %d", __func__, ndp->ni_debugflags));
 	ndp->ni_debugflags |= NAMEI_DBG_CALLED;
 	if (ndp->ni_startdir != NULL)
 		ndp->ni_debugflags |= NAMEI_DBG_HADSTARTDIR;
 	if (cnp->cn_flags & FAILIFEXISTS) {
 		KASSERT(cnp->cn_nameiop == CREATE,
 		    ("%s: FAILIFEXISTS passed for op %d", __func__, cnp->cn_nameiop));
 		/*
 		 * The limitation below is to restrict hairy corner cases.
 		 */
 		KASSERT((cnp->cn_flags & (LOCKPARENT | LOCKLEAF)) == LOCKPARENT,
 		    ("%s: FAILIFEXISTS must be passed with LOCKPARENT and without LOCKLEAF",
 		    __func__));
 	}
-	/*
-	 * For NDVALIDATE.
-	 *
-	 * While NDINIT may seem like a more natural place to do it, there are
-	 * callers which directly modify flags past invoking init.
-	 */
-	cnp->cn_origflags = cnp->cn_flags;
 #endif
 	ndp->ni_cnd.cn_cred = td->td_ucred;
 	KASSERT(ndp->ni_resflags == 0, ("%s: garbage in ni_resflags: %x\n",
 	    __func__, ndp->ni_resflags));
 	KASSERT(cnp->cn_cred && td->td_proc, ("namei: bad cred/proc"));
 	KASSERT((cnp->cn_flags & NAMEI_INTERNAL_FLAGS) == 0,
 	    ("namei: unexpected flags: %" PRIx64 "\n",
 	    cnp->cn_flags & NAMEI_INTERNAL_FLAGS));
 	if (cnp->cn_flags & NOCACHE)
 		KASSERT(cnp->cn_nameiop != LOOKUP,
 		    ("%s: NOCACHE passed with LOOKUP", __func__));
 	MPASS(ndp->ni_startdir == NULL || ndp->ni_startdir->v_type == VDIR ||
 	    ndp->ni_startdir->v_type == VBAD);
 
 	ndp->ni_lcf = 0;
 	ndp->ni_loopcnt = 0;
 	ndp->ni_vp = NULL;
 
 	error = namei_getpath(ndp);
 	if (__predict_false(error != 0)) {
 		namei_cleanup_cnp(cnp);
 		SDT_PROBE4(vfs, namei, lookup, return, error, NULL,
 		    false, ndp);
 		return (error);
 	}
 
 	cnp->cn_nameptr = cnp->cn_pnbuf;
 
 #ifdef KTRACE
 	if (KTRPOINT(td, KTR_NAMEI)) {
 		ktrnamei(cnp->cn_pnbuf);
 	}
 #endif
 	TSNAMEI(curthread->td_proc->p_pid, cnp->cn_pnbuf);
 
 	/*
 	 * First try looking up the target without locking any vnodes.
 	 *
 	 * We may need to start from scratch or pick up where it left off.
 	 */
 	error = cache_fplookup(ndp, &status, &pwd);
 	switch (status) {
 	case CACHE_FPL_STATUS_UNSET:
 		__assert_unreachable();
 		break;
 	case CACHE_FPL_STATUS_HANDLED:
 		if (error == 0)
 			NDVALIDATE(ndp);
 		return (error);
 	case CACHE_FPL_STATUS_PARTIAL:
 		TAILQ_INIT(&ndp->ni_cap_tracker);
 		dp = ndp->ni_startdir;
 		break;
 	case CACHE_FPL_STATUS_DESTROYED:
 		ndp->ni_loopcnt = 0;
 		error = namei_getpath(ndp);
 		if (__predict_false(error != 0)) {
 			namei_cleanup_cnp(cnp);
 			return (error);
 		}
 		cnp->cn_nameptr = cnp->cn_pnbuf;
 		/* FALLTHROUGH */
 	case CACHE_FPL_STATUS_ABORTED:
 		TAILQ_INIT(&ndp->ni_cap_tracker);
 		MPASS(ndp->ni_lcf == 0);
 		if (*cnp->cn_pnbuf == '\0') {
 			if ((cnp->cn_flags & EMPTYPATH) != 0) {
 				return (namei_emptypath(ndp));
 			}
 			namei_cleanup_cnp(cnp);
 			SDT_PROBE4(vfs, namei, lookup, return, ENOENT, NULL,
 			    false, ndp);
 			return (ENOENT);
 		}
 		error = namei_setup(ndp, &dp, &pwd);
 		if (error != 0) {
 			namei_cleanup_cnp(cnp);
 			return (error);
 		}
 		break;
 	}
 
 	/*
 	 * Locked lookup.
 	 */
 	for (;;) {
 		ndp->ni_startdir = dp;
 		error = vfs_lookup(ndp);
 		if (error != 0)
 			goto out;
 
 		/*
 		 * If not a symbolic link, we're done.
 		 */
 		if ((cnp->cn_flags & ISSYMLINK) == 0) {
 			SDT_PROBE4(vfs, namei, lookup, return, error,
 			    ndp->ni_vp, false, ndp);
-			if ((cnp->cn_flags & (SAVENAME | SAVESTART)) == 0) {
-				namei_cleanup_cnp(cnp);
-			} else
-				cnp->cn_flags |= HASBUF;
 			nameicap_cleanup(ndp);
 			pwd_drop(pwd);
 			NDVALIDATE(ndp);
 			return (0);
 		}
 		error = namei_follow_link(ndp);
 		if (error != 0)
 			break;
 		vput(ndp->ni_vp);
 		dp = ndp->ni_dvp;
 		/*
 		 * Check if root directory should replace current directory.
 		 */
 		cnp->cn_nameptr = cnp->cn_pnbuf;
 		if (*(cnp->cn_nameptr) == '/') {
 			vrele(dp);
 			error = namei_handle_root(ndp, &dp);
 			if (error != 0)
 				goto out;
 		}
 	}
 	vput(ndp->ni_vp);
 	ndp->ni_vp = NULL;
 	vrele(ndp->ni_dvp);
 out:
 	MPASS(error != 0);
 	SDT_PROBE4(vfs, namei, lookup, return, error, NULL, false, ndp);
 	namei_cleanup_cnp(cnp);
 	nameicap_cleanup(ndp);
 	pwd_drop(pwd);
 	return (error);
 }
 
 static int
 compute_cn_lkflags(struct mount *mp, int lkflags, int cnflags)
 {
 
 	if (mp == NULL || ((lkflags & LK_SHARED) &&
 	    !(mp->mnt_kern_flag & MNTK_LOOKUP_SHARED))) {
 		lkflags &= ~LK_SHARED;
 		lkflags |= LK_EXCLUSIVE;
 	}
 	lkflags |= LK_NODDLKTREAT;
 	return (lkflags);
 }
 
 static __inline int
 needs_exclusive_leaf(struct mount *mp, int flags)
 {
 
 	/*
 	 * Intermediate nodes can use shared locks, we only need to
 	 * force an exclusive lock for leaf nodes.
 	 */
 	if ((flags & (ISLASTCN | LOCKLEAF)) != (ISLASTCN | LOCKLEAF))
 		return (0);
 
 	/* Always use exclusive locks if LOCKSHARED isn't set. */
 	if (!(flags & LOCKSHARED))
 		return (1);
 
 	/*
 	 * For lookups during open(), if the mount point supports
 	 * extended shared operations, then use a shared lock for the
 	 * leaf node, otherwise use an exclusive lock.
 	 */
 	if ((flags & ISOPEN) != 0)
 		return (!MNT_EXTENDED_SHARED(mp));
 
 	/*
 	 * Lookup requests outside of open() that specify LOCKSHARED
 	 * only need a shared lock on the leaf vnode.
 	 */
 	return (0);
 }
 
 /*
  * Various filesystems expect to be able to copy a name component with length
  * bounded by NAME_MAX into a directory entry buffer of size MAXNAMLEN.  Make
  * sure that these are the same size.
  */
 _Static_assert(MAXNAMLEN == NAME_MAX,
     "MAXNAMLEN and NAME_MAX have different values");
 
 static int __noinline
 vfs_lookup_degenerate(struct nameidata *ndp, struct vnode *dp, int wantparent)
 {
 	struct componentname *cnp;
 	struct mount *mp;
 	int error;
 
 	cnp = &ndp->ni_cnd;
 
 	cnp->cn_flags |= ISLASTCN;
 
 	mp = atomic_load_ptr(&dp->v_mount);
 	if (needs_exclusive_leaf(mp, cnp->cn_flags)) {
 		cnp->cn_lkflags &= ~LK_SHARED;
 		cnp->cn_lkflags |= LK_EXCLUSIVE;
 	}
 
 	vn_lock(dp,
 	    compute_cn_lkflags(mp, cnp->cn_lkflags | LK_RETRY,
 	    cnp->cn_flags));
 
 	if (dp->v_type != VDIR) {
 		error = ENOTDIR;
 		goto bad;
 	}
 	if (cnp->cn_nameiop != LOOKUP) {
 		error = EISDIR;
 		goto bad;
 	}
 	if (wantparent) {
 		ndp->ni_dvp = dp;
 		VREF(dp);
 	}
 	ndp->ni_vp = dp;
 	cnp->cn_namelen = 0;
 
 	if (cnp->cn_flags & AUDITVNODE1)
 		AUDIT_ARG_VNODE1(dp);
 	else if (cnp->cn_flags & AUDITVNODE2)
 		AUDIT_ARG_VNODE2(dp);
 
 	if (!(cnp->cn_flags & (LOCKPARENT | LOCKLEAF)))
 		VOP_UNLOCK(dp);
 	/* XXX This should probably move to the top of function. */
 	if (cnp->cn_flags & SAVESTART)
 		panic("lookup: SAVESTART");
 	return (0);
 bad:
 	VOP_UNLOCK(dp);
 	return (error);
 }
 
 /*
  * FAILIFEXISTS handling.
  *
  * XXX namei called with LOCKPARENT but not LOCKLEAF has the strange
  * behaviour of leaving the vnode unlocked if the target is the same
  * vnode as the parent.
  */
 static int __noinline
 vfs_lookup_failifexists(struct nameidata *ndp)
 {
 	struct componentname *cnp __diagused;
 
 	cnp = &ndp->ni_cnd;
 
 	MPASS((cnp->cn_flags & ISSYMLINK) == 0);
 	if (ndp->ni_vp == ndp->ni_dvp)
 		vrele(ndp->ni_dvp);
 	else
 		vput(ndp->ni_dvp);
 	vrele(ndp->ni_vp);
 	ndp->ni_dvp = NULL;
 	ndp->ni_vp = NULL;
 	NDFREE_PNBUF(ndp);
 	return (EEXIST);
 }
 
 /*
  * Search a pathname.
  * This is a very central and rather complicated routine.
  *
  * The pathname is pointed to by ni_ptr and is of length ni_pathlen.
  * The starting directory is taken from ni_startdir. The pathname is
  * descended until done, or a symbolic link is encountered. The variable
  * ni_more is clear if the path is completed; it is set to one if a
  * symbolic link needing interpretation is encountered.
  *
  * The flag argument is LOOKUP, CREATE, RENAME, or DELETE depending on
  * whether the name is to be looked up, created, renamed, or deleted.
  * When CREATE, RENAME, or DELETE is specified, information usable in
  * creating, renaming, or deleting a directory entry may be calculated.
  * If flag has LOCKPARENT or'ed into it, the parent directory is returned
  * locked. If flag has WANTPARENT or'ed into it, the parent directory is
  * returned unlocked. Otherwise the parent directory is not returned. If
  * the target of the pathname exists and LOCKLEAF is or'ed into the flag
  * the target is returned locked, otherwise it is returned unlocked.
  * When creating or renaming and LOCKPARENT is specified, the target may not
  * be ".".  When deleting and LOCKPARENT is specified, the target may be ".".
  *
  * Overall outline of lookup:
  *
  * dirloop:
  *	identify next component of name at ndp->ni_ptr
  *	handle degenerate case where name is null string
  *	if .. and crossing mount points and on mounted filesys, find parent
  *	call VOP_LOOKUP routine for next component name
  *	    directory vnode returned in ni_dvp, unlocked unless LOCKPARENT set
  *	    component vnode returned in ni_vp (if it exists), locked.
  *	if result vnode is mounted on and crossing mount points,
  *	    find mounted on vnode
  *	if more components of name, do next level at dirloop
  *	return the answer in ni_vp, locked if LOCKLEAF set
  *	    if LOCKPARENT set, return locked parent in ni_dvp
  *	    if WANTPARENT set, return unlocked parent in ni_dvp
  */
 int
 vfs_lookup(struct nameidata *ndp)
 {
 	char *cp;			/* pointer into pathname argument */
 	char *prev_ni_next;		/* saved ndp->ni_next */
 	char *nulchar;			/* location of '\0' in cn_pnbuf */
 	char *lastchar;			/* location of the last character */
 	struct vnode *dp = NULL;	/* the directory we are searching */
 	struct vnode *tdp;		/* saved dp */
 	struct mount *mp;		/* mount table entry */
 	struct prison *pr;
 	size_t prev_ni_pathlen;		/* saved ndp->ni_pathlen */
 	int docache;			/* == 0 do not cache last component */
 	int wantparent;			/* 1 => wantparent or lockparent flag */
 	int rdonly;			/* lookup read-only flag bit */
 	int error = 0;
 	int dpunlocked = 0;		/* dp has already been unlocked */
 	int relookup = 0;		/* do not consume the path component */
 	struct componentname *cnp = &ndp->ni_cnd;
 	int lkflags_save;
 	int ni_dvp_unlocked;
 
 	/*
 	 * Setup: break out flag bits into variables.
 	 */
 	ni_dvp_unlocked = 0;
 	wantparent = cnp->cn_flags & (LOCKPARENT | WANTPARENT);
 	KASSERT(cnp->cn_nameiop == LOOKUP || wantparent,
 	    ("CREATE, DELETE, RENAME require LOCKPARENT or WANTPARENT."));
 	/*
 	 * When set to zero, docache causes the last component of the
 	 * pathname to be deleted from the cache and the full lookup
 	 * of the name to be done (via VOP_CACHEDLOOKUP()). Often
 	 * filesystems need some pre-computed values that are made
 	 * during the full lookup, for instance UFS sets dp->i_offset.
 	 *
 	 * The docache variable is set to zero when requested by the
 	 * NOCACHE flag and for all modifying operations except CREATE.
 	 */
 	docache = (cnp->cn_flags & NOCACHE) ^ NOCACHE;
 	if (cnp->cn_nameiop == DELETE ||
 	    (wantparent && cnp->cn_nameiop != CREATE &&
 	     cnp->cn_nameiop != LOOKUP))
 		docache = 0;
 	rdonly = cnp->cn_flags & RDONLY;
 	cnp->cn_flags &= ~ISSYMLINK;
 	ndp->ni_dvp = NULL;
 
 	cnp->cn_lkflags = LK_SHARED;
 	dp = ndp->ni_startdir;
 	ndp->ni_startdir = NULLVP;
 
 	/*
 	 * Leading slashes, if any, are supposed to be skipped by the caller.
 	 */
 	MPASS(cnp->cn_nameptr[0] != '/');
 
 	/*
 	 * Check for degenerate name (e.g. / or "") which is a way of talking
 	 * about a directory, e.g. like "/." or ".".
 	 */
 	if (__predict_false(cnp->cn_nameptr[0] == '\0')) {
 		error = vfs_lookup_degenerate(ndp, dp, wantparent);
 		if (error == 0)
 			goto success_right_lock;
 		goto bad_unlocked;
 	}
 
 	/*
 	 * Nul-out trailing slashes (e.g., "foo///" -> "foo").
 	 *
 	 * This must be done before VOP_LOOKUP() because some fs's don't know
 	 * about trailing slashes.  Remember if there were trailing slashes to
 	 * handle symlinks, existing non-directories and non-existing files
 	 * that won't be directories specially later.
 	 */
 	MPASS(ndp->ni_pathlen >= 2);
 	lastchar = &cnp->cn_nameptr[ndp->ni_pathlen - 2];
 	if (*lastchar == '/') {
 		while (lastchar >= cnp->cn_pnbuf) {
 			*lastchar = '\0';
 			lastchar--;
 			ndp->ni_pathlen--;
 			if (*lastchar != '/') {
 				break;
 			}
 		}
 		cnp->cn_flags |= TRAILINGSLASH;
 	}
 
 	/*
 	 * We use shared locks until we hit the parent of the last cn then
 	 * we adjust based on the requesting flags.
 	 */
 	vn_lock(dp,
 	    compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags | LK_RETRY,
 	    cnp->cn_flags));
 
 dirloop:
 	/*
 	 * Search a new directory.
 	 *
 	 * The last component of the filename is left accessible via
-	 * cnp->cn_nameptr for callers that need the name. Callers needing
-	 * the name set the SAVENAME flag. When done, they assume
-	 * responsibility for freeing the pathname buffer.
+	 * cnp->cn_nameptr. It has to be freed with a call to NDFREE*.
 	 *
 	 * Store / as a temporary sentinel so that we only have one character
 	 * to test for. Pathnames tend to be short so this should not be
 	 * resulting in cache misses.
 	 */
 	nulchar = &cnp->cn_nameptr[ndp->ni_pathlen - 1];
 	KASSERT(*nulchar == '\0',
 	    ("%s: expected nul at %p; string [%s]\n", __func__, nulchar,
 	    cnp->cn_pnbuf));
 	*nulchar = '/';
 	for (cp = cnp->cn_nameptr; *cp != '/'; cp++) {
 		KASSERT(*cp != '\0',
 		    ("%s: encountered unexpected nul; string [%s]\n", __func__,
 		    cnp->cn_nameptr));
 		continue;
 	}
 	*nulchar = '\0';
 	cnp->cn_namelen = cp - cnp->cn_nameptr;
 	if (__predict_false(cnp->cn_namelen > NAME_MAX)) {
 		error = ENAMETOOLONG;
 		goto bad;
 	}
 #ifdef NAMEI_DIAGNOSTIC
 	{ char c = *cp;
 	*cp = '\0';
 	printf("{%s}: ", cnp->cn_nameptr);
 	*cp = c; }
 #endif
 	prev_ni_pathlen = ndp->ni_pathlen;
 	ndp->ni_pathlen -= cnp->cn_namelen;
 	KASSERT(ndp->ni_pathlen <= PATH_MAX,
 	    ("%s: ni_pathlen underflow to %zd\n", __func__, ndp->ni_pathlen));
 	prev_ni_next = ndp->ni_next;
 	ndp->ni_next = cp;
 
 	/*
 	 * Something else should be clearing this.
 	 */
 	cnp->cn_flags &= ~(ISDOTDOT|ISLASTCN);
 
 	cnp->cn_flags |= MAKEENTRY;
 	if (*cp == '\0' && docache == 0)
 		cnp->cn_flags &= ~MAKEENTRY;
 	if (cnp->cn_namelen == 2 &&
 	    cnp->cn_nameptr[1] == '.' && cnp->cn_nameptr[0] == '.')
 		cnp->cn_flags |= ISDOTDOT;
 	if (*ndp->ni_next == 0) {
 		cnp->cn_flags |= ISLASTCN;
 
 		if (__predict_false(cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.' &&
 		    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))) {
 			error = EINVAL;
 			goto bad;
 		}
 	}
 
 	nameicap_tracker_add(ndp, dp);
 
 	/*
 	 * Make sure degenerate names don't get here, their handling was
 	 * previously found in this spot.
 	 */
 	MPASS(cnp->cn_nameptr[0] != '\0');
 
 	/*
 	 * Handle "..": five special cases.
 	 * 0. If doing a capability lookup and lookup_cap_dotdot is
 	 *    disabled, return ENOTCAPABLE.
 	 * 1. Return an error if this is the last component of
 	 *    the name and the operation is DELETE or RENAME.
 	 * 2. If at root directory (e.g. after chroot)
 	 *    or at absolute root directory
 	 *    then ignore it so can't get out.
 	 * 3. If this vnode is the root of a mounted
 	 *    filesystem, then replace it with the
 	 *    vnode which was mounted on so we take the
 	 *    .. in the other filesystem.
 	 * 4. If the vnode is the top directory of
 	 *    the jail or chroot, don't let them out.
 	 * 5. If doing a capability lookup and lookup_cap_dotdot is
 	 *    enabled, return ENOTCAPABLE if the lookup would escape
 	 *    from the initial file descriptor directory.  Checks are
 	 *    done by ensuring that namei() already traversed the
 	 *    result of dotdot lookup.
 	 */
 	if (cnp->cn_flags & ISDOTDOT) {
 		if ((ndp->ni_lcf & (NI_LCF_STRICTRELATIVE | NI_LCF_CAP_DOTDOT))
 		    == NI_LCF_STRICTRELATIVE) {
 #ifdef KTRACE
 			if (KTRPOINT(curthread, KTR_CAPFAIL))
 				ktrcapfail(CAPFAIL_LOOKUP, NULL, NULL);
 #endif
 			error = ENOTCAPABLE;
 			goto bad;
 		}
 		if ((cnp->cn_flags & ISLASTCN) != 0 &&
 		    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) {
 			error = EINVAL;
 			goto bad;
 		}
 		for (;;) {
 			for (pr = cnp->cn_cred->cr_prison; pr != NULL;
 			     pr = pr->pr_parent)
 				if (dp == pr->pr_root)
 					break;
 			if (dp == ndp->ni_rootdir || 
 			    dp == ndp->ni_topdir || 
 			    dp == rootvnode ||
 			    pr != NULL ||
 			    ((dp->v_vflag & VV_ROOT) != 0 &&
 			     (cnp->cn_flags & NOCROSSMOUNT) != 0)) {
 				ndp->ni_dvp = dp;
 				ndp->ni_vp = dp;
 				VREF(dp);
 				goto nextname;
 			}
 			if ((dp->v_vflag & VV_ROOT) == 0)
 				break;
 			if (VN_IS_DOOMED(dp)) {	/* forced unmount */
 				error = ENOENT;
 				goto bad;
 			}
 			tdp = dp;
 			dp = dp->v_mount->mnt_vnodecovered;
 			VREF(dp);
 			vput(tdp);
 			vn_lock(dp,
 			    compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags |
 			    LK_RETRY, ISDOTDOT));
 			error = nameicap_check_dotdot(ndp, dp);
 			if (error != 0) {
 #ifdef KTRACE
 				if (KTRPOINT(curthread, KTR_CAPFAIL))
 					ktrcapfail(CAPFAIL_LOOKUP, NULL, NULL);
 #endif
 				goto bad;
 			}
 		}
 	}
 
 	/*
 	 * We now have a segment name to search for, and a directory to search.
 	 */
 unionlookup:
 #ifdef MAC
 	error = mac_vnode_check_lookup(cnp->cn_cred, dp, cnp);
 	if (__predict_false(error))
 		goto bad;
 #endif
 	ndp->ni_dvp = dp;
 	ndp->ni_vp = NULL;
 	ASSERT_VOP_LOCKED(dp, "lookup");
 	/*
 	 * If we have a shared lock we may need to upgrade the lock for the
 	 * last operation.
 	 */
 	if ((cnp->cn_flags & LOCKPARENT) && (cnp->cn_flags & ISLASTCN) &&
 	    dp != vp_crossmp && VOP_ISLOCKED(dp) == LK_SHARED)
 		vn_lock(dp, LK_UPGRADE|LK_RETRY);
 	if (VN_IS_DOOMED(dp)) {
 		error = ENOENT;
 		goto bad;
 	}
 	/*
 	 * If we're looking up the last component and we need an exclusive
 	 * lock, adjust our lkflags.
 	 */
 	if (needs_exclusive_leaf(dp->v_mount, cnp->cn_flags))
 		cnp->cn_lkflags = LK_EXCLUSIVE;
 #ifdef NAMEI_DIAGNOSTIC
 	vn_printf(dp, "lookup in ");
 #endif
 	lkflags_save = cnp->cn_lkflags;
 	cnp->cn_lkflags = compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags,
 	    cnp->cn_flags);
 	error = VOP_LOOKUP(dp, &ndp->ni_vp, cnp);
 	cnp->cn_lkflags = lkflags_save;
 	if (error != 0) {
 		KASSERT(ndp->ni_vp == NULL, ("leaf should be empty"));
 #ifdef NAMEI_DIAGNOSTIC
 		printf("not found\n");
 #endif
 		if ((error == ENOENT) &&
 		    (dp->v_vflag & VV_ROOT) && (dp->v_mount != NULL) &&
 		    (dp->v_mount->mnt_flag & MNT_UNION)) {
 			tdp = dp;
 			dp = dp->v_mount->mnt_vnodecovered;
 			VREF(dp);
 			vput(tdp);
 			vn_lock(dp,
 			    compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags |
 			    LK_RETRY, cnp->cn_flags));
 			nameicap_tracker_add(ndp, dp);
 			goto unionlookup;
 		}
 
 		if (error == ERELOOKUP) {
 			vref(dp);
 			ndp->ni_vp = dp;
 			error = 0;
 			relookup = 1;
 			goto good;
 		}
 
 		if (error != EJUSTRETURN)
 			goto bad;
 		/*
 		 * At this point, we know we're at the end of the
 		 * pathname.  If creating / renaming, we can consider
 		 * allowing the file or directory to be created / renamed,
 		 * provided we're not on a read-only filesystem.
 		 */
 		if (rdonly) {
 			error = EROFS;
 			goto bad;
 		}
 		/* trailing slash only allowed for directories */
 		if ((cnp->cn_flags & TRAILINGSLASH) &&
 		    !(cnp->cn_flags & WILLBEDIR)) {
 			error = ENOENT;
 			goto bad;
 		}
 		if ((cnp->cn_flags & LOCKPARENT) == 0)
 			VOP_UNLOCK(dp);
 		/*
 		 * We return with ni_vp NULL to indicate that the entry
 		 * doesn't currently exist, leaving a pointer to the
 		 * (possibly locked) directory vnode in ndp->ni_dvp.
 		 */
 		if (cnp->cn_flags & SAVESTART) {
 			ndp->ni_startdir = ndp->ni_dvp;
 			VREF(ndp->ni_startdir);
 		}
 		goto success;
 	}
 
 good:
 #ifdef NAMEI_DIAGNOSTIC
 	printf("found\n");
 #endif
 	dp = ndp->ni_vp;
 
 	/*
 	 * Check to see if the vnode has been mounted on;
 	 * if so find the root of the mounted filesystem.
 	 */
 	while (dp->v_type == VDIR && (mp = dp->v_mountedhere) &&
 	       (cnp->cn_flags & NOCROSSMOUNT) == 0) {
 		if (vfs_busy(mp, 0))
 			continue;
 		vput(dp);
 		if (dp != ndp->ni_dvp)
 			vput(ndp->ni_dvp);
 		else
 			vrele(ndp->ni_dvp);
 		vrefact(vp_crossmp);
 		ndp->ni_dvp = vp_crossmp;
 		error = VFS_ROOT(mp, compute_cn_lkflags(mp, cnp->cn_lkflags,
 		    cnp->cn_flags), &tdp);
 		vfs_unbusy(mp);
 		if (vn_lock(vp_crossmp, LK_SHARED | LK_NOWAIT))
 			panic("vp_crossmp exclusively locked or reclaimed");
 		if (error) {
 			dpunlocked = 1;
 			goto bad2;
 		}
 		ndp->ni_vp = dp = tdp;
 	}
 
 	/*
 	 * Check for symbolic link
 	 */
 	if ((dp->v_type == VLNK) &&
 	    ((cnp->cn_flags & FOLLOW) || (cnp->cn_flags & TRAILINGSLASH) ||
 	     *ndp->ni_next == '/')) {
 		cnp->cn_flags |= ISSYMLINK;
 		if (VN_IS_DOOMED(dp)) {
 			/*
 			 * We can't know whether the directory was mounted with
 			 * NOSYMFOLLOW, so we can't follow safely.
 			 */
 			error = ENOENT;
 			goto bad2;
 		}
 		if (dp->v_mount->mnt_flag & MNT_NOSYMFOLLOW) {
 			error = EACCES;
 			goto bad2;
 		}
 		/*
 		 * Symlink code always expects an unlocked dvp.
 		 */
 		if (ndp->ni_dvp != ndp->ni_vp) {
 			VOP_UNLOCK(ndp->ni_dvp);
 			ni_dvp_unlocked = 1;
 		}
 		goto success;
 	}
 
 nextname:
 	/*
 	 * Not a symbolic link that we will follow.  Continue with the
 	 * next component if there is any; otherwise, we're done.
 	 */
 	KASSERT((cnp->cn_flags & ISLASTCN) || *ndp->ni_next == '/',
 	    ("lookup: invalid path state."));
 	if (relookup) {
 		relookup = 0;
 		ndp->ni_pathlen = prev_ni_pathlen;
 		ndp->ni_next = prev_ni_next;
 		if (ndp->ni_dvp != dp)
 			vput(ndp->ni_dvp);
 		else
 			vrele(ndp->ni_dvp);
 		goto dirloop;
 	}
 	if (cnp->cn_flags & ISDOTDOT) {
 		error = nameicap_check_dotdot(ndp, ndp->ni_vp);
 		if (error != 0) {
 #ifdef KTRACE
 			if (KTRPOINT(curthread, KTR_CAPFAIL))
 				ktrcapfail(CAPFAIL_LOOKUP, NULL, NULL);
 #endif
 			goto bad2;
 		}
 	}
 	if (*ndp->ni_next == '/') {
 		cnp->cn_nameptr = ndp->ni_next;
 		while (*cnp->cn_nameptr == '/') {
 			cnp->cn_nameptr++;
 			ndp->ni_pathlen--;
 		}
 		if (ndp->ni_dvp != dp)
 			vput(ndp->ni_dvp);
 		else
 			vrele(ndp->ni_dvp);
 		goto dirloop;
 	}
 	/*
 	 * If we're processing a path with a trailing slash,
 	 * check that the end result is a directory.
 	 */
 	if ((cnp->cn_flags & TRAILINGSLASH) && dp->v_type != VDIR) {
 		error = ENOTDIR;
 		goto bad2;
 	}
 	/*
 	 * Disallow directory write attempts on read-only filesystems.
 	 */
 	if (rdonly &&
 	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) {
 		error = EROFS;
 		goto bad2;
 	}
 	if (cnp->cn_flags & SAVESTART) {
 		ndp->ni_startdir = ndp->ni_dvp;
 		VREF(ndp->ni_startdir);
 	}
 	if (!wantparent) {
 		ni_dvp_unlocked = 2;
 		if (ndp->ni_dvp != dp)
 			vput(ndp->ni_dvp);
 		else
 			vrele(ndp->ni_dvp);
 	} else if ((cnp->cn_flags & LOCKPARENT) == 0 && ndp->ni_dvp != dp) {
 		VOP_UNLOCK(ndp->ni_dvp);
 		ni_dvp_unlocked = 1;
 	}
 
 	if (cnp->cn_flags & AUDITVNODE1)
 		AUDIT_ARG_VNODE1(dp);
 	else if (cnp->cn_flags & AUDITVNODE2)
 		AUDIT_ARG_VNODE2(dp);
 
 	if ((cnp->cn_flags & LOCKLEAF) == 0)
 		VOP_UNLOCK(dp);
 success:
 	/*
 	 * FIXME: for lookups which only cross a mount point to fetch the
 	 * root vnode, ni_dvp will be set to vp_crossmp. This can be a problem
 	 * if either WANTPARENT or LOCKPARENT is set.
 	 */
 	/*
 	 * Because of shared lookup we may have the vnode shared locked, but
 	 * the caller may want it to be exclusively locked.
 	 */
 	if (needs_exclusive_leaf(dp->v_mount, cnp->cn_flags) &&
 	    VOP_ISLOCKED(dp) != LK_EXCLUSIVE) {
 		vn_lock(dp, LK_UPGRADE | LK_RETRY);
 		if (VN_IS_DOOMED(dp)) {
 			error = ENOENT;
 			goto bad2;
 		}
 	}
 success_right_lock:
 	if (ndp->ni_vp != NULL) {
 		if ((cnp->cn_flags & ISDOTDOT) == 0)
 			nameicap_tracker_add(ndp, ndp->ni_vp);
 		if ((cnp->cn_flags & (FAILIFEXISTS | ISSYMLINK)) == FAILIFEXISTS)
 			return (vfs_lookup_failifexists(ndp));
 	}
 	return (0);
 
 bad2:
 	if (ni_dvp_unlocked != 2) {
 		if (dp != ndp->ni_dvp && !ni_dvp_unlocked)
 			vput(ndp->ni_dvp);
 		else
 			vrele(ndp->ni_dvp);
 	}
 bad:
 	if (!dpunlocked)
 		vput(dp);
 bad_unlocked:
 	ndp->ni_vp = NULL;
 	return (error);
 }
 
 /*
  * relookup - lookup a path name component
  *    Used by lookup to re-acquire things.
  */
 int
 vfs_relookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp)
 {
 	struct vnode *dp = NULL;		/* the directory we are searching */
 	int rdonly;			/* lookup read-only flag bit */
 	int error = 0;
 
 	KASSERT(cnp->cn_flags & ISLASTCN,
 	    ("relookup: Not given last component."));
 	/*
 	 * Setup: break out flag bits into variables.
 	 */
 	KASSERT((cnp->cn_flags & (LOCKPARENT | WANTPARENT)) != 0,
 	    ("relookup: parent not wanted"));
 	rdonly = cnp->cn_flags & RDONLY;
 	cnp->cn_flags &= ~ISSYMLINK;
 	dp = dvp;
 	cnp->cn_lkflags = LK_EXCLUSIVE;
 	vn_lock(dp, LK_EXCLUSIVE | LK_RETRY);
 
 	/*
 	 * Search a new directory.
 	 *
-	 * The last component of the filename is left accessible via
-	 * cnp->cn_nameptr for callers that need the name. Callers needing
-	 * the name set the SAVENAME flag. When done, they assume
-	 * responsibility for freeing the pathname buffer.
+	 * See a comment in vfs_lookup for cnp->cn_nameptr.
 	 */
 #ifdef NAMEI_DIAGNOSTIC
 	printf("{%s}: ", cnp->cn_nameptr);
 #endif
 
 	/*
 	 * Check for "" which represents the root directory after slash
 	 * removal.
 	 */
 	if (cnp->cn_nameptr[0] == '\0') {
 		/*
 		 * Support only LOOKUP for "/" because lookup()
 		 * can't succeed for CREATE, DELETE and RENAME.
 		 */
 		KASSERT(cnp->cn_nameiop == LOOKUP, ("nameiop must be LOOKUP"));
 		KASSERT(dp->v_type == VDIR, ("dp is not a directory"));
 
 		if (!(cnp->cn_flags & LOCKLEAF))
 			VOP_UNLOCK(dp);
 		*vpp = dp;
 		/* XXX This should probably move to the top of function. */
 		if (cnp->cn_flags & SAVESTART)
 			panic("lookup: SAVESTART");
 		return (0);
 	}
 
 	if (cnp->cn_flags & ISDOTDOT)
 		panic ("relookup: lookup on dot-dot");
 
 	/*
 	 * We now have a segment name to search for, and a directory to search.
 	 */
 #ifdef NAMEI_DIAGNOSTIC
 	vn_printf(dp, "search in ");
 #endif
 	if ((error = VOP_LOOKUP(dp, vpp, cnp)) != 0) {
 		KASSERT(*vpp == NULL, ("leaf should be empty"));
 		if (error != EJUSTRETURN)
 			goto bad;
 		/*
 		 * If creating and at end of pathname, then can consider
 		 * allowing file to be created.
 		 */
 		if (rdonly) {
 			error = EROFS;
 			goto bad;
 		}
 		/* ASSERT(dvp == ndp->ni_startdir) */
 		if (cnp->cn_flags & SAVESTART)
 			VREF(dvp);
 		if ((cnp->cn_flags & LOCKPARENT) == 0)
 			VOP_UNLOCK(dp);
 		/*
 		 * We return with ni_vp NULL to indicate that the entry
 		 * doesn't currently exist, leaving a pointer to the
 		 * (possibly locked) directory vnode in ndp->ni_dvp.
 		 */
 		return (0);
 	}
 
 	dp = *vpp;
 
 	/*
 	 * Disallow directory write attempts on read-only filesystems.
 	 */
 	if (rdonly &&
 	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) {
 		if (dvp == dp)
 			vrele(dvp);
 		else
 			vput(dvp);
 		error = EROFS;
 		goto bad;
 	}
 	/*
 	 * Set the parent lock/ref state to the requested state.
 	 */
 	if ((cnp->cn_flags & LOCKPARENT) == 0 && dvp != dp)
 		VOP_UNLOCK(dvp);
 	/*
 	 * Check for symbolic link
 	 */
 	KASSERT(dp->v_type != VLNK || !(cnp->cn_flags & FOLLOW),
 	    ("relookup: symlink found.\n"));
 
 	/* ASSERT(dvp == ndp->ni_startdir) */
 	if (cnp->cn_flags & SAVESTART)
 		VREF(dvp);
 
 	if ((cnp->cn_flags & LOCKLEAF) == 0)
 		VOP_UNLOCK(dp);
 	return (0);
 bad:
 	vput(dp);
 	*vpp = NULL;
 	return (error);
 }
 
-/*
- * Free data allocated by namei(); see namei(9) for details.
- */
-void
-NDFREE_PNBUF(struct nameidata *ndp)
-{
-
-	if ((ndp->ni_cnd.cn_flags & HASBUF) != 0) {
-		MPASS((ndp->ni_cnd.cn_flags & (SAVENAME | SAVESTART)) != 0);
-		uma_zfree(namei_zone, ndp->ni_cnd.cn_pnbuf);
-		ndp->ni_cnd.cn_flags &= ~HASBUF;
-	}
-}
-
-/*
- * NDFREE_PNBUF replacement for callers that know there is no buffer.
- *
- * This is a hack. Preferably the VFS layer would not produce anything more
- * than it was asked to do. Unfortunately several non-LOOKUP cases can add the
- * HASBUF flag to the result. Even then an interface could be implemented where
- * the caller specifies what they expected to see in the result and what they
- * are going to take care of.
- *
- * In the meantime provide this kludge as a trivial replacement for NDFREE_PNBUF
- * calls scattered throughout the kernel where we know for a fact the flag must not
- * be seen.
- */
-#ifdef INVARIANTS
-void
-NDFREE_NOTHING(struct nameidata *ndp)
-{
-	struct componentname *cnp;
-
-	cnp = &ndp->ni_cnd;
-	KASSERT(cnp->cn_nameiop == LOOKUP, ("%s: got non-LOOKUP op %d\n",
-	    __func__, cnp->cn_nameiop));
-	KASSERT((cnp->cn_flags & (SAVENAME | HASBUF)) == 0,
-	    ("%s: bad flags \%" PRIx64 "\n", __func__, cnp->cn_flags));
-}
-#endif
-
 void
 (NDFREE)(struct nameidata *ndp, const u_int flags)
 {
 	int unlock_dvp;
 	int unlock_vp;
 
 	unlock_dvp = 0;
 	unlock_vp = 0;
 
 	if (!(flags & NDF_NO_FREE_PNBUF)) {
 		NDFREE_PNBUF(ndp);
 	}
 	if (!(flags & NDF_NO_VP_UNLOCK) &&
 	    (ndp->ni_cnd.cn_flags & LOCKLEAF) && ndp->ni_vp)
 		unlock_vp = 1;
 	if (!(flags & NDF_NO_DVP_UNLOCK) &&
 	    (ndp->ni_cnd.cn_flags & LOCKPARENT) &&
 	    ndp->ni_dvp != ndp->ni_vp)
 		unlock_dvp = 1;
 	if (!(flags & NDF_NO_VP_RELE) && ndp->ni_vp) {
 		if (unlock_vp) {
 			vput(ndp->ni_vp);
 			unlock_vp = 0;
 		} else
 			vrele(ndp->ni_vp);
 		ndp->ni_vp = NULL;
 	}
 	if (unlock_vp)
 		VOP_UNLOCK(ndp->ni_vp);
 	if (!(flags & NDF_NO_DVP_RELE) &&
 	    (ndp->ni_cnd.cn_flags & (LOCKPARENT|WANTPARENT))) {
 		if (unlock_dvp) {
 			vput(ndp->ni_dvp);
 			unlock_dvp = 0;
 		} else
 			vrele(ndp->ni_dvp);
 		ndp->ni_dvp = NULL;
 	}
 	if (unlock_dvp)
 		VOP_UNLOCK(ndp->ni_dvp);
 	if (!(flags & NDF_NO_STARTDIR_RELE) &&
 	    (ndp->ni_cnd.cn_flags & SAVESTART)) {
 		vrele(ndp->ni_startdir);
 		ndp->ni_startdir = NULL;
 	}
 }
 
 #ifdef INVARIANTS
 /*
  * Validate the final state of ndp after the lookup.
- *
- * Historically filesystems were allowed to modify cn_flags. Most notably they
- * can add SAVENAME to the request, resulting in HASBUF and pushing subsequent
- * clean up to the consumer. In practice this seems to only concern != LOOKUP
- * operations.
- *
- * As a step towards stricter API contract this routine validates the state to
- * clean up. Note validation is a work in progress with the intent of becoming
- * stricter over time.
  */
-#define NDMODIFYINGFLAGS (LOCKLEAF | LOCKPARENT | WANTPARENT | SAVENAME | SAVESTART | HASBUF)
 static void
-NDVALIDATE(struct nameidata *ndp)
+NDVALIDATE_impl(struct nameidata *ndp, int line)
 {
 	struct componentname *cnp;
-	uint64_t used, orig;
 
 	cnp = &ndp->ni_cnd;
-	orig = cnp->cn_origflags;
-	used = cnp->cn_flags;
-	switch (cnp->cn_nameiop) {
-	case LOOKUP:
-		/*
-		 * For plain lookup we require strict conformance -- nothing
-		 * to clean up if it was not requested by the caller.
-		 */
-		orig &= NDMODIFYINGFLAGS;
-		used &= NDMODIFYINGFLAGS;
-		if ((orig & (SAVENAME | SAVESTART)) != 0)
-			orig |= HASBUF;
-		if (orig != used) {
-			goto out_mismatch;
-		}
-		break;
-	case CREATE:
-	case DELETE:
-	case RENAME:
-		/*
-		 * Some filesystems set SAVENAME to provoke HASBUF, accommodate
-		 * for it until it gets fixed.
-		 */
-		orig &= NDMODIFYINGFLAGS;
-		orig |= (SAVENAME | HASBUF);
-		used &= NDMODIFYINGFLAGS;
-		used |= (SAVENAME | HASBUF);
-		if (orig != used) {
-			goto out_mismatch;
-		}
-		break;
-	}
-	return;
-out_mismatch:
-	panic("%s: mismatched flags for op %d: added %" PRIx64 ", "
-	    "removed %" PRIx64" (%" PRIx64" != %" PRIx64"; stored %" PRIx64" != %" PRIx64")",
-	    __func__, cnp->cn_nameiop, used & ~orig, orig &~ used,
-	    orig, used, cnp->cn_origflags, cnp->cn_flags);
+	if (cnp->cn_pnbuf == NULL)
+		panic("%s: got no buf! called from %d", __func__, line);
 }
+
 #endif
 
 /*
  * Determine if there is a suitable alternate filename under the specified
  * prefix for the specified path.  If the create flag is set, then the
  * alternate prefix will be used so long as the parent directory exists.
  * This is used by the various compatibility ABIs so that Linux binaries prefer
  * files under /compat/linux for example.  The chosen path (whether under
  * the prefix or under /) is returned in a kernel malloc'd buffer pointed
  * to by pathbuf.  The caller is responsible for free'ing the buffer from
  * the M_TEMP bucket if one is returned.
  */
 int
 kern_alternate_path(const char *prefix, const char *path, enum uio_seg pathseg,
     char **pathbuf, int create, int dirfd)
 {
 	struct nameidata nd, ndroot;
 	char *ptr, *buf, *cp;
 	size_t len, sz;
 	int error;
 
 	buf = (char *) malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
 	*pathbuf = buf;
 
 	/* Copy the prefix into the new pathname as a starting point. */
 	len = strlcpy(buf, prefix, MAXPATHLEN);
 	if (len >= MAXPATHLEN) {
 		*pathbuf = NULL;
 		free(buf, M_TEMP);
 		return (EINVAL);
 	}
 	sz = MAXPATHLEN - len;
 	ptr = buf + len;
 
 	/* Append the filename to the prefix. */
 	if (pathseg == UIO_SYSSPACE)
 		error = copystr(path, ptr, sz, &len);
 	else
 		error = copyinstr(path, ptr, sz, &len);
 
 	if (error) {
 		*pathbuf = NULL;
 		free(buf, M_TEMP);
 		return (error);
 	}
 
 	/* Only use a prefix with absolute pathnames. */
 	if (*ptr != '/') {
 		error = EINVAL;
 		goto keeporig;
 	}
 
 	if (dirfd != AT_FDCWD) {
 		/*
 		 * We want the original because the "prefix" is
 		 * included in the already opened dirfd.
 		 */
 		bcopy(ptr, buf, len);
 		return (0);
 	}
 
 	/*
 	 * We know that there is a / somewhere in this pathname.
 	 * Search backwards for it, to find the file's parent dir
 	 * to see if it exists in the alternate tree. If it does,
 	 * and we want to create a file (cflag is set). We don't
 	 * need to worry about the root comparison in this case.
 	 */
 
 	if (create) {
 		for (cp = &ptr[len] - 1; *cp != '/'; cp--);
 		*cp = '\0';
 
 		NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, buf);
 		error = namei(&nd);
 		*cp = '/';
 		if (error != 0)
 			goto keeporig;
 	} else {
 		NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, buf);
 
 		error = namei(&nd);
 		if (error != 0)
 			goto keeporig;
 
 		/*
 		 * We now compare the vnode of the prefix to the one
 		 * vnode asked. If they resolve to be the same, then we
 		 * ignore the match so that the real root gets used.
 		 * This avoids the problem of traversing "../.." to find the
 		 * root directory and never finding it, because "/" resolves
 		 * to the emulation root directory. This is expensive :-(
 		 */
 		NDINIT(&ndroot, LOOKUP, FOLLOW, UIO_SYSSPACE, prefix);
 
 		/* We shouldn't ever get an error from this namei(). */
 		error = namei(&ndroot);
 		if (error == 0) {
 			if (nd.ni_vp == ndroot.ni_vp)
 				error = ENOENT;
 
 			NDFREE_PNBUF(&ndroot);
 			vrele(ndroot.ni_vp);
 		}
 	}
 
 	NDFREE_PNBUF(&nd);
 	vrele(nd.ni_vp);
 
 keeporig:
 	/* If there was an error, use the original path name. */
 	if (error)
 		bcopy(ptr, buf, len);
 	return (error);
 }
diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c
index 62430f4aaf39..2386e5a08169 100644
--- a/sys/kern/vfs_subr.c
+++ b/sys/kern/vfs_subr.c
@@ -1,7075 +1,7075 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
  * (c) UNIX System Laboratories, Inc.
  * All or some portions of this file are derived from material licensed
  * to the University of California by American Telephone and Telegraph
  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
  * the permission of UNIX System Laboratories, Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)vfs_subr.c	8.31 (Berkeley) 5/26/95
  */
 
 /*
  * External virtual filesystem routines
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_ddb.h"
 #include "opt_watchdog.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/asan.h>
 #include <sys/bio.h>
 #include <sys/buf.h>
 #include <sys/capsicum.h>
 #include <sys/condvar.h>
 #include <sys/conf.h>
 #include <sys/counter.h>
 #include <sys/dirent.h>
 #include <sys/event.h>
 #include <sys/eventhandler.h>
 #include <sys/extattr.h>
 #include <sys/file.h>
 #include <sys/fcntl.h>
 #include <sys/jail.h>
 #include <sys/kdb.h>
 #include <sys/kernel.h>
 #include <sys/kthread.h>
 #include <sys/ktr.h>
 #include <sys/lockf.h>
 #include <sys/malloc.h>
 #include <sys/mount.h>
 #include <sys/namei.h>
 #include <sys/pctrie.h>
 #include <sys/priv.h>
 #include <sys/reboot.h>
 #include <sys/refcount.h>
 #include <sys/rwlock.h>
 #include <sys/sched.h>
 #include <sys/sleepqueue.h>
 #include <sys/smr.h>
 #include <sys/smp.h>
 #include <sys/stat.h>
 #include <sys/sysctl.h>
 #include <sys/syslog.h>
 #include <sys/vmmeter.h>
 #include <sys/vnode.h>
 #include <sys/watchdog.h>
 
 #include <machine/stdarg.h>
 
 #include <security/mac/mac_framework.h>
 
 #include <vm/vm.h>
 #include <vm/vm_object.h>
 #include <vm/vm_extern.h>
 #include <vm/pmap.h>
 #include <vm/vm_map.h>
 #include <vm/vm_page.h>
 #include <vm/vm_kern.h>
 #include <vm/uma.h>
 
 #ifdef DDB
 #include <ddb/ddb.h>
 #endif
 
 static void	delmntque(struct vnode *vp);
 static int	flushbuflist(struct bufv *bufv, int flags, struct bufobj *bo,
 		    int slpflag, int slptimeo);
 static void	syncer_shutdown(void *arg, int howto);
 static int	vtryrecycle(struct vnode *vp);
 static void	v_init_counters(struct vnode *);
 static void	vn_seqc_init(struct vnode *);
 static void	vn_seqc_write_end_free(struct vnode *vp);
 static void	vgonel(struct vnode *);
 static bool	vhold_recycle_free(struct vnode *);
 static void	vdropl_recycle(struct vnode *vp);
 static void	vdrop_recycle(struct vnode *vp);
 static void	vfs_knllock(void *arg);
 static void	vfs_knlunlock(void *arg);
 static void	vfs_knl_assert_lock(void *arg, int what);
 static void	destroy_vpollinfo(struct vpollinfo *vi);
 static int	v_inval_buf_range_locked(struct vnode *vp, struct bufobj *bo,
 		    daddr_t startlbn, daddr_t endlbn);
 static void	vnlru_recalc(void);
 
 /*
  * Number of vnodes in existence.  Increased whenever getnewvnode()
  * allocates a new vnode, decreased in vdropl() for VIRF_DOOMED vnode.
  */
 static u_long __exclusive_cache_line numvnodes;
 
 SYSCTL_ULONG(_vfs, OID_AUTO, numvnodes, CTLFLAG_RD, &numvnodes, 0,
     "Number of vnodes in existence");
 
 static counter_u64_t vnodes_created;
 SYSCTL_COUNTER_U64(_vfs, OID_AUTO, vnodes_created, CTLFLAG_RD, &vnodes_created,
     "Number of vnodes created by getnewvnode");
 
 /*
  * Conversion tables for conversion from vnode types to inode formats
  * and back.
  */
 enum vtype iftovt_tab[16] = {
 	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
 	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VNON
 };
 int vttoif_tab[10] = {
 	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
 	S_IFSOCK, S_IFIFO, S_IFMT, S_IFMT
 };
 
 /*
  * List of allocates vnodes in the system.
  */
 static TAILQ_HEAD(freelst, vnode) vnode_list;
 static struct vnode *vnode_list_free_marker;
 static struct vnode *vnode_list_reclaim_marker;
 
 /*
  * "Free" vnode target.  Free vnodes are rarely completely free, but are
  * just ones that are cheap to recycle.  Usually they are for files which
  * have been stat'd but not read; these usually have inode and namecache
  * data attached to them.  This target is the preferred minimum size of a
  * sub-cache consisting mostly of such files. The system balances the size
  * of this sub-cache with its complement to try to prevent either from
  * thrashing while the other is relatively inactive.  The targets express
  * a preference for the best balance.
  *
  * "Above" this target there are 2 further targets (watermarks) related
  * to recyling of free vnodes.  In the best-operating case, the cache is
  * exactly full, the free list has size between vlowat and vhiwat above the
  * free target, and recycling from it and normal use maintains this state.
  * Sometimes the free list is below vlowat or even empty, but this state
  * is even better for immediate use provided the cache is not full.
  * Otherwise, vnlru_proc() runs to reclaim enough vnodes (usually non-free
  * ones) to reach one of these states.  The watermarks are currently hard-
  * coded as 4% and 9% of the available space higher.  These and the default
  * of 25% for wantfreevnodes are too large if the memory size is large.
  * E.g., 9% of 75% of MAXVNODES is more than 566000 vnodes to reclaim
  * whenever vnlru_proc() becomes active.
  */
 static long wantfreevnodes;
 static long __exclusive_cache_line freevnodes;
 SYSCTL_ULONG(_vfs, OID_AUTO, freevnodes, CTLFLAG_RD,
     &freevnodes, 0, "Number of \"free\" vnodes");
 static long freevnodes_old;
 
 static counter_u64_t recycles_count;
 SYSCTL_COUNTER_U64(_vfs, OID_AUTO, recycles, CTLFLAG_RD, &recycles_count,
     "Number of vnodes recycled to meet vnode cache targets");
 
 static counter_u64_t recycles_free_count;
 SYSCTL_COUNTER_U64(_vfs, OID_AUTO, recycles_free, CTLFLAG_RD, &recycles_free_count,
     "Number of free vnodes recycled to meet vnode cache targets");
 
 static counter_u64_t deferred_inact;
 SYSCTL_COUNTER_U64(_vfs, OID_AUTO, deferred_inact, CTLFLAG_RD, &deferred_inact,
     "Number of times inactive processing was deferred");
 
 /* To keep more than one thread at a time from running vfs_getnewfsid */
 static struct mtx mntid_mtx;
 
 /*
  * Lock for any access to the following:
  *	vnode_list
  *	numvnodes
  *	freevnodes
  */
 static struct mtx __exclusive_cache_line vnode_list_mtx;
 
 /* Publicly exported FS */
 struct nfs_public nfs_pub;
 
 static uma_zone_t buf_trie_zone;
 static smr_t buf_trie_smr;
 
 /* Zone for allocation of new vnodes - used exclusively by getnewvnode() */
 static uma_zone_t vnode_zone;
 MALLOC_DEFINE(M_VNODEPOLL, "VN POLL", "vnode poll");
 
 __read_frequently smr_t vfs_smr;
 
 /*
  * The workitem queue.
  *
  * It is useful to delay writes of file data and filesystem metadata
  * for tens of seconds so that quickly created and deleted files need
  * not waste disk bandwidth being created and removed. To realize this,
  * we append vnodes to a "workitem" queue. When running with a soft
  * updates implementation, most pending metadata dependencies should
  * not wait for more than a few seconds. Thus, mounted on block devices
  * are delayed only about a half the time that file data is delayed.
  * Similarly, directory updates are more critical, so are only delayed
  * about a third the time that file data is delayed. Thus, there are
  * SYNCER_MAXDELAY queues that are processed round-robin at a rate of
  * one each second (driven off the filesystem syncer process). The
  * syncer_delayno variable indicates the next queue that is to be processed.
  * Items that need to be processed soon are placed in this queue:
  *
  *	syncer_workitem_pending[syncer_delayno]
  *
  * A delay of fifteen seconds is done by placing the request fifteen
  * entries later in the queue:
  *
  *	syncer_workitem_pending[(syncer_delayno + 15) & syncer_mask]
  *
  */
 static int syncer_delayno;
 static long syncer_mask;
 LIST_HEAD(synclist, bufobj);
 static struct synclist *syncer_workitem_pending;
 /*
  * The sync_mtx protects:
  *	bo->bo_synclist
  *	sync_vnode_count
  *	syncer_delayno
  *	syncer_state
  *	syncer_workitem_pending
  *	syncer_worklist_len
  *	rushjob
  */
 static struct mtx sync_mtx;
 static struct cv sync_wakeup;
 
 #define SYNCER_MAXDELAY		32
 static int syncer_maxdelay = SYNCER_MAXDELAY;	/* maximum delay time */
 static int syncdelay = 30;		/* max time to delay syncing data */
 static int filedelay = 30;		/* time to delay syncing files */
 SYSCTL_INT(_kern, OID_AUTO, filedelay, CTLFLAG_RW, &filedelay, 0,
     "Time to delay syncing files (in seconds)");
 static int dirdelay = 29;		/* time to delay syncing directories */
 SYSCTL_INT(_kern, OID_AUTO, dirdelay, CTLFLAG_RW, &dirdelay, 0,
     "Time to delay syncing directories (in seconds)");
 static int metadelay = 28;		/* time to delay syncing metadata */
 SYSCTL_INT(_kern, OID_AUTO, metadelay, CTLFLAG_RW, &metadelay, 0,
     "Time to delay syncing metadata (in seconds)");
 static int rushjob;		/* number of slots to run ASAP */
 static int stat_rush_requests;	/* number of times I/O speeded up */
 SYSCTL_INT(_debug, OID_AUTO, rush_requests, CTLFLAG_RW, &stat_rush_requests, 0,
     "Number of times I/O speeded up (rush requests)");
 
 #define	VDBATCH_SIZE 8
 struct vdbatch {
 	u_int index;
 	long freevnodes;
 	struct mtx lock;
 	struct vnode *tab[VDBATCH_SIZE];
 };
 DPCPU_DEFINE_STATIC(struct vdbatch, vd);
 
 static void	vdbatch_dequeue(struct vnode *vp);
 
 /*
  * When shutting down the syncer, run it at four times normal speed.
  */
 #define SYNCER_SHUTDOWN_SPEEDUP		4
 static int sync_vnode_count;
 static int syncer_worklist_len;
 static enum { SYNCER_RUNNING, SYNCER_SHUTTING_DOWN, SYNCER_FINAL_DELAY }
     syncer_state;
 
 /* Target for maximum number of vnodes. */
 u_long desiredvnodes;
 static u_long gapvnodes;		/* gap between wanted and desired */
 static u_long vhiwat;		/* enough extras after expansion */
 static u_long vlowat;		/* minimal extras before expansion */
 static u_long vstir;		/* nonzero to stir non-free vnodes */
 static volatile int vsmalltrigger = 8;	/* pref to keep if > this many pages */
 
 static u_long vnlru_read_freevnodes(void);
 
 /*
  * Note that no attempt is made to sanitize these parameters.
  */
 static int
 sysctl_maxvnodes(SYSCTL_HANDLER_ARGS)
 {
 	u_long val;
 	int error;
 
 	val = desiredvnodes;
 	error = sysctl_handle_long(oidp, &val, 0, req);
 	if (error != 0 || req->newptr == NULL)
 		return (error);
 
 	if (val == desiredvnodes)
 		return (0);
 	mtx_lock(&vnode_list_mtx);
 	desiredvnodes = val;
 	wantfreevnodes = desiredvnodes / 4;
 	vnlru_recalc();
 	mtx_unlock(&vnode_list_mtx);
 	/*
 	 * XXX There is no protection against multiple threads changing
 	 * desiredvnodes at the same time. Locking above only helps vnlru and
 	 * getnewvnode.
 	 */
 	vfs_hash_changesize(desiredvnodes);
 	cache_changesize(desiredvnodes);
 	return (0);
 }
 
 SYSCTL_PROC(_kern, KERN_MAXVNODES, maxvnodes,
     CTLTYPE_ULONG | CTLFLAG_MPSAFE | CTLFLAG_RW, NULL, 0, sysctl_maxvnodes,
     "LU", "Target for maximum number of vnodes");
 
 static int
 sysctl_wantfreevnodes(SYSCTL_HANDLER_ARGS)
 {
 	u_long val;
 	int error;
 
 	val = wantfreevnodes;
 	error = sysctl_handle_long(oidp, &val, 0, req);
 	if (error != 0 || req->newptr == NULL)
 		return (error);
 
 	if (val == wantfreevnodes)
 		return (0);
 	mtx_lock(&vnode_list_mtx);
 	wantfreevnodes = val;
 	vnlru_recalc();
 	mtx_unlock(&vnode_list_mtx);
 	return (0);
 }
 
 SYSCTL_PROC(_vfs, OID_AUTO, wantfreevnodes,
     CTLTYPE_ULONG | CTLFLAG_MPSAFE | CTLFLAG_RW, NULL, 0, sysctl_wantfreevnodes,
     "LU", "Target for minimum number of \"free\" vnodes");
 
 SYSCTL_ULONG(_kern, OID_AUTO, minvnodes, CTLFLAG_RW,
     &wantfreevnodes, 0, "Old name for vfs.wantfreevnodes (legacy)");
 static int vnlru_nowhere;
 SYSCTL_INT(_debug, OID_AUTO, vnlru_nowhere, CTLFLAG_RW,
     &vnlru_nowhere, 0, "Number of times the vnlru process ran without success");
 
 static int
 sysctl_try_reclaim_vnode(SYSCTL_HANDLER_ARGS)
 {
 	struct vnode *vp;
 	struct nameidata nd;
 	char *buf;
 	unsigned long ndflags;
 	int error;
 
 	if (req->newptr == NULL)
 		return (EINVAL);
 	if (req->newlen >= PATH_MAX)
 		return (E2BIG);
 
 	buf = malloc(PATH_MAX, M_TEMP, M_WAITOK);
 	error = SYSCTL_IN(req, buf, req->newlen);
 	if (error != 0)
 		goto out;
 
 	buf[req->newlen] = '\0';
 
-	ndflags = LOCKLEAF | NOFOLLOW | AUDITVNODE1 | SAVENAME;
+	ndflags = LOCKLEAF | NOFOLLOW | AUDITVNODE1;
 	NDINIT(&nd, LOOKUP, ndflags, UIO_SYSSPACE, buf);
 	if ((error = namei(&nd)) != 0)
 		goto out;
 	vp = nd.ni_vp;
 
 	if (VN_IS_DOOMED(vp)) {
 		/*
 		 * This vnode is being recycled.  Return != 0 to let the caller
 		 * know that the sysctl had no effect.  Return EAGAIN because a
 		 * subsequent call will likely succeed (since namei will create
 		 * a new vnode if necessary)
 		 */
 		error = EAGAIN;
 		goto putvnode;
 	}
 
 	counter_u64_add(recycles_count, 1);
 	vgone(vp);
 putvnode:
 	NDFREE(&nd, 0);
 out:
 	free(buf, M_TEMP);
 	return (error);
 }
 
 static int
 sysctl_ftry_reclaim_vnode(SYSCTL_HANDLER_ARGS)
 {
 	struct thread *td = curthread;
 	struct vnode *vp;
 	struct file *fp;
 	int error;
 	int fd;
 
 	if (req->newptr == NULL)
 		return (EBADF);
 
         error = sysctl_handle_int(oidp, &fd, 0, req);
         if (error != 0)
                 return (error);
 	error = getvnode(curthread, fd, &cap_fcntl_rights, &fp);
 	if (error != 0)
 		return (error);
 	vp = fp->f_vnode;
 
 	error = vn_lock(vp, LK_EXCLUSIVE);
 	if (error != 0)
 		goto drop;
 
 	counter_u64_add(recycles_count, 1);
 	vgone(vp);
 	VOP_UNLOCK(vp);
 drop:
 	fdrop(fp, td);
 	return (error);
 }
 
 SYSCTL_PROC(_debug, OID_AUTO, try_reclaim_vnode,
     CTLTYPE_STRING | CTLFLAG_MPSAFE | CTLFLAG_WR, NULL, 0,
     sysctl_try_reclaim_vnode, "A", "Try to reclaim a vnode by its pathname");
 SYSCTL_PROC(_debug, OID_AUTO, ftry_reclaim_vnode,
     CTLTYPE_INT | CTLFLAG_MPSAFE | CTLFLAG_WR, NULL, 0,
     sysctl_ftry_reclaim_vnode, "I",
     "Try to reclaim a vnode by its file descriptor");
 
 /* Shift count for (uintptr_t)vp to initialize vp->v_hash. */
 #define vnsz2log 8
 #ifndef DEBUG_LOCKS
 _Static_assert(sizeof(struct vnode) >= 1UL << vnsz2log &&
     sizeof(struct vnode) < 1UL << (vnsz2log + 1),
     "vnsz2log needs to be updated");
 #endif
 
 /*
  * Support for the bufobj clean & dirty pctrie.
  */
 static void *
 buf_trie_alloc(struct pctrie *ptree)
 {
 	return (uma_zalloc_smr(buf_trie_zone, M_NOWAIT));
 }
 
 static void
 buf_trie_free(struct pctrie *ptree, void *node)
 {
 	uma_zfree_smr(buf_trie_zone, node);
 }
 PCTRIE_DEFINE_SMR(BUF, buf, b_lblkno, buf_trie_alloc, buf_trie_free,
     buf_trie_smr);
 
 /*
  * Initialize the vnode management data structures.
  *
  * Reevaluate the following cap on the number of vnodes after the physical
  * memory size exceeds 512GB.  In the limit, as the physical memory size
  * grows, the ratio of the memory size in KB to vnodes approaches 64:1.
  */
 #ifndef	MAXVNODES_MAX
 #define	MAXVNODES_MAX	(512UL * 1024 * 1024 / 64)	/* 8M */
 #endif
 
 static MALLOC_DEFINE(M_VNODE_MARKER, "vnodemarker", "vnode marker");
 
 static struct vnode *
 vn_alloc_marker(struct mount *mp)
 {
 	struct vnode *vp;
 
 	vp = malloc(sizeof(struct vnode), M_VNODE_MARKER, M_WAITOK | M_ZERO);
 	vp->v_type = VMARKER;
 	vp->v_mount = mp;
 
 	return (vp);
 }
 
 static void
 vn_free_marker(struct vnode *vp)
 {
 
 	MPASS(vp->v_type == VMARKER);
 	free(vp, M_VNODE_MARKER);
 }
 
 #ifdef KASAN
 static int
 vnode_ctor(void *mem, int size, void *arg __unused, int flags __unused)
 {
 	kasan_mark(mem, size, roundup2(size, UMA_ALIGN_PTR + 1), 0);
 	return (0);
 }
 
 static void
 vnode_dtor(void *mem, int size, void *arg __unused)
 {
 	size_t end1, end2, off1, off2;
 
 	_Static_assert(offsetof(struct vnode, v_vnodelist) <
 	    offsetof(struct vnode, v_dbatchcpu),
 	    "KASAN marks require updating");
 
 	off1 = offsetof(struct vnode, v_vnodelist);
 	off2 = offsetof(struct vnode, v_dbatchcpu);
 	end1 = off1 + sizeof(((struct vnode *)NULL)->v_vnodelist);
 	end2 = off2 + sizeof(((struct vnode *)NULL)->v_dbatchcpu);
 
 	/*
 	 * Access to the v_vnodelist and v_dbatchcpu fields are permitted even
 	 * after the vnode has been freed.  Try to get some KASAN coverage by
 	 * marking everything except those two fields as invalid.  Because
 	 * KASAN's tracking is not byte-granular, any preceding fields sharing
 	 * the same 8-byte aligned word must also be marked valid.
 	 */
 
 	/* Handle the area from the start until v_vnodelist... */
 	off1 = rounddown2(off1, KASAN_SHADOW_SCALE);
 	kasan_mark(mem, off1, off1, KASAN_UMA_FREED);
 
 	/* ... then the area between v_vnodelist and v_dbatchcpu ... */
 	off1 = roundup2(end1, KASAN_SHADOW_SCALE);
 	off2 = rounddown2(off2, KASAN_SHADOW_SCALE);
 	if (off2 > off1)
 		kasan_mark((void *)((char *)mem + off1), off2 - off1,
 		    off2 - off1, KASAN_UMA_FREED);
 
 	/* ... and finally the area from v_dbatchcpu to the end. */
 	off2 = roundup2(end2, KASAN_SHADOW_SCALE);
 	kasan_mark((void *)((char *)mem + off2), size - off2, size - off2,
 	    KASAN_UMA_FREED);
 }
 #endif /* KASAN */
 
 /*
  * Initialize a vnode as it first enters the zone.
  */
 static int
 vnode_init(void *mem, int size, int flags)
 {
 	struct vnode *vp;
 
 	vp = mem;
 	bzero(vp, size);
 	/*
 	 * Setup locks.
 	 */
 	vp->v_vnlock = &vp->v_lock;
 	mtx_init(&vp->v_interlock, "vnode interlock", NULL, MTX_DEF);
 	/*
 	 * By default, don't allow shared locks unless filesystems opt-in.
 	 */
 	lockinit(vp->v_vnlock, PVFS, "vnode", VLKTIMEOUT,
 	    LK_NOSHARE | LK_IS_VNODE);
 	/*
 	 * Initialize bufobj.
 	 */
 	bufobj_init(&vp->v_bufobj, vp);
 	/*
 	 * Initialize namecache.
 	 */
 	cache_vnode_init(vp);
 	/*
 	 * Initialize rangelocks.
 	 */
 	rangelock_init(&vp->v_rl);
 
 	vp->v_dbatchcpu = NOCPU;
 
 	/*
 	 * Check vhold_recycle_free for an explanation.
 	 */
 	vp->v_holdcnt = VHOLD_NO_SMR;
 	vp->v_type = VNON;
 	mtx_lock(&vnode_list_mtx);
 	TAILQ_INSERT_BEFORE(vnode_list_free_marker, vp, v_vnodelist);
 	mtx_unlock(&vnode_list_mtx);
 	return (0);
 }
 
 /*
  * Free a vnode when it is cleared from the zone.
  */
 static void
 vnode_fini(void *mem, int size)
 {
 	struct vnode *vp;
 	struct bufobj *bo;
 
 	vp = mem;
 	vdbatch_dequeue(vp);
 	mtx_lock(&vnode_list_mtx);
 	TAILQ_REMOVE(&vnode_list, vp, v_vnodelist);
 	mtx_unlock(&vnode_list_mtx);
 	rangelock_destroy(&vp->v_rl);
 	lockdestroy(vp->v_vnlock);
 	mtx_destroy(&vp->v_interlock);
 	bo = &vp->v_bufobj;
 	rw_destroy(BO_LOCKPTR(bo));
 
 	kasan_mark(mem, size, size, 0);
 }
 
 /*
  * Provide the size of NFS nclnode and NFS fh for calculation of the
  * vnode memory consumption.  The size is specified directly to
  * eliminate dependency on NFS-private header.
  *
  * Other filesystems may use bigger or smaller (like UFS and ZFS)
  * private inode data, but the NFS-based estimation is ample enough.
  * Still, we care about differences in the size between 64- and 32-bit
  * platforms.
  *
  * Namecache structure size is heuristically
  * sizeof(struct namecache_ts) + CACHE_PATH_CUTOFF + 1.
  */
 #ifdef _LP64
 #define	NFS_NCLNODE_SZ	(528 + 64)
 #define	NC_SZ		148
 #else
 #define	NFS_NCLNODE_SZ	(360 + 32)
 #define	NC_SZ		92
 #endif
 
 static void
 vntblinit(void *dummy __unused)
 {
 	struct vdbatch *vd;
 	uma_ctor ctor;
 	uma_dtor dtor;
 	int cpu, physvnodes, virtvnodes;
 
 	/*
 	 * Desiredvnodes is a function of the physical memory size and the
 	 * kernel's heap size.  Generally speaking, it scales with the
 	 * physical memory size.  The ratio of desiredvnodes to the physical
 	 * memory size is 1:16 until desiredvnodes exceeds 98,304.
 	 * Thereafter, the
 	 * marginal ratio of desiredvnodes to the physical memory size is
 	 * 1:64.  However, desiredvnodes is limited by the kernel's heap
 	 * size.  The memory required by desiredvnodes vnodes and vm objects
 	 * must not exceed 1/10th of the kernel's heap size.
 	 */
 	physvnodes = maxproc + pgtok(vm_cnt.v_page_count) / 64 +
 	    3 * min(98304 * 16, pgtok(vm_cnt.v_page_count)) / 64;
 	virtvnodes = vm_kmem_size / (10 * (sizeof(struct vm_object) +
 	    sizeof(struct vnode) + NC_SZ * ncsizefactor + NFS_NCLNODE_SZ));
 	desiredvnodes = min(physvnodes, virtvnodes);
 	if (desiredvnodes > MAXVNODES_MAX) {
 		if (bootverbose)
 			printf("Reducing kern.maxvnodes %lu -> %lu\n",
 			    desiredvnodes, MAXVNODES_MAX);
 		desiredvnodes = MAXVNODES_MAX;
 	}
 	wantfreevnodes = desiredvnodes / 4;
 	mtx_init(&mntid_mtx, "mntid", NULL, MTX_DEF);
 	TAILQ_INIT(&vnode_list);
 	mtx_init(&vnode_list_mtx, "vnode_list", NULL, MTX_DEF);
 	/*
 	 * The lock is taken to appease WITNESS.
 	 */
 	mtx_lock(&vnode_list_mtx);
 	vnlru_recalc();
 	mtx_unlock(&vnode_list_mtx);
 	vnode_list_free_marker = vn_alloc_marker(NULL);
 	TAILQ_INSERT_HEAD(&vnode_list, vnode_list_free_marker, v_vnodelist);
 	vnode_list_reclaim_marker = vn_alloc_marker(NULL);
 	TAILQ_INSERT_HEAD(&vnode_list, vnode_list_reclaim_marker, v_vnodelist);
 
 #ifdef KASAN
 	ctor = vnode_ctor;
 	dtor = vnode_dtor;
 #else
 	ctor = NULL;
 	dtor = NULL;
 #endif
 	vnode_zone = uma_zcreate("VNODE", sizeof(struct vnode), ctor, dtor,
 	    vnode_init, vnode_fini, UMA_ALIGN_PTR, UMA_ZONE_NOKASAN);
 	uma_zone_set_smr(vnode_zone, vfs_smr);
 
 	/*
 	 * Preallocate enough nodes to support one-per buf so that
 	 * we can not fail an insert.  reassignbuf() callers can not
 	 * tolerate the insertion failure.
 	 */
 	buf_trie_zone = uma_zcreate("BUF TRIE", pctrie_node_size(),
 	    NULL, NULL, pctrie_zone_init, NULL, UMA_ALIGN_PTR, 
 	    UMA_ZONE_NOFREE | UMA_ZONE_SMR);
 	buf_trie_smr = uma_zone_get_smr(buf_trie_zone);
 	uma_prealloc(buf_trie_zone, nbuf);
 
 	vnodes_created = counter_u64_alloc(M_WAITOK);
 	recycles_count = counter_u64_alloc(M_WAITOK);
 	recycles_free_count = counter_u64_alloc(M_WAITOK);
 	deferred_inact = counter_u64_alloc(M_WAITOK);
 
 	/*
 	 * Initialize the filesystem syncer.
 	 */
 	syncer_workitem_pending = hashinit(syncer_maxdelay, M_VNODE,
 	    &syncer_mask);
 	syncer_maxdelay = syncer_mask + 1;
 	mtx_init(&sync_mtx, "Syncer mtx", NULL, MTX_DEF);
 	cv_init(&sync_wakeup, "syncer");
 
 	CPU_FOREACH(cpu) {
 		vd = DPCPU_ID_PTR((cpu), vd);
 		bzero(vd, sizeof(*vd));
 		mtx_init(&vd->lock, "vdbatch", NULL, MTX_DEF);
 	}
 }
 SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_FIRST, vntblinit, NULL);
 
 /*
  * Mark a mount point as busy. Used to synchronize access and to delay
  * unmounting. Eventually, mountlist_mtx is not released on failure.
  *
  * vfs_busy() is a custom lock, it can block the caller.
  * vfs_busy() only sleeps if the unmount is active on the mount point.
  * For a mountpoint mp, vfs_busy-enforced lock is before lock of any
  * vnode belonging to mp.
  *
  * Lookup uses vfs_busy() to traverse mount points.
  * root fs			var fs
  * / vnode lock		A	/ vnode lock (/var)		D
  * /var vnode lock	B	/log vnode lock(/var/log)	E
  * vfs_busy lock	C	vfs_busy lock			F
  *
  * Within each file system, the lock order is C->A->B and F->D->E.
  *
  * When traversing across mounts, the system follows that lock order:
  *
  *        C->A->B
  *              |
  *              +->F->D->E
  *
  * The lookup() process for namei("/var") illustrates the process:
  *  VOP_LOOKUP() obtains B while A is held
  *  vfs_busy() obtains a shared lock on F while A and B are held
  *  vput() releases lock on B
  *  vput() releases lock on A
  *  VFS_ROOT() obtains lock on D while shared lock on F is held
  *  vfs_unbusy() releases shared lock on F
  *  vn_lock() obtains lock on deadfs vnode vp_crossmp instead of A.
  *    Attempt to lock A (instead of vp_crossmp) while D is held would
  *    violate the global order, causing deadlocks.
  *
  * dounmount() locks B while F is drained.
  */
 int
 vfs_busy(struct mount *mp, int flags)
 {
 	struct mount_pcpu *mpcpu;
 
 	MPASS((flags & ~MBF_MASK) == 0);
 	CTR3(KTR_VFS, "%s: mp %p with flags %d", __func__, mp, flags);
 
 	if (vfs_op_thread_enter(mp, mpcpu)) {
 		MPASS((mp->mnt_kern_flag & MNTK_DRAINING) == 0);
 		MPASS((mp->mnt_kern_flag & MNTK_UNMOUNT) == 0);
 		MPASS((mp->mnt_kern_flag & MNTK_REFEXPIRE) == 0);
 		vfs_mp_count_add_pcpu(mpcpu, ref, 1);
 		vfs_mp_count_add_pcpu(mpcpu, lockref, 1);
 		vfs_op_thread_exit(mp, mpcpu);
 		if (flags & MBF_MNTLSTLOCK)
 			mtx_unlock(&mountlist_mtx);
 		return (0);
 	}
 
 	MNT_ILOCK(mp);
 	vfs_assert_mount_counters(mp);
 	MNT_REF(mp);
 	/*
 	 * If mount point is currently being unmounted, sleep until the
 	 * mount point fate is decided.  If thread doing the unmounting fails,
 	 * it will clear MNTK_UNMOUNT flag before waking us up, indicating
 	 * that this mount point has survived the unmount attempt and vfs_busy
 	 * should retry.  Otherwise the unmounter thread will set MNTK_REFEXPIRE
 	 * flag in addition to MNTK_UNMOUNT, indicating that mount point is
 	 * about to be really destroyed.  vfs_busy needs to release its
 	 * reference on the mount point in this case and return with ENOENT,
 	 * telling the caller that mount mount it tried to busy is no longer
 	 * valid.
 	 */
 	while (mp->mnt_kern_flag & MNTK_UNMOUNT) {
 		KASSERT(TAILQ_EMPTY(&mp->mnt_uppers),
 		    ("%s: non-empty upper mount list with pending unmount",
 		    __func__));
 		if (flags & MBF_NOWAIT || mp->mnt_kern_flag & MNTK_REFEXPIRE) {
 			MNT_REL(mp);
 			MNT_IUNLOCK(mp);
 			CTR1(KTR_VFS, "%s: failed busying before sleeping",
 			    __func__);
 			return (ENOENT);
 		}
 		if (flags & MBF_MNTLSTLOCK)
 			mtx_unlock(&mountlist_mtx);
 		mp->mnt_kern_flag |= MNTK_MWAIT;
 		msleep(mp, MNT_MTX(mp), PVFS | PDROP, "vfs_busy", 0);
 		if (flags & MBF_MNTLSTLOCK)
 			mtx_lock(&mountlist_mtx);
 		MNT_ILOCK(mp);
 	}
 	if (flags & MBF_MNTLSTLOCK)
 		mtx_unlock(&mountlist_mtx);
 	mp->mnt_lockref++;
 	MNT_IUNLOCK(mp);
 	return (0);
 }
 
 /*
  * Free a busy filesystem.
  */
 void
 vfs_unbusy(struct mount *mp)
 {
 	struct mount_pcpu *mpcpu;
 	int c;
 
 	CTR2(KTR_VFS, "%s: mp %p", __func__, mp);
 
 	if (vfs_op_thread_enter(mp, mpcpu)) {
 		MPASS((mp->mnt_kern_flag & MNTK_DRAINING) == 0);
 		vfs_mp_count_sub_pcpu(mpcpu, lockref, 1);
 		vfs_mp_count_sub_pcpu(mpcpu, ref, 1);
 		vfs_op_thread_exit(mp, mpcpu);
 		return;
 	}
 
 	MNT_ILOCK(mp);
 	vfs_assert_mount_counters(mp);
 	MNT_REL(mp);
 	c = --mp->mnt_lockref;
 	if (mp->mnt_vfs_ops == 0) {
 		MPASS((mp->mnt_kern_flag & MNTK_DRAINING) == 0);
 		MNT_IUNLOCK(mp);
 		return;
 	}
 	if (c < 0)
 		vfs_dump_mount_counters(mp);
 	if (c == 0 && (mp->mnt_kern_flag & MNTK_DRAINING) != 0) {
 		MPASS(mp->mnt_kern_flag & MNTK_UNMOUNT);
 		CTR1(KTR_VFS, "%s: waking up waiters", __func__);
 		mp->mnt_kern_flag &= ~MNTK_DRAINING;
 		wakeup(&mp->mnt_lockref);
 	}
 	MNT_IUNLOCK(mp);
 }
 
 /*
  * Lookup a mount point by filesystem identifier.
  */
 struct mount *
 vfs_getvfs(fsid_t *fsid)
 {
 	struct mount *mp;
 
 	CTR2(KTR_VFS, "%s: fsid %p", __func__, fsid);
 	mtx_lock(&mountlist_mtx);
 	TAILQ_FOREACH(mp, &mountlist, mnt_list) {
 		if (fsidcmp(&mp->mnt_stat.f_fsid, fsid) == 0) {
 			vfs_ref(mp);
 			mtx_unlock(&mountlist_mtx);
 			return (mp);
 		}
 	}
 	mtx_unlock(&mountlist_mtx);
 	CTR2(KTR_VFS, "%s: lookup failed for %p id", __func__, fsid);
 	return ((struct mount *) 0);
 }
 
 /*
  * Lookup a mount point by filesystem identifier, busying it before
  * returning.
  *
  * To avoid congestion on mountlist_mtx, implement simple direct-mapped
  * cache for popular filesystem identifiers.  The cache is lockess, using
  * the fact that struct mount's are never freed.  In worst case we may
  * get pointer to unmounted or even different filesystem, so we have to
  * check what we got, and go slow way if so.
  */
 struct mount *
 vfs_busyfs(fsid_t *fsid)
 {
 #define	FSID_CACHE_SIZE	256
 	typedef struct mount * volatile vmp_t;
 	static vmp_t cache[FSID_CACHE_SIZE];
 	struct mount *mp;
 	int error;
 	uint32_t hash;
 
 	CTR2(KTR_VFS, "%s: fsid %p", __func__, fsid);
 	hash = fsid->val[0] ^ fsid->val[1];
 	hash = (hash >> 16 ^ hash) & (FSID_CACHE_SIZE - 1);
 	mp = cache[hash];
 	if (mp == NULL || fsidcmp(&mp->mnt_stat.f_fsid, fsid) != 0)
 		goto slow;
 	if (vfs_busy(mp, 0) != 0) {
 		cache[hash] = NULL;
 		goto slow;
 	}
 	if (fsidcmp(&mp->mnt_stat.f_fsid, fsid) == 0)
 		return (mp);
 	else
 	    vfs_unbusy(mp);
 
 slow:
 	mtx_lock(&mountlist_mtx);
 	TAILQ_FOREACH(mp, &mountlist, mnt_list) {
 		if (fsidcmp(&mp->mnt_stat.f_fsid, fsid) == 0) {
 			error = vfs_busy(mp, MBF_MNTLSTLOCK);
 			if (error) {
 				cache[hash] = NULL;
 				mtx_unlock(&mountlist_mtx);
 				return (NULL);
 			}
 			cache[hash] = mp;
 			return (mp);
 		}
 	}
 	CTR2(KTR_VFS, "%s: lookup failed for %p id", __func__, fsid);
 	mtx_unlock(&mountlist_mtx);
 	return ((struct mount *) 0);
 }
 
 /*
  * Check if a user can access privileged mount options.
  */
 int
 vfs_suser(struct mount *mp, struct thread *td)
 {
 	int error;
 
 	if (jailed(td->td_ucred)) {
 		/*
 		 * If the jail of the calling thread lacks permission for
 		 * this type of file system, deny immediately.
 		 */
 		if (!prison_allow(td->td_ucred, mp->mnt_vfc->vfc_prison_flag))
 			return (EPERM);
 
 		/*
 		 * If the file system was mounted outside the jail of the
 		 * calling thread, deny immediately.
 		 */
 		if (prison_check(td->td_ucred, mp->mnt_cred) != 0)
 			return (EPERM);
 	}
 
 	/*
 	 * If file system supports delegated administration, we don't check
 	 * for the PRIV_VFS_MOUNT_OWNER privilege - it will be better verified
 	 * by the file system itself.
 	 * If this is not the user that did original mount, we check for
 	 * the PRIV_VFS_MOUNT_OWNER privilege.
 	 */
 	if (!(mp->mnt_vfc->vfc_flags & VFCF_DELEGADMIN) &&
 	    mp->mnt_cred->cr_uid != td->td_ucred->cr_uid) {
 		if ((error = priv_check(td, PRIV_VFS_MOUNT_OWNER)) != 0)
 			return (error);
 	}
 	return (0);
 }
 
 /*
  * Get a new unique fsid.  Try to make its val[0] unique, since this value
  * will be used to create fake device numbers for stat().  Also try (but
  * not so hard) make its val[0] unique mod 2^16, since some emulators only
  * support 16-bit device numbers.  We end up with unique val[0]'s for the
  * first 2^16 calls and unique val[0]'s mod 2^16 for the first 2^8 calls.
  *
  * Keep in mind that several mounts may be running in parallel.  Starting
  * the search one past where the previous search terminated is both a
  * micro-optimization and a defense against returning the same fsid to
  * different mounts.
  */
 void
 vfs_getnewfsid(struct mount *mp)
 {
 	static uint16_t mntid_base;
 	struct mount *nmp;
 	fsid_t tfsid;
 	int mtype;
 
 	CTR2(KTR_VFS, "%s: mp %p", __func__, mp);
 	mtx_lock(&mntid_mtx);
 	mtype = mp->mnt_vfc->vfc_typenum;
 	tfsid.val[1] = mtype;
 	mtype = (mtype & 0xFF) << 24;
 	for (;;) {
 		tfsid.val[0] = makedev(255,
 		    mtype | ((mntid_base & 0xFF00) << 8) | (mntid_base & 0xFF));
 		mntid_base++;
 		if ((nmp = vfs_getvfs(&tfsid)) == NULL)
 			break;
 		vfs_rel(nmp);
 	}
 	mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
 	mp->mnt_stat.f_fsid.val[1] = tfsid.val[1];
 	mtx_unlock(&mntid_mtx);
 }
 
 /*
  * Knob to control the precision of file timestamps:
  *
  *   0 = seconds only; nanoseconds zeroed.
  *   1 = seconds and nanoseconds, accurate within 1/HZ.
  *   2 = seconds and nanoseconds, truncated to microseconds.
  * >=3 = seconds and nanoseconds, maximum precision.
  */
 enum { TSP_SEC, TSP_HZ, TSP_USEC, TSP_NSEC };
 
 static int timestamp_precision = TSP_USEC;
 SYSCTL_INT(_vfs, OID_AUTO, timestamp_precision, CTLFLAG_RW,
     &timestamp_precision, 0, "File timestamp precision (0: seconds, "
     "1: sec + ns accurate to 1/HZ, 2: sec + ns truncated to us, "
     "3+: sec + ns (max. precision))");
 
 /*
  * Get a current timestamp.
  */
 void
 vfs_timestamp(struct timespec *tsp)
 {
 	struct timeval tv;
 
 	switch (timestamp_precision) {
 	case TSP_SEC:
 		tsp->tv_sec = time_second;
 		tsp->tv_nsec = 0;
 		break;
 	case TSP_HZ:
 		getnanotime(tsp);
 		break;
 	case TSP_USEC:
 		microtime(&tv);
 		TIMEVAL_TO_TIMESPEC(&tv, tsp);
 		break;
 	case TSP_NSEC:
 	default:
 		nanotime(tsp);
 		break;
 	}
 }
 
 /*
  * Set vnode attributes to VNOVAL
  */
 void
 vattr_null(struct vattr *vap)
 {
 
 	vap->va_type = VNON;
 	vap->va_size = VNOVAL;
 	vap->va_bytes = VNOVAL;
 	vap->va_mode = VNOVAL;
 	vap->va_nlink = VNOVAL;
 	vap->va_uid = VNOVAL;
 	vap->va_gid = VNOVAL;
 	vap->va_fsid = VNOVAL;
 	vap->va_fileid = VNOVAL;
 	vap->va_blocksize = VNOVAL;
 	vap->va_rdev = VNOVAL;
 	vap->va_atime.tv_sec = VNOVAL;
 	vap->va_atime.tv_nsec = VNOVAL;
 	vap->va_mtime.tv_sec = VNOVAL;
 	vap->va_mtime.tv_nsec = VNOVAL;
 	vap->va_ctime.tv_sec = VNOVAL;
 	vap->va_ctime.tv_nsec = VNOVAL;
 	vap->va_birthtime.tv_sec = VNOVAL;
 	vap->va_birthtime.tv_nsec = VNOVAL;
 	vap->va_flags = VNOVAL;
 	vap->va_gen = VNOVAL;
 	vap->va_vaflags = 0;
 }
 
 /*
  * Try to reduce the total number of vnodes.
  *
  * This routine (and its user) are buggy in at least the following ways:
  * - all parameters were picked years ago when RAM sizes were significantly
  *   smaller
  * - it can pick vnodes based on pages used by the vm object, but filesystems
  *   like ZFS don't use it making the pick broken
  * - since ZFS has its own aging policy it gets partially combated by this one
  * - a dedicated method should be provided for filesystems to let them decide
  *   whether the vnode should be recycled
  *
  * This routine is called when we have too many vnodes.  It attempts
  * to free <count> vnodes and will potentially free vnodes that still
  * have VM backing store (VM backing store is typically the cause
  * of a vnode blowout so we want to do this).  Therefore, this operation
  * is not considered cheap.
  *
  * A number of conditions may prevent a vnode from being reclaimed.
  * the buffer cache may have references on the vnode, a directory
  * vnode may still have references due to the namei cache representing
  * underlying files, or the vnode may be in active use.   It is not
  * desirable to reuse such vnodes.  These conditions may cause the
  * number of vnodes to reach some minimum value regardless of what
  * you set kern.maxvnodes to.  Do not set kern.maxvnodes too low.
  *
  * @param reclaim_nc_src Only reclaim directories with outgoing namecache
  * 			 entries if this argument is strue
  * @param trigger	 Only reclaim vnodes with fewer than this many resident
  *			 pages.
  * @param target	 How many vnodes to reclaim.
  * @return		 The number of vnodes that were reclaimed.
  */
 static int
 vlrureclaim(bool reclaim_nc_src, int trigger, u_long target)
 {
 	struct vnode *vp, *mvp;
 	struct mount *mp;
 	struct vm_object *object;
 	u_long done;
 	bool retried;
 
 	mtx_assert(&vnode_list_mtx, MA_OWNED);
 
 	retried = false;
 	done = 0;
 
 	mvp = vnode_list_reclaim_marker;
 restart:
 	vp = mvp;
 	while (done < target) {
 		vp = TAILQ_NEXT(vp, v_vnodelist);
 		if (__predict_false(vp == NULL))
 			break;
 
 		if (__predict_false(vp->v_type == VMARKER))
 			continue;
 
 		/*
 		 * If it's been deconstructed already, it's still
 		 * referenced, or it exceeds the trigger, skip it.
 		 * Also skip free vnodes.  We are trying to make space
 		 * to expand the free list, not reduce it.
 		 */
 		if (vp->v_usecount > 0 || vp->v_holdcnt == 0 ||
 		    (!reclaim_nc_src && !LIST_EMPTY(&vp->v_cache_src)))
 			goto next_iter;
 
 		if (vp->v_type == VBAD || vp->v_type == VNON)
 			goto next_iter;
 
 		object = atomic_load_ptr(&vp->v_object);
 		if (object == NULL || object->resident_page_count > trigger) {
 			goto next_iter;
 		}
 
 		/*
 		 * Handle races against vnode allocation. Filesystems lock the
 		 * vnode some time after it gets returned from getnewvnode,
 		 * despite type and hold count being manipulated earlier.
 		 * Resorting to checking v_mount restores guarantees present
 		 * before the global list was reworked to contain all vnodes.
 		 */
 		if (!VI_TRYLOCK(vp))
 			goto next_iter;
 		if (__predict_false(vp->v_type == VBAD || vp->v_type == VNON)) {
 			VI_UNLOCK(vp);
 			goto next_iter;
 		}
 		if (vp->v_mount == NULL) {
 			VI_UNLOCK(vp);
 			goto next_iter;
 		}
 		vholdl(vp);
 		VI_UNLOCK(vp);
 		TAILQ_REMOVE(&vnode_list, mvp, v_vnodelist);
 		TAILQ_INSERT_AFTER(&vnode_list, vp, mvp, v_vnodelist);
 		mtx_unlock(&vnode_list_mtx);
 
 		if (vn_start_write(vp, &mp, V_NOWAIT) != 0) {
 			vdrop_recycle(vp);
 			goto next_iter_unlocked;
 		}
 		if (VOP_LOCK(vp, LK_EXCLUSIVE|LK_NOWAIT) != 0) {
 			vdrop_recycle(vp);
 			vn_finished_write(mp);
 			goto next_iter_unlocked;
 		}
 
 		VI_LOCK(vp);
 		if (vp->v_usecount > 0 ||
 		    (!reclaim_nc_src && !LIST_EMPTY(&vp->v_cache_src)) ||
 		    (vp->v_object != NULL && vp->v_object->handle == vp &&
 		    vp->v_object->resident_page_count > trigger)) {
 			VOP_UNLOCK(vp);
 			vdropl_recycle(vp);
 			vn_finished_write(mp);
 			goto next_iter_unlocked;
 		}
 		counter_u64_add(recycles_count, 1);
 		vgonel(vp);
 		VOP_UNLOCK(vp);
 		vdropl_recycle(vp);
 		vn_finished_write(mp);
 		done++;
 next_iter_unlocked:
 		if (should_yield())
 			kern_yield(PRI_USER);
 		mtx_lock(&vnode_list_mtx);
 		goto restart;
 next_iter:
 		MPASS(vp->v_type != VMARKER);
 		if (!should_yield())
 			continue;
 		TAILQ_REMOVE(&vnode_list, mvp, v_vnodelist);
 		TAILQ_INSERT_AFTER(&vnode_list, vp, mvp, v_vnodelist);
 		mtx_unlock(&vnode_list_mtx);
 		kern_yield(PRI_USER);
 		mtx_lock(&vnode_list_mtx);
 		goto restart;
 	}
 	if (done == 0 && !retried) {
 		TAILQ_REMOVE(&vnode_list, mvp, v_vnodelist);
 		TAILQ_INSERT_HEAD(&vnode_list, mvp, v_vnodelist);
 		retried = true;
 		goto restart;
 	}
 	return (done);
 }
 
 static int max_vnlru_free = 10000; /* limit on vnode free requests per call */
 SYSCTL_INT(_debug, OID_AUTO, max_vnlru_free, CTLFLAG_RW, &max_vnlru_free,
     0,
     "limit on vnode free requests per call to the vnlru_free routine");
 
 /*
  * Attempt to reduce the free list by the requested amount.
  */
 static int
 vnlru_free_impl(int count, struct vfsops *mnt_op, struct vnode *mvp)
 {
 	struct vnode *vp;
 	struct mount *mp;
 	int ocount;
 
 	mtx_assert(&vnode_list_mtx, MA_OWNED);
 	if (count > max_vnlru_free)
 		count = max_vnlru_free;
 	ocount = count;
 	vp = mvp;
 	for (;;) {
 		if (count == 0) {
 			break;
 		}
 		vp = TAILQ_NEXT(vp, v_vnodelist);
 		if (__predict_false(vp == NULL)) {
 			TAILQ_REMOVE(&vnode_list, mvp, v_vnodelist);
 			TAILQ_INSERT_TAIL(&vnode_list, mvp, v_vnodelist);
 			break;
 		}
 		if (__predict_false(vp->v_type == VMARKER))
 			continue;
 		if (vp->v_holdcnt > 0)
 			continue;
 		/*
 		 * Don't recycle if our vnode is from different type
 		 * of mount point.  Note that mp is type-safe, the
 		 * check does not reach unmapped address even if
 		 * vnode is reclaimed.
 		 */
 		if (mnt_op != NULL && (mp = vp->v_mount) != NULL &&
 		    mp->mnt_op != mnt_op) {
 			continue;
 		}
 		if (__predict_false(vp->v_type == VBAD || vp->v_type == VNON)) {
 			continue;
 		}
 		if (!vhold_recycle_free(vp))
 			continue;
 		TAILQ_REMOVE(&vnode_list, mvp, v_vnodelist);
 		TAILQ_INSERT_AFTER(&vnode_list, vp, mvp, v_vnodelist);
 		mtx_unlock(&vnode_list_mtx);
 		/*
 		 * FIXME: ignores the return value, meaning it may be nothing
 		 * got recycled but it claims otherwise to the caller.
 		 *
 		 * Originally the value started being ignored in 2005 with
 		 * 114a1006a8204aa156e1f9ad6476cdff89cada7f .
 		 *
 		 * Respecting the value can run into significant stalls if most
 		 * vnodes belong to one file system and it has writes
 		 * suspended.  In presence of many threads and millions of
 		 * vnodes they keep contending on the vnode_list_mtx lock only
 		 * to find vnodes they can't recycle.
 		 *
 		 * The solution would be to pre-check if the vnode is likely to
 		 * be recycle-able, but it needs to happen with the
 		 * vnode_list_mtx lock held. This runs into a problem where
 		 * VOP_GETWRITEMOUNT (currently needed to find out about if
 		 * writes are frozen) can take locks which LOR against it.
 		 *
 		 * Check nullfs for one example (null_getwritemount).
 		 */
 		vtryrecycle(vp);
 		count--;
 		mtx_lock(&vnode_list_mtx);
 		vp = mvp;
 	}
 	return (ocount - count);
 }
 
 static int
 vnlru_free_locked(int count)
 {
 
 	mtx_assert(&vnode_list_mtx, MA_OWNED);
 	return (vnlru_free_impl(count, NULL, vnode_list_free_marker));
 }
 
 void
 vnlru_free_vfsops(int count, struct vfsops *mnt_op, struct vnode *mvp)
 {
 
 	MPASS(mnt_op != NULL);
 	MPASS(mvp != NULL);
 	VNPASS(mvp->v_type == VMARKER, mvp);
 	mtx_lock(&vnode_list_mtx);
 	vnlru_free_impl(count, mnt_op, mvp);
 	mtx_unlock(&vnode_list_mtx);
 }
 
 struct vnode *
 vnlru_alloc_marker(void)
 {
 	struct vnode *mvp;
 
 	mvp = vn_alloc_marker(NULL);
 	mtx_lock(&vnode_list_mtx);
 	TAILQ_INSERT_BEFORE(vnode_list_free_marker, mvp, v_vnodelist);
 	mtx_unlock(&vnode_list_mtx);
 	return (mvp);
 }
 
 void
 vnlru_free_marker(struct vnode *mvp)
 {
 	mtx_lock(&vnode_list_mtx);
 	TAILQ_REMOVE(&vnode_list, mvp, v_vnodelist);
 	mtx_unlock(&vnode_list_mtx);
 	vn_free_marker(mvp);
 }
 
 static void
 vnlru_recalc(void)
 {
 
 	mtx_assert(&vnode_list_mtx, MA_OWNED);
 	gapvnodes = imax(desiredvnodes - wantfreevnodes, 100);
 	vhiwat = gapvnodes / 11; /* 9% -- just under the 10% in vlrureclaim() */
 	vlowat = vhiwat / 2;
 }
 
 /*
  * Attempt to recycle vnodes in a context that is always safe to block.
  * Calling vlrurecycle() from the bowels of filesystem code has some
  * interesting deadlock problems.
  */
 static struct proc *vnlruproc;
 static int vnlruproc_sig;
 
 /*
  * The main freevnodes counter is only updated when threads requeue their vnode
  * batches. CPUs are conditionally walked to compute a more accurate total.
  *
  * Limit how much of a slop are we willing to tolerate. Note: the actual value
  * at any given moment can still exceed slop, but it should not be by significant
  * margin in practice.
  */
 #define VNLRU_FREEVNODES_SLOP 128
 
 static __inline void
 vfs_freevnodes_inc(void)
 {
 	struct vdbatch *vd;
 
 	critical_enter();
 	vd = DPCPU_PTR(vd);
 	vd->freevnodes++;
 	critical_exit();
 }
 
 static __inline void
 vfs_freevnodes_dec(void)
 {
 	struct vdbatch *vd;
 
 	critical_enter();
 	vd = DPCPU_PTR(vd);
 	vd->freevnodes--;
 	critical_exit();
 }
 
 static u_long
 vnlru_read_freevnodes(void)
 {
 	struct vdbatch *vd;
 	long slop;
 	int cpu;
 
 	mtx_assert(&vnode_list_mtx, MA_OWNED);
 	if (freevnodes > freevnodes_old)
 		slop = freevnodes - freevnodes_old;
 	else
 		slop = freevnodes_old - freevnodes;
 	if (slop < VNLRU_FREEVNODES_SLOP)
 		return (freevnodes >= 0 ? freevnodes : 0);
 	freevnodes_old = freevnodes;
 	CPU_FOREACH(cpu) {
 		vd = DPCPU_ID_PTR((cpu), vd);
 		freevnodes_old += vd->freevnodes;
 	}
 	return (freevnodes_old >= 0 ? freevnodes_old : 0);
 }
 
 static bool
 vnlru_under(u_long rnumvnodes, u_long limit)
 {
 	u_long rfreevnodes, space;
 
 	if (__predict_false(rnumvnodes > desiredvnodes))
 		return (true);
 
 	space = desiredvnodes - rnumvnodes;
 	if (space < limit) {
 		rfreevnodes = vnlru_read_freevnodes();
 		if (rfreevnodes > wantfreevnodes)
 			space += rfreevnodes - wantfreevnodes;
 	}
 	return (space < limit);
 }
 
 static bool
 vnlru_under_unlocked(u_long rnumvnodes, u_long limit)
 {
 	long rfreevnodes, space;
 
 	if (__predict_false(rnumvnodes > desiredvnodes))
 		return (true);
 
 	space = desiredvnodes - rnumvnodes;
 	if (space < limit) {
 		rfreevnodes = atomic_load_long(&freevnodes);
 		if (rfreevnodes > wantfreevnodes)
 			space += rfreevnodes - wantfreevnodes;
 	}
 	return (space < limit);
 }
 
 static void
 vnlru_kick(void)
 {
 
 	mtx_assert(&vnode_list_mtx, MA_OWNED);
 	if (vnlruproc_sig == 0) {
 		vnlruproc_sig = 1;
 		wakeup(vnlruproc);
 	}
 }
 
 static void
 vnlru_proc(void)
 {
 	u_long rnumvnodes, rfreevnodes, target;
 	unsigned long onumvnodes;
 	int done, force, trigger, usevnodes;
 	bool reclaim_nc_src, want_reread;
 
 	EVENTHANDLER_REGISTER(shutdown_pre_sync, kproc_shutdown, vnlruproc,
 	    SHUTDOWN_PRI_FIRST);
 
 	force = 0;
 	want_reread = false;
 	for (;;) {
 		kproc_suspend_check(vnlruproc);
 		mtx_lock(&vnode_list_mtx);
 		rnumvnodes = atomic_load_long(&numvnodes);
 
 		if (want_reread) {
 			force = vnlru_under(numvnodes, vhiwat) ? 1 : 0;
 			want_reread = false;
 		}
 
 		/*
 		 * If numvnodes is too large (due to desiredvnodes being
 		 * adjusted using its sysctl, or emergency growth), first
 		 * try to reduce it by discarding from the free list.
 		 */
 		if (rnumvnodes > desiredvnodes) {
 			vnlru_free_locked(rnumvnodes - desiredvnodes);
 			rnumvnodes = atomic_load_long(&numvnodes);
 		}
 		/*
 		 * Sleep if the vnode cache is in a good state.  This is
 		 * when it is not over-full and has space for about a 4%
 		 * or 9% expansion (by growing its size or inexcessively
 		 * reducing its free list).  Otherwise, try to reclaim
 		 * space for a 10% expansion.
 		 */
 		if (vstir && force == 0) {
 			force = 1;
 			vstir = 0;
 		}
 		if (force == 0 && !vnlru_under(rnumvnodes, vlowat)) {
 			vnlruproc_sig = 0;
 			wakeup(&vnlruproc_sig);
 			msleep(vnlruproc, &vnode_list_mtx,
 			    PVFS|PDROP, "vlruwt", hz);
 			continue;
 		}
 		rfreevnodes = vnlru_read_freevnodes();
 
 		onumvnodes = rnumvnodes;
 		/*
 		 * Calculate parameters for recycling.  These are the same
 		 * throughout the loop to give some semblance of fairness.
 		 * The trigger point is to avoid recycling vnodes with lots
 		 * of resident pages.  We aren't trying to free memory; we
 		 * are trying to recycle or at least free vnodes.
 		 */
 		if (rnumvnodes <= desiredvnodes)
 			usevnodes = rnumvnodes - rfreevnodes;
 		else
 			usevnodes = rnumvnodes;
 		if (usevnodes <= 0)
 			usevnodes = 1;
 		/*
 		 * The trigger value is chosen to give a conservatively
 		 * large value to ensure that it alone doesn't prevent
 		 * making progress.  The value can easily be so large that
 		 * it is effectively infinite in some congested and
 		 * misconfigured cases, and this is necessary.  Normally
 		 * it is about 8 to 100 (pages), which is quite large.
 		 */
 		trigger = vm_cnt.v_page_count * 2 / usevnodes;
 		if (force < 2)
 			trigger = vsmalltrigger;
 		reclaim_nc_src = force >= 3;
 		target = rnumvnodes * (int64_t)gapvnodes / imax(desiredvnodes, 1);
 		target = target / 10 + 1;
 		done = vlrureclaim(reclaim_nc_src, trigger, target);
 		mtx_unlock(&vnode_list_mtx);
 		if (onumvnodes > desiredvnodes && numvnodes <= desiredvnodes)
 			uma_reclaim(UMA_RECLAIM_DRAIN);
 		if (done == 0) {
 			if (force == 0 || force == 1) {
 				force = 2;
 				continue;
 			}
 			if (force == 2) {
 				force = 3;
 				continue;
 			}
 			want_reread = true;
 			force = 0;
 			vnlru_nowhere++;
 			tsleep(vnlruproc, PPAUSE, "vlrup", hz * 3);
 		} else {
 			want_reread = true;
 			kern_yield(PRI_USER);
 		}
 	}
 }
 
 static struct kproc_desc vnlru_kp = {
 	"vnlru",
 	vnlru_proc,
 	&vnlruproc
 };
 SYSINIT(vnlru, SI_SUB_KTHREAD_UPDATE, SI_ORDER_FIRST, kproc_start,
     &vnlru_kp);
 
 /*
  * Routines having to do with the management of the vnode table.
  */
 
 /*
  * Try to recycle a freed vnode.  We abort if anyone picks up a reference
  * before we actually vgone().  This function must be called with the vnode
  * held to prevent the vnode from being returned to the free list midway
  * through vgone().
  */
 static int
 vtryrecycle(struct vnode *vp)
 {
 	struct mount *vnmp;
 
 	CTR2(KTR_VFS, "%s: vp %p", __func__, vp);
 	VNASSERT(vp->v_holdcnt, vp,
 	    ("vtryrecycle: Recycling vp %p without a reference.", vp));
 	/*
 	 * This vnode may found and locked via some other list, if so we
 	 * can't recycle it yet.
 	 */
 	if (VOP_LOCK(vp, LK_EXCLUSIVE | LK_NOWAIT) != 0) {
 		CTR2(KTR_VFS,
 		    "%s: impossible to recycle, vp %p lock is already held",
 		    __func__, vp);
 		vdrop_recycle(vp);
 		return (EWOULDBLOCK);
 	}
 	/*
 	 * Don't recycle if its filesystem is being suspended.
 	 */
 	if (vn_start_write(vp, &vnmp, V_NOWAIT) != 0) {
 		VOP_UNLOCK(vp);
 		CTR2(KTR_VFS,
 		    "%s: impossible to recycle, cannot start the write for %p",
 		    __func__, vp);
 		vdrop_recycle(vp);
 		return (EBUSY);
 	}
 	/*
 	 * If we got this far, we need to acquire the interlock and see if
 	 * anyone picked up this vnode from another list.  If not, we will
 	 * mark it with DOOMED via vgonel() so that anyone who does find it
 	 * will skip over it.
 	 */
 	VI_LOCK(vp);
 	if (vp->v_usecount) {
 		VOP_UNLOCK(vp);
 		vdropl_recycle(vp);
 		vn_finished_write(vnmp);
 		CTR2(KTR_VFS,
 		    "%s: impossible to recycle, %p is already referenced",
 		    __func__, vp);
 		return (EBUSY);
 	}
 	if (!VN_IS_DOOMED(vp)) {
 		counter_u64_add(recycles_free_count, 1);
 		vgonel(vp);
 	}
 	VOP_UNLOCK(vp);
 	vdropl_recycle(vp);
 	vn_finished_write(vnmp);
 	return (0);
 }
 
 /*
  * Allocate a new vnode.
  *
  * The operation never returns an error. Returning an error was disabled
  * in r145385 (dated 2005) with the following comment:
  *
  * XXX Not all VFS_VGET/ffs_vget callers check returns.
  *
  * Given the age of this commit (almost 15 years at the time of writing this
  * comment) restoring the ability to fail requires a significant audit of
  * all codepaths.
  *
  * The routine can try to free a vnode or stall for up to 1 second waiting for
  * vnlru to clear things up, but ultimately always performs a M_WAITOK allocation.
  */
 static u_long vn_alloc_cyclecount;
 
 static struct vnode * __noinline
 vn_alloc_hard(struct mount *mp)
 {
 	u_long rnumvnodes, rfreevnodes;
 
 	mtx_lock(&vnode_list_mtx);
 	rnumvnodes = atomic_load_long(&numvnodes);
 	if (rnumvnodes + 1 < desiredvnodes) {
 		vn_alloc_cyclecount = 0;
 		goto alloc;
 	}
 	rfreevnodes = vnlru_read_freevnodes();
 	if (vn_alloc_cyclecount++ >= rfreevnodes) {
 		vn_alloc_cyclecount = 0;
 		vstir = 1;
 	}
 	/*
 	 * Grow the vnode cache if it will not be above its target max
 	 * after growing.  Otherwise, if the free list is nonempty, try
 	 * to reclaim 1 item from it before growing the cache (possibly
 	 * above its target max if the reclamation failed or is delayed).
 	 * Otherwise, wait for some space.  In all cases, schedule
 	 * vnlru_proc() if we are getting short of space.  The watermarks
 	 * should be chosen so that we never wait or even reclaim from
 	 * the free list to below its target minimum.
 	 */
 	if (vnlru_free_locked(1) > 0)
 		goto alloc;
 	if (mp == NULL || (mp->mnt_kern_flag & MNTK_SUSPEND) == 0) {
 		/*
 		 * Wait for space for a new vnode.
 		 */
 		vnlru_kick();
 		msleep(&vnlruproc_sig, &vnode_list_mtx, PVFS, "vlruwk", hz);
 		if (atomic_load_long(&numvnodes) + 1 > desiredvnodes &&
 		    vnlru_read_freevnodes() > 1)
 			vnlru_free_locked(1);
 	}
 alloc:
 	rnumvnodes = atomic_fetchadd_long(&numvnodes, 1) + 1;
 	if (vnlru_under(rnumvnodes, vlowat))
 		vnlru_kick();
 	mtx_unlock(&vnode_list_mtx);
 	return (uma_zalloc_smr(vnode_zone, M_WAITOK));
 }
 
 static struct vnode *
 vn_alloc(struct mount *mp)
 {
 	u_long rnumvnodes;
 
 	if (__predict_false(vn_alloc_cyclecount != 0))
 		return (vn_alloc_hard(mp));
 	rnumvnodes = atomic_fetchadd_long(&numvnodes, 1) + 1;
 	if (__predict_false(vnlru_under_unlocked(rnumvnodes, vlowat))) {
 		atomic_subtract_long(&numvnodes, 1);
 		return (vn_alloc_hard(mp));
 	}
 
 	return (uma_zalloc_smr(vnode_zone, M_WAITOK));
 }
 
 static void
 vn_free(struct vnode *vp)
 {
 
 	atomic_subtract_long(&numvnodes, 1);
 	uma_zfree_smr(vnode_zone, vp);
 }
 
 /*
  * Return the next vnode from the free list.
  */
 int
 getnewvnode(const char *tag, struct mount *mp, struct vop_vector *vops,
     struct vnode **vpp)
 {
 	struct vnode *vp;
 	struct thread *td;
 	struct lock_object *lo;
 
 	CTR3(KTR_VFS, "%s: mp %p with tag %s", __func__, mp, tag);
 
 	KASSERT(vops->registered,
 	    ("%s: not registered vector op %p\n", __func__, vops));
 
 	td = curthread;
 	if (td->td_vp_reserved != NULL) {
 		vp = td->td_vp_reserved;
 		td->td_vp_reserved = NULL;
 	} else {
 		vp = vn_alloc(mp);
 	}
 	counter_u64_add(vnodes_created, 1);
 	/*
 	 * Locks are given the generic name "vnode" when created.
 	 * Follow the historic practice of using the filesystem
 	 * name when they allocated, e.g., "zfs", "ufs", "nfs, etc.
 	 *
 	 * Locks live in a witness group keyed on their name. Thus,
 	 * when a lock is renamed, it must also move from the witness
 	 * group of its old name to the witness group of its new name.
 	 *
 	 * The change only needs to be made when the vnode moves
 	 * from one filesystem type to another. We ensure that each
 	 * filesystem use a single static name pointer for its tag so
 	 * that we can compare pointers rather than doing a strcmp().
 	 */
 	lo = &vp->v_vnlock->lock_object;
 #ifdef WITNESS
 	if (lo->lo_name != tag) {
 #endif
 		lo->lo_name = tag;
 #ifdef WITNESS
 		WITNESS_DESTROY(lo);
 		WITNESS_INIT(lo, tag);
 	}
 #endif
 	/*
 	 * By default, don't allow shared locks unless filesystems opt-in.
 	 */
 	vp->v_vnlock->lock_object.lo_flags |= LK_NOSHARE;
 	/*
 	 * Finalize various vnode identity bits.
 	 */
 	KASSERT(vp->v_object == NULL, ("stale v_object %p", vp));
 	KASSERT(vp->v_lockf == NULL, ("stale v_lockf %p", vp));
 	KASSERT(vp->v_pollinfo == NULL, ("stale v_pollinfo %p", vp));
 	vp->v_type = VNON;
 	vp->v_op = vops;
 	vp->v_irflag = 0;
 	v_init_counters(vp);
 	vn_seqc_init(vp);
 	vp->v_bufobj.bo_ops = &buf_ops_bio;
 #ifdef DIAGNOSTIC
 	if (mp == NULL && vops != &dead_vnodeops)
 		printf("NULL mp in getnewvnode(9), tag %s\n", tag);
 #endif
 #ifdef MAC
 	mac_vnode_init(vp);
 	if (mp != NULL && (mp->mnt_flag & MNT_MULTILABEL) == 0)
 		mac_vnode_associate_singlelabel(mp, vp);
 #endif
 	if (mp != NULL) {
 		vp->v_bufobj.bo_bsize = mp->mnt_stat.f_iosize;
 	}
 
 	/*
 	 * For the filesystems which do not use vfs_hash_insert(),
 	 * still initialize v_hash to have vfs_hash_index() useful.
 	 * E.g., nullfs uses vfs_hash_index() on the lower vnode for
 	 * its own hashing.
 	 */
 	vp->v_hash = (uintptr_t)vp >> vnsz2log;
 
 	*vpp = vp;
 	return (0);
 }
 
 void
 getnewvnode_reserve(void)
 {
 	struct thread *td;
 
 	td = curthread;
 	MPASS(td->td_vp_reserved == NULL);
 	td->td_vp_reserved = vn_alloc(NULL);
 }
 
 void
 getnewvnode_drop_reserve(void)
 {
 	struct thread *td;
 
 	td = curthread;
 	if (td->td_vp_reserved != NULL) {
 		vn_free(td->td_vp_reserved);
 		td->td_vp_reserved = NULL;
 	}
 }
 
 static void __noinline
 freevnode(struct vnode *vp)
 {
 	struct bufobj *bo;
 
 	/*
 	 * The vnode has been marked for destruction, so free it.
 	 *
 	 * The vnode will be returned to the zone where it will
 	 * normally remain until it is needed for another vnode. We
 	 * need to cleanup (or verify that the cleanup has already
 	 * been done) any residual data left from its current use
 	 * so as not to contaminate the freshly allocated vnode.
 	 */
 	CTR2(KTR_VFS, "%s: destroying the vnode %p", __func__, vp);
 	/*
 	 * Paired with vgone.
 	 */
 	vn_seqc_write_end_free(vp);
 
 	bo = &vp->v_bufobj;
 	VNASSERT(vp->v_data == NULL, vp, ("cleaned vnode isn't"));
 	VNPASS(vp->v_holdcnt == VHOLD_NO_SMR, vp);
 	VNASSERT(vp->v_usecount == 0, vp, ("Non-zero use count"));
 	VNASSERT(vp->v_writecount == 0, vp, ("Non-zero write count"));
 	VNASSERT(bo->bo_numoutput == 0, vp, ("Clean vnode has pending I/O's"));
 	VNASSERT(bo->bo_clean.bv_cnt == 0, vp, ("cleanbufcnt not 0"));
 	VNASSERT(pctrie_is_empty(&bo->bo_clean.bv_root), vp,
 	    ("clean blk trie not empty"));
 	VNASSERT(bo->bo_dirty.bv_cnt == 0, vp, ("dirtybufcnt not 0"));
 	VNASSERT(pctrie_is_empty(&bo->bo_dirty.bv_root), vp,
 	    ("dirty blk trie not empty"));
 	VNASSERT(TAILQ_EMPTY(&vp->v_cache_dst), vp, ("vp has namecache dst"));
 	VNASSERT(LIST_EMPTY(&vp->v_cache_src), vp, ("vp has namecache src"));
 	VNASSERT(vp->v_cache_dd == NULL, vp, ("vp has namecache for .."));
 	VNASSERT(TAILQ_EMPTY(&vp->v_rl.rl_waiters), vp,
 	    ("Dangling rangelock waiters"));
 	VNASSERT((vp->v_iflag & (VI_DOINGINACT | VI_OWEINACT)) == 0, vp,
 	    ("Leaked inactivation"));
 	VI_UNLOCK(vp);
 #ifdef MAC
 	mac_vnode_destroy(vp);
 #endif
 	if (vp->v_pollinfo != NULL) {
 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 		destroy_vpollinfo(vp->v_pollinfo);
 		VOP_UNLOCK(vp);
 		vp->v_pollinfo = NULL;
 	}
 	vp->v_mountedhere = NULL;
 	vp->v_unpcb = NULL;
 	vp->v_rdev = NULL;
 	vp->v_fifoinfo = NULL;
 	vp->v_iflag = 0;
 	vp->v_vflag = 0;
 	bo->bo_flag = 0;
 	vn_free(vp);
 }
 
 /*
  * Delete from old mount point vnode list, if on one.
  */
 static void
 delmntque(struct vnode *vp)
 {
 	struct mount *mp;
 
 	VNPASS((vp->v_mflag & VMP_LAZYLIST) == 0, vp);
 
 	mp = vp->v_mount;
 	MNT_ILOCK(mp);
 	VI_LOCK(vp);
 	vp->v_mount = NULL;
 	VNASSERT(mp->mnt_nvnodelistsize > 0, vp,
 		("bad mount point vnode list size"));
 	TAILQ_REMOVE(&mp->mnt_nvnodelist, vp, v_nmntvnodes);
 	mp->mnt_nvnodelistsize--;
 	MNT_REL(mp);
 	MNT_IUNLOCK(mp);
 	/*
 	 * The caller expects the interlock to be still held.
 	 */
 	ASSERT_VI_LOCKED(vp, __func__);
 }
 
 static int
 insmntque1_int(struct vnode *vp, struct mount *mp, bool dtr)
 {
 
 	KASSERT(vp->v_mount == NULL,
 		("insmntque: vnode already on per mount vnode list"));
 	VNASSERT(mp != NULL, vp, ("Don't call insmntque(foo, NULL)"));
 	if ((mp->mnt_kern_flag & MNTK_UNLOCKED_INSMNTQUE) == 0) {
 		ASSERT_VOP_ELOCKED(vp, "insmntque: non-locked vp");
 	} else {
 		KASSERT(!dtr,
 		    ("%s: can't have MNTK_UNLOCKED_INSMNTQUE and cleanup",
 		    __func__));
 	}
 
 	/*
 	 * We acquire the vnode interlock early to ensure that the
 	 * vnode cannot be recycled by another process releasing a
 	 * holdcnt on it before we get it on both the vnode list
 	 * and the active vnode list. The mount mutex protects only
 	 * manipulation of the vnode list and the vnode freelist
 	 * mutex protects only manipulation of the active vnode list.
 	 * Hence the need to hold the vnode interlock throughout.
 	 */
 	MNT_ILOCK(mp);
 	VI_LOCK(vp);
 	if (((mp->mnt_kern_flag & MNTK_UNMOUNT) != 0 &&
 	    ((mp->mnt_kern_flag & MNTK_UNMOUNTF) != 0 ||
 	    mp->mnt_nvnodelistsize == 0)) &&
 	    (vp->v_vflag & VV_FORCEINSMQ) == 0) {
 		VI_UNLOCK(vp);
 		MNT_IUNLOCK(mp);
 		if (dtr) {
 			vp->v_data = NULL;
 			vp->v_op = &dead_vnodeops;
 			vgone(vp);
 			vput(vp);
 		}
 		return (EBUSY);
 	}
 	vp->v_mount = mp;
 	MNT_REF(mp);
 	TAILQ_INSERT_TAIL(&mp->mnt_nvnodelist, vp, v_nmntvnodes);
 	VNASSERT(mp->mnt_nvnodelistsize >= 0, vp,
 		("neg mount point vnode list size"));
 	mp->mnt_nvnodelistsize++;
 	VI_UNLOCK(vp);
 	MNT_IUNLOCK(mp);
 	return (0);
 }
 
 /*
  * Insert into list of vnodes for the new mount point, if available.
  * insmntque() reclaims the vnode on insertion failure, insmntque1()
  * leaves handling of the vnode to the caller.
  */
 int
 insmntque(struct vnode *vp, struct mount *mp)
 {
 	return (insmntque1_int(vp, mp, true));
 }
 
 int
 insmntque1(struct vnode *vp, struct mount *mp)
 {
 	return (insmntque1_int(vp, mp, false));
 }
 
 /*
  * Flush out and invalidate all buffers associated with a bufobj
  * Called with the underlying object locked.
  */
 int
 bufobj_invalbuf(struct bufobj *bo, int flags, int slpflag, int slptimeo)
 {
 	int error;
 
 	BO_LOCK(bo);
 	if (flags & V_SAVE) {
 		error = bufobj_wwait(bo, slpflag, slptimeo);
 		if (error) {
 			BO_UNLOCK(bo);
 			return (error);
 		}
 		if (bo->bo_dirty.bv_cnt > 0) {
 			BO_UNLOCK(bo);
 			do {
 				error = BO_SYNC(bo, MNT_WAIT);
 			} while (error == ERELOOKUP);
 			if (error != 0)
 				return (error);
 			BO_LOCK(bo);
 			if (bo->bo_numoutput > 0 || bo->bo_dirty.bv_cnt > 0) {
 				BO_UNLOCK(bo);
 				return (EBUSY);
 			}
 		}
 	}
 	/*
 	 * If you alter this loop please notice that interlock is dropped and
 	 * reacquired in flushbuflist.  Special care is needed to ensure that
 	 * no race conditions occur from this.
 	 */
 	do {
 		error = flushbuflist(&bo->bo_clean,
 		    flags, bo, slpflag, slptimeo);
 		if (error == 0 && !(flags & V_CLEANONLY))
 			error = flushbuflist(&bo->bo_dirty,
 			    flags, bo, slpflag, slptimeo);
 		if (error != 0 && error != EAGAIN) {
 			BO_UNLOCK(bo);
 			return (error);
 		}
 	} while (error != 0);
 
 	/*
 	 * Wait for I/O to complete.  XXX needs cleaning up.  The vnode can
 	 * have write I/O in-progress but if there is a VM object then the
 	 * VM object can also have read-I/O in-progress.
 	 */
 	do {
 		bufobj_wwait(bo, 0, 0);
 		if ((flags & V_VMIO) == 0 && bo->bo_object != NULL) {
 			BO_UNLOCK(bo);
 			vm_object_pip_wait_unlocked(bo->bo_object, "bovlbx");
 			BO_LOCK(bo);
 		}
 	} while (bo->bo_numoutput > 0);
 	BO_UNLOCK(bo);
 
 	/*
 	 * Destroy the copy in the VM cache, too.
 	 */
 	if (bo->bo_object != NULL &&
 	    (flags & (V_ALT | V_NORMAL | V_CLEANONLY | V_VMIO)) == 0) {
 		VM_OBJECT_WLOCK(bo->bo_object);
 		vm_object_page_remove(bo->bo_object, 0, 0, (flags & V_SAVE) ?
 		    OBJPR_CLEANONLY : 0);
 		VM_OBJECT_WUNLOCK(bo->bo_object);
 	}
 
 #ifdef INVARIANTS
 	BO_LOCK(bo);
 	if ((flags & (V_ALT | V_NORMAL | V_CLEANONLY | V_VMIO |
 	    V_ALLOWCLEAN)) == 0 && (bo->bo_dirty.bv_cnt > 0 ||
 	    bo->bo_clean.bv_cnt > 0))
 		panic("vinvalbuf: flush failed");
 	if ((flags & (V_ALT | V_NORMAL | V_CLEANONLY | V_VMIO)) == 0 &&
 	    bo->bo_dirty.bv_cnt > 0)
 		panic("vinvalbuf: flush dirty failed");
 	BO_UNLOCK(bo);
 #endif
 	return (0);
 }
 
 /*
  * Flush out and invalidate all buffers associated with a vnode.
  * Called with the underlying object locked.
  */
 int
 vinvalbuf(struct vnode *vp, int flags, int slpflag, int slptimeo)
 {
 
 	CTR3(KTR_VFS, "%s: vp %p with flags %d", __func__, vp, flags);
 	ASSERT_VOP_LOCKED(vp, "vinvalbuf");
 	if (vp->v_object != NULL && vp->v_object->handle != vp)
 		return (0);
 	return (bufobj_invalbuf(&vp->v_bufobj, flags, slpflag, slptimeo));
 }
 
 /*
  * Flush out buffers on the specified list.
  *
  */
 static int
 flushbuflist(struct bufv *bufv, int flags, struct bufobj *bo, int slpflag,
     int slptimeo)
 {
 	struct buf *bp, *nbp;
 	int retval, error;
 	daddr_t lblkno;
 	b_xflags_t xflags;
 
 	ASSERT_BO_WLOCKED(bo);
 
 	retval = 0;
 	TAILQ_FOREACH_SAFE(bp, &bufv->bv_hd, b_bobufs, nbp) {
 		/*
 		 * If we are flushing both V_NORMAL and V_ALT buffers then
 		 * do not skip any buffers. If we are flushing only V_NORMAL
 		 * buffers then skip buffers marked as BX_ALTDATA. If we are
 		 * flushing only V_ALT buffers then skip buffers not marked
 		 * as BX_ALTDATA.
 		 */
 		if (((flags & (V_NORMAL | V_ALT)) != (V_NORMAL | V_ALT)) &&
 		   (((flags & V_NORMAL) && (bp->b_xflags & BX_ALTDATA) != 0) ||
 		    ((flags & V_ALT) && (bp->b_xflags & BX_ALTDATA) == 0))) {
 			continue;
 		}
 		if (nbp != NULL) {
 			lblkno = nbp->b_lblkno;
 			xflags = nbp->b_xflags & (BX_VNDIRTY | BX_VNCLEAN);
 		}
 		retval = EAGAIN;
 		error = BUF_TIMELOCK(bp,
 		    LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK, BO_LOCKPTR(bo),
 		    "flushbuf", slpflag, slptimeo);
 		if (error) {
 			BO_LOCK(bo);
 			return (error != ENOLCK ? error : EAGAIN);
 		}
 		KASSERT(bp->b_bufobj == bo,
 		    ("bp %p wrong b_bufobj %p should be %p",
 		    bp, bp->b_bufobj, bo));
 		/*
 		 * XXX Since there are no node locks for NFS, I
 		 * believe there is a slight chance that a delayed
 		 * write will occur while sleeping just above, so
 		 * check for it.
 		 */
 		if (((bp->b_flags & (B_DELWRI | B_INVAL)) == B_DELWRI) &&
 		    (flags & V_SAVE)) {
 			bremfree(bp);
 			bp->b_flags |= B_ASYNC;
 			bwrite(bp);
 			BO_LOCK(bo);
 			return (EAGAIN);	/* XXX: why not loop ? */
 		}
 		bremfree(bp);
 		bp->b_flags |= (B_INVAL | B_RELBUF);
 		bp->b_flags &= ~B_ASYNC;
 		brelse(bp);
 		BO_LOCK(bo);
 		if (nbp == NULL)
 			break;
 		nbp = gbincore(bo, lblkno);
 		if (nbp == NULL || (nbp->b_xflags & (BX_VNDIRTY | BX_VNCLEAN))
 		    != xflags)
 			break;			/* nbp invalid */
 	}
 	return (retval);
 }
 
 int
 bnoreuselist(struct bufv *bufv, struct bufobj *bo, daddr_t startn, daddr_t endn)
 {
 	struct buf *bp;
 	int error;
 	daddr_t lblkno;
 
 	ASSERT_BO_LOCKED(bo);
 
 	for (lblkno = startn;;) {
 again:
 		bp = BUF_PCTRIE_LOOKUP_GE(&bufv->bv_root, lblkno);
 		if (bp == NULL || bp->b_lblkno >= endn ||
 		    bp->b_lblkno < startn)
 			break;
 		error = BUF_TIMELOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL |
 		    LK_INTERLOCK, BO_LOCKPTR(bo), "brlsfl", 0, 0);
 		if (error != 0) {
 			BO_RLOCK(bo);
 			if (error == ENOLCK)
 				goto again;
 			return (error);
 		}
 		KASSERT(bp->b_bufobj == bo,
 		    ("bp %p wrong b_bufobj %p should be %p",
 		    bp, bp->b_bufobj, bo));
 		lblkno = bp->b_lblkno + 1;
 		if ((bp->b_flags & B_MANAGED) == 0)
 			bremfree(bp);
 		bp->b_flags |= B_RELBUF;
 		/*
 		 * In the VMIO case, use the B_NOREUSE flag to hint that the
 		 * pages backing each buffer in the range are unlikely to be
 		 * reused.  Dirty buffers will have the hint applied once
 		 * they've been written.
 		 */
 		if ((bp->b_flags & B_VMIO) != 0)
 			bp->b_flags |= B_NOREUSE;
 		brelse(bp);
 		BO_RLOCK(bo);
 	}
 	return (0);
 }
 
 /*
  * Truncate a file's buffer and pages to a specified length.  This
  * is in lieu of the old vinvalbuf mechanism, which performed unneeded
  * sync activity.
  */
 int
 vtruncbuf(struct vnode *vp, off_t length, int blksize)
 {
 	struct buf *bp, *nbp;
 	struct bufobj *bo;
 	daddr_t startlbn;
 
 	CTR4(KTR_VFS, "%s: vp %p with block %d:%ju", __func__,
 	    vp, blksize, (uintmax_t)length);
 
 	/*
 	 * Round up to the *next* lbn.
 	 */
 	startlbn = howmany(length, blksize);
 
 	ASSERT_VOP_LOCKED(vp, "vtruncbuf");
 
 	bo = &vp->v_bufobj;
 restart_unlocked:
 	BO_LOCK(bo);
 
 	while (v_inval_buf_range_locked(vp, bo, startlbn, INT64_MAX) == EAGAIN)
 		;
 
 	if (length > 0) {
 restartsync:
 		TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
 			if (bp->b_lblkno > 0)
 				continue;
 			/*
 			 * Since we hold the vnode lock this should only
 			 * fail if we're racing with the buf daemon.
 			 */
 			if (BUF_LOCK(bp,
 			    LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK,
 			    BO_LOCKPTR(bo)) == ENOLCK)
 				goto restart_unlocked;
 
 			VNASSERT((bp->b_flags & B_DELWRI), vp,
 			    ("buf(%p) on dirty queue without DELWRI", bp));
 
 			bremfree(bp);
 			bawrite(bp);
 			BO_LOCK(bo);
 			goto restartsync;
 		}
 	}
 
 	bufobj_wwait(bo, 0, 0);
 	BO_UNLOCK(bo);
 	vnode_pager_setsize(vp, length);
 
 	return (0);
 }
 
 /*
  * Invalidate the cached pages of a file's buffer within the range of block
  * numbers [startlbn, endlbn).
  */
 void
 v_inval_buf_range(struct vnode *vp, daddr_t startlbn, daddr_t endlbn,
     int blksize)
 {
 	struct bufobj *bo;
 	off_t start, end;
 
 	ASSERT_VOP_LOCKED(vp, "v_inval_buf_range");
 
 	start = blksize * startlbn;
 	end = blksize * endlbn;
 
 	bo = &vp->v_bufobj;
 	BO_LOCK(bo);
 	MPASS(blksize == bo->bo_bsize);
 
 	while (v_inval_buf_range_locked(vp, bo, startlbn, endlbn) == EAGAIN)
 		;
 
 	BO_UNLOCK(bo);
 	vn_pages_remove(vp, OFF_TO_IDX(start), OFF_TO_IDX(end + PAGE_SIZE - 1));
 }
 
 static int
 v_inval_buf_range_locked(struct vnode *vp, struct bufobj *bo,
     daddr_t startlbn, daddr_t endlbn)
 {
 	struct buf *bp, *nbp;
 	bool anyfreed;
 
 	ASSERT_VOP_LOCKED(vp, "v_inval_buf_range_locked");
 	ASSERT_BO_LOCKED(bo);
 
 	do {
 		anyfreed = false;
 		TAILQ_FOREACH_SAFE(bp, &bo->bo_clean.bv_hd, b_bobufs, nbp) {
 			if (bp->b_lblkno < startlbn || bp->b_lblkno >= endlbn)
 				continue;
 			if (BUF_LOCK(bp,
 			    LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK,
 			    BO_LOCKPTR(bo)) == ENOLCK) {
 				BO_LOCK(bo);
 				return (EAGAIN);
 			}
 
 			bremfree(bp);
 			bp->b_flags |= B_INVAL | B_RELBUF;
 			bp->b_flags &= ~B_ASYNC;
 			brelse(bp);
 			anyfreed = true;
 
 			BO_LOCK(bo);
 			if (nbp != NULL &&
 			    (((nbp->b_xflags & BX_VNCLEAN) == 0) ||
 			    nbp->b_vp != vp ||
 			    (nbp->b_flags & B_DELWRI) != 0))
 				return (EAGAIN);
 		}
 
 		TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
 			if (bp->b_lblkno < startlbn || bp->b_lblkno >= endlbn)
 				continue;
 			if (BUF_LOCK(bp,
 			    LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK,
 			    BO_LOCKPTR(bo)) == ENOLCK) {
 				BO_LOCK(bo);
 				return (EAGAIN);
 			}
 			bremfree(bp);
 			bp->b_flags |= B_INVAL | B_RELBUF;
 			bp->b_flags &= ~B_ASYNC;
 			brelse(bp);
 			anyfreed = true;
 
 			BO_LOCK(bo);
 			if (nbp != NULL &&
 			    (((nbp->b_xflags & BX_VNDIRTY) == 0) ||
 			    (nbp->b_vp != vp) ||
 			    (nbp->b_flags & B_DELWRI) == 0))
 				return (EAGAIN);
 		}
 	} while (anyfreed);
 	return (0);
 }
 
 static void
 buf_vlist_remove(struct buf *bp)
 {
 	struct bufv *bv;
 	b_xflags_t flags;
 
 	flags = bp->b_xflags;
 
 	KASSERT(bp->b_bufobj != NULL, ("No b_bufobj %p", bp));
 	ASSERT_BO_WLOCKED(bp->b_bufobj);
 	KASSERT((flags & (BX_VNDIRTY | BX_VNCLEAN)) != 0 &&
 	    (flags & (BX_VNDIRTY | BX_VNCLEAN)) != (BX_VNDIRTY | BX_VNCLEAN),
 	    ("%s: buffer %p has invalid queue state", __func__, bp));
 
 	if ((flags & BX_VNDIRTY) != 0)
 		bv = &bp->b_bufobj->bo_dirty;
 	else
 		bv = &bp->b_bufobj->bo_clean;
 	BUF_PCTRIE_REMOVE(&bv->bv_root, bp->b_lblkno);
 	TAILQ_REMOVE(&bv->bv_hd, bp, b_bobufs);
 	bv->bv_cnt--;
 	bp->b_xflags &= ~(BX_VNDIRTY | BX_VNCLEAN);
 }
 
 /*
  * Add the buffer to the sorted clean or dirty block list.
  *
  * NOTE: xflags is passed as a constant, optimizing this inline function!
  */
 static void
 buf_vlist_add(struct buf *bp, struct bufobj *bo, b_xflags_t xflags)
 {
 	struct bufv *bv;
 	struct buf *n;
 	int error;
 
 	ASSERT_BO_WLOCKED(bo);
 	KASSERT((bo->bo_flag & BO_NOBUFS) == 0,
 	    ("buf_vlist_add: bo %p does not allow bufs", bo));
 	KASSERT((xflags & BX_VNDIRTY) == 0 || (bo->bo_flag & BO_DEAD) == 0,
 	    ("dead bo %p", bo));
 	KASSERT((bp->b_xflags & (BX_VNDIRTY|BX_VNCLEAN)) == 0,
 	    ("buf_vlist_add: Buf %p has existing xflags %d", bp, bp->b_xflags));
 	bp->b_xflags |= xflags;
 	if (xflags & BX_VNDIRTY)
 		bv = &bo->bo_dirty;
 	else
 		bv = &bo->bo_clean;
 
 	/*
 	 * Keep the list ordered.  Optimize empty list insertion.  Assume
 	 * we tend to grow at the tail so lookup_le should usually be cheaper
 	 * than _ge. 
 	 */
 	if (bv->bv_cnt == 0 ||
 	    bp->b_lblkno > TAILQ_LAST(&bv->bv_hd, buflists)->b_lblkno)
 		TAILQ_INSERT_TAIL(&bv->bv_hd, bp, b_bobufs);
 	else if ((n = BUF_PCTRIE_LOOKUP_LE(&bv->bv_root, bp->b_lblkno)) == NULL)
 		TAILQ_INSERT_HEAD(&bv->bv_hd, bp, b_bobufs);
 	else
 		TAILQ_INSERT_AFTER(&bv->bv_hd, n, bp, b_bobufs);
 	error = BUF_PCTRIE_INSERT(&bv->bv_root, bp);
 	if (error)
 		panic("buf_vlist_add:  Preallocated nodes insufficient.");
 	bv->bv_cnt++;
 }
 
 /*
  * Look up a buffer using the buffer tries.
  */
 struct buf *
 gbincore(struct bufobj *bo, daddr_t lblkno)
 {
 	struct buf *bp;
 
 	ASSERT_BO_LOCKED(bo);
 	bp = BUF_PCTRIE_LOOKUP(&bo->bo_clean.bv_root, lblkno);
 	if (bp != NULL)
 		return (bp);
 	return (BUF_PCTRIE_LOOKUP(&bo->bo_dirty.bv_root, lblkno));
 }
 
 /*
  * Look up a buf using the buffer tries, without the bufobj lock.  This relies
  * on SMR for safe lookup, and bufs being in a no-free zone to provide type
  * stability of the result.  Like other lockless lookups, the found buf may
  * already be invalid by the time this function returns.
  */
 struct buf *
 gbincore_unlocked(struct bufobj *bo, daddr_t lblkno)
 {
 	struct buf *bp;
 
 	ASSERT_BO_UNLOCKED(bo);
 	bp = BUF_PCTRIE_LOOKUP_UNLOCKED(&bo->bo_clean.bv_root, lblkno);
 	if (bp != NULL)
 		return (bp);
 	return (BUF_PCTRIE_LOOKUP_UNLOCKED(&bo->bo_dirty.bv_root, lblkno));
 }
 
 /*
  * Associate a buffer with a vnode.
  */
 void
 bgetvp(struct vnode *vp, struct buf *bp)
 {
 	struct bufobj *bo;
 
 	bo = &vp->v_bufobj;
 	ASSERT_BO_WLOCKED(bo);
 	VNASSERT(bp->b_vp == NULL, bp->b_vp, ("bgetvp: not free"));
 
 	CTR3(KTR_BUF, "bgetvp(%p) vp %p flags %X", bp, vp, bp->b_flags);
 	VNASSERT((bp->b_xflags & (BX_VNDIRTY|BX_VNCLEAN)) == 0, vp,
 	    ("bgetvp: bp already attached! %p", bp));
 
 	vhold(vp);
 	bp->b_vp = vp;
 	bp->b_bufobj = bo;
 	/*
 	 * Insert onto list for new vnode.
 	 */
 	buf_vlist_add(bp, bo, BX_VNCLEAN);
 }
 
 /*
  * Disassociate a buffer from a vnode.
  */
 void
 brelvp(struct buf *bp)
 {
 	struct bufobj *bo;
 	struct vnode *vp;
 
 	CTR3(KTR_BUF, "brelvp(%p) vp %p flags %X", bp, bp->b_vp, bp->b_flags);
 	KASSERT(bp->b_vp != NULL, ("brelvp: NULL"));
 
 	/*
 	 * Delete from old vnode list, if on one.
 	 */
 	vp = bp->b_vp;		/* XXX */
 	bo = bp->b_bufobj;
 	BO_LOCK(bo);
 	buf_vlist_remove(bp);
 	if ((bo->bo_flag & BO_ONWORKLST) && bo->bo_dirty.bv_cnt == 0) {
 		bo->bo_flag &= ~BO_ONWORKLST;
 		mtx_lock(&sync_mtx);
 		LIST_REMOVE(bo, bo_synclist);
 		syncer_worklist_len--;
 		mtx_unlock(&sync_mtx);
 	}
 	bp->b_vp = NULL;
 	bp->b_bufobj = NULL;
 	BO_UNLOCK(bo);
 	vdrop(vp);
 }
 
 /*
  * Add an item to the syncer work queue.
  */
 static void
 vn_syncer_add_to_worklist(struct bufobj *bo, int delay)
 {
 	int slot;
 
 	ASSERT_BO_WLOCKED(bo);
 
 	mtx_lock(&sync_mtx);
 	if (bo->bo_flag & BO_ONWORKLST)
 		LIST_REMOVE(bo, bo_synclist);
 	else {
 		bo->bo_flag |= BO_ONWORKLST;
 		syncer_worklist_len++;
 	}
 
 	if (delay > syncer_maxdelay - 2)
 		delay = syncer_maxdelay - 2;
 	slot = (syncer_delayno + delay) & syncer_mask;
 
 	LIST_INSERT_HEAD(&syncer_workitem_pending[slot], bo, bo_synclist);
 	mtx_unlock(&sync_mtx);
 }
 
 static int
 sysctl_vfs_worklist_len(SYSCTL_HANDLER_ARGS)
 {
 	int error, len;
 
 	mtx_lock(&sync_mtx);
 	len = syncer_worklist_len - sync_vnode_count;
 	mtx_unlock(&sync_mtx);
 	error = SYSCTL_OUT(req, &len, sizeof(len));
 	return (error);
 }
 
 SYSCTL_PROC(_vfs, OID_AUTO, worklist_len,
     CTLTYPE_INT | CTLFLAG_MPSAFE| CTLFLAG_RD, NULL, 0,
     sysctl_vfs_worklist_len, "I", "Syncer thread worklist length");
 
 static struct proc *updateproc;
 static void sched_sync(void);
 static struct kproc_desc up_kp = {
 	"syncer",
 	sched_sync,
 	&updateproc
 };
 SYSINIT(syncer, SI_SUB_KTHREAD_UPDATE, SI_ORDER_FIRST, kproc_start, &up_kp);
 
 static int
 sync_vnode(struct synclist *slp, struct bufobj **bo, struct thread *td)
 {
 	struct vnode *vp;
 	struct mount *mp;
 
 	*bo = LIST_FIRST(slp);
 	if (*bo == NULL)
 		return (0);
 	vp = bo2vnode(*bo);
 	if (VOP_ISLOCKED(vp) != 0 || VI_TRYLOCK(vp) == 0)
 		return (1);
 	/*
 	 * We use vhold in case the vnode does not
 	 * successfully sync.  vhold prevents the vnode from
 	 * going away when we unlock the sync_mtx so that
 	 * we can acquire the vnode interlock.
 	 */
 	vholdl(vp);
 	mtx_unlock(&sync_mtx);
 	VI_UNLOCK(vp);
 	if (vn_start_write(vp, &mp, V_NOWAIT) != 0) {
 		vdrop(vp);
 		mtx_lock(&sync_mtx);
 		return (*bo == LIST_FIRST(slp));
 	}
 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 	(void) VOP_FSYNC(vp, MNT_LAZY, td);
 	VOP_UNLOCK(vp);
 	vn_finished_write(mp);
 	BO_LOCK(*bo);
 	if (((*bo)->bo_flag & BO_ONWORKLST) != 0) {
 		/*
 		 * Put us back on the worklist.  The worklist
 		 * routine will remove us from our current
 		 * position and then add us back in at a later
 		 * position.
 		 */
 		vn_syncer_add_to_worklist(*bo, syncdelay);
 	}
 	BO_UNLOCK(*bo);
 	vdrop(vp);
 	mtx_lock(&sync_mtx);
 	return (0);
 }
 
 static int first_printf = 1;
 
 /*
  * System filesystem synchronizer daemon.
  */
 static void
 sched_sync(void)
 {
 	struct synclist *next, *slp;
 	struct bufobj *bo;
 	long starttime;
 	struct thread *td = curthread;
 	int last_work_seen;
 	int net_worklist_len;
 	int syncer_final_iter;
 	int error;
 
 	last_work_seen = 0;
 	syncer_final_iter = 0;
 	syncer_state = SYNCER_RUNNING;
 	starttime = time_uptime;
 	td->td_pflags |= TDP_NORUNNINGBUF;
 
 	EVENTHANDLER_REGISTER(shutdown_pre_sync, syncer_shutdown, td->td_proc,
 	    SHUTDOWN_PRI_LAST);
 
 	mtx_lock(&sync_mtx);
 	for (;;) {
 		if (syncer_state == SYNCER_FINAL_DELAY &&
 		    syncer_final_iter == 0) {
 			mtx_unlock(&sync_mtx);
 			kproc_suspend_check(td->td_proc);
 			mtx_lock(&sync_mtx);
 		}
 		net_worklist_len = syncer_worklist_len - sync_vnode_count;
 		if (syncer_state != SYNCER_RUNNING &&
 		    starttime != time_uptime) {
 			if (first_printf) {
 				printf("\nSyncing disks, vnodes remaining... ");
 				first_printf = 0;
 			}
 			printf("%d ", net_worklist_len);
 		}
 		starttime = time_uptime;
 
 		/*
 		 * Push files whose dirty time has expired.  Be careful
 		 * of interrupt race on slp queue.
 		 *
 		 * Skip over empty worklist slots when shutting down.
 		 */
 		do {
 			slp = &syncer_workitem_pending[syncer_delayno];
 			syncer_delayno += 1;
 			if (syncer_delayno == syncer_maxdelay)
 				syncer_delayno = 0;
 			next = &syncer_workitem_pending[syncer_delayno];
 			/*
 			 * If the worklist has wrapped since the
 			 * it was emptied of all but syncer vnodes,
 			 * switch to the FINAL_DELAY state and run
 			 * for one more second.
 			 */
 			if (syncer_state == SYNCER_SHUTTING_DOWN &&
 			    net_worklist_len == 0 &&
 			    last_work_seen == syncer_delayno) {
 				syncer_state = SYNCER_FINAL_DELAY;
 				syncer_final_iter = SYNCER_SHUTDOWN_SPEEDUP;
 			}
 		} while (syncer_state != SYNCER_RUNNING && LIST_EMPTY(slp) &&
 		    syncer_worklist_len > 0);
 
 		/*
 		 * Keep track of the last time there was anything
 		 * on the worklist other than syncer vnodes.
 		 * Return to the SHUTTING_DOWN state if any
 		 * new work appears.
 		 */
 		if (net_worklist_len > 0 || syncer_state == SYNCER_RUNNING)
 			last_work_seen = syncer_delayno;
 		if (net_worklist_len > 0 && syncer_state == SYNCER_FINAL_DELAY)
 			syncer_state = SYNCER_SHUTTING_DOWN;
 		while (!LIST_EMPTY(slp)) {
 			error = sync_vnode(slp, &bo, td);
 			if (error == 1) {
 				LIST_REMOVE(bo, bo_synclist);
 				LIST_INSERT_HEAD(next, bo, bo_synclist);
 				continue;
 			}
 
 			if (first_printf == 0) {
 				/*
 				 * Drop the sync mutex, because some watchdog
 				 * drivers need to sleep while patting
 				 */
 				mtx_unlock(&sync_mtx);
 				wdog_kern_pat(WD_LASTVAL);
 				mtx_lock(&sync_mtx);
 			}
 		}
 		if (syncer_state == SYNCER_FINAL_DELAY && syncer_final_iter > 0)
 			syncer_final_iter--;
 		/*
 		 * The variable rushjob allows the kernel to speed up the
 		 * processing of the filesystem syncer process. A rushjob
 		 * value of N tells the filesystem syncer to process the next
 		 * N seconds worth of work on its queue ASAP. Currently rushjob
 		 * is used by the soft update code to speed up the filesystem
 		 * syncer process when the incore state is getting so far
 		 * ahead of the disk that the kernel memory pool is being
 		 * threatened with exhaustion.
 		 */
 		if (rushjob > 0) {
 			rushjob -= 1;
 			continue;
 		}
 		/*
 		 * Just sleep for a short period of time between
 		 * iterations when shutting down to allow some I/O
 		 * to happen.
 		 *
 		 * If it has taken us less than a second to process the
 		 * current work, then wait. Otherwise start right over
 		 * again. We can still lose time if any single round
 		 * takes more than two seconds, but it does not really
 		 * matter as we are just trying to generally pace the
 		 * filesystem activity.
 		 */
 		if (syncer_state != SYNCER_RUNNING ||
 		    time_uptime == starttime) {
 			thread_lock(td);
 			sched_prio(td, PPAUSE);
 			thread_unlock(td);
 		}
 		if (syncer_state != SYNCER_RUNNING)
 			cv_timedwait(&sync_wakeup, &sync_mtx,
 			    hz / SYNCER_SHUTDOWN_SPEEDUP);
 		else if (time_uptime == starttime)
 			cv_timedwait(&sync_wakeup, &sync_mtx, hz);
 	}
 }
 
 /*
  * Request the syncer daemon to speed up its work.
  * We never push it to speed up more than half of its
  * normal turn time, otherwise it could take over the cpu.
  */
 int
 speedup_syncer(void)
 {
 	int ret = 0;
 
 	mtx_lock(&sync_mtx);
 	if (rushjob < syncdelay / 2) {
 		rushjob += 1;
 		stat_rush_requests += 1;
 		ret = 1;
 	}
 	mtx_unlock(&sync_mtx);
 	cv_broadcast(&sync_wakeup);
 	return (ret);
 }
 
 /*
  * Tell the syncer to speed up its work and run though its work
  * list several times, then tell it to shut down.
  */
 static void
 syncer_shutdown(void *arg, int howto)
 {
 
 	if (howto & RB_NOSYNC)
 		return;
 	mtx_lock(&sync_mtx);
 	syncer_state = SYNCER_SHUTTING_DOWN;
 	rushjob = 0;
 	mtx_unlock(&sync_mtx);
 	cv_broadcast(&sync_wakeup);
 	kproc_shutdown(arg, howto);
 }
 
 void
 syncer_suspend(void)
 {
 
 	syncer_shutdown(updateproc, 0);
 }
 
 void
 syncer_resume(void)
 {
 
 	mtx_lock(&sync_mtx);
 	first_printf = 1;
 	syncer_state = SYNCER_RUNNING;
 	mtx_unlock(&sync_mtx);
 	cv_broadcast(&sync_wakeup);
 	kproc_resume(updateproc);
 }
 
 /*
  * Move the buffer between the clean and dirty lists of its vnode.
  */
 void
 reassignbuf(struct buf *bp)
 {
 	struct vnode *vp;
 	struct bufobj *bo;
 	int delay;
 #ifdef INVARIANTS
 	struct bufv *bv;
 #endif
 
 	vp = bp->b_vp;
 	bo = bp->b_bufobj;
 
 	KASSERT((bp->b_flags & B_PAGING) == 0,
 	    ("%s: cannot reassign paging buffer %p", __func__, bp));
 
 	CTR3(KTR_BUF, "reassignbuf(%p) vp %p flags %X",
 	    bp, bp->b_vp, bp->b_flags);
 
 	BO_LOCK(bo);
 	buf_vlist_remove(bp);
 
 	/*
 	 * If dirty, put on list of dirty buffers; otherwise insert onto list
 	 * of clean buffers.
 	 */
 	if (bp->b_flags & B_DELWRI) {
 		if ((bo->bo_flag & BO_ONWORKLST) == 0) {
 			switch (vp->v_type) {
 			case VDIR:
 				delay = dirdelay;
 				break;
 			case VCHR:
 				delay = metadelay;
 				break;
 			default:
 				delay = filedelay;
 			}
 			vn_syncer_add_to_worklist(bo, delay);
 		}
 		buf_vlist_add(bp, bo, BX_VNDIRTY);
 	} else {
 		buf_vlist_add(bp, bo, BX_VNCLEAN);
 
 		if ((bo->bo_flag & BO_ONWORKLST) && bo->bo_dirty.bv_cnt == 0) {
 			mtx_lock(&sync_mtx);
 			LIST_REMOVE(bo, bo_synclist);
 			syncer_worklist_len--;
 			mtx_unlock(&sync_mtx);
 			bo->bo_flag &= ~BO_ONWORKLST;
 		}
 	}
 #ifdef INVARIANTS
 	bv = &bo->bo_clean;
 	bp = TAILQ_FIRST(&bv->bv_hd);
 	KASSERT(bp == NULL || bp->b_bufobj == bo,
 	    ("bp %p wrong b_bufobj %p should be %p", bp, bp->b_bufobj, bo));
 	bp = TAILQ_LAST(&bv->bv_hd, buflists);
 	KASSERT(bp == NULL || bp->b_bufobj == bo,
 	    ("bp %p wrong b_bufobj %p should be %p", bp, bp->b_bufobj, bo));
 	bv = &bo->bo_dirty;
 	bp = TAILQ_FIRST(&bv->bv_hd);
 	KASSERT(bp == NULL || bp->b_bufobj == bo,
 	    ("bp %p wrong b_bufobj %p should be %p", bp, bp->b_bufobj, bo));
 	bp = TAILQ_LAST(&bv->bv_hd, buflists);
 	KASSERT(bp == NULL || bp->b_bufobj == bo,
 	    ("bp %p wrong b_bufobj %p should be %p", bp, bp->b_bufobj, bo));
 #endif
 	BO_UNLOCK(bo);
 }
 
 static void
 v_init_counters(struct vnode *vp)
 {
 
 	VNASSERT(vp->v_type == VNON && vp->v_data == NULL && vp->v_iflag == 0,
 	    vp, ("%s called for an initialized vnode", __FUNCTION__));
 	ASSERT_VI_UNLOCKED(vp, __FUNCTION__);
 
 	refcount_init(&vp->v_holdcnt, 1);
 	refcount_init(&vp->v_usecount, 1);
 }
 
 /*
  * Grab a particular vnode from the free list, increment its
  * reference count and lock it.  VIRF_DOOMED is set if the vnode
  * is being destroyed.  Only callers who specify LK_RETRY will
  * see doomed vnodes.  If inactive processing was delayed in
  * vput try to do it here.
  *
  * usecount is manipulated using atomics without holding any locks.
  *
  * holdcnt can be manipulated using atomics without holding any locks,
  * except when transitioning 1<->0, in which case the interlock is held.
  *
  * Consumers which don't guarantee liveness of the vnode can use SMR to
  * try to get a reference. Note this operation can fail since the vnode
  * may be awaiting getting freed by the time they get to it.
  */
 enum vgetstate
 vget_prep_smr(struct vnode *vp)
 {
 	enum vgetstate vs;
 
 	VFS_SMR_ASSERT_ENTERED();
 
 	if (refcount_acquire_if_not_zero(&vp->v_usecount)) {
 		vs = VGET_USECOUNT;
 	} else {
 		if (vhold_smr(vp))
 			vs = VGET_HOLDCNT;
 		else
 			vs = VGET_NONE;
 	}
 	return (vs);
 }
 
 enum vgetstate
 vget_prep(struct vnode *vp)
 {
 	enum vgetstate vs;
 
 	if (refcount_acquire_if_not_zero(&vp->v_usecount)) {
 		vs = VGET_USECOUNT;
 	} else {
 		vhold(vp);
 		vs = VGET_HOLDCNT;
 	}
 	return (vs);
 }
 
 void
 vget_abort(struct vnode *vp, enum vgetstate vs)
 {
 
 	switch (vs) {
 	case VGET_USECOUNT:
 		vrele(vp);
 		break;
 	case VGET_HOLDCNT:
 		vdrop(vp);
 		break;
 	default:
 		__assert_unreachable();
 	}
 }
 
 int
 vget(struct vnode *vp, int flags)
 {
 	enum vgetstate vs;
 
 	vs = vget_prep(vp);
 	return (vget_finish(vp, flags, vs));
 }
 
 int
 vget_finish(struct vnode *vp, int flags, enum vgetstate vs)
 {
 	int error;
 
 	if ((flags & LK_INTERLOCK) != 0)
 		ASSERT_VI_LOCKED(vp, __func__);
 	else
 		ASSERT_VI_UNLOCKED(vp, __func__);
 	VNPASS(vs == VGET_HOLDCNT || vs == VGET_USECOUNT, vp);
 	VNPASS(vp->v_holdcnt > 0, vp);
 	VNPASS(vs == VGET_HOLDCNT || vp->v_usecount > 0, vp);
 
 	error = vn_lock(vp, flags);
 	if (__predict_false(error != 0)) {
 		vget_abort(vp, vs);
 		CTR2(KTR_VFS, "%s: impossible to lock vnode %p", __func__,
 		    vp);
 		return (error);
 	}
 
 	vget_finish_ref(vp, vs);
 	return (0);
 }
 
 void
 vget_finish_ref(struct vnode *vp, enum vgetstate vs)
 {
 	int old;
 
 	VNPASS(vs == VGET_HOLDCNT || vs == VGET_USECOUNT, vp);
 	VNPASS(vp->v_holdcnt > 0, vp);
 	VNPASS(vs == VGET_HOLDCNT || vp->v_usecount > 0, vp);
 
 	if (vs == VGET_USECOUNT)
 		return;
 
 	/*
 	 * We hold the vnode. If the usecount is 0 it will be utilized to keep
 	 * the vnode around. Otherwise someone else lended their hold count and
 	 * we have to drop ours.
 	 */
 	old = atomic_fetchadd_int(&vp->v_usecount, 1);
 	VNASSERT(old >= 0, vp, ("%s: wrong use count %d", __func__, old));
 	if (old != 0) {
 #ifdef INVARIANTS
 		old = atomic_fetchadd_int(&vp->v_holdcnt, -1);
 		VNASSERT(old > 1, vp, ("%s: wrong hold count %d", __func__, old));
 #else
 		refcount_release(&vp->v_holdcnt);
 #endif
 	}
 }
 
 void
 vref(struct vnode *vp)
 {
 	enum vgetstate vs;
 
 	CTR2(KTR_VFS, "%s: vp %p", __func__, vp);
 	vs = vget_prep(vp);
 	vget_finish_ref(vp, vs);
 }
 
 void
 vrefact(struct vnode *vp)
 {
 
 	CTR2(KTR_VFS, "%s: vp %p", __func__, vp);
 #ifdef INVARIANTS
 	int old = atomic_fetchadd_int(&vp->v_usecount, 1);
 	VNASSERT(old > 0, vp, ("%s: wrong use count %d", __func__, old));
 #else
 	refcount_acquire(&vp->v_usecount);
 #endif
 }
 
 void
 vlazy(struct vnode *vp)
 {
 	struct mount *mp;
 
 	VNASSERT(vp->v_holdcnt > 0, vp, ("%s: vnode not held", __func__));
 
 	if ((vp->v_mflag & VMP_LAZYLIST) != 0)
 		return;
 	/*
 	 * We may get here for inactive routines after the vnode got doomed.
 	 */
 	if (VN_IS_DOOMED(vp))
 		return;
 	mp = vp->v_mount;
 	mtx_lock(&mp->mnt_listmtx);
 	if ((vp->v_mflag & VMP_LAZYLIST) == 0) {
 		vp->v_mflag |= VMP_LAZYLIST;
 		TAILQ_INSERT_TAIL(&mp->mnt_lazyvnodelist, vp, v_lazylist);
 		mp->mnt_lazyvnodelistsize++;
 	}
 	mtx_unlock(&mp->mnt_listmtx);
 }
 
 static void
 vunlazy(struct vnode *vp)
 {
 	struct mount *mp;
 
 	ASSERT_VI_LOCKED(vp, __func__);
 	VNPASS(!VN_IS_DOOMED(vp), vp);
 
 	mp = vp->v_mount;
 	mtx_lock(&mp->mnt_listmtx);
 	VNPASS(vp->v_mflag & VMP_LAZYLIST, vp);
 	/*
 	 * Don't remove the vnode from the lazy list if another thread
 	 * has increased the hold count. It may have re-enqueued the
 	 * vnode to the lazy list and is now responsible for its
 	 * removal.
 	 */
 	if (vp->v_holdcnt == 0) {
 		vp->v_mflag &= ~VMP_LAZYLIST;
 		TAILQ_REMOVE(&mp->mnt_lazyvnodelist, vp, v_lazylist);
 		mp->mnt_lazyvnodelistsize--;
 	}
 	mtx_unlock(&mp->mnt_listmtx);
 }
 
 /*
  * This routine is only meant to be called from vgonel prior to dooming
  * the vnode.
  */
 static void
 vunlazy_gone(struct vnode *vp)
 {
 	struct mount *mp;
 
 	ASSERT_VOP_ELOCKED(vp, __func__);
 	ASSERT_VI_LOCKED(vp, __func__);
 	VNPASS(!VN_IS_DOOMED(vp), vp);
 
 	if (vp->v_mflag & VMP_LAZYLIST) {
 		mp = vp->v_mount;
 		mtx_lock(&mp->mnt_listmtx);
 		VNPASS(vp->v_mflag & VMP_LAZYLIST, vp);
 		vp->v_mflag &= ~VMP_LAZYLIST;
 		TAILQ_REMOVE(&mp->mnt_lazyvnodelist, vp, v_lazylist);
 		mp->mnt_lazyvnodelistsize--;
 		mtx_unlock(&mp->mnt_listmtx);
 	}
 }
 
 static void
 vdefer_inactive(struct vnode *vp)
 {
 
 	ASSERT_VI_LOCKED(vp, __func__);
 	VNASSERT(vp->v_holdcnt > 0, vp,
 	    ("%s: vnode without hold count", __func__));
 	if (VN_IS_DOOMED(vp)) {
 		vdropl(vp);
 		return;
 	}
 	if (vp->v_iflag & VI_DEFINACT) {
 		VNASSERT(vp->v_holdcnt > 1, vp, ("lost hold count"));
 		vdropl(vp);
 		return;
 	}
 	if (vp->v_usecount > 0) {
 		vp->v_iflag &= ~VI_OWEINACT;
 		vdropl(vp);
 		return;
 	}
 	vlazy(vp);
 	vp->v_iflag |= VI_DEFINACT;
 	VI_UNLOCK(vp);
 	counter_u64_add(deferred_inact, 1);
 }
 
 static void
 vdefer_inactive_unlocked(struct vnode *vp)
 {
 
 	VI_LOCK(vp);
 	if ((vp->v_iflag & VI_OWEINACT) == 0) {
 		vdropl(vp);
 		return;
 	}
 	vdefer_inactive(vp);
 }
 
 enum vput_op { VRELE, VPUT, VUNREF };
 
 /*
  * Handle ->v_usecount transitioning to 0.
  *
  * By releasing the last usecount we take ownership of the hold count which
  * provides liveness of the vnode, meaning we have to vdrop.
  *
  * For all vnodes we may need to perform inactive processing. It requires an
  * exclusive lock on the vnode, while it is legal to call here with only a
  * shared lock (or no locks). If locking the vnode in an expected manner fails,
  * inactive processing gets deferred to the syncer.
  *
  * XXX Some filesystems pass in an exclusively locked vnode and strongly depend
  * on the lock being held all the way until VOP_INACTIVE. This in particular
  * happens with UFS which adds half-constructed vnodes to the hash, where they
  * can be found by other code.
  */
 static void
 vput_final(struct vnode *vp, enum vput_op func)
 {
 	int error;
 	bool want_unlock;
 
 	CTR2(KTR_VFS, "%s: vp %p", __func__, vp);
 	VNPASS(vp->v_holdcnt > 0, vp);
 
 	VI_LOCK(vp);
 
 	/*
 	 * By the time we got here someone else might have transitioned
 	 * the count back to > 0.
 	 */
 	if (vp->v_usecount > 0)
 		goto out;
 
 	/*
 	 * If the vnode is doomed vgone already performed inactive processing
 	 * (if needed).
 	 */
 	if (VN_IS_DOOMED(vp))
 		goto out;
 
 	if (__predict_true(VOP_NEED_INACTIVE(vp) == 0))
 		goto out;
 
 	if (vp->v_iflag & VI_DOINGINACT)
 		goto out;
 
 	/*
 	 * Locking operations here will drop the interlock and possibly the
 	 * vnode lock, opening a window where the vnode can get doomed all the
 	 * while ->v_usecount is 0. Set VI_OWEINACT to let vgone know to
 	 * perform inactive.
 	 */
 	vp->v_iflag |= VI_OWEINACT;
 	want_unlock = false;
 	error = 0;
 	switch (func) {
 	case VRELE:
 		switch (VOP_ISLOCKED(vp)) {
 		case LK_EXCLUSIVE:
 			break;
 		case LK_EXCLOTHER:
 		case 0:
 			want_unlock = true;
 			error = vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK);
 			VI_LOCK(vp);
 			break;
 		default:
 			/*
 			 * The lock has at least one sharer, but we have no way
 			 * to conclude whether this is us. Play it safe and
 			 * defer processing.
 			 */
 			error = EAGAIN;
 			break;
 		}
 		break;
 	case VPUT:
 		want_unlock = true;
 		if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE) {
 			error = VOP_LOCK(vp, LK_UPGRADE | LK_INTERLOCK |
 			    LK_NOWAIT);
 			VI_LOCK(vp);
 		}
 		break;
 	case VUNREF:
 		if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE) {
 			error = VOP_LOCK(vp, LK_TRYUPGRADE | LK_INTERLOCK);
 			VI_LOCK(vp);
 		}
 		break;
 	}
 	if (error == 0) {
 		if (func == VUNREF) {
 			VNASSERT((vp->v_vflag & VV_UNREF) == 0, vp,
 			    ("recursive vunref"));
 			vp->v_vflag |= VV_UNREF;
 		}
 		for (;;) {
 			error = vinactive(vp);
 			if (want_unlock)
 				VOP_UNLOCK(vp);
 			if (error != ERELOOKUP || !want_unlock)
 				break;
 			VOP_LOCK(vp, LK_EXCLUSIVE);
 		}
 		if (func == VUNREF)
 			vp->v_vflag &= ~VV_UNREF;
 		vdropl(vp);
 	} else {
 		vdefer_inactive(vp);
 	}
 	return;
 out:
 	if (func == VPUT)
 		VOP_UNLOCK(vp);
 	vdropl(vp);
 }
 
 /*
  * Decrement ->v_usecount for a vnode.
  *
  * Releasing the last use count requires additional processing, see vput_final
  * above for details.
  *
  * Comment above each variant denotes lock state on entry and exit.
  */
 
 /*
  * in: any
  * out: same as passed in
  */
 void
 vrele(struct vnode *vp)
 {
 
 	ASSERT_VI_UNLOCKED(vp, __func__);
 	if (!refcount_release(&vp->v_usecount))
 		return;
 	vput_final(vp, VRELE);
 }
 
 /*
  * in: locked
  * out: unlocked
  */
 void
 vput(struct vnode *vp)
 {
 
 	ASSERT_VOP_LOCKED(vp, __func__);
 	ASSERT_VI_UNLOCKED(vp, __func__);
 	if (!refcount_release(&vp->v_usecount)) {
 		VOP_UNLOCK(vp);
 		return;
 	}
 	vput_final(vp, VPUT);
 }
 
 /*
  * in: locked
  * out: locked
  */
 void
 vunref(struct vnode *vp)
 {
 
 	ASSERT_VOP_LOCKED(vp, __func__);
 	ASSERT_VI_UNLOCKED(vp, __func__);
 	if (!refcount_release(&vp->v_usecount))
 		return;
 	vput_final(vp, VUNREF);
 }
 
 void
 vhold(struct vnode *vp)
 {
 	int old;
 
 	CTR2(KTR_VFS, "%s: vp %p", __func__, vp);
 	old = atomic_fetchadd_int(&vp->v_holdcnt, 1);
 	VNASSERT(old >= 0 && (old & VHOLD_ALL_FLAGS) == 0, vp,
 	    ("%s: wrong hold count %d", __func__, old));
 	if (old == 0)
 		vfs_freevnodes_dec();
 }
 
 void
 vholdnz(struct vnode *vp)
 {
 
 	CTR2(KTR_VFS, "%s: vp %p", __func__, vp);
 #ifdef INVARIANTS
 	int old = atomic_fetchadd_int(&vp->v_holdcnt, 1);
 	VNASSERT(old > 0 && (old & VHOLD_ALL_FLAGS) == 0, vp,
 	    ("%s: wrong hold count %d", __func__, old));
 #else
 	atomic_add_int(&vp->v_holdcnt, 1);
 #endif
 }
 
 /*
  * Grab a hold count unless the vnode is freed.
  *
  * Only use this routine if vfs smr is the only protection you have against
  * freeing the vnode.
  *
  * The code loops trying to add a hold count as long as the VHOLD_NO_SMR flag
  * is not set.  After the flag is set the vnode becomes immutable to anyone but
  * the thread which managed to set the flag.
  *
  * It may be tempting to replace the loop with:
  * count = atomic_fetchadd_int(&vp->v_holdcnt, 1);
  * if (count & VHOLD_NO_SMR) {
  *     backpedal and error out;
  * }
  *
  * However, while this is more performant, it hinders debugging by eliminating
  * the previously mentioned invariant.
  */
 bool
 vhold_smr(struct vnode *vp)
 {
 	int count;
 
 	VFS_SMR_ASSERT_ENTERED();
 
 	count = atomic_load_int(&vp->v_holdcnt);
 	for (;;) {
 		if (count & VHOLD_NO_SMR) {
 			VNASSERT((count & ~VHOLD_NO_SMR) == 0, vp,
 			    ("non-zero hold count with flags %d\n", count));
 			return (false);
 		}
 		VNASSERT(count >= 0, vp, ("invalid hold count %d\n", count));
 		if (atomic_fcmpset_int(&vp->v_holdcnt, &count, count + 1)) {
 			if (count == 0)
 				vfs_freevnodes_dec();
 			return (true);
 		}
 	}
 }
 
 /*
  * Hold a free vnode for recycling.
  *
  * Note: vnode_init references this comment.
  *
  * Attempts to recycle only need the global vnode list lock and have no use for
  * SMR.
  *
  * However, vnodes get inserted into the global list before they get fully
  * initialized and stay there until UMA decides to free the memory. This in
  * particular means the target can be found before it becomes usable and after
  * it becomes recycled. Picking up such vnodes is guarded with v_holdcnt set to
  * VHOLD_NO_SMR.
  *
  * Note: the vnode may gain more references after we transition the count 0->1.
  */
 static bool
 vhold_recycle_free(struct vnode *vp)
 {
 	int count;
 
 	mtx_assert(&vnode_list_mtx, MA_OWNED);
 
 	count = atomic_load_int(&vp->v_holdcnt);
 	for (;;) {
 		if (count & VHOLD_NO_SMR) {
 			VNASSERT((count & ~VHOLD_NO_SMR) == 0, vp,
 			    ("non-zero hold count with flags %d\n", count));
 			return (false);
 		}
 		VNASSERT(count >= 0, vp, ("invalid hold count %d\n", count));
 		if (count > 0) {
 			return (false);
 		}
 		if (atomic_fcmpset_int(&vp->v_holdcnt, &count, count + 1)) {
 			vfs_freevnodes_dec();
 			return (true);
 		}
 	}
 }
 
 static void __noinline
 vdbatch_process(struct vdbatch *vd)
 {
 	struct vnode *vp;
 	int i;
 
 	mtx_assert(&vd->lock, MA_OWNED);
 	MPASS(curthread->td_pinned > 0);
 	MPASS(vd->index == VDBATCH_SIZE);
 
 	mtx_lock(&vnode_list_mtx);
 	critical_enter();
 	freevnodes += vd->freevnodes;
 	for (i = 0; i < VDBATCH_SIZE; i++) {
 		vp = vd->tab[i];
 		TAILQ_REMOVE(&vnode_list, vp, v_vnodelist);
 		TAILQ_INSERT_TAIL(&vnode_list, vp, v_vnodelist);
 		MPASS(vp->v_dbatchcpu != NOCPU);
 		vp->v_dbatchcpu = NOCPU;
 	}
 	mtx_unlock(&vnode_list_mtx);
 	vd->freevnodes = 0;
 	bzero(vd->tab, sizeof(vd->tab));
 	vd->index = 0;
 	critical_exit();
 }
 
 static void
 vdbatch_enqueue(struct vnode *vp)
 {
 	struct vdbatch *vd;
 
 	ASSERT_VI_LOCKED(vp, __func__);
 	VNASSERT(!VN_IS_DOOMED(vp), vp,
 	    ("%s: deferring requeue of a doomed vnode", __func__));
 
 	if (vp->v_dbatchcpu != NOCPU) {
 		VI_UNLOCK(vp);
 		return;
 	}
 
 	sched_pin();
 	vd = DPCPU_PTR(vd);
 	mtx_lock(&vd->lock);
 	MPASS(vd->index < VDBATCH_SIZE);
 	MPASS(vd->tab[vd->index] == NULL);
 	/*
 	 * A hack: we depend on being pinned so that we know what to put in
 	 * ->v_dbatchcpu.
 	 */
 	vp->v_dbatchcpu = curcpu;
 	vd->tab[vd->index] = vp;
 	vd->index++;
 	VI_UNLOCK(vp);
 	if (vd->index == VDBATCH_SIZE)
 		vdbatch_process(vd);
 	mtx_unlock(&vd->lock);
 	sched_unpin();
 }
 
 /*
  * This routine must only be called for vnodes which are about to be
  * deallocated. Supporting dequeue for arbitrary vndoes would require
  * validating that the locked batch matches.
  */
 static void
 vdbatch_dequeue(struct vnode *vp)
 {
 	struct vdbatch *vd;
 	int i;
 	short cpu;
 
 	VNASSERT(vp->v_type == VBAD || vp->v_type == VNON, vp,
 	    ("%s: called for a used vnode\n", __func__));
 
 	cpu = vp->v_dbatchcpu;
 	if (cpu == NOCPU)
 		return;
 
 	vd = DPCPU_ID_PTR(cpu, vd);
 	mtx_lock(&vd->lock);
 	for (i = 0; i < vd->index; i++) {
 		if (vd->tab[i] != vp)
 			continue;
 		vp->v_dbatchcpu = NOCPU;
 		vd->index--;
 		vd->tab[i] = vd->tab[vd->index];
 		vd->tab[vd->index] = NULL;
 		break;
 	}
 	mtx_unlock(&vd->lock);
 	/*
 	 * Either we dequeued the vnode above or the target CPU beat us to it.
 	 */
 	MPASS(vp->v_dbatchcpu == NOCPU);
 }
 
 /*
  * Drop the hold count of the vnode.  If this is the last reference to
  * the vnode we place it on the free list unless it has been vgone'd
  * (marked VIRF_DOOMED) in which case we will free it.
  *
  * Because the vnode vm object keeps a hold reference on the vnode if
  * there is at least one resident non-cached page, the vnode cannot
  * leave the active list without the page cleanup done.
  */
 static void __noinline
 vdropl_final(struct vnode *vp)
 {
 
 	ASSERT_VI_LOCKED(vp, __func__);
 	VNPASS(VN_IS_DOOMED(vp), vp);
 	/*
 	 * Set the VHOLD_NO_SMR flag.
 	 *
 	 * We may be racing against vhold_smr. If they win we can just pretend
 	 * we never got this far, they will vdrop later.
 	 */
 	if (__predict_false(!atomic_cmpset_int(&vp->v_holdcnt, 0, VHOLD_NO_SMR))) {
 		vfs_freevnodes_inc();
 		VI_UNLOCK(vp);
 		/*
 		 * We lost the aforementioned race. Any subsequent access is
 		 * invalid as they might have managed to vdropl on their own.
 		 */
 		return;
 	}
 	/*
 	 * Don't bump freevnodes as this one is going away.
 	 */
 	freevnode(vp);
 }
 
 void
 vdrop(struct vnode *vp)
 {
 
 	ASSERT_VI_UNLOCKED(vp, __func__);
 	CTR2(KTR_VFS, "%s: vp %p", __func__, vp);
 	if (refcount_release_if_not_last(&vp->v_holdcnt))
 		return;
 	VI_LOCK(vp);
 	vdropl(vp);
 }
 
 static void __always_inline
 vdropl_impl(struct vnode *vp, bool enqueue)
 {
 
 	ASSERT_VI_LOCKED(vp, __func__);
 	CTR2(KTR_VFS, "%s: vp %p", __func__, vp);
 	if (!refcount_release(&vp->v_holdcnt)) {
 		VI_UNLOCK(vp);
 		return;
 	}
 	VNPASS((vp->v_iflag & VI_OWEINACT) == 0, vp);
 	VNPASS((vp->v_iflag & VI_DEFINACT) == 0, vp);
 	if (VN_IS_DOOMED(vp)) {
 		vdropl_final(vp);
 		return;
 	}
 
 	vfs_freevnodes_inc();
 	if (vp->v_mflag & VMP_LAZYLIST) {
 		vunlazy(vp);
 	}
 
 	if (!enqueue) {
 		VI_UNLOCK(vp);
 		return;
 	}
 
 	/*
 	 * Also unlocks the interlock. We can't assert on it as we
 	 * released our hold and by now the vnode might have been
 	 * freed.
 	 */
 	vdbatch_enqueue(vp);
 }
 
 void
 vdropl(struct vnode *vp)
 {
 
 	vdropl_impl(vp, true);
 }
 
 /*
  * vdrop a vnode when recycling
  *
  * This is a special case routine only to be used when recycling, differs from
  * regular vdrop by not requeieing the vnode on LRU.
  *
  * Consider a case where vtryrecycle continuously fails with all vnodes (due to
  * e.g., frozen writes on the filesystem), filling the batch and causing it to
  * be requeued. Then vnlru will end up revisiting the same vnodes. This is a
  * loop which can last for as long as writes are frozen.
  */
 static void
 vdropl_recycle(struct vnode *vp)
 {
 
 	vdropl_impl(vp, false);
 }
 
 static void
 vdrop_recycle(struct vnode *vp)
 {
 
 	VI_LOCK(vp);
 	vdropl_recycle(vp);
 }
 
 /*
  * Call VOP_INACTIVE on the vnode and manage the DOINGINACT and OWEINACT
  * flags.  DOINGINACT prevents us from recursing in calls to vinactive.
  */
 static int
 vinactivef(struct vnode *vp)
 {
 	struct vm_object *obj;
 	int error;
 
 	ASSERT_VOP_ELOCKED(vp, "vinactive");
 	ASSERT_VI_LOCKED(vp, "vinactive");
 	VNASSERT((vp->v_iflag & VI_DOINGINACT) == 0, vp,
 	    ("vinactive: recursed on VI_DOINGINACT"));
 	CTR2(KTR_VFS, "%s: vp %p", __func__, vp);
 	vp->v_iflag |= VI_DOINGINACT;
 	vp->v_iflag &= ~VI_OWEINACT;
 	VI_UNLOCK(vp);
 	/*
 	 * Before moving off the active list, we must be sure that any
 	 * modified pages are converted into the vnode's dirty
 	 * buffers, since these will no longer be checked once the
 	 * vnode is on the inactive list.
 	 *
 	 * The write-out of the dirty pages is asynchronous.  At the
 	 * point that VOP_INACTIVE() is called, there could still be
 	 * pending I/O and dirty pages in the object.
 	 */
 	if ((obj = vp->v_object) != NULL && (vp->v_vflag & VV_NOSYNC) == 0 &&
 	    vm_object_mightbedirty(obj)) {
 		VM_OBJECT_WLOCK(obj);
 		vm_object_page_clean(obj, 0, 0, 0);
 		VM_OBJECT_WUNLOCK(obj);
 	}
 	error = VOP_INACTIVE(vp);
 	VI_LOCK(vp);
 	VNASSERT(vp->v_iflag & VI_DOINGINACT, vp,
 	    ("vinactive: lost VI_DOINGINACT"));
 	vp->v_iflag &= ~VI_DOINGINACT;
 	return (error);
 }
 
 int
 vinactive(struct vnode *vp)
 {
 
 	ASSERT_VOP_ELOCKED(vp, "vinactive");
 	ASSERT_VI_LOCKED(vp, "vinactive");
 	CTR2(KTR_VFS, "%s: vp %p", __func__, vp);
 
 	if ((vp->v_iflag & VI_OWEINACT) == 0)
 		return (0);
 	if (vp->v_iflag & VI_DOINGINACT)
 		return (0);
 	if (vp->v_usecount > 0) {
 		vp->v_iflag &= ~VI_OWEINACT;
 		return (0);
 	}
 	return (vinactivef(vp));
 }
 
 /*
  * Remove any vnodes in the vnode table belonging to mount point mp.
  *
  * If FORCECLOSE is not specified, there should not be any active ones,
  * return error if any are found (nb: this is a user error, not a
  * system error). If FORCECLOSE is specified, detach any active vnodes
  * that are found.
  *
  * If WRITECLOSE is set, only flush out regular file vnodes open for
  * writing.
  *
  * SKIPSYSTEM causes any vnodes marked VV_SYSTEM to be skipped.
  *
  * `rootrefs' specifies the base reference count for the root vnode
  * of this filesystem. The root vnode is considered busy if its
  * v_usecount exceeds this value. On a successful return, vflush(, td)
  * will call vrele() on the root vnode exactly rootrefs times.
  * If the SKIPSYSTEM or WRITECLOSE flags are specified, rootrefs must
  * be zero.
  */
 #ifdef DIAGNOSTIC
 static int busyprt = 0;		/* print out busy vnodes */
 SYSCTL_INT(_debug, OID_AUTO, busyprt, CTLFLAG_RW, &busyprt, 0, "Print out busy vnodes");
 #endif
 
 int
 vflush(struct mount *mp, int rootrefs, int flags, struct thread *td)
 {
 	struct vnode *vp, *mvp, *rootvp = NULL;
 	struct vattr vattr;
 	int busy = 0, error;
 
 	CTR4(KTR_VFS, "%s: mp %p with rootrefs %d and flags %d", __func__, mp,
 	    rootrefs, flags);
 	if (rootrefs > 0) {
 		KASSERT((flags & (SKIPSYSTEM | WRITECLOSE)) == 0,
 		    ("vflush: bad args"));
 		/*
 		 * Get the filesystem root vnode. We can vput() it
 		 * immediately, since with rootrefs > 0, it won't go away.
 		 */
 		if ((error = VFS_ROOT(mp, LK_EXCLUSIVE, &rootvp)) != 0) {
 			CTR2(KTR_VFS, "%s: vfs_root lookup failed with %d",
 			    __func__, error);
 			return (error);
 		}
 		vput(rootvp);
 	}
 loop:
 	MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
 		vholdl(vp);
 		error = vn_lock(vp, LK_INTERLOCK | LK_EXCLUSIVE);
 		if (error) {
 			vdrop(vp);
 			MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
 			goto loop;
 		}
 		/*
 		 * Skip over a vnodes marked VV_SYSTEM.
 		 */
 		if ((flags & SKIPSYSTEM) && (vp->v_vflag & VV_SYSTEM)) {
 			VOP_UNLOCK(vp);
 			vdrop(vp);
 			continue;
 		}
 		/*
 		 * If WRITECLOSE is set, flush out unlinked but still open
 		 * files (even if open only for reading) and regular file
 		 * vnodes open for writing.
 		 */
 		if (flags & WRITECLOSE) {
 			if (vp->v_object != NULL) {
 				VM_OBJECT_WLOCK(vp->v_object);
 				vm_object_page_clean(vp->v_object, 0, 0, 0);
 				VM_OBJECT_WUNLOCK(vp->v_object);
 			}
 			do {
 				error = VOP_FSYNC(vp, MNT_WAIT, td);
 			} while (error == ERELOOKUP);
 			if (error != 0) {
 				VOP_UNLOCK(vp);
 				vdrop(vp);
 				MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
 				return (error);
 			}
 			error = VOP_GETATTR(vp, &vattr, td->td_ucred);
 			VI_LOCK(vp);
 
 			if ((vp->v_type == VNON ||
 			    (error == 0 && vattr.va_nlink > 0)) &&
 			    (vp->v_writecount <= 0 || vp->v_type != VREG)) {
 				VOP_UNLOCK(vp);
 				vdropl(vp);
 				continue;
 			}
 		} else
 			VI_LOCK(vp);
 		/*
 		 * With v_usecount == 0, all we need to do is clear out the
 		 * vnode data structures and we are done.
 		 *
 		 * If FORCECLOSE is set, forcibly close the vnode.
 		 */
 		if (vp->v_usecount == 0 || (flags & FORCECLOSE)) {
 			vgonel(vp);
 		} else {
 			busy++;
 #ifdef DIAGNOSTIC
 			if (busyprt)
 				vn_printf(vp, "vflush: busy vnode ");
 #endif
 		}
 		VOP_UNLOCK(vp);
 		vdropl(vp);
 	}
 	if (rootrefs > 0 && (flags & FORCECLOSE) == 0) {
 		/*
 		 * If just the root vnode is busy, and if its refcount
 		 * is equal to `rootrefs', then go ahead and kill it.
 		 */
 		VI_LOCK(rootvp);
 		KASSERT(busy > 0, ("vflush: not busy"));
 		VNASSERT(rootvp->v_usecount >= rootrefs, rootvp,
 		    ("vflush: usecount %d < rootrefs %d",
 		     rootvp->v_usecount, rootrefs));
 		if (busy == 1 && rootvp->v_usecount == rootrefs) {
 			VOP_LOCK(rootvp, LK_EXCLUSIVE|LK_INTERLOCK);
 			vgone(rootvp);
 			VOP_UNLOCK(rootvp);
 			busy = 0;
 		} else
 			VI_UNLOCK(rootvp);
 	}
 	if (busy) {
 		CTR2(KTR_VFS, "%s: failing as %d vnodes are busy", __func__,
 		    busy);
 		return (EBUSY);
 	}
 	for (; rootrefs > 0; rootrefs--)
 		vrele(rootvp);
 	return (0);
 }
 
 /*
  * Recycle an unused vnode to the front of the free list.
  */
 int
 vrecycle(struct vnode *vp)
 {
 	int recycled;
 
 	VI_LOCK(vp);
 	recycled = vrecyclel(vp);
 	VI_UNLOCK(vp);
 	return (recycled);
 }
 
 /*
  * vrecycle, with the vp interlock held.
  */
 int
 vrecyclel(struct vnode *vp)
 {
 	int recycled;
 
 	ASSERT_VOP_ELOCKED(vp, __func__);
 	ASSERT_VI_LOCKED(vp, __func__);
 	CTR2(KTR_VFS, "%s: vp %p", __func__, vp);
 	recycled = 0;
 	if (vp->v_usecount == 0) {
 		recycled = 1;
 		vgonel(vp);
 	}
 	return (recycled);
 }
 
 /*
  * Eliminate all activity associated with a vnode
  * in preparation for reuse.
  */
 void
 vgone(struct vnode *vp)
 {
 	VI_LOCK(vp);
 	vgonel(vp);
 	VI_UNLOCK(vp);
 }
 
 /*
  * Notify upper mounts about reclaimed or unlinked vnode.
  */
 void
 vfs_notify_upper(struct vnode *vp, enum vfs_notify_upper_type event)
 {
 	struct mount *mp;
 	struct mount_upper_node *ump;
 
 	mp = atomic_load_ptr(&vp->v_mount);
 	if (mp == NULL)
 		return;
 	if (TAILQ_EMPTY(&mp->mnt_notify))
 		return;
 
 	MNT_ILOCK(mp);
 	mp->mnt_upper_pending++;
 	KASSERT(mp->mnt_upper_pending > 0,
 	    ("%s: mnt_upper_pending %d", __func__, mp->mnt_upper_pending));
 	TAILQ_FOREACH(ump, &mp->mnt_notify, mnt_upper_link) {
 		MNT_IUNLOCK(mp);
 		switch (event) {
 		case VFS_NOTIFY_UPPER_RECLAIM:
 			VFS_RECLAIM_LOWERVP(ump->mp, vp);
 			break;
 		case VFS_NOTIFY_UPPER_UNLINK:
 			VFS_UNLINK_LOWERVP(ump->mp, vp);
 			break;
 		}
 		MNT_ILOCK(mp);
 	}
 	mp->mnt_upper_pending--;
 	if ((mp->mnt_kern_flag & MNTK_UPPER_WAITER) != 0 &&
 	    mp->mnt_upper_pending == 0) {
 		mp->mnt_kern_flag &= ~MNTK_UPPER_WAITER;
 		wakeup(&mp->mnt_uppers);
 	}
 	MNT_IUNLOCK(mp);
 }
 
 /*
  * vgone, with the vp interlock held.
  */
 static void
 vgonel(struct vnode *vp)
 {
 	struct thread *td;
 	struct mount *mp;
 	vm_object_t object;
 	bool active, doinginact, oweinact;
 
 	ASSERT_VOP_ELOCKED(vp, "vgonel");
 	ASSERT_VI_LOCKED(vp, "vgonel");
 	VNASSERT(vp->v_holdcnt, vp,
 	    ("vgonel: vp %p has no reference.", vp));
 	CTR2(KTR_VFS, "%s: vp %p", __func__, vp);
 	td = curthread;
 
 	/*
 	 * Don't vgonel if we're already doomed.
 	 */
 	if (VN_IS_DOOMED(vp))
 		return;
 	/*
 	 * Paired with freevnode.
 	 */
 	vn_seqc_write_begin_locked(vp);
 	vunlazy_gone(vp);
 	vn_irflag_set_locked(vp, VIRF_DOOMED);
 
 	/*
 	 * Check to see if the vnode is in use.  If so, we have to
 	 * call VOP_CLOSE() and VOP_INACTIVE().
 	 *
 	 * It could be that VOP_INACTIVE() requested reclamation, in
 	 * which case we should avoid recursion, so check
 	 * VI_DOINGINACT.  This is not precise but good enough.
 	 */
 	active = vp->v_usecount > 0;
 	oweinact = (vp->v_iflag & VI_OWEINACT) != 0;
 	doinginact = (vp->v_iflag & VI_DOINGINACT) != 0;
 
 	/*
 	 * If we need to do inactive VI_OWEINACT will be set.
 	 */
 	if (vp->v_iflag & VI_DEFINACT) {
 		VNASSERT(vp->v_holdcnt > 1, vp, ("lost hold count"));
 		vp->v_iflag &= ~VI_DEFINACT;
 		vdropl(vp);
 	} else {
 		VNASSERT(vp->v_holdcnt > 0, vp, ("vnode without hold count"));
 		VI_UNLOCK(vp);
 	}
 	cache_purge_vgone(vp);
 	vfs_notify_upper(vp, VFS_NOTIFY_UPPER_RECLAIM);
 
 	/*
 	 * If purging an active vnode, it must be closed and
 	 * deactivated before being reclaimed.
 	 */
 	if (active)
 		VOP_CLOSE(vp, FNONBLOCK, NOCRED, td);
 	if (!doinginact) {
 		do {
 			if (oweinact || active) {
 				VI_LOCK(vp);
 				vinactivef(vp);
 				oweinact = (vp->v_iflag & VI_OWEINACT) != 0;
 				VI_UNLOCK(vp);
 			}
 		} while (oweinact);
 	}
 	if (vp->v_type == VSOCK)
 		vfs_unp_reclaim(vp);
 
 	/*
 	 * Clean out any buffers associated with the vnode.
 	 * If the flush fails, just toss the buffers.
 	 */
 	mp = NULL;
 	if (!TAILQ_EMPTY(&vp->v_bufobj.bo_dirty.bv_hd))
 		(void) vn_start_secondary_write(vp, &mp, V_WAIT);
 	if (vinvalbuf(vp, V_SAVE, 0, 0) != 0) {
 		while (vinvalbuf(vp, 0, 0, 0) != 0)
 			;
 	}
 
 	BO_LOCK(&vp->v_bufobj);
 	KASSERT(TAILQ_EMPTY(&vp->v_bufobj.bo_dirty.bv_hd) &&
 	    vp->v_bufobj.bo_dirty.bv_cnt == 0 &&
 	    TAILQ_EMPTY(&vp->v_bufobj.bo_clean.bv_hd) &&
 	    vp->v_bufobj.bo_clean.bv_cnt == 0,
 	    ("vp %p bufobj not invalidated", vp));
 
 	/*
 	 * For VMIO bufobj, BO_DEAD is set later, or in
 	 * vm_object_terminate() after the object's page queue is
 	 * flushed.
 	 */
 	object = vp->v_bufobj.bo_object;
 	if (object == NULL)
 		vp->v_bufobj.bo_flag |= BO_DEAD;
 	BO_UNLOCK(&vp->v_bufobj);
 
 	/*
 	 * Handle the VM part.  Tmpfs handles v_object on its own (the
 	 * OBJT_VNODE check).  Nullfs or other bypassing filesystems
 	 * should not touch the object borrowed from the lower vnode
 	 * (the handle check).
 	 */
 	if (object != NULL && object->type == OBJT_VNODE &&
 	    object->handle == vp)
 		vnode_destroy_vobject(vp);
 
 	/*
 	 * Reclaim the vnode.
 	 */
 	if (VOP_RECLAIM(vp))
 		panic("vgone: cannot reclaim");
 	if (mp != NULL)
 		vn_finished_secondary_write(mp);
 	VNASSERT(vp->v_object == NULL, vp,
 	    ("vop_reclaim left v_object vp=%p", vp));
 	/*
 	 * Clear the advisory locks and wake up waiting threads.
 	 */
 	if (vp->v_lockf != NULL) {
 		(void)VOP_ADVLOCKPURGE(vp);
 		vp->v_lockf = NULL;
 	}
 	/*
 	 * Delete from old mount point vnode list.
 	 */
 	if (vp->v_mount == NULL) {
 		VI_LOCK(vp);
 	} else {
 		delmntque(vp);
 		ASSERT_VI_LOCKED(vp, "vgonel 2");
 	}
 	/*
 	 * Done with purge, reset to the standard lock and invalidate
 	 * the vnode.
 	 */
 	vp->v_vnlock = &vp->v_lock;
 	vp->v_op = &dead_vnodeops;
 	vp->v_type = VBAD;
 }
 
 /*
  * Print out a description of a vnode.
  */
 static const char * const typename[] =
 {"VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD",
  "VMARKER"};
 
 _Static_assert((VHOLD_ALL_FLAGS & ~VHOLD_NO_SMR) == 0,
     "new hold count flag not added to vn_printf");
 
 void
 vn_printf(struct vnode *vp, const char *fmt, ...)
 {
 	va_list ap;
 	char buf[256], buf2[16];
 	u_long flags;
 	u_int holdcnt;
 	short irflag;
 
 	va_start(ap, fmt);
 	vprintf(fmt, ap);
 	va_end(ap);
 	printf("%p: ", (void *)vp);
 	printf("type %s\n", typename[vp->v_type]);
 	holdcnt = atomic_load_int(&vp->v_holdcnt);
 	printf("    usecount %d, writecount %d, refcount %d seqc users %d",
 	    vp->v_usecount, vp->v_writecount, holdcnt & ~VHOLD_ALL_FLAGS,
 	    vp->v_seqc_users);
 	switch (vp->v_type) {
 	case VDIR:
 		printf(" mountedhere %p\n", vp->v_mountedhere);
 		break;
 	case VCHR:
 		printf(" rdev %p\n", vp->v_rdev);
 		break;
 	case VSOCK:
 		printf(" socket %p\n", vp->v_unpcb);
 		break;
 	case VFIFO:
 		printf(" fifoinfo %p\n", vp->v_fifoinfo);
 		break;
 	default:
 		printf("\n");
 		break;
 	}
 	buf[0] = '\0';
 	buf[1] = '\0';
 	if (holdcnt & VHOLD_NO_SMR)
 		strlcat(buf, "|VHOLD_NO_SMR", sizeof(buf));
 	printf("    hold count flags (%s)\n", buf + 1);
 
 	buf[0] = '\0';
 	buf[1] = '\0';
 	irflag = vn_irflag_read(vp);
 	if (irflag & VIRF_DOOMED)
 		strlcat(buf, "|VIRF_DOOMED", sizeof(buf));
 	if (irflag & VIRF_PGREAD)
 		strlcat(buf, "|VIRF_PGREAD", sizeof(buf));
 	if (irflag & VIRF_MOUNTPOINT)
 		strlcat(buf, "|VIRF_MOUNTPOINT", sizeof(buf));
 	if (irflag & VIRF_TEXT_REF)
 		strlcat(buf, "|VIRF_TEXT_REF", sizeof(buf));
 	flags = irflag & ~(VIRF_DOOMED | VIRF_PGREAD | VIRF_MOUNTPOINT | VIRF_TEXT_REF);
 	if (flags != 0) {
 		snprintf(buf2, sizeof(buf2), "|VIRF(0x%lx)", flags);
 		strlcat(buf, buf2, sizeof(buf));
 	}
 	if (vp->v_vflag & VV_ROOT)
 		strlcat(buf, "|VV_ROOT", sizeof(buf));
 	if (vp->v_vflag & VV_ISTTY)
 		strlcat(buf, "|VV_ISTTY", sizeof(buf));
 	if (vp->v_vflag & VV_NOSYNC)
 		strlcat(buf, "|VV_NOSYNC", sizeof(buf));
 	if (vp->v_vflag & VV_ETERNALDEV)
 		strlcat(buf, "|VV_ETERNALDEV", sizeof(buf));
 	if (vp->v_vflag & VV_CACHEDLABEL)
 		strlcat(buf, "|VV_CACHEDLABEL", sizeof(buf));
 	if (vp->v_vflag & VV_VMSIZEVNLOCK)
 		strlcat(buf, "|VV_VMSIZEVNLOCK", sizeof(buf));
 	if (vp->v_vflag & VV_COPYONWRITE)
 		strlcat(buf, "|VV_COPYONWRITE", sizeof(buf));
 	if (vp->v_vflag & VV_SYSTEM)
 		strlcat(buf, "|VV_SYSTEM", sizeof(buf));
 	if (vp->v_vflag & VV_PROCDEP)
 		strlcat(buf, "|VV_PROCDEP", sizeof(buf));
 	if (vp->v_vflag & VV_DELETED)
 		strlcat(buf, "|VV_DELETED", sizeof(buf));
 	if (vp->v_vflag & VV_MD)
 		strlcat(buf, "|VV_MD", sizeof(buf));
 	if (vp->v_vflag & VV_FORCEINSMQ)
 		strlcat(buf, "|VV_FORCEINSMQ", sizeof(buf));
 	if (vp->v_vflag & VV_READLINK)
 		strlcat(buf, "|VV_READLINK", sizeof(buf));
 	flags = vp->v_vflag & ~(VV_ROOT | VV_ISTTY | VV_NOSYNC | VV_ETERNALDEV |
 	    VV_CACHEDLABEL | VV_VMSIZEVNLOCK | VV_COPYONWRITE | VV_SYSTEM |
 	    VV_PROCDEP | VV_DELETED | VV_MD | VV_FORCEINSMQ | VV_READLINK);
 	if (flags != 0) {
 		snprintf(buf2, sizeof(buf2), "|VV(0x%lx)", flags);
 		strlcat(buf, buf2, sizeof(buf));
 	}
 	if (vp->v_iflag & VI_MOUNT)
 		strlcat(buf, "|VI_MOUNT", sizeof(buf));
 	if (vp->v_iflag & VI_DOINGINACT)
 		strlcat(buf, "|VI_DOINGINACT", sizeof(buf));
 	if (vp->v_iflag & VI_OWEINACT)
 		strlcat(buf, "|VI_OWEINACT", sizeof(buf));
 	if (vp->v_iflag & VI_DEFINACT)
 		strlcat(buf, "|VI_DEFINACT", sizeof(buf));
 	if (vp->v_iflag & VI_FOPENING)
 		strlcat(buf, "|VI_FOPENING", sizeof(buf));
 	flags = vp->v_iflag & ~(VI_MOUNT | VI_DOINGINACT |
 	    VI_OWEINACT | VI_DEFINACT | VI_FOPENING);
 	if (flags != 0) {
 		snprintf(buf2, sizeof(buf2), "|VI(0x%lx)", flags);
 		strlcat(buf, buf2, sizeof(buf));
 	}
 	if (vp->v_mflag & VMP_LAZYLIST)
 		strlcat(buf, "|VMP_LAZYLIST", sizeof(buf));
 	flags = vp->v_mflag & ~(VMP_LAZYLIST);
 	if (flags != 0) {
 		snprintf(buf2, sizeof(buf2), "|VMP(0x%lx)", flags);
 		strlcat(buf, buf2, sizeof(buf));
 	}
 	printf("    flags (%s)", buf + 1);
 	if (mtx_owned(VI_MTX(vp)))
 		printf(" VI_LOCKed");
 	printf("\n");
 	if (vp->v_object != NULL)
 		printf("    v_object %p ref %d pages %d "
 		    "cleanbuf %d dirtybuf %d\n",
 		    vp->v_object, vp->v_object->ref_count,
 		    vp->v_object->resident_page_count,
 		    vp->v_bufobj.bo_clean.bv_cnt,
 		    vp->v_bufobj.bo_dirty.bv_cnt);
 	printf("    ");
 	lockmgr_printinfo(vp->v_vnlock);
 	if (vp->v_data != NULL)
 		VOP_PRINT(vp);
 }
 
 #ifdef DDB
 /*
  * List all of the locked vnodes in the system.
  * Called when debugging the kernel.
  */
 DB_SHOW_COMMAND_FLAGS(lockedvnods, lockedvnodes, DB_CMD_MEMSAFE)
 {
 	struct mount *mp;
 	struct vnode *vp;
 
 	/*
 	 * Note: because this is DDB, we can't obey the locking semantics
 	 * for these structures, which means we could catch an inconsistent
 	 * state and dereference a nasty pointer.  Not much to be done
 	 * about that.
 	 */
 	db_printf("Locked vnodes\n");
 	TAILQ_FOREACH(mp, &mountlist, mnt_list) {
 		TAILQ_FOREACH(vp, &mp->mnt_nvnodelist, v_nmntvnodes) {
 			if (vp->v_type != VMARKER && VOP_ISLOCKED(vp))
 				vn_printf(vp, "vnode ");
 		}
 	}
 }
 
 /*
  * Show details about the given vnode.
  */
 DB_SHOW_COMMAND(vnode, db_show_vnode)
 {
 	struct vnode *vp;
 
 	if (!have_addr)
 		return;
 	vp = (struct vnode *)addr;
 	vn_printf(vp, "vnode ");
 }
 
 /*
  * Show details about the given mount point.
  */
 DB_SHOW_COMMAND(mount, db_show_mount)
 {
 	struct mount *mp;
 	struct vfsopt *opt;
 	struct statfs *sp;
 	struct vnode *vp;
 	char buf[512];
 	uint64_t mflags;
 	u_int flags;
 
 	if (!have_addr) {
 		/* No address given, print short info about all mount points. */
 		TAILQ_FOREACH(mp, &mountlist, mnt_list) {
 			db_printf("%p %s on %s (%s)\n", mp,
 			    mp->mnt_stat.f_mntfromname,
 			    mp->mnt_stat.f_mntonname,
 			    mp->mnt_stat.f_fstypename);
 			if (db_pager_quit)
 				break;
 		}
 		db_printf("\nMore info: show mount <addr>\n");
 		return;
 	}
 
 	mp = (struct mount *)addr;
 	db_printf("%p %s on %s (%s)\n", mp, mp->mnt_stat.f_mntfromname,
 	    mp->mnt_stat.f_mntonname, mp->mnt_stat.f_fstypename);
 
 	buf[0] = '\0';
 	mflags = mp->mnt_flag;
 #define	MNT_FLAG(flag)	do {						\
 	if (mflags & (flag)) {						\
 		if (buf[0] != '\0')					\
 			strlcat(buf, ", ", sizeof(buf));		\
 		strlcat(buf, (#flag) + 4, sizeof(buf));			\
 		mflags &= ~(flag);					\
 	}								\
 } while (0)
 	MNT_FLAG(MNT_RDONLY);
 	MNT_FLAG(MNT_SYNCHRONOUS);
 	MNT_FLAG(MNT_NOEXEC);
 	MNT_FLAG(MNT_NOSUID);
 	MNT_FLAG(MNT_NFS4ACLS);
 	MNT_FLAG(MNT_UNION);
 	MNT_FLAG(MNT_ASYNC);
 	MNT_FLAG(MNT_SUIDDIR);
 	MNT_FLAG(MNT_SOFTDEP);
 	MNT_FLAG(MNT_NOSYMFOLLOW);
 	MNT_FLAG(MNT_GJOURNAL);
 	MNT_FLAG(MNT_MULTILABEL);
 	MNT_FLAG(MNT_ACLS);
 	MNT_FLAG(MNT_NOATIME);
 	MNT_FLAG(MNT_NOCLUSTERR);
 	MNT_FLAG(MNT_NOCLUSTERW);
 	MNT_FLAG(MNT_SUJ);
 	MNT_FLAG(MNT_EXRDONLY);
 	MNT_FLAG(MNT_EXPORTED);
 	MNT_FLAG(MNT_DEFEXPORTED);
 	MNT_FLAG(MNT_EXPORTANON);
 	MNT_FLAG(MNT_EXKERB);
 	MNT_FLAG(MNT_EXPUBLIC);
 	MNT_FLAG(MNT_LOCAL);
 	MNT_FLAG(MNT_QUOTA);
 	MNT_FLAG(MNT_ROOTFS);
 	MNT_FLAG(MNT_USER);
 	MNT_FLAG(MNT_IGNORE);
 	MNT_FLAG(MNT_UPDATE);
 	MNT_FLAG(MNT_DELEXPORT);
 	MNT_FLAG(MNT_RELOAD);
 	MNT_FLAG(MNT_FORCE);
 	MNT_FLAG(MNT_SNAPSHOT);
 	MNT_FLAG(MNT_BYFSID);
 #undef MNT_FLAG
 	if (mflags != 0) {
 		if (buf[0] != '\0')
 			strlcat(buf, ", ", sizeof(buf));
 		snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf),
 		    "0x%016jx", mflags);
 	}
 	db_printf("    mnt_flag = %s\n", buf);
 
 	buf[0] = '\0';
 	flags = mp->mnt_kern_flag;
 #define	MNT_KERN_FLAG(flag)	do {					\
 	if (flags & (flag)) {						\
 		if (buf[0] != '\0')					\
 			strlcat(buf, ", ", sizeof(buf));		\
 		strlcat(buf, (#flag) + 5, sizeof(buf));			\
 		flags &= ~(flag);					\
 	}								\
 } while (0)
 	MNT_KERN_FLAG(MNTK_UNMOUNTF);
 	MNT_KERN_FLAG(MNTK_ASYNC);
 	MNT_KERN_FLAG(MNTK_SOFTDEP);
 	MNT_KERN_FLAG(MNTK_NOMSYNC);
 	MNT_KERN_FLAG(MNTK_DRAINING);
 	MNT_KERN_FLAG(MNTK_REFEXPIRE);
 	MNT_KERN_FLAG(MNTK_EXTENDED_SHARED);
 	MNT_KERN_FLAG(MNTK_SHARED_WRITES);
 	MNT_KERN_FLAG(MNTK_NO_IOPF);
 	MNT_KERN_FLAG(MNTK_RECURSE);
 	MNT_KERN_FLAG(MNTK_UPPER_WAITER);
 	MNT_KERN_FLAG(MNTK_UNLOCKED_INSMNTQUE);
 	MNT_KERN_FLAG(MNTK_USES_BCACHE);
 	MNT_KERN_FLAG(MNTK_VMSETSIZE_BUG);
 	MNT_KERN_FLAG(MNTK_FPLOOKUP);
 	MNT_KERN_FLAG(MNTK_TASKQUEUE_WAITER);
 	MNT_KERN_FLAG(MNTK_NOASYNC);
 	MNT_KERN_FLAG(MNTK_UNMOUNT);
 	MNT_KERN_FLAG(MNTK_MWAIT);
 	MNT_KERN_FLAG(MNTK_SUSPEND);
 	MNT_KERN_FLAG(MNTK_SUSPEND2);
 	MNT_KERN_FLAG(MNTK_SUSPENDED);
 	MNT_KERN_FLAG(MNTK_NULL_NOCACHE);
 	MNT_KERN_FLAG(MNTK_LOOKUP_SHARED);
 #undef MNT_KERN_FLAG
 	if (flags != 0) {
 		if (buf[0] != '\0')
 			strlcat(buf, ", ", sizeof(buf));
 		snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf),
 		    "0x%08x", flags);
 	}
 	db_printf("    mnt_kern_flag = %s\n", buf);
 
 	db_printf("    mnt_opt = ");
 	opt = TAILQ_FIRST(mp->mnt_opt);
 	if (opt != NULL) {
 		db_printf("%s", opt->name);
 		opt = TAILQ_NEXT(opt, link);
 		while (opt != NULL) {
 			db_printf(", %s", opt->name);
 			opt = TAILQ_NEXT(opt, link);
 		}
 	}
 	db_printf("\n");
 
 	sp = &mp->mnt_stat;
 	db_printf("    mnt_stat = { version=%u type=%u flags=0x%016jx "
 	    "bsize=%ju iosize=%ju blocks=%ju bfree=%ju bavail=%jd files=%ju "
 	    "ffree=%jd syncwrites=%ju asyncwrites=%ju syncreads=%ju "
 	    "asyncreads=%ju namemax=%u owner=%u fsid=[%d, %d] }\n",
 	    (u_int)sp->f_version, (u_int)sp->f_type, (uintmax_t)sp->f_flags,
 	    (uintmax_t)sp->f_bsize, (uintmax_t)sp->f_iosize,
 	    (uintmax_t)sp->f_blocks, (uintmax_t)sp->f_bfree,
 	    (intmax_t)sp->f_bavail, (uintmax_t)sp->f_files,
 	    (intmax_t)sp->f_ffree, (uintmax_t)sp->f_syncwrites,
 	    (uintmax_t)sp->f_asyncwrites, (uintmax_t)sp->f_syncreads,
 	    (uintmax_t)sp->f_asyncreads, (u_int)sp->f_namemax,
 	    (u_int)sp->f_owner, (int)sp->f_fsid.val[0], (int)sp->f_fsid.val[1]);
 
 	db_printf("    mnt_cred = { uid=%u ruid=%u",
 	    (u_int)mp->mnt_cred->cr_uid, (u_int)mp->mnt_cred->cr_ruid);
 	if (jailed(mp->mnt_cred))
 		db_printf(", jail=%d", mp->mnt_cred->cr_prison->pr_id);
 	db_printf(" }\n");
 	db_printf("    mnt_ref = %d (with %d in the struct)\n",
 	    vfs_mount_fetch_counter(mp, MNT_COUNT_REF), mp->mnt_ref);
 	db_printf("    mnt_gen = %d\n", mp->mnt_gen);
 	db_printf("    mnt_nvnodelistsize = %d\n", mp->mnt_nvnodelistsize);
 	db_printf("    mnt_lazyvnodelistsize = %d\n",
 	    mp->mnt_lazyvnodelistsize);
 	db_printf("    mnt_writeopcount = %d (with %d in the struct)\n",
 	    vfs_mount_fetch_counter(mp, MNT_COUNT_WRITEOPCOUNT), mp->mnt_writeopcount);
 	db_printf("    mnt_iosize_max = %d\n", mp->mnt_iosize_max);
 	db_printf("    mnt_hashseed = %u\n", mp->mnt_hashseed);
 	db_printf("    mnt_lockref = %d (with %d in the struct)\n",
 	    vfs_mount_fetch_counter(mp, MNT_COUNT_LOCKREF), mp->mnt_lockref);
 	db_printf("    mnt_secondary_writes = %d\n", mp->mnt_secondary_writes);
 	db_printf("    mnt_secondary_accwrites = %d\n",
 	    mp->mnt_secondary_accwrites);
 	db_printf("    mnt_gjprovider = %s\n",
 	    mp->mnt_gjprovider != NULL ? mp->mnt_gjprovider : "NULL");
 	db_printf("    mnt_vfs_ops = %d\n", mp->mnt_vfs_ops);
 
 	db_printf("\n\nList of active vnodes\n");
 	TAILQ_FOREACH(vp, &mp->mnt_nvnodelist, v_nmntvnodes) {
 		if (vp->v_type != VMARKER && vp->v_holdcnt > 0) {
 			vn_printf(vp, "vnode ");
 			if (db_pager_quit)
 				break;
 		}
 	}
 	db_printf("\n\nList of inactive vnodes\n");
 	TAILQ_FOREACH(vp, &mp->mnt_nvnodelist, v_nmntvnodes) {
 		if (vp->v_type != VMARKER && vp->v_holdcnt == 0) {
 			vn_printf(vp, "vnode ");
 			if (db_pager_quit)
 				break;
 		}
 	}
 }
 #endif	/* DDB */
 
 /*
  * Fill in a struct xvfsconf based on a struct vfsconf.
  */
 static int
 vfsconf2x(struct sysctl_req *req, struct vfsconf *vfsp)
 {
 	struct xvfsconf xvfsp;
 
 	bzero(&xvfsp, sizeof(xvfsp));
 	strcpy(xvfsp.vfc_name, vfsp->vfc_name);
 	xvfsp.vfc_typenum = vfsp->vfc_typenum;
 	xvfsp.vfc_refcount = vfsp->vfc_refcount;
 	xvfsp.vfc_flags = vfsp->vfc_flags;
 	/*
 	 * These are unused in userland, we keep them
 	 * to not break binary compatibility.
 	 */
 	xvfsp.vfc_vfsops = NULL;
 	xvfsp.vfc_next = NULL;
 	return (SYSCTL_OUT(req, &xvfsp, sizeof(xvfsp)));
 }
 
 #ifdef COMPAT_FREEBSD32
 struct xvfsconf32 {
 	uint32_t	vfc_vfsops;
 	char		vfc_name[MFSNAMELEN];
 	int32_t		vfc_typenum;
 	int32_t		vfc_refcount;
 	int32_t		vfc_flags;
 	uint32_t	vfc_next;
 };
 
 static int
 vfsconf2x32(struct sysctl_req *req, struct vfsconf *vfsp)
 {
 	struct xvfsconf32 xvfsp;
 
 	bzero(&xvfsp, sizeof(xvfsp));
 	strcpy(xvfsp.vfc_name, vfsp->vfc_name);
 	xvfsp.vfc_typenum = vfsp->vfc_typenum;
 	xvfsp.vfc_refcount = vfsp->vfc_refcount;
 	xvfsp.vfc_flags = vfsp->vfc_flags;
 	return (SYSCTL_OUT(req, &xvfsp, sizeof(xvfsp)));
 }
 #endif
 
 /*
  * Top level filesystem related information gathering.
  */
 static int
 sysctl_vfs_conflist(SYSCTL_HANDLER_ARGS)
 {
 	struct vfsconf *vfsp;
 	int error;
 
 	error = 0;
 	vfsconf_slock();
 	TAILQ_FOREACH(vfsp, &vfsconf, vfc_list) {
 #ifdef COMPAT_FREEBSD32
 		if (req->flags & SCTL_MASK32)
 			error = vfsconf2x32(req, vfsp);
 		else
 #endif
 			error = vfsconf2x(req, vfsp);
 		if (error)
 			break;
 	}
 	vfsconf_sunlock();
 	return (error);
 }
 
 SYSCTL_PROC(_vfs, OID_AUTO, conflist, CTLTYPE_OPAQUE | CTLFLAG_RD |
     CTLFLAG_MPSAFE, NULL, 0, sysctl_vfs_conflist,
     "S,xvfsconf", "List of all configured filesystems");
 
 #ifndef BURN_BRIDGES
 static int	sysctl_ovfs_conf(SYSCTL_HANDLER_ARGS);
 
 static int
 vfs_sysctl(SYSCTL_HANDLER_ARGS)
 {
 	int *name = (int *)arg1 - 1;	/* XXX */
 	u_int namelen = arg2 + 1;	/* XXX */
 	struct vfsconf *vfsp;
 
 	log(LOG_WARNING, "userland calling deprecated sysctl, "
 	    "please rebuild world\n");
 
 #if 1 || defined(COMPAT_PRELITE2)
 	/* Resolve ambiguity between VFS_VFSCONF and VFS_GENERIC. */
 	if (namelen == 1)
 		return (sysctl_ovfs_conf(oidp, arg1, arg2, req));
 #endif
 
 	switch (name[1]) {
 	case VFS_MAXTYPENUM:
 		if (namelen != 2)
 			return (ENOTDIR);
 		return (SYSCTL_OUT(req, &maxvfsconf, sizeof(int)));
 	case VFS_CONF:
 		if (namelen != 3)
 			return (ENOTDIR);	/* overloaded */
 		vfsconf_slock();
 		TAILQ_FOREACH(vfsp, &vfsconf, vfc_list) {
 			if (vfsp->vfc_typenum == name[2])
 				break;
 		}
 		vfsconf_sunlock();
 		if (vfsp == NULL)
 			return (EOPNOTSUPP);
 #ifdef COMPAT_FREEBSD32
 		if (req->flags & SCTL_MASK32)
 			return (vfsconf2x32(req, vfsp));
 		else
 #endif
 			return (vfsconf2x(req, vfsp));
 	}
 	return (EOPNOTSUPP);
 }
 
 static SYSCTL_NODE(_vfs, VFS_GENERIC, generic, CTLFLAG_RD | CTLFLAG_SKIP |
     CTLFLAG_MPSAFE, vfs_sysctl,
     "Generic filesystem");
 
 #if 1 || defined(COMPAT_PRELITE2)
 
 static int
 sysctl_ovfs_conf(SYSCTL_HANDLER_ARGS)
 {
 	int error;
 	struct vfsconf *vfsp;
 	struct ovfsconf ovfs;
 
 	vfsconf_slock();
 	TAILQ_FOREACH(vfsp, &vfsconf, vfc_list) {
 		bzero(&ovfs, sizeof(ovfs));
 		ovfs.vfc_vfsops = vfsp->vfc_vfsops;	/* XXX used as flag */
 		strcpy(ovfs.vfc_name, vfsp->vfc_name);
 		ovfs.vfc_index = vfsp->vfc_typenum;
 		ovfs.vfc_refcount = vfsp->vfc_refcount;
 		ovfs.vfc_flags = vfsp->vfc_flags;
 		error = SYSCTL_OUT(req, &ovfs, sizeof ovfs);
 		if (error != 0) {
 			vfsconf_sunlock();
 			return (error);
 		}
 	}
 	vfsconf_sunlock();
 	return (0);
 }
 
 #endif /* 1 || COMPAT_PRELITE2 */
 #endif /* !BURN_BRIDGES */
 
 #define KINFO_VNODESLOP		10
 #ifdef notyet
 /*
  * Dump vnode list (via sysctl).
  */
 /* ARGSUSED */
 static int
 sysctl_vnode(SYSCTL_HANDLER_ARGS)
 {
 	struct xvnode *xvn;
 	struct mount *mp;
 	struct vnode *vp;
 	int error, len, n;
 
 	/*
 	 * Stale numvnodes access is not fatal here.
 	 */
 	req->lock = 0;
 	len = (numvnodes + KINFO_VNODESLOP) * sizeof *xvn;
 	if (!req->oldptr)
 		/* Make an estimate */
 		return (SYSCTL_OUT(req, 0, len));
 
 	error = sysctl_wire_old_buffer(req, 0);
 	if (error != 0)
 		return (error);
 	xvn = malloc(len, M_TEMP, M_ZERO | M_WAITOK);
 	n = 0;
 	mtx_lock(&mountlist_mtx);
 	TAILQ_FOREACH(mp, &mountlist, mnt_list) {
 		if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK))
 			continue;
 		MNT_ILOCK(mp);
 		TAILQ_FOREACH(vp, &mp->mnt_nvnodelist, v_nmntvnodes) {
 			if (n == len)
 				break;
 			vref(vp);
 			xvn[n].xv_size = sizeof *xvn;
 			xvn[n].xv_vnode = vp;
 			xvn[n].xv_id = 0;	/* XXX compat */
 #define XV_COPY(field) xvn[n].xv_##field = vp->v_##field
 			XV_COPY(usecount);
 			XV_COPY(writecount);
 			XV_COPY(holdcnt);
 			XV_COPY(mount);
 			XV_COPY(numoutput);
 			XV_COPY(type);
 #undef XV_COPY
 			xvn[n].xv_flag = vp->v_vflag;
 
 			switch (vp->v_type) {
 			case VREG:
 			case VDIR:
 			case VLNK:
 				break;
 			case VBLK:
 			case VCHR:
 				if (vp->v_rdev == NULL) {
 					vrele(vp);
 					continue;
 				}
 				xvn[n].xv_dev = dev2udev(vp->v_rdev);
 				break;
 			case VSOCK:
 				xvn[n].xv_socket = vp->v_socket;
 				break;
 			case VFIFO:
 				xvn[n].xv_fifo = vp->v_fifoinfo;
 				break;
 			case VNON:
 			case VBAD:
 			default:
 				/* shouldn't happen? */
 				vrele(vp);
 				continue;
 			}
 			vrele(vp);
 			++n;
 		}
 		MNT_IUNLOCK(mp);
 		mtx_lock(&mountlist_mtx);
 		vfs_unbusy(mp);
 		if (n == len)
 			break;
 	}
 	mtx_unlock(&mountlist_mtx);
 
 	error = SYSCTL_OUT(req, xvn, n * sizeof *xvn);
 	free(xvn, M_TEMP);
 	return (error);
 }
 
 SYSCTL_PROC(_kern, KERN_VNODE, vnode, CTLTYPE_OPAQUE | CTLFLAG_RD |
     CTLFLAG_MPSAFE, 0, 0, sysctl_vnode, "S,xvnode",
     "");
 #endif
 
 static void
 unmount_or_warn(struct mount *mp)
 {
 	int error;
 
 	error = dounmount(mp, MNT_FORCE, curthread);
 	if (error != 0) {
 		printf("unmount of %s failed (", mp->mnt_stat.f_mntonname);
 		if (error == EBUSY)
 			printf("BUSY)\n");
 		else
 			printf("%d)\n", error);
 	}
 }
 
 /*
  * Unmount all filesystems. The list is traversed in reverse order
  * of mounting to avoid dependencies.
  */
 void
 vfs_unmountall(void)
 {
 	struct mount *mp, *tmp;
 
 	CTR1(KTR_VFS, "%s: unmounting all filesystems", __func__);
 
 	/*
 	 * Since this only runs when rebooting, it is not interlocked.
 	 */
 	TAILQ_FOREACH_REVERSE_SAFE(mp, &mountlist, mntlist, mnt_list, tmp) {
 		vfs_ref(mp);
 
 		/*
 		 * Forcibly unmounting "/dev" before "/" would prevent clean
 		 * unmount of the latter.
 		 */
 		if (mp == rootdevmp)
 			continue;
 
 		unmount_or_warn(mp);
 	}
 
 	if (rootdevmp != NULL)
 		unmount_or_warn(rootdevmp);
 }
 
 static void
 vfs_deferred_inactive(struct vnode *vp, int lkflags)
 {
 
 	ASSERT_VI_LOCKED(vp, __func__);
 	VNASSERT((vp->v_iflag & VI_DEFINACT) == 0, vp, ("VI_DEFINACT still set"));
 	if ((vp->v_iflag & VI_OWEINACT) == 0) {
 		vdropl(vp);
 		return;
 	}
 	if (vn_lock(vp, lkflags) == 0) {
 		VI_LOCK(vp);
 		vinactive(vp);
 		VOP_UNLOCK(vp);
 		vdropl(vp);
 		return;
 	}
 	vdefer_inactive_unlocked(vp);
 }
 
 static int
 vfs_periodic_inactive_filter(struct vnode *vp, void *arg)
 {
 
 	return (vp->v_iflag & VI_DEFINACT);
 }
 
 static void __noinline
 vfs_periodic_inactive(struct mount *mp, int flags)
 {
 	struct vnode *vp, *mvp;
 	int lkflags;
 
 	lkflags = LK_EXCLUSIVE | LK_INTERLOCK;
 	if (flags != MNT_WAIT)
 		lkflags |= LK_NOWAIT;
 
 	MNT_VNODE_FOREACH_LAZY(vp, mp, mvp, vfs_periodic_inactive_filter, NULL) {
 		if ((vp->v_iflag & VI_DEFINACT) == 0) {
 			VI_UNLOCK(vp);
 			continue;
 		}
 		vp->v_iflag &= ~VI_DEFINACT;
 		vfs_deferred_inactive(vp, lkflags);
 	}
 }
 
 static inline bool
 vfs_want_msync(struct vnode *vp)
 {
 	struct vm_object *obj;
 
 	/*
 	 * This test may be performed without any locks held.
 	 * We rely on vm_object's type stability.
 	 */
 	if (vp->v_vflag & VV_NOSYNC)
 		return (false);
 	obj = vp->v_object;
 	return (obj != NULL && vm_object_mightbedirty(obj));
 }
 
 static int
 vfs_periodic_msync_inactive_filter(struct vnode *vp, void *arg __unused)
 {
 
 	if (vp->v_vflag & VV_NOSYNC)
 		return (false);
 	if (vp->v_iflag & VI_DEFINACT)
 		return (true);
 	return (vfs_want_msync(vp));
 }
 
 static void __noinline
 vfs_periodic_msync_inactive(struct mount *mp, int flags)
 {
 	struct vnode *vp, *mvp;
 	struct vm_object *obj;
 	int lkflags, objflags;
 	bool seen_defer;
 
 	lkflags = LK_EXCLUSIVE | LK_INTERLOCK;
 	if (flags != MNT_WAIT) {
 		lkflags |= LK_NOWAIT;
 		objflags = OBJPC_NOSYNC;
 	} else {
 		objflags = OBJPC_SYNC;
 	}
 
 	MNT_VNODE_FOREACH_LAZY(vp, mp, mvp, vfs_periodic_msync_inactive_filter, NULL) {
 		seen_defer = false;
 		if (vp->v_iflag & VI_DEFINACT) {
 			vp->v_iflag &= ~VI_DEFINACT;
 			seen_defer = true;
 		}
 		if (!vfs_want_msync(vp)) {
 			if (seen_defer)
 				vfs_deferred_inactive(vp, lkflags);
 			else
 				VI_UNLOCK(vp);
 			continue;
 		}
 		if (vget(vp, lkflags) == 0) {
 			obj = vp->v_object;
 			if (obj != NULL && (vp->v_vflag & VV_NOSYNC) == 0) {
 				VM_OBJECT_WLOCK(obj);
 				vm_object_page_clean(obj, 0, 0, objflags);
 				VM_OBJECT_WUNLOCK(obj);
 			}
 			vput(vp);
 			if (seen_defer)
 				vdrop(vp);
 		} else {
 			if (seen_defer)
 				vdefer_inactive_unlocked(vp);
 		}
 	}
 }
 
 void
 vfs_periodic(struct mount *mp, int flags)
 {
 
 	CTR2(KTR_VFS, "%s: mp %p", __func__, mp);
 
 	if ((mp->mnt_kern_flag & MNTK_NOMSYNC) != 0)
 		vfs_periodic_inactive(mp, flags);
 	else
 		vfs_periodic_msync_inactive(mp, flags);
 }
 
 static void
 destroy_vpollinfo_free(struct vpollinfo *vi)
 {
 
 	knlist_destroy(&vi->vpi_selinfo.si_note);
 	mtx_destroy(&vi->vpi_lock);
 	free(vi, M_VNODEPOLL);
 }
 
 static void
 destroy_vpollinfo(struct vpollinfo *vi)
 {
 
 	knlist_clear(&vi->vpi_selinfo.si_note, 1);
 	seldrain(&vi->vpi_selinfo);
 	destroy_vpollinfo_free(vi);
 }
 
 /*
  * Initialize per-vnode helper structure to hold poll-related state.
  */
 void
 v_addpollinfo(struct vnode *vp)
 {
 	struct vpollinfo *vi;
 
 	if (vp->v_pollinfo != NULL)
 		return;
 	vi = malloc(sizeof(*vi), M_VNODEPOLL, M_WAITOK | M_ZERO);
 	mtx_init(&vi->vpi_lock, "vnode pollinfo", NULL, MTX_DEF);
 	knlist_init(&vi->vpi_selinfo.si_note, vp, vfs_knllock,
 	    vfs_knlunlock, vfs_knl_assert_lock);
 	VI_LOCK(vp);
 	if (vp->v_pollinfo != NULL) {
 		VI_UNLOCK(vp);
 		destroy_vpollinfo_free(vi);
 		return;
 	}
 	vp->v_pollinfo = vi;
 	VI_UNLOCK(vp);
 }
 
 /*
  * Record a process's interest in events which might happen to
  * a vnode.  Because poll uses the historic select-style interface
  * internally, this routine serves as both the ``check for any
  * pending events'' and the ``record my interest in future events''
  * functions.  (These are done together, while the lock is held,
  * to avoid race conditions.)
  */
 int
 vn_pollrecord(struct vnode *vp, struct thread *td, int events)
 {
 
 	v_addpollinfo(vp);
 	mtx_lock(&vp->v_pollinfo->vpi_lock);
 	if (vp->v_pollinfo->vpi_revents & events) {
 		/*
 		 * This leaves events we are not interested
 		 * in available for the other process which
 		 * which presumably had requested them
 		 * (otherwise they would never have been
 		 * recorded).
 		 */
 		events &= vp->v_pollinfo->vpi_revents;
 		vp->v_pollinfo->vpi_revents &= ~events;
 
 		mtx_unlock(&vp->v_pollinfo->vpi_lock);
 		return (events);
 	}
 	vp->v_pollinfo->vpi_events |= events;
 	selrecord(td, &vp->v_pollinfo->vpi_selinfo);
 	mtx_unlock(&vp->v_pollinfo->vpi_lock);
 	return (0);
 }
 
 /*
  * Routine to create and manage a filesystem syncer vnode.
  */
 #define sync_close ((int (*)(struct  vop_close_args *))nullop)
 static int	sync_fsync(struct  vop_fsync_args *);
 static int	sync_inactive(struct  vop_inactive_args *);
 static int	sync_reclaim(struct  vop_reclaim_args *);
 
 static struct vop_vector sync_vnodeops = {
 	.vop_bypass =	VOP_EOPNOTSUPP,
 	.vop_close =	sync_close,		/* close */
 	.vop_fsync =	sync_fsync,		/* fsync */
 	.vop_inactive =	sync_inactive,	/* inactive */
 	.vop_need_inactive = vop_stdneed_inactive, /* need_inactive */
 	.vop_reclaim =	sync_reclaim,	/* reclaim */
 	.vop_lock1 =	vop_stdlock,	/* lock */
 	.vop_unlock =	vop_stdunlock,	/* unlock */
 	.vop_islocked =	vop_stdislocked,	/* islocked */
 };
 VFS_VOP_VECTOR_REGISTER(sync_vnodeops);
 
 /*
  * Create a new filesystem syncer vnode for the specified mount point.
  */
 void
 vfs_allocate_syncvnode(struct mount *mp)
 {
 	struct vnode *vp;
 	struct bufobj *bo;
 	static long start, incr, next;
 	int error;
 
 	/* Allocate a new vnode */
 	error = getnewvnode("syncer", mp, &sync_vnodeops, &vp);
 	if (error != 0)
 		panic("vfs_allocate_syncvnode: getnewvnode() failed");
 	vp->v_type = VNON;
 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 	vp->v_vflag |= VV_FORCEINSMQ;
 	error = insmntque1(vp, mp);
 	if (error != 0)
 		panic("vfs_allocate_syncvnode: insmntque() failed");
 	vp->v_vflag &= ~VV_FORCEINSMQ;
 	VOP_UNLOCK(vp);
 	/*
 	 * Place the vnode onto the syncer worklist. We attempt to
 	 * scatter them about on the list so that they will go off
 	 * at evenly distributed times even if all the filesystems
 	 * are mounted at once.
 	 */
 	next += incr;
 	if (next == 0 || next > syncer_maxdelay) {
 		start /= 2;
 		incr /= 2;
 		if (start == 0) {
 			start = syncer_maxdelay / 2;
 			incr = syncer_maxdelay;
 		}
 		next = start;
 	}
 	bo = &vp->v_bufobj;
 	BO_LOCK(bo);
 	vn_syncer_add_to_worklist(bo, syncdelay > 0 ? next % syncdelay : 0);
 	/* XXX - vn_syncer_add_to_worklist() also grabs and drops sync_mtx. */
 	mtx_lock(&sync_mtx);
 	sync_vnode_count++;
 	if (mp->mnt_syncer == NULL) {
 		mp->mnt_syncer = vp;
 		vp = NULL;
 	}
 	mtx_unlock(&sync_mtx);
 	BO_UNLOCK(bo);
 	if (vp != NULL) {
 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 		vgone(vp);
 		vput(vp);
 	}
 }
 
 void
 vfs_deallocate_syncvnode(struct mount *mp)
 {
 	struct vnode *vp;
 
 	mtx_lock(&sync_mtx);
 	vp = mp->mnt_syncer;
 	if (vp != NULL)
 		mp->mnt_syncer = NULL;
 	mtx_unlock(&sync_mtx);
 	if (vp != NULL)
 		vrele(vp);
 }
 
 /*
  * Do a lazy sync of the filesystem.
  */
 static int
 sync_fsync(struct vop_fsync_args *ap)
 {
 	struct vnode *syncvp = ap->a_vp;
 	struct mount *mp = syncvp->v_mount;
 	int error, save;
 	struct bufobj *bo;
 
 	/*
 	 * We only need to do something if this is a lazy evaluation.
 	 */
 	if (ap->a_waitfor != MNT_LAZY)
 		return (0);
 
 	/*
 	 * Move ourselves to the back of the sync list.
 	 */
 	bo = &syncvp->v_bufobj;
 	BO_LOCK(bo);
 	vn_syncer_add_to_worklist(bo, syncdelay);
 	BO_UNLOCK(bo);
 
 	/*
 	 * Walk the list of vnodes pushing all that are dirty and
 	 * not already on the sync list.
 	 */
 	if (vfs_busy(mp, MBF_NOWAIT) != 0)
 		return (0);
 	VOP_UNLOCK(syncvp);
 	save = curthread_pflags_set(TDP_SYNCIO);
 	/*
 	 * The filesystem at hand may be idle with free vnodes stored in the
 	 * batch.  Return them instead of letting them stay there indefinitely.
 	 */
 	vfs_periodic(mp, MNT_NOWAIT);
 	error = VFS_SYNC(mp, MNT_LAZY);
 	curthread_pflags_restore(save);
 	vn_lock(syncvp, LK_EXCLUSIVE | LK_RETRY);
 	vfs_unbusy(mp);
 	return (error);
 }
 
 /*
  * The syncer vnode is no referenced.
  */
 static int
 sync_inactive(struct vop_inactive_args *ap)
 {
 
 	vgone(ap->a_vp);
 	return (0);
 }
 
 /*
  * The syncer vnode is no longer needed and is being decommissioned.
  *
  * Modifications to the worklist must be protected by sync_mtx.
  */
 static int
 sync_reclaim(struct vop_reclaim_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct bufobj *bo;
 
 	bo = &vp->v_bufobj;
 	BO_LOCK(bo);
 	mtx_lock(&sync_mtx);
 	if (vp->v_mount->mnt_syncer == vp)
 		vp->v_mount->mnt_syncer = NULL;
 	if (bo->bo_flag & BO_ONWORKLST) {
 		LIST_REMOVE(bo, bo_synclist);
 		syncer_worklist_len--;
 		sync_vnode_count--;
 		bo->bo_flag &= ~BO_ONWORKLST;
 	}
 	mtx_unlock(&sync_mtx);
 	BO_UNLOCK(bo);
 
 	return (0);
 }
 
 int
 vn_need_pageq_flush(struct vnode *vp)
 {
 	struct vm_object *obj;
 
 	obj = vp->v_object;
 	return (obj != NULL && (vp->v_vflag & VV_NOSYNC) == 0 &&
 	    vm_object_mightbedirty(obj));
 }
 
 /*
  * Check if vnode represents a disk device
  */
 bool
 vn_isdisk_error(struct vnode *vp, int *errp)
 {
 	int error;
 
 	if (vp->v_type != VCHR) {
 		error = ENOTBLK;
 		goto out;
 	}
 	error = 0;
 	dev_lock();
 	if (vp->v_rdev == NULL)
 		error = ENXIO;
 	else if (vp->v_rdev->si_devsw == NULL)
 		error = ENXIO;
 	else if (!(vp->v_rdev->si_devsw->d_flags & D_DISK))
 		error = ENOTBLK;
 	dev_unlock();
 out:
 	*errp = error;
 	return (error == 0);
 }
 
 bool
 vn_isdisk(struct vnode *vp)
 {
 	int error;
 
 	return (vn_isdisk_error(vp, &error));
 }
 
 /*
  * VOP_FPLOOKUP_VEXEC routines are subject to special circumstances, see
  * the comment above cache_fplookup for details.
  */
 int
 vaccess_vexec_smr(mode_t file_mode, uid_t file_uid, gid_t file_gid, struct ucred *cred)
 {
 	int error;
 
 	VFS_SMR_ASSERT_ENTERED();
 
 	/* Check the owner. */
 	if (cred->cr_uid == file_uid) {
 		if (file_mode & S_IXUSR)
 			return (0);
 		goto out_error;
 	}
 
 	/* Otherwise, check the groups (first match) */
 	if (groupmember(file_gid, cred)) {
 		if (file_mode & S_IXGRP)
 			return (0);
 		goto out_error;
 	}
 
 	/* Otherwise, check everyone else. */
 	if (file_mode & S_IXOTH)
 		return (0);
 out_error:
 	/*
 	 * Permission check failed, but it is possible denial will get overwritten
 	 * (e.g., when root is traversing through a 700 directory owned by someone
 	 * else).
 	 *
 	 * vaccess() calls priv_check_cred which in turn can descent into MAC
 	 * modules overriding this result. It's quite unclear what semantics
 	 * are allowed for them to operate, thus for safety we don't call them
 	 * from within the SMR section. This also means if any such modules
 	 * are present, we have to let the regular lookup decide.
 	 */
 	error = priv_check_cred_vfs_lookup_nomac(cred);
 	switch (error) {
 	case 0:
 		return (0);
 	case EAGAIN:
 		/*
 		 * MAC modules present.
 		 */
 		return (EAGAIN);
 	case EPERM:
 		return (EACCES);
 	default:
 		return (error);
 	}
 }
 
 /*
  * Common filesystem object access control check routine.  Accepts a
  * vnode's type, "mode", uid and gid, requested access mode, and credentials.
  * Returns 0 on success, or an errno on failure.
  */
 int
 vaccess(enum vtype type, mode_t file_mode, uid_t file_uid, gid_t file_gid,
     accmode_t accmode, struct ucred *cred)
 {
 	accmode_t dac_granted;
 	accmode_t priv_granted;
 
 	KASSERT((accmode & ~(VEXEC | VWRITE | VREAD | VADMIN | VAPPEND)) == 0,
 	    ("invalid bit in accmode"));
 	KASSERT((accmode & VAPPEND) == 0 || (accmode & VWRITE),
 	    ("VAPPEND without VWRITE"));
 
 	/*
 	 * Look for a normal, non-privileged way to access the file/directory
 	 * as requested.  If it exists, go with that.
 	 */
 
 	dac_granted = 0;
 
 	/* Check the owner. */
 	if (cred->cr_uid == file_uid) {
 		dac_granted |= VADMIN;
 		if (file_mode & S_IXUSR)
 			dac_granted |= VEXEC;
 		if (file_mode & S_IRUSR)
 			dac_granted |= VREAD;
 		if (file_mode & S_IWUSR)
 			dac_granted |= (VWRITE | VAPPEND);
 
 		if ((accmode & dac_granted) == accmode)
 			return (0);
 
 		goto privcheck;
 	}
 
 	/* Otherwise, check the groups (first match) */
 	if (groupmember(file_gid, cred)) {
 		if (file_mode & S_IXGRP)
 			dac_granted |= VEXEC;
 		if (file_mode & S_IRGRP)
 			dac_granted |= VREAD;
 		if (file_mode & S_IWGRP)
 			dac_granted |= (VWRITE | VAPPEND);
 
 		if ((accmode & dac_granted) == accmode)
 			return (0);
 
 		goto privcheck;
 	}
 
 	/* Otherwise, check everyone else. */
 	if (file_mode & S_IXOTH)
 		dac_granted |= VEXEC;
 	if (file_mode & S_IROTH)
 		dac_granted |= VREAD;
 	if (file_mode & S_IWOTH)
 		dac_granted |= (VWRITE | VAPPEND);
 	if ((accmode & dac_granted) == accmode)
 		return (0);
 
 privcheck:
 	/*
 	 * Build a privilege mask to determine if the set of privileges
 	 * satisfies the requirements when combined with the granted mask
 	 * from above.  For each privilege, if the privilege is required,
 	 * bitwise or the request type onto the priv_granted mask.
 	 */
 	priv_granted = 0;
 
 	if (type == VDIR) {
 		/*
 		 * For directories, use PRIV_VFS_LOOKUP to satisfy VEXEC
 		 * requests, instead of PRIV_VFS_EXEC.
 		 */
 		if ((accmode & VEXEC) && ((dac_granted & VEXEC) == 0) &&
 		    !priv_check_cred(cred, PRIV_VFS_LOOKUP))
 			priv_granted |= VEXEC;
 	} else {
 		/*
 		 * Ensure that at least one execute bit is on. Otherwise,
 		 * a privileged user will always succeed, and we don't want
 		 * this to happen unless the file really is executable.
 		 */
 		if ((accmode & VEXEC) && ((dac_granted & VEXEC) == 0) &&
 		    (file_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) != 0 &&
 		    !priv_check_cred(cred, PRIV_VFS_EXEC))
 			priv_granted |= VEXEC;
 	}
 
 	if ((accmode & VREAD) && ((dac_granted & VREAD) == 0) &&
 	    !priv_check_cred(cred, PRIV_VFS_READ))
 		priv_granted |= VREAD;
 
 	if ((accmode & VWRITE) && ((dac_granted & VWRITE) == 0) &&
 	    !priv_check_cred(cred, PRIV_VFS_WRITE))
 		priv_granted |= (VWRITE | VAPPEND);
 
 	if ((accmode & VADMIN) && ((dac_granted & VADMIN) == 0) &&
 	    !priv_check_cred(cred, PRIV_VFS_ADMIN))
 		priv_granted |= VADMIN;
 
 	if ((accmode & (priv_granted | dac_granted)) == accmode) {
 		return (0);
 	}
 
 	return ((accmode & VADMIN) ? EPERM : EACCES);
 }
 
 /*
  * Credential check based on process requesting service, and per-attribute
  * permissions.
  */
 int
 extattr_check_cred(struct vnode *vp, int attrnamespace, struct ucred *cred,
     struct thread *td, accmode_t accmode)
 {
 
 	/*
 	 * Kernel-invoked always succeeds.
 	 */
 	if (cred == NOCRED)
 		return (0);
 
 	/*
 	 * Do not allow privileged processes in jail to directly manipulate
 	 * system attributes.
 	 */
 	switch (attrnamespace) {
 	case EXTATTR_NAMESPACE_SYSTEM:
 		/* Potentially should be: return (EPERM); */
 		return (priv_check_cred(cred, PRIV_VFS_EXTATTR_SYSTEM));
 	case EXTATTR_NAMESPACE_USER:
 		return (VOP_ACCESS(vp, accmode, cred, td));
 	default:
 		return (EPERM);
 	}
 }
 
 #ifdef DEBUG_VFS_LOCKS
 int vfs_badlock_ddb = 1;	/* Drop into debugger on violation. */
 SYSCTL_INT(_debug, OID_AUTO, vfs_badlock_ddb, CTLFLAG_RW, &vfs_badlock_ddb, 0,
     "Drop into debugger on lock violation");
 
 int vfs_badlock_mutex = 1;	/* Check for interlock across VOPs. */
 SYSCTL_INT(_debug, OID_AUTO, vfs_badlock_mutex, CTLFLAG_RW, &vfs_badlock_mutex,
     0, "Check for interlock across VOPs");
 
 int vfs_badlock_print = 1;	/* Print lock violations. */
 SYSCTL_INT(_debug, OID_AUTO, vfs_badlock_print, CTLFLAG_RW, &vfs_badlock_print,
     0, "Print lock violations");
 
 int vfs_badlock_vnode = 1;	/* Print vnode details on lock violations. */
 SYSCTL_INT(_debug, OID_AUTO, vfs_badlock_vnode, CTLFLAG_RW, &vfs_badlock_vnode,
     0, "Print vnode details on lock violations");
 
 #ifdef KDB
 int vfs_badlock_backtrace = 1;	/* Print backtrace at lock violations. */
 SYSCTL_INT(_debug, OID_AUTO, vfs_badlock_backtrace, CTLFLAG_RW,
     &vfs_badlock_backtrace, 0, "Print backtrace at lock violations");
 #endif
 
 static void
 vfs_badlock(const char *msg, const char *str, struct vnode *vp)
 {
 
 #ifdef KDB
 	if (vfs_badlock_backtrace)
 		kdb_backtrace();
 #endif
 	if (vfs_badlock_vnode)
 		vn_printf(vp, "vnode ");
 	if (vfs_badlock_print)
 		printf("%s: %p %s\n", str, (void *)vp, msg);
 	if (vfs_badlock_ddb)
 		kdb_enter(KDB_WHY_VFSLOCK, "lock violation");
 }
 
 void
 assert_vi_locked(struct vnode *vp, const char *str)
 {
 
 	if (vfs_badlock_mutex && !mtx_owned(VI_MTX(vp)))
 		vfs_badlock("interlock is not locked but should be", str, vp);
 }
 
 void
 assert_vi_unlocked(struct vnode *vp, const char *str)
 {
 
 	if (vfs_badlock_mutex && mtx_owned(VI_MTX(vp)))
 		vfs_badlock("interlock is locked but should not be", str, vp);
 }
 
 void
 assert_vop_locked(struct vnode *vp, const char *str)
 {
 	int locked;
 
 	if (KERNEL_PANICKED() || vp == NULL)
 		return;
 
 	locked = VOP_ISLOCKED(vp);
 	if (locked == 0 || locked == LK_EXCLOTHER)
 		vfs_badlock("is not locked but should be", str, vp);
 }
 
 void
 assert_vop_unlocked(struct vnode *vp, const char *str)
 {
 	if (KERNEL_PANICKED() || vp == NULL)
 		return;
 
 	if (VOP_ISLOCKED(vp) == LK_EXCLUSIVE)
 		vfs_badlock("is locked but should not be", str, vp);
 }
 
 void
 assert_vop_elocked(struct vnode *vp, const char *str)
 {
 	if (KERNEL_PANICKED() || vp == NULL)
 		return;
 
 	if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE)
 		vfs_badlock("is not exclusive locked but should be", str, vp);
 }
 #endif /* DEBUG_VFS_LOCKS */
 
 void
 vop_rename_fail(struct vop_rename_args *ap)
 {
 
 	if (ap->a_tvp != NULL)
 		vput(ap->a_tvp);
 	if (ap->a_tdvp == ap->a_tvp)
 		vrele(ap->a_tdvp);
 	else
 		vput(ap->a_tdvp);
 	vrele(ap->a_fdvp);
 	vrele(ap->a_fvp);
 }
 
 void
 vop_rename_pre(void *ap)
 {
 	struct vop_rename_args *a = ap;
 
 #ifdef DEBUG_VFS_LOCKS
 	if (a->a_tvp)
 		ASSERT_VI_UNLOCKED(a->a_tvp, "VOP_RENAME");
 	ASSERT_VI_UNLOCKED(a->a_tdvp, "VOP_RENAME");
 	ASSERT_VI_UNLOCKED(a->a_fvp, "VOP_RENAME");
 	ASSERT_VI_UNLOCKED(a->a_fdvp, "VOP_RENAME");
 
 	/* Check the source (from). */
 	if (a->a_tdvp->v_vnlock != a->a_fdvp->v_vnlock &&
 	    (a->a_tvp == NULL || a->a_tvp->v_vnlock != a->a_fdvp->v_vnlock))
 		ASSERT_VOP_UNLOCKED(a->a_fdvp, "vop_rename: fdvp locked");
 	if (a->a_tvp == NULL || a->a_tvp->v_vnlock != a->a_fvp->v_vnlock)
 		ASSERT_VOP_UNLOCKED(a->a_fvp, "vop_rename: fvp locked");
 
 	/* Check the target. */
 	if (a->a_tvp)
 		ASSERT_VOP_LOCKED(a->a_tvp, "vop_rename: tvp not locked");
 	ASSERT_VOP_LOCKED(a->a_tdvp, "vop_rename: tdvp not locked");
 #endif
 	/*
 	 * It may be tempting to add vn_seqc_write_begin/end calls here and
 	 * in vop_rename_post but that's not going to work out since some
 	 * filesystems relookup vnodes mid-rename. This is probably a bug.
 	 *
 	 * For now filesystems are expected to do the relevant calls after they
 	 * decide what vnodes to operate on.
 	 */
 	if (a->a_tdvp != a->a_fdvp)
 		vhold(a->a_fdvp);
 	if (a->a_tvp != a->a_fvp)
 		vhold(a->a_fvp);
 	vhold(a->a_tdvp);
 	if (a->a_tvp)
 		vhold(a->a_tvp);
 }
 
 #ifdef DEBUG_VFS_LOCKS
 void
 vop_fplookup_vexec_debugpre(void *ap __unused)
 {
 
 	VFS_SMR_ASSERT_ENTERED();
 }
 
 void
 vop_fplookup_vexec_debugpost(void *ap __unused, int rc __unused)
 {
 
 	VFS_SMR_ASSERT_ENTERED();
 }
 
 void
 vop_fplookup_symlink_debugpre(void *ap __unused)
 {
 
 	VFS_SMR_ASSERT_ENTERED();
 }
 
 void
 vop_fplookup_symlink_debugpost(void *ap __unused, int rc __unused)
 {
 
 	VFS_SMR_ASSERT_ENTERED();
 }
 
 static void
 vop_fsync_debugprepost(struct vnode *vp, const char *name)
 {
 	if (vp->v_type == VCHR)
 		;
 	else if (MNT_EXTENDED_SHARED(vp->v_mount))
 		ASSERT_VOP_LOCKED(vp, name);
 	else
 		ASSERT_VOP_ELOCKED(vp, name);
 }
 
 void
 vop_fsync_debugpre(void *a)
 {
 	struct vop_fsync_args *ap;
 
 	ap = a;
 	vop_fsync_debugprepost(ap->a_vp, "fsync");
 }
 
 void
 vop_fsync_debugpost(void *a, int rc __unused)
 {
 	struct vop_fsync_args *ap;
 
 	ap = a;
 	vop_fsync_debugprepost(ap->a_vp, "fsync");
 }
 
 void
 vop_fdatasync_debugpre(void *a)
 {
 	struct vop_fdatasync_args *ap;
 
 	ap = a;
 	vop_fsync_debugprepost(ap->a_vp, "fsync");
 }
 
 void
 vop_fdatasync_debugpost(void *a, int rc __unused)
 {
 	struct vop_fdatasync_args *ap;
 
 	ap = a;
 	vop_fsync_debugprepost(ap->a_vp, "fsync");
 }
 
 void
 vop_strategy_debugpre(void *ap)
 {
 	struct vop_strategy_args *a;
 	struct buf *bp;
 
 	a = ap;
 	bp = a->a_bp;
 
 	/*
 	 * Cluster ops lock their component buffers but not the IO container.
 	 */
 	if ((bp->b_flags & B_CLUSTER) != 0)
 		return;
 
 	if (!KERNEL_PANICKED() && !BUF_ISLOCKED(bp)) {
 		if (vfs_badlock_print)
 			printf(
 			    "VOP_STRATEGY: bp is not locked but should be\n");
 		if (vfs_badlock_ddb)
 			kdb_enter(KDB_WHY_VFSLOCK, "lock violation");
 	}
 }
 
 void
 vop_lock_debugpre(void *ap)
 {
 	struct vop_lock1_args *a = ap;
 
 	if ((a->a_flags & LK_INTERLOCK) == 0)
 		ASSERT_VI_UNLOCKED(a->a_vp, "VOP_LOCK");
 	else
 		ASSERT_VI_LOCKED(a->a_vp, "VOP_LOCK");
 }
 
 void
 vop_lock_debugpost(void *ap, int rc)
 {
 	struct vop_lock1_args *a = ap;
 
 	ASSERT_VI_UNLOCKED(a->a_vp, "VOP_LOCK");
 	if (rc == 0 && (a->a_flags & LK_EXCLOTHER) == 0)
 		ASSERT_VOP_LOCKED(a->a_vp, "VOP_LOCK");
 }
 
 void
 vop_unlock_debugpre(void *ap)
 {
 	struct vop_unlock_args *a = ap;
 
 	ASSERT_VOP_LOCKED(a->a_vp, "VOP_UNLOCK");
 }
 
 void
 vop_need_inactive_debugpre(void *ap)
 {
 	struct vop_need_inactive_args *a = ap;
 
 	ASSERT_VI_LOCKED(a->a_vp, "VOP_NEED_INACTIVE");
 }
 
 void
 vop_need_inactive_debugpost(void *ap, int rc)
 {
 	struct vop_need_inactive_args *a = ap;
 
 	ASSERT_VI_LOCKED(a->a_vp, "VOP_NEED_INACTIVE");
 }
 #endif
 
 void
 vop_create_pre(void *ap)
 {
 	struct vop_create_args *a;
 	struct vnode *dvp;
 
 	a = ap;
 	dvp = a->a_dvp;
 	vn_seqc_write_begin(dvp);
 }
 
 void
 vop_create_post(void *ap, int rc)
 {
 	struct vop_create_args *a;
 	struct vnode *dvp;
 
 	a = ap;
 	dvp = a->a_dvp;
 	vn_seqc_write_end(dvp);
 	if (!rc)
 		VFS_KNOTE_LOCKED(dvp, NOTE_WRITE);
 }
 
 void
 vop_whiteout_pre(void *ap)
 {
 	struct vop_whiteout_args *a;
 	struct vnode *dvp;
 
 	a = ap;
 	dvp = a->a_dvp;
 	vn_seqc_write_begin(dvp);
 }
 
 void
 vop_whiteout_post(void *ap, int rc)
 {
 	struct vop_whiteout_args *a;
 	struct vnode *dvp;
 
 	a = ap;
 	dvp = a->a_dvp;
 	vn_seqc_write_end(dvp);
 }
 
 void
 vop_deleteextattr_pre(void *ap)
 {
 	struct vop_deleteextattr_args *a;
 	struct vnode *vp;
 
 	a = ap;
 	vp = a->a_vp;
 	vn_seqc_write_begin(vp);
 }
 
 void
 vop_deleteextattr_post(void *ap, int rc)
 {
 	struct vop_deleteextattr_args *a;
 	struct vnode *vp;
 
 	a = ap;
 	vp = a->a_vp;
 	vn_seqc_write_end(vp);
 	if (!rc)
 		VFS_KNOTE_LOCKED(a->a_vp, NOTE_ATTRIB);
 }
 
 void
 vop_link_pre(void *ap)
 {
 	struct vop_link_args *a;
 	struct vnode *vp, *tdvp;
 
 	a = ap;
 	vp = a->a_vp;
 	tdvp = a->a_tdvp;
 	vn_seqc_write_begin(vp);
 	vn_seqc_write_begin(tdvp);
 }
 
 void
 vop_link_post(void *ap, int rc)
 {
 	struct vop_link_args *a;
 	struct vnode *vp, *tdvp;
 
 	a = ap;
 	vp = a->a_vp;
 	tdvp = a->a_tdvp;
 	vn_seqc_write_end(vp);
 	vn_seqc_write_end(tdvp);
 	if (!rc) {
 		VFS_KNOTE_LOCKED(vp, NOTE_LINK);
 		VFS_KNOTE_LOCKED(tdvp, NOTE_WRITE);
 	}
 }
 
 void
 vop_mkdir_pre(void *ap)
 {
 	struct vop_mkdir_args *a;
 	struct vnode *dvp;
 
 	a = ap;
 	dvp = a->a_dvp;
 	vn_seqc_write_begin(dvp);
 }
 
 void
 vop_mkdir_post(void *ap, int rc)
 {
 	struct vop_mkdir_args *a;
 	struct vnode *dvp;
 
 	a = ap;
 	dvp = a->a_dvp;
 	vn_seqc_write_end(dvp);
 	if (!rc)
 		VFS_KNOTE_LOCKED(dvp, NOTE_WRITE | NOTE_LINK);
 }
 
 #ifdef DEBUG_VFS_LOCKS
 void
 vop_mkdir_debugpost(void *ap, int rc)
 {
 	struct vop_mkdir_args *a;
 
 	a = ap;
 	if (!rc)
 		cache_validate(a->a_dvp, *a->a_vpp, a->a_cnp);
 }
 #endif
 
 void
 vop_mknod_pre(void *ap)
 {
 	struct vop_mknod_args *a;
 	struct vnode *dvp;
 
 	a = ap;
 	dvp = a->a_dvp;
 	vn_seqc_write_begin(dvp);
 }
 
 void
 vop_mknod_post(void *ap, int rc)
 {
 	struct vop_mknod_args *a;
 	struct vnode *dvp;
 
 	a = ap;
 	dvp = a->a_dvp;
 	vn_seqc_write_end(dvp);
 	if (!rc)
 		VFS_KNOTE_LOCKED(dvp, NOTE_WRITE);
 }
 
 void
 vop_reclaim_post(void *ap, int rc)
 {
 	struct vop_reclaim_args *a;
 	struct vnode *vp;
 
 	a = ap;
 	vp = a->a_vp;
 	ASSERT_VOP_IN_SEQC(vp);
 	if (!rc)
 		VFS_KNOTE_LOCKED(vp, NOTE_REVOKE);
 }
 
 void
 vop_remove_pre(void *ap)
 {
 	struct vop_remove_args *a;
 	struct vnode *dvp, *vp;
 
 	a = ap;
 	dvp = a->a_dvp;
 	vp = a->a_vp;
 	vn_seqc_write_begin(dvp);
 	vn_seqc_write_begin(vp);
 }
 
 void
 vop_remove_post(void *ap, int rc)
 {
 	struct vop_remove_args *a;
 	struct vnode *dvp, *vp;
 
 	a = ap;
 	dvp = a->a_dvp;
 	vp = a->a_vp;
 	vn_seqc_write_end(dvp);
 	vn_seqc_write_end(vp);
 	if (!rc) {
 		VFS_KNOTE_LOCKED(dvp, NOTE_WRITE);
 		VFS_KNOTE_LOCKED(vp, NOTE_DELETE);
 	}
 }
 
 void
 vop_rename_post(void *ap, int rc)
 {
 	struct vop_rename_args *a = ap;
 	long hint;
 
 	if (!rc) {
 		hint = NOTE_WRITE;
 		if (a->a_fdvp == a->a_tdvp) {
 			if (a->a_tvp != NULL && a->a_tvp->v_type == VDIR)
 				hint |= NOTE_LINK;
 			VFS_KNOTE_UNLOCKED(a->a_fdvp, hint);
 			VFS_KNOTE_UNLOCKED(a->a_tdvp, hint);
 		} else {
 			hint |= NOTE_EXTEND;
 			if (a->a_fvp->v_type == VDIR)
 				hint |= NOTE_LINK;
 			VFS_KNOTE_UNLOCKED(a->a_fdvp, hint);
 
 			if (a->a_fvp->v_type == VDIR && a->a_tvp != NULL &&
 			    a->a_tvp->v_type == VDIR)
 				hint &= ~NOTE_LINK;
 			VFS_KNOTE_UNLOCKED(a->a_tdvp, hint);
 		}
 
 		VFS_KNOTE_UNLOCKED(a->a_fvp, NOTE_RENAME);
 		if (a->a_tvp)
 			VFS_KNOTE_UNLOCKED(a->a_tvp, NOTE_DELETE);
 	}
 	if (a->a_tdvp != a->a_fdvp)
 		vdrop(a->a_fdvp);
 	if (a->a_tvp != a->a_fvp)
 		vdrop(a->a_fvp);
 	vdrop(a->a_tdvp);
 	if (a->a_tvp)
 		vdrop(a->a_tvp);
 }
 
 void
 vop_rmdir_pre(void *ap)
 {
 	struct vop_rmdir_args *a;
 	struct vnode *dvp, *vp;
 
 	a = ap;
 	dvp = a->a_dvp;
 	vp = a->a_vp;
 	vn_seqc_write_begin(dvp);
 	vn_seqc_write_begin(vp);
 }
 
 void
 vop_rmdir_post(void *ap, int rc)
 {
 	struct vop_rmdir_args *a;
 	struct vnode *dvp, *vp;
 
 	a = ap;
 	dvp = a->a_dvp;
 	vp = a->a_vp;
 	vn_seqc_write_end(dvp);
 	vn_seqc_write_end(vp);
 	if (!rc) {
 		vp->v_vflag |= VV_UNLINKED;
 		VFS_KNOTE_LOCKED(dvp, NOTE_WRITE | NOTE_LINK);
 		VFS_KNOTE_LOCKED(vp, NOTE_DELETE);
 	}
 }
 
 void
 vop_setattr_pre(void *ap)
 {
 	struct vop_setattr_args *a;
 	struct vnode *vp;
 
 	a = ap;
 	vp = a->a_vp;
 	vn_seqc_write_begin(vp);
 }
 
 void
 vop_setattr_post(void *ap, int rc)
 {
 	struct vop_setattr_args *a;
 	struct vnode *vp;
 
 	a = ap;
 	vp = a->a_vp;
 	vn_seqc_write_end(vp);
 	if (!rc)
 		VFS_KNOTE_LOCKED(vp, NOTE_ATTRIB);
 }
 
 void
 vop_setacl_pre(void *ap)
 {
 	struct vop_setacl_args *a;
 	struct vnode *vp;
 
 	a = ap;
 	vp = a->a_vp;
 	vn_seqc_write_begin(vp);
 }
 
 void
 vop_setacl_post(void *ap, int rc __unused)
 {
 	struct vop_setacl_args *a;
 	struct vnode *vp;
 
 	a = ap;
 	vp = a->a_vp;
 	vn_seqc_write_end(vp);
 }
 
 void
 vop_setextattr_pre(void *ap)
 {
 	struct vop_setextattr_args *a;
 	struct vnode *vp;
 
 	a = ap;
 	vp = a->a_vp;
 	vn_seqc_write_begin(vp);
 }
 
 void
 vop_setextattr_post(void *ap, int rc)
 {
 	struct vop_setextattr_args *a;
 	struct vnode *vp;
 
 	a = ap;
 	vp = a->a_vp;
 	vn_seqc_write_end(vp);
 	if (!rc)
 		VFS_KNOTE_LOCKED(vp, NOTE_ATTRIB);
 }
 
 void
 vop_symlink_pre(void *ap)
 {
 	struct vop_symlink_args *a;
 	struct vnode *dvp;
 
 	a = ap;
 	dvp = a->a_dvp;
 	vn_seqc_write_begin(dvp);
 }
 
 void
 vop_symlink_post(void *ap, int rc)
 {
 	struct vop_symlink_args *a;
 	struct vnode *dvp;
 
 	a = ap;
 	dvp = a->a_dvp;
 	vn_seqc_write_end(dvp);
 	if (!rc)
 		VFS_KNOTE_LOCKED(dvp, NOTE_WRITE);
 }
 
 void
 vop_open_post(void *ap, int rc)
 {
 	struct vop_open_args *a = ap;
 
 	if (!rc)
 		VFS_KNOTE_LOCKED(a->a_vp, NOTE_OPEN);
 }
 
 void
 vop_close_post(void *ap, int rc)
 {
 	struct vop_close_args *a = ap;
 
 	if (!rc && (a->a_cred != NOCRED || /* filter out revokes */
 	    !VN_IS_DOOMED(a->a_vp))) {
 		VFS_KNOTE_LOCKED(a->a_vp, (a->a_fflag & FWRITE) != 0 ?
 		    NOTE_CLOSE_WRITE : NOTE_CLOSE);
 	}
 }
 
 void
 vop_read_post(void *ap, int rc)
 {
 	struct vop_read_args *a = ap;
 
 	if (!rc)
 		VFS_KNOTE_LOCKED(a->a_vp, NOTE_READ);
 }
 
 void
 vop_read_pgcache_post(void *ap, int rc)
 {
 	struct vop_read_pgcache_args *a = ap;
 
 	if (!rc)
 		VFS_KNOTE_UNLOCKED(a->a_vp, NOTE_READ);
 }
 
 void
 vop_readdir_post(void *ap, int rc)
 {
 	struct vop_readdir_args *a = ap;
 
 	if (!rc)
 		VFS_KNOTE_LOCKED(a->a_vp, NOTE_READ);
 }
 
 static struct knlist fs_knlist;
 
 static void
 vfs_event_init(void *arg)
 {
 	knlist_init_mtx(&fs_knlist, NULL);
 }
 /* XXX - correct order? */
 SYSINIT(vfs_knlist, SI_SUB_VFS, SI_ORDER_ANY, vfs_event_init, NULL);
 
 void
 vfs_event_signal(fsid_t *fsid, uint32_t event, intptr_t data __unused)
 {
 
 	KNOTE_UNLOCKED(&fs_knlist, event);
 }
 
 static int	filt_fsattach(struct knote *kn);
 static void	filt_fsdetach(struct knote *kn);
 static int	filt_fsevent(struct knote *kn, long hint);
 
 struct filterops fs_filtops = {
 	.f_isfd = 0,
 	.f_attach = filt_fsattach,
 	.f_detach = filt_fsdetach,
 	.f_event = filt_fsevent
 };
 
 static int
 filt_fsattach(struct knote *kn)
 {
 
 	kn->kn_flags |= EV_CLEAR;
 	knlist_add(&fs_knlist, kn, 0);
 	return (0);
 }
 
 static void
 filt_fsdetach(struct knote *kn)
 {
 
 	knlist_remove(&fs_knlist, kn, 0);
 }
 
 static int
 filt_fsevent(struct knote *kn, long hint)
 {
 
 	kn->kn_fflags |= kn->kn_sfflags & hint;
 
 	return (kn->kn_fflags != 0);
 }
 
 static int
 sysctl_vfs_ctl(SYSCTL_HANDLER_ARGS)
 {
 	struct vfsidctl vc;
 	int error;
 	struct mount *mp;
 
 	error = SYSCTL_IN(req, &vc, sizeof(vc));
 	if (error)
 		return (error);
 	if (vc.vc_vers != VFS_CTL_VERS1)
 		return (EINVAL);
 	mp = vfs_getvfs(&vc.vc_fsid);
 	if (mp == NULL)
 		return (ENOENT);
 	/* ensure that a specific sysctl goes to the right filesystem. */
 	if (strcmp(vc.vc_fstypename, "*") != 0 &&
 	    strcmp(vc.vc_fstypename, mp->mnt_vfc->vfc_name) != 0) {
 		vfs_rel(mp);
 		return (EINVAL);
 	}
 	VCTLTOREQ(&vc, req);
 	error = VFS_SYSCTL(mp, vc.vc_op, req);
 	vfs_rel(mp);
 	return (error);
 }
 
 SYSCTL_PROC(_vfs, OID_AUTO, ctl, CTLTYPE_OPAQUE | CTLFLAG_MPSAFE | CTLFLAG_WR,
     NULL, 0, sysctl_vfs_ctl, "",
     "Sysctl by fsid");
 
 /*
  * Function to initialize a va_filerev field sensibly.
  * XXX: Wouldn't a random number make a lot more sense ??
  */
 u_quad_t
 init_va_filerev(void)
 {
 	struct bintime bt;
 
 	getbinuptime(&bt);
 	return (((u_quad_t)bt.sec << 32LL) | (bt.frac >> 32LL));
 }
 
 static int	filt_vfsread(struct knote *kn, long hint);
 static int	filt_vfswrite(struct knote *kn, long hint);
 static int	filt_vfsvnode(struct knote *kn, long hint);
 static void	filt_vfsdetach(struct knote *kn);
 static struct filterops vfsread_filtops = {
 	.f_isfd = 1,
 	.f_detach = filt_vfsdetach,
 	.f_event = filt_vfsread
 };
 static struct filterops vfswrite_filtops = {
 	.f_isfd = 1,
 	.f_detach = filt_vfsdetach,
 	.f_event = filt_vfswrite
 };
 static struct filterops vfsvnode_filtops = {
 	.f_isfd = 1,
 	.f_detach = filt_vfsdetach,
 	.f_event = filt_vfsvnode
 };
 
 static void
 vfs_knllock(void *arg)
 {
 	struct vnode *vp = arg;
 
 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 }
 
 static void
 vfs_knlunlock(void *arg)
 {
 	struct vnode *vp = arg;
 
 	VOP_UNLOCK(vp);
 }
 
 static void
 vfs_knl_assert_lock(void *arg, int what)
 {
 #ifdef DEBUG_VFS_LOCKS
 	struct vnode *vp = arg;
 
 	if (what == LA_LOCKED)
 		ASSERT_VOP_LOCKED(vp, "vfs_knl_assert_locked");
 	else
 		ASSERT_VOP_UNLOCKED(vp, "vfs_knl_assert_unlocked");
 #endif
 }
 
 int
 vfs_kqfilter(struct vop_kqfilter_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct knote *kn = ap->a_kn;
 	struct knlist *knl;
 
 	KASSERT(vp->v_type != VFIFO || (kn->kn_filter != EVFILT_READ &&
 	    kn->kn_filter != EVFILT_WRITE),
 	    ("READ/WRITE filter on a FIFO leaked through"));
 	switch (kn->kn_filter) {
 	case EVFILT_READ:
 		kn->kn_fop = &vfsread_filtops;
 		break;
 	case EVFILT_WRITE:
 		kn->kn_fop = &vfswrite_filtops;
 		break;
 	case EVFILT_VNODE:
 		kn->kn_fop = &vfsvnode_filtops;
 		break;
 	default:
 		return (EINVAL);
 	}
 
 	kn->kn_hook = (caddr_t)vp;
 
 	v_addpollinfo(vp);
 	if (vp->v_pollinfo == NULL)
 		return (ENOMEM);
 	knl = &vp->v_pollinfo->vpi_selinfo.si_note;
 	vhold(vp);
 	knlist_add(knl, kn, 0);
 
 	return (0);
 }
 
 /*
  * Detach knote from vnode
  */
 static void
 filt_vfsdetach(struct knote *kn)
 {
 	struct vnode *vp = (struct vnode *)kn->kn_hook;
 
 	KASSERT(vp->v_pollinfo != NULL, ("Missing v_pollinfo"));
 	knlist_remove(&vp->v_pollinfo->vpi_selinfo.si_note, kn, 0);
 	vdrop(vp);
 }
 
 /*ARGSUSED*/
 static int
 filt_vfsread(struct knote *kn, long hint)
 {
 	struct vnode *vp = (struct vnode *)kn->kn_hook;
 	struct vattr va;
 	int res;
 
 	/*
 	 * filesystem is gone, so set the EOF flag and schedule
 	 * the knote for deletion.
 	 */
 	if (hint == NOTE_REVOKE || (hint == 0 && vp->v_type == VBAD)) {
 		VI_LOCK(vp);
 		kn->kn_flags |= (EV_EOF | EV_ONESHOT);
 		VI_UNLOCK(vp);
 		return (1);
 	}
 
 	if (VOP_GETATTR(vp, &va, curthread->td_ucred))
 		return (0);
 
 	VI_LOCK(vp);
 	kn->kn_data = va.va_size - kn->kn_fp->f_offset;
 	res = (kn->kn_sfflags & NOTE_FILE_POLL) != 0 || kn->kn_data != 0;
 	VI_UNLOCK(vp);
 	return (res);
 }
 
 /*ARGSUSED*/
 static int
 filt_vfswrite(struct knote *kn, long hint)
 {
 	struct vnode *vp = (struct vnode *)kn->kn_hook;
 
 	VI_LOCK(vp);
 
 	/*
 	 * filesystem is gone, so set the EOF flag and schedule
 	 * the knote for deletion.
 	 */
 	if (hint == NOTE_REVOKE || (hint == 0 && vp->v_type == VBAD))
 		kn->kn_flags |= (EV_EOF | EV_ONESHOT);
 
 	kn->kn_data = 0;
 	VI_UNLOCK(vp);
 	return (1);
 }
 
 static int
 filt_vfsvnode(struct knote *kn, long hint)
 {
 	struct vnode *vp = (struct vnode *)kn->kn_hook;
 	int res;
 
 	VI_LOCK(vp);
 	if (kn->kn_sfflags & hint)
 		kn->kn_fflags |= hint;
 	if (hint == NOTE_REVOKE || (hint == 0 && vp->v_type == VBAD)) {
 		kn->kn_flags |= EV_EOF;
 		VI_UNLOCK(vp);
 		return (1);
 	}
 	res = (kn->kn_fflags != 0);
 	VI_UNLOCK(vp);
 	return (res);
 }
 
 /*
  * Returns whether the directory is empty or not.
  * If it is empty, the return value is 0; otherwise
  * the return value is an error value (which may
  * be ENOTEMPTY).
  */
 int
 vfs_emptydir(struct vnode *vp)
 {
 	struct uio uio;
 	struct iovec iov;
 	struct dirent *dirent, *dp, *endp;
 	int error, eof;
 
 	error = 0;
 	eof = 0;
 
 	ASSERT_VOP_LOCKED(vp, "vfs_emptydir");
 	VNASSERT(vp->v_type == VDIR, vp, ("vp is not a directory"));
 
 	dirent = malloc(sizeof(struct dirent), M_TEMP, M_WAITOK);
 	iov.iov_base = dirent;
 	iov.iov_len = sizeof(struct dirent);
 
 	uio.uio_iov = &iov;
 	uio.uio_iovcnt = 1;
 	uio.uio_offset = 0;
 	uio.uio_resid = sizeof(struct dirent);
 	uio.uio_segflg = UIO_SYSSPACE;
 	uio.uio_rw = UIO_READ;
 	uio.uio_td = curthread;
 
 	while (eof == 0 && error == 0) {
 		error = VOP_READDIR(vp, &uio, curthread->td_ucred, &eof,
 		    NULL, NULL);
 		if (error != 0)
 			break;
 		endp = (void *)((uint8_t *)dirent +
 		    sizeof(struct dirent) - uio.uio_resid);
 		for (dp = dirent; dp < endp;
 		     dp = (void *)((uint8_t *)dp + GENERIC_DIRSIZ(dp))) {
 			if (dp->d_type == DT_WHT)
 				continue;
 			if (dp->d_namlen == 0)
 				continue;
 			if (dp->d_type != DT_DIR &&
 			    dp->d_type != DT_UNKNOWN) {
 				error = ENOTEMPTY;
 				break;
 			}
 			if (dp->d_namlen > 2) {
 				error = ENOTEMPTY;
 				break;
 			}
 			if (dp->d_namlen == 1 &&
 			    dp->d_name[0] != '.') {
 				error = ENOTEMPTY;
 				break;
 			}
 			if (dp->d_namlen == 2 &&
 			    dp->d_name[1] != '.') {
 				error = ENOTEMPTY;
 				break;
 			}
 			uio.uio_resid = sizeof(struct dirent);
 		}
 	}
 	free(dirent, M_TEMP);
 	return (error);
 }
 
 int
 vfs_read_dirent(struct vop_readdir_args *ap, struct dirent *dp, off_t off)
 {
 	int error;
 
 	if (dp->d_reclen > ap->a_uio->uio_resid)
 		return (ENAMETOOLONG);
 	error = uiomove(dp, dp->d_reclen, ap->a_uio);
 	if (error) {
 		if (ap->a_ncookies != NULL) {
 			if (ap->a_cookies != NULL)
 				free(ap->a_cookies, M_TEMP);
 			ap->a_cookies = NULL;
 			*ap->a_ncookies = 0;
 		}
 		return (error);
 	}
 	if (ap->a_ncookies == NULL)
 		return (0);
 
 	KASSERT(ap->a_cookies,
 	    ("NULL ap->a_cookies value with non-NULL ap->a_ncookies!"));
 
 	*ap->a_cookies = realloc(*ap->a_cookies,
 	    (*ap->a_ncookies + 1) * sizeof(uint64_t), M_TEMP, M_WAITOK | M_ZERO);
 	(*ap->a_cookies)[*ap->a_ncookies] = off;
 	*ap->a_ncookies += 1;
 	return (0);
 }
 
 /*
  * The purpose of this routine is to remove granularity from accmode_t,
  * reducing it into standard unix access bits - VEXEC, VREAD, VWRITE,
  * VADMIN and VAPPEND.
  *
  * If it returns 0, the caller is supposed to continue with the usual
  * access checks using 'accmode' as modified by this routine.  If it
  * returns nonzero value, the caller is supposed to return that value
  * as errno.
  *
  * Note that after this routine runs, accmode may be zero.
  */
 int
 vfs_unixify_accmode(accmode_t *accmode)
 {
 	/*
 	 * There is no way to specify explicit "deny" rule using
 	 * file mode or POSIX.1e ACLs.
 	 */
 	if (*accmode & VEXPLICIT_DENY) {
 		*accmode = 0;
 		return (0);
 	}
 
 	/*
 	 * None of these can be translated into usual access bits.
 	 * Also, the common case for NFSv4 ACLs is to not contain
 	 * either of these bits. Caller should check for VWRITE
 	 * on the containing directory instead.
 	 */
 	if (*accmode & (VDELETE_CHILD | VDELETE))
 		return (EPERM);
 
 	if (*accmode & VADMIN_PERMS) {
 		*accmode &= ~VADMIN_PERMS;
 		*accmode |= VADMIN;
 	}
 
 	/*
 	 * There is no way to deny VREAD_ATTRIBUTES, VREAD_ACL
 	 * or VSYNCHRONIZE using file mode or POSIX.1e ACL.
 	 */
 	*accmode &= ~(VSTAT_PERMS | VSYNCHRONIZE);
 
 	return (0);
 }
 
 /*
  * Clear out a doomed vnode (if any) and replace it with a new one as long
  * as the fs is not being unmounted. Return the root vnode to the caller.
  */
 static int __noinline
 vfs_cache_root_fallback(struct mount *mp, int flags, struct vnode **vpp)
 {
 	struct vnode *vp;
 	int error;
 
 restart:
 	if (mp->mnt_rootvnode != NULL) {
 		MNT_ILOCK(mp);
 		vp = mp->mnt_rootvnode;
 		if (vp != NULL) {
 			if (!VN_IS_DOOMED(vp)) {
 				vrefact(vp);
 				MNT_IUNLOCK(mp);
 				error = vn_lock(vp, flags);
 				if (error == 0) {
 					*vpp = vp;
 					return (0);
 				}
 				vrele(vp);
 				goto restart;
 			}
 			/*
 			 * Clear the old one.
 			 */
 			mp->mnt_rootvnode = NULL;
 		}
 		MNT_IUNLOCK(mp);
 		if (vp != NULL) {
 			vfs_op_barrier_wait(mp);
 			vrele(vp);
 		}
 	}
 	error = VFS_CACHEDROOT(mp, flags, vpp);
 	if (error != 0)
 		return (error);
 	if (mp->mnt_vfs_ops == 0) {
 		MNT_ILOCK(mp);
 		if (mp->mnt_vfs_ops != 0) {
 			MNT_IUNLOCK(mp);
 			return (0);
 		}
 		if (mp->mnt_rootvnode == NULL) {
 			vrefact(*vpp);
 			mp->mnt_rootvnode = *vpp;
 		} else {
 			if (mp->mnt_rootvnode != *vpp) {
 				if (!VN_IS_DOOMED(mp->mnt_rootvnode)) {
 					panic("%s: mismatch between vnode returned "
 					    " by VFS_CACHEDROOT and the one cached "
 					    " (%p != %p)",
 					    __func__, *vpp, mp->mnt_rootvnode);
 				}
 			}
 		}
 		MNT_IUNLOCK(mp);
 	}
 	return (0);
 }
 
 int
 vfs_cache_root(struct mount *mp, int flags, struct vnode **vpp)
 {
 	struct mount_pcpu *mpcpu;
 	struct vnode *vp;
 	int error;
 
 	if (!vfs_op_thread_enter(mp, mpcpu))
 		return (vfs_cache_root_fallback(mp, flags, vpp));
 	vp = atomic_load_ptr(&mp->mnt_rootvnode);
 	if (vp == NULL || VN_IS_DOOMED(vp)) {
 		vfs_op_thread_exit(mp, mpcpu);
 		return (vfs_cache_root_fallback(mp, flags, vpp));
 	}
 	vrefact(vp);
 	vfs_op_thread_exit(mp, mpcpu);
 	error = vn_lock(vp, flags);
 	if (error != 0) {
 		vrele(vp);
 		return (vfs_cache_root_fallback(mp, flags, vpp));
 	}
 	*vpp = vp;
 	return (0);
 }
 
 struct vnode *
 vfs_cache_root_clear(struct mount *mp)
 {
 	struct vnode *vp;
 
 	/*
 	 * ops > 0 guarantees there is nobody who can see this vnode
 	 */
 	MPASS(mp->mnt_vfs_ops > 0);
 	vp = mp->mnt_rootvnode;
 	if (vp != NULL)
 		vn_seqc_write_begin(vp);
 	mp->mnt_rootvnode = NULL;
 	return (vp);
 }
 
 void
 vfs_cache_root_set(struct mount *mp, struct vnode *vp)
 {
 
 	MPASS(mp->mnt_vfs_ops > 0);
 	vrefact(vp);
 	mp->mnt_rootvnode = vp;
 }
 
 /*
  * These are helper functions for filesystems to traverse all
  * their vnodes.  See MNT_VNODE_FOREACH_ALL() in sys/mount.h.
  *
  * This interface replaces MNT_VNODE_FOREACH.
  */
 
 struct vnode *
 __mnt_vnode_next_all(struct vnode **mvp, struct mount *mp)
 {
 	struct vnode *vp;
 
 	if (should_yield())
 		kern_yield(PRI_USER);
 	MNT_ILOCK(mp);
 	KASSERT((*mvp)->v_mount == mp, ("marker vnode mount list mismatch"));
 	for (vp = TAILQ_NEXT(*mvp, v_nmntvnodes); vp != NULL;
 	    vp = TAILQ_NEXT(vp, v_nmntvnodes)) {
 		/* Allow a racy peek at VIRF_DOOMED to save a lock acquisition. */
 		if (vp->v_type == VMARKER || VN_IS_DOOMED(vp))
 			continue;
 		VI_LOCK(vp);
 		if (VN_IS_DOOMED(vp)) {
 			VI_UNLOCK(vp);
 			continue;
 		}
 		break;
 	}
 	if (vp == NULL) {
 		__mnt_vnode_markerfree_all(mvp, mp);
 		/* MNT_IUNLOCK(mp); -- done in above function */
 		mtx_assert(MNT_MTX(mp), MA_NOTOWNED);
 		return (NULL);
 	}
 	TAILQ_REMOVE(&mp->mnt_nvnodelist, *mvp, v_nmntvnodes);
 	TAILQ_INSERT_AFTER(&mp->mnt_nvnodelist, vp, *mvp, v_nmntvnodes);
 	MNT_IUNLOCK(mp);
 	return (vp);
 }
 
 struct vnode *
 __mnt_vnode_first_all(struct vnode **mvp, struct mount *mp)
 {
 	struct vnode *vp;
 
 	*mvp = vn_alloc_marker(mp);
 	MNT_ILOCK(mp);
 	MNT_REF(mp);
 
 	TAILQ_FOREACH(vp, &mp->mnt_nvnodelist, v_nmntvnodes) {
 		/* Allow a racy peek at VIRF_DOOMED to save a lock acquisition. */
 		if (vp->v_type == VMARKER || VN_IS_DOOMED(vp))
 			continue;
 		VI_LOCK(vp);
 		if (VN_IS_DOOMED(vp)) {
 			VI_UNLOCK(vp);
 			continue;
 		}
 		break;
 	}
 	if (vp == NULL) {
 		MNT_REL(mp);
 		MNT_IUNLOCK(mp);
 		vn_free_marker(*mvp);
 		*mvp = NULL;
 		return (NULL);
 	}
 	TAILQ_INSERT_AFTER(&mp->mnt_nvnodelist, vp, *mvp, v_nmntvnodes);
 	MNT_IUNLOCK(mp);
 	return (vp);
 }
 
 void
 __mnt_vnode_markerfree_all(struct vnode **mvp, struct mount *mp)
 {
 
 	if (*mvp == NULL) {
 		MNT_IUNLOCK(mp);
 		return;
 	}
 
 	mtx_assert(MNT_MTX(mp), MA_OWNED);
 
 	KASSERT((*mvp)->v_mount == mp, ("marker vnode mount list mismatch"));
 	TAILQ_REMOVE(&mp->mnt_nvnodelist, *mvp, v_nmntvnodes);
 	MNT_REL(mp);
 	MNT_IUNLOCK(mp);
 	vn_free_marker(*mvp);
 	*mvp = NULL;
 }
 
 /*
  * These are helper functions for filesystems to traverse their
  * lazy vnodes.  See MNT_VNODE_FOREACH_LAZY() in sys/mount.h
  */
 static void
 mnt_vnode_markerfree_lazy(struct vnode **mvp, struct mount *mp)
 {
 
 	KASSERT((*mvp)->v_mount == mp, ("marker vnode mount list mismatch"));
 
 	MNT_ILOCK(mp);
 	MNT_REL(mp);
 	MNT_IUNLOCK(mp);
 	vn_free_marker(*mvp);
 	*mvp = NULL;
 }
 
 /*
  * Relock the mp mount vnode list lock with the vp vnode interlock in the
  * conventional lock order during mnt_vnode_next_lazy iteration.
  *
  * On entry, the mount vnode list lock is held and the vnode interlock is not.
  * The list lock is dropped and reacquired.  On success, both locks are held.
  * On failure, the mount vnode list lock is held but the vnode interlock is
  * not, and the procedure may have yielded.
  */
 static bool
 mnt_vnode_next_lazy_relock(struct vnode *mvp, struct mount *mp,
     struct vnode *vp)
 {
 
 	VNASSERT(mvp->v_mount == mp && mvp->v_type == VMARKER &&
 	    TAILQ_NEXT(mvp, v_lazylist) != NULL, mvp,
 	    ("%s: bad marker", __func__));
 	VNASSERT(vp->v_mount == mp && vp->v_type != VMARKER, vp,
 	    ("%s: inappropriate vnode", __func__));
 	ASSERT_VI_UNLOCKED(vp, __func__);
 	mtx_assert(&mp->mnt_listmtx, MA_OWNED);
 
 	TAILQ_REMOVE(&mp->mnt_lazyvnodelist, mvp, v_lazylist);
 	TAILQ_INSERT_BEFORE(vp, mvp, v_lazylist);
 
 	/*
 	 * Note we may be racing against vdrop which transitioned the hold
 	 * count to 0 and now waits for the ->mnt_listmtx lock. This is fine,
 	 * if we are the only user after we get the interlock we will just
 	 * vdrop.
 	 */
 	vhold(vp);
 	mtx_unlock(&mp->mnt_listmtx);
 	VI_LOCK(vp);
 	if (VN_IS_DOOMED(vp)) {
 		VNPASS((vp->v_mflag & VMP_LAZYLIST) == 0, vp);
 		goto out_lost;
 	}
 	VNPASS(vp->v_mflag & VMP_LAZYLIST, vp);
 	/*
 	 * There is nothing to do if we are the last user.
 	 */
 	if (!refcount_release_if_not_last(&vp->v_holdcnt))
 		goto out_lost;
 	mtx_lock(&mp->mnt_listmtx);
 	return (true);
 out_lost:
 	vdropl(vp);
 	maybe_yield();
 	mtx_lock(&mp->mnt_listmtx);
 	return (false);
 }
 
 static struct vnode *
 mnt_vnode_next_lazy(struct vnode **mvp, struct mount *mp, mnt_lazy_cb_t *cb,
     void *cbarg)
 {
 	struct vnode *vp;
 
 	mtx_assert(&mp->mnt_listmtx, MA_OWNED);
 	KASSERT((*mvp)->v_mount == mp, ("marker vnode mount list mismatch"));
 restart:
 	vp = TAILQ_NEXT(*mvp, v_lazylist);
 	while (vp != NULL) {
 		if (vp->v_type == VMARKER) {
 			vp = TAILQ_NEXT(vp, v_lazylist);
 			continue;
 		}
 		/*
 		 * See if we want to process the vnode. Note we may encounter a
 		 * long string of vnodes we don't care about and hog the list
 		 * as a result. Check for it and requeue the marker.
 		 */
 		VNPASS(!VN_IS_DOOMED(vp), vp);
 		if (!cb(vp, cbarg)) {
 			if (!should_yield()) {
 				vp = TAILQ_NEXT(vp, v_lazylist);
 				continue;
 			}
 			TAILQ_REMOVE(&mp->mnt_lazyvnodelist, *mvp,
 			    v_lazylist);
 			TAILQ_INSERT_AFTER(&mp->mnt_lazyvnodelist, vp, *mvp,
 			    v_lazylist);
 			mtx_unlock(&mp->mnt_listmtx);
 			kern_yield(PRI_USER);
 			mtx_lock(&mp->mnt_listmtx);
 			goto restart;
 		}
 		/*
 		 * Try-lock because this is the wrong lock order.
 		 */
 		if (!VI_TRYLOCK(vp) &&
 		    !mnt_vnode_next_lazy_relock(*mvp, mp, vp))
 			goto restart;
 		KASSERT(vp->v_type != VMARKER, ("locked marker %p", vp));
 		KASSERT(vp->v_mount == mp || vp->v_mount == NULL,
 		    ("alien vnode on the lazy list %p %p", vp, mp));
 		VNPASS(vp->v_mount == mp, vp);
 		VNPASS(!VN_IS_DOOMED(vp), vp);
 		break;
 	}
 	TAILQ_REMOVE(&mp->mnt_lazyvnodelist, *mvp, v_lazylist);
 
 	/* Check if we are done */
 	if (vp == NULL) {
 		mtx_unlock(&mp->mnt_listmtx);
 		mnt_vnode_markerfree_lazy(mvp, mp);
 		return (NULL);
 	}
 	TAILQ_INSERT_AFTER(&mp->mnt_lazyvnodelist, vp, *mvp, v_lazylist);
 	mtx_unlock(&mp->mnt_listmtx);
 	ASSERT_VI_LOCKED(vp, "lazy iter");
 	return (vp);
 }
 
 struct vnode *
 __mnt_vnode_next_lazy(struct vnode **mvp, struct mount *mp, mnt_lazy_cb_t *cb,
     void *cbarg)
 {
 
 	if (should_yield())
 		kern_yield(PRI_USER);
 	mtx_lock(&mp->mnt_listmtx);
 	return (mnt_vnode_next_lazy(mvp, mp, cb, cbarg));
 }
 
 struct vnode *
 __mnt_vnode_first_lazy(struct vnode **mvp, struct mount *mp, mnt_lazy_cb_t *cb,
     void *cbarg)
 {
 	struct vnode *vp;
 
 	if (TAILQ_EMPTY(&mp->mnt_lazyvnodelist))
 		return (NULL);
 
 	*mvp = vn_alloc_marker(mp);
 	MNT_ILOCK(mp);
 	MNT_REF(mp);
 	MNT_IUNLOCK(mp);
 
 	mtx_lock(&mp->mnt_listmtx);
 	vp = TAILQ_FIRST(&mp->mnt_lazyvnodelist);
 	if (vp == NULL) {
 		mtx_unlock(&mp->mnt_listmtx);
 		mnt_vnode_markerfree_lazy(mvp, mp);
 		return (NULL);
 	}
 	TAILQ_INSERT_BEFORE(vp, *mvp, v_lazylist);
 	return (mnt_vnode_next_lazy(mvp, mp, cb, cbarg));
 }
 
 void
 __mnt_vnode_markerfree_lazy(struct vnode **mvp, struct mount *mp)
 {
 
 	if (*mvp == NULL)
 		return;
 
 	mtx_lock(&mp->mnt_listmtx);
 	TAILQ_REMOVE(&mp->mnt_lazyvnodelist, *mvp, v_lazylist);
 	mtx_unlock(&mp->mnt_listmtx);
 	mnt_vnode_markerfree_lazy(mvp, mp);
 }
 
 int
 vn_dir_check_exec(struct vnode *vp, struct componentname *cnp)
 {
 
 	if ((cnp->cn_flags & NOEXECCHECK) != 0) {
 		cnp->cn_flags &= ~NOEXECCHECK;
 		return (0);
 	}
 
 	return (VOP_ACCESS(vp, VEXEC, cnp->cn_cred, curthread));
 }
 
 /*
  * Do not use this variant unless you have means other than the hold count
  * to prevent the vnode from getting freed.
  */
 void
 vn_seqc_write_begin_locked(struct vnode *vp)
 {
 
 	ASSERT_VI_LOCKED(vp, __func__);
 	VNPASS(vp->v_holdcnt > 0, vp);
 	VNPASS(vp->v_seqc_users >= 0, vp);
 	vp->v_seqc_users++;
 	if (vp->v_seqc_users == 1)
 		seqc_sleepable_write_begin(&vp->v_seqc);
 }
 
 void
 vn_seqc_write_begin(struct vnode *vp)
 {
 
 	VI_LOCK(vp);
 	vn_seqc_write_begin_locked(vp);
 	VI_UNLOCK(vp);
 }
 
 void
 vn_seqc_write_end_locked(struct vnode *vp)
 {
 
 	ASSERT_VI_LOCKED(vp, __func__);
 	VNPASS(vp->v_seqc_users > 0, vp);
 	vp->v_seqc_users--;
 	if (vp->v_seqc_users == 0)
 		seqc_sleepable_write_end(&vp->v_seqc);
 }
 
 void
 vn_seqc_write_end(struct vnode *vp)
 {
 
 	VI_LOCK(vp);
 	vn_seqc_write_end_locked(vp);
 	VI_UNLOCK(vp);
 }
 
 /*
  * Special case handling for allocating and freeing vnodes.
  *
  * The counter remains unchanged on free so that a doomed vnode will
  * keep testing as in modify as long as it is accessible with SMR.
  */
 static void
 vn_seqc_init(struct vnode *vp)
 {
 
 	vp->v_seqc = 0;
 	vp->v_seqc_users = 0;
 }
 
 static void
 vn_seqc_write_end_free(struct vnode *vp)
 {
 
 	VNPASS(seqc_in_modify(vp->v_seqc), vp);
 	VNPASS(vp->v_seqc_users == 1, vp);
 }
 
 void
 vn_irflag_set_locked(struct vnode *vp, short toset)
 {
 	short flags;
 
 	ASSERT_VI_LOCKED(vp, __func__);
 	flags = vn_irflag_read(vp);
 	VNASSERT((flags & toset) == 0, vp,
 	    ("%s: some of the passed flags already set (have %d, passed %d)\n",
 	    __func__, flags, toset));
 	atomic_store_short(&vp->v_irflag, flags | toset);
 }
 
 void
 vn_irflag_set(struct vnode *vp, short toset)
 {
 
 	VI_LOCK(vp);
 	vn_irflag_set_locked(vp, toset);
 	VI_UNLOCK(vp);
 }
 
 void
 vn_irflag_set_cond_locked(struct vnode *vp, short toset)
 {
 	short flags;
 
 	ASSERT_VI_LOCKED(vp, __func__);
 	flags = vn_irflag_read(vp);
 	atomic_store_short(&vp->v_irflag, flags | toset);
 }
 
 void
 vn_irflag_set_cond(struct vnode *vp, short toset)
 {
 
 	VI_LOCK(vp);
 	vn_irflag_set_cond_locked(vp, toset);
 	VI_UNLOCK(vp);
 }
 
 void
 vn_irflag_unset_locked(struct vnode *vp, short tounset)
 {
 	short flags;
 
 	ASSERT_VI_LOCKED(vp, __func__);
 	flags = vn_irflag_read(vp);
 	VNASSERT((flags & tounset) == tounset, vp,
 	    ("%s: some of the passed flags not set (have %d, passed %d)\n",
 	    __func__, flags, tounset));
 	atomic_store_short(&vp->v_irflag, flags & ~tounset);
 }
 
 void
 vn_irflag_unset(struct vnode *vp, short tounset)
 {
 
 	VI_LOCK(vp);
 	vn_irflag_unset_locked(vp, tounset);
 	VI_UNLOCK(vp);
 }
diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c
index 006ee24cac25..103f4dab519f 100644
--- a/sys/kern/vfs_syscalls.c
+++ b/sys/kern/vfs_syscalls.c
@@ -1,5035 +1,5035 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
  * (c) UNIX System Laboratories, Inc.
  * All or some portions of this file are derived from material licensed
  * to the University of California by American Telephone and Telegraph
  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
  * the permission of UNIX System Laboratories, Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)vfs_syscalls.c	8.13 (Berkeley) 4/15/94
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_capsicum.h"
 #include "opt_ktrace.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #ifdef COMPAT_FREEBSD11
 #include <sys/abi_compat.h>
 #endif
 #include <sys/bio.h>
 #include <sys/buf.h>
 #include <sys/capsicum.h>
 #include <sys/disk.h>
 #include <sys/malloc.h>
 #include <sys/mount.h>
 #include <sys/mutex.h>
 #include <sys/sysproto.h>
 #include <sys/namei.h>
 #include <sys/filedesc.h>
 #include <sys/kernel.h>
 #include <sys/fcntl.h>
 #include <sys/file.h>
 #include <sys/filio.h>
 #include <sys/limits.h>
 #include <sys/linker.h>
 #include <sys/rwlock.h>
 #include <sys/sdt.h>
 #include <sys/stat.h>
 #include <sys/sx.h>
 #include <sys/unistd.h>
 #include <sys/vnode.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/dirent.h>
 #include <sys/jail.h>
 #include <sys/syscallsubr.h>
 #include <sys/sysctl.h>
 #ifdef KTRACE
 #include <sys/ktrace.h>
 #endif
 
 #include <machine/stdarg.h>
 
 #include <security/audit/audit.h>
 #include <security/mac/mac_framework.h>
 
 #include <vm/vm.h>
 #include <vm/vm_object.h>
 #include <vm/vm_page.h>
 #include <vm/uma.h>
 
 #include <fs/devfs/devfs.h>
 
 MALLOC_DEFINE(M_FADVISE, "fadvise", "posix_fadvise(2) information");
 
 static int kern_chflagsat(struct thread *td, int fd, const char *path,
     enum uio_seg pathseg, u_long flags, int atflag);
 static int setfflags(struct thread *td, struct vnode *, u_long);
 static int getutimes(const struct timeval *, enum uio_seg, struct timespec *);
 static int getutimens(const struct timespec *, enum uio_seg,
     struct timespec *, int *);
 static int setutimes(struct thread *td, struct vnode *,
     const struct timespec *, int, int);
 static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred,
     struct thread *td);
 static int kern_fhlinkat(struct thread *td, int fd, const char *path,
     enum uio_seg pathseg, fhandle_t *fhp);
 static int kern_readlink_vp(struct vnode *vp, char *buf, enum uio_seg bufseg,
     size_t count, struct thread *td);
 static int kern_linkat_vp(struct thread *td, struct vnode *vp, int fd,
     const char *path, enum uio_seg segflag);
 
 static uint64_t
 at2cnpflags(u_int at_flags, u_int mask)
 {
 	uint64_t res;
 
 	MPASS((at_flags & (AT_SYMLINK_FOLLOW | AT_SYMLINK_NOFOLLOW)) !=
 	    (AT_SYMLINK_FOLLOW | AT_SYMLINK_NOFOLLOW));
 
 	res = 0;
 	at_flags &= mask;
 	if ((at_flags & AT_RESOLVE_BENEATH) != 0)
 		res |= RBENEATH;
 	if ((at_flags & AT_SYMLINK_FOLLOW) != 0)
 		res |= FOLLOW;
 	/* NOFOLLOW is pseudo flag */
 	if ((mask & AT_SYMLINK_NOFOLLOW) != 0) {
 		res |= (at_flags & AT_SYMLINK_NOFOLLOW) != 0 ? NOFOLLOW :
 		    FOLLOW;
 	}
 	if ((mask & AT_EMPTY_PATH) != 0 && (at_flags & AT_EMPTY_PATH) != 0)
 		res |= EMPTYPATH;
 	return (res);
 }
 
 int
 kern_sync(struct thread *td)
 {
 	struct mount *mp, *nmp;
 	int save;
 
 	mtx_lock(&mountlist_mtx);
 	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
 		if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) {
 			nmp = TAILQ_NEXT(mp, mnt_list);
 			continue;
 		}
 		if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
 		    vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
 			save = curthread_pflags_set(TDP_SYNCIO);
 			vfs_periodic(mp, MNT_NOWAIT);
 			VFS_SYNC(mp, MNT_NOWAIT);
 			curthread_pflags_restore(save);
 			vn_finished_write(mp);
 		}
 		mtx_lock(&mountlist_mtx);
 		nmp = TAILQ_NEXT(mp, mnt_list);
 		vfs_unbusy(mp);
 	}
 	mtx_unlock(&mountlist_mtx);
 	return (0);
 }
 
 /*
  * Sync each mounted filesystem.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct sync_args {
 	int     dummy;
 };
 #endif
 /* ARGSUSED */
 int
 sys_sync(struct thread *td, struct sync_args *uap)
 {
 
 	return (kern_sync(td));
 }
 
 /*
  * Change filesystem quotas.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct quotactl_args {
 	char *path;
 	int cmd;
 	int uid;
 	caddr_t arg;
 };
 #endif
 int
 sys_quotactl(struct thread *td, struct quotactl_args *uap)
 {
 	struct mount *mp;
 	struct nameidata nd;
 	int error;
 	bool mp_busy;
 
 	AUDIT_ARG_CMD(uap->cmd);
 	AUDIT_ARG_UID(uap->uid);
 	if (!prison_allow(td->td_ucred, PR_ALLOW_QUOTAS))
 		return (EPERM);
 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE,
 	    uap->path);
 	if ((error = namei(&nd)) != 0)
 		return (error);
 	NDFREE_PNBUF(&nd);
 	mp = nd.ni_vp->v_mount;
 	vfs_ref(mp);
 	vput(nd.ni_vp);
 	error = vfs_busy(mp, 0);
 	if (error != 0) {
 		vfs_rel(mp);
 		return (error);
 	}
 	mp_busy = true;
 	error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg, &mp_busy);
 
 	/*
 	 * Since quota on/off operations typically need to open quota
 	 * files, the implementation may need to unbusy the mount point
 	 * before calling into namei.  Otherwise, unmount might be
 	 * started between two vfs_busy() invocations (first is ours,
 	 * second is from mount point cross-walk code in lookup()),
 	 * causing deadlock.
 	 *
 	 * Avoid unbusying mp if the implementation indicates it has
 	 * already done so.
 	 */
 	if (mp_busy)
 		vfs_unbusy(mp);
 	vfs_rel(mp);
 	return (error);
 }
 
 /*
  * Used by statfs conversion routines to scale the block size up if
  * necessary so that all of the block counts are <= 'max_size'.  Note
  * that 'max_size' should be a bitmask, i.e. 2^n - 1 for some non-zero
  * value of 'n'.
  */
 void
 statfs_scale_blocks(struct statfs *sf, long max_size)
 {
 	uint64_t count;
 	int shift;
 
 	KASSERT(powerof2(max_size + 1), ("%s: invalid max_size", __func__));
 
 	/*
 	 * Attempt to scale the block counts to give a more accurate
 	 * overview to userland of the ratio of free space to used
 	 * space.  To do this, find the largest block count and compute
 	 * a divisor that lets it fit into a signed integer <= max_size.
 	 */
 	if (sf->f_bavail < 0)
 		count = -sf->f_bavail;
 	else
 		count = sf->f_bavail;
 	count = MAX(sf->f_blocks, MAX(sf->f_bfree, count));
 	if (count <= max_size)
 		return;
 
 	count >>= flsl(max_size);
 	shift = 0;
 	while (count > 0) {
 		shift++;
 		count >>=1;
 	}
 
 	sf->f_bsize <<= shift;
 	sf->f_blocks >>= shift;
 	sf->f_bfree >>= shift;
 	sf->f_bavail >>= shift;
 }
 
 static int
 kern_do_statfs(struct thread *td, struct mount *mp, struct statfs *buf)
 {
 	int error;
 
 	if (mp == NULL)
 		return (EBADF);
 	error = vfs_busy(mp, 0);
 	vfs_rel(mp);
 	if (error != 0)
 		return (error);
 #ifdef MAC
 	error = mac_mount_check_stat(td->td_ucred, mp);
 	if (error != 0)
 		goto out;
 #endif
 	error = VFS_STATFS(mp, buf);
 	if (error != 0)
 		goto out;
 	if (priv_check_cred_vfs_generation(td->td_ucred)) {
 		buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0;
 		prison_enforce_statfs(td->td_ucred, mp, buf);
 	}
 out:
 	vfs_unbusy(mp);
 	return (error);
 }
 
 /*
  * Get filesystem statistics.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct statfs_args {
 	char *path;
 	struct statfs *buf;
 };
 #endif
 int
 sys_statfs(struct thread *td, struct statfs_args *uap)
 {
 	struct statfs *sfp;
 	int error;
 
 	sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK);
 	error = kern_statfs(td, uap->path, UIO_USERSPACE, sfp);
 	if (error == 0)
 		error = copyout(sfp, uap->buf, sizeof(struct statfs));
 	free(sfp, M_STATFS);
 	return (error);
 }
 
 int
 kern_statfs(struct thread *td, const char *path, enum uio_seg pathseg,
     struct statfs *buf)
 {
 	struct mount *mp;
 	struct nameidata nd;
 	int error;
 
 	NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path);
 	error = namei(&nd);
 	if (error != 0)
 		return (error);
+	NDFREE_PNBUF(&nd);
 	mp = vfs_ref_from_vp(nd.ni_vp);
-	NDFREE_NOTHING(&nd);
 	vrele(nd.ni_vp);
 	return (kern_do_statfs(td, mp, buf));
 }
 
 /*
  * Get filesystem statistics.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct fstatfs_args {
 	int fd;
 	struct statfs *buf;
 };
 #endif
 int
 sys_fstatfs(struct thread *td, struct fstatfs_args *uap)
 {
 	struct statfs *sfp;
 	int error;
 
 	sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK);
 	error = kern_fstatfs(td, uap->fd, sfp);
 	if (error == 0)
 		error = copyout(sfp, uap->buf, sizeof(struct statfs));
 	free(sfp, M_STATFS);
 	return (error);
 }
 
 int
 kern_fstatfs(struct thread *td, int fd, struct statfs *buf)
 {
 	struct file *fp;
 	struct mount *mp;
 	struct vnode *vp;
 	int error;
 
 	AUDIT_ARG_FD(fd);
 	error = getvnode_path(td, fd, &cap_fstatfs_rights, &fp);
 	if (error != 0)
 		return (error);
 	vp = fp->f_vnode;
 #ifdef AUDIT
 	if (AUDITING_TD(td)) {
 		vn_lock(vp, LK_SHARED | LK_RETRY);
 		AUDIT_ARG_VNODE1(vp);
 		VOP_UNLOCK(vp);
 	}
 #endif
 	mp = vfs_ref_from_vp(vp);
 	fdrop(fp, td);
 	return (kern_do_statfs(td, mp, buf));
 }
 
 /*
  * Get statistics on all filesystems.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct getfsstat_args {
 	struct statfs *buf;
 	long bufsize;
 	int mode;
 };
 #endif
 int
 sys_getfsstat(struct thread *td, struct getfsstat_args *uap)
 {
 	size_t count;
 	int error;
 
 	if (uap->bufsize < 0 || uap->bufsize > SIZE_MAX)
 		return (EINVAL);
 	error = kern_getfsstat(td, &uap->buf, uap->bufsize, &count,
 	    UIO_USERSPACE, uap->mode);
 	if (error == 0)
 		td->td_retval[0] = count;
 	return (error);
 }
 
 /*
  * If (bufsize > 0 && bufseg == UIO_SYSSPACE)
  *	The caller is responsible for freeing memory which will be allocated
  *	in '*buf'.
  */
 int
 kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize,
     size_t *countp, enum uio_seg bufseg, int mode)
 {
 	struct mount *mp, *nmp;
 	struct statfs *sfsp, *sp, *sptmp, *tofree;
 	size_t count, maxcount;
 	int error;
 
 	switch (mode) {
 	case MNT_WAIT:
 	case MNT_NOWAIT:
 		break;
 	default:
 		if (bufseg == UIO_SYSSPACE)
 			*buf = NULL;
 		return (EINVAL);
 	}
 restart:
 	maxcount = bufsize / sizeof(struct statfs);
 	if (bufsize == 0) {
 		sfsp = NULL;
 		tofree = NULL;
 	} else if (bufseg == UIO_USERSPACE) {
 		sfsp = *buf;
 		tofree = NULL;
 	} else /* if (bufseg == UIO_SYSSPACE) */ {
 		count = 0;
 		mtx_lock(&mountlist_mtx);
 		TAILQ_FOREACH(mp, &mountlist, mnt_list) {
 			count++;
 		}
 		mtx_unlock(&mountlist_mtx);
 		if (maxcount > count)
 			maxcount = count;
 		tofree = sfsp = *buf = malloc(maxcount * sizeof(struct statfs),
 		    M_STATFS, M_WAITOK);
 	}
 
 	count = 0;
 
 	/*
 	 * If there is no target buffer they only want the count.
 	 *
 	 * This could be TAILQ_FOREACH but it is open-coded to match the original
 	 * code below.
 	 */
 	if (sfsp == NULL) {
 		mtx_lock(&mountlist_mtx);
 		for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
 			if (prison_canseemount(td->td_ucred, mp) != 0) {
 				nmp = TAILQ_NEXT(mp, mnt_list);
 				continue;
 			}
 #ifdef MAC
 			if (mac_mount_check_stat(td->td_ucred, mp) != 0) {
 				nmp = TAILQ_NEXT(mp, mnt_list);
 				continue;
 			}
 #endif
 			count++;
 			nmp = TAILQ_NEXT(mp, mnt_list);
 		}
 		mtx_unlock(&mountlist_mtx);
 		*countp = count;
 		return (0);
 	}
 
 	/*
 	 * They want the entire thing.
 	 *
 	 * Short-circuit the corner case of no room for anything, avoids
 	 * relocking below.
 	 */
 	if (maxcount < 1) {
 		goto out;
 	}
 
 	mtx_lock(&mountlist_mtx);
 	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
 		if (prison_canseemount(td->td_ucred, mp) != 0) {
 			nmp = TAILQ_NEXT(mp, mnt_list);
 			continue;
 		}
 #ifdef MAC
 		if (mac_mount_check_stat(td->td_ucred, mp) != 0) {
 			nmp = TAILQ_NEXT(mp, mnt_list);
 			continue;
 		}
 #endif
 		if (mode == MNT_WAIT) {
 			if (vfs_busy(mp, MBF_MNTLSTLOCK) != 0) {
 				/*
 				 * If vfs_busy() failed, and MBF_NOWAIT
 				 * wasn't passed, then the mp is gone.
 				 * Furthermore, because of MBF_MNTLSTLOCK,
 				 * the mountlist_mtx was dropped.  We have
 				 * no other choice than to start over.
 				 */
 				mtx_unlock(&mountlist_mtx);
 				free(tofree, M_STATFS);
 				goto restart;
 			}
 		} else {
 			if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK) != 0) {
 				nmp = TAILQ_NEXT(mp, mnt_list);
 				continue;
 			}
 		}
 		sp = &mp->mnt_stat;
 		/*
 		 * If MNT_NOWAIT is specified, do not refresh
 		 * the fsstat cache.
 		 */
 		if (mode != MNT_NOWAIT) {
 			error = VFS_STATFS(mp, sp);
 			if (error != 0) {
 				mtx_lock(&mountlist_mtx);
 				nmp = TAILQ_NEXT(mp, mnt_list);
 				vfs_unbusy(mp);
 				continue;
 			}
 		}
 		if (priv_check_cred_vfs_generation(td->td_ucred)) {
 			sptmp = malloc(sizeof(struct statfs), M_STATFS,
 			    M_WAITOK);
 			*sptmp = *sp;
 			sptmp->f_fsid.val[0] = sptmp->f_fsid.val[1] = 0;
 			prison_enforce_statfs(td->td_ucred, mp, sptmp);
 			sp = sptmp;
 		} else
 			sptmp = NULL;
 		if (bufseg == UIO_SYSSPACE) {
 			bcopy(sp, sfsp, sizeof(*sp));
 			free(sptmp, M_STATFS);
 		} else /* if (bufseg == UIO_USERSPACE) */ {
 			error = copyout(sp, sfsp, sizeof(*sp));
 			free(sptmp, M_STATFS);
 			if (error != 0) {
 				vfs_unbusy(mp);
 				return (error);
 			}
 		}
 		sfsp++;
 		count++;
 
 		if (count == maxcount) {
 			vfs_unbusy(mp);
 			goto out;
 		}
 
 		mtx_lock(&mountlist_mtx);
 		nmp = TAILQ_NEXT(mp, mnt_list);
 		vfs_unbusy(mp);
 	}
 	mtx_unlock(&mountlist_mtx);
 out:
 	*countp = count;
 	return (0);
 }
 
 #ifdef COMPAT_FREEBSD4
 /*
  * Get old format filesystem statistics.
  */
 static void freebsd4_cvtstatfs(struct statfs *, struct ostatfs *);
 
 #ifndef _SYS_SYSPROTO_H_
 struct freebsd4_statfs_args {
 	char *path;
 	struct ostatfs *buf;
 };
 #endif
 int
 freebsd4_statfs(struct thread *td, struct freebsd4_statfs_args *uap)
 {
 	struct ostatfs osb;
 	struct statfs *sfp;
 	int error;
 
 	sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK);
 	error = kern_statfs(td, uap->path, UIO_USERSPACE, sfp);
 	if (error == 0) {
 		freebsd4_cvtstatfs(sfp, &osb);
 		error = copyout(&osb, uap->buf, sizeof(osb));
 	}
 	free(sfp, M_STATFS);
 	return (error);
 }
 
 /*
  * Get filesystem statistics.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct freebsd4_fstatfs_args {
 	int fd;
 	struct ostatfs *buf;
 };
 #endif
 int
 freebsd4_fstatfs(struct thread *td, struct freebsd4_fstatfs_args *uap)
 {
 	struct ostatfs osb;
 	struct statfs *sfp;
 	int error;
 
 	sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK);
 	error = kern_fstatfs(td, uap->fd, sfp);
 	if (error == 0) {
 		freebsd4_cvtstatfs(sfp, &osb);
 		error = copyout(&osb, uap->buf, sizeof(osb));
 	}
 	free(sfp, M_STATFS);
 	return (error);
 }
 
 /*
  * Get statistics on all filesystems.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct freebsd4_getfsstat_args {
 	struct ostatfs *buf;
 	long bufsize;
 	int mode;
 };
 #endif
 int
 freebsd4_getfsstat(struct thread *td, struct freebsd4_getfsstat_args *uap)
 {
 	struct statfs *buf, *sp;
 	struct ostatfs osb;
 	size_t count, size;
 	int error;
 
 	if (uap->bufsize < 0)
 		return (EINVAL);
 	count = uap->bufsize / sizeof(struct ostatfs);
 	if (count > SIZE_MAX / sizeof(struct statfs))
 		return (EINVAL);
 	size = count * sizeof(struct statfs);
 	error = kern_getfsstat(td, &buf, size, &count, UIO_SYSSPACE,
 	    uap->mode);
 	if (error == 0)
 		td->td_retval[0] = count;
 	if (size != 0) {
 		sp = buf;
 		while (count != 0 && error == 0) {
 			freebsd4_cvtstatfs(sp, &osb);
 			error = copyout(&osb, uap->buf, sizeof(osb));
 			sp++;
 			uap->buf++;
 			count--;
 		}
 		free(buf, M_STATFS);
 	}
 	return (error);
 }
 
 /*
  * Implement fstatfs() for (NFS) file handles.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct freebsd4_fhstatfs_args {
 	struct fhandle *u_fhp;
 	struct ostatfs *buf;
 };
 #endif
 int
 freebsd4_fhstatfs(struct thread *td, struct freebsd4_fhstatfs_args *uap)
 {
 	struct ostatfs osb;
 	struct statfs *sfp;
 	fhandle_t fh;
 	int error;
 
 	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
 	if (error != 0)
 		return (error);
 	sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK);
 	error = kern_fhstatfs(td, fh, sfp);
 	if (error == 0) {
 		freebsd4_cvtstatfs(sfp, &osb);
 		error = copyout(&osb, uap->buf, sizeof(osb));
 	}
 	free(sfp, M_STATFS);
 	return (error);
 }
 
 /*
  * Convert a new format statfs structure to an old format statfs structure.
  */
 static void
 freebsd4_cvtstatfs(struct statfs *nsp, struct ostatfs *osp)
 {
 
 	statfs_scale_blocks(nsp, LONG_MAX);
 	bzero(osp, sizeof(*osp));
 	osp->f_bsize = nsp->f_bsize;
 	osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX);
 	osp->f_blocks = nsp->f_blocks;
 	osp->f_bfree = nsp->f_bfree;
 	osp->f_bavail = nsp->f_bavail;
 	osp->f_files = MIN(nsp->f_files, LONG_MAX);
 	osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX);
 	osp->f_owner = nsp->f_owner;
 	osp->f_type = nsp->f_type;
 	osp->f_flags = nsp->f_flags;
 	osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX);
 	osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX);
 	osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX);
 	osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX);
 	strlcpy(osp->f_fstypename, nsp->f_fstypename,
 	    MIN(MFSNAMELEN, OMFSNAMELEN));
 	strlcpy(osp->f_mntonname, nsp->f_mntonname,
 	    MIN(MNAMELEN, OMNAMELEN));
 	strlcpy(osp->f_mntfromname, nsp->f_mntfromname,
 	    MIN(MNAMELEN, OMNAMELEN));
 	osp->f_fsid = nsp->f_fsid;
 }
 #endif /* COMPAT_FREEBSD4 */
 
 #if defined(COMPAT_FREEBSD11)
 /*
  * Get old format filesystem statistics.
  */
 static void freebsd11_cvtstatfs(struct statfs *, struct freebsd11_statfs *);
 
 int
 freebsd11_statfs(struct thread *td, struct freebsd11_statfs_args *uap)
 {
 	struct freebsd11_statfs osb;
 	struct statfs *sfp;
 	int error;
 
 	sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK);
 	error = kern_statfs(td, uap->path, UIO_USERSPACE, sfp);
 	if (error == 0) {
 		freebsd11_cvtstatfs(sfp, &osb);
 		error = copyout(&osb, uap->buf, sizeof(osb));
 	}
 	free(sfp, M_STATFS);
 	return (error);
 }
 
 /*
  * Get filesystem statistics.
  */
 int
 freebsd11_fstatfs(struct thread *td, struct freebsd11_fstatfs_args *uap)
 {
 	struct freebsd11_statfs osb;
 	struct statfs *sfp;
 	int error;
 
 	sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK);
 	error = kern_fstatfs(td, uap->fd, sfp);
 	if (error == 0) {
 		freebsd11_cvtstatfs(sfp, &osb);
 		error = copyout(&osb, uap->buf, sizeof(osb));
 	}
 	free(sfp, M_STATFS);
 	return (error);
 }
 
 /*
  * Get statistics on all filesystems.
  */
 int
 freebsd11_getfsstat(struct thread *td, struct freebsd11_getfsstat_args *uap)
 {
 	return (kern_freebsd11_getfsstat(td, uap->buf, uap->bufsize, uap->mode));
 }
 
 int
 kern_freebsd11_getfsstat(struct thread *td, struct freebsd11_statfs * ubuf,
     long bufsize, int mode)
 {
 	struct freebsd11_statfs osb;
 	struct statfs *buf, *sp;
 	size_t count, size;
 	int error;
 
 	if (bufsize < 0)
 		return (EINVAL);
 
 	count = bufsize / sizeof(struct ostatfs);
 	size = count * sizeof(struct statfs);
 	error = kern_getfsstat(td, &buf, size, &count, UIO_SYSSPACE, mode);
 	if (error == 0)
 		td->td_retval[0] = count;
 	if (size > 0) {
 		sp = buf;
 		while (count > 0 && error == 0) {
 			freebsd11_cvtstatfs(sp, &osb);
 			error = copyout(&osb, ubuf, sizeof(osb));
 			sp++;
 			ubuf++;
 			count--;
 		}
 		free(buf, M_STATFS);
 	}
 	return (error);
 }
 
 /*
  * Implement fstatfs() for (NFS) file handles.
  */
 int
 freebsd11_fhstatfs(struct thread *td, struct freebsd11_fhstatfs_args *uap)
 {
 	struct freebsd11_statfs osb;
 	struct statfs *sfp;
 	fhandle_t fh;
 	int error;
 
 	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
 	if (error)
 		return (error);
 	sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK);
 	error = kern_fhstatfs(td, fh, sfp);
 	if (error == 0) {
 		freebsd11_cvtstatfs(sfp, &osb);
 		error = copyout(&osb, uap->buf, sizeof(osb));
 	}
 	free(sfp, M_STATFS);
 	return (error);
 }
 
 /*
  * Convert a new format statfs structure to an old format statfs structure.
  */
 static void
 freebsd11_cvtstatfs(struct statfs *nsp, struct freebsd11_statfs *osp)
 {
 
 	bzero(osp, sizeof(*osp));
 	osp->f_version = FREEBSD11_STATFS_VERSION;
 	osp->f_type = nsp->f_type;
 	osp->f_flags = nsp->f_flags;
 	osp->f_bsize = nsp->f_bsize;
 	osp->f_iosize = nsp->f_iosize;
 	osp->f_blocks = nsp->f_blocks;
 	osp->f_bfree = nsp->f_bfree;
 	osp->f_bavail = nsp->f_bavail;
 	osp->f_files = nsp->f_files;
 	osp->f_ffree = nsp->f_ffree;
 	osp->f_syncwrites = nsp->f_syncwrites;
 	osp->f_asyncwrites = nsp->f_asyncwrites;
 	osp->f_syncreads = nsp->f_syncreads;
 	osp->f_asyncreads = nsp->f_asyncreads;
 	osp->f_namemax = nsp->f_namemax;
 	osp->f_owner = nsp->f_owner;
 	osp->f_fsid = nsp->f_fsid;
 	strlcpy(osp->f_fstypename, nsp->f_fstypename,
 	    MIN(MFSNAMELEN, sizeof(osp->f_fstypename)));
 	strlcpy(osp->f_mntonname, nsp->f_mntonname,
 	    MIN(MNAMELEN, sizeof(osp->f_mntonname)));
 	strlcpy(osp->f_mntfromname, nsp->f_mntfromname,
 	    MIN(MNAMELEN, sizeof(osp->f_mntfromname)));
 }
 #endif /* COMPAT_FREEBSD11 */
 
 /*
  * Change current working directory to a given file descriptor.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct fchdir_args {
 	int	fd;
 };
 #endif
 int
 sys_fchdir(struct thread *td, struct fchdir_args *uap)
 {
 	struct vnode *vp, *tdp;
 	struct mount *mp;
 	struct file *fp;
 	int error;
 
 	AUDIT_ARG_FD(uap->fd);
 	error = getvnode_path(td, uap->fd, &cap_fchdir_rights,
 	    &fp);
 	if (error != 0)
 		return (error);
 	vp = fp->f_vnode;
 	vrefact(vp);
 	fdrop(fp, td);
 	vn_lock(vp, LK_SHARED | LK_RETRY);
 	AUDIT_ARG_VNODE1(vp);
 	error = change_dir(vp, td);
 	while (!error && (mp = vp->v_mountedhere) != NULL) {
 		if (vfs_busy(mp, 0))
 			continue;
 		error = VFS_ROOT(mp, LK_SHARED, &tdp);
 		vfs_unbusy(mp);
 		if (error != 0)
 			break;
 		vput(vp);
 		vp = tdp;
 	}
 	if (error != 0) {
 		vput(vp);
 		return (error);
 	}
 	VOP_UNLOCK(vp);
 	pwd_chdir(td, vp);
 	return (0);
 }
 
 /*
  * Change current working directory (``.'').
  */
 #ifndef _SYS_SYSPROTO_H_
 struct chdir_args {
 	char	*path;
 };
 #endif
 int
 sys_chdir(struct thread *td, struct chdir_args *uap)
 {
 
 	return (kern_chdir(td, uap->path, UIO_USERSPACE));
 }
 
 int
 kern_chdir(struct thread *td, const char *path, enum uio_seg pathseg)
 {
 	struct nameidata nd;
 	int error;
 
 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1,
 	    pathseg, path);
 	if ((error = namei(&nd)) != 0)
 		return (error);
 	if ((error = change_dir(nd.ni_vp, td)) != 0) {
 		vput(nd.ni_vp);
-		NDFREE_NOTHING(&nd);
+		NDFREE_PNBUF(&nd);
 		return (error);
 	}
 	VOP_UNLOCK(nd.ni_vp);
-	NDFREE_NOTHING(&nd);
+	NDFREE_PNBUF(&nd);
 	pwd_chdir(td, nd.ni_vp);
 	return (0);
 }
 
 static int unprivileged_chroot = 0;
 SYSCTL_INT(_security_bsd, OID_AUTO, unprivileged_chroot, CTLFLAG_RW,
     &unprivileged_chroot, 0,
     "Unprivileged processes can use chroot(2)");
 /*
  * Change notion of root (``/'') directory.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct chroot_args {
 	char	*path;
 };
 #endif
 int
 sys_chroot(struct thread *td, struct chroot_args *uap)
 {
 	struct nameidata nd;
 	struct proc *p;
 	int error;
 
 	error = priv_check(td, PRIV_VFS_CHROOT);
 	if (error != 0) {
 		p = td->td_proc;
 		PROC_LOCK(p);
 		if (unprivileged_chroot == 0 ||
 		    (p->p_flag2 & P2_NO_NEW_PRIVS) == 0) {
 			PROC_UNLOCK(p);
 			return (error);
 		}
 		PROC_UNLOCK(p);
 	}
 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1,
 	    UIO_USERSPACE, uap->path);
 	error = namei(&nd);
 	if (error != 0)
 		return (error);
-	NDFREE_NOTHING(&nd);
+	NDFREE_PNBUF(&nd);
 	error = change_dir(nd.ni_vp, td);
 	if (error != 0)
 		goto e_vunlock;
 #ifdef MAC
 	error = mac_vnode_check_chroot(td->td_ucred, nd.ni_vp);
 	if (error != 0)
 		goto e_vunlock;
 #endif
 	VOP_UNLOCK(nd.ni_vp);
 	error = pwd_chroot(td, nd.ni_vp);
 	vrele(nd.ni_vp);
 	return (error);
 e_vunlock:
 	vput(nd.ni_vp);
 	return (error);
 }
 
 /*
  * Common routine for chroot and chdir.  Callers must provide a locked vnode
  * instance.
  */
 int
 change_dir(struct vnode *vp, struct thread *td)
 {
 #ifdef MAC
 	int error;
 #endif
 
 	ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked");
 	if (vp->v_type != VDIR)
 		return (ENOTDIR);
 #ifdef MAC
 	error = mac_vnode_check_chdir(td->td_ucred, vp);
 	if (error != 0)
 		return (error);
 #endif
 	return (VOP_ACCESS(vp, VEXEC, td->td_ucred, td));
 }
 
 static __inline void
 flags_to_rights(int flags, cap_rights_t *rightsp)
 {
 	if (flags & O_EXEC) {
 		cap_rights_set_one(rightsp, CAP_FEXECVE);
 		if (flags & O_PATH)
 			return;
 	} else {
 		switch ((flags & O_ACCMODE)) {
 		case O_RDONLY:
 			cap_rights_set_one(rightsp, CAP_READ);
 			break;
 		case O_RDWR:
 			cap_rights_set_one(rightsp, CAP_READ);
 			/* FALLTHROUGH */
 		case O_WRONLY:
 			cap_rights_set_one(rightsp, CAP_WRITE);
 			if (!(flags & (O_APPEND | O_TRUNC)))
 				cap_rights_set_one(rightsp, CAP_SEEK);
 			break;
 		}
 	}
 
 	if (flags & O_CREAT)
 		cap_rights_set_one(rightsp, CAP_CREATE);
 
 	if (flags & O_TRUNC)
 		cap_rights_set_one(rightsp, CAP_FTRUNCATE);
 
 	if (flags & (O_SYNC | O_FSYNC))
 		cap_rights_set_one(rightsp, CAP_FSYNC);
 
 	if (flags & (O_EXLOCK | O_SHLOCK))
 		cap_rights_set_one(rightsp, CAP_FLOCK);
 }
 
 /*
  * Check permissions, allocate an open file structure, and call the device
  * open routine if any.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct open_args {
 	char	*path;
 	int	flags;
 	int	mode;
 };
 #endif
 int
 sys_open(struct thread *td, struct open_args *uap)
 {
 
 	return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 	    uap->flags, uap->mode));
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct openat_args {
 	int	fd;
 	char	*path;
 	int	flag;
 	int	mode;
 };
 #endif
 int
 sys_openat(struct thread *td, struct openat_args *uap)
 {
 
 	AUDIT_ARG_FD(uap->fd);
 	return (kern_openat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag,
 	    uap->mode));
 }
 
 int
 kern_openat(struct thread *td, int fd, const char *path, enum uio_seg pathseg,
     int flags, int mode)
 {
 	struct proc *p = td->td_proc;
 	struct filedesc *fdp;
 	struct pwddesc *pdp;
 	struct file *fp;
 	struct vnode *vp;
 	struct nameidata nd;
 	cap_rights_t rights;
 	int cmode, error, indx;
 
 	indx = -1;
 	fdp = p->p_fd;
 	pdp = p->p_pd;
 
 	AUDIT_ARG_FFLAGS(flags);
 	AUDIT_ARG_MODE(mode);
 	cap_rights_init_one(&rights, CAP_LOOKUP);
 	flags_to_rights(flags, &rights);
 
 	/*
 	 * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags
 	 * may be specified.  On the other hand, for O_PATH any mode
 	 * except O_EXEC is ignored.
 	 */
 	if ((flags & O_PATH) != 0) {
 		flags &= ~(O_CREAT | O_ACCMODE);
 	} else if ((flags & O_EXEC) != 0) {
 		if (flags & O_ACCMODE)
 			return (EINVAL);
 	} else if ((flags & O_ACCMODE) == O_ACCMODE) {
 		return (EINVAL);
 	} else {
 		flags = FFLAGS(flags);
 	}
 
 	/*
 	 * Allocate a file structure. The descriptor to reference it
 	 * is allocated and used by finstall_refed() below.
 	 */
 	error = falloc_noinstall(td, &fp);
 	if (error != 0)
 		return (error);
 	/* Set the flags early so the finit in devfs can pick them up. */
 	fp->f_flag = flags & FMASK;
 	cmode = ((mode & ~pdp->pd_cmask) & ALLPERMS) & ~S_ISTXT;
 	NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1 | WANTIOCTLCAPS,
 	    pathseg, path, fd, &rights);
 	td->td_dupfd = -1;		/* XXX check for fdopen */
 	error = vn_open_cred(&nd, &flags, cmode, VN_OPEN_WANTIOCTLCAPS,
 	    td->td_ucred, fp);
 	if (error != 0) {
 		/*
 		 * If the vn_open replaced the method vector, something
 		 * wonderous happened deep below and we just pass it up
 		 * pretending we know what we do.
 		 */
 		if (error == ENXIO && fp->f_ops != &badfileops) {
 			MPASS((flags & O_PATH) == 0);
 			goto success;
 		}
 
 		/*
 		 * Handle special fdopen() case. bleh.
 		 *
 		 * Don't do this for relative (capability) lookups; we don't
 		 * understand exactly what would happen, and we don't think
 		 * that it ever should.
 		 */
 		if ((nd.ni_resflags & NIRES_STRICTREL) == 0 &&
 		    (error == ENODEV || error == ENXIO) &&
 		    td->td_dupfd >= 0) {
 			error = dupfdopen(td, fdp, td->td_dupfd, flags, error,
 			    &indx);
 			if (error == 0)
 				goto success;
 		}
 
 		goto bad;
 	}
 	td->td_dupfd = 0;
 	NDFREE_PNBUF(&nd);
 	vp = nd.ni_vp;
 
 	/*
 	 * Store the vnode, for any f_type. Typically, the vnode use
 	 * count is decremented by direct call to vn_closefile() for
 	 * files that switched type in the cdevsw fdopen() method.
 	 */
 	fp->f_vnode = vp;
 
 	/*
 	 * If the file wasn't claimed by devfs bind it to the normal
 	 * vnode operations here.
 	 */
 	if (fp->f_ops == &badfileops) {
 		KASSERT(vp->v_type != VFIFO || (flags & O_PATH) != 0,
 		    ("Unexpected fifo fp %p vp %p", fp, vp));
 		if ((flags & O_PATH) != 0) {
 			finit(fp, (flags & FMASK) | (fp->f_flag & FKQALLOWED),
 			    DTYPE_VNODE, NULL, &path_fileops);
 		} else {
 			finit_vnode(fp, flags, NULL, &vnops);
 		}
 	}
 
 	VOP_UNLOCK(vp);
 	if (flags & O_TRUNC) {
 		error = fo_truncate(fp, 0, td->td_ucred, td);
 		if (error != 0)
 			goto bad;
 	}
 success:
 	/*
 	 * If we haven't already installed the FD (for dupfdopen), do so now.
 	 */
 	if (indx == -1) {
 		struct filecaps *fcaps;
 
 #ifdef CAPABILITIES
 		if ((nd.ni_resflags & NIRES_STRICTREL) != 0)
 			fcaps = &nd.ni_filecaps;
 		else
 #endif
 			fcaps = NULL;
 		error = finstall_refed(td, fp, &indx, flags, fcaps);
 		/* On success finstall_refed() consumes fcaps. */
 		if (error != 0) {
 			goto bad;
 		}
 	} else {
 		NDFREE_IOCTLCAPS(&nd);
 		falloc_abort(td, fp);
 	}
 
 	td->td_retval[0] = indx;
 	return (0);
 bad:
 	KASSERT(indx == -1, ("indx=%d, should be -1", indx));
 	NDFREE_IOCTLCAPS(&nd);
 	falloc_abort(td, fp);
 	return (error);
 }
 
 #ifdef COMPAT_43
 /*
  * Create a file.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct ocreat_args {
 	char	*path;
 	int	mode;
 };
 #endif
 int
 ocreat(struct thread *td, struct ocreat_args *uap)
 {
 
 	return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 	    O_WRONLY | O_CREAT | O_TRUNC, uap->mode));
 }
 #endif /* COMPAT_43 */
 
 /*
  * Create a special file.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct mknodat_args {
 	int	fd;
 	char	*path;
 	mode_t	mode;
 	dev_t	dev;
 };
 #endif
 int
 sys_mknodat(struct thread *td, struct mknodat_args *uap)
 {
 
 	return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode,
 	    uap->dev));
 }
 
 #if defined(COMPAT_FREEBSD11)
 int
 freebsd11_mknod(struct thread *td,
     struct freebsd11_mknod_args *uap)
 {
 
 	return (kern_mknodat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 	    uap->mode, uap->dev));
 }
 
 int
 freebsd11_mknodat(struct thread *td,
     struct freebsd11_mknodat_args *uap)
 {
 
 	return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode,
 	    uap->dev));
 }
 #endif /* COMPAT_FREEBSD11 */
 
 int
 kern_mknodat(struct thread *td, int fd, const char *path, enum uio_seg pathseg,
     int mode, dev_t dev)
 {
 	struct vnode *vp;
 	struct mount *mp;
 	struct vattr vattr;
 	struct nameidata nd;
 	int error, whiteout = 0;
 
 	AUDIT_ARG_MODE(mode);
 	AUDIT_ARG_DEV(dev);
 	switch (mode & S_IFMT) {
 	case S_IFCHR:
 	case S_IFBLK:
 		error = priv_check(td, PRIV_VFS_MKNOD_DEV);
 		if (error == 0 && dev == VNOVAL)
 			error = EINVAL;
 		break;
 	case S_IFWHT:
 		error = priv_check(td, PRIV_VFS_MKNOD_WHT);
 		break;
 	case S_IFIFO:
 		if (dev == 0)
 			return (kern_mkfifoat(td, fd, path, pathseg, mode));
 		/* FALLTHROUGH */
 	default:
 		error = EINVAL;
 		break;
 	}
 	if (error != 0)
 		return (error);
 	NDPREINIT(&nd);
 restart:
 	bwillwrite();
-	NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 |
-	    NOCACHE, pathseg, path, fd, &cap_mknodat_rights);
+	NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | AUDITVNODE1 | NOCACHE,
+	    pathseg, path, fd, &cap_mknodat_rights);
 	if ((error = namei(&nd)) != 0)
 		return (error);
 	vp = nd.ni_vp;
 	if (vp != NULL) {
 		NDFREE_PNBUF(&nd);
 		if (vp == nd.ni_dvp)
 			vrele(nd.ni_dvp);
 		else
 			vput(nd.ni_dvp);
 		vrele(vp);
 		return (EEXIST);
 	} else {
 		VATTR_NULL(&vattr);
 		vattr.va_mode = (mode & ALLPERMS) &
 		    ~td->td_proc->p_pd->pd_cmask;
 		vattr.va_rdev = dev;
 		whiteout = 0;
 
 		switch (mode & S_IFMT) {
 		case S_IFCHR:
 			vattr.va_type = VCHR;
 			break;
 		case S_IFBLK:
 			vattr.va_type = VBLK;
 			break;
 		case S_IFWHT:
 			whiteout = 1;
 			break;
 		default:
 			panic("kern_mknod: invalid mode");
 		}
 	}
 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 		NDFREE_PNBUF(&nd);
 		vput(nd.ni_dvp);
 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 			return (error);
 		goto restart;
 	}
 #ifdef MAC
 	if (error == 0 && !whiteout)
 		error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp,
 		    &nd.ni_cnd, &vattr);
 #endif
 	if (error == 0) {
 		if (whiteout)
 			error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
 		else {
 			error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
 						&nd.ni_cnd, &vattr);
 		}
 	}
 	VOP_VPUT_PAIR(nd.ni_dvp, error == 0 && !whiteout ? &nd.ni_vp : NULL,
 	    true);
 	vn_finished_write(mp);
 	NDFREE_PNBUF(&nd);
 	if (error == ERELOOKUP)
 		goto restart;
 	return (error);
 }
 
 /*
  * Create a named pipe.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct mkfifo_args {
 	char	*path;
 	int	mode;
 };
 #endif
 int
 sys_mkfifo(struct thread *td, struct mkfifo_args *uap)
 {
 
 	return (kern_mkfifoat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 	    uap->mode));
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct mkfifoat_args {
 	int	fd;
 	char	*path;
 	mode_t	mode;
 };
 #endif
 int
 sys_mkfifoat(struct thread *td, struct mkfifoat_args *uap)
 {
 
 	return (kern_mkfifoat(td, uap->fd, uap->path, UIO_USERSPACE,
 	    uap->mode));
 }
 
 int
 kern_mkfifoat(struct thread *td, int fd, const char *path,
     enum uio_seg pathseg, int mode)
 {
 	struct mount *mp;
 	struct vattr vattr;
 	struct nameidata nd;
 	int error;
 
 	AUDIT_ARG_MODE(mode);
 	NDPREINIT(&nd);
 restart:
 	bwillwrite();
-	NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 |
-	    NOCACHE, pathseg, path, fd, &cap_mkfifoat_rights);
+	NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | AUDITVNODE1 | NOCACHE,
+	    pathseg, path, fd, &cap_mkfifoat_rights);
 	if ((error = namei(&nd)) != 0)
 		return (error);
 	if (nd.ni_vp != NULL) {
 		NDFREE_PNBUF(&nd);
 		if (nd.ni_vp == nd.ni_dvp)
 			vrele(nd.ni_dvp);
 		else
 			vput(nd.ni_dvp);
 		vrele(nd.ni_vp);
 		return (EEXIST);
 	}
 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 		NDFREE_PNBUF(&nd);
 		vput(nd.ni_dvp);
 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 			return (error);
 		goto restart;
 	}
 	VATTR_NULL(&vattr);
 	vattr.va_type = VFIFO;
 	vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_pd->pd_cmask;
 #ifdef MAC
 	error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
 	    &vattr);
 	if (error != 0)
 		goto out;
 #endif
 	error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
 #ifdef MAC
 out:
 #endif
 	VOP_VPUT_PAIR(nd.ni_dvp, error == 0 ? &nd.ni_vp : NULL, true);
 	vn_finished_write(mp);
 	NDFREE_PNBUF(&nd);
 	if (error == ERELOOKUP)
 		goto restart;
 	return (error);
 }
 
 /*
  * Make a hard file link.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct link_args {
 	char	*path;
 	char	*link;
 };
 #endif
 int
 sys_link(struct thread *td, struct link_args *uap)
 {
 
 	return (kern_linkat(td, AT_FDCWD, AT_FDCWD, uap->path, uap->link,
 	    UIO_USERSPACE, AT_SYMLINK_FOLLOW));
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct linkat_args {
 	int	fd1;
 	char	*path1;
 	int	fd2;
 	char	*path2;
 	int	flag;
 };
 #endif
 int
 sys_linkat(struct thread *td, struct linkat_args *uap)
 {
 
 	return (kern_linkat(td, uap->fd1, uap->fd2, uap->path1, uap->path2,
 	    UIO_USERSPACE, uap->flag));
 }
 
 int hardlink_check_uid = 0;
 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW,
     &hardlink_check_uid, 0,
     "Unprivileged processes cannot create hard links to files owned by other "
     "users");
 static int hardlink_check_gid = 0;
 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW,
     &hardlink_check_gid, 0,
     "Unprivileged processes cannot create hard links to files owned by other "
     "groups");
 
 static int
 can_hardlink(struct vnode *vp, struct ucred *cred)
 {
 	struct vattr va;
 	int error;
 
 	if (!hardlink_check_uid && !hardlink_check_gid)
 		return (0);
 
 	error = VOP_GETATTR(vp, &va, cred);
 	if (error != 0)
 		return (error);
 
 	if (hardlink_check_uid && cred->cr_uid != va.va_uid) {
 		error = priv_check_cred(cred, PRIV_VFS_LINK);
 		if (error != 0)
 			return (error);
 	}
 
 	if (hardlink_check_gid && !groupmember(va.va_gid, cred)) {
 		error = priv_check_cred(cred, PRIV_VFS_LINK);
 		if (error != 0)
 			return (error);
 	}
 
 	return (0);
 }
 
 int
 kern_linkat(struct thread *td, int fd1, int fd2, const char *path1,
     const char *path2, enum uio_seg segflag, int flag)
 {
 	struct nameidata nd;
 	int error;
 
 	if ((flag & ~(AT_SYMLINK_FOLLOW | AT_RESOLVE_BENEATH |
 	    AT_EMPTY_PATH)) != 0)
 		return (EINVAL);
 
 	NDPREINIT(&nd);
 	do {
 		bwillwrite();
 		NDINIT_ATRIGHTS(&nd, LOOKUP, AUDITVNODE1 | at2cnpflags(flag,
 		    AT_SYMLINK_FOLLOW | AT_RESOLVE_BENEATH | AT_EMPTY_PATH),
 		    segflag, path1, fd1, &cap_linkat_source_rights);
 		if ((error = namei(&nd)) != 0)
 			return (error);
 		NDFREE_PNBUF(&nd);
 		if ((nd.ni_resflags & NIRES_EMPTYPATH) != 0) {
 			error = priv_check(td, PRIV_VFS_FHOPEN);
 			if (error != 0) {
 				vrele(nd.ni_vp);
 				return (error);
 			}
 		}
 		error = kern_linkat_vp(td, nd.ni_vp, fd2, path2, segflag);
 	} while (error ==  EAGAIN || error == ERELOOKUP);
 	return (error);
 }
 
 static int
 kern_linkat_vp(struct thread *td, struct vnode *vp, int fd, const char *path,
     enum uio_seg segflag)
 {
 	struct nameidata nd;
 	struct mount *mp;
 	int error;
 
 	if (vp->v_type == VDIR) {
 		vrele(vp);
 		return (EPERM);		/* POSIX */
 	}
 	NDINIT_ATRIGHTS(&nd, CREATE,
-	    LOCKPARENT | SAVENAME | AUDITVNODE2 | NOCACHE, segflag, path, fd,
+	    LOCKPARENT | AUDITVNODE2 | NOCACHE, segflag, path, fd,
 	    &cap_linkat_target_rights);
 	if ((error = namei(&nd)) == 0) {
 		if (nd.ni_vp != NULL) {
 			NDFREE_PNBUF(&nd);
 			if (nd.ni_dvp == nd.ni_vp)
 				vrele(nd.ni_dvp);
 			else
 				vput(nd.ni_dvp);
 			vrele(nd.ni_vp);
 			vrele(vp);
 			return (EEXIST);
 		} else if (nd.ni_dvp->v_mount != vp->v_mount) {
 			/*
 			 * Cross-device link.  No need to recheck
 			 * vp->v_type, since it cannot change, except
 			 * to VBAD.
 			 */
 			NDFREE_PNBUF(&nd);
 			vput(nd.ni_dvp);
 			vrele(vp);
 			return (EXDEV);
 		} else if (vn_lock(vp, LK_EXCLUSIVE) == 0) {
 			error = can_hardlink(vp, td->td_ucred);
 #ifdef MAC
 			if (error == 0)
 				error = mac_vnode_check_link(td->td_ucred,
 				    nd.ni_dvp, vp, &nd.ni_cnd);
 #endif
 			if (error != 0) {
 				vput(vp);
 				vput(nd.ni_dvp);
 				NDFREE_PNBUF(&nd);
 				return (error);
 			}
 			error = vn_start_write(vp, &mp, V_NOWAIT);
 			if (error != 0) {
 				vput(vp);
 				vput(nd.ni_dvp);
 				NDFREE_PNBUF(&nd);
 				error = vn_start_write(NULL, &mp,
 				    V_XSLEEP | PCATCH);
 				if (error != 0)
 					return (error);
 				return (EAGAIN);
 			}
 			error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
 			VOP_VPUT_PAIR(nd.ni_dvp, &vp, true);
 			vn_finished_write(mp);
 			NDFREE_PNBUF(&nd);
 			vp = NULL;
 		} else {
 			vput(nd.ni_dvp);
 			NDFREE_PNBUF(&nd);
 			vrele(vp);
 			return (EAGAIN);
 		}
 	}
 	if (vp != NULL)
 		vrele(vp);
 	return (error);
 }
 
 /*
  * Make a symbolic link.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct symlink_args {
 	char	*path;
 	char	*link;
 };
 #endif
 int
 sys_symlink(struct thread *td, struct symlink_args *uap)
 {
 
 	return (kern_symlinkat(td, uap->path, AT_FDCWD, uap->link,
 	    UIO_USERSPACE));
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct symlinkat_args {
 	char	*path;
 	int	fd;
 	char	*path2;
 };
 #endif
 int
 sys_symlinkat(struct thread *td, struct symlinkat_args *uap)
 {
 
 	return (kern_symlinkat(td, uap->path1, uap->fd, uap->path2,
 	    UIO_USERSPACE));
 }
 
 int
 kern_symlinkat(struct thread *td, const char *path1, int fd, const char *path2,
     enum uio_seg segflg)
 {
 	struct mount *mp;
 	struct vattr vattr;
 	const char *syspath;
 	char *tmppath;
 	struct nameidata nd;
 	int error;
 
 	if (segflg == UIO_SYSSPACE) {
 		syspath = path1;
 	} else {
 		tmppath = uma_zalloc(namei_zone, M_WAITOK);
 		if ((error = copyinstr(path1, tmppath, MAXPATHLEN, NULL)) != 0)
 			goto out;
 		syspath = tmppath;
 	}
 	AUDIT_ARG_TEXT(syspath);
 	NDPREINIT(&nd);
 restart:
 	bwillwrite();
-	NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 |
-	    NOCACHE, segflg, path2, fd, &cap_symlinkat_rights);
+	NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | AUDITVNODE1 | NOCACHE, segflg,
+	    path2, fd, &cap_symlinkat_rights);
 	if ((error = namei(&nd)) != 0)
 		goto out;
 	if (nd.ni_vp) {
 		NDFREE_PNBUF(&nd);
 		if (nd.ni_vp == nd.ni_dvp)
 			vrele(nd.ni_dvp);
 		else
 			vput(nd.ni_dvp);
 		vrele(nd.ni_vp);
 		nd.ni_vp = NULL;
 		error = EEXIST;
 		goto out;
 	}
 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 		NDFREE_PNBUF(&nd);
 		vput(nd.ni_dvp);
 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 			goto out;
 		goto restart;
 	}
 	VATTR_NULL(&vattr);
 	vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_pd->pd_cmask;
 #ifdef MAC
 	vattr.va_type = VLNK;
 	error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
 	    &vattr);
 	if (error != 0)
 		goto out2;
 #endif
 	error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath);
 #ifdef MAC
 out2:
 #endif
 	VOP_VPUT_PAIR(nd.ni_dvp, error == 0 ? &nd.ni_vp : NULL, true);
 	vn_finished_write(mp);
 	NDFREE_PNBUF(&nd);
 	if (error == ERELOOKUP)
 		goto restart;
 out:
 	if (segflg != UIO_SYSSPACE)
 		uma_zfree(namei_zone, tmppath);
 	return (error);
 }
 
 /*
  * Delete a whiteout from the filesystem.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct undelete_args {
 	char *path;
 };
 #endif
 int
 sys_undelete(struct thread *td, struct undelete_args *uap)
 {
 	struct mount *mp;
 	struct nameidata nd;
 	int error;
 
 	NDPREINIT(&nd);
 restart:
 	bwillwrite();
 	NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | AUDITVNODE1,
 	    UIO_USERSPACE, uap->path);
 	error = namei(&nd);
 	if (error != 0)
 		return (error);
 
 	if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
 		NDFREE_PNBUF(&nd);
 		if (nd.ni_vp == nd.ni_dvp)
 			vrele(nd.ni_dvp);
 		else
 			vput(nd.ni_dvp);
 		if (nd.ni_vp)
 			vrele(nd.ni_vp);
 		return (EEXIST);
 	}
 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 		NDFREE_PNBUF(&nd);
 		vput(nd.ni_dvp);
 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 			return (error);
 		goto restart;
 	}
 	error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE);
 	NDFREE_PNBUF(&nd);
 	vput(nd.ni_dvp);
 	vn_finished_write(mp);
 	if (error == ERELOOKUP)
 		goto restart;
 	return (error);
 }
 
 /*
  * Delete a name from the filesystem.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct unlink_args {
 	char	*path;
 };
 #endif
 int
 sys_unlink(struct thread *td, struct unlink_args *uap)
 {
 
 	return (kern_funlinkat(td, AT_FDCWD, uap->path, FD_NONE, UIO_USERSPACE,
 	    0, 0));
 }
 
 static int
 kern_funlinkat_ex(struct thread *td, int dfd, const char *path, int fd,
     int flag, enum uio_seg pathseg, ino_t oldinum)
 {
 
 	if ((flag & ~(AT_REMOVEDIR | AT_RESOLVE_BENEATH)) != 0)
 		return (EINVAL);
 
 	if ((flag & AT_REMOVEDIR) != 0)
 		return (kern_frmdirat(td, dfd, path, fd, UIO_USERSPACE, 0));
 
 	return (kern_funlinkat(td, dfd, path, fd, UIO_USERSPACE, 0, 0));
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct unlinkat_args {
 	int	fd;
 	char	*path;
 	int	flag;
 };
 #endif
 int
 sys_unlinkat(struct thread *td, struct unlinkat_args *uap)
 {
 
 	return (kern_funlinkat_ex(td, uap->fd, uap->path, FD_NONE, uap->flag,
 	    UIO_USERSPACE, 0));
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct funlinkat_args {
 	int		dfd;
 	const char	*path;
 	int		fd;
 	int		flag;
 };
 #endif
 int
 sys_funlinkat(struct thread *td, struct funlinkat_args *uap)
 {
 
 	return (kern_funlinkat_ex(td, uap->dfd, uap->path, uap->fd, uap->flag,
 	    UIO_USERSPACE, 0));
 }
 
 int
 kern_funlinkat(struct thread *td, int dfd, const char *path, int fd,
     enum uio_seg pathseg, int flag, ino_t oldinum)
 {
 	struct mount *mp;
 	struct file *fp;
 	struct vnode *vp;
 	struct nameidata nd;
 	struct stat sb;
 	int error;
 
 	fp = NULL;
 	if (fd != FD_NONE) {
 		error = getvnode_path(td, fd, &cap_no_rights, &fp);
 		if (error != 0)
 			return (error);
 	}
 
 	NDPREINIT(&nd);
 restart:
 	bwillwrite();
 	NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1 |
 	    at2cnpflags(flag, AT_RESOLVE_BENEATH),
 	    pathseg, path, dfd, &cap_unlinkat_rights);
 	if ((error = namei(&nd)) != 0) {
 		if (error == EINVAL)
 			error = EPERM;
 		goto fdout;
 	}
 	vp = nd.ni_vp;
 	if (vp->v_type == VDIR && oldinum == 0) {
 		error = EPERM;		/* POSIX */
 	} else if (oldinum != 0 &&
 	    ((error = VOP_STAT(vp, &sb, td->td_ucred, NOCRED)) == 0) &&
 	    sb.st_ino != oldinum) {
 		error = EIDRM;	/* Identifier removed */
 	} else if (fp != NULL && fp->f_vnode != vp) {
 		if (VN_IS_DOOMED(fp->f_vnode))
 			error = EBADF;
 		else
 			error = EDEADLK;
 	} else {
 		/*
 		 * The root of a mounted filesystem cannot be deleted.
 		 *
 		 * XXX: can this only be a VDIR case?
 		 */
 		if (vp->v_vflag & VV_ROOT)
 			error = EBUSY;
 	}
 	if (error == 0) {
 		if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 			NDFREE_PNBUF(&nd);
 			vput(nd.ni_dvp);
 			if (vp == nd.ni_dvp)
 				vrele(vp);
 			else
 				vput(vp);
 			if ((error = vn_start_write(NULL, &mp,
 			    V_XSLEEP | PCATCH)) != 0) {
 				goto fdout;
 			}
 			goto restart;
 		}
 #ifdef MAC
 		error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp,
 		    &nd.ni_cnd);
 		if (error != 0)
 			goto out;
 #endif
 		vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK);
 		error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
 #ifdef MAC
 out:
 #endif
 		vn_finished_write(mp);
 	}
 	NDFREE_PNBUF(&nd);
 	vput(nd.ni_dvp);
 	if (vp == nd.ni_dvp)
 		vrele(vp);
 	else
 		vput(vp);
 	if (error == ERELOOKUP)
 		goto restart;
 fdout:
 	if (fp != NULL)
 		fdrop(fp, td);
 	return (error);
 }
 
 /*
  * Reposition read/write file offset.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct lseek_args {
 	int	fd;
 	int	pad;
 	off_t	offset;
 	int	whence;
 };
 #endif
 int
 sys_lseek(struct thread *td, struct lseek_args *uap)
 {
 
 	return (kern_lseek(td, uap->fd, uap->offset, uap->whence));
 }
 
 int
 kern_lseek(struct thread *td, int fd, off_t offset, int whence)
 {
 	struct file *fp;
 	int error;
 
 	AUDIT_ARG_FD(fd);
 	error = fget(td, fd, &cap_seek_rights, &fp);
 	if (error != 0)
 		return (error);
 	error = (fp->f_ops->fo_flags & DFLAG_SEEKABLE) != 0 ?
 	    fo_seek(fp, offset, whence, td) : ESPIPE;
 	fdrop(fp, td);
 	return (error);
 }
 
 #if defined(COMPAT_43)
 /*
  * Reposition read/write file offset.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct olseek_args {
 	int	fd;
 	long	offset;
 	int	whence;
 };
 #endif
 int
 olseek(struct thread *td, struct olseek_args *uap)
 {
 
 	return (kern_lseek(td, uap->fd, uap->offset, uap->whence));
 }
 #endif /* COMPAT_43 */
 
 #if defined(COMPAT_FREEBSD6)
 /* Version with the 'pad' argument */
 int
 freebsd6_lseek(struct thread *td, struct freebsd6_lseek_args *uap)
 {
 
 	return (kern_lseek(td, uap->fd, uap->offset, uap->whence));
 }
 #endif
 
 /*
  * Check access permissions using passed credentials.
  */
 static int
 vn_access(struct vnode *vp, int user_flags, struct ucred *cred,
      struct thread *td)
 {
 	accmode_t accmode;
 	int error;
 
 	/* Flags == 0 means only check for existence. */
 	if (user_flags == 0)
 		return (0);
 
 	accmode = 0;
 	if (user_flags & R_OK)
 		accmode |= VREAD;
 	if (user_flags & W_OK)
 		accmode |= VWRITE;
 	if (user_flags & X_OK)
 		accmode |= VEXEC;
 #ifdef MAC
 	error = mac_vnode_check_access(cred, vp, accmode);
 	if (error != 0)
 		return (error);
 #endif
 	if ((accmode & VWRITE) == 0 || (error = vn_writechk(vp)) == 0)
 		error = VOP_ACCESS(vp, accmode, cred, td);
 	return (error);
 }
 
 /*
  * Check access permissions using "real" credentials.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct access_args {
 	char	*path;
 	int	amode;
 };
 #endif
 int
 sys_access(struct thread *td, struct access_args *uap)
 {
 
 	return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 	    0, uap->amode));
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct faccessat_args {
 	int	dirfd;
 	char	*path;
 	int	amode;
 	int	flag;
 }
 #endif
 int
 sys_faccessat(struct thread *td, struct faccessat_args *uap)
 {
 
 	return (kern_accessat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag,
 	    uap->amode));
 }
 
 int
 kern_accessat(struct thread *td, int fd, const char *path,
     enum uio_seg pathseg, int flag, int amode)
 {
 	struct ucred *cred, *usecred;
 	struct vnode *vp;
 	struct nameidata nd;
 	int error;
 
 	if ((flag & ~(AT_EACCESS | AT_RESOLVE_BENEATH | AT_EMPTY_PATH)) != 0)
 		return (EINVAL);
 	if (amode != F_OK && (amode & ~(R_OK | W_OK | X_OK)) != 0)
 		return (EINVAL);
 
 	/*
 	 * Create and modify a temporary credential instead of one that
 	 * is potentially shared (if we need one).
 	 */
 	cred = td->td_ucred;
 	if ((flag & AT_EACCESS) == 0 &&
 	    ((cred->cr_uid != cred->cr_ruid ||
 	    cred->cr_rgid != cred->cr_groups[0]))) {
 		usecred = crdup(cred);
 		usecred->cr_uid = cred->cr_ruid;
 		usecred->cr_groups[0] = cred->cr_rgid;
 		td->td_ucred = usecred;
 	} else
 		usecred = cred;
 	AUDIT_ARG_VALUE(amode);
 	NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF |
 	    AUDITVNODE1 | at2cnpflags(flag, AT_RESOLVE_BENEATH |
 	    AT_EMPTY_PATH), pathseg, path, fd, &cap_fstat_rights);
 	if ((error = namei(&nd)) != 0)
 		goto out;
 	vp = nd.ni_vp;
 
 	error = vn_access(vp, amode, usecred, td);
-	NDFREE_NOTHING(&nd);
+	NDFREE_PNBUF(&nd);
 	vput(vp);
 out:
 	if (usecred != cred) {
 		td->td_ucred = cred;
 		crfree(usecred);
 	}
 	return (error);
 }
 
 /*
  * Check access permissions using "effective" credentials.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct eaccess_args {
 	char	*path;
 	int	amode;
 };
 #endif
 int
 sys_eaccess(struct thread *td, struct eaccess_args *uap)
 {
 
 	return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 	    AT_EACCESS, uap->amode));
 }
 
 #if defined(COMPAT_43)
 /*
  * Get file status; this version follows links.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct ostat_args {
 	char	*path;
 	struct ostat *ub;
 };
 #endif
 int
 ostat(struct thread *td, struct ostat_args *uap)
 {
 	struct stat sb;
 	struct ostat osb;
 	int error;
 
 	error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE,
 	    &sb, NULL);
 	if (error != 0)
 		return (error);
 	cvtstat(&sb, &osb);
 	return (copyout(&osb, uap->ub, sizeof (osb)));
 }
 
 /*
  * Get file status; this version does not follow links.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct olstat_args {
 	char	*path;
 	struct ostat *ub;
 };
 #endif
 int
 olstat(struct thread *td, struct olstat_args *uap)
 {
 	struct stat sb;
 	struct ostat osb;
 	int error;
 
 	error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path,
 	    UIO_USERSPACE, &sb, NULL);
 	if (error != 0)
 		return (error);
 	cvtstat(&sb, &osb);
 	return (copyout(&osb, uap->ub, sizeof (osb)));
 }
 
 /*
  * Convert from an old to a new stat structure.
  * XXX: many values are blindly truncated.
  */
 void
 cvtstat(struct stat *st, struct ostat *ost)
 {
 
 	bzero(ost, sizeof(*ost));
 	ost->st_dev = st->st_dev;
 	ost->st_ino = st->st_ino;
 	ost->st_mode = st->st_mode;
 	ost->st_nlink = st->st_nlink;
 	ost->st_uid = st->st_uid;
 	ost->st_gid = st->st_gid;
 	ost->st_rdev = st->st_rdev;
 	ost->st_size = MIN(st->st_size, INT32_MAX);
 	ost->st_atim = st->st_atim;
 	ost->st_mtim = st->st_mtim;
 	ost->st_ctim = st->st_ctim;
 	ost->st_blksize = st->st_blksize;
 	ost->st_blocks = st->st_blocks;
 	ost->st_flags = st->st_flags;
 	ost->st_gen = st->st_gen;
 }
 #endif /* COMPAT_43 */
 
 #if defined(COMPAT_43) || defined(COMPAT_FREEBSD11)
 int ino64_trunc_error;
 SYSCTL_INT(_vfs, OID_AUTO, ino64_trunc_error, CTLFLAG_RW,
     &ino64_trunc_error, 0,
     "Error on truncation of device, file or inode number, or link count");
 
 int
 freebsd11_cvtstat(struct stat *st, struct freebsd11_stat *ost)
 {
 
 	ost->st_dev = st->st_dev;
 	if (ost->st_dev != st->st_dev) {
 		switch (ino64_trunc_error) {
 		default:
 			/*
 			 * Since dev_t is almost raw, don't clamp to the
 			 * maximum for case 2, but ignore the error.
 			 */
 			break;
 		case 1:
 			return (EOVERFLOW);
 		}
 	}
 	ost->st_ino = st->st_ino;
 	if (ost->st_ino != st->st_ino) {
 		switch (ino64_trunc_error) {
 		default:
 		case 0:
 			break;
 		case 1:
 			return (EOVERFLOW);
 		case 2:
 			ost->st_ino = UINT32_MAX;
 			break;
 		}
 	}
 	ost->st_mode = st->st_mode;
 	ost->st_nlink = st->st_nlink;
 	if (ost->st_nlink != st->st_nlink) {
 		switch (ino64_trunc_error) {
 		default:
 		case 0:
 			break;
 		case 1:
 			return (EOVERFLOW);
 		case 2:
 			ost->st_nlink = UINT16_MAX;
 			break;
 		}
 	}
 	ost->st_uid = st->st_uid;
 	ost->st_gid = st->st_gid;
 	ost->st_rdev = st->st_rdev;
 	if (ost->st_rdev != st->st_rdev) {
 		switch (ino64_trunc_error) {
 		default:
 			break;
 		case 1:
 			return (EOVERFLOW);
 		}
 	}
 	ost->st_atim = st->st_atim;
 	ost->st_mtim = st->st_mtim;
 	ost->st_ctim = st->st_ctim;
 	ost->st_size = st->st_size;
 	ost->st_blocks = st->st_blocks;
 	ost->st_blksize = st->st_blksize;
 	ost->st_flags = st->st_flags;
 	ost->st_gen = st->st_gen;
 	ost->st_lspare = 0;
 	ost->st_birthtim = st->st_birthtim;
 	bzero((char *)&ost->st_birthtim + sizeof(ost->st_birthtim),
 	    sizeof(*ost) - offsetof(struct freebsd11_stat,
 	    st_birthtim) - sizeof(ost->st_birthtim));
 	return (0);
 }
 
 int
 freebsd11_stat(struct thread *td, struct freebsd11_stat_args* uap)
 {
 	struct stat sb;
 	struct freebsd11_stat osb;
 	int error;
 
 	error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE,
 	    &sb, NULL);
 	if (error != 0)
 		return (error);
 	error = freebsd11_cvtstat(&sb, &osb);
 	if (error == 0)
 		error = copyout(&osb, uap->ub, sizeof(osb));
 	return (error);
 }
 
 int
 freebsd11_lstat(struct thread *td, struct freebsd11_lstat_args* uap)
 {
 	struct stat sb;
 	struct freebsd11_stat osb;
 	int error;
 
 	error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path,
 	    UIO_USERSPACE, &sb, NULL);
 	if (error != 0)
 		return (error);
 	error = freebsd11_cvtstat(&sb, &osb);
 	if (error == 0)
 		error = copyout(&osb, uap->ub, sizeof(osb));
 	return (error);
 }
 
 int
 freebsd11_fhstat(struct thread *td, struct freebsd11_fhstat_args* uap)
 {
 	struct fhandle fh;
 	struct stat sb;
 	struct freebsd11_stat osb;
 	int error;
 
 	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
 	if (error != 0)
 		return (error);
 	error = kern_fhstat(td, fh, &sb);
 	if (error != 0)
 		return (error);
 	error = freebsd11_cvtstat(&sb, &osb);
 	if (error == 0)
 		error = copyout(&osb, uap->sb, sizeof(osb));
 	return (error);
 }
 
 int
 freebsd11_fstatat(struct thread *td, struct freebsd11_fstatat_args* uap)
 {
 	struct stat sb;
 	struct freebsd11_stat osb;
 	int error;
 
 	error = kern_statat(td, uap->flag, uap->fd, uap->path,
 	    UIO_USERSPACE, &sb, NULL);
 	if (error != 0)
 		return (error);
 	error = freebsd11_cvtstat(&sb, &osb);
 	if (error == 0)
 		error = copyout(&osb, uap->buf, sizeof(osb));
 	return (error);
 }
 #endif	/* COMPAT_FREEBSD11 */
 
 /*
  * Get file status
  */
 #ifndef _SYS_SYSPROTO_H_
 struct fstatat_args {
 	int	fd;
 	char	*path;
 	struct stat	*buf;
 	int	flag;
 }
 #endif
 int
 sys_fstatat(struct thread *td, struct fstatat_args *uap)
 {
 	struct stat sb;
 	int error;
 
 	error = kern_statat(td, uap->flag, uap->fd, uap->path,
 	    UIO_USERSPACE, &sb, NULL);
 	if (error == 0)
 		error = copyout(&sb, uap->buf, sizeof (sb));
 	return (error);
 }
 
 int
 kern_statat(struct thread *td, int flag, int fd, const char *path,
     enum uio_seg pathseg, struct stat *sbp,
     void (*hook)(struct vnode *vp, struct stat *sbp))
 {
 	struct nameidata nd;
 	int error;
 
 	if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH |
 	    AT_EMPTY_PATH)) != 0)
 		return (EINVAL);
 
 	NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(flag, AT_RESOLVE_BENEATH |
 	    AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH) | LOCKSHARED | LOCKLEAF |
 	    AUDITVNODE1, pathseg, path, fd, &cap_fstat_rights);
 
 	if ((error = namei(&nd)) != 0) {
 		if (error == ENOTDIR &&
 		    (nd.ni_resflags & NIRES_EMPTYPATH) != 0)
 			error = kern_fstat(td, fd, sbp);
 		return (error);
 	}
 	error = VOP_STAT(nd.ni_vp, sbp, td->td_ucred, NOCRED);
 	if (__predict_false(hook != NULL)) {
 		if (error == 0) {
 			hook(nd.ni_vp, sbp);
 		}
 	}
-	NDFREE_NOTHING(&nd);
+	NDFREE_PNBUF(&nd);
 	vput(nd.ni_vp);
 #ifdef __STAT_TIME_T_EXT
 	sbp->st_atim_ext = 0;
 	sbp->st_mtim_ext = 0;
 	sbp->st_ctim_ext = 0;
 	sbp->st_btim_ext = 0;
 #endif
 #ifdef KTRACE
 	if (KTRPOINT(td, KTR_STRUCT))
 		ktrstat_error(sbp, error);
 #endif
 	return (error);
 }
 
 #if defined(COMPAT_FREEBSD11)
 /*
  * Implementation of the NetBSD [l]stat() functions.
  */
 int
 freebsd11_cvtnstat(struct stat *sb, struct nstat *nsb)
 {
 	struct freebsd11_stat sb11;
 	int error;
 
 	error = freebsd11_cvtstat(sb, &sb11);
 	if (error != 0)
 		return (error);
 
 	bzero(nsb, sizeof(*nsb));
 	CP(sb11, *nsb, st_dev);
 	CP(sb11, *nsb, st_ino);
 	CP(sb11, *nsb, st_mode);
 	CP(sb11, *nsb, st_nlink);
 	CP(sb11, *nsb, st_uid);
 	CP(sb11, *nsb, st_gid);
 	CP(sb11, *nsb, st_rdev);
 	CP(sb11, *nsb, st_atim);
 	CP(sb11, *nsb, st_mtim);
 	CP(sb11, *nsb, st_ctim);
 	CP(sb11, *nsb, st_size);
 	CP(sb11, *nsb, st_blocks);
 	CP(sb11, *nsb, st_blksize);
 	CP(sb11, *nsb, st_flags);
 	CP(sb11, *nsb, st_gen);
 	CP(sb11, *nsb, st_birthtim);
 	return (0);
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct freebsd11_nstat_args {
 	char	*path;
 	struct nstat *ub;
 };
 #endif
 int
 freebsd11_nstat(struct thread *td, struct freebsd11_nstat_args *uap)
 {
 	struct stat sb;
 	struct nstat nsb;
 	int error;
 
 	error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE,
 	    &sb, NULL);
 	if (error != 0)
 		return (error);
 	error = freebsd11_cvtnstat(&sb, &nsb);
 	if (error == 0)
 		error = copyout(&nsb, uap->ub, sizeof (nsb));
 	return (error);
 }
 
 /*
  * NetBSD lstat.  Get file status; this version does not follow links.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct freebsd11_nlstat_args {
 	char	*path;
 	struct nstat *ub;
 };
 #endif
 int
 freebsd11_nlstat(struct thread *td, struct freebsd11_nlstat_args *uap)
 {
 	struct stat sb;
 	struct nstat nsb;
 	int error;
 
 	error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path,
 	    UIO_USERSPACE, &sb, NULL);
 	if (error != 0)
 		return (error);
 	error = freebsd11_cvtnstat(&sb, &nsb);
 	if (error == 0)
 		error = copyout(&nsb, uap->ub, sizeof (nsb));
 	return (error);
 }
 #endif /* COMPAT_FREEBSD11 */
 
 /*
  * Get configurable pathname variables.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct pathconf_args {
 	char	*path;
 	int	name;
 };
 #endif
 int
 sys_pathconf(struct thread *td, struct pathconf_args *uap)
 {
 	long value;
 	int error;
 
 	error = kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, FOLLOW,
 	    &value);
 	if (error == 0)
 		td->td_retval[0] = value;
 	return (error);
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct lpathconf_args {
 	char	*path;
 	int	name;
 };
 #endif
 int
 sys_lpathconf(struct thread *td, struct lpathconf_args *uap)
 {
 	long value;
 	int error;
 
 	error = kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name,
 	    NOFOLLOW, &value);
 	if (error == 0)
 		td->td_retval[0] = value;
 	return (error);
 }
 
 int
 kern_pathconf(struct thread *td, const char *path, enum uio_seg pathseg,
     int name, u_long flags, long *valuep)
 {
 	struct nameidata nd;
 	int error;
 
 	NDINIT(&nd, LOOKUP, LOCKSHARED | LOCKLEAF | AUDITVNODE1 | flags,
 	       pathseg, path);
 	if ((error = namei(&nd)) != 0)
 		return (error);
-	NDFREE_NOTHING(&nd);
+	NDFREE_PNBUF(&nd);
 
 	error = VOP_PATHCONF(nd.ni_vp, name, valuep);
 	vput(nd.ni_vp);
 	return (error);
 }
 
 /*
  * Return target name of a symbolic link.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct readlink_args {
 	char	*path;
 	char	*buf;
 	size_t	count;
 };
 #endif
 int
 sys_readlink(struct thread *td, struct readlink_args *uap)
 {
 
 	return (kern_readlinkat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 	    uap->buf, UIO_USERSPACE, uap->count));
 }
 #ifndef _SYS_SYSPROTO_H_
 struct readlinkat_args {
 	int	fd;
 	char	*path;
 	char	*buf;
 	size_t	bufsize;
 };
 #endif
 int
 sys_readlinkat(struct thread *td, struct readlinkat_args *uap)
 {
 
 	return (kern_readlinkat(td, uap->fd, uap->path, UIO_USERSPACE,
 	    uap->buf, UIO_USERSPACE, uap->bufsize));
 }
 
 int
 kern_readlinkat(struct thread *td, int fd, const char *path,
     enum uio_seg pathseg, char *buf, enum uio_seg bufseg, size_t count)
 {
 	struct vnode *vp;
 	struct nameidata nd;
 	int error;
 
 	if (count > IOSIZE_MAX)
 		return (EINVAL);
 
 	NDINIT_AT(&nd, LOOKUP, NOFOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1 |
 	    EMPTYPATH, pathseg, path, fd);
 
 	if ((error = namei(&nd)) != 0)
 		return (error);
-	NDFREE_NOTHING(&nd);
+	NDFREE_PNBUF(&nd);
 	vp = nd.ni_vp;
 
 	error = kern_readlink_vp(vp, buf, bufseg, count, td);
 	vput(vp);
 
 	return (error);
 }
 
 /*
  * Helper function to readlink from a vnode
  */
 static int
 kern_readlink_vp(struct vnode *vp, char *buf, enum uio_seg bufseg, size_t count,
     struct thread *td)
 {
 	struct iovec aiov;
 	struct uio auio;
 	int error;
 
 	ASSERT_VOP_LOCKED(vp, "kern_readlink_vp(): vp not locked");
 #ifdef MAC
 	error = mac_vnode_check_readlink(td->td_ucred, vp);
 	if (error != 0)
 		return (error);
 #endif
 	if (vp->v_type != VLNK && (vp->v_vflag & VV_READLINK) == 0)
 		return (EINVAL);
 
 	aiov.iov_base = buf;
 	aiov.iov_len = count;
 	auio.uio_iov = &aiov;
 	auio.uio_iovcnt = 1;
 	auio.uio_offset = 0;
 	auio.uio_rw = UIO_READ;
 	auio.uio_segflg = bufseg;
 	auio.uio_td = td;
 	auio.uio_resid = count;
 	error = VOP_READLINK(vp, &auio, td->td_ucred);
 	td->td_retval[0] = count - auio.uio_resid;
 	return (error);
 }
 
 /*
  * Common implementation code for chflags() and fchflags().
  */
 static int
 setfflags(struct thread *td, struct vnode *vp, u_long flags)
 {
 	struct mount *mp;
 	struct vattr vattr;
 	int error;
 
 	/* We can't support the value matching VNOVAL. */
 	if (flags == VNOVAL)
 		return (EOPNOTSUPP);
 
 	/*
 	 * Prevent non-root users from setting flags on devices.  When
 	 * a device is reused, users can retain ownership of the device
 	 * if they are allowed to set flags and programs assume that
 	 * chown can't fail when done as root.
 	 */
 	if (vp->v_type == VCHR || vp->v_type == VBLK) {
 		error = priv_check(td, PRIV_VFS_CHFLAGS_DEV);
 		if (error != 0)
 			return (error);
 	}
 
 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
 		return (error);
 	VATTR_NULL(&vattr);
 	vattr.va_flags = flags;
 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 #ifdef MAC
 	error = mac_vnode_check_setflags(td->td_ucred, vp, vattr.va_flags);
 	if (error == 0)
 #endif
 		error = VOP_SETATTR(vp, &vattr, td->td_ucred);
 	VOP_UNLOCK(vp);
 	vn_finished_write(mp);
 	return (error);
 }
 
 /*
  * Change flags of a file given a path name.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct chflags_args {
 	const char *path;
 	u_long	flags;
 };
 #endif
 int
 sys_chflags(struct thread *td, struct chflags_args *uap)
 {
 
 	return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 	    uap->flags, 0));
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct chflagsat_args {
 	int	fd;
 	const char *path;
 	u_long	flags;
 	int	atflag;
 }
 #endif
 int
 sys_chflagsat(struct thread *td, struct chflagsat_args *uap)
 {
 
 	return (kern_chflagsat(td, uap->fd, uap->path, UIO_USERSPACE,
 	    uap->flags, uap->atflag));
 }
 
 /*
  * Same as chflags() but doesn't follow symlinks.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct lchflags_args {
 	const char *path;
 	u_long flags;
 };
 #endif
 int
 sys_lchflags(struct thread *td, struct lchflags_args *uap)
 {
 
 	return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 	    uap->flags, AT_SYMLINK_NOFOLLOW));
 }
 
 static int
 kern_chflagsat(struct thread *td, int fd, const char *path,
     enum uio_seg pathseg, u_long flags, int atflag)
 {
 	struct nameidata nd;
 	int error;
 
 	if ((atflag & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH |
 	    AT_EMPTY_PATH)) != 0)
 		return (EINVAL);
 
 	AUDIT_ARG_FFLAGS(flags);
 	NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(atflag, AT_SYMLINK_NOFOLLOW |
 	    AT_RESOLVE_BENEATH | AT_EMPTY_PATH) | AUDITVNODE1, pathseg, path,
 	    fd, &cap_fchflags_rights);
 	if ((error = namei(&nd)) != 0)
 		return (error);
-	NDFREE_NOTHING(&nd);
+	NDFREE_PNBUF(&nd);
 	error = setfflags(td, nd.ni_vp, flags);
 	vrele(nd.ni_vp);
 	return (error);
 }
 
 /*
  * Change flags of a file given a file descriptor.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct fchflags_args {
 	int	fd;
 	u_long	flags;
 };
 #endif
 int
 sys_fchflags(struct thread *td, struct fchflags_args *uap)
 {
 	struct file *fp;
 	int error;
 
 	AUDIT_ARG_FD(uap->fd);
 	AUDIT_ARG_FFLAGS(uap->flags);
 	error = getvnode(td, uap->fd, &cap_fchflags_rights,
 	    &fp);
 	if (error != 0)
 		return (error);
 #ifdef AUDIT
 	if (AUDITING_TD(td)) {
 		vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY);
 		AUDIT_ARG_VNODE1(fp->f_vnode);
 		VOP_UNLOCK(fp->f_vnode);
 	}
 #endif
 	error = setfflags(td, fp->f_vnode, uap->flags);
 	fdrop(fp, td);
 	return (error);
 }
 
 /*
  * Common implementation code for chmod(), lchmod() and fchmod().
  */
 int
 setfmode(struct thread *td, struct ucred *cred, struct vnode *vp, int mode)
 {
 	struct mount *mp;
 	struct vattr vattr;
 	int error;
 
 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
 		return (error);
 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 	VATTR_NULL(&vattr);
 	vattr.va_mode = mode & ALLPERMS;
 #ifdef MAC
 	error = mac_vnode_check_setmode(cred, vp, vattr.va_mode);
 	if (error == 0)
 #endif
 		error = VOP_SETATTR(vp, &vattr, cred);
 	VOP_UNLOCK(vp);
 	vn_finished_write(mp);
 	return (error);
 }
 
 /*
  * Change mode of a file given path name.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct chmod_args {
 	char	*path;
 	int	mode;
 };
 #endif
 int
 sys_chmod(struct thread *td, struct chmod_args *uap)
 {
 
 	return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 	    uap->mode, 0));
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct fchmodat_args {
 	int	dirfd;
 	char	*path;
 	mode_t	mode;
 	int	flag;
 }
 #endif
 int
 sys_fchmodat(struct thread *td, struct fchmodat_args *uap)
 {
 
 	return (kern_fchmodat(td, uap->fd, uap->path, UIO_USERSPACE,
 	    uap->mode, uap->flag));
 }
 
 /*
  * Change mode of a file given path name (don't follow links.)
  */
 #ifndef _SYS_SYSPROTO_H_
 struct lchmod_args {
 	char	*path;
 	int	mode;
 };
 #endif
 int
 sys_lchmod(struct thread *td, struct lchmod_args *uap)
 {
 
 	return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 	    uap->mode, AT_SYMLINK_NOFOLLOW));
 }
 
 int
 kern_fchmodat(struct thread *td, int fd, const char *path,
     enum uio_seg pathseg, mode_t mode, int flag)
 {
 	struct nameidata nd;
 	int error;
 
 	if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH |
 	    AT_EMPTY_PATH)) != 0)
 		return (EINVAL);
 
 	AUDIT_ARG_MODE(mode);
 	NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(flag, AT_SYMLINK_NOFOLLOW |
 	    AT_RESOLVE_BENEATH | AT_EMPTY_PATH) | AUDITVNODE1, pathseg, path,
 	    fd, &cap_fchmod_rights);
 	if ((error = namei(&nd)) != 0)
 		return (error);
-	NDFREE_NOTHING(&nd);
+	NDFREE_PNBUF(&nd);
 	error = setfmode(td, td->td_ucred, nd.ni_vp, mode);
 	vrele(nd.ni_vp);
 	return (error);
 }
 
 /*
  * Change mode of a file given a file descriptor.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct fchmod_args {
 	int	fd;
 	int	mode;
 };
 #endif
 int
 sys_fchmod(struct thread *td, struct fchmod_args *uap)
 {
 	struct file *fp;
 	int error;
 
 	AUDIT_ARG_FD(uap->fd);
 	AUDIT_ARG_MODE(uap->mode);
 
 	error = fget(td, uap->fd, &cap_fchmod_rights, &fp);
 	if (error != 0)
 		return (error);
 	error = fo_chmod(fp, uap->mode, td->td_ucred, td);
 	fdrop(fp, td);
 	return (error);
 }
 
 /*
  * Common implementation for chown(), lchown(), and fchown()
  */
 int
 setfown(struct thread *td, struct ucred *cred, struct vnode *vp, uid_t uid,
     gid_t gid)
 {
 	struct mount *mp;
 	struct vattr vattr;
 	int error;
 
 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
 		return (error);
 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 	VATTR_NULL(&vattr);
 	vattr.va_uid = uid;
 	vattr.va_gid = gid;
 #ifdef MAC
 	error = mac_vnode_check_setowner(cred, vp, vattr.va_uid,
 	    vattr.va_gid);
 	if (error == 0)
 #endif
 		error = VOP_SETATTR(vp, &vattr, cred);
 	VOP_UNLOCK(vp);
 	vn_finished_write(mp);
 	return (error);
 }
 
 /*
  * Set ownership given a path name.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct chown_args {
 	char	*path;
 	int	uid;
 	int	gid;
 };
 #endif
 int
 sys_chown(struct thread *td, struct chown_args *uap)
 {
 
 	return (kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE, uap->uid,
 	    uap->gid, 0));
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct fchownat_args {
 	int fd;
 	const char * path;
 	uid_t uid;
 	gid_t gid;
 	int flag;
 };
 #endif
 int
 sys_fchownat(struct thread *td, struct fchownat_args *uap)
 {
 
 	return (kern_fchownat(td, uap->fd, uap->path, UIO_USERSPACE, uap->uid,
 	    uap->gid, uap->flag));
 }
 
 int
 kern_fchownat(struct thread *td, int fd, const char *path,
     enum uio_seg pathseg, int uid, int gid, int flag)
 {
 	struct nameidata nd;
 	int error;
 
 	if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH |
 	    AT_EMPTY_PATH)) != 0)
 		return (EINVAL);
 
 	AUDIT_ARG_OWNER(uid, gid);
 	NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(flag, AT_SYMLINK_NOFOLLOW |
 	    AT_RESOLVE_BENEATH | AT_EMPTY_PATH) | AUDITVNODE1, pathseg, path,
 	    fd, &cap_fchown_rights);
 
 	if ((error = namei(&nd)) != 0)
 		return (error);
-	NDFREE_NOTHING(&nd);
+	NDFREE_PNBUF(&nd);
 	error = setfown(td, td->td_ucred, nd.ni_vp, uid, gid);
 	vrele(nd.ni_vp);
 	return (error);
 }
 
 /*
  * Set ownership given a path name, do not cross symlinks.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct lchown_args {
 	char	*path;
 	int	uid;
 	int	gid;
 };
 #endif
 int
 sys_lchown(struct thread *td, struct lchown_args *uap)
 {
 
 	return (kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 	    uap->uid, uap->gid, AT_SYMLINK_NOFOLLOW));
 }
 
 /*
  * Set ownership given a file descriptor.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct fchown_args {
 	int	fd;
 	int	uid;
 	int	gid;
 };
 #endif
 int
 sys_fchown(struct thread *td, struct fchown_args *uap)
 {
 	struct file *fp;
 	int error;
 
 	AUDIT_ARG_FD(uap->fd);
 	AUDIT_ARG_OWNER(uap->uid, uap->gid);
 	error = fget(td, uap->fd, &cap_fchown_rights, &fp);
 	if (error != 0)
 		return (error);
 	error = fo_chown(fp, uap->uid, uap->gid, td->td_ucred, td);
 	fdrop(fp, td);
 	return (error);
 }
 
 /*
  * Common implementation code for utimes(), lutimes(), and futimes().
  */
 static int
 getutimes(const struct timeval *usrtvp, enum uio_seg tvpseg,
     struct timespec *tsp)
 {
 	struct timeval tv[2];
 	const struct timeval *tvp;
 	int error;
 
 	if (usrtvp == NULL) {
 		vfs_timestamp(&tsp[0]);
 		tsp[1] = tsp[0];
 	} else {
 		if (tvpseg == UIO_SYSSPACE) {
 			tvp = usrtvp;
 		} else {
 			if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0)
 				return (error);
 			tvp = tv;
 		}
 
 		if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 ||
 		    tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000)
 			return (EINVAL);
 		TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]);
 		TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]);
 	}
 	return (0);
 }
 
 /*
  * Common implementation code for futimens(), utimensat().
  */
 #define	UTIMENS_NULL	0x1
 #define	UTIMENS_EXIT	0x2
 static int
 getutimens(const struct timespec *usrtsp, enum uio_seg tspseg,
     struct timespec *tsp, int *retflags)
 {
 	struct timespec tsnow;
 	int error;
 
 	vfs_timestamp(&tsnow);
 	*retflags = 0;
 	if (usrtsp == NULL) {
 		tsp[0] = tsnow;
 		tsp[1] = tsnow;
 		*retflags |= UTIMENS_NULL;
 		return (0);
 	}
 	if (tspseg == UIO_SYSSPACE) {
 		tsp[0] = usrtsp[0];
 		tsp[1] = usrtsp[1];
 	} else if ((error = copyin(usrtsp, tsp, sizeof(*tsp) * 2)) != 0)
 		return (error);
 	if (tsp[0].tv_nsec == UTIME_OMIT && tsp[1].tv_nsec == UTIME_OMIT)
 		*retflags |= UTIMENS_EXIT;
 	if (tsp[0].tv_nsec == UTIME_NOW && tsp[1].tv_nsec == UTIME_NOW)
 		*retflags |= UTIMENS_NULL;
 	if (tsp[0].tv_nsec == UTIME_OMIT)
 		tsp[0].tv_sec = VNOVAL;
 	else if (tsp[0].tv_nsec == UTIME_NOW)
 		tsp[0] = tsnow;
 	else if (tsp[0].tv_nsec < 0 || tsp[0].tv_nsec >= 1000000000L)
 		return (EINVAL);
 	if (tsp[1].tv_nsec == UTIME_OMIT)
 		tsp[1].tv_sec = VNOVAL;
 	else if (tsp[1].tv_nsec == UTIME_NOW)
 		tsp[1] = tsnow;
 	else if (tsp[1].tv_nsec < 0 || tsp[1].tv_nsec >= 1000000000L)
 		return (EINVAL);
 
 	return (0);
 }
 
 /*
  * Common implementation code for utimes(), lutimes(), futimes(), futimens(),
  * and utimensat().
  */
 static int
 setutimes(struct thread *td, struct vnode *vp, const struct timespec *ts,
     int numtimes, int nullflag)
 {
 	struct mount *mp;
 	struct vattr vattr;
 	int error;
 	bool setbirthtime;
 
 	setbirthtime = false;
 	vattr.va_birthtime.tv_sec = VNOVAL;
 	vattr.va_birthtime.tv_nsec = 0;
 
 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
 		return (error);
 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 	if (numtimes < 3 && VOP_GETATTR(vp, &vattr, td->td_ucred) == 0 &&
 	    timespeccmp(&ts[1], &vattr.va_birthtime, < ))
 		setbirthtime = true;
 	VATTR_NULL(&vattr);
 	vattr.va_atime = ts[0];
 	vattr.va_mtime = ts[1];
 	if (setbirthtime)
 		vattr.va_birthtime = ts[1];
 	if (numtimes > 2)
 		vattr.va_birthtime = ts[2];
 	if (nullflag)
 		vattr.va_vaflags |= VA_UTIMES_NULL;
 #ifdef MAC
 	error = mac_vnode_check_setutimes(td->td_ucred, vp, vattr.va_atime,
 	    vattr.va_mtime);
 #endif
 	if (error == 0)
 		error = VOP_SETATTR(vp, &vattr, td->td_ucred);
 	VOP_UNLOCK(vp);
 	vn_finished_write(mp);
 	return (error);
 }
 
 /*
  * Set the access and modification times of a file.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct utimes_args {
 	char	*path;
 	struct	timeval *tptr;
 };
 #endif
 int
 sys_utimes(struct thread *td, struct utimes_args *uap)
 {
 
 	return (kern_utimesat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 	    uap->tptr, UIO_USERSPACE));
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct futimesat_args {
 	int fd;
 	const char * path;
 	const struct timeval * times;
 };
 #endif
 int
 sys_futimesat(struct thread *td, struct futimesat_args *uap)
 {
 
 	return (kern_utimesat(td, uap->fd, uap->path, UIO_USERSPACE,
 	    uap->times, UIO_USERSPACE));
 }
 
 int
 kern_utimesat(struct thread *td, int fd, const char *path,
     enum uio_seg pathseg, const struct timeval *tptr, enum uio_seg tptrseg)
 {
 	struct nameidata nd;
 	struct timespec ts[2];
 	int error;
 
 	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
 		return (error);
 	NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd,
 	    &cap_futimes_rights);
 
 	if ((error = namei(&nd)) != 0)
 		return (error);
-	NDFREE_NOTHING(&nd);
+	NDFREE_PNBUF(&nd);
 	error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
 	vrele(nd.ni_vp);
 	return (error);
 }
 
 /*
  * Set the access and modification times of a file.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct lutimes_args {
 	char	*path;
 	struct	timeval *tptr;
 };
 #endif
 int
 sys_lutimes(struct thread *td, struct lutimes_args *uap)
 {
 
 	return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr,
 	    UIO_USERSPACE));
 }
 
 int
 kern_lutimes(struct thread *td, const char *path, enum uio_seg pathseg,
     const struct timeval *tptr, enum uio_seg tptrseg)
 {
 	struct timespec ts[2];
 	struct nameidata nd;
 	int error;
 
 	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
 		return (error);
 	NDINIT(&nd, LOOKUP, NOFOLLOW | AUDITVNODE1, pathseg, path);
 	if ((error = namei(&nd)) != 0)
 		return (error);
-	NDFREE_NOTHING(&nd);
+	NDFREE_PNBUF(&nd);
 	error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
 	vrele(nd.ni_vp);
 	return (error);
 }
 
 /*
  * Set the access and modification times of a file.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct futimes_args {
 	int	fd;
 	struct	timeval *tptr;
 };
 #endif
 int
 sys_futimes(struct thread *td, struct futimes_args *uap)
 {
 
 	return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE));
 }
 
 int
 kern_futimes(struct thread *td, int fd, const struct timeval *tptr,
     enum uio_seg tptrseg)
 {
 	struct timespec ts[2];
 	struct file *fp;
 	int error;
 
 	AUDIT_ARG_FD(fd);
 	error = getutimes(tptr, tptrseg, ts);
 	if (error != 0)
 		return (error);
 	error = getvnode(td, fd, &cap_futimes_rights, &fp);
 	if (error != 0)
 		return (error);
 #ifdef AUDIT
 	if (AUDITING_TD(td)) {
 		vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY);
 		AUDIT_ARG_VNODE1(fp->f_vnode);
 		VOP_UNLOCK(fp->f_vnode);
 	}
 #endif
 	error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL);
 	fdrop(fp, td);
 	return (error);
 }
 
 int
 sys_futimens(struct thread *td, struct futimens_args *uap)
 {
 
 	return (kern_futimens(td, uap->fd, uap->times, UIO_USERSPACE));
 }
 
 int
 kern_futimens(struct thread *td, int fd, const struct timespec *tptr,
     enum uio_seg tptrseg)
 {
 	struct timespec ts[2];
 	struct file *fp;
 	int error, flags;
 
 	AUDIT_ARG_FD(fd);
 	error = getutimens(tptr, tptrseg, ts, &flags);
 	if (error != 0)
 		return (error);
 	if (flags & UTIMENS_EXIT)
 		return (0);
 	error = getvnode(td, fd, &cap_futimes_rights, &fp);
 	if (error != 0)
 		return (error);
 #ifdef AUDIT
 	if (AUDITING_TD(td)) {
 		vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY);
 		AUDIT_ARG_VNODE1(fp->f_vnode);
 		VOP_UNLOCK(fp->f_vnode);
 	}
 #endif
 	error = setutimes(td, fp->f_vnode, ts, 2, flags & UTIMENS_NULL);
 	fdrop(fp, td);
 	return (error);
 }
 
 int
 sys_utimensat(struct thread *td, struct utimensat_args *uap)
 {
 
 	return (kern_utimensat(td, uap->fd, uap->path, UIO_USERSPACE,
 	    uap->times, UIO_USERSPACE, uap->flag));
 }
 
 int
 kern_utimensat(struct thread *td, int fd, const char *path,
     enum uio_seg pathseg, const struct timespec *tptr, enum uio_seg tptrseg,
     int flag)
 {
 	struct nameidata nd;
 	struct timespec ts[2];
 	int error, flags;
 
 	if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH |
 	    AT_EMPTY_PATH)) != 0)
 		return (EINVAL);
 
 	if ((error = getutimens(tptr, tptrseg, ts, &flags)) != 0)
 		return (error);
 	NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(flag, AT_SYMLINK_NOFOLLOW |
 	    AT_RESOLVE_BENEATH | AT_EMPTY_PATH) | AUDITVNODE1,
 	    pathseg, path, fd, &cap_futimes_rights);
 	if ((error = namei(&nd)) != 0)
 		return (error);
 	/*
 	 * We are allowed to call namei() regardless of 2xUTIME_OMIT.
 	 * POSIX states:
 	 * "If both tv_nsec fields are UTIME_OMIT... EACCESS may be detected."
 	 * "Search permission is denied by a component of the path prefix."
 	 */
-	NDFREE_NOTHING(&nd);
+	NDFREE_PNBUF(&nd);
 	if ((flags & UTIMENS_EXIT) == 0)
 		error = setutimes(td, nd.ni_vp, ts, 2, flags & UTIMENS_NULL);
 	vrele(nd.ni_vp);
 	return (error);
 }
 
 /*
  * Truncate a file given its path name.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct truncate_args {
 	char	*path;
 	int	pad;
 	off_t	length;
 };
 #endif
 int
 sys_truncate(struct thread *td, struct truncate_args *uap)
 {
 
 	return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length));
 }
 
 int
 kern_truncate(struct thread *td, const char *path, enum uio_seg pathseg,
     off_t length)
 {
 	struct mount *mp;
 	struct vnode *vp;
 	void *rl_cookie;
 	struct nameidata nd;
 	int error;
 
 	if (length < 0)
 		return (EINVAL);
 	NDPREINIT(&nd);
 retry:
 	NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path);
 	if ((error = namei(&nd)) != 0)
 		return (error);
 	vp = nd.ni_vp;
-	NDFREE_NOTHING(&nd);
+	NDFREE_PNBUF(&nd);
 	rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX);
 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
 		vn_rangelock_unlock(vp, rl_cookie);
 		vrele(vp);
 		return (error);
 	}
 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 	if (vp->v_type == VDIR) {
 		error = EISDIR;
 		goto out;
 	}
 #ifdef MAC
 	error = mac_vnode_check_write(td->td_ucred, NOCRED, vp);
 	if (error != 0)
 		goto out;
 #endif
 	error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td);
 	if (error != 0)
 		goto out;
 
 	error = vn_truncate_locked(vp, length, false, td->td_ucred);
 out:
 	VOP_UNLOCK(vp);
 	vn_finished_write(mp);
 	vn_rangelock_unlock(vp, rl_cookie);
 	vrele(vp);
 	if (error == ERELOOKUP)
 		goto retry;
 	return (error);
 }
 
 #if defined(COMPAT_43)
 /*
  * Truncate a file given its path name.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct otruncate_args {
 	char	*path;
 	long	length;
 };
 #endif
 int
 otruncate(struct thread *td, struct otruncate_args *uap)
 {
 
 	return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length));
 }
 #endif /* COMPAT_43 */
 
 #if defined(COMPAT_FREEBSD6)
 /* Versions with the pad argument */
 int
 freebsd6_truncate(struct thread *td, struct freebsd6_truncate_args *uap)
 {
 
 	return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length));
 }
 
 int
 freebsd6_ftruncate(struct thread *td, struct freebsd6_ftruncate_args *uap)
 {
 
 	return (kern_ftruncate(td, uap->fd, uap->length));
 }
 #endif
 
 int
 kern_fsync(struct thread *td, int fd, bool fullsync)
 {
 	struct vnode *vp;
 	struct mount *mp;
 	struct file *fp;
 	int error;
 
 	AUDIT_ARG_FD(fd);
 	error = getvnode(td, fd, &cap_fsync_rights, &fp);
 	if (error != 0)
 		return (error);
 	vp = fp->f_vnode;
 #if 0
 	if (!fullsync)
 		/* XXXKIB: compete outstanding aio writes */;
 #endif
 retry:
 	error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
 	if (error != 0)
 		goto drop;
 	vn_lock(vp, vn_lktype_write(mp, vp) | LK_RETRY);
 	AUDIT_ARG_VNODE1(vp);
 	if (vp->v_object != NULL) {
 		VM_OBJECT_WLOCK(vp->v_object);
 		vm_object_page_clean(vp->v_object, 0, 0, 0);
 		VM_OBJECT_WUNLOCK(vp->v_object);
 	}
 	error = fullsync ? VOP_FSYNC(vp, MNT_WAIT, td) : VOP_FDATASYNC(vp, td);
 	VOP_UNLOCK(vp);
 	vn_finished_write(mp);
 	if (error == ERELOOKUP)
 		goto retry;
 drop:
 	fdrop(fp, td);
 	return (error);
 }
 
 /*
  * Sync an open file.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct fsync_args {
 	int	fd;
 };
 #endif
 int
 sys_fsync(struct thread *td, struct fsync_args *uap)
 {
 
 	return (kern_fsync(td, uap->fd, true));
 }
 
 int
 sys_fdatasync(struct thread *td, struct fdatasync_args *uap)
 {
 
 	return (kern_fsync(td, uap->fd, false));
 }
 
 /*
  * Rename files.  Source and destination must either both be directories, or
  * both not be directories.  If target is a directory, it must be empty.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct rename_args {
 	char	*from;
 	char	*to;
 };
 #endif
 int
 sys_rename(struct thread *td, struct rename_args *uap)
 {
 
 	return (kern_renameat(td, AT_FDCWD, uap->from, AT_FDCWD,
 	    uap->to, UIO_USERSPACE));
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct renameat_args {
 	int	oldfd;
 	char	*old;
 	int	newfd;
 	char	*new;
 };
 #endif
 int
 sys_renameat(struct thread *td, struct renameat_args *uap)
 {
 
 	return (kern_renameat(td, uap->oldfd, uap->old, uap->newfd, uap->new,
 	    UIO_USERSPACE));
 }
 
 #ifdef MAC
 static int
 kern_renameat_mac(struct thread *td, int oldfd, const char *old, int newfd,
     const char *new, enum uio_seg pathseg, struct nameidata *fromnd)
 {
 	int error;
 
 	NDINIT_ATRIGHTS(fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART |
 	    AUDITVNODE1, pathseg, old, oldfd, &cap_renameat_source_rights);
 	if ((error = namei(fromnd)) != 0)
 		return (error);
 	error = mac_vnode_check_rename_from(td->td_ucred, fromnd->ni_dvp,
 	    fromnd->ni_vp, &fromnd->ni_cnd);
 	VOP_UNLOCK(fromnd->ni_dvp);
 	if (fromnd->ni_dvp != fromnd->ni_vp)
 		VOP_UNLOCK(fromnd->ni_vp);
 	if (error != 0) {
 		NDFREE_PNBUF(fromnd);
 		vrele(fromnd->ni_dvp);
 		vrele(fromnd->ni_vp);
 		if (fromnd->ni_startdir)
 			vrele(fromnd->ni_startdir);
 	}
 	return (error);
 }
 #endif
 
 int
 kern_renameat(struct thread *td, int oldfd, const char *old, int newfd,
     const char *new, enum uio_seg pathseg)
 {
 	struct mount *mp = NULL;
 	struct vnode *tvp, *fvp, *tdvp;
 	struct nameidata fromnd, tond;
 	uint64_t tondflags;
 	int error;
 
 again:
 	bwillwrite();
 #ifdef MAC
 	if (mac_vnode_check_rename_from_enabled()) {
 		error = kern_renameat_mac(td, oldfd, old, newfd, new, pathseg,
 		    &fromnd);
 		if (error != 0)
 			return (error);
 	} else {
 #endif
 	NDINIT_ATRIGHTS(&fromnd, DELETE, WANTPARENT | SAVESTART | AUDITVNODE1,
 	    pathseg, old, oldfd, &cap_renameat_source_rights);
 	if ((error = namei(&fromnd)) != 0)
 		return (error);
 #ifdef MAC
 	}
 #endif
 	fvp = fromnd.ni_vp;
 	tondflags = LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | AUDITVNODE2;
 	if (fromnd.ni_vp->v_type == VDIR)
 		tondflags |= WILLBEDIR;
 	NDINIT_ATRIGHTS(&tond, RENAME, tondflags, pathseg, new, newfd,
 	    &cap_renameat_target_rights);
 	if ((error = namei(&tond)) != 0) {
 		/* Translate error code for rename("dir1", "dir2/."). */
 		if (error == EISDIR && fvp->v_type == VDIR)
 			error = EINVAL;
 		NDFREE_PNBUF(&fromnd);
 		vrele(fromnd.ni_dvp);
 		vrele(fvp);
 		goto out1;
 	}
 	tdvp = tond.ni_dvp;
 	tvp = tond.ni_vp;
 	error = vn_start_write(fvp, &mp, V_NOWAIT);
 	if (error != 0) {
 		NDFREE_PNBUF(&fromnd);
 		NDFREE_PNBUF(&tond);
 		if (tvp != NULL)
 			vput(tvp);
 		if (tdvp == tvp)
 			vrele(tdvp);
 		else
 			vput(tdvp);
 		vrele(fromnd.ni_dvp);
 		vrele(fvp);
 		vrele(tond.ni_startdir);
 		if (fromnd.ni_startdir != NULL)
 			vrele(fromnd.ni_startdir);
 		error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH);
 		if (error != 0)
 			return (error);
 		goto again;
 	}
 	if (tvp != NULL) {
 		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
 			error = ENOTDIR;
 			goto out;
 		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
 			error = EISDIR;
 			goto out;
 		}
 #ifdef CAPABILITIES
 		if (newfd != AT_FDCWD && (tond.ni_resflags & NIRES_ABS) == 0) {
 			/*
 			 * If the target already exists we require CAP_UNLINKAT
 			 * from 'newfd', when newfd was used for the lookup.
 			 */
 			error = cap_check(&tond.ni_filecaps.fc_rights,
 			    &cap_unlinkat_rights);
 			if (error != 0)
 				goto out;
 		}
 #endif
 	}
 	if (fvp == tdvp) {
 		error = EINVAL;
 		goto out;
 	}
 	/*
 	 * If the source is the same as the destination (that is, if they
 	 * are links to the same vnode), then there is nothing to do.
 	 */
 	if (fvp == tvp)
 		error = ERESTART;
 #ifdef MAC
 	else
 		error = mac_vnode_check_rename_to(td->td_ucred, tdvp,
 		    tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd);
 #endif
 out:
 	if (error == 0) {
 		error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
 		    tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
 		NDFREE_PNBUF(&fromnd);
 		NDFREE_PNBUF(&tond);
 	} else {
 		NDFREE_PNBUF(&fromnd);
 		NDFREE_PNBUF(&tond);
 		if (tvp != NULL)
 			vput(tvp);
 		if (tdvp == tvp)
 			vrele(tdvp);
 		else
 			vput(tdvp);
 		vrele(fromnd.ni_dvp);
 		vrele(fvp);
 	}
 	vrele(tond.ni_startdir);
 	vn_finished_write(mp);
 out1:
 	if (fromnd.ni_startdir)
 		vrele(fromnd.ni_startdir);
 	if (error == ERESTART)
 		return (0);
 	if (error == ERELOOKUP)
 		goto again;
 	return (error);
 }
 
 /*
  * Make a directory file.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct mkdir_args {
 	char	*path;
 	int	mode;
 };
 #endif
 int
 sys_mkdir(struct thread *td, struct mkdir_args *uap)
 {
 
 	return (kern_mkdirat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 	    uap->mode));
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct mkdirat_args {
 	int	fd;
 	char	*path;
 	mode_t	mode;
 };
 #endif
 int
 sys_mkdirat(struct thread *td, struct mkdirat_args *uap)
 {
 
 	return (kern_mkdirat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode));
 }
 
 int
 kern_mkdirat(struct thread *td, int fd, const char *path, enum uio_seg segflg,
     int mode)
 {
 	struct mount *mp;
 	struct vattr vattr;
 	struct nameidata nd;
 	int error;
 
 	AUDIT_ARG_MODE(mode);
 	NDPREINIT(&nd);
 restart:
 	bwillwrite();
-	NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 |
+	NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | AUDITVNODE1 |
 	    NC_NOMAKEENTRY | NC_KEEPPOSENTRY | FAILIFEXISTS | WILLBEDIR,
 	    segflg, path, fd, &cap_mkdirat_rights);
 	if ((error = namei(&nd)) != 0)
 		return (error);
 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 		NDFREE_PNBUF(&nd);
 		vput(nd.ni_dvp);
 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 			return (error);
 		goto restart;
 	}
 	VATTR_NULL(&vattr);
 	vattr.va_type = VDIR;
 	vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_pd->pd_cmask;
 #ifdef MAC
 	error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
 	    &vattr);
 	if (error != 0)
 		goto out;
 #endif
 	error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
 #ifdef MAC
 out:
 #endif
 	NDFREE_PNBUF(&nd);
 	VOP_VPUT_PAIR(nd.ni_dvp, error == 0 ? &nd.ni_vp : NULL, true);
 	vn_finished_write(mp);
 	if (error == ERELOOKUP)
 		goto restart;
 	return (error);
 }
 
 /*
  * Remove a directory file.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct rmdir_args {
 	char	*path;
 };
 #endif
 int
 sys_rmdir(struct thread *td, struct rmdir_args *uap)
 {
 
 	return (kern_frmdirat(td, AT_FDCWD, uap->path, FD_NONE, UIO_USERSPACE,
 	    0));
 }
 
 int
 kern_frmdirat(struct thread *td, int dfd, const char *path, int fd,
     enum uio_seg pathseg, int flag)
 {
 	struct mount *mp;
 	struct vnode *vp;
 	struct file *fp;
 	struct nameidata nd;
 	cap_rights_t rights;
 	int error;
 
 	fp = NULL;
 	if (fd != FD_NONE) {
 		error = getvnode(td, fd, cap_rights_init_one(&rights,
 		    CAP_LOOKUP), &fp);
 		if (error != 0)
 			return (error);
 	}
 
 	NDPREINIT(&nd);
 restart:
 	bwillwrite();
 	NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1 |
 	    at2cnpflags(flag, AT_RESOLVE_BENEATH),
 	    pathseg, path, dfd, &cap_unlinkat_rights);
 	if ((error = namei(&nd)) != 0)
 		goto fdout;
 	vp = nd.ni_vp;
 	if (vp->v_type != VDIR) {
 		error = ENOTDIR;
 		goto out;
 	}
 	/*
 	 * No rmdir "." please.
 	 */
 	if (nd.ni_dvp == vp) {
 		error = EINVAL;
 		goto out;
 	}
 	/*
 	 * The root of a mounted filesystem cannot be deleted.
 	 */
 	if (vp->v_vflag & VV_ROOT) {
 		error = EBUSY;
 		goto out;
 	}
 
 	if (fp != NULL && fp->f_vnode != vp) {
 		if (VN_IS_DOOMED(fp->f_vnode))
 			error = EBADF;
 		else
 			error = EDEADLK;
 		goto out;
 	}
 
 #ifdef MAC
 	error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp,
 	    &nd.ni_cnd);
 	if (error != 0)
 		goto out;
 #endif
 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 		NDFREE_PNBUF(&nd);
 		vput(vp);
 		if (nd.ni_dvp == vp)
 			vrele(nd.ni_dvp);
 		else
 			vput(nd.ni_dvp);
 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 			goto fdout;
 		goto restart;
 	}
 	vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK);
 	error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
 	vn_finished_write(mp);
 out:
 	NDFREE_PNBUF(&nd);
 	vput(vp);
 	if (nd.ni_dvp == vp)
 		vrele(nd.ni_dvp);
 	else
 		vput(nd.ni_dvp);
 	if (error == ERELOOKUP)
 		goto restart;
 fdout:
 	if (fp != NULL)
 		fdrop(fp, td);
 	return (error);
 }
 
 #if defined(COMPAT_43) || defined(COMPAT_FREEBSD11)
 int
 freebsd11_kern_getdirentries(struct thread *td, int fd, char *ubuf, u_int count,
     long *basep, void (*func)(struct freebsd11_dirent *))
 {
 	struct freebsd11_dirent dstdp;
 	struct dirent *dp, *edp;
 	char *dirbuf;
 	off_t base;
 	ssize_t resid, ucount;
 	int error;
 
 	/* XXX arbitrary sanity limit on `count'. */
 	count = min(count, 64 * 1024);
 
 	dirbuf = malloc(count, M_TEMP, M_WAITOK);
 
 	error = kern_getdirentries(td, fd, dirbuf, count, &base, &resid,
 	    UIO_SYSSPACE);
 	if (error != 0)
 		goto done;
 	if (basep != NULL)
 		*basep = base;
 
 	ucount = 0;
 	for (dp = (struct dirent *)dirbuf,
 	    edp = (struct dirent *)&dirbuf[count - resid];
 	    ucount < count && dp < edp; ) {
 		if (dp->d_reclen == 0)
 			break;
 		MPASS(dp->d_reclen >= _GENERIC_DIRLEN(0));
 		if (dp->d_namlen >= sizeof(dstdp.d_name))
 			continue;
 		dstdp.d_type = dp->d_type;
 		dstdp.d_namlen = dp->d_namlen;
 		dstdp.d_fileno = dp->d_fileno;		/* truncate */
 		if (dstdp.d_fileno != dp->d_fileno) {
 			switch (ino64_trunc_error) {
 			default:
 			case 0:
 				break;
 			case 1:
 				error = EOVERFLOW;
 				goto done;
 			case 2:
 				dstdp.d_fileno = UINT32_MAX;
 				break;
 			}
 		}
 		dstdp.d_reclen = sizeof(dstdp) - sizeof(dstdp.d_name) +
 		    ((dp->d_namlen + 1 + 3) &~ 3);
 		bcopy(dp->d_name, dstdp.d_name, dstdp.d_namlen);
 		bzero(dstdp.d_name + dstdp.d_namlen,
 		    dstdp.d_reclen - offsetof(struct freebsd11_dirent, d_name) -
 		    dstdp.d_namlen);
 		MPASS(dstdp.d_reclen <= dp->d_reclen);
 		MPASS(ucount + dstdp.d_reclen <= count);
 		if (func != NULL)
 			func(&dstdp);
 		error = copyout(&dstdp, ubuf + ucount, dstdp.d_reclen);
 		if (error != 0)
 			break;
 		dp = (struct dirent *)((char *)dp + dp->d_reclen);
 		ucount += dstdp.d_reclen;
 	}
 
 done:
 	free(dirbuf, M_TEMP);
 	if (error == 0)
 		td->td_retval[0] = ucount;
 	return (error);
 }
 #endif /* COMPAT */
 
 #ifdef COMPAT_43
 static void
 ogetdirentries_cvt(struct freebsd11_dirent *dp)
 {
 #if (BYTE_ORDER == LITTLE_ENDIAN)
 	/*
 	 * The expected low byte of dp->d_namlen is our dp->d_type.
 	 * The high MBZ byte of dp->d_namlen is our dp->d_namlen.
 	 */
 	dp->d_type = dp->d_namlen;
 	dp->d_namlen = 0;
 #else
 	/*
 	 * The dp->d_type is the high byte of the expected dp->d_namlen,
 	 * so must be zero'ed.
 	 */
 	dp->d_type = 0;
 #endif
 }
 
 /*
  * Read a block of directory entries in a filesystem independent format.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct ogetdirentries_args {
 	int	fd;
 	char	*buf;
 	u_int	count;
 	long	*basep;
 };
 #endif
 int
 ogetdirentries(struct thread *td, struct ogetdirentries_args *uap)
 {
 	long loff;
 	int error;
 
 	error = kern_ogetdirentries(td, uap, &loff);
 	if (error == 0)
 		error = copyout(&loff, uap->basep, sizeof(long));
 	return (error);
 }
 
 int
 kern_ogetdirentries(struct thread *td, struct ogetdirentries_args *uap,
     long *ploff)
 {
 	long base;
 	int error;
 
 	/* XXX arbitrary sanity limit on `count'. */
 	if (uap->count > 64 * 1024)
 		return (EINVAL);
 
 	error = freebsd11_kern_getdirentries(td, uap->fd, uap->buf, uap->count,
 	    &base, ogetdirentries_cvt);
 
 	if (error == 0 && uap->basep != NULL)
 		error = copyout(&base, uap->basep, sizeof(long));
 
 	return (error);
 }
 #endif /* COMPAT_43 */
 
 #if defined(COMPAT_FREEBSD11)
 #ifndef _SYS_SYSPROTO_H_
 struct freebsd11_getdirentries_args {
 	int	fd;
 	char	*buf;
 	u_int	count;
 	long	*basep;
 };
 #endif
 int
 freebsd11_getdirentries(struct thread *td,
     struct freebsd11_getdirentries_args *uap)
 {
 	long base;
 	int error;
 
 	error = freebsd11_kern_getdirentries(td, uap->fd, uap->buf, uap->count,
 	    &base, NULL);
 
 	if (error == 0 && uap->basep != NULL)
 		error = copyout(&base, uap->basep, sizeof(long));
 	return (error);
 }
 
 int
 freebsd11_getdents(struct thread *td, struct freebsd11_getdents_args *uap)
 {
 	struct freebsd11_getdirentries_args ap;
 
 	ap.fd = uap->fd;
 	ap.buf = uap->buf;
 	ap.count = uap->count;
 	ap.basep = NULL;
 	return (freebsd11_getdirentries(td, &ap));
 }
 #endif /* COMPAT_FREEBSD11 */
 
 /*
  * Read a block of directory entries in a filesystem independent format.
  */
 int
 sys_getdirentries(struct thread *td, struct getdirentries_args *uap)
 {
 	off_t base;
 	int error;
 
 	error = kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base,
 	    NULL, UIO_USERSPACE);
 	if (error != 0)
 		return (error);
 	if (uap->basep != NULL)
 		error = copyout(&base, uap->basep, sizeof(off_t));
 	return (error);
 }
 
 int
 kern_getdirentries(struct thread *td, int fd, char *buf, size_t count,
     off_t *basep, ssize_t *residp, enum uio_seg bufseg)
 {
 	struct vnode *vp;
 	struct file *fp;
 	struct uio auio;
 	struct iovec aiov;
 	off_t loff;
 	int error, eofflag;
 	off_t foffset;
 
 	AUDIT_ARG_FD(fd);
 	if (count > IOSIZE_MAX)
 		return (EINVAL);
 	auio.uio_resid = count;
 	error = getvnode(td, fd, &cap_read_rights, &fp);
 	if (error != 0)
 		return (error);
 	if ((fp->f_flag & FREAD) == 0) {
 		fdrop(fp, td);
 		return (EBADF);
 	}
 	vp = fp->f_vnode;
 	foffset = foffset_lock(fp, 0);
 unionread:
 	if (vp->v_type != VDIR) {
 		error = EINVAL;
 		goto fail;
 	}
 	if (__predict_false((vp->v_vflag & VV_UNLINKED) != 0)) {
 		error = ENOENT;
 		goto fail;
 	}
 	aiov.iov_base = buf;
 	aiov.iov_len = count;
 	auio.uio_iov = &aiov;
 	auio.uio_iovcnt = 1;
 	auio.uio_rw = UIO_READ;
 	auio.uio_segflg = bufseg;
 	auio.uio_td = td;
 	vn_lock(vp, LK_SHARED | LK_RETRY);
 	AUDIT_ARG_VNODE1(vp);
 	loff = auio.uio_offset = foffset;
 #ifdef MAC
 	error = mac_vnode_check_readdir(td->td_ucred, vp);
 	if (error == 0)
 #endif
 		error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL,
 		    NULL);
 	foffset = auio.uio_offset;
 	if (error != 0) {
 		VOP_UNLOCK(vp);
 		goto fail;
 	}
 	if (count == auio.uio_resid &&
 	    (vp->v_vflag & VV_ROOT) &&
 	    (vp->v_mount->mnt_flag & MNT_UNION)) {
 		struct vnode *tvp = vp;
 
 		vp = vp->v_mount->mnt_vnodecovered;
 		VREF(vp);
 		fp->f_vnode = vp;
 		foffset = 0;
 		vput(tvp);
 		goto unionread;
 	}
 	VOP_UNLOCK(vp);
 	*basep = loff;
 	if (residp != NULL)
 		*residp = auio.uio_resid;
 	td->td_retval[0] = count - auio.uio_resid;
 fail:
 	foffset_unlock(fp, foffset, 0);
 	fdrop(fp, td);
 	return (error);
 }
 
 /*
  * Set the mode mask for creation of filesystem nodes.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct umask_args {
 	int	newmask;
 };
 #endif
 int
 sys_umask(struct thread *td, struct umask_args *uap)
 {
 	struct pwddesc *pdp;
 
 	pdp = td->td_proc->p_pd;
 	PWDDESC_XLOCK(pdp);
 	td->td_retval[0] = pdp->pd_cmask;
 	pdp->pd_cmask = uap->newmask & ALLPERMS;
 	PWDDESC_XUNLOCK(pdp);
 	return (0);
 }
 
 /*
  * Void all references to file by ripping underlying filesystem away from
  * vnode.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct revoke_args {
 	char	*path;
 };
 #endif
 int
 sys_revoke(struct thread *td, struct revoke_args *uap)
 {
 	struct vnode *vp;
 	struct vattr vattr;
 	struct nameidata nd;
 	int error;
 
 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE,
 	    uap->path);
 	if ((error = namei(&nd)) != 0)
 		return (error);
 	vp = nd.ni_vp;
-	NDFREE_NOTHING(&nd);
+	NDFREE_PNBUF(&nd);
 	if (vp->v_type != VCHR || vp->v_rdev == NULL) {
 		error = EINVAL;
 		goto out;
 	}
 #ifdef MAC
 	error = mac_vnode_check_revoke(td->td_ucred, vp);
 	if (error != 0)
 		goto out;
 #endif
 	error = VOP_GETATTR(vp, &vattr, td->td_ucred);
 	if (error != 0)
 		goto out;
 	if (td->td_ucred->cr_uid != vattr.va_uid) {
 		error = priv_check(td, PRIV_VFS_ADMIN);
 		if (error != 0)
 			goto out;
 	}
 	if (devfs_usecount(vp) > 0)
 		VOP_REVOKE(vp, REVOKEALL);
 out:
 	vput(vp);
 	return (error);
 }
 
 /*
  * This variant of getvnode() allows O_PATH files.  Caller should
  * ensure that returned file and vnode are only used for compatible
  * semantics.
  */
 int
 getvnode_path(struct thread *td, int fd, cap_rights_t *rightsp,
     struct file **fpp)
 {
 	struct file *fp;
 	int error;
 
 	error = fget_unlocked(td, fd, rightsp, &fp);
 	if (error != 0)
 		return (error);
 
 	/*
 	 * The file could be not of the vnode type, or it may be not
 	 * yet fully initialized, in which case the f_vnode pointer
 	 * may be set, but f_ops is still badfileops.  E.g.,
 	 * devfs_open() transiently create such situation to
 	 * facilitate csw d_fdopen().
 	 *
 	 * Dupfdopen() handling in kern_openat() installs the
 	 * half-baked file into the process descriptor table, allowing
 	 * other thread to dereference it. Guard against the race by
 	 * checking f_ops.
 	 */
 	if (__predict_false(fp->f_vnode == NULL || fp->f_ops == &badfileops)) {
 		fdrop(fp, td);
 		*fpp = NULL;
 		return (EINVAL);
 	}
 
 	*fpp = fp;
 	return (0);
 }
 
 /*
  * Convert a user file descriptor to a kernel file entry and check
  * that, if it is a capability, the correct rights are present.
  * A reference on the file entry is held upon returning.
  */
 int
 getvnode(struct thread *td, int fd, cap_rights_t *rightsp, struct file **fpp)
 {
 	int error;
 
 	error = getvnode_path(td, fd, rightsp, fpp);
 	if (__predict_false(error != 0))
 		return (error);
 
 	/*
 	 * Filter out O_PATH file descriptors, most getvnode() callers
 	 * do not call fo_ methods.
 	 */
 	if (__predict_false((*fpp)->f_ops == &path_fileops)) {
 		fdrop(*fpp, td);
 		*fpp = NULL;
 		error = EBADF;
 	}
 
 	return (error);
 }
 
 /*
  * Get an (NFS) file handle.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct lgetfh_args {
 	char *fname;
 	fhandle_t *fhp;
 };
 #endif
 int
 sys_lgetfh(struct thread *td, struct lgetfh_args *uap)
 {
 
 	return (kern_getfhat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->fname,
 	    UIO_USERSPACE, uap->fhp, UIO_USERSPACE));
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct getfh_args {
 	char *fname;
 	fhandle_t *fhp;
 };
 #endif
 int
 sys_getfh(struct thread *td, struct getfh_args *uap)
 {
 
 	return (kern_getfhat(td, 0, AT_FDCWD, uap->fname, UIO_USERSPACE,
 	    uap->fhp, UIO_USERSPACE));
 }
 
 /*
  * syscall for the rpc.lockd to use to translate an open descriptor into
  * a NFS file handle.
  *
  * warning: do not remove the priv_check() call or this becomes one giant
  * security hole.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct getfhat_args {
 	int fd;
 	char *path;
 	fhandle_t *fhp;
 	int flags;
 };
 #endif
 int
 sys_getfhat(struct thread *td, struct getfhat_args *uap)
 {
 
 	return (kern_getfhat(td, uap->flags, uap->fd, uap->path, UIO_USERSPACE,
 	    uap->fhp, UIO_USERSPACE));
 }
 
 int
 kern_getfhat(struct thread *td, int flags, int fd, const char *path,
     enum uio_seg pathseg, fhandle_t *fhp, enum uio_seg fhseg)
 {
 	struct nameidata nd;
 	fhandle_t fh;
 	struct vnode *vp;
 	int error;
 
 	if ((flags & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH)) != 0)
 		return (EINVAL);
 	error = priv_check(td, PRIV_VFS_GETFH);
 	if (error != 0)
 		return (error);
 	NDINIT_AT(&nd, LOOKUP, at2cnpflags(flags, AT_SYMLINK_NOFOLLOW |
 	    AT_RESOLVE_BENEATH) | LOCKLEAF | AUDITVNODE1, pathseg, path,
 	    fd);
 	error = namei(&nd);
 	if (error != 0)
 		return (error);
-	NDFREE_NOTHING(&nd);
+	NDFREE_PNBUF(&nd);
 	vp = nd.ni_vp;
 	bzero(&fh, sizeof(fh));
 	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
 	error = VOP_VPTOFH(vp, &fh.fh_fid);
 	vput(vp);
 	if (error == 0) {
 		if (fhseg == UIO_USERSPACE)
 			error = copyout(&fh, fhp, sizeof (fh));
 		else
 			memcpy(fhp, &fh, sizeof(fh));
 	}
 	return (error);
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct fhlink_args {
 	fhandle_t *fhp;
 	const char *to;
 };
 #endif
 int
 sys_fhlink(struct thread *td, struct fhlink_args *uap)
 {
 
 	return (kern_fhlinkat(td, AT_FDCWD, uap->to, UIO_USERSPACE, uap->fhp));
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct fhlinkat_args {
 	fhandle_t *fhp;
 	int tofd;
 	const char *to;
 };
 #endif
 int
 sys_fhlinkat(struct thread *td, struct fhlinkat_args *uap)
 {
 
 	return (kern_fhlinkat(td, uap->tofd, uap->to, UIO_USERSPACE, uap->fhp));
 }
 
 static int
 kern_fhlinkat(struct thread *td, int fd, const char *path,
     enum uio_seg pathseg, fhandle_t *fhp)
 {
 	fhandle_t fh;
 	struct mount *mp;
 	struct vnode *vp;
 	int error;
 
 	error = priv_check(td, PRIV_VFS_GETFH);
 	if (error != 0)
 		return (error);
 	error = copyin(fhp, &fh, sizeof(fh));
 	if (error != 0)
 		return (error);
 	do {
 		bwillwrite();
 		if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL)
 			return (ESTALE);
 		error = VFS_FHTOVP(mp, &fh.fh_fid, LK_SHARED, &vp);
 		vfs_unbusy(mp);
 		if (error != 0)
 			return (error);
 		VOP_UNLOCK(vp);
 		error = kern_linkat_vp(td, vp, fd, path, pathseg);
 	} while (error == EAGAIN || error == ERELOOKUP);
 	return (error);
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct fhreadlink_args {
 	fhandle_t *fhp;
 	char *buf;
 	size_t bufsize;
 };
 #endif
 int
 sys_fhreadlink(struct thread *td, struct fhreadlink_args *uap)
 {
 	fhandle_t fh;
 	struct mount *mp;
 	struct vnode *vp;
 	int error;
 
 	error = priv_check(td, PRIV_VFS_GETFH);
 	if (error != 0)
 		return (error);
 	if (uap->bufsize > IOSIZE_MAX)
 		return (EINVAL);
 	error = copyin(uap->fhp, &fh, sizeof(fh));
 	if (error != 0)
 		return (error);
 	if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL)
 		return (ESTALE);
 	error = VFS_FHTOVP(mp, &fh.fh_fid, LK_SHARED, &vp);
 	vfs_unbusy(mp);
 	if (error != 0)
 		return (error);
 	error = kern_readlink_vp(vp, uap->buf, UIO_USERSPACE, uap->bufsize, td);
 	vput(vp);
 	return (error);
 }
 
 /*
  * syscall for the rpc.lockd to use to translate a NFS file handle into an
  * open descriptor.
  *
  * warning: do not remove the priv_check() call or this becomes one giant
  * security hole.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct fhopen_args {
 	const struct fhandle *u_fhp;
 	int flags;
 };
 #endif
 int
 sys_fhopen(struct thread *td, struct fhopen_args *uap)
 {
 	return (kern_fhopen(td, uap->u_fhp, uap->flags));
 }
 
 int
 kern_fhopen(struct thread *td, const struct fhandle *u_fhp, int flags)
 {
 	struct mount *mp;
 	struct vnode *vp;
 	struct fhandle fhp;
 	struct file *fp;
 	int fmode, error;
 	int indx;
 
 	error = priv_check(td, PRIV_VFS_FHOPEN);
 	if (error != 0)
 		return (error);
 	indx = -1;
 	fmode = FFLAGS(flags);
 	/* why not allow a non-read/write open for our lockd? */
 	if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
 		return (EINVAL);
 	error = copyin(u_fhp, &fhp, sizeof(fhp));
 	if (error != 0)
 		return(error);
 	/* find the mount point */
 	mp = vfs_busyfs(&fhp.fh_fsid);
 	if (mp == NULL)
 		return (ESTALE);
 	/* now give me my vnode, it gets returned to me locked */
 	error = VFS_FHTOVP(mp, &fhp.fh_fid, LK_EXCLUSIVE, &vp);
 	vfs_unbusy(mp);
 	if (error != 0)
 		return (error);
 
 	error = falloc_noinstall(td, &fp);
 	if (error != 0) {
 		vput(vp);
 		return (error);
 	}
 	/*
 	 * An extra reference on `fp' has been held for us by
 	 * falloc_noinstall().
 	 */
 
 #ifdef INVARIANTS
 	td->td_dupfd = -1;
 #endif
 	error = vn_open_vnode(vp, fmode, td->td_ucred, td, fp);
 	if (error != 0) {
 		KASSERT(fp->f_ops == &badfileops,
 		    ("VOP_OPEN in fhopen() set f_ops"));
 		KASSERT(td->td_dupfd < 0,
 		    ("fhopen() encountered fdopen()"));
 
 		vput(vp);
 		goto bad;
 	}
 #ifdef INVARIANTS
 	td->td_dupfd = 0;
 #endif
 	fp->f_vnode = vp;
 	finit_vnode(fp, fmode, NULL, &vnops);
 	VOP_UNLOCK(vp);
 	if ((fmode & O_TRUNC) != 0) {
 		error = fo_truncate(fp, 0, td->td_ucred, td);
 		if (error != 0)
 			goto bad;
 	}
 
 	error = finstall(td, fp, &indx, fmode, NULL);
 bad:
 	fdrop(fp, td);
 	td->td_retval[0] = indx;
 	return (error);
 }
 
 /*
  * Stat an (NFS) file handle.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct fhstat_args {
 	struct fhandle *u_fhp;
 	struct stat *sb;
 };
 #endif
 int
 sys_fhstat(struct thread *td, struct fhstat_args *uap)
 {
 	struct stat sb;
 	struct fhandle fh;
 	int error;
 
 	error = copyin(uap->u_fhp, &fh, sizeof(fh));
 	if (error != 0)
 		return (error);
 	error = kern_fhstat(td, fh, &sb);
 	if (error == 0)
 		error = copyout(&sb, uap->sb, sizeof(sb));
 	return (error);
 }
 
 int
 kern_fhstat(struct thread *td, struct fhandle fh, struct stat *sb)
 {
 	struct mount *mp;
 	struct vnode *vp;
 	int error;
 
 	error = priv_check(td, PRIV_VFS_FHSTAT);
 	if (error != 0)
 		return (error);
 	if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL)
 		return (ESTALE);
 	error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp);
 	vfs_unbusy(mp);
 	if (error != 0)
 		return (error);
 	error = VOP_STAT(vp, sb, td->td_ucred, NOCRED);
 	vput(vp);
 	return (error);
 }
 
 /*
  * Implement fstatfs() for (NFS) file handles.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct fhstatfs_args {
 	struct fhandle *u_fhp;
 	struct statfs *buf;
 };
 #endif
 int
 sys_fhstatfs(struct thread *td, struct fhstatfs_args *uap)
 {
 	struct statfs *sfp;
 	fhandle_t fh;
 	int error;
 
 	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
 	if (error != 0)
 		return (error);
 	sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK);
 	error = kern_fhstatfs(td, fh, sfp);
 	if (error == 0)
 		error = copyout(sfp, uap->buf, sizeof(*sfp));
 	free(sfp, M_STATFS);
 	return (error);
 }
 
 int
 kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf)
 {
 	struct mount *mp;
 	struct vnode *vp;
 	int error;
 
 	error = priv_check(td, PRIV_VFS_FHSTATFS);
 	if (error != 0)
 		return (error);
 	if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL)
 		return (ESTALE);
 	error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp);
 	if (error != 0) {
 		vfs_unbusy(mp);
 		return (error);
 	}
 	vput(vp);
 	error = prison_canseemount(td->td_ucred, mp);
 	if (error != 0)
 		goto out;
 #ifdef MAC
 	error = mac_mount_check_stat(td->td_ucred, mp);
 	if (error != 0)
 		goto out;
 #endif
 	error = VFS_STATFS(mp, buf);
 out:
 	vfs_unbusy(mp);
 	return (error);
 }
 
 /*
  * Unlike madvise(2), we do not make a best effort to remember every
  * possible caching hint.  Instead, we remember the last setting with
  * the exception that we will allow POSIX_FADV_NORMAL to adjust the
  * region of any current setting.
  */
 int
 kern_posix_fadvise(struct thread *td, int fd, off_t offset, off_t len,
     int advice)
 {
 	struct fadvise_info *fa, *new;
 	struct file *fp;
 	struct vnode *vp;
 	off_t end;
 	int error;
 
 	if (offset < 0 || len < 0 || offset > OFF_MAX - len)
 		return (EINVAL);
 	AUDIT_ARG_VALUE(advice);
 	switch (advice) {
 	case POSIX_FADV_SEQUENTIAL:
 	case POSIX_FADV_RANDOM:
 	case POSIX_FADV_NOREUSE:
 		new = malloc(sizeof(*fa), M_FADVISE, M_WAITOK);
 		break;
 	case POSIX_FADV_NORMAL:
 	case POSIX_FADV_WILLNEED:
 	case POSIX_FADV_DONTNEED:
 		new = NULL;
 		break;
 	default:
 		return (EINVAL);
 	}
 	/* XXX: CAP_POSIX_FADVISE? */
 	AUDIT_ARG_FD(fd);
 	error = fget(td, fd, &cap_no_rights, &fp);
 	if (error != 0)
 		goto out;
 	AUDIT_ARG_FILE(td->td_proc, fp);
 	if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) {
 		error = ESPIPE;
 		goto out;
 	}
 	if (fp->f_type != DTYPE_VNODE) {
 		error = ENODEV;
 		goto out;
 	}
 	vp = fp->f_vnode;
 	if (vp->v_type != VREG) {
 		error = ENODEV;
 		goto out;
 	}
 	if (len == 0)
 		end = OFF_MAX;
 	else
 		end = offset + len - 1;
 	switch (advice) {
 	case POSIX_FADV_SEQUENTIAL:
 	case POSIX_FADV_RANDOM:
 	case POSIX_FADV_NOREUSE:
 		/*
 		 * Try to merge any existing non-standard region with
 		 * this new region if possible, otherwise create a new
 		 * non-standard region for this request.
 		 */
 		mtx_pool_lock(mtxpool_sleep, fp);
 		fa = fp->f_advice;
 		if (fa != NULL && fa->fa_advice == advice &&
 		    ((fa->fa_start <= end && fa->fa_end >= offset) ||
 		    (end != OFF_MAX && fa->fa_start == end + 1) ||
 		    (fa->fa_end != OFF_MAX && fa->fa_end + 1 == offset))) {
 			if (offset < fa->fa_start)
 				fa->fa_start = offset;
 			if (end > fa->fa_end)
 				fa->fa_end = end;
 		} else {
 			new->fa_advice = advice;
 			new->fa_start = offset;
 			new->fa_end = end;
 			fp->f_advice = new;
 			new = fa;
 		}
 		mtx_pool_unlock(mtxpool_sleep, fp);
 		break;
 	case POSIX_FADV_NORMAL:
 		/*
 		 * If a the "normal" region overlaps with an existing
 		 * non-standard region, trim or remove the
 		 * non-standard region.
 		 */
 		mtx_pool_lock(mtxpool_sleep, fp);
 		fa = fp->f_advice;
 		if (fa != NULL) {
 			if (offset <= fa->fa_start && end >= fa->fa_end) {
 				new = fa;
 				fp->f_advice = NULL;
 			} else if (offset <= fa->fa_start &&
 			    end >= fa->fa_start)
 				fa->fa_start = end + 1;
 			else if (offset <= fa->fa_end && end >= fa->fa_end)
 				fa->fa_end = offset - 1;
 			else if (offset >= fa->fa_start && end <= fa->fa_end) {
 				/*
 				 * If the "normal" region is a middle
 				 * portion of the existing
 				 * non-standard region, just remove
 				 * the whole thing rather than picking
 				 * one side or the other to
 				 * preserve.
 				 */
 				new = fa;
 				fp->f_advice = NULL;
 			}
 		}
 		mtx_pool_unlock(mtxpool_sleep, fp);
 		break;
 	case POSIX_FADV_WILLNEED:
 	case POSIX_FADV_DONTNEED:
 		error = VOP_ADVISE(vp, offset, end, advice);
 		break;
 	}
 out:
 	if (fp != NULL)
 		fdrop(fp, td);
 	free(new, M_FADVISE);
 	return (error);
 }
 
 int
 sys_posix_fadvise(struct thread *td, struct posix_fadvise_args *uap)
 {
 	int error;
 
 	error = kern_posix_fadvise(td, uap->fd, uap->offset, uap->len,
 	    uap->advice);
 	return (kern_posix_error(td, error));
 }
 
 int
 kern_copy_file_range(struct thread *td, int infd, off_t *inoffp, int outfd,
     off_t *outoffp, size_t len, unsigned int flags)
 {
 	struct file *infp, *outfp;
 	struct vnode *invp, *outvp;
 	int error;
 	size_t retlen;
 	void *rl_rcookie, *rl_wcookie;
 	off_t savinoff, savoutoff;
 
 	infp = outfp = NULL;
 	rl_rcookie = rl_wcookie = NULL;
 	savinoff = -1;
 	error = 0;
 	retlen = 0;
 
 	if (flags != 0) {
 		error = EINVAL;
 		goto out;
 	}
 	if (len > SSIZE_MAX)
 		/*
 		 * Although the len argument is size_t, the return argument
 		 * is ssize_t (which is signed).  Therefore a size that won't
 		 * fit in ssize_t can't be returned.
 		 */
 		len = SSIZE_MAX;
 
 	/* Get the file structures for the file descriptors. */
 	error = fget_read(td, infd, &cap_read_rights, &infp);
 	if (error != 0)
 		goto out;
 	if (infp->f_ops == &badfileops) {
 		error = EBADF;
 		goto out;
 	}
 	if (infp->f_vnode == NULL) {
 		error = EINVAL;
 		goto out;
 	}
 	error = fget_write(td, outfd, &cap_write_rights, &outfp);
 	if (error != 0)
 		goto out;
 	if (outfp->f_ops == &badfileops) {
 		error = EBADF;
 		goto out;
 	}
 	if (outfp->f_vnode == NULL) {
 		error = EINVAL;
 		goto out;
 	}
 
 	/* Set the offset pointers to the correct place. */
 	if (inoffp == NULL)
 		inoffp = &infp->f_offset;
 	if (outoffp == NULL)
 		outoffp = &outfp->f_offset;
 	savinoff = *inoffp;
 	savoutoff = *outoffp;
 
 	invp = infp->f_vnode;
 	outvp = outfp->f_vnode;
 	/* Sanity check the f_flag bits. */
 	if ((outfp->f_flag & (FWRITE | FAPPEND)) != FWRITE ||
 	    (infp->f_flag & FREAD) == 0) {
 		error = EBADF;
 		goto out;
 	}
 
 	/* If len == 0, just return 0. */
 	if (len == 0)
 		goto out;
 
 	/*
 	 * If infp and outfp refer to the same file, the byte ranges cannot
 	 * overlap.
 	 */
 	if (invp == outvp && ((savinoff <= savoutoff && savinoff + len >
 	    savoutoff) || (savinoff > savoutoff && savoutoff + len >
 	    savinoff))) {
 		error = EINVAL;
 		goto out;
 	}
 
 	/* Range lock the byte ranges for both invp and outvp. */
 	for (;;) {
 		rl_wcookie = vn_rangelock_wlock(outvp, *outoffp, *outoffp +
 		    len);
 		rl_rcookie = vn_rangelock_tryrlock(invp, *inoffp, *inoffp +
 		    len);
 		if (rl_rcookie != NULL)
 			break;
 		vn_rangelock_unlock(outvp, rl_wcookie);
 		rl_rcookie = vn_rangelock_rlock(invp, *inoffp, *inoffp + len);
 		vn_rangelock_unlock(invp, rl_rcookie);
 	}
 
 	retlen = len;
 	error = vn_copy_file_range(invp, inoffp, outvp, outoffp, &retlen,
 	    flags, infp->f_cred, outfp->f_cred, td);
 out:
 	if (rl_rcookie != NULL)
 		vn_rangelock_unlock(invp, rl_rcookie);
 	if (rl_wcookie != NULL)
 		vn_rangelock_unlock(outvp, rl_wcookie);
 	if (savinoff != -1 && (error == EINTR || error == ERESTART)) {
 		*inoffp = savinoff;
 		*outoffp = savoutoff;
 	}
 	if (outfp != NULL)
 		fdrop(outfp, td);
 	if (infp != NULL)
 		fdrop(infp, td);
 	td->td_retval[0] = retlen;
 	return (error);
 }
 
 int
 sys_copy_file_range(struct thread *td, struct copy_file_range_args *uap)
 {
 	off_t inoff, outoff, *inoffp, *outoffp;
 	int error;
 
 	inoffp = outoffp = NULL;
 	if (uap->inoffp != NULL) {
 		error = copyin(uap->inoffp, &inoff, sizeof(off_t));
 		if (error != 0)
 			return (error);
 		inoffp = &inoff;
 	}
 	if (uap->outoffp != NULL) {
 		error = copyin(uap->outoffp, &outoff, sizeof(off_t));
 		if (error != 0)
 			return (error);
 		outoffp = &outoff;
 	}
 	error = kern_copy_file_range(td, uap->infd, inoffp, uap->outfd,
 	    outoffp, uap->len, uap->flags);
 	if (error == 0 && uap->inoffp != NULL)
 		error = copyout(inoffp, uap->inoffp, sizeof(off_t));
 	if (error == 0 && uap->outoffp != NULL)
 		error = copyout(outoffp, uap->outoffp, sizeof(off_t));
 	return (error);
 }
diff --git a/sys/sys/namei.h b/sys/sys/namei.h
index f6f6546342c0..75e4051d8879 100644
--- a/sys/sys/namei.h
+++ b/sys/sys/namei.h
@@ -1,327 +1,317 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 1985, 1989, 1991, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)namei.h	8.5 (Berkeley) 1/9/95
  * $FreeBSD$
  */
 
 #ifndef _SYS_NAMEI_H_
 #define	_SYS_NAMEI_H_
 
 #include <sys/caprights.h>
 #include <sys/filedesc.h>
 #include <sys/queue.h>
 #include <sys/_seqc.h>
 #include <sys/_uio.h>
 
 enum nameiop { LOOKUP, CREATE, DELETE, RENAME };
 
 struct componentname {
 	/*
 	 * Arguments to lookup.
 	 */
 	u_int64_t cn_origflags;	/* flags to namei */
 	u_int64_t cn_flags;	/* flags to namei */
 	struct	ucred *cn_cred;	/* credentials */
 	enum nameiop cn_nameiop;	/* namei operation */
 	int	cn_lkflags;	/* Lock flags LK_EXCLUSIVE or LK_SHARED */
 	/*
 	 * Shared between lookup and commit routines.
 	 */
 	char	*cn_pnbuf;	/* pathname buffer */
 	char	*cn_nameptr;	/* pointer to looked up name */
 	long	cn_namelen;	/* length of looked up component */
 };
 
 struct nameicap_tracker;
 TAILQ_HEAD(nameicap_tracker_head, nameicap_tracker);
 
 /*
  * Encapsulation of namei parameters.
  */
 struct nameidata {
 	/*
 	 * Arguments to namei/lookup.
 	 */
 	const	char *ni_dirp;		/* pathname pointer */
 	enum	uio_seg ni_segflg;	/* location of pathname */
 	cap_rights_t *ni_rightsneeded;	/* rights required to look up vnode */
 	/*
 	 * Arguments to lookup.
 	 */
 	struct  vnode *ni_startdir;	/* starting directory */
 	struct	vnode *ni_rootdir;	/* logical root directory */
 	struct	vnode *ni_topdir;	/* logical top directory */
 	int	ni_dirfd;		/* starting directory for *at functions */
 	int	ni_lcf;			/* local call flags */
 	/*
 	 * Results: returned from namei
 	 */
 	struct filecaps ni_filecaps;	/* rights the *at base has */
 	/*
 	 * Results: returned from/manipulated by lookup
 	 */
 	struct	vnode *ni_vp;		/* vnode of result */
 	struct	vnode *ni_dvp;		/* vnode of intermediate directory */
 	/*
 	 * Results: flags returned from namei
 	 */
 	u_int	ni_resflags;
 	/*
 	 * Debug for validating API use by the callers.
 	 */
 	u_short	ni_debugflags;
 	/*
 	 * Shared between namei and lookup/commit routines.
 	 */
 	u_short	ni_loopcnt;		/* count of symlinks encountered */
 	size_t	ni_pathlen;		/* remaining chars in path */
 	char	*ni_next;		/* next location in pathname */
 	/*
 	 * Lookup parameters: this structure describes the subset of
 	 * information from the nameidata structure that is passed
 	 * through the VOP interface.
 	 */
 	struct componentname ni_cnd;
 	struct nameicap_tracker_head ni_cap_tracker;
 	/*
 	 * Private helper data for UFS, must be at the end.  See
 	 * NDINIT_PREFILL().
 	 */
 	seqc_t	ni_dvp_seqc;
 	seqc_t	ni_vp_seqc;
 };
 
 #ifdef _KERNEL
 
 enum cache_fpl_status { CACHE_FPL_STATUS_DESTROYED, CACHE_FPL_STATUS_ABORTED,
     CACHE_FPL_STATUS_PARTIAL, CACHE_FPL_STATUS_HANDLED, CACHE_FPL_STATUS_UNSET };
 int	cache_fplookup(struct nameidata *ndp, enum cache_fpl_status *status,
     struct pwd **pwdp);
 
 /*
  * Flags for namei.
  *
  * If modifying the list make sure to check whether NDVALIDATE needs updating.
  */
 
 /*
  * Debug.
  */
 #define	NAMEI_DBG_INITED	0x0001
 #define	NAMEI_DBG_CALLED	0x0002
 #define	NAMEI_DBG_HADSTARTDIR	0x0004
 
 /*
  * namei operational modifier flags, stored in ni_cnd.flags
  */
 #define	NC_NOMAKEENTRY	0x0001	/* name must not be added to cache */
 #define	NC_KEEPPOSENTRY	0x0002	/* don't evict a positive entry */
 #define	NOCACHE		NC_NOMAKEENTRY	/* for compatibility with older code */
 #define	LOCKLEAF	0x0004	/* lock vnode on return */
 #define	LOCKPARENT	0x0008	/* want parent vnode returned locked */
 #define	WANTPARENT	0x0010	/* want parent vnode returned unlocked */
 #define	FAILIFEXISTS	0x0020	/* return EEXIST if found */
 #define	FOLLOW		0x0040	/* follow symbolic links */
 #define	EMPTYPATH	0x0080	/* Allow empty path for *at */
 #define	LOCKSHARED	0x0100	/* Shared lock leaf */
 #define	NOFOLLOW	0x0000	/* do not follow symbolic links (pseudo) */
 #define	RBENEATH	0x100000000ULL /* No escape, even tmp, from start dir */
 #define	MODMASK		0xf000001ffULL	/* mask of operational modifiers */
 
 /*
  * Namei parameter descriptors.
- *
- * SAVENAME may be set by either the callers of namei or by VOP_LOOKUP.
- * If the caller of namei sets the flag (for example execve wants to
- * know the name of the program that is being executed), then it must
- * free the buffer. If VOP_LOOKUP sets the flag, then the buffer must
- * be freed by either the commit routine or the VOP_ABORT routine.
- * SAVESTART is set only by the callers of namei. It implies SAVENAME
- * plus the addition of saving the parent directory that contains the
- * name in ni_startdir. It allows repeated calls to lookup for the
- * name being sought. The caller is responsible for releasing the
- * buffer and for vrele'ing ni_startdir.
  */
 #define	RDONLY		0x00000200 /* lookup with read-only semantics */
-#define	SAVENAME	0x00000400 /* save pathname buffer */
+/* UNUSED		0x00000400 */
 #define	SAVESTART	0x00000800 /* save starting directory */
 #define	ISWHITEOUT	0x00001000 /* found whiteout */
 #define	DOWHITEOUT	0x00002000 /* do whiteouts */
 #define	WILLBEDIR	0x00004000 /* new files will be dirs; allow trailing / */
 #define	ISOPEN		0x00008000 /* caller is opening; return a real vnode. */
 #define	NOCROSSMOUNT	0x00010000 /* do not cross mount points */
 #define	NOMACCHECK	0x00020000 /* do not perform MAC checks */
 #define	AUDITVNODE1	0x00040000 /* audit the looked up vnode information */
 #define	AUDITVNODE2	0x00080000 /* audit the looked up vnode information */
 #define	NOCAPCHECK	0x00100000 /* do not perform capability checks */
 #define	OPENREAD	0x00200000 /* open for reading */
 #define	OPENWRITE	0x00400000 /* open for writing */
 #define	WANTIOCTLCAPS	0x00800000 /* leave ioctl caps for the caller */
-#define	HASBUF		0x01000000 /* has allocated pathname buffer */
+/* UNUSED		0x01000000 */
 #define	NOEXECCHECK	0x02000000 /* do not perform exec check on dir */
 #define	MAKEENTRY	0x04000000 /* entry is to be added to name cache */
 #define	ISSYMLINK	0x08000000 /* symlink needs interpretation */
 #define	ISLASTCN	0x10000000 /* this is last component of pathname */
 #define	ISDOTDOT	0x20000000 /* current component name is .. */
 #define	TRAILINGSLASH	0x40000000 /* path ended in a slash */
 #define	PARAMASK	0x7ffffe00 /* mask of parameter descriptors */
 
 /*
  * Flags which must not be passed in by callers.
  */
 #define NAMEI_INTERNAL_FLAGS	\
-	(HASBUF | NOEXECCHECK | MAKEENTRY | ISSYMLINK | ISLASTCN | ISDOTDOT | \
+	(NOEXECCHECK | MAKEENTRY | ISSYMLINK | ISLASTCN | ISDOTDOT | \
 	 TRAILINGSLASH)
 
 /*
  * Namei results flags
  */
 #define	NIRES_ABS	0x00000001 /* Path was absolute */
 #define	NIRES_STRICTREL	0x00000002 /* Restricted lookup result */
 #define	NIRES_EMPTYPATH	0x00000004 /* EMPTYPATH used */
 
 /*
  * Flags in ni_lcf, valid for the duration of the namei call.
  */
 #define	NI_LCF_STRICTRELATIVE	0x0001	/* relative lookup only */
 #define	NI_LCF_CAP_DOTDOT	0x0002	/* ".." in strictrelative case */
 
 /*
  * Initialization of a nameidata structure.
  */
 #define	NDINIT(ndp, op, flags, segflg, namep)				\
 	NDINIT_ALL(ndp, op, flags, segflg, namep, AT_FDCWD, NULL, &cap_no_rights)
 #define	NDINIT_AT(ndp, op, flags, segflg, namep, dirfd)			\
 	NDINIT_ALL(ndp, op, flags, segflg, namep, dirfd, NULL, &cap_no_rights)
 #define	NDINIT_ATRIGHTS(ndp, op, flags, segflg, namep, dirfd, rightsp) 	\
 	NDINIT_ALL(ndp, op, flags, segflg, namep, dirfd, NULL, rightsp)
 #define	NDINIT_ATVP(ndp, op, flags, segflg, namep, vp)			\
 	NDINIT_ALL(ndp, op, flags, segflg, namep, AT_FDCWD, vp, &cap_no_rights)
 
 /*
  * Note the constant pattern may *hide* bugs.
  * Note also that we enable debug checks for non-TIED KLDs
  * so that they can run on an INVARIANTS kernel without tripping over
  * assertions on ni_debugflags state.
  */
 #if defined(INVARIANTS) || (defined(KLD_MODULE) && !defined(KLD_TIED))
 #define NDINIT_PREFILL(arg)	memset(arg, 0xff, offsetof(struct nameidata,	\
     ni_dvp_seqc))
 #define NDINIT_DBG(arg)		{ (arg)->ni_debugflags = NAMEI_DBG_INITED; }
 #define NDREINIT_DBG(arg)	{						\
 	if (((arg)->ni_debugflags & NAMEI_DBG_INITED) == 0)			\
 		panic("namei data not inited");					\
 	if (((arg)->ni_debugflags & NAMEI_DBG_HADSTARTDIR) != 0)		\
 		panic("NDREINIT on namei data with NAMEI_DBG_HADSTARTDIR");	\
 	(arg)->ni_debugflags = NAMEI_DBG_INITED;				\
 }
 #else
 #define NDINIT_PREFILL(arg)	do { } while (0)
 #define NDINIT_DBG(arg)		do { } while (0)
 #define NDREINIT_DBG(arg)	do { } while (0)
 #endif
 
 #define NDINIT_ALL(ndp, op, flags, segflg, namep, dirfd, startdir, rightsp)	\
 do {										\
 	struct nameidata *_ndp = (ndp);						\
 	cap_rights_t *_rightsp = (rightsp);					\
 	MPASS(_rightsp != NULL);						\
 	NDINIT_PREFILL(_ndp);							\
 	NDINIT_DBG(_ndp);							\
 	_ndp->ni_cnd.cn_nameiop = op;						\
 	_ndp->ni_cnd.cn_flags = flags;						\
 	_ndp->ni_segflg = segflg;						\
 	_ndp->ni_dirp = namep;							\
 	_ndp->ni_dirfd = dirfd;							\
 	_ndp->ni_startdir = startdir;						\
 	_ndp->ni_resflags = 0;							\
 	filecaps_init(&_ndp->ni_filecaps);					\
 	_ndp->ni_rightsneeded = _rightsp;					\
 } while (0)
 
 #define NDREINIT(ndp)	do {							\
 	struct nameidata *_ndp = (ndp);						\
 	NDREINIT_DBG(_ndp);							\
 	filecaps_free(&_ndp->ni_filecaps);					\
 	_ndp->ni_resflags = 0;							\
 	_ndp->ni_startdir = NULL;						\
 } while (0)
 
 #define	NDPREINIT(ndp) do {							\
 	(ndp)->ni_dvp_seqc = SEQC_MOD;						\
 	(ndp)->ni_vp_seqc = SEQC_MOD;						\
 } while (0)
 
 #define NDF_NO_DVP_RELE		0x00000001
 #define NDF_NO_DVP_UNLOCK	0x00000002
 #define NDF_NO_DVP_PUT		0x00000003
 #define NDF_NO_VP_RELE		0x00000004
 #define NDF_NO_VP_UNLOCK	0x00000008
 #define NDF_NO_VP_PUT		0x0000000c
 #define NDF_NO_STARTDIR_RELE	0x00000010
 #define NDF_NO_FREE_PNBUF	0x00000020
 
 #define NDFREE_IOCTLCAPS(ndp) do {						\
 	struct nameidata *_ndp = (ndp);						\
 	filecaps_free(&_ndp->ni_filecaps);					\
 } while (0)
-void NDFREE_PNBUF(struct nameidata *);
-void NDFREE(struct nameidata *, const u_int);
 
-#ifdef INVARIANTS
-void NDFREE_NOTHING(struct nameidata *);
-#else
-#define NDFREE_NOTHING(ndp)	do { } while (0)
-#endif
+#define	NDFREE_PNBUF(ndp) do {							\
+	struct nameidata *_ndp = (ndp);						\
+	MPASS(_ndp->ni_cnd.cn_pnbuf != NULL);					\
+	uma_zfree(namei_zone, _ndp->ni_cnd.cn_pnbuf);				\
+	_ndp->ni_cnd.cn_pnbuf = NULL;						\
+} while (0)
+
+void NDFREE(struct nameidata *, const u_int);
 
 int	namei(struct nameidata *ndp);
 int	vfs_lookup(struct nameidata *ndp);
 int	vfs_relookup(struct vnode *dvp, struct vnode **vpp,
 	    struct componentname *cnp);
 #endif
 
 /*
  * Stats on usefulness of namei caches.
  */
 struct nchstats {
 	long	ncs_goodhits;		/* hits that we can really use */
 	long	ncs_neghits;		/* negative hits that we can use */
 	long	ncs_badhits;		/* hits we must drop */
 	long	ncs_falsehits;		/* hits with id mismatch */
 	long	ncs_miss;		/* misses */
 	long	ncs_long;		/* long names that ignore cache */
 	long	ncs_pass2;		/* names found with passes == 2 */
 	long	ncs_2passes;		/* number of times we attempt it */
 };
 
 extern struct nchstats nchstats;
 
 #endif /* !_SYS_NAMEI_H_ */
diff --git a/sys/sys/param.h b/sys/sys/param.h
index 2d22202b5fbd..e4ab2b31fe46 100644
--- a/sys/sys/param.h
+++ b/sys/sys/param.h
@@ -1,390 +1,390 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 1982, 1986, 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
  * (c) UNIX System Laboratories, Inc.
  * All or some portions of this file are derived from material licensed
  * to the University of California by American Telephone and Telegraph
  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
  * the permission of UNIX System Laboratories, Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)param.h	8.3 (Berkeley) 4/4/95
  * $FreeBSD$
  */
 
 #ifndef _SYS_PARAM_H_
 #define _SYS_PARAM_H_
 
 #include <sys/_null.h>
 
 #define	BSD	199506		/* System version (year & month). */
 #define BSD4_3	1
 #define BSD4_4	1
 
 /*
  * __FreeBSD_version numbers are documented in the Porter's Handbook.
  * If you bump the version for any reason, you should update the documentation
  * there.
  * Currently this lives here in the doc/ repository:
  *
  *	documentation/content/en/books/porters-handbook/versions/_index.adoc
  *
  * Encoding:	<major><two digit minor>Rxx
  *		'R' is in the range 0 to 4 if this is a release branch or
  *		X.0-CURRENT before releng/X.0 is created, otherwise 'R' is
  *		in the range 5 to 9.
  * Short hand: MMmmXXX
  *
  * __FreeBSD_version is bumped every time there's a change in the base system
  * that's noteworthy. A noteworthy change is any change which changes the
  * kernel's KBI in -CURRENT, one that changes some detail about the system that
  * external software (or the ports system) would want to know about, one that
  * adds a system call, one that adds or deletes a shipped library, a security
  * fix, or similar change not specifically noted here. Bumps should be limited
  * to one per day / a couple per week except for security fixes.
  *
  * The approved way to obtain this from a shell script is:
  *	awk '/^\#define[[:space:]]*__FreeBSD_version/ {print $3}'
  * Other methods to parse this file may work, but are not guaranteed against
  * future changes. The above script works back to FreeBSD 3.x when this macro
  * was introduced. This number is propagated to other places needing it that
  * cannot include sys/param.h and should only be updated here.
  */
 #undef __FreeBSD_version
-#define __FreeBSD_version 1400067
+#define __FreeBSD_version 1400068
 
 /*
  * __FreeBSD_kernel__ indicates that this system uses the kernel of FreeBSD,
  * which by definition is always true on FreeBSD. This macro is also defined
  * on other systems that use the kernel of FreeBSD, such as GNU/kFreeBSD.
  *
  * It is tempting to use this macro in userland code when we want to enable
  * kernel-specific routines, and in fact it's fine to do this in code that
  * is part of FreeBSD itself.  However, be aware that as presence of this
  * macro is still not widespread (e.g. older FreeBSD versions, 3rd party
  * compilers, etc), it is STRONGLY DISCOURAGED to check for this macro in
  * external applications without also checking for __FreeBSD__ as an
  * alternative.
  */
 #undef __FreeBSD_kernel__
 #define __FreeBSD_kernel__
 
 #if defined(_KERNEL) || defined(IN_RTLD)
 #define	P_OSREL_SIGWAIT			700000
 #define	P_OSREL_SIGSEGV			700004
 #define	P_OSREL_MAP_ANON		800104
 #define	P_OSREL_MAP_FSTRICT		1100036
 #define	P_OSREL_SHUTDOWN_ENOTCONN	1100077
 #define	P_OSREL_MAP_GUARD		1200035
 #define	P_OSREL_WRFSBASE		1200041
 #define	P_OSREL_CK_CYLGRP		1200046
 #define	P_OSREL_VMTOTAL64		1200054
 #define	P_OSREL_CK_SUPERBLOCK		1300000
 #define	P_OSREL_CK_INODE		1300005
 #define	P_OSREL_POWERPC_NEW_AUX_ARGS	1300070
 
 #define	P_OSREL_MAJOR(x)		((x) / 100000)
 #endif
 
 #ifndef LOCORE
 #include <sys/types.h>
 #endif
 
 /*
  * Machine-independent constants (some used in following include files).
  * Redefined constants are from POSIX 1003.1 limits file.
  *
  * MAXCOMLEN should be >= sizeof(ac_comm) (see <acct.h>)
  */
 #include <sys/syslimits.h>
 
 #define	MAXCOMLEN	19		/* max command name remembered */
 #define	MAXINTERP	PATH_MAX	/* max interpreter file name length */
 #define	MAXLOGNAME	33		/* max login name length (incl. NUL) */
 #define	MAXUPRC		CHILD_MAX	/* max simultaneous processes */
 #define	NCARGS		ARG_MAX		/* max bytes for an exec function */
 #define	NGROUPS		(NGROUPS_MAX+1)	/* max number groups */
 #define	NOFILE		OPEN_MAX	/* max open files per process */
 #define	NOGROUP		65535		/* marker for empty group set member */
 #define MAXHOSTNAMELEN	256		/* max hostname size */
 #define SPECNAMELEN	255		/* max length of devicename */
 
 /* More types and definitions used throughout the kernel. */
 #ifdef _KERNEL
 #include <sys/cdefs.h>
 #include <sys/errno.h>
 #ifndef LOCORE
 #include <sys/time.h>
 #include <sys/priority.h>
 #endif
 
 #ifndef FALSE
 #define	FALSE	0
 #endif
 #ifndef TRUE
 #define	TRUE	1
 #endif
 #endif
 
 #ifndef _KERNEL
 #ifndef LOCORE
 /* Signals. */
 #include <sys/signal.h>
 #endif
 #endif
 
 /* Machine type dependent parameters. */
 #include <machine/param.h>
 #ifndef _KERNEL
 #include <sys/limits.h>
 #endif
 
 #ifndef DEV_BSHIFT
 #define	DEV_BSHIFT	9		/* log2(DEV_BSIZE) */
 #endif
 #define	DEV_BSIZE	(1<<DEV_BSHIFT)
 
 #ifndef BLKDEV_IOSIZE
 #define BLKDEV_IOSIZE  PAGE_SIZE	/* default block device I/O size */
 #endif
 #ifndef DFLTPHYS
 #define DFLTPHYS	(64 * 1024)	/* default max raw I/O transfer size */
 #endif
 #ifndef MAXPHYS				/* max raw I/O transfer size */
 #ifdef __ILP32__
 #define MAXPHYS		(128 * 1024)
 #else
 #define MAXPHYS		(1024 * 1024)
 #endif
 #endif
 #ifndef MAXDUMPPGS
 #define MAXDUMPPGS	(DFLTPHYS/PAGE_SIZE)
 #endif
 
 /*
  * Constants related to network buffer management.
  * MCLBYTES must be no larger than PAGE_SIZE.
  */
 #ifndef	MSIZE
 #define	MSIZE		256		/* size of an mbuf */
 #endif
 
 #ifndef	MCLSHIFT
 #define MCLSHIFT	11		/* convert bytes to mbuf clusters */
 #endif	/* MCLSHIFT */
 
 #define MCLBYTES	(1 << MCLSHIFT)	/* size of an mbuf cluster */
 
 #if PAGE_SIZE <= 8192
 #define	MJUMPAGESIZE	PAGE_SIZE
 #else
 #define	MJUMPAGESIZE	(8 * 1024)
 #endif
 
 #define	MJUM9BYTES	(9 * 1024)	/* jumbo cluster 9k */
 #define	MJUM16BYTES	(16 * 1024)	/* jumbo cluster 16k */
 
 /*
  * Some macros for units conversion
  */
 
 /* clicks to bytes */
 #ifndef ctob
 #define ctob(x)	((x)<<PAGE_SHIFT)
 #endif
 
 /* bytes to clicks */
 #ifndef btoc
 #define btoc(x)	(((vm_offset_t)(x)+PAGE_MASK)>>PAGE_SHIFT)
 #endif
 
 /*
  * btodb() is messy and perhaps slow because `bytes' may be an off_t.  We
  * want to shift an unsigned type to avoid sign extension and we don't
  * want to widen `bytes' unnecessarily.  Assume that the result fits in
  * a daddr_t.
  */
 #ifndef btodb
 #define btodb(bytes)	 		/* calculates (bytes / DEV_BSIZE) */ \
 	(sizeof (bytes) > sizeof(long) \
 	 ? (daddr_t)((unsigned long long)(bytes) >> DEV_BSHIFT) \
 	 : (daddr_t)((unsigned long)(bytes) >> DEV_BSHIFT))
 #endif
 
 #ifndef dbtob
 #define dbtob(db)			/* calculates (db * DEV_BSIZE) */ \
 	((off_t)(db) << DEV_BSHIFT)
 #endif
 
 #define	PRIMASK		0x0ff
 #define	PCATCH		0x100	/* OR'd with pri for tsleep to check signals */
 #define	PDROP		0x200	/* OR'd with pri to stop re-entry of interlock mutex */
 #define	PNOLOCK		0x400	/* OR'd with pri to allow sleeping w/o a lock */
 #define	PRILASTFLAG	0x400	/* Last flag defined above */
 
 #define	NZERO	0		/* default "nice" */
 
 #define	NBBY	8		/* number of bits in a byte */
 #define	NBPW	sizeof(int)	/* number of bytes per word (integer) */
 
 #define	CMASK	022		/* default file mask: S_IWGRP|S_IWOTH */
 
 #define	NODEV	(dev_t)(-1)	/* non-existent device */
 
 /*
  * File system parameters and macros.
  *
  * MAXBSIZE -	Filesystems are made out of blocks of at most MAXBSIZE bytes
  *		per block.  MAXBSIZE may be made larger without effecting
  *		any existing filesystems as long as it does not exceed MAXPHYS,
  *		and may be made smaller at the risk of not being able to use
  *		filesystems which require a block size exceeding MAXBSIZE.
  *
  * MAXBCACHEBUF - Maximum size of a buffer in the buffer cache.  This must
  *		be >= MAXBSIZE and can be set differently for different
  *		architectures by defining it in <machine/param.h>.
  *		Making this larger allows NFS to do larger reads/writes.
  *
  * BKVASIZE -	Nominal buffer space per buffer, in bytes.  BKVASIZE is the
  *		minimum KVM memory reservation the kernel is willing to make.
  *		Filesystems can of course request smaller chunks.  Actual
  *		backing memory uses a chunk size of a page (PAGE_SIZE).
  *		The default value here can be overridden on a per-architecture
  *		basis by defining it in <machine/param.h>.
  *
  *		If you make BKVASIZE too small you risk seriously fragmenting
  *		the buffer KVM map which may slow things down a bit.  If you
  *		make it too big the kernel will not be able to optimally use
  *		the KVM memory reserved for the buffer cache and will wind
  *		up with too-few buffers.
  *
  *		The default is 16384, roughly 2x the block size used by a
  *		normal UFS filesystem.
  */
 #define MAXBSIZE	65536	/* must be power of 2 */
 #ifndef	MAXBCACHEBUF
 #define	MAXBCACHEBUF	MAXBSIZE /* must be a power of 2 >= MAXBSIZE */
 #endif
 #ifndef	BKVASIZE
 #define BKVASIZE	16384	/* must be power of 2 */
 #endif
 #define BKVAMASK	(BKVASIZE-1)
 
 /*
  * MAXPATHLEN defines the longest permissible path length after expanding
  * symbolic links. It is used to allocate a temporary buffer from the buffer
  * pool in which to do the name expansion, hence should be a power of two,
  * and must be less than or equal to MAXBSIZE.  MAXSYMLINKS defines the
  * maximum number of symbolic links that may be expanded in a path name.
  * It should be set high enough to allow all legitimate uses, but halt
  * infinite loops reasonably quickly.
  */
 #define	MAXPATHLEN	PATH_MAX
 #define MAXSYMLINKS	32
 
 /* Bit map related macros. */
 #define	setbit(a,i)	(((unsigned char *)(a))[(i)/NBBY] |= 1<<((i)%NBBY))
 #define	clrbit(a,i)	(((unsigned char *)(a))[(i)/NBBY] &= ~(1<<((i)%NBBY)))
 #define	isset(a,i)							\
 	(((const unsigned char *)(a))[(i)/NBBY] & (1<<((i)%NBBY)))
 #define	isclr(a,i)							\
 	((((const unsigned char *)(a))[(i)/NBBY] & (1<<((i)%NBBY))) == 0)
 
 /* Macros for counting and rounding. */
 #ifndef howmany
 #define	howmany(x, y)	(((x)+((y)-1))/(y))
 #endif
 #define	nitems(x)	(sizeof((x)) / sizeof((x)[0]))
 #define	rounddown(x, y)	(((x)/(y))*(y))
 #define	rounddown2(x, y) __align_down(x, y) /* if y is power of two */
 #define	roundup(x, y)	((((x)+((y)-1))/(y))*(y))  /* to any y */
 #define	roundup2(x, y)	__align_up(x, y) /* if y is powers of two */
 #define powerof2(x)	((((x)-1)&(x))==0)
 
 /* Macros for min/max. */
 #define	MIN(a,b) (((a)<(b))?(a):(b))
 #define	MAX(a,b) (((a)>(b))?(a):(b))
 
 #ifdef _KERNEL
 /*
  * Basic byte order function prototypes for non-inline functions.
  */
 #ifndef LOCORE
 #ifndef _BYTEORDER_PROTOTYPED
 #define	_BYTEORDER_PROTOTYPED
 __BEGIN_DECLS
 __uint32_t	 htonl(__uint32_t);
 __uint16_t	 htons(__uint16_t);
 __uint32_t	 ntohl(__uint32_t);
 __uint16_t	 ntohs(__uint16_t);
 __END_DECLS
 #endif
 #endif
 
 #ifndef _BYTEORDER_FUNC_DEFINED
 #define	_BYTEORDER_FUNC_DEFINED
 #define	htonl(x)	__htonl(x)
 #define	htons(x)	__htons(x)
 #define	ntohl(x)	__ntohl(x)
 #define	ntohs(x)	__ntohs(x)
 #endif /* !_BYTEORDER_FUNC_DEFINED */
 #endif /* _KERNEL */
 
 /*
  * Scale factor for scaled integers used to count %cpu time and load avgs.
  *
  * The number of CPU `tick's that map to a unique `%age' can be expressed
  * by the formula (1 / (2 ^ (FSHIFT - 11))).  Since the intermediate
  * calculation is done with 64-bit precision, the maximum load average that can
  * be calculated is approximately 2^32 / FSCALE.
  *
  * For the scheduler to maintain a 1:1 mapping of CPU `tick' to `%age',
  * FSHIFT must be at least 11.  This gives a maximum load avg of 2 million.
  */
 #define	FSHIFT	11		/* bits to right of fixed binary point */
 #define FSCALE	(1<<FSHIFT)
 
 #define dbtoc(db)			/* calculates devblks to pages */ \
 	((db + (ctodb(1) - 1)) >> (PAGE_SHIFT - DEV_BSHIFT))
 
 #define ctodb(db)			/* calculates pages to devblks */ \
 	((db) << (PAGE_SHIFT - DEV_BSHIFT))
 
 /*
  * Old spelling of __containerof().
  */
 #define	member2struct(s, m, x)						\
 	((struct s *)(void *)((char *)(x) - offsetof(struct s, m)))
 
 /*
  * Access a variable length array that has been declared as a fixed
  * length array.
  */
 #define __PAST_END(array, offset) (((__typeof__(*(array)) *)(array))[offset])
 
 #endif	/* _SYS_PARAM_H_ */
diff --git a/sys/ufs/ufs/ufs_lookup.c b/sys/ufs/ufs/ufs_lookup.c
index 9c6e95687f85..4c390f4c42ef 100644
--- a/sys/ufs/ufs/ufs_lookup.c
+++ b/sys/ufs/ufs/ufs_lookup.c
@@ -1,1585 +1,1579 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
  * (c) UNIX System Laboratories, Inc.
  * All or some portions of this file are derived from material licensed
  * to the University of California by American Telephone and Telegraph
  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
  * the permission of UNIX System Laboratories, Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)ufs_lookup.c	8.15 (Berkeley) 6/16/95
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_ufs.h"
 #include "opt_quota.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/namei.h>
 #include <sys/bio.h>
 #include <sys/buf.h>
 #include <sys/proc.h>
 #include <sys/stat.h>
 #include <sys/mount.h>
 #include <sys/vnode.h>
 #include <sys/sysctl.h>
 
 #include <vm/vm.h>
 #include <vm/vm_extern.h>
 
 #include <ufs/ufs/extattr.h>
 #include <ufs/ufs/quota.h>
 #include <ufs/ufs/inode.h>
 #include <ufs/ufs/dir.h>
 #ifdef UFS_DIRHASH
 #include <ufs/ufs/dirhash.h>
 #endif
 #include <ufs/ufs/ufsmount.h>
 #include <ufs/ufs/ufs_extern.h>
 #include <ufs/ffs/ffs_extern.h>
 
 #ifdef DIAGNOSTIC
 static int	dirchk = 1;
 #else
 static int	dirchk = 0;
 #endif
 
 SYSCTL_INT(_debug, OID_AUTO, dircheck, CTLFLAG_RW, &dirchk, 0, "");
 
 static int
 ufs_delete_denied(struct vnode *vdp, struct vnode *tdp, struct ucred *cred,
     struct thread *td)
 {
 	int error;
 
 #ifdef UFS_ACL
 	/*
 	 * NFSv4 Minor Version 1, draft-ietf-nfsv4-minorversion1-03.txt
 	 *
 	 * 3.16.2.1. ACE4_DELETE vs. ACE4_DELETE_CHILD
 	 */
 
 	/*
 	 * XXX: Is this check required?
 	 */
 	error = VOP_ACCESS(vdp, VEXEC, cred, td);
 	if (error)
 		return (error);
 
 	error = VOP_ACCESSX(tdp, VDELETE, cred, td);
 	if (error == 0)
 		return (0);
 
 	error = VOP_ACCESSX(vdp, VDELETE_CHILD, cred, td);
 	if (error == 0)
 		return (0);
 
 	error = VOP_ACCESSX(vdp, VEXPLICIT_DENY | VDELETE_CHILD, cred, td);
 	if (error)
 		return (error);
 
 #endif /* !UFS_ACL */
 
 	/*
 	 * Standard Unix access control - delete access requires VWRITE.
 	 */
 	error = VOP_ACCESS(vdp, VWRITE, cred, td);
 	if (error)
 		return (error);
 
 	/*
 	 * If directory is "sticky", then user must own
 	 * the directory, or the file in it, else she
 	 * may not delete it (unless she's root). This
 	 * implements append-only directories.
 	 */
 	if ((VTOI(vdp)->i_mode & ISVTX) &&
 	    VOP_ACCESS(vdp, VADMIN, cred, td) &&
 	    VOP_ACCESS(tdp, VADMIN, cred, td))
 		return (EPERM);
 
 	return (0);
 }
 
 /*
  * Convert a component of a pathname into a pointer to a locked inode.
  * This is a very central and rather complicated routine.
  * If the filesystem is not maintained in a strict tree hierarchy,
  * this can result in a deadlock situation (see comments in code below).
  *
  * The cnp->cn_nameiop argument is LOOKUP, CREATE, RENAME, or DELETE depending
  * on whether the name is to be looked up, created, renamed, or deleted.
  * When CREATE, RENAME, or DELETE is specified, information usable in
  * creating, renaming, or deleting a directory entry may be calculated.
  * If flag has LOCKPARENT or'ed into it and the target of the pathname
  * exists, lookup returns both the target and its parent directory locked.
  * When creating or renaming and LOCKPARENT is specified, the target may
  * not be ".".  When deleting and LOCKPARENT is specified, the target may
  * be "."., but the caller must check to ensure it does an vrele and vput
  * instead of two vputs.
  *
  * This routine is actually used as VOP_CACHEDLOOKUP method, and the
  * filesystem employs the generic vfs_cache_lookup() as VOP_LOOKUP
  * method.
  *
  * vfs_cache_lookup() performs the following for us:
  *	check that it is a directory
  *	check accessibility of directory
  *	check for modification attempts on read-only mounts
  *	if name found in cache
  *	    if at end of path and deleting or creating
  *		drop it
  *	     else
  *		return name.
  *	return VOP_CACHEDLOOKUP()
  *
  * Overall outline of ufs_lookup:
  *
  *	search for name in directory, to found or notfound
  * notfound:
  *	if creating, return locked directory, leaving info on available slots
  *	else return error
  * found:
  *	if at end of path and deleting, return information to allow delete
  *	if at end of path and rewriting (RENAME and LOCKPARENT), lock target
  *	  inode and return info to allow rewrite
  *	if not at end, add name to cache; if at end and neither creating
  *	  nor deleting, add name to cache
  */
 int
 ufs_lookup(
 	struct vop_cachedlookup_args /* {
 		struct vnode *a_dvp;
 		struct vnode **a_vpp;
 		struct componentname *a_cnp;
 	} */ *ap)
 {
 
 	return (ufs_lookup_ino(ap->a_dvp, ap->a_vpp, ap->a_cnp, NULL));
 }
 
 int
 ufs_lookup_ino(struct vnode *vdp, struct vnode **vpp, struct componentname *cnp,
     ino_t *dd_ino)
 {
 	struct inode *dp;		/* inode for directory being searched */
 	struct buf *bp;			/* a buffer of directory entries */
 	struct direct *ep;		/* the current directory entry */
 	int entryoffsetinblock;		/* offset of ep in bp's buffer */
 	enum {NONE, COMPACT, FOUND} slotstatus;
 	doff_t slotoffset;		/* offset of area with free space */
 	doff_t i_diroff;		/* cached i_diroff value. */
 	doff_t i_offset;		/* cached i_offset value. */
 	int slotsize;			/* size of area at slotoffset */
 	int slotfreespace;		/* amount of space free in slot */
 	int slotneeded;			/* size of the entry we're seeking */
 	int numdirpasses;		/* strategy for directory search */
 	doff_t endsearch;		/* offset to end directory search */
 	doff_t prevoff;			/* prev entry dp->i_offset */
 	struct vnode *pdp;		/* saved dp during symlink work */
 	struct vnode *tdp;		/* returned by VFS_VGET */
 	doff_t enduseful;		/* pointer past last used dir slot */
 	u_long bmask;			/* block offset mask */
 	int namlen, error;
 	struct ucred *cred = cnp->cn_cred;
 	int flags = cnp->cn_flags;
 	int nameiop = cnp->cn_nameiop;
 	ino_t ino, ino1;
 	int ltype;
 
 	if (vpp != NULL)
 		*vpp = NULL;
 
 	dp = VTOI(vdp);
 	if (dp->i_effnlink == 0)
 		return (ENOENT);
 
 	/*
 	 * Create a vm object if vmiodirenable is enabled.
 	 * Alternatively we could call vnode_create_vobject
 	 * in VFS_VGET but we could end up creating objects
 	 * that are never used.
 	 */
 	vnode_create_vobject(vdp, DIP(dp, i_size), curthread);
 
 	bmask = VFSTOUFS(vdp->v_mount)->um_mountp->mnt_stat.f_iosize - 1;
 
 #ifdef DEBUG_VFS_LOCKS
 	/*
 	 * Assert that the directory vnode is locked, and locked
 	 * exclusively for the last component lookup for modifying
 	 * operations.
 	 *
 	 * The directory-modifying operations need to save
 	 * intermediate state in the inode between namei() call and
 	 * actual directory manipulations.  See fields in the struct
 	 * inode marked as 'used during directory lookup'.  We must
 	 * ensure that upgrade in namei() does not happen, since
 	 * upgrade might need to unlock vdp.  If quotas are enabled,
 	 * getinoquota() also requires exclusive lock to modify inode.
 	 */
 	ASSERT_VOP_LOCKED(vdp, "ufs_lookup1");
 	if ((nameiop == CREATE || nameiop == DELETE || nameiop == RENAME) &&
 	    (flags & (LOCKPARENT | ISLASTCN)) == (LOCKPARENT | ISLASTCN))
 		ASSERT_VOP_ELOCKED(vdp, "ufs_lookup2");
 #endif
 
 restart:
 	bp = NULL;
 	slotoffset = -1;
 
 	/*
 	 * We now have a segment name to search for, and a directory to search.
 	 *
 	 * Suppress search for slots unless creating
 	 * file and at end of pathname, in which case
 	 * we watch for a place to put the new file in
 	 * case it doesn't already exist.
 	 */
 	ino = 0;
 	i_diroff = dp->i_diroff;
 	slotstatus = FOUND;
 	slotfreespace = slotsize = slotneeded = 0;
 	if ((nameiop == CREATE || nameiop == RENAME) &&
 	    (flags & ISLASTCN)) {
 		slotstatus = NONE;
 		slotneeded = DIRECTSIZ(cnp->cn_namelen);
 	}
 
 #ifdef UFS_DIRHASH
 	/*
 	 * Use dirhash for fast operations on large directories. The logic
 	 * to determine whether to hash the directory is contained within
 	 * ufsdirhash_build(); a zero return means that it decided to hash
 	 * this directory and it successfully built up the hash table.
 	 */
 	if (ufsdirhash_build(dp) == 0) {
 		/* Look for a free slot if needed. */
 		enduseful = dp->i_size;
 		if (slotstatus != FOUND) {
 			slotoffset = ufsdirhash_findfree(dp, slotneeded,
 			    &slotsize);
 			if (slotoffset >= 0) {
 				slotstatus = COMPACT;
 				enduseful = ufsdirhash_enduseful(dp);
 				if (enduseful < 0)
 					enduseful = dp->i_size;
 			}
 		}
 		/* Look up the component. */
 		numdirpasses = 1;
 		entryoffsetinblock = 0; /* silence compiler warning */
 		switch (ufsdirhash_lookup(dp, cnp->cn_nameptr, cnp->cn_namelen,
 		    &i_offset, &bp, nameiop == DELETE ? &prevoff : NULL)) {
 		case 0:
 			ep = (struct direct *)((char *)bp->b_data +
 			    (i_offset & bmask));
 			goto foundentry;
 		case ENOENT:
 			i_offset = roundup2(dp->i_size, DIRBLKSIZ);
 			goto notfound;
 		default:
 			/* Something failed; just do a linear search. */
 			break;
 		}
 	}
 #endif /* UFS_DIRHASH */
 	/*
 	 * If there is cached information on a previous search of
 	 * this directory, pick up where we last left off.
 	 * We cache only lookups as these are the most common
 	 * and have the greatest payoff. Caching CREATE has little
 	 * benefit as it usually must search the entire directory
 	 * to determine that the entry does not exist. Caching the
 	 * location of the last DELETE or RENAME has not reduced
 	 * profiling time and hence has been removed in the interest
 	 * of simplicity.
 	 */
 	if (nameiop != LOOKUP || i_diroff == 0 || i_diroff >= dp->i_size) {
 		entryoffsetinblock = 0;
 		i_offset = 0;
 		numdirpasses = 1;
 	} else {
 		i_offset = i_diroff;
 		if ((entryoffsetinblock = i_offset & bmask) &&
 		    (error = UFS_BLKATOFF(vdp, (off_t)i_offset, NULL, &bp)))
 			return (error);
 		numdirpasses = 2;
 		nchstats.ncs_2passes++;
 	}
 	prevoff = i_offset;
 	endsearch = roundup2(dp->i_size, DIRBLKSIZ);
 	enduseful = 0;
 
 searchloop:
 	while (i_offset < endsearch) {
 		/*
 		 * If necessary, get the next directory block.
 		 */
 		if ((i_offset & bmask) == 0) {
 			if (bp != NULL)
 				brelse(bp);
 			error =
 			    UFS_BLKATOFF(vdp, (off_t)i_offset, NULL, &bp);
 			if (error)
 				return (error);
 			entryoffsetinblock = 0;
 		}
 		/*
 		 * If still looking for a slot, and at a DIRBLKSIZE
 		 * boundary, have to start looking for free space again.
 		 */
 		if (slotstatus == NONE &&
 		    (entryoffsetinblock & (DIRBLKSIZ - 1)) == 0) {
 			slotoffset = -1;
 			slotfreespace = 0;
 		}
 		/*
 		 * Get pointer to next entry.
 		 * Full validation checks are slow, so we only check
 		 * enough to insure forward progress through the
 		 * directory. Complete checks can be run by patching
 		 * "dirchk" to be true.
 		 */
 		ep = (struct direct *)((char *)bp->b_data + entryoffsetinblock);
 		if (ep->d_reclen == 0 || ep->d_reclen >
 		    DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1)) ||
 		    (dirchk && ufs_dirbadentry(vdp, ep, entryoffsetinblock))) {
 			int i;
 
 			ufs_dirbad(dp, i_offset, "mangled entry");
 			i = DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1));
 			i_offset += i;
 			entryoffsetinblock += i;
 			continue;
 		}
 
 		/*
 		 * If an appropriate sized slot has not yet been found,
 		 * check to see if one is available. Also accumulate space
 		 * in the current block so that we can determine if
 		 * compaction is viable.
 		 */
 		if (slotstatus != FOUND) {
 			int size = ep->d_reclen;
 
 			if (ep->d_ino != 0)
 				size -= DIRSIZ(OFSFMT(vdp), ep);
 			if (size > 0) {
 				if (size >= slotneeded) {
 					slotstatus = FOUND;
 					slotoffset = i_offset;
 					slotsize = ep->d_reclen;
 				} else if (slotstatus == NONE) {
 					slotfreespace += size;
 					if (slotoffset == -1)
 						slotoffset = i_offset;
 					if (slotfreespace >= slotneeded) {
 						slotstatus = COMPACT;
 						slotsize = i_offset +
 						      ep->d_reclen - slotoffset;
 					}
 				}
 			}
 		}
 
 		/*
 		 * Check for a name match.
 		 */
 		if (ep->d_ino) {
 #			if (BYTE_ORDER == LITTLE_ENDIAN)
 				if (OFSFMT(vdp))
 					namlen = ep->d_type;
 				else
 					namlen = ep->d_namlen;
 #			else
 				namlen = ep->d_namlen;
 #			endif
 			if (namlen == cnp->cn_namelen &&
 				(cnp->cn_nameptr[0] == ep->d_name[0]) &&
 			    !bcmp(cnp->cn_nameptr, ep->d_name,
 				(unsigned)namlen)) {
 #ifdef UFS_DIRHASH
 foundentry:
 #endif
 				/*
 				 * Save directory entry's inode number and
 				 * reclen in ndp->ni_ufs area, and release
 				 * directory buffer.
 				 */
 				if (!OFSFMT(vdp) && ep->d_type == DT_WHT) {
 					slotstatus = FOUND;
 					slotoffset = i_offset;
 					slotsize = ep->d_reclen;
 					enduseful = dp->i_size;
 					cnp->cn_flags |= ISWHITEOUT;
 					numdirpasses--;
 					goto notfound;
 				}
 				ino = ep->d_ino;
 				goto found;
 			}
 		}
 		prevoff = i_offset;
 		i_offset += ep->d_reclen;
 		entryoffsetinblock += ep->d_reclen;
 		if (ep->d_ino)
 			enduseful = i_offset;
 	}
 notfound:
 	/*
 	 * If we started in the middle of the directory and failed
 	 * to find our target, we must check the beginning as well.
 	 */
 	if (numdirpasses == 2) {
 		numdirpasses--;
 		i_offset = 0;
 		endsearch = i_diroff;
 		goto searchloop;
 	}
 	if (bp != NULL)
 		brelse(bp);
 	/*
 	 * If creating, and at end of pathname and current
 	 * directory has not been removed, then can consider
 	 * allowing file to be created.
 	 */
 	if ((nameiop == CREATE || nameiop == RENAME ||
 	     (nameiop == DELETE &&
 	      (cnp->cn_flags & DOWHITEOUT) &&
 	      (cnp->cn_flags & ISWHITEOUT))) &&
 	    (flags & ISLASTCN) && dp->i_effnlink != 0) {
 		/*
 		 * Access for write is interpreted as allowing
 		 * creation of files in the directory.
 		 *
 		 * XXX: Fix the comment above.
 		 */
 		if (flags & WILLBEDIR)
 			error = VOP_ACCESSX(vdp, VWRITE | VAPPEND, cred, curthread);
 		else
 			error = VOP_ACCESS(vdp, VWRITE, cred, curthread);
 		if (error)
 			return (error);
 		/*
 		 * Return an indication of where the new directory
 		 * entry should be put.  If we didn't find a slot,
 		 * then set dp->i_count to 0 indicating
 		 * that the new slot belongs at the end of the
 		 * directory. If we found a slot, then the new entry
 		 * can be put in the range from dp->i_offset to
 		 * dp->i_offset + dp->i_count.
 		 */
 		if (slotstatus == NONE) {
 			SET_I_OFFSET(dp, roundup2(dp->i_size, DIRBLKSIZ));
 			SET_I_COUNT(dp, 0);
 			enduseful = I_OFFSET(dp);
 		} else if (nameiop == DELETE) {
 			SET_I_OFFSET(dp, slotoffset);
 			if ((I_OFFSET(dp) & (DIRBLKSIZ - 1)) == 0)
 				SET_I_COUNT(dp, 0);
 			else
 				SET_I_COUNT(dp, I_OFFSET(dp) - prevoff);
 		} else {
 			SET_I_OFFSET(dp, slotoffset);
 			SET_I_COUNT(dp, slotsize);
 			if (enduseful < slotoffset + slotsize)
 				enduseful = slotoffset + slotsize;
 		}
 		SET_I_ENDOFF(dp, roundup2(enduseful, DIRBLKSIZ));
 		/*
 		 * We return with the directory locked, so that
 		 * the parameters we set up above will still be
 		 * valid if we actually decide to do a direnter().
 		 * We return ni_vp == NULL to indicate that the entry
 		 * does not currently exist; we leave a pointer to
 		 * the (locked) directory inode in ndp->ni_dvp.
-		 * The pathname buffer is saved so that the name
-		 * can be obtained later.
 		 *
 		 * NB - if the directory is unlocked, then this
 		 * information cannot be used.
 		 */
-		cnp->cn_flags |= SAVENAME;
 		return (EJUSTRETURN);
 	}
 	/*
 	 * Insert name into cache (as non-existent) if appropriate.
 	 */
 	if ((cnp->cn_flags & MAKEENTRY) != 0)
 		cache_enter(vdp, NULL, cnp);
 	return (ENOENT);
 
 found:
 	if (dd_ino != NULL)
 		*dd_ino = ino;
 	if (numdirpasses == 2)
 		nchstats.ncs_pass2++;
 	/*
 	 * Check that directory length properly reflects presence
 	 * of this entry.
 	 */
 	if (i_offset + DIRSIZ(OFSFMT(vdp), ep) > dp->i_size) {
 		ufs_dirbad(dp, i_offset, "i_size too small");
 		dp->i_size = i_offset + DIRSIZ(OFSFMT(vdp), ep);
 		DIP_SET(dp, i_size, dp->i_size);
 		UFS_INODE_SET_FLAG(dp, IN_SIZEMOD | IN_CHANGE | IN_UPDATE);
 	}
 	brelse(bp);
 
 	/*
 	 * Found component in pathname.
 	 * If the final component of path name, save information
 	 * in the cache as to where the entry was found.
 	 */
 	if ((flags & ISLASTCN) && nameiop == LOOKUP)
 		dp->i_diroff = rounddown2(i_offset, DIRBLKSIZ);
 
 	/*
 	 * If deleting, and at end of pathname, return
 	 * parameters which can be used to remove file.
 	 */
 	if (nameiop == DELETE && (flags & ISLASTCN)) {
 		if (flags & LOCKPARENT)
 			ASSERT_VOP_ELOCKED(vdp, __FUNCTION__);
 
 		if (VOP_ISLOCKED(vdp) == LK_EXCLUSIVE) {
 			/*
 			 * Return pointer to current entry in
 			 * dp->i_offset, and distance past previous
 			 * entry (if there is a previous entry in this
 			 * block) in dp->i_count.
 			 *
 			 * We shouldn't be setting these in the
 			 * WANTPARENT case (first lookup in rename()), but any
 			 * lookups that will result in directory changes will
 			 * overwrite these.
 			 */
 			SET_I_OFFSET(dp, i_offset);
 			if ((I_OFFSET(dp) & (DIRBLKSIZ - 1)) == 0)
 				SET_I_COUNT(dp, 0);
 			else
 				SET_I_COUNT(dp, I_OFFSET(dp) - prevoff);
 		}
 		if (dd_ino != NULL)
 			return (0);
 
 		/*
 		 * Save directory inode pointer in ndp->ni_dvp for
 		 * dirremove().
 		 */
 		if ((error = VFS_VGET(vdp->v_mount, ino,
 		    LK_EXCLUSIVE, &tdp)) != 0)
 			return (error);
 		error = ufs_delete_denied(vdp, tdp, cred, curthread);
 		if (error) {
 			vput(tdp);
 			return (error);
 		}
 		if (dp->i_number == ino) {
 			VREF(vdp);
 			*vpp = vdp;
 			vput(tdp);
 			return (0);
 		}
 
 		*vpp = tdp;
 		return (0);
 	}
 
 	/*
 	 * If rewriting (RENAME), return the inode and the
 	 * information required to rewrite the present directory
 	 * Must get inode of directory entry to verify it's a
 	 * regular file, or empty directory.
 	 */
 	if (nameiop == RENAME && (flags & ISLASTCN)) {
 		if (flags & WILLBEDIR)
 			error = VOP_ACCESSX(vdp, VWRITE | VAPPEND, cred, curthread);
 		else
 			error = VOP_ACCESS(vdp, VWRITE, cred, curthread);
 		if (error)
 			return (error);
 		/*
 		 * Careful about locking second inode.
 		 * This can only occur if the target is ".".
 		 */
 		SET_I_OFFSET(dp, i_offset);
 		if (dp->i_number == ino)
 			return (EISDIR);
 		if (dd_ino != NULL)
 			return (0);
 		if ((error = VFS_VGET(vdp->v_mount, ino,
 		    LK_EXCLUSIVE, &tdp)) != 0)
 			return (error);
 
 		error = ufs_delete_denied(vdp, tdp, cred, curthread);
 		if (error) {
 			vput(tdp);
 			return (error);
 		}
 
 #ifdef SunOS_doesnt_do_that
 		/*
 		 * The only purpose of this check is to return the correct
 		 * error.  Assume that we want to rename directory "a"
 		 * to a file "b", and that we have no ACL_WRITE_DATA on
 		 * a containing directory, but we _do_ have ACL_APPEND_DATA. 
 		 * In that case, the VOP_ACCESS check above will return 0,
 		 * and the operation will fail with ENOTDIR instead
 		 * of EACCESS.
 		 */
 		if (tdp->v_type == VDIR)
 			error = VOP_ACCESSX(vdp, VWRITE | VAPPEND, cred, curthread);
 		else
 			error = VOP_ACCESS(vdp, VWRITE, cred, curthread);
 		if (error) {
 			vput(tdp);
 			return (error);
 		}
 #endif
 
 		*vpp = tdp;
-		cnp->cn_flags |= SAVENAME;
 		return (0);
 	}
 	if (dd_ino != NULL)
 		return (0);
 
 	/*
 	 * Step through the translation in the name.  We do not `vput' the
 	 * directory because we may need it again if a symbolic link
 	 * is relative to the current directory.  Instead we save it
 	 * unlocked as "pdp".  We must get the target inode before unlocking
 	 * the directory to insure that the inode will not be removed
 	 * before we get it.  We prevent deadlock by always fetching
 	 * inodes from the root, moving down the directory tree. Thus
 	 * when following backward pointers ".." we must unlock the
 	 * parent directory before getting the requested directory.
 	 * There is a potential race condition here if both the current
 	 * and parent directories are removed before the VFS_VGET for the
 	 * inode associated with ".." returns.  We hope that this occurs
 	 * infrequently since we cannot avoid this race condition without
 	 * implementing a sophisticated deadlock detection algorithm.
 	 * Note also that this simple deadlock detection scheme will not
 	 * work if the filesystem has any hard links other than ".."
 	 * that point backwards in the directory structure.
 	 */
 	pdp = vdp;
 	if (flags & ISDOTDOT) {
 		error = vn_vget_ino(pdp, ino, cnp->cn_lkflags, &tdp);
 		if (error)
 			return (error);
 
 		/*
 		 * Recheck that ".." entry in the vdp directory points
 		 * to the inode we looked up before vdp lock was
 		 * dropped.
 		 */
 		error = ufs_lookup_ino(pdp, NULL, cnp, &ino1);
 		if (error) {
 			vput(tdp);
 			return (error);
 		}
 		if (ino1 != ino) {
 			vput(tdp);
 			goto restart;
 		}
 
 		*vpp = tdp;
 	} else if (dp->i_number == ino) {
 		VREF(vdp);	/* we want ourself, ie "." */
 		/*
 		 * When we lookup "." we still can be asked to lock it
 		 * differently.
 		 */
 		ltype = cnp->cn_lkflags & LK_TYPE_MASK;
 		if (ltype != VOP_ISLOCKED(vdp)) {
 			if (ltype == LK_EXCLUSIVE)
 				vn_lock(vdp, LK_UPGRADE | LK_RETRY);
 			else /* if (ltype == LK_SHARED) */
 				vn_lock(vdp, LK_DOWNGRADE | LK_RETRY);
 			/*
 			 * Relock for the "." case may left us with
 			 * reclaimed vnode.
 			 */
 			if (VN_IS_DOOMED(vdp)) {
 				vrele(vdp);
 				return (ENOENT);
 			}
 		}
 		*vpp = vdp;
 	} else {
 		error = VFS_VGET(pdp->v_mount, ino, cnp->cn_lkflags, &tdp);
 		if (error == 0 && VTOI(tdp)->i_mode == 0) {
 			vgone(tdp);
 			vput(tdp);
 			error = ENOENT;
 		}
 		if (error)
 			return (error);
 		*vpp = tdp;
 	}
 
 	/*
 	 * Insert name into cache if appropriate.
 	 */
 	if (cnp->cn_flags & MAKEENTRY)
 		cache_enter(vdp, *vpp, cnp);
 	return (0);
 }
 
 void
 ufs_dirbad(struct inode *ip, doff_t offset, char *how)
 {
 	struct mount *mp;
 
 	mp = ITOV(ip)->v_mount;
 	if ((mp->mnt_flag & MNT_RDONLY) == 0)
 		panic("ufs_dirbad: %s: bad dir ino %ju at offset %ld: %s",
 		    mp->mnt_stat.f_mntonname, (uintmax_t)ip->i_number,
 		    (long)offset, how);
 	else
 		(void)printf("%s: bad dir ino %ju at offset %ld: %s\n",
 		    mp->mnt_stat.f_mntonname, (uintmax_t)ip->i_number,
 		    (long)offset, how);
 }
 
 /*
  * Do consistency checking on a directory entry:
  *	record length must be multiple of 4
  *	entry must fit in rest of its DIRBLKSIZ block
  *	record must be large enough to contain entry
  *	name is not longer than UFS_MAXNAMLEN
  *	name must be as long as advertised, and null terminated
  */
 int
 ufs_dirbadentry(struct vnode *dp, struct direct *ep, int entryoffsetinblock)
 {
 	int i, namlen;
 
 #	if (BYTE_ORDER == LITTLE_ENDIAN)
 		if (OFSFMT(dp))
 			namlen = ep->d_type;
 		else
 			namlen = ep->d_namlen;
 #	else
 		namlen = ep->d_namlen;
 #	endif
 	if ((ep->d_reclen & 0x3) != 0 ||
 	    ep->d_reclen > DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1)) ||
 	    ep->d_reclen < DIRSIZ(OFSFMT(dp), ep) || namlen > UFS_MAXNAMLEN) {
 		/*return (1); */
 		printf("First bad\n");
 		goto bad;
 	}
 	if (ep->d_ino == 0)
 		return (0);
 	for (i = 0; i < namlen; i++)
 		if (ep->d_name[i] == '\0') {
 			/*return (1); */
 			printf("Second bad\n");
 			goto bad;
 		}
 	if (ep->d_name[i])
 		goto bad;
 	return (0);
 bad:
 	return (1);
 }
 
 /*
  * Construct a new directory entry after a call to namei, using the
  * parameters that it left in the componentname argument cnp. The
  * argument ip is the inode to which the new directory entry will refer.
  */
 void
 ufs_makedirentry(struct inode *ip, struct componentname *cnp,
     struct direct *newdirp)
 {
 	u_int namelen;
 
 	namelen = (unsigned)cnp->cn_namelen;
-	KASSERT((cnp->cn_flags & SAVENAME) != 0,
-		("ufs_makedirentry: missing name"));
 	KASSERT(namelen <= UFS_MAXNAMLEN,
 		("ufs_makedirentry: name too long"));
 	newdirp->d_ino = ip->i_number;
 	newdirp->d_namlen = namelen;
 
 	/* Zero out after-name padding */
 	*(u_int32_t *)(&newdirp->d_name[namelen & ~(DIR_ROUNDUP - 1)]) = 0;
 
 	bcopy(cnp->cn_nameptr, newdirp->d_name, namelen);
 
 	if (!OFSFMT(ITOV(ip)))
 		newdirp->d_type = IFTODT(ip->i_mode);
 	else {
 		newdirp->d_type = 0;
 #		if (BYTE_ORDER == LITTLE_ENDIAN)
 			{ u_char tmp = newdirp->d_namlen;
 			newdirp->d_namlen = newdirp->d_type;
 			newdirp->d_type = tmp; }
 #		endif
 	}
 }
 
 /*
  * Write a directory entry after a call to namei, using the parameters
  * that it left in nameidata. The argument dirp is the new directory
  * entry contents. Dvp is a pointer to the directory to be written,
  * which was left locked by namei. Remaining parameters (dp->i_offset, 
  * dp->i_count) indicate how the space for the new entry is to be obtained.
  * Non-null bp indicates that a directory is being created (for the
  * soft dependency code).
  */
 int
 ufs_direnter(struct vnode *dvp, struct vnode *tvp, struct direct *dirp,
     struct componentname *cnp, struct buf *newdirbp)
 {
 	struct ucred *cr;
 	struct thread *td;
 	int newentrysize;
 	struct inode *dp;
 	struct buf *bp;
 	u_int dsize;
 	struct direct *ep, *nep;
 	u_int64_t old_isize;
 	int error, ret, blkoff, loc, spacefree, flags, namlen;
 	char *dirbuf;
 
 	td = curthread;	/* XXX */
 	cr = td->td_ucred;
 
 	dp = VTOI(dvp);
 	newentrysize = DIRSIZ(OFSFMT(dvp), dirp);
 
 	if (I_COUNT(dp) == 0) {
 		/*
 		 * If dp->i_count is 0, then namei could find no
 		 * space in the directory. Here, dp->i_offset will
 		 * be on a directory block boundary and we will write the
 		 * new entry into a fresh block.
 		 */
 		if (I_OFFSET(dp) & (DIRBLKSIZ - 1))
 			panic("ufs_direnter: newblk");
 		flags = BA_CLRBUF;
 		if (!DOINGSOFTDEP(dvp) && !DOINGASYNC(dvp))
 			flags |= IO_SYNC;
 #ifdef QUOTA
 		if ((error = getinoquota(dp)) != 0) {
 			if (DOINGSOFTDEP(dvp) && newdirbp != NULL)
 				bdwrite(newdirbp);
 			return (error);
 		}
 #endif
 		old_isize = dp->i_size;
 		vnode_pager_setsize(dvp, (u_long)I_OFFSET(dp) + DIRBLKSIZ);
 		if ((error = UFS_BALLOC(dvp, (off_t)I_OFFSET(dp), DIRBLKSIZ,
 		    cr, flags, &bp)) != 0) {
 			if (DOINGSOFTDEP(dvp) && newdirbp != NULL)
 				bdwrite(newdirbp);
 			vnode_pager_setsize(dvp, (u_long)old_isize);
 			return (error);
 		}
 		dp->i_size = I_OFFSET(dp) + DIRBLKSIZ;
 		DIP_SET(dp, i_size, dp->i_size);
 		SET_I_ENDOFF(dp, dp->i_size);
 		UFS_INODE_SET_FLAG(dp, IN_SIZEMOD | IN_CHANGE | IN_UPDATE);
 		dirp->d_reclen = DIRBLKSIZ;
 		blkoff = I_OFFSET(dp) &
 		    (VFSTOUFS(dvp->v_mount)->um_mountp->mnt_stat.f_iosize - 1);
 		bcopy((caddr_t)dirp, (caddr_t)bp->b_data + blkoff,newentrysize);
 #ifdef UFS_DIRHASH
 		if (dp->i_dirhash != NULL) {
 			ufsdirhash_newblk(dp, I_OFFSET(dp));
 			ufsdirhash_add(dp, dirp, I_OFFSET(dp));
 			ufsdirhash_checkblock(dp, (char *)bp->b_data + blkoff,
 			    I_OFFSET(dp));
 		}
 #endif
 		if (DOINGSOFTDEP(dvp)) {
 			/*
 			 * Ensure that the entire newly allocated block is a
 			 * valid directory so that future growth within the
 			 * block does not have to ensure that the block is
 			 * written before the inode.
 			 */
 			blkoff += DIRBLKSIZ;
 			while (blkoff < bp->b_bcount) {
 				((struct direct *)
 				   (bp->b_data + blkoff))->d_reclen = DIRBLKSIZ;
 				blkoff += DIRBLKSIZ;
 			}
 			if (softdep_setup_directory_add(bp, dp, I_OFFSET(dp),
 			    dirp->d_ino, newdirbp, 1))
 				UFS_INODE_SET_FLAG(dp, IN_NEEDSYNC);
 			if (newdirbp)
 				bdwrite(newdirbp);
 			bdwrite(bp);
 			return (UFS_UPDATE(dvp, 0));
 		}
 		if (DOINGASYNC(dvp)) {
 			bdwrite(bp);
 			return (UFS_UPDATE(dvp, 0));
 		}
 		error = bwrite(bp);
 		ret = UFS_UPDATE(dvp, 1);
 		if (error == 0)
 			return (ret);
 		return (error);
 	}
 
 	/*
 	 * If dp->i_count is non-zero, then namei found space for the new
 	 * entry in the range dp->i_offset to dp->i_offset + dp->i_count
 	 * in the directory. To use this space, we may have to compact
 	 * the entries located there, by copying them together towards the
 	 * beginning of the block, leaving the free space in one usable
 	 * chunk at the end.
 	 */
 
 	/*
 	 * Increase size of directory if entry eats into new space.
 	 * This should never push the size past a new multiple of
 	 * DIRBLKSIZE.
 	 *
 	 * N.B. - THIS IS AN ARTIFACT OF 4.2 AND SHOULD NEVER HAPPEN.
 	 */
 	if (I_OFFSET(dp) + I_COUNT(dp) > dp->i_size) {
 		dp->i_size = I_OFFSET(dp) + I_COUNT(dp);
 		DIP_SET(dp, i_size, dp->i_size);
 		UFS_INODE_SET_FLAG(dp, IN_SIZEMOD | IN_MODIFIED);
 	}
 	/*
 	 * Get the block containing the space for the new directory entry.
 	 */
 	error = UFS_BLKATOFF(dvp, (off_t)I_OFFSET(dp), &dirbuf, &bp);
 	if (error) {
 		if (DOINGSOFTDEP(dvp) && newdirbp != NULL)
 			bdwrite(newdirbp);
 		return (error);
 	}
 	/*
 	 * Find space for the new entry. In the simple case, the entry at
 	 * offset base will have the space. If it does not, then namei
 	 * arranged that compacting the region dp->i_offset to
 	 * dp->i_offset + dp->i_count would yield the space.
 	 */
 	ep = (struct direct *)dirbuf;
 	dsize = ep->d_ino ? DIRSIZ(OFSFMT(dvp), ep) : 0;
 	spacefree = ep->d_reclen - dsize;
 	for (loc = ep->d_reclen; loc < I_COUNT(dp); ) {
 		nep = (struct direct *)(dirbuf + loc);
 
 		/* Trim the existing slot (NB: dsize may be zero). */
 		ep->d_reclen = dsize;
 		ep = (struct direct *)((char *)ep + dsize);
 
 		/* Read nep->d_reclen now as the bcopy() may clobber it. */
 		loc += nep->d_reclen;
 		if (nep->d_ino == 0) {
 			/*
 			 * A mid-block unused entry. Such entries are
 			 * never created by the kernel, but fsck_ffs
 			 * can create them (and it doesn't fix them).
 			 *
 			 * Add up the free space, and initialise the
 			 * relocated entry since we don't bcopy it.
 			 */
 			spacefree += nep->d_reclen;
 			ep->d_ino = 0;
 			dsize = 0;
 			continue;
 		}
 		dsize = DIRSIZ(OFSFMT(dvp), nep);
 		spacefree += nep->d_reclen - dsize;
 #ifdef UFS_DIRHASH
 		if (dp->i_dirhash != NULL)
 			ufsdirhash_move(dp, nep,
 			    I_OFFSET(dp) + ((char *)nep - dirbuf),
 			    I_OFFSET(dp) + ((char *)ep - dirbuf));
 #endif
 		if (DOINGSOFTDEP(dvp))
 			softdep_change_directoryentry_offset(bp, dp, dirbuf,
 			    (caddr_t)nep, (caddr_t)ep, dsize); 
 		else
 			bcopy((caddr_t)nep, (caddr_t)ep, dsize);
 	}
 	/*
 	 * Here, `ep' points to a directory entry containing `dsize' in-use
 	 * bytes followed by `spacefree' unused bytes. If ep->d_ino == 0,
 	 * then the entry is completely unused (dsize == 0). The value
 	 * of ep->d_reclen is always indeterminate.
 	 *
 	 * Update the pointer fields in the previous entry (if any),
 	 * copy in the new entry, and write out the block.
 	 */
 #	if (BYTE_ORDER == LITTLE_ENDIAN)
 		if (OFSFMT(dvp))
 			namlen = ep->d_type;
 		else
 			namlen = ep->d_namlen;
 #	else
 		namlen = ep->d_namlen;
 #	endif
 	if (ep->d_ino == 0 ||
 	    (ep->d_ino == UFS_WINO && namlen == dirp->d_namlen &&
 	     bcmp(ep->d_name, dirp->d_name, dirp->d_namlen) == 0)) {
 		if (spacefree + dsize < newentrysize)
 			panic("ufs_direnter: compact1");
 		dirp->d_reclen = spacefree + dsize;
 	} else {
 		if (spacefree < newentrysize)
 			panic("ufs_direnter: compact2");
 		dirp->d_reclen = spacefree;
 		ep->d_reclen = dsize;
 		ep = (struct direct *)((char *)ep + dsize);
 	}
 #ifdef UFS_DIRHASH
 	if (dp->i_dirhash != NULL && (ep->d_ino == 0 ||
 	    dirp->d_reclen == spacefree))
 		ufsdirhash_add(dp, dirp, I_OFFSET(dp) + ((char *)ep - dirbuf));
 #endif
 	bcopy((caddr_t)dirp, (caddr_t)ep, (u_int)newentrysize);
 #ifdef UFS_DIRHASH
 	if (dp->i_dirhash != NULL)
 		ufsdirhash_checkblock(dp, dirbuf -
 		    (I_OFFSET(dp) & (DIRBLKSIZ - 1)),
 		    rounddown2(I_OFFSET(dp), DIRBLKSIZ));
 #endif
 
 	if (DOINGSOFTDEP(dvp)) {
 		(void) softdep_setup_directory_add(bp, dp,
 		    I_OFFSET(dp) + (caddr_t)ep - dirbuf,
 		    dirp->d_ino, newdirbp, 0);
 		if (newdirbp != NULL)
 			bdwrite(newdirbp);
 		bdwrite(bp);
 	} else {
 		if (DOINGASYNC(dvp)) {
 			bdwrite(bp);
 			error = 0;
 		} else {
 			error = bwrite(bp);
 		}
 	}
 
 	/*
 	 * If all went well, and the directory can be shortened,
 	 * mark directory inode with the truncation request.
 	 */
 	UFS_INODE_SET_FLAG(dp, IN_CHANGE | IN_UPDATE | (error == 0 &&
 	    I_ENDOFF(dp) != 0 && I_ENDOFF(dp) < dp->i_size ? IN_ENDOFF : 0));
 
 	return (error);
 }
 
 /*
  * Remove a directory entry after a call to namei, using
  * the parameters which it left in nameidata. The entry
  * dp->i_offset contains the offset into the directory of the
  * entry to be eliminated.  The dp->i_count field contains the
  * size of the previous record in the directory.  If this
  * is 0, the first entry is being deleted, so we need only
  * zero the inode number to mark the entry as free.  If the
  * entry is not the first in the directory, we must reclaim
  * the space of the now empty record by adding the record size
  * to the size of the previous entry.
  */
 int
 ufs_dirremove(struct vnode *dvp, struct inode *ip, int flags, int isrmdir)
 {
 	struct inode *dp;
 	struct direct *ep, *rep;
 	struct buf *bp;
 	off_t offset;
 	int error;
 
 	dp = VTOI(dvp);
 
 	/*
 	 * Adjust the link count early so softdep can block if necessary.
 	 */
 	if (ip) {
 		ip->i_effnlink--;
 		UFS_INODE_SET_FLAG(ip, IN_CHANGE);
 		if (DOINGSOFTDEP(dvp)) {
 			softdep_setup_unlink(dp, ip);
 		} else {
 			ip->i_nlink--;
 			DIP_SET(ip, i_nlink, ip->i_nlink);
 			UFS_INODE_SET_FLAG(ip, IN_CHANGE);
 		}
 	}
 	if (flags & DOWHITEOUT)
 		offset = I_OFFSET(dp);
 	else
 		offset = I_OFFSET(dp) - I_COUNT(dp);
 	if ((error = UFS_BLKATOFF(dvp, offset, (char **)&ep, &bp)) != 0) {
 		if (ip) {
 			ip->i_effnlink++;
 			UFS_INODE_SET_FLAG(ip, IN_CHANGE);
 			if (DOINGSOFTDEP(dvp)) {
 				softdep_change_linkcnt(ip);
 			} else {
 				ip->i_nlink++;
 				DIP_SET(ip, i_nlink, ip->i_nlink);
 				UFS_INODE_SET_FLAG(ip, IN_CHANGE);
 			}
 		}
 		return (error);
 	}
 	if (flags & DOWHITEOUT) {
 		/*
 		 * Whiteout entry: set d_ino to UFS_WINO.
 		 */
 		ep->d_ino = UFS_WINO;
 		ep->d_type = DT_WHT;
 		goto out;
 	}
 	/* Set 'rep' to the entry being removed. */
 	if (I_COUNT(dp) == 0)
 		rep = ep;
 	else
 		rep = (struct direct *)((char *)ep + ep->d_reclen);
 #ifdef UFS_DIRHASH
 	/*
 	 * Remove the dirhash entry. This is complicated by the fact
 	 * that `ep' is the previous entry when dp->i_count != 0.
 	 */
 	if (dp->i_dirhash != NULL)
 		ufsdirhash_remove(dp, rep, I_OFFSET(dp));
 #endif
 	if (ip && rep->d_ino != ip->i_number)
 		panic("ufs_dirremove: ip %ju does not match dirent ino %ju\n",
 		    (uintmax_t)ip->i_number, (uintmax_t)rep->d_ino);
 	/*
 	 * Zero out the file directory entry metadata to reduce disk
 	 * scavenging disclosure.
 	 */
 	bzero(&rep->d_name[0], rep->d_namlen);
 	rep->d_namlen = 0;
 	rep->d_type = 0;
 	rep->d_ino = 0;
 
 	if (I_COUNT(dp) != 0) {
 		/*
 		 * Collapse new free space into previous entry.
 		 */
 		ep->d_reclen += rep->d_reclen;
 		rep->d_reclen = 0;
 	}
 #ifdef UFS_DIRHASH
 	if (dp->i_dirhash != NULL)
 		ufsdirhash_checkblock(dp, (char *)ep -
 		    ((I_OFFSET(dp) - I_COUNT(dp)) & (DIRBLKSIZ - 1)),
 		    rounddown2(I_OFFSET(dp), DIRBLKSIZ));
 #endif
 out:
 	error = 0;
 	if (DOINGSOFTDEP(dvp)) {
 		if (ip)
 			softdep_setup_remove(bp, dp, ip, isrmdir);
 		if (softdep_slowdown(dvp))
 			error = bwrite(bp);
 		else
 			bdwrite(bp);
 	} else {
 		if (flags & DOWHITEOUT)
 			error = bwrite(bp);
 		else if (DOINGASYNC(dvp))
 			bdwrite(bp);
 		else
 			error = bwrite(bp);
 	}
 	UFS_INODE_SET_FLAG(dp, IN_CHANGE | IN_UPDATE);
 	/*
 	 * If the last named reference to a snapshot goes away,
 	 * drop its snapshot reference so that it will be reclaimed
 	 * when last open reference goes away.
 	 */
 	if (ip != NULL && IS_SNAPSHOT(ip) && ip->i_effnlink == 0)
 		UFS_SNAPGONE(ip);
 	return (error);
 }
 
 /*
  * Rewrite an existing directory entry to point at the inode
  * supplied.  The parameters describing the directory entry are
  * set up by a call to namei.
  */
 int
 ufs_dirrewrite(struct inode *dp, struct inode *oip, ino_t newinum, int newtype,
     int isrmdir)
 {
 	struct buf *bp;
 	struct direct *ep;
 	struct vnode *vdp = ITOV(dp);
 	int error;
 
 	/*
 	 * Drop the link before we lock the buf so softdep can block if
 	 * necessary.
 	 */
 	oip->i_effnlink--;
 	UFS_INODE_SET_FLAG(oip, IN_CHANGE);
 	if (DOINGSOFTDEP(vdp)) {
 		softdep_setup_unlink(dp, oip);
 	} else {
 		oip->i_nlink--;
 		DIP_SET(oip, i_nlink, oip->i_nlink);
 		UFS_INODE_SET_FLAG(oip, IN_CHANGE);
 	}
 
 	error = UFS_BLKATOFF(vdp, (off_t)I_OFFSET(dp), (char **)&ep, &bp);
 	if (error == 0 && ep->d_namlen == 2 && ep->d_name[1] == '.' &&
 	    ep->d_name[0] == '.' && ep->d_ino != oip->i_number) {
 		brelse(bp);
 		error = EIDRM;
 	}
 	if (error) {
 		oip->i_effnlink++;
 		UFS_INODE_SET_FLAG(oip, IN_CHANGE);
 		if (DOINGSOFTDEP(vdp)) {
 			softdep_change_linkcnt(oip);
 		} else {
 			oip->i_nlink++;
 			DIP_SET(oip, i_nlink, oip->i_nlink);
 			UFS_INODE_SET_FLAG(oip, IN_CHANGE);
 		}
 		return (error);
 	}
 	ep->d_ino = newinum;
 	if (!OFSFMT(vdp))
 		ep->d_type = newtype;
 	if (DOINGSOFTDEP(vdp)) {
 		softdep_setup_directory_change(bp, dp, oip, newinum, isrmdir);
 		bdwrite(bp);
 	} else {
 		if (DOINGASYNC(vdp)) {
 			bdwrite(bp);
 			error = 0;
 		} else {
 			error = bwrite(bp);
 		}
 	}
 	UFS_INODE_SET_FLAG(dp, IN_CHANGE | IN_UPDATE);
 	/*
 	 * If the last named reference to a snapshot goes away,
 	 * drop its snapshot reference so that it will be reclaimed
 	 * when last open reference goes away.
 	 */
 	if (IS_SNAPSHOT(oip) && oip->i_effnlink == 0)
 		UFS_SNAPGONE(oip);
 	return (error);
 }
 
 /*
  * Check if a directory is empty or not.
  * Inode supplied must be locked.
  *
  * Using a struct dirtemplate here is not precisely
  * what we want, but better than using a struct direct.
  *
  * NB: does not handle corrupted directories.
  */
 int
 ufs_dirempty(struct inode *ip, ino_t parentino, struct ucred *cred)
 {
 	doff_t off;
 	struct dirtemplate dbuf;
 	struct direct *dp = (struct direct *)&dbuf;
 	int error, namlen;
 	ssize_t count;
 #define	MINDIRSIZ (sizeof (struct dirtemplate) / 2)
 
 	for (off = 0; off < ip->i_size; off += dp->d_reclen) {
 		error = vn_rdwr(UIO_READ, ITOV(ip), (caddr_t)dp, MINDIRSIZ,
 		    off, UIO_SYSSPACE, IO_NODELOCKED | IO_NOMACCHECK, cred,
 		    NOCRED, &count, (struct thread *)0);
 		/*
 		 * Since we read MINDIRSIZ, residual must
 		 * be 0 unless we're at end of file.
 		 */
 		if (error || count != 0)
 			return (0);
 		/* avoid infinite loops */
 		if (dp->d_reclen == 0)
 			return (0);
 		/* skip empty entries */
 		if (dp->d_ino == 0 || dp->d_ino == UFS_WINO)
 			continue;
 		/* accept only "." and ".." */
 #		if (BYTE_ORDER == LITTLE_ENDIAN)
 			if (OFSFMT(ITOV(ip)))
 				namlen = dp->d_type;
 			else
 				namlen = dp->d_namlen;
 #		else
 			namlen = dp->d_namlen;
 #		endif
 		if (namlen > 2)
 			return (0);
 		if (dp->d_name[0] != '.')
 			return (0);
 		/*
 		 * At this point namlen must be 1 or 2.
 		 * 1 implies ".", 2 implies ".." if second
 		 * char is also "."
 		 */
 		if (namlen == 1 && dp->d_ino == ip->i_number)
 			continue;
 		if (dp->d_name[1] == '.' && dp->d_ino == parentino)
 			continue;
 		return (0);
 	}
 	return (1);
 }
 
 static int
 ufs_dir_dd_ino(struct vnode *vp, struct ucred *cred, ino_t *dd_ino,
     struct vnode **dd_vp)
 {
 	struct dirtemplate dirbuf;
 	struct vnode *ddvp;
 	int error, namlen;
 
 	ASSERT_VOP_LOCKED(vp, "ufs_dir_dd_ino");
 	*dd_vp = NULL;
 	if (vp->v_type != VDIR)
 		return (ENOTDIR);
 	/*
 	 * First check to see if we have it in the name cache.
 	 */
 	if ((ddvp = vn_dir_dd_ino(vp)) != NULL) {
 		KASSERT(ddvp->v_mount == vp->v_mount,
 		    ("ufs_dir_dd_ino: Unexpected mount point crossing"));
 		*dd_ino = VTOI(ddvp)->i_number;
 		*dd_vp = ddvp;
 		return (0);
 	}
 	/*
 	 * Have to read the directory.
 	 */
 	error = vn_rdwr(UIO_READ, vp, (caddr_t)&dirbuf,
 	    sizeof (struct dirtemplate), (off_t)0, UIO_SYSSPACE,
 	    IO_NODELOCKED | IO_NOMACCHECK, cred, NOCRED, NULL, NULL);
 	if (error != 0)
 		return (error);
 #if (BYTE_ORDER == LITTLE_ENDIAN)
 	if (OFSFMT(vp))
 		namlen = dirbuf.dotdot_type;
 	else
 		namlen = dirbuf.dotdot_namlen;
 #else
 	namlen = dirbuf.dotdot_namlen;
 #endif
 	if (namlen != 2 || dirbuf.dotdot_name[0] != '.' ||
 	    dirbuf.dotdot_name[1] != '.')
 		return (ENOTDIR);
 	*dd_ino = dirbuf.dotdot_ino;
 	return (0);
 }
 
 /*
  * Check if source directory is in the path of the target directory.
  */
 int
 ufs_checkpath(ino_t source_ino, ino_t parent_ino, struct inode *target,
     struct ucred *cred, ino_t *wait_ino)
 {
 	struct mount *mp;
 	struct vnode *tvp, *vp, *vp1;
 	int error;
 	ino_t dd_ino;
 
 	vp = tvp = ITOV(target);
 	mp = vp->v_mount;
 	*wait_ino = 0;
 	sx_assert(&VFSTOUFS(mp)->um_checkpath_lock, SA_XLOCKED);
 
 	if (target->i_number == source_ino)
 		return (EEXIST);
 	if (target->i_number == parent_ino)
 		return (0);
 	if (target->i_number == UFS_ROOTINO)
 		return (0);
 	for (;;) {
 		error = ufs_dir_dd_ino(vp, cred, &dd_ino, &vp1);
 		if (error != 0)
 			break;
 		if (dd_ino == source_ino) {
 			error = EINVAL;
 			break;
 		}
 		if (dd_ino == UFS_ROOTINO)
 			break;
 		if (dd_ino == parent_ino)
 			break;
 		if (vp1 == NULL) {
 			error = VFS_VGET(mp, dd_ino, LK_SHARED | LK_NOWAIT,
 			    &vp1);
 			if (error != 0) {
 				*wait_ino = dd_ino;
 				break;
 			}
 		}
 		KASSERT(dd_ino == VTOI(vp1)->i_number,
 		    ("directory %ju reparented\n",
 		    (uintmax_t)VTOI(vp1)->i_number));
 		if (vp != tvp)
 			vput(vp);
 		vp = vp1;
 	}
 
 	if (error == ENOTDIR)
 		panic("checkpath: .. not a directory\n");
 	if (vp1 != NULL)
 		vput(vp1);
 	if (vp != tvp)
 		vput(vp);
 	return (error);
 }
 
 #ifdef DIAGNOSTIC
 static void
 ufs_assert_inode_offset_owner(struct inode *ip, struct iown_tracker *tr,
     const char *name, const char *file, int line)
 {
 	char msg[128];
 
 	snprintf(msg, sizeof(msg), "at %s@%d", file, line);
 	ASSERT_VOP_ELOCKED(ITOV(ip), msg);
 	MPASS((ip->i_mode & IFMT) == IFDIR);
 	if (curthread == tr->tr_owner && ip->i_lock_gen == tr->tr_gen)
 		return;
 	printf("locked at\n");
 	stack_print(&tr->tr_st);
 	printf("unlocked at\n");
 	stack_print(&tr->tr_unlock);
 	panic("%s ip %p %jd offset owner %p %d gen %d "
 	    "curthread %p %d gen %d at %s@%d\n",
 	    name, ip, (uintmax_t)ip->i_number, tr->tr_owner,
 	    tr->tr_owner->td_tid, tr->tr_gen,
 	    curthread, curthread->td_tid, ip->i_lock_gen,
 	    file, line);
 }
 
 static void
 ufs_set_inode_offset_owner(struct inode *ip, struct iown_tracker *tr,
     const char *file, int line)
 {
 	char msg[128];
 
 	snprintf(msg, sizeof(msg), "at %s@%d", file, line);
 	ASSERT_VOP_ELOCKED(ITOV(ip), msg);
 	MPASS((ip->i_mode & IFMT) == IFDIR);
 	tr->tr_owner = curthread;
 	tr->tr_gen = ip->i_lock_gen;
 	stack_save(&tr->tr_st);
 }
 
 static void
 ufs_init_one_tracker(struct iown_tracker *tr)
 {
 	tr->tr_owner = NULL;
 	stack_zero(&tr->tr_st);
 }
 
 void
 ufs_init_trackers(struct inode *ip)
 {
 	ufs_init_one_tracker(&ip->i_offset_tracker);
 	ufs_init_one_tracker(&ip->i_count_tracker);
 	ufs_init_one_tracker(&ip->i_endoff_tracker);
 }
 
 void
 ufs_unlock_tracker(struct inode *ip)
 {
 	if (ip->i_count_tracker.tr_gen == ip->i_lock_gen)
 		stack_save(&ip->i_count_tracker.tr_unlock);
 	if (ip->i_offset_tracker.tr_gen == ip->i_lock_gen)
 		stack_save(&ip->i_offset_tracker.tr_unlock);
 	if (ip->i_endoff_tracker.tr_gen == ip->i_lock_gen)
 		stack_save(&ip->i_endoff_tracker.tr_unlock);
 	ip->i_lock_gen++;
 }
 
 doff_t
 ufs_get_i_offset(struct inode *ip, const char *file, int line)
 {
 	ufs_assert_inode_offset_owner(ip, &ip->i_offset_tracker, "i_offset",
 	    file, line);
 	return (ip->i_offset);
 }
 
 void
 ufs_set_i_offset(struct inode *ip, doff_t off, const char *file, int line)
 {
 	ufs_set_inode_offset_owner(ip, &ip->i_offset_tracker, file, line);
 	ip->i_offset = off;
 }
 
 int32_t
 ufs_get_i_count(struct inode *ip, const char *file, int line)
 {
 	ufs_assert_inode_offset_owner(ip, &ip->i_count_tracker, "i_count",
 	    file, line);
 	return (ip->i_count);
 }
 
 void
 ufs_set_i_count(struct inode *ip, int32_t cnt, const char *file, int line)
 {
 	ufs_set_inode_offset_owner(ip, &ip->i_count_tracker, file, line);
 	ip->i_count = cnt;
 }
 
 doff_t
 ufs_get_i_endoff(struct inode *ip, const char *file, int line)
 {
 	ufs_assert_inode_offset_owner(ip, &ip->i_endoff_tracker, "i_endoff",
 	    file, line);
 	return (ip->i_endoff);
 }
 
 void
 ufs_set_i_endoff(struct inode *ip, doff_t off, const char *file, int line)
 {
 	ufs_set_inode_offset_owner(ip, &ip->i_endoff_tracker, file, line);
 	ip->i_endoff = off;
 }
 
 #endif
diff --git a/sys/ufs/ufs/ufs_vnops.c b/sys/ufs/ufs/ufs_vnops.c
index af01fb37fe53..cf101ae50750 100644
--- a/sys/ufs/ufs/ufs_vnops.c
+++ b/sys/ufs/ufs/ufs_vnops.c
@@ -1,3079 +1,3059 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 1982, 1986, 1989, 1993, 1995
  *	The Regents of the University of California.  All rights reserved.
  * (c) UNIX System Laboratories, Inc.
  * All or some portions of this file are derived from material licensed
  * to the University of California by American Telephone and Telegraph
  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
  * the permission of UNIX System Laboratories, Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)ufs_vnops.c	8.27 (Berkeley) 5/27/95
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_quota.h"
 #include "opt_suiddir.h"
 #include "opt_ufs.h"
 #include "opt_ffs.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/malloc.h>
 #include <sys/namei.h>
 #include <sys/kernel.h>
 #include <sys/fcntl.h>
 #include <sys/filio.h>
 #include <sys/stat.h>
 #include <sys/bio.h>
 #include <sys/buf.h>
 #include <sys/mount.h>
 #include <sys/priv.h>
 #include <sys/refcount.h>
 #include <sys/unistd.h>
 #include <sys/vnode.h>
 #include <sys/dirent.h>
 #include <sys/lockf.h>
 #include <sys/conf.h>
 #include <sys/acl.h>
 #include <sys/smr.h>
 
 #include <security/audit/audit.h>
 #include <security/mac/mac_framework.h>
 
 #include <sys/file.h>		/* XXX */
 
 #include <vm/vm.h>
 #include <vm/vm_extern.h>
 
 #include <ufs/ufs/acl.h>
 #include <ufs/ufs/extattr.h>
 #include <ufs/ufs/quota.h>
 #include <ufs/ufs/inode.h>
 #include <ufs/ufs/dir.h>
 #include <ufs/ufs/ufsmount.h>
 #include <ufs/ufs/ufs_extern.h>
 #ifdef UFS_DIRHASH
 #include <ufs/ufs/dirhash.h>
 #endif
 #ifdef UFS_GJOURNAL
 #include <ufs/ufs/gjournal.h>
 FEATURE(ufs_gjournal, "Journaling support through GEOM for UFS");
 #endif
 
 #ifdef QUOTA
 FEATURE(ufs_quota, "UFS disk quotas support");
 FEATURE(ufs_quota64, "64bit UFS disk quotas support");
 #endif
 
 #ifdef SUIDDIR
 FEATURE(suiddir,
     "Give all new files in directory the same ownership as the directory");
 #endif
 
 VFS_SMR_DECLARE;
 
 #include <ufs/ffs/ffs_extern.h>
 
 static vop_accessx_t	ufs_accessx;
 static vop_fplookup_vexec_t ufs_fplookup_vexec;
 static int ufs_chmod(struct vnode *, int, struct ucred *, struct thread *);
 static int ufs_chown(struct vnode *, uid_t, gid_t, struct ucred *,
     struct thread *);
 static vop_close_t	ufs_close;
 static vop_create_t	ufs_create;
 static vop_stat_t	ufs_stat;
 static vop_getattr_t	ufs_getattr;
 static vop_ioctl_t	ufs_ioctl;
 static vop_link_t	ufs_link;
 static int ufs_makeinode(int mode, struct vnode *, struct vnode **,
     struct componentname *, const char *);
 static vop_mmapped_t	ufs_mmapped;
 static vop_mkdir_t	ufs_mkdir;
 static vop_mknod_t	ufs_mknod;
 static vop_open_t	ufs_open;
 static vop_pathconf_t	ufs_pathconf;
 static vop_print_t	ufs_print;
 static vop_readlink_t	ufs_readlink;
 static vop_remove_t	ufs_remove;
 static vop_rename_t	ufs_rename;
 static vop_rmdir_t	ufs_rmdir;
 static vop_setattr_t	ufs_setattr;
 static vop_strategy_t	ufs_strategy;
 static vop_symlink_t	ufs_symlink;
 static vop_whiteout_t	ufs_whiteout;
 static vop_close_t	ufsfifo_close;
 
 SYSCTL_NODE(_vfs, OID_AUTO, ufs, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
     "UFS filesystem");
 
 /*
  * A virgin directory (no blushing please).
  */
 static struct dirtemplate mastertemplate = {
 	0, 12, DT_DIR, 1, ".",
 	0, DIRBLKSIZ - 12, DT_DIR, 2, ".."
 };
 static struct odirtemplate omastertemplate = {
 	0, 12, 1, ".",
 	0, DIRBLKSIZ - 12, 2, ".."
 };
 
 static void
 ufs_itimes_locked(struct vnode *vp)
 {
 	struct inode *ip;
 	struct timespec ts;
 
 	ASSERT_VI_LOCKED(vp, __func__);
 
 	ip = VTOI(vp);
 	if (UFS_RDONLY(ip))
 		goto out;
 	if ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_UPDATE)) == 0)
 		return;
 
 	if ((vp->v_type == VBLK || vp->v_type == VCHR) && !DOINGSOFTDEP(vp))
 		UFS_INODE_SET_FLAG(ip, IN_LAZYMOD);
 	else if (((vp->v_mount->mnt_kern_flag &
 		    (MNTK_SUSPENDED | MNTK_SUSPEND)) == 0) ||
 		    (ip->i_flag & (IN_CHANGE | IN_UPDATE)))
 		UFS_INODE_SET_FLAG(ip, IN_MODIFIED);
 	else if (ip->i_flag & IN_ACCESS)
 		UFS_INODE_SET_FLAG(ip, IN_LAZYACCESS);
 	vfs_timestamp(&ts);
 	if (ip->i_flag & IN_ACCESS) {
 		DIP_SET(ip, i_atime, ts.tv_sec);
 		DIP_SET(ip, i_atimensec, ts.tv_nsec);
 	}
 	if (ip->i_flag & IN_UPDATE) {
 		DIP_SET(ip, i_mtime, ts.tv_sec);
 		DIP_SET(ip, i_mtimensec, ts.tv_nsec);
 	}
 	if (ip->i_flag & IN_CHANGE) {
 		DIP_SET(ip, i_ctime, ts.tv_sec);
 		DIP_SET(ip, i_ctimensec, ts.tv_nsec);
 		DIP_SET(ip, i_modrev, DIP(ip, i_modrev) + 1);
 	}
 
  out:
 	ip->i_flag &= ~(IN_ACCESS | IN_CHANGE | IN_UPDATE);
 }
 
 void
 ufs_itimes(struct vnode *vp)
 {
 	struct inode *ip;
 
 	ip = VTOI(vp);
 	if ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_UPDATE)) == 0)
 		return;
 
 	VI_LOCK(vp);
 	ufs_itimes_locked(vp);
 	VI_UNLOCK(vp);
 }
 
 static int
 ufs_sync_nlink1(struct mount *mp)
 {
 	int error;
 
 	error = vfs_busy(mp, 0);
 	if (error == 0) {
 		VFS_SYNC(mp, MNT_WAIT);
 		vfs_unbusy(mp);
 		error = ERELOOKUP;
 	}
 	vfs_rel(mp);
 	return (error);
 }
 
 static int
 ufs_sync_nlink(struct vnode *vp, struct vnode *vp1)
 {
 	struct inode *ip;
 	struct mount *mp;
 	int error;
 
 	ip = VTOI(vp);
 	if (ip->i_nlink < UFS_LINK_MAX)
 		return (0);
 	if (!DOINGSOFTDEP(vp) || ip->i_effnlink >= UFS_LINK_MAX)
 		return (EMLINK);
 
 	mp = vp->v_mount;
 	vfs_ref(mp);
 	VOP_UNLOCK(vp);
 	if (vp1 != NULL)
 		VOP_UNLOCK(vp1);
 	error = ufs_sync_nlink1(mp);
 	vn_lock_pair(vp, false, vp1, false);
 	return (error);
 }
 
 /*
  * Create a regular file
  */
 static int
 ufs_create(
 	struct vop_create_args /* {
 		struct vnode *a_dvp;
 		struct vnode **a_vpp;
 		struct componentname *a_cnp;
 		struct vattr *a_vap;
 	} */ *ap)
 {
 	int error;
 
 	error =
 	    ufs_makeinode(MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode),
 	    ap->a_dvp, ap->a_vpp, ap->a_cnp, "ufs_create");
 	if (error != 0)
 		return (error);
 	if ((ap->a_cnp->cn_flags & MAKEENTRY) != 0)
 		cache_enter(ap->a_dvp, *ap->a_vpp, ap->a_cnp);
 	return (0);
 }
 
 /*
  * Mknod vnode call
  */
 /* ARGSUSED */
 static int
 ufs_mknod(
 	struct vop_mknod_args /* {
 		struct vnode *a_dvp;
 		struct vnode **a_vpp;
 		struct componentname *a_cnp;
 		struct vattr *a_vap;
 	} */ *ap)
 {
 	struct vattr *vap = ap->a_vap;
 	struct vnode **vpp = ap->a_vpp;
 	struct inode *ip;
 	ino_t ino;
 	int error;
 
 	error = ufs_makeinode(MAKEIMODE(vap->va_type, vap->va_mode),
 	    ap->a_dvp, vpp, ap->a_cnp, "ufs_mknod");
 	if (error)
 		return (error);
 	ip = VTOI(*vpp);
 	UFS_INODE_SET_FLAG(ip, IN_ACCESS | IN_CHANGE | IN_UPDATE);
 	if (vap->va_rdev != VNOVAL) {
 		/*
 		 * Want to be able to use this to make badblock
 		 * inodes, so don't truncate the dev number.
 		 */
 		DIP_SET(ip, i_rdev, vap->va_rdev);
 	}
 	/*
 	 * Remove inode, then reload it through VFS_VGET().  This is
 	 * needed to do further inode initialization, for instance
 	 * fifo, which was too early for VFS_VGET() done as part of
 	 * UFS_VALLOC().
 	 */
 	(*vpp)->v_type = VNON;
 	ino = ip->i_number;	/* Save this before vgone() invalidates ip. */
 	vgone(*vpp);
 	vput(*vpp);
 	error = VFS_VGET(ap->a_dvp->v_mount, ino, LK_EXCLUSIVE, vpp);
 	if (error) {
 		*vpp = NULL;
 		return (error);
 	}
 	return (0);
 }
 
 /*
  * Open called.
  */
 /* ARGSUSED */
 static int
 ufs_open(struct vop_open_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct inode *ip;
 
 	if (vp->v_type == VCHR || vp->v_type == VBLK)
 		return (EOPNOTSUPP);
 
 	ip = VTOI(vp);
 	vnode_create_vobject(vp, DIP(ip, i_size), ap->a_td);
 	if (vp->v_type == VREG && (vn_irflag_read(vp) & VIRF_PGREAD) == 0 &&
 	    ip->i_ump->um_bsize >= PAGE_SIZE) {
 		vn_irflag_set_cond(vp, VIRF_PGREAD);
 	}
 
 	/*
 	 * Files marked append-only must be opened for appending.
 	 */
 	if ((ip->i_flags & APPEND) &&
 	    (ap->a_mode & (FWRITE | O_APPEND)) == FWRITE)
 		return (EPERM);
 
 	return (0);
 }
 
 /*
  * Close called.
  *
  * Update the times on the inode.
  */
 /* ARGSUSED */
 static int
 ufs_close(
 	struct vop_close_args /* {
 		struct vnode *a_vp;
 		int  a_fflag;
 		struct ucred *a_cred;
 		struct thread *a_td;
 	} */ *ap)
 {
 	struct vnode *vp = ap->a_vp;
 
 	ufs_itimes(vp);
 	return (0);
 }
 
 static int
 ufs_accessx(
 	struct vop_accessx_args /* {
 		struct vnode *a_vp;
 		accmode_t a_accmode;
 		struct ucred *a_cred;
 		struct thread *a_td;
 	} */ *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct inode *ip = VTOI(vp);
 	accmode_t accmode = ap->a_accmode;
 	int error;
 #ifdef UFS_ACL
 	struct acl *acl;
 	acl_type_t type;
 #endif
 
 	/*
 	 * Disallow write attempts on read-only filesystems;
 	 * unless the file is a socket, fifo, or a block or
 	 * character device resident on the filesystem.
 	 */
 	if (accmode & VMODIFY_PERMS) {
 		switch (vp->v_type) {
 		case VDIR:
 		case VLNK:
 		case VREG:
 			if (vp->v_mount->mnt_flag & MNT_RDONLY)
 				return (EROFS);
 #ifdef QUOTA
 			/*
 			 * Inode is accounted in the quotas only if struct
 			 * dquot is attached to it. VOP_ACCESS() is called
 			 * from vn_open_cred() and provides a convenient
 			 * point to call getinoquota().  The lock mode is
 			 * exclusive when the file is opening for write.
 			 */
 			if (VOP_ISLOCKED(vp) == LK_EXCLUSIVE) {
 				error = getinoquota(ip);
 				if (error != 0)
 					return (error);
 			}
 #endif
 			break;
 		default:
 			break;
 		}
 	}
 
 	/*
 	 * If immutable bit set, nobody gets to write it.  "& ~VADMIN_PERMS"
 	 * permits the owner of the file to remove the IMMUTABLE flag.
 	 */
 	if ((accmode & (VMODIFY_PERMS & ~VADMIN_PERMS)) &&
 	    (ip->i_flags & (IMMUTABLE | SF_SNAPSHOT)))
 		return (EPERM);
 
 #ifdef UFS_ACL
 	if ((vp->v_mount->mnt_flag & (MNT_ACLS | MNT_NFS4ACLS)) != 0) {
 		if (vp->v_mount->mnt_flag & MNT_NFS4ACLS)
 			type = ACL_TYPE_NFS4;
 		else
 			type = ACL_TYPE_ACCESS;
 
 		acl = acl_alloc(M_WAITOK);
 		if (type == ACL_TYPE_NFS4)
 			error = ufs_getacl_nfs4_internal(vp, acl, ap->a_td);
 		else
 			error = VOP_GETACL(vp, type, acl, ap->a_cred, ap->a_td);
 		switch (error) {
 		case 0:
 			if (type == ACL_TYPE_NFS4) {
 				error = vaccess_acl_nfs4(vp->v_type, ip->i_uid,
 				    ip->i_gid, acl, accmode, ap->a_cred);
 			} else {
 				error = vfs_unixify_accmode(&accmode);
 				if (error == 0)
 					error = vaccess_acl_posix1e(vp->v_type, ip->i_uid,
 					    ip->i_gid, acl, accmode, ap->a_cred);
 			}
 			break;
 		default:
 			if (error != EOPNOTSUPP)
 				printf(
 "ufs_accessx(): Error retrieving ACL on object (%d).\n",
 				    error);
 			/*
 			 * XXX: Fall back until debugged.  Should
 			 * eventually possibly log an error, and return
 			 * EPERM for safety.
 			 */
 			error = vfs_unixify_accmode(&accmode);
 			if (error == 0)
 				error = vaccess(vp->v_type, ip->i_mode,
 				    ip->i_uid, ip->i_gid, accmode, ap->a_cred);
 		}
 		acl_free(acl);
 
 		return (error);
 	}
 #endif /* !UFS_ACL */
 	error = vfs_unixify_accmode(&accmode);
 	if (error == 0)
 		error = vaccess(vp->v_type, ip->i_mode, ip->i_uid, ip->i_gid,
 		    accmode, ap->a_cred);
 	return (error);
 }
 
 /*
  * VOP_FPLOOKUP_VEXEC routines are subject to special circumstances, see
  * the comment above cache_fplookup for details.
  */
 static int
 ufs_fplookup_vexec(
 	struct vop_fplookup_vexec_args /* {
 		struct vnode *a_vp;
 		struct ucred *a_cred;
 		struct thread *a_td;
 	} */ *ap)
 {
 	struct vnode *vp;
 	struct inode *ip;
 	struct ucred *cred;
 	mode_t all_x, mode;
 
 	vp = ap->a_vp;
 	ip = VTOI_SMR(vp);
 	if (__predict_false(ip == NULL))
 		return (EAGAIN);
 
 	/*
 	 * XXX ACL race
 	 *
 	 * ACLs are not supported and UFS clears/sets this flag on mount and
 	 * remount. However, we may still be racing with seeing them and there
 	 * is no provision to make sure they were accounted for. This matches
 	 * the behavior of the locked case, since the lookup there is also
 	 * racy: mount takes no measures to block anyone from progressing.
 	 */
 	all_x = S_IXUSR | S_IXGRP | S_IXOTH;
 	mode = atomic_load_short(&ip->i_mode);
 	if (__predict_true((mode & all_x) == all_x))
 		return (0);
 
 	cred = ap->a_cred;
 	return (vaccess_vexec_smr(mode, ip->i_uid, ip->i_gid, cred));
 }
 
 /* ARGSUSED */
 static int
 ufs_stat(struct vop_stat_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct inode *ip = VTOI(vp);
 	struct stat *sb = ap->a_sb;
 	int error;
 
 	error = vop_stat_helper_pre(ap);
 	if (__predict_false(error))
 		return (error);
 
 	VI_LOCK(vp);
 	ufs_itimes_locked(vp);
 	if (I_IS_UFS1(ip)) {
 		sb->st_atim.tv_sec = ip->i_din1->di_atime;
 		sb->st_atim.tv_nsec = ip->i_din1->di_atimensec;
 	} else {
 		sb->st_atim.tv_sec = ip->i_din2->di_atime;
 		sb->st_atim.tv_nsec = ip->i_din2->di_atimensec;
 	}
 	VI_UNLOCK(vp);
 
 	sb->st_dev = dev2udev(ITOUMP(ip)->um_dev);
 	sb->st_ino = ip->i_number;
 	sb->st_mode = (ip->i_mode & ~IFMT) | VTTOIF(vp->v_type);
 	sb->st_nlink = ip->i_effnlink;
 	sb->st_uid = ip->i_uid;
 	sb->st_gid = ip->i_gid;
 	if (I_IS_UFS1(ip)) {
 		sb->st_rdev = ip->i_din1->di_rdev;
 		sb->st_size = ip->i_din1->di_size;
 		sb->st_mtim.tv_sec = ip->i_din1->di_mtime;
 		sb->st_mtim.tv_nsec = ip->i_din1->di_mtimensec;
 		sb->st_ctim.tv_sec = ip->i_din1->di_ctime;
 		sb->st_ctim.tv_nsec = ip->i_din1->di_ctimensec;
 		sb->st_birthtim.tv_sec = -1;
 		sb->st_birthtim.tv_nsec = 0;
 		sb->st_blocks = dbtob((u_quad_t)ip->i_din1->di_blocks) / S_BLKSIZE;
 	} else {
 		sb->st_rdev = ip->i_din2->di_rdev;
 		sb->st_size = ip->i_din2->di_size;
 		sb->st_mtim.tv_sec = ip->i_din2->di_mtime;
 		sb->st_mtim.tv_nsec = ip->i_din2->di_mtimensec;
 		sb->st_ctim.tv_sec = ip->i_din2->di_ctime;
 		sb->st_ctim.tv_nsec = ip->i_din2->di_ctimensec;
 		sb->st_birthtim.tv_sec = ip->i_din2->di_birthtime;
 		sb->st_birthtim.tv_nsec = ip->i_din2->di_birthnsec;
 		sb->st_blocks = dbtob((u_quad_t)ip->i_din2->di_blocks) / S_BLKSIZE;
 	}
 
 	sb->st_blksize = max(PAGE_SIZE, vp->v_mount->mnt_stat.f_iosize);
 	sb->st_flags = ip->i_flags;
 	sb->st_gen = ip->i_gen;
 
 	return (vop_stat_helper_post(ap, error));
 }
 
 /* ARGSUSED */
 static int
 ufs_getattr(
 	struct vop_getattr_args /* {
 		struct vnode *a_vp;
 		struct vattr *a_vap;
 		struct ucred *a_cred;
 	} */ *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct inode *ip = VTOI(vp);
 	struct vattr *vap = ap->a_vap;
 
 	VI_LOCK(vp);
 	ufs_itimes_locked(vp);
 	if (I_IS_UFS1(ip)) {
 		vap->va_atime.tv_sec = ip->i_din1->di_atime;
 		vap->va_atime.tv_nsec = ip->i_din1->di_atimensec;
 	} else {
 		vap->va_atime.tv_sec = ip->i_din2->di_atime;
 		vap->va_atime.tv_nsec = ip->i_din2->di_atimensec;
 	}
 	VI_UNLOCK(vp);
 	/*
 	 * Copy from inode table
 	 */
 	vap->va_fsid = dev2udev(ITOUMP(ip)->um_dev);
 	vap->va_fileid = ip->i_number;
 	vap->va_mode = ip->i_mode & ~IFMT;
 	vap->va_nlink = ip->i_effnlink;
 	vap->va_uid = ip->i_uid;
 	vap->va_gid = ip->i_gid;
 	if (I_IS_UFS1(ip)) {
 		vap->va_rdev = ip->i_din1->di_rdev;
 		vap->va_size = ip->i_din1->di_size;
 		vap->va_mtime.tv_sec = ip->i_din1->di_mtime;
 		vap->va_mtime.tv_nsec = ip->i_din1->di_mtimensec;
 		vap->va_ctime.tv_sec = ip->i_din1->di_ctime;
 		vap->va_ctime.tv_nsec = ip->i_din1->di_ctimensec;
 		vap->va_bytes = dbtob((u_quad_t)ip->i_din1->di_blocks);
 		vap->va_filerev = ip->i_din1->di_modrev;
 	} else {
 		vap->va_rdev = ip->i_din2->di_rdev;
 		vap->va_size = ip->i_din2->di_size;
 		vap->va_mtime.tv_sec = ip->i_din2->di_mtime;
 		vap->va_mtime.tv_nsec = ip->i_din2->di_mtimensec;
 		vap->va_ctime.tv_sec = ip->i_din2->di_ctime;
 		vap->va_ctime.tv_nsec = ip->i_din2->di_ctimensec;
 		vap->va_birthtime.tv_sec = ip->i_din2->di_birthtime;
 		vap->va_birthtime.tv_nsec = ip->i_din2->di_birthnsec;
 		vap->va_bytes = dbtob((u_quad_t)ip->i_din2->di_blocks);
 		vap->va_filerev = ip->i_din2->di_modrev;
 	}
 	vap->va_flags = ip->i_flags;
 	vap->va_gen = ip->i_gen;
 	vap->va_blocksize = vp->v_mount->mnt_stat.f_iosize;
 	vap->va_type = IFTOVT(ip->i_mode);
 	return (0);
 }
 
 /*
  * Set attribute vnode op. called from several syscalls
  */
 static int
 ufs_setattr(
 	struct vop_setattr_args /* {
 		struct vnode *a_vp;
 		struct vattr *a_vap;
 		struct ucred *a_cred;
 	} */ *ap)
 {
 	struct vattr *vap = ap->a_vap;
 	struct vnode *vp = ap->a_vp;
 	struct inode *ip = VTOI(vp);
 	struct ucred *cred = ap->a_cred;
 	struct thread *td = curthread;
 	int error;
 
 	/*
 	 * Check for unsettable attributes.
 	 */
 	if ((vap->va_type != VNON) || (vap->va_nlink != VNOVAL) ||
 	    (vap->va_fsid != VNOVAL) || (vap->va_fileid != VNOVAL) ||
 	    (vap->va_blocksize != VNOVAL) || (vap->va_rdev != VNOVAL) ||
 	    ((int)vap->va_bytes != VNOVAL) || (vap->va_gen != VNOVAL)) {
 		return (EINVAL);
 	}
 	if (vap->va_flags != VNOVAL) {
 		if ((vap->va_flags & ~(SF_APPEND | SF_ARCHIVED | SF_IMMUTABLE |
 		    SF_NOUNLINK | SF_SNAPSHOT | UF_APPEND | UF_ARCHIVE |
 		    UF_HIDDEN | UF_IMMUTABLE | UF_NODUMP | UF_NOUNLINK |
 		    UF_OFFLINE | UF_OPAQUE | UF_READONLY | UF_REPARSE |
 		    UF_SPARSE | UF_SYSTEM)) != 0)
 			return (EOPNOTSUPP);
 		if (vp->v_mount->mnt_flag & MNT_RDONLY)
 			return (EROFS);
 		/*
 		 * Callers may only modify the file flags on objects they
 		 * have VADMIN rights for.
 		 */
 		if ((error = VOP_ACCESS(vp, VADMIN, cred, td)))
 			return (error);
 		/*
 		 * Unprivileged processes are not permitted to unset system
 		 * flags, or modify flags if any system flags are set.
 		 * Privileged non-jail processes may not modify system flags
 		 * if securelevel > 0 and any existing system flags are set.
 		 * Privileged jail processes behave like privileged non-jail
 		 * processes if the PR_ALLOW_CHFLAGS permission bit is set;
 		 * otherwise, they behave like unprivileged processes.
 		 */
 		if (!priv_check_cred(cred, PRIV_VFS_SYSFLAGS)) {
 			if (ip->i_flags &
 			    (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND)) {
 				error = securelevel_gt(cred, 0);
 				if (error)
 					return (error);
 			}
 			/* The snapshot flag cannot be toggled. */
 			if ((vap->va_flags ^ ip->i_flags) & SF_SNAPSHOT)
 				return (EPERM);
 		} else {
 			if (ip->i_flags &
 			    (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND) ||
 			    ((vap->va_flags ^ ip->i_flags) & SF_SETTABLE))
 				return (EPERM);
 		}
 		ip->i_flags = vap->va_flags;
 		DIP_SET(ip, i_flags, vap->va_flags);
 		UFS_INODE_SET_FLAG(ip, IN_CHANGE);
 		error = UFS_UPDATE(vp, 0);
 		if (ip->i_flags & (IMMUTABLE | APPEND))
 			return (error);
 	}
 	/*
 	 * If immutable or append, no one can change any of its attributes
 	 * except the ones already handled (in some cases, file flags
 	 * including the immutability flags themselves for the superuser).
 	 */
 	if (ip->i_flags & (IMMUTABLE | APPEND))
 		return (EPERM);
 	/*
 	 * Go through the fields and update iff not VNOVAL.
 	 */
 	if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) {
 		if (vp->v_mount->mnt_flag & MNT_RDONLY)
 			return (EROFS);
 		if ((error = ufs_chown(vp, vap->va_uid, vap->va_gid, cred,
 		    td)) != 0)
 			return (error);
 	}
 	if (vap->va_size != VNOVAL) {
 		/*
 		 * XXX most of the following special cases should be in
 		 * callers instead of in N filesystems.  The VDIR check
 		 * mostly already is.
 		 */
 		switch (vp->v_type) {
 		case VDIR:
 			return (EISDIR);
 		case VLNK:
 		case VREG:
 			/*
 			 * Truncation should have an effect in these cases.
 			 * Disallow it if the filesystem is read-only or
 			 * the file is being snapshotted.
 			 */
 			if (vp->v_mount->mnt_flag & MNT_RDONLY)
 				return (EROFS);
 			if (IS_SNAPSHOT(ip))
 				return (EPERM);
 			break;
 		default:
 			/*
 			 * According to POSIX, the result is unspecified
 			 * for file types other than regular files,
 			 * directories and shared memory objects.  We
 			 * don't support shared memory objects in the file
 			 * system, and have dubious support for truncating
 			 * symlinks.  Just ignore the request in other cases.
 			 */
 			return (0);
 		}
 		if ((error = UFS_TRUNCATE(vp, vap->va_size, IO_NORMAL |
 		    ((vap->va_vaflags & VA_SYNC) != 0 ? IO_SYNC : 0),
 		    cred)) != 0)
 			return (error);
 	}
 	if (vap->va_atime.tv_sec != VNOVAL ||
 	    vap->va_mtime.tv_sec != VNOVAL ||
 	    vap->va_birthtime.tv_sec != VNOVAL) {
 		if (vp->v_mount->mnt_flag & MNT_RDONLY)
 			return (EROFS);
 		if (IS_SNAPSHOT(ip))
 			return (EPERM);
 		error = vn_utimes_perm(vp, vap, cred, td);
 		if (error != 0)
 			return (error);
 		UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_MODIFIED);
 		if (vap->va_atime.tv_sec != VNOVAL) {
 			ip->i_flag &= ~IN_ACCESS;
 			DIP_SET(ip, i_atime, vap->va_atime.tv_sec);
 			DIP_SET(ip, i_atimensec, vap->va_atime.tv_nsec);
 		}
 		if (vap->va_mtime.tv_sec != VNOVAL) {
 			ip->i_flag &= ~IN_UPDATE;
 			DIP_SET(ip, i_mtime, vap->va_mtime.tv_sec);
 			DIP_SET(ip, i_mtimensec, vap->va_mtime.tv_nsec);
 		}
 		if (vap->va_birthtime.tv_sec != VNOVAL && I_IS_UFS2(ip)) {
 			ip->i_din2->di_birthtime = vap->va_birthtime.tv_sec;
 			ip->i_din2->di_birthnsec = vap->va_birthtime.tv_nsec;
 		}
 		error = UFS_UPDATE(vp, 0);
 		if (error)
 			return (error);
 	}
 	error = 0;
 	if (vap->va_mode != (mode_t)VNOVAL) {
 		if (vp->v_mount->mnt_flag & MNT_RDONLY)
 			return (EROFS);
 		if (IS_SNAPSHOT(ip) && (vap->va_mode & (S_IXUSR | S_IWUSR |
 		    S_IXGRP | S_IWGRP | S_IXOTH | S_IWOTH)) != 0)
 			return (EPERM);
 		error = ufs_chmod(vp, (int)vap->va_mode, cred, td);
 	}
 	return (error);
 }
 
 #ifdef UFS_ACL
 static int
 ufs_update_nfs4_acl_after_mode_change(struct vnode *vp, int mode,
     int file_owner_id, struct ucred *cred, struct thread *td)
 {
 	int error;
 	struct acl *aclp;
 
 	aclp = acl_alloc(M_WAITOK);
 	error = ufs_getacl_nfs4_internal(vp, aclp, td);
 	/*
 	 * We don't have to handle EOPNOTSUPP here, as the filesystem claims
 	 * it supports ACLs.
 	 */
 	if (error)
 		goto out;
 
 	acl_nfs4_sync_acl_from_mode(aclp, mode, file_owner_id);
 	error = ufs_setacl_nfs4_internal(vp, aclp, td);
 
 out:
 	acl_free(aclp);
 	return (error);
 }
 #endif /* UFS_ACL */
 
 static int
 ufs_mmapped(
 	struct vop_mmapped_args /* {
 		struct vnode *a_vp;
 	} */ *ap)
 {
 	struct vnode *vp;
 	struct inode *ip;
 	struct mount *mp;
 
 	vp = ap->a_vp;
 	ip = VTOI(vp);
 	mp = vp->v_mount;
 
 	if ((mp->mnt_flag & (MNT_NOATIME | MNT_RDONLY)) == 0)
 		UFS_INODE_SET_FLAG_SHARED(ip, IN_ACCESS);
 	/*
 	 * XXXKIB No UFS_UPDATE(ap->a_vp, 0) there.
 	 */
 	return (0);
 }
 
 /*
  * Change the mode on a file.
  * Inode must be locked before calling.
  */
 static int
 ufs_chmod(struct vnode *vp, int mode, struct ucred *cred, struct thread *td)
 {
 	struct inode *ip = VTOI(vp);
 	int newmode, error;
 
 	/*
 	 * To modify the permissions on a file, must possess VADMIN
 	 * for that file.
 	 */
 	if ((error = VOP_ACCESSX(vp, VWRITE_ACL, cred, td)))
 		return (error);
 	/*
 	 * Privileged processes may set the sticky bit on non-directories,
 	 * as well as set the setgid bit on a file with a group that the
 	 * process is not a member of.  Both of these are allowed in
 	 * jail(8).
 	 */
 	if (vp->v_type != VDIR && (mode & S_ISTXT)) {
 		if (priv_check_cred(cred, PRIV_VFS_STICKYFILE))
 			return (EFTYPE);
 	}
 	if (!groupmember(ip->i_gid, cred) && (mode & ISGID)) {
 		error = priv_check_cred(cred, PRIV_VFS_SETGID);
 		if (error)
 			return (error);
 	}
 
 	/*
 	 * Deny setting setuid if we are not the file owner.
 	 */
 	if ((mode & ISUID) && ip->i_uid != cred->cr_uid) {
 		error = priv_check_cred(cred, PRIV_VFS_ADMIN);
 		if (error)
 			return (error);
 	}
 
 	newmode = ip->i_mode & ~ALLPERMS;
 	newmode |= (mode & ALLPERMS);
 	UFS_INODE_SET_MODE(ip, newmode);
 	DIP_SET(ip, i_mode, ip->i_mode);
 	UFS_INODE_SET_FLAG(ip, IN_CHANGE);
 #ifdef UFS_ACL
 	if ((vp->v_mount->mnt_flag & MNT_NFS4ACLS) != 0)
 		error = ufs_update_nfs4_acl_after_mode_change(vp, mode, ip->i_uid, cred, td);
 #endif
 	if (error == 0 && (ip->i_flag & IN_CHANGE) != 0)
 		error = UFS_UPDATE(vp, 0);
 
 	return (error);
 }
 
 /*
  * Perform chown operation on inode ip;
  * inode must be locked prior to call.
  */
 static int
 ufs_chown(struct vnode *vp, uid_t uid, gid_t gid, struct ucred *cred,
     struct thread *td)
 {
 	struct inode *ip = VTOI(vp);
 	uid_t ouid;
 	gid_t ogid;
 	int error = 0;
 #ifdef QUOTA
 	int i;
 	ufs2_daddr_t change;
 #endif
 
 	if (uid == (uid_t)VNOVAL)
 		uid = ip->i_uid;
 	if (gid == (gid_t)VNOVAL)
 		gid = ip->i_gid;
 	/*
 	 * To modify the ownership of a file, must possess VADMIN for that
 	 * file.
 	 */
 	if ((error = VOP_ACCESSX(vp, VWRITE_OWNER, cred, td)))
 		return (error);
 	/*
 	 * To change the owner of a file, or change the group of a file to a
 	 * group of which we are not a member, the caller must have
 	 * privilege.
 	 */
 	if (((uid != ip->i_uid && uid != cred->cr_uid) || 
 	    (gid != ip->i_gid && !groupmember(gid, cred))) &&
 	    (error = priv_check_cred(cred, PRIV_VFS_CHOWN)))
 		return (error);
 	ogid = ip->i_gid;
 	ouid = ip->i_uid;
 #ifdef QUOTA
 	if ((error = getinoquota(ip)) != 0)
 		return (error);
 	if (ouid == uid) {
 		dqrele(vp, ip->i_dquot[USRQUOTA]);
 		ip->i_dquot[USRQUOTA] = NODQUOT;
 	}
 	if (ogid == gid) {
 		dqrele(vp, ip->i_dquot[GRPQUOTA]);
 		ip->i_dquot[GRPQUOTA] = NODQUOT;
 	}
 	change = DIP(ip, i_blocks);
 	(void) chkdq(ip, -change, cred, CHOWN|FORCE);
 	(void) chkiq(ip, -1, cred, CHOWN|FORCE);
 	for (i = 0; i < MAXQUOTAS; i++) {
 		dqrele(vp, ip->i_dquot[i]);
 		ip->i_dquot[i] = NODQUOT;
 	}
 #endif
 	ip->i_gid = gid;
 	DIP_SET(ip, i_gid, gid);
 	ip->i_uid = uid;
 	DIP_SET(ip, i_uid, uid);
 #ifdef QUOTA
 	if ((error = getinoquota(ip)) == 0) {
 		if (ouid == uid) {
 			dqrele(vp, ip->i_dquot[USRQUOTA]);
 			ip->i_dquot[USRQUOTA] = NODQUOT;
 		}
 		if (ogid == gid) {
 			dqrele(vp, ip->i_dquot[GRPQUOTA]);
 			ip->i_dquot[GRPQUOTA] = NODQUOT;
 		}
 		if ((error = chkdq(ip, change, cred, CHOWN)) == 0) {
 			if ((error = chkiq(ip, 1, cred, CHOWN)) == 0)
 				goto good;
 			else
 				(void) chkdq(ip, -change, cred, CHOWN|FORCE);
 		}
 		for (i = 0; i < MAXQUOTAS; i++) {
 			dqrele(vp, ip->i_dquot[i]);
 			ip->i_dquot[i] = NODQUOT;
 		}
 	}
 	ip->i_gid = ogid;
 	DIP_SET(ip, i_gid, ogid);
 	ip->i_uid = ouid;
 	DIP_SET(ip, i_uid, ouid);
 	if (getinoquota(ip) == 0) {
 		if (ouid == uid) {
 			dqrele(vp, ip->i_dquot[USRQUOTA]);
 			ip->i_dquot[USRQUOTA] = NODQUOT;
 		}
 		if (ogid == gid) {
 			dqrele(vp, ip->i_dquot[GRPQUOTA]);
 			ip->i_dquot[GRPQUOTA] = NODQUOT;
 		}
 		(void) chkdq(ip, change, cred, FORCE|CHOWN);
 		(void) chkiq(ip, 1, cred, FORCE|CHOWN);
 		(void) getinoquota(ip);
 	}
 	return (error);
 good:
 	if (getinoquota(ip))
 		panic("ufs_chown: lost quota");
 #endif /* QUOTA */
 	UFS_INODE_SET_FLAG(ip, IN_CHANGE);
 	if ((ip->i_mode & (ISUID | ISGID)) && (ouid != uid || ogid != gid)) {
 		if (priv_check_cred(cred, PRIV_VFS_RETAINSUGID)) {
 			UFS_INODE_SET_MODE(ip, ip->i_mode & ~(ISUID | ISGID));
 			DIP_SET(ip, i_mode, ip->i_mode);
 		}
 	}
 	error = UFS_UPDATE(vp, 0);
 	return (error);
 }
 
 static int
 ufs_remove(
 	struct vop_remove_args /* {
 		struct vnode *a_dvp;
 		struct vnode *a_vp;
 		struct componentname *a_cnp;
 	} */ *ap)
 {
 	struct inode *ip;
 	struct vnode *vp = ap->a_vp;
 	struct vnode *dvp = ap->a_dvp;
 	int error;
 	struct thread *td;
 
 	td = curthread;
 	ip = VTOI(vp);
 	if ((ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) ||
 	    (VTOI(dvp)->i_flags & APPEND))
 		return (EPERM);
 	if (DOINGSUJ(dvp)) {
 		error = softdep_prelink(dvp, vp, ap->a_cnp);
 		if (error != 0) {
 			MPASS(error == ERELOOKUP);
 			return (error);
 		}
 	}
 
 #ifdef UFS_GJOURNAL
 	ufs_gjournal_orphan(vp);
 #endif
 	error = ufs_dirremove(dvp, ip, ap->a_cnp->cn_flags, 0);
 	if (ip->i_nlink <= 0)
 		vp->v_vflag |= VV_NOSYNC;
 	if (IS_SNAPSHOT(ip)) {
 		/*
 		 * Avoid deadlock where another thread is trying to
 		 * update the inodeblock for dvp and is waiting on
 		 * snaplk.  Temporary unlock the vnode lock for the
 		 * unlinked file and sync the directory.  This should
 		 * allow vput() of the directory to not block later on
 		 * while holding the snapshot vnode locked, assuming
 		 * that the directory hasn't been unlinked too.
 		 */
 		VOP_UNLOCK(vp);
 		(void) VOP_FSYNC(dvp, MNT_WAIT, td);
 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 	}
 	return (error);
 }
 
 static void
 print_bad_link_count(const char *funcname, struct vnode *dvp)
 {
 	struct inode *dip;
 
 	dip = VTOI(dvp);
 	uprintf("%s: Bad link count %d on parent inode %jd in file system %s\n",
 	    funcname, dip->i_effnlink, (intmax_t)dip->i_number,
 	    dvp->v_mount->mnt_stat.f_mntonname);
 }
 
 /*
  * link vnode call
  */
 static int
 ufs_link(
 	struct vop_link_args /* {
 		struct vnode *a_tdvp;
 		struct vnode *a_vp;
 		struct componentname *a_cnp;
 	} */ *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct vnode *tdvp = ap->a_tdvp;
 	struct componentname *cnp = ap->a_cnp;
 	struct inode *ip;
 	struct direct newdir;
 	int error;
 
-#ifdef INVARIANTS
-	if ((cnp->cn_flags & HASBUF) == 0)
-		panic("ufs_link: no name");
-#endif
-
 	if (DOINGSUJ(tdvp)) {
 		error = softdep_prelink(tdvp, vp, cnp);
 		if (error != 0) {
 			MPASS(error == ERELOOKUP);
 			return (error);
 		}
 	}
 
 	if (VTOI(tdvp)->i_effnlink < 2) {
 		print_bad_link_count("ufs_link", tdvp);
 		error = EINVAL;
 		goto out;
 	}
 	error = ufs_sync_nlink(vp, tdvp);
 	if (error != 0)
 		goto out;
 	ip = VTOI(vp);
 
 	/*
 	 * The file may have been removed after namei dropped the original
 	 * lock.
 	 */
 	if (ip->i_effnlink == 0) {
 		error = ENOENT;
 		goto out;
 	}
 	if (ip->i_flags & (IMMUTABLE | APPEND)) {
 		error = EPERM;
 		goto out;
 	}
 
 	ip->i_effnlink++;
 	ip->i_nlink++;
 	DIP_SET(ip, i_nlink, ip->i_nlink);
 	UFS_INODE_SET_FLAG(ip, IN_CHANGE);
 	if (DOINGSOFTDEP(vp))
 		softdep_setup_link(VTOI(tdvp), ip);
 	error = UFS_UPDATE(vp, !DOINGSOFTDEP(vp) && !DOINGASYNC(vp));
 	if (!error) {
 		ufs_makedirentry(ip, cnp, &newdir);
 		error = ufs_direnter(tdvp, vp, &newdir, cnp, NULL);
 	}
 
 	if (error) {
 		ip->i_effnlink--;
 		ip->i_nlink--;
 		DIP_SET(ip, i_nlink, ip->i_nlink);
 		UFS_INODE_SET_FLAG(ip, IN_CHANGE);
 		if (DOINGSOFTDEP(vp))
 			softdep_revert_link(VTOI(tdvp), ip);
 	}
 out:
 	return (error);
 }
 
 /*
  * whiteout vnode call
  */
 static int
 ufs_whiteout(
 	struct vop_whiteout_args /* {
 		struct vnode *a_dvp;
 		struct componentname *a_cnp;
 		int a_flags;
 	} */ *ap)
 {
 	struct vnode *dvp = ap->a_dvp;
 	struct componentname *cnp = ap->a_cnp;
 	struct direct newdir;
 	int error = 0;
 
 	if (DOINGSUJ(dvp) && (ap->a_flags == CREATE ||
 	    ap->a_flags == DELETE)) {
 		error = softdep_prelink(dvp, NULL, cnp);
 		if (error != 0) {
 			MPASS(error == ERELOOKUP);
 			return (error);
 		}
 	}
 
 	switch (ap->a_flags) {
 	case LOOKUP:
 		/* 4.4 format directories support whiteout operations */
 		if (!OFSFMT(dvp))
 			return (0);
 		return (EOPNOTSUPP);
 
 	case CREATE:
 		/* create a new directory whiteout */
 #ifdef INVARIANTS
-		if ((cnp->cn_flags & SAVENAME) == 0)
-			panic("ufs_whiteout: missing name");
 		if (OFSFMT(dvp))
 			panic("ufs_whiteout: old format filesystem");
 #endif
 
 		newdir.d_ino = UFS_WINO;
 		newdir.d_namlen = cnp->cn_namelen;
 		bcopy(cnp->cn_nameptr, newdir.d_name, (unsigned)cnp->cn_namelen + 1);
 		newdir.d_type = DT_WHT;
 		error = ufs_direnter(dvp, NULL, &newdir, cnp, NULL);
 		break;
 
 	case DELETE:
 		/* remove an existing directory whiteout */
 #ifdef INVARIANTS
 		if (OFSFMT(dvp))
 			panic("ufs_whiteout: old format filesystem");
 #endif
 
 		cnp->cn_flags &= ~DOWHITEOUT;
 		error = ufs_dirremove(dvp, NULL, cnp->cn_flags, 0);
 		break;
 	default:
 		panic("ufs_whiteout: unknown op");
 	}
 	return (error);
 }
 
 static volatile int rename_restarts;
 SYSCTL_INT(_vfs_ufs, OID_AUTO, rename_restarts, CTLFLAG_RD,
     __DEVOLATILE(int *, &rename_restarts), 0,
     "Times rename had to restart due to lock contention");
 
 /*
  * Rename system call.
  * 	rename("foo", "bar");
  * is essentially
  *	unlink("bar");
  *	link("foo", "bar");
  *	unlink("foo");
  * but ``atomically''.  Can't do full commit without saving state in the
  * inode on disk which isn't feasible at this time.  Best we can do is
  * always guarantee the target exists.
  *
  * Basic algorithm is:
  *
  * 1) Bump link count on source while we're linking it to the
  *    target.  This also ensure the inode won't be deleted out
  *    from underneath us while we work (it may be truncated by
  *    a concurrent `trunc' or `open' for creation).
  * 2) Link source to destination.  If destination already exists,
  *    delete it first.
  * 3) Unlink source reference to inode if still around. If a
  *    directory was moved and the parent of the destination
  *    is different from the source, patch the ".." entry in the
  *    directory.
  */
 static int
 ufs_rename(
 	struct vop_rename_args  /* {
 		struct vnode *a_fdvp;
 		struct vnode *a_fvp;
 		struct componentname *a_fcnp;
 		struct vnode *a_tdvp;
 		struct vnode *a_tvp;
 		struct componentname *a_tcnp;
 	} */ *ap)
 {
 	struct vnode *tvp = ap->a_tvp;
 	struct vnode *tdvp = ap->a_tdvp;
 	struct vnode *fvp = ap->a_fvp;
 	struct vnode *fdvp = ap->a_fdvp;
 	struct vnode *nvp;
 	struct componentname *tcnp = ap->a_tcnp;
 	struct componentname *fcnp = ap->a_fcnp;
 	struct thread *td = curthread;
 	struct inode *fip, *tip, *tdp, *fdp;
 	struct direct newdir;
 	off_t endoff;
 	int doingdirectory, newparent;
 	int error = 0;
 	struct mount *mp;
 	ino_t ino;
 	seqc_t fdvp_s, fvp_s, tdvp_s, tvp_s;
 	bool checkpath_locked, want_seqc_end;
 
 	checkpath_locked = want_seqc_end = false;
 
-#ifdef INVARIANTS
-	if ((tcnp->cn_flags & HASBUF) == 0 ||
-	    (fcnp->cn_flags & HASBUF) == 0)
-		panic("ufs_rename: no name");
-#endif
 	endoff = 0;
 	mp = tdvp->v_mount;
 	VOP_UNLOCK(tdvp);
 	if (tvp && tvp != tdvp)
 		VOP_UNLOCK(tvp);
 	/*
 	 * Check for cross-device rename.
 	 */
 	if ((fvp->v_mount != tdvp->v_mount) ||
 	    (tvp && (fvp->v_mount != tvp->v_mount))) {
 		error = EXDEV;
 		mp = NULL;
 		goto releout;
 	}
 
 	fdvp_s = fvp_s = tdvp_s = tvp_s = SEQC_MOD;
 relock:
 	/* 
 	 * We need to acquire 2 to 4 locks depending on whether tvp is NULL
 	 * and fdvp and tdvp are the same directory.  Subsequently we need
 	 * to double-check all paths and in the directory rename case we
 	 * need to verify that we are not creating a directory loop.  To
 	 * handle this we acquire all but fdvp using non-blocking
 	 * acquisitions.  If we fail to acquire any lock in the path we will
 	 * drop all held locks, acquire the new lock in a blocking fashion,
 	 * and then release it and restart the rename.  This acquire/release
 	 * step ensures that we do not spin on a lock waiting for release.
 	 */
 	error = vn_lock(fdvp, LK_EXCLUSIVE);
 	if (error)
 		goto releout;
 	if (vn_lock(tdvp, LK_EXCLUSIVE | LK_NOWAIT) != 0) {
 		VOP_UNLOCK(fdvp);
 		error = vn_lock(tdvp, LK_EXCLUSIVE);
 		if (error)
 			goto releout;
 		VOP_UNLOCK(tdvp);
 		atomic_add_int(&rename_restarts, 1);
 		goto relock;
 	}
 	/*
 	 * Re-resolve fvp to be certain it still exists and fetch the
 	 * correct vnode.
 	 */
 	error = ufs_lookup_ino(fdvp, NULL, fcnp, &ino);
 	if (error) {
 		VOP_UNLOCK(fdvp);
 		VOP_UNLOCK(tdvp);
 		goto releout;
 	}
 	error = VFS_VGET(mp, ino, LK_EXCLUSIVE | LK_NOWAIT, &nvp);
 	if (error) {
 		VOP_UNLOCK(fdvp);
 		VOP_UNLOCK(tdvp);
 		if (error != EBUSY)
 			goto releout;
 		error = VFS_VGET(mp, ino, LK_EXCLUSIVE, &nvp);
 		if (error != 0)
 			goto releout;
 		VOP_UNLOCK(nvp);
 		vrele(fvp);
 		fvp = nvp;
 		atomic_add_int(&rename_restarts, 1);
 		goto relock;
 	}
 	vrele(fvp);
 	fvp = nvp;
 	/*
 	 * Re-resolve tvp and acquire the vnode lock if present.
 	 */
 	error = ufs_lookup_ino(tdvp, NULL, tcnp, &ino);
 	if (error != 0 && error != EJUSTRETURN) {
 		VOP_UNLOCK(fdvp);
 		VOP_UNLOCK(tdvp);
 		VOP_UNLOCK(fvp);
 		goto releout;
 	}
 	/*
 	 * If tvp disappeared we just carry on.
 	 */
 	if (error == EJUSTRETURN && tvp != NULL) {
 		vrele(tvp);
 		tvp = NULL;
 	}
 	/*
 	 * Get the tvp ino if the lookup succeeded.  We may have to restart
 	 * if the non-blocking acquire fails.
 	 */
 	if (error == 0) {
 		nvp = NULL;
 		error = VFS_VGET(mp, ino, LK_EXCLUSIVE | LK_NOWAIT, &nvp);
 		if (tvp)
 			vrele(tvp);
 		tvp = nvp;
 		if (error) {
 			VOP_UNLOCK(fdvp);
 			VOP_UNLOCK(tdvp);
 			VOP_UNLOCK(fvp);
 			if (error != EBUSY)
 				goto releout;
 			error = VFS_VGET(mp, ino, LK_EXCLUSIVE, &nvp);
 			if (error != 0)
 				goto releout;
 			vput(nvp);
 			atomic_add_int(&rename_restarts, 1);
 			goto relock;
 		}
 	}
 
 	if (DOINGSUJ(fdvp) &&
 	    (seqc_in_modify(fdvp_s) || !vn_seqc_consistent(fdvp, fdvp_s) ||
 	     seqc_in_modify(fvp_s) || !vn_seqc_consistent(fvp, fvp_s) ||
 	     seqc_in_modify(tdvp_s) || !vn_seqc_consistent(tdvp, tdvp_s) ||
 	     (tvp != NULL && (seqc_in_modify(tvp_s) ||
 	     !vn_seqc_consistent(tvp, tvp_s))))) {
 		error = softdep_prerename(fdvp, fvp, tdvp, tvp);
 		if (error != 0)
 			goto releout;
 	}
 
 	fdp = VTOI(fdvp);
 	fip = VTOI(fvp);
 	tdp = VTOI(tdvp);
 	tip = NULL;
 	if (tvp)
 		tip = VTOI(tvp);
 	if (tvp && ((VTOI(tvp)->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) ||
 	    (VTOI(tdvp)->i_flags & APPEND))) {
 		error = EPERM;
 		goto unlockout;
 	}
 	/*
 	 * Renaming a file to itself has no effect.  The upper layers should
 	 * not call us in that case.  However, things could change after
 	 * we drop the locks above.
 	 */
 	if (fvp == tvp) {
 		error = 0;
 		goto unlockout;
 	}
 	doingdirectory = 0;
 	newparent = 0;
 	ino = fip->i_number;
 	if (fip->i_nlink >= UFS_LINK_MAX) {
 		if (!DOINGSOFTDEP(fvp) || fip->i_effnlink >= UFS_LINK_MAX) {
 			error = EMLINK;
 			goto unlockout;
 		}
 		vfs_ref(mp);
 		MPASS(!want_seqc_end);
 		if (checkpath_locked) {
 			sx_xunlock(&VFSTOUFS(mp)->um_checkpath_lock);
 			checkpath_locked = false;
 		}
 		VOP_UNLOCK(fdvp);
 		VOP_UNLOCK(fvp);
 		vref(tdvp);
 		if (tvp != NULL)
 			vref(tvp);
 		VOP_VPUT_PAIR(tdvp, &tvp, true);
 		error = ufs_sync_nlink1(mp);
 		vrele(fdvp);
 		vrele(fvp);
 		vrele(tdvp);
 		if (tvp != NULL)
 			vrele(tvp);
 		return (error);
 	}
 	if ((fip->i_flags & (NOUNLINK | IMMUTABLE | APPEND))
 	    || (fdp->i_flags & APPEND)) {
 		error = EPERM;
 		goto unlockout;
 	}
 	if ((fip->i_mode & IFMT) == IFDIR) {
 		/*
 		 * Avoid ".", "..", and aliases of "." for obvious reasons.
 		 */
 		if ((fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') ||
 		    fdp == fip ||
 		    (fcnp->cn_flags | tcnp->cn_flags) & ISDOTDOT) {
 			error = EINVAL;
 			goto unlockout;
 		}
 		if (fdp->i_number != tdp->i_number)
 			newparent = tdp->i_number;
 		doingdirectory = 1;
 	}
 	if ((fvp->v_type == VDIR && fvp->v_mountedhere != NULL) ||
 	    (tvp != NULL && tvp->v_type == VDIR &&
 	    tvp->v_mountedhere != NULL)) {
 		error = EXDEV;
 		goto unlockout;
 	}
 
 	/*
 	 * If ".." must be changed (ie the directory gets a new
 	 * parent) then the source directory must not be in the
 	 * directory hierarchy above the target, as this would
 	 * orphan everything below the source directory. Also
 	 * the user must have write permission in the source so
 	 * as to be able to change "..".
 	 */
 	if (doingdirectory && newparent) {
 		error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred, curthread);
 		if (error)
 			goto unlockout;
 
 		sx_xlock(&VFSTOUFS(mp)->um_checkpath_lock);
 		checkpath_locked = true;
 		error = ufs_checkpath(ino, fdp->i_number, tdp, tcnp->cn_cred,
 		    &ino);
 		/*
 		 * We encountered a lock that we have to wait for.  Unlock
 		 * everything else and VGET before restarting.
 		 */
 		if (ino) {
 			sx_xunlock(&VFSTOUFS(mp)->um_checkpath_lock);
 			checkpath_locked = false;
 			VOP_UNLOCK(fdvp);
 			VOP_UNLOCK(fvp);
 			VOP_UNLOCK(tdvp);
 			if (tvp)
 				VOP_UNLOCK(tvp);
 			error = VFS_VGET(mp, ino, LK_SHARED, &nvp);
 			if (error == 0)
 				vput(nvp);
 			atomic_add_int(&rename_restarts, 1);
 			goto relock;
 		}
 		if (error)
 			goto unlockout;
 		if ((tcnp->cn_flags & SAVESTART) == 0)
 			panic("ufs_rename: lost to startdir");
 	}
 	if (fip->i_effnlink == 0 || fdp->i_effnlink == 0 ||
 	    tdp->i_effnlink == 0)
 		panic("Bad effnlink fip %p, fdp %p, tdp %p", fip, fdp, tdp);
 
 	if (tvp != NULL)
 		vn_seqc_write_begin(tvp);
 	vn_seqc_write_begin(tdvp);
 	vn_seqc_write_begin(fvp);
 	vn_seqc_write_begin(fdvp);
 	want_seqc_end = true;
 
 	/*
 	 * 1) Bump link count while we're moving stuff
 	 *    around.  If we crash somewhere before
 	 *    completing our work, the link count
 	 *    may be wrong, but correctable.
 	 */
 	fip->i_effnlink++;
 	fip->i_nlink++;
 	DIP_SET(fip, i_nlink, fip->i_nlink);
 	UFS_INODE_SET_FLAG(fip, IN_CHANGE);
 	if (DOINGSOFTDEP(fvp))
 		softdep_setup_link(tdp, fip);
 	error = UFS_UPDATE(fvp, !DOINGSOFTDEP(fvp) && !DOINGASYNC(fvp));
 	if (error)
 		goto bad;
 
 	/*
 	 * 2) If target doesn't exist, link the target
 	 *    to the source and unlink the source.
 	 *    Otherwise, rewrite the target directory
 	 *    entry to reference the source inode and
 	 *    expunge the original entry's existence.
 	 */
 	if (tip == NULL) {
 		if (ITODEV(tdp) != ITODEV(fip))
 			panic("ufs_rename: EXDEV");
 		if (doingdirectory && newparent) {
 			/*
 			 * Account for ".." in new directory.
 			 * When source and destination have the same
 			 * parent we don't adjust the link count.  The
 			 * actual link modification is completed when
 			 * .. is rewritten below.
 			 */
 			if (tdp->i_nlink >= UFS_LINK_MAX) {
 				fip->i_effnlink--;
 				fip->i_nlink--;
 				DIP_SET(fip, i_nlink, fip->i_nlink);
 				UFS_INODE_SET_FLAG(fip, IN_CHANGE);
 				if (DOINGSOFTDEP(fvp))
 					softdep_revert_link(tdp, fip);
 				if (!DOINGSOFTDEP(tdvp) ||
 				    tdp->i_effnlink >= UFS_LINK_MAX) {
 					error = EMLINK;
 					goto unlockout;
 				}
 				MPASS(want_seqc_end);
 				if (tvp != NULL)
 					vn_seqc_write_end(tvp);
 				vn_seqc_write_end(tdvp);
 				vn_seqc_write_end(fvp);
 				vn_seqc_write_end(fdvp);
 				want_seqc_end = false;
 				vfs_ref(mp);
 				MPASS(checkpath_locked);
 				sx_xunlock(&VFSTOUFS(mp)->um_checkpath_lock);
 				checkpath_locked = false;
 				VOP_UNLOCK(fdvp);
 				VOP_UNLOCK(fvp);
 				vref(tdvp);
 				if (tvp != NULL)
 					vref(tvp);
 				VOP_VPUT_PAIR(tdvp, &tvp, true);
 				error = ufs_sync_nlink1(mp);
 				vrele(fdvp);
 				vrele(fvp);
 				vrele(tdvp);
 				if (tvp != NULL)
 					vrele(tvp);
 				return (error);
 			}
 		}
 		ufs_makedirentry(fip, tcnp, &newdir);
 		error = ufs_direnter(tdvp, NULL, &newdir, tcnp, NULL);
 		if (error)
 			goto bad;
 		/* Setup tdvp for directory compaction if needed. */
 		if (I_COUNT(tdp) != 0 && I_ENDOFF(tdp) != 0 &&
 		    I_ENDOFF(tdp) < tdp->i_size)
 			endoff = I_ENDOFF(tdp);
 	} else {
 		if (ITODEV(tip) != ITODEV(tdp) || ITODEV(tip) != ITODEV(fip))
 			panic("ufs_rename: EXDEV");
 		/*
 		 * Short circuit rename(foo, foo).
 		 */
 		if (tip->i_number == fip->i_number)
 			panic("ufs_rename: same file");
 		/*
 		 * If the parent directory is "sticky", then the caller
 		 * must possess VADMIN for the parent directory, or the
 		 * destination of the rename.  This implements append-only
 		 * directories.
 		 */
 		if ((tdp->i_mode & S_ISTXT) &&
 		    VOP_ACCESS(tdvp, VADMIN, tcnp->cn_cred, td) &&
 		    VOP_ACCESS(tvp, VADMIN, tcnp->cn_cred, td)) {
 			error = EPERM;
 			goto bad;
 		}
 		/*
 		 * Target must be empty if a directory and have no links
 		 * to it. Also, ensure source and target are compatible
 		 * (both directories, or both not directories).
 		 */
 		if ((tip->i_mode & IFMT) == IFDIR) {
 			if ((tip->i_effnlink > 2) ||
 			    !ufs_dirempty(tip, tdp->i_number, tcnp->cn_cred)) {
 				error = ENOTEMPTY;
 				goto bad;
 			}
 			if (!doingdirectory) {
 				error = ENOTDIR;
 				goto bad;
 			}
 			cache_purge(tdvp);
 		} else if (doingdirectory) {
 			error = EISDIR;
 			goto bad;
 		}
 		if (doingdirectory) {
 			if (!newparent) {
 				tdp->i_effnlink--;
 				if (DOINGSOFTDEP(tdvp))
 					softdep_change_linkcnt(tdp);
 			}
 			tip->i_effnlink--;
 			if (DOINGSOFTDEP(tvp))
 				softdep_change_linkcnt(tip);
 		}
 		error = ufs_dirrewrite(tdp, tip, fip->i_number,
 		    IFTODT(fip->i_mode),
 		    (doingdirectory && newparent) ? newparent : doingdirectory);
 		if (error) {
 			if (doingdirectory) {
 				if (!newparent) {
 					tdp->i_effnlink++;
 					if (DOINGSOFTDEP(tdvp))
 						softdep_change_linkcnt(tdp);
 				}
 				tip->i_effnlink++;
 				if (DOINGSOFTDEP(tvp))
 					softdep_change_linkcnt(tip);
 			}
 			goto bad;
 		}
 		if (doingdirectory && !DOINGSOFTDEP(tvp)) {
 			/*
 			 * The only stuff left in the directory is "."
 			 * and "..". The "." reference is inconsequential
 			 * since we are quashing it. We have removed the "."
 			 * reference and the reference in the parent directory,
 			 * but there may be other hard links. The soft
 			 * dependency code will arrange to do these operations
 			 * after the parent directory entry has been deleted on
 			 * disk, so when running with that code we avoid doing
 			 * them now.
 			 */
 			if (!newparent) {
 				tdp->i_nlink--;
 				DIP_SET(tdp, i_nlink, tdp->i_nlink);
 				UFS_INODE_SET_FLAG(tdp, IN_CHANGE);
 			}
 			tip->i_nlink--;
 			DIP_SET(tip, i_nlink, tip->i_nlink);
 			UFS_INODE_SET_FLAG(tip, IN_CHANGE);
 		}
 	}
 
 	/*
 	 * 3) Unlink the source.  We have to resolve the path again to
 	 * fixup the directory offset and count for ufs_dirremove.
 	 */
 	if (fdvp == tdvp) {
 		error = ufs_lookup_ino(fdvp, NULL, fcnp, &ino);
 		if (error)
 			panic("ufs_rename: from entry went away!");
 		if (ino != fip->i_number)
 			panic("ufs_rename: ino mismatch %ju != %ju\n",
 			    (uintmax_t)ino, (uintmax_t)fip->i_number);
 	}
 	/*
 	 * If the source is a directory with a
 	 * new parent, the link count of the old
 	 * parent directory must be decremented
 	 * and ".." set to point to the new parent.
 	 */
 	if (doingdirectory && newparent) {
 		/*
 		 * If tip exists we simply use its link, otherwise we must
 		 * add a new one.
 		 */
 		if (tip == NULL) {
 			tdp->i_effnlink++;
 			tdp->i_nlink++;
 			DIP_SET(tdp, i_nlink, tdp->i_nlink);
 			UFS_INODE_SET_FLAG(tdp, IN_CHANGE);
 			if (DOINGSOFTDEP(tdvp))
 				softdep_setup_dotdot_link(tdp, fip);
 			error = UFS_UPDATE(tdvp, !DOINGSOFTDEP(tdvp) &&
 			    !DOINGASYNC(tdvp));
 			/* Don't go to bad here as the new link exists. */
 			if (error)
 				goto unlockout;
 		} else if (DOINGSUJ(tdvp))
 			/* Journal must account for each new link. */
 			softdep_setup_dotdot_link(tdp, fip);
 		SET_I_OFFSET(fip, mastertemplate.dot_reclen);
 		ufs_dirrewrite(fip, fdp, newparent, DT_DIR, 0);
 		cache_purge(fdvp);
 	}
 	error = ufs_dirremove(fdvp, fip, fcnp->cn_flags, 0);
 	/*
 	 * The kern_renameat() looks up the fvp using the DELETE flag, which
 	 * causes the removal of the name cache entry for fvp.
 	 * As the relookup of the fvp is done in two steps:
 	 * ufs_lookup_ino() and then VFS_VGET(), another thread might do a
 	 * normal lookup of the from name just before the VFS_VGET() call,
 	 * causing the cache entry to be re-instantiated.
 	 *
 	 * The same issue also applies to tvp if it exists as
 	 * otherwise we may have a stale name cache entry for the new
 	 * name that references the old i-node if it has other links
 	 * or open file descriptors.
 	 */
 	cache_vop_rename(fdvp, fvp, tdvp, tvp, fcnp, tcnp);
 
 unlockout:
 	if (want_seqc_end) {
 		if (tvp != NULL)
 			vn_seqc_write_end(tvp);
 		vn_seqc_write_end(tdvp);
 		vn_seqc_write_end(fvp);
 		vn_seqc_write_end(fdvp);
 	}
 
 	if (checkpath_locked)
 		sx_xunlock(&VFSTOUFS(mp)->um_checkpath_lock);
 
 	vput(fdvp);
 	vput(fvp);
 
 	/*
 	 * If compaction or fsync was requested do it in
 	 * ffs_vput_pair() now that other locks are no longer needed.
 	 */
 	if (error == 0 && endoff != 0) {
 		UFS_INODE_SET_FLAG(tdp, IN_ENDOFF);
 		SET_I_ENDOFF(tdp, endoff);
 	}
 	VOP_VPUT_PAIR(tdvp, &tvp, true);
 	return (error);
 
 bad:
 	fip->i_effnlink--;
 	fip->i_nlink--;
 	DIP_SET(fip, i_nlink, fip->i_nlink);
 	UFS_INODE_SET_FLAG(fip, IN_CHANGE);
 	if (DOINGSOFTDEP(fvp))
 		softdep_revert_link(tdp, fip);
 	goto unlockout;
 
 releout:
 	if (want_seqc_end) {
 		if (tvp != NULL)
 			vn_seqc_write_end(tvp);
 		vn_seqc_write_end(tdvp);
 		vn_seqc_write_end(fvp);
 		vn_seqc_write_end(fdvp);
 	}
 
 	vrele(fdvp);
 	vrele(fvp);
 	vrele(tdvp);
 	if (tvp)
 		vrele(tvp);
 
 	return (error);
 }
 
 #ifdef UFS_ACL
 static int
 ufs_do_posix1e_acl_inheritance_dir(struct vnode *dvp, struct vnode *tvp,
     mode_t dmode, struct ucred *cred, struct thread *td)
 {
 	int error;
 	struct inode *ip = VTOI(tvp);
 	struct acl *dacl, *acl;
 
 	acl = acl_alloc(M_WAITOK);
 	dacl = acl_alloc(M_WAITOK);
 
 	/*
 	 * Retrieve default ACL from parent, if any.
 	 */
 	error = VOP_GETACL(dvp, ACL_TYPE_DEFAULT, acl, cred, td);
 	switch (error) {
 	case 0:
 		/*
 		 * Retrieved a default ACL, so merge mode and ACL if
 		 * necessary.  If the ACL is empty, fall through to
 		 * the "not defined or available" case.
 		 */
 		if (acl->acl_cnt != 0) {
 			dmode = acl_posix1e_newfilemode(dmode, acl);
 			UFS_INODE_SET_MODE(ip, dmode);
 			DIP_SET(ip, i_mode, dmode);
 			*dacl = *acl;
 			ufs_sync_acl_from_inode(ip, acl);
 			break;
 		}
 		/* FALLTHROUGH */
 
 	case EOPNOTSUPP:
 		/*
 		 * Just use the mode as-is.
 		 */
 		UFS_INODE_SET_MODE(ip, dmode);
 		DIP_SET(ip, i_mode, dmode);
 		error = 0;
 		goto out;
 
 	default:
 		goto out;
 	}
 
 	/*
 	 * XXX: If we abort now, will Soft Updates notify the extattr
 	 * code that the EAs for the file need to be released?
 	 */
 	error = VOP_SETACL(tvp, ACL_TYPE_ACCESS, acl, cred, td);
 	if (error == 0)
 		error = VOP_SETACL(tvp, ACL_TYPE_DEFAULT, dacl, cred, td);
 	switch (error) {
 	case 0:
 		break;
 
 	case EOPNOTSUPP:
 		/*
 		 * XXX: This should not happen, as EOPNOTSUPP above
 		 * was supposed to free acl.
 		 */
 		printf("ufs_mkdir: VOP_GETACL() but no VOP_SETACL()\n");
 		/*
 		panic("ufs_mkdir: VOP_GETACL() but no VOP_SETACL()");
 		 */
 		break;
 
 	default:
 		goto out;
 	}
 
 out:
 	acl_free(acl);
 	acl_free(dacl);
 
 	return (error);
 }
 
 static int
 ufs_do_posix1e_acl_inheritance_file(struct vnode *dvp, struct vnode *tvp,
     mode_t mode, struct ucred *cred, struct thread *td)
 {
 	int error;
 	struct inode *ip = VTOI(tvp);
 	struct acl *acl;
 
 	acl = acl_alloc(M_WAITOK);
 
 	/*
 	 * Retrieve default ACL for parent, if any.
 	 */
 	error = VOP_GETACL(dvp, ACL_TYPE_DEFAULT, acl, cred, td);
 	switch (error) {
 	case 0:
 		/*
 		 * Retrieved a default ACL, so merge mode and ACL if
 		 * necessary.
 		 */
 		if (acl->acl_cnt != 0) {
 			/*
 			 * Two possible ways for default ACL to not
 			 * be present.  First, the EA can be
 			 * undefined, or second, the default ACL can
 			 * be blank.  If it's blank, fall through to
 			 * the it's not defined case.
 			 */
 			mode = acl_posix1e_newfilemode(mode, acl);
 			UFS_INODE_SET_MODE(ip, mode);
 			DIP_SET(ip, i_mode, mode);
 			ufs_sync_acl_from_inode(ip, acl);
 			break;
 		}
 		/* FALLTHROUGH */
 
 	case EOPNOTSUPP:
 		/*
 		 * Just use the mode as-is.
 		 */
 		UFS_INODE_SET_MODE(ip, mode);
 		DIP_SET(ip, i_mode, mode);
 		error = 0;
 		goto out;
 
 	default:
 		goto out;
 	}
 
 	/*
 	 * XXX: If we abort now, will Soft Updates notify the extattr
 	 * code that the EAs for the file need to be released?
 	 */
 	error = VOP_SETACL(tvp, ACL_TYPE_ACCESS, acl, cred, td);
 	switch (error) {
 	case 0:
 		break;
 
 	case EOPNOTSUPP:
 		/*
 		 * XXX: This should not happen, as EOPNOTSUPP above was
 		 * supposed to free acl.
 		 */
 		printf("ufs_do_posix1e_acl_inheritance_file: VOP_GETACL() "
 		    "but no VOP_SETACL()\n");
 		/* panic("ufs_do_posix1e_acl_inheritance_file: VOP_GETACL() "
 		    "but no VOP_SETACL()"); */
 		break;
 
 	default:
 		goto out;
 	}
 
 out:
 	acl_free(acl);
 
 	return (error);
 }
 
 static int
 ufs_do_nfs4_acl_inheritance(struct vnode *dvp, struct vnode *tvp,
     mode_t child_mode, struct ucred *cred, struct thread *td)
 {
 	int error;
 	struct acl *parent_aclp, *child_aclp;
 
 	parent_aclp = acl_alloc(M_WAITOK);
 	child_aclp = acl_alloc(M_WAITOK | M_ZERO);
 
 	error = ufs_getacl_nfs4_internal(dvp, parent_aclp, td);
 	if (error)
 		goto out;
 	acl_nfs4_compute_inherited_acl(parent_aclp, child_aclp,
 	    child_mode, VTOI(tvp)->i_uid, tvp->v_type == VDIR);
 	error = ufs_setacl_nfs4_internal(tvp, child_aclp, td);
 	if (error)
 		goto out;
 out:
 	acl_free(parent_aclp);
 	acl_free(child_aclp);
 
 	return (error);
 }
 #endif
 
 /*
  * Mkdir system call
  */
 static int
 ufs_mkdir(
 	struct vop_mkdir_args /* {
 		struct vnode *a_dvp;
 		struct vnode **a_vpp;
 		struct componentname *a_cnp;
 		struct vattr *a_vap;
 	} */ *ap)
 {
 	struct vnode *dvp = ap->a_dvp;
 	struct vattr *vap = ap->a_vap;
 	struct componentname *cnp = ap->a_cnp;
 	struct inode *ip, *dp;
 	struct vnode *tvp;
 	struct buf *bp;
 	struct dirtemplate dirtemplate, *dtp;
 	struct direct newdir;
 	int error, dmode;
 	long blkoff;
 
-#ifdef INVARIANTS
-	if ((cnp->cn_flags & HASBUF) == 0)
-		panic("ufs_mkdir: no name");
-#endif
 	dp = VTOI(dvp);
 	error = ufs_sync_nlink(dvp, NULL);
 	if (error != 0)
 		goto out;
 	dmode = vap->va_mode & 0777;
 	dmode |= IFDIR;
 
 	/*
 	 * Must simulate part of ufs_makeinode here to acquire the inode,
 	 * but not have it entered in the parent directory. The entry is
 	 * made later after writing "." and ".." entries.
 	 */
 	if (dp->i_effnlink < 2) {
 		print_bad_link_count("ufs_mkdir", dvp);
 		error = EINVAL;
 		goto out;
 	}
 
 	if (DOINGSUJ(dvp)) {
 		error = softdep_prelink(dvp, NULL, cnp);
 		if (error != 0) {
 			MPASS(error == ERELOOKUP);
 			return (error);
 		}
 	}
 
 	error = UFS_VALLOC(dvp, dmode, cnp->cn_cred, &tvp);
 	if (error)
 		goto out;
 	vn_seqc_write_begin(tvp);
 	ip = VTOI(tvp);
 	ip->i_gid = dp->i_gid;
 	DIP_SET(ip, i_gid, dp->i_gid);
 #ifdef SUIDDIR
 	{
 #ifdef QUOTA
 		struct ucred ucred, *ucp;
 		gid_t ucred_group;
 		ucp = cnp->cn_cred;
 #endif
 		/*
 		 * If we are hacking owners here, (only do this where told to)
 		 * and we are not giving it TO root, (would subvert quotas)
 		 * then go ahead and give it to the other user.
 		 * The new directory also inherits the SUID bit.
 		 * If user's UID and dir UID are the same,
 		 * 'give it away' so that the SUID is still forced on.
 		 */
 		if ((dvp->v_mount->mnt_flag & MNT_SUIDDIR) &&
 		    (dp->i_mode & ISUID) && dp->i_uid) {
 			dmode |= ISUID;
 			ip->i_uid = dp->i_uid;
 			DIP_SET(ip, i_uid, dp->i_uid);
 #ifdef QUOTA
 			if (dp->i_uid != cnp->cn_cred->cr_uid) {
 				/*
 				 * Make sure the correct user gets charged
 				 * for the space.
 				 * Make a dummy credential for the victim.
 				 * XXX This seems to never be accessed out of
 				 * our context so a stack variable is ok.
 				 */
 				refcount_init(&ucred.cr_ref, 1);
 				ucred.cr_uid = ip->i_uid;
 				ucred.cr_ngroups = 1;
 				ucred.cr_groups = &ucred_group;
 				ucred.cr_groups[0] = dp->i_gid;
 				ucp = &ucred;
 			}
 #endif
 		} else {
 			ip->i_uid = cnp->cn_cred->cr_uid;
 			DIP_SET(ip, i_uid, ip->i_uid);
 		}
 #ifdef QUOTA
 		if ((error = getinoquota(ip)) ||
 	    	    (error = chkiq(ip, 1, ucp, 0))) {
 			if (DOINGSOFTDEP(tvp))
 				softdep_revert_link(dp, ip);
 			UFS_VFREE(tvp, ip->i_number, dmode);
 			vn_seqc_write_end(tvp);
 			vgone(tvp);
 			vput(tvp);
 			return (error);
 		}
 #endif
 	}
 #else	/* !SUIDDIR */
 	ip->i_uid = cnp->cn_cred->cr_uid;
 	DIP_SET(ip, i_uid, ip->i_uid);
 #ifdef QUOTA
 	if ((error = getinoquota(ip)) ||
 	    (error = chkiq(ip, 1, cnp->cn_cred, 0))) {
 		if (DOINGSOFTDEP(tvp))
 			softdep_revert_link(dp, ip);
 		UFS_VFREE(tvp, ip->i_number, dmode);
 		vn_seqc_write_end(tvp);
 		vgone(tvp);
 		vput(tvp);
 		return (error);
 	}
 #endif
 #endif	/* !SUIDDIR */
 	UFS_INODE_SET_FLAG(ip, IN_ACCESS | IN_CHANGE | IN_UPDATE);
 	UFS_INODE_SET_MODE(ip, dmode);
 	DIP_SET(ip, i_mode, dmode);
 	tvp->v_type = VDIR;	/* Rest init'd in getnewvnode(). */
 	ip->i_effnlink = 2;
 	ip->i_nlink = 2;
 	DIP_SET(ip, i_nlink, 2);
 
 	if (cnp->cn_flags & ISWHITEOUT) {
 		ip->i_flags |= UF_OPAQUE;
 		DIP_SET(ip, i_flags, ip->i_flags);
 	}
 
 	/*
 	 * Bump link count in parent directory to reflect work done below.
 	 * Should be done before reference is created so cleanup is
 	 * possible if we crash.
 	 */
 	dp->i_effnlink++;
 	dp->i_nlink++;
 	DIP_SET(dp, i_nlink, dp->i_nlink);
 	UFS_INODE_SET_FLAG(dp, IN_CHANGE);
 	if (DOINGSOFTDEP(dvp))
 		softdep_setup_mkdir(dp, ip);
 	error = UFS_UPDATE(dvp, !DOINGSOFTDEP(dvp) && !DOINGASYNC(dvp));
 	if (error)
 		goto bad;
 #ifdef MAC
 	if (dvp->v_mount->mnt_flag & MNT_MULTILABEL) {
 		error = mac_vnode_create_extattr(cnp->cn_cred, dvp->v_mount,
 		    dvp, tvp, cnp);
 		if (error)
 			goto bad;
 	}
 #endif
 #ifdef UFS_ACL
 	if (dvp->v_mount->mnt_flag & MNT_ACLS) {
 		error = ufs_do_posix1e_acl_inheritance_dir(dvp, tvp, dmode,
 		    cnp->cn_cred, curthread);
 		if (error)
 			goto bad;
 	} else if (dvp->v_mount->mnt_flag & MNT_NFS4ACLS) {
 		error = ufs_do_nfs4_acl_inheritance(dvp, tvp, dmode,
 		    cnp->cn_cred, curthread);
 		if (error)
 			goto bad;
 	}
 #endif /* !UFS_ACL */
 
 	/*
 	 * Initialize directory with "." and ".." from static template.
 	 */
 	if (!OFSFMT(dvp))
 		dtp = &mastertemplate;
 	else
 		dtp = (struct dirtemplate *)&omastertemplate;
 	dirtemplate = *dtp;
 	dirtemplate.dot_ino = ip->i_number;
 	dirtemplate.dotdot_ino = dp->i_number;
 	vnode_pager_setsize(tvp, DIRBLKSIZ);
 	if ((error = UFS_BALLOC(tvp, (off_t)0, DIRBLKSIZ, cnp->cn_cred,
 	    BA_CLRBUF, &bp)) != 0)
 		goto bad;
 	ip->i_size = DIRBLKSIZ;
 	DIP_SET(ip, i_size, DIRBLKSIZ);
 	UFS_INODE_SET_FLAG(ip, IN_SIZEMOD | IN_CHANGE | IN_UPDATE);
 	bcopy((caddr_t)&dirtemplate, (caddr_t)bp->b_data, sizeof dirtemplate);
 	if (DOINGSOFTDEP(tvp)) {
 		/*
 		 * Ensure that the entire newly allocated block is a
 		 * valid directory so that future growth within the
 		 * block does not have to ensure that the block is
 		 * written before the inode.
 		 */
 		blkoff = DIRBLKSIZ;
 		while (blkoff < bp->b_bcount) {
 			((struct direct *)
 			   (bp->b_data + blkoff))->d_reclen = DIRBLKSIZ;
 			blkoff += DIRBLKSIZ;
 		}
 	}
 	if ((error = UFS_UPDATE(tvp, !DOINGSOFTDEP(tvp) &&
 	    !DOINGASYNC(tvp))) != 0) {
 		(void)bwrite(bp);
 		goto bad;
 	}
 	/*
 	 * Directory set up, now install its entry in the parent directory.
 	 *
 	 * If we are not doing soft dependencies, then we must write out the
 	 * buffer containing the new directory body before entering the new 
 	 * name in the parent. If we are doing soft dependencies, then the
 	 * buffer containing the new directory body will be passed to and
 	 * released in the soft dependency code after the code has attached
 	 * an appropriate ordering dependency to the buffer which ensures that
 	 * the buffer is written before the new name is written in the parent.
 	 */
 	if (DOINGASYNC(dvp))
 		bdwrite(bp);
 	else if (!DOINGSOFTDEP(dvp) && ((error = bwrite(bp))))
 		goto bad;
 	ufs_makedirentry(ip, cnp, &newdir);
 	error = ufs_direnter(dvp, tvp, &newdir, cnp, bp);
 
 bad:
 	if (error == 0) {
 		*ap->a_vpp = tvp;
 		vn_seqc_write_end(tvp);
 	} else {
 		dp->i_effnlink--;
 		dp->i_nlink--;
 		DIP_SET(dp, i_nlink, dp->i_nlink);
 		UFS_INODE_SET_FLAG(dp, IN_CHANGE);
 		/*
 		 * No need to do an explicit VOP_TRUNCATE here, vrele will
 		 * do this for us because we set the link count to 0.
 		 */
 		ip->i_effnlink = 0;
 		ip->i_nlink = 0;
 		DIP_SET(ip, i_nlink, 0);
 		UFS_INODE_SET_FLAG(ip, IN_CHANGE);
 		if (DOINGSOFTDEP(tvp))
 			softdep_revert_mkdir(dp, ip);
 		vn_seqc_write_end(tvp);
 		vgone(tvp);
 		vput(tvp);
 	}
 out:
 	return (error);
 }
 
 /*
  * Rmdir system call.
  */
 static int
 ufs_rmdir(
 	struct vop_rmdir_args /* {
 		struct vnode *a_dvp;
 		struct vnode *a_vp;
 		struct componentname *a_cnp;
 	} */ *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct vnode *dvp = ap->a_dvp;
 	struct componentname *cnp = ap->a_cnp;
 	struct inode *ip, *dp;
 	int error;
 
 	ip = VTOI(vp);
 	dp = VTOI(dvp);
 
 	/*
 	 * Do not remove a directory that is in the process of being renamed.
 	 * Verify the directory is empty (and valid). Rmdir ".." will not be
 	 * valid since ".." will contain a reference to the current directory
 	 * and thus be non-empty. Do not allow the removal of mounted on
 	 * directories (this can happen when an NFS exported filesystem
 	 * tries to remove a locally mounted on directory).
 	 */
 	error = 0;
 	if (dp->i_effnlink <= 2) {
 		if (dp->i_effnlink == 2)
 			print_bad_link_count("ufs_rmdir", dvp);
 		error = EINVAL;
 		goto out;
 	}
 	if (!ufs_dirempty(ip, dp->i_number, cnp->cn_cred)) {
 		error = ENOTEMPTY;
 		goto out;
 	}
 	if ((dp->i_flags & APPEND)
 	    || (ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND))) {
 		error = EPERM;
 		goto out;
 	}
 	if (vp->v_mountedhere != 0) {
 		error = EINVAL;
 		goto out;
 	}
 	if (DOINGSUJ(dvp)) {
 		error = softdep_prelink(dvp, vp, cnp);
 		if (error != 0) {
 			MPASS(error == ERELOOKUP);
 			return (error);
 		}
 	}
 
 #ifdef UFS_GJOURNAL
 	ufs_gjournal_orphan(vp);
 #endif
 	/*
 	 * Delete reference to directory before purging
 	 * inode.  If we crash in between, the directory
 	 * will be reattached to lost+found,
 	 */
 	dp->i_effnlink--;
 	ip->i_effnlink--;
 	if (DOINGSOFTDEP(vp))
 		softdep_setup_rmdir(dp, ip);
 	error = ufs_dirremove(dvp, ip, cnp->cn_flags, 1);
 	if (error) {
 		dp->i_effnlink++;
 		ip->i_effnlink++;
 		if (DOINGSOFTDEP(vp))
 			softdep_revert_rmdir(dp, ip);
 		goto out;
 	}
 	/*
 	 * The only stuff left in the directory is "." and "..". The "."
 	 * reference is inconsequential since we are quashing it. The soft
 	 * dependency code will arrange to do these operations after
 	 * the parent directory entry has been deleted on disk, so
 	 * when running with that code we avoid doing them now.
 	 */
 	if (!DOINGSOFTDEP(vp)) {
 		dp->i_nlink--;
 		DIP_SET(dp, i_nlink, dp->i_nlink);
 		UFS_INODE_SET_FLAG(dp, IN_CHANGE);
 		error = UFS_UPDATE(dvp, 0);
 		ip->i_nlink--;
 		DIP_SET(ip, i_nlink, ip->i_nlink);
 		UFS_INODE_SET_FLAG(ip, IN_CHANGE);
 	}
 	cache_vop_rmdir(dvp, vp);
 #ifdef UFS_DIRHASH
 	/* Kill any active hash; i_effnlink == 0, so it will not come back. */
 	if (ip->i_dirhash != NULL)
 		ufsdirhash_free(ip);
 #endif
 out:
 	return (error);
 }
 
 /*
  * symlink -- make a symbolic link
  */
 static int
 ufs_symlink(
 	struct vop_symlink_args /* {
 		struct vnode *a_dvp;
 		struct vnode **a_vpp;
 		struct componentname *a_cnp;
 		struct vattr *a_vap;
 		const char *a_target;
 	} */ *ap)
 {
 	struct vnode *vp, **vpp = ap->a_vpp;
 	struct inode *ip;
 	int len, error;
 
 	error = ufs_makeinode(IFLNK | ap->a_vap->va_mode, ap->a_dvp,
 	    vpp, ap->a_cnp, "ufs_symlink");
 	if (error)
 		return (error);
 	vp = *vpp;
 	len = strlen(ap->a_target);
 	if (len < VFSTOUFS(vp->v_mount)->um_maxsymlinklen) {
 		ip = VTOI(vp);
 		bcopy(ap->a_target, DIP(ip, i_shortlink), len);
 		ip->i_size = len;
 		DIP_SET(ip, i_size, len);
 		UFS_INODE_SET_FLAG(ip, IN_SIZEMOD | IN_CHANGE | IN_UPDATE);
 		error = UFS_UPDATE(vp, 0);
 	} else
 		error = vn_rdwr(UIO_WRITE, vp, __DECONST(void *, ap->a_target),
 		    len, (off_t)0, UIO_SYSSPACE, IO_NODELOCKED | IO_NOMACCHECK,
 		    ap->a_cnp->cn_cred, NOCRED, NULL, NULL);
 	if (error)
 		vput(vp);
 	return (error);
 }
 
 /*
  * Vnode op for reading directories.
  */
 int
 ufs_readdir(
 	struct vop_readdir_args /* {
 		struct vnode *a_vp;
 		struct uio *a_uio;
 		struct ucred *a_cred;
 		int *a_eofflag;
 		int *a_ncookies;
 		uint64_t **a_cookies;
 	} */ *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct uio *uio = ap->a_uio;
 	struct buf *bp;
 	struct inode *ip;
 	struct direct *dp, *edp;
 	uint64_t *cookies;
 	struct dirent dstdp;
 	off_t offset, startoffset;
 	size_t readcnt, skipcnt;
 	ssize_t startresid;
 	u_int ncookies;
 	int error;
 
 	if (uio->uio_offset < 0)
 		return (EINVAL);
 	ip = VTOI(vp);
 	if (ip->i_effnlink == 0)
 		return (0);
 	if (ap->a_ncookies != NULL) {
 		if (uio->uio_resid < 0)
 			ncookies = 0;
 		else
 			ncookies = uio->uio_resid;
 		if (uio->uio_offset >= ip->i_size)
 			ncookies = 0;
 		else if (ip->i_size - uio->uio_offset < ncookies)
 			ncookies = ip->i_size - uio->uio_offset;
 		ncookies = ncookies / (offsetof(struct direct, d_name) + 4) + 1;
 		cookies = malloc(ncookies * sizeof(*cookies), M_TEMP, M_WAITOK);
 		*ap->a_ncookies = ncookies;
 		*ap->a_cookies = cookies;
 	} else {
 		ncookies = 0;
 		cookies = NULL;
 	}
 	offset = startoffset = uio->uio_offset;
 	startresid = uio->uio_resid;
 	error = 0;
 	while (error == 0 && uio->uio_resid > 0 &&
 	    uio->uio_offset < ip->i_size) {
 		error = UFS_BLKATOFF(vp, uio->uio_offset, NULL, &bp);
 		if (error)
 			break;
 		if (bp->b_offset + bp->b_bcount > ip->i_size)
 			readcnt = ip->i_size - bp->b_offset;
 		else
 			readcnt = bp->b_bcount;
 		skipcnt = (size_t)(uio->uio_offset - bp->b_offset) &
 		    ~(size_t)(DIRBLKSIZ - 1);
 		offset = bp->b_offset + skipcnt;
 		dp = (struct direct *)&bp->b_data[skipcnt];
 		edp = (struct direct *)&bp->b_data[readcnt];
 		while (error == 0 && uio->uio_resid > 0 && dp < edp) {
 			if (dp->d_reclen <= offsetof(struct direct, d_name) ||
 			    (caddr_t)dp + dp->d_reclen > (caddr_t)edp) {
 				error = EIO;
 				break;
 			}
 #if BYTE_ORDER == LITTLE_ENDIAN
 			/* Old filesystem format. */
 			if (OFSFMT(vp)) {
 				dstdp.d_namlen = dp->d_type;
 				dstdp.d_type = dp->d_namlen;
 			} else
 #endif
 			{
 				dstdp.d_namlen = dp->d_namlen;
 				dstdp.d_type = dp->d_type;
 			}
 			if (offsetof(struct direct, d_name) + dstdp.d_namlen >
 			    dp->d_reclen) {
 				error = EIO;
 				break;
 			}
 			if (offset < startoffset || dp->d_ino == 0)
 				goto nextentry;
 			dstdp.d_fileno = dp->d_ino;
 			dstdp.d_reclen = GENERIC_DIRSIZ(&dstdp);
 			bcopy(dp->d_name, dstdp.d_name, dstdp.d_namlen);
 			/* NOTE: d_off is the offset of the *next* entry. */
 			dstdp.d_off = offset + dp->d_reclen;
 			dirent_terminate(&dstdp);
 			if (dstdp.d_reclen > uio->uio_resid) {
 				if (uio->uio_resid == startresid)
 					error = EINVAL;
 				else
 					error = EJUSTRETURN;
 				break;
 			}
 			/* Advance dp. */
 			error = uiomove((caddr_t)&dstdp, dstdp.d_reclen, uio);
 			if (error)
 				break;
 			if (cookies != NULL) {
 				KASSERT(ncookies > 0,
 				    ("ufs_readdir: cookies buffer too small"));
 				*cookies = offset + dp->d_reclen;
 				cookies++;
 				ncookies--;
 			}
 nextentry:
 			offset += dp->d_reclen;
 			dp = (struct direct *)((caddr_t)dp + dp->d_reclen);
 		}
 		bqrelse(bp);
 		uio->uio_offset = offset;
 	}
 	/* We need to correct uio_offset. */
 	uio->uio_offset = offset;
 	if (error == EJUSTRETURN)
 		error = 0;
 	if (ap->a_ncookies != NULL) {
 		if (error == 0) {
 			ap->a_ncookies -= ncookies;
 		} else {
 			free(*ap->a_cookies, M_TEMP);
 			*ap->a_ncookies = 0;
 			*ap->a_cookies = NULL;
 		}
 	}
 	if (error == 0 && ap->a_eofflag)
 		*ap->a_eofflag = ip->i_size <= uio->uio_offset;
 	return (error);
 }
 
 /*
  * Return target name of a symbolic link
  */
 static int
 ufs_readlink(
 	struct vop_readlink_args /* {
 		struct vnode *a_vp;
 		struct uio *a_uio;
 		struct ucred *a_cred;
 	} */ *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct inode *ip = VTOI(vp);
 	doff_t isize;
 
 	isize = ip->i_size;
 	if (isize < VFSTOUFS(vp->v_mount)->um_maxsymlinklen)
 		return (uiomove(DIP(ip, i_shortlink), isize, ap->a_uio));
 	return (VOP_READ(vp, ap->a_uio, 0, ap->a_cred));
 }
 
 /*
  * Calculate the logical to physical mapping if not done already,
  * then call the device strategy routine.
  *
  * In order to be able to swap to a file, the ufs_bmaparray() operation may not
  * deadlock on memory.  See ufs_bmap() for details.
  */
 static int
 ufs_strategy(
 	struct vop_strategy_args /* {
 		struct vnode *a_vp;
 		struct buf *a_bp;
 	} */ *ap)
 {
 	struct buf *bp = ap->a_bp;
 	struct vnode *vp = ap->a_vp;
 	ufs2_daddr_t blkno;
 	int error;
 
 	if (bp->b_blkno == bp->b_lblkno) {
 		error = ufs_bmaparray(vp, bp->b_lblkno, &blkno, bp, NULL, NULL);
 		bp->b_blkno = blkno;
 		if (error) {
 			bp->b_error = error;
 			bp->b_ioflags |= BIO_ERROR;
 			bufdone(bp);
 			return (0);
 		}
 		if ((long)bp->b_blkno == -1)
 			vfs_bio_clrbuf(bp);
 	}
 	if ((long)bp->b_blkno == -1) {
 		bufdone(bp);
 		return (0);
 	}
 	bp->b_iooffset = dbtob(bp->b_blkno);
 	BO_STRATEGY(VFSTOUFS(vp->v_mount)->um_bo, bp);
 	return (0);
 }
 
 /*
  * Print out the contents of an inode.
  */
 static int
 ufs_print(
 	struct vop_print_args /* {
 		struct vnode *a_vp;
 	} */ *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct inode *ip = VTOI(vp);
 
 	printf("\tnlink=%d, effnlink=%d, size=%jd", ip->i_nlink,
 	    ip->i_effnlink, (intmax_t)ip->i_size);
 	if (I_IS_UFS2(ip))
 		printf(", extsize %d", ip->i_din2->di_extsize);
 	printf("\n\tgeneration=%jx, uid=%d, gid=%d, flags=0x%b\n",
 	    (uintmax_t)ip->i_gen, ip->i_uid, ip->i_gid,
 	    (u_int)ip->i_flags, PRINT_INODE_FLAGS);
 	printf("\tino %lu, on dev %s", (u_long)ip->i_number,
 	    devtoname(ITODEV(ip)));
 	if (vp->v_type == VFIFO)
 		fifo_printinfo(vp);
 	printf("\n");
 	return (0);
 }
 
 /*
  * Close wrapper for fifos.
  *
  * Update the times on the inode then do device close.
  */
 static int
 ufsfifo_close(
 	struct vop_close_args /* {
 		struct vnode *a_vp;
 		int  a_fflag;
 		struct ucred *a_cred;
 		struct thread *a_td;
 	} */ *ap)
 {
 
 	ufs_close(ap);
 	return (fifo_specops.vop_close(ap));
 }
 
 /*
  * Return POSIX pathconf information applicable to ufs filesystems.
  */
 static int
 ufs_pathconf(
 	struct vop_pathconf_args /* {
 		struct vnode *a_vp;
 		int a_name;
 		int *a_retval;
 	} */ *ap)
 {
 	int error;
 
 	error = 0;
 	switch (ap->a_name) {
 	case _PC_LINK_MAX:
 		*ap->a_retval = UFS_LINK_MAX;
 		break;
 	case _PC_NAME_MAX:
 		*ap->a_retval = UFS_MAXNAMLEN;
 		break;
 	case _PC_PIPE_BUF:
 		if (ap->a_vp->v_type == VDIR || ap->a_vp->v_type == VFIFO)
 			*ap->a_retval = PIPE_BUF;
 		else
 			error = EINVAL;
 		break;
 	case _PC_CHOWN_RESTRICTED:
 		*ap->a_retval = 1;
 		break;
 	case _PC_NO_TRUNC:
 		*ap->a_retval = 1;
 		break;
 #ifdef UFS_ACL
 	case _PC_ACL_EXTENDED:
 		if (ap->a_vp->v_mount->mnt_flag & MNT_ACLS)
 			*ap->a_retval = 1;
 		else
 			*ap->a_retval = 0;
 		break;
 	case _PC_ACL_NFS4:
 		if (ap->a_vp->v_mount->mnt_flag & MNT_NFS4ACLS)
 			*ap->a_retval = 1;
 		else
 			*ap->a_retval = 0;
 		break;
 #endif
 	case _PC_ACL_PATH_MAX:
 #ifdef UFS_ACL
 		if (ap->a_vp->v_mount->mnt_flag & (MNT_ACLS | MNT_NFS4ACLS))
 			*ap->a_retval = ACL_MAX_ENTRIES;
 		else
 			*ap->a_retval = 3;
 #else
 		*ap->a_retval = 3;
 #endif
 		break;
 #ifdef MAC
 	case _PC_MAC_PRESENT:
 		if (ap->a_vp->v_mount->mnt_flag & MNT_MULTILABEL)
 			*ap->a_retval = 1;
 		else
 			*ap->a_retval = 0;
 		break;
 #endif
 	case _PC_MIN_HOLE_SIZE:
 		*ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_iosize;
 		break;
 	case _PC_PRIO_IO:
 		*ap->a_retval = 0;
 		break;
 	case _PC_SYNC_IO:
 		*ap->a_retval = 0;
 		break;
 	case _PC_ALLOC_SIZE_MIN:
 		*ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_bsize;
 		break;
 	case _PC_FILESIZEBITS:
 		*ap->a_retval = 64;
 		break;
 	case _PC_REC_INCR_XFER_SIZE:
 		*ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_iosize;
 		break;
 	case _PC_REC_MAX_XFER_SIZE:
 		*ap->a_retval = -1; /* means ``unlimited'' */
 		break;
 	case _PC_REC_MIN_XFER_SIZE:
 		*ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_iosize;
 		break;
 	case _PC_REC_XFER_ALIGN:
 		*ap->a_retval = PAGE_SIZE;
 		break;
 	case _PC_SYMLINK_MAX:
 		*ap->a_retval = MAXPATHLEN;
 		break;
 
 	default:
 		error = vop_stdpathconf(ap);
 		break;
 	}
 	return (error);
 }
 
 /*
  * Initialize the vnode associated with a new inode, handle aliased
  * vnodes.
  */
 int
 ufs_vinit(struct mount *mntp, struct vop_vector *fifoops, struct vnode **vpp)
 {
 	struct inode *ip;
 	struct vnode *vp;
 
 	vp = *vpp;
 	ASSERT_VOP_LOCKED(vp, "ufs_vinit");
 	ip = VTOI(vp);
 	vp->v_type = IFTOVT(ip->i_mode);
 	/*
 	 * Only unallocated inodes should be of type VNON.
 	 */
 	if (ip->i_mode != 0 && vp->v_type == VNON)
 		return (EINVAL);
 	if (vp->v_type == VFIFO)
 		vp->v_op = fifoops;
 	if (ip->i_number == UFS_ROOTINO)
 		vp->v_vflag |= VV_ROOT;
 	*vpp = vp;
 	return (0);
 }
 
 /*
  * Allocate a new inode.
  * Vnode dvp must be locked.
  */
 static int
 ufs_makeinode(int mode, struct vnode *dvp, struct vnode **vpp,
     struct componentname *cnp, const char *callfunc)
 {
 	struct inode *ip, *pdir;
 	struct direct newdir;
 	struct vnode *tvp;
 	int error;
 
 	pdir = VTOI(dvp);
-#ifdef INVARIANTS
-	if ((cnp->cn_flags & HASBUF) == 0)
-		panic("%s: no name", callfunc);
-#endif
 	*vpp = NULL;
 	if ((mode & IFMT) == 0)
 		mode |= IFREG;
 
 	if (pdir->i_effnlink < 2) {
 		print_bad_link_count(callfunc, dvp);
 		return (EINVAL);
 	}
 	if (DOINGSUJ(dvp)) {
 		error = softdep_prelink(dvp, NULL, cnp);
 		if (error != 0) {
 			MPASS(error == ERELOOKUP);
 			return (error);
 		}
 	}
 	error = UFS_VALLOC(dvp, mode, cnp->cn_cred, &tvp);
 	if (error)
 		return (error);
 	ip = VTOI(tvp);
 	ip->i_gid = pdir->i_gid;
 	DIP_SET(ip, i_gid, pdir->i_gid);
 #ifdef SUIDDIR
 	{
 #ifdef QUOTA
 		struct ucred ucred, *ucp;
 		gid_t ucred_group;
 		ucp = cnp->cn_cred;
 #endif
 		/*
 		 * If we are not the owner of the directory,
 		 * and we are hacking owners here, (only do this where told to)
 		 * and we are not giving it TO root, (would subvert quotas)
 		 * then go ahead and give it to the other user.
 		 * Note that this drops off the execute bits for security.
 		 */
 		if ((dvp->v_mount->mnt_flag & MNT_SUIDDIR) &&
 		    (pdir->i_mode & ISUID) &&
 		    (pdir->i_uid != cnp->cn_cred->cr_uid) && pdir->i_uid) {
 			ip->i_uid = pdir->i_uid;
 			DIP_SET(ip, i_uid, ip->i_uid);
 			mode &= ~07111;
 #ifdef QUOTA
 			/*
 			 * Make sure the correct user gets charged
 			 * for the space.
 			 * Quickly knock up a dummy credential for the victim.
 			 * XXX This seems to never be accessed out of our
 			 * context so a stack variable is ok.
 			 */
 			refcount_init(&ucred.cr_ref, 1);
 			ucred.cr_uid = ip->i_uid;
 			ucred.cr_ngroups = 1;
 			ucred.cr_groups = &ucred_group;
 			ucred.cr_groups[0] = pdir->i_gid;
 			ucp = &ucred;
 #endif
 		} else {
 			ip->i_uid = cnp->cn_cred->cr_uid;
 			DIP_SET(ip, i_uid, ip->i_uid);
 		}
 
 #ifdef QUOTA
 		if ((error = getinoquota(ip)) ||
 	    	    (error = chkiq(ip, 1, ucp, 0))) {
 			if (DOINGSOFTDEP(tvp))
 				softdep_revert_link(pdir, ip);
 			UFS_VFREE(tvp, ip->i_number, mode);
 			vgone(tvp);
 			vput(tvp);
 			return (error);
 		}
 #endif
 	}
 #else	/* !SUIDDIR */
 	ip->i_uid = cnp->cn_cred->cr_uid;
 	DIP_SET(ip, i_uid, ip->i_uid);
 #ifdef QUOTA
 	if ((error = getinoquota(ip)) ||
 	    (error = chkiq(ip, 1, cnp->cn_cred, 0))) {
 		if (DOINGSOFTDEP(tvp))
 			softdep_revert_link(pdir, ip);
 		UFS_VFREE(tvp, ip->i_number, mode);
 		vgone(tvp);
 		vput(tvp);
 		return (error);
 	}
 #endif
 #endif	/* !SUIDDIR */
 	vn_seqc_write_begin(tvp); /* Mostly to cover asserts */
 	UFS_INODE_SET_FLAG(ip, IN_ACCESS | IN_CHANGE | IN_UPDATE);
 	UFS_INODE_SET_MODE(ip, mode);
 	DIP_SET(ip, i_mode, mode);
 	tvp->v_type = IFTOVT(mode);	/* Rest init'd in getnewvnode(). */
 	ip->i_effnlink = 1;
 	ip->i_nlink = 1;
 	DIP_SET(ip, i_nlink, 1);
 	if (DOINGSOFTDEP(tvp))
 		softdep_setup_create(VTOI(dvp), ip);
 	if ((ip->i_mode & ISGID) && !groupmember(ip->i_gid, cnp->cn_cred) &&
 	    priv_check_cred(cnp->cn_cred, PRIV_VFS_SETGID)) {
 		UFS_INODE_SET_MODE(ip, ip->i_mode & ~ISGID);
 		DIP_SET(ip, i_mode, ip->i_mode);
 	}
 
 	if (cnp->cn_flags & ISWHITEOUT) {
 		ip->i_flags |= UF_OPAQUE;
 		DIP_SET(ip, i_flags, ip->i_flags);
 	}
 
 	/*
 	 * Make sure inode goes to disk before directory entry.
 	 */
 	error = UFS_UPDATE(tvp, !DOINGSOFTDEP(tvp) && !DOINGASYNC(tvp));
 	if (error)
 		goto bad;
 #ifdef MAC
 	if (dvp->v_mount->mnt_flag & MNT_MULTILABEL) {
 		error = mac_vnode_create_extattr(cnp->cn_cred, dvp->v_mount,
 		    dvp, tvp, cnp);
 		if (error)
 			goto bad;
 	}
 #endif
 #ifdef UFS_ACL
 	if (dvp->v_mount->mnt_flag & MNT_ACLS) {
 		error = ufs_do_posix1e_acl_inheritance_file(dvp, tvp, mode,
 		    cnp->cn_cred, curthread);
 		if (error)
 			goto bad;
 	} else if (dvp->v_mount->mnt_flag & MNT_NFS4ACLS) {
 		error = ufs_do_nfs4_acl_inheritance(dvp, tvp, mode,
 		    cnp->cn_cred, curthread);
 		if (error)
 			goto bad;
 	}
 #endif /* !UFS_ACL */
 	ufs_makedirentry(ip, cnp, &newdir);
 	error = ufs_direnter(dvp, tvp, &newdir, cnp, NULL);
 	if (error)
 		goto bad;
 	vn_seqc_write_end(tvp);
 	*vpp = tvp;
 	return (0);
 
 bad:
 	/*
 	 * Write error occurred trying to update the inode
 	 * or the directory so must deallocate the inode.
 	 */
 	ip->i_effnlink = 0;
 	ip->i_nlink = 0;
 	DIP_SET(ip, i_nlink, 0);
 	UFS_INODE_SET_FLAG(ip, IN_CHANGE);
 	if (DOINGSOFTDEP(tvp))
 		softdep_revert_create(VTOI(dvp), ip);
 	vn_seqc_write_end(tvp);
 	vgone(tvp);
 	vput(tvp);
 	return (error);
 }
 
 static int
 ufs_ioctl(struct vop_ioctl_args *ap)
 {
 	struct vnode *vp;
 	int error;
 
 	vp = ap->a_vp;
 	switch (ap->a_command) {
 	case FIOSEEKDATA:
 		error = vn_lock(vp, LK_SHARED);
 		if (error == 0) {
 			error = ufs_bmap_seekdata(vp, (off_t *)ap->a_data);
 			VOP_UNLOCK(vp);
 		} else
 			error = EBADF;
 		return (error);
 	case FIOSEEKHOLE:
 		return (vn_bmap_seekhole(vp, ap->a_command, (off_t *)ap->a_data,
 		    ap->a_cred));
 	default:
 		return (ENOTTY);
 	}
 }
 
 static int
 ufs_read_pgcache(struct vop_read_pgcache_args *ap)
 {
 	struct uio *uio;
 	struct vnode *vp;
 
 	uio = ap->a_uio;
 	vp = ap->a_vp;
 	VNPASS((vn_irflag_read(vp) & VIRF_PGREAD) != 0, vp);
 
 	if (uio->uio_resid > ptoa(io_hold_cnt) || uio->uio_offset < 0 ||
 	    (ap->a_ioflag & IO_DIRECT) != 0)
 		return (EJUSTRETURN);
 	return (vn_read_from_obj(vp, uio));
 }
 
 /* Global vfs data structures for ufs. */
 struct vop_vector ufs_vnodeops = {
 	.vop_default =		&default_vnodeops,
 	.vop_fsync =		VOP_PANIC,
 	.vop_read =		VOP_PANIC,
 	.vop_reallocblks =	VOP_PANIC,
 	.vop_write =		VOP_PANIC,
 	.vop_accessx =		ufs_accessx,
 	.vop_bmap =		ufs_bmap,
 	.vop_fplookup_vexec =	ufs_fplookup_vexec,
 	.vop_fplookup_symlink =	VOP_EAGAIN,
 	.vop_cachedlookup =	ufs_lookup,
 	.vop_close =		ufs_close,
 	.vop_create =		ufs_create,
 	.vop_stat =		ufs_stat,
 	.vop_getattr =		ufs_getattr,
 	.vop_inactive =		ufs_inactive,
 	.vop_ioctl =		ufs_ioctl,
 	.vop_link =		ufs_link,
 	.vop_lookup =		vfs_cache_lookup,
 	.vop_mmapped =		ufs_mmapped,
 	.vop_mkdir =		ufs_mkdir,
 	.vop_mknod =		ufs_mknod,
 	.vop_need_inactive =	ufs_need_inactive,
 	.vop_open =		ufs_open,
 	.vop_pathconf =		ufs_pathconf,
 	.vop_poll =		vop_stdpoll,
 	.vop_print =		ufs_print,
 	.vop_read_pgcache =	ufs_read_pgcache,
 	.vop_readdir =		ufs_readdir,
 	.vop_readlink =		ufs_readlink,
 	.vop_reclaim =		ufs_reclaim,
 	.vop_remove =		ufs_remove,
 	.vop_rename =		ufs_rename,
 	.vop_rmdir =		ufs_rmdir,
 	.vop_setattr =		ufs_setattr,
 #ifdef MAC
 	.vop_setlabel =		vop_stdsetlabel_ea,
 #endif
 	.vop_strategy =		ufs_strategy,
 	.vop_symlink =		ufs_symlink,
 	.vop_whiteout =		ufs_whiteout,
 #ifdef UFS_EXTATTR
 	.vop_getextattr =	ufs_getextattr,
 	.vop_deleteextattr =	ufs_deleteextattr,
 	.vop_setextattr =	ufs_setextattr,
 #endif
 #ifdef UFS_ACL
 	.vop_getacl =		ufs_getacl,
 	.vop_setacl =		ufs_setacl,
 	.vop_aclcheck =		ufs_aclcheck,
 #endif
 };
 VFS_VOP_VECTOR_REGISTER(ufs_vnodeops);
 
 struct vop_vector ufs_fifoops = {
 	.vop_default =		&fifo_specops,
 	.vop_fsync =		VOP_PANIC,
 	.vop_accessx =		ufs_accessx,
 	.vop_close =		ufsfifo_close,
 	.vop_getattr =		ufs_getattr,
 	.vop_inactive =		ufs_inactive,
 	.vop_pathconf = 	ufs_pathconf,
 	.vop_print =		ufs_print,
 	.vop_read =		VOP_PANIC,
 	.vop_reclaim =		ufs_reclaim,
 	.vop_setattr =		ufs_setattr,
 #ifdef MAC
 	.vop_setlabel =		vop_stdsetlabel_ea,
 #endif
 	.vop_write =		VOP_PANIC,
 #ifdef UFS_EXTATTR
 	.vop_getextattr =	ufs_getextattr,
 	.vop_deleteextattr =	ufs_deleteextattr,
 	.vop_setextattr =	ufs_setextattr,
 #endif
 #ifdef UFS_ACL
 	.vop_getacl =		ufs_getacl,
 	.vop_setacl =		ufs_setacl,
 	.vop_aclcheck =		ufs_aclcheck,
 #endif
 };
 VFS_VOP_VECTOR_REGISTER(ufs_fifoops);