diff --git a/sys/fs/msdosfs/msdosfs_vnops.c b/sys/fs/msdosfs/msdosfs_vnops.c
index 078ea5e52312..6417b7dac16b 100644
--- a/sys/fs/msdosfs/msdosfs_vnops.c
+++ b/sys/fs/msdosfs/msdosfs_vnops.c
@@ -1,2008 +1,2010 @@
 /*	$NetBSD: msdosfs_vnops.c,v 1.68 1998/02/10 14:10:04 mrg Exp $	*/
 
 /*-
  * SPDX-License-Identifier: BSD-4-Clause
  *
  * Copyright (C) 1994, 1995, 1997 Wolfgang Solfrank.
  * Copyright (C) 1994, 1995, 1997 TooLs GmbH.
  * All rights reserved.
  * Original code by Paul Popelka (paulp@uts.amdahl.com) (see below).
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by TooLs GmbH.
  * 4. The name of TooLs GmbH may not be used to endorse or promote products
  *    derived from this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 /*-
  * Written by Paul Popelka (paulp@uts.amdahl.com)
  *
  * You can do anything you want with this software, just don't say you wrote
  * it, and don't remove this notice.
  *
  * This software is provided "as is".
  *
  * The author supplies this software to be publicly redistributed on the
  * understanding that the author is not responsible for the correct
  * functioning of this software in any circumstances and is not liable for
  * any damages caused by this software.
  *
  * October 1992
  */
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/bio.h>
 #include <sys/buf.h>
 #include <sys/clock.h>
 #include <sys/dirent.h>
 #include <sys/lock.h>
 #include <sys/lockf.h>
 #include <sys/malloc.h>
 #include <sys/mount.h>
 #include <sys/mutex.h>
 #include <sys/namei.h>
 #include <sys/priv.h>
 #include <sys/stat.h>
 #include <sys/sysctl.h>
 #include <sys/unistd.h>
 #include <sys/vmmeter.h>
 #include <sys/vnode.h>
 
 #include <vm/vm.h>
 #include <vm/vm_extern.h>
 #include <vm/vnode_pager.h>
 
 #include <fs/msdosfs/bpb.h>
 #include <fs/msdosfs/direntry.h>
 #include <fs/msdosfs/denode.h>
 #include <fs/msdosfs/fat.h>
 #include <fs/msdosfs/msdosfsmount.h>
 
 /*
  * Prototypes for MSDOSFS vnode operations
  */
 static vop_create_t	msdosfs_create;
 static vop_mknod_t	msdosfs_mknod;
 static vop_open_t	msdosfs_open;
 static vop_close_t	msdosfs_close;
 static vop_access_t	msdosfs_access;
 static vop_getattr_t	msdosfs_getattr;
 static vop_setattr_t	msdosfs_setattr;
 static vop_read_t	msdosfs_read;
 static vop_write_t	msdosfs_write;
 static vop_fsync_t	msdosfs_fsync;
 static vop_remove_t	msdosfs_remove;
 static vop_link_t	msdosfs_link;
 static vop_rename_t	msdosfs_rename;
 static vop_mkdir_t	msdosfs_mkdir;
 static vop_rmdir_t	msdosfs_rmdir;
 static vop_symlink_t	msdosfs_symlink;
 static vop_readdir_t	msdosfs_readdir;
 static vop_bmap_t	msdosfs_bmap;
 static vop_getpages_t	msdosfs_getpages;
 static vop_strategy_t	msdosfs_strategy;
 static vop_print_t	msdosfs_print;
 static vop_pathconf_t	msdosfs_pathconf;
 static vop_vptofh_t	msdosfs_vptofh;
 
 /*
  * Some general notes:
  *
  * In the ufs filesystem the inodes, superblocks, and indirect blocks are
  * read/written using the vnode for the filesystem. Blocks that represent
  * the contents of a file are read/written using the vnode for the file
  * (including directories when they are read/written as files). This
  * presents problems for the dos filesystem because data that should be in
  * an inode (if dos had them) resides in the directory itself.  Since we
  * must update directory entries without the benefit of having the vnode
  * for the directory we must use the vnode for the filesystem.  This means
  * that when a directory is actually read/written (via read, write, or
  * readdir, or seek) we must use the vnode for the filesystem instead of
  * the vnode for the directory as would happen in ufs. This is to insure we
  * retrieve the correct block from the buffer cache since the hash value is
  * based upon the vnode address and the desired block number.
  */
 
 /*
  * Create a regular file. On entry the directory to contain the file being
  * created is locked.  We must release before we return. We must also free
  * the pathname buffer pointed at by cnp->cn_pnbuf, always on error.
  */
 static int
 msdosfs_create(struct vop_create_args *ap)
 {
 	struct componentname *cnp = ap->a_cnp;
 	struct denode ndirent;
 	struct denode *dep;
 	struct denode *pdep = VTODE(ap->a_dvp);
 	struct timespec ts;
 	int error;
 
 #ifdef MSDOSFS_DEBUG
 	printf("msdosfs_create(cnp %p, vap %p\n", cnp, ap->a_vap);
 #endif
 
 	/*
 	 * If this is the root directory and there is no space left we
 	 * can't do anything.  This is because the root directory can not
 	 * change size.
 	 */
 	if (pdep->de_StartCluster == MSDOSFSROOT
 	    && pdep->de_fndoffset >= pdep->de_FileSize) {
 		error = ENOSPC;
 		goto bad;
 	}
 
 	/*
 	 * Create a directory entry for the file, then call createde() to
 	 * have it installed. NOTE: DOS files are always executable.  We
 	 * use the absence of the owner write bit to make the file
 	 * readonly.
 	 */
 	memset(&ndirent, 0, sizeof(ndirent));
 	error = uniqdosname(pdep, cnp, ndirent.de_Name);
 	if (error)
 		goto bad;
 
 	ndirent.de_Attributes = ATTR_ARCHIVE;
 	ndirent.de_LowerCase = 0;
 	ndirent.de_StartCluster = 0;
 	ndirent.de_FileSize = 0;
 	ndirent.de_pmp = pdep->de_pmp;
 	ndirent.de_flag = DE_ACCESS | DE_CREATE | DE_UPDATE;
 	vfs_timestamp(&ts);
 	DETIMES(&ndirent, &ts, &ts, &ts);
 	error = createde(&ndirent, pdep, &dep, cnp);
 	if (error)
 		goto bad;
 	*ap->a_vpp = DETOV(dep);
 	if ((cnp->cn_flags & MAKEENTRY) != 0)
 		cache_enter(ap->a_dvp, *ap->a_vpp, cnp);
 	return (0);
 
 bad:
 	return (error);
 }
 
 static int
 msdosfs_mknod(struct vop_mknod_args *ap)
 {
 
     return (EINVAL);
 }
 
 static int
 msdosfs_open(struct vop_open_args *ap)
 {
 	struct denode *dep = VTODE(ap->a_vp);
 	vnode_create_vobject(ap->a_vp, dep->de_FileSize, ap->a_td);
 	return 0;
 }
 
 static int
 msdosfs_close(struct vop_close_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct denode *dep = VTODE(vp);
 	struct timespec ts;
 
 	VI_LOCK(vp);
 	if (vp->v_usecount > 1) {
 		vfs_timestamp(&ts);
 		DETIMES(dep, &ts, &ts, &ts);
 	}
 	VI_UNLOCK(vp);
 	return 0;
 }
 
 static int
 msdosfs_access(struct vop_access_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct denode *dep = VTODE(ap->a_vp);
 	struct msdosfsmount *pmp = dep->de_pmp;
 	mode_t file_mode;
 	accmode_t accmode = ap->a_accmode;
 
 	file_mode = S_IRWXU|S_IRWXG|S_IRWXO;
 	file_mode &= (vp->v_type == VDIR ? pmp->pm_dirmask : pmp->pm_mask);
 
 	/*
 	 * Disallow writing to directories and regular files if the
 	 * filesystem is read-only.
 	 */
 	if (accmode & VWRITE) {
 		switch (vp->v_type) {
 		case VREG:
 		case VDIR:
 			if (vp->v_mount->mnt_flag & MNT_RDONLY)
 				return (EROFS);
 			break;
 		default:
 			break;
 		}
 	}
 
 	return (vaccess(vp->v_type, file_mode, pmp->pm_uid, pmp->pm_gid,
 	    ap->a_accmode, ap->a_cred));
 }
 
 static int
 msdosfs_getattr(struct vop_getattr_args *ap)
 {
 	struct denode *dep = VTODE(ap->a_vp);
 	struct msdosfsmount *pmp = dep->de_pmp;
 	struct vattr *vap = ap->a_vap;
 	mode_t mode;
 	struct timespec ts;
 	u_long dirsperblk = pmp->pm_BytesPerSec / sizeof(struct direntry);
 	uint64_t fileid;
 
 	vfs_timestamp(&ts);
 	DETIMES(dep, &ts, &ts, &ts);
 	vap->va_fsid = dev2udev(pmp->pm_dev);
 	/*
 	 * The following computation of the fileid must be the same as that
 	 * used in msdosfs_readdir() to compute d_fileno. If not, pwd
 	 * doesn't work.
 	 */
 	if (dep->de_Attributes & ATTR_DIRECTORY) {
 		fileid = (uint64_t)cntobn(pmp, dep->de_StartCluster) *
 		    dirsperblk;
 		if (dep->de_StartCluster == MSDOSFSROOT)
 			fileid = 1;
 	} else {
 		fileid = (uint64_t)cntobn(pmp, dep->de_dirclust) *
 		    dirsperblk;
 		if (dep->de_dirclust == MSDOSFSROOT)
 			fileid = (uint64_t)roottobn(pmp, 0) * dirsperblk;
 		fileid += (uoff_t)dep->de_diroffset / sizeof(struct direntry);
 	}
 	vap->va_fileid = fileid;
 
 	mode = S_IRWXU|S_IRWXG|S_IRWXO;
 	if (dep->de_Attributes & ATTR_READONLY)
 		mode &= ~(S_IWUSR|S_IWGRP|S_IWOTH);
 	vap->va_mode = mode &
 	    (ap->a_vp->v_type == VDIR ? pmp->pm_dirmask : pmp->pm_mask);
 	vap->va_uid = pmp->pm_uid;
 	vap->va_gid = pmp->pm_gid;
 	vap->va_nlink = 1;
 	vap->va_rdev = NODEV;
 	vap->va_size = dep->de_FileSize;
 	fattime2timespec(dep->de_MDate, dep->de_MTime, 0, 0, &vap->va_mtime);
 	vap->va_ctime = vap->va_mtime;
 	if (pmp->pm_flags & MSDOSFSMNT_LONGNAME) {
 		fattime2timespec(dep->de_ADate, 0, 0, 0, &vap->va_atime);
 		fattime2timespec(dep->de_CDate, dep->de_CTime, dep->de_CHun,
 		    0, &vap->va_birthtime);
 	} else {
 		vap->va_atime = vap->va_mtime;
 		vap->va_birthtime.tv_sec = -1;
 		vap->va_birthtime.tv_nsec = 0;
 	}
 	vap->va_flags = 0;
 	if (dep->de_Attributes & ATTR_ARCHIVE)
 		vap->va_flags |= UF_ARCHIVE;
 	if (dep->de_Attributes & ATTR_HIDDEN)
 		vap->va_flags |= UF_HIDDEN;
 	if (dep->de_Attributes & ATTR_READONLY)
 		vap->va_flags |= UF_READONLY;
 	if (dep->de_Attributes & ATTR_SYSTEM)
 		vap->va_flags |= UF_SYSTEM;
 	vap->va_gen = 0;
 	vap->va_blocksize = pmp->pm_bpcluster;
 	if (dep->de_StartCluster != MSDOSFSROOT)
 		vap->va_bytes =
 		    (dep->de_FileSize + pmp->pm_crbomask) & ~pmp->pm_crbomask;
 	else
 		vap->va_bytes = 0; /* FAT12/FAT16 root dir in reserved area */
 	vap->va_type = ap->a_vp->v_type;
 	vap->va_filerev = dep->de_modrev;
 	return (0);
 }
 
 static int
 msdosfs_setattr(struct vop_setattr_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct denode *dep = VTODE(ap->a_vp);
 	struct msdosfsmount *pmp = dep->de_pmp;
 	struct vattr *vap = ap->a_vap;
 	struct ucred *cred = ap->a_cred;
 	struct thread *td = curthread;
 	int error = 0;
 
 #ifdef MSDOSFS_DEBUG
 	printf("msdosfs_setattr(): vp %p, vap %p, cred %p\n",
 	    ap->a_vp, vap, cred);
 #endif
 
 	/*
 	 * Check for unsettable attributes.
 	 */
 	if ((vap->va_type != VNON) || (vap->va_nlink != VNOVAL) ||
 	    (vap->va_fsid != VNOVAL) || (vap->va_fileid != VNOVAL) ||
 	    (vap->va_blocksize != VNOVAL) || (vap->va_rdev != VNOVAL) ||
 	    (vap->va_bytes != VNOVAL) || (vap->va_gen != VNOVAL)) {
 #ifdef MSDOSFS_DEBUG
 		printf("msdosfs_setattr(): returning EINVAL\n");
 		printf("    va_type %d, va_nlink %llx, va_fsid %llx, va_fileid %llx\n",
 		    vap->va_type, (unsigned long long)vap->va_nlink,
 		    (unsigned long long)vap->va_fsid,
 		    (unsigned long long)vap->va_fileid);
 		printf("    va_blocksize %lx, va_rdev %llx, va_bytes %llx, va_gen %lx\n",
 		    vap->va_blocksize, (unsigned long long)vap->va_rdev,
 		    (unsigned long long)vap->va_bytes, vap->va_gen);
 		printf("    va_uid %x, va_gid %x\n",
 		    vap->va_uid, vap->va_gid);
 #endif
 		return (EINVAL);
 	}
 
 	/*
 	 * We don't allow setting attributes on the root directory.
 	 * The special case for the root directory is because before
 	 * FAT32, the root directory didn't have an entry for itself
 	 * (and was otherwise special).  With FAT32, the root
 	 * directory is not so special, but still doesn't have an
 	 * entry for itself.
 	 */
 	if (vp->v_vflag & VV_ROOT)
 		return (EINVAL);
 
 	if (vap->va_flags != VNOVAL) {
 		if (vp->v_mount->mnt_flag & MNT_RDONLY)
 			return (EROFS);
 		if (cred->cr_uid != pmp->pm_uid) {
 			error = priv_check_cred(cred, PRIV_VFS_ADMIN);
 			if (error)
 				return (error);
 		}
 		/*
 		 * We are very inconsistent about handling unsupported
 		 * attributes.  We ignored the access time and the
 		 * read and execute bits.  We were strict for the other
 		 * attributes.
 		 */
 		if (vap->va_flags & ~(UF_ARCHIVE | UF_HIDDEN | UF_READONLY |
 		    UF_SYSTEM))
 			return EOPNOTSUPP;
 		if (vap->va_flags & UF_ARCHIVE)
 			dep->de_Attributes |= ATTR_ARCHIVE;
 		else
 			dep->de_Attributes &= ~ATTR_ARCHIVE;
 		if (vap->va_flags & UF_HIDDEN)
 			dep->de_Attributes |= ATTR_HIDDEN;
 		else
 			dep->de_Attributes &= ~ATTR_HIDDEN;
 		/* We don't allow changing the readonly bit on directories. */
 		if (vp->v_type != VDIR) {
 			if (vap->va_flags & UF_READONLY)
 				dep->de_Attributes |= ATTR_READONLY;
 			else
 				dep->de_Attributes &= ~ATTR_READONLY;
 		}
 		if (vap->va_flags & UF_SYSTEM)
 			dep->de_Attributes |= ATTR_SYSTEM;
 		else
 			dep->de_Attributes &= ~ATTR_SYSTEM;
 		dep->de_flag |= DE_MODIFIED;
 	}
 
 	if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) {
 		uid_t uid;
 		gid_t gid;
 
 		if (vp->v_mount->mnt_flag & MNT_RDONLY)
 			return (EROFS);
 		uid = vap->va_uid;
 		if (uid == (uid_t)VNOVAL)
 			uid = pmp->pm_uid;
 		gid = vap->va_gid;
 		if (gid == (gid_t)VNOVAL)
 			gid = pmp->pm_gid;
 		if (cred->cr_uid != pmp->pm_uid || uid != pmp->pm_uid ||
 		    (gid != pmp->pm_gid && !groupmember(gid, cred))) {
 			error = priv_check_cred(cred, PRIV_VFS_CHOWN);
 			if (error)
 				return (error);
 		}
 		if (uid != pmp->pm_uid || gid != pmp->pm_gid)
 			return EINVAL;
 	}
 
 	if (vap->va_size != VNOVAL) {
 		switch (vp->v_type) {
 		case VDIR:
 			return (EISDIR);
 		case VREG:
 			/*
 			 * Truncation is only supported for regular files,
 			 * Disallow it if the filesystem is read-only.
 			 */
 			if (vp->v_mount->mnt_flag & MNT_RDONLY)
 				return (EROFS);
 			break;
 		default:
 			/*
 			 * According to POSIX, the result is unspecified
 			 * for file types other than regular files,
 			 * directories and shared memory objects.  We
 			 * don't support any file types except regular
 			 * files and directories in this file system, so
 			 * this (default) case is unreachable and can do
 			 * anything.  Keep falling through to detrunc()
 			 * for now.
 			 */
 			break;
 		}
 		error = vn_rlimit_trunc(vap->va_size, td);
 		if (error != 0)
 			return (error);
 		error = detrunc(dep, vap->va_size, 0, cred);
 		if (error)
 			return error;
 	}
 	if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL) {
 		if (vp->v_mount->mnt_flag & MNT_RDONLY)
 			return (EROFS);
 		error = vn_utimes_perm(vp, vap, cred, td);
 		if (error != 0)
 			return (error);
 		if ((pmp->pm_flags & MSDOSFSMNT_NOWIN95) == 0 &&
 		    vap->va_atime.tv_sec != VNOVAL) {
 			dep->de_flag &= ~DE_ACCESS;
 			timespec2fattime(&vap->va_atime, 0,
 			    &dep->de_ADate, NULL, NULL);
 		}
 		if (vap->va_mtime.tv_sec != VNOVAL) {
 			dep->de_flag &= ~DE_UPDATE;
 			timespec2fattime(&vap->va_mtime, 0,
 			    &dep->de_MDate, &dep->de_MTime, NULL);
 		}
 		/*
 		 * We don't set the archive bit when modifying the time of
 		 * a directory to emulate the Windows/DOS behavior.
 		 */
 		if (vp->v_type != VDIR)
 			dep->de_Attributes |= ATTR_ARCHIVE;
 		dep->de_flag |= DE_MODIFIED;
 	}
 	/*
 	 * DOS files only have the ability to have their writability
 	 * attribute set, so we use the owner write bit to set the readonly
 	 * attribute.
 	 */
 	if (vap->va_mode != (mode_t)VNOVAL) {
 		if (vp->v_mount->mnt_flag & MNT_RDONLY)
 			return (EROFS);
 		if (cred->cr_uid != pmp->pm_uid) {
 			error = priv_check_cred(cred, PRIV_VFS_ADMIN);
 			if (error)
 				return (error);
 		}
 		if (vp->v_type != VDIR) {
 			/* We ignore the read and execute bits. */
 			if (vap->va_mode & S_IWUSR)
 				dep->de_Attributes &= ~ATTR_READONLY;
 			else
 				dep->de_Attributes |= ATTR_READONLY;
 			dep->de_Attributes |= ATTR_ARCHIVE;
 			dep->de_flag |= DE_MODIFIED;
 		}
 	}
 	return (deupdat(dep, 0));
 }
 
 static int
 msdosfs_read(struct vop_read_args *ap)
 {
 	int error = 0;
 	int blsize;
 	int isadir;
 	ssize_t orig_resid;
 	u_int n;
 	u_long diff;
 	u_long on;
 	daddr_t lbn;
 	daddr_t rablock;
 	int rasize;
 	int seqcount;
 	struct buf *bp;
 	struct vnode *vp = ap->a_vp;
 	struct denode *dep = VTODE(vp);
 	struct msdosfsmount *pmp = dep->de_pmp;
 	struct uio *uio = ap->a_uio;
 
 	/*
 	 * If they didn't ask for any data, then we are done.
 	 */
 	orig_resid = uio->uio_resid;
 	if (orig_resid == 0)
 		return (0);
 
 	/*
 	 * The caller is supposed to ensure that
 	 * uio->uio_offset >= 0 and uio->uio_resid >= 0.
 	 * We don't need to check for large offsets as in ffs because
 	 * dep->de_FileSize <= MSDOSFS_FILESIZE_MAX < OFF_MAX, so large
 	 * offsets cannot cause overflow even in theory.
 	 */
 
 	seqcount = ap->a_ioflag >> IO_SEQSHIFT;
 
 	isadir = dep->de_Attributes & ATTR_DIRECTORY;
 	do {
 		if (uio->uio_offset >= dep->de_FileSize)
 			break;
 		lbn = de_cluster(pmp, uio->uio_offset);
 		rablock = lbn + 1;
 		blsize = pmp->pm_bpcluster;
 		on = uio->uio_offset & pmp->pm_crbomask;
 		/*
 		 * If we are operating on a directory file then be sure to
 		 * do i/o with the vnode for the filesystem instead of the
 		 * vnode for the directory.
 		 */
 		if (isadir) {
 			/* convert cluster # to block # */
 			error = pcbmap(dep, lbn, &lbn, 0, &blsize);
 			if (error == E2BIG) {
 				error = EINVAL;
 				break;
 			} else if (error)
 				break;
 			error = bread(pmp->pm_devvp, lbn, blsize, NOCRED, &bp);
 		} else if (de_cn2off(pmp, rablock) >= dep->de_FileSize) {
 			error = bread(vp, lbn, blsize, NOCRED, &bp);
 		} else if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
 			error = cluster_read(vp, dep->de_FileSize, lbn, blsize,
 			    NOCRED, on + uio->uio_resid, seqcount, 0, &bp);
 		} else if (seqcount > 1) {
 			rasize = blsize;
 			error = breadn(vp, lbn,
 			    blsize, &rablock, &rasize, 1, NOCRED, &bp);
 		} else {
 			error = bread(vp, lbn, blsize, NOCRED, &bp);
 		}
 		if (error) {
 			brelse(bp);
 			break;
 		}
 		diff = pmp->pm_bpcluster - on;
 		n = diff > uio->uio_resid ? uio->uio_resid : diff;
 		diff = dep->de_FileSize - uio->uio_offset;
 		if (diff < n)
 			n = diff;
 		diff = blsize - bp->b_resid;
 		if (diff < n)
 			n = diff;
 		error = vn_io_fault_uiomove(bp->b_data + on, (int) n, uio);
 		brelse(bp);
 	} while (error == 0 && uio->uio_resid > 0 && n != 0);
 	if (!isadir && (error == 0 || uio->uio_resid != orig_resid) &&
 	    (vp->v_mount->mnt_flag & (MNT_NOATIME | MNT_RDONLY)) == 0)
 		dep->de_flag |= DE_ACCESS;
 	return (error);
 }
 
 /*
  * Write data to a file or directory.
  */
 static int
 msdosfs_write(struct vop_write_args *ap)
 {
 	int n;
 	int croffset;
 	ssize_t resid, r;
 	u_long osize;
 	int error = 0;
 	u_long count;
 	int seqcount;
 	daddr_t bn, lastcn;
 	struct buf *bp;
 	int ioflag = ap->a_ioflag;
 	struct uio *uio = ap->a_uio;
 	struct vnode *vp = ap->a_vp;
 	struct vnode *thisvp;
 	struct denode *dep = VTODE(vp);
 	struct msdosfsmount *pmp = dep->de_pmp;
 	struct ucred *cred = ap->a_cred;
 
 #ifdef MSDOSFS_DEBUG
 	printf("msdosfs_write(vp %p, uio %p, ioflag %x, cred %p\n",
 	    vp, uio, ioflag, cred);
 	printf("msdosfs_write(): diroff %lu, dirclust %lu, startcluster %lu\n",
 	    dep->de_diroffset, dep->de_dirclust, dep->de_StartCluster);
 #endif
 
 	switch (vp->v_type) {
 	case VREG:
 		if (ioflag & IO_APPEND)
 			uio->uio_offset = dep->de_FileSize;
 		thisvp = vp;
 		break;
 	case VDIR:
 		return EISDIR;
 	default:
 		panic("msdosfs_write(): bad file type");
 	}
 
 	/*
 	 * This is needed (unlike in ffs_write()) because we extend the
 	 * file outside of the loop but we don't want to extend the file
 	 * for writes of 0 bytes.
 	 */
 	if (uio->uio_resid == 0)
 		return (0);
 
 	/*
 	 * The caller is supposed to ensure that
 	 * uio->uio_offset >= 0 and uio->uio_resid >= 0.
 	 *
 	 * If they've exceeded their filesize limit, tell them about it.
 	 */
 	error = vn_rlimit_fsizex(vp, uio, MSDOSFS_FILESIZE_MAX, &r,
 	    uio->uio_td);
 	if (error != 0) {
 		vn_rlimit_fsizex_res(uio, r);
 		return (error);
 	}
 
 	/*
 	 * If the offset we are starting the write at is beyond the end of
 	 * the file, then they've done a seek.  Unix filesystems allow
 	 * files with holes in them, DOS doesn't so we must fill the hole
 	 * with zeroed blocks.
 	 */
 	if (uio->uio_offset > dep->de_FileSize) {
 		error = deextend(dep, uio->uio_offset, cred);
 		if (error != 0) {
 			vn_rlimit_fsizex_res(uio, r);
 			return (error);
 		}
 	}
 
 	/*
 	 * Remember some values in case the write fails.
 	 */
 	resid = uio->uio_resid;
 	osize = dep->de_FileSize;
 
 	/*
 	 * If we write beyond the end of the file, extend it to its ultimate
 	 * size ahead of the time to hopefully get a contiguous area.
 	 */
 	if (uio->uio_offset + resid > osize) {
 		count = de_clcount(pmp, uio->uio_offset + resid) -
 			de_clcount(pmp, osize);
 		error = extendfile(dep, count, NULL, NULL, 0);
 		if (error &&  (error != ENOSPC || (ioflag & IO_UNIT)))
 			goto errexit;
 		lastcn = dep->de_fc[FC_LASTFC].fc_frcn;
 	} else
 		lastcn = de_clcount(pmp, osize) - 1;
 
 	seqcount = ioflag >> IO_SEQSHIFT;
 	do {
 		if (de_cluster(pmp, uio->uio_offset) > lastcn) {
 			error = ENOSPC;
 			break;
 		}
 
 		croffset = uio->uio_offset & pmp->pm_crbomask;
 		n = min(uio->uio_resid, pmp->pm_bpcluster - croffset);
 		if (uio->uio_offset + n > dep->de_FileSize) {
 			dep->de_FileSize = uio->uio_offset + n;
 			/* The object size needs to be set before buffer is allocated */
 			vnode_pager_setsize(vp, dep->de_FileSize);
 		}
 
 		bn = de_cluster(pmp, uio->uio_offset);
 		if ((uio->uio_offset & pmp->pm_crbomask) == 0
 		    && (de_cluster(pmp, uio->uio_offset + uio->uio_resid)
 			> de_cluster(pmp, uio->uio_offset)
 			|| uio->uio_offset + uio->uio_resid >= dep->de_FileSize)) {
 			/*
 			 * If either the whole cluster gets written,
 			 * or we write the cluster from its start beyond EOF,
 			 * then no need to read data from disk.
 			 */
 			bp = getblk(thisvp, bn, pmp->pm_bpcluster, 0, 0, 0);
 			/*
 			 * This call to vfs_bio_clrbuf() ensures that
 			 * even if vn_io_fault_uiomove() below faults,
 			 * garbage from the newly instantiated buffer
 			 * is not exposed to the userspace via mmap().
 			 */
 			vfs_bio_clrbuf(bp);
 			/*
 			 * Do the bmap now, since pcbmap needs buffers
 			 * for the FAT table. (see msdosfs_strategy)
 			 */
 			if (bp->b_blkno == bp->b_lblkno) {
 				error = pcbmap(dep, bp->b_lblkno, &bn, 0, 0);
 				if (error)
 					bp->b_blkno = -1;
 				else
 					bp->b_blkno = bn;
 			}
 			if (bp->b_blkno == -1) {
 				brelse(bp);
 				if (!error)
 					error = EIO;		/* XXX */
 				break;
 			}
 		} else {
 			/*
 			 * The block we need to write into exists, so read it in.
 			 */
 			error = bread(thisvp, bn, pmp->pm_bpcluster, cred, &bp);
 			if (error) {
 				break;
 			}
 		}
 
 		/*
 		 * Should these vnode_pager_* functions be done on dir
 		 * files?
 		 */
 
 		/*
 		 * Copy the data from user space into the buf header.
 		 */
 		error = vn_io_fault_uiomove(bp->b_data + croffset, n, uio);
 		if (error) {
 			brelse(bp);
 			break;
 		}
 
 		/* Prepare for clustered writes in some else clauses. */
 		if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERW) == 0)
 			bp->b_flags |= B_CLUSTEROK;
 
 		/*
 		 * If IO_SYNC, then each buffer is written synchronously.
 		 * Otherwise, if we have a severe page deficiency then
 		 * write the buffer asynchronously.  Otherwise, if on a
 		 * cluster boundary then write the buffer asynchronously,
 		 * combining it with contiguous clusters if permitted and
 		 * possible, since we don't expect more writes into this
 		 * buffer soon.  Otherwise, do a delayed write because we
 		 * expect more writes into this buffer soon.
 		 */
 		if (ioflag & IO_SYNC)
 			(void)bwrite(bp);
 		else if (vm_page_count_severe() || buf_dirty_count_severe())
 			bawrite(bp);
 		else if (n + croffset == pmp->pm_bpcluster) {
 			if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERW) == 0)
 				cluster_write(vp, &dep->de_clusterw, bp,
 				    dep->de_FileSize, seqcount, 0);
 			else
 				bawrite(bp);
 		} else
 			bdwrite(bp);
 		dep->de_flag |= DE_UPDATE;
 	} while (error == 0 && uio->uio_resid > 0);
 
 	/*
 	 * If the write failed and they want us to, truncate the file back
 	 * to the size it was before the write was attempted.
 	 */
 errexit:
 	if (error) {
 		if (ioflag & IO_UNIT) {
 			detrunc(dep, osize, ioflag & IO_SYNC, NOCRED);
 			uio->uio_offset -= resid - uio->uio_resid;
 			uio->uio_resid = resid;
 		} else {
 			detrunc(dep, dep->de_FileSize, ioflag & IO_SYNC, NOCRED);
 			if (uio->uio_resid != resid)
 				error = 0;
 		}
 	} else if (ioflag & IO_SYNC)
 		error = deupdat(dep, 1);
 	vn_rlimit_fsizex_res(uio, r);
 	return (error);
 }
 
 /*
  * Flush the blocks of a file to disk.
  */
 static int
 msdosfs_fsync(struct vop_fsync_args *ap)
 {
 	struct vnode *devvp;
 	int allerror, error;
 
 	vop_stdfsync(ap);
 
 	/*
 	* If the syncing request comes from fsync(2), sync the entire
 	* FAT and any other metadata that happens to be on devvp.  We
 	* need this mainly for the FAT.  We write the FAT sloppily, and
 	* syncing it all now is the best we can easily do to get all
 	* directory entries associated with the file (not just the file)
 	* fully synced.  The other metadata includes critical metadata
 	* for all directory entries, but only in the MNT_ASYNC case.  We
 	* will soon sync all metadata in the file's directory entry.
 	* Non-critical metadata for associated directory entries only
 	* gets synced accidentally, as in most file systems.
 	*/
 	if (ap->a_waitfor != MNT_NOWAIT) {
 		devvp = VTODE(ap->a_vp)->de_pmp->pm_devvp;
 		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
 		allerror = VOP_FSYNC(devvp, MNT_WAIT, ap->a_td);
 		VOP_UNLOCK(devvp);
 	} else
 		allerror = 0;
 
 	error = deupdat(VTODE(ap->a_vp), ap->a_waitfor != MNT_NOWAIT);
 	if (allerror == 0)
 		allerror = error;
 	return (allerror);
 }
 
 static int
 msdosfs_remove(struct vop_remove_args *ap)
 {
 	struct denode *dep = VTODE(ap->a_vp);
 	struct denode *ddep = VTODE(ap->a_dvp);
 	int error;
 
 	if (ap->a_vp->v_type == VDIR)
 		error = EPERM;
 	else
 		error = removede(ddep, dep);
 #ifdef MSDOSFS_DEBUG
 	printf("msdosfs_remove(), dep %p, v_usecount %d\n", dep, ap->a_vp->v_usecount);
 #endif
 	return (error);
 }
 
 /*
  * DOS filesystems don't know what links are.
  */
 static int
 msdosfs_link(struct vop_link_args *ap)
 {
 	return (EOPNOTSUPP);
 }
 
 /*
  * Renames on files require moving the denode to a new hash queue since the
  * denode's location is used to compute which hash queue to put the file
  * in. Unless it is a rename in place.  For example "mv a b".
  *
  * What follows is the basic algorithm:
  *
  * if (file move) {
  *	if (dest file exists) {
  *		remove dest file
  *	}
  *	if (dest and src in same directory) {
  *		rewrite name in existing directory slot
  *	} else {
  *		write new entry in dest directory
  *		update offset and dirclust in denode
  *		move denode to new hash chain
  *		clear old directory entry
  *	}
  * } else {
  *	directory move
  *	if (dest directory exists) {
  *		if (dest is not empty) {
  *			return ENOTEMPTY
  *		}
  *		remove dest directory
  *	}
  *	if (dest and src in same directory) {
  *		rewrite name in existing entry
  *	} else {
  *		be sure dest is not a child of src directory
  *		write entry in dest directory
  *		update "." and ".." in moved directory
  *		clear old directory entry for moved directory
  *	}
  * }
  *
  * On entry:
  *	source's parent directory is unlocked
  *	source file or directory is unlocked
  *	destination's parent directory is locked
  *	destination file or directory is locked if it exists
  *
  * On exit:
  *	all denodes should be released
  */
 static int
 msdosfs_rename(struct vop_rename_args *ap)
 {
 	struct vnode *fdvp, *fvp, *tdvp, *tvp, *vp;
 	struct componentname *fcnp, *tcnp;
 	struct denode *fdip, *fip, *tdip, *tip, *nip;
 	u_char toname[12], oldname[11];
 	u_long to_diroffset;
 	bool checkpath_locked, doingdirectory, newparent;
 	int error;
 	u_long cn, pcl, blkoff;
 	daddr_t bn, wait_scn, scn;
 	struct msdosfsmount *pmp;
 	struct direntry *dotdotp;
 	struct buf *bp;
 
 	tdvp = ap->a_tdvp;
 	fvp = ap->a_fvp;
 	fdvp = ap->a_fdvp;
 	tvp = ap->a_tvp;
 	tcnp = ap->a_tcnp;
 	fcnp = ap->a_fcnp;
 	pmp = VFSTOMSDOSFS(fdvp->v_mount);
 
 	/*
 	 * Check for cross-device rename.
 	 */
 	if (fvp->v_mount != tdvp->v_mount ||
 	    (tvp != NULL && fvp->v_mount != tvp->v_mount)) {
 		error = EXDEV;
 		goto abortit;
 	}
 
 	/*
 	 * If source and dest are the same, do nothing.
 	 */
 	if (tvp == fvp) {
 		error = 0;
 		goto abortit;
 	}
 
 	/*
 	 * When the target exists, both the directory
 	 * and target vnodes are passed locked.
 	 */
 	VOP_UNLOCK(tdvp);
 	if (tvp != NULL && tvp != tdvp)
 		VOP_UNLOCK(tvp);
 
 	checkpath_locked = false;
 
 relock:
 	doingdirectory = newparent = false;
 
 	error = vn_lock(fdvp, LK_EXCLUSIVE);
 	if (error != 0)
 		goto releout;
 	if (vn_lock(tdvp, LK_EXCLUSIVE | LK_NOWAIT) != 0) {
 		VOP_UNLOCK(fdvp);
 		error = vn_lock(tdvp, LK_EXCLUSIVE);
 		if (error != 0)
 			goto releout;
 		VOP_UNLOCK(tdvp);
 		goto relock;
 	}
 
 	error = msdosfs_lookup_ino(fdvp, NULL, fcnp, &scn, &blkoff);
 	if (error != 0) {
 		VOP_UNLOCK(fdvp);
 		VOP_UNLOCK(tdvp);
 		goto releout;
 	}
 	error = deget(pmp, scn, blkoff, LK_EXCLUSIVE | LK_NOWAIT, &nip);
 	if (error != 0) {
 		VOP_UNLOCK(fdvp);
 		VOP_UNLOCK(tdvp);
 		if (error != EBUSY)
 			goto releout;
 		error = deget(pmp, scn, blkoff, LK_EXCLUSIVE, &nip);
 		if (error != 0)
 			goto releout;
 		vp = fvp;
 		fvp = DETOV(nip);
 		VOP_UNLOCK(fvp);
 		vrele(vp);
 		goto relock;
 	}
 	vrele(fvp);
 	fvp = DETOV(nip);
 
 	error = msdosfs_lookup_ino(tdvp, NULL, tcnp, &scn, &blkoff);
 	if (error != 0 && error != EJUSTRETURN) {
 		VOP_UNLOCK(fdvp);
 		VOP_UNLOCK(tdvp);
 		VOP_UNLOCK(fvp);
 		goto releout;
 	}
 	if (error == EJUSTRETURN && tvp != NULL) {
 		vrele(tvp);
 		tvp = NULL;
 	}
 	if (error == 0) {
 		nip = NULL;
 		error = deget(pmp, scn, blkoff, LK_EXCLUSIVE | LK_NOWAIT,
 		    &nip);
 		if (tvp != NULL) {
 			vrele(tvp);
 			tvp = NULL;
 		}
 		if (error != 0) {
 			VOP_UNLOCK(fdvp);
 			VOP_UNLOCK(tdvp);
 			VOP_UNLOCK(fvp);
 			if (error != EBUSY)
 				goto releout;
 			error = deget(pmp, scn, blkoff, LK_EXCLUSIVE,
 			    &nip);
 			if (error != 0)
 				goto releout;
 			vput(DETOV(nip));
 			goto relock;
 		}
 		tvp = DETOV(nip);
 	}
 
 	fdip = VTODE(fdvp);
 	fip = VTODE(fvp);
 	tdip = VTODE(tdvp);
 	tip = tvp != NULL ? VTODE(tvp) : NULL;
 
 	/*
 	 * Remember direntry place to use for destination
 	 */
 	to_diroffset = tdip->de_fndoffset;
 
 	/*
 	 * Be sure we are not renaming ".", "..", or an alias of ".". This
 	 * leads to a crippled directory tree.  It's pretty tough to do a
 	 * "ls" or "pwd" with the "." directory entry missing, and "cd .."
 	 * doesn't work if the ".." entry is missing.
 	 */
 	if ((fip->de_Attributes & ATTR_DIRECTORY) != 0) {
 		/*
 		 * Avoid ".", "..", and aliases of "." for obvious reasons.
 		 */
 		if ((fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') ||
 		    fdip == fip ||
 		    (fcnp->cn_flags & ISDOTDOT) != 0 ||
 		    (tcnp->cn_flags & ISDOTDOT) != 0) {
 			error = EINVAL;
 			goto unlock;
 		}
 		doingdirectory = true;
 	}
 
 	/*
 	 * If ".." must be changed (ie the directory gets a new
 	 * parent) then the source directory must not be in the
 	 * directory hierarchy above the target, as this would
 	 * orphan everything below the source directory. Also
 	 * the user must have write permission in the source so
 	 * as to be able to change "..". We must repeat the call
 	 * to namei, as the parent directory is unlocked by the
 	 * call to doscheckpath().
 	 */
 	error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred, curthread);
 	if (fdip->de_StartCluster != tdip->de_StartCluster)
 		newparent = true;
 	if (doingdirectory && newparent) {
 		if (error != 0)	/* write access check above */
 			goto unlock;
 		lockmgr(&pmp->pm_checkpath_lock, LK_EXCLUSIVE, NULL);
 		checkpath_locked = true;
 		error = doscheckpath(fip, tdip, &wait_scn);
 		if (wait_scn != 0) {
 			lockmgr(&pmp->pm_checkpath_lock, LK_RELEASE, NULL);
 			checkpath_locked = false;
 			VOP_UNLOCK(fdvp);
 			VOP_UNLOCK(tdvp);
 			VOP_UNLOCK(fvp);
 			if (tvp != NULL && tvp != tdvp)
 				VOP_UNLOCK(tvp);
 			error = deget(pmp, wait_scn, 0, LK_EXCLUSIVE,
 			    &nip);
 			if (error == 0) {
 				vput(DETOV(nip));
 				goto relock;
 			}
 		}
 		if (error != 0)
 			goto unlock;
 	}
 
 	if (tip != NULL) {
 		/*
 		 * Target must be empty if a directory and have no links
 		 * to it. Also, ensure source and target are compatible
 		 * (both directories, or both not directories).
 		 */
 		if ((tip->de_Attributes & ATTR_DIRECTORY) != 0) {
 			if (!dosdirempty(tip)) {
 				error = ENOTEMPTY;
 				goto unlock;
 			}
 			if (!doingdirectory) {
 				error = ENOTDIR;
 				goto unlock;
 			}
 			cache_purge(tdvp);
 		} else if (doingdirectory) {
 			error = EISDIR;
 			goto unlock;
 		}
 		error = msdosfs_lookup_ino(tdvp, NULL, tcnp, &scn, &blkoff);
 		MPASS(error == 0);
 		error = removede(tdip, tip);
 		if (error != 0)
 			goto unlock;
 		vput(tvp);
 		tvp = NULL;
 		tip = NULL;
 	}
 
 	/*
 	 * Convert the filename in tcnp into a dos filename. We copy this
 	 * into the denode and directory entry for the destination
 	 * file/directory.
 	 */
 	error = uniqdosname(tdip, tcnp, toname);
 	if (error != 0)
 		goto unlock;
 
 	/*
 	 * First write a new entry in the destination
 	 * directory and mark the entry in the source directory
 	 * as deleted.  Then move the denode to the correct hash
 	 * chain for its new location in the filesystem.  And, if
 	 * we moved a directory, then update its .. entry to point
 	 * to the new parent directory.
 	 */
 	memcpy(oldname, fip->de_Name, 11);
 	memcpy(fip->de_Name, toname, 11);	/* update denode */
 	error = msdosfs_lookup_ino(tdvp, NULL, tcnp, &scn, &blkoff);
 	if (error == EJUSTRETURN) {
 		tdip->de_fndoffset = to_diroffset;
 		error = createde(fip, tdip, NULL, tcnp);
 	}
 	if (error != 0) {
 		memcpy(fip->de_Name, oldname, 11);
 		goto unlock;
 	}
 
 	/*
 	 * If fip is for a directory, then its name should always
 	 * be "." since it is for the directory entry in the
 	 * directory itself (msdosfs_lookup() always translates
 	 * to the "." entry so as to get a unique denode, except
 	 * for the root directory there are different
 	 * complications).  However, we just corrupted its name
 	 * to pass the correct name to createde().  Undo this.
 	 */
 	if ((fip->de_Attributes & ATTR_DIRECTORY) != 0)
 		memcpy(fip->de_Name, oldname, 11);
 	fip->de_refcnt++;
 	error = msdosfs_lookup_ino(fdvp, NULL, fcnp, &scn, &blkoff);
 	MPASS(error == 0);
 	error = removede(fdip, fip);
 	if (error != 0) {
 		printf("%s: removede %s %s err %d\n",
 		    pmp->pm_mountp->mnt_stat.f_mntonname,
 		    fdip->de_Name, fip->de_Name, error);
 		msdosfs_integrity_error(pmp);
 		goto unlock;
 	}
 	if (!doingdirectory) {
 		error = pcbmap(tdip, de_cluster(pmp, to_diroffset), 0,
 		    &fip->de_dirclust, 0);
 		if (error != 0) {
 			/*
 			 * XXX should downgrade to ro here,
 			 * fs is corrupt
 			 */
 			goto unlock;
 		}
 		if (fip->de_dirclust == MSDOSFSROOT)
 			fip->de_diroffset = to_diroffset;
 		else
 			fip->de_diroffset = to_diroffset & pmp->pm_crbomask;
 	}
 	reinsert(fip);
 
 	/*
 	 * If we moved a directory to a new parent directory, then we must
 	 * fixup the ".." entry in the moved directory.
 	 */
 	if (doingdirectory && newparent) {
 		cn = fip->de_StartCluster;
 		if (cn == MSDOSFSROOT) {
 			/* this should never happen */
 			panic("msdosfs_rename(): updating .. in root directory?");
 		} else
 			bn = cntobn(pmp, cn);
 		error = bread(pmp->pm_devvp, bn, pmp->pm_bpcluster,
 		    NOCRED, &bp);
 		if (error != 0) {
 			printf("%s: block read error %d while renaming dir\n",
 			    pmp->pm_mountp->mnt_stat.f_mntonname,
 			    error);
 			msdosfs_integrity_error(pmp);
 			goto unlock;
 		}
 		dotdotp = (struct direntry *)bp->b_data + 1;
 		pcl = tdip->de_StartCluster;
 		if (FAT32(pmp) && pcl == pmp->pm_rootdirblk)
 			pcl = MSDOSFSROOT;
 		putushort(dotdotp->deStartCluster, pcl);
 		if (FAT32(pmp))
 			putushort(dotdotp->deHighClust, pcl >> 16);
 		if (DOINGASYNC(fvp))
 			bdwrite(bp);
 		else if ((error = bwrite(bp)) != 0) {
 			printf("%s: block write error %d while renaming dir\n",
 			    pmp->pm_mountp->mnt_stat.f_mntonname,
 			    error);
 			msdosfs_integrity_error(pmp);
 			goto unlock;
 		}
 	}
 
 	/*
 	 * The msdosfs lookup is case insensitive. Several aliases may
 	 * be inserted for a single directory entry. As a consequnce,
 	 * name cache purge done by lookup for fvp when DELETE op for
 	 * namei is specified, might be not enough to expunge all
 	 * namecache entries that were installed for this direntry.
 	 */
 	cache_purge(fvp);
 
 unlock:
 	if (checkpath_locked)
 		lockmgr(&pmp->pm_checkpath_lock, LK_RELEASE, NULL);
 	vput(fdvp);
 	vput(fvp);
 	if (tvp != NULL) {
 		if (tvp != tdvp)
 			vput(tvp);
 		else
 			vrele(tvp);
 	}
 	vput(tdvp);
 	return (error);
 releout:
 	MPASS(!checkpath_locked);
 	vrele(tdvp);
 	if (tvp != NULL)
 		vrele(tvp);
 	vrele(fdvp);
 	vrele(fvp);
 	return (error);
 abortit:
 	if (tdvp == tvp)
 		vrele(tdvp);
 	else
 		vput(tdvp);
 	if (tvp != NULL)
 		vput(tvp);
 	vrele(fdvp);
 	vrele(fvp);
 	return (error);
 }
 
 static struct {
 	struct direntry dot;
 	struct direntry dotdot;
 } dosdirtemplate = {
 	{	".          ",				/* the . entry */
 		ATTR_DIRECTORY,				/* file attribute */
 		0,					/* reserved */
 		0, { 0, 0 }, { 0, 0 },			/* create time & date */
 		{ 0, 0 },				/* access date */
 		{ 0, 0 },				/* high bits of start cluster */
 		{ 210, 4 }, { 210, 4 },			/* modify time & date */
 		{ 0, 0 },				/* startcluster */
 		{ 0, 0, 0, 0 }				/* filesize */
 	},
 	{	"..         ",				/* the .. entry */
 		ATTR_DIRECTORY,				/* file attribute */
 		0,					/* reserved */
 		0, { 0, 0 }, { 0, 0 },			/* create time & date */
 		{ 0, 0 },				/* access date */
 		{ 0, 0 },				/* high bits of start cluster */
 		{ 210, 4 }, { 210, 4 },			/* modify time & date */
 		{ 0, 0 },				/* startcluster */
 		{ 0, 0, 0, 0 }				/* filesize */
 	}
 };
 
 static int
 msdosfs_mkdir(struct vop_mkdir_args *ap)
 {
 	struct componentname *cnp = ap->a_cnp;
 	struct denode *dep;
 	struct denode *pdep = VTODE(ap->a_dvp);
 	struct direntry *denp;
 	struct msdosfsmount *pmp = pdep->de_pmp;
 	struct buf *bp;
 	u_long newcluster, pcl;
 	int bn;
 	int error;
 	struct denode ndirent;
 	struct timespec ts;
 
 	/*
 	 * If this is the root directory and there is no space left we
 	 * can't do anything.  This is because the root directory can not
 	 * change size.
 	 */
 	if (pdep->de_StartCluster == MSDOSFSROOT
 	    && pdep->de_fndoffset >= pdep->de_FileSize) {
 		error = ENOSPC;
 		goto bad2;
 	}
 
 	/*
 	 * Allocate a cluster to hold the about to be created directory.
 	 */
 	error = clusteralloc(pmp, 0, 1, CLUST_EOFE, &newcluster, NULL);
 	if (error)
 		goto bad2;
 
 	memset(&ndirent, 0, sizeof(ndirent));
 	ndirent.de_pmp = pmp;
 	ndirent.de_flag = DE_ACCESS | DE_CREATE | DE_UPDATE;
 	vfs_timestamp(&ts);
 	DETIMES(&ndirent, &ts, &ts, &ts);
 
 	/*
 	 * Now fill the cluster with the "." and ".." entries. And write
 	 * the cluster to disk.  This way it is there for the parent
 	 * directory to be pointing at if there were a crash.
 	 */
 	bn = cntobn(pmp, newcluster);
 	/* always succeeds */
 	bp = getblk(pmp->pm_devvp, bn, pmp->pm_bpcluster, 0, 0, 0);
 	memset(bp->b_data, 0, pmp->pm_bpcluster);
 	memcpy(bp->b_data, &dosdirtemplate, sizeof dosdirtemplate);
 	denp = (struct direntry *)bp->b_data;
 	putushort(denp[0].deStartCluster, newcluster);
 	putushort(denp[0].deCDate, ndirent.de_CDate);
 	putushort(denp[0].deCTime, ndirent.de_CTime);
 	denp[0].deCHundredth = ndirent.de_CHun;
 	putushort(denp[0].deADate, ndirent.de_ADate);
 	putushort(denp[0].deMDate, ndirent.de_MDate);
 	putushort(denp[0].deMTime, ndirent.de_MTime);
 	pcl = pdep->de_StartCluster;
 	/*
 	 * Although the root directory has a non-magic starting cluster
 	 * number for FAT32, chkdsk and fsck_msdosfs still require
 	 * references to it in dotdot entries to be magic.
 	 */
 	if (FAT32(pmp) && pcl == pmp->pm_rootdirblk)
 		pcl = MSDOSFSROOT;
 	putushort(denp[1].deStartCluster, pcl);
 	putushort(denp[1].deCDate, ndirent.de_CDate);
 	putushort(denp[1].deCTime, ndirent.de_CTime);
 	denp[1].deCHundredth = ndirent.de_CHun;
 	putushort(denp[1].deADate, ndirent.de_ADate);
 	putushort(denp[1].deMDate, ndirent.de_MDate);
 	putushort(denp[1].deMTime, ndirent.de_MTime);
 	if (FAT32(pmp)) {
 		putushort(denp[0].deHighClust, newcluster >> 16);
 		putushort(denp[1].deHighClust, pcl >> 16);
 	}
 
 	if (DOINGASYNC(ap->a_dvp))
 		bdwrite(bp);
 	else if ((error = bwrite(bp)) != 0)
 		goto bad;
 
 	/*
 	 * Now build up a directory entry pointing to the newly allocated
 	 * cluster.  This will be written to an empty slot in the parent
 	 * directory.
 	 */
 	error = uniqdosname(pdep, cnp, ndirent.de_Name);
 	if (error)
 		goto bad;
 
 	ndirent.de_Attributes = ATTR_DIRECTORY;
 	ndirent.de_LowerCase = 0;
 	ndirent.de_StartCluster = newcluster;
 	ndirent.de_FileSize = 0;
 	error = createde(&ndirent, pdep, &dep, cnp);
 	if (error)
 		goto bad;
 	*ap->a_vpp = DETOV(dep);
 	return (0);
 
 bad:
 	clusterfree(pmp, newcluster);
 bad2:
 	return (error);
 }
 
 static int
 msdosfs_rmdir(struct vop_rmdir_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct vnode *dvp = ap->a_dvp;
 	struct componentname *cnp = ap->a_cnp;
 	struct denode *ip, *dp;
 	int error;
 
 	ip = VTODE(vp);
 	dp = VTODE(dvp);
 
 	/*
 	 * Verify the directory is empty (and valid).
 	 * (Rmdir ".." won't be valid since
 	 *  ".." will contain a reference to
 	 *  the current directory and thus be
 	 *  non-empty.)
 	 */
 	error = 0;
 	if (!dosdirempty(ip)) {
 		error = ENOTEMPTY;
 		goto out;
 	}
 	/*
 	 * Delete the entry from the directory.  For dos filesystems this
 	 * gets rid of the directory entry on disk, the in memory copy
 	 * still exists but the de_refcnt is <= 0.  This prevents it from
 	 * being found by deget().  When the vput() on dep is done we give
 	 * up access and eventually msdosfs_reclaim() will be called which
 	 * will remove it from the denode cache.
 	 */
 	error = removede(dp, ip);
 	if (error)
 		goto out;
 	/*
 	 * This is where we decrement the link count in the parent
 	 * directory.  Since dos filesystems don't do this we just purge
 	 * the name cache.
 	 */
 	cache_purge(dvp);
 	/*
 	 * Truncate the directory that is being deleted.
 	 */
 	error = detrunc(ip, (u_long)0, IO_SYNC, cnp->cn_cred);
 	cache_purge(vp);
 
 out:
 	return (error);
 }
 
 /*
  * DOS filesystems don't know what symlinks are.
  */
 static int
 msdosfs_symlink(struct vop_symlink_args *ap)
 {
 	return (EOPNOTSUPP);
 }
 
 static int
 msdosfs_readdir(struct vop_readdir_args *ap)
 {
 	struct mbnambuf nb;
 	int error = 0;
 	int diff;
 	long n;
 	int blsize;
 	long on;
 	u_long cn;
 	u_long dirsperblk;
 	long bias = 0;
 	daddr_t bn, lbn;
 	struct buf *bp;
 	struct denode *dep = VTODE(ap->a_vp);
 	struct msdosfsmount *pmp = dep->de_pmp;
 	struct direntry *dentp;
 	struct dirent dirbuf;
 	struct uio *uio = ap->a_uio;
 	uint64_t *cookies = NULL;
 	int ncookies = 0;
 	off_t offset, off;
 	int chksum = -1;
 
 #ifdef MSDOSFS_DEBUG
 	printf("msdosfs_readdir(): vp %p, uio %p, cred %p, eofflagp %p\n",
 	    ap->a_vp, uio, ap->a_cred, ap->a_eofflag);
 #endif
 
 	/*
 	 * msdosfs_readdir() won't operate properly on regular files since
 	 * it does i/o only with the filesystem vnode, and hence can
 	 * retrieve the wrong block from the buffer cache for a plain file.
 	 * So, fail attempts to readdir() on a plain file.
 	 */
 	if ((dep->de_Attributes & ATTR_DIRECTORY) == 0)
 		return (ENOTDIR);
 
 	/*
 	 * To be safe, initialize dirbuf
 	 */
 	memset(dirbuf.d_name, 0, sizeof(dirbuf.d_name));
 
 	/*
 	 * If the user buffer is smaller than the size of one dos directory
 	 * entry or the file offset is not a multiple of the size of a
 	 * directory entry, then we fail the read.
 	 */
 	off = offset = uio->uio_offset;
 	if (uio->uio_resid < sizeof(struct direntry) ||
 	    (offset & (sizeof(struct direntry) - 1)))
 		return (EINVAL);
 
 	if (ap->a_ncookies) {
 		ncookies = uio->uio_resid / 16;
 		cookies = malloc(ncookies * sizeof(*cookies), M_TEMP,
 		       M_WAITOK);
 		*ap->a_cookies = cookies;
 		*ap->a_ncookies = ncookies;
 	}
 
 	dirsperblk = pmp->pm_BytesPerSec / sizeof(struct direntry);
 
 	/*
 	 * If they are reading from the root directory then, we simulate
 	 * the . and .. entries since these don't exist in the root
 	 * directory.  We also set the offset bias to make up for having to
 	 * simulate these entries. By this I mean that at file offset 64 we
 	 * read the first entry in the root directory that lives on disk.
 	 */
 	if (dep->de_StartCluster == MSDOSFSROOT
 	    || (FAT32(pmp) && dep->de_StartCluster == pmp->pm_rootdirblk)) {
 #if 0
 		printf("msdosfs_readdir(): going after . or .. in root dir, offset %d\n",
 		    offset);
 #endif
 		bias = 2 * sizeof(struct direntry);
 		if (offset < bias) {
 			for (n = (int)offset / sizeof(struct direntry);
 			     n < 2; n++) {
 				dirbuf.d_fileno = FAT32(pmp) ?
 				    (uint64_t)cntobn(pmp, pmp->pm_rootdirblk) *
 				    dirsperblk : 1;
 				dirbuf.d_type = DT_DIR;
 				switch (n) {
 				case 0:
 					dirbuf.d_namlen = 1;
 					dirbuf.d_name[0] = '.';
 					break;
 				case 1:
 					dirbuf.d_namlen = 2;
 					dirbuf.d_name[0] = '.';
 					dirbuf.d_name[1] = '.';
 					break;
 				}
 				dirbuf.d_reclen = GENERIC_DIRSIZ(&dirbuf);
 				/* NOTE: d_off is the offset of the *next* entry. */
 				dirbuf.d_off = offset + sizeof(struct direntry);
 				dirent_terminate(&dirbuf);
 				if (uio->uio_resid < dirbuf.d_reclen)
 					goto out;
 				error = uiomove(&dirbuf, dirbuf.d_reclen, uio);
 				if (error)
 					goto out;
 				offset += sizeof(struct direntry);
 				off = offset;
 				if (cookies) {
 					*cookies++ = offset;
 					if (--ncookies <= 0)
 						goto out;
 				}
 			}
 		}
 	}
 
 	mbnambuf_init(&nb);
 	off = offset;
 	while (uio->uio_resid > 0) {
 		lbn = de_cluster(pmp, offset - bias);
 		on = (offset - bias) & pmp->pm_crbomask;
 		n = min(pmp->pm_bpcluster - on, uio->uio_resid);
 		diff = dep->de_FileSize - (offset - bias);
 		if (diff <= 0)
 			break;
 		n = min(n, diff);
 		error = pcbmap(dep, lbn, &bn, &cn, &blsize);
 		if (error)
 			break;
 		error = bread(pmp->pm_devvp, bn, blsize, NOCRED, &bp);
 		if (error) {
 			return (error);
 		}
 		n = min(n, blsize - bp->b_resid);
 		if (n == 0) {
 			brelse(bp);
 			return (EIO);
 		}
 
 		/*
 		 * Convert from dos directory entries to fs-independent
 		 * directory entries.
 		 */
 		for (dentp = (struct direntry *)(bp->b_data + on);
 		     (char *)dentp < bp->b_data + on + n;
 		     dentp++, offset += sizeof(struct direntry)) {
 #if 0
 			printf("rd: dentp %08x prev %08x crnt %08x deName %02x attr %02x\n",
 			    dentp, prev, crnt, dentp->deName[0], dentp->deAttributes);
 #endif
 			/*
 			 * If this is an unused entry, we can stop.
 			 */
 			if (dentp->deName[0] == SLOT_EMPTY) {
 				brelse(bp);
 				goto out;
 			}
 			/*
 			 * Skip deleted entries.
 			 */
 			if (dentp->deName[0] == SLOT_DELETED) {
 				chksum = -1;
 				mbnambuf_init(&nb);
 				continue;
 			}
 
 			/*
 			 * Handle Win95 long directory entries
 			 */
 			if (dentp->deAttributes == ATTR_WIN95) {
 				if (pmp->pm_flags & MSDOSFSMNT_SHORTNAME)
 					continue;
 				chksum = win2unixfn(&nb,
 				    (struct winentry *)dentp, chksum, pmp);
 				continue;
 			}
 
 			/*
 			 * Skip volume labels
 			 */
 			if (dentp->deAttributes & ATTR_VOLUME) {
 				chksum = -1;
 				mbnambuf_init(&nb);
 				continue;
 			}
 			/*
 			 * This computation of d_fileno must match
 			 * the computation of va_fileid in
 			 * msdosfs_getattr.
 			 */
 			if (dentp->deAttributes & ATTR_DIRECTORY) {
 				cn = getushort(dentp->deStartCluster);
 				if (FAT32(pmp)) {
 					cn |= getushort(dentp->deHighClust) <<
 					    16;
 					if (cn == MSDOSFSROOT)
 						cn = pmp->pm_rootdirblk;
 				}
 				if (cn == MSDOSFSROOT && !FAT32(pmp))
 					dirbuf.d_fileno = 1;
 				else
 					dirbuf.d_fileno = cntobn(pmp, cn) *
 					    dirsperblk;
 				dirbuf.d_type = DT_DIR;
 			} else {
 				dirbuf.d_fileno = (uoff_t)offset /
 				    sizeof(struct direntry);
 				dirbuf.d_type = DT_REG;
 			}
 
 			if (chksum != winChksum(dentp->deName)) {
 				dirbuf.d_namlen = dos2unixfn(dentp->deName,
 				    (u_char *)dirbuf.d_name,
 				    dentp->deLowerCase |
 					((pmp->pm_flags & MSDOSFSMNT_SHORTNAME) ?
 					(LCASE_BASE | LCASE_EXT) : 0),
 				    pmp);
 				mbnambuf_init(&nb);
 			} else
 				mbnambuf_flush(&nb, &dirbuf);
 			chksum = -1;
 			dirbuf.d_reclen = GENERIC_DIRSIZ(&dirbuf);
 			/* NOTE: d_off is the offset of the *next* entry. */
 			dirbuf.d_off = offset + sizeof(struct direntry);
 			dirent_terminate(&dirbuf);
 			if (uio->uio_resid < dirbuf.d_reclen) {
 				brelse(bp);
 				goto out;
 			}
 			error = uiomove(&dirbuf, dirbuf.d_reclen, uio);
 			if (error) {
 				brelse(bp);
 				goto out;
 			}
 			if (cookies) {
 				*cookies++ = offset + sizeof(struct direntry);
 				if (--ncookies <= 0) {
 					brelse(bp);
 					goto out;
 				}
 			}
 			off = offset + sizeof(struct direntry);
 		}
 		brelse(bp);
 	}
 out:
 	/* Subtract unused cookies */
 	if (ap->a_ncookies)
 		*ap->a_ncookies -= ncookies;
 
 	uio->uio_offset = off;
 
 	/*
 	 * Set the eofflag (NFS uses it)
 	 */
 	if (ap->a_eofflag) {
 		if (dep->de_FileSize - (offset - bias) <= 0)
 			*ap->a_eofflag = 1;
 		else
 			*ap->a_eofflag = 0;
 	}
 	return (error);
 }
 
 /*-
  * a_vp   - pointer to the file's vnode
  * a_bn   - logical block number within the file (cluster number for us)
  * a_bop  - where to return the bufobj of the special file containing the fs
  * a_bnp  - where to return the "physical" block number corresponding to a_bn
  *          (relative to the special file; units are blocks of size DEV_BSIZE)
  * a_runp - where to return the "run past" a_bn.  This is the count of logical
  *          blocks whose physical blocks (together with a_bn's physical block)
  *          are contiguous.
  * a_runb - where to return the "run before" a_bn.
  */
 static int
 msdosfs_bmap(struct vop_bmap_args *ap)
 {
 	struct fatcache savefc;
 	struct denode *dep;
 	struct mount *mp;
 	struct msdosfsmount *pmp;
 	struct vnode *vp;
 	daddr_t runbn;
 	u_long cn;
 	int bnpercn, error, maxio, maxrun, run;
 
 	vp = ap->a_vp;
 	dep = VTODE(vp);
 	pmp = dep->de_pmp;
 	if (ap->a_bop != NULL)
 		*ap->a_bop = &pmp->pm_devvp->v_bufobj;
 	if (ap->a_bnp == NULL)
 		return (0);
 	if (ap->a_runp != NULL)
 		*ap->a_runp = 0;
 	if (ap->a_runb != NULL)
 		*ap->a_runb = 0;
 	cn = ap->a_bn;
 	if (cn != ap->a_bn)
 		return (EFBIG);
 	error = pcbmap(dep, cn, ap->a_bnp, NULL, NULL);
 	if (error != 0 || (ap->a_runp == NULL && ap->a_runb == NULL))
 		return (error);
 
 	/*
 	 * Prepare to back out updates of the fatchain cache after the one
 	 * for the first block done by pcbmap() above.  Without the backout,
 	 * then whenever the caller doesn't do i/o to all of the blocks that
 	 * we find, the single useful cache entry would be too far in advance
 	 * of the actual i/o to work for the next sequential i/o.  Then the
 	 * FAT would be searched from the beginning.  With the backout, the
 	 * FAT is searched starting at most a few blocks early.  This wastes
 	 * much less time.  Time is also wasted finding more blocks than the
 	 * caller will do i/o to.  This is necessary because the runlength
 	 * parameters are output-only.
 	 */
 	savefc = dep->de_fc[FC_LASTMAP];
 
 	mp = vp->v_mount;
 	maxio = mp->mnt_iosize_max / mp->mnt_stat.f_iosize;
 	bnpercn = de_cn2bn(pmp, 1);
 	if (ap->a_runp != NULL) {
 		maxrun = ulmin(maxio - 1, pmp->pm_maxcluster - cn);
 		for (run = 1; run <= maxrun; run++) {
 			if (pcbmap(dep, cn + run, &runbn, NULL, NULL) != 0 ||
 			    runbn != *ap->a_bnp + run * bnpercn)
 				break;
 		}
 		*ap->a_runp = run - 1;
 	}
 	if (ap->a_runb != NULL) {
 		maxrun = ulmin(maxio - 1, cn);
 		for (run = 1; run < maxrun; run++) {
 			if (pcbmap(dep, cn - run, &runbn, NULL, NULL) != 0 ||
 			    runbn != *ap->a_bnp - run * bnpercn)
 				break;
 		}
 		*ap->a_runb = run - 1;
 	}
 	dep->de_fc[FC_LASTMAP] = savefc;
 	return (0);
 }
 
 SYSCTL_NODE(_vfs, OID_AUTO, msdosfs, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
     "msdos filesystem");
 static int use_buf_pager = 1;
 SYSCTL_INT(_vfs_msdosfs, OID_AUTO, use_buf_pager, CTLFLAG_RWTUN,
     &use_buf_pager, 0,
     "Use buffer pager instead of bmap");
 
 static daddr_t
 msdosfs_gbp_getblkno(struct vnode *vp, vm_ooffset_t off)
 {
 
 	return (de_cluster(VTODE(vp)->de_pmp, off));
 }
 
 static int
 msdosfs_gbp_getblksz(struct vnode *vp, daddr_t lbn, long *sz)
 {
 
 	*sz = VTODE(vp)->de_pmp->pm_bpcluster;
 	return (0);
 }
 
 static int
 msdosfs_getpages(struct vop_getpages_args *ap)
 {
 
 	if (use_buf_pager)
 		return (vfs_bio_getpages(ap->a_vp, ap->a_m, ap->a_count,
 		    ap->a_rbehind, ap->a_rahead, msdosfs_gbp_getblkno,
 		    msdosfs_gbp_getblksz));
 	return (vnode_pager_generic_getpages(ap->a_vp, ap->a_m, ap->a_count,
 	    ap->a_rbehind, ap->a_rahead, NULL, NULL));
 }
 
 static int
 msdosfs_strategy(struct vop_strategy_args *ap)
 {
 	struct buf *bp = ap->a_bp;
 	struct denode *dep = VTODE(ap->a_vp);
 	struct bufobj *bo;
 	int error = 0;
 	daddr_t blkno;
 
 	/*
 	 * If we don't already know the filesystem relative block number
 	 * then get it using pcbmap().  If pcbmap() returns the block
 	 * number as -1 then we've got a hole in the file.  DOS filesystems
 	 * don't allow files with holes, so we shouldn't ever see this.
 	 */
 	if (bp->b_blkno == bp->b_lblkno) {
 		error = pcbmap(dep, bp->b_lblkno, &blkno, 0, 0);
 		bp->b_blkno = blkno;
 		if (error) {
 			bp->b_error = error;
 			bp->b_ioflags |= BIO_ERROR;
 			bufdone(bp);
 			return (0);
 		}
 		if ((long)bp->b_blkno == -1)
 			vfs_bio_clrbuf(bp);
 	}
 	if (bp->b_blkno == -1) {
 		bufdone(bp);
 		return (0);
 	}
 	/*
 	 * Read/write the block from/to the disk that contains the desired
 	 * file block.
 	 */
 	bp->b_iooffset = dbtob(bp->b_blkno);
 	bo = dep->de_pmp->pm_bo;
 	BO_STRATEGY(bo, bp);
 	return (0);
 }
 
 static int
 msdosfs_print(struct vop_print_args *ap)
 {
 	struct denode *dep = VTODE(ap->a_vp);
 
 	printf("\tstartcluster %lu, dircluster %lu, diroffset %lu, ",
 	       dep->de_StartCluster, dep->de_dirclust, dep->de_diroffset);
 	printf("on dev %s\n", devtoname(dep->de_pmp->pm_dev));
 	return (0);
 }
 
 static int
 msdosfs_pathconf(struct vop_pathconf_args *ap)
 {
 	struct msdosfsmount *pmp = VTODE(ap->a_vp)->de_pmp;
 
 	switch (ap->a_name) {
 	case _PC_FILESIZEBITS:
 		*ap->a_retval = 32;
 		return (0);
 	case _PC_LINK_MAX:
 		*ap->a_retval = 1;
 		return (0);
 	case _PC_NAME_MAX:
 		*ap->a_retval = pmp->pm_flags & MSDOSFSMNT_LONGNAME ? WIN_MAXLEN : 12;
 		return (0);
 	case _PC_CHOWN_RESTRICTED:
 		*ap->a_retval = 1;
 		return (0);
 	case _PC_NO_TRUNC:
 		*ap->a_retval = 0;
 		return (0);
 	default:
 		return (vop_stdpathconf(ap));
 	}
 	/* NOTREACHED */
 }
 
 static int
 msdosfs_vptofh(struct vop_vptofh_args *ap)
 {
 	struct denode *dep;
 	struct defid *defhp;
+	_Static_assert(sizeof(struct defid) <= sizeof(struct fid),
+	    "struct defid cannot be larger than struct fid");
 
 	dep = VTODE(ap->a_vp);
 	defhp = (struct defid *)ap->a_fhp;
 	defhp->defid_len = sizeof(struct defid);
 	defhp->defid_dirclust = dep->de_dirclust;
 	defhp->defid_dirofs = dep->de_diroffset;
 	/* defhp->defid_gen = dep->de_gen; */
 	return (0);
 }
 
 /* Global vfs data structures for msdosfs */
 struct vop_vector msdosfs_vnodeops = {
 	.vop_default =		&default_vnodeops,
 
 	.vop_access =		msdosfs_access,
 	.vop_bmap =		msdosfs_bmap,
 	.vop_getpages =		msdosfs_getpages,
 	.vop_cachedlookup =	msdosfs_lookup,
 	.vop_open =		msdosfs_open,
 	.vop_close =		msdosfs_close,
 	.vop_create =		msdosfs_create,
 	.vop_fsync =		msdosfs_fsync,
 	.vop_fdatasync =	vop_stdfdatasync_buf,
 	.vop_getattr =		msdosfs_getattr,
 	.vop_inactive =		msdosfs_inactive,
 	.vop_link =		msdosfs_link,
 	.vop_lookup =		vfs_cache_lookup,
 	.vop_mkdir =		msdosfs_mkdir,
 	.vop_mknod =		msdosfs_mknod,
 	.vop_pathconf =		msdosfs_pathconf,
 	.vop_print =		msdosfs_print,
 	.vop_read =		msdosfs_read,
 	.vop_readdir =		msdosfs_readdir,
 	.vop_reclaim =		msdosfs_reclaim,
 	.vop_remove =		msdosfs_remove,
 	.vop_rename =		msdosfs_rename,
 	.vop_rmdir =		msdosfs_rmdir,
 	.vop_setattr =		msdosfs_setattr,
 	.vop_strategy =		msdosfs_strategy,
 	.vop_symlink =		msdosfs_symlink,
 	.vop_write =		msdosfs_write,
 	.vop_vptofh =		msdosfs_vptofh,
 };
 VFS_VOP_VECTOR_REGISTER(msdosfs_vnodeops);
diff --git a/sys/fs/tmpfs/tmpfs_vnops.c b/sys/fs/tmpfs/tmpfs_vnops.c
index 428c31f3c59a..162977b8abf7 100644
--- a/sys/fs/tmpfs/tmpfs_vnops.c
+++ b/sys/fs/tmpfs/tmpfs_vnops.c
@@ -1,2232 +1,2234 @@
 /*	$NetBSD: tmpfs_vnops.c,v 1.39 2007/07/23 15:41:01 jmmv Exp $	*/
 
 /*-
  * SPDX-License-Identifier: BSD-2-Clause
  *
  * Copyright (c) 2005, 2006 The NetBSD Foundation, Inc.
  * All rights reserved.
  *
  * This code is derived from software contributed to The NetBSD Foundation
  * by Julio M. Merino Vidal, developed as part of Google's Summer of Code
  * 2005 program.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
 /*
  * tmpfs vnode interface.
  */
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/dirent.h>
 #include <sys/extattr.h>
 #include <sys/fcntl.h>
 #include <sys/file.h>
 #include <sys/filio.h>
 #include <sys/limits.h>
 #include <sys/lockf.h>
 #include <sys/lock.h>
 #include <sys/mount.h>
 #include <sys/namei.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/rwlock.h>
 #include <sys/sched.h>
 #include <sys/smr.h>
 #include <sys/stat.h>
 #include <sys/sysctl.h>
 #include <sys/unistd.h>
 #include <sys/vnode.h>
 #include <security/audit/audit.h>
 #include <security/mac/mac_framework.h>
 
 #include <vm/vm.h>
 #include <vm/vm_param.h>
 #include <vm/vm_object.h>
 #include <vm/vm_page.h>
 #include <vm/vm_pager.h>
 #include <vm/swap_pager.h>
 
 #include <fs/tmpfs/tmpfs_vnops.h>
 #include <fs/tmpfs/tmpfs.h>
 
 SYSCTL_DECL(_vfs_tmpfs);
 VFS_SMR_DECLARE;
 
 static volatile int tmpfs_rename_restarts;
 SYSCTL_INT(_vfs_tmpfs, OID_AUTO, rename_restarts, CTLFLAG_RD,
     __DEVOLATILE(int *, &tmpfs_rename_restarts), 0,
     "Times rename had to restart due to lock contention");
 
 MALLOC_DEFINE(M_TMPFSEA, "tmpfs extattr", "tmpfs extattr structure");
 
 static int
 tmpfs_vn_get_ino_alloc(struct mount *mp, void *arg, int lkflags,
     struct vnode **rvp)
 {
 
 	return (tmpfs_alloc_vp(mp, arg, lkflags, rvp));
 }
 
 static int
 tmpfs_lookup1(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp)
 {
 	struct tmpfs_dirent *de;
 	struct tmpfs_node *dnode, *pnode;
 	struct tmpfs_mount *tm;
 	int error;
 
 	/* Caller assumes responsibility for ensuring access (VEXEC). */
 	dnode = VP_TO_TMPFS_DIR(dvp);
 	*vpp = NULLVP;
 
 	/* We cannot be requesting the parent directory of the root node. */
 	MPASS(IMPLIES(dnode->tn_type == VDIR &&
 	    dnode->tn_dir.tn_parent == dnode,
 	    !(cnp->cn_flags & ISDOTDOT)));
 
 	TMPFS_ASSERT_LOCKED(dnode);
 	if (dnode->tn_dir.tn_parent == NULL) {
 		error = ENOENT;
 		goto out;
 	}
 	if (cnp->cn_flags & ISDOTDOT) {
 		tm = VFS_TO_TMPFS(dvp->v_mount);
 		pnode = dnode->tn_dir.tn_parent;
 		tmpfs_ref_node(pnode);
 		error = vn_vget_ino_gen(dvp, tmpfs_vn_get_ino_alloc,
 		    pnode, cnp->cn_lkflags, vpp);
 		tmpfs_free_node(tm, pnode);
 		if (error != 0)
 			goto out;
 	} else if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') {
 		VREF(dvp);
 		*vpp = dvp;
 		error = 0;
 	} else {
 		de = tmpfs_dir_lookup(dnode, NULL, cnp);
 		if (de != NULL && de->td_node == NULL)
 			cnp->cn_flags |= ISWHITEOUT;
 		if (de == NULL || de->td_node == NULL) {
 			/*
 			 * The entry was not found in the directory.
 			 * This is OK if we are creating or renaming an
 			 * entry and are working on the last component of
 			 * the path name.
 			 */
 			if ((cnp->cn_flags & ISLASTCN) &&
 			    (cnp->cn_nameiop == CREATE || \
 			    cnp->cn_nameiop == RENAME ||
 			    (cnp->cn_nameiop == DELETE &&
 			    cnp->cn_flags & DOWHITEOUT &&
 			    cnp->cn_flags & ISWHITEOUT))) {
 				error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred,
 				    curthread);
 				if (error != 0)
 					goto out;
 
 				error = EJUSTRETURN;
 			} else
 				error = ENOENT;
 		} else {
 			struct tmpfs_node *tnode;
 
 			/*
 			 * The entry was found, so get its associated
 			 * tmpfs_node.
 			 */
 			tnode = de->td_node;
 
 			/*
 			 * If we are not at the last path component and
 			 * found a non-directory or non-link entry (which
 			 * may itself be pointing to a directory), raise
 			 * an error.
 			 */
 			if ((tnode->tn_type != VDIR &&
 			    tnode->tn_type != VLNK) &&
 			    !(cnp->cn_flags & ISLASTCN)) {
 				error = ENOTDIR;
 				goto out;
 			}
 
 			/*
 			 * If we are deleting or renaming the entry, keep
 			 * track of its tmpfs_dirent so that it can be
 			 * easily deleted later.
 			 */
 			if ((cnp->cn_flags & ISLASTCN) &&
 			    (cnp->cn_nameiop == DELETE ||
 			    cnp->cn_nameiop == RENAME)) {
 				error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred,
 				    curthread);
 				if (error != 0)
 					goto out;
 
 				/* Allocate a new vnode on the matching entry. */
 				error = tmpfs_alloc_vp(dvp->v_mount, tnode,
 				    cnp->cn_lkflags, vpp);
 				if (error != 0)
 					goto out;
 
 				if ((dnode->tn_mode & S_ISTXT) &&
 				  VOP_ACCESS(dvp, VADMIN, cnp->cn_cred,
 				  curthread) && VOP_ACCESS(*vpp, VADMIN,
 				  cnp->cn_cred, curthread)) {
 					error = EPERM;
 					vput(*vpp);
 					*vpp = NULL;
 					goto out;
 				}
 			} else {
 				error = tmpfs_alloc_vp(dvp->v_mount, tnode,
 				    cnp->cn_lkflags, vpp);
 				if (error != 0)
 					goto out;
 			}
 		}
 	}
 
 	/*
 	 * Store the result of this lookup in the cache.  Avoid this if the
 	 * request was for creation, as it does not improve timings on
 	 * emprical tests.
 	 */
 	if ((cnp->cn_flags & MAKEENTRY) != 0 && tmpfs_use_nc(dvp))
 		cache_enter(dvp, *vpp, cnp);
 
 out:
 #ifdef INVARIANTS
 	/*
 	 * If there were no errors, *vpp cannot be null and it must be
 	 * locked.
 	 */
 	if (error == 0) {
 		MPASS(*vpp != NULLVP);
 		ASSERT_VOP_LOCKED(*vpp, __func__);
 	} else {
 		MPASS(*vpp == NULL);
 	}
 #endif
 
 	return (error);
 }
 
 static int
 tmpfs_cached_lookup(struct vop_cachedlookup_args *v)
 {
 
 	return (tmpfs_lookup1(v->a_dvp, v->a_vpp, v->a_cnp));
 }
 
 static int
 tmpfs_lookup(struct vop_lookup_args *v)
 {
 	struct vnode *dvp = v->a_dvp;
 	struct vnode **vpp = v->a_vpp;
 	struct componentname *cnp = v->a_cnp;
 	int error;
 
 	/* Check accessibility of requested node as a first step. */
 	error = vn_dir_check_exec(dvp, cnp);
 	if (error != 0)
 		return (error);
 
 	return (tmpfs_lookup1(dvp, vpp, cnp));
 }
 
 static int
 tmpfs_create(struct vop_create_args *v)
 {
 	struct vnode *dvp = v->a_dvp;
 	struct vnode **vpp = v->a_vpp;
 	struct componentname *cnp = v->a_cnp;
 	struct vattr *vap = v->a_vap;
 	int error;
 
 	MPASS(vap->va_type == VREG || vap->va_type == VSOCK);
 
 	error = tmpfs_alloc_file(dvp, vpp, vap, cnp, NULL);
 	if (error == 0 && (cnp->cn_flags & MAKEENTRY) != 0 && tmpfs_use_nc(dvp))
 		cache_enter(dvp, *vpp, cnp);
 	return (error);
 }
 
 static int
 tmpfs_mknod(struct vop_mknod_args *v)
 {
 	struct vnode *dvp = v->a_dvp;
 	struct vnode **vpp = v->a_vpp;
 	struct componentname *cnp = v->a_cnp;
 	struct vattr *vap = v->a_vap;
 
 	if (vap->va_type != VBLK && vap->va_type != VCHR &&
 	    vap->va_type != VFIFO)
 		return (EINVAL);
 
 	return (tmpfs_alloc_file(dvp, vpp, vap, cnp, NULL));
 }
 
 struct fileops tmpfs_fnops;
 
 static int
 tmpfs_open(struct vop_open_args *v)
 {
 	struct vnode *vp;
 	struct tmpfs_node *node;
 	struct file *fp;
 	int error, mode;
 
 	vp = v->a_vp;
 	mode = v->a_mode;
 	node = VP_TO_TMPFS_NODE(vp);
 
 	/*
 	 * The file is still active but all its names have been removed
 	 * (e.g. by a "rmdir $(pwd)").  It cannot be opened any more as
 	 * it is about to die.
 	 */
 	if (node->tn_links < 1)
 		return (ENOENT);
 
 	/* If the file is marked append-only, deny write requests. */
 	if (node->tn_flags & APPEND && (mode & (FWRITE | O_APPEND)) == FWRITE)
 		error = EPERM;
 	else {
 		error = 0;
 		/* For regular files, the call below is nop. */
 		KASSERT(vp->v_type != VREG || (node->tn_reg.tn_aobj->flags &
 		    OBJ_DEAD) == 0, ("dead object"));
 		vnode_create_vobject(vp, node->tn_size, v->a_td);
 	}
 
 	fp = v->a_fp;
 	MPASS(fp == NULL || fp->f_data == NULL);
 	if (error == 0 && fp != NULL && vp->v_type == VREG) {
 		tmpfs_ref_node(node);
 		finit_vnode(fp, mode, node, &tmpfs_fnops);
 	}
 
 	return (error);
 }
 
 static int
 tmpfs_close(struct vop_close_args *v)
 {
 	struct vnode *vp = v->a_vp;
 
 	/* Update node times. */
 	tmpfs_update(vp);
 
 	return (0);
 }
 
 int
 tmpfs_fo_close(struct file *fp, struct thread *td)
 {
 	struct tmpfs_node *node;
 
 	node = fp->f_data;
 	if (node != NULL) {
 		MPASS(node->tn_type == VREG);
 		tmpfs_free_node(node->tn_reg.tn_tmp, node);
 	}
 	return (vnops.fo_close(fp, td));
 }
 
 /*
  * VOP_FPLOOKUP_VEXEC routines are subject to special circumstances, see
  * the comment above cache_fplookup for details.
  */
 int
 tmpfs_fplookup_vexec(struct vop_fplookup_vexec_args *v)
 {
 	struct vnode *vp;
 	struct tmpfs_node *node;
 	struct ucred *cred;
 	mode_t all_x, mode;
 
 	vp = v->a_vp;
 	node = VP_TO_TMPFS_NODE_SMR(vp);
 	if (__predict_false(node == NULL))
 		return (EAGAIN);
 
 	all_x = S_IXUSR | S_IXGRP | S_IXOTH;
 	mode = atomic_load_short(&node->tn_mode);
 	if (__predict_true((mode & all_x) == all_x))
 		return (0);
 
 	cred = v->a_cred;
 	return (vaccess_vexec_smr(mode, node->tn_uid, node->tn_gid, cred));
 }
 
 static int
 tmpfs_access_locked(struct vnode *vp, struct tmpfs_node *node,
     accmode_t accmode, struct ucred *cred)
 {
 #ifdef DEBUG_VFS_LOCKS
 	if (!mtx_owned(TMPFS_NODE_MTX(node))) {
 		ASSERT_VOP_LOCKED(vp,
 		    "tmpfs_access_locked needs locked vnode or node");
 	}
 #endif
 
 	if ((accmode & VWRITE) != 0 && (node->tn_flags & IMMUTABLE) != 0)
 		return (EPERM);
 	return (vaccess(vp->v_type, node->tn_mode, node->tn_uid, node->tn_gid,
 	    accmode, cred));
 }
 
 int
 tmpfs_access(struct vop_access_args *v)
 {
 	struct vnode *vp = v->a_vp;
 	struct ucred *cred = v->a_cred;
 	struct tmpfs_node *node = VP_TO_TMPFS_NODE(vp);
 	mode_t all_x = S_IXUSR | S_IXGRP | S_IXOTH;
 	accmode_t accmode = v->a_accmode;
 
 	/*
 	 * Common case path lookup.
 	 */
 	if (__predict_true(accmode == VEXEC &&
 	    (node->tn_mode & all_x) == all_x))
 		return (0);
 
 	switch (vp->v_type) {
 	case VDIR:
 		/* FALLTHROUGH */
 	case VLNK:
 		/* FALLTHROUGH */
 	case VREG:
 		if ((accmode & VWRITE) != 0 &&
 		    (vp->v_mount->mnt_flag & MNT_RDONLY) != 0)
 			return (EROFS);
 		break;
 
 	case VBLK:
 		/* FALLTHROUGH */
 	case VCHR:
 		/* FALLTHROUGH */
 	case VSOCK:
 		/* FALLTHROUGH */
 	case VFIFO:
 		break;
 
 	default:
 		return (EINVAL);
 	}
 
 	return (tmpfs_access_locked(vp, node, accmode, cred));
 }
 
 int
 tmpfs_stat(struct vop_stat_args *v)
 {
 	struct vnode *vp = v->a_vp;
 	struct stat *sb = v->a_sb;
 	struct tmpfs_node *node;
 	int error;
 
 	node = VP_TO_TMPFS_NODE(vp);
 
 	tmpfs_update_getattr(vp);
 
 	error = vop_stat_helper_pre(v);
 	if (__predict_false(error))
 		return (error);
 
 	sb->st_dev = vp->v_mount->mnt_stat.f_fsid.val[0];
 	sb->st_ino = node->tn_id;
 	sb->st_mode = node->tn_mode | VTTOIF(vp->v_type);
 	sb->st_nlink = node->tn_links;
 	sb->st_uid = node->tn_uid;
 	sb->st_gid = node->tn_gid;
 	sb->st_rdev = (vp->v_type == VBLK || vp->v_type == VCHR) ?
 		node->tn_rdev : NODEV;
 	sb->st_size = node->tn_size;
 	sb->st_atim.tv_sec = node->tn_atime.tv_sec;
 	sb->st_atim.tv_nsec = node->tn_atime.tv_nsec;
 	sb->st_mtim.tv_sec = node->tn_mtime.tv_sec;
 	sb->st_mtim.tv_nsec = node->tn_mtime.tv_nsec;
 	sb->st_ctim.tv_sec = node->tn_ctime.tv_sec;
 	sb->st_ctim.tv_nsec = node->tn_ctime.tv_nsec;
 	sb->st_birthtim.tv_sec = node->tn_birthtime.tv_sec;
 	sb->st_birthtim.tv_nsec = node->tn_birthtime.tv_nsec;
 	sb->st_blksize = PAGE_SIZE;
 	sb->st_flags = node->tn_flags;
 	sb->st_gen = node->tn_gen;
 	if (vp->v_type == VREG) {
 #ifdef __ILP32__
 		vm_object_t obj = node->tn_reg.tn_aobj;
 
 		/* Handle torn read */
 		VM_OBJECT_RLOCK(obj);
 #endif
 		sb->st_blocks = ptoa(node->tn_reg.tn_pages);
 #ifdef __ILP32__
 		VM_OBJECT_RUNLOCK(obj);
 #endif
 	} else {
 		sb->st_blocks = node->tn_size;
 	}
 	sb->st_blocks /= S_BLKSIZE;
 	return (vop_stat_helper_post(v, error));
 }
 
 int
 tmpfs_getattr(struct vop_getattr_args *v)
 {
 	struct vnode *vp = v->a_vp;
 	struct vattr *vap = v->a_vap;
 	struct tmpfs_node *node;
 
 	node = VP_TO_TMPFS_NODE(vp);
 
 	tmpfs_update_getattr(vp);
 
 	vap->va_type = vp->v_type;
 	vap->va_mode = node->tn_mode;
 	vap->va_nlink = node->tn_links;
 	vap->va_uid = node->tn_uid;
 	vap->va_gid = node->tn_gid;
 	vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
 	vap->va_fileid = node->tn_id;
 	vap->va_size = node->tn_size;
 	vap->va_blocksize = PAGE_SIZE;
 	vap->va_atime = node->tn_atime;
 	vap->va_mtime = node->tn_mtime;
 	vap->va_ctime = node->tn_ctime;
 	vap->va_birthtime = node->tn_birthtime;
 	vap->va_gen = node->tn_gen;
 	vap->va_flags = node->tn_flags;
 	vap->va_rdev = (vp->v_type == VBLK || vp->v_type == VCHR) ?
 	    node->tn_rdev : NODEV;
 	if (vp->v_type == VREG) {
 #ifdef __ILP32__
 		vm_object_t obj = node->tn_reg.tn_aobj;
 
 		VM_OBJECT_RLOCK(obj);
 #endif
 		vap->va_bytes = ptoa(node->tn_reg.tn_pages);
 #ifdef __ILP32__
 		VM_OBJECT_RUNLOCK(obj);
 #endif
 	} else {
 		vap->va_bytes = node->tn_size;
 	}
 	vap->va_filerev = 0;
 
 	return (0);
 }
 
 int
 tmpfs_setattr(struct vop_setattr_args *v)
 {
 	struct vnode *vp = v->a_vp;
 	struct vattr *vap = v->a_vap;
 	struct ucred *cred = v->a_cred;
 	struct thread *td = curthread;
 
 	int error;
 
 	ASSERT_VOP_IN_SEQC(vp);
 
 	error = 0;
 
 	/* Abort if any unsettable attribute is given. */
 	if (vap->va_type != VNON ||
 	    vap->va_nlink != VNOVAL ||
 	    vap->va_fsid != VNOVAL ||
 	    vap->va_fileid != VNOVAL ||
 	    vap->va_blocksize != VNOVAL ||
 	    vap->va_gen != VNOVAL ||
 	    vap->va_rdev != VNOVAL ||
 	    vap->va_bytes != VNOVAL)
 		error = EINVAL;
 
 	if (error == 0 && (vap->va_flags != VNOVAL))
 		error = tmpfs_chflags(vp, vap->va_flags, cred, td);
 
 	if (error == 0 && (vap->va_size != VNOVAL))
 		error = tmpfs_chsize(vp, vap->va_size, cred, td);
 
 	if (error == 0 && (vap->va_uid != VNOVAL || vap->va_gid != VNOVAL))
 		error = tmpfs_chown(vp, vap->va_uid, vap->va_gid, cred, td);
 
 	if (error == 0 && (vap->va_mode != (mode_t)VNOVAL))
 		error = tmpfs_chmod(vp, vap->va_mode, cred, td);
 
 	if (error == 0 && ((vap->va_atime.tv_sec != VNOVAL &&
 	    vap->va_atime.tv_nsec != VNOVAL) ||
 	    (vap->va_mtime.tv_sec != VNOVAL &&
 	    vap->va_mtime.tv_nsec != VNOVAL) ||
 	    (vap->va_birthtime.tv_sec != VNOVAL &&
 	    vap->va_birthtime.tv_nsec != VNOVAL)))
 		error = tmpfs_chtimes(vp, vap, cred, td);
 
 	/*
 	 * Update the node times.  We give preference to the error codes
 	 * generated by this function rather than the ones that may arise
 	 * from tmpfs_update.
 	 */
 	tmpfs_update(vp);
 
 	return (error);
 }
 
 static int
 tmpfs_read(struct vop_read_args *v)
 {
 	struct vnode *vp;
 	struct uio *uio;
 	struct tmpfs_node *node;
 
 	vp = v->a_vp;
 	if (vp->v_type != VREG)
 		return (EISDIR);
 	uio = v->a_uio;
 	if (uio->uio_offset < 0)
 		return (EINVAL);
 	node = VP_TO_TMPFS_NODE(vp);
 	tmpfs_set_accessed(VFS_TO_TMPFS(vp->v_mount), node);
 	return (uiomove_object(node->tn_reg.tn_aobj, node->tn_size, uio));
 }
 
 static int
 tmpfs_read_pgcache(struct vop_read_pgcache_args *v)
 {
 	struct vnode *vp;
 	struct tmpfs_node *node;
 	vm_object_t object;
 	off_t size;
 	int error;
 
 	vp = v->a_vp;
 	VNPASS((vn_irflag_read(vp) & VIRF_PGREAD) != 0, vp);
 
 	if (v->a_uio->uio_offset < 0)
 		return (EINVAL);
 
 	error = EJUSTRETURN;
 	vfs_smr_enter();
 
 	node = VP_TO_TMPFS_NODE_SMR(vp);
 	if (node == NULL)
 		goto out_smr;
 	MPASS(node->tn_type == VREG);
 	MPASS(node->tn_refcount >= 1);
 	object = node->tn_reg.tn_aobj;
 	if (object == NULL)
 		goto out_smr;
 
 	MPASS(object->type == tmpfs_pager_type);
 	MPASS((object->flags & (OBJ_ANON | OBJ_DEAD | OBJ_SWAP)) ==
 	    OBJ_SWAP);
 	if (!VN_IS_DOOMED(vp)) {
 		/* size cannot become shorter due to rangelock. */
 		size = node->tn_size;
 		tmpfs_set_accessed(node->tn_reg.tn_tmp, node);
 		vfs_smr_exit();
 		error = uiomove_object(object, size, v->a_uio);
 		return (error);
 	}
 out_smr:
 	vfs_smr_exit();
 	return (error);
 }
 
 static int
 tmpfs_write(struct vop_write_args *v)
 {
 	struct vnode *vp;
 	struct uio *uio;
 	struct tmpfs_node *node;
 	off_t oldsize;
 	ssize_t r;
 	int error, ioflag;
 	mode_t newmode;
 
 	vp = v->a_vp;
 	uio = v->a_uio;
 	ioflag = v->a_ioflag;
 	error = 0;
 	node = VP_TO_TMPFS_NODE(vp);
 	oldsize = node->tn_size;
 
 	if (uio->uio_offset < 0 || vp->v_type != VREG)
 		return (EINVAL);
 	if (uio->uio_resid == 0)
 		return (0);
 	if (ioflag & IO_APPEND)
 		uio->uio_offset = node->tn_size;
 	error = vn_rlimit_fsizex(vp, uio, VFS_TO_TMPFS(vp->v_mount)->
 	    tm_maxfilesize, &r, uio->uio_td);
 	if (error != 0) {
 		vn_rlimit_fsizex_res(uio, r);
 		return (error);
 	}
 
 	if (uio->uio_offset + uio->uio_resid > node->tn_size) {
 		error = tmpfs_reg_resize(vp, uio->uio_offset + uio->uio_resid,
 		    FALSE);
 		if (error != 0)
 			goto out;
 	}
 
 	error = uiomove_object(node->tn_reg.tn_aobj, node->tn_size, uio);
 	node->tn_status |= TMPFS_NODE_MODIFIED | TMPFS_NODE_CHANGED;
 	node->tn_accessed = true;
 	if (node->tn_mode & (S_ISUID | S_ISGID)) {
 		if (priv_check_cred(v->a_cred, PRIV_VFS_RETAINSUGID)) {
 			newmode = node->tn_mode & ~(S_ISUID | S_ISGID);
 			vn_seqc_write_begin(vp);
 			atomic_store_short(&node->tn_mode, newmode);
 			vn_seqc_write_end(vp);
 		}
 	}
 	if (error != 0)
 		(void)tmpfs_reg_resize(vp, oldsize, TRUE);
 
 out:
 	MPASS(IMPLIES(error == 0, uio->uio_resid == 0));
 	MPASS(IMPLIES(error != 0, oldsize == node->tn_size));
 
 	vn_rlimit_fsizex_res(uio, r);
 	return (error);
 }
 
 static int
 tmpfs_deallocate(struct vop_deallocate_args *v)
 {
 	return (tmpfs_reg_punch_hole(v->a_vp, v->a_offset, v->a_len));
 }
 
 static int
 tmpfs_fsync(struct vop_fsync_args *v)
 {
 	struct vnode *vp = v->a_vp;
 
 	tmpfs_check_mtime(vp);
 	tmpfs_update(vp);
 
 	return (0);
 }
 
 static int
 tmpfs_remove(struct vop_remove_args *v)
 {
 	struct vnode *dvp = v->a_dvp;
 	struct vnode *vp = v->a_vp;
 
 	int error;
 	struct tmpfs_dirent *de;
 	struct tmpfs_mount *tmp;
 	struct tmpfs_node *dnode;
 	struct tmpfs_node *node;
 
 	if (vp->v_type == VDIR) {
 		error = EISDIR;
 		goto out;
 	}
 
 	dnode = VP_TO_TMPFS_DIR(dvp);
 	node = VP_TO_TMPFS_NODE(vp);
 	tmp = VFS_TO_TMPFS(vp->v_mount);
 	de = tmpfs_dir_lookup(dnode, node, v->a_cnp);
 	MPASS(de != NULL);
 
 	/* Files marked as immutable or append-only cannot be deleted. */
 	if ((node->tn_flags & (IMMUTABLE | APPEND | NOUNLINK)) ||
 	    (dnode->tn_flags & APPEND)) {
 		error = EPERM;
 		goto out;
 	}
 
 	/* Remove the entry from the directory; as it is a file, we do not
 	 * have to change the number of hard links of the directory. */
 	tmpfs_dir_detach(dvp, de);
 	if (v->a_cnp->cn_flags & DOWHITEOUT)
 		tmpfs_dir_whiteout_add(dvp, v->a_cnp);
 
 	/* Free the directory entry we just deleted.  Note that the node
 	 * referred by it will not be removed until the vnode is really
 	 * reclaimed. */
 	tmpfs_free_dirent(tmp, de);
 
 	node->tn_status |= TMPFS_NODE_CHANGED;
 	node->tn_accessed = true;
 	error = 0;
 
 out:
 	return (error);
 }
 
 static int
 tmpfs_link(struct vop_link_args *v)
 {
 	struct vnode *dvp = v->a_tdvp;
 	struct vnode *vp = v->a_vp;
 	struct componentname *cnp = v->a_cnp;
 
 	int error;
 	struct tmpfs_dirent *de;
 	struct tmpfs_node *node;
 
 	MPASS(dvp != vp); /* XXX When can this be false? */
 	node = VP_TO_TMPFS_NODE(vp);
 
 	/* Ensure that we do not overflow the maximum number of links imposed
 	 * by the system. */
 	MPASS(node->tn_links <= TMPFS_LINK_MAX);
 	if (node->tn_links == TMPFS_LINK_MAX) {
 		error = EMLINK;
 		goto out;
 	}
 
 	/* We cannot create links of files marked immutable or append-only. */
 	if (node->tn_flags & (IMMUTABLE | APPEND)) {
 		error = EPERM;
 		goto out;
 	}
 
 	/* Allocate a new directory entry to represent the node. */
 	error = tmpfs_alloc_dirent(VFS_TO_TMPFS(vp->v_mount), node,
 	    cnp->cn_nameptr, cnp->cn_namelen, &de);
 	if (error != 0)
 		goto out;
 
 	/* Insert the new directory entry into the appropriate directory. */
 	if (cnp->cn_flags & ISWHITEOUT)
 		tmpfs_dir_whiteout_remove(dvp, cnp);
 	tmpfs_dir_attach(dvp, de);
 
 	/* vp link count has changed, so update node times. */
 	node->tn_status |= TMPFS_NODE_CHANGED;
 	tmpfs_update(vp);
 
 	error = 0;
 
 out:
 	return (error);
 }
 
 /*
  * We acquire all but fdvp locks using non-blocking acquisitions.  If we
  * fail to acquire any lock in the path we will drop all held locks,
  * acquire the new lock in a blocking fashion, and then release it and
  * restart the rename.  This acquire/release step ensures that we do not
  * spin on a lock waiting for release.  On error release all vnode locks
  * and decrement references the way tmpfs_rename() would do.
  */
 static int
 tmpfs_rename_relock(struct vnode *fdvp, struct vnode **fvpp,
     struct vnode *tdvp, struct vnode **tvpp,
     struct componentname *fcnp, struct componentname *tcnp)
 {
 	struct vnode *nvp;
 	struct mount *mp;
 	struct tmpfs_dirent *de;
 	int error, restarts = 0;
 
 	VOP_UNLOCK(tdvp);
 	if (*tvpp != NULL && *tvpp != tdvp)
 		VOP_UNLOCK(*tvpp);
 	mp = fdvp->v_mount;
 
 relock:
 	restarts += 1;
 	error = vn_lock(fdvp, LK_EXCLUSIVE);
 	if (error)
 		goto releout;
 	if (vn_lock(tdvp, LK_EXCLUSIVE | LK_NOWAIT) != 0) {
 		VOP_UNLOCK(fdvp);
 		error = vn_lock(tdvp, LK_EXCLUSIVE);
 		if (error)
 			goto releout;
 		VOP_UNLOCK(tdvp);
 		goto relock;
 	}
 	/*
 	 * Re-resolve fvp to be certain it still exists and fetch the
 	 * correct vnode.
 	 */
 	de = tmpfs_dir_lookup(VP_TO_TMPFS_DIR(fdvp), NULL, fcnp);
 	if (de == NULL) {
 		VOP_UNLOCK(fdvp);
 		VOP_UNLOCK(tdvp);
 		if ((fcnp->cn_flags & ISDOTDOT) != 0 ||
 		    (fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.'))
 			error = EINVAL;
 		else
 			error = ENOENT;
 		goto releout;
 	}
 	error = tmpfs_alloc_vp(mp, de->td_node, LK_EXCLUSIVE | LK_NOWAIT, &nvp);
 	if (error != 0) {
 		VOP_UNLOCK(fdvp);
 		VOP_UNLOCK(tdvp);
 		if (error != EBUSY)
 			goto releout;
 		error = tmpfs_alloc_vp(mp, de->td_node, LK_EXCLUSIVE, &nvp);
 		if (error != 0)
 			goto releout;
 		VOP_UNLOCK(nvp);
 		/*
 		 * Concurrent rename race.
 		 */
 		if (nvp == tdvp) {
 			vrele(nvp);
 			error = EINVAL;
 			goto releout;
 		}
 		vrele(*fvpp);
 		*fvpp = nvp;
 		goto relock;
 	}
 	vrele(*fvpp);
 	*fvpp = nvp;
 	VOP_UNLOCK(*fvpp);
 	/*
 	 * Re-resolve tvp and acquire the vnode lock if present.
 	 */
 	de = tmpfs_dir_lookup(VP_TO_TMPFS_DIR(tdvp), NULL, tcnp);
 	/*
 	 * If tvp disappeared we just carry on.
 	 */
 	if (de == NULL && *tvpp != NULL) {
 		vrele(*tvpp);
 		*tvpp = NULL;
 	}
 	/*
 	 * Get the tvp ino if the lookup succeeded.  We may have to restart
 	 * if the non-blocking acquire fails.
 	 */
 	if (de != NULL) {
 		nvp = NULL;
 		error = tmpfs_alloc_vp(mp, de->td_node,
 		    LK_EXCLUSIVE | LK_NOWAIT, &nvp);
 		if (*tvpp != NULL)
 			vrele(*tvpp);
 		*tvpp = nvp;
 		if (error != 0) {
 			VOP_UNLOCK(fdvp);
 			VOP_UNLOCK(tdvp);
 			if (error != EBUSY)
 				goto releout;
 			error = tmpfs_alloc_vp(mp, de->td_node, LK_EXCLUSIVE,
 			    &nvp);
 			if (error != 0)
 				goto releout;
 			VOP_UNLOCK(nvp);
 			/*
 			 * fdvp contains fvp, thus tvp (=fdvp) is not empty.
 			 */
 			if (nvp == fdvp) {
 				error = ENOTEMPTY;
 				goto releout;
 			}
 			goto relock;
 		}
 	}
 	tmpfs_rename_restarts += restarts;
 
 	return (0);
 
 releout:
 	vrele(fdvp);
 	vrele(*fvpp);
 	vrele(tdvp);
 	if (*tvpp != NULL)
 		vrele(*tvpp);
 	tmpfs_rename_restarts += restarts;
 
 	return (error);
 }
 
 static int
 tmpfs_rename(struct vop_rename_args *v)
 {
 	struct vnode *fdvp = v->a_fdvp;
 	struct vnode *fvp = v->a_fvp;
 	struct componentname *fcnp = v->a_fcnp;
 	struct vnode *tdvp = v->a_tdvp;
 	struct vnode *tvp = v->a_tvp;
 	struct componentname *tcnp = v->a_tcnp;
 	char *newname;
 	struct tmpfs_dirent *de;
 	struct tmpfs_mount *tmp;
 	struct tmpfs_node *fdnode;
 	struct tmpfs_node *fnode;
 	struct tmpfs_node *tnode;
 	struct tmpfs_node *tdnode;
 	int error;
 	bool want_seqc_end;
 
 	want_seqc_end = false;
 
 	/*
 	 * Disallow cross-device renames.
 	 * XXX Why isn't this done by the caller?
 	 */
 	if (fvp->v_mount != tdvp->v_mount ||
 	    (tvp != NULL && fvp->v_mount != tvp->v_mount)) {
 		error = EXDEV;
 		goto out;
 	}
 
 	/* If source and target are the same file, there is nothing to do. */
 	if (fvp == tvp) {
 		error = 0;
 		goto out;
 	}
 
 	/*
 	 * If we need to move the directory between entries, lock the
 	 * source so that we can safely operate on it.
 	 */
 	if (fdvp != tdvp && fdvp != tvp) {
 		if (vn_lock(fdvp, LK_EXCLUSIVE | LK_NOWAIT) != 0) {
 			error = tmpfs_rename_relock(fdvp, &fvp, tdvp, &tvp,
 			    fcnp, tcnp);
 			if (error != 0)
 				return (error);
 			ASSERT_VOP_ELOCKED(fdvp,
 			    "tmpfs_rename: fdvp not locked");
 			ASSERT_VOP_ELOCKED(tdvp,
 			    "tmpfs_rename: tdvp not locked");
 			if (tvp != NULL)
 				ASSERT_VOP_ELOCKED(tvp,
 				    "tmpfs_rename: tvp not locked");
 			if (fvp == tvp) {
 				error = 0;
 				goto out_locked;
 			}
 		}
 	}
 
 	/*
 	 * Avoid manipulating '.' and '..' entries.
 	 */
 	if ((fcnp->cn_flags & ISDOTDOT) != 0 ||
 	    (fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.')) {
 		error = EINVAL;
 		goto out_locked;
 	}
 
 	if (tvp != NULL)
 		vn_seqc_write_begin(tvp);
 	vn_seqc_write_begin(tdvp);
 	vn_seqc_write_begin(fvp);
 	vn_seqc_write_begin(fdvp);
 	want_seqc_end = true;
 
 	tmp = VFS_TO_TMPFS(tdvp->v_mount);
 	tdnode = VP_TO_TMPFS_DIR(tdvp);
 	tnode = (tvp == NULL) ? NULL : VP_TO_TMPFS_NODE(tvp);
 	fdnode = VP_TO_TMPFS_DIR(fdvp);
 	fnode = VP_TO_TMPFS_NODE(fvp);
 	de = tmpfs_dir_lookup(fdnode, fnode, fcnp);
 
 	/*
 	 * Entry can disappear before we lock fdvp.
 	 */
 	if (de == NULL) {
 		if ((fcnp->cn_flags & ISDOTDOT) != 0 ||
 		    (fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.'))
 			error = EINVAL;
 		else
 			error = ENOENT;
 		goto out_locked;
 	}
 	MPASS(de->td_node == fnode);
 
 	/*
 	 * If re-naming a directory to another preexisting directory
 	 * ensure that the target directory is empty so that its
 	 * removal causes no side effects.
 	 * Kern_rename guarantees the destination to be a directory
 	 * if the source is one.
 	 */
 	if (tvp != NULL) {
 		MPASS(tnode != NULL);
 
 		if ((tnode->tn_flags & (NOUNLINK | IMMUTABLE | APPEND)) ||
 		    (tdnode->tn_flags & (APPEND | IMMUTABLE))) {
 			error = EPERM;
 			goto out_locked;
 		}
 
 		if (fnode->tn_type == VDIR && tnode->tn_type == VDIR) {
 			if (tnode->tn_size != 0 &&
 			    ((tcnp->cn_flags & IGNOREWHITEOUT) == 0 ||
 			    tnode->tn_size > tnode->tn_dir.tn_wht_size)) {
 				error = ENOTEMPTY;
 				goto out_locked;
 			}
 		} else if (fnode->tn_type == VDIR && tnode->tn_type != VDIR) {
 			error = ENOTDIR;
 			goto out_locked;
 		} else if (fnode->tn_type != VDIR && tnode->tn_type == VDIR) {
 			error = EISDIR;
 			goto out_locked;
 		} else {
 			MPASS(fnode->tn_type != VDIR &&
 				tnode->tn_type != VDIR);
 		}
 	}
 
 	if ((fnode->tn_flags & (NOUNLINK | IMMUTABLE | APPEND))
 	    || (fdnode->tn_flags & (APPEND | IMMUTABLE))) {
 		error = EPERM;
 		goto out_locked;
 	}
 
 	/*
 	 * Ensure that we have enough memory to hold the new name, if it
 	 * has to be changed.
 	 */
 	if (fcnp->cn_namelen != tcnp->cn_namelen ||
 	    bcmp(fcnp->cn_nameptr, tcnp->cn_nameptr, fcnp->cn_namelen) != 0) {
 		newname = malloc(tcnp->cn_namelen, M_TMPFSNAME, M_WAITOK);
 	} else
 		newname = NULL;
 
 	/*
 	 * If the node is being moved to another directory, we have to do
 	 * the move.
 	 */
 	if (fdnode != tdnode) {
 		/*
 		 * In case we are moving a directory, we have to adjust its
 		 * parent to point to the new parent.
 		 */
 		if (de->td_node->tn_type == VDIR) {
 			struct tmpfs_node *n;
 
 			TMPFS_NODE_LOCK(fnode);
 			error = tmpfs_access_locked(fvp, fnode, VWRITE,
 			    tcnp->cn_cred);
 			TMPFS_NODE_UNLOCK(fnode);
 			if (error) {
 				if (newname != NULL)
 					free(newname, M_TMPFSNAME);
 				goto out_locked;
 			}
 
 			/*
 			 * Ensure the target directory is not a child of the
 			 * directory being moved.  Otherwise, we'd end up
 			 * with stale nodes.
 			 */
 			n = tdnode;
 			/*
 			 * TMPFS_LOCK guaranties that no nodes are freed while
 			 * traversing the list. Nodes can only be marked as
 			 * removed: tn_parent == NULL.
 			 */
 			TMPFS_LOCK(tmp);
 			TMPFS_NODE_LOCK(n);
 			while (n != n->tn_dir.tn_parent) {
 				struct tmpfs_node *parent;
 
 				if (n == fnode) {
 					TMPFS_NODE_UNLOCK(n);
 					TMPFS_UNLOCK(tmp);
 					error = EINVAL;
 					if (newname != NULL)
 						free(newname, M_TMPFSNAME);
 					goto out_locked;
 				}
 				parent = n->tn_dir.tn_parent;
 				TMPFS_NODE_UNLOCK(n);
 				if (parent == NULL) {
 					n = NULL;
 					break;
 				}
 				TMPFS_NODE_LOCK(parent);
 				if (parent->tn_dir.tn_parent == NULL) {
 					TMPFS_NODE_UNLOCK(parent);
 					n = NULL;
 					break;
 				}
 				n = parent;
 			}
 			TMPFS_UNLOCK(tmp);
 			if (n == NULL) {
 				error = EINVAL;
 				if (newname != NULL)
 					    free(newname, M_TMPFSNAME);
 				goto out_locked;
 			}
 			TMPFS_NODE_UNLOCK(n);
 
 			/* Adjust the parent pointer. */
 			TMPFS_VALIDATE_DIR(fnode);
 			TMPFS_NODE_LOCK(de->td_node);
 			de->td_node->tn_dir.tn_parent = tdnode;
 			TMPFS_NODE_UNLOCK(de->td_node);
 
 			/*
 			 * As a result of changing the target of the '..'
 			 * entry, the link count of the source and target
 			 * directories has to be adjusted.
 			 */
 			TMPFS_NODE_LOCK(tdnode);
 			TMPFS_ASSERT_LOCKED(tdnode);
 			tdnode->tn_links++;
 			TMPFS_NODE_UNLOCK(tdnode);
 
 			TMPFS_NODE_LOCK(fdnode);
 			TMPFS_ASSERT_LOCKED(fdnode);
 			fdnode->tn_links--;
 			TMPFS_NODE_UNLOCK(fdnode);
 		}
 	}
 
 	/*
 	 * Do the move: just remove the entry from the source directory
 	 * and insert it into the target one.
 	 */
 	tmpfs_dir_detach(fdvp, de);
 
 	if (fcnp->cn_flags & DOWHITEOUT)
 		tmpfs_dir_whiteout_add(fdvp, fcnp);
 	if (tcnp->cn_flags & ISWHITEOUT)
 		tmpfs_dir_whiteout_remove(tdvp, tcnp);
 
 	/*
 	 * If the name has changed, we need to make it effective by changing
 	 * it in the directory entry.
 	 */
 	if (newname != NULL) {
 		MPASS(tcnp->cn_namelen <= MAXNAMLEN);
 
 		free(de->ud.td_name, M_TMPFSNAME);
 		de->ud.td_name = newname;
 		tmpfs_dirent_init(de, tcnp->cn_nameptr, tcnp->cn_namelen);
 
 		fnode->tn_status |= TMPFS_NODE_CHANGED;
 		tdnode->tn_status |= TMPFS_NODE_MODIFIED;
 	}
 
 	/*
 	 * If we are overwriting an entry, we have to remove the old one
 	 * from the target directory.
 	 */
 	if (tvp != NULL) {
 		struct tmpfs_dirent *tde;
 
 		/* Remove the old entry from the target directory. */
 		tde = tmpfs_dir_lookup(tdnode, tnode, tcnp);
 		tmpfs_dir_detach(tdvp, tde);
 
 		/*
 		 * If we are overwriting a directory, per the ENOTEMPTY check
 		 * above it must either be empty or contain only whiteout
 		 * entries.  In the latter case (which can only happen if
 		 * IGNOREWHITEOUT was passed in tcnp->cn_flags), clear the
 		 * whiteout entries to avoid leaking memory.
 		 */
 		if (tnode->tn_type == VDIR && tnode->tn_size > 0)
 			tmpfs_dir_clear_whiteouts(tvp);
 
 		/* Update node's ctime because of possible hardlinks. */
 		tnode->tn_status |= TMPFS_NODE_CHANGED;
 		tmpfs_update(tvp);
 
 		/*
 		 * Free the directory entry we just deleted.  Note that the
 		 * node referred by it will not be removed until the vnode is
 		 * really reclaimed.
 		 */
 		tmpfs_free_dirent(VFS_TO_TMPFS(tvp->v_mount), tde);
 	}
 
 	tmpfs_dir_attach(tdvp, de);
 
 	if (tmpfs_use_nc(fvp)) {
 		cache_vop_rename(fdvp, fvp, tdvp, tvp, fcnp, tcnp);
 	}
 
 	error = 0;
 
 out_locked:
 	if (fdvp != tdvp && fdvp != tvp)
 		VOP_UNLOCK(fdvp);
 
 out:
 	if (want_seqc_end) {
 		if (tvp != NULL)
 			vn_seqc_write_end(tvp);
 		vn_seqc_write_end(tdvp);
 		vn_seqc_write_end(fvp);
 		vn_seqc_write_end(fdvp);
 	}
 
 	/*
 	 * Release target nodes.
 	 * XXX: I don't understand when tdvp can be the same as tvp, but
 	 * other code takes care of this...
 	 */
 	if (tdvp == tvp)
 		vrele(tdvp);
 	else
 		vput(tdvp);
 	if (tvp != NULL)
 		vput(tvp);
 
 	/* Release source nodes. */
 	vrele(fdvp);
 	vrele(fvp);
 
 	return (error);
 }
 
 static int
 tmpfs_mkdir(struct vop_mkdir_args *v)
 {
 	struct vnode *dvp = v->a_dvp;
 	struct vnode **vpp = v->a_vpp;
 	struct componentname *cnp = v->a_cnp;
 	struct vattr *vap = v->a_vap;
 
 	MPASS(vap->va_type == VDIR);
 
 	return (tmpfs_alloc_file(dvp, vpp, vap, cnp, NULL));
 }
 
 static int
 tmpfs_rmdir(struct vop_rmdir_args *v)
 {
 	struct vnode *dvp = v->a_dvp;
 	struct vnode *vp = v->a_vp;
 	struct componentname *cnp = v->a_cnp;
 
 	int error;
 	struct tmpfs_dirent *de;
 	struct tmpfs_mount *tmp;
 	struct tmpfs_node *dnode;
 	struct tmpfs_node *node;
 
 	tmp = VFS_TO_TMPFS(dvp->v_mount);
 	dnode = VP_TO_TMPFS_DIR(dvp);
 	node = VP_TO_TMPFS_DIR(vp);
 
 	/*
 	 * Directories with more than two non-whiteout entries ('.' and '..')
 	 * cannot be removed.
 	 */
 	if (node->tn_size != 0 &&
 	    ((cnp->cn_flags & IGNOREWHITEOUT) == 0 ||
 	    node->tn_size > node->tn_dir.tn_wht_size)) {
 		error = ENOTEMPTY;
 		goto out;
 	}
 
 	/* Check flags to see if we are allowed to remove the directory. */
 	if ((dnode->tn_flags & APPEND)
 	    || (node->tn_flags & (NOUNLINK | IMMUTABLE | APPEND))) {
 		error = EPERM;
 		goto out;
 	}
 
 	/* This invariant holds only if we are not trying to remove "..".
 	 * We checked for that above so this is safe now. */
 	MPASS(node->tn_dir.tn_parent == dnode);
 
 	/* Get the directory entry associated with node (vp).  This was
 	 * filled by tmpfs_lookup while looking up the entry. */
 	de = tmpfs_dir_lookup(dnode, node, cnp);
 	MPASS(TMPFS_DIRENT_MATCHES(de,
 	    cnp->cn_nameptr,
 	    cnp->cn_namelen));
 
 	/* Detach the directory entry from the directory (dnode). */
 	tmpfs_dir_detach(dvp, de);
 
 	/*
 	 * If we are removing a directory, per the ENOTEMPTY check above it
 	 * must either be empty or contain only whiteout entries.  In the
 	 * latter case (which can only happen if IGNOREWHITEOUT was passed
 	 * in cnp->cn_flags), clear the whiteout entries to avoid leaking
 	 * memory.
 	 */
 	if (node->tn_size > 0)
 		tmpfs_dir_clear_whiteouts(vp);
 
 	if (cnp->cn_flags & DOWHITEOUT)
 		tmpfs_dir_whiteout_add(dvp, cnp);
 
 	/* No vnode should be allocated for this entry from this point */
 	TMPFS_NODE_LOCK(node);
 	node->tn_links--;
 	node->tn_dir.tn_parent = NULL;
 	node->tn_status |= TMPFS_NODE_CHANGED | TMPFS_NODE_MODIFIED;
 	node->tn_accessed = true;
 
 	TMPFS_NODE_UNLOCK(node);
 
 	TMPFS_NODE_LOCK(dnode);
 	dnode->tn_links--;
 	dnode->tn_status |= TMPFS_NODE_CHANGED | TMPFS_NODE_MODIFIED;
 	dnode->tn_accessed = true;
 	TMPFS_NODE_UNLOCK(dnode);
 
 	if (tmpfs_use_nc(dvp)) {
 		cache_vop_rmdir(dvp, vp);
 	}
 
 	/* Free the directory entry we just deleted.  Note that the node
 	 * referred by it will not be removed until the vnode is really
 	 * reclaimed. */
 	tmpfs_free_dirent(tmp, de);
 
 	/* Release the deleted vnode (will destroy the node, notify
 	 * interested parties and clean it from the cache). */
 
 	dnode->tn_status |= TMPFS_NODE_CHANGED;
 	tmpfs_update(dvp);
 
 	error = 0;
 
 out:
 	return (error);
 }
 
 static int
 tmpfs_symlink(struct vop_symlink_args *v)
 {
 	struct vnode *dvp = v->a_dvp;
 	struct vnode **vpp = v->a_vpp;
 	struct componentname *cnp = v->a_cnp;
 	struct vattr *vap = v->a_vap;
 	const char *target = v->a_target;
 
 #ifdef notyet /* XXX FreeBSD BUG: kern_symlink is not setting VLNK */
 	MPASS(vap->va_type == VLNK);
 #else
 	vap->va_type = VLNK;
 #endif
 
 	return (tmpfs_alloc_file(dvp, vpp, vap, cnp, target));
 }
 
 static int
 tmpfs_readdir(struct vop_readdir_args *va)
 {
 	struct vnode *vp;
 	struct uio *uio;
 	struct tmpfs_mount *tm;
 	struct tmpfs_node *node;
 	uint64_t **cookies;
 	int *eofflag, *ncookies;
 	ssize_t startresid;
 	int error, maxcookies;
 
 	vp = va->a_vp;
 	uio = va->a_uio;
 	eofflag = va->a_eofflag;
 	cookies = va->a_cookies;
 	ncookies = va->a_ncookies;
 
 	/* This operation only makes sense on directory nodes. */
 	if (vp->v_type != VDIR)
 		return (ENOTDIR);
 
 	maxcookies = 0;
 	node = VP_TO_TMPFS_DIR(vp);
 	tm = VFS_TO_TMPFS(vp->v_mount);
 
 	startresid = uio->uio_resid;
 
 	/* Allocate cookies for NFS and compat modules. */
 	if (cookies != NULL && ncookies != NULL) {
 		maxcookies = howmany(node->tn_size,
 		    sizeof(struct tmpfs_dirent)) + 2;
 		*cookies = malloc(maxcookies * sizeof(**cookies), M_TEMP,
 		    M_WAITOK);
 		*ncookies = 0;
 	}
 
 	if (cookies == NULL)
 		error = tmpfs_dir_getdents(tm, node, uio, 0, NULL, NULL);
 	else
 		error = tmpfs_dir_getdents(tm, node, uio, maxcookies, *cookies,
 		    ncookies);
 
 	/* Buffer was filled without hitting EOF. */
 	if (error == EJUSTRETURN)
 		error = (uio->uio_resid != startresid) ? 0 : EINVAL;
 
 	if (error != 0 && cookies != NULL && ncookies != NULL) {
 		free(*cookies, M_TEMP);
 		*cookies = NULL;
 		*ncookies = 0;
 	}
 
 	if (eofflag != NULL)
 		*eofflag =
 		    (error == 0 && uio->uio_offset == TMPFS_DIRCOOKIE_EOF);
 
 	return (error);
 }
 
 static int
 tmpfs_readlink(struct vop_readlink_args *v)
 {
 	struct vnode *vp = v->a_vp;
 	struct uio *uio = v->a_uio;
 
 	int error;
 	struct tmpfs_node *node;
 
 	MPASS(uio->uio_offset == 0);
 	MPASS(vp->v_type == VLNK);
 
 	node = VP_TO_TMPFS_NODE(vp);
 
 	error = uiomove(node->tn_link_target, MIN(node->tn_size, uio->uio_resid),
 	    uio);
 	tmpfs_set_accessed(VFS_TO_TMPFS(vp->v_mount), node);
 
 	return (error);
 }
 
 /*
  * VOP_FPLOOKUP_SYMLINK routines are subject to special circumstances, see
  * the comment above cache_fplookup for details.
  *
  * Check tmpfs_alloc_node for tmpfs-specific synchronisation notes.
  */
 static int
 tmpfs_fplookup_symlink(struct vop_fplookup_symlink_args *v)
 {
 	struct vnode *vp;
 	struct tmpfs_node *node;
 	char *symlink;
 
 	vp = v->a_vp;
 	node = VP_TO_TMPFS_NODE_SMR(vp);
 	if (__predict_false(node == NULL))
 		return (EAGAIN);
 	if (!atomic_load_char(&node->tn_link_smr))
 		return (EAGAIN);
 	symlink = atomic_load_ptr(&node->tn_link_target);
 	if (symlink == NULL)
 		return (EAGAIN);
 
 	return (cache_symlink_resolve(v->a_fpl, symlink, node->tn_size));
 }
 
 static int
 tmpfs_inactive(struct vop_inactive_args *v)
 {
 	struct vnode *vp;
 	struct tmpfs_node *node;
 
 	vp = v->a_vp;
 	node = VP_TO_TMPFS_NODE(vp);
 	if (node->tn_links == 0)
 		vrecycle(vp);
 	else
 		tmpfs_check_mtime(vp);
 	return (0);
 }
 
 static int
 tmpfs_need_inactive(struct vop_need_inactive_args *ap)
 {
 	struct vnode *vp;
 	struct tmpfs_node *node;
 	struct vm_object *obj;
 
 	vp = ap->a_vp;
 	node = VP_TO_TMPFS_NODE(vp);
 	if (node->tn_links == 0)
 		goto need;
 	if (vp->v_type == VREG) {
 		obj = vp->v_object;
 		if (obj->generation != obj->cleangeneration)
 			goto need;
 	}
 	return (0);
 need:
 	return (1);
 }
 
 int
 tmpfs_reclaim(struct vop_reclaim_args *v)
 {
 	struct vnode *vp;
 	struct tmpfs_mount *tmp;
 	struct tmpfs_node *node;
 	bool unlock;
 
 	vp = v->a_vp;
 	node = VP_TO_TMPFS_NODE(vp);
 	tmp = VFS_TO_TMPFS(vp->v_mount);
 
 	if (vp->v_type == VREG)
 		tmpfs_destroy_vobject(vp, node->tn_reg.tn_aobj);
 	vp->v_object = NULL;
 
 	TMPFS_LOCK(tmp);
 	TMPFS_NODE_LOCK(node);
 	tmpfs_free_vp(vp);
 
 	/*
 	 * If the node referenced by this vnode was deleted by the user,
 	 * we must free its associated data structures (now that the vnode
 	 * is being reclaimed).
 	 */
 	unlock = true;
 	if (node->tn_links == 0 &&
 	    (node->tn_vpstate & TMPFS_VNODE_ALLOCATING) == 0) {
 		node->tn_vpstate = TMPFS_VNODE_DOOMED;
 		unlock = !tmpfs_free_node_locked(tmp, node, true);
 	}
 
 	if (unlock) {
 		TMPFS_NODE_UNLOCK(node);
 		TMPFS_UNLOCK(tmp);
 	}
 
 	MPASS(vp->v_data == NULL);
 	return (0);
 }
 
 int
 tmpfs_print(struct vop_print_args *v)
 {
 	struct vnode *vp = v->a_vp;
 
 	struct tmpfs_node *node;
 
 	node = VP_TO_TMPFS_NODE(vp);
 
 	printf("tag VT_TMPFS, tmpfs_node %p, flags 0x%lx, links %jd\n",
 	    node, node->tn_flags, (uintmax_t)node->tn_links);
 	printf("\tmode 0%o, owner %d, group %d, size %jd, status 0x%x\n",
 	    node->tn_mode, node->tn_uid, node->tn_gid,
 	    (intmax_t)node->tn_size, node->tn_status);
 
 	if (vp->v_type == VFIFO)
 		fifo_printinfo(vp);
 
 	printf("\n");
 
 	return (0);
 }
 
 int
 tmpfs_pathconf(struct vop_pathconf_args *v)
 {
 	struct vnode *vp = v->a_vp;
 	int name = v->a_name;
 	long *retval = v->a_retval;
 
 	int error;
 
 	error = 0;
 
 	switch (name) {
 	case _PC_LINK_MAX:
 		*retval = TMPFS_LINK_MAX;
 		break;
 
 	case _PC_SYMLINK_MAX:
 		*retval = MAXPATHLEN;
 		break;
 
 	case _PC_NAME_MAX:
 		*retval = NAME_MAX;
 		break;
 
 	case _PC_PIPE_BUF:
 		if (vp->v_type == VDIR || vp->v_type == VFIFO)
 			*retval = PIPE_BUF;
 		else
 			error = EINVAL;
 		break;
 
 	case _PC_CHOWN_RESTRICTED:
 		*retval = 1;
 		break;
 
 	case _PC_NO_TRUNC:
 		*retval = 1;
 		break;
 
 	case _PC_SYNC_IO:
 		*retval = 1;
 		break;
 
 	case _PC_FILESIZEBITS:
 		*retval = 64;
 		break;
 
 	case _PC_MIN_HOLE_SIZE:
 		*retval = PAGE_SIZE;
 		break;
 
 	default:
 		error = vop_stdpathconf(v);
 	}
 
 	return (error);
 }
 
 static int
 tmpfs_vptofh(struct vop_vptofh_args *ap)
 /*
 vop_vptofh {
 	IN struct vnode *a_vp;
 	IN struct fid *a_fhp;
 };
 */
 {
 	struct tmpfs_fid_data tfd;
 	struct tmpfs_node *node;
 	struct fid *fhp;
+	_Static_assert(sizeof(struct tmpfs_fid_data) <= sizeof(struct fid),
+	    "struct tmpfs_fid_data cannot be larger than struct fid");
 
 	node = VP_TO_TMPFS_NODE(ap->a_vp);
 	fhp = ap->a_fhp;
 	fhp->fid_len = sizeof(tfd);
 
 	/*
 	 * Copy into fid_data from the stack to avoid unaligned pointer use.
 	 * See the comment in sys/mount.h on struct fid for details.
 	 */
 	tfd.tfd_id = node->tn_id;
 	tfd.tfd_gen = node->tn_gen;
 	memcpy(fhp->fid_data, &tfd, fhp->fid_len);
 
 	return (0);
 }
 
 static int
 tmpfs_whiteout(struct vop_whiteout_args *ap)
 {
 	struct vnode *dvp = ap->a_dvp;
 	struct componentname *cnp = ap->a_cnp;
 	struct tmpfs_dirent *de;
 
 	switch (ap->a_flags) {
 	case LOOKUP:
 		return (0);
 	case CREATE:
 		de = tmpfs_dir_lookup(VP_TO_TMPFS_DIR(dvp), NULL, cnp);
 		if (de != NULL)
 			return (de->td_node == NULL ? 0 : EEXIST);
 		return (tmpfs_dir_whiteout_add(dvp, cnp));
 	case DELETE:
 		tmpfs_dir_whiteout_remove(dvp, cnp);
 		return (0);
 	default:
 		panic("tmpfs_whiteout: unknown op");
 	}
 }
 
 static int
 tmpfs_vptocnp_dir(struct tmpfs_node *tn, struct tmpfs_node *tnp,
     struct tmpfs_dirent **pde)
 {
 	struct tmpfs_dir_cursor dc;
 	struct tmpfs_dirent *de;
 
 	for (de = tmpfs_dir_first(tnp, &dc); de != NULL;
 	     de = tmpfs_dir_next(tnp, &dc)) {
 		if (de->td_node == tn) {
 			*pde = de;
 			return (0);
 		}
 	}
 	return (ENOENT);
 }
 
 static int
 tmpfs_vptocnp_fill(struct vnode *vp, struct tmpfs_node *tn,
     struct tmpfs_node *tnp, char *buf, size_t *buflen, struct vnode **dvp)
 {
 	struct tmpfs_dirent *de;
 	int error, i;
 
 	error = vn_vget_ino_gen(vp, tmpfs_vn_get_ino_alloc, tnp, LK_SHARED,
 	    dvp);
 	if (error != 0)
 		return (error);
 	error = tmpfs_vptocnp_dir(tn, tnp, &de);
 	if (error == 0) {
 		i = *buflen;
 		i -= de->td_namelen;
 		if (i < 0) {
 			error = ENOMEM;
 		} else {
 			bcopy(de->ud.td_name, buf + i, de->td_namelen);
 			*buflen = i;
 		}
 	}
 	if (error == 0) {
 		if (vp != *dvp)
 			VOP_UNLOCK(*dvp);
 	} else {
 		if (vp != *dvp)
 			vput(*dvp);
 		else
 			vrele(vp);
 	}
 	return (error);
 }
 
 static int
 tmpfs_vptocnp(struct vop_vptocnp_args *ap)
 {
 	struct vnode *vp, **dvp;
 	struct tmpfs_node *tn, *tnp, *tnp1;
 	struct tmpfs_dirent *de;
 	struct tmpfs_mount *tm;
 	char *buf;
 	size_t *buflen;
 	int error;
 
 	vp = ap->a_vp;
 	dvp = ap->a_vpp;
 	buf = ap->a_buf;
 	buflen = ap->a_buflen;
 
 	tm = VFS_TO_TMPFS(vp->v_mount);
 	tn = VP_TO_TMPFS_NODE(vp);
 	if (tn->tn_type == VDIR) {
 		tnp = tn->tn_dir.tn_parent;
 		if (tnp == NULL)
 			return (ENOENT);
 		tmpfs_ref_node(tnp);
 		error = tmpfs_vptocnp_fill(vp, tn, tn->tn_dir.tn_parent, buf,
 		    buflen, dvp);
 		tmpfs_free_node(tm, tnp);
 		return (error);
 	}
 restart:
 	TMPFS_LOCK(tm);
 restart_locked:
 	LIST_FOREACH_SAFE(tnp, &tm->tm_nodes_used, tn_entries, tnp1) {
 		if (tnp->tn_type != VDIR)
 			continue;
 		TMPFS_NODE_LOCK(tnp);
 		tmpfs_ref_node(tnp);
 
 		/*
 		 * tn_vnode cannot be instantiated while we hold the
 		 * node lock, so the directory cannot be changed while
 		 * we iterate over it.  Do this to avoid instantiating
 		 * vnode for directories which cannot point to our
 		 * node.
 		 */
 		error = tnp->tn_vnode == NULL ? tmpfs_vptocnp_dir(tn, tnp,
 		    &de) : 0;
 
 		if (error == 0) {
 			TMPFS_NODE_UNLOCK(tnp);
 			TMPFS_UNLOCK(tm);
 			error = tmpfs_vptocnp_fill(vp, tn, tnp, buf, buflen,
 			    dvp);
 			if (error == 0) {
 				tmpfs_free_node(tm, tnp);
 				return (0);
 			}
 			if (VN_IS_DOOMED(vp)) {
 				tmpfs_free_node(tm, tnp);
 				return (ENOENT);
 			}
 			TMPFS_LOCK(tm);
 			TMPFS_NODE_LOCK(tnp);
 		}
 		if (tmpfs_free_node_locked(tm, tnp, false)) {
 			goto restart;
 		} else {
 			KASSERT(tnp->tn_refcount > 0,
 			    ("node %p refcount zero", tnp));
 			if (tnp->tn_attached) {
 				tnp1 = LIST_NEXT(tnp, tn_entries);
 				TMPFS_NODE_UNLOCK(tnp);
 			} else {
 				TMPFS_NODE_UNLOCK(tnp);
 				goto restart_locked;
 			}
 		}
 	}
 	TMPFS_UNLOCK(tm);
 	return (ENOENT);
 }
 
 void
 tmpfs_extattr_free(struct tmpfs_extattr *ea)
 {
 	free(ea->ea_name, M_TMPFSEA);
 	free(ea->ea_value, M_TMPFSEA);
 	free(ea, M_TMPFSEA);
 }
 
 static bool
 tmpfs_extattr_update_mem(struct tmpfs_mount *tmp, ssize_t size)
 {
 	TMPFS_LOCK(tmp);
 	if (size > 0 &&
 	    !tmpfs_pages_check_avail(tmp, howmany(size, PAGE_SIZE))) {
 		TMPFS_UNLOCK(tmp);
 		return (false);
 	}
 	if (tmp->tm_ea_memory_inuse + size > tmp->tm_ea_memory_max) {
 		TMPFS_UNLOCK(tmp);
 		return (false);
 	}
 	tmp->tm_ea_memory_inuse += size;
 	TMPFS_UNLOCK(tmp);
 	return (true);
 }
 
 static int
 tmpfs_deleteextattr(struct vop_deleteextattr_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct tmpfs_mount *tmp;
 	struct tmpfs_node *node;
 	struct tmpfs_extattr *ea;
 	size_t namelen;
 	ssize_t diff;
 	int error;
 
 	node = VP_TO_TMPFS_NODE(vp);
 	tmp = VFS_TO_TMPFS(vp->v_mount);
 	if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK)
 		return (EOPNOTSUPP);
 	error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
 	    ap->a_cred, ap->a_td, VWRITE);
 	if (error != 0)
 		return (error);
 	if (ap->a_name == NULL || ap->a_name[0] == '\0')
 		return (EINVAL);
 	namelen = strlen(ap->a_name);
 	if (namelen > EXTATTR_MAXNAMELEN)
 		return (EINVAL);
 
 	LIST_FOREACH(ea, &node->tn_extattrs, ea_extattrs) {
 		if (ea->ea_namespace == ap->a_attrnamespace &&
 		    namelen == ea->ea_namelen &&
 		    memcmp(ap->a_name, ea->ea_name, namelen) == 0)
 			break;
 	}
 
 	if (ea == NULL)
 		return (ENOATTR);
 	LIST_REMOVE(ea, ea_extattrs);
 	diff = -(sizeof(struct tmpfs_extattr) + namelen + ea->ea_size);
 	tmpfs_extattr_update_mem(tmp, diff);
 	tmpfs_extattr_free(ea);
 	return (0);
 }
 
 static int
 tmpfs_getextattr(struct vop_getextattr_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct tmpfs_node *node;
 	struct tmpfs_extattr *ea;
 	size_t namelen;
 	int error;
 
 	node = VP_TO_TMPFS_NODE(vp);
 	if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK)
 		return (EOPNOTSUPP);
 	error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
 	    ap->a_cred, ap->a_td, VREAD);
 	if (error != 0)
 		return (error);
 	if (ap->a_name == NULL || ap->a_name[0] == '\0')
 		return (EINVAL);
 	namelen = strlen(ap->a_name);
 	if (namelen > EXTATTR_MAXNAMELEN)
 		return (EINVAL);
 
 	LIST_FOREACH(ea, &node->tn_extattrs, ea_extattrs) {
 		if (ea->ea_namespace == ap->a_attrnamespace &&
 		    namelen == ea->ea_namelen &&
 		    memcmp(ap->a_name, ea->ea_name, namelen) == 0)
 			break;
 	}
 
 	if (ea == NULL)
 		return (ENOATTR);
 	if (ap->a_size != NULL)
 		*ap->a_size = ea->ea_size;
 	if (ap->a_uio != NULL && ea->ea_size != 0)
 		error = uiomove(ea->ea_value, ea->ea_size, ap->a_uio);
 	return (error);
 }
 
 static int
 tmpfs_listextattr(struct vop_listextattr_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct tmpfs_node *node;
 	struct tmpfs_extattr *ea;
 	int error;
 
 	node = VP_TO_TMPFS_NODE(vp);
 	if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK)
 		return (EOPNOTSUPP);
 	error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
 	    ap->a_cred, ap->a_td, VREAD);
 	if (error != 0)
 		return (error);
 	if (ap->a_size != NULL)
 		*ap->a_size = 0;
 
 	LIST_FOREACH(ea, &node->tn_extattrs, ea_extattrs) {
 		if (ea->ea_namespace != ap->a_attrnamespace)
 			continue;
 		if (ap->a_size != NULL)
 			*ap->a_size += ea->ea_namelen + 1;
 		if (ap->a_uio != NULL) {
 			error = uiomove(&ea->ea_namelen, 1, ap->a_uio);
 			if (error != 0)
 				break;
 			error = uiomove(ea->ea_name, ea->ea_namelen, ap->a_uio);
 			if (error != 0)
 				break;
 		}
 	}
 
 	return (error);
 }
 
 static int
 tmpfs_setextattr(struct vop_setextattr_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct tmpfs_mount *tmp;
 	struct tmpfs_node *node;
 	struct tmpfs_extattr *ea;
 	struct tmpfs_extattr *new_ea;
 	size_t attr_size;
 	size_t namelen;
 	ssize_t diff;
 	int error;
 
 	node = VP_TO_TMPFS_NODE(vp);
 	tmp = VFS_TO_TMPFS(vp->v_mount);
 	attr_size = ap->a_uio->uio_resid;
 	diff = 0;
 	if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK)
 		return (EOPNOTSUPP);
 	error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
 	    ap->a_cred, ap->a_td, VWRITE);
 	if (error != 0)
 		return (error);
 	if (ap->a_name == NULL || ap->a_name[0] == '\0')
 		return (EINVAL);
 	namelen = strlen(ap->a_name);
 	if (namelen > EXTATTR_MAXNAMELEN)
 		return (EINVAL);
 
 	LIST_FOREACH(ea, &node->tn_extattrs, ea_extattrs) {
 		if (ea->ea_namespace == ap->a_attrnamespace &&
 		    namelen == ea->ea_namelen &&
 		    memcmp(ap->a_name, ea->ea_name, namelen) == 0) {
 			diff -= sizeof(struct tmpfs_extattr) + ea->ea_namelen +
 			    ea->ea_size;
 			break;
 		}
 	}
 
 	diff += sizeof(struct tmpfs_extattr) + namelen + attr_size;
 	if (!tmpfs_extattr_update_mem(tmp, diff))
 		return (ENOSPC);
 	new_ea = malloc(sizeof(struct tmpfs_extattr), M_TMPFSEA, M_WAITOK);
 	new_ea->ea_namespace = ap->a_attrnamespace;
 	new_ea->ea_name = malloc(namelen, M_TMPFSEA, M_WAITOK);
 	new_ea->ea_namelen = namelen;
 	memcpy(new_ea->ea_name, ap->a_name, namelen);
 	if (attr_size != 0) {
 		new_ea->ea_value = malloc(attr_size, M_TMPFSEA, M_WAITOK);
 		new_ea->ea_size = attr_size;
 		error = uiomove(new_ea->ea_value, attr_size, ap->a_uio);
 	} else {
 		new_ea->ea_value = NULL;
 		new_ea->ea_size = 0;
 	}
 	if (error != 0) {
 		tmpfs_extattr_update_mem(tmp, -diff);
 		tmpfs_extattr_free(new_ea);
 		return (error);
 	}
 	if (ea != NULL) {
 		LIST_REMOVE(ea, ea_extattrs);
 		tmpfs_extattr_free(ea);
 	}
 	LIST_INSERT_HEAD(&node->tn_extattrs, new_ea, ea_extattrs);
 	return (0);
 }
 
 static off_t
 tmpfs_seek_data_locked(vm_object_t obj, off_t noff)
 {
 	vm_pindex_t p;
 
 	p = swap_pager_seek_data(obj, OFF_TO_IDX(noff));
 	return (p == OFF_TO_IDX(noff) ? noff : IDX_TO_OFF(p));
 }
 
 static int
 tmpfs_seek_clamp(struct tmpfs_node *tn, off_t *noff, bool seekdata)
 {
 	if (*noff < tn->tn_size)
 		return (0);
 	if (seekdata)
 		return (ENXIO);
 	*noff = tn->tn_size;
 	return (0);
 }
 
 static off_t
 tmpfs_seek_hole_locked(vm_object_t obj, off_t noff)
 {
 
 	return (IDX_TO_OFF(swap_pager_seek_hole(obj, OFF_TO_IDX(noff))));
 }
 
 static int
 tmpfs_seek_datahole(struct vnode *vp, off_t *off, bool seekdata)
 {
 	struct tmpfs_node *tn;
 	vm_object_t obj;
 	off_t noff;
 	int error;
 
 	if (vp->v_type != VREG)
 		return (ENOTTY);
 	tn = VP_TO_TMPFS_NODE(vp);
 	noff = *off;
 	if (noff < 0)
 		return (ENXIO);
 	error = tmpfs_seek_clamp(tn, &noff, seekdata);
 	if (error != 0)
 		return (error);
 	obj = tn->tn_reg.tn_aobj;
 
 	VM_OBJECT_RLOCK(obj);
 	noff = seekdata ? tmpfs_seek_data_locked(obj, noff) :
 	    tmpfs_seek_hole_locked(obj, noff);
 	VM_OBJECT_RUNLOCK(obj);
 
 	error = tmpfs_seek_clamp(tn, &noff, seekdata);
 	if (error == 0)
 		*off = noff;
 	return (error);
 }
 
 static int
 tmpfs_ioctl(struct vop_ioctl_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	int error = 0;
 
 	switch (ap->a_command) {
 	case FIOSEEKDATA:
 	case FIOSEEKHOLE:
 		error = vn_lock(vp, LK_SHARED);
 		if (error != 0) {
 			error = EBADF;
 			break;
 		}
 		error = tmpfs_seek_datahole(vp, (off_t *)ap->a_data,
 		    ap->a_command == FIOSEEKDATA);
 		VOP_UNLOCK(vp);
 		break;
 	default:
 		error = ENOTTY;
 		break;
 	}
 	return (error);
 }
 
 /*
  * Vnode operations vector used for files stored in a tmpfs file system.
  */
 struct vop_vector tmpfs_vnodeop_entries = {
 	.vop_default =			&default_vnodeops,
 	.vop_lookup =			vfs_cache_lookup,
 	.vop_cachedlookup =		tmpfs_cached_lookup,
 	.vop_create =			tmpfs_create,
 	.vop_mknod =			tmpfs_mknod,
 	.vop_open =			tmpfs_open,
 	.vop_close =			tmpfs_close,
 	.vop_fplookup_vexec =		tmpfs_fplookup_vexec,
 	.vop_fplookup_symlink =		tmpfs_fplookup_symlink,
 	.vop_access =			tmpfs_access,
 	.vop_stat =			tmpfs_stat,
 	.vop_getattr =			tmpfs_getattr,
 	.vop_setattr =			tmpfs_setattr,
 	.vop_read =			tmpfs_read,
 	.vop_read_pgcache =		tmpfs_read_pgcache,
 	.vop_write =			tmpfs_write,
 	.vop_deallocate =		tmpfs_deallocate,
 	.vop_fsync =			tmpfs_fsync,
 	.vop_remove =			tmpfs_remove,
 	.vop_link =			tmpfs_link,
 	.vop_rename =			tmpfs_rename,
 	.vop_mkdir =			tmpfs_mkdir,
 	.vop_rmdir =			tmpfs_rmdir,
 	.vop_symlink =			tmpfs_symlink,
 	.vop_readdir =			tmpfs_readdir,
 	.vop_readlink =			tmpfs_readlink,
 	.vop_inactive =			tmpfs_inactive,
 	.vop_need_inactive =		tmpfs_need_inactive,
 	.vop_reclaim =			tmpfs_reclaim,
 	.vop_print =			tmpfs_print,
 	.vop_pathconf =			tmpfs_pathconf,
 	.vop_vptofh =			tmpfs_vptofh,
 	.vop_whiteout =			tmpfs_whiteout,
 	.vop_bmap =			VOP_EOPNOTSUPP,
 	.vop_vptocnp =			tmpfs_vptocnp,
 	.vop_lock1 =			vop_lock,
 	.vop_unlock = 			vop_unlock,
 	.vop_islocked = 		vop_islocked,
 	.vop_deleteextattr =		tmpfs_deleteextattr,
 	.vop_getextattr =		tmpfs_getextattr,
 	.vop_listextattr =		tmpfs_listextattr,
 	.vop_setextattr =		tmpfs_setextattr,
 	.vop_add_writecount =		vop_stdadd_writecount_nomsync,
 	.vop_ioctl =			tmpfs_ioctl,
 };
 VFS_VOP_VECTOR_REGISTER(tmpfs_vnodeop_entries);
 
 /*
  * Same vector for mounts which do not use namecache.
  */
 struct vop_vector tmpfs_vnodeop_nonc_entries = {
 	.vop_default =			&tmpfs_vnodeop_entries,
 	.vop_lookup =			tmpfs_lookup,
 };
 VFS_VOP_VECTOR_REGISTER(tmpfs_vnodeop_nonc_entries);
diff --git a/sys/fs/udf/udf_vnops.c b/sys/fs/udf/udf_vnops.c
index 98a779280690..88bf4917a851 100644
--- a/sys/fs/udf/udf_vnops.c
+++ b/sys/fs/udf/udf_vnops.c
@@ -1,1482 +1,1484 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause
  *
  * Copyright (c) 2001, 2002 Scott Long <scottl@freebsd.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 /* udf_vnops.c */
 /* Take care of the vnode side of things */
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/namei.h>
 #include <sys/kernel.h>
 #include <sys/malloc.h>
 #include <sys/stat.h>
 #include <sys/bio.h>
 #include <sys/conf.h>
 #include <sys/buf.h>
 #include <sys/iconv.h>
 #include <sys/mount.h>
 #include <sys/vnode.h>
 #include <sys/dirent.h>
 #include <sys/queue.h>
 #include <sys/unistd.h>
 #include <sys/endian.h>
 
 #include <vm/uma.h>
 
 #include <fs/udf/ecma167-udf.h>
 #include <fs/udf/osta.h>
 #include <fs/udf/udf.h>
 #include <fs/udf/udf_mount.h>
 
 extern struct iconv_functions *udf_iconv;
 
 static vop_access_t	udf_access;
 static vop_getattr_t	udf_getattr;
 static vop_open_t	udf_open;
 static vop_ioctl_t	udf_ioctl;
 static vop_pathconf_t	udf_pathconf;
 static vop_print_t	udf_print;
 static vop_read_t	udf_read;
 static vop_readdir_t	udf_readdir;
 static vop_readlink_t	udf_readlink;
 static vop_setattr_t	udf_setattr;
 static vop_strategy_t	udf_strategy;
 static vop_bmap_t	udf_bmap;
 static vop_cachedlookup_t	udf_lookup;
 static vop_reclaim_t	udf_reclaim;
 static vop_vptofh_t	udf_vptofh;
 static int udf_readatoffset(struct udf_node *node, int *size, off_t offset,
     struct buf **bp, uint8_t **data);
 static int udf_bmap_internal(struct udf_node *node, off_t offset,
     daddr_t *sector, uint32_t *max_size);
 
 static struct vop_vector udf_vnodeops = {
 	.vop_default =		&default_vnodeops,
 
 	.vop_access =		udf_access,
 	.vop_bmap =		udf_bmap,
 	.vop_cachedlookup =	udf_lookup,
 	.vop_getattr =		udf_getattr,
 	.vop_ioctl =		udf_ioctl,
 	.vop_lookup =		vfs_cache_lookup,
 	.vop_open =		udf_open,
 	.vop_pathconf =		udf_pathconf,
 	.vop_print =		udf_print,
 	.vop_read =		udf_read,
 	.vop_readdir =		udf_readdir,
 	.vop_readlink =		udf_readlink,
 	.vop_reclaim =		udf_reclaim,
 	.vop_setattr =		udf_setattr,
 	.vop_strategy =		udf_strategy,
 	.vop_vptofh =		udf_vptofh,
 };
 VFS_VOP_VECTOR_REGISTER(udf_vnodeops);
 
 struct vop_vector udf_fifoops = {
 	.vop_default =		&fifo_specops,
 	.vop_access =		udf_access,
 	.vop_getattr =		udf_getattr,
 	.vop_pathconf =		udf_pathconf,
 	.vop_print =		udf_print,
 	.vop_reclaim =		udf_reclaim,
 	.vop_setattr =		udf_setattr,
 	.vop_vptofh =		udf_vptofh,
 };
 VFS_VOP_VECTOR_REGISTER(udf_fifoops);
 
 static MALLOC_DEFINE(M_UDFFID, "udf_fid", "UDF FileId structure");
 static MALLOC_DEFINE(M_UDFDS, "udf_ds", "UDF Dirstream structure");
 
 #define UDF_INVALID_BMAP	-1
 
 int
 udf_allocv(struct mount *mp, struct vnode **vpp, struct thread *td)
 {
 	int error;
 	struct vnode *vp;
 
 	error = getnewvnode("udf", mp, &udf_vnodeops, &vp);
 	if (error) {
 		printf("udf_allocv: failed to allocate new vnode\n");
 		return (error);
 	}
 
 	*vpp = vp;
 	return (0);
 }
 
 /* Convert file entry permission (5 bits per owner/group/user) to a mode_t */
 static mode_t
 udf_permtomode(struct udf_node *node)
 {
 	uint32_t perm;
 	uint16_t flags;
 	mode_t mode;
 
 	perm = le32toh(node->fentry->perm);
 	flags = le16toh(node->fentry->icbtag.flags);
 
 	mode = perm & UDF_FENTRY_PERM_USER_MASK;
 	mode |= ((perm & UDF_FENTRY_PERM_GRP_MASK) >> 2);
 	mode |= ((perm & UDF_FENTRY_PERM_OWNER_MASK) >> 4);
 	mode |= ((flags & UDF_ICB_TAG_FLAGS_STICKY) << 4);
 	mode |= ((flags & UDF_ICB_TAG_FLAGS_SETGID) << 6);
 	mode |= ((flags & UDF_ICB_TAG_FLAGS_SETUID) << 8);
 
 	return (mode);
 }
 
 static int
 udf_access(struct vop_access_args *a)
 {
 	struct vnode *vp;
 	struct udf_node *node;
 	accmode_t accmode;
 	mode_t mode;
 
 	vp = a->a_vp;
 	node = VTON(vp);
 	accmode = a->a_accmode;
 
 	if (accmode & VWRITE) {
 		switch (vp->v_type) {
 		case VDIR:
 		case VLNK:
 		case VREG:
 			return (EROFS);
 			/* NOT REACHED */
 		default:
 			break;
 		}
 	}
 
 	mode = udf_permtomode(node);
 
 	return (vaccess(vp->v_type, mode, node->fentry->uid, node->fentry->gid,
 	    accmode, a->a_cred));
 }
 
 static int
 udf_open(struct vop_open_args *ap) {
 	struct udf_node *np = VTON(ap->a_vp);
 	off_t fsize;
 
 	fsize = le64toh(np->fentry->inf_len);
 	vnode_create_vobject(ap->a_vp, fsize, ap->a_td);
 	return 0;
 }
 
 static const int mon_lens[2][12] = {
 	{0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334},
 	{0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335}
 };
 
 static int
 udf_isaleapyear(int year)
 {
 	int i;
 
 	i = (year % 4) ? 0 : 1;
 	i &= (year % 100) ? 1 : 0;
 	i |= (year % 400) ? 0 : 1;
 
 	return i;
 }
 
 /*
  * Timezone calculation compliments of Julian Elischer <julian@elischer.org>.
  */
 static void
 udf_timetotimespec(struct timestamp *time, struct timespec *t)
 {
 	int i, lpyear, daysinyear, year, startyear;
 	union {
 		uint16_t	u_tz_offset;
 		int16_t		s_tz_offset;
 	} tz;
 
 	/*
 	 * DirectCD seems to like using bogus year values.
 	 * Don't trust time->month as it will be used for an array index.
 	 */
 	year = le16toh(time->year);
 	if (year < 1970 || time->month < 1 || time->month > 12) {
 		t->tv_sec = 0;
 		t->tv_nsec = 0;
 		return;
 	}
 
 	/* Calculate the time and day */
 	t->tv_sec = time->second;
 	t->tv_sec += time->minute * 60;
 	t->tv_sec += time->hour * 3600;
 	t->tv_sec += (time->day - 1) * 3600 * 24;
 
 	/* Calculate the month */
 	lpyear = udf_isaleapyear(year);
 	t->tv_sec += mon_lens[lpyear][time->month - 1] * 3600 * 24;
 
 	/* Speed up the calculation */
 	startyear = 1970;
 	if (year > 2009) {
 		t->tv_sec += 1262304000;
 		startyear += 40;
 	} else if (year > 1999) {
 		t->tv_sec += 946684800;
 		startyear += 30;
 	} else if (year > 1989) {
 		t->tv_sec += 631152000;
 		startyear += 20;
 	} else if (year > 1979) {
 		t->tv_sec += 315532800;
 		startyear += 10;
 	}
 
 	daysinyear = (year - startyear) * 365;
 	for (i = startyear; i < year; i++)
 		daysinyear += udf_isaleapyear(i);
 	t->tv_sec += daysinyear * 3600 * 24;
 
 	/* Calculate microseconds */
 	t->tv_nsec = time->centisec * 10000 + time->hund_usec * 100 +
 	    time->usec;
 
 	/*
 	 * Calculate the time zone.  The timezone is 12 bit signed 2's
 	 * complement, so we gotta do some extra magic to handle it right.
 	 */
 	tz.u_tz_offset = le16toh(time->type_tz);
 	tz.u_tz_offset &= 0x0fff;
 	if (tz.u_tz_offset & 0x0800)
 		tz.u_tz_offset |= 0xf000;	/* extend the sign to 16 bits */
 	if ((le16toh(time->type_tz) & 0x1000) && (tz.s_tz_offset != -2047))
 		t->tv_sec -= tz.s_tz_offset * 60;
 
 	return;
 }
 
 static int
 udf_getattr(struct vop_getattr_args *a)
 {
 	struct vnode *vp;
 	struct udf_node *node;
 	struct vattr *vap;
 	struct file_entry *fentry;
 
 	vp = a->a_vp;
 	vap = a->a_vap;
 	node = VTON(vp);
 	fentry = node->fentry;
 
 	vap->va_fsid = dev2udev(node->udfmp->im_dev);
 	vap->va_fileid = node->hash_id;
 	vap->va_mode = udf_permtomode(node);
 	vap->va_nlink = le16toh(fentry->link_cnt);
 	/*
 	 * XXX The spec says that -1 is valid for uid/gid and indicates an
 	 * invalid uid/gid.  How should this be represented?
 	 */
 	vap->va_uid = (le32toh(fentry->uid) == -1) ? 0 : le32toh(fentry->uid);
 	vap->va_gid = (le32toh(fentry->gid) == -1) ? 0 : le32toh(fentry->gid);
 	udf_timetotimespec(&fentry->atime, &vap->va_atime);
 	udf_timetotimespec(&fentry->mtime, &vap->va_mtime);
 	vap->va_ctime = vap->va_mtime; /* XXX Stored as an Extended Attribute */
 	vap->va_rdev = NODEV;
 	if (vp->v_type & VDIR) {
 		/*
 		 * Directories that are recorded within their ICB will show
 		 * as having 0 blocks recorded.  Since tradition dictates
 		 * that directories consume at least one logical block,
 		 * make it appear so.
 		 */
 		if (fentry->logblks_rec != 0) {
 			vap->va_size =
 			    le64toh(fentry->logblks_rec) * node->udfmp->bsize;
 		} else {
 			vap->va_size = node->udfmp->bsize;
 		}
 	} else {
 		vap->va_size = le64toh(fentry->inf_len);
 	}
 	vap->va_flags = 0;
 	vap->va_gen = 1;
 	vap->va_blocksize = node->udfmp->bsize;
 	vap->va_bytes = le64toh(fentry->inf_len);
 	vap->va_type = vp->v_type;
 	vap->va_filerev = 0; /* XXX */
 	return (0);
 }
 
 static int
 udf_setattr(struct vop_setattr_args *a)
 {
 	struct vnode *vp;
 	struct vattr *vap;
 
 	vp = a->a_vp;
 	vap = a->a_vap;
 	if (vap->va_flags != (u_long)VNOVAL || vap->va_uid != (uid_t)VNOVAL ||
 	    vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL ||
 	    vap->va_mtime.tv_sec != VNOVAL || vap->va_mode != (mode_t)VNOVAL)
 		return (EROFS);
 	if (vap->va_size != (u_quad_t)VNOVAL) {
 		switch (vp->v_type) {
 		case VDIR:
 			return (EISDIR);
 		case VLNK:
 		case VREG:
 			return (EROFS);
 		case VCHR:
 		case VBLK:
 		case VSOCK:
 		case VFIFO:
 		case VNON:
 		case VBAD:
 		case VMARKER:
 			return (0);
 		}
 	}
 	return (0);
 }
 
 /*
  * File specific ioctls.
  */
 static int
 udf_ioctl(struct vop_ioctl_args *a)
 {
 	printf("%s called\n", __func__);
 	return (ENOTTY);
 }
 
 /*
  * I'm not sure that this has much value in a read-only filesystem, but
  * cd9660 has it too.
  */
 static int
 udf_pathconf(struct vop_pathconf_args *a)
 {
 
 	switch (a->a_name) {
 	case _PC_FILESIZEBITS:
 		*a->a_retval = 64;
 		return (0);
 	case _PC_LINK_MAX:
 		*a->a_retval = 65535;
 		return (0);
 	case _PC_NAME_MAX:
 		*a->a_retval = NAME_MAX;
 		return (0);
 	case _PC_SYMLINK_MAX:
 		*a->a_retval = MAXPATHLEN;
 		return (0);
 	case _PC_NO_TRUNC:
 		*a->a_retval = 1;
 		return (0);
 	case _PC_PIPE_BUF:
 		if (a->a_vp->v_type == VDIR || a->a_vp->v_type == VFIFO) {
 			*a->a_retval = PIPE_BUF;
 			return (0);
 		}
 		return (EINVAL);
 	default:
 		return (vop_stdpathconf(a));
 	}
 }
 
 static int
 udf_print(struct vop_print_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct udf_node *node = VTON(vp);
 
 	printf("    ino %lu, on dev %s", (u_long)node->hash_id,
 	    devtoname(node->udfmp->im_dev));
 	if (vp->v_type == VFIFO)
 		fifo_printinfo(vp);
 	printf("\n");
 	return (0);
 }
 
 #define lblkno(udfmp, loc)	((loc) >> (udfmp)->bshift)
 #define blkoff(udfmp, loc)	((loc) & (udfmp)->bmask)
 #define lblktosize(udfmp, blk)	((blk) << (udfmp)->bshift)
 
 static inline int
 is_data_in_fentry(const struct udf_node *node)
 {
 	const struct file_entry *fentry = node->fentry;
 
 	return ((le16toh(fentry->icbtag.flags) & 0x7) == 3);
 }
 
 static int
 udf_read(struct vop_read_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct uio *uio = ap->a_uio;
 	struct udf_node *node = VTON(vp);
 	struct udf_mnt *udfmp;
 	struct file_entry *fentry;
 	struct buf *bp;
 	uint8_t *data;
 	daddr_t lbn, rablock;
 	off_t diff, fsize;
 	ssize_t n;
 	int error = 0;
 	long size, on;
 
 	if (uio->uio_resid == 0)
 		return (0);
 	if (uio->uio_offset < 0)
 		return (EINVAL);
 
 	if (is_data_in_fentry(node)) {
 		fentry = node->fentry;
 		data = &fentry->data[le32toh(fentry->l_ea)];
 		fsize = le32toh(fentry->l_ad);
 
 		n = uio->uio_resid;
 		diff = fsize - uio->uio_offset;
 		if (diff <= 0)
 			return (0);
 		if (diff < n)
 			n = diff;
 		error = uiomove(data + uio->uio_offset, (int)n, uio);
 		return (error);
 	}
 
 	fsize = le64toh(node->fentry->inf_len);
 	udfmp = node->udfmp;
 	do {
 		lbn = lblkno(udfmp, uio->uio_offset);
 		on = blkoff(udfmp, uio->uio_offset);
 		n = min((u_int)(udfmp->bsize - on),
 			uio->uio_resid);
 		diff = fsize - uio->uio_offset;
 		if (diff <= 0)
 			return (0);
 		if (diff < n)
 			n = diff;
 		size = udfmp->bsize;
 		rablock = lbn + 1;
 		if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
 			if (lblktosize(udfmp, rablock) < fsize) {
 				error = cluster_read(vp, fsize, lbn, size,
 				    NOCRED, uio->uio_resid,
 				    (ap->a_ioflag >> 16), 0, &bp);
 			} else {
 				error = bread(vp, lbn, size, NOCRED, &bp);
 			}
 		} else {
 			error = bread(vp, lbn, size, NOCRED, &bp);
 		}
 		if (error != 0) {
 			brelse(bp);
 			return (error);
 		}
 		n = min(n, size - bp->b_resid);
 
 		error = uiomove(bp->b_data + on, (int)n, uio);
 		brelse(bp);
 	} while (error == 0 && uio->uio_resid > 0 && n != 0);
 	return (error);
 }
 
 /*
  * Call the OSTA routines to translate the name from a CS0 dstring to a
  * 16-bit Unicode String.  Hooks need to be placed in here to translate from
  * Unicode to the encoding that the kernel/user expects.  Return the length
  * of the translated string.
  */
 static int
 udf_transname(char *cs0string, char *destname, int len, struct udf_mnt *udfmp)
 {
 	unicode_t *transname;
 	char *unibuf, *unip;
 	int i, destlen;
 	ssize_t unilen = 0;
 	size_t destleft = MAXNAMLEN;
 
 	/* Convert 16-bit Unicode to destname */
 	if (udfmp->im_flags & UDFMNT_KICONV && udf_iconv) {
 		/* allocate a buffer big enough to hold an 8->16 bit expansion */
 		unibuf = uma_zalloc(udf_zone_trans, M_WAITOK);
 		unip = unibuf;
 		if ((unilen = (ssize_t)udf_UncompressUnicodeByte(len, cs0string, unibuf)) == -1) {
 			printf("udf: Unicode translation failed\n");
 			uma_zfree(udf_zone_trans, unibuf);
 			return 0;
 		}
 
 		while (unilen > 0 && destleft > 0) {
 			udf_iconv->conv(udfmp->im_d2l, __DECONST(const char **,
 			    &unibuf), (size_t *)&unilen, (char **)&destname,
 			    &destleft);
 			/* Unconverted character found */
 			if (unilen > 0 && destleft > 0) {
 				*destname++ = '?';
 				destleft--;
 				unibuf += 2;
 				unilen -= 2;
 			}
 		}
 		uma_zfree(udf_zone_trans, unip);
 		*destname = '\0';
 		destlen = MAXNAMLEN - (int)destleft;
 	} else {
 		/* allocate a buffer big enough to hold an 8->16 bit expansion */
 		transname = uma_zalloc(udf_zone_trans, M_WAITOK);
 
 		if ((unilen = (ssize_t)udf_UncompressUnicode(len, cs0string, transname)) == -1) {
 			printf("udf: Unicode translation failed\n");
 			uma_zfree(udf_zone_trans, transname);
 			return 0;
 		}
 
 		for (i = 0; i < unilen ; i++) {
 			if (transname[i] & 0xff00) {
 				destname[i] = '.';	/* Fudge the 16bit chars */
 			} else {
 				destname[i] = transname[i] & 0xff;
 			}
 		}
 		uma_zfree(udf_zone_trans, transname);
 		destname[unilen] = 0;
 		destlen = (int)unilen;
 	}
 
 	return (destlen);
 }
 
 /*
  * Compare a CS0 dstring with a name passed in from the VFS layer.  Return
  * 0 on a successful match, nonzero otherwise.  Unicode work may need to be done
  * here also.
  */
 static int
 udf_cmpname(char *cs0string, char *cmpname, int cs0len, int cmplen, struct udf_mnt *udfmp)
 {
 	char *transname;
 	int error = 0;
 
 	/* This is overkill, but not worth creating a new zone */
 	transname = uma_zalloc(udf_zone_trans, M_WAITOK);
 
 	cs0len = udf_transname(cs0string, transname, cs0len, udfmp);
 
 	/* Easy check.  If they aren't the same length, they aren't equal */
 	if ((cs0len == 0) || (cs0len != cmplen))
 		error = -1;
 	else
 		error = bcmp(transname, cmpname, cmplen);
 
 	uma_zfree(udf_zone_trans, transname);
 	return (error);
 }
 
 struct udf_uiodir {
 	struct dirent *dirent;
 	uint64_t *cookies;
 	int ncookies;
 	int acookies;
 	int eofflag;
 };
 
 static int
 udf_uiodir(struct udf_uiodir *uiodir, int de_size, struct uio *uio, long cookie)
 {
 	if (uiodir->cookies != NULL) {
 		if (++uiodir->acookies > uiodir->ncookies) {
 			uiodir->eofflag = 0;
 			return (-1);
 		}
 		*uiodir->cookies++ = cookie;
 	}
 
 	if (uio->uio_resid < de_size) {
 		uiodir->eofflag = 0;
 		return (-1);
 	}
 
 	return (uiomove(uiodir->dirent, de_size, uio));
 }
 
 static struct udf_dirstream *
 udf_opendir(struct udf_node *node, int offset, int fsize, struct udf_mnt *udfmp)
 {
 	struct udf_dirstream *ds;
 
 	ds = uma_zalloc(udf_zone_ds, M_WAITOK | M_ZERO);
 
 	ds->node = node;
 	ds->offset = offset;
 	ds->udfmp = udfmp;
 	ds->fsize = fsize;
 
 	return (ds);
 }
 
 static struct fileid_desc *
 udf_getfid(struct udf_dirstream *ds)
 {
 	struct fileid_desc *fid;
 	int error, frag_size = 0, total_fid_size;
 
 	/* End of directory? */
 	if (ds->offset + ds->off >= ds->fsize) {
 		ds->error = 0;
 		return (NULL);
 	}
 
 	/* Grab the first extent of the directory */
 	if (ds->off == 0) {
 		ds->size = 0;
 		error = udf_readatoffset(ds->node, &ds->size, ds->offset,
 		    &ds->bp, &ds->data);
 		if (error) {
 			ds->error = error;
 			if (ds->bp != NULL)
 				brelse(ds->bp);
 			return (NULL);
 		}
 	}
 
 	/*
 	 * Clean up from a previous fragmented FID.
 	 * XXX Is this the right place for this?
 	 */
 	if (ds->fid_fragment && ds->buf != NULL) {
 		ds->fid_fragment = 0;
 		free(ds->buf, M_UDFFID);
 	}
 
 	fid = (struct fileid_desc*)&ds->data[ds->off];
 
 	/*
 	 * Check to see if the fid is fragmented. The first test
 	 * ensures that we don't wander off the end of the buffer
 	 * looking for the l_iu and l_fi fields.
 	 */
 	if (ds->off + UDF_FID_SIZE > ds->size ||
 	    ds->off + le16toh(fid->l_iu) + fid->l_fi + UDF_FID_SIZE > ds->size){
 		/* Copy what we have of the fid into a buffer */
 		frag_size = ds->size - ds->off;
 		if (frag_size >= ds->udfmp->bsize) {
 			printf("udf: invalid FID fragment\n");
 			ds->error = EINVAL;
 			return (NULL);
 		}
 
 		/*
 		 * File ID descriptors can only be at most one
 		 * logical sector in size.
 		 */
 		ds->buf = malloc(ds->udfmp->bsize, M_UDFFID,
 		     M_WAITOK | M_ZERO);
 		bcopy(fid, ds->buf, frag_size);
 
 		/* Reduce all of the casting magic */
 		fid = (struct fileid_desc*)ds->buf;
 
 		if (ds->bp != NULL)
 			brelse(ds->bp);
 
 		/* Fetch the next allocation */
 		ds->offset += ds->size;
 		ds->size = 0;
 		error = udf_readatoffset(ds->node, &ds->size, ds->offset,
 		    &ds->bp, &ds->data);
 		if (error) {
 			ds->error = error;
 			return (NULL);
 		}
 
 		/*
 		 * If the fragment was so small that we didn't get
 		 * the l_iu and l_fi fields, copy those in.
 		 */
 		if (frag_size < UDF_FID_SIZE)
 			bcopy(ds->data, &ds->buf[frag_size],
 			    UDF_FID_SIZE - frag_size);
 
 		/*
 		 * Now that we have enough of the fid to work with,
 		 * copy in the rest of the fid from the new
 		 * allocation.
 		 */
 		total_fid_size = UDF_FID_SIZE + le16toh(fid->l_iu) + fid->l_fi;
 		if (total_fid_size > ds->udfmp->bsize) {
 			printf("udf: invalid FID\n");
 			ds->error = EIO;
 			return (NULL);
 		}
 		bcopy(ds->data, &ds->buf[frag_size],
 		    total_fid_size - frag_size);
 
 		ds->fid_fragment = 1;
 	} else {
 		total_fid_size = le16toh(fid->l_iu) + fid->l_fi + UDF_FID_SIZE;
 	}
 
 	/*
 	 * Update the offset. Align on a 4 byte boundary because the
 	 * UDF spec says so.
 	 */
 	ds->this_off = ds->offset + ds->off;
 	if (!ds->fid_fragment) {
 		ds->off += (total_fid_size + 3) & ~0x03;
 	} else {
 		ds->off = (total_fid_size - frag_size + 3) & ~0x03;
 	}
 
 	return (fid);
 }
 
 static void
 udf_closedir(struct udf_dirstream *ds)
 {
 
 	if (ds->bp != NULL)
 		brelse(ds->bp);
 
 	if (ds->fid_fragment && ds->buf != NULL)
 		free(ds->buf, M_UDFFID);
 
 	uma_zfree(udf_zone_ds, ds);
 }
 
 static int
 udf_readdir(struct vop_readdir_args *a)
 {
 	struct vnode *vp;
 	struct uio *uio;
 	struct dirent dir;
 	struct udf_node *node;
 	struct udf_mnt *udfmp;
 	struct fileid_desc *fid;
 	struct udf_uiodir uiodir;
 	struct udf_dirstream *ds;
 	uint64_t *cookies = NULL;
 	int ncookies;
 	int error = 0;
 
 	vp = a->a_vp;
 	uio = a->a_uio;
 	node = VTON(vp);
 	udfmp = node->udfmp;
 	uiodir.eofflag = 1;
 
 	if (a->a_ncookies != NULL) {
 		/*
 		 * Guess how many entries are needed.  If we run out, this
 		 * function will be called again and thing will pick up were
 		 * it left off.
 		 */
 		ncookies = uio->uio_resid / 8;
 		cookies = malloc(sizeof(*cookies) * ncookies, M_TEMP, M_WAITOK);
 		uiodir.ncookies = ncookies;
 		uiodir.cookies = cookies;
 		uiodir.acookies = 0;
 	} else {
 		uiodir.cookies = NULL;
 	}
 
 	/*
 	 * Iterate through the file id descriptors.  Give the parent dir
 	 * entry special attention.
 	 */
 	ds = udf_opendir(node, uio->uio_offset, le64toh(node->fentry->inf_len),
 	    node->udfmp);
 
 	while ((fid = udf_getfid(ds)) != NULL) {
 		/* XXX Should we return an error on a bad fid? */
 		if (udf_checktag(&fid->tag, TAGID_FID)) {
 			printf("Invalid FID tag\n");
 			hexdump(fid, UDF_FID_SIZE, NULL, 0);
 			error = EIO;
 			break;
 		}
 
 		/* Is this a deleted file? */
 		if (fid->file_char & UDF_FILE_CHAR_DEL)
 			continue;
 
 		if ((fid->l_fi == 0) && (fid->file_char & UDF_FILE_CHAR_PAR)) {
 			/* Do up the '.' and '..' entries.  Dummy values are
 			 * used for the cookies since the offset here is
 			 * usually zero, and NFS doesn't like that value
 			 */
 			dir.d_fileno = node->hash_id;
 			dir.d_type = DT_DIR;
 			dir.d_name[0] = '.';
 			dir.d_namlen = 1;
 			dir.d_reclen = GENERIC_DIRSIZ(&dir);
 			dir.d_off = 1;
 			dirent_terminate(&dir);
 			uiodir.dirent = &dir;
 			error = udf_uiodir(&uiodir, dir.d_reclen, uio, 1);
 			if (error)
 				break;
 
 			dir.d_fileno = udf_getid(&fid->icb);
 			dir.d_type = DT_DIR;
 			dir.d_name[0] = '.';
 			dir.d_name[1] = '.';
 			dir.d_namlen = 2;
 			dir.d_reclen = GENERIC_DIRSIZ(&dir);
 			dir.d_off = 2;
 			dirent_terminate(&dir);
 			uiodir.dirent = &dir;
 			error = udf_uiodir(&uiodir, dir.d_reclen, uio, 2);
 		} else {
 			dir.d_namlen = udf_transname(&fid->data[fid->l_iu],
 			    &dir.d_name[0], fid->l_fi, udfmp);
 			dir.d_fileno = udf_getid(&fid->icb);
 			dir.d_type = (fid->file_char & UDF_FILE_CHAR_DIR) ?
 			    DT_DIR : DT_UNKNOWN;
 			dir.d_reclen = GENERIC_DIRSIZ(&dir);
 			dir.d_off = ds->this_off;
 			dirent_terminate(&dir);
 			uiodir.dirent = &dir;
 			error = udf_uiodir(&uiodir, dir.d_reclen, uio,
 			    ds->this_off);
 		}
 		if (error)
 			break;
 		uio->uio_offset = ds->offset + ds->off;
 	}
 
 	/* tell the calling layer whether we need to be called again */
 	*a->a_eofflag = uiodir.eofflag;
 
 	if (error < 0)
 		error = 0;
 	if (!error)
 		error = ds->error;
 
 	udf_closedir(ds);
 
 	if (a->a_ncookies != NULL) {
 		if (error)
 			free(cookies, M_TEMP);
 		else {
 			*a->a_ncookies = uiodir.acookies;
 			*a->a_cookies = cookies;
 		}
 	}
 
 	return (error);
 }
 
 static int
 udf_readlink(struct vop_readlink_args *ap)
 {
 	struct path_component *pc, *end;
 	struct vnode *vp;
 	struct uio uio;
 	struct iovec iov[1];
 	struct udf_node *node;
 	void *buf;
 	char *cp;
 	int error, len, root;
 
 	/*
 	 * A symbolic link in UDF is a list of variable-length path
 	 * component structures.  We build a pathname in the caller's
 	 * uio by traversing this list.
 	 */
 	vp = ap->a_vp;
 	node = VTON(vp);
 	len = le64toh(node->fentry->inf_len);
 	buf = malloc(len, M_DEVBUF, M_WAITOK);
 	iov[0].iov_len = len;
 	iov[0].iov_base = buf;
 	uio.uio_iov = iov;
 	uio.uio_iovcnt = 1;
 	uio.uio_offset = 0;
 	uio.uio_resid = iov[0].iov_len;
 	uio.uio_segflg = UIO_SYSSPACE;
 	uio.uio_rw = UIO_READ;
 	uio.uio_td = curthread;
 	error = VOP_READ(vp, &uio, 0, ap->a_cred);
 	if (error)
 		goto error;
 
 	pc = buf;
 	end = (void *)((char *)buf + len);
 	root = 0;
 	while (pc < end) {
 		switch (pc->type) {
 		case UDF_PATH_ROOT:
 			/* Only allow this at the beginning of a path. */
 			if ((void *)pc != buf) {
 				error = EINVAL;
 				goto error;
 			}
 			cp = "/";
 			len = 1;
 			root = 1;
 			break;
 		case UDF_PATH_DOT:
 			cp = ".";
 			len = 1;
 			break;
 		case UDF_PATH_DOTDOT:
 			cp = "..";
 			len = 2;
 			break;
 		case UDF_PATH_PATH:
 			if (pc->length == 0) {
 				error = EINVAL;
 				goto error;
 			}
 			/*
 			 * XXX: We only support CS8 which appears to map
 			 * to ASCII directly.
 			 */
 			switch (pc->identifier[0]) {
 			case 8:
 				cp = pc->identifier + 1;
 				len = pc->length - 1;
 				break;
 			default:
 				error = EOPNOTSUPP;
 				goto error;
 			}
 			break;
 		default:
 			error = EINVAL;
 			goto error;
 		}
 
 		/*
 		 * If this is not the first component, insert a path
 		 * separator.
 		 */
 		if (pc != buf) {
 			/* If we started with root we already have a "/". */
 			if (root)
 				goto skipslash;
 			root = 0;
 			if (ap->a_uio->uio_resid < 1) {
 				error = ENAMETOOLONG;
 				goto error;
 			}
 			error = uiomove("/", 1, ap->a_uio);
 			if (error)
 				break;
 		}
 	skipslash:
 
 		/* Append string at 'cp' of length 'len' to our path. */
 		if (len > ap->a_uio->uio_resid) {
 			error = ENAMETOOLONG;
 			goto error;
 		}
 		error = uiomove(cp, len, ap->a_uio);
 		if (error)
 			break;
 
 		/* Advance to next component. */
 		pc = (void *)((char *)pc + 4 + pc->length);
 	}
 error:
 	free(buf, M_DEVBUF);
 	return (error);
 }
 
 static int
 udf_strategy(struct vop_strategy_args *a)
 {
 	struct buf *bp;
 	struct vnode *vp;
 	struct udf_node *node;
 	struct bufobj *bo;
 	off_t offset;
 	uint32_t maxsize;
 	daddr_t sector;
 	int error;
 
 	bp = a->a_bp;
 	vp = a->a_vp;
 	node = VTON(vp);
 
 	if (bp->b_blkno == bp->b_lblkno) {
 		offset = lblktosize(node->udfmp, bp->b_lblkno);
 		error = udf_bmap_internal(node, offset, &sector, &maxsize);
 		if (error) {
 			clrbuf(bp);
 			bp->b_blkno = -1;
 			bufdone(bp);
 			return (0);
 		}
 		/* bmap gives sector numbers, bio works with device blocks */
 		bp->b_blkno = sector << (node->udfmp->bshift - DEV_BSHIFT);
 	}
 	bo = node->udfmp->im_bo;
 	bp->b_iooffset = dbtob(bp->b_blkno);
 	BO_STRATEGY(bo, bp);
 	return (0);
 }
 
 static int
 udf_bmap(struct vop_bmap_args *a)
 {
 	struct udf_node *node;
 	uint32_t max_size;
 	daddr_t lsector;
 	int nblk;
 	int error;
 
 	node = VTON(a->a_vp);
 
 	if (a->a_bop != NULL)
 		*a->a_bop = &node->udfmp->im_devvp->v_bufobj;
 	if (a->a_bnp == NULL)
 		return (0);
 	if (a->a_runb)
 		*a->a_runb = 0;
 
 	/*
 	 * UDF_INVALID_BMAP means data embedded into fentry, this is an internal
 	 * error that should not be propagated to calling code.
 	 * Most obvious mapping for this error is EOPNOTSUPP as we can not truly
 	 * translate block numbers in this case.
 	 * Incidentally, this return code will make vnode pager to use VOP_READ
 	 * to get data for mmap-ed pages and udf_read knows how to do the right
 	 * thing for this kind of files.
 	 */
 	error = udf_bmap_internal(node, a->a_bn << node->udfmp->bshift,
 	    &lsector, &max_size);
 	if (error == UDF_INVALID_BMAP)
 		return (EOPNOTSUPP);
 	if (error)
 		return (error);
 
 	/* Translate logical to physical sector number */
 	*a->a_bnp = lsector << (node->udfmp->bshift - DEV_BSHIFT);
 
 	/*
 	 * Determine maximum number of readahead blocks following the
 	 * requested block.
 	 */
 	if (a->a_runp) {
 		nblk = (max_size >> node->udfmp->bshift) - 1;
 		if (nblk <= 0)
 			*a->a_runp = 0;
 		else if (nblk >= (MAXBSIZE >> node->udfmp->bshift))
 			*a->a_runp = (MAXBSIZE >> node->udfmp->bshift) - 1;
 		else
 			*a->a_runp = nblk;
 	}
 
 	if (a->a_runb) {
 		*a->a_runb = 0;
 	}
 
 	return (0);
 }
 
 /*
  * The all powerful VOP_LOOKUP().
  */
 static int
 udf_lookup(struct vop_cachedlookup_args *a)
 {
 	struct vnode *dvp;
 	struct vnode *tdp = NULL;
 	struct vnode **vpp = a->a_vpp;
 	struct udf_node *node;
 	struct udf_mnt *udfmp;
 	struct fileid_desc *fid = NULL;
 	struct udf_dirstream *ds;
 	u_long nameiop;
 	u_long flags;
 	char *nameptr;
 	long namelen;
 	ino_t id = 0;
 	int offset, error = 0;
 	int fsize, lkflags, ltype, numdirpasses;
 
 	dvp = a->a_dvp;
 	node = VTON(dvp);
 	udfmp = node->udfmp;
 	nameiop = a->a_cnp->cn_nameiop;
 	flags = a->a_cnp->cn_flags;
 	lkflags = a->a_cnp->cn_lkflags;
 	nameptr = a->a_cnp->cn_nameptr;
 	namelen = a->a_cnp->cn_namelen;
 	fsize = le64toh(node->fentry->inf_len);
 
 	/*
 	 * If this is a LOOKUP and we've already partially searched through
 	 * the directory, pick up where we left off and flag that the
 	 * directory may need to be searched twice.  For a full description,
 	 * see /sys/fs/cd9660/cd9660_lookup.c:cd9660_lookup()
 	 */
 	if (nameiop != LOOKUP || node->diroff == 0 || node->diroff > fsize) {
 		offset = 0;
 		numdirpasses = 1;
 	} else {
 		offset = node->diroff;
 		numdirpasses = 2;
 		nchstats.ncs_2passes++;
 	}
 
 lookloop:
 	ds = udf_opendir(node, offset, fsize, udfmp);
 
 	while ((fid = udf_getfid(ds)) != NULL) {
 		/* XXX Should we return an error on a bad fid? */
 		if (udf_checktag(&fid->tag, TAGID_FID)) {
 			printf("udf_lookup: Invalid tag\n");
 			error = EIO;
 			break;
 		}
 
 		/* Is this a deleted file? */
 		if (fid->file_char & UDF_FILE_CHAR_DEL)
 			continue;
 
 		if ((fid->l_fi == 0) && (fid->file_char & UDF_FILE_CHAR_PAR)) {
 			if (flags & ISDOTDOT) {
 				id = udf_getid(&fid->icb);
 				break;
 			}
 		} else {
 			if (!(udf_cmpname(&fid->data[fid->l_iu],
 			    nameptr, fid->l_fi, namelen, udfmp))) {
 				id = udf_getid(&fid->icb);
 				break;
 			}
 		}
 	}
 
 	if (!error)
 		error = ds->error;
 
 	/* XXX Bail out here? */
 	if (error) {
 		udf_closedir(ds);
 		return (error);
 	}
 
 	/* Did we have a match? */
 	if (id) {
 		/*
 		 * Remember where this entry was if it's the final
 		 * component.
 		 */
 		if ((flags & ISLASTCN) && nameiop == LOOKUP)
 			node->diroff = ds->offset + ds->off;
 		if (numdirpasses == 2)
 			nchstats.ncs_pass2++;
 		udf_closedir(ds);
 
 		if (flags & ISDOTDOT) {
 			error = vn_vget_ino(dvp, id, lkflags, &tdp);
 		} else if (node->hash_id == id) {
 			VREF(dvp);	/* we want ourself, ie "." */
 			/*
 			 * When we lookup "." we still can be asked to lock it
 			 * differently.
 			 */
 			ltype = lkflags & LK_TYPE_MASK;
 			if (ltype != VOP_ISLOCKED(dvp)) {
 				if (ltype == LK_EXCLUSIVE)
 					vn_lock(dvp, LK_UPGRADE | LK_RETRY);
 				else /* if (ltype == LK_SHARED) */
 					vn_lock(dvp, LK_DOWNGRADE | LK_RETRY);
 			}
 			tdp = dvp;
 		} else
 			error = udf_vget(udfmp->im_mountp, id, lkflags, &tdp);
 		if (!error) {
 			*vpp = tdp;
 			/* Put this entry in the cache */
 			if (flags & MAKEENTRY)
 				cache_enter(dvp, *vpp, a->a_cnp);
 		}
 	} else {
 		/* Name wasn't found on this pass.  Do another pass? */
 		if (numdirpasses == 2) {
 			numdirpasses--;
 			offset = 0;
 			udf_closedir(ds);
 			goto lookloop;
 		}
 		udf_closedir(ds);
 
 		/* Enter name into cache as non-existant */
 		if (flags & MAKEENTRY)
 			cache_enter(dvp, *vpp, a->a_cnp);
 
 		if ((flags & ISLASTCN) &&
 		    (nameiop == CREATE || nameiop == RENAME)) {
 			error = EROFS;
 		} else {
 			error = ENOENT;
 		}
 	}
 
 	return (error);
 }
 
 static int
 udf_reclaim(struct vop_reclaim_args *a)
 {
 	struct vnode *vp;
 	struct udf_node *unode;
 
 	vp = a->a_vp;
 	unode = VTON(vp);
 
 	if (unode != NULL) {
 		vfs_hash_remove(vp);
 
 		if (unode->fentry != NULL)
 			free(unode->fentry, M_UDFFENTRY);
 		uma_zfree(udf_zone_node, unode);
 		vp->v_data = NULL;
 	}
 
 	return (0);
 }
 
 static int
 udf_vptofh(struct vop_vptofh_args *a)
 {
 	struct udf_node *node;
 	struct ifid *ifhp;
+	_Static_assert(sizeof(struct ifid) <= sizeof(struct fid),
+	    "struct ifid cannot be larger than struct fid");
 
 	node = VTON(a->a_vp);
 	ifhp = (struct ifid *)a->a_fhp;
 	ifhp->ifid_len = sizeof(struct ifid);
 	ifhp->ifid_ino = node->hash_id;
 
 	return (0);
 }
 
 /*
  * Read the block and then set the data pointer to correspond with the
  * offset passed in.  Only read in at most 'size' bytes, and then set 'size'
  * to the number of bytes pointed to.  If 'size' is zero, try to read in a
  * whole extent.
  *
  * Note that *bp may be assigned error or not.
  *
  */
 static int
 udf_readatoffset(struct udf_node *node, int *size, off_t offset,
     struct buf **bp, uint8_t **data)
 {
 	struct udf_mnt *udfmp = node->udfmp;
 	struct vnode *vp = node->i_vnode;
 	struct file_entry *fentry;
 	struct buf *bp1;
 	uint32_t max_size;
 	daddr_t sector;
 	off_t off;
 	int adj_size;
 	int error;
 
 	/*
 	 * This call is made *not* only to detect UDF_INVALID_BMAP case,
 	 * max_size is used as an ad-hoc read-ahead hint for "normal" case.
 	 */
 	error = udf_bmap_internal(node, offset, &sector, &max_size);
 	if (error == UDF_INVALID_BMAP) {
 		/*
 		 * This error means that the file *data* is stored in the
 		 * allocation descriptor field of the file entry.
 		 */
 		fentry = node->fentry;
 		*data = &fentry->data[le32toh(fentry->l_ea)];
 		*size = le32toh(fentry->l_ad);
 		if (offset >= *size)
 			*size = 0;
 		else {
 			*data += offset;
 			*size -= offset;
 		}
 		return (0);
 	} else if (error != 0) {
 		return (error);
 	}
 
 	/* Adjust the size so that it is within range */
 	if (*size == 0 || *size > max_size)
 		*size = max_size;
 
 	/*
 	 * Because we will read starting at block boundary, we need to adjust
 	 * how much we need to read so that all promised data is in.
 	 * Also, we can't promise to read more than MAXBSIZE bytes starting
 	 * from block boundary, so adjust what we promise too.
 	 */
 	off = blkoff(udfmp, offset);
 	*size = min(*size, MAXBSIZE - off);
 	adj_size = (*size + off + udfmp->bmask) & ~udfmp->bmask;
 	*bp = NULL;
 	if ((error = bread(vp, lblkno(udfmp, offset), adj_size, NOCRED, bp))) {
 		printf("warning: udf_readlblks returned error %d\n", error);
 		/* note: *bp may be non-NULL */
 		return (error);
 	}
 
 	bp1 = *bp;
 	*data = (uint8_t *)&bp1->b_data[offset & udfmp->bmask];
 	return (0);
 }
 
 /*
  * Translate a file offset into a logical block and then into a physical
  * block.
  * max_size - maximum number of bytes that can be read starting from given
  * offset, rather than beginning of calculated sector number
  */
 static int
 udf_bmap_internal(struct udf_node *node, off_t offset, daddr_t *sector,
     uint32_t *max_size)
 {
 	struct udf_mnt *udfmp;
 	struct file_entry *fentry;
 	void *icb;
 	struct icb_tag *tag;
 	uint32_t icblen = 0;
 	daddr_t lsector;
 	int ad_offset, ad_num = 0;
 	int i, p_offset;
 
 	udfmp = node->udfmp;
 	fentry = node->fentry;
 	tag = &fentry->icbtag;
 
 	switch (le16toh(tag->strat_type)) {
 	case 4:
 		break;
 
 	case 4096:
 		printf("Cannot deal with strategy4096 yet!\n");
 		return (ENODEV);
 
 	default:
 		printf("Unknown strategy type %d\n", tag->strat_type);
 		return (ENODEV);
 	}
 
 	switch (le16toh(tag->flags) & 0x7) {
 	case 0:
 		/*
 		 * The allocation descriptor field is filled with short_ad's.
 		 * If the offset is beyond the current extent, look for the
 		 * next extent.
 		 */
 		do {
 			offset -= icblen;
 			ad_offset = sizeof(struct short_ad) * ad_num;
 			if (ad_offset > le32toh(fentry->l_ad)) {
 				printf("File offset out of bounds\n");
 				return (EINVAL);
 			}
 			icb = GETICB(short_ad, fentry,
 			    le32toh(fentry->l_ea) + ad_offset);
 			icblen = GETICBLEN(short_ad, icb);
 			ad_num++;
 		} while(offset >= icblen);
 
 		lsector = (offset  >> udfmp->bshift) +
 		    le32toh(((struct short_ad *)(icb))->pos);
 
 		*max_size = icblen - offset;
 
 		break;
 	case 1:
 		/*
 		 * The allocation descriptor field is filled with long_ad's
 		 * If the offset is beyond the current extent, look for the
 		 * next extent.
 		 */
 		do {
 			offset -= icblen;
 			ad_offset = sizeof(struct long_ad) * ad_num;
 			if (ad_offset > le32toh(fentry->l_ad)) {
 				printf("File offset out of bounds\n");
 				return (EINVAL);
 			}
 			icb = GETICB(long_ad, fentry,
 			    le32toh(fentry->l_ea) + ad_offset);
 			icblen = GETICBLEN(long_ad, icb);
 			ad_num++;
 		} while(offset >= icblen);
 
 		lsector = (offset >> udfmp->bshift) +
 		    le32toh(((struct long_ad *)(icb))->loc.lb_num);
 
 		*max_size = icblen - offset;
 
 		break;
 	case 3:
 		/*
 		 * This type means that the file *data* is stored in the
 		 * allocation descriptor field of the file entry.
 		 */
 		*max_size = 0;
 		*sector = node->hash_id + udfmp->part_start;
 
 		return (UDF_INVALID_BMAP);
 	case 2:
 		/* DirectCD does not use extended_ad's */
 	default:
 		printf("Unsupported allocation descriptor %d\n",
 		       tag->flags & 0x7);
 		return (ENODEV);
 	}
 
 	*sector = lsector + udfmp->part_start;
 
 	/*
 	 * Check the sparing table.  Each entry represents the beginning of
 	 * a packet.
 	 */
 	if (udfmp->s_table != NULL) {
 		for (i = 0; i< udfmp->s_table_entries; i++) {
 			p_offset =
 			    lsector - le32toh(udfmp->s_table->entries[i].org);
 			if ((p_offset < udfmp->p_sectors) && (p_offset >= 0)) {
 				*sector =
 				   le32toh(udfmp->s_table->entries[i].map) +
 				    p_offset;
 				break;
 			}
 		}
 	}
 
 	return (0);
 }
diff --git a/sys/ufs/ffs/ffs_vnops.c b/sys/ufs/ffs/ffs_vnops.c
index e9849008cde2..6e83741975fd 100644
--- a/sys/ufs/ffs/ffs_vnops.c
+++ b/sys/ufs/ffs/ffs_vnops.c
@@ -1,2102 +1,2104 @@
 /*-
  * SPDX-License-Identifier: (BSD-2-Clause AND BSD-3-Clause)
  *
  * Copyright (c) 2002, 2003 Networks Associates Technology, Inc.
  * All rights reserved.
  *
  * This software was developed for the FreeBSD Project by Marshall
  * Kirk McKusick and Network Associates Laboratories, the Security
  * Research Division of Network Associates, Inc. under DARPA/SPAWAR
  * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
  * research program
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * Copyright (c) 1982, 1986, 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  * from: $FreeBSD: .../ufs/ufs_readwrite.c,v 1.96 2002/08/12 09:22:11 phk ...
  */
 
 #include <sys/cdefs.h>
 #include "opt_directio.h"
 #include "opt_ffs.h"
 #include "opt_ufs.h"
 
 #include <sys/param.h>
 #include <sys/bio.h>
 #include <sys/systm.h>
 #include <sys/buf.h>
 #include <sys/conf.h>
 #include <sys/extattr.h>
 #include <sys/kernel.h>
 #include <sys/limits.h>
 #include <sys/malloc.h>
 #include <sys/mount.h>
 #include <sys/priv.h>
 #include <sys/rwlock.h>
 #include <sys/stat.h>
 #include <sys/sysctl.h>
 #include <sys/vmmeter.h>
 #include <sys/vnode.h>
 
 #include <vm/vm.h>
 #include <vm/vm_param.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_object.h>
 #include <vm/vm_page.h>
 #include <vm/vm_pager.h>
 #include <vm/vnode_pager.h>
 
 #include <ufs/ufs/extattr.h>
 #include <ufs/ufs/quota.h>
 #include <ufs/ufs/inode.h>
 #include <ufs/ufs/ufs_extern.h>
 #include <ufs/ufs/ufsmount.h>
 #include <ufs/ufs/dir.h>
 #ifdef UFS_DIRHASH
 #include <ufs/ufs/dirhash.h>
 #endif
 
 #include <ufs/ffs/fs.h>
 #include <ufs/ffs/ffs_extern.h>
 
 #define	ALIGNED_TO(ptr, s)	\
 	(((uintptr_t)(ptr) & (_Alignof(s) - 1)) == 0)
 
 #ifdef DIRECTIO
 extern int	ffs_rawread(struct vnode *vp, struct uio *uio, int *workdone);
 #endif
 static vop_fdatasync_t	ffs_fdatasync;
 static vop_fsync_t	ffs_fsync;
 static vop_getpages_t	ffs_getpages;
 static vop_getpages_async_t	ffs_getpages_async;
 static vop_lock1_t	ffs_lock;
 #ifdef INVARIANTS
 static vop_unlock_t	ffs_unlock_debug;
 #endif
 static vop_read_t	ffs_read;
 static vop_write_t	ffs_write;
 static int	ffs_extread(struct vnode *vp, struct uio *uio, int ioflag);
 static int	ffs_extwrite(struct vnode *vp, struct uio *uio, int ioflag,
 		    struct ucred *cred);
 static vop_strategy_t	ffsext_strategy;
 static vop_closeextattr_t	ffs_closeextattr;
 static vop_deleteextattr_t	ffs_deleteextattr;
 static vop_getextattr_t	ffs_getextattr;
 static vop_listextattr_t	ffs_listextattr;
 static vop_openextattr_t	ffs_openextattr;
 static vop_setextattr_t	ffs_setextattr;
 static vop_vptofh_t	ffs_vptofh;
 static vop_vput_pair_t	ffs_vput_pair;
 
 vop_fplookup_vexec_t ufs_fplookup_vexec;
 
 /* Global vfs data structures for ufs. */
 struct vop_vector ffs_vnodeops1 = {
 	.vop_default =		&ufs_vnodeops,
 	.vop_fsync =		ffs_fsync,
 	.vop_fdatasync =	ffs_fdatasync,
 	.vop_getpages =		ffs_getpages,
 	.vop_getpages_async =	ffs_getpages_async,
 	.vop_lock1 =		ffs_lock,
 #ifdef INVARIANTS
 	.vop_unlock =		ffs_unlock_debug,
 #endif
 	.vop_read =		ffs_read,
 	.vop_reallocblks =	ffs_reallocblks,
 	.vop_write =		ffs_write,
 	.vop_vptofh =		ffs_vptofh,
 	.vop_vput_pair =	ffs_vput_pair,
 	.vop_fplookup_vexec =	ufs_fplookup_vexec,
 	.vop_fplookup_symlink =	VOP_EAGAIN,
 };
 VFS_VOP_VECTOR_REGISTER(ffs_vnodeops1);
 
 struct vop_vector ffs_fifoops1 = {
 	.vop_default =		&ufs_fifoops,
 	.vop_fsync =		ffs_fsync,
 	.vop_fdatasync =	ffs_fdatasync,
 	.vop_lock1 =		ffs_lock,
 #ifdef INVARIANTS
 	.vop_unlock =		ffs_unlock_debug,
 #endif
 	.vop_vptofh =		ffs_vptofh,
 	.vop_fplookup_vexec =   VOP_EAGAIN,
 	.vop_fplookup_symlink = VOP_EAGAIN,
 };
 VFS_VOP_VECTOR_REGISTER(ffs_fifoops1);
 
 /* Global vfs data structures for ufs. */
 struct vop_vector ffs_vnodeops2 = {
 	.vop_default =		&ufs_vnodeops,
 	.vop_fsync =		ffs_fsync,
 	.vop_fdatasync =	ffs_fdatasync,
 	.vop_getpages =		ffs_getpages,
 	.vop_getpages_async =	ffs_getpages_async,
 	.vop_lock1 =		ffs_lock,
 #ifdef INVARIANTS
 	.vop_unlock =		ffs_unlock_debug,
 #endif
 	.vop_read =		ffs_read,
 	.vop_reallocblks =	ffs_reallocblks,
 	.vop_write =		ffs_write,
 	.vop_closeextattr =	ffs_closeextattr,
 	.vop_deleteextattr =	ffs_deleteextattr,
 	.vop_getextattr =	ffs_getextattr,
 	.vop_listextattr =	ffs_listextattr,
 	.vop_openextattr =	ffs_openextattr,
 	.vop_setextattr =	ffs_setextattr,
 	.vop_vptofh =		ffs_vptofh,
 	.vop_vput_pair =	ffs_vput_pair,
 	.vop_fplookup_vexec =	ufs_fplookup_vexec,
 	.vop_fplookup_symlink =	VOP_EAGAIN,
 };
 VFS_VOP_VECTOR_REGISTER(ffs_vnodeops2);
 
 struct vop_vector ffs_fifoops2 = {
 	.vop_default =		&ufs_fifoops,
 	.vop_fsync =		ffs_fsync,
 	.vop_fdatasync =	ffs_fdatasync,
 	.vop_lock1 =		ffs_lock,
 #ifdef INVARIANTS
 	.vop_unlock =		ffs_unlock_debug,
 #endif
 	.vop_reallocblks =	ffs_reallocblks,
 	.vop_strategy =		ffsext_strategy,
 	.vop_closeextattr =	ffs_closeextattr,
 	.vop_deleteextattr =	ffs_deleteextattr,
 	.vop_getextattr =	ffs_getextattr,
 	.vop_listextattr =	ffs_listextattr,
 	.vop_openextattr =	ffs_openextattr,
 	.vop_setextattr =	ffs_setextattr,
 	.vop_vptofh =		ffs_vptofh,
 	.vop_fplookup_vexec =   VOP_EAGAIN,
 	.vop_fplookup_symlink = VOP_EAGAIN,
 };
 VFS_VOP_VECTOR_REGISTER(ffs_fifoops2);
 
 /*
  * Synch an open file.
  */
 /* ARGSUSED */
 static int
 ffs_fsync(struct vop_fsync_args *ap)
 {
 	struct vnode *vp;
 	struct bufobj *bo;
 	int error;
 
 	vp = ap->a_vp;
 	bo = &vp->v_bufobj;
 retry:
 	error = ffs_syncvnode(vp, ap->a_waitfor, 0);
 	if (error)
 		return (error);
 	if (ap->a_waitfor == MNT_WAIT && DOINGSOFTDEP(vp)) {
 		error = softdep_fsync(vp);
 		if (error)
 			return (error);
 
 		/*
 		 * The softdep_fsync() function may drop vp lock,
 		 * allowing for dirty buffers to reappear on the
 		 * bo_dirty list. Recheck and resync as needed.
 		 */
 		BO_LOCK(bo);
 		if ((vp->v_type == VREG || vp->v_type == VDIR) &&
 		    (bo->bo_numoutput > 0 || bo->bo_dirty.bv_cnt > 0)) {
 			BO_UNLOCK(bo);
 			goto retry;
 		}
 		BO_UNLOCK(bo);
 	}
 	if (ffs_fsfail_cleanup(VFSTOUFS(vp->v_mount), 0))
 		return (ENXIO);
 	return (0);
 }
 
 int
 ffs_syncvnode(struct vnode *vp, int waitfor, int flags)
 {
 	struct inode *ip;
 	struct bufobj *bo;
 	struct ufsmount *ump;
 	struct buf *bp, *nbp;
 	ufs_lbn_t lbn;
 	int error, passes, wflag;
 	bool still_dirty, unlocked, wait;
 
 	ip = VTOI(vp);
 	bo = &vp->v_bufobj;
 	ump = VFSTOUFS(vp->v_mount);
 #ifdef WITNESS
 	wflag = IS_SNAPSHOT(ip) ? LK_NOWITNESS : 0;
 #else
 	wflag = 0;
 #endif
 
 	/*
 	 * When doing MNT_WAIT we must first flush all dependencies
 	 * on the inode.
 	 */
 	if (DOINGSOFTDEP(vp) && waitfor == MNT_WAIT &&
 	    (error = softdep_sync_metadata(vp)) != 0) {
 		if (ffs_fsfail_cleanup(ump, error))
 			error = 0;
 		return (error);
 	}
 
 	/*
 	 * Flush all dirty buffers associated with a vnode.
 	 */
 	error = 0;
 	passes = 0;
 	wait = false;	/* Always do an async pass first. */
 	unlocked = false;
 	lbn = lblkno(ITOFS(ip), (ip->i_size + ITOFS(ip)->fs_bsize - 1));
 	BO_LOCK(bo);
 loop:
 	TAILQ_FOREACH(bp, &bo->bo_dirty.bv_hd, b_bobufs)
 		bp->b_vflags &= ~BV_SCANNED;
 	TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
 		/*
 		 * Reasons to skip this buffer: it has already been considered
 		 * on this pass, the buffer has dependencies that will cause
 		 * it to be redirtied and it has not already been deferred,
 		 * or it is already being written.
 		 */
 		if ((bp->b_vflags & BV_SCANNED) != 0)
 			continue;
 		bp->b_vflags |= BV_SCANNED;
 		/*
 		 * Flush indirects in order, if requested.
 		 *
 		 * Note that if only datasync is requested, we can
 		 * skip indirect blocks when softupdates are not
 		 * active.  Otherwise we must flush them with data,
 		 * since dependencies prevent data block writes.
 		 */
 		if (waitfor == MNT_WAIT && bp->b_lblkno <= -UFS_NDADDR &&
 		    (lbn_level(bp->b_lblkno) >= passes ||
 		    ((flags & DATA_ONLY) != 0 && !DOINGSOFTDEP(vp))))
 			continue;
 		if (bp->b_lblkno > lbn)
 			panic("ffs_syncvnode: syncing truncated data.");
 		if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL) == 0) {
 			BO_UNLOCK(bo);
 		} else if (wait) {
 			if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL |
 			    LK_INTERLOCK | wflag, BO_LOCKPTR(bo)) != 0) {
 				BO_LOCK(bo);
 				bp->b_vflags &= ~BV_SCANNED;
 				goto next_locked;
 			}
 		} else
 			continue;
 		if ((bp->b_flags & B_DELWRI) == 0)
 			panic("ffs_fsync: not dirty");
 		/*
 		 * Check for dependencies and potentially complete them.
 		 */
 		if (!LIST_EMPTY(&bp->b_dep) &&
 		    (error = softdep_sync_buf(vp, bp,
 		    wait ? MNT_WAIT : MNT_NOWAIT)) != 0) {
 			/*
 			 * Lock order conflict, buffer was already unlocked,
 			 * and vnode possibly unlocked.
 			 */
 			if (error == ERELOOKUP) {
 				if (vp->v_data == NULL)
 					return (EBADF);
 				unlocked = true;
 				if (DOINGSOFTDEP(vp) && waitfor == MNT_WAIT &&
 				    (error = softdep_sync_metadata(vp)) != 0) {
 					if (ffs_fsfail_cleanup(ump, error))
 						error = 0;
 					return (unlocked && error == 0 ?
 					    ERELOOKUP : error);
 				}
 				/* Re-evaluate inode size */
 				lbn = lblkno(ITOFS(ip), (ip->i_size +
 				    ITOFS(ip)->fs_bsize - 1));
 				goto next;
 			}
 			/* I/O error. */
 			if (error != EBUSY) {
 				BUF_UNLOCK(bp);
 				return (error);
 			}
 			/* If we deferred once, don't defer again. */
 		    	if ((bp->b_flags & B_DEFERRED) == 0) {
 				bp->b_flags |= B_DEFERRED;
 				BUF_UNLOCK(bp);
 				goto next;
 			}
 		}
 		if (wait) {
 			bremfree(bp);
 			error = bwrite(bp);
 			if (ffs_fsfail_cleanup(ump, error))
 				error = 0;
 			if (error != 0)
 				return (error);
 		} else if ((bp->b_flags & B_CLUSTEROK)) {
 			(void) vfs_bio_awrite(bp);
 		} else {
 			bremfree(bp);
 			(void) bawrite(bp);
 		}
 next:
 		/*
 		 * Since we may have slept during the I/O, we need
 		 * to start from a known point.
 		 */
 		BO_LOCK(bo);
 next_locked:
 		nbp = TAILQ_FIRST(&bo->bo_dirty.bv_hd);
 	}
 	if (waitfor != MNT_WAIT) {
 		BO_UNLOCK(bo);
 		if ((flags & NO_INO_UPDT) != 0)
 			return (unlocked ? ERELOOKUP : 0);
 		error = ffs_update(vp, 0);
 		if (error == 0 && unlocked)
 			error = ERELOOKUP;
 		return (error);
 	}
 	/* Drain IO to see if we're done. */
 	bufobj_wwait(bo, 0, 0);
 	/*
 	 * Block devices associated with filesystems may have new I/O
 	 * requests posted for them even if the vnode is locked, so no
 	 * amount of trying will get them clean.  We make several passes
 	 * as a best effort.
 	 *
 	 * Regular files may need multiple passes to flush all dependency
 	 * work as it is possible that we must write once per indirect
 	 * level, once for the leaf, and once for the inode and each of
 	 * these will be done with one sync and one async pass.
 	 */
 	if (bo->bo_dirty.bv_cnt > 0) {
 		if ((flags & DATA_ONLY) == 0) {
 			still_dirty = true;
 		} else {
 			/*
 			 * For data-only sync, dirty indirect buffers
 			 * are ignored.
 			 */
 			still_dirty = false;
 			TAILQ_FOREACH(bp, &bo->bo_dirty.bv_hd, b_bobufs) {
 				if (bp->b_lblkno > -UFS_NDADDR) {
 					still_dirty = true;
 					break;
 				}
 			}
 		}
 
 		if (still_dirty) {
 			/* Write the inode after sync passes to flush deps. */
 			if (wait && DOINGSOFTDEP(vp) &&
 			    (flags & NO_INO_UPDT) == 0) {
 				BO_UNLOCK(bo);
 				ffs_update(vp, 1);
 				BO_LOCK(bo);
 			}
 			/* switch between sync/async. */
 			wait = !wait;
 			if (wait || ++passes < UFS_NIADDR + 2)
 				goto loop;
 		}
 	}
 	BO_UNLOCK(bo);
 	error = 0;
 	if ((flags & DATA_ONLY) == 0) {
 		if ((flags & NO_INO_UPDT) == 0)
 			error = ffs_update(vp, 1);
 		if (DOINGSUJ(vp))
 			softdep_journal_fsync(VTOI(vp));
 	} else if ((ip->i_flags & (IN_SIZEMOD | IN_IBLKDATA)) != 0) {
 		error = ffs_update(vp, 1);
 	}
 	if (error == 0 && unlocked)
 		error = ERELOOKUP;
 	if (error == 0)
 		ip->i_flag &= ~IN_NEEDSYNC;
 	return (error);
 }
 
 static int
 ffs_fdatasync(struct vop_fdatasync_args *ap)
 {
 
 	return (ffs_syncvnode(ap->a_vp, MNT_WAIT, DATA_ONLY));
 }
 
 static int
 ffs_lock(
 	struct vop_lock1_args /* {
 		struct vnode *a_vp;
 		int a_flags;
 		char *file;
 		int line;
 	} */ *ap)
 {
 #if !defined(NO_FFS_SNAPSHOT) || defined(DIAGNOSTIC)
 	struct vnode *vp = ap->a_vp;
 #endif	/* !NO_FFS_SNAPSHOT || DIAGNOSTIC */
 #ifdef DIAGNOSTIC
 	struct inode *ip;
 #endif	/* DIAGNOSTIC */
 	int result;
 #ifndef NO_FFS_SNAPSHOT
 	int flags;
 	struct lock *lkp;
 
 	/*
 	 * Adaptive spinning mixed with SU leads to trouble. use a giant hammer
 	 * and only use it when LK_NODDLKTREAT is set. Currently this means it
 	 * is only used during path lookup.
 	 */
 	if ((ap->a_flags & LK_NODDLKTREAT) != 0)
 		ap->a_flags |= LK_ADAPTIVE;
 	switch (ap->a_flags & LK_TYPE_MASK) {
 	case LK_SHARED:
 	case LK_UPGRADE:
 	case LK_EXCLUSIVE:
 		flags = ap->a_flags;
 		for (;;) {
 #ifdef DEBUG_VFS_LOCKS
 			VNPASS(vp->v_holdcnt != 0, vp);
 #endif	/* DEBUG_VFS_LOCKS */
 			lkp = vp->v_vnlock;
 			result = lockmgr_lock_flags(lkp, flags,
 			    &VI_MTX(vp)->lock_object, ap->a_file, ap->a_line);
 			if (lkp == vp->v_vnlock || result != 0)
 				break;
 			/*
 			 * Apparent success, except that the vnode
 			 * mutated between snapshot file vnode and
 			 * regular file vnode while this process
 			 * slept.  The lock currently held is not the
 			 * right lock.  Release it, and try to get the
 			 * new lock.
 			 */
 			lockmgr_unlock(lkp);
 			if ((flags & (LK_INTERLOCK | LK_NOWAIT)) ==
 			    (LK_INTERLOCK | LK_NOWAIT))
 				return (EBUSY);
 			if ((flags & LK_TYPE_MASK) == LK_UPGRADE)
 				flags = (flags & ~LK_TYPE_MASK) | LK_EXCLUSIVE;
 			flags &= ~LK_INTERLOCK;
 		}
 #ifdef DIAGNOSTIC
 		switch (ap->a_flags & LK_TYPE_MASK) {
 		case LK_UPGRADE:
 		case LK_EXCLUSIVE:
 			if (result == 0 && vp->v_vnlock->lk_recurse == 0) {
 				ip = VTOI(vp);
 				if (ip != NULL)
 					ip->i_lock_gen++;
 			}
 		}
 #endif	/* DIAGNOSTIC */
 		break;
 	default:
 #ifdef DIAGNOSTIC
 		if ((ap->a_flags & LK_TYPE_MASK) == LK_DOWNGRADE) {
 			ip = VTOI(vp);
 			if (ip != NULL)
 				ufs_unlock_tracker(ip);
 		}
 #endif	/* DIAGNOSTIC */
 		result = VOP_LOCK1_APV(&ufs_vnodeops, ap);
 		break;
 	}
 #else	/* NO_FFS_SNAPSHOT */
 	/*
 	 * See above for an explanation.
 	 */
 	if ((ap->a_flags & LK_NODDLKTREAT) != 0)
 		ap->a_flags |= LK_ADAPTIVE;
 #ifdef DIAGNOSTIC
 	if ((ap->a_flags & LK_TYPE_MASK) == LK_DOWNGRADE) {
 		ip = VTOI(vp);
 		if (ip != NULL)
 			ufs_unlock_tracker(ip);
 	}
 #endif	/* DIAGNOSTIC */
 	result =  VOP_LOCK1_APV(&ufs_vnodeops, ap);
 #endif	/* NO_FFS_SNAPSHOT */
 #ifdef DIAGNOSTIC
 	switch (ap->a_flags & LK_TYPE_MASK) {
 	case LK_UPGRADE:
 	case LK_EXCLUSIVE:
 		if (result == 0 && vp->v_vnlock->lk_recurse == 0) {
 			ip = VTOI(vp);
 			if (ip != NULL)
 				ip->i_lock_gen++;
 		}
 	}
 #endif	/* DIAGNOSTIC */
 	return (result);
 }
 
 #ifdef INVARIANTS
 static int
 ffs_unlock_debug(struct vop_unlock_args *ap)
 {
 	struct vnode *vp;
 	struct inode *ip;
 
 	vp = ap->a_vp;
 	ip = VTOI(vp);
 	if (ip->i_flag & UFS_INODE_FLAG_LAZY_MASK_ASSERTABLE) {
 		if ((vp->v_mflag & VMP_LAZYLIST) == 0) {
 			VI_LOCK(vp);
 			VNASSERT((vp->v_mflag & VMP_LAZYLIST), vp,
 			    ("%s: modified vnode (%x) not on lazy list",
 			    __func__, ip->i_flag));
 			VI_UNLOCK(vp);
 		}
 	}
 	KASSERT(vp->v_type != VDIR || vp->v_vnlock->lk_recurse != 0 ||
 	    (ip->i_flag & IN_ENDOFF) == 0,
 	    ("ufs dir vp %p ip %p flags %#x", vp, ip, ip->i_flag));
 #ifdef DIAGNOSTIC
 	if (VOP_ISLOCKED(vp) == LK_EXCLUSIVE && ip != NULL &&
 	    vp->v_vnlock->lk_recurse == 0)
 		ufs_unlock_tracker(ip);
 #endif
 	return (VOP_UNLOCK_APV(&ufs_vnodeops, ap));
 }
 #endif
 
 static int
 ffs_read_hole(struct uio *uio, long xfersize, long *size)
 {
 	ssize_t saved_resid, tlen;
 	int error;
 
 	while (xfersize > 0) {
 		tlen = min(xfersize, ZERO_REGION_SIZE);
 		saved_resid = uio->uio_resid;
 		error = vn_io_fault_uiomove(__DECONST(void *, zero_region),
 		    tlen, uio);
 		if (error != 0)
 			return (error);
 		tlen = saved_resid - uio->uio_resid;
 		xfersize -= tlen;
 		*size -= tlen;
 	}
 	return (0);
 }
 
 /*
  * Vnode op for reading.
  */
 static int
 ffs_read(
 	struct vop_read_args /* {
 		struct vnode *a_vp;
 		struct uio *a_uio;
 		int a_ioflag;
 		struct ucred *a_cred;
 	} */ *ap)
 {
 	struct vnode *vp;
 	struct inode *ip;
 	struct uio *uio;
 	struct fs *fs;
 	struct buf *bp;
 	ufs_lbn_t lbn, nextlbn;
 	off_t bytesinfile;
 	long size, xfersize, blkoffset;
 	ssize_t orig_resid;
 	int bflag, error, ioflag, seqcount;
 
 	vp = ap->a_vp;
 	uio = ap->a_uio;
 	ioflag = ap->a_ioflag;
 	if (ap->a_ioflag & IO_EXT)
 #ifdef notyet
 		return (ffs_extread(vp, uio, ioflag));
 #else
 		panic("ffs_read+IO_EXT");
 #endif
 #ifdef DIRECTIO
 	if ((ioflag & IO_DIRECT) != 0) {
 		int workdone;
 
 		error = ffs_rawread(vp, uio, &workdone);
 		if (error != 0 || workdone != 0)
 			return error;
 	}
 #endif
 
 	seqcount = ap->a_ioflag >> IO_SEQSHIFT;
 	ip = VTOI(vp);
 
 #ifdef INVARIANTS
 	if (uio->uio_rw != UIO_READ)
 		panic("ffs_read: mode");
 
 	if (vp->v_type == VLNK) {
 		if ((int)ip->i_size < VFSTOUFS(vp->v_mount)->um_maxsymlinklen)
 			panic("ffs_read: short symlink");
 	} else if (vp->v_type != VREG && vp->v_type != VDIR)
 		panic("ffs_read: type %d",  vp->v_type);
 #endif
 	orig_resid = uio->uio_resid;
 	KASSERT(orig_resid >= 0, ("ffs_read: uio->uio_resid < 0"));
 	if (orig_resid == 0)
 		return (0);
 	KASSERT(uio->uio_offset >= 0, ("ffs_read: uio->uio_offset < 0"));
 	fs = ITOFS(ip);
 	if (uio->uio_offset < ip->i_size &&
 	    uio->uio_offset >= fs->fs_maxfilesize)
 		return (EOVERFLOW);
 
 	bflag = GB_UNMAPPED | (uio->uio_segflg == UIO_NOCOPY ? 0 : GB_NOSPARSE);
 #ifdef WITNESS
 	bflag |= IS_SNAPSHOT(ip) ? GB_NOWITNESS : 0;
 #endif
 	for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
 		if ((bytesinfile = ip->i_size - uio->uio_offset) <= 0)
 			break;
 		lbn = lblkno(fs, uio->uio_offset);
 		nextlbn = lbn + 1;
 
 		/*
 		 * size of buffer.  The buffer representing the
 		 * end of the file is rounded up to the size of
 		 * the block type ( fragment or full block,
 		 * depending ).
 		 */
 		size = blksize(fs, ip, lbn);
 		blkoffset = blkoff(fs, uio->uio_offset);
 
 		/*
 		 * The amount we want to transfer in this iteration is
 		 * one FS block less the amount of the data before
 		 * our startpoint (duh!)
 		 */
 		xfersize = fs->fs_bsize - blkoffset;
 
 		/*
 		 * But if we actually want less than the block,
 		 * or the file doesn't have a whole block more of data,
 		 * then use the lesser number.
 		 */
 		if (uio->uio_resid < xfersize)
 			xfersize = uio->uio_resid;
 		if (bytesinfile < xfersize)
 			xfersize = bytesinfile;
 
 		if (lblktosize(fs, nextlbn) >= ip->i_size) {
 			/*
 			 * Don't do readahead if this is the end of the file.
 			 */
 			error = bread_gb(vp, lbn, size, NOCRED, bflag, &bp);
 		} else if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
 			/*
 			 * Otherwise if we are allowed to cluster,
 			 * grab as much as we can.
 			 *
 			 * XXX  This may not be a win if we are not
 			 * doing sequential access.
 			 */
 			error = cluster_read(vp, ip->i_size, lbn,
 			    size, NOCRED, blkoffset + uio->uio_resid,
 			    seqcount, bflag, &bp);
 		} else if (seqcount > 1) {
 			/*
 			 * If we are NOT allowed to cluster, then
 			 * if we appear to be acting sequentially,
 			 * fire off a request for a readahead
 			 * as well as a read. Note that the 4th and 5th
 			 * arguments point to arrays of the size specified in
 			 * the 6th argument.
 			 */
 			int nextsize = blksize(fs, ip, nextlbn);
 			error = breadn_flags(vp, lbn, lbn, size, &nextlbn,
 			    &nextsize, 1, NOCRED, bflag, NULL, &bp);
 		} else {
 			/*
 			 * Failing all of the above, just read what the
 			 * user asked for. Interestingly, the same as
 			 * the first option above.
 			 */
 			error = bread_gb(vp, lbn, size, NOCRED, bflag, &bp);
 		}
 		if (error == EJUSTRETURN) {
 			error = ffs_read_hole(uio, xfersize, &size);
 			if (error == 0)
 				continue;
 		}
 		if (error != 0) {
 			brelse(bp);
 			bp = NULL;
 			break;
 		}
 
 		/*
 		 * We should only get non-zero b_resid when an I/O error
 		 * has occurred, which should cause us to break above.
 		 * However, if the short read did not cause an error,
 		 * then we want to ensure that we do not uiomove bad
 		 * or uninitialized data.
 		 */
 		size -= bp->b_resid;
 		if (size < xfersize) {
 			if (size == 0)
 				break;
 			xfersize = size;
 		}
 
 		if (buf_mapped(bp)) {
 			error = vn_io_fault_uiomove((char *)bp->b_data +
 			    blkoffset, (int)xfersize, uio);
 		} else {
 			error = vn_io_fault_pgmove(bp->b_pages,
 			    blkoffset + (bp->b_offset & PAGE_MASK),
 			    (int)xfersize, uio);
 		}
 		if (error)
 			break;
 
 		vfs_bio_brelse(bp, ioflag);
 	}
 
 	/*
 	 * This can only happen in the case of an error
 	 * because the loop above resets bp to NULL on each iteration
 	 * and on normal completion has not set a new value into it.
 	 * so it must have come from a 'break' statement
 	 */
 	if (bp != NULL)
 		vfs_bio_brelse(bp, ioflag);
 
 	if ((error == 0 || uio->uio_resid != orig_resid) &&
 	    (vp->v_mount->mnt_flag & (MNT_NOATIME | MNT_RDONLY)) == 0)
 		UFS_INODE_SET_FLAG_SHARED(ip, IN_ACCESS);
 	return (error);
 }
 
 /*
  * Vnode op for writing.
  */
 static int
 ffs_write(
 	struct vop_write_args /* {
 		struct vnode *a_vp;
 		struct uio *a_uio;
 		int a_ioflag;
 		struct ucred *a_cred;
 	} */ *ap)
 {
 	struct vnode *vp;
 	struct uio *uio;
 	struct inode *ip;
 	struct fs *fs;
 	struct buf *bp;
 	ufs_lbn_t lbn;
 	off_t osize;
 	ssize_t resid, r;
 	int seqcount;
 	int blkoffset, error, flags, ioflag, size, xfersize;
 
 	vp = ap->a_vp;
 	if (DOINGSUJ(vp))
 		softdep_prealloc(vp, MNT_WAIT);
 	if (vp->v_data == NULL)
 		return (EBADF);
 
 	uio = ap->a_uio;
 	ioflag = ap->a_ioflag;
 	if (ap->a_ioflag & IO_EXT)
 #ifdef notyet
 		return (ffs_extwrite(vp, uio, ioflag, ap->a_cred));
 #else
 		panic("ffs_write+IO_EXT");
 #endif
 
 	seqcount = ap->a_ioflag >> IO_SEQSHIFT;
 	ip = VTOI(vp);
 
 #ifdef INVARIANTS
 	if (uio->uio_rw != UIO_WRITE)
 		panic("ffs_write: mode");
 #endif
 
 	switch (vp->v_type) {
 	case VREG:
 		if (ioflag & IO_APPEND)
 			uio->uio_offset = ip->i_size;
 		if ((ip->i_flags & APPEND) && uio->uio_offset != ip->i_size)
 			return (EPERM);
 		/* FALLTHROUGH */
 	case VLNK:
 		break;
 	case VDIR:
 		panic("ffs_write: dir write");
 		break;
 	default:
 		panic("ffs_write: type %p %d (%d,%d)", vp, (int)vp->v_type,
 			(int)uio->uio_offset,
 			(int)uio->uio_resid
 		);
 	}
 
 	KASSERT(uio->uio_resid >= 0, ("ffs_write: uio->uio_resid < 0"));
 	KASSERT(uio->uio_offset >= 0, ("ffs_write: uio->uio_offset < 0"));
 	fs = ITOFS(ip);
 
 	/*
 	 * Maybe this should be above the vnode op call, but so long as
 	 * file servers have no limits, I don't think it matters.
 	 */
 	error = vn_rlimit_fsizex(vp, uio, fs->fs_maxfilesize, &r,
 	    uio->uio_td);
 	if (error != 0) {
 		vn_rlimit_fsizex_res(uio, r);
 		return (error);
 	}
 
 	resid = uio->uio_resid;
 	osize = ip->i_size;
 	if (seqcount > BA_SEQMAX)
 		flags = BA_SEQMAX << BA_SEQSHIFT;
 	else
 		flags = seqcount << BA_SEQSHIFT;
 	if (ioflag & IO_SYNC)
 		flags |= IO_SYNC;
 	flags |= BA_UNMAPPED;
 
 	for (error = 0; uio->uio_resid > 0;) {
 		lbn = lblkno(fs, uio->uio_offset);
 		blkoffset = blkoff(fs, uio->uio_offset);
 		xfersize = fs->fs_bsize - blkoffset;
 		if (uio->uio_resid < xfersize)
 			xfersize = uio->uio_resid;
 		if (uio->uio_offset + xfersize > ip->i_size)
 			vnode_pager_setsize(vp, uio->uio_offset + xfersize);
 
 		/*
 		 * We must perform a read-before-write if the transfer size
 		 * does not cover the entire buffer.
 		 */
 		if (fs->fs_bsize > xfersize)
 			flags |= BA_CLRBUF;
 		else
 			flags &= ~BA_CLRBUF;
 /* XXX is uio->uio_offset the right thing here? */
 		error = UFS_BALLOC(vp, uio->uio_offset, xfersize,
 		    ap->a_cred, flags, &bp);
 		if (error != 0) {
 			vnode_pager_setsize(vp, ip->i_size);
 			break;
 		}
 		if ((ioflag & (IO_SYNC|IO_INVAL)) == (IO_SYNC|IO_INVAL))
 			bp->b_flags |= B_NOCACHE;
 
 		if (uio->uio_offset + xfersize > ip->i_size) {
 			ip->i_size = uio->uio_offset + xfersize;
 			DIP_SET(ip, i_size, ip->i_size);
 			UFS_INODE_SET_FLAG(ip, IN_SIZEMOD | IN_CHANGE);
 		}
 
 		size = blksize(fs, ip, lbn) - bp->b_resid;
 		if (size < xfersize)
 			xfersize = size;
 
 		if (buf_mapped(bp)) {
 			error = vn_io_fault_uiomove((char *)bp->b_data +
 			    blkoffset, (int)xfersize, uio);
 		} else {
 			error = vn_io_fault_pgmove(bp->b_pages,
 			    blkoffset + (bp->b_offset & PAGE_MASK),
 			    (int)xfersize, uio);
 		}
 		/*
 		 * If the buffer is not already filled and we encounter an
 		 * error while trying to fill it, we have to clear out any
 		 * garbage data from the pages instantiated for the buffer.
 		 * If we do not, a failed uiomove() during a write can leave
 		 * the prior contents of the pages exposed to a userland mmap.
 		 *
 		 * Note that we need only clear buffers with a transfer size
 		 * equal to the block size because buffers with a shorter
 		 * transfer size were cleared above by the call to UFS_BALLOC()
 		 * with the BA_CLRBUF flag set.
 		 *
 		 * If the source region for uiomove identically mmaps the
 		 * buffer, uiomove() performed the NOP copy, and the buffer
 		 * content remains valid because the page fault handler
 		 * validated the pages.
 		 */
 		if (error != 0 && (bp->b_flags & B_CACHE) == 0 &&
 		    fs->fs_bsize == xfersize) {
 			if (error == EFAULT && LIST_EMPTY(&bp->b_dep)) {
 				bp->b_flags |= B_INVAL | B_RELBUF | B_NOCACHE;
 				brelse(bp);
 				break;
 			} else {
 				vfs_bio_clrbuf(bp);
 			}
 		}
 
 		vfs_bio_set_flags(bp, ioflag);
 
 		/*
 		 * If IO_SYNC each buffer is written synchronously.  Otherwise
 		 * if we have a severe page deficiency write the buffer
 		 * asynchronously.  Otherwise try to cluster, and if that
 		 * doesn't do it then either do an async write (if O_DIRECT),
 		 * or a delayed write (if not).
 		 */
 		if (ioflag & IO_SYNC) {
 			(void)bwrite(bp);
 		} else if (vm_page_count_severe() ||
 			    buf_dirty_count_severe() ||
 			    (ioflag & IO_ASYNC)) {
 			bp->b_flags |= B_CLUSTEROK;
 			bawrite(bp);
 		} else if (xfersize + blkoffset == fs->fs_bsize) {
 			if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERW) == 0) {
 				bp->b_flags |= B_CLUSTEROK;
 				cluster_write(vp, &ip->i_clusterw, bp,
 				    ip->i_size, seqcount, GB_UNMAPPED);
 			} else {
 				bawrite(bp);
 			}
 		} else if (ioflag & IO_DIRECT) {
 			bp->b_flags |= B_CLUSTEROK;
 			bawrite(bp);
 		} else {
 			bp->b_flags |= B_CLUSTEROK;
 			bdwrite(bp);
 		}
 		if (error || xfersize == 0)
 			break;
 		UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE);
 	}
 	/*
 	 * If we successfully wrote any data, and we are not the superuser
 	 * we clear the setuid and setgid bits as a precaution against
 	 * tampering.
 	 */
 	if ((ip->i_mode & (ISUID | ISGID)) && resid > uio->uio_resid &&
 	    ap->a_cred) {
 		if (priv_check_cred(ap->a_cred, PRIV_VFS_RETAINSUGID)) {
 			vn_seqc_write_begin(vp);
 			UFS_INODE_SET_MODE(ip, ip->i_mode & ~(ISUID | ISGID));
 			DIP_SET(ip, i_mode, ip->i_mode);
 			vn_seqc_write_end(vp);
 		}
 	}
 	if (error) {
 		if (ioflag & IO_UNIT) {
 			(void)ffs_truncate(vp, osize,
 			    IO_NORMAL | (ioflag & IO_SYNC), ap->a_cred);
 			uio->uio_offset -= resid - uio->uio_resid;
 			uio->uio_resid = resid;
 		}
 	} else if (resid > uio->uio_resid && (ioflag & IO_SYNC)) {
 		if (!(ioflag & IO_DATASYNC) ||
 		    (ip->i_flags & (IN_SIZEMOD | IN_IBLKDATA)))
 			error = ffs_update(vp, 1);
 		if (ffs_fsfail_cleanup(VFSTOUFS(vp->v_mount), error))
 			error = ENXIO;
 	}
 	vn_rlimit_fsizex_res(uio, r);
 	return (error);
 }
 
 /*
  * Extended attribute area reading.
  */
 static int
 ffs_extread(struct vnode *vp, struct uio *uio, int ioflag)
 {
 	struct inode *ip;
 	struct ufs2_dinode *dp;
 	struct fs *fs;
 	struct buf *bp;
 	ufs_lbn_t lbn, nextlbn;
 	off_t bytesinfile;
 	long size, xfersize, blkoffset;
 	ssize_t orig_resid;
 	int error;
 
 	ip = VTOI(vp);
 	fs = ITOFS(ip);
 	dp = ip->i_din2;
 
 #ifdef INVARIANTS
 	if (uio->uio_rw != UIO_READ || fs->fs_magic != FS_UFS2_MAGIC)
 		panic("ffs_extread: mode");
 
 #endif
 	orig_resid = uio->uio_resid;
 	KASSERT(orig_resid >= 0, ("ffs_extread: uio->uio_resid < 0"));
 	if (orig_resid == 0)
 		return (0);
 	KASSERT(uio->uio_offset >= 0, ("ffs_extread: uio->uio_offset < 0"));
 
 	for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
 		if ((bytesinfile = dp->di_extsize - uio->uio_offset) <= 0)
 			break;
 		lbn = lblkno(fs, uio->uio_offset);
 		nextlbn = lbn + 1;
 
 		/*
 		 * size of buffer.  The buffer representing the
 		 * end of the file is rounded up to the size of
 		 * the block type ( fragment or full block,
 		 * depending ).
 		 */
 		size = sblksize(fs, dp->di_extsize, lbn);
 		blkoffset = blkoff(fs, uio->uio_offset);
 
 		/*
 		 * The amount we want to transfer in this iteration is
 		 * one FS block less the amount of the data before
 		 * our startpoint (duh!)
 		 */
 		xfersize = fs->fs_bsize - blkoffset;
 
 		/*
 		 * But if we actually want less than the block,
 		 * or the file doesn't have a whole block more of data,
 		 * then use the lesser number.
 		 */
 		if (uio->uio_resid < xfersize)
 			xfersize = uio->uio_resid;
 		if (bytesinfile < xfersize)
 			xfersize = bytesinfile;
 
 		if (lblktosize(fs, nextlbn) >= dp->di_extsize) {
 			/*
 			 * Don't do readahead if this is the end of the info.
 			 */
 			error = bread(vp, -1 - lbn, size, NOCRED, &bp);
 		} else {
 			/*
 			 * If we have a second block, then
 			 * fire off a request for a readahead
 			 * as well as a read. Note that the 4th and 5th
 			 * arguments point to arrays of the size specified in
 			 * the 6th argument.
 			 */
 			int nextsize = sblksize(fs, dp->di_extsize, nextlbn);
 			nextlbn = -1 - nextlbn;
 			error = breadn(vp, -1 - lbn,
 			    size, &nextlbn, &nextsize, 1, NOCRED, &bp);
 		}
 		if (error) {
 			brelse(bp);
 			bp = NULL;
 			break;
 		}
 
 		/*
 		 * We should only get non-zero b_resid when an I/O error
 		 * has occurred, which should cause us to break above.
 		 * However, if the short read did not cause an error,
 		 * then we want to ensure that we do not uiomove bad
 		 * or uninitialized data.
 		 */
 		size -= bp->b_resid;
 		if (size < xfersize) {
 			if (size == 0)
 				break;
 			xfersize = size;
 		}
 
 		error = uiomove((char *)bp->b_data + blkoffset,
 					(int)xfersize, uio);
 		if (error)
 			break;
 		vfs_bio_brelse(bp, ioflag);
 	}
 
 	/*
 	 * This can only happen in the case of an error
 	 * because the loop above resets bp to NULL on each iteration
 	 * and on normal completion has not set a new value into it.
 	 * so it must have come from a 'break' statement
 	 */
 	if (bp != NULL)
 		vfs_bio_brelse(bp, ioflag);
 	return (error);
 }
 
 /*
  * Extended attribute area writing.
  */
 static int
 ffs_extwrite(struct vnode *vp, struct uio *uio, int ioflag, struct ucred *ucred)
 {
 	struct inode *ip;
 	struct ufs2_dinode *dp;
 	struct fs *fs;
 	struct buf *bp;
 	ufs_lbn_t lbn;
 	off_t osize;
 	ssize_t resid;
 	int blkoffset, error, flags, size, xfersize;
 
 	ip = VTOI(vp);
 	fs = ITOFS(ip);
 	dp = ip->i_din2;
 
 #ifdef INVARIANTS
 	if (uio->uio_rw != UIO_WRITE || fs->fs_magic != FS_UFS2_MAGIC)
 		panic("ffs_extwrite: mode");
 #endif
 
 	if (ioflag & IO_APPEND)
 		uio->uio_offset = dp->di_extsize;
 	KASSERT(uio->uio_offset >= 0, ("ffs_extwrite: uio->uio_offset < 0"));
 	KASSERT(uio->uio_resid >= 0, ("ffs_extwrite: uio->uio_resid < 0"));
 	if ((uoff_t)uio->uio_offset + uio->uio_resid >
 	    UFS_NXADDR * fs->fs_bsize)
 		return (EFBIG);
 
 	resid = uio->uio_resid;
 	osize = dp->di_extsize;
 	flags = IO_EXT;
 	if (ioflag & IO_SYNC)
 		flags |= IO_SYNC;
 
 	for (error = 0; uio->uio_resid > 0;) {
 		lbn = lblkno(fs, uio->uio_offset);
 		blkoffset = blkoff(fs, uio->uio_offset);
 		xfersize = fs->fs_bsize - blkoffset;
 		if (uio->uio_resid < xfersize)
 			xfersize = uio->uio_resid;
 
 		/*
 		 * We must perform a read-before-write if the transfer size
 		 * does not cover the entire buffer.
 		 */
 		if (fs->fs_bsize > xfersize)
 			flags |= BA_CLRBUF;
 		else
 			flags &= ~BA_CLRBUF;
 		error = UFS_BALLOC(vp, uio->uio_offset, xfersize,
 		    ucred, flags, &bp);
 		if (error != 0)
 			break;
 		/*
 		 * If the buffer is not valid we have to clear out any
 		 * garbage data from the pages instantiated for the buffer.
 		 * If we do not, a failed uiomove() during a write can leave
 		 * the prior contents of the pages exposed to a userland
 		 * mmap().  XXX deal with uiomove() errors a better way.
 		 */
 		if ((bp->b_flags & B_CACHE) == 0 && fs->fs_bsize <= xfersize)
 			vfs_bio_clrbuf(bp);
 
 		if (uio->uio_offset + xfersize > dp->di_extsize) {
 			dp->di_extsize = uio->uio_offset + xfersize;
 			UFS_INODE_SET_FLAG(ip, IN_SIZEMOD | IN_CHANGE);
 		}
 
 		size = sblksize(fs, dp->di_extsize, lbn) - bp->b_resid;
 		if (size < xfersize)
 			xfersize = size;
 
 		error =
 		    uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio);
 
 		vfs_bio_set_flags(bp, ioflag);
 
 		/*
 		 * If IO_SYNC each buffer is written synchronously.  Otherwise
 		 * if we have a severe page deficiency write the buffer
 		 * asynchronously.  Otherwise try to cluster, and if that
 		 * doesn't do it then either do an async write (if O_DIRECT),
 		 * or a delayed write (if not).
 		 */
 		if (ioflag & IO_SYNC) {
 			(void)bwrite(bp);
 		} else if (vm_page_count_severe() ||
 			    buf_dirty_count_severe() ||
 			    xfersize + blkoffset == fs->fs_bsize ||
 			    (ioflag & (IO_ASYNC | IO_DIRECT)))
 			bawrite(bp);
 		else
 			bdwrite(bp);
 		if (error || xfersize == 0)
 			break;
 		UFS_INODE_SET_FLAG(ip, IN_CHANGE);
 	}
 	/*
 	 * If we successfully wrote any data, and we are not the superuser
 	 * we clear the setuid and setgid bits as a precaution against
 	 * tampering.
 	 */
 	if ((ip->i_mode & (ISUID | ISGID)) && resid > uio->uio_resid && ucred) {
 		if (priv_check_cred(ucred, PRIV_VFS_RETAINSUGID)) {
 			vn_seqc_write_begin(vp);
 			UFS_INODE_SET_MODE(ip, ip->i_mode & ~(ISUID | ISGID));
 			dp->di_mode = ip->i_mode;
 			vn_seqc_write_end(vp);
 		}
 	}
 	if (error) {
 		if (ioflag & IO_UNIT) {
 			(void)ffs_truncate(vp, osize,
 			    IO_EXT | (ioflag&IO_SYNC), ucred);
 			uio->uio_offset -= resid - uio->uio_resid;
 			uio->uio_resid = resid;
 		}
 	} else if (resid > uio->uio_resid && (ioflag & IO_SYNC))
 		error = ffs_update(vp, 1);
 	return (error);
 }
 
 /*
  * Vnode operating to retrieve a named extended attribute.
  *
  * Locate a particular EA (nspace:name) in the area (ptr:length), and return
  * the length of the EA, and possibly the pointer to the entry and to the data.
  */
 static int
 ffs_findextattr(uint8_t *ptr, uint64_t length, int nspace, const char *name,
     struct extattr **eapp, uint8_t **eac)
 {
 	struct extattr *eap, *eaend;
 	size_t nlen;
 
 	nlen = strlen(name);
 	KASSERT(ALIGNED_TO(ptr, struct extattr), ("unaligned"));
 	eap = (struct extattr *)ptr;
 	eaend = (struct extattr *)(ptr + length);
 	for (; eap < eaend; eap = EXTATTR_NEXT(eap)) {
 		KASSERT(EXTATTR_NEXT(eap) <= eaend,
 		    ("extattr next %p beyond %p", EXTATTR_NEXT(eap), eaend));
 		if (eap->ea_namespace != nspace || eap->ea_namelength != nlen
 		    || memcmp(eap->ea_name, name, nlen) != 0)
 			continue;
 		if (eapp != NULL)
 			*eapp = eap;
 		if (eac != NULL)
 			*eac = EXTATTR_CONTENT(eap);
 		return (EXTATTR_CONTENT_SIZE(eap));
 	}
 	return (-1);
 }
 
 static int
 ffs_rdextattr(uint8_t **p, struct vnode *vp, struct thread *td)
 {
 	const struct extattr *eap, *eaend, *eapnext;
 	struct inode *ip;
 	struct ufs2_dinode *dp;
 	struct fs *fs;
 	struct uio luio;
 	struct iovec liovec;
 	uint64_t easize;
 	int error;
 	uint8_t *eae;
 
 	ip = VTOI(vp);
 	fs = ITOFS(ip);
 	dp = ip->i_din2;
 	easize = dp->di_extsize;
 	if ((uoff_t)easize > UFS_NXADDR * fs->fs_bsize)
 		return (EFBIG);
 
 	eae = malloc(easize, M_TEMP, M_WAITOK);
 
 	liovec.iov_base = eae;
 	liovec.iov_len = easize;
 	luio.uio_iov = &liovec;
 	luio.uio_iovcnt = 1;
 	luio.uio_offset = 0;
 	luio.uio_resid = easize;
 	luio.uio_segflg = UIO_SYSSPACE;
 	luio.uio_rw = UIO_READ;
 	luio.uio_td = td;
 
 	error = ffs_extread(vp, &luio, IO_EXT | IO_SYNC);
 	if (error) {
 		free(eae, M_TEMP);
 		return (error);
 	}
 	/* Validate disk xattrfile contents. */
 	for (eap = (void *)eae, eaend = (void *)(eae + easize); eap < eaend;
 	    eap = eapnext) {
 		/* Detect zeroed out tail */
 		if (eap->ea_length < sizeof(*eap) || eap->ea_length == 0) {
 			easize = (const uint8_t *)eap - eae;
 			break;
 		}
 			
 		eapnext = EXTATTR_NEXT(eap);
 		/* Bogusly long entry. */
 		if (eapnext > eaend) {
 			free(eae, M_TEMP);
 			return (EINTEGRITY);
 		}
 	}
 	ip->i_ea_len = easize;
 	*p = eae;
 	return (0);
 }
 
 static void
 ffs_lock_ea(struct vnode *vp)
 {
 	struct inode *ip;
 
 	ip = VTOI(vp);
 	VI_LOCK(vp);
 	while (ip->i_flag & IN_EA_LOCKED) {
 		UFS_INODE_SET_FLAG(ip, IN_EA_LOCKWAIT);
 		msleep(&ip->i_ea_refs, &vp->v_interlock, PINOD + 2, "ufs_ea",
 		    0);
 	}
 	UFS_INODE_SET_FLAG(ip, IN_EA_LOCKED);
 	VI_UNLOCK(vp);
 }
 
 static void
 ffs_unlock_ea(struct vnode *vp)
 {
 	struct inode *ip;
 
 	ip = VTOI(vp);
 	VI_LOCK(vp);
 	if (ip->i_flag & IN_EA_LOCKWAIT)
 		wakeup(&ip->i_ea_refs);
 	ip->i_flag &= ~(IN_EA_LOCKED | IN_EA_LOCKWAIT);
 	VI_UNLOCK(vp);
 }
 
 static int
 ffs_open_ea(struct vnode *vp, struct ucred *cred, struct thread *td)
 {
 	struct inode *ip;
 	int error;
 
 	ip = VTOI(vp);
 
 	ffs_lock_ea(vp);
 	if (ip->i_ea_area != NULL) {
 		ip->i_ea_refs++;
 		ffs_unlock_ea(vp);
 		return (0);
 	}
 	error = ffs_rdextattr(&ip->i_ea_area, vp, td);
 	if (error) {
 		ffs_unlock_ea(vp);
 		return (error);
 	}
 	ip->i_ea_error = 0;
 	ip->i_ea_refs++;
 	ffs_unlock_ea(vp);
 	return (0);
 }
 
 /*
  * Vnode extattr transaction commit/abort
  */
 static int
 ffs_close_ea(struct vnode *vp, int commit, struct ucred *cred, struct thread *td)
 {
 	struct inode *ip;
 	struct uio luio;
 	struct iovec *liovec;
 	struct ufs2_dinode *dp;
 	size_t ea_len, tlen;
 	int error, i, lcnt;
 	bool truncate;
 
 	ip = VTOI(vp);
 
 	ffs_lock_ea(vp);
 	if (ip->i_ea_area == NULL) {
 		ffs_unlock_ea(vp);
 		return (EINVAL);
 	}
 	dp = ip->i_din2;
 	error = ip->i_ea_error;
 	truncate = false;
 	if (commit && error == 0) {
 		ASSERT_VOP_ELOCKED(vp, "ffs_close_ea commit");
 		if (cred == NOCRED)
 			cred =  vp->v_mount->mnt_cred;
 
 		ea_len = MAX(ip->i_ea_len, dp->di_extsize);
 		for (lcnt = 1, tlen = ea_len - ip->i_ea_len; tlen > 0;) {
 			tlen -= MIN(ZERO_REGION_SIZE, tlen);
 			lcnt++;
 		}
 
 		liovec = __builtin_alloca(lcnt * sizeof(struct iovec));
 		luio.uio_iovcnt = lcnt;
 
 		liovec[0].iov_base = ip->i_ea_area;
 		liovec[0].iov_len = ip->i_ea_len;
 		for (i = 1, tlen = ea_len - ip->i_ea_len; i < lcnt; i++) {
 			liovec[i].iov_base = __DECONST(void *, zero_region);
 			liovec[i].iov_len = MIN(ZERO_REGION_SIZE, tlen);
 			tlen -= liovec[i].iov_len;
 		}
 		MPASS(tlen == 0);
 
 		luio.uio_iov = liovec;
 		luio.uio_offset = 0;
 		luio.uio_resid = ea_len;
 		luio.uio_segflg = UIO_SYSSPACE;
 		luio.uio_rw = UIO_WRITE;
 		luio.uio_td = td;
 		error = ffs_extwrite(vp, &luio, IO_EXT | IO_SYNC, cred);
 		if (error == 0 && ip->i_ea_len == 0)
 			truncate = true;
 	}
 	if (--ip->i_ea_refs == 0) {
 		free(ip->i_ea_area, M_TEMP);
 		ip->i_ea_area = NULL;
 		ip->i_ea_len = 0;
 		ip->i_ea_error = 0;
 	}
 	ffs_unlock_ea(vp);
 
 	if (truncate)
 		ffs_truncate(vp, 0, IO_EXT, cred);
 	return (error);
 }
 
 /*
  * Vnode extattr strategy routine for fifos.
  *
  * We need to check for a read or write of the external attributes.
  * Otherwise we just fall through and do the usual thing.
  */
 static int
 ffsext_strategy(
 	struct vop_strategy_args /* {
 		struct vnodeop_desc *a_desc;
 		struct vnode *a_vp;
 		struct buf *a_bp;
 	} */ *ap)
 {
 	struct vnode *vp;
 	daddr_t lbn;
 
 	vp = ap->a_vp;
 	lbn = ap->a_bp->b_lblkno;
 	if (I_IS_UFS2(VTOI(vp)) && lbn < 0 && lbn >= -UFS_NXADDR)
 		return (VOP_STRATEGY_APV(&ufs_vnodeops, ap));
 	if (vp->v_type == VFIFO)
 		return (VOP_STRATEGY_APV(&ufs_fifoops, ap));
 	panic("spec nodes went here");
 }
 
 /*
  * Vnode extattr transaction commit/abort
  */
 static int
 ffs_openextattr(
 	struct vop_openextattr_args /* {
 		struct vnodeop_desc *a_desc;
 		struct vnode *a_vp;
 		IN struct ucred *a_cred;
 		IN struct thread *a_td;
 	} */ *ap)
 {
 
 	if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK)
 		return (EOPNOTSUPP);
 
 	return (ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td));
 }
 
 /*
  * Vnode extattr transaction commit/abort
  */
 static int
 ffs_closeextattr(
 	struct vop_closeextattr_args /* {
 		struct vnodeop_desc *a_desc;
 		struct vnode *a_vp;
 		int a_commit;
 		IN struct ucred *a_cred;
 		IN struct thread *a_td;
 	} */ *ap)
 {
 	struct vnode *vp;
 
 	vp = ap->a_vp;
 	if (vp->v_type == VCHR || vp->v_type == VBLK)
 		return (EOPNOTSUPP);
 	if (ap->a_commit && (vp->v_mount->mnt_flag & MNT_RDONLY) != 0)
 		return (EROFS);
 
 	if (ap->a_commit && DOINGSUJ(vp)) {
 		ASSERT_VOP_ELOCKED(vp, "ffs_closeextattr commit");
 		softdep_prealloc(vp, MNT_WAIT);
 		if (vp->v_data == NULL)
 			return (EBADF);
 	}
 	return (ffs_close_ea(vp, ap->a_commit, ap->a_cred, ap->a_td));
 }
 
 /*
  * Vnode operation to remove a named attribute.
  */
 static int
 ffs_deleteextattr(
 	struct vop_deleteextattr_args /* {
 		IN struct vnode *a_vp;
 		IN int a_attrnamespace;
 		IN const char *a_name;
 		IN struct ucred *a_cred;
 		IN struct thread *a_td;
 	} */ *ap)
 {
 	struct vnode *vp;
 	struct inode *ip;
 	struct extattr *eap;
 	uint32_t ul;
 	int olen, error, i, easize;
 	uint8_t *eae;
 	void *tmp;
 
 	vp = ap->a_vp;
 	ip = VTOI(vp);
 
 	if (vp->v_type == VCHR || vp->v_type == VBLK)
 		return (EOPNOTSUPP);
 	if (strlen(ap->a_name) == 0)
 		return (EINVAL);
 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
 		return (EROFS);
 
 	error = extattr_check_cred(vp, ap->a_attrnamespace,
 	    ap->a_cred, ap->a_td, VWRITE);
 	if (error) {
 		/*
 		 * ffs_lock_ea is not needed there, because the vnode
 		 * must be exclusively locked.
 		 */
 		if (ip->i_ea_area != NULL && ip->i_ea_error == 0)
 			ip->i_ea_error = error;
 		return (error);
 	}
 
 	if (DOINGSUJ(vp)) {
 		ASSERT_VOP_ELOCKED(vp, "ffs_deleteextattr");
 		softdep_prealloc(vp, MNT_WAIT);
 		if (vp->v_data == NULL)
 			return (EBADF);
 	}
 
 	error = ffs_open_ea(vp, ap->a_cred, ap->a_td);
 	if (error)
 		return (error);
 
 	/* CEM: delete could be done in-place instead */
 	eae = malloc(ip->i_ea_len, M_TEMP, M_WAITOK);
 	bcopy(ip->i_ea_area, eae, ip->i_ea_len);
 	easize = ip->i_ea_len;
 
 	olen = ffs_findextattr(eae, easize, ap->a_attrnamespace, ap->a_name,
 	    &eap, NULL);
 	if (olen == -1) {
 		/* delete but nonexistent */
 		free(eae, M_TEMP);
 		ffs_close_ea(vp, 0, ap->a_cred, ap->a_td);
 		return (ENOATTR);
 	}
 	ul = eap->ea_length;
 	i = (uint8_t *)EXTATTR_NEXT(eap) - eae;
 	bcopy(EXTATTR_NEXT(eap), eap, easize - i);
 	easize -= ul;
 
 	tmp = ip->i_ea_area;
 	ip->i_ea_area = eae;
 	ip->i_ea_len = easize;
 	free(tmp, M_TEMP);
 	error = ffs_close_ea(vp, 1, ap->a_cred, ap->a_td);
 	return (error);
 }
 
 /*
  * Vnode operation to retrieve a named extended attribute.
  */
 static int
 ffs_getextattr(
 	struct vop_getextattr_args /* {
 		IN struct vnode *a_vp;
 		IN int a_attrnamespace;
 		IN const char *a_name;
 		INOUT struct uio *a_uio;
 		OUT size_t *a_size;
 		IN struct ucred *a_cred;
 		IN struct thread *a_td;
 	} */ *ap)
 {
 	struct inode *ip;
 	uint8_t *eae, *p;
 	unsigned easize;
 	int error, ealen;
 
 	ip = VTOI(ap->a_vp);
 
 	if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK)
 		return (EOPNOTSUPP);
 
 	error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
 	    ap->a_cred, ap->a_td, VREAD);
 	if (error)
 		return (error);
 
 	error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td);
 	if (error)
 		return (error);
 
 	eae = ip->i_ea_area;
 	easize = ip->i_ea_len;
 
 	ealen = ffs_findextattr(eae, easize, ap->a_attrnamespace, ap->a_name,
 	    NULL, &p);
 	if (ealen >= 0) {
 		error = 0;
 		if (ap->a_size != NULL)
 			*ap->a_size = ealen;
 		else if (ap->a_uio != NULL)
 			error = uiomove(p, ealen, ap->a_uio);
 	} else
 		error = ENOATTR;
 
 	ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
 	return (error);
 }
 
 /*
  * Vnode operation to retrieve extended attributes on a vnode.
  */
 static int
 ffs_listextattr(
 	struct vop_listextattr_args /* {
 		IN struct vnode *a_vp;
 		IN int a_attrnamespace;
 		INOUT struct uio *a_uio;
 		OUT size_t *a_size;
 		IN struct ucred *a_cred;
 		IN struct thread *a_td;
 	} */ *ap)
 {
 	struct inode *ip;
 	struct extattr *eap, *eaend;
 	int error, ealen;
 
 	ip = VTOI(ap->a_vp);
 
 	if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK)
 		return (EOPNOTSUPP);
 
 	error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
 	    ap->a_cred, ap->a_td, VREAD);
 	if (error)
 		return (error);
 
 	error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td);
 	if (error)
 		return (error);
 
 	error = 0;
 	if (ap->a_size != NULL)
 		*ap->a_size = 0;
 
 	KASSERT(ALIGNED_TO(ip->i_ea_area, struct extattr), ("unaligned"));
 	eap = (struct extattr *)ip->i_ea_area;
 	eaend = (struct extattr *)(ip->i_ea_area + ip->i_ea_len);
 	for (; error == 0 && eap < eaend; eap = EXTATTR_NEXT(eap)) {
 		KASSERT(EXTATTR_NEXT(eap) <= eaend,
 		    ("extattr next %p beyond %p", EXTATTR_NEXT(eap), eaend));
 		if (eap->ea_namespace != ap->a_attrnamespace)
 			continue;
 
 		ealen = eap->ea_namelength;
 		if (ap->a_size != NULL)
 			*ap->a_size += ealen + 1;
 		else if (ap->a_uio != NULL)
 			error = uiomove(&eap->ea_namelength, ealen + 1,
 			    ap->a_uio);
 	}
 
 	ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
 	return (error);
 }
 
 /*
  * Vnode operation to set a named attribute.
  */
 static int
 ffs_setextattr(
 	struct vop_setextattr_args /* {
 		IN struct vnode *a_vp;
 		IN int a_attrnamespace;
 		IN const char *a_name;
 		INOUT struct uio *a_uio;
 		IN struct ucred *a_cred;
 		IN struct thread *a_td;
 	} */ *ap)
 {
 	struct vnode *vp;
 	struct inode *ip;
 	struct fs *fs;
 	struct extattr *eap;
 	uint32_t ealength, ul;
 	ssize_t ealen;
 	int olen, eapad1, eapad2, error, i, easize;
 	uint8_t *eae;
 	void *tmp;
 
 	vp = ap->a_vp;
 	ip = VTOI(vp);
 	fs = ITOFS(ip);
 
 	if (vp->v_type == VCHR || vp->v_type == VBLK)
 		return (EOPNOTSUPP);
 	if (strlen(ap->a_name) == 0)
 		return (EINVAL);
 
 	/* XXX Now unsupported API to delete EAs using NULL uio. */
 	if (ap->a_uio == NULL)
 		return (EOPNOTSUPP);
 
 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
 		return (EROFS);
 
 	ealen = ap->a_uio->uio_resid;
 	if (ealen < 0 || ealen > lblktosize(fs, UFS_NXADDR))
 		return (EINVAL);
 
 	error = extattr_check_cred(vp, ap->a_attrnamespace,
 	    ap->a_cred, ap->a_td, VWRITE);
 	if (error) {
 		/*
 		 * ffs_lock_ea is not needed there, because the vnode
 		 * must be exclusively locked.
 		 */
 		if (ip->i_ea_area != NULL && ip->i_ea_error == 0)
 			ip->i_ea_error = error;
 		return (error);
 	}
 
 	if (DOINGSUJ(vp)) {
 		ASSERT_VOP_ELOCKED(vp, "ffs_deleteextattr");
 		softdep_prealloc(vp, MNT_WAIT);
 		if (vp->v_data == NULL)
 			return (EBADF);
 	}
 
 	error = ffs_open_ea(vp, ap->a_cred, ap->a_td);
 	if (error)
 		return (error);
 
 	ealength = sizeof(uint32_t) + 3 + strlen(ap->a_name);
 	eapad1 = roundup2(ealength, 8) - ealength;
 	eapad2 = roundup2(ealen, 8) - ealen;
 	ealength += eapad1 + ealen + eapad2;
 
 	/*
 	 * CEM: rewrites of the same size or smaller could be done in-place
 	 * instead.  (We don't acquire any fine-grained locks in here either,
 	 * so we could also do bigger writes in-place.)
 	 */
 	eae = malloc(ip->i_ea_len + ealength, M_TEMP, M_WAITOK);
 	bcopy(ip->i_ea_area, eae, ip->i_ea_len);
 	easize = ip->i_ea_len;
 
 	olen = ffs_findextattr(eae, easize, ap->a_attrnamespace, ap->a_name,
 	    &eap, NULL);
         if (olen == -1) {
 		/* new, append at end */
 		KASSERT(ALIGNED_TO(eae + easize, struct extattr),
 		    ("unaligned"));
 		eap = (struct extattr *)(eae + easize);
 		easize += ealength;
 	} else {
 		ul = eap->ea_length;
 		i = (uint8_t *)EXTATTR_NEXT(eap) - eae;
 		if (ul != ealength) {
 			bcopy(EXTATTR_NEXT(eap), (uint8_t *)eap + ealength,
 			    easize - i);
 			easize += (ealength - ul);
 		}
 	}
 	if (easize > lblktosize(fs, UFS_NXADDR)) {
 		free(eae, M_TEMP);
 		ffs_close_ea(vp, 0, ap->a_cred, ap->a_td);
 		if (ip->i_ea_area != NULL && ip->i_ea_error == 0)
 			ip->i_ea_error = ENOSPC;
 		return (ENOSPC);
 	}
 	eap->ea_length = ealength;
 	eap->ea_namespace = ap->a_attrnamespace;
 	eap->ea_contentpadlen = eapad2;
 	eap->ea_namelength = strlen(ap->a_name);
 	memcpy(eap->ea_name, ap->a_name, strlen(ap->a_name));
 	bzero(&eap->ea_name[strlen(ap->a_name)], eapad1);
 	error = uiomove(EXTATTR_CONTENT(eap), ealen, ap->a_uio);
 	if (error) {
 		free(eae, M_TEMP);
 		ffs_close_ea(vp, 0, ap->a_cred, ap->a_td);
 		if (ip->i_ea_area != NULL && ip->i_ea_error == 0)
 			ip->i_ea_error = error;
 		return (error);
 	}
 	bzero((uint8_t *)EXTATTR_CONTENT(eap) + ealen, eapad2);
 
 	tmp = ip->i_ea_area;
 	ip->i_ea_area = eae;
 	ip->i_ea_len = easize;
 	free(tmp, M_TEMP);
 	error = ffs_close_ea(vp, 1, ap->a_cred, ap->a_td);
 	return (error);
 }
 
 /*
  * Vnode pointer to File handle
  */
 static int
 ffs_vptofh(
 	struct vop_vptofh_args /* {
 		IN struct vnode *a_vp;
 		IN struct fid *a_fhp;
 	} */ *ap)
 {
 	struct inode *ip;
 	struct ufid *ufhp;
+	_Static_assert(sizeof(struct ufid) <= sizeof(struct fid),
+	    "struct ufid cannot be larger than struct fid");
 
 	ip = VTOI(ap->a_vp);
 	ufhp = (struct ufid *)ap->a_fhp;
 	ufhp->ufid_len = sizeof(struct ufid);
 	ufhp->ufid_ino = ip->i_number;
 	ufhp->ufid_gen = ip->i_gen;
 	return (0);
 }
 
 SYSCTL_DECL(_vfs_ffs);
 static int use_buf_pager = 1;
 SYSCTL_INT(_vfs_ffs, OID_AUTO, use_buf_pager, CTLFLAG_RWTUN, &use_buf_pager, 0,
     "Always use buffer pager instead of bmap");
 
 static daddr_t
 ffs_gbp_getblkno(struct vnode *vp, vm_ooffset_t off)
 {
 
 	return (lblkno(VFSTOUFS(vp->v_mount)->um_fs, off));
 }
 
 static int
 ffs_gbp_getblksz(struct vnode *vp, daddr_t lbn, long *sz)
 {
 
 	*sz = blksize(VFSTOUFS(vp->v_mount)->um_fs, VTOI(vp), lbn);
 	return (0);
 }
 
 static int
 ffs_getpages(struct vop_getpages_args *ap)
 {
 	struct vnode *vp;
 	struct ufsmount *um;
 
 	vp = ap->a_vp;
 	um = VFSTOUFS(vp->v_mount);
 
 	if (!use_buf_pager && um->um_devvp->v_bufobj.bo_bsize <= PAGE_SIZE)
 		return (vnode_pager_generic_getpages(vp, ap->a_m, ap->a_count,
 		    ap->a_rbehind, ap->a_rahead, NULL, NULL));
 	return (vfs_bio_getpages(vp, ap->a_m, ap->a_count, ap->a_rbehind,
 	    ap->a_rahead, ffs_gbp_getblkno, ffs_gbp_getblksz));
 }
 
 static int
 ffs_getpages_async(struct vop_getpages_async_args *ap)
 {
 	struct vnode *vp;
 	struct ufsmount *um;
 	bool do_iodone;
 	int error;
 
 	vp = ap->a_vp;
 	um = VFSTOUFS(vp->v_mount);
 	do_iodone = true;
 
 	if (um->um_devvp->v_bufobj.bo_bsize <= PAGE_SIZE) {
 		error = vnode_pager_generic_getpages(vp, ap->a_m, ap->a_count,
 		    ap->a_rbehind, ap->a_rahead, ap->a_iodone, ap->a_arg);
 		if (error == 0)
 			do_iodone = false;
 	} else {
 		error = vfs_bio_getpages(vp, ap->a_m, ap->a_count,
 		    ap->a_rbehind, ap->a_rahead, ffs_gbp_getblkno,
 		    ffs_gbp_getblksz);
 	}
 	if (do_iodone && ap->a_iodone != NULL)
 		ap->a_iodone(ap->a_arg, ap->a_m, ap->a_count, error);
 
 	return (error);
 }
 
 static int
 ffs_vput_pair(struct vop_vput_pair_args *ap)
 {
 	struct mount *mp;
 	struct vnode *dvp, *vp, *vp1, **vpp;
 	struct inode *dp, *ip;
 	ino_t ip_ino;
 	uint64_t ip_gen;
 	int error, vp_locked;
 
 	dvp = ap->a_dvp;
 	dp = VTOI(dvp);
 	vpp = ap->a_vpp;
 	vp = vpp != NULL ? *vpp : NULL;
 
 	if ((dp->i_flag & (IN_NEEDSYNC | IN_ENDOFF)) == 0) {
 		vput(dvp);
 		if (vp != NULL && ap->a_unlock_vp)
 			vput(vp);
 		return (0);
 	}
 
 	mp = dvp->v_mount;
 	if (vp != NULL) {
 		if (ap->a_unlock_vp) {
 			vput(vp);
 		} else {
 			MPASS(vp->v_type != VNON);
 			vp_locked = VOP_ISLOCKED(vp);
 			ip = VTOI(vp);
 			ip_ino = ip->i_number;
 			ip_gen = ip->i_gen;
 			VOP_UNLOCK(vp);
 		}
 	}
 
 	/*
 	 * If compaction or fsync was requested do it in ffs_vput_pair()
 	 * now that other locks are no longer held.
          */
 	if ((dp->i_flag & IN_ENDOFF) != 0) {
 		VNASSERT(I_ENDOFF(dp) != 0 && I_ENDOFF(dp) < dp->i_size, dvp,
 		    ("IN_ENDOFF set but I_ENDOFF() is not"));
 		dp->i_flag &= ~IN_ENDOFF;
 		error = UFS_TRUNCATE(dvp, (off_t)I_ENDOFF(dp), IO_NORMAL |
 		    (DOINGASYNC(dvp) ? 0 : IO_SYNC), curthread->td_ucred);
 		if (error != 0 && error != ERELOOKUP) {
 			if (!ffs_fsfail_cleanup(VFSTOUFS(mp), error)) {
 				vn_printf(dvp,
 				    "IN_ENDOFF: failed to truncate, "
 				    "error %d\n", error);
 			}
 #ifdef UFS_DIRHASH
 			ufsdirhash_free(dp);
 #endif
 		}
 		SET_I_ENDOFF(dp, 0);
 	}
 	if ((dp->i_flag & IN_NEEDSYNC) != 0) {
 		do {
 			error = ffs_syncvnode(dvp, MNT_WAIT, 0);
 		} while (error == ERELOOKUP);
 	}
 
 	vput(dvp);
 
 	if (vp == NULL || ap->a_unlock_vp)
 		return (0);
 	MPASS(mp != NULL);
 
 	/*
 	 * It is possible that vp is reclaimed at this point. Only
 	 * routines that call us with a_unlock_vp == false can find
 	 * that their vp has been reclaimed. There are three areas
 	 * that are affected:
 	 * 1) vn_open_cred() - later VOPs could fail, but
 	 *    dead_open() returns 0 to simulate successful open.
 	 * 2) ffs_snapshot() - creation of snapshot fails with EBADF.
 	 * 3) NFS server (several places) - code is prepared to detect
 	 *    and respond to dead vnodes by returning ESTALE.
 	 */
 	VOP_LOCK(vp, vp_locked | LK_RETRY);
 	if (IS_UFS(vp))
 		return (0);
 
 	/*
 	 * Try harder to recover from reclaimed vp if reclaim was not
 	 * because underlying inode was cleared.  We saved inode
 	 * number and inode generation, so we can try to reinstantiate
 	 * exactly same version of inode.  If this fails, return
 	 * original doomed vnode and let caller to handle
 	 * consequences.
 	 *
 	 * Note that callers must keep write started around
 	 * VOP_VPUT_PAIR() calls, so it is safe to use mp without
 	 * busying it.
 	 */
 	VOP_UNLOCK(vp);
 	error = ffs_inotovp(mp, ip_ino, ip_gen, LK_EXCLUSIVE, &vp1,
 	    FFSV_REPLACE_DOOMED);
 	if (error != 0) {
 		VOP_LOCK(vp, vp_locked | LK_RETRY);
 	} else {
 		vrele(vp);
 		*vpp = vp1;
 	}
 	return (error);
 }