diff --git a/sys/fs/devfs/devfs_vnops.c b/sys/fs/devfs/devfs_vnops.c
index 043cee74fde2..a1a06eb6c3e6 100644
--- a/sys/fs/devfs/devfs_vnops.c
+++ b/sys/fs/devfs/devfs_vnops.c
@@ -1,2123 +1,2123 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2000-2004
  *	Poul-Henning Kamp.  All rights reserved.
  * Copyright (c) 1989, 1992-1993, 1995
  *	The Regents of the University of California.  All rights reserved.
  *
  * This code is derived from software donated to Berkeley by
  * Jan-Simon Pendry.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)kernfs_vnops.c	8.15 (Berkeley) 5/21/95
  * From: FreeBSD: src/sys/miscfs/kernfs/kernfs_vnops.c 1.43
  *
  * $FreeBSD$
  */
 
 /*
  * TODO:
  *	mkdir: want it ?
  */
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/conf.h>
 #include <sys/dirent.h>
 #include <sys/eventhandler.h>
 #include <sys/fcntl.h>
 #include <sys/file.h>
 #include <sys/filedesc.h>
 #include <sys/filio.h>
 #include <sys/jail.h>
 #include <sys/kernel.h>
 #include <sys/limits.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mman.h>
 #include <sys/mount.h>
 #include <sys/namei.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/stat.h>
 #include <sys/sx.h>
 #include <sys/sysctl.h>
 #include <sys/time.h>
 #include <sys/ttycom.h>
 #include <sys/unistd.h>
 #include <sys/vnode.h>
 
 static struct vop_vector devfs_vnodeops;
 static struct vop_vector devfs_specops;
 static struct fileops devfs_ops_f;
 
 #include <fs/devfs/devfs.h>
 #include <fs/devfs/devfs_int.h>
 
 #include <security/mac/mac_framework.h>
 
 #include <vm/vm.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_object.h>
 
 static MALLOC_DEFINE(M_CDEVPDATA, "DEVFSP", "Metainfo for cdev-fp data");
 
 struct mtx	devfs_de_interlock;
 MTX_SYSINIT(devfs_de_interlock, &devfs_de_interlock, "devfs interlock", MTX_DEF);
 struct sx	clone_drain_lock;
 SX_SYSINIT(clone_drain_lock, &clone_drain_lock, "clone events drain lock");
 struct mtx	cdevpriv_mtx;
 MTX_SYSINIT(cdevpriv_mtx, &cdevpriv_mtx, "cdevpriv lock", MTX_DEF);
 
 SYSCTL_DECL(_vfs_devfs);
 
 static int devfs_dotimes;
 SYSCTL_INT(_vfs_devfs, OID_AUTO, dotimes, CTLFLAG_RW,
     &devfs_dotimes, 0, "Update timestamps on DEVFS with default precision");
 
 /*
  * Update devfs node timestamp.  Note that updates are unlocked and
  * stat(2) could see partially updated times.
  */
 static void
 devfs_timestamp(struct timespec *tsp)
 {
 	time_t ts;
 
 	if (devfs_dotimes) {
 		vfs_timestamp(tsp);
 	} else {
 		ts = time_second;
 		if (tsp->tv_sec != ts) {
 			tsp->tv_sec = ts;
 			tsp->tv_nsec = 0;
 		}
 	}
 }
 
 static int
 devfs_fp_check(struct file *fp, struct cdev **devp, struct cdevsw **dswp,
     int *ref)
 {
 
 	*dswp = devvn_refthread(fp->f_vnode, devp, ref);
 	if (*devp != fp->f_data) {
 		if (*dswp != NULL)
 			dev_relthread(*devp, *ref);
 		return (ENXIO);
 	}
 	KASSERT((*devp)->si_refcount > 0,
 	    ("devfs: un-referenced struct cdev *(%s)", devtoname(*devp)));
 	if (*dswp == NULL)
 		return (ENXIO);
 	curthread->td_fpop = fp;
 	return (0);
 }
 
 int
 devfs_get_cdevpriv(void **datap)
 {
 	struct file *fp;
 	struct cdev_privdata *p;
 	int error;
 
 	fp = curthread->td_fpop;
 	if (fp == NULL)
 		return (EBADF);
 	p = fp->f_cdevpriv;
 	if (p != NULL) {
 		error = 0;
 		*datap = p->cdpd_data;
 	} else
 		error = ENOENT;
 	return (error);
 }
 
 int
 devfs_set_cdevpriv(void *priv, d_priv_dtor_t *priv_dtr)
 {
 	struct file *fp;
 	struct cdev_priv *cdp;
 	struct cdev_privdata *p;
 	int error;
 
 	fp = curthread->td_fpop;
 	if (fp == NULL)
 		return (ENOENT);
 	cdp = cdev2priv((struct cdev *)fp->f_data);
 	p = malloc(sizeof(struct cdev_privdata), M_CDEVPDATA, M_WAITOK);
 	p->cdpd_data = priv;
 	p->cdpd_dtr = priv_dtr;
 	p->cdpd_fp = fp;
 	mtx_lock(&cdevpriv_mtx);
 	if (fp->f_cdevpriv == NULL) {
 		LIST_INSERT_HEAD(&cdp->cdp_fdpriv, p, cdpd_list);
 		fp->f_cdevpriv = p;
 		mtx_unlock(&cdevpriv_mtx);
 		error = 0;
 	} else {
 		mtx_unlock(&cdevpriv_mtx);
 		free(p, M_CDEVPDATA);
 		error = EBUSY;
 	}
 	return (error);
 }
 
 void
 devfs_destroy_cdevpriv(struct cdev_privdata *p)
 {
 
 	mtx_assert(&cdevpriv_mtx, MA_OWNED);
 	KASSERT(p->cdpd_fp->f_cdevpriv == p,
 	    ("devfs_destoy_cdevpriv %p != %p", p->cdpd_fp->f_cdevpriv, p));
 	p->cdpd_fp->f_cdevpriv = NULL;
 	LIST_REMOVE(p, cdpd_list);
 	mtx_unlock(&cdevpriv_mtx);
 	(p->cdpd_dtr)(p->cdpd_data);
 	free(p, M_CDEVPDATA);
 }
 
 static void
 devfs_fpdrop(struct file *fp)
 {
 	struct cdev_privdata *p;
 
 	mtx_lock(&cdevpriv_mtx);
 	if ((p = fp->f_cdevpriv) == NULL) {
 		mtx_unlock(&cdevpriv_mtx);
 		return;
 	}
 	devfs_destroy_cdevpriv(p);
 }
 
 void
 devfs_clear_cdevpriv(void)
 {
 	struct file *fp;
 
 	fp = curthread->td_fpop;
 	if (fp == NULL)
 		return;
 	devfs_fpdrop(fp);
 }
 
 static void
 devfs_usecount_add(struct vnode *vp)
 {
 	struct devfs_dirent *de;
 	struct cdev *dev;
 
 	mtx_lock(&devfs_de_interlock);
 	VI_LOCK(vp);
 	VNPASS(vp->v_type == VCHR || vp->v_type == VBAD, vp);
 	if (VN_IS_DOOMED(vp)) {
 		goto out_unlock;
 	}
 
 	de = vp->v_data;
 	dev = vp->v_rdev;
 	MPASS(de != NULL);
 	MPASS(dev != NULL);
 	dev->si_usecount++;
 	de->de_usecount++;
 out_unlock:
 	VI_UNLOCK(vp);
 	mtx_unlock(&devfs_de_interlock);
 }
 
 static void
 devfs_usecount_subl(struct vnode *vp)
 {
 	struct devfs_dirent *de;
 	struct cdev *dev;
 
 	mtx_assert(&devfs_de_interlock, MA_OWNED);
 	ASSERT_VI_LOCKED(vp, __func__);
 	VNPASS(vp->v_type == VCHR || vp->v_type == VBAD, vp);
 
 	de = vp->v_data;
 	dev = vp->v_rdev;
 	if (de == NULL)
 		return;
 	if (dev == NULL) {
 		MPASS(de->de_usecount == 0);
 		return;
 	}
 	if (dev->si_usecount < de->de_usecount)
 		panic("%s: si_usecount underflow for dev %p "
 		    "(has %ld, dirent has %d)\n",
 		    __func__, dev, dev->si_usecount, de->de_usecount);
 	if (VN_IS_DOOMED(vp)) {
 		dev->si_usecount -= de->de_usecount;
 		de->de_usecount = 0;
 	} else {
 		if (de->de_usecount == 0)
 			panic("%s: de_usecount underflow for dev %p\n",
 			    __func__, dev);
 		dev->si_usecount--;
 		de->de_usecount--;
 	}
 }
 
 static void
 devfs_usecount_sub(struct vnode *vp)
 {
 
 	mtx_lock(&devfs_de_interlock);
 	VI_LOCK(vp);
 	devfs_usecount_subl(vp);
 	VI_UNLOCK(vp);
 	mtx_unlock(&devfs_de_interlock);
 }
 
 static int
 devfs_usecountl(struct vnode *vp)
 {
 
 	VNPASS(vp->v_type == VCHR, vp);
 	mtx_assert(&devfs_de_interlock, MA_OWNED);
 	ASSERT_VI_LOCKED(vp, __func__);
 	return (vp->v_rdev->si_usecount);
 }
 
 int
 devfs_usecount(struct vnode *vp)
 {
 	int count;
 
 	VNPASS(vp->v_type == VCHR, vp);
 	mtx_lock(&devfs_de_interlock);
 	VI_LOCK(vp);
 	count = devfs_usecountl(vp);
 	VI_UNLOCK(vp);
 	mtx_unlock(&devfs_de_interlock);
 	return (count);
 }
 
 void
 devfs_ctty_ref(struct vnode *vp)
 {
 
 	vrefact(vp);
 	devfs_usecount_add(vp);
 }
 
 void
 devfs_ctty_unref(struct vnode *vp)
 {
 
 	devfs_usecount_sub(vp);
 	vrele(vp);
 }
 
 /*
  * On success devfs_populate_vp() returns with dmp->dm_lock held.
  */
 static int
 devfs_populate_vp(struct vnode *vp)
 {
 	struct devfs_dirent *de;
 	struct devfs_mount *dmp;
 	int locked;
 
 	ASSERT_VOP_LOCKED(vp, "devfs_populate_vp");
 
 	dmp = VFSTODEVFS(vp->v_mount);
 	if (!devfs_populate_needed(dmp)) {
 		sx_xlock(&dmp->dm_lock);
 		goto out_nopopulate;
 	}
 
 	locked = VOP_ISLOCKED(vp);
 
 	sx_xlock(&dmp->dm_lock);
 	DEVFS_DMP_HOLD(dmp);
 
 	/* Can't call devfs_populate() with the vnode lock held. */
 	VOP_UNLOCK(vp);
 	devfs_populate(dmp);
 
 	sx_xunlock(&dmp->dm_lock);
 	vn_lock(vp, locked | LK_RETRY);
 	sx_xlock(&dmp->dm_lock);
 	if (DEVFS_DMP_DROP(dmp)) {
 		sx_xunlock(&dmp->dm_lock);
 		devfs_unmount_final(dmp);
 		return (ERESTART);
 	}
 out_nopopulate:
 	if (VN_IS_DOOMED(vp)) {
 		sx_xunlock(&dmp->dm_lock);
 		return (ERESTART);
 	}
 	de = vp->v_data;
 	KASSERT(de != NULL,
 	    ("devfs_populate_vp: vp->v_data == NULL but vnode not doomed"));
 	if ((de->de_flags & DE_DOOMED) != 0) {
 		sx_xunlock(&dmp->dm_lock);
 		return (ERESTART);
 	}
 
 	return (0);
 }
 
 static int
 devfs_vptocnp(struct vop_vptocnp_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct vnode **dvp = ap->a_vpp;
 	struct devfs_mount *dmp;
 	char *buf = ap->a_buf;
 	size_t *buflen = ap->a_buflen;
 	struct devfs_dirent *dd, *de;
 	int i, error;
 
 	dmp = VFSTODEVFS(vp->v_mount);
 
 	error = devfs_populate_vp(vp);
 	if (error != 0)
 		return (error);
 
 	if (vp->v_type != VCHR && vp->v_type != VDIR) {
 		error = ENOENT;
 		goto finished;
 	}
 
 	dd = vp->v_data;
 	if (vp->v_type == VDIR && dd == dmp->dm_rootdir) {
 		*dvp = vp;
 		vref(*dvp);
 		goto finished;
 	}
 
 	i = *buflen;
 	i -= dd->de_dirent->d_namlen;
 	if (i < 0) {
 		error = ENOMEM;
 		goto finished;
 	}
 	bcopy(dd->de_dirent->d_name, buf + i, dd->de_dirent->d_namlen);
 	*buflen = i;
 	de = devfs_parent_dirent(dd);
 	if (de == NULL) {
 		error = ENOENT;
 		goto finished;
 	}
 	mtx_lock(&devfs_de_interlock);
 	*dvp = de->de_vnode;
 	if (*dvp != NULL) {
 		VI_LOCK(*dvp);
 		mtx_unlock(&devfs_de_interlock);
 		vholdl(*dvp);
 		VI_UNLOCK(*dvp);
 		vref(*dvp);
 		vdrop(*dvp);
 	} else {
 		mtx_unlock(&devfs_de_interlock);
 		error = ENOENT;
 	}
 finished:
 	sx_xunlock(&dmp->dm_lock);
 	return (error);
 }
 
 /*
  * Construct the fully qualified path name relative to the mountpoint.
  * If a NULL cnp is provided, no '/' is appended to the resulting path.
  */
 char *
 devfs_fqpn(char *buf, struct devfs_mount *dmp, struct devfs_dirent *dd,
     struct componentname *cnp)
 {
 	int i;
 	struct devfs_dirent *de;
 
 	sx_assert(&dmp->dm_lock, SA_LOCKED);
 
 	i = SPECNAMELEN;
 	buf[i] = '\0';
 	if (cnp != NULL)
 		i -= cnp->cn_namelen;
 	if (i < 0)
 		 return (NULL);
 	if (cnp != NULL)
 		bcopy(cnp->cn_nameptr, buf + i, cnp->cn_namelen);
 	de = dd;
 	while (de != dmp->dm_rootdir) {
 		if (cnp != NULL || i < SPECNAMELEN) {
 			i--;
 			if (i < 0)
 				 return (NULL);
 			buf[i] = '/';
 		}
 		i -= de->de_dirent->d_namlen;
 		if (i < 0)
 			 return (NULL);
 		bcopy(de->de_dirent->d_name, buf + i,
 		    de->de_dirent->d_namlen);
 		de = devfs_parent_dirent(de);
 		if (de == NULL)
 			return (NULL);
 	}
 	return (buf + i);
 }
 
 static int
 devfs_allocv_drop_refs(int drop_dm_lock, struct devfs_mount *dmp,
 	struct devfs_dirent *de)
 {
 	int not_found;
 
 	not_found = 0;
 	if (de->de_flags & DE_DOOMED)
 		not_found = 1;
 	if (DEVFS_DE_DROP(de)) {
 		KASSERT(not_found == 1, ("DEVFS de dropped but not doomed"));
 		devfs_dirent_free(de);
 	}
 	if (DEVFS_DMP_DROP(dmp)) {
 		KASSERT(not_found == 1,
 			("DEVFS mount struct freed before dirent"));
 		not_found = 2;
 		sx_xunlock(&dmp->dm_lock);
 		devfs_unmount_final(dmp);
 	}
 	if (not_found == 1 || (drop_dm_lock && not_found != 2))
 		sx_unlock(&dmp->dm_lock);
 	return (not_found);
 }
 
 static void
 devfs_insmntque_dtr(struct vnode *vp, void *arg)
 {
 	struct devfs_dirent *de;
 
 	de = (struct devfs_dirent *)arg;
 	mtx_lock(&devfs_de_interlock);
 	vp->v_data = NULL;
 	de->de_vnode = NULL;
 	mtx_unlock(&devfs_de_interlock);
 	vgone(vp);
 	vput(vp);
 }
 
 /*
  * devfs_allocv shall be entered with dmp->dm_lock held, and it drops
  * it on return.
  */
 int
 devfs_allocv(struct devfs_dirent *de, struct mount *mp, int lockmode,
     struct vnode **vpp)
 {
 	int error;
 	struct vnode *vp;
 	struct cdev *dev;
 	struct devfs_mount *dmp;
 	struct cdevsw *dsw;
 	enum vgetstate vs;
 
 	dmp = VFSTODEVFS(mp);
 	if (de->de_flags & DE_DOOMED) {
 		sx_xunlock(&dmp->dm_lock);
 		return (ENOENT);
 	}
 loop:
 	DEVFS_DE_HOLD(de);
 	DEVFS_DMP_HOLD(dmp);
 	mtx_lock(&devfs_de_interlock);
 	vp = de->de_vnode;
 	if (vp != NULL) {
 		vs = vget_prep(vp);
 		mtx_unlock(&devfs_de_interlock);
 		sx_xunlock(&dmp->dm_lock);
 		vget_finish(vp, lockmode | LK_RETRY, vs);
 		sx_xlock(&dmp->dm_lock);
 		if (devfs_allocv_drop_refs(0, dmp, de)) {
 			vput(vp);
 			return (ENOENT);
 		}
 		else if (VN_IS_DOOMED(vp)) {
 			mtx_lock(&devfs_de_interlock);
 			if (de->de_vnode == vp) {
 				de->de_vnode = NULL;
 				vp->v_data = NULL;
 			}
 			mtx_unlock(&devfs_de_interlock);
 			vput(vp);
 			goto loop;
 		}
 		sx_xunlock(&dmp->dm_lock);
 		*vpp = vp;
 		return (0);
 	}
 	mtx_unlock(&devfs_de_interlock);
 	if (de->de_dirent->d_type == DT_CHR) {
 		if (!(de->de_cdp->cdp_flags & CDP_ACTIVE)) {
 			devfs_allocv_drop_refs(1, dmp, de);
 			return (ENOENT);
 		}
 		dev = &de->de_cdp->cdp_c;
 	} else {
 		dev = NULL;
 	}
 	error = getnewvnode("devfs", mp, &devfs_vnodeops, &vp);
 	if (error != 0) {
 		devfs_allocv_drop_refs(1, dmp, de);
 		printf("devfs_allocv: failed to allocate new vnode\n");
 		return (error);
 	}
 
 	if (de->de_dirent->d_type == DT_CHR) {
 		vp->v_type = VCHR;
 		VI_LOCK(vp);
 		dev_lock();
 		dev_refl(dev);
 		/* XXX: v_rdev should be protect by vnode lock */
 		vp->v_rdev = dev;
 		VNPASS(vp->v_usecount == 1, vp);
 		/* Special casing of ttys for deadfs.  Probably redundant. */
 		dsw = dev->si_devsw;
 		if (dsw != NULL && (dsw->d_flags & D_TTY) != 0)
 			vp->v_vflag |= VV_ISTTY;
 		dev_unlock();
 		VI_UNLOCK(vp);
 		if ((dev->si_flags & SI_ETERNAL) != 0)
 			vp->v_vflag |= VV_ETERNALDEV;
 		vp->v_op = &devfs_specops;
 	} else if (de->de_dirent->d_type == DT_DIR) {
 		vp->v_type = VDIR;
 	} else if (de->de_dirent->d_type == DT_LNK) {
 		vp->v_type = VLNK;
 	} else {
 		vp->v_type = VBAD;
 	}
 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY | LK_NOWITNESS);
 	VN_LOCK_ASHARE(vp);
 	mtx_lock(&devfs_de_interlock);
 	vp->v_data = de;
 	de->de_vnode = vp;
 	mtx_unlock(&devfs_de_interlock);
 	error = insmntque1(vp, mp, devfs_insmntque_dtr, de);
 	if (error != 0) {
 		(void) devfs_allocv_drop_refs(1, dmp, de);
 		return (error);
 	}
 	if (devfs_allocv_drop_refs(0, dmp, de)) {
 		vput(vp);
 		return (ENOENT);
 	}
 #ifdef MAC
 	mac_devfs_vnode_associate(mp, de, vp);
 #endif
 	sx_xunlock(&dmp->dm_lock);
 	*vpp = vp;
 	return (0);
 }
 
 static int
 devfs_access(struct vop_access_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct devfs_dirent *de;
 	struct proc *p;
 	int error;
 
 	de = vp->v_data;
 	if (vp->v_type == VDIR)
 		de = de->de_dir;
 
 	error = vaccess(vp->v_type, de->de_mode, de->de_uid, de->de_gid,
 	    ap->a_accmode, ap->a_cred);
 	if (error == 0)
 		return (0);
 	if (error != EACCES)
 		return (error);
 	p = ap->a_td->td_proc;
 	/* We do, however, allow access to the controlling terminal */
 	PROC_LOCK(p);
 	if (!(p->p_flag & P_CONTROLT)) {
 		PROC_UNLOCK(p);
 		return (error);
 	}
 	if (p->p_session->s_ttydp == de->de_cdp)
 		error = 0;
 	PROC_UNLOCK(p);
 	return (error);
 }
 
 _Static_assert(((FMASK | FCNTLFLAGS) & (FLASTCLOSE | FREVOKE)) == 0,
     "devfs-only flag reuse failed");
 
 static int
 devfs_close(struct vop_close_args *ap)
 {
 	struct vnode *vp = ap->a_vp, *oldvp;
 	struct thread *td = ap->a_td;
 	struct proc *p;
 	struct cdev *dev = vp->v_rdev;
 	struct cdevsw *dsw;
 	struct devfs_dirent *de = vp->v_data;
 	int dflags, error, ref, vp_locked;
 
 	/*
 	 * XXX: Don't call d_close() if we were called because of
 	 * XXX: insmntque1() failure.
 	 */
 	if (vp->v_data == NULL)
 		return (0);
 
 	/*
 	 * Hack: a tty device that is a controlling terminal
 	 * has a reference from the session structure.
 	 * We cannot easily tell that a character device is
 	 * a controlling terminal, unless it is the closing
 	 * process' controlling terminal.  In that case,
 	 * if the reference count is 2 (this last descriptor
 	 * plus the session), release the reference from the session.
 	 */
 	if (de->de_usecount == 2 && td != NULL) {
 		p = td->td_proc;
 		PROC_LOCK(p);
 		if (vp == p->p_session->s_ttyvp) {
 			PROC_UNLOCK(p);
 			oldvp = NULL;
 			sx_xlock(&proctree_lock);
 			if (vp == p->p_session->s_ttyvp) {
 				SESS_LOCK(p->p_session);
 				mtx_lock(&devfs_de_interlock);
 				VI_LOCK(vp);
 				if (devfs_usecountl(vp) == 2 && !VN_IS_DOOMED(vp)) {
 					p->p_session->s_ttyvp = NULL;
 					p->p_session->s_ttydp = NULL;
 					oldvp = vp;
 				}
 				VI_UNLOCK(vp);
 				mtx_unlock(&devfs_de_interlock);
 				SESS_UNLOCK(p->p_session);
 			}
 			sx_xunlock(&proctree_lock);
 			if (oldvp != NULL)
 				devfs_ctty_unref(oldvp);
 		} else
 			PROC_UNLOCK(p);
 	}
 	/*
 	 * We do not want to really close the device if it
 	 * is still in use unless we are trying to close it
 	 * forcibly. Since every use (buffer, vnode, swap, cmap)
 	 * holds a reference to the vnode, and because we mark
 	 * any other vnodes that alias this device, when the
 	 * sum of the reference counts on all the aliased
 	 * vnodes descends to one, we are on last close.
 	 */
 	dsw = dev_refthread(dev, &ref);
 	if (dsw == NULL)
 		return (ENXIO);
 	dflags = 0;
 	mtx_lock(&devfs_de_interlock);
 	VI_LOCK(vp);
 	if (devfs_usecountl(vp) == 1)
 		dflags |= FLASTCLOSE;
 	devfs_usecount_subl(vp);
 	mtx_unlock(&devfs_de_interlock);
 	if (VN_IS_DOOMED(vp)) {
 		/* Forced close. */
 		dflags |= FREVOKE | FNONBLOCK;
 	} else if (dsw->d_flags & D_TRACKCLOSE) {
 		/* Keep device updated on status. */
 	} else if ((dflags & FLASTCLOSE) == 0) {
 		VI_UNLOCK(vp);
 		dev_relthread(dev, ref);
 		return (0);
 	}
 	vholdnz(vp);
 	VI_UNLOCK(vp);
 	vp_locked = VOP_ISLOCKED(vp);
 	VOP_UNLOCK(vp);
 	KASSERT(dev->si_refcount > 0,
 	    ("devfs_close() on un-referenced struct cdev *(%s)", devtoname(dev)));
 	error = dsw->d_close(dev, ap->a_fflag | dflags, S_IFCHR, td);
 	dev_relthread(dev, ref);
 	vn_lock(vp, vp_locked | LK_RETRY);
 	vdrop(vp);
 	return (error);
 }
 
 static int
 devfs_close_f(struct file *fp, struct thread *td)
 {
 	int error;
 	struct file *fpop;
 
 	/*
 	 * NB: td may be NULL if this descriptor is closed due to
 	 * garbage collection from a closed UNIX domain socket.
 	 */
 	fpop = curthread->td_fpop;
 	curthread->td_fpop = fp;
 	error = vnops.fo_close(fp, td);
 	curthread->td_fpop = fpop;
 
 	/*
 	 * The f_cdevpriv cannot be assigned non-NULL value while we
 	 * are destroying the file.
 	 */
 	if (fp->f_cdevpriv != NULL)
 		devfs_fpdrop(fp);
 	return (error);
 }
 
 static int
 devfs_getattr(struct vop_getattr_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct vattr *vap = ap->a_vap;
 	struct devfs_dirent *de;
 	struct devfs_mount *dmp;
 	struct cdev *dev;
 	struct timeval boottime;
 	int error;
 
 	error = devfs_populate_vp(vp);
 	if (error != 0)
 		return (error);
 
 	dmp = VFSTODEVFS(vp->v_mount);
 	sx_xunlock(&dmp->dm_lock);
 
 	de = vp->v_data;
 	KASSERT(de != NULL, ("Null dirent in devfs_getattr vp=%p", vp));
 	if (vp->v_type == VDIR) {
 		de = de->de_dir;
 		KASSERT(de != NULL,
 		    ("Null dir dirent in devfs_getattr vp=%p", vp));
 	}
 	vap->va_uid = de->de_uid;
 	vap->va_gid = de->de_gid;
 	vap->va_mode = de->de_mode;
 	if (vp->v_type == VLNK)
 		vap->va_size = strlen(de->de_symlink);
 	else if (vp->v_type == VDIR)
 		vap->va_size = vap->va_bytes = DEV_BSIZE;
 	else
 		vap->va_size = 0;
 	if (vp->v_type != VDIR)
 		vap->va_bytes = 0;
 	vap->va_blocksize = DEV_BSIZE;
 	vap->va_type = vp->v_type;
 
 	getboottime(&boottime);
 #define fix(aa)							\
 	do {							\
 		if ((aa).tv_sec <= 3600) {			\
 			(aa).tv_sec = boottime.tv_sec;		\
 			(aa).tv_nsec = boottime.tv_usec * 1000; \
 		}						\
 	} while (0)
 
 	if (vp->v_type != VCHR)  {
 		fix(de->de_atime);
 		vap->va_atime = de->de_atime;
 		fix(de->de_mtime);
 		vap->va_mtime = de->de_mtime;
 		fix(de->de_ctime);
 		vap->va_ctime = de->de_ctime;
 	} else {
 		dev = vp->v_rdev;
 		fix(dev->si_atime);
 		vap->va_atime = dev->si_atime;
 		fix(dev->si_mtime);
 		vap->va_mtime = dev->si_mtime;
 		fix(dev->si_ctime);
 		vap->va_ctime = dev->si_ctime;
 
 		vap->va_rdev = cdev2priv(dev)->cdp_inode;
 	}
 	vap->va_gen = 0;
 	vap->va_flags = 0;
 	vap->va_filerev = 0;
 	vap->va_nlink = de->de_links;
 	vap->va_fileid = de->de_inode;
 
 	return (error);
 }
 
 /* ARGSUSED */
 static int
 devfs_ioctl_f(struct file *fp, u_long com, void *data, struct ucred *cred, struct thread *td)
 {
 	struct file *fpop;
 	int error;
 
 	fpop = td->td_fpop;
 	td->td_fpop = fp;
 	error = vnops.fo_ioctl(fp, com, data, cred, td);
 	td->td_fpop = fpop;
 	return (error);
 }
 
 void *
 fiodgname_buf_get_ptr(void *fgnp, u_long com)
 {
 	union {
 		struct fiodgname_arg	fgn;
 #ifdef COMPAT_FREEBSD32
 		struct fiodgname_arg32	fgn32;
 #endif
 	} *fgnup;
 
 	fgnup = fgnp;
 	switch (com) {
 	case FIODGNAME:
 		return (fgnup->fgn.buf);
 #ifdef COMPAT_FREEBSD32
 	case FIODGNAME_32:
 		return ((void *)(uintptr_t)fgnup->fgn32.buf);
 #endif
 	default:
 		panic("Unhandled ioctl command %ld", com);
 	}
 }
 
 static int
 devfs_ioctl(struct vop_ioctl_args *ap)
 {
 	struct fiodgname_arg *fgn;
 	struct vnode *vpold, *vp;
 	struct cdevsw *dsw;
 	struct thread *td;
 	struct session *sess;
 	struct cdev *dev;
 	int error, ref, i;
 	const char *p;
 	u_long com;
 
 	vp = ap->a_vp;
 	com = ap->a_command;
 	td = ap->a_td;
 
 	dsw = devvn_refthread(vp, &dev, &ref);
 	if (dsw == NULL)
 		return (ENXIO);
 	KASSERT(dev->si_refcount > 0,
 	    ("devfs: un-referenced struct cdev *(%s)", devtoname(dev)));
 
 	switch (com) {
 	case FIODTYPE:
 		*(int *)ap->a_data = dsw->d_flags & D_TYPEMASK;
 		error = 0;
 		break;
 	case FIODGNAME:
 #ifdef	COMPAT_FREEBSD32
 	case FIODGNAME_32:
 #endif
 		fgn = ap->a_data;
 		p = devtoname(dev);
 		i = strlen(p) + 1;
 		if (i > fgn->len)
 			error = EINVAL;
 		else
 			error = copyout(p, fiodgname_buf_get_ptr(fgn, com), i);
 		break;
 	default:
 		error = dsw->d_ioctl(dev, com, ap->a_data, ap->a_fflag, td);
 	}
 
 	dev_relthread(dev, ref);
 	if (error == ENOIOCTL)
 		error = ENOTTY;
 
 	if (error == 0 && com == TIOCSCTTY) {
 		/*
 		 * Do nothing if reassigning same control tty, or if the
 		 * control tty has already disappeared.  If it disappeared,
 		 * it's because we were racing with TIOCNOTTY.  TIOCNOTTY
 		 * already took care of releasing the old vnode and we have
 		 * nothing left to do.
 		 */
 		sx_slock(&proctree_lock);
 		sess = td->td_proc->p_session;
 		if (sess->s_ttyvp == vp || sess->s_ttyp == NULL) {
 			sx_sunlock(&proctree_lock);
 			return (0);
 		}
 
 		devfs_ctty_ref(vp);
 		SESS_LOCK(sess);
 		vpold = sess->s_ttyvp;
 		sess->s_ttyvp = vp;
 		sess->s_ttydp = cdev2priv(dev);
 		SESS_UNLOCK(sess);
 
 		sx_sunlock(&proctree_lock);
 
 		/* Get rid of reference to old control tty */
 		if (vpold)
 			devfs_ctty_unref(vpold);
 	}
 	return (error);
 }
 
 /* ARGSUSED */
 static int
 devfs_kqfilter_f(struct file *fp, struct knote *kn)
 {
 	struct cdev *dev;
 	struct cdevsw *dsw;
 	int error, ref;
 	struct file *fpop;
 	struct thread *td;
 
 	td = curthread;
 	fpop = td->td_fpop;
 	error = devfs_fp_check(fp, &dev, &dsw, &ref);
 	if (error)
 		return (error);
 	error = dsw->d_kqfilter(dev, kn);
 	td->td_fpop = fpop;
 	dev_relthread(dev, ref);
 	return (error);
 }
 
 static inline int
 devfs_prison_check(struct devfs_dirent *de, struct thread *td)
 {
 	struct cdev_priv *cdp;
 	struct ucred *dcr;
 	struct proc *p;
 	int error;
 
 	cdp = de->de_cdp;
 	if (cdp == NULL)
 		return (0);
 	dcr = cdp->cdp_c.si_cred;
 	if (dcr == NULL)
 		return (0);
 
 	error = prison_check(td->td_ucred, dcr);
 	if (error == 0)
 		return (0);
 	/* We do, however, allow access to the controlling terminal */
 	p = td->td_proc;
 	PROC_LOCK(p);
 	if (!(p->p_flag & P_CONTROLT)) {
 		PROC_UNLOCK(p);
 		return (error);
 	}
 	if (p->p_session->s_ttydp == cdp)
 		error = 0;
 	PROC_UNLOCK(p);
 	return (error);
 }
 
 static int
 devfs_lookupx(struct vop_lookup_args *ap, int *dm_unlock)
 {
 	struct componentname *cnp;
 	struct vnode *dvp, **vpp;
 	struct thread *td;
 	struct devfs_dirent *de, *dd;
 	struct devfs_dirent **dde;
 	struct devfs_mount *dmp;
 	struct mount *mp;
 	struct cdev *cdev;
 	int error, flags, nameiop, dvplocked;
 	char specname[SPECNAMELEN + 1], *pname;
 
 	cnp = ap->a_cnp;
 	vpp = ap->a_vpp;
 	dvp = ap->a_dvp;
 	pname = cnp->cn_nameptr;
 	td = cnp->cn_thread;
 	flags = cnp->cn_flags;
 	nameiop = cnp->cn_nameiop;
 	mp = dvp->v_mount;
 	dmp = VFSTODEVFS(mp);
 	dd = dvp->v_data;
 	*vpp = NULLVP;
 
 	if ((flags & ISLASTCN) && nameiop == RENAME)
 		return (EOPNOTSUPP);
 
 	if (dvp->v_type != VDIR)
 		return (ENOTDIR);
 
 	if ((flags & ISDOTDOT) && (dvp->v_vflag & VV_ROOT))
 		return (EIO);
 
 	error = vn_dir_check_exec(dvp, cnp);
 	if (error != 0)
 		return (error);
 
 	if (cnp->cn_namelen == 1 && *pname == '.') {
 		if ((flags & ISLASTCN) && nameiop != LOOKUP)
 			return (EINVAL);
 		*vpp = dvp;
 		VREF(dvp);
 		return (0);
 	}
 
 	if (flags & ISDOTDOT) {
 		if ((flags & ISLASTCN) && nameiop != LOOKUP)
 			return (EINVAL);
 		de = devfs_parent_dirent(dd);
 		if (de == NULL)
 			return (ENOENT);
 		dvplocked = VOP_ISLOCKED(dvp);
 		VOP_UNLOCK(dvp);
 		error = devfs_allocv(de, mp, cnp->cn_lkflags & LK_TYPE_MASK,
 		    vpp);
 		*dm_unlock = 0;
 		vn_lock(dvp, dvplocked | LK_RETRY);
 		return (error);
 	}
 
 	dd = dvp->v_data;
 	de = devfs_find(dd, cnp->cn_nameptr, cnp->cn_namelen, 0);
 	while (de == NULL) {	/* While(...) so we can use break */
 
 		if (nameiop == DELETE)
 			return (ENOENT);
 
 		/*
 		 * OK, we didn't have an entry for the name we were asked for
 		 * so we try to see if anybody can create it on demand.
 		 */
 		pname = devfs_fqpn(specname, dmp, dd, cnp);
 		if (pname == NULL)
 			break;
 
 		cdev = NULL;
 		DEVFS_DMP_HOLD(dmp);
 		sx_xunlock(&dmp->dm_lock);
 		sx_slock(&clone_drain_lock);
 		EVENTHANDLER_INVOKE(dev_clone,
 		    td->td_ucred, pname, strlen(pname), &cdev);
 		sx_sunlock(&clone_drain_lock);
 
 		if (cdev == NULL)
 			sx_xlock(&dmp->dm_lock);
 		else if (devfs_populate_vp(dvp) != 0) {
 			*dm_unlock = 0;
 			sx_xlock(&dmp->dm_lock);
 			if (DEVFS_DMP_DROP(dmp)) {
 				sx_xunlock(&dmp->dm_lock);
 				devfs_unmount_final(dmp);
 			} else
 				sx_xunlock(&dmp->dm_lock);
 			dev_rel(cdev);
 			return (ENOENT);
 		}
 		if (DEVFS_DMP_DROP(dmp)) {
 			*dm_unlock = 0;
 			sx_xunlock(&dmp->dm_lock);
 			devfs_unmount_final(dmp);
 			if (cdev != NULL)
 				dev_rel(cdev);
 			return (ENOENT);
 		}
 
 		if (cdev == NULL)
 			break;
 
 		dev_lock();
 		dde = &cdev2priv(cdev)->cdp_dirents[dmp->dm_idx];
 		if (dde != NULL && *dde != NULL)
 			de = *dde;
 		dev_unlock();
 		dev_rel(cdev);
 		break;
 	}
 
 	if (de == NULL || de->de_flags & DE_WHITEOUT) {
 		if ((nameiop == CREATE || nameiop == RENAME) &&
 		    (flags & (LOCKPARENT | WANTPARENT)) && (flags & ISLASTCN)) {
 			cnp->cn_flags |= SAVENAME;
 			return (EJUSTRETURN);
 		}
 		return (ENOENT);
 	}
 
 	if (devfs_prison_check(de, td))
 		return (ENOENT);
 
 	if ((cnp->cn_nameiop == DELETE) && (flags & ISLASTCN)) {
 		error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred, td);
 		if (error)
 			return (error);
 		if (*vpp == dvp) {
 			VREF(dvp);
 			*vpp = dvp;
 			return (0);
 		}
 	}
 	error = devfs_allocv(de, mp, cnp->cn_lkflags & LK_TYPE_MASK, vpp);
 	*dm_unlock = 0;
 	return (error);
 }
 
 static int
 devfs_lookup(struct vop_lookup_args *ap)
 {
 	int j;
 	struct devfs_mount *dmp;
 	int dm_unlock;
 
 	if (devfs_populate_vp(ap->a_dvp) != 0)
 		return (ENOTDIR);
 
 	dmp = VFSTODEVFS(ap->a_dvp->v_mount);
 	dm_unlock = 1;
 	j = devfs_lookupx(ap, &dm_unlock);
 	if (dm_unlock == 1)
 		sx_xunlock(&dmp->dm_lock);
 	return (j);
 }
 
 static int
 devfs_mknod(struct vop_mknod_args *ap)
 {
 	struct componentname *cnp;
 	struct vnode *dvp, **vpp;
 	struct devfs_dirent *dd, *de;
 	struct devfs_mount *dmp;
 	int error;
 
 	/*
 	 * The only type of node we should be creating here is a
 	 * character device, for anything else return EOPNOTSUPP.
 	 */
 	if (ap->a_vap->va_type != VCHR)
 		return (EOPNOTSUPP);
 	dvp = ap->a_dvp;
 	dmp = VFSTODEVFS(dvp->v_mount);
 
 	cnp = ap->a_cnp;
 	vpp = ap->a_vpp;
 	dd = dvp->v_data;
 
 	error = ENOENT;
 	sx_xlock(&dmp->dm_lock);
 	TAILQ_FOREACH(de, &dd->de_dlist, de_list) {
 		if (cnp->cn_namelen != de->de_dirent->d_namlen)
 			continue;
 		if (de->de_dirent->d_type == DT_CHR &&
 		    (de->de_cdp->cdp_flags & CDP_ACTIVE) == 0)
 			continue;
 		if (bcmp(cnp->cn_nameptr, de->de_dirent->d_name,
 		    de->de_dirent->d_namlen) != 0)
 			continue;
 		if (de->de_flags & DE_WHITEOUT)
 			break;
 		goto notfound;
 	}
 	if (de == NULL)
 		goto notfound;
 	de->de_flags &= ~DE_WHITEOUT;
 	error = devfs_allocv(de, dvp->v_mount, LK_EXCLUSIVE, vpp);
 	return (error);
 notfound:
 	sx_xunlock(&dmp->dm_lock);
 	return (error);
 }
 
 /* ARGSUSED */
 static int
 devfs_open(struct vop_open_args *ap)
 {
 	struct thread *td = ap->a_td;
 	struct vnode *vp = ap->a_vp;
 	struct cdev *dev = vp->v_rdev;
 	struct file *fp = ap->a_fp;
 	int error, ref, vlocked;
 	struct cdevsw *dsw;
 	struct file *fpop;
 
 	if (vp->v_type == VBLK)
 		return (ENXIO);
 
 	if (dev == NULL)
 		return (ENXIO);
 
 	/* Make this field valid before any I/O in d_open. */
 	if (dev->si_iosize_max == 0)
 		dev->si_iosize_max = DFLTPHYS;
 
 	dsw = dev_refthread(dev, &ref);
 	if (dsw == NULL)
 		return (ENXIO);
 	if (fp == NULL && dsw->d_fdopen != NULL) {
 		dev_relthread(dev, ref);
 		return (ENXIO);
 	}
 
 	if (vp->v_type == VCHR)
 		devfs_usecount_add(vp);
 
 	vlocked = VOP_ISLOCKED(vp);
 	VOP_UNLOCK(vp);
 
 	fpop = td->td_fpop;
 	td->td_fpop = fp;
 	if (fp != NULL) {
 		fp->f_data = dev;
 		fp->f_vnode = vp;
 	}
 	if (dsw->d_fdopen != NULL)
 		error = dsw->d_fdopen(dev, ap->a_mode, td, fp);
 	else
 		error = dsw->d_open(dev, ap->a_mode, S_IFCHR, td);
 	/* Clean up any cdevpriv upon error. */
 	if (error != 0)
 		devfs_clear_cdevpriv();
 	td->td_fpop = fpop;
 
 	vn_lock(vp, vlocked | LK_RETRY);
 	if (error != 0 && vp->v_type == VCHR)
 		devfs_usecount_sub(vp);
 
 	dev_relthread(dev, ref);
 	if (error != 0) {
 		if (error == ERESTART)
 			error = EINTR;
 		return (error);
 	}
 
 #if 0	/* /dev/console */
 	KASSERT(fp != NULL, ("Could not vnode bypass device on NULL fp"));
 #else
 	if (fp == NULL)
 		return (error);
 #endif
 	if (fp->f_ops == &badfileops)
 		finit(fp, fp->f_flag, DTYPE_VNODE, dev, &devfs_ops_f);
 	return (error);
 }
 
 static int
 devfs_pathconf(struct vop_pathconf_args *ap)
 {
 
 	switch (ap->a_name) {
 	case _PC_FILESIZEBITS:
 		*ap->a_retval = 64;
 		return (0);
 	case _PC_NAME_MAX:
 		*ap->a_retval = NAME_MAX;
 		return (0);
 	case _PC_LINK_MAX:
 		*ap->a_retval = INT_MAX;
 		return (0);
 	case _PC_SYMLINK_MAX:
 		*ap->a_retval = MAXPATHLEN;
 		return (0);
 	case _PC_MAX_CANON:
 		if (ap->a_vp->v_vflag & VV_ISTTY) {
 			*ap->a_retval = MAX_CANON;
 			return (0);
 		}
 		return (EINVAL);
 	case _PC_MAX_INPUT:
 		if (ap->a_vp->v_vflag & VV_ISTTY) {
 			*ap->a_retval = MAX_INPUT;
 			return (0);
 		}
 		return (EINVAL);
 	case _PC_VDISABLE:
 		if (ap->a_vp->v_vflag & VV_ISTTY) {
 			*ap->a_retval = _POSIX_VDISABLE;
 			return (0);
 		}
 		return (EINVAL);
 	case _PC_MAC_PRESENT:
 #ifdef MAC
 		/*
 		 * If MAC is enabled, devfs automatically supports
-		 * trivial non-persistant label storage.
+		 * trivial non-persistent label storage.
 		 */
 		*ap->a_retval = 1;
 #else
 		*ap->a_retval = 0;
 #endif
 		return (0);
 	case _PC_CHOWN_RESTRICTED:
 		*ap->a_retval = 1;
 		return (0);
 	default:
 		return (vop_stdpathconf(ap));
 	}
 	/* NOTREACHED */
 }
 
 /* ARGSUSED */
 static int
 devfs_poll_f(struct file *fp, int events, struct ucred *cred, struct thread *td)
 {
 	struct cdev *dev;
 	struct cdevsw *dsw;
 	int error, ref;
 	struct file *fpop;
 
 	fpop = td->td_fpop;
 	error = devfs_fp_check(fp, &dev, &dsw, &ref);
 	if (error != 0) {
 		error = vnops.fo_poll(fp, events, cred, td);
 		return (error);
 	}
 	error = dsw->d_poll(dev, events, td);
 	td->td_fpop = fpop;
 	dev_relthread(dev, ref);
 	return(error);
 }
 
 /*
  * Print out the contents of a special device vnode.
  */
 static int
 devfs_print(struct vop_print_args *ap)
 {
 
 	printf("\tdev %s\n", devtoname(ap->a_vp->v_rdev));
 	return (0);
 }
 
 static int
 devfs_read_f(struct file *fp, struct uio *uio, struct ucred *cred,
     int flags, struct thread *td)
 {
 	struct cdev *dev;
 	int ioflag, error, ref;
 	ssize_t resid;
 	struct cdevsw *dsw;
 	struct file *fpop;
 
 	if (uio->uio_resid > DEVFS_IOSIZE_MAX)
 		return (EINVAL);
 	fpop = td->td_fpop;
 	error = devfs_fp_check(fp, &dev, &dsw, &ref);
 	if (error != 0) {
 		error = vnops.fo_read(fp, uio, cred, flags, td);
 		return (error);
 	}
 	resid = uio->uio_resid;
 	ioflag = fp->f_flag & (O_NONBLOCK | O_DIRECT);
 	if (ioflag & O_DIRECT)
 		ioflag |= IO_DIRECT;
 
 	foffset_lock_uio(fp, uio, flags | FOF_NOLOCK);
 	error = dsw->d_read(dev, uio, ioflag);
 	if (uio->uio_resid != resid || (error == 0 && resid != 0))
 		devfs_timestamp(&dev->si_atime);
 	td->td_fpop = fpop;
 	dev_relthread(dev, ref);
 
 	foffset_unlock_uio(fp, uio, flags | FOF_NOLOCK | FOF_NEXTOFF_R);
 	return (error);
 }
 
 static int
 devfs_readdir(struct vop_readdir_args *ap)
 {
 	int error;
 	struct uio *uio;
 	struct dirent *dp;
 	struct devfs_dirent *dd;
 	struct devfs_dirent *de;
 	struct devfs_mount *dmp;
 	off_t off;
 	int *tmp_ncookies = NULL;
 
 	if (ap->a_vp->v_type != VDIR)
 		return (ENOTDIR);
 
 	uio = ap->a_uio;
 	if (uio->uio_offset < 0)
 		return (EINVAL);
 
 	/*
 	 * XXX: This is a temporary hack to get around this filesystem not
 	 * supporting cookies. We store the location of the ncookies pointer
 	 * in a temporary variable before calling vfs_subr.c:vfs_read_dirent()
 	 * and set the number of cookies to 0. We then set the pointer to
 	 * NULL so that vfs_read_dirent doesn't try to call realloc() on 
 	 * ap->a_cookies. Later in this function, we restore the ap->a_ncookies
 	 * pointer to its original location before returning to the caller.
 	 */
 	if (ap->a_ncookies != NULL) {
 		tmp_ncookies = ap->a_ncookies;
 		*ap->a_ncookies = 0;
 		ap->a_ncookies = NULL;
 	}
 
 	dmp = VFSTODEVFS(ap->a_vp->v_mount);
 	if (devfs_populate_vp(ap->a_vp) != 0) {
 		if (tmp_ncookies != NULL)
 			ap->a_ncookies = tmp_ncookies;
 		return (EIO);
 	}
 	error = 0;
 	de = ap->a_vp->v_data;
 	off = 0;
 	TAILQ_FOREACH(dd, &de->de_dlist, de_list) {
 		KASSERT(dd->de_cdp != (void *)0xdeadc0de, ("%s %d\n", __func__, __LINE__));
 		if (dd->de_flags & (DE_COVERED | DE_WHITEOUT))
 			continue;
 		if (devfs_prison_check(dd, uio->uio_td))
 			continue;
 		if (dd->de_dirent->d_type == DT_DIR)
 			de = dd->de_dir;
 		else
 			de = dd;
 		dp = dd->de_dirent;
 		MPASS(dp->d_reclen == GENERIC_DIRSIZ(dp));
 		if (dp->d_reclen > uio->uio_resid)
 			break;
 		dp->d_fileno = de->de_inode;
 		/* NOTE: d_off is the offset for the *next* entry. */
 		dp->d_off = off + dp->d_reclen;
 		if (off >= uio->uio_offset) {
 			error = vfs_read_dirent(ap, dp, off);
 			if (error)
 				break;
 		}
 		off += dp->d_reclen;
 	}
 	sx_xunlock(&dmp->dm_lock);
 	uio->uio_offset = off;
 
 	/*
 	 * Restore ap->a_ncookies if it wasn't originally NULL in the first
 	 * place.
 	 */
 	if (tmp_ncookies != NULL)
 		ap->a_ncookies = tmp_ncookies;
 
 	return (error);
 }
 
 static int
 devfs_readlink(struct vop_readlink_args *ap)
 {
 	struct devfs_dirent *de;
 
 	de = ap->a_vp->v_data;
 	return (uiomove(de->de_symlink, strlen(de->de_symlink), ap->a_uio));
 }
 
 static void
 devfs_reclaiml(struct vnode *vp)
 {
 	struct devfs_dirent *de;
 
 	mtx_assert(&devfs_de_interlock, MA_OWNED);
 	de = vp->v_data;
 	if (de != NULL) {
 		MPASS(de->de_usecount == 0);
 		de->de_vnode = NULL;
 		vp->v_data = NULL;
 	}
 }
 
 static int
 devfs_reclaim(struct vop_reclaim_args *ap)
 {
 	struct vnode *vp;
 
 	vp = ap->a_vp;
 	mtx_lock(&devfs_de_interlock);
 	devfs_reclaiml(vp);
 	mtx_unlock(&devfs_de_interlock);
 	return (0);
 }
 
 static int
 devfs_reclaim_vchr(struct vop_reclaim_args *ap)
 {
 	struct vnode *vp;
 	struct cdev *dev;
 
 	vp = ap->a_vp;
 	MPASS(vp->v_type == VCHR);
 
 	mtx_lock(&devfs_de_interlock);
 	VI_LOCK(vp);
 	devfs_usecount_subl(vp);
 	devfs_reclaiml(vp);
 	mtx_unlock(&devfs_de_interlock);
 	dev_lock();
 	dev = vp->v_rdev;
 	vp->v_rdev = NULL;
 	dev_unlock();
 	VI_UNLOCK(vp);
 	if (dev != NULL)
 		dev_rel(dev);
 	return (0);
 }
 
 static int
 devfs_remove(struct vop_remove_args *ap)
 {
 	struct vnode *dvp = ap->a_dvp;
 	struct vnode *vp = ap->a_vp;
 	struct devfs_dirent *dd;
 	struct devfs_dirent *de, *de_covered;
 	struct devfs_mount *dmp = VFSTODEVFS(vp->v_mount);
 
 	ASSERT_VOP_ELOCKED(dvp, "devfs_remove");
 	ASSERT_VOP_ELOCKED(vp, "devfs_remove");
 
 	sx_xlock(&dmp->dm_lock);
 	dd = ap->a_dvp->v_data;
 	de = vp->v_data;
 	if (de->de_cdp == NULL) {
 		TAILQ_REMOVE(&dd->de_dlist, de, de_list);
 		if (de->de_dirent->d_type == DT_LNK) {
 			de_covered = devfs_find(dd, de->de_dirent->d_name,
 			    de->de_dirent->d_namlen, 0);
 			if (de_covered != NULL)
 				de_covered->de_flags &= ~DE_COVERED;
 		}
 		/* We need to unlock dvp because devfs_delete() may lock it. */
 		VOP_UNLOCK(vp);
 		if (dvp != vp)
 			VOP_UNLOCK(dvp);
 		devfs_delete(dmp, de, 0);
 		sx_xunlock(&dmp->dm_lock);
 		if (dvp != vp)
 			vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 	} else {
 		de->de_flags |= DE_WHITEOUT;
 		sx_xunlock(&dmp->dm_lock);
 	}
 	return (0);
 }
 
 /*
  * Revoke is called on a tty when a terminal session ends.  The vnode
  * is orphaned by setting v_op to deadfs so we need to let go of it
  * as well so that we create a new one next time around.
  *
  */
 static int
 devfs_revoke(struct vop_revoke_args *ap)
 {
 	struct vnode *vp = ap->a_vp, *vp2;
 	struct cdev *dev;
 	struct cdev_priv *cdp;
 	struct devfs_dirent *de;
 	enum vgetstate vs;
 	u_int i;
 
 	KASSERT((ap->a_flags & REVOKEALL) != 0, ("devfs_revoke !REVOKEALL"));
 
 	dev = vp->v_rdev;
 	cdp = cdev2priv(dev);
 
 	dev_lock();
 	cdp->cdp_inuse++;
 	dev_unlock();
 
 	vhold(vp);
 	vgone(vp);
 	vdrop(vp);
 
 	VOP_UNLOCK(vp);
  loop:
 	for (;;) {
 		mtx_lock(&devfs_de_interlock);
 		dev_lock();
 		vp2 = NULL;
 		for (i = 0; i <= cdp->cdp_maxdirent; i++) {
 			de = cdp->cdp_dirents[i];
 			if (de == NULL)
 				continue;
 
 			vp2 = de->de_vnode;
 			if (vp2 != NULL) {
 				dev_unlock();
 				vs = vget_prep(vp2);
 				mtx_unlock(&devfs_de_interlock);
 				if (vget_finish(vp2, LK_EXCLUSIVE, vs) != 0)
 					goto loop;
 				vhold(vp2);
 				vgone(vp2);
 				vdrop(vp2);
 				vput(vp2);
 				break;
 			}
 		}
 		if (vp2 != NULL) {
 			continue;
 		}
 		dev_unlock();
 		mtx_unlock(&devfs_de_interlock);
 		break;
 	}
 	dev_lock();
 	cdp->cdp_inuse--;
 	if (!(cdp->cdp_flags & CDP_ACTIVE) && cdp->cdp_inuse == 0) {
 		TAILQ_REMOVE(&cdevp_list, cdp, cdp_list);
 		dev_unlock();
 		dev_rel(&cdp->cdp_c);
 	} else
 		dev_unlock();
 
 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 	return (0);
 }
 
 static int
 devfs_rioctl(struct vop_ioctl_args *ap)
 {
 	struct vnode *vp;
 	struct devfs_mount *dmp;
 	int error;
 
 	vp = ap->a_vp;
 	vn_lock(vp, LK_SHARED | LK_RETRY);
 	if (VN_IS_DOOMED(vp)) {
 		VOP_UNLOCK(vp);
 		return (EBADF);
 	}
 	dmp = VFSTODEVFS(vp->v_mount);
 	sx_xlock(&dmp->dm_lock);
 	VOP_UNLOCK(vp);
 	DEVFS_DMP_HOLD(dmp);
 	devfs_populate(dmp);
 	if (DEVFS_DMP_DROP(dmp)) {
 		sx_xunlock(&dmp->dm_lock);
 		devfs_unmount_final(dmp);
 		return (ENOENT);
 	}
 	error = devfs_rules_ioctl(dmp, ap->a_command, ap->a_data, ap->a_td);
 	sx_xunlock(&dmp->dm_lock);
 	return (error);
 }
 
 static int
 devfs_rread(struct vop_read_args *ap)
 {
 
 	if (ap->a_vp->v_type != VDIR)
 		return (EINVAL);
 	return (VOP_READDIR(ap->a_vp, ap->a_uio, ap->a_cred, NULL, NULL, NULL));
 }
 
 static int
 devfs_setattr(struct vop_setattr_args *ap)
 {
 	struct devfs_dirent *de;
 	struct vattr *vap;
 	struct vnode *vp;
 	struct thread *td;
 	int c, error;
 	uid_t uid;
 	gid_t gid;
 
 	vap = ap->a_vap;
 	vp = ap->a_vp;
 	td = curthread;
 	if ((vap->va_type != VNON) ||
 	    (vap->va_nlink != VNOVAL) ||
 	    (vap->va_fsid != VNOVAL) ||
 	    (vap->va_fileid != VNOVAL) ||
 	    (vap->va_blocksize != VNOVAL) ||
 	    (vap->va_flags != VNOVAL && vap->va_flags != 0) ||
 	    (vap->va_rdev != VNOVAL) ||
 	    ((int)vap->va_bytes != VNOVAL) ||
 	    (vap->va_gen != VNOVAL)) {
 		return (EINVAL);
 	}
 
 	error = devfs_populate_vp(vp);
 	if (error != 0)
 		return (error);
 
 	de = vp->v_data;
 	if (vp->v_type == VDIR)
 		de = de->de_dir;
 
 	c = 0;
 	if (vap->va_uid == (uid_t)VNOVAL)
 		uid = de->de_uid;
 	else
 		uid = vap->va_uid;
 	if (vap->va_gid == (gid_t)VNOVAL)
 		gid = de->de_gid;
 	else
 		gid = vap->va_gid;
 	if (uid != de->de_uid || gid != de->de_gid) {
 		if ((ap->a_cred->cr_uid != de->de_uid) || uid != de->de_uid ||
 		    (gid != de->de_gid && !groupmember(gid, ap->a_cred))) {
 			error = priv_check(td, PRIV_VFS_CHOWN);
 			if (error != 0)
 				goto ret;
 		}
 		de->de_uid = uid;
 		de->de_gid = gid;
 		c = 1;
 	}
 
 	if (vap->va_mode != (mode_t)VNOVAL) {
 		if (ap->a_cred->cr_uid != de->de_uid) {
 			error = priv_check(td, PRIV_VFS_ADMIN);
 			if (error != 0)
 				goto ret;
 		}
 		de->de_mode = vap->va_mode;
 		c = 1;
 	}
 
 	if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL) {
 		error = vn_utimes_perm(vp, vap, ap->a_cred, td);
 		if (error != 0)
 			goto ret;
 		if (vap->va_atime.tv_sec != VNOVAL) {
 			if (vp->v_type == VCHR)
 				vp->v_rdev->si_atime = vap->va_atime;
 			else
 				de->de_atime = vap->va_atime;
 		}
 		if (vap->va_mtime.tv_sec != VNOVAL) {
 			if (vp->v_type == VCHR)
 				vp->v_rdev->si_mtime = vap->va_mtime;
 			else
 				de->de_mtime = vap->va_mtime;
 		}
 		c = 1;
 	}
 
 	if (c) {
 		if (vp->v_type == VCHR)
 			vfs_timestamp(&vp->v_rdev->si_ctime);
 		else
 			vfs_timestamp(&de->de_mtime);
 	}
 
 ret:
 	sx_xunlock(&VFSTODEVFS(vp->v_mount)->dm_lock);
 	return (error);
 }
 
 #ifdef MAC
 static int
 devfs_setlabel(struct vop_setlabel_args *ap)
 {
 	struct vnode *vp;
 	struct devfs_dirent *de;
 
 	vp = ap->a_vp;
 	de = vp->v_data;
 
 	mac_vnode_relabel(ap->a_cred, vp, ap->a_label);
 	mac_devfs_update(vp->v_mount, de, vp);
 
 	return (0);
 }
 #endif
 
 static int
 devfs_stat_f(struct file *fp, struct stat *sb, struct ucred *cred, struct thread *td)
 {
 
 	return (vnops.fo_stat(fp, sb, cred, td));
 }
 
 static int
 devfs_symlink(struct vop_symlink_args *ap)
 {
 	int i, error;
 	struct devfs_dirent *dd;
 	struct devfs_dirent *de, *de_covered, *de_dotdot;
 	struct devfs_mount *dmp;
 
 	error = priv_check(curthread, PRIV_DEVFS_SYMLINK);
 	if (error)
 		return(error);
 	dmp = VFSTODEVFS(ap->a_dvp->v_mount);
 	if (devfs_populate_vp(ap->a_dvp) != 0)
 		return (ENOENT);
 
 	dd = ap->a_dvp->v_data;
 	de = devfs_newdirent(ap->a_cnp->cn_nameptr, ap->a_cnp->cn_namelen);
 	de->de_flags = DE_USER;
 	de->de_uid = 0;
 	de->de_gid = 0;
 	de->de_mode = 0755;
 	de->de_inode = alloc_unr(devfs_inos);
 	de->de_dir = dd;
 	de->de_dirent->d_type = DT_LNK;
 	i = strlen(ap->a_target) + 1;
 	de->de_symlink = malloc(i, M_DEVFS, M_WAITOK);
 	bcopy(ap->a_target, de->de_symlink, i);
 #ifdef MAC
 	mac_devfs_create_symlink(ap->a_cnp->cn_cred, dmp->dm_mount, dd, de);
 #endif
 	de_covered = devfs_find(dd, de->de_dirent->d_name,
 	    de->de_dirent->d_namlen, 0);
 	if (de_covered != NULL) {
 		if ((de_covered->de_flags & DE_USER) != 0) {
 			devfs_delete(dmp, de, DEVFS_DEL_NORECURSE);
 			sx_xunlock(&dmp->dm_lock);
 			return (EEXIST);
 		}
 		KASSERT((de_covered->de_flags & DE_COVERED) == 0,
 		    ("devfs_symlink: entry %p already covered", de_covered));
 		de_covered->de_flags |= DE_COVERED;
 	}
 
 	de_dotdot = TAILQ_FIRST(&dd->de_dlist);		/* "." */
 	de_dotdot = TAILQ_NEXT(de_dotdot, de_list);	/* ".." */
 	TAILQ_INSERT_AFTER(&dd->de_dlist, de_dotdot, de, de_list);
 	devfs_dir_ref_de(dmp, dd);
 	devfs_rules_apply(dmp, de);
 
 	return (devfs_allocv(de, ap->a_dvp->v_mount, LK_EXCLUSIVE, ap->a_vpp));
 }
 
 static int
 devfs_truncate_f(struct file *fp, off_t length, struct ucred *cred, struct thread *td)
 {
 
 	return (vnops.fo_truncate(fp, length, cred, td));
 }
 
 static int
 devfs_write_f(struct file *fp, struct uio *uio, struct ucred *cred,
     int flags, struct thread *td)
 {
 	struct cdev *dev;
 	int error, ioflag, ref;
 	ssize_t resid;
 	struct cdevsw *dsw;
 	struct file *fpop;
 
 	if (uio->uio_resid > DEVFS_IOSIZE_MAX)
 		return (EINVAL);
 	fpop = td->td_fpop;
 	error = devfs_fp_check(fp, &dev, &dsw, &ref);
 	if (error != 0) {
 		error = vnops.fo_write(fp, uio, cred, flags, td);
 		return (error);
 	}
 	KASSERT(uio->uio_td == td, ("uio_td %p is not td %p", uio->uio_td, td));
 	ioflag = fp->f_flag & (O_NONBLOCK | O_DIRECT | O_FSYNC);
 	if (ioflag & O_DIRECT)
 		ioflag |= IO_DIRECT;
 	foffset_lock_uio(fp, uio, flags | FOF_NOLOCK);
 
 	resid = uio->uio_resid;
 
 	error = dsw->d_write(dev, uio, ioflag);
 	if (uio->uio_resid != resid || (error == 0 && resid != 0)) {
 		devfs_timestamp(&dev->si_ctime);
 		dev->si_mtime = dev->si_ctime;
 	}
 	td->td_fpop = fpop;
 	dev_relthread(dev, ref);
 
 	foffset_unlock_uio(fp, uio, flags | FOF_NOLOCK | FOF_NEXTOFF_W);
 	return (error);
 }
 
 static int
 devfs_mmap_f(struct file *fp, vm_map_t map, vm_offset_t *addr, vm_size_t size,
     vm_prot_t prot, vm_prot_t cap_maxprot, int flags, vm_ooffset_t foff,
     struct thread *td)
 {
 	struct cdev *dev;
 	struct cdevsw *dsw;
 	struct mount *mp;
 	struct vnode *vp;
 	struct file *fpop;
 	vm_object_t object;
 	vm_prot_t maxprot;
 	int error, ref;
 
 	vp = fp->f_vnode;
 
 	/*
 	 * Ensure that file and memory protections are
 	 * compatible.
 	 */
 	mp = vp->v_mount;
 	if (mp != NULL && (mp->mnt_flag & MNT_NOEXEC) != 0) {
 		maxprot = VM_PROT_NONE;
 		if ((prot & VM_PROT_EXECUTE) != 0)
 			return (EACCES);
 	} else
 		maxprot = VM_PROT_EXECUTE;
 	if ((fp->f_flag & FREAD) != 0)
 		maxprot |= VM_PROT_READ;
 	else if ((prot & VM_PROT_READ) != 0)
 		return (EACCES);
 
 	/*
 	 * If we are sharing potential changes via MAP_SHARED and we
 	 * are trying to get write permission although we opened it
 	 * without asking for it, bail out.
 	 *
 	 * Note that most character devices always share mappings.
 	 * The one exception is that D_MMAP_ANON devices
 	 * (i.e. /dev/zero) permit private writable mappings.
 	 *
 	 * Rely on vm_mmap_cdev() to fail invalid MAP_PRIVATE requests
 	 * as well as updating maxprot to permit writing for
 	 * D_MMAP_ANON devices rather than doing that here.
 	 */
 	if ((flags & MAP_SHARED) != 0) {
 		if ((fp->f_flag & FWRITE) != 0)
 			maxprot |= VM_PROT_WRITE;
 		else if ((prot & VM_PROT_WRITE) != 0)
 			return (EACCES);
 	}
 	maxprot &= cap_maxprot;
 
 	fpop = td->td_fpop;
 	error = devfs_fp_check(fp, &dev, &dsw, &ref);
 	if (error != 0)
 		return (error);
 
 	error = vm_mmap_cdev(td, size, prot, &maxprot, &flags, dev, dsw, &foff,
 	    &object);
 	td->td_fpop = fpop;
 	dev_relthread(dev, ref);
 	if (error != 0)
 		return (error);
 
 	error = vm_mmap_object(map, addr, size, prot, maxprot, flags, object,
 	    foff, FALSE, td);
 	if (error != 0)
 		vm_object_deallocate(object);
 	return (error);
 }
 
 dev_t
 dev2udev(struct cdev *x)
 {
 	if (x == NULL)
 		return (NODEV);
 	return (cdev2priv(x)->cdp_inode);
 }
 
 static struct fileops devfs_ops_f = {
 	.fo_read =	devfs_read_f,
 	.fo_write =	devfs_write_f,
 	.fo_truncate =	devfs_truncate_f,
 	.fo_ioctl =	devfs_ioctl_f,
 	.fo_poll =	devfs_poll_f,
 	.fo_kqfilter =	devfs_kqfilter_f,
 	.fo_stat =	devfs_stat_f,
 	.fo_close =	devfs_close_f,
 	.fo_chmod =	vn_chmod,
 	.fo_chown =	vn_chown,
 	.fo_sendfile =	vn_sendfile,
 	.fo_seek =	vn_seek,
 	.fo_fill_kinfo = vn_fill_kinfo,
 	.fo_mmap =	devfs_mmap_f,
 	.fo_flags =	DFLAG_PASSABLE | DFLAG_SEEKABLE
 };
 
 /* Vops for non-CHR vnodes in /dev. */
 static struct vop_vector devfs_vnodeops = {
 	.vop_default =		&default_vnodeops,
 
 	.vop_access =		devfs_access,
 	.vop_getattr =		devfs_getattr,
 	.vop_ioctl =		devfs_rioctl,
 	.vop_lookup =		devfs_lookup,
 	.vop_mknod =		devfs_mknod,
 	.vop_pathconf =		devfs_pathconf,
 	.vop_read =		devfs_rread,
 	.vop_readdir =		devfs_readdir,
 	.vop_readlink =		devfs_readlink,
 	.vop_reclaim =		devfs_reclaim,
 	.vop_remove =		devfs_remove,
 	.vop_revoke =		devfs_revoke,
 	.vop_setattr =		devfs_setattr,
 #ifdef MAC
 	.vop_setlabel =		devfs_setlabel,
 #endif
 	.vop_symlink =		devfs_symlink,
 	.vop_vptocnp =		devfs_vptocnp,
 	.vop_lock1 =		vop_lock,
 	.vop_unlock =		vop_unlock,
 	.vop_islocked =		vop_islocked,
 };
 VFS_VOP_VECTOR_REGISTER(devfs_vnodeops);
 
 /* Vops for VCHR vnodes in /dev. */
 static struct vop_vector devfs_specops = {
 	.vop_default =		&default_vnodeops,
 
 	.vop_access =		devfs_access,
 	.vop_bmap =		VOP_PANIC,
 	.vop_close =		devfs_close,
 	.vop_create =		VOP_PANIC,
 	.vop_fsync =		vop_stdfsync,
 	.vop_getattr =		devfs_getattr,
 	.vop_ioctl =		devfs_ioctl,
 	.vop_link =		VOP_PANIC,
 	.vop_mkdir =		VOP_PANIC,
 	.vop_mknod =		VOP_PANIC,
 	.vop_open =		devfs_open,
 	.vop_pathconf =		devfs_pathconf,
 	.vop_poll =		dead_poll,
 	.vop_print =		devfs_print,
 	.vop_read =		dead_read,
 	.vop_readdir =		VOP_PANIC,
 	.vop_readlink =		VOP_PANIC,
 	.vop_reallocblks =	VOP_PANIC,
 	.vop_reclaim =		devfs_reclaim_vchr,
 	.vop_remove =		devfs_remove,
 	.vop_rename =		VOP_PANIC,
 	.vop_revoke =		devfs_revoke,
 	.vop_rmdir =		VOP_PANIC,
 	.vop_setattr =		devfs_setattr,
 #ifdef MAC
 	.vop_setlabel =		devfs_setlabel,
 #endif
 	.vop_strategy =		VOP_PANIC,
 	.vop_symlink =		VOP_PANIC,
 	.vop_vptocnp =		devfs_vptocnp,
 	.vop_write =		dead_write,
 	.vop_lock1 =		vop_lock,
 	.vop_unlock =		vop_unlock,
 	.vop_islocked =		vop_islocked,
 };
 VFS_VOP_VECTOR_REGISTER(devfs_specops);
 
 /*
  * Our calling convention to the device drivers used to be that we passed
  * vnode.h IO_* flags to read()/write(), but we're moving to fcntl.h O_ 
  * flags instead since that's what open(), close() and ioctl() takes and
  * we don't really want vnode.h in device drivers.
  * We solved the source compatibility by redefining some vnode flags to
  * be the same as the fcntl ones and by sending down the bitwise OR of
  * the respective fcntl/vnode flags.  These CTASSERTS make sure nobody
  * pulls the rug out under this.
  */
 CTASSERT(O_NONBLOCK == IO_NDELAY);
 CTASSERT(O_FSYNC == IO_SYNC);
diff --git a/sys/fs/ext2fs/ext2_alloc.c b/sys/fs/ext2fs/ext2_alloc.c
index 4c265a1a8bcc..79df804aceb8 100644
--- a/sys/fs/ext2fs/ext2_alloc.c
+++ b/sys/fs/ext2fs/ext2_alloc.c
@@ -1,1577 +1,1577 @@
 /*-
  *  modified for Lites 1.1
  *
  *  Aug 1995, Godmar Back (gback@cs.utah.edu)
  *  University of Utah, Department of Computer Science
  */
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 1982, 1986, 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)ffs_alloc.c	8.8 (Berkeley) 2/21/94
  * $FreeBSD$
  */
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/conf.h>
 #include <sys/vnode.h>
 #include <sys/sdt.h>
 #include <sys/stat.h>
 #include <sys/mount.h>
 #include <sys/sysctl.h>
 #include <sys/syslog.h>
 #include <sys/buf.h>
 #include <sys/endian.h>
 
 #include <fs/ext2fs/fs.h>
 #include <fs/ext2fs/inode.h>
 #include <fs/ext2fs/ext2_mount.h>
 #include <fs/ext2fs/ext2fs.h>
 #include <fs/ext2fs/ext2_extern.h>
 
 SDT_PROVIDER_DEFINE(ext2fs);
 /*
  * ext2fs trace probe:
  * arg0: verbosity. Higher numbers give more verbose messages
  * arg1: Textual message
  */
 SDT_PROBE_DEFINE2(ext2fs, , alloc, trace, "int", "char*");
 SDT_PROBE_DEFINE3(ext2fs, , alloc, ext2_reallocblks_realloc,
     "ino_t", "e2fs_lbn_t", "e2fs_lbn_t");
 SDT_PROBE_DEFINE1(ext2fs, , alloc, ext2_reallocblks_bap, "uint32_t");
 SDT_PROBE_DEFINE1(ext2fs, , alloc, ext2_reallocblks_blkno, "e2fs_daddr_t");
 SDT_PROBE_DEFINE2(ext2fs, , alloc, ext2_b_bitmap_validate_error, "char*", "int");
 SDT_PROBE_DEFINE3(ext2fs, , alloc, ext2_nodealloccg_bmap_corrupted,
     "int", "daddr_t", "char*");
 SDT_PROBE_DEFINE2(ext2fs, , alloc, ext2_blkfree_bad_block, "ino_t", "e4fs_daddr_t");
 SDT_PROBE_DEFINE2(ext2fs, , alloc, ext2_vfree_doublefree, "char*", "ino_t");
 
 static daddr_t	ext2_alloccg(struct inode *, int, daddr_t, int);
 static daddr_t	ext2_clusteralloc(struct inode *, int, daddr_t, int);
 static u_long	ext2_dirpref(struct inode *);
 static e4fs_daddr_t ext2_hashalloc(struct inode *, int, long, int,
     daddr_t (*)(struct inode *, int, daddr_t, 
 						int));
 static daddr_t	ext2_nodealloccg(struct inode *, int, daddr_t, int);
 static daddr_t  ext2_mapsearch(struct m_ext2fs *, char *, daddr_t);
 
 /*
  * Allocate a block in the filesystem.
  *
  * A preference may be optionally specified. If a preference is given
  * the following hierarchy is used to allocate a block:
  *   1) allocate the requested block.
  *   2) allocate a rotationally optimal block in the same cylinder.
  *   3) allocate a block in the same cylinder group.
- *   4) quadradically rehash into other cylinder groups, until an
+ *   4) quadratically rehash into other cylinder groups, until an
  *        available block is located.
  * If no block preference is given the following hierarchy is used
  * to allocate a block:
  *   1) allocate a block in the cylinder group that contains the
  *        inode for the file.
- *   2) quadradically rehash into other cylinder groups, until an
+ *   2) quadratically rehash into other cylinder groups, until an
  *        available block is located.
  */
 int
 ext2_alloc(struct inode *ip, daddr_t lbn, e4fs_daddr_t bpref, int size,
     struct ucred *cred, e4fs_daddr_t *bnp)
 {
 	struct m_ext2fs *fs;
 	struct ext2mount *ump;
 	e4fs_daddr_t bno;
 	int cg;
 
 	*bnp = 0;
 	fs = ip->i_e2fs;
 	ump = ip->i_ump;
 	mtx_assert(EXT2_MTX(ump), MA_OWNED);
 #ifdef INVARIANTS
 	if ((u_int)size > fs->e2fs_bsize || blkoff(fs, size) != 0) {
 		vn_printf(ip->i_devvp, "bsize = %lu, size = %d, fs = %s\n",
 		    (long unsigned int)fs->e2fs_bsize, size, fs->e2fs_fsmnt);
 		panic("ext2_alloc: bad size");
 	}
 	if (cred == NOCRED)
 		panic("ext2_alloc: missing credential");
 #endif		/* INVARIANTS */
 	if (size == fs->e2fs_bsize && fs->e2fs_fbcount == 0)
 		goto nospace;
 	if (cred->cr_uid != 0 &&
 	    fs->e2fs_fbcount < fs->e2fs_rbcount)
 		goto nospace;
 	if (bpref >= fs->e2fs_bcount)
 		bpref = 0;
 	if (bpref == 0)
 		cg = ino_to_cg(fs, ip->i_number);
 	else
 		cg = dtog(fs, bpref);
 	bno = (daddr_t)ext2_hashalloc(ip, cg, bpref, fs->e2fs_bsize,
 	    ext2_alloccg);
 	if (bno > 0) {
 		/* set next_alloc fields as done in block_getblk */
 		ip->i_next_alloc_block = lbn;
 		ip->i_next_alloc_goal = bno;
 
 		ip->i_blocks += btodb(fs->e2fs_bsize);
 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
 		*bnp = bno;
 		return (0);
 	}
 nospace:
 	EXT2_UNLOCK(ump);
 	SDT_PROBE2(ext2fs, , alloc, trace, 1, "cannot allocate data block");
 	return (ENOSPC);
 }
 
 /*
  * Allocate EA's block for inode.
  */
 e4fs_daddr_t
 ext2_alloc_meta(struct inode *ip)
 {
 	struct m_ext2fs *fs;
 	daddr_t blk;
 
 	fs = ip->i_e2fs;
 
 	EXT2_LOCK(ip->i_ump);
 	blk = ext2_hashalloc(ip, ino_to_cg(fs, ip->i_number), 0, fs->e2fs_bsize,
 	    ext2_alloccg);
 	if (0 == blk) {
 		EXT2_UNLOCK(ip->i_ump);
 		SDT_PROBE2(ext2fs, , alloc, trace, 1, "cannot allocate meta block");
 	}
 
 	return (blk);
 }
 
 /*
  * Reallocate a sequence of blocks into a contiguous sequence of blocks.
  *
  * The vnode and an array of buffer pointers for a range of sequential
  * logical blocks to be made contiguous is given. The allocator attempts
  * to find a range of sequential blocks starting as close as possible to
  * an fs_rotdelay offset from the end of the allocation for the logical
  * block immediately preceding the current range. If successful, the
  * physical block numbers in the buffer pointers and in the inode are
  * changed to reflect the new allocation. If unsuccessful, the allocation
  * is left unchanged. The success in doing the reallocation is returned.
  * Note that the error return is not reflected back to the user. Rather
  * the previous block allocation will be used.
  */
 
 static SYSCTL_NODE(_vfs, OID_AUTO, ext2fs, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
     "EXT2FS filesystem");
 
 static int doasyncfree = 1;
 
 SYSCTL_INT(_vfs_ext2fs, OID_AUTO, doasyncfree, CTLFLAG_RW, &doasyncfree, 0,
     "Use asynchronous writes to update block pointers when freeing blocks");
 
 static int doreallocblks = 0;
 
 SYSCTL_INT(_vfs_ext2fs, OID_AUTO, doreallocblks, CTLFLAG_RW, &doreallocblks, 0, "");
 
 int
 ext2_reallocblks(struct vop_reallocblks_args *ap)
 {
 	struct m_ext2fs *fs;
 	struct inode *ip;
 	struct vnode *vp;
 	struct buf *sbp, *ebp;
 	uint32_t *bap, *sbap, *ebap;
 	struct ext2mount *ump;
 	struct cluster_save *buflist;
 	struct indir start_ap[EXT2_NIADDR + 1], end_ap[EXT2_NIADDR + 1], *idp;
 	e2fs_lbn_t start_lbn, end_lbn;
 	int soff;
 	e2fs_daddr_t newblk, blkno;
 	int i, len, start_lvl, end_lvl, pref, ssize;
 
 	if (doreallocblks == 0)
 		return (ENOSPC);
 
 	vp = ap->a_vp;
 	ip = VTOI(vp);
 	fs = ip->i_e2fs;
 	ump = ip->i_ump;
 
 	if (fs->e2fs_contigsumsize <= 0 || ip->i_flag & IN_E4EXTENTS)
 		return (ENOSPC);
 
 	buflist = ap->a_buflist;
 	len = buflist->bs_nchildren;
 	start_lbn = buflist->bs_children[0]->b_lblkno;
 	end_lbn = start_lbn + len - 1;
 #ifdef INVARIANTS
 	for (i = 1; i < len; i++)
 		if (buflist->bs_children[i]->b_lblkno != start_lbn + i)
 			panic("ext2_reallocblks: non-cluster");
 #endif
 	/*
 	 * If the cluster crosses the boundary for the first indirect
 	 * block, leave space for the indirect block. Indirect blocks
 	 * are initially laid out in a position after the last direct
 	 * block. Block reallocation would usually destroy locality by
 	 * moving the indirect block out of the way to make room for
 	 * data blocks if we didn't compensate here. We should also do
 	 * this for other indirect block boundaries, but it is only
 	 * important for the first one.
 	 */
 	if (start_lbn < EXT2_NDADDR && end_lbn >= EXT2_NDADDR)
 		return (ENOSPC);
 	/*
 	 * If the latest allocation is in a new cylinder group, assume that
 	 * the filesystem has decided to move and do not force it back to
 	 * the previous cylinder group.
 	 */
 	if (dtog(fs, dbtofsb(fs, buflist->bs_children[0]->b_blkno)) !=
 	    dtog(fs, dbtofsb(fs, buflist->bs_children[len - 1]->b_blkno)))
 		return (ENOSPC);
 	if (ext2_getlbns(vp, start_lbn, start_ap, &start_lvl) ||
 	    ext2_getlbns(vp, end_lbn, end_ap, &end_lvl))
 		return (ENOSPC);
 	/*
 	 * Get the starting offset and block map for the first block.
 	 */
 	if (start_lvl == 0) {
 		sbap = &ip->i_db[0];
 		soff = start_lbn;
 	} else {
 		idp = &start_ap[start_lvl - 1];
 		if (bread(vp, idp->in_lbn, (int)fs->e2fs_bsize, NOCRED, &sbp)) {
 			brelse(sbp);
 			return (ENOSPC);
 		}
 		sbap = (u_int *)sbp->b_data;
 		soff = idp->in_off;
 	}
 	/*
 	 * If the block range spans two block maps, get the second map.
 	 */
 	ebap = NULL;
 	if (end_lvl == 0 || (idp = &end_ap[end_lvl - 1])->in_off + 1 >= len) {
 		ssize = len;
 	} else {
 #ifdef INVARIANTS
 		if (start_ap[start_lvl - 1].in_lbn == idp->in_lbn)
 			panic("ext2_reallocblks: start == end");
 #endif
 		ssize = len - (idp->in_off + 1);
 		if (bread(vp, idp->in_lbn, (int)fs->e2fs_bsize, NOCRED, &ebp))
 			goto fail;
 		ebap = (u_int *)ebp->b_data;
 	}
 	/*
 	 * Find the preferred location for the cluster.
 	 */
 	EXT2_LOCK(ump);
 	pref = ext2_blkpref(ip, start_lbn, soff, sbap, 0);
 	/*
 	 * Search the block map looking for an allocation of the desired size.
 	 */
 	if ((newblk = (e2fs_daddr_t)ext2_hashalloc(ip, dtog(fs, pref), pref,
 	    len, ext2_clusteralloc)) == 0) {
 		EXT2_UNLOCK(ump);
 		goto fail;
 	}
 	/*
 	 * We have found a new contiguous block.
 	 *
 	 * First we have to replace the old block pointers with the new
 	 * block pointers in the inode and indirect blocks associated
 	 * with the file.
 	 */
 	SDT_PROBE3(ext2fs, , alloc, ext2_reallocblks_realloc,
 	    ip->i_number, start_lbn, end_lbn);
 	blkno = newblk;
 	for (bap = &sbap[soff], i = 0; i < len; i++, blkno += fs->e2fs_fpb) {
 		if (i == ssize) {
 			bap = ebap;
 			soff = -i;
 		}
 #ifdef INVARIANTS
 		if (buflist->bs_children[i]->b_blkno != fsbtodb(fs, *bap))
 			panic("ext2_reallocblks: alloc mismatch");
 #endif
 		SDT_PROBE1(ext2fs, , alloc, ext2_reallocblks_bap, *bap);
 		*bap++ = blkno;
 	}
 	/*
 	 * Next we must write out the modified inode and indirect blocks.
 	 * For strict correctness, the writes should be synchronous since
 	 * the old block values may have been written to disk. In practise
 	 * they are almost never written, but if we are concerned about
 	 * strict correctness, the `doasyncfree' flag should be set to zero.
 	 *
 	 * The test on `doasyncfree' should be changed to test a flag
 	 * that shows whether the associated buffers and inodes have
 	 * been written. The flag should be set when the cluster is
 	 * started and cleared whenever the buffer or inode is flushed.
 	 * We can then check below to see if it is set, and do the
 	 * synchronous write only when it has been cleared.
 	 */
 	if (sbap != &ip->i_db[0]) {
 		if (doasyncfree)
 			bdwrite(sbp);
 		else
 			bwrite(sbp);
 	} else {
 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
 		if (!doasyncfree)
 			ext2_update(vp, 1);
 	}
 	if (ssize < len) {
 		if (doasyncfree)
 			bdwrite(ebp);
 		else
 			bwrite(ebp);
 	}
 	/*
 	 * Last, free the old blocks and assign the new blocks to the buffers.
 	 */
 	for (blkno = newblk, i = 0; i < len; i++, blkno += fs->e2fs_fpb) {
 		ext2_blkfree(ip, dbtofsb(fs, buflist->bs_children[i]->b_blkno),
 		    fs->e2fs_bsize);
 		buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno);
 		SDT_PROBE1(ext2fs, , alloc, ext2_reallocblks_blkno, blkno);
 	}
 
 	return (0);
 
 fail:
 	if (ssize < len)
 		brelse(ebp);
 	if (sbap != &ip->i_db[0])
 		brelse(sbp);
 	return (ENOSPC);
 }
 
 /*
  * Allocate an inode in the filesystem.
  *
  */
 int
 ext2_valloc(struct vnode *pvp, int mode, struct ucred *cred, struct vnode **vpp)
 {
 	struct timespec ts;
 	struct m_ext2fs *fs;
 	struct ext2mount *ump;
 	struct inode *pip;
 	struct inode *ip;
 	struct vnode *vp;
 	struct thread *td;
 	ino_t ino, ipref;
 	int error, cg;
 
 	*vpp = NULL;
 	pip = VTOI(pvp);
 	fs = pip->i_e2fs;
 	ump = pip->i_ump;
 
 	EXT2_LOCK(ump);
 	if (fs->e2fs_ficount == 0)
 		goto noinodes;
 	/*
 	 * If it is a directory then obtain a cylinder group based on
 	 * ext2_dirpref else obtain it using ino_to_cg. The preferred inode is
 	 * always the next inode.
 	 */
 	if ((mode & IFMT) == IFDIR) {
 		cg = ext2_dirpref(pip);
 		if (fs->e2fs_contigdirs[cg] < 255)
 			fs->e2fs_contigdirs[cg]++;
 	} else {
 		cg = ino_to_cg(fs, pip->i_number);
 		if (fs->e2fs_contigdirs[cg] > 0)
 			fs->e2fs_contigdirs[cg]--;
 	}
 	ipref = cg * fs->e2fs_ipg + 1;
 	ino = (ino_t)ext2_hashalloc(pip, cg, (long)ipref, mode, ext2_nodealloccg);
 	if (ino == 0)
 		goto noinodes;
 
 	td = curthread;
 	error = vfs_hash_get(ump->um_mountp, ino, LK_EXCLUSIVE, td, vpp, NULL, NULL);
 	if (error || *vpp != NULL) {
 		return (error);
 	}
 
 	ip = malloc(sizeof(struct inode), M_EXT2NODE, M_WAITOK | M_ZERO);
 
 	/* Allocate a new vnode/inode. */
 	if ((error = getnewvnode("ext2fs", ump->um_mountp, &ext2_vnodeops, &vp)) != 0) {
 		free(ip, M_EXT2NODE);
 		return (error);
 	}
 
 	lockmgr(vp->v_vnlock, LK_EXCLUSIVE, NULL);
 	vp->v_data = ip;
 	ip->i_vnode = vp;
 	ip->i_e2fs = fs = ump->um_e2fs;
 	ip->i_ump = ump;
 	ip->i_number = ino;
 	ip->i_block_group = ino_to_cg(fs, ino);
 	ip->i_next_alloc_block = 0;
 	ip->i_next_alloc_goal = 0;
 
 	error = insmntque(vp, ump->um_mountp);
 	if (error) {
 		free(ip, M_EXT2NODE);
 		return (error);
 	}
 
 	error = vfs_hash_insert(vp, ino, LK_EXCLUSIVE, td, vpp, NULL, NULL);
 	if (error || *vpp != NULL) {
 		*vpp = NULL;
 		free(ip, M_EXT2NODE);
 		return (error);
 	}
 
 	if ((error = ext2_vinit(ump->um_mountp, &ext2_fifoops, &vp)) != 0) {
 		vput(vp);
 		*vpp = NULL;
 		free(ip, M_EXT2NODE);
 		return (error);
 	}
 
 	if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_EXTENTS)
 	    && (S_ISREG(mode) || S_ISDIR(mode)))
 		ext4_ext_tree_init(ip);
 	else
 		memset(ip->i_data, 0, sizeof(ip->i_data));
 
 	/*
 	 * Set up a new generation number for this inode.
 	 * Avoid zero values.
 	 */
 	do {
 		ip->i_gen = arc4random();
 	} while (ip->i_gen == 0);
 
 	vfs_timestamp(&ts);
 	ip->i_birthtime = ts.tv_sec;
 	ip->i_birthnsec = ts.tv_nsec;
 
 	*vpp = vp;
 
 	return (0);
 
 noinodes:
 	EXT2_UNLOCK(ump);
 	SDT_PROBE2(ext2fs, , alloc, trace, 1, "out of inodes");
 	return (ENOSPC);
 }
 
 /*
  * 64-bit compatible getters and setters for struct ext2_gd from ext2fs.h
  */
 uint64_t
 e2fs_gd_get_b_bitmap(struct ext2_gd *gd)
 {
 
 	return (((uint64_t)(le32toh(gd->ext4bgd_b_bitmap_hi)) << 32) |
 	    le32toh(gd->ext2bgd_b_bitmap));
 }
 
 uint64_t
 e2fs_gd_get_i_bitmap(struct ext2_gd *gd)
 {
 
 	return (((uint64_t)(le32toh(gd->ext4bgd_i_bitmap_hi)) << 32) |
 	    le32toh(gd->ext2bgd_i_bitmap));
 }
 
 uint64_t
 e2fs_gd_get_i_tables(struct ext2_gd *gd)
 {
 
 	return (((uint64_t)(le32toh(gd->ext4bgd_i_tables_hi)) << 32) |
 	    le32toh(gd->ext2bgd_i_tables));
 }
 
 static uint32_t
 e2fs_gd_get_nbfree(struct ext2_gd *gd)
 {
 
 	return (((uint32_t)(le16toh(gd->ext4bgd_nbfree_hi)) << 16) |
 	    le16toh(gd->ext2bgd_nbfree));
 }
 
 static void
 e2fs_gd_set_nbfree(struct ext2_gd *gd, uint32_t val)
 {
 
 	gd->ext2bgd_nbfree = htole16(val & 0xffff);
 	gd->ext4bgd_nbfree_hi = htole16(val >> 16);
 }
 
 static uint32_t
 e2fs_gd_get_nifree(struct ext2_gd *gd)
 {
 
 	return (((uint32_t)(le16toh(gd->ext4bgd_nifree_hi)) << 16) |
 	    le16toh(gd->ext2bgd_nifree));
 }
 
 static void
 e2fs_gd_set_nifree(struct ext2_gd *gd, uint32_t val)
 {
 
 	gd->ext2bgd_nifree = htole16(val & 0xffff);
 	gd->ext4bgd_nifree_hi = htole16(val >> 16);
 }
 
 uint32_t
 e2fs_gd_get_ndirs(struct ext2_gd *gd)
 {
 
 	return (((uint32_t)(le16toh(gd->ext4bgd_ndirs_hi)) << 16) |
 	    le16toh(gd->ext2bgd_ndirs));
 }
 
 static void
 e2fs_gd_set_ndirs(struct ext2_gd *gd, uint32_t val)
 {
 
 	gd->ext2bgd_ndirs = htole16(val & 0xffff);
 	gd->ext4bgd_ndirs_hi = htole16(val >> 16);
 }
 
 static uint32_t
 e2fs_gd_get_i_unused(struct ext2_gd *gd)
 {
 	return ((uint32_t)(le16toh(gd->ext4bgd_i_unused_hi) << 16) |
 	    le16toh(gd->ext4bgd_i_unused));
 }
 
 static void
 e2fs_gd_set_i_unused(struct ext2_gd *gd, uint32_t val)
 {
 
 	gd->ext4bgd_i_unused = htole16(val & 0xffff);
 	gd->ext4bgd_i_unused_hi = htole16(val >> 16);
 }
 
 /*
  * Find a cylinder to place a directory.
  *
  * The policy implemented by this algorithm is to allocate a
  * directory inode in the same cylinder group as its parent
  * directory, but also to reserve space for its files inodes
  * and data. Restrict the number of directories which may be
  * allocated one after another in the same cylinder group
  * without intervening allocation of files.
  *
  * If we allocate a first level directory then force allocation
  * in another cylinder group.
  *
  */
 static u_long
 ext2_dirpref(struct inode *pip)
 {
 	struct m_ext2fs *fs;
 	int cg, prefcg, cgsize;
 	uint64_t avgbfree, minbfree;
 	u_int avgifree, avgndir, curdirsize;
 	u_int minifree, maxndir;
 	u_int mincg, minndir;
 	u_int dirsize, maxcontigdirs;
 
 	mtx_assert(EXT2_MTX(pip->i_ump), MA_OWNED);
 	fs = pip->i_e2fs;
 
 	avgifree = fs->e2fs_ficount / fs->e2fs_gcount;
 	avgbfree = fs->e2fs_fbcount / fs->e2fs_gcount;
 	avgndir = fs->e2fs_total_dir / fs->e2fs_gcount;
 
 	/*
 	 * Force allocation in another cg if creating a first level dir.
 	 */
 	ASSERT_VOP_LOCKED(ITOV(pip), "ext2fs_dirpref");
 	if (ITOV(pip)->v_vflag & VV_ROOT) {
 		prefcg = arc4random() % fs->e2fs_gcount;
 		mincg = prefcg;
 		minndir = fs->e2fs_ipg;
 		for (cg = prefcg; cg < fs->e2fs_gcount; cg++)
 			if (e2fs_gd_get_ndirs(&fs->e2fs_gd[cg]) < minndir &&
 			    e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) >= avgifree &&
 			    e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) >= avgbfree) {
 				mincg = cg;
 				minndir = e2fs_gd_get_ndirs(&fs->e2fs_gd[cg]);
 			}
 		for (cg = 0; cg < prefcg; cg++)
 			if (e2fs_gd_get_ndirs(&fs->e2fs_gd[cg]) < minndir &&
 			    e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) >= avgifree &&
 			    e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) >= avgbfree) {
 				mincg = cg;
 				minndir = e2fs_gd_get_ndirs(&fs->e2fs_gd[cg]);
 			}
 		return (mincg);
 	}
 	/*
 	 * Count various limits which used for
 	 * optimal allocation of a directory inode.
 	 */
 	maxndir = min(avgndir + fs->e2fs_ipg / 16, fs->e2fs_ipg);
 	minifree = avgifree - avgifree / 4;
 	if (minifree < 1)
 		minifree = 1;
 	minbfree = avgbfree - avgbfree / 4;
 	if (minbfree < 1)
 		minbfree = 1;
 	cgsize = fs->e2fs_fsize * fs->e2fs_fpg;
 	dirsize = AVGDIRSIZE;
 	curdirsize = avgndir ?
 	    (cgsize - avgbfree * fs->e2fs_bsize) / avgndir : 0;
 	if (dirsize < curdirsize)
 		dirsize = curdirsize;
 	maxcontigdirs = min((avgbfree * fs->e2fs_bsize) / dirsize, 255);
 	maxcontigdirs = min(maxcontigdirs, fs->e2fs_ipg / AFPDIR);
 	if (maxcontigdirs == 0)
 		maxcontigdirs = 1;
 
 	/*
 	 * Limit number of dirs in one cg and reserve space for
 	 * regular files, but only if we have no deficit in
 	 * inodes or space.
 	 */
 	prefcg = ino_to_cg(fs, pip->i_number);
 	for (cg = prefcg; cg < fs->e2fs_gcount; cg++)
 		if (e2fs_gd_get_ndirs(&fs->e2fs_gd[cg]) < maxndir &&
 		    e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) >= minifree &&
 		    e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) >= minbfree) {
 			if (fs->e2fs_contigdirs[cg] < maxcontigdirs)
 				return (cg);
 		}
 	for (cg = 0; cg < prefcg; cg++)
 		if (e2fs_gd_get_ndirs(&fs->e2fs_gd[cg]) < maxndir &&
 		    e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) >= minifree &&
 		    e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) >= minbfree) {
 			if (fs->e2fs_contigdirs[cg] < maxcontigdirs)
 				return (cg);
 		}
 	/*
 	 * This is a backstop when we have deficit in space.
 	 */
 	for (cg = prefcg; cg < fs->e2fs_gcount; cg++)
 		if (e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) >= avgifree)
 			return (cg);
 	for (cg = 0; cg < prefcg; cg++)
 		if (e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) >= avgifree)
 			break;
 	return (cg);
 }
 
 /*
  * Select the desired position for the next block in a file.
  *
  * we try to mimic what Remy does in inode_getblk/block_getblk
  *
  * we note: blocknr == 0 means that we're about to allocate either
  * a direct block or a pointer block at the first level of indirection
  * (In other words, stuff that will go in i_db[] or i_ib[])
  *
  * blocknr != 0 means that we're allocating a block that is none
  * of the above. Then, blocknr tells us the number of the block
  * that will hold the pointer
  */
 e4fs_daddr_t
 ext2_blkpref(struct inode *ip, e2fs_lbn_t lbn, int indx, e2fs_daddr_t *bap,
     e2fs_daddr_t blocknr)
 {
 	struct m_ext2fs *fs;
 	int tmp;
 
 	fs = ip->i_e2fs;
 
 	mtx_assert(EXT2_MTX(ip->i_ump), MA_OWNED);
 
 	/*
 	 * If the next block is actually what we thought it is, then set the
 	 * goal to what we thought it should be.
 	 */
 	if (ip->i_next_alloc_block == lbn && ip->i_next_alloc_goal != 0)
 		return ip->i_next_alloc_goal;
 
 	/*
 	 * Now check whether we were provided with an array that basically
 	 * tells us previous blocks to which we want to stay close.
 	 */
 	if (bap)
 		for (tmp = indx - 1; tmp >= 0; tmp--)
 			if (bap[tmp])
 				return (le32toh(bap[tmp]));
 
 	/*
 	 * Else lets fall back to the blocknr or, if there is none, follow
 	 * the rule that a block should be allocated near its inode.
 	 */
 	return (blocknr ? blocknr :
 	    (e2fs_daddr_t)(ip->i_block_group *
 	    EXT2_BLOCKS_PER_GROUP(fs)) + le32toh(fs->e2fs->e2fs_first_dblock));
 }
 
 /*
  * Implement the cylinder overflow algorithm.
  *
  * The policy implemented by this algorithm is:
  *   1) allocate the block in its requested cylinder group.
- *   2) quadradically rehash on the cylinder group number.
+ *   2) quadratically rehash on the cylinder group number.
  *   3) brute force search for a free block.
  */
 static e4fs_daddr_t
 ext2_hashalloc(struct inode *ip, int cg, long pref, int size,
     daddr_t (*allocator) (struct inode *, int, daddr_t, int))
 {
 	struct m_ext2fs *fs;
 	e4fs_daddr_t result;
 	int i, icg = cg;
 
 	mtx_assert(EXT2_MTX(ip->i_ump), MA_OWNED);
 	fs = ip->i_e2fs;
 	/*
 	 * 1: preferred cylinder group
 	 */
 	result = (*allocator)(ip, cg, pref, size);
 	if (result)
 		return (result);
 	/*
 	 * 2: quadratic rehash
 	 */
 	for (i = 1; i < fs->e2fs_gcount; i *= 2) {
 		cg += i;
 		if (cg >= fs->e2fs_gcount)
 			cg -= fs->e2fs_gcount;
 		result = (*allocator)(ip, cg, 0, size);
 		if (result)
 			return (result);
 	}
 	/*
 	 * 3: brute force search
 	 * Note that we start at i == 2, since 0 was checked initially,
 	 * and 1 is always checked in the quadratic rehash.
 	 */
 	cg = (icg + 2) % fs->e2fs_gcount;
 	for (i = 2; i < fs->e2fs_gcount; i++) {
 		result = (*allocator)(ip, cg, 0, size);
 		if (result)
 			return (result);
 		cg++;
 		if (cg == fs->e2fs_gcount)
 			cg = 0;
 	}
 	return (0);
 }
 
 static uint64_t
 ext2_cg_number_gdb_nometa(struct m_ext2fs *fs, int cg)
 {
 
 	if (!ext2_cg_has_sb(fs, cg))
 		return (0);
 
 	if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_META_BG))
 		return (le32toh(fs->e2fs->e3fs_first_meta_bg));
 
 	return ((fs->e2fs_gcount + EXT2_DESCS_PER_BLOCK(fs) - 1) /
 	    EXT2_DESCS_PER_BLOCK(fs));
 }
 
 static uint64_t
 ext2_cg_number_gdb_meta(struct m_ext2fs *fs, int cg)
 {
 	unsigned long metagroup;
 	int first, last;
 
 	metagroup = cg / EXT2_DESCS_PER_BLOCK(fs);
 	first = metagroup * EXT2_DESCS_PER_BLOCK(fs);
 	last = first + EXT2_DESCS_PER_BLOCK(fs) - 1;
 
 	if (cg == first || cg == first + 1 || cg == last)
 		return (1);
 
 	return (0);
 }
 
 uint64_t
 ext2_cg_number_gdb(struct m_ext2fs *fs, int cg)
 {
 	unsigned long first_meta_bg, metagroup;
 
 	first_meta_bg = le32toh(fs->e2fs->e3fs_first_meta_bg);
 	metagroup = cg / EXT2_DESCS_PER_BLOCK(fs);
 
 	if (!EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_META_BG) ||
 	    metagroup < first_meta_bg)
 		return (ext2_cg_number_gdb_nometa(fs, cg));
 
 	return ext2_cg_number_gdb_meta(fs, cg);
 }
 
 static int
 ext2_number_base_meta_blocks(struct m_ext2fs *fs, int cg)
 {
 	int number;
 
 	number = ext2_cg_has_sb(fs, cg);
 
 	if (!EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_META_BG) ||
 	    cg < le32toh(fs->e2fs->e3fs_first_meta_bg) *
 	    EXT2_DESCS_PER_BLOCK(fs)) {
 		if (number) {
 			number += ext2_cg_number_gdb(fs, cg);
 			number += le16toh(fs->e2fs->e2fs_reserved_ngdb);
 		}
 	} else {
 		number += ext2_cg_number_gdb(fs, cg);
 	}
 
 	return (number);
 }
 
 static void
 ext2_mark_bitmap_end(int start_bit, int end_bit, char *bitmap)
 {
 	int i;
 
 	if (start_bit >= end_bit)
 		return;
 
 	for (i = start_bit; i < ((start_bit + 7) & ~7UL); i++)
 		setbit(bitmap, i);
 	if (i < end_bit)
 		memset(bitmap + (i >> 3), 0xff, (end_bit - i) >> 3);
 }
 
 static int
 ext2_get_group_number(struct m_ext2fs *fs, e4fs_daddr_t block)
 {
 
 	return ((block - le32toh(fs->e2fs->e2fs_first_dblock)) /
 	    fs->e2fs_bsize);
 }
 
 static int
 ext2_block_in_group(struct m_ext2fs *fs, e4fs_daddr_t block, int cg)
 {
 
 	return ((ext2_get_group_number(fs, block) == cg) ? 1 : 0);
 }
 
 static int
 ext2_cg_block_bitmap_init(struct m_ext2fs *fs, int cg, struct buf *bp)
 {
 	int bit, bit_max, inodes_per_block;
 	uint64_t start, tmp;
 
 	if (!(le16toh(fs->e2fs_gd[cg].ext4bgd_flags) & EXT2_BG_BLOCK_UNINIT))
 		return (0);
 
 	memset(bp->b_data, 0, fs->e2fs_bsize);
 
 	bit_max = ext2_number_base_meta_blocks(fs, cg);
 	if ((bit_max >> 3) >= fs->e2fs_bsize)
 		return (EINVAL);
 
 	for (bit = 0; bit < bit_max; bit++)
 		setbit(bp->b_data, bit);
 
 	start = (uint64_t)cg * fs->e2fs_bpg +
 	    le32toh(fs->e2fs->e2fs_first_dblock);
 
 	/* Set bits for block and inode bitmaps, and inode table. */
 	tmp = e2fs_gd_get_b_bitmap(&fs->e2fs_gd[cg]);
 	if (!EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_FLEX_BG) ||
 	    ext2_block_in_group(fs, tmp, cg))
 		setbit(bp->b_data, tmp - start);
 
 	tmp = e2fs_gd_get_i_bitmap(&fs->e2fs_gd[cg]);
 	if (!EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_FLEX_BG) ||
 	    ext2_block_in_group(fs, tmp, cg))
 		setbit(bp->b_data, tmp - start);
 
 	tmp = e2fs_gd_get_i_tables(&fs->e2fs_gd[cg]);
 	inodes_per_block = fs->e2fs_bsize/EXT2_INODE_SIZE(fs);
 	while( tmp < e2fs_gd_get_i_tables(&fs->e2fs_gd[cg]) +
 	    fs->e2fs_ipg / inodes_per_block ) {
 		if (!EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_FLEX_BG) ||
 		    ext2_block_in_group(fs, tmp, cg))
 			setbit(bp->b_data, tmp - start);
 		tmp++;
 	}
 
 	/*
 	 * Also if the number of blocks within the group is less than
 	 * the blocksize * 8 ( which is the size of bitmap ), set rest
 	 * of the block bitmap to 1
 	 */
 	ext2_mark_bitmap_end(fs->e2fs_bpg, fs->e2fs_bsize * 8,
 	    bp->b_data);
 
 	/* Clean the flag */
 	fs->e2fs_gd[cg].ext4bgd_flags = htole16(le16toh(
 	    fs->e2fs_gd[cg].ext4bgd_flags) & ~EXT2_BG_BLOCK_UNINIT);
 
 	return (0);
 }
 
 static int
 ext2_b_bitmap_validate(struct m_ext2fs *fs, struct buf *bp, int cg)
 {
 	struct ext2_gd *gd;
 	uint64_t group_first_block;
 	unsigned int offset, max_bit;
 
 	if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_FLEX_BG)) {
 		/*
 		 * It is not possible to check block bitmap in case of this
 		 * feature, because the inode and block bitmaps and inode table
 		 * blocks may not be in the group at all.
 		 * So, skip check in this case.
 		 */
 		return (0);
 	}
 
 	gd = &fs->e2fs_gd[cg];
 	max_bit = fs->e2fs_fpg;
 	group_first_block = ((uint64_t)cg) * fs->e2fs_fpg +
 	    le32toh(fs->e2fs->e2fs_first_dblock);
 
 	/* Check block bitmap block number */
 	offset = e2fs_gd_get_b_bitmap(gd) - group_first_block;
 	if (offset >= max_bit || !isset(bp->b_data, offset)) {
 		SDT_PROBE2(ext2fs, , alloc, ext2_b_bitmap_validate_error,
 		    "bad block bitmap, group", cg);
 		return (EINVAL);
 	}
 
 	/* Check inode bitmap block number */
 	offset = e2fs_gd_get_i_bitmap(gd) - group_first_block;
 	if (offset >= max_bit || !isset(bp->b_data, offset)) {
 		SDT_PROBE2(ext2fs, , alloc, ext2_b_bitmap_validate_error,
 		    "bad inode bitmap", cg);
 		return (EINVAL);
 	}
 
 	/* Check inode table */
 	offset = e2fs_gd_get_i_tables(gd) - group_first_block;
 	if (offset >= max_bit || offset + fs->e2fs_itpg >= max_bit) {
 		SDT_PROBE2(ext2fs, , alloc, ext2_b_bitmap_validate_error,
 		    "bad inode table, group", cg);
 		return (EINVAL);
 	}
 
 	return (0);
 }
 
 /*
  * Determine whether a block can be allocated.
  *
  * Check to see if a block of the appropriate size is available,
  * and if it is, allocate it.
  */
 static daddr_t
 ext2_alloccg(struct inode *ip, int cg, daddr_t bpref, int size)
 {
 	struct m_ext2fs *fs;
 	struct buf *bp;
 	struct ext2mount *ump;
 	daddr_t bno, runstart, runlen;
 	int bit, loc, end, error, start;
 	char *bbp;
 	/* XXX ondisk32 */
 	fs = ip->i_e2fs;
 	ump = ip->i_ump;
 	if (e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) == 0)
 		return (0);
 
 	EXT2_UNLOCK(ump);
 	error = bread(ip->i_devvp, fsbtodb(fs,
 	    e2fs_gd_get_b_bitmap(&fs->e2fs_gd[cg])),
 	    (int)fs->e2fs_bsize, NOCRED, &bp);
 	if (error)
 		goto fail;
 
 	if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_GDT_CSUM) ||
 	    EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) {
 		error = ext2_cg_block_bitmap_init(fs, cg, bp);
 		if (error)
 			goto fail;
 
 		ext2_gd_b_bitmap_csum_set(fs, cg, bp);
 	}
 	error = ext2_gd_b_bitmap_csum_verify(fs, cg, bp);
 	if (error)
 		goto fail;
 
 	error = ext2_b_bitmap_validate(fs,bp, cg);
 	if (error)
 		goto fail;
 
 	/*
 	 * Check, that another thread did not not allocate the last block in
 	 * this group while we were waiting for the buffer.
 	 */
 	if (e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) == 0)
 		goto fail;
 
 	bbp = (char *)bp->b_data;
 
 	if (dtog(fs, bpref) != cg)
 		bpref = 0;
 	if (bpref != 0) {
 		bpref = dtogd(fs, bpref);
 		/*
 		 * if the requested block is available, use it
 		 */
 		if (isclr(bbp, bpref)) {
 			bno = bpref;
 			goto gotit;
 		}
 	}
 	/*
 	 * no blocks in the requested cylinder, so take next
 	 * available one in this cylinder group.
 	 * first try to get 8 contigous blocks, then fall back to a single
 	 * block.
 	 */
 	if (bpref)
 		start = dtogd(fs, bpref) / NBBY;
 	else
 		start = 0;
 	end = howmany(fs->e2fs_fpg, NBBY) - start;
 retry:
 	runlen = 0;
 	runstart = 0;
 	for (loc = start; loc < end; loc++) {
 		if (bbp[loc] == (char)0xff) {
 			runlen = 0;
 			continue;
 		}
 
 		/* Start of a run, find the number of high clear bits. */
 		if (runlen == 0) {
 			bit = fls(bbp[loc]);
 			runlen = NBBY - bit;
 			runstart = loc * NBBY + bit;
 		} else if (bbp[loc] == 0) {
 			/* Continue a run. */
 			runlen += NBBY;
 		} else {
 			/*
 			 * Finish the current run.  If it isn't long
 			 * enough, start a new one.
 			 */
 			bit = ffs(bbp[loc]) - 1;
 			runlen += bit;
 			if (runlen >= 8) {
 				bno = runstart;
 				goto gotit;
 			}
 
 			/* Run was too short, start a new one. */
 			bit = fls(bbp[loc]);
 			runlen = NBBY - bit;
 			runstart = loc * NBBY + bit;
 		}
 
 		/* If the current run is long enough, use it. */
 		if (runlen >= 8) {
 			bno = runstart;
 			goto gotit;
 		}
 	}
 	if (start != 0) {
 		end = start;
 		start = 0;
 		goto retry;
 	}
 	bno = ext2_mapsearch(fs, bbp, bpref);
 	if (bno < 0)
 		goto fail;
 
 gotit:
 #ifdef INVARIANTS
 	if (isset(bbp, bno)) {
 		printf("ext2fs_alloccgblk: cg=%d bno=%jd fs=%s\n",
 		    cg, (intmax_t)bno, fs->e2fs_fsmnt);
 		panic("ext2fs_alloccg: dup alloc");
 	}
 #endif
 	setbit(bbp, bno);
 	EXT2_LOCK(ump);
 	ext2_clusteracct(fs, bbp, cg, bno, -1);
 	fs->e2fs_fbcount--;
 	e2fs_gd_set_nbfree(&fs->e2fs_gd[cg],
 	    e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) - 1);
 	fs->e2fs_fmod = 1;
 	EXT2_UNLOCK(ump);
 	ext2_gd_b_bitmap_csum_set(fs, cg, bp);
 	bdwrite(bp);
 	return (((uint64_t)cg) * fs->e2fs_fpg +
 	    le32toh(fs->e2fs->e2fs_first_dblock) + bno);
 
 fail:
 	brelse(bp);
 	EXT2_LOCK(ump);
 	return (0);
 }
 
 /*
  * Determine whether a cluster can be allocated.
  */
 static daddr_t
 ext2_clusteralloc(struct inode *ip, int cg, daddr_t bpref, int len)
 {
 	struct m_ext2fs *fs;
 	struct ext2mount *ump;
 	struct buf *bp;
 	char *bbp;
 	int bit, error, got, i, loc, run;
 	int32_t *lp;
 	daddr_t bno;
 
 	fs = ip->i_e2fs;
 	ump = ip->i_ump;
 
 	if (fs->e2fs_maxcluster[cg] < len)
 		return (0);
 
 	EXT2_UNLOCK(ump);
 	error = bread(ip->i_devvp,
 	    fsbtodb(fs, e2fs_gd_get_b_bitmap(&fs->e2fs_gd[cg])),
 	    (int)fs->e2fs_bsize, NOCRED, &bp);
 	if (error)
 		goto fail_lock;
 
 	bbp = (char *)bp->b_data;
 	EXT2_LOCK(ump);
 	/*
 	 * Check to see if a cluster of the needed size (or bigger) is
 	 * available in this cylinder group.
 	 */
 	lp = &fs->e2fs_clustersum[cg].cs_sum[len];
 	for (i = len; i <= fs->e2fs_contigsumsize; i++)
 		if (*lp++ > 0)
 			break;
 	if (i > fs->e2fs_contigsumsize) {
 		/*
 		 * Update the cluster summary information to reflect
 		 * the true maximum-sized cluster so that future cluster
 		 * allocation requests can avoid reading the bitmap only
 		 * to find no cluster.
 		 */
 		lp = &fs->e2fs_clustersum[cg].cs_sum[len - 1];
 		for (i = len - 1; i > 0; i--)
 			if (*lp-- > 0)
 				break;
 		fs->e2fs_maxcluster[cg] = i;
 		goto fail;
 	}
 	EXT2_UNLOCK(ump);
 
 	/* Search the bitmap to find a big enough cluster like in FFS. */
 	if (dtog(fs, bpref) != cg)
 		bpref = 0;
 	if (bpref != 0)
 		bpref = dtogd(fs, bpref);
 	loc = bpref / NBBY;
 	bit = 1 << (bpref % NBBY);
 	for (run = 0, got = bpref; got < fs->e2fs_fpg; got++) {
 		if ((bbp[loc] & bit) != 0)
 			run = 0;
 		else {
 			run++;
 			if (run == len)
 				break;
 		}
 		if ((got & (NBBY - 1)) != (NBBY - 1))
 			bit <<= 1;
 		else {
 			loc++;
 			bit = 1;
 		}
 	}
 
 	if (got >= fs->e2fs_fpg)
 		goto fail_lock;
 
 	/* Allocate the cluster that we found. */
 	for (i = 1; i < len; i++)
 		if (!isclr(bbp, got - run + i))
 			panic("ext2_clusteralloc: map mismatch");
 
 	bno = got - run + 1;
 	if (bno >= fs->e2fs_fpg)
 		panic("ext2_clusteralloc: allocated out of group");
 
 	EXT2_LOCK(ump);
 	for (i = 0; i < len; i += fs->e2fs_fpb) {
 		setbit(bbp, bno + i);
 		ext2_clusteracct(fs, bbp, cg, bno + i, -1);
 		fs->e2fs_fbcount--;
 		e2fs_gd_set_nbfree(&fs->e2fs_gd[cg],
 		    e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) - 1);
 	}
 	fs->e2fs_fmod = 1;
 	EXT2_UNLOCK(ump);
 
 	bdwrite(bp);
 	return (cg * fs->e2fs_fpg + le32toh(fs->e2fs->e2fs_first_dblock)
 	    + bno);
 
 fail_lock:
 	EXT2_LOCK(ump);
 fail:
 	brelse(bp);
 	return (0);
 }
 
 static int
 ext2_zero_inode_table(struct inode *ip, int cg)
 {
 	struct m_ext2fs *fs;
 	struct buf *bp;
 	int i, all_blks, used_blks;
 
 	fs = ip->i_e2fs;
 
 	if (le16toh(fs->e2fs_gd[cg].ext4bgd_flags) & EXT2_BG_INODE_ZEROED)
 		return (0);
 
 	all_blks = le16toh(fs->e2fs->e2fs_inode_size) * fs->e2fs_ipg /
 	    fs->e2fs_bsize;
 
 	used_blks = howmany(fs->e2fs_ipg -
 	    e2fs_gd_get_i_unused(&fs->e2fs_gd[cg]),
 	    fs->e2fs_bsize / EXT2_INODE_SIZE(fs));
 
 	for (i = 0; i < all_blks - used_blks; i++) {
 		bp = getblk(ip->i_devvp, fsbtodb(fs,
 		    e2fs_gd_get_i_tables(&fs->e2fs_gd[cg]) + used_blks + i),
 		    fs->e2fs_bsize, 0, 0, 0);
 		if (!bp)
 			return (EIO);
 
 		vfs_bio_bzero_buf(bp, 0, fs->e2fs_bsize);
 		bawrite(bp);
 	}
 
 	fs->e2fs_gd[cg].ext4bgd_flags = htole16(le16toh(
 	    fs->e2fs_gd[cg].ext4bgd_flags) | EXT2_BG_INODE_ZEROED);
 
 	return (0);
 }
 
 static void
 ext2_fix_bitmap_tail(unsigned char *bitmap, int first, int last)
 {
 	int i;
 
 	for (i = first; i <= last; i++)
 		bitmap[i] = 0xff;
 }
 
 /*
  * Determine whether an inode can be allocated.
  *
  * Check to see if an inode is available, and if it is,
  * allocate it using tode in the specified cylinder group.
  */
 static daddr_t
 ext2_nodealloccg(struct inode *ip, int cg, daddr_t ipref, int mode)
 {
 	struct m_ext2fs *fs;
 	struct buf *bp;
 	struct ext2mount *ump;
 	int error, start, len, ifree, ibytes;
 	char *ibp, *loc;
 
 	ipref--;	/* to avoid a lot of (ipref -1) */
 	if (ipref == -1)
 		ipref = 0;
 	fs = ip->i_e2fs;
 	ump = ip->i_ump;
 	if (e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) == 0)
 		return (0);
 	EXT2_UNLOCK(ump);
 	error = bread(ip->i_devvp, fsbtodb(fs,
 	    e2fs_gd_get_i_bitmap(&fs->e2fs_gd[cg])),
 	    (int)fs->e2fs_bsize, NOCRED, &bp);
 	if (error) {
 		EXT2_LOCK(ump);
 		return (0);
 	}
 	if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_GDT_CSUM) ||
 	    EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) {
 		if (le16toh(fs->e2fs_gd[cg].ext4bgd_flags) &
 		    EXT2_BG_INODE_UNINIT) {
 			ibytes = fs->e2fs_ipg / 8;
 			memset(bp->b_data, 0, ibytes - 1);
 			ext2_fix_bitmap_tail(bp->b_data, ibytes,
 			    fs->e2fs_bsize - 1);
 			fs->e2fs_gd[cg].ext4bgd_flags = htole16(le16toh(
 			    fs->e2fs_gd[cg].ext4bgd_flags) &
 			    ~EXT2_BG_INODE_UNINIT);
 		}
 		ext2_gd_i_bitmap_csum_set(fs, cg, bp);
 		error = ext2_zero_inode_table(ip, cg);
 		if (error) {
 			brelse(bp);
 			EXT2_LOCK(ump);
 			return (0);
 		}
 	}
 	error = ext2_gd_i_bitmap_csum_verify(fs, cg, bp);
 	if (error) {
 		brelse(bp);
 		EXT2_LOCK(ump);
 		return (0);
 	}
 	if (e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) == 0) {
 		/*
 		 * Another thread allocated the last i-node in this
 		 * group while we were waiting for the buffer.
 		 */
 		brelse(bp);
 		EXT2_LOCK(ump);
 		return (0);
 	}
 	ibp = (char *)bp->b_data;
 	if (ipref) {
 		ipref %= fs->e2fs_ipg;
 		if (isclr(ibp, ipref))
 			goto gotit;
 	}
 	start = ipref / NBBY;
 	len = howmany(fs->e2fs_ipg - ipref, NBBY);
 	loc = memcchr(&ibp[start], 0xff, len);
 	if (loc == NULL) {
 		len = start + 1;
 		start = 0;
 		loc = memcchr(&ibp[start], 0xff, len);
 		if (loc == NULL) {
 			SDT_PROBE3(ext2fs, , alloc,
 			    ext2_nodealloccg_bmap_corrupted, cg, ipref,
 			    fs->e2fs_fsmnt);
 			brelse(bp);
 			EXT2_LOCK(ump);
 			return (0);
 		}
 	}
 	ipref = (loc - ibp) * NBBY + ffs(~*loc) - 1;
 gotit:
 	setbit(ibp, ipref);
 	EXT2_LOCK(ump);
 	e2fs_gd_set_nifree(&fs->e2fs_gd[cg],
 	    e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) - 1);
 	if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_GDT_CSUM) ||
 	    EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) {
 		ifree = fs->e2fs_ipg - e2fs_gd_get_i_unused(&fs->e2fs_gd[cg]);
 		if (ipref + 1 > ifree)
 			e2fs_gd_set_i_unused(&fs->e2fs_gd[cg],
 			    fs->e2fs_ipg - (ipref + 1));
 	}
 	fs->e2fs_ficount--;
 	fs->e2fs_fmod = 1;
 	if ((mode & IFMT) == IFDIR) {
 		e2fs_gd_set_ndirs(&fs->e2fs_gd[cg],
 		    e2fs_gd_get_ndirs(&fs->e2fs_gd[cg]) + 1);
 		fs->e2fs_total_dir++;
 	}
 	EXT2_UNLOCK(ump);
 	ext2_gd_i_bitmap_csum_set(fs, cg, bp);
 	bdwrite(bp);
 	return ((uint64_t)cg * fs->e2fs_ipg + ipref + 1);
 }
 
 /*
  * Free a block or fragment.
  *
  */
 void
 ext2_blkfree(struct inode *ip, e4fs_daddr_t bno, long size)
 {
 	struct m_ext2fs *fs;
 	struct buf *bp;
 	struct ext2mount *ump;
 	int cg, error;
 	char *bbp;
 
 	fs = ip->i_e2fs;
 	ump = ip->i_ump;
 	cg = dtog(fs, bno);
 	if (bno >= fs->e2fs_bcount) {
 		SDT_PROBE2(ext2fs, , alloc, ext2_blkfree_bad_block,
 		    ip->i_number, bno);
 		return;
 	}
 	error = bread(ip->i_devvp,
 	    fsbtodb(fs, e2fs_gd_get_b_bitmap(&fs->e2fs_gd[cg])),
 	    (int)fs->e2fs_bsize, NOCRED, &bp);
 	if (error) {
 		return;
 	}
 	bbp = (char *)bp->b_data;
 	bno = dtogd(fs, bno);
 	if (isclr(bbp, bno)) {
 		panic("ext2_blkfree: freeing free block %lld, fs=%s",
 		    (long long)bno, fs->e2fs_fsmnt);
 	}
 	clrbit(bbp, bno);
 	EXT2_LOCK(ump);
 	ext2_clusteracct(fs, bbp, cg, bno, 1);
 	fs->e2fs_fbcount++;
 	e2fs_gd_set_nbfree(&fs->e2fs_gd[cg],
 	    e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) + 1);
 	fs->e2fs_fmod = 1;
 	EXT2_UNLOCK(ump);
 	ext2_gd_b_bitmap_csum_set(fs, cg, bp);
 	bdwrite(bp);
 }
 
 /*
  * Free an inode.
  *
  */
 int
 ext2_vfree(struct vnode *pvp, ino_t ino, int mode)
 {
 	struct m_ext2fs *fs;
 	struct inode *pip;
 	struct buf *bp;
 	struct ext2mount *ump;
 	int error, cg;
 	char *ibp;
 
 	pip = VTOI(pvp);
 	fs = pip->i_e2fs;
 	ump = pip->i_ump;
 	if ((u_int)ino > fs->e2fs_ipg * fs->e2fs_gcount)
 		panic("ext2_vfree: range: devvp = %p, ino = %ju, fs = %s",
 		    pip->i_devvp, (uintmax_t)ino, fs->e2fs_fsmnt);
 
 	cg = ino_to_cg(fs, ino);
 	error = bread(pip->i_devvp,
 	    fsbtodb(fs, e2fs_gd_get_i_bitmap(&fs->e2fs_gd[cg])),
 	    (int)fs->e2fs_bsize, NOCRED, &bp);
 	if (error) {
 		return (0);
 	}
 	ibp = (char *)bp->b_data;
 	ino = (ino - 1) % fs->e2fs_ipg;
 	if (isclr(ibp, ino)) {
 		SDT_PROBE2(ext2fs, , alloc, ext2_vfree_doublefree,
 		    fs->e2fs_fsmnt, ino);
 		if (fs->e2fs_ronly == 0)
 			panic("ext2_vfree: freeing free inode");
 	}
 	clrbit(ibp, ino);
 	EXT2_LOCK(ump);
 	fs->e2fs_ficount++;
 	e2fs_gd_set_nifree(&fs->e2fs_gd[cg],
 	    e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) + 1);
 	if ((mode & IFMT) == IFDIR) {
 		e2fs_gd_set_ndirs(&fs->e2fs_gd[cg],
 		    e2fs_gd_get_ndirs(&fs->e2fs_gd[cg]) - 1);
 		fs->e2fs_total_dir--;
 	}
 	fs->e2fs_fmod = 1;
 	EXT2_UNLOCK(ump);
 	ext2_gd_i_bitmap_csum_set(fs, cg, bp);
 	bdwrite(bp);
 	return (0);
 }
 
 /*
  * Find a block in the specified cylinder group.
  *
  * It is a panic if a request is made to find a block if none are
  * available.
  */
 static daddr_t
 ext2_mapsearch(struct m_ext2fs *fs, char *bbp, daddr_t bpref)
 {
 	char *loc;
 	int start, len;
 
 	/*
 	 * find the fragment by searching through the free block
 	 * map for an appropriate bit pattern
 	 */
 	if (bpref)
 		start = dtogd(fs, bpref) / NBBY;
 	else
 		start = 0;
 	len = howmany(fs->e2fs_fpg, NBBY) - start;
 	loc = memcchr(&bbp[start], 0xff, len);
 	if (loc == NULL) {
 		len = start + 1;
 		start = 0;
 		loc = memcchr(&bbp[start], 0xff, len);
 		if (loc == NULL) {
 			panic("ext2_mapsearch: map corrupted: start=%d, len=%d,"
 			    "fs=%s", start, len, fs->e2fs_fsmnt);
 			/* NOTREACHED */
 		}
 	}
 	return ((loc - bbp) * NBBY + ffs(~*loc) - 1);
 }
 
 int
 ext2_cg_has_sb(struct m_ext2fs *fs, int cg)
 {
 	int a3, a5, a7;
 
 	if (cg == 0)
 		return (1);
 
 	if (EXT2_HAS_COMPAT_FEATURE(fs, EXT2F_COMPAT_SPARSESUPER2)) {
 		if (cg == le32toh(fs->e2fs->e4fs_backup_bgs[0]) ||
 		    cg == le32toh(fs->e2fs->e4fs_backup_bgs[1]))
 			return (1);
 		return (0);
 	}
 
 	if ((cg <= 1) ||
 	    !EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_SPARSESUPER))
 		return (1);
 
 	if (!(cg & 1))
 		return (0);
 
 	for (a3 = 3, a5 = 5, a7 = 7;
 	    a3 <= cg || a5 <= cg || a7 <= cg;
 	    a3 *= 3, a5 *= 5, a7 *= 7)
 		if (cg == a3 || cg == a5 || cg == a7)
 			return (1);
 	return (0);
 }