diff --git a/sys/ufs/ffs/ffs_alloc.c b/sys/ufs/ffs/ffs_alloc.c
index cea935b12205..91660839ff13 100644
--- a/sys/ufs/ffs/ffs_alloc.c
+++ b/sys/ufs/ffs/ffs_alloc.c
@@ -1,1728 +1,1933 @@
 /*
  * Copyright (c) 1982, 1986, 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by the University of
  *	California, Berkeley and its contributors.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)ffs_alloc.c	8.18 (Berkeley) 5/26/95
  * $FreeBSD$
  */
 
 #include "opt_quota.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/bio.h>
 #include <sys/buf.h>
 #include <sys/conf.h>
+#include <sys/file.h>
 #include <sys/proc.h>
 #include <sys/vnode.h>
 #include <sys/mount.h>
 #include <sys/kernel.h>
 #include <sys/sysctl.h>
 #include <sys/syslog.h>
 
 #include <ufs/ufs/extattr.h>
 #include <ufs/ufs/quota.h>
 #include <ufs/ufs/inode.h>
 #include <ufs/ufs/ufs_extern.h>
 #include <ufs/ufs/ufsmount.h>
 
 #include <ufs/ffs/fs.h>
 #include <ufs/ffs/ffs_extern.h>
 
 /*
  * Common signature of the per-cylinder-group allocators handed to
  * ffs_hashalloc(): (inode, cylinder group, preference, size).  When
  * ffs_hashalloc() is used for inode allocation the last argument
  * carries the file mode instead of a size.
  */
 typedef ufs_daddr_t allocfcn_t __P((struct inode *ip, int cg, ufs_daddr_t bpref,
 				  int size));
 
 /* Prototypes for the file-local allocation helpers. */
 static ufs_daddr_t ffs_alloccg __P((struct inode *, int, ufs_daddr_t, int));
 static ufs_daddr_t
 	      ffs_alloccgblk __P((struct inode *, struct buf *, ufs_daddr_t));
 #ifdef DIAGNOSTIC
 static int	ffs_checkblk __P((struct inode *, ufs_daddr_t, long));
 #endif
 static void	ffs_clusteracct	__P((struct fs *, struct cg *, ufs_daddr_t,
 				     int));
 static ufs_daddr_t ffs_clusteralloc __P((struct inode *, int, ufs_daddr_t,
 	    int));
 static ino_t	ffs_dirpref __P((struct fs *));
 static ufs_daddr_t ffs_fragextend __P((struct inode *, int, long, int, int));
 static void	ffs_fserr __P((struct fs *, u_int, char *));
 static u_long	ffs_hashalloc
 		    __P((struct inode *, int, long, int, allocfcn_t *));
 static ino_t	ffs_nodealloccg __P((struct inode *, int, ufs_daddr_t, int));
 static ufs_daddr_t ffs_mapsearch __P((struct fs *, struct cg *, ufs_daddr_t,
 	    int));
 
 /*
  * Allocate a block in the file system.
  *
  * The caller states how many bytes it wants; that size must be a
  * multiple of fs_fsize and may not exceed fs_bsize.  A preferred
  * location may be passed in bpref; zero, or anything at or beyond
  * fs_size, is treated as "no preference".
  *
  * With a preference, the search order is:
  *   1) the requested block itself;
  *   2) a rotationally optimal block in the same cylinder;
  *   3) any block in the same cylinder group;
  *   4) other cylinder groups, visited by quadratic rehash, until a
  *      free block turns up.
  * Without a preference, the search order is:
  *   1) the cylinder group that holds the file's inode;
  *   2) other cylinder groups, visited by quadratic rehash, until a
  *      free block turns up.
  *
  * On success the new block number is stored in *bnp and 0 is returned.
  * On failure *bnp stays 0 and the result is ENOSPC or, with QUOTA, the
  * error reported by chkdq().
  */
 int
 ffs_alloc(ip, lbn, bpref, size, cred, bnp)
 	register struct inode *ip;
 	ufs_daddr_t lbn, bpref;
 	int size;
 	struct ucred *cred;
 	ufs_daddr_t *bnp;
 {
 	register struct fs *fs;
 	ufs_daddr_t newblk;
 	int startcg;
 #ifdef QUOTA
 	int error;
 #endif
 
 	*bnp = 0;
 	fs = ip->i_fs;
 #ifdef DIAGNOSTIC
 	if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) {
 		printf("dev = %s, bsize = %ld, size = %d, fs = %s\n",
 		    devtoname(ip->i_dev), (long)fs->fs_bsize, size,
 		    fs->fs_fsmnt);
 		panic("ffs_alloc: bad size");
 	}
 	if (cred == NOCRED)
 		panic("ffs_alloc: missing credential");
 #endif /* DIAGNOSTIC */
 
 	/* A whole block is wanted but the summary says none is free. */
 	if (size == fs->fs_bsize && fs->fs_cstotal.cs_nbfree == 0)
 		goto nospace;
 	/*
 	 * Callers other than uid 0 are refused once freespace(), taken
 	 * with the fs_minfree percentage, cannot cover the request.
 	 */
 	if (cred->cr_uid != 0 &&
 	    freespace(fs, fs->fs_minfree) - numfrags(fs, size) < 0)
 		goto nospace;
 #ifdef QUOTA
 	if ((error = chkdq(ip, (long)btodb(size), cred, 0)) != 0)
 		return (error);
 #endif
 
 	/* An out-of-range preference counts as no preference at all. */
 	if (bpref >= fs->fs_size)
 		bpref = 0;
 	/* Start in the inode's cylinder group unless a hint was given. */
 	if (bpref != 0)
 		startcg = dtog(fs, bpref);
 	else
 		startcg = ino_to_cg(fs, ip->i_number);
 
 	newblk = (ufs_daddr_t)ffs_hashalloc(ip, startcg, (long)bpref, size,
 	    ffs_alloccg);
 	if (newblk <= 0) {
 #ifdef QUOTA
 		/*
 		 * Restore user's disk quota because allocation failed.
 		 */
 		(void) chkdq(ip, (long)-btodb(size), cred, FORCE);
 #endif
 		goto nospace;
 	}
 
 	/* Charge the inode for the new space and hand the block back. */
 	ip->i_blocks += btodb(size);
 	ip->i_flag |= IN_CHANGE | IN_UPDATE;
 	*bnp = newblk;
 	return (0);
 
 nospace:
 	ffs_fserr(fs, cred->cr_uid, "file system full");
 	uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt);
 	return (ENOSPC);
 }
 
 /*
  * Reallocate a fragment to a bigger size
  *
  * The number and size of the old block is given, and a preference
  * and new size is also specified. The allocator attempts to extend
  * the original block. Failing that, the regular block allocator is
  * invoked to get an appropriate block.
  *
  * lbprev indexes ip->i_db[]; osize and nsize are the old and new sizes
  * in bytes.  On success 0 is returned and *bpp points at the buffer for
  * the block, resized to nsize with the added bytes zeroed.  On failure
  * *bpp is left NULL and the return value is ENOSPC or the error reported
  * by bread() or (with QUOTA) chkdq().
  */
 int
 ffs_realloccg(ip, lbprev, bpref, osize, nsize, cred, bpp)
 	register struct inode *ip;
 	ufs_daddr_t lbprev;
 	ufs_daddr_t bpref;
 	int osize, nsize;
 	struct ucred *cred;
 	struct buf **bpp;
 {
 	register struct fs *fs;
 	struct buf *bp;
 	int cg, request, error;
 	ufs_daddr_t bprev, bno;
 
 	*bpp = 0;
 	fs = ip->i_fs;
 #ifdef DIAGNOSTIC
 	if (ITOV(ip)->v_mount->mnt_kern_flag & MNTK_SUSPENDED)
 		panic("ffs_realloccg: allocation on suspended filesystem");
 	if ((u_int)osize > fs->fs_bsize || fragoff(fs, osize) != 0 ||
 	    (u_int)nsize > fs->fs_bsize || fragoff(fs, nsize) != 0) {
 		printf(
 		"dev = %s, bsize = %ld, osize = %d, nsize = %d, fs = %s\n",
 		    devtoname(ip->i_dev), (long)fs->fs_bsize, osize,
 		    nsize, fs->fs_fsmnt);
 		panic("ffs_realloccg: bad size");
 	}
 	if (cred == NOCRED)
 		panic("ffs_realloccg: missing credential");
 #endif /* DIAGNOSTIC */
 	/*
 	 * Callers other than uid 0 are refused once freespace(), taken
 	 * with the fs_minfree percentage, cannot cover the growth.
 	 */
 	if (cred->cr_uid != 0 &&
 	    freespace(fs, fs->fs_minfree) -  numfrags(fs, nsize - osize) < 0)
 		goto nospace;
 	/* The fragment being grown must already have a disk address. */
 	if ((bprev = ip->i_db[lbprev]) == 0) {
 		printf("dev = %s, bsize = %ld, bprev = %ld, fs = %s\n",
 		    devtoname(ip->i_dev), (long)fs->fs_bsize, (long)bprev,
 		    fs->fs_fsmnt);
 		panic("ffs_realloccg: bad bprev");
 	}
 	/*
 	 * Allocate the extra space in the buffer.
 	 */
 	error = bread(ITOV(ip), lbprev, osize, NOCRED, &bp);
 	if (error) {
 		brelse(bp);
 		return (error);
 	}
 
 	/*
 	 * NOTE(review): b_blkno == b_lblkno presumably means the buffer
 	 * has not been given a physical address yet; in that case it is
 	 * filled in from bprev.  Confirm against the buffer cache code.
 	 */
 	if( bp->b_blkno == bp->b_lblkno) {
 		if( lbprev >= NDADDR)
 			panic("ffs_realloccg: lbprev out of range");
 		bp->b_blkno = fsbtodb(fs, bprev);
 	}
 
 #ifdef QUOTA
 	/* Charge the growth up front; it is refunded below on failure. */
 	error = chkdq(ip, (long)btodb(nsize - osize), cred, 0);
 	if (error) {
 		brelse(bp);
 		return (error);
 	}
 #endif
 	/*
 	 * Check for extension in the existing location.
 	 */
 	cg = dtog(fs, bprev);
 	bno = ffs_fragextend(ip, cg, (long)bprev, osize, nsize);
 	if (bno) {
 		if (bp->b_blkno != fsbtodb(fs, bno))
 			panic("ffs_realloccg: bad blockno");
 		ip->i_blocks += btodb(nsize - osize);
 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
 		allocbuf(bp, nsize);
 		bp->b_flags |= B_DONE;
 		/* Zero only the newly added tail of the buffer. */
 		bzero((char *)bp->b_data + osize, (u_int)nsize - osize);
 		*bpp = bp;
 		return (0);
 	}
 	/*
 	 * Allocate a new disk location.
 	 */
 	if (bpref >= fs->fs_size)
 		bpref = 0;
 	switch ((int)fs->fs_optim) {
 	case FS_OPTSPACE:
 		/*
 		 * Allocate an exact sized fragment. Although this makes
 		 * best use of space, we will waste time relocating it if
 		 * the file continues to grow. If the fragmentation is
 		 * less than half of the minimum free reserve, we choose
 		 * to begin optimizing for time.
 		 */
 		request = nsize;
 		if (fs->fs_minfree <= 5 ||
 		    fs->fs_cstotal.cs_nffree >
 		    (off_t)fs->fs_dsize * fs->fs_minfree / (2 * 100))
 			break;
 		log(LOG_NOTICE, "%s: optimization changed from SPACE to TIME\n",
 			fs->fs_fsmnt);
 		fs->fs_optim = FS_OPTTIME;
 		break;
 	case FS_OPTTIME:
 		/*
 		 * At this point we have discovered a file that is trying to
 		 * grow a small fragment to a larger fragment. To save time,
 		 * we allocate a full sized block, then free the unused portion.
 		 * If the file continues to grow, the `ffs_fragextend' call
 		 * above will be able to grow it in place without further
 		 * copying. If aberrant programs cause disk fragmentation to
 		 * grow within 2% of the free reserve, we choose to begin
 		 * optimizing for space.
 		 */
 		request = fs->fs_bsize;
 		if (fs->fs_cstotal.cs_nffree <
 		    (off_t)fs->fs_dsize * (fs->fs_minfree - 2) / 100)
 			break;
 		log(LOG_NOTICE, "%s: optimization changed from TIME to SPACE\n",
 			fs->fs_fsmnt);
 		fs->fs_optim = FS_OPTSPACE;
 		break;
 	default:
 		printf("dev = %s, optim = %ld, fs = %s\n",
 		    devtoname(ip->i_dev), (long)fs->fs_optim, fs->fs_fsmnt);
 		panic("ffs_realloccg: bad optim");
 		/* NOTREACHED */
 	}
 	/*
 	 * The search starts in the cylinder group of the old fragment
 	 * (cg was derived from bprev above).
 	 */
 	bno = (ufs_daddr_t)ffs_hashalloc(ip, cg, (long)bpref, request,
 					 ffs_alloccg);
 	if (bno > 0) {
 		bp->b_blkno = fsbtodb(fs, bno);
 		/*
 		 * Without soft updates the old fragment is freed here.
 		 * NOTE(review): with soft updates it is presumably released
 		 * later by the softdep code; confirm.
 		 */
 		if (!DOINGSOFTDEP(ITOV(ip)))
 			ffs_blkfree(ip, bprev, (long)osize);
 		/* More than nsize was allocated; give back the unused tail. */
 		if (nsize < request)
 			ffs_blkfree(ip, bno + numfrags(fs, nsize),
 			    (long)(request - nsize));
 		ip->i_blocks += btodb(nsize - osize);
 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
 		allocbuf(bp, nsize);
 		bp->b_flags |= B_DONE;
 		/* Zero only the newly added tail of the buffer. */
 		bzero((char *)bp->b_data + osize, (u_int)nsize - osize);
 		*bpp = bp;
 		return (0);
 	}
 #ifdef QUOTA
 	/*
 	 * Restore user's disk quota because allocation failed.
 	 */
 	(void) chkdq(ip, (long)-btodb(nsize - osize), cred, FORCE);
 #endif
 	brelse(bp);
 nospace:
 	/*
 	 * no space available
 	 */
 	ffs_fserr(fs, cred->cr_uid, "file system full");
 	uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt);
 	return (ENOSPC);
 }
 
-SYSCTL_NODE(_vfs, OID_AUTO, ffs, CTLFLAG_RW, 0, "FFS filesystem");
-
 /*
  * Reallocate a sequence of blocks into a contiguous sequence of blocks.
  *
  * The vnode and an array of buffer pointers for a range of sequential
  * logical blocks to be made contiguous is given. The allocator attempts
  * to find a range of sequential blocks starting as close as possible to
  * an fs_rotdelay offset from the end of the allocation for the logical
  * block immediately preceding the current range. If successful, the
  * physical block numbers in the buffer pointers and in the inode are
  * changed to reflect the new allocation. If unsuccessful, the allocation
  * is left unchanged. The success in doing the reallocation is returned.
  * Note that the error return is not reflected back to the user. Rather
  * the previous block allocation will be used.
  */
+
+SYSCTL_NODE(_vfs, OID_AUTO, ffs, CTLFLAG_RW, 0, "FFS filesystem");
+
 /*
  * When nonzero, ffs_reallocblks() queues the rewritten indirect blocks
  * with bdwrite() and does not force the inode out; when zero it uses
  * bwrite() and UFS_UPDATE() instead.
  */
 static int doasyncfree = 1;
-SYSCTL_INT(_vfs_ffs, FFS_ASYNCFREE, doasyncfree, CTLFLAG_RW, &doasyncfree, 0, "");
+SYSCTL_INT(_vfs_ffs, OID_AUTO, doasyncfree, CTLFLAG_RW, &doasyncfree, 0, "");
 
 /* When zero, ffs_reallocblks() returns ENOSPC without doing anything. */
 static int doreallocblks = 1;
-SYSCTL_INT(_vfs_ffs, FFS_REALLOCBLKS, doreallocblks, CTLFLAG_RW, &doreallocblks, 0, "");
+SYSCTL_INT(_vfs_ffs, OID_AUTO, doreallocblks, CTLFLAG_RW, &doreallocblks, 0, "");
 
 #ifdef DEBUG
 /*
  * While nonzero, each successful ffs_reallocblks() call prints the old
  * and new block numbers and then decrements this counter.
  */
 static volatile int prtrealloc = 0;
 #endif
 
 /*
  * Try to move the blocks of the given buffer cluster into one contiguous
  * run on disk.  Returns 0 when the blocks were moved and ENOSPC for every
  * refusal or failure, in which case the existing allocation is kept.
  */
 int
 ffs_reallocblks(ap)
 	struct vop_reallocblks_args /* {
 		struct vnode *a_vp;
 		struct cluster_save *a_buflist;
 	} */ *ap;
 {
 	struct fs *fs;
 	struct inode *ip;
 	struct vnode *vp;
 	struct buf *sbp, *ebp;
 	ufs_daddr_t *bap, *sbap, *ebap = 0;
 	struct cluster_save *buflist;
 	ufs_daddr_t start_lbn, end_lbn, soff, newblk, blkno;
 	struct indir start_ap[NIADDR + 1], end_ap[NIADDR + 1], *idp;
 	int i, len, start_lvl, end_lvl, pref, ssize;
 
 	/* Reallocation can be switched off through the sysctl knob. */
 	if (doreallocblks == 0)
 		return (ENOSPC);
 	vp = ap->a_vp;
 	ip = VTOI(vp);
 	fs = ip->i_fs;
 	if (fs->fs_contigsumsize <= 0)
 		return (ENOSPC);
 	buflist = ap->a_buflist;
 	len = buflist->bs_nchildren;
 	start_lbn = buflist->bs_children[0]->b_lblkno;
 	end_lbn = start_lbn + len - 1;
 #ifdef DIAGNOSTIC
 	/* Every buffer must map an allocated, logically sequential block. */
 	for (i = 0; i < len; i++)
 		if (!ffs_checkblk(ip,
 		   dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize))
 			panic("ffs_reallocblks: unallocated block 1");
 	for (i = 1; i < len; i++)
 		if (buflist->bs_children[i]->b_lblkno != start_lbn + i)
 			panic("ffs_reallocblks: non-logical cluster");
 	blkno = buflist->bs_children[0]->b_blkno;
 	ssize = fsbtodb(fs, fs->fs_frag);
 	for (i = 1; i < len - 1; i++)
 		if (buflist->bs_children[i]->b_blkno != blkno + (i * ssize))
 			panic("ffs_reallocblks: non-physical cluster %d", i);
 #endif
 	/*
 	 * If the latest allocation is in a new cylinder group, assume that
 	 * the filesystem has decided to move and do not force it back to
 	 * the previous cylinder group.
 	 */
 	if (dtog(fs, dbtofsb(fs, buflist->bs_children[0]->b_blkno)) !=
 	    dtog(fs, dbtofsb(fs, buflist->bs_children[len - 1]->b_blkno)))
 		return (ENOSPC);
 	if (ufs_getlbns(vp, start_lbn, start_ap, &start_lvl) ||
 	    ufs_getlbns(vp, end_lbn, end_ap, &end_lvl))
 		return (ENOSPC);
 	/*
 	 * Get the starting offset and block map for the first block.
 	 */
 	if (start_lvl == 0) {
 		/* Direct block: the map is the inode's own i_db[] array. */
 		sbap = &ip->i_db[0];
 		soff = start_lbn;
 	} else {
 		/* Otherwise the map is the indirect block that holds it. */
 		idp = &start_ap[start_lvl - 1];
 		if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, NOCRED, &sbp)) {
 			brelse(sbp);
 			return (ENOSPC);
 		}
 		sbap = (ufs_daddr_t *)sbp->b_data;
 		soff = idp->in_off;
 	}
 	/*
 	 * Find the preferred location for the cluster.
 	 */
 	pref = ffs_blkpref(ip, start_lbn, soff, sbap);
 	/*
 	 * If the block range spans two block maps, get the second map.
 	 * ssize ends up as the number of leading blocks that are mapped
 	 * by the first map; ssize == len means a single map covers all.
 	 */
 	if (end_lvl == 0 || (idp = &end_ap[end_lvl - 1])->in_off + 1 >= len) {
 		ssize = len;
 	} else {
 #ifdef DIAGNOSTIC
 		if (start_ap[start_lvl-1].in_lbn == idp->in_lbn)
 			panic("ffs_reallocblk: start == end");
 #endif
 		ssize = len - (idp->in_off + 1);
 		if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, NOCRED, &ebp))
 			goto fail;
 		ebap = (ufs_daddr_t *)ebp->b_data;
 	}
 	/*
 	 * Search the block map looking for an allocation of the desired size.
 	 */
 	if ((newblk = (ufs_daddr_t)ffs_hashalloc(ip, dtog(fs, pref), (long)pref,
 	    len, ffs_clusteralloc)) == 0)
 		goto fail;
 	/*
 	 * We have found a new contiguous block.
 	 *
 	 * First we have to replace the old block pointers with the new
 	 * block pointers in the inode and indirect blocks associated
 	 * with the file.
 	 */
 #ifdef DEBUG
 	if (prtrealloc)
 		printf("realloc: ino %d, lbns %d-%d\n\told:", ip->i_number,
 		    start_lbn, end_lbn);
 #endif
 	blkno = newblk;
 	for (bap = &sbap[soff], i = 0; i < len; i++, blkno += fs->fs_frag) {
 		if (i == ssize) {
 			/*
 			 * Crossed into the second map: continue at its
 			 * first entry.  Setting soff to -i makes soff + i
 			 * the index within that map from here on.
 			 */
 			bap = ebap;
 			soff = -i;
 		}
 #ifdef DIAGNOSTIC
 		if (!ffs_checkblk(ip,
 		   dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize))
 			panic("ffs_reallocblks: unallocated block 2");
 		if (dbtofsb(fs, buflist->bs_children[i]->b_blkno) != *bap)
 			panic("ffs_reallocblks: alloc mismatch");
 #endif
 #ifdef DEBUG
 		if (prtrealloc)
 			printf(" %d,", *bap);
 #endif
 		if (DOINGSOFTDEP(vp)) {
 			/*
 			 * Tell soft updates about the pointer change: a
 			 * direct-block dependency when the pointer lives
 			 * in the inode, an indirect-block one otherwise.
 			 */
 			if (sbap == &ip->i_db[0] && i < ssize)
 				softdep_setup_allocdirect(ip, start_lbn + i,
 				    blkno, *bap, fs->fs_bsize, fs->fs_bsize,
 				    buflist->bs_children[i]);
 			else
 				softdep_setup_allocindir_page(ip, start_lbn + i,
 				    i < ssize ? sbp : ebp, soff + i, blkno,
 				    *bap, buflist->bs_children[i]);
 		}
 		*bap++ = blkno;
 	}
 	/*
 	 * Next we must write out the modified inode and indirect blocks.
 	 * For strict correctness, the writes should be synchronous since
 	 * the old block values may have been written to disk. In practice
 	 * they are almost never written, but if we are concerned about
 	 * strict correctness, the `doasyncfree' flag should be set to zero.
 	 *
 	 * The test on `doasyncfree' should be changed to test a flag
 	 * that shows whether the associated buffers and inodes have
 	 * been written. The flag should be set when the cluster is
 	 * started and cleared whenever the buffer or inode is flushed.
 	 * We can then check below to see if it is set, and do the
 	 * synchronous write only when it has been cleared.
 	 */
 	if (sbap != &ip->i_db[0]) {
 		if (doasyncfree)
 			bdwrite(sbp);
 		else
 			bwrite(sbp);
 	} else {
 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
 		if (!doasyncfree)
 			UFS_UPDATE(vp, 1);
 	}
 	/* A second map was read only when the range spanned two maps. */
 	if (ssize < len) {
 		if (doasyncfree)
 			bdwrite(ebp);
 		else
 			bwrite(ebp);
 	}
 	/*
 	 * Last, free the old blocks and assign the new blocks to the buffers.
 	 */
 #ifdef DEBUG
 	if (prtrealloc)
 		printf("\n\tnew:");
 #endif
 	for (blkno = newblk, i = 0; i < len; i++, blkno += fs->fs_frag) {
 		/*
 		 * NOTE(review): with soft updates the old blocks are not
 		 * freed here; presumably the softdep code does it later.
 		 */
 		if (!DOINGSOFTDEP(vp))
 			ffs_blkfree(ip,
 			    dbtofsb(fs, buflist->bs_children[i]->b_blkno),
 			    fs->fs_bsize);
 		buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno);
 #ifdef DIAGNOSTIC
 		if (!ffs_checkblk(ip,
 		   dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize))
 			panic("ffs_reallocblks: unallocated block 3");
 #endif
 #ifdef DEBUG
 		if (prtrealloc)
 			printf(" %d,", blkno);
 #endif
 	}
 #ifdef DEBUG
 	if (prtrealloc) {
 		prtrealloc--;
 		printf("\n");
 	}
 #endif
 	return (0);
 
 fail:
 	/*
 	 * Release whichever map buffers were read: ebp only when the range
 	 * spanned two maps, sbp only when the first map is an indirect block.
 	 */
 	if (ssize < len)
 		brelse(ebp);
 	if (sbap != &ip->i_db[0])
 		brelse(sbp);
 	return (ENOSPC);
 }
 
 /*
  * Allocate an inode in the file system.
  *
  * If allocating a directory, use ffs_dirpref to select the inode.
  * If allocating in a directory, the following hierarchy is followed:
  *   1) allocate the preferred inode.
  *   2) allocate an inode in the same cylinder group.
  *   3) quadratically rehash into other cylinder groups, until an
  *      available inode is located.
  * If no inode preference is given the following hierarchy is used
  * to allocate an inode:
  *   1) allocate an inode in cylinder group 0.
  *   2) quadratically rehash into other cylinder groups, until an
  *      available inode is located.
  *
  * pvp is the vnode whose inode supplies the filesystem and, for
  * non-directories, the preferred inode number.  On success the vnode of
  * the new inode is stored in *vpp and 0 is returned; otherwise *vpp is
  * NULL and the result is ENOSPC or the error from VFS_VGET().
  */
 int
 ffs_valloc(pvp, mode, cred, vpp)
 	struct vnode *pvp;
 	int mode;
 	struct ucred *cred;
 	struct vnode **vpp;
 {
 	register struct inode *pip;
 	register struct fs *fs;
 	register struct inode *ip;
 	ino_t ino, ipref;
 	int cg, error;
 
 	*vpp = NULL;
 	pip = VTOI(pvp);
 	fs = pip->i_fs;
 	if (fs->fs_cstotal.cs_nifree == 0)
 		goto noinodes;
 
 	if ((mode & IFMT) == IFDIR)
 		ipref = ffs_dirpref(fs);
 	else
 		ipref = pip->i_number;
 	/* An out-of-range preference falls back to inode 0. */
 	if (ipref >= fs->fs_ncg * fs->fs_ipg)
 		ipref = 0;
 	cg = ino_to_cg(fs, ipref);
 	/* For inodes, ffs_hashalloc() is passed the mode in place of a size. */
 	ino = (ino_t)ffs_hashalloc(pip, cg, (long)ipref, mode,
 					(allocfcn_t *)ffs_nodealloccg);
 	if (ino == 0)
 		goto noinodes;
 	error = VFS_VGET(pvp->v_mount, ino, vpp);
 	if (error) {
 		/* Could not get a vnode for it; give the inode back. */
 		UFS_VFREE(pvp, ino, mode);
 		return (error);
 	}
 	ip = VTOI(*vpp);
 	/* A free inode must have a zero mode. */
 	if (ip->i_mode) {
 		printf("mode = 0%o, inum = %lu, fs = %s\n",
 		    ip->i_mode, (u_long)ip->i_number, fs->fs_fsmnt);
 		panic("ffs_valloc: dup alloc");
 	}
 	/*
 	 * Report and clear a leftover block count on a free inode.  With
 	 * the FS_UNCLEAN test, both the report and the reset are skipped
 	 * while that flag is set in fs_flags.
 	 */
-	if (ip->i_blocks) {				/* XXX */
+	if (ip->i_blocks && (fs->fs_flags & FS_UNCLEAN) == 0) {	    /* XXX */
 		printf("free inode %s/%lu had %ld blocks\n",
 		    fs->fs_fsmnt, (u_long)ino, (long)ip->i_blocks);
 		ip->i_blocks = 0;
 	}
 	ip->i_flags = 0;
 	/*
 	 * Set up a new generation number for this inode.
 	 * A generation of zero, or one that wraps to zero when incremented,
 	 * is replaced with a value derived from random().
 	 */
 	if (ip->i_gen == 0 || ++ip->i_gen == 0)
 		ip->i_gen = random() / 2 + 1;
 	return (0);
 noinodes:
 	ffs_fserr(fs, cred->cr_uid, "out of inodes");
 	uprintf("\n%s: create/symlink failed, no inodes free\n", fs->fs_fsmnt);
 	return (ENOSPC);
 }
 
 /*
  * Pick a cylinder group for a new directory.
  *
  * Only groups whose free-inode count is at least the per-group average
  * are considered; among those, the group with the fewest directories
  * wins (the earliest one on a tie).  Group 0 is used when no group has
  * fewer than fs_ipg directories and enough free inodes.  The result is
  * the number of the first inode in the chosen group.
  */
 static ino_t
 ffs_dirpref(fs)
 	register struct fs *fs;
 {
 	int cg, bestcg, fewest, threshold;
 
 	threshold = fs->fs_cstotal.cs_nifree / fs->fs_ncg;
 	fewest = fs->fs_ipg;
 	bestcg = 0;
 	for (cg = 0; cg < fs->fs_ncg; cg++) {
 		/* Already has at least as many directories as the best. */
 		if (fs->fs_cs(fs, cg).cs_ndir >= fewest)
 			continue;
 		/* Below-average supply of free inodes. */
 		if (fs->fs_cs(fs, cg).cs_nifree < threshold)
 			continue;
 		bestcg = cg;
 		fewest = fs->fs_cs(fs, cg).cs_ndir;
 	}
 	return ((ino_t)(fs->fs_ipg * bestcg));
 }
 
 /*
  * Choose the preferred disk position for the next block of a file.
  *
  * A file is treated as a series of sections: the direct blocks form the
  * first one and every later section holds fs_maxbpg blocks.
  *
  * Starting a section (indx is a multiple of fs_maxbpg, or the previous
  * map entry is empty):
  *   - while lbn is still below NDADDR + NINDIR(fs), ask for the cylinder
  *     group of the file's inode;
  *   - otherwise sweep the cylinder groups for one whose free-block count
  *     is at least the average.  The sweep begins one group past the
  *     group of the previous block; when that block is unknown (first
  *     entry of an indirect block, or a hole) the start is guessed from
  *     the inode's group and the logical block number.  The group found
  *     is recorded in fs_cgrotor.  Zero is returned if no group qualifies.
  *
  * Continuing a section: ask for the block right after the previous one.
  * Once fs_maxcontig blocks in a row are contiguous and fs_rotdelay is
  * non-zero, the request is pushed further out by the rotational delay so
  * that the disk head is in transit for at least fs_rotdelay milliseconds,
  * giving the processor time to schedule another I/O transfer.
  */
 ufs_daddr_t
 ffs_blkpref(ip, lbn, indx, bap)
 	struct inode *ip;
 	ufs_daddr_t lbn;
 	int indx;
 	ufs_daddr_t *bap;
 {
 	register struct fs *fs;
 	register int cg;
 	int threshold, firstcg;
 	ufs_daddr_t pref;
 
 	fs = ip->i_fs;
 	if (indx % fs->fs_maxbpg != 0 && bap[indx - 1] != 0) {
 		/*
 		 * One or more previous blocks have been laid out.  Ask
 		 * for the next contiguous block unless a full run of
 		 * fs_maxcontig blocks has just been completed, in which
 		 * case add the rotational gap.
 		 */
 		pref = bap[indx - 1] + fs->fs_frag;
 		if (fs->fs_rotdelay != 0 && indx >= fs->fs_maxcontig &&
 		    bap[indx - fs->fs_maxcontig] +
 		    blkstofrags(fs, fs->fs_maxcontig) == pref) {
 			/*
 			 * Convert ms of delay to frags as:
 			 * (frags) = (ms) * (rev/sec) * (sect/rev) /
 			 *	((sect/frag) * (ms/sec))
 			 * then round up to the next block.
 			 */
 			pref += roundup(fs->fs_rotdelay * fs->fs_rps *
 			    fs->fs_nsect / (NSPF(fs) * 1000), fs->fs_frag);
 		}
 		return (pref);
 	}
 
 	/* A new section starts here. */
 	if (lbn < NDADDR + NINDIR(fs)) {
 		cg = ino_to_cg(fs, ip->i_number);
 		return (fs->fs_fpg * cg + fs->fs_frag);
 	}
 	/*
 	 * Look for a cylinder group with at least the average number of
 	 * unused data blocks, starting at firstcg and wrapping around.
 	 */
 	if (indx == 0 || bap[indx - 1] == 0)
 		firstcg = ino_to_cg(fs, ip->i_number) + lbn / fs->fs_maxbpg;
 	else
 		firstcg = dtog(fs, bap[indx - 1]) + 1;
 	firstcg %= fs->fs_ncg;
 	threshold = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
 	for (cg = firstcg; cg < fs->fs_ncg; cg++) {
 		if (fs->fs_cs(fs, cg).cs_nbfree < threshold)
 			continue;
 		fs->fs_cgrotor = cg;
 		return (fs->fs_fpg * cg + fs->fs_frag);
 	}
 	for (cg = 0; cg <= firstcg; cg++) {
 		if (fs->fs_cs(fs, cg).cs_nbfree < threshold)
 			continue;
 		fs->fs_cgrotor = cg;
 		return (fs->fs_fpg * cg + fs->fs_frag);
 	}
 	return (0);
 }
 
 /*
  * Cylinder group overflow search.
  *
  * The supplied allocator is tried on a sequence of cylinder groups:
  *   1) the requested group, with the caller's preference;
  *   2) groups reached by quadratic rehash of the group number;
  *   3) a linear sweep that begins two groups past the requested one.
  * Steps 2 and 3 pass no preference.  The first non-zero allocator
  * result is returned; 0 means every attempt failed.
  */
 /*VARARGS5*/
 static u_long
 ffs_hashalloc(ip, cg, pref, size, allocator)
 	struct inode *ip;
 	int cg;
 	long pref;
 	int size;	/* size for data blocks, mode for inodes */
 	allocfcn_t *allocator;
 {
 	register struct fs *fs;
 	long found;	/* XXX why not same type as we return? */
 	int n, firstcg;
 
 	firstcg = cg;
 #ifdef DIAGNOSTIC
 	if (ITOV(ip)->v_mount->mnt_kern_flag & MNTK_SUSPENDED)
 		panic("ffs_hashalloc: allocation on suspended filesystem");
 #endif
 	fs = ip->i_fs;
 
 	/* Step 1: the preferred cylinder group. */
 	if ((found = (*allocator)(ip, cg, pref, size)) != 0)
 		return (found);
 
 	/* Step 2: quadratic rehash; the stride doubles on every probe. */
 	n = 1;
 	while (n < fs->fs_ncg) {
 		cg += n;
 		if (cg >= fs->fs_ncg)
 			cg -= fs->fs_ncg;
 		if ((found = (*allocator)(ip, cg, 0, size)) != 0)
 			return (found);
 		n *= 2;
 	}
 
 	/*
 	 * Step 3: brute force.  Offset 0 was tried in step 1 and offset 1
 	 * is always the first rehash probe, so begin at offset 2.
 	 */
 	cg = (firstcg + 2) % fs->fs_ncg;
 	for (n = 2; n < fs->fs_ncg; n++) {
 		if ((found = (*allocator)(ip, cg, 0, size)) != 0)
 			return (found);
 		if (++cg == fs->fs_ncg)
 			cg = 0;
 	}
 	return (0);
 }
 
 /*
  * Determine whether a fragment can be extended.
  *
  * Check to see if the necessary fragments are available, and
  * if they are, allocate them.
  *
  * bprev is the address of the existing fragment in cylinder group cg;
  * osize and nsize are its old and new sizes in bytes.  Returns bprev
  * when the fragment was grown in place and 0 when it could not be.
  */
 static ufs_daddr_t
 ffs_fragextend(ip, cg, bprev, osize, nsize)
 	struct inode *ip;
 	int cg;
 	long bprev;
 	int osize, nsize;
 {
 	register struct fs *fs;
 	register struct cg *cgp;
 	struct buf *bp;
 	long bno;
 	int frags, bbase;
 	int i, error;
 	u_int8_t *blksfree;
 
 	fs = ip->i_fs;
 	/* Cheap check against the in-core summary before any I/O. */
 	if (fs->fs_cs(fs, cg).cs_nffree < numfrags(fs, nsize - osize))
 		return (0);
 	frags = numfrags(fs, nsize);
 	bbase = fragnum(fs, bprev);
 	if (bbase > fragnum(fs, (bprev + frags - 1))) {
 		/* cannot extend across a block boundary */
 		return (0);
 	}
 	/* Read the cylinder group block that holds the free map. */
 	error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
 		(int)fs->fs_cgsize, NOCRED, &bp);
 	if (error) {
 		brelse(bp);
 		return (0);
 	}
 	cgp = (struct cg *)bp->b_data;
 	if (!cg_chkmagic(cgp)) {
 		brelse(bp);
 		return (0);
 	}
 	bp->b_xflags |= BX_BKGRDWRITE;
 	cgp->cg_time = time_second;
 	bno = dtogd(fs, bprev);
 	blksfree = cg_blksfree(cgp);
 	/*
 	 * Every fragment between the old end and the new end must be
 	 * free, i.e. have its bit set in the free map.
 	 */
 	for (i = numfrags(fs, osize); i < frags; i++)
 		if (isclr(blksfree, bno + i)) {
 			brelse(bp);
 			return (0);
 		}
 	/*
 	 * the current fragment can be extended
 	 * deduct the count on fragment being extended into
 	 * increase the count on the remaining fragment (if any)
 	 * allocate the extended piece
 	 */
 	/*
 	 * Advance i to the first fragment past the new end that is not
 	 * free, stopping at the end of the block at the latest.
 	 */
 	for (i = frags; i < fs->fs_frag - bbase; i++)
 		if (isclr(blksfree, bno + i))
 			break;
 	cgp->cg_frsum[i - numfrags(fs, osize)]--;
 	if (i != frags)
 		cgp->cg_frsum[i - frags]++;
 	/* Mark the added fragments allocated and fix all the counters. */
 	for (i = numfrags(fs, osize); i < frags; i++) {
 		clrbit(blksfree, bno + i);
 		cgp->cg_cs.cs_nffree--;
 		fs->fs_cstotal.cs_nffree--;
 		fs->fs_cs(fs, cg).cs_nffree--;
 	}
 	fs->fs_fmod = 1;
 	if (DOINGSOFTDEP(ITOV(ip)))
 		softdep_setup_blkmapdep(bp, fs, bprev);
 	bdwrite(bp);
 	return (bprev);
 }
 
 /*
  * Determine whether a block can be allocated.
  *
  * Check to see if a block of the appropriate size is available,
  * and if it is, allocate it.
  *
  * size is in bytes; a request of fs_bsize takes a whole block, anything
  * smaller takes a run of fragments.  Returns the address of what was
  * allocated, or 0 when cylinder group cg could not satisfy the request.
  */
 static ufs_daddr_t
 ffs_alloccg(ip, cg, bpref, size)
 	struct inode *ip;
 	int cg;
 	ufs_daddr_t bpref;
 	int size;
 {
 	register struct fs *fs;
 	register struct cg *cgp;
 	struct buf *bp;
 	register int i;
 	ufs_daddr_t bno, blkno;
 	int allocsiz, error, frags;
 	u_int8_t *blksfree;
 
 	fs = ip->i_fs;
 	/* Cheap check against the in-core summary before any I/O. */
 	if (fs->fs_cs(fs, cg).cs_nbfree == 0 && size == fs->fs_bsize)
 		return (0);
 	error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
 		(int)fs->fs_cgsize, NOCRED, &bp);
 	if (error) {
 		brelse(bp);
 		return (0);
 	}
 	cgp = (struct cg *)bp->b_data;
 	/* Repeat the check against the cylinder group block just read. */
 	if (!cg_chkmagic(cgp) ||
 	    (cgp->cg_cs.cs_nbfree == 0 && size == fs->fs_bsize)) {
 		brelse(bp);
 		return (0);
 	}
 	bp->b_xflags |= BX_BKGRDWRITE;
 	cgp->cg_time = time_second;
 	if (size == fs->fs_bsize) {
 		/* Whole-block request. */
 		bno = ffs_alloccgblk(ip, bp, bpref);
 		bdwrite(bp);
 		return (bno);
 	}
 	/*
 	 * check to see if any fragments are already available
 	 * allocsiz is the size which will be allocated, hacking
 	 * it down to a smaller size if necessary
 	 */
 	blksfree = cg_blksfree(cgp);
 	frags = numfrags(fs, size);
 	for (allocsiz = frags; allocsiz < fs->fs_frag; allocsiz++)
 		if (cgp->cg_frsum[allocsiz] != 0)
 			break;
 	if (allocsiz == fs->fs_frag) {
 		/*
 		 * no fragments were available, so a block will be
 		 * allocated, and hacked up
 		 */
 		if (cgp->cg_cs.cs_nbfree == 0) {
 			brelse(bp);
 			return (0);
 		}
 		bno = ffs_alloccgblk(ip, bp, bpref);
 		/* bpref is reused as the cg-relative address of the block. */
 		bpref = dtogd(fs, bno);
 		/* Mark the unused trailing fragments of the block free. */
 		for (i = frags; i < fs->fs_frag; i++)
 			setbit(blksfree, bpref + i);
 		i = fs->fs_frag - frags;
 		cgp->cg_cs.cs_nffree += i;
 		fs->fs_cstotal.cs_nffree += i;
 		fs->fs_cs(fs, cg).cs_nffree += i;
 		fs->fs_fmod = 1;
 		/* One more free run of i fragments now exists. */
 		cgp->cg_frsum[i]++;
 		bdwrite(bp);
 		return (bno);
 	}
 	/* Find a free run of allocsiz fragments; bno is cg-relative. */
 	bno = ffs_mapsearch(fs, cgp, bpref, allocsiz);
 	if (bno < 0) {
 		brelse(bp);
 		return (0);
 	}
 	for (i = 0; i < frags; i++)
 		clrbit(blksfree, bno + i);
 	cgp->cg_cs.cs_nffree -= frags;
 	fs->fs_cstotal.cs_nffree -= frags;
 	fs->fs_cs(fs, cg).cs_nffree -= frags;
 	fs->fs_fmod = 1;
 	/*
 	 * The run of allocsiz fragments is used up; whatever is left over
 	 * becomes a shorter free run.
 	 */
 	cgp->cg_frsum[allocsiz]--;
 	if (frags != allocsiz)
 		cgp->cg_frsum[allocsiz - frags]++;
 	/* Convert the cg-relative address into a filesystem-wide one. */
 	blkno = cg * fs->fs_fpg + bno;
 	if (DOINGSOFTDEP(ITOV(ip)))
 		softdep_setup_blkmapdep(bp, fs, blkno);
 	bdwrite(bp);
 	return ((u_long)blkno);
 }
 
 /*
  * Allocate a block in a cylinder group.
  *
  * This algorithm implements the following policy:
  *   1) allocate the requested block.
  *   2) allocate a rotationally optimal block in the same cylinder.
  *   3) allocate the next available block on the block rotor for the
  *      specified cylinder group.
  * Note that this routine only allocates fs_bsize blocks; these
  * blocks may be fragmented by the routine that allocates them.
  *
  * bp is the buffer holding the cylinder group block; the caller remains
  * responsible for writing or releasing it.  Returns the filesystem-wide
  * address of the allocated block, or 0 if ffs_mapsearch() finds none.
  */
 static ufs_daddr_t
 ffs_alloccgblk(ip, bp, bpref)
 	struct inode *ip;
 	struct buf *bp;
 	ufs_daddr_t bpref;
 {
 	struct fs *fs;
 	struct cg *cgp;
 	ufs_daddr_t bno, blkno;
 	int cylno, pos, delta;
 	short *cylbp;
 	register int i;
 	u_int8_t *blksfree;
 
 	fs = ip->i_fs;
 	cgp = (struct cg *)bp->b_data;
 	blksfree = cg_blksfree(cgp);
 	/*
 	 * No preference, or one that lies in another cylinder group:
 	 * search from this group's rotor instead.
 	 */
 	if (bpref == 0 || dtog(fs, bpref) != cgp->cg_cgx) {
 		bpref = cgp->cg_rotor;
 		goto norot;
 	}
 	/*
 	 * NOTE(review): blknum()/dtogd() presumably turn the preference
 	 * into a block-aligned, cylinder-group-relative address; confirm
 	 * against the macro definitions.
 	 */
 	bpref = blknum(fs, bpref);
 	bpref = dtogd(fs, bpref);
 	/*
 	 * if the requested block is available, use it
 	 */
 	if (ffs_isblock(fs, blksfree, fragstoblks(fs, bpref))) {
 		bno = bpref;
 		goto gotit;
 	}
 	if (fs->fs_nrpos <= 1 || fs->fs_cpc == 0) {
 		/*
 		 * Block layout information is not available.
 		 * Leaving bpref unchanged means we take the
 		 * next available free block following the one
 		 * we just allocated. Hopefully this will at
 		 * least hit a track cache on drives of unknown
 		 * geometry (e.g. SCSI).
 		 */
 		goto norot;
 	}
 	/*
 	 * check for a block available on the same cylinder
 	 */
 	cylno = cbtocylno(fs, bpref);
 	if (cg_blktot(cgp)[cylno] == 0)
 		goto norot;
 	/*
 	 * check the summary information to see if a block is
 	 * available in the requested cylinder starting at the
 	 * requested rotational position and proceeding around.
 	 */
 	cylbp = cg_blks(fs, cgp, cylno);
 	pos = cbtorpos(fs, bpref);
 	for (i = pos; i < fs->fs_nrpos; i++)
 		if (cylbp[i] > 0)
 			break;
 	/* Nothing from pos to the end; wrap around and scan 0..pos-1. */
 	if (i == fs->fs_nrpos)
 		for (i = 0; i < pos; i++)
 			if (cylbp[i] > 0)
 				break;
 	if (cylbp[i] > 0) {
 		/*
 		 * found a rotational position, now find the actual
 		 * block. A panic if none is actually there.
 		 */
 		pos = cylno % fs->fs_cpc;
 		bno = (cylno - pos) * fs->fs_spc / NSPB(fs);
 		if (fs_postbl(fs, pos)[i] == -1) {
 			printf("pos = %d, i = %d, fs = %s\n",
 			    pos, i, fs->fs_fsmnt);
 			panic("ffs_alloccgblk: cyl groups corrupted");
 		}
 		/*
 		 * Follow the chain of candidates: fs_postbl() gives the
 		 * first one and fs_rotbl() the step to the next.
 		 */
 		for (i = fs_postbl(fs, pos)[i];; ) {
 			if (ffs_isblock(fs, blksfree, bno + i)) {
 				bno = blkstofrags(fs, (bno + i));
 				goto gotit;
 			}
 			delta = fs_rotbl(fs)[i];
 			if (delta <= 0 ||
 			    delta + i > fragstoblks(fs, fs->fs_fpg))
 				break;
 			i += delta;
 		}
 		printf("pos = %d, i = %d, fs = %s\n", pos, i, fs->fs_fsmnt);
 		panic("ffs_alloccgblk: can't find blk in cyl");
 	}
 norot:
 	/*
 	 * no blocks in the requested cylinder, so take next
 	 * available one in this cylinder group.
 	 */
 	bno = ffs_mapsearch(fs, cgp, bpref, (int)fs->fs_frag);
 	if (bno < 0)
 		return (0);
 	cgp->cg_rotor = bno;
 gotit:
 	/*
 	 * bno is cg-relative here.  Mark the block allocated in the free
 	 * map and bring every summary counter in line.
 	 */
 	blkno = fragstoblks(fs, bno);
 	ffs_clrblock(fs, blksfree, (long)blkno);
 	ffs_clusteracct(fs, cgp, blkno, -1);
 	cgp->cg_cs.cs_nbfree--;
 	fs->fs_cstotal.cs_nbfree--;
 	fs->fs_cs(fs, cgp->cg_cgx).cs_nbfree--;
 	cylno = cbtocylno(fs, bno);
 	cg_blks(fs, cgp, cylno)[cbtorpos(fs, bno)]--;
 	cg_blktot(cgp)[cylno]--;
 	fs->fs_fmod = 1;
 	/* Convert the cg-relative address into a filesystem-wide one. */
 	blkno = cgp->cg_cgx * fs->fs_fpg + bno;
 	if (DOINGSOFTDEP(ITOV(ip)))
 		softdep_setup_blkmapdep(bp, fs, blkno);
 	return (blkno);
 }
 
 /*
  * Determine whether a cluster can be allocated.
  *
  * We do not currently check for optimal rotational layout if there
  * are multiple choices in the same cylinder group. Instead we just
  * take the first one that we find following bpref.
  */
 static ufs_daddr_t
 ffs_clusteralloc(ip, cg, bpref, len)
 	struct inode *ip;
 	int cg;
 	ufs_daddr_t bpref;
 	int len;
 {
 	register struct fs *fs;
 	register struct cg *cgp;
 	struct buf *bp;
 	int i, got, run, bno, bit, map;
 	u_char *mapp;
 	int32_t *lp;
 	u_int8_t *blksfree;
 
 	fs = ip->i_fs;
 	if (fs->fs_maxcluster[cg] < len)
 		return (0);
 	if (bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), (int)fs->fs_cgsize,
 	    NOCRED, &bp))
 		goto fail;
 	cgp = (struct cg *)bp->b_data;
 	if (!cg_chkmagic(cgp))
 		goto fail;
 	bp->b_xflags |= BX_BKGRDWRITE;
 	/*
 	 * Check to see if a cluster of the needed size (or bigger) is
 	 * available in this cylinder group.
 	 */
 	lp = &cg_clustersum(cgp)[len];
 	for (i = len; i <= fs->fs_contigsumsize; i++)
 		if (*lp++ > 0)
 			break;
 	if (i > fs->fs_contigsumsize) {
 		/*
 		 * This is the first time looking for a cluster in this
 		 * cylinder group. Update the cluster summary information
 		 * to reflect the true maximum sized cluster so that
 		 * future cluster allocation requests can avoid reading
 		 * the cylinder group map only to find no clusters.
 		 */
 		lp = &cg_clustersum(cgp)[len - 1];
 		for (i = len - 1; i > 0; i--)
 			if (*lp-- > 0)
 				break;
 		fs->fs_maxcluster[cg] = i;
 		goto fail;
 	}
 	/*
 	 * Search the cluster map to find a big enough cluster.
 	 * We take the first one that we find, even if it is larger
 	 * than we need as we prefer to get one close to the previous
 	 * block allocation. We do not search before the current
 	 * preference point as we do not want to allocate a block
 	 * that is allocated before the previous one (as we will
 	 * then have to wait for another pass of the elevator
 	 * algorithm before it will be read). We prefer to fail and
 	 * be recalled to try an allocation in the next cylinder group.
 	 */
 	if (dtog(fs, bpref) != cg)
 		bpref = 0;
 	else
 		bpref = fragstoblks(fs, dtogd(fs, blknum(fs, bpref)));
 	mapp = &cg_clustersfree(cgp)[bpref / NBBY];
 	map = *mapp++;
 	bit = 1 << (bpref % NBBY);
 	for (run = 0, got = bpref; got < cgp->cg_nclusterblks; got++) {
 		if ((map & bit) == 0) {
 			run = 0;
 		} else {
 			run++;
 			if (run == len)
 				break;
 		}
 		if ((got & (NBBY - 1)) != (NBBY - 1)) {
 			bit <<= 1;
 		} else {
 			map = *mapp++;
 			bit = 1;
 		}
 	}
 	if (got >= cgp->cg_nclusterblks)
 		goto fail;
 	/*
 	 * Allocate the cluster that we have found.
 	 */
 	blksfree = cg_blksfree(cgp);
 	for (i = 1; i <= len; i++)
 		if (!ffs_isblock(fs, blksfree, got - run + i))
 			panic("ffs_clusteralloc: map mismatch");
 	bno = cg * fs->fs_fpg + blkstofrags(fs, got - run + 1);
 	if (dtog(fs, bno) != cg)
 		panic("ffs_clusteralloc: allocated out of group");
 	len = blkstofrags(fs, len);
 	for (i = 0; i < len; i += fs->fs_frag)
 		if ((got = ffs_alloccgblk(ip, bp, bno + i)) != bno + i)
 			panic("ffs_clusteralloc: lost block");
 	bdwrite(bp);
 	return (bno);
 
 fail:
 	brelse(bp);
 	return (0);
 }
 
 /*
  * Determine whether an inode can be allocated.
  *
  * Check to see if an inode is available, and if it is,
  * allocate it using the following policy:
  *   1) allocate the requested inode.
  *   2) allocate the next available inode after the requested
  *      inode in the specified cylinder group.
  */
 static ino_t
 ffs_nodealloccg(ip, cg, ipref, mode)
 	struct inode *ip;
 	int cg;
 	ufs_daddr_t ipref;
 	int mode;
 {
 	register struct fs *fs;
 	register struct cg *cgp;
 	struct buf *bp;
 	u_int8_t *inosused;
 	int error, start, len, loc, map, i;
 
 	fs = ip->i_fs;
 	if (fs->fs_cs(fs, cg).cs_nifree == 0)
 		return (0);
 	error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
 		(int)fs->fs_cgsize, NOCRED, &bp);
 	if (error) {
 		brelse(bp);
 		return (0);
 	}
 	cgp = (struct cg *)bp->b_data;
 	if (!cg_chkmagic(cgp) || cgp->cg_cs.cs_nifree == 0) {
 		brelse(bp);
 		return (0);
 	}
 	bp->b_xflags |= BX_BKGRDWRITE;
 	cgp->cg_time = time_second;
 	inosused = cg_inosused(cgp);
 	if (ipref) {
 		ipref %= fs->fs_ipg;
 		if (isclr(inosused, ipref))
 			goto gotit;
 	}
 	start = cgp->cg_irotor / NBBY;
 	len = howmany(fs->fs_ipg - cgp->cg_irotor, NBBY);
 	loc = skpc(0xff, len, &inosused[start]);
 	if (loc == 0) {
 		len = start + 1;
 		start = 0;
 		loc = skpc(0xff, len, &inosused[0]);
 		if (loc == 0) {
 			printf("cg = %d, irotor = %ld, fs = %s\n",
 			    cg, (long)cgp->cg_irotor, fs->fs_fsmnt);
 			panic("ffs_nodealloccg: map corrupted");
 			/* NOTREACHED */
 		}
 	}
 	i = start + len - loc;
 	map = inosused[i];
 	ipref = i * NBBY;
 	for (i = 1; i < (1 << NBBY); i <<= 1, ipref++) {
 		if ((map & i) == 0) {
 			cgp->cg_irotor = ipref;
 			goto gotit;
 		}
 	}
 	printf("fs = %s\n", fs->fs_fsmnt);
 	panic("ffs_nodealloccg: block not in map");
 	/* NOTREACHED */
 gotit:
 	if (DOINGSOFTDEP(ITOV(ip)))
 		softdep_setup_inomapdep(bp, ip, cg * fs->fs_ipg + ipref);
 	setbit(inosused, ipref);
 	cgp->cg_cs.cs_nifree--;
 	fs->fs_cstotal.cs_nifree--;
 	fs->fs_cs(fs, cg).cs_nifree--;
 	fs->fs_fmod = 1;
 	if ((mode & IFMT) == IFDIR) {
 		cgp->cg_cs.cs_ndir++;
 		fs->fs_cstotal.cs_ndir++;
 		fs->fs_cs(fs, cg).cs_ndir++;
 	}
 	bdwrite(bp);
 	return (cg * fs->fs_ipg + ipref);
 }
 
 /*
  * Free a block or fragment.
  *
  * The specified block or fragment is placed back in the
  * free map. If a fragment is deallocated, a possible
  * block reassembly is checked.
  */
 void
 ffs_blkfree(ip, bno, size)
 	register struct inode *ip;
 	ufs_daddr_t bno;
 	long size;
 {
 	register struct fs *fs;
 	register struct cg *cgp;
 	struct buf *bp;
 	ufs_daddr_t fragno, cgbno;
 	int i, error, cg, blk, frags, bbase;
 	u_int8_t *blksfree;
 #ifdef DIAGNOSTIC
 	struct vnode *vp;
 #endif
 
 	fs = ip->i_fs;
 #ifdef DIAGNOSTIC
 	if ((vp = ITOV(ip)) != NULL && vp->v_mount != NULL &&
 	    (vp->v_mount->mnt_kern_flag & MNTK_SUSPENDED))
 		panic("ffs_blkfree: deallocation on suspended filesystem");
 	if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0 ||
 	    fragnum(fs, bno) + numfrags(fs, size) > fs->fs_frag) {
 		printf("dev=%s, bno = %ld, bsize = %ld, size = %ld, fs = %s\n",
 		    devtoname(ip->i_dev), (long)bno, (long)fs->fs_bsize, size,
 		    fs->fs_fsmnt);
 		panic("ffs_blkfree: bad size");
 	}
 #endif
 	if ((ip->i_devvp->v_flag & VCOPYONWRITE) &&
 	    ffs_snapblkfree(ip, bno, size))
 		return;
 	VOP_FREEBLKS(ip->i_devvp, fsbtodb(fs, bno), size);
 	cg = dtog(fs, bno);
 	if ((u_int)bno >= fs->fs_size) {
 		printf("bad block %ld, ino %lu\n",
 		    (long)bno, (u_long)ip->i_number);
 		ffs_fserr(fs, ip->i_uid, "bad block");
 		return;
 	}
 	error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
 		(int)fs->fs_cgsize, NOCRED, &bp);
 	if (error) {
 		brelse(bp);
 		return;
 	}
 	cgp = (struct cg *)bp->b_data;
 	if (!cg_chkmagic(cgp)) {
 		brelse(bp);
 		return;
 	}
 	bp->b_xflags |= BX_BKGRDWRITE;
 	cgp->cg_time = time_second;
 	cgbno = dtogd(fs, bno);
 	blksfree = cg_blksfree(cgp);
 	if (size == fs->fs_bsize) {
 		fragno = fragstoblks(fs, cgbno);
 		if (!ffs_isfreeblock(fs, blksfree, fragno)) {
 			printf("dev = %s, block = %ld, fs = %s\n",
 			    devtoname(ip->i_dev), (long)bno, fs->fs_fsmnt);
 			panic("ffs_blkfree: freeing free block");
 		}
 		ffs_setblock(fs, blksfree, fragno);
 		ffs_clusteracct(fs, cgp, fragno, 1);
 		cgp->cg_cs.cs_nbfree++;
 		fs->fs_cstotal.cs_nbfree++;
 		fs->fs_cs(fs, cg).cs_nbfree++;
 		i = cbtocylno(fs, cgbno);
 		cg_blks(fs, cgp, i)[cbtorpos(fs, cgbno)]++;
 		cg_blktot(cgp)[i]++;
 	} else {
 		bbase = cgbno - fragnum(fs, cgbno);
 		/*
 		 * decrement the counts associated with the old frags
 		 */
 		blk = blkmap(fs, blksfree, bbase);
 		ffs_fragacct(fs, blk, cgp->cg_frsum, -1);
 		/*
 		 * deallocate the fragment
 		 */
 		frags = numfrags(fs, size);
 		for (i = 0; i < frags; i++) {
 			if (isset(blksfree, cgbno + i)) {
 				printf("dev = %s, block = %ld, fs = %s\n",
 				    devtoname(ip->i_dev), (long)(bno + i),
 				    fs->fs_fsmnt);
 				panic("ffs_blkfree: freeing free frag");
 			}
 			setbit(blksfree, cgbno + i);
 		}
 		cgp->cg_cs.cs_nffree += i;
 		fs->fs_cstotal.cs_nffree += i;
 		fs->fs_cs(fs, cg).cs_nffree += i;
 		/*
 		 * add back in counts associated with the new frags
 		 */
 		blk = blkmap(fs, blksfree, bbase);
 		ffs_fragacct(fs, blk, cgp->cg_frsum, 1);
 		/*
 		 * if a complete block has been reassembled, account for it
 		 */
 		fragno = fragstoblks(fs, bbase);
 		if (ffs_isblock(fs, blksfree, fragno)) {
 			cgp->cg_cs.cs_nffree -= fs->fs_frag;
 			fs->fs_cstotal.cs_nffree -= fs->fs_frag;
 			fs->fs_cs(fs, cg).cs_nffree -= fs->fs_frag;
 			ffs_clusteracct(fs, cgp, fragno, 1);
 			cgp->cg_cs.cs_nbfree++;
 			fs->fs_cstotal.cs_nbfree++;
 			fs->fs_cs(fs, cg).cs_nbfree++;
 			i = cbtocylno(fs, bbase);
 			cg_blks(fs, cgp, i)[cbtorpos(fs, bbase)]++;
 			cg_blktot(cgp)[i]++;
 		}
 	}
 	fs->fs_fmod = 1;
 	bdwrite(bp);
 }
 
 #ifdef DIAGNOSTIC
 /*
  * Verify allocation of a block or fragment. Returns true if block or
  * fragment is allocated, false if it is free.
  */
 static int
 ffs_checkblk(ip, bno, size)
 	struct inode *ip;
 	ufs_daddr_t bno;
 	long size;
 {
 	struct fs *fs;
 	struct cg *cgp;
 	struct buf *bp;
 	int i, error, frags, free;
 	u_int8_t *blksfree;
 
 	fs = ip->i_fs;
 	if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) {
 		printf("bsize = %ld, size = %ld, fs = %s\n",
 		    (long)fs->fs_bsize, size, fs->fs_fsmnt);
 		panic("ffs_checkblk: bad size");
 	}
 	if ((u_int)bno >= fs->fs_size)
 		panic("ffs_checkblk: bad block %d", bno);
 	error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, dtog(fs, bno))),
 		(int)fs->fs_cgsize, NOCRED, &bp);
 	if (error)
 		panic("ffs_checkblk: cg bread failed");
 	cgp = (struct cg *)bp->b_data;
 	if (!cg_chkmagic(cgp))
 		panic("ffs_checkblk: cg magic mismatch");
 	bp->b_xflags |= BX_BKGRDWRITE;
 	blksfree = cg_blksfree(cgp);
 	bno = dtogd(fs, bno);
 	if (size == fs->fs_bsize) {
 		free = ffs_isblock(fs, blksfree, fragstoblks(fs, bno));
 	} else {
 		frags = numfrags(fs, size);
 		for (free = 0, i = 0; i < frags; i++)
 			if (isset(blksfree, bno + i))
 				free++;
 		if (free != 0 && free != frags)
 			panic("ffs_checkblk: partially free fragment");
 	}
 	brelse(bp);
 	return (!free);
 }
 #endif /* DIAGNOSTIC */
 
 /*
  * Free an inode.
  */
 int
-ffs_vfree( pvp, ino, mode)
+ffs_vfree(pvp, ino, mode)
 	struct vnode *pvp;
 	ino_t ino;
 	int mode;
 {
 	if (DOINGSOFTDEP(pvp)) {
 		softdep_freefile(pvp, ino, mode);
 		return (0);
 	}
-	return (ffs_freefile(pvp, ino, mode));
+	return (ffs_freefile(VTOI(pvp), ino, mode));
 }
 
 /*
  * Do the actual free operation.
  * The specified inode is placed back in the free map.
  */
- int
- ffs_freefile( pvp, ino, mode)
-	struct vnode *pvp;
+int
+ffs_freefile(pip, ino, mode)
+	struct inode *pip;
 	ino_t ino;
 	int mode;
 {
 	register struct fs *fs;
 	register struct cg *cgp;
-	register struct inode *pip;
 	struct buf *bp;
 	int error, cg;
 	u_int8_t *inosused;
 
-	pip = VTOI(pvp);
 	fs = pip->i_fs;
 	if ((u_int)ino >= fs->fs_ipg * fs->fs_ncg)
 		panic("ffs_vfree: range: dev = (%d,%d), ino = %d, fs = %s",
 		    major(pip->i_dev), minor(pip->i_dev), ino, fs->fs_fsmnt);
 	cg = ino_to_cg(fs, ino);
 	error = bread(pip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
 		(int)fs->fs_cgsize, NOCRED, &bp);
 	if (error) {
 		brelse(bp);
 		return (error);
 	}
 	cgp = (struct cg *)bp->b_data;
 	if (!cg_chkmagic(cgp)) {
 		brelse(bp);
 		return (0);
 	}
 	bp->b_xflags |= BX_BKGRDWRITE;
 	cgp->cg_time = time_second;
 	inosused = cg_inosused(cgp);
 	ino %= fs->fs_ipg;
 	if (isclr(inosused, ino)) {
-		printf("dev = %s, ino = %lu, fs = %s\n",
-		    devtoname(pip->i_dev), (u_long)ino, fs->fs_fsmnt);
+		printf("dev = %s, ino = %lu, fs = %s\n", devtoname(pip->i_dev),
+		    (u_long)ino + cg * fs->fs_ipg, fs->fs_fsmnt);
 		if (fs->fs_ronly == 0)
 			panic("ffs_vfree: freeing free inode");
 	}
 	clrbit(inosused, ino);
 	if (ino < cgp->cg_irotor)
 		cgp->cg_irotor = ino;
 	cgp->cg_cs.cs_nifree++;
 	fs->fs_cstotal.cs_nifree++;
 	fs->fs_cs(fs, cg).cs_nifree++;
 	if ((mode & IFMT) == IFDIR) {
 		cgp->cg_cs.cs_ndir--;
 		fs->fs_cstotal.cs_ndir--;
 		fs->fs_cs(fs, cg).cs_ndir--;
 	}
 	fs->fs_fmod = 1;
 	bdwrite(bp);
 	return (0);
 }
 
 /*
  * Find a block of the specified size in the specified cylinder group.
  *
  * It is a panic if a request is made to find a block if none are
  * available.
  */
 static ufs_daddr_t
 ffs_mapsearch(fs, cgp, bpref, allocsiz)
 	register struct fs *fs;
 	register struct cg *cgp;
 	ufs_daddr_t bpref;
 	int allocsiz;
 {
 	ufs_daddr_t bno;
 	int start, len, loc, i;
 	int blk, field, subfield, pos;
 	u_int8_t *blksfree;
 
 	/*
 	 * find the fragment by searching through the free block
 	 * map for an appropriate bit pattern
 	 */
 	if (bpref)
 		start = dtogd(fs, bpref) / NBBY;
 	else
 		start = cgp->cg_frotor / NBBY;
 	blksfree = cg_blksfree(cgp);
 	len = howmany(fs->fs_fpg, NBBY) - start;
 	loc = scanc((u_int)len, (u_char *)&blksfree[start],
 		(u_char *)fragtbl[fs->fs_frag],
 		(u_char)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY))));
 	if (loc == 0) {
 		len = start + 1;
 		start = 0;
 		loc = scanc((u_int)len, (u_char *)&blksfree[0],
 			(u_char *)fragtbl[fs->fs_frag],
 			(u_char)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY))));
 		if (loc == 0) {
 			printf("start = %d, len = %d, fs = %s\n",
 			    start, len, fs->fs_fsmnt);
 			panic("ffs_alloccg: map corrupted");
 			/* NOTREACHED */
 		}
 	}
 	bno = (start + len - loc) * NBBY;
 	cgp->cg_frotor = bno;
 	/*
 	 * found the byte in the map
 	 * sift through the bits to find the selected frag
 	 */
 	for (i = bno + NBBY; bno < i; bno += fs->fs_frag) {
 		blk = blkmap(fs, blksfree, bno);
 		blk <<= 1;
 		field = around[allocsiz];
 		subfield = inside[allocsiz];
 		for (pos = 0; pos <= fs->fs_frag - allocsiz; pos++) {
 			if ((blk & field) == subfield)
 				return (bno + pos);
 			field <<= 1;
 			subfield <<= 1;
 		}
 	}
 	printf("bno = %lu, fs = %s\n", (u_long)bno, fs->fs_fsmnt);
 	panic("ffs_alloccg: block not in map");
 	return (-1);
 }
 
 /*
  * Update the cluster map because of an allocation or free.
  *
  * Cnt == 1 means free; cnt == -1 means allocating.
  */
 static void
 ffs_clusteracct(fs, cgp, blkno, cnt)
 	struct fs *fs;
 	struct cg *cgp;
 	ufs_daddr_t blkno;
 	int cnt;
 {
 	int32_t *sump;
 	int32_t *lp;
 	u_char *freemapp, *mapp;
 	int i, start, end, forw, back, map, bit;
 
 	if (fs->fs_contigsumsize <= 0)
 		return;
 	freemapp = cg_clustersfree(cgp);
 	sump = cg_clustersum(cgp);
 	/*
 	 * Allocate or clear the actual block.
 	 */
 	if (cnt > 0)
 		setbit(freemapp, blkno);
 	else
 		clrbit(freemapp, blkno);
 	/*
 	 * Find the size of the cluster going forward.
 	 */
 	start = blkno + 1;
 	end = start + fs->fs_contigsumsize;
 	if (end >= cgp->cg_nclusterblks)
 		end = cgp->cg_nclusterblks;
 	mapp = &freemapp[start / NBBY];
 	map = *mapp++;
 	bit = 1 << (start % NBBY);
 	for (i = start; i < end; i++) {
 		if ((map & bit) == 0)
 			break;
 		if ((i & (NBBY - 1)) != (NBBY - 1)) {
 			bit <<= 1;
 		} else {
 			map = *mapp++;
 			bit = 1;
 		}
 	}
 	forw = i - start;
 	/*
 	 * Find the size of the cluster going backward.
 	 */
 	start = blkno - 1;
 	end = start - fs->fs_contigsumsize;
 	if (end < 0)
 		end = -1;
 	mapp = &freemapp[start / NBBY];
 	map = *mapp--;
 	bit = 1 << (start % NBBY);
 	for (i = start; i > end; i--) {
 		if ((map & bit) == 0)
 			break;
 		if ((i & (NBBY - 1)) != 0) {
 			bit >>= 1;
 		} else {
 			map = *mapp--;
 			bit = 1 << (NBBY - 1);
 		}
 	}
 	back = start - i;
 	/*
 	 * Account for old cluster and the possibly new forward and
 	 * back clusters.
 	 */
 	i = back + forw + 1;
 	if (i > fs->fs_contigsumsize)
 		i = fs->fs_contigsumsize;
 	sump[i] += cnt;
 	if (back > 0)
 		sump[back] -= cnt;
 	if (forw > 0)
 		sump[forw] -= cnt;
 	/*
 	 * Update cluster summary information.
 	 */
 	lp = &sump[fs->fs_contigsumsize];
 	for (i = fs->fs_contigsumsize; i > 0; i--)
 		if (*lp-- > 0)
 			break;
 	fs->fs_maxcluster[cgp->cg_cgx] = i;
 }
 
 /*
  * Fserr prints the name of a file system with an error diagnostic.
  *
  * The form of the error message is:
  *	fs: error message
  */
 static void
 ffs_fserr(fs, uid, cp)
 	struct fs *fs;
 	u_int uid;
 	char *cp;
 {
 	struct proc *p = curproc;	/* XXX */
 
 	log(LOG_ERR, "pid %d (%s), uid %d on %s: %s\n", p ? p->p_pid : -1,
 			p ? p->p_comm : "-", uid, fs->fs_fsmnt, cp);
 }
+
+/*
+ * This function provides the capability for the fsck program to
+ * update an active filesystem. Six operations are provided:
+ *
+ * adjrefcnt(inode, amt) - adjusts the reference count on the
+ *	specified inode by the specified amount. Under normal
+ *	operation the count should always go down. Decrementing
+ *	the count to zero will cause the inode to be freed.
+ * adjblkcnt(inode, amt) - adjust the number of blocks used by the
+ *	specified inode by the specified amount.
+ * freedirs(inode, count) - directory inodes [inode..inode + count - 1]
+ *	are marked as free. Inodes should never have to be marked
+ *	as in use.
+ * freefiles(inode, count) - file inodes [inode..inode + count - 1]
+ *	are marked as free. Inodes should never have to be marked
+ *	as in use.
+ * freeblks(blockno, size) - blocks [blockno..blockno + size - 1]
+ *	are marked as free. Blocks should never have to be marked
+ *	as in use.
+ * setflags(flags, set/clear) - the fs_flags field has the specified
+ *	flags set (second parameter +1) or cleared (second parameter -1).
+ */
+
+static int sysctl_ffs_fsck __P((SYSCTL_HANDLER_ARGS));	/* shared handler */
+
+SYSCTL_PROC(_vfs_ffs, FFS_ADJ_REFCNT, adjrefcnt, CTLFLAG_WR|CTLTYPE_STRUCT,
+	0, 0, sysctl_ffs_fsck, "S,fsck", "Adjust Inode Reference Count");
+
+SYSCTL_NODE(_vfs_ffs, FFS_ADJ_BLKCNT, adjblkcnt, CTLFLAG_WR,
+	sysctl_ffs_fsck, "Adjust Inode Used Blocks Count");
+
+SYSCTL_NODE(_vfs_ffs, FFS_DIR_FREE, freedirs, CTLFLAG_WR,
+	sysctl_ffs_fsck, "Free Range of Directory Inodes");
+
+SYSCTL_NODE(_vfs_ffs, FFS_FILE_FREE, freefiles, CTLFLAG_WR,
+	sysctl_ffs_fsck, "Free Range of File Inodes");
+
+SYSCTL_NODE(_vfs_ffs, FFS_BLK_FREE, freeblks, CTLFLAG_WR,
+	sysctl_ffs_fsck, "Free Range of Blocks");
+
+SYSCTL_NODE(_vfs_ffs, FFS_SET_FLAGS, setflags, CTLFLAG_WR,
+	sysctl_ffs_fsck, "Change Filesystem Flags");
+
+#ifdef DEBUG
+static int fsckcmds = 0;	/* nonzero: printf each fsck request */
+SYSCTL_INT(_debug, OID_AUTO, fsckcmds, CTLFLAG_RW, &fsckcmds, 0, "");
+#endif /* DEBUG */
+
+static int
+sysctl_ffs_fsck(SYSCTL_HANDLER_ARGS)
+{
+	struct fsck_cmd cmd;
+	struct inode tip;	/* scratch inode for the free routines */
+	struct ufsmount *ump;
+	struct vnode *vp;
+	struct inode *ip;
+	struct mount *mp;
+	struct fs *fs;
+	ufs_daddr_t blkno;
+	long blkcnt, blksize;
+	struct file *fp;
+	int filetype, error;
+
+	if (req->newlen > sizeof cmd)
+		return (EBADRPC);
+	if ((error = SYSCTL_IN(req, &cmd, sizeof cmd)) != 0)
+		return (error);
+	if (cmd.version != FFS_CMD_VERSION)
+		return (ERPCMISMATCH);
+	if ((error = getvnode(curproc->p_fd, cmd.handle, &fp)) != 0)
+		return (error);
+	mp = ((struct vnode *)fp->f_data)->v_mount;
+	if (mp->mnt_flag & MNT_RDONLY)
+		return (EROFS);
+	ump = VFSTOUFS(mp);
+	fs = ump->um_fs;
+	filetype = IFREG;	/* FFS_DIR_FREE switches this to IFDIR */
+
+	switch (oidp->oid_number) {
+
+	case FFS_SET_FLAGS:
+#ifdef DEBUG
+		if (fsckcmds)
+			printf("%s: %s flags\n", mp->mnt_stat.f_mntonname,
+			    cmd.size > 0 ? "set" : "clear");
+#endif /* DEBUG */
+		if (cmd.size > 0)
+			fs->fs_flags |= (long)cmd.value;
+		else
+			fs->fs_flags &= ~(long)cmd.value;
+		break;
+
+	case FFS_ADJ_REFCNT:
+#ifdef DEBUG
+		if (fsckcmds) {
+			printf("%s: adjust inode %d count by %ld\n",
+			    mp->mnt_stat.f_mntonname, (ino_t)cmd.value,
+			    cmd.size);
+		}
+#endif /* DEBUG */
+		if ((error = VFS_VGET(mp, (ino_t)cmd.value, &vp)) != 0)
+			return (error);
+		ip = VTOI(vp);
+		ip->i_nlink += cmd.size;
+		ip->i_effnlink += cmd.size;
+		ip->i_flag |= IN_CHANGE;
+		if (DOINGSOFTDEP(vp))
+			softdep_change_linkcnt(ip);
+		vput(vp);
+		break;
+
+	case FFS_ADJ_BLKCNT:
+#ifdef DEBUG
+		if (fsckcmds) {
+			printf("%s: adjust inode %d block count by %ld\n",
+			    mp->mnt_stat.f_mntonname, (ino_t)cmd.value,
+			    cmd.size);
+		}
+#endif /* DEBUG */
+		if ((error = VFS_VGET(mp, (ino_t)cmd.value, &vp)) != 0)
+			return (error);
+		ip = VTOI(vp);
+		ip->i_blocks += cmd.size;
+		ip->i_flag |= IN_CHANGE;
+		vput(vp);
+		break;
+
+	case FFS_DIR_FREE:
+		filetype = IFDIR;
+		/* fall through */
+
+	case FFS_FILE_FREE:
+#ifdef DEBUG
+		if (fsckcmds) {
+			if (cmd.size == 1)
+				printf("%s: free %s inode %d\n",
+				    mp->mnt_stat.f_mntonname,
+				    filetype == IFDIR ? "directory" : "file",
+				    (ino_t)cmd.value);
+			else
+				printf("%s: free %s inodes %d-%ld\n",
+				    mp->mnt_stat.f_mntonname,
+				    filetype == IFDIR ? "directory" : "file",
+				    (ino_t)cmd.value,
+				    (long)((ino_t)cmd.value + cmd.size - 1));
+		}
+#endif /* DEBUG */
+		tip.i_devvp = ump->um_devvp;
+		tip.i_dev = ump->um_dev;
+		tip.i_fs = fs;
+		while (cmd.size > 0) {	/* one ffs_freefile call per inode */
+			if ((error = ffs_freefile(&tip, cmd.value, filetype)))
+				return (error);
+			cmd.size -= 1;
+			cmd.value += 1;
+		}
+		break;
+
+	case FFS_BLK_FREE:
+#ifdef DEBUG
+		if (fsckcmds) {
+			if (cmd.size == 1)
+				printf("%s: free block %d\n",
+				    mp->mnt_stat.f_mntonname,
+				    (ufs_daddr_t)cmd.value);
+			else
+				printf("%s: free blocks %d-%ld\n",
+				    mp->mnt_stat.f_mntonname,
+				    (ufs_daddr_t)cmd.value,
+				    (ufs_daddr_t)cmd.value + cmd.size - 1);
+		}
+#endif /* DEBUG */
+		tip.i_number = ROOTINO;
+		tip.i_devvp = ump->um_devvp;
+		tip.i_dev = ump->um_dev;
+		tip.i_fs = fs;
+		tip.i_size = cmd.size * fs->fs_fsize;
+		tip.i_uid = 0;
+		tip.i_vnode = NULL;
+		blkno = (ufs_daddr_t)cmd.value;
+		blkcnt = cmd.size;
+		blksize = fs->fs_frag - (blkno % fs->fs_frag);
+		while (blkcnt > 0) {	/* never cross a block boundary */
+			if (blksize > blkcnt)
+				blksize = blkcnt;
+			ffs_blkfree(&tip, blkno, blksize * fs->fs_fsize);
+			blkno += blksize;
+			blkcnt -= blksize;
+			blksize = fs->fs_frag;
+		}
+		break;
+
+	default:
+#ifdef DEBUG
+		if (fsckcmds) {
+			printf("Invalid request %d from fsck\n",
+			    oidp->oid_number);
+		}
+#endif /* DEBUG */
+		return (EINVAL);
+
+	}
+	return (0);
+}
diff --git a/sys/ufs/ffs/ffs_extern.h b/sys/ufs/ffs/ffs_extern.h
index a75598015558..4c8e22f27665 100644
--- a/sys/ufs/ffs/ffs_extern.h
+++ b/sys/ufs/ffs/ffs_extern.h
@@ -1,142 +1,127 @@
 /*-
  * Copyright (c) 1991, 1993, 1994
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by the University of
  *	California, Berkeley and its contributors.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)ffs_extern.h	8.6 (Berkeley) 3/30/95
  * $FreeBSD$
  */
 
 #ifndef _UFS_FFS_EXTERN_H
 #define	_UFS_FFS_EXTERN_H
 
-/*
- * Sysctl values for the fast filesystem.
- */
-#define FFS_REALLOCBLKS		3	/* block reallocation enabled */
-#define FFS_ASYNCFREE		4	/* asynchronous block freeing enabled */
-#define	FFS_MAXID		5	/* number of valid ffs ids */
-
-#define FFS_NAMES { \
-	{ 0, 0 }, \
-	{ 0, 0 }, \
-	{ 0, 0 }, \
-	{ "doreallocblks", CTLTYPE_INT }, \
-	{ "doasyncfree", CTLTYPE_INT }, \
-}
-
 struct buf;
 struct fid;
 struct fs;
 struct inode;
 struct malloc_type;
 struct mount;
 struct proc;
 struct sockaddr;
 struct statfs;
 struct ucred;
 struct vnode;
 struct vop_balloc_args;
 struct vop_bmap_args;
 struct vop_fsync_args;
 struct vop_reallocblks_args;
 struct vop_copyonwrite_args;
 
 int	ffs_alloc __P((struct inode *,
 	    ufs_daddr_t, ufs_daddr_t, int, struct ucred *, ufs_daddr_t *));
 int	ffs_balloc __P((struct vop_balloc_args *));
 int	ffs_blkatoff __P((struct vnode *, off_t, char **, struct buf **));
 void	ffs_blkfree __P((struct inode *, ufs_daddr_t, long));
 ufs_daddr_t ffs_blkpref __P((struct inode *, ufs_daddr_t, int, ufs_daddr_t *));
 int	ffs_bmap __P((struct vop_bmap_args *));
 void	ffs_clrblock __P((struct fs *, u_char *, ufs_daddr_t));
 int	ffs_fhtovp __P((struct mount *, struct fid *, struct vnode **));
 int	ffs_flushfiles __P((struct mount *, int, struct proc *));
 void	ffs_fragacct __P((struct fs *, int, int32_t [], int));
-int	ffs_freefile __P(( struct vnode *, ino_t, int ));
+int	ffs_freefile __P((struct inode *, ino_t, int ));
 int	ffs_isblock __P((struct fs *, u_char *, ufs_daddr_t));
 int	ffs_isfreeblock __P((struct fs *, unsigned char *, ufs_daddr_t));
 int	ffs_mountfs __P((struct vnode *, struct mount *, struct proc *,
 	     struct malloc_type *));
 int	ffs_mountroot __P((void));
 int	ffs_mount __P((struct mount *, char *, caddr_t, struct nameidata *,
 	    struct proc *));
 int	ffs_reallocblks __P((struct vop_reallocblks_args *));
 int	ffs_realloccg __P((struct inode *,
 	    ufs_daddr_t, ufs_daddr_t, int, int, struct ucred *, struct buf **));
 void	ffs_setblock __P((struct fs *, u_char *, ufs_daddr_t));
 int	ffs_snapblkfree __P((struct inode *freeip, ufs_daddr_t bno, long size));
 void	ffs_snapremove __P((struct vnode *vp));
 int	ffs_snapshot __P((struct mount *mp, char *snapfile));
 void	ffs_snapshot_mount __P((struct mount *mp));
 void	ffs_snapshot_unmount __P((struct mount *mp));
 int	ffs_statfs __P((struct mount *, struct statfs *, struct proc *));
 int	ffs_sync __P((struct mount *, int, struct ucred *, struct proc *));
 int	ffs_truncate __P((struct vnode *, off_t, int, struct ucred *, struct proc *));
 int	ffs_unmount __P((struct mount *, int, struct proc *));
 int	ffs_update __P((struct vnode *, int));
 int	ffs_valloc __P((struct vnode *, int, struct ucred *, struct vnode **));
 
 int	ffs_vfree __P((struct vnode *, ino_t, int));
 int	ffs_vget __P((struct mount *, ino_t, struct vnode **));
 int	ffs_vptofh __P((struct vnode *, struct fid *));
 
 extern vop_t **ffs_vnodeop_p;
 extern vop_t **ffs_specop_p;
 extern vop_t **ffs_fifoop_p;
 
 /*
  * Soft update function prototypes.
  */
 void	softdep_initialize __P((void));
 int	softdep_mount __P((struct vnode *, struct mount *, struct fs *,
 	    struct ucred *));
 int	softdep_flushworklist __P((struct mount *, int *, struct proc *));
 int	softdep_flushfiles __P((struct mount *, int, struct proc *));
 void	softdep_update_inodeblock __P((struct inode *, struct buf *, int));
 void	softdep_load_inodeblock __P((struct inode *));
 void	softdep_freefile __P((struct vnode *, ino_t, int));
 void	softdep_setup_freeblocks __P((struct inode *, off_t));
 void	softdep_setup_inomapdep __P((struct buf *, struct inode *, ino_t));
 void	softdep_setup_blkmapdep __P((struct buf *, struct fs *, ufs_daddr_t));
 void	softdep_setup_allocdirect __P((struct inode *, ufs_lbn_t, ufs_daddr_t,
 	    ufs_daddr_t, long, long, struct buf *));
 void	softdep_setup_allocindir_meta __P((struct buf *, struct inode *,
 	    struct buf *, int, ufs_daddr_t));
 void	softdep_setup_allocindir_page __P((struct inode *, ufs_lbn_t,
 	    struct buf *, int, ufs_daddr_t, ufs_daddr_t, struct buf *));
 void	softdep_fsync_mountdev __P((struct vnode *));
 int	softdep_sync_metadata __P((struct vop_fsync_args *));
 /* XXX incorrectly moved to mount.h - should be indirect function */
 #if 0
 int	softdep_fsync __P((struct vnode *vp));
 #endif
 
 #endif /* !_UFS_FFS_EXTERN_H */
diff --git a/sys/ufs/ffs/ffs_softdep.c b/sys/ufs/ffs/ffs_softdep.c
index 3c4ed20a10df..e8399a61fc48 100644
--- a/sys/ufs/ffs/ffs_softdep.c
+++ b/sys/ufs/ffs/ffs_softdep.c
@@ -1,4945 +1,4958 @@
 /*
  * Copyright 1998, 2000 Marshall Kirk McKusick. All Rights Reserved.
  *
  * The soft updates code is derived from the appendix of a University
  * of Michigan technical report (Gregory R. Ganger and Yale N. Patt,
  * "Soft Updates: A Solution to the Metadata Update Problem in File
  * Systems", CSE-TR-254-95, August 1995).
  *
  * Further information about soft updates can be obtained from:
  *
  *	Marshall Kirk McKusick		http://www.mckusick.com/softdep/
  *	1614 Oxford Street		mckusick@mckusick.com
  *	Berkeley, CA 94709-1608		+1-510-843-9542
  *	USA
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  *
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY MARSHALL KIRK MCKUSICK ``AS IS'' AND ANY
  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  * DISCLAIMED.  IN NO EVENT SHALL MARSHALL KIRK MCKUSICK BE LIABLE FOR
  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	from: @(#)ffs_softdep.c	9.59 (McKusick) 6/21/00
  * $FreeBSD$
  */
 
 /*
  * For now we want the safety net that the DIAGNOSTIC and DEBUG flags provide.
  */
 #ifndef DIAGNOSTIC
 #define DIAGNOSTIC
 #endif
 #ifndef DEBUG
 #define DEBUG
 #endif
 
 #include <sys/param.h>
 #include <sys/kernel.h>
 #include <sys/systm.h>
 #include <sys/bio.h>
 #include <sys/buf.h>
 #include <sys/malloc.h>
 #include <sys/mount.h>
 #include <sys/proc.h>
 #include <sys/syslog.h>
 #include <sys/vnode.h>
 #include <sys/conf.h>
 #include <ufs/ufs/dir.h>
 #include <ufs/ufs/extattr.h>
 #include <ufs/ufs/quota.h>
 #include <ufs/ufs/inode.h>
 #include <ufs/ufs/ufsmount.h>
 #include <ufs/ffs/fs.h>
 #include <ufs/ffs/softdep.h>
 #include <ufs/ffs/ffs_extern.h>
 #include <ufs/ufs/ufs_extern.h>
 
 /*
  * These definitions need to be adapted to the system to which
  * this file is being ported.
  */
 /*
  * malloc types defined for the softdep system.
  */
 static MALLOC_DEFINE(M_PAGEDEP, "pagedep","File page dependencies");
 static MALLOC_DEFINE(M_INODEDEP, "inodedep","Inode dependencies");
 static MALLOC_DEFINE(M_NEWBLK, "newblk","New block allocation");
 static MALLOC_DEFINE(M_BMSAFEMAP, "bmsafemap","Block or frag allocated from cyl group map");
 static MALLOC_DEFINE(M_ALLOCDIRECT, "allocdirect","Block or frag dependency for an inode");
 static MALLOC_DEFINE(M_INDIRDEP, "indirdep","Indirect block dependencies");
 static MALLOC_DEFINE(M_ALLOCINDIR, "allocindir","Block dependency for an indirect block");
 static MALLOC_DEFINE(M_FREEFRAG, "freefrag","Previously used frag for an inode");
 static MALLOC_DEFINE(M_FREEBLKS, "freeblks","Blocks freed from an inode");
 static MALLOC_DEFINE(M_FREEFILE, "freefile","Inode deallocated");
 static MALLOC_DEFINE(M_DIRADD, "diradd","New directory entry");
 static MALLOC_DEFINE(M_MKDIR, "mkdir","New directory");
 static MALLOC_DEFINE(M_DIRREM, "dirrem","Directory entry deleted");
 
 #define M_SOFTDEP_FLAGS	(M_WAITOK | M_USE_RESERVE)
 
 /*
  * Workitem type codes. Each code is used as an index into memtype[]
  * below, so the two lists must stay in the same order.
  */
 #define	D_PAGEDEP	0
 #define	D_INODEDEP	1
 #define	D_NEWBLK	2
 #define	D_BMSAFEMAP	3
 #define	D_ALLOCDIRECT	4
 #define	D_INDIRDEP	5
 #define	D_ALLOCINDIR	6
 #define	D_FREEFRAG	7
 #define	D_FREEBLKS	8
 #define	D_FREEFILE	9
 #define	D_DIRADD	10
 #define	D_MKDIR		11
 #define	D_DIRREM	12
 /* Highest valid type code; it is itself a usable index (inclusive bound). */
 #define D_LAST		D_DIRREM
 
 /*
  * translate from workitem type to memory type
  * MUST match the defines above, such that memtype[D_XXX] == M_XXX
  */
 static struct malloc_type *memtype[] = {
 	M_PAGEDEP,
 	M_INODEDEP,
 	M_NEWBLK,
 	M_BMSAFEMAP,
 	M_ALLOCDIRECT,
 	M_INDIRDEP,
 	M_ALLOCINDIR,
 	M_FREEFRAG,
 	M_FREEBLKS,
 	M_FREEFILE,
 	M_DIRADD,
 	M_MKDIR,
 	M_DIRREM
 };
 
 /* Map a workitem type code to its malloc type; no range check is done. */
 #define DtoM(type) (memtype[type])
 
 /*
  * Names of malloc types.
  *
  * Map a workitem type code to the short description of its malloc type,
  * or to "???" when the code lies outside the range indexed by memtype[].
  * D_LAST is itself a valid type code (it aliases D_DIRREM), so the bound
  * check must be inclusive; a strict comparison would report every
  * dirrem item as "???".
  */
 #define TYPENAME(type)  \
 	((unsigned)(type) <= D_LAST ? memtype[type]->ks_shortdesc : "???")
 /*
  * End system adaptation definitions.
  */
 
 /*
  * Internal function prototypes.
  */
 static	void softdep_error __P((char *, int));
 static	void drain_output __P((struct vnode *, int));
 static	int getdirtybuf __P((struct buf **, int));
 static	void clear_remove __P((struct proc *));
 static	void clear_inodedeps __P((struct proc *));
 static	int flush_pagedep_deps __P((struct vnode *, struct mount *,
 	    struct diraddhd *));
 static	int flush_inodedep_deps __P((struct fs *, ino_t));
 static	int handle_written_filepage __P((struct pagedep *, struct buf *));
 static  void diradd_inode_written __P((struct diradd *, struct inodedep *));
 static	int handle_written_inodeblock __P((struct inodedep *, struct buf *));
 static	void handle_allocdirect_partdone __P((struct allocdirect *));
 static	void handle_allocindir_partdone __P((struct allocindir *));
 static	void initiate_write_filepage __P((struct pagedep *, struct buf *));
 static	void handle_written_mkdir __P((struct mkdir *, int));
 static	void initiate_write_inodeblock __P((struct inodedep *, struct buf *));
 static	void handle_workitem_freefile __P((struct freefile *));
 static	void handle_workitem_remove __P((struct dirrem *));
 static	struct dirrem *newdirrem __P((struct buf *, struct inode *,
 	    struct inode *, int, struct dirrem **));
 static	void free_diradd __P((struct diradd *));
 static	void free_allocindir __P((struct allocindir *, struct inodedep *));
 static	int indir_trunc __P((struct inode *, ufs_daddr_t, int, ufs_lbn_t,
 	    long *));
 static	void deallocate_dependencies __P((struct buf *, struct inodedep *));
 static	void free_allocdirect __P((struct allocdirectlst *,
 	    struct allocdirect *, int));
 static	int check_inode_unwritten __P((struct inodedep *));
 static	int free_inodedep __P((struct inodedep *));
-static	void handle_workitem_freeblocks __P((struct freeblks *));
+static	void handle_workitem_freeblocks __P((struct freeblks *, int));
 static	void merge_inode_lists __P((struct inodedep *));
 static	void setup_allocindir_phase2 __P((struct buf *, struct inode *,
 	    struct allocindir *));
 static	struct allocindir *newallocindir __P((struct inode *, int, ufs_daddr_t,
 	    ufs_daddr_t));
 static	void handle_workitem_freefrag __P((struct freefrag *));
 static	struct freefrag *newfreefrag __P((struct inode *, ufs_daddr_t, long));
 static	void allocdirect_merge __P((struct allocdirectlst *,
 	    struct allocdirect *, struct allocdirect *));
 static	struct bmsafemap *bmsafemap_lookup __P((struct buf *));
 static	int newblk_lookup __P((struct fs *, ufs_daddr_t, int,
 	    struct newblk **));
 static	int inodedep_lookup __P((struct fs *, ino_t, int, struct inodedep **));
 static	int pagedep_lookup __P((struct inode *, ufs_lbn_t, int,
 	    struct pagedep **));
 static	void pause_timer __P((void *));
 static	int request_cleanup __P((int, int));
 static	int process_worklist_item __P((struct mount *, int));
 static	void add_to_worklist __P((struct worklist *));
 
 /*
  * Exported softdep operations.
  */
 static	void softdep_disk_io_initiation __P((struct buf *));
 static	void softdep_disk_write_complete __P((struct buf *));
 static	void softdep_deallocate_dependencies __P((struct buf *));
 static	void softdep_move_dependencies __P((struct buf *, struct buf *));
 static	int softdep_count_dependencies __P((struct buf *bp, int));
 
 /*
  * Table of the soft updates buffer I/O hooks declared above. The
  * initializer is positional; the trailing comment on each entry names
  * the bio_ops slot that entry is meant to fill.
  */
 struct bio_ops bioops = {
 	softdep_disk_io_initiation,		/* io_start */
 	softdep_disk_write_complete,		/* io_complete */
 	softdep_deallocate_dependencies,	/* io_deallocate */
 	softdep_move_dependencies,		/* io_movedeps */
 	softdep_count_dependencies,		/* io_countdeps */
 };
 
 /*
  * Locking primitives.
  *
  * For a uniprocessor, all we need to do is protect against disk
  * interrupts. For a multiprocessor, this lock would have to be
  * a mutex. A single mutex is used throughout this file, though
  * finer grain locking could be used if contention warranted it.
  *
  * For a multiprocessor, the sleep call would accept a lock and
  * release it after the sleep processing was complete. In a uniprocessor
  * implementation there is no such interlock, so we simply mark
  * the places where it needs to be done with the `interlocked' form
  * of the lock calls. Since the uniprocessor sleep already interlocks
  * the spl, there is nothing that really needs to be done.
  */
 #ifndef /* NOT */ DEBUG
 /*
  * Non-debugging form of the softdep lock: it only remembers the
  * priority level returned by splbio() so FREE_LOCK can restore it.
  */
 static struct lockit {
 	int	lkt_spl;	/* value returned by splbio() at acquire time */
 } lk = { 0 };
 #define ACQUIRE_LOCK(lk)		(lk)->lkt_spl = splbio()
 #define FREE_LOCK(lk)			splx((lk)->lkt_spl)
 /* The interlocked forms expand to nothing in this configuration. */
 #define ACQUIRE_LOCK_INTERLOCKED(lk)
 #define FREE_LOCK_INTERLOCKED(lk)
 
 #else /* DEBUG */
 /*
  * Debugging form of the softdep lock: besides the saved priority level
  * it records which process holds the lock so misuse can be detected.
  */
 static struct lockit {
 	int	lkt_spl;	/* value returned by splbio() at acquire time */
 	pid_t	lkt_held;	/* pid of the holder, or -1 when not held */
 } lk = { 0, -1 };
 static int lockcnt;		/* incremented on every successful acquire */
 
 static	void acquire_lock __P((struct lockit *));
 static	void free_lock __P((struct lockit *));
 static	void acquire_lock_interlocked __P((struct lockit *));
 static	void free_lock_interlocked __P((struct lockit *));
 
 #define ACQUIRE_LOCK(lk)		acquire_lock(lk)
 #define FREE_LOCK(lk)			free_lock(lk)
 #define ACQUIRE_LOCK_INTERLOCKED(lk)	acquire_lock_interlocked(lk)
 #define FREE_LOCK_INTERLOCKED(lk)	free_lock_interlocked(lk)
 
 /*
  * Debugging form of ACQUIRE_LOCK. Panics if the lock is already marked
  * as held (reporting whether the holder is the current process);
  * otherwise blocks disk interrupts with splbio(), records the current
  * process as the holder and bumps the acquisition counter.
  */
 static void
 acquire_lock(lk)
 	struct lockit *lk;
 {
 	pid_t owner;
 
 	owner = lk->lkt_held;
 	if (owner != -1) {
 		/* Release the lock before reporting the failure. */
 		FREE_LOCK(lk);
 		if (owner != CURPROC->p_pid)
 			panic("softdep_lock: lock held by %d", owner);
 		else
 			panic("softdep_lock: locking against myself");
 	}
 	/* Interrupts must be blocked before the lock is marked as held. */
 	lk->lkt_spl = splbio();
 	lk->lkt_held = CURPROC->p_pid;
 	lockcnt += 1;
 }
 
 /*
  * Debugging form of FREE_LOCK. Panics if the lock is not marked as
  * held; otherwise clears the holder and restores the priority level
  * that was saved when the lock was acquired.
  */
 static void
 free_lock(lk)
 	struct lockit *lk;
 {
 	int unheld;
 
 	unheld = (lk->lkt_held == -1);
 	if (unheld)
 		panic("softdep_unlock: lock not held");
 	lk->lkt_held = -1;
 	splx(lk->lkt_spl);
 }
 
 /*
  * Debugging form of ACQUIRE_LOCK_INTERLOCKED. Performs the same holder
  * checks and bookkeeping as acquire_lock() but does not call splbio(),
  * so the saved priority level is left untouched.
  */
 static void
 acquire_lock_interlocked(lk)
 	struct lockit *lk;
 {
 	pid_t owner;
 
 	owner = lk->lkt_held;
 	if (owner != -1) {
 		/* Release the lock before reporting the failure. */
 		FREE_LOCK(lk);
 		if (owner != CURPROC->p_pid)
 			panic("softdep_lock_interlocked: lock held by %d",
 			    owner);
 		else
 			panic("softdep_lock_interlocked: locking against self");
 	}
 	lk->lkt_held = CURPROC->p_pid;
 	lockcnt += 1;
 }
 
 /*
  * Debugging form of FREE_LOCK_INTERLOCKED. Panics if the lock is not
  * marked as held; otherwise clears the holder without calling splx().
  */
 static void
 free_lock_interlocked(lk)
 	struct lockit *lk;
 {
 	int unheld;
 
 	unheld = (lk->lkt_held == -1);
 	if (unheld)
 		panic("softdep_unlock_interlocked: lock not held");
 	lk->lkt_held = -1;
 }
 #endif /* DEBUG */
 
 /*
  * Place holder for real semaphores.
  *
  * The three routines below implement them with tsleep()/wakeup() on
  * the address of the structure.
  */
 struct sema {
 	int	value;		/* 0 when free; bumped by every sema_get() */
 	pid_t	holder;		/* pid of the holding process, or -1 */
 	char	*name;		/* wait message handed to tsleep() */
 	int	prio;		/* priority handed to tsleep() */
 	int	timo;		/* timeout handed to tsleep() */
 };
 static	void sema_init __P((struct sema *, char *, int, int));
 static	int sema_get __P((struct sema *, struct lockit *));
 static	void sema_release __P((struct sema *));
 
 /*
  * Set up a semaphore in the free state (value 0, no holder) and store
  * the wait message, priority and timeout that sema_get() will later
  * pass to tsleep().
  */
 static void
 sema_init(semap, name, prio, timo)
 	struct sema *semap;
 	char *name;
 	int prio, timo;
 {
 
 	/* Sleep parameters first ... */
 	semap->name = name;
 	semap->prio = prio;
 	semap->timo = timo;
 	/* ... then the free state. */
 	semap->value = 0;
 	semap->holder = -1;
 }
 
 /*
  * Try to take a semaphore.
  *
  * Returns 1 if the semaphore was free; the caller now holds it.
  * Returns 0 if it was busy: the caller has slept on it, does NOT hold
  * it, and is expected to retry. On either return the interlock, when
  * one was supplied, has been released.
  */
 static int
 sema_get(semap, interlock)
 	struct sema *semap;
 	struct lockit *interlock;
 {
 	int busy;
 
 	/* Test the old value, then count ourselves in either case. */
 	busy = (semap->value > 0);
 	semap->value += 1;
 	if (!busy) {
 		semap->holder = CURPROC->p_pid;
 		if (interlock != NULL)
 			FREE_LOCK(interlock);
 		return (1);
 	}
 	if (interlock != NULL)
 		FREE_LOCK_INTERLOCKED(interlock);
 	tsleep((caddr_t)semap, semap->prio, semap->name, semap->timo);
 	if (interlock != NULL) {
 		ACQUIRE_LOCK_INTERLOCKED(interlock);
 		FREE_LOCK(interlock);
 	}
 	return (0);
 }
 
 /*
  * Give up a semaphore. Panics (after dropping the softdep lock if it
  * is held) unless the current process is the recorded holder. If other
  * processes bumped the count while waiting, the count is reset and
  * they are woken so that they can retry.
  */
 static void
 sema_release(semap)
 	struct sema *semap;
 {
 	int owned;
 
 	owned = (semap->value > 0 && semap->holder == CURPROC->p_pid);
 	if (!owned) {
 		if (lk.lkt_held != -1)
 			FREE_LOCK(&lk);
 		panic("sema_release: not held");
 	}
 	semap->value -= 1;
 	if (semap->value > 0) {
 		/* Someone slept on us in the meantime. */
 		semap->value = 0;
 		wakeup(semap);
 	}
 	semap->holder = -1;
 }
 
 /*
  * Worklist queue management.
  * These routines require that the lock be held.
  */
 #ifndef /* NOT */ DEBUG
 #define WORKLIST_INSERT(head, item) do {	\
 	(item)->wk_state |= ONWORKLIST;		\
 	LIST_INSERT_HEAD(head, item, wk_list);	\
 } while (0)
 #define WORKLIST_REMOVE(item) do {		\
 	(item)->wk_state &= ~ONWORKLIST;	\
 	LIST_REMOVE(item, wk_list);		\
 } while (0)
 #define WORKITEM_FREE(item, type) FREE(item, DtoM(type))
 
 #else /* DEBUG */
 static	void worklist_insert __P((struct workhead *, struct worklist *));
 static	void worklist_remove __P((struct worklist *));
 static	void workitem_free __P((struct worklist *, int));
 
 #define WORKLIST_INSERT(head, item) worklist_insert(head, item)
 #define WORKLIST_REMOVE(item) worklist_remove(item)
 #define WORKITEM_FREE(item, type) workitem_free((struct worklist *)item, type)
 
 /*
  * Debugging form of WORKLIST_INSERT: put an item at the head of a
  * work list and flag it ONWORKLIST. Panics if the softdep lock is not
  * held or if the item is already flagged as being on a list.
  */
 static void
 worklist_insert(head, item)
 	struct workhead *head;
 	struct worklist *item;
 {
 
 	if (lk.lkt_held == -1)
 		panic("worklist_insert: lock not held");
 	if ((item->wk_state & ONWORKLIST) != 0) {
 		FREE_LOCK(&lk);
 		panic("worklist_insert: already on list");
 	}
 	LIST_INSERT_HEAD(head, item, wk_list);
 	item->wk_state |= ONWORKLIST;
 }
 
 /*
  * Debugging form of WORKLIST_REMOVE: unlink an item from the work list
  * it is on and clear its ONWORKLIST flag. Panics if the softdep lock
  * is not held or if the item is not flagged as being on a list.
  */
 static void
 worklist_remove(item)
 	struct worklist *item;
 {
 
 	if (lk.lkt_held == -1)
 		panic("worklist_remove: lock not held");
 	if (!(item->wk_state & ONWORKLIST)) {
 		FREE_LOCK(&lk);
 		panic("worklist_remove: not on list");
 	}
 	LIST_REMOVE(item, wk_list);
 	item->wk_state &= ~ONWORKLIST;
 }
 
 /*
  * Debugging form of WORKITEM_FREE: release the memory of a work item
  * back to the malloc type matching `type'. Panics (after dropping the
  * softdep lock if it is held) when the item is still flagged as being
  * on a list or when its recorded type differs from `type'.
  */
 static void
 workitem_free(item, type)
 	struct worklist *item;
 	int type;
 {
 	int queued, wrongtype;
 
 	queued = ((item->wk_state & ONWORKLIST) != 0);
 	if (queued) {
 		if (lk.lkt_held != -1)
 			FREE_LOCK(&lk);
 		panic("workitem_free: still on list");
 	}
 	wrongtype = (type != item->wk_type);
 	if (wrongtype) {
 		if (lk.lkt_held != -1)
 			FREE_LOCK(&lk);
 		panic("workitem_free: type mismatch");
 	}
 	FREE(item, DtoM(type));
 }
 #endif /* DEBUG */
 
 /*
  * Workitem queue management
  */
 static struct workhead softdep_workitem_pending;
 static int num_on_worklist;	/* number of worklist items to be processed */
 /*
  * softdep_worklist_busy is raised by one while softdep_process_worklist()
  * runs on behalf of no particular mount and is set to -1 for the duration
  * of softdep_flushworklist(), which makes such runs back off.
  */
 static int softdep_worklist_busy; /* >0 => being processed, -1 => flushing */
 static int softdep_worklist_req; /* serialized waiters */
 static int max_softdeps;	/* maximum number of structs before slowdown */
 static int tickdelay = 2;	/* number of ticks to pause during slowdown */
 static int proc_waiting;	/* tracks whether we have a timeout posted */
 static int *stat_countp;	/* statistic to count in proc_waiting timeout */
 static struct callout_handle handle; /* handle on posted proc_waiting timeout */
 static struct proc *filesys_syncer; /* proc of filesystem syncer process */
 static int req_clear_inodedeps;	/* syncer process flush some inodedeps */
 #define FLUSH_INODES	1
 static int req_clear_remove;	/* syncer process flush some freeblks */
 #define FLUSH_REMOVE	2
 /*
  * runtime statistics
  */
 static int stat_worklist_push;	/* number of worklist cleanups */
 static int stat_blk_limit_push;	/* number of times block limit neared */
 static int stat_ino_limit_push;	/* number of times inode limit neared */
 static int stat_blk_limit_hit;	/* number of times block slowdown imposed */
 static int stat_ino_limit_hit;	/* number of times inode slowdown imposed */
 static int stat_sync_limit_hit;	/* number of synchronous slowdowns imposed */
 static int stat_indir_blk_ptrs;	/* bufs redirtied as indir ptrs not written */
 static int stat_inode_bitmap;	/* bufs redirtied as inode bitmap not written */
 static int stat_direct_blk_ptrs;/* bufs redirtied as direct ptrs not written */
 static int stat_dir_entry;	/* bufs redirtied as dir entry cannot write */
 #ifdef DEBUG
 #include <vm/vm.h>
 #include <sys/sysctl.h>
 SYSCTL_INT(_debug, OID_AUTO, max_softdeps, CTLFLAG_RW, &max_softdeps, 0, "");
 SYSCTL_INT(_debug, OID_AUTO, tickdelay, CTLFLAG_RW, &tickdelay, 0, "");
 SYSCTL_INT(_debug, OID_AUTO, worklist_push, CTLFLAG_RW, &stat_worklist_push, 0,"");
 SYSCTL_INT(_debug, OID_AUTO, blk_limit_push, CTLFLAG_RW, &stat_blk_limit_push, 0,"");
 SYSCTL_INT(_debug, OID_AUTO, ino_limit_push, CTLFLAG_RW, &stat_ino_limit_push, 0,"");
 SYSCTL_INT(_debug, OID_AUTO, blk_limit_hit, CTLFLAG_RW, &stat_blk_limit_hit, 0, "");
 SYSCTL_INT(_debug, OID_AUTO, ino_limit_hit, CTLFLAG_RW, &stat_ino_limit_hit, 0, "");
 SYSCTL_INT(_debug, OID_AUTO, sync_limit_hit, CTLFLAG_RW, &stat_sync_limit_hit, 0, "");
 SYSCTL_INT(_debug, OID_AUTO, indir_blk_ptrs, CTLFLAG_RW, &stat_indir_blk_ptrs, 0, "");
 SYSCTL_INT(_debug, OID_AUTO, inode_bitmap, CTLFLAG_RW, &stat_inode_bitmap, 0, "");
 SYSCTL_INT(_debug, OID_AUTO, direct_blk_ptrs, CTLFLAG_RW, &stat_direct_blk_ptrs, 0, "");
 SYSCTL_INT(_debug, OID_AUTO, dir_entry, CTLFLAG_RW, &stat_dir_entry, 0, "");
 #endif /* DEBUG */
 
 /*
  * Append an item to the tail of the pending work queue.
  * This routine requires that the lock be held.
  *
  * The most recently appended item is remembered in a function-local
  * static so that no list walk is needed; when the queue is empty the
  * new item simply becomes its head. Panics if the item is already
  * flagged as being on a list.
  */
 static void
 add_to_worklist(wk)
 	struct worklist *wk;
 {
 	static struct worklist *tail;
 
 	if ((wk->wk_state & ONWORKLIST) != 0) {
 		if (lk.lkt_held != -1)
 			FREE_LOCK(&lk);
 		panic("add_to_worklist: already on list");
 	}
 	if (LIST_FIRST(&softdep_workitem_pending) != NULL)
 		LIST_INSERT_AFTER(tail, wk, wk_list);
 	else
 		LIST_INSERT_HEAD(&softdep_workitem_pending, wk, wk_list);
 	wk->wk_state |= ONWORKLIST;
 	tail = wk;
 	num_on_worklist++;
 }
 
 /*
  * If a flush of inode dependencies and/or of removal dependencies has
  * been requested (req_clear_inodedeps, req_clear_remove), carry it
  * out, consume one request and wake one sleeper on proc_waiting.
  */
 static void
 softdep_service_requests(p)
 	struct proc *p;
 {
 
 	if (req_clear_inodedeps) {
 		clear_inodedeps(p);
 		req_clear_inodedeps -= 1;
 		wakeup_one(&proc_waiting);
 	}
 	if (req_clear_remove) {
 		clear_remove(p);
 		req_clear_remove -= 1;
 		wakeup_one(&proc_waiting);
 	}
 }
 
 /*
  * Process that runs once per second to handle items in the background queue.
  *
  * Items are taken strictly in queue order. Code elsewhere relies on
  * this so that the blocks of a file are freed before its inode is,
  * which in turn guarantees that no new <vfsid, inum, lbn> triples are
  * generated until the old ones have left the dependency lists.
  *
  * Returns the number of processed items that belonged to matchmnt.
  * When matchmnt is NULL the result is -1 if a flush is in progress
  * (nothing is done), if a flush is waiting for the queue, or if the
  * run was cut short because the clock moved on to the next second.
  */
 int
 softdep_process_worklist(matchmnt)
 	struct mount *matchmnt;
 {
 	struct proc *p;
 	long began;
 	int nmatched, passes, background;
 
 	/*
 	 * Remember who called us so that request_cleanup can give this
 	 * process preferential treatment.
 	 */
 	p = CURPROC;
 	filesys_syncer = p;
 	nmatched = 0;
 
 	/*
 	 * Only one process ever runs this in the background, but while
 	 * softdep_flushfiles() is at work we must stay out of its way so
 	 * that it gets an accurate count of the items that belong to its
 	 * mount point.
 	 */
 	background = (matchmnt == NULL);
 	if (background) {
 		if (softdep_worklist_busy < 0)
 			return (-1);
 		softdep_worklist_busy += 1;
 	}
 
 	softdep_service_requests(p);
 	began = time_second;
 	for (passes = 1; num_on_worklist > 0; passes++) {
 		nmatched += process_worklist_item(matchmnt, 0);
 
 		/* Yield to an unmount that wants an accurate run. */
 		if (background && softdep_worklist_req) {
 			nmatched = -1;
 			break;
 		}
 
 		softdep_service_requests(p);
 
 		/*
 		 * Normally we do not stop for buffer space, but every
 		 * 128 items give the buffer code a chance to hold us
 		 * back if we are hogging buffers.
 		 */
 		if (passes % 128 == 0)
 			bwillwrite();
 
 		/*
 		 * Stop once the second has rolled over so that the
 		 * other syncer tasks do not get excessively backlogged.
 		 */
 		if (background && began != time_second) {
 			nmatched = -1;
 			break;
 		}
 	}
 	if (background) {
 		softdep_worklist_busy -= 1;
 		if (softdep_worklist_req && softdep_worklist_busy == 0)
 			wakeup(&softdep_worklist_req);
 	}
 	return (nmatched);
 }
 
 /*
  * Process one item on the worklist.
  *
  * The item is unlinked from softdep_workitem_pending under the softdep
  * lock and then, with the lock released, handed to the handler for its
  * type. When LK_NOWAIT is set in flags, leading dirrem items whose
  * vnode is resident and locked are passed over. Returns 1 if the item
  * processed belongs to matchmnt, otherwise 0 (also 0 when no eligible
  * item was found).
  *
  * NOTE(review): with LK_NOWAIT the item removed need not be the head
  * of the list, so it may be the one add_to_worklist() remembers as the
  * tail -- confirm that the remembered tail cannot be left dangling.
  */
 static int
 process_worklist_item(matchmnt, flags)
 	struct mount *matchmnt;
 	int flags;
 {
 	struct worklist *wk;
 	struct dirrem *dirrem;
 	struct mount *mp;
 	struct vnode *vp;
 	int matchcnt = 0;
 
 	ACQUIRE_LOCK(&lk);
 	/*
 	 * Normally we just process each item on the worklist in order.
 	 * However, if we are in a situation where we cannot lock any
 	 * inodes, we have to skip over any dirrem requests whose
 	 * vnodes are resident and locked.
 	 */
 	LIST_FOREACH(wk, &softdep_workitem_pending, wk_list) {
 		/* Without LK_NOWAIT this stops at the very first item. */
 		if ((flags & LK_NOWAIT) == 0 || wk->wk_type != D_DIRREM)
 			break;
 		dirrem = WK_DIRREM(wk);
 		vp = ufs_ihashlookup(VFSTOUFS(dirrem->dm_mnt)->um_dev,
 		    dirrem->dm_oldinum);
 		if (vp == NULL || !VOP_ISLOCKED(vp, CURPROC))
 			break;
 	}
 	/* The loop ran off the end: nothing can be processed right now. */
 	if (wk == 0) {
 		FREE_LOCK(&lk);
 		return (0);
 	}
 	WORKLIST_REMOVE(wk);
 	num_on_worklist -= 1;
 	FREE_LOCK(&lk);
 	/* Each case panics if vn_write_suspend_wait() returns nonzero. */
 	switch (wk->wk_type) {
 
 	case D_DIRREM:
 		/* removal of a directory entry */
 		mp = WK_DIRREM(wk)->dm_mnt;
 		if (vn_write_suspend_wait(NULL, mp, V_NOWAIT))
 			panic("%s: dirrem on suspended filesystem",
 				"process_worklist_item");
 		if (mp == matchmnt)
 			matchcnt += 1;
 		handle_workitem_remove(WK_DIRREM(wk));
 		break;
 
 	case D_FREEBLKS:
 		/* releasing blocks and/or fragments from a file */
 		mp = WK_FREEBLKS(wk)->fb_mnt;
 		if (vn_write_suspend_wait(NULL, mp, V_NOWAIT))
 			panic("%s: freeblks on suspended filesystem",
 				"process_worklist_item");
 		if (mp == matchmnt)
 			matchcnt += 1;
-		handle_workitem_freeblocks(WK_FREEBLKS(wk));
+		handle_workitem_freeblocks(WK_FREEBLKS(wk), flags & LK_NOWAIT);
 		break;
 
 	case D_FREEFRAG:
 		/* releasing a fragment when replaced as a file grows */
 		mp = WK_FREEFRAG(wk)->ff_mnt;
 		if (vn_write_suspend_wait(NULL, mp, V_NOWAIT))
 			panic("%s: freefrag on suspended filesystem",
 				"process_worklist_item");
 		if (mp == matchmnt)
 			matchcnt += 1;
 		handle_workitem_freefrag(WK_FREEFRAG(wk));
 		break;
 
 	case D_FREEFILE:
 		/* releasing an inode when its link count drops to 0 */
 		mp = WK_FREEFILE(wk)->fx_mnt;
 		if (vn_write_suspend_wait(NULL, mp, V_NOWAIT))
 			panic("%s: freefile on suspended filesystem",
 				"process_worklist_item");
 		if (mp == matchmnt)
 			matchcnt += 1;
 		handle_workitem_freefile(WK_FREEFILE(wk));
 		break;
 
 	default:
 		panic("%s_process_worklist: Unknown type %s",
 		    "softdep", TYPENAME(wk->wk_type));
 		/* NOTREACHED */
 	}
 	return (matchcnt);
 }
 
 /*
  * Move dependencies from one buffer to another.
  */
 static void
 softdep_move_dependencies(oldbp, newbp)
 	struct buf *oldbp;
 	struct buf *newbp;
 {
 	struct worklist *wk, *wktail;
 
 	if (LIST_FIRST(&newbp->b_dep) != NULL)
 		panic("softdep_move_dependencies: need merge code");
 	wktail = 0;
 	ACQUIRE_LOCK(&lk);
 	while ((wk = LIST_FIRST(&oldbp->b_dep)) != NULL) {
 		LIST_REMOVE(wk, wk_list);
 		if (wktail == 0)
 			LIST_INSERT_HEAD(&newbp->b_dep, wk, wk_list);
 		else
 			LIST_INSERT_AFTER(wktail, wk, wk_list);
 		wktail = wk;
 	}
 	FREE_LOCK(&lk);
 }
 
 /*
  * Purge the work list of all items associated with a particular mount point.
  *
  * On return *countp holds the number of items processed for oldmnt.
  * The result is 0, or the error from the first failing VOP_FSYNC() of
  * the mount's device vnode.
  */
 int
 softdep_flushworklist(oldmnt, countp, p)
 	struct mount *oldmnt;
 	int *countp;
 	struct proc *p;
 {
 	struct vnode *devvp;
 	int error, count;
 
 	/*
 	 * Wait until nobody else is working on the queue, then claim it
 	 * for ourselves by marking it with -1.
 	 */
 	while (softdep_worklist_busy) {
 		softdep_worklist_req += 1;
 		tsleep(&softdep_worklist_req, PRIBIO, "softflush", 0);
 		softdep_worklist_req -= 1;
 	}
 	softdep_worklist_busy = -1;
 
 	/*
 	 * Alternate between processing the worklist and flushing the
 	 * block device behind the mount point, since each flush may
 	 * create new dependencies. Stop as soon as a pass finds no
 	 * more work, or a flush fails.
 	 */
 	error = 0;
 	*countp = 0;
 	devvp = VFSTOUFS(oldmnt)->um_devvp;
 	for (;;) {
 		count = softdep_process_worklist(oldmnt);
 		if (count <= 0)
 			break;
 		*countp += count;
 		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p);
 		error = VOP_FSYNC(devvp, p->p_ucred, MNT_WAIT, p);
 		VOP_UNLOCK(devvp, 0, p);
 		if (error != 0)
 			break;
 	}
 
 	/* Hand the queue back and let any waiting flusher proceed. */
 	softdep_worklist_busy = 0;
 	if (softdep_worklist_req)
 		wakeup(&softdep_worklist_req);
 	return (error);
 }
 
 /*
  * Flush all vnodes and worklist items associated with a specified mount point.
  */
 int
 softdep_flushfiles(oldmnt, flags, p)
 	struct mount *oldmnt;
 	int flags;
 	struct proc *p;
 {
 	int error, count, loopcnt;
 
 	/*
 	 * Alternately flush the vnodes associated with the mount
 	 * point and process any dependencies that the flushing
 	 * creates. In theory, this loop can happen at most twice,
 	 * but we give it a few extra just to be sure.
 	 */
 	for (loopcnt = 10; loopcnt > 0; loopcnt--) {
 		/*
 		 * Do another flush in case any vnodes were brought in
 		 * as part of the cleanup operations.
 		 */
 		if ((error = ffs_flushfiles(oldmnt, flags, p)) != 0)
 			break;
 		if ((error = softdep_flushworklist(oldmnt, &count, p)) != 0 ||
 		    count == 0)
 			break;
 	}
 	/*
 	 * If we are unmounting then it is an error to fail. If we
 	 * are simply trying to downgrade to read-only, then filesystem
 	 * activity can keep us busy forever, so we just fail with EBUSY.
 	 */
 	if (loopcnt == 0) {
 		if (oldmnt->mnt_kern_flag & MNTK_UNMOUNT)
 			panic("softdep_flushfiles: looping");
 		error = EBUSY;
 	}
 	return (error);
 }
 
 /*
  * Structure hashing.
  * 
  * There are three types of structures that can be looked up:
  *	1) pagedep structures identified by mount point, inode number,
  *	   and logical block.
  *	2) inodedep structures identified by mount point and inode number.
  *	3) newblk structures identified by mount point and
  *	   physical block number.
  *
  * The "pagedep" and "inodedep" dependency structures are hashed
  * separately from the file blocks and inodes to which they correspond.
  * This separation helps when the in-memory copy of an inode or
  * file block must be replaced. It also obviates the need to access
  * an inode or file page when simply updating (or de-allocating)
  * dependency structures. Lookup of newblk structures is needed to
  * find newly allocated blocks when trying to associate them with
  * their allocdirect or allocindir structure.
  *
  * The lookup routines optionally create and hash a new instance when
  * an existing entry is not found.
  */
 #define DEPALLOC	0x0001	/* allocate structure if lookup fails */
 #define NODELAY		0x0002	/* cannot do background work */
 
 /*
  * Structures and routines associated with pagedep caching.
  */
 LIST_HEAD(pagedep_hashhead, pagedep) *pagedep_hashtbl;
 u_long	pagedep_hash;		/* size of hash table - 1 */
 #define	PAGEDEP_HASH(mp, inum, lbn) \
 	(&pagedep_hashtbl[((((register_t)(mp)) >> 13) + (inum) + (lbn)) & \
 	    pagedep_hash])
 static struct sema pagedep_in_progress;
 
 /*
  * Look up a pagedep. Return 1 if found, 0 if not found.
  * If not found, allocate if DEPALLOC flag is passed.
  * Found or allocated entry is returned in pagedeppp.
  * This routine must be called with splbio interrupts blocked.
  *
  * Note that a freshly allocated entry is also reported with a return
  * value of 0; *pagedeppp is NULL only when nothing was found and
  * DEPALLOC was not requested. The softdep lock is dropped and retaken
  * while allocating, but is held again on every return.
  */
 static int
 pagedep_lookup(ip, lbn, flags, pagedeppp)
 	struct inode *ip;
 	ufs_lbn_t lbn;
 	int flags;
 	struct pagedep **pagedeppp;
 {
 	struct pagedep *pagedep;
 	struct pagedep_hashhead *pagedephd;
 	struct mount *mp;
 	int i;
 
 #ifdef DEBUG
 	if (lk.lkt_held == -1)
 		panic("pagedep_lookup: lock not held");
 #endif
 	mp = ITOV(ip)->v_mount;
 	pagedephd = PAGEDEP_HASH(mp, ip->i_number, lbn);
 top:
 	/* Search the hash chain for a matching <mount, inode, lbn>. */
 	LIST_FOREACH(pagedep, pagedephd, pd_hash)
 		if (ip->i_number == pagedep->pd_ino &&
 		    lbn == pagedep->pd_lbn &&
 		    mp == pagedep->pd_mnt)
 			break;
 	if (pagedep) {
 		*pagedeppp = pagedep;
 		return (1);
 	}
 	if ((flags & DEPALLOC) == 0) {
 		*pagedeppp = NULL;
 		return (0);
 	}
 	/*
 	 * sema_get() releases the lock whichever way it returns. A zero
 	 * return means we slept because the semaphore was busy, so retake
 	 * the lock and search again in case the entry has appeared.
 	 */
 	if (sema_get(&pagedep_in_progress, &lk) == 0) {
 		ACQUIRE_LOCK(&lk);
 		goto top;
 	}
 	/* Allocate with the lock released; M_ZERO clears the structure. */
 	MALLOC(pagedep, struct pagedep *, sizeof(struct pagedep), M_PAGEDEP,
 		M_SOFTDEP_FLAGS|M_ZERO);
 	pagedep->pd_list.wk_type = D_PAGEDEP;
 	pagedep->pd_mnt = mp;
 	pagedep->pd_ino = ip->i_number;
 	pagedep->pd_lbn = lbn;
 	LIST_INIT(&pagedep->pd_dirremhd);
 	LIST_INIT(&pagedep->pd_pendinghd);
 	for (i = 0; i < DAHASHSZ; i++)
 		LIST_INIT(&pagedep->pd_diraddhd[i]);
 	/* Retake the lock before linking the entry into the hash chain. */
 	ACQUIRE_LOCK(&lk);
 	LIST_INSERT_HEAD(pagedephd, pagedep, pd_hash);
 	sema_release(&pagedep_in_progress);
 	*pagedeppp = pagedep;
 	return (0);
 }
 
 /*
  * Structures and routines associated with inodedep caching.
  */
 LIST_HEAD(inodedep_hashhead, inodedep) *inodedep_hashtbl;
 static u_long	inodedep_hash;	/* size of hash table - 1 */
 static long	num_inodedep;	/* number of inodedep allocated */
 #define	INODEDEP_HASH(fs, inum) \
       (&inodedep_hashtbl[((((register_t)(fs)) >> 13) + (inum)) & inodedep_hash])
 static struct sema inodedep_in_progress;
 
 /*
  * Look up an inodedep. Return 1 if found, 0 if not found.
  * If not found, allocate if DEPALLOC flag is passed.
  * Found or allocated entry is returned in inodedeppp.
  * This routine must be called with splbio interrupts blocked.
  *
  * Note that a freshly allocated entry is also reported with a return
  * value of 0; *inodedeppp is NULL only when nothing was found and
  * DEPALLOC was not requested. The softdep lock is dropped and retaken
  * while allocating, but is held again on every return.
  */
 static int
 inodedep_lookup(fs, inum, flags, inodedeppp)
 	struct fs *fs;
 	ino_t inum;
 	int flags;
 	struct inodedep **inodedeppp;
 {
 	struct inodedep *inodedep;
 	struct inodedep_hashhead *inodedephd;
 	int firsttry;
 
 #ifdef DEBUG
 	if (lk.lkt_held == -1)
 		panic("inodedep_lookup: lock not held");
 #endif
 	firsttry = 1;
 	inodedephd = INODEDEP_HASH(fs, inum);
 top:
 	/* Search the hash chain for a matching <fs, inode number>. */
 	LIST_FOREACH(inodedep, inodedephd, id_hash)
 		if (inum == inodedep->id_ino && fs == inodedep->id_fs)
 			break;
 	if (inodedep) {
 		*inodedeppp = inodedep;
 		return (1);
 	}
 	if ((flags & DEPALLOC) == 0) {
 		*inodedeppp = NULL;
 		return (0);
 	}
 	/*
 	 * If we are over our limit, try to improve the situation.
 	 * This is attempted at most once per call and never when the
 	 * caller passed NODELAY; if request_cleanup() reports nonzero
 	 * the chain is searched again before allocating.
 	 */
 	if (num_inodedep > max_softdeps && firsttry && (flags & NODELAY) == 0 &&
 	    request_cleanup(FLUSH_INODES, 1)) {
 		firsttry = 0;
 		goto top;
 	}
 	/*
 	 * sema_get() releases the lock whichever way it returns. A zero
 	 * return means we slept because the semaphore was busy, so retake
 	 * the lock and search again in case the entry has appeared.
 	 */
 	if (sema_get(&inodedep_in_progress, &lk) == 0) {
 		ACQUIRE_LOCK(&lk);
 		goto top;
 	}
 	num_inodedep += 1;
 	/* No M_ZERO here, so every field is initialized explicitly below. */
 	MALLOC(inodedep, struct inodedep *, sizeof(struct inodedep),
 		M_INODEDEP, M_SOFTDEP_FLAGS);
 	inodedep->id_list.wk_type = D_INODEDEP;
 	inodedep->id_fs = fs;
 	inodedep->id_ino = inum;
 	inodedep->id_state = ALLCOMPLETE;
 	inodedep->id_nlinkdelta = 0;
 	inodedep->id_savedino = NULL;
 	inodedep->id_savedsize = -1;
 	inodedep->id_buf = NULL;
 	LIST_INIT(&inodedep->id_pendinghd);
 	LIST_INIT(&inodedep->id_inowait);
 	LIST_INIT(&inodedep->id_bufwait);
 	TAILQ_INIT(&inodedep->id_inoupdt);
 	TAILQ_INIT(&inodedep->id_newinoupdt);
 	/* Retake the lock before linking the entry into the hash chain. */
 	ACQUIRE_LOCK(&lk);
 	LIST_INSERT_HEAD(inodedephd, inodedep, id_hash);
 	sema_release(&inodedep_in_progress);
 	*inodedeppp = inodedep;
 	return (0);
 }
 
 /*
  * Structures and routines associated with newblk caching.
  */
 LIST_HEAD(newblk_hashhead, newblk) *newblk_hashtbl;
 u_long	newblk_hash;		/* size of hash table - 1 */
 #define	NEWBLK_HASH(fs, inum) \
 	(&newblk_hashtbl[((((register_t)(fs)) >> 13) + (inum)) & newblk_hash])
 static struct sema newblk_in_progress;
 
 /*
  * Look up a newblk. Return 1 if found, 0 if not found.
  * If not found, allocate if DEPALLOC flag is passed.
  * Found or allocated entry is returned in newblkpp.
  */
 static int
 newblk_lookup(fs, newblkno, flags, newblkpp)
 	struct fs *fs;
 	ufs_daddr_t newblkno;
 	int flags;
 	struct newblk **newblkpp;
 {
 	struct newblk *newblk;
 	struct newblk_hashhead *newblkhd;
 
 	newblkhd = NEWBLK_HASH(fs, newblkno);
 top:
 	LIST_FOREACH(newblk, newblkhd, nb_hash)
 		if (newblkno == newblk->nb_newblkno && fs == newblk->nb_fs)
 			break;
 	if (newblk) {
 		*newblkpp = newblk;
 		return (1);
 	}
 	if ((flags & DEPALLOC) == 0) {
 		*newblkpp = NULL;
 		return (0);
 	}
 	if (sema_get(&newblk_in_progress, 0) == 0)
 		goto top;
 	MALLOC(newblk, struct newblk *, sizeof(struct newblk),
 		M_NEWBLK, M_SOFTDEP_FLAGS);
 	newblk->nb_state = 0;
 	newblk->nb_fs = fs;
 	newblk->nb_newblkno = newblkno;
 	LIST_INSERT_HEAD(newblkhd, newblk, nb_hash);
 	sema_release(&newblk_in_progress);
 	*newblkpp = newblk;
 	return (0);
 }
 
 /*
  * One-time setup of the soft dependency machinery. Executed during
  * filesystem system initialization, before any file system is mounted.
  */
 void
 softdep_initialize()
 {
 
 	/* The global lists start out empty. */
 	LIST_INIT(&softdep_workitem_pending);
 	LIST_INIT(&mkdirlisthd);
 
 	/* Limit that num_inodedep is compared against on allocation. */
 	max_softdeps = min(desiredvnodes * 8,
 		M_INODEDEP->ks_limit / (2 * sizeof(struct inodedep)));
 
 	/* Each hash table is paired with its "in progress" semaphore. */
 	sema_init(&pagedep_in_progress, "pagedep", PRIBIO, 0);
 	pagedep_hashtbl = hashinit(desiredvnodes / 5, M_PAGEDEP,
 	    &pagedep_hash);
 
 	sema_init(&inodedep_in_progress, "inodedep", PRIBIO, 0);
 	inodedep_hashtbl = hashinit(desiredvnodes, M_INODEDEP, &inodedep_hash);
 
 	sema_init(&newblk_in_progress, "newblk", PRIBIO, 0);
 	newblk_hashtbl = hashinit(64, M_NEWBLK, &newblk_hash);
 }
 
 /*
  * Called at mount time to notify the dependency code that a
  * filesystem wishes to use it.
  *
  * Clears MNT_ASYNC and sets MNT_SOFTDEP on the mount. Unless the
  * superblock is marked clean, every cylinder group block is then read
  * through devvp and the summary totals in the superblock are rebuilt
  * from the per-group counts.
  *
  * Returns 0 on success, or the error reported by bread() when a
  * cylinder group block cannot be read.
  */
 int
 softdep_mount(devvp, mp, fs, cred)
 	struct vnode *devvp;
 	struct mount *mp;
 	struct fs *fs;
 	struct ucred *cred;
 {
 	struct csum cstotal;
 	struct cg *cgp;
 	struct buf *bp;
 	int error, cyl;
 
 	mp->mnt_flag &= ~MNT_ASYNC;
 	mp->mnt_flag |= MNT_SOFTDEP;
 	/*
 	 * When doing soft updates, the counters in the
 	 * superblock may have gotten out of sync, so we have
 	 * to scan the cylinder groups and recalculate them.
 	 */
 	/* A filesystem marked clean skips the recount entirely. */
 	if (fs->fs_clean != 0)
 		return (0);
 	bzero(&cstotal, sizeof cstotal);
 	for (cyl = 0; cyl < fs->fs_ncg; cyl++) {
 		if ((error = bread(devvp, fsbtodb(fs, cgtod(fs, cyl)),
 		    fs->fs_cgsize, cred, &bp)) != 0) {
 			brelse(bp);
 			return (error);
 		}
 		/*
 		 * Add this group's counts to the running totals and refresh
 		 * the in-core per-group summary from the block just read.
 		 */
 		cgp = (struct cg *)bp->b_data;
 		cstotal.cs_nffree += cgp->cg_cs.cs_nffree;
 		cstotal.cs_nbfree += cgp->cg_cs.cs_nbfree;
 		cstotal.cs_nifree += cgp->cg_cs.cs_nifree;
 		cstotal.cs_ndir += cgp->cg_cs.cs_ndir;
 		fs->fs_cs(fs, cyl) = cgp->cg_cs;
 		brelse(bp);
 	}
 #ifdef DEBUG
 	/* Only report when the recomputed totals differ from the old ones. */
 	if (bcmp(&cstotal, &fs->fs_cstotal, sizeof cstotal))
-		printf("ffs_mountfs: superblock updated for soft updates\n");
+		printf("%s: superblock summary recomputed\n", fs->fs_fsmnt);
 #endif
 	/* The recomputed totals are installed whether or not they changed. */
 	bcopy(&cstotal, &fs->fs_cstotal, sizeof cstotal);
 	return (0);
 }
 
 /*
  * Protecting the freemaps (or bitmaps).
  * 
  * To eliminate the need to execute fsck before mounting a file system
  * after a power failure, one must (conservatively) guarantee that the
  * on-disk copy of the bitmaps never indicates that a live inode or block is
  * free.  So, when a block or inode is allocated, the bitmap should be
  * updated (on disk) before any new pointers.  When a block or inode is
  * freed, the bitmap should not be updated until all pointers have been
  * reset.  The latter dependency is handled by the delayed de-allocation
  * approach described below for block and inode de-allocation.  The former
  * dependency is handled by calling the following procedure when a block or
  * inode is allocated. When an inode is allocated an "inodedep" is created
  * with its DEPCOMPLETE flag cleared until its bitmap is written to disk.
  * Each "inodedep" is also inserted into the hash indexing structure so
  * that any additional link additions can be made dependent on the inode
  * allocation.
  * 
  * The ufs file system maintains a number of free block counts (e.g., per
  * cylinder group, per cylinder and per <cylinder, rotational position> pair)
  * in addition to the bitmaps.  These counts are used to improve efficiency
  * during allocation and therefore must be consistent with the bitmaps.
  * There is no convenient way to guarantee post-crash consistency of these
  * counts with simple update ordering, for two main reasons: (1) The counts
  * and bitmaps for a single cylinder group block are not in the same disk
  * sector.  If a disk write is interrupted (e.g., by power failure), one may
  * be written and the other not.  (2) Some of the counts are located in the
  * superblock rather than the cylinder group block. So, we focus our soft
  * updates implementation on protecting the bitmaps. When mounting a
  * filesystem, we recompute the auxiliary counts from the bitmaps.
  */
 
 /*
  * Record that inode "newinum" has just been allocated from the cylinder
  * group block held in "bp". Called just after the cylinder group block
  * has been updated for the allocation.
  */
 void
 softdep_setup_inomapdep(bp, ip, newinum)
 	struct buf *bp;		/* buffer for cylgroup block with inode map */
 	struct inode *ip;	/* inode related to allocation */
 	ino_t newinum;		/* new inode number being allocated */
 {
 	struct bmsafemap *map;
 	struct inodedep *idp;
 	int found;
 
 	ACQUIRE_LOCK(&lk);
 	/*
 	 * A dependency is created for the new inode. One that already
 	 * exists means something is seriously wrong, hence the panic.
 	 */
 	found = inodedep_lookup(ip->i_fs, newinum, DEPALLOC|NODELAY, &idp);
 	if (found != 0) {
 		FREE_LOCK(&lk);
 		panic("softdep_setup_inomapdep: found inode");
 	}
 	/*
 	 * Flag the bitmap dependency as outstanding and attach the
 	 * inodedep to the bmsafemap of the cylinder group buffer.
 	 */
 	idp->id_state &= ~DEPCOMPLETE;
 	idp->id_buf = bp;
 	map = bmsafemap_lookup(bp);
 	LIST_INSERT_HEAD(&map->sm_inodedephd, idp, id_deps);
 	FREE_LOCK(&lk);
 }
 
 /*
  * Record that block or fragment "newblkno" has just been allocated from
  * the cylinder group block held in "bp". Called just after the cylinder
  * group block has been updated for the allocation.
  */
 void
 softdep_setup_blkmapdep(bp, fs, newblkno)
 	struct buf *bp;		/* buffer for cylgroup block with block map */
 	struct fs *fs;		/* filesystem doing allocation */
 	ufs_daddr_t newblkno;	/* number of newly allocated block */
 {
 	struct bmsafemap *map;
 	struct newblk *nb;
 
 	/*
 	 * A newblk is created for the block; finding an existing one
 	 * is fatal.
 	 */
 	if (newblk_lookup(fs, newblkno, DEPALLOC, &nb) != 0)
 		panic("softdep_setup_blkmapdep: found block");
 	/*
 	 * Tie the newblk to the bmsafemap of the buffer holding the
 	 * cylinder group map from which the block was allocated.
 	 */
 	ACQUIRE_LOCK(&lk);
 	map = bmsafemap_lookup(bp);
 	nb->nb_bmsafemap = map;
 	LIST_INSERT_HEAD(&map->sm_newblkhd, nb, nb_deps);
 	FREE_LOCK(&lk);
 }
 
 /*
  * Find the bmsafemap associated with a cylinder group buffer.
  * If none exists, create one. The buffer must be locked when
  * this routine is called and this routine must be called with
  * splbio interrupts blocked.
  */
 static struct bmsafemap *
 bmsafemap_lookup(bp)
 	struct buf *bp;
 {
 	struct bmsafemap *bmsafemap;
 	struct worklist *wk;
 
 #ifdef DEBUG
 	if (lk.lkt_held == -1)
 		panic("bmsafemap_lookup: lock not held");
 #endif
 	LIST_FOREACH(wk, &bp->b_dep, wk_list)
 		if (wk->wk_type == D_BMSAFEMAP)
 			return (WK_BMSAFEMAP(wk));
 	FREE_LOCK(&lk);
 	MALLOC(bmsafemap, struct bmsafemap *, sizeof(struct bmsafemap),
 		M_BMSAFEMAP, M_SOFTDEP_FLAGS);
 	bmsafemap->sm_list.wk_type = D_BMSAFEMAP;
 	bmsafemap->sm_list.wk_state = 0;
 	bmsafemap->sm_buf = bp;
 	LIST_INIT(&bmsafemap->sm_allocdirecthd);
 	LIST_INIT(&bmsafemap->sm_allocindirhd);
 	LIST_INIT(&bmsafemap->sm_inodedephd);
 	LIST_INIT(&bmsafemap->sm_newblkhd);
 	ACQUIRE_LOCK(&lk);
 	WORKLIST_INSERT(&bp->b_dep, &bmsafemap->sm_list);
 	return (bmsafemap);
 }
 
 /*
  * Direct block allocation dependencies.
  * 
  * When a new block is allocated, the corresponding disk locations must be
  * initialized (with zeros or new data) before the on-disk inode points to
  * them.  Also, the freemap from which the block was allocated must be
  * updated (on disk) before the inode's pointer. These two dependencies are
  * independent of each other and are needed for all file blocks and indirect
  * blocks that are pointed to directly by the inode.  Just before the
  * "in-core" version of the inode is updated with a newly allocated block
  * number, a procedure (below) is called to setup allocation dependency
  * structures.  These structures are removed when the corresponding
  * dependencies are satisfied or when the block allocation becomes obsolete
  * (i.e., the file is deleted, the block is de-allocated, or the block is a
  * fragment that gets upgraded).  All of these cases are handled in
  * procedures described later.
  * 
  * When a file extension causes a fragment to be upgraded, either to a larger
  * fragment or to a full block, the on-disk location may change (if the
  * previous fragment could not simply be extended). In this case, the old
  * fragment must be de-allocated, but not until after the inode's pointer has
  * been updated. In most cases, this is handled by later procedures, which
  * will construct a "freefrag" structure to be added to the workitem queue
  * when the inode update is complete (or obsolete).  The main exception to
  * this is when an allocation occurs while a pending allocation dependency
  * (for the same block pointer) remains.  This case is handled in the main
  * allocation dependency setup procedure by immediately freeing the
  * unreferenced fragments.
  */ 
 void 
 softdep_setup_allocdirect(ip, lbn, newblkno, oldblkno, newsize, oldsize, bp)
 	struct inode *ip;	/* inode to which block is being added */
 	ufs_lbn_t lbn;		/* block pointer within inode */
 	ufs_daddr_t newblkno;	/* disk block number being added */
 	ufs_daddr_t oldblkno;	/* previous block number, 0 unless frag */
 	long newsize;		/* size of new block */
 	long oldsize;		/* size of previous block */
 	struct buf *bp;		/* bp for allocated block */
 {
 	struct allocdirect *adp, *oldadp;
 	struct allocdirectlst *adphead;
 	struct bmsafemap *bmsafemap;
 	struct inodedep *inodedep;
 	struct pagedep *pagedep;
 	struct newblk *newblk;
 
 	MALLOC(adp, struct allocdirect *, sizeof(struct allocdirect),
 		M_ALLOCDIRECT, M_SOFTDEP_FLAGS|M_ZERO);
 	adp->ad_list.wk_type = D_ALLOCDIRECT;
 	adp->ad_lbn = lbn;
 	adp->ad_newblkno = newblkno;
 	adp->ad_oldblkno = oldblkno;
 	adp->ad_newsize = newsize;
 	adp->ad_oldsize = oldsize;
 	adp->ad_state = ATTACHED;
 	/* A freefrag is only set up when the block number changes. */
 	if (newblkno == oldblkno)
 		adp->ad_freefrag = NULL;
 	else
 		adp->ad_freefrag = newfreefrag(ip, oldblkno, oldsize);
 
 	/*
 	 * The newblk for this block must already exist (one is created
 	 * by softdep_setup_blkmapdep); not finding it is fatal.
 	 */
 	if (newblk_lookup(ip->i_fs, newblkno, 0, &newblk) == 0)
 		panic("softdep_setup_allocdirect: lost block");
 
 	ACQUIRE_LOCK(&lk);
 	inodedep_lookup(ip->i_fs, ip->i_number, DEPALLOC | NODELAY, &inodedep);
 	adp->ad_inodedep = inodedep;
 
 	/*
 	 * Take over the bitmap dependency state from the newblk: either
 	 * it is already complete, or the allocdirect replaces the newblk
 	 * on the bmsafemap. The newblk itself is then discarded.
 	 */
 	if (newblk->nb_state == DEPCOMPLETE) {
 		adp->ad_state |= DEPCOMPLETE;
 		adp->ad_buf = NULL;
 	} else {
 		bmsafemap = newblk->nb_bmsafemap;
 		adp->ad_buf = bmsafemap->sm_buf;
 		LIST_REMOVE(newblk, nb_deps);
 		LIST_INSERT_HEAD(&bmsafemap->sm_allocdirecthd, adp, ad_deps);
 	}
 	LIST_REMOVE(newblk, nb_hash);
 	FREE(newblk, M_NEWBLK);
 
 	WORKLIST_INSERT(&bp->b_dep, &adp->ad_list);
 	if (lbn >= NDADDR) {
 		/* allocating an indirect block */
 		if (oldblkno != 0) {
 			FREE_LOCK(&lk);
 			panic("softdep_setup_allocdirect: non-zero indir");
 		}
 	} else {
 		/*
 		 * Allocating a direct block.
 		 *
 		 * If we are allocating a directory block, then we must
 		 * allocate an associated pagedep to track additions and
 		 * deletions.
 		 */
 		if ((ip->i_mode & IFMT) == IFDIR &&
 		    pagedep_lookup(ip, lbn, DEPALLOC, &pagedep) == 0)
 			WORKLIST_INSERT(&bp->b_dep, &pagedep->pd_list);
 	}
 	/*
 	 * The list of allocdirects must be kept in sorted and ascending
 	 * order so that the rollback routines can quickly determine the
 	 * first uncommitted block (the size of the file stored on disk
 	 * ends at the end of the lowest committed fragment, or if there
 	 * are no fragments, at the end of the highest committed block).
 	 * Since files generally grow, the typical case is that the new
 	 * block is to be added at the end of the list. We speed this
 	 * special case by checking against the last allocdirect in the
 	 * list before laboriously traversing the list looking for the
 	 * insertion point.
 	 */
 	adphead = &inodedep->id_newinoupdt;
 	oldadp = TAILQ_LAST(adphead, allocdirectlst);
 	if (oldadp == NULL || oldadp->ad_lbn <= lbn) {
 		/* insert at end of list */
 		TAILQ_INSERT_TAIL(adphead, adp, ad_next);
 		/* An existing entry for the same lbn is merged away. */
 		if (oldadp != NULL && oldadp->ad_lbn == lbn)
 			allocdirect_merge(adphead, adp, oldadp);
 		FREE_LOCK(&lk);
 		return;
 	}
 	/* Find the first entry whose lbn is not below the new one. */
 	TAILQ_FOREACH(oldadp, adphead, ad_next) {
 		if (oldadp->ad_lbn >= lbn)
 			break;
 	}
 	if (oldadp == NULL) {
 		FREE_LOCK(&lk);
 		panic("softdep_setup_allocdirect: lost entry");
 	}
 	/* insert in middle of list */
 	TAILQ_INSERT_BEFORE(oldadp, adp, ad_next);
 	if (oldadp->ad_lbn == lbn)
 		allocdirect_merge(adphead, adp, oldadp);
 	FREE_LOCK(&lk);
 }
 
 /*
  * Replace an old allocdirect dependency with a newer one.
  * This routine must be called with splbio interrupts blocked.
  *
  * The new dependency inherits the old block number and size of the
  * dependency it replaces, and the old dependency is released through
  * free_allocdirect(). Panics if the new dependency does not pick up
  * where the old one left off, or if it is not for a direct block.
  */
 static void
 allocdirect_merge(adphead, newadp, oldadp)
 	struct allocdirectlst *adphead;	/* head of list holding allocdirects */
 	struct allocdirect *newadp;	/* allocdirect being added */
 	struct allocdirect *oldadp;	/* existing allocdirect being checked */
 {
 	struct freefrag *freefrag;
 
 #ifdef DEBUG
 	if (lk.lkt_held == -1)
 		panic("allocdirect_merge: lock not held");
 #endif
 	if (newadp->ad_oldblkno != oldadp->ad_newblkno ||
 	    newadp->ad_oldsize != oldadp->ad_newsize ||
 	    newadp->ad_lbn >= NDADDR) {
 		FREE_LOCK(&lk);
 		/* Name this routine so that the panic can be traced to it. */
 		panic("allocdirect_merge: old %d != new %d || lbn %ld >= %d",
 		    newadp->ad_oldblkno, oldadp->ad_newblkno, newadp->ad_lbn,
 		    NDADDR);
 	}
 	newadp->ad_oldblkno = oldadp->ad_oldblkno;
 	newadp->ad_oldsize = oldadp->ad_oldsize;
 	/*
 	 * If the old dependency had a fragment to free or had never
 	 * previously had a block allocated, then the new dependency
 	 * can immediately post its freefrag and adopt the old freefrag.
 	 * This action is done by swapping the freefrag dependencies.
 	 * The new dependency gains the old one's freefrag, and the
 	 * old one gets the new one and then immediately puts it on
 	 * the worklist when it is freed by free_allocdirect. It is
 	 * not possible to do this swap when the old dependency had a
 	 * non-zero size but no previous fragment to free. This condition
 	 * arises when the new block is an extension of the old block.
 	 * Here, the first part of the fragment allocated to the new
 	 * dependency is part of the block currently claimed on disk by
 	 * the old dependency, so cannot legitimately be freed until the
 	 * conditions for the new dependency are fulfilled.
 	 */
 	if (oldadp->ad_freefrag != NULL || oldadp->ad_oldblkno == 0) {
 		freefrag = newadp->ad_freefrag;
 		newadp->ad_freefrag = oldadp->ad_freefrag;
 		oldadp->ad_freefrag = freefrag;
 	}
 	free_allocdirect(adphead, oldadp, 0);
 }
 		
 /*
  * Build a freefrag describing the fragment of "size" bytes at "blkno"
  * that belongs to inode "ip". No structure is needed, and NULL is
  * returned, when blkno is zero.
  */
 static struct freefrag *
 newfreefrag(ip, blkno, size)
 	struct inode *ip;
 	ufs_daddr_t blkno;
 	long size;
 {
 	struct freefrag *ff;
 	struct fs *fs;
 
 	if (blkno == 0)
 		return (NULL);
 	fs = ip->i_fs;
 	/* The fragment run may not extend past the end of its block. */
 	if (fragnum(fs, blkno) + numfrags(fs, size) > fs->fs_frag)
 		panic("newfreefrag: frag size");
 	MALLOC(ff, struct freefrag *, sizeof(struct freefrag),
 		M_FREEFRAG, M_SOFTDEP_FLAGS);
 	ff->ff_list.wk_type = D_FREEFRAG;
 	/* XXX - the uid rides in ff_state; see handle_workitem_freefrag */
 	ff->ff_state = ip->i_uid & ~ONWORKLIST;
 	ff->ff_blkno = blkno;
 	ff->ff_fragsize = size;
 	ff->ff_inum = ip->i_number;
 	ff->ff_devvp = ip->i_devvp;
 	ff->ff_mnt = ITOV(ip)->v_mount;
 	return (ff);
 }
 
 /*
  * This workitem de-allocates fragments that were replaced during
  * file block allocation.
  */
 static void 
 handle_workitem_freefrag(freefrag)
 	struct freefrag *freefrag;
 {
 	struct inode tip;
 
 	tip.i_vnode = NULL;
 	tip.i_fs = VFSTOUFS(freefrag->ff_mnt)->um_fs;
 	tip.i_devvp = freefrag->ff_devvp;
 	tip.i_dev = freefrag->ff_devvp->v_rdev;
 	tip.i_number = freefrag->ff_inum;
 	tip.i_uid = freefrag->ff_state & ~ONWORKLIST;	/* XXX - set above */
 	ffs_blkfree(&tip, freefrag->ff_blkno, freefrag->ff_fragsize);
 	FREE(freefrag, M_FREEFRAG);
 }
 
 /*
  * Indirect block allocation dependencies.
  * 
  * The same dependencies that exist for a direct block also exist when
  * a new block is allocated and pointed to by an entry in a block of
  * indirect pointers. The undo/redo states described above are also
  * used here. Because an indirect block contains many pointers that
  * may have dependencies, a second copy of the entire in-memory indirect
  * block is kept. The buffer cache copy is always completely up-to-date.
  * The second copy, which is used only as a source for disk writes,
  * contains only the safe pointers (i.e., those that have no remaining
  * update dependencies). The second copy is freed when all pointers
  * are safe. The cache is not allowed to replace indirect blocks with
  * pending update dependencies. If a buffer containing an indirect
  * block with dependencies is written, these routines will mark it
  * dirty again. It can only be successfully written once all the
  * dependencies are removed. The ffs_fsync routine in conjunction with
  * softdep_sync_metadata work together to get all the dependencies
  * removed so that a file can be successfully written to disk. Three
  * procedures are used when setting up indirect block pointer
  * dependencies. The division is necessary because of the organization
  * of the "balloc" routine and because of the distinction between file
  * pages and file metadata blocks.
  */
 
 /*
  * Allocate and fill in an allocindir structure for a pointer in an
  * indirect block. The structure is zeroed on allocation and starts out
  * in the ATTACHED state; a freefrag for a full filesystem block is
  * requested for the previous block number.
  */
 static struct allocindir *
 newallocindir(ip, ptrno, newblkno, oldblkno)
 	struct inode *ip;	/* inode for file being extended */
 	int ptrno;		/* offset of pointer in indirect block */
 	ufs_daddr_t newblkno;	/* disk block number being added */
 	ufs_daddr_t oldblkno;	/* previous block number, 0 if none */
 {
 	struct allocindir *newaip;
 
 	MALLOC(newaip, struct allocindir *, sizeof(struct allocindir),
 		M_ALLOCINDIR, M_SOFTDEP_FLAGS|M_ZERO);
 	newaip->ai_list.wk_type = D_ALLOCINDIR;
 	newaip->ai_state = ATTACHED;
 	newaip->ai_newblkno = newblkno;
 	newaip->ai_oldblkno = oldblkno;
 	newaip->ai_offset = ptrno;
 	newaip->ai_freefrag = newfreefrag(ip, oldblkno, ip->i_fs->fs_bsize);
 	return (newaip);
 }
 
 /*
  * Set up the dependency for an indirect block pointer that is about to
  * be pointed at a newly allocated file page.
  */
 void
 softdep_setup_allocindir_page(ip, lbn, bp, ptrno, newblkno, oldblkno, nbp)
 	struct inode *ip;	/* inode for file being extended */
 	ufs_lbn_t lbn;		/* allocated block number within file */
 	struct buf *bp;		/* buffer with indirect blk referencing page */
 	int ptrno;		/* offset of pointer in indirect block */
 	ufs_daddr_t newblkno;	/* disk block number being added */
 	ufs_daddr_t oldblkno;	/* previous block number, 0 if none */
 	struct buf *nbp;	/* buffer holding allocated page */
 {
 	struct pagedep *pagedep;
 	struct allocindir *newaip;
 
 	newaip = newallocindir(ip, ptrno, newblkno, oldblkno);
 	ACQUIRE_LOCK(&lk);
 	if ((ip->i_mode & IFMT) == IFDIR) {
 		/*
 		 * A directory page also needs a pagedep to track
 		 * additions and deletions; it is attached to the page's
 		 * buffer when the lookup returns zero.
 		 */
 		if (pagedep_lookup(ip, lbn, DEPALLOC, &pagedep) == 0) {
 			WORKLIST_INSERT(&nbp->b_dep, &pagedep->pd_list);
 		}
 	}
 	WORKLIST_INSERT(&nbp->b_dep, &newaip->ai_list);
 	FREE_LOCK(&lk);
 	setup_allocindir_phase2(bp, ip, newaip);
 }
 
 /*
  * Set up the dependency for an indirect block pointer that is about to
  * be pointed at a newly allocated indirect block. There is no previous
  * block in this case, so the allocindir is created with an old block
  * number of zero.
  */
 void
 softdep_setup_allocindir_meta(nbp, ip, bp, ptrno, newblkno)
 	struct buf *nbp;	/* newly allocated indirect block */
 	struct inode *ip;	/* inode for file being extended */
 	struct buf *bp;		/* indirect block referencing allocated block */
 	int ptrno;		/* offset of pointer in indirect block */
 	ufs_daddr_t newblkno;	/* disk block number being added */
 {
 	struct allocindir *newaip;
 
 	newaip = newallocindir(ip, ptrno, newblkno, 0);
 
 	/* Attach the dependency to the buffer of the new indirect block. */
 	ACQUIRE_LOCK(&lk);
 	WORKLIST_INSERT(&nbp->b_dep, &newaip->ai_list);
 	FREE_LOCK(&lk);
 
 	setup_allocindir_phase2(bp, ip, newaip);
 }
 
 /*
  * Called to finish the allocation of the "aip" allocated
  * by one of the two routines above.
  *
  * The loop below runs at most a few times: the first pass looks for an
  * indirdep already attached to bp. If there is none, an indirdep with a
  * saved copy of the indirect block is built with the lock released and
  * the search is repeated, so that an indirdep attached in the meantime
  * is used in preference to the newly built one.
  */
 static void 
 setup_allocindir_phase2(bp, ip, aip)
 	struct buf *bp;		/* in-memory copy of the indirect block */
 	struct inode *ip;	/* inode for file being extended */
 	struct allocindir *aip;	/* allocindir allocated by the above routines */
 {
 	struct worklist *wk;
 	struct indirdep *indirdep, *newindirdep;
 	struct bmsafemap *bmsafemap;
 	struct allocindir *oldaip;
 	struct freefrag *freefrag;
 	struct newblk *newblk;
 
 	/* Indirect blocks are identified by a negative logical block number. */
 	if (bp->b_lblkno >= 0)
 		panic("setup_allocindir_phase2: not indir blk");
 	for (indirdep = NULL, newindirdep = NULL; ; ) {
 		ACQUIRE_LOCK(&lk);
 		LIST_FOREACH(wk, &bp->b_dep, wk_list) {
 			if (wk->wk_type != D_INDIRDEP)
 				continue;
 			indirdep = WK_INDIRDEP(wk);
 			break;
 		}
 		/* None attached, but one was prepared on the previous pass. */
 		if (indirdep == NULL && newindirdep) {
 			indirdep = newindirdep;
 			WORKLIST_INSERT(&bp->b_dep, &indirdep->ir_list);
 			newindirdep = NULL;
 		}
 		FREE_LOCK(&lk);
 		if (indirdep) {
 			if (newblk_lookup(ip->i_fs, aip->ai_newblkno, 0,
 			    &newblk) == 0)
 				panic("setup_allocindir: lost block");
 			ACQUIRE_LOCK(&lk);
 			/*
 			 * Move the bitmap dependency from the newblk to the
 			 * allocindir, then discard the newblk.
 			 */
 			if (newblk->nb_state == DEPCOMPLETE) {
 				aip->ai_state |= DEPCOMPLETE;
 				aip->ai_buf = NULL;
 			} else {
 				bmsafemap = newblk->nb_bmsafemap;
 				aip->ai_buf = bmsafemap->sm_buf;
 				LIST_REMOVE(newblk, nb_deps);
 				LIST_INSERT_HEAD(&bmsafemap->sm_allocindirhd,
 				    aip, ai_deps);
 			}
 			LIST_REMOVE(newblk, nb_hash);
 			FREE(newblk, M_NEWBLK);
 			aip->ai_indirdep = indirdep;
 			/*
 			 * Check to see if there is an existing dependency
 			 * for this block. If there is, merge the old
 			 * dependency into the new one.
 			 */
 			if (aip->ai_oldblkno == 0)
 				oldaip = NULL;
 			else
 
 				LIST_FOREACH(oldaip, &indirdep->ir_deplisthd, ai_next)
 					if (oldaip->ai_offset == aip->ai_offset)
 						break;
 			freefrag = NULL;
 			if (oldaip != NULL) {
 				if (oldaip->ai_newblkno != aip->ai_oldblkno) {
 					FREE_LOCK(&lk);
 					panic("setup_allocindir_phase2: blkno");
 				}
 				/*
 				 * The new dependency takes over the old
 				 * one's previous block and freefrag; its own
 				 * freefrag is processed below once the lock
 				 * has been released.
 				 */
 				aip->ai_oldblkno = oldaip->ai_oldblkno;
 				freefrag = aip->ai_freefrag;
 				aip->ai_freefrag = oldaip->ai_freefrag;
 				oldaip->ai_freefrag = NULL;
 				free_allocindir(oldaip, NULL);
 			}
 			LIST_INSERT_HEAD(&indirdep->ir_deplisthd, aip, ai_next);
 			/* The saved copy keeps the old pointer value. */
 			((ufs_daddr_t *)indirdep->ir_savebp->b_data)
 			    [aip->ai_offset] = aip->ai_oldblkno;
 			FREE_LOCK(&lk);
 			if (freefrag != NULL)
 				handle_workitem_freefrag(freefrag);
 		}
 		/*
 		 * Still non-NULL only when the prepared indirdep was not
 		 * attached above, so it is released here.
 		 */
 		if (newindirdep) {
 			if (indirdep->ir_savebp != NULL)
 				brelse(newindirdep->ir_savebp);
 			WORKITEM_FREE((caddr_t)newindirdep, D_INDIRDEP);
 		}
 		if (indirdep)
 			break;
 		/* No indirdep yet: prepare one and go around again. */
 		MALLOC(newindirdep, struct indirdep *, sizeof(struct indirdep),
 			M_INDIRDEP, M_SOFTDEP_FLAGS);
 		newindirdep->ir_list.wk_type = D_INDIRDEP;
 		newindirdep->ir_state = ATTACHED;
 		LIST_INIT(&newindirdep->ir_deplisthd);
 		LIST_INIT(&newindirdep->ir_donehd);
 		/* Have VOP_BMAP fill in b_blkno while it still equals b_lblkno. */
 		if (bp->b_blkno == bp->b_lblkno) {
 			VOP_BMAP(bp->b_vp, bp->b_lblkno, NULL, &bp->b_blkno,
 				NULL, NULL);
 		}
 		/* The save buffer is obtained on the device vnode by b_blkno. */
 		newindirdep->ir_savebp =
 		    getblk(ip->i_devvp, bp->b_blkno, bp->b_bcount, 0, 0);
 		BUF_KERNPROC(newindirdep->ir_savebp);
 		bcopy(bp->b_data, newindirdep->ir_savebp->b_data, bp->b_bcount);
 	}
 }
 
 /*
  * Block de-allocation dependencies.
  * 
  * When blocks are de-allocated, the on-disk pointers must be nullified before
  * the blocks are made available for use by other files.  (The true
  * requirement is that old pointers must be nullified before new on-disk
  * pointers are set.  We chose this slightly more stringent requirement to
  * reduce complexity.) Our implementation handles this dependency by updating
  * the inode (or indirect block) appropriately but delaying the actual block
  * de-allocation (i.e., freemap and free space count manipulation) until
  * after the updated versions reach stable storage.  After the disk is
  * updated, the blocks can be safely de-allocated whenever it is convenient.
  * This implementation handles only the common case of reducing a file's
  * length to zero. Other cases are handled by the conventional synchronous
  * write approach.
  *
  * The ffs implementation with which we worked double-checks
  * the state of the block pointers and file size as it reduces
  * a file's length.  Some of this code is replicated here in our
  * soft updates implementation.  The freeblks->fb_chkcnt field is
  * used to transfer a part of this information to the procedure
  * that eventually de-allocates the blocks.
  *
  * This routine should be called from the routine that shortens
  * a file's length, before the inode's size or block pointers
  * are modified. It will save the block pointer information for
  * later release and zero the inode so that the calling routine
  * can release it.
  */
 /*
  * Set up the release of all blocks of an inode that is being truncated
  * to length zero. The block pointers, size and block count are saved in
  * a freeblks structure and cleared in the inode, the zero'ed inode is
  * copied into its disk buffer, and the dependencies that the truncation
  * makes obsolete are discarded. Only a length of zero is supported;
  * any other value panics.
  */
 void
 softdep_setup_freeblocks(ip, length)
 	struct inode *ip;	/* The inode whose length is to be reduced */
 	off_t length;		/* The new length for the file */
 {
 	struct freeblks *freeblks;
 	struct inodedep *inodedep;
 	struct allocdirect *adp;
 	struct vnode *vp;
 	struct buf *bp;
 	struct fs *fs;
 	int i, delay, error;
 
 	fs = ip->i_fs;
 	if (length != 0)
 		panic("softdep_setup_freeblocks: non-zero length");
 	MALLOC(freeblks, struct freeblks *, sizeof(struct freeblks),
 		M_FREEBLKS, M_SOFTDEP_FLAGS|M_ZERO);
 	freeblks->fb_list.wk_type = D_FREEBLKS;
 	freeblks->fb_uid = ip->i_uid;
 	freeblks->fb_previousinum = ip->i_number;
 	freeblks->fb_devvp = ip->i_devvp;
 	freeblks->fb_mnt = ITOV(ip)->v_mount;
 	freeblks->fb_oldsize = ip->i_size;
 	freeblks->fb_newsize = length;
 	freeblks->fb_chkcnt = ip->i_blocks;
 	/* Save each block pointer for later release, then clear it. */
 	for (i = 0; i < NDADDR; i++) {
 		freeblks->fb_dblks[i] = ip->i_db[i];
 		ip->i_db[i] = 0;
 	}
 	for (i = 0; i < NIADDR; i++) {
 		freeblks->fb_iblks[i] = ip->i_ib[i];
 		ip->i_ib[i] = 0;
 	}
 	ip->i_blocks = 0;
 	ip->i_size = 0;
 	/*
 	 * Push the zero'ed inode to its disk buffer so that we are free
 	 * to delete its dependencies below. Once the dependencies are gone
 	 * the buffer can be safely released.
 	 */
 	if ((error = bread(ip->i_devvp,
 	    fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
 	    (int)fs->fs_bsize, NOCRED, &bp)) != 0)
 		softdep_error("softdep_setup_freeblocks", error);
 	*((struct dinode *)bp->b_data + ino_to_fsbo(fs, ip->i_number)) =
 	    ip->i_din;
 	/*
 	 * Find and eliminate any inode dependencies.
 	 */
 	ACQUIRE_LOCK(&lk);
 	(void) inodedep_lookup(fs, ip->i_number, DEPALLOC, &inodedep);
 	if ((inodedep->id_state & IOSTARTED) != 0) {
 		FREE_LOCK(&lk);
 		panic("softdep_setup_freeblocks: inode busy");
 	}
 	/*
 	 * Add the freeblks structure to the list of operations that
 	 * must await the zero'ed inode being written to disk. If we
 	 * still have a bitmap dependency (delay == 0), then the inode
 	 * has never been written to disk, so we can process the
 	 * freeblks below once we have deleted the dependencies.
 	 */
 	delay = (inodedep->id_state & DEPCOMPLETE);
 	if (delay)
 		WORKLIST_INSERT(&inodedep->id_bufwait, &freeblks->fb_list);
 	/*
 	 * Because the file length has been truncated to zero, any
 	 * pending block allocation dependency structures associated
 	 * with this inode are obsolete and can simply be de-allocated.
 	 * We must first merge the two dependency lists to get rid of
 	 * any duplicate freefrag structures, then purge the merged list.
 	 * If we still have a bitmap dependency, then the inode has never
 	 * been written to disk, so we can free any fragments without delay.
 	 */
 	merge_inode_lists(inodedep);
 	while ((adp = TAILQ_FIRST(&inodedep->id_inoupdt)) != 0)
 		free_allocdirect(&inodedep->id_inoupdt, adp, delay);
 	FREE_LOCK(&lk);
 	bdwrite(bp);
 	/*
 	 * We must wait for any I/O in progress to finish so that
 	 * all potential buffers on the dirty list will be visible.
 	 * Once they are all there, walk the list and get rid of
 	 * any dependencies.
 	 */
 	vp = ITOV(ip);
 	ACQUIRE_LOCK(&lk);
 	drain_output(vp, 1);
 	while (getdirtybuf(&TAILQ_FIRST(&vp->v_dirtyblkhd), MNT_WAIT)) {
 		bp = TAILQ_FIRST(&vp->v_dirtyblkhd);
 		/* The inodedep is looked up afresh for every buffer. */
 		(void) inodedep_lookup(fs, ip->i_number, 0, &inodedep);
 		deallocate_dependencies(bp, inodedep);
 		bp->b_flags |= B_INVAL | B_NOCACHE;
 		FREE_LOCK(&lk);
 		brelse(bp);
 		ACQUIRE_LOCK(&lk);
 	}
 	if (inodedep_lookup(fs, ip->i_number, 0, &inodedep) != 0)
 		(void) free_inodedep(inodedep);
 	FREE_LOCK(&lk);
 	/*
 	 * If the inode has never been written to disk (delay == 0),
 	 * then we can process the freeblks now that we have deleted
 	 * the dependencies.
 	 */
 	if (!delay)
-		handle_workitem_freeblocks(freeblks);
+		handle_workitem_freeblocks(freeblks, 0);
 }
 
 /*
  * Reclaim any dependency structures from a buffer that is about to
  * be reallocated to a new vnode. The buffer must be locked, thus,
  * no I/O completion operations can occur while we are manipulating
  * its associated dependencies. The mutex is held so that other I/O's
  * associated with related dependencies do not occur.
  */
 static void
 deallocate_dependencies(bp, inodedep)
 	struct buf *bp;
 	struct inodedep *inodedep;
 {
 	struct worklist *wk;
 	struct indirdep *indirdep;
 	struct allocindir *aip;
 	struct pagedep *pagedep;
 	struct dirrem *dirrem;
 	struct diradd *dap;
 	int i;
 
 	while ((wk = LIST_FIRST(&bp->b_dep)) != NULL) {
 		switch (wk->wk_type) {
 
 		case D_INDIRDEP:
 			indirdep = WK_INDIRDEP(wk);
 			/*
 			 * None of the indirect pointers will ever be visible,
 			 * so they can simply be tossed. GOINGAWAY ensures
 			 * that allocated pointers will be saved in the buffer
 			 * cache until they are freed. Note that they will
 			 * only be able to be found by their physical address
 			 * since the inode mapping the logical address will
 			 * be gone. The save buffer used for the safe copy
 			 * was allocated in setup_allocindir_phase2 using
 			 * the physical address so it could be used for this
 			 * purpose. Hence we swap the safe copy with the real
 			 * copy, allowing the safe copy to be freed and holding
 			 * on to the real copy for later use in indir_trunc.
 			 */
 			if (indirdep->ir_state & GOINGAWAY) {
 				FREE_LOCK(&lk);
 				panic("deallocate_dependencies: already gone");
 			}
 			indirdep->ir_state |= GOINGAWAY;
 			while ((aip = LIST_FIRST(&indirdep->ir_deplisthd)) != 0)
 				free_allocindir(aip, inodedep);
 			if (bp->b_lblkno >= 0 ||
 			    bp->b_blkno != indirdep->ir_savebp->b_lblkno) {
 				FREE_LOCK(&lk);
 				panic("deallocate_dependencies: not indir");
 			}
 			bcopy(bp->b_data, indirdep->ir_savebp->b_data,
 			    bp->b_bcount);
 			WORKLIST_REMOVE(wk);
 			WORKLIST_INSERT(&indirdep->ir_savebp->b_dep, wk);
 			continue;
 
 		case D_PAGEDEP:
 			pagedep = WK_PAGEDEP(wk);
 			/*
 			 * None of the directory additions will ever be
 			 * visible, so they can simply be tossed.
 			 */
 			for (i = 0; i < DAHASHSZ; i++)
 				while ((dap =
 				    LIST_FIRST(&pagedep->pd_diraddhd[i])))
 					free_diradd(dap);
 			while ((dap = LIST_FIRST(&pagedep->pd_pendinghd)) != 0)
 				free_diradd(dap);
 			/*
 			 * Copy any directory remove dependencies to the list
 			 * to be processed after the zero'ed inode is written.
 			 * If the inode has already been written, then they 
 			 * can be dumped directly onto the work list.
 			 */
 			LIST_FOREACH(dirrem, &pagedep->pd_dirremhd, dm_next) {
 				LIST_REMOVE(dirrem, dm_next);
 				dirrem->dm_dirinum = pagedep->pd_ino;
 				if (inodedep == NULL ||
 				    (inodedep->id_state & ALLCOMPLETE) ==
 				     ALLCOMPLETE)
 					add_to_worklist(&dirrem->dm_list);
 				else
 					WORKLIST_INSERT(&inodedep->id_bufwait,
 					    &dirrem->dm_list);
 			}
 			WORKLIST_REMOVE(&pagedep->pd_list);
 			LIST_REMOVE(pagedep, pd_hash);
 			WORKITEM_FREE(pagedep, D_PAGEDEP);
 			continue;
 
 		case D_ALLOCINDIR:
 			free_allocindir(WK_ALLOCINDIR(wk), inodedep);
 			continue;
 
 		case D_ALLOCDIRECT:
 		case D_INODEDEP:
 			FREE_LOCK(&lk);
 			panic("deallocate_dependencies: Unexpected type %s",
 			    TYPENAME(wk->wk_type));
 			/* NOTREACHED */
 
 		default:
 			FREE_LOCK(&lk);
 			panic("deallocate_dependencies: Unknown type %s",
 			    TYPENAME(wk->wk_type));
 			/* NOTREACHED */
 		}
 	}
 }
 
 /*
  * Free an allocdirect. Generate a new freefrag work request if appropriate.
  * This routine must be called with splbio interrupts blocked.
  */
 static void
 free_allocdirect(adphead, adp, delay)
 	struct allocdirectlst *adphead;
 	struct allocdirect *adp;
 	int delay;
 {
 
 #ifdef DEBUG
 	if (lk.lkt_held == -1)
 		panic("free_allocdirect: lock not held");
 #endif
 	if ((adp->ad_state & DEPCOMPLETE) == 0)
 		LIST_REMOVE(adp, ad_deps);
 	TAILQ_REMOVE(adphead, adp, ad_next);
 	if ((adp->ad_state & COMPLETE) == 0)
 		WORKLIST_REMOVE(&adp->ad_list);
 	if (adp->ad_freefrag != NULL) {
 		if (delay)
 			WORKLIST_INSERT(&adp->ad_inodedep->id_bufwait,
 			    &adp->ad_freefrag->ff_list);
 		else
 			add_to_worklist(&adp->ad_freefrag->ff_list);
 	}
 	WORKITEM_FREE(adp, D_ALLOCDIRECT);
 }
 
 /*
  * Arrange for inode "ino" to be freed. The free itself is held back
  * until the zero'ed inode has been written to disk, unless that is
  * already known to be the case.
  */
 void
 softdep_freefile(pvp, ino, mode)
 	struct vnode *pvp;
 	ino_t ino;
 	int mode;
 {
 	struct inode *ip = VTOI(pvp);
 	struct inodedep *inodedep;
 	struct freefile *freefile;
 
 	/*
 	 * Build the freefile work item that describes the de-allocation.
 	 */
 	MALLOC(freefile, struct freefile *, sizeof(struct freefile),
 	    M_FREEFILE, M_SOFTDEP_FLAGS);
 	freefile->fx_list.wk_type = D_FREEFILE;
 	freefile->fx_list.wk_state = 0;
 	freefile->fx_mode = mode;
 	freefile->fx_oldinum = ino;
 	freefile->fx_devvp = ip->i_devvp;
 	freefile->fx_mnt = ITOV(ip)->v_mount;
 
 	/*
 	 * An inodedep that exists and cannot be discarded as "never
 	 * written" means the zero'ed inode is not on disk yet; queue the
 	 * request on id_inowait until it is.
 	 */
 	ACQUIRE_LOCK(&lk);
 	if (inodedep_lookup(ip->i_fs, ino, 0, &inodedep) != 0 &&
 	    check_inode_unwritten(inodedep) == 0) {
 		WORKLIST_INSERT(&inodedep->id_inowait, &freefile->fx_list);
 		FREE_LOCK(&lk);
 		return;
 	}
 	/*
 	 * Either there is no inodedep (the zero'ed inode has been
 	 * written) or the allocated inode never reached the disk (the
 	 * on-disk inode is zero'ed). The file can be freed immediately.
 	 */
 	FREE_LOCK(&lk);
 	handle_workitem_freefile(freefile);
 }
 
 /*
  * Determine whether an inode has never been written to disk and, if
  * so, discard its inodedep. Returns 1 when the inodedep was freed and
  * 0 otherwise. This routine must be called with splbio interrupts
  * blocked.
  *
  * A bitmap dependency that is still outstanding shows that the inode
  * was never written to disk. Because the inode is being deallocated
  * the dependency is no longer needed and is dropped. The ALLCOMPLETE
  * flags are set since the bitmap now properly shows that the inode is
  * not allocated. Even if the inode is actively being written, it has
  * been rolled back to its zero'ed state, so a zero inode is what ends
  * up on the disk. For short lived files this usually removes every
  * dependency from the inode so that it can be freed immediately.
  */
 static int
 check_inode_unwritten(inodedep)
 	struct inodedep *inodedep;
 {
 
 	/* No bitmap dependency remains, so the inode may be on disk. */
 	if (inodedep->id_state & DEPCOMPLETE)
 		return (0);
 	/* Anything still hanging off the inodedep keeps it in use. */
 	if (LIST_FIRST(&inodedep->id_pendinghd) != NULL ||
 	    LIST_FIRST(&inodedep->id_bufwait) != NULL ||
 	    LIST_FIRST(&inodedep->id_inowait) != NULL ||
 	    TAILQ_FIRST(&inodedep->id_inoupdt) != NULL ||
 	    TAILQ_FIRST(&inodedep->id_newinoupdt) != NULL)
 		return (0);
 	if (inodedep->id_nlinkdelta != 0)
 		return (0);
 
 	/* Drop the bitmap dependency and detach from buffer and lists. */
 	inodedep->id_state |= ALLCOMPLETE;
 	LIST_REMOVE(inodedep, id_deps);
 	inodedep->id_buf = NULL;
 	if ((inodedep->id_state & ONWORKLIST) != 0)
 		WORKLIST_REMOVE(&inodedep->id_list);
 	if (inodedep->id_savedino != NULL) {
 		FREE(inodedep->id_savedino, M_INODEDEP);
 		inodedep->id_savedino = NULL;
 	}
 	/* With everything cleared the inodedep has to be freeable. */
 	if (!free_inodedep(inodedep)) {
 		FREE_LOCK(&lk);
 		panic("check_inode_unwritten: busy inode");
 	}
 	return (1);
 }
 
 /*
  * Attempt to release an inodedep structure. It is freed only when it
  * is off the work list, marked ALLCOMPLETE, has nothing left on any of
  * its dependency lists, has no link count delta and no saved inode.
  * Returns 1 if the structure was freed, 0 if it had to be kept.
  */
 static int
 free_inodedep(inodedep)
 	struct inodedep *inodedep;
 {
 
 	/* State flags must show an idle, fully completed dependency. */
 	if (inodedep->id_state & ONWORKLIST)
 		return (0);
 	if ((inodedep->id_state & ALLCOMPLETE) != ALLCOMPLETE)
 		return (0);
 	/* Every dependency list has to be empty. */
 	if (LIST_FIRST(&inodedep->id_pendinghd) != NULL ||
 	    LIST_FIRST(&inodedep->id_bufwait) != NULL ||
 	    LIST_FIRST(&inodedep->id_inowait) != NULL ||
 	    TAILQ_FIRST(&inodedep->id_inoupdt) != NULL ||
 	    TAILQ_FIRST(&inodedep->id_newinoupdt) != NULL)
 		return (0);
 	if (inodedep->id_nlinkdelta != 0)
 		return (0);
 	if (inodedep->id_savedino != NULL)
 		return (0);
 
 	LIST_REMOVE(inodedep, id_hash);
 	WORKITEM_FREE(inodedep, D_INODEDEP);
 	num_inodedep--;
 	return (1);
 }
 
 /*
  * This workitem routine performs the block de-allocation.
  * The workitem is added to the pending list after the updated
  * inode block has been written to disk.  As mentioned above,
  * checks regarding the number of blocks de-allocated (compared
  * to the number of blocks allocated for the file) are also
  * performed in this function.
  *
  * The indirect blocks recorded in fb_iblks are released first (via
  * indir_trunc), followed by the direct blocks or frags in fb_dblks.
  * If the number of released blocks differs from fb_chkcnt while the
  * filesystem is flagged FS_UNCLEAN and LK_NOWAIT is not set in flags,
  * the difference is added to the i_blocks of the inode obtained with
  * VFS_VGET. Under DIAGNOSTIC any other mismatch is printed and any
  * error returned by indir_trunc is reported through softdep_error.
  * The freeblks structure is always freed before returning.
  */
 static void
-handle_workitem_freeblocks(freeblks)
+handle_workitem_freeblocks(freeblks, flags)
 	struct freeblks *freeblks;
+	int flags;
 {
-	struct inode tip;
+	struct inode tip, *ip;
+	struct vnode *vp;
 	ufs_daddr_t bn;
 	struct fs *fs;
 	int i, level, bsize;
 	long nblocks, blocksreleased = 0;
 	int error, allerror = 0;
 	ufs_lbn_t baselbns[NIADDR], tmpval;
 
 	/*
 	 * Build a temporary inode on the stack holding just the fields
 	 * assigned below; it is what gets passed to indir_trunc,
 	 * ffs_blkfree and blksize.
 	 */
 	tip.i_fs = fs = VFSTOUFS(freeblks->fb_mnt)->um_fs;
 	tip.i_number = freeblks->fb_previousinum;
 	tip.i_devvp = freeblks->fb_devvp;
 	tip.i_dev = freeblks->fb_devvp->v_rdev;
 	tip.i_size = freeblks->fb_oldsize;
 	tip.i_uid = freeblks->fb_uid;
 	tip.i_vnode = NULL;
 	/*
 	 * baselbns[level] is the first logical block number mapped by the
 	 * indirect block at that level.
 	 */
 	tmpval = 1;
 	baselbns[0] = NDADDR;
 	for (i = 1; i < NIADDR; i++) {
 		tmpval *= NINDIR(fs);
 		baselbns[i] = baselbns[i - 1] + tmpval;
 	}
 	nblocks = btodb(fs->fs_bsize);
 	blocksreleased = 0;
 	/*
 	 * Indirect blocks first.
 	 */
 	for (level = (NIADDR - 1); level >= 0; level--) {
 		if ((bn = freeblks->fb_iblks[level]) == 0)
 			continue;
 		/*
 		 * Remember a failure so that it can be reported below; the
 		 * indirect block itself is released regardless.
 		 */
 		if ((error = indir_trunc(&tip, fsbtodb(fs, bn), level,
 		    baselbns[level], &blocksreleased)) != 0)
 			allerror = error;
 		ffs_blkfree(&tip, bn, fs->fs_bsize);
 		blocksreleased += nblocks;
 	}
 	/*
 	 * All direct blocks or frags.
 	 */
 	for (i = (NDADDR - 1); i >= 0; i--) {
 		if ((bn = freeblks->fb_dblks[i]) == 0)
 			continue;
 		bsize = blksize(fs, &tip, i);
 		ffs_blkfree(&tip, bn, bsize);
 		blocksreleased += btodb(bsize);
 	}
+	/*
+	 * If we still have not finished background cleanup, then check
+	 * to see if the block count needs to be adjusted.
+	 */
+	if (freeblks->fb_chkcnt != blocksreleased &&
+	    (fs->fs_flags & FS_UNCLEAN) != 0 && (flags & LK_NOWAIT) == 0 &&
+	    VFS_VGET(freeblks->fb_mnt, freeblks->fb_previousinum, &vp) == 0) {
+		ip = VTOI(vp);
+		ip->i_blocks += freeblks->fb_chkcnt - blocksreleased;
+		ip->i_flag |= IN_CHANGE;
+		vput(vp);
+	}
 
 #ifdef DIAGNOSTIC
-	if (freeblks->fb_chkcnt != blocksreleased)
+	if (freeblks->fb_chkcnt != blocksreleased &&
+	    ((fs->fs_flags & FS_UNCLEAN) == 0 || (flags & LK_NOWAIT) != 0))
 		printf("handle_workitem_freeblocks: block count");
 	if (allerror)
 		softdep_error("handle_workitem_freeblks", allerror);
 #endif /* DIAGNOSTIC */
+
 	WORKITEM_FREE(freeblks, D_FREEBLKS);
 }
 
 /*
  * Release blocks associated with the inode ip and stored in the indirect
  * block dbn. If level is greater than SINGLE, the block is an indirect block
  * and recursive calls to indirtrunc must be used to cleanse other indirect
  * blocks.
  *
  * ip supplies the filesystem (i_fs) and device vnode (i_devvp) used here,
  * dbn is the disk address of the indirect block, level is the remaining
  * depth of indirection (0 means the entries are not recursed into) and
  * lbn is the logical block number passed along to the recursive calls.
  * *countp is advanced by btodb(fs_bsize) for every block freed here.
  * Returns 0 on success, the bread error if the indirect block could not
  * be read, or otherwise the last error returned by a recursive call.
  */
 static int
 indir_trunc(ip, dbn, level, lbn, countp)
 	struct inode *ip;
 	ufs_daddr_t dbn;
 	int level;
 	ufs_lbn_t lbn;
 	long *countp;
 {
 	struct buf *bp;
 	ufs_daddr_t *bap;
 	ufs_daddr_t nb;
 	struct fs *fs;
 	struct worklist *wk;
 	struct indirdep *indirdep;
 	int i, lbnadd, nblocks;
 	int error, allerror = 0;
 
 	fs = ip->i_fs;
 	/* lbnadd is NINDIR(fs) raised to the power level. */
 	lbnadd = 1;
 	for (i = level; i > 0; i--)
 		lbnadd *= NINDIR(fs);
 	/*
 	 * Get buffer of block pointers to be freed. This routine is not
 	 * called until the zero'ed inode has been written, so it is safe
 	 * to free blocks as they are encountered. Because the inode has
 	 * been zero'ed, calls to bmap on these blocks will fail. So, we
 	 * have to use the on-disk address and the block device for the
 	 * filesystem to look them up. If the file was deleted before its
 	 * indirect blocks were all written to disk, the routine that set
 	 * us up (deallocate_dependencies) will have arranged to leave
 	 * a complete copy of the indirect block in memory for our use.
 	 * Otherwise we have to read the blocks in from the disk.
 	 */
 	ACQUIRE_LOCK(&lk);
 	if ((bp = incore(ip->i_devvp, dbn)) != NULL &&
 	    (wk = LIST_FIRST(&bp->b_dep)) != NULL) {
 		/*
 		 * The only dependency allowed on the in-memory copy is an
 		 * indirdep marked GOINGAWAY whose saved buffer is this one.
 		 */
 		if (wk->wk_type != D_INDIRDEP ||
 		    (indirdep = WK_INDIRDEP(wk))->ir_savebp != bp ||
 		    (indirdep->ir_state & GOINGAWAY) == 0) {
 			FREE_LOCK(&lk);
 			panic("indir_trunc: lost indirdep");
 		}
 		WORKLIST_REMOVE(wk);
 		WORKITEM_FREE(indirdep, D_INDIRDEP);
 		if (LIST_FIRST(&bp->b_dep) != NULL) {
 			FREE_LOCK(&lk);
 			panic("indir_trunc: dangling dep");
 		}
 		FREE_LOCK(&lk);
 	} else {
 		FREE_LOCK(&lk);
 		error = bread(ip->i_devvp, dbn, (int)fs->fs_bsize, NOCRED, &bp);
 		if (error) {
 			/*
 			 * bread hands back a held buffer even when the
 			 * read fails; release it so it is not leaked.
 			 */
 			brelse(bp);
 			return (error);
 		}
 	}
 	/*
 	 * Recursively free indirect blocks.
 	 */
 	bap = (ufs_daddr_t *)bp->b_data;
 	nblocks = btodb(fs->fs_bsize);
 	for (i = NINDIR(fs) - 1; i >= 0; i--) {
 		if ((nb = bap[i]) == 0)
 			continue;
 		if (level != 0) {
 			if ((error = indir_trunc(ip, fsbtodb(fs, nb),
 			     level - 1, lbn + (i * lbnadd), countp)) != 0)
 				allerror = error;
 		}
 		ffs_blkfree(ip, nb, fs->fs_bsize);
 		*countp += nblocks;
 	}
 	/* The block's contents are dead now; do not keep it cached. */
 	bp->b_flags |= B_INVAL | B_NOCACHE;
 	brelse(bp);
 	return (allerror);
 }
 
 /*
  * Dispose of an allocindir dependency. A freefrag attached to it is
  * queued on the work list when no inodedep is supplied, and otherwise
  * placed on that inodedep's id_bufwait list.
  * This routine must be called with splbio interrupts blocked.
  */
 static void
 free_allocindir(aip, inodedep)
 	struct allocindir *aip;
 	struct inodedep *inodedep;
 {
 	struct freefrag *freefrag = aip->ai_freefrag;
 
 #ifdef DEBUG
 	if (lk.lkt_held == -1)
 		panic("free_allocindir: lock not held");
 #endif
 	/* Detach the allocindir from the lists it is linked on. */
 	if (!(aip->ai_state & DEPCOMPLETE))
 		LIST_REMOVE(aip, ai_deps);
 	if ((aip->ai_state & ONWORKLIST) != 0)
 		WORKLIST_REMOVE(&aip->ai_list);
 	LIST_REMOVE(aip, ai_next);
 	/* Pass any pending fragment release along. */
 	if (freefrag != NULL && inodedep != NULL)
 		WORKLIST_INSERT(&inodedep->id_bufwait, &freefrag->ff_list);
 	else if (freefrag != NULL)
 		add_to_worklist(&freefrag->ff_list);
 	WORKITEM_FREE(aip, D_ALLOCINDIR);
 }
 
 /*
  * Directory entry addition dependencies.
  * 
  * When adding a new directory entry, the inode (with its incremented link
  * count) must be written to disk before the directory entry's pointer to it.
  * Also, if the inode is newly allocated, the corresponding freemap must be
  * updated (on disk) before the directory entry's pointer. These requirements
  * are met via undo/redo on the directory entry's pointer, which consists
  * simply of the inode number.
  * 
  * As directory entries are added and deleted, the free space within a
  * directory block can become fragmented.  The ufs file system will compact
  * a fragmented directory block to make space for a new entry. When this
  * occurs, the offsets of previously added entries change. Any "diradd"
  * dependency structures corresponding to these entries must be updated with
  * the new offsets.
  */
 
 /*
  * This routine is called after the in-memory inode's link
  * count has been incremented, but before the directory entry's
  * pointer to the inode has been set.
  *
  * It allocates a diradd describing the new entry, links it onto the
  * pagedep for the directory block and onto the inodedep of the inode
  * the entry refers to. When newdirbp is non-NULL, two mkdir structures
  * are allocated as well: one (MKDIR_BODY) attached to newdirbp and one
  * (MKDIR_PARENT) attached to the parent directory's inodedep, unless
  * the parent has no outstanding inode dependency. newdirbp is handed
  * to bdwrite in every case where it is non-NULL. The routine takes and
  * releases the softdep lock (lk) itself.
  */
 void 
 softdep_setup_directory_add(bp, dp, diroffset, newinum, newdirbp)
 	struct buf *bp;		/* buffer containing directory block */
 	struct inode *dp;	/* inode for directory */
 	off_t diroffset;	/* offset of new entry in directory */
 	long newinum;		/* inode referenced by new directory entry */
 	struct buf *newdirbp;	/* non-NULL => contents of new mkdir */
 {
 	int offset;		/* offset of new entry within directory block */
 	ufs_lbn_t lbn;		/* block in directory containing new entry */
 	struct fs *fs;
 	struct diradd *dap;
 	struct pagedep *pagedep;
 	struct inodedep *inodedep;
 	struct mkdir *mkdir1, *mkdir2;
 
 	/*
 	 * Whiteouts have no dependencies.
 	 */
 	if (newinum == WINO) {
 		if (newdirbp != NULL)
 			bdwrite(newdirbp);
 		return;
 	}
 
 	fs = dp->i_fs;
 	lbn = lblkno(fs, diroffset);
 	offset = blkoff(fs, diroffset);
 	MALLOC(dap, struct diradd *, sizeof(struct diradd), M_DIRADD,
 		M_SOFTDEP_FLAGS|M_ZERO);
 	dap->da_list.wk_type = D_DIRADD;
 	dap->da_offset = offset;
 	dap->da_newinum = newinum;
 	dap->da_state = ATTACHED;
 	/*
 	 * Both arms of the following test are left with lk held.
 	 */
 	if (newdirbp == NULL) {
 		/* Not a mkdir: no MKDIR_* dependencies are required. */
 		dap->da_state |= DEPCOMPLETE;
 		ACQUIRE_LOCK(&lk);
 	} else {
 		/* Allocate both mkdir structures before taking the lock. */
 		dap->da_state |= MKDIR_BODY | MKDIR_PARENT;
 		MALLOC(mkdir1, struct mkdir *, sizeof(struct mkdir), M_MKDIR,
 		    M_SOFTDEP_FLAGS);
 		mkdir1->md_list.wk_type = D_MKDIR;
 		mkdir1->md_state = MKDIR_BODY;
 		mkdir1->md_diradd = dap;
 		MALLOC(mkdir2, struct mkdir *, sizeof(struct mkdir), M_MKDIR,
 		    M_SOFTDEP_FLAGS);
 		mkdir2->md_list.wk_type = D_MKDIR;
 		mkdir2->md_state = MKDIR_PARENT;
 		mkdir2->md_diradd = dap;
 		/*
 		 * Dependency on "." and ".." being written to disk.
 		 */
 		mkdir1->md_buf = newdirbp;
 		ACQUIRE_LOCK(&lk);
 		LIST_INSERT_HEAD(&mkdirlisthd, mkdir1, md_mkdirs);
 		WORKLIST_INSERT(&newdirbp->b_dep, &mkdir1->md_list);
 		/* The lock is dropped around the bdwrite call. */
 		FREE_LOCK(&lk);
 		bdwrite(newdirbp);
 		/*
 		 * Dependency on link count increase for parent directory
 		 */
 		ACQUIRE_LOCK(&lk);
 		if (inodedep_lookup(dp->i_fs, dp->i_number, 0, &inodedep) == 0
 		    || (inodedep->id_state & ALLCOMPLETE) == ALLCOMPLETE) {
 			/*
 			 * The parent has no inodedep, or it is ALLCOMPLETE:
 			 * MKDIR_PARENT is treated as already satisfied.
 			 */
 			dap->da_state &= ~MKDIR_PARENT;
 			WORKITEM_FREE(mkdir2, D_MKDIR);
 		} else {
 			LIST_INSERT_HEAD(&mkdirlisthd, mkdir2, md_mkdirs);
 			WORKLIST_INSERT(&inodedep->id_bufwait,&mkdir2->md_list);
 		}
 	}
 	/*
 	 * Link into parent directory pagedep to await its being written.
 	 * NOTE(review): a zero return from pagedep_lookup is taken here to
 	 * mean the pagedep was newly allocated and still needs attaching
 	 * to the buffer -- confirm against pagedep_lookup.
 	 */
 	if (pagedep_lookup(dp, lbn, DEPALLOC, &pagedep) == 0)
 		WORKLIST_INSERT(&bp->b_dep, &pagedep->pd_list);
 	dap->da_pagedep = pagedep;
 	LIST_INSERT_HEAD(&pagedep->pd_diraddhd[DIRADDHASH(offset)], dap,
 	    da_pdlist);
 	/*
 	 * Link into its inodedep. Put it on the id_bufwait list if the inode
 	 * is not yet written. If it is written, do the post-inode write
 	 * processing to put it on the id_pendinghd list.
 	 */
 	(void) inodedep_lookup(fs, newinum, DEPALLOC, &inodedep);
 	if ((inodedep->id_state & ALLCOMPLETE) == ALLCOMPLETE)
 		diradd_inode_written(dap, inodedep);
 	else
 		WORKLIST_INSERT(&inodedep->id_bufwait, &dap->da_list);
 	FREE_LOCK(&lk);
 }
 
 /*
  * This procedure is called to change the offset of a directory
  * entry when compacting a directory block which must be owned
  * exclusively by the caller. Note that the actual entry movement
  * must be done in this procedure to ensure that no I/O completions
  * occur while the move is in progress.
  */
 void 
 softdep_change_directoryentry_offset(dp, base, oldloc, newloc, entrysize)
 	struct inode *dp;	/* inode for directory */
 	caddr_t base;		/* address of dp->i_offset */
 	caddr_t oldloc;		/* address of old directory location */
 	caddr_t newloc;		/* address of new directory location */
 	int entrysize;		/* size of directory entry */
 {
 	int offset, oldoffset, newoffset;
 	struct pagedep *pagedep;
 	struct diradd *dap;
 	ufs_lbn_t lbn;
 
 	ACQUIRE_LOCK(&lk);
 	lbn = lblkno(dp->i_fs, dp->i_offset);
 	offset = blkoff(dp->i_fs, dp->i_offset);
 	if (pagedep_lookup(dp, lbn, 0, &pagedep) == 0)
 		goto done;
 	oldoffset = offset + (oldloc - base);
 	newoffset = offset + (newloc - base);
 
 	LIST_FOREACH(dap, &pagedep->pd_diraddhd[DIRADDHASH(oldoffset)], da_pdlist) {
 		if (dap->da_offset != oldoffset)
 			continue;
 		dap->da_offset = newoffset;
 		if (DIRADDHASH(newoffset) == DIRADDHASH(oldoffset))
 			break;
 		LIST_REMOVE(dap, da_pdlist);
 		LIST_INSERT_HEAD(&pagedep->pd_diraddhd[DIRADDHASH(newoffset)],
 		    dap, da_pdlist);
 		break;
 	}
 	if (dap == NULL) {
 
 		LIST_FOREACH(dap, &pagedep->pd_pendinghd, da_pdlist) {
 			if (dap->da_offset == oldoffset) {
 				dap->da_offset = newoffset;
 				break;
 			}
 		}
 	}
 done:
 	bcopy(oldloc, newloc, entrysize);
 	FREE_LOCK(&lk);
 }
 
 /*
  * Free a diradd dependency structure. This routine must be called
  * with splbio interrupts blocked.
  *
  * The diradd is removed from its work list and pagedep list. If it
  * records a directory change (DIRCHG), the dirrem held in da_previous
  * is queued on the work list. An attempt is made to free the inodedep
  * of the referenced inode, and any mkdir structures still pointing at
  * this diradd are released before the diradd itself is freed.
  */
 static void
 free_diradd(dap)
 	struct diradd *dap;
 {
 	struct dirrem *dirrem;
 	struct pagedep *pagedep;
 	struct inodedep *inodedep;
 	struct mkdir *mkdir, *nextmd;
 
 #ifdef DEBUG
 	if (lk.lkt_held == -1)
 		panic("free_diradd: lock not held");
 #endif
 	WORKLIST_REMOVE(&dap->da_list);
 	LIST_REMOVE(dap, da_pdlist);
 	/*
 	 * Find the pagedep: directly from the diradd, or through the
 	 * previous dirrem when this diradd records a changed entry.
 	 */
 	if ((dap->da_state & DIRCHG) == 0) {
 		pagedep = dap->da_pagedep;
 	} else {
 		dirrem = dap->da_previous;
 		pagedep = dirrem->dm_pagedep;
 		dirrem->dm_dirinum = pagedep->pd_ino;
 		add_to_worklist(&dirrem->dm_list);
 	}
 	/* The inodedep, if one exists, is freed only if nothing else uses it. */
 	if (inodedep_lookup(VFSTOUFS(pagedep->pd_mnt)->um_fs, dap->da_newinum,
 	    0, &inodedep) != 0)
 		(void) free_inodedep(inodedep);
 	if ((dap->da_state & (MKDIR_PARENT | MKDIR_BODY)) != 0) {
 		/*
 		 * Walk the global mkdir list, fetching the successor up
 		 * front because matching entries are freed inside the loop.
 		 */
 		for (mkdir = LIST_FIRST(&mkdirlisthd); mkdir; mkdir = nextmd) {
 			nextmd = LIST_NEXT(mkdir, md_mkdirs);
 			if (mkdir->md_diradd != dap)
 				continue;
 			dap->da_state &= ~mkdir->md_state;
 			WORKLIST_REMOVE(&mkdir->md_list);
 			LIST_REMOVE(mkdir, md_mkdirs);
 			WORKITEM_FREE(mkdir, D_MKDIR);
 		}
 		/* Every MKDIR_* flag must have had a matching mkdir. */
 		if ((dap->da_state & (MKDIR_PARENT | MKDIR_BODY)) != 0) {
 			FREE_LOCK(&lk);
 			panic("free_diradd: unfound ref");
 		}
 	}
 	WORKITEM_FREE(dap, D_DIRADD);
 }
 
 /*
  * Directory entry removal dependencies.
  * 
  * When removing a directory entry, the entry's inode pointer must be
  * zero'ed on disk before the corresponding inode's link count is decremented
  * (possibly freeing the inode for re-use). This dependency is handled by
  * updating the directory entry but delaying the inode count reduction until
  * after the directory block has been written to disk. After this point, the
  * inode count can be decremented whenever it is convenient.
  */
 
 /*
  * This routine should be called immediately after removing
  * a directory entry.  The inode's link count should not be
  * decremented by the calling procedure -- the soft updates
  * code will do this task when it is safe.
  */
 void 
 softdep_setup_remove(bp, dp, ip, isrmdir)
 	struct buf *bp;		/* buffer containing directory block */
 	struct inode *dp;	/* inode for the directory being modified */
 	struct inode *ip;	/* inode for directory entry being removed */
 	int isrmdir;		/* indicates if doing RMDIR */
 {
 	struct dirrem *dirrem, *prevdirrem;
 
 	/*
 	 * Allocate a new dirrem if appropriate and ACQUIRE_LOCK.
 	 */
 	dirrem = newdirrem(bp, dp, ip, isrmdir, &prevdirrem);
 
 	/*
 	 * If the COMPLETE flag is clear, then there were no active
 	 * entries and we want to roll back to a zeroed entry until
 	 * the new inode is committed to disk. If the COMPLETE flag is
 	 * set then we have deleted an entry that never made it to
 	 * disk. If the entry we deleted resulted from a name change,
 	 * then the old name still resides on disk. We cannot delete
 	 * its inode (returned to us in prevdirrem) until the zeroed
 	 * directory entry gets to disk. The new inode has never been
 	 * referenced on the disk, so can be deleted immediately.
 	 */
 	if ((dirrem->dm_state & COMPLETE) == 0) {
 		LIST_INSERT_HEAD(&dirrem->dm_pagedep->pd_dirremhd, dirrem,
 		    dm_next);
 		FREE_LOCK(&lk);
 	} else {
 		if (prevdirrem != NULL)
 			LIST_INSERT_HEAD(&dirrem->dm_pagedep->pd_dirremhd,
 			    prevdirrem, dm_next);
 		dirrem->dm_dirinum = dirrem->dm_pagedep->pd_ino;
 		FREE_LOCK(&lk);
 		handle_workitem_remove(dirrem);
 	}
 }
 
 /*
  * Allocate a new dirrem if appropriate and return it along with
  * its associated pagedep. Called without a lock, returns with lock.
  *
  * If a diradd exists for the same directory offset, it is freed and
  * the returned dirrem is marked COMPLETE. When that diradd recorded a
  * name change (DIRCHG), its previous dirrem is handed back through
  * *prevdirremp; otherwise *prevdirremp is set to NULL.
  */
 static long num_dirrem;		/* number of dirrem allocated */
 static struct dirrem *
 newdirrem(bp, dp, ip, isrmdir, prevdirremp)
 	struct buf *bp;		/* buffer containing directory block */
 	struct inode *dp;	/* inode for the directory being modified */
 	struct inode *ip;	/* inode for directory entry being removed */
 	int isrmdir;		/* indicates if doing RMDIR */
 	struct dirrem **prevdirremp; /* previously referenced inode, if any */
 {
 	int offset;
 	ufs_lbn_t lbn;
 	struct diradd *dap;
 	struct dirrem *dirrem;
 	struct pagedep *pagedep;
 
 	/*
 	 * Whiteouts have no deletion dependencies.
 	 */
 	if (ip == NULL)
 		panic("newdirrem: whiteout");
 	/*
 	 * If we are over our limit, try to improve the situation.
 	 * Limiting the number of dirrem structures will also limit
 	 * the number of freefile and freeblks structures.
 	 */
 	if (num_dirrem > max_softdeps / 2)
 		(void) request_cleanup(FLUSH_REMOVE, 0);
 	num_dirrem += 1;
 	MALLOC(dirrem, struct dirrem *, sizeof(struct dirrem),
 		M_DIRREM, M_SOFTDEP_FLAGS|M_ZERO);
 	dirrem->dm_list.wk_type = D_DIRREM;
 	dirrem->dm_state = isrmdir ? RMDIR : 0;
 	dirrem->dm_mnt = ITOV(ip)->v_mount;
 	dirrem->dm_oldinum = ip->i_number;
 	*prevdirremp = NULL;
 
 	/* From here on the lock is held; it is still held on return. */
 	ACQUIRE_LOCK(&lk);
 	lbn = lblkno(dp->i_fs, dp->i_offset);
 	offset = blkoff(dp->i_fs, dp->i_offset);
 	if (pagedep_lookup(dp, lbn, DEPALLOC, &pagedep) == 0)
 		WORKLIST_INSERT(&bp->b_dep, &pagedep->pd_list);
 	dirrem->dm_pagedep = pagedep;
 	/*
 	 * Check for a diradd dependency for the same directory entry.
 	 * If present, then both dependencies become obsolete and can
 	 * be de-allocated. Check for an entry on both the pd_diraddhd
 	 * list and the pd_pendinghd list.
 	 */
 
 	LIST_FOREACH(dap, &pagedep->pd_diraddhd[DIRADDHASH(offset)], da_pdlist)
 		if (dap->da_offset == offset)
 			break;
 	if (dap == NULL) {
 
 		LIST_FOREACH(dap, &pagedep->pd_pendinghd, da_pdlist)
 			if (dap->da_offset == offset)
 				break;
 		/* No diradd for this entry: an ordinary removal. */
 		if (dap == NULL)
 			return (dirrem);
 	}
 	/*
 	 * Must be ATTACHED at this point.
 	 */
 	if ((dap->da_state & ATTACHED) == 0) {
 		FREE_LOCK(&lk);
 		panic("newdirrem: not ATTACHED");
 	}
 	/* The diradd has to describe the very inode being removed. */
 	if (dap->da_newinum != ip->i_number) {
 		FREE_LOCK(&lk);
 		panic("newdirrem: inum %d should be %d",
 		    ip->i_number, dap->da_newinum);
 	}
 	/*
 	 * If we are deleting a changed name that never made it to disk,
 	 * then return the dirrem describing the previous inode (which
 	 * represents the inode currently referenced from this entry on disk).
 	 */
 	if ((dap->da_state & DIRCHG) != 0) {
 		*prevdirremp = dap->da_previous;
 		/*
 		 * Clear DIRCHG and set da_pagedep so that free_diradd takes
 		 * its non-DIRCHG path and does not queue the previous
 		 * dirrem itself.
 		 */
 		dap->da_state &= ~DIRCHG;
 		dap->da_pagedep = pagedep;
 	}
 	/*
 	 * We are deleting an entry that never made it to disk.
 	 * Mark it COMPLETE so we can delete its inode immediately.
 	 */
 	dirrem->dm_state |= COMPLETE;
 	free_diradd(dap);
 	return (dirrem);
 }
 
 /*
  * Directory entry change dependencies.
  * 
  * Changing an existing directory entry requires that an add operation
  * be completed first followed by a deletion. The semantics for the addition
  * are identical to the description of adding a new entry above except
  * that the rollback is to the old inode number rather than zero. Once
  * the addition dependency is completed, the removal is done as described
  * in the removal routine above.
  */
 
 /*
  * This routine should be called immediately after changing
  * a directory entry.  The inode's link count should not be
  * decremented by the calling procedure -- the soft updates
  * code will perform this task when it is safe.
  *
  * A dirrem is set up for the inode previously referenced by the entry
  * and, unless the new inode is a whiteout (WINO), a diradd is set up
  * for the new inode and linked to the pagedep and to the inodedep of
  * newinum. The lock obtained by newdirrem is released before
  * returning.
  */
 void 
 softdep_setup_directory_change(bp, dp, ip, newinum, isrmdir)
 	struct buf *bp;		/* buffer containing directory block */
 	struct inode *dp;	/* inode for the directory being modified */
 	struct inode *ip;	/* inode for directory entry being removed */
 	long newinum;		/* new inode number for changed entry */
 	int isrmdir;		/* indicates if doing RMDIR */
 {
 	int offset;
 	struct diradd *dap = NULL;
 	struct dirrem *dirrem, *prevdirrem;
 	struct pagedep *pagedep;
 	struct inodedep *inodedep;
 
 	offset = blkoff(dp->i_fs, dp->i_offset);
 
 	/*
 	 * Whiteouts do not need diradd dependencies.
 	 * The diradd is allocated here, before newdirrem takes the lock.
 	 */
 	if (newinum != WINO) {
 		MALLOC(dap, struct diradd *, sizeof(struct diradd),
 		    M_DIRADD, M_SOFTDEP_FLAGS|M_ZERO);
 		dap->da_list.wk_type = D_DIRADD;
 		dap->da_state = DIRCHG | ATTACHED | DEPCOMPLETE;
 		dap->da_offset = offset;
 		dap->da_newinum = newinum;
 	}
 
 	/*
 	 * Allocate a new dirrem and ACQUIRE_LOCK.
 	 */
 	dirrem = newdirrem(bp, dp, ip, isrmdir, &prevdirrem);
 	pagedep = dirrem->dm_pagedep;
 	/*
 	 * The possible values for isrmdir:
 	 *	0 - non-directory file rename
 	 *	1 - directory rename within same directory
 	 *   inum - directory rename to new directory of given inode number
 	 * When renaming to a new directory, we are both deleting and
 	 * creating a new directory entry, so the link count on the new
 	 * directory should not change. Thus we do not need the followup
 	 * dirrem which is usually done in handle_workitem_remove. We set
 	 * the DIRCHG flag to tell handle_workitem_remove to skip the
 	 * followup dirrem.
 	 */
 	if (isrmdir > 1)
 		dirrem->dm_state |= DIRCHG;
 
 	/*
 	 * Whiteouts have no additional dependencies,
 	 * so just put the dirrem on the correct list.
 	 */
 	if (newinum == WINO) {
 		if ((dirrem->dm_state & COMPLETE) == 0) {
 			LIST_INSERT_HEAD(&pagedep->pd_dirremhd, dirrem,
 			    dm_next);
 		} else {
 			dirrem->dm_dirinum = pagedep->pd_ino;
 			add_to_worklist(&dirrem->dm_list);
 		}
 		FREE_LOCK(&lk);
 		return;
 	}
 
 	/*
 	 * If the COMPLETE flag is clear, then there were no active
 	 * entries and we want to roll back to the previous inode until
 	 * the new inode is committed to disk. If the COMPLETE flag is
 	 * set, then we have deleted an entry that never made it to disk.
 	 * If the entry we deleted resulted from a name change, then the old
 	 * inode reference still resides on disk. Any rollback that we do
 	 * needs to be to that old inode (returned to us in prevdirrem). If
 	 * the entry we deleted resulted from a create, then there is
 	 * no entry on the disk, so we want to roll back to zero rather
 	 * than the uncommitted inode. In either of the COMPLETE cases we
 	 * want to immediately free the unwritten and unreferenced inode.
 	 */
 	if ((dirrem->dm_state & COMPLETE) == 0) {
 		dap->da_previous = dirrem;
 	} else {
 		if (prevdirrem != NULL) {
 			dap->da_previous = prevdirrem;
 		} else {
 			/* Nothing to roll back to: treat as a plain add. */
 			dap->da_state &= ~DIRCHG;
 			dap->da_pagedep = pagedep;
 		}
 		dirrem->dm_dirinum = pagedep->pd_ino;
 		add_to_worklist(&dirrem->dm_list);
 	}
 	/*
 	 * Link into its inodedep. Put it on the id_bufwait list if the inode
 	 * is not yet written. If it is written, do the post-inode write
 	 * processing to put it on the id_pendinghd list.
 	 */
 	if (inodedep_lookup(dp->i_fs, newinum, DEPALLOC, &inodedep) == 0 ||
 	    (inodedep->id_state & ALLCOMPLETE) == ALLCOMPLETE) {
 		dap->da_state |= COMPLETE;
 		LIST_INSERT_HEAD(&pagedep->pd_pendinghd, dap, da_pdlist);
 		WORKLIST_INSERT(&inodedep->id_pendinghd, &dap->da_list);
 	} else {
 		LIST_INSERT_HEAD(&pagedep->pd_diraddhd[DIRADDHASH(offset)],
 		    dap, da_pdlist);
 		WORKLIST_INSERT(&inodedep->id_bufwait, &dap->da_list);
 	}
 	FREE_LOCK(&lk);
 }
 
 /*
  * Invoked whenever an inode's link count changes. The difference
  * between i_nlink and i_effnlink is recorded in the inode's inodedep
  * (allocated here if necessary) so that new references to the inode
  * cannot be committed to disk before the updated inode is written.
  */
 void
 softdep_change_linkcnt(ip)
 	struct inode *ip;	/* the inode with the increased link count */
 {
 	struct inodedep *inodedep;
 
 	ACQUIRE_LOCK(&lk);
 	(void) inodedep_lookup(ip->i_fs, ip->i_number, DEPALLOC, &inodedep);
 	if (ip->i_nlink >= ip->i_effnlink) {
 		inodedep->id_nlinkdelta = ip->i_nlink - ip->i_effnlink;
 		FREE_LOCK(&lk);
 		return;
 	}
 	/* The on-disk count may never drop below the effective count. */
 	FREE_LOCK(&lk);
 	panic("softdep_change_linkcnt: bad delta");
 }
 
 /*
  * This workitem decrements the inode's link count.
  * If the link count reaches zero, the file is removed.
  *
  * For a plain file the count drops by one and the dirrem is freed.
  * For a directory (RMDIR) the count drops by two, the directory is
  * truncated to length zero and, unless DIRCHG is set, the same dirrem
  * is reused to drop the parent's link count: either at once through a
  * recursive call, or later by queueing it on the inodedep's id_inowait
  * list.
  */
 static void 
 handle_workitem_remove(dirrem)
 	struct dirrem *dirrem;
 {
 	struct proc *p = CURPROC;	/* XXX */
 	struct inodedep *inodedep;
 	struct vnode *vp;
 	struct inode *ip;
 	ino_t oldinum;
 	int error;
 
 	/*
 	 * NOTE(review): on failure the dirrem is neither freed nor is
 	 * num_dirrem decremented on this path -- confirm whether that is
 	 * intended.
 	 */
 	if ((error = VFS_VGET(dirrem->dm_mnt, dirrem->dm_oldinum, &vp)) != 0) {
 		softdep_error("handle_workitem_remove: vget", error);
 		return;
 	}
 	ip = VTOI(vp);
 	ACQUIRE_LOCK(&lk);
 	if ((inodedep_lookup(ip->i_fs, dirrem->dm_oldinum, 0, &inodedep)) == 0){
 		FREE_LOCK(&lk);
 		panic("handle_workitem_remove: lost inodedep");
 	}
 	/*
 	 * Normal file deletion.
 	 */
 	if ((dirrem->dm_state & RMDIR) == 0) {
 		ip->i_nlink--;
 		ip->i_flag |= IN_CHANGE;
 		if (ip->i_nlink < ip->i_effnlink) {
 			FREE_LOCK(&lk);
 			panic("handle_workitem_remove: bad file delta");
 		}
 		inodedep->id_nlinkdelta = ip->i_nlink - ip->i_effnlink;
 		FREE_LOCK(&lk);
 		vput(vp);
 		num_dirrem -= 1;
 		WORKITEM_FREE(dirrem, D_DIRREM);
 		return;
 	}
 	/*
 	 * Directory deletion. Decrement reference count for both the
 	 * just deleted parent directory entry and the reference for ".".
 	 * Next truncate the directory to length zero. When the
 	 * truncation completes, arrange to have the reference count on
 	 * the parent decremented to account for the loss of "..".
 	 */
 	ip->i_nlink -= 2;
 	ip->i_flag |= IN_CHANGE;
 	if (ip->i_nlink < ip->i_effnlink) {
 		FREE_LOCK(&lk);
 		panic("handle_workitem_remove: bad dir delta");
 	}
 	inodedep->id_nlinkdelta = ip->i_nlink - ip->i_effnlink;
 	/* The truncate is performed without the lock held. */
 	FREE_LOCK(&lk);
 	if ((error = UFS_TRUNCATE(vp, (off_t)0, 0, p->p_ucred, p)) != 0)
 		softdep_error("handle_workitem_remove: truncate", error);
 	/*
 	 * Rename a directory to a new parent. Since, we are both deleting
 	 * and creating a new directory entry, the link count on the new
 	 * directory should not change. Thus we skip the followup dirrem.
 	 */
 	if (dirrem->dm_state & DIRCHG) {
 		vput(vp);
 		num_dirrem -= 1;
 		WORKITEM_FREE(dirrem, D_DIRREM);
 		return;
 	}
 	/*
 	 * If the inodedep does not exist, then the zero'ed inode has
 	 * been written to disk. If the allocated inode has never been
 	 * written to disk, then the on-disk inode is zero'ed. In either
 	 * case we can remove the file immediately.
 	 */
 	ACQUIRE_LOCK(&lk);
 	/*
 	 * Retarget the dirrem at the parent directory (dm_dirinum) and
 	 * clear its state so that the next pass treats it as a plain
 	 * link count decrement.
 	 */
 	dirrem->dm_state = 0;
 	oldinum = dirrem->dm_oldinum;
 	dirrem->dm_oldinum = dirrem->dm_dirinum;
 	if (inodedep_lookup(ip->i_fs, oldinum, 0, &inodedep) == 0 ||
 	    check_inode_unwritten(inodedep)) {
 		FREE_LOCK(&lk);
 		vput(vp);
 		handle_workitem_remove(dirrem);
 		return;
 	}
 	WORKLIST_INSERT(&inodedep->id_inowait, &dirrem->dm_list);
 	FREE_LOCK(&lk);
 	vput(vp);
 }
 
 /*
  * Inode de-allocation dependencies.
  * 
  * When an inode's link count is reduced to zero, it can be de-allocated. We
  * found it convenient to postpone de-allocation until after the inode is
  * written to disk with its new link count (zero).  At this point, all of the
  * on-disk inode's block pointers are nullified and, with careful dependency
  * list ordering, all dependencies related to the inode will be satisfied and
  * the corresponding dependency structures de-allocated.  So, if/when the
  * inode is reused, there will be no mixing of old dependencies with new
  * ones.  This artificial dependency is set up by the block de-allocation
  * procedure above (softdep_setup_freeblocks) and completed by the
  * following procedure.
  *
  * The routine calls ffs_freefile for the inode number recorded in the
  * freefile, reports any error through softdep_error and then frees the
  * freefile structure.
  */
 static void 
 handle_workitem_freefile(freefile)
 	struct freefile *freefile;
 {
 	struct fs *fs;
-	struct vnode vp;
 	struct inode tip;
 	struct inodedep *idp;
 	int error;
 
 	fs = VFSTOUFS(freefile->fx_mnt)->um_fs;
 #ifdef DEBUG
 	/* By now no inodedep may remain for the inode being freed. */
 	ACQUIRE_LOCK(&lk);
 	error = inodedep_lookup(fs, freefile->fx_oldinum, 0, &idp);
 	FREE_LOCK(&lk);
 	if (error)
 		panic("handle_workitem_freefile: inodedep survived");
 #endif
 	/*
 	 * Fill in a temporary inode on the stack with just the device and
 	 * filesystem fields set below, to be handed to ffs_freefile.
 	 */
 	tip.i_devvp = freefile->fx_devvp;
 	tip.i_dev = freefile->fx_devvp->v_rdev;
 	tip.i_fs = fs;
-	tip.i_vnode = &vp;
-	vp.v_data = &tip;
-	if ((error = ffs_freefile(&vp, freefile->fx_oldinum, freefile->fx_mode)) != 0)
+	if ((error = ffs_freefile(&tip, freefile->fx_oldinum, freefile->fx_mode)) != 0)
 		softdep_error("handle_workitem_freefile", error);
 	WORKITEM_FREE(freefile, D_FREEFILE);
 }
 
 /*
  * Disk writes.
  * 
  * The dependency structures constructed above are most actively used when file
  * system blocks are written to disk.  No constraints are placed on when a
  * block can be written, but unsatisfied update dependencies are made safe by
  * modifying (or replacing) the source memory for the duration of the disk
  * write.  When the disk write completes, the memory block is again brought
  * up-to-date.
  *
  * In-core inode structure reclamation.
  * 
  * Because there are a finite number of "in-core" inode structures, they are
  * reused regularly.  By transferring all inode-related dependencies to the
  * in-memory inode block and indexing them separately (via "inodedep"s), we
  * can allow "in-core" inode structures to be reused at any time and avoid
  * any increase in contention.
  *
  * Called just before entering the device driver to initiate a new disk I/O.
  * The buffer must be locked, thus, no I/O completion operations can occur
  * while we are manipulating its associated dependencies.
  */
 static void 
 softdep_disk_io_initiation(bp)
 	struct buf *bp;		/* structure describing disk write to occur */
 {
 	struct worklist *wk, *nextwk;
 	struct indirdep *indirdep;
 
 	/*
 	 * We only care about write operations. There should never
 	 * be dependencies for reads.
 	 */
 	if (bp->b_iocmd == BIO_READ)
 		panic("softdep_disk_io_initiation: read");
 	/*
 	 * Do any necessary pre-I/O processing.
 	 */
 	for (wk = LIST_FIRST(&bp->b_dep); wk; wk = nextwk) {
 		nextwk = LIST_NEXT(wk, wk_list);
 		switch (wk->wk_type) {
 
 		case D_PAGEDEP:
 			initiate_write_filepage(WK_PAGEDEP(wk), bp);
 			continue;
 
 		case D_INODEDEP:
 			initiate_write_inodeblock(WK_INODEDEP(wk), bp);
 			continue;
 
 		case D_INDIRDEP:
 			indirdep = WK_INDIRDEP(wk);
 			if (indirdep->ir_state & GOINGAWAY)
 				panic("disk_io_initiation: indirdep gone");
 			/*
 			 * If there are no remaining dependencies, this
 			 * will be writing the real pointers, so the
 			 * dependency can be freed.
 			 */
 			if (LIST_FIRST(&indirdep->ir_deplisthd) == NULL) {
 				indirdep->ir_savebp->b_flags |= B_INVAL | B_NOCACHE;
 				brelse(indirdep->ir_savebp);
 				/* inline expand WORKLIST_REMOVE(wk); */
 				wk->wk_state &= ~ONWORKLIST;
 				LIST_REMOVE(wk, wk_list);
 				WORKITEM_FREE(indirdep, D_INDIRDEP);
 				continue;
 			}
 			/*
 			 * Replace up-to-date version with safe version.
 			 */
 			MALLOC(indirdep->ir_saveddata, caddr_t, bp->b_bcount,
 			    M_INDIRDEP, M_SOFTDEP_FLAGS);
 			ACQUIRE_LOCK(&lk);
 			indirdep->ir_state &= ~ATTACHED;
 			indirdep->ir_state |= UNDONE;
 			bcopy(bp->b_data, indirdep->ir_saveddata, bp->b_bcount);
 			bcopy(indirdep->ir_savebp->b_data, bp->b_data,
 			    bp->b_bcount);
 			FREE_LOCK(&lk);
 			continue;
 
 		case D_MKDIR:
 		case D_BMSAFEMAP:
 		case D_ALLOCDIRECT:
 		case D_ALLOCINDIR:
 			continue;
 
 		default:
 			panic("handle_disk_io_initiation: Unexpected type %s",
 			    TYPENAME(wk->wk_type));
 			/* NOTREACHED */
 		}
 	}
 }
 
 /*
  * Pre-write processing for a directory page, invoked from
  * softdep_disk_io_initiation. Every directory addition still hanging
  * off the pagedep is rolled back in the buffer so that the entry is
  * not written in its new form. The buffer must be locked, thus, no
  * I/O completion operations can occur while its dependencies are
  * being manipulated.
  */
 static void
 initiate_write_filepage(pagedep, bp)
 	struct pagedep *pagedep;
 	struct buf *bp;
 {
 	struct diradd *add;
 	struct direct *entry;
 	int bucket;
 
 	if (pagedep->pd_state & IOSTARTED) {
 		/*
 		 * This can only happen if there is a driver that does not
 		 * understand chaining. Here biodone will reissue the call
 		 * to strategy for the incomplete buffers.
 		 */
 		printf("initiate_write_filepage: already started\n");
 		return;
 	}
 	pagedep->pd_state |= IOSTARTED;
 	ACQUIRE_LOCK(&lk);
 	for (bucket = 0; bucket < DAHASHSZ; bucket++) {
 		LIST_FOREACH(add, &pagedep->pd_diraddhd[bucket], da_pdlist) {
 			entry = (struct direct *)
 			    ((char *)bp->b_data + add->da_offset);
 			/* the buffer must currently hold the new inode number */
 			if (entry->d_ino != add->da_newinum) {
 				FREE_LOCK(&lk);
 				panic("%s: dir inum %d != new %d",
 				    "initiate_write_filepage",
 				    entry->d_ino, add->da_newinum);
 			}
 			/*
 			 * A changed entry reverts to the inode it used to
 			 * name; a brand new entry reverts to empty (0).
 			 */
 			entry->d_ino = (add->da_state & DIRCHG) ?
 			    add->da_previous->dm_oldinum : 0;
 			add->da_state &= ~ATTACHED;
 			add->da_state |= UNDONE;
 		}
 	}
 	FREE_LOCK(&lk);
 }
 
 /*
  * Called from within softdep_disk_io_initiation to deal with unsatisfied
  * allocation dependencies in an inodeblock. The buffer must be
  * locked, thus, no I/O completion operations can occur while we
  * are manipulating its associated dependencies.
  *
  * The on-disk image of the inode inside bp is rolled back to a state
  * that is safe to write: either zeroed entirely (bitmap not yet
  * written) or with the block pointers covered by pending allocdirects
  * reverted to their old values and the size reduced accordingly. The
  * information needed to undo the rollback is kept in the inodedep
  * (id_savedino / id_savedsize) and in the allocdirects themselves.
  */
 static void 
 initiate_write_inodeblock(inodedep, bp)
 	struct inodedep *inodedep;
 	struct buf *bp;			/* The inode block */
 {
 	struct allocdirect *adp, *lastadp;
 	struct dinode *dp;
 	struct fs *fs;
 	ufs_lbn_t prevlbn = 0;
 	int i, deplist;
 
 	if (inodedep->id_state & IOSTARTED)
 		panic("initiate_write_inodeblock: already started");
 	inodedep->id_state |= IOSTARTED;
 	fs = inodedep->id_fs;
 	/* locate this inode's dinode within the inode block */
 	dp = (struct dinode *)bp->b_data +
 	    ino_to_fsbo(fs, inodedep->id_ino);
 	/*
 	 * If the bitmap is not yet written, then the allocated
 	 * inode cannot be written to disk. Save a private copy of
 	 * the dinode and write zeroes in its place.
 	 */
 	if ((inodedep->id_state & DEPCOMPLETE) == 0) {
 		if (inodedep->id_savedino != NULL)
 			panic("initiate_write_inodeblock: already doing I/O");
 		MALLOC(inodedep->id_savedino, struct dinode *,
 		    sizeof(struct dinode), M_INODEDEP, M_SOFTDEP_FLAGS);
 		*inodedep->id_savedino = *dp;
 		bzero((caddr_t)dp, sizeof(struct dinode));
 		return;
 	}
 	/*
 	 * If no dependencies, then there is nothing to roll back.
 	 * The current size is recorded first in either case;
 	 * handle_written_inodeblock restores di_size from id_savedsize.
 	 */
 	inodedep->id_savedsize = dp->di_size;
 	if (TAILQ_FIRST(&inodedep->id_inoupdt) == NULL)
 		return;
 	/*
 	 * Set the dependencies to busy.
 	 */
 	ACQUIRE_LOCK(&lk);
 	for (deplist = 0, adp = TAILQ_FIRST(&inodedep->id_inoupdt); adp;
 	     adp = TAILQ_NEXT(adp, ad_next)) {
 #ifdef DIAGNOSTIC
 		/* entries must be in strictly increasing lbn order */
 		if (deplist != 0 && prevlbn >= adp->ad_lbn) {
 			FREE_LOCK(&lk);
 			panic("softdep_write_inodeblock: lbn order");
 		}
 		prevlbn = adp->ad_lbn;
 		/* the buffer must currently hold the new block number */
 		if (adp->ad_lbn < NDADDR &&
 		    dp->di_db[adp->ad_lbn] != adp->ad_newblkno) {
 			FREE_LOCK(&lk);
 			panic("%s: direct pointer #%ld mismatch %d != %d",
 			    "softdep_write_inodeblock", adp->ad_lbn,
 			    dp->di_db[adp->ad_lbn], adp->ad_newblkno);
 		}
 		if (adp->ad_lbn >= NDADDR &&
 		    dp->di_ib[adp->ad_lbn - NDADDR] != adp->ad_newblkno) {
 			FREE_LOCK(&lk);
 			panic("%s: indirect pointer #%ld mismatch %d != %d",
 			    "softdep_write_inodeblock", adp->ad_lbn - NDADDR,
 			    dp->di_ib[adp->ad_lbn - NDADDR], adp->ad_newblkno);
 		}
 		/* remember which lbns have a dependency (bitmask by lbn) */
 		deplist |= 1 << adp->ad_lbn;
 		if ((adp->ad_state & ATTACHED) == 0) {
 			FREE_LOCK(&lk);
 			panic("softdep_write_inodeblock: Unknown state 0x%x",
 			    adp->ad_state);
 		}
 #endif /* DIAGNOSTIC */
 		adp->ad_state &= ~ATTACHED;
 		adp->ad_state |= UNDONE;
 	}
 	/*
 	 * The on-disk inode cannot claim to be any larger than the last
 	 * fragment that has been written. Otherwise, the on-disk inode
 	 * might have fragments that were not the last block in the file
 	 * which would corrupt the filesystem.
 	 */
 	for (lastadp = NULL, adp = TAILQ_FIRST(&inodedep->id_inoupdt); adp;
 	     lastadp = adp, adp = TAILQ_NEXT(adp, ad_next)) {
 		/* indirect-pointer dependencies are handled further below */
 		if (adp->ad_lbn >= NDADDR)
 			break;
 		dp->di_db[adp->ad_lbn] = adp->ad_oldblkno;
 		/* keep going until hitting a rollback to a frag */
 		if (adp->ad_oldsize == 0 || adp->ad_oldsize == fs->fs_bsize)
 			continue;
 		/*
 		 * Rolled back to a fragment: the file must end here, so
 		 * truncate the size and clear every later pointer.
 		 */
 		dp->di_size = fs->fs_bsize * adp->ad_lbn + adp->ad_oldsize;
 		for (i = adp->ad_lbn + 1; i < NDADDR; i++) {
 #ifdef DIAGNOSTIC
 			if (dp->di_db[i] != 0 && (deplist & (1 << i)) == 0) {
 				FREE_LOCK(&lk);
 				panic("softdep_write_inodeblock: lost dep1");
 			}
 #endif /* DIAGNOSTIC */
 			dp->di_db[i] = 0;
 		}
 		for (i = 0; i < NIADDR; i++) {
 #ifdef DIAGNOSTIC
 			if (dp->di_ib[i] != 0 &&
 			    (deplist & ((1 << NDADDR) << i)) == 0) {
 				FREE_LOCK(&lk);
 				panic("softdep_write_inodeblock: lost dep2");
 			}
 #endif /* DIAGNOSTIC */
 			dp->di_ib[i] = 0;
 		}
 		FREE_LOCK(&lk);
 		return;
 	}
 	/*
 	 * If we have zero'ed out the last allocated block of the file,
 	 * roll back the size to the last currently allocated block.
 	 * We know that this last allocated block is full-sized as
 	 * we already checked for fragments in the loop above.
 	 */
 	if (lastadp != NULL &&
 	    dp->di_size <= (lastadp->ad_lbn + 1) * fs->fs_bsize) {
 		/* search backwards for the last non-zero direct pointer */
 		for (i = lastadp->ad_lbn; i >= 0; i--)
 			if (dp->di_db[i] != 0)
 				break;
 		dp->di_size = (i + 1) * fs->fs_bsize;
 	}
 	/*
 	 * The only dependencies are for indirect blocks.
 	 *
 	 * The file size for indirect block additions is not guaranteed.
 	 * Such a guarantee would be non-trivial to achieve. The conventional
 	 * synchronous write implementation also does not make this guarantee.
 	 * Fsck should catch and fix discrepancies. Arguably, the file size
 	 * can be over-estimated without destroying integrity when the file
 	 * moves into the indirect blocks (i.e., is large). If we want to
 	 * postpone fsck, we are stuck with this argument.
 	 */
 	for (; adp; adp = TAILQ_NEXT(adp, ad_next))
 		dp->di_ib[adp->ad_lbn - NDADDR] = 0;
 	FREE_LOCK(&lk);
 }
 
 /*
  * This routine is called during the completion interrupt
  * service routine for a disk write (from the procedure called
  * by the device driver to inform the file system caches of
  * a request completion).  It should be called early in this
  * procedure, before the block is made available to other
  * processes or other routines are called.
  *
  * Every dependency on bp->b_dep is removed from the list and processed
  * according to its type. Dependencies that must stay with the buffer
  * are collected on a local "reattach" list and put back on bp->b_dep
  * once the whole list has been drained.
  */
 static void 
 softdep_disk_write_complete(bp)
 	struct buf *bp;		/* describes the completed disk write */
 {
 	struct worklist *wk;
 	struct workhead reattach;
 	struct newblk *newblk;
 	struct allocindir *aip;
 	struct allocdirect *adp;
 	struct indirdep *indirdep;
 	struct inodedep *inodedep;
 	struct bmsafemap *bmsafemap;
 
 #ifdef DEBUG
 	/*
 	 * The lock must not be held on entry (-1). It is marked with -2
 	 * for the duration of this routine and checked again on exit.
 	 */
 	if (lk.lkt_held != -1)
 		panic("softdep_disk_write_complete: lock is held");
 	lk.lkt_held = -2;
 #endif
 	LIST_INIT(&reattach);
 	while ((wk = LIST_FIRST(&bp->b_dep)) != NULL) {
 		WORKLIST_REMOVE(wk);
 		switch (wk->wk_type) {
 
 		case D_PAGEDEP:
 			/* nonzero return: pagedep still has work, keep it */
 			if (handle_written_filepage(WK_PAGEDEP(wk), bp))
 				WORKLIST_INSERT(&reattach, wk);
 			continue;
 
 		case D_INODEDEP:
 			/* nonzero return: inodedep still has work, keep it */
 			if (handle_written_inodeblock(WK_INODEDEP(wk), bp))
 				WORKLIST_INSERT(&reattach, wk);
 			continue;
 
 		case D_BMSAFEMAP:
 			/*
 			 * The bitmap has been written: mark everything that
 			 * was waiting on it as DEPCOMPLETE, detach it from
 			 * the bmsafemap, and then free the bmsafemap.
 			 */
 			bmsafemap = WK_BMSAFEMAP(wk);
 			while ((newblk = LIST_FIRST(&bmsafemap->sm_newblkhd))) {
 				newblk->nb_state |= DEPCOMPLETE;
 				newblk->nb_bmsafemap = NULL;
 				LIST_REMOVE(newblk, nb_deps);
 			}
 			while ((adp =
 			   LIST_FIRST(&bmsafemap->sm_allocdirecthd))) {
 				adp->ad_state |= DEPCOMPLETE;
 				adp->ad_buf = NULL;
 				LIST_REMOVE(adp, ad_deps);
 				handle_allocdirect_partdone(adp);
 			}
 			while ((aip =
 			    LIST_FIRST(&bmsafemap->sm_allocindirhd))) {
 				aip->ai_state |= DEPCOMPLETE;
 				aip->ai_buf = NULL;
 				LIST_REMOVE(aip, ai_deps);
 				handle_allocindir_partdone(aip);
 			}
 			while ((inodedep =
 			     LIST_FIRST(&bmsafemap->sm_inodedephd)) != NULL) {
 				inodedep->id_state |= DEPCOMPLETE;
 				LIST_REMOVE(inodedep, id_deps);
 				inodedep->id_buf = NULL;
 			}
 			WORKITEM_FREE(bmsafemap, D_BMSAFEMAP);
 			continue;
 
 		case D_MKDIR:
 			handle_written_mkdir(WK_MKDIR(wk), MKDIR_BODY);
 			continue;
 
 		case D_ALLOCDIRECT:
 			adp = WK_ALLOCDIRECT(wk);
 			adp->ad_state |= COMPLETE;
 			handle_allocdirect_partdone(adp);
 			continue;
 
 		case D_ALLOCINDIR:
 			aip = WK_ALLOCINDIR(wk);
 			aip->ai_state |= COMPLETE;
 			handle_allocindir_partdone(aip);
 			continue;
 
 		case D_INDIRDEP:
 			indirdep = WK_INDIRDEP(wk);
 			if (indirdep->ir_state & GOINGAWAY) {
 				lk.lkt_held = -1;
 				panic("disk_write_complete: indirdep gone");
 			}
 			/*
 			 * Undo the rollback done at I/O initiation: copy the
 			 * up-to-date contents saved in ir_saveddata back
 			 * into the buffer and release the saved copy.
 			 */
 			bcopy(indirdep->ir_saveddata, bp->b_data, bp->b_bcount);
 			FREE(indirdep->ir_saveddata, M_INDIRDEP);
 			indirdep->ir_saveddata = 0;
 			indirdep->ir_state &= ~UNDONE;
 			indirdep->ir_state |= ATTACHED;
 			/*
 			 * Finish the allocindirs parked on ir_donehd. Each
 			 * call is expected to take its entry off the list;
 			 * if the head does not change we would loop forever.
 			 */
 			while ((aip = LIST_FIRST(&indirdep->ir_donehd)) != 0) {
 				handle_allocindir_partdone(aip);
 				if (aip == LIST_FIRST(&indirdep->ir_donehd)) {
 					lk.lkt_held = -1;
 					panic("disk_write_complete: not gone");
 				}
 			}
 			WORKLIST_INSERT(&reattach, wk);
 			/* count the re-dirtying only if not already delayed-write */
 			if ((bp->b_flags & B_DELWRI) == 0)
 				stat_indir_blk_ptrs++;
 			bdirty(bp);
 			continue;
 
 		default:
 			lk.lkt_held = -1;
 			panic("handle_disk_write_complete: Unknown type %s",
 			    TYPENAME(wk->wk_type));
 			/* NOTREACHED */
 		}
 	}
 	/*
 	 * Reattach any requests that must be redone.
 	 */
 	while ((wk = LIST_FIRST(&reattach)) != NULL) {
 		WORKLIST_REMOVE(wk);
 		WORKLIST_INSERT(&bp->b_dep, wk);
 	}
 #ifdef DEBUG
 	if (lk.lkt_held != -2)
 		panic("softdep_disk_write_complete: lock lost");
 	lk.lkt_held = -1;
 #endif
 }
 
 /*
  * Called from within softdep_disk_write_complete above. Note that
  * this routine is always called from interrupt level with further
  * splbio interrupts blocked.
  */
 static void 
 handle_allocdirect_partdone(adp)
 	struct allocdirect *adp;	/* the completed allocdirect */
 {
 	struct allocdirect *listadp;
 	struct inodedep *inodedep;
 	long bsize, delay;
 
 	if ((adp->ad_state & ALLCOMPLETE) != ALLCOMPLETE)
 		return;
 	if (adp->ad_buf != NULL) {
 		lk.lkt_held = -1;
 		panic("handle_allocdirect_partdone: dangling dep");
 	}
 	/*
 	 * The on-disk inode cannot claim to be any larger than the last
 	 * fragment that has been written. Otherwise, the on-disk inode
 	 * might have fragments that were not the last block in the file
 	 * which would corrupt the filesystem. Thus, we cannot free any
 	 * allocdirects after one whose ad_oldblkno claims a fragment as
 	 * these blocks must be rolled back to zero before writing the inode.
 	 * We check the currently active set of allocdirects in id_inoupdt.
 	 */
 	inodedep = adp->ad_inodedep;
 	bsize = inodedep->id_fs->fs_bsize;
 	TAILQ_FOREACH(listadp, &inodedep->id_inoupdt, ad_next) {
 		/* found our block */
 		if (listadp == adp)
 			break;
 		/* continue if ad_oldlbn is not a fragment */
 		if (listadp->ad_oldsize == 0 ||
 		    listadp->ad_oldsize == bsize)
 			continue;
 		/* hit a fragment */
 		return;
 	}
 	/*
 	 * If we have reached the end of the current list without
 	 * finding the just finished dependency, then it must be
 	 * on the future dependency list. Future dependencies cannot
 	 * be freed until they are moved to the current list.
 	 */
 	if (listadp == NULL) {
 #ifdef DEBUG
 		TAILQ_FOREACH(listadp, &inodedep->id_newinoupdt, ad_next)
 			/* found our block */
 			if (listadp == adp)
 				break;
 		if (listadp == NULL) {
 			lk.lkt_held = -1;
 			panic("handle_allocdirect_partdone: lost dep");
 		}
 #endif /* DEBUG */
 		return;
 	}
 	/*
 	 * If we have found the just finished dependency, then free
 	 * it along with anything that follows it that is complete.
 	 * If the inode still has a bitmap dependency, then it has
 	 * never been written to disk, hence the on-disk inode cannot
 	 * reference the old fragment so we can free it without delay.
 	 */
 	delay = (inodedep->id_state & DEPCOMPLETE);
 	for (; adp; adp = listadp) {
 		listadp = TAILQ_NEXT(adp, ad_next);
 		if ((adp->ad_state & ALLCOMPLETE) != ALLCOMPLETE)
 			return;
 		free_allocdirect(&inodedep->id_inoupdt, adp, delay);
 	}
 }
 
 /*
  * Called from within softdep_disk_write_complete above. Note that
  * this routine is always called from interrupt level with further
  * splbio interrupts blocked.
  */
 static void
 handle_allocindir_partdone(aip)
 	struct allocindir *aip;		/* the completed allocindir */
 {
 	struct indirdep *indirdep;
 
 	if ((aip->ai_state & ALLCOMPLETE) != ALLCOMPLETE)
 		return;
 	if (aip->ai_buf != NULL) {
 		lk.lkt_held = -1;
 		panic("handle_allocindir_partdone: dangling dependency");
 	}
 	indirdep = aip->ai_indirdep;
 	if (indirdep->ir_state & UNDONE) {
 		LIST_REMOVE(aip, ai_next);
 		LIST_INSERT_HEAD(&indirdep->ir_donehd, aip, ai_next);
 		return;
 	}
 	((ufs_daddr_t *)indirdep->ir_savebp->b_data)[aip->ai_offset] =
 	    aip->ai_newblkno;
 	LIST_REMOVE(aip, ai_next);
 	if (aip->ai_freefrag != NULL)
 		add_to_worklist(&aip->ai_freefrag->ff_list);
 	WORKITEM_FREE(aip, D_ALLOCINDIR);
 }
 
 /*
  * Called from within softdep_disk_write_complete above to restore
  * in-memory inode block contents to their most up-to-date state. Note
  * that this routine is always called from interrupt level with further
  * splbio interrupts blocked.
  *
  * Returns nonzero when the inodedep must stay attached to the buffer
  * (the caller puts it on its reattach list), and zero when it was
  * freed or has nothing left that requires another write.
  */
 static int 
 handle_written_inodeblock(inodedep, bp)
 	struct inodedep *inodedep;
 	struct buf *bp;		/* buffer containing the inode block */
 {
 	struct worklist *wk, *filefree;
 	struct allocdirect *adp, *nextadp;
 	struct dinode *dp;
 	int hadchanges;
 
 	if ((inodedep->id_state & IOSTARTED) == 0) {
 		lk.lkt_held = -1;
 		panic("handle_written_inodeblock: not started");
 	}
 	inodedep->id_state &= ~IOSTARTED;
 	inodedep->id_state |= COMPLETE;
 	/* locate this inode's dinode within the inode block */
 	dp = (struct dinode *)bp->b_data +
 	    ino_to_fsbo(inodedep->id_fs, inodedep->id_ino);
 	/*
 	 * If we had to rollback the inode allocation because of
 	 * bitmaps being incomplete, then simply restore it.
 	 * Keep the block dirty so that it will not be reclaimed until
 	 * all associated dependencies have been cleared and the
 	 * corresponding updates written to disk.
 	 */
 	if (inodedep->id_savedino != NULL) {
 		*dp = *inodedep->id_savedino;
 		FREE(inodedep->id_savedino, M_INODEDEP);
 		inodedep->id_savedino = NULL;
 		if ((bp->b_flags & B_DELWRI) == 0)
 			stat_inode_bitmap++;
 		bdirty(bp);
 		return (1);
 	}
 	/*
 	 * Roll forward anything that had to be rolled back before 
 	 * the inode could be updated.
 	 */
 	hadchanges = 0;
 	for (adp = TAILQ_FIRST(&inodedep->id_inoupdt); adp; adp = nextadp) {
 		nextadp = TAILQ_NEXT(adp, ad_next);
 		/* every entry must have been rolled back before the write */
 		if (adp->ad_state & ATTACHED) {
 			lk.lkt_held = -1;
 			panic("handle_written_inodeblock: new entry");
 		}
 		if (adp->ad_lbn < NDADDR) {
 			/* direct pointer: must still hold the old value */
 			if (dp->di_db[adp->ad_lbn] != adp->ad_oldblkno) {
 				lk.lkt_held = -1;
 				panic("%s: %s #%ld mismatch %d != %d",
 				    "handle_written_inodeblock",
 				    "direct pointer", adp->ad_lbn,
 				    dp->di_db[adp->ad_lbn], adp->ad_oldblkno);
 			}
 			dp->di_db[adp->ad_lbn] = adp->ad_newblkno;
 		} else {
 			/* indirect pointer: must still be zero */
 			if (dp->di_ib[adp->ad_lbn - NDADDR] != 0) {
 				lk.lkt_held = -1;
 				panic("%s: %s #%ld allocated as %d",
 				    "handle_written_inodeblock",
 				    "indirect pointer", adp->ad_lbn - NDADDR,
 				    dp->di_ib[adp->ad_lbn - NDADDR]);
 			}
 			dp->di_ib[adp->ad_lbn - NDADDR] = adp->ad_newblkno;
 		}
 		adp->ad_state &= ~UNDONE;
 		adp->ad_state |= ATTACHED;
 		hadchanges = 1;
 	}
 	if (hadchanges && (bp->b_flags & B_DELWRI) == 0)
 		stat_direct_blk_ptrs++;
 	/*
 	 * Reset the file size to its most up-to-date value.
 	 * A saved size of -1 means no size was recorded for this write.
 	 */
 	if (inodedep->id_savedsize == -1) {
 		lk.lkt_held = -1;
 		panic("handle_written_inodeblock: bad size");
 	}
 	if (dp->di_size != inodedep->id_savedsize) {
 		dp->di_size = inodedep->id_savedsize;
 		hadchanges = 1;
 	}
 	inodedep->id_savedsize = -1;
 	/*
 	 * If there were any rollbacks in the inode block, then it must be
 	 * marked dirty so that it will eventually get written back in
 	 * its correct form.
 	 */
 	if (hadchanges)
 		bdirty(bp);
 	/*
 	 * Process any allocdirects that completed during the update.
 	 */
 	if ((adp = TAILQ_FIRST(&inodedep->id_inoupdt)) != NULL)
 		handle_allocdirect_partdone(adp);
 	/*
 	 * Process deallocations that were held pending until the
 	 * inode had been written to disk. Freeing of the inode
 	 * is delayed until after all blocks have been freed to
 	 * avoid creation of new <vfsid, inum, lbn> triples
 	 * before the old ones have been deleted.
 	 */
 	filefree = NULL;
 	while ((wk = LIST_FIRST(&inodedep->id_bufwait)) != NULL) {
 		WORKLIST_REMOVE(wk);
 		switch (wk->wk_type) {
 
 		case D_FREEFILE:
 			/*
 			 * We defer adding filefree to the worklist until
 			 * all other additions have been made to ensure
 			 * that it will be done after all the old blocks
 			 * have been freed.
 			 */
 			if (filefree != NULL) {
 				lk.lkt_held = -1;
 				panic("handle_written_inodeblock: filefree");
 			}
 			filefree = wk;
 			continue;
 
 		case D_MKDIR:
 			handle_written_mkdir(WK_MKDIR(wk), MKDIR_PARENT);
 			continue;
 
 		case D_DIRADD:
 			diradd_inode_written(WK_DIRADD(wk), inodedep);
 			continue;
 
 		case D_FREEBLKS:
 		case D_FREEFRAG:
 		case D_DIRREM:
 			add_to_worklist(wk);
 			continue;
 
 		default:
 			lk.lkt_held = -1;
 			panic("handle_written_inodeblock: Unknown type %s",
 			    TYPENAME(wk->wk_type));
 			/* NOTREACHED */
 		}
 	}
 	if (filefree != NULL) {
 		/* the inode is being freed, so its inodedep must go too */
 		if (free_inodedep(inodedep) == 0) {
 			lk.lkt_held = -1;
 			panic("handle_written_inodeblock: live inodedep");
 		}
 		add_to_worklist(filefree);
 		return (0);
 	}
 
 	/*
 	 * If no outstanding dependencies, free it.
 	 */
 	if (free_inodedep(inodedep) || TAILQ_FIRST(&inodedep->id_inoupdt) == 0)
 		return (0);
 	return (hadchanges);
 }
 
 /*
  * Record that the inode a diradd depends on has been written. The
  * diradd is marked COMPLETE and, once all of its conditions are met,
  * moved to the pending list of the pagedep that owns it. It is always
  * queued on the inodedep's id_pendinghd list.
  * This routine must be called with splbio interrupts blocked.
  */
 static void
 diradd_inode_written(dap, inodedep)
 	struct diradd *dap;
 	struct inodedep *inodedep;
 {
 	struct pagedep *owner;
 
 	dap->da_state |= COMPLETE;
 	if ((dap->da_state & ALLCOMPLETE) == ALLCOMPLETE) {
 		/* a changed entry reaches its pagedep through the dirrem */
 		owner = (dap->da_state & DIRCHG) ?
 		    dap->da_previous->dm_pagedep : dap->da_pagedep;
 		LIST_REMOVE(dap, da_pdlist);
 		LIST_INSERT_HEAD(&owner->pd_pendinghd, dap, da_pdlist);
 	}
 	WORKLIST_INSERT(&inodedep->id_pendinghd, &dap->da_list);
 }
 
 /*
  * Complete one half (MKDIR_PARENT or MKDIR_BODY, given by "type") of
  * a mkdir dependency: clear that flag on the associated diradd, mark
  * the diradd DEPCOMPLETE when neither half remains, and free the mkdir
  * structure.
  */
 static void
 handle_written_mkdir(mkdir, type)
 	struct mkdir *mkdir;
 	int type;
 {
 	struct diradd *add;
 	struct pagedep *owner;
 
 	/* the mkdir must be of exactly the kind the caller claims */
 	if (mkdir->md_state != type) {
 		lk.lkt_held = -1;
 		panic("handle_written_mkdir: bad type");
 	}
 	add = mkdir->md_diradd;
 	add->da_state &= ~type;
 	if ((add->da_state & (MKDIR_PARENT | MKDIR_BODY)) == 0)
 		add->da_state |= DEPCOMPLETE;
 	if ((add->da_state & ALLCOMPLETE) == ALLCOMPLETE) {
 		/* fully satisfied: move to the owning pagedep's pending list */
 		owner = (add->da_state & DIRCHG) ?
 		    add->da_previous->dm_pagedep : add->da_pagedep;
 		LIST_REMOVE(add, da_pdlist);
 		LIST_INSERT_HEAD(&owner->pd_pendinghd, add, da_pdlist);
 	}
 	LIST_REMOVE(mkdir, md_mkdirs);
 	WORKITEM_FREE(mkdir, D_MKDIR);
 }
 
 /*
  * Called from within softdep_disk_write_complete above.
  * A write operation was just completed. Removed inodes can
  * now be freed and associated block pointers may be committed.
  * Note that this routine is always called from interrupt level
  * with further splbio interrupts blocked.
  *
  * Returns 0 when the pagedep had no remaining dependencies and was
  * freed, and 1 when it must stay attached to the buffer.
  */
 static int 
 handle_written_filepage(pagedep, bp)
 	struct pagedep *pagedep;
 	struct buf *bp;		/* buffer containing the written page */
 {
 	struct dirrem *dirrem;
 	struct diradd *dap, *nextdap;
 	struct direct *ep;
 	int i, chgs;
 
 	if ((pagedep->pd_state & IOSTARTED) == 0) {
 		lk.lkt_held = -1;
 		panic("handle_written_filepage: not started");
 	}
 	pagedep->pd_state &= ~IOSTARTED;
 	/*
 	 * Process any directory removals that have been committed.
 	 * Each dirrem records the directory's inode number and is
 	 * queued on the worklist.
 	 */
 	while ((dirrem = LIST_FIRST(&pagedep->pd_dirremhd)) != NULL) {
 		LIST_REMOVE(dirrem, dm_next);
 		dirrem->dm_dirinum = pagedep->pd_ino;
 		add_to_worklist(&dirrem->dm_list);
 	}
 	/*
 	 * Free any directory additions that have been committed.
 	 */
 	while ((dap = LIST_FIRST(&pagedep->pd_pendinghd)) != NULL)
 		free_diradd(dap);
 	/*
 	 * Uncommitted directory entries must be restored.
 	 */
 	for (chgs = 0, i = 0; i < DAHASHSZ; i++) {
 		/* nextdap is fetched first; dap may move to another list */
 		for (dap = LIST_FIRST(&pagedep->pd_diraddhd[i]); dap;
 		     dap = nextdap) {
 			nextdap = LIST_NEXT(dap, da_pdlist);
 			if (dap->da_state & ATTACHED) {
 				lk.lkt_held = -1;
 				panic("handle_written_filepage: attached");
 			}
 			/* put the new inode number back into the entry */
 			ep = (struct direct *)
 			    ((char *)bp->b_data + dap->da_offset);
 			ep->d_ino = dap->da_newinum;
 			dap->da_state &= ~UNDONE;
 			dap->da_state |= ATTACHED;
 			chgs = 1;
 			/*
 			 * If the inode referenced by the directory has
 			 * been written out, then the dependency can be
 			 * moved to the pending list.
 			 */
 			if ((dap->da_state & ALLCOMPLETE) == ALLCOMPLETE) {
 				LIST_REMOVE(dap, da_pdlist);
 				LIST_INSERT_HEAD(&pagedep->pd_pendinghd, dap,
 				    da_pdlist);
 			}
 		}
 	}
 	/*
 	 * If there were any rollbacks in the directory, then it must be
 	 * marked dirty so that it will eventually get written back in
 	 * its correct form.
 	 */
 	if (chgs) {
 		if ((bp->b_flags & B_DELWRI) == 0)
 			stat_dir_entry++;
 		bdirty(bp);
 	}
 	/*
 	 * If no dependencies remain, the pagedep will be freed.
 	 * Otherwise it will remain to update the page before it
 	 * is written back to disk.
 	 */
 	if (LIST_FIRST(&pagedep->pd_pendinghd) == 0) {
 		/* look for any hash bucket that still holds a diradd */
 		for (i = 0; i < DAHASHSZ; i++)
 			if (LIST_FIRST(&pagedep->pd_diraddhd[i]) != NULL)
 				break;
 		if (i == DAHASHSZ) {
 			LIST_REMOVE(pagedep, pd_hash);
 			WORKITEM_FREE(pagedep, D_PAGEDEP);
 			return (0);
 		}
 	}
 	return (1);
 }
 
 /*
  * Writing back in-core inode structures.
  * 
  * The file system only accesses an inode's contents when it occupies an
  * "in-core" inode structure.  These "in-core" structures are separate from
  * the page frames used to cache inode blocks.  Only the latter are
  * transferred to/from the disk.  So, when the updated contents of the
  * "in-core" inode structure are copied to the corresponding in-memory inode
  * block, the dependencies are also transferred.  The following procedure is
  * called when copying a dirty "in-core" inode to a cached inode block.
  */
 
 /*
  * Called when an inode is loaded from disk. The effective link count
  * starts out equal to the on-disk link count; if an inodedep exists
  * for the inode, the link-count delta recorded in it is subtracted so
  * that the correct effective link count is put back.
  */
 void 
 softdep_load_inodeblock(ip)
 	struct inode *ip;	/* the "in_core" copy of the inode */
 {
 	struct inodedep *idep;
 
 	ip->i_effnlink = ip->i_nlink;
 	ACQUIRE_LOCK(&lk);
 	/* apply the alternate nlink count only when an inodedep is found */
 	if (inodedep_lookup(ip->i_fs, ip->i_number, 0, &idep) != 0)
 		ip->i_effnlink -= idep->id_nlinkdelta;
 	FREE_LOCK(&lk);
 }
 
 /*
  * This routine is called just before the "in-core" inode
  * information is to be copied to the in-memory inode block.
  * Recall that an inode block contains several inodes. If
  * waitfor is nonzero, then the bitmap dependency will be
  * cleared (by writing the bitmap buffer) so that the update
  * can always be made. Note that the buffer is locked when this
  * routine is called, so we will never be in the middle of
  * writing the inode block to disk.
  */
 void 
 softdep_update_inodeblock(ip, bp, waitfor)
 	struct inode *ip;	/* the "in_core" copy of the inode */
 	struct buf *bp;		/* the buffer containing the inode block */
 	int waitfor;		/* nonzero => update must be allowed */
 {
 	struct inodedep *inodedep;
 	struct worklist *wk;
 	int error, gotit;
 
 	/*
 	 * If the effective link count is not equal to the actual link
 	 * count, then we must track the difference in an inodedep while
 	 * the inode is (potentially) tossed out of the cache. Otherwise,
 	 * if there is no existing inodedep, then there are no dependencies
 	 * to track.
 	 */
 	ACQUIRE_LOCK(&lk);
 	if (inodedep_lookup(ip->i_fs, ip->i_number, 0, &inodedep) == 0) {
 		FREE_LOCK(&lk);
 		/* without an inodedep the two link counts must agree */
 		if (ip->i_effnlink != ip->i_nlink)
 			panic("softdep_update_inodeblock: bad link count");
 		return;
 	}
 	/* the recorded delta must match the in-core difference */
 	if (inodedep->id_nlinkdelta != ip->i_nlink - ip->i_effnlink) {
 		FREE_LOCK(&lk);
 		panic("softdep_update_inodeblock: bad delta");
 	}
 	/*
 	 * Changes have been initiated. Anything depending on these
 	 * changes cannot occur until this inode has been written.
 	 */
 	inodedep->id_state &= ~COMPLETE;
 	if ((inodedep->id_state & ONWORKLIST) == 0)
 		WORKLIST_INSERT(&bp->b_dep, &inodedep->id_list);
 	/*
 	 * Any new dependencies associated with the incore inode must 
 	 * now be moved to the list associated with the buffer holding
 	 * the in-memory copy of the inode. Once merged process any
 	 * allocdirects that are completed by the merger.
 	 */
 	merge_inode_lists(inodedep);
 	if (TAILQ_FIRST(&inodedep->id_inoupdt) != NULL)
 		handle_allocdirect_partdone(TAILQ_FIRST(&inodedep->id_inoupdt));
 	/*
 	 * Now that the inode has been pushed into the buffer, the
 	 * operations dependent on the inode being written to disk
 	 * can be moved to the id_bufwait so that they will be
 	 * processed when the buffer I/O completes.
 	 */
 	while ((wk = LIST_FIRST(&inodedep->id_inowait)) != NULL) {
 		WORKLIST_REMOVE(wk);
 		WORKLIST_INSERT(&inodedep->id_bufwait, wk);
 	}
 	/*
 	 * Newly allocated inodes cannot be written until the bitmap
 	 * that allocates them has been written (indicated by
 	 * DEPCOMPLETE being set in id_state). If we are doing a
 	 * forced sync (e.g., an fsync on a file), we force the bitmap
 	 * to be written so that the update can be done.
 	 */
 	if ((inodedep->id_state & DEPCOMPLETE) != 0 || waitfor == 0) {
 		FREE_LOCK(&lk);
 		return;
 	}
 	/* id_buf is the buffer the DEPCOMPLETE dependency waits on */
 	gotit = getdirtybuf(&inodedep->id_buf, MNT_WAIT);
 	FREE_LOCK(&lk);
 	if (gotit &&
 	    (error = BUF_WRITE(inodedep->id_buf)) != 0)
 		softdep_error("softdep_update_inodeblock: bwrite", error);
 	/* after the forced write the dependency must be satisfied */
 	if ((inodedep->id_state & DEPCOMPLETE) == 0)
 		panic("softdep_update_inodeblock: update failed");
 }
 
 /*
  * Fold the new inode dependency list (id_newinoupdt) into the old
  * inode dependency list (id_inoupdt), keeping the result ordered by
  * logical block number. When both lists hold an entry for the same
  * block, the two are combined with allocdirect_merge. This routine
  * must be called with splbio interrupts blocked.
  */
 static void
 merge_inode_lists(inodedep)
 	struct inodedep *inodedep;
 {
 	struct allocdirect *cur, *incoming;
 
 	incoming = TAILQ_FIRST(&inodedep->id_newinoupdt);
 	cur = TAILQ_FIRST(&inodedep->id_inoupdt);
 	while (cur != NULL && incoming != NULL) {
 		/* step along the old list until the insertion point */
 		if (cur->ad_lbn < incoming->ad_lbn) {
 			cur = TAILQ_NEXT(cur, ad_next);
 			continue;
 		}
 		TAILQ_REMOVE(&inodedep->id_newinoupdt, incoming, ad_next);
 		TAILQ_INSERT_BEFORE(cur, incoming, ad_next);
 		if (cur->ad_lbn == incoming->ad_lbn) {
 			/* same block: merge, then continue from the new entry */
 			allocdirect_merge(&inodedep->id_inoupdt, incoming,
 			    cur);
 			cur = incoming;
 		}
 		incoming = TAILQ_FIRST(&inodedep->id_newinoupdt);
 	}
 	/* whatever is left on the new list goes after the old entries */
 	while ((incoming = TAILQ_FIRST(&inodedep->id_newinoupdt)) != NULL) {
 		TAILQ_REMOVE(&inodedep->id_newinoupdt, incoming, ad_next);
 		TAILQ_INSERT_TAIL(&inodedep->id_inoupdt, incoming, ad_next);
 	}
 }
 
 /*
  * If we are doing an fsync, then we must ensure that any directory
  * entries for the inode have been written after the inode gets to disk.
  *
  * For each diradd on the inodedep's id_pendinghd list, the parent
  * directory is fetched and the directory block holding the entry is
  * read and written. Returns 0 on success, or the first error returned
  * by VFS_VGET, UFS_UPDATE, bread or BUF_WRITE.
  */
 int
 softdep_fsync(vp)
 	struct vnode *vp;	/* the "in_core" copy of the inode */
 {
 	struct inodedep *inodedep;
 	struct pagedep *pagedep;
 	struct worklist *wk;
 	struct diradd *dap;
 	struct mount *mnt;
 	struct vnode *pvp;
 	struct inode *ip;
 	struct buf *bp;
 	struct fs *fs;
 	struct proc *p = CURPROC;		/* XXX */
 	int error, flushparent;
 	ino_t parentino;
 	ufs_lbn_t lbn;
 
 	ip = VTOI(vp);
 	fs = ip->i_fs;
 	ACQUIRE_LOCK(&lk);
 	/* no inodedep means there is nothing to flush */
 	if (inodedep_lookup(fs, ip->i_number, 0, &inodedep) == 0) {
 		FREE_LOCK(&lk);
 		return (0);
 	}
 	/*
 	 * All other per-inode dependency lists must already be empty
 	 * by the time this routine runs.
 	 */
 	if (LIST_FIRST(&inodedep->id_inowait) != NULL ||
 	    LIST_FIRST(&inodedep->id_bufwait) != NULL ||
 	    TAILQ_FIRST(&inodedep->id_inoupdt) != NULL ||
 	    TAILQ_FIRST(&inodedep->id_newinoupdt) != NULL) {
 		FREE_LOCK(&lk);
 		panic("softdep_fsync: pending ops");
 	}
 	/* the lock is held at the top of every iteration */
 	for (error = 0, flushparent = 0; ; ) {
 		if ((wk = LIST_FIRST(&inodedep->id_pendinghd)) == NULL)
 			break;
 		if (wk->wk_type != D_DIRADD) {
 			FREE_LOCK(&lk);
 			panic("softdep_fsync: Unexpected type %s",
 			    TYPENAME(wk->wk_type));
 		}
 		dap = WK_DIRADD(wk);
 		/*
 		 * Flush our parent if this directory entry
 		 * has a MKDIR_PARENT dependency.
 		 */
 		if (dap->da_state & DIRCHG)
 			pagedep = dap->da_previous->dm_pagedep;
 		else
 			pagedep = dap->da_pagedep;
 		/*
 		 * Copy what is needed out of the pagedep now; the lock
 		 * is dropped below before these values are used.
 		 */
 		mnt = pagedep->pd_mnt;
 		parentino = pagedep->pd_ino;
 		lbn = pagedep->pd_lbn;
 		if ((dap->da_state & (MKDIR_BODY | COMPLETE)) != COMPLETE) {
 			FREE_LOCK(&lk);
 			panic("softdep_fsync: dirty");
 		}
 		flushparent = dap->da_state & MKDIR_PARENT;
 		/*
 		 * If we are being fsync'ed as part of vgone'ing this vnode,
 		 * then we will not be able to release and recover the
 		 * vnode below, so we just have to give up on writing its
 		 * directory entry out. It will eventually be written, just
 		 * not now, but then the user was not asking to have it
 		 * written, so we are not breaking any promises.
 		 */
 		if (vp->v_flag & VXLOCK)
 			break;
 		/*
 		 * We prevent deadlock by always fetching inodes from the
 		 * root, moving down the directory tree. Thus, when fetching
 		 * our parent directory, we must unlock ourselves before
 		 * requesting the lock on our parent. See the comment in
 		 * ufs_lookup for details on possible races.
 		 */
 		FREE_LOCK(&lk);
 		VOP_UNLOCK(vp, 0, p);
 		error = VFS_VGET(mnt, parentino, &pvp);
 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
 		if (error != 0)
 			return (error);
 		if (flushparent) {
 			if ((error = UFS_UPDATE(pvp, 1)) != 0) {
 				vput(pvp);
 				return (error);
 			}
 		}
 		/*
 		 * Flush directory page containing the inode's name.
 		 */
 		error = bread(pvp, lbn, blksize(fs, VTOI(pvp), lbn), p->p_ucred,
 		    &bp);
 		if (error == 0)
 			error = BUF_WRITE(bp);
 		vput(pvp);
 		if (error != 0)
 			return (error);
 		/*
 		 * Retake the lock and look the inodedep up again, since
 		 * it may have been freed while the lock was released.
 		 */
 		ACQUIRE_LOCK(&lk);
 		if (inodedep_lookup(fs, ip->i_number, 0, &inodedep) == 0)
 			break;
 	}
 	FREE_LOCK(&lk);
 	return (0);
 }
 
 /*
  * Flush all the dirty bitmaps associated with the block device
  * before flushing the rest of the dirty blocks so as to reduce
  * the number of dependencies that will have to be rolled back.
  *
  * vp must be a disk vnode (anything else panics). Each dirty buffer
  * whose first dependency is a D_BMSAFEMAP, and which has no background
  * write in progress, is started with bawrite. Pending output on vp is
  * then drained before returning.
  */
 void
 softdep_fsync_mountdev(vp)
 	struct vnode *vp;
 {
 	struct buf *bp, *nbp;
 	struct worklist *wk;
 
 	if (!vn_isdisk(vp, NULL))
 		panic("softdep_fsync_mountdev: vnode not a disk");
 	ACQUIRE_LOCK(&lk);
 	for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
 		/* Remember the successor before bp can leave the list. */
 		nbp = TAILQ_NEXT(bp, b_vnbufs);
 		/*
 		 * If it is already scheduled, skip to the next buffer.
 		 */
 		if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT))
 			continue;
 		/* Everything on the dirty list is expected to be B_DELWRI. */
 		if ((bp->b_flags & B_DELWRI) == 0) {
 			FREE_LOCK(&lk);
 			panic("softdep_fsync_mountdev: not dirty");
 		}
 		/*
 		 * We are only interested in bitmaps with outstanding
 		 * dependencies.
 		 */
 		if ((wk = LIST_FIRST(&bp->b_dep)) == NULL ||
 		    wk->wk_type != D_BMSAFEMAP ||
 		    (bp->b_xflags & BX_BKGRDINPROG)) {
 			BUF_UNLOCK(bp);
 			continue;
 		}
 		bremfree(bp);
 		/* The softdep lock is dropped around the write. */
 		FREE_LOCK(&lk);
 		(void) bawrite(bp);
 		ACQUIRE_LOCK(&lk);
 		/*
 		 * Since we may have slept during the I/O, we need
 		 * to start from a known point.
 		 */
 		nbp = TAILQ_FIRST(&vp->v_dirtyblkhd);
 	}
 	/* Second argument 1: drain_output is told lk is already held. */
 	drain_output(vp, 1);
 	FREE_LOCK(&lk);
 }
 
 /*
  * This routine is called when we are trying to synchronously flush a
  * file. This routine must eliminate any filesystem metadata dependencies
  * so that the syncing routine can succeed by pushing the dirty blocks
  * associated with the file. If any I/O errors occur, they are returned.
  *
  * Returns 0 immediately when the vnode is not subject to soft dependency
  * processing. Otherwise the dirty buffer list of the vnode is walked
  * twice (first with MNT_NOWAIT, then with MNT_WAIT), and for every
  * dependency attached to each buffer the buffer that the dependency is
  * waiting on is written. The softdep lock (lk) is taken and dropped
  * internally and is not held on any return path.
  */
 int
 softdep_sync_metadata(ap)
 	struct vop_fsync_args /* {
 		struct vnode *a_vp;
 		struct ucred *a_cred;
 		int a_waitfor;
 		struct proc *a_p;
 	} */ *ap;
 {
 	struct vnode *vp = ap->a_vp;
 	struct pagedep *pagedep;
 	struct allocdirect *adp;
 	struct allocindir *aip;
 	struct buf *bp, *nbp;
 	struct worklist *wk;
 	int i, error, waitfor;
 
 	/*
 	 * Check whether this vnode is involved in a filesystem
 	 * that is doing soft dependency processing.
 	 */
 	if (!vn_isdisk(vp, NULL)) {
 		if (!DOINGSOFTDEP(vp))
 			return (0);
 	} else
 		if (vp->v_rdev->si_mountpoint == NULL ||
 		    (vp->v_rdev->si_mountpoint->mnt_flag & MNT_SOFTDEP) == 0)
 			return (0);
 	/*
 	 * Ensure that any direct block dependencies have been cleared.
 	 */
 	ACQUIRE_LOCK(&lk);
 	if ((error = flush_inodedep_deps(VTOI(vp)->i_fs, VTOI(vp)->i_number))) {
 		FREE_LOCK(&lk);
 		return (error);
 	}
 	/*
 	 * For most files, the only metadata dependencies are the
 	 * cylinder group maps that allocate their inode or blocks.
 	 * The block allocation dependencies can be found by traversing
 	 * the dependency lists for any buffers that remain on their
 	 * dirty buffer list. The inode allocation dependency will
 	 * be resolved when the inode is updated with MNT_WAIT.
 	 * This work is done in two passes. The first pass grabs most
 	 * of the buffers and begins asynchronously writing them. The
 	 * only way to wait for these asynchronous writes is to sleep
 	 * on the filesystem vnode which may stay busy for a long time
 	 * if the filesystem is active. So, instead, we make a second
 	 * pass over the dependencies blocking on each write. In the
 	 * usual case we will be blocking against a write that we
 	 * initiated, so when it is done the dependency will have been
 	 * resolved. Thus the second pass is expected to end quickly.
 	 */
 	waitfor = MNT_NOWAIT;
 top:
 	/*
 	 * Lock the first dirty buffer. If there is none, or it could
 	 * not be acquired as a dirty buffer, there is nothing to do.
 	 */
 	if (getdirtybuf(&TAILQ_FIRST(&vp->v_dirtyblkhd), MNT_WAIT) == 0) {
 		FREE_LOCK(&lk);
 		return (0);
 	}
 	bp = TAILQ_FIRST(&vp->v_dirtyblkhd);
 loop:
 	/*
 	 * As we hold the buffer locked, none of its dependencies
 	 * will disappear.
 	 */
 	LIST_FOREACH(wk, &bp->b_dep, wk_list) {
 		switch (wk->wk_type) {
 
 		case D_ALLOCDIRECT:
 			/*
 			 * Push the buffer recorded in ad_buf unless the
 			 * dependency is already marked DEPCOMPLETE. In the
 			 * first pass the write is only started; in the
 			 * second it is waited for and its error returned.
 			 */
 			adp = WK_ALLOCDIRECT(wk);
 			if (adp->ad_state & DEPCOMPLETE)
 				break;
 			nbp = adp->ad_buf;
 			if (getdirtybuf(&nbp, waitfor) == 0)
 				break;
 			FREE_LOCK(&lk);
 			if (waitfor == MNT_NOWAIT) {
 				bawrite(nbp);
 			} else if ((error = BUF_WRITE(nbp)) != 0) {
 				/* Hand our own locked buffer to bawrite. */
 				bawrite(bp);
 				return (error);
 			}
 			ACQUIRE_LOCK(&lk);
 			break;
 
 		case D_ALLOCINDIR:
 			/* Same treatment as D_ALLOCDIRECT, using ai_buf. */
 			aip = WK_ALLOCINDIR(wk);
 			if (aip->ai_state & DEPCOMPLETE)
 				break;
 			nbp = aip->ai_buf;
 			if (getdirtybuf(&nbp, waitfor) == 0)
 				break;
 			FREE_LOCK(&lk);
 			if (waitfor == MNT_NOWAIT) {
 				bawrite(nbp);
 			} else if ((error = BUF_WRITE(nbp)) != 0) {
 				bawrite(bp);
 				return (error);
 			}
 			ACQUIRE_LOCK(&lk);
 			break;
 
 		case D_INDIRDEP:
 		restart:
 			/*
 			 * Synchronously write the ai_buf of every allocindir
 			 * on ir_deplisthd that is not yet DEPCOMPLETE. The
 			 * list is rescanned from its head after each attempt
 			 * because the lock is dropped (or may be slept on)
 			 * while acquiring or writing the buffer.
 			 */
 
 			LIST_FOREACH(aip, &WK_INDIRDEP(wk)->ir_deplisthd, ai_next) {
 				if (aip->ai_state & DEPCOMPLETE)
 					continue;
 				nbp = aip->ai_buf;
 				if (getdirtybuf(&nbp, MNT_WAIT) == 0)
 					goto restart;
 				FREE_LOCK(&lk);
 				if ((error = BUF_WRITE(nbp)) != 0) {
 					bawrite(bp);
 					return (error);
 				}
 				ACQUIRE_LOCK(&lk);
 				goto restart;
 			}
 			break;
 
 		case D_INODEDEP:
 			/*
 			 * flush_inodedep_deps returns with lk held, so it
 			 * is released here before the error is returned.
 			 */
 			if ((error = flush_inodedep_deps(WK_INODEDEP(wk)->id_fs,
 			    WK_INODEDEP(wk)->id_ino)) != 0) {
 				FREE_LOCK(&lk);
 				bawrite(bp);
 				return (error);
 			}
 			break;
 
 		case D_PAGEDEP:
 			/*
 			 * We are trying to sync a directory that may
 			 * have dependencies on both its own metadata
 			 * and/or dependencies on the inodes of any
 			 * recently allocated files. We walk its diradd
 			 * lists pushing out the associated inode.
 			 */
 			pagedep = WK_PAGEDEP(wk);
 			for (i = 0; i < DAHASHSZ; i++) {
 				if (LIST_FIRST(&pagedep->pd_diraddhd[i]) == 0)
 					continue;
 				if ((error =
 				    flush_pagedep_deps(vp, pagedep->pd_mnt,
 						&pagedep->pd_diraddhd[i]))) {
 					FREE_LOCK(&lk);
 					bawrite(bp);
 					return (error);
 				}
 			}
 			break;
 
 		case D_MKDIR:
 			/*
 			 * This case should never happen if the vnode has
 			 * been properly sync'ed. However, if this function
 			 * is used at a place where the vnode has not yet
 			 * been sync'ed, this dependency can show up. So,
 			 * rather than panic, just flush it.
 			 */
 			nbp = WK_MKDIR(wk)->md_buf;
 			if (getdirtybuf(&nbp, waitfor) == 0)
 				break;
 			FREE_LOCK(&lk);
 			if (waitfor == MNT_NOWAIT) {
 				bawrite(nbp);
 			} else if ((error = BUF_WRITE(nbp)) != 0) {
 				bawrite(bp);
 				return (error);
 			}
 			ACQUIRE_LOCK(&lk);
 			break;
 
 		case D_BMSAFEMAP:
 			/*
 			 * This case should never happen if the vnode has
 			 * been properly sync'ed. However, if this function
 			 * is used at a place where the vnode has not yet
 			 * been sync'ed, this dependency can show up. So,
 			 * rather than panic, just flush it.
 			 */
 			nbp = WK_BMSAFEMAP(wk)->sm_buf;
 			if (getdirtybuf(&nbp, waitfor) == 0)
 				break;
 			FREE_LOCK(&lk);
 			if (waitfor == MNT_NOWAIT) {
 				bawrite(nbp);
 			} else if ((error = BUF_WRITE(nbp)) != 0) {
 				bawrite(bp);
 				return (error);
 			}
 			ACQUIRE_LOCK(&lk);
 			break;
 
 		default:
 			FREE_LOCK(&lk);
 			panic("softdep_sync_metadata: Unknown type %s",
 			    TYPENAME(wk->wk_type));
 			/* NOTREACHED */
 		}
 	}
 	/*
 	 * Try to lock the successor before letting go of bp; the result
 	 * is ignored and the successor is used either way. Then start
 	 * the write of bp with the softdep lock dropped.
 	 */
 	(void) getdirtybuf(&TAILQ_NEXT(bp, b_vnbufs), MNT_WAIT);
 	nbp = TAILQ_NEXT(bp, b_vnbufs);
 	FREE_LOCK(&lk);
 	bawrite(bp);
 	ACQUIRE_LOCK(&lk);
 	if (nbp != NULL) {
 		bp = nbp;
 		goto loop;
 	}
 	/*
 	 * We must wait for any I/O in progress to finish so that
 	 * all potential buffers on the dirty list will be visible.
 	 * Once they are all there, proceed with the second pass
 	 * which will wait for the I/O as per above.
 	 */
 	drain_output(vp, 1);
 	/*
 	 * The brief unlock is to allow any pent up dependency
 	 * processing to be done.
 	 */
 	if (waitfor == MNT_NOWAIT) {
 		waitfor = MNT_WAIT;
 		FREE_LOCK(&lk);
 		ACQUIRE_LOCK(&lk);
 		goto top;
 	}
 
 	/*
 	 * If we have managed to get rid of all the dirty buffers,
 	 * then we are done. For certain directories and block
 	 * devices, we may need to do further work.
 	 */
 	if (TAILQ_FIRST(&vp->v_dirtyblkhd) == NULL) {
 		FREE_LOCK(&lk);
 		return (0);
 	}
 
 	FREE_LOCK(&lk);
 	/*
 	 * If we are trying to sync a block device, some of its buffers may
 	 * contain metadata that cannot be written until the contents of some
 	 * partially written files have been written to disk. The only easy
 	 * way to accomplish this is to sync the entire filesystem (luckily
 	 * this happens rarely).
 	 */
 	if (vn_isdisk(vp, NULL) && 
 	    vp->v_rdev->si_mountpoint && !VOP_ISLOCKED(vp, NULL) &&
 	    (error = VFS_SYNC(vp->v_rdev->si_mountpoint, MNT_WAIT, ap->a_cred,
 	     ap->a_p)) != 0)
 		return (error);
 	return (0);
 }
 
 /*
  * Flush the dependencies associated with an inodedep.
  * Called with splbio blocked.
  *
  * For the inode (fs, ino), writes the ad_buf of every allocdirect on the
  * id_inoupdt and id_newinoupdt lists that is not yet DEPCOMPLETE. Returns
  * 0 once no such entries remain or the inodedep no longer exists, or the
  * error from a failed BUF_WRITE. The softdep lock (lk) is dropped and
  * retaken while working but is held again on every return path.
  */
 static int
 flush_inodedep_deps(fs, ino)
 	struct fs *fs;
 	ino_t ino;
 {
 	struct inodedep *inodedep;
 	struct allocdirect *adp;
 	int error, waitfor;
 	struct buf *bp;
 
 	/*
 	 * This work is done in two passes. The first pass grabs most
 	 * of the buffers and begins asynchronously writing them. The
 	 * only way to wait for these asynchronous writes is to sleep
 	 * on the filesystem vnode which may stay busy for a long time
 	 * if the filesystem is active. So, instead, we make a second
 	 * pass over the dependencies blocking on each write. In the
 	 * usual case we will be blocking against a write that we
 	 * initiated, so when it is done the dependency will have been
 	 * resolved. Thus the second pass is expected to end quickly.
 	 * We give a brief window at the top of the loop to allow
 	 * any pending I/O to complete.
 	 */
 	for (waitfor = MNT_NOWAIT; ; ) {
 		FREE_LOCK(&lk);
 		ACQUIRE_LOCK(&lk);
 		/* Look the inodedep up afresh; the lock was just dropped. */
 		if (inodedep_lookup(fs, ino, 0, &inodedep) == 0)
 			return (0);
 		TAILQ_FOREACH(adp, &inodedep->id_inoupdt, ad_next) {
 			if (adp->ad_state & DEPCOMPLETE)
 				continue;
 			bp = adp->ad_buf;
 			if (getdirtybuf(&bp, waitfor) == 0) {
 				/*
 				 * Could not get the buffer: in the first
 				 * pass move on to the next entry, in the
 				 * second start the whole scan over.
 				 */
 				if (waitfor == MNT_NOWAIT)
 					continue;
 				break;
 			}
 			FREE_LOCK(&lk);
 			if (waitfor == MNT_NOWAIT) {
 				bawrite(bp);
 			} else if ((error = BUF_WRITE(bp)) != 0) {
 				ACQUIRE_LOCK(&lk);
 				return (error);
 			}
 			ACQUIRE_LOCK(&lk);
 			break;
 		}
 		/*
 		 * adp is non-NULL only if the scan above was cut short by
 		 * a break, in which case the lists must be rescanned.
 		 */
 		if (adp != NULL)
 			continue;
 		/* Same procedure for the id_newinoupdt list. */
 		TAILQ_FOREACH(adp, &inodedep->id_newinoupdt, ad_next) {
 			if (adp->ad_state & DEPCOMPLETE)
 				continue;
 			bp = adp->ad_buf;
 			if (getdirtybuf(&bp, waitfor) == 0) {
 				if (waitfor == MNT_NOWAIT)
 					continue;
 				break;
 			}
 			FREE_LOCK(&lk);
 			if (waitfor == MNT_NOWAIT) {
 				bawrite(bp);
 			} else if ((error = BUF_WRITE(bp)) != 0) {
 				ACQUIRE_LOCK(&lk);
 				return (error);
 			}
 			ACQUIRE_LOCK(&lk);
 			break;
 		}
 		if (adp != NULL)
 			continue;
 		/*
 		 * If pass2, we are done, otherwise do pass 2.
 		 */
 		if (waitfor == MNT_WAIT)
 			break;
 		waitfor = MNT_WAIT;
 	}
 	/*
 	 * Try freeing inodedep in case all dependencies have been removed.
 	 */
 	if (inodedep_lookup(fs, ino, 0, &inodedep) != 0)
 		(void) free_inodedep(inodedep);
 	return (0);
 }
 
 /*
  * Eliminate a pagedep dependency by flushing out all its diradd dependencies.
  * Called with splbio blocked.
  *
  * Repeatedly takes the head of the diradd list and performs whatever
  * writes are needed for it to leave the list: updating the directory's
  * own inode (MKDIR_PARENT), fsync'ing the new directory (MKDIR_BODY),
  * writing the bitmap buffer the new inode still depends on, and finally
  * writing the inode block itself. Returns 0 when the list is empty or
  * the first error encountered. The softdep lock (lk) is dropped around
  * each operation that may sleep but is held again on return.
  */
 static int
 flush_pagedep_deps(pvp, mp, diraddhdp)
 	struct vnode *pvp;
 	struct mount *mp;
 	struct diraddhd *diraddhdp;
 {
 	struct proc *p = CURPROC;	/* XXX */
 	struct inodedep *inodedep;
 	struct ufsmount *ump;
 	struct diradd *dap;
 	struct vnode *vp;
 	int gotit, error = 0;
 	struct buf *bp;
 	ino_t inum;
 
 	ump = VFSTOUFS(mp);
 	while ((dap = LIST_FIRST(diraddhdp)) != NULL) {
 		/*
 		 * Flush ourselves if this directory entry
 		 * has a MKDIR_PARENT dependency.
 		 */
 		if (dap->da_state & MKDIR_PARENT) {
 			FREE_LOCK(&lk);
 			if ((error = UFS_UPDATE(pvp, 1)) != 0)
 				break;
 			ACQUIRE_LOCK(&lk);
 			/*
 			 * If that cleared dependencies, go on to next.
 			 */
 			if (dap != LIST_FIRST(diraddhdp))
 				continue;
 			if (dap->da_state & MKDIR_PARENT) {
 				FREE_LOCK(&lk);
 				panic("flush_pagedep_deps: MKDIR_PARENT");
 			}
 		}
 		/*
 		 * A newly allocated directory must have its "." and
 		 * ".." entries written out before its name can be
 		 * committed in its parent. We do not want or need
 		 * the full semantics of a synchronous VOP_FSYNC as
 		 * that may end up here again, once for each directory
 		 * level in the filesystem. Instead, we push the blocks
 		 * and wait for them to clear. We have to fsync twice
 		 * because the first call may choose to defer blocks
 		 * that still have dependencies, but deferral will
 		 * happen at most once.
 		 */
 		inum = dap->da_newinum;
 		if (dap->da_state & MKDIR_BODY) {
 			FREE_LOCK(&lk);
 			if ((error = VFS_VGET(mp, inum, &vp)) != 0)
 				break;
 			if ((error=VOP_FSYNC(vp, p->p_ucred, MNT_NOWAIT, p)) ||
 			    (error=VOP_FSYNC(vp, p->p_ucred, MNT_NOWAIT, p))) {
 				vput(vp);
 				break;
 			}
 			/* 0: the softdep lock is not held at this point. */
 			drain_output(vp, 0);
 			vput(vp);
 			ACQUIRE_LOCK(&lk);
 			/*
 			 * If that cleared dependencies, go on to next.
 			 */
 			if (dap != LIST_FIRST(diraddhdp))
 				continue;
 			if (dap->da_state & MKDIR_BODY) {
 				FREE_LOCK(&lk);
 				panic("flush_pagedep_deps: MKDIR_BODY");
 			}
 		}
 		/*
 		 * Flush the inode on which the directory entry depends.
 		 * Having accounted for MKDIR_PARENT and MKDIR_BODY above,
 		 * the only remaining dependency is that the updated inode
 		 * count must get pushed to disk. The inode has already
 		 * been pushed into its inode buffer (via VOP_UPDATE) at
 		 * the time of the reference count change. So we need only
 		 * locate that buffer, ensure that there will be no rollback
 		 * caused by a bitmap dependency, then write the inode buffer.
 		 */
 		if (inodedep_lookup(ump->um_fs, inum, 0, &inodedep) == 0) {
 			FREE_LOCK(&lk);
 			panic("flush_pagedep_deps: lost inode");
 		}
 		/*
 		 * If the inode still has bitmap dependencies,
 		 * push them to disk.
 		 */
 		if ((inodedep->id_state & DEPCOMPLETE) == 0) {
 			gotit = getdirtybuf(&inodedep->id_buf, MNT_WAIT);
 			FREE_LOCK(&lk);
 			/*
 			 * NOTE(review): inodedep->id_buf is read here after
 			 * the lock has been released; confirm the inodedep
 			 * cannot be freed in that window.
 			 */
 			if (gotit &&
 			    (error = BUF_WRITE(inodedep->id_buf)) != 0)
 				break;
 			ACQUIRE_LOCK(&lk);
 			if (dap != LIST_FIRST(diraddhdp))
 				continue;
 		}
 		/*
 		 * If the inode is still sitting in a buffer waiting
 		 * to be written, push it to disk.
 		 *
 		 * NOTE(review): when bread fails, bp is not released before
 		 * breaking out. Confirm whether bread releases the buffer
 		 * itself on error in this kernel; if not, it is leaked.
 		 */
 		FREE_LOCK(&lk);
 		if ((error = bread(ump->um_devvp,
 		    fsbtodb(ump->um_fs, ino_to_fsba(ump->um_fs, inum)),
 		    (int)ump->um_fs->fs_bsize, NOCRED, &bp)) != 0)
 			break;
 		if ((error = BUF_WRITE(bp)) != 0)
 			break;
 		ACQUIRE_LOCK(&lk);
 		/*
 		 * If we have failed to get rid of all the dependencies
 		 * then something is seriously wrong.
 		 */
 		if (dap == LIST_FIRST(diraddhdp)) {
 			FREE_LOCK(&lk);
 			panic("flush_pagedep_deps: flush failed");
 		}
 	}
 	/*
 	 * Every error break above happens with the lock released, so
 	 * retake it here to honour the "lock held on return" contract.
 	 */
 	if (error)
 		ACQUIRE_LOCK(&lk);
 	return (error);
 }
 
 /*
  * A large burst of file addition or deletion activity can drive the
  * memory load excessively high. First attempt to slow things down
  * using the techniques below. If that fails, this routine requests
  * the offending operations to fall back to running synchronously
  * until the memory load returns to a reasonable level.
  */
 int
 softdep_slowdown(vp)
 	struct vnode *vp;
 {
 	int max_softdeps_hard;
 
 	max_softdeps_hard = max_softdeps * 11 / 10;
 	if (num_dirrem < max_softdeps_hard / 2 &&
 	    num_inodedep < max_softdeps_hard)
 		return (0);
 	stat_sync_limit_hit += 1;
 	return (1);
 }
 
 /*
  * If memory utilization has gotten too high, deliberately slow things
  * down and speed up the I/O processing.
  *
  * resource selects what the caller is short of (FLUSH_INODES or
  * FLUSH_REMOVE; anything else panics). islocked is nonzero when the
  * caller already holds the softdep lock (lk); the lock is left in the
  * same state on return as it was on entry.
  *
  * Returns 0 when nothing was done to delay the caller (we are the
  * syncer, or speedup_syncer succeeded), and 1 when the caller either
  * processed worklist items or was put to sleep.
  */
 static int
 request_cleanup(resource, islocked)
 	int resource;
 	int islocked;
 {
 	struct proc *p = CURPROC;
 
 	/*
 	 * We never hold up the filesystem syncer process.
 	 */
 	if (p == filesys_syncer)
 		return (0);
 	/*
 	 * First check to see if the work list has gotten backlogged.
 	 * If it has, co-opt this process to help clean up two entries.
 	 * Because this process may hold inodes locked, we cannot
 	 * handle any remove requests that might block on a locked
 	 * inode as that could lead to deadlock.
 	 */
 	if (num_on_worklist > max_softdeps / 10) {
 		/* process_worklist_item is called without lk held. */
 		if (islocked)
 			FREE_LOCK(&lk);
 		process_worklist_item(NULL, LK_NOWAIT);
 		process_worklist_item(NULL, LK_NOWAIT);
 		stat_worklist_push += 2;
 		if (islocked)
 			ACQUIRE_LOCK(&lk);
 		return(1);
 	}
 	/*
 	 * Next, we attempt to speed up the syncer process. If that
 	 * is successful, then we allow the process to continue.
 	 */
 	if (speedup_syncer())
 		return(0);
 	/*
 	 * If we are resource constrained on inode dependencies, try
 	 * flushing some dirty inodes. Otherwise, we are constrained
 	 * by file deletions, so try accelerating flushes of directories
 	 * with removal dependencies. We would like to do the cleanup
 	 * here, but we probably hold an inode locked at this point and
 	 * that might deadlock against one that we try to clean. So,
 	 * the best that we can do is request the syncer daemon to do
 	 * the cleanup for us.
 	 */
 	switch (resource) {
 
 	case FLUSH_INODES:
 		stat_ino_limit_push += 1;
 		req_clear_inodedeps += 1;
 		/* pause_timer bumps whichever counter stat_countp names. */
 		stat_countp = &stat_ino_limit_hit;
 		break;
 
 	case FLUSH_REMOVE:
 		stat_blk_limit_push += 1;
 		req_clear_remove += 1;
 		stat_countp = &stat_blk_limit_hit;
 		break;
 
 	default:
 		if (islocked)
 			FREE_LOCK(&lk);
 		panic("request_cleanup: unknown type");
 	}
 	/*
 	 * Hopefully the syncer daemon will catch up and awaken us.
 	 * We wait at most tickdelay before proceeding in any case.
 	 */
 	if (islocked == 0)
 		ACQUIRE_LOCK(&lk);
 	proc_waiting += 1;
 	/*
 	 * Arm the wakeup timer only if one is not already pending; the
 	 * delay is tickdelay but never fewer than 2 ticks.
 	 */
 	if (handle.callout == NULL)
 		handle = timeout(pause_timer, 0, tickdelay > 2 ? tickdelay : 2);
 	FREE_LOCK_INTERLOCKED(&lk);
 	(void) tsleep((caddr_t)&proc_waiting, PPAUSE, "softupdate", 0);
 	ACQUIRE_LOCK_INTERLOCKED(&lk);
 	proc_waiting -= 1;
 	if (islocked == 0)
 		FREE_LOCK(&lk);
 	return (1);
 }
 
 /*
  * Timeout handler that awakens one process pausing in request_cleanup.
  *
  * The statistic currently selected by stat_countp is incremented and a
  * single sleeper on proc_waiting is woken. If processes are still
  * waiting the timer is re-armed (tickdelay, but at least 2 ticks);
  * otherwise handle.callout is cleared to indicate that there is no
  * longer a timer running. The argument is unused.
  */
 void
 pause_timer(arg)
 	void *arg;
 {
 
 	*stat_countp += 1;
 	wakeup_one(&proc_waiting);
 	if (proc_waiting <= 0) {
 		/* Nobody left to wake: record that no timer is pending. */
 		handle.callout = NULL;
 		return;
 	}
 	handle = timeout(pause_timer, 0, tickdelay > 2 ? tickdelay : 2);
 }
 
 /*
  * Flush out a directory with at least one removal dependency in an effort to
  * reduce the number of dirrem, freefile, and freeblks dependency structures.
  *
  * The pagedep hash table is scanned round-robin (the static "next" index
  * persists across calls) for the first pagedep whose pd_dirremhd list is
  * not empty and whose mount accepts a write request. That directory is
  * fetched with VFS_VGET, fsync'ed with MNT_NOWAIT and its pending output
  * drained. At most one directory is flushed per call; errors are reported
  * through softdep_error. The softdep lock (lk) is taken here and is never
  * held on return.
  */
 static void
 clear_remove(p)
 	struct proc *p;		/* supplies the credentials for VOP_FSYNC */
 {
 	struct pagedep_hashhead *pagedephd;
 	struct pagedep *pagedep;
 	static int next = 0;
 	struct mount *mp;
 	struct vnode *vp;
 	int error, cnt;
 	ino_t ino;
 
 	ACQUIRE_LOCK(&lk);
 	for (cnt = 0; cnt < pagedep_hash; cnt++) {
 		pagedephd = &pagedep_hashtbl[next++];
 		if (next >= pagedep_hash)
 			next = 0;
 		LIST_FOREACH(pagedep, pagedephd, pd_hash) {
 			if (LIST_FIRST(&pagedep->pd_dirremhd) == NULL)
 				continue;
 			mp = pagedep->pd_mnt;
 			ino = pagedep->pd_ino;
 			/*
 			 * Request write access while the softdep lock is
 			 * still held. If it is refused we keep scanning,
 			 * and that is only safe (and only leaves the lock
 			 * balanced) when the lock has not been dropped.
 			 * V_NOWAIT is passed so that this call is not
 			 * expected to sleep with the lock held.
 			 */
 			if (vn_start_write(NULL, &mp, V_NOWAIT) != 0)
 				continue;
 			FREE_LOCK(&lk);
 			if ((error = VFS_VGET(mp, ino, &vp)) != 0) {
 				softdep_error("clear_remove: vget", error);
 				vn_finished_write(mp);
 				return;
 			}
 			if ((error = VOP_FSYNC(vp, p->p_ucred, MNT_NOWAIT, p)))
 				softdep_error("clear_remove: fsync", error);
 			/* 0: the softdep lock is not held at this point. */
 			drain_output(vp, 0);
 			vput(vp);
 			vn_finished_write(mp);
 			return;
 		}
 	}
 	FREE_LOCK(&lk);
 }
 
 /*
  * Clear out a block of dirty inodes in an effort to reduce
  * the number of inodedep dependency structures.
  *
  * One inodedep is picked by scanning the inodedep hash table round-robin
  * (the static "next" index persists across calls). Every inode in the
  * same inode block that has an inodedep is then fsync'ed: all but the
  * last with MNT_NOWAIT, the last with MNT_WAIT. Errors are reported
  * through softdep_error. The softdep lock (lk) is taken here and is
  * never held on return.
  */
 static void
 clear_inodedeps(p)
 	struct proc *p;		/* supplies the credentials for VOP_FSYNC */
 {
 	struct inodedep_hashhead *inodedephd;
 	struct inodedep *inodedep;
 	static int next = 0;
 	struct mount *mp;
 	struct vnode *vp;
 	struct fs *fs;
 	int error, cnt;
 	ino_t firstino, lastino, ino;
 
 	ACQUIRE_LOCK(&lk);
 	/*
 	 * Pick a random inode dependency to be cleared.
 	 * We will then gather up all the inodes in its block
 	 * that have dependencies and flush them out.
 	 */
 	inodedep = NULL;	/* defined even if the table has no buckets */
 	for (cnt = 0; cnt < inodedep_hash; cnt++) {
 		inodedephd = &inodedep_hashtbl[next++];
 		if (next >= inodedep_hash)
 			next = 0;
 		if ((inodedep = LIST_FIRST(inodedephd)) != NULL)
 			break;
 	}
 	if (inodedep == NULL) {
 		/* Nothing to clear; do not return with the lock held. */
 		FREE_LOCK(&lk);
 		return;
 	}
 	/*
 	 * Ugly code to find mount point given pointer to superblock.
 	 */
 	fs = inodedep->id_fs;
 	TAILQ_FOREACH(mp, &mountlist, mnt_list)
 		if ((mp->mnt_flag & MNT_SOFTDEP) && fs == VFSTOUFS(mp)->um_fs)
 			break;
 	/*
 	 * Find the last inode in the block with dependencies.
 	 */
 	firstino = inodedep->id_ino & ~(INOPB(fs) - 1);
 	for (lastino = firstino + INOPB(fs) - 1; lastino > firstino; lastino--)
 		if (inodedep_lookup(fs, lastino, 0, &inodedep) != 0)
 			break;
 	/*
 	 * Asynchronously push all but the last inode with dependencies.
 	 * Synchronously push the last inode with dependencies to ensure
 	 * that the inode block gets written to free up the inodedeps.
 	 */
 	for (ino = firstino; ino <= lastino; ino++) {
 		if (inodedep_lookup(fs, ino, 0, &inodedep) == 0)
 			continue;
 		/*
 		 * Request write access while the softdep lock is still
 		 * held, so that a refusal resumes the loop with the lock
 		 * in the state the next inodedep_lookup and the final
 		 * FREE_LOCK expect. V_NOWAIT is passed so that this call
 		 * is not expected to sleep with the lock held.
 		 */
 		if (vn_start_write(NULL, &mp, V_NOWAIT) != 0)
 			continue;
 		FREE_LOCK(&lk);
 		if ((error = VFS_VGET(mp, ino, &vp)) != 0) {
 			softdep_error("clear_inodedeps: vget", error);
 			vn_finished_write(mp);
 			return;
 		}
 		if (ino == lastino) {
 			if ((error = VOP_FSYNC(vp, p->p_ucred, MNT_WAIT, p)))
 				softdep_error("clear_inodedeps: fsync1", error);
 		} else {
 			if ((error = VOP_FSYNC(vp, p->p_ucred, MNT_NOWAIT, p)))
 				softdep_error("clear_inodedeps: fsync2", error);
 			/* 0: the softdep lock is not held at this point. */
 			drain_output(vp, 0);
 		}
 		vput(vp);
 		vn_finished_write(mp);
 		ACQUIRE_LOCK(&lk);
 	}
 	FREE_LOCK(&lk);
 }
 
 /*
  * Function to determine if the buffer has outstanding dependencies
  * that will cause a roll-back if the buffer is written. If wantcount
  * is set, return number of dependencies, otherwise just yes or no.
  *
  * Counted per dependency attached to bp: for a D_INODEDEP, one if it is
  * not yet DEPCOMPLETE and one more if its id_inoupdt list is not empty;
  * for a D_INDIRDEP, one per entry on ir_deplisthd; for a D_PAGEDEP, one
  * per diradd on any of its pd_diraddhd chains. When wantcount is zero
  * the scan stops at the first hit, so the result is 0 or 1. The softdep
  * lock (lk) is taken for the duration of the scan.
  */
 static int
 softdep_count_dependencies(bp, wantcount)
 	struct buf *bp;
 	int wantcount;
 {
 	struct worklist *wk;
 	struct inodedep *inodedep;
 	struct indirdep *indirdep;
 	struct allocindir *aip;
 	struct pagedep *pagedep;
 	struct diradd *dap;
 	int i, retval;
 
 	retval = 0;
 	ACQUIRE_LOCK(&lk);
 	LIST_FOREACH(wk, &bp->b_dep, wk_list) {
 		switch (wk->wk_type) {
 
 		case D_INODEDEP:
 			inodedep = WK_INODEDEP(wk);
 			if ((inodedep->id_state & DEPCOMPLETE) == 0) {
 				/* bitmap allocation dependency */
 				retval += 1;
 				if (!wantcount)
 					goto out;
 			}
 			if (TAILQ_FIRST(&inodedep->id_inoupdt)) {
 				/* direct block pointer dependency */
 				retval += 1;
 				if (!wantcount)
 					goto out;
 			}
 			continue;
 
 		case D_INDIRDEP:
 			indirdep = WK_INDIRDEP(wk);
 
 			LIST_FOREACH(aip, &indirdep->ir_deplisthd, ai_next) {
 				/* indirect block pointer dependency */
 				retval += 1;
 				if (!wantcount)
 					goto out;
 			}
 			continue;
 
 		case D_PAGEDEP:
 			pagedep = WK_PAGEDEP(wk);
 			for (i = 0; i < DAHASHSZ; i++) {
 
 				LIST_FOREACH(dap, &pagedep->pd_diraddhd[i], da_pdlist) {
 					/* directory entry dependency */
 					retval += 1;
 					if (!wantcount)
 						goto out;
 				}
 			}
 			continue;
 
 		case D_BMSAFEMAP:
 		case D_ALLOCDIRECT:
 		case D_ALLOCINDIR:
 		case D_MKDIR:
 			/* never a dependency on these blocks */
 			continue;
 
 		default:
 			FREE_LOCK(&lk);
 			/* Name this function so the panic can be traced. */
 			panic("softdep_count_dependencies: Unexpected type %s",
 			    TYPENAME(wk->wk_type));
 			/* NOTREACHED */
 		}
 	}
 out:
 	FREE_LOCK(&lk);
 	return retval;
 }
 
 /*
  * Acquire exclusive access to a buffer.
  * Must be called with splbio blocked.
  * Return 1 if buffer was acquired.
  *
  * bpp points at the buffer pointer to lock; it is re-read on every
  * attempt. Returns 0 when *bpp is NULL, when the buffer is locked by
  * someone else or has a background write in progress and waitfor is not
  * MNT_WAIT, or when the buffer turns out not to be B_DELWRI (in which
  * case it is unlocked again). On success the buffer is locked and has
  * been taken off its free list with bremfree.
  */
 static int
 getdirtybuf(bpp, waitfor)
 	struct buf **bpp;
 	int waitfor;
 {
 	struct buf *bp;
 
 	for (;;) {
 		if ((bp = *bpp) == NULL)
 			return (0);
 		if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT) == 0) {
 			/* Got the lock; usable unless a background write runs. */
 			if ((bp->b_xflags & BX_BKGRDINPROG) == 0)
 				break;
 			BUF_UNLOCK(bp);
 			if (waitfor != MNT_WAIT)
 				return (0);
 			/*
 			 * Flag our interest and sleep on b_xflags, then
 			 * start over from the top.
 			 */
 			bp->b_xflags |= BX_BKGRDWAIT;
 			FREE_LOCK_INTERLOCKED(&lk);
 			tsleep(&bp->b_xflags, PRIBIO, "getbuf", 0);
 			ACQUIRE_LOCK_INTERLOCKED(&lk);
 			continue;
 		}
 		if (waitfor != MNT_WAIT)
 			return (0);
 		/*
 		 * The buffer is locked by someone else. Issue a sleeping
 		 * lock request with LK_SLEEPFAIL; the only acceptable
 		 * result is ENOLCK, after which the attempt is retried.
 		 */
 		FREE_LOCK_INTERLOCKED(&lk);
 		if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL) != ENOLCK)
 			panic("getdirtybuf: inconsistent lock");
 		ACQUIRE_LOCK_INTERLOCKED(&lk);
 	}
 	/* Only a delayed-write buffer counts as acquired. */
 	if ((bp->b_flags & B_DELWRI) == 0) {
 		BUF_UNLOCK(bp);
 		return (0);
 	}
 	bremfree(bp);
 	return (1);
 }
 
 /*
  * Wait for pending output on a vnode to complete.
  * Must be called with vnode locked.
  *
  * Sleeps on v_numoutput, with VBWAIT set in v_flag, until v_numoutput
  * reaches zero. islocked is nonzero when the caller already holds the
  * softdep lock (lk); when it is zero the lock is taken for the duration
  * of the wait and released again before returning.
  */
 static void
 drain_output(vp, islocked)
 	struct vnode *vp;
 	int islocked;
 {
 
 	if (islocked == 0) {
 		ACQUIRE_LOCK(&lk);
 	}
 	while (vp->v_numoutput != 0) {
 		vp->v_flag |= VBWAIT;
 		FREE_LOCK_INTERLOCKED(&lk);
 		tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "drainvp", 0);
 		ACQUIRE_LOCK_INTERLOCKED(&lk);
 	}
 	if (islocked == 0) {
 		FREE_LOCK(&lk);
 	}
 }
 
 /*
  * Called whenever a buffer that is being invalidated or reallocated
  * contains dependencies. This should only happen if an I/O error has
  * occurred. The routine is called with the buffer locked.
  */ 
 static void
 softdep_deallocate_dependencies(bp)
 	struct buf *bp;
 {
 
 	if ((bp->b_ioflags & BIO_ERROR) == 0)
 		panic("softdep_deallocate_dependencies: dangling deps");
 	softdep_error(bp->b_vp->v_mount->mnt_stat.f_mntonname, bp->b_error);
 	panic("softdep_deallocate_dependencies: unrecovered I/O error");
 }
 
 /*
  * Function to handle asynchronous write errors in the filesystem.
  *
  * Currently this only logs: it prints the caller-supplied tag (func)
  * together with the error number.
  */
 void
 softdep_error(func, error)
 	char *func;
 	int error;
 {
 
 	/* XXX should do something better! */
 	(void) printf("%s: got error %d while accessing filesystem\n",
 	    func, error);
 }
diff --git a/sys/ufs/ffs/ffs_vfsops.c b/sys/ufs/ffs/ffs_vfsops.c
index bac00b918ec8..9803a222ab5a 100644
--- a/sys/ufs/ffs/ffs_vfsops.c
+++ b/sys/ufs/ffs/ffs_vfsops.c
@@ -1,1317 +1,1319 @@
 /*
  * Copyright (c) 1989, 1991, 1993, 1994
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by the University of
  *	California, Berkeley and its contributors.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)ffs_vfsops.c	8.31 (Berkeley) 5/20/95
  * $FreeBSD$
  */
 
 #include "opt_quota.h"
 #include "opt_ufs.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/namei.h>
 #include <sys/proc.h>
 #include <sys/kernel.h>
 #include <sys/vnode.h>
 #include <sys/mount.h>
 #include <sys/bio.h>
 #include <sys/buf.h>
 #include <sys/conf.h>
 #include <sys/fcntl.h>
 #include <sys/disklabel.h>
 #include <sys/malloc.h>
 #include <sys/mutex.h>
 
 #include <ufs/ufs/extattr.h>
 #include <ufs/ufs/quota.h>
 #include <ufs/ufs/ufsmount.h>
 #include <ufs/ufs/inode.h>
 #include <ufs/ufs/ufs_extern.h>
 
 #include <ufs/ffs/fs.h>
 #include <ufs/ffs/ffs_extern.h>
 
 #include <vm/vm.h>
 #include <vm/vm_page.h>
 
 /* Malloc type handed to ffs_mountfs() by ffs_mount() below. */
 static MALLOC_DEFINE(M_FFSNODE, "FFS node", "FFS vnode private part");
 
 /* Forward declarations for routines defined later in this file. */
 static int	ffs_sbupdate __P((struct ufsmount *, int));
 int	ffs_reload __P((struct mount *,struct ucred *,struct proc *));
 static int	ffs_oldfscompat __P((struct fs *));
 static int	ffs_init __P((struct vfsconf *));
 
 /*
  * Filesystem operations table for UFS/FFS.  The initializer is
  * positional, so the order of the entries must match the member
  * order of struct vfsops.  The extended attribute control entry is
  * the UFS implementation only when UFS_EXTATTR is configured.
  */
 static struct vfsops ufs_vfsops = {
 	ffs_mount,
 	ufs_start,
 	ffs_unmount,
 	ufs_root,
 	ufs_quotactl,
 	ffs_statfs,
 	ffs_sync,
 	ffs_vget,
 	ffs_fhtovp,
 	ufs_check_export,
 	ffs_vptofh,
 	ffs_init,
 	vfs_stduninit,
 #ifdef UFS_EXTATTR
 	ufs_extattrctl,
 #else
 	vfs_stdextattrctl,
 #endif
 };
 
 /* Register the table above under the filesystem name "ufs". */
 VFS_SET(ufs_vfsops, ufs, 0);
 
 /*
  * ffs_mount
  *
  * Called when mounting local physical media
  *
  * PARAMETERS:
  *		mountroot
  *			mp	mount point structure
  *			path	NULL (flag for root mount!!!)
  *			data	<unused>
  *			ndp	<unused>
  *			p	process (user credentials check [statfs])
  *
  *		mount
  *			mp	mount point structure
  *			path	path to mount point
  *			data	pointer to argument struct in user space
  *			ndp	mount point namei() return (used for
  *				credentials on reload), reused to look
  *				up block device.
  *			p	process (user credentials check)
  *
  * RETURNS:	0	Success
  *		!0	error number (errno.h)
  *
  * LOCK STATE:
  *
  *		ENTRY
  *			mount point is locked
  *		EXIT
  *			mount point is locked
  *
  * NOTES:
  *		A NULL path can be used for a flag since the mount
  *		system call will fail with EFAULT in copyinstr in
  *		namei() if it is a genuine NULL from the user.
  */
 int
 ffs_mount(mp, path, data, ndp, p)
         struct mount		*mp;	/* mount struct pointer*/
         char			*path;	/* path to mount point*/
         caddr_t			data;	/* arguments to FS specific mount*/
         struct nameidata	*ndp;	/* mount point credentials*/
         struct proc		*p;	/* process requesting mount*/
 {
 	size_t		size;
 	struct vnode	*devvp;
 	struct ufs_args args;
 	struct ufsmount *ump = 0;
 	register struct fs *fs;
 	int error, flags;
 	mode_t accessmode;
 
 	/*
 	 * Use NULL path to indicate we are mounting the root file system.
 	 */
 	if (path == NULL) {
 		if ((error = bdevvp(rootdev, &rootvp))) {
 			printf("ffs_mountroot: can't find rootvp\n");
 			return (error);
 		}
 
 		if ((error = ffs_mountfs(rootvp, mp, p, M_FFSNODE)) != 0)
 			return (error);
 
 		(void)VFS_STATFS(mp, &mp->mnt_stat, p);
 		return (0);
 	}
 
 	/*
 	 * Mounting non-root file system or updating a file system
 	 */
 	if ((error = copyin(data, (caddr_t)&args, sizeof(struct ufs_args)))!= 0)
 		return (error);
 
 	/*
 	 * If updating, check whether changing from read-only to
 	 * read/write; if there is no device name, that's all we do.
 	 */
 	if (mp->mnt_flag & MNT_UPDATE) {
 		ump = VFSTOUFS(mp);
 		fs = ump->um_fs;
 		devvp = ump->um_devvp;
 		/*
 		 * Read/write to read-only downgrade: flush all files, mark
 		 * the filesystem read-only (and clean unless FS_UNCLEAN is
 		 * set) and write the superblock.  If the superblock write
 		 * fails, fs_ronly and fs_clean are reset to zero.
 		 */
 		if (fs->fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) {
 			if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0)
 				return (error);
 			flags = WRITECLOSE;
 			if (mp->mnt_flag & MNT_FORCE)
 				flags |= FORCECLOSE;
 			if (mp->mnt_flag & MNT_SOFTDEP) {
 				error = softdep_flushfiles(mp, flags, p);
 			} else {
 				error = ffs_flushfiles(mp, flags, p);
 			}
 			if (error) {
 				vn_finished_write(mp);
 				return (error);
 			}
 			fs->fs_ronly = 1;
 			if ((fs->fs_flags & FS_UNCLEAN) == 0)
 				fs->fs_clean = 1;
 			if ((error = ffs_sbupdate(ump, MNT_WAIT)) != 0) {
 				fs->fs_ronly = 0;
 				fs->fs_clean = 0;
 				vn_finished_write(mp);
 				return (error);
 			}
 			vn_finished_write(mp);
 		}
 		/* Re-read the on-disk state when a reload was requested. */
 		if ((mp->mnt_flag & MNT_RELOAD) &&
 		    (error = ffs_reload(mp, ndp->ni_cnd.cn_cred, p)) != 0)
 			return (error);
 		/* Read-only to read/write upgrade. */
 		if (fs->fs_ronly && (mp->mnt_kern_flag & MNTK_WANTRDWR)) {
 			/*
 			 * If upgrade to read-write by non-root, then verify
 			 * that user has necessary permissions on the device.
 			 */
 			if (p->p_ucred->cr_uid != 0) {
 				vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p);
 				if ((error = VOP_ACCESS(devvp, VREAD | VWRITE,
 				    p->p_ucred, p)) != 0) {
 					VOP_UNLOCK(devvp, 0, p);
 					return (error);
 				}
 				VOP_UNLOCK(devvp, 0, p);
 			}
 			/*
 			 * Recompute FS_UNCLEAN from fs_clean.  An unclean
 			 * filesystem is only upgraded when the condition
 			 * below allows it; otherwise the request is refused
 			 * with EPERM.
 			 */
 			fs->fs_flags &= ~FS_UNCLEAN;
 			if (fs->fs_clean == 0) {
 				fs->fs_flags |= FS_UNCLEAN;
-				if (mp->mnt_flag & MNT_FORCE) {
+				if ((mp->mnt_flag & MNT_FORCE) ||
+				    (fs->fs_flags & FS_DOSOFTDEP)) {
 					printf("WARNING: %s was not %s\n",
 					   fs->fs_fsmnt, "properly dismounted");
 				} else {
 					printf(
 "WARNING: R/W mount of %s denied.  Filesystem is not clean - run fsck\n",
 					    fs->fs_fsmnt);
 					return (EPERM);
 				}
 			}
 			if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0)
 				return (error);
 			/*
 			 * Mark the filesystem writable and not clean, and
 			 * push that state to disk before anything else.
 			 */
 			fs->fs_ronly = 0;
 			fs->fs_clean = 0;
 			if ((error = ffs_sbupdate(ump, MNT_WAIT)) != 0) {
 				vn_finished_write(mp);
 				return (error);
 			}
 			/* check to see if we need to start softdep */
 			if ((fs->fs_flags & FS_DOSOFTDEP) &&
 			    (error = softdep_mount(devvp, mp, fs, p->p_ucred))){
 				vn_finished_write(mp);
 				return (error);
 			}
 			if (fs->fs_snapinum[0] != 0)
 				ffs_snapshot_mount(mp);
 			vn_finished_write(mp);
 		}
 		/*
 		 * Soft updates is incompatible with "async",
 		 * so if we are doing softupdates stop the user
 		 * from setting the async flag in an update.
 		 * Softdep_mount() clears it in an initial mount
 		 * or ro->rw remount.
 		 */
 		if (mp->mnt_flag & MNT_SOFTDEP)
 			mp->mnt_flag &= ~MNT_ASYNC;
 		/*
 		 * If not updating name, process export requests.
 		 */
 		if (args.fspec == 0)
 			return (vfs_export(mp, &ump->um_export, &args.export));
 		/*
 		 * If this is a snapshot request, take the snapshot.
 		 */
 		if (mp->mnt_flag & MNT_SNAPSHOT)
 			return (ffs_snapshot(mp, args.fspec));
 	}
 
 	/*
 	 * Not an update, or updating the name: look up the name
 	 * and verify that it refers to a sensible block device.
 	 */
 	NDINIT(ndp, LOOKUP, FOLLOW, UIO_USERSPACE, args.fspec, p);
 	if ((error = namei(ndp)) != 0)
 		return (error);
 	NDFREE(ndp, NDF_ONLY_PNBUF);
 	devvp = ndp->ni_vp;
 	if (!vn_isdisk(devvp, &error)) {
 		vrele(devvp);
 		return (error);
 	}
 
 	/*
 	 * If mount by non-root, then verify that user has necessary
 	 * permissions on the device.
 	 */
 	if (p->p_ucred->cr_uid != 0) {
 		accessmode = VREAD;
 		if ((mp->mnt_flag & MNT_RDONLY) == 0)
 			accessmode |= VWRITE;
 		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p);
 		if ((error = VOP_ACCESS(devvp, accessmode, p->p_ucred, p))!= 0){
 			/* vput() both unlocks and releases the vnode. */
 			vput(devvp);
 			return (error);
 		}
 		VOP_UNLOCK(devvp, 0, p);
 	}
 
 	if (mp->mnt_flag & MNT_UPDATE) {
 		/*
 		 * Update only
 		 *
 		 * If it's not the same vnode, or at least the same device
 		 * then it's not correct.
 		 */
 
 		if (devvp != ump->um_devvp &&
 		    devvp->v_rdev != ump->um_devvp->v_rdev)
 			error = EINVAL;	/* needs translation */
 		/* The looked-up reference is dropped in either case. */
 		vrele(devvp);
 		if (error)
 			return (error);
 	} else {
 		/*
 		 * New mount
 		 *
 		 * We need the name for the mount point (also used for
 		 * "last mounted on") copied in. If an error occurs,
 		 * the mount point is discarded by the upper level code.
 		 * Note that vfs_mount() populates f_mntonname for us.
 		 */
 		if ((error = ffs_mountfs(devvp, mp, p, M_FFSNODE)) != 0) {
 			vrele(devvp);
 			return (error);
 		}
 	}
 	/*
 	 * Save "mounted from" device name info for mount point (NULL pad).
 	 *
 	 * NOTE(review): the return value of copyinstr() is not checked
 	 * here, and "size" is used by the bzero() below regardless of
 	 * whether the copy succeeded -- confirm this is acceptable.
 	 */
 	copyinstr(args.fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size);
 	bzero( mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
 	/*
 	 * Initialize filesystem stat information in mount struct.
 	 */
 	(void)VFS_STATFS(mp, &mp->mnt_stat, p);
 	return (0);
 }
 
 /*
  * Reload all incore data for a filesystem (used after running fsck on
  * the root filesystem and finding things to fix). The filesystem must
  * be mounted read-only.
  *
  * Things to do to update the mount:
  *	1) invalidate all cached meta-data.
  *	2) re-read superblock from disk.
  *	3) re-read summary information from disk.
  *	4) invalidate all inactive vnodes.
  *	5) invalidate all cached file data.
  *	6) re-read inode data for all active vnodes.
  */
 int
 ffs_reload(mp, cred, p)
 	register struct mount *mp;
 	struct ucred *cred;
 	struct proc *p;
 {
 	register struct vnode *vp, *nvp, *devvp;
 	struct inode *ip;
 	void *space;
 	struct buf *bp;
 	struct fs *fs, *newfs;
 	struct partinfo dpart;
 	dev_t dev;
 	int i, blks, size, error;
 	int32_t *lp;
 
 	if ((mp->mnt_flag & MNT_RDONLY) == 0)
 		return (EINVAL);
 	/*
 	 * Step 1: invalidate all cached meta-data.
 	 */
 	devvp = VFSTOUFS(mp)->um_devvp;
 	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p);
 	error = vinvalbuf(devvp, 0, cred, p, 0, 0);
 	VOP_UNLOCK(devvp, 0, p);
 	if (error)
 		panic("ffs_reload: dirty1");
 
 	dev = devvp->v_rdev;
 
 	/*
 	 * Only VMIO the backing device if the backing device is a real
 	 * block device.  See ffs_mountmfs() for more details.
 	 */
 	if (devvp->v_tag != VT_MFS && vn_isdisk(devvp, NULL)) {
 		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p);
 		vfs_object_create(devvp, p, p->p_ucred);
 		mtx_lock(&devvp->v_interlock);
 		VOP_UNLOCK(devvp, LK_INTERLOCK, p);
 	}
 
 	/*
 	 * Step 2: re-read superblock from disk.
 	 */
 	if (VOP_IOCTL(devvp, DIOCGPART, (caddr_t)&dpart, FREAD, NOCRED, p) != 0)
 		size = DEV_BSIZE;
 	else
 		size = dpart.disklab->d_secsize;
 	if ((error = bread(devvp, (ufs_daddr_t)(SBOFF/size), SBSIZE, NOCRED,&bp)) != 0)
 		return (error);
 	newfs = (struct fs *)bp->b_data;
 	if (newfs->fs_magic != FS_MAGIC || newfs->fs_bsize > MAXBSIZE ||
 		newfs->fs_bsize < sizeof(struct fs)) {
 			brelse(bp);
 			return (EIO);		/* XXX needs translation */
 	}
 	fs = VFSTOUFS(mp)->um_fs;
 	/*
 	 * Copy pointer fields back into superblock before copying in	XXX
 	 * new superblock. These should really be in the ufsmount.	XXX
 	 * Note that important parameters (eg fs_ncg) are unchanged.
 	 */
 	newfs->fs_csp = fs->fs_csp;
 	newfs->fs_maxcluster = fs->fs_maxcluster;
 	bcopy(newfs, fs, (u_int)fs->fs_sbsize);
 	if (fs->fs_sbsize < SBSIZE)
 		bp->b_flags |= B_INVAL | B_NOCACHE;
 	brelse(bp);
 	mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
 	ffs_oldfscompat(fs);
 
 	/*
 	 * Step 3: re-read summary information from disk.
 	 */
 	blks = howmany(fs->fs_cssize, fs->fs_fsize);
 	space = fs->fs_csp;
 	for (i = 0; i < blks; i += fs->fs_frag) {
 		size = fs->fs_bsize;
 		if (i + fs->fs_frag > blks)
 			size = (blks - i) * fs->fs_fsize;
 		error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
 		    NOCRED, &bp);
 		if (error)
 			return (error);
 		bcopy(bp->b_data, space, (u_int)size);
 		space = (char *)space + size;
 		brelse(bp);
 	}
 	/*
 	 * We no longer know anything about clusters per cylinder group.
 	 */
 	if (fs->fs_contigsumsize > 0) {
 		lp = fs->fs_maxcluster;
 		for (i = 0; i < fs->fs_ncg; i++)
 			*lp++ = fs->fs_contigsumsize;
 	}
 
 loop:
 	mtx_lock(&mntvnode_mtx);
 	for (vp = LIST_FIRST(&mp->mnt_vnodelist); vp != NULL; vp = nvp) {
 		if (vp->v_mount != mp) {
 			mtx_unlock(&mntvnode_mtx);
 			goto loop;
 		}
 		nvp = LIST_NEXT(vp, v_mntvnodes);
 		/*
 		 * Step 4: invalidate all inactive vnodes.
 		 */
 		if (vrecycle(vp, &mntvnode_mtx, p))
 			goto loop;
 		/*
 		 * Step 5: invalidate all cached file data.
 		 */
 		mtx_lock(&vp->v_interlock);
 		mtx_unlock(&mntvnode_mtx);
 		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) {
 			goto loop;
 		}
 		if (vinvalbuf(vp, 0, cred, p, 0, 0))
 			panic("ffs_reload: dirty2");
 		/*
 		 * Step 6: re-read inode data for all active vnodes.
 		 */
 		ip = VTOI(vp);
 		error =
 		    bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
 		    (int)fs->fs_bsize, NOCRED, &bp);
 		if (error) {
 			vput(vp);
 			return (error);
 		}
 		ip->i_din = *((struct dinode *)bp->b_data +
 		    ino_to_fsbo(fs, ip->i_number));
 		ip->i_effnlink = ip->i_nlink;
 		brelse(bp);
 		vput(vp);
 		mtx_lock(&mntvnode_mtx);
 	}
 	mtx_unlock(&mntvnode_mtx);
 	return (0);
 }
 
 /*
  * Common code for mount and mountroot
  *
  * Opens the device, reads and validates the superblock, allocates the
  * ufsmount and the in-core copy of the superblock and summary
  * information, and attaches them to the mount point.
  *
  * PARAMETERS:
  *		devvp		vnode of the device to mount
  *		mp		mount point structure
  *		p		calling process (may be NULL, in which
  *				case NOCRED is used)
  *		malloctype	malloc type recorded in the ufsmount
  *
  * RETURNS:	0	Success
  *		!0	error number; on the paths through "out" the
  *			device is closed again and the ufsmount freed
  */
 int
 ffs_mountfs(devvp, mp, p, malloctype)
 	register struct vnode *devvp;
 	struct mount *mp;
 	struct proc *p;
 	struct malloc_type *malloctype;
 {
 	register struct ufsmount *ump;
 	struct buf *bp;
 	register struct fs *fs;
 	dev_t dev;
 	struct partinfo dpart;
 	void *space;
 	int error, i, blks, size, ronly;
 	int32_t *lp;
 	struct ucred *cred;
 	u_int64_t maxfilesize;					/* XXX */
 	size_t strsize;
 	int ncount;
 
 	dev = devvp->v_rdev;
 	cred = p ? p->p_ucred : NOCRED;
 	/*
 	 * Disallow multiple mounts of the same device.
 	 * Disallow mounting of a device that is currently in use
 	 * (except for root, which might share swap device for miniroot).
 	 * Flush out any old buffers remaining from a previous use.
 	 */
 	error = vfs_mountedon(devvp);
 	if (error)
 		return (error);
 	ncount = vcount(devvp);
 
 	if (ncount > 1 && devvp != rootvp)
 		return (EBUSY);
 	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p);
 	error = vinvalbuf(devvp, V_SAVE, cred, p, 0, 0);
 	VOP_UNLOCK(devvp, 0, p);
 	if (error)
 		return (error);
 
 	/*
 	 * Only VMIO the backing device if the backing device is a real
 	 * block device.  This excludes the original MFS implementation.
 	 * Note that it is optional that the backing device be VMIOed.  This
 	 * increases the opportunity for metadata caching.
 	 */
 	if (devvp->v_tag != VT_MFS && vn_isdisk(devvp, NULL)) {
 		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p);
 		vfs_object_create(devvp, p, cred);
 		mtx_lock(&devvp->v_interlock);
 		VOP_UNLOCK(devvp, LK_INTERLOCK, p);
 	}
 
 	/* Open the device read-only or read/write to match the mount. */
 	ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
 	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p);
 	error = VOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, FSCRED, p);
 	VOP_UNLOCK(devvp, 0, p);
 	if (error)
 		return (error);
 	/*
 	 * Raise the mount's maximum I/O size to the device's if that is
 	 * larger, but never beyond MAXPHYS.
 	 */
 	if (devvp->v_rdev->si_iosize_max > mp->mnt_iosize_max)
 		mp->mnt_iosize_max = devvp->v_rdev->si_iosize_max;
 	if (mp->mnt_iosize_max > MAXPHYS)
 		mp->mnt_iosize_max = MAXPHYS;
 
 	if (VOP_IOCTL(devvp, DIOCGPART, (caddr_t)&dpart, FREAD, cred, p) != 0)
 		size = DEV_BSIZE;
 	else
 		size = dpart.disklab->d_secsize;
 
 	/*
 	 * From here on failures go through "out", which relies on bp and
 	 * ump being NULL when there is nothing to release.
 	 */
 	bp = NULL;
 	ump = NULL;
 	if ((error = bread(devvp, SBLOCK, SBSIZE, cred, &bp)) != 0)
 		goto out;
 	fs = (struct fs *)bp->b_data;
 	if (fs->fs_magic != FS_MAGIC || fs->fs_bsize > MAXBSIZE ||
 	    fs->fs_bsize < sizeof(struct fs)) {
 		error = EINVAL;		/* XXX needs translation */
 		goto out;
 	}
 	fs->fs_fmod = 0;
 	/*
 	 * Recompute FS_UNCLEAN from fs_clean.  An unclean filesystem is
 	 * only mounted when the condition below allows it; otherwise the
 	 * mount is refused with EPERM.
 	 */
 	fs->fs_flags &= ~FS_UNCLEAN;
 	if (fs->fs_clean == 0) {
 		fs->fs_flags |= FS_UNCLEAN;
-		if (ronly || (mp->mnt_flag & MNT_FORCE)) {
+		if (ronly || (mp->mnt_flag & MNT_FORCE) ||
+		    (fs->fs_flags & FS_DOSOFTDEP)) {
 			printf(
 "WARNING: %s was not properly dismounted\n",
 			    fs->fs_fsmnt);
 		} else {
 			printf(
 "WARNING: R/W mount of %s denied.  Filesystem is not clean - run fsck\n",
 			    fs->fs_fsmnt);
 			error = EPERM;
 			goto out;
 		}
 	}
 	/* XXX updating 4.2 FFS superblocks trashes rotational layout tables */
 	if (fs->fs_postblformat == FS_42POSTBLFMT && !ronly) {
 		error = EROFS;          /* needs translation */
 		goto out;
 	}
 	/*
 	 * Allocate the ufsmount and a private copy of the superblock,
 	 * then release the buffer the superblock was read into.
 	 */
 	ump = malloc(sizeof *ump, M_UFSMNT, M_WAITOK | M_ZERO);
 	ump->um_malloctype = malloctype;
 	ump->um_i_effnlink_valid = 1;
 	ump->um_fs = malloc((u_long)fs->fs_sbsize, M_UFSMNT,
 	    M_WAITOK);
 	ump->um_blkatoff = ffs_blkatoff;
 	ump->um_truncate = ffs_truncate;
 	ump->um_update = ffs_update;
 	ump->um_valloc = ffs_valloc;
 	ump->um_vfree = ffs_vfree;
 	bcopy(bp->b_data, ump->um_fs, (u_int)fs->fs_sbsize);
 	if (fs->fs_sbsize < SBSIZE)
 		bp->b_flags |= B_INVAL | B_NOCACHE;
 	brelse(bp);
 	bp = NULL;
 	fs = ump->um_fs;
 	fs->fs_ronly = ronly;
 	/*
 	 * One allocation holds the summary information followed, when
 	 * fs_contigsumsize is positive, by one int32_t per cylinder
 	 * group for fs_maxcluster.
 	 */
 	size = fs->fs_cssize;
 	blks = howmany(size, fs->fs_fsize);
 	if (fs->fs_contigsumsize > 0)
 		size += fs->fs_ncg * sizeof(int32_t);
 	space = malloc((u_long)size, M_UFSMNT, M_WAITOK);
 	fs->fs_csp = space;
 	for (i = 0; i < blks; i += fs->fs_frag) {
 		/* Read whole blocks, except for a possibly short last one. */
 		size = fs->fs_bsize;
 		if (i + fs->fs_frag > blks)
 			size = (blks - i) * fs->fs_fsize;
 		if ((error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
 		    cred, &bp)) != 0) {
 			free(fs->fs_csp, M_UFSMNT);
 			goto out;
 		}
 		bcopy(bp->b_data, space, (u_int)size);
 		space = (char *)space + size;
 		brelse(bp);
 		bp = NULL;
 	}
 	if (fs->fs_contigsumsize > 0) {
 		fs->fs_maxcluster = lp = space;
 		for (i = 0; i < fs->fs_ncg; i++)
 			*lp++ = fs->fs_contigsumsize;
 	}
 	mp->mnt_data = (qaddr_t)ump;
 	/*
 	 * Use the filesystem id stored in the superblock unless either
 	 * half is zero or the id is already in use, in which case a new
 	 * one is generated.
 	 */
 	mp->mnt_stat.f_fsid.val[0] = fs->fs_id[0];
 	mp->mnt_stat.f_fsid.val[1] = fs->fs_id[1];
 	if (fs->fs_id[0] == 0 || fs->fs_id[1] == 0 || 
 	    vfs_getvfs(&mp->mnt_stat.f_fsid)) 
 		vfs_getnewfsid(mp);
 	mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
 	mp->mnt_flag |= MNT_LOCAL;
 	ump->um_mountp = mp;
 	ump->um_dev = dev;
 	ump->um_devvp = devvp;
 	ump->um_nindir = fs->fs_nindir;
 	ump->um_bptrtodb = fs->fs_fsbtodb;
 	ump->um_seqinc = fs->fs_frag;
 	for (i = 0; i < MAXQUOTAS; i++)
 		ump->um_quotas[i] = NULLVP;
 #ifdef UFS_EXTATTR
 	ufs_extattr_uepm_init(&ump->um_extattr);
 #endif
 	devvp->v_rdev->si_mountpoint = mp;
 	ffs_oldfscompat(fs);
 
 	/*
 	 * Set FS local "last mounted on" information (NULL pad)
 	 */
 	copystr(	mp->mnt_stat.f_mntonname,	/* mount point*/
 			fs->fs_fsmnt,			/* copy area*/
 			sizeof(fs->fs_fsmnt) - 1,	/* max size*/
 			&strsize);			/* real size*/
 	bzero( fs->fs_fsmnt + strsize, sizeof(fs->fs_fsmnt) - strsize);
 
 	if( mp->mnt_flag & MNT_ROOTFS) {
 		/*
 		 * Root mount; update timestamp in mount structure.
 		 * this will be used by the common root mount code
 		 * to update the system clock.
 		 */
 		mp->mnt_time = fs->fs_time;
 	}
 
 	/*
 	 * Remember the maximum file size read from disk, then clamp the
 	 * in-core value to 0x40000000 blocks minus one byte.
 	 */
 	ump->um_savedmaxfilesize = fs->fs_maxfilesize;		/* XXX */
 	maxfilesize = (u_int64_t)0x40000000 * fs->fs_bsize - 1;	/* XXX */
 	if (fs->fs_maxfilesize > maxfilesize)			/* XXX */
 		fs->fs_maxfilesize = maxfilesize;		/* XXX */
 	/*
 	 * Read/write mount: start soft updates and snapshots if the
 	 * superblock asks for them, then mark the filesystem modified
 	 * and not clean and write the superblock.  The result of that
 	 * write is deliberately ignored.
 	 */
 	if (ronly == 0) {
 		if ((fs->fs_flags & FS_DOSOFTDEP) &&
 		    (error = softdep_mount(devvp, mp, fs, cred)) != 0) {
 			free(fs->fs_csp, M_UFSMNT);
 			goto out;
 		}
 		if (fs->fs_snapinum[0] != 0)
 			ffs_snapshot_mount(mp);
 		fs->fs_fmod = 1;
 		fs->fs_clean = 0;
 		(void) ffs_sbupdate(ump, MNT_WAIT);
 	}
 #ifdef UFS_EXTATTR
 #ifdef UFS_EXTATTR_AUTOSTART
 	/*
 	 *
 	 * Auto-starting does the following:
 	 *	- check for /.attribute in the fs, and extattr_start if so
 	 *	- for each file in .attribute, enable that file with
 	 * 	  an attribute of the same name.
 	 * Not clear how to report errors -- probably eat them.
 	 * This would all happen while the file system was busy/not
 	 * available, so would effectively be "atomic".
 	 */
 	(void) ufs_extattr_autostart(mp, p);
 #endif /* !UFS_EXTATTR_AUTOSTART */
 #endif /* !UFS_EXTATTR */
 	return (0);
 out:
 	/*
 	 * Error exit: detach the device from the mount point, release any
 	 * buffer still held, close the device and free what was allocated.
 	 */
 	devvp->v_rdev->si_mountpoint = NULL;
 	if (bp)
 		brelse(bp);
 	(void)VOP_CLOSE(devvp, ronly ? FREAD : FREAD|FWRITE, cred, p);
 	if (ump) {
 		free(ump->um_fs, M_UFSMNT);
 		free(ump, M_UFSMNT);
 		mp->mnt_data = (qaddr_t)0;
 	}
 	return (error);
 }
 
 /*
  * Patch up in-core superblock fields for file systems written in
  * older on-disk formats.  Always returns 0.
  *
  * XXX - goes away some day.
  */
 static int
 ffs_oldfscompat(fs)
 	struct fs *fs;
 {
 
 	/* Keep fs_npsect at least fs_nsect and the interleave at least 1. */
 	fs->fs_npsect = max(fs->fs_npsect, fs->fs_nsect);
 	fs->fs_interleave = max(fs->fs_interleave, 1);
 
 	/* The 4.2 rotational table format implies eight positions. */
 	if (fs->fs_postblformat == FS_42POSTBLFMT)
 		fs->fs_nrpos = 8;
 
 	/* Nothing more to do for the 4.4 inode format and later. */
 	if (fs->fs_inodefmt >= FS_44INODEFMT)
 		return (0);
 
 	/*
 	 * Older inode formats get a fixed maximum file size, and the
 	 * quad masks are set to the complements of the block and
 	 * fragment masks.
 	 */
 	fs->fs_maxfilesize = (u_quad_t) 1LL << 39;
 	fs->fs_qbmask = ~fs->fs_bmask;
 	fs->fs_qfmask = ~fs->fs_fmask;
 	return (0);
 }
 
 /*
  * unmount system call
  */
 int
 ffs_unmount(mp, mntflags, p)
 	struct mount *mp;
 	int mntflags;
 	struct proc *p;
 {
 	register struct ufsmount *ump = VFSTOUFS(mp);
 	register struct fs *fs;
 	int error, flags;
 
 	flags = 0;
 	if (mntflags & MNT_FORCE) {
 		flags |= FORCECLOSE;
 	}
 #ifdef UFS_EXTATTR
 	if ((error = ufs_extattr_stop(mp, p)))
 		if (error != EOPNOTSUPP)
 			printf("ffs_unmount: ufs_extattr_stop returned %d\n",
 			    error);
 	ufs_extattr_uepm_destroy(&ump->um_extattr);
 #endif
 	if (mp->mnt_flag & MNT_SOFTDEP) {
 		if ((error = softdep_flushfiles(mp, flags, p)) != 0)
 			return (error);
 	} else {
 		if ((error = ffs_flushfiles(mp, flags, p)) != 0)
 			return (error);
 	}
 	fs = ump->um_fs;
 	if (fs->fs_ronly == 0) {
 		fs->fs_clean = fs->fs_flags & FS_UNCLEAN ? 0 : 1;
 		error = ffs_sbupdate(ump, MNT_WAIT);
 		if (error) {
 			fs->fs_clean = 0;
 			return (error);
 		}
 	}
 	ump->um_devvp->v_rdev->si_mountpoint = NULL;
 
 	vinvalbuf(ump->um_devvp, V_SAVE, NOCRED, p, 0, 0);
 	error = VOP_CLOSE(ump->um_devvp, fs->fs_ronly ? FREAD : FREAD|FWRITE,
 		NOCRED, p);
 
 	vrele(ump->um_devvp);
 
 	free(fs->fs_csp, M_UFSMNT);
 	free(fs, M_UFSMNT);
 	free(ump, M_UFSMNT);
 	mp->mnt_data = (qaddr_t)0;
 	mp->mnt_flag &= ~MNT_LOCAL;
 	return (error);
 }
 
 /*
  * Flush out all the files in a filesystem.
  */
 int
 ffs_flushfiles(mp, flags, p)
 	register struct mount *mp;
 	int flags;
 	struct proc *p;
 {
 	register struct ufsmount *ump;
 	int error;
 
 	ump = VFSTOUFS(mp);
 #ifdef QUOTA
 	if (mp->mnt_flag & MNT_QUOTA) {
 		int i;
 		error = vflush(mp, NULLVP, SKIPSYSTEM|flags);
 		if (error)
 			return (error);
 		for (i = 0; i < MAXQUOTAS; i++) {
 			if (ump->um_quotas[i] == NULLVP)
 				continue;
 			quotaoff(p, mp, i);
 		}
 		/*
 		 * Here we fall through to vflush again to ensure
 		 * that we have gotten rid of all the system vnodes.
 		 */
 	}
 #endif
 	if (ump->um_devvp->v_flag & VCOPYONWRITE) {
 		if ((error = vflush(mp, NULL, SKIPSYSTEM | flags)) != 0)
 			return (error);
 		ffs_snapshot_unmount(mp);
 		/*
 		 * Here we fall through to vflush again to ensure
 		 * that we have gotten rid of all the system vnodes.
 		 */
 	}
         /*
 	 * Flush all the files.
 	 */
 	if ((error = vflush(mp, NULL, flags)) != 0)
 		return (error);
 	/*
 	 * Flush filesystem metadata.
 	 */
 	vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY, p);
 	error = VOP_FSYNC(ump->um_devvp, p->p_ucred, MNT_WAIT, p);
 	VOP_UNLOCK(ump->um_devvp, 0, p);
 	return (error);
 }
 
 /*
  * Get file system statistics.
  */
 int
 ffs_statfs(mp, sbp, p)
 	struct mount *mp;
 	register struct statfs *sbp;
 	struct proc *p;
 {
 	register struct ufsmount *ump;
 	register struct fs *fs;
 
 	ump = VFSTOUFS(mp);
 	fs = ump->um_fs;
 	if (fs->fs_magic != FS_MAGIC)
 		panic("ffs_statfs");
 	sbp->f_bsize = fs->fs_fsize;
 	sbp->f_iosize = fs->fs_bsize;
 	sbp->f_blocks = fs->fs_dsize;
 	sbp->f_bfree = fs->fs_cstotal.cs_nbfree * fs->fs_frag +
 		fs->fs_cstotal.cs_nffree;
 	sbp->f_bavail = freespace(fs, fs->fs_minfree);
 	sbp->f_files =  fs->fs_ncg * fs->fs_ipg - ROOTINO;
 	sbp->f_ffree = fs->fs_cstotal.cs_nifree;
 	if (sbp != &mp->mnt_stat) {
 		sbp->f_type = mp->mnt_vfc->vfc_typenum;
 		bcopy((caddr_t)mp->mnt_stat.f_mntonname,
 			(caddr_t)&sbp->f_mntonname[0], MNAMELEN);
 		bcopy((caddr_t)mp->mnt_stat.f_mntfromname,
 			(caddr_t)&sbp->f_mntfromname[0], MNAMELEN);
 	}
 	return (0);
 }
 
 /*
  * Go through the disk queues to initiate sandbagged IO;
  * go through the inodes to write those that have been modified;
  * initiate the writing of the super block if it has been modified.
  *
  * Note: we are always called with the filesystem marked `MPBUSY'.
  *
  * PARAMETERS:
  *		mp	mount point to sync
  *		waitfor	MNT_WAIT to wait for the writes, otherwise the
  *			vnode locks are requested with LK_NOWAIT
  *		cred	credentials passed to VOP_FSYNC()
  *		p	calling process
  *
  * RETURNS:	0	Success
  *		!0	the last error recorded while syncing
  */
 int
 ffs_sync(mp, waitfor, cred, p)
 	struct mount *mp;
 	int waitfor;
 	struct ucred *cred;
 	struct proc *p;
 {
 	struct vnode *nvp, *vp;
 	struct inode *ip;
 	struct ufsmount *ump = VFSTOUFS(mp);
 	struct fs *fs;
 	int error, count, wait, lockreq, allerror = 0;
 
 	fs = ump->um_fs;
 	/* A read-only filesystem must never have a modified superblock. */
 	if (fs->fs_fmod != 0 && fs->fs_ronly != 0) {		/* XXX */
 		printf("fs = %s\n", fs->fs_fsmnt);
 		panic("ffs_sync: rofs mod");
 	}
 	/*
 	 * Write back each (modified) inode.
 	 */
 	wait = 0;
 	lockreq = LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK;
 	if (waitfor == MNT_WAIT) {
 		wait = 1;
 		lockreq = LK_EXCLUSIVE | LK_INTERLOCK;
 	}
 	/* mntvnode_mtx is held at the top of every pass over the list. */
 	mtx_lock(&mntvnode_mtx);
 loop:
 	for (vp = LIST_FIRST(&mp->mnt_vnodelist); vp != NULL; vp = nvp) {
 		/*
 		 * If the vnode that we are about to sync is no longer
 		 * associated with this mount point, start over.
 		 */
 		if (vp->v_mount != mp)
 			goto loop;
 		mtx_lock(&vp->v_interlock);
 		nvp = LIST_NEXT(vp, v_mntvnodes);
 		ip = VTOI(vp);
 		/*
 		 * Skip vnodes of type VNON and vnodes with neither a
 		 * pending inode update flag nor dirty buffers.
 		 */
 		if (vp->v_type == VNON || ((ip->i_flag &
 		     (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 &&
 		     TAILQ_EMPTY(&vp->v_dirtyblkhd))) {
 			mtx_unlock(&vp->v_interlock);
 			continue;
 		}
 		if (vp->v_type != VCHR) {
 			/*
 			 * vget() is called with the interlock still held
 			 * (lockreq includes LK_INTERLOCK).  On ENOENT the
 			 * scan restarts; on any other failure the vnode
 			 * is skipped.
 			 */
 			mtx_unlock(&mntvnode_mtx);
 			if ((error = vget(vp, lockreq, p)) != 0) {
 				mtx_lock(&mntvnode_mtx);
 				if (error == ENOENT)
 					goto loop;
 				continue;
 			}
 			if ((error = VOP_FSYNC(vp, cred, waitfor, p)) != 0)
 				allerror = error;
 			VOP_UNLOCK(vp, 0, p);
 			vrele(vp);
 			mtx_lock(&mntvnode_mtx);
 		} else {
 			/* Character devices only get their inode updated. */
 			mtx_unlock(&mntvnode_mtx);
 			mtx_unlock(&vp->v_interlock);
 			UFS_UPDATE(vp, wait);
 			mtx_lock(&mntvnode_mtx);
 		}
 	}
 	mtx_unlock(&mntvnode_mtx);
 	/*
 	 * Force stale file system control information to be flushed.
 	 */
 	if (waitfor == MNT_WAIT) {
 		if ((error = softdep_flushworklist(ump->um_mountp, &count, p)))
 			allerror = error;
 		/* Flushed work items may create new vnodes to clean */
 		if (count) {
 			mtx_lock(&mntvnode_mtx);
 			goto loop;
 		}
 	}
 #ifdef QUOTA
 	qsync(mp);
 #endif
 	/* The device vnode is synced for every request except MNT_LAZY. */
 	if (waitfor != MNT_LAZY) {
 		vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY, p);
 		if ((error = VOP_FSYNC(ump->um_devvp, cred, waitfor, p)) != 0)
 			allerror = error;
 		VOP_UNLOCK(ump->um_devvp, 0, p);
 	}
 	/*
 	 * Write back modified superblock.
 	 */
 	if (fs->fs_fmod != 0 && (error = ffs_sbupdate(ump, waitfor)) != 0)
 		allerror = error;
 	return (allerror);
 }
 
 /*
  * Look up a FFS dinode number to find its incore vnode, otherwise read it
  * in from disk.  If it is in core, wait for the lock bit to clear, then
  * return the inode locked.  Detection and handling of mount points must be
  * done by the calling routine.
  */
 static int ffs_inode_hash_lock;
 /*
  * ffs_inode_hash_lock is a variable to manage mutual exclusion
  * of vnode allocation and insertion into the hash, especially to
  * avoid holding more than one vnode for the same inode in the
  * hash table. ffs_inode_hash_lock must hence be tested-and-set
  * or cleared atomically, which is accomplished by
  * ffs_inode_hash_mtx.
  *
  * As vnode allocation may block during MALLOC() and zone
  * allocation, we should also do msleep() to give away the CPU
  * if anyone else is allocating a vnode. lockmgr is not suitable
  * here because someone else may insert into the hash table the
  * vnode we are trying to allocate during our sleep, in which
  * case the hash table needs to be examined once again after
  * waking up.
  */
 static struct mtx ffs_inode_hash_mtx;
 
 /*
  * PARAMETERS:
  *		mp	mount point the inode belongs to
  *		ino	inode number to look up
  *		vpp	where the resulting vnode is stored; set to
  *			NULL on failure
  *
  * RETURNS:	0	Success
  *		!0	error from getnewvnode(), bread() or ufs_vinit()
  */
 int
 ffs_vget(mp, ino, vpp)
 	struct mount *mp;
 	ino_t ino;
 	struct vnode **vpp;
 {
 	struct fs *fs;
 	struct inode *ip;
 	struct ufsmount *ump;
 	struct buf *bp;
 	struct vnode *vp;
 	dev_t dev;
 	int error, want_wakeup;
 
 	ump = VFSTOUFS(mp);
 	dev = ump->um_dev;
 restart:
 	/* Fast path: the inode is already in the hash table. */
 	if ((*vpp = ufs_ihashget(dev, ino)) != NULL) {
 		return (0);
 	}
 
 	/*
 	 * Lock out the creation of new entries in the FFS hash table in
 	 * case getnewvnode() or MALLOC() blocks, otherwise a duplicate
 	 * may occur!
 	 *
 	 * A waiter sets ffs_inode_hash_lock to -1 before sleeping so that
 	 * the holder knows a wakeup is wanted; after waking, the hash
 	 * lookup is redone from "restart".
 	 */
 	mtx_lock(&ffs_inode_hash_mtx);
 	if (ffs_inode_hash_lock) {
 		while (ffs_inode_hash_lock) {
 			ffs_inode_hash_lock = -1;
 			msleep(&ffs_inode_hash_lock, &ffs_inode_hash_mtx, PVM, "ffsvgt", 0);
 		}
 		mtx_unlock(&ffs_inode_hash_mtx);
 		goto restart;
 	}
 	ffs_inode_hash_lock = 1;
 	mtx_unlock(&ffs_inode_hash_mtx);
 
 	/*
 	 * If this MALLOC() is performed after the getnewvnode()
 	 * it might block, leaving a vnode with a NULL v_data to be
 	 * found by ffs_sync() if a sync happens to fire right then,
 	 * which will cause a panic because ffs_sync() blindly
 	 * dereferences vp->v_data (as well it should).
 	 */
 	MALLOC(ip, struct inode *, sizeof(struct inode), 
 	    ump->um_malloctype, M_WAITOK);
 
 	/* Allocate a new vnode/inode. */
 	error = getnewvnode(VT_UFS, mp, ffs_vnodeop_p, &vp);
 	if (error) {
 		/*
 		 * Do not wake up processes while holding the mutex,
 		 * otherwise the processes woken up would immediately
 		 * block on the mutex.
 		 */
 		mtx_lock(&ffs_inode_hash_mtx);
 		want_wakeup = ffs_inode_hash_lock < 0;
 		ffs_inode_hash_lock = 0;
 		mtx_unlock(&ffs_inode_hash_mtx);
 		if (want_wakeup)
 			wakeup(&ffs_inode_hash_lock);
 		*vpp = NULL;
 		FREE(ip, ump->um_malloctype);
 		return (error);
 	}
 	bzero((caddr_t)ip, sizeof(struct inode));
 	/*
 	 * FFS supports lock sharing in the stack of vnodes
 	 */
 	vp->v_vnlock = &vp->v_lock;
 	lockinit(vp->v_vnlock, PINOD, "inode", 0, LK_CANRECURSE);
 	/* Link the vnode and the in-core inode to each other. */
 	vp->v_data = ip;
 	ip->i_vnode = vp;
 	ip->i_fs = fs = ump->um_fs;
 	ip->i_dev = dev;
 	ip->i_number = ino;
 #ifdef QUOTA
 	{
 		int i;
 		for (i = 0; i < MAXQUOTAS; i++)
 			ip->i_dquot[i] = NODQUOT;
 	}
 #endif
 	/*
 	 * Put it onto its hash chain and lock it so that other requests for
 	 * this inode will block if they arrive while we are sleeping waiting
 	 * for old data structures to be purged or for the contents of the
 	 * disk portion of this inode to be read.
 	 */
 	ufs_ihashins(ip);
 
 	/*
 	 * Do not wake up processes while holding the mutex,
 	 * otherwise the processes woken up would immediately
 	 * block on the mutex.
 	 */
 	mtx_lock(&ffs_inode_hash_mtx);
 	want_wakeup = ffs_inode_hash_lock < 0;
 	ffs_inode_hash_lock = 0;
 	mtx_unlock(&ffs_inode_hash_mtx);
 	if (want_wakeup)
 		wakeup(&ffs_inode_hash_lock);
 
 	/* Read in the disk contents for the inode, copy into the inode. */
 	error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)),
 	    (int)fs->fs_bsize, NOCRED, &bp);
 	if (error) {
 		/*
 		 * The inode does not contain anything useful, so it would
 		 * be misleading to leave it on its hash chain. With mode
 		 * still zero, it will be unlinked and returned to the free
 		 * list by vput().
 		 */
 		brelse(bp);
 		vput(vp);
 		*vpp = NULL;
 		return (error);
 	}
 	ip->i_din = *((struct dinode *)bp->b_data + ino_to_fsbo(fs, ino));
 	/*
 	 * With soft updates the effective link count is set up by
 	 * softdep_load_inodeblock(); otherwise it equals the on-disk
 	 * link count.
 	 */
 	if (DOINGSOFTDEP(vp))
 		softdep_load_inodeblock(ip);
 	else
 		ip->i_effnlink = ip->i_nlink;
 	bqrelse(bp);
 
 	/*
 	 * Initialize the vnode from the inode, check for aliases.
 	 * Note that the underlying vnode may have changed.
 	 */
 	error = ufs_vinit(mp, ffs_specop_p, ffs_fifoop_p, &vp);
 	if (error) {
 		vput(vp);
 		*vpp = NULL;
 		return (error);
 	}
 	/*
 	 * Finish inode initialization now that aliasing has been resolved.
 	 */
 	ip->i_devvp = ump->um_devvp;
 	VREF(ip->i_devvp);
 	/*
 	 * Set up a generation number for this inode if it does not
 	 * already have one. This should only happen on old filesystems.
 	 * The inode is only marked modified on a writable mount.
 	 */
 	if (ip->i_gen == 0) {
 		ip->i_gen = random() / 2 + 1;
 		if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0)
 			ip->i_flag |= IN_MODIFIED;
 	}
 	/*
 	 * Ensure that uid and gid are correct. This is a temporary
 	 * fix until fsck has been changed to do the update.
 	 */
 	if (fs->fs_inodefmt < FS_44INODEFMT) {		/* XXX */
 		ip->i_uid = ip->i_din.di_ouid;		/* XXX */
 		ip->i_gid = ip->i_din.di_ogid;		/* XXX */
 	}						/* XXX */
 
 	*vpp = vp;
 	return (0);
 }
 
 /*
  * File handle to vnode.
  *
  * Stale file handles must be rejected, so the inode number carried in
  * the handle is range-checked before anything else looks at it: it has
  * to be at least ROOTINO and below fs_ncg * fs_ipg (cylinder groups
  * times inodes per group).  A handle that fails the check yields
  * ESTALE; otherwise the request is handed to ufs_fhtovp() and its
  * result is returned unchanged.
  *
  * NOTE(review): any further stale-handle checks (for example an
  * unallocated inode) are presumably made by ufs_fhtovp(); confirm there.
  */
 int
 ffs_fhtovp(mp, fhp, vpp)
 	register struct mount *mp;
 	struct fid *fhp;
 	struct vnode **vpp;
 {
 	register struct ufid *handle;
 	struct fs *sblk;
 
 	handle = (struct ufid *)fhp;
 	sblk = VFSTOUFS(mp)->um_fs;
 	/* Only an in-range inode number is passed on to the UFS layer. */
 	if (handle->ufid_ino >= ROOTINO &&
 	    handle->ufid_ino < sblk->fs_ncg * sblk->fs_ipg)
 		return (ufs_fhtovp(mp, handle, vpp));
 	return (ESTALE);
 }
 
 /*
  * Vnode pointer to file handle.
  *
  * Fills the caller-supplied handle (viewed as a struct ufid) with the
  * size of a struct ufid plus the inode number and generation number of
  * the inode behind vp.  Always returns 0.
  */
 /* ARGSUSED */
 int
 ffs_vptofh(vp, fhp)
 	struct vnode *vp;
 	struct fid *fhp;
 {
 	register struct inode *node = VTOI(vp);
 	register struct ufid *handle = (struct ufid *)fhp;
 
 	handle->ufid_len = sizeof(struct ufid);
 	handle->ufid_ino = node->i_number;
 	handle->ufid_gen = node->i_gen;
 	return (0);
 }
 
 /*
  * Initialize the filesystem; just use ufs_init.
  */
 static int
 ffs_init(vfsp)
 	struct vfsconf *vfsp;
 {
 
 	softdep_initialize();
 	mtx_init(&ffs_inode_hash_mtx, "ifsvgt", MTX_DEF);
 	return (ufs_init(vfsp));
 }
 
 /*
  * Write a superblock and associated information back to disk.
  */
 static int
 ffs_sbupdate(mp, waitfor)
 	struct ufsmount *mp;
 	int waitfor;
 {
 	register struct fs *dfs, *fs = mp->um_fs;
 	register struct buf *bp;
 	int blks;
 	void *space;
 	int i, size, error, allerror = 0;
 
 	/*
 	 * First write back the summary information.
 	 */
 	blks = howmany(fs->fs_cssize, fs->fs_fsize);
 	space = fs->fs_csp;
 	for (i = 0; i < blks; i += fs->fs_frag) {
 		size = fs->fs_bsize;
 		if (i + fs->fs_frag > blks)
 			size = (blks - i) * fs->fs_fsize;
 		bp = getblk(mp->um_devvp, fsbtodb(fs, fs->fs_csaddr + i),
 		    size, 0, 0);
 		bcopy(space, bp->b_data, (u_int)size);
 		space = (char *)space + size;
 		if (waitfor != MNT_WAIT)
 			bawrite(bp);
 		else if ((error = bwrite(bp)) != 0)
 			allerror = error;
 	}
 	/*
 	 * Now write back the superblock itself. If any errors occurred
 	 * up to this point, then fail so that the superblock avoids
 	 * being written out as clean.
 	 */
 	if (allerror)
 		return (allerror);
 	bp = getblk(mp->um_devvp, SBLOCK, (int)fs->fs_sbsize, 0, 0);
 	fs->fs_fmod = 0;
 	fs->fs_time = time_second;
 	bcopy((caddr_t)fs, bp->b_data, (u_int)fs->fs_sbsize);
 	/* Restore compatibility to old file systems.		   XXX */
 	dfs = (struct fs *)bp->b_data;				/* XXX */
 	if (fs->fs_postblformat == FS_42POSTBLFMT)		/* XXX */
 		dfs->fs_nrpos = -1;				/* XXX */
 	if (fs->fs_inodefmt < FS_44INODEFMT) {			/* XXX */
 		int32_t *lp, tmp;				/* XXX */
 								/* XXX */
 		lp = (int32_t *)&dfs->fs_qbmask;		/* XXX */
 		tmp = lp[4];					/* XXX */
 		for (i = 4; i > 0; i--)				/* XXX */
 			lp[i] = lp[i-1];			/* XXX */
 		lp[0] = tmp;					/* XXX */
 	}							/* XXX */
 	dfs->fs_maxfilesize = mp->um_savedmaxfilesize;		/* XXX */
 	if (waitfor != MNT_WAIT)
 		bawrite(bp);
 	else if ((error = bwrite(bp)) != 0)
 		allerror = error;
 	return (allerror);
 }
diff --git a/sys/ufs/ffs/fs.h b/sys/ufs/ffs/fs.h
index 53d497ce7473..4083c14161ab 100644
--- a/sys/ufs/ffs/fs.h
+++ b/sys/ufs/ffs/fs.h
@@ -1,547 +1,569 @@
 /*
  * Copyright (c) 1982, 1986, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by the University of
  *	California, Berkeley and its contributors.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)fs.h	8.13 (Berkeley) 3/21/95
  * $FreeBSD$
  */
 
 #ifndef _UFS_FFS_FS_H_
 #define _UFS_FFS_FS_H_
 
 /*
  * Each disk drive contains some number of file systems.
  * A file system consists of a number of cylinder groups.
  * Each cylinder group has inodes and data.
  *
  * A file system is described by its super-block, which in turn
  * describes the cylinder groups.  The super-block is critical
  * data and is replicated in each cylinder group to protect against
  * catastrophic loss.  This is done at `newfs' time and the critical
  * super-block data does not change, so the copies need not be
  * referenced further unless disaster strikes.
  *
  * For file system fs, the offsets of the various blocks of interest
  * are given in the super block as:
  *	[fs->fs_sblkno]		Super-block
  *	[fs->fs_cblkno]		Cylinder group block
  *	[fs->fs_iblkno]		Inode blocks
  *	[fs->fs_dblkno]		Data blocks
  * The beginning of cylinder group cg in fs, is given by
  * the ``cgbase(fs, cg)'' macro.
  *
  * The first boot and super blocks are given in absolute disk addresses.
  * The byte-offset forms are preferred, as they don't imply a sector size.
  */
 #define BBSIZE		8192
 #define SBSIZE		8192
 #define	BBOFF		((off_t)(0))
 #define	SBOFF		((off_t)(BBOFF + BBSIZE))
 #define	BBLOCK		((ufs_daddr_t)(0))
 #define	SBLOCK		((ufs_daddr_t)(BBLOCK + BBSIZE / DEV_BSIZE))
 
 /*
  * Addresses stored in inodes are capable of addressing fragments
  * of `blocks'. File system blocks of at most size MAXBSIZE can
  * be optionally broken into 2, 4, or 8 pieces, each of which is
  * addressable; these pieces may be DEV_BSIZE, or some multiple of
  * a DEV_BSIZE unit.
  *
  * Large files consist of exclusively large data blocks.  To avoid
  * undue wasted disk space, the last data block of a small file may be
  * allocated as only as many fragments of a large block as are
  * necessary.  The file system format retains only a single pointer
  * to such a fragment, which is a piece of a single large block that
  * has been divided.  The size of such a fragment is determinable from
  * information in the inode, using the ``blksize(fs, ip, lbn)'' macro.
  *
  * The file system records space availability at the fragment level;
  * to determine block availability, aligned fragments are examined.
  */
 
 /*
  * MINBSIZE is the smallest allowable block size.
  * In order to ensure that it is possible to create files of size
  * 2^32 with only two levels of indirection, MINBSIZE is set to 4096.
  * MINBSIZE must be big enough to hold a cylinder group block,
  * thus changes to (struct cg) must keep its size within MINBSIZE.
  * Note that super blocks are always of size SBSIZE,
  * and that both SBSIZE and MAXBSIZE must be >= MINBSIZE.
  */
 #define MINBSIZE	4096
 
 /*
  * The path name on which the file system is mounted is maintained
  * in fs_fsmnt. MAXMNTLEN defines the amount of space allocated in
  * the super block for this name.
  */
 #define MAXMNTLEN	512
 
 /*
  * There is a 128-byte region in the superblock reserved for in-core
  * pointers to summary information. Originally this included an array
  * of pointers to blocks of struct csum; now there are just two
  * pointers and the remaining space is padded with fs_ocsp[].
  *
  * NOCSPTRS determines the size of this padding. One pointer (fs_csp)
  * is taken away to point to a contiguous array of struct csum for
  * all cylinder groups; a second (fs_maxcluster) points to an array
  * of cluster sizes that is computed as cylinder groups are inspected.
  */
 #define	NOCSPTRS	((128 / sizeof(void *)) - 2)
 
 /*
  * A summary of contiguous blocks of various sizes is maintained
  * in each cylinder group. Normally this is set by the initial
  * value of fs_maxcontig. To conserve space, a maximum summary size
  * is set by FS_MAXCONTIG.
  */
 #define FS_MAXCONTIG	16
 
 /*
  * MINFREE gives the minimum acceptable percentage of file system
  * blocks which may be free. If the freelist drops below this level
  * only the superuser may continue to allocate blocks. This may
  * be set to 0 if no reserve of free blocks is deemed necessary,
  * however throughput drops by fifty percent if the file system
  * is run at between 95% and 100% full; thus the minimum default
  * value of fs_minfree is 5%. However, to get good clustering
  * performance, 10% is a better choice. Note that the default
  * actually defined below is 8%, not 10%. With about 10% free
  * space, fragmentation is not a problem, so we choose to
  * optimize for time.
  */
 #define MINFREE		8
 #define DEFAULTOPT	FS_OPTTIME
 
 /*
  * The maximum number of snapshot nodes that can be associated
  * with each filesystem. This limit affects only the number of
  * snapshot files that can be recorded within the superblock so
  * that they can be found when the filesystem is mounted. However,
  * maintaining too many will slow the filesystem performance, so
  * having this limit is a good idea.
  */
 #define FSMAXSNAP 20
 
 /*
  * Used to identify special blocks in snapshots:
  *
  * BLK_NOCOPY - A block that was unallocated at the time the snapshot
  *	was taken, hence does not need to be copied when written.
  * BLK_SNAP - A block held by another snapshot that is not needed by this
  *	snapshot. When the other snapshot is freed, the BLK_SNAP entries
  *	are converted to BLK_NOCOPY. These are needed to allow fsck to
  *	identify blocks that are in use by other snapshots (which are
  *	expunged from this snapshot).
  */
 #define BLK_NOCOPY ((ufs_daddr_t)(1))
 #define BLK_SNAP ((ufs_daddr_t)(2))
 
+/*
+ * Sysctl values for the fast filesystem.
+ */
+#define	FFS_ADJ_REFCNT		 1	/* adjust inode reference count */
+#define	FFS_ADJ_BLKCNT		 2	/* adjust inode used block count */
+#define	FFS_BLK_FREE		 3	/* free range of blocks in map */
+#define	FFS_DIR_FREE		 4	/* free specified dir inodes in map */
+#define	FFS_FILE_FREE		 5	/* free specified file inodes in map */
+#define	FFS_SET_FLAGS		 6	/* set filesystem flags */
+#define	FFS_MAXID		 7	/* number of valid ffs ids */
+
+/*
+ * Command structure passed in to the filesystem to adjust filesystem values.
+ *
+ * NOTE(review): presumably this accompanies the FFS_* sysctl operations
+ * defined just above, with `version' expected to equal FFS_CMD_VERSION;
+ * the code that consumes the structure is not visible here -- confirm.
+ * How `value' and `size' are interpreted would then depend on the
+ * operation selected.
+ */
+#define	FFS_CMD_VERSION		0x05181979	/* version ID */
+struct fsck_cmd {
+	int	version;	/* version of command structure */
+	int	handle;		/* reference to filesystem to be changed */
+	off_t	value;		/* inode or block number to be affected */
+	long	size;		/* amount or range to be adjusted */
+};
+
 /*
  * Per cylinder group information; summarized in blocks allocated
  * from first cylinder group data blocks.  These blocks have to be
  * read in from fs_csaddr (size fs_cssize) in addition to the
  * super block.
  *
  * The same four counters are used in three places: the in-core array
  * reached through fs_csp (one entry per cylinder group, see fs_cs()),
  * the totals kept in the superblock as fs_cstotal, and the copy kept
  * in each cylinder group block as cg_cs.
  */
 struct csum {
 	int32_t	cs_ndir;		/* number of directories */
 	int32_t	cs_nbfree;		/* number of free blocks */
 	int32_t	cs_nifree;		/* number of free inodes */
 	int32_t	cs_nffree;		/* number of free frags */
 };
 
 /*
  * Super block for an FFS file system.
  *
  * ffs_sbupdate() copies fs_sbsize bytes of this structure straight into
  * the superblock buffer, so the order and width of the members below
  * are part of the on-disk format.
  *
  * fs_ocsp[], fs_csp and fs_maxcluster together occupy the 128-byte
  * region reserved for in-core pointers (see NOCSPTRS above).
  *
  * For file systems with fs_inodefmt < FS_44INODEFMT, ffs_sbupdate()
  * rotates the five 32-bit words beginning at fs_qbmask in the copy that
  * is written out.
  *
  * fs_space[] is declared with a single element but the superblock is
  * longer than the structure; fs_postbl() and fs_rotbl() locate the
  * rotational tables either in fs_opostbl[]/fs_space[] (4.2BSD format)
  * or at the byte offsets fs_postbloff/fs_rotbloff from the start of
  * the structure.
  */
 struct fs {
 	int32_t	 fs_firstfield;		/* historic file system linked list, */
 	int32_t	 fs_unused_1;		/*     used for incore super blocks */
 	ufs_daddr_t fs_sblkno;		/* addr of super-block in filesys */
 	ufs_daddr_t fs_cblkno;		/* offset of cyl-block in filesys */
 	ufs_daddr_t fs_iblkno;		/* offset of inode-blocks in filesys */
 	ufs_daddr_t fs_dblkno;		/* offset of first data after cg */
 	int32_t	 fs_cgoffset;		/* cylinder group offset in cylinder */
 	int32_t	 fs_cgmask;		/* used to calc mod fs_ntrak */
 	time_t 	 fs_time;		/* last time written */
 	int32_t	 fs_size;		/* number of blocks in fs */
 	int32_t	 fs_dsize;		/* number of data blocks in fs */
 	int32_t	 fs_ncg;		/* number of cylinder groups */
 	int32_t	 fs_bsize;		/* size of basic blocks in fs */
 	int32_t	 fs_fsize;		/* size of frag blocks in fs */
 	int32_t	 fs_frag;		/* number of frags in a block in fs */
 /* these are configuration parameters */
 	int32_t	 fs_minfree;		/* minimum percentage of free blocks */
 	int32_t	 fs_rotdelay;		/* num of ms for optimal next block */
 	int32_t	 fs_rps;		/* disk revolutions per second */
 /* these fields can be computed from the others */
 	int32_t	 fs_bmask;		/* ``blkoff'' calc of blk offsets */
 	int32_t	 fs_fmask;		/* ``fragoff'' calc of frag offsets */
 	int32_t	 fs_bshift;		/* ``lblkno'' calc of logical blkno */
 	int32_t	 fs_fshift;		/* ``numfrags'' calc number of frags */
 /* these are configuration parameters */
 	int32_t	 fs_maxcontig;		/* max number of contiguous blks */
 	int32_t	 fs_maxbpg;		/* max number of blks per cyl group */
 /* these fields can be computed from the others */
 	int32_t	 fs_fragshift;		/* block to frag shift */
 	int32_t	 fs_fsbtodb;		/* fsbtodb and dbtofsb shift constant */
 	int32_t	 fs_sbsize;		/* actual size of super block */
 	int32_t	 fs_csmask;		/* csum block offset (now unused) */
 	int32_t	 fs_csshift;		/* csum block number (now unused) */
 	int32_t	 fs_nindir;		/* value of NINDIR */
 	int32_t	 fs_inopb;		/* value of INOPB */
 	int32_t	 fs_nspf;		/* value of NSPF */
 /* yet another configuration parameter */
 	int32_t	 fs_optim;		/* optimization preference, see below */
 /* these fields are derived from the hardware */
 	int32_t	 fs_npsect;		/* # sectors/track including spares */
 	int32_t	 fs_interleave;		/* hardware sector interleave */
 	int32_t	 fs_trackskew;		/* sector 0 skew, per track */
 /* fs_id takes the space of the unused fs_headswitch and fs_trkseek fields */
 	int32_t	 fs_id[2];		/* unique filesystem id */
 /* sizes determined by number of cylinder groups and their sizes */
 	ufs_daddr_t fs_csaddr;		/* blk addr of cyl grp summary area */
 	int32_t	 fs_cssize;		/* size of cyl grp summary area */
 	int32_t	 fs_cgsize;		/* cylinder group size */
 /* these fields are derived from the hardware */
 	int32_t	 fs_ntrak;		/* tracks per cylinder */
 	int32_t	 fs_nsect;		/* sectors per track */
 	int32_t  fs_spc;			/* sectors per cylinder */
 /* this comes from the disk driver partitioning */
 	int32_t	 fs_ncyl;		/* cylinders in file system */
 /* these fields can be computed from the others */
 	int32_t	 fs_cpg;			/* cylinders per group */
 	int32_t	 fs_ipg;			/* inodes per group */
 	int32_t	 fs_fpg;			/* blocks per group * fs_frag */
 /* this data must be re-computed after crashes */
 	struct	csum fs_cstotal;	/* cylinder summary information */
 /* these fields are cleared at mount time */
 	int8_t   fs_fmod;		/* super block modified flag */
 	int8_t   fs_clean;		/* file system is clean flag */
 	int8_t 	 fs_ronly;		/* mounted read-only flag */
 	int8_t   fs_flags;		/* see FS_ flags below */
 	u_char	 fs_fsmnt[MAXMNTLEN];	/* name mounted on */
 /* these fields retain the current block allocation info */
 	int32_t	 fs_cgrotor;		/* last cg searched */
 /* in-core pointer region; see NOCSPTRS */
 	void 	*fs_ocsp[NOCSPTRS];	/* padding; was list of fs_cs buffers */
 	struct csum *fs_csp;		/* cg summary info buffer for fs_cs */
 	int32_t	*fs_maxcluster;		/* max cluster in each cyl group */
 	int32_t	 fs_cpc;		/* cyl per cycle in postbl */
 	int16_t	 fs_opostbl[16][8];	/* old rotation block list head */
 	int32_t	 fs_snapinum[FSMAXSNAP];/* list of snapshot inode numbers */
 	int32_t	 fs_sparecon[30];	/* reserved for future constants */
 	int32_t	 fs_contigsumsize;	/* size of cluster summary array */ 
 	int32_t	 fs_maxsymlinklen;	/* max length of an internal symlink */
 	int32_t	 fs_inodefmt;		/* format of on-disk inodes */
 	u_int64_t fs_maxfilesize;	/* maximum representable file size */
 	int64_t	 fs_qbmask;		/* ~fs_bmask for use with 64-bit size */
 	int64_t	 fs_qfmask;		/* ~fs_fmask for use with 64-bit size */
 	int32_t	 fs_state;		/* validate fs_clean field */
 	int32_t	 fs_postblformat;	/* format of positional layout tables */
 	int32_t	 fs_nrpos;		/* number of rotational positions */
 	int32_t	 fs_postbloff;		/* (u_int16) rotation block list head */
 	int32_t	 fs_rotbloff;		/* (u_int8) blocks for each rotation */
 	int32_t	 fs_magic;		/* magic number */
 	u_int8_t fs_space[1];		/* list of blocks for each rotation */
 /* actually longer */
 };
 
 /*
  * Filesystem identification
  */
 #define	FS_MAGIC	0x011954	/* the fast filesystem magic number */
 #define	FS_OKAY		0x7c269d38	/* superblock checksum */
 #define FS_42INODEFMT	-1		/* 4.2BSD inode format */
 #define FS_44INODEFMT	2		/* 4.4BSD inode format */
 
 /*
  * Preference for optimization.
  */
 #define FS_OPTTIME	0	/* minimize allocation time */
 #define FS_OPTSPACE	1	/* minimize disk fragmentation */
 
 /*
  * Filesystem flags.
  */
 #define FS_UNCLEAN    0x01    /* filesystem not clean at mount */
 #define FS_DOSOFTDEP  0x02    /* filesystem using soft dependencies */
 
 /*
  * Rotational layout table format types
  */
 #define FS_42POSTBLFMT		-1	/* 4.2BSD rotational table format */
 #define FS_DYNAMICPOSTBLFMT	1	/* dynamic rotational table format */
 /*
  * Macros for access to superblock array structures
  */
 #define fs_postbl(fs, cylno) \
     (((fs)->fs_postblformat == FS_42POSTBLFMT) \
     ? ((fs)->fs_opostbl[cylno]) \
     : ((int16_t *)((u_int8_t *)(fs) + \
 	(fs)->fs_postbloff) + (cylno) * (fs)->fs_nrpos))
 #define fs_rotbl(fs) \
     (((fs)->fs_postblformat == FS_42POSTBLFMT) \
     ? ((fs)->fs_space) \
     : ((u_int8_t *)((u_int8_t *)(fs) + (fs)->fs_rotbloff)))
 
 /*
  * The size of a cylinder group is calculated by CGSIZE. The maximum size
  * is limited by the fact that cylinder groups are at most one block.
  * Its size is derived from the size of the maps maintained in the
  * cylinder group and the (struct cg) size.
  */
 #define CGSIZE(fs) \
     /* base cg */	(sizeof(struct cg) + sizeof(int32_t) + \
     /* blktot size */	(fs)->fs_cpg * sizeof(int32_t) + \
     /* blks size */	(fs)->fs_cpg * (fs)->fs_nrpos * sizeof(int16_t) + \
     /* inode map */	howmany((fs)->fs_ipg, NBBY) + \
     /* block map */	howmany((fs)->fs_cpg * (fs)->fs_spc / NSPF(fs), NBBY) +\
     /* if present */	((fs)->fs_contigsumsize <= 0 ? 0 : \
     /* cluster sum */	(fs)->fs_contigsumsize * sizeof(int32_t) + \
     /* cluster map */	howmany((fs)->fs_cpg * (fs)->fs_spc / NSPB(fs), NBBY)))
 
 /*
  * Convert cylinder group to base address of its global summary info.
  */
 #define fs_cs(fs, indx) fs_csp[indx]
 
 /*
  * Cylinder group block for a file system.
  *
  * The fixed header below is followed by variable-sized maps stored in
  * cg_space[].  The cg_*off members are byte offsets measured from the
  * start of this structure; the cg_blktot(), cg_blks(), cg_inosused(),
  * cg_blksfree(), cg_clustersfree() and cg_clustersum() macros add them
  * to the structure's address to reach the corresponding map.
  *
  * A block whose cg_magic is not CG_MAGIC is treated by those macros as
  * the old layout described by struct ocg.
  */
 #define	CG_MAGIC	0x090255
 struct cg {
 	int32_t	 cg_firstfield;		/* historic cyl groups linked list */
 	int32_t	 cg_magic;		/* magic number */
 	time_t	 cg_time;		/* time last written */
 	int32_t	 cg_cgx;		/* we are the cgx'th cylinder group */
 	int16_t	 cg_ncyl;		/* number of cyl's this cg */
 	int16_t	 cg_niblk;		/* number of inode blocks this cg */
 	int32_t	 cg_ndblk;		/* number of data blocks this cg */
 	struct	csum cg_cs;		/* cylinder summary information */
 	int32_t	 cg_rotor;		/* position of last used block */
 	int32_t	 cg_frotor;		/* position of last used frag */
 	int32_t	 cg_irotor;		/* position of last used inode */
 	int32_t	 cg_frsum[MAXFRAG];	/* counts of available frags */
 	int32_t	 cg_btotoff;		/* (int32) block totals per cylinder */
 	int32_t	 cg_boff;		/* (u_int16) free block positions */
 	int32_t	 cg_iusedoff;		/* (u_int8) used inode map */
 	int32_t	 cg_freeoff;		/* (u_int8) free block map */
 	int32_t	 cg_nextfreeoff;	/* (u_int8) next available space */
 	int32_t	 cg_clustersumoff;	/* (u_int32) counts of avail clusters */
 	int32_t	 cg_clusteroff;		/* (u_int8) free cluster map */
 	int32_t	 cg_nclusterblks;	/* number of clusters this cg */
 	int32_t	 cg_sparecon[13];	/* reserved for future use */
 	u_int8_t cg_space[1];		/* space for cylinder group maps */
 /* actually longer */
 };
 
 /*
  * Macros for access to cylinder group array structures
  *
  * cg_blktot(), cg_blks(), cg_inosused() and cg_blksfree() handle both
  * cylinder group layouts: when cg_magic is not CG_MAGIC the block is
  * reinterpreted as a struct ocg and the fixed-size array of the old
  * layout is returned; otherwise the pointer is computed from the
  * matching cg_*off byte offset.
  *
  * cg_chkmagic() is true when CG_MAGIC is found at either layout's
  * magic-number position.
  *
  * cg_clustersfree() and cg_clustersum() always use the byte offsets;
  * they have no old-layout fallback.
  *
  * All of these evaluate (cgp) more than once, so the argument must not
  * have side effects.
  */
 #define cg_blktot(cgp) \
     (((cgp)->cg_magic != CG_MAGIC) \
     ? (((struct ocg *)(cgp))->cg_btot) \
     : ((int32_t *)((u_int8_t *)(cgp) + (cgp)->cg_btotoff)))
 /* Row for cylinder `cylno'; each row holds fs_nrpos entries (new layout). */
 #define cg_blks(fs, cgp, cylno) \
     (((cgp)->cg_magic != CG_MAGIC) \
     ? (((struct ocg *)(cgp))->cg_b[cylno]) \
     : ((int16_t *)((u_int8_t *)(cgp) + \
 	(cgp)->cg_boff) + (cylno) * (fs)->fs_nrpos))
 #define cg_inosused(cgp) \
     (((cgp)->cg_magic != CG_MAGIC) \
     ? (((struct ocg *)(cgp))->cg_iused) \
     : ((u_int8_t *)((u_int8_t *)(cgp) + (cgp)->cg_iusedoff)))
 #define cg_blksfree(cgp) \
     (((cgp)->cg_magic != CG_MAGIC) \
     ? (((struct ocg *)(cgp))->cg_free) \
     : ((u_int8_t *)((u_int8_t *)(cgp) + (cgp)->cg_freeoff)))
 #define cg_chkmagic(cgp) \
     ((cgp)->cg_magic == CG_MAGIC || ((struct ocg *)(cgp))->cg_magic == CG_MAGIC)
 #define cg_clustersfree(cgp) \
     ((u_int8_t *)((u_int8_t *)(cgp) + (cgp)->cg_clusteroff))
 #define cg_clustersum(cgp) \
     ((int32_t *)((u_int8_t *)(cgp) + (cgp)->cg_clustersumoff))
 
 /*
  * The following structure is defined
  * for compatibility with old file systems.
  *
  * It differs from struct cg in that the maps are fixed-size arrays
  * inside the structure rather than being located through byte offsets,
  * and in that cg_magic comes after cg_iused[] instead of being the
  * second member (that slot is cg_unused_1 here).  The cg_*() access
  * macros cast a cylinder group block to this type when its cg_magic,
  * read through struct cg, is not CG_MAGIC.
  */
 struct ocg {
 	int32_t	 cg_firstfield;		/* historic linked list of cyl groups */
 	int32_t	 cg_unused_1;		/*     used for incore cyl groups */
 	time_t	 cg_time;		/* time last written */
 	int32_t	 cg_cgx;		/* we are the cgx'th cylinder group */
 	int16_t	 cg_ncyl;		/* number of cyl's this cg */
 	int16_t	 cg_niblk;		/* number of inode blocks this cg */
 	int32_t	 cg_ndblk;		/* number of data blocks this cg */
 	struct	csum cg_cs;		/* cylinder summary information */
 	int32_t	 cg_rotor;		/* position of last used block */
 	int32_t	 cg_frotor;		/* position of last used frag */
 	int32_t	 cg_irotor;		/* position of last used inode */
 	int32_t	 cg_frsum[8];		/* counts of available frags */
 	int32_t	 cg_btot[32];		/* block totals per cylinder */
 	int16_t	 cg_b[32][8];		/* positions of free blocks */
 	u_int8_t cg_iused[256];		/* used inode map */
 	int32_t	 cg_magic;		/* magic number */
 	u_int8_t cg_free[1];		/* free block map */
 /* actually longer */
 };
 
 /*
  * Turn file system block numbers into disk block addresses.
  * This maps file system blocks to device size blocks.
  */
 #define fsbtodb(fs, b)	((b) << (fs)->fs_fsbtodb)
 #define	dbtofsb(fs, b)	((b) >> (fs)->fs_fsbtodb)
 
 /*
  * Cylinder group macros to locate things in cylinder groups.
  * They calc file system addresses of cylinder group data structures.
  */
 #define	cgbase(fs, c)	((ufs_daddr_t)((fs)->fs_fpg * (c)))
 #define	cgdmin(fs, c)	(cgstart(fs, c) + (fs)->fs_dblkno)	/* 1st data */
 #define	cgimin(fs, c)	(cgstart(fs, c) + (fs)->fs_iblkno)	/* inode blk */
 #define	cgsblock(fs, c)	(cgstart(fs, c) + (fs)->fs_sblkno)	/* super blk */
 #define	cgtod(fs, c)	(cgstart(fs, c) + (fs)->fs_cblkno)	/* cg block */
 #define cgstart(fs, c)							\
 	(cgbase(fs, c) + (fs)->fs_cgoffset * ((c) & ~((fs)->fs_cgmask)))
 
 /*
  * Macros for handling inode numbers:
  *     inode number to file system block offset.
  *     inode number to cylinder group number.
  *     inode number to file system block address.
  */
 #define	ino_to_cg(fs, x)	((x) / (fs)->fs_ipg)
 #define	ino_to_fsba(fs, x)						\
 	((ufs_daddr_t)(cgimin(fs, ino_to_cg(fs, x)) +			\
 	    (blkstofrags((fs), (((x) % (fs)->fs_ipg) / INOPB(fs))))))
 #define	ino_to_fsbo(fs, x)	((x) % INOPB(fs))
 
 /*
  * Give cylinder group number for a file system block.
  * Give cylinder group block number for a file system block.
  */
 #define	dtog(fs, d)	((d) / (fs)->fs_fpg)
 #define	dtogd(fs, d)	((d) % (fs)->fs_fpg)
 
 /*
  * Extract the bits for a block from a map.
  * Compute the cylinder and rotational position of a cyl block addr.
  */
 #define blkmap(fs, map, loc) \
     (((map)[(loc) / NBBY] >> ((loc) % NBBY)) & (0xff >> (NBBY - (fs)->fs_frag)))
 #define cbtocylno(fs, bno) \
     ((bno) * NSPF(fs) / (fs)->fs_spc)
 #define cbtorpos(fs, bno) \
     (((bno) * NSPF(fs) % (fs)->fs_spc / (fs)->fs_nsect * (fs)->fs_trackskew + \
      (bno) * NSPF(fs) % (fs)->fs_spc % (fs)->fs_nsect * (fs)->fs_interleave) % \
      (fs)->fs_nsect * (fs)->fs_nrpos / (fs)->fs_npsect)
 
 /*
  * The following macros optimize certain frequently calculated
  * quantities by using shifts and masks in place of divisions,
  * modulos, and multiplications.
  */
 #define blkoff(fs, loc)		/* calculates (loc % fs->fs_bsize) */ \
 	((loc) & (fs)->fs_qbmask)
 #define fragoff(fs, loc)	/* calculates (loc % fs->fs_fsize) */ \
 	((loc) & (fs)->fs_qfmask)
 #define lblktosize(fs, blk)	/* calculates ((off_t)blk * fs->fs_bsize) */ \
 	((off_t)(blk) << (fs)->fs_bshift)
 /* Use this only when `blk' is known to be small, e.g., < NDADDR. */
 #define smalllblktosize(fs, blk)    /* calculates (blk * fs->fs_bsize) */ \
 	((blk) << (fs)->fs_bshift)
 #define lblkno(fs, loc)		/* calculates (loc / fs->fs_bsize) */ \
 	((loc) >> (fs)->fs_bshift)
 #define numfrags(fs, loc)	/* calculates (loc / fs->fs_fsize) */ \
 	((loc) >> (fs)->fs_fshift)
 #define blkroundup(fs, size)	/* calculates roundup(size, fs->fs_bsize) */ \
 	(((size) + (fs)->fs_qbmask) & (fs)->fs_bmask)
 #define fragroundup(fs, size)	/* calculates roundup(size, fs->fs_fsize) */ \
 	(((size) + (fs)->fs_qfmask) & (fs)->fs_fmask)
 #define fragstoblks(fs, frags)	/* calculates (frags / fs->fs_frag) */ \
 	((frags) >> (fs)->fs_fragshift)
 #define blkstofrags(fs, blks)	/* calculates (blks * fs->fs_frag) */ \
 	((blks) << (fs)->fs_fragshift)
 #define fragnum(fs, fsb)	/* calculates (fsb % fs->fs_frag) */ \
 	((fsb) & ((fs)->fs_frag - 1))
 #define blknum(fs, fsb)		/* calculates rounddown(fsb, fs->fs_frag) */ \
 	((fsb) &~ ((fs)->fs_frag - 1))
 
 /*
  * Determine the number of available frags given a
  * percentage to hold in reserve.
  */
 #define freespace(fs, percentreserved) \
 	(blkstofrags((fs), (fs)->fs_cstotal.cs_nbfree) + \
 	(fs)->fs_cstotal.cs_nffree - \
 	((off_t)((fs)->fs_dsize) * (percentreserved) / 100))
 
 /*
  * Determining the size of a file block in the file system.
  */
 #define blksize(fs, ip, lbn) \
 	(((lbn) >= NDADDR || (ip)->i_size >= smalllblktosize(fs, (lbn) + 1)) \
 	    ? (fs)->fs_bsize \
 	    : (fragroundup(fs, blkoff(fs, (ip)->i_size))))
 #define dblksize(fs, dip, lbn) \
 	(((lbn) >= NDADDR || \
 	  (dip)->di_size >= (u_int64_t)smalllblktosize(fs, (lbn) + 1)) \
 	    ? (fs)->fs_bsize \
 	    : (fragroundup(fs, blkoff(fs, (dip)->di_size))))
 #define sblksize(fs, size, lbn) \
 	(((lbn) >= NDADDR || (size) >= ((lbn) + 1) << (fs)->fs_bshift) \
 	  ? (fs)->fs_bsize \
 	  : (fragroundup(fs, blkoff(fs, (size)))))
 
 
 /*
  * Number of disk sectors per block/fragment; assumes DEV_BSIZE byte
  * sector size.
  */
 #define	NSPB(fs)	((fs)->fs_nspf << (fs)->fs_fragshift)
 #define	NSPF(fs)	((fs)->fs_nspf)
 
 /*
  * Number of inodes in a secondary storage block/fragment.
  */
 #define	INOPB(fs)	((fs)->fs_inopb)
 #define	INOPF(fs)	((fs)->fs_inopb >> (fs)->fs_fragshift)
 
 /*
  * Number of indirects in a file system block.
  */
 #define	NINDIR(fs)	((fs)->fs_nindir)
 
 extern int inside[], around[];
 extern u_char *fragtbl[];
 
 #endif
diff --git a/sys/ufs/ufs/ufs_inode.c b/sys/ufs/ufs/ufs_inode.c
index 96f0649491f9..d4ed8b3efaad 100644
--- a/sys/ufs/ufs/ufs_inode.c
+++ b/sys/ufs/ufs/ufs_inode.c
@@ -1,163 +1,171 @@
 /*
  * Copyright (c) 1991, 1993, 1995
  *	The Regents of the University of California.  All rights reserved.
  * (c) UNIX System Laboratories, Inc.
  * All or some portions of this file are derived from material licensed
  * to the University of California by American Telephone and Telegraph
  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
  * the permission of UNIX System Laboratories, Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by the University of
  *	California, Berkeley and its contributors.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)ufs_inode.c	8.9 (Berkeley) 5/14/95
  * $FreeBSD$
  */
 
 #include "opt_quota.h"
 #include "opt_ufs.h"
 
 #include <sys/param.h>
 #include <sys/vnode.h>
 #include <sys/mount.h>
 #include <sys/malloc.h>
 #include <sys/mutex.h>
 
 #include <ufs/ufs/extattr.h>
 #include <ufs/ufs/quota.h>
 #include <ufs/ufs/inode.h>
 #include <ufs/ufs/ufsmount.h>
 #include <ufs/ufs/ufs_extern.h>
 
 /*
  * Last reference to an inode.  If necessary, write or delete it.
  *
  * ap->a_vp is the vnode going inactive; ap->a_p is the process handed on
  * to UFS_TRUNCATE(), ufs_extattr_vnode_inactive(), VOP_UNLOCK() and
  * vrecycle().
  *
  * Steps, in order:
  *  - If i_mode is already 0 the inode is treated as stale and nothing
  *    but the unlock/recycle at "out" is done.
  *  - If i_nlink <= 0 and the mount is not MNT_RDONLY, the file is
  *    truncated to offset 0, i_rdev and i_mode are cleared, and the inode
  *    is released with UFS_VFREE().
  *  - If any of IN_ACCESS, IN_CHANGE, IN_MODIFIED or IN_UPDATE is set,
  *    the inode is updated with UFS_UPDATE(), or the IN_ACCESS flag is
  *    dropped (see the comment at that test).
  *  - The vnode is unlocked, and passed to vrecycle() when i_mode is 0.
  *
  * Returns the value of UFS_TRUNCATE() if it was called, otherwise 0.
  */
 int
 ufs_inactive(ap)
 	struct vop_inactive_args /* {
 		struct vnode *a_vp;
 		struct proc *a_p;
 	} */ *ap;
 {
 	struct vnode *vp = ap->a_vp;
 	struct inode *ip = VTOI(vp);
 	struct proc *p = ap->a_p;
 	int mode, error = 0;
 
 	if (prtactive && vp->v_usecount != 0)
 		vprint("ufs_inactive: pushing active", vp);
 
 	/*
 	 * Ignore inodes related to stale file handles.
 	 */
 	if (ip->i_mode == 0)
 		goto out;
 	/*
 	 * No links remain and the mount is writable: release the file's
 	 * contents and then the inode itself.
 	 */
 	if (ip->i_nlink <= 0 && (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
 		/* The return value is deliberately ignored here. */
 		(void) vn_write_suspend_wait(vp, NULL, V_WAIT);
 #ifdef QUOTA
 		/*
 		 * NOTE(review): presumably gives one inode back to the
 		 * owner's quota when getinoquota() returns 0 -- confirm.
 		 */
 		if (!getinoquota(ip))
 			(void)chkiq(ip, -1, NOCRED, 0);
 #endif
 #ifdef UFS_EXTATTR
 		ufs_extattr_vnode_inactive(ap->a_vp, ap->a_p);
 #endif
 		/*
 		 * A truncate failure does not stop the inode from being
 		 * freed below; it is only reported through the return value.
 		 */
 		error = UFS_TRUNCATE(vp, (off_t)0, 0, NOCRED, p);
+		/*
+		 * Setting the mode to zero needs to wait for the inode
+		 * to be written just as does a change to the link count.
+		 * So, rather than creating a new entry point to do the
+		 * same thing, we just use softdep_change_linkcnt().
+		 */
 		ip->i_rdev = 0;
 		/* UFS_VFREE() is given the mode the inode had before clearing. */
 		mode = ip->i_mode;
 		ip->i_mode = 0;
 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
+		if (DOINGSOFTDEP(vp))
+			softdep_change_linkcnt(ip);
 		UFS_VFREE(vp, ip->i_number, mode);
 	}
 	/*
 	 * Handle pending inode flags.  When IN_ACCESS is the only one set
 	 * (none of IN_CHANGE, IN_UPDATE, IN_MODIFIED) and the V_NOWAIT call
 	 * returns nonzero, the access update is discarded instead of being
 	 * waited for; otherwise wait with V_WAIT and call UFS_UPDATE().
 	 */
 	if (ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) {
 		if ((ip->i_flag & (IN_CHANGE | IN_UPDATE | IN_MODIFIED)) == 0 &&
 		    vn_write_suspend_wait(vp, NULL, V_NOWAIT)) {
 			ip->i_flag &= ~IN_ACCESS;
 		} else {
 			(void) vn_write_suspend_wait(vp, NULL, V_WAIT);
 			UFS_UPDATE(vp, 0);
 		}
 	}
 out:
 	VOP_UNLOCK(vp, 0, p);
 	/*
 	 * If we are done with the inode, reclaim it
 	 * so that it can be reused immediately.
 	 */
 	if (ip->i_mode == 0)
 		vrecycle(vp, NULL, p);
 	return (error);
 }
 
 /*
  * Reclaim an inode so that it can be used for other purposes.
  *
  * ap->a_vp is the vnode whose in-core inode is being torn down.  The
  * inode is updated first if IN_LAZYMOD is set, then removed from the
  * inode hash, its device vnode and quota references are released, the
  * vnode lock is destroyed and the inode memory is freed.
  *
  * Always returns 0.
  */
 int
 ufs_reclaim(ap)
 	struct vop_reclaim_args /* {
 		struct vnode *a_vp;
 		struct proc *a_p;
 	} */ *ap;
 {
 	struct vnode *vp;
 	struct inode *ip;
 #ifdef QUOTA
 	int type;
 #endif
 
 	vp = ap->a_vp;
 	if (prtactive && vp->v_usecount != 0)
 		vprint("ufs_reclaim: pushing active", vp);
 	ip = VTOI(vp);
 
 	/* Promote a pending IN_LAZYMOD to IN_MODIFIED and update the inode. */
 	if ((ip->i_flag & IN_LAZYMOD) != 0) {
 		ip->i_flag |= IN_MODIFIED;
 		UFS_UPDATE(vp, 0);
 	}
 
 	/* Take the inode off its hash chain. */
 	ufs_ihashrem(ip);
 
 	/* Purge old data structures associated with the inode. */
 	cache_purge(vp);
 	if (ip->i_devvp != NULL) {
 		vrele(ip->i_devvp);
 		ip->i_devvp = NULL;
 	}
 #ifdef QUOTA
 	/* Release every quota reference the inode still holds. */
 	for (type = 0; type < MAXQUOTAS; type++) {
 		if (ip->i_dquot[type] == NODQUOT)
 			continue;
 		dqrele(vp, ip->i_dquot[type]);
 		ip->i_dquot[type] = NODQUOT;
 	}
 #endif
 
 	/* Destroy the vnode lock, then free the inode and detach it. */
 	lockdestroy(&vp->v_lock);
 	FREE(vp->v_data, VFSTOUFS(vp->v_mount)->um_malloctype);
 	vp->v_data = NULL;
 	return (0);
 }