Page MenuHomeFreeBSD

D26136.id79022.diff
No OneTemporary

D26136.id79022.diff

Index: sys/kern/subr_syscall.c
===================================================================
--- sys/kern/subr_syscall.c
+++ sys/kern/subr_syscall.c
@@ -205,6 +205,8 @@
KASSERT((td->td_pflags & TDP_FORKING) == 0,
("fork() did not clear TDP_FORKING upon completion"));
+ KASSERT(td->td_errno != ERELOOKUP,
+ ("ERELOOKUP not consumed syscall %d", td->td_sa.code));
p = td->td_proc;
sa = &td->td_sa;
Index: sys/kern/uipc_usrreq.c
===================================================================
--- sys/kern/uipc_usrreq.c
+++ sys/kern/uipc_usrreq.c
@@ -671,6 +671,8 @@
vput(nd.ni_dvp);
if (error) {
vn_finished_write(mp);
+ if (error == ERELOOKUP)
+ goto restart;
goto error;
}
vp = nd.ni_vp;
Index: sys/kern/vfs_subr.c
===================================================================
--- sys/kern/vfs_subr.c
+++ sys/kern/vfs_subr.c
@@ -1915,7 +1915,10 @@
}
if (bo->bo_dirty.bv_cnt > 0) {
BO_UNLOCK(bo);
- if ((error = BO_SYNC(bo, MNT_WAIT)) != 0)
+ do {
+ error = BO_SYNC(bo, MNT_WAIT);
+ } while (error == ERELOOKUP);
+ if (error != 0)
return (error);
/*
* XXX We could save a lock/unlock if this was only
@@ -3636,7 +3639,9 @@
vm_object_page_clean(vp->v_object, 0, 0, 0);
VM_OBJECT_WUNLOCK(vp->v_object);
}
- error = VOP_FSYNC(vp, MNT_WAIT, td);
+ do {
+ error = VOP_FSYNC(vp, MNT_WAIT, td);
+ } while (error == ERELOOKUP);
if (error != 0) {
VOP_UNLOCK(vp);
vdrop(vp);
Index: sys/kern/vfs_syscalls.c
===================================================================
--- sys/kern/vfs_syscalls.c
+++ sys/kern/vfs_syscalls.c
@@ -1384,6 +1384,8 @@
NDFREE(&nd, NDF_ONLY_PNBUF);
vput(nd.ni_dvp);
vn_finished_write(mp);
+ if (error == ERELOOKUP)
+ goto restart;
return (error);
}
@@ -1470,6 +1472,8 @@
vput(nd.ni_dvp);
vn_finished_write(mp);
NDFREE(&nd, NDF_ONLY_PNBUF);
+ if (error == ERELOOKUP)
+ goto restart;
return (error);
}
@@ -1568,7 +1572,7 @@
return (error);
NDFREE(&nd, NDF_ONLY_PNBUF);
error = kern_linkat_vp(td, nd.ni_vp, fd2, path2, segflag);
- } while (error == EAGAIN);
+ } while (error == EAGAIN || error == ERELOOKUP);
return (error);
}
@@ -1741,6 +1745,8 @@
NDFREE(&nd, NDF_ONLY_PNBUF);
vput(nd.ni_dvp);
vn_finished_write(mp);
+ if (error == ERELOOKUP)
+ goto restart;
out:
if (segflg != UIO_SYSSPACE)
uma_zfree(namei_zone, tmppath);
@@ -1791,6 +1797,8 @@
NDFREE(&nd, NDF_ONLY_PNBUF);
vput(nd.ni_dvp);
vn_finished_write(mp);
+ if (error == ERELOOKUP)
+ goto restart;
return (error);
}
@@ -1937,6 +1945,8 @@
vrele(vp);
else
vput(vp);
+ if (error == ERELOOKUP)
+ goto restart;
fdout:
if (fp != NULL)
fdrop(fp, td);
@@ -3395,7 +3405,8 @@
int error;
if (length < 0)
- return(EINVAL);
+ return (EINVAL);
+retry:
NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, td);
if ((error = namei(&nd)) != 0)
return (error);
@@ -3424,6 +3435,8 @@
vn_finished_write(mp);
vn_rangelock_unlock(vp, rl_cookie);
vrele(vp);
+ if (error == ERELOOKUP)
+ goto retry;
return (error);
}
@@ -3479,6 +3492,7 @@
if (!fullsync)
/* XXXKIB: compete outstanding aio writes */;
#endif
+retry:
error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
if (error != 0)
goto drop;
@@ -3498,6 +3512,8 @@
error = fullsync ? VOP_FSYNC(vp, MNT_WAIT, td) : VOP_FDATASYNC(vp, td);
VOP_UNLOCK(vp);
vn_finished_write(mp);
+ if (error == ERELOOKUP)
+ goto retry;
drop:
fdrop(fp, td);
return (error);
@@ -3710,6 +3726,8 @@
vrele(fromnd.ni_startdir);
if (error == -1)
return (0);
+ if (error == ERELOOKUP)
+ goto again;
return (error);
}
@@ -3803,6 +3821,8 @@
if (error == 0)
vput(nd.ni_vp);
vn_finished_write(mp);
+ if (error == ERELOOKUP)
+ goto restart;
return (error);
}
@@ -3903,6 +3923,8 @@
vrele(nd.ni_dvp);
else
vput(nd.ni_dvp);
+ if (error == ERELOOKUP)
+ goto restart;
fdout:
if (fp != NULL)
fdrop(fp, td);
@@ -4416,7 +4438,8 @@
if (error != 0)
return (error);
VOP_UNLOCK(vp);
- } while ((error = kern_linkat_vp(td, vp, fd, path, pathseg)) == EAGAIN);
+ error = kern_linkat_vp(td, vp, fd, path, pathseg);
+ } while (error == EAGAIN || error == ERELOOKUP);
return (error);
}
Index: sys/kern/vfs_vnops.c
===================================================================
--- sys/kern/vfs_vnops.c
+++ sys/kern/vfs_vnops.c
@@ -70,6 +70,7 @@
#include <sys/filio.h>
#include <sys/resourcevar.h>
#include <sys/rwlock.h>
+#include <sys/prng.h>
#include <sys/sx.h>
#include <sys/sleepqueue.h>
#include <sys/sysctl.h>
@@ -274,6 +275,8 @@
vn_finished_write(mp);
if (error) {
NDFREE(ndp, NDF_ONLY_PNBUF);
+ if (error == ERELOOKUP)
+ goto restart;
return (error);
}
fmode &= ~O_TRUNC;
@@ -1523,6 +1526,7 @@
vp = fp->f_vnode;
+retry:
/*
* Lock the whole range for truncation. Otherwise split i/o
* might happen partly before and partly after the truncation.
@@ -1549,6 +1553,8 @@
vn_finished_write(mp);
out1:
vn_rangelock_unlock(vp, rl_cookie);
+ if (error == ERELOOKUP)
+ goto retry;
return (error);
}
@@ -3315,3 +3321,79 @@
return (error);
}
+
+/*
+ * Lock pair of vnodes vp1, vp2, avoiding lock order
+ * reversal. vp1_locked indicates if vp1 is exclusively locked or not,
+ * vnode must not be shared-locked by the current thread. Same for
+ * vp2 and vp2_locked. One of the vnodes can be NULL.
+ *
+ * Function returns with both vnodes exclusively locked, and
+ * guarantees that it does not create lock order reversal with other
+ * threads during its execution. Both vnodes could be unlocked
+ * temporary (and reclaimed).
+ */
+void
+vn_lock_pair(struct vnode *vp1, bool vp1_locked, struct vnode *vp2,
+ bool vp2_locked)
+{
+ int error;
+
+ if (vp1 == NULL && vp2 == NULL)
+ return;
+ if (vp1 != NULL) {
+ if (vp1_locked)
+ ASSERT_VOP_ELOCKED(vp1, "vp1");
+ else
+ ASSERT_VOP_UNLOCKED(vp1, "vp1");
+ } else {
+ vp1_locked = true;
+ }
+ if (vp2 != NULL) {
+ if (vp2_locked)
+ ASSERT_VOP_ELOCKED(vp2, "vp2");
+ else
+ ASSERT_VOP_UNLOCKED(vp2, "vp2");
+ } else {
+ vp2_locked = true;
+ }
+ if (!vp1_locked && !vp2_locked) {
+ vn_lock(vp1, LK_EXCLUSIVE | LK_RETRY);
+ vp1_locked = true;
+ }
+
+ for (;;) {
+ if (vp1_locked && vp2_locked)
+ break;
+ if (vp1_locked && vp2 != NULL) {
+ if (vp1 != NULL) {
+ error = VOP_LOCK1(vp2, LK_EXCLUSIVE | LK_NOWAIT,
+ __FILE__, __LINE__);
+ if (error == 0)
+ break;
+ VOP_UNLOCK(vp1);
+ vp1_locked = false;
+ pause("vlp1", prng32_bounded(100));
+ }
+ vn_lock(vp2, LK_EXCLUSIVE | LK_RETRY);
+ vp2_locked = true;
+ }
+ if (vp2_locked && vp1 != NULL) {
+ if (vp2 != NULL) {
+ error = VOP_LOCK1(vp1, LK_EXCLUSIVE | LK_NOWAIT,
+ __FILE__, __LINE__);
+ if (error == 0)
+ break;
+ VOP_UNLOCK(vp2);
+ vp2_locked = false;
+ pause("vlp2", prng32_bounded(100));
+ }
+ vn_lock(vp1, LK_EXCLUSIVE | LK_RETRY);
+ vp1_locked = true;
+ }
+ }
+ if (vp1 != NULL)
+ ASSERT_VOP_ELOCKED(vp1, "vp1 ret");
+ if (vp2 != NULL)
+ ASSERT_VOP_ELOCKED(vp2, "vp2 ret");
+}
Index: sys/sys/vnode.h
===================================================================
--- sys/sys/vnode.h
+++ sys/sys/vnode.h
@@ -769,6 +769,9 @@
int vn_io_fault_pgmove(vm_page_t ma[], vm_offset_t offset, int xfersize,
struct uio *uio);
+void vn_lock_pair(struct vnode *vp1, bool vp1_locked, struct vnode *vp2,
+ bool vp2_locked);
+
void vn_seqc_write_begin_unheld_locked(struct vnode *vp);
void vn_seqc_write_begin_unheld(struct vnode *vp);
void vn_seqc_write_begin_locked(struct vnode *vp);
Index: sys/ufs/ffs/ffs_alloc.c
===================================================================
--- sys/ufs/ffs/ffs_alloc.c
+++ sys/ufs/ffs/ffs_alloc.c
@@ -3468,7 +3468,7 @@
break;
}
dp = VTOI(dvp);
- dp->i_offset = 12; /* XXX mastertemplate.dot_reclen */
+ SET_I_OFFSET(dp, 12); /* XXX mastertemplate.dot_reclen */
error = ufs_dirrewrite(dp, VTOI(fdvp), (ino_t)cmd.size,
DT_DIR, 0);
cache_purge(fdvp);
Index: sys/ufs/ffs/ffs_extern.h
===================================================================
--- sys/ufs/ffs/ffs_extern.h
+++ sys/ufs/ffs/ffs_extern.h
@@ -173,6 +173,9 @@
void softdep_freefile(struct vnode *, ino_t, int);
int softdep_request_cleanup(struct fs *, struct vnode *,
struct ucred *, int);
+int softdep_prerename(struct vnode *, struct vnode *, struct vnode *,
+ struct vnode *);
+int softdep_prelink(struct vnode *, struct vnode *, int);
void softdep_setup_freeblocks(struct inode *, off_t, int);
void softdep_setup_inomapdep(struct buf *, struct inode *, ino_t, int);
void softdep_setup_blkmapdep(struct buf *, struct mount *, ufs2_daddr_t,
Index: sys/ufs/ffs/ffs_inode.c
===================================================================
--- sys/ufs/ffs/ffs_inode.c
+++ sys/ufs/ffs/ffs_inode.c
@@ -67,6 +67,17 @@
static int ffs_indirtrunc(struct inode *, ufs2_daddr_t, ufs2_daddr_t,
ufs2_daddr_t, int, ufs2_daddr_t *);
+static void
+ffs_inode_bwrite(struct vnode *vp, struct buf *bp, int flags)
+{
+ if ((flags & IO_SYNC) != 0)
+ bwrite(bp);
+ else if (DOINGASYNC(vp))
+ bdwrite(bp);
+ else
+ bawrite(bp);
+}
+
/*
* Update the access, modified, and inode change times as specified by the
* IN_ACCESS, IN_UPDATE, and IN_CHANGE flags respectively. Write the inode
@@ -357,12 +368,7 @@
DIP_SET(ip, i_size, length);
if (bp->b_bufsize == fs->fs_bsize)
bp->b_flags |= B_CLUSTEROK;
- if (flags & IO_SYNC)
- bwrite(bp);
- else if (DOINGASYNC(vp))
- bdwrite(bp);
- else
- bawrite(bp);
+ ffs_inode_bwrite(vp, bp, flags);
UFS_INODE_SET_FLAG(ip, IN_SIZEMOD | IN_CHANGE | IN_UPDATE);
return (ffs_update(vp, waitforupdate));
}
@@ -456,6 +462,8 @@
error = UFS_BALLOC(vp, length - 1, 1, cred, flags, &bp);
if (error)
return (error);
+ ffs_inode_bwrite(vp, bp, flags);
+
/*
* When we are doing soft updates and the UFS_BALLOC
* above fills in a direct block hole with a full sized
@@ -468,6 +476,10 @@
fragroundup(fs, blkoff(fs, length)) < fs->fs_bsize &&
(error = ffs_syncvnode(vp, MNT_WAIT, 0)) != 0)
return (error);
+
+ error = UFS_BALLOC(vp, length - 1, 1, cred, flags, &bp);
+ if (error)
+ return (error);
ip->i_size = length;
DIP_SET(ip, i_size, length);
size = blksize(fs, ip, lbn);
@@ -478,12 +490,7 @@
allocbuf(bp, size);
if (bp->b_bufsize == fs->fs_bsize)
bp->b_flags |= B_CLUSTEROK;
- if (flags & IO_SYNC)
- bwrite(bp);
- else if (DOINGASYNC(vp))
- bdwrite(bp);
- else
- bawrite(bp);
+ ffs_inode_bwrite(vp, bp, flags);
UFS_INODE_SET_FLAG(ip, IN_SIZEMOD | IN_CHANGE | IN_UPDATE);
}
/*
Index: sys/ufs/ffs/ffs_snapshot.c
===================================================================
--- sys/ufs/ffs/ffs_snapshot.c
+++ sys/ufs/ffs/ffs_snapshot.c
@@ -301,6 +301,8 @@
NDFREE(&nd, NDF_ONLY_PNBUF);
vn_finished_write(wrtmp);
vrele(nd.ni_dvp);
+ if (error == ERELOOKUP)
+ goto restart;
return (error);
}
vp = nd.ni_vp;
@@ -368,8 +370,12 @@
if (error)
goto out;
bawrite(nbp);
- if (cg % 10 == 0)
- ffs_syncvnode(vp, MNT_WAIT, 0);
+ if (cg % 10 == 0) {
+ error = ffs_syncvnode(vp, MNT_WAIT, 0);
+ /* vp possibly reclaimed if unlocked */
+ if (error != 0)
+ goto out;
+ }
}
/*
* Copy all the cylinder group maps. Although the
@@ -391,8 +397,8 @@
goto out;
error = cgaccount(cg, vp, nbp, 1);
bawrite(nbp);
- if (cg % 10 == 0)
- ffs_syncvnode(vp, MNT_WAIT, 0);
+ if (cg % 10 == 0 && error == 0)
+ error = ffs_syncvnode(vp, MNT_WAIT, 0);
if (error)
goto out;
}
Index: sys/ufs/ffs/ffs_softdep.c
===================================================================
--- sys/ufs/ffs/ffs_softdep.c
+++ sys/ufs/ffs/ffs_softdep.c
@@ -609,6 +609,27 @@
panic("softdep_freework called");
}
+int
+softdep_prerename(fdvp, fvp, tdvp, tvp)
+ struct vnode *fdvp;
+ struct vnode *fvp;
+ struct vnode *tdvp;
+ struct vnode *tvp;
+{
+
+ panic("softdep_prerename called");
+}
+
+void
+softdep_prelink(dvp, vp, will_direnter)
+ struct vnode *dvp;
+ struct vnode *vp;
+ int will_direnter;
+{
+
+ panic("softdep_prelink called");
+}
+
#else
FEATURE(softupdates, "FFS soft-updates support");
@@ -748,7 +769,7 @@
static void clear_unlinked_inodedep(struct inodedep *);
static struct inodedep *first_unlinked_inodedep(struct ufsmount *);
static int flush_pagedep_deps(struct vnode *, struct mount *,
- struct diraddhd *);
+ struct diraddhd *, struct buf *);
static int free_pagedep(struct pagedep *);
static int flush_newblk_dep(struct vnode *, struct mount *, ufs_lbn_t);
static int flush_inodedep_deps(struct vnode *, struct mount *, ino_t);
@@ -925,7 +946,6 @@
static int journal_space(struct ufsmount *, int);
static void journal_suspend(struct ufsmount *);
static int journal_unsuspend(struct ufsmount *ump);
-static void softdep_prelink(struct vnode *, struct vnode *);
static void add_to_journal(struct worklist *);
static void remove_from_journal(struct worklist *);
static bool softdep_excess_items(struct ufsmount *, int);
@@ -1389,6 +1409,136 @@
/* List of all filesystems mounted with soft updates */
static TAILQ_HEAD(, mount_softdeps) softdepmounts;
+/*
+ * This function fetches inode inum on mount point mp. We already
+ * hold a locked vnode vp, and might have a locked buffer bp belonging
+ * to vp.
+
+ * We must not block on acquiring the new inode lock as we will get
+ * into a lock-order reversal with the buffer lock and possibly get a
+ * deadlock. Thus is we cannot instantiate the requested vnode
+ * without sleeping on its lock, we must unlock the vnode and the
+ * buffer before doing a blocking lock for the inode. We return
+ * ERELOOKUP if we have had to unlock either the vnode or the buffer so
+ * that the function can reassess its state.
+ *
+ * Top-level VFS code (for syscalls and other consumers, e.g. callers
+ * of VOP_FSYNC() in syncer) check for ERELOOKUP and restart at safe
+ * point.
+ *
+ * Since callers expect to operate on fully constructed vnode, we also
+ * recheck v_data after relock, and return ENOENT if NULL.
+ *
+ * If unlocking bp, we must unroll dequeueing its unfinished
+ * dependencies, and clear scan flag, before unlocking. If unlocking
+ * vp while it is under deactivation, we re-queue deactivation.
+ */
+static int
+get_parent_vp(struct vnode *vp, struct mount *mp, ino_t inum, struct buf *bp,
+ struct diraddhd *diraddhdp, struct diraddhd *unfinishedp,
+ struct vnode **rvp)
+{
+ struct vnode *pvp;
+ struct diradd *dap;
+ int error;
+ bool bplocked;
+
+ ASSERT_VOP_ELOCKED(vp, "child vnode must be locked");
+ for (bplocked = true, pvp = NULL;;) {
+ error = ffs_vgetf(mp, inum, LK_EXCLUSIVE | LK_NOWAIT, &pvp,
+ FFSV_FORCEINSMQ);
+ if (error == 0) {
+ /*
+ * Since we could have unlocked vp, the inode
+ * number could no longer indicate a
+ * constructed node. In this case, we must
+ * restart the syscall.
+ */
+ if (VTOI(pvp)->i_mode == 0 || !bplocked) {
+ if (VTOI(pvp)->i_mode == 0)
+ vgone(pvp);
+ vput(pvp);
+ error = ERELOOKUP;
+ goto out;
+ }
+
+ error = 0;
+ goto out1;
+ }
+ if (bp != NULL && bplocked) {
+ /*
+ * Requeue unfinished dependencies before
+ * unlocking buffer, which could make
+ * diraddhdp invalid.
+ */
+ ACQUIRE_LOCK(VFSTOUFS(mp));
+ while ((dap = LIST_FIRST(unfinishedp)) != NULL) {
+ LIST_REMOVE(dap, da_pdlist);
+ LIST_INSERT_HEAD(diraddhdp, dap, da_pdlist);
+ }
+ FREE_LOCK(VFSTOUFS(mp));
+ bp->b_vflags &= ~BV_SCANNED;
+ BUF_NOREC(bp);
+ BUF_UNLOCK(bp);
+ bplocked = false;
+ }
+
+ /*
+ * Do not drop vnode lock while inactivating. This
+ * would result in leaks of the VI flags and
+ * reclaiming of non-truncated vnode. Instead,
+ * re-schedule inactivation hoping that we would be
+ * able to sync inode later.
+ */
+ if ((vp->v_iflag & VI_DOINGINACT) != 0) {
+ VI_LOCK(vp);
+ vp->v_iflag |= VI_OWEINACT;
+ VI_UNLOCK(vp);
+ return (ERELOOKUP);
+ }
+
+ VOP_UNLOCK(vp);
+ error = ffs_vgetf(mp, inum, LK_EXCLUSIVE, &pvp,
+ FFSV_FORCEINSMQ);
+ if (error != 0) {
+ MPASS(error != ERELOOKUP);
+ vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
+ break;
+ }
+ if (VTOI(pvp)->i_mode == 0) {
+ vgone(pvp);
+ vput(pvp);
+ pvp = NULL;
+ vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
+ error = ERELOOKUP;
+ break;
+ }
+ error = vn_lock(vp, LK_EXCLUSIVE | LK_NOWAIT);
+ if (error == 0)
+ break;
+ vput(pvp);
+ pvp = NULL;
+ vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
+ if (vp->v_data == NULL) {
+ error = ENOENT;
+ break;
+ }
+ }
+ if (bp != NULL) {
+ MPASS(!bplocked);
+ error = ERELOOKUP;
+ }
+ if (error != 0 && pvp != NULL) {
+ vput(pvp);
+ pvp = NULL;
+ }
+out1:
+ *rvp = pvp;
+out:
+ ASSERT_VOP_ELOCKED(vp, "child vnode must be locked on return");
+ return (error);
+}
+
/*
* This function cleans the worklist for a filesystem.
* Each filesystem running with soft dependencies gets its own
@@ -3095,48 +3245,207 @@
return (0);
}
+/*
+ * Try hard to sync all data and metadata for the vnode, and workitems
+ * flushing which might conflict with the vnode lock. This is a
+ * helper for softdep_prerename().
+ */
+static int
+softdep_prehandle_vnode(ump, vp)
+ struct ufsmount *ump;
+ struct vnode *vp;
+{
+ int error;
+
+ ASSERT_VOP_ELOCKED(vp, "prehandle");
+ if (vp->v_data == NULL)
+ return (0);
+ error = VOP_FSYNC(vp, MNT_WAIT, curthread);
+ if (error != 0)
+ return (error);
+ ACQUIRE_LOCK(ump);
+ process_removes(vp);
+ process_truncates(vp);
+ FREE_LOCK(ump);
+ return (0);
+}
+
+/*
+ * Must be called from VOP_RENAME() after all vnodes are locked.
+ * Ensures that there is enough journal space for rename. It is
+ * sufficiently different from softdep_prelink() by having to handle
+ * four vnodes.
+ */
+int
+softdep_prerename(fdvp, fvp, tdvp, tvp)
+ struct vnode *fdvp;
+ struct vnode *fvp;
+ struct vnode *tdvp;
+ struct vnode *tvp;
+{
+ struct ufsmount *ump;
+ int error;
+
+ ump = VFSTOUFS(fdvp->v_mount);
+
+ if (journal_space(ump, 0))
+ return (0);
+
+ VOP_UNLOCK(tdvp);
+ VOP_UNLOCK(fvp);
+ if (tvp != NULL && tvp != tdvp)
+ VOP_UNLOCK(tvp);
+
+ error = softdep_prehandle_vnode(ump, fdvp);
+ VOP_UNLOCK(fdvp);
+ if (error != 0)
+ return (error);
+
+ VOP_LOCK(fvp, LK_EXCLUSIVE | LK_RETRY);
+ error = softdep_prehandle_vnode(ump, fvp);
+ VOP_UNLOCK(fvp);
+ if (error != 0)
+ return (error);
+
+ if (tdvp != fdvp) {
+ VOP_LOCK(tdvp, LK_EXCLUSIVE | LK_RETRY);
+ error = softdep_prehandle_vnode(ump, tdvp);
+ VOP_UNLOCK(tdvp);
+ if (error != 0)
+ return (error);
+ }
+
+ if (tvp != fvp && tvp != NULL) {
+ VOP_LOCK(tvp, LK_EXCLUSIVE | LK_RETRY);
+ error = softdep_prehandle_vnode(ump, tvp);
+ VOP_UNLOCK(tvp);
+ if (error != 0)
+ return (error);
+ }
+
+ ACQUIRE_LOCK(ump);
+ softdep_speedup(ump);
+ process_worklist_item(UFSTOVFS(ump), 2, LK_NOWAIT);
+ if (journal_space(ump, 0) == 0) {
+ softdep_speedup(ump);
+ if (journal_space(ump, 1) == 0)
+ journal_suspend(ump);
+ }
+ FREE_LOCK(ump);
+ return (ERELOOKUP);
+}
+
/*
* Before adjusting a link count on a vnode verify that we have sufficient
* journal space. If not, process operations that depend on the currently
* locked pair of vnodes to try to flush space as the syncer, buf daemon,
* and softdep flush threads can not acquire these locks to reclaim space.
+ *
+ * Returns 0 if all owned locks are still valid and were not dropped
+ * in the process, in other case it returns either an error from sync,
+ * or ERELOOKUP if any of the locks were re-acquired. In the later
+ * case, the state of the vnodes cannot be relied upon and our VFS
+ * syscall must be restarted at top level from the lookup.
*/
-static void
-softdep_prelink(dvp, vp)
+int
+softdep_prelink(dvp, vp, will_direnter)
struct vnode *dvp;
struct vnode *vp;
+ int will_direnter;
{
struct ufsmount *ump;
+ int error, error1;
+ ASSERT_VOP_ELOCKED(dvp, "prelink dvp");
+ if (vp != NULL)
+ ASSERT_VOP_ELOCKED(vp, "prelink vp");
ump = VFSTOUFS(dvp->v_mount);
- LOCK_OWNED(ump);
+
/*
* Nothing to do if we have sufficient journal space.
* If we currently hold the snapshot lock, we must avoid
* handling other resources that could cause deadlock.
+ *
+ * will_direnter == 1: In case allocated a directory block in
+ * an indirect block, we must prevent holes in the directory
+ * created if directory entries are written out of order. To
+ * accomplish this we fsync when we extend a directory into
+ * indirects. During rename it's not safe to drop the tvp
+ * lock so sync must be delayed until it is.
+ *
+ * This synchronous step could be removed if fsck and the
+ * kernel were taught to fill in sparse directories rather
+ * than panic.
*/
- if (journal_space(ump, 0) || (vp && IS_SNAPSHOT(VTOI(vp))))
- return;
+ if (journal_space(ump, 0) || (vp != NULL && IS_SNAPSHOT(VTOI(vp)))) {
+ error = 0;
+ if (will_direnter && (vp == NULL || !IS_SNAPSHOT(VTOI(vp)))) {
+ if (vp != NULL)
+ VOP_UNLOCK(vp);
+ error = ffs_syncvnode(dvp, MNT_WAIT, 0);
+ if (vp != NULL) {
+ error1 = vn_lock(vp, LK_EXCLUSIVE | LK_NOWAIT);
+ if (error1 != 0) {
+ vn_lock_pair(dvp, true, vp, false);
+ if (error == 0)
+ error = ERELOOKUP;
+ } else if (vp->v_data == NULL) {
+ error = ERELOOKUP;
+ }
+ }
+ }
+ return (error);
+ }
+
stat_journal_low++;
- FREE_LOCK(ump);
- if (vp)
+ if (vp != NULL) {
+ VOP_UNLOCK(dvp);
ffs_syncvnode(vp, MNT_NOWAIT, 0);
+ vn_lock_pair(dvp, false, vp, true);
+ if (dvp->v_data == NULL)
+ return (ERELOOKUP);
+ }
+ if (vp != NULL)
+ VOP_UNLOCK(vp);
ffs_syncvnode(dvp, MNT_WAIT, 0);
- ACQUIRE_LOCK(ump);
+ VOP_UNLOCK(dvp);
+
/* Process vp before dvp as it may create .. removes. */
- if (vp) {
+ if (vp != NULL) {
+ vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
+ if (vp->v_data == NULL) {
+ vn_lock_pair(dvp, false, vp, true);
+ return (ERELOOKUP);
+ }
+ ACQUIRE_LOCK(ump);
process_removes(vp);
process_truncates(vp);
+ FREE_LOCK(ump);
+ VOP_UNLOCK(vp);
+ }
+
+ vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
+ if (dvp->v_data == NULL) {
+ vn_lock_pair(dvp, true, vp, false);
+ return (ERELOOKUP);
}
+
+ ACQUIRE_LOCK(ump);
process_removes(dvp);
process_truncates(dvp);
+ VOP_UNLOCK(dvp);
softdep_speedup(ump);
+
process_worklist_item(UFSTOVFS(ump), 2, LK_NOWAIT);
if (journal_space(ump, 0) == 0) {
softdep_speedup(ump);
if (journal_space(ump, 1) == 0)
journal_suspend(ump);
}
+ FREE_LOCK(ump);
+
+ vn_lock_pair(dvp, false, vp, false);
+ return (ERELOOKUP);
}
static void
@@ -4742,7 +5051,6 @@
KASSERT(jaddref != NULL && jaddref->ja_parent == dp->i_number,
("softdep_setup_create: No addref structure present."));
}
- softdep_prelink(dvp, NULL);
FREE_LOCK(ITOUMP(dp));
}
@@ -4777,7 +5085,6 @@
if (jaddref)
TAILQ_INSERT_TAIL(&inodedep->id_inoreflst, &jaddref->ja_ref,
if_deps);
- softdep_prelink(dvp, ITOV(ip));
FREE_LOCK(ITOUMP(dp));
}
@@ -4808,7 +5115,6 @@
if (jaddref)
TAILQ_INSERT_TAIL(&inodedep->id_inoreflst, &jaddref->ja_ref,
if_deps);
- softdep_prelink(dvp, ITOV(ip));
FREE_LOCK(ITOUMP(dp));
}
@@ -4858,7 +5164,6 @@
if (DOINGSUJ(dvp))
TAILQ_INSERT_TAIL(&inodedep->id_inoreflst,
&dotdotaddref->ja_ref, if_deps);
- softdep_prelink(ITOV(dp), NULL);
FREE_LOCK(ITOUMP(dp));
}
@@ -4879,7 +5184,6 @@
ACQUIRE_LOCK(ITOUMP(dp));
(void) inodedep_lookup_ip(ip);
(void) inodedep_lookup_ip(dp);
- softdep_prelink(dvp, ITOV(ip));
FREE_LOCK(ITOUMP(dp));
}
@@ -4900,7 +5204,6 @@
ACQUIRE_LOCK(ITOUMP(dp));
(void) inodedep_lookup_ip(ip);
(void) inodedep_lookup_ip(dp);
- softdep_prelink(dvp, ITOV(ip));
FREE_LOCK(ITOUMP(dp));
}
@@ -8764,11 +9067,11 @@
if (MOUNTEDSUJ(mp)) {
flags = DEPALLOC;
jmvref = newjmvref(dp, de->d_ino,
- dp->i_offset + (oldloc - base),
- dp->i_offset + (newloc - base));
+ I_OFFSET(dp) + (oldloc - base),
+ I_OFFSET(dp) + (newloc - base));
}
- lbn = lblkno(ump->um_fs, dp->i_offset);
- offset = blkoff(ump->um_fs, dp->i_offset);
+ lbn = lblkno(ump->um_fs, I_OFFSET(dp));
+ offset = blkoff(ump->um_fs, I_OFFSET(dp));
oldoffset = offset + (oldloc - base);
newoffset = offset + (newloc - base);
ACQUIRE_LOCK(ump);
@@ -9280,7 +9583,7 @@
jremref = dotremref = dotdotremref = NULL;
if (DOINGSUJ(dvp)) {
if (isrmdir) {
- jremref = newjremref(dirrem, dp, ip, dp->i_offset,
+ jremref = newjremref(dirrem, dp, ip, I_OFFSET(dp),
ip->i_effnlink + 2);
dotremref = newjremref(dirrem, ip, ip, DOT_OFFSET,
ip->i_effnlink + 1);
@@ -9288,12 +9591,12 @@
dp->i_effnlink + 1);
dotdotremref->jr_state |= MKDIR_PARENT;
} else
- jremref = newjremref(dirrem, dp, ip, dp->i_offset,
+ jremref = newjremref(dirrem, dp, ip, I_OFFSET(dp),
ip->i_effnlink + 1);
}
ACQUIRE_LOCK(ump);
- lbn = lblkno(ump->um_fs, dp->i_offset);
- offset = blkoff(ump->um_fs, dp->i_offset);
+ lbn = lblkno(ump->um_fs, I_OFFSET(dp));
+ offset = blkoff(ump->um_fs, I_OFFSET(dp));
pagedep_lookup(UFSTOVFS(ump), bp, dp->i_number, lbn, DEPALLOC,
&pagedep);
dirrem->dm_pagedep = pagedep;
@@ -9304,7 +9607,7 @@
* the jremref is preserved for any potential diradd in this
* location. This can not coincide with a rmdir.
*/
- if (dp->i_offset == DOTDOT_OFFSET) {
+ if (I_OFFSET(dp) == DOTDOT_OFFSET) {
if (isrmdir)
panic("newdirrem: .. directory change during remove?");
jremref = cancel_mkdir_dotdot(dp, dirrem, jremref);
@@ -9405,7 +9708,7 @@
mp = ITOVFS(dp);
ump = VFSTOUFS(mp);
- offset = blkoff(ump->um_fs, dp->i_offset);
+ offset = blkoff(ump->um_fs, I_OFFSET(dp));
KASSERT(MOUNTEDSOFTDEP(mp) != 0,
("softdep_setup_directory_change called on non-softdep filesystem"));
@@ -9508,7 +9811,7 @@
KASSERT(jaddref != NULL && jaddref->ja_parent == dp->i_number,
("softdep_setup_directory_change: bad jaddref %p",
jaddref));
- jaddref->ja_diroff = dp->i_offset;
+ jaddref->ja_diroff = I_OFFSET(dp);
jaddref->ja_diradd = dap;
LIST_INSERT_HEAD(&pagedep->pd_diraddhd[DIRADDHASH(offset)],
dap, da_pdlist);
@@ -9527,7 +9830,7 @@
* committed when need to move the dot and dotdot references to
* this new name.
*/
- if (inodedep->id_mkdiradd && dp->i_offset != DOTDOT_OFFSET)
+ if (inodedep->id_mkdiradd && I_OFFSET(dp) != DOTDOT_OFFSET)
merge_diradd(inodedep, dap);
FREE_LOCK(ump);
}
@@ -12622,25 +12925,12 @@
* for details on possible races.
*/
FREE_LOCK(ump);
- if (ffs_vgetf(mp, parentino, LK_NOWAIT | LK_EXCLUSIVE, &pvp,
- FFSV_FORCEINSMQ)) {
- /*
- * Unmount cannot proceed after unlock because
- * caller must have called vn_start_write().
- */
- VOP_UNLOCK(vp);
- error = ffs_vgetf(mp, parentino, LK_EXCLUSIVE,
- &pvp, FFSV_FORCEINSMQ);
- MPASS(VTOI(pvp)->i_mode != 0);
- vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
- if (VN_IS_DOOMED(vp)) {
- if (error == 0)
- vput(pvp);
- error = ENOENT;
- }
- if (error != 0)
- return (error);
- }
+ error = get_parent_vp(vp, mp, parentino, NULL, NULL, NULL,
+ &pvp);
+ if (error == ERELOOKUP)
+ error = 0;
+ if (error != 0)
+ return (error);
/*
* All MKDIR_PARENT dependencies and all the NEWBLOCK pagedeps
* that are contained in direct blocks will be resolved by
@@ -12964,9 +13254,11 @@
for (i = 0; i < DAHASHSZ; i++) {
if (LIST_FIRST(&pagedep->pd_diraddhd[i]) == 0)
continue;
- if ((error = flush_pagedep_deps(vp, wk->wk_mp,
- &pagedep->pd_diraddhd[i]))) {
- BUF_NOREC(bp);
+ error = flush_pagedep_deps(vp, wk->wk_mp,
+ &pagedep->pd_diraddhd[i], bp);
+ if (error != 0) {
+ if (error != ERELOOKUP)
+ BUF_NOREC(bp);
goto out_unlock;
}
}
@@ -13200,10 +13492,11 @@
* Eliminate a pagedep dependency by flushing out all its diradd dependencies.
*/
static int
-flush_pagedep_deps(pvp, mp, diraddhdp)
+flush_pagedep_deps(pvp, mp, diraddhdp, locked_bp)
struct vnode *pvp;
struct mount *mp;
struct diraddhd *diraddhdp;
+ struct buf *locked_bp;
{
struct inodedep *inodedep;
struct inoref *inoref;
@@ -13270,10 +13563,10 @@
}
if (dap->da_state & MKDIR_BODY) {
FREE_LOCK(ump);
- if ((error = ffs_vgetf(mp, inum, LK_EXCLUSIVE, &vp,
- FFSV_FORCEINSMQ)))
+ error = get_parent_vp(pvp, mp, inum, locked_bp,
+ diraddhdp, &unfinished, &vp);
+ if (error != 0)
break;
- MPASS(VTOI(vp)->i_mode != 0);
error = flush_newblk_dep(vp, mp, 0);
/*
* If we still have the dependency we might need to
@@ -13335,10 +13628,10 @@
*/
if (dap == LIST_FIRST(diraddhdp)) {
FREE_LOCK(ump);
- if ((error = ffs_vgetf(mp, inum, LK_EXCLUSIVE, &vp,
- FFSV_FORCEINSMQ)))
+ error = get_parent_vp(pvp, mp, inum, locked_bp,
+ diraddhdp, &unfinished, &vp);
+ if (error != 0)
break;
- MPASS(VTOI(vp)->i_mode != 0);
error = ffs_update(vp, 1);
vput(vp);
if (error)
Index: sys/ufs/ffs/ffs_vfsops.c
===================================================================
--- sys/ufs/ffs/ffs_vfsops.c
+++ sys/ufs/ffs/ffs_vfsops.c
@@ -1865,8 +1865,14 @@
#ifdef QUOTA
qsyncvp(vp);
#endif
- if ((error = ffs_syncvnode(vp, waitfor, 0)) != 0)
- allerror = error;
+ for (;;) {
+ error = ffs_syncvnode(vp, waitfor, 0);
+ if (error == ERELOOKUP)
+ continue;
+ if (error != 0)
+ allerror = error;
+ break;
+ }
vput(vp);
}
/*
@@ -2005,6 +2011,9 @@
ip->i_nextclustercg = -1;
ip->i_flag = fs->fs_magic == FS_UFS1_MAGIC ? 0 : IN_UFS2;
ip->i_mode = 0; /* ensure error cases below throw away vnode */
+#ifdef DIAGNOSTIC
+ ufs_init_trackers(ip);
+#endif
#ifdef QUOTA
{
int i;
Index: sys/ufs/ffs/ffs_vnops.c
===================================================================
--- sys/ufs/ffs/ffs_vnops.c
+++ sys/ufs/ffs/ffs_vnops.c
@@ -253,7 +253,7 @@
struct buf *bp, *nbp;
ufs_lbn_t lbn;
int error, passes;
- bool still_dirty, wait;
+ bool still_dirty, unlocked, wait;
ip = VTOI(vp);
ip->i_flag &= ~IN_NEEDSYNC;
@@ -277,6 +277,7 @@
error = 0;
passes = 0;
wait = false; /* Always do an async pass first. */
+ unlocked = false;
lbn = lblkno(ITOFS(ip), (ip->i_size + ITOFS(ip)->fs_bsize - 1));
BO_LOCK(bo);
loop:
@@ -325,6 +326,26 @@
if (!LIST_EMPTY(&bp->b_dep) &&
(error = softdep_sync_buf(vp, bp,
wait ? MNT_WAIT : MNT_NOWAIT)) != 0) {
+ /*
+ * Lock order conflict, buffer was already unlocked,
+ * and vnode possibly unlocked.
+ */
+ if (error == ERELOOKUP) {
+ if (vp->v_data == NULL)
+ return (EBADF);
+ unlocked = true;
+ if (DOINGSOFTDEP(vp) && waitfor == MNT_WAIT &&
+ (error = softdep_sync_metadata(vp)) != 0) {
+ if (ffs_fsfail_cleanup(ump, error))
+ error = 0;
+ return (unlocked && error == 0 ?
+ ERELOOKUP : error);
+ }
+ /* Re-evaluate inode size */
+ lbn = lblkno(ITOFS(ip), (ip->i_size +
+ ITOFS(ip)->fs_bsize - 1));
+ goto next;
+ }
/* I/O error. */
if (error != EBUSY) {
BUF_UNLOCK(bp);
@@ -361,9 +382,11 @@
if (waitfor != MNT_WAIT) {
BO_UNLOCK(bo);
if ((flags & NO_INO_UPDT) != 0)
- return (0);
- else
- return (ffs_update(vp, 0));
+ return (unlocked ? ERELOOKUP : 0);
+ error = ffs_update(vp, 0);
+ if (error == 0 && unlocked)
+ error = ERELOOKUP;
+ return (error);
}
/* Drain IO to see if we're done. */
bufobj_wwait(bo, 0, 0);
@@ -419,6 +442,8 @@
} else if ((ip->i_flags & (IN_SIZEMOD | IN_IBLKDATA)) != 0) {
error = ffs_update(vp, 1);
}
+ if (error == 0 && unlocked)
+ error = ERELOOKUP;
return (error);
}
@@ -434,16 +459,18 @@
struct vop_lock1_args /* {
struct vnode *a_vp;
int a_flags;
- struct thread *a_td;
char *file;
int line;
} */ *ap;
{
+ struct vnode *vp = ap->a_vp;
+#ifdef DIAGNOSTIC
+ struct inode *ip;
+#endif
+ int result;
#ifndef NO_FFS_SNAPSHOT
- struct vnode *vp;
int flags;
struct lock *lkp;
- int result;
/*
* Adaptive spinning mixed with SU leads to trouble. use a giant hammer
@@ -456,7 +483,6 @@
case LK_SHARED:
case LK_UPGRADE:
case LK_EXCLUSIVE:
- vp = ap->a_vp;
flags = ap->a_flags;
for (;;) {
#ifdef DEBUG_VFS_LOCKS
@@ -483,28 +509,67 @@
flags = (flags & ~LK_TYPE_MASK) | LK_EXCLUSIVE;
flags &= ~LK_INTERLOCK;
}
+#ifdef DIAGNOSTIC
+ switch (ap->a_flags & LK_TYPE_MASK) {
+ case LK_UPGRADE:
+ case LK_EXCLUSIVE:
+ if (result == 0 && vp->v_vnlock->lk_recurse == 0) {
+ ip = VTOI(vp);
+ if (ip != NULL)
+ ip->i_lock_gen++;
+ }
+ }
+#endif
break;
default:
+#ifdef DIAGNOSTIC
+ if ((ap->a_flags & LK_TYPE_MASK) == LK_DOWNGRADE) {
+ ip = VTOI(vp);
+ if (ip != NULL)
+ ufs_unlock_tracker(ip);
+ }
+#endif
result = VOP_LOCK1_APV(&ufs_vnodeops, ap);
+ break;
}
- return (result);
#else
/*
* See above for an explanation.
*/
if ((ap->a_flags & LK_NODDLKTREAT) != 0)
ap->a_flags |= LK_ADAPTIVE;
- return (VOP_LOCK1_APV(&ufs_vnodeops, ap));
+#ifdef DIAGNOSTIC
+ if ((ap->a_flags & LK_TYPE_MASK) == LK_DOWNGRADE) {
+ ip = VTOI(vp);
+ if (ip != NULL)
+ ufs_unlock_tracker(ip);
+ }
#endif
+ result = VOP_LOCK1_APV(&ufs_vnodeops, ap);
+#endif
+#ifdef DIAGNOSTIC
+ switch (ap->a_flags & LK_TYPE_MASK) {
+ case LK_UPGRADE:
+ case LK_EXCLUSIVE:
+ if (result == 0 && vp->v_vnlock->lk_recurse == 0) {
+ ip = VTOI(vp);
+ if (ip != NULL)
+ ip->i_lock_gen++;
+ }
+ }
+#endif
+ return (result);
}
#ifdef INVARIANTS
static int
ffs_unlock_debug(struct vop_unlock_args *ap)
{
- struct vnode *vp = ap->a_vp;
- struct inode *ip = VTOI(vp);
+ struct vnode *vp;
+ struct inode *ip;
+ vp = ap->a_vp;
+ ip = VTOI(vp);
if (ip->i_flag & UFS_INODE_FLAG_LAZY_MASK_ASSERTABLE) {
if ((vp->v_mflag & VMP_LAZYLIST) == 0) {
VI_LOCK(vp);
@@ -514,6 +579,11 @@
VI_UNLOCK(vp);
}
}
+#ifdef DIAGNOSTIC
+ if (VOP_ISLOCKED(vp) == LK_EXCLUSIVE && ip != NULL &&
+ vp->v_vnlock->lk_recurse == 0)
+ ufs_unlock_tracker(ip);
+#endif
return (VOP_UNLOCK_APV(&ufs_vnodeops, ap));
}
#endif
Index: sys/ufs/ufs/inode.h
===================================================================
--- sys/ufs/ufs/inode.h
+++ sys/ufs/ufs/inode.h
@@ -44,12 +44,24 @@
#include <sys/queue.h>
#include <ufs/ufs/dinode.h>
#include <sys/seqc.h>
+#ifdef DIAGNOSTIC
+#include <sys/stack.h>
+#endif
/*
* This must agree with the definition in <ufs/ufs/dir.h>.
*/
#define doff_t int32_t
+#ifdef DIAGNOSTIC
+/*
+ * Per-field ownership tracker for the directory-lookup scratch fields
+ * (i_offset/i_count/i_endoff): records which thread last wrote the
+ * field and under which vnode lock generation, so stale reads after a
+ * lock drop can be detected and diagnosed.
+ */
+struct iown_tracker {
+	struct thread *tr_owner;	/* thread that last set the field */
+	struct stack tr_st;		/* call stack at the last set */
+	struct stack tr_unlock;		/* call stack at the last vnode unlock */
+	int tr_gen;			/* i_lock_gen value at the last set */
+};
+#endif
+
/*
* The inode is used to describe each active (or recently active) file in the
* UFS filesystem. It is composed of two types of information. The first part
@@ -94,6 +106,12 @@
doff_t i_endoff; /* End of useful stuff in directory. */
doff_t i_diroff; /* Offset in dir, where we found last entry. */
doff_t i_offset; /* Offset of free space in directory. */
+#ifdef DIAGNOSTIC
+ int i_lock_gen;
+ struct iown_tracker i_count_tracker;
+ struct iown_tracker i_endoff_tracker;
+ struct iown_tracker i_offset_tracker;
+#endif
int i_nextclustercg; /* last cg searched for cluster */
@@ -254,6 +272,35 @@
uint32_t ufid_ino; /* File number (ino). */
uint32_t ufid_gen; /* Generation number. */
};
+
+#ifdef DIAGNOSTIC
+/*
+ * Checked accessors for the directory scratch fields.  Under
+ * DIAGNOSTIC every read verifies (via the iown_tracker) that the value
+ * was set by the current thread under the still-held exclusive vnode
+ * lock; otherwise the accessors collapse to plain field access.
+ */
+void ufs_init_trackers(struct inode *ip);
+void ufs_unlock_tracker(struct inode *ip);
+
+doff_t ufs_get_i_offset(struct inode *ip, const char *file, int line);
+void ufs_set_i_offset(struct inode *ip, doff_t off, const char *file, int line);
+#define I_OFFSET(ip) ufs_get_i_offset(ip, __FILE__, __LINE__)
+#define SET_I_OFFSET(ip, off) ufs_set_i_offset(ip, off, __FILE__, __LINE__)
+
+int32_t ufs_get_i_count(struct inode *ip, const char *file, int line);
+void ufs_set_i_count(struct inode *ip, int32_t cnt, const char *file, int line);
+#define I_COUNT(ip) ufs_get_i_count(ip, __FILE__, __LINE__)
+#define SET_I_COUNT(ip, cnt) ufs_set_i_count(ip, cnt, __FILE__, __LINE__)
+
+doff_t ufs_get_i_endoff(struct inode *ip, const char *file, int line);
+void ufs_set_i_endoff(struct inode *ip, doff_t off, const char *file, int line);
+#define I_ENDOFF(ip) ufs_get_i_endoff(ip, __FILE__, __LINE__)
+#define SET_I_ENDOFF(ip, off) ufs_set_i_endoff(ip, off, __FILE__, __LINE__)
+
+#else
+/* Non-DIAGNOSTIC: direct, unchecked field access. */
+#define I_OFFSET(ip) ((ip)->i_offset)
+#define SET_I_OFFSET(ip, off) ((ip)->i_offset = (off))
+#define I_COUNT(ip) ((ip)->i_count)
+#define SET_I_COUNT(ip, cnt) ((ip)->i_count = cnt)
+#define I_ENDOFF(ip) ((ip)->i_endoff)
+#define SET_I_ENDOFF(ip, off) ((ip)->i_endoff = off)
+#endif
+
+
#endif /* _KERNEL */
#endif /* !_UFS_UFS_INODE_H_ */
Index: sys/ufs/ufs/ufs_inode.c
===================================================================
--- sys/ufs/ufs/ufs_inode.c
+++ sys/ufs/ufs/ufs_inode.c
@@ -166,7 +166,8 @@
isize += ip->i_din2->di_extsize;
if (ip->i_effnlink <= 0 && isize && !UFS_RDONLY(ip))
error = UFS_TRUNCATE(vp, (off_t)0, IO_EXT | IO_NORMAL, NOCRED);
- if (ip->i_nlink <= 0 && ip->i_mode && !UFS_RDONLY(ip)) {
+ if (ip->i_nlink <= 0 && ip->i_mode != 0 && !UFS_RDONLY(ip) &&
+ (vp->v_iflag & VI_OWEINACT) == 0) {
#ifdef QUOTA
if (!getinoquota(ip))
(void)chkiq(ip, -1, NOCRED, FORCE);
@@ -207,10 +208,12 @@
* If we are done with the inode, reclaim it
* so that it can be reused immediately.
*/
- if (ip->i_mode == 0)
+ if (ip->i_mode == 0 && (vp->v_iflag & VI_OWEINACT) == 0)
vrecycle(vp);
if (mp != NULL)
vn_finished_secondary_write(mp);
+ if (error == ERELOOKUP)
+ error = 0;
return (error);
}
Index: sys/ufs/ufs/ufs_lookup.c
===================================================================
--- sys/ufs/ufs/ufs_lookup.c
+++ sys/ufs/ufs/ufs_lookup.c
@@ -66,6 +66,7 @@
#endif
#include <ufs/ufs/ufsmount.h>
#include <ufs/ufs/ufs_extern.h>
+#include <ufs/ffs/ffs_extern.h>
#ifdef DIAGNOSTIC
static int dirchk = 1;
@@ -504,22 +505,22 @@
* dp->i_offset + dp->i_count.
*/
if (slotstatus == NONE) {
- dp->i_offset = roundup2(dp->i_size, DIRBLKSIZ);
- dp->i_count = 0;
- enduseful = dp->i_offset;
+ SET_I_OFFSET(dp, roundup2(dp->i_size, DIRBLKSIZ));
+ SET_I_COUNT(dp, 0);
+ enduseful = I_OFFSET(dp);
} else if (nameiop == DELETE) {
- dp->i_offset = slotoffset;
- if ((dp->i_offset & (DIRBLKSIZ - 1)) == 0)
- dp->i_count = 0;
+ SET_I_OFFSET(dp, slotoffset);
+ if ((I_OFFSET(dp) & (DIRBLKSIZ - 1)) == 0)
+ SET_I_COUNT(dp, 0);
else
- dp->i_count = dp->i_offset - prevoff;
+ SET_I_COUNT(dp, I_OFFSET(dp) - prevoff);
} else {
- dp->i_offset = slotoffset;
- dp->i_count = slotsize;
+ SET_I_OFFSET(dp, slotoffset);
+ SET_I_COUNT(dp, slotsize);
if (enduseful < slotoffset + slotsize)
enduseful = slotoffset + slotsize;
}
- dp->i_endoff = roundup2(enduseful, DIRBLKSIZ);
+ SET_I_ENDOFF(dp, roundup2(enduseful, DIRBLKSIZ));
/*
* We return with the directory locked, so that
* the parameters we set up above will still be
@@ -575,24 +576,32 @@
if (nameiop == DELETE && (flags & ISLASTCN)) {
if (flags & LOCKPARENT)
ASSERT_VOP_ELOCKED(vdp, __FUNCTION__);
- /*
- * Return pointer to current entry in dp->i_offset,
- * and distance past previous entry (if there
- * is a previous entry in this block) in dp->i_count.
- * Save directory inode pointer in ndp->ni_dvp for dirremove().
- *
- * Technically we shouldn't be setting these in the
- * WANTPARENT case (first lookup in rename()), but any
- * lookups that will result in directory changes will
- * overwrite these.
- */
- dp->i_offset = i_offset;
- if ((dp->i_offset & (DIRBLKSIZ - 1)) == 0)
- dp->i_count = 0;
- else
- dp->i_count = dp->i_offset - prevoff;
+
+ if (VOP_ISLOCKED(vdp) == LK_EXCLUSIVE) {
+ /*
+ * Return pointer to current entry in
+ * dp->i_offset, and distance past previous
+ * entry (if there is a previous entry in this
+ * block) in dp->i_count.
+ *
+ * We shouldn't be setting these in the
+ * WANTPARENT case (first lookup in rename()), but any
+ * lookups that will result in directory changes will
+ * overwrite these.
+ */
+ SET_I_OFFSET(dp, i_offset);
+ if ((I_OFFSET(dp) & (DIRBLKSIZ - 1)) == 0)
+ SET_I_COUNT(dp, 0);
+ else
+ SET_I_COUNT(dp, I_OFFSET(dp) - prevoff);
+ }
if (dd_ino != NULL)
return (0);
+
+ /*
+ * Save directory inode pointer in ndp->ni_dvp for
+ * dirremove().
+ */
if ((error = VFS_VGET(vdp->v_mount, ino,
LK_EXCLUSIVE, &tdp)) != 0)
return (error);
@@ -629,7 +638,7 @@
* Careful about locking second inode.
* This can only occur if the target is ".".
*/
- dp->i_offset = i_offset;
+ SET_I_OFFSET(dp, i_offset);
if (dp->i_number == ino)
return (EISDIR);
if (dd_ino != NULL)
@@ -887,14 +896,14 @@
dp = VTOI(dvp);
newentrysize = DIRSIZ(OFSFMT(dvp), dirp);
- if (dp->i_count == 0) {
+ if (I_COUNT(dp) == 0) {
/*
* If dp->i_count is 0, then namei could find no
* space in the directory. Here, dp->i_offset will
* be on a directory block boundary and we will write the
* new entry into a fresh block.
*/
- if (dp->i_offset & (DIRBLKSIZ - 1))
+ if (I_OFFSET(dp) & (DIRBLKSIZ - 1))
panic("ufs_direnter: newblk");
flags = BA_CLRBUF;
if (!DOINGSOFTDEP(dvp) && !DOINGASYNC(dvp))
@@ -907,28 +916,28 @@
}
#endif
old_isize = dp->i_size;
- vnode_pager_setsize(dvp, (u_long)dp->i_offset + DIRBLKSIZ);
- if ((error = UFS_BALLOC(dvp, (off_t)dp->i_offset, DIRBLKSIZ,
+ vnode_pager_setsize(dvp, (u_long)I_OFFSET(dp) + DIRBLKSIZ);
+ if ((error = UFS_BALLOC(dvp, (off_t)I_OFFSET(dp), DIRBLKSIZ,
cr, flags, &bp)) != 0) {
if (DOINGSOFTDEP(dvp) && newdirbp != NULL)
bdwrite(newdirbp);
vnode_pager_setsize(dvp, (u_long)old_isize);
return (error);
}
- dp->i_size = dp->i_offset + DIRBLKSIZ;
+ dp->i_size = I_OFFSET(dp) + DIRBLKSIZ;
DIP_SET(dp, i_size, dp->i_size);
- dp->i_endoff = dp->i_size;
+ SET_I_ENDOFF(dp, dp->i_size);
UFS_INODE_SET_FLAG(dp, IN_SIZEMOD | IN_CHANGE | IN_UPDATE);
dirp->d_reclen = DIRBLKSIZ;
- blkoff = dp->i_offset &
+ blkoff = I_OFFSET(dp) &
(VFSTOUFS(dvp->v_mount)->um_mountp->mnt_stat.f_iosize - 1);
bcopy((caddr_t)dirp, (caddr_t)bp->b_data + blkoff,newentrysize);
#ifdef UFS_DIRHASH
if (dp->i_dirhash != NULL) {
- ufsdirhash_newblk(dp, dp->i_offset);
- ufsdirhash_add(dp, dirp, dp->i_offset);
+ ufsdirhash_newblk(dp, I_OFFSET(dp));
+ ufsdirhash_add(dp, dirp, I_OFFSET(dp));
ufsdirhash_checkblock(dp, (char *)bp->b_data + blkoff,
- dp->i_offset);
+ I_OFFSET(dp));
}
#endif
if (DOINGSOFTDEP(dvp)) {
@@ -944,7 +953,7 @@
(bp->b_data + blkoff))->d_reclen = DIRBLKSIZ;
blkoff += DIRBLKSIZ;
}
- if (softdep_setup_directory_add(bp, dp, dp->i_offset,
+ if (softdep_setup_directory_add(bp, dp, I_OFFSET(dp),
dirp->d_ino, newdirbp, 1))
UFS_INODE_SET_FLAG(dp, IN_NEEDSYNC);
if (newdirbp)
@@ -952,27 +961,7 @@
bdwrite(bp);
if ((dp->i_flag & IN_NEEDSYNC) == 0)
return (UFS_UPDATE(dvp, 0));
- /*
- * We have just allocated a directory block in an
- * indirect block. We must prevent holes in the
- * directory created if directory entries are
- * written out of order. To accomplish this we
- * fsync when we extend a directory into indirects.
- * During rename it's not safe to drop the tvp lock
- * so sync must be delayed until it is.
- *
- * This synchronous step could be removed if fsck and
- * the kernel were taught to fill in sparse
- * directories rather than panic.
- */
- if (isrename)
- return (0);
- if (tvp != NULL)
- VOP_UNLOCK(tvp);
- (void) VOP_FSYNC(dvp, MNT_WAIT, td);
- if (tvp != NULL)
- vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY);
- return (error);
+ return (0);
}
if (DOINGASYNC(dvp)) {
bdwrite(bp);
@@ -1001,15 +990,15 @@
*
* N.B. - THIS IS AN ARTIFACT OF 4.2 AND SHOULD NEVER HAPPEN.
*/
- if (dp->i_offset + dp->i_count > dp->i_size) {
- dp->i_size = dp->i_offset + dp->i_count;
+ if (I_OFFSET(dp) + I_COUNT(dp) > dp->i_size) {
+ dp->i_size = I_OFFSET(dp) + I_COUNT(dp);
DIP_SET(dp, i_size, dp->i_size);
UFS_INODE_SET_FLAG(dp, IN_SIZEMOD | IN_MODIFIED);
}
/*
* Get the block containing the space for the new directory entry.
*/
- error = UFS_BLKATOFF(dvp, (off_t)dp->i_offset, &dirbuf, &bp);
+ error = UFS_BLKATOFF(dvp, (off_t)I_OFFSET(dp), &dirbuf, &bp);
if (error) {
if (DOINGSOFTDEP(dvp) && newdirbp != NULL)
bdwrite(newdirbp);
@@ -1024,7 +1013,7 @@
ep = (struct direct *)dirbuf;
dsize = ep->d_ino ? DIRSIZ(OFSFMT(dvp), ep) : 0;
spacefree = ep->d_reclen - dsize;
- for (loc = ep->d_reclen; loc < dp->i_count; ) {
+ for (loc = ep->d_reclen; loc < I_COUNT(dp); ) {
nep = (struct direct *)(dirbuf + loc);
/* Trim the existing slot (NB: dsize may be zero). */
@@ -1052,8 +1041,8 @@
#ifdef UFS_DIRHASH
if (dp->i_dirhash != NULL)
ufsdirhash_move(dp, nep,
- dp->i_offset + ((char *)nep - dirbuf),
- dp->i_offset + ((char *)ep - dirbuf));
+ I_OFFSET(dp) + ((char *)nep - dirbuf),
+ I_OFFSET(dp) + ((char *)ep - dirbuf));
#endif
if (DOINGSOFTDEP(dvp))
softdep_change_directoryentry_offset(bp, dp, dirbuf,
@@ -1094,19 +1083,19 @@
#ifdef UFS_DIRHASH
if (dp->i_dirhash != NULL && (ep->d_ino == 0 ||
dirp->d_reclen == spacefree))
- ufsdirhash_add(dp, dirp, dp->i_offset + ((char *)ep - dirbuf));
+ ufsdirhash_add(dp, dirp, I_OFFSET(dp) + ((char *)ep - dirbuf));
#endif
bcopy((caddr_t)dirp, (caddr_t)ep, (u_int)newentrysize);
#ifdef UFS_DIRHASH
if (dp->i_dirhash != NULL)
ufsdirhash_checkblock(dp, dirbuf -
- (dp->i_offset & (DIRBLKSIZ - 1)),
- rounddown2(dp->i_offset, DIRBLKSIZ));
+ (I_OFFSET(dp) & (DIRBLKSIZ - 1)),
+ rounddown2(I_OFFSET(dp), DIRBLKSIZ));
#endif
if (DOINGSOFTDEP(dvp)) {
(void) softdep_setup_directory_add(bp, dp,
- dp->i_offset + (caddr_t)ep - dirbuf,
+ I_OFFSET(dp) + (caddr_t)ep - dirbuf,
dirp->d_ino, newdirbp, 0);
if (newdirbp != NULL)
bdwrite(newdirbp);
@@ -1128,10 +1117,10 @@
* lock on the newly entered node.
*/
if (isrename == 0 && error == 0 &&
- dp->i_endoff && dp->i_endoff < dp->i_size) {
+ I_ENDOFF(dp) != 0 && I_ENDOFF(dp) < dp->i_size) {
if (tvp != NULL)
VOP_UNLOCK(tvp);
- error = UFS_TRUNCATE(dvp, (off_t)dp->i_endoff,
+ error = UFS_TRUNCATE(dvp, (off_t)I_ENDOFF(dp),
IO_NORMAL | (DOINGASYNC(dvp) ? 0 : IO_SYNC), cr);
if (error != 0)
vn_printf(dvp,
@@ -1139,7 +1128,7 @@
error);
#ifdef UFS_DIRHASH
if (error == 0 && dp->i_dirhash != NULL)
- ufsdirhash_dirtrunc(dp, dp->i_endoff);
+ ufsdirhash_dirtrunc(dp, I_ENDOFF(dp));
#endif
error = 0;
if (tvp != NULL)
@@ -1190,9 +1179,9 @@
}
}
if (flags & DOWHITEOUT)
- offset = dp->i_offset;
+ offset = I_OFFSET(dp);
else
- offset = dp->i_offset - dp->i_count;
+ offset = I_OFFSET(dp) - I_COUNT(dp);
if ((error = UFS_BLKATOFF(dvp, offset, (char **)&ep, &bp)) != 0) {
if (ip) {
ip->i_effnlink++;
@@ -1216,7 +1205,7 @@
goto out;
}
/* Set 'rep' to the entry being removed. */
- if (dp->i_count == 0)
+ if (I_COUNT(dp) == 0)
rep = ep;
else
rep = (struct direct *)((char *)ep + ep->d_reclen);
@@ -1226,7 +1215,7 @@
* that `ep' is the previous entry when dp->i_count != 0.
*/
if (dp->i_dirhash != NULL)
- ufsdirhash_remove(dp, rep, dp->i_offset);
+ ufsdirhash_remove(dp, rep, I_OFFSET(dp));
#endif
if (ip && rep->d_ino != ip->i_number)
panic("ufs_dirremove: ip %ju does not match dirent ino %ju\n",
@@ -1240,7 +1229,7 @@
rep->d_type = 0;
rep->d_ino = 0;
- if (dp->i_count != 0) {
+ if (I_COUNT(dp) != 0) {
/*
* Collapse new free space into previous entry.
*/
@@ -1250,8 +1239,8 @@
#ifdef UFS_DIRHASH
if (dp->i_dirhash != NULL)
ufsdirhash_checkblock(dp, (char *)ep -
- ((dp->i_offset - dp->i_count) & (DIRBLKSIZ - 1)),
- rounddown2(dp->i_offset, DIRBLKSIZ));
+ ((I_OFFSET(dp) - I_COUNT(dp)) & (DIRBLKSIZ - 1)),
+ rounddown2(I_OFFSET(dp), DIRBLKSIZ));
#endif
out:
error = 0;
@@ -1313,7 +1302,7 @@
UFS_INODE_SET_FLAG(oip, IN_CHANGE);
}
- error = UFS_BLKATOFF(vdp, (off_t)dp->i_offset, (char **)&ep, &bp);
+ error = UFS_BLKATOFF(vdp, (off_t)I_OFFSET(dp), (char **)&ep, &bp);
if (error == 0 && ep->d_namlen == 2 && ep->d_name[1] == '.' &&
ep->d_name[0] == '.' && ep->d_ino != oip->i_number) {
brelse(bp);
@@ -1522,3 +1511,115 @@
vput(vp);
return (error);
}
+
+#ifdef DIAGNOSTIC
+/*
+ * Verify that the scratch field guarded by 'tr' is being read by the
+ * same thread, under the same exclusive vnode lock generation, that
+ * last wrote it.  On violation, print both the setting and unlocking
+ * stacks and panic.  'name' identifies the field for the panic message.
+ */
+static void
+ufs_assert_inode_offset_owner(struct inode *ip, struct iown_tracker *tr,
+    const char *name, const char *file, int line)
+{
+	char msg[128];
+
+	snprintf(msg, sizeof(msg), "at %s@%d", file, line);
+	ASSERT_VOP_ELOCKED(ITOV(ip), msg);
+	MPASS((ip->i_mode & IFMT) == IFDIR);
+	if (curthread == tr->tr_owner && ip->i_lock_gen == tr->tr_gen)
+		return;
+	printf("locked at\n");
+	stack_print(&tr->tr_st);
+	printf("unlocked at\n");
+	stack_print(&tr->tr_unlock);
+	/*
+	 * %ju matches the (uintmax_t) cast (%jd is for intmax_t); guard
+	 * tr_owner, which is NULL until the first setter runs, so the
+	 * diagnostic panic cannot itself fault on a NULL dereference.
+	 */
+	panic("%s ip %p %ju offset owner %p %d gen %d "
+	    "curthread %p %d gen %d at %s@%d\n",
+	    name, ip, (uintmax_t)ip->i_number, tr->tr_owner,
+	    tr->tr_owner != NULL ? tr->tr_owner->td_tid : -1, tr->tr_gen,
+	    curthread, curthread->td_tid, ip->i_lock_gen,
+	    file, line);
+}
+
+/*
+ * Record curthread as the owner of the scratch field guarded by 'tr',
+ * tagged with the current vnode lock generation, and save the call
+ * stack for later diagnostics.  The caller must hold the directory
+ * vnode lock exclusively.
+ */
+static void
+ufs_set_inode_offset_owner(struct inode *ip, struct iown_tracker *tr,
+    const char *file, int line)
+{
+ char msg[128];
+
+ snprintf(msg, sizeof(msg), "at %s@%d", file, line);
+ ASSERT_VOP_ELOCKED(ITOV(ip), msg);
+ MPASS((ip->i_mode & IFMT) == IFDIR);
+ tr->tr_owner = curthread;
+ tr->tr_gen = ip->i_lock_gen;
+ stack_save(&tr->tr_st);
+}
+
+/*
+ * Reset a single tracker to the "never set" state.
+ * NOTE(review): tr_gen and tr_unlock are not cleared here -- presumably
+ * the containing inode is zero-initialized on allocation; confirm.
+ */
+static void
+ufs_init_one_tracker(struct iown_tracker *tr)
+{
+ tr->tr_owner = NULL;
+ stack_zero(&tr->tr_st);
+}
+
+/*
+ * Initialize all three scratch-field trackers of a directory inode.
+ */
+void
+ufs_init_trackers(struct inode *ip)
+{
+ ufs_init_one_tracker(&ip->i_offset_tracker);
+ ufs_init_one_tracker(&ip->i_count_tracker);
+ ufs_init_one_tracker(&ip->i_endoff_tracker);
+}
+
+/*
+ * Called when the vnode lock is dropped (or downgraded): snapshot the
+ * unlock stack for every tracker whose value is still current, then
+ * bump i_lock_gen so subsequent reads of stale values trip the
+ * ownership assertion.
+ */
+void
+ufs_unlock_tracker(struct inode *ip)
+{
+ if (ip->i_count_tracker.tr_gen == ip->i_lock_gen)
+  stack_save(&ip->i_count_tracker.tr_unlock);
+ if (ip->i_offset_tracker.tr_gen == ip->i_lock_gen)
+  stack_save(&ip->i_offset_tracker.tr_unlock);
+ if (ip->i_endoff_tracker.tr_gen == ip->i_lock_gen)
+  stack_save(&ip->i_endoff_tracker.tr_unlock);
+ ip->i_lock_gen++;
+}
+
+/* Checked read of i_offset: assert ownership, then return the value. */
+doff_t
+ufs_get_i_offset(struct inode *ip, const char *file, int line)
+{
+ ufs_assert_inode_offset_owner(ip, &ip->i_offset_tracker, "i_offset",
+     file, line);
+ return (ip->i_offset);
+}
+
+/* Checked write of i_offset: record ownership, then store the value. */
+void
+ufs_set_i_offset(struct inode *ip, doff_t off, const char *file, int line)
+{
+ ufs_set_inode_offset_owner(ip, &ip->i_offset_tracker, file, line);
+ ip->i_offset = off;
+}
+
+/* Checked read of i_count: assert ownership, then return the value. */
+int32_t
+ufs_get_i_count(struct inode *ip, const char *file, int line)
+{
+ ufs_assert_inode_offset_owner(ip, &ip->i_count_tracker, "i_count",
+     file, line);
+ return (ip->i_count);
+}
+
+/* Checked write of i_count: record ownership, then store the value. */
+void
+ufs_set_i_count(struct inode *ip, int32_t cnt, const char *file, int line)
+{
+ ufs_set_inode_offset_owner(ip, &ip->i_count_tracker, file, line);
+ ip->i_count = cnt;
+}
+
+/* Checked read of i_endoff: assert ownership, then return the value. */
+doff_t
+ufs_get_i_endoff(struct inode *ip, const char *file, int line)
+{
+ ufs_assert_inode_offset_owner(ip, &ip->i_endoff_tracker, "i_endoff",
+     file, line);
+ return (ip->i_endoff);
+}
+
+/* Checked write of i_endoff: record ownership, then store the value. */
+void
+ufs_set_i_endoff(struct inode *ip, doff_t off, const char *file, int line)
+{
+ ufs_set_inode_offset_owner(ip, &ip->i_endoff_tracker, file, line);
+ ip->i_endoff = off;
+}
+
+#endif
Index: sys/ufs/ufs/ufs_vnops.c
===================================================================
--- sys/ufs/ufs/ufs_vnops.c
+++ sys/ufs/ufs/ufs_vnops.c
@@ -1006,10 +1006,16 @@
td = curthread;
ip = VTOI(vp);
if ((ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) ||
- (VTOI(dvp)->i_flags & APPEND)) {
- error = EPERM;
- goto out;
+ (VTOI(dvp)->i_flags & APPEND))
+ return (EPERM);
+ if (DOINGSOFTDEP(dvp)) {
+ error = softdep_prelink(dvp, vp, true);
+ if (error != 0) {
+ MPASS(error == ERELOOKUP);
+ return (error);
+ }
}
+
#ifdef UFS_GJOURNAL
ufs_gjournal_orphan(vp);
#endif
@@ -1030,7 +1036,6 @@
(void) VOP_FSYNC(dvp, MNT_WAIT, td);
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
}
-out:
return (error);
}
@@ -1067,6 +1072,15 @@
if ((cnp->cn_flags & HASBUF) == 0)
panic("ufs_link: no name");
#endif
+
+ if (DOINGSOFTDEP(tdvp)) {
+ error = softdep_prelink(tdvp, vp, true);
+ if (error != 0) {
+ MPASS(error == ERELOOKUP);
+ return (error);
+ }
+ }
+
if (VTOI(tdvp)->i_effnlink < 2) {
print_bad_link_count("ufs_link", tdvp);
error = EINVAL;
@@ -1089,6 +1103,7 @@
error = EPERM;
goto out;
}
+
ip->i_effnlink++;
ip->i_nlink++;
DIP_SET(ip, i_nlink, ip->i_nlink);
@@ -1129,6 +1144,15 @@
struct direct newdir;
int error = 0;
+ if (DOINGSOFTDEP(dvp) && (ap->a_flags == CREATE ||
+ ap->a_flags == DELETE)) {
+ error = softdep_prelink(dvp, NULL, true);
+ if (error != 0) {
+ MPASS(error == ERELOOKUP);
+ return (error);
+ }
+ }
+
switch (ap->a_flags) {
case LOOKUP:
/* 4.4 format directories support whiteout operations */
@@ -1338,6 +1362,18 @@
goto relock;
}
}
+
+ if (DOINGSOFTDEP(fdvp)) {
+ error = softdep_prerename(fdvp, fvp, tdvp, tvp);
+ if (error != 0) {
+ if (error == ERELOOKUP) {
+ atomic_add_int(&rename_restarts, 1);
+ goto relock;
+ }
+ goto releout;
+ }
+ }
+
fdp = VTOI(fdvp);
fip = VTOI(fvp);
tdp = VTOI(tdvp);
@@ -1481,9 +1517,9 @@
if (error)
goto bad;
/* Setup tdvp for directory compaction if needed. */
- if (tdp->i_count && tdp->i_endoff &&
- tdp->i_endoff < tdp->i_size)
- endoff = tdp->i_endoff;
+ if (I_COUNT(tdp) != 0 && I_ENDOFF(tdp) != 0 &&
+ I_ENDOFF(tdp) < tdp->i_size)
+ endoff = I_ENDOFF(tdp);
} else {
if (ITODEV(tip) != ITODEV(tdp) || ITODEV(tip) != ITODEV(fip))
panic("ufs_rename: EXDEV");
@@ -1611,7 +1647,7 @@
} else if (DOINGSUJ(tdvp))
/* Journal must account for each new link. */
softdep_setup_dotdot_link(tdp, fip);
- fip->i_offset = mastertemplate.dot_reclen;
+ SET_I_OFFSET(fip, mastertemplate.dot_reclen);
ufs_dirrewrite(fip, fdp, newparent, DT_DIR, 0);
cache_purge(fdvp);
}
@@ -1649,8 +1685,10 @@
* are no longer needed.
*/
if (error == 0 && endoff != 0) {
- error = UFS_TRUNCATE(tdvp, endoff, IO_NORMAL |
- (DOINGASYNC(tdvp) ? 0 : IO_SYNC), tcnp->cn_cred);
+ do {
+ error = UFS_TRUNCATE(tdvp, endoff, IO_NORMAL |
+ (DOINGASYNC(tdvp) ? 0 : IO_SYNC), tcnp->cn_cred);
+ } while (error == ERELOOKUP);
if (error != 0 && !ffs_fsfail_cleanup(VFSTOUFS(mp), error))
vn_printf(tdvp,
"ufs_rename: failed to truncate, error %d\n",
@@ -1668,8 +1706,11 @@
*/
error = 0;
}
- if (error == 0 && tdp->i_flag & IN_NEEDSYNC)
- error = VOP_FSYNC(tdvp, MNT_WAIT, td);
+ if (error == 0 && tdp->i_flag & IN_NEEDSYNC) {
+ do {
+ error = VOP_FSYNC(tdvp, MNT_WAIT, td);
+ } while (error == ERELOOKUP);
+ }
vput(tdvp);
return (error);
@@ -1918,6 +1959,7 @@
}
dmode = vap->va_mode & 0777;
dmode |= IFDIR;
+
/*
* Must simulate part of ufs_makeinode here to acquire the inode,
* but not have it entered in the parent directory. The entry is
@@ -1928,6 +1970,15 @@
error = EINVAL;
goto out;
}
+
+ if (DOINGSOFTDEP(dvp)) {
+ error = softdep_prelink(dvp, NULL, true);
+ if (error != 0) {
+ MPASS(error == ERELOOKUP);
+ return (error);
+ }
+ }
+
error = UFS_VALLOC(dvp, dmode, cnp->cn_cred, &tvp);
if (error)
goto out;
@@ -2184,6 +2235,14 @@
error = EINVAL;
goto out;
}
+ if (DOINGSOFTDEP(dvp)) {
+ error = softdep_prelink(dvp, vp, false);
+ if (error != 0) {
+ MPASS(error == ERELOOKUP);
+ return (error);
+ }
+ }
+
#ifdef UFS_GJOURNAL
ufs_gjournal_orphan(vp);
#endif
@@ -2703,6 +2762,13 @@
print_bad_link_count(callfunc, dvp);
return (EINVAL);
}
+ if (DOINGSOFTDEP(dvp)) {
+ error = softdep_prelink(dvp, NULL, true);
+ if (error != 0) {
+ MPASS(error == ERELOOKUP);
+ return (error);
+ }
+ }
error = UFS_VALLOC(dvp, mode, cnp->cn_cred, &tvp);
if (error)
return (error);

File Metadata

Mime Type
text/plain
Expires
Thu, Mar 6, 4:01 PM (8 h, 32 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
17016534
Default Alt Text
D26136.id79022.diff (54 KB)

Event Timeline