Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F144985774
D26136.id78375.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
51 KB
Referenced Files
None
Subscribers
None
D26136.id78375.diff
View Options
Index: sys/fs/nullfs/null_vnops.c
===================================================================
--- sys/fs/nullfs/null_vnops.c
+++ sys/fs/nullfs/null_vnops.c
@@ -227,6 +227,7 @@
struct vnode *old_vps[VDESC_MAX_VPS];
struct vnode **vps_p[VDESC_MAX_VPS];
struct vnode ***vppp;
+ struct vnode *lvp;
struct vnodeop_desc *descp = ap->a_desc;
int reles, i;
@@ -295,6 +296,23 @@
if (descp->vdesc_vp_offsets[i] == VDESC_NO_OFFSET)
break; /* bail out at end of list */
if (old_vps[i]) {
+ lvp = *(vps_p[i]);
+
+ /*
+ * If lowervp was unlocked during VOP
+ * operation, nullfs upper vnode could have
+ * been reclaimed, which changes its v_vnlock
+ * back to private v_lock. In this case we
+ * must move lock ownership from lower to
+ * upper (reclaimed) vnode.
+ */
+ if (lvp != NULLVP &&
+ VOP_ISLOCKED(lvp) == LK_EXCLUSIVE &&
+ old_vps[i]->v_vnlock != lvp->v_vnlock) {
+ VOP_UNLOCK(lvp);
+ VOP_LOCK(old_vps[i], LK_EXCLUSIVE | LK_RETRY);
+ }
+
*(vps_p[i]) = old_vps[i];
#if 0
if (reles & VDESC_VP0_WILLUNLOCK)
Index: sys/kern/uipc_usrreq.c
===================================================================
--- sys/kern/uipc_usrreq.c
+++ sys/kern/uipc_usrreq.c
@@ -671,6 +671,8 @@
vput(nd.ni_dvp);
if (error) {
vn_finished_write(mp);
+ if (error == ERELOOKUP)
+ goto restart;
goto error;
}
vp = nd.ni_vp;
Index: sys/kern/vfs_subr.c
===================================================================
--- sys/kern/vfs_subr.c
+++ sys/kern/vfs_subr.c
@@ -1794,6 +1794,8 @@
VNASSERT(vp->v_cache_dd == NULL, vp, ("vp has namecache for .."));
VNASSERT(TAILQ_EMPTY(&vp->v_rl.rl_waiters), vp,
("Dangling rangelock waiters"));
+ VNASSERT((vp->v_iflag & (VI_DOINGINACT | VI_OWEINACT)) == 0, vp,
+ ("Leaked inactivation"));
VI_UNLOCK(vp);
#ifdef MAC
mac_vnode_destroy(vp);
@@ -3803,7 +3805,7 @@
struct thread *td;
struct mount *mp;
vm_object_t object;
- bool active, oweinact;
+ bool active, doinginact, oweinact;
ASSERT_VOP_ELOCKED(vp, "vgonel");
ASSERT_VI_LOCKED(vp, "vgonel");
@@ -3825,11 +3827,17 @@
vp->v_irflag |= VIRF_DOOMED;
/*
- * Check to see if the vnode is in use. If so, we have to call
- * VOP_CLOSE() and VOP_INACTIVE().
+ * Check to see if the vnode is in use. If so, we have to
+ * call VOP_CLOSE() and VOP_INACTIVE().
+ *
+ * It could be that VOP_INACTIVE() requested reclamation, in
+ * which case we should avoid recursion, so check
+ * VI_DOINGINACT. This is not precise but good enough.
*/
active = vp->v_usecount > 0;
oweinact = (vp->v_iflag & VI_OWEINACT) != 0;
+ doinginact = (vp->v_iflag & VI_DOINGINACT) != 0;
+
/*
* If we need to do inactive VI_OWEINACT will be set.
*/
@@ -3850,7 +3858,7 @@
*/
if (active)
VOP_CLOSE(vp, FNONBLOCK, NOCRED, td);
- if (oweinact || active) {
+ if ((oweinact || active) && !doinginact) {
VI_LOCK(vp);
vinactivef(vp);
VI_UNLOCK(vp);
Index: sys/kern/vfs_syscalls.c
===================================================================
--- sys/kern/vfs_syscalls.c
+++ sys/kern/vfs_syscalls.c
@@ -1384,6 +1384,8 @@
NDFREE(&nd, NDF_ONLY_PNBUF);
vput(nd.ni_dvp);
vn_finished_write(mp);
+ if (error == ERELOOKUP)
+ goto restart;
return (error);
}
@@ -1470,6 +1472,8 @@
vput(nd.ni_dvp);
vn_finished_write(mp);
NDFREE(&nd, NDF_ONLY_PNBUF);
+ if (error == ERELOOKUP)
+ goto restart;
return (error);
}
@@ -1568,7 +1572,7 @@
return (error);
NDFREE(&nd, NDF_ONLY_PNBUF);
error = kern_linkat_vp(td, nd.ni_vp, fd2, path2, segflag);
- } while (error == EAGAIN);
+ } while (error == EAGAIN || error == ERELOOKUP);
return (error);
}
@@ -1741,6 +1745,8 @@
NDFREE(&nd, NDF_ONLY_PNBUF);
vput(nd.ni_dvp);
vn_finished_write(mp);
+ if (error == ERELOOKUP)
+ goto restart;
out:
if (segflg != UIO_SYSSPACE)
uma_zfree(namei_zone, tmppath);
@@ -1791,6 +1797,8 @@
NDFREE(&nd, NDF_ONLY_PNBUF);
vput(nd.ni_dvp);
vn_finished_write(mp);
+ if (error == ERELOOKUP)
+ goto restart;
return (error);
}
@@ -1937,6 +1945,8 @@
vrele(vp);
else
vput(vp);
+ if (error == ERELOOKUP)
+ goto restart;
fdout:
if (fp != NULL)
fdrop(fp, td);
@@ -3710,6 +3720,8 @@
vrele(fromnd.ni_startdir);
if (error == -1)
return (0);
+ if (error == ERELOOKUP)
+ goto again;
return (error);
}
@@ -3803,6 +3815,8 @@
if (error == 0)
vput(nd.ni_vp);
vn_finished_write(mp);
+ if (error == ERELOOKUP)
+ goto restart;
return (error);
}
@@ -3903,6 +3917,8 @@
vrele(nd.ni_dvp);
else
vput(nd.ni_dvp);
+ if (error == ERELOOKUP)
+ goto restart;
fdout:
if (fp != NULL)
fdrop(fp, td);
@@ -4416,7 +4432,8 @@
if (error != 0)
return (error);
VOP_UNLOCK(vp);
- } while ((error = kern_linkat_vp(td, vp, fd, path, pathseg)) == EAGAIN);
+ error = kern_linkat_vp(td, vp, fd, path, pathseg);
+ } while (error == EAGAIN || error == ERELOOKUP);
return (error);
}
Index: sys/kern/vfs_vnops.c
===================================================================
--- sys/kern/vfs_vnops.c
+++ sys/kern/vfs_vnops.c
@@ -70,6 +70,7 @@
#include <sys/filio.h>
#include <sys/resourcevar.h>
#include <sys/rwlock.h>
+#include <sys/prng.h>
#include <sys/sx.h>
#include <sys/sleepqueue.h>
#include <sys/sysctl.h>
@@ -274,6 +275,8 @@
vn_finished_write(mp);
if (error) {
NDFREE(ndp, NDF_ONLY_PNBUF);
+ if (error == ERELOOKUP)
+ goto restart;
return (error);
}
fmode &= ~O_TRUNC;
@@ -3315,3 +3318,66 @@
return (error);
}
+
+void
+vn_lock_pair(struct vnode *vp1, bool vp1_locked, struct vnode *vp2,
+ bool vp2_locked)
+{
+ int error;
+
+ if (vp1 != NULL) {
+ if (vp1_locked)
+ ASSERT_VOP_ELOCKED(vp1, "vp1");
+ else
+ ASSERT_VOP_UNLOCKED(vp1, "vp1");
+ } else {
+ vp1_locked = true;
+ }
+ if (vp2 != NULL) {
+ if (vp2_locked)
+ ASSERT_VOP_ELOCKED(vp2, "vp2");
+ else
+ ASSERT_VOP_UNLOCKED(vp2, "vp2");
+ } else {
+ vp2_locked = true;
+ }
+ if (!vp1_locked && !vp2_locked) {
+ vn_lock(vp1, LK_EXCLUSIVE | LK_RETRY);
+ vp1_locked = true;
+ }
+
+ for (;;) {
+ if (vp1_locked && vp2_locked)
+ break;
+ if (vp1_locked && vp2 != NULL) {
+ if (vp1 != NULL) {
+ error = VOP_LOCK1(vp2, LK_EXCLUSIVE | LK_NOWAIT,
+ __FILE__, __LINE__);
+ if (error == 0)
+ break;
+ VOP_UNLOCK(vp1);
+ vp1_locked = false;
+ pause("vlp1", prng32_bounded(100));
+ }
+ vn_lock(vp2, LK_EXCLUSIVE | LK_RETRY);
+ vp2_locked = true;
+ }
+ if (vp2_locked && vp1 != NULL) {
+ if (vp2 != NULL) {
+ error = VOP_LOCK1(vp1, LK_EXCLUSIVE | LK_NOWAIT,
+ __FILE__, __LINE__);
+ if (error == 0)
+ break;
+ VOP_UNLOCK(vp2);
+ vp2_locked = false;
+ pause("vlp2", prng32_bounded(100));
+ }
+ vn_lock(vp1, LK_EXCLUSIVE | LK_RETRY);
+ vp1_locked = true;
+ }
+ }
+ if (vp1 != NULL)
+ ASSERT_VOP_ELOCKED(vp1, "vp1 ret");
+ if (vp2 != NULL)
+ ASSERT_VOP_ELOCKED(vp2, "vp2 ret");
+}
Index: sys/sys/vnode.h
===================================================================
--- sys/sys/vnode.h
+++ sys/sys/vnode.h
@@ -761,6 +761,9 @@
int vn_io_fault_pgmove(vm_page_t ma[], vm_offset_t offset, int xfersize,
struct uio *uio);
+void vn_lock_pair(struct vnode *vp1, bool vp1_locked, struct vnode *vp2,
+ bool vp2_locked);
+
void vn_seqc_write_begin_unheld_locked(struct vnode *vp);
void vn_seqc_write_begin_unheld(struct vnode *vp);
void vn_seqc_write_begin_locked(struct vnode *vp);
Index: sys/ufs/ffs/ffs_alloc.c
===================================================================
--- sys/ufs/ffs/ffs_alloc.c
+++ sys/ufs/ffs/ffs_alloc.c
@@ -3468,7 +3468,7 @@
break;
}
dp = VTOI(dvp);
- dp->i_offset = 12; /* XXX mastertemplate.dot_reclen */
+ SET_I_OFFSET(dp, 12); /* XXX mastertemplate.dot_reclen */
error = ufs_dirrewrite(dp, VTOI(fdvp), (ino_t)cmd.size,
DT_DIR, 0);
cache_purge(fdvp);
Index: sys/ufs/ffs/ffs_extern.h
===================================================================
--- sys/ufs/ffs/ffs_extern.h
+++ sys/ufs/ffs/ffs_extern.h
@@ -173,6 +173,9 @@
void softdep_freefile(struct vnode *, ino_t, int);
int softdep_request_cleanup(struct fs *, struct vnode *,
struct ucred *, int);
+int softdep_prerename(struct vnode *, struct vnode *, struct vnode *,
+ struct vnode *);
+int softdep_prelink(struct vnode *, struct vnode *, int);
void softdep_setup_freeblocks(struct inode *, off_t, int);
void softdep_setup_inomapdep(struct buf *, struct inode *, ino_t, int);
void softdep_setup_blkmapdep(struct buf *, struct mount *, ufs2_daddr_t,
Index: sys/ufs/ffs/ffs_inode.c
===================================================================
--- sys/ufs/ffs/ffs_inode.c
+++ sys/ufs/ffs/ffs_inode.c
@@ -67,6 +67,17 @@
static int ffs_indirtrunc(struct inode *, ufs2_daddr_t, ufs2_daddr_t,
ufs2_daddr_t, int, ufs2_daddr_t *);
+static void
+ffs_inode_bwrite(struct vnode *vp, struct buf *bp, int flags)
+{
+ if ((flags & IO_SYNC) != 0)
+ bwrite(bp);
+ else if (DOINGASYNC(vp))
+ bdwrite(bp);
+ else
+ bawrite(bp);
+}
+
/*
* Update the access, modified, and inode change times as specified by the
* IN_ACCESS, IN_UPDATE, and IN_CHANGE flags respectively. Write the inode
@@ -357,12 +368,7 @@
DIP_SET(ip, i_size, length);
if (bp->b_bufsize == fs->fs_bsize)
bp->b_flags |= B_CLUSTEROK;
- if (flags & IO_SYNC)
- bwrite(bp);
- else if (DOINGASYNC(vp))
- bdwrite(bp);
- else
- bawrite(bp);
+ ffs_inode_bwrite(vp, bp, flags);
UFS_INODE_SET_FLAG(ip, IN_SIZEMOD | IN_CHANGE | IN_UPDATE);
return (ffs_update(vp, waitforupdate));
}
@@ -456,6 +462,8 @@
error = UFS_BALLOC(vp, length - 1, 1, cred, flags, &bp);
if (error)
return (error);
+ ffs_inode_bwrite(vp, bp, flags);
+
/*
* When we are doing soft updates and the UFS_BALLOC
* above fills in a direct block hole with a full sized
@@ -468,6 +476,10 @@
fragroundup(fs, blkoff(fs, length)) < fs->fs_bsize &&
(error = ffs_syncvnode(vp, MNT_WAIT, 0)) != 0)
return (error);
+
+ error = UFS_BALLOC(vp, length - 1, 1, cred, flags, &bp);
+ if (error)
+ return (error);
ip->i_size = length;
DIP_SET(ip, i_size, length);
size = blksize(fs, ip, lbn);
@@ -478,12 +490,7 @@
allocbuf(bp, size);
if (bp->b_bufsize == fs->fs_bsize)
bp->b_flags |= B_CLUSTEROK;
- if (flags & IO_SYNC)
- bwrite(bp);
- else if (DOINGASYNC(vp))
- bdwrite(bp);
- else
- bawrite(bp);
+ ffs_inode_bwrite(vp, bp, flags);
UFS_INODE_SET_FLAG(ip, IN_SIZEMOD | IN_CHANGE | IN_UPDATE);
}
/*
Index: sys/ufs/ffs/ffs_snapshot.c
===================================================================
--- sys/ufs/ffs/ffs_snapshot.c
+++ sys/ufs/ffs/ffs_snapshot.c
@@ -301,6 +301,8 @@
NDFREE(&nd, NDF_ONLY_PNBUF);
vn_finished_write(wrtmp);
vrele(nd.ni_dvp);
+ if (error == ERELOOKUP)
+ goto restart;
return (error);
}
vp = nd.ni_vp;
@@ -368,8 +370,12 @@
if (error)
goto out;
bawrite(nbp);
- if (cg % 10 == 0)
- ffs_syncvnode(vp, MNT_WAIT, 0);
+ if (cg % 10 == 0) {
+ error = ffs_syncvnode(vp, MNT_WAIT, 0);
+ /* vp possibly reclaimed if unlocked */
+ if (error != 0)
+ goto out;
+ }
}
/*
* Copy all the cylinder group maps. Although the
@@ -391,8 +397,8 @@
goto out;
error = cgaccount(cg, vp, nbp, 1);
bawrite(nbp);
- if (cg % 10 == 0)
- ffs_syncvnode(vp, MNT_WAIT, 0);
+ if (cg % 10 == 0 && error == 0)
+ error = ffs_syncvnode(vp, MNT_WAIT, 0);
if (error)
goto out;
}
Index: sys/ufs/ffs/ffs_softdep.c
===================================================================
--- sys/ufs/ffs/ffs_softdep.c
+++ sys/ufs/ffs/ffs_softdep.c
@@ -609,6 +609,26 @@
panic("softdep_freework called");
}
+int
+softdep_prerename(fdvp, fvp, tdvp, tvp)
+ struct vnode *fdvp;
+ struct vnode *fvp;
+ struct vnode *tdvp;
+ struct vnode *tvp;
+{
+
+ panic("softdep_prerename called");
+}
+
+int
+softdep_prelink(dvp, vp, will_direnter)
+	struct vnode *dvp;
+	struct vnode *vp;
+	int will_direnter;
+{
+	panic("softdep_prelink called");
+}
+
#else
FEATURE(softupdates, "FFS soft-updates support");
@@ -748,7 +768,7 @@
static void clear_unlinked_inodedep(struct inodedep *);
static struct inodedep *first_unlinked_inodedep(struct ufsmount *);
static int flush_pagedep_deps(struct vnode *, struct mount *,
- struct diraddhd *);
+ struct diraddhd *, struct buf *);
static int free_pagedep(struct pagedep *);
static int flush_newblk_dep(struct vnode *, struct mount *, ufs_lbn_t);
static int flush_inodedep_deps(struct vnode *, struct mount *, ino_t);
@@ -925,7 +945,6 @@
static int journal_space(struct ufsmount *, int);
static void journal_suspend(struct ufsmount *);
static int journal_unsuspend(struct ufsmount *ump);
-static void softdep_prelink(struct vnode *, struct vnode *);
static void add_to_journal(struct worklist *);
static void remove_from_journal(struct worklist *);
static bool softdep_excess_items(struct ufsmount *, int);
@@ -1389,6 +1408,112 @@
/* List of all filesystems mounted with soft updates */
static TAILQ_HEAD(, mount_softdeps) softdepmounts;
+static int
+get_parent_vp(struct vnode *vp, struct mount *mp, ino_t inum, struct buf *bp,
+ struct diraddhd *diraddhdp, struct diraddhd *unfinishedp,
+ struct vnode **rvp)
+{
+ struct vnode *pvp;
+ struct diradd *dap;
+ int error;
+ bool bplocked;
+
+ ASSERT_VOP_ELOCKED(vp, "child vnode must be locked");
+ for (bplocked = true, pvp = NULL;;) {
+ error = ffs_vgetf(mp, inum, LK_EXCLUSIVE | LK_NOWAIT, &pvp,
+ FFSV_FORCEINSMQ);
+ if (error == 0) {
+ /*
+ * Since we could have unlocked vp, the inode
+ * number could no longer indicate a
+ * constructed node. In this case, we must
+ * restart the syscall.
+ */
+ if (VTOI(pvp)->i_mode == 0 || !bplocked) {
+ if (VTOI(pvp)->i_mode == 0)
+ vgone(pvp);
+ vput(pvp);
+ error = ERELOOKUP;
+ goto out;
+ }
+
+ error = 0;
+ goto out1;
+ }
+ if (bp != NULL && bplocked) {
+ /*
+ * Requeue unfinished dependencies before
+ * unlocking buffer, which could make
+ * diraddhdp invalid.
+ */
+ ACQUIRE_LOCK(VFSTOUFS(mp));
+ while ((dap = LIST_FIRST(unfinishedp)) != NULL) {
+ LIST_REMOVE(dap, da_pdlist);
+ LIST_INSERT_HEAD(diraddhdp, dap, da_pdlist);
+ }
+ FREE_LOCK(VFSTOUFS(mp));
+ bp->b_vflags &= ~BV_SCANNED;
+ BUF_NOREC(bp);
+ BUF_UNLOCK(bp);
+ bplocked = false;
+ }
+
+ /*
+ * Do not drop vnode lock while inactivating. This
+ * would result in leaks of the VI flags and
+ * reclaiming of non-truncated vnode. Instead,
+ * re-schedule inactivation hoping that we would be
+ * able to sync inode later.
+ */
+ if ((vp->v_iflag & VI_DOINGINACT) != 0) {
+ VI_LOCK(vp);
+ vp->v_iflag |= VI_OWEINACT;
+ VI_UNLOCK(vp);
+ return (ERELOOKUP);
+ }
+
+ VOP_UNLOCK(vp);
+ error = ffs_vgetf(mp, inum, LK_EXCLUSIVE, &pvp,
+ FFSV_FORCEINSMQ);
+ if (error != 0) {
+ MPASS(error != ERELOOKUP);
+ vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
+ break;
+ }
+ if (VTOI(pvp)->i_mode == 0) {
+ vgone(pvp);
+ vput(pvp);
+ pvp = NULL;
+ vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
+ error = ERELOOKUP;
+ break;
+ }
+ error = vn_lock(vp, LK_EXCLUSIVE | LK_NOWAIT);
+ if (error == 0)
+ break;
+ vput(pvp);
+ pvp = NULL;
+ vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
+ if (vp->v_data == NULL) {
+ error = ENOENT;
+ break;
+ }
+ }
+ if (bp != NULL) {
+ MPASS(!bplocked);
+ error = ERELOOKUP;
+ }
+ if (error != 0 && pvp != NULL) {
+ vput(pvp);
+ pvp = NULL;
+ }
+out1:
+ *rvp = pvp;
+out:
+ ASSERT_VOP_ELOCKED(vp, "child vnode must be locked on return");
+ return (error);
+}
+
/*
* This function cleans the worklist for a filesystem.
* Each filesystem running with soft dependencies gets its own
@@ -3095,48 +3220,191 @@
return (0);
}
+static int
+softdep_prehandle_vnode(ump, vp)
+ struct ufsmount *ump;
+ struct vnode *vp;
+{
+ int error;
+
+ ASSERT_VOP_ELOCKED(vp, "prehandle");
+ if (vp->v_data == NULL)
+ return (0);
+ error = VOP_FSYNC(vp, MNT_WAIT, curthread);
+ if (error != 0)
+ return (error);
+ ACQUIRE_LOCK(ump);
+ process_removes(vp);
+ process_truncates(vp);
+ FREE_LOCK(ump);
+ return (0);
+}
+
+int
+softdep_prerename(fdvp, fvp, tdvp, tvp)
+ struct vnode *fdvp;
+ struct vnode *fvp;
+ struct vnode *tdvp;
+ struct vnode *tvp;
+{
+ struct ufsmount *ump;
+ int error;
+
+ ump = VFSTOUFS(fdvp->v_mount);
+
+ if (journal_space(ump, 0))
+ return (0);
+
+ VOP_UNLOCK(tdvp);
+ VOP_UNLOCK(fvp);
+ if (tvp != NULL && tvp != tdvp)
+ VOP_UNLOCK(tvp);
+
+ error = softdep_prehandle_vnode(ump, fdvp);
+ VOP_UNLOCK(fdvp);
+ if (error != 0)
+ return (error);
+
+ VOP_LOCK(fvp, LK_EXCLUSIVE | LK_RETRY);
+ error = softdep_prehandle_vnode(ump, fvp);
+ VOP_UNLOCK(fvp);
+ if (error != 0)
+ return (error);
+
+ if (tdvp != fdvp) {
+ VOP_LOCK(tdvp, LK_EXCLUSIVE | LK_RETRY);
+ error = softdep_prehandle_vnode(ump, tdvp);
+ VOP_UNLOCK(tdvp);
+ if (error != 0)
+ return (error);
+ }
+
+ if (tvp != fvp && tvp != NULL) {
+ VOP_LOCK(tvp, LK_EXCLUSIVE | LK_RETRY);
+ error = softdep_prehandle_vnode(ump, tvp);
+ VOP_UNLOCK(tvp);
+ if (error != 0)
+ return (error);
+ }
+
+ ACQUIRE_LOCK(ump);
+ softdep_speedup(ump);
+ process_worklist_item(UFSTOVFS(ump), 2, LK_NOWAIT);
+ if (journal_space(ump, 0) == 0) {
+ softdep_speedup(ump);
+ if (journal_space(ump, 1) == 0)
+ journal_suspend(ump);
+ }
+ FREE_LOCK(ump);
+ return (ERELOOKUP);
+}
+
/*
* Before adjusting a link count on a vnode verify that we have sufficient
* journal space. If not, process operations that depend on the currently
* locked pair of vnodes to try to flush space as the syncer, buf daemon,
* and softdep flush threads can not acquire these locks to reclaim space.
*/
-static void
-softdep_prelink(dvp, vp)
+int
+softdep_prelink(dvp, vp, will_direnter)
struct vnode *dvp;
struct vnode *vp;
+ int will_direnter;
{
struct ufsmount *ump;
+ int error, error1;
+ ASSERT_VOP_ELOCKED(dvp, "prelink dvp");
+ if (vp != NULL)
+ ASSERT_VOP_ELOCKED(vp, "prelink vp");
ump = VFSTOUFS(dvp->v_mount);
- LOCK_OWNED(ump);
+
/*
* Nothing to do if we have sufficient journal space.
* If we currently hold the snapshot lock, we must avoid
* handling other resources that could cause deadlock.
+ *
+ * In case allocated a directory block in an
+ * indirect block, we must prevent holes in the
+ * directory created if directory entries are
+ * written out of order. To accomplish this we
+ * fsync when we extend a directory into indirects.
+ * During rename it's not safe to drop the tvp lock
+ * so sync must be delayed until it is.
+ *
+ * This synchronous step could be removed if fsck and
+ * the kernel were taught to fill in sparse
+ * directories rather than panic.
*/
- if (journal_space(ump, 0) || (vp && IS_SNAPSHOT(VTOI(vp))))
- return;
+ if (journal_space(ump, 0) || (vp != NULL && IS_SNAPSHOT(VTOI(vp)))) {
+ error = 0;
+ if (will_direnter && (vp == NULL || !IS_SNAPSHOT(VTOI(vp)))) {
+ if (vp != NULL)
+ VOP_UNLOCK(vp);
+ error = ffs_syncvnode(dvp, MNT_WAIT, 0);
+ if (vp != NULL) {
+ error1 = vn_lock(vp, LK_EXCLUSIVE | LK_NOWAIT);
+ if (error1 != 0) {
+ vn_lock_pair(dvp, true, vp, false);
+ if (error == 0)
+ error = ERELOOKUP;
+ } else if (vp->v_data == NULL) {
+ error = ERELOOKUP;
+ }
+ }
+ }
+ return (error);
+ }
+
stat_journal_low++;
- FREE_LOCK(ump);
- if (vp)
+ if (vp != NULL) {
+ VOP_UNLOCK(dvp);
ffs_syncvnode(vp, MNT_NOWAIT, 0);
+ vn_lock_pair(dvp, false, vp, true);
+ if (dvp->v_data == NULL)
+ return (ERELOOKUP);
+ }
+ if (vp != NULL)
+ VOP_UNLOCK(vp);
ffs_syncvnode(dvp, MNT_WAIT, 0);
- ACQUIRE_LOCK(ump);
+ VOP_UNLOCK(dvp);
+
/* Process vp before dvp as it may create .. removes. */
- if (vp) {
+ if (vp != NULL) {
+ vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
+ if (vp->v_data == NULL) {
+ vn_lock_pair(dvp, false, vp, true);
+ return (ERELOOKUP);
+ }
+ ACQUIRE_LOCK(ump);
process_removes(vp);
process_truncates(vp);
+ FREE_LOCK(ump);
+ VOP_UNLOCK(vp);
+ }
+
+ vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
+ if (dvp->v_data == NULL) {
+ vn_lock_pair(dvp, true, vp, false);
+ return (ERELOOKUP);
}
+
+ ACQUIRE_LOCK(ump);
process_removes(dvp);
process_truncates(dvp);
+ VOP_UNLOCK(dvp);
softdep_speedup(ump);
+
process_worklist_item(UFSTOVFS(ump), 2, LK_NOWAIT);
if (journal_space(ump, 0) == 0) {
softdep_speedup(ump);
if (journal_space(ump, 1) == 0)
journal_suspend(ump);
}
+ FREE_LOCK(ump);
+
+ vn_lock_pair(dvp, false, vp, false);
+ return (ERELOOKUP);
}
static void
@@ -4742,7 +5010,6 @@
KASSERT(jaddref != NULL && jaddref->ja_parent == dp->i_number,
("softdep_setup_create: No addref structure present."));
}
- softdep_prelink(dvp, NULL);
FREE_LOCK(ITOUMP(dp));
}
@@ -4777,7 +5044,6 @@
if (jaddref)
TAILQ_INSERT_TAIL(&inodedep->id_inoreflst, &jaddref->ja_ref,
if_deps);
- softdep_prelink(dvp, ITOV(ip));
FREE_LOCK(ITOUMP(dp));
}
@@ -4808,7 +5074,6 @@
if (jaddref)
TAILQ_INSERT_TAIL(&inodedep->id_inoreflst, &jaddref->ja_ref,
if_deps);
- softdep_prelink(dvp, ITOV(ip));
FREE_LOCK(ITOUMP(dp));
}
@@ -4858,7 +5123,6 @@
if (DOINGSUJ(dvp))
TAILQ_INSERT_TAIL(&inodedep->id_inoreflst,
&dotdotaddref->ja_ref, if_deps);
- softdep_prelink(ITOV(dp), NULL);
FREE_LOCK(ITOUMP(dp));
}
@@ -4879,7 +5143,6 @@
ACQUIRE_LOCK(ITOUMP(dp));
(void) inodedep_lookup_ip(ip);
(void) inodedep_lookup_ip(dp);
- softdep_prelink(dvp, ITOV(ip));
FREE_LOCK(ITOUMP(dp));
}
@@ -4900,7 +5163,6 @@
ACQUIRE_LOCK(ITOUMP(dp));
(void) inodedep_lookup_ip(ip);
(void) inodedep_lookup_ip(dp);
- softdep_prelink(dvp, ITOV(ip));
FREE_LOCK(ITOUMP(dp));
}
@@ -8764,11 +9026,11 @@
if (MOUNTEDSUJ(mp)) {
flags = DEPALLOC;
jmvref = newjmvref(dp, de->d_ino,
- dp->i_offset + (oldloc - base),
- dp->i_offset + (newloc - base));
+ I_OFFSET(dp) + (oldloc - base),
+ I_OFFSET(dp) + (newloc - base));
}
- lbn = lblkno(ump->um_fs, dp->i_offset);
- offset = blkoff(ump->um_fs, dp->i_offset);
+ lbn = lblkno(ump->um_fs, I_OFFSET(dp));
+ offset = blkoff(ump->um_fs, I_OFFSET(dp));
oldoffset = offset + (oldloc - base);
newoffset = offset + (newloc - base);
ACQUIRE_LOCK(ump);
@@ -9280,7 +9542,7 @@
jremref = dotremref = dotdotremref = NULL;
if (DOINGSUJ(dvp)) {
if (isrmdir) {
- jremref = newjremref(dirrem, dp, ip, dp->i_offset,
+ jremref = newjremref(dirrem, dp, ip, I_OFFSET(dp),
ip->i_effnlink + 2);
dotremref = newjremref(dirrem, ip, ip, DOT_OFFSET,
ip->i_effnlink + 1);
@@ -9288,12 +9550,12 @@
dp->i_effnlink + 1);
dotdotremref->jr_state |= MKDIR_PARENT;
} else
- jremref = newjremref(dirrem, dp, ip, dp->i_offset,
+ jremref = newjremref(dirrem, dp, ip, I_OFFSET(dp),
ip->i_effnlink + 1);
}
ACQUIRE_LOCK(ump);
- lbn = lblkno(ump->um_fs, dp->i_offset);
- offset = blkoff(ump->um_fs, dp->i_offset);
+ lbn = lblkno(ump->um_fs, I_OFFSET(dp));
+ offset = blkoff(ump->um_fs, I_OFFSET(dp));
pagedep_lookup(UFSTOVFS(ump), bp, dp->i_number, lbn, DEPALLOC,
&pagedep);
dirrem->dm_pagedep = pagedep;
@@ -9304,7 +9566,7 @@
* the jremref is preserved for any potential diradd in this
* location. This can not coincide with a rmdir.
*/
- if (dp->i_offset == DOTDOT_OFFSET) {
+ if (I_OFFSET(dp) == DOTDOT_OFFSET) {
if (isrmdir)
panic("newdirrem: .. directory change during remove?");
jremref = cancel_mkdir_dotdot(dp, dirrem, jremref);
@@ -9405,7 +9667,7 @@
mp = ITOVFS(dp);
ump = VFSTOUFS(mp);
- offset = blkoff(ump->um_fs, dp->i_offset);
+ offset = blkoff(ump->um_fs, I_OFFSET(dp));
KASSERT(MOUNTEDSOFTDEP(mp) != 0,
("softdep_setup_directory_change called on non-softdep filesystem"));
@@ -9508,7 +9770,7 @@
KASSERT(jaddref != NULL && jaddref->ja_parent == dp->i_number,
("softdep_setup_directory_change: bad jaddref %p",
jaddref));
- jaddref->ja_diroff = dp->i_offset;
+ jaddref->ja_diroff = I_OFFSET(dp);
jaddref->ja_diradd = dap;
LIST_INSERT_HEAD(&pagedep->pd_diraddhd[DIRADDHASH(offset)],
dap, da_pdlist);
@@ -9527,7 +9789,7 @@
* committed when need to move the dot and dotdot references to
* this new name.
*/
- if (inodedep->id_mkdiradd && dp->i_offset != DOTDOT_OFFSET)
+ if (inodedep->id_mkdiradd && I_OFFSET(dp) != DOTDOT_OFFSET)
merge_diradd(inodedep, dap);
FREE_LOCK(ump);
}
@@ -12622,25 +12884,12 @@
* for details on possible races.
*/
FREE_LOCK(ump);
- if (ffs_vgetf(mp, parentino, LK_NOWAIT | LK_EXCLUSIVE, &pvp,
- FFSV_FORCEINSMQ)) {
- /*
- * Unmount cannot proceed after unlock because
- * caller must have called vn_start_write().
- */
- VOP_UNLOCK(vp);
- error = ffs_vgetf(mp, parentino, LK_EXCLUSIVE,
- &pvp, FFSV_FORCEINSMQ);
- MPASS(VTOI(pvp)->i_mode != 0);
- vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
- if (VN_IS_DOOMED(vp)) {
- if (error == 0)
- vput(pvp);
- error = ENOENT;
- }
- if (error != 0)
- return (error);
- }
+ error = get_parent_vp(vp, mp, parentino, NULL, NULL, NULL,
+ &pvp);
+ if (error == ERELOOKUP)
+ error = 0;
+ if (error != 0)
+ return (error);
/*
* All MKDIR_PARENT dependencies and all the NEWBLOCK pagedeps
* that are contained in direct blocks will be resolved by
@@ -12964,9 +13213,11 @@
for (i = 0; i < DAHASHSZ; i++) {
if (LIST_FIRST(&pagedep->pd_diraddhd[i]) == 0)
continue;
- if ((error = flush_pagedep_deps(vp, wk->wk_mp,
- &pagedep->pd_diraddhd[i]))) {
- BUF_NOREC(bp);
+ error = flush_pagedep_deps(vp, wk->wk_mp,
+ &pagedep->pd_diraddhd[i], bp);
+ if (error != 0) {
+ if (error != ERELOOKUP)
+ BUF_NOREC(bp);
goto out_unlock;
}
}
@@ -13200,10 +13451,11 @@
* Eliminate a pagedep dependency by flushing out all its diradd dependencies.
*/
static int
-flush_pagedep_deps(pvp, mp, diraddhdp)
+flush_pagedep_deps(pvp, mp, diraddhdp, locked_bp)
struct vnode *pvp;
struct mount *mp;
struct diraddhd *diraddhdp;
+ struct buf *locked_bp;
{
struct inodedep *inodedep;
struct inoref *inoref;
@@ -13270,10 +13522,10 @@
}
if (dap->da_state & MKDIR_BODY) {
FREE_LOCK(ump);
- if ((error = ffs_vgetf(mp, inum, LK_EXCLUSIVE, &vp,
- FFSV_FORCEINSMQ)))
+ error = get_parent_vp(pvp, mp, inum, locked_bp,
+ diraddhdp, &unfinished, &vp);
+ if (error != 0)
break;
- MPASS(VTOI(vp)->i_mode != 0);
error = flush_newblk_dep(vp, mp, 0);
/*
* If we still have the dependency we might need to
@@ -13335,10 +13587,10 @@
*/
if (dap == LIST_FIRST(diraddhdp)) {
FREE_LOCK(ump);
- if ((error = ffs_vgetf(mp, inum, LK_EXCLUSIVE, &vp,
- FFSV_FORCEINSMQ)))
+ error = get_parent_vp(pvp, mp, inum, locked_bp,
+ diraddhdp, &unfinished, &vp);
+ if (error != 0)
break;
- MPASS(VTOI(vp)->i_mode != 0);
error = ffs_update(vp, 1);
vput(vp);
if (error)
Index: sys/ufs/ffs/ffs_vfsops.c
===================================================================
--- sys/ufs/ffs/ffs_vfsops.c
+++ sys/ufs/ffs/ffs_vfsops.c
@@ -2005,6 +2005,9 @@
ip->i_nextclustercg = -1;
ip->i_flag = fs->fs_magic == FS_UFS1_MAGIC ? 0 : IN_UFS2;
ip->i_mode = 0; /* ensure error cases below throw away vnode */
+#ifdef DIAGNOSTIC
+ ufs_init_trackers(ip);
+#endif
#ifdef QUOTA
{
int i;
Index: sys/ufs/ffs/ffs_vnops.c
===================================================================
--- sys/ufs/ffs/ffs_vnops.c
+++ sys/ufs/ffs/ffs_vnops.c
@@ -253,7 +253,7 @@
struct buf *bp, *nbp;
ufs_lbn_t lbn;
int error, passes;
- bool still_dirty, wait;
+ bool still_dirty, unlocked, wait;
ip = VTOI(vp);
ip->i_flag &= ~IN_NEEDSYNC;
@@ -277,6 +277,7 @@
error = 0;
passes = 0;
wait = false; /* Always do an async pass first. */
+ unlocked = false;
lbn = lblkno(ITOFS(ip), (ip->i_size + ITOFS(ip)->fs_bsize - 1));
BO_LOCK(bo);
loop:
@@ -325,6 +326,26 @@
if (!LIST_EMPTY(&bp->b_dep) &&
(error = softdep_sync_buf(vp, bp,
wait ? MNT_WAIT : MNT_NOWAIT)) != 0) {
+ /*
+ * Lock order conflict, buffer was already unlocked,
+ * and vnode possibly unlocked.
+ */
+ if (error == ERELOOKUP) {
+ if (vp->v_data == NULL)
+ return (EBADF);
+ unlocked = true;
+ if (DOINGSOFTDEP(vp) && waitfor == MNT_WAIT &&
+ (error = softdep_sync_metadata(vp)) != 0) {
+ if (ffs_fsfail_cleanup(ump, error))
+ error = 0;
+ return (unlocked && error == 0 ?
+ ERELOOKUP : error);
+ }
+ /* Re-evaluate inode size */
+ lbn = lblkno(ITOFS(ip), (ip->i_size +
+ ITOFS(ip)->fs_bsize - 1));
+ goto next;
+ }
/* I/O error. */
if (error != EBUSY) {
BUF_UNLOCK(bp);
@@ -361,9 +382,11 @@
if (waitfor != MNT_WAIT) {
BO_UNLOCK(bo);
if ((flags & NO_INO_UPDT) != 0)
- return (0);
- else
- return (ffs_update(vp, 0));
+ return (unlocked ? ERELOOKUP : 0);
+ error = ffs_update(vp, 0);
+ if (error == 0 && unlocked)
+ error = ERELOOKUP;
+ return (error);
}
/* Drain IO to see if we're done. */
bufobj_wwait(bo, 0, 0);
@@ -419,6 +442,8 @@
} else if ((ip->i_flags & (IN_SIZEMOD | IN_IBLKDATA)) != 0) {
error = ffs_update(vp, 1);
}
+ if (error == 0 && unlocked)
+ error = ERELOOKUP;
return (error);
}
@@ -434,16 +459,18 @@
struct vop_lock1_args /* {
struct vnode *a_vp;
int a_flags;
- struct thread *a_td;
char *file;
int line;
} */ *ap;
{
+ struct vnode *vp = ap->a_vp;
+#ifdef DIAGNOSTIC
+ struct inode *ip;
+#endif
+ int result;
#ifndef NO_FFS_SNAPSHOT
- struct vnode *vp;
int flags;
struct lock *lkp;
- int result;
/*
* Adaptive spinning mixed with SU leads to trouble. use a giant hammer
@@ -456,7 +483,6 @@
case LK_SHARED:
case LK_UPGRADE:
case LK_EXCLUSIVE:
- vp = ap->a_vp;
flags = ap->a_flags;
for (;;) {
#ifdef DEBUG_VFS_LOCKS
@@ -483,28 +509,65 @@
flags = (flags & ~LK_TYPE_MASK) | LK_EXCLUSIVE;
flags &= ~LK_INTERLOCK;
}
+ switch (ap->a_flags & LK_TYPE_MASK) {
+ case LK_UPGRADE:
+ case LK_EXCLUSIVE:
+ if (result == 0 && vp->v_vnlock->lk_recurse == 0) {
+ ip = VTOI(vp);
+ if (ip != NULL)
+ ip->i_lock_gen++;
+ }
+ }
break;
default:
+#ifdef DIAGNOSTIC
+ if ((ap->a_flags & LK_TYPE_MASK) == LK_DOWNGRADE) {
+ ip = VTOI(vp);
+ if (ip != NULL)
+ ufs_unlock_tracker(ip);
+ }
+#endif
result = VOP_LOCK1_APV(&ufs_vnodeops, ap);
+ break;
}
- return (result);
#else
/*
* See above for an explanation.
*/
if ((ap->a_flags & LK_NODDLKTREAT) != 0)
ap->a_flags |= LK_ADAPTIVE;
- return (VOP_LOCK1_APV(&ufs_vnodeops, ap));
+#ifdef DIAGNOSTIC
+ if ((ap->a_flags & LK_TYPE_MASK) == LK_DOWNGRADE) {
+ ip = VTOI(vp);
+ if (ip != NULL)
+ ufs_unlock_tracker(ip);
+ }
#endif
+ result = VOP_LOCK1_APV(&ufs_vnodeops, ap);
+#endif
+#ifdef DIAGNOSTIC
+ switch (ap->a_flags & LK_TYPE_MASK) {
+ case LK_UPGRADE:
+ case LK_EXCLUSIVE:
+ if (result == 0 && vp->v_vnlock->lk_recurse == 0) {
+ ip = VTOI(vp);
+ if (ip != NULL)
+ ip->i_lock_gen++;
+ }
+ }
+#endif
+ return (result);
}
#ifdef INVARIANTS
static int
ffs_unlock_debug(struct vop_unlock_args *ap)
{
- struct vnode *vp = ap->a_vp;
- struct inode *ip = VTOI(vp);
+ struct vnode *vp;
+ struct inode *ip;
+ vp = ap->a_vp;
+ ip = VTOI(vp);
if (ip->i_flag & UFS_INODE_FLAG_LAZY_MASK_ASSERTABLE) {
if ((vp->v_mflag & VMP_LAZYLIST) == 0) {
VI_LOCK(vp);
@@ -514,6 +577,11 @@
VI_UNLOCK(vp);
}
}
+#ifdef DIAGNOSTIC
+ if (VOP_ISLOCKED(vp) == LK_EXCLUSIVE && ip != NULL &&
+ vp->v_vnlock->lk_recurse == 0)
+ ufs_unlock_tracker(ip);
+#endif
return (VOP_UNLOCK_APV(&ufs_vnodeops, ap));
}
#endif
Index: sys/ufs/ufs/inode.h
===================================================================
--- sys/ufs/ufs/inode.h
+++ sys/ufs/ufs/inode.h
@@ -44,12 +44,24 @@
#include <sys/queue.h>
#include <ufs/ufs/dinode.h>
#include <sys/seqc.h>
+#ifdef DIAGNOSTIC
+#include <sys/stack.h>
+#endif
/*
* This must agree with the definition in <ufs/ufs/dir.h>.
*/
#define doff_t int32_t
+#ifdef DIAGNOSTIC
+struct iown_tracker {
+ struct thread *tr_owner;
+ struct stack tr_st;
+ struct stack tr_unlock;
+ int tr_gen;
+};
+#endif
+
/*
* The inode is used to describe each active (or recently active) file in the
* UFS filesystem. It is composed of two types of information. The first part
@@ -94,6 +106,12 @@
doff_t i_endoff; /* End of useful stuff in directory. */
doff_t i_diroff; /* Offset in dir, where we found last entry. */
doff_t i_offset; /* Offset of free space in directory. */
+#ifdef DIAGNOSTIC
+ int i_lock_gen;
+ struct iown_tracker i_count_tracker;
+ struct iown_tracker i_endoff_tracker;
+ struct iown_tracker i_offset_tracker;
+#endif
int i_nextclustercg; /* last cg searched for cluster */
@@ -254,6 +272,35 @@
uint32_t ufid_ino; /* File number (ino). */
uint32_t ufid_gen; /* Generation number. */
};
+
+#ifdef DIAGNOSTIC
+void ufs_init_trackers(struct inode *ip);
+void ufs_unlock_tracker(struct inode *ip);
+
+doff_t ufs_get_i_offset(struct inode *ip, const char *file, int line);
+void ufs_set_i_offset(struct inode *ip, doff_t off, const char *file, int line);
+#define I_OFFSET(ip) ufs_get_i_offset(ip, __FILE__, __LINE__)
+#define SET_I_OFFSET(ip, off) ufs_set_i_offset(ip, off, __FILE__, __LINE__)
+
+int32_t ufs_get_i_count(struct inode *ip, const char *file, int line);
+void ufs_set_i_count(struct inode *ip, int32_t cnt, const char *file, int line);
+#define I_COUNT(ip) ufs_get_i_count(ip, __FILE__, __LINE__)
+#define SET_I_COUNT(ip, cnt) ufs_set_i_count(ip, cnt, __FILE__, __LINE__)
+
+doff_t ufs_get_i_endoff(struct inode *ip, const char *file, int line);
+void ufs_set_i_endoff(struct inode *ip, doff_t off, const char *file, int line);
+#define I_ENDOFF(ip) ufs_get_i_endoff(ip, __FILE__, __LINE__)
+#define SET_I_ENDOFF(ip, off) ufs_set_i_endoff(ip, off, __FILE__, __LINE__)
+
+#else
+#define I_OFFSET(ip) ((ip)->i_offset)
+#define SET_I_OFFSET(ip, off) ((ip)->i_offset = (off))
+#define I_COUNT(ip) ((ip)->i_count)
+#define SET_I_COUNT(ip, cnt) ((ip)->i_count = cnt)
+#define I_ENDOFF(ip) ((ip)->i_endoff)
+#define SET_I_ENDOFF(ip, off) ((ip)->i_endoff = off)
+#endif
+
#endif /* _KERNEL */
#endif /* !_UFS_UFS_INODE_H_ */
Index: sys/ufs/ufs/ufs_inode.c
===================================================================
--- sys/ufs/ufs/ufs_inode.c
+++ sys/ufs/ufs/ufs_inode.c
@@ -167,7 +167,8 @@
isize += ip->i_din2->di_extsize;
if (ip->i_effnlink <= 0 && isize && !UFS_RDONLY(ip))
error = UFS_TRUNCATE(vp, (off_t)0, IO_EXT | IO_NORMAL, NOCRED);
- if (ip->i_nlink <= 0 && ip->i_mode && !UFS_RDONLY(ip)) {
+ if (ip->i_nlink <= 0 && ip->i_mode != 0 && !UFS_RDONLY(ip) &&
+ (vp->v_iflag & VI_OWEINACT) == 0) {
#ifdef QUOTA
if (!getinoquota(ip))
(void)chkiq(ip, -1, NOCRED, FORCE);
@@ -208,10 +209,12 @@
* If we are done with the inode, reclaim it
* so that it can be reused immediately.
*/
- if (ip->i_mode == 0)
+ if (ip->i_mode == 0 && (vp->v_iflag & VI_OWEINACT) == 0)
vrecycle(vp);
if (mp != NULL)
vn_finished_secondary_write(mp);
+ if (error == ERELOOKUP)
+ error = 0;
return (error);
}
Index: sys/ufs/ufs/ufs_lookup.c
===================================================================
--- sys/ufs/ufs/ufs_lookup.c
+++ sys/ufs/ufs/ufs_lookup.c
@@ -66,6 +66,7 @@
#endif
#include <ufs/ufs/ufsmount.h>
#include <ufs/ufs/ufs_extern.h>
+#include <ufs/ffs/ffs_extern.h>
#ifdef DIAGNOSTIC
static int dirchk = 1;
@@ -504,22 +505,22 @@
* dp->i_offset + dp->i_count.
*/
if (slotstatus == NONE) {
- dp->i_offset = roundup2(dp->i_size, DIRBLKSIZ);
- dp->i_count = 0;
- enduseful = dp->i_offset;
+ SET_I_OFFSET(dp, roundup2(dp->i_size, DIRBLKSIZ));
+ SET_I_COUNT(dp, 0);
+ enduseful = I_OFFSET(dp);
} else if (nameiop == DELETE) {
- dp->i_offset = slotoffset;
- if ((dp->i_offset & (DIRBLKSIZ - 1)) == 0)
- dp->i_count = 0;
+ SET_I_OFFSET(dp, slotoffset);
+ if ((I_OFFSET(dp) & (DIRBLKSIZ - 1)) == 0)
+ SET_I_COUNT(dp, 0);
else
- dp->i_count = dp->i_offset - prevoff;
+ SET_I_COUNT(dp, I_OFFSET(dp) - prevoff);
} else {
- dp->i_offset = slotoffset;
- dp->i_count = slotsize;
+ SET_I_OFFSET(dp, slotoffset);
+ SET_I_COUNT(dp, slotsize);
if (enduseful < slotoffset + slotsize)
enduseful = slotoffset + slotsize;
}
- dp->i_endoff = roundup2(enduseful, DIRBLKSIZ);
+ SET_I_ENDOFF(dp, roundup2(enduseful, DIRBLKSIZ));
/*
* We return with the directory locked, so that
* the parameters we set up above will still be
@@ -575,24 +576,32 @@
if (nameiop == DELETE && (flags & ISLASTCN)) {
if (flags & LOCKPARENT)
ASSERT_VOP_ELOCKED(vdp, __FUNCTION__);
- /*
- * Return pointer to current entry in dp->i_offset,
- * and distance past previous entry (if there
- * is a previous entry in this block) in dp->i_count.
- * Save directory inode pointer in ndp->ni_dvp for dirremove().
- *
- * Technically we shouldn't be setting these in the
- * WANTPARENT case (first lookup in rename()), but any
- * lookups that will result in directory changes will
- * overwrite these.
- */
- dp->i_offset = i_offset;
- if ((dp->i_offset & (DIRBLKSIZ - 1)) == 0)
- dp->i_count = 0;
- else
- dp->i_count = dp->i_offset - prevoff;
+
+ if (VOP_ISLOCKED(vdp) == LK_EXCLUSIVE) {
+ /*
+ * Return pointer to current entry in
+ * dp->i_offset, and distance past previous
+ * entry (if there is a previous entry in this
+ * block) in dp->i_count.
+ *
+ * We shouldn't be setting these in the
+ * WANTPARENT case (first lookup in rename()), but any
+ * lookups that will result in directory changes will
+ * overwrite these.
+ */
+ SET_I_OFFSET(dp, i_offset);
+ if ((I_OFFSET(dp) & (DIRBLKSIZ - 1)) == 0)
+ SET_I_COUNT(dp, 0);
+ else
+ SET_I_COUNT(dp, I_OFFSET(dp) - prevoff);
+ }
if (dd_ino != NULL)
return (0);
+
+ /*
+ * Save directory inode pointer in ndp->ni_dvp for
+ * dirremove().
+ */
if ((error = VFS_VGET(vdp->v_mount, ino,
LK_EXCLUSIVE, &tdp)) != 0)
return (error);
@@ -629,7 +638,7 @@
* Careful about locking second inode.
* This can only occur if the target is ".".
*/
- dp->i_offset = i_offset;
+ SET_I_OFFSET(dp, i_offset);
if (dp->i_number == ino)
return (EISDIR);
if (dd_ino != NULL)
@@ -887,14 +896,14 @@
dp = VTOI(dvp);
newentrysize = DIRSIZ(OFSFMT(dvp), dirp);
- if (dp->i_count == 0) {
+ if (I_COUNT(dp) == 0) {
/*
* If dp->i_count is 0, then namei could find no
* space in the directory. Here, dp->i_offset will
* be on a directory block boundary and we will write the
* new entry into a fresh block.
*/
- if (dp->i_offset & (DIRBLKSIZ - 1))
+ if (I_OFFSET(dp) & (DIRBLKSIZ - 1))
panic("ufs_direnter: newblk");
flags = BA_CLRBUF;
if (!DOINGSOFTDEP(dvp) && !DOINGASYNC(dvp))
@@ -907,28 +916,28 @@
}
#endif
old_isize = dp->i_size;
- vnode_pager_setsize(dvp, (u_long)dp->i_offset + DIRBLKSIZ);
- if ((error = UFS_BALLOC(dvp, (off_t)dp->i_offset, DIRBLKSIZ,
+ vnode_pager_setsize(dvp, (u_long)I_OFFSET(dp) + DIRBLKSIZ);
+ if ((error = UFS_BALLOC(dvp, (off_t)I_OFFSET(dp), DIRBLKSIZ,
cr, flags, &bp)) != 0) {
if (DOINGSOFTDEP(dvp) && newdirbp != NULL)
bdwrite(newdirbp);
vnode_pager_setsize(dvp, (u_long)old_isize);
return (error);
}
- dp->i_size = dp->i_offset + DIRBLKSIZ;
+ dp->i_size = I_OFFSET(dp) + DIRBLKSIZ;
DIP_SET(dp, i_size, dp->i_size);
- dp->i_endoff = dp->i_size;
+ SET_I_ENDOFF(dp, dp->i_size);
UFS_INODE_SET_FLAG(dp, IN_SIZEMOD | IN_CHANGE | IN_UPDATE);
dirp->d_reclen = DIRBLKSIZ;
- blkoff = dp->i_offset &
+ blkoff = I_OFFSET(dp) &
(VFSTOUFS(dvp->v_mount)->um_mountp->mnt_stat.f_iosize - 1);
bcopy((caddr_t)dirp, (caddr_t)bp->b_data + blkoff,newentrysize);
#ifdef UFS_DIRHASH
if (dp->i_dirhash != NULL) {
- ufsdirhash_newblk(dp, dp->i_offset);
- ufsdirhash_add(dp, dirp, dp->i_offset);
+ ufsdirhash_newblk(dp, I_OFFSET(dp));
+ ufsdirhash_add(dp, dirp, I_OFFSET(dp));
ufsdirhash_checkblock(dp, (char *)bp->b_data + blkoff,
- dp->i_offset);
+ I_OFFSET(dp));
}
#endif
if (DOINGSOFTDEP(dvp)) {
@@ -944,7 +953,7 @@
(bp->b_data + blkoff))->d_reclen = DIRBLKSIZ;
blkoff += DIRBLKSIZ;
}
- if (softdep_setup_directory_add(bp, dp, dp->i_offset,
+ if (softdep_setup_directory_add(bp, dp, I_OFFSET(dp),
dirp->d_ino, newdirbp, 1))
UFS_INODE_SET_FLAG(dp, IN_NEEDSYNC);
if (newdirbp)
@@ -952,27 +961,7 @@
bdwrite(bp);
if ((dp->i_flag & IN_NEEDSYNC) == 0)
return (UFS_UPDATE(dvp, 0));
- /*
- * We have just allocated a directory block in an
- * indirect block. We must prevent holes in the
- * directory created if directory entries are
- * written out of order. To accomplish this we
- * fsync when we extend a directory into indirects.
- * During rename it's not safe to drop the tvp lock
- * so sync must be delayed until it is.
- *
- * This synchronous step could be removed if fsck and
- * the kernel were taught to fill in sparse
- * directories rather than panic.
- */
- if (isrename)
- return (0);
- if (tvp != NULL)
- VOP_UNLOCK(tvp);
- (void) VOP_FSYNC(dvp, MNT_WAIT, td);
- if (tvp != NULL)
- vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY);
- return (error);
+ return (0);
}
if (DOINGASYNC(dvp)) {
bdwrite(bp);
@@ -1001,15 +990,15 @@
*
* N.B. - THIS IS AN ARTIFACT OF 4.2 AND SHOULD NEVER HAPPEN.
*/
- if (dp->i_offset + dp->i_count > dp->i_size) {
- dp->i_size = dp->i_offset + dp->i_count;
+ if (I_OFFSET(dp) + I_COUNT(dp) > dp->i_size) {
+ dp->i_size = I_OFFSET(dp) + I_COUNT(dp);
DIP_SET(dp, i_size, dp->i_size);
UFS_INODE_SET_FLAG(dp, IN_SIZEMOD | IN_MODIFIED);
}
/*
* Get the block containing the space for the new directory entry.
*/
- error = UFS_BLKATOFF(dvp, (off_t)dp->i_offset, &dirbuf, &bp);
+ error = UFS_BLKATOFF(dvp, (off_t)I_OFFSET(dp), &dirbuf, &bp);
if (error) {
if (DOINGSOFTDEP(dvp) && newdirbp != NULL)
bdwrite(newdirbp);
@@ -1024,7 +1013,7 @@
ep = (struct direct *)dirbuf;
dsize = ep->d_ino ? DIRSIZ(OFSFMT(dvp), ep) : 0;
spacefree = ep->d_reclen - dsize;
- for (loc = ep->d_reclen; loc < dp->i_count; ) {
+ for (loc = ep->d_reclen; loc < I_COUNT(dp); ) {
nep = (struct direct *)(dirbuf + loc);
/* Trim the existing slot (NB: dsize may be zero). */
@@ -1052,8 +1041,8 @@
#ifdef UFS_DIRHASH
if (dp->i_dirhash != NULL)
ufsdirhash_move(dp, nep,
- dp->i_offset + ((char *)nep - dirbuf),
- dp->i_offset + ((char *)ep - dirbuf));
+ I_OFFSET(dp) + ((char *)nep - dirbuf),
+ I_OFFSET(dp) + ((char *)ep - dirbuf));
#endif
if (DOINGSOFTDEP(dvp))
softdep_change_directoryentry_offset(bp, dp, dirbuf,
@@ -1094,19 +1083,19 @@
#ifdef UFS_DIRHASH
if (dp->i_dirhash != NULL && (ep->d_ino == 0 ||
dirp->d_reclen == spacefree))
- ufsdirhash_add(dp, dirp, dp->i_offset + ((char *)ep - dirbuf));
+ ufsdirhash_add(dp, dirp, I_OFFSET(dp) + ((char *)ep - dirbuf));
#endif
bcopy((caddr_t)dirp, (caddr_t)ep, (u_int)newentrysize);
#ifdef UFS_DIRHASH
if (dp->i_dirhash != NULL)
ufsdirhash_checkblock(dp, dirbuf -
- (dp->i_offset & (DIRBLKSIZ - 1)),
- rounddown2(dp->i_offset, DIRBLKSIZ));
+ (I_OFFSET(dp) & (DIRBLKSIZ - 1)),
+ rounddown2(I_OFFSET(dp), DIRBLKSIZ));
#endif
if (DOINGSOFTDEP(dvp)) {
(void) softdep_setup_directory_add(bp, dp,
- dp->i_offset + (caddr_t)ep - dirbuf,
+ I_OFFSET(dp) + (caddr_t)ep - dirbuf,
dirp->d_ino, newdirbp, 0);
if (newdirbp != NULL)
bdwrite(newdirbp);
@@ -1128,10 +1117,10 @@
* lock on the newly entered node.
*/
if (isrename == 0 && error == 0 &&
- dp->i_endoff && dp->i_endoff < dp->i_size) {
+ I_ENDOFF(dp) != 0 && I_ENDOFF(dp) < dp->i_size) {
if (tvp != NULL)
VOP_UNLOCK(tvp);
- error = UFS_TRUNCATE(dvp, (off_t)dp->i_endoff,
+ error = UFS_TRUNCATE(dvp, (off_t)I_ENDOFF(dp),
IO_NORMAL | (DOINGASYNC(dvp) ? 0 : IO_SYNC), cr);
if (error != 0)
vn_printf(dvp,
@@ -1139,7 +1128,7 @@
error);
#ifdef UFS_DIRHASH
if (error == 0 && dp->i_dirhash != NULL)
- ufsdirhash_dirtrunc(dp, dp->i_endoff);
+ ufsdirhash_dirtrunc(dp, I_ENDOFF(dp));
#endif
error = 0;
if (tvp != NULL)
@@ -1190,9 +1179,9 @@
}
}
if (flags & DOWHITEOUT)
- offset = dp->i_offset;
+ offset = I_OFFSET(dp);
else
- offset = dp->i_offset - dp->i_count;
+ offset = I_OFFSET(dp) - I_COUNT(dp);
if ((error = UFS_BLKATOFF(dvp, offset, (char **)&ep, &bp)) != 0) {
if (ip) {
ip->i_effnlink++;
@@ -1216,7 +1205,7 @@
goto out;
}
/* Set 'rep' to the entry being removed. */
- if (dp->i_count == 0)
+ if (I_COUNT(dp) == 0)
rep = ep;
else
rep = (struct direct *)((char *)ep + ep->d_reclen);
@@ -1226,7 +1215,7 @@
* that `ep' is the previous entry when dp->i_count != 0.
*/
if (dp->i_dirhash != NULL)
- ufsdirhash_remove(dp, rep, dp->i_offset);
+ ufsdirhash_remove(dp, rep, I_OFFSET(dp));
#endif
if (ip && rep->d_ino != ip->i_number)
panic("ufs_dirremove: ip %ju does not match dirent ino %ju\n",
@@ -1240,7 +1229,7 @@
rep->d_type = 0;
rep->d_ino = 0;
- if (dp->i_count != 0) {
+ if (I_COUNT(dp) != 0) {
/*
* Collapse new free space into previous entry.
*/
@@ -1250,8 +1239,8 @@
#ifdef UFS_DIRHASH
if (dp->i_dirhash != NULL)
ufsdirhash_checkblock(dp, (char *)ep -
- ((dp->i_offset - dp->i_count) & (DIRBLKSIZ - 1)),
- rounddown2(dp->i_offset, DIRBLKSIZ));
+ ((I_OFFSET(dp) - I_COUNT(dp)) & (DIRBLKSIZ - 1)),
+ rounddown2(I_OFFSET(dp), DIRBLKSIZ));
#endif
out:
error = 0;
@@ -1313,7 +1302,7 @@
UFS_INODE_SET_FLAG(oip, IN_CHANGE);
}
- error = UFS_BLKATOFF(vdp, (off_t)dp->i_offset, (char **)&ep, &bp);
+ error = UFS_BLKATOFF(vdp, (off_t)I_OFFSET(dp), (char **)&ep, &bp);
if (error == 0 && ep->d_namlen == 2 && ep->d_name[1] == '.' &&
ep->d_name[0] == '.' && ep->d_ino != oip->i_number) {
brelse(bp);
@@ -1522,3 +1511,115 @@
vput(vp);
return (error);
}
+
+#ifdef DIAGNOSTIC
+/*
+ * Verify that the current thread validly owns the cached directory
+ * offset state guarded by tracker 'tr': the directory vnode must be
+ * exclusively locked, the caller must be the thread that last stored
+ * the value, and the lock generation must not have changed since the
+ * store.  On violation, print the recorded lock/unlock stacks and
+ * panic.
+ */
+static void
+ufs_assert_inode_offset_owner(struct inode *ip, struct iown_tracker *tr,
+ const char *name, const char *file, int line)
+{
+ char msg[128];
+
+ snprintf(msg, sizeof(msg), "at %s@%d", file, line);
+ ASSERT_VOP_ELOCKED(ITOV(ip), msg);
+ MPASS((ip->i_mode & IFMT) == IFDIR);
+ if (curthread == tr->tr_owner && ip->i_lock_gen == tr->tr_gen)
+ return;
+ printf("locked at\n");
+ stack_print(&tr->tr_st);
+ printf("unlocked at\n");
+ stack_print(&tr->tr_unlock);
+ panic("%s ip %p %ju offset owner %p %d gen %d "
+ "curthread %p %d gen %d at %s@%d\n",
+ name, ip, (uintmax_t)ip->i_number, tr->tr_owner,
+ tr->tr_owner != NULL ? tr->tr_owner->td_tid : -1, tr->tr_gen,
+ curthread, curthread->td_tid, ip->i_lock_gen,
+ file, line);
+}
+
+/*
+ * Record the calling thread as the owner of the cached directory
+ * offset state guarded by 'tr', capturing the current stack and lock
+ * generation.  The directory vnode must be exclusively locked.
+ */
+static void
+ufs_set_inode_offset_owner(struct inode *ip, struct iown_tracker *tr,
+ const char *file, int line)
+{
+ char where[128];
+
+ snprintf(where, sizeof(where), "at %s@%d", file, line);
+ ASSERT_VOP_ELOCKED(ITOV(ip), where);
+ MPASS((ip->i_mode & IFMT) == IFDIR);
+ stack_save(&tr->tr_st);
+ tr->tr_gen = ip->i_lock_gen;
+ tr->tr_owner = curthread;
+}
+
+/*
+ * Reset all state of a single ownership tracker.  tr_unlock is
+ * printed and tr_gen is compared in the assertion/unlock paths before
+ * they are ever stored, so do not leave them stale.
+ */
+static void
+ufs_init_one_tracker(struct iown_tracker *tr)
+{
+ tr->tr_owner = NULL;
+ stack_zero(&tr->tr_st);
+ stack_zero(&tr->tr_unlock);
+ tr->tr_gen = 0;
+}
+
+/* Initialize all per-inode directory offset ownership trackers. */
+void
+ufs_init_trackers(struct inode *ip)
+{
+ ufs_init_one_tracker(&ip->i_count_tracker);
+ ufs_init_one_tracker(&ip->i_endoff_tracker);
+ ufs_init_one_tracker(&ip->i_offset_tracker);
+}
+
+/*
+ * Called when the directory vnode lock is dropped: snapshot the
+ * unlock stack of every tracker whose cached state was stored under
+ * the current lock generation, then advance the generation so that
+ * later use of the now-stale cached offsets is detected.
+ */
+void
+ufs_unlock_tracker(struct inode *ip)
+{
+ struct iown_tracker *trackers[] = {
+ &ip->i_count_tracker, &ip->i_offset_tracker, &ip->i_endoff_tracker,
+ };
+ u_int i;
+
+ for (i = 0; i < sizeof(trackers) / sizeof(trackers[0]); i++) {
+ if (trackers[i]->tr_gen == ip->i_lock_gen)
+ stack_save(&trackers[i]->tr_unlock);
+ }
+ ip->i_lock_gen++;
+}
+
+/*
+ * Fetch the cached directory entry offset (i_offset), asserting that
+ * the calling thread validly owns the cached value.
+ */
+doff_t
+ufs_get_i_offset(struct inode *ip, const char *file, int line)
+{
+ ufs_assert_inode_offset_owner(ip, &ip->i_offset_tracker, "i_offset",
+ file, line);
+ return (ip->i_offset);
+}
+
+/* Store i_offset and record the calling thread as its owner. */
+void
+ufs_set_i_offset(struct inode *ip, doff_t off, const char *file, int line)
+{
+ ufs_set_inode_offset_owner(ip, &ip->i_offset_tracker, file, line);
+ ip->i_offset = off;
+}
+
+/*
+ * Fetch the cached free-space distance (i_count), asserting that the
+ * calling thread validly owns the cached value.
+ */
+int32_t
+ufs_get_i_count(struct inode *ip, const char *file, int line)
+{
+ ufs_assert_inode_offset_owner(ip, &ip->i_count_tracker, "i_count",
+ file, line);
+ return (ip->i_count);
+}
+
+/* Store i_count and record the calling thread as its owner. */
+void
+ufs_set_i_count(struct inode *ip, int32_t cnt, const char *file, int line)
+{
+ ufs_set_inode_offset_owner(ip, &ip->i_count_tracker, file, line);
+ ip->i_count = cnt;
+}
+
+/*
+ * Fetch the cached end of useful directory data (i_endoff), asserting
+ * that the calling thread validly owns the cached value.
+ */
+doff_t
+ufs_get_i_endoff(struct inode *ip, const char *file, int line)
+{
+ ufs_assert_inode_offset_owner(ip, &ip->i_endoff_tracker, "i_endoff",
+ file, line);
+ return (ip->i_endoff);
+}
+
+/* Store i_endoff and record the calling thread as its owner. */
+void
+ufs_set_i_endoff(struct inode *ip, doff_t off, const char *file, int line)
+{
+ ufs_set_inode_offset_owner(ip, &ip->i_endoff_tracker, file, line);
+ ip->i_endoff = off;
+}
+
+#endif
Index: sys/ufs/ufs/ufs_vnops.c
===================================================================
--- sys/ufs/ufs/ufs_vnops.c
+++ sys/ufs/ufs/ufs_vnops.c
@@ -1067,6 +1067,15 @@
if ((cnp->cn_flags & HASBUF) == 0)
panic("ufs_link: no name");
#endif
+
+ if (DOINGSOFTDEP(tdvp)) {
+ error = softdep_prelink(tdvp, vp, true);
+ if (error != 0) {
+ MPASS(error == ERELOOKUP);
+ return (error);
+ }
+ }
+
if (VTOI(tdvp)->i_effnlink < 2) {
print_bad_link_count("ufs_link", tdvp);
error = EINVAL;
@@ -1089,6 +1098,7 @@
error = EPERM;
goto out;
}
+
ip->i_effnlink++;
ip->i_nlink++;
DIP_SET(ip, i_nlink, ip->i_nlink);
@@ -1129,6 +1139,7 @@
struct direct newdir;
int error = 0;
+/* NOTE(review) XXXKIB: debugging leftover -- "error = VOP_FSYNC(dvp, MNT_WAIT);" -- remove or justify before commit. */
switch (ap->a_flags) {
case LOOKUP:
/* 4.4 format directories support whiteout operations */
@@ -1338,6 +1349,18 @@
goto relock;
}
}
+
+ if (DOINGSOFTDEP(fdvp)) {
+ error = softdep_prerename(fdvp, fvp, tdvp, tvp);
+ if (error != 0) {
+ if (error == ERELOOKUP) {
+ atomic_add_int(&rename_restarts, 1);
+ goto relock;
+ }
+ goto releout;
+ }
+ }
+
fdp = VTOI(fdvp);
fip = VTOI(fvp);
tdp = VTOI(tdvp);
@@ -1481,9 +1504,9 @@
if (error)
goto bad;
/* Setup tdvp for directory compaction if needed. */
- if (tdp->i_count && tdp->i_endoff &&
- tdp->i_endoff < tdp->i_size)
- endoff = tdp->i_endoff;
+ if (I_COUNT(tdp) != 0 && I_ENDOFF(tdp) != 0 &&
+ I_ENDOFF(tdp) < tdp->i_size)
+ endoff = I_ENDOFF(tdp);
} else {
if (ITODEV(tip) != ITODEV(tdp) || ITODEV(tip) != ITODEV(fip))
panic("ufs_rename: EXDEV");
@@ -1611,7 +1634,7 @@
} else if (DOINGSUJ(tdvp))
/* Journal must account for each new link. */
softdep_setup_dotdot_link(tdp, fip);
- fip->i_offset = mastertemplate.dot_reclen;
+ SET_I_OFFSET(fip, mastertemplate.dot_reclen);
ufs_dirrewrite(fip, fdp, newparent, DT_DIR, 0);
cache_purge(fdvp);
}
@@ -1649,8 +1672,10 @@
* are no longer needed.
*/
if (error == 0 && endoff != 0) {
- error = UFS_TRUNCATE(tdvp, endoff, IO_NORMAL |
- (DOINGASYNC(tdvp) ? 0 : IO_SYNC), tcnp->cn_cred);
+ do {
+ error = UFS_TRUNCATE(tdvp, endoff, IO_NORMAL |
+ (DOINGASYNC(tdvp) ? 0 : IO_SYNC), tcnp->cn_cred);
+ } while (error == ERELOOKUP);
if (error != 0 && !ffs_fsfail_cleanup(VFSTOUFS(mp), error))
vn_printf(tdvp,
"ufs_rename: failed to truncate, error %d\n",
@@ -1668,8 +1693,11 @@
*/
error = 0;
}
- if (error == 0 && tdp->i_flag & IN_NEEDSYNC)
- error = VOP_FSYNC(tdvp, MNT_WAIT, td);
+ if (error == 0 && tdp->i_flag & IN_NEEDSYNC) {
+ do {
+ error = VOP_FSYNC(tdvp, MNT_WAIT, td);
+ } while (error == ERELOOKUP);
+ }
vput(tdvp);
return (error);
@@ -1918,6 +1946,7 @@
}
dmode = vap->va_mode & 0777;
dmode |= IFDIR;
+
/*
* Must simulate part of ufs_makeinode here to acquire the inode,
* but not have it entered in the parent directory. The entry is
@@ -1928,6 +1957,15 @@
error = EINVAL;
goto out;
}
+
+ if (DOINGSOFTDEP(dvp)) {
+ error = softdep_prelink(dvp, NULL, true);
+ if (error != 0) {
+ MPASS(error == ERELOOKUP);
+ return (error);
+ }
+ }
+
error = UFS_VALLOC(dvp, dmode, cnp->cn_cred, &tvp);
if (error)
goto out;
@@ -2184,6 +2222,14 @@
error = EINVAL;
goto out;
}
+ if (DOINGSOFTDEP(dvp)) {
+ error = softdep_prelink(dvp, vp, false);
+ if (error != 0) {
+ MPASS(error == ERELOOKUP);
+ return (error);
+ }
+ }
+
#ifdef UFS_GJOURNAL
ufs_gjournal_orphan(vp);
#endif
@@ -2704,6 +2750,13 @@
print_bad_link_count(callfunc, dvp);
return (EINVAL);
}
+ if (DOINGSOFTDEP(dvp)) {
+ error = softdep_prelink(dvp, NULL, true);
+ if (error != 0) {
+ MPASS(error == ERELOOKUP);
+ return (error);
+ }
+ }
error = UFS_VALLOC(dvp, mode, cnp->cn_cred, &tvp);
if (error)
return (error);
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Sun, Feb 15, 6:26 PM (19 h, 26 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
28751963
Default Alt Text
D26136.id78375.diff (51 KB)
Attached To
Mode
D26136: Handle LoR in flush_pagedep_deps().
Attached
Detach File
Event Timeline
Log In to Comment