Changeset View
Standalone View
sys/ufs/ffs/ffs_softdep.c
- This file is larger than 256 KB, so syntax highlighting is disabled by default.
Show First 20 Lines • Show All 615 Lines • ▼ Show 20 Lines | |||||
static SYSCTL_NODE(_debug, OID_AUTO, softdep, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, | static SYSCTL_NODE(_debug, OID_AUTO, softdep, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, | ||||
"soft updates stats"); | "soft updates stats"); | ||||
static SYSCTL_NODE(_debug_softdep, OID_AUTO, total, | static SYSCTL_NODE(_debug_softdep, OID_AUTO, total, | ||||
CTLFLAG_RW | CTLFLAG_MPSAFE, 0, | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, | ||||
"total dependencies allocated"); | "total dependencies allocated"); | ||||
static SYSCTL_NODE(_debug_softdep, OID_AUTO, highuse, | static SYSCTL_NODE(_debug_softdep, OID_AUTO, highuse, | ||||
CTLFLAG_RW | CTLFLAG_MPSAFE, 0, | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, | ||||
"high use dependencies allocated"); | "high use dependencies allocated"); | ||||
static SYSCTL_NODE(_debug_softdep, OID_AUTO, current, | static SYSCTL_NODE(_debug_softdep, OID_AUTO, current, | ||||
mckusick: This is missing the third (int) parameter. | |||||
CTLFLAG_RW | CTLFLAG_MPSAFE, 0, | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, | ||||
"current dependencies allocated"); | "current dependencies allocated"); | ||||
static SYSCTL_NODE(_debug_softdep, OID_AUTO, write, | static SYSCTL_NODE(_debug_softdep, OID_AUTO, write, | ||||
CTLFLAG_RW | CTLFLAG_MPSAFE, 0, | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, | ||||
"current dependencies written"); | "current dependencies written"); | ||||
unsigned long dep_current[D_LAST + 1]; | unsigned long dep_current[D_LAST + 1]; | ||||
unsigned long dep_highuse[D_LAST + 1]; | unsigned long dep_highuse[D_LAST + 1]; | ||||
▲ Show 20 Lines • Show All 750 Lines • ▼ Show 20 Lines | |||||
SYSCTL_INT(_vfs_ffs, OID_AUTO, compute_summary_at_mount, CTLFLAG_RW, | SYSCTL_INT(_vfs_ffs, OID_AUTO, compute_summary_at_mount, CTLFLAG_RW, | ||||
&compute_summary_at_mount, 0, "Recompute summary at mount"); | &compute_summary_at_mount, 0, "Recompute summary at mount"); | ||||
static int print_threads = 0; | static int print_threads = 0; | ||||
SYSCTL_INT(_debug_softdep, OID_AUTO, print_threads, CTLFLAG_RW, | SYSCTL_INT(_debug_softdep, OID_AUTO, print_threads, CTLFLAG_RW, | ||||
&print_threads, 0, "Notify flusher thread start/stop"); | &print_threads, 0, "Notify flusher thread start/stop"); | ||||
/* List of all filesystems mounted with soft updates */ | /* List of all filesystems mounted with soft updates */ | ||||
static TAILQ_HEAD(, mount_softdeps) softdepmounts; | static TAILQ_HEAD(, mount_softdeps) softdepmounts; | ||||
static int | |||||
get_parent_vp(struct vnode *vp, struct mount *mp, ino_t inum, | |||||
struct vnode **rvp) | |||||
{ | |||||
int error; | |||||
ASSERT_VOP_ELOCKED(vp, "child vnode must be locked"); | |||||
error = ffs_vgetf(mp, inum, LK_NOWAIT | LK_EXCLUSIVE, rvp, | |||||
Done Inline ActionsI think it's supposed to be "Thus if" markj: I think it's supposed to be "Thus if" | |||||
FFSV_FORCEINSMQ); | |||||
if (error == 0) | |||||
Done Inline Actions"before blocking on the vnode lock" markj: "before blocking on the vnode lock" | |||||
return (0); | |||||
VOP_UNLOCK(vp); | |||||
Done Inline Actionss/function/caller/ perhaps markj: s/function/caller/ perhaps | |||||
error = ffs_vgetf(mp, inum, LK_EXCLUSIVE, rvp, FFSV_FORCEINSMQ); | |||||
MPASS(VTOI(*rvp)->i_mode != 0); | |||||
Done Inline ActionsWhy not break;? markj: Why not `break;`? | |||||
Done Inline ActionsNo point in checking for doomed. kib: No point in checking for doomed. | |||||
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); | |||||
markjUnsubmitted Done Inline ActionsDo we actually know that inum corresponds to a parent of vp? Can't it be the other way around? markj: Do we actually know that inum corresponds to a parent of vp? Can't it be the other way around? | |||||
kibAuthorUnsubmitted Done Inline ActionsYes, this is the valid question and Peter' report most likely indicates that. I will update the review with the patch I sent to him. kib: Yes, this is the valid question and Peter' report most likely indicates that. I will update… | |||||
Done Inline ActionsShouldn't this parameter be pvp (pvp is the parameter whose mode we are checking)? mckusick: Shouldn't this parameter be pvp (pvp is the parameter whose mode we are checking)? | |||||
if (VN_IS_DOOMED(vp)) { | |||||
if (error == 0) | |||||
vput(*rvp); | |||||
error = ENOENT; | |||||
} | |||||
return (error); | |||||
} | |||||
Done Inline ActionsWhy is the assertion only needed in the fallback case? I had to look up the log message for r356126 to understand the assertion. A KASSERT() with a message would be a bit nicer IMO, maybe "vnode %p has pending deactivation", assuming I understood correctly. markj: Why is the assertion only needed in the fallback case?
I had to look up the log message for… | |||||
Done Inline ActionsThis function needs a comment. Here is a first cut assuming I understand its purpose: /* * This function fetches inode inum on mount point mp. As we * already hold a locked buffer, we must not block on acquiring * the new inode lock as we will get into a lock-order reversal * with the buffer lock and possibly get a deadlock. Thus we * must unlock the buffer before doing a blocking lock for the * inode. We return ERESTART if we have had to unlock the buffer * so that the function can reassess its state. */ mckusick: This function needs a comment. Here is a first cut assuming I understand its purpose:
```
/*
*… | |||||
/* | /* | ||||
* This function cleans the worklist for a filesystem. | * This function cleans the worklist for a filesystem. | ||||
* Each filesystem running with soft dependencies gets its own | * Each filesystem running with soft dependencies gets its own | ||||
* thread to run in this function. The thread is started up in | * thread to run in this function. The thread is started up in | ||||
* softdep_mount and shutdown in softdep_unmount. They show up | * softdep_mount and shutdown in softdep_unmount. They show up | ||||
* as part of the kernel "bufdaemon" process whose process | * as part of the kernel "bufdaemon" process whose process | ||||
* entry is available in bufdaemonproc. | * entry is available in bufdaemonproc. | ||||
*/ | */ | ||||
▲ Show 20 Lines • Show All 1,689 Lines • ▼ Show 20 Lines | if (journal_space(ump, 0) == 0) { | ||||
softdep_speedup(ump); | softdep_speedup(ump); | ||||
if (journal_space(ump, 1) == 0) | if (journal_space(ump, 1) == 0) | ||||
journal_suspend(ump); | journal_suspend(ump); | ||||
} | } | ||||
FREE_LOCK(ump); | FREE_LOCK(ump); | ||||
return (0); | return (0); | ||||
} | } | ||||
Done Inline ActionsIt would be useful to have a comment describing what this function does. mckusick: It would be useful to have a comment describing what this function does. | |||||
Done Inline ActionsThis is just a helper for softdep_prerename(). kib: This is just a helper for softdep_prerename(). | |||||
/* | /* | ||||
* Before adjusting a link count on a vnode verify that we have sufficient | * Before adjusting a link count on a vnode verify that we have sufficient | ||||
* journal space. If not, process operations that depend on the currently | * journal space. If not, process operations that depend on the currently | ||||
* locked pair of vnodes to try to flush space as the syncer, buf daemon, | * locked pair of vnodes to try to flush space as the syncer, buf daemon, | ||||
* and softdep flush threads can not acquire these locks to reclaim space. | * and softdep flush threads can not acquire these locks to reclaim space. | ||||
*/ | */ | ||||
static void | static void | ||||
Done Inline Actions"prehandle" is a pretty generic name. If it's supposed to be a softdep_prerename() helper, maybe softdep_prerename_vnode()? markj: "prehandle" is a pretty generic name. If it's supposed to be a softdep_prerename() helper… | |||||
Done Inline ActionsI considered to use it in softdep_prelink() as well, but not now. kib: I considered to use it in softdep_prelink() as well, but not now. | |||||
softdep_prelink(dvp, vp) | softdep_prelink(dvp, vp) | ||||
struct vnode *dvp; | struct vnode *dvp; | ||||
struct vnode *vp; | struct vnode *vp; | ||||
{ | { | ||||
Done Inline ActionsIs it int on purpose? Some callers are using bool literals. markj: Is it `int` on purpose? Some callers are using bool literals. | |||||
Done Inline ActionsK&R definition + bool look strange to me. kib: K&R definition + bool look strange to me. | |||||
struct ufsmount *ump; | struct ufsmount *ump; | ||||
ump = VFSTOUFS(dvp->v_mount); | ump = VFSTOUFS(dvp->v_mount); | ||||
LOCK_OWNED(ump); | LOCK_OWNED(ump); | ||||
/* | /* | ||||
* Nothing to do if we have sufficient journal space. | * Nothing to do if we have sufficient journal space. | ||||
* If we currently hold the snapshot lock, we must avoid | * If we currently hold the snapshot lock, we must avoid | ||||
* handling other resources that could cause deadlock. | * handling other resources that could cause deadlock. | ||||
*/ | */ | ||||
Done Inline ActionsIt would be useful to have a comment describing what this function does. Presumably similar to the one for the next function below (softdep_prelink()). mckusick: It would be useful to have a comment describing what this function does. Presumably similar to… | |||||
Done Inline ActionsI did it, and also updated comment for softdep_prelink() which now follows different protocol and must be called earlier in VOPs, before any mutation is done. But it is premature to document this work IMO, it is still not finished from the code PoV. Peter reported at least one more unfixed bug. kib: I did it, and also updated comment for softdep_prelink() which now follows different protocol… | |||||
if (journal_space(ump, 0) || (vp && IS_SNAPSHOT(VTOI(vp)))) | if (journal_space(ump, 0) || (vp && IS_SNAPSHOT(VTOI(vp)))) | ||||
return; | return; | ||||
stat_journal_low++; | stat_journal_low++; | ||||
FREE_LOCK(ump); | FREE_LOCK(ump); | ||||
if (vp) | if (vp) | ||||
ffs_syncvnode(vp, MNT_NOWAIT, 0); | ffs_syncvnode(vp, MNT_NOWAIT, 0); | ||||
ffs_syncvnode(dvp, MNT_WAIT, 0); | ffs_syncvnode(dvp, MNT_WAIT, 0); | ||||
Done Inline ActionsThese three lines are the reason why the current patch does not work in +J case. Really, we cannot call ffs_syncvnode() this way. There are more issues in -J case as well, but I did not analyzed it completely. kib: These three lines are the reason why the current patch does not work in +J case. Really, we… | |||||
ACQUIRE_LOCK(ump); | ACQUIRE_LOCK(ump); | ||||
/* Process vp before dvp as it may create .. removes. */ | /* Process vp before dvp as it may create .. removes. */ | ||||
if (vp) { | if (vp) { | ||||
process_removes(vp); | process_removes(vp); | ||||
process_truncates(vp); | process_truncates(vp); | ||||
} | } | ||||
process_removes(dvp); | process_removes(dvp); | ||||
process_truncates(dvp); | process_truncates(dvp); | ||||
▲ Show 20 Lines • Show All 9,491 Lines • ▼ Show 20 Lines | for (error = 0, flushparent = 0; ; ) { | ||||
* We prevent deadlock by always fetching inodes from the | * We prevent deadlock by always fetching inodes from the | ||||
* root, moving down the directory tree. Thus, when fetching | * root, moving down the directory tree. Thus, when fetching | ||||
* our parent directory, we first try to get the lock. If | * our parent directory, we first try to get the lock. If | ||||
* that fails, we must unlock ourselves before requesting | * that fails, we must unlock ourselves before requesting | ||||
* the lock on our parent. See the comment in ufs_lookup | * the lock on our parent. See the comment in ufs_lookup | ||||
* for details on possible races. | * for details on possible races. | ||||
*/ | */ | ||||
FREE_LOCK(ump); | FREE_LOCK(ump); | ||||
if (ffs_vgetf(mp, parentino, LK_NOWAIT | LK_EXCLUSIVE, &pvp, | error = get_parent_vp(vp, mp, parentino, &pvp); | ||||
FFSV_FORCEINSMQ)) { | |||||
/* | |||||
* Unmount cannot proceed after unlock because | |||||
* caller must have called vn_start_write(). | |||||
*/ | |||||
VOP_UNLOCK(vp); | |||||
error = ffs_vgetf(mp, parentino, LK_EXCLUSIVE, | |||||
&pvp, FFSV_FORCEINSMQ); | |||||
MPASS(VTOI(pvp)->i_mode != 0); | |||||
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); | |||||
if (VN_IS_DOOMED(vp)) { | |||||
if (error == 0) | |||||
vput(pvp); | |||||
error = ENOENT; | |||||
} | |||||
if (error != 0) | if (error != 0) | ||||
return (error); | return (error); | ||||
Done Inline ActionsWhy can't we just use vn_vget_ino() here? mckusick: Why can't we just use vn_vget_ino() here? | |||||
Done Inline ActionsNo, unfortunately not. Issue is that the recorded ino might be no longer the parent of the vp. The example which started this patch actually flips parent and child around. We have to assume that two nodes are unrelated and engage into the algorithm which never sleeps with either of vnode locked. kib: No, unfortunately not. Issue is that the recorded ino might be no longer the parent of the vp. | |||||
} | |||||
/* | /* | ||||
* All MKDIR_PARENT dependencies and all the NEWBLOCK pagedeps | * All MKDIR_PARENT dependencies and all the NEWBLOCK pagedeps | ||||
* that are contained in direct blocks will be resolved by | * that are contained in direct blocks will be resolved by | ||||
* doing a ffs_update. Pagedeps contained in indirect blocks | * doing a ffs_update. Pagedeps contained in indirect blocks | ||||
* may require a complete sync'ing of the directory. So, we | * may require a complete sync'ing of the directory. So, we | ||||
* try the cheap and fast ffs_update first, and if that fails, | * try the cheap and fast ffs_update first, and if that fails, | ||||
* then we do the slower ffs_syncvnode of the directory. | * then we do the slower ffs_syncvnode of the directory. | ||||
*/ | */ | ||||
▲ Show 20 Lines • Show All 614 Lines • ▼ Show 20 Lines | TAILQ_FOREACH(inoref, &inodedep->id_inoreflst, if_deps) { | ||||
if ((inoref->if_state & (DEPCOMPLETE | GOINGAWAY)) | if ((inoref->if_state & (DEPCOMPLETE | GOINGAWAY)) | ||||
== DEPCOMPLETE) { | == DEPCOMPLETE) { | ||||
jwait(&inoref->if_list, MNT_WAIT); | jwait(&inoref->if_list, MNT_WAIT); | ||||
goto restart; | goto restart; | ||||
} | } | ||||
} | } | ||||
if (dap->da_state & MKDIR_BODY) { | if (dap->da_state & MKDIR_BODY) { | ||||
FREE_LOCK(ump); | FREE_LOCK(ump); | ||||
if ((error = ffs_vgetf(mp, inum, LK_EXCLUSIVE, &vp, | error = get_parent_vp(pvp, mp, inum, &vp); | ||||
FFSV_FORCEINSMQ))) | if (error != 0) | ||||
break; | break; | ||||
MPASS(VTOI(vp)->i_mode != 0); | |||||
error = flush_newblk_dep(vp, mp, 0); | error = flush_newblk_dep(vp, mp, 0); | ||||
/* | /* | ||||
* If we still have the dependency we might need to | * If we still have the dependency we might need to | ||||
* update the vnode to sync the new link count to | * update the vnode to sync the new link count to | ||||
* disk. | * disk. | ||||
*/ | */ | ||||
if (error == 0 && dap == LIST_FIRST(diraddhdp)) | if (error == 0 && dap == LIST_FIRST(diraddhdp)) | ||||
error = ffs_update(vp, 1); | error = ffs_update(vp, 1); | ||||
▲ Show 20 Lines • Show All 45 Lines • ▼ Show 20 Lines | retry: | ||||
} | } | ||||
/* | /* | ||||
* If the inode is still sitting in a buffer waiting | * If the inode is still sitting in a buffer waiting | ||||
* to be written or waiting for the link count to be | * to be written or waiting for the link count to be | ||||
* adjusted update it here to flush it to disk. | * adjusted update it here to flush it to disk. | ||||
*/ | */ | ||||
if (dap == LIST_FIRST(diraddhdp)) { | if (dap == LIST_FIRST(diraddhdp)) { | ||||
FREE_LOCK(ump); | FREE_LOCK(ump); | ||||
if ((error = ffs_vgetf(mp, inum, LK_EXCLUSIVE, &vp, | error = get_parent_vp(pvp, mp, inum, &vp); | ||||
FFSV_FORCEINSMQ))) | if (error) | ||||
break; | break; | ||||
MPASS(VTOI(vp)->i_mode != 0); | |||||
error = ffs_update(vp, 1); | error = ffs_update(vp, 1); | ||||
vput(vp); | vput(vp); | ||||
if (error) | if (error) | ||||
break; | break; | ||||
ACQUIRE_LOCK(ump); | ACQUIRE_LOCK(ump); | ||||
} | } | ||||
/* | /* | ||||
* If we have failed to get rid of all the dependencies | * If we have failed to get rid of all the dependencies | ||||
▲ Show 20 Lines • Show All 1,469 Lines • Show Last 20 Lines |
This is missing the third (int) parameter.