Changeset View
Standalone View
sys/kern/vfs_subr.c
Show First 20 Lines • Show All 333 Lines • ▼ Show 20 Lines | SYSCTL_PROC(_kern, KERN_MAXVNODES, maxvnodes, | ||||
CTLTYPE_INT | CTLFLAG_MPSAFE | CTLFLAG_RW, &desiredvnodes, 0, | CTLTYPE_INT | CTLFLAG_MPSAFE | CTLFLAG_RW, &desiredvnodes, 0, | ||||
sysctl_update_desiredvnodes, "I", "Target for maximum number of vnodes"); | sysctl_update_desiredvnodes, "I", "Target for maximum number of vnodes"); | ||||
SYSCTL_ULONG(_kern, OID_AUTO, minvnodes, CTLFLAG_RW, | SYSCTL_ULONG(_kern, OID_AUTO, minvnodes, CTLFLAG_RW, | ||||
&wantfreevnodes, 0, "Old name for vfs.wantfreevnodes (legacy)"); | &wantfreevnodes, 0, "Old name for vfs.wantfreevnodes (legacy)"); | ||||
static int vnlru_nowhere; | static int vnlru_nowhere; | ||||
SYSCTL_INT(_debug, OID_AUTO, vnlru_nowhere, CTLFLAG_RW, | SYSCTL_INT(_debug, OID_AUTO, vnlru_nowhere, CTLFLAG_RW, | ||||
&vnlru_nowhere, 0, "Number of times the vnlru process ran without success"); | &vnlru_nowhere, 0, "Number of times the vnlru process ran without success"); | ||||
#ifdef INVARIANTS | |||||
kib: Why only under INVARIANTS ? | |||||
Done Inline ActionsBecause it's strictly a facility for testing. It probably isn't even useful for debugging. asomers: Because it's strictly a facility for testing. It probably isn't even useful for debugging. | |||||
Done Inline ActionsIt might be useful for debugging, I did not tried it to state this definitely. Currently we achieve the same effect with umount -f. kib: It might be useful for debugging, I did not tried it to state this definitely. Currently we… | |||||
Done Inline ActionsOk, I'll enable it unconditionally. asomers: Ok, I'll enable it unconditionally. | |||||
static int | |||||
sysctl_try_reclaim_vnode(SYSCTL_HANDLER_ARGS) | |||||
{ | |||||
struct vnode *vp; | |||||
struct nameidata nd; | |||||
char *buf; | |||||
Done Inline ActionsWe do not allocate so large buffers on stack. kib: We do not allocate so large buffers on stack. | |||||
unsigned long ndflags; | |||||
int error; | |||||
Not Done Inline ActionsMaybe priv check for root here? cem: Maybe priv check for root here? | |||||
Done Inline ActionsSince this is a sysctl handler, the thread must be privileged. asomers: Since this is a sysctl handler, the thread must be privileged. | |||||
if (req->newptr == NULL) | |||||
return (EINVAL); | |||||
Not Done Inline ActionsI do not see a need in this blank line. kib: I do not see a need in this blank line. | |||||
Not Done Inline ActionsI think this is off-by-one since you zero byte at req->newlen. You must check some privilege to do the operation. kib: I think this is off-by-one since you zero byte at req->newlen.
You must check some privilege… | |||||
Done Inline ActionsI left out a privilege check because only root has the ability to write to sysctls. Or is there some case where non-root users can do it too? asomers: I left out a privilege check because only root has the ability to write to sysctls. Or is… | |||||
if (req->newlen > PATH_MAX) | |||||
return (E2BIG); | |||||
buf = malloc(PATH_MAX + 1, M_TEMP, M_WAITOK); | |||||
error = SYSCTL_IN(req, buf, req->newlen); | |||||
if (error != 0) | |||||
goto out; | |||||
Done Inline ActionsThis overflows the buffer when req->newlen == sizeof(buf). cem: This overflows the buffer when `req->newlen == sizeof(buf)`. | |||||
buf[req->newlen] = '\0'; | |||||
Done Inline ActionsIndent should be +4 spaces. Previous line is under-filled. kib: Indent should be +4 spaces. Previous line is under-filled. | |||||
Not Done Inline ActionsYou can probably use LOCKSHARED for the lookup to avoid invalidating / blocking on any existing shared holders in the case that the vnode is active. cem: You can probably use LOCKSHARED for the lookup to avoid invalidating / blocking on any existing… | |||||
Done Inline ActionsBut the vnode needs to be locked exclusively for VOP_RECLAIM. asomers: But the vnode needs to be locked exclusively for `VOP_RECLAIM`. | |||||
Done Inline ActionsYou can upgrade the lock after performing usecount check. This avoids unnecessarily xlocking vnodes the routine isn’t going to act on anyway. cem: You can upgrade the lock after performing usecount check. This avoids unnecessarily xlocking… | |||||
Done Inline ActionsI'm removing the usecount check at kib's suggestion. And the doomed check should very rarely fail. So there will no longer be any reason to do the lookup with a shared lock and then upgrade. asomers: I'm removing the usecount check at kib's suggestion. And the doomed check should very rarely… | |||||
Not Done Inline ActionsYes, the optimization was only valid if we were checking usecount. cem: Yes, the optimization was only valid if we were checking usecount. | |||||
ndflags = LOCKLEAF| NOFOLLOW | AUDITVNODE1 | NOCACHE | SAVENAME; | |||||
NDINIT(&nd, LOOKUP, ndflags, UIO_SYSSPACE, buf, curthread); | |||||
Not Done Inline ActionsNote that vnode can be alive but the path not cached in namecache, so this comment is misleading, unless made more precise. kib: Note that vnode can be alive but the path not cached in namecache, so this comment is… | |||||
if ((error = namei(&nd)) != 0) | |||||
goto out; | |||||
vp = nd.ni_vp; | |||||
Done Inline ActionsThe latter two can be shortened to NDF_NO_VP_PUT. cem: The latter two can be shortened to `NDF_NO_VP_PUT`. | |||||
if ((vp->v_iflag & VI_DOOMED) != 0) { | |||||
Not Done Inline ActionsWhy disallowing v_usecount > 1 ? I think for this tool to be useful, it needs to allow reclaiming open vnodes. Result of '&' is not boolean, you should use != 0. kib: Why disallowing v_usecount > 1 ? I think for this tool to be useful, it needs to allow… | |||||
Done Inline ActionsMy intent was to simulate what the vnlru thread might do anyway. But I can remove that restriction if you think it would be useful. asomers: My intent was to simulate what the vnlru thread might do anyway. But I can remove that… | |||||
Not Done Inline ActionsYes, it would be more useful if there is no such restriction, see above about forced umount. kib: Yes, it would be more useful if there is no such restriction, see above about forced umount. | |||||
Not Done Inline ActionsMight be more clear to use atomic_load(vp->v_usecount), but no difference on any freebsd arch. I'd pick different errors for v_usecount>1 <=> doomed cases. cem: Might be more clear to use `atomic_load(vp->v_usecount)`, but no difference on any freebsd arch. | |||||
Done Inline ActionsLike what, EAGAIN? I don't want to return 0, because a test program may want to know whether the sysctl had any effect. asomers: Like what, `EAGAIN`? I don't want to return 0, because a test program may want to know whether… | |||||
Not Done Inline ActionsLike Ebusy for usecount and ebadf for doomed. cem: Like Ebusy for usecount and ebadf for doomed. | |||||
Done Inline ActionsI think that EBADF is not a good choice in this case, since a doomed vnode is one that is already being recycled. That's an argument for returning 0. However, I do think that a test case might want to know whether the sysctl had any effect at all, so I don't want to return 0 in that case. I think EAGAIN is appropriate, because once the doomed vnode is fully reclaimed a subsequent call will succeed (because namei will create a new vnode if necessary). asomers: I think that `EBADF` is not a good choice in this case, since a doomed vnode is one that is… | |||||
Not Done Inline ActionsEBADF is consistent with the spirit / status of existing doomed vnodes, i.e., dead_vnodeops mostly return EBADF. Arguably, there should be a distinct error code between "that fd isn't valid in the fd table" and "the referenced vnode has actually been torn down/revoked under you", but that's outside the scope of this revision. I don't believe EAGAIN makes sense in this context. cem: `EBADF` is consistent with the spirit / status of existing doomed vnodes, i.e., `dead_vnodeops`… | |||||
Done Inline ActionsIt's probably impossible to get into that situation, anyway. Even if the vnode were doomed when the sysctl started, namei would allocate a new one. And it can't become doomed after namei returns since it's locked and referenced. asomers: It's probably impossible to get into that situation, anyway. Even if the vnode were doomed… | |||||
Not Done Inline ActionsSure; hypothetically you could have two APIs here. One that takes a path and does namei, and another that takes an fd. Both are interesting cases. The fd case could be DOOMED, although you're correct that the current lookup LOCKLEAF cannot succeed and return a doomed vnode (AFAIK). cem: Sure; hypothetically you could have two APIs here. One that takes a path and does namei, and… | |||||
error = EBUSY; | |||||
goto putvnode; | |||||
} | |||||
vhold(vp); | |||||
Not Done Inline ActionsWhat is the reason to vhold() the vnode for which you already bumped usecount ? kib: What is the reason to vhold() the vnode for which you already bumped usecount ? | |||||
Done Inline ActionsBecause that's what devfs_revoke and vlrureclaim do. Are they wrong? Or am I missing something? asomers: Because that's what `devfs_revoke` and `vlrureclaim` do. Are they wrong? Or am I missing… | |||||
Not Done Inline ActionsYou already took the use count on the vnode, which implies hold count. For vlrureclaim, the function tries hard not to bump usecount to not unfree the vnode. So it needs a hold reference at least. For devfs, devfs_close() unlocks the vnode, again without extra ref, so the vhold() prevent race with a parallel reclaim. Nothing of that is relevant after vget() inside namei(). You own both refs. kib: You already took the use count on the vnode, which implies hold count.
For vlrureclaim, the… | |||||
Done Inline ActionsDo you mean devfs_revoke? It doesn't unlock the vnode until after the vhold/vdrop, so how is that relevant? asomers: Do you mean `devfs_revoke`? It doesn't unlock the vnode until after the `vhold`/`vdrop`, so… | |||||
Done Inline ActionsNo, I mean devfs_close(). vgone() calls VOP_CLOSE() when needed. vgone() might also unlock the vnode if there are layered mounts over the vnode' mp like nullfs, but it is not typical for devfs. kib: No, I mean devfs_close(). vgone() calls VOP_CLOSE() when needed. vgone() might also unlock… | |||||
counter_u64_add(recycles_count, 1); | |||||
vgone(vp); | |||||
vdrop(vp); | |||||
putvnode: | |||||
NDFREE(&nd, 0); | |||||
out: | |||||
free(buf, M_TEMP); | |||||
return (error); | |||||
} | |||||
Not Done Inline ActionsI do not see how error can be non-zero here. kib: I do not see how error can be non-zero here. | |||||
Done Inline ActionsOops. That's leftover from the LK_UPGRADE that I briefly added, but then removed. asomers: Oops. That's leftover from the `LK_UPGRADE` that I briefly added, but then removed. | |||||
SYSCTL_PROC(_debug, OID_AUTO, try_reclaim_vnode, | |||||
CTLTYPE_STRING | CTLFLAG_MPSAFE | CTLFLAG_WR, NULL, 0, | |||||
sysctl_try_reclaim_vnode, "A", "Try to reclaim a vnode by its pathname"); | |||||
#endif /* INVARIANTS */ | |||||
/* Shift count for (uintptr_t)vp to initialize vp->v_hash. */ | /* Shift count for (uintptr_t)vp to initialize vp->v_hash. */ | ||||
static int vnsz2log; | static int vnsz2log; | ||||
/* | /* | ||||
* Support for the bufobj clean & dirty pctrie. | * Support for the bufobj clean & dirty pctrie. | ||||
Not Done Inline ActionsDo not use initialization in local's declaration. kib: Do not use initialization in local's declaration. | |||||
*/ | */ | ||||
static void * | static void * | ||||
buf_trie_alloc(struct pctrie *ptree) | buf_trie_alloc(struct pctrie *ptree) | ||||
{ | { | ||||
return uma_zalloc(buf_trie_zone, M_NOWAIT); | return uma_zalloc(buf_trie_zone, M_NOWAIT); | ||||
} | } | ||||
Done Inline ActionsFor me, ioctl_rights check does not sound right. There is no specific rights which would match this operation, of course, but IMO fcntl or unlink are somewhat closer in spirit to what is done. kib: For me, ioctl_rights check does not sound right. There is no specific rights which would match… | |||||
Done Inline ActionsI don't think that unlink rights are correct, because unlink affects the data on disk. I'll use fcntl. asomers: I don't think that unlink rights are correct, because unlink affects the data on disk. I'll… | |||||
static void | static void | ||||
buf_trie_free(struct pctrie *ptree, void *node) | buf_trie_free(struct pctrie *ptree, void *node) | ||||
{ | { | ||||
uma_zfree(buf_trie_zone, node); | uma_zfree(buf_trie_zone, node); | ||||
Done Inline ActionsThe flag must be checked under the vnode lock. kib: The flag must be checked under the vnode lock. | |||||
} | } | ||||
PCTRIE_DEFINE(BUF, buf, b_lblkno, buf_trie_alloc, buf_trie_free); | PCTRIE_DEFINE(BUF, buf, b_lblkno, buf_trie_alloc, buf_trie_free); | ||||
/* | /* | ||||
* Initialize the vnode management data structures. | * Initialize the vnode management data structures. | ||||
* | * | ||||
Done Inline ActionsIf you not pass LK_RETRY, then vn_lock() returns error for doomed vnode, which removes the need to manually check for VI_DOOMED. kib: If you not pass LK_RETRY, then vn_lock() returns error for doomed vnode, which removes the need… | |||||
* Reevaluate the following cap on the number of vnodes after the physical | * Reevaluate the following cap on the number of vnodes after the physical | ||||
* memory size exceeds 512GB. In the limit, as the physical memory size | * memory size exceeds 512GB. In the limit, as the physical memory size | ||||
* grows, the ratio of the memory size in KB to vnodes approaches 64:1. | * grows, the ratio of the memory size in KB to vnodes approaches 64:1. | ||||
*/ | */ | ||||
#ifndef MAXVNODES_MAX | #ifndef MAXVNODES_MAX | ||||
#define MAXVNODES_MAX (512 * 1024 * 1024 / 64) /* 8M */ | #define MAXVNODES_MAX (512 * 1024 * 1024 / 64) /* 8M */ | ||||
#endif | #endif | ||||
▲ Show 20 Lines • Show All 5,198 Lines • Show Last 20 Lines |
Why only under INVARIANTS ?