Changeset View
Changeset View
Standalone View
Standalone View
head/sys/kern/vfs_subr.c
Show First 20 Lines • Show All 1,269 Lines • ▼ Show 20 Lines | |||||
{ | { | ||||
mtx_assert(&vnode_list_mtx, MA_OWNED); | mtx_assert(&vnode_list_mtx, MA_OWNED); | ||||
gapvnodes = imax(desiredvnodes - wantfreevnodes, 100); | gapvnodes = imax(desiredvnodes - wantfreevnodes, 100); | ||||
vhiwat = gapvnodes / 11; /* 9% -- just under the 10% in vlrureclaim() */ | vhiwat = gapvnodes / 11; /* 9% -- just under the 10% in vlrureclaim() */ | ||||
vlowat = vhiwat / 2; | vlowat = vhiwat / 2; | ||||
} | } | ||||
/* XXX some names and initialization are bad for limits and watermarks. */ | |||||
static int | |||||
vspace(void) | |||||
{ | |||||
u_long rnumvnodes, rfreevnodes; | |||||
int space; | |||||
rnumvnodes = atomic_load_long(&numvnodes); | |||||
rfreevnodes = atomic_load_long(&freevnodes); | |||||
if (rnumvnodes > desiredvnodes) | |||||
return (0); | |||||
space = desiredvnodes - rnumvnodes; | |||||
if (freevnodes > wantfreevnodes) | |||||
space += rfreevnodes - wantfreevnodes; | |||||
return (space); | |||||
} | |||||
/* | /* | ||||
* Attempt to recycle vnodes in a context that is always safe to block. | * Attempt to recycle vnodes in a context that is always safe to block. | ||||
* Calling vlrurecycle() from the bowels of filesystem code has some | * Calling vlrurecycle() from the bowels of filesystem code has some | ||||
* interesting deadlock problems. | * interesting deadlock problems. | ||||
*/ | */ | ||||
static struct proc *vnlruproc; | static struct proc *vnlruproc; | ||||
static int vnlruproc_sig; | static int vnlruproc_sig; | ||||
static bool | |||||
vnlru_under(u_long rnumvnodes, u_long limit) | |||||
{ | |||||
u_long rfreevnodes, space; | |||||
if (__predict_false(rnumvnodes > desiredvnodes)) | |||||
return (true); | |||||
space = desiredvnodes - rnumvnodes; | |||||
if (space < limit) { | |||||
rfreevnodes = atomic_load_long(&freevnodes); | |||||
if (rfreevnodes > wantfreevnodes) | |||||
space += rfreevnodes - wantfreevnodes; | |||||
} | |||||
return (space < limit); | |||||
} | |||||
static void | static void | ||||
vnlru_kick(void) | |||||
{ | |||||
mtx_assert(&vnode_list_mtx, MA_OWNED); | |||||
if (vnlruproc_sig == 0) { | |||||
vnlruproc_sig = 1; | |||||
wakeup(vnlruproc); | |||||
} | |||||
} | |||||
static void | |||||
vnlru_proc(void) | vnlru_proc(void) | ||||
{ | { | ||||
u_long rnumvnodes, rfreevnodes, target; | u_long rnumvnodes, rfreevnodes, target; | ||||
unsigned long onumvnodes; | unsigned long onumvnodes; | ||||
int done, force, trigger, usevnodes, vsp; | int done, force, trigger, usevnodes; | ||||
bool reclaim_nc_src; | bool reclaim_nc_src; | ||||
EVENTHANDLER_REGISTER(shutdown_pre_sync, kproc_shutdown, vnlruproc, | EVENTHANDLER_REGISTER(shutdown_pre_sync, kproc_shutdown, vnlruproc, | ||||
SHUTDOWN_PRI_FIRST); | SHUTDOWN_PRI_FIRST); | ||||
force = 0; | force = 0; | ||||
for (;;) { | for (;;) { | ||||
kproc_suspend_check(vnlruproc); | kproc_suspend_check(vnlruproc); | ||||
mtx_lock(&vnode_list_mtx); | mtx_lock(&vnode_list_mtx); | ||||
rnumvnodes = atomic_load_long(&numvnodes); | rnumvnodes = atomic_load_long(&numvnodes); | ||||
/* | /* | ||||
* If numvnodes is too large (due to desiredvnodes being | * If numvnodes is too large (due to desiredvnodes being | ||||
* adjusted using its sysctl, or emergency growth), first | * adjusted using its sysctl, or emergency growth), first | ||||
* try to reduce it by discarding from the free list. | * try to reduce it by discarding from the free list. | ||||
*/ | */ | ||||
if (rnumvnodes > desiredvnodes) | if (rnumvnodes > desiredvnodes) { | ||||
vnlru_free_locked(rnumvnodes - desiredvnodes, NULL); | vnlru_free_locked(rnumvnodes - desiredvnodes, NULL); | ||||
rnumvnodes = atomic_load_long(&numvnodes); | |||||
} | |||||
/* | /* | ||||
* Sleep if the vnode cache is in a good state. This is | * Sleep if the vnode cache is in a good state. This is | ||||
* when it is not over-full and has space for about a 4% | * when it is not over-full and has space for about a 4% | ||||
* or 9% expansion (by growing its size or inexcessively | * or 9% expansion (by growing its size or inexcessively | ||||
* reducing its free list). Otherwise, try to reclaim | * reducing its free list). Otherwise, try to reclaim | ||||
* space for a 10% expansion. | * space for a 10% expansion. | ||||
*/ | */ | ||||
if (vstir && force == 0) { | if (vstir && force == 0) { | ||||
force = 1; | force = 1; | ||||
vstir = 0; | vstir = 0; | ||||
} | } | ||||
vsp = vspace(); | if (force == 0 && !vnlru_under(rnumvnodes, vlowat)) { | ||||
if (vsp >= vlowat && force == 0) { | |||||
vnlruproc_sig = 0; | vnlruproc_sig = 0; | ||||
wakeup(&vnlruproc_sig); | wakeup(&vnlruproc_sig); | ||||
msleep(vnlruproc, &vnode_list_mtx, | msleep(vnlruproc, &vnode_list_mtx, | ||||
PVFS|PDROP, "vlruwt", hz); | PVFS|PDROP, "vlruwt", hz); | ||||
continue; | continue; | ||||
} | } | ||||
rfreevnodes = atomic_load_long(&freevnodes); | rfreevnodes = atomic_load_long(&freevnodes); | ||||
▲ Show 20 Lines • Show All 42 Lines • ▼ Show 20 Lines | if (done == 0) { | ||||
vnlru_nowhere++; | vnlru_nowhere++; | ||||
tsleep(vnlruproc, PPAUSE, "vlrup", hz * 3); | tsleep(vnlruproc, PPAUSE, "vlrup", hz * 3); | ||||
} else | } else | ||||
kern_yield(PRI_USER); | kern_yield(PRI_USER); | ||||
/* | /* | ||||
* After becoming active to expand above low water, keep | * After becoming active to expand above low water, keep | ||||
* active until above high water. | * active until above high water. | ||||
*/ | */ | ||||
vsp = vspace(); | force = vnlru_under(numvnodes, vhiwat) ? 1 : 0; | ||||
force = vsp < vhiwat; | |||||
} | } | ||||
} | } | ||||
static struct kproc_desc vnlru_kp = { | static struct kproc_desc vnlru_kp = { | ||||
"vnlru", | "vnlru", | ||||
vnlru_proc, | vnlru_proc, | ||||
&vnlruproc | &vnlruproc | ||||
}; | }; | ||||
▲ Show 20 Lines • Show All 59 Lines • ▼ Show 20 Lines | if (!VN_IS_DOOMED(vp)) { | ||||
vgonel(vp); | vgonel(vp); | ||||
} | } | ||||
VOP_UNLOCK(vp); | VOP_UNLOCK(vp); | ||||
VI_UNLOCK(vp); | VI_UNLOCK(vp); | ||||
vn_finished_write(vnmp); | vn_finished_write(vnmp); | ||||
return (0); | return (0); | ||||
} | } | ||||
static void | |||||
vcheckspace(void) | |||||
{ | |||||
int vsp; | |||||
vsp = vspace(); | |||||
if (vsp < vlowat && vnlruproc_sig == 0) { | |||||
vnlruproc_sig = 1; | |||||
wakeup(vnlruproc); | |||||
} | |||||
} | |||||
/* | /* | ||||
* Wait if necessary for space for a new vnode. | * Allocate a new vnode. | ||||
* | |||||
* The operation never returns an error. Returning an error was disabled | |||||
* in r145385 (dated 2005) with the following comment: | |||||
* | |||||
* XXX Not all VFS_VGET/ffs_vget callers check returns. | |||||
* | |||||
* Given the age of this commit (almost 15 years at the time of writing this | |||||
* comment) restoring the ability to fail requires a significant audit of | |||||
* all codepaths. | |||||
* | |||||
* The routine can try to free a vnode or stall for up to 1 second waiting for | |||||
* vnlru to clear things up, but ultimately always performs a M_WAITOK allocation. | |||||
*/ | */ | ||||
static int | |||||
vn_alloc_wait(int suspended) | |||||
{ | |||||
mtx_assert(&vnode_list_mtx, MA_OWNED); | |||||
if (numvnodes >= desiredvnodes) { | |||||
if (suspended) { | |||||
/* | |||||
* The file system is being suspended. We cannot | |||||
* risk a deadlock here, so allow allocation of | |||||
* another vnode even if this would give too many. | |||||
*/ | |||||
return (0); | |||||
} | |||||
if (vnlruproc_sig == 0) { | |||||
vnlruproc_sig = 1; /* avoid unnecessary wakeups */ | |||||
wakeup(vnlruproc); | |||||
} | |||||
msleep(&vnlruproc_sig, &vnode_list_mtx, PVFS, | |||||
"vlruwk", hz); | |||||
} | |||||
/* Post-adjust like the pre-adjust in getnewvnode(). */ | |||||
if (numvnodes + 1 > desiredvnodes && freevnodes > 1) | |||||
vnlru_free_locked(1, NULL); | |||||
return (numvnodes >= desiredvnodes ? ENFILE : 0); | |||||
} | |||||
static struct vnode * | static struct vnode * | ||||
vn_alloc(struct mount *mp) | vn_alloc(struct mount *mp) | ||||
{ | { | ||||
static int cyclecount; | u_long rnumvnodes, rfreevnodes; | ||||
int error __unused; | static u_long cyclecount; | ||||
mtx_lock(&vnode_list_mtx); | mtx_lock(&vnode_list_mtx); | ||||
if (numvnodes < desiredvnodes) | rnumvnodes = atomic_load_long(&numvnodes); | ||||
if (rnumvnodes + 1 < desiredvnodes) { | |||||
cyclecount = 0; | cyclecount = 0; | ||||
else if (cyclecount++ >= freevnodes) { | goto alloc; | ||||
} | |||||
rfreevnodes = atomic_load_long(&freevnodes); | |||||
if (cyclecount++ >= rfreevnodes) { | |||||
cyclecount = 0; | cyclecount = 0; | ||||
vstir = 1; | vstir = 1; | ||||
} | } | ||||
/* | /* | ||||
* Grow the vnode cache if it will not be above its target max | * Grow the vnode cache if it will not be above its target max | ||||
* after growing. Otherwise, if the free list is nonempty, try | * after growing. Otherwise, if the free list is nonempty, try | ||||
* to reclaim 1 item from it before growing the cache (possibly | * to reclaim 1 item from it before growing the cache (possibly | ||||
* above its target max if the reclamation failed or is delayed). | * above its target max if the reclamation failed or is delayed). | ||||
* Otherwise, wait for some space. In all cases, schedule | * Otherwise, wait for some space. In all cases, schedule | ||||
* vnlru_proc() if we are getting short of space. The watermarks | * vnlru_proc() if we are getting short of space. The watermarks | ||||
* should be chosen so that we never wait or even reclaim from | * should be chosen so that we never wait or even reclaim from | ||||
* the free list to below its target minimum. | * the free list to below its target minimum. | ||||
*/ | */ | ||||
if (numvnodes + 1 <= desiredvnodes) | if (rfreevnodes > 0) { | ||||
; | |||||
else if (freevnodes > 0) | |||||
vnlru_free_locked(1, NULL); | vnlru_free_locked(1, NULL); | ||||
else { | goto alloc; | ||||
error = vn_alloc_wait(mp != NULL && (mp->mnt_kern_flag & | |||||
MNTK_SUSPEND)); | |||||
#if 0 /* XXX Not all VFS_VGET/ffs_vget callers check returns. */ | |||||
if (error != 0) { | |||||
mtx_unlock(&vnode_list_mtx); | |||||
return (error); | |||||
} | } | ||||
#endif | if (mp == NULL || (mp->mnt_kern_flag & MNTK_SUSPEND) == 0) { | ||||
/* | |||||
* Wait for space for a new vnode. | |||||
*/ | |||||
vnlru_kick(); | |||||
msleep(&vnlruproc_sig, &vnode_list_mtx, PVFS, "vlruwk", hz); | |||||
if (atomic_load_long(&numvnodes) + 1 > desiredvnodes && | |||||
atomic_load_long(&freevnodes) > 1) | |||||
vnlru_free_locked(1, NULL); | |||||
} | } | ||||
vcheckspace(); | alloc: | ||||
atomic_add_long(&numvnodes, 1); | rnumvnodes = atomic_fetchadd_long(&numvnodes, 1) + 1; | ||||
if (vnlru_under(rnumvnodes, vlowat)) | |||||
vnlru_kick(); | |||||
mtx_unlock(&vnode_list_mtx); | mtx_unlock(&vnode_list_mtx); | ||||
return (uma_zalloc(vnode_zone, M_WAITOK)); | return (uma_zalloc(vnode_zone, M_WAITOK)); | ||||
} | } | ||||
static void | static void | ||||
vn_free(struct vnode *vp) | vn_free(struct vnode *vp) | ||||
{ | { | ||||
▲ Show 20 Lines • Show All 4,722 Lines • Show Last 20 Lines |