Changeset View
Changeset View
Standalone View
Standalone View
sys/kern/vfs_subr.c
Show First 20 Lines • Show All 108 Lines • ▼ Show 20 Lines | |||||
static void v_init_counters(struct vnode *); | static void v_init_counters(struct vnode *); | ||||
static void v_incr_devcount(struct vnode *); | static void v_incr_devcount(struct vnode *); | ||||
static void v_decr_devcount(struct vnode *); | static void v_decr_devcount(struct vnode *); | ||||
static void vgonel(struct vnode *); | static void vgonel(struct vnode *); | ||||
static void vfs_knllock(void *arg); | static void vfs_knllock(void *arg); | ||||
static void vfs_knlunlock(void *arg); | static void vfs_knlunlock(void *arg); | ||||
static void vfs_knl_assert_locked(void *arg); | static void vfs_knl_assert_locked(void *arg); | ||||
static void vfs_knl_assert_unlocked(void *arg); | static void vfs_knl_assert_unlocked(void *arg); | ||||
static void vnlru_return_batches(struct vfsops *mnt_op); | |||||
static void destroy_vpollinfo(struct vpollinfo *vi); | static void destroy_vpollinfo(struct vpollinfo *vi); | ||||
static int v_inval_buf_range_locked(struct vnode *vp, struct bufobj *bo, | static int v_inval_buf_range_locked(struct vnode *vp, struct bufobj *bo, | ||||
daddr_t startlbn, daddr_t endlbn); | daddr_t startlbn, daddr_t endlbn); | ||||
/* | /* | ||||
* These fences are intended for cases where some synchronization is | * These fences are intended for cases where some synchronization is | ||||
* needed between access of v_iflags and lockless vnode refcount (v_holdcnt | * needed between access of v_iflags and lockless vnode refcount (v_holdcnt | ||||
* and v_usecount) updates. Access to v_iflags is generally synchronized | * and v_usecount) updates. Access to v_iflags is generally synchronized | ||||
Show All 17 Lines | |||||
SYSCTL_ULONG(_vfs, OID_AUTO, numvnodes, CTLFLAG_RD, &numvnodes, 0, | SYSCTL_ULONG(_vfs, OID_AUTO, numvnodes, CTLFLAG_RD, &numvnodes, 0, | ||||
"Number of vnodes in existence"); | "Number of vnodes in existence"); | ||||
static counter_u64_t vnodes_created; | static counter_u64_t vnodes_created; | ||||
SYSCTL_COUNTER_U64(_vfs, OID_AUTO, vnodes_created, CTLFLAG_RD, &vnodes_created, | SYSCTL_COUNTER_U64(_vfs, OID_AUTO, vnodes_created, CTLFLAG_RD, &vnodes_created, | ||||
"Number of vnodes created by getnewvnode"); | "Number of vnodes created by getnewvnode"); | ||||
static u_long mnt_free_list_batch = 128; | |||||
SYSCTL_ULONG(_vfs, OID_AUTO, mnt_free_list_batch, CTLFLAG_RW, | |||||
&mnt_free_list_batch, 0, "Limit of vnodes held on mnt's free list"); | |||||
/* | /* | ||||
* Conversion tables for conversion from vnode types to inode formats | * Conversion tables for conversion from vnode types to inode formats | ||||
* and back. | * and back. | ||||
*/ | */ | ||||
enum vtype iftovt_tab[16] = { | enum vtype iftovt_tab[16] = { | ||||
VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON, | VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON, | ||||
VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VNON | VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VNON | ||||
}; | }; | ||||
int vttoif_tab[10] = { | int vttoif_tab[10] = { | ||||
0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK, | 0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK, | ||||
S_IFSOCK, S_IFIFO, S_IFMT, S_IFMT | S_IFSOCK, S_IFIFO, S_IFMT, S_IFMT | ||||
}; | }; | ||||
/* | /* | ||||
* List of vnodes that are ready for recycling. | * List of allocated vnodes in the system. | ||||
*/ | */ | ||||
static TAILQ_HEAD(freelst, vnode) vnode_free_list; | static TAILQ_HEAD(freelst, vnode) vnode_list; | ||||
static struct vnode *vnode_list_marker; | |||||
/* | /* | ||||
* "Free" vnode target. Free vnodes are rarely completely free, but are | * "Free" vnode target. Free vnodes are rarely completely free, but are | ||||
* just ones that are cheap to recycle. Usually they are for files which | * just ones that are cheap to recycle. Usually they are for files which | ||||
* have been stat'd but not read; these usually have inode and namecache | * have been stat'd but not read; these usually have inode and namecache | ||||
* data attached to them. This target is the preferred minimum size of a | * data attached to them. This target is the preferred minimum size of a | ||||
* sub-cache consisting mostly of such files. The system balances the size | * sub-cache consisting mostly of such files. The system balances the size | ||||
* of this sub-cache with its complement to try to prevent either from | * of this sub-cache with its complement to try to prevent either from | ||||
Show All 11 Lines | |||||
* coded as 4% and 9% of the available space higher. These and the default | * coded as 4% and 9% of the available space higher. These and the default | ||||
* of 25% for wantfreevnodes are too large if the memory size is large. | * of 25% for wantfreevnodes are too large if the memory size is large. | ||||
* E.g., 9% of 75% of MAXVNODES is more than 566000 vnodes to reclaim | * E.g., 9% of 75% of MAXVNODES is more than 566000 vnodes to reclaim | ||||
* whenever vnlru_proc() becomes active. | * whenever vnlru_proc() becomes active. | ||||
*/ | */ | ||||
static u_long wantfreevnodes; | static u_long wantfreevnodes; | ||||
SYSCTL_ULONG(_vfs, OID_AUTO, wantfreevnodes, CTLFLAG_RW, | SYSCTL_ULONG(_vfs, OID_AUTO, wantfreevnodes, CTLFLAG_RW, | ||||
&wantfreevnodes, 0, "Target for minimum number of \"free\" vnodes"); | &wantfreevnodes, 0, "Target for minimum number of \"free\" vnodes"); | ||||
static u_long freevnodes; | static u_long __exclusive_cache_line freevnodes; | ||||
SYSCTL_ULONG(_vfs, OID_AUTO, freevnodes, CTLFLAG_RD, | SYSCTL_ULONG(_vfs, OID_AUTO, freevnodes, CTLFLAG_RD, | ||||
&freevnodes, 0, "Number of \"free\" vnodes"); | &freevnodes, 0, "Number of \"free\" vnodes"); | ||||
static counter_u64_t recycles_count; | static counter_u64_t recycles_count; | ||||
SYSCTL_COUNTER_U64(_vfs, OID_AUTO, recycles, CTLFLAG_RD, &recycles_count, | SYSCTL_COUNTER_U64(_vfs, OID_AUTO, recycles, CTLFLAG_RD, &recycles_count, | ||||
"Number of vnodes recycled to meet vnode cache targets"); | "Number of vnodes recycled to meet vnode cache targets"); | ||||
static counter_u64_t recycles_free_count; | static counter_u64_t recycles_free_count; | ||||
Show All 13 Lines | |||||
SYSCTL_COUNTER_U64(_vfs, OID_AUTO, deferred_inact, CTLFLAG_RD, &deferred_inact, | SYSCTL_COUNTER_U64(_vfs, OID_AUTO, deferred_inact, CTLFLAG_RD, &deferred_inact, | ||||
"Number of times inactive processing was deferred"); | "Number of times inactive processing was deferred"); | ||||
/* To keep more than one thread at a time from running vfs_getnewfsid */ | /* To keep more than one thread at a time from running vfs_getnewfsid */ | ||||
static struct mtx mntid_mtx; | static struct mtx mntid_mtx; | ||||
/* | /* | ||||
* Lock for any access to the following: | * Lock for any access to the following: | ||||
* vnode_free_list | * vnode_list | ||||
* numvnodes | * numvnodes | ||||
* freevnodes | * freevnodes | ||||
*/ | */ | ||||
static struct mtx __exclusive_cache_line vnode_free_list_mtx; | static struct mtx __exclusive_cache_line vnode_list_mtx; | ||||
/* Publicly exported FS */ | /* Publicly exported FS */ | ||||
struct nfs_public nfs_pub; | struct nfs_public nfs_pub; | ||||
static uma_zone_t buf_trie_zone; | static uma_zone_t buf_trie_zone; | ||||
/* Zone for allocation of new vnodes - used exclusively by getnewvnode() */ | /* Zone for allocation of new vnodes - used exclusively by getnewvnode() */ | ||||
static uma_zone_t vnode_zone; | static uma_zone_t vnode_zone; | ||||
▲ Show 20 Lines • Show All 270 Lines • ▼ Show 20 Lines | vnode_init(void *mem, int size, int flags) | ||||
* Initialize namecache. | * Initialize namecache. | ||||
*/ | */ | ||||
LIST_INIT(&vp->v_cache_src); | LIST_INIT(&vp->v_cache_src); | ||||
TAILQ_INIT(&vp->v_cache_dst); | TAILQ_INIT(&vp->v_cache_dst); | ||||
/* | /* | ||||
* Initialize rangelocks. | * Initialize rangelocks. | ||||
*/ | */ | ||||
rangelock_init(&vp->v_rl); | rangelock_init(&vp->v_rl); | ||||
mtx_lock(&vnode_list_mtx); | |||||
TAILQ_INSERT_BEFORE(vnode_list_marker, vp, v_vnodelist); | |||||
mtx_unlock(&vnode_list_mtx); | |||||
return (0); | return (0); | ||||
} | } | ||||
/* | /* | ||||
* Free a vnode when it is cleared from the zone. | * Free a vnode when it is cleared from the zone. | ||||
*/ | */ | ||||
static void | static void | ||||
vnode_fini(void *mem, int size) | vnode_fini(void *mem, int size) | ||||
{ | { | ||||
struct vnode *vp; | struct vnode *vp; | ||||
struct bufobj *bo; | struct bufobj *bo; | ||||
vp = mem; | vp = mem; | ||||
mtx_lock(&vnode_list_mtx); | |||||
TAILQ_REMOVE(&vnode_list, vp, v_vnodelist); | |||||
mtx_unlock(&vnode_list_mtx); | |||||
rangelock_destroy(&vp->v_rl); | rangelock_destroy(&vp->v_rl); | ||||
lockdestroy(vp->v_vnlock); | lockdestroy(vp->v_vnlock); | ||||
mtx_destroy(&vp->v_interlock); | mtx_destroy(&vp->v_interlock); | ||||
bo = &vp->v_bufobj; | bo = &vp->v_bufobj; | ||||
rw_destroy(BO_LOCKPTR(bo)); | rw_destroy(BO_LOCKPTR(bo)); | ||||
} | } | ||||
/* | /* | ||||
▲ Show 20 Lines • Show All 42 Lines • ▼ Show 20 Lines | vntblinit(void *dummy __unused) | ||||
if (desiredvnodes > MAXVNODES_MAX) { | if (desiredvnodes > MAXVNODES_MAX) { | ||||
if (bootverbose) | if (bootverbose) | ||||
printf("Reducing kern.maxvnodes %d -> %d\n", | printf("Reducing kern.maxvnodes %d -> %d\n", | ||||
desiredvnodes, MAXVNODES_MAX); | desiredvnodes, MAXVNODES_MAX); | ||||
desiredvnodes = MAXVNODES_MAX; | desiredvnodes = MAXVNODES_MAX; | ||||
} | } | ||||
wantfreevnodes = desiredvnodes / 4; | wantfreevnodes = desiredvnodes / 4; | ||||
mtx_init(&mntid_mtx, "mntid", NULL, MTX_DEF); | mtx_init(&mntid_mtx, "mntid", NULL, MTX_DEF); | ||||
TAILQ_INIT(&vnode_free_list); | TAILQ_INIT(&vnode_list); | ||||
mtx_init(&vnode_free_list_mtx, "vnode_free_list", NULL, MTX_DEF); | mtx_init(&vnode_list_mtx, "vnode_list", NULL, MTX_DEF); | ||||
vnode_list_marker = vn_alloc_marker(NULL); | |||||
TAILQ_INSERT_HEAD(&vnode_list, vnode_list_marker, v_vnodelist); | |||||
vnode_zone = uma_zcreate("VNODE", sizeof (struct vnode), NULL, NULL, | vnode_zone = uma_zcreate("VNODE", sizeof (struct vnode), NULL, NULL, | ||||
vnode_init, vnode_fini, UMA_ALIGN_PTR, 0); | vnode_init, vnode_fini, UMA_ALIGN_PTR, 0); | ||||
vnodepoll_zone = uma_zcreate("VNODEPOLL", sizeof (struct vpollinfo), | vnodepoll_zone = uma_zcreate("VNODEPOLL", sizeof (struct vpollinfo), | ||||
NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); | NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); | ||||
/* | /* | ||||
* Preallocate enough nodes to support one-per buf so that | * Preallocate enough nodes to support one-per buf so that | ||||
* we can not fail an insert. reassignbuf() callers can not | * we can not fail an insert. reassignbuf() callers can not | ||||
* tolerate the insertion failure. | * tolerate the insertion failure. | ||||
▲ Show 20 Lines • Show All 452 Lines • ▼ Show 20 Lines | while (count != 0 && done < target) { | ||||
/* | /* | ||||
* If it's been deconstructed already, it's still | * If it's been deconstructed already, it's still | ||||
* referenced, or it exceeds the trigger, skip it. | * referenced, or it exceeds the trigger, skip it. | ||||
* Also skip free vnodes. We are trying to make space | * Also skip free vnodes. We are trying to make space | ||||
* to expand the free list, not reduce it. | * to expand the free list, not reduce it. | ||||
*/ | */ | ||||
if (vp->v_usecount || | if (vp->v_usecount || | ||||
(!reclaim_nc_src && !LIST_EMPTY(&vp->v_cache_src)) || | (!reclaim_nc_src && !LIST_EMPTY(&vp->v_cache_src)) || | ||||
((vp->v_iflag & VI_FREE) != 0) || | vp->v_holdcnt == 0 || | ||||
VN_IS_DOOMED(vp) || (vp->v_object != NULL && | VN_IS_DOOMED(vp) || (vp->v_object != NULL && | ||||
vp->v_object->resident_page_count > trigger)) { | vp->v_object->resident_page_count > trigger)) { | ||||
VI_UNLOCK(vp); | VI_UNLOCK(vp); | ||||
goto next_iter; | goto next_iter; | ||||
} | } | ||||
MNT_IUNLOCK(mp); | MNT_IUNLOCK(mp); | ||||
vholdl(vp); | vholdl(vp); | ||||
if (VOP_LOCK(vp, LK_INTERLOCK|LK_EXCLUSIVE|LK_NOWAIT)) { | if (VOP_LOCK(vp, LK_INTERLOCK|LK_EXCLUSIVE|LK_NOWAIT)) { | ||||
▲ Show 20 Lines • Show All 54 Lines • ▼ Show 20 Lines | |||||
/* | /* | ||||
* Attempt to reduce the free list by the requested amount. | * Attempt to reduce the free list by the requested amount. | ||||
*/ | */ | ||||
static void | static void | ||||
vnlru_free_locked(int count, struct vfsops *mnt_op) | vnlru_free_locked(int count, struct vfsops *mnt_op) | ||||
{ | { | ||||
struct vnode *vp; | struct vnode *vp; | ||||
struct mount *mp; | struct mount *mp; | ||||
bool tried_batches; | |||||
tried_batches = false; | mtx_assert(&vnode_list_mtx, MA_OWNED); | ||||
mtx_assert(&vnode_free_list_mtx, MA_OWNED); | |||||
if (count > max_vnlru_free) | if (count > max_vnlru_free) | ||||
count = max_vnlru_free; | count = max_vnlru_free; | ||||
for (; count > 0; count--) { | restart: | ||||
vp = TAILQ_FIRST(&vnode_free_list); | vp = vnode_list_marker; | ||||
/* | while (count > 0) { | ||||
* The list can be modified while the free_list_mtx | vp = TAILQ_NEXT(vp, v_vnodelist); | ||||
* has been dropped and vp could be NULL here. | if (__predict_false(vp == NULL)) { | ||||
*/ | TAILQ_REMOVE(&vnode_list, vnode_list_marker, v_vnodelist); | ||||
if (vp == NULL) { | TAILQ_INSERT_TAIL(&vnode_list, vnode_list_marker, v_vnodelist); | ||||
if (tried_batches) | |||||
break; | break; | ||||
mtx_unlock(&vnode_free_list_mtx); | |||||
vnlru_return_batches(mnt_op); | |||||
tried_batches = true; | |||||
mtx_lock(&vnode_free_list_mtx); | |||||
continue; | |||||
} | } | ||||
if (__predict_false(vp->v_type == VMARKER)) | |||||
continue; | |||||
VNASSERT(vp->v_op != NULL, vp, | |||||
("vnlru_free: vnode already reclaimed.")); | |||||
KASSERT((vp->v_iflag & VI_FREE) != 0, | |||||
("Removing vnode not on freelist")); | |||||
KASSERT((vp->v_iflag & VI_ACTIVE) == 0, | |||||
("Mangling active vnode")); | |||||
TAILQ_REMOVE(&vnode_free_list, vp, v_actfreelist); | |||||
/* | /* | ||||
* Don't recycle if our vnode is from different type | * Don't recycle if our vnode is from different type | ||||
* of mount point. Note that mp is type-safe, the | * of mount point. Note that mp is type-safe, the | ||||
* check does not reach unmapped address even if | * check does not reach unmapped address even if | ||||
* vnode is reclaimed. | * vnode is reclaimed. | ||||
* Don't recycle if we can't get the interlock without | * Don't recycle if we can't get the interlock without | ||||
* blocking. | * blocking. | ||||
*/ | */ | ||||
if ((mnt_op != NULL && (mp = vp->v_mount) != NULL && | if (vp->v_holdcnt > 0 || (mnt_op != NULL && (mp = vp->v_mount) != NULL && | ||||
mp->mnt_op != mnt_op) || !VI_TRYLOCK(vp)) { | mp->mnt_op != mnt_op) || !VI_TRYLOCK(vp)) { | ||||
TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_actfreelist); | |||||
continue; | continue; | ||||
} | } | ||||
VNASSERT((vp->v_iflag & VI_FREE) != 0 && vp->v_holdcnt == 0, | TAILQ_REMOVE(&vnode_list, vnode_list_marker, v_vnodelist); | ||||
vp, ("vp inconsistent on freelist")); | TAILQ_INSERT_AFTER(&vnode_list, vp, vnode_list_marker, v_vnodelist); | ||||
if (__predict_false(vp->v_type == VBAD || vp->v_type == VNON)) { | |||||
/* | |||||
* The clear of VI_FREE prevents activation of the | |||||
* vnode. There is no sense in putting the vnode on | |||||
* the mount point active list, only to remove it | |||||
* later during recycling. Inline the relevant part | |||||
* of vholdl(), to avoid triggering assertions or | |||||
* activating. | |||||
*/ | |||||
freevnodes--; | |||||
vp->v_iflag &= ~VI_FREE; | |||||
VNODE_REFCOUNT_FENCE_REL(); | |||||
refcount_acquire(&vp->v_holdcnt); | |||||
mtx_unlock(&vnode_free_list_mtx); | |||||
VI_UNLOCK(vp); | VI_UNLOCK(vp); | ||||
continue; | |||||
} | |||||
vholdl(vp); | |||||
count--; | |||||
mtx_unlock(&vnode_list_mtx); | |||||
VI_UNLOCK(vp); | |||||
vtryrecycle(vp); | vtryrecycle(vp); | ||||
/* | /* | ||||
* If the recycle succeeded this vdrop will actually free | * If the recycle succeeded this vdrop will actually free | ||||
* the vnode. If not it will simply place it back on | * the vnode. If not it will simply place it back on | ||||
* the free list. | * the free list. | ||||
*/ | */ | ||||
vdrop(vp); | vdrop(vp); | ||||
mtx_lock(&vnode_free_list_mtx); | mtx_lock(&vnode_list_mtx); | ||||
goto restart; | |||||
} | } | ||||
} | } | ||||
void | void | ||||
vnlru_free(int count, struct vfsops *mnt_op) | vnlru_free(int count, struct vfsops *mnt_op) | ||||
{ | { | ||||
mtx_lock(&vnode_free_list_mtx); | mtx_lock(&vnode_list_mtx); | ||||
vnlru_free_locked(count, mnt_op); | vnlru_free_locked(count, mnt_op); | ||||
mtx_unlock(&vnode_free_list_mtx); | mtx_unlock(&vnode_list_mtx); | ||||
} | } | ||||
/* XXX some names and initialization are bad for limits and watermarks. */ | /* XXX some names and initialization are bad for limits and watermarks. */ | ||||
static int | static int | ||||
vspace(void) | vspace(void) | ||||
{ | { | ||||
u_long rnumvnodes, rfreevnodes; | u_long rnumvnodes, rfreevnodes; | ||||
int space; | int space; | ||||
gapvnodes = imax(desiredvnodes - wantfreevnodes, 100); | gapvnodes = imax(desiredvnodes - wantfreevnodes, 100); | ||||
vhiwat = gapvnodes / 11; /* 9% -- just under the 10% in vlrureclaim() */ | vhiwat = gapvnodes / 11; /* 9% -- just under the 10% in vlrureclaim() */ | ||||
vlowat = vhiwat / 2; | vlowat = vhiwat / 2; | ||||
rnumvnodes = atomic_load_long(&numvnodes); | rnumvnodes = atomic_load_long(&numvnodes); | ||||
rfreevnodes = atomic_load_long(&freevnodes); | rfreevnodes = atomic_load_long(&freevnodes); | ||||
if (rnumvnodes > desiredvnodes) | if (rnumvnodes > desiredvnodes) | ||||
return (0); | return (0); | ||||
space = desiredvnodes - rnumvnodes; | space = desiredvnodes - rnumvnodes; | ||||
if (freevnodes > wantfreevnodes) | if (freevnodes > wantfreevnodes) | ||||
space += rfreevnodes - wantfreevnodes; | space += rfreevnodes - wantfreevnodes; | ||||
return (space); | return (space); | ||||
} | } | ||||
static void | |||||
vnlru_return_batch_locked(struct mount *mp) | |||||
{ | |||||
struct vnode *vp; | |||||
mtx_assert(&mp->mnt_listmtx, MA_OWNED); | |||||
if (mp->mnt_tmpfreevnodelistsize == 0) | |||||
return; | |||||
TAILQ_FOREACH(vp, &mp->mnt_tmpfreevnodelist, v_actfreelist) { | |||||
VNASSERT((vp->v_mflag & VMP_TMPMNTFREELIST) != 0, vp, | |||||
("vnode without VMP_TMPMNTFREELIST on mnt_tmpfreevnodelist")); | |||||
vp->v_mflag &= ~VMP_TMPMNTFREELIST; | |||||
} | |||||
mtx_lock(&vnode_free_list_mtx); | |||||
TAILQ_CONCAT(&vnode_free_list, &mp->mnt_tmpfreevnodelist, v_actfreelist); | |||||
freevnodes += mp->mnt_tmpfreevnodelistsize; | |||||
mtx_unlock(&vnode_free_list_mtx); | |||||
mp->mnt_tmpfreevnodelistsize = 0; | |||||
} | |||||
static void | |||||
vnlru_return_batch(struct mount *mp) | |||||
{ | |||||
mtx_lock(&mp->mnt_listmtx); | |||||
vnlru_return_batch_locked(mp); | |||||
mtx_unlock(&mp->mnt_listmtx); | |||||
} | |||||
static void | |||||
vnlru_return_batches(struct vfsops *mnt_op) | |||||
{ | |||||
struct mount *mp, *nmp; | |||||
bool need_unbusy; | |||||
mtx_lock(&mountlist_mtx); | |||||
for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { | |||||
need_unbusy = false; | |||||
if (mnt_op != NULL && mp->mnt_op != mnt_op) | |||||
goto next; | |||||
if (mp->mnt_tmpfreevnodelistsize == 0) | |||||
goto next; | |||||
if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK) == 0) { | |||||
vnlru_return_batch(mp); | |||||
need_unbusy = true; | |||||
mtx_lock(&mountlist_mtx); | |||||
} | |||||
next: | |||||
nmp = TAILQ_NEXT(mp, mnt_list); | |||||
if (need_unbusy) | |||||
vfs_unbusy(mp); | |||||
} | |||||
mtx_unlock(&mountlist_mtx); | |||||
} | |||||
/* | /* | ||||
* Attempt to recycle vnodes in a context that is always safe to block. | * Attempt to recycle vnodes in a context that is always safe to block. | ||||
* Calling vlrurecycle() from the bowels of filesystem code has some | * Calling vlrurecycle() from the bowels of filesystem code has some | ||||
* interesting deadlock problems. | * interesting deadlock problems. | ||||
*/ | */ | ||||
static struct proc *vnlruproc; | static struct proc *vnlruproc; | ||||
static int vnlruproc_sig; | static int vnlruproc_sig; | ||||
static void | static void | ||||
vnlru_proc(void) | vnlru_proc(void) | ||||
{ | { | ||||
u_long rnumvnodes, rfreevnodes; | u_long rnumvnodes, rfreevnodes; | ||||
struct mount *mp, *nmp; | struct mount *mp, *nmp; | ||||
unsigned long onumvnodes; | unsigned long onumvnodes; | ||||
int done, force, trigger, usevnodes, vsp; | int done, force, trigger, usevnodes, vsp; | ||||
bool reclaim_nc_src; | bool reclaim_nc_src; | ||||
EVENTHANDLER_REGISTER(shutdown_pre_sync, kproc_shutdown, vnlruproc, | EVENTHANDLER_REGISTER(shutdown_pre_sync, kproc_shutdown, vnlruproc, | ||||
SHUTDOWN_PRI_FIRST); | SHUTDOWN_PRI_FIRST); | ||||
force = 0; | force = 0; | ||||
for (;;) { | for (;;) { | ||||
kproc_suspend_check(vnlruproc); | kproc_suspend_check(vnlruproc); | ||||
mtx_lock(&vnode_free_list_mtx); | mtx_lock(&vnode_list_mtx); | ||||
rnumvnodes = atomic_load_long(&numvnodes); | rnumvnodes = atomic_load_long(&numvnodes); | ||||
/* | /* | ||||
* If numvnodes is too large (due to desiredvnodes being | * If numvnodes is too large (due to desiredvnodes being | ||||
* adjusted using its sysctl, or emergency growth), first | * adjusted using its sysctl, or emergency growth), first | ||||
* try to reduce it by discarding from the free list. | * try to reduce it by discarding from the free list. | ||||
*/ | */ | ||||
if (rnumvnodes > desiredvnodes) | if (rnumvnodes > desiredvnodes) | ||||
vnlru_free_locked(rnumvnodes - desiredvnodes, NULL); | vnlru_free_locked(rnumvnodes - desiredvnodes, NULL); | ||||
/* | /* | ||||
* Sleep if the vnode cache is in a good state. This is | * Sleep if the vnode cache is in a good state. This is | ||||
* when it is not over-full and has space for about a 4% | * when it is not over-full and has space for about a 4% | ||||
* or 9% expansion (by growing its size or inexcessively | * or 9% expansion (by growing its size or inexcessively | ||||
* reducing its free list). Otherwise, try to reclaim | * reducing its free list). Otherwise, try to reclaim | ||||
* space for a 10% expansion. | * space for a 10% expansion. | ||||
*/ | */ | ||||
if (vstir && force == 0) { | if (vstir && force == 0) { | ||||
force = 1; | force = 1; | ||||
vstir = 0; | vstir = 0; | ||||
} | } | ||||
vsp = vspace(); | vsp = vspace(); | ||||
if (vsp >= vlowat && force == 0) { | if (vsp >= vlowat && force == 0) { | ||||
vnlruproc_sig = 0; | vnlruproc_sig = 0; | ||||
wakeup(&vnlruproc_sig); | wakeup(&vnlruproc_sig); | ||||
msleep(vnlruproc, &vnode_free_list_mtx, | msleep(vnlruproc, &vnode_list_mtx, | ||||
PVFS|PDROP, "vlruwt", hz); | PVFS|PDROP, "vlruwt", hz); | ||||
continue; | continue; | ||||
} | } | ||||
mtx_unlock(&vnode_free_list_mtx); | mtx_unlock(&vnode_list_mtx); | ||||
done = 0; | done = 0; | ||||
rnumvnodes = atomic_load_long(&numvnodes); | rnumvnodes = atomic_load_long(&numvnodes); | ||||
rfreevnodes = atomic_load_long(&freevnodes); | rfreevnodes = atomic_load_long(&freevnodes); | ||||
onumvnodes = rnumvnodes; | onumvnodes = rnumvnodes; | ||||
/* | /* | ||||
* Calculate parameters for recycling. These are the same | * Calculate parameters for recycling. These are the same | ||||
* throughout the loop to give some semblance of fairness. | * throughout the loop to give some semblance of fairness. | ||||
▲ Show 20 Lines • Show All 142 Lines • ▼ Show 20 Lines | |||||
/* | /* | ||||
* Wait if necessary for space for a new vnode. | * Wait if necessary for space for a new vnode. | ||||
*/ | */ | ||||
static int | static int | ||||
getnewvnode_wait(int suspended) | getnewvnode_wait(int suspended) | ||||
{ | { | ||||
mtx_assert(&vnode_free_list_mtx, MA_OWNED); | mtx_assert(&vnode_list_mtx, MA_OWNED); | ||||
if (numvnodes >= desiredvnodes) { | if (numvnodes >= desiredvnodes) { | ||||
if (suspended) { | if (suspended) { | ||||
/* | /* | ||||
* The file system is being suspended. We cannot | * The file system is being suspended. We cannot | ||||
* risk a deadlock here, so allow allocation of | * risk a deadlock here, so allow allocation of | ||||
* another vnode even if this would give too many. | * another vnode even if this would give too many. | ||||
*/ | */ | ||||
return (0); | return (0); | ||||
} | } | ||||
if (vnlruproc_sig == 0) { | if (vnlruproc_sig == 0) { | ||||
vnlruproc_sig = 1; /* avoid unnecessary wakeups */ | vnlruproc_sig = 1; /* avoid unnecessary wakeups */ | ||||
wakeup(vnlruproc); | wakeup(vnlruproc); | ||||
} | } | ||||
msleep(&vnlruproc_sig, &vnode_free_list_mtx, PVFS, | msleep(&vnlruproc_sig, &vnode_list_mtx, PVFS, | ||||
"vlruwk", hz); | "vlruwk", hz); | ||||
} | } | ||||
/* Post-adjust like the pre-adjust in getnewvnode(). */ | /* Post-adjust like the pre-adjust in getnewvnode(). */ | ||||
if (numvnodes + 1 > desiredvnodes && freevnodes > 1) | if (numvnodes + 1 > desiredvnodes && freevnodes > 1) | ||||
vnlru_free_locked(1, NULL); | vnlru_free_locked(1, NULL); | ||||
return (numvnodes >= desiredvnodes ? ENFILE : 0); | return (numvnodes >= desiredvnodes ? ENFILE : 0); | ||||
} | } | ||||
/* | /* | ||||
* This hack is fragile, and probably not needed any more now that the | * This hack is fragile, and probably not needed any more now that the | ||||
* watermark handling works. | * watermark handling works. | ||||
*/ | */ | ||||
void | void | ||||
getnewvnode_reserve(u_int count) | getnewvnode_reserve(u_int count) | ||||
{ | { | ||||
u_long rnumvnodes, rfreevnodes; | u_long rnumvnodes, rfreevnodes; | ||||
struct thread *td; | struct thread *td; | ||||
/* Pre-adjust like the pre-adjust in getnewvnode(), with any count. */ | /* Pre-adjust like the pre-adjust in getnewvnode(), with any count. */ | ||||
/* XXX no longer so quick, but this part is not racy. */ | /* XXX no longer so quick, but this part is not racy. */ | ||||
mtx_lock(&vnode_free_list_mtx); | mtx_lock(&vnode_list_mtx); | ||||
rnumvnodes = atomic_load_long(&numvnodes); | rnumvnodes = atomic_load_long(&numvnodes); | ||||
rfreevnodes = atomic_load_long(&freevnodes); | rfreevnodes = atomic_load_long(&freevnodes); | ||||
if (rnumvnodes + count > desiredvnodes && rfreevnodes > wantfreevnodes) | if (rnumvnodes + count > desiredvnodes && rfreevnodes > wantfreevnodes) | ||||
vnlru_free_locked(ulmin(rnumvnodes + count - desiredvnodes, | vnlru_free_locked(ulmin(rnumvnodes + count - desiredvnodes, | ||||
rfreevnodes - wantfreevnodes), NULL); | rfreevnodes - wantfreevnodes), NULL); | ||||
mtx_unlock(&vnode_free_list_mtx); | mtx_unlock(&vnode_list_mtx); | ||||
td = curthread; | td = curthread; | ||||
/* First try to be quick and racy. */ | /* First try to be quick and racy. */ | ||||
if (atomic_fetchadd_long(&numvnodes, count) + count <= desiredvnodes) { | if (atomic_fetchadd_long(&numvnodes, count) + count <= desiredvnodes) { | ||||
td->td_vp_reserv += count; | td->td_vp_reserv += count; | ||||
vcheckspace(); /* XXX no longer so quick, but more racy */ | vcheckspace(); /* XXX no longer so quick, but more racy */ | ||||
return; | return; | ||||
} else | } else | ||||
atomic_subtract_long(&numvnodes, count); | atomic_subtract_long(&numvnodes, count); | ||||
mtx_lock(&vnode_free_list_mtx); | mtx_lock(&vnode_list_mtx); | ||||
while (count > 0) { | while (count > 0) { | ||||
if (getnewvnode_wait(0) == 0) { | if (getnewvnode_wait(0) == 0) { | ||||
count--; | count--; | ||||
td->td_vp_reserv++; | td->td_vp_reserv++; | ||||
atomic_add_long(&numvnodes, 1); | atomic_add_long(&numvnodes, 1); | ||||
} | } | ||||
} | } | ||||
vcheckspace(); | vcheckspace(); | ||||
mtx_unlock(&vnode_free_list_mtx); | mtx_unlock(&vnode_list_mtx); | ||||
} | } | ||||
/* | /* | ||||
* This hack is fragile, especially if desiredvnodes or wantvnodes are | * This hack is fragile, especially if desiredvnodes or wantvnodes are | ||||
* misconfigured or changed significantly. Reducing desiredvnodes below | * misconfigured or changed significantly. Reducing desiredvnodes below | ||||
* the reserved amount should cause bizarre behaviour like reducing it | * the reserved amount should cause bizarre behaviour like reducing it | ||||
* below the number of active vnodes -- the system will try to reduce | * below the number of active vnodes -- the system will try to reduce | ||||
* numvnodes to match, but should fail, so the subtraction below should | * numvnodes to match, but should fail, so the subtraction below should | ||||
Show All 28 Lines | KASSERT(vops->registered, | ||||
("%s: not registered vector op %p\n", __func__, vops)); | ("%s: not registered vector op %p\n", __func__, vops)); | ||||
vp = NULL; | vp = NULL; | ||||
td = curthread; | td = curthread; | ||||
if (td->td_vp_reserv > 0) { | if (td->td_vp_reserv > 0) { | ||||
td->td_vp_reserv -= 1; | td->td_vp_reserv -= 1; | ||||
goto alloc; | goto alloc; | ||||
} | } | ||||
mtx_lock(&vnode_free_list_mtx); | mtx_lock(&vnode_list_mtx); | ||||
if (numvnodes < desiredvnodes) | if (numvnodes < desiredvnodes) | ||||
cyclecount = 0; | cyclecount = 0; | ||||
else if (cyclecount++ >= freevnodes) { | else if (cyclecount++ >= freevnodes) { | ||||
cyclecount = 0; | cyclecount = 0; | ||||
vstir = 1; | vstir = 1; | ||||
} | } | ||||
/* | /* | ||||
* Grow the vnode cache if it will not be above its target max | * Grow the vnode cache if it will not be above its target max | ||||
Show All 9 Lines | if (numvnodes + 1 <= desiredvnodes) | ||||
; | ; | ||||
else if (freevnodes > 0) | else if (freevnodes > 0) | ||||
vnlru_free_locked(1, NULL); | vnlru_free_locked(1, NULL); | ||||
else { | else { | ||||
error = getnewvnode_wait(mp != NULL && (mp->mnt_kern_flag & | error = getnewvnode_wait(mp != NULL && (mp->mnt_kern_flag & | ||||
MNTK_SUSPEND)); | MNTK_SUSPEND)); | ||||
#if 0 /* XXX Not all VFS_VGET/ffs_vget callers check returns. */ | #if 0 /* XXX Not all VFS_VGET/ffs_vget callers check returns. */ | ||||
if (error != 0) { | if (error != 0) { | ||||
mtx_unlock(&vnode_free_list_mtx); | mtx_unlock(&vnode_list_mtx); | ||||
return (error); | return (error); | ||||
} | } | ||||
#endif | #endif | ||||
} | } | ||||
vcheckspace(); | vcheckspace(); | ||||
atomic_add_long(&numvnodes, 1); | atomic_add_long(&numvnodes, 1); | ||||
mtx_unlock(&vnode_free_list_mtx); | mtx_unlock(&vnode_list_mtx); | ||||
alloc: | alloc: | ||||
counter_u64_add(vnodes_created, 1); | counter_u64_add(vnodes_created, 1); | ||||
vp = (struct vnode *) uma_zalloc(vnode_zone, M_WAITOK); | vp = (struct vnode *) uma_zalloc(vnode_zone, M_WAITOK); | ||||
/* | /* | ||||
* Locks are given the generic name "vnode" when created. | * Locks are given the generic name "vnode" when created. | ||||
* Follow the historic practice of using the filesystem | * Follow the historic practice of using the filesystem | ||||
* name when they are allocated, e.g., "zfs", "ufs", "nfs", etc. | * name when they are allocated, e.g., "zfs", "ufs", "nfs", etc. | ||||
* | * | ||||
▲ Show 20 Lines • Show All 65 Lines • ▼ Show 20 Lines | freevnode(struct vnode *vp) | ||||
* normally remain until it is needed for another vnode. We | * normally remain until it is needed for another vnode. We | ||||
* need to cleanup (or verify that the cleanup has already | * need to cleanup (or verify that the cleanup has already | ||||
* been done) any residual data left from its current use | * been done) any residual data left from its current use | ||||
* so as not to contaminate the freshly allocated vnode. | * so as not to contaminate the freshly allocated vnode. | ||||
*/ | */ | ||||
CTR2(KTR_VFS, "%s: destroying the vnode %p", __func__, vp); | CTR2(KTR_VFS, "%s: destroying the vnode %p", __func__, vp); | ||||
atomic_subtract_long(&numvnodes, 1); | atomic_subtract_long(&numvnodes, 1); | ||||
bo = &vp->v_bufobj; | bo = &vp->v_bufobj; | ||||
VNASSERT((vp->v_iflag & VI_FREE) == 0, vp, | |||||
("cleaned vnode still on the free list.")); | |||||
VNASSERT(vp->v_data == NULL, vp, ("cleaned vnode isn't")); | VNASSERT(vp->v_data == NULL, vp, ("cleaned vnode isn't")); | ||||
VNASSERT(vp->v_holdcnt == 0, vp, ("Non-zero hold count")); | VNASSERT(vp->v_holdcnt == 0, vp, ("Non-zero hold count")); | ||||
VNASSERT(vp->v_usecount == 0, vp, ("Non-zero use count")); | VNASSERT(vp->v_usecount == 0, vp, ("Non-zero use count")); | ||||
VNASSERT(vp->v_writecount == 0, vp, ("Non-zero write count")); | VNASSERT(vp->v_writecount == 0, vp, ("Non-zero write count")); | ||||
VNASSERT(bo->bo_numoutput == 0, vp, ("Clean vnode has pending I/O's")); | VNASSERT(bo->bo_numoutput == 0, vp, ("Clean vnode has pending I/O's")); | ||||
VNASSERT(bo->bo_clean.bv_cnt == 0, vp, ("cleanbufcnt not 0")); | VNASSERT(bo->bo_clean.bv_cnt == 0, vp, ("cleanbufcnt not 0")); | ||||
VNASSERT(pctrie_is_empty(&bo->bo_clean.bv_root), vp, | VNASSERT(pctrie_is_empty(&bo->bo_clean.bv_root), vp, | ||||
("clean blk trie not empty")); | ("clean blk trie not empty")); | ||||
Show All 37 Lines | |||||
{ | { | ||||
struct mount *mp; | struct mount *mp; | ||||
mp = vp->v_mount; | mp = vp->v_mount; | ||||
if (mp == NULL) | if (mp == NULL) | ||||
return; | return; | ||||
MNT_ILOCK(mp); | MNT_ILOCK(mp); | ||||
VI_LOCK(vp); | VI_LOCK(vp); | ||||
KASSERT(mp->mnt_activevnodelistsize <= mp->mnt_nvnodelistsize, | |||||
("Active vnode list size %d > Vnode list size %d", | |||||
mp->mnt_activevnodelistsize, mp->mnt_nvnodelistsize)); | |||||
if (vp->v_iflag & VI_ACTIVE) { | |||||
vp->v_iflag &= ~VI_ACTIVE; | |||||
mtx_lock(&mp->mnt_listmtx); | |||||
TAILQ_REMOVE(&mp->mnt_activevnodelist, vp, v_actfreelist); | |||||
mp->mnt_activevnodelistsize--; | |||||
mtx_unlock(&mp->mnt_listmtx); | |||||
} | |||||
if (vp->v_mflag & VMP_DIRTYLIST) { | if (vp->v_mflag & VMP_DIRTYLIST) { | ||||
mtx_lock(&mp->mnt_listmtx); | mtx_lock(&mp->mnt_listmtx); | ||||
if (vp->v_mflag & VMP_DIRTYLIST) { | if (vp->v_mflag & VMP_DIRTYLIST) { | ||||
vp->v_mflag &= ~VMP_DIRTYLIST; | vp->v_mflag &= ~VMP_DIRTYLIST; | ||||
TAILQ_REMOVE(&mp->mnt_dirtyvnodelist, vp, v_dirtylist); | TAILQ_REMOVE(&mp->mnt_dirtyvnodelist, vp, v_dirtylist); | ||||
mp->mnt_dirtyvnodelistsize--; | mp->mnt_dirtyvnodelistsize--; | ||||
} | } | ||||
mtx_unlock(&mp->mnt_listmtx); | mtx_unlock(&mp->mnt_listmtx); | ||||
▲ Show 20 Lines • Show All 53 Lines • ▼ Show 20 Lines | if (((mp->mnt_kern_flag & MNTK_UNMOUNT) != 0 && | ||||
return (EBUSY); | return (EBUSY); | ||||
} | } | ||||
vp->v_mount = mp; | vp->v_mount = mp; | ||||
MNT_REF(mp); | MNT_REF(mp); | ||||
TAILQ_INSERT_TAIL(&mp->mnt_nvnodelist, vp, v_nmntvnodes); | TAILQ_INSERT_TAIL(&mp->mnt_nvnodelist, vp, v_nmntvnodes); | ||||
VNASSERT(mp->mnt_nvnodelistsize >= 0, vp, | VNASSERT(mp->mnt_nvnodelistsize >= 0, vp, | ||||
("neg mount point vnode list size")); | ("neg mount point vnode list size")); | ||||
mp->mnt_nvnodelistsize++; | mp->mnt_nvnodelistsize++; | ||||
KASSERT((vp->v_iflag & VI_ACTIVE) == 0, | |||||
("Activating already active vnode")); | |||||
vp->v_iflag |= VI_ACTIVE; | |||||
mtx_lock(&mp->mnt_listmtx); | |||||
TAILQ_INSERT_HEAD(&mp->mnt_activevnodelist, vp, v_actfreelist); | |||||
mp->mnt_activevnodelistsize++; | |||||
mtx_unlock(&mp->mnt_listmtx); | |||||
VI_UNLOCK(vp); | VI_UNLOCK(vp); | ||||
MNT_IUNLOCK(mp); | MNT_IUNLOCK(mp); | ||||
return (0); | return (0); | ||||
} | } | ||||
int | int | ||||
insmntque(struct vnode *vp, struct mount *mp) | insmntque(struct vnode *vp, struct mount *mp) | ||||
{ | { | ||||
▲ Show 20 Lines • Show All 1,343 Lines • ▼ Show 20 Lines | |||||
} | } | ||||
/* | /* | ||||
* Increase the hold count and activate if this is the first reference. | * Increase the hold count and activate if this is the first reference. | ||||
*/ | */ | ||||
static void | static void | ||||
vhold_activate(struct vnode *vp) | vhold_activate(struct vnode *vp) | ||||
{ | { | ||||
/*
 * NOTE(review): this text is a two-column review rendering. On each line
 * the code left of the first '|' belongs to one revision and the code right
 * of it to the other; a line with an empty right side exists only in the
 * left revision.
 *
 * Common to both columns: the vnode interlock must be held, the hold count
 * must be zero and v_op must be non-NULL (all asserted), and the function
 * finishes by taking the first hold with refcount_acquire().
 *
 * Left column only: asserts v_mount is set, then under mp->mnt_listmtx
 * unlinks the vnode from either the per-mount temporary free list (when
 * VMP_TMPMNTFREELIST is set) or the global vnode_free_list (decrementing
 * freevnodes under vnode_free_list_mtx), clears VI_FREE, sets VI_ACTIVE and
 * inserts the vnode at the head of mp->mnt_activevnodelist.
 *
 * Right column only: a single atomic decrement of freevnodes; no list or
 * flag manipulation is performed.
 */
struct mount *mp; | |||||
ASSERT_VI_LOCKED(vp, __func__); | ASSERT_VI_LOCKED(vp, __func__); | ||||
VNASSERT(vp->v_holdcnt == 0, vp, | VNASSERT(vp->v_holdcnt == 0, vp, | ||||
("%s: wrong hold count", __func__)); | ("%s: wrong hold count", __func__)); | ||||
VNASSERT(vp->v_op != NULL, vp, | VNASSERT(vp->v_op != NULL, vp, | ||||
("%s: vnode already reclaimed.", __func__)); | ("%s: vnode already reclaimed.", __func__)); | ||||
/* | atomic_subtract_long(&freevnodes, 1); | ||||
* Remove a vnode from the free list, mark it as in use, | |||||
* and put it on the active list. | |||||
*/ | |||||
VNASSERT(vp->v_mount != NULL, vp, | |||||
("_vhold: vnode not on per mount vnode list")); | |||||
mp = vp->v_mount; | |||||
mtx_lock(&mp->mnt_listmtx); | |||||
if ((vp->v_mflag & VMP_TMPMNTFREELIST) != 0) { | |||||
TAILQ_REMOVE(&mp->mnt_tmpfreevnodelist, vp, v_actfreelist); | |||||
mp->mnt_tmpfreevnodelistsize--; | |||||
vp->v_mflag &= ~VMP_TMPMNTFREELIST; | |||||
} else { | |||||
mtx_lock(&vnode_free_list_mtx); | |||||
TAILQ_REMOVE(&vnode_free_list, vp, v_actfreelist); | |||||
freevnodes--; | |||||
mtx_unlock(&vnode_free_list_mtx); | |||||
} | |||||
KASSERT((vp->v_iflag & VI_ACTIVE) == 0, | |||||
("Activating already active vnode")); | |||||
vp->v_iflag &= ~VI_FREE; | |||||
vp->v_iflag |= VI_ACTIVE; | |||||
TAILQ_INSERT_HEAD(&mp->mnt_activevnodelist, vp, v_actfreelist); | |||||
mp->mnt_activevnodelistsize++; | |||||
mtx_unlock(&mp->mnt_listmtx); | |||||
refcount_acquire(&vp->v_holdcnt); | refcount_acquire(&vp->v_holdcnt); | ||||
} | } | ||||
/*
 * Take a hold on vp. The vnode interlock must NOT be held on entry
 * (asserted below).
 *
 * Both columns first try refcount_acquire_if_not_zero() on v_holdcnt and
 * return immediately when it succeeds. Otherwise they take the vnode
 * interlock, call vholdl() and drop the interlock again.
 *
 * NOTE(review): this is a two-column review rendering (text left of the
 * first '|' is one revision, text right of it the other). The
 * VNODE_REFCOUNT_FENCE_ACQ() call and the VI_FREE assertion appear in the
 * left column only; the right column's early-return path has neither.
 */
void | void | ||||
vhold(struct vnode *vp) | vhold(struct vnode *vp) | ||||
{ | { | ||||
ASSERT_VI_UNLOCKED(vp, __func__); | ASSERT_VI_UNLOCKED(vp, __func__); | ||||
CTR2(KTR_VFS, "%s: vp %p", __func__, vp); | CTR2(KTR_VFS, "%s: vp %p", __func__, vp); | ||||
if (refcount_acquire_if_not_zero(&vp->v_holdcnt)) { | if (refcount_acquire_if_not_zero(&vp->v_holdcnt)) | ||||
VNODE_REFCOUNT_FENCE_ACQ(); | |||||
VNASSERT((vp->v_iflag & VI_FREE) == 0, vp, | |||||
("vhold: vnode with holdcnt is free")); | |||||
return; | return; | ||||
} | |||||
VI_LOCK(vp); | VI_LOCK(vp); | ||||
vholdl(vp); | vholdl(vp); | ||||
VI_UNLOCK(vp); | VI_UNLOCK(vp); | ||||
} | } | ||||
/*
 * Take a hold on vp with the vnode interlock already held (asserted below).
 *
 * NOTE(review): this is a two-column review rendering (text left of the
 * first '|' is one revision, text right of it the other). The two columns
 * differ only in the test that selects the simple path:
 *   left column:  VI_FREE is clear in v_iflag
 *   right column: v_holdcnt is greater than zero
 * When the test passes, the hold count is bumped with refcount_acquire()
 * and the function returns; otherwise vhold_activate() is called.
 */
void | void | ||||
vholdl(struct vnode *vp) | vholdl(struct vnode *vp) | ||||
{ | { | ||||
ASSERT_VI_LOCKED(vp, __func__); | ASSERT_VI_LOCKED(vp, __func__); | ||||
CTR2(KTR_VFS, "%s: vp %p", __func__, vp); | CTR2(KTR_VFS, "%s: vp %p", __func__, vp); | ||||
if ((vp->v_iflag & VI_FREE) == 0) { | if (vp->v_holdcnt > 0) { | ||||
refcount_acquire(&vp->v_holdcnt); | refcount_acquire(&vp->v_holdcnt); | ||||
return; | return; | ||||
} | } | ||||
vhold_activate(vp); | vhold_activate(vp); | ||||
} | } | ||||
void | void | ||||
vholdnz(struct vnode *vp) | vholdnz(struct vnode *vp) | ||||
Show All 26 Lines | vdrop_deactivate(struct vnode *vp) | ||||
/* | /* | ||||
* Mark a vnode as free: remove it from its active list | * Mark a vnode as free: remove it from its active list | ||||
* and put it up for recycling on the freelist. | * and put it up for recycling on the freelist. | ||||
*/ | */ | ||||
VNASSERT(!VN_IS_DOOMED(vp), vp, | VNASSERT(!VN_IS_DOOMED(vp), vp, | ||||
("vdrop: returning doomed vnode")); | ("vdrop: returning doomed vnode")); | ||||
VNASSERT(vp->v_op != NULL, vp, | VNASSERT(vp->v_op != NULL, vp, | ||||
("vdrop: vnode already reclaimed.")); | ("vdrop: vnode already reclaimed.")); | ||||
VNASSERT((vp->v_iflag & VI_FREE) == 0, vp, | VNASSERT(vp->v_holdcnt == 0, vp, | ||||
("vnode already free")); | ("vdrop: freeing when we shouldn't")); | ||||
VNASSERT((vp->v_iflag & VI_OWEINACT) == 0, vp, | VNASSERT((vp->v_iflag & VI_OWEINACT) == 0, vp, | ||||
("vnode with VI_OWEINACT set")); | ("vnode with VI_OWEINACT set")); | ||||
VNASSERT((vp->v_iflag & VI_DEFINACT) == 0, vp, | VNASSERT((vp->v_iflag & VI_DEFINACT) == 0, vp, | ||||
("vnode with VI_DEFINACT set")); | ("vnode with VI_DEFINACT set")); | ||||
VNASSERT(vp->v_holdcnt == 0, vp, | if (vp->v_mflag & VMP_DIRTYLIST) { | ||||
("vdrop: freeing when we shouldn't")); | |||||
mp = vp->v_mount; | mp = vp->v_mount; | ||||
mtx_lock(&mp->mnt_listmtx); | mtx_lock(&mp->mnt_listmtx); | ||||
if (vp->v_mflag & VMP_DIRTYLIST) { | |||||
vp->v_mflag &= ~VMP_DIRTYLIST; | vp->v_mflag &= ~VMP_DIRTYLIST; | ||||
TAILQ_REMOVE(&mp->mnt_dirtyvnodelist, vp, v_dirtylist); | TAILQ_REMOVE(&mp->mnt_dirtyvnodelist, vp, v_dirtylist); | ||||
mp->mnt_dirtyvnodelistsize--; | mp->mnt_dirtyvnodelistsize--; | ||||
mtx_unlock(&mp->mnt_listmtx); | |||||
} | } | ||||
if (vp->v_iflag & VI_ACTIVE) { | mtx_lock(&vnode_list_mtx); | ||||
vp->v_iflag &= ~VI_ACTIVE; | TAILQ_REMOVE(&vnode_list, vp, v_vnodelist); | ||||
TAILQ_REMOVE(&mp->mnt_activevnodelist, vp, v_actfreelist); | TAILQ_INSERT_TAIL(&vnode_list, vp, v_vnodelist); | ||||
mp->mnt_activevnodelistsize--; | mtx_unlock(&vnode_list_mtx); | ||||
} | atomic_add_long(&freevnodes, 1); | ||||
TAILQ_INSERT_TAIL(&mp->mnt_tmpfreevnodelist, vp, v_actfreelist); | |||||
mp->mnt_tmpfreevnodelistsize++; | |||||
vp->v_iflag |= VI_FREE; | |||||
vp->v_mflag |= VMP_TMPMNTFREELIST; | |||||
VI_UNLOCK(vp); | VI_UNLOCK(vp); | ||||
if (mp->mnt_tmpfreevnodelistsize >= mnt_free_list_batch) | |||||
vnlru_return_batch_locked(mp); | |||||
mtx_unlock(&mp->mnt_listmtx); | |||||
} | } | ||||
void | void | ||||
vdrop(struct vnode *vp) | vdrop(struct vnode *vp) | ||||
{ | { | ||||
ASSERT_VI_UNLOCKED(vp, __func__); | ASSERT_VI_UNLOCKED(vp, __func__); | ||||
CTR2(KTR_VFS, "%s: vp %p", __func__, vp); | CTR2(KTR_VFS, "%s: vp %p", __func__, vp); | ||||
▲ Show 20 Lines • Show All 530 Lines • ▼ Show 20 Lines | vn_printf(struct vnode *vp, const char *fmt, ...) | ||||
if (flags != 0) { | if (flags != 0) { | ||||
snprintf(buf2, sizeof(buf2), "|VV(0x%lx)", flags); | snprintf(buf2, sizeof(buf2), "|VV(0x%lx)", flags); | ||||
strlcat(buf, buf2, sizeof(buf)); | strlcat(buf, buf2, sizeof(buf)); | ||||
} | } | ||||
if (vp->v_iflag & VI_TEXT_REF) | if (vp->v_iflag & VI_TEXT_REF) | ||||
strlcat(buf, "|VI_TEXT_REF", sizeof(buf)); | strlcat(buf, "|VI_TEXT_REF", sizeof(buf)); | ||||
if (vp->v_iflag & VI_MOUNT) | if (vp->v_iflag & VI_MOUNT) | ||||
strlcat(buf, "|VI_MOUNT", sizeof(buf)); | strlcat(buf, "|VI_MOUNT", sizeof(buf)); | ||||
if (vp->v_iflag & VI_FREE) | |||||
strlcat(buf, "|VI_FREE", sizeof(buf)); | |||||
if (vp->v_iflag & VI_ACTIVE) | |||||
strlcat(buf, "|VI_ACTIVE", sizeof(buf)); | |||||
if (vp->v_iflag & VI_DOINGINACT) | if (vp->v_iflag & VI_DOINGINACT) | ||||
strlcat(buf, "|VI_DOINGINACT", sizeof(buf)); | strlcat(buf, "|VI_DOINGINACT", sizeof(buf)); | ||||
if (vp->v_iflag & VI_OWEINACT) | if (vp->v_iflag & VI_OWEINACT) | ||||
strlcat(buf, "|VI_OWEINACT", sizeof(buf)); | strlcat(buf, "|VI_OWEINACT", sizeof(buf)); | ||||
if (vp->v_iflag & VI_DEFINACT) | if (vp->v_iflag & VI_DEFINACT) | ||||
strlcat(buf, "|VI_DEFINACT", sizeof(buf)); | strlcat(buf, "|VI_DEFINACT", sizeof(buf)); | ||||
flags = vp->v_iflag & ~(VI_TEXT_REF | VI_MOUNT | VI_FREE | VI_ACTIVE | | flags = vp->v_iflag & ~(VI_TEXT_REF | VI_MOUNT | VI_DOINGINACT | | ||||
VI_DOINGINACT | VI_OWEINACT | VI_DEFINACT); | VI_OWEINACT | VI_DEFINACT); | ||||
if (flags != 0) { | if (flags != 0) { | ||||
snprintf(buf2, sizeof(buf2), "|VI(0x%lx)", flags); | snprintf(buf2, sizeof(buf2), "|VI(0x%lx)", flags); | ||||
strlcat(buf, buf2, sizeof(buf)); | strlcat(buf, buf2, sizeof(buf)); | ||||
} | } | ||||
if (vp->v_mflag & VMP_TMPMNTFREELIST) | |||||
strlcat(buf, "|VMP_TMPMNTFREELIST", sizeof(buf)); | |||||
if (vp->v_mflag & VMP_DIRTYLIST) | if (vp->v_mflag & VMP_DIRTYLIST) | ||||
strlcat(buf, "|VMP_DIRTYLIST", sizeof(buf)); | strlcat(buf, "|VMP_DIRTYLIST", sizeof(buf)); | ||||
flags = vp->v_mflag & ~(VMP_TMPMNTFREELIST | VMP_DIRTYLIST); | flags = vp->v_mflag & ~(VMP_DIRTYLIST); | ||||
if (flags != 0) { | if (flags != 0) { | ||||
snprintf(buf2, sizeof(buf2), "|VMP(0x%lx)", flags); | snprintf(buf2, sizeof(buf2), "|VMP(0x%lx)", flags); | ||||
strlcat(buf, buf2, sizeof(buf)); | strlcat(buf, buf2, sizeof(buf)); | ||||
} | } | ||||
printf(" flags (%s)\n", buf + 1); | printf(" flags (%s)\n", buf + 1); | ||||
if (mtx_owned(VI_MTX(vp))) | if (mtx_owned(VI_MTX(vp))) | ||||
printf(" VI_LOCKed"); | printf(" VI_LOCKed"); | ||||
if (vp->v_object != NULL) | if (vp->v_object != NULL) | ||||
▲ Show 20 Lines • Show All 201 Lines • ▼ Show 20 Lines | db_printf(" mnt_cred = { uid=%u ruid=%u", | ||||
(u_int)mp->mnt_cred->cr_uid, (u_int)mp->mnt_cred->cr_ruid); | (u_int)mp->mnt_cred->cr_uid, (u_int)mp->mnt_cred->cr_ruid); | ||||
if (jailed(mp->mnt_cred)) | if (jailed(mp->mnt_cred)) | ||||
db_printf(", jail=%d", mp->mnt_cred->cr_prison->pr_id); | db_printf(", jail=%d", mp->mnt_cred->cr_prison->pr_id); | ||||
db_printf(" }\n"); | db_printf(" }\n"); | ||||
db_printf(" mnt_ref = %d (with %d in the struct)\n", | db_printf(" mnt_ref = %d (with %d in the struct)\n", | ||||
vfs_mount_fetch_counter(mp, MNT_COUNT_REF), mp->mnt_ref); | vfs_mount_fetch_counter(mp, MNT_COUNT_REF), mp->mnt_ref); | ||||
db_printf(" mnt_gen = %d\n", mp->mnt_gen); | db_printf(" mnt_gen = %d\n", mp->mnt_gen); | ||||
db_printf(" mnt_nvnodelistsize = %d\n", mp->mnt_nvnodelistsize); | db_printf(" mnt_nvnodelistsize = %d\n", mp->mnt_nvnodelistsize); | ||||
db_printf(" mnt_activevnodelistsize = %d\n", | |||||
mp->mnt_activevnodelistsize); | |||||
db_printf(" mnt_dirtyvnodelistsize = %d\n", | db_printf(" mnt_dirtyvnodelistsize = %d\n", | ||||
mp->mnt_dirtyvnodelistsize); | mp->mnt_dirtyvnodelistsize); | ||||
db_printf(" mnt_writeopcount = %d (with %d in the struct)\n", | db_printf(" mnt_writeopcount = %d (with %d in the struct)\n", | ||||
vfs_mount_fetch_counter(mp, MNT_COUNT_WRITEOPCOUNT), mp->mnt_writeopcount); | vfs_mount_fetch_counter(mp, MNT_COUNT_WRITEOPCOUNT), mp->mnt_writeopcount); | ||||
db_printf(" mnt_maxsymlinklen = %d\n", mp->mnt_maxsymlinklen); | db_printf(" mnt_maxsymlinklen = %d\n", mp->mnt_maxsymlinklen); | ||||
db_printf(" mnt_iosize_max = %d\n", mp->mnt_iosize_max); | db_printf(" mnt_iosize_max = %d\n", mp->mnt_iosize_max); | ||||
db_printf(" mnt_hashseed = %u\n", mp->mnt_hashseed); | db_printf(" mnt_hashseed = %u\n", mp->mnt_hashseed); | ||||
db_printf(" mnt_lockref = %d (with %d in the struct)\n", | db_printf(" mnt_lockref = %d (with %d in the struct)\n", | ||||
vfs_mount_fetch_counter(mp, MNT_COUNT_LOCKREF), mp->mnt_lockref); | vfs_mount_fetch_counter(mp, MNT_COUNT_LOCKREF), mp->mnt_lockref); | ||||
db_printf(" mnt_secondary_writes = %d\n", mp->mnt_secondary_writes); | db_printf(" mnt_secondary_writes = %d\n", mp->mnt_secondary_writes); | ||||
db_printf(" mnt_secondary_accwrites = %d\n", | db_printf(" mnt_secondary_accwrites = %d\n", | ||||
mp->mnt_secondary_accwrites); | mp->mnt_secondary_accwrites); | ||||
db_printf(" mnt_gjprovider = %s\n", | db_printf(" mnt_gjprovider = %s\n", | ||||
mp->mnt_gjprovider != NULL ? mp->mnt_gjprovider : "NULL"); | mp->mnt_gjprovider != NULL ? mp->mnt_gjprovider : "NULL"); | ||||
db_printf(" mnt_vfs_ops = %d\n", mp->mnt_vfs_ops); | db_printf(" mnt_vfs_ops = %d\n", mp->mnt_vfs_ops); | ||||
db_printf("\n\nList of active vnodes\n"); | db_printf("\n\nList of active vnodes\n"); | ||||
TAILQ_FOREACH(vp, &mp->mnt_activevnodelist, v_actfreelist) { | TAILQ_FOREACH(vp, &mp->mnt_nvnodelist, v_nmntvnodes) { | ||||
if (vp->v_type != VMARKER) { | if (vp->v_type != VMARKER && vp->v_holdcnt > 0) { | ||||
vn_printf(vp, "vnode "); | vn_printf(vp, "vnode "); | ||||
if (db_pager_quit) | if (db_pager_quit) | ||||
break; | break; | ||||
} | } | ||||
} | } | ||||
db_printf("\n\nList of inactive vnodes\n"); | db_printf("\n\nList of inactive vnodes\n"); | ||||
TAILQ_FOREACH(vp, &mp->mnt_nvnodelist, v_nmntvnodes) { | TAILQ_FOREACH(vp, &mp->mnt_nvnodelist, v_nmntvnodes) { | ||||
if (vp->v_type != VMARKER && (vp->v_iflag & VI_ACTIVE) == 0) { | if (vp->v_type != VMARKER && vp->v_holdcnt == 0) { | ||||
vn_printf(vp, "vnode "); | vn_printf(vp, "vnode "); | ||||
if (db_pager_quit) | if (db_pager_quit) | ||||
break; | break; | ||||
} | } | ||||
} | } | ||||
} | } | ||||
#endif /* DDB */ | #endif /* DDB */ | ||||
▲ Show 20 Lines • Show All 633 Lines • ▼ Show 20 Lines | if (vn_start_write(NULL, &mp, V_NOWAIT) != 0) { | ||||
vfs_unbusy(mp); | vfs_unbusy(mp); | ||||
return (0); | return (0); | ||||
} | } | ||||
save = curthread_pflags_set(TDP_SYNCIO); | save = curthread_pflags_set(TDP_SYNCIO); | ||||
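/* | /* | ||||
* The filesystem at hand may be idle with free vnodes stored in the | * The filesystem at hand may be idle with free vnodes stored in the | ||||
* batch. Return them instead of letting them stay there indefinitely. | * batch. Return them instead of letting them stay there indefinitely. | ||||
* XXX(review): the vnlru_return_batch(mp) call this comment describes is
* present only in the left column. In the right column the comment is
* followed directly by vfs_periodic(), so it no longer matches the code
* there and should be dropped or reworded in that revision.
*/ | */ | ||||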
vnlru_return_batch(mp); | |||||
vfs_periodic(mp, MNT_NOWAIT); | vfs_periodic(mp, MNT_NOWAIT); | ||||
error = VFS_SYNC(mp, MNT_LAZY); | error = VFS_SYNC(mp, MNT_LAZY); | ||||
curthread_pflags_restore(save); | curthread_pflags_restore(save); | ||||
vn_finished_write(mp); | vn_finished_write(mp); | ||||
vfs_unbusy(mp); | vfs_unbusy(mp); | ||||
return (error); | return (error); | ||||
} | } | ||||
▲ Show 20 Lines • Show All 1,270 Lines • ▼ Show 20 Lines | |||||
mnt_vnode_markerfree_dirty(struct vnode **mvp, struct mount *mp) | mnt_vnode_markerfree_dirty(struct vnode **mvp, struct mount *mp) | ||||
{ | { | ||||
KASSERT((*mvp)->v_mount == mp, ("marker vnode mount list mismatch")); | KASSERT((*mvp)->v_mount == mp, ("marker vnode mount list mismatch")); | ||||
MNT_ILOCK(mp); | MNT_ILOCK(mp); | ||||
MNT_REL(mp); | MNT_REL(mp); | ||||
MNT_IUNLOCK(mp); | MNT_IUNLOCK(mp); | ||||
free(*mvp, M_VNODE_MARKER); | vn_free_marker(*mvp); | ||||
*mvp = NULL; | *mvp = NULL; | ||||
} | } | ||||
/* | /* | ||||
* Relock the mp mount vnode list lock with the vp vnode interlock in the | * Relock the mp mount vnode list lock with the vp vnode interlock in the | ||||
* conventional lock order during mnt_vnode_next_dirty iteration. | * conventional lock order during mnt_vnode_next_dirty iteration. | ||||
* | * | ||||
* On entry, the mount vnode list lock is held and the vnode interlock is not. | * On entry, the mount vnode list lock is held and the vnode interlock is not. | ||||
▲ Show 20 Lines • Show All 127 Lines • ▼ Show 20 Lines | restart: | ||||
if (vp == NULL) { | if (vp == NULL) { | ||||
mtx_unlock(&mp->mnt_listmtx); | mtx_unlock(&mp->mnt_listmtx); | ||||
mnt_vnode_markerfree_dirty(mvp, mp); | mnt_vnode_markerfree_dirty(mvp, mp); | ||||
return (NULL); | return (NULL); | ||||
} | } | ||||
TAILQ_INSERT_AFTER(&mp->mnt_dirtyvnodelist, vp, *mvp, v_dirtylist); | TAILQ_INSERT_AFTER(&mp->mnt_dirtyvnodelist, vp, *mvp, v_dirtylist); | ||||
mtx_unlock(&mp->mnt_listmtx); | mtx_unlock(&mp->mnt_listmtx); | ||||
ASSERT_VI_LOCKED(vp, "dirty iter"); | ASSERT_VI_LOCKED(vp, "dirty iter"); | ||||
KASSERT((vp->v_iflag & VI_ACTIVE) != 0, ("Non-active vp %p", vp)); | |||||
return (vp); | return (vp); | ||||
} | } | ||||
/*
 * Entry point for advancing a dirty-vnode iteration on mount mp; identical
 * in both columns of this two-column review rendering.
 *
 * Yields the CPU via kern_yield(PRI_USER) when should_yield() reports it is
 * appropriate, then acquires mp->mnt_listmtx and returns whatever
 * mnt_vnode_next_dirty() returns for the same (mvp, mp, cb, cbarg).
 *
 * The list mutex is acquired here and is not released in this function.
 * NOTE(review): presumably mnt_vnode_next_dirty() drops it on every return
 * path -- its visible tail does -- but the full body is outside this view;
 * confirm.
 */
struct vnode * | struct vnode * | ||||
__mnt_vnode_next_dirty(struct vnode **mvp, struct mount *mp, mnt_dirty_cb_t *cb, | __mnt_vnode_next_dirty(struct vnode **mvp, struct mount *mp, mnt_dirty_cb_t *cb, | ||||
void *cbarg) | void *cbarg) | ||||
{ | { | ||||
if (should_yield()) | if (should_yield()) | ||||
kern_yield(PRI_USER); | kern_yield(PRI_USER); | ||||
mtx_lock(&mp->mnt_listmtx); | mtx_lock(&mp->mnt_listmtx); | ||||
return (mnt_vnode_next_dirty(mvp, mp, cb, cbarg)); | return (mnt_vnode_next_dirty(mvp, mp, cb, cbarg)); | ||||
} | } | ||||
struct vnode * | struct vnode * | ||||
__mnt_vnode_first_dirty(struct vnode **mvp, struct mount *mp, mnt_dirty_cb_t *cb, | __mnt_vnode_first_dirty(struct vnode **mvp, struct mount *mp, mnt_dirty_cb_t *cb, | ||||
void *cbarg) | void *cbarg) | ||||
{ | { | ||||
struct vnode *vp; | struct vnode *vp; | ||||
*mvp = malloc(sizeof(struct vnode), M_VNODE_MARKER, M_WAITOK | M_ZERO); | *mvp = vn_alloc_marker(mp); | ||||
MNT_ILOCK(mp); | MNT_ILOCK(mp); | ||||
MNT_REF(mp); | MNT_REF(mp); | ||||
MNT_IUNLOCK(mp); | MNT_IUNLOCK(mp); | ||||
(*mvp)->v_type = VMARKER; | |||||
(*mvp)->v_mount = mp; | |||||
mtx_lock(&mp->mnt_listmtx); | mtx_lock(&mp->mnt_listmtx); | ||||
vp = TAILQ_FIRST(&mp->mnt_dirtyvnodelist); | vp = TAILQ_FIRST(&mp->mnt_dirtyvnodelist); | ||||
if (vp == NULL) { | if (vp == NULL) { | ||||
mtx_unlock(&mp->mnt_listmtx); | mtx_unlock(&mp->mnt_listmtx); | ||||
mnt_vnode_markerfree_dirty(mvp, mp); | mnt_vnode_markerfree_dirty(mvp, mp); | ||||
return (NULL); | return (NULL); | ||||
} | } | ||||
Show All 16 Lines |