sys/kern/vfs_subr.c
[... 296 lines elided ...]
static int metadelay = 28;		/* time to delay syncing metadata */
SYSCTL_INT(_kern, OID_AUTO, metadelay, CTLFLAG_RW, &metadelay, 0,
    "Time to delay syncing metadata (in seconds)");
static int rushjob;		/* number of slots to run ASAP */
static int stat_rush_requests;	/* number of times I/O speeded up */
SYSCTL_INT(_debug, OID_AUTO, rush_requests, CTLFLAG_RW, &stat_rush_requests, 0,
    "Number of times I/O speeded up (rush requests)");
+#define	VDBATCH_SIZE 8
+struct vdbatch {
+	u_int index;
+	u_int count;
+	struct mtx lock;
+	struct vnode *tab[VDBATCH_SIZE];
+};
+DPCPU_DEFINE_STATIC(struct vdbatch, vd);

jeff: We are starting to repeat this pattern in multiple places. Might be worth making a new api eventually.
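One hypothetical shape for such an api is sketched below: a per-CPU batch of opaque pointers with a caller-supplied drain callback that runs once the batch fills. The pcpu_batch names are illustrative only; nothing like this exists in the tree.

/*
 * Hypothetical generalization of the pattern above (sketch only).
 */
struct pcpu_batch {
	u_int count;
	struct mtx lock;
	void *tab[VDBATCH_SIZE];
	void (*drain)(void **tab, u_int count);	/* called with lock held */
};

static void
pcpu_batch_add(struct pcpu_batch *pb, void *item)
{

	mtx_lock(&pb->lock);
	pb->tab[pb->count++] = item;
	if (pb->count == VDBATCH_SIZE) {
		pb->drain(pb->tab, pb->count);
		pb->count = 0;
	}
	mtx_unlock(&pb->lock);
}

Note that vdbatch tracks index separately from count because vdbatch_dequeue() below can leave holes in the table; a general api would need to account for that as well.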
/*
 * When shutting down the syncer, run it at four times normal speed.
 */
#define SYNCER_SHUTDOWN_SPEEDUP		4
static int sync_vnode_count;
static int syncer_worklist_len;
static enum { SYNCER_RUNNING, SYNCER_SHUTTING_DOWN, SYNCER_FINAL_DELAY }
    syncer_state;

/* Target for maximum number of vnodes. */
int desiredvnodes;
static int gapvnodes;		/* gap between wanted and desired */
static int vhiwat;		/* enough extras after expansion */
static int vlowat;		/* minimal extras before expansion */
static int vstir;		/* nonzero to stir non-free vnodes */
static volatile int vsmalltrigger = 8;	/* pref to keep if > this many pages */
+static void	vdbatch_dequeue(struct vnode *vp);

static int
sysctl_update_desiredvnodes(SYSCTL_HANDLER_ARGS)
{
	int error, old_desiredvnodes;

	old_desiredvnodes = desiredvnodes;
	if ((error = sysctl_handle_int(oidp, arg1, arg2, req)) != 0)
		return (error);
[... 163 lines elided; excerpt resumes in vnode_init(void *mem, int size, int flags) ...]
	 */
	LIST_INIT(&vp->v_cache_src);
	TAILQ_INIT(&vp->v_cache_dst);
	/*
	 * Initialize rangelocks.
	 */
	rangelock_init(&vp->v_rl);

+	vp->v_dbatchcpu = NOCPU;
	mtx_lock(&vnode_list_mtx);
	TAILQ_INSERT_BEFORE(vnode_list_marker, vp, v_vnodelist);
	mtx_unlock(&vnode_list_mtx);
	return (0);
}
/*
 * Free a vnode when it is cleared from the zone.
 */
static void
vnode_fini(void *mem, int size)
{
	struct vnode *vp;
	struct bufobj *bo;

	vp = mem;
+	vdbatch_dequeue(vp);
	mtx_lock(&vnode_list_mtx);
	TAILQ_REMOVE(&vnode_list, vp, v_vnodelist);
	mtx_unlock(&vnode_list_mtx);
	rangelock_destroy(&vp->v_rl);
	lockdestroy(vp->v_vnlock);
	mtx_destroy(&vp->v_interlock);
	bo = &vp->v_bufobj;
	rw_destroy(BO_LOCKPTR(bo));
[... 18 lines elided ...]
#else
#define	NFS_NCLNODE_SZ	(360 + 32)
#define	NC_SZ	92
#endif
static void
vntblinit(void *dummy __unused)
{
+	struct vdbatch *vd;
+	int cpu, physvnodes, virtvnodes;
	u_int i;
-	int physvnodes, virtvnodes;

	/*
	 * Desiredvnodes is a function of the physical memory size and the
	 * kernel's heap size.  Generally speaking, it scales with the
	 * physical memory size.  The ratio of desiredvnodes to the physical
	 * memory size is 1:16 until desiredvnodes exceeds 98,304.
	 * Thereafter, the marginal ratio of desiredvnodes to the physical
	 * memory size is
[... 47 lines elided (still in vntblinit) ...]
	syncer_workitem_pending = hashinit(syncer_maxdelay, M_VNODE,
	    &syncer_mask);
	syncer_maxdelay = syncer_mask + 1;
	mtx_init(&sync_mtx, "Syncer mtx", NULL, MTX_DEF);
	cv_init(&sync_wakeup, "syncer");
	for (i = 1; i <= sizeof(struct vnode); i <<= 1)
		vnsz2log++;
	vnsz2log--;

+	CPU_FOREACH(cpu) {
+		vd = DPCPU_ID_PTR((cpu), vd);
+		bzero(vd, sizeof(*vd));
+		mtx_init(&vd->lock, "vdbatch", NULL, MTX_DEF);
+	}
}

SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_FIRST, vntblinit, NULL);
/*
 * Mark a mount point as busy.  Used to synchronize access and to delay
 * unmounting.  Eventually, mountlist_mtx is not released on failure.
 *
 * vfs_busy() is a custom lock, it can block the caller.
[... 2,505 lines elided ...]
#ifdef INVARIANTS
	int old = atomic_fetchadd_int(&vp->v_holdcnt, 1);
	VNASSERT(old > 0, vp, ("%s: wrong hold count", __func__));
#else
	atomic_add_int(&vp->v_holdcnt, 1);
#endif
}
+static void
+vdbatch_process(struct vdbatch *vd)
+{
+	struct vnode *vp;
+	int i;
+
+	mtx_assert(&vd->lock, MA_OWNED);
+	mtx_lock(&vnode_list_mtx);
+	for (i = 0; i < VDBATCH_SIZE; i++) {
+		vp = vd->tab[i];
+		if (vp == NULL)
+			continue;
+		TAILQ_REMOVE(&vnode_list, vp, v_vnodelist);
+		TAILQ_INSERT_TAIL(&vnode_list, vp, v_vnodelist);
+		MPASS(vp->v_dbatchcpu != NOCPU);
+		vp->v_dbatchcpu = NOCPU;
+		vd->tab[i] = NULL;
+	}
+	vd->index = 0;
+	vd->count = 0;
+	mtx_unlock(&vnode_list_mtx);
+}
+static void
+vdbatch_enqueue(struct vnode *vp)
+{
+	struct vdbatch *vd;
+	int i;
+
+	ASSERT_VI_LOCKED(vp, __func__);
+	VNASSERT(!VN_IS_DOOMED(vp), vp,
+	    ("%s: deferring requeue of a doomed vnode", __func__));
+
+	if (vp->v_dbatchcpu != NOCPU) {
+		VI_UNLOCK(vp);
+		return;
+	}
+
+	/*
+	 * A hack: pin us to the current CPU so that we know what to put in
+	 * ->v_dbatchcpu.
+	 */
+	sched_pin();
+	vd = DPCPU_PTR(vd);
+	mtx_lock(&vd->lock);
+	KASSERT(vd->index < VDBATCH_SIZE, ("%s: invalid index %u\n",
+	    __func__, vd->index));
+	for (i = vd->index; i < VDBATCH_SIZE; i++) {
+		if (vd->tab[i] != NULL)
+			continue;
+		MPASS(curthread->td_pinned);
+		vp->v_dbatchcpu = curcpu;
+		vd->tab[i] = vp;
+		vd->index = i + 1;
+		vd->count++;
+		break;
+	}
+	KASSERT(vp->v_dbatchcpu != NOCPU,
+	    ("%s: failed to fit the vnode (index %u, count %u)\n",
+	    __func__, vd->index, vd->count));
+	VI_UNLOCK(vp);
+	sched_unpin();
+	KASSERT(vd->count <= VDBATCH_SIZE, ("%s: invalid batch count %u\n",
+	    __func__, vd->count));
+	if (vd->count == VDBATCH_SIZE)
+		vdbatch_process(vd);
+	mtx_unlock(&vd->lock);
+}

jeff (inline, not done): I would leave this pinned until vd is unlocked. It probably won't matter but it will potentially reduce contention.
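A minimal sketch of that suggestion, assuming nothing else depends on the current ordering: keep the thread pinned until the per-CPU batch lock is dropped, so the lock holder cannot migrate to another CPU while the batch is locked.

	VI_UNLOCK(vp);
	KASSERT(vd->count <= VDBATCH_SIZE, ("%s: invalid batch count %u\n",
	    __func__, vd->count));
	if (vd->count == VDBATCH_SIZE)
		vdbatch_process(vd);
	mtx_unlock(&vd->lock);
	sched_unpin();	/* suggested: unpin only after vd is unlocked */
}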
+static void
+vdbatch_dequeue(struct vnode *vp)
+{
+	struct vdbatch *vd;
+	int i;
+	short cpu;
+
+	cpu = atomic_load_short(&vp->v_dbatchcpu);
+	if (cpu == NOCPU)
+		return;
+
+	vd = DPCPU_ID_PTR(cpu, vd);
+	mtx_lock(&vd->lock);
+	for (i = 0; i < VDBATCH_SIZE; i++) {
+		if (vd->tab[i] != vp)
+			continue;
+		vp->v_dbatchcpu = NOCPU;
+		vd->tab[i] = NULL;
+		if (i < vd->index)
+			vd->index = i;
+		vd->count--;
+		break;
+	}
+	mtx_unlock(&vd->lock);
+}
/*
 * Drop the hold count of the vnode.  If this is the last reference to
 * the vnode we place it on the free list unless it has been vgone'd
 * (marked VIRF_DOOMED) in which case we will free it.
 *
 * Because the vnode vm object keeps a hold reference on the vnode if
 * there is at least one resident non-cached page, the vnode cannot
 * leave the active list without the page cleanup done.
 */
[... 20 lines elided; excerpt resumes in vdrop_deactivate(struct vnode *vp) ...]
	if (vp->v_mflag & VMP_DIRTYLIST) {
		mp = vp->v_mount;
		mtx_lock(&mp->mnt_listmtx);
		vp->v_mflag &= ~VMP_DIRTYLIST;
		TAILQ_REMOVE(&mp->mnt_dirtyvnodelist, vp, v_dirtylist);
		mp->mnt_dirtyvnodelistsize--;
		mtx_unlock(&mp->mnt_listmtx);
	}
-	mtx_lock(&vnode_list_mtx);
-	TAILQ_REMOVE(&vnode_list, vp, v_vnodelist);
-	TAILQ_INSERT_TAIL(&vnode_list, vp, v_vnodelist);
-	mtx_unlock(&vnode_list_mtx);
	atomic_add_long(&freevnodes, 1);
-	VI_UNLOCK(vp);
+	vdbatch_enqueue(vp);
}
void
vdrop(struct vnode *vp)
{

	ASSERT_VI_UNLOCKED(vp, __func__);
	CTR2(KTR_VFS, "%s: vp %p", __func__, vp);
[... 2,936 lines elided ...]