Index: sys/kern/vfs_subr.c
===================================================================
--- sys/kern/vfs_subr.c
+++ sys/kern/vfs_subr.c
@@ -302,6 +302,14 @@
 SYSCTL_INT(_debug, OID_AUTO, rush_requests, CTLFLAG_RW, &stat_rush_requests, 0,
     "Number of times I/O speeded up (rush requests)");
 
+#define VDBATCH_SIZE 8
+struct vdbatch {
+	int index;			/* number of used slots in tab */
+	struct mtx lock;
+	struct vnode *tab[VDBATCH_SIZE];
+};
+DPCPU_DEFINE_STATIC(struct vdbatch, vd);
+
 /*
  * When shutting down the syncer, run it at four times normal speed.
  */
@@ -319,6 +327,8 @@
 static int vstir;		/* nonzero to stir non-free vnodes */
 static volatile int vsmalltrigger = 8;	/* pref to keep if > this many pages */
 
+static void vdrop_deferred_dequeue(struct vnode *vp);
+
 static int
 sysctl_update_desiredvnodes(SYSCTL_HANDLER_ARGS)
 {
@@ -498,6 +508,8 @@
 	 */
 	rangelock_init(&vp->v_rl);
 
+	vp->v_cpudefer = NOCPU;
+
 	mtx_lock(&vnode_list_mtx);
 	TAILQ_INSERT_BEFORE(vnode_list_marker, vp, v_vnodelist);
 	mtx_unlock(&vnode_list_mtx);
@@ -514,6 +526,7 @@
 	struct bufobj *bo;
 
 	vp = mem;
+	vdrop_deferred_dequeue(vp);
 	mtx_lock(&vnode_list_mtx);
 	TAILQ_REMOVE(&vnode_list, vp, v_vnodelist);
 	mtx_unlock(&vnode_list_mtx);
@@ -548,8 +561,9 @@
 static void
 vntblinit(void *dummy __unused)
 {
+	struct vdbatch *vd;
+	int cpu, physvnodes, virtvnodes;
 	u_int i;
-	int physvnodes, virtvnodes;
 
 	/*
 	 * Desiredvnodes is a function of the physical memory size and the
@@ -613,6 +627,12 @@
 	for (i = 1; i <= sizeof(struct vnode); i <<= 1)
 		vnsz2log++;
 	vnsz2log--;
+
+	CPU_FOREACH(cpu) {
+		vd = DPCPU_ID_PTR(cpu, vd);
+		bzero(vd, sizeof(*vd));
+		mtx_init(&vd->lock, "vdbatch", NULL, MTX_DEF);
+	}
 }
 SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_FIRST, vntblinit, NULL);
 
@@ -3135,6 +3155,103 @@
 #endif
 }
 
+/*
+ * Remove the vnode from its per-CPU deferred requeue batch, if any.
+ * This is only called from the vnode destructor, so the vnode cannot be
+ * re-enqueued concurrently; compacting the table keeps slots [0, index)
+ * occupied.
+ */
+static void
+vdrop_deferred_dequeue(struct vnode *vp)
+{
+	struct vdbatch *vd;
+	int i;
+	short cpu;
+
+	cpu = atomic_load_short(&vp->v_cpudefer);
+	if (cpu == NOCPU)
+		return;
+	vd = DPCPU_ID_PTR(cpu, vd);
+	mtx_lock(&vd->lock);
+	for (i = 0; i < vd->index; i++) {
+		if (vd->tab[i] != vp)
+			continue;
+		vp->v_cpudefer = NOCPU;
+		vd->index--;
+		vd->tab[i] = vd->tab[vd->index];
+		vd->tab[vd->index] = NULL;
+		break;
+	}
+	mtx_unlock(&vd->lock);
+}
+
+/*
+ * Requeue every vnode collected in the batch to the tail of the global
+ * vnode list and reset the batch.  Both the batch lock and
+ * vnode_list_mtx must be held.
+ */
+static void
+vdrop_requeue_batch(struct vdbatch *vd)
+{
+	int i;
+
+	mtx_assert(&vnode_list_mtx, MA_OWNED);
+	mtx_assert(&vd->lock, MA_OWNED);
+
+	for (i = 0; i < vd->index; i++) {
+		TAILQ_REMOVE(&vnode_list, vd->tab[i], v_vnodelist);
+		TAILQ_INSERT_TAIL(&vnode_list, vd->tab[i], v_vnodelist);
+		vd->tab[i]->v_cpudefer = NOCPU;
+		vd->tab[i] = NULL;
+	}
+	vd->index = 0;
+}
+
+/*
+ * Defer requeueing the vnode on the global vnode list.
+ *
+ * Instead of taking vnode_list_mtx on every vdrop, park the vnode in a
+ * small per-CPU batch and requeue the entire batch once it fills up,
+ * amortizing the global lock over VDBATCH_SIZE vnodes.
+ *
+ * The caller must hold the vnode interlock, which is dropped on return.
+ */
+static void
+vdrop_defer_requeue(struct vnode *vp)
+{
+	struct vdbatch *vd;
+
+	ASSERT_VI_LOCKED(vp, __func__);
+	VNASSERT(!VN_IS_DOOMED(vp), vp,
+	    ("%s: deferring requeue of a doomed vnode", __func__));
+
+	if (vp->v_cpudefer != NOCPU) {
+		VI_UNLOCK(vp);
+		return;
+	}
+	sched_pin();
+	vd = DPCPU_PTR(vd);
+	mtx_lock(&vd->lock);
+	MPASS(vd->index < VDBATCH_SIZE);
+	MPASS(vd->tab[vd->index] == NULL);
+	/*
+	 * Being pinned tells us which CPU's batch the vnode went into.
+	 */
+	vp->v_cpudefer = curcpu;
+	vd->tab[vd->index] = vp;
+	vd->index++;
+	VI_UNLOCK(vp);
+	sched_unpin();
+	if (vd->index < VDBATCH_SIZE) {
+		mtx_unlock(&vd->lock);
+		return;
+	}
+	mtx_lock(&vnode_list_mtx);
+	vdrop_requeue_batch(vd);
+	mtx_unlock(&vnode_list_mtx);
+	mtx_unlock(&vd->lock);
+}
+
 /*
  * Drop the hold count of the vnode.  If this is the last reference to
  * the vnode we place it on the free list unless it has been vgone'd
@@ -3172,12 +3289,8 @@
 		mp->mnt_dirtyvnodelistsize--;
 		mtx_unlock(&mp->mnt_listmtx);
 	}
-	mtx_lock(&vnode_list_mtx);
-	TAILQ_REMOVE(&vnode_list, vp, v_vnodelist);
-	TAILQ_INSERT_TAIL(&vnode_list, vp, v_vnodelist);
-	mtx_unlock(&vnode_list_mtx);
 	atomic_add_long(&freevnodes, 1);
-	VI_UNLOCK(vp);
+	vdrop_defer_requeue(vp);
 }
 
 void
Index: sys/sys/vnode.h
===================================================================
--- sys/sys/vnode.h
+++ sys/sys/vnode.h
@@ -171,7 +171,8 @@
 	u_int	v_usecount;	/* I ref count of users */
 	u_int	v_iflag;	/* i vnode flags (see below) */
 	u_int	v_vflag;	/* v vnode flags */
-	u_int	v_mflag;	/* l mnt-specific vnode flags */
+	u_short	v_mflag;	/* l mnt-specific vnode flags */
+	short	v_cpudefer;	/* deferred requeue batch cpu */
 	int	v_writecount;	/* I ref count of writers or
 				   (negative) text users */
 	u_int	v_hash;
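
Reviewer note: below is a minimal, standalone userspace sketch of the batching invariant the patch relies on: slots [0, index) are always occupied, removal compacts by moving the last entry into the hole, and a flush fires exactly when the table fills. The names mirror the patch, but this is illustrative C only, not kernel code; locking, DPCPU and the vnode lists are deliberately omitted.

/*
 * Standalone illustration (not kernel code) of the per-CPU batch
 * invariant used by vdrop_defer_requeue()/vdrop_deferred_dequeue().
 * Build with: cc -o vdbatch_demo vdbatch_demo.c
 */
#include <assert.h>
#include <stddef.h>
#include <stdio.h>

#define VDBATCH_SIZE	8

struct node {
	int id;
};

struct batch {
	int index;			/* number of occupied slots */
	struct node *tab[VDBATCH_SIZE];
};

/* Counterpart of vdrop_requeue_batch(): drain the table and reset it. */
static void
flush(struct batch *b)
{
	int i;

	for (i = 0; i < b->index; i++) {
		printf("requeue node %d\n", b->tab[i]->id);
		b->tab[i] = NULL;
	}
	b->index = 0;
}

/* Counterpart of vdrop_defer_requeue(): append, flush once full. */
static void
enqueue(struct batch *b, struct node *n)
{
	assert(b->index < VDBATCH_SIZE && b->tab[b->index] == NULL);
	b->tab[b->index++] = n;
	if (b->index == VDBATCH_SIZE)
		flush(b);
}

/* Counterpart of vdrop_deferred_dequeue(): remove and compact. */
static void
dequeue(struct batch *b, struct node *n)
{
	int i;

	for (i = 0; i < b->index; i++) {
		if (b->tab[i] != n)
			continue;
		b->tab[i] = b->tab[--b->index];
		b->tab[b->index] = NULL;
		break;
	}
}

int
main(void)
{
	struct batch b = { 0 };
	struct node nodes[12];
	int i;

	for (i = 0; i < 12; i++) {
		nodes[i].id = i;
		enqueue(&b, &nodes[i]);
		if (i == 3)	/* simulate a vnode freed while batched */
			dequeue(&b, &nodes[1]);
	}
	flush(&b);		/* drain the partial batch */
	return (0);
}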