sys/kern/vfs_subr.c
[... 296 lines elided ...]
static int metadelay = 28;		/* time to delay syncing metadata */
SYSCTL_INT(_kern, OID_AUTO, metadelay, CTLFLAG_RW, &metadelay, 0,
    "Time to delay syncing metadata (in seconds)");
static int rushjob;		/* number of slots to run ASAP */
static int stat_rush_requests;	/* number of times I/O speeded up */
SYSCTL_INT(_debug, OID_AUTO, rush_requests, CTLFLAG_RW, &stat_rush_requests, 0,
    "Number of times I/O speeded up (rush requests)");
+#define	VDBATCH_SIZE 8
+struct vdbatch {
+	u_int index;
+	u_int count;
+	struct mtx lock;
+	struct vnode *tab[VDBATCH_SIZE];
+};
+DPCPU_DEFINE_STATIC(struct vdbatch, vd);

jeff: We are starting to repeat this pattern in multiple places. Might be worth making a new api eventually.
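One hypothetical shape for such an api is sketched below: a per-CPU batch of opaque pointers with a caller-supplied drain callback that runs once the batch fills. The pcpu_batch names are illustrative only; nothing like this exists in the tree.

/*
 * Hypothetical generalization of the pattern above (sketch only).
 */
struct pcpu_batch {
	u_int count;
	struct mtx lock;
	void *tab[VDBATCH_SIZE];
	void (*drain)(void **tab, u_int count);	/* called with lock held */
};

static void
pcpu_batch_add(struct pcpu_batch *pb, void *item)
{

	mtx_lock(&pb->lock);
	pb->tab[pb->count++] = item;
	if (pb->count == VDBATCH_SIZE) {
		pb->drain(pb->tab, pb->count);
		pb->count = 0;
	}
	mtx_unlock(&pb->lock);
}

Note that vdbatch tracks index separately from count because vdbatch_dequeue() below can leave holes in the table; a general api would need to account for that as well.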
/*
 * When shutting down the syncer, run it at four times normal speed.
 */
#define SYNCER_SHUTDOWN_SPEEDUP		4
static int sync_vnode_count;
static int syncer_worklist_len;
static enum { SYNCER_RUNNING, SYNCER_SHUTTING_DOWN, SYNCER_FINAL_DELAY }
    syncer_state;

/* Target for maximum number of vnodes. */
int desiredvnodes;
static int gapvnodes;		/* gap between wanted and desired */
static int vhiwat;		/* enough extras after expansion */
static int vlowat;		/* minimal extras before expansion */
static int vstir;		/* nonzero to stir non-free vnodes */
static volatile int vsmalltrigger = 8;	/* pref to keep if > this many pages */
+static void	vdbatch_dequeue(struct vnode *vp);

static int
sysctl_update_desiredvnodes(SYSCTL_HANDLER_ARGS)
{
	int error, old_desiredvnodes;

	old_desiredvnodes = desiredvnodes;
	if ((error = sysctl_handle_int(oidp, arg1, arg2, req)) != 0)
		return (error);
[... 163 lines elided; excerpt resumes in vnode_init(void *mem, int size, int flags) ...]
	 */
	LIST_INIT(&vp->v_cache_src);
	TAILQ_INIT(&vp->v_cache_dst);
	/*
	 * Initialize rangelocks.
	 */
	rangelock_init(&vp->v_rl);

+	vp->v_dbatchcpu = NOCPU;
	mtx_lock(&vnode_list_mtx);
	TAILQ_INSERT_BEFORE(vnode_list_marker, vp, v_vnodelist);
	mtx_unlock(&vnode_list_mtx);
	return (0);
}
/*
 * Free a vnode when it is cleared from the zone.
 */
static void
vnode_fini(void *mem, int size)
{
	struct vnode *vp;
	struct bufobj *bo;

	vp = mem;
+	vdbatch_dequeue(vp);
	mtx_lock(&vnode_list_mtx);
	TAILQ_REMOVE(&vnode_list, vp, v_vnodelist);
	mtx_unlock(&vnode_list_mtx);
	rangelock_destroy(&vp->v_rl);
	lockdestroy(vp->v_vnlock);
	mtx_destroy(&vp->v_interlock);
	bo = &vp->v_bufobj;
	rw_destroy(BO_LOCKPTR(bo));
[... 18 lines elided ...]
#else
#define	NFS_NCLNODE_SZ	(360 + 32)
#define	NC_SZ	92
#endif
static void
vntblinit(void *dummy __unused)
{
+	struct vdbatch *vd;
+	int cpu, physvnodes, virtvnodes;
	u_int i;
-	int physvnodes, virtvnodes;

	/*
	 * Desiredvnodes is a function of the physical memory size and the
	 * kernel's heap size.  Generally speaking, it scales with the
	 * physical memory size.  The ratio of desiredvnodes to the physical
	 * memory size is 1:16 until desiredvnodes exceeds 98,304.
	 * Thereafter, the marginal ratio of desiredvnodes to the physical
	 * memory size is
[... 47 lines elided (still in vntblinit) ...]
	syncer_workitem_pending = hashinit(syncer_maxdelay, M_VNODE,
	    &syncer_mask);
	syncer_maxdelay = syncer_mask + 1;
	mtx_init(&sync_mtx, "Syncer mtx", NULL, MTX_DEF);
	cv_init(&sync_wakeup, "syncer");
	for (i = 1; i <= sizeof(struct vnode); i <<= 1)
		vnsz2log++;
	vnsz2log--;

+	CPU_FOREACH(cpu) {
+		vd = DPCPU_ID_PTR((cpu), vd);
+		bzero(vd, sizeof(*vd));
+		mtx_init(&vd->lock, "vdbatch", NULL, MTX_DEF);
+	}
}

SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_FIRST, vntblinit, NULL);
/*
 * Mark a mount point as busy.  Used to synchronize access and to delay
 * unmounting.  Eventually, mountlist_mtx is not released on failure.
 *
 * vfs_busy() is a custom lock, it can block the caller.
[... 2,505 lines elided ...]
#ifdef INVARIANTS
	int old = atomic_fetchadd_int(&vp->v_holdcnt, 1);
	VNASSERT(old > 0, vp, ("%s: wrong hold count", __func__));
#else
	atomic_add_int(&vp->v_holdcnt, 1);
#endif
}
+static void
+vdbatch_process(struct vdbatch *vd)
+{
+	struct vnode *vp;
+	int i;
+
+	mtx_assert(&vd->lock, MA_OWNED);
+	mtx_lock(&vnode_list_mtx);
+	for (i = 0; i < VDBATCH_SIZE; i++) {
+		vp = vd->tab[i];
+		if (vp == NULL)
+			continue;
+		TAILQ_REMOVE(&vnode_list, vp, v_vnodelist);
+		TAILQ_INSERT_TAIL(&vnode_list, vp, v_vnodelist);
+		MPASS(vp->v_dbatchcpu != NOCPU);
+		vp->v_dbatchcpu = NOCPU;
+		vd->tab[i] = NULL;
+	}
+	vd->index = 0;
+	vd->count = 0;
+	mtx_unlock(&vnode_list_mtx);
+}
+static void
+vdbatch_enqueue(struct vnode *vp)
+{
+	struct vdbatch *vd;
+	int i;
+
+	ASSERT_VI_LOCKED(vp, __func__);
+	VNASSERT(!VN_IS_DOOMED(vp), vp,
+	    ("%s: deferring requeue of a doomed vnode", __func__));
+
+	if (vp->v_dbatchcpu != NOCPU) {
+		VI_UNLOCK(vp);
+		return;
+	}
+
+	/*
+	 * A hack: pin us to the current CPU so that we know what to put in
+	 * ->v_dbatchcpu.
+	 */
+	sched_pin();
+	vd = DPCPU_PTR(vd);
+	mtx_lock(&vd->lock);
+	KASSERT(vd->index < VDBATCH_SIZE, ("%s: invalid index %u\n",
+	    __func__, vd->index));
+	for (i = vd->index; i < VDBATCH_SIZE; i++) {
+		if (vd->tab[i] != NULL)
+			continue;
+		MPASS(curthread->td_pinned);
+		vp->v_dbatchcpu = curcpu;
+		vd->tab[i] = vp;
+		vd->index = i + 1;
+		vd->count++;
+		break;
+	}
+	KASSERT(vp->v_dbatchcpu != NOCPU,
+	    ("%s: failed to fit the vnode (index %u, count %u)\n",
+	    __func__, vd->index, vd->count));
+	VI_UNLOCK(vp);
+	sched_unpin();
+	KASSERT(vd->count <= VDBATCH_SIZE, ("%s: invalid batch count %u\n",
+	    __func__, vd->count));
+	if (vd->count == VDBATCH_SIZE)
+		vdbatch_process(vd);
+	mtx_unlock(&vd->lock);
+}

jeff (inline, not done): I would leave this pinned until vd is unlocked. It probably won't matter but it will potentially reduce contention.
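A minimal sketch of that suggestion, assuming nothing else depends on the current ordering: keep the thread pinned until the per-CPU batch lock is dropped, so the lock holder cannot migrate to another CPU while the batch is locked.

	VI_UNLOCK(vp);
	KASSERT(vd->count <= VDBATCH_SIZE, ("%s: invalid batch count %u\n",
	    __func__, vd->count));
	if (vd->count == VDBATCH_SIZE)
		vdbatch_process(vd);
	mtx_unlock(&vd->lock);
	sched_unpin();	/* suggested: unpin only after vd is unlocked */
}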
+static void
+vdbatch_dequeue(struct vnode *vp)
+{
+	struct vdbatch *vd;
+	int i;
+	short cpu;
+
+	cpu = atomic_load_short(&vp->v_dbatchcpu);
+	if (cpu == NOCPU)
+		return;
+
+	vd = DPCPU_ID_PTR(cpu, vd);
+	mtx_lock(&vd->lock);
+	for (i = 0; i < VDBATCH_SIZE; i++) {
+		if (vd->tab[i] != vp)
+			continue;
+		vp->v_dbatchcpu = NOCPU;
+		vd->tab[i] = NULL;
+		if (i < vd->index)
+			vd->index = i;
+		vd->count--;
+		break;
+	}
+	mtx_unlock(&vd->lock);
+}
/*
 * Drop the hold count of the vnode.  If this is the last reference to
 * the vnode we place it on the free list unless it has been vgone'd
 * (marked VIRF_DOOMED) in which case we will free it.
 *
 * Because the vnode vm object keeps a hold reference on the vnode if
 * there is at least one resident non-cached page, the vnode cannot
 * leave the active list without the page cleanup done.
 */
[... 20 lines elided; excerpt resumes in vdrop_deactivate(struct vnode *vp) ...]
	if (vp->v_mflag & VMP_DIRTYLIST) {
		mp = vp->v_mount;
		mtx_lock(&mp->mnt_listmtx);
		vp->v_mflag &= ~VMP_DIRTYLIST;
		TAILQ_REMOVE(&mp->mnt_dirtyvnodelist, vp, v_dirtylist);
		mp->mnt_dirtyvnodelistsize--;
		mtx_unlock(&mp->mnt_listmtx);
	}
-	mtx_lock(&vnode_list_mtx);
-	TAILQ_REMOVE(&vnode_list, vp, v_vnodelist);
-	TAILQ_INSERT_TAIL(&vnode_list, vp, v_vnodelist);
-	mtx_unlock(&vnode_list_mtx);
	atomic_add_long(&freevnodes, 1);
-	VI_UNLOCK(vp);
+	vdbatch_enqueue(vp);
}
void
vdrop(struct vnode *vp)
{

	ASSERT_VI_UNLOCKED(vp, __func__);
	CTR2(KTR_VFS, "%s: vp %p", __func__, vp);
[... 2,936 lines elided ...]