Changeset View
Changeset View
Standalone View
Standalone View
head/sys/kern/vfs_subr.c
Show First 20 Lines • Show All 289 Lines • ▼ Show 20 Lines | |||||
static int metadelay = 28; /* time to delay syncing metadata */ | static int metadelay = 28; /* time to delay syncing metadata */ | ||||
SYSCTL_INT(_kern, OID_AUTO, metadelay, CTLFLAG_RW, &metadelay, 0, | SYSCTL_INT(_kern, OID_AUTO, metadelay, CTLFLAG_RW, &metadelay, 0, | ||||
"Time to delay syncing metadata (in seconds)"); | "Time to delay syncing metadata (in seconds)"); | ||||
static int rushjob; /* number of slots to run ASAP */ | static int rushjob; /* number of slots to run ASAP */ | ||||
static int stat_rush_requests; /* number of times I/O speeded up */ | static int stat_rush_requests; /* number of times I/O speeded up */ | ||||
SYSCTL_INT(_debug, OID_AUTO, rush_requests, CTLFLAG_RW, &stat_rush_requests, 0, | SYSCTL_INT(_debug, OID_AUTO, rush_requests, CTLFLAG_RW, &stat_rush_requests, 0, | ||||
"Number of times I/O speeded up (rush requests)"); | "Number of times I/O speeded up (rush requests)"); | ||||
/*
 * Per-CPU deferral batch for vdrop(): instead of requeueing each vnode
 * onto the global vnode_list (taking vnode_list_mtx every time), vnodes
 * are staged here and moved in bulk once the batch fills.
 */
#define VDBATCH_SIZE 8
struct vdbatch {
	u_int index;			/* number of vnodes staged in tab[] */
	struct mtx lock;		/* protects index and tab[] */
	struct vnode *tab[VDBATCH_SIZE];	/* staged vnodes awaiting requeue */
};
/* One batch per CPU; a vnode records its batch CPU in v_dbatchcpu. */
DPCPU_DEFINE_STATIC(struct vdbatch, vd);
static void vdbatch_dequeue(struct vnode *vp);
/* | /* | ||||
* When shutting down the syncer, run it at four times normal speed. | * When shutting down the syncer, run it at four times normal speed. | ||||
*/ | */ | ||||
#define SYNCER_SHUTDOWN_SPEEDUP 4 | #define SYNCER_SHUTDOWN_SPEEDUP 4 | ||||
static int sync_vnode_count; | static int sync_vnode_count; | ||||
static int syncer_worklist_len; | static int syncer_worklist_len; | ||||
static enum { SYNCER_RUNNING, SYNCER_SHUTTING_DOWN, SYNCER_FINAL_DELAY } | static enum { SYNCER_RUNNING, SYNCER_SHUTTING_DOWN, SYNCER_FINAL_DELAY } | ||||
syncer_state; | syncer_state; | ||||
▲ Show 20 Lines • Show All 241 Lines • ▼ Show 20 Lines | vnode_init(void *mem, int size, int flags) | ||||
*/ | */ | ||||
LIST_INIT(&vp->v_cache_src); | LIST_INIT(&vp->v_cache_src); | ||||
TAILQ_INIT(&vp->v_cache_dst); | TAILQ_INIT(&vp->v_cache_dst); | ||||
/* | /* | ||||
* Initialize rangelocks. | * Initialize rangelocks. | ||||
*/ | */ | ||||
rangelock_init(&vp->v_rl); | rangelock_init(&vp->v_rl); | ||||
vp->v_dbatchcpu = NOCPU; | |||||
mtx_lock(&vnode_list_mtx); | mtx_lock(&vnode_list_mtx); | ||||
TAILQ_INSERT_BEFORE(vnode_list_free_marker, vp, v_vnodelist); | TAILQ_INSERT_BEFORE(vnode_list_free_marker, vp, v_vnodelist); | ||||
mtx_unlock(&vnode_list_mtx); | mtx_unlock(&vnode_list_mtx); | ||||
return (0); | return (0); | ||||
} | } | ||||
/* | /* | ||||
* Free a vnode when it is cleared from the zone. | * Free a vnode when it is cleared from the zone. | ||||
*/ | */ | ||||
static void | static void | ||||
vnode_fini(void *mem, int size) | vnode_fini(void *mem, int size) | ||||
{ | { | ||||
struct vnode *vp; | struct vnode *vp; | ||||
struct bufobj *bo; | struct bufobj *bo; | ||||
vp = mem; | vp = mem; | ||||
vdbatch_dequeue(vp); | |||||
mtx_lock(&vnode_list_mtx); | mtx_lock(&vnode_list_mtx); | ||||
TAILQ_REMOVE(&vnode_list, vp, v_vnodelist); | TAILQ_REMOVE(&vnode_list, vp, v_vnodelist); | ||||
mtx_unlock(&vnode_list_mtx); | mtx_unlock(&vnode_list_mtx); | ||||
rangelock_destroy(&vp->v_rl); | rangelock_destroy(&vp->v_rl); | ||||
lockdestroy(vp->v_vnlock); | lockdestroy(vp->v_vnlock); | ||||
mtx_destroy(&vp->v_interlock); | mtx_destroy(&vp->v_interlock); | ||||
bo = &vp->v_bufobj; | bo = &vp->v_bufobj; | ||||
rw_destroy(BO_LOCKPTR(bo)); | rw_destroy(BO_LOCKPTR(bo)); | ||||
Show All 18 Lines | |||||
#else | #else | ||||
#define NFS_NCLNODE_SZ (360 + 32) | #define NFS_NCLNODE_SZ (360 + 32) | ||||
#define NC_SZ 92 | #define NC_SZ 92 | ||||
#endif | #endif | ||||
static void | static void | ||||
vntblinit(void *dummy __unused) | vntblinit(void *dummy __unused) | ||||
{ | { | ||||
struct vdbatch *vd; | |||||
int cpu, physvnodes, virtvnodes; | |||||
u_int i; | u_int i; | ||||
int physvnodes, virtvnodes; | |||||
/* | /* | ||||
* Desiredvnodes is a function of the physical memory size and the | * Desiredvnodes is a function of the physical memory size and the | ||||
* kernel's heap size. Generally speaking, it scales with the | * kernel's heap size. Generally speaking, it scales with the | ||||
* physical memory size. The ratio of desiredvnodes to the physical | * physical memory size. The ratio of desiredvnodes to the physical | ||||
* memory size is 1:16 until desiredvnodes exceeds 98,304. | * memory size is 1:16 until desiredvnodes exceeds 98,304. | ||||
* Thereafter, the | * Thereafter, the | ||||
* marginal ratio of desiredvnodes to the physical memory size is | * marginal ratio of desiredvnodes to the physical memory size is | ||||
▲ Show 20 Lines • Show All 49 Lines • ▼ Show 20 Lines | vntblinit(void *dummy __unused) | ||||
syncer_workitem_pending = hashinit(syncer_maxdelay, M_VNODE, | syncer_workitem_pending = hashinit(syncer_maxdelay, M_VNODE, | ||||
&syncer_mask); | &syncer_mask); | ||||
syncer_maxdelay = syncer_mask + 1; | syncer_maxdelay = syncer_mask + 1; | ||||
mtx_init(&sync_mtx, "Syncer mtx", NULL, MTX_DEF); | mtx_init(&sync_mtx, "Syncer mtx", NULL, MTX_DEF); | ||||
cv_init(&sync_wakeup, "syncer"); | cv_init(&sync_wakeup, "syncer"); | ||||
for (i = 1; i <= sizeof(struct vnode); i <<= 1) | for (i = 1; i <= sizeof(struct vnode); i <<= 1) | ||||
vnsz2log++; | vnsz2log++; | ||||
vnsz2log--; | vnsz2log--; | ||||
CPU_FOREACH(cpu) { | |||||
vd = DPCPU_ID_PTR((cpu), vd); | |||||
bzero(vd, sizeof(*vd)); | |||||
mtx_init(&vd->lock, "vdbatch", NULL, MTX_DEF); | |||||
} | } | ||||
} | |||||
SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_FIRST, vntblinit, NULL); | SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_FIRST, vntblinit, NULL); | ||||
/* | /* | ||||
* Mark a mount point as busy. Used to synchronize access and to delay | * Mark a mount point as busy. Used to synchronize access and to delay | ||||
* unmounting. Eventually, mountlist_mtx is not released on failure. | * unmounting. Eventually, mountlist_mtx is not released on failure. | ||||
* | * | ||||
* vfs_busy() is a custom lock, it can block the caller. | * vfs_busy() is a custom lock, it can block the caller. | ||||
▲ Show 20 Lines • Show All 2,513 Lines • ▼ Show 20 Lines | |||||
#ifdef INVARIANTS | #ifdef INVARIANTS | ||||
int old = atomic_fetchadd_int(&vp->v_holdcnt, 1); | int old = atomic_fetchadd_int(&vp->v_holdcnt, 1); | ||||
VNASSERT(old > 0, vp, ("%s: wrong hold count %d", __func__, old)); | VNASSERT(old > 0, vp, ("%s: wrong hold count %d", __func__, old)); | ||||
#else | #else | ||||
atomic_add_int(&vp->v_holdcnt, 1); | atomic_add_int(&vp->v_holdcnt, 1); | ||||
#endif | #endif | ||||
} | } | ||||
/*
 * Flush a full batch: move every staged vnode to the tail of the global
 * vnode_list in a single vnode_list_mtx acquisition and clear each
 * vnode's batch association.
 *
 * Called with the batch lock held and only once the batch is full
 * (vd->index == VDBATCH_SIZE); both are asserted below.
 * NOTE(review): tail placement presumably makes these vnodes the last
 * candidates for recycling — confirm against the vnode_list consumer.
 */
static void __noinline
vdbatch_process(struct vdbatch *vd)
{
	struct vnode *vp;
	int i;

	mtx_assert(&vd->lock, MA_OWNED);
	MPASS(vd->index == VDBATCH_SIZE);
	mtx_lock(&vnode_list_mtx);
	for (i = 0; i < VDBATCH_SIZE; i++) {
		vp = vd->tab[i];
		/* Requeue at the tail of the global list. */
		TAILQ_REMOVE(&vnode_list, vp, v_vnodelist);
		TAILQ_INSERT_TAIL(&vnode_list, vp, v_vnodelist);
		/* The vnode must still be marked as belonging to this batch. */
		MPASS(vp->v_dbatchcpu != NOCPU);
		vp->v_dbatchcpu = NOCPU;
	}
	/* Reset the batch to empty for reuse. */
	bzero(vd->tab, sizeof(vd->tab));
	vd->index = 0;
	mtx_unlock(&vnode_list_mtx);
}
/*
 * Defer requeueing of a vnode by staging it in the current CPU's batch,
 * flushing the batch to the global vnode_list when it becomes full.
 *
 * Called with the vnode interlock held; the interlock is dropped on
 * every return path.  If the vnode is already staged in some batch
 * (v_dbatchcpu != NOCPU) there is nothing to do.
 */
static void
vdbatch_enqueue(struct vnode *vp)
{
	struct vdbatch *vd;

	ASSERT_VI_LOCKED(vp, __func__);
	VNASSERT(!VN_IS_DOOMED(vp), vp,
	    ("%s: deferring requeue of a doomed vnode", __func__));

	if (vp->v_dbatchcpu != NOCPU) {
		VI_UNLOCK(vp);
		return;
	}

	/*
	 * A hack: pin us to the current CPU so that we know what to put in
	 * ->v_dbatchcpu.
	 */
	sched_pin();
	vd = DPCPU_PTR(vd);
	mtx_lock(&vd->lock);
	/* The batch cannot be full here — a full batch is always flushed. */
	MPASS(vd->index < VDBATCH_SIZE);
	MPASS(vd->tab[vd->index] == NULL);
	/* Record which CPU's batch holds the vnode so dequeue can find it. */
	vp->v_dbatchcpu = curcpu;
	vd->tab[vd->index] = vp;
	vd->index++;
	/* The interlock can be dropped once v_dbatchcpu is published. */
	VI_UNLOCK(vp);
	if (vd->index == VDBATCH_SIZE)
		vdbatch_process(vd);
	mtx_unlock(&vd->lock);
	sched_unpin();
}
/*
 * This routine must only be called for vnodes which are about to be
 * deallocated. Supporting dequeue for arbitrary vnodes would require
 * validating that the locked batch matches.
 */
static void
vdbatch_dequeue(struct vnode *vp)
{
	struct vdbatch *vd;
	int i;
	short cpu;

	VNASSERT(vp->v_type == VBAD || vp->v_type == VNON, vp,
	    ("%s: called for a used vnode\n", __func__));

	cpu = vp->v_dbatchcpu;
	/* Not staged in any batch — nothing to remove. */
	if (cpu == NOCPU)
		return;

	vd = DPCPU_ID_PTR(cpu, vd);
	mtx_lock(&vd->lock);
	for (i = 0; i < vd->index; i++) {
		if (vd->tab[i] != vp)
			continue;
		vp->v_dbatchcpu = NOCPU;
		/* Swap-remove: fill the hole with the last staged vnode. */
		vd->index--;
		vd->tab[i] = vd->tab[vd->index];
		vd->tab[vd->index] = NULL;
		break;
	}
	mtx_unlock(&vd->lock);
	/*
	 * Either we dequeued the vnode above or the target CPU beat us to it.
	 */
	MPASS(vp->v_dbatchcpu == NOCPU);
}
/* | |||||
* Drop the hold count of the vnode. If this is the last reference to | * Drop the hold count of the vnode. If this is the last reference to | ||||
* the vnode we place it on the free list unless it has been vgone'd | * the vnode we place it on the free list unless it has been vgone'd | ||||
* (marked VIRF_DOOMED) in which case we will free it. | * (marked VIRF_DOOMED) in which case we will free it. | ||||
* | * | ||||
* Because the vnode vm object keeps a hold reference on the vnode if | * Because the vnode vm object keeps a hold reference on the vnode if | ||||
* there is at least one resident non-cached page, the vnode cannot | * there is at least one resident non-cached page, the vnode cannot | ||||
* leave the active list without the page cleanup done. | * leave the active list without the page cleanup done. | ||||
*/ | */ | ||||
Show All 20 Lines | vdrop_deactivate(struct vnode *vp) | ||||
if (vp->v_mflag & VMP_LAZYLIST) { | if (vp->v_mflag & VMP_LAZYLIST) { | ||||
mp = vp->v_mount; | mp = vp->v_mount; | ||||
mtx_lock(&mp->mnt_listmtx); | mtx_lock(&mp->mnt_listmtx); | ||||
vp->v_mflag &= ~VMP_LAZYLIST; | vp->v_mflag &= ~VMP_LAZYLIST; | ||||
TAILQ_REMOVE(&mp->mnt_lazyvnodelist, vp, v_lazylist); | TAILQ_REMOVE(&mp->mnt_lazyvnodelist, vp, v_lazylist); | ||||
mp->mnt_lazyvnodelistsize--; | mp->mnt_lazyvnodelistsize--; | ||||
mtx_unlock(&mp->mnt_listmtx); | mtx_unlock(&mp->mnt_listmtx); | ||||
} | } | ||||
mtx_lock(&vnode_list_mtx); | |||||
TAILQ_REMOVE(&vnode_list, vp, v_vnodelist); | |||||
TAILQ_INSERT_TAIL(&vnode_list, vp, v_vnodelist); | |||||
mtx_unlock(&vnode_list_mtx); | |||||
atomic_add_long(&freevnodes, 1); | atomic_add_long(&freevnodes, 1); | ||||
VI_UNLOCK(vp); | vdbatch_enqueue(vp); | ||||
} | } | ||||
void | void | ||||
vdrop(struct vnode *vp) | vdrop(struct vnode *vp) | ||||
{ | { | ||||
ASSERT_VI_UNLOCKED(vp, __func__); | ASSERT_VI_UNLOCKED(vp, __func__); | ||||
CTR2(KTR_VFS, "%s: vp %p", __func__, vp); | CTR2(KTR_VFS, "%s: vp %p", __func__, vp); | ||||
▲ Show 20 Lines • Show All 2,924 Lines • Show Last 20 Lines |