D44178: kern/nullfs: Implement the recycling on the nullfs nodes. (stable/13)
D44178.id54425.diff (11 KB)
diff --git a/sys/fs/nullfs/null.h b/sys/fs/nullfs/null.h
--- a/sys/fs/nullfs/null.h
+++ b/sys/fs/nullfs/null.h
@@ -63,12 +63,43 @@
#define VTONULL(vp) ((struct null_node *)(vp)->v_data)
#define NULLTOV(xp) ((xp)->null_vnode)
+struct cv;
+struct mtx;
+struct thread;
+#if __FreeBSD_version >= 1300139
+struct vnode;
+struct sx;
+#endif
+
+/*
+ * The recycle request types.
+ */
+enum {
+	NULL_RECYCLE_REQ_NONE,
+	NULL_RECYCLE_REQ_LOW_PAGES,
+	NULL_RECYCLE_REQ_LOW_KMEM,
+};
+
+extern uint64_t null_node_num;
+extern uint64_t null_node_inuse_num;
+extern int null_recycle_lowpages;
+extern int null_recycle_lowkmem;
+#if __FreeBSD_version >= 1300139
+extern struct vnode *null_recycle_marker;
+extern struct sx null_recycle_sx;
+#endif
+extern struct mtx null_recycle_lock;
+extern struct cv null_recycle_cv;
+extern int null_recycle_request;
+extern struct thread *null_recycle_td;
+
int nullfs_init(struct vfsconf *vfsp);
int nullfs_uninit(struct vfsconf *vfsp);
int null_nodeget(struct mount *mp, struct vnode *target, struct vnode **vpp);
struct vnode *null_hashget(struct mount *mp, struct vnode *lowervp);
void null_hashrem(struct null_node *xp);
int null_bypass(struct vop_generic_args *ap);
+void null_recycle_thread(void);
#ifdef DIAGNOSTIC
struct vnode *null_checkvp(struct vnode *vp, char *fil, int lno);
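For a sense of scale, the sketch below mirrors the arithmetic that null_recycle_thread() (in null_vfsops.c further down) applies to the two counters declared above: the candidate set for one pass is the nodes not in use, scaled by the relevant percentage knob (defaults of 20 and 80 are set in null_subr.c). This is an illustrative userspace program, not part of the patch, and the sample numbers are made up.

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint64_t node_num = 100000;		/* vfs.nullfs.nodes */
	uint64_t node_inuse_num = 60000;	/* vfs.nullfs.inuse */
	int lowpages = 20;		/* vfs.nullfs.recycle.lowpages, % */
	int64_t vn_scan;

	/* Only nodes not in use are candidates; scale by the knob. */
	vn_scan = (int64_t)(node_num - node_inuse_num) * lowpages / 100;
	printf("one pass would target %jd of %ju free nodes\n",
	    (intmax_t)vn_scan, (uintmax_t)(node_num - node_inuse_num));
	/* Prints: one pass would target 8000 of 40000 free nodes. */
	return (0);
}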
diff --git a/sys/fs/nullfs/null_subr.c b/sys/fs/nullfs/null_subr.c
--- a/sys/fs/nullfs/null_subr.c
+++ b/sys/fs/nullfs/null_subr.c
@@ -36,14 +36,26 @@
#include <sys/param.h>
#include <sys/systm.h>
+#include <sys/condvar.h>
+#include <sys/counter.h>
+#include <sys/eventhandler.h>
#include <sys/kernel.h>
+#include <sys/kthread.h>
#include <sys/lock.h>
#include <sys/rwlock.h>
#include <sys/malloc.h>
#include <sys/mount.h>
+#include <sys/mutex.h>
#include <sys/proc.h>
+#include <sys/sx.h>
+#include <sys/sysctl.h>
#include <sys/vnode.h>
+#include <machine/atomic.h>
+
+#include <vm/vm.h>
+#include <vm/vm_pageout.h>
+
#include <fs/nullfs/null.h>
/*
@@ -59,14 +71,77 @@
static LIST_HEAD(null_node_hashhead, null_node) *null_node_hashtbl;
static struct rwlock null_hash_lock;
static u_long null_hash_mask;
+uint64_t null_node_num;
+uint64_t null_node_inuse_num;
+int null_recycle_lowpages = 20;
+int null_recycle_lowkmem = 80;
+counter_u64_t null_recycle_calls;
+
+#if __FreeBSD_version >= 1300139
+struct vnode *null_recycle_marker;
+struct sx null_recycle_sx;
+#endif
+struct mtx null_recycle_lock;
+struct cv null_recycle_cv;
+int null_recycle_request = NULL_RECYCLE_REQ_NONE;
+static eventhandler_tag null_event_lowmem = NULL;
+struct thread *null_recycle_td;
static MALLOC_DEFINE(M_NULLFSHASH, "nullfs_hash", "NULLFS hash table");
MALLOC_DEFINE(M_NULLFSNODE, "nullfs_node", "NULLFS vnode private part");
static struct vnode * null_hashins(struct mount *, struct null_node *);
+static void null_lowmem(void *, int);
+
+static struct kthread_desc null_recycle_ktd = {
+	.arg0 = "nullfs recycle",
+	.func = null_recycle_thread,
+	.global_threadpp = &null_recycle_td,
+};
+SYSINIT(nullfs_recycle, SI_SUB_KTHREAD_UPDATE, SI_ORDER_FIRST, kthread_start,
+    &null_recycle_ktd);
+
+static int
+null_recycle_sysctl_handle_percent(SYSCTL_HANDLER_ARGS)
+{
+	int val, err;
+
+	val = atomic_load_int((int *)arg1);
+	err = sysctl_handle_int(oidp, &val, 0, req);
+	if (err != 0 || req->newptr == NULL)
+		return (err);
+
+	if ((val < 0) || (val > 100))
+		return (EINVAL);
+
+	atomic_store_int((int *)arg1, val);
+
+	return (0);
+}
+
+SYSCTL_DECL(_vfs);
+
+SYSCTL_NODE(_vfs, OID_AUTO, nullfs, CTLFLAG_RW, 0, "nullfs");
+SYSCTL_UQUAD(_vfs_nullfs, OID_AUTO, nodes, CTLFLAG_RD,
+    &null_node_num, 0, "number of nodes");
+SYSCTL_UQUAD(_vfs_nullfs, OID_AUTO, inuse, CTLFLAG_RD,
+    &null_node_inuse_num, 0, "number of nodes in use");
+
+SYSCTL_NODE(_vfs_nullfs, OID_AUTO, recycle, CTLFLAG_RW, 0, "nullfs recycle");
+SYSCTL_PROC(_vfs_nullfs_recycle, OID_AUTO, lowpages,
+    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
+    &null_recycle_lowpages, 0, null_recycle_sysctl_handle_percent, "I",
+    "node ratio to recycle upon low pages, in percent");
+SYSCTL_PROC(_vfs_nullfs_recycle, OID_AUTO, lowkmem,
+    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
+    &null_recycle_lowkmem, 0, null_recycle_sysctl_handle_percent, "I",
+    "node ratio to recycle upon low kernel memory, in percent");
+SYSCTL_COUNTER_U64(_vfs_nullfs_recycle, OID_AUTO, calls,
+    CTLFLAG_RD, &null_recycle_calls,
+    "nullfs recycle calls");
/*
- * Initialise cache headers
+ * Initialise cache headers and nullfs recycle
*/
int
nullfs_init(vfsp)
@@ -76,6 +151,15 @@
	null_node_hashtbl = hashinit(desiredvnodes, M_NULLFSHASH,
	    &null_hash_mask);
	rw_init(&null_hash_lock, "nullhs");
+	null_recycle_calls = counter_u64_alloc(M_WAITOK);
+	null_event_lowmem = EVENTHANDLER_REGISTER(vm_lowmem, null_lowmem, NULL,
+	    EVENTHANDLER_PRI_FIRST);
+#if __FreeBSD_version >= 1300139
+	null_recycle_marker = vnlru_alloc_marker();
+	sx_init(&null_recycle_sx, "nullfs recycle sx");
+#endif
+	mtx_init(&null_recycle_lock, "nullfs recycle lock", NULL, MTX_DEF);
+	cv_init(&null_recycle_cv, "nullfs recycle cv");
	return (0);
}
@@ -86,6 +170,16 @@
	rw_destroy(&null_hash_lock);
	hashdestroy(null_node_hashtbl, M_NULLFSHASH, null_hash_mask);
+#if __FreeBSD_version >= 1300139
+	if (null_recycle_marker != NULL)
+		vnlru_free_marker(null_recycle_marker);
+	sx_destroy(&null_recycle_sx);
+#endif
+	cv_destroy(&null_recycle_cv);
+	mtx_destroy(&null_recycle_lock);
+	if (null_event_lowmem != NULL)
+		EVENTHANDLER_DEREGISTER(vm_lowmem, null_event_lowmem);
+	counter_u64_free(null_recycle_calls);
	return (0);
}
@@ -162,6 +256,7 @@
	}
	LIST_INSERT_HEAD(hd, xp, null_hash);
	rw_wunlock(&null_hash_lock);
+	atomic_add_rel_64(&null_node_num, 1);
	return (NULLVP);
}
@@ -253,6 +348,7 @@
	error = insmntque1(vp, mp, null_insmntque_dtr, xp);
	if (error != 0)
		return (error);
+	atomic_add_rel_64(&null_node_inuse_num, 1);
	if (lowervp == MOUNTTONULLMOUNT(mp)->nullm_lowerrootvp)
		vp->v_vflag |= VV_ROOT;
@@ -301,6 +397,7 @@
	rw_wlock(&null_hash_lock);
	LIST_REMOVE(xp, null_hash);
	rw_wunlock(&null_hash_lock);
+	atomic_subtract_rel_64(&null_node_num, 1);
}
#ifdef DIAGNOSTIC
@@ -341,3 +438,38 @@
	return (a->null_lowervp);
}
#endif
+
+/*
+ * Nullfs(5) holds a use count on the lower vnode, which prevents the
+ * lower vnode from being recycled. This blocks vnode recycling initiated
+ * by the lower filesystem, typically zfs(4). In that case, prune the
+ * nullfs(5) nodes in the hope of releasing the lower vnodes.
+ *
+ * Distinguish the severity of the memory shortage. Low-page kernel
+ * events are not abnormal when the working set of the kernel and user
+ * processes exceeds physical memory. Low kernel memory events, on the
+ * other hand, may stall the system.
+ */
+static void
+null_lowmem(void *arg __unused, int howto)
+{
+	int req;
+
+	switch (howto) {
+	case VM_LOW_KMEM:
+		req = NULL_RECYCLE_REQ_LOW_KMEM;
+		break;
+
+	case VM_LOW_PAGES:
+	default:	/* XXX */
+		req = NULL_RECYCLE_REQ_LOW_PAGES;
+		break;
+	}
+
+	mtx_lock(&null_recycle_lock);
+	if (null_recycle_request < req) {
+		null_recycle_request = req;
+		cv_broadcast(&null_recycle_cv);
+	}
+	mtx_unlock(&null_recycle_lock);
+}
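The escalation rule in null_lowmem(), where a pending request is only ever overwritten by a more severe one, so a LOW_PAGES event cannot downgrade a queued LOW_KMEM request, can be exercised in isolation. Below is a userspace analogue, not part of the patch: pthreads stand in for the kernel mutex and condvar, and the consumer clears the request as it picks it up, a simplification of null_recycle_thread(), which clears it only after recycling.

#include <pthread.h>
#include <stdio.h>

enum { REQ_NONE, REQ_LOW_PAGES, REQ_LOW_KMEM };

static pthread_mutex_t req_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t req_cv = PTHREAD_COND_INITIALIZER;
static int request = REQ_NONE;

/* Producer side, mirroring null_lowmem(): escalate, never downgrade. */
static void
post_request(int req)
{
	pthread_mutex_lock(&req_lock);
	if (request < req) {
		request = req;
		pthread_cond_broadcast(&req_cv);
	}
	pthread_mutex_unlock(&req_lock);
}

/* Consumer side: wait for a request and consume it. */
static int
wait_request(void)
{
	int req;

	pthread_mutex_lock(&req_lock);
	while (request == REQ_NONE)
		pthread_cond_wait(&req_cv, &req_lock);
	req = request;
	request = REQ_NONE;
	pthread_mutex_unlock(&req_lock);
	return (req);
}

int
main(void)
{
	post_request(REQ_LOW_PAGES);
	post_request(REQ_LOW_KMEM);	/* escalates over LOW_PAGES */
	post_request(REQ_LOW_PAGES);	/* ignored: less severe */
	printf("served request %d\n", wait_request());	/* prints 2 */
	return (0);
}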
diff --git a/sys/fs/nullfs/null_vfsops.c b/sys/fs/nullfs/null_vfsops.c
--- a/sys/fs/nullfs/null_vfsops.c
+++ b/sys/fs/nullfs/null_vfsops.c
@@ -43,13 +43,19 @@
#include <sys/param.h>
#include <sys/systm.h>
+#include <sys/counter.h>
+#include <sys/condvar.h>
+#include <sys/eventhandler.h>
#include <sys/fcntl.h>
#include <sys/kernel.h>
+#include <sys/kthread.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mount.h>
+#include <sys/mutex.h>
#include <sys/namei.h>
#include <sys/proc.h>
+#include <sys/sx.h>
#include <sys/vnode.h>
#include <sys/jail.h>
@@ -67,6 +73,8 @@
static vfs_vget_t nullfs_vget;
static vfs_extattrctl_t nullfs_extattrctl;
+static struct vfsops null_vfsops;
+
/*
* Mount null layer
*/
@@ -208,6 +216,7 @@
	mp->mnt_kern_flag |= MNTK_LOOKUP_EXCL_DOTDOT | MNTK_NOMSYNC;
	mp->mnt_kern_flag |= lowerrootvp->v_mount->mnt_kern_flag &
	    (MNTK_USES_BCACHE | MNTK_NO_IOPF | MNTK_UNMAPPED_BUFS);
+	mp->mnt_fsvninusep = &null_node_inuse_num;
	MNT_IUNLOCK(mp);
	vfs_getnewfsid(mp);
	if ((xmp->nullm_flags & NULLM_CACHE) != 0) {
@@ -436,6 +445,112 @@
	vdrop(vp);
}
+void
+null_recycle_thread(void)
+{
+	int hz_rem, recycle_percent;
+	int64_t vn_scan, node_inuse_delta;
+	uint64_t node_num, node_inuse_num;
+	struct timeval tv_now, tv_delta, tv_rem;
+	static struct timeval tv_last;
+	static const struct timeval tv_pause =
+	    {.tv_sec = 1, .tv_usec = 0};
+	extern counter_u64_t null_recycle_calls;
+
+	EVENTHANDLER_REGISTER(shutdown_pre_sync, kthread_shutdown,
+	    null_recycle_td, SHUTDOWN_PRI_FIRST);
+
+	for (;;) {
+		kthread_suspend_check();
+
+		node_num = atomic_load_acq_64(&null_node_num);
+		node_inuse_num = atomic_load_acq_64(&null_node_inuse_num);
+
+		/*
+		 * Work around the in-use counter errors that may happen
+		 * under heavy load.
+		 *
+		 * Fix the in-use counter only when the counters are stable,
+		 * i.e. their values do not change across multiple reads.
+		 * Otherwise, defer the fix to a later pass.
+		 */
+		if (__predict_false(node_num < node_inuse_num))
+			node_inuse_delta = node_inuse_num - node_num;
+		else if (__predict_false(((int64_t)node_inuse_num) < 0))
+			node_inuse_delta = (int64_t)node_inuse_num;
+		else
+			node_inuse_delta = 0;
+
+		if (__predict_false(node_inuse_delta != 0)) {
+			/* Re-check that the node counter is stable. */
+			if (node_num == atomic_load_64(&null_node_num)) {
+				if (atomic_cmpset_64(&null_node_inuse_num,
+				    node_inuse_num,
+				    node_inuse_num - node_inuse_delta)) {
+					/*
+					 * The node counter moved during the
+					 * fix; roll the adjustment back and
+					 * retry on a later pass.
+					 */
+					if (__predict_false(node_num !=
+					    atomic_load_64(&null_node_num))) {
+						atomic_add_64(
+						    &null_node_inuse_num,
+						    node_inuse_delta);
+					}
+				}
+			}
+		}
+
+		getmicrotime(&tv_now);
+		tv_delta = tv_now;
+		timevalsub(&tv_delta, &tv_last);
+		if (timevalcmp(&tv_pause, &tv_delta, >=)) {
+			tv_rem = tv_pause;
+			timevalsub(&tv_rem, &tv_delta);
+		} else
+			timevalclear(&tv_rem);
+
+		mtx_lock(&null_recycle_lock);
+
+		if ((NULL_RECYCLE_REQ_NONE == null_recycle_request) ||
+		    timevalisset(&tv_rem)) {
+			if (NULL_RECYCLE_REQ_NONE == null_recycle_request)
+				hz_rem = hz;
+			else
+				hz_rem = tvtohz(&tv_rem);
+			cv_timedwait(&null_recycle_cv, &null_recycle_lock,
+			    hz_rem);
+			mtx_unlock(&null_recycle_lock);
+			continue;
+		}
+
+		mtx_unlock(&null_recycle_lock);
+
+		counter_u64_add(null_recycle_calls, 1);
+
+		vn_scan = node_num - node_inuse_num - node_inuse_delta;
+
+		switch (null_recycle_request) {
+		case NULL_RECYCLE_REQ_LOW_KMEM:
+			recycle_percent = null_recycle_lowkmem;
+			break;
+
+		case NULL_RECYCLE_REQ_LOW_PAGES:
+		default:	/* XXX */
+			recycle_percent = null_recycle_lowpages;
+			break;
+		}
+
+		vn_scan *= recycle_percent;
+		vn_scan /= 100;
+
+		if (vn_scan > 0) {
+#if __FreeBSD_version >= 1300139
+			sx_xlock(&null_recycle_sx);
+			vnlru_free_vfsops(vn_scan, &null_vfsops,
+			    null_recycle_marker);
+			sx_xunlock(&null_recycle_sx);
+#else
+			vnlru_free(vn_scan, &null_vfsops);
+#endif
+		}
+
+		mtx_lock(&null_recycle_lock);
+		null_recycle_request = NULL_RECYCLE_REQ_NONE;
+		mtx_unlock(&null_recycle_lock);
+		getmicrotime(&tv_last);
+	}
+}
+
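The counter-reconciliation step above is easier to follow outside the kernel. Here is a sketch with C11 atomics standing in for atomic_load_acq_64()/atomic_cmpset_64()/atomic_add_64(); the corrupted starting values are invented for the demonstration.

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

static _Atomic uint64_t node_num = 100;
static _Atomic uint64_t node_inuse_num = 130;	/* broken: exceeds node_num */

static void
reconcile(void)
{
	uint64_t num, inuse;
	int64_t delta;

	num = atomic_load(&node_num);
	inuse = atomic_load(&node_inuse_num);

	if (num < inuse)		/* more nodes in use than exist */
		delta = (int64_t)(inuse - num);
	else if ((int64_t)inuse < 0)	/* counter wrapped below zero */
		delta = (int64_t)inuse;
	else
		return;

	/* Apply the fix only while the node counter looks stable. */
	if (num == atomic_load(&node_num) &&
	    atomic_compare_exchange_strong(&node_inuse_num, &inuse,
	    inuse - delta)) {
		/* The node counter moved after all: undo, retry later. */
		if (num != atomic_load(&node_num))
			atomic_fetch_add(&node_inuse_num, delta);
	}
}

int
main(void)
{
	reconcile();
	printf("%ju nodes, %ju in use\n",
	    (uintmax_t)atomic_load(&node_num),
	    (uintmax_t)atomic_load(&node_inuse_num));	/* 100, 100 */
	return (0);
}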
static struct vfsops null_vfsops = {
	.vfs_extattrctl = nullfs_extattrctl,
	.vfs_fhtovp = nullfs_fhtovp,
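Finally, the pacing in null_recycle_thread() allows at most one recycle pass per second (tv_pause), no matter how often low-memory events fire. The same shape in userspace, as a sketch: gettimeofday(2) and the timersub()/timercmp() macros from <sys/time.h> replace getmicrotime(9)/timevalsub()/timevalcmp(), and usleep(3) stands in for cv_timedwait(9).

#include <sys/time.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	struct timeval now, delta, rem, last = { 0, 0 };
	const struct timeval interval = { 1, 0 };	/* like tv_pause */
	int passes = 0;

	while (passes < 3) {
		gettimeofday(&now, NULL);
		timersub(&now, &last, &delta);
		if (timercmp(&interval, &delta, >=)) {
			/* Too soon after the last pass: sleep it out. */
			timersub(&interval, &delta, &rem);
			usleep(rem.tv_sec * 1000000 + rem.tv_usec);
			continue;
		}
		printf("recycle pass %d\n", ++passes);
		gettimeofday(&last, NULL);
	}
	return (0);
}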