Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F158005335
D14705.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
27 KB
Referenced Files
None
Subscribers
None
D14705.diff
View Options
Index: head/sys/kern/vfs_bio.c
===================================================================
--- head/sys/kern/vfs_bio.c
+++ head/sys/kern/vfs_bio.c
@@ -50,6 +50,7 @@
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bio.h>
+#include <sys/bitset.h>
#include <sys/conf.h>
#include <sys/counter.h>
#include <sys/buf.h>
@@ -100,6 +101,52 @@
.bop_bdflush = bufbdflush,
};
+struct bufqueue {
+ struct mtx_padalign bq_lock;
+ TAILQ_HEAD(, buf) bq_queue;
+ uint8_t bq_index;
+ uint16_t bq_subqueue;
+ int bq_len;
+} __aligned(CACHE_LINE_SIZE);
+
+#define BQ_LOCKPTR(bq) (&(bq)->bq_lock)
+#define BQ_LOCK(bq) mtx_lock(BQ_LOCKPTR((bq)))
+#define BQ_UNLOCK(bq) mtx_unlock(BQ_LOCKPTR((bq)))
+#define BQ_ASSERT_LOCKED(bq) mtx_assert(BQ_LOCKPTR((bq)), MA_OWNED)
+
+struct bufdomain {
+ struct bufqueue bd_subq[MAXCPU + 1]; /* Per-cpu sub queues + global */
+ struct bufqueue bd_dirtyq;
+ struct bufqueue *bd_cleanq;
+ struct mtx_padalign bd_run_lock;
+ /* Constants */
+ long bd_maxbufspace;
+ long bd_hibufspace;
+ long bd_lobufspace;
+ long bd_bufspacethresh;
+ int bd_hifreebuffers;
+ int bd_lofreebuffers;
+ int bd_hidirtybuffers;
+ int bd_lodirtybuffers;
+ int bd_dirtybufthresh;
+ int bd_lim;
+ /* atomics */
+ int bd_wanted;
+ int __aligned(CACHE_LINE_SIZE) bd_numdirtybuffers;
+ int __aligned(CACHE_LINE_SIZE) bd_running;
+ long __aligned(CACHE_LINE_SIZE) bd_bufspace;
+ int __aligned(CACHE_LINE_SIZE) bd_freebuffers;
+} __aligned(CACHE_LINE_SIZE);
+
+#define BD_LOCKPTR(bd) (&(bd)->bd_cleanq->bq_lock)
+#define BD_LOCK(bd) mtx_lock(BD_LOCKPTR((bd)))
+#define BD_UNLOCK(bd) mtx_unlock(BD_LOCKPTR((bd)))
+#define BD_ASSERT_LOCKED(bd) mtx_assert(BD_LOCKPTR((bd)), MA_OWNED)
+#define BD_RUN_LOCKPTR(bd) (&(bd)->bd_run_lock)
+#define BD_RUN_LOCK(bd) mtx_lock(BD_RUN_LOCKPTR((bd)))
+#define BD_RUN_UNLOCK(bd) mtx_unlock(BD_RUN_LOCKPTR((bd)))
+#define BD_DOMAIN(bd) (bd - bdomain)
+
static struct buf *buf; /* buffer header pool */
extern struct buf *swbuf; /* Swap buffer header pool. */
caddr_t unmapped_buf;
@@ -123,8 +170,8 @@
daddr_t lblkno, daddr_t blkno);
static void breada(struct vnode *, daddr_t *, int *, int, struct ucred *, int,
void (*)(struct buf *));
-static int buf_flush(struct vnode *vp, int);
-static int flushbufqueues(struct vnode *, int, int);
+static int buf_flush(struct vnode *vp, struct bufdomain *, int);
+static int flushbufqueues(struct vnode *, struct bufdomain *, int, int);
static void buf_daemon(void);
static __inline void bd_wakeup(void);
static int sysctl_runningspace(SYSCTL_HANDLER_ARGS);
@@ -133,6 +180,16 @@
static int buf_import(void *, void **, int, int, int);
static void buf_release(void *, void **, int);
static void maxbcachebuf_adjust(void);
+static inline struct bufdomain *bufdomain(struct buf *);
+static void bq_remove(struct bufqueue *bq, struct buf *bp);
+static void bq_insert(struct bufqueue *bq, struct buf *bp, bool unlock);
+static int buf_recycle(struct bufdomain *, bool kva);
+static void bq_init(struct bufqueue *bq, int qindex, int cpu,
+ const char *lockname);
+static void bd_init(struct bufdomain *bd);
+static int bd_flushall(struct bufdomain *bd);
+static int sysctl_bufdomain_long(SYSCTL_HANDLER_ARGS);
+static int sysctl_bufdomain_int(SYSCTL_HANDLER_ARGS);
static int sysctl_bufspace(SYSCTL_HANDLER_ARGS);
int vmiodirenable = TRUE;
@@ -147,7 +204,9 @@
SYSCTL_COUNTER_U64(_vfs, OID_AUTO, bufkvaspace, CTLFLAG_RD, &bufkvaspace,
"Kernel virtual memory used for buffers");
static long maxbufspace;
-SYSCTL_LONG(_vfs, OID_AUTO, maxbufspace, CTLFLAG_RW, &maxbufspace, 0,
+SYSCTL_PROC(_vfs, OID_AUTO, maxbufspace,
+ CTLTYPE_LONG|CTLFLAG_MPSAFE|CTLFLAG_RW, &maxbufspace,
+ __offsetof(struct bufdomain, bd_maxbufspace), sysctl_bufdomain_long, "L",
"Maximum allowed value of bufspace (including metadata)");
static long bufmallocspace;
SYSCTL_LONG(_vfs, OID_AUTO, bufmallocspace, CTLFLAG_RD, &bufmallocspace, 0,
@@ -156,14 +215,20 @@
SYSCTL_LONG(_vfs, OID_AUTO, maxmallocbufspace, CTLFLAG_RW, &maxbufmallocspace,
0, "Maximum amount of malloced memory for buffers");
static long lobufspace;
-SYSCTL_LONG(_vfs, OID_AUTO, lobufspace, CTLFLAG_RW, &lobufspace, 0,
+SYSCTL_PROC(_vfs, OID_AUTO, lobufspace,
+ CTLTYPE_LONG|CTLFLAG_MPSAFE|CTLFLAG_RW, &lobufspace,
+ __offsetof(struct bufdomain, bd_lobufspace), sysctl_bufdomain_long, "L",
"Minimum amount of buffers we want to have");
long hibufspace;
-SYSCTL_LONG(_vfs, OID_AUTO, hibufspace, CTLFLAG_RW, &hibufspace, 0,
+SYSCTL_PROC(_vfs, OID_AUTO, hibufspace,
+ CTLTYPE_LONG|CTLFLAG_MPSAFE|CTLFLAG_RW, &hibufspace,
+ __offsetof(struct bufdomain, bd_hibufspace), sysctl_bufdomain_long, "L",
"Maximum allowed value of bufspace (excluding metadata)");
long bufspacethresh;
-SYSCTL_LONG(_vfs, OID_AUTO, bufspacethresh, CTLFLAG_RW, &bufspacethresh,
- 0, "Bufspace consumed before waking the daemon to free some");
+SYSCTL_PROC(_vfs, OID_AUTO, bufspacethresh,
+ CTLTYPE_LONG|CTLFLAG_MPSAFE|CTLFLAG_RW, &bufspacethresh,
+ __offsetof(struct bufdomain, bd_bufspacethresh), sysctl_bufdomain_long, "L",
+ "Bufspace consumed before waking the daemon to free some");
static counter_u64_t buffreekvacnt;
SYSCTL_COUNTER_U64(_vfs, OID_AUTO, buffreekvacnt, CTLFLAG_RW, &buffreekvacnt,
"Number of times we have freed the KVA space from some buffer");
@@ -190,26 +255,37 @@
static int recursiveflushes;
SYSCTL_INT(_vfs, OID_AUTO, recursiveflushes, CTLFLAG_RW, &recursiveflushes,
0, "Number of flushes skipped due to being recursive");
-static int numdirtybuffers;
-SYSCTL_INT(_vfs, OID_AUTO, numdirtybuffers, CTLFLAG_RD, &numdirtybuffers, 0,
+static int sysctl_numdirtybuffers(SYSCTL_HANDLER_ARGS);
+SYSCTL_PROC(_vfs, OID_AUTO, numdirtybuffers,
+ CTLTYPE_INT|CTLFLAG_MPSAFE|CTLFLAG_RD, NULL, 0, sysctl_numdirtybuffers, "I",
"Number of buffers that are dirty (has unwritten changes) at the moment");
static int lodirtybuffers;
-SYSCTL_INT(_vfs, OID_AUTO, lodirtybuffers, CTLFLAG_RW, &lodirtybuffers, 0,
+SYSCTL_PROC(_vfs, OID_AUTO, lodirtybuffers,
+ CTLTYPE_LONG|CTLFLAG_MPSAFE|CTLFLAG_RW, &lodirtybuffers,
+ __offsetof(struct bufdomain, bd_lodirtybuffers), sysctl_bufdomain_int, "L",
"How many buffers we want to have free before bufdaemon can sleep");
static int hidirtybuffers;
-SYSCTL_INT(_vfs, OID_AUTO, hidirtybuffers, CTLFLAG_RW, &hidirtybuffers, 0,
+SYSCTL_PROC(_vfs, OID_AUTO, hidirtybuffers,
+ CTLTYPE_LONG|CTLFLAG_MPSAFE|CTLFLAG_RW, &hidirtybuffers,
+ __offsetof(struct bufdomain, bd_hidirtybuffers), sysctl_bufdomain_int, "L",
"When the number of dirty buffers is considered severe");
int dirtybufthresh;
-SYSCTL_INT(_vfs, OID_AUTO, dirtybufthresh, CTLFLAG_RW, &dirtybufthresh,
- 0, "Number of bdwrite to bawrite conversions to clear dirty buffers");
+SYSCTL_PROC(_vfs, OID_AUTO, dirtybufthresh,
+ CTLTYPE_LONG|CTLFLAG_MPSAFE|CTLFLAG_RW, &dirtybufthresh,
+ __offsetof(struct bufdomain, bd_dirtybufthresh), sysctl_bufdomain_int, "L",
+ "Number of bdwrite to bawrite conversions to clear dirty buffers");
static int numfreebuffers;
SYSCTL_INT(_vfs, OID_AUTO, numfreebuffers, CTLFLAG_RD, &numfreebuffers, 0,
"Number of free buffers");
static int lofreebuffers;
-SYSCTL_INT(_vfs, OID_AUTO, lofreebuffers, CTLFLAG_RW, &lofreebuffers, 0,
+SYSCTL_PROC(_vfs, OID_AUTO, lofreebuffers,
+ CTLTYPE_LONG|CTLFLAG_MPSAFE|CTLFLAG_RW, &lofreebuffers,
+ __offsetof(struct bufdomain, bd_lofreebuffers), sysctl_bufdomain_int, "L",
"Target number of free buffers");
static int hifreebuffers;
-SYSCTL_INT(_vfs, OID_AUTO, hifreebuffers, CTLFLAG_RW, &hifreebuffers, 0,
+SYSCTL_PROC(_vfs, OID_AUTO, hifreebuffers,
+ CTLTYPE_LONG|CTLFLAG_MPSAFE|CTLFLAG_RW, &hifreebuffers,
+ __offsetof(struct bufdomain, bd_hifreebuffers), sysctl_bufdomain_int, "L",
"Threshold for clean buffer recycling");
static counter_u64_t getnewbufcalls;
SYSCTL_COUNTER_U64(_vfs, OID_AUTO, getnewbufcalls, CTLFLAG_RD,
@@ -294,66 +370,19 @@
#define QUEUE_CLEAN 3 /* non-B_DELWRI buffers */
#define QUEUE_SENTINEL 4 /* not an queue index, but mark for sentinel */
-struct bufqueue {
- struct mtx_padalign bq_lock;
- TAILQ_HEAD(, buf) bq_queue;
- uint8_t bq_index;
- uint16_t bq_subqueue;
- int bq_len;
-} __aligned(CACHE_LINE_SIZE);
+/* Maximum number of buffer domains. */
+#define BUF_DOMAINS 8
-#define BQ_LOCKPTR(bq) (&(bq)->bq_lock)
-#define BQ_LOCK(bq) mtx_lock(BQ_LOCKPTR((bq)))
-#define BQ_UNLOCK(bq) mtx_unlock(BQ_LOCKPTR((bq)))
-#define BQ_ASSERT_LOCKED(bq) mtx_assert(BQ_LOCKPTR((bq)), MA_OWNED)
+struct bufdomainset bdlodirty; /* Domains > lodirty */
+struct bufdomainset bdhidirty; /* Domains > hidirty */
-struct bufqueue __exclusive_cache_line bqempty;
-struct bufqueue __exclusive_cache_line bqdirty;
-
-struct bufdomain {
- struct bufqueue bd_subq[MAXCPU + 1]; /* Per-cpu sub queues + global */
- struct bufqueue *bd_cleanq;
- struct mtx_padalign bd_run_lock;
- /* Constants */
- long bd_maxbufspace;
- long bd_hibufspace;
- long bd_lobufspace;
- long bd_bufspacethresh;
- int bd_hifreebuffers;
- int bd_lofreebuffers;
- int bd_lim;
- /* atomics */
- int bd_wanted;
- int __aligned(CACHE_LINE_SIZE) bd_running;
- long __aligned(CACHE_LINE_SIZE) bd_bufspace;
- int __aligned(CACHE_LINE_SIZE) bd_freebuffers;
-} __aligned(CACHE_LINE_SIZE);
-
-#define BD_LOCKPTR(bd) (&(bd)->bd_cleanq->bq_lock)
-#define BD_LOCK(bd) mtx_lock(BD_LOCKPTR((bd)))
-#define BD_UNLOCK(bd) mtx_unlock(BD_LOCKPTR((bd)))
-#define BD_ASSERT_LOCKED(bd) mtx_assert(BD_LOCKPTR((bd)), MA_OWNED)
-#define BD_RUN_LOCKPTR(bd) (&(bd)->bd_run_lock)
-#define BD_RUN_LOCK(bd) mtx_lock(BD_RUN_LOCKPTR((bd)))
-#define BD_RUN_UNLOCK(bd) mtx_unlock(BD_RUN_LOCKPTR((bd)))
-#define BD_DOMAIN(bd) (bd - bdclean)
-
-/* Maximum number of clean buffer domains. */
-#define CLEAN_DOMAINS 8
-
/* Configured number of clean queues. */
-static int __read_mostly clean_domains;
+static int __read_mostly buf_domains;
-struct bufdomain __exclusive_cache_line bdclean[CLEAN_DOMAINS];
+BITSET_DEFINE(bufdomainset, BUF_DOMAINS);
+struct bufdomain __exclusive_cache_line bdomain[BUF_DOMAINS];
+struct bufqueue __exclusive_cache_line bqempty;
-static void bq_remove(struct bufqueue *bq, struct buf *bp);
-static void bq_insert(struct bufqueue *bq, struct buf *bp, bool unlock);
-static int buf_recycle(struct bufdomain *, bool kva);
-static void bq_init(struct bufqueue *bq, int qindex, int cpu,
- const char *lockname);
-static void bd_init(struct bufdomain *bd);
-static int bd_flushall(struct bufdomain *bd);
-
/*
* per-cpu empty buffer cache.
*/
@@ -393,6 +422,44 @@
return (error);
}
+static int
+sysctl_bufdomain_int(SYSCTL_HANDLER_ARGS)
+{
+ int error;
+ int value;
+ int i;
+
+ value = *(int *)arg1;
+ error = sysctl_handle_int(oidp, &value, 0, req);
+ if (error != 0 || req->newptr == NULL)
+ return (error);
+ *(int *)arg1 = value;
+ for (i = 0; i < buf_domains; i++)
+ *(int *)(((uintptr_t)&bdomain[i]) + arg2) =
+ value / buf_domains;
+
+ return (error);
+}
+
+static int
+sysctl_bufdomain_long(SYSCTL_HANDLER_ARGS)
+{
+ long value;
+ int error;
+ int i;
+
+ value = *(long *)arg1;
+ error = sysctl_handle_long(oidp, &value, 0, req);
+ if (error != 0 || req->newptr == NULL)
+ return (error);
+ *(long *)arg1 = value;
+ for (i = 0; i < buf_domains; i++)
+ *(long *)(((uintptr_t)&bdomain[i]) + arg2) =
+ value / buf_domains;
+
+ return (error);
+}
+
#if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \
defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7)
static int
@@ -403,8 +470,8 @@
int i;
lvalue = 0;
- for (i = 0; i < clean_domains; i++)
- lvalue += bdclean[i].bd_bufspace;
+ for (i = 0; i < buf_domains; i++)
+ lvalue += bdomain[i].bd_bufspace;
if (sizeof(int) == sizeof(long) || req->oldlen >= sizeof(long))
return (sysctl_handle_long(oidp, &lvalue, 0, req));
if (lvalue > INT_MAX)
@@ -421,12 +488,24 @@
int i;
lvalue = 0;
- for (i = 0; i < clean_domains; i++)
- lvalue += bdclean[i].bd_bufspace;
+ for (i = 0; i < buf_domains; i++)
+ lvalue += bdomain[i].bd_bufspace;
return (sysctl_handle_long(oidp, &lvalue, 0, req));
}
#endif
+static int
+sysctl_numdirtybuffers(SYSCTL_HANDLER_ARGS)
+{
+ int value;
+ int i;
+
+ value = 0;
+ for (i = 0; i < buf_domains; i++)
+ value += bdomain[i].bd_numdirtybuffers;
+ return (sysctl_handle_int(oidp, &value, 0, req));
+}
+
/*
* bdirtywakeup:
*
@@ -444,18 +523,59 @@
}
/*
+ * bd_clear:
+ *
+ * Clear a domain from the appropriate bitsets when dirtybuffers
+ * is decremented.
+ */
+static void
+bd_clear(struct bufdomain *bd)
+{
+
+ mtx_lock(&bdirtylock);
+ if (bd->bd_numdirtybuffers <= bd->bd_lodirtybuffers)
+ BIT_CLR(BUF_DOMAINS, BD_DOMAIN(bd), &bdlodirty);
+ if (bd->bd_numdirtybuffers <= bd->bd_hidirtybuffers)
+ BIT_CLR(BUF_DOMAINS, BD_DOMAIN(bd), &bdhidirty);
+ mtx_unlock(&bdirtylock);
+}
+
+/*
+ * bd_set:
+ *
+ * Set a domain in the appropriate bitsets when dirtybuffers
+ * is incremented.
+ */
+static void
+bd_set(struct bufdomain *bd)
+{
+
+ mtx_lock(&bdirtylock);
+ if (bd->bd_numdirtybuffers > bd->bd_lodirtybuffers)
+ BIT_SET(BUF_DOMAINS, BD_DOMAIN(bd), &bdlodirty);
+ if (bd->bd_numdirtybuffers > bd->bd_hidirtybuffers)
+ BIT_SET(BUF_DOMAINS, BD_DOMAIN(bd), &bdhidirty);
+ mtx_unlock(&bdirtylock);
+}
+
+/*
* bdirtysub:
*
* Decrement the numdirtybuffers count by one and wakeup any
* threads blocked in bwillwrite().
*/
static void
-bdirtysub(void)
+bdirtysub(struct buf *bp)
{
+ struct bufdomain *bd;
+ int num;
- if (atomic_fetchadd_int(&numdirtybuffers, -1) ==
- (lodirtybuffers + hidirtybuffers) / 2)
+ bd = bufdomain(bp);
+ num = atomic_fetchadd_int(&bd->bd_numdirtybuffers, -1);
+ if (num == (bd->bd_lodirtybuffers + bd->bd_hidirtybuffers) / 2)
bdirtywakeup();
+ if (num == bd->bd_lodirtybuffers || num == bd->bd_hidirtybuffers)
+ bd_clear(bd);
}
/*
@@ -465,16 +585,21 @@
* daemon if needed.
*/
static void
-bdirtyadd(void)
+bdirtyadd(struct buf *bp)
{
+ struct bufdomain *bd;
+ int num;
/*
* Only do the wakeup once as we cross the boundary. The
* buf daemon will keep running until the condition clears.
*/
- if (atomic_fetchadd_int(&numdirtybuffers, 1) ==
- (lodirtybuffers + hidirtybuffers) / 2)
+ bd = bufdomain(bp);
+ num = atomic_fetchadd_int(&bd->bd_numdirtybuffers, 1);
+ if (num == (bd->bd_lodirtybuffers + bd->bd_hidirtybuffers) / 2)
bd_wakeup();
+ if (num == bd->bd_lodirtybuffers || num == bd->bd_hidirtybuffers)
+ bd_set(bd);
}
/*
@@ -539,11 +664,11 @@
KASSERT((bp->b_flags & B_MALLOC) == 0,
("bufspace_adjust: malloc buf %p", bp));
- bd = &bdclean[bp->b_domain];
+ bd = bufdomain(bp);
diff = bufsize - bp->b_bufsize;
if (diff < 0) {
atomic_subtract_long(&bd->bd_bufspace, -diff);
- } else {
+ } else if (diff > 0) {
space = atomic_fetchadd_long(&bd->bd_bufspace, diff);
/* Wake up the daemon on the transition. */
if (space < bd->bd_bufspacethresh &&
@@ -638,7 +763,7 @@
* recursion.
*/
td->td_pflags |= TDP_BUFNEED | TDP_NORUNNINGBUF;
- fl = buf_flush(vp, flushbufqtarget);
+ fl = buf_flush(vp, bd, flushbufqtarget);
td->td_pflags &= norunbuf;
BD_LOCK(bd);
if (fl != 0)
@@ -700,6 +825,15 @@
if (buf_recycle(bd, false) != 0) {
if (bd_flushall(bd))
continue;
+ /*
+ * Speedup dirty if we've run out of clean
+ * buffers. This is possible in particular
+ * because softdep may held many bufs locked
+ * pending writes to other bufs which are
+ * marked for delayed write, exhausting
+ * clean space until they are written.
+ */
+ bd_speedup();
BD_LOCK(bd);
if (bd->bd_wanted) {
msleep(&bd->bd_wanted, BD_LOCKPTR(bd),
@@ -1025,7 +1159,6 @@
("maxbcachebuf (%d) must be >= MAXBSIZE (%d)\n", maxbcachebuf,
MAXBSIZE));
bq_init(&bqempty, QUEUE_EMPTY, -1, "bufq empty lock");
- bq_init(&bqdirty, QUEUE_DIRTY, -1, "bufq dirty lock");
mtx_init(&rbreqlock, "runningbufspace lock", NULL, MTX_DEF);
mtx_init(&bdlock, "buffer daemon lock", NULL, MTX_DEF);
mtx_init(&bdirtylock, "dirty buf lock", NULL, MTX_DEF);
@@ -1093,7 +1226,6 @@
*/
hidirtybuffers = nbuf / 4 + 20;
dirtybufthresh = hidirtybuffers * 9 / 10;
- numdirtybuffers = 0;
/*
* To support extreme low-memory systems, make sure hidirtybuffers
* cannot eat up all available buffer space. This occurs when our
@@ -1128,22 +1260,26 @@
* One queue per-256mb up to the max. More queues gives better
* concurrency but less accurate LRU.
*/
- clean_domains = MIN(howmany(maxbufspace, 256*1024*1024), CLEAN_DOMAINS);
- for (i = 0 ; i < clean_domains; i++) {
+ buf_domains = MIN(howmany(maxbufspace, 256*1024*1024), BUF_DOMAINS);
+ for (i = 0 ; i < buf_domains; i++) {
struct bufdomain *bd;
- bd = &bdclean[i];
+ bd = &bdomain[i];
bd_init(bd);
- bd->bd_freebuffers = nbuf / clean_domains;
- bd->bd_hifreebuffers = hifreebuffers / clean_domains;
- bd->bd_lofreebuffers = lofreebuffers / clean_domains;
+ bd->bd_freebuffers = nbuf / buf_domains;
+ bd->bd_hifreebuffers = hifreebuffers / buf_domains;
+ bd->bd_lofreebuffers = lofreebuffers / buf_domains;
bd->bd_bufspace = 0;
- bd->bd_maxbufspace = maxbufspace / clean_domains;
- bd->bd_hibufspace = hibufspace / clean_domains;
- bd->bd_lobufspace = lobufspace / clean_domains;
- bd->bd_bufspacethresh = bufspacethresh / clean_domains;
+ bd->bd_maxbufspace = maxbufspace / buf_domains;
+ bd->bd_hibufspace = hibufspace / buf_domains;
+ bd->bd_lobufspace = lobufspace / buf_domains;
+ bd->bd_bufspacethresh = bufspacethresh / buf_domains;
+ bd->bd_numdirtybuffers = 0;
+ bd->bd_hidirtybuffers = hidirtybuffers / buf_domains;
+ bd->bd_lodirtybuffers = lodirtybuffers / buf_domains;
+ bd->bd_dirtybufthresh = dirtybufthresh / buf_domains;
/* Don't allow more than 2% of bufs in the per-cpu caches. */
- bd->bd_lim = nbuf / clean_domains / 50 / mp_ncpus;
+ bd->bd_lim = nbuf / buf_domains / 50 / mp_ncpus;
}
getnewbufcalls = counter_u64_alloc(M_WAITOK);
getnewbufrestarts = counter_u64_alloc(M_WAITOK);
@@ -1327,6 +1463,13 @@
(vm_offset_t)(bp->b_offset & PAGE_MASK));
}
+static inline struct bufdomain *
+bufdomain(struct buf *bp)
+{
+
+ return (&bdomain[bp->b_domain]);
+}
+
static struct bufqueue *
bufqueue(struct buf *bp)
{
@@ -1339,9 +1482,9 @@
case QUEUE_EMPTY:
return (&bqempty);
case QUEUE_DIRTY:
- return (&bqdirty);
+ return (&bufdomain(bp)->bd_dirtyq);
case QUEUE_CLEAN:
- return (&bdclean[bp->b_domain].bd_subq[bp->b_subqueue]);
+ return (&bufdomain(bp)->bd_subq[bp->b_subqueue]);
default:
break;
}
@@ -1404,14 +1547,14 @@
bq_remove(bq, bp);
BQ_UNLOCK(bq);
}
+ bd = bufdomain(bp);
if (qindex == QUEUE_CLEAN) {
- bd = &bdclean[bp->b_domain];
if (bd->bd_lim != 0)
bq = &bd->bd_subq[PCPU_GET(cpuid)];
else
bq = bd->bd_cleanq;
} else
- bq = &bqdirty;
+ bq = &bd->bd_dirtyq;
bq_insert(bq, bp, true);
}
@@ -1439,7 +1582,7 @@
if (!LIST_EMPTY(&bp->b_dep))
buf_deallocate(bp);
bufkva_free(bp);
- atomic_add_int(&bdclean[bp->b_domain].bd_freebuffers, 1);
+ atomic_add_int(&bufdomain(bp)->bd_freebuffers, 1);
BUF_UNLOCK(bp);
uma_zfree(buf_zone, bp);
}
@@ -1715,9 +1858,10 @@
int domain;
int i;
- domain = bd - bdclean;
+ domain = bd - bdomain;
bd->bd_cleanq = &bd->bd_subq[mp_maxid + 1];
bq_init(bd->bd_cleanq, QUEUE_CLEAN, mp_maxid + 1, "bufq clean lock");
+ bq_init(&bd->bd_dirtyq, QUEUE_DIRTY, -1, "bufq dirty lock");
for (i = 0; i <= mp_maxid; i++)
bq_init(&bd->bd_subq[i], QUEUE_CLEAN, i,
"bufq clean subqueue lock");
@@ -1809,7 +1953,7 @@
if (bp->b_qindex != QUEUE_NONE)
panic("bq_insert: free buffer %p onto another queue?", bp);
- bd = &bdclean[bp->b_domain];
+ bd = bufdomain(bp);
if (bp->b_flags & B_AGE) {
/* Place this buf directly on the real queue. */
if (bq->bq_index == QUEUE_CLEAN)
@@ -1926,8 +2070,8 @@
done = false;
for (i = 0; i < 5; i++) {
- for (q = 0; q < clean_domains; q++)
- if (buf_recycle(&bdclean[q], true) != 0)
+ for (q = 0; q < buf_domains; q++)
+ if (buf_recycle(&bdomain[q], true) != 0)
done = true;
if (done)
break;
@@ -2319,7 +2463,7 @@
if ((bp->b_flags & B_DELWRI) == 0) {
bp->b_flags |= /* XXX B_DONE | */ B_DELWRI;
reassignbuf(bp);
- bdirtyadd();
+ bdirtyadd(bp);
}
}
@@ -2347,7 +2491,7 @@
if (bp->b_flags & B_DELWRI) {
bp->b_flags &= ~B_DELWRI;
reassignbuf(bp);
- bdirtysub();
+ bdirtysub(bp);
}
/*
* Since it is now being written, we can clear its deferred write flag.
@@ -2419,9 +2563,9 @@
bwillwrite(void)
{
- if (numdirtybuffers >= hidirtybuffers) {
+ if (buf_dirty_count_severe()) {
mtx_lock(&bdirtylock);
- while (numdirtybuffers >= hidirtybuffers) {
+ while (buf_dirty_count_severe()) {
bdirtywait = 1;
msleep(&bdirtywait, &bdirtylock, (PRIBIO + 4),
"flswai", 0);
@@ -2437,7 +2581,7 @@
buf_dirty_count_severe(void)
{
- return(numdirtybuffers >= hidirtybuffers);
+ return (!BIT_EMPTY(BUF_DOMAINS, &bdhidirty));
}
/*
@@ -2523,7 +2667,7 @@
if (!LIST_EMPTY(&bp->b_dep))
buf_deallocate(bp);
if (bp->b_flags & B_DELWRI)
- bdirtysub();
+ bdirtysub(bp);
bp->b_flags &= ~(B_DELWRI | B_CACHE);
if ((bp->b_flags & B_VMIO) == 0) {
allocbuf(bp, 0);
@@ -3138,9 +3282,9 @@
else
metadata = false;
if (vp == NULL)
- bd = &bdclean[0];
+ bd = &bdomain[0];
else
- bd = &bdclean[vp->v_bufobj.bo_domain];
+ bd = &bdomain[vp->v_bufobj.bo_domain];
counter_u64_add(getnewbufcalls, 1);
reserved = false;
@@ -3186,11 +3330,11 @@
SYSINIT(bufdaemon, SI_SUB_KTHREAD_BUF, SI_ORDER_FIRST, kproc_start, &buf_kp);
static int
-buf_flush(struct vnode *vp, int target)
+buf_flush(struct vnode *vp, struct bufdomain *bd, int target)
{
int flushed;
- flushed = flushbufqueues(vp, target, 0);
+ flushed = flushbufqueues(vp, bd, target, 0);
if (flushed == 0) {
/*
* Could not find any buffers without rollback
@@ -3199,7 +3343,7 @@
*/
if (vp != NULL && target > 2)
target /= 2;
- flushbufqueues(vp, target, 1);
+ flushbufqueues(vp, bd, target, 1);
}
return (flushed);
}
@@ -3207,6 +3351,8 @@
static void
buf_daemon()
{
+ struct bufdomain *bd;
+ int speedupreq;
int lodirty;
int i;
@@ -3219,11 +3365,11 @@
/*
* Start the buf clean daemons as children threads.
*/
- for (i = 0 ; i < clean_domains; i++) {
+ for (i = 0 ; i < buf_domains; i++) {
int error;
error = kthread_add((void (*)(void *))bufspace_daemon,
- &bdclean[i], curproc, NULL, 0, 0, "bufspacedaemon-%d", i);
+ &bdomain[i], curproc, NULL, 0, 0, "bufspacedaemon-%d", i);
if (error)
panic("error %d spawning bufspace daemon", error);
}
@@ -3238,20 +3384,30 @@
mtx_unlock(&bdlock);
kproc_suspend_check(bufdaemonproc);
- lodirty = lodirtybuffers;
- if (bd_speedupreq) {
- lodirty = numdirtybuffers / 2;
- bd_speedupreq = 0;
- }
+
/*
- * Do the flush. Limit the amount of in-transit I/O we
- * allow to build up, otherwise we would completely saturate
- * the I/O system.
+ * Save speedupreq for this pass and reset to capture new
+ * requests.
*/
- while (numdirtybuffers > lodirty) {
- if (buf_flush(NULL, numdirtybuffers - lodirty) == 0)
- break;
- kern_yield(PRI_USER);
+ speedupreq = bd_speedupreq;
+ bd_speedupreq = 0;
+
+ /*
+ * Flush each domain sequentially according to its level and
+ * the speedup request.
+ */
+ for (i = 0; i < buf_domains; i++) {
+ bd = &bdomain[i];
+ if (speedupreq)
+ lodirty = bd->bd_numdirtybuffers / 2;
+ else
+ lodirty = bd->bd_lodirtybuffers;
+ while (bd->bd_numdirtybuffers > lodirty) {
+ if (buf_flush(NULL, bd,
+ bd->bd_numdirtybuffers - lodirty) == 0)
+ break;
+ kern_yield(PRI_USER);
+ }
}
/*
@@ -3265,7 +3421,7 @@
* to avoid endless loops on unlockable buffers.
*/
mtx_lock(&bdlock);
- if (numdirtybuffers <= lodirtybuffers) {
+ if (!BIT_EMPTY(BUF_DOMAINS, &bdlodirty)) {
/*
* We reached our low water mark, reset the
* request and sleep until we are needed again.
@@ -3304,7 +3460,8 @@
0, "Number of buffers flushed with dependecies that require rollbacks");
static int
-flushbufqueues(struct vnode *lvp, int target, int flushdeps)
+flushbufqueues(struct vnode *lvp, struct bufdomain *bd, int target,
+ int flushdeps)
{
struct bufqueue *bq;
struct buf *sentinel;
@@ -3317,7 +3474,7 @@
bool unlock;
flushed = 0;
- bq = &bqdirty;
+ bq = &bd->bd_dirtyq;
bp = NULL;
sentinel = malloc(sizeof(struct buf), M_TEMP, M_WAITOK | M_ZERO);
sentinel->b_qindex = QUEUE_SENTINEL;
@@ -3653,7 +3810,7 @@
panic("GB_NOWAIT_BD and GB_UNMAPPED %p", bp);
}
counter_u64_add(mappingrestarts, 1);
- bufspace_wait(&bdclean[bp->b_domain], bp->b_vp, gbflags, 0, 0);
+ bufspace_wait(bufdomain(bp), bp->b_vp, gbflags, 0, 0);
}
has_addr:
if (need_mapping) {
@@ -3851,7 +4008,7 @@
*/
if (flags & GB_NOCREAT)
return NULL;
- if (bdclean[bo->bo_domain].bd_freebuffers == 0 &&
+ if (bdomain[bo->bo_domain].bd_freebuffers == 0 &&
TD_IS_IDLETHREAD(curthread))
return NULL;
@@ -3908,7 +4065,7 @@
if (gbincore(bo, blkno)) {
BO_UNLOCK(bo);
bp->b_flags |= B_INVAL;
- bufspace_release(&bdclean[bp->b_domain], maxsize);
+ bufspace_release(bufdomain(bp), maxsize);
brelse(bp);
goto loop;
}
@@ -3943,7 +4100,7 @@
}
allocbuf(bp, size);
- bufspace_release(&bdclean[bp->b_domain], maxsize);
+ bufspace_release(bufdomain(bp), maxsize);
bp->b_flags &= ~B_DONE;
}
CTR4(KTR_BUF, "getblk(%p, %ld, %d) = %p", vp, (long)blkno, size, bp);
@@ -3972,7 +4129,7 @@
return (NULL);
}
allocbuf(bp, size);
- bufspace_release(&bdclean[bp->b_domain], maxsize);
+ bufspace_release(bufdomain(bp), maxsize);
bp->b_flags |= B_INVAL; /* b_dep cleared by getnewbuf() */
BUF_ASSERT_HELD(bp);
return (bp);
@@ -4841,7 +4998,7 @@
static volatile int bufobj_cleanq;
bo->bo_domain =
- atomic_fetchadd_int(&bufobj_cleanq, 1) % clean_domains;
+ atomic_fetchadd_int(&bufobj_cleanq, 1) % buf_domains;
rw_init(BO_LOCKPTR(bo), "bufobj interlock");
bo->bo_private = private;
TAILQ_INIT(&bo->bo_clean.bv_hd);
@@ -5164,6 +5321,7 @@
}
db_printf("\n");
}
+ BUF_LOCKPRINTINFO(bp);
#if defined(FULL_BUF_TRACKING)
db_printf("b_io_tracking: b_io_tcnt = %u\n", bp->b_io_tcnt);
@@ -5178,19 +5336,19 @@
db_printf("b_io_tracking: %s\n", bp->b_io_tracking);
#endif
db_printf(" ");
- BUF_LOCKPRINTINFO(bp);
}
DB_SHOW_COMMAND(bufqueues, bufqueues)
{
struct bufdomain *bd;
- int i, j;
+ struct buf *bp;
+ long total;
+ int i, j, cnt;
db_printf("bqempty: %d\n", bqempty.bq_len);
- db_printf("bqdirty: %d\n", bqdirty.bq_len);
- for (i = 0; i < clean_domains; i++) {
- bd = &bdclean[i];
+ for (i = 0; i < buf_domains; i++) {
+ bd = &bdomain[i];
db_printf("Buf domain %d\n", i);
db_printf("\tfreebufs\t%d\n", bd->bd_freebuffers);
db_printf("\tlofreebufs\t%d\n", bd->bd_lofreebuffers);
@@ -5202,13 +5360,43 @@
db_printf("\tlobufspace\t%ld\n", bd->bd_lobufspace);
db_printf("\tbufspacethresh\t%ld\n", bd->bd_bufspacethresh);
db_printf("\n");
- db_printf("\tcleanq count\t%d\n", bd->bd_cleanq->bq_len);
+ db_printf("\tnumdirtybuffers\t%d\n", bd->bd_numdirtybuffers);
+ db_printf("\tlodirtybuffers\t%d\n", bd->bd_lodirtybuffers);
+ db_printf("\thidirtybuffers\t%d\n", bd->bd_hidirtybuffers);
+ db_printf("\tdirtybufthresh\t%d\n", bd->bd_dirtybufthresh);
+ db_printf("\n");
+ total = 0;
+ TAILQ_FOREACH(bp, &bd->bd_cleanq->bq_queue, b_freelist)
+ total += bp->b_bufsize;
+ db_printf("\tcleanq count\t%d (%ld)\n",
+ bd->bd_cleanq->bq_len, total);
+ total = 0;
+ TAILQ_FOREACH(bp, &bd->bd_dirtyq.bq_queue, b_freelist)
+ total += bp->b_bufsize;
+ db_printf("\tdirtyq count\t%d (%ld)\n",
+ bd->bd_dirtyq.bq_len, total);
db_printf("\twakeup\t\t%d\n", bd->bd_wanted);
db_printf("\tlim\t\t%d\n", bd->bd_lim);
db_printf("\tCPU ");
for (j = 0; j <= mp_maxid; j++)
db_printf("%d, ", bd->bd_subq[j].bq_len);
db_printf("\n");
+ cnt = 0;
+ total = 0;
+ for (j = 0; j < nbuf; j++)
+ if (buf[j].b_domain == i && BUF_ISLOCKED(&buf[j])) {
+ cnt++;
+ total += buf[j].b_bufsize;
+ }
+ db_printf("\tLocked buffers: %d space %ld\n", cnt, total);
+ cnt = 0;
+ total = 0;
+ for (j = 0; j < nbuf; j++)
+ if (buf[j].b_domain == i) {
+ cnt++;
+ total += buf[j].b_bufsize;
+ }
+ db_printf("\tTotal buffers: %d space %ld\n", cnt, total);
}
}
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Thu, May 28, 10:04 AM (19 h, 35 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
33583087
Default Alt Text
D14705.diff (27 KB)
Attached To
Mode
D14705: Make dirty queues a per-domain property
Attached
Detach File
Event Timeline
Log In to Comment