Changeset View
Changeset View
Standalone View
Standalone View
head/sys/kern/vfs_bio.c
Show First 20 Lines • Show All 44 Lines • ▼ Show 20 Lines | |||||
*/ | */ | ||||
#include <sys/cdefs.h> | #include <sys/cdefs.h> | ||||
__FBSDID("$FreeBSD$"); | __FBSDID("$FreeBSD$"); | ||||
#include <sys/param.h> | #include <sys/param.h> | ||||
#include <sys/systm.h> | #include <sys/systm.h> | ||||
#include <sys/bio.h> | #include <sys/bio.h> | ||||
#include <sys/bitset.h> | |||||
#include <sys/conf.h> | #include <sys/conf.h> | ||||
#include <sys/counter.h> | #include <sys/counter.h> | ||||
#include <sys/buf.h> | #include <sys/buf.h> | ||||
#include <sys/devicestat.h> | #include <sys/devicestat.h> | ||||
#include <sys/eventhandler.h> | #include <sys/eventhandler.h> | ||||
#include <sys/fail.h> | #include <sys/fail.h> | ||||
#include <sys/limits.h> | #include <sys/limits.h> | ||||
#include <sys/lock.h> | #include <sys/lock.h> | ||||
Show All 34 Lines | |||||
struct buf_ops buf_ops_bio = { | struct buf_ops buf_ops_bio = { | ||||
.bop_name = "buf_ops_bio", | .bop_name = "buf_ops_bio", | ||||
.bop_write = bufwrite, | .bop_write = bufwrite, | ||||
.bop_strategy = bufstrategy, | .bop_strategy = bufstrategy, | ||||
.bop_sync = bufsync, | .bop_sync = bufsync, | ||||
.bop_bdflush = bufbdflush, | .bop_bdflush = bufbdflush, | ||||
}; | }; | ||||
struct bufqueue { | |||||
struct mtx_padalign bq_lock; | |||||
TAILQ_HEAD(, buf) bq_queue; | |||||
uint8_t bq_index; | |||||
uint16_t bq_subqueue; | |||||
int bq_len; | |||||
} __aligned(CACHE_LINE_SIZE); | |||||
#define BQ_LOCKPTR(bq) (&(bq)->bq_lock) | |||||
#define BQ_LOCK(bq) mtx_lock(BQ_LOCKPTR((bq))) | |||||
#define BQ_UNLOCK(bq) mtx_unlock(BQ_LOCKPTR((bq))) | |||||
#define BQ_ASSERT_LOCKED(bq) mtx_assert(BQ_LOCKPTR((bq)), MA_OWNED) | |||||
struct bufdomain { | |||||
struct bufqueue bd_subq[MAXCPU + 1]; /* Per-cpu sub queues + global */ | |||||
struct bufqueue bd_dirtyq; | |||||
struct bufqueue *bd_cleanq; | |||||
struct mtx_padalign bd_run_lock; | |||||
/* Constants */ | |||||
long bd_maxbufspace; | |||||
long bd_hibufspace; | |||||
long bd_lobufspace; | |||||
long bd_bufspacethresh; | |||||
int bd_hifreebuffers; | |||||
int bd_lofreebuffers; | |||||
int bd_hidirtybuffers; | |||||
int bd_lodirtybuffers; | |||||
int bd_dirtybufthresh; | |||||
int bd_lim; | |||||
/* atomics */ | |||||
int bd_wanted; | |||||
int __aligned(CACHE_LINE_SIZE) bd_numdirtybuffers; | |||||
int __aligned(CACHE_LINE_SIZE) bd_running; | |||||
long __aligned(CACHE_LINE_SIZE) bd_bufspace; | |||||
int __aligned(CACHE_LINE_SIZE) bd_freebuffers; | |||||
} __aligned(CACHE_LINE_SIZE); | |||||
#define BD_LOCKPTR(bd) (&(bd)->bd_cleanq->bq_lock) | |||||
#define BD_LOCK(bd) mtx_lock(BD_LOCKPTR((bd))) | |||||
#define BD_UNLOCK(bd) mtx_unlock(BD_LOCKPTR((bd))) | |||||
#define BD_ASSERT_LOCKED(bd) mtx_assert(BD_LOCKPTR((bd)), MA_OWNED) | |||||
#define BD_RUN_LOCKPTR(bd) (&(bd)->bd_run_lock) | |||||
#define BD_RUN_LOCK(bd) mtx_lock(BD_RUN_LOCKPTR((bd))) | |||||
#define BD_RUN_UNLOCK(bd) mtx_unlock(BD_RUN_LOCKPTR((bd))) | |||||
#define BD_DOMAIN(bd) (bd - bdomain) | |||||
static struct buf *buf; /* buffer header pool */ | static struct buf *buf; /* buffer header pool */ | ||||
extern struct buf *swbuf; /* Swap buffer header pool. */ | extern struct buf *swbuf; /* Swap buffer header pool. */ | ||||
caddr_t unmapped_buf; | caddr_t unmapped_buf; | ||||
/* Used below and for softdep flushing threads in ufs/ffs/ffs_softdep.c */ | /* Used below and for softdep flushing threads in ufs/ffs/ffs_softdep.c */ | ||||
struct proc *bufdaemonproc; | struct proc *bufdaemonproc; | ||||
static int inmem(struct vnode *vp, daddr_t blkno); | static int inmem(struct vnode *vp, daddr_t blkno); | ||||
static void vm_hold_free_pages(struct buf *bp, int newbsize); | static void vm_hold_free_pages(struct buf *bp, int newbsize); | ||||
static void vm_hold_load_pages(struct buf *bp, vm_offset_t from, | static void vm_hold_load_pages(struct buf *bp, vm_offset_t from, | ||||
vm_offset_t to); | vm_offset_t to); | ||||
static void vfs_page_set_valid(struct buf *bp, vm_ooffset_t off, vm_page_t m); | static void vfs_page_set_valid(struct buf *bp, vm_ooffset_t off, vm_page_t m); | ||||
static void vfs_page_set_validclean(struct buf *bp, vm_ooffset_t off, | static void vfs_page_set_validclean(struct buf *bp, vm_ooffset_t off, | ||||
vm_page_t m); | vm_page_t m); | ||||
static void vfs_clean_pages_dirty_buf(struct buf *bp); | static void vfs_clean_pages_dirty_buf(struct buf *bp); | ||||
static void vfs_setdirty_locked_object(struct buf *bp); | static void vfs_setdirty_locked_object(struct buf *bp); | ||||
static void vfs_vmio_invalidate(struct buf *bp); | static void vfs_vmio_invalidate(struct buf *bp); | ||||
static void vfs_vmio_truncate(struct buf *bp, int npages); | static void vfs_vmio_truncate(struct buf *bp, int npages); | ||||
static void vfs_vmio_extend(struct buf *bp, int npages, int size); | static void vfs_vmio_extend(struct buf *bp, int npages, int size); | ||||
static int vfs_bio_clcheck(struct vnode *vp, int size, | static int vfs_bio_clcheck(struct vnode *vp, int size, | ||||
daddr_t lblkno, daddr_t blkno); | daddr_t lblkno, daddr_t blkno); | ||||
static void breada(struct vnode *, daddr_t *, int *, int, struct ucred *, int, | static void breada(struct vnode *, daddr_t *, int *, int, struct ucred *, int, | ||||
void (*)(struct buf *)); | void (*)(struct buf *)); | ||||
static int buf_flush(struct vnode *vp, int); | static int buf_flush(struct vnode *vp, struct bufdomain *, int); | ||||
static int flushbufqueues(struct vnode *, int, int); | static int flushbufqueues(struct vnode *, struct bufdomain *, int, int); | ||||
static void buf_daemon(void); | static void buf_daemon(void); | ||||
static __inline void bd_wakeup(void); | static __inline void bd_wakeup(void); | ||||
static int sysctl_runningspace(SYSCTL_HANDLER_ARGS); | static int sysctl_runningspace(SYSCTL_HANDLER_ARGS); | ||||
static void bufkva_reclaim(vmem_t *, int); | static void bufkva_reclaim(vmem_t *, int); | ||||
static void bufkva_free(struct buf *); | static void bufkva_free(struct buf *); | ||||
static int buf_import(void *, void **, int, int, int); | static int buf_import(void *, void **, int, int, int); | ||||
static void buf_release(void *, void **, int); | static void buf_release(void *, void **, int); | ||||
static void maxbcachebuf_adjust(void); | static void maxbcachebuf_adjust(void); | ||||
static inline struct bufdomain *bufdomain(struct buf *); | |||||
static void bq_remove(struct bufqueue *bq, struct buf *bp); | |||||
static void bq_insert(struct bufqueue *bq, struct buf *bp, bool unlock); | |||||
static int buf_recycle(struct bufdomain *, bool kva); | |||||
static void bq_init(struct bufqueue *bq, int qindex, int cpu, | |||||
const char *lockname); | |||||
static void bd_init(struct bufdomain *bd); | |||||
static int bd_flushall(struct bufdomain *bd); | |||||
static int sysctl_bufdomain_long(SYSCTL_HANDLER_ARGS); | |||||
static int sysctl_bufdomain_int(SYSCTL_HANDLER_ARGS); | |||||
static int sysctl_bufspace(SYSCTL_HANDLER_ARGS); | static int sysctl_bufspace(SYSCTL_HANDLER_ARGS); | ||||
int vmiodirenable = TRUE; | int vmiodirenable = TRUE; | ||||
SYSCTL_INT(_vfs, OID_AUTO, vmiodirenable, CTLFLAG_RW, &vmiodirenable, 0, | SYSCTL_INT(_vfs, OID_AUTO, vmiodirenable, CTLFLAG_RW, &vmiodirenable, 0, | ||||
"Use the VM system for directory writes"); | "Use the VM system for directory writes"); | ||||
long runningbufspace; | long runningbufspace; | ||||
SYSCTL_LONG(_vfs, OID_AUTO, runningbufspace, CTLFLAG_RD, &runningbufspace, 0, | SYSCTL_LONG(_vfs, OID_AUTO, runningbufspace, CTLFLAG_RD, &runningbufspace, 0, | ||||
"Amount of presently outstanding async buffer io"); | "Amount of presently outstanding async buffer io"); | ||||
SYSCTL_PROC(_vfs, OID_AUTO, bufspace, CTLTYPE_LONG|CTLFLAG_MPSAFE|CTLFLAG_RD, | SYSCTL_PROC(_vfs, OID_AUTO, bufspace, CTLTYPE_LONG|CTLFLAG_MPSAFE|CTLFLAG_RD, | ||||
NULL, 0, sysctl_bufspace, "L", "Physical memory used for buffers"); | NULL, 0, sysctl_bufspace, "L", "Physical memory used for buffers"); | ||||
static counter_u64_t bufkvaspace; | static counter_u64_t bufkvaspace; | ||||
SYSCTL_COUNTER_U64(_vfs, OID_AUTO, bufkvaspace, CTLFLAG_RD, &bufkvaspace, | SYSCTL_COUNTER_U64(_vfs, OID_AUTO, bufkvaspace, CTLFLAG_RD, &bufkvaspace, | ||||
"Kernel virtual memory used for buffers"); | "Kernel virtual memory used for buffers"); | ||||
static long maxbufspace; | static long maxbufspace; | ||||
SYSCTL_LONG(_vfs, OID_AUTO, maxbufspace, CTLFLAG_RW, &maxbufspace, 0, | SYSCTL_PROC(_vfs, OID_AUTO, maxbufspace, | ||||
CTLTYPE_LONG|CTLFLAG_MPSAFE|CTLFLAG_RW, &maxbufspace, | |||||
__offsetof(struct bufdomain, bd_maxbufspace), sysctl_bufdomain_long, "L", | |||||
"Maximum allowed value of bufspace (including metadata)"); | "Maximum allowed value of bufspace (including metadata)"); | ||||
static long bufmallocspace; | static long bufmallocspace; | ||||
SYSCTL_LONG(_vfs, OID_AUTO, bufmallocspace, CTLFLAG_RD, &bufmallocspace, 0, | SYSCTL_LONG(_vfs, OID_AUTO, bufmallocspace, CTLFLAG_RD, &bufmallocspace, 0, | ||||
"Amount of malloced memory for buffers"); | "Amount of malloced memory for buffers"); | ||||
static long maxbufmallocspace; | static long maxbufmallocspace; | ||||
SYSCTL_LONG(_vfs, OID_AUTO, maxmallocbufspace, CTLFLAG_RW, &maxbufmallocspace, | SYSCTL_LONG(_vfs, OID_AUTO, maxmallocbufspace, CTLFLAG_RW, &maxbufmallocspace, | ||||
0, "Maximum amount of malloced memory for buffers"); | 0, "Maximum amount of malloced memory for buffers"); | ||||
static long lobufspace; | static long lobufspace; | ||||
SYSCTL_LONG(_vfs, OID_AUTO, lobufspace, CTLFLAG_RW, &lobufspace, 0, | SYSCTL_PROC(_vfs, OID_AUTO, lobufspace, | ||||
CTLTYPE_LONG|CTLFLAG_MPSAFE|CTLFLAG_RW, &lobufspace, | |||||
__offsetof(struct bufdomain, bd_lobufspace), sysctl_bufdomain_long, "L", | |||||
"Minimum amount of buffers we want to have"); | "Minimum amount of buffers we want to have"); | ||||
long hibufspace; | long hibufspace; | ||||
SYSCTL_LONG(_vfs, OID_AUTO, hibufspace, CTLFLAG_RW, &hibufspace, 0, | SYSCTL_PROC(_vfs, OID_AUTO, hibufspace, | ||||
CTLTYPE_LONG|CTLFLAG_MPSAFE|CTLFLAG_RW, &hibufspace, | |||||
__offsetof(struct bufdomain, bd_hibufspace), sysctl_bufdomain_long, "L", | |||||
"Maximum allowed value of bufspace (excluding metadata)"); | "Maximum allowed value of bufspace (excluding metadata)"); | ||||
long bufspacethresh; | long bufspacethresh; | ||||
SYSCTL_LONG(_vfs, OID_AUTO, bufspacethresh, CTLFLAG_RW, &bufspacethresh, | SYSCTL_PROC(_vfs, OID_AUTO, bufspacethresh, | ||||
0, "Bufspace consumed before waking the daemon to free some"); | CTLTYPE_LONG|CTLFLAG_MPSAFE|CTLFLAG_RW, &bufspacethresh, | ||||
__offsetof(struct bufdomain, bd_bufspacethresh), sysctl_bufdomain_long, "L", | |||||
"Bufspace consumed before waking the daemon to free some"); | |||||
static counter_u64_t buffreekvacnt; | static counter_u64_t buffreekvacnt; | ||||
SYSCTL_COUNTER_U64(_vfs, OID_AUTO, buffreekvacnt, CTLFLAG_RW, &buffreekvacnt, | SYSCTL_COUNTER_U64(_vfs, OID_AUTO, buffreekvacnt, CTLFLAG_RW, &buffreekvacnt, | ||||
"Number of times we have freed the KVA space from some buffer"); | "Number of times we have freed the KVA space from some buffer"); | ||||
static counter_u64_t bufdefragcnt; | static counter_u64_t bufdefragcnt; | ||||
SYSCTL_COUNTER_U64(_vfs, OID_AUTO, bufdefragcnt, CTLFLAG_RW, &bufdefragcnt, | SYSCTL_COUNTER_U64(_vfs, OID_AUTO, bufdefragcnt, CTLFLAG_RW, &bufdefragcnt, | ||||
"Number of times we have had to repeat buffer allocation to defragment"); | "Number of times we have had to repeat buffer allocation to defragment"); | ||||
static long lorunningspace; | static long lorunningspace; | ||||
SYSCTL_PROC(_vfs, OID_AUTO, lorunningspace, CTLTYPE_LONG | CTLFLAG_MPSAFE | | SYSCTL_PROC(_vfs, OID_AUTO, lorunningspace, CTLTYPE_LONG | CTLFLAG_MPSAFE | | ||||
Show All 10 Lines | |||||
SYSCTL_INT(_vfs, OID_AUTO, bdwriteskip, CTLFLAG_RW, &bdwriteskip, | SYSCTL_INT(_vfs, OID_AUTO, bdwriteskip, CTLFLAG_RW, &bdwriteskip, | ||||
0, "Number of buffers supplied to bdwrite with snapshot deadlock risk"); | 0, "Number of buffers supplied to bdwrite with snapshot deadlock risk"); | ||||
int altbufferflushes; | int altbufferflushes; | ||||
SYSCTL_INT(_vfs, OID_AUTO, altbufferflushes, CTLFLAG_RW, &altbufferflushes, | SYSCTL_INT(_vfs, OID_AUTO, altbufferflushes, CTLFLAG_RW, &altbufferflushes, | ||||
0, "Number of fsync flushes to limit dirty buffers"); | 0, "Number of fsync flushes to limit dirty buffers"); | ||||
static int recursiveflushes; | static int recursiveflushes; | ||||
SYSCTL_INT(_vfs, OID_AUTO, recursiveflushes, CTLFLAG_RW, &recursiveflushes, | SYSCTL_INT(_vfs, OID_AUTO, recursiveflushes, CTLFLAG_RW, &recursiveflushes, | ||||
0, "Number of flushes skipped due to being recursive"); | 0, "Number of flushes skipped due to being recursive"); | ||||
static int numdirtybuffers; | static int sysctl_numdirtybuffers(SYSCTL_HANDLER_ARGS); | ||||
SYSCTL_INT(_vfs, OID_AUTO, numdirtybuffers, CTLFLAG_RD, &numdirtybuffers, 0, | SYSCTL_PROC(_vfs, OID_AUTO, numdirtybuffers, | ||||
CTLTYPE_INT|CTLFLAG_MPSAFE|CTLFLAG_RD, NULL, 0, sysctl_numdirtybuffers, "I", | |||||
"Number of buffers that are dirty (has unwritten changes) at the moment"); | "Number of buffers that are dirty (has unwritten changes) at the moment"); | ||||
static int lodirtybuffers; | static int lodirtybuffers; | ||||
SYSCTL_INT(_vfs, OID_AUTO, lodirtybuffers, CTLFLAG_RW, &lodirtybuffers, 0, | SYSCTL_PROC(_vfs, OID_AUTO, lodirtybuffers, | ||||
CTLTYPE_LONG|CTLFLAG_MPSAFE|CTLFLAG_RW, &lodirtybuffers, | |||||
__offsetof(struct bufdomain, bd_lodirtybuffers), sysctl_bufdomain_int, "L", | |||||
"How many buffers we want to have free before bufdaemon can sleep"); | "How many buffers we want to have free before bufdaemon can sleep"); | ||||
static int hidirtybuffers; | static int hidirtybuffers; | ||||
SYSCTL_INT(_vfs, OID_AUTO, hidirtybuffers, CTLFLAG_RW, &hidirtybuffers, 0, | SYSCTL_PROC(_vfs, OID_AUTO, hidirtybuffers, | ||||
CTLTYPE_LONG|CTLFLAG_MPSAFE|CTLFLAG_RW, &hidirtybuffers, | |||||
__offsetof(struct bufdomain, bd_hidirtybuffers), sysctl_bufdomain_int, "L", | |||||
"When the number of dirty buffers is considered severe"); | "When the number of dirty buffers is considered severe"); | ||||
int dirtybufthresh; | int dirtybufthresh; | ||||
SYSCTL_INT(_vfs, OID_AUTO, dirtybufthresh, CTLFLAG_RW, &dirtybufthresh, | SYSCTL_PROC(_vfs, OID_AUTO, dirtybufthresh, | ||||
0, "Number of bdwrite to bawrite conversions to clear dirty buffers"); | CTLTYPE_LONG|CTLFLAG_MPSAFE|CTLFLAG_RW, &dirtybufthresh, | ||||
__offsetof(struct bufdomain, bd_dirtybufthresh), sysctl_bufdomain_int, "L", | |||||
"Number of bdwrite to bawrite conversions to clear dirty buffers"); | |||||
static int numfreebuffers; | static int numfreebuffers; | ||||
SYSCTL_INT(_vfs, OID_AUTO, numfreebuffers, CTLFLAG_RD, &numfreebuffers, 0, | SYSCTL_INT(_vfs, OID_AUTO, numfreebuffers, CTLFLAG_RD, &numfreebuffers, 0, | ||||
"Number of free buffers"); | "Number of free buffers"); | ||||
static int lofreebuffers; | static int lofreebuffers; | ||||
SYSCTL_INT(_vfs, OID_AUTO, lofreebuffers, CTLFLAG_RW, &lofreebuffers, 0, | SYSCTL_PROC(_vfs, OID_AUTO, lofreebuffers, | ||||
CTLTYPE_LONG|CTLFLAG_MPSAFE|CTLFLAG_RW, &lofreebuffers, | |||||
__offsetof(struct bufdomain, bd_lofreebuffers), sysctl_bufdomain_int, "L", | |||||
"Target number of free buffers"); | "Target number of free buffers"); | ||||
static int hifreebuffers; | static int hifreebuffers; | ||||
SYSCTL_INT(_vfs, OID_AUTO, hifreebuffers, CTLFLAG_RW, &hifreebuffers, 0, | SYSCTL_PROC(_vfs, OID_AUTO, hifreebuffers, | ||||
CTLTYPE_LONG|CTLFLAG_MPSAFE|CTLFLAG_RW, &hifreebuffers, | |||||
__offsetof(struct bufdomain, bd_hifreebuffers), sysctl_bufdomain_int, "L", | |||||
"Threshold for clean buffer recycling"); | "Threshold for clean buffer recycling"); | ||||
static counter_u64_t getnewbufcalls; | static counter_u64_t getnewbufcalls; | ||||
SYSCTL_COUNTER_U64(_vfs, OID_AUTO, getnewbufcalls, CTLFLAG_RD, | SYSCTL_COUNTER_U64(_vfs, OID_AUTO, getnewbufcalls, CTLFLAG_RD, | ||||
&getnewbufcalls, "Number of calls to getnewbuf"); | &getnewbufcalls, "Number of calls to getnewbuf"); | ||||
static counter_u64_t getnewbufrestarts; | static counter_u64_t getnewbufrestarts; | ||||
SYSCTL_COUNTER_U64(_vfs, OID_AUTO, getnewbufrestarts, CTLFLAG_RD, | SYSCTL_COUNTER_U64(_vfs, OID_AUTO, getnewbufrestarts, CTLFLAG_RD, | ||||
&getnewbufrestarts, | &getnewbufrestarts, | ||||
"Number of times getnewbuf has had to restart a buffer acquisition"); | "Number of times getnewbuf has had to restart a buffer acquisition"); | ||||
▲ Show 20 Lines • Show All 68 Lines • ▼ Show 20 Lines | |||||
* Definitions for the buffer free lists. | * Definitions for the buffer free lists. | ||||
*/ | */ | ||||
#define QUEUE_NONE 0 /* on no queue */ | #define QUEUE_NONE 0 /* on no queue */ | ||||
#define QUEUE_EMPTY 1 /* empty buffer headers */ | #define QUEUE_EMPTY 1 /* empty buffer headers */ | ||||
#define QUEUE_DIRTY 2 /* B_DELWRI buffers */ | #define QUEUE_DIRTY 2 /* B_DELWRI buffers */ | ||||
#define QUEUE_CLEAN 3 /* non-B_DELWRI buffers */ | #define QUEUE_CLEAN 3 /* non-B_DELWRI buffers */ | ||||
#define QUEUE_SENTINEL 4 /* not an queue index, but mark for sentinel */ | #define QUEUE_SENTINEL 4 /* not an queue index, but mark for sentinel */ | ||||
struct bufqueue { | /* Maximum number of buffer domains. */ | ||||
struct mtx_padalign bq_lock; | #define BUF_DOMAINS 8 | ||||
TAILQ_HEAD(, buf) bq_queue; | |||||
uint8_t bq_index; | |||||
uint16_t bq_subqueue; | |||||
int bq_len; | |||||
} __aligned(CACHE_LINE_SIZE); | |||||
#define BQ_LOCKPTR(bq) (&(bq)->bq_lock) | struct bufdomainset bdlodirty; /* Domains > lodirty */ | ||||
#define BQ_LOCK(bq) mtx_lock(BQ_LOCKPTR((bq))) | struct bufdomainset bdhidirty; /* Domains > hidirty */ | ||||
#define BQ_UNLOCK(bq) mtx_unlock(BQ_LOCKPTR((bq))) | |||||
#define BQ_ASSERT_LOCKED(bq) mtx_assert(BQ_LOCKPTR((bq)), MA_OWNED) | |||||
struct bufqueue __exclusive_cache_line bqempty; | |||||
struct bufqueue __exclusive_cache_line bqdirty; | |||||
struct bufdomain { | |||||
struct bufqueue bd_subq[MAXCPU + 1]; /* Per-cpu sub queues + global */ | |||||
struct bufqueue *bd_cleanq; | |||||
struct mtx_padalign bd_run_lock; | |||||
/* Constants */ | |||||
long bd_maxbufspace; | |||||
long bd_hibufspace; | |||||
long bd_lobufspace; | |||||
long bd_bufspacethresh; | |||||
int bd_hifreebuffers; | |||||
int bd_lofreebuffers; | |||||
int bd_lim; | |||||
/* atomics */ | |||||
int bd_wanted; | |||||
int __aligned(CACHE_LINE_SIZE) bd_running; | |||||
long __aligned(CACHE_LINE_SIZE) bd_bufspace; | |||||
int __aligned(CACHE_LINE_SIZE) bd_freebuffers; | |||||
} __aligned(CACHE_LINE_SIZE); | |||||
#define BD_LOCKPTR(bd) (&(bd)->bd_cleanq->bq_lock) | |||||
#define BD_LOCK(bd) mtx_lock(BD_LOCKPTR((bd))) | |||||
#define BD_UNLOCK(bd) mtx_unlock(BD_LOCKPTR((bd))) | |||||
#define BD_ASSERT_LOCKED(bd) mtx_assert(BD_LOCKPTR((bd)), MA_OWNED) | |||||
#define BD_RUN_LOCKPTR(bd) (&(bd)->bd_run_lock) | |||||
#define BD_RUN_LOCK(bd) mtx_lock(BD_RUN_LOCKPTR((bd))) | |||||
#define BD_RUN_UNLOCK(bd) mtx_unlock(BD_RUN_LOCKPTR((bd))) | |||||
#define BD_DOMAIN(bd) (bd - bdclean) | |||||
/* Maximum number of clean buffer domains. */ | |||||
#define CLEAN_DOMAINS 8 | |||||
/* Configured number of clean queues. */ | /* Configured number of clean queues. */ | ||||
static int __read_mostly clean_domains; | static int __read_mostly buf_domains; | ||||
struct bufdomain __exclusive_cache_line bdclean[CLEAN_DOMAINS]; | BITSET_DEFINE(bufdomainset, BUF_DOMAINS); | ||||
struct bufdomain __exclusive_cache_line bdomain[BUF_DOMAINS]; | |||||
struct bufqueue __exclusive_cache_line bqempty; | |||||
static void bq_remove(struct bufqueue *bq, struct buf *bp); | |||||
static void bq_insert(struct bufqueue *bq, struct buf *bp, bool unlock); | |||||
static int buf_recycle(struct bufdomain *, bool kva); | |||||
static void bq_init(struct bufqueue *bq, int qindex, int cpu, | |||||
const char *lockname); | |||||
static void bd_init(struct bufdomain *bd); | |||||
static int bd_flushall(struct bufdomain *bd); | |||||
/* | /* | ||||
* per-cpu empty buffer cache. | * per-cpu empty buffer cache. | ||||
*/ | */ | ||||
uma_zone_t buf_zone; | uma_zone_t buf_zone; | ||||
/* | /* | ||||
* Single global constant for BUF_WMESG, to avoid getting multiple references. | * Single global constant for BUF_WMESG, to avoid getting multiple references. | ||||
* buf_wmesg is referred from macros. | * buf_wmesg is referred from macros. | ||||
Show All 23 Lines | if (value > hirunningspace) | ||||
error = EINVAL; | error = EINVAL; | ||||
else | else | ||||
lorunningspace = value; | lorunningspace = value; | ||||
} | } | ||||
mtx_unlock(&rbreqlock); | mtx_unlock(&rbreqlock); | ||||
return (error); | return (error); | ||||
} | } | ||||
static int | |||||
sysctl_bufdomain_int(SYSCTL_HANDLER_ARGS) | |||||
{ | |||||
int error; | |||||
int value; | |||||
int i; | |||||
value = *(int *)arg1; | |||||
error = sysctl_handle_int(oidp, &value, 0, req); | |||||
if (error != 0 || req->newptr == NULL) | |||||
return (error); | |||||
*(int *)arg1 = value; | |||||
for (i = 0; i < buf_domains; i++) | |||||
*(int *)(((uintptr_t)&bdomain[i]) + arg2) = | |||||
value / buf_domains; | |||||
return (error); | |||||
} | |||||
static int | |||||
sysctl_bufdomain_long(SYSCTL_HANDLER_ARGS) | |||||
{ | |||||
long value; | |||||
int error; | |||||
int i; | |||||
value = *(long *)arg1; | |||||
error = sysctl_handle_long(oidp, &value, 0, req); | |||||
if (error != 0 || req->newptr == NULL) | |||||
return (error); | |||||
*(long *)arg1 = value; | |||||
for (i = 0; i < buf_domains; i++) | |||||
*(long *)(((uintptr_t)&bdomain[i]) + arg2) = | |||||
value / buf_domains; | |||||
return (error); | |||||
} | |||||
#if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \ | #if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \ | ||||
defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7) | defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7) | ||||
static int | static int | ||||
sysctl_bufspace(SYSCTL_HANDLER_ARGS) | sysctl_bufspace(SYSCTL_HANDLER_ARGS) | ||||
{ | { | ||||
long lvalue; | long lvalue; | ||||
int ivalue; | int ivalue; | ||||
int i; | int i; | ||||
lvalue = 0; | lvalue = 0; | ||||
for (i = 0; i < clean_domains; i++) | for (i = 0; i < buf_domains; i++) | ||||
lvalue += bdclean[i].bd_bufspace; | lvalue += bdomain[i].bd_bufspace; | ||||
if (sizeof(int) == sizeof(long) || req->oldlen >= sizeof(long)) | if (sizeof(int) == sizeof(long) || req->oldlen >= sizeof(long)) | ||||
return (sysctl_handle_long(oidp, &lvalue, 0, req)); | return (sysctl_handle_long(oidp, &lvalue, 0, req)); | ||||
if (lvalue > INT_MAX) | if (lvalue > INT_MAX) | ||||
/* On overflow, still write out a long to trigger ENOMEM. */ | /* On overflow, still write out a long to trigger ENOMEM. */ | ||||
return (sysctl_handle_long(oidp, &lvalue, 0, req)); | return (sysctl_handle_long(oidp, &lvalue, 0, req)); | ||||
ivalue = lvalue; | ivalue = lvalue; | ||||
return (sysctl_handle_int(oidp, &ivalue, 0, req)); | return (sysctl_handle_int(oidp, &ivalue, 0, req)); | ||||
} | } | ||||
#else | #else | ||||
static int | static int | ||||
sysctl_bufspace(SYSCTL_HANDLER_ARGS) | sysctl_bufspace(SYSCTL_HANDLER_ARGS) | ||||
{ | { | ||||
long lvalue; | long lvalue; | ||||
int i; | int i; | ||||
lvalue = 0; | lvalue = 0; | ||||
for (i = 0; i < clean_domains; i++) | for (i = 0; i < buf_domains; i++) | ||||
lvalue += bdclean[i].bd_bufspace; | lvalue += bdomain[i].bd_bufspace; | ||||
return (sysctl_handle_long(oidp, &lvalue, 0, req)); | return (sysctl_handle_long(oidp, &lvalue, 0, req)); | ||||
} | } | ||||
#endif | #endif | ||||
static int | |||||
sysctl_numdirtybuffers(SYSCTL_HANDLER_ARGS) | |||||
{ | |||||
int value; | |||||
int i; | |||||
value = 0; | |||||
for (i = 0; i < buf_domains; i++) | |||||
value += bdomain[i].bd_numdirtybuffers; | |||||
return (sysctl_handle_int(oidp, &value, 0, req)); | |||||
} | |||||
/* | /* | ||||
* bdirtywakeup: | * bdirtywakeup: | ||||
* | * | ||||
* Wakeup any bwillwrite() waiters. | * Wakeup any bwillwrite() waiters. | ||||
*/ | */ | ||||
static void | static void | ||||
bdirtywakeup(void) | bdirtywakeup(void) | ||||
{ | { | ||||
mtx_lock(&bdirtylock); | mtx_lock(&bdirtylock); | ||||
if (bdirtywait) { | if (bdirtywait) { | ||||
bdirtywait = 0; | bdirtywait = 0; | ||||
wakeup(&bdirtywait); | wakeup(&bdirtywait); | ||||
} | } | ||||
mtx_unlock(&bdirtylock); | mtx_unlock(&bdirtylock); | ||||
} | } | ||||
/* | /* | ||||
* bd_clear: | |||||
* | |||||
* Clear a domain from the appropriate bitsets when dirtybuffers | |||||
* is decremented. | |||||
*/ | |||||
static void | |||||
bd_clear(struct bufdomain *bd) | |||||
{ | |||||
mtx_lock(&bdirtylock); | |||||
if (bd->bd_numdirtybuffers <= bd->bd_lodirtybuffers) | |||||
BIT_CLR(BUF_DOMAINS, BD_DOMAIN(bd), &bdlodirty); | |||||
if (bd->bd_numdirtybuffers <= bd->bd_hidirtybuffers) | |||||
BIT_CLR(BUF_DOMAINS, BD_DOMAIN(bd), &bdhidirty); | |||||
mtx_unlock(&bdirtylock); | |||||
} | |||||
/* | |||||
* bd_set: | |||||
* | |||||
* Set a domain in the appropriate bitsets when dirtybuffers | |||||
* is incremented. | |||||
*/ | |||||
static void | |||||
bd_set(struct bufdomain *bd) | |||||
{ | |||||
mtx_lock(&bdirtylock); | |||||
if (bd->bd_numdirtybuffers > bd->bd_lodirtybuffers) | |||||
BIT_SET(BUF_DOMAINS, BD_DOMAIN(bd), &bdlodirty); | |||||
if (bd->bd_numdirtybuffers > bd->bd_hidirtybuffers) | |||||
BIT_SET(BUF_DOMAINS, BD_DOMAIN(bd), &bdhidirty); | |||||
mtx_unlock(&bdirtylock); | |||||
} | |||||
/* | |||||
* bdirtysub: | * bdirtysub: | ||||
* | * | ||||
* Decrement the numdirtybuffers count by one and wakeup any | * Decrement the numdirtybuffers count by one and wakeup any | ||||
* threads blocked in bwillwrite(). | * threads blocked in bwillwrite(). | ||||
*/ | */ | ||||
static void | static void | ||||
bdirtysub(void) | bdirtysub(struct buf *bp) | ||||
{ | { | ||||
struct bufdomain *bd; | |||||
int num; | |||||
if (atomic_fetchadd_int(&numdirtybuffers, -1) == | bd = bufdomain(bp); | ||||
(lodirtybuffers + hidirtybuffers) / 2) | num = atomic_fetchadd_int(&bd->bd_numdirtybuffers, -1); | ||||
if (num == (bd->bd_lodirtybuffers + bd->bd_hidirtybuffers) / 2) | |||||
bdirtywakeup(); | bdirtywakeup(); | ||||
if (num == bd->bd_lodirtybuffers || num == bd->bd_hidirtybuffers) | |||||
bd_clear(bd); | |||||
} | } | ||||
/* | /* | ||||
* bdirtyadd: | * bdirtyadd: | ||||
* | * | ||||
* Increment the numdirtybuffers count by one and wakeup the buf | * Increment the numdirtybuffers count by one and wakeup the buf | ||||
* daemon if needed. | * daemon if needed. | ||||
*/ | */ | ||||
static void | static void | ||||
bdirtyadd(void) | bdirtyadd(struct buf *bp) | ||||
{ | { | ||||
struct bufdomain *bd; | |||||
int num; | |||||
/* | /* | ||||
* Only do the wakeup once as we cross the boundary. The | * Only do the wakeup once as we cross the boundary. The | ||||
* buf daemon will keep running until the condition clears. | * buf daemon will keep running until the condition clears. | ||||
*/ | */ | ||||
if (atomic_fetchadd_int(&numdirtybuffers, 1) == | bd = bufdomain(bp); | ||||
(lodirtybuffers + hidirtybuffers) / 2) | num = atomic_fetchadd_int(&bd->bd_numdirtybuffers, 1); | ||||
if (num == (bd->bd_lodirtybuffers + bd->bd_hidirtybuffers) / 2) | |||||
bd_wakeup(); | bd_wakeup(); | ||||
if (num == bd->bd_lodirtybuffers || num == bd->bd_hidirtybuffers) | |||||
bd_set(bd); | |||||
} | } | ||||
/* | /* | ||||
* bufspace_daemon_wakeup: | * bufspace_daemon_wakeup: | ||||
* | * | ||||
* Wakeup the daemons responsible for freeing clean bufs. | * Wakeup the daemons responsible for freeing clean bufs. | ||||
*/ | */ | ||||
static void | static void | ||||
▲ Show 20 Lines • Show All 48 Lines • ▼ Show 20 Lines | |||||
bufspace_adjust(struct buf *bp, int bufsize) | bufspace_adjust(struct buf *bp, int bufsize) | ||||
{ | { | ||||
struct bufdomain *bd; | struct bufdomain *bd; | ||||
long space; | long space; | ||||
int diff; | int diff; | ||||
KASSERT((bp->b_flags & B_MALLOC) == 0, | KASSERT((bp->b_flags & B_MALLOC) == 0, | ||||
("bufspace_adjust: malloc buf %p", bp)); | ("bufspace_adjust: malloc buf %p", bp)); | ||||
bd = &bdclean[bp->b_domain]; | bd = bufdomain(bp); | ||||
diff = bufsize - bp->b_bufsize; | diff = bufsize - bp->b_bufsize; | ||||
if (diff < 0) { | if (diff < 0) { | ||||
atomic_subtract_long(&bd->bd_bufspace, -diff); | atomic_subtract_long(&bd->bd_bufspace, -diff); | ||||
} else { | } else if (diff > 0) { | ||||
space = atomic_fetchadd_long(&bd->bd_bufspace, diff); | space = atomic_fetchadd_long(&bd->bd_bufspace, diff); | ||||
/* Wake up the daemon on the transition. */ | /* Wake up the daemon on the transition. */ | ||||
if (space < bd->bd_bufspacethresh && | if (space < bd->bd_bufspacethresh && | ||||
space + diff >= bd->bd_bufspacethresh) | space + diff >= bd->bd_bufspacethresh) | ||||
bufspace_daemon_wakeup(bd); | bufspace_daemon_wakeup(bd); | ||||
} | } | ||||
bp->b_bufsize = bufsize; | bp->b_bufsize = bufsize; | ||||
} | } | ||||
▲ Show 20 Lines • Show All 78 Lines • ▼ Show 20 Lines | if (vp != NULL && vp->v_type != VCHR && | ||||
* Play bufdaemon. The getnewbuf() function | * Play bufdaemon. The getnewbuf() function | ||||
* may be called while the thread owns lock | * may be called while the thread owns lock | ||||
* for another dirty buffer for the same | * for another dirty buffer for the same | ||||
* vnode, which makes it impossible to use | * vnode, which makes it impossible to use | ||||
* VOP_FSYNC() there, due to the buffer lock | * VOP_FSYNC() there, due to the buffer lock | ||||
* recursion. | * recursion. | ||||
*/ | */ | ||||
td->td_pflags |= TDP_BUFNEED | TDP_NORUNNINGBUF; | td->td_pflags |= TDP_BUFNEED | TDP_NORUNNINGBUF; | ||||
fl = buf_flush(vp, flushbufqtarget); | fl = buf_flush(vp, bd, flushbufqtarget); | ||||
td->td_pflags &= norunbuf; | td->td_pflags &= norunbuf; | ||||
BD_LOCK(bd); | BD_LOCK(bd); | ||||
if (fl != 0) | if (fl != 0) | ||||
continue; | continue; | ||||
if (bd->bd_wanted == 0) | if (bd->bd_wanted == 0) | ||||
break; | break; | ||||
} | } | ||||
error = msleep(&bd->bd_wanted, BD_LOCKPTR(bd), | error = msleep(&bd->bd_wanted, BD_LOCKPTR(bd), | ||||
▲ Show 20 Lines • Show All 45 Lines • ▼ Show 20 Lines | for (;;) { | ||||
* clean buffers. This is the 'bufspace' sleep below | * clean buffers. This is the 'bufspace' sleep below | ||||
* which will inefficiently trade bufs with bqrelse | * which will inefficiently trade bufs with bqrelse | ||||
* until we return to condition 2. | * until we return to condition 2. | ||||
*/ | */ | ||||
do { | do { | ||||
if (buf_recycle(bd, false) != 0) { | if (buf_recycle(bd, false) != 0) { | ||||
if (bd_flushall(bd)) | if (bd_flushall(bd)) | ||||
continue; | continue; | ||||
/* | |||||
* Speedup dirty if we've run out of clean | |||||
* buffers. This is possible in particular | |||||
* because softdep may held many bufs locked | |||||
* pending writes to other bufs which are | |||||
* marked for delayed write, exhausting | |||||
* clean space until they are written. | |||||
*/ | |||||
bd_speedup(); | |||||
BD_LOCK(bd); | BD_LOCK(bd); | ||||
if (bd->bd_wanted) { | if (bd->bd_wanted) { | ||||
msleep(&bd->bd_wanted, BD_LOCKPTR(bd), | msleep(&bd->bd_wanted, BD_LOCKPTR(bd), | ||||
PRIBIO|PDROP, "bufspace", hz/10); | PRIBIO|PDROP, "bufspace", hz/10); | ||||
} else | } else | ||||
BD_UNLOCK(bd); | BD_UNLOCK(bd); | ||||
} | } | ||||
maybe_yield(); | maybe_yield(); | ||||
▲ Show 20 Lines • Show All 309 Lines • ▼ Show 20 Lines | |||||
{ | { | ||||
struct buf *bp; | struct buf *bp; | ||||
int i; | int i; | ||||
KASSERT(maxbcachebuf >= MAXBSIZE, | KASSERT(maxbcachebuf >= MAXBSIZE, | ||||
("maxbcachebuf (%d) must be >= MAXBSIZE (%d)\n", maxbcachebuf, | ("maxbcachebuf (%d) must be >= MAXBSIZE (%d)\n", maxbcachebuf, | ||||
MAXBSIZE)); | MAXBSIZE)); | ||||
bq_init(&bqempty, QUEUE_EMPTY, -1, "bufq empty lock"); | bq_init(&bqempty, QUEUE_EMPTY, -1, "bufq empty lock"); | ||||
bq_init(&bqdirty, QUEUE_DIRTY, -1, "bufq dirty lock"); | |||||
mtx_init(&rbreqlock, "runningbufspace lock", NULL, MTX_DEF); | mtx_init(&rbreqlock, "runningbufspace lock", NULL, MTX_DEF); | ||||
mtx_init(&bdlock, "buffer daemon lock", NULL, MTX_DEF); | mtx_init(&bdlock, "buffer daemon lock", NULL, MTX_DEF); | ||||
mtx_init(&bdirtylock, "dirty buf lock", NULL, MTX_DEF); | mtx_init(&bdirtylock, "dirty buf lock", NULL, MTX_DEF); | ||||
unmapped_buf = (caddr_t)kva_alloc(MAXPHYS); | unmapped_buf = (caddr_t)kva_alloc(MAXPHYS); | ||||
/* finally, initialize each buffer header and stick on empty q */ | /* finally, initialize each buffer header and stick on empty q */ | ||||
for (i = 0; i < nbuf; i++) { | for (i = 0; i < nbuf; i++) { | ||||
▲ Show 20 Lines • Show All 51 Lines • ▼ Show 20 Lines | bufinit(void) | ||||
maxbufmallocspace = hibufspace / 20; | maxbufmallocspace = hibufspace / 20; | ||||
/* | /* | ||||
* Reduce the chance of a deadlock occurring by limiting the number | * Reduce the chance of a deadlock occurring by limiting the number | ||||
* of delayed-write dirty buffers we allow to stack up. | * of delayed-write dirty buffers we allow to stack up. | ||||
*/ | */ | ||||
hidirtybuffers = nbuf / 4 + 20; | hidirtybuffers = nbuf / 4 + 20; | ||||
dirtybufthresh = hidirtybuffers * 9 / 10; | dirtybufthresh = hidirtybuffers * 9 / 10; | ||||
numdirtybuffers = 0; | |||||
/* | /* | ||||
* To support extreme low-memory systems, make sure hidirtybuffers | * To support extreme low-memory systems, make sure hidirtybuffers | ||||
* cannot eat up all available buffer space. This occurs when our | * cannot eat up all available buffer space. This occurs when our | ||||
* minimum cannot be met. We try to size hidirtybuffers to 3/4 our | * minimum cannot be met. We try to size hidirtybuffers to 3/4 our | ||||
* buffer space assuming BKVASIZE'd buffers. | * buffer space assuming BKVASIZE'd buffers. | ||||
*/ | */ | ||||
while ((long)hidirtybuffers * BKVASIZE > 3 * hibufspace / 4) { | while ((long)hidirtybuffers * BKVASIZE > 3 * hibufspace / 4) { | ||||
hidirtybuffers >>= 1; | hidirtybuffers >>= 1; | ||||
Show All 18 Lines | bufinit(void) | ||||
buf_zone = uma_zcache_create("buf free cache", sizeof(struct buf), | buf_zone = uma_zcache_create("buf free cache", sizeof(struct buf), | ||||
NULL, NULL, NULL, NULL, buf_import, buf_release, NULL, 0); | NULL, NULL, NULL, NULL, buf_import, buf_release, NULL, 0); | ||||
/* | /* | ||||
* Size the clean queue according to the amount of buffer space. | * Size the clean queue according to the amount of buffer space. | ||||
 * One queue per-256mb up to the max. More queues give better
* concurrency but less accurate LRU. | * concurrency but less accurate LRU. | ||||
*/ | */ | ||||
clean_domains = MIN(howmany(maxbufspace, 256*1024*1024), CLEAN_DOMAINS); | buf_domains = MIN(howmany(maxbufspace, 256*1024*1024), BUF_DOMAINS); | ||||
for (i = 0 ; i < clean_domains; i++) { | for (i = 0 ; i < buf_domains; i++) { | ||||
struct bufdomain *bd; | struct bufdomain *bd; | ||||
bd = &bdclean[i]; | bd = &bdomain[i]; | ||||
bd_init(bd); | bd_init(bd); | ||||
bd->bd_freebuffers = nbuf / clean_domains; | bd->bd_freebuffers = nbuf / buf_domains; | ||||
bd->bd_hifreebuffers = hifreebuffers / clean_domains; | bd->bd_hifreebuffers = hifreebuffers / buf_domains; | ||||
bd->bd_lofreebuffers = lofreebuffers / clean_domains; | bd->bd_lofreebuffers = lofreebuffers / buf_domains; | ||||
bd->bd_bufspace = 0; | bd->bd_bufspace = 0; | ||||
bd->bd_maxbufspace = maxbufspace / clean_domains; | bd->bd_maxbufspace = maxbufspace / buf_domains; | ||||
bd->bd_hibufspace = hibufspace / clean_domains; | bd->bd_hibufspace = hibufspace / buf_domains; | ||||
bd->bd_lobufspace = lobufspace / clean_domains; | bd->bd_lobufspace = lobufspace / buf_domains; | ||||
bd->bd_bufspacethresh = bufspacethresh / clean_domains; | bd->bd_bufspacethresh = bufspacethresh / buf_domains; | ||||
bd->bd_numdirtybuffers = 0; | |||||
bd->bd_hidirtybuffers = hidirtybuffers / buf_domains; | |||||
bd->bd_lodirtybuffers = lodirtybuffers / buf_domains; | |||||
bd->bd_dirtybufthresh = dirtybufthresh / buf_domains; | |||||
/* Don't allow more than 2% of bufs in the per-cpu caches. */ | /* Don't allow more than 2% of bufs in the per-cpu caches. */ | ||||
bd->bd_lim = nbuf / clean_domains / 50 / mp_ncpus; | bd->bd_lim = nbuf / buf_domains / 50 / mp_ncpus; | ||||
} | } | ||||
getnewbufcalls = counter_u64_alloc(M_WAITOK); | getnewbufcalls = counter_u64_alloc(M_WAITOK); | ||||
getnewbufrestarts = counter_u64_alloc(M_WAITOK); | getnewbufrestarts = counter_u64_alloc(M_WAITOK); | ||||
mappingrestarts = counter_u64_alloc(M_WAITOK); | mappingrestarts = counter_u64_alloc(M_WAITOK); | ||||
numbufallocfails = counter_u64_alloc(M_WAITOK); | numbufallocfails = counter_u64_alloc(M_WAITOK); | ||||
notbufdflushes = counter_u64_alloc(M_WAITOK); | notbufdflushes = counter_u64_alloc(M_WAITOK); | ||||
buffreekvacnt = counter_u64_alloc(M_WAITOK); | buffreekvacnt = counter_u64_alloc(M_WAITOK); | ||||
bufdefragcnt = counter_u64_alloc(M_WAITOK); | bufdefragcnt = counter_u64_alloc(M_WAITOK); | ||||
▲ Show 20 Lines • Show All 167 Lines • ▼ Show 20 Lines | bpmap_qenter(struct buf *bp) | ||||
* bp->b_offset may be offset into the first page. | * bp->b_offset may be offset into the first page. | ||||
*/ | */ | ||||
bp->b_data = (caddr_t)trunc_page((vm_offset_t)bp->b_data); | bp->b_data = (caddr_t)trunc_page((vm_offset_t)bp->b_data); | ||||
pmap_qenter((vm_offset_t)bp->b_data, bp->b_pages, bp->b_npages); | pmap_qenter((vm_offset_t)bp->b_data, bp->b_pages, bp->b_npages); | ||||
bp->b_data = (caddr_t)((vm_offset_t)bp->b_data | | bp->b_data = (caddr_t)((vm_offset_t)bp->b_data | | ||||
(vm_offset_t)(bp->b_offset & PAGE_MASK)); | (vm_offset_t)(bp->b_offset & PAGE_MASK)); | ||||
} | } | ||||
/*
 * Return the bufdomain this buffer belongs to: bp->b_domain indexes
 * the global bdomain[] array and is assigned when the buffer's bufobj
 * is initialized (see bufobj_init()).
 */
static inline struct bufdomain *
bufdomain(struct buf *bp)
{
	return (&bdomain[bp->b_domain]);
}
static struct bufqueue * | static struct bufqueue * | ||||
bufqueue(struct buf *bp) | bufqueue(struct buf *bp) | ||||
{ | { | ||||
switch (bp->b_qindex) { | switch (bp->b_qindex) { | ||||
case QUEUE_NONE: | case QUEUE_NONE: | ||||
/* FALLTHROUGH */ | /* FALLTHROUGH */ | ||||
case QUEUE_SENTINEL: | case QUEUE_SENTINEL: | ||||
return (NULL); | return (NULL); | ||||
case QUEUE_EMPTY: | case QUEUE_EMPTY: | ||||
return (&bqempty); | return (&bqempty); | ||||
case QUEUE_DIRTY: | case QUEUE_DIRTY: | ||||
return (&bqdirty); | return (&bufdomain(bp)->bd_dirtyq); | ||||
case QUEUE_CLEAN: | case QUEUE_CLEAN: | ||||
return (&bdclean[bp->b_domain].bd_subq[bp->b_subqueue]); | return (&bufdomain(bp)->bd_subq[bp->b_subqueue]); | ||||
default: | default: | ||||
break; | break; | ||||
} | } | ||||
panic("bufqueue(%p): Unhandled type %d\n", bp, bp->b_qindex); | panic("bufqueue(%p): Unhandled type %d\n", bp, bp->b_qindex); | ||||
} | } | ||||
/* | /* | ||||
* Return the locked bufqueue that bp is a member of. | * Return the locked bufqueue that bp is a member of. | ||||
▲ Show 20 Lines • Show All 46 Lines • ▼ Show 20 Lines | if (bp->b_qindex == qindex) { | ||||
bp->b_flags &= ~B_REMFREE; | bp->b_flags &= ~B_REMFREE; | ||||
BUF_UNLOCK(bp); | BUF_UNLOCK(bp); | ||||
return; | return; | ||||
} | } | ||||
bq = bufqueue_acquire(bp); | bq = bufqueue_acquire(bp); | ||||
bq_remove(bq, bp); | bq_remove(bq, bp); | ||||
BQ_UNLOCK(bq); | BQ_UNLOCK(bq); | ||||
} | } | ||||
bd = bufdomain(bp); | |||||
if (qindex == QUEUE_CLEAN) { | if (qindex == QUEUE_CLEAN) { | ||||
bd = &bdclean[bp->b_domain]; | |||||
if (bd->bd_lim != 0) | if (bd->bd_lim != 0) | ||||
bq = &bd->bd_subq[PCPU_GET(cpuid)]; | bq = &bd->bd_subq[PCPU_GET(cpuid)]; | ||||
else | else | ||||
bq = bd->bd_cleanq; | bq = bd->bd_cleanq; | ||||
} else | } else | ||||
bq = &bqdirty; | bq = &bd->bd_dirtyq; | ||||
bq_insert(bq, bp, true); | bq_insert(bq, bp, true); | ||||
} | } | ||||
/* | /* | ||||
* buf_free: | * buf_free: | ||||
* | * | ||||
* Free a buffer to the buf zone once it no longer has valid contents. | * Free a buffer to the buf zone once it no longer has valid contents. | ||||
*/ | */ | ||||
Show All 11 Lines | buf_free(struct buf *bp) | ||||
} | } | ||||
if (bp->b_wcred != NOCRED) { | if (bp->b_wcred != NOCRED) { | ||||
crfree(bp->b_wcred); | crfree(bp->b_wcred); | ||||
bp->b_wcred = NOCRED; | bp->b_wcred = NOCRED; | ||||
} | } | ||||
if (!LIST_EMPTY(&bp->b_dep)) | if (!LIST_EMPTY(&bp->b_dep)) | ||||
buf_deallocate(bp); | buf_deallocate(bp); | ||||
bufkva_free(bp); | bufkva_free(bp); | ||||
atomic_add_int(&bdclean[bp->b_domain].bd_freebuffers, 1); | atomic_add_int(&bufdomain(bp)->bd_freebuffers, 1); | ||||
BUF_UNLOCK(bp); | BUF_UNLOCK(bp); | ||||
uma_zfree(buf_zone, bp); | uma_zfree(buf_zone, bp); | ||||
} | } | ||||
/* | /* | ||||
* buf_import: | * buf_import: | ||||
* | * | ||||
* Import bufs into the uma cache from the buf list. The system still | * Import bufs into the uma cache from the buf list. The system still | ||||
▲ Show 20 Lines • Show All 259 Lines • ▼ Show 20 Lines | |||||
} | } | ||||
static void | static void | ||||
bd_init(struct bufdomain *bd) | bd_init(struct bufdomain *bd) | ||||
{ | { | ||||
int domain; | int domain; | ||||
int i; | int i; | ||||
domain = bd - bdclean; | domain = bd - bdomain; | ||||
bd->bd_cleanq = &bd->bd_subq[mp_maxid + 1]; | bd->bd_cleanq = &bd->bd_subq[mp_maxid + 1]; | ||||
bq_init(bd->bd_cleanq, QUEUE_CLEAN, mp_maxid + 1, "bufq clean lock"); | bq_init(bd->bd_cleanq, QUEUE_CLEAN, mp_maxid + 1, "bufq clean lock"); | ||||
bq_init(&bd->bd_dirtyq, QUEUE_DIRTY, -1, "bufq dirty lock"); | |||||
for (i = 0; i <= mp_maxid; i++) | for (i = 0; i <= mp_maxid; i++) | ||||
bq_init(&bd->bd_subq[i], QUEUE_CLEAN, i, | bq_init(&bd->bd_subq[i], QUEUE_CLEAN, i, | ||||
"bufq clean subqueue lock"); | "bufq clean subqueue lock"); | ||||
mtx_init(&bd->bd_run_lock, "bufspace daemon run lock", NULL, MTX_DEF); | mtx_init(&bd->bd_run_lock, "bufspace daemon run lock", NULL, MTX_DEF); | ||||
} | } | ||||
/* | /* | ||||
* bq_remove: | * bq_remove: | ||||
▲ Show 20 Lines • Show All 75 Lines • ▼ Show 20 Lines | |||||
static void | static void | ||||
bq_insert(struct bufqueue *bq, struct buf *bp, bool unlock) | bq_insert(struct bufqueue *bq, struct buf *bp, bool unlock) | ||||
{ | { | ||||
struct bufdomain *bd; | struct bufdomain *bd; | ||||
if (bp->b_qindex != QUEUE_NONE) | if (bp->b_qindex != QUEUE_NONE) | ||||
panic("bq_insert: free buffer %p onto another queue?", bp); | panic("bq_insert: free buffer %p onto another queue?", bp); | ||||
bd = &bdclean[bp->b_domain]; | bd = bufdomain(bp); | ||||
if (bp->b_flags & B_AGE) { | if (bp->b_flags & B_AGE) { | ||||
/* Place this buf directly on the real queue. */ | /* Place this buf directly on the real queue. */ | ||||
if (bq->bq_index == QUEUE_CLEAN) | if (bq->bq_index == QUEUE_CLEAN) | ||||
bq = bd->bd_cleanq; | bq = bd->bd_cleanq; | ||||
BQ_LOCK(bq); | BQ_LOCK(bq); | ||||
TAILQ_INSERT_HEAD(&bq->bq_queue, bp, b_freelist); | TAILQ_INSERT_HEAD(&bq->bq_queue, bp, b_freelist); | ||||
} else { | } else { | ||||
BQ_LOCK(bq); | BQ_LOCK(bq); | ||||
▲ Show 20 Lines • Show All 100 Lines • ▼ Show 20 Lines | |||||
bufkva_reclaim(vmem_t *vmem, int flags) | bufkva_reclaim(vmem_t *vmem, int flags) | ||||
{ | { | ||||
bool done; | bool done; | ||||
int q; | int q; | ||||
int i; | int i; | ||||
done = false; | done = false; | ||||
for (i = 0; i < 5; i++) { | for (i = 0; i < 5; i++) { | ||||
for (q = 0; q < clean_domains; q++) | for (q = 0; q < buf_domains; q++) | ||||
if (buf_recycle(&bdclean[q], true) != 0) | if (buf_recycle(&bdomain[q], true) != 0) | ||||
done = true; | done = true; | ||||
if (done) | if (done) | ||||
break; | break; | ||||
} | } | ||||
return; | return; | ||||
} | } | ||||
/* | /* | ||||
▲ Show 20 Lines • Show All 375 Lines • ▼ Show 20 Lines | KASSERT(bp->b_flags & B_REMFREE || bp->b_qindex == QUEUE_NONE, | ||||
("bdirty: buffer %p still on queue %d", bp, bp->b_qindex)); | ("bdirty: buffer %p still on queue %d", bp, bp->b_qindex)); | ||||
BUF_ASSERT_HELD(bp); | BUF_ASSERT_HELD(bp); | ||||
bp->b_flags &= ~(B_RELBUF); | bp->b_flags &= ~(B_RELBUF); | ||||
bp->b_iocmd = BIO_WRITE; | bp->b_iocmd = BIO_WRITE; | ||||
if ((bp->b_flags & B_DELWRI) == 0) { | if ((bp->b_flags & B_DELWRI) == 0) { | ||||
bp->b_flags |= /* XXX B_DONE | */ B_DELWRI; | bp->b_flags |= /* XXX B_DONE | */ B_DELWRI; | ||||
reassignbuf(bp); | reassignbuf(bp); | ||||
bdirtyadd(); | bdirtyadd(bp); | ||||
} | } | ||||
} | } | ||||
/* | /* | ||||
* bundirty: | * bundirty: | ||||
* | * | ||||
* Clear B_DELWRI for buffer. | * Clear B_DELWRI for buffer. | ||||
* | * | ||||
Show All 11 Lines | bundirty(struct buf *bp) | ||||
KASSERT(bp->b_bufobj != NULL, ("No b_bufobj %p", bp)); | KASSERT(bp->b_bufobj != NULL, ("No b_bufobj %p", bp)); | ||||
KASSERT(bp->b_flags & B_REMFREE || bp->b_qindex == QUEUE_NONE, | KASSERT(bp->b_flags & B_REMFREE || bp->b_qindex == QUEUE_NONE, | ||||
("bundirty: buffer %p still on queue %d", bp, bp->b_qindex)); | ("bundirty: buffer %p still on queue %d", bp, bp->b_qindex)); | ||||
BUF_ASSERT_HELD(bp); | BUF_ASSERT_HELD(bp); | ||||
if (bp->b_flags & B_DELWRI) { | if (bp->b_flags & B_DELWRI) { | ||||
bp->b_flags &= ~B_DELWRI; | bp->b_flags &= ~B_DELWRI; | ||||
reassignbuf(bp); | reassignbuf(bp); | ||||
bdirtysub(); | bdirtysub(bp); | ||||
} | } | ||||
/* | /* | ||||
* Since it is now being written, we can clear its deferred write flag. | * Since it is now being written, we can clear its deferred write flag. | ||||
*/ | */ | ||||
bp->b_flags &= ~B_DEFERRED; | bp->b_flags &= ~B_DEFERRED; | ||||
} | } | ||||
/* | /* | ||||
▲ Show 20 Lines • Show All 55 Lines • ▼ Show 20 Lines | |||||
* dirty buffers so we block here. By blocking prior to the locking | * dirty buffers so we block here. By blocking prior to the locking | ||||
* of any vnodes we attempt to avoid the situation where a locked vnode | * of any vnodes we attempt to avoid the situation where a locked vnode | ||||
* prevents the various system daemons from flushing related buffers. | * prevents the various system daemons from flushing related buffers. | ||||
*/ | */ | ||||
void | void | ||||
bwillwrite(void) | bwillwrite(void) | ||||
{ | { | ||||
if (numdirtybuffers >= hidirtybuffers) { | if (buf_dirty_count_severe()) { | ||||
mtx_lock(&bdirtylock); | mtx_lock(&bdirtylock); | ||||
while (numdirtybuffers >= hidirtybuffers) { | while (buf_dirty_count_severe()) { | ||||
bdirtywait = 1; | bdirtywait = 1; | ||||
msleep(&bdirtywait, &bdirtylock, (PRIBIO + 4), | msleep(&bdirtywait, &bdirtylock, (PRIBIO + 4), | ||||
"flswai", 0); | "flswai", 0); | ||||
} | } | ||||
mtx_unlock(&bdirtylock); | mtx_unlock(&bdirtylock); | ||||
} | } | ||||
} | } | ||||
/*
 * Return true if we have too many dirty buffers.
 *
 * True when any bit is set in the bdhidirty bitset; presumably each
 * domain sets its bit on crossing its high dirty-buffer watermark --
 * confirm against the code that updates bdhidirty.
 */
int
buf_dirty_count_severe(void)
{
	return (!BIT_EMPTY(BUF_DOMAINS, &bdhidirty));
}
/* | /* | ||||
* brelse: | * brelse: | ||||
* | * | ||||
* Release a busy buffer and, if requested, free its resources. The | * Release a busy buffer and, if requested, free its resources. The | ||||
* buffer will be stashed in the appropriate bufqueue[] allowing it | * buffer will be stashed in the appropriate bufqueue[] allowing it | ||||
* to be accessed later as a cache entity or reused for other purposes. | * to be accessed later as a cache entity or reused for other purposes. | ||||
▲ Show 20 Lines • Show All 69 Lines • ▼ Show 20 Lines | if (bp->b_iocmd == BIO_WRITE && (bp->b_ioflags & BIO_ERROR) && | ||||
* Either a failed read I/O, or we were asked to free or not | * Either a failed read I/O, or we were asked to free or not | ||||
* cache the buffer, or we failed to write to a device that's | * cache the buffer, or we failed to write to a device that's | ||||
* no longer present. | * no longer present. | ||||
*/ | */ | ||||
bp->b_flags |= B_INVAL; | bp->b_flags |= B_INVAL; | ||||
if (!LIST_EMPTY(&bp->b_dep)) | if (!LIST_EMPTY(&bp->b_dep)) | ||||
buf_deallocate(bp); | buf_deallocate(bp); | ||||
if (bp->b_flags & B_DELWRI) | if (bp->b_flags & B_DELWRI) | ||||
bdirtysub(); | bdirtysub(bp); | ||||
bp->b_flags &= ~(B_DELWRI | B_CACHE); | bp->b_flags &= ~(B_DELWRI | B_CACHE); | ||||
if ((bp->b_flags & B_VMIO) == 0) { | if ((bp->b_flags & B_VMIO) == 0) { | ||||
allocbuf(bp, 0); | allocbuf(bp, 0); | ||||
if (bp->b_vp) | if (bp->b_vp) | ||||
brelvp(bp); | brelvp(bp); | ||||
} | } | ||||
} | } | ||||
▲ Show 20 Lines • Show All 598 Lines • ▼ Show 20 Lines | if (!unmapped_buf_allowed) | ||||
gbflags &= ~(GB_UNMAPPED | GB_KVAALLOC); | gbflags &= ~(GB_UNMAPPED | GB_KVAALLOC); | ||||
if (vp == NULL || (vp->v_vflag & (VV_MD | VV_SYSTEM)) != 0 || | if (vp == NULL || (vp->v_vflag & (VV_MD | VV_SYSTEM)) != 0 || | ||||
vp->v_type == VCHR) | vp->v_type == VCHR) | ||||
metadata = true; | metadata = true; | ||||
else | else | ||||
metadata = false; | metadata = false; | ||||
if (vp == NULL) | if (vp == NULL) | ||||
bd = &bdclean[0]; | bd = &bdomain[0]; | ||||
else | else | ||||
bd = &bdclean[vp->v_bufobj.bo_domain]; | bd = &bdomain[vp->v_bufobj.bo_domain]; | ||||
counter_u64_add(getnewbufcalls, 1); | counter_u64_add(getnewbufcalls, 1); | ||||
reserved = false; | reserved = false; | ||||
do { | do { | ||||
if (reserved == false && | if (reserved == false && | ||||
bufspace_reserve(bd, maxsize, metadata) != 0) { | bufspace_reserve(bd, maxsize, metadata) != 0) { | ||||
counter_u64_add(getnewbufrestarts, 1); | counter_u64_add(getnewbufrestarts, 1); | ||||
continue; | continue; | ||||
Show All 29 Lines | |||||
static struct kproc_desc buf_kp = { | static struct kproc_desc buf_kp = { | ||||
"bufdaemon", | "bufdaemon", | ||||
buf_daemon, | buf_daemon, | ||||
&bufdaemonproc | &bufdaemonproc | ||||
}; | }; | ||||
SYSINIT(bufdaemon, SI_SUB_KTHREAD_BUF, SI_ORDER_FIRST, kproc_start, &buf_kp); | SYSINIT(bufdaemon, SI_SUB_KTHREAD_BUF, SI_ORDER_FIRST, kproc_start, &buf_kp); | ||||
static int | static int | ||||
buf_flush(struct vnode *vp, int target) | buf_flush(struct vnode *vp, struct bufdomain *bd, int target) | ||||
{ | { | ||||
int flushed; | int flushed; | ||||
flushed = flushbufqueues(vp, target, 0); | flushed = flushbufqueues(vp, bd, target, 0); | ||||
if (flushed == 0) { | if (flushed == 0) { | ||||
/* | /* | ||||
* Could not find any buffers without rollback | * Could not find any buffers without rollback | ||||
* dependencies, so just write the first one | * dependencies, so just write the first one | ||||
* in the hopes of eventually making progress. | * in the hopes of eventually making progress. | ||||
*/ | */ | ||||
if (vp != NULL && target > 2) | if (vp != NULL && target > 2) | ||||
target /= 2; | target /= 2; | ||||
flushbufqueues(vp, target, 1); | flushbufqueues(vp, bd, target, 1); | ||||
} | } | ||||
return (flushed); | return (flushed); | ||||
} | } | ||||
static void | static void | ||||
buf_daemon() | buf_daemon() | ||||
{ | { | ||||
struct bufdomain *bd; | |||||
int speedupreq; | |||||
int lodirty; | int lodirty; | ||||
int i; | int i; | ||||
/* | /* | ||||
* This process needs to be suspended prior to shutdown sync. | * This process needs to be suspended prior to shutdown sync. | ||||
*/ | */ | ||||
EVENTHANDLER_REGISTER(shutdown_pre_sync, kproc_shutdown, bufdaemonproc, | EVENTHANDLER_REGISTER(shutdown_pre_sync, kproc_shutdown, bufdaemonproc, | ||||
SHUTDOWN_PRI_LAST); | SHUTDOWN_PRI_LAST); | ||||
/* | /* | ||||
* Start the buf clean daemons as children threads. | * Start the buf clean daemons as children threads. | ||||
*/ | */ | ||||
for (i = 0 ; i < clean_domains; i++) { | for (i = 0 ; i < buf_domains; i++) { | ||||
int error; | int error; | ||||
error = kthread_add((void (*)(void *))bufspace_daemon, | error = kthread_add((void (*)(void *))bufspace_daemon, | ||||
&bdclean[i], curproc, NULL, 0, 0, "bufspacedaemon-%d", i); | &bdomain[i], curproc, NULL, 0, 0, "bufspacedaemon-%d", i); | ||||
if (error) | if (error) | ||||
panic("error %d spawning bufspace daemon", error); | panic("error %d spawning bufspace daemon", error); | ||||
} | } | ||||
/* | /* | ||||
* This process is allowed to take the buffer cache to the limit | * This process is allowed to take the buffer cache to the limit | ||||
*/ | */ | ||||
curthread->td_pflags |= TDP_NORUNNINGBUF | TDP_BUFNEED; | curthread->td_pflags |= TDP_NORUNNINGBUF | TDP_BUFNEED; | ||||
mtx_lock(&bdlock); | mtx_lock(&bdlock); | ||||
for (;;) { | for (;;) { | ||||
bd_request = 0; | bd_request = 0; | ||||
mtx_unlock(&bdlock); | mtx_unlock(&bdlock); | ||||
kproc_suspend_check(bufdaemonproc); | kproc_suspend_check(bufdaemonproc); | ||||
lodirty = lodirtybuffers; | |||||
if (bd_speedupreq) { | /* | ||||
lodirty = numdirtybuffers / 2; | * Save speedupreq for this pass and reset to capture new | ||||
* requests. | |||||
*/ | |||||
speedupreq = bd_speedupreq; | |||||
bd_speedupreq = 0; | bd_speedupreq = 0; | ||||
} | |||||
/* | /* | ||||
* Do the flush. Limit the amount of in-transit I/O we | * Flush each domain sequentially according to its level and | ||||
* allow to build up, otherwise we would completely saturate | * the speedup request. | ||||
* the I/O system. | |||||
*/ | */ | ||||
while (numdirtybuffers > lodirty) { | for (i = 0; i < buf_domains; i++) { | ||||
if (buf_flush(NULL, numdirtybuffers - lodirty) == 0) | bd = &bdomain[i]; | ||||
if (speedupreq) | |||||
lodirty = bd->bd_numdirtybuffers / 2; | |||||
else | |||||
lodirty = bd->bd_lodirtybuffers; | |||||
while (bd->bd_numdirtybuffers > lodirty) { | |||||
if (buf_flush(NULL, bd, | |||||
bd->bd_numdirtybuffers - lodirty) == 0) | |||||
break; | break; | ||||
kern_yield(PRI_USER); | kern_yield(PRI_USER); | ||||
} | } | ||||
} | |||||
/* | /* | ||||
* Only clear bd_request if we have reached our low water | * Only clear bd_request if we have reached our low water | ||||
* mark. The buf_daemon normally waits 1 second and | * mark. The buf_daemon normally waits 1 second and | ||||
* then incrementally flushes any dirty buffers that have | * then incrementally flushes any dirty buffers that have | ||||
* built up, within reason. | * built up, within reason. | ||||
* | * | ||||
* If we were unable to hit our low water mark and couldn't | * If we were unable to hit our low water mark and couldn't | ||||
* find any flushable buffers, we sleep for a short period | * find any flushable buffers, we sleep for a short period | ||||
* to avoid endless loops on unlockable buffers. | * to avoid endless loops on unlockable buffers. | ||||
*/ | */ | ||||
mtx_lock(&bdlock); | mtx_lock(&bdlock); | ||||
if (numdirtybuffers <= lodirtybuffers) { | if (!BIT_EMPTY(BUF_DOMAINS, &bdlodirty)) { | ||||
/* | /* | ||||
* We reached our low water mark, reset the | * We reached our low water mark, reset the | ||||
* request and sleep until we are needed again. | * request and sleep until we are needed again. | ||||
* The sleep is just so the suspend code works. | * The sleep is just so the suspend code works. | ||||
*/ | */ | ||||
bd_request = 0; | bd_request = 0; | ||||
/* | /* | ||||
* Do an extra wakeup in case dirty threshold | * Do an extra wakeup in case dirty threshold | ||||
Show All 22 Lines | |||||
 * free up B_INVAL buffers instead of writing them, which NFS is
* particularly sensitive to. | * particularly sensitive to. | ||||
*/ | */ | ||||
static int flushwithdeps = 0; | static int flushwithdeps = 0; | ||||
SYSCTL_INT(_vfs, OID_AUTO, flushwithdeps, CTLFLAG_RW, &flushwithdeps, | SYSCTL_INT(_vfs, OID_AUTO, flushwithdeps, CTLFLAG_RW, &flushwithdeps, | ||||
0, "Number of buffers flushed with dependecies that require rollbacks"); | 0, "Number of buffers flushed with dependecies that require rollbacks"); | ||||
static int | static int | ||||
flushbufqueues(struct vnode *lvp, int target, int flushdeps) | flushbufqueues(struct vnode *lvp, struct bufdomain *bd, int target, | ||||
int flushdeps) | |||||
{ | { | ||||
struct bufqueue *bq; | struct bufqueue *bq; | ||||
struct buf *sentinel; | struct buf *sentinel; | ||||
struct vnode *vp; | struct vnode *vp; | ||||
struct mount *mp; | struct mount *mp; | ||||
struct buf *bp; | struct buf *bp; | ||||
int hasdeps; | int hasdeps; | ||||
int flushed; | int flushed; | ||||
int error; | int error; | ||||
bool unlock; | bool unlock; | ||||
flushed = 0; | flushed = 0; | ||||
bq = &bqdirty; | bq = &bd->bd_dirtyq; | ||||
bp = NULL; | bp = NULL; | ||||
sentinel = malloc(sizeof(struct buf), M_TEMP, M_WAITOK | M_ZERO); | sentinel = malloc(sizeof(struct buf), M_TEMP, M_WAITOK | M_ZERO); | ||||
sentinel->b_qindex = QUEUE_SENTINEL; | sentinel->b_qindex = QUEUE_SENTINEL; | ||||
BQ_LOCK(bq); | BQ_LOCK(bq); | ||||
TAILQ_INSERT_HEAD(&bq->bq_queue, sentinel, b_freelist); | TAILQ_INSERT_HEAD(&bq->bq_queue, sentinel, b_freelist); | ||||
BQ_UNLOCK(bq); | BQ_UNLOCK(bq); | ||||
while (flushed != target) { | while (flushed != target) { | ||||
maybe_yield(); | maybe_yield(); | ||||
▲ Show 20 Lines • Show All 319 Lines • ▼ Show 20 Lines | while (bufkva_alloc(bp, maxsize, gbflags) != 0) { | ||||
if ((gbflags & GB_NOWAIT_BD) != 0) { | if ((gbflags & GB_NOWAIT_BD) != 0) { | ||||
/* | /* | ||||
* XXXKIB: defragmentation cannot | * XXXKIB: defragmentation cannot | ||||
* succeed, not sure what else to do. | * succeed, not sure what else to do. | ||||
*/ | */ | ||||
panic("GB_NOWAIT_BD and GB_UNMAPPED %p", bp); | panic("GB_NOWAIT_BD and GB_UNMAPPED %p", bp); | ||||
} | } | ||||
counter_u64_add(mappingrestarts, 1); | counter_u64_add(mappingrestarts, 1); | ||||
bufspace_wait(&bdclean[bp->b_domain], bp->b_vp, gbflags, 0, 0); | bufspace_wait(bufdomain(bp), bp->b_vp, gbflags, 0, 0); | ||||
} | } | ||||
has_addr: | has_addr: | ||||
if (need_mapping) { | if (need_mapping) { | ||||
/* b_offset is handled by bpmap_qenter. */ | /* b_offset is handled by bpmap_qenter. */ | ||||
bp->b_data = bp->b_kvabase; | bp->b_data = bp->b_kvabase; | ||||
BUF_CHECK_MAPPED(bp); | BUF_CHECK_MAPPED(bp); | ||||
bpmap_qenter(bp); | bpmap_qenter(bp); | ||||
} | } | ||||
▲ Show 20 Lines • Show All 181 Lines • ▼ Show 20 Lines | if (bp != NULL) { | ||||
*/ | */ | ||||
BO_RUNLOCK(bo); | BO_RUNLOCK(bo); | ||||
/* | /* | ||||
* If the user does not want us to create the buffer, bail out | * If the user does not want us to create the buffer, bail out | ||||
* here. | * here. | ||||
*/ | */ | ||||
if (flags & GB_NOCREAT) | if (flags & GB_NOCREAT) | ||||
return NULL; | return NULL; | ||||
if (bdclean[bo->bo_domain].bd_freebuffers == 0 && | if (bdomain[bo->bo_domain].bd_freebuffers == 0 && | ||||
TD_IS_IDLETHREAD(curthread)) | TD_IS_IDLETHREAD(curthread)) | ||||
return NULL; | return NULL; | ||||
bsize = vn_isdisk(vp, NULL) ? DEV_BSIZE : bo->bo_bsize; | bsize = vn_isdisk(vp, NULL) ? DEV_BSIZE : bo->bo_bsize; | ||||
KASSERT(bsize != 0, ("bsize == 0, check bo->bo_bsize")); | KASSERT(bsize != 0, ("bsize == 0, check bo->bo_bsize")); | ||||
offset = blkno * bsize; | offset = blkno * bsize; | ||||
vmio = vp->v_object != NULL; | vmio = vp->v_object != NULL; | ||||
if (vmio) { | if (vmio) { | ||||
Show All 40 Lines | if (bp != NULL) { | ||||
* with the vp especially considering limitations in | * with the vp especially considering limitations in | ||||
* the splay tree implementation when dealing with duplicate | * the splay tree implementation when dealing with duplicate | ||||
* lblkno's. | * lblkno's. | ||||
*/ | */ | ||||
BO_LOCK(bo); | BO_LOCK(bo); | ||||
if (gbincore(bo, blkno)) { | if (gbincore(bo, blkno)) { | ||||
BO_UNLOCK(bo); | BO_UNLOCK(bo); | ||||
bp->b_flags |= B_INVAL; | bp->b_flags |= B_INVAL; | ||||
bufspace_release(&bdclean[bp->b_domain], maxsize); | bufspace_release(bufdomain(bp), maxsize); | ||||
brelse(bp); | brelse(bp); | ||||
goto loop; | goto loop; | ||||
} | } | ||||
/* | /* | ||||
* Insert the buffer into the hash, so that it can | * Insert the buffer into the hash, so that it can | ||||
* be found by incore. | * be found by incore. | ||||
*/ | */ | ||||
Show All 18 Lines | if (vmio) { | ||||
bp->b_flags &= ~B_VMIO; | bp->b_flags &= ~B_VMIO; | ||||
KASSERT(bp->b_bufobj->bo_object == NULL, | KASSERT(bp->b_bufobj->bo_object == NULL, | ||||
("ARGH! has b_bufobj->bo_object %p %p\n", | ("ARGH! has b_bufobj->bo_object %p %p\n", | ||||
bp, bp->b_bufobj->bo_object)); | bp, bp->b_bufobj->bo_object)); | ||||
BUF_CHECK_MAPPED(bp); | BUF_CHECK_MAPPED(bp); | ||||
} | } | ||||
allocbuf(bp, size); | allocbuf(bp, size); | ||||
bufspace_release(&bdclean[bp->b_domain], maxsize); | bufspace_release(bufdomain(bp), maxsize); | ||||
bp->b_flags &= ~B_DONE; | bp->b_flags &= ~B_DONE; | ||||
} | } | ||||
CTR4(KTR_BUF, "getblk(%p, %ld, %d) = %p", vp, (long)blkno, size, bp); | CTR4(KTR_BUF, "getblk(%p, %ld, %d) = %p", vp, (long)blkno, size, bp); | ||||
BUF_ASSERT_HELD(bp); | BUF_ASSERT_HELD(bp); | ||||
end: | end: | ||||
buf_track(bp, __func__); | buf_track(bp, __func__); | ||||
KASSERT(bp->b_bufobj == bo, | KASSERT(bp->b_bufobj == bo, | ||||
("bp %p wrong b_bufobj %p should be %p", bp, bp->b_bufobj, bo)); | ("bp %p wrong b_bufobj %p should be %p", bp, bp->b_bufobj, bo)); | ||||
Show All 12 Lines | geteblk(int size, int flags) | ||||
maxsize = (size + BKVAMASK) & ~BKVAMASK; | maxsize = (size + BKVAMASK) & ~BKVAMASK; | ||||
while ((bp = getnewbuf(NULL, 0, 0, maxsize, flags)) == NULL) { | while ((bp = getnewbuf(NULL, 0, 0, maxsize, flags)) == NULL) { | ||||
if ((flags & GB_NOWAIT_BD) && | if ((flags & GB_NOWAIT_BD) && | ||||
(curthread->td_pflags & TDP_BUFNEED) != 0) | (curthread->td_pflags & TDP_BUFNEED) != 0) | ||||
return (NULL); | return (NULL); | ||||
} | } | ||||
allocbuf(bp, size); | allocbuf(bp, size); | ||||
bufspace_release(&bdclean[bp->b_domain], maxsize); | bufspace_release(bufdomain(bp), maxsize); | ||||
bp->b_flags |= B_INVAL; /* b_dep cleared by getnewbuf() */ | bp->b_flags |= B_INVAL; /* b_dep cleared by getnewbuf() */ | ||||
BUF_ASSERT_HELD(bp); | BUF_ASSERT_HELD(bp); | ||||
return (bp); | return (bp); | ||||
} | } | ||||
/* | /* | ||||
* Truncate the backing store for a non-vmio buffer. | * Truncate the backing store for a non-vmio buffer. | ||||
*/ | */ | ||||
▲ Show 20 Lines • Show All 852 Lines • ▼ Show 20 Lines | |||||
* Initialize a struct bufobj before use. Memory is assumed zero filled. | * Initialize a struct bufobj before use. Memory is assumed zero filled. | ||||
*/ | */ | ||||
void | void | ||||
bufobj_init(struct bufobj *bo, void *private) | bufobj_init(struct bufobj *bo, void *private) | ||||
{ | { | ||||
static volatile int bufobj_cleanq; | static volatile int bufobj_cleanq; | ||||
bo->bo_domain = | bo->bo_domain = | ||||
atomic_fetchadd_int(&bufobj_cleanq, 1) % clean_domains; | atomic_fetchadd_int(&bufobj_cleanq, 1) % buf_domains; | ||||
rw_init(BO_LOCKPTR(bo), "bufobj interlock"); | rw_init(BO_LOCKPTR(bo), "bufobj interlock"); | ||||
bo->bo_private = private; | bo->bo_private = private; | ||||
TAILQ_INIT(&bo->bo_clean.bv_hd); | TAILQ_INIT(&bo->bo_clean.bv_hd); | ||||
TAILQ_INIT(&bo->bo_dirty.bv_hd); | TAILQ_INIT(&bo->bo_dirty.bv_hd); | ||||
} | } | ||||
void | void | ||||
bufobj_wrefl(struct bufobj *bo) | bufobj_wrefl(struct bufobj *bo) | ||||
▲ Show 20 Lines • Show All 306 Lines • ▼ Show 20 Lines | for (i = 0; i < bp->b_npages; i++) { | ||||
(u_long)VM_PAGE_TO_PHYS(m)); | (u_long)VM_PAGE_TO_PHYS(m)); | ||||
else | else | ||||
db_printf("( ??? )"); | db_printf("( ??? )"); | ||||
if ((i + 1) < bp->b_npages) | if ((i + 1) < bp->b_npages) | ||||
db_printf(","); | db_printf(","); | ||||
} | } | ||||
db_printf("\n"); | db_printf("\n"); | ||||
} | } | ||||
BUF_LOCKPRINTINFO(bp); | |||||
#if defined(FULL_BUF_TRACKING) | #if defined(FULL_BUF_TRACKING) | ||||
db_printf("b_io_tracking: b_io_tcnt = %u\n", bp->b_io_tcnt); | db_printf("b_io_tracking: b_io_tcnt = %u\n", bp->b_io_tcnt); | ||||
i = bp->b_io_tcnt % BUF_TRACKING_SIZE; | i = bp->b_io_tcnt % BUF_TRACKING_SIZE; | ||||
for (j = 1; j <= BUF_TRACKING_SIZE; j++) { | for (j = 1; j <= BUF_TRACKING_SIZE; j++) { | ||||
if (bp->b_io_tracking[BUF_TRACKING_ENTRY(i - j)] == NULL) | if (bp->b_io_tracking[BUF_TRACKING_ENTRY(i - j)] == NULL) | ||||
continue; | continue; | ||||
db_printf(" %2u: %s\n", j, | db_printf(" %2u: %s\n", j, | ||||
bp->b_io_tracking[BUF_TRACKING_ENTRY(i - j)]); | bp->b_io_tracking[BUF_TRACKING_ENTRY(i - j)]); | ||||
} | } | ||||
#elif defined(BUF_TRACKING) | #elif defined(BUF_TRACKING) | ||||
db_printf("b_io_tracking: %s\n", bp->b_io_tracking); | db_printf("b_io_tracking: %s\n", bp->b_io_tracking); | ||||
#endif | #endif | ||||
db_printf(" "); | db_printf(" "); | ||||
BUF_LOCKPRINTINFO(bp); | |||||
} | } | ||||
DB_SHOW_COMMAND(bufqueues, bufqueues) | DB_SHOW_COMMAND(bufqueues, bufqueues) | ||||
{ | { | ||||
struct bufdomain *bd; | struct bufdomain *bd; | ||||
int i, j; | struct buf *bp; | ||||
long total; | |||||
int i, j, cnt; | |||||
db_printf("bqempty: %d\n", bqempty.bq_len); | db_printf("bqempty: %d\n", bqempty.bq_len); | ||||
db_printf("bqdirty: %d\n", bqdirty.bq_len); | |||||
for (i = 0; i < clean_domains; i++) { | for (i = 0; i < buf_domains; i++) { | ||||
bd = &bdclean[i]; | bd = &bdomain[i]; | ||||
db_printf("Buf domain %d\n", i); | db_printf("Buf domain %d\n", i); | ||||
db_printf("\tfreebufs\t%d\n", bd->bd_freebuffers); | db_printf("\tfreebufs\t%d\n", bd->bd_freebuffers); | ||||
db_printf("\tlofreebufs\t%d\n", bd->bd_lofreebuffers); | db_printf("\tlofreebufs\t%d\n", bd->bd_lofreebuffers); | ||||
db_printf("\thifreebufs\t%d\n", bd->bd_hifreebuffers); | db_printf("\thifreebufs\t%d\n", bd->bd_hifreebuffers); | ||||
db_printf("\n"); | db_printf("\n"); | ||||
db_printf("\tbufspace\t%ld\n", bd->bd_bufspace); | db_printf("\tbufspace\t%ld\n", bd->bd_bufspace); | ||||
db_printf("\tmaxbufspace\t%ld\n", bd->bd_maxbufspace); | db_printf("\tmaxbufspace\t%ld\n", bd->bd_maxbufspace); | ||||
db_printf("\thibufspace\t%ld\n", bd->bd_hibufspace); | db_printf("\thibufspace\t%ld\n", bd->bd_hibufspace); | ||||
db_printf("\tlobufspace\t%ld\n", bd->bd_lobufspace); | db_printf("\tlobufspace\t%ld\n", bd->bd_lobufspace); | ||||
db_printf("\tbufspacethresh\t%ld\n", bd->bd_bufspacethresh); | db_printf("\tbufspacethresh\t%ld\n", bd->bd_bufspacethresh); | ||||
db_printf("\n"); | db_printf("\n"); | ||||
db_printf("\tcleanq count\t%d\n", bd->bd_cleanq->bq_len); | db_printf("\tnumdirtybuffers\t%d\n", bd->bd_numdirtybuffers); | ||||
db_printf("\tlodirtybuffers\t%d\n", bd->bd_lodirtybuffers); | |||||
db_printf("\thidirtybuffers\t%d\n", bd->bd_hidirtybuffers); | |||||
db_printf("\tdirtybufthresh\t%d\n", bd->bd_dirtybufthresh); | |||||
db_printf("\n"); | |||||
total = 0; | |||||
TAILQ_FOREACH(bp, &bd->bd_cleanq->bq_queue, b_freelist) | |||||
total += bp->b_bufsize; | |||||
db_printf("\tcleanq count\t%d (%ld)\n", | |||||
bd->bd_cleanq->bq_len, total); | |||||
total = 0; | |||||
TAILQ_FOREACH(bp, &bd->bd_dirtyq.bq_queue, b_freelist) | |||||
total += bp->b_bufsize; | |||||
db_printf("\tdirtyq count\t%d (%ld)\n", | |||||
bd->bd_dirtyq.bq_len, total); | |||||
db_printf("\twakeup\t\t%d\n", bd->bd_wanted); | db_printf("\twakeup\t\t%d\n", bd->bd_wanted); | ||||
db_printf("\tlim\t\t%d\n", bd->bd_lim); | db_printf("\tlim\t\t%d\n", bd->bd_lim); | ||||
db_printf("\tCPU "); | db_printf("\tCPU "); | ||||
for (j = 0; j <= mp_maxid; j++) | for (j = 0; j <= mp_maxid; j++) | ||||
db_printf("%d, ", bd->bd_subq[j].bq_len); | db_printf("%d, ", bd->bd_subq[j].bq_len); | ||||
db_printf("\n"); | db_printf("\n"); | ||||
cnt = 0; | |||||
total = 0; | |||||
for (j = 0; j < nbuf; j++) | |||||
if (buf[j].b_domain == i && BUF_ISLOCKED(&buf[j])) { | |||||
cnt++; | |||||
total += buf[j].b_bufsize; | |||||
} | |||||
db_printf("\tLocked buffers: %d space %ld\n", cnt, total); | |||||
cnt = 0; | |||||
total = 0; | |||||
for (j = 0; j < nbuf; j++) | |||||
if (buf[j].b_domain == i) { | |||||
cnt++; | |||||
total += buf[j].b_bufsize; | |||||
} | |||||
db_printf("\tTotal buffers: %d space %ld\n", cnt, total); | |||||
} | } | ||||
} | } | ||||
DB_SHOW_COMMAND(lockedbufs, lockedbufs) | DB_SHOW_COMMAND(lockedbufs, lockedbufs) | ||||
{ | { | ||||
struct buf *bp; | struct buf *bp; | ||||
int i; | int i; | ||||
▲ Show 20 Lines • Show All 56 Lines • Show Last 20 Lines |