D17773.id51746.diff
Index: lib/libmemstat/memstat_uma.c
===================================================================
--- lib/libmemstat/memstat_uma.c
+++ lib/libmemstat/memstat_uma.c
@@ -448,12 +448,7 @@
mtp->mt_memalloced = mtp->mt_numallocs * mtp->mt_size;
mtp->mt_memfreed = mtp->mt_numfrees * mtp->mt_size;
mtp->mt_bytes = mtp->mt_memalloced - mtp->mt_memfreed;
- if (kz.uk_ppera > 1)
- mtp->mt_countlimit = kz.uk_maxpages /
- kz.uk_ipers;
- else
- mtp->mt_countlimit = kz.uk_maxpages *
- kz.uk_ipers;
+ mtp->mt_countlimit = uz.uz_maxitems;
mtp->mt_byteslimit = mtp->mt_countlimit * mtp->mt_size;
mtp->mt_count = mtp->mt_numallocs - mtp->mt_numfrees;
for (i = 0; i < ndomains; i++) {
Index: sys/cam/cam_periph.c
===================================================================
--- sys/cam/cam_periph.c
+++ sys/cam/cam_periph.c
@@ -936,7 +936,7 @@
/*
* Get the buffer.
*/
- mapinfo->bp[i] = getpbuf(NULL);
+ mapinfo->bp[i] = uma_zalloc(pbuf_zone, M_WAITOK);
/* put our pointer in the data slot */
mapinfo->bp[i]->b_data = *data_ptrs[i];
@@ -962,9 +962,9 @@
for (j = 0; j < i; ++j) {
*data_ptrs[j] = mapinfo->bp[j]->b_caller1;
vunmapbuf(mapinfo->bp[j]);
- relpbuf(mapinfo->bp[j], NULL);
+ uma_zfree(pbuf_zone, mapinfo->bp[j]);
}
- relpbuf(mapinfo->bp[i], NULL);
+ uma_zfree(pbuf_zone, mapinfo->bp[i]);
PRELE(curproc);
return(EACCES);
}
@@ -1052,7 +1052,7 @@
vunmapbuf(mapinfo->bp[i]);
/* release the buffer */
- relpbuf(mapinfo->bp[i], NULL);
+ uma_zfree(pbuf_zone, mapinfo->bp[i]);
}
/* allow ourselves to be swapped once again */
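The same conversion recurs throughout the patch: code that used to take a pbuf off the global free list with getpbuf(NULL) and return it with relpbuf(bp, NULL) now allocates from the shared pbuf_zone. Below is a minimal sketch of the new consumer-side pattern; the helper name is hypothetical, and only pbuf_zone, uma_zalloc()/uma_zfree(), vmapbuf() and the struct buf fields already visible in this diff are assumed.

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <vm/uma.h>

/* Hypothetical example, not part of the patch: wire a user buffer
 * through a pbuf taken from the shared zone. */
static int
example_map_user_buf(void *uaddr, u_int len, struct buf **bpp)
{
    struct buf *bp;

    bp = uma_zalloc(pbuf_zone, M_WAITOK);   /* was getpbuf(NULL) */
    bp->b_data = uaddr;
    bp->b_bufsize = len;
    if (vmapbuf(bp, 1) < 0) {
        uma_zfree(pbuf_zone, bp);           /* was relpbuf(bp, NULL) */
        return (EACCES);
    }
    *bpp = bp;
    return (0);
}

Blocking when the pool is exhausted is now handled by the item limit set on pbuf_zone in vm_pager_bufferinit() below, so M_WAITOK sleeps inside UMA rather than on the old free-count handshake.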
Index: sys/dev/md/md.c
===================================================================
--- sys/dev/md/md.c
+++ sys/dev/md/md.c
@@ -231,7 +231,7 @@
#define NMASK (NINDIR-1)
static int nshift;
-static int md_vnode_pbuf_freecnt;
+static uma_zone_t md_pbuf_zone;
struct indir {
uintptr_t *array;
@@ -962,7 +962,7 @@
auio.uio_iovcnt = piov - auio.uio_iov;
piov = auio.uio_iov;
} else if ((bp->bio_flags & BIO_UNMAPPED) != 0) {
- pb = getpbuf(&md_vnode_pbuf_freecnt);
+ pb = uma_zalloc(md_pbuf_zone, M_WAITOK);
bp->bio_resid = len;
unmapped_step:
npages = atop(min(MAXPHYS, round_page(len + (ma_offs &
@@ -1011,7 +1011,7 @@
if (len > 0)
goto unmapped_step;
}
- relpbuf(pb, &md_vnode_pbuf_freecnt);
+ uma_zfree(md_pbuf_zone, pb);
}
free(piov, M_MD);
@@ -2105,7 +2105,9 @@
sx_xunlock(&md_sx);
}
}
- md_vnode_pbuf_freecnt = nswbuf / 10;
+ md_pbuf_zone = uma_zsecond_create("mdpbuf", pbuf_ctor, pbuf_dtor,
+ pbuf_init, NULL, pbuf_zone);
+ uma_zone_set_max(md_pbuf_zone, nswbuf / 10);
status_dev = make_dev(&mdctl_cdevsw, INT_MAX, UID_ROOT, GID_WHEEL,
0600, MDCTL_NAME);
g_topology_lock();
@@ -2198,5 +2200,6 @@
sx_destroy(&md_sx);
if (status_dev != NULL)
destroy_dev(status_dev);
+ uma_zdestroy(md_pbuf_zone);
delete_unrhdr(md_uh);
}
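Subsystems that formerly kept a private pbuf free count, like md_vnode_pbuf_freecnt above, now create a secondary UMA zone on top of pbuf_zone and cap it with uma_zone_set_max(), tearing the zone down at module unload. A hedged sketch of that lifecycle with an illustrative zone name, assuming only the pbuf_ctor/pbuf_dtor/pbuf_init hooks, pbuf_zone and nswbuf already shown in this diff:

static uma_zone_t example_pbuf_zone;

static void
example_load(void)
{
    /* A secondary zone shares pbuf_zone's keg (same item size and
     * pbuf KVA setup) but carries its own item limit. */
    example_pbuf_zone = uma_zsecond_create("examplepbuf", pbuf_ctor,
        pbuf_dtor, pbuf_init, NULL, pbuf_zone);
    /* Cap this consumer the way md(4) does above. */
    uma_zone_set_max(example_pbuf_zone, nswbuf / 10);
}

static void
example_unload(void)
{
    uma_zdestroy(example_pbuf_zone);
}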
Index: sys/dev/nvme/nvme_ctrlr.c
===================================================================
--- sys/dev/nvme/nvme_ctrlr.c
+++ sys/dev/nvme/nvme_ctrlr.c
@@ -1052,7 +1052,7 @@
* this passthrough command.
*/
PHOLD(curproc);
- buf = getpbuf(NULL);
+ buf = uma_zalloc(pbuf_zone, M_WAITOK);
buf->b_data = pt->buf;
buf->b_bufsize = pt->len;
buf->b_iocmd = pt->is_read ? BIO_READ : BIO_WRITE;
@@ -1099,7 +1099,7 @@
err:
if (buf != NULL) {
- relpbuf(buf, NULL);
+ uma_zfree(pbuf_zone, buf);
PRELE(curproc);
}
Index: sys/fs/fuse/fuse_main.c
===================================================================
--- sys/fs/fuse/fuse_main.c
+++ sys/fs/fuse/fuse_main.c
@@ -84,7 +84,7 @@
extern struct vfsops fuse_vfsops;
extern struct cdevsw fuse_cdevsw;
extern struct vop_vector fuse_vnops;
-extern int fuse_pbuf_freecnt;
+extern uma_zone_t fuse_pbuf_zone;
static struct vfsconf fuse_vfsconf = {
.vfc_version = VFS_VERSION,
@@ -122,7 +122,6 @@
switch (what) {
case MOD_LOAD: /* kldload */
- fuse_pbuf_freecnt = nswbuf / 2 + 1;
mtx_init(&fuse_mtx, "fuse_mtx", NULL, MTX_DEF);
err = fuse_device_init();
if (err) {
@@ -130,6 +129,9 @@
return (err);
}
fuse_ipc_init();
+ fuse_pbuf_zone = uma_zsecond_create("fusepbuf", pbuf_ctor,
+ pbuf_dtor, pbuf_init, NULL, pbuf_zone);
+ uma_zone_set_max(fuse_pbuf_zone, nswbuf / 2 + 1);
/* vfs_modevent ignores its first arg */
if ((err = vfs_modevent(NULL, what, &fuse_vfsconf)))
@@ -144,6 +146,7 @@
if ((err = vfs_modevent(NULL, what, &fuse_vfsconf)))
return (err);
fuse_bringdown(eh_tag);
+ uma_zdestroy(fuse_pbuf_zone);
break;
default:
return (EINVAL);
Index: sys/fs/fuse/fuse_vnops.c
===================================================================
--- sys/fs/fuse/fuse_vnops.c
+++ sys/fs/fuse/fuse_vnops.c
@@ -201,7 +201,7 @@
SYSCTL_INT(_vfs_fuse, OID_AUTO, reclaim_revoked, CTLFLAG_RW,
&fuse_reclaim_revoked, 0, "");
-int fuse_pbuf_freecnt = -1;
+uma_zone_t fuse_pbuf_zone;
#define fuse_vm_page_lock(m) vm_page_lock((m));
#define fuse_vm_page_unlock(m) vm_page_unlock((m));
@@ -1824,7 +1824,7 @@
* We use only the kva address for the buffer, but this is extremely
* convenient and fast.
*/
- bp = getpbuf(&fuse_pbuf_freecnt);
+ bp = uma_zalloc(fuse_pbuf_zone, M_WAITOK);
kva = (vm_offset_t)bp->b_data;
pmap_qenter(kva, pages, npages);
@@ -1845,7 +1845,7 @@
error = fuse_io_dispatch(vp, &uio, IO_DIRECT, cred);
pmap_qremove(kva, npages);
- relpbuf(bp, &fuse_pbuf_freecnt);
+ uma_zfree(fuse_pbuf_zone, bp);
if (error && (uio.uio_resid == count)) {
FS_DEBUG("error %d\n", error);
@@ -1958,7 +1958,7 @@
* We use only the kva address for the buffer, but this is extremely
* convenient and fast.
*/
- bp = getpbuf(&fuse_pbuf_freecnt);
+ bp = uma_zalloc(fuse_pbuf_zone, M_WAITOK);
kva = (vm_offset_t)bp->b_data;
pmap_qenter(kva, pages, npages);
@@ -1978,7 +1978,7 @@
error = fuse_io_dispatch(vp, &uio, IO_DIRECT, cred);
pmap_qremove(kva, npages);
- relpbuf(bp, &fuse_pbuf_freecnt);
+ uma_zfree(fuse_pbuf_zone, bp);
if (!error) {
int nwritten = round_page(count - uio.uio_resid) / PAGE_SIZE;
Index: sys/fs/nfsclient/nfs_clbio.c
===================================================================
--- sys/fs/nfsclient/nfs_clbio.c
+++ sys/fs/nfsclient/nfs_clbio.c
@@ -70,7 +70,7 @@
extern int newnfs_directio_enable;
extern int nfs_keep_dirty_on_error;
-int ncl_pbuf_freecnt = -1; /* start out unlimited */
+uma_zone_t ncl_pbuf_zone;
static struct buf *nfs_getcacheblk(struct vnode *vp, daddr_t bn, int size,
struct thread *td);
@@ -182,7 +182,7 @@
* We use only the kva address for the buffer, but this is extremely
* convenient and fast.
*/
- bp = getpbuf(&ncl_pbuf_freecnt);
+ bp = uma_zalloc(ncl_pbuf_zone, M_WAITOK);
kva = (vm_offset_t) bp->b_data;
pmap_qenter(kva, pages, npages);
@@ -203,7 +203,7 @@
error = ncl_readrpc(vp, &uio, cred);
pmap_qremove(kva, npages);
- relpbuf(bp, &ncl_pbuf_freecnt);
+ uma_zfree(ncl_pbuf_zone, bp);
if (error && (uio.uio_resid == count)) {
printf("ncl_getpages: error %d\n", error);
@@ -793,7 +793,7 @@
while (uiop->uio_resid > 0) {
size = MIN(uiop->uio_resid, wsize);
size = MIN(uiop->uio_iov->iov_len, size);
- bp = getpbuf(&ncl_pbuf_freecnt);
+ bp = uma_zalloc(ncl_pbuf_zone, M_WAITOK);
t_uio = malloc(sizeof(struct uio), M_NFSDIRECTIO, M_WAITOK);
t_iov = malloc(sizeof(struct iovec), M_NFSDIRECTIO, M_WAITOK);
t_iov->iov_base = malloc(size, M_NFSDIRECTIO, M_WAITOK);
@@ -836,7 +836,7 @@
free(t_iov, M_NFSDIRECTIO);
free(t_uio, M_NFSDIRECTIO);
bp->b_vp = NULL;
- relpbuf(bp, &ncl_pbuf_freecnt);
+ uma_zfree(ncl_pbuf_zone, bp);
if (error == EINTR)
return (error);
goto do_sync;
@@ -1571,7 +1571,7 @@
mtx_unlock(&np->n_mtx);
}
bp->b_vp = NULL;
- relpbuf(bp, &ncl_pbuf_freecnt);
+ uma_zfree(ncl_pbuf_zone, bp);
}
/*
Index: sys/fs/nfsclient/nfs_clport.c
===================================================================
--- sys/fs/nfsclient/nfs_clport.c
+++ sys/fs/nfsclient/nfs_clport.c
@@ -79,7 +79,7 @@
extern struct vop_vector newnfs_fifoops;
extern uma_zone_t newnfsnode_zone;
extern struct buf_ops buf_ops_newnfs;
-extern int ncl_pbuf_freecnt;
+extern uma_zone_t ncl_pbuf_zone;
extern short nfsv4_cbport;
extern int nfscl_enablecallb;
extern int nfs_numnfscbd;
@@ -1023,7 +1023,9 @@
return;
inited = 1;
nfscl_inited = 1;
- ncl_pbuf_freecnt = nswbuf / 2 + 1;
+ ncl_pbuf_zone = uma_zsecond_create("nfspbuf", pbuf_ctor, pbuf_dtor,
+ pbuf_init, NULL, pbuf_zone);
+ uma_zone_set_max(ncl_pbuf_zone, nswbuf / 2 + 1);
}
/*
@@ -1357,6 +1359,7 @@
#if 0
ncl_call_invalcaches = NULL;
nfsd_call_nfscl = NULL;
+ uma_zdestroy(ncl_pbuf_zone);
/* and get rid of the mutexes */
mtx_destroy(&ncl_iod_mutex);
loaded = 0;
Index: sys/fs/smbfs/smbfs_io.c
===================================================================
--- sys/fs/smbfs/smbfs_io.c
+++ sys/fs/smbfs/smbfs_io.c
@@ -63,7 +63,7 @@
/*#define SMBFS_RWGENERIC*/
-extern int smbfs_pbuf_freecnt;
+extern uma_zone_t smbfs_pbuf_zone;
static int smbfs_fastlookup = 1;
@@ -468,7 +468,7 @@
scred = smbfs_malloc_scred();
smb_makescred(scred, td, cred);
- bp = getpbuf(&smbfs_pbuf_freecnt);
+ bp = uma_zalloc(smbfs_pbuf_zone, M_WAITOK);
kva = (vm_offset_t) bp->b_data;
pmap_qenter(kva, pages, npages);
@@ -490,7 +490,7 @@
smbfs_free_scred(scred);
pmap_qremove(kva, npages);
- relpbuf(bp, &smbfs_pbuf_freecnt);
+ uma_zfree(smbfs_pbuf_zone, bp);
if (error && (uio.uio_resid == count)) {
printf("smbfs_getpages: error %d\n",error);
@@ -593,7 +593,7 @@
rtvals[i] = VM_PAGER_ERROR;
}
- bp = getpbuf(&smbfs_pbuf_freecnt);
+ bp = uma_zalloc(smbfs_pbuf_zone, M_WAITOK);
kva = (vm_offset_t) bp->b_data;
pmap_qenter(kva, pages, npages);
@@ -621,7 +621,7 @@
pmap_qremove(kva, npages);
- relpbuf(bp, &smbfs_pbuf_freecnt);
+ uma_zfree(smbfs_pbuf_zone, bp);
if (error == 0) {
vnode_pager_undirty_pages(pages, rtvals, count - uio.uio_resid,
Index: sys/fs/smbfs/smbfs_vfsops.c
===================================================================
--- sys/fs/smbfs/smbfs_vfsops.c
+++ sys/fs/smbfs/smbfs_vfsops.c
@@ -88,7 +88,7 @@
MODULE_DEPEND(smbfs, libiconv, 1, 1, 2);
MODULE_DEPEND(smbfs, libmchain, 1, 1, 1);
-int smbfs_pbuf_freecnt = -1; /* start out unlimited */
+uma_zone_t smbfs_pbuf_zone;
static int
smbfs_cmount(struct mntarg *ma, void * data, uint64_t flags)
@@ -367,7 +367,8 @@
int
smbfs_init(struct vfsconf *vfsp)
{
- smbfs_pbuf_freecnt = nswbuf / 2 + 1;
+ smbfs_pbuf_zone = uma_zsecond_create("smbpbuf", pbuf_ctor, pbuf_dtor, pbuf_init, NULL, pbuf_zone);
+ uma_zone_set_max(smbfs_pbuf_zone, nswbuf / 2 + 1);
SMBVDEBUG("done.\n");
return 0;
}
@@ -377,6 +378,7 @@
smbfs_uninit(struct vfsconf *vfsp)
{
+ uma_zdestroy(smbfs_pbuf_zone);
SMBVDEBUG("done.\n");
return 0;
}
Index: sys/kern/kern_lock.c
===================================================================
--- sys/kern/kern_lock.c
+++ sys/kern/kern_lock.c
@@ -450,6 +450,8 @@
iflags |= LO_QUIET;
if (flags & LK_IS_VNODE)
iflags |= LO_IS_VNODE;
+ if (flags & LK_NEW)
+ iflags |= LO_NEW;
iflags |= flags & (LK_ADAPTIVE | LK_NOSHARE);
lock_init(&lk->lock_object, &lock_class_lockmgr, wmesg, NULL, iflags);
Index: sys/kern/kern_physio.c
===================================================================
--- sys/kern/kern_physio.c
+++ sys/kern/kern_physio.c
@@ -104,7 +104,7 @@
maxpages = btoc(MIN(uio->uio_resid, MAXPHYS)) + 1;
pages = malloc(sizeof(*pages) * maxpages, M_DEVBUF, M_WAITOK);
} else {
- pbuf = getpbuf(NULL);
+ pbuf = uma_zalloc(pbuf_zone, M_WAITOK);
sa = pbuf->b_data;
maxpages = btoc(MAXPHYS);
pages = pbuf->b_pages;
@@ -220,7 +220,7 @@
}
doerror:
if (pbuf)
- relpbuf(pbuf, NULL);
+ uma_zfree(pbuf_zone, pbuf);
else if (pages)
free(pages, M_DEVBUF);
g_destroy_bio(bp);
Index: sys/kern/vfs_aio.c
===================================================================
--- sys/kern/vfs_aio.c
+++ sys/kern/vfs_aio.c
@@ -1267,7 +1267,7 @@
goto unref;
}
- job->pbuf = pbuf = (struct buf *)getpbuf(NULL);
+ job->pbuf = pbuf = uma_zalloc(pbuf_zone, M_WAITOK);
BUF_KERNPROC(pbuf);
AIO_LOCK(ki);
ki->kaio_buffer_count++;
@@ -1318,7 +1318,7 @@
AIO_LOCK(ki);
ki->kaio_buffer_count--;
AIO_UNLOCK(ki);
- relpbuf(pbuf, NULL);
+ uma_zfree(pbuf_zone, pbuf);
job->pbuf = NULL;
}
g_destroy_bio(bp);
@@ -2344,7 +2344,7 @@
ki = userp->p_aioinfo;
if (job->pbuf) {
pmap_qremove((vm_offset_t)job->pbuf->b_data, job->npages);
- relpbuf(job->pbuf, NULL);
+ uma_zfree(pbuf_zone, job->pbuf);
job->pbuf = NULL;
atomic_subtract_int(&num_buf_aio, 1);
AIO_LOCK(ki);
Index: sys/kern/vfs_bio.c
===================================================================
--- sys/kern/vfs_bio.c
+++ sys/kern/vfs_bio.c
@@ -86,7 +86,6 @@
#include <vm/vm_extern.h>
#include <vm/vm_map.h>
#include <vm/swap_pager.h>
-#include "opt_swap.h"
static MALLOC_DEFINE(M_BIOBUF, "biobuf", "BIO buffer");
@@ -1017,10 +1016,6 @@
mtx_unlock(&bdlock);
}
-#ifndef NSWBUF_MIN
-#define NSWBUF_MIN 16
-#endif
-
#ifdef __i386__
#define TRANSIENT_DENOM 5
#else
@@ -1129,20 +1124,9 @@
nbuf = buf_sz / BKVASIZE;
}
- /*
- * swbufs are used as temporary holders for I/O, such as paging I/O.
- * We have no less then 16 and no more then 256.
- */
- nswbuf = min(nbuf / 4, 256);
- TUNABLE_INT_FETCH("kern.nswbuf", &nswbuf);
- if (nswbuf < NSWBUF_MIN)
- nswbuf = NSWBUF_MIN;
-
/*
* Reserve space for the buffer cache buffers
*/
- swbuf = (void *)v;
- v = (caddr_t)(swbuf + nswbuf);
buf = (void *)v;
v = (caddr_t)(buf + nbuf);
Index: sys/kern/vfs_cluster.c
===================================================================
--- sys/kern/vfs_cluster.c
+++ sys/kern/vfs_cluster.c
@@ -63,7 +63,9 @@
#endif
static MALLOC_DEFINE(M_SEGMENT, "cl_savebuf", "cluster_save buffer");
+static uma_zone_t cluster_pbuf_zone;
+static void cluster_init(void *);
static struct cluster_save *cluster_collectbufs(struct vnode *vp,
struct buf *last_bp, int gbflags);
static struct buf *cluster_rbuild(struct vnode *vp, u_quad_t filesize,
@@ -83,6 +85,17 @@
SYSCTL_INT(_vfs, OID_AUTO, read_min, CTLFLAG_RW, &read_min, 0,
"Cluster read min block count");
+SYSINIT(cluster, SI_SUB_CPU, SI_ORDER_ANY, cluster_init, NULL);
+
+static void
+cluster_init(void *dummy)
+{
+
+ cluster_pbuf_zone = uma_zsecond_create("clpbuf", pbuf_ctor, pbuf_dtor,
+ pbuf_init, NULL, pbuf_zone);
+ uma_zone_set_max(cluster_pbuf_zone, nswbuf / 2);
+}
+
/*
* Read data to a buf, including read-ahead if we find this to be beneficial.
* cluster_read replaces bread.
@@ -372,7 +385,7 @@
((tbp->b_flags & B_VMIO) == 0) || (run <= 1) )
return tbp;
- bp = trypbuf(&cluster_pbuf_freecnt);
+ bp = uma_zalloc(cluster_pbuf_zone, M_NOWAIT);
if (bp == NULL)
return tbp;
@@ -603,7 +616,7 @@
bufdone(tbp);
}
pbrelvp(bp);
- relpbuf(bp, &cluster_pbuf_freecnt);
+ uma_zfree(cluster_pbuf_zone, bp);
}
/*
@@ -856,9 +869,8 @@
(tbp->b_bcount != tbp->b_bufsize) ||
(tbp->b_bcount != size) ||
(len == 1) ||
- ((bp = (vp->v_vflag & VV_MD) != 0 ?
- trypbuf(&cluster_pbuf_freecnt) :
- getpbuf(&cluster_pbuf_freecnt)) == NULL)) {
+ ((bp = uma_zalloc(cluster_pbuf_zone,
+ (vp->v_vflag & VV_MD) != 0 ? M_NOWAIT : M_WAITOK)) == NULL)) {
totalwritten += tbp->b_bufsize;
bawrite(tbp);
++start_lbn;
Index: sys/sys/buf.h
===================================================================
--- sys/sys/buf.h
+++ sys/sys/buf.h
@@ -44,6 +44,7 @@
#include <sys/queue.h>
#include <sys/lock.h>
#include <sys/lockmgr.h>
+#include <vm/uma.h>
struct bio;
struct buf;
@@ -287,7 +288,7 @@
* Initialize a lock.
*/
#define BUF_LOCKINIT(bp) \
- lockinit(&(bp)->b_lock, PRIBIO + 4, buf_wmesg, 0, 0)
+ lockinit(&(bp)->b_lock, PRIBIO + 4, buf_wmesg, 0, LK_NEW)
/*
*
* Get a lock sleeping non-interruptably until it becomes available.
@@ -493,10 +494,6 @@
extern int dirtybufferflushes;
extern int altbufferflushes;
extern int nswbuf; /* Number of swap I/O buffer headers. */
-extern int cluster_pbuf_freecnt; /* Number of pbufs for clusters */
-extern int vnode_pbuf_freecnt; /* Number of pbufs for vnode pager */
-extern int vnode_async_pbuf_freecnt; /* Number of pbufs for vnode pager,
- asynchronous reads */
extern caddr_t unmapped_buf; /* Data address for unmapped buffers. */
static inline int
@@ -537,7 +534,6 @@
void bqrelse(struct buf *);
int vfs_bio_awrite(struct buf *);
void vfs_drain_busy_pages(struct buf *bp);
-struct buf * getpbuf(int *);
struct buf *incore(struct bufobj *, daddr_t);
struct buf *gbincore(struct bufobj *, daddr_t);
struct buf *getblk(struct vnode *, daddr_t, int, int, int, int);
@@ -549,6 +545,11 @@
void bufdone(struct buf *);
void bd_speedup(void);
+extern uma_zone_t pbuf_zone;
+int pbuf_init(void *, int, int);
+int pbuf_ctor(void *, int, void *, int);
+void pbuf_dtor(void *, int, void *);
+
int cluster_read(struct vnode *, u_quad_t, daddr_t, long,
struct ucred *, long, int, int, struct buf **);
int cluster_wbuild(struct vnode *, long, daddr_t, int, int);
@@ -562,7 +563,6 @@
void vfs_unbusy_pages(struct buf *);
int vmapbuf(struct buf *, int);
void vunmapbuf(struct buf *);
-void relpbuf(struct buf *, int *);
void brelvp(struct buf *);
void bgetvp(struct vnode *, struct buf *);
void pbgetbo(struct bufobj *bo, struct buf *bp);
@@ -571,7 +571,6 @@
void pbrelvp(struct buf *);
int allocbuf(struct buf *bp, int size);
void reassignbuf(struct buf *);
-struct buf *trypbuf(int *);
void bwait(struct buf *, u_char, const char *);
void bdone(struct buf *);
Index: sys/sys/lockmgr.h
===================================================================
--- sys/sys/lockmgr.h
+++ sys/sys/lockmgr.h
@@ -143,7 +143,7 @@
/*
* Flags for lockinit().
*/
-#define LK_INIT_MASK 0x0000FF
+#define LK_INIT_MASK 0x0001FF
#define LK_CANRECURSE 0x000001
#define LK_NODUP 0x000002
#define LK_NOPROFILE 0x000004
@@ -152,6 +152,7 @@
#define LK_QUIET 0x000020
#define LK_ADAPTIVE 0x000040
#define LK_IS_VNODE 0x000080 /* Tell WITNESS about a VNODE lock */
+#define LK_NEW 0x000100
/*
* Additional attributes to be used in lockmgr().
Index: sys/ufs/ffs/ffs_rawread.c
===================================================================
--- sys/ufs/ffs/ffs_rawread.c
+++ sys/ufs/ffs/ffs_rawread.c
@@ -74,9 +74,7 @@
SYSCTL_DECL(_vfs_ffs);
-static int ffsrawbufcnt = 4;
-SYSCTL_INT(_vfs_ffs, OID_AUTO, ffsrawbufcnt, CTLFLAG_RD, &ffsrawbufcnt, 0,
- "Buffers available for raw reads");
+static uma_zone_t ffsraw_pbuf_zone;
static int allowrawread = 1;
SYSCTL_INT(_vfs_ffs, OID_AUTO, allowrawread, CTLFLAG_RW, &allowrawread, 0,
@@ -90,7 +88,10 @@
ffs_rawread_setup(void *arg __unused)
{
- ffsrawbufcnt = (nswbuf > 100 ) ? (nswbuf - (nswbuf >> 4)) : nswbuf - 8;
+ ffsraw_pbuf_zone = uma_zsecond_create("ffsrawpbuf", pbuf_ctor,
+ pbuf_dtor, pbuf_init, NULL, pbuf_zone);
+ uma_zone_set_max(ffsraw_pbuf_zone, (nswbuf > 100 ) ?
+ (nswbuf - (nswbuf >> 4)) : nswbuf - 8);
}
SYSINIT(ffs_raw, SI_SUB_VM_CONF, SI_ORDER_ANY, ffs_rawread_setup, NULL);
@@ -296,8 +297,7 @@
while (resid > 0) {
if (bp == NULL) { /* Setup first read */
- /* XXX: Leave some bufs for swap */
- bp = getpbuf(&ffsrawbufcnt);
+ bp = uma_zalloc(ffsraw_pbuf_zone, M_WAITOK);
pbgetvp(vp, bp);
error = ffs_rawread_readahead(vp, udata, offset,
resid, td, bp);
@@ -305,9 +305,9 @@
break;
if (resid > bp->b_bufsize) { /* Setup fist readahead */
- /* XXX: Leave bufs for swap */
if (rawreadahead != 0)
- nbp = trypbuf(&ffsrawbufcnt);
+ nbp = uma_zalloc(ffsraw_pbuf_zone,
+ M_NOWAIT);
else
nbp = NULL;
if (nbp != NULL) {
@@ -324,7 +324,8 @@
nbp);
if (nerror) {
pbrelvp(nbp);
- relpbuf(nbp, &ffsrawbufcnt);
+ uma_zfree(ffsraw_pbuf_zone,
+ nbp);
nbp = NULL;
}
}
@@ -365,7 +366,7 @@
if (resid <= bp->b_bufsize) { /* No more readaheads */
pbrelvp(nbp);
- relpbuf(nbp, &ffsrawbufcnt);
+ uma_zfree(ffsraw_pbuf_zone, nbp);
nbp = NULL;
} else { /* Setup next readahead */
nerror = ffs_rawread_readahead(vp,
@@ -379,7 +380,7 @@
nbp);
if (nerror != 0) {
pbrelvp(nbp);
- relpbuf(nbp, &ffsrawbufcnt);
+ uma_zfree(ffsraw_pbuf_zone, nbp);
nbp = NULL;
}
}
@@ -395,13 +396,13 @@
if (bp != NULL) {
pbrelvp(bp);
- relpbuf(bp, &ffsrawbufcnt);
+ uma_zfree(ffsraw_pbuf_zone, bp);
}
if (nbp != NULL) { /* Run down readahead buffer */
bwait(nbp, PRIBIO, "rawrd");
vunmapbuf(nbp);
pbrelvp(nbp);
- relpbuf(nbp, &ffsrawbufcnt);
+ uma_zfree(ffsraw_pbuf_zone, nbp);
}
if (error == 0)
Index: sys/vm/swap_pager.c
===================================================================
--- sys/vm/swap_pager.c
+++ sys/vm/swap_pager.c
@@ -324,9 +324,8 @@
static int swap_pager_full = 2; /* swap space exhaustion (task killing) */
static int swap_pager_almost_full = 1; /* swap space exhaustion (w/hysteresis)*/
-static int nsw_rcount; /* free read buffers */
-static int nsw_wcount_sync; /* limit write buffers / synchronous */
-static int nsw_wcount_async; /* limit write buffers / asynchronous */
+static struct mtx swbuf_mtx; /* to sync nsw_wcount_async */
+static int nsw_wcount_async; /* limit async write buffers */
static int nsw_wcount_async_max;/* assigned maximum */
static int nsw_cluster_max; /* maximum VOP I/O allowed */
@@ -352,6 +351,8 @@
(&swap_pager_object_list[((int)(intptr_t)handle >> 4) & (NOBJLISTS-1)])
static struct pagerlst swap_pager_object_list[NOBJLISTS];
+static uma_zone_t swwbuf_zone;
+static uma_zone_t swrbuf_zone;
static uma_zone_t swblk_zone;
static uma_zone_t swpctrie_zone;
@@ -539,12 +540,16 @@
*/
nsw_cluster_max = min((MAXPHYS/PAGE_SIZE), MAX_PAGEOUT_CLUSTER);
- mtx_lock(&pbuf_mtx);
- nsw_rcount = (nswbuf + 1) / 2;
- nsw_wcount_sync = (nswbuf + 3) / 4;
nsw_wcount_async = 4;
nsw_wcount_async_max = nsw_wcount_async;
- mtx_unlock(&pbuf_mtx);
+ mtx_init(&swbuf_mtx, "async swbuf mutex", NULL, MTX_DEF);
+
+ swwbuf_zone = uma_zsecond_create("swwbuf", pbuf_ctor, pbuf_dtor,
+ pbuf_init, NULL, pbuf_zone);
+ uma_zone_set_max(swwbuf_zone, (nswbuf + 3) / 4);
+ swrbuf_zone = uma_zsecond_create("swrbuf", pbuf_ctor, pbuf_dtor,
+ pbuf_init, NULL, pbuf_zone);
+ uma_zone_set_max(swrbuf_zone, (nswbuf + 1) / 2);
/*
* Initialize our zone, guessing on the number we need based
@@ -1205,7 +1210,7 @@
("no swap blocking containing %p(%jx)", object, (uintmax_t)pindex));
VM_OBJECT_WUNLOCK(object);
- bp = getpbuf(&nsw_rcount);
+ bp = uma_zalloc(swrbuf_zone, M_WAITOK);
/* Pages cannot leave the object while busy. */
for (i = 0, p = bm; i < count; i++, p = TAILQ_NEXT(p, listq)) {
MPASS(p->pindex == bm->pindex + i);
@@ -1406,12 +1411,17 @@
* All I/O parameters have been satisfied, build the I/O
* request and assign the swap space.
*/
- if (sync == TRUE) {
- bp = getpbuf(&nsw_wcount_sync);
- } else {
- bp = getpbuf(&nsw_wcount_async);
- bp->b_flags = B_ASYNC;
+ if (sync != TRUE) {
+ mtx_lock(&swbuf_mtx);
+ while (nsw_wcount_async == 0)
+ msleep(&nsw_wcount_async, &swbuf_mtx, PVM,
+ "swbufa", 0);
+ nsw_wcount_async--;
+ mtx_unlock(&swbuf_mtx);
}
+ bp = uma_zalloc(swwbuf_zone, M_WAITOK);
+ if (sync != TRUE)
+ bp->b_flags = B_ASYNC;
bp->b_flags |= B_PAGING;
bp->b_iocmd = BIO_WRITE;
@@ -1634,15 +1644,13 @@
/*
* release the physical I/O buffer
*/
- relpbuf(
- bp,
- ((bp->b_iocmd == BIO_READ) ? &nsw_rcount :
- ((bp->b_flags & B_ASYNC) ?
- &nsw_wcount_async :
- &nsw_wcount_sync
- )
- )
- );
+ if (bp->b_flags & B_ASYNC) {
+ mtx_lock(&swbuf_mtx);
+ if (++nsw_wcount_async == 1)
+ wakeup(&nsw_wcount_async);
+ mtx_unlock(&swbuf_mtx);
+ }
+ uma_zfree((bp->b_iocmd == BIO_READ) ? swrbuf_zone : swwbuf_zone, bp);
}
int
@@ -2627,6 +2635,7 @@
bp->b_ioflags |= BIO_ERROR;
bp->b_resid = bp->b_bcount - bp2->bio_completed;
bp->b_error = bp2->bio_error;
+ bp->b_caller1 = NULL;
bufdone(bp);
sp = bp2->bio_caller1;
mtx_lock(&sw_dev_mtx);
@@ -2666,6 +2675,7 @@
return;
}
+ bp->b_caller1 = bio;
bio->bio_caller1 = sp;
bio->bio_caller2 = bp;
bio->bio_cmd = bp->b_iocmd;
@@ -2880,7 +2890,7 @@
if (new > nswbuf / 2 || new < 1)
return (EINVAL);
- mtx_lock(&pbuf_mtx);
+ mtx_lock(&swbuf_mtx);
while (nsw_wcount_async_max != new) {
/*
* Adjust difference. If the current async count is too low,
@@ -2895,11 +2905,11 @@
} else {
nsw_wcount_async_max -= nsw_wcount_async;
nsw_wcount_async = 0;
- msleep(&nsw_wcount_async, &pbuf_mtx, PSWP,
+ msleep(&nsw_wcount_async, &swbuf_mtx, PSWP,
"swpsysctl", 0);
}
}
- mtx_unlock(&pbuf_mtx);
+ mtx_unlock(&swbuf_mtx);
return (0);
}
Index: sys/vm/uma.h
===================================================================
--- sys/vm/uma.h
+++ sys/vm/uma.h
@@ -217,17 +217,6 @@
uma_zone_t uma_zsecond_create(char *name, uma_ctor ctor, uma_dtor dtor,
uma_init zinit, uma_fini zfini, uma_zone_t master);
-/*
- * Add a second master to a secondary zone. This provides multiple data
- * backends for objects with the same size. Both masters must have
- * compatible allocation flags. Presently, UMA_ZONE_MALLOC type zones are
- * the only supported.
- *
- * Returns:
- * Error on failure, 0 on success.
- */
-int uma_zsecond_add(uma_zone_t zone, uma_zone_t master);
-
/*
* Create cache-only zones.
*
@@ -285,10 +274,6 @@
* NUMA aware Zone. Implements a best
* effort first-touch policy.
*/
-#define UMA_ZONE_NOBUCKETCACHE 0x20000 /*
- * Don't cache full buckets. Limit
- * UMA to per-cpu state.
- */
/*
* These flags are shared between the keg and zone. In zones wishing to add
@@ -511,6 +496,18 @@
*/
int uma_zone_set_max(uma_zone_t zone, int nitems);
+/*
+ * Sets a high limit on the number of items allowed in the zone's bucket cache
+ *
+ * Arguments:
+ * zone The zone to limit
+ * nitems The requested upper limit on the number of items allowed
+ *
+ * Returns:
+ * int The effective value of nitems set
+ */
+int uma_zone_set_maxcache(uma_zone_t zone, int nitems);
+
/*
* Obtains the effective limit on the number of items in a zone
*
Index: sys/vm/uma_core.c
===================================================================
--- sys/vm/uma_core.c
+++ sys/vm/uma_core.c
@@ -255,17 +255,17 @@
static void uma_timeout(void *);
static void uma_startup3(void);
static void *zone_alloc_item(uma_zone_t, void *, int, int);
+static void *zone_alloc_item_locked(uma_zone_t, void *, int, int);
static void zone_free_item(uma_zone_t, void *, void *, enum zfreeskip);
static void bucket_enable(void);
static void bucket_init(void);
static uma_bucket_t bucket_alloc(uma_zone_t zone, void *, int);
static void bucket_free(uma_zone_t zone, uma_bucket_t, void *);
static void bucket_zone_drain(void);
-static uma_bucket_t zone_alloc_bucket(uma_zone_t, void *, int, int);
+static uma_bucket_t zone_alloc_bucket(uma_zone_t, void *, int, int, int);
static uma_slab_t zone_fetch_slab(uma_zone_t, uma_keg_t, int, int);
-static uma_slab_t zone_fetch_slab_multi(uma_zone_t, uma_keg_t, int, int);
static void *slab_alloc_item(uma_keg_t keg, uma_slab_t slab);
-static void slab_free_item(uma_keg_t keg, uma_slab_t slab, void *item);
+static void slab_free_item(uma_zone_t zone, uma_slab_t slab, void *item);
static uma_keg_t uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit,
uma_fini fini, int align, uint32_t flags);
static int zone_import(uma_zone_t, void **, int, int, int);
@@ -472,6 +472,7 @@
zdom->uzd_nitems -= bucket->ub_cnt;
if (ws && zdom->uzd_imin > zdom->uzd_nitems)
zdom->uzd_imin = zdom->uzd_nitems;
+ zone->uz_bkt_count -= bucket->ub_cnt;
}
return (bucket);
}
@@ -482,11 +483,14 @@
{
ZONE_LOCK_ASSERT(zone);
+ KASSERT(zone->uz_bkt_count < zone->uz_bkt_max, ("%s: zone %p overflow",
+ __func__, zone));
LIST_INSERT_HEAD(&zdom->uzd_buckets, bucket, ub_link);
zdom->uzd_nitems += bucket->ub_cnt;
if (ws && zdom->uzd_imax < zdom->uzd_nitems)
zdom->uzd_imax = zdom->uzd_nitems;
+ zone->uz_bkt_count += bucket->ub_cnt;
}
static void
@@ -509,15 +513,6 @@
taskqueue_enqueue(taskqueue_thread, &zone->uz_maxaction);
}
-static void
-zone_foreach_keg(uma_zone_t zone, void (*kegfn)(uma_keg_t))
-{
- uma_klink_t klink;
-
- LIST_FOREACH(klink, &zone->uz_kegs, kl_link)
- kegfn(klink->kl_keg);
-}
-
/*
* Routine called by timeout which is used to fire off some time interval
* based calculations. (stats, hash size, etc.)
@@ -562,8 +557,9 @@
* Returns nothing.
*/
static void
-keg_timeout(uma_keg_t keg)
+zone_timeout(uma_zone_t zone)
{
+ uma_keg_t keg = zone->uz_keg;
KEG_LOCK(keg);
/*
@@ -601,20 +597,11 @@
return;
}
}
- KEG_UNLOCK(keg);
-}
-
-static void
-zone_timeout(uma_zone_t zone)
-{
- int i;
- zone_foreach_keg(zone, &keg_timeout);
-
- ZONE_LOCK(zone);
- for (i = 0; i < vm_ndomains; i++)
+ for (int i = 0; i < vm_ndomains; i++)
zone_domain_update_wss(&zone->uz_domain[i]);
- ZONE_UNLOCK(zone);
+
+ KEG_UNLOCK(keg);
}
/*
@@ -744,6 +731,11 @@
for (i = 0; i < bucket->ub_cnt; i++)
zone->uz_fini(bucket->ub_bucket[i], zone->uz_size);
zone->uz_release(zone->uz_arg, bucket->ub_bucket, bucket->ub_cnt);
+ ZONE_LOCK(zone);
+ zone->uz_items -= bucket->ub_cnt;
+ if (zone->uz_sleepers && zone->uz_items < zone->uz_max_items)
+ wakeup_one(zone);
+ ZONE_UNLOCK(zone);
bucket->ub_cnt = 0;
}
@@ -1029,7 +1021,7 @@
* we're running. Normally the uma_rwlock would protect us but we
* must be able to release and acquire the right lock for each keg.
*/
- zone_foreach_keg(zone, &keg_drain);
+ keg_drain(zone->uz_keg);
ZONE_LOCK(zone);
zone->uz_flags &= ~UMA_ZFLAG_DRAINING;
wakeup(zone);
@@ -1068,7 +1060,8 @@
KASSERT(domain >= 0 && domain < vm_ndomains,
("keg_alloc_slab: domain %d out of range", domain));
- mtx_assert(&keg->uk_lock, MA_OWNED);
+ KEG_LOCK_ASSERT(keg);
+ MPASS(zone->uz_lockptr == &keg->uk_lock);
allocf = keg->uk_allocf;
KEG_UNLOCK(keg);
@@ -1164,8 +1157,7 @@
void *mem;
int pages;
- keg = zone_first_keg(zone);
-
+ keg = zone->uz_keg;
/*
* If we are in BOOT_BUCKETS or higher, than switch to real
* allocator. Zones with page sized slabs switch at BOOT_PAGEALLOC.
@@ -1303,7 +1295,7 @@
uma_keg_t keg;
TAILQ_INIT(&alloctail);
- keg = zone_first_keg(zone);
+ keg = zone->uz_keg;
npages = howmany(bytes, PAGE_SIZE);
while (npages > 0) {
@@ -1526,8 +1518,6 @@
u_int shsize;
KASSERT(keg != NULL, ("Keg is null in keg_large_init"));
- KASSERT((keg->uk_flags & UMA_ZFLAG_CACHEONLY) == 0,
- ("keg_large_init: Cannot large-init a UMA_ZFLAG_CACHEONLY keg"));
KASSERT((keg->uk_flags & UMA_ZONE_PCPU) == 0,
("%s: Cannot large-init a UMA_ZONE_PCPU keg", __func__));
@@ -1766,14 +1756,13 @@
zone->uz_sleeps = 0;
zone->uz_count = 0;
zone->uz_count_min = 0;
+ zone->uz_count_max = BUCKET_MAX;
zone->uz_flags = 0;
zone->uz_warning = NULL;
/* The domain structures follow the cpu structures. */
zone->uz_domain = (struct uma_zone_domain *)&zone->uz_cpu[mp_ncpus];
+ zone->uz_bkt_max = ULONG_MAX;
timevalclear(&zone->uz_ratecheck);
- keg = arg->keg;
-
- ZONE_LOCK_INIT(zone, (arg->flags & UMA_ZONE_MTXCLASS));
/*
* This is a pure cache zone, no kegs.
@@ -1787,6 +1776,7 @@
zone->uz_release = arg->release;
zone->uz_arg = arg->arg;
zone->uz_lockptr = &zone->uz_lock;
+ ZONE_LOCK_INIT(zone, (arg->flags & UMA_ZONE_MTXCLASS));
rw_wlock(&uma_rwlock);
LIST_INSERT_HEAD(&uma_cachezones, zone, uz_link);
rw_wunlock(&uma_rwlock);
@@ -1799,6 +1789,7 @@
zone->uz_import = (uma_import)zone_import;
zone->uz_release = (uma_release)zone_release;
zone->uz_arg = zone;
+ keg = arg->keg;
if (arg->flags & UMA_ZONE_SECONDARY) {
KASSERT(arg->keg != NULL, ("Secondary zone on zero'd keg"));
@@ -1837,12 +1828,7 @@
return (error);
}
- /*
- * Link in the first keg.
- */
- zone->uz_klink.kl_keg = keg;
- LIST_INSERT_HEAD(&zone->uz_kegs, &zone->uz_klink, kl_link);
- zone->uz_lockptr = &keg->uk_lock;
+ zone->uz_keg = keg;
zone->uz_size = keg->uk_size;
zone->uz_flags |= (keg->uk_flags &
(UMA_ZONE_INHERIT | UMA_ZFLAG_INHERIT));
@@ -1908,12 +1894,10 @@
static void
zone_dtor(void *arg, int size, void *udata)
{
- uma_klink_t klink;
uma_zone_t zone;
uma_keg_t keg;
zone = (uma_zone_t)arg;
- keg = zone_first_keg(zone);
if (!(zone->uz_flags & UMA_ZFLAG_INTERNAL))
cache_drain(zone);
@@ -1928,26 +1912,18 @@
* remove it... we dont care for now
*/
zone_drain_wait(zone, M_WAITOK);
- /*
- * Unlink all of our kegs.
- */
- while ((klink = LIST_FIRST(&zone->uz_kegs)) != NULL) {
- klink->kl_keg = NULL;
- LIST_REMOVE(klink, kl_link);
- if (klink == &zone->uz_klink)
- continue;
- free(klink, M_TEMP);
- }
/*
* We only destroy kegs from non secondary zones.
*/
- if (keg != NULL && (zone->uz_flags & UMA_ZONE_SECONDARY) == 0) {
+ if ((keg = zone->uz_keg) != NULL &&
+ (zone->uz_flags & UMA_ZONE_SECONDARY) == 0) {
rw_wlock(&uma_rwlock);
LIST_REMOVE(keg, uk_link);
rw_wunlock(&uma_rwlock);
zone_free_item(kegs, keg, NULL, SKIP_NONE);
}
- ZONE_LOCK_FINI(zone);
+ if (zone->uz_lockptr == &zone->uz_lock)
+ ZONE_LOCK_FINI(zone);
}
/*
@@ -2231,7 +2207,7 @@
uma_zone_t res;
bool locked;
- keg = zone_first_keg(master);
+ keg = master->uz_keg;
memset(&args, 0, sizeof(args));
args.name = name;
args.size = keg->uk_size;
@@ -2280,85 +2256,6 @@
return (zone_alloc_item(zones, &args, UMA_ANYDOMAIN, M_WAITOK));
}
-static void
-zone_lock_pair(uma_zone_t a, uma_zone_t b)
-{
- if (a < b) {
- ZONE_LOCK(a);
- mtx_lock_flags(b->uz_lockptr, MTX_DUPOK);
- } else {
- ZONE_LOCK(b);
- mtx_lock_flags(a->uz_lockptr, MTX_DUPOK);
- }
-}
-
-static void
-zone_unlock_pair(uma_zone_t a, uma_zone_t b)
-{
-
- ZONE_UNLOCK(a);
- ZONE_UNLOCK(b);
-}
-
-int
-uma_zsecond_add(uma_zone_t zone, uma_zone_t master)
-{
- uma_klink_t klink;
- uma_klink_t kl;
- int error;
-
- error = 0;
- klink = malloc(sizeof(*klink), M_TEMP, M_WAITOK | M_ZERO);
-
- zone_lock_pair(zone, master);
- /*
- * zone must use vtoslab() to resolve objects and must already be
- * a secondary.
- */
- if ((zone->uz_flags & (UMA_ZONE_VTOSLAB | UMA_ZONE_SECONDARY))
- != (UMA_ZONE_VTOSLAB | UMA_ZONE_SECONDARY)) {
- error = EINVAL;
- goto out;
- }
- /*
- * The new master must also use vtoslab().
- */
- if ((zone->uz_flags & UMA_ZONE_VTOSLAB) != UMA_ZONE_VTOSLAB) {
- error = EINVAL;
- goto out;
- }
-
- /*
- * The underlying object must be the same size. rsize
- * may be different.
- */
- if (master->uz_size != zone->uz_size) {
- error = E2BIG;
- goto out;
- }
- /*
- * Put it at the end of the list.
- */
- klink->kl_keg = zone_first_keg(master);
- LIST_FOREACH(kl, &zone->uz_kegs, kl_link) {
- if (LIST_NEXT(kl, kl_link) == NULL) {
- LIST_INSERT_AFTER(kl, klink, kl_link);
- break;
- }
- }
- klink = NULL;
- zone->uz_flags |= UMA_ZFLAG_MULTI;
- zone->uz_slab = zone_fetch_slab_multi;
-
-out:
- zone_unlock_pair(zone, master);
- if (klink != NULL)
- free(klink, M_TEMP);
-
- return (error);
-}
-
-
/* See uma.h */
void
uma_zdestroy(uma_zone_t zone)
@@ -2420,7 +2317,7 @@
uma_bucket_t bucket;
uma_cache_t cache;
void *item;
- int cpu, domain, lockfail;
+ int cpu, domain, lockfail, maxbucket;
#ifdef INVARIANTS
bool skipdbg;
#endif
@@ -2541,8 +2438,10 @@
domain = UMA_ANYDOMAIN;
/* Short-circuit for zones without buckets and low memory. */
- if (zone->uz_count == 0 || bucketdisable)
+ if (zone->uz_count == 0 || bucketdisable) {
+ ZONE_LOCK(zone);
goto zalloc_item;
+ }
/*
* Attempt to retrieve the item from the per-CPU cache has failed, so
@@ -2590,8 +2489,17 @@
* We bump the uz count when the cache size is insufficient to
* handle the working set.
*/
- if (lockfail && zone->uz_count < BUCKET_MAX)
+ if (lockfail && zone->uz_count < zone->uz_count_max)
zone->uz_count++;
+
+ if (zone->uz_max_items > 0) {
+ if (zone->uz_items >= zone->uz_max_items)
+ goto zalloc_item;
+ maxbucket = MIN(zone->uz_count,
+ zone->uz_max_items - zone->uz_items);
+ } else
+ maxbucket = zone->uz_count;
+ zone->uz_items += maxbucket;
ZONE_UNLOCK(zone);
/*
@@ -2599,11 +2507,18 @@
* works we'll restart the allocation from the beginning and it
* will use the just filled bucket.
*/
- bucket = zone_alloc_bucket(zone, udata, domain, flags);
+ bucket = zone_alloc_bucket(zone, udata, domain, flags, maxbucket);
CTR3(KTR_UMA, "uma_zalloc: zone %s(%p) bucket zone returned %p",
zone->uz_name, zone, bucket);
+ ZONE_LOCK(zone);
if (bucket != NULL) {
- ZONE_LOCK(zone);
+ if (bucket->ub_cnt < maxbucket) {
+ MPASS(zone->uz_items >= maxbucket - bucket->ub_cnt);
+ zone->uz_items -= maxbucket - bucket->ub_cnt;
+ if (zone->uz_sleepers > 0 &&
+ zone->uz_items < zone->uz_max_items)
+ wakeup_one(zone);
+ }
critical_enter();
cpu = curcpu;
cache = &zone->uz_cpu[cpu];
@@ -2618,7 +2533,7 @@
domain == PCPU_GET(domain))) {
cache->uc_allocbucket = bucket;
zdom->uzd_imax += bucket->ub_cnt;
- } else if ((zone->uz_flags & UMA_ZONE_NOBUCKETCACHE) != 0) {
+ } else if (zone->uz_bkt_count >= zone->uz_bkt_max) {
critical_exit();
ZONE_UNLOCK(zone);
bucket_drain(zone, bucket);
@@ -2628,13 +2543,18 @@
zone_put_bucket(zone, zdom, bucket, false);
ZONE_UNLOCK(zone);
goto zalloc_start;
+ } else {
+ zone->uz_items -= maxbucket;
+ if (zone->uz_sleepers > 0 &&
+ zone->uz_items + 1 < zone->uz_max_items)
+ wakeup_one(zone);
}
/*
* We may not be able to get a bucket so return an actual item.
*/
zalloc_item:
- item = zone_alloc_item(zone, udata, domain, flags);
+ item = zone_alloc_item_locked(zone, udata, domain, flags);
return (item);
}
@@ -2677,6 +2597,7 @@
KASSERT(domain >= 0 && domain < vm_ndomains,
("keg_first_slab: domain %d out of range", domain));
+ KEG_LOCK_ASSERT(keg);
slab = NULL;
start = domain;
@@ -2702,7 +2623,7 @@
{
uint32_t reserve;
- mtx_assert(&keg->uk_lock, MA_OWNED);
+ KEG_LOCK_ASSERT(keg);
reserve = (flags & M_USE_RESERVE) != 0 ? 0 : keg->uk_reserve;
if (keg->uk_free <= reserve)
@@ -2720,7 +2641,7 @@
bool rr;
restart:
- mtx_assert(&keg->uk_lock, MA_OWNED);
+ KEG_LOCK_ASSERT(keg);
/*
* Use the keg's policy if upper layers haven't already specified a
@@ -2753,23 +2674,10 @@
if (flags & M_NOVM)
break;
- if (keg->uk_maxpages && keg->uk_pages >= keg->uk_maxpages) {
- keg->uk_flags |= UMA_ZFLAG_FULL;
- /*
- * If this is not a multi-zone, set the FULL bit.
- * Otherwise slab_multi() takes care of it.
- */
- if ((zone->uz_flags & UMA_ZFLAG_MULTI) == 0) {
- zone->uz_flags |= UMA_ZFLAG_FULL;
- zone_log_warning(zone);
- zone_maxaction(zone);
- }
- if (flags & M_NOWAIT)
- return (NULL);
- zone->uz_sleeps++;
- msleep(keg, &keg->uk_lock, PVM, "keglimit", 0);
- continue;
- }
+ KASSERT(zone->uz_max_items == 0 ||
+ zone->uz_items <= zone->uz_max_items,
+ ("%s: zone %p overflow", __func__, zone));
+
slab = keg_alloc_slab(keg, zone, domain, aflags);
/*
* If we got a slab here it's safe to mark it partially used
@@ -2812,7 +2720,7 @@
uma_slab_t slab;
if (keg == NULL) {
- keg = zone_first_keg(zone);
+ keg = zone->uz_keg;
KEG_LOCK(keg);
}
@@ -2827,87 +2735,6 @@
return (NULL);
}
-/*
- * uma_zone_fetch_slab_multi: Fetches a slab from one available keg. Returns
- * with the keg locked. On NULL no lock is held.
- *
- * The last pointer is used to seed the search. It is not required.
- */
-static uma_slab_t
-zone_fetch_slab_multi(uma_zone_t zone, uma_keg_t last, int domain, int rflags)
-{
- uma_klink_t klink;
- uma_slab_t slab;
- uma_keg_t keg;
- int flags;
- int empty;
- int full;
-
- /*
- * Don't wait on the first pass. This will skip limit tests
- * as well. We don't want to block if we can find a provider
- * without blocking.
- */
- flags = (rflags & ~M_WAITOK) | M_NOWAIT;
- /*
- * Use the last slab allocated as a hint for where to start
- * the search.
- */
- if (last != NULL) {
- slab = keg_fetch_slab(last, zone, domain, flags);
- if (slab)
- return (slab);
- KEG_UNLOCK(last);
- }
- /*
- * Loop until we have a slab incase of transient failures
- * while M_WAITOK is specified. I'm not sure this is 100%
- * required but we've done it for so long now.
- */
- for (;;) {
- empty = 0;
- full = 0;
- /*
- * Search the available kegs for slabs. Be careful to hold the
- * correct lock while calling into the keg layer.
- */
- LIST_FOREACH(klink, &zone->uz_kegs, kl_link) {
- keg = klink->kl_keg;
- KEG_LOCK(keg);
- if ((keg->uk_flags & UMA_ZFLAG_FULL) == 0) {
- slab = keg_fetch_slab(keg, zone, domain, flags);
- if (slab)
- return (slab);
- }
- if (keg->uk_flags & UMA_ZFLAG_FULL)
- full++;
- else
- empty++;
- KEG_UNLOCK(keg);
- }
- if (rflags & (M_NOWAIT | M_NOVM))
- break;
- flags = rflags;
- /*
- * All kegs are full. XXX We can't atomically check all kegs
- * and sleep so just sleep for a short period and retry.
- */
- if (full && !empty) {
- ZONE_LOCK(zone);
- zone->uz_flags |= UMA_ZFLAG_FULL;
- zone->uz_sleeps++;
- zone_log_warning(zone);
- zone_maxaction(zone);
- msleep(zone, zone->uz_lockptr, PVM,
- "zonelimit", hz/100);
- zone->uz_flags &= ~UMA_ZFLAG_FULL;
- ZONE_UNLOCK(zone);
- continue;
- }
- }
- return (NULL);
-}
-
static void *
slab_alloc_item(uma_keg_t keg, uma_slab_t slab)
{
@@ -2916,7 +2743,7 @@
uint8_t freei;
MPASS(keg == slab->us_keg);
- mtx_assert(&keg->uk_lock, MA_OWNED);
+ KEG_LOCK_ASSERT(keg);
freei = BIT_FFS(SLAB_SETSIZE, &slab->us_free) - 1;
BIT_CLR(SLAB_SETSIZE, freei, &slab->us_free);
@@ -2983,10 +2810,9 @@
}
static uma_bucket_t
-zone_alloc_bucket(uma_zone_t zone, void *udata, int domain, int flags)
+zone_alloc_bucket(uma_zone_t zone, void *udata, int domain, int flags, int max)
{
uma_bucket_t bucket;
- int max;
CTR1(KTR_UMA, "zone_alloc:_bucket domain %d)", domain);
@@ -2995,7 +2821,6 @@
if (bucket == NULL)
return (NULL);
- max = MIN(bucket->ub_entries, zone->uz_count);
bucket->ub_cnt = zone->uz_import(zone->uz_arg, bucket->ub_bucket,
max, domain, flags);
@@ -3049,13 +2874,42 @@
static void *
zone_alloc_item(uma_zone_t zone, void *udata, int domain, int flags)
+{
+
+ ZONE_LOCK(zone);
+ return (zone_alloc_item_locked(zone, udata, domain, flags));
+}
+
+static void *
+zone_alloc_item_locked(uma_zone_t zone, void *udata, int domain, int flags)
{
void *item;
#ifdef INVARIANTS
bool skipdbg;
#endif
- item = NULL;
+ ZONE_LOCK_ASSERT(zone);
+
+ if (zone->uz_max_items > 0 && zone->uz_items >= zone->uz_max_items) {
+ zone_log_warning(zone);
+ zone_maxaction(zone);
+ if (flags & M_NOWAIT) {
+ ZONE_UNLOCK(zone);
+ return (NULL);
+ }
+ zone->uz_sleeps++;
+ zone->uz_sleepers++;
+ while (zone->uz_items >= zone->uz_max_items)
+ mtx_sleep(zone, zone->uz_lockptr, PVM, "zonelimit", 0);
+ zone->uz_sleepers--;
+ if (zone->uz_sleepers > 0 &&
+ zone->uz_items + 1 < zone->uz_max_items)
+ wakeup_one(zone);
+ }
+
+ zone->uz_items++;
+ zone->uz_allocs++;
+ ZONE_UNLOCK(zone);
if (domain != UMA_ANYDOMAIN) {
/* avoid allocs targeting empty domains */
@@ -3064,7 +2918,6 @@
}
if (zone->uz_import(zone->uz_arg, &item, 1, domain, flags) != 1)
goto fail;
- atomic_add_long(&zone->uz_allocs, 1);
#ifdef INVARIANTS
skipdbg = uma_dbg_zskip(zone, item);
@@ -3105,6 +2958,10 @@
fail:
CTR2(KTR_UMA, "zone_alloc_item failed from %s(%p)",
zone->uz_name, zone);
+ ZONE_LOCK(zone);
+ zone->uz_items--;
+ zone->uz_allocs--;
+ ZONE_UNLOCK(zone);
atomic_add_long(&zone->uz_fails, 1);
return (NULL);
}
@@ -3116,7 +2973,8 @@
uma_cache_t cache;
uma_bucket_t bucket;
uma_zone_domain_t zdom;
- int cpu, domain, lockfail;
+ int cpu, domain;
+ bool lockfail;
#ifdef INVARIANTS
bool skipdbg;
#endif
@@ -3162,7 +3020,7 @@
* The race here is acceptable. If we miss it we'll just have to wait
* a little longer for the limits to be reset.
*/
- if (zone->uz_flags & UMA_ZFLAG_FULL)
+ if (zone->uz_sleepers > 0)
goto zfree_item;
/*
@@ -3212,12 +3070,20 @@
if (zone->uz_count == 0 || bucketdisable)
goto zfree_item;
- lockfail = 0;
+ lockfail = false;
if (ZONE_TRYLOCK(zone) == 0) {
/* Record contention to size the buckets. */
ZONE_LOCK(zone);
- lockfail = 1;
+ lockfail = true;
}
+ /*
+ * Now that we have the lock, check for sleepers and give the first
+ * one a chance to allocate. If the item ends up in a CPU cache and
+ * the sleeper wakes up on the wrong CPU, it will simply go back to
+ * sleep.
+ */
+ if (zone->uz_sleepers > 0)
+ wakeup_one(zone);
critical_enter();
cpu = curcpu;
cache = &zone->uz_cpu[cpu];
@@ -3245,9 +3111,9 @@
"uma_zfree: zone %s(%p) putting bucket %p on free list",
zone->uz_name, zone, bucket);
/* ub_cnt is pointing to the last free item */
- KASSERT(bucket->ub_cnt != 0,
- ("uma_zfree: Attempting to insert an empty bucket onto the full list.\n"));
- if ((zone->uz_flags & UMA_ZONE_NOBUCKETCACHE) != 0) {
+ KASSERT(bucket->ub_cnt == bucket->ub_entries,
+ ("uma_zfree: Attempting to insert not full bucket onto the full list.\n"));
+ if (zone->uz_bkt_count >= zone->uz_bkt_max) {
ZONE_UNLOCK(zone);
bucket_drain(zone, bucket);
bucket_free(zone, bucket, udata);
@@ -3260,7 +3126,7 @@
* We bump the uz count when the cache size is insufficient to
* handle the working set.
*/
- if (lockfail && zone->uz_count < BUCKET_MAX)
+ if (lockfail && zone->uz_count < zone->uz_count_max)
zone->uz_count++;
ZONE_UNLOCK(zone);
@@ -3291,8 +3157,6 @@
*/
zfree_item:
zone_free_item(zone, item, udata, SKIP_DTOR);
-
- return;
}
void
@@ -3315,12 +3179,15 @@
}
static void
-slab_free_item(uma_keg_t keg, uma_slab_t slab, void *item)
+slab_free_item(uma_zone_t zone, uma_slab_t slab, void *item)
{
+ uma_keg_t keg;
uma_domain_t dom;
uint8_t freei;
- mtx_assert(&keg->uk_lock, MA_OWNED);
+ keg = zone->uz_keg;
+ MPASS(zone->uz_lockptr == &keg->uk_lock);
+ KEG_LOCK_ASSERT(keg);
MPASS(keg == slab->us_keg);
dom = &keg->uk_domain[slab->us_domain];
@@ -3350,11 +3217,9 @@
uma_slab_t slab;
uma_keg_t keg;
uint8_t *mem;
- int clearfull;
int i;
- clearfull = 0;
- keg = zone_first_keg(zone);
+ keg = zone->uz_keg;
KEG_LOCK(keg);
for (i = 0; i < cnt; i++) {
item = bucket[i];
@@ -3368,37 +3233,11 @@
}
} else {
slab = vtoslab((vm_offset_t)item);
- if (slab->us_keg != keg) {
- KEG_UNLOCK(keg);
- keg = slab->us_keg;
- KEG_LOCK(keg);
- }
- }
- slab_free_item(keg, slab, item);
- if (keg->uk_flags & UMA_ZFLAG_FULL) {
- if (keg->uk_pages < keg->uk_maxpages) {
- keg->uk_flags &= ~UMA_ZFLAG_FULL;
- clearfull = 1;
- }
-
- /*
- * We can handle one more allocation. Since we're
- * clearing ZFLAG_FULL, wake up all procs blocked
- * on pages. This should be uncommon, so keeping this
- * simple for now (rather than adding count of blocked
- * threads etc).
- */
- wakeup(keg);
+ MPASS(slab->us_keg == keg);
}
+ slab_free_item(zone, slab, item);
}
KEG_UNLOCK(keg);
- if (clearfull) {
- ZONE_LOCK(zone);
- zone->uz_flags &= ~UMA_ZFLAG_FULL;
- wakeup(zone);
- ZONE_UNLOCK(zone);
- }
-
}
/*
@@ -3435,25 +3274,53 @@
if (skip < SKIP_FINI && zone->uz_fini)
zone->uz_fini(item, zone->uz_size);
- atomic_add_long(&zone->uz_frees, 1);
zone->uz_release(zone->uz_arg, &item, 1);
+
+ ZONE_LOCK(zone);
+ zone->uz_frees++;
+ zone->uz_items--;
+ if (zone->uz_sleepers > 0 && zone->uz_items < zone->uz_max_items)
+ wakeup_one(zone);
+ ZONE_UNLOCK(zone);
}
/* See uma.h */
int
uma_zone_set_max(uma_zone_t zone, int nitems)
{
- uma_keg_t keg;
+ struct uma_bucket_zone *ubz;
- keg = zone_first_keg(zone);
- if (keg == NULL)
- return (0);
- KEG_LOCK(keg);
- keg->uk_maxpages = (nitems / keg->uk_ipers) * keg->uk_ppera;
- if (keg->uk_maxpages * keg->uk_ipers < nitems)
- keg->uk_maxpages += keg->uk_ppera;
- nitems = (keg->uk_maxpages / keg->uk_ppera) * keg->uk_ipers;
- KEG_UNLOCK(keg);
+ /*
+ * If the limit is very low, we may need to cap how
+ * many items are allowed in the per-CPU caches.
+ */
+ ubz = &bucket_zones[0];
+ for (; ubz->ubz_entries != 0; ubz++)
+ if (ubz->ubz_entries * 2 * mp_ncpus > nitems)
+ break;
+ if (ubz == &bucket_zones[0])
+ nitems = ubz->ubz_entries * 2 * mp_ncpus;
+ else
+ ubz--;
+
+ ZONE_LOCK(zone);
+ zone->uz_count_max = zone->uz_count = ubz->ubz_entries;
+ if (zone->uz_count_min > zone->uz_count_max)
+ zone->uz_count_min = zone->uz_count_max;
+ zone->uz_max_items = nitems;
+ ZONE_UNLOCK(zone);
+
+ return (nitems);
+}
+
+/* See uma.h */
+int
+uma_zone_set_maxcache(uma_zone_t zone, int nitems)
+{
+
+ ZONE_LOCK(zone);
+ zone->uz_bkt_max = nitems;
+ ZONE_UNLOCK(zone);
return (nitems);
}
@@ -3463,14 +3330,10 @@
uma_zone_get_max(uma_zone_t zone)
{
int nitems;
- uma_keg_t keg;
- keg = zone_first_keg(zone);
- if (keg == NULL)
- return (0);
- KEG_LOCK(keg);
- nitems = (keg->uk_maxpages / keg->uk_ppera) * keg->uk_ipers;
- KEG_UNLOCK(keg);
+ ZONE_LOCK(zone);
+ nitems = zone->uz_max_items;
+ ZONE_UNLOCK(zone);
return (nitems);
}
@@ -3524,8 +3387,7 @@
{
uma_keg_t keg;
- keg = zone_first_keg(zone);
- KASSERT(keg != NULL, ("uma_zone_set_init: Invalid zone type"));
+ KEG_GET(zone, keg);
KEG_LOCK(keg);
KASSERT(keg->uk_pages == 0,
("uma_zone_set_init on non-empty keg"));
@@ -3539,8 +3401,7 @@
{
uma_keg_t keg;
- keg = zone_first_keg(zone);
- KASSERT(keg != NULL, ("uma_zone_set_fini: Invalid zone type"));
+ KEG_GET(zone, keg);
KEG_LOCK(keg);
KASSERT(keg->uk_pages == 0,
("uma_zone_set_fini on non-empty keg"));
@@ -3554,7 +3415,7 @@
{
ZONE_LOCK(zone);
- KASSERT(zone_first_keg(zone)->uk_pages == 0,
+ KASSERT(zone->uz_keg->uk_pages == 0,
("uma_zone_set_zinit on non-empty keg"));
zone->uz_init = zinit;
ZONE_UNLOCK(zone);
@@ -3566,7 +3427,7 @@
{
ZONE_LOCK(zone);
- KASSERT(zone_first_keg(zone)->uk_pages == 0,
+ KASSERT(zone->uz_keg->uk_pages == 0,
("uma_zone_set_zfini on non-empty keg"));
zone->uz_fini = zfini;
ZONE_UNLOCK(zone);
@@ -3579,7 +3440,7 @@
{
uma_keg_t keg;
- keg = zone_first_keg(zone);
+ KEG_GET(zone, keg);
KASSERT(keg != NULL, ("uma_zone_set_freef: Invalid zone type"));
KEG_LOCK(keg);
keg->uk_freef = freef;
@@ -3593,7 +3454,7 @@
{
uma_keg_t keg;
- keg = zone_first_keg(zone);
+ KEG_GET(zone, keg);
KEG_LOCK(keg);
keg->uk_allocf = allocf;
KEG_UNLOCK(keg);
@@ -3605,14 +3466,10 @@
{
uma_keg_t keg;
- keg = zone_first_keg(zone);
- if (keg == NULL)
- return;
+ KEG_GET(zone, keg);
KEG_LOCK(keg);
keg->uk_reserve = items;
KEG_UNLOCK(keg);
-
- return;
}
/* See uma.h */
@@ -3623,11 +3480,9 @@
vm_offset_t kva;
u_int pages;
- keg = zone_first_keg(zone);
- if (keg == NULL)
- return (0);
- pages = count / keg->uk_ipers;
+ KEG_GET(zone, keg);
+ pages = count / keg->uk_ipers;
if (pages * keg->uk_ipers < count)
pages++;
pages *= keg->uk_ppera;
@@ -3645,7 +3500,6 @@
KEG_LOCK(keg);
keg->uk_kva = kva;
keg->uk_offset = 0;
- keg->uk_maxpages = pages;
#ifdef UMA_MD_SMALL_ALLOC
keg->uk_allocf = (keg->uk_ppera > 1) ? noobj_alloc : uma_small_alloc;
#else
@@ -3667,9 +3521,7 @@
uma_keg_t keg;
int domain, flags, slabs;
- keg = zone_first_keg(zone);
- if (keg == NULL)
- return;
+ KEG_GET(zone, keg);
KEG_LOCK(keg);
slabs = items / keg->uk_ipers;
if (slabs * keg->uk_ipers < items)
@@ -3758,7 +3610,7 @@
int full;
ZONE_LOCK(zone);
- full = (zone->uz_flags & UMA_ZFLAG_FULL);
+ full = zone->uz_sleepers > 0;
ZONE_UNLOCK(zone);
return (full);
}
@@ -3766,7 +3618,7 @@
int
uma_zone_exhausted_nolock(uma_zone_t zone)
{
- return (zone->uz_flags & UMA_ZFLAG_FULL);
+ return (zone->uz_sleepers > 0);
}
void *
@@ -3886,11 +3738,11 @@
int i;
printf("keg: %s(%p) size %d(%d) flags %#x ipers %d ppera %d "
- "out %d free %d limit %d\n",
+ "out %d free %d\n",
keg->uk_name, keg, keg->uk_size, keg->uk_rsize, keg->uk_flags,
keg->uk_ipers, keg->uk_ppera,
(keg->uk_pages / keg->uk_ppera) * keg->uk_ipers - keg->uk_free,
- keg->uk_free, (keg->uk_maxpages / keg->uk_ppera) * keg->uk_ipers);
+ keg->uk_free);
for (i = 0; i < vm_ndomains; i++) {
dom = &keg->uk_domain[i];
printf("Part slabs:\n");
@@ -3909,13 +3761,13 @@
uma_print_zone(uma_zone_t zone)
{
uma_cache_t cache;
- uma_klink_t kl;
int i;
- printf("zone: %s(%p) size %d flags %#x\n",
- zone->uz_name, zone, zone->uz_size, zone->uz_flags);
- LIST_FOREACH(kl, &zone->uz_kegs, kl_link)
- uma_print_keg(kl->kl_keg);
+ printf("zone: %s(%p) size %d maxitems %lu flags %#x\n",
+ zone->uz_name, zone, zone->uz_size, zone->uz_max_items,
+ zone->uz_flags);
+ if (zone->uz_lockptr != &zone->uz_lock)
+ uma_print_keg(zone->uz_keg);
CPU_FOREACH(i) {
cache = &zone->uz_cpu[i];
printf("CPU %d Cache:\n", i);
@@ -3994,10 +3846,8 @@
uma_zone_domain_t zdom;
struct sbuf sbuf;
uma_cache_t cache;
- uma_klink_t kl;
uma_keg_t kz;
uma_zone_t z;
- uma_keg_t k;
int count, error, i;
error = sysctl_wire_old_buffer(req, 0);
@@ -4031,14 +3881,12 @@
uth.uth_align = kz->uk_align;
uth.uth_size = kz->uk_size;
uth.uth_rsize = kz->uk_rsize;
- LIST_FOREACH(kl, &z->uz_kegs, kl_link) {
- k = kl->kl_keg;
- uth.uth_maxpages += k->uk_maxpages;
- uth.uth_pages += k->uk_pages;
- uth.uth_keg_free += k->uk_free;
- uth.uth_limit = (k->uk_maxpages / k->uk_ppera)
- * k->uk_ipers;
- }
+ uth.uth_pages += (z->uz_items / kz->uk_ipers) *
+ kz->uk_ppera;
+ uth.uth_maxpages += (z->uz_max_items / kz->uk_ipers) *
+ kz->uk_ppera;
+ uth.uth_limit = z->uz_max_items;
+ uth.uth_keg_free += z->uz_keg->uk_free;
/*
* A zone is secondary is it is not the first entry
@@ -4135,8 +3983,10 @@
* zone is unlocked because the item's allocation state
* essentially holds a reference.
*/
+ if (zone->uz_lockptr == &zone->uz_lock)
+ return (NULL);
ZONE_LOCK(zone);
- keg = LIST_FIRST(&zone->uz_kegs)->kl_keg;
+ keg = zone->uz_keg;
if (keg->uk_flags & UMA_ZONE_HASH)
slab = hash_sfind(&keg->uk_hash, mem);
else
@@ -4150,12 +4000,11 @@
static bool
uma_dbg_zskip(uma_zone_t zone, void *mem)
{
- uma_keg_t keg;
- if ((keg = zone_first_keg(zone)) == NULL)
+ if (zone->uz_lockptr == &zone->uz_lock)
return (true);
- return (uma_dbg_kskip(keg, mem));
+ return (uma_dbg_kskip(zone->uz_keg, mem));
}
static bool
Index: sys/vm/uma_int.h
===================================================================
--- sys/vm/uma_int.h
+++ sys/vm/uma_int.h
@@ -223,7 +223,9 @@
*
*/
struct uma_keg {
- struct mtx uk_lock; /* Lock for the keg */
+ struct mtx uk_lock; /* Lock for the keg must be first.
+ * See shared uz_keg/uz_lockptr
+ * member of struct uma_zone. */
struct uma_hash uk_hash;
LIST_HEAD(,uma_zone) uk_zones; /* Keg's zones */
@@ -234,7 +236,6 @@
uint32_t uk_reserve; /* Number of reserved items. */
uint32_t uk_size; /* Requested size of each item */
uint32_t uk_rsize; /* Real size of each item */
- uint32_t uk_maxpages; /* Maximum number of pages to alloc */
uma_init uk_init; /* Keg's init routine */
uma_fini uk_fini; /* Keg's fini routine */
@@ -296,12 +297,6 @@
typedef struct uma_slab * uma_slab_t;
typedef uma_slab_t (*uma_slaballoc)(uma_zone_t, uma_keg_t, int, int);
-struct uma_klink {
- LIST_ENTRY(uma_klink) kl_link;
- uma_keg_t kl_keg;
-};
-typedef struct uma_klink *uma_klink_t;
-
struct uma_zone_domain {
LIST_HEAD(,uma_bucket) uzd_buckets; /* full buckets */
long uzd_nitems; /* total item count */
@@ -320,26 +315,30 @@
*/
struct uma_zone {
/* Offset 0, used in alloc/free fast/medium fast path and const. */
- struct mtx *uz_lockptr;
- const char *uz_name; /* Text name of the zone */
+ union {
+ uma_keg_t uz_keg; /* This zone's keg */
+ struct mtx *uz_lockptr; /* To keg or to self */
+ };
struct uma_zone_domain *uz_domain; /* per-domain buckets */
uint32_t uz_flags; /* Flags inherited from kegs */
uint32_t uz_size; /* Size inherited from kegs */
uma_ctor uz_ctor; /* Constructor for each allocation */
uma_dtor uz_dtor; /* Destructor */
- uma_init uz_init; /* Initializer for each item */
- uma_fini uz_fini; /* Finalizer for each item. */
+ uint64_t uz_items; /* Total items count */
+ uint64_t uz_max_items; /* Maximum number of items to alloc */
+ uint32_t uz_sleepers; /* Number of sleepers on memory */
+ uint16_t uz_count; /* Amount of items in full bucket */
+ uint16_t uz_count_max; /* Maximum amount of items there */
/* Offset 64, used in bucket replenish. */
uma_import uz_import; /* Import new memory to cache. */
uma_release uz_release; /* Release memory from cache. */
void *uz_arg; /* Import/release argument. */
+ uma_init uz_init; /* Initializer for each item */
+ uma_fini uz_fini; /* Finalizer for each item. */
uma_slaballoc uz_slab; /* Allocate a slab from the backend. */
- uint16_t uz_count; /* Amount of items in full bucket */
- uint16_t uz_count_min; /* Minimal amount of items there */
- /* 32bit pad on 64bit. */
- LIST_ENTRY(uma_zone) uz_link; /* List of all zones in keg */
- LIST_HEAD(,uma_klink) uz_kegs; /* List of kegs. */
+ uint64_t uz_bkt_count; /* Items in bucket cache */
+ uint64_t uz_bkt_max; /* Maximum bucket cache size */
/* Offset 128 Rare. */
/*
@@ -348,19 +347,19 @@
* members to reduce alignment overhead.
*/
struct mtx uz_lock; /* Lock for the zone */
- struct uma_klink uz_klink; /* klink for first keg. */
+ LIST_ENTRY(uma_zone) uz_link; /* List of all zones in keg */
+ const char *uz_name; /* Text name of the zone */
/* The next two fields are used to print a rate-limited warnings. */
const char *uz_warning; /* Warning to print on failure */
struct timeval uz_ratecheck; /* Warnings rate-limiting */
struct task uz_maxaction; /* Task to run when at limit */
+ uint16_t uz_count_min; /* Minimal amount of items in bucket */
- /* 16 bytes of pad. */
-
- /* Offset 256, atomic stats. */
- volatile u_long uz_allocs UMA_ALIGN; /* Total number of allocations */
- volatile u_long uz_fails; /* Total number of alloc failures */
- volatile u_long uz_frees; /* Total number of frees */
+ /* Offset 256, stats. */
+ uint64_t uz_allocs UMA_ALIGN; /* Total number of allocations */
uint64_t uz_sleeps; /* Total number of alloc sleeps */
+ uint64_t uz_frees; /* Total number of frees */
+ volatile u_long uz_fails; /* Total number of alloc failures */
/*
* This HAS to be the last item because we adjust the zone size
@@ -378,21 +377,11 @@
#define UMA_ZFLAG_DRAINING 0x08000000 /* Running zone_drain. */
#define UMA_ZFLAG_BUCKET 0x10000000 /* Bucket zone. */
#define UMA_ZFLAG_INTERNAL 0x20000000 /* No offpage no PCPU. */
-#define UMA_ZFLAG_FULL 0x40000000 /* Reached uz_maxpages */
#define UMA_ZFLAG_CACHEONLY 0x80000000 /* Don't ask VM for buckets. */
#define UMA_ZFLAG_INHERIT \
(UMA_ZFLAG_INTERNAL | UMA_ZFLAG_CACHEONLY | UMA_ZFLAG_BUCKET)
-static inline uma_keg_t
-zone_first_keg(uma_zone_t zone)
-{
- uma_klink_t klink;
-
- klink = LIST_FIRST(&zone->uz_kegs);
- return (klink != NULL) ? klink->kl_keg : NULL;
-}
-
#undef UMA_ALIGN
#ifdef _KERNEL
@@ -417,6 +406,13 @@
#define KEG_LOCK_FINI(k) mtx_destroy(&(k)->uk_lock)
#define KEG_LOCK(k) mtx_lock(&(k)->uk_lock)
#define KEG_UNLOCK(k) mtx_unlock(&(k)->uk_lock)
+#define KEG_LOCK_ASSERT(k) mtx_assert(&(k)->uk_lock, MA_OWNED)
+
+#define KEG_GET(zone, keg) do { \
+ (keg) = (zone)->uz_keg; \
+ KASSERT((void *)(keg) != (void *)&(zone)->uz_lock, \
+ ("%s: Invalid zone %p type", __func__, (zone))); \
+ } while (0)
#define ZONE_LOCK_INIT(z, lc) \
do { \
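For illustration of the consumer-visible effect of the struct reshuffle above: the allocation cap moves from the keg (uk_maxpages, uk_ipers) into the zone itself (uz_items, uz_max_items), so uma_zone_set_max() now charges a plain item count against the zone. A minimal sketch follows; the "example" zone, its item type and the limit of 128 are hypothetical and not part of this patch.

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <vm/uma.h>

struct example_item {			/* hypothetical item type */
	int	ei_data;
};

static uma_zone_t example_zone;

static void
example_zone_setup(void *dummy)
{

	example_zone = uma_zcreate("example", sizeof(struct example_item),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
	/*
	 * The cap is kept in the zone (uz_max_items) as a number of
	 * items rather than being derived from keg pages and ipers.
	 */
	uma_zone_set_max(example_zone, 128);
}
SYSINIT(example_zone, SI_SUB_VM_CONF, SI_ORDER_ANY, example_zone_setup, NULL);

static struct example_item *
example_alloc(void)
{

	/*
	 * At the limit, M_NOWAIT allocations fail while M_WAITOK
	 * allocations sleep (tracked via uz_sleepers/uz_sleeps) until
	 * items are freed back to the zone.
	 */
	return (uma_zalloc(example_zone, M_NOWAIT));
}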
Index: sys/vm/vm_page.c
===================================================================
--- sys/vm/vm_page.c
+++ sys/vm/vm_page.c
@@ -222,7 +222,8 @@
vmd->vmd_pgcache = uma_zcache_create("vm pgcache",
sizeof(struct vm_page), NULL, NULL, NULL, NULL,
vm_page_import, vm_page_release, vmd,
- UMA_ZONE_NOBUCKETCACHE | UMA_ZONE_MAXBUCKET | UMA_ZONE_VM);
+ UMA_ZONE_MAXBUCKET | UMA_ZONE_VM);
+ (void)uma_zone_set_maxcache(vmd->vmd_pgcache, 0);
}
}
SYSINIT(vm_page2, SI_SUB_VM_CONF, SI_ORDER_ANY, vm_page_init_cache_zones, NULL);
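Restated outside the diff for clarity: instead of requesting UMA_ZONE_NOBUCKETCACHE at creation time, the page-cache zone now calls uma_zone_set_maxcache() afterwards to bound its bucket cache (uz_bkt_max). A minimal sketch of the same pattern; the item type and the example_import/example_release callbacks are placeholders, not the patch's code.

#include <sys/param.h>
#include <sys/systm.h>
#include <vm/uma.h>

struct example_obj {			/* hypothetical cached object */
	int	eo_data;
};

static uma_zone_t example_cache;

/* Placeholder import/release callbacks for the cache-only zone. */
static int
example_import(void *arg, void **store, int cnt, int domain, int flags)
{

	return (0);			/* nothing imported in this sketch */
}

static void
example_release(void *arg, void **store, int cnt)
{
}

static void
example_cache_setup(void *arg)
{

	example_cache = uma_zcache_create("example cache",
	    sizeof(struct example_obj), NULL, NULL, NULL, NULL,
	    example_import, example_release, arg,
	    UMA_ZONE_MAXBUCKET | UMA_ZONE_VM);
	/*
	 * A maxcache of 0 keeps the zone from holding on to full
	 * buckets: frees that overflow the per-CPU caches go back
	 * through the release callback instead of sitting in the
	 * zone's bucket cache.
	 */
	(void)uma_zone_set_maxcache(example_cache, 0);
}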
Index: sys/vm/vm_pager.c
===================================================================
--- sys/vm/vm_pager.c
+++ sys/vm/vm_pager.c
@@ -85,10 +85,10 @@
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vm_extern.h>
+#include <vm/uma.h>
+#include "opt_swap.h"
-int cluster_pbuf_freecnt = -1; /* unlimited to begin with */
-
-struct buf *swbuf;
+uma_zone_t pbuf_zone;
static int dead_pager_getpages(vm_object_t, vm_page_t *, int, int *, int *);
static vm_object_t dead_pager_alloc(void *, vm_ooffset_t, vm_prot_t,
@@ -167,9 +167,6 @@
* cleaning requests (NPENDINGIO == 64) * the maximum swap cluster size
* (MAXPHYS == 64k) if you want to get the most efficiency.
*/
-struct mtx_padalign __exclusive_cache_line pbuf_mtx;
-static TAILQ_HEAD(swqueue, buf) bswlist;
-static int bswneeded;
vm_offset_t swapbkva; /* swap buffers kva */
void
@@ -177,7 +174,6 @@
{
struct pagerops **pgops;
- TAILQ_INIT(&bswlist);
/*
* Initialize known pagers
*/
@@ -189,25 +185,24 @@
void
vm_pager_bufferinit(void)
{
- struct buf *bp;
- int i;
- mtx_init(&pbuf_mtx, "pbuf mutex", NULL, MTX_DEF);
- bp = swbuf;
/*
- * Now set up swap and physical I/O buffer headers.
+ * swbufs are used as temporary holders for I/O, such as paging I/O.
+ * We have no fewer than 16 and no more than 256.
*/
- for (i = 0; i < nswbuf; i++, bp++) {
- TAILQ_INSERT_HEAD(&bswlist, bp, b_freelist);
- BUF_LOCKINIT(bp);
- LIST_INIT(&bp->b_dep);
- bp->b_rcred = bp->b_wcred = NOCRED;
- bp->b_xflags = 0;
- }
-
- cluster_pbuf_freecnt = nswbuf / 2;
- vnode_pbuf_freecnt = nswbuf / 2 + 1;
- vnode_async_pbuf_freecnt = nswbuf / 2;
+#ifndef NSWBUF_MIN
+#define NSWBUF_MIN 16
+#endif
+ nswbuf = min(nbuf / 4, 256);
+ TUNABLE_INT_FETCH("kern.nswbuf", &nswbuf);
+ if (nswbuf < NSWBUF_MIN)
+ nswbuf = NSWBUF_MIN;
+
+ /* Main zone for paging bufs. */
+ pbuf_zone = uma_zcreate("pbuf", sizeof(struct buf),
+ pbuf_ctor, pbuf_dtor, pbuf_init, NULL, UMA_ALIGN_CACHE,
+ UMA_ZONE_VM | UMA_ZONE_NOFREE);
+ uma_zone_set_max(pbuf_zone, nswbuf);
}
/*
@@ -347,110 +342,33 @@
return (object);
}
-/*
- * initialize a physical buffer
- */
-
-/*
- * XXX This probably belongs in vfs_bio.c
- */
-static void
-initpbuf(struct buf *bp)
+int
+pbuf_ctor(void *mem, int size, void *arg, int flags)
{
+ struct buf *bp = mem;
- KASSERT(bp->b_bufobj == NULL, ("initpbuf with bufobj"));
- KASSERT(bp->b_vp == NULL, ("initpbuf with vp"));
+ bp->b_vp = NULL;
+ bp->b_bufobj = NULL;
+
+ /* copied from initpbuf() */
bp->b_rcred = NOCRED;
bp->b_wcred = NOCRED;
- bp->b_qindex = 0; /* On no queue (QUEUE_NONE) */
- bp->b_kvabase = (caddr_t)(MAXPHYS * (bp - swbuf)) + swapbkva;
+ bp->b_qindex = 0; /* On no queue (QUEUE_NONE) */
bp->b_data = bp->b_kvabase;
- bp->b_kvasize = MAXPHYS;
- bp->b_flags = 0;
bp->b_xflags = 0;
+ bp->b_flags = 0;
bp->b_ioflags = 0;
bp->b_iodone = NULL;
bp->b_error = 0;
BUF_LOCK(bp, LK_EXCLUSIVE, NULL);
- buf_track(bp, __func__);
-}
-
-/*
- * allocate a physical buffer
- *
- * There are a limited number (nswbuf) of physical buffers. We need
- * to make sure that no single subsystem is able to hog all of them,
- * so each subsystem implements a counter which is typically initialized
- * to 1/2 nswbuf. getpbuf() decrements this counter in allocation and
- * increments it on release, and blocks if the counter hits zero. A
- * subsystem may initialize the counter to -1 to disable the feature,
- * but it must still be sure to match up all uses of getpbuf() with
- * relpbuf() using the same variable.
- *
- * NOTE: pfreecnt can be NULL, but this 'feature' will be removed
- * relatively soon when the rest of the subsystems get smart about it. XXX
- */
-struct buf *
-getpbuf(int *pfreecnt)
-{
- struct buf *bp;
- mtx_lock(&pbuf_mtx);
- for (;;) {
- if (pfreecnt != NULL) {
- while (*pfreecnt == 0) {
- msleep(pfreecnt, &pbuf_mtx, PVM, "wswbuf0", 0);
- }
- }
-
- /* get a bp from the swap buffer header pool */
- if ((bp = TAILQ_FIRST(&bswlist)) != NULL)
- break;
-
- bswneeded = 1;
- msleep(&bswneeded, &pbuf_mtx, PVM, "wswbuf1", 0);
- /* loop in case someone else grabbed one */
- }
- TAILQ_REMOVE(&bswlist, bp, b_freelist);
- if (pfreecnt)
- --*pfreecnt;
- mtx_unlock(&pbuf_mtx);
- initpbuf(bp);
- return (bp);
-}
-
-/*
- * allocate a physical buffer, if one is available.
- *
- * Note that there is no NULL hack here - all subsystems using this
- * call understand how to use pfreecnt.
- */
-struct buf *
-trypbuf(int *pfreecnt)
-{
- struct buf *bp;
-
- mtx_lock(&pbuf_mtx);
- if (*pfreecnt == 0 || (bp = TAILQ_FIRST(&bswlist)) == NULL) {
- mtx_unlock(&pbuf_mtx);
- return NULL;
- }
- TAILQ_REMOVE(&bswlist, bp, b_freelist);
- --*pfreecnt;
- mtx_unlock(&pbuf_mtx);
- initpbuf(bp);
- return (bp);
+ return (0);
}
-/*
- * release a physical buffer
- *
- * NOTE: pfreecnt can be NULL, but this 'feature' will be removed
- * relatively soon when the rest of the subsystems get smart about it. XXX
- */
void
-relpbuf(struct buf *bp, int *pfreecnt)
+pbuf_dtor(void *mem, int size, void *arg)
{
+ struct buf *bp = mem;
if (bp->b_rcred != NOCRED) {
crfree(bp->b_rcred);
@@ -461,24 +379,24 @@
bp->b_wcred = NOCRED;
}
- KASSERT(bp->b_vp == NULL, ("relpbuf with vp"));
- KASSERT(bp->b_bufobj == NULL, ("relpbuf with bufobj"));
-
- buf_track(bp, __func__);
BUF_UNLOCK(bp);
+}
+
+int
+pbuf_init(void *mem, int size, int flags)
+{
+ struct buf *bp = mem;
- mtx_lock(&pbuf_mtx);
- TAILQ_INSERT_HEAD(&bswlist, bp, b_freelist);
+ bp->b_kvabase = (void *)kva_alloc(MAXPHYS);
+ if (bp->b_kvabase == NULL)
+ return (ENOMEM);
+ bp->b_kvasize = MAXPHYS;
+ BUF_LOCKINIT(bp);
+ LIST_INIT(&bp->b_dep);
+ bp->b_rcred = bp->b_wcred = NOCRED;
+ bp->b_xflags = 0;
- if (bswneeded) {
- bswneeded = 0;
- wakeup(&bswneeded);
- }
- if (pfreecnt) {
- if (++*pfreecnt == 1)
- wakeup(pfreecnt);
- }
- mtx_unlock(&pbuf_mtx);
+ return (0);
}
/*
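The getpbuf()/relpbuf() comment removed above described per-subsystem pfreecnt counters carved out of nswbuf. With this change the same partitioning is expressed as secondary UMA zones stacked on pbuf_zone, each carrying its own item limit. A minimal sketch of the replacement pattern for a hypothetical subsystem "foo" (the zone name and the nswbuf / 10 quota are illustrative only, and the pbuf_zone/pbuf_ctor/pbuf_dtor/pbuf_init declarations are assumed to come from the header this patch exports them in, which is not shown here):

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <vm/uma.h>

static uma_zone_t foo_pbuf_zone;

static void
foo_init(void)
{

	/*
	 * A secondary zone allocates from the same keg as pbuf_zone
	 * but has its own uz_max_items, so "foo" gets a private quota
	 * and cannot exhaust the global pbuf supply.
	 */
	foo_pbuf_zone = uma_zsecond_create("foopbuf", pbuf_ctor, pbuf_dtor,
	    pbuf_init, NULL, pbuf_zone);
	uma_zone_set_max(foo_pbuf_zone, nswbuf / 10);
}

static void
foo_io(void)
{
	struct buf *bp;

	bp = uma_zalloc(foo_pbuf_zone, M_WAITOK);	/* was getpbuf() */
	/* ... set up and issue the I/O ... */
	uma_zfree(foo_pbuf_zone, bp);			/* was relpbuf() */
}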
Index: sys/vm/vnode_pager.c
===================================================================
--- sys/vm/vnode_pager.c
+++ sys/vm/vnode_pager.c
@@ -58,6 +58,7 @@
#include "opt_vm.h"
#include <sys/param.h>
+#include <sys/kernel.h>
#include <sys/systm.h>
#include <sys/sysctl.h>
#include <sys/proc.h>
@@ -82,6 +83,7 @@
#include <vm/vm_map.h>
#include <vm/vnode_pager.h>
#include <vm/vm_extern.h>
+#include <vm/uma.h>
static int vnode_pager_addr(struct vnode *vp, vm_ooffset_t address,
daddr_t *rtaddress, int *run);
@@ -107,15 +109,27 @@
.pgo_haspage = vnode_pager_haspage,
};
-int vnode_pbuf_freecnt;
-int vnode_async_pbuf_freecnt;
-
static struct domainset *vnode_domainset = NULL;
SYSCTL_PROC(_debug, OID_AUTO, vnode_domainset, CTLTYPE_STRING | CTLFLAG_RW,
&vnode_domainset, 0, sysctl_handle_domainset, "A",
"Default vnode NUMA policy");
+static uma_zone_t vnode_pbuf_zone;
+
+static void
+vnode_pager_init(void *dummy)
+{
+
+ vnode_pbuf_zone = uma_zsecond_create("vnpbuf", pbuf_ctor, pbuf_dtor,
+ pbuf_init, NULL, pbuf_zone);
+ uma_zone_set_max(vnode_pbuf_zone, nswbuf * 8);
+#if 0
+ uma_prealloc(vnode_pbuf_zone, nswbuf * 8);
+#endif
+}
+SYSINIT(vnode_pager, SI_SUB_CPU, SI_ORDER_ANY, vnode_pager_init, NULL);
+
/* Create the VM system backing object for this vnode */
int
vnode_create_vobject(struct vnode *vp, off_t isize, struct thread *td)
@@ -563,7 +577,7 @@
break;
}
if (fileaddr != -1) {
- bp = getpbuf(&vnode_pbuf_freecnt);
+ bp = uma_zalloc(vnode_pbuf_zone, M_WAITOK);
/* build a minimal buffer header */
bp->b_iocmd = BIO_READ;
@@ -595,7 +609,7 @@
*/
bp->b_vp = NULL;
pbrelbo(bp);
- relpbuf(bp, &vnode_pbuf_freecnt);
+ uma_zfree(vnode_pbuf_zone, bp);
if (error)
break;
} else
@@ -757,7 +771,7 @@
#ifdef INVARIANTS
off_t blkno0;
#endif
- int bsize, pagesperblock, *freecnt;
+ int bsize, pagesperblock;
int error, before, after, rbehind, rahead, poff, i;
int bytecount, secmask;
@@ -788,17 +802,7 @@
return (VM_PAGER_OK);
}
- /*
- * Synchronous and asynchronous paging operations use different
- * free pbuf counters. This is done to avoid asynchronous requests
- * to consume all pbufs.
- * Allocate the pbuf at the very beginning of the function, so that
- * if we are low on certain kind of pbufs don't even proceed to BMAP,
- * but sleep.
- */
- freecnt = iodone != NULL ?
- &vnode_async_pbuf_freecnt : &vnode_pbuf_freecnt;
- bp = getpbuf(freecnt);
+ bp = uma_zalloc(vnode_pbuf_zone, M_WAITOK);
/*
* Get the underlying device blocks for the file with VOP_BMAP().
@@ -807,7 +811,7 @@
*/
error = VOP_BMAP(vp, foff / bsize, &bo, &bp->b_blkno, &after, &before);
if (error == EOPNOTSUPP) {
- relpbuf(bp, freecnt);
+ uma_zfree(vnode_pbuf_zone, bp);
VM_OBJECT_WLOCK(object);
for (i = 0; i < count; i++) {
VM_CNT_INC(v_vnodein);
@@ -819,7 +823,7 @@
VM_OBJECT_WUNLOCK(object);
return (error);
} else if (error != 0) {
- relpbuf(bp, freecnt);
+ uma_zfree(vnode_pbuf_zone, bp);
return (VM_PAGER_ERROR);
}
@@ -828,7 +832,7 @@
* than a page size, then use special small filesystem code.
*/
if (pagesperblock == 0) {
- relpbuf(bp, freecnt);
+ uma_zfree(vnode_pbuf_zone, bp);
for (i = 0; i < count; i++) {
VM_CNT_INC(v_vnodein);
VM_CNT_INC(v_vnodepgsin);
@@ -847,7 +851,7 @@
KASSERT(count == 1,
("%s: array[%d] request to a sparse file %p", __func__,
count, vp));
- relpbuf(bp, freecnt);
+ uma_zfree(vnode_pbuf_zone, bp);
pmap_zero_page(m[0]);
KASSERT(m[0]->dirty == 0, ("%s: page %p is dirty",
__func__, m[0]));
@@ -1061,7 +1065,7 @@
bp->b_pages[i] = NULL;
bp->b_vp = NULL;
pbrelbo(bp);
- relpbuf(bp, &vnode_pbuf_freecnt);
+ uma_zfree(vnode_pbuf_zone, bp);
return (error != 0 ? VM_PAGER_ERROR : VM_PAGER_OK);
}
}
@@ -1079,7 +1083,7 @@
bp->b_pages[i] = NULL;
bp->b_vp = NULL;
pbrelbo(bp);
- relpbuf(bp, &vnode_async_pbuf_freecnt);
+ uma_zfree(vnode_pbuf_zone, bp);
}
static int