D17773.id51746.diff

Index: lib/libmemstat/memstat_uma.c
===================================================================
--- lib/libmemstat/memstat_uma.c
+++ lib/libmemstat/memstat_uma.c
@@ -448,12 +448,7 @@
mtp->mt_memalloced = mtp->mt_numallocs * mtp->mt_size;
mtp->mt_memfreed = mtp->mt_numfrees * mtp->mt_size;
mtp->mt_bytes = mtp->mt_memalloced - mtp->mt_memfreed;
- if (kz.uk_ppera > 1)
- mtp->mt_countlimit = kz.uk_maxpages /
- kz.uk_ipers;
- else
- mtp->mt_countlimit = kz.uk_maxpages *
- kz.uk_ipers;
+ mtp->mt_countlimit = uz.uz_maxitems;
mtp->mt_byteslimit = mtp->mt_countlimit * mtp->mt_size;
mtp->mt_count = mtp->mt_numallocs - mtp->mt_numfrees;
for (i = 0; i < ndomains; i++) {
Index: sys/cam/cam_periph.c
===================================================================
--- sys/cam/cam_periph.c
+++ sys/cam/cam_periph.c
@@ -936,7 +936,7 @@
/*
* Get the buffer.
*/
- mapinfo->bp[i] = getpbuf(NULL);
+ mapinfo->bp[i] = uma_zalloc(pbuf_zone, M_WAITOK);
/* put our pointer in the data slot */
mapinfo->bp[i]->b_data = *data_ptrs[i];
@@ -962,9 +962,9 @@
for (j = 0; j < i; ++j) {
*data_ptrs[j] = mapinfo->bp[j]->b_caller1;
vunmapbuf(mapinfo->bp[j]);
- relpbuf(mapinfo->bp[j], NULL);
+ uma_zfree(pbuf_zone, mapinfo->bp[j]);
}
- relpbuf(mapinfo->bp[i], NULL);
+ uma_zfree(pbuf_zone, mapinfo->bp[i]);
PRELE(curproc);
return(EACCES);
}
@@ -1052,7 +1052,7 @@
vunmapbuf(mapinfo->bp[i]);
/* release the buffer */
- relpbuf(mapinfo->bp[i], NULL);
+ uma_zfree(pbuf_zone, mapinfo->bp[i]);
}
/* allow ourselves to be swapped once again */
Index: sys/dev/md/md.c
===================================================================
--- sys/dev/md/md.c
+++ sys/dev/md/md.c
@@ -231,7 +231,7 @@
#define NMASK (NINDIR-1)
static int nshift;
-static int md_vnode_pbuf_freecnt;
+static uma_zone_t md_pbuf_zone;
struct indir {
uintptr_t *array;
@@ -962,7 +962,7 @@
auio.uio_iovcnt = piov - auio.uio_iov;
piov = auio.uio_iov;
} else if ((bp->bio_flags & BIO_UNMAPPED) != 0) {
- pb = getpbuf(&md_vnode_pbuf_freecnt);
+ pb = uma_zalloc(md_pbuf_zone, M_WAITOK);
bp->bio_resid = len;
unmapped_step:
npages = atop(min(MAXPHYS, round_page(len + (ma_offs &
@@ -1011,7 +1011,7 @@
if (len > 0)
goto unmapped_step;
}
- relpbuf(pb, &md_vnode_pbuf_freecnt);
+ uma_zfree(md_pbuf_zone, pb);
}
free(piov, M_MD);
@@ -2105,7 +2105,9 @@
sx_xunlock(&md_sx);
}
}
- md_vnode_pbuf_freecnt = nswbuf / 10;
+ md_pbuf_zone = uma_zsecond_create("mdpbuf", pbuf_ctor, pbuf_dtor,
+ pbuf_init, NULL, pbuf_zone);
+ uma_zone_set_max(md_pbuf_zone, nswbuf / 10);
status_dev = make_dev(&mdctl_cdevsw, INT_MAX, UID_ROOT, GID_WHEEL,
0600, MDCTL_NAME);
g_topology_lock();
@@ -2198,5 +2200,6 @@
sx_destroy(&md_sx);
if (status_dev != NULL)
destroy_dev(status_dev);
+ uma_zdestroy(md_pbuf_zone);
delete_unrhdr(md_uh);
}
Index: sys/dev/nvme/nvme_ctrlr.c
===================================================================
--- sys/dev/nvme/nvme_ctrlr.c
+++ sys/dev/nvme/nvme_ctrlr.c
@@ -1052,7 +1052,7 @@
* this passthrough command.
*/
PHOLD(curproc);
- buf = getpbuf(NULL);
+ buf = uma_zalloc(pbuf_zone, M_WAITOK);
buf->b_data = pt->buf;
buf->b_bufsize = pt->len;
buf->b_iocmd = pt->is_read ? BIO_READ : BIO_WRITE;
@@ -1099,7 +1099,7 @@
err:
if (buf != NULL) {
- relpbuf(buf, NULL);
+ uma_zfree(pbuf_zone, buf);
PRELE(curproc);
}
Index: sys/fs/fuse/fuse_main.c
===================================================================
--- sys/fs/fuse/fuse_main.c
+++ sys/fs/fuse/fuse_main.c
@@ -84,7 +84,7 @@
extern struct vfsops fuse_vfsops;
extern struct cdevsw fuse_cdevsw;
extern struct vop_vector fuse_vnops;
-extern int fuse_pbuf_freecnt;
+extern uma_zone_t fuse_pbuf_zone;
static struct vfsconf fuse_vfsconf = {
.vfc_version = VFS_VERSION,
@@ -122,7 +122,6 @@
switch (what) {
case MOD_LOAD: /* kldload */
- fuse_pbuf_freecnt = nswbuf / 2 + 1;
mtx_init(&fuse_mtx, "fuse_mtx", NULL, MTX_DEF);
err = fuse_device_init();
if (err) {
@@ -130,6 +129,9 @@
return (err);
}
fuse_ipc_init();
+ fuse_pbuf_zone = uma_zsecond_create("fusepbuf", pbuf_ctor,
+ pbuf_dtor, pbuf_init, NULL, pbuf_zone);
+ uma_zone_set_max(fuse_pbuf_zone, nswbuf / 2 + 1);
/* vfs_modevent ignores its first arg */
if ((err = vfs_modevent(NULL, what, &fuse_vfsconf)))
@@ -144,6 +146,7 @@
if ((err = vfs_modevent(NULL, what, &fuse_vfsconf)))
return (err);
fuse_bringdown(eh_tag);
+ uma_zdestroy(fuse_pbuf_zone);
break;
default:
return (EINVAL);
Index: sys/fs/fuse/fuse_vnops.c
===================================================================
--- sys/fs/fuse/fuse_vnops.c
+++ sys/fs/fuse/fuse_vnops.c
@@ -201,7 +201,7 @@
SYSCTL_INT(_vfs_fuse, OID_AUTO, reclaim_revoked, CTLFLAG_RW,
&fuse_reclaim_revoked, 0, "");
-int fuse_pbuf_freecnt = -1;
+uma_zone_t fuse_pbuf_zone;
#define fuse_vm_page_lock(m) vm_page_lock((m));
#define fuse_vm_page_unlock(m) vm_page_unlock((m));
@@ -1824,7 +1824,7 @@
* We use only the kva address for the buffer, but this is extremely
* convenient and fast.
*/
- bp = getpbuf(&fuse_pbuf_freecnt);
+ bp = uma_zalloc(fuse_pbuf_zone, M_WAITOK);
kva = (vm_offset_t)bp->b_data;
pmap_qenter(kva, pages, npages);
@@ -1845,7 +1845,7 @@
error = fuse_io_dispatch(vp, &uio, IO_DIRECT, cred);
pmap_qremove(kva, npages);
- relpbuf(bp, &fuse_pbuf_freecnt);
+ uma_zfree(fuse_pbuf_zone, bp);
if (error && (uio.uio_resid == count)) {
FS_DEBUG("error %d\n", error);
@@ -1958,7 +1958,7 @@
* We use only the kva address for the buffer, but this is extremely
* convenient and fast.
*/
- bp = getpbuf(&fuse_pbuf_freecnt);
+ bp = uma_zalloc(fuse_pbuf_zone, M_WAITOK);
kva = (vm_offset_t)bp->b_data;
pmap_qenter(kva, pages, npages);
@@ -1978,7 +1978,7 @@
error = fuse_io_dispatch(vp, &uio, IO_DIRECT, cred);
pmap_qremove(kva, npages);
- relpbuf(bp, &fuse_pbuf_freecnt);
+ uma_zfree(fuse_pbuf_zone, bp);
if (!error) {
int nwritten = round_page(count - uio.uio_resid) / PAGE_SIZE;
Index: sys/fs/nfsclient/nfs_clbio.c
===================================================================
--- sys/fs/nfsclient/nfs_clbio.c
+++ sys/fs/nfsclient/nfs_clbio.c
@@ -70,7 +70,7 @@
extern int newnfs_directio_enable;
extern int nfs_keep_dirty_on_error;
-int ncl_pbuf_freecnt = -1; /* start out unlimited */
+uma_zone_t ncl_pbuf_zone;
static struct buf *nfs_getcacheblk(struct vnode *vp, daddr_t bn, int size,
struct thread *td);
@@ -182,7 +182,7 @@
* We use only the kva address for the buffer, but this is extremely
* convenient and fast.
*/
- bp = getpbuf(&ncl_pbuf_freecnt);
+ bp = uma_zalloc(ncl_pbuf_zone, M_WAITOK);
kva = (vm_offset_t) bp->b_data;
pmap_qenter(kva, pages, npages);
@@ -203,7 +203,7 @@
error = ncl_readrpc(vp, &uio, cred);
pmap_qremove(kva, npages);
- relpbuf(bp, &ncl_pbuf_freecnt);
+ uma_zfree(ncl_pbuf_zone, bp);
if (error && (uio.uio_resid == count)) {
printf("ncl_getpages: error %d\n", error);
@@ -793,7 +793,7 @@
while (uiop->uio_resid > 0) {
size = MIN(uiop->uio_resid, wsize);
size = MIN(uiop->uio_iov->iov_len, size);
- bp = getpbuf(&ncl_pbuf_freecnt);
+ bp = uma_zalloc(ncl_pbuf_zone, M_WAITOK);
t_uio = malloc(sizeof(struct uio), M_NFSDIRECTIO, M_WAITOK);
t_iov = malloc(sizeof(struct iovec), M_NFSDIRECTIO, M_WAITOK);
t_iov->iov_base = malloc(size, M_NFSDIRECTIO, M_WAITOK);
@@ -836,7 +836,7 @@
free(t_iov, M_NFSDIRECTIO);
free(t_uio, M_NFSDIRECTIO);
bp->b_vp = NULL;
- relpbuf(bp, &ncl_pbuf_freecnt);
+ uma_zfree(ncl_pbuf_zone, bp);
if (error == EINTR)
return (error);
goto do_sync;
@@ -1571,7 +1571,7 @@
mtx_unlock(&np->n_mtx);
}
bp->b_vp = NULL;
- relpbuf(bp, &ncl_pbuf_freecnt);
+ uma_zfree(ncl_pbuf_zone, bp);
}
/*
Index: sys/fs/nfsclient/nfs_clport.c
===================================================================
--- sys/fs/nfsclient/nfs_clport.c
+++ sys/fs/nfsclient/nfs_clport.c
@@ -79,7 +79,7 @@
extern struct vop_vector newnfs_fifoops;
extern uma_zone_t newnfsnode_zone;
extern struct buf_ops buf_ops_newnfs;
-extern int ncl_pbuf_freecnt;
+extern uma_zone_t ncl_pbuf_zone;
extern short nfsv4_cbport;
extern int nfscl_enablecallb;
extern int nfs_numnfscbd;
@@ -1023,7 +1023,9 @@
return;
inited = 1;
nfscl_inited = 1;
- ncl_pbuf_freecnt = nswbuf / 2 + 1;
+ ncl_pbuf_zone = uma_zsecond_create("nfspbuf", pbuf_ctor, pbuf_dtor,
+ pbuf_init, NULL, pbuf_zone);
+ uma_zone_set_max(ncl_pbuf_zone, nswbuf / 2 + 1);
}
/*
@@ -1357,6 +1359,7 @@
#if 0
ncl_call_invalcaches = NULL;
nfsd_call_nfscl = NULL;
+ uma_zdestroy(ncl_pbuf_zone);
/* and get rid of the mutexes */
mtx_destroy(&ncl_iod_mutex);
loaded = 0;
Index: sys/fs/smbfs/smbfs_io.c
===================================================================
--- sys/fs/smbfs/smbfs_io.c
+++ sys/fs/smbfs/smbfs_io.c
@@ -63,7 +63,7 @@
/*#define SMBFS_RWGENERIC*/
-extern int smbfs_pbuf_freecnt;
+extern uma_zone_t smbfs_pbuf_zone;
static int smbfs_fastlookup = 1;
@@ -468,7 +468,7 @@
scred = smbfs_malloc_scred();
smb_makescred(scred, td, cred);
- bp = getpbuf(&smbfs_pbuf_freecnt);
+ bp = uma_zalloc(smbfs_pbuf_zone, M_WAITOK);
kva = (vm_offset_t) bp->b_data;
pmap_qenter(kva, pages, npages);
@@ -490,7 +490,7 @@
smbfs_free_scred(scred);
pmap_qremove(kva, npages);
- relpbuf(bp, &smbfs_pbuf_freecnt);
+ uma_zfree(smbfs_pbuf_zone, bp);
if (error && (uio.uio_resid == count)) {
printf("smbfs_getpages: error %d\n",error);
@@ -593,7 +593,7 @@
rtvals[i] = VM_PAGER_ERROR;
}
- bp = getpbuf(&smbfs_pbuf_freecnt);
+ bp = uma_zalloc(smbfs_pbuf_zone, M_WAITOK);
kva = (vm_offset_t) bp->b_data;
pmap_qenter(kva, pages, npages);
@@ -621,7 +621,7 @@
pmap_qremove(kva, npages);
- relpbuf(bp, &smbfs_pbuf_freecnt);
+ uma_zfree(smbfs_pbuf_zone, bp);
if (error == 0) {
vnode_pager_undirty_pages(pages, rtvals, count - uio.uio_resid,
Index: sys/fs/smbfs/smbfs_vfsops.c
===================================================================
--- sys/fs/smbfs/smbfs_vfsops.c
+++ sys/fs/smbfs/smbfs_vfsops.c
@@ -88,7 +88,7 @@
MODULE_DEPEND(smbfs, libiconv, 1, 1, 2);
MODULE_DEPEND(smbfs, libmchain, 1, 1, 1);
-int smbfs_pbuf_freecnt = -1; /* start out unlimited */
+uma_zone_t smbfs_pbuf_zone;
static int
smbfs_cmount(struct mntarg *ma, void * data, uint64_t flags)
@@ -367,7 +367,8 @@
int
smbfs_init(struct vfsconf *vfsp)
{
- smbfs_pbuf_freecnt = nswbuf / 2 + 1;
+ smbfs_pbuf_zone = uma_zsecond_create("smbpbuf", pbuf_ctor, pbuf_dtor, pbuf_init, NULL, pbuf_zone);
+ uma_zone_set_max(smbfs_pbuf_zone, nswbuf / 2 + 1);
SMBVDEBUG("done.\n");
return 0;
}
@@ -377,6 +378,7 @@
smbfs_uninit(struct vfsconf *vfsp)
{
+ uma_zdestroy(smbfs_pbuf_zone);
SMBVDEBUG("done.\n");
return 0;
}
Index: sys/kern/kern_lock.c
===================================================================
--- sys/kern/kern_lock.c
+++ sys/kern/kern_lock.c
@@ -450,6 +450,8 @@
iflags |= LO_QUIET;
if (flags & LK_IS_VNODE)
iflags |= LO_IS_VNODE;
+ if (flags & LK_NEW)
+ iflags |= LO_NEW;
iflags |= flags & (LK_ADAPTIVE | LK_NOSHARE);
lock_init(&lk->lock_object, &lock_class_lockmgr, wmesg, NULL, iflags);
Index: sys/kern/kern_physio.c
===================================================================
--- sys/kern/kern_physio.c
+++ sys/kern/kern_physio.c
@@ -104,7 +104,7 @@
maxpages = btoc(MIN(uio->uio_resid, MAXPHYS)) + 1;
pages = malloc(sizeof(*pages) * maxpages, M_DEVBUF, M_WAITOK);
} else {
- pbuf = getpbuf(NULL);
+ pbuf = uma_zalloc(pbuf_zone, M_WAITOK);
sa = pbuf->b_data;
maxpages = btoc(MAXPHYS);
pages = pbuf->b_pages;
@@ -220,7 +220,7 @@
}
doerror:
if (pbuf)
- relpbuf(pbuf, NULL);
+ uma_zfree(pbuf_zone, pbuf);
else if (pages)
free(pages, M_DEVBUF);
g_destroy_bio(bp);
Index: sys/kern/vfs_aio.c
===================================================================
--- sys/kern/vfs_aio.c
+++ sys/kern/vfs_aio.c
@@ -1267,7 +1267,7 @@
goto unref;
}
- job->pbuf = pbuf = (struct buf *)getpbuf(NULL);
+ job->pbuf = pbuf = uma_zalloc(pbuf_zone, M_WAITOK);
BUF_KERNPROC(pbuf);
AIO_LOCK(ki);
ki->kaio_buffer_count++;
@@ -1318,7 +1318,7 @@
AIO_LOCK(ki);
ki->kaio_buffer_count--;
AIO_UNLOCK(ki);
- relpbuf(pbuf, NULL);
+ uma_zfree(pbuf_zone, pbuf);
job->pbuf = NULL;
}
g_destroy_bio(bp);
@@ -2344,7 +2344,7 @@
ki = userp->p_aioinfo;
if (job->pbuf) {
pmap_qremove((vm_offset_t)job->pbuf->b_data, job->npages);
- relpbuf(job->pbuf, NULL);
+ uma_zfree(pbuf_zone, job->pbuf);
job->pbuf = NULL;
atomic_subtract_int(&num_buf_aio, 1);
AIO_LOCK(ki);
Index: sys/kern/vfs_bio.c
===================================================================
--- sys/kern/vfs_bio.c
+++ sys/kern/vfs_bio.c
@@ -86,7 +86,6 @@
#include <vm/vm_extern.h>
#include <vm/vm_map.h>
#include <vm/swap_pager.h>
-#include "opt_swap.h"
static MALLOC_DEFINE(M_BIOBUF, "biobuf", "BIO buffer");
@@ -1017,10 +1016,6 @@
mtx_unlock(&bdlock);
}
-#ifndef NSWBUF_MIN
-#define NSWBUF_MIN 16
-#endif
-
#ifdef __i386__
#define TRANSIENT_DENOM 5
#else
@@ -1129,20 +1124,9 @@
nbuf = buf_sz / BKVASIZE;
}
- /*
- * swbufs are used as temporary holders for I/O, such as paging I/O.
- * We have no less then 16 and no more then 256.
- */
- nswbuf = min(nbuf / 4, 256);
- TUNABLE_INT_FETCH("kern.nswbuf", &nswbuf);
- if (nswbuf < NSWBUF_MIN)
- nswbuf = NSWBUF_MIN;
-
/*
* Reserve space for the buffer cache buffers
*/
- swbuf = (void *)v;
- v = (caddr_t)(swbuf + nswbuf);
buf = (void *)v;
v = (caddr_t)(buf + nbuf);
Index: sys/kern/vfs_cluster.c
===================================================================
--- sys/kern/vfs_cluster.c
+++ sys/kern/vfs_cluster.c
@@ -63,7 +63,9 @@
#endif
static MALLOC_DEFINE(M_SEGMENT, "cl_savebuf", "cluster_save buffer");
+static uma_zone_t cluster_pbuf_zone;
+static void cluster_init(void *);
static struct cluster_save *cluster_collectbufs(struct vnode *vp,
struct buf *last_bp, int gbflags);
static struct buf *cluster_rbuild(struct vnode *vp, u_quad_t filesize,
@@ -83,6 +85,17 @@
SYSCTL_INT(_vfs, OID_AUTO, read_min, CTLFLAG_RW, &read_min, 0,
"Cluster read min block count");
+SYSINIT(cluster, SI_SUB_CPU, SI_ORDER_ANY, cluster_init, NULL);
+
+static void
+cluster_init(void *dummy)
+{
+
+ cluster_pbuf_zone = uma_zsecond_create("clpbuf", pbuf_ctor, pbuf_dtor,
+ pbuf_init, NULL, pbuf_zone);
+ uma_zone_set_max(cluster_pbuf_zone, nswbuf / 2);
+}
+
/*
* Read data to a buf, including read-ahead if we find this to be beneficial.
* cluster_read replaces bread.
@@ -372,7 +385,7 @@
((tbp->b_flags & B_VMIO) == 0) || (run <= 1) )
return tbp;
- bp = trypbuf(&cluster_pbuf_freecnt);
+ bp = uma_zalloc(cluster_pbuf_zone, M_NOWAIT);
if (bp == NULL)
return tbp;
@@ -603,7 +616,7 @@
bufdone(tbp);
}
pbrelvp(bp);
- relpbuf(bp, &cluster_pbuf_freecnt);
+ uma_zfree(cluster_pbuf_zone, bp);
}
/*
@@ -856,9 +869,8 @@
(tbp->b_bcount != tbp->b_bufsize) ||
(tbp->b_bcount != size) ||
(len == 1) ||
- ((bp = (vp->v_vflag & VV_MD) != 0 ?
- trypbuf(&cluster_pbuf_freecnt) :
- getpbuf(&cluster_pbuf_freecnt)) == NULL)) {
+ ((bp = uma_zalloc(cluster_pbuf_zone,
+ (vp->v_vflag & VV_MD) != 0 ? M_NOWAIT : M_WAITOK)) == NULL)) {
totalwritten += tbp->b_bufsize;
bawrite(tbp);
++start_lbn;
Index: sys/sys/buf.h
===================================================================
--- sys/sys/buf.h
+++ sys/sys/buf.h
@@ -44,6 +44,7 @@
#include <sys/queue.h>
#include <sys/lock.h>
#include <sys/lockmgr.h>
+#include <vm/uma.h>
struct bio;
struct buf;
@@ -287,7 +288,7 @@
* Initialize a lock.
*/
#define BUF_LOCKINIT(bp) \
- lockinit(&(bp)->b_lock, PRIBIO + 4, buf_wmesg, 0, 0)
+ lockinit(&(bp)->b_lock, PRIBIO + 4, buf_wmesg, 0, LK_NEW)
/*
*
* Get a lock sleeping non-interruptably until it becomes available.
@@ -493,10 +494,6 @@
extern int dirtybufferflushes;
extern int altbufferflushes;
extern int nswbuf; /* Number of swap I/O buffer headers. */
-extern int cluster_pbuf_freecnt; /* Number of pbufs for clusters */
-extern int vnode_pbuf_freecnt; /* Number of pbufs for vnode pager */
-extern int vnode_async_pbuf_freecnt; /* Number of pbufs for vnode pager,
- asynchronous reads */
extern caddr_t unmapped_buf; /* Data address for unmapped buffers. */
static inline int
@@ -537,7 +534,6 @@
void bqrelse(struct buf *);
int vfs_bio_awrite(struct buf *);
void vfs_drain_busy_pages(struct buf *bp);
-struct buf * getpbuf(int *);
struct buf *incore(struct bufobj *, daddr_t);
struct buf *gbincore(struct bufobj *, daddr_t);
struct buf *getblk(struct vnode *, daddr_t, int, int, int, int);
@@ -549,6 +545,11 @@
void bufdone(struct buf *);
void bd_speedup(void);
+extern uma_zone_t pbuf_zone;
+int pbuf_init(void *, int, int);
+int pbuf_ctor(void *, int, void *, int);
+void pbuf_dtor(void *, int, void *);
+
int cluster_read(struct vnode *, u_quad_t, daddr_t, long,
struct ucred *, long, int, int, struct buf **);
int cluster_wbuild(struct vnode *, long, daddr_t, int, int);
@@ -562,7 +563,6 @@
void vfs_unbusy_pages(struct buf *);
int vmapbuf(struct buf *, int);
void vunmapbuf(struct buf *);
-void relpbuf(struct buf *, int *);
void brelvp(struct buf *);
void bgetvp(struct vnode *, struct buf *);
void pbgetbo(struct bufobj *bo, struct buf *bp);
@@ -571,7 +571,6 @@
void pbrelvp(struct buf *);
int allocbuf(struct buf *bp, int size);
void reassignbuf(struct buf *);
-struct buf *trypbuf(int *);
void bwait(struct buf *, u_char, const char *);
void bdone(struct buf *);
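
Note (not part of the patch): a minimal sketch of how a consumer of the old getpbuf()/relpbuf() freecnt interface maps onto the zone-based API declared above. The "foo" names are hypothetical; the sizing and call sequence mirror the md(4), smbfs and NFS conversions elsewhere in this diff.

#include <sys/param.h>
#include <sys/buf.h>		/* pbuf_zone, pbuf_ctor/dtor/init, nswbuf */
#include <vm/uma.h>

static uma_zone_t foo_pbuf_zone;	/* hypothetical subsystem zone */

static void
foo_init(void)
{
	/* Secondary zone sharing pbuf_zone's keg; items come pre-built by pbuf_init(). */
	foo_pbuf_zone = uma_zsecond_create("foopbuf", pbuf_ctor, pbuf_dtor,
	    pbuf_init, NULL, pbuf_zone);
	/* Replaces the old "foo_pbuf_freecnt = nswbuf / 2 + 1" accounting. */
	uma_zone_set_max(foo_pbuf_zone, nswbuf / 2 + 1);
}

static void
foo_io(void)
{
	struct buf *bp;

	bp = uma_zalloc(foo_pbuf_zone, M_WAITOK);	/* was getpbuf(&foo_pbuf_freecnt) */
	/* ... set up and issue the paging I/O ... */
	uma_zfree(foo_pbuf_zone, bp);			/* was relpbuf(bp, &foo_pbuf_freecnt) */
}

static void
foo_uninit(void)
{
	uma_zdestroy(foo_pbuf_zone);
}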
Index: sys/sys/lockmgr.h
===================================================================
--- sys/sys/lockmgr.h
+++ sys/sys/lockmgr.h
@@ -143,7 +143,7 @@
/*
* Flags for lockinit().
*/
-#define LK_INIT_MASK 0x0000FF
+#define LK_INIT_MASK 0x0001FF
#define LK_CANRECURSE 0x000001
#define LK_NODUP 0x000002
#define LK_NOPROFILE 0x000004
@@ -152,6 +152,7 @@
#define LK_QUIET 0x000020
#define LK_ADAPTIVE 0x000040
#define LK_IS_VNODE 0x000080 /* Tell WITNESS about a VNODE lock */
+#define LK_NEW 0x000100
/*
* Additional attributes to be used in lockmgr().
Index: sys/ufs/ffs/ffs_rawread.c
===================================================================
--- sys/ufs/ffs/ffs_rawread.c
+++ sys/ufs/ffs/ffs_rawread.c
@@ -74,9 +74,7 @@
SYSCTL_DECL(_vfs_ffs);
-static int ffsrawbufcnt = 4;
-SYSCTL_INT(_vfs_ffs, OID_AUTO, ffsrawbufcnt, CTLFLAG_RD, &ffsrawbufcnt, 0,
- "Buffers available for raw reads");
+static uma_zone_t ffsraw_pbuf_zone;
static int allowrawread = 1;
SYSCTL_INT(_vfs_ffs, OID_AUTO, allowrawread, CTLFLAG_RW, &allowrawread, 0,
@@ -90,7 +88,10 @@
ffs_rawread_setup(void *arg __unused)
{
- ffsrawbufcnt = (nswbuf > 100 ) ? (nswbuf - (nswbuf >> 4)) : nswbuf - 8;
+ ffsraw_pbuf_zone = uma_zsecond_create("ffsrawpbuf", pbuf_ctor,
+ pbuf_dtor, pbuf_init, NULL, pbuf_zone);
+ uma_zone_set_max(ffsraw_pbuf_zone, (nswbuf > 100 ) ?
+ (nswbuf - (nswbuf >> 4)) : nswbuf - 8);
}
SYSINIT(ffs_raw, SI_SUB_VM_CONF, SI_ORDER_ANY, ffs_rawread_setup, NULL);
@@ -296,8 +297,7 @@
while (resid > 0) {
if (bp == NULL) { /* Setup first read */
- /* XXX: Leave some bufs for swap */
- bp = getpbuf(&ffsrawbufcnt);
+ bp = uma_zalloc(ffsraw_pbuf_zone, M_WAITOK);
pbgetvp(vp, bp);
error = ffs_rawread_readahead(vp, udata, offset,
resid, td, bp);
@@ -305,9 +305,9 @@
break;
if (resid > bp->b_bufsize) { /* Setup fist readahead */
- /* XXX: Leave bufs for swap */
if (rawreadahead != 0)
- nbp = trypbuf(&ffsrawbufcnt);
+ nbp = uma_zalloc(ffsraw_pbuf_zone,
+ M_NOWAIT);
else
nbp = NULL;
if (nbp != NULL) {
@@ -324,7 +324,8 @@
nbp);
if (nerror) {
pbrelvp(nbp);
- relpbuf(nbp, &ffsrawbufcnt);
+ uma_zfree(ffsraw_pbuf_zone,
+ nbp);
nbp = NULL;
}
}
@@ -365,7 +366,7 @@
if (resid <= bp->b_bufsize) { /* No more readaheads */
pbrelvp(nbp);
- relpbuf(nbp, &ffsrawbufcnt);
+ uma_zfree(ffsraw_pbuf_zone, nbp);
nbp = NULL;
} else { /* Setup next readahead */
nerror = ffs_rawread_readahead(vp,
@@ -379,7 +380,7 @@
nbp);
if (nerror != 0) {
pbrelvp(nbp);
- relpbuf(nbp, &ffsrawbufcnt);
+ uma_zfree(ffsraw_pbuf_zone, nbp);
nbp = NULL;
}
}
@@ -395,13 +396,13 @@
if (bp != NULL) {
pbrelvp(bp);
- relpbuf(bp, &ffsrawbufcnt);
+ uma_zfree(ffsraw_pbuf_zone, bp);
}
if (nbp != NULL) { /* Run down readahead buffer */
bwait(nbp, PRIBIO, "rawrd");
vunmapbuf(nbp);
pbrelvp(nbp);
- relpbuf(nbp, &ffsrawbufcnt);
+ uma_zfree(ffsraw_pbuf_zone, nbp);
}
if (error == 0)
Index: sys/vm/swap_pager.c
===================================================================
--- sys/vm/swap_pager.c
+++ sys/vm/swap_pager.c
@@ -324,9 +324,8 @@
static int swap_pager_full = 2; /* swap space exhaustion (task killing) */
static int swap_pager_almost_full = 1; /* swap space exhaustion (w/hysteresis)*/
-static int nsw_rcount; /* free read buffers */
-static int nsw_wcount_sync; /* limit write buffers / synchronous */
-static int nsw_wcount_async; /* limit write buffers / asynchronous */
+static struct mtx swbuf_mtx; /* to sync nsw_wcount_async */
+static int nsw_wcount_async; /* limit async write buffers */
static int nsw_wcount_async_max;/* assigned maximum */
static int nsw_cluster_max; /* maximum VOP I/O allowed */
@@ -352,6 +351,8 @@
(&swap_pager_object_list[((int)(intptr_t)handle >> 4) & (NOBJLISTS-1)])
static struct pagerlst swap_pager_object_list[NOBJLISTS];
+static uma_zone_t swwbuf_zone;
+static uma_zone_t swrbuf_zone;
static uma_zone_t swblk_zone;
static uma_zone_t swpctrie_zone;
@@ -539,12 +540,16 @@
*/
nsw_cluster_max = min((MAXPHYS/PAGE_SIZE), MAX_PAGEOUT_CLUSTER);
- mtx_lock(&pbuf_mtx);
- nsw_rcount = (nswbuf + 1) / 2;
- nsw_wcount_sync = (nswbuf + 3) / 4;
nsw_wcount_async = 4;
nsw_wcount_async_max = nsw_wcount_async;
- mtx_unlock(&pbuf_mtx);
+ mtx_init(&swbuf_mtx, "async swbuf mutex", NULL, MTX_DEF);
+
+ swwbuf_zone = uma_zsecond_create("swwbuf", pbuf_ctor, pbuf_dtor,
+ pbuf_init, NULL, pbuf_zone);
+ uma_zone_set_max(swwbuf_zone, (nswbuf + 3) / 4);
+ swrbuf_zone = uma_zsecond_create("swrbuf", pbuf_ctor, pbuf_dtor,
+ pbuf_init, NULL, pbuf_zone);
+ uma_zone_set_max(swrbuf_zone, (nswbuf + 1) / 2);
/*
* Initialize our zone, guessing on the number we need based
@@ -1205,7 +1210,7 @@
("no swap blocking containing %p(%jx)", object, (uintmax_t)pindex));
VM_OBJECT_WUNLOCK(object);
- bp = getpbuf(&nsw_rcount);
+ bp = uma_zalloc(swrbuf_zone, M_WAITOK);
/* Pages cannot leave the object while busy. */
for (i = 0, p = bm; i < count; i++, p = TAILQ_NEXT(p, listq)) {
MPASS(p->pindex == bm->pindex + i);
@@ -1406,12 +1411,17 @@
* All I/O parameters have been satisfied, build the I/O
* request and assign the swap space.
*/
- if (sync == TRUE) {
- bp = getpbuf(&nsw_wcount_sync);
- } else {
- bp = getpbuf(&nsw_wcount_async);
- bp->b_flags = B_ASYNC;
+ if (sync != TRUE) {
+ mtx_lock(&swbuf_mtx);
+ while (nsw_wcount_async == 0)
+ msleep(&nsw_wcount_async, &swbuf_mtx, PVM,
+ "swbufa", 0);
+ nsw_wcount_async--;
+ mtx_unlock(&swbuf_mtx);
}
+ bp = uma_zalloc(swwbuf_zone, M_WAITOK);
+ if (sync != TRUE)
+ bp->b_flags = B_ASYNC;
bp->b_flags |= B_PAGING;
bp->b_iocmd = BIO_WRITE;
@@ -1634,15 +1644,13 @@
/*
* release the physical I/O buffer
*/
- relpbuf(
- bp,
- ((bp->b_iocmd == BIO_READ) ? &nsw_rcount :
- ((bp->b_flags & B_ASYNC) ?
- &nsw_wcount_async :
- &nsw_wcount_sync
- )
- )
- );
+ if (bp->b_flags & B_ASYNC) {
+ mtx_lock(&swbuf_mtx);
+ if (++nsw_wcount_async == 1)
+ wakeup(&nsw_wcount_async);
+ mtx_unlock(&swbuf_mtx);
+ }
+ uma_zfree((bp->b_iocmd == BIO_READ) ? swrbuf_zone : swwbuf_zone, bp);
}
int
@@ -2627,6 +2635,7 @@
bp->b_ioflags |= BIO_ERROR;
bp->b_resid = bp->b_bcount - bp2->bio_completed;
bp->b_error = bp2->bio_error;
+ bp->b_caller1 = NULL;
bufdone(bp);
sp = bp2->bio_caller1;
mtx_lock(&sw_dev_mtx);
@@ -2666,6 +2675,7 @@
return;
}
+ bp->b_caller1 = bio;
bio->bio_caller1 = sp;
bio->bio_caller2 = bp;
bio->bio_cmd = bp->b_iocmd;
@@ -2880,7 +2890,7 @@
if (new > nswbuf / 2 || new < 1)
return (EINVAL);
- mtx_lock(&pbuf_mtx);
+ mtx_lock(&swbuf_mtx);
while (nsw_wcount_async_max != new) {
/*
* Adjust difference. If the current async count is too low,
@@ -2895,11 +2905,11 @@
} else {
nsw_wcount_async_max -= nsw_wcount_async;
nsw_wcount_async = 0;
- msleep(&nsw_wcount_async, &pbuf_mtx, PSWP,
+ msleep(&nsw_wcount_async, &swbuf_mtx, PSWP,
"swpsysctl", 0);
}
}
- mtx_unlock(&pbuf_mtx);
+ mtx_unlock(&swbuf_mtx);
return (0);
}
Index: sys/vm/uma.h
===================================================================
--- sys/vm/uma.h
+++ sys/vm/uma.h
@@ -217,17 +217,6 @@
uma_zone_t uma_zsecond_create(char *name, uma_ctor ctor, uma_dtor dtor,
uma_init zinit, uma_fini zfini, uma_zone_t master);
-/*
- * Add a second master to a secondary zone. This provides multiple data
- * backends for objects with the same size. Both masters must have
- * compatible allocation flags. Presently, UMA_ZONE_MALLOC type zones are
- * the only supported.
- *
- * Returns:
- * Error on failure, 0 on success.
- */
-int uma_zsecond_add(uma_zone_t zone, uma_zone_t master);
-
/*
* Create cache-only zones.
*
@@ -285,10 +274,6 @@
* NUMA aware Zone. Implements a best
* effort first-touch policy.
*/
-#define UMA_ZONE_NOBUCKETCACHE 0x20000 /*
- * Don't cache full buckets. Limit
- * UMA to per-cpu state.
- */
/*
* These flags are shared between the keg and zone. In zones wishing to add
@@ -511,6 +496,18 @@
*/
int uma_zone_set_max(uma_zone_t zone, int nitems);
+/*
+ * Sets a high limit on the number of items allowed in zone's bucket cache
+ *
+ * Arguments:
+ * zone The zone to limit
+ * nitems The requested upper limit on the number of items allowed
+ *
+ * Returns:
+ * int The effective value of nitems set
+ */
+int uma_zone_set_maxcache(uma_zone_t zone, int nitems);
+
/*
* Obtains the effective limit on the number of items in a zone
*
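
Note (not part of the patch): a minimal sketch of how the new bucket-cache limit composes with uma_zone_set_max(). The zone and item names are illustrative only; setting the cache limit to 0 mirrors what vm_page.c does below for its per-domain page cache zones.

#include <vm/uma.h>

struct example_item { uint64_t payload[8]; };	/* hypothetical item type */
static uma_zone_t example_zone;

static void
example_zone_setup(void)
{
	example_zone = uma_zcreate("example", sizeof(struct example_item),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
	uma_zone_set_max(example_zone, 256);	/* hard limit on allocated items */
	uma_zone_set_maxcache(example_zone, 0);	/* keep no idle full buckets cached */
}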
Index: sys/vm/uma_core.c
===================================================================
--- sys/vm/uma_core.c
+++ sys/vm/uma_core.c
@@ -255,17 +255,17 @@
static void uma_timeout(void *);
static void uma_startup3(void);
static void *zone_alloc_item(uma_zone_t, void *, int, int);
+static void *zone_alloc_item_locked(uma_zone_t, void *, int, int);
static void zone_free_item(uma_zone_t, void *, void *, enum zfreeskip);
static void bucket_enable(void);
static void bucket_init(void);
static uma_bucket_t bucket_alloc(uma_zone_t zone, void *, int);
static void bucket_free(uma_zone_t zone, uma_bucket_t, void *);
static void bucket_zone_drain(void);
-static uma_bucket_t zone_alloc_bucket(uma_zone_t, void *, int, int);
+static uma_bucket_t zone_alloc_bucket(uma_zone_t, void *, int, int, int);
static uma_slab_t zone_fetch_slab(uma_zone_t, uma_keg_t, int, int);
-static uma_slab_t zone_fetch_slab_multi(uma_zone_t, uma_keg_t, int, int);
static void *slab_alloc_item(uma_keg_t keg, uma_slab_t slab);
-static void slab_free_item(uma_keg_t keg, uma_slab_t slab, void *item);
+static void slab_free_item(uma_zone_t zone, uma_slab_t slab, void *item);
static uma_keg_t uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit,
uma_fini fini, int align, uint32_t flags);
static int zone_import(uma_zone_t, void **, int, int, int);
@@ -472,6 +472,7 @@
zdom->uzd_nitems -= bucket->ub_cnt;
if (ws && zdom->uzd_imin > zdom->uzd_nitems)
zdom->uzd_imin = zdom->uzd_nitems;
+ zone->uz_bkt_count -= bucket->ub_cnt;
}
return (bucket);
}
@@ -482,11 +483,14 @@
{
ZONE_LOCK_ASSERT(zone);
+ KASSERT(zone->uz_bkt_count < zone->uz_bkt_max, ("%s: zone %p overflow",
+ __func__, zone));
LIST_INSERT_HEAD(&zdom->uzd_buckets, bucket, ub_link);
zdom->uzd_nitems += bucket->ub_cnt;
if (ws && zdom->uzd_imax < zdom->uzd_nitems)
zdom->uzd_imax = zdom->uzd_nitems;
+ zone->uz_bkt_count += bucket->ub_cnt;
}
static void
@@ -509,15 +513,6 @@
taskqueue_enqueue(taskqueue_thread, &zone->uz_maxaction);
}
-static void
-zone_foreach_keg(uma_zone_t zone, void (*kegfn)(uma_keg_t))
-{
- uma_klink_t klink;
-
- LIST_FOREACH(klink, &zone->uz_kegs, kl_link)
- kegfn(klink->kl_keg);
-}
-
/*
* Routine called by timeout which is used to fire off some time interval
* based calculations. (stats, hash size, etc.)
@@ -562,8 +557,9 @@
* Returns nothing.
*/
static void
-keg_timeout(uma_keg_t keg)
+zone_timeout(uma_zone_t zone)
{
+ uma_keg_t keg = zone->uz_keg;
KEG_LOCK(keg);
/*
@@ -601,20 +597,11 @@
return;
}
}
- KEG_UNLOCK(keg);
-}
-
-static void
-zone_timeout(uma_zone_t zone)
-{
- int i;
- zone_foreach_keg(zone, &keg_timeout);
-
- ZONE_LOCK(zone);
- for (i = 0; i < vm_ndomains; i++)
+ for (int i = 0; i < vm_ndomains; i++)
zone_domain_update_wss(&zone->uz_domain[i]);
- ZONE_UNLOCK(zone);
+
+ KEG_UNLOCK(keg);
}
/*
@@ -744,6 +731,11 @@
for (i = 0; i < bucket->ub_cnt; i++)
zone->uz_fini(bucket->ub_bucket[i], zone->uz_size);
zone->uz_release(zone->uz_arg, bucket->ub_bucket, bucket->ub_cnt);
+ ZONE_LOCK(zone);
+ zone->uz_items -= bucket->ub_cnt;
+ if (zone->uz_sleepers && zone->uz_items < zone->uz_max_items)
+ wakeup_one(zone);
+ ZONE_UNLOCK(zone);
bucket->ub_cnt = 0;
}
@@ -1029,7 +1021,7 @@
* we're running. Normally the uma_rwlock would protect us but we
* must be able to release and acquire the right lock for each keg.
*/
- zone_foreach_keg(zone, &keg_drain);
+ keg_drain(zone->uz_keg);
ZONE_LOCK(zone);
zone->uz_flags &= ~UMA_ZFLAG_DRAINING;
wakeup(zone);
@@ -1068,7 +1060,8 @@
KASSERT(domain >= 0 && domain < vm_ndomains,
("keg_alloc_slab: domain %d out of range", domain));
- mtx_assert(&keg->uk_lock, MA_OWNED);
+ KEG_LOCK_ASSERT(keg);
+ MPASS(zone->uz_lockptr == &keg->uk_lock);
allocf = keg->uk_allocf;
KEG_UNLOCK(keg);
@@ -1164,8 +1157,7 @@
void *mem;
int pages;
- keg = zone_first_keg(zone);
-
+ keg = zone->uz_keg;
/*
* If we are in BOOT_BUCKETS or higher, than switch to real
* allocator. Zones with page sized slabs switch at BOOT_PAGEALLOC.
@@ -1303,7 +1295,7 @@
uma_keg_t keg;
TAILQ_INIT(&alloctail);
- keg = zone_first_keg(zone);
+ keg = zone->uz_keg;
npages = howmany(bytes, PAGE_SIZE);
while (npages > 0) {
@@ -1526,8 +1518,6 @@
u_int shsize;
KASSERT(keg != NULL, ("Keg is null in keg_large_init"));
- KASSERT((keg->uk_flags & UMA_ZFLAG_CACHEONLY) == 0,
- ("keg_large_init: Cannot large-init a UMA_ZFLAG_CACHEONLY keg"));
KASSERT((keg->uk_flags & UMA_ZONE_PCPU) == 0,
("%s: Cannot large-init a UMA_ZONE_PCPU keg", __func__));
@@ -1766,14 +1756,13 @@
zone->uz_sleeps = 0;
zone->uz_count = 0;
zone->uz_count_min = 0;
+ zone->uz_count_max = BUCKET_MAX;
zone->uz_flags = 0;
zone->uz_warning = NULL;
/* The domain structures follow the cpu structures. */
zone->uz_domain = (struct uma_zone_domain *)&zone->uz_cpu[mp_ncpus];
+ zone->uz_bkt_max = ULONG_MAX;
timevalclear(&zone->uz_ratecheck);
- keg = arg->keg;
-
- ZONE_LOCK_INIT(zone, (arg->flags & UMA_ZONE_MTXCLASS));
/*
* This is a pure cache zone, no kegs.
@@ -1787,6 +1776,7 @@
zone->uz_release = arg->release;
zone->uz_arg = arg->arg;
zone->uz_lockptr = &zone->uz_lock;
+ ZONE_LOCK_INIT(zone, (arg->flags & UMA_ZONE_MTXCLASS));
rw_wlock(&uma_rwlock);
LIST_INSERT_HEAD(&uma_cachezones, zone, uz_link);
rw_wunlock(&uma_rwlock);
@@ -1799,6 +1789,7 @@
zone->uz_import = (uma_import)zone_import;
zone->uz_release = (uma_release)zone_release;
zone->uz_arg = zone;
+ keg = arg->keg;
if (arg->flags & UMA_ZONE_SECONDARY) {
KASSERT(arg->keg != NULL, ("Secondary zone on zero'd keg"));
@@ -1837,12 +1828,7 @@
return (error);
}
- /*
- * Link in the first keg.
- */
- zone->uz_klink.kl_keg = keg;
- LIST_INSERT_HEAD(&zone->uz_kegs, &zone->uz_klink, kl_link);
- zone->uz_lockptr = &keg->uk_lock;
+ zone->uz_keg = keg;
zone->uz_size = keg->uk_size;
zone->uz_flags |= (keg->uk_flags &
(UMA_ZONE_INHERIT | UMA_ZFLAG_INHERIT));
@@ -1908,12 +1894,10 @@
static void
zone_dtor(void *arg, int size, void *udata)
{
- uma_klink_t klink;
uma_zone_t zone;
uma_keg_t keg;
zone = (uma_zone_t)arg;
- keg = zone_first_keg(zone);
if (!(zone->uz_flags & UMA_ZFLAG_INTERNAL))
cache_drain(zone);
@@ -1928,26 +1912,18 @@
* remove it... we dont care for now
*/
zone_drain_wait(zone, M_WAITOK);
- /*
- * Unlink all of our kegs.
- */
- while ((klink = LIST_FIRST(&zone->uz_kegs)) != NULL) {
- klink->kl_keg = NULL;
- LIST_REMOVE(klink, kl_link);
- if (klink == &zone->uz_klink)
- continue;
- free(klink, M_TEMP);
- }
/*
* We only destroy kegs from non secondary zones.
*/
- if (keg != NULL && (zone->uz_flags & UMA_ZONE_SECONDARY) == 0) {
+ if ((keg = zone->uz_keg) != NULL &&
+ (zone->uz_flags & UMA_ZONE_SECONDARY) == 0) {
rw_wlock(&uma_rwlock);
LIST_REMOVE(keg, uk_link);
rw_wunlock(&uma_rwlock);
zone_free_item(kegs, keg, NULL, SKIP_NONE);
}
- ZONE_LOCK_FINI(zone);
+ if (zone->uz_lockptr == &zone->uz_lock)
+ ZONE_LOCK_FINI(zone);
}
/*
@@ -2231,7 +2207,7 @@
uma_zone_t res;
bool locked;
- keg = zone_first_keg(master);
+ keg = master->uz_keg;
memset(&args, 0, sizeof(args));
args.name = name;
args.size = keg->uk_size;
@@ -2280,85 +2256,6 @@
return (zone_alloc_item(zones, &args, UMA_ANYDOMAIN, M_WAITOK));
}
-static void
-zone_lock_pair(uma_zone_t a, uma_zone_t b)
-{
- if (a < b) {
- ZONE_LOCK(a);
- mtx_lock_flags(b->uz_lockptr, MTX_DUPOK);
- } else {
- ZONE_LOCK(b);
- mtx_lock_flags(a->uz_lockptr, MTX_DUPOK);
- }
-}
-
-static void
-zone_unlock_pair(uma_zone_t a, uma_zone_t b)
-{
-
- ZONE_UNLOCK(a);
- ZONE_UNLOCK(b);
-}
-
-int
-uma_zsecond_add(uma_zone_t zone, uma_zone_t master)
-{
- uma_klink_t klink;
- uma_klink_t kl;
- int error;
-
- error = 0;
- klink = malloc(sizeof(*klink), M_TEMP, M_WAITOK | M_ZERO);
-
- zone_lock_pair(zone, master);
- /*
- * zone must use vtoslab() to resolve objects and must already be
- * a secondary.
- */
- if ((zone->uz_flags & (UMA_ZONE_VTOSLAB | UMA_ZONE_SECONDARY))
- != (UMA_ZONE_VTOSLAB | UMA_ZONE_SECONDARY)) {
- error = EINVAL;
- goto out;
- }
- /*
- * The new master must also use vtoslab().
- */
- if ((zone->uz_flags & UMA_ZONE_VTOSLAB) != UMA_ZONE_VTOSLAB) {
- error = EINVAL;
- goto out;
- }
-
- /*
- * The underlying object must be the same size. rsize
- * may be different.
- */
- if (master->uz_size != zone->uz_size) {
- error = E2BIG;
- goto out;
- }
- /*
- * Put it at the end of the list.
- */
- klink->kl_keg = zone_first_keg(master);
- LIST_FOREACH(kl, &zone->uz_kegs, kl_link) {
- if (LIST_NEXT(kl, kl_link) == NULL) {
- LIST_INSERT_AFTER(kl, klink, kl_link);
- break;
- }
- }
- klink = NULL;
- zone->uz_flags |= UMA_ZFLAG_MULTI;
- zone->uz_slab = zone_fetch_slab_multi;
-
-out:
- zone_unlock_pair(zone, master);
- if (klink != NULL)
- free(klink, M_TEMP);
-
- return (error);
-}
-
-
/* See uma.h */
void
uma_zdestroy(uma_zone_t zone)
@@ -2420,7 +2317,7 @@
uma_bucket_t bucket;
uma_cache_t cache;
void *item;
- int cpu, domain, lockfail;
+ int cpu, domain, lockfail, maxbucket;
#ifdef INVARIANTS
bool skipdbg;
#endif
@@ -2541,8 +2438,10 @@
domain = UMA_ANYDOMAIN;
/* Short-circuit for zones without buckets and low memory. */
- if (zone->uz_count == 0 || bucketdisable)
+ if (zone->uz_count == 0 || bucketdisable) {
+ ZONE_LOCK(zone);
goto zalloc_item;
+ }
/*
* Attempt to retrieve the item from the per-CPU cache has failed, so
@@ -2590,8 +2489,17 @@
* We bump the uz count when the cache size is insufficient to
* handle the working set.
*/
- if (lockfail && zone->uz_count < BUCKET_MAX)
+ if (lockfail && zone->uz_count < zone->uz_count_max)
zone->uz_count++;
+
+ if (zone->uz_max_items > 0) {
+ if (zone->uz_items >= zone->uz_max_items)
+ goto zalloc_item;
+ maxbucket = MIN(zone->uz_count,
+ zone->uz_max_items - zone->uz_items);
+ } else
+ maxbucket = zone->uz_count;
+ zone->uz_items += maxbucket;
ZONE_UNLOCK(zone);
/*
@@ -2599,11 +2507,18 @@
* works we'll restart the allocation from the beginning and it
* will use the just filled bucket.
*/
- bucket = zone_alloc_bucket(zone, udata, domain, flags);
+ bucket = zone_alloc_bucket(zone, udata, domain, flags, maxbucket);
CTR3(KTR_UMA, "uma_zalloc: zone %s(%p) bucket zone returned %p",
zone->uz_name, zone, bucket);
+ ZONE_LOCK(zone);
if (bucket != NULL) {
- ZONE_LOCK(zone);
+ if (bucket->ub_cnt < maxbucket) {
+ MPASS(zone->uz_items >= maxbucket - bucket->ub_cnt);
+ zone->uz_items -= maxbucket - bucket->ub_cnt;
+ if (zone->uz_sleepers > 0 &&
+ zone->uz_items < zone->uz_max_items)
+ wakeup_one(zone);
+ }
critical_enter();
cpu = curcpu;
cache = &zone->uz_cpu[cpu];
@@ -2618,7 +2533,7 @@
domain == PCPU_GET(domain))) {
cache->uc_allocbucket = bucket;
zdom->uzd_imax += bucket->ub_cnt;
- } else if ((zone->uz_flags & UMA_ZONE_NOBUCKETCACHE) != 0) {
+ } else if (zone->uz_bkt_count >= zone->uz_bkt_max) {
critical_exit();
ZONE_UNLOCK(zone);
bucket_drain(zone, bucket);
@@ -2628,13 +2543,18 @@
zone_put_bucket(zone, zdom, bucket, false);
ZONE_UNLOCK(zone);
goto zalloc_start;
+ } else {
+ zone->uz_items -= maxbucket;
+ if (zone->uz_sleepers > 0 &&
+ zone->uz_items + 1 < zone->uz_max_items)
+ wakeup_one(zone);
}
/*
* We may not be able to get a bucket so return an actual item.
*/
zalloc_item:
- item = zone_alloc_item(zone, udata, domain, flags);
+ item = zone_alloc_item_locked(zone, udata, domain, flags);
return (item);
}
@@ -2677,6 +2597,7 @@
KASSERT(domain >= 0 && domain < vm_ndomains,
("keg_first_slab: domain %d out of range", domain));
+ KEG_LOCK_ASSERT(keg);
slab = NULL;
start = domain;
@@ -2702,7 +2623,7 @@
{
uint32_t reserve;
- mtx_assert(&keg->uk_lock, MA_OWNED);
+ KEG_LOCK_ASSERT(keg);
reserve = (flags & M_USE_RESERVE) != 0 ? 0 : keg->uk_reserve;
if (keg->uk_free <= reserve)
@@ -2720,7 +2641,7 @@
bool rr;
restart:
- mtx_assert(&keg->uk_lock, MA_OWNED);
+ KEG_LOCK_ASSERT(keg);
/*
* Use the keg's policy if upper layers haven't already specified a
@@ -2753,23 +2674,10 @@
if (flags & M_NOVM)
break;
- if (keg->uk_maxpages && keg->uk_pages >= keg->uk_maxpages) {
- keg->uk_flags |= UMA_ZFLAG_FULL;
- /*
- * If this is not a multi-zone, set the FULL bit.
- * Otherwise slab_multi() takes care of it.
- */
- if ((zone->uz_flags & UMA_ZFLAG_MULTI) == 0) {
- zone->uz_flags |= UMA_ZFLAG_FULL;
- zone_log_warning(zone);
- zone_maxaction(zone);
- }
- if (flags & M_NOWAIT)
- return (NULL);
- zone->uz_sleeps++;
- msleep(keg, &keg->uk_lock, PVM, "keglimit", 0);
- continue;
- }
+ KASSERT(zone->uz_max_items == 0 ||
+ zone->uz_items <= zone->uz_max_items,
+ ("%s: zone %p overflow", __func__, zone));
+
slab = keg_alloc_slab(keg, zone, domain, aflags);
/*
* If we got a slab here it's safe to mark it partially used
@@ -2812,7 +2720,7 @@
uma_slab_t slab;
if (keg == NULL) {
- keg = zone_first_keg(zone);
+ keg = zone->uz_keg;
KEG_LOCK(keg);
}
@@ -2827,87 +2735,6 @@
return (NULL);
}
-/*
- * uma_zone_fetch_slab_multi: Fetches a slab from one available keg. Returns
- * with the keg locked. On NULL no lock is held.
- *
- * The last pointer is used to seed the search. It is not required.
- */
-static uma_slab_t
-zone_fetch_slab_multi(uma_zone_t zone, uma_keg_t last, int domain, int rflags)
-{
- uma_klink_t klink;
- uma_slab_t slab;
- uma_keg_t keg;
- int flags;
- int empty;
- int full;
-
- /*
- * Don't wait on the first pass. This will skip limit tests
- * as well. We don't want to block if we can find a provider
- * without blocking.
- */
- flags = (rflags & ~M_WAITOK) | M_NOWAIT;
- /*
- * Use the last slab allocated as a hint for where to start
- * the search.
- */
- if (last != NULL) {
- slab = keg_fetch_slab(last, zone, domain, flags);
- if (slab)
- return (slab);
- KEG_UNLOCK(last);
- }
- /*
- * Loop until we have a slab incase of transient failures
- * while M_WAITOK is specified. I'm not sure this is 100%
- * required but we've done it for so long now.
- */
- for (;;) {
- empty = 0;
- full = 0;
- /*
- * Search the available kegs for slabs. Be careful to hold the
- * correct lock while calling into the keg layer.
- */
- LIST_FOREACH(klink, &zone->uz_kegs, kl_link) {
- keg = klink->kl_keg;
- KEG_LOCK(keg);
- if ((keg->uk_flags & UMA_ZFLAG_FULL) == 0) {
- slab = keg_fetch_slab(keg, zone, domain, flags);
- if (slab)
- return (slab);
- }
- if (keg->uk_flags & UMA_ZFLAG_FULL)
- full++;
- else
- empty++;
- KEG_UNLOCK(keg);
- }
- if (rflags & (M_NOWAIT | M_NOVM))
- break;
- flags = rflags;
- /*
- * All kegs are full. XXX We can't atomically check all kegs
- * and sleep so just sleep for a short period and retry.
- */
- if (full && !empty) {
- ZONE_LOCK(zone);
- zone->uz_flags |= UMA_ZFLAG_FULL;
- zone->uz_sleeps++;
- zone_log_warning(zone);
- zone_maxaction(zone);
- msleep(zone, zone->uz_lockptr, PVM,
- "zonelimit", hz/100);
- zone->uz_flags &= ~UMA_ZFLAG_FULL;
- ZONE_UNLOCK(zone);
- continue;
- }
- }
- return (NULL);
-}
-
static void *
slab_alloc_item(uma_keg_t keg, uma_slab_t slab)
{
@@ -2916,7 +2743,7 @@
uint8_t freei;
MPASS(keg == slab->us_keg);
- mtx_assert(&keg->uk_lock, MA_OWNED);
+ KEG_LOCK_ASSERT(keg);
freei = BIT_FFS(SLAB_SETSIZE, &slab->us_free) - 1;
BIT_CLR(SLAB_SETSIZE, freei, &slab->us_free);
@@ -2983,10 +2810,9 @@
}
static uma_bucket_t
-zone_alloc_bucket(uma_zone_t zone, void *udata, int domain, int flags)
+zone_alloc_bucket(uma_zone_t zone, void *udata, int domain, int flags, int max)
{
uma_bucket_t bucket;
- int max;
CTR1(KTR_UMA, "zone_alloc:_bucket domain %d)", domain);
@@ -2995,7 +2821,6 @@
if (bucket == NULL)
return (NULL);
- max = MIN(bucket->ub_entries, zone->uz_count);
bucket->ub_cnt = zone->uz_import(zone->uz_arg, bucket->ub_bucket,
max, domain, flags);
@@ -3049,13 +2874,42 @@
static void *
zone_alloc_item(uma_zone_t zone, void *udata, int domain, int flags)
+{
+
+ ZONE_LOCK(zone);
+ return (zone_alloc_item_locked(zone, udata, domain, flags));
+}
+
+static void *
+zone_alloc_item_locked(uma_zone_t zone, void *udata, int domain, int flags)
{
void *item;
#ifdef INVARIANTS
bool skipdbg;
#endif
- item = NULL;
+ ZONE_LOCK_ASSERT(zone);
+
+ if (zone->uz_max_items > 0 && zone->uz_items >= zone->uz_max_items) {
+ zone_log_warning(zone);
+ zone_maxaction(zone);
+ if (flags & M_NOWAIT) {
+ ZONE_UNLOCK(zone);
+ return (NULL);
+ }
+ zone->uz_sleeps++;
+ zone->uz_sleepers++;
+ while (zone->uz_items >= zone->uz_max_items)
+ mtx_sleep(zone, zone->uz_lockptr, PVM, "zonelimit", 0);
+ zone->uz_sleepers--;
+ if (zone->uz_sleepers > 0 &&
+ zone->uz_items + 1 < zone->uz_max_items)
+ wakeup_one(zone);
+ }
+
+ zone->uz_items++;
+ zone->uz_allocs++;
+ ZONE_UNLOCK(zone);
if (domain != UMA_ANYDOMAIN) {
/* avoid allocs targeting empty domains */
@@ -3064,7 +2918,6 @@
}
if (zone->uz_import(zone->uz_arg, &item, 1, domain, flags) != 1)
goto fail;
- atomic_add_long(&zone->uz_allocs, 1);
#ifdef INVARIANTS
skipdbg = uma_dbg_zskip(zone, item);
@@ -3105,6 +2958,10 @@
fail:
CTR2(KTR_UMA, "zone_alloc_item failed from %s(%p)",
zone->uz_name, zone);
+ ZONE_LOCK(zone);
+ zone->uz_items--;
+ zone->uz_allocs--;
+ ZONE_UNLOCK(zone);
atomic_add_long(&zone->uz_fails, 1);
return (NULL);
}
@@ -3116,7 +2973,8 @@
uma_cache_t cache;
uma_bucket_t bucket;
uma_zone_domain_t zdom;
- int cpu, domain, lockfail;
+ int cpu, domain;
+ bool lockfail;
#ifdef INVARIANTS
bool skipdbg;
#endif
@@ -3162,7 +3020,7 @@
* The race here is acceptable. If we miss it we'll just have to wait
* a little longer for the limits to be reset.
*/
- if (zone->uz_flags & UMA_ZFLAG_FULL)
+ if (zone->uz_sleepers > 0)
goto zfree_item;
/*
@@ -3212,12 +3070,20 @@
if (zone->uz_count == 0 || bucketdisable)
goto zfree_item;
- lockfail = 0;
+ lockfail = false;
if (ZONE_TRYLOCK(zone) == 0) {
/* Record contention to size the buckets. */
ZONE_LOCK(zone);
- lockfail = 1;
+ lockfail = true;
}
+ /*
+ * Now we got the lock, check for sleepers and give a chance to
+ * first one to allocate. If item will end up on CPU cache,
+ * and they will wake up on wrong CPU, then they will go back
+ * to sleep.
+ */
+ if (zone->uz_sleepers > 0)
+ wakeup_one(zone);
critical_enter();
cpu = curcpu;
cache = &zone->uz_cpu[cpu];
@@ -3245,9 +3111,9 @@
"uma_zfree: zone %s(%p) putting bucket %p on free list",
zone->uz_name, zone, bucket);
/* ub_cnt is pointing to the last free item */
- KASSERT(bucket->ub_cnt != 0,
- ("uma_zfree: Attempting to insert an empty bucket onto the full list.\n"));
- if ((zone->uz_flags & UMA_ZONE_NOBUCKETCACHE) != 0) {
+ KASSERT(bucket->ub_cnt == bucket->ub_entries,
+ ("uma_zfree: Attempting to insert not full bucket onto the full list.\n"));
+ if (zone->uz_bkt_count >= zone->uz_bkt_max) {
ZONE_UNLOCK(zone);
bucket_drain(zone, bucket);
bucket_free(zone, bucket, udata);
@@ -3260,7 +3126,7 @@
* We bump the uz count when the cache size is insufficient to
* handle the working set.
*/
- if (lockfail && zone->uz_count < BUCKET_MAX)
+ if (lockfail && zone->uz_count < zone->uz_count_max)
zone->uz_count++;
ZONE_UNLOCK(zone);
@@ -3291,8 +3157,6 @@
*/
zfree_item:
zone_free_item(zone, item, udata, SKIP_DTOR);
-
- return;
}
void
@@ -3315,12 +3179,15 @@
}
static void
-slab_free_item(uma_keg_t keg, uma_slab_t slab, void *item)
+slab_free_item(uma_zone_t zone, uma_slab_t slab, void *item)
{
+ uma_keg_t keg;
uma_domain_t dom;
uint8_t freei;
- mtx_assert(&keg->uk_lock, MA_OWNED);
+ keg = zone->uz_keg;
+ MPASS(zone->uz_lockptr == &keg->uk_lock);
+ KEG_LOCK_ASSERT(keg);
MPASS(keg == slab->us_keg);
dom = &keg->uk_domain[slab->us_domain];
@@ -3350,11 +3217,9 @@
uma_slab_t slab;
uma_keg_t keg;
uint8_t *mem;
- int clearfull;
int i;
- clearfull = 0;
- keg = zone_first_keg(zone);
+ keg = zone->uz_keg;
KEG_LOCK(keg);
for (i = 0; i < cnt; i++) {
item = bucket[i];
@@ -3368,37 +3233,11 @@
}
} else {
slab = vtoslab((vm_offset_t)item);
- if (slab->us_keg != keg) {
- KEG_UNLOCK(keg);
- keg = slab->us_keg;
- KEG_LOCK(keg);
- }
- }
- slab_free_item(keg, slab, item);
- if (keg->uk_flags & UMA_ZFLAG_FULL) {
- if (keg->uk_pages < keg->uk_maxpages) {
- keg->uk_flags &= ~UMA_ZFLAG_FULL;
- clearfull = 1;
- }
-
- /*
- * We can handle one more allocation. Since we're
- * clearing ZFLAG_FULL, wake up all procs blocked
- * on pages. This should be uncommon, so keeping this
- * simple for now (rather than adding count of blocked
- * threads etc).
- */
- wakeup(keg);
+ MPASS(slab->us_keg == keg);
}
+ slab_free_item(zone, slab, item);
}
KEG_UNLOCK(keg);
- if (clearfull) {
- ZONE_LOCK(zone);
- zone->uz_flags &= ~UMA_ZFLAG_FULL;
- wakeup(zone);
- ZONE_UNLOCK(zone);
- }
-
}
/*
@@ -3435,25 +3274,53 @@
if (skip < SKIP_FINI && zone->uz_fini)
zone->uz_fini(item, zone->uz_size);
- atomic_add_long(&zone->uz_frees, 1);
zone->uz_release(zone->uz_arg, &item, 1);
+
+ ZONE_LOCK(zone);
+ zone->uz_frees++;
+ zone->uz_items--;
+ if (zone->uz_sleepers > 0 && zone->uz_items < zone->uz_max_items)
+ wakeup_one(zone);
+ ZONE_UNLOCK(zone);
}
/* See uma.h */
int
uma_zone_set_max(uma_zone_t zone, int nitems)
{
- uma_keg_t keg;
+ struct uma_bucket_zone *ubz;
- keg = zone_first_keg(zone);
- if (keg == NULL)
- return (0);
- KEG_LOCK(keg);
- keg->uk_maxpages = (nitems / keg->uk_ipers) * keg->uk_ppera;
- if (keg->uk_maxpages * keg->uk_ipers < nitems)
- keg->uk_maxpages += keg->uk_ppera;
- nitems = (keg->uk_maxpages / keg->uk_ppera) * keg->uk_ipers;
- KEG_UNLOCK(keg);
+ /*
+ * If limit is very low we may need to limit how
+ * much items are allowed in CPU caches.
+ */
+ ubz = &bucket_zones[0];
+ for (; ubz->ubz_entries != 0; ubz++)
+ if (ubz->ubz_entries * 2 * mp_ncpus > nitems)
+ break;
+ if (ubz == &bucket_zones[0])
+ nitems = ubz->ubz_entries * 2 * mp_ncpus;
+ else
+ ubz--;
+
+ ZONE_LOCK(zone);
+ zone->uz_count_max = zone->uz_count = ubz->ubz_entries;
+ if (zone->uz_count_min > zone->uz_count_max)
+ zone->uz_count_min = zone->uz_count_max;
+ zone->uz_max_items = nitems;
+ ZONE_UNLOCK(zone);
+
+ return (nitems);
+}
+
+/* See uma.h */
+int
+uma_zone_set_maxcache(uma_zone_t zone, int nitems)
+{
+
+ ZONE_LOCK(zone);
+ zone->uz_bkt_max = nitems;
+ ZONE_UNLOCK(zone);
return (nitems);
}
@@ -3463,14 +3330,10 @@
uma_zone_get_max(uma_zone_t zone)
{
int nitems;
- uma_keg_t keg;
- keg = zone_first_keg(zone);
- if (keg == NULL)
- return (0);
- KEG_LOCK(keg);
- nitems = (keg->uk_maxpages / keg->uk_ppera) * keg->uk_ipers;
- KEG_UNLOCK(keg);
+ ZONE_LOCK(zone);
+ nitems = zone->uz_max_items;
+ ZONE_UNLOCK(zone);
return (nitems);
}
@@ -3524,8 +3387,7 @@
{
uma_keg_t keg;
- keg = zone_first_keg(zone);
- KASSERT(keg != NULL, ("uma_zone_set_init: Invalid zone type"));
+ KEG_GET(zone, keg);
KEG_LOCK(keg);
KASSERT(keg->uk_pages == 0,
("uma_zone_set_init on non-empty keg"));
@@ -3539,8 +3401,7 @@
{
uma_keg_t keg;
- keg = zone_first_keg(zone);
- KASSERT(keg != NULL, ("uma_zone_set_fini: Invalid zone type"));
+ KEG_GET(zone, keg);
KEG_LOCK(keg);
KASSERT(keg->uk_pages == 0,
("uma_zone_set_fini on non-empty keg"));
@@ -3554,7 +3415,7 @@
{
ZONE_LOCK(zone);
- KASSERT(zone_first_keg(zone)->uk_pages == 0,
+ KASSERT(zone->uz_keg->uk_pages == 0,
("uma_zone_set_zinit on non-empty keg"));
zone->uz_init = zinit;
ZONE_UNLOCK(zone);
@@ -3566,7 +3427,7 @@
{
ZONE_LOCK(zone);
- KASSERT(zone_first_keg(zone)->uk_pages == 0,
+ KASSERT(zone->uz_keg->uk_pages == 0,
("uma_zone_set_zfini on non-empty keg"));
zone->uz_fini = zfini;
ZONE_UNLOCK(zone);
@@ -3579,7 +3440,7 @@
{
uma_keg_t keg;
- keg = zone_first_keg(zone);
+ KEG_GET(zone, keg);
KASSERT(keg != NULL, ("uma_zone_set_freef: Invalid zone type"));
KEG_LOCK(keg);
keg->uk_freef = freef;
@@ -3593,7 +3454,7 @@
{
uma_keg_t keg;
- keg = zone_first_keg(zone);
+ KEG_GET(zone, keg);
KEG_LOCK(keg);
keg->uk_allocf = allocf;
KEG_UNLOCK(keg);
@@ -3605,14 +3466,10 @@
{
uma_keg_t keg;
- keg = zone_first_keg(zone);
- if (keg == NULL)
- return;
+ KEG_GET(zone, keg);
KEG_LOCK(keg);
keg->uk_reserve = items;
KEG_UNLOCK(keg);
-
- return;
}
/* See uma.h */
@@ -3623,11 +3480,9 @@
vm_offset_t kva;
u_int pages;
- keg = zone_first_keg(zone);
- if (keg == NULL)
- return (0);
- pages = count / keg->uk_ipers;
+ KEG_GET(zone, keg);
+ pages = count / keg->uk_ipers;
if (pages * keg->uk_ipers < count)
pages++;
pages *= keg->uk_ppera;
@@ -3645,7 +3500,6 @@
KEG_LOCK(keg);
keg->uk_kva = kva;
keg->uk_offset = 0;
- keg->uk_maxpages = pages;
#ifdef UMA_MD_SMALL_ALLOC
keg->uk_allocf = (keg->uk_ppera > 1) ? noobj_alloc : uma_small_alloc;
#else
@@ -3667,9 +3521,7 @@
uma_keg_t keg;
int domain, flags, slabs;
- keg = zone_first_keg(zone);
- if (keg == NULL)
- return;
+ KEG_GET(zone, keg);
KEG_LOCK(keg);
slabs = items / keg->uk_ipers;
if (slabs * keg->uk_ipers < items)
@@ -3758,7 +3610,7 @@
int full;
ZONE_LOCK(zone);
- full = (zone->uz_flags & UMA_ZFLAG_FULL);
+ full = zone->uz_sleepers > 0;
ZONE_UNLOCK(zone);
return (full);
}
@@ -3766,7 +3618,7 @@
int
uma_zone_exhausted_nolock(uma_zone_t zone)
{
- return (zone->uz_flags & UMA_ZFLAG_FULL);
+ return (zone->uz_sleepers > 0);
}
void *
@@ -3886,11 +3738,11 @@
int i;
printf("keg: %s(%p) size %d(%d) flags %#x ipers %d ppera %d "
- "out %d free %d limit %d\n",
+ "out %d free %d\n",
keg->uk_name, keg, keg->uk_size, keg->uk_rsize, keg->uk_flags,
keg->uk_ipers, keg->uk_ppera,
(keg->uk_pages / keg->uk_ppera) * keg->uk_ipers - keg->uk_free,
- keg->uk_free, (keg->uk_maxpages / keg->uk_ppera) * keg->uk_ipers);
+ keg->uk_free);
for (i = 0; i < vm_ndomains; i++) {
dom = &keg->uk_domain[i];
printf("Part slabs:\n");
@@ -3909,13 +3761,13 @@
uma_print_zone(uma_zone_t zone)
{
uma_cache_t cache;
- uma_klink_t kl;
int i;
- printf("zone: %s(%p) size %d flags %#x\n",
- zone->uz_name, zone, zone->uz_size, zone->uz_flags);
- LIST_FOREACH(kl, &zone->uz_kegs, kl_link)
- uma_print_keg(kl->kl_keg);
+ printf("zone: %s(%p) size %d maxitems %lu flags %#x\n",
+ zone->uz_name, zone, zone->uz_size, zone->uz_max_items,
+ zone->uz_flags);
+ if (zone->uz_lockptr != &zone->uz_lock)
+ uma_print_keg(zone->uz_keg);
CPU_FOREACH(i) {
cache = &zone->uz_cpu[i];
printf("CPU %d Cache:\n", i);
@@ -3994,10 +3846,8 @@
uma_zone_domain_t zdom;
struct sbuf sbuf;
uma_cache_t cache;
- uma_klink_t kl;
uma_keg_t kz;
uma_zone_t z;
- uma_keg_t k;
int count, error, i;
error = sysctl_wire_old_buffer(req, 0);
@@ -4031,14 +3881,12 @@
uth.uth_align = kz->uk_align;
uth.uth_size = kz->uk_size;
uth.uth_rsize = kz->uk_rsize;
- LIST_FOREACH(kl, &z->uz_kegs, kl_link) {
- k = kl->kl_keg;
- uth.uth_maxpages += k->uk_maxpages;
- uth.uth_pages += k->uk_pages;
- uth.uth_keg_free += k->uk_free;
- uth.uth_limit = (k->uk_maxpages / k->uk_ppera)
- * k->uk_ipers;
- }
+ uth.uth_pages += (z->uz_items / kz->uk_ipers) *
+ kz->uk_ppera;
+ uth.uth_maxpages += (z->uz_max_items / kz->uk_ipers) *
+ kz->uk_ppera;
+ uth.uth_limit = z->uz_max_items;
+ uth.uth_keg_free += z->uz_keg->uk_free;
/*
* A zone is secondary is it is not the first entry
@@ -4135,8 +3983,10 @@
* zone is unlocked because the item's allocation state
* essentially holds a reference.
*/
+ if (zone->uz_lockptr == &zone->uz_lock)
+ return (NULL);
ZONE_LOCK(zone);
- keg = LIST_FIRST(&zone->uz_kegs)->kl_keg;
+ keg = zone->uz_keg;
if (keg->uk_flags & UMA_ZONE_HASH)
slab = hash_sfind(&keg->uk_hash, mem);
else
@@ -4150,12 +4000,11 @@
static bool
uma_dbg_zskip(uma_zone_t zone, void *mem)
{
- uma_keg_t keg;
- if ((keg = zone_first_keg(zone)) == NULL)
+ if (zone->uz_lockptr == &zone->uz_lock)
return (true);
- return (uma_dbg_kskip(keg, mem));
+ return (uma_dbg_kskip(zone->uz_keg, mem));
}
static bool
Index: sys/vm/uma_int.h
===================================================================
--- sys/vm/uma_int.h
+++ sys/vm/uma_int.h
@@ -223,7 +223,9 @@
*
*/
struct uma_keg {
- struct mtx uk_lock; /* Lock for the keg */
+ struct mtx uk_lock; /* Lock for the keg must be first.
+ * See shared uz_keg/uz_lockptr
+ * member of struct uma_zone. */
struct uma_hash uk_hash;
LIST_HEAD(,uma_zone) uk_zones; /* Keg's zones */
@@ -234,7 +236,6 @@
uint32_t uk_reserve; /* Number of reserved items. */
uint32_t uk_size; /* Requested size of each item */
uint32_t uk_rsize; /* Real size of each item */
- uint32_t uk_maxpages; /* Maximum number of pages to alloc */
uma_init uk_init; /* Keg's init routine */
uma_fini uk_fini; /* Keg's fini routine */
@@ -296,12 +297,6 @@
typedef struct uma_slab * uma_slab_t;
typedef uma_slab_t (*uma_slaballoc)(uma_zone_t, uma_keg_t, int, int);
-struct uma_klink {
- LIST_ENTRY(uma_klink) kl_link;
- uma_keg_t kl_keg;
-};
-typedef struct uma_klink *uma_klink_t;
-
struct uma_zone_domain {
LIST_HEAD(,uma_bucket) uzd_buckets; /* full buckets */
long uzd_nitems; /* total item count */
@@ -320,26 +315,30 @@
*/
struct uma_zone {
/* Offset 0, used in alloc/free fast/medium fast path and const. */
- struct mtx *uz_lockptr;
- const char *uz_name; /* Text name of the zone */
+ union {
+ uma_keg_t uz_keg; /* This zone's keg */
+ struct mtx *uz_lockptr; /* To keg or to self */
+ };
struct uma_zone_domain *uz_domain; /* per-domain buckets */
uint32_t uz_flags; /* Flags inherited from kegs */
uint32_t uz_size; /* Size inherited from kegs */
uma_ctor uz_ctor; /* Constructor for each allocation */
uma_dtor uz_dtor; /* Destructor */
- uma_init uz_init; /* Initializer for each item */
- uma_fini uz_fini; /* Finalizer for each item. */
+ uint64_t uz_items; /* Total items count */
+ uint64_t uz_max_items; /* Maximum number of items to alloc */
+ uint32_t uz_sleepers; /* Number of sleepers on memory */
+ uint16_t uz_count; /* Amount of items in full bucket */
+ uint16_t uz_count_max; /* Maximum amount of items there */
/* Offset 64, used in bucket replenish. */
uma_import uz_import; /* Import new memory to cache. */
uma_release uz_release; /* Release memory from cache. */
void *uz_arg; /* Import/release argument. */
+ uma_init uz_init; /* Initializer for each item */
+ uma_fini uz_fini; /* Finalizer for each item. */
uma_slaballoc uz_slab; /* Allocate a slab from the backend. */
- uint16_t uz_count; /* Amount of items in full bucket */
- uint16_t uz_count_min; /* Minimal amount of items there */
- /* 32bit pad on 64bit. */
- LIST_ENTRY(uma_zone) uz_link; /* List of all zones in keg */
- LIST_HEAD(,uma_klink) uz_kegs; /* List of kegs. */
+ uint64_t uz_bkt_count; /* Items in bucket cache */
+ uint64_t uz_bkt_max; /* Maximum bucket cache size */
/* Offset 128 Rare. */
/*
@@ -348,19 +347,19 @@
* members to reduce alignment overhead.
*/
struct mtx uz_lock; /* Lock for the zone */
- struct uma_klink uz_klink; /* klink for first keg. */
+ LIST_ENTRY(uma_zone) uz_link; /* List of all zones in keg */
+ const char *uz_name; /* Text name of the zone */
/* The next two fields are used to print a rate-limited warnings. */
const char *uz_warning; /* Warning to print on failure */
struct timeval uz_ratecheck; /* Warnings rate-limiting */
struct task uz_maxaction; /* Task to run when at limit */
+ uint16_t uz_count_min; /* Minimal amount of items in bucket */
- /* 16 bytes of pad. */
-
- /* Offset 256, atomic stats. */
- volatile u_long uz_allocs UMA_ALIGN; /* Total number of allocations */
- volatile u_long uz_fails; /* Total number of alloc failures */
- volatile u_long uz_frees; /* Total number of frees */
+ /* Offset 256, stats. */
+ uint64_t uz_allocs UMA_ALIGN; /* Total number of allocations */
uint64_t uz_sleeps; /* Total number of alloc sleeps */
+ uint64_t uz_frees; /* Total number of frees */
+ volatile u_long uz_fails; /* Total number of alloc failures */
/*
* This HAS to be the last item because we adjust the zone size
@@ -378,21 +377,11 @@
#define UMA_ZFLAG_DRAINING 0x08000000 /* Running zone_drain. */
#define UMA_ZFLAG_BUCKET 0x10000000 /* Bucket zone. */
#define UMA_ZFLAG_INTERNAL 0x20000000 /* No offpage no PCPU. */
-#define UMA_ZFLAG_FULL 0x40000000 /* Reached uz_maxpages */
#define UMA_ZFLAG_CACHEONLY 0x80000000 /* Don't ask VM for buckets. */
#define UMA_ZFLAG_INHERIT \
(UMA_ZFLAG_INTERNAL | UMA_ZFLAG_CACHEONLY | UMA_ZFLAG_BUCKET)
-static inline uma_keg_t
-zone_first_keg(uma_zone_t zone)
-{
- uma_klink_t klink;
-
- klink = LIST_FIRST(&zone->uz_kegs);
- return (klink != NULL) ? klink->kl_keg : NULL;
-}
-
#undef UMA_ALIGN
#ifdef _KERNEL
@@ -417,6 +406,13 @@
#define KEG_LOCK_FINI(k) mtx_destroy(&(k)->uk_lock)
#define KEG_LOCK(k) mtx_lock(&(k)->uk_lock)
#define KEG_UNLOCK(k) mtx_unlock(&(k)->uk_lock)
+#define KEG_LOCK_ASSERT(k) mtx_assert(&(k)->uk_lock, MA_OWNED)
+
+#define KEG_GET(zone, keg) do { \
+ (keg) = (zone)->uz_keg; \
+ KASSERT((void *)(keg) != (void *)&(zone)->uz_lock, \
+ ("%s: Invalid zone %p type", __func__, (zone))); \
+ } while (0)
#define ZONE_LOCK_INIT(z, lc) \
do { \
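For reference, a minimal sketch (not part of this diff) of how a caller might use the new KEG_GET() macro now that a zone carries at most one keg instead of a uz_kegs list; the helper name and the uk_pages field read here are illustrative only.

static u_long
example_keg_pages(uma_zone_t zone)
{
	uma_keg_t keg;
	u_long pages;

	KEG_GET(zone, keg);	/* asserts the zone is keg-backed */
	KEG_LOCK(keg);
	pages = keg->uk_pages;	/* assumed keg field, unchanged by this diff */
	KEG_UNLOCK(keg);
	return (pages);
}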
Index: sys/vm/vm_page.c
===================================================================
--- sys/vm/vm_page.c
+++ sys/vm/vm_page.c
@@ -222,7 +222,8 @@
vmd->vmd_pgcache = uma_zcache_create("vm pgcache",
sizeof(struct vm_page), NULL, NULL, NULL, NULL,
vm_page_import, vm_page_release, vmd,
- UMA_ZONE_NOBUCKETCACHE | UMA_ZONE_MAXBUCKET | UMA_ZONE_VM);
+ UMA_ZONE_MAXBUCKET | UMA_ZONE_VM);
+ (void)uma_zone_set_maxcache(vmd->vmd_pgcache, 0);
}
}
SYSINIT(vm_page2, SI_SUB_VM_CONF, SI_ORDER_ANY, vm_page_init_cache_zones, NULL);
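A hedged sketch of the pattern this hunk switches to: rather than passing the retired UMA_ZONE_NOBUCKETCACHE flag, the bucket cache of the cache zone is capped explicitly after creation. Everything except the UMA calls shown in the hunk is a placeholder.

static void
example_pgcache_create(struct vm_domain *vmd)
{
	uma_zone_t zone;

	zone = uma_zcache_create("example pgcache", sizeof(struct vm_page),
	    NULL, NULL, NULL, NULL, vm_page_import, vm_page_release, vmd,
	    UMA_ZONE_MAXBUCKET | UMA_ZONE_VM);
	/* Keep no free items cached; replaces UMA_ZONE_NOBUCKETCACHE. */
	uma_zone_set_maxcache(zone, 0);
}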
Index: sys/vm/vm_pager.c
===================================================================
--- sys/vm/vm_pager.c
+++ sys/vm/vm_pager.c
@@ -85,10 +85,10 @@
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vm_extern.h>
+#include <vm/uma.h>
+#include "opt_swap.h"
-int cluster_pbuf_freecnt = -1; /* unlimited to begin with */
-
-struct buf *swbuf;
+uma_zone_t pbuf_zone;
static int dead_pager_getpages(vm_object_t, vm_page_t *, int, int *, int *);
static vm_object_t dead_pager_alloc(void *, vm_ooffset_t, vm_prot_t,
@@ -167,9 +167,6 @@
* cleaning requests (NPENDINGIO == 64) * the maximum swap cluster size
* (MAXPHYS == 64k) if you want to get the most efficiency.
*/
-struct mtx_padalign __exclusive_cache_line pbuf_mtx;
-static TAILQ_HEAD(swqueue, buf) bswlist;
-static int bswneeded;
vm_offset_t swapbkva; /* swap buffers kva */
void
@@ -177,7 +174,6 @@
{
struct pagerops **pgops;
- TAILQ_INIT(&bswlist);
/*
* Initialize known pagers
*/
@@ -189,25 +185,24 @@
void
vm_pager_bufferinit(void)
{
- struct buf *bp;
- int i;
- mtx_init(&pbuf_mtx, "pbuf mutex", NULL, MTX_DEF);
- bp = swbuf;
/*
- * Now set up swap and physical I/O buffer headers.
+ * swbufs are used as temporary holders for I/O, such as paging I/O.
+ * We have no fewer than 16 and no more than 256.
*/
- for (i = 0; i < nswbuf; i++, bp++) {
- TAILQ_INSERT_HEAD(&bswlist, bp, b_freelist);
- BUF_LOCKINIT(bp);
- LIST_INIT(&bp->b_dep);
- bp->b_rcred = bp->b_wcred = NOCRED;
- bp->b_xflags = 0;
- }
-
- cluster_pbuf_freecnt = nswbuf / 2;
- vnode_pbuf_freecnt = nswbuf / 2 + 1;
- vnode_async_pbuf_freecnt = nswbuf / 2;
+#ifndef NSWBUF_MIN
+#define NSWBUF_MIN 16
+#endif
+ nswbuf = min(nbuf / 4, 256);
+ TUNABLE_INT_FETCH("kern.nswbuf", &nswbuf);
+ if (nswbuf < NSWBUF_MIN)
+ nswbuf = NSWBUF_MIN;
+
+ /* Main zone for paging bufs. */
+ pbuf_zone = uma_zcreate("pbuf", sizeof(struct buf),
+ pbuf_ctor, pbuf_dtor, pbuf_init, NULL, UMA_ALIGN_CACHE,
+ UMA_ZONE_VM | UMA_ZONE_NOFREE);
+ uma_zone_set_max(pbuf_zone, nswbuf);
}
/*
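As a usage illustration, assuming only the pbuf_zone created above: consumers that previously paired getpbuf()/relpbuf() now allocate directly from the shared zone and block at the zone limit instead of on a per-subsystem counter. The function below is a hypothetical sketch.

static void
example_pbuf_user(void)
{
	struct buf *bp;

	bp = uma_zalloc(pbuf_zone, M_WAITOK);	/* sleeps once nswbuf items are out */
	/* ... fill in bp->b_data and bp->b_bufsize, perform the I/O ... */
	uma_zfree(pbuf_zone, bp);
}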
@@ -347,110 +342,33 @@
return (object);
}
-/*
- * initialize a physical buffer
- */
-
-/*
- * XXX This probably belongs in vfs_bio.c
- */
-static void
-initpbuf(struct buf *bp)
+int
+pbuf_ctor(void *mem, int size, void *arg, int flags)
{
+ struct buf *bp = mem;
- KASSERT(bp->b_bufobj == NULL, ("initpbuf with bufobj"));
- KASSERT(bp->b_vp == NULL, ("initpbuf with vp"));
+ bp->b_vp = NULL;
+ bp->b_bufobj = NULL;
+
+ /* copied from initpbuf() */
bp->b_rcred = NOCRED;
bp->b_wcred = NOCRED;
- bp->b_qindex = 0; /* On no queue (QUEUE_NONE) */
- bp->b_kvabase = (caddr_t)(MAXPHYS * (bp - swbuf)) + swapbkva;
+ bp->b_qindex = 0; /* On no queue (QUEUE_NONE) */
bp->b_data = bp->b_kvabase;
- bp->b_kvasize = MAXPHYS;
- bp->b_flags = 0;
bp->b_xflags = 0;
+ bp->b_flags = 0;
bp->b_ioflags = 0;
bp->b_iodone = NULL;
bp->b_error = 0;
BUF_LOCK(bp, LK_EXCLUSIVE, NULL);
- buf_track(bp, __func__);
-}
-
-/*
- * allocate a physical buffer
- *
- * There are a limited number (nswbuf) of physical buffers. We need
- * to make sure that no single subsystem is able to hog all of them,
- * so each subsystem implements a counter which is typically initialized
- * to 1/2 nswbuf. getpbuf() decrements this counter in allocation and
- * increments it on release, and blocks if the counter hits zero. A
- * subsystem may initialize the counter to -1 to disable the feature,
- * but it must still be sure to match up all uses of getpbuf() with
- * relpbuf() using the same variable.
- *
- * NOTE: pfreecnt can be NULL, but this 'feature' will be removed
- * relatively soon when the rest of the subsystems get smart about it. XXX
- */
-struct buf *
-getpbuf(int *pfreecnt)
-{
- struct buf *bp;
- mtx_lock(&pbuf_mtx);
- for (;;) {
- if (pfreecnt != NULL) {
- while (*pfreecnt == 0) {
- msleep(pfreecnt, &pbuf_mtx, PVM, "wswbuf0", 0);
- }
- }
-
- /* get a bp from the swap buffer header pool */
- if ((bp = TAILQ_FIRST(&bswlist)) != NULL)
- break;
-
- bswneeded = 1;
- msleep(&bswneeded, &pbuf_mtx, PVM, "wswbuf1", 0);
- /* loop in case someone else grabbed one */
- }
- TAILQ_REMOVE(&bswlist, bp, b_freelist);
- if (pfreecnt)
- --*pfreecnt;
- mtx_unlock(&pbuf_mtx);
- initpbuf(bp);
- return (bp);
-}
-
-/*
- * allocate a physical buffer, if one is available.
- *
- * Note that there is no NULL hack here - all subsystems using this
- * call understand how to use pfreecnt.
- */
-struct buf *
-trypbuf(int *pfreecnt)
-{
- struct buf *bp;
-
- mtx_lock(&pbuf_mtx);
- if (*pfreecnt == 0 || (bp = TAILQ_FIRST(&bswlist)) == NULL) {
- mtx_unlock(&pbuf_mtx);
- return NULL;
- }
- TAILQ_REMOVE(&bswlist, bp, b_freelist);
- --*pfreecnt;
- mtx_unlock(&pbuf_mtx);
- initpbuf(bp);
- return (bp);
+ return (0);
}
-/*
- * release a physical buffer
- *
- * NOTE: pfreecnt can be NULL, but this 'feature' will be removed
- * relatively soon when the rest of the subsystems get smart about it. XXX
- */
void
-relpbuf(struct buf *bp, int *pfreecnt)
+pbuf_dtor(void *mem, int size, void *arg)
{
+ struct buf *bp = mem;
if (bp->b_rcred != NOCRED) {
crfree(bp->b_rcred);
@@ -461,24 +379,24 @@
bp->b_wcred = NOCRED;
}
- KASSERT(bp->b_vp == NULL, ("relpbuf with vp"));
- KASSERT(bp->b_bufobj == NULL, ("relpbuf with bufobj"));
-
- buf_track(bp, __func__);
BUF_UNLOCK(bp);
+}
+
+int
+pbuf_init(void *mem, int size, int flags)
+{
+ struct buf *bp = mem;
- mtx_lock(&pbuf_mtx);
- TAILQ_INSERT_HEAD(&bswlist, bp, b_freelist);
+ bp->b_kvabase = (void *)kva_alloc(MAXPHYS);
+ if (bp->b_kvabase == NULL)
+ return (ENOMEM);
+ bp->b_kvasize = MAXPHYS;
+ BUF_LOCKINIT(bp);
+ LIST_INIT(&bp->b_dep);
+ bp->b_rcred = bp->b_wcred = NOCRED;
+ bp->b_xflags = 0;
- if (bswneeded) {
- bswneeded = 0;
- wakeup(&bswneeded);
- }
- if (pfreecnt) {
- if (++*pfreecnt == 1)
- wakeup(pfreecnt);
- }
- mtx_unlock(&pbuf_mtx);
+ return (0);
}
/*
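The removed comment above described per-subsystem freecnt counters; the equivalent limit is now expressed by deriving a secondary zone from pbuf_zone and capping it, as the vnode pager below (and md(4) earlier in this diff) does. A hypothetical sketch, with the name and limit chosen only for illustration:

static uma_zone_t example_pbuf_zone;

static void
example_pbuf_zone_init(void *dummy)
{

	example_pbuf_zone = uma_zsecond_create("expbuf", pbuf_ctor, pbuf_dtor,
	    pbuf_init, NULL, pbuf_zone);
	uma_zone_set_max(example_pbuf_zone, nswbuf / 2);
}
SYSINIT(example_pbuf, SI_SUB_CPU, SI_ORDER_ANY, example_pbuf_zone_init, NULL);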
Index: sys/vm/vnode_pager.c
===================================================================
--- sys/vm/vnode_pager.c
+++ sys/vm/vnode_pager.c
@@ -58,6 +58,7 @@
#include "opt_vm.h"
#include <sys/param.h>
+#include <sys/kernel.h>
#include <sys/systm.h>
#include <sys/sysctl.h>
#include <sys/proc.h>
@@ -82,6 +83,7 @@
#include <vm/vm_map.h>
#include <vm/vnode_pager.h>
#include <vm/vm_extern.h>
+#include <vm/uma.h>
static int vnode_pager_addr(struct vnode *vp, vm_ooffset_t address,
daddr_t *rtaddress, int *run);
@@ -107,15 +109,27 @@
.pgo_haspage = vnode_pager_haspage,
};
-int vnode_pbuf_freecnt;
-int vnode_async_pbuf_freecnt;
-
static struct domainset *vnode_domainset = NULL;
SYSCTL_PROC(_debug, OID_AUTO, vnode_domainset, CTLTYPE_STRING | CTLFLAG_RW,
&vnode_domainset, 0, sysctl_handle_domainset, "A",
"Default vnode NUMA policy");
+static uma_zone_t vnode_pbuf_zone;
+
+static void
+vnode_pager_init(void *dummy)
+{
+
+ vnode_pbuf_zone = uma_zsecond_create("vnpbuf", pbuf_ctor, pbuf_dtor,
+ pbuf_init, NULL, pbuf_zone);
+ uma_zone_set_max(vnode_pbuf_zone, nswbuf * 8);
+#if 0
+ uma_prealloc(vnode_pbuf_zone, nswbuf * 8);
+#endif
+}
+SYSINIT(vnode_pager, SI_SUB_CPU, SI_ORDER_ANY, vnode_pager_init, NULL);
+
/* Create the VM system backing object for this vnode */
int
vnode_create_vobject(struct vnode *vp, off_t isize, struct thread *td)
@@ -563,7 +577,7 @@
break;
}
if (fileaddr != -1) {
- bp = getpbuf(&vnode_pbuf_freecnt);
+ bp = uma_zalloc(vnode_pbuf_zone, M_WAITOK);
/* build a minimal buffer header */
bp->b_iocmd = BIO_READ;
@@ -595,7 +609,7 @@
*/
bp->b_vp = NULL;
pbrelbo(bp);
- relpbuf(bp, &vnode_pbuf_freecnt);
+ uma_zfree(vnode_pbuf_zone, bp);
if (error)
break;
} else
@@ -757,7 +771,7 @@
#ifdef INVARIANTS
off_t blkno0;
#endif
- int bsize, pagesperblock, *freecnt;
+ int bsize, pagesperblock;
int error, before, after, rbehind, rahead, poff, i;
int bytecount, secmask;
@@ -788,17 +802,7 @@
return (VM_PAGER_OK);
}
- /*
- * Synchronous and asynchronous paging operations use different
- * free pbuf counters. This is done to avoid asynchronous requests
- * to consume all pbufs.
- * Allocate the pbuf at the very beginning of the function, so that
- * if we are low on certain kind of pbufs don't even proceed to BMAP,
- * but sleep.
- */
- freecnt = iodone != NULL ?
- &vnode_async_pbuf_freecnt : &vnode_pbuf_freecnt;
- bp = getpbuf(freecnt);
+ bp = uma_zalloc(vnode_pbuf_zone, M_WAITOK);
/*
* Get the underlying device blocks for the file with VOP_BMAP().
@@ -807,7 +811,7 @@
*/
error = VOP_BMAP(vp, foff / bsize, &bo, &bp->b_blkno, &after, &before);
if (error == EOPNOTSUPP) {
- relpbuf(bp, freecnt);
+ uma_zfree(vnode_pbuf_zone, bp);
VM_OBJECT_WLOCK(object);
for (i = 0; i < count; i++) {
VM_CNT_INC(v_vnodein);
@@ -819,7 +823,7 @@
VM_OBJECT_WUNLOCK(object);
return (error);
} else if (error != 0) {
- relpbuf(bp, freecnt);
+ uma_zfree(vnode_pbuf_zone, bp);
return (VM_PAGER_ERROR);
}
@@ -828,7 +832,7 @@
* than a page size, then use special small filesystem code.
*/
if (pagesperblock == 0) {
- relpbuf(bp, freecnt);
+ uma_zfree(vnode_pbuf_zone, bp);
for (i = 0; i < count; i++) {
VM_CNT_INC(v_vnodein);
VM_CNT_INC(v_vnodepgsin);
@@ -847,7 +851,7 @@
KASSERT(count == 1,
("%s: array[%d] request to a sparse file %p", __func__,
count, vp));
- relpbuf(bp, freecnt);
+ uma_zfree(vnode_pbuf_zone, bp);
pmap_zero_page(m[0]);
KASSERT(m[0]->dirty == 0, ("%s: page %p is dirty",
__func__, m[0]));
@@ -1061,7 +1065,7 @@
bp->b_pages[i] = NULL;
bp->b_vp = NULL;
pbrelbo(bp);
- relpbuf(bp, &vnode_pbuf_freecnt);
+ uma_zfree(vnode_pbuf_zone, bp);
return (error != 0 ? VM_PAGER_ERROR : VM_PAGER_OK);
}
}
@@ -1079,7 +1083,7 @@
bp->b_pages[i] = NULL;
bp->b_vp = NULL;
pbrelbo(bp);
- relpbuf(bp, &vnode_async_pbuf_freecnt);
+ uma_zfree(vnode_pbuf_zone, bp);
}
static int
