Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F105729172
D27225.id79566.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
31 KB
Referenced Files
None
Subscribers
None
D27225.id79566.diff
View Options
Index: sys/dev/ahci/ahci.h
===================================================================
--- sys/dev/ahci/ahci.h
+++ sys/dev/ahci/ahci.h
@@ -310,13 +310,8 @@
#define AHCI_P_DEVSLP_DM 0x0e000000
#define AHCI_P_DEVSLP_DM_SHIFT 25
-/* Just to be sure, if building as module. */
-#if MAXPHYS < 512 * 1024
-#undef MAXPHYS
-#define MAXPHYS 512 * 1024
-#endif
/* Pessimistic prognosis on number of required S/G entries */
-#define AHCI_SG_ENTRIES (roundup(btoc(MAXPHYS) + 1, 8))
+#define AHCI_SG_ENTRIES (roundup(btoc(maxphys) + 1, 8))
/* Command list. 32 commands. First, 1Kbyte aligned. */
#define AHCI_CL_OFFSET 0
#define AHCI_CL_SIZE 32
@@ -344,7 +339,7 @@
u_int8_t cfis[64];
u_int8_t acmd[32];
u_int8_t reserved[32];
- struct ahci_dma_prd prd_tab[AHCI_SG_ENTRIES];
+ struct ahci_dma_prd prd_tab[];
} __packed;
struct ahci_cmd_list {
Index: sys/dev/ahci/ahci.c
===================================================================
--- sys/dev/ahci/ahci.c
+++ sys/dev/ahci/ahci.c
@@ -2868,7 +2868,7 @@
cpi->transport_version = XPORT_VERSION_UNSPECIFIED;
cpi->protocol = PROTO_ATA;
cpi->protocol_version = PROTO_VERSION_UNSPECIFIED;
- cpi->maxio = MAXPHYS;
+ cpi->maxio = maxphys;
/* ATI SB600 can't handle 256 sectors with FPDMA (NCQ). */
if (ch->quirks & AHCI_Q_MAXIO_64K)
cpi->maxio = min(cpi->maxio, 128 * 512);
Index: sys/dev/md/md.c
===================================================================
--- sys/dev/md/md.c
+++ sys/dev/md/md.c
@@ -960,9 +960,10 @@
piov = auio.uio_iov;
} else if ((bp->bio_flags & BIO_UNMAPPED) != 0) {
pb = uma_zalloc(md_pbuf_zone, M_WAITOK);
+ MPASS((pb->b_flags & B_MAXPHYS) != 0);
bp->bio_resid = len;
unmapped_step:
- npages = atop(min(MAXPHYS, round_page(len + (ma_offs &
+ npages = atop(min(maxphys, round_page(len + (ma_offs &
PAGE_MASK))));
iolen = min(ptoa(npages) - (ma_offs & PAGE_MASK), len);
KASSERT(iolen > 0, ("zero iolen"));
@@ -1684,7 +1685,7 @@
sectsize = DEV_BSIZE;
else
sectsize = mdr->md_sectorsize;
- if (sectsize > MAXPHYS || mdr->md_mediasize < sectsize)
+ if (sectsize > maxphys || mdr->md_mediasize < sectsize)
return (EINVAL);
if (mdr->md_options & MD_AUTOUNIT)
sc = mdnew(-1, &error, mdr->md_type);
Index: sys/dev/siis/siis.h
===================================================================
--- sys/dev/siis/siis.h
+++ sys/dev/siis/siis.h
@@ -263,13 +263,8 @@
#define SIIS_OFFSET 0x100
#define SIIS_STEP 0x80
-/* Just to be sure, if building as module. */
-#if MAXPHYS < 512 * 1024
-#undef MAXPHYS
-#define MAXPHYS 512 * 1024
-#endif
/* Pessimistic prognosis on number of required S/G entries */
-#define SIIS_SG_ENTRIES (roundup(btoc(MAXPHYS), 4) + 1)
+#define SIIS_SG_ENTRIES (roundup(btoc(maxphys), 4) + 1)
/* Command tables. Up to 32 commands, Each, 128byte aligned. */
#define SIIS_CT_OFFSET 0
#define SIIS_CT_SIZE (32 + 16 + SIIS_SG_ENTRIES * 16)
@@ -287,12 +282,12 @@
} __packed;
struct siis_cmd_ata {
- struct siis_dma_prd prd[1 + SIIS_SG_ENTRIES];
+ struct siis_dma_prd prd[1];
} __packed;
struct siis_cmd_atapi {
u_int8_t ccb[16];
- struct siis_dma_prd prd[SIIS_SG_ENTRIES];
+ struct siis_dma_prd prd[];
} __packed;
struct siis_cmd {
Index: sys/dev/siis/siis.c
===================================================================
--- sys/dev/siis/siis.c
+++ sys/dev/siis/siis.c
@@ -1967,7 +1967,7 @@
cpi->transport_version = XPORT_VERSION_UNSPECIFIED;
cpi->protocol = PROTO_ATA;
cpi->protocol_version = PROTO_VERSION_UNSPECIFIED;
- cpi->maxio = MAXPHYS;
+ cpi->maxio = maxphys;
cpi->hba_vendor = pci_get_vendor(parent);
cpi->hba_device = pci_get_device(parent);
cpi->hba_subvendor = pci_get_subvendor(parent);
Index: sys/fs/cd9660/cd9660_vfsops.c
===================================================================
--- sys/fs/cd9660/cd9660_vfsops.c
+++ sys/fs/cd9660/cd9660_vfsops.c
@@ -238,8 +238,8 @@
goto out;
if (devvp->v_rdev->si_iosize_max != 0)
mp->mnt_iosize_max = devvp->v_rdev->si_iosize_max;
- if (mp->mnt_iosize_max > MAXPHYS)
- mp->mnt_iosize_max = MAXPHYS;
+ if (mp->mnt_iosize_max > maxphys)
+ mp->mnt_iosize_max = maxphys;
bo = &devvp->v_bufobj;
Index: sys/fs/ext2fs/ext2_vfsops.c
===================================================================
--- sys/fs/ext2fs/ext2_vfsops.c
+++ sys/fs/ext2fs/ext2_vfsops.c
@@ -876,8 +876,8 @@
bo->bo_ops = g_vfs_bufops;
if (devvp->v_rdev->si_iosize_max != 0)
mp->mnt_iosize_max = devvp->v_rdev->si_iosize_max;
- if (mp->mnt_iosize_max > MAXPHYS)
- mp->mnt_iosize_max = MAXPHYS;
+ if (mp->mnt_iosize_max > maxphys)
+ mp->mnt_iosize_max = maxphys;
bp = NULL;
ump = NULL;
Index: sys/fs/fuse/fuse_vfsops.c
===================================================================
--- sys/fs/fuse/fuse_vfsops.c
+++ sys/fs/fuse/fuse_vfsops.c
@@ -441,7 +441,7 @@
}
memset(mp->mnt_stat.f_mntfromname, 0, MNAMELEN);
strlcpy(mp->mnt_stat.f_mntfromname, fspec, MNAMELEN);
- mp->mnt_iosize_max = MAXPHYS;
+ mp->mnt_iosize_max = maxphys;
/* Now handshaking with daemon */
fuse_internal_send_init(data, td);
Index: sys/fs/msdosfs/msdosfs_vfsops.c
===================================================================
--- sys/fs/msdosfs/msdosfs_vfsops.c
+++ sys/fs/msdosfs/msdosfs_vfsops.c
@@ -429,8 +429,8 @@
VOP_UNLOCK(devvp);
if (dev->si_iosize_max != 0)
mp->mnt_iosize_max = dev->si_iosize_max;
- if (mp->mnt_iosize_max > MAXPHYS)
- mp->mnt_iosize_max = MAXPHYS;
+ if (mp->mnt_iosize_max > maxphys)
+ mp->mnt_iosize_max = maxphys;
/*
* Read the boot sector of the filesystem, and then check the
Index: sys/fs/udf/udf_vfsops.c
===================================================================
--- sys/fs/udf/udf_vfsops.c
+++ sys/fs/udf/udf_vfsops.c
@@ -338,8 +338,8 @@
if (devvp->v_rdev->si_iosize_max != 0)
mp->mnt_iosize_max = devvp->v_rdev->si_iosize_max;
- if (mp->mnt_iosize_max > MAXPHYS)
- mp->mnt_iosize_max = MAXPHYS;
+ if (mp->mnt_iosize_max > maxphys)
+ mp->mnt_iosize_max = maxphys;
/* XXX: should be M_WAITOK */
udfmp = malloc(sizeof(struct udf_mnt), M_UDFMOUNT,
Index: sys/kern/kern_mib.c
===================================================================
--- sys/kern/kern_mib.c
+++ sys/kern/kern_mib.c
@@ -146,8 +146,29 @@
SYSCTL_STRING(_kern, KERN_BOOTFILE, bootfile, CTLFLAG_RW | CTLFLAG_MPSAFE,
kernelname, sizeof kernelname, "Name of kernel file booted");
-SYSCTL_INT(_kern, KERN_MAXPHYS, maxphys, CTLFLAG_RD | CTLFLAG_CAPRD,
- SYSCTL_NULL_INT_PTR, MAXPHYS, "Maximum block I/O access size");
+#ifdef COMPAT_FREEBSD12
+/*
+ * Read handler for kern.maxphys.
+ *
+ * The current maxphys is exported as an unsigned long.  The value is
+ * handed out as an int instead only when all of the following hold:
+ * int and u_long differ in size, the caller's output buffer is too
+ * small for a u_long, and the value fits in an int.
+ */
+static int
+sysctl_maxphys(SYSCTL_HANDLER_ARGS)
+{
+	u_long lvalue;
+	int ivalue;
+
+	lvalue = maxphys;
+	if (sizeof(int) != sizeof(u_long) && req->oldlen < sizeof(u_long) &&
+	    lvalue <= INT_MAX) {
+		/* Narrow reader and the value is representable as int. */
+		ivalue = lvalue;
+		return (sysctl_handle_int(oidp, &ivalue, 0, req));
+	}
+	return (sysctl_handle_long(oidp, &lvalue, 0, req));
+}
+SYSCTL_PROC(_kern, KERN_MAXPHYS, maxphys, CTLTYPE_LONG | CTLFLAG_RDTUN |
+ CTLFLAG_NOFETCH | CTLFLAG_CAPRD | CTLFLAG_MPSAFE,
+ NULL, 0, sysctl_maxphys, "UL", "Maximum block I/O access size");
+#else
+SYSCTL_ULONG(_kern, KERN_MAXPHYS, maxphys,
+ CTLFLAG_RDTUN | CTLFLAG_NOFETCH | CTLFLAG_CAPRD,
+ &maxphys, 0, "Maximum block I/O access size");
+#endif
SYSCTL_INT(_hw, HW_NCPU, ncpu, CTLFLAG_RD|CTLFLAG_CAPRD,
&mp_ncpus, 0, "Number of active CPUs");
Index: sys/kern/kern_physio.c
===================================================================
--- sys/kern/kern_physio.c
+++ sys/kern/kern_physio.c
@@ -69,7 +69,7 @@
* need to reject any requests that will not fit into one buffer.
*/
if (dev->si_flags & SI_NOSPLIT &&
- (uio->uio_resid > dev->si_iosize_max || uio->uio_resid > MAXPHYS ||
+ (uio->uio_resid > dev->si_iosize_max || uio->uio_resid > maxphys ||
uio->uio_iovcnt > 1)) {
/*
* Tell the user why his I/O was rejected.
@@ -78,10 +78,10 @@
uprintf("%s: request size=%zd > si_iosize_max=%d; "
"cannot split request\n", devtoname(dev),
uio->uio_resid, dev->si_iosize_max);
- if (uio->uio_resid > MAXPHYS)
- uprintf("%s: request size=%zd > MAXPHYS=%d; "
+ if (uio->uio_resid > maxphys)
+ uprintf("%s: request size=%zd > maxphys=%lu; "
"cannot split request\n", devtoname(dev),
- uio->uio_resid, MAXPHYS);
+ uio->uio_resid, maxphys);
if (uio->uio_iovcnt > 1)
uprintf("%s: request vectors=%d > 1; "
"cannot split request\n", devtoname(dev),
@@ -101,12 +101,13 @@
pages = NULL;
} else if ((dev->si_flags & SI_UNMAPPED) && unmapped_buf_allowed) {
pbuf = NULL;
- maxpages = btoc(MIN(uio->uio_resid, MAXPHYS)) + 1;
+ maxpages = btoc(MIN(uio->uio_resid, maxphys)) + 1;
pages = malloc(sizeof(*pages) * maxpages, M_DEVBUF, M_WAITOK);
} else {
pbuf = uma_zalloc(pbuf_zone, M_WAITOK);
+ MPASS((pbuf->b_flags & B_MAXPHYS) != 0);
sa = pbuf->b_data;
- maxpages = btoc(MAXPHYS);
+ maxpages = btoc(maxphys);
pages = pbuf->b_pages;
}
prot = VM_PROT_READ;
@@ -144,13 +145,13 @@
bp->bio_length = uio->uio_iov[i].iov_len;
if (bp->bio_length > dev->si_iosize_max)
bp->bio_length = dev->si_iosize_max;
- if (bp->bio_length > MAXPHYS)
- bp->bio_length = MAXPHYS;
+ if (bp->bio_length > maxphys)
+ bp->bio_length = maxphys;
/*
* Make sure the pbuf can map the request.
- * The pbuf has kvasize = MAXPHYS, so a request
- * larger than MAXPHYS - PAGE_SIZE must be
+ * The pbuf has kvasize = maxphys, so a request
+ * larger than maxphys - PAGE_SIZE must be
* page aligned or it will be fragmented.
*/
poff = (vm_offset_t)base & PAGE_MASK;
Index: sys/kern/kern_sendfile.c
===================================================================
--- sys/kern/kern_sendfile.c
+++ sys/kern/kern_sendfile.c
@@ -885,7 +885,7 @@
* do any heuristics and use exactly the value supplied by
* application. Otherwise, we allow readahead up to "rem".
* If application wants more, let it be, but there is no
- * reason to go above MAXPHYS. Also check against "obj_size",
+ * reason to go above maxphys. Also check against "obj_size",
* since vm_pager_has_page() can hint beyond EOF.
*/
if (flags & SF_USER_READAHEAD) {
@@ -895,7 +895,7 @@
npages;
rhpages += SF_READAHEAD(flags);
}
- rhpages = min(howmany(MAXPHYS, PAGE_SIZE), rhpages);
+ rhpages = min(howmany(maxphys, PAGE_SIZE), rhpages);
rhpages = min(howmany(obj_size - trunc_page(off), PAGE_SIZE) -
npages, rhpages);
Index: sys/kern/subr_param.c
===================================================================
--- sys/kern/subr_param.c
+++ sys/kern/subr_param.c
@@ -99,9 +99,10 @@
int ngroups_max; /* max # groups per process */
int nswbuf;
pid_t pid_max = PID_MAX;
-long maxswzone; /* max swmeta KVA storage */
-long maxbcache; /* max buffer cache KVA storage */
-long maxpipekva; /* Limit on pipe KVA */
+u_long maxswzone; /* max swmeta KVA storage */
+u_long maxbcache; /* max buffer cache KVA storage */
+u_long maxpipekva; /* Limit on pipe KVA */
+u_long maxphys;
int vm_guest = VM_GUEST_NO; /* Running as virtual machine guest? */
u_long maxtsiz; /* max text size */
u_long dfldsiz; /* initial data size limit */
@@ -289,6 +290,8 @@
nbuf = NBUF;
TUNABLE_INT_FETCH("kern.nbuf", &nbuf);
TUNABLE_INT_FETCH("kern.bio_transient_maxcnt", &bio_transient_maxcnt);
+ maxphys = MAXPHYS;
+ TUNABLE_ULONG_FETCH("kern.maxphys", &maxphys);
/*
* Physical buffers are pre-allocated buffers (struct buf) that
@@ -300,7 +303,7 @@
* The default for maxpipekva is min(1/64 of the kernel address space,
* max(1/64 of main memory, 512KB)). See sys_pipe.c for more details.
*/
- maxpipekva = (physpages / 64) * PAGE_SIZE;
+ maxpipekva = ptoa(physpages / 64);
TUNABLE_LONG_FETCH("kern.ipc.maxpipekva", &maxpipekva);
if (maxpipekva < 512 * 1024)
maxpipekva = 512 * 1024;
Index: sys/kern/vfs_aio.c
===================================================================
--- sys/kern/vfs_aio.c
+++ sys/kern/vfs_aio.c
@@ -1252,14 +1252,16 @@
ki = p->p_aioinfo;
poff = (vm_offset_t)cb->aio_buf & PAGE_MASK;
if ((dev->si_flags & SI_UNMAPPED) && unmapped_buf_allowed) {
- if (cb->aio_nbytes > MAXPHYS) {
+ if (cb->aio_nbytes > maxphys) {
error = -1;
goto unref;
}
pbuf = NULL;
+		/*
+		 * Size the array in page slots, plus one spare slot as the
+		 * old fixed-size pages[btoc(MAXPHYS)+1] array had.  The +1
+		 * must be inside the multiplication; otherwise only a single
+		 * extra byte is requested.
+		 */
+		job->pages = malloc(sizeof(vm_page_t) * (atop(round_page(
+		    cb->aio_nbytes)) + 1), M_TEMP, M_WAITOK | M_ZERO);
} else {
- if (cb->aio_nbytes > MAXPHYS - poff) {
+ if (cb->aio_nbytes > maxphys - poff) {
error = -1;
goto unref;
}
@@ -1273,6 +1275,7 @@
AIO_LOCK(ki);
ki->kaio_buffer_count++;
AIO_UNLOCK(ki);
+ job->pages = pbuf->b_pages;
}
job->bp = bp = g_alloc_bio();
@@ -1320,6 +1323,8 @@
AIO_UNLOCK(ki);
uma_zfree(pbuf_zone, pbuf);
job->pbuf = NULL;
+ } else {
+ free(job->pages, M_TEMP);
}
g_destroy_bio(bp);
job->bp = NULL;
@@ -2342,7 +2347,8 @@
/* Release mapping into kernel space. */
userp = job->userproc;
ki = userp->p_aioinfo;
- if (job->pbuf) {
+ vm_page_unhold_pages(job->pages, job->npages);
+ if (job->pbuf != NULL) {
pmap_qremove((vm_offset_t)job->pbuf->b_data, job->npages);
uma_zfree(pbuf_zone, job->pbuf);
job->pbuf = NULL;
@@ -2350,9 +2356,10 @@
AIO_LOCK(ki);
ki->kaio_buffer_count--;
AIO_UNLOCK(ki);
- } else
+ } else {
+ free(job->pages, M_TEMP);
atomic_subtract_int(&num_unmapped_aio, 1);
- vm_page_unhold_pages(job->pages, job->npages);
+ }
bp = job->bp;
job->bp = NULL;
Index: sys/kern/vfs_bio.c
===================================================================
--- sys/kern/vfs_bio.c
+++ sys/kern/vfs_bio.c
@@ -147,8 +147,14 @@
#define BD_RUN_UNLOCK(bd) mtx_unlock(BD_RUN_LOCKPTR((bd)))
#define BD_DOMAIN(bd) (bd - bdomain)
-static struct buf *buf; /* buffer header pool */
-extern struct buf *swbuf; /* Swap buffer header pool. */
+static char *buf; /* buffer header pool */
+/*
+ * Return a pointer to the i-th buffer header in the pool.
+ *
+ * Each pool entry is a struct buf followed by atop(maxbcachebuf)
+ * vm_page_t slots, so entries are located by byte offset from the
+ * start of the pool rather than by struct buf array indexing.
+ */
+static struct buf *
+nbufp(unsigned i)
+{
+	size_t stride;
+
+	stride = sizeof(struct buf) + sizeof(vm_page_t) * atop(maxbcachebuf);
+	return ((struct buf *)(buf + stride * i));
+}
+
caddr_t __read_mostly unmapped_buf;
/* Used below and for softdep flushing threads in ufs/ffs/ffs_softdep.c */
@@ -994,8 +1000,8 @@
maxbcachebuf = i;
if (maxbcachebuf < MAXBSIZE)
maxbcachebuf = MAXBSIZE;
- if (maxbcachebuf > MAXPHYS)
- maxbcachebuf = MAXPHYS;
+ if (maxbcachebuf > maxphys)
+ maxbcachebuf = maxphys;
if (bootverbose != 0 && maxbcachebuf != MAXBCACHEBUF)
printf("maxbcachebuf=%d\n", maxbcachebuf);
}
@@ -1113,10 +1119,10 @@
biotmap_sz = buf_sz / TRANSIENT_DENOM;
buf_sz -= biotmap_sz;
}
- if (biotmap_sz / INT_MAX > MAXPHYS)
+ if (biotmap_sz / INT_MAX > maxphys)
bio_transient_maxcnt = INT_MAX;
else
- bio_transient_maxcnt = biotmap_sz / MAXPHYS;
+ bio_transient_maxcnt = biotmap_sz / maxphys;
/*
* Artificially limit to 1024 simultaneous in-flight I/Os
* using the transient mapping.
@@ -1136,10 +1142,11 @@
/*
* Reserve space for the buffer cache buffers
*/
- buf = (void *)v;
- v = (caddr_t)(buf + nbuf);
+ buf = (char *)v;
+ v = (caddr_t)buf + (sizeof(struct buf) + sizeof(vm_page_t) *
+ atop(maxbcachebuf)) * nbuf;
- return(v);
+ return (v);
}
/* Initialize the buffer subsystem. Called before use of any buffers. */
@@ -1157,12 +1164,12 @@
mtx_init(&bdlock, "buffer daemon lock", NULL, MTX_DEF);
mtx_init(&bdirtylock, "dirty buf lock", NULL, MTX_DEF);
- unmapped_buf = (caddr_t)kva_alloc(MAXPHYS);
+ unmapped_buf = (caddr_t)kva_alloc(maxphys);
/* finally, initialize each buffer header and stick on empty q */
for (i = 0; i < nbuf; i++) {
- bp = &buf[i];
- bzero(bp, sizeof *bp);
+ bp = nbufp(i);
+ bzero(bp, sizeof(*bp) + sizeof(vm_page_t) * atop(maxbcachebuf));
bp->b_flags = B_INVAL;
bp->b_rcred = NOCRED;
bp->b_wcred = NOCRED;
@@ -1246,7 +1253,8 @@
/* Setup the kva and free list allocators. */
vmem_set_reclaim(buffer_arena, bufkva_reclaim);
- buf_zone = uma_zcache_create("buf free cache", sizeof(struct buf),
+ buf_zone = uma_zcache_create("buf free cache",
+ sizeof(struct buf) + sizeof(vm_page_t) * atop(maxbcachebuf),
NULL, NULL, NULL, NULL, buf_import, buf_release, NULL, 0);
/*
@@ -1295,7 +1303,7 @@
KASSERT(bp->b_data != unmapped_buf,
("mapped buf: b_data was not updated %p", bp));
KASSERT(bp->b_data < unmapped_buf || bp->b_data >= unmapped_buf +
- MAXPHYS, ("b_data + b_offset unmapped %p", bp));
+ maxphys, ("b_data + b_offset unmapped %p", bp));
}
static inline void
@@ -1330,7 +1338,7 @@
{
static int first_buf_printf = 1;
struct buf *bp;
- int iter, nbusy, pbusy;
+ int i, iter, nbusy, pbusy;
#ifndef PREEMPTION
int subiter;
#endif
@@ -1348,9 +1356,11 @@
*/
for (iter = pbusy = 0; iter < 20; iter++) {
nbusy = 0;
- for (bp = &buf[nbuf]; --bp >= buf; )
+ for (i = nbuf - 1; i >= 0; i--) {
+ bp = nbufp(i);
if (isbufbusy(bp))
nbusy++;
+ }
if (nbusy == 0) {
if (first_buf_printf)
printf("All buffers synced.");
@@ -1391,7 +1401,8 @@
* a fsck if we're just a client of a wedged NFS server
*/
nbusy = 0;
- for (bp = &buf[nbuf]; --bp >= buf; ) {
+ for (i = nbuf - 1; i >= 0; i--) {
+ bp = nbufp(i);
if (isbufbusy(bp)) {
#if 0
/* XXX: This is bogus. We should probably have a BO_REMOTE flag instead */
@@ -1571,6 +1582,7 @@
buf_deallocate(bp);
bufkva_free(bp);
atomic_add_int(&bufdomain(bp)->bd_freebuffers, 1);
+ MPASS((bp->b_flags & B_MAXPHYS) == 0);
BUF_UNLOCK(bp);
uma_zfree(buf_zone, bp);
}
@@ -1674,6 +1686,7 @@
("bp: %p still has %d vm pages\n", bp, bp->b_npages));
KASSERT(bp->b_kvasize == 0, ("bp: %p still has kva\n", bp));
KASSERT(bp->b_bufsize == 0, ("bp: %p still has bufspace\n", bp));
+ MPASS((bp->b_flags & B_MAXPHYS) == 0);
bp->b_domain = BD_DOMAIN(bd);
bp->b_flags = 0;
@@ -2018,6 +2031,9 @@
KASSERT((gbflags & GB_UNMAPPED) == 0 || (gbflags & GB_KVAALLOC) != 0,
("Invalid gbflags 0x%x in %s", gbflags, __func__));
+ MPASS((bp->b_flags & B_MAXPHYS) == 0);
+ KASSERT(maxsize <= maxbcachebuf,
+ ("bufkva_alloc kva too large %d %u", maxsize, maxbcachebuf));
bufkva_free(bp);
@@ -3036,6 +3052,10 @@
*/
obj = bp->b_bufobj->bo_object;
if (bp->b_npages < desiredpages) {
+ KASSERT(desiredpages <= atop(maxbcachebuf),
+ ("vfs_vmio_extend past maxbcachebuf %p %d %u",
+ bp, desiredpages, maxbcachebuf));
+
/*
* We must allocate system pages since blocking
* here could interfere with paging I/O, no
@@ -3163,7 +3183,7 @@
(vp->v_mount != 0) && /* Only on nodes that have the size info */
(bp->b_flags & (B_CLUSTEROK | B_INVAL)) == B_CLUSTEROK) {
size = vp->v_mount->mnt_stat.f_iosize;
- maxcl = MAXPHYS / size;
+ maxcl = maxphys / size;
BO_RLOCK(bo);
for (i = 1; i < maxcl; i++)
@@ -4853,6 +4873,10 @@
to = round_page(to);
from = round_page(from);
index = (from - trunc_page((vm_offset_t)bp->b_data)) >> PAGE_SHIFT;
+ MPASS((bp->b_flags & B_MAXPHYS) == 0);
+ KASSERT(to - from <= maxbcachebuf,
+ ("vm_hold_load_pages too large %p %#jx %#jx %u",
+ bp, (uintmax_t)from, (uintmax_t)to, maxbcachebuf));
for (pg = from; pg < to; pg += PAGE_SIZE, index++) {
/*
@@ -4912,12 +4936,12 @@
vm_prot_t prot;
int pidx;
+ MPASS((bp->b_flags & B_MAXPHYS) != 0);
prot = VM_PROT_READ;
if (bp->b_iocmd == BIO_READ)
prot |= VM_PROT_WRITE; /* Less backwards than it looks */
if ((pidx = vm_fault_quick_hold_pages(&curproc->p_vmspace->vm_map,
- (vm_offset_t)uaddr, len, prot, bp->b_pages,
- btoc(MAXPHYS))) < 0)
+ (vm_offset_t)uaddr, len, prot, bp->b_pages, btoc(maxphys))) < 0)
return (-1);
bp->b_bufsize = len;
bp->b_npages = pidx;
@@ -5398,19 +5422,23 @@
db_printf("\n");
cnt = 0;
total = 0;
- for (j = 0; j < nbuf; j++)
- if (buf[j].b_domain == i && BUF_ISLOCKED(&buf[j])) {
+ for (j = 0; j < nbuf; j++) {
+ bp = nbufp(j);
+ if (bp->b_domain == i && BUF_ISLOCKED(bp)) {
cnt++;
- total += buf[j].b_bufsize;
+ total += bp->b_bufsize;
}
+ }
db_printf("\tLocked buffers: %d space %ld\n", cnt, total);
cnt = 0;
total = 0;
- for (j = 0; j < nbuf; j++)
- if (buf[j].b_domain == i) {
+ for (j = 0; j < nbuf; j++) {
+ bp = nbufp(j);
+ if (bp->b_domain == i) {
cnt++;
- total += buf[j].b_bufsize;
+ total += bp->b_bufsize;
}
+ }
db_printf("\tTotal buffers: %d space %ld\n", cnt, total);
}
}
@@ -5421,7 +5449,7 @@
int i;
for (i = 0; i < nbuf; i++) {
- bp = &buf[i];
+ bp = nbufp(i);
if (BUF_ISLOCKED(bp)) {
db_show_buffer((uintptr_t)bp, 1, 0, NULL);
db_printf("\n");
@@ -5464,7 +5492,7 @@
}
for (i = 0; i < nbuf; i++) {
- bp = &buf[i];
+ bp = nbufp(i);
if (bp->b_qindex == QUEUE_EMPTY)
nfree++;
else
Index: sys/kern/vfs_cluster.c
===================================================================
--- sys/kern/vfs_cluster.c
+++ sys/kern/vfs_cluster.c
@@ -386,6 +386,7 @@
bp = uma_zalloc(cluster_pbuf_zone, M_NOWAIT);
if (bp == NULL)
return tbp;
+ MPASS((bp->b_flags & B_MAXPHYS) != 0);
/*
* We are synthesizing a buffer out of vm_page_t's, but
@@ -871,6 +872,7 @@
--len;
continue;
}
+ MPASS((bp->b_flags & B_MAXPHYS) != 0);
/*
* We got a pbuf to make the cluster in.
Index: sys/kern/vfs_default.c
===================================================================
--- sys/kern/vfs_default.c
+++ sys/kern/vfs_default.c
@@ -974,8 +974,8 @@
iosize = vap->va_blocksize;
if (iosize == 0)
iosize = BLKDEV_IOSIZE;
- if (iosize > MAXPHYS)
- iosize = MAXPHYS;
+ if (iosize > maxphys)
+ iosize = maxphys;
buf = malloc(iosize, M_TEMP, M_WAITOK);
#ifdef __notyet__
Index: sys/net/if.c
===================================================================
--- sys/net/if.c
+++ sys/net/if.c
@@ -3162,8 +3162,8 @@
struct sbuf *sb;
int error, full = 0, valid_len, max_len;
- /* Limit initial buffer size to MAXPHYS to avoid DoS from userspace. */
- max_len = MAXPHYS - 1;
+ /* Limit initial buffer size to maxphys to avoid DoS from userspace. */
+ max_len = maxphys - 1;
/* Prevent hostile input from being able to crash the system */
if (ifc->ifc_len <= 0)
Index: sys/sys/aio.h
===================================================================
--- sys/sys/aio.h
+++ sys/sys/aio.h
@@ -140,8 +140,8 @@
struct { /* BIO backend */
struct bio *bp; /* (*) BIO pointer */
struct buf *pbuf; /* (*) buffer pointer */
- struct vm_page *pages[btoc(MAXPHYS)+1]; /* (*) */
int npages; /* (*) number of pages */
+ struct vm_page **pages; /* (*) */
};
struct { /* fsync() requests */
int pending; /* (a) number of pending I/O */
Index: sys/sys/buf.h
===================================================================
--- sys/sys/buf.h
+++ sys/sys/buf.h
@@ -141,7 +141,6 @@
TAILQ_HEAD(cluster_list_head, buf) cluster_head;
TAILQ_ENTRY(buf) cluster_entry;
} b_cluster;
- struct vm_page *b_pages[btoc(MAXPHYS)];
int b_npages;
struct workhead b_dep; /* (D) List of filesystem dependencies. */
void *b_fsprivate1;
@@ -156,6 +155,7 @@
#elif defined(BUF_TRACKING)
const char *b_io_tracking;
#endif
+ struct vm_page *b_pages[];
};
#define b_object b_bufobj->bo_object
@@ -234,7 +234,7 @@
#define B_INVALONERR 0x00040000 /* Invalidate on write error. */
#define B_00080000 0x00080000 /* Available flag. */
#define B_00100000 0x00100000 /* Available flag. */
-#define B_00200000 0x00200000 /* Available flag. */
+#define B_MAXPHYS 0x00200000 /* nitems(b_pages[]) = atop(maxphys). */
#define B_RELBUF 0x00400000 /* Release VMIO buffer. */
#define B_FS_FLAG1 0x00800000 /* Available flag for FS use. */
#define B_NOCOPY 0x01000000 /* Don't copy-on-write this buf. */
@@ -247,7 +247,7 @@
#define B_REMFREE 0x80000000 /* Delayed bremfree */
#define PRINT_BUF_FLAGS "\20\40remfree\37cluster\36vmio\35ram\34managed" \
- "\33paging\32infreecnt\31nocopy\30b23\27relbuf\26b21\25b20" \
+ "\33paging\32infreecnt\31nocopy\30b23\27relbuf\26maxphys\25b20" \
"\24b19\23invalonerr\22clusterok\21malloc\20nocache\17b14\16inval" \
"\15reuse\14noreuse\13eintr\12done\11b8\10delwri" \
"\7validsuspwrt\6cache\5deferred\4direct\3async\2needcommit\1age"
@@ -496,8 +496,8 @@
#ifdef _KERNEL
extern int nbuf; /* The number of buffer headers */
-extern long maxswzone; /* Max KVA for swap structures */
-extern long maxbcache; /* Max KVA for buffer cache */
+extern u_long maxswzone; /* Max KVA for swap structures */
+extern u_long maxbcache; /* Max KVA for buffer cache */
extern int maxbcachebuf; /* Max buffer cache block size */
extern long runningbufspace;
extern long hibufspace;
Index: sys/sys/param.h
===================================================================
--- sys/sys/param.h
+++ sys/sys/param.h
@@ -160,7 +160,7 @@
#define DFLTPHYS (64 * 1024) /* default max raw I/O transfer size */
#endif
#ifndef MAXPHYS
-#define MAXPHYS (128 * 1024) /* max raw I/O transfer size */
+#define MAXPHYS (1024 * 1024) /* max raw I/O transfer size */
#endif
#ifndef MAXDUMPPGS
#define MAXDUMPPGS (DFLTPHYS/PAGE_SIZE)
Index: sys/sys/systm.h
===================================================================
--- sys/sys/systm.h
+++ sys/sys/systm.h
@@ -74,6 +74,8 @@
extern int ngroups_max; /* max # of supplemental groups */
extern int vm_guest; /* Running as virtual machine guest? */
+extern u_long maxphys;
+
/*
* Detected virtual machine guest types. The intention is to expand
* and/or add to the VM_GUEST_VM type if specific VM functionality is
Index: sys/ufs/ffs/ffs_vfsops.c
===================================================================
--- sys/ufs/ffs/ffs_vfsops.c
+++ sys/ufs/ffs/ffs_vfsops.c
@@ -1055,8 +1055,8 @@
BO_UNLOCK(&odevvp->v_bufobj);
if (dev->si_iosize_max != 0)
mp->mnt_iosize_max = dev->si_iosize_max;
- if (mp->mnt_iosize_max > MAXPHYS)
- mp->mnt_iosize_max = MAXPHYS;
+ if (mp->mnt_iosize_max > maxphys)
+ mp->mnt_iosize_max = maxphys;
if ((SBLOCKSIZE % cp->provider->sectorsize) != 0) {
error = EINVAL;
vfs_mount_error(mp,
Index: sys/vm/swap_pager.c
===================================================================
--- sys/vm/swap_pager.c
+++ sys/vm/swap_pager.c
@@ -586,7 +586,7 @@
* but it isn't very efficient).
*
* The nsw_cluster_max is constrained by the bp->b_pages[]
- * array, which has MAXPHYS / PAGE_SIZE entries, and our locally
+ * array, which has maxphys / PAGE_SIZE entries, and our locally
* defined MAX_PAGEOUT_CLUSTER. Also be aware that swap ops are
* constrained by the swap device interleave stripe size.
*
@@ -601,7 +601,7 @@
* have one NFS swap device due to the command/ack latency over NFS.
* So it all works out pretty well.
*/
- nsw_cluster_max = min(MAXPHYS / PAGE_SIZE, MAX_PAGEOUT_CLUSTER);
+ nsw_cluster_max = min(maxphys / PAGE_SIZE, MAX_PAGEOUT_CLUSTER);
nsw_wcount_async = 4;
nsw_wcount_async_max = nsw_wcount_async;
@@ -1314,6 +1314,7 @@
VM_OBJECT_WUNLOCK(object);
bp = uma_zalloc(swrbuf_zone, M_WAITOK);
+ MPASS((bp->b_flags & B_MAXPHYS) != 0);
/* Pages cannot leave the object while busy. */
for (i = 0, p = bm; i < count; i++, p = TAILQ_NEXT(p, listq)) {
MPASS(p->pindex == bm->pindex + i);
@@ -1522,8 +1523,9 @@
VM_OBJECT_WUNLOCK(object);
bp = uma_zalloc(swwbuf_zone, M_WAITOK);
+ MPASS((bp->b_flags & B_MAXPHYS) != 0);
if (async)
- bp->b_flags = B_ASYNC;
+ bp->b_flags |= B_ASYNC;
bp->b_flags |= B_PAGING;
bp->b_iocmd = BIO_WRITE;
Index: sys/vm/vm_fault.c
===================================================================
--- sys/vm/vm_fault.c
+++ sys/vm/vm_fault.c
@@ -115,7 +115,6 @@
#define PFFOR 4
#define VM_FAULT_READ_DEFAULT (1 + VM_FAULT_READ_AHEAD_INIT)
-#define VM_FAULT_READ_MAX (1 + VM_FAULT_READ_AHEAD_MAX)
#define VM_FAULT_DONTNEED_MIN 1048576
Index: sys/vm/vm_init.c
===================================================================
--- sys/vm/vm_init.c
+++ sys/vm/vm_init.c
@@ -212,7 +212,7 @@
/*
* Allocate the clean map to hold all of I/O virtual memory.
*/
- size = (long)nbuf * BKVASIZE + (long)bio_transient_maxcnt * MAXPHYS;
+ size = (long)nbuf * BKVASIZE + (long)bio_transient_maxcnt * maxphys;
kmi->clean_sva = firstaddr = kva_alloc(size);
kmi->clean_eva = firstaddr + size;
@@ -233,7 +233,7 @@
* And optionally transient bio space.
*/
if (bio_transient_maxcnt != 0) {
- size = (long)bio_transient_maxcnt * MAXPHYS;
+ size = (long)bio_transient_maxcnt * maxphys;
vmem_init(transient_arena, "transient arena",
firstaddr, size, PAGE_SIZE, 0, 0);
firstaddr += size;
Index: sys/vm/vm_map.h
===================================================================
--- sys/vm/vm_map.h
+++ sys/vm/vm_map.h
@@ -396,7 +396,7 @@
*/
#define VM_FAULT_READ_AHEAD_MIN 7
#define VM_FAULT_READ_AHEAD_INIT 15
-#define VM_FAULT_READ_AHEAD_MAX min(atop(MAXPHYS) - 1, UINT8_MAX)
+#define VM_FAULT_READ_AHEAD_MAX min(atop(maxphys) - 1, UINT8_MAX)
/*
* The following "find_space" options are supported by vm_map_find().
Index: sys/vm/vm_pager.c
===================================================================
--- sys/vm/vm_pager.c
+++ sys/vm/vm_pager.c
@@ -183,7 +183,8 @@
{
/* Main zone for paging bufs. */
- pbuf_zone = uma_zcreate("pbuf", sizeof(struct buf),
+ pbuf_zone = uma_zcreate("pbuf",
+ sizeof(struct buf) + atop(maxphys) * sizeof(vm_page_t),
pbuf_ctor, pbuf_dtor, pbuf_init, NULL, UMA_ALIGN_CACHE,
UMA_ZONE_NOFREE);
/* Few systems may still use this zone directly, so it needs a limit. */
@@ -384,7 +385,7 @@
bp->b_qindex = 0; /* On no queue (QUEUE_NONE) */
bp->b_data = bp->b_kvabase;
bp->b_xflags = 0;
- bp->b_flags = 0;
+ bp->b_flags = B_MAXPHYS;
bp->b_ioflags = 0;
bp->b_iodone = NULL;
bp->b_error = 0;
@@ -415,10 +416,10 @@
{
struct buf *bp = mem;
- bp->b_kvabase = (void *)kva_alloc(MAXPHYS);
+ bp->b_kvabase = (void *)kva_alloc(maxphys);
if (bp->b_kvabase == NULL)
return (ENOMEM);
- bp->b_kvasize = MAXPHYS;
+ bp->b_kvasize = maxphys;
BUF_LOCKINIT(bp);
LIST_INIT(&bp->b_dep);
bp->b_rcred = bp->b_wcred = NOCRED;
Index: sys/vm/vnode_pager.c
===================================================================
--- sys/vm/vnode_pager.c
+++ sys/vm/vnode_pager.c
@@ -817,7 +817,7 @@
KASSERT(foff < object->un_pager.vnp.vnp_size,
("%s: page %p offset beyond vp %p size", __func__, m[0], vp));
- KASSERT(count <= nitems(bp->b_pages),
+ KASSERT(count <= atop(maxphys),
("%s: requested %d pages", __func__, count));
/*
@@ -832,6 +832,7 @@
}
bp = uma_zalloc(vnode_pbuf_zone, M_WAITOK);
+ MPASS((bp->b_flags & B_MAXPHYS) != 0);
/*
* Get the underlying device blocks for the file with VOP_BMAP().
@@ -916,10 +917,10 @@
* Check that total amount of pages fit into buf. Trim rbehind and
* rahead evenly if not.
*/
- if (rbehind + rahead + count > nitems(bp->b_pages)) {
+ if (rbehind + rahead + count > atop(maxphys)) {
int trim, sum;
- trim = rbehind + rahead + count - nitems(bp->b_pages) + 1;
+ trim = rbehind + rahead + count - atop(maxphys) + 1;
sum = rbehind + rahead;
if (rbehind == before) {
/* Roundup rbehind trim to block size. */
@@ -930,9 +931,9 @@
rbehind -= trim * rbehind / sum;
rahead -= trim * rahead / sum;
}
- KASSERT(rbehind + rahead + count <= nitems(bp->b_pages),
- ("%s: behind %d ahead %d count %d", __func__,
- rbehind, rahead, count));
+ KASSERT(rbehind + rahead + count <= atop(maxphys),
+ ("%s: behind %d ahead %d count %d maxphys %lu", __func__,
+ rbehind, rahead, count, maxphys));
/*
* Fill in the bp->b_pages[] array with requested and optional
@@ -1014,7 +1015,7 @@
*a_rahead = bp->b_pgafter;
#ifdef INVARIANTS
- KASSERT(bp->b_npages <= nitems(bp->b_pages),
+ KASSERT(bp->b_npages <= atop(maxphys),
("%s: buf %p overflowed", __func__, bp));
for (int j = 1, prev = 0; j < bp->b_npages; j++) {
if (bp->b_pages[j] == bogus_page)
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Fri, Dec 20, 7:17 PM (12 h, 29 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
15534782
Default Alt Text
D27225.id79566.diff (31 KB)
Attached To
Mode
D27225: Make MAXPHYS tunable.
Attached
Detach File
Event Timeline
Log In to Comment