Page MenuHomeFreeBSD

D27225.id79566.diff
No OneTemporary

D27225.id79566.diff

Index: sys/dev/ahci/ahci.h
===================================================================
--- sys/dev/ahci/ahci.h
+++ sys/dev/ahci/ahci.h
@@ -310,13 +310,8 @@
#define AHCI_P_DEVSLP_DM 0x0e000000
#define AHCI_P_DEVSLP_DM_SHIFT 25
-/* Just to be sure, if building as module. */
-#if MAXPHYS < 512 * 1024
-#undef MAXPHYS
-#define MAXPHYS 512 * 1024
-#endif
/* Pessimistic prognosis on number of required S/G entries */
-#define AHCI_SG_ENTRIES (roundup(btoc(MAXPHYS) + 1, 8))
+#define AHCI_SG_ENTRIES (roundup(btoc(maxphys) + 1, 8))
/* Command list. 32 commands. First, 1Kbyte aligned. */
#define AHCI_CL_OFFSET 0
#define AHCI_CL_SIZE 32
@@ -344,7 +339,7 @@
u_int8_t cfis[64];
u_int8_t acmd[32];
u_int8_t reserved[32];
- struct ahci_dma_prd prd_tab[AHCI_SG_ENTRIES];
+ struct ahci_dma_prd prd_tab[];
} __packed;
struct ahci_cmd_list {
Index: sys/dev/ahci/ahci.c
===================================================================
--- sys/dev/ahci/ahci.c
+++ sys/dev/ahci/ahci.c
@@ -2868,7 +2868,7 @@
cpi->transport_version = XPORT_VERSION_UNSPECIFIED;
cpi->protocol = PROTO_ATA;
cpi->protocol_version = PROTO_VERSION_UNSPECIFIED;
- cpi->maxio = MAXPHYS;
+ cpi->maxio = maxphys;
/* ATI SB600 can't handle 256 sectors with FPDMA (NCQ). */
if (ch->quirks & AHCI_Q_MAXIO_64K)
cpi->maxio = min(cpi->maxio, 128 * 512);
Index: sys/dev/md/md.c
===================================================================
--- sys/dev/md/md.c
+++ sys/dev/md/md.c
@@ -960,9 +960,10 @@
piov = auio.uio_iov;
} else if ((bp->bio_flags & BIO_UNMAPPED) != 0) {
pb = uma_zalloc(md_pbuf_zone, M_WAITOK);
+ MPASS((pb->b_flags & B_MAXPHYS) != 0);
bp->bio_resid = len;
unmapped_step:
- npages = atop(min(MAXPHYS, round_page(len + (ma_offs &
+ npages = atop(min(maxphys, round_page(len + (ma_offs &
PAGE_MASK))));
iolen = min(ptoa(npages) - (ma_offs & PAGE_MASK), len);
KASSERT(iolen > 0, ("zero iolen"));
@@ -1684,7 +1685,7 @@
sectsize = DEV_BSIZE;
else
sectsize = mdr->md_sectorsize;
- if (sectsize > MAXPHYS || mdr->md_mediasize < sectsize)
+ if (sectsize > maxphys || mdr->md_mediasize < sectsize)
return (EINVAL);
if (mdr->md_options & MD_AUTOUNIT)
sc = mdnew(-1, &error, mdr->md_type);
Index: sys/dev/siis/siis.h
===================================================================
--- sys/dev/siis/siis.h
+++ sys/dev/siis/siis.h
@@ -263,13 +263,8 @@
#define SIIS_OFFSET 0x100
#define SIIS_STEP 0x80
-/* Just to be sure, if building as module. */
-#if MAXPHYS < 512 * 1024
-#undef MAXPHYS
-#define MAXPHYS 512 * 1024
-#endif
/* Pessimistic prognosis on number of required S/G entries */
-#define SIIS_SG_ENTRIES (roundup(btoc(MAXPHYS), 4) + 1)
+#define SIIS_SG_ENTRIES (roundup(btoc(maxphys), 4) + 1)
/* Command tables. Up to 32 commands, Each, 128byte aligned. */
#define SIIS_CT_OFFSET 0
#define SIIS_CT_SIZE (32 + 16 + SIIS_SG_ENTRIES * 16)
@@ -287,12 +282,12 @@
} __packed;
struct siis_cmd_ata {
- struct siis_dma_prd prd[1 + SIIS_SG_ENTRIES];
+ struct siis_dma_prd prd[1];
} __packed;
struct siis_cmd_atapi {
u_int8_t ccb[16];
- struct siis_dma_prd prd[SIIS_SG_ENTRIES];
+ struct siis_dma_prd prd[];
} __packed;
struct siis_cmd {
Index: sys/dev/siis/siis.c
===================================================================
--- sys/dev/siis/siis.c
+++ sys/dev/siis/siis.c
@@ -1967,7 +1967,7 @@
cpi->transport_version = XPORT_VERSION_UNSPECIFIED;
cpi->protocol = PROTO_ATA;
cpi->protocol_version = PROTO_VERSION_UNSPECIFIED;
- cpi->maxio = MAXPHYS;
+ cpi->maxio = maxphys;
cpi->hba_vendor = pci_get_vendor(parent);
cpi->hba_device = pci_get_device(parent);
cpi->hba_subvendor = pci_get_subvendor(parent);
Index: sys/fs/cd9660/cd9660_vfsops.c
===================================================================
--- sys/fs/cd9660/cd9660_vfsops.c
+++ sys/fs/cd9660/cd9660_vfsops.c
@@ -238,8 +238,8 @@
goto out;
if (devvp->v_rdev->si_iosize_max != 0)
mp->mnt_iosize_max = devvp->v_rdev->si_iosize_max;
- if (mp->mnt_iosize_max > MAXPHYS)
- mp->mnt_iosize_max = MAXPHYS;
+ if (mp->mnt_iosize_max > maxphys)
+ mp->mnt_iosize_max = maxphys;
bo = &devvp->v_bufobj;
Index: sys/fs/ext2fs/ext2_vfsops.c
===================================================================
--- sys/fs/ext2fs/ext2_vfsops.c
+++ sys/fs/ext2fs/ext2_vfsops.c
@@ -876,8 +876,8 @@
bo->bo_ops = g_vfs_bufops;
if (devvp->v_rdev->si_iosize_max != 0)
mp->mnt_iosize_max = devvp->v_rdev->si_iosize_max;
- if (mp->mnt_iosize_max > MAXPHYS)
- mp->mnt_iosize_max = MAXPHYS;
+ if (mp->mnt_iosize_max > maxphys)
+ mp->mnt_iosize_max = maxphys;
bp = NULL;
ump = NULL;
Index: sys/fs/fuse/fuse_vfsops.c
===================================================================
--- sys/fs/fuse/fuse_vfsops.c
+++ sys/fs/fuse/fuse_vfsops.c
@@ -441,7 +441,7 @@
}
memset(mp->mnt_stat.f_mntfromname, 0, MNAMELEN);
strlcpy(mp->mnt_stat.f_mntfromname, fspec, MNAMELEN);
- mp->mnt_iosize_max = MAXPHYS;
+ mp->mnt_iosize_max = maxphys;
/* Now handshaking with daemon */
fuse_internal_send_init(data, td);
Index: sys/fs/msdosfs/msdosfs_vfsops.c
===================================================================
--- sys/fs/msdosfs/msdosfs_vfsops.c
+++ sys/fs/msdosfs/msdosfs_vfsops.c
@@ -429,8 +429,8 @@
VOP_UNLOCK(devvp);
if (dev->si_iosize_max != 0)
mp->mnt_iosize_max = dev->si_iosize_max;
- if (mp->mnt_iosize_max > MAXPHYS)
- mp->mnt_iosize_max = MAXPHYS;
+ if (mp->mnt_iosize_max > maxphys)
+ mp->mnt_iosize_max = maxphys;
/*
* Read the boot sector of the filesystem, and then check the
Index: sys/fs/udf/udf_vfsops.c
===================================================================
--- sys/fs/udf/udf_vfsops.c
+++ sys/fs/udf/udf_vfsops.c
@@ -338,8 +338,8 @@
if (devvp->v_rdev->si_iosize_max != 0)
mp->mnt_iosize_max = devvp->v_rdev->si_iosize_max;
- if (mp->mnt_iosize_max > MAXPHYS)
- mp->mnt_iosize_max = MAXPHYS;
+ if (mp->mnt_iosize_max > maxphys)
+ mp->mnt_iosize_max = maxphys;
/* XXX: should be M_WAITOK */
udfmp = malloc(sizeof(struct udf_mnt), M_UDFMOUNT,
Index: sys/kern/kern_mib.c
===================================================================
--- sys/kern/kern_mib.c
+++ sys/kern/kern_mib.c
@@ -146,8 +146,29 @@
SYSCTL_STRING(_kern, KERN_BOOTFILE, bootfile, CTLFLAG_RW | CTLFLAG_MPSAFE,
kernelname, sizeof kernelname, "Name of kernel file booted");
-SYSCTL_INT(_kern, KERN_MAXPHYS, maxphys, CTLFLAG_RD | CTLFLAG_CAPRD,
- SYSCTL_NULL_INT_PTR, MAXPHYS, "Maximum block I/O access size");
+#ifdef COMPAT_FREEBSD12
+static int
+sysctl_maxphys(SYSCTL_HANDLER_ARGS)
+{
+ u_long lvalue;
+ int ivalue;
+
+ lvalue = maxphys;
+ if (sizeof(int) == sizeof(u_long) || req->oldlen >= sizeof(u_long))
+ return (sysctl_handle_long(oidp, &lvalue, 0, req));
+ if (lvalue > INT_MAX)
+ return (sysctl_handle_long(oidp, &lvalue, 0, req));
+ ivalue = lvalue;
+ return (sysctl_handle_int(oidp, &ivalue, 0, req));
+}
+SYSCTL_PROC(_kern, KERN_MAXPHYS, maxphys, CTLTYPE_LONG | CTLFLAG_RDTUN |
+ CTLFLAG_NOFETCH | CTLFLAG_CAPRD | CTLFLAG_MPSAFE,
+ NULL, 0, sysctl_maxphys, "UL", "Maximum block I/O access size");
+#else
+SYSCTL_ULONG(_kern, KERN_MAXPHYS, maxphys,
+ CTLFLAG_RDTUN | CTLFLAG_NOFETCH | CTLFLAG_CAPRD,
+ &maxphys, 0, "Maximum block I/O access size");
+#endif
SYSCTL_INT(_hw, HW_NCPU, ncpu, CTLFLAG_RD|CTLFLAG_CAPRD,
&mp_ncpus, 0, "Number of active CPUs");
Index: sys/kern/kern_physio.c
===================================================================
--- sys/kern/kern_physio.c
+++ sys/kern/kern_physio.c
@@ -69,7 +69,7 @@
* need to reject any requests that will not fit into one buffer.
*/
if (dev->si_flags & SI_NOSPLIT &&
- (uio->uio_resid > dev->si_iosize_max || uio->uio_resid > MAXPHYS ||
+ (uio->uio_resid > dev->si_iosize_max || uio->uio_resid > maxphys ||
uio->uio_iovcnt > 1)) {
/*
* Tell the user why his I/O was rejected.
@@ -78,10 +78,10 @@
uprintf("%s: request size=%zd > si_iosize_max=%d; "
"cannot split request\n", devtoname(dev),
uio->uio_resid, dev->si_iosize_max);
- if (uio->uio_resid > MAXPHYS)
- uprintf("%s: request size=%zd > MAXPHYS=%d; "
+ if (uio->uio_resid > maxphys)
+ uprintf("%s: request size=%zd > maxphys=%lu; "
"cannot split request\n", devtoname(dev),
- uio->uio_resid, MAXPHYS);
+ uio->uio_resid, maxphys);
if (uio->uio_iovcnt > 1)
uprintf("%s: request vectors=%d > 1; "
"cannot split request\n", devtoname(dev),
@@ -101,12 +101,13 @@
pages = NULL;
} else if ((dev->si_flags & SI_UNMAPPED) && unmapped_buf_allowed) {
pbuf = NULL;
- maxpages = btoc(MIN(uio->uio_resid, MAXPHYS)) + 1;
+ maxpages = btoc(MIN(uio->uio_resid, maxphys)) + 1;
pages = malloc(sizeof(*pages) * maxpages, M_DEVBUF, M_WAITOK);
} else {
pbuf = uma_zalloc(pbuf_zone, M_WAITOK);
+ MPASS((pbuf->b_flags & B_MAXPHYS) != 0);
sa = pbuf->b_data;
- maxpages = btoc(MAXPHYS);
+ maxpages = btoc(maxphys);
pages = pbuf->b_pages;
}
prot = VM_PROT_READ;
@@ -144,13 +145,13 @@
bp->bio_length = uio->uio_iov[i].iov_len;
if (bp->bio_length > dev->si_iosize_max)
bp->bio_length = dev->si_iosize_max;
- if (bp->bio_length > MAXPHYS)
- bp->bio_length = MAXPHYS;
+ if (bp->bio_length > maxphys)
+ bp->bio_length = maxphys;
/*
* Make sure the pbuf can map the request.
- * The pbuf has kvasize = MAXPHYS, so a request
- * larger than MAXPHYS - PAGE_SIZE must be
+ * The pbuf has kvasize = maxphys, so a request
+ * larger than maxphys - PAGE_SIZE must be
* page aligned or it will be fragmented.
*/
poff = (vm_offset_t)base & PAGE_MASK;
Index: sys/kern/kern_sendfile.c
===================================================================
--- sys/kern/kern_sendfile.c
+++ sys/kern/kern_sendfile.c
@@ -885,7 +885,7 @@
* do any heuristics and use exactly the value supplied by
* application. Otherwise, we allow readahead up to "rem".
* If application wants more, let it be, but there is no
- * reason to go above MAXPHYS. Also check against "obj_size",
+ * reason to go above maxphys. Also check against "obj_size",
* since vm_pager_has_page() can hint beyond EOF.
*/
if (flags & SF_USER_READAHEAD) {
@@ -895,7 +895,7 @@
npages;
rhpages += SF_READAHEAD(flags);
}
- rhpages = min(howmany(MAXPHYS, PAGE_SIZE), rhpages);
+ rhpages = min(howmany(maxphys, PAGE_SIZE), rhpages);
rhpages = min(howmany(obj_size - trunc_page(off), PAGE_SIZE) -
npages, rhpages);
Index: sys/kern/subr_param.c
===================================================================
--- sys/kern/subr_param.c
+++ sys/kern/subr_param.c
@@ -99,9 +99,10 @@
int ngroups_max; /* max # groups per process */
int nswbuf;
pid_t pid_max = PID_MAX;
-long maxswzone; /* max swmeta KVA storage */
-long maxbcache; /* max buffer cache KVA storage */
-long maxpipekva; /* Limit on pipe KVA */
+u_long maxswzone; /* max swmeta KVA storage */
+u_long maxbcache; /* max buffer cache KVA storage */
+u_long maxpipekva; /* Limit on pipe KVA */
+u_long maxphys;
int vm_guest = VM_GUEST_NO; /* Running as virtual machine guest? */
u_long maxtsiz; /* max text size */
u_long dfldsiz; /* initial data size limit */
@@ -289,6 +290,8 @@
nbuf = NBUF;
TUNABLE_INT_FETCH("kern.nbuf", &nbuf);
TUNABLE_INT_FETCH("kern.bio_transient_maxcnt", &bio_transient_maxcnt);
+ maxphys = MAXPHYS;
+ TUNABLE_ULONG_FETCH("kern.maxphys", &maxphys);
/*
* Physical buffers are pre-allocated buffers (struct buf) that
@@ -300,7 +303,7 @@
* The default for maxpipekva is min(1/64 of the kernel address space,
* max(1/64 of main memory, 512KB)). See sys_pipe.c for more details.
*/
- maxpipekva = (physpages / 64) * PAGE_SIZE;
+ maxpipekva = ptoa(physpages / 64);
TUNABLE_LONG_FETCH("kern.ipc.maxpipekva", &maxpipekva);
if (maxpipekva < 512 * 1024)
maxpipekva = 512 * 1024;
Index: sys/kern/vfs_aio.c
===================================================================
--- sys/kern/vfs_aio.c
+++ sys/kern/vfs_aio.c
@@ -1252,14 +1252,16 @@
ki = p->p_aioinfo;
poff = (vm_offset_t)cb->aio_buf & PAGE_MASK;
if ((dev->si_flags & SI_UNMAPPED) && unmapped_buf_allowed) {
- if (cb->aio_nbytes > MAXPHYS) {
+ if (cb->aio_nbytes > maxphys) {
error = -1;
goto unref;
}
pbuf = NULL;
+ job->pages = malloc(sizeof(vm_page_t) * atop(round_page(
+ cb->aio_nbytes)) + 1, M_TEMP, M_WAITOK | M_ZERO);
} else {
- if (cb->aio_nbytes > MAXPHYS - poff) {
+ if (cb->aio_nbytes > maxphys - poff) {
error = -1;
goto unref;
}
@@ -1273,6 +1275,7 @@
AIO_LOCK(ki);
ki->kaio_buffer_count++;
AIO_UNLOCK(ki);
+ job->pages = pbuf->b_pages;
}
job->bp = bp = g_alloc_bio();
@@ -1320,6 +1323,8 @@
AIO_UNLOCK(ki);
uma_zfree(pbuf_zone, pbuf);
job->pbuf = NULL;
+ } else {
+ free(job->pages, M_TEMP);
}
g_destroy_bio(bp);
job->bp = NULL;
@@ -2342,7 +2347,8 @@
/* Release mapping into kernel space. */
userp = job->userproc;
ki = userp->p_aioinfo;
- if (job->pbuf) {
+ vm_page_unhold_pages(job->pages, job->npages);
+ if (job->pbuf != NULL) {
pmap_qremove((vm_offset_t)job->pbuf->b_data, job->npages);
uma_zfree(pbuf_zone, job->pbuf);
job->pbuf = NULL;
@@ -2350,9 +2356,10 @@
AIO_LOCK(ki);
ki->kaio_buffer_count--;
AIO_UNLOCK(ki);
- } else
+ } else {
+ free(job->pages, M_TEMP);
atomic_subtract_int(&num_unmapped_aio, 1);
- vm_page_unhold_pages(job->pages, job->npages);
+ }
bp = job->bp;
job->bp = NULL;
Index: sys/kern/vfs_bio.c
===================================================================
--- sys/kern/vfs_bio.c
+++ sys/kern/vfs_bio.c
@@ -147,8 +147,14 @@
#define BD_RUN_UNLOCK(bd) mtx_unlock(BD_RUN_LOCKPTR((bd)))
#define BD_DOMAIN(bd) (bd - bdomain)
-static struct buf *buf; /* buffer header pool */
-extern struct buf *swbuf; /* Swap buffer header pool. */
+static char *buf; /* buffer header pool */
+static struct buf *
+nbufp(unsigned i)
+{
+ return ((struct buf *)(buf + (sizeof(struct buf) +
+ sizeof(vm_page_t) * atop(maxbcachebuf)) * i));
+}
+
caddr_t __read_mostly unmapped_buf;
/* Used below and for softdep flushing threads in ufs/ffs/ffs_softdep.c */
@@ -994,8 +1000,8 @@
maxbcachebuf = i;
if (maxbcachebuf < MAXBSIZE)
maxbcachebuf = MAXBSIZE;
- if (maxbcachebuf > MAXPHYS)
- maxbcachebuf = MAXPHYS;
+ if (maxbcachebuf > maxphys)
+ maxbcachebuf = maxphys;
if (bootverbose != 0 && maxbcachebuf != MAXBCACHEBUF)
printf("maxbcachebuf=%d\n", maxbcachebuf);
}
@@ -1113,10 +1119,10 @@
biotmap_sz = buf_sz / TRANSIENT_DENOM;
buf_sz -= biotmap_sz;
}
- if (biotmap_sz / INT_MAX > MAXPHYS)
+ if (biotmap_sz / INT_MAX > maxphys)
bio_transient_maxcnt = INT_MAX;
else
- bio_transient_maxcnt = biotmap_sz / MAXPHYS;
+ bio_transient_maxcnt = biotmap_sz / maxphys;
/*
* Artificially limit to 1024 simultaneous in-flight I/Os
* using the transient mapping.
@@ -1136,10 +1142,11 @@
/*
* Reserve space for the buffer cache buffers
*/
- buf = (void *)v;
- v = (caddr_t)(buf + nbuf);
+ buf = (char *)v;
+ v = (caddr_t)buf + (sizeof(struct buf) + sizeof(vm_page_t) *
+ atop(maxbcachebuf)) * nbuf;
- return(v);
+ return (v);
}
/* Initialize the buffer subsystem. Called before use of any buffers. */
@@ -1157,12 +1164,12 @@
mtx_init(&bdlock, "buffer daemon lock", NULL, MTX_DEF);
mtx_init(&bdirtylock, "dirty buf lock", NULL, MTX_DEF);
- unmapped_buf = (caddr_t)kva_alloc(MAXPHYS);
+ unmapped_buf = (caddr_t)kva_alloc(maxphys);
/* finally, initialize each buffer header and stick on empty q */
for (i = 0; i < nbuf; i++) {
- bp = &buf[i];
- bzero(bp, sizeof *bp);
+ bp = nbufp(i);
+ bzero(bp, sizeof(*bp) + sizeof(vm_page_t) * atop(maxbcachebuf));
bp->b_flags = B_INVAL;
bp->b_rcred = NOCRED;
bp->b_wcred = NOCRED;
@@ -1246,7 +1253,8 @@
/* Setup the kva and free list allocators. */
vmem_set_reclaim(buffer_arena, bufkva_reclaim);
- buf_zone = uma_zcache_create("buf free cache", sizeof(struct buf),
+ buf_zone = uma_zcache_create("buf free cache",
+ sizeof(struct buf) + sizeof(vm_page_t) * atop(maxbcachebuf),
NULL, NULL, NULL, NULL, buf_import, buf_release, NULL, 0);
/*
@@ -1295,7 +1303,7 @@
KASSERT(bp->b_data != unmapped_buf,
("mapped buf: b_data was not updated %p", bp));
KASSERT(bp->b_data < unmapped_buf || bp->b_data >= unmapped_buf +
- MAXPHYS, ("b_data + b_offset unmapped %p", bp));
+ maxphys, ("b_data + b_offset unmapped %p", bp));
}
static inline void
@@ -1330,7 +1338,7 @@
{
static int first_buf_printf = 1;
struct buf *bp;
- int iter, nbusy, pbusy;
+ int i, iter, nbusy, pbusy;
#ifndef PREEMPTION
int subiter;
#endif
@@ -1348,9 +1356,11 @@
*/
for (iter = pbusy = 0; iter < 20; iter++) {
nbusy = 0;
- for (bp = &buf[nbuf]; --bp >= buf; )
+ for (i = nbuf - 1; i >= 0; i--) {
+ bp = nbufp(i);
if (isbufbusy(bp))
nbusy++;
+ }
if (nbusy == 0) {
if (first_buf_printf)
printf("All buffers synced.");
@@ -1391,7 +1401,8 @@
* a fsck if we're just a client of a wedged NFS server
*/
nbusy = 0;
- for (bp = &buf[nbuf]; --bp >= buf; ) {
+ for (i = nbuf - 1; i >= 0; i--) {
+ bp = nbufp(i);
if (isbufbusy(bp)) {
#if 0
/* XXX: This is bogus. We should probably have a BO_REMOTE flag instead */
@@ -1571,6 +1582,7 @@
buf_deallocate(bp);
bufkva_free(bp);
atomic_add_int(&bufdomain(bp)->bd_freebuffers, 1);
+ MPASS((bp->b_flags & B_MAXPHYS) == 0);
BUF_UNLOCK(bp);
uma_zfree(buf_zone, bp);
}
@@ -1674,6 +1686,7 @@
("bp: %p still has %d vm pages\n", bp, bp->b_npages));
KASSERT(bp->b_kvasize == 0, ("bp: %p still has kva\n", bp));
KASSERT(bp->b_bufsize == 0, ("bp: %p still has bufspace\n", bp));
+ MPASS((bp->b_flags & B_MAXPHYS) == 0);
bp->b_domain = BD_DOMAIN(bd);
bp->b_flags = 0;
@@ -2018,6 +2031,9 @@
KASSERT((gbflags & GB_UNMAPPED) == 0 || (gbflags & GB_KVAALLOC) != 0,
("Invalid gbflags 0x%x in %s", gbflags, __func__));
+ MPASS((bp->b_flags & B_MAXPHYS) == 0);
+ KASSERT(maxsize <= maxbcachebuf,
+ ("bufkva_alloc kva too large %d %u", maxsize, maxbcachebuf));
bufkva_free(bp);
@@ -3036,6 +3052,10 @@
*/
obj = bp->b_bufobj->bo_object;
if (bp->b_npages < desiredpages) {
+ KASSERT(desiredpages <= atop(maxbcachebuf),
+ ("vfs_vmio_extend past maxbcachebuf %p %d %u",
+ bp, desiredpages, maxbcachebuf));
+
/*
* We must allocate system pages since blocking
* here could interfere with paging I/O, no
@@ -3163,7 +3183,7 @@
(vp->v_mount != 0) && /* Only on nodes that have the size info */
(bp->b_flags & (B_CLUSTEROK | B_INVAL)) == B_CLUSTEROK) {
size = vp->v_mount->mnt_stat.f_iosize;
- maxcl = MAXPHYS / size;
+ maxcl = maxphys / size;
BO_RLOCK(bo);
for (i = 1; i < maxcl; i++)
@@ -4853,6 +4873,10 @@
to = round_page(to);
from = round_page(from);
index = (from - trunc_page((vm_offset_t)bp->b_data)) >> PAGE_SHIFT;
+ MPASS((bp->b_flags & B_MAXPHYS) == 0);
+ KASSERT(to - from <= maxbcachebuf,
+ ("vm_hold_load_pages too large %p %#jx %#jx %u",
+ bp, (uintmax_t)from, (uintmax_t)to, maxbcachebuf));
for (pg = from; pg < to; pg += PAGE_SIZE, index++) {
/*
@@ -4912,12 +4936,12 @@
vm_prot_t prot;
int pidx;
+ MPASS((bp->b_flags & B_MAXPHYS) != 0);
prot = VM_PROT_READ;
if (bp->b_iocmd == BIO_READ)
prot |= VM_PROT_WRITE; /* Less backwards than it looks */
if ((pidx = vm_fault_quick_hold_pages(&curproc->p_vmspace->vm_map,
- (vm_offset_t)uaddr, len, prot, bp->b_pages,
- btoc(MAXPHYS))) < 0)
+ (vm_offset_t)uaddr, len, prot, bp->b_pages, btoc(maxphys))) < 0)
return (-1);
bp->b_bufsize = len;
bp->b_npages = pidx;
@@ -5398,19 +5422,23 @@
db_printf("\n");
cnt = 0;
total = 0;
- for (j = 0; j < nbuf; j++)
- if (buf[j].b_domain == i && BUF_ISLOCKED(&buf[j])) {
+ for (j = 0; j < nbuf; j++) {
+ bp = nbufp(j);
+ if (bp->b_domain == i && BUF_ISLOCKED(bp)) {
cnt++;
- total += buf[j].b_bufsize;
+ total += bp->b_bufsize;
}
+ }
db_printf("\tLocked buffers: %d space %ld\n", cnt, total);
cnt = 0;
total = 0;
- for (j = 0; j < nbuf; j++)
- if (buf[j].b_domain == i) {
+ for (j = 0; j < nbuf; j++) {
+ bp = nbufp(j);
+ if (bp->b_domain == i) {
cnt++;
- total += buf[j].b_bufsize;
+ total += bp->b_bufsize;
}
+ }
db_printf("\tTotal buffers: %d space %ld\n", cnt, total);
}
}
@@ -5421,7 +5449,7 @@
int i;
for (i = 0; i < nbuf; i++) {
- bp = &buf[i];
+ bp = nbufp(i);
if (BUF_ISLOCKED(bp)) {
db_show_buffer((uintptr_t)bp, 1, 0, NULL);
db_printf("\n");
@@ -5464,7 +5492,7 @@
}
for (i = 0; i < nbuf; i++) {
- bp = &buf[i];
+ bp = nbufp(i);
if (bp->b_qindex == QUEUE_EMPTY)
nfree++;
else
Index: sys/kern/vfs_cluster.c
===================================================================
--- sys/kern/vfs_cluster.c
+++ sys/kern/vfs_cluster.c
@@ -386,6 +386,7 @@
bp = uma_zalloc(cluster_pbuf_zone, M_NOWAIT);
if (bp == NULL)
return tbp;
+ MPASS((bp->b_flags & B_MAXPHYS) != 0);
/*
* We are synthesizing a buffer out of vm_page_t's, but
@@ -871,6 +872,7 @@
--len;
continue;
}
+ MPASS((bp->b_flags & B_MAXPHYS) != 0);
/*
* We got a pbuf to make the cluster in.
Index: sys/kern/vfs_default.c
===================================================================
--- sys/kern/vfs_default.c
+++ sys/kern/vfs_default.c
@@ -974,8 +974,8 @@
iosize = vap->va_blocksize;
if (iosize == 0)
iosize = BLKDEV_IOSIZE;
- if (iosize > MAXPHYS)
- iosize = MAXPHYS;
+ if (iosize > maxphys)
+ iosize = maxphys;
buf = malloc(iosize, M_TEMP, M_WAITOK);
#ifdef __notyet__
Index: sys/net/if.c
===================================================================
--- sys/net/if.c
+++ sys/net/if.c
@@ -3162,8 +3162,8 @@
struct sbuf *sb;
int error, full = 0, valid_len, max_len;
- /* Limit initial buffer size to MAXPHYS to avoid DoS from userspace. */
- max_len = MAXPHYS - 1;
+ /* Limit initial buffer size to maxphys to avoid DoS from userspace. */
+ max_len = maxphys - 1;
/* Prevent hostile input from being able to crash the system */
if (ifc->ifc_len <= 0)
Index: sys/sys/aio.h
===================================================================
--- sys/sys/aio.h
+++ sys/sys/aio.h
@@ -140,8 +140,8 @@
struct { /* BIO backend */
struct bio *bp; /* (*) BIO pointer */
struct buf *pbuf; /* (*) buffer pointer */
- struct vm_page *pages[btoc(MAXPHYS)+1]; /* (*) */
int npages; /* (*) number of pages */
+ struct vm_page **pages; /* (*) */
};
struct { /* fsync() requests */
int pending; /* (a) number of pending I/O */
Index: sys/sys/buf.h
===================================================================
--- sys/sys/buf.h
+++ sys/sys/buf.h
@@ -141,7 +141,6 @@
TAILQ_HEAD(cluster_list_head, buf) cluster_head;
TAILQ_ENTRY(buf) cluster_entry;
} b_cluster;
- struct vm_page *b_pages[btoc(MAXPHYS)];
int b_npages;
struct workhead b_dep; /* (D) List of filesystem dependencies. */
void *b_fsprivate1;
@@ -156,6 +155,7 @@
#elif defined(BUF_TRACKING)
const char *b_io_tracking;
#endif
+ struct vm_page *b_pages[];
};
#define b_object b_bufobj->bo_object
@@ -234,7 +234,7 @@
#define B_INVALONERR 0x00040000 /* Invalidate on write error. */
#define B_00080000 0x00080000 /* Available flag. */
#define B_00100000 0x00100000 /* Available flag. */
-#define B_00200000 0x00200000 /* Available flag. */
+#define B_MAXPHYS 0x00200000 /* nitems(b_pages[]) = atop(MAXPHYS). */
#define B_RELBUF 0x00400000 /* Release VMIO buffer. */
#define B_FS_FLAG1 0x00800000 /* Available flag for FS use. */
#define B_NOCOPY 0x01000000 /* Don't copy-on-write this buf. */
@@ -247,7 +247,7 @@
#define B_REMFREE 0x80000000 /* Delayed bremfree */
#define PRINT_BUF_FLAGS "\20\40remfree\37cluster\36vmio\35ram\34managed" \
- "\33paging\32infreecnt\31nocopy\30b23\27relbuf\26b21\25b20" \
+ "\33paging\32infreecnt\31nocopy\30b23\27relbuf\26maxphys\25b20" \
"\24b19\23invalonerr\22clusterok\21malloc\20nocache\17b14\16inval" \
"\15reuse\14noreuse\13eintr\12done\11b8\10delwri" \
"\7validsuspwrt\6cache\5deferred\4direct\3async\2needcommit\1age"
@@ -496,8 +496,8 @@
#ifdef _KERNEL
extern int nbuf; /* The number of buffer headers */
-extern long maxswzone; /* Max KVA for swap structures */
-extern long maxbcache; /* Max KVA for buffer cache */
+extern u_long maxswzone; /* Max KVA for swap structures */
+extern u_long maxbcache; /* Max KVA for buffer cache */
extern int maxbcachebuf; /* Max buffer cache block size */
extern long runningbufspace;
extern long hibufspace;
Index: sys/sys/param.h
===================================================================
--- sys/sys/param.h
+++ sys/sys/param.h
@@ -160,7 +160,7 @@
#define DFLTPHYS (64 * 1024) /* default max raw I/O transfer size */
#endif
#ifndef MAXPHYS
-#define MAXPHYS (128 * 1024) /* max raw I/O transfer size */
+#define MAXPHYS (1024 * 1024) /* max raw I/O transfer size */
#endif
#ifndef MAXDUMPPGS
#define MAXDUMPPGS (DFLTPHYS/PAGE_SIZE)
Index: sys/sys/systm.h
===================================================================
--- sys/sys/systm.h
+++ sys/sys/systm.h
@@ -74,6 +74,8 @@
extern int ngroups_max; /* max # of supplemental groups */
extern int vm_guest; /* Running as virtual machine guest? */
+extern u_long maxphys;
+
/*
* Detected virtual machine guest types. The intention is to expand
* and/or add to the VM_GUEST_VM type if specific VM functionality is
Index: sys/ufs/ffs/ffs_vfsops.c
===================================================================
--- sys/ufs/ffs/ffs_vfsops.c
+++ sys/ufs/ffs/ffs_vfsops.c
@@ -1055,8 +1055,8 @@
BO_UNLOCK(&odevvp->v_bufobj);
if (dev->si_iosize_max != 0)
mp->mnt_iosize_max = dev->si_iosize_max;
- if (mp->mnt_iosize_max > MAXPHYS)
- mp->mnt_iosize_max = MAXPHYS;
+ if (mp->mnt_iosize_max > maxphys)
+ mp->mnt_iosize_max = maxphys;
if ((SBLOCKSIZE % cp->provider->sectorsize) != 0) {
error = EINVAL;
vfs_mount_error(mp,
Index: sys/vm/swap_pager.c
===================================================================
--- sys/vm/swap_pager.c
+++ sys/vm/swap_pager.c
@@ -586,7 +586,7 @@
* but it isn't very efficient).
*
* The nsw_cluster_max is constrained by the bp->b_pages[]
- * array, which has MAXPHYS / PAGE_SIZE entries, and our locally
+ * array, which has maxphys / PAGE_SIZE entries, and our locally
* defined MAX_PAGEOUT_CLUSTER. Also be aware that swap ops are
* constrained by the swap device interleave stripe size.
*
@@ -601,7 +601,7 @@
* have one NFS swap device due to the command/ack latency over NFS.
* So it all works out pretty well.
*/
- nsw_cluster_max = min(MAXPHYS / PAGE_SIZE, MAX_PAGEOUT_CLUSTER);
+ nsw_cluster_max = min(maxphys / PAGE_SIZE, MAX_PAGEOUT_CLUSTER);
nsw_wcount_async = 4;
nsw_wcount_async_max = nsw_wcount_async;
@@ -1314,6 +1314,7 @@
VM_OBJECT_WUNLOCK(object);
bp = uma_zalloc(swrbuf_zone, M_WAITOK);
+ MPASS((bp->b_flags & B_MAXPHYS) != 0);
/* Pages cannot leave the object while busy. */
for (i = 0, p = bm; i < count; i++, p = TAILQ_NEXT(p, listq)) {
MPASS(p->pindex == bm->pindex + i);
@@ -1522,8 +1523,9 @@
VM_OBJECT_WUNLOCK(object);
bp = uma_zalloc(swwbuf_zone, M_WAITOK);
+ MPASS((bp->b_flags & B_MAXPHYS) != 0);
if (async)
- bp->b_flags = B_ASYNC;
+ bp->b_flags |= B_ASYNC;
bp->b_flags |= B_PAGING;
bp->b_iocmd = BIO_WRITE;
Index: sys/vm/vm_fault.c
===================================================================
--- sys/vm/vm_fault.c
+++ sys/vm/vm_fault.c
@@ -115,7 +115,6 @@
#define PFFOR 4
#define VM_FAULT_READ_DEFAULT (1 + VM_FAULT_READ_AHEAD_INIT)
-#define VM_FAULT_READ_MAX (1 + VM_FAULT_READ_AHEAD_MAX)
#define VM_FAULT_DONTNEED_MIN 1048576
Index: sys/vm/vm_init.c
===================================================================
--- sys/vm/vm_init.c
+++ sys/vm/vm_init.c
@@ -212,7 +212,7 @@
/*
* Allocate the clean map to hold all of I/O virtual memory.
*/
- size = (long)nbuf * BKVASIZE + (long)bio_transient_maxcnt * MAXPHYS;
+ size = (long)nbuf * BKVASIZE + (long)bio_transient_maxcnt * maxphys;
kmi->clean_sva = firstaddr = kva_alloc(size);
kmi->clean_eva = firstaddr + size;
@@ -233,7 +233,7 @@
* And optionally transient bio space.
*/
if (bio_transient_maxcnt != 0) {
- size = (long)bio_transient_maxcnt * MAXPHYS;
+ size = (long)bio_transient_maxcnt * maxphys;
vmem_init(transient_arena, "transient arena",
firstaddr, size, PAGE_SIZE, 0, 0);
firstaddr += size;
Index: sys/vm/vm_map.h
===================================================================
--- sys/vm/vm_map.h
+++ sys/vm/vm_map.h
@@ -396,7 +396,7 @@
*/
#define VM_FAULT_READ_AHEAD_MIN 7
#define VM_FAULT_READ_AHEAD_INIT 15
-#define VM_FAULT_READ_AHEAD_MAX min(atop(MAXPHYS) - 1, UINT8_MAX)
+#define VM_FAULT_READ_AHEAD_MAX min(atop(maxphys) - 1, UINT8_MAX)
/*
* The following "find_space" options are supported by vm_map_find().
Index: sys/vm/vm_pager.c
===================================================================
--- sys/vm/vm_pager.c
+++ sys/vm/vm_pager.c
@@ -183,7 +183,8 @@
{
/* Main zone for paging bufs. */
- pbuf_zone = uma_zcreate("pbuf", sizeof(struct buf),
+ pbuf_zone = uma_zcreate("pbuf",
+ sizeof(struct buf) + atop(maxphys) * sizeof(vm_page_t),
pbuf_ctor, pbuf_dtor, pbuf_init, NULL, UMA_ALIGN_CACHE,
UMA_ZONE_NOFREE);
/* Few systems may still use this zone directly, so it needs a limit. */
@@ -384,7 +385,7 @@
bp->b_qindex = 0; /* On no queue (QUEUE_NONE) */
bp->b_data = bp->b_kvabase;
bp->b_xflags = 0;
- bp->b_flags = 0;
+ bp->b_flags = B_MAXPHYS;
bp->b_ioflags = 0;
bp->b_iodone = NULL;
bp->b_error = 0;
@@ -415,10 +416,10 @@
{
struct buf *bp = mem;
- bp->b_kvabase = (void *)kva_alloc(MAXPHYS);
+ bp->b_kvabase = (void *)kva_alloc(maxphys);
if (bp->b_kvabase == NULL)
return (ENOMEM);
- bp->b_kvasize = MAXPHYS;
+ bp->b_kvasize = maxphys;
BUF_LOCKINIT(bp);
LIST_INIT(&bp->b_dep);
bp->b_rcred = bp->b_wcred = NOCRED;
Index: sys/vm/vnode_pager.c
===================================================================
--- sys/vm/vnode_pager.c
+++ sys/vm/vnode_pager.c
@@ -817,7 +817,7 @@
KASSERT(foff < object->un_pager.vnp.vnp_size,
("%s: page %p offset beyond vp %p size", __func__, m[0], vp));
- KASSERT(count <= nitems(bp->b_pages),
+ KASSERT(count <= atop(maxphys),
("%s: requested %d pages", __func__, count));
/*
@@ -832,6 +832,7 @@
}
bp = uma_zalloc(vnode_pbuf_zone, M_WAITOK);
+ MPASS((bp->b_flags & B_MAXPHYS) != 0);
/*
* Get the underlying device blocks for the file with VOP_BMAP().
@@ -916,10 +917,10 @@
* Check that total amount of pages fit into buf. Trim rbehind and
* rahead evenly if not.
*/
- if (rbehind + rahead + count > nitems(bp->b_pages)) {
+ if (rbehind + rahead + count > atop(maxphys)) {
int trim, sum;
- trim = rbehind + rahead + count - nitems(bp->b_pages) + 1;
+ trim = rbehind + rahead + count - atop(maxphys) + 1;
sum = rbehind + rahead;
if (rbehind == before) {
/* Roundup rbehind trim to block size. */
@@ -930,9 +931,9 @@
rbehind -= trim * rbehind / sum;
rahead -= trim * rahead / sum;
}
- KASSERT(rbehind + rahead + count <= nitems(bp->b_pages),
- ("%s: behind %d ahead %d count %d", __func__,
- rbehind, rahead, count));
+ KASSERT(rbehind + rahead + count <= atop(maxphys),
+ ("%s: behind %d ahead %d count %d maxphys %lu", __func__,
+ rbehind, rahead, count, maxphys));
/*
* Fill in the bp->b_pages[] array with requested and optional
@@ -1014,7 +1015,7 @@
*a_rahead = bp->b_pgafter;
#ifdef INVARIANTS
- KASSERT(bp->b_npages <= nitems(bp->b_pages),
+ KASSERT(bp->b_npages <= atop(maxphys),
("%s: buf %p overflowed", __func__, bp));
for (int j = 1, prev = 0; j < bp->b_npages; j++) {
if (bp->b_pages[j] == bogus_page)

File Metadata

Mime Type
text/plain
Expires
Fri, Dec 20, 7:17 PM (12 h, 29 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
15534782
Default Alt Text
D27225.id79566.diff (31 KB)

Event Timeline