Index: sys/cam/ata/ata_da.c =================================================================== --- sys/cam/ata/ata_da.c +++ sys/cam/ata/ata_da.c @@ -3447,8 +3447,8 @@ maxio = softc->cpi.maxio; /* Honor max I/O size of SIM */ if (maxio == 0) maxio = DFLTPHYS; /* traditional default */ - else if (maxio > MAXPHYS) - maxio = MAXPHYS; /* for safety */ + else if (maxio > maxphys) + maxio = maxphys; /* for safety */ if (softc->flags & ADA_FLAG_CAN_48BIT) maxio = min(maxio, 65536 * softc->params.secsize); else /* 28bit ATA command limit */ Index: sys/cam/cam_compat.c =================================================================== --- sys/cam/cam_compat.c +++ sys/cam/cam_compat.c @@ -368,7 +368,7 @@ /* Remap the CCB into kernel address space */ bzero(&mapinfo, sizeof(mapinfo)); - cam_periph_mapmem(ccb, &mapinfo, MAXPHYS); + cam_periph_mapmem(ccb, &mapinfo, maxphys); dm = ccb->cdm.matches; /* Translate in-place: old fields are smaller */ Index: sys/cam/cam_periph.c =================================================================== --- sys/cam/cam_periph.c +++ sys/cam/cam_periph.c @@ -772,7 +772,7 @@ * Map user virtual pointers into kernel virtual address space, so we can * access the memory. This is now a generic function that centralizes most * of the sanity checks on the data flags, if any. - * This also only works for up to MAXPHYS memory. Since we use + * This also only works for up to maxphys memory. Since we use * buffers to map stuff in and out, we're limited to the buffer size. */ int @@ -788,8 +788,8 @@ bzero(mapinfo, sizeof(*mapinfo)); if (maxmap == 0) maxmap = DFLTPHYS; /* traditional default */ - else if (maxmap > MAXPHYS) - maxmap = MAXPHYS; /* for safety */ + else if (maxmap > maxphys) + maxmap = maxphys; /* for safety */ switch(ccb->ccb_h.func_code) { case XPT_DEV_MATCH: if (ccb->cdm.match_buf_len == 0) { @@ -813,9 +813,9 @@ } /* * This request will not go to the hardware, no reason - * to be so strict. vmapbuf() is able to map up to MAXPHYS. 
+ * to be so strict. vmapbuf() is able to map up to maxphys. */ - maxmap = MAXPHYS; + maxmap = maxphys; break; case XPT_SCSI_IO: case XPT_CONT_TARGET_IO: @@ -881,9 +881,9 @@ /* * This request will not go to the hardware, no reason - * to be so strict. vmapbuf() is able to map up to MAXPHYS. + * to be so strict. vmapbuf() is able to map up to maxphys. */ - maxmap = MAXPHYS; + maxmap = maxphys; break; default: return(EINVAL); @@ -911,7 +911,7 @@ * boundary. */ misaligned[i] = (lengths[i] + - (((vm_offset_t)(*data_ptrs[i])) & PAGE_MASK) > MAXPHYS); + (((vm_offset_t)(*data_ptrs[i])) & PAGE_MASK) > maxphys); } /* Index: sys/cam/cam_xpt.c =================================================================== --- sys/cam/cam_xpt.c +++ sys/cam/cam_xpt.c @@ -553,7 +553,7 @@ * Map the pattern and match buffers into kernel * virtual address space. */ - error = cam_periph_mapmem(inccb, &mapinfo, MAXPHYS); + error = cam_periph_mapmem(inccb, &mapinfo, maxphys); if (error) { inccb->ccb_h.path = old_path; Index: sys/cam/ctl/ctl_backend_block.c =================================================================== --- sys/cam/ctl/ctl_backend_block.c +++ sys/cam/ctl/ctl_backend_block.c @@ -102,9 +102,11 @@ */ #define CTLBLK_HALF_IO_SIZE (512 * 1024) #define CTLBLK_MAX_IO_SIZE (CTLBLK_HALF_IO_SIZE * 2) -#define CTLBLK_MAX_SEG MIN(CTLBLK_HALF_IO_SIZE, MAXPHYS) -#define CTLBLK_HALF_SEGS MAX(CTLBLK_HALF_IO_SIZE / CTLBLK_MAX_SEG, 1) +#define CTLBLK_MIN_SEG (128 * 1024) +#define CTLBLK_MAX_SEG MIN(CTLBLK_HALF_IO_SIZE, maxphys) +#define CTLBLK_HALF_SEGS MAX(CTLBLK_HALF_IO_SIZE / CTLBLK_MIN_SEG, 1) #define CTLBLK_MAX_SEGS (CTLBLK_HALF_SEGS * 2) +#define CTLBLK_NUM_SEGS (CTLBLK_MAX_IO_SIZE / CTLBLK_MAX_SEG) #ifdef CTLBLK_DEBUG #define DPRINTF(fmt, args...) 
\ @@ -189,10 +191,8 @@ int num_luns; SLIST_HEAD(, ctl_be_block_lun) lun_list; uma_zone_t beio_zone; - uma_zone_t buf_zone; -#if (CTLBLK_MAX_SEG > 131072) - uma_zone_t buf128_zone; -#endif + uma_zone_t bufmin_zone; + uma_zone_t bufmax_zone; }; static struct ctl_be_block_softc backend_block_softc; @@ -307,12 +307,13 @@ size_t len) { -#if (CTLBLK_MAX_SEG > 131072) - if (len <= 131072) - sg->addr = uma_zalloc(softc->buf128_zone, M_WAITOK); - else -#endif - sg->addr = uma_zalloc(softc->buf_zone, M_WAITOK); + if (len <= CTLBLK_MIN_SEG) { + sg->addr = uma_zalloc(softc->bufmin_zone, M_WAITOK); + } else { + KASSERT(len <= CTLBLK_MAX_SEG, + ("Too large alloc %zu > %lu", len, CTLBLK_MAX_SEG)); + sg->addr = uma_zalloc(softc->bufmax_zone, M_WAITOK); + } sg->len = len; } @@ -320,12 +321,13 @@ ctl_free_seg(struct ctl_be_block_softc *softc, struct ctl_sg_entry *sg) { -#if (CTLBLK_MAX_SEG > 131072) - if (sg->len <= 131072) - uma_zfree(softc->buf128_zone, sg->addr); - else -#endif - uma_zfree(softc->buf_zone, sg->addr); + if (sg->len <= CTLBLK_MIN_SEG) { + uma_zfree(softc->bufmin_zone, sg->addr); + } else { + KASSERT(sg->len <= CTLBLK_MAX_SEG, + ("Too large free %zu > %lu", sg->len, CTLBLK_MAX_SEG)); + uma_zfree(softc->bufmax_zone, sg->addr); + } } static struct ctl_be_block_io * @@ -1344,7 +1346,7 @@ else pbo = 0; len_left = (uint64_t)lbalen->len * cbe_lun->blocksize; - for (i = 0, lba = 0; i < CTLBLK_MAX_SEGS && len_left > 0; i++) { + for (i = 0, lba = 0; i < CTLBLK_NUM_SEGS && len_left > 0; i++) { /* * Setup the S/G entry for this chunk. 
*/ ctl_alloc_seg(softc, &beio->sg_segs[i], - min(CTLBLK_MAX_SEG, len_left)); + MIN(CTLBLK_MAX_SEG, len_left)); DPRINTF("segment %d addr %p len %zd\n", i, beio->sg_segs[i].addr, beio->sg_segs[i].len); @@ -2802,12 +2804,11 @@ mtx_init(&softc->lock, "ctlblock", NULL, MTX_DEF); softc->beio_zone = uma_zcreate("beio", sizeof(struct ctl_be_block_io), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); - softc->buf_zone = uma_zcreate("ctlblock", CTLBLK_MAX_SEG, + softc->bufmin_zone = uma_zcreate("ctlblockmin", CTLBLK_MIN_SEG, NULL, NULL, NULL, NULL, /*align*/ 0, /*flags*/0); -#if (CTLBLK_MAX_SEG > 131072) - softc->buf128_zone = uma_zcreate("ctlblock128", 131072, - NULL, NULL, NULL, NULL, /*align*/ 0, /*flags*/0); -#endif + if (CTLBLK_MIN_SEG < CTLBLK_MAX_SEG) + softc->bufmax_zone = uma_zcreate("ctlblockmax", CTLBLK_MAX_SEG, + NULL, NULL, NULL, NULL, /*align*/ 0, /*flags*/0); SLIST_INIT(&softc->lun_list); return (0); } @@ -2832,10 +2833,9 @@ mtx_lock(&softc->lock); } mtx_unlock(&softc->lock); - uma_zdestroy(softc->buf_zone); -#if (CTLBLK_MAX_SEG > 131072) - uma_zdestroy(softc->buf128_zone); -#endif + uma_zdestroy(softc->bufmin_zone); + if (CTLBLK_MIN_SEG < CTLBLK_MAX_SEG) + uma_zdestroy(softc->bufmax_zone); uma_zdestroy(softc->beio_zone); mtx_destroy(&softc->lock); sx_destroy(&softc->modify_lock); Index: sys/cam/mmc/mmc_da.c =================================================================== --- sys/cam/mmc/mmc_da.c +++ sys/cam/mmc/mmc_da.c @@ -1587,7 +1587,7 @@ part->disk->d_name = part->name; part->disk->d_drv1 = part; part->disk->d_maxsize = - MIN(MAXPHYS, sdda_get_max_data(periph, + MIN(maxphys, sdda_get_max_data(periph, (union ccb *)&cpi) * mmc_get_sector_size(periph)); part->disk->d_unit = cnt; part->disk->d_flags = 0; Index: sys/cam/nvme/nvme_da.c =================================================================== --- sys/cam/nvme/nvme_da.c +++ sys/cam/nvme/nvme_da.c @@ -906,8 +906,8 @@ maxio = cpi.maxio; /* Honor max I/O size of SIM */ if (maxio == 0) maxio = DFLTPHYS; /* 
traditional default */ - else if (maxio > MAXPHYS) - maxio = MAXPHYS; /* for safety */ + else if (maxio > maxphys) + maxio = maxphys; /* for safety */ disk->d_maxsize = maxio; flbas_fmt = (nsd->flbas >> NVME_NS_DATA_FLBAS_FORMAT_SHIFT) & NVME_NS_DATA_FLBAS_FORMAT_MASK; Index: sys/cam/scsi/scsi_cd.c =================================================================== --- sys/cam/scsi/scsi_cd.c +++ sys/cam/scsi/scsi_cd.c @@ -696,8 +696,8 @@ softc->disk->d_drv1 = periph; if (cpi.maxio == 0) softc->disk->d_maxsize = DFLTPHYS; /* traditional default */ - else if (cpi.maxio > MAXPHYS) - softc->disk->d_maxsize = MAXPHYS; /* for safety */ + else if (cpi.maxio > maxphys) + softc->disk->d_maxsize = maxphys; /* for safety */ else softc->disk->d_maxsize = cpi.maxio; softc->disk->d_flags = 0; Index: sys/cam/scsi/scsi_da.c =================================================================== --- sys/cam/scsi/scsi_da.c +++ sys/cam/scsi/scsi_da.c @@ -2921,8 +2921,8 @@ softc->disk->d_drv1 = periph; if (cpi.maxio == 0) softc->maxio = DFLTPHYS; /* traditional default */ - else if (cpi.maxio > MAXPHYS) - softc->maxio = MAXPHYS; /* for safety */ + else if (cpi.maxio > maxphys) + softc->maxio = maxphys; /* for safety */ else softc->maxio = cpi.maxio; if (softc->quirks & DA_Q_128KB) @@ -4819,7 +4819,7 @@ if (maxsector == 0) maxsector = -1; } - if (block_size >= MAXPHYS) { + if (block_size >= maxphys) { xpt_print(periph->path, "unsupportable block size %ju\n", (uintmax_t) block_size); Index: sys/cam/scsi/scsi_pass.c =================================================================== --- sys/cam/scsi/scsi_pass.c +++ sys/cam/scsi/scsi_pass.c @@ -583,15 +583,15 @@ periph->periph_name, periph->unit_number); snprintf(softc->io_zone_name, sizeof(softc->io_zone_name), "%s%dIO", periph->periph_name, periph->unit_number); - softc->io_zone_size = MAXPHYS; + softc->io_zone_size = maxphys; knlist_init_mtx(&softc->read_select.si_note, cam_periph_mtx(periph)); xpt_path_inq(&cpi, periph->path); if 
(cpi.maxio == 0) softc->maxio = DFLTPHYS; /* traditional default */ - else if (cpi.maxio > MAXPHYS) - softc->maxio = MAXPHYS; /* for safety */ + else if (cpi.maxio > maxphys) + softc->maxio = maxphys; /* for safety */ else softc->maxio = cpi.maxio; /* real value */ @@ -1507,7 +1507,7 @@ /* * We allocate buffers in io_zone_size increments for an - * S/G list. This will generally be MAXPHYS. + * S/G list. This will generally be maxphys. */ if (lengths[0] <= softc->io_zone_size) num_segs_needed = 1; Index: sys/cam/scsi/scsi_sa.c =================================================================== --- sys/cam/scsi/scsi_sa.c +++ sys/cam/scsi/scsi_sa.c @@ -2447,12 +2447,12 @@ /* * If maxio isn't set, we fall back to DFLTPHYS. Otherwise we take - * the smaller of cpi.maxio or MAXPHYS. + * the smaller of cpi.maxio or maxphys. */ if (cpi.maxio == 0) softc->maxio = DFLTPHYS; - else if (cpi.maxio > MAXPHYS) - softc->maxio = MAXPHYS; + else if (cpi.maxio > maxphys) + softc->maxio = maxphys; else softc->maxio = cpi.maxio; Index: sys/cam/scsi/scsi_sg.c =================================================================== --- sys/cam/scsi/scsi_sg.c +++ sys/cam/scsi/scsi_sg.c @@ -327,8 +327,8 @@ if (cpi.maxio == 0) softc->maxio = DFLTPHYS; /* traditional default */ - else if (cpi.maxio > MAXPHYS) - softc->maxio = MAXPHYS; /* for safety */ + else if (cpi.maxio > maxphys) + softc->maxio = maxphys; /* for safety */ else softc->maxio = cpi.maxio; /* real value */ Index: sys/cam/scsi/scsi_target.c =================================================================== --- sys/cam/scsi/scsi_target.c +++ sys/cam/scsi/scsi_target.c @@ -404,8 +404,8 @@ } if (cpi.maxio == 0) softc->maxio = DFLTPHYS; /* traditional default */ - else if (cpi.maxio > MAXPHYS) - softc->maxio = MAXPHYS; /* for safety */ + else if (cpi.maxio > maxphys) + softc->maxio = maxphys; /* for safety */ else softc->maxio = cpi.maxio; /* real value */ Index: sys/dev/ahci/ahci.h 
=================================================================== --- sys/dev/ahci/ahci.h +++ sys/dev/ahci/ahci.h @@ -310,13 +310,8 @@ #define AHCI_P_DEVSLP_DM 0x0e000000 #define AHCI_P_DEVSLP_DM_SHIFT 25 -/* Just to be sure, if building as module. */ -#if MAXPHYS < 512 * 1024 -#undef MAXPHYS -#define MAXPHYS 512 * 1024 -#endif /* Pessimistic prognosis on number of required S/G entries */ -#define AHCI_SG_ENTRIES (roundup(btoc(MAXPHYS) + 1, 8)) +#define AHCI_SG_ENTRIES MIN(roundup(btoc(maxphys) + 1, 8), 65528) /* Command list. 32 commands. First, 1Kbyte aligned. */ #define AHCI_CL_OFFSET 0 #define AHCI_CL_SIZE 32 @@ -344,7 +339,7 @@ u_int8_t cfis[64]; u_int8_t acmd[32]; u_int8_t reserved[32]; - struct ahci_dma_prd prd_tab[AHCI_SG_ENTRIES]; + struct ahci_dma_prd prd_tab[]; } __packed; struct ahci_cmd_list { @@ -394,6 +389,7 @@ struct ahci_channel *ch; /* Channel */ u_int8_t slot; /* Number of this slot */ enum ahci_slot_states state; /* Slot state */ + u_int ct_offset; /* cmd_tab offset */ union ccb *ccb; /* CCB occupying slot */ struct ata_dmaslot dma; /* DMA data of this slot */ struct callout timeout; /* Execution timeout */ Index: sys/dev/ahci/ahci.c =================================================================== --- sys/dev/ahci/ahci.c +++ sys/dev/ahci/ahci.c @@ -1124,8 +1124,7 @@ error = bus_dma_tag_create(bus_get_dma_tag(dev), 2, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, - AHCI_SG_ENTRIES * PAGE_SIZE * ch->numslots, - AHCI_SG_ENTRIES, AHCI_PRD_MAX, + AHCI_SG_ENTRIES * PAGE_SIZE, AHCI_SG_ENTRIES, AHCI_PRD_MAX, 0, busdma_lock_mutex, &ch->mtx, &ch->dma.data_tag); if (error != 0) goto error; @@ -1187,6 +1186,7 @@ slot->ch = ch; slot->slot = i; slot->state = AHCI_SLOT_EMPTY; + slot->ct_offset = AHCI_CT_OFFSET + AHCI_CT_SIZE * i; slot->ccb = NULL; callout_init_mtx(&slot->timeout, &ch->mtx, 0); @@ -1642,8 +1642,7 @@ } KASSERT(nsegs <= AHCI_SG_ENTRIES, ("too many DMA segment entries\n")); /* Get a piece of the workspace for this request */ - ctp = 
(struct ahci_cmd_tab *) - (ch->dma.work + AHCI_CT_OFFSET + (AHCI_CT_SIZE * slot->slot)); + ctp = (struct ahci_cmd_tab *)(ch->dma.work + slot->ct_offset); /* Fill S/G table */ prd = &ctp->prd_tab[0]; for (i = 0; i < nsegs; i++) { @@ -1672,8 +1671,7 @@ uint16_t cmd_flags; /* Get a piece of the workspace for this request */ - ctp = (struct ahci_cmd_tab *) - (ch->dma.work + AHCI_CT_OFFSET + (AHCI_CT_SIZE * slot->slot)); + ctp = (struct ahci_cmd_tab *)(ch->dma.work + slot->ct_offset); /* Setup the FIS for this request */ if (!(fis_size = ahci_setup_fis(ch, ctp, ccb, slot->slot))) { device_printf(ch->dev, "Setting up SATA FIS failed\n"); @@ -1710,8 +1708,7 @@ softreset = 0; clp->bytecount = 0; clp->cmd_flags = htole16(cmd_flags); - clp->cmd_table_phys = htole64(ch->dma.work_bus + AHCI_CT_OFFSET + - (AHCI_CT_SIZE * slot->slot)); + clp->cmd_table_phys = htole64(ch->dma.work_bus + slot->ct_offset); bus_dmamap_sync(ch->dma.work_tag, ch->dma.work_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); bus_dmamap_sync(ch->dma.rfis_tag, ch->dma.rfis_map, @@ -2868,7 +2865,7 @@ cpi->transport_version = XPORT_VERSION_UNSPECIFIED; cpi->protocol = PROTO_ATA; cpi->protocol_version = PROTO_VERSION_UNSPECIFIED; - cpi->maxio = MAXPHYS; + cpi->maxio = ctob(AHCI_SG_ENTRIES - 1); /* ATI SB600 can't handle 256 sectors with FPDMA (NCQ). 
*/ if (ch->quirks & AHCI_Q_MAXIO_64K) cpi->maxio = min(cpi->maxio, 128 * 512); Index: sys/dev/ahci/ahciem.c =================================================================== --- sys/dev/ahci/ahciem.c +++ sys/dev/ahci/ahciem.c @@ -641,7 +641,7 @@ cpi->transport_version = XPORT_VERSION_UNSPECIFIED; cpi->protocol = PROTO_ATA; cpi->protocol_version = PROTO_VERSION_UNSPECIFIED; - cpi->maxio = MAXPHYS; + cpi->maxio = maxphys; cpi->hba_vendor = pci_get_vendor(parent); cpi->hba_device = pci_get_device(parent); cpi->hba_subvendor = pci_get_subvendor(parent); Index: sys/dev/ata/ata-all.h =================================================================== --- sys/dev/ata/ata-all.h +++ sys/dev/ata/ata-all.h @@ -152,7 +152,7 @@ #define ATA_SACTIVE 16 /* DMA register defines */ -#define ATA_DMA_ENTRIES 256 +#define ATA_DMA_ENTRIES MAX(17, btoc(maxphys) + 1) #define ATA_DMA_EOT 0x80000000 #define ATA_BMCMD_PORT 17 Index: sys/dev/ata/ata-all.c =================================================================== --- sys/dev/ata/ata-all.c +++ sys/dev/ata/ata-all.c @@ -139,7 +139,7 @@ if (ch->flags & ATA_SATA) ch->user[i].bytecount = 8192; else - ch->user[i].bytecount = MAXPHYS; + ch->user[i].bytecount = 65536; ch->user[i].caps = 0; ch->curr[i] = ch->user[i]; if (ch->flags & ATA_SATA) { Index: sys/dev/ata/ata-dma.c =================================================================== --- sys/dev/ata/ata-dma.c +++ sys/dev/ata/ata-dma.c @@ -87,7 +87,7 @@ if (ch->dma.segsize == 0) ch->dma.segsize = 65536; if (ch->dma.max_iosize == 0) - ch->dma.max_iosize = MIN((ATA_DMA_ENTRIES - 1) * PAGE_SIZE, MAXPHYS); + ch->dma.max_iosize = (ATA_DMA_ENTRIES - 1) * PAGE_SIZE; if (ch->dma.max_address == 0) ch->dma.max_address = BUS_SPACE_MAXADDR_32BIT; if (ch->dma.dma_slots == 0) Index: sys/dev/md/md.c =================================================================== --- sys/dev/md/md.c +++ sys/dev/md/md.c @@ -960,9 +960,10 @@ piov = auio.uio_iov; } else if ((bp->bio_flags & BIO_UNMAPPED) != 0) { pb = 
uma_zalloc(md_pbuf_zone, M_WAITOK); + MPASS((pb->b_flags & B_MAXPHYS) != 0); bp->bio_resid = len; unmapped_step: - npages = atop(min(MAXPHYS, round_page(len + (ma_offs & + npages = atop(min(maxphys, round_page(len + (ma_offs & PAGE_MASK)))); iolen = min(ptoa(npages) - (ma_offs & PAGE_MASK), len); KASSERT(iolen > 0, ("zero iolen")); @@ -1684,7 +1685,7 @@ sectsize = DEV_BSIZE; else sectsize = mdr->md_sectorsize; - if (sectsize > MAXPHYS || mdr->md_mediasize < sectsize) + if (sectsize > maxphys || mdr->md_mediasize < sectsize) return (EINVAL); if (mdr->md_options & MD_AUTOUNIT) sc = mdnew(-1, &error, mdr->md_type); Index: sys/dev/mpr/mpr.c =================================================================== --- sys/dev/mpr/mpr.c +++ sys/dev/mpr/mpr.c @@ -436,14 +436,14 @@ /* * If I/O size limitation requested then use it and pass up to CAM. - * If not, use MAXPHYS as an optimization hint, but report HW limit. + * If not, use maxphys as an optimization hint, but report HW limit. */ if (sc->max_io_pages > 0) { maxio = min(maxio, sc->max_io_pages * PAGE_SIZE); sc->maxio = maxio; } else { sc->maxio = maxio; - maxio = min(maxio, MAXPHYS); + maxio = min(maxio, maxphys); } sc->num_chains = (maxio / PAGE_SIZE + sges_per_frame - 2) / Index: sys/dev/mps/mps.c =================================================================== --- sys/dev/mps/mps.c +++ sys/dev/mps/mps.c @@ -418,14 +418,14 @@ /* * If I/O size limitation requested, then use it and pass up to CAM. - * If not, use MAXPHYS as an optimization hint, but report HW limit. + * If not, use maxphys as an optimization hint, but report HW limit. 
*/ if (sc->max_io_pages > 0) { maxio = min(maxio, sc->max_io_pages * PAGE_SIZE); sc->maxio = maxio; } else { sc->maxio = maxio; - maxio = min(maxio, MAXPHYS); + maxio = min(maxio, maxphys); } sc->num_chains = (maxio / PAGE_SIZE + sges_per_frame - 2) / Index: sys/dev/mpt/mpt.h =================================================================== --- sys/dev/mpt/mpt.h +++ sys/dev/mpt/mpt.h @@ -668,7 +668,7 @@ bus_addr_t request_phys; /* BusAddr of request memory */ uint32_t max_seg_cnt; /* calculated after IOC facts */ - uint32_t max_cam_seg_cnt;/* calculated from MAXPHYS*/ + uint32_t max_cam_seg_cnt;/* calculated from maxphys */ /* * Hardware management Index: sys/dev/mpt/mpt.c =================================================================== --- sys/dev/mpt/mpt.c +++ sys/dev/mpt/mpt.c @@ -2691,7 +2691,7 @@ /* * Use this as the basis for reporting the maximum I/O size to CAM. */ - mpt->max_cam_seg_cnt = min(mpt->max_seg_cnt, (MAXPHYS / PAGE_SIZE) + 1); + mpt->max_cam_seg_cnt = min(mpt->max_seg_cnt, btoc(maxphys) + 1); /* XXX Lame Locking! 
*/ MPT_UNLOCK(mpt); Index: sys/dev/mvs/mvs.h =================================================================== --- sys/dev/mvs/mvs.h +++ sys/dev/mvs/mvs.h @@ -392,7 +392,7 @@ #define MVS_MAX_SLOTS 32 /* Pessimistic prognosis on number of required S/G entries */ -#define MVS_SG_ENTRIES (btoc(MAXPHYS) + 1) +#define MVS_SG_ENTRIES (btoc(maxphys) + 1) /* EDMA Command Request Block (CRQB) Data */ struct mvs_crqb { @@ -505,6 +505,7 @@ int slot; /* Number of this slot */ int tag; /* Used command tag */ enum mvs_slot_states state; /* Slot state */ + u_int eprd_offset; /* EPRD offset */ union ccb *ccb; /* CCB occupying slot */ struct ata_dmaslot dma; /* DMA data of this slot */ struct callout timeout; /* Execution timeout */ Index: sys/dev/mvs/mvs.c =================================================================== --- sys/dev/mvs/mvs.c +++ sys/dev/mvs/mvs.c @@ -370,8 +370,7 @@ if (bus_dma_tag_create(bus_get_dma_tag(dev), 2, MVS_EPRD_MAX, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, - MVS_SG_ENTRIES * PAGE_SIZE * MVS_MAX_SLOTS, - MVS_SG_ENTRIES, MVS_EPRD_MAX, + MVS_SG_ENTRIES * PAGE_SIZE, MVS_SG_ENTRIES, MVS_EPRD_MAX, 0, busdma_lock_mutex, &ch->mtx, &ch->dma.data_tag)) { goto error; } @@ -438,6 +437,7 @@ slot->dev = dev; slot->slot = i; slot->state = MVS_SLOT_EMPTY; + slot->eprd_offset = MVS_EPRD_OFFSET + MVS_EPRD_SIZE * i; slot->ccb = NULL; callout_init_mtx(&slot->timeout, &ch->mtx, 0); @@ -1286,8 +1286,7 @@ } else { slot->dma.addr = 0; /* Get a piece of the workspace for this EPRD */ - eprd = (struct mvs_eprd *) - (ch->dma.workrq + MVS_EPRD_OFFSET + (MVS_EPRD_SIZE * slot->slot)); + eprd = (struct mvs_eprd *)(ch->dma.workrq + slot->eprd_offset); /* Fill S/G table */ for (i = 0; i < nsegs; i++) { eprd[i].prdbal = htole32(segs[i].ds_addr); @@ -1405,8 +1404,7 @@ DELAY(10); if (ch->basic_dma) { /* Start basic DMA. 
*/ - eprd = ch->dma.workrq_bus + MVS_EPRD_OFFSET + - (MVS_EPRD_SIZE * slot->slot); + eprd = ch->dma.workrq_bus + slot->eprd_offset; ATA_OUTL(ch->r_mem, DMA_DTLBA, eprd); ATA_OUTL(ch->r_mem, DMA_DTHBA, (eprd >> 16) >> 16); ATA_OUTL(ch->r_mem, DMA_C, DMA_C_START | @@ -1433,7 +1431,7 @@ int i; /* Get address of the prepared EPRD */ - eprd = ch->dma.workrq_bus + MVS_EPRD_OFFSET + (MVS_EPRD_SIZE * slot->slot); + eprd = ch->dma.workrq_bus + slot->eprd_offset; /* Prepare CRQB. Gen IIe uses different CRQB format. */ if (ch->quirks & MVS_Q_GENIIE) { crqb2e = (struct mvs_crqb_gen2e *) @@ -2423,7 +2421,7 @@ cpi->transport_version = XPORT_VERSION_UNSPECIFIED; cpi->protocol = PROTO_ATA; cpi->protocol_version = PROTO_VERSION_UNSPECIFIED; - cpi->maxio = MAXPHYS; + cpi->maxio = maxphys; if ((ch->quirks & MVS_Q_SOC) == 0) { cpi->hba_vendor = pci_get_vendor(parent); cpi->hba_device = pci_get_device(parent); Index: sys/dev/siis/siis.h =================================================================== --- sys/dev/siis/siis.h +++ sys/dev/siis/siis.h @@ -263,18 +263,12 @@ #define SIIS_OFFSET 0x100 #define SIIS_STEP 0x80 -/* Just to be sure, if building as module. */ -#if MAXPHYS < 512 * 1024 -#undef MAXPHYS -#define MAXPHYS 512 * 1024 -#endif /* Pessimistic prognosis on number of required S/G entries */ -#define SIIS_SG_ENTRIES (roundup(btoc(MAXPHYS), 4) + 1) -/* Command tables. Up to 32 commands, Each, 128byte aligned. */ -#define SIIS_CT_OFFSET 0 -#define SIIS_CT_SIZE (32 + 16 + SIIS_SG_ENTRIES * 16) +#define SIIS_SG_ENTRIES (roundup(btoc(maxphys), 4) + 1) +/* Port Request Block + S/G entries. 128byte aligned. */ +#define SIIS_PRB_SIZE (32 + 16 + SIIS_SG_ENTRIES * 16) /* Total main work area. 
*/ -#define SIIS_WORK_SIZE (SIIS_CT_OFFSET + SIIS_CT_SIZE * SIIS_MAX_SLOTS) +#define SIIS_WORK_SIZE (SIIS_PRB_SIZE * SIIS_MAX_SLOTS) struct siis_dma_prd { u_int64_t dba; @@ -287,12 +281,12 @@ } __packed; struct siis_cmd_ata { - struct siis_dma_prd prd[1 + SIIS_SG_ENTRIES]; + struct siis_dma_prd prd[2]; } __packed; struct siis_cmd_atapi { u_int8_t ccb[16]; - struct siis_dma_prd prd[SIIS_SG_ENTRIES]; + struct siis_dma_prd prd[1]; } __packed; struct siis_cmd { @@ -349,6 +343,7 @@ device_t dev; /* Device handle */ u_int8_t slot; /* Number of this slot */ enum siis_slot_states state; /* Slot state */ + u_int prb_offset; /* PRB offset */ union ccb *ccb; /* CCB occupying slot */ struct ata_dmaslot dma; /* DMA data of this slot */ struct callout timeout; /* Execution timeout */ Index: sys/dev/siis/siis.c =================================================================== --- sys/dev/siis/siis.c +++ sys/dev/siis/siis.c @@ -688,8 +688,7 @@ if (bus_dma_tag_create(bus_get_dma_tag(dev), 1, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, - SIIS_SG_ENTRIES * PAGE_SIZE * SIIS_MAX_SLOTS, - SIIS_SG_ENTRIES, 0xFFFFFFFF, + SIIS_SG_ENTRIES * PAGE_SIZE, SIIS_SG_ENTRIES, 0xFFFFFFFF, 0, busdma_lock_mutex, &ch->mtx, &ch->dma.data_tag)) { goto error; } @@ -745,6 +744,7 @@ slot->dev = dev; slot->slot = i; slot->state = SIIS_SLOT_EMPTY; + slot->prb_offset = SIIS_PRB_SIZE * i; slot->ccb = NULL; callout_init_mtx(&slot->timeout, &ch->mtx, 0); @@ -1034,8 +1034,7 @@ slot->dma.nsegs = nsegs; if (nsegs != 0) { /* Get a piece of the workspace for this request */ - ctp = (struct siis_cmd *)(ch->dma.work + SIIS_CT_OFFSET + - (SIIS_CT_SIZE * slot->slot)); + ctp = (struct siis_cmd *)(ch->dma.work + slot->prb_offset); /* Fill S/G table */ if (slot->ccb->ccb_h.func_code == XPT_ATA_IO) prd = &ctp->u.ata.prd[0]; @@ -1066,8 +1065,7 @@ mtx_assert(&ch->mtx, MA_OWNED); /* Get a piece of the workspace for this request */ - ctp = (struct siis_cmd *) - (ch->dma.work + SIIS_CT_OFFSET + (SIIS_CT_SIZE * 
slot->slot)); + ctp = (struct siis_cmd *)(ch->dma.work + slot->prb_offset); ctp->control = 0; ctp->protocol_override = 0; ctp->transfer_count = 0; @@ -1117,8 +1115,7 @@ /* Issue command to the controller. */ slot->state = SIIS_SLOT_RUNNING; ch->rslots |= (1 << slot->slot); - prb_bus = ch->dma.work_bus + - SIIS_CT_OFFSET + (SIIS_CT_SIZE * slot->slot); + prb_bus = ch->dma.work_bus + slot->prb_offset; ATA_OUTL(ch->r_mem, SIIS_P_CACTL(slot->slot), prb_bus); ATA_OUTL(ch->r_mem, SIIS_P_CACTH(slot->slot), prb_bus >> 32); /* Start command execution timeout */ @@ -1967,7 +1964,7 @@ cpi->transport_version = XPORT_VERSION_UNSPECIFIED; cpi->protocol = PROTO_ATA; cpi->protocol_version = PROTO_VERSION_UNSPECIFIED; - cpi->maxio = MAXPHYS; + cpi->maxio = maxphys; cpi->hba_vendor = pci_get_vendor(parent); cpi->hba_device = pci_get_device(parent); cpi->hba_subvendor = pci_get_subvendor(parent); Index: sys/fs/cd9660/cd9660_vfsops.c =================================================================== --- sys/fs/cd9660/cd9660_vfsops.c +++ sys/fs/cd9660/cd9660_vfsops.c @@ -238,8 +238,8 @@ goto out; if (devvp->v_rdev->si_iosize_max != 0) mp->mnt_iosize_max = devvp->v_rdev->si_iosize_max; - if (mp->mnt_iosize_max > MAXPHYS) - mp->mnt_iosize_max = MAXPHYS; + if (mp->mnt_iosize_max > maxphys) + mp->mnt_iosize_max = maxphys; bo = &devvp->v_bufobj; Index: sys/fs/ext2fs/ext2_vfsops.c =================================================================== --- sys/fs/ext2fs/ext2_vfsops.c +++ sys/fs/ext2fs/ext2_vfsops.c @@ -876,8 +876,8 @@ bo->bo_ops = g_vfs_bufops; if (devvp->v_rdev->si_iosize_max != 0) mp->mnt_iosize_max = devvp->v_rdev->si_iosize_max; - if (mp->mnt_iosize_max > MAXPHYS) - mp->mnt_iosize_max = MAXPHYS; + if (mp->mnt_iosize_max > maxphys) + mp->mnt_iosize_max = maxphys; bp = NULL; ump = NULL; Index: sys/fs/fuse/fuse_vfsops.c =================================================================== --- sys/fs/fuse/fuse_vfsops.c +++ sys/fs/fuse/fuse_vfsops.c @@ -441,7 +441,7 @@ } 
memset(mp->mnt_stat.f_mntfromname, 0, MNAMELEN); strlcpy(mp->mnt_stat.f_mntfromname, fspec, MNAMELEN); - mp->mnt_iosize_max = MAXPHYS; + mp->mnt_iosize_max = maxphys; /* Now handshaking with daemon */ fuse_internal_send_init(data, td); Index: sys/fs/msdosfs/msdosfs_vfsops.c =================================================================== --- sys/fs/msdosfs/msdosfs_vfsops.c +++ sys/fs/msdosfs/msdosfs_vfsops.c @@ -429,8 +429,8 @@ VOP_UNLOCK(devvp); if (dev->si_iosize_max != 0) mp->mnt_iosize_max = dev->si_iosize_max; - if (mp->mnt_iosize_max > MAXPHYS) - mp->mnt_iosize_max = MAXPHYS; + if (mp->mnt_iosize_max > maxphys) + mp->mnt_iosize_max = maxphys; /* * Read the boot sector of the filesystem, and then check the Index: sys/fs/udf/udf_vfsops.c =================================================================== --- sys/fs/udf/udf_vfsops.c +++ sys/fs/udf/udf_vfsops.c @@ -338,8 +338,8 @@ if (devvp->v_rdev->si_iosize_max != 0) mp->mnt_iosize_max = devvp->v_rdev->si_iosize_max; - if (mp->mnt_iosize_max > MAXPHYS) - mp->mnt_iosize_max = MAXPHYS; + if (mp->mnt_iosize_max > maxphys) + mp->mnt_iosize_max = maxphys; /* XXX: should be M_WAITOK */ udfmp = malloc(sizeof(struct udf_mnt), M_UDFMOUNT, Index: sys/kern/kern_mib.c =================================================================== --- sys/kern/kern_mib.c +++ sys/kern/kern_mib.c @@ -146,8 +146,29 @@ SYSCTL_STRING(_kern, KERN_BOOTFILE, bootfile, CTLFLAG_RW | CTLFLAG_MPSAFE, kernelname, sizeof kernelname, "Name of kernel file booted"); -SYSCTL_INT(_kern, KERN_MAXPHYS, maxphys, CTLFLAG_RD | CTLFLAG_CAPRD, - SYSCTL_NULL_INT_PTR, MAXPHYS, "Maximum block I/O access size"); +#ifdef COMPAT_FREEBSD12 +static int +sysctl_maxphys(SYSCTL_HANDLER_ARGS) +{ + u_long lvalue; + int ivalue; + + lvalue = maxphys; + if (sizeof(int) == sizeof(u_long) || req->oldlen >= sizeof(u_long)) + return (sysctl_handle_long(oidp, &lvalue, 0, req)); + if (lvalue > INT_MAX) + return (sysctl_handle_long(oidp, &lvalue, 0, req)); + ivalue = lvalue; + 
return (sysctl_handle_int(oidp, &ivalue, 0, req)); +} +SYSCTL_PROC(_kern, KERN_MAXPHYS, maxphys, CTLTYPE_LONG | CTLFLAG_RDTUN | + CTLFLAG_NOFETCH | CTLFLAG_CAPRD | CTLFLAG_MPSAFE, + NULL, 0, sysctl_maxphys, "UL", "Maximum block I/O access size"); +#else +SYSCTL_ULONG(_kern, KERN_MAXPHYS, maxphys, + CTLFLAG_RDTUN | CTLFLAG_NOFETCH | CTLFLAG_CAPRD, + &maxphys, 0, "Maximum block I/O access size"); +#endif SYSCTL_INT(_hw, HW_NCPU, ncpu, CTLFLAG_RD|CTLFLAG_CAPRD, &mp_ncpus, 0, "Number of active CPUs"); Index: sys/kern/kern_physio.c =================================================================== --- sys/kern/kern_physio.c +++ sys/kern/kern_physio.c @@ -69,7 +69,7 @@ * need to reject any requests that will not fit into one buffer. */ if (dev->si_flags & SI_NOSPLIT && - (uio->uio_resid > dev->si_iosize_max || uio->uio_resid > MAXPHYS || + (uio->uio_resid > dev->si_iosize_max || uio->uio_resid > maxphys || uio->uio_iovcnt > 1)) { /* * Tell the user why his I/O was rejected. @@ -78,10 +78,10 @@ uprintf("%s: request size=%zd > si_iosize_max=%d; " "cannot split request\n", devtoname(dev), uio->uio_resid, dev->si_iosize_max); - if (uio->uio_resid > MAXPHYS) - uprintf("%s: request size=%zd > MAXPHYS=%d; " + if (uio->uio_resid > maxphys) + uprintf("%s: request size=%zd > maxphys=%lu; " "cannot split request\n", devtoname(dev), - uio->uio_resid, MAXPHYS); + uio->uio_resid, maxphys); if (uio->uio_iovcnt > 1) uprintf("%s: request vectors=%d > 1; " "cannot split request\n", devtoname(dev), @@ -101,12 +101,13 @@ pages = NULL; } else if ((dev->si_flags & SI_UNMAPPED) && unmapped_buf_allowed) { pbuf = NULL; - maxpages = btoc(MIN(uio->uio_resid, MAXPHYS)) + 1; + maxpages = btoc(MIN(uio->uio_resid, maxphys)) + 1; pages = malloc(sizeof(*pages) * maxpages, M_DEVBUF, M_WAITOK); } else { pbuf = uma_zalloc(pbuf_zone, M_WAITOK); + MPASS((pbuf->b_flags & B_MAXPHYS) != 0); sa = pbuf->b_data; - maxpages = btoc(MAXPHYS); + maxpages = btoc(maxphys); pages = pbuf->b_pages; } prot = VM_PROT_READ; 
@@ -144,13 +145,13 @@ bp->bio_length = uio->uio_iov[i].iov_len; if (bp->bio_length > dev->si_iosize_max) bp->bio_length = dev->si_iosize_max; - if (bp->bio_length > MAXPHYS) - bp->bio_length = MAXPHYS; + if (bp->bio_length > maxphys) + bp->bio_length = maxphys; /* * Make sure the pbuf can map the request. - * The pbuf has kvasize = MAXPHYS, so a request - * larger than MAXPHYS - PAGE_SIZE must be + * The pbuf has kvasize = maxphys, so a request + * larger than maxphys - PAGE_SIZE must be * page aligned or it will be fragmented. */ poff = (vm_offset_t)base & PAGE_MASK; Index: sys/kern/kern_sendfile.c =================================================================== --- sys/kern/kern_sendfile.c +++ sys/kern/kern_sendfile.c @@ -885,7 +885,7 @@ * do any heuristics and use exactly the value supplied by * application. Otherwise, we allow readahead up to "rem". * If application wants more, let it be, but there is no - * reason to go above MAXPHYS. Also check against "obj_size", + * reason to go above maxphys. Also check against "obj_size", * since vm_pager_has_page() can hint beyond EOF. 
*/ if (flags & SF_USER_READAHEAD) { @@ -895,7 +895,7 @@ npages; rhpages += SF_READAHEAD(flags); } - rhpages = min(howmany(MAXPHYS, PAGE_SIZE), rhpages); + rhpages = min(howmany(maxphys, PAGE_SIZE), rhpages); rhpages = min(howmany(obj_size - trunc_page(off), PAGE_SIZE) - npages, rhpages); Index: sys/kern/subr_param.c =================================================================== --- sys/kern/subr_param.c +++ sys/kern/subr_param.c @@ -99,9 +99,10 @@ int ngroups_max; /* max # groups per process */ int nswbuf; pid_t pid_max = PID_MAX; -long maxswzone; /* max swmeta KVA storage */ -long maxbcache; /* max buffer cache KVA storage */ -long maxpipekva; /* Limit on pipe KVA */ +u_long maxswzone; /* max swmeta KVA storage */ +u_long maxbcache; /* max buffer cache KVA storage */ +u_long maxpipekva; /* Limit on pipe KVA */ +u_long maxphys; int vm_guest = VM_GUEST_NO; /* Running as virtual machine guest? */ u_long maxtsiz; /* max text size */ u_long dfldsiz; /* initial data size limit */ @@ -289,6 +290,8 @@ nbuf = NBUF; TUNABLE_INT_FETCH("kern.nbuf", &nbuf); TUNABLE_INT_FETCH("kern.bio_transient_maxcnt", &bio_transient_maxcnt); + maxphys = MAXPHYS; + TUNABLE_ULONG_FETCH("kern.maxphys", &maxphys); /* * Physical buffers are pre-allocated buffers (struct buf) that @@ -300,7 +303,7 @@ * The default for maxpipekva is min(1/64 of the kernel address space, * max(1/64 of main memory, 512KB)). See sys_pipe.c for more details. 
*/ - maxpipekva = (physpages / 64) * PAGE_SIZE; + maxpipekva = ptoa(physpages / 64); TUNABLE_LONG_FETCH("kern.ipc.maxpipekva", &maxpipekva); if (maxpipekva < 512 * 1024) maxpipekva = 512 * 1024; Index: sys/kern/vfs_aio.c =================================================================== --- sys/kern/vfs_aio.c +++ sys/kern/vfs_aio.c @@ -1252,14 +1252,16 @@ ki = p->p_aioinfo; poff = (vm_offset_t)cb->aio_buf & PAGE_MASK; if ((dev->si_flags & SI_UNMAPPED) && unmapped_buf_allowed) { - if (cb->aio_nbytes > MAXPHYS) { + if (cb->aio_nbytes > maxphys) { error = -1; goto unref; } pbuf = NULL; + job->pages = malloc(sizeof(vm_page_t) * (atop(round_page( + cb->aio_nbytes)) + 1), M_TEMP, M_WAITOK | M_ZERO); } else { - if (cb->aio_nbytes > MAXPHYS - poff) { + if (cb->aio_nbytes > maxphys - poff) { error = -1; goto unref; } @@ -1273,6 +1275,7 @@ AIO_LOCK(ki); ki->kaio_buffer_count++; AIO_UNLOCK(ki); + job->pages = pbuf->b_pages; } job->bp = bp = g_alloc_bio(); @@ -1320,6 +1323,8 @@ AIO_UNLOCK(ki); uma_zfree(pbuf_zone, pbuf); job->pbuf = NULL; + } else { + free(job->pages, M_TEMP); } g_destroy_bio(bp); job->bp = NULL; @@ -2342,7 +2347,8 @@ /* Release mapping into kernel space. 
*/ userp = job->userproc; ki = userp->p_aioinfo; - if (job->pbuf) { + vm_page_unhold_pages(job->pages, job->npages); + if (job->pbuf != NULL) { pmap_qremove((vm_offset_t)job->pbuf->b_data, job->npages); uma_zfree(pbuf_zone, job->pbuf); job->pbuf = NULL; @@ -2350,9 +2356,10 @@ AIO_LOCK(ki); ki->kaio_buffer_count--; AIO_UNLOCK(ki); - } else + } else { + free(job->pages, M_TEMP); atomic_subtract_int(&num_unmapped_aio, 1); - vm_page_unhold_pages(job->pages, job->npages); + } bp = job->bp; job->bp = NULL; Index: sys/kern/vfs_bio.c =================================================================== --- sys/kern/vfs_bio.c +++ sys/kern/vfs_bio.c @@ -147,8 +147,14 @@ #define BD_RUN_UNLOCK(bd) mtx_unlock(BD_RUN_LOCKPTR((bd))) #define BD_DOMAIN(bd) (bd - bdomain) -static struct buf *buf; /* buffer header pool */ -extern struct buf *swbuf; /* Swap buffer header pool. */ +static char *buf; /* buffer header pool */ +static struct buf * +nbufp(unsigned i) +{ + return ((struct buf *)(buf + (sizeof(struct buf) + + sizeof(vm_page_t) * atop(maxbcachebuf)) * i)); +} + caddr_t __read_mostly unmapped_buf; /* Used below and for softdep flushing threads in ufs/ffs/ffs_softdep.c */ @@ -994,8 +1000,8 @@ maxbcachebuf = i; if (maxbcachebuf < MAXBSIZE) maxbcachebuf = MAXBSIZE; - if (maxbcachebuf > MAXPHYS) - maxbcachebuf = MAXPHYS; + if (maxbcachebuf > maxphys) + maxbcachebuf = maxphys; if (bootverbose != 0 && maxbcachebuf != MAXBCACHEBUF) printf("maxbcachebuf=%d\n", maxbcachebuf); } @@ -1113,10 +1119,10 @@ biotmap_sz = buf_sz / TRANSIENT_DENOM; buf_sz -= biotmap_sz; } - if (biotmap_sz / INT_MAX > MAXPHYS) + if (biotmap_sz / INT_MAX > maxphys) bio_transient_maxcnt = INT_MAX; else - bio_transient_maxcnt = biotmap_sz / MAXPHYS; + bio_transient_maxcnt = biotmap_sz / maxphys; /* * Artificially limit to 1024 simultaneous in-flight I/Os * using the transient mapping. 
@@ -1136,10 +1142,11 @@ /* * Reserve space for the buffer cache buffers */ - buf = (void *)v; - v = (caddr_t)(buf + nbuf); + buf = (char *)v; + v = (caddr_t)buf + (sizeof(struct buf) + sizeof(vm_page_t) * + atop(maxbcachebuf)) * nbuf; - return(v); + return (v); } /* Initialize the buffer subsystem. Called before use of any buffers. */ @@ -1157,12 +1164,12 @@ mtx_init(&bdlock, "buffer daemon lock", NULL, MTX_DEF); mtx_init(&bdirtylock, "dirty buf lock", NULL, MTX_DEF); - unmapped_buf = (caddr_t)kva_alloc(MAXPHYS); + unmapped_buf = (caddr_t)kva_alloc(maxphys); /* finally, initialize each buffer header and stick on empty q */ for (i = 0; i < nbuf; i++) { - bp = &buf[i]; - bzero(bp, sizeof *bp); + bp = nbufp(i); + bzero(bp, sizeof(*bp) + sizeof(vm_page_t) * atop(maxbcachebuf)); bp->b_flags = B_INVAL; bp->b_rcred = NOCRED; bp->b_wcred = NOCRED; @@ -1246,7 +1253,8 @@ /* Setup the kva and free list allocators. */ vmem_set_reclaim(buffer_arena, bufkva_reclaim); - buf_zone = uma_zcache_create("buf free cache", sizeof(struct buf), + buf_zone = uma_zcache_create("buf free cache", + sizeof(struct buf) + sizeof(vm_page_t) * atop(maxbcachebuf), NULL, NULL, NULL, NULL, buf_import, buf_release, NULL, 0); /* @@ -1295,7 +1303,7 @@ KASSERT(bp->b_data != unmapped_buf, ("mapped buf: b_data was not updated %p", bp)); KASSERT(bp->b_data < unmapped_buf || bp->b_data >= unmapped_buf + - MAXPHYS, ("b_data + b_offset unmapped %p", bp)); + maxphys, ("b_data + b_offset unmapped %p", bp)); } static inline void @@ -1330,7 +1338,7 @@ { static int first_buf_printf = 1; struct buf *bp; - int iter, nbusy, pbusy; + int i, iter, nbusy, pbusy; #ifndef PREEMPTION int subiter; #endif @@ -1348,9 +1356,11 @@ */ for (iter = pbusy = 0; iter < 20; iter++) { nbusy = 0; - for (bp = &buf[nbuf]; --bp >= buf; ) + for (i = nbuf - 1; i >= 0; i--) { + bp = nbufp(i); if (isbufbusy(bp)) nbusy++; + } if (nbusy == 0) { if (first_buf_printf) printf("All buffers synced."); @@ -1391,7 +1401,8 @@ * a fsck if we're just a 
client of a wedged NFS server */ nbusy = 0; - for (bp = &buf[nbuf]; --bp >= buf; ) { + for (i = nbuf - 1; i >= 0; i--) { + bp = nbufp(i); if (isbufbusy(bp)) { #if 0 /* XXX: This is bogus. We should probably have a BO_REMOTE flag instead */ @@ -1571,6 +1582,7 @@ buf_deallocate(bp); bufkva_free(bp); atomic_add_int(&bufdomain(bp)->bd_freebuffers, 1); + MPASS((bp->b_flags & B_MAXPHYS) == 0); BUF_UNLOCK(bp); uma_zfree(buf_zone, bp); } @@ -1674,6 +1686,7 @@ ("bp: %p still has %d vm pages\n", bp, bp->b_npages)); KASSERT(bp->b_kvasize == 0, ("bp: %p still has kva\n", bp)); KASSERT(bp->b_bufsize == 0, ("bp: %p still has bufspace\n", bp)); + MPASS((bp->b_flags & B_MAXPHYS) == 0); bp->b_domain = BD_DOMAIN(bd); bp->b_flags = 0; @@ -2018,6 +2031,9 @@ KASSERT((gbflags & GB_UNMAPPED) == 0 || (gbflags & GB_KVAALLOC) != 0, ("Invalid gbflags 0x%x in %s", gbflags, __func__)); + MPASS((bp->b_flags & B_MAXPHYS) == 0); + KASSERT(maxsize <= maxbcachebuf, + ("bufkva_alloc kva too large %d %u", maxsize, maxbcachebuf)); bufkva_free(bp); @@ -3036,6 +3052,10 @@ */ obj = bp->b_bufobj->bo_object; if (bp->b_npages < desiredpages) { + KASSERT(desiredpages <= atop(maxbcachebuf), + ("vfs_vmio_extend past maxbcachebuf %p %d %u", + bp, desiredpages, maxbcachebuf)); + /* * We must allocate system pages since blocking * here could interfere with paging I/O, no @@ -3163,7 +3183,7 @@ (vp->v_mount != 0) && /* Only on nodes that have the size info */ (bp->b_flags & (B_CLUSTEROK | B_INVAL)) == B_CLUSTEROK) { size = vp->v_mount->mnt_stat.f_iosize; - maxcl = MAXPHYS / size; + maxcl = maxphys / size; BO_RLOCK(bo); for (i = 1; i < maxcl; i++) @@ -4853,6 +4873,10 @@ to = round_page(to); from = round_page(from); index = (from - trunc_page((vm_offset_t)bp->b_data)) >> PAGE_SHIFT; + MPASS((bp->b_flags & B_MAXPHYS) == 0); + KASSERT(to - from <= maxbcachebuf, + ("vm_hold_load_pages too large %p %#jx %#jx %u", + bp, (uintmax_t)from, (uintmax_t)to, maxbcachebuf)); for (pg = from; pg < to; pg += PAGE_SIZE, index++) { /* 
@@ -4912,12 +4936,12 @@ vm_prot_t prot; int pidx; + MPASS((bp->b_flags & B_MAXPHYS) != 0); prot = VM_PROT_READ; if (bp->b_iocmd == BIO_READ) prot |= VM_PROT_WRITE; /* Less backwards than it looks */ if ((pidx = vm_fault_quick_hold_pages(&curproc->p_vmspace->vm_map, - (vm_offset_t)uaddr, len, prot, bp->b_pages, - btoc(MAXPHYS))) < 0) + (vm_offset_t)uaddr, len, prot, bp->b_pages, btoc(maxphys))) < 0) return (-1); bp->b_bufsize = len; bp->b_npages = pidx; @@ -5398,19 +5422,23 @@ db_printf("\n"); cnt = 0; total = 0; - for (j = 0; j < nbuf; j++) - if (buf[j].b_domain == i && BUF_ISLOCKED(&buf[j])) { + for (j = 0; j < nbuf; j++) { + bp = nbufp(j); + if (bp->b_domain == i && BUF_ISLOCKED(bp)) { cnt++; - total += buf[j].b_bufsize; + total += bp->b_bufsize; } + } db_printf("\tLocked buffers: %d space %ld\n", cnt, total); cnt = 0; total = 0; - for (j = 0; j < nbuf; j++) - if (buf[j].b_domain == i) { + for (j = 0; j < nbuf; j++) { + bp = nbufp(j); + if (bp->b_domain == i) { cnt++; - total += buf[j].b_bufsize; + total += bp->b_bufsize; } + } db_printf("\tTotal buffers: %d space %ld\n", cnt, total); } } @@ -5421,7 +5449,7 @@ int i; for (i = 0; i < nbuf; i++) { - bp = &buf[i]; + bp = nbufp(i); if (BUF_ISLOCKED(bp)) { db_show_buffer((uintptr_t)bp, 1, 0, NULL); db_printf("\n"); @@ -5464,7 +5492,7 @@ } for (i = 0; i < nbuf; i++) { - bp = &buf[i]; + bp = nbufp(i); if (bp->b_qindex == QUEUE_EMPTY) nfree++; else Index: sys/kern/vfs_cluster.c =================================================================== --- sys/kern/vfs_cluster.c +++ sys/kern/vfs_cluster.c @@ -386,6 +386,7 @@ bp = uma_zalloc(cluster_pbuf_zone, M_NOWAIT); if (bp == NULL) return tbp; + MPASS((bp->b_flags & B_MAXPHYS) != 0); /* * We are synthesizing a buffer out of vm_page_t's, but @@ -871,6 +872,7 @@ --len; continue; } + MPASS((bp->b_flags & B_MAXPHYS) != 0); /* * We got a pbuf to make the cluster in. 
Index: sys/kern/vfs_default.c =================================================================== --- sys/kern/vfs_default.c +++ sys/kern/vfs_default.c @@ -974,8 +974,8 @@ iosize = vap->va_blocksize; if (iosize == 0) iosize = BLKDEV_IOSIZE; - if (iosize > MAXPHYS) - iosize = MAXPHYS; + if (iosize > maxphys) + iosize = maxphys; buf = malloc(iosize, M_TEMP, M_WAITOK); #ifdef __notyet__ Index: sys/net/if.c =================================================================== --- sys/net/if.c +++ sys/net/if.c @@ -3162,8 +3162,8 @@ struct sbuf *sb; int error, full = 0, valid_len, max_len; - /* Limit initial buffer size to MAXPHYS to avoid DoS from userspace. */ - max_len = MAXPHYS - 1; + /* Limit initial buffer size to maxphys to avoid DoS from userspace. */ + max_len = maxphys - 1; /* Prevent hostile input from being able to crash the system */ if (ifc->ifc_len <= 0) Index: sys/sys/aio.h =================================================================== --- sys/sys/aio.h +++ sys/sys/aio.h @@ -140,8 +140,8 @@ struct { /* BIO backend */ struct bio *bp; /* (*) BIO pointer */ struct buf *pbuf; /* (*) buffer pointer */ - struct vm_page *pages[btoc(MAXPHYS)+1]; /* (*) */ int npages; /* (*) number of pages */ + struct vm_page **pages; /* (*) */ }; struct { /* fsync() requests */ int pending; /* (a) number of pending I/O */ Index: sys/sys/buf.h =================================================================== --- sys/sys/buf.h +++ sys/sys/buf.h @@ -141,7 +141,6 @@ TAILQ_HEAD(cluster_list_head, buf) cluster_head; TAILQ_ENTRY(buf) cluster_entry; } b_cluster; - struct vm_page *b_pages[btoc(MAXPHYS)]; int b_npages; struct workhead b_dep; /* (D) List of filesystem dependencies. */ void *b_fsprivate1; @@ -156,6 +155,7 @@ #elif defined(BUF_TRACKING) const char *b_io_tracking; #endif + struct vm_page *b_pages[]; }; #define b_object b_bufobj->bo_object @@ -234,7 +234,7 @@ #define B_INVALONERR 0x00040000 /* Invalidate on write error. */ #define B_00080000 0x00080000 /* Available flag. 
*/ #define B_00100000 0x00100000 /* Available flag. */ -#define B_00200000 0x00200000 /* Available flag. */ +#define B_MAXPHYS 0x00200000 /* nitems(b_pages[]) = atop(MAXPHYS). */ #define B_RELBUF 0x00400000 /* Release VMIO buffer. */ #define B_FS_FLAG1 0x00800000 /* Available flag for FS use. */ #define B_NOCOPY 0x01000000 /* Don't copy-on-write this buf. */ @@ -247,7 +247,7 @@ #define B_REMFREE 0x80000000 /* Delayed bremfree */ #define PRINT_BUF_FLAGS "\20\40remfree\37cluster\36vmio\35ram\34managed" \ - "\33paging\32infreecnt\31nocopy\30b23\27relbuf\26b21\25b20" \ + "\33paging\32infreecnt\31nocopy\30b23\27relbuf\26maxphys\25b20" \ "\24b19\23invalonerr\22clusterok\21malloc\20nocache\17b14\16inval" \ "\15reuse\14noreuse\13eintr\12done\11b8\10delwri" \ "\7validsuspwrt\6cache\5deferred\4direct\3async\2needcommit\1age" @@ -496,8 +496,8 @@ #ifdef _KERNEL extern int nbuf; /* The number of buffer headers */ -extern long maxswzone; /* Max KVA for swap structures */ -extern long maxbcache; /* Max KVA for buffer cache */ +extern u_long maxswzone; /* Max KVA for swap structures */ +extern u_long maxbcache; /* Max KVA for buffer cache */ extern int maxbcachebuf; /* Max buffer cache block size */ extern long runningbufspace; extern long hibufspace; Index: sys/sys/param.h =================================================================== --- sys/sys/param.h +++ sys/sys/param.h @@ -160,7 +160,7 @@ #define DFLTPHYS (64 * 1024) /* default max raw I/O transfer size */ #endif #ifndef MAXPHYS -#define MAXPHYS (128 * 1024) /* max raw I/O transfer size */ +#define MAXPHYS (1024 * 1024) /* max raw I/O transfer size */ #endif #ifndef MAXDUMPPGS #define MAXDUMPPGS (DFLTPHYS/PAGE_SIZE) Index: sys/sys/systm.h =================================================================== --- sys/sys/systm.h +++ sys/sys/systm.h @@ -74,6 +74,8 @@ extern int ngroups_max; /* max # of supplemental groups */ extern int vm_guest; /* Running as virtual machine guest? 
*/ +extern u_long maxphys; + /* * Detected virtual machine guest types. The intention is to expand * and/or add to the VM_GUEST_VM type if specific VM functionality is Index: sys/ufs/ffs/ffs_vfsops.c =================================================================== --- sys/ufs/ffs/ffs_vfsops.c +++ sys/ufs/ffs/ffs_vfsops.c @@ -1055,8 +1055,8 @@ BO_UNLOCK(&odevvp->v_bufobj); if (dev->si_iosize_max != 0) mp->mnt_iosize_max = dev->si_iosize_max; - if (mp->mnt_iosize_max > MAXPHYS) - mp->mnt_iosize_max = MAXPHYS; + if (mp->mnt_iosize_max > maxphys) + mp->mnt_iosize_max = maxphys; if ((SBLOCKSIZE % cp->provider->sectorsize) != 0) { error = EINVAL; vfs_mount_error(mp, Index: sys/vm/swap_pager.c =================================================================== --- sys/vm/swap_pager.c +++ sys/vm/swap_pager.c @@ -586,7 +586,7 @@ * but it isn't very efficient). * * The nsw_cluster_max is constrained by the bp->b_pages[] - * array, which has MAXPHYS / PAGE_SIZE entries, and our locally + * array, which has maxphys / PAGE_SIZE entries, and our locally * defined MAX_PAGEOUT_CLUSTER. Also be aware that swap ops are * constrained by the swap device interleave stripe size. * @@ -601,7 +601,7 @@ * have one NFS swap device due to the command/ack latency over NFS. * So it all works out pretty well. */ - nsw_cluster_max = min(MAXPHYS / PAGE_SIZE, MAX_PAGEOUT_CLUSTER); + nsw_cluster_max = min(maxphys / PAGE_SIZE, MAX_PAGEOUT_CLUSTER); nsw_wcount_async = 4; nsw_wcount_async_max = nsw_wcount_async; @@ -1314,6 +1314,7 @@ VM_OBJECT_WUNLOCK(object); bp = uma_zalloc(swrbuf_zone, M_WAITOK); + MPASS((bp->b_flags & B_MAXPHYS) != 0); /* Pages cannot leave the object while busy. 
*/ for (i = 0, p = bm; i < count; i++, p = TAILQ_NEXT(p, listq)) { MPASS(p->pindex == bm->pindex + i); @@ -1522,8 +1523,9 @@ VM_OBJECT_WUNLOCK(object); bp = uma_zalloc(swwbuf_zone, M_WAITOK); + MPASS((bp->b_flags & B_MAXPHYS) != 0); if (async) - bp->b_flags = B_ASYNC; + bp->b_flags |= B_ASYNC; bp->b_flags |= B_PAGING; bp->b_iocmd = BIO_WRITE; Index: sys/vm/vm_fault.c =================================================================== --- sys/vm/vm_fault.c +++ sys/vm/vm_fault.c @@ -115,7 +115,6 @@ #define PFFOR 4 #define VM_FAULT_READ_DEFAULT (1 + VM_FAULT_READ_AHEAD_INIT) -#define VM_FAULT_READ_MAX (1 + VM_FAULT_READ_AHEAD_MAX) #define VM_FAULT_DONTNEED_MIN 1048576 Index: sys/vm/vm_init.c =================================================================== --- sys/vm/vm_init.c +++ sys/vm/vm_init.c @@ -212,7 +212,7 @@ /* * Allocate the clean map to hold all of I/O virtual memory. */ - size = (long)nbuf * BKVASIZE + (long)bio_transient_maxcnt * MAXPHYS; + size = (long)nbuf * BKVASIZE + (long)bio_transient_maxcnt * maxphys; kmi->clean_sva = firstaddr = kva_alloc(size); kmi->clean_eva = firstaddr + size; @@ -233,7 +233,7 @@ * And optionally transient bio space. */ if (bio_transient_maxcnt != 0) { - size = (long)bio_transient_maxcnt * MAXPHYS; + size = (long)bio_transient_maxcnt * maxphys; vmem_init(transient_arena, "transient arena", firstaddr, size, PAGE_SIZE, 0, 0); firstaddr += size; Index: sys/vm/vm_map.h =================================================================== --- sys/vm/vm_map.h +++ sys/vm/vm_map.h @@ -396,7 +396,7 @@ */ #define VM_FAULT_READ_AHEAD_MIN 7 #define VM_FAULT_READ_AHEAD_INIT 15 -#define VM_FAULT_READ_AHEAD_MAX min(atop(MAXPHYS) - 1, UINT8_MAX) +#define VM_FAULT_READ_AHEAD_MAX min(atop(maxphys) - 1, UINT8_MAX) /* * The following "find_space" options are supported by vm_map_find(). 
Index: sys/vm/vm_pager.c =================================================================== --- sys/vm/vm_pager.c +++ sys/vm/vm_pager.c @@ -183,7 +183,8 @@ { /* Main zone for paging bufs. */ - pbuf_zone = uma_zcreate("pbuf", sizeof(struct buf), + pbuf_zone = uma_zcreate("pbuf", + sizeof(struct buf) + atop(maxphys) * sizeof(vm_page_t), pbuf_ctor, pbuf_dtor, pbuf_init, NULL, UMA_ALIGN_CACHE, UMA_ZONE_NOFREE); /* Few systems may still use this zone directly, so it needs a limit. */ @@ -384,7 +385,7 @@ bp->b_qindex = 0; /* On no queue (QUEUE_NONE) */ bp->b_data = bp->b_kvabase; bp->b_xflags = 0; - bp->b_flags = 0; + bp->b_flags = B_MAXPHYS; bp->b_ioflags = 0; bp->b_iodone = NULL; bp->b_error = 0; @@ -415,10 +416,10 @@ { struct buf *bp = mem; - bp->b_kvabase = (void *)kva_alloc(MAXPHYS); + bp->b_kvabase = (void *)kva_alloc(maxphys); if (bp->b_kvabase == NULL) return (ENOMEM); - bp->b_kvasize = MAXPHYS; + bp->b_kvasize = maxphys; BUF_LOCKINIT(bp); LIST_INIT(&bp->b_dep); bp->b_rcred = bp->b_wcred = NOCRED; Index: sys/vm/vnode_pager.c =================================================================== --- sys/vm/vnode_pager.c +++ sys/vm/vnode_pager.c @@ -817,7 +817,7 @@ KASSERT(foff < object->un_pager.vnp.vnp_size, ("%s: page %p offset beyond vp %p size", __func__, m[0], vp)); - KASSERT(count <= nitems(bp->b_pages), + KASSERT(count <= atop(maxphys), ("%s: requested %d pages", __func__, count)); /* @@ -832,6 +832,7 @@ } bp = uma_zalloc(vnode_pbuf_zone, M_WAITOK); + MPASS((bp->b_flags & B_MAXPHYS) != 0); /* * Get the underlying device blocks for the file with VOP_BMAP(). @@ -916,10 +917,10 @@ * Check that total amount of pages fit into buf. Trim rbehind and * rahead evenly if not. 
*/ - if (rbehind + rahead + count > nitems(bp->b_pages)) { + if (rbehind + rahead + count > atop(maxphys)) { int trim, sum; - trim = rbehind + rahead + count - nitems(bp->b_pages) + 1; + trim = rbehind + rahead + count - atop(maxphys) + 1; sum = rbehind + rahead; if (rbehind == before) { /* Roundup rbehind trim to block size. */ @@ -930,9 +931,9 @@ rbehind -= trim * rbehind / sum; rahead -= trim * rahead / sum; } - KASSERT(rbehind + rahead + count <= nitems(bp->b_pages), - ("%s: behind %d ahead %d count %d", __func__, - rbehind, rahead, count)); + KASSERT(rbehind + rahead + count <= atop(maxphys), + ("%s: behind %d ahead %d count %d maxphys %lu", __func__, + rbehind, rahead, count, maxphys)); /* * Fill in the bp->b_pages[] array with requested and optional @@ -1014,7 +1015,7 @@ *a_rahead = bp->b_pgafter; #ifdef INVARIANTS - KASSERT(bp->b_npages <= nitems(bp->b_pages), + KASSERT(bp->b_npages <= atop(maxphys), ("%s: buf %p overflowed", __func__, bp)); for (int j = 1, prev = 0; j < bp->b_npages; j++) { if (bp->b_pages[j] == bogus_page)