Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c
===================================================================
--- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c
+++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c
@@ -307,7 +307,7 @@
 	error = 0;
 	for (; off < offset; off += maxio, p += maxio, size -= maxio) {
-		bzero(bp, sizeof(*bp));
+		g_reset_bio(bp);
 		bp->bio_cmd = cmd;
 		bp->bio_done = NULL;
 		bp->bio_offset = off;
Index: sys/dev/mmc/mmcsd.c
===================================================================
--- sys/dev/mmc/mmcsd.c
+++ sys/dev/mmc/mmcsd.c
@@ -65,6 +65,7 @@
 #include
 #include
 #include
+#include <geom/geom.h>
 #include
 #include
@@ -487,7 +488,7 @@
 	if (!length)
 		return (0);
-	bzero(&bp, sizeof(struct bio));
+	g_reset_bio(&bp);
 	bp.bio_disk = disk;
 	bp.bio_pblkno = offset / disk->d_sectorsize;
 	bp.bio_bcount = length;
Index: sys/dev/virtio/block/virtio_blk.c
===================================================================
--- sys/dev/virtio/block/virtio_blk.c
+++ sys/dev/virtio/block/virtio_blk.c
@@ -41,6 +41,7 @@
 #include
 #include
+#include <geom/geom.h>
 #include
 #include
@@ -1146,7 +1147,7 @@
 	req->vbr_hdr.sector = 0;
 	req->vbr_bp = &buf;
-	bzero(&buf, sizeof(struct bio));
+	g_reset_bio(&buf);
 	buf.bio_cmd = BIO_READ;
 	buf.bio_data = dp->d_ident;
@@ -1278,7 +1279,7 @@
 	req->vbr_hdr.sector = offset / 512;
 	req->vbr_bp = &buf;
-	bzero(&buf, sizeof(struct bio));
+	g_reset_bio(&buf);
 	buf.bio_cmd = BIO_WRITE;
 	buf.bio_data = virtual;
@@ -1300,7 +1301,7 @@
 	req->vbr_hdr.sector = 0;
 	req->vbr_bp = &buf;
-	bzero(&buf, sizeof(struct bio));
+	g_reset_bio(&buf);
 	buf.bio_cmd = BIO_FLUSH;
Index: sys/geom/geom.h
===================================================================
--- sys/geom/geom.h
+++ sys/geom/geom.h
@@ -45,6 +45,7 @@
 #include
 #include
 #include
+#include <vm/uma.h>
 
 struct g_class;
 struct g_geom;
@@ -315,6 +316,7 @@
 /* geom_io.c */
 struct bio * g_clone_bio(struct bio *);
 struct bio * g_duplicate_bio(struct bio *);
+struct bio * g_clone_bio_gen(struct bio *bp, uma_zone_t zone, int flags);
 void g_destroy_bio(struct bio *);
 void g_io_deliver(struct bio *bp, int error);
 int g_io_getattr(const char *attr, struct g_consumer *cp, int *len, void *ptr);
@@ -324,6 +326,8 @@
 void g_io_request(struct bio *bp, struct g_consumer *cp);
 struct bio *g_new_bio(void);
 struct bio *g_alloc_bio(void);
+struct bio *g_alloc_bio_gen(uma_zone_t zone, int flags);
+void g_reset_bio(struct bio *);
 void * g_read_data(struct g_consumer *cp, off_t offset, off_t length, int *error);
 int g_write_data(struct g_consumer *cp, off_t offset, void *ptr, off_t length);
 int g_delete_data(struct g_consumer *cp, off_t offset, off_t length);
Index: sys/geom/geom_dev.c
===================================================================
--- sys/geom/geom_dev.c
+++ sys/geom/geom_dev.c
@@ -628,17 +628,7 @@
 	sc->sc_active++;
 	mtx_unlock(&sc->sc_mtx);
 
-	for (;;) {
-		/*
-		 * XXX: This is not an ideal solution, but I belive it to
-		 * XXX: deadlock safe, all things considered.
-		 */
-		bp2 = g_clone_bio(bp);
-		if (bp2 != NULL)
-			break;
-		pause("gdstrat", hz / 10);
-	}
-	KASSERT(bp2 != NULL, ("XXX: ENOMEM in a bad place"));
+	bp2 = g_duplicate_bio(bp);
 	bp2->bio_done = g_dev_done;
 	g_trace(G_T_BIO,
 	    "g_dev_strategy(%p/%p) offset %jd length %jd data %p cmd %d",
Index: sys/geom/geom_io.c
===================================================================
--- sys/geom/geom_io.c
+++ sys/geom/geom_io.c
@@ -65,7 +65,7 @@
 #include
 #include
 
-static int g_io_transient_map_bio(struct bio *bp);
+static int g_io_transient_map_bio(struct bio *bp, int direct);
 
 static struct g_bioq g_bio_run_down;
 static struct g_bioq g_bio_run_up;
@@ -143,39 +143,37 @@
 }
 
 struct bio *
-g_new_bio(void)
+g_alloc_bio_gen(uma_zone_t zone, int flags)
 {
 	struct bio *bp;
 
-	bp = uma_zalloc(biozone, M_NOWAIT | M_ZERO);
+	bp = uma_zalloc(zone, flags | M_ZERO);
 #ifdef KTR
 	if ((KTR_COMPILE & KTR_GEOM) && (ktr_mask & KTR_GEOM)) {
 		struct stack st;
 
-		CTR1(KTR_GEOM, "g_new_bio(): %p", bp);
+		CTR2(KTR_GEOM, "g_alloc_bio_gen(): %p %x", bp, flags);
 		stack_save(&st);
 		CTRSTACK(KTR_GEOM, &st, 3, 0);
 	}
 #endif
+	if (bp)
+		bp->bio_zone = zone;
 	return (bp);
 }
 
 struct bio *
+g_new_bio(void)
+{
+
+	return (g_alloc_bio_gen(biozone, M_NOWAIT));
+}
+
+struct bio *
 g_alloc_bio(void)
 {
-	struct bio *bp;
 
-	bp = uma_zalloc(biozone, M_WAITOK | M_ZERO);
-#ifdef KTR
-	if ((KTR_COMPILE & KTR_GEOM) && (ktr_mask & KTR_GEOM)) {
-		struct stack st;
-
-		CTR1(KTR_GEOM, "g_alloc_bio(): %p", bp);
-		stack_save(&st);
-		CTRSTACK(KTR_GEOM, &st, 3, 0);
-	}
-#endif
-	return (bp);
+	return (g_alloc_bio_gen(biozone, M_WAITOK));
 }
 
 void
@@ -190,16 +188,23 @@
 		CTRSTACK(KTR_GEOM, &st, 3, 0);
 	}
 #endif
-	uma_zfree(biozone, bp);
+	uma_zfree(bp->bio_zone, bp);
 }
 
 struct bio *
-g_clone_bio(struct bio *bp)
+g_clone_bio_gen(struct bio *bp, uma_zone_t zone, int flags)
 {
 	struct bio *bp2;
 
-	bp2 = uma_zalloc(biozone, M_NOWAIT | M_ZERO);
+	if (zone == NULL) {
+		if ((bp->bio_flags & BIO_SAME_ZONE) && bp->bio_zone != NULL)
+			zone = bp->bio_zone;
+		else
+			zone = biozone;
+	}
+	bp2 = uma_zalloc(zone, flags | M_ZERO);
 	if (bp2 != NULL) {
+		bp2->bio_zone = zone;
 		bp2->bio_parent = bp;
 		bp2->bio_cmd = bp->bio_cmd;
 		/*
@@ -227,41 +232,38 @@
 	if ((KTR_COMPILE & KTR_GEOM) && (ktr_mask & KTR_GEOM)) {
 		struct stack st;
 
-		CTR2(KTR_GEOM, "g_clone_bio(%p): %p", bp, bp2);
+		CTR3(KTR_GEOM, "g_clone_bio_gen(%p): %p %x", bp, bp2, flags);
 		stack_save(&st);
 		CTRSTACK(KTR_GEOM, &st, 3, 0);
 	}
 #endif
-	return(bp2);
+	return (bp2);
+}
+
+struct bio *
+g_clone_bio(struct bio *bp)
+{
+
+	return (g_clone_bio_gen(bp, NULL, M_NOWAIT));
 }
 
 struct bio *
 g_duplicate_bio(struct bio *bp)
 {
-	struct bio *bp2;
+	return (g_clone_bio_gen(bp, NULL, M_WAITOK));
+}
 
-	bp2 = uma_zalloc(biozone, M_WAITOK | M_ZERO);
-	bp2->bio_flags = bp->bio_flags & (BIO_UNMAPPED | BIO_VLIST);
-	bp2->bio_parent = bp;
-	bp2->bio_cmd = bp->bio_cmd;
-	bp2->bio_length = bp->bio_length;
-	bp2->bio_offset = bp->bio_offset;
-	bp2->bio_data = bp->bio_data;
-	bp2->bio_ma = bp->bio_ma;
-	bp2->bio_ma_n = bp->bio_ma_n;
-	bp2->bio_ma_offset = bp->bio_ma_offset;
-	bp2->bio_attribute = bp->bio_attribute;
-	bp->bio_children++;
-#ifdef KTR
-	if ((KTR_COMPILE & KTR_GEOM) && (ktr_mask & KTR_GEOM)) {
-		struct stack st;
-
-		CTR2(KTR_GEOM, "g_duplicate_bio(%p): %p", bp, bp2);
-		stack_save(&st);
-		CTRSTACK(KTR_GEOM, &st, 3, 0);
-	}
-#endif
-	return(bp2);
+void
+g_reset_bio(struct bio *bp)
+{
+	uma_zone_t zone;
+	uint8_t flags;
+
+	zone = bp->bio_zone;
+	flags = bp->bio_flags & BIO_SAME_ZONE;
+	bzero(bp, sizeof(*bp));
+	bp->bio_zone = zone;
+	bp->bio_flags = flags;
 }
 
 void
@@ -319,7 +321,7 @@
 }
 
 static int
-g_io_check(struct bio *bp)
+g_io_check(struct bio *bp, int direct)
 {
 	struct g_consumer *cp;
 	struct g_provider *pp;
@@ -399,7 +401,7 @@
 		if ((bp->bio_flags & BIO_UNMAPPED) != 0 &&
 		    (bp->bio_to->flags & G_PF_ACCEPT_UNMAPPED) == 0 &&
 		    (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE)) {
-			if ((error = g_io_transient_map_bio(bp)) >= 0)
+			if ((error = g_io_transient_map_bio(bp, direct)) >= 0)
 				return (error);
 		}
 		break;
@@ -477,6 +479,7 @@
 	KASSERT(bp != NULL, ("NULL bp in g_io_request"));
 	pp = cp->provider;
 	KASSERT(pp != NULL, ("consumer not attached in g_io_request"));
+	KASSERT(bp->bio_zone != NULL, ("bio in g_io_request without zone"));
 #ifdef DIAGNOSTIC
 	KASSERT(bp->bio_driver1 == NULL,
 	    ("bio_driver1 used by the consumer (geom %s)", cp->geom->name));
@@ -484,6 +487,7 @@
 	    ("bio_driver2 used by the consumer (geom %s)", cp->geom->name));
 	KASSERT(bp->bio_pflags == 0,
 	    ("bio_pflags used by the consumer (geom %s)", cp->geom->name));
+
 	/*
 	 * Remember consumer's private fields, so we can detect if they were
 	 * modified by the provider.
 	 */
@@ -567,7 +571,7 @@
 	mtx_unlock(mtxp);
 
 	if (direct) {
-		error = g_io_check(bp);
+		error = g_io_check(bp, direct);
 		if (error >= 0) {
 			CTR3(KTR_GEOM, "g_io_request g_io_check on bp %p "
 			    "provider %s returned %d", bp, bp->bio_to->name,
@@ -729,7 +733,7 @@
     "Current count of the active transient maps");
 
 static int
-g_io_transient_map_bio(struct bio *bp)
+g_io_transient_map_bio(struct bio *bp, int direct)
 {
 	vm_offset_t addr;
 	long size;
@@ -744,6 +748,8 @@
 	atomic_add_long(&transient_maps, 1);
 retry:
 	if (vmem_alloc(transient_arena, size, M_BESTFIT | M_NOWAIT, &addr)) {
+		if (direct)
+			return (ENOMEM);
 		if (transient_map_retries != 0 &&
 		    retried >= transient_map_retries) {
 			CTR2(KTR_GEOM, "g_down cannot map bp %p provider %s",
@@ -819,7 +825,7 @@
 		}
 		CTR2(KTR_GEOM, "g_down processing bp %p provider %s", bp,
 		    bp->bio_to->name);
-		error = g_io_check(bp);
+		error = g_io_check(bp, 0);
 		if (error >= 0) {
 			CTR3(KTR_GEOM, "g_down g_io_check on bp %p provider "
 			    "%s returned %d", bp, bp->bio_to->name, error);
Index: sys/geom/journal/g_journal.c
===================================================================
--- sys/geom/journal/g_journal.c
+++ sys/geom/journal/g_journal.c
@@ -1296,7 +1296,7 @@
 	data = bp->bio_data;
 	if (sc->sc_flags & GJF_DEVICE_CHECKSUM)
 		MD5Update(&ctx, data, ent->je_length);
-	bzero(bp, sizeof(*bp));
+	g_reset_bio(bp);
 	bp->bio_cflags = GJ_BIO_JOURNAL;
 	bp->bio_offset = ent->je_offset;
 	bp->bio_joffset = ent->je_joffset;
@@ -1772,7 +1772,7 @@
 {
 	int error;
 
-	bzero(bp, sizeof(*bp));
+	g_reset_bio(bp);
 	bp->bio_cmd = BIO_READ;
 	bp->bio_done = NULL;
 	bp->bio_offset = offset;
Index: sys/geom/mirror/g_mirror.c
===================================================================
--- sys/geom/mirror/g_mirror.c
+++ sys/geom/mirror/g_mirror.c
@@ -1372,7 +1372,7 @@
 		/* Send next synchronization request. */
 		data = bp->bio_data;
-		bzero(bp, sizeof(*bp));
+		g_reset_bio(bp);
 		bp->bio_cmd = BIO_READ;
 		bp->bio_offset = sync->ds_offset;
 		bp->bio_length = MIN(MAXPHYS, sc->sc_mediasize - bp->bio_offset);
Index: sys/geom/raid/g_raid.c
===================================================================
--- sys/geom/raid/g_raid.c
+++ sys/geom/raid/g_raid.c
@@ -1011,7 +1011,7 @@
 	vol = tr->tro_volume;
 	sc = vol->v_softc;
 
-	bzero(&bp, sizeof(bp));
+	g_reset_bio(&bp);
 	bp.bio_cmd = BIO_WRITE;
 	bp.bio_done = g_raid_tr_kerneldump_common_done;
 	bp.bio_attribute = NULL;
Index: sys/geom/raid3/g_raid3.c
===================================================================
--- sys/geom/raid3/g_raid3.c
+++ sys/geom/raid3/g_raid3.c
@@ -1717,7 +1717,7 @@
 		/* Send next synchronization request. */
 		data = bp->bio_data;
-		bzero(bp, sizeof(*bp));
+		g_reset_bio(bp);
 		bp->bio_cmd = BIO_READ;
 		bp->bio_offset = sync->ds_offset * (sc->sc_ndisks - 1);
 		bp->bio_length = MIN(MAXPHYS, sc->sc_mediasize - bp->bio_offset);
Index: sys/kern/kern_physio.c
===================================================================
--- sys/kern/kern_physio.c
+++ sys/kern/kern_physio.c
@@ -110,7 +110,7 @@
 	error = 0;
 	for (i = 0; i < uio->uio_iovcnt; i++) {
 		while (uio->uio_iov[i].iov_len) {
-			bzero(bp, sizeof(*bp));
+			g_reset_bio(bp);
 			if (uio->uio_rw == UIO_READ) {
 				bp->bio_cmd = BIO_READ;
 				curthread->td_ru.ru_inblock++;
Index: sys/kern/subr_disk.c
===================================================================
--- sys/kern/subr_disk.c
+++ sys/kern/subr_disk.c
@@ -21,8 +21,13 @@
 #include
 #include
 #include
+#include <sys/sysctl.h>
 #include
 
+static int bioq_batchsize = 128;
+SYSCTL_INT(_debug, OID_AUTO, bioq_batchsize, CTLFLAG_RW,
+    &bioq_batchsize, 0, "BIOQ batch size");
+
 /*-
  * Disk error is the preface to plaintive error messages
  * about failing disk transfers.  It prints messages of the form
@@ -150,6 +155,7 @@
 	TAILQ_INIT(&head->queue);
 	head->last_offset = 0;
 	head->insert_point = NULL;
+	head->batched = 0;
 }
 
 void
@@ -188,6 +194,7 @@
 {
 
 	TAILQ_INSERT_TAIL(&head->queue, bp, bio_queue);
+	head->batched = 0;
 	head->insert_point = bp;
 	head->last_offset = bp->bio_offset;
 }
@@ -246,6 +253,16 @@
 		return;
 	}
 
+	/*
+	 * Impose a maximum number of passengers in any given
+	 * elevator car.  This limits the maximum latency for any
+	 * given request without significantly affecting the average.
+	 */
+	if (bioq_batchsize > 0 && head->batched > bioq_batchsize) {
+		bioq_insert_tail(head, bp);
+		return;
+	}
+
 	prev = NULL;
 	key = bioq_bio_key(head, bp);
 	cur = TAILQ_FIRST(&head->queue);
@@ -264,4 +281,6 @@
 		TAILQ_INSERT_HEAD(&head->queue, bp, bio_queue);
 	else
 		TAILQ_INSERT_AFTER(&head->queue, prev, bp, bio_queue);
+
+	head->batched++;
 }
Index: sys/sys/bio.h
===================================================================
--- sys/sys/bio.h
+++ sys/sys/bio.h
@@ -62,8 +62,10 @@
 #define BIO_UNMAPPED		0x10
 #define BIO_TRANSIENT_MAPPING	0x20
 #define BIO_VLIST		0x40
+#define BIO_SAME_ZONE		0x80	/* Allocate from same zone on clones */
 
 #ifdef _KERNEL
+#include <vm/uma.h>
 struct disk;
 struct bio;
 struct vm_map;
@@ -113,6 +115,8 @@
 	void	*bio_classifier1;	/* Classifier tag. */
 	void	*bio_classifier2;	/* Classifier tag. */
 
+	uma_zone_t bio_zone;		/* Zone used for the allocation */
+
 #ifdef DIAGNOSTIC
 	void	*_bio_caller1;
 	void	*_bio_caller2;
@@ -130,6 +134,7 @@
 	TAILQ_HEAD(bio_queue, bio) queue;
 	off_t last_offset;
 	struct bio *insert_point;
+	int batched;
 };
 
 extern struct vm_map *bio_transient_map;
Index: sys/vm/swap_pager.c
===================================================================
--- sys/vm/swap_pager.c
+++ sys/vm/swap_pager.c
@@ -146,6 +146,8 @@
 	daddr_t		swb_pages[SWAP_META_PAGES];
 };
 
+static uma_zone_t swap_biozone;
+
 static MALLOC_DEFINE(M_VMPGDATA, "vm_pgdata", "swap pager private data");
 static struct mtx sw_dev_mtx;
 static TAILQ_HEAD(, swdevt) swtailq = TAILQ_HEAD_INITIALIZER(swtailq);
@@ -578,6 +580,18 @@
 	swhash = malloc(sizeof(struct swblock *) * n, M_VMPGDATA, M_WAITOK | M_ZERO);
 	swhash_mask = n - 1;
 	mtx_init(&swhash_mtx, "swap_pager swhash", NULL, MTX_DEF);
+
+	/*
+	 * Setup our uma zone for bios.  We pre-allocate 32 bios and
+	 * never return memory to the OS.  We do both of these things
+	 * so we'll have memory for bios when otherwise there might be
+	 * a shortage.
+	 */
+	swap_biozone = uma_zcreate("swap_bio", sizeof (struct bio),
+	    NULL, NULL,
+	    NULL, NULL,
+	    0, UMA_ZONE_NOFREE);
+	uma_prealloc(swap_biozone, 32);
 }
 
 /*
@@ -2507,9 +2521,9 @@
 	swapgeom_acquire(cp);
 	mtx_unlock(&sw_dev_mtx);
 	if (bp->b_iocmd == BIO_WRITE)
-		bio = g_new_bio();
+		bio = g_alloc_bio_gen(swap_biozone, M_NOWAIT);
 	else
-		bio = g_alloc_bio();
+		bio = g_alloc_bio_gen(swap_biozone, M_WAITOK);
 	if (bio == NULL) {
 		mtx_lock(&sw_dev_mtx);
 		swapgeom_release(cp, sp);
@@ -2520,6 +2534,7 @@
 		return;
 	}
 
+	bio->bio_flags |= BIO_SAME_ZONE;
 	bio->bio_caller1 = sp;
 	bio->bio_caller2 = bp;
 	bio->bio_cmd = bp->b_iocmd;
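
Note for reviewers, not part of the patch: the sketch below shows how a GEOM consumer could combine the new g_alloc_bio_gen(), g_reset_bio() and BIO_SAME_ZONE interfaces, following the same pattern the swap_pager hunks above use. The zone name "foo_bio", the functions foo_init() and foo_start_read(), the reserve size, and the consumer pointer cp are hypothetical; only the UMA calls and the interfaces introduced by this patch are assumed.

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bio.h>
#include <sys/malloc.h>
#include <geom/geom.h>
#include <vm/uma.h>

static uma_zone_t foo_biozone;

static void
foo_init(void)
{

	/*
	 * Private bio zone with a small pre-allocated reserve that is
	 * never returned to the OS, so I/O can still be issued during
	 * a memory shortage.
	 */
	foo_biozone = uma_zcreate("foo_bio", sizeof(struct bio),
	    NULL, NULL, NULL, NULL, 0, UMA_ZONE_NOFREE);
	uma_prealloc(foo_biozone, 16);
}

static void
foo_start_read(struct g_consumer *cp, off_t offset, off_t length,
    void *data, void (*done)(struct bio *))
{
	struct bio *bp;

	/* Allocate from the private zone instead of the global biozone. */
	bp = g_alloc_bio_gen(foo_biozone, M_WAITOK);
	/*
	 * BIO_SAME_ZONE asks g_clone_bio() in the GEOM classes below us
	 * to take the child bios from bp->bio_zone as well, so the whole
	 * path down to the disk is covered by the reserve.
	 */
	bp->bio_flags |= BIO_SAME_ZONE;
	bp->bio_cmd = BIO_READ;
	bp->bio_offset = offset;
	bp->bio_length = length;
	bp->bio_data = data;
	bp->bio_done = done;
	g_io_request(bp, cp);
}

When such a bio is recycled for the next request, g_reset_bio(bp) replaces the old bzero(bp, sizeof(*bp)) idiom: it clears the structure but preserves bio_zone and BIO_SAME_ZONE, which a plain bzero() would lose and which g_destroy_bio() now relies on to return the bio to the correct zone. The elevator batching added in subr_disk.c is tunable at run time through the new debug.bioq_batchsize sysctl.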