Index: sys/dev/virtio/block/virtio_blk.c
===================================================================
--- sys/dev/virtio/block/virtio_blk.c
+++ sys/dev/virtio/block/virtio_blk.c
@@ -85,6 +85,7 @@
 
 	struct virtqueue	*vtblk_vq;
 	struct sglist		*vtblk_sglist;
+	struct sglist		*vtblk_sglist_stash;
 	struct disk		*vtblk_disk;
 
 	struct bio_queue_head	 vtblk_bioq;
@@ -94,6 +95,7 @@
 				 vtblk_req_ready;
 	struct vtblk_request	*vtblk_req_ordered;
 
+	size_t			 vtblk_size_max;
 	int			 vtblk_max_nsegs;
 	int			 vtblk_request_count;
 	enum vtblk_cache_mode	 vtblk_write_cache;
@@ -316,20 +318,10 @@
 
 	vtblk_read_config(sc, &blkcfg);
 
-	/*
-	 * With the current sglist(9) implementation, it is not easy
-	 * for us to support a maximum segment size as adjacent
-	 * segments are coalesced. For now, just make sure it's larger
-	 * than the maximum supported transfer size.
-	 */
-	if (virtio_with_feature(dev, VIRTIO_BLK_F_SIZE_MAX)) {
-		if (blkcfg.size_max < MAXPHYS) {
-			error = ENOTSUP;
-			device_printf(dev, "host requires unsupported "
-			    "maximum segment size feature\n");
-			goto fail;
-		}
-	}
+	if (virtio_with_feature(dev, VIRTIO_BLK_F_SIZE_MAX))
+		sc->vtblk_size_max = MIN(blkcfg.size_max, MAXPHYS);
+	else
+		sc->vtblk_size_max = SIZE_T_MAX;
 
 	sc->vtblk_max_nsegs = vtblk_maximum_segments(sc, &blkcfg);
 	if (sc->vtblk_max_nsegs <= VTBLK_MIN_SEGMENTS) {
@@ -345,6 +337,15 @@
 		device_printf(dev, "cannot allocate sglist\n");
 		goto fail;
 	}
+	if (virtio_with_feature(dev, VIRTIO_BLK_F_SIZE_MAX)) {
+		sc->vtblk_sglist_stash = sglist_alloc(sc->vtblk_max_nsegs,
+		    M_NOWAIT);
+		if (sc->vtblk_sglist_stash == NULL) {
+			error = ENOMEM;
+			device_printf(dev, "cannot allocate sglist stash\n");
+			goto fail;
+		}
+	}
 
 	error = vtblk_alloc_virtqueue(sc);
 	if (error) {
@@ -401,6 +402,10 @@
 		sglist_free(sc->vtblk_sglist);
 		sc->vtblk_sglist = NULL;
 	}
+	if (sc->vtblk_sglist_stash != NULL) {
+		sglist_free(sc->vtblk_sglist_stash);
+		sc->vtblk_sglist_stash = NULL;
+	}
 
 	VTBLK_LOCK_DESTROY(sc);
 
@@ -913,16 +918,91 @@
 	return (req);
 }
 
+/*
+ * Append [addr, addr + len) to 'to', splitting it into segments no
+ * larger than vtblk_size_max.  Returns EFBIG if 'to' fills up.
+ */
+static int
+vtblk_sg_split_append(
+    struct vtblk_softc *sc, struct sglist *to, vm_paddr_t addr, size_t len)
+{
+	while (len > 0) {
+		size_t xfer;
+		struct sglist_seg *seg;
+
+		if (to->sg_nseg == 0) {
+			xfer = MIN(len, sc->vtblk_size_max);
+			seg = &to->sg_segs[0];
+			seg->ss_paddr = addr;
+			seg->ss_len = xfer;
+			to->sg_nseg++;
+			len -= xfer;
+			addr += xfer;
+			continue;
+		}
+
+		seg = &to->sg_segs[to->sg_nseg - 1];
+		/*
+		 * Open a new segment when the last one is full or not
+		 * contiguous; a full segment would otherwise yield a
+		 * zero-length transfer and loop forever.
+		 */
+		if (seg->ss_len >= sc->vtblk_size_max ||
+		    seg->ss_paddr + seg->ss_len != addr) {
+			KASSERT(to->sg_nseg <= to->sg_maxseg,
+			    ("%s: Bogus number of items in the sglist",
+			    __func__));
+			if (to->sg_nseg == to->sg_maxseg)
+				return (EFBIG);
+			xfer = MIN(len, sc->vtblk_size_max);
+			seg = &to->sg_segs[to->sg_nseg++];
+			seg->ss_paddr = addr;
+			seg->ss_len = xfer;
+		} else {
+			xfer = MIN(len, sc->vtblk_size_max - seg->ss_len);
+			seg->ss_len += xfer;
+		}
+		len -= xfer;
+		addr += xfer;
+	}
+
+	return (0);
+}
+
+/*
+ * Copy 'from' into 'to', capping every segment at vtblk_size_max;
+ * 'to' must be sized so that the split cannot overflow it.
+ */
+static void
+vtblk_sg_split(struct vtblk_softc *sc, struct sglist *to, struct sglist *from)
+{
+	unsigned int i;
+
+	for (i = 0; i < from->sg_nseg; i++) {
+		int error;
+
+		error = vtblk_sg_split_append(
+		    sc, to, from->sg_segs[i].ss_paddr, from->sg_segs[i].ss_len);
+		if (error)
+			panic(
+			    "%s: unexpected entries number in sglist. max: %hu",
+			    __func__, to->sg_maxseg);
+	}
+}
+
 static int
 vtblk_request_execute(struct vtblk_softc *sc, struct vtblk_request *req)
 {
 	struct virtqueue *vq;
-	struct sglist *sg;
+	struct sglist *sg, *sg_stash;
 	struct bio *bp;
 	int ordered, readable, writable, error;
 
 	vq = sc->vtblk_vq;
 	sg = sc->vtblk_sglist;
+	sg_stash = sc->vtblk_sglist_stash;
+	if (sg_stash == NULL)
+		sg_stash = sg;
 	bp = req->vbr_bp;
 	ordered = 0;
 	writable = 0;
@@ -943,19 +1023,21 @@
 		}
 	}
 
-	sglist_reset(sg);
-	sglist_append(sg, &req->vbr_hdr, sizeof(struct virtio_blk_outhdr));
+	sglist_reset(sg_stash);
+	if (sg != sg_stash)
+		sglist_reset(sg);
+	sglist_append(sg_stash, &req->vbr_hdr, sizeof(struct virtio_blk_outhdr));
 
 	if (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE) {
-		error = sglist_append_bio(sg, bp);
-		if (error || sg->sg_nseg == sg->sg_maxseg) {
+		error = sglist_append_bio(sg_stash, bp);
+		if (error || sg_stash->sg_nseg == sg_stash->sg_maxseg) {
 			panic("%s: bio %p data buffer too big %d",
 			    __func__, bp, error);
 		}
 
 		/* BIO_READ means the host writes into our buffer. */
 		if (bp->bio_cmd == BIO_READ)
-			writable = sg->sg_nseg - 1;
+			writable = sg_stash->sg_nseg - 1;
 	} else if (bp->bio_cmd == BIO_DELETE) {
 		struct virtio_blk_discard_write_zeroes *discard;
 
@@ -965,15 +1047,29 @@
 		discard->sector = bp->bio_offset / VTBLK_BSIZE;
 		discard->num_sectors = bp->bio_bcount / VTBLK_BSIZE;
 		bp->bio_driver1 = discard;
-		error = sglist_append(sg, discard, sizeof(*discard));
-		if (error || sg->sg_nseg == sg->sg_maxseg) {
+		error = sglist_append(sg_stash, discard, sizeof(*discard));
+		if (error || sg_stash->sg_nseg == sg_stash->sg_maxseg) {
 			panic("%s: bio %p data buffer too big %d",
 			    __func__, bp, error);
 		}
 	}
 
 	writable++;
-	sglist_append(sg, &req->vbr_ack, sizeof(uint8_t));
+	sglist_append(sg_stash, &req->vbr_ack, sizeof(uint8_t));
+
+	if (sg != sg_stash) {
+		/*
+		 * Break down the sglist if VIRTIO_BLK_F_SIZE_MAX is set
+		 */
+		vtblk_sg_split(sc, sg, sg_stash);
+		/*
+		 * The split may create extra data segments; on a read,
+		 * everything after the header is device-writable, so
+		 * recount writable from the split list.
+		 */
+		if (bp->bio_cmd == BIO_READ)
+			writable = sg->sg_nseg - 1;
+	}
+
 	readable = sg->sg_nseg - writable;
 
 	error = virtqueue_enqueue(vq, req, sg, readable, writable);