Changeset View
Changeset View
Standalone View
Standalone View
sys/dev/virtio/block/virtio_blk.c
Show First 20 Lines • Show All 79 Lines • ▼ Show 20 Lines | |||||||||||
#define VTBLK_FLAG_DETACH 0x0004 | #define VTBLK_FLAG_DETACH 0x0004 | ||||||||||
#define VTBLK_FLAG_SUSPEND 0x0008 | #define VTBLK_FLAG_SUSPEND 0x0008 | ||||||||||
#define VTBLK_FLAG_BARRIER 0x0010 | #define VTBLK_FLAG_BARRIER 0x0010 | ||||||||||
#define VTBLK_FLAG_WC_CONFIG 0x0020 | #define VTBLK_FLAG_WC_CONFIG 0x0020 | ||||||||||
#define VTBLK_FLAG_DISCARD 0x0040 | #define VTBLK_FLAG_DISCARD 0x0040 | ||||||||||
struct virtqueue *vtblk_vq; | struct virtqueue *vtblk_vq; | ||||||||||
struct sglist *vtblk_sglist; | struct sglist *vtblk_sglist; | ||||||||||
struct sglist *vtblk_sglist_stash; | |||||||||||
struct disk *vtblk_disk; | struct disk *vtblk_disk; | ||||||||||
struct bio_queue_head vtblk_bioq; | struct bio_queue_head vtblk_bioq; | ||||||||||
TAILQ_HEAD(, vtblk_request) | TAILQ_HEAD(, vtblk_request) | ||||||||||
vtblk_req_free; | vtblk_req_free; | ||||||||||
TAILQ_HEAD(, vtblk_request) | TAILQ_HEAD(, vtblk_request) | ||||||||||
vtblk_req_ready; | vtblk_req_ready; | ||||||||||
struct vtblk_request *vtblk_req_ordered; | struct vtblk_request *vtblk_req_ordered; | ||||||||||
uint32_t vtblk_max_size; | |||||||||||
int vtblk_max_nsegs; | int vtblk_max_nsegs; | ||||||||||
int vtblk_request_count; | int vtblk_request_count; | ||||||||||
enum vtblk_cache_mode vtblk_write_cache; | enum vtblk_cache_mode vtblk_write_cache; | ||||||||||
struct bio_queue vtblk_dump_queue; | struct bio_queue vtblk_dump_queue; | ||||||||||
struct vtblk_request vtblk_dump_request; | struct vtblk_request vtblk_dump_request; | ||||||||||
}; | }; | ||||||||||
▲ Show 20 Lines • Show All 206 Lines • ▼ Show 20 Lines | vtblk_attach(device_t dev) | ||||||||||
TAILQ_INIT(&sc->vtblk_req_free); | TAILQ_INIT(&sc->vtblk_req_free); | ||||||||||
TAILQ_INIT(&sc->vtblk_req_ready); | TAILQ_INIT(&sc->vtblk_req_ready); | ||||||||||
vtblk_setup_sysctl(sc); | vtblk_setup_sysctl(sc); | ||||||||||
vtblk_setup_features(sc); | vtblk_setup_features(sc); | ||||||||||
vtblk_read_config(sc, &blkcfg); | vtblk_read_config(sc, &blkcfg); | ||||||||||
/* | if (virtio_with_feature(dev, VIRTIO_BLK_F_SIZE_MAX)) | ||||||||||
* With the current sglist(9) implementation, it is not easy | sc->vtblk_max_size = MIN(blkcfg.size_max, MAXPHYS); | ||||||||||
delphij: (Please rebase with the MAXPHYS -> maxphys conversion) | |||||||||||
* for us to support a maximum segment size as adjacent | else | ||||||||||
* segments are coalesced. For now, just make sure it's larger | sc->vtblk_max_size = UINT32_MAX; | ||||||||||
* than the maximum supported transfer size. | |||||||||||
*/ | |||||||||||
if (virtio_with_feature(dev, VIRTIO_BLK_F_SIZE_MAX)) { | |||||||||||
if (blkcfg.size_max < MAXPHYS) { | |||||||||||
error = ENOTSUP; | |||||||||||
device_printf(dev, "host requires unsupported " | |||||||||||
"maximum segment size feature\n"); | |||||||||||
goto fail; | |||||||||||
} | |||||||||||
} | |||||||||||
sc->vtblk_max_nsegs = vtblk_maximum_segments(sc, &blkcfg); | sc->vtblk_max_nsegs = vtblk_maximum_segments(sc, &blkcfg); | ||||||||||
if (sc->vtblk_max_nsegs <= VTBLK_MIN_SEGMENTS) { | if (sc->vtblk_max_nsegs <= VTBLK_MIN_SEGMENTS) { | ||||||||||
Not Done Inline Actions
delphij: | |||||||||||
error = EINVAL; | error = EINVAL; | ||||||||||
device_printf(dev, "fewer than minimum number of segments " | device_printf(dev, "fewer than minimum number of segments " | ||||||||||
"allowed: %d\n", sc->vtblk_max_nsegs); | "allowed: %d\n", sc->vtblk_max_nsegs); | ||||||||||
goto fail; | goto fail; | ||||||||||
} | } | ||||||||||
sc->vtblk_sglist = sglist_alloc(sc->vtblk_max_nsegs, M_NOWAIT); | sc->vtblk_sglist = sglist_alloc(sc->vtblk_max_nsegs, M_NOWAIT); | ||||||||||
if (sc->vtblk_sglist == NULL) { | if (sc->vtblk_sglist == NULL) { | ||||||||||
error = ENOMEM; | error = ENOMEM; | ||||||||||
device_printf(dev, "cannot allocate sglist\n"); | device_printf(dev, "cannot allocate sglist\n"); | ||||||||||
goto fail; | goto fail; | ||||||||||
} | } | ||||||||||
if (virtio_with_feature(dev, VIRTIO_BLK_F_SIZE_MAX)) { | |||||||||||
sc->vtblk_sglist_stash = sglist_alloc(sc->vtblk_max_nsegs, | |||||||||||
M_NOWAIT); | |||||||||||
if (sc->vtblk_sglist_stash == NULL) { | |||||||||||
error = ENOMEM; | |||||||||||
device_printf(dev, "cannot allocate sglist stash\n"); | |||||||||||
goto fail; | |||||||||||
} | |||||||||||
} | |||||||||||
error = vtblk_alloc_virtqueue(sc); | error = vtblk_alloc_virtqueue(sc); | ||||||||||
if (error) { | if (error) { | ||||||||||
device_printf(dev, "cannot allocate virtqueue\n"); | device_printf(dev, "cannot allocate virtqueue\n"); | ||||||||||
goto fail; | goto fail; | ||||||||||
} | } | ||||||||||
error = vtblk_request_prealloc(sc); | error = vtblk_request_prealloc(sc); | ||||||||||
Show All 40 Lines | if (sc->vtblk_disk != NULL) { | ||||||||||
disk_destroy(sc->vtblk_disk); | disk_destroy(sc->vtblk_disk); | ||||||||||
sc->vtblk_disk = NULL; | sc->vtblk_disk = NULL; | ||||||||||
} | } | ||||||||||
if (sc->vtblk_sglist != NULL) { | if (sc->vtblk_sglist != NULL) { | ||||||||||
sglist_free(sc->vtblk_sglist); | sglist_free(sc->vtblk_sglist); | ||||||||||
sc->vtblk_sglist = NULL; | sc->vtblk_sglist = NULL; | ||||||||||
} | } | ||||||||||
if (sc->vtblk_sglist_stash != NULL) { | |||||||||||
sglist_free(sc->vtblk_sglist_stash); | |||||||||||
sc->vtblk_sglist_stash = NULL; | |||||||||||
} | |||||||||||
VTBLK_LOCK_DESTROY(sc); | VTBLK_LOCK_DESTROY(sc); | ||||||||||
return (0); | return (0); | ||||||||||
} | } | ||||||||||
static int | static int | ||||||||||
vtblk_suspend(device_t dev) | vtblk_suspend(device_t dev) | ||||||||||
▲ Show 20 Lines • Show All 206 Lines • ▼ Show 20 Lines | |||||||||||
{ | { | ||||||||||
device_t dev; | device_t dev; | ||||||||||
int nsegs; | int nsegs; | ||||||||||
dev = sc->vtblk_dev; | dev = sc->vtblk_dev; | ||||||||||
nsegs = VTBLK_MIN_SEGMENTS; | nsegs = VTBLK_MIN_SEGMENTS; | ||||||||||
if (virtio_with_feature(dev, VIRTIO_BLK_F_SEG_MAX)) { | if (virtio_with_feature(dev, VIRTIO_BLK_F_SEG_MAX)) { | ||||||||||
nsegs += MIN(blkcfg->seg_max, MAXPHYS / PAGE_SIZE + 1); | int segsz; | ||||||||||
segsz = MIN(PAGE_SIZE, sc->vtblk_max_size); | |||||||||||
nsegs += MIN(blkcfg->seg_max, MAXPHYS / segsz + 1); | |||||||||||
if (sc->vtblk_flags & VTBLK_FLAG_INDIRECT) | if (sc->vtblk_flags & VTBLK_FLAG_INDIRECT) | ||||||||||
nsegs = MIN(nsegs, VIRTIO_MAX_INDIRECT); | nsegs = MIN(nsegs, VIRTIO_MAX_INDIRECT); | ||||||||||
} else | } else | ||||||||||
nsegs += 1; | nsegs += 1; | ||||||||||
return (nsegs); | return (nsegs); | ||||||||||
} | } | ||||||||||
▲ Show 20 Lines • Show All 79 Lines • ▼ Show 20 Lines | vtblk_alloc_disk(struct vtblk_softc *sc, struct virtio_blk_config *blkcfg) | ||||||||||
* maximum I/O size. But in practice, since QEMU advertises 128 | * maximum I/O size. But in practice, since QEMU advertises 128 | ||||||||||
* segments, this gives us a maximum IO size of 125 * PAGE_SIZE, | * segments, this gives us a maximum IO size of 125 * PAGE_SIZE, | ||||||||||
* which is typically greater than MAXPHYS. Eventually we should | * which is typically greater than MAXPHYS. Eventually we should | ||||||||||
* just advertise MAXPHYS and split buffers that are too big. | * just advertise MAXPHYS and split buffers that are too big. | ||||||||||
* | * | ||||||||||
* Note we must subtract one additional segment in case of non | * Note we must subtract one additional segment in case of non | ||||||||||
* page aligned buffers. | * page aligned buffers. | ||||||||||
*/ | */ | ||||||||||
dp->d_maxsize = (sc->vtblk_max_nsegs - VTBLK_MIN_SEGMENTS - 1) * | dp->d_maxsize = (sc->vtblk_max_nsegs - VTBLK_MIN_SEGMENTS - 1) * | ||||||||||
Not Done — inline comment (delphij): Based on the context, it appears to me that `d_maxsize` should be determined by the smaller of `maxphys` and `blkcfg.size_max` after this change; otherwise we could break atomic semantics if an I/O request has to be split into smaller chunks without the upper layers knowing. Maybe it's time to make that adjustment here?
PAGE_SIZE; | PAGE_SIZE; | ||||||||||
if (dp->d_maxsize < PAGE_SIZE) | if (dp->d_maxsize < PAGE_SIZE) | ||||||||||
dp->d_maxsize = PAGE_SIZE; /* XXX */ | dp->d_maxsize = PAGE_SIZE; /* XXX */ | ||||||||||
Not Done — inline comment (delphij): We should probably assert that `d_maxsize` is always greater than `PAGE_SIZE` here, and fail attachment when this can't be satisfied. See the proposed "fewer than minimum number of segments" change above.
if (virtio_with_feature(dev, VIRTIO_BLK_F_GEOMETRY)) { | if (virtio_with_feature(dev, VIRTIO_BLK_F_GEOMETRY)) { | ||||||||||
dp->d_fwsectors = blkcfg->geometry.sectors; | dp->d_fwsectors = blkcfg->geometry.sectors; | ||||||||||
dp->d_fwheads = blkcfg->geometry.heads; | dp->d_fwheads = blkcfg->geometry.heads; | ||||||||||
} | } | ||||||||||
if (virtio_with_feature(dev, VIRTIO_BLK_F_TOPOLOGY) && | if (virtio_with_feature(dev, VIRTIO_BLK_F_TOPOLOGY) && | ||||||||||
blkcfg->topology.physical_block_exp > 0) { | blkcfg->topology.physical_block_exp > 0) { | ||||||||||
▲ Show 20 Lines • Show All 175 Lines • ▼ Show 20 Lines | vtblk_request_bio(struct vtblk_softc *sc) | ||||||||||
if (bp->bio_flags & BIO_ORDERED) | if (bp->bio_flags & BIO_ORDERED) | ||||||||||
req->vbr_hdr.type |= VIRTIO_BLK_T_BARRIER; | req->vbr_hdr.type |= VIRTIO_BLK_T_BARRIER; | ||||||||||
return (req); | return (req); | ||||||||||
} | } | ||||||||||
static int | static int | ||||||||||
vtblk_sg_split_append( | |||||||||||
struct vtblk_softc *sc, struct sglist *to, vm_paddr_t addr, size_t len) | |||||||||||
{ | |||||||||||
while (len > 0) { | |||||||||||
size_t xfer; | |||||||||||
struct sglist_seg *seg; | |||||||||||
if (to->sg_nseg == 0) { | |||||||||||
xfer = MIN(len, sc->vtblk_max_size); | |||||||||||
seg = &to->sg_segs[0]; | |||||||||||
seg->ss_paddr = addr; | |||||||||||
seg->ss_len = xfer; | |||||||||||
to->sg_nseg++; | |||||||||||
len -= xfer; | |||||||||||
addr += xfer; | |||||||||||
continue; | |||||||||||
} | |||||||||||
seg = &to->sg_segs[to->sg_nseg - 1]; | |||||||||||
if (seg->ss_len == 0 || seg->ss_paddr + seg->ss_len != addr) { | |||||||||||
KASSERT(to->sg_nseg <= to->sg_maxseg, | |||||||||||
("%s: Bogus number of items in the sglist", | |||||||||||
__func__)); | |||||||||||
if (to->sg_nseg == to->sg_maxseg) | |||||||||||
return (EFBIG); | |||||||||||
xfer = MIN(len, sc->vtblk_max_size); | |||||||||||
seg = &to->sg_segs[to->sg_nseg++]; | |||||||||||
seg->ss_paddr = addr; | |||||||||||
seg->ss_len = xfer; | |||||||||||
} else { | |||||||||||
xfer = MIN(len, sc->vtblk_max_size - seg->ss_len); | |||||||||||
seg->ss_len += xfer; | |||||||||||
} | |||||||||||
len -= xfer; | |||||||||||
addr += xfer; | |||||||||||
} | |||||||||||
return (0); | |||||||||||
} | |||||||||||
static void | |||||||||||
vtblk_sg_split(struct vtblk_softc *sc, struct sglist *to, struct sglist *from) | |||||||||||
{ | |||||||||||
unsigned int i; | |||||||||||
if ((sc->vtblk_features & VIRTIO_BLK_F_SIZE_MAX) == 0) { | |||||||||||
sglist_join(to, from); | |||||||||||
return; | |||||||||||
} | |||||||||||
for (i = 0; i < from->sg_nseg; i++) { | |||||||||||
int error; | |||||||||||
error = vtblk_sg_split_append( | |||||||||||
sc, to, from->sg_segs[i].ss_paddr, from->sg_segs[i].ss_len); | |||||||||||
if (error) | |||||||||||
panic( | |||||||||||
"%s: unexpected entries number in sglist. max: %hu", | |||||||||||
__func__, to->sg_maxseg); | |||||||||||
} | |||||||||||
} | |||||||||||
static int | |||||||||||
vtblk_request_execute(struct vtblk_softc *sc, struct vtblk_request *req) | vtblk_request_execute(struct vtblk_softc *sc, struct vtblk_request *req) | ||||||||||
{ | { | ||||||||||
struct virtqueue *vq; | struct virtqueue *vq; | ||||||||||
struct sglist *sg; | struct sglist *sg, *sg_stash; | ||||||||||
struct bio *bp; | struct bio *bp; | ||||||||||
int ordered, readable, writable, error; | int ordered, readable, writable, error; | ||||||||||
vq = sc->vtblk_vq; | vq = sc->vtblk_vq; | ||||||||||
sg = sc->vtblk_sglist; | sg = sc->vtblk_sglist; | ||||||||||
sg_stash = sc->vtblk_sglist_stash; | |||||||||||
if (sg_stash == NULL) | |||||||||||
sg_stash = sg; | |||||||||||
Not Done Inline Actions
delphij: | |||||||||||
bp = req->vbr_bp; | bp = req->vbr_bp; | ||||||||||
ordered = 0; | ordered = 0; | ||||||||||
writable = 0; | writable = 0; | ||||||||||
/* | /* | ||||||||||
* Some hosts (such as bhyve) do not implement the barrier feature, | * Some hosts (such as bhyve) do not implement the barrier feature, | ||||||||||
* so we emulate it in the driver by allowing the barrier request | * so we emulate it in the driver by allowing the barrier request | ||||||||||
* to be the only one in flight. | * to be the only one in flight. | ||||||||||
*/ | */ | ||||||||||
if ((sc->vtblk_flags & VTBLK_FLAG_BARRIER) == 0) { | if ((sc->vtblk_flags & VTBLK_FLAG_BARRIER) == 0) { | ||||||||||
if (sc->vtblk_req_ordered != NULL) | if (sc->vtblk_req_ordered != NULL) | ||||||||||
return (EBUSY); | return (EBUSY); | ||||||||||
if (bp->bio_flags & BIO_ORDERED) { | if (bp->bio_flags & BIO_ORDERED) { | ||||||||||
if (!virtqueue_empty(vq)) | if (!virtqueue_empty(vq)) | ||||||||||
return (EBUSY); | return (EBUSY); | ||||||||||
ordered = 1; | ordered = 1; | ||||||||||
req->vbr_hdr.type &= ~VIRTIO_BLK_T_BARRIER; | req->vbr_hdr.type &= ~VIRTIO_BLK_T_BARRIER; | ||||||||||
} | } | ||||||||||
} | } | ||||||||||
sglist_reset(sg_stash); | |||||||||||
if (sg != sg_stash) | |||||||||||
sglist_reset(sg); | sglist_reset(sg); | ||||||||||
sglist_append(sg, &req->vbr_hdr, sizeof(struct virtio_blk_outhdr)); | sglist_append(sg_stash, &req->vbr_hdr, sizeof(struct virtio_blk_outhdr)); | ||||||||||
if (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE) { | if (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE) { | ||||||||||
error = sglist_append_bio(sg, bp); | error = sglist_append_bio(sg_stash, bp); | ||||||||||
if (error || sg->sg_nseg == sg->sg_maxseg) { | if (error || sg_stash->sg_nseg == sg_stash->sg_maxseg) { | ||||||||||
panic("%s: bio %p data buffer too big %d", | panic("%s: bio %p data buffer too big %d", | ||||||||||
__func__, bp, error); | __func__, bp, error); | ||||||||||
} | } | ||||||||||
/* BIO_READ means the host writes into our buffer. */ | /* BIO_READ means the host writes into our buffer. */ | ||||||||||
if (bp->bio_cmd == BIO_READ) | if (bp->bio_cmd == BIO_READ) | ||||||||||
writable = sg->sg_nseg - 1; | writable = sg_stash->sg_nseg - 1; | ||||||||||
} else if (bp->bio_cmd == BIO_DELETE) { | } else if (bp->bio_cmd == BIO_DELETE) { | ||||||||||
struct virtio_blk_discard_write_zeroes *discard; | struct virtio_blk_discard_write_zeroes *discard; | ||||||||||
discard = malloc(sizeof(*discard), M_DEVBUF, M_NOWAIT | M_ZERO); | discard = malloc(sizeof(*discard), M_DEVBUF, M_NOWAIT | M_ZERO); | ||||||||||
if (discard == NULL) | if (discard == NULL) | ||||||||||
return (ENOMEM); | return (ENOMEM); | ||||||||||
discard->sector = bp->bio_offset / VTBLK_BSIZE; | discard->sector = bp->bio_offset / VTBLK_BSIZE; | ||||||||||
discard->num_sectors = bp->bio_bcount / VTBLK_BSIZE; | discard->num_sectors = bp->bio_bcount / VTBLK_BSIZE; | ||||||||||
bp->bio_driver1 = discard; | bp->bio_driver1 = discard; | ||||||||||
error = sglist_append(sg, discard, sizeof(*discard)); | error = sglist_append(sg_stash, discard, sizeof(*discard)); | ||||||||||
if (error || sg->sg_nseg == sg->sg_maxseg) { | if (error || sg_stash->sg_nseg == sg_stash->sg_maxseg) { | ||||||||||
panic("%s: bio %p data buffer too big %d", | panic("%s: bio %p data buffer too big %d", | ||||||||||
__func__, bp, error); | __func__, bp, error); | ||||||||||
} | } | ||||||||||
} | } | ||||||||||
writable++; | writable++; | ||||||||||
sglist_append(sg, &req->vbr_ack, sizeof(uint8_t)); | sglist_append(sg_stash, &req->vbr_ack, sizeof(uint8_t)); | ||||||||||
if (sg != sg_stash) { | |||||||||||
/* | |||||||||||
* Break down the sglist if VIRTIO_BLK_F_SIZE_MAX is set | |||||||||||
*/ | |||||||||||
vtblk_sg_split(sc, sg, sg_stash); | |||||||||||
} | |||||||||||
readable = sg->sg_nseg - writable; | readable = sg->sg_nseg - writable; | ||||||||||
error = virtqueue_enqueue(vq, req, sg, readable, writable); | error = virtqueue_enqueue(vq, req, sg, readable, writable); | ||||||||||
if (error == 0 && ordered) | if (error == 0 && ordered) | ||||||||||
sc->vtblk_req_ordered = req; | sc->vtblk_req_ordered = req; | ||||||||||
return (error); | return (error); | ||||||||||
} | } | ||||||||||
▲ Show 20 Lines • Show All 471 Lines • Show Last 20 Lines |
(Please rebase with the MAXPHYS -> maxphys conversion)