Index: sys/dev/virtio/block/virtio_blk.h =================================================================== --- sys/dev/virtio/block/virtio_blk.h +++ sys/dev/virtio/block/virtio_blk.h @@ -33,19 +33,26 @@ #ifndef _VIRTIO_BLK_H #define _VIRTIO_BLK_H +#define VTBLK_BSIZE 512 /* Bytes per sector unit used for capacity and request offsets */ + /* Feature bits */ -#define VIRTIO_BLK_F_BARRIER 0x0001 /* Does host support barriers? */ -#define VIRTIO_BLK_F_SIZE_MAX 0x0002 /* Indicates maximum segment size */ -#define VIRTIO_BLK_F_SEG_MAX 0x0004 /* Indicates maximum # of segments */ -#define VIRTIO_BLK_F_GEOMETRY 0x0010 /* Legacy geometry available */ -#define VIRTIO_BLK_F_RO 0x0020 /* Disk is read-only */ -#define VIRTIO_BLK_F_BLK_SIZE 0x0040 /* Block size of disk is available*/ -#define VIRTIO_BLK_F_SCSI 0x0080 /* Supports scsi command passthru */ -#define VIRTIO_BLK_F_WCE 0x0200 /* Writeback mode enabled after reset */ -#define VIRTIO_BLK_F_TOPOLOGY 0x0400 /* Topology information is available */ -#define VIRTIO_BLK_F_CONFIG_WCE 0x0800 /* Writeback mode available in config */ - -#define VIRTIO_BLK_ID_BYTES 20 /* ID string length */ + +#define VIRTIO_BLK_F_BARRIER 0x0001 /* Does host support barriers? 
*/ +#define VIRTIO_BLK_F_SIZE_MAX 0x0002 /* Indicates maximum segment size */ +#define VIRTIO_BLK_F_SEG_MAX 0x0004 /* Indicates maximum # of segments */ +#define VIRTIO_BLK_F_GEOMETRY 0x0010 /* Legacy geometry available */ +#define VIRTIO_BLK_F_RO 0x0020 /* Disk is read-only */ +#define VIRTIO_BLK_F_BLK_SIZE 0x0040 /* Block size of disk is available*/ +#define VIRTIO_BLK_F_SCSI 0x0080 /* Supports scsi command passthru */ +#define VIRTIO_BLK_F_FLUSH 0x0200 /* Flush command supported */ +#define VIRTIO_BLK_F_WCE 0x0200 /* Legacy alias for FLUSH */ +#define VIRTIO_BLK_F_TOPOLOGY 0x0400 /* Topology information is available */ +#define VIRTIO_BLK_F_CONFIG_WCE 0x0800 /* Writeback mode available in config */ +#define VIRTIO_BLK_F_MQ 0x1000 /* Support more than one vq */ +#define VIRTIO_BLK_F_DISCARD 0x2000 /* Trim blocks */ +#define VIRTIO_BLK_F_WRITE_ZEROES 0x4000 /* Write zeros */ + +#define VIRTIO_BLK_ID_BYTES 20 /* ID string length */ struct virtio_blk_config { /* The capacity (in 512-byte sectors). */ @@ -66,15 +73,29 @@ /* Topology of the device (if VIRTIO_BLK_F_TOPOLOGY) */ struct virtio_blk_topology { + /* Exponent for physical block per logical block. */ uint8_t physical_block_exp; + /* Alignment offset in logical blocks. */ uint8_t alignment_offset; + /* Minimum I/O size without performance penalty in logical + * blocks. */ uint16_t min_io_size; + /* Optimal sustained I/O size in logical blocks. */ uint32_t opt_io_size; } topology; /* Writeback mode (if VIRTIO_BLK_F_CONFIG_WCE) */ - uint8_t writeback; - + uint8_t wce; + uint8_t unused; + /* Number of vqs, only available when VIRTIO_BLK_F_MQ is set */ + uint16_t num_queues; + uint32_t max_discard_sectors; /* In 512-byte sectors (if VIRTIO_BLK_F_DISCARD) */ + uint32_t max_discard_seg; /* (if VIRTIO_BLK_F_DISCARD) */ + uint32_t discard_sector_alignment; /* (if VIRTIO_BLK_F_DISCARD) */ + uint32_t max_write_zeroes_sectors; /* write_zeroes fields: not fetched by any hunk of this change */ + uint32_t max_write_zeroes_seg; + uint8_t write_zeroes_may_unmap; + uint8_t unused1[3]; } __packed; /* @@ -89,23 +110,34 @@ */ /* These two define direction. 
*/ -#define VIRTIO_BLK_T_IN 0 -#define VIRTIO_BLK_T_OUT 1 +#define VIRTIO_BLK_T_IN 0 +#define VIRTIO_BLK_T_OUT 1 /* This bit says it's a scsi command, not an actual read or write. */ -#define VIRTIO_BLK_T_SCSI_CMD 2 +#define VIRTIO_BLK_T_SCSI_CMD 2 +#define VIRTIO_BLK_T_SCSI_CMD_OUT 3 /* Cache flush command */ -#define VIRTIO_BLK_T_FLUSH 4 +#define VIRTIO_BLK_T_FLUSH 4 +#define VIRTIO_BLK_T_FLUSH_OUT 5 /* Get device ID command */ -#define VIRTIO_BLK_T_GET_ID 8 +#define VIRTIO_BLK_T_GET_ID 8 + +/* Discard command */ +#define VIRTIO_BLK_T_DISCARD 11 + +/* Write zeros command */ +#define VIRTIO_BLK_T_WRITE_ZEROES 13 /* Barrier before this op. */ -#define VIRTIO_BLK_T_BARRIER 0x80000000 +#define VIRTIO_BLK_T_BARRIER 0x80000000 /* ID string length */ -#define VIRTIO_BLK_ID_BYTES 20 +#define VIRTIO_BLK_ID_BYTES 20 + +/* Unmap this range (only valid for write zeroes command) */ +#define VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP 0x00000001 /* This is the first element of the read scatter-gather list. */ struct virtio_blk_outhdr { @@ -117,6 +149,15 @@ uint64_t sector; }; +struct virtio_blk_discard_write_zeroes { /* Appended to the request sglist for BIO_DELETE */ + uint64_t sector; /* Start of the range, in 512-byte sectors */ + uint32_t num_sectors; /* Length of the range, in 512-byte sectors */ + struct { + uint32_t unmap:1; /* presumably mirrors VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP - confirm */ + uint32_t reserved:31; + } flags; +}; + struct virtio_scsi_inhdr { uint32_t errors; uint32_t data_len; Index: sys/dev/virtio/block/virtio_blk.c =================================================================== --- sys/dev/virtio/block/virtio_blk.c +++ sys/dev/virtio/block/virtio_blk.c @@ -81,6 +81,7 @@ #define VTBLK_FLAG_SUSPEND 0x0008 #define VTBLK_FLAG_BARRIER 0x0010 #define VTBLK_FLAG_WC_CONFIG 0x0020 +#define VTBLK_FLAG_DISCARD 0x0040 /* Set when the device has VIRTIO_BLK_F_DISCARD */ struct virtqueue *vtblk_vq; struct sglist *vtblk_sglist; @@ -112,6 +113,7 @@ { VIRTIO_BLK_F_WCE, "WriteCache" }, { VIRTIO_BLK_F_TOPOLOGY, "Topology" }, { VIRTIO_BLK_F_CONFIG_WCE, "ConfigWCE" }, + { VIRTIO_BLK_F_DISCARD, "Discard" }, { 0, NULL } }; @@ -210,6 +212,7 @@ VIRTIO_BLK_F_WCE | \ VIRTIO_BLK_F_TOPOLOGY | \ VIRTIO_BLK_F_CONFIG_WCE | \ + VIRTIO_BLK_F_DISCARD 
| \ VIRTIO_RING_F_INDIRECT_DESC) #define VTBLK_MTX(_sc) &(_sc)->vtblk_mtx @@ -459,7 +462,7 @@ vtblk_read_config(sc, &blkcfg); /* Capacity is always in 512-byte units. */ - capacity = blkcfg.capacity * 512; + capacity = blkcfg.capacity * VTBLK_BSIZE; if (sc->vtblk_disk->d_mediasize != capacity) vtblk_resize_disk(sc, capacity); @@ -544,13 +547,14 @@ * be a better way to report our readonly'ness to GEOM above. */ if (sc->vtblk_flags & VTBLK_FLAG_READONLY && - (bp->bio_cmd == BIO_WRITE || bp->bio_cmd == BIO_FLUSH)) { + (bp->bio_cmd == BIO_WRITE || bp->bio_cmd == BIO_FLUSH || + bp->bio_cmd == BIO_DELETE)) { vtblk_bio_done(sc, bp, EROFS); return; } if ((bp->bio_cmd != BIO_READ) && (bp->bio_cmd != BIO_WRITE) && - (bp->bio_cmd != BIO_FLUSH)) { + (bp->bio_cmd != BIO_FLUSH) && (bp->bio_cmd != BIO_DELETE)) { vtblk_bio_done(sc, bp, EOPNOTSUPP); return; } @@ -563,6 +567,13 @@ return; } + if ((bp->bio_cmd == BIO_DELETE) && + !(sc->vtblk_flags & VTBLK_FLAG_DISCARD)) { /* flag is set only when the device has VIRTIO_BLK_F_DISCARD */ + VTBLK_UNLOCK(sc); + vtblk_bio_done(sc, bp, EOPNOTSUPP); + return; + } + bioq_insert_tail(&sc->vtblk_bioq, bp); vtblk_startio(sc); @@ -598,6 +609,8 @@ sc->vtblk_flags |= VTBLK_FLAG_BARRIER; if (virtio_with_feature(dev, VIRTIO_BLK_F_CONFIG_WCE)) sc->vtblk_flags |= VTBLK_FLAG_WC_CONFIG; + if (virtio_with_feature(dev, VIRTIO_BLK_F_DISCARD)) + sc->vtblk_flags |= VTBLK_FLAG_DISCARD; } static int @@ -687,12 +700,12 @@ dp->d_dump = vtblk_dump; /* Capacity is always in 512-byte units. */ - dp->d_mediasize = blkcfg->capacity * 512; + dp->d_mediasize = blkcfg->capacity * VTBLK_BSIZE; if (virtio_with_feature(dev, VIRTIO_BLK_F_BLK_SIZE)) dp->d_sectorsize = blkcfg->blk_size; else - dp->d_sectorsize = 512; + dp->d_sectorsize = VTBLK_BSIZE; /* * The VirtIO maximum I/O size is given in terms of segments. 
@@ -726,6 +739,11 @@ dp->d_stripesize; } + if (virtio_with_feature(dev, VIRTIO_BLK_F_DISCARD)) { + dp->d_flags |= DISKFLAG_CANDELETE; + dp->d_delmaxsize = (off_t)blkcfg->max_discard_sectors * VTBLK_BSIZE; /* widen first: a 32-bit product wraps at 4 GiB */ + } + if (vtblk_write_cache_enabled(sc, blkcfg) != 0) sc->vtblk_write_cache = VTBLK_CACHE_WRITEBACK; else @@ -876,11 +894,15 @@ break; case BIO_READ: req->vbr_hdr.type = VIRTIO_BLK_T_IN; - req->vbr_hdr.sector = bp->bio_offset / 512; + req->vbr_hdr.sector = bp->bio_offset / VTBLK_BSIZE; break; case BIO_WRITE: req->vbr_hdr.type = VIRTIO_BLK_T_OUT; - req->vbr_hdr.sector = bp->bio_offset / 512; + req->vbr_hdr.sector = bp->bio_offset / VTBLK_BSIZE; + break; + case BIO_DELETE: + req->vbr_hdr.type = VIRTIO_BLK_T_DISCARD; + req->vbr_hdr.sector = bp->bio_offset / VTBLK_BSIZE; break; default: panic("%s: bio with unhandled cmd: %d", __func__, bp->bio_cmd); @@ -935,6 +957,20 @@ /* BIO_READ means the host writes into our buffer. */ if (bp->bio_cmd == BIO_READ) writable = sg->sg_nseg - 1; + } else if (bp->bio_cmd == BIO_DELETE) { + struct virtio_blk_discard_write_zeroes *discard; + + discard = malloc(sizeof(*discard), M_DEVBUF, M_NOWAIT | M_ZERO); + if (discard == NULL) + return (ENOMEM); + discard->sector = bp->bio_offset / VTBLK_BSIZE; + discard->num_sectors = bp->bio_bcount / VTBLK_BSIZE; + bp->bio_driver1 = discard; /* released on completion, just before biodone() */ + error = sglist_append(sg, discard, sizeof(*discard)); + if (error || sg->sg_nseg == sg->sg_maxseg) { + panic("%s: bio %p data buffer too big %d", + __func__, bp, error); + } } writable++; @@ -1095,6 +1131,11 @@ bp->bio_flags |= BIO_ERROR; } + if (bp->bio_driver1 != NULL) { + free(bp->bio_driver1, M_DEVBUF); + bp->bio_driver1 = NULL; + } + biodone(bp); } @@ -1124,7 +1165,12 @@ VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_GEOMETRY, geometry, blkcfg); VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_BLK_SIZE, blk_size, blkcfg); VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_TOPOLOGY, topology, blkcfg); - VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_CONFIG_WCE, writeback, blkcfg); + VTBLK_GET_CONFIG(dev, 
VIRTIO_BLK_F_CONFIG_WCE, wce, blkcfg); + VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_DISCARD, max_discard_sectors, + blkcfg); + VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_DISCARD, max_discard_seg, blkcfg); + VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_DISCARD, discard_sector_alignment, + blkcfg); } #undef VTBLK_GET_CONFIG @@ -1282,7 +1328,7 @@ req->vbr_ack = -1; req->vbr_hdr.type = VIRTIO_BLK_T_OUT; req->vbr_hdr.ioprio = 1; - req->vbr_hdr.sector = offset / 512; + req->vbr_hdr.sector = offset / VTBLK_BSIZE; /* byte offset -> 512-byte sector */ req->vbr_bp = &buf; g_reset_bio(&buf); @@ -1331,7 +1377,7 @@ /* Set either writeback (1) or writethrough (0) mode. */ virtio_write_dev_config_1(sc->vtblk_dev, - offsetof(struct virtio_blk_config, writeback), wc); + offsetof(struct virtio_blk_config, wce), wc); } static int @@ -1346,7 +1392,7 @@ if (wc >= 0 && wc < VTBLK_CACHE_MAX) vtblk_set_write_cache(sc, wc); else - wc = blkcfg->writeback; + wc = blkcfg->wce; /* wc outside [0, VTBLK_CACHE_MAX): use the mode held in blkcfg */ } else wc = virtio_with_feature(sc->vtblk_dev, VIRTIO_BLK_F_WCE);