Index: head/usr.sbin/bhyve/block_if.c =================================================================== --- head/usr.sbin/bhyve/block_if.c (revision 279657) +++ head/usr.sbin/bhyve/block_if.c (revision 279658) @@ -1,629 +1,636 @@ /*- * Copyright (c) 2013 Peter Grehan * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "bhyverun.h" #include "mevent.h" #include "block_if.h" #define BLOCKIF_SIG 0xb109b109 #define BLOCKIF_MAXREQ 33 enum blockop { BOP_READ, BOP_WRITE, BOP_FLUSH }; enum blockstat { BST_FREE, BST_PEND, BST_BUSY, BST_DONE }; struct blockif_elem { TAILQ_ENTRY(blockif_elem) be_link; struct blockif_req *be_req; enum blockop be_op; enum blockstat be_status; pthread_t be_tid; }; struct blockif_ctxt { int bc_magic; int bc_fd; + int bc_ischr; int bc_rdonly; off_t bc_size; int bc_sectsz; int bc_psectsz; int bc_psectoff; pthread_t bc_btid; pthread_mutex_t bc_mtx; pthread_cond_t bc_cond; int bc_closing; /* Request elements and free/pending/busy queues */ TAILQ_HEAD(, blockif_elem) bc_freeq; TAILQ_HEAD(, blockif_elem) bc_pendq; TAILQ_HEAD(, blockif_elem) bc_busyq; u_int bc_req_count; struct blockif_elem bc_reqs[BLOCKIF_MAXREQ]; }; static pthread_once_t blockif_once = PTHREAD_ONCE_INIT; struct blockif_sig_elem { pthread_mutex_t bse_mtx; pthread_cond_t bse_cond; int bse_pending; struct blockif_sig_elem *bse_next; }; static struct blockif_sig_elem *blockif_bse_head; static int blockif_enqueue(struct blockif_ctxt *bc, struct blockif_req *breq, enum blockop op) { struct blockif_elem *be; assert(bc->bc_req_count < BLOCKIF_MAXREQ); be = TAILQ_FIRST(&bc->bc_freeq); assert(be != NULL); assert(be->be_status == BST_FREE); TAILQ_REMOVE(&bc->bc_freeq, be, be_link); be->be_status = BST_PEND; be->be_req = breq; be->be_op = op; TAILQ_INSERT_TAIL(&bc->bc_pendq, be, be_link); bc->bc_req_count++; return (0); } static int blockif_dequeue(struct blockif_ctxt *bc, struct blockif_elem **bep) { struct blockif_elem *be; if (bc->bc_req_count == 0) return (ENOENT); be = TAILQ_FIRST(&bc->bc_pendq); assert(be != NULL); assert(be->be_status == BST_PEND); TAILQ_REMOVE(&bc->bc_pendq, be, be_link); be->be_status = BST_BUSY; be->be_tid = bc->bc_btid; TAILQ_INSERT_TAIL(&bc->bc_busyq, be, be_link); *bep = be; return (0); } static void blockif_complete(struct blockif_ctxt *bc, struct blockif_elem *be) { assert(be->be_status == BST_DONE); TAILQ_REMOVE(&bc->bc_busyq, be, be_link); be->be_tid = 0; be->be_status = BST_FREE; be->be_req = NULL; TAILQ_INSERT_TAIL(&bc->bc_freeq, be, be_link); bc->bc_req_count--; } static void blockif_proc(struct blockif_ctxt *bc, struct blockif_elem *be) { struct blockif_req *br; int err; br = be->be_req; err = 0; switch (be->be_op) { case BOP_READ: if (preadv(bc->bc_fd, br->br_iov, br->br_iovcnt, br->br_offset) < 0) err = errno; break; case BOP_WRITE: if (bc->bc_rdonly) err = EROFS; else if (pwritev(bc->bc_fd, br->br_iov, br->br_iovcnt, br->br_offset) < 0) err = errno; break; case BOP_FLUSH: + if (bc->bc_ischr) { + if (ioctl(bc->bc_fd, DIOCGFLUSH)) + err = errno; + } else if (fsync(bc->bc_fd)) + err = errno; break; default: err = EINVAL; break; } be->be_status = BST_DONE; (*br->br_callback)(br, err); } static void * blockif_thr(void *arg) { struct blockif_ctxt *bc; struct blockif_elem *be; bc = arg; for (;;) { pthread_mutex_lock(&bc->bc_mtx); while (!blockif_dequeue(bc, &be)) { pthread_mutex_unlock(&bc->bc_mtx); blockif_proc(bc, be); pthread_mutex_lock(&bc->bc_mtx); blockif_complete(bc, be); } pthread_cond_wait(&bc->bc_cond, &bc->bc_mtx); pthread_mutex_unlock(&bc->bc_mtx); /* * Check ctxt status here to see if exit requested */ if (bc->bc_closing) pthread_exit(NULL); } /* Not reached */ return (NULL); } static void blockif_sigcont_handler(int signal, enum ev_type type, void *arg) { struct blockif_sig_elem *bse; for (;;) { /* * Process the entire list even if not intended for * this thread. */ do { bse = blockif_bse_head; if (bse == NULL) return; } while (!atomic_cmpset_ptr((uintptr_t *)&blockif_bse_head, (uintptr_t)bse, (uintptr_t)bse->bse_next)); pthread_mutex_lock(&bse->bse_mtx); bse->bse_pending = 0; pthread_cond_signal(&bse->bse_cond); pthread_mutex_unlock(&bse->bse_mtx); } } static void blockif_init(void) { mevent_add(SIGCONT, EVF_SIGNAL, blockif_sigcont_handler, NULL); (void) signal(SIGCONT, SIG_IGN); } struct blockif_ctxt * blockif_open(const char *optstr, const char *ident) { char tname[MAXCOMLEN + 1]; char *nopt, *xopts; struct blockif_ctxt *bc; struct stat sbuf; off_t size, psectsz, psectoff; int extra, fd, i, sectsz; int nocache, sync, ro; pthread_once(&blockif_once, blockif_init); nocache = 0; sync = 0; ro = 0; /* * The first element in the optstring is always a pathname. * Optional elements follow */ nopt = strdup(optstr); for (xopts = strtok(nopt, ","); xopts != NULL; xopts = strtok(NULL, ",")) { if (!strcmp(xopts, "nocache")) nocache = 1; else if (!strcmp(xopts, "sync")) sync = 1; else if (!strcmp(xopts, "ro")) ro = 1; } extra = 0; if (nocache) extra |= O_DIRECT; if (sync) extra |= O_SYNC; fd = open(nopt, (ro ? O_RDONLY : O_RDWR) | extra); if (fd < 0 && !ro) { /* Attempt a r/w fail with a r/o open */ fd = open(nopt, O_RDONLY | extra); ro = 1; } if (fd < 0) { perror("Could not open backing file"); return (NULL); } if (fstat(fd, &sbuf) < 0) { perror("Could not stat backing file"); close(fd); return (NULL); } /* * Deal with raw devices */ size = sbuf.st_size; sectsz = DEV_BSIZE; psectsz = psectoff = 0; if (S_ISCHR(sbuf.st_mode)) { if (ioctl(fd, DIOCGMEDIASIZE, &size) < 0 || ioctl(fd, DIOCGSECTORSIZE, §sz)) { perror("Could not fetch dev blk/sector size"); close(fd); return (NULL); } assert(size != 0); assert(sectsz != 0); if (ioctl(fd, DIOCGSTRIPESIZE, &psectsz) == 0 && psectsz > 0) ioctl(fd, DIOCGSTRIPEOFFSET, &psectoff); } else psectsz = sbuf.st_blksize; bc = calloc(1, sizeof(struct blockif_ctxt)); if (bc == NULL) { close(fd); return (NULL); } bc->bc_magic = BLOCKIF_SIG; bc->bc_fd = fd; + bc->bc_ischr = S_ISCHR(sbuf.st_mode); bc->bc_rdonly = ro; bc->bc_size = size; bc->bc_sectsz = sectsz; bc->bc_psectsz = psectsz; bc->bc_psectoff = psectoff; pthread_mutex_init(&bc->bc_mtx, NULL); pthread_cond_init(&bc->bc_cond, NULL); TAILQ_INIT(&bc->bc_freeq); TAILQ_INIT(&bc->bc_pendq); TAILQ_INIT(&bc->bc_busyq); bc->bc_req_count = 0; for (i = 0; i < BLOCKIF_MAXREQ; i++) { bc->bc_reqs[i].be_status = BST_FREE; TAILQ_INSERT_HEAD(&bc->bc_freeq, &bc->bc_reqs[i], be_link); } pthread_create(&bc->bc_btid, NULL, blockif_thr, bc); snprintf(tname, sizeof(tname), "blk-%s", ident); pthread_set_name_np(bc->bc_btid, tname); return (bc); } static int blockif_request(struct blockif_ctxt *bc, struct blockif_req *breq, enum blockop op) { int err; err = 0; pthread_mutex_lock(&bc->bc_mtx); if (bc->bc_req_count < BLOCKIF_MAXREQ) { /* * Enqueue and inform the block i/o thread * that there is work available */ blockif_enqueue(bc, breq, op); pthread_cond_signal(&bc->bc_cond); } else { /* * Callers are not allowed to enqueue more than * the specified blockif queue limit. Return an * error to indicate that the queue length has been * exceeded. */ err = E2BIG; } pthread_mutex_unlock(&bc->bc_mtx); return (err); } int blockif_read(struct blockif_ctxt *bc, struct blockif_req *breq) { assert(bc->bc_magic == BLOCKIF_SIG); return (blockif_request(bc, breq, BOP_READ)); } int blockif_write(struct blockif_ctxt *bc, struct blockif_req *breq) { assert(bc->bc_magic == BLOCKIF_SIG); return (blockif_request(bc, breq, BOP_WRITE)); } int blockif_flush(struct blockif_ctxt *bc, struct blockif_req *breq) { assert(bc->bc_magic == BLOCKIF_SIG); return (blockif_request(bc, breq, BOP_FLUSH)); } int blockif_cancel(struct blockif_ctxt *bc, struct blockif_req *breq) { struct blockif_elem *be; assert(bc->bc_magic == BLOCKIF_SIG); pthread_mutex_lock(&bc->bc_mtx); /* * Check pending requests. */ TAILQ_FOREACH(be, &bc->bc_pendq, be_link) { if (be->be_req == breq) break; } if (be != NULL) { /* * Found it. */ TAILQ_REMOVE(&bc->bc_pendq, be, be_link); be->be_status = BST_FREE; be->be_req = NULL; TAILQ_INSERT_TAIL(&bc->bc_freeq, be, be_link); bc->bc_req_count--; pthread_mutex_unlock(&bc->bc_mtx); return (0); } /* * Check in-flight requests. */ TAILQ_FOREACH(be, &bc->bc_busyq, be_link) { if (be->be_req == breq) break; } if (be == NULL) { /* * Didn't find it. */ pthread_mutex_unlock(&bc->bc_mtx); return (EINVAL); } /* * Interrupt the processing thread to force it return * prematurely via it's normal callback path. */ while (be->be_status == BST_BUSY) { struct blockif_sig_elem bse, *old_head; pthread_mutex_init(&bse.bse_mtx, NULL); pthread_cond_init(&bse.bse_cond, NULL); bse.bse_pending = 1; do { old_head = blockif_bse_head; bse.bse_next = old_head; } while (!atomic_cmpset_ptr((uintptr_t *)&blockif_bse_head, (uintptr_t)old_head, (uintptr_t)&bse)); pthread_kill(be->be_tid, SIGCONT); pthread_mutex_lock(&bse.bse_mtx); while (bse.bse_pending) pthread_cond_wait(&bse.bse_cond, &bse.bse_mtx); pthread_mutex_unlock(&bse.bse_mtx); } pthread_mutex_unlock(&bc->bc_mtx); /* * The processing thread has been interrupted. Since it's not * clear if the callback has been invoked yet, return EBUSY. */ return (EBUSY); } int blockif_close(struct blockif_ctxt *bc) { void *jval; int err; err = 0; assert(bc->bc_magic == BLOCKIF_SIG); /* * Stop the block i/o thread */ bc->bc_closing = 1; pthread_cond_signal(&bc->bc_cond); pthread_join(bc->bc_btid, &jval); /* XXX Cancel queued i/o's ??? */ /* * Release resources */ bc->bc_magic = 0; close(bc->bc_fd); free(bc); return (0); } /* * Return virtual C/H/S values for a given block. Use the algorithm * outlined in the VHD specification to calculate values. */ void blockif_chs(struct blockif_ctxt *bc, uint16_t *c, uint8_t *h, uint8_t *s) { off_t sectors; /* total sectors of the block dev */ off_t hcyl; /* cylinders times heads */ uint16_t secpt; /* sectors per track */ uint8_t heads; assert(bc->bc_magic == BLOCKIF_SIG); sectors = bc->bc_size / bc->bc_sectsz; /* Clamp the size to the largest possible with CHS */ if (sectors > 65535UL*16*255) sectors = 65535UL*16*255; if (sectors >= 65536UL*16*63) { secpt = 255; heads = 16; hcyl = sectors / secpt; } else { secpt = 17; hcyl = sectors / secpt; heads = (hcyl + 1023) / 1024; if (heads < 4) heads = 4; if (hcyl >= (heads * 1024) || heads > 16) { secpt = 31; heads = 16; hcyl = sectors / secpt; } if (hcyl >= (heads * 1024)) { secpt = 63; heads = 16; hcyl = sectors / secpt; } } *c = hcyl / heads; *h = heads; *s = secpt; } /* * Accessors */ off_t blockif_size(struct blockif_ctxt *bc) { assert(bc->bc_magic == BLOCKIF_SIG); return (bc->bc_size); } int blockif_sectsz(struct blockif_ctxt *bc) { assert(bc->bc_magic == BLOCKIF_SIG); return (bc->bc_sectsz); } void blockif_psectsz(struct blockif_ctxt *bc, int *size, int *off) { assert(bc->bc_magic == BLOCKIF_SIG); *size = bc->bc_psectsz; *off = bc->bc_psectoff; } int blockif_queuesz(struct blockif_ctxt *bc) { assert(bc->bc_magic == BLOCKIF_SIG); return (BLOCKIF_MAXREQ - 1); } int blockif_is_ro(struct blockif_ctxt *bc) { assert(bc->bc_magic == BLOCKIF_SIG); return (bc->bc_rdonly); } Index: head/usr.sbin/bhyve/pci_virtio_block.c =================================================================== --- head/usr.sbin/bhyve/pci_virtio_block.c (revision 279657) +++ head/usr.sbin/bhyve/pci_virtio_block.c (revision 279658) @@ -1,403 +1,411 @@ /*- * Copyright (c) 2011 NetApp, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "bhyverun.h" #include "pci_emul.h" #include "virtio.h" #define VTBLK_RINGSZ 64 #define VTBLK_MAXSEGS 32 #define VTBLK_S_OK 0 #define VTBLK_S_IOERR 1 #define VTBLK_S_UNSUPP 2 #define VTBLK_BLK_ID_BYTES 20 /* Capability bits */ #define VTBLK_F_SEG_MAX (1 << 2) /* Maximum request segments */ #define VTBLK_F_BLK_SIZE (1 << 6) /* cfg block size valid */ #define VTBLK_F_TOPOLOGY (1 << 10) /* Optimal I/O alignment */ /* * Host capabilities */ #define VTBLK_S_HOSTCAPS \ ( VTBLK_F_SEG_MAX | \ VTBLK_F_BLK_SIZE | \ VTBLK_F_TOPOLOGY | \ VIRTIO_RING_F_INDIRECT_DESC ) /* indirect descriptors */ /* * Config space "registers" */ struct vtblk_config { uint64_t vbc_capacity; uint32_t vbc_size_max; uint32_t vbc_seg_max; struct { uint16_t cylinders; uint8_t heads; uint8_t sectors; } vbc_geometry; uint32_t vbc_blk_size; struct { uint8_t physical_block_exp; uint8_t alignment_offset; uint16_t min_io_size; uint32_t opt_io_size; } vbc_topology; uint8_t vbc_writeback; } __packed; /* * Fixed-size block header */ struct virtio_blk_hdr { #define VBH_OP_READ 0 #define VBH_OP_WRITE 1 #define VBH_OP_FLUSH 4 #define VBH_OP_FLUSH_OUT 5 #define VBH_OP_IDENT 8 #define VBH_FLAG_BARRIER 0x80000000 /* OR'ed into vbh_type */ uint32_t vbh_type; uint32_t vbh_ioprio; uint64_t vbh_sector; } __packed; /* * Debug printf */ static int pci_vtblk_debug; #define DPRINTF(params) if (pci_vtblk_debug) printf params #define WPRINTF(params) printf params /* * Per-device softc */ struct pci_vtblk_softc { struct virtio_softc vbsc_vs; pthread_mutex_t vsc_mtx; struct vqueue_info vbsc_vq; int vbsc_fd; + int vbsc_ischr; struct vtblk_config vbsc_cfg; char vbsc_ident[VTBLK_BLK_ID_BYTES]; }; static void pci_vtblk_reset(void *); static void pci_vtblk_notify(void *, struct vqueue_info *); static int pci_vtblk_cfgread(void *, int, int, uint32_t *); static int pci_vtblk_cfgwrite(void *, int, int, uint32_t); static struct virtio_consts vtblk_vi_consts = { "vtblk", /* our name */ 1, /* we support 1 virtqueue */ sizeof(struct vtblk_config), /* config reg size */ pci_vtblk_reset, /* reset */ pci_vtblk_notify, /* device-wide qnotify */ pci_vtblk_cfgread, /* read PCI config */ pci_vtblk_cfgwrite, /* write PCI config */ NULL, /* apply negotiated features */ VTBLK_S_HOSTCAPS, /* our capabilities */ }; static void pci_vtblk_reset(void *vsc) { struct pci_vtblk_softc *sc = vsc; DPRINTF(("vtblk: device reset requested !\n")); vi_reset_dev(&sc->vbsc_vs); } static void pci_vtblk_proc(struct pci_vtblk_softc *sc, struct vqueue_info *vq) { struct virtio_blk_hdr *vbh; uint8_t *status; int i, n; int err; int iolen; int writeop, type; off_t offset; struct iovec iov[VTBLK_MAXSEGS + 2]; uint16_t flags[VTBLK_MAXSEGS + 2]; n = vq_getchain(vq, iov, VTBLK_MAXSEGS + 2, flags); /* * The first descriptor will be the read-only fixed header, * and the last is for status (hence +2 above and below). * The remaining iov's are the actual data I/O vectors. * * XXX - note - this fails on crash dump, which does a * VIRTIO_BLK_T_FLUSH with a zero transfer length */ assert(n >= 2 && n <= VTBLK_MAXSEGS + 2); assert((flags[0] & VRING_DESC_F_WRITE) == 0); assert(iov[0].iov_len == sizeof(struct virtio_blk_hdr)); vbh = iov[0].iov_base; status = iov[--n].iov_base; assert(iov[n].iov_len == 1); assert(flags[n] & VRING_DESC_F_WRITE); /* * XXX * The guest should not be setting the BARRIER flag because * we don't advertise the capability. */ type = vbh->vbh_type & ~VBH_FLAG_BARRIER; writeop = (type == VBH_OP_WRITE); offset = vbh->vbh_sector * DEV_BSIZE; iolen = 0; for (i = 1; i < n; i++) { /* * - write op implies read-only descriptor, * - read/ident op implies write-only descriptor, * therefore test the inverse of the descriptor bit * to the op. */ assert(((flags[i] & VRING_DESC_F_WRITE) == 0) == writeop); iolen += iov[i].iov_len; } DPRINTF(("virtio-block: %s op, %d bytes, %d segs, offset %ld\n\r", writeop ? "write" : "read/ident", iolen, i - 1, offset)); switch (type) { case VBH_OP_WRITE: - err = pwritev(sc->vbsc_fd, iov + 1, i - 1, offset); + if (pwritev(sc->vbsc_fd, iov + 1, i - 1, offset) < 0) + err = errno; break; case VBH_OP_READ: - err = preadv(sc->vbsc_fd, iov + 1, i - 1, offset); + if (preadv(sc->vbsc_fd, iov + 1, i - 1, offset) < 0) + err = errno; break; case VBH_OP_IDENT: /* Assume a single buffer */ strlcpy(iov[1].iov_base, sc->vbsc_ident, MIN(iov[1].iov_len, sizeof(sc->vbsc_ident))); err = 0; break; case VBH_OP_FLUSH: case VBH_OP_FLUSH_OUT: - err = fsync(sc->vbsc_fd); + if (sc->vbsc_ischr) { + if (ioctl(sc->vbsc_fd, DIOCGFLUSH)) + err = errno; + } else if (fsync(sc->vbsc_fd)) + err = errno; break; default: err = -ENOSYS; break; } /* convert errno into a virtio block error return */ if (err < 0) { if (err == -ENOSYS) *status = VTBLK_S_UNSUPP; else *status = VTBLK_S_IOERR; } else *status = VTBLK_S_OK; /* * Return the descriptor back to the host. * We wrote 1 byte (our status) to host. */ vq_relchain(vq, 1); } static void pci_vtblk_notify(void *vsc, struct vqueue_info *vq) { struct pci_vtblk_softc *sc = vsc; vq_startchains(vq); while (vq_has_descs(vq)) pci_vtblk_proc(sc, vq); vq_endchains(vq, 1); /* Generate interrupt if appropriate. */ } static int pci_vtblk_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) { struct stat sbuf; MD5_CTX mdctx; u_char digest[16]; struct pci_vtblk_softc *sc; off_t size, sts, sto; int fd; int sectsz; if (opts == NULL) { printf("virtio-block: backing device required\n"); return (1); } /* * The supplied backing file has to exist */ fd = open(opts, O_RDWR); if (fd < 0) { perror("Could not open backing file"); return (1); } if (fstat(fd, &sbuf) < 0) { perror("Could not stat backing file"); close(fd); return (1); } /* * Deal with raw devices */ size = sbuf.st_size; sectsz = DEV_BSIZE; sts = sto = 0; if (S_ISCHR(sbuf.st_mode)) { if (ioctl(fd, DIOCGMEDIASIZE, &size) < 0 || ioctl(fd, DIOCGSECTORSIZE, §sz)) { perror("Could not fetch dev blk/sector size"); close(fd); return (1); } assert(size != 0); assert(sectsz != 0); if (ioctl(fd, DIOCGSTRIPESIZE, &sts) == 0 && sts > 0) ioctl(fd, DIOCGSTRIPEOFFSET, &sto); } else sts = sbuf.st_blksize; sc = calloc(1, sizeof(struct pci_vtblk_softc)); /* record fd of storage device/file */ sc->vbsc_fd = fd; + sc->vbsc_ischr = S_ISCHR(sbuf.st_mode); pthread_mutex_init(&sc->vsc_mtx, NULL); /* init virtio softc and virtqueues */ vi_softc_linkup(&sc->vbsc_vs, &vtblk_vi_consts, sc, pi, &sc->vbsc_vq); sc->vbsc_vs.vs_mtx = &sc->vsc_mtx; sc->vbsc_vq.vq_qsize = VTBLK_RINGSZ; /* sc->vbsc_vq.vq_notify = we have no per-queue notify */ /* * Create an identifier for the backing file. Use parts of the * md5 sum of the filename */ MD5Init(&mdctx); MD5Update(&mdctx, opts, strlen(opts)); MD5Final(digest, &mdctx); sprintf(sc->vbsc_ident, "BHYVE-%02X%02X-%02X%02X-%02X%02X", digest[0], digest[1], digest[2], digest[3], digest[4], digest[5]); /* setup virtio block config space */ sc->vbsc_cfg.vbc_capacity = size / DEV_BSIZE; /* 512-byte units */ sc->vbsc_cfg.vbc_size_max = 0; /* not negotiated */ sc->vbsc_cfg.vbc_seg_max = VTBLK_MAXSEGS; sc->vbsc_cfg.vbc_geometry.cylinders = 0; /* no geometry */ sc->vbsc_cfg.vbc_geometry.heads = 0; sc->vbsc_cfg.vbc_geometry.sectors = 0; sc->vbsc_cfg.vbc_blk_size = sectsz; sc->vbsc_cfg.vbc_topology.physical_block_exp = (sts > sectsz) ? (ffsll(sts / sectsz) - 1) : 0; sc->vbsc_cfg.vbc_topology.alignment_offset = (sto != 0) ? ((sts - sto) / sectsz) : 0; sc->vbsc_cfg.vbc_topology.min_io_size = 0; sc->vbsc_cfg.vbc_topology.opt_io_size = 0; sc->vbsc_cfg.vbc_writeback = 0; /* * Should we move some of this into virtio.c? Could * have the device, class, and subdev_0 as fields in * the virtio constants structure. */ pci_set_cfgdata16(pi, PCIR_DEVICE, VIRTIO_DEV_BLOCK); pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR); pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_STORAGE); pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_TYPE_BLOCK); pci_lintr_request(pi); if (vi_intr_init(&sc->vbsc_vs, 1, fbsdrun_virtio_msix())) return (1); vi_set_io_bar(&sc->vbsc_vs, 0); return (0); } static int pci_vtblk_cfgwrite(void *vsc, int offset, int size, uint32_t value) { DPRINTF(("vtblk: write to readonly reg %d\n\r", offset)); return (1); } static int pci_vtblk_cfgread(void *vsc, int offset, int size, uint32_t *retval) { struct pci_vtblk_softc *sc = vsc; void *ptr; /* our caller has already verified offset and size */ ptr = (uint8_t *)&sc->vbsc_cfg + offset; memcpy(retval, ptr, size); return (0); } struct pci_devemu pci_de_vblk = { .pe_emu = "virtio-blk", .pe_init = pci_vtblk_init, .pe_barwrite = vi_pci_write, .pe_barread = vi_pci_read }; PCI_EMUL_SET(pci_de_vblk);